Commit 5207477a by 曹润柘

合并分支 'caorunzhe' 到 'master'

Caorunzhe

查看合并请求 !449
parents a85691e1 1d20a367
......@@ -78,7 +78,7 @@
\parinterval (2)分布式表示
\vspace{0.3em}
\parinterval 分布式表示的主要思想是“一个复杂系统的任何部分的输入都应该是多个特征共同表示的结果”,这种思想在自然语言处理领域的影响尤其深刻,它改变了刻画语言世界的角度,将语言文字从离散空间映射到多维连续空间。例如,在现实世界中,“张三”这个代号就代表着一个人。如果想要知道这个人亲属都有谁,因为有“A和B如果姓氏相同,在一个家谱中,那么A和B是本家”这个先验知识在,在知道代号“张三”的情况下,可以得知“张三”的亲属是谁。但是如果不依靠这个先验知识,就无法得知“张三”的亲属是谁。但在分布式表示中,可以用一个实数向量,如$ (0.1,0.3,0.4) $来表示“张三”这个人,这个人的所有特征信息都包含在这个实数向量中,通过在向量空间中的一些操作(如计算距离等),哪怕没有任何先验知识的存在,也完全可以找到这个人的所有亲属。在自然语言处理中,一个单词也用一个实数向量(词向量或词嵌入)表示,通过这种方式将语义空间重新刻画,将这个离散空间转化成了一个连续空间,这时单词就不再是一个简单的词条,而是由成百上千个特征共同描述出来的,其中每个特征分别代表这个词的某个“ 方面”。
\parinterval 分布式表示的主要思想是“一个复杂系统的任何部分的输入都应该是多个特征共同表示的结果”,这种思想在自然语言处理领域的影响尤其深刻,它改变了刻画语言世界的角度,将语言文字从离散空间映射到多维连续空间。例如,在现实世界中,“张三”这个代号就代表着一个人。如果想要知道这个人亲属都有谁,因为有“如果A和B姓氏相同且在同一个家谱中,那么A和B是本家”这个先验知识在,在知道代号“张三”的情况下,可以得知“张三”的亲属是谁。但是如果不依靠这个先验知识,就无法得知“张三”的亲属是谁。但在分布式表示中,可以用一个实数向量,如$ (0.1,0.3,0.4) $来表示“张三”这个人,这个人的所有特征信息都包含在这个实数向量中,通过在向量空间中的一些操作(如计算距离等),哪怕没有任何先验知识的存在,也完全可以找到这个人的所有亲属。在自然语言处理中,一个单词也用一个实数向量(词向量或词嵌入)表示,通过这种方式将语义空间重新刻画,将这个离散空间转化成了一个连续空间,这时单词就不再是一个简单的词条,而是由成百上千个特征共同描述出来的,其中每个特征分别代表这个词的某个“ 方面”。
\parinterval 随着第二代人工神经网络的“脱胎换骨”,学者们又对神经网络方法燃起了希望之火,这也导致有些时候过分夸大了神经网络的能力。20世纪90年代后期,由于在语音识别、自然语言处理等应用中,人们对神经网络方法期望过高,但是结果并没有达到预期,这也让很多人丧失了对神经网络方法的信任。相反,核方法、图模型等机器学习方法取得了很好的效果,这导致神经网络研究又一次进入低谷。
......@@ -1080,7 +1080,7 @@ f(x)=\begin{cases} 0 & x\le 0 \\x & x>0\end{cases}
\end{figure}
%-------------------------------------------
\parinterval 它可以被描述为公式\eqref{eq:9-27},其中隐藏层的激活函数是Tanh函数,输出层的激活函数是Sigmoid函数,${\mathbi{W}}^{[1]}$${\mathbi{b}}^{[1]}$分别表示第一层的权重矩阵和偏置,${\mathbi{W}}^{[2]}$$b^{[2]}$分别表示第二层的权重矩阵和偏置且偏置$b^{[2]}$是标量
\parinterval 它可以被描述为公式\eqref{eq:9-27},其中隐藏层的激活函数是Tanh函数,输出层的激活函数是Sigmoid函数,${\mathbi{W}}^{[1]}$${\mathbi{b}}^{[1]}$分别表示第一层的权重矩阵和偏置,${\mathbi{W}}^{[2]}$$b^{[2]}$分别表示第二层的权重矩阵和偏置\footnote{注意这里${\mathbi{b}}^{[1]}$是向量而$b^{[2]}$是标量,因而前者加粗后者未加粗}
\begin{eqnarray}
y&=&{\textrm{Sigmoid}}({\textrm{Tanh}}({\mathbi{x}}\cdot {\mathbi{W}}^{[1]}+{\mathbi{b}}^{[1]})\cdot {\mathbi{W}}^{[2]}+ b^{[2]} )
\label{eq:9-27}
......
......@@ -7219,8 +7219,6 @@ author = {Yoshua Bengio and
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2017}
}
@inproceedings{DBLP:conf/naacl/MohiuddinJ19,
author = {Tasnim Mohiuddin and
Shafiq R. Joty},
......@@ -7232,8 +7230,203 @@ author = {Yoshua Bengio and
}
@inproceedings{DBLP:conf/emnlp/ArtetxeLA18,
author = {Mikel Artetxe and
Gorka Labaka and
Eneko Agirre},
title = {Unsupervised Statistical Machine Translation},
pages = {3632--3642},
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2018}
}
@article{DBLP:journals/tacl/LeeCH17,
author = {Jason Lee and
Kyunghyun Cho and
Thomas Hofmann},
title = {Fully Character-Level Neural Machine Translation without Explicit
Segmentation},
journal = {Transactions of the Association for Computational Linguistics},
volume = {5},
pages = {365--378},
year = {2017}
}
@inproceedings{DBLP:conf/naacl/FiratCB16,
author = {Orhan Firat and
Kyunghyun Cho and
Yoshua Bengio},
title = {Multi-Way, Multilingual Neural Machine Translation with a Shared Attention
Mechanism},
pages = {866--875},
publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
year = {2016}
}
@article{DBLP:journals/corr/HaNW16,
author = {Thanh-Le Ha and
Jan Niehues and
Alexander H. Waibel},
title = {Toward Multilingual Neural Machine Translation with Universal Encoder
and Decoder},
journal = {CoRR},
volume = {abs/1611.04798},
year = {2016}
}
@article{DBLP:journals/tacl/JohnsonSLKWCTVW17,
author = {Melvin Johnson and
Mike Schuster and
Quoc V. Le and
Maxim Krikun and
Yonghui Wu and
Zhifeng Chen and
Nikhil Thorat and
Fernanda B. Vi{\'{e}}gas and
Martin Wattenberg and
Greg Corrado and
Macduff Hughes and
Jeffrey Dean},
title = {Google's Multilingual Neural Machine Translation System: Enabling
Zero-Shot Translation},
journal = {Transactions of the Association for Computational Linguistics},
volume = {5},
pages = {339--351},
year = {2017}
}
@inproceedings{DBLP:conf/coling/BlackwoodBW18,
author = {Graeme W. Blackwood and
Miguel Ballesteros and
Todd Ward},
title = {Multilingual Neural Machine Translation with Task-Specific Attention},
pages = {3112--3122},
publisher = {International Conference on Computational Linguistics},
year = {2018}
}
@inproceedings{DBLP:conf/wmt/SachanN18,
author = {Devendra Singh Sachan and
Graham Neubig},
title = {Parameter Sharing Methods for Multilingual Self-Attentional Translation
Models},
pages = {261--271},
publisher = {Association for Computational Linguistics},
year = {2018}
}
@inproceedings{DBLP:conf/wmt/LuKLBZS18,
author = {Yichao Lu and
Phillip Keung and
Faisal Ladhak and
Vikas Bhardwaj and
Shaonan Zhang and
Jason Sun},
title = {A neural interlingua for multilingual machine translation},
pages = {84--92},
publisher = {Association for Computational Linguistics},
year = {2018}
}
@inproceedings{DBLP:conf/acl/WangZZZXZ19,
author = {Yining Wang and
Long Zhou and
Jiajun Zhang and
Feifei Zhai and
Jingfang Xu and
Chengqing Zong},
title = {A Compact and Language-Sensitive Multilingual Translation Method},
pages = {1213--1223},
publisher = {Association for Computational Linguistics},
year = {2019}
}
@inproceedings{DBLP:conf/iclr/WangPAN19,
author = {Xinyi Wang and
Hieu Pham and
Philip Arthur and
Graham Neubig},
title = {Multilingual Neural Machine Translation With Soft Decoupled Encoding},
publisher = {International Conference on Learning Representations},
year = {2019}
}
@inproceedings{DBLP:conf/emnlp/TanCHXQL19,
author = {Xu Tan and
Jiale Chen and
Di He and
Yingce Xia and
Tao Qin and
Tie-Yan Liu},
title = {Multilingual Neural Machine Translation with Language Clustering},
pages = {963--973},
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2019}
}
@inproceedings{2019Consistency,
title={Consistency by Agreement in Zero-Shot Neural Machine Translation},
author={Al-Shedivat, Maruan and Parikh, Ankur },
publisher={Proceedings of the 2019 Conference of the North},
year={2019},
}
@article{DBLP:journals/corr/abs-1903-07091,
author = {Naveen Arivazhagan and
Ankur Bapna and
Orhan Firat and
Roee Aharoni and
Melvin Johnson and
Wolfgang Macherey},
title = {The Missing Ingredient in Zero-Shot Neural Machine Translation},
journal = {CoRR},
volume = {abs/1903.07091},
year = {2019}
}
@inproceedings{DBLP:conf/naacl/Al-ShedivatP19,
author = {Maruan Al-Shedivat and
Ankur P. Parikh},
title = {Consistency by Agreement in Zero-Shot Neural Machine Translation},
pages = {1184--1197},
publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
year = {2019}
}
@article{firat2016zero,
title={Zero-resource translation with multi-lingual neural machine translation},
author={Firat, Orhan and Sankaran, Baskaran and Al-Onaizan, Yaser and Vural, Fatos T Yarman and Cho, Kyunghyun},
journal={arXiv preprint arXiv:1606.04164},
year={2016}
}
@article{DBLP:journals/corr/abs-1805-10338,
author = {Lierni Sestorain and
Massimiliano Ciaramita and
Christian Buck and
Thomas Hofmann},
title = {Zero-Shot Dual Machine Translation},
journal = {CoRR},
volume = {abs/1805.10338},
year = {2018}
}
@inproceedings{DBLP:conf/acl/GuWCL19,
author = {Jiatao Gu and
Yong Wang and
Kyunghyun Cho and
Victor O. K. Li},
title = {Improved Zero-shot Neural Machine Translation via Ignoring Spurious
Correlations},
pages = {1258--1268},
publisher = {Association for Computational Linguistics},
year = {2019}
}
@inproceedings{DBLP:conf/emnlp/FiratSAYC16,
author = {Orhan Firat and
Baskaran Sankaran and
Yaser Al-Onaizan and
Fatos T. Yarman-Vural and
Kyunghyun Cho},
title = {Zero-Resource Translation with Multi-Lingual Neural Machine Translation},
pages = {268--277},
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2016}
}
@inproceedings{DBLP:conf/emnlp/CurreyH19,
author = {Anna Currey and
Kenneth Heafield},
title = {Zero-Resource Neural Machine Translation with Monolingual Pivot Data},
pages = {99--107},
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2019}
}
%%%%% chapter 16------------------------------------------------------
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论