Commit 4027b354 by 曹润柘

bib update

parent 3994e2fd
......@@ -52,7 +52,7 @@
%\node[minimum width = 1.8cm] (sub) at ([xshift=-5.5cm,yshift=2cm]num9_9.east) {};
\draw[decorate,decoration={brace,mirror,raise=0pt,amplitude=0.3cm},black,thick] ([yshift=0.4cm,xshift=-0.1cm]num1_1.west) -- node[att,xshift=-0.5cm]{$Q$} ([yshift=-0.4cm,xshift=-0.1cm]num3_3.west);
\draw[decorate,decoration={brace,raise=0pt,amplitude=0.3cm},black,thick] ([xshift=-0.4cm,yshift=0.1cm]num1.north) -- node[att,yshift=0.5cm]{$U$}([xshift=0.4cm,yshift=0.1cm]num7.north);
\draw[decorate,decoration={brace,raise=0pt,amplitude=0.3cm},black,thick] ([xshift=-0.4cm,yshift=0.1cm]num1.north) -- node[att,yshift=0.5cm]{$K$}([xshift=0.4cm,yshift=0.1cm]num7.north);
\draw[decorate,decoration={brace,mirror,raise=0pt,amplitude=0.3cm},black,thick] ([xshift=0.5cm,yshift=0.00cm]num9_9.south) -- node[att,xshift=0.5cm,yshift=-0.3cm]{$O$}([xshift=0.5cm,yshift=0.00cm]num9.south);
......
......@@ -21,10 +21,10 @@
\draw[line width=0.9pt, gray!80, -latex] (l\point_3.east) -- (r2_3.west);
}
\node[vuale] at (-1.5em, 1.9em) {$\mathbi{x}_2$};
\node[vuale] at (-1.5em, 9.9em) {$\mathbi{x}_1$};
\node[vuale] at (6.5em, 1.9em) {$\mathbi{z}_2$};
\node[vuale] at (6.5em, 9.9em) {$\mathbi{z}_1$};
\node[vuale] at (-1.5em, 1.9em) {$x_2$};
\node[vuale] at (-1.5em, 9.9em) {$x_1$};
\node[vuale] at (6.5em, 1.9em) {$y_2$};
\node[vuale] at (6.5em, 9.9em) {$y_1$};
\node (t2) at (2.5em, -1em) {\large{$\cdots$}};
\node [anchor=north,font=\tiny] at ([yshift=-0.2em]t2.south) {深度卷积};
......@@ -46,10 +46,10 @@
\draw[line width=0.9pt, cyan!80, -latex] (l\point_3.east) -- (r2_3.west);
}
\node[vuale] at (-1.5em, 1.9em) {$\mathbi{x}_2$};
\node[vuale] at (-1.5em, 9.9em) {$\mathbi{x}_1$};
\node[vuale] at (6.5em, 1.9em) {$\mathbi{z}_2$};
\node[vuale] at (6.5em, 9.9em) {$\mathbi{z}_1$};
\node[vuale] at (-1.5em, 1.9em) {$x_2$};
\node[vuale] at (-1.5em, 9.9em) {$x_1$};
\node[vuale] at (6.5em, 1.9em) {$y_2$};
\node[vuale] at (6.5em, 9.9em) {$y_1$};
\node (t2) at (2.5em, -1em) {\large{$\cdots$}};
\node [anchor=north,font=\tiny] at ([yshift=-0.2em]t2.south) {轻量卷积};
......
......@@ -32,12 +32,12 @@
\draw[line width=0.5pt, cyan!80, -latex] (l3_\point.east) -- ([xshift=0em,yshift=0.1em]r3_2.west);
}
\node[vuale] at ([xshift=-0.9em]l1_1.west) {$\mathbi{x}_3$};
\node[vuale] at ([xshift=-0.9em]l2_1.west) {$\mathbi{x}_2$};
\node[vuale] at ([xshift=-0.9em]l3_1.west) {$\mathbi{x}_1$};
\node[vuale] at ([xshift=0.9em]r1_1.east) {$\mathbi{z}_3$};
\node[vuale] at ([xshift=0.9em]r2_1.east) {$\mathbi{z}_2$};
\node[vuale] at ([xshift=0.9em]r3_1.east) {$\mathbi{z}_1$};
\node[vuale] at ([xshift=-0.9em]l1_1.west) {$x_3$};
\node[vuale] at ([xshift=-0.9em]l2_1.west) {$x_2$};
\node[vuale] at ([xshift=-0.9em]l3_1.west) {$x_1$};
\node[vuale] at ([xshift=0.9em]r1_1.east) {$y_3$};
\node[vuale] at ([xshift=0.9em]r2_1.east) {$y_2$};
\node[vuale] at ([xshift=0.9em]r3_1.east) {$y_1$};
\node (t1) at (2.5em, -1em) {\large{$\cdots$}};
\node [anchor=north,font=\tiny] at ([yshift=-0.2em]t1.south) {传统卷积};
......@@ -66,12 +66,12 @@
\draw[line width=0.5pt, cyan!80, -latex] (l\point_2.east) -- (r3_2.west);
}
\node[vuale] at ([xshift=-0.9em]l1_1.west) {$\mathbi{x}_3$};
\node[vuale] at ([xshift=-0.9em]l2_1.west) {$\mathbi{x}_2$};
\node[vuale] at ([xshift=-0.9em]l3_1.west) {$\mathbi{x}_1$};
\node[vuale] at ([xshift=0.9em]r1_1.east) {$\mathbi{z}_3$};
\node[vuale] at ([xshift=0.9em]r2_1.east) {$\mathbi{z}_2$};
\node[vuale] at ([xshift=0.9em]r3_1.east) {$\mathbi{z}_1$};
\node[vuale] at ([xshift=-0.9em]l1_1.west) {$x_3$};
\node[vuale] at ([xshift=-0.9em]l2_1.west) {$x_2$};
\node[vuale] at ([xshift=-0.9em]l3_1.west) {$x_1$};
\node[vuale] at ([xshift=0.9em]r1_1.east) {$y_3$};
\node[vuale] at ([xshift=0.9em]r2_1.east) {$y_2$};
\node[vuale] at ([xshift=0.9em]r3_1.east) {$y_1$};
\node (t2) at (2.5em, -1em) {\large{$\cdots$}};
\node [anchor=north,font=\tiny] at ([yshift=-0.2em]t2.south) {深度卷积};
......@@ -102,12 +102,12 @@
\draw[line width=0.5pt, cyan!80, -latex] (l3_\point.east) -- (r3_2.west);
}
\node[vuale] at ([xshift=-0.9em]l1_1.west) {$\mathbi{x}_3$};
\node[vuale] at ([xshift=-0.9em]l2_1.west) {$\mathbi{x}_2$};
\node[vuale] at ([xshift=-0.9em]l3_1.west) {$\mathbi{x}_1$};
\node[vuale] at ([xshift=0.9em]r1_1.east) {$\mathbi{z}_3$};
\node[vuale] at ([xshift=0.9em]r2_1.east) {$\mathbi{z}_2$};
\node[vuale] at ([xshift=0.9em]r3_1.east) {$\mathbi{z}_1$};
\node[vuale] at ([xshift=-0.9em]l1_1.west) {$x_3$};
\node[vuale] at ([xshift=-0.9em]l2_1.west) {$x_2$};
\node[vuale] at ([xshift=-0.9em]l3_1.west) {$x_1$};
\node[vuale] at ([xshift=0.9em]r1_1.east) {$y_3$};
\node[vuale] at ([xshift=0.9em]r2_1.east) {$y_2$};
\node[vuale] at ([xshift=0.9em]r3_1.east) {$y_1$};
\node (t3) at (2.5em, -1em) {\large{$\cdots$}};
\node [anchor=north,font=\tiny] at ([yshift=-0.2em]t3.south) {逐点卷积};
......
......@@ -83,7 +83,7 @@
\end{figure}
%----------------------------------------------
\parinterval 在图像卷积中,卷积核是一组$Q \times U \times O$的参数(如图\ref{fig:11-3})。其中$Q$$U$表示卷积核窗口的长度与宽度,分别对应图像中的长和宽两个维度,$Q \times U$决定了该卷积核窗口的大小。$O$是该卷积核的深度,它的取值和输入数据通道数保持一致。在这里,通道可以看作图像不同的特征,比如灰色图像只有灰度信息,通道数为1;而RGB格式的图像有3个通道,分别对应红绿蓝三种颜色信息。
\parinterval 在图像卷积中,卷积核是一组$Q \times K \times O$的参数(如图\ref{fig:11-3})。其中$Q$和$K$表示卷积核窗口的长度与宽度,分别对应图像中的长和宽两个维度,$Q \times K$决定了该卷积核窗口的大小。$O$是该卷积核的深度,它的取值和输入数据通道数保持一致。在这里,通道可以看作图像不同的特征,比如灰色图像只有灰度信息,通道数为1;而RGB格式的图像有3个通道,分别对应红绿蓝三种颜色信息。
%----------------------------------------------
% 图4.
......@@ -567,7 +567,7 @@
\parinterval 卷积是一种高效处理网格数据的计算方式,在图像、语音等领域取得了令人瞩目的成绩。本章介绍了卷积的概念及其特性,并对池化、填充等操作进行了详细的讨论。前面介绍的基于循环神经网络的翻译模型在引入注意力机制后已经大幅度超越了基于统计的机器翻译模型,但由于循环神经网络的计算方式导致网络整体的并行能力差,训练耗时。本章介绍了具有高并行计算的能力的模型范式,即基于卷积神经网络的编码器-解码器框架。其在机器翻译任务上取得了与基于循环神经网络的GNMT模型相当的性能,并大幅度缩短了模型的训练周期。除了基础部分,本章还针对卷积计算进行了延伸,包括逐通道卷积、逐点卷积、轻量卷积和动态卷积等。除了上述提及的内容,卷积神经网络及其变种在文本分类、命名实体识别等其他自然语言处理任务上也有许多应用。
\parinterval 和机器翻译任务不同的是,文本分类任务侧重于对序列特征的提取,然后通过压缩后的特征表示做出类别预测。卷积神经网络可以对序列中一些$n$-gram特征进行提取,也可以用在文本分类任务中,其基本结构包括输入层、卷积层、池化层和全连接层。除了在本章介绍过的TextCNN模型\upcite{Kim2014ConvolutionalNN},不少研究工作在此基础上对其进行改进。比如,通过改变输入层来引入更多特征\upcite{DBLP:conf/acl/NguyenG15,DBLP:conf/aaai/LaiXLZ15},对卷积层的改进\upcite{DBLP:conf/acl/ChenXLZ015,DBLP:conf/emnlp/LeiBJ15}以及对池化层的改进\upcite{Kalchbrenner2014ACN,DBLP:conf/acl/ChenXLZ015}。在命名实体识别任务中,同样可以使用卷积神经网络来进行特征提取\upcite{DBLP:journals/jmlr/CollobertWBKKK11,DBLP:conf/cncl/ZhouZXQBX17},或者使用更高效的空洞卷积对更长的上下文进行建模\upcite{DBLP:conf/emnlp/StrubellVBM17}。此外,也有一些研究工作尝试使用卷积神经网络来提取字符级特征\upcite{DBLP:conf/acl/MaH16,DBLP:conf/emnlp/LiDWCM17,DBLP:conf/acl-codeswitch/WangCK18}
\parinterval 和机器翻译任务不同的是,文本分类任务侧重于对序列特征的提取,然后通过压缩后的特征表示做出类别预测。卷积神经网络可以对序列中一些$n$-gram特征进行提取,也可以用在文本分类任务中,其基本结构包括输入层、卷积层、池化层和全连接层。除了在本章介绍过的TextCNN模型\upcite{Kim2014ConvolutionalNN},不少研究工作在此基础上对其进行改进。比如,通过改变输入层来引入更多特征\upcite{DBLP:conf/acl/NguyenG15,DBLP:conf/aaai/LaiXLZ15},对卷积层的改进\upcite{DBLP:conf/acl/ChenXLZ015,DBLP:conf/emnlp/LeiBJ15}以及对池化层的改进\upcite{Kalchbrenner2014ACN,DBLP:conf/acl/ChenXLZ015}。在命名实体识别任务中,同样可以使用卷积神经网络来进行特征提取\upcite{2011Natural,DBLP:conf/cncl/ZhouZXQBX17},或者使用更高效的空洞卷积对更长的上下文进行建模\upcite{DBLP:conf/emnlp/StrubellVBM17}。此外,也有一些研究工作尝试使用卷积神经网络来提取字符级特征\upcite{DBLP:conf/acl/MaH16,DBLP:conf/emnlp/LiDWCM17,DBLP:conf/acl-codeswitch/WangCK18}
......
......@@ -2166,6 +2166,6 @@ Jobs was the CEO of {\red{\underline{apple}}}.
\vspace{0.5em}
\item 为了进一步提高神经语言模型性能,除了改进模型,还可以在模型中引入新的结构或是其他有效信息,该领域也有很多典型工作值得关注。例如在神经语言模型中引入除了词嵌入以外的单词特征,如语言特征(形态、语法、语义特征等)\upcite{Wu2012FactoredLM,Adel2015SyntacticAS}、上下文信息\upcite{mikolov2012context,Wang2015LargerContextLM}、知识图谱等外部知识\upcite{Ahn2016ANK};或是在神经语言模型中引入字符级信息,将其作为字符特征单独\upcite{Kim2016CharacterAwareNL,Hwang2017CharacterlevelLM}或与单词特征一起\upcite{Onoe2016GatedWR,Verwimp2017CharacterWordLL}送入模型中;在神经语言模型中引入双向模型也是一种十分有效的尝试,在单词预测时可以同时利用来自过去和未来的文本信息\upcite{Graves2013HybridSR,bahdanau2014neural,Peters2018DeepCW}
\vspace{0.5em}
\item 词嵌入是自然语言处理近些年的重要进展。所谓“嵌入”是一类方法,理论上,把一个事物进行分布式表示的过程都可以被看作是广义上的“嵌入”。基于这种思想的表示学习也成为了自然语言处理中的前沿方法。比如,如何对树结构,甚至图结构进行分布式表示成为了分析自然语言的重要方法\upcite{DBLP:journals/corr/abs-1809-01854,Yin2018StructVAETL,Aharoni2017TowardsSN,Bastings2017GraphCE,KoncelKedziorski2019TextGF}。此外,除了语言建模,还有很多方式可以进行词嵌入的学习,比如,SENNA\upcite{collobert2011natural}、word2vec\upcite{DBLP:journals/corr/abs-1301-3781,mikolov2013distributed}、Glove\upcite{DBLP:conf/emnlp/PenningtonSM14}、CoVe\upcite{mccann2017learned} 等。
\item 词嵌入是自然语言处理近些年的重要进展。所谓“嵌入”是一类方法,理论上,把一个事物进行分布式表示的过程都可以被看作是广义上的“嵌入”。基于这种思想的表示学习也成为了自然语言处理中的前沿方法。比如,如何对树结构,甚至图结构进行分布式表示成为了分析自然语言的重要方法\upcite{DBLP:journals/corr/abs-1809-01854,Yin2018StructVAETL,Aharoni2017TowardsSN,Bastings2017GraphCE,KoncelKedziorski2019TextGF}。此外,除了语言建模,还有很多方式可以进行词嵌入的学习,比如,SENNA\upcite{2011Natural}、word2vec\upcite{DBLP:journals/corr/abs-1301-3781,mikolov2013distributed}、Glove\upcite{DBLP:conf/emnlp/PenningtonSM14}、CoVe\upcite{mccann2017learned} 等。
\vspace{0.5em}
\end{itemize}
......@@ -3867,8 +3867,7 @@ year = {2012}
volume={18},
number={4},
pages={467--479},
year={1992},
publisher={MIT Press}
year={1992}
}
@inproceedings{mikolov2012context,
......@@ -3877,10 +3876,9 @@ year = {2012}
Tomas and
Zweig and
Geoffrey},
booktitle={2012 IEEE Spoken Language Technology Workshop (SLT)},
publisher={IEEE Spoken Language Technology Workshop},
pages={234--239},
year={2012},
organization={IEEE}
year={2012}
}
@article{zaremba2014recurrent,
......@@ -3905,7 +3903,7 @@ year = {2012}
Jan and
Schmidhuber and
Jurgen},
journal={arXiv: Learning},
journal={International Conference on Machine Learning},
year={2016}
}
......@@ -3917,7 +3915,7 @@ year = {2012}
Nitish Shirish and
Socher and
Richard},
journal={arXiv: Computation and Language},
journal={International Conference on Learning Representations},
year={2017}
}
......@@ -3934,12 +3932,11 @@ year = {2012}
@article{baydin2017automatic,
title ={Automatic differentiation in machine learning: a survey},
author ={Baydin, At{\i}l{\i}m G{\"u}nes and Pearlmutter, Barak A and Radul, Alexey Andreyevich and Siskind, Jeffrey Mark},
journal ={The Journal of Machine Learning Research},
journal ={Journal of Machine Learning Research},
volume ={18},
number ={1},
pages ={5595--5637},
year ={2017},
publisher ={JMLR. org}
year ={2017}
}
@article{qian1999momentum,
......@@ -3977,9 +3974,8 @@ year = {2012}
author = {Diederik P. Kingma and
Jimmy Ba},
title = {Adam: {A} Method for Stochastic Optimization},
booktitle = {3rd International Conference on Learning Representations, {ICLR} 2015,
San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings},
year = {2015},
publisher = {International Conference on Learning Representations},
year = {2015}
}
@inproceedings{ioffe2015batch,
......@@ -3987,13 +3983,10 @@ year = {2012}
Christian Szegedy},
title = {Batch Normalization: Accelerating Deep Network Training by Reducing
Internal Covariate Shift},
booktitle = {Proceedings of the 32nd International Conference on Machine Learning,
{ICML} 2015, Lille, France, 6-11 July 2015},
series = {{JMLR} Workshop and Conference Proceedings},
publisher = {International Conference on Machine Learning},
volume = {37},
pages = {448--456},
publisher = {JMLR.org},
year = {2015},
year = {2015}
}
@article{Ba2016LayerN,
......@@ -4003,7 +3996,7 @@ year = {2012}
title = {Layer Normalization},
journal = {CoRR},
volume = {abs/1607.06450},
year = {2016},
year = {2016}
}
@inproceedings{mikolov2013distributed,
......@@ -4013,11 +4006,9 @@ year = {2012}
Gregory S. Corrado and
Jeffrey Dean},
title = {Distributed Representations of Words and Phrases and their Compositionality},
booktitle = {Advances in Neural Information Processing Systems 26: 27th Annual
Conference on Neural Information Processing Systems 2013. Proceedings
of a meeting held December 5-8, 2013, Lake Tahoe, Nevada, United States},
publisher = {Conference on Neural Information Processing Systems},
pages = {3111--3119},
year = {2013},
year = {2013}
}
@inproceedings{arthur2016incorporating,
......@@ -4025,12 +4016,9 @@ year = {2012}
Graham Neubig and
Satoshi Nakamura},
title = {Incorporating Discrete Translation Lexicons into Neural Machine Translation},
booktitle = {Proceedings of the 2016 Conference on Empirical Methods in Natural
Language Processing, {EMNLP} 2016, Austin, Texas, USA, November 1-4,
2016},
pages = {1557--1567},
publisher = {The Association for Computational Linguistics},
year = {2016},
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2016}
}
@inproceedings{stahlberg2016syntactically,
......@@ -4039,10 +4027,7 @@ year = {2012}
Aurelien Waite and
Bill Byrne},
title = {Syntactically Guided Neural Machine Translation},
booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational
Linguistics, {ACL} 2016, August 7-12, 2016, Berlin, Germany, Volume
2: Short Papers},
publisher = {The Association for Computer Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016},
}
......@@ -4051,12 +4036,9 @@ year = {2012}
Alessandro Moschitti},
title = {Embedding Semantic Similarity in Tree Kernels for Domain Adaptation
of Relation Extraction},
booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational
Linguistics, {ACL} 2013, 4-9 August 2013, Sofia, Bulgaria, Volume
1: Long Papers},
pages = {1498--1507},
publisher = {The Association for Computer Linguistics},
year = {2013},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2013}
}
@inproceedings{perozzi2014deepwalk,
......@@ -4064,42 +4046,32 @@ year = {2012}
Rami Al-Rfou and
Steven Skiena},
title = {DeepWalk: online learning of social representations},
booktitle = {The 20th {ACM} {SIGKDD} International Conference on Knowledge Discovery
and Data Mining, {KDD} '14, New York, NY, {USA} - August 24 - 27,
2014},
publisher = {ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
pages = {701--710},
publisher = {{ACM}},
year = {2014},
year = {2014}
}
@article{collobert2011natural,
author = {Ronan Collobert and
Jason Weston and
L{\'{e}}on Bottou and
Michael Karlen and
Koray Kavukcuoglu and
Pavel P. Kuksa},
title = {Natural Language Processing (Almost) from Scratch},
journal = {Journal of Machine Learning Research},
volume = {12},
pages = {2493--2537},
year = {2011},
@article{2011Natural,
title={Natural Language Processing (almost) from Scratch},
author={ Collobert, Ronan and Weston, Jason and Bottou, Léon and Karlen, Michael and Kavukcuoglu, Koray and Kuksa, Pavel },
journal={Journal of Machine Learning Research},
volume={12},
number={1},
pages={2493--2537},
year={2011}
}
@inproceedings{mccann2017learned,
author = {Bryan McCann and
James Bradbury and
Caiming Xiong and
Richard Socher},
title = {Learned in Translation: Contextualized Word Vectors},
booktitle = {Advances in Neural Information Processing Systems 30: Annual Conference
on Neural Information Processing Systems 2017, 4-9 December 2017,
Long Beach, CA, {USA}},
booktitle = {Conference on Neural Information Processing Systems},
pages = {6294--6305},
year = {2017},
year = {2017}
}
%%%%%%%%%%%%%%%%%%%%%%%神经语言模型,检查修改%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%神经语言模型,检查修改%%%%%%%%%%%%%%%%%%%%%%%%%
@inproceedings{Peters2018DeepCW,
title={Deep contextualized word representations},
author={Matthew E. Peters and
......@@ -4135,13 +4107,13 @@ year = {2012}
}
@inproceedings{Onoe2016GatedWR,
title={Gated Word-Character Recurrent Language Model},
author={Yasumasa Miyamoto and
author = {Yasumasa Miyamoto and
Kyunghyun Cho},
publisher={arXiv preprint arXiv:1606.01700},
year={2016}
title = {Gated Word-Character Recurrent Language Model},
pages = {1992--1997},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016}
}
@inproceedings{Hwang2017CharacterlevelLM,
title={Character-level language modeling with hierarchical recurrent neural networks},
author={Kyuyeon Hwang and
......@@ -4216,12 +4188,11 @@ year = {2012}
Ruocheng Guo and
Adrienne Raglin and
Huan Liu},
journal={ACM SIGKDD Explorations Newsletter},
journal={ACM SIGKDD Explorations Newsletter},
volume={22},
number={1},
pages={18--33},
year={2020},
publisher={ACM New York, NY, USA}
year={2020}
}
@incollection{nguyen2019understanding,
......@@ -4231,7 +4202,7 @@ year = {2012}
Jeff Clune},
pages={55--76},
year={2019},
publisher={Explainable AI}
publisher={Springer}
}
@inproceedings{yang2017improving,
title={Improving adversarial neural machine translation with prior knowledge},
......@@ -4250,15 +4221,16 @@ year = {2012}
title={Incorporating source syntax into transformer-based neural machine translation},
author={Anna Currey and
Kenneth Heafield},
publisher={Proceedings of the Fourth Conference on Machine Translation},
publisher={Annual Meeting of the Association for Computational Linguistics},
pages={24--33},
year={2019}
}
@article{currey2018multi,
title={Multi-source syntactic neural machine translation},
author={Anna Currey and
Kenneth Heafield},
journal={arXiv preprint arXiv:1808.10267},
journal={Conference on Empirical Methods in Natural Language Processing},
year={2018}
}
@inproceedings{marevcek2018extracting,
......@@ -4272,7 +4244,7 @@ year = {2012}
@article{blevins2018deep,
title={Deep rnns encode soft hierarchical syntax},
author={Blevins, Terra and Levy, Omer and Zettlemoyer, Luke},
journal={arXiv preprint arXiv:1805.04218},
journal={Annual Meeting of the Association for Computational Linguistics},
year={2018}
}
@inproceedings{Yin2018StructVAETL,
......@@ -4288,7 +4260,7 @@ year = {2012}
title={Towards String-To-Tree Neural Machine Translation},
author={Roee Aharoni and
Yoav Goldberg},
journal={arXiv preprint arXiv:1704.04743},
journal={Annual Meeting of the Association for Computational Linguistics},
year={2017}
}
......@@ -4308,9 +4280,8 @@ year = {2012}
Dhanush Bekal and Yi Luan and
Mirella Lapata and
Hannaneh Hajishirzi},
journal={ArXiv},
year={2019},
volume={abs/1904.02342}
journal={Annual Conference of the North American Chapter of the Association for Computational Linguistics},
year={2019}
}
@article{Kovalerchuk2020SurveyOE,
......@@ -4327,7 +4298,7 @@ year = {2012}
title={Towards A Rigorous Science of Interpretable Machine Learning},
author={Finale Doshi-Velez and
Been Kim},
journal={arXiv: Machine Learning},
journal={arXiv preprint arXiv:1702.08608},
year={2017}
}
......@@ -4349,7 +4320,7 @@ year = {2012}
title = {Does Multi-Encoder Help? {A} Case Study on Context-Aware Neural Machine
Translation},
pages = {3512--3518},
publisher = {Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2020}
}
......@@ -4359,7 +4330,7 @@ year = {2012}
Abe Ittycheriah},
title = {Supervised Attentions for Neural Machine Translation},
pages = {2283--2288},
publisher = {The Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016}
}
......@@ -4370,7 +4341,7 @@ year = {2012}
Eiichiro Sumita},
title = {Neural Machine Translation with Supervised Attention},
pages = {3093--3102},
publisher = {The Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016}
}
......@@ -4384,16 +4355,16 @@ year = {2012}
title = {Fast and Robust Neural Network Joint Models for Statistical Machine
Translation},
pages = {1370--1380},
publisher = {The Association for Computer Linguistics},
year = {2014},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2014}
}
@inproceedings{Schwenk_continuousspace,
author = {Holger Schwenk},
title = {Continuous Space Translation Models for Phrase-Based Statistical Machine
Translation},
pages = {1071--1080},
publisher = {Indian Institute of Technology Bombay},
year = {2012},
publisher = {International Conference on Computational Linguistics},
year = {2012}
}
@inproceedings{kalchbrenner-blunsom-2013-recurrent,
author = {Nal Kalchbrenner and
......@@ -4401,25 +4372,24 @@ year = {2012}
title = {Recurrent Continuous Translation Models},
pages = {1700--1709},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2013},
year = {2013}
}
@article{HochreiterThe,
author = {Sepp Hochreiter},
title = {The Vanishing Gradient Problem During Learning Recurrent Neural Nets
and Problem Solutions},
journal = {International Journal of Uncertainty, Fuzziness and Knowledge-Based
Systems},
journal = {International Journal of Uncertainty, Fuzziness and Knowledge-Based Systems},
volume = {6},
number = {2},
pages = {107--116},
year = {1998},
year = {1998}
}
@article{BENGIO1994Learning,
author = {Yoshua Bengio and
Patrice Y. Simard and
Paolo Frasconi},
title = {Learning long-term dependencies with gradient descent is difficult},
journal = {Institute of Electrical and Electronics Engineers},
journal = {IEEE Transactions on Neural Networks},
volume = {5},
number = {2},
pages = {157--166},
......@@ -4435,15 +4405,14 @@ author = {Yoshua Bengio and
Lukasz Kaiser and
Illia Polosukhin},
title = {Attention is All you Need},
publisher = {Advances in Neural Information Processing Systems 30: Annual Conference
on Neural Information Processing Systems},
publisher = {Conference on Neural Information Processing Systems},
pages = {5998--6008},
year = {2017},
year = {2017}
}
@article{StahlbergNeural,
title={Neural Machine Translation: A Review},
author={Felix Stahlberg},
journal={journal of artificial intelligence research},
journal={Journal of Artificial Intelligence Research},
year={2020},
volume={69},
pages={343--418}
......@@ -4455,8 +4424,8 @@ author = {Yoshua Bengio and
Marcello Federico},
title = {Neural versus Phrase-Based Machine Translation Quality: a Case Study},
pages = {257--267},
publisher = {The Association for Computational Linguistics},
year = {2016},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016}
}
@article{Hassan2018AchievingHP,
author = {Hany Hassan and
......@@ -4498,19 +4467,19 @@ author = {Yoshua Bengio and
Lidia S. Chao},
title = {Learning Deep Transformer Models for Machine Translation},
pages = {1810--1822},
publisher = {Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019}
}
@article{Li2020NeuralMT,
@inproceedings{Li2020NeuralMT,
author = {Yanyang Li and
Qiang Wang and
Tong Xiao and
Tongran Liu and
Jingbo Zhu},
title = {Neural Machine Translation with Joint Representation},
journal = {CoRR},
volume = {abs/2002.06546},
year = {2020},
pages = {8285--8292},
publisher = {AAAI Conference on Artificial Intelligence},
year = {2020}
}
@article{HochreiterLong,
author = {Hochreiter, Sepp and Schmidhuber, Jürgen},
......@@ -4519,7 +4488,7 @@ author = {Yoshua Bengio and
pages = {1735--1780},
title = {Long Short-term Memory},
volume = {9},
journal = {Neural computation},
journal = {Neural Computation}
}
@inproceedings{Cho2014Learning,
author = {Kyunghyun Cho and
......@@ -4531,24 +4500,18 @@ author = {Yoshua Bengio and
Yoshua Bengio},
title = {Learning Phrase Representations using {RNN} Encoder-Decoder for Statistical
Machine Translation},
publisher = {Proceedings of the 2014 Conference on Empirical Methods in Natural
Language Processing, {EMNLP} 2014, October 25-29, 2014, Doha, Qatar,
{A} meeting of SIGDAT, a Special Interest Group of the {ACL}},
publisher = {Conference on Empirical Methods in Natural Language Processing},
pages = {1724--1734},
//publisher = {{ACL}},
year = {2014},
year = {2014}
}
@inproceedings{pmlr-v9-glorot10a,
author = {Xavier Glorot and
Yoshua Bengio},
title = {Understanding the difficulty of training deep feedforward neural networks},
publisher = {Proceedings of the Thirteenth International Conference on Artificial
Intelligence and Statistics, {AISTATS} 2010, Chia Laguna Resort, Sardinia,
Italy, May 13-15, 2010},
publisher = {International Conference on Artificial Intelligence and Statistics},
volume = {9},
pages = {249--256},
//publisher = {JMLR.org},
year = {2010},
year = {2010}
}
@inproceedings{xiao2017fast,
author = {Tong Xiao and
......@@ -4556,12 +4519,9 @@ author = {Yoshua Bengio and
Tongran Liu and
Chunliang Zhang},
title = {Fast Parallel Training of Neural Language Models},
publisher = {Proceedings of the Twenty-Sixth International Joint Conference on
Artificial Intelligence, {IJCAI} 2017, Melbourne, Australia, August
19-25, 2017},
publisher = {International Joint Conference on Artificial Intelligence},
pages = {4193--4199},
//publisher = {ijcai.org},
year = {2017},
year = {2017}
}
@inproceedings{Gu2017NonAutoregressiveNM,
author = {Jiatao Gu and
......@@ -4571,7 +4531,7 @@ author = {Yoshua Bengio and
Richard Socher},
title = {Non-Autoregressive Neural Machine Translation},
publisher = {International Conference on Learning Representations},
year = {2018},
year = {2018}
}
@inproceedings{li-etal-2018-simple,
author = {Yanyang Li and
......@@ -4581,12 +4541,9 @@ author = {Yoshua Bengio and
Changming Xu and
Jingbo Zhu},
title = {A Simple and Effective Approach to Coverage-Aware Neural Machine Translation},
publisher = {Proceedings of the 56th Annual Meeting of the Association for Computational
Linguistics, {ACL} 2018, Melbourne, Australia, July 15-20, 2018, Volume
2: Short Papers},
publisher = {Annual Meeting of the Association for Computational Linguistics},
pages = {292--297},
//publisher = {Association for Computational Linguistics},
year = {2018},
year = {2018}
}
@inproceedings{TuModeling,
author = {Zhaopeng Tu and
......@@ -4595,11 +4552,8 @@ author = {Yoshua Bengio and
Xiaohua Liu and
Hang Li},
title = {Modeling Coverage for Neural Machine Translation},
publisher = {Proceedings of the 54th Annual Meeting of the Association for Computational
Linguistics, {ACL} 2016, August 7-12, 2016, Berlin, Germany, Volume
1: Long Papers},
//publisher = {The Association for Computer Linguistics},
year = {2016},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016}
}
@inproceedings{DBLP:journals/corr/SennrichFCBHHJL17,
author = {Rico Sennrich and
......@@ -4614,23 +4568,17 @@ author = {Yoshua Bengio and
Jozef Mokry and
Maria Nadejde},
title = {Nematus: a Toolkit for Neural Machine Translation},
publisher = {Proceedings of the 15th Conference of the European Chapter of the
Association for Computational Linguistics, {EACL} 2017, Valencia,
Spain, April 3-7, 2017, Software Demonstrations},
publisher = {Conference of the European Chapter of the Association for Computational Linguistics},
pages = {65--68},
//publisher = {Association for Computational Linguistics},
year = {2017},
year = {2017}
}
@inproceedings{DBLP:journals/corr/abs-1905-13324,
author = {Biao Zhang and
Rico Sennrich},
title = {A Lightweight Recurrent Network for Sequence Modeling},
publisher = {Proceedings of the 57th Conference of the Association for Computational
Linguistics, {ACL} 2019, Florence, Italy, July 28- August 2, 2019,
Volume 1: Long Papers},
publisher = {Annual Meeting of the Association for Computational Linguistics},
pages = {1538--1548},
//publisher = {Association for Computational Linguistics},
year = {2019},
year = {2019}
}
@article{Lei2017TrainingRA,
author = {Tao Lei and
......@@ -4639,7 +4587,7 @@ author = {Yoshua Bengio and
title = {Training RNNs as Fast as CNNs},
journal = {CoRR},
volume = {abs/1709.02755},
year = {2017},
year = {2017}
}
@inproceedings{Zhang2018SimplifyingNM,
author = {Biao Zhang and
......@@ -4649,22 +4597,18 @@ author = {Yoshua Bengio and
Huiji Zhang},
title = {Simplifying Neural Machine Translation with Addition-Subtraction Twin-Gated
Recurrent Networks},
publisher = {Proceedings of the 2018 Conference on Empirical Methods in Natural
Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
publisher = {Conference on Empirical Methods in Natural Language Processing},
pages = {4273--4283},
//publisher = {Association for Computational Linguistics},
year = {2018},
year = {2018}
}
@inproceedings{Liu_2019_CVPR,
author = {Shikun Liu and
Edward Johns and
Andrew J. Davison},
title = {End-To-End Multi-Task Learning With Attention},
publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition, {CVPR}
2019, Long Beach, CA, USA, June 16-20, 2019},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
pages = {1871--1880},
//publisher = {Computer Vision Foundation / {IEEE}},
year = {2019},
year = {2019}
}
@inproceedings{DBLP:journals/corr/abs-1811-00498,
author = {Ra{\'{u}}l V{\'{a}}zquez and
......@@ -4672,11 +4616,9 @@ author = {Yoshua Bengio and
J{\"{o}}rg Tiedemann and
Mathias Creutz},
title = {Multilingual {NMT} with a Language-Independent Attention Bridge},
publisher = {Proceedings of the 4th Workshop on Representation Learning for NLP,
RepL4NLP@ACL 2019, Florence, Italy, August 2, 2019},
publisher = {Annual Meeting of the Association for Computational Linguistics},
pages = {33--39},
//publisher = {Association for Computational Linguistics},
year = {2019},
year = {2019}
}
@inproceedings{MoradiInterrogating,
author = {Pooya Moradi and
......@@ -4684,11 +4626,9 @@ author = {Yoshua Bengio and
Anoop Sarkar},
title = {Interrogating the Explanatory Power of Attention in Neural Machine
Translation},
publisher = {Proceedings of the 3rd Workshop on Neural Generation and Translation@EMNLP-IJCNLP
2019, Hong Kong, November 4, 2019},
publisher = {Conference on Empirical Methods in Natural Language Processing},
pages = {221--230},
//publisher = {Association for Computational Linguistics},
year = {2019},
year = {2019}
}
@inproceedings{WangNeural,
author = {Xing Wang and
......@@ -4698,11 +4638,9 @@ author = {Yoshua Bengio and
Deyi Xiong and
Min Zhang},
title = {Neural Machine Translation Advised by Statistical Machine Translation},
publisher = {Proceedings of the Thirty-First {AAAI} Conference on Artificial Intelligence,
February 4-9, 2017, San Francisco, California, {USA}},
publisher = {AAAI Conference on Artificial Intelligence},
pages = {3330--3336},
//publisher = {{AAAI} Press},
year = {2017},
year = {2017}
}
@inproceedings{Xiao2019SharingAW,
author = {Tong Xiao and
......@@ -4711,12 +4649,9 @@ author = {Yoshua Bengio and
Zhengtao Yu and
Tongran Liu},
title = {Sharing Attention Weights for Fast Transformer},
publisher = {Proceedings of the Twenty-Eighth International Joint Conference on
Artificial Intelligence, {IJCAI} 2019, Macao, China, August 10-16,
2019},
publisher = {International Joint Conference on Artificial Intelligence},
pages = {5292--5298},
//publisher = {ijcai.org},
year = {2019},
year = {2019}
}
@inproceedings{Yang2017TowardsBH,
author = {Baosong Yang and
......@@ -4726,36 +4661,27 @@ author = {Yoshua Bengio and
Jingbo Zhu},
title = {Towards Bidirectional Hierarchical Representations for Attention-based
Neural Machine Translation},
publisher = {Proceedings of the 2017 Conference on Empirical Methods in Natural
Language Processing, {EMNLP} 2017, Copenhagen, Denmark, September
9-11, 2017},
publisher = {Conference on Empirical Methods in Natural Language Processing},
pages = {1432--1441},
//publisher = {Association for Computational Linguistics},
year = {2017},
year = {2017}
}
@inproceedings{Wang2019TreeTI,
author = {Yau-Shian Wang and
Hung-yi Lee and
Yun-Nung Chen},
title = {Tree Transformer: Integrating Tree Structures into Self-Attention},
publisher = {Proceedings of the 2019 Conference on Empirical Methods in Natural
Language Processing and the 9th International Joint Conference on
Natural Language Processing, {EMNLP-IJCNLP} 2019, Hong Kong, China,
November 3-7, 2019},
//publisher = {Association for Computational Linguistics},
publisher = {Conference on Empirical Methods in Natural Language Processing},
pages = {1061--1070},
year = {2019},
year = {2019}
}
@inproceedings{DBLP:journals/corr/abs-1809-01854,
author = {Jetic Gu and
Hassan S. Shavarani and
Anoop Sarkar},
title = {Top-down Tree Structured Decoding with Syntactic Connections for Neural Machine Translation and Parsing},
publisher = {Proceedings of the 2018 Conference on Empirical Methods in Natural
Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
publisher = {Conference on Empirical Methods in Natural Language Processing},
pages = {401--413},
//publisher = {Association for Computational Linguistics},
year = {2018},
year = {2018}
}
@inproceedings{DBLP:journals/corr/abs-1808-09374,
author = {Xinyi Wang and
......@@ -4763,11 +4689,9 @@ author = {Yoshua Bengio and
Pengcheng Yin and
Graham Neubig},
title = {A Tree-based Decoder for Neural Machine Translation},
publisher = {Proceedings of the 2018 Conference on Empirical Methods in Natural
Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
publisher = {Conference on Empirical Methods in Natural Language Processing},
pages = {4772--4777},
//publisher = {Association for Computational Linguistics},
year = {2018},
year = {2018}
}
@article{DBLP:journals/corr/ZhangZ16c,
author = {Jiajun Zhang and
......@@ -4775,7 +4699,7 @@ author = {Yoshua Bengio and
title = {Bridging Neural Machine Translation and Bilingual Dictionaries},
journal = {CoRR},
volume = {abs/1610.07272},
year = {2016},
year = {2016}
}
@article{Dai2019TransformerXLAL,
author = {Zihang Dai and
......@@ -4787,7 +4711,7 @@ author = {Yoshua Bengio and
title = {Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context},
journal = {CoRR},
volume = {abs/1901.02860},
year = {2019},
year = {2019}
}
@inproceedings{li-etal-2019-word,
author = {Xintong Li and
......@@ -4796,12 +4720,9 @@ author = {Yoshua Bengio and
Max Meng and
Shuming Shi},
title = {On the Word Alignment from Neural Machine Translation},
publisher = {Proceedings of the 57th Conference of the Association for Computational
Linguistics, {ACL} 2019, Florence, Italy, July 28- August 2, 2019,
Volume 1: Long Papers},
publisher = {Annual Meeting of the Association for Computational Linguistics},
pages = {1293--1303},
//publisher = {Association for Computational Linguistics},
year = {2019},
year = {2019}
}
@inproceedings{Werlen2018DocumentLevelNM,
......@@ -4811,11 +4732,9 @@ author = {Yoshua Bengio and
James Henderson},
title = {Document-Level Neural Machine Translation with Hierarchical Attention
Networks},
publisher = {Proceedings of the 2018 Conference on Empirical Methods in Natural
Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
publisher = {Conference on Empirical Methods in Natural Language Processing},
pages = {2947--2954},
//publisher = {Association for Computational Linguistics},
year = {2018},
year = {2018}
}
@inproceedings{DBLP:journals/corr/abs-1805-10163,
author = {Elena Voita and
......@@ -4823,12 +4742,9 @@ author = {Yoshua Bengio and
Rico Sennrich and
Ivan Titov},
title = {Context-Aware Neural Machine Translation Learns Anaphora Resolution},
publisher = {Proceedings of the 56th Annual Meeting of the Association for Computational
Linguistics, {ACL} 2018, Melbourne, Australia, July 15-20, 2018, Volume
1: Long Papers},
publisher = {Annual Meeting of the Association for Computational Linguistics},
pages = {1264--1274},
//publisher = {Association for Computational Linguistics},
year = {2018},
year = {2018}
}
@article{DBLP:journals/corr/abs-1906-00532,
author = {Aishwarya Bhandare and
......@@ -4842,7 +4758,7 @@ author = {Yoshua Bengio and
Translation Model},
journal = {CoRR},
volume = {abs/1906.00532},
year = {2019},
year = {2019}
}
@inproceedings{Zhang2018SpeedingUN,
......@@ -4852,22 +4768,18 @@ author = {Yoshua Bengio and
Lei Shen and
Qun Liu},
title = {Speeding Up Neural Machine Translation Decoding by Cube Pruning},
publisher = {Proceedings of the 2018 Conference on Empirical Methods in Natural
Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
publisher = {Conference on Empirical Methods in Natural Language Processing},
pages = {4284--4294},
//publisher = {Association for Computational Linguistics},
year = {2018},
year = {2018}
}
@inproceedings{DBLP:journals/corr/SeeLM16,
author = {Abigail See and
Minh-Thang Luong and
Christopher D. Manning},
title = {Compression of Neural Machine Translation Models via Pruning},
publisher = {Proceedings of the 20th {SIGNLL} Conference on Computational Natural
Language Learning, CoNLL 2016, Berlin, Germany, August 11-12, 2016},
publisher = {Conference on Computational Natural Language Learning},
pages = {291--301},
//publisher = {{ACL}},
year = {2016},
year = {2016}
}
@inproceedings{DBLP:journals/corr/ChenLCL17,
author = {Yun Chen and
......@@ -4875,12 +4787,9 @@ author = {Yoshua Bengio and
Yong Cheng and
Victor O. K. Li},
title = {A Teacher-Student Framework for Zero-Resource Neural Machine Translation},
publisher = {Proceedings of the 55th Annual Meeting of the Association for Computational
Linguistics, {ACL} 2017, Vancouver, Canada, July 30 - August 4, Volume
1: Long Papers},
pages = {1925--1935},
//publisher = {Association for Computational Linguistics},
year = {2017},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2017}
}
@article{Hinton2015Distilling,
author = {Geoffrey E. Hinton and
......@@ -4889,13 +4798,13 @@ author = {Yoshua Bengio and
title = {Distilling the Knowledge in a Neural Network},
journal = {CoRR},
volume = {abs/1503.02531},
year = {2015},
year = {2015}
}
@inproceedings{Ott2018ScalingNM,
title={Scaling Neural Machine Translation},
author={Myle Ott and Sergey Edunov and David Grangier and M. Auli},
publisher={Workshop on Machine Translation},
publisher={Conference on Machine Translation},
year={2018}
}
@inproceedings{Lin2020TowardsF8,
......@@ -4915,7 +4824,7 @@ author = {Yoshua Bengio and
Alexander M. Rush},
title = {Sequence-Level Knowledge Distillation},
pages = {1317--1327},
publisher = {The Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016}
}
@article{Akaike1969autoregressive,
......@@ -4946,13 +4855,13 @@ author = {Yoshua Bengio and
title = {The Best of Both Worlds: Combining Recent Advances in Neural Machine
Translation},
pages = {76--86},
publisher = {Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2018}
}
@inproceedings{He2018LayerWiseCB,
title={Layer-Wise Coordination between Encoder and Decoder for Neural Machine Translation},
author={Tianyu He and X. Tan and Yingce Xia and D. He and T. Qin and Zhibo Chen and T. Liu},
publisher={Conference and Workshop on Neural Information Processing Systems},
publisher={Conference on Neural Information Processing Systems},
year={2018}
}
@inproceedings{cho-etal-2014-properties,
......@@ -4962,7 +4871,7 @@ author = {Yoshua Bengio and
Yoshua Bengio},
title = {On the Properties of Neural Machine Translation: Encoder-Decoder Approaches},
pages = {103--111},
publisher = {Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2014}
}
......@@ -4973,7 +4882,7 @@ author = {Yoshua Bengio and
Yoshua Bengio},
title = {On Using Very Large Target Vocabulary for Neural Machine Translation},
pages = {1--10},
publisher = {The Association for Computer Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2015}
}
......@@ -4982,8 +4891,7 @@ author = {Yoshua Bengio and
Hieu Pham and
Christopher D. Manning},
title = {Effective Approaches to Attention-based Neural Machine Translation},
publisher = {Conference on Empirical Methods in Natural
Language Processing},
publisher = {Conference on Empirical Methods in Natural Language Processing},
pages = {1412--1421},
year = {2015}
}
......@@ -4994,7 +4902,7 @@ author = {Yoshua Bengio and
Haifeng Wang},
title = {Improved Neural Machine Translation with {SMT} Features},
pages = {151--157},
publisher = {the Association for the Advance of Artificial Intelligence},
publisher = {AAAI Conference on Artificial Intelligence},
year = {2016}
}
@inproceedings{zhang-etal-2017-prior,
......@@ -5005,7 +4913,7 @@ author = {Yoshua Bengio and
Xu, Jingfang and
Sun, Maosong},
year = {2017},
publisher = {Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
pages = {1514--1523},
}
......@@ -5021,7 +4929,7 @@ author = {Yoshua Bengio and
title = {Bilingual Dictionary Based Neural Machine Translation without Using
Parallel Sentences},
pages = {1570--1579},
publisher = {Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2020}
}
......@@ -5030,7 +4938,7 @@ author = {Yoshua Bengio and
Deyi Xiong},
title = {Encoding Gated Translation Memory into Neural Machine Translation},
pages = {3042--3047},
publisher = {Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2018}
}
@inproceedings{yang-etal-2016-hierarchical,
......@@ -5042,7 +4950,7 @@ author = {Yoshua Bengio and
Eduard H. Hovy},
title = {Hierarchical Attention Networks for Document Classification},
pages = {1480--1489},
publisher = {The Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016}
}
%%%%% chapter 10------------------------------------------------------
......@@ -5056,7 +4964,7 @@ author = {Yoshua Bengio and
Douwe Kiela},
title = {Code-Switched Named Entity Recognition with Embedding Attention},
pages = {154--158},
publisher = {Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2018}
}
......@@ -5069,7 +4977,7 @@ author = {Yoshua Bengio and
title = {Leveraging Linguistic Structures for Named Entity Recognition with
Bidirectional Recursive Neural Networks},
pages = {2664--2669},
publisher = {Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2017}
}
......@@ -5077,7 +4985,7 @@ author = {Yoshua Bengio and
author = {Xuezhe Ma and
Eduard H. Hovy},
title = {End-to-end Sequence Labeling via Bi-directional LSTM-CNNs-CRF},
publisher = {The Association for Computer Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016}
}
......@@ -5088,7 +4996,7 @@ author = {Yoshua Bengio and
Andrew McCallum},
title = {Fast and Accurate Entity Recognition with Iterated Dilated Convolutions},
pages = {2670--2680},
publisher = {Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2017}
}
......@@ -5107,26 +5015,21 @@ author = {Yoshua Bengio and
year = {2017}
}
@article{DBLP:journals/jmlr/CollobertWBKKK11,
author = {Ronan Collobert and
Jason Weston and
L{\'{e}}on Bottou and
Michael Karlen and
Koray Kavukcuoglu and
Pavel P. Kuksa},
title = {Natural Language Processing (Almost) from Scratch},
journal = {J. Mach. Learn. Res.},
volume = {12},
pages = {2493--2537},
year = {2011}
@article{2011Natural,
title={Natural Language Processing (almost) from Scratch},
author={Collobert, Ronan and Weston, Jason and Bottou, L{\'{e}}on and Karlen, Michael and Kavukcuoglu, Koray and Kuksa, Pavel},
journal={Journal of Machine Learning Research},
volume={12},
number={1},
pages={2493--2537},
year={2011},
}
@inproceedings{DBLP:conf/acl/NguyenG15,
author = {Thien Huu Nguyen and
Ralph Grishman},
title = {Event Detection and Domain Adaptation with Convolutional Neural Networks},
pages = {365--371},
publisher = {The Association for Computer Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2015}
}
......@@ -5137,7 +5040,7 @@ author = {Yoshua Bengio and
Jun Zhao},
title = {Recurrent Convolutional Neural Networks for Text Classification},
pages = {2267--2273},
publisher = {the Association for the Advance of Artificial Intelligence},
publisher = {AAAI Conference on Artificial Intelligence},
year = {2015}
}
......@@ -5149,7 +5052,7 @@ author = {Yoshua Bengio and
Jun Zhao},
title = {Event Extraction via Dynamic Multi-Pooling Convolutional Neural Networks},
pages = {167--176},
publisher = {The Association for Computer Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2015}
}
......@@ -5159,7 +5062,7 @@ author = {Yoshua Bengio and
Tommi S. Jaakkola},
title = {Molding CNNs for text: non-linear, non-consecutive convolutions},
pages = {1565--1575},
publisher = {The Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2015}
}
......@@ -5169,7 +5072,7 @@ author = {Yoshua Bengio and
title = {Effective Use of Word Order for Text Categorization with Convolutional
Neural Networks},
pages = {103--112},
publisher = {The Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2015}
}
......@@ -5178,14 +5081,14 @@ author = {Yoshua Bengio and
Ralph Grishman},
title = {Relation Extraction: Perspective from Convolutional Neural Networks},
pages = {39--48},
publisher = {The Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2015}
}
@article{StahlbergNeural,
title={Neural Machine Translation: A Review},
author={Felix Stahlberg},
journal={journal of artificial intelligence research},
journal={Journal of Artificial Intelligence Research},
year={2020},
volume={69},
pages={343-418}
......@@ -5211,7 +5114,7 @@ author = {Yoshua Bengio and
@article{Waibel1989PhonemeRU,
title={Phoneme recognition using time-delay neural networks},
author={Alexander H. Waibel and Toshiyuki Hanazawa and Geoffrey E. Hinton and K. Shikano and K. Lang},
journal={IEEE Trans. Acoust. Speech Signal Process.},
journal={IEEE Transactions on Acoustics, Speech, and Signal Processing},
year={1989},
volume={37},
pages={328-339}
......@@ -5226,7 +5129,7 @@ author = {Yoshua Bengio and
pages={541-551}
}
@ARTICLE{726791,
@article{726791,
author={Y. {Lecun} and L. {Bottou} and Y. {Bengio} and P. {Haffner}},
journal={Proceedings of the IEEE},
title={Gradient-based learning applied to document recognition},
......@@ -5234,7 +5137,6 @@ author = {Yoshua Bengio and
volume={86},
number={11},
pages={2278-2324},
//doi={10.1109/5.726791}
}
@inproceedings{DBLP:journals/corr/HeZRS15,
......@@ -5262,7 +5164,7 @@ author = {Yoshua Bengio and
@article{Girshick2015FastR,
title={Fast R-CNN},
author={Ross B. Girshick},
journal={2015 IEEE International Conference on Computer Vision (ICCV)},
journal={International Conference on Computer Vision},
year={2015},
pages={1440-1448}
}
......@@ -5279,7 +5181,7 @@ author = {Yoshua Bengio and
@inproceedings{Kalchbrenner2014ACN,
title={A Convolutional Neural Network for Modelling Sentences},
author={Nal Kalchbrenner and Edward Grefenstette and P. Blunsom},
booktitle={ACL},
publisher={Annual Meeting of the Association for Computational Linguistics},
pages={655--665},
year={2014}
}
......@@ -5287,7 +5189,7 @@ author = {Yoshua Bengio and
@inproceedings{Kim2014ConvolutionalNN,
title={Convolutional Neural Networks for Sentence Classification},
author={Yoon Kim},
booktitle={Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing},
publisher={Conference on Empirical Methods in Natural Language Processing},
pages = {1746--1751},
year={2014}
}
......@@ -5299,7 +5201,7 @@ author = {Yoshua Bengio and
Bowen Zhou and
Bing Xiang},
pages = {174--179},
booktitle={The Association for Computer Linguistics},
publisher={Annual Meeting of the Association for Computational Linguistics},
year={2015}
}
......@@ -5308,7 +5210,7 @@ author = {Yoshua Bengio and
author = {C{\'{\i}}cero Nogueira dos Santos and
Maira Gatti},
pages = {69--78},
publisher = {The Association for Computer Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year={2014}
}
......@@ -5318,7 +5220,7 @@ author = {Yoshua Bengio and
Angela Fan and
Michael Auli and
David Grangier},
booktitle={Proceedings of the 34th International Conference on Machine Learning},
publisher={International Conference on Machine Learning},
volume = {70},
pages = {933--941},
year={2017}
......@@ -5330,7 +5232,7 @@ author = {Yoshua Bengio and
Michael Auli and
David Grangier and
Yann N. Dauphin},
booktitle={The Association for Computer Linguistics},
publisher={Annual Meeting of the Association for Computational Linguistics},
pages = {123--135},
year={2017}
}
......@@ -5353,7 +5255,7 @@ author = {Yoshua Bengio and
author = {Lukasz Kaiser and
Aidan N. Gomez and
Fran{\c{c}}ois Chollet},
publisher = {OpenReview.net},
journal = {International Conference on Learning Representations},
year={2018},
}
......@@ -5364,7 +5266,7 @@ author = {Yoshua Bengio and
Yann N. Dauphin and
Michael Auli},
title = {Pay Less Attention with Lightweight and Dynamic Convolutions},
publisher = {7th International Conference on Learning Representations},
publisher = {International Conference on Learning Representations},
year = {2019},
}
......@@ -5421,7 +5323,7 @@ author = {Yoshua Bengio and
Shaoqing Ren and
Jian Sun},
title = {Deep Residual Learning for Image Recognition},
publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
pages = {770--778},
year = {2016},
}
......@@ -5432,26 +5334,26 @@ author = {Yoshua Bengio and
Arthur Szlam and
Jason Weston and
Rob Fergus},
booktitle={Conference and Workshop on Neural Information Processing Systems},
publisher={Conference on Neural Information Processing Systems},
pages = {2440--2448},
year={2015}
}
@article{Islam2020HowMP,
title={How Much Position Information Do Convolutional Neural Networks Encode?},
author={Md. Amirul Islam and Sen Jia and Neil D. B. Bruce},
journal={ArXiv},
year={2020},
volume={abs/2001.08248}
@inproceedings{Islam2020HowMP,
author = {Md. Amirul Islam and
Sen Jia and
Neil D. B. Bruce},
title = {How much Position Information Do Convolutional Neural Networks Encode?},
publisher = {International Conference on Learning Representations},
year = {2020},
}
@inproceedings{Sutskever2013OnTI,
title={On the importance of initialization and momentum in deep learning},
author = {Ilya Sutskever and
James Martens and
George E. Dahl and
Geoffrey E. Hinton},
booktitle={International Conference on Machine Learning},
publisher = {International Conference on Machine Learning},
pages = {1139--1147},
year={2013}
}
......@@ -5459,7 +5361,7 @@ author = {Yoshua Bengio and
@article{Bengio2013AdvancesIO,
title={Advances in optimizing recurrent networks},
author={Yoshua Bengio and Nicolas Boulanger-Lewandowski and Razvan Pascanu},
journal={2013 IEEE International Conference on Acoustics, Speech and Signal Processing},
journal={IEEE International Conference on Acoustics, Speech and Signal Processing},
year={2013},
pages={8624-8628}
}
......@@ -5476,7 +5378,7 @@ author = {Yoshua Bengio and
@article{Chollet2017XceptionDL,
title={Xception: Deep Learning with Depthwise Separable Convolutions},
author = {Fran{\c{c}}ois Chollet},
journal={2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
journal={IEEE Conference on Computer Vision and Pattern Recognition},
year={2017},
pages={1800-1807}
}
......@@ -5512,7 +5414,7 @@ author = {Yoshua Bengio and
title={Rotation, Scaling and Deformation Invariant Scattering for Texture Discrimination},
author = {Laurent Sifre and
St{\'{e}}phane Mallat},
journal={2013 IEEE Conference on Computer Vision and Pattern Recognition},
journal={IEEE Conference on Computer Vision and Pattern Recognition},
year={2013},
pages={1233-1240}
}
......@@ -5520,7 +5422,7 @@ author = {Yoshua Bengio and
@article{Taigman2014DeepFaceCT,
title={DeepFace: Closing the Gap to Human-Level Performance in Face Verification},
author={Yaniv Taigman and Ming Yang and Marc'Aurelio Ranzato and Lior Wolf},
journal={2014 IEEE Conference on Computer Vision and Pattern Recognition},
journal={IEEE Conference on Computer Vision and Pattern Recognition},
year={2014},
pages={1701-1708}
}
......@@ -5533,7 +5435,7 @@ author = {Yoshua Bengio and
Mirk{\'{o}} Visontai and
Raziel Alvarez and
Carolina Parada},
booktitle={the International Speech Communication Association},
publisher={Conference of the International Speech Communication Association},
pages = {1136--1140},
year={2015}
}
......@@ -5546,7 +5448,7 @@ author = {Yoshua Bengio and
Dongdong Chen and
Lu Yuan and
Zicheng Liu},
publisher = {Institute of Electrical and Electronics Engineers},
journal = {IEEE Conference on Computer Vision and Pattern Recognition},
year={2020},
pages={11027-11036}
}
......@@ -5563,7 +5465,7 @@ author = {Yoshua Bengio and
Chloe Hillier and
Timothy P. Lillicrap},
title = {Compressive Transformers for Long-Range Sequence Modelling},
publisher = {OpenReview.net},
publisher = {International Conference on Learning Representations},
year = {2020}
}
......@@ -5597,7 +5499,7 @@ author = {Yoshua Bengio and
Yujun Lin and
Song Han},
title = {Lite Transformer with Long-Short Range Attention},
publisher = {OpenReview.net},
publisher = {International Conference on Learning Representations},
year = {2020}
}
......@@ -5610,7 +5512,7 @@ author = {Yoshua Bengio and
title = {Analyzing Multi-Head Self-Attention: Specialized Heads Do the Heavy
Lifting, the Rest Can Be Pruned},
pages = {5797--5808},
publisher = {Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019},
}
......@@ -5623,7 +5525,7 @@ author = {Yoshua Bengio and
Bowen Zhou and
Yoshua Bengio},
title = {A Structured Self-Attentive Sentence Embedding},
publisher = {5th International Conference on Learning Representations},
publisher = {International Conference on Learning Representations},
year = {2017},
}
@inproceedings{Shaw2018SelfAttentionWR,
......@@ -5631,8 +5533,8 @@ author = {Yoshua Bengio and
Jakob Uszkoreit and
Ashish Vaswani},
title = {Self-Attention with Relative Position Representations},
publisher = {Proceedings of the 2018 Conference of the North American Chapter of
the Association for Computational Linguistics: Human Language Technologies},
publisher = {Human Language Technology Conference of
the North American Chapter of the Association for Computational Linguistics},
pages = {464--468},
year = {2018},
}
......@@ -5642,7 +5544,7 @@ author = {Yoshua Bengio and
Shaoqing Ren and
Jian Sun},
title = {Deep Residual Learning for Image Recognition},
publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
pages = {770--778},
year = {2016},
}
......@@ -5661,7 +5563,7 @@ author = {Yoshua Bengio and
Jonathon Shlens and
Zbigniew Wojna},
title = {Rethinking the Inception Architecture for Computer Vision},
publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
pages = {2818--2826},
year = {2016},
}
......@@ -5670,8 +5572,7 @@ author = {Yoshua Bengio and
Deyi Xiong and
Jinsong Su},
title = {Accelerating Neural Transformer via an Average Attention Network},
publisher = {Proceedings of the 56th Annual Meeting of the Association for Computational
Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
pages = {1789--1798},
year = {2018},
}
......@@ -5691,7 +5592,7 @@ author = {Yoshua Bengio and
Yann N. Dauphin and
Michael Auli},
title = {Pay Less Attention with Lightweight and Dynamic Convolutions},
publisher = {7th International Conference on Learning Representations},
publisher = {International Conference on Learning Representations},
year = {2019},
}
......@@ -5704,7 +5605,7 @@ author = {Yoshua Bengio and
Ruslan Salakhutdinov},
title = {Transformer-XL: Attentive Language Models beyond a Fixed-Length Context},
pages = {2978--2988},
publisher = {Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019}
}
@article{Liu2020LearningTE,
......@@ -5729,7 +5630,7 @@ author = {Yoshua Bengio and
Tong Zhang},
title = {Modeling Localness for Self-Attention Networks},
pages = {4449--4458},
publisher = {Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2018}
}
@inproceedings{DBLP:journals/corr/abs-1904-03107,
......@@ -5740,7 +5641,7 @@ author = {Yoshua Bengio and
Zhaopeng Tu},
title = {Convolutional Self-Attention Networks},
pages = {4040--4045},
publisher = {Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019},
}
@article{Wang2018MultilayerRF,
......@@ -5759,7 +5660,7 @@ author = {Yoshua Bengio and
title = {Training Deeper Neural Machine Translation Models with Transparent
Attention},
pages = {3028--3033},
publisher = {Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2018}
}
@inproceedings{Dou2018ExploitingDR,
......@@ -5770,7 +5671,7 @@ author = {Yoshua Bengio and
Tong Zhang},
title = {Exploiting Deep Representations for Neural Machine Translation},
pages = {4253--4262},
publisher = {Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2018}
}
@inproceedings{Wang2019ExploitingSC,
......@@ -5789,13 +5690,13 @@ author = {Yoshua Bengio and
Tong Zhang},
title = {Dynamic Layer Aggregation for Neural Machine Translation with Routing-by-Agreement},
pages = {86--93},
publisher = {the Association for the Advance of Artificial Intelligence},
publisher = {AAAI Conference on Artificial Intelligence},
year = {2019}
}
@inproceedings{Wei2020MultiscaleCD,
title={Multiscale Collaborative Deep Models for Neural Machine Translation},
author={Xiangpeng Wei and Heng Yu and Yue Hu and Yue Zhang and Rongxiang Weng and Weihua Luo},
booktitle={Annual Meeting of the Association for Computational Linguistics},
publisher={Annual Meeting of the Association for Computational Linguistics},
year={2020}
}
......@@ -5824,7 +5725,7 @@ author = {Yoshua Bengio and
Lukasz Kaiser and
Anselm Levskaya},
title = {Reformer: The Efficient Transformer},
publisher = {OpenReview.net},
journal = {International Conference on Learning Representations},
year = {2020}
}
......@@ -5839,7 +5740,7 @@ author = {Yoshua Bengio and
@article{li2020shallow,
title={Shallow-to-Deep Training for Neural Machine Translation},
author={Li, Bei and Wang, Ziyang and Liu, Hui and Jiang, Yufan and Du, Quan and Xiao, Tong and Wang, Huizhen and Zhu, Jingbo},
publisher={Conference on Empirical Methods in Natural Language Processing},
journal={Conference on Empirical Methods in Natural Language Processing},
year={2020}
}
%%%%% chapter 12------------------------------------------------------
......@@ -6673,15 +6574,7 @@ author = {Yoshua Bengio and
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019}
}
@inproceedings{DBLP:conf/naacl/MohiuddinJ19,
author = {Tasnim Mohiuddin and
Shafiq R. Joty},
title = {Revisiting Adversarial Autoencoder for Unsupervised Word Translation
with Cycle Consistency and Improved Training},
pages = {3857--3867},
publisher = {Annual Meeting of the Annual Meeting of the Association for Computational Linguistics},
year = {2019}
}
@article{DBLP:journals/corr/abs-1811-01124,
author = {Jean Alaux and
Edouard Grave and
......@@ -6896,394 +6789,6 @@ author = {Yoshua Bengio and
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019}
}
@article{2019ADabre,
title={A Survey of Multilingual Neural Machine Translation},
author={Dabre, Raj and Chu, Chenhui and Kunchukuttan, Anoop },
year={2019},
}
@inproceedings{DBLP:conf/naacl/ZophK16,
author = {Barret Zoph and
Kevin Knight},
title = {Multi-Source Neural Translation},
pages = {30--34},
publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
year = {2016}
}
@inproceedings{DBLP:conf/naacl/FiratCB16,
author = {Orhan Firat and
Kyunghyun Cho and
Yoshua Bengio},
title = {Multi-Way, Multilingual Neural Machine Translation with a Shared Attention
Mechanism},
pages = {866--875},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016}
}
@article{DBLP:journals/tacl/JohnsonSLKWCTVW17,
author = {Melvin Johnson and
Mike Schuster and
Quoc V. Le and
Maxim Krikun and
Yonghui Wu and
Zhifeng Chen and
Nikhil Thorat and
Fernanda B. Vi{\'{e}}gas and
Martin Wattenberg and
Greg Corrado and
Macduff Hughes and
Jeffrey Dean},
title = {Google's Multilingual Neural Machine Translation System: Enabling
Zero-Shot Translation},
journal = {Trans. Assoc. Comput. Linguistics},
volume = {5},
pages = {339--351},
year = {2017}
}
@inproceedings{DBLP:conf/emnlp/KimPPKN19,
author = {Yunsu Kim and
Petre Petrov and
Pavel Petrushkov and
Shahram Khadivi and
Hermann Ney},
title = {Pivot-based Transfer Learning for Neural Machine Translation between
Non-English Languages},
pages = {866--876},
publisher = {Association for Computational Linguistics},
year = {2019}
}
@inproceedings{DBLP:conf/acl/ChenLCL17,
author = {Yun Chen and
Yang Liu and
Yong Cheng and
Victor O. K. Li},
title = {A Teacher-Student Framework for Zero-Resource Neural Machine Translation},
pages = {1925--1935},
publisher = {Association for Computational Linguistics},
year = {2017}
}
@article{DBLP:journals/mt/WuW07,
author = {Hua Wu and
Haifeng Wang},
title = {Pivot language approach for phrase-based statistical machine translation},
journal = {Mach. Transl.},
volume = {21},
number = {3},
pages = {165--181},
year = {2007}
}
@article{Farsi2010somayeh,
author = {Somayeh Bakhshaei and Shahram Khadivi and Noushin Riahi },
title = {Farsi-german statistical machine translation through bridge language},
publisher = {International Telecommunications Symposium},
pages = {165--181},
year = {2010}
}
@inproceedings{DBLP:conf/acl/ZahabiBK13,
author = {Samira Tofighi Zahabi and
Somayeh Bakhshaei and
Shahram Khadivi},
title = {Using Context Vectors in Improving a Machine Translation System with
Bridge Language},
pages = {318--322},
publisher = {The Association for Computer Linguistics},
year = {2013}
}
@inproceedings{DBLP:conf/emnlp/ZhuHWZWZ14,
author = {Xiaoning Zhu and
Zhongjun He and
Hua Wu and
Conghui Zhu and
Haifeng Wang and
Tiejun Zhao},
title = {Improving Pivot-Based Statistical Machine Translation by Pivoting
the Co-occurrence Count of Phrase Pairs},
pages = {1665--1675},
publisher = {{ACL}},
year = {2014}
}
@inproceedings{DBLP:conf/acl/MiuraNSTN15,
author = {Akiva Miura and
Graham Neubig and
Sakriani Sakti and
Tomoki Toda and
Satoshi Nakamura},
title = {Improving Pivot Translation by Remembering the Pivot},
pages = {573--577},
publisher = {The Association for Computer Linguistics},
year = {2015}
}
@inproceedings{DBLP:conf/acl/CohnL07,
author = {Trevor Cohn and
Mirella Lapata},
title = {Machine Translation by Triangulation: Making Effective Use of Multi-Parallel
Corpora},
publisher = {The Association for Computational Linguistics},
year = {2007}
}
@article{DBLP:journals/mt/WuW07,
author = {Hua Wu and
Haifeng Wang},
title = {Pivot language approach for phrase-based statistical machine translation},
journal = {Mach. Transl.},
volume = {21},
number = {3},
pages = {165--181},
year = {2007}
}
@inproceedings{DBLP:conf/acl/WuW09,
author = {Hua Wu and
Haifeng Wang},
title = {Revisiting Pivot Language Approach for Machine Translation},
pages = {154--162},
publisher = {The Association for Computer Linguistics},
year = {2009}
}
@article{DBLP:journals/corr/ChengLYSX16,
author = {Yong Cheng and
Yang Liu and
Qian Yang and
Maosong Sun and
Wei Xu},
title = {Neural Machine Translation with Pivot Languages},
journal = {CoRR},
volume = {abs/1611.04928},
year = {2016}
}
% Kauers et al. (2002): interlingua-based statistical machine translation.
% This is an ICSLP/INTERSPEECH paper (see the citation key); the "ISCA" behind
% the publisher is the International Speech Communication Association, not the
% International Symposium on Computer Architecture.
@inproceedings{DBLP:conf/interspeech/KauersVFW02,
  author    = {Manuel Kauers and
               Stephan Vogel and
               Christian F{\"{u}}gen and
               Alex Waibel},
  title     = {Interlingua based statistical machine translation},
  publisher = {International Speech Communication Association},
  year      = {2002}
}
% de Gispert and Marino (2006): Catalan-English SMT bridged through Spanish.
% The second author's name is restored with its diacritics and the period
% after the middle initial.
@inproceedings{de2006catalan,
  title={Catalan-English statistical machine translation without parallel corpus: bridging through Spanish},
  author={De Gispert, Adri{\`a} and Mari{\~n}o, Jos{\'e} B.},
  booktitle={Proc. of 5th International Conference on Language Resources and Evaluation (LREC)},
  pages={65--68},
  year={2006}
}
% Utiyama and Isahara (2007): comparison of pivot methods for phrase-based SMT.
@inproceedings{DBLP:conf/naacl/UtiyamaI07,
  author = {Masao Utiyama and Hitoshi Isahara},
  title = {A Comparison of Pivot Methods for Phrase-Based Statistical Machine Translation},
  publisher = {The Association for Computational Linguistics},
  pages = {484--491},
  year = {2007}
}
% Costa-jussa et al. (2011): pivot combinations for scarce-resource translation.
@inproceedings{DBLP:conf/ijcnlp/Costa-JussaHB11,
  author = {Marta R. Costa-juss{\`{a}} and Carlos A. Henr{\'{\i}}quez Q. and Rafael E. Banchs},
  title = {Enhancing scarce-resource language translation through pivot combinations},
  publisher = {The Association for Computer Linguistics},
  pages = {1361--1365},
  year = {2011}
}
% Hinton, Vinyals and Dean (2015): knowledge distillation (CoRR).
@article{DBLP:journals/corr/HintonVD15,
  author = {Geoffrey E. Hinton and Oriol Vinyals and Jeffrey Dean},
  title = {Distilling the Knowledge in a Neural Network},
  journal = {CoRR},
  volume = {abs/1503.02531},
  year = {2015}
}
% Gu et al. (2018): meta-learning for low-resource NMT (arXiv preprint).
% The last author's initials are written "O. K." so BibTeX parses two
% initials instead of the single token "OK".
@article{gu2018meta,
  title={Meta-learning for low-resource neural machine translation},
  author={Gu, Jiatao and Wang, Yong and Chen, Yun and Cho, Kyunghyun and Li, Victor O. K.},
  journal={arXiv preprint arXiv:1808.08437},
  year={2018}
}
% Gu et al. (2018): universal NMT for extremely low resource languages.
@inproceedings{DBLP:conf/naacl/GuHDL18,
  author = {Jiatao Gu and Hany Hassan and Jacob Devlin and Victor O. K. Li},
  title = {Universal Neural Machine Translation for Extremely Low Resource Languages},
  publisher = {Association for Computational Linguistics},
  pages = {344--354},
  year = {2018}
}
% Finn, Abbeel and Levine (2017): model-agnostic meta-learning.
@inproceedings{DBLP:conf/icml/FinnAL17,
  author = {Chelsea Finn and Pieter Abbeel and Sergey Levine},
  title = {Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks},
  publisher = {International Conference on Machine Learning},
  series = {Proceedings of Machine Learning Research},
  volume = {70},
  pages = {1126--1135},
  year = {2017}
}
% Dong et al. (2015): multi-task learning for multiple language translation.
@inproceedings{DBLP:conf/acl/DongWHYW15,
  author = {Daxiang Dong and Hua Wu and Wei He and Dianhai Yu and Haifeng Wang},
  title = {Multi-Task Learning for Multiple Language Translation},
  publisher = {The Association for Computer Linguistics},
  pages = {1723--1732},
  year = {2015}
}
% Lee, Cho and Hofmann (2017): fully character-level NMT without explicit segmentation.
@article{DBLP:journals/tacl/LeeCH17,
  author = {Jason Lee and Kyunghyun Cho and Thomas Hofmann},
  title = {Fully Character-Level Neural Machine Translation without Explicit Segmentation},
  journal = {Trans. Assoc. Comput. Linguistics},
  volume = {5},
  pages = {365--378},
  year = {2017}
}
% Rikters, Pinnis and Krislauks (2018): multilingual NMT for less-resourced languages.
@inproceedings{DBLP:conf/lrec/RiktersPK18,
  author = {Matiss Rikters and Marcis Pinnis and Rihards Krislauks},
  title = {Training and Adapting Multilingual {NMT} for Less-resourced and Morphologically Rich Languages},
  publisher = {European Language Resources Association},
  year = {2018}
}
% Pan and Yang (2010): survey on transfer learning.
@article{DBLP:journals/tkde/PanY10,
  author = {Sinno Jialin Pan and Qiang Yang},
  title = {A Survey on Transfer Learning},
  journal = {{IEEE} Trans. Knowl. Data Eng.},
  volume = {22},
  number = {10},
  pages = {1345--1359},
  year = {2010}
}
% Johnson et al. (2017): Google's multilingual NMT system and zero-shot translation.
@article{DBLP:journals/tacl/JohnsonSLKWCTVW17,
  author = {Melvin Johnson and Mike Schuster and Quoc V. Le and Maxim Krikun and
            Yonghui Wu and Zhifeng Chen and Nikhil Thorat and Fernanda B. Vi{\'{e}}gas and
            Martin Wattenberg and Greg Corrado and Macduff Hughes and Jeffrey Dean},
  title = {Google's Multilingual Neural Machine Translation System: Enabling Zero-Shot Translation},
  journal = {Trans. Assoc. Comput. Linguistics},
  volume = {5},
  pages = {339--351},
  year = {2017}
}
% Olivas et al. (2009): handbook of research on machine learning applications and trends.
@book{2009Handbook,
  author    = {Olivas, Emilio Soria and
               Guerrero, Jose David Martin and
               Sober, Marcelino Martinez and
               Benedito, Jose Rafael Magdalena and
               Lopez, Antonio Jose Serrano},
  title     = {Handbook Of Research On Machine Learning Applications and Trends: Algorithms, Methods and Techniques - 2 Volumes},
  publisher = {Information Science Reference - Imprint of: IGI Publishing},
  year      = {2009}
}
% Pan (2014): "Transfer Learning" chapter in Data Classification: Algorithms and Applications.
@incollection{DBLP:books/crc/aggarwal14/Pan14,
  author = {Sinno Jialin Pan},
  title = {Transfer Learning},
  booktitle = {Data Classification: Algorithms and Applications},
  publisher = {{CRC} Press},
  pages = {537--570},
  year = {2014}
}
% Tan et al. (2019): multilingual NMT with knowledge distillation.
@inproceedings{DBLP:conf/iclr/TanRHQZL19,
  author = {Xu Tan and Yi Ren and Di He and Tao Qin and Zhou Zhao and Tie-Yan Liu},
  title = {Multilingual Neural Machine Translation with Knowledge Distillation},
  publisher = {OpenReview.net},
  year = {2019}
}
% Platanios et al. (2018): contextual parameter generation for universal NMT (arXiv preprint).
@article{platanios2018contextual,
  author  = {Platanios, Emmanouil Antonios and
             Sachan, Mrinmaya and
             Neubig, Graham and
             Mitchell, Tom},
  title   = {Contextual parameter generation for universal neural machine translation},
  journal = {arXiv preprint arXiv:1808.08493},
  year    = {2018}
}
% Ji et al. (2020): cross-lingual pre-training based transfer for zero-shot NMT.
@inproceedings{ji2020cross,
  author    = {Ji, Baijun and
               Zhang, Zhirui and
               Duan, Xiangyu and
               Zhang, Min and
               Chen, Boxing and
               Luo, Weihua},
  title     = {Cross-Lingual Pre-Training Based Transfer for Zero-Shot Neural Machine Translation},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {34},
  number    = {01},
  pages     = {115--122},
  year      = {2020}
}
% Kocmi and Bojar (2018): trivial transfer learning for low-resource NMT.
@inproceedings{DBLP:conf/wmt/KocmiB18,
  author = {Tom Kocmi and Ondrej Bojar},
  title = {Trivial Transfer Learning for Low-Resource Neural Machine Translation},
  publisher = {Association for Computational Linguistics},
  pages = {244--252},
  year = {2018}
}
% Zhang et al. (2020): improving massively multilingual NMT and zero-shot translation.
@inproceedings{DBLP:conf/acl/ZhangWTS20,
  author = {Biao Zhang and Philip Williams and Ivan Titov and Rico Sennrich},
  title = {Improving Massively Multilingual Neural Machine Translation and Zero-Shot Translation},
  publisher = {Association for Computational Linguistics},
  pages = {1628--1639},
  year = {2020}
}
% Paul et al. (2009): importance of pivot language selection for SMT.
@inproceedings{DBLP:conf/naacl/PaulYSN09,
  author = {Michael Paul and Hirofumi Yamamoto and Eiichiro Sumita and Satoshi Nakamura},
  title = {On the Importance of Pivot Language Selection for Statistical Machine Translation},
  publisher = {The Association for Computational Linguistics},
  pages = {221--224},
  year = {2009}
}
% Dabre, Chu and Kunchukuttan (2019): brief survey of multilingual NMT (arXiv preprint).
@article{dabre2019brief,
  author  = {Dabre, Raj and
             Chu, Chenhui and
             Kunchukuttan, Anoop},
  title   = {A Brief Survey of Multilingual Neural Machine Translation},
  journal = {arXiv preprint arXiv:1905.05395},
  year    = {2019}
}
% Dabre, Chu and Kunchukuttan (2020): survey of multilingual NMT (ACM Computing Surveys).
@article{dabre2020survey,
  author  = {Dabre, Raj and
             Chu, Chenhui and
             Kunchukuttan, Anoop},
  title   = {A survey of multilingual neural machine translation},
  journal = {ACM Computing Surveys (CSUR)},
  volume  = {53},
  number  = {5},
  pages   = {1--38},
  year    = {2020}
}
% Vulic et al. (2019): do we need fully unsupervised cross-lingual embeddings?
@inproceedings{DBLP:conf/emnlp/VulicGRK19,
  author = {Ivan Vulic and Goran Glavas and Roi Reichart and Anna Korhonen},
  title = {Do We Really Need Fully Unsupervised Cross-Lingual Embeddings?},
  publisher = {Association for Computational Linguistics},
  pages = {4406--4417},
  year = {2019}
}
% Mikolov, Le and Sutskever (2013): exploiting similarities among languages for MT (CoRR).
@article{DBLP:journals/corr/MikolovLS13,
  author = {Tomas Mikolov and Quoc V. Le and Ilya Sutskever},
  title = {Exploiting Similarities among Languages for Machine Translation},
  journal = {CoRR},
  volume = {abs/1309.4168},
  year = {2013}
}
%%%%% chapter 16------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论