Commit 4027b354 by 曹润柘

bib update

parent 3994e2fd
...@@ -52,7 +52,7 @@ ...@@ -52,7 +52,7 @@
%\node[minimum width = 1.8cm] (sub) at ([xshift=-5.5cm,yshift=2cm]num9_9.east) {}; %\node[minimum width = 1.8cm] (sub) at ([xshift=-5.5cm,yshift=2cm]num9_9.east) {};
\draw[decorate,decoration={brace,mirror,raise=0pt,amplitude=0.3cm},black,thick] ([yshift=0.4cm,xshift=-0.1cm]num1_1.west) -- node[att,xshift=-0.5cm]{$Q$} ([yshift=-0.4cm,xshift=-0.1cm]num3_3.west); \draw[decorate,decoration={brace,mirror,raise=0pt,amplitude=0.3cm},black,thick] ([yshift=0.4cm,xshift=-0.1cm]num1_1.west) -- node[att,xshift=-0.5cm]{$Q$} ([yshift=-0.4cm,xshift=-0.1cm]num3_3.west);
\draw[decorate,decoration={brace,raise=0pt,amplitude=0.3cm},black,thick] ([xshift=-0.4cm,yshift=0.1cm]num1.north) -- node[att,yshift=0.5cm]{$U$}([xshift=0.4cm,yshift=0.1cm]num7.north); \draw[decorate,decoration={brace,raise=0pt,amplitude=0.3cm},black,thick] ([xshift=-0.4cm,yshift=0.1cm]num1.north) -- node[att,yshift=0.5cm]{$K$}([xshift=0.4cm,yshift=0.1cm]num7.north);
\draw[decorate,decoration={brace,mirror,raise=0pt,amplitude=0.3cm},black,thick] ([xshift=0.5cm,yshift=0.00cm]num9_9.south) -- node[att,xshift=0.5cm,yshift=-0.3cm]{$O$}([xshift=0.5cm,yshift=0.00cm]num9.south); \draw[decorate,decoration={brace,mirror,raise=0pt,amplitude=0.3cm},black,thick] ([xshift=0.5cm,yshift=0.00cm]num9_9.south) -- node[att,xshift=0.5cm,yshift=-0.3cm]{$O$}([xshift=0.5cm,yshift=0.00cm]num9.south);
......
...@@ -21,10 +21,10 @@ ...@@ -21,10 +21,10 @@
\draw[line width=0.9pt, gray!80, -latex] (l\point_3.east) -- (r2_3.west); \draw[line width=0.9pt, gray!80, -latex] (l\point_3.east) -- (r2_3.west);
} }
\node[vuale] at (-1.5em, 1.9em) {$\mathbi{x}_2$}; \node[vuale] at (-1.5em, 1.9em) {$x_2$};
\node[vuale] at (-1.5em, 9.9em) {$\mathbi{x}_1$}; \node[vuale] at (-1.5em, 9.9em) {$x_1$};
\node[vuale] at (6.5em, 1.9em) {$\mathbi{z}_2$}; \node[vuale] at (6.5em, 1.9em) {$y_2$};
\node[vuale] at (6.5em, 9.9em) {$\mathbi{z}_1$}; \node[vuale] at (6.5em, 9.9em) {$y_1$};
\node (t2) at (2.5em, -1em) {\large{$\cdots$}}; \node (t2) at (2.5em, -1em) {\large{$\cdots$}};
\node [anchor=north,font=\tiny] at ([yshift=-0.2em]t2.south) {深度卷积}; \node [anchor=north,font=\tiny] at ([yshift=-0.2em]t2.south) {深度卷积};
...@@ -46,10 +46,10 @@ ...@@ -46,10 +46,10 @@
\draw[line width=0.9pt, cyan!80, -latex] (l\point_3.east) -- (r2_3.west); \draw[line width=0.9pt, cyan!80, -latex] (l\point_3.east) -- (r2_3.west);
} }
\node[vuale] at (-1.5em, 1.9em) {$\mathbi{x}_2$}; \node[vuale] at (-1.5em, 1.9em) {$x_2$};
\node[vuale] at (-1.5em, 9.9em) {$\mathbi{x}_1$}; \node[vuale] at (-1.5em, 9.9em) {$x_1$};
\node[vuale] at (6.5em, 1.9em) {$\mathbi{z}_2$}; \node[vuale] at (6.5em, 1.9em) {$y_2$};
\node[vuale] at (6.5em, 9.9em) {$\mathbi{z}_1$}; \node[vuale] at (6.5em, 9.9em) {$y_1$};
\node (t2) at (2.5em, -1em) {\large{$\cdots$}}; \node (t2) at (2.5em, -1em) {\large{$\cdots$}};
\node [anchor=north,font=\tiny] at ([yshift=-0.2em]t2.south) {轻量卷积}; \node [anchor=north,font=\tiny] at ([yshift=-0.2em]t2.south) {轻量卷积};
......
...@@ -32,12 +32,12 @@ ...@@ -32,12 +32,12 @@
\draw[line width=0.5pt, cyan!80, -latex] (l3_\point.east) -- ([xshift=0em,yshift=0.1em]r3_2.west); \draw[line width=0.5pt, cyan!80, -latex] (l3_\point.east) -- ([xshift=0em,yshift=0.1em]r3_2.west);
} }
\node[vuale] at ([xshift=-0.9em]l1_1.west) {$\mathbi{x}_3$}; \node[vuale] at ([xshift=-0.9em]l1_1.west) {$x_3$};
\node[vuale] at ([xshift=-0.9em]l2_1.west) {$\mathbi{x}_2$}; \node[vuale] at ([xshift=-0.9em]l2_1.west) {$x_2$};
\node[vuale] at ([xshift=-0.9em]l3_1.west) {$\mathbi{x}_1$}; \node[vuale] at ([xshift=-0.9em]l3_1.west) {$x_1$};
\node[vuale] at ([xshift=0.9em]r1_1.east) {$\mathbi{z}_3$}; \node[vuale] at ([xshift=0.9em]r1_1.east) {$y_3$};
\node[vuale] at ([xshift=0.9em]r2_1.east) {$\mathbi{z}_2$}; \node[vuale] at ([xshift=0.9em]r2_1.east) {$y_2$};
\node[vuale] at ([xshift=0.9em]r3_1.east) {$\mathbi{z}_1$}; \node[vuale] at ([xshift=0.9em]r3_1.east) {$y_1$};
\node (t1) at (2.5em, -1em) {\large{$\cdots$}}; \node (t1) at (2.5em, -1em) {\large{$\cdots$}};
\node [anchor=north,font=\tiny] at ([yshift=-0.2em]t1.south) {传统卷积}; \node [anchor=north,font=\tiny] at ([yshift=-0.2em]t1.south) {传统卷积};
...@@ -66,12 +66,12 @@ ...@@ -66,12 +66,12 @@
\draw[line width=0.5pt, cyan!80, -latex] (l\point_2.east) -- (r3_2.west); \draw[line width=0.5pt, cyan!80, -latex] (l\point_2.east) -- (r3_2.west);
} }
\node[vuale] at ([xshift=-0.9em]l1_1.west) {$\mathbi{x}_3$}; \node[vuale] at ([xshift=-0.9em]l1_1.west) {$x_3$};
\node[vuale] at ([xshift=-0.9em]l2_1.west) {$\mathbi{x}_2$}; \node[vuale] at ([xshift=-0.9em]l2_1.west) {$x_2$};
\node[vuale] at ([xshift=-0.9em]l3_1.west) {$\mathbi{x}_1$}; \node[vuale] at ([xshift=-0.9em]l3_1.west) {$x_1$};
\node[vuale] at ([xshift=0.9em]r1_1.east) {$\mathbi{z}_3$}; \node[vuale] at ([xshift=0.9em]r1_1.east) {$y_3$};
\node[vuale] at ([xshift=0.9em]r2_1.east) {$\mathbi{z}_2$}; \node[vuale] at ([xshift=0.9em]r2_1.east) {$y_2$};
\node[vuale] at ([xshift=0.9em]r3_1.east) {$\mathbi{z}_1$}; \node[vuale] at ([xshift=0.9em]r3_1.east) {$y_1$};
\node (t2) at (2.5em, -1em) {\large{$\cdots$}}; \node (t2) at (2.5em, -1em) {\large{$\cdots$}};
\node [anchor=north,font=\tiny] at ([yshift=-0.2em]t2.south) {深度卷积}; \node [anchor=north,font=\tiny] at ([yshift=-0.2em]t2.south) {深度卷积};
...@@ -102,12 +102,12 @@ ...@@ -102,12 +102,12 @@
\draw[line width=0.5pt, cyan!80, -latex] (l3_\point.east) -- (r3_2.west); \draw[line width=0.5pt, cyan!80, -latex] (l3_\point.east) -- (r3_2.west);
} }
\node[vuale] at ([xshift=-0.9em]l1_1.west) {$\mathbi{x}_3$}; \node[vuale] at ([xshift=-0.9em]l1_1.west) {$x_3$};
\node[vuale] at ([xshift=-0.9em]l2_1.west) {$\mathbi{x}_2$}; \node[vuale] at ([xshift=-0.9em]l2_1.west) {$x_2$};
\node[vuale] at ([xshift=-0.9em]l3_1.west) {$\mathbi{x}_1$}; \node[vuale] at ([xshift=-0.9em]l3_1.west) {$x_1$};
\node[vuale] at ([xshift=0.9em]r1_1.east) {$\mathbi{z}_3$}; \node[vuale] at ([xshift=0.9em]r1_1.east) {$y_3$};
\node[vuale] at ([xshift=0.9em]r2_1.east) {$\mathbi{z}_2$}; \node[vuale] at ([xshift=0.9em]r2_1.east) {$y_2$};
\node[vuale] at ([xshift=0.9em]r3_1.east) {$\mathbi{z}_1$}; \node[vuale] at ([xshift=0.9em]r3_1.east) {$y_1$};
\node (t3) at (2.5em, -1em) {\large{$\cdots$}}; \node (t3) at (2.5em, -1em) {\large{$\cdots$}};
\node [anchor=north,font=\tiny] at ([yshift=-0.2em]t3.south) {逐点卷积}; \node [anchor=north,font=\tiny] at ([yshift=-0.2em]t3.south) {逐点卷积};
......
...@@ -83,7 +83,7 @@ ...@@ -83,7 +83,7 @@
\end{figure} \end{figure}
%---------------------------------------------- %----------------------------------------------
\parinterval 在图像卷积中,卷积核是一组$Q \times U \times O$的参数(如图\ref{fig:11-3})。其中$Q$和$U$表示卷积核窗口的长度与宽度,分别对应图像中的长和宽两个维度,$Q \times U$决定了该卷积核窗口的大小。$O$是该卷积核的深度,它的取值和输入数据通道数保持一致。在这里,通道可以看作图像不同的特征,比如灰色图像只有灰度信息,通道数为1;而RGB格式的图像有3个通道,分别对应红绿蓝三种颜色信息。 \parinterval 在图像卷积中,卷积核是一组$Q \times K \times O$的参数(如图\ref{fig:11-3})。其中$Q$和$K$表示卷积核窗口的长度与宽度,分别对应图像中的长和宽两个维度,$Q \times K$决定了该卷积核窗口的大小。$O$是该卷积核的深度,它的取值和输入数据通道数保持一致。在这里,通道可以看作图像不同的特征,比如灰色图像只有灰度信息,通道数为1;而RGB格式的图像有3个通道,分别对应红绿蓝三种颜色信息。
%---------------------------------------------- %----------------------------------------------
% 图4. % 图4.
...@@ -567,7 +567,7 @@ ...@@ -567,7 +567,7 @@
\parinterval 卷积是一种高效处理网格数据的计算方式,在图像、语音等领域取得了令人瞩目的成绩。本章介绍了卷积的概念及其特性,并对池化、填充等操作进行了详细的讨论。前面介绍的基于循环神经网络的翻译模型在引入注意力机制后已经大幅度超越了基于统计的机器翻译模型,但由于循环神经网络的计算方式导致网络整体的并行能力差,训练耗时。本章介绍了具有高并行计算的能力的模型范式,即基于卷积神经网络的编码器-解码器框架。其在机器翻译任务上取得了与基于循环神经网络的GNMT模型相当的性能,并大幅度缩短了模型的训练周期。除了基础部分,本章还针对卷积计算进行了延伸,包括逐通道卷积、逐点卷积、轻量卷积和动态卷积等。除了上述提及的内容,卷积神经网络及其变种在文本分类、命名实体识别等其他自然语言处理任务上也有许多应用。 \parinterval 卷积是一种高效处理网格数据的计算方式,在图像、语音等领域取得了令人瞩目的成绩。本章介绍了卷积的概念及其特性,并对池化、填充等操作进行了详细的讨论。前面介绍的基于循环神经网络的翻译模型在引入注意力机制后已经大幅度超越了基于统计的机器翻译模型,但由于循环神经网络的计算方式导致网络整体的并行能力差,训练耗时。本章介绍了具有高并行计算的能力的模型范式,即基于卷积神经网络的编码器-解码器框架。其在机器翻译任务上取得了与基于循环神经网络的GNMT模型相当的性能,并大幅度缩短了模型的训练周期。除了基础部分,本章还针对卷积计算进行了延伸,包括逐通道卷积、逐点卷积、轻量卷积和动态卷积等。除了上述提及的内容,卷积神经网络及其变种在文本分类、命名实体识别等其他自然语言处理任务上也有许多应用。
\parinterval 和机器翻译任务不同的是,文本分类任务侧重于对序列特征的提取,然后通过压缩后的特征表示做出类别预测。卷积神经网络可以对序列中一些$n$-gram特征进行提取,也可以用在文本分类任务中,其基本结构包括输入层、卷积层、池化层和全连接层。除了在本章介绍过的TextCNN模型\upcite{Kim2014ConvolutionalNN},不少研究工作在此基础上对其进行改进。比如,通过改变输入层来引入更多特征\upcite{DBLP:conf/acl/NguyenG15,DBLP:conf/aaai/LaiXLZ15},对卷积层的改进\upcite{DBLP:conf/acl/ChenXLZ015,DBLP:conf/emnlp/LeiBJ15}以及对池化层的改进\upcite{Kalchbrenner2014ACN,DBLP:conf/acl/ChenXLZ015}。在命名实体识别任务中,同样可以使用卷积神经网络来进行特征提取\upcite{DBLP:journals/jmlr/CollobertWBKKK11,DBLP:conf/cncl/ZhouZXQBX17},或者使用更高效的空洞卷积对更长的上下文进行建模\upcite{DBLP:conf/emnlp/StrubellVBM17}。此外,也有一些研究工作尝试使用卷积神经网络来提取字符级特征\upcite{DBLP:conf/acl/MaH16,DBLP:conf/emnlp/LiDWCM17,DBLP:conf/acl-codeswitch/WangCK18} \parinterval 和机器翻译任务不同的是,文本分类任务侧重于对序列特征的提取,然后通过压缩后的特征表示做出类别预测。卷积神经网络可以对序列中一些$n$-gram特征进行提取,也可以用在文本分类任务中,其基本结构包括输入层、卷积层、池化层和全连接层。除了在本章介绍过的TextCNN模型\upcite{Kim2014ConvolutionalNN},不少研究工作在此基础上对其进行改进。比如,通过改变输入层来引入更多特征\upcite{DBLP:conf/acl/NguyenG15,DBLP:conf/aaai/LaiXLZ15},对卷积层的改进\upcite{DBLP:conf/acl/ChenXLZ015,DBLP:conf/emnlp/LeiBJ15}以及对池化层的改进\upcite{Kalchbrenner2014ACN,DBLP:conf/acl/ChenXLZ015}。在命名实体识别任务中,同样可以使用卷积神经网络来进行特征提取\upcite{2011Natural,DBLP:conf/cncl/ZhouZXQBX17},或者使用更高效的空洞卷积对更长的上下文进行建模\upcite{DBLP:conf/emnlp/StrubellVBM17}。此外,也有一些研究工作尝试使用卷积神经网络来提取字符级特征\upcite{DBLP:conf/acl/MaH16,DBLP:conf/emnlp/LiDWCM17,DBLP:conf/acl-codeswitch/WangCK18}
......
...@@ -2166,6 +2166,6 @@ Jobs was the CEO of {\red{\underline{apple}}}. ...@@ -2166,6 +2166,6 @@ Jobs was the CEO of {\red{\underline{apple}}}.
\vspace{0.5em} \vspace{0.5em}
\item 为了进一步提高神经语言模型性能,除了改进模型,还可以在模型中引入新的结构或是其他有效信息,该领域也有很多典型工作值得关注。例如在神经语言模型中引入除了词嵌入以外的单词特征,如语言特征(形态、语法、语义特征等)\upcite{Wu2012FactoredLM,Adel2015SyntacticAS}、上下文信息\upcite{mikolov2012context,Wang2015LargerContextLM}、知识图谱等外部知识\upcite{Ahn2016ANK};或是在神经语言模型中引入字符级信息,将其作为字符特征单独\upcite{Kim2016CharacterAwareNL,Hwang2017CharacterlevelLM}或与单词特征一起\upcite{Onoe2016GatedWR,Verwimp2017CharacterWordLL}送入模型中;在神经语言模型中引入双向模型也是一种十分有效的尝试,在单词预测时可以同时利用来自过去和未来的文本信息\upcite{Graves2013HybridSR,bahdanau2014neural,Peters2018DeepCW} \item 为了进一步提高神经语言模型性能,除了改进模型,还可以在模型中引入新的结构或是其他有效信息,该领域也有很多典型工作值得关注。例如在神经语言模型中引入除了词嵌入以外的单词特征,如语言特征(形态、语法、语义特征等)\upcite{Wu2012FactoredLM,Adel2015SyntacticAS}、上下文信息\upcite{mikolov2012context,Wang2015LargerContextLM}、知识图谱等外部知识\upcite{Ahn2016ANK};或是在神经语言模型中引入字符级信息,将其作为字符特征单独\upcite{Kim2016CharacterAwareNL,Hwang2017CharacterlevelLM}或与单词特征一起\upcite{Onoe2016GatedWR,Verwimp2017CharacterWordLL}送入模型中;在神经语言模型中引入双向模型也是一种十分有效的尝试,在单词预测时可以同时利用来自过去和未来的文本信息\upcite{Graves2013HybridSR,bahdanau2014neural,Peters2018DeepCW}
\vspace{0.5em} \vspace{0.5em}
\item 词嵌入是自然语言处理近些年的重要进展。所谓“嵌入”是一类方法,理论上,把一个事物进行分布式表示的过程都可以被看作是广义上的“嵌入”。基于这种思想的表示学习也成为了自然语言处理中的前沿方法。比如,如何对树结构,甚至图结构进行分布式表示成为了分析自然语言的重要方法\upcite{DBLP:journals/corr/abs-1809-01854,Yin2018StructVAETL,Aharoni2017TowardsSN,Bastings2017GraphCE,KoncelKedziorski2019TextGF}。此外,除了语言建模,还有很多方式可以进行词嵌入的学习,比如,SENNA\upcite{collobert2011natural}、word2vec\upcite{DBLP:journals/corr/abs-1301-3781,mikolov2013distributed}、Glove\upcite{DBLP:conf/emnlp/PenningtonSM14}、CoVe\upcite{mccann2017learned} 等。 \item 词嵌入是自然语言处理近些年的重要进展。所谓“嵌入”是一类方法,理论上,把一个事物进行分布式表示的过程都可以被看作是广义上的“嵌入”。基于这种思想的表示学习也成为了自然语言处理中的前沿方法。比如,如何对树结构,甚至图结构进行分布式表示成为了分析自然语言的重要方法\upcite{DBLP:journals/corr/abs-1809-01854,Yin2018StructVAETL,Aharoni2017TowardsSN,Bastings2017GraphCE,KoncelKedziorski2019TextGF}。此外,除了语言建模,还有很多方式可以进行词嵌入的学习,比如,SENNA\upcite{2011Natural}、word2vec\upcite{DBLP:journals/corr/abs-1301-3781,mikolov2013distributed}、Glove\upcite{DBLP:conf/emnlp/PenningtonSM14}、CoVe\upcite{mccann2017learned} 等。
\vspace{0.5em} \vspace{0.5em}
\end{itemize} \end{itemize}
...@@ -3867,8 +3867,7 @@ year = {2012} ...@@ -3867,8 +3867,7 @@ year = {2012}
volume={18}, volume={18},
number={4}, number={4},
pages={467--479}, pages={467--479},
year={1992}, year={1992}
publisher={MIT Press}
} }
@inproceedings{mikolov2012context, @inproceedings{mikolov2012context,
...@@ -3877,10 +3876,9 @@ year = {2012} ...@@ -3877,10 +3876,9 @@ year = {2012}
Tomas and Tomas and
Zweig and Zweig and
Geoffrey}, Geoffrey},
booktitle={2012 IEEE Spoken Language Technology Workshop (SLT)}, publisher={IEEE Spoken Language Technology Workshop},
pages={234--239}, pages={234--239},
year={2012}, year={2012}
organization={IEEE}
} }
@article{zaremba2014recurrent, @article{zaremba2014recurrent,
...@@ -3905,7 +3903,7 @@ year = {2012} ...@@ -3905,7 +3903,7 @@ year = {2012}
Jan and Jan and
Schmidhuber and Schmidhuber and
Jurgen}, Jurgen},
journal={arXiv: Learning}, journal={International Conference on Machine Learning},
year={2016} year={2016}
} }
...@@ -3917,7 +3915,7 @@ year = {2012} ...@@ -3917,7 +3915,7 @@ year = {2012}
Nitish Shirish and Nitish Shirish and
Socher and Socher and
Richard}, Richard},
journal={arXiv: Computation and Language}, journal={International Conference on Learning Representations},
year={2017} year={2017}
} }
...@@ -3934,12 +3932,11 @@ year = {2012} ...@@ -3934,12 +3932,11 @@ year = {2012}
@article{baydin2017automatic, @article{baydin2017automatic,
title ={Automatic differentiation in machine learning: a survey}, title ={Automatic differentiation in machine learning: a survey},
author ={Baydin, At{\i}l{\i}m G{\"u}nes and Pearlmutter, Barak A and Radul, Alexey Andreyevich and Siskind, Jeffrey Mark}, author ={Baydin, At{\i}l{\i}m G{\"u}nes and Pearlmutter, Barak A and Radul, Alexey Andreyevich and Siskind, Jeffrey Mark},
journal ={The Journal of Machine Learning Research}, journal ={Journal of Machine Learning Research},
volume ={18}, volume ={18},
number ={1}, number ={1},
pages ={5595--5637}, pages ={5595--5637},
year ={2017}, year ={2017}
publisher ={JMLR. org}
} }
@article{qian1999momentum, @article{qian1999momentum,
...@@ -3977,9 +3974,8 @@ year = {2012} ...@@ -3977,9 +3974,8 @@ year = {2012}
author = {Diederik P. Kingma and author = {Diederik P. Kingma and
Jimmy Ba}, Jimmy Ba},
title = {Adam: {A} Method for Stochastic Optimization}, title = {Adam: {A} Method for Stochastic Optimization},
booktitle = {3rd International Conference on Learning Representations, {ICLR} 2015, publisher = {International Conference on Learning Representations},
San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings}, year = {2015}
year = {2015},
} }
@inproceedings{ioffe2015batch, @inproceedings{ioffe2015batch,
...@@ -3987,13 +3983,10 @@ year = {2012} ...@@ -3987,13 +3983,10 @@ year = {2012}
Christian Szegedy}, Christian Szegedy},
title = {Batch Normalization: Accelerating Deep Network Training by Reducing title = {Batch Normalization: Accelerating Deep Network Training by Reducing
Internal Covariate Shift}, Internal Covariate Shift},
booktitle = {Proceedings of the 32nd International Conference on Machine Learning, publisher = {International Conference on Machine Learning},
{ICML} 2015, Lille, France, 6-11 July 2015},
series = {{JMLR} Workshop and Conference Proceedings},
volume = {37}, volume = {37},
pages = {448--456}, pages = {448--456},
publisher = {JMLR.org}, year = {2015}
year = {2015},
} }
@article{Ba2016LayerN, @article{Ba2016LayerN,
...@@ -4003,7 +3996,7 @@ year = {2012} ...@@ -4003,7 +3996,7 @@ year = {2012}
title = {Layer Normalization}, title = {Layer Normalization},
journal = {CoRR}, journal = {CoRR},
volume = {abs/1607.06450}, volume = {abs/1607.06450},
year = {2016}, year = {2016}
} }
@inproceedings{mikolov2013distributed, @inproceedings{mikolov2013distributed,
...@@ -4013,11 +4006,9 @@ year = {2012} ...@@ -4013,11 +4006,9 @@ year = {2012}
Gregory S. Corrado and Gregory S. Corrado and
Jeffrey Dean}, Jeffrey Dean},
title = {Distributed Representations of Words and Phrases and their Compositionality}, title = {Distributed Representations of Words and Phrases and their Compositionality},
booktitle = {Advances in Neural Information Processing Systems 26: 27th Annual publisher = {Conference on Neural Information Processing Systems},
Conference on Neural Information Processing Systems 2013. Proceedings
of a meeting held December 5-8, 2013, Lake Tahoe, Nevada, United States},
pages = {3111--3119}, pages = {3111--3119},
year = {2013}, year = {2013}
} }
@inproceedings{arthur2016incorporating, @inproceedings{arthur2016incorporating,
...@@ -4025,12 +4016,9 @@ year = {2012} ...@@ -4025,12 +4016,9 @@ year = {2012}
Graham Neubig and Graham Neubig and
Satoshi Nakamura}, Satoshi Nakamura},
title = {Incorporating Discrete Translation Lexicons into Neural Machine Translation}, title = {Incorporating Discrete Translation Lexicons into Neural Machine Translation},
booktitle = {Proceedings of the 2016 Conference on Empirical Methods in Natural
Language Processing, {EMNLP} 2016, Austin, Texas, USA, November 1-4,
2016},
pages = {1557--1567}, pages = {1557--1567},
publisher = {The Association for Computational Linguistics}, publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2016}, year = {2016}
} }
@inproceedings{stahlberg2016syntactically, @inproceedings{stahlberg2016syntactically,
...@@ -4039,10 +4027,7 @@ year = {2012} ...@@ -4039,10 +4027,7 @@ year = {2012}
Aurelien Waite and Aurelien Waite and
Bill Byrne}, Bill Byrne},
title = {Syntactically Guided Neural Machine Translation}, title = {Syntactically Guided Neural Machine Translation},
booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational publisher = {Annual Meeting of the Association for Computational Linguistics},
Linguistics, {ACL} 2016, August 7-12, 2016, Berlin, Germany, Volume
2: Short Papers},
publisher = {The Association for Computer Linguistics},
year = {2016}, year = {2016},
} }
...@@ -4051,12 +4036,9 @@ year = {2012} ...@@ -4051,12 +4036,9 @@ year = {2012}
Alessandro Moschitti}, Alessandro Moschitti},
title = {Embedding Semantic Similarity in Tree Kernels for Domain Adaptation title = {Embedding Semantic Similarity in Tree Kernels for Domain Adaptation
of Relation Extraction}, of Relation Extraction},
booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational
Linguistics, {ACL} 2013, 4-9 August 2013, Sofia, Bulgaria, Volume
1: Long Papers},
pages = {1498--1507}, pages = {1498--1507},
publisher = {The Association for Computer Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2013}, year = {2013}
} }
@inproceedings{perozzi2014deepwalk, @inproceedings{perozzi2014deepwalk,
...@@ -4064,42 +4046,32 @@ year = {2012} ...@@ -4064,42 +4046,32 @@ year = {2012}
Rami Al-Rfou and Rami Al-Rfou and
Steven Skiena}, Steven Skiena},
title = {DeepWalk: online learning of social representations}, title = {DeepWalk: online learning of social representations},
booktitle = {The 20th {ACM} {SIGKDD} International Conference on Knowledge Discovery publisher = {ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
and Data Mining, {KDD} '14, New York, NY, {USA} - August 24 - 27,
2014},
pages = {701--710}, pages = {701--710},
publisher = {{ACM}}, year = {2014}
year = {2014},
} }
@article{collobert2011natural, @article{2011Natural,
author = {Ronan Collobert and title={Natural Language Processing (almost) from Scratch},
Jason Weston and author={ Collobert, Ronan and Weston, Jason and Bottou, Léon and Karlen, Michael and Kavukcuoglu, Koray and Kuksa, Pavel },
L{\'{e}}on Bottou and journal={Journal of Machine Learning Research},
Michael Karlen and volume={12},
Koray Kavukcuoglu and number={1},
Pavel P. Kuksa}, pages={2493-2537},
title = {Natural Language Processing (Almost) from Scratch}, year={2011}
journal = {Journal of Machine Learning Research},
volume = {12},
pages = {2493--2537},
year = {2011},
} }
@inproceedings{mccann2017learned, @inproceedings{mccann2017learned,
author = {Bryan McCann and author = {Bryan McCann and
James Bradbury and James Bradbury and
Caiming Xiong and Caiming Xiong and
Richard Socher}, Richard Socher},
title = {Learned in Translation: Contextualized Word Vectors}, title = {Learned in Translation: Contextualized Word Vectors},
booktitle = {Advances in Neural Information Processing Systems 30: Annual Conference booktitle = {Conference on Neural Information Processing Systems},
on Neural Information Processing Systems 2017, 4-9 December 2017,
Long Beach, CA, {USA}},
pages = {6294--6305}, pages = {6294--6305},
year = {2017}, year = {2017}
} }
%%%%%%%%%%%%%%%%%%%%%%%神经语言模型,检查修改%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%神经语言模型,检查修改%%%%%%%%%%%%%%%%%%%%%%%%%
@inproceedings{Peters2018DeepCW, @inproceedings{Peters2018DeepCW,
title={Deep contextualized word representations}, title={Deep contextualized word representations},
author={Matthew E. Peters and author={Matthew E. Peters and
...@@ -4135,13 +4107,13 @@ year = {2012} ...@@ -4135,13 +4107,13 @@ year = {2012}
} }
@inproceedings{Onoe2016GatedWR, @inproceedings{Onoe2016GatedWR,
title={Gated Word-Character Recurrent Language Model}, author = {Yasumasa Miyamoto and
author={Yasumasa Miyamoto and Kyunghyun Cho},
Kyunghyun Cho}, title = {Gated Word-Character Recurrent Language Model},
publisher={arXiv preprint arXiv:1606.01700}, pages = {1992--1997},
year={2016} publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016}
} }
@inproceedings{Hwang2017CharacterlevelLM, @inproceedings{Hwang2017CharacterlevelLM,
title={Character-level language modeling with hierarchical recurrent neural networks}, title={Character-level language modeling with hierarchical recurrent neural networks},
author={Kyuyeon Hwang and author={Kyuyeon Hwang and
...@@ -4216,12 +4188,11 @@ year = {2012} ...@@ -4216,12 +4188,11 @@ year = {2012}
Ruocheng Guo and Ruocheng Guo and
Adrienne Raglin and Adrienne Raglin and
Huan Liu}, Huan Liu},
journal={ACM SIGKDD Explorations Newsletter}, journal={ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
volume={22}, volume={22},
number={1}, number={1},
pages={18--33}, pages={18--33},
year={2020}, year={2020}
publisher={ACM New York, NY, USA}
} }
@incollection{nguyen2019understanding, @incollection{nguyen2019understanding,
...@@ -4231,7 +4202,7 @@ year = {2012} ...@@ -4231,7 +4202,7 @@ year = {2012}
Jeff Clune}, Jeff Clune},
pages={55--76}, pages={55--76},
year={2019}, year={2019},
publisher={Explainable AI} publisher={Springer}
} }
@inproceedings{yang2017improving, @inproceedings{yang2017improving,
title={Improving adversarial neural machine translation with prior knowledge}, title={Improving adversarial neural machine translation with prior knowledge},
...@@ -4250,15 +4221,16 @@ year = {2012} ...@@ -4250,15 +4221,16 @@ year = {2012}
title={Incorporating source syntax into transformer-based neural machine translation}, title={Incorporating source syntax into transformer-based neural machine translation},
author={Anna Currey and author={Anna Currey and
Kenneth Heafield}, Kenneth Heafield},
publisher={Proceedings of the Fourth Conference on Machine Translation}, publisher={Annual Meeting of the Association for Computational Linguistics},
pages={24--33}, pages={24--33},
year={2019} year={2019}
} }
@article{currey2018multi, @article{currey2018multi,
title={Multi-source syntactic neural machine translation}, title={Multi-source syntactic neural machine translation},
author={Anna Currey and author={Anna Currey and
Kenneth Heafield}, Kenneth Heafield},
journal={arXiv preprint arXiv:1808.10267}, journal={Conference on Empirical Methods in Natural Language Processing},
year={2018} year={2018}
} }
@inproceedings{marevcek2018extracting, @inproceedings{marevcek2018extracting,
...@@ -4272,7 +4244,7 @@ year = {2012} ...@@ -4272,7 +4244,7 @@ year = {2012}
@article{blevins2018deep, @article{blevins2018deep,
title={Deep rnns encode soft hierarchical syntax}, title={Deep rnns encode soft hierarchical syntax},
author={Blevins, Terra and Levy, Omer and Zettlemoyer, Luke}, author={Blevins, Terra and Levy, Omer and Zettlemoyer, Luke},
journal={arXiv preprint arXiv:1805.04218}, journal={Annual Meeting of the Association for Computational Linguistics},
year={2018} year={2018}
} }
@inproceedings{Yin2018StructVAETL, @inproceedings{Yin2018StructVAETL,
...@@ -4288,7 +4260,7 @@ year = {2012} ...@@ -4288,7 +4260,7 @@ year = {2012}
title={Towards String-To-Tree Neural Machine Translation}, title={Towards String-To-Tree Neural Machine Translation},
author={Roee Aharoni and author={Roee Aharoni and
Yoav Goldberg}, Yoav Goldberg},
journal={arXiv preprint arXiv:1704.04743}, journal={Annual Meeting of the Association for Computational Linguistics},
year={2017} year={2017}
} }
...@@ -4308,9 +4280,8 @@ year = {2012} ...@@ -4308,9 +4280,8 @@ year = {2012}
Dhanush Bekal and Yi Luan and Dhanush Bekal and Yi Luan and
Mirella Lapata and Mirella Lapata and
Hannaneh Hajishirzi}, Hannaneh Hajishirzi},
journal={ArXiv}, journal={Annual Conference of the North American Chapter of the Association for Computational Linguistics},
year={2019}, year={2019}
volume={abs/1904.02342}
} }
@article{Kovalerchuk2020SurveyOE, @article{Kovalerchuk2020SurveyOE,
...@@ -4327,7 +4298,7 @@ year = {2012} ...@@ -4327,7 +4298,7 @@ year = {2012}
title={Towards A Rigorous Science of Interpretable Machine Learning}, title={Towards A Rigorous Science of Interpretable Machine Learning},
author={Finale Doshi-Velez and author={Finale Doshi-Velez and
Been Kim}, Been Kim},
journal={arXiv: Machine Learning}, journal={arXiv preprint arXiv:1702.08608},
year={2017} year={2017}
} }
...@@ -4349,7 +4320,7 @@ year = {2012} ...@@ -4349,7 +4320,7 @@ year = {2012}
title = {Does Multi-Encoder Help? {A} Case Study on Context-Aware Neural Machine title = {Does Multi-Encoder Help? {A} Case Study on Context-Aware Neural Machine
Translation}, Translation},
pages = {3512--3518}, pages = {3512--3518},
publisher = {Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2020} year = {2020}
} }
...@@ -4359,7 +4330,7 @@ year = {2012} ...@@ -4359,7 +4330,7 @@ year = {2012}
Abe Ittycheriah}, Abe Ittycheriah},
title = {Supervised Attentions for Neural Machine Translation}, title = {Supervised Attentions for Neural Machine Translation},
pages = {2283--2288}, pages = {2283--2288},
publisher = {The Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016} year = {2016}
} }
...@@ -4370,7 +4341,7 @@ year = {2012} ...@@ -4370,7 +4341,7 @@ year = {2012}
Eiichiro Sumita}, Eiichiro Sumita},
title = {Neural Machine Translation with Supervised Attention}, title = {Neural Machine Translation with Supervised Attention},
pages = {3093--3102}, pages = {3093--3102},
publisher = {The Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016} year = {2016}
} }
...@@ -4384,16 +4355,16 @@ year = {2012} ...@@ -4384,16 +4355,16 @@ year = {2012}
title = {Fast and Robust Neural Network Joint Models for Statistical Machine title = {Fast and Robust Neural Network Joint Models for Statistical Machine
Translation}, Translation},
pages = {1370--1380}, pages = {1370--1380},
publisher = {The Association for Computer Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2014}, year = {2014}
} }
@inproceedings{Schwenk_continuousspace, @inproceedings{Schwenk_continuousspace,
author = {Holger Schwenk}, author = {Holger Schwenk},
title = {Continuous Space Translation Models for Phrase-Based Statistical Machine title = {Continuous Space Translation Models for Phrase-Based Statistical Machine
Translation}, Translation},
pages = {1071--1080}, pages = {1071--1080},
publisher = {Indian Institute of Technology Bombay}, publisher = {International Conference on Computational Linguistics},
year = {2012}, year = {2012}
} }
@inproceedings{kalchbrenner-blunsom-2013-recurrent, @inproceedings{kalchbrenner-blunsom-2013-recurrent,
author = {Nal Kalchbrenner and author = {Nal Kalchbrenner and
...@@ -4401,25 +4372,24 @@ year = {2012} ...@@ -4401,25 +4372,24 @@ year = {2012}
title = {Recurrent Continuous Translation Models}, title = {Recurrent Continuous Translation Models},
pages = {1700--1709}, pages = {1700--1709},
publisher = {Annual Meeting of the Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2013}, year = {2013}
} }
@article{HochreiterThe, @article{HochreiterThe,
author = {Sepp Hochreiter}, author = {Sepp Hochreiter},
title = {The Vanishing Gradient Problem During Learning Recurrent Neural Nets title = {The Vanishing Gradient Problem During Learning Recurrent Neural Nets
and Problem Solutions}, and Problem Solutions},
journal = {International Journal of Uncertainty, Fuzziness and Knowledge-Based journal = {International Journal of Uncertainty, Fuzziness and Knowledge-Based Systems},
Systems},
volume = {6}, volume = {6},
number = {2}, number = {2},
pages = {107--116}, pages = {107--116},
year = {1998}, year = {1998}
} }
@article{BENGIO1994Learning, @article{BENGIO1994Learning,
author = {Yoshua Bengio and author = {Yoshua Bengio and
Patrice Y. Simard and Patrice Y. Simard and
Paolo Frasconi}, Paolo Frasconi},
title = {Learning long-term dependencies with gradient descent is difficult}, title = {Learning long-term dependencies with gradient descent is difficult},
journal = {Institute of Electrical and Electronics Engineers}, journal = {IEEE Transactions on Neural Networks},
volume = {5}, volume = {5},
number = {2}, number = {2},
pages = {157--166}, pages = {157--166},
...@@ -4435,15 +4405,14 @@ author = {Yoshua Bengio and ...@@ -4435,15 +4405,14 @@ author = {Yoshua Bengio and
Lukasz Kaiser and Lukasz Kaiser and
Illia Polosukhin}, Illia Polosukhin},
title = {Attention is All you Need}, title = {Attention is All you Need},
publisher = {Advances in Neural Information Processing Systems 30: Annual Conference publisher = {Conference on Neural Information Processing Systems},
on Neural Information Processing Systems},
pages = {5998--6008}, pages = {5998--6008},
year = {2017}, year = {2017}
} }
@article{StahlbergNeural, @article{StahlbergNeural,
title={Neural Machine Translation: A Review}, title={Neural Machine Translation: A Review},
author={Felix Stahlberg}, author={Felix Stahlberg},
journal={journal of artificial intelligence research}, journal={Journal of Artificial Intelligence Research},
year={2020}, year={2020},
volume={69}, volume={69},
pages={343-418} pages={343-418}
...@@ -4455,8 +4424,8 @@ author = {Yoshua Bengio and ...@@ -4455,8 +4424,8 @@ author = {Yoshua Bengio and
Marcello Federico}, Marcello Federico},
title = {Neural versus Phrase-Based Machine Translation Quality: a Case Study}, title = {Neural versus Phrase-Based Machine Translation Quality: a Case Study},
pages = {257--267}, pages = {257--267},
publisher = {The Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016}, year = {2016}
} }
@article{Hassan2018AchievingHP, @article{Hassan2018AchievingHP,
author = {Hany Hassan and author = {Hany Hassan and
...@@ -4498,19 +4467,19 @@ author = {Yoshua Bengio and ...@@ -4498,19 +4467,19 @@ author = {Yoshua Bengio and
Lidia S. Chao}, Lidia S. Chao},
title = {Learning Deep Transformer Models for Machine Translation}, title = {Learning Deep Transformer Models for Machine Translation},
pages = {1810--1822}, pages = {1810--1822},
publisher = {Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019} year = {2019}
} }
@article{Li2020NeuralMT, @inproceedings{Li2020NeuralMT,
author = {Yanyang Li and author = {Yanyang Li and
Qiang Wang and Qiang Wang and
Tong Xiao and Tong Xiao and
Tongran Liu and Tongran Liu and
Jingbo Zhu}, Jingbo Zhu},
title = {Neural Machine Translation with Joint Representation}, title = {Neural Machine Translation with Joint Representation},
journal = {CoRR}, pages = {8285--8292},
volume = {abs/2002.06546}, publisher = {AAAI Conference on Artificial Intelligence},
year = {2020}, year = {2020}
} }
@article{HochreiterLong, @article{HochreiterLong,
author = {Hochreiter, Sepp and Schmidhuber, Jürgen}, author = {Hochreiter, Sepp and Schmidhuber, Jürgen},
...@@ -4519,7 +4488,7 @@ author = {Yoshua Bengio and ...@@ -4519,7 +4488,7 @@ author = {Yoshua Bengio and
pages = {1735-80}, pages = {1735-80},
title = {Long Short-term Memory}, title = {Long Short-term Memory},
volume = {9}, volume = {9},
journal = {Neural computation}, journal = {Neural Computation}
} }
@inproceedings{Cho2014Learning, @inproceedings{Cho2014Learning,
author = {Kyunghyun Cho and author = {Kyunghyun Cho and
...@@ -4531,24 +4500,18 @@ author = {Yoshua Bengio and ...@@ -4531,24 +4500,18 @@ author = {Yoshua Bengio and
Yoshua Bengio}, Yoshua Bengio},
title = {Learning Phrase Representations using {RNN} Encoder-Decoder for Statistical title = {Learning Phrase Representations using {RNN} Encoder-Decoder for Statistical
Machine Translation}, Machine Translation},
publisher = {Proceedings of the 2014 Conference on Empirical Methods in Natural publisher = {Annual Meeting of the Association for Computational Linguistics},
Language Processing, {EMNLP} 2014, October 25-29, 2014, Doha, Qatar,
{A} meeting of SIGDAT, a Special Interest Group of the {ACL}},
pages = {1724--1734}, pages = {1724--1734},
//publisher = {{ACL}}, year = {2014}
year = {2014},
} }
@inproceedings{pmlr-v9-glorot10a, @inproceedings{pmlr-v9-glorot10a,
author = {Xavier Glorot and author = {Xavier Glorot and
Yoshua Bengio}, Yoshua Bengio},
title = {Understanding the difficulty of training deep feedforward neural networks}, title = {Understanding the difficulty of training deep feedforward neural networks},
publisher = {Proceedings of the Thirteenth International Conference on Artificial publisher = {International Conference on Artificial Intelligence and Statistics},
Intelligence and Statistics, {AISTATS} 2010, Chia Laguna Resort, Sardinia,
Italy, May 13-15, 2010},
volume = {9}, volume = {9},
pages = {249--256}, pages = {249--256},
//publisher = {JMLR.org}, year = {2010}
year = {2010},
} }
@inproceedings{xiao2017fast, @inproceedings{xiao2017fast,
author = {Tong Xiao and author = {Tong Xiao and
...@@ -4556,12 +4519,9 @@ author = {Yoshua Bengio and ...@@ -4556,12 +4519,9 @@ author = {Yoshua Bengio and
Tongran Liu and Tongran Liu and
Chunliang Zhang}, Chunliang Zhang},
title = {Fast Parallel Training of Neural Language Models}, title = {Fast Parallel Training of Neural Language Models},
publisher = {Proceedings of the Twenty-Sixth International Joint Conference on publisher = {International Joint Conference on Artificial Intelligence},
Artificial Intelligence, {IJCAI} 2017, Melbourne, Australia, August
19-25, 2017},
pages = {4193--4199}, pages = {4193--4199},
//publisher = {ijcai.org}, year = {2017}
year = {2017},
} }
@inproceedings{Gu2017NonAutoregressiveNM, @inproceedings{Gu2017NonAutoregressiveNM,
author = {Jiatao Gu and author = {Jiatao Gu and
...@@ -4571,7 +4531,7 @@ author = {Yoshua Bengio and ...@@ -4571,7 +4531,7 @@ author = {Yoshua Bengio and
Richard Socher}, Richard Socher},
title = {Non-Autoregressive Neural Machine Translation}, title = {Non-Autoregressive Neural Machine Translation},
publisher = {International Conference on Learning Representations}, publisher = {International Conference on Learning Representations},
year = {2018}, year = {2018}
} }
@inproceedings{li-etal-2018-simple, @inproceedings{li-etal-2018-simple,
author = {Yanyang Li and author = {Yanyang Li and
...@@ -4581,12 +4541,9 @@ author = {Yoshua Bengio and ...@@ -4581,12 +4541,9 @@ author = {Yoshua Bengio and
Changming Xu and Changming Xu and
Jingbo Zhu}, Jingbo Zhu},
title = {A Simple and Effective Approach to Coverage-Aware Neural Machine Translation}, title = {A Simple and Effective Approach to Coverage-Aware Neural Machine Translation},
publisher = {Proceedings of the 56th Annual Meeting of the Association for Computational publisher = {Annual Meeting of the Association for Computational Linguistics},
Linguistics, {ACL} 2018, Melbourne, Australia, July 15-20, 2018, Volume
2: Short Papers},
pages = {292--297}, pages = {292--297},
//publisher = {Association for Computational Linguistics}, year = {2018}
year = {2018},
} }
@inproceedings{TuModeling, @inproceedings{TuModeling,
author = {Zhaopeng Tu and author = {Zhaopeng Tu and
...@@ -4595,11 +4552,8 @@ author = {Yoshua Bengio and ...@@ -4595,11 +4552,8 @@ author = {Yoshua Bengio and
Xiaohua Liu and Xiaohua Liu and
Hang Li}, Hang Li},
title = {Modeling Coverage for Neural Machine Translation}, title = {Modeling Coverage for Neural Machine Translation},
publisher = {Proceedings of the 54th Annual Meeting of the Association for Computational publisher = {Annual Meeting of the Association for Computational Linguistics},
Linguistics, {ACL} 2016, August 7-12, 2016, Berlin, Germany, Volume year = {2016}
1: Long Papers},
//publisher = {The Association for Computer Linguistics},
year = {2016},
} }
@inproceedings{DBLP:journals/corr/SennrichFCBHHJL17, @inproceedings{DBLP:journals/corr/SennrichFCBHHJL17,
author = {Rico Sennrich and author = {Rico Sennrich and
...@@ -4614,23 +4568,17 @@ author = {Yoshua Bengio and ...@@ -4614,23 +4568,17 @@ author = {Yoshua Bengio and
Jozef Mokry and Jozef Mokry and
Maria Nadejde}, Maria Nadejde},
title = {Nematus: a Toolkit for Neural Machine Translation}, title = {Nematus: a Toolkit for Neural Machine Translation},
publisher = {Proceedings of the 15th Conference of the European Chapter of the publisher = {European Association of Computational Linguistics},
Association for Computational Linguistics, {EACL} 2017, Valencia,
Spain, April 3-7, 2017, Software Demonstrations},
pages = {65--68}, pages = {65--68},
//publisher = {Association for Computational Linguistics}, year = {2017}
year = {2017},
} }
@inproceedings{DBLP:journals/corr/abs-1905-13324, @inproceedings{DBLP:journals/corr/abs-1905-13324,
author = {Biao Zhang and author = {Biao Zhang and
Rico Sennrich}, Rico Sennrich},
title = {A Lightweight Recurrent Network for Sequence Modeling}, title = {A Lightweight Recurrent Network for Sequence Modeling},
publisher = {Proceedings of the 57th Conference of the Association for Computational publisher = {Annual Meeting of the Association for Computational Linguistics},
Linguistics, {ACL} 2019, Florence, Italy, July 28- August 2, 2019,
Volume 1: Long Papers},
pages = {1538--1548}, pages = {1538--1548},
//publisher = {Association for Computational Linguistics}, year = {2019}
year = {2019},
} }
@article{Lei2017TrainingRA, @article{Lei2017TrainingRA,
author = {Tao Lei and author = {Tao Lei and
...@@ -4639,7 +4587,7 @@ author = {Yoshua Bengio and ...@@ -4639,7 +4587,7 @@ author = {Yoshua Bengio and
title = {Training RNNs as Fast as CNNs}, title = {Training RNNs as Fast as CNNs},
journal = {CoRR}, journal = {CoRR},
volume = {abs/1709.02755}, volume = {abs/1709.02755},
year = {2017}, year = {2017}
} }
@inproceedings{Zhang2018SimplifyingNM, @inproceedings{Zhang2018SimplifyingNM,
author = {Biao Zhang and author = {Biao Zhang and
...@@ -4649,22 +4597,18 @@ author = {Yoshua Bengio and ...@@ -4649,22 +4597,18 @@ author = {Yoshua Bengio and
Huiji Zhang}, Huiji Zhang},
title = {Simplifying Neural Machine Translation with Addition-Subtraction Twin-Gated title = {Simplifying Neural Machine Translation with Addition-Subtraction Twin-Gated
Recurrent Networks}, Recurrent Networks},
publisher = {Proceedings of the 2018 Conference on Empirical Methods in Natural publisher = {Conference on Empirical Methods in Natural Language Processing},
Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
pages = {4273--4283}, pages = {4273--4283},
//publisher = {Association for Computational Linguistics}, year = {2018}
year = {2018},
} }
@inproceedings{Liu_2019_CVPR, @inproceedings{Liu_2019_CVPR,
author = {Shikun Liu and author = {Shikun Liu and
Edward Johns and Edward Johns and
Andrew J. Davison}, Andrew J. Davison},
title = {End-To-End Multi-Task Learning With Attention}, title = {End-To-End Multi-Task Learning With Attention},
publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition, {CVPR} publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
2019, Long Beach, CA, USA, June 16-20, 2019},
pages = {1871--1880}, pages = {1871--1880},
//publisher = {Computer Vision Foundation / {IEEE}}, year = {2019}
year = {2019},
} }
@inproceedings{DBLP:journals/corr/abs-1811-00498, @inproceedings{DBLP:journals/corr/abs-1811-00498,
author = {Ra{\'{u}}l V{\'{a}}zquez and author = {Ra{\'{u}}l V{\'{a}}zquez and
...@@ -4672,11 +4616,9 @@ author = {Yoshua Bengio and ...@@ -4672,11 +4616,9 @@ author = {Yoshua Bengio and
J{\"{o}}rg Tiedemann and J{\"{o}}rg Tiedemann and
Mathias Creutz}, Mathias Creutz},
title = {Multilingual {NMT} with a Language-Independent Attention Bridge}, title = {Multilingual {NMT} with a Language-Independent Attention Bridge},
publisher = {Proceedings of the 4th Workshop on Representation Learning for NLP, publisher = {Annual Meeting of the Association for Computational Linguistics},
RepL4NLP@ACL 2019, Florence, Italy, August 2, 2019},
pages = {33--39}, pages = {33--39},
//publisher = {Association for Computational Linguistics}, year = {2019}
year = {2019},
} }
@inproceedings{MoradiInterrogating, @inproceedings{MoradiInterrogating,
author = {Pooya Moradi and author = {Pooya Moradi and
...@@ -4684,11 +4626,9 @@ author = {Yoshua Bengio and ...@@ -4684,11 +4626,9 @@ author = {Yoshua Bengio and
Anoop Sarkar}, Anoop Sarkar},
title = {Interrogating the Explanatory Power of Attention in Neural Machine title = {Interrogating the Explanatory Power of Attention in Neural Machine
Translation}, Translation},
publisher = {Proceedings of the 3rd Workshop on Neural Generation and Translation@EMNLP-IJCNLP publisher = {Conference on Empirical Methods in Natural Language Processing},
2019, Hong Kong, November 4, 2019},
pages = {221--230}, pages = {221--230},
//publisher = {Association for Computational Linguistics}, year = {2019}
year = {2019},
} }
@inproceedings{WangNeural, @inproceedings{WangNeural,
author = {Xing Wang and author = {Xing Wang and
...@@ -4698,11 +4638,9 @@ author = {Yoshua Bengio and ...@@ -4698,11 +4638,9 @@ author = {Yoshua Bengio and
Deyi Xiong and Deyi Xiong and
Min Zhang}, Min Zhang},
title = {Neural Machine Translation Advised by Statistical Machine Translation}, title = {Neural Machine Translation Advised by Statistical Machine Translation},
publisher = {Proceedings of the Thirty-First {AAAI} Conference on Artificial Intelligence, publisher = {AAAI Conference on Artificial Intelligence},
February 4-9, 2017, San Francisco, California, {USA}},
pages = {3330--3336}, pages = {3330--3336},
//publisher = {{AAAI} Press}, year = {2017}
year = {2017},
} }
@inproceedings{Xiao2019SharingAW, @inproceedings{Xiao2019SharingAW,
author = {Tong Xiao and author = {Tong Xiao and
...@@ -4711,12 +4649,9 @@ author = {Yoshua Bengio and ...@@ -4711,12 +4649,9 @@ author = {Yoshua Bengio and
Zhengtao Yu and Zhengtao Yu and
Tongran Liu}, Tongran Liu},
title = {Sharing Attention Weights for Fast Transformer}, title = {Sharing Attention Weights for Fast Transformer},
publisher = {Proceedings of the Twenty-Eighth International Joint Conference on publisher = {International Joint Conference on Artificial Intelligence},
Artificial Intelligence, {IJCAI} 2019, Macao, China, August 10-16,
2019},
pages = {5292--5298}, pages = {5292--5298},
//publisher = {ijcai.org}, year = {2019}
year = {2019},
} }
@inproceedings{Yang2017TowardsBH, @inproceedings{Yang2017TowardsBH,
author = {Baosong Yang and author = {Baosong Yang and
...@@ -4726,36 +4661,27 @@ author = {Yoshua Bengio and ...@@ -4726,36 +4661,27 @@ author = {Yoshua Bengio and
Jingbo Zhu}, Jingbo Zhu},
title = {Towards Bidirectional Hierarchical Representations for Attention-based title = {Towards Bidirectional Hierarchical Representations for Attention-based
Neural Machine Translation}, Neural Machine Translation},
publisher = {Proceedings of the 2017 Conference on Empirical Methods in Natural publisher = {Conference on Empirical Methods in Natural Language Processing},
Language Processing, {EMNLP} 2017, Copenhagen, Denmark, September
9-11, 2017},
pages = {1432--1441}, pages = {1432--1441},
//publisher = {Association for Computational Linguistics}, year = {2017}
year = {2017},
} }
@inproceedings{Wang2019TreeTI, @inproceedings{Wang2019TreeTI,
author = {Yau-Shian Wang and author = {Yau-Shian Wang and
Hung-yi Lee and Hung-yi Lee and
Yun-Nung Chen}, Yun-Nung Chen},
title = {Tree Transformer: Integrating Tree Structures into Self-Attention}, title = {Tree Transformer: Integrating Tree Structures into Self-Attention},
publisher = {Proceedings of the 2019 Conference on Empirical Methods in Natural publisher = {Conference on Empirical Methods in Natural Language Processing},
Language Processing and the 9th International Joint Conference on
Natural Language Processing, {EMNLP-IJCNLP} 2019, Hong Kong, China,
November 3-7, 2019},
//publisher = {Association for Computational Linguistics},
pages = {1061--1070}, pages = {1061--1070},
year = {2019}, year = {2019}
} }
@inproceedings{DBLP:journals/corr/abs-1809-01854, @inproceedings{DBLP:journals/corr/abs-1809-01854,
author = {Jetic Gu and author = {Jetic Gu and
Hassan S. Shavarani and Hassan S. Shavarani and
Anoop Sarkar}, Anoop Sarkar},
title = {Top-down Tree Structured Decoding with Syntactic Connections for Neural Machine Translation and Parsing}, title = {Top-down Tree Structured Decoding with Syntactic Connections for Neural Machine Translation and Parsing},
publisher = {Proceedings of the 2018 Conference on Empirical Methods in Natural publisher = {Conference on Empirical Methods in Natural Language Processing},
Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
pages = {401--413}, pages = {401--413},
//publisher = {Association for Computational Linguistics}, year = {2018}
year = {2018},
} }
@inproceedings{DBLP:journals/corr/abs-1808-09374, @inproceedings{DBLP:journals/corr/abs-1808-09374,
author = {Xinyi Wang and author = {Xinyi Wang and
...@@ -4763,11 +4689,9 @@ author = {Yoshua Bengio and ...@@ -4763,11 +4689,9 @@ author = {Yoshua Bengio and
Pengcheng Yin and Pengcheng Yin and
Graham Neubig}, Graham Neubig},
title = {A Tree-based Decoder for Neural Machine Translation}, title = {A Tree-based Decoder for Neural Machine Translation},
publisher = {Proceedings of the 2018 Conference on Empirical Methods in Natural publisher = {Conference on Empirical Methods in Natural Language Processing},
Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
pages = {4772--4777}, pages = {4772--4777},
//publisher = {Association for Computational Linguistics}, year = {2018}
year = {2018},
} }
@article{DBLP:journals/corr/ZhangZ16c, @article{DBLP:journals/corr/ZhangZ16c,
author = {Jiajun Zhang and author = {Jiajun Zhang and
...@@ -4775,7 +4699,7 @@ author = {Yoshua Bengio and ...@@ -4775,7 +4699,7 @@ author = {Yoshua Bengio and
title = {Bridging Neural Machine Translation and Bilingual Dictionaries}, title = {Bridging Neural Machine Translation and Bilingual Dictionaries},
journal = {CoRR}, journal = {CoRR},
volume = {abs/1610.07272}, volume = {abs/1610.07272},
year = {2016}, year = {2016}
} }
@article{Dai2019TransformerXLAL, @article{Dai2019TransformerXLAL,
author = {Zihang Dai and author = {Zihang Dai and
...@@ -4787,7 +4711,7 @@ author = {Yoshua Bengio and ...@@ -4787,7 +4711,7 @@ author = {Yoshua Bengio and
title = {Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context}, title = {Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context},
journal = {CoRR}, journal = {CoRR},
volume = {abs/1901.02860}, volume = {abs/1901.02860},
year = {2019}, year = {2019}
} }
@inproceedings{li-etal-2019-word, @inproceedings{li-etal-2019-word,
author = {Xintong Li and author = {Xintong Li and
...@@ -4796,12 +4720,9 @@ author = {Yoshua Bengio and ...@@ -4796,12 +4720,9 @@ author = {Yoshua Bengio and
Max Meng and Max Meng and
Shuming Shi}, Shuming Shi},
title = {On the Word Alignment from Neural Machine Translation}, title = {On the Word Alignment from Neural Machine Translation},
publisher = {Proceedings of the 57th Conference of the Association for Computational publisher = {Annual Meeting of the Association for Computational Linguistics},
Linguistics, {ACL} 2019, Florence, Italy, July 28- August 2, 2019,
Volume 1: Long Papers},
pages = {1293--1303}, pages = {1293--1303},
//publisher = {Association for Computational Linguistics}, year = {2019}
year = {2019},
} }
@inproceedings{Werlen2018DocumentLevelNM, @inproceedings{Werlen2018DocumentLevelNM,
...@@ -4811,11 +4732,9 @@ author = {Yoshua Bengio and ...@@ -4811,11 +4732,9 @@ author = {Yoshua Bengio and
James Henderson}, James Henderson},
title = {Document-Level Neural Machine Translation with Hierarchical Attention title = {Document-Level Neural Machine Translation with Hierarchical Attention
Networks}, Networks},
publisher = {Proceedings of the 2018 Conference on Empirical Methods in Natural publisher = {Conference on Empirical Methods in Natural Language Processing},
Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
pages = {2947--2954}, pages = {2947--2954},
//publisher = {Association for Computational Linguistics}, year = {2018}
year = {2018},
} }
@inproceedings{DBLP:journals/corr/abs-1805-10163, @inproceedings{DBLP:journals/corr/abs-1805-10163,
author = {Elena Voita and author = {Elena Voita and
...@@ -4823,12 +4742,9 @@ author = {Yoshua Bengio and ...@@ -4823,12 +4742,9 @@ author = {Yoshua Bengio and
Rico Sennrich and Rico Sennrich and
Ivan Titov}, Ivan Titov},
title = {Context-Aware Neural Machine Translation Learns Anaphora Resolution}, title = {Context-Aware Neural Machine Translation Learns Anaphora Resolution},
publisher = {Proceedings of the 56th Annual Meeting of the Association for Computational publisher = {Annual Meeting of the Association for Computational Linguistics},
Linguistics, {ACL} 2018, Melbourne, Australia, July 15-20, 2018, Volume
1: Long Papers},
pages = {1264--1274}, pages = {1264--1274},
//publisher = {Association for Computational Linguistics}, year = {2018}
year = {2018},
} }
@article{DBLP:journals/corr/abs-1906-00532, @article{DBLP:journals/corr/abs-1906-00532,
author = {Aishwarya Bhandare and author = {Aishwarya Bhandare and
...@@ -4842,7 +4758,7 @@ author = {Yoshua Bengio and ...@@ -4842,7 +4758,7 @@ author = {Yoshua Bengio and
Translation Model}, Translation Model},
journal = {CoRR}, journal = {CoRR},
volume = {abs/1906.00532}, volume = {abs/1906.00532},
year = {2019}, year = {2019}
} }
@inproceedings{Zhang2018SpeedingUN, @inproceedings{Zhang2018SpeedingUN,
...@@ -4852,22 +4768,18 @@ author = {Yoshua Bengio and ...@@ -4852,22 +4768,18 @@ author = {Yoshua Bengio and
Lei Shen and Lei Shen and
Qun Liu}, Qun Liu},
title = {Speeding Up Neural Machine Translation Decoding by Cube Pruning}, title = {Speeding Up Neural Machine Translation Decoding by Cube Pruning},
publisher = {Proceedings of the 2018 Conference on Empirical Methods in Natural publisher = {Conference on Empirical Methods in Natural Language Processing},
Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
pages = {4284--4294}, pages = {4284--4294},
//publisher = {Association for Computational Linguistics}, year = {2018}
year = {2018},
} }
@inproceedings{DBLP:journals/corr/SeeLM16, @inproceedings{DBLP:journals/corr/SeeLM16,
author = {Abigail See and author = {Abigail See and
Minh-Thang Luong and Minh-Thang Luong and
Christopher D. Manning}, Christopher D. Manning},
title = {Compression of Neural Machine Translation Models via Pruning}, title = {Compression of Neural Machine Translation Models via Pruning},
publisher = {Proceedings of the 20th {SIGNLL} Conference on Computational Natural publisher = {International Conference on Computational Linguistics},
Language Learning, CoNLL 2016, Berlin, Germany, August 11-12, 2016},
pages = {291--301}, pages = {291--301},
//publisher = {{ACL}}, year = {2016}
year = {2016},
} }
@inproceedings{DBLP:journals/corr/ChenLCL17, @inproceedings{DBLP:journals/corr/ChenLCL17,
author = {Yun Chen and author = {Yun Chen and
...@@ -4875,12 +4787,9 @@ author = {Yoshua Bengio and ...@@ -4875,12 +4787,9 @@ author = {Yoshua Bengio and
Yong Cheng and Yong Cheng and
Victor O. K. Li}, Victor O. K. Li},
title = {A Teacher-Student Framework for Zero-Resource Neural Machine Translation}, title = {A Teacher-Student Framework for Zero-Resource Neural Machine Translation},
publisher = {Proceedings of the 55th Annual Meeting of the Association for Computational
Linguistics, {ACL} 2017, Vancouver, Canada, July 30 - August 4, Volume
1: Long Papers},
pages = {1925--1935}, pages = {1925--1935},
//publisher = {Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2017}, year = {2017}
} }
@article{Hinton2015Distilling, @article{Hinton2015Distilling,
author = {Geoffrey E. Hinton and author = {Geoffrey E. Hinton and
...@@ -4889,13 +4798,13 @@ author = {Yoshua Bengio and ...@@ -4889,13 +4798,13 @@ author = {Yoshua Bengio and
title = {Distilling the Knowledge in a Neural Network}, title = {Distilling the Knowledge in a Neural Network},
journal = {CoRR}, journal = {CoRR},
volume = {abs/1503.02531}, volume = {abs/1503.02531},
year = {2015}, year = {2015}
} }
@inproceedings{Ott2018ScalingNM, @inproceedings{Ott2018ScalingNM,
title={Scaling Neural Machine Translation}, title={Scaling Neural Machine Translation},
author={Myle Ott and Sergey Edunov and David Grangier and M. Auli}, author={Myle Ott and Sergey Edunov and David Grangier and M. Auli},
publisher={Workshop on Machine Translation}, publisher={Annual Meeting of the Association for Computational Linguistics},
year={2018} year={2018}
} }
@inproceedings{Lin2020TowardsF8, @inproceedings{Lin2020TowardsF8,
...@@ -4915,7 +4824,7 @@ author = {Yoshua Bengio and ...@@ -4915,7 +4824,7 @@ author = {Yoshua Bengio and
Alexander M. Rush}, Alexander M. Rush},
title = {Sequence-Level Knowledge Distillation}, title = {Sequence-Level Knowledge Distillation},
pages = {1317--1327}, pages = {1317--1327},
publisher = {The Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016} year = {2016}
} }
@article{Akaike1969autoregressive, @article{Akaike1969autoregressive,
...@@ -4946,13 +4855,13 @@ author = {Yoshua Bengio and ...@@ -4946,13 +4855,13 @@ author = {Yoshua Bengio and
title = {The Best of Both Worlds: Combining Recent Advances in Neural Machine title = {The Best of Both Worlds: Combining Recent Advances in Neural Machine
Translation}, Translation},
pages = {76--86}, pages = {76--86},
publisher = {Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2018} year = {2018}
} }
@inproceedings{He2018LayerWiseCB, @inproceedings{He2018LayerWiseCB,
title={Layer-Wise Coordination between Encoder and Decoder for Neural Machine Translation}, title={Layer-Wise Coordination between Encoder and Decoder for Neural Machine Translation},
author={Tianyu He and X. Tan and Yingce Xia and D. He and T. Qin and Zhibo Chen and T. Liu}, author={Tianyu He and X. Tan and Yingce Xia and D. He and T. Qin and Zhibo Chen and T. Liu},
publisher={Conference and Workshop on Neural Information Processing Systems}, publisher={Conference on Neural Information Processing Systems},
year={2018} year={2018}
} }
@inproceedings{cho-etal-2014-properties, @inproceedings{cho-etal-2014-properties,
...@@ -4962,7 +4871,7 @@ author = {Yoshua Bengio and ...@@ -4962,7 +4871,7 @@ author = {Yoshua Bengio and
Yoshua Bengio}, Yoshua Bengio},
title = {On the Properties of Neural Machine Translation: Encoder-Decoder Approaches}, title = {On the Properties of Neural Machine Translation: Encoder-Decoder Approaches},
pages = {103--111}, pages = {103--111},
publisher = {Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2014} year = {2014}
} }
...@@ -4973,7 +4882,7 @@ author = {Yoshua Bengio and ...@@ -4973,7 +4882,7 @@ author = {Yoshua Bengio and
Yoshua Bengio}, Yoshua Bengio},
title = {On Using Very Large Target Vocabulary for Neural Machine Translation}, title = {On Using Very Large Target Vocabulary for Neural Machine Translation},
pages = {1--10}, pages = {1--10},
publisher = {The Association for Computer Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2015} year = {2015}
} }
...@@ -4982,8 +4891,7 @@ author = {Yoshua Bengio and ...@@ -4982,8 +4891,7 @@ author = {Yoshua Bengio and
Hieu Pham and Hieu Pham and
Christopher D. Manning}, Christopher D. Manning},
title = {Effective Approaches to Attention-based Neural Machine Translation}, title = {Effective Approaches to Attention-based Neural Machine Translation},
publisher = {Conference on Empirical Methods in Natural publisher = {Conference on Empirical Methods in Natural Language Processing},
Language Processing},
pages = {1412--1421}, pages = {1412--1421},
year = {2015} year = {2015}
} }
...@@ -4994,7 +4902,7 @@ author = {Yoshua Bengio and ...@@ -4994,7 +4902,7 @@ author = {Yoshua Bengio and
Haifeng Wang}, Haifeng Wang},
title = {Improved Neural Machine Translation with {SMT} Features}, title = {Improved Neural Machine Translation with {SMT} Features},
pages = {151--157}, pages = {151--157},
publisher = {the Association for the Advance of Artificial Intelligence}, publisher = {AAAI Conference on Artificial Intelligence},
year = {2016} year = {2016}
} }
@inproceedings{zhang-etal-2017-prior, @inproceedings{zhang-etal-2017-prior,
...@@ -5005,7 +4913,7 @@ author = {Yoshua Bengio and ...@@ -5005,7 +4913,7 @@ author = {Yoshua Bengio and
Xu, Jingfang and Xu, Jingfang and
Sun, Maosong}, Sun, Maosong},
year = {2017}, year = {2017},
publisher = {Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
pages = {1514--1523}, pages = {1514--1523},
} }
...@@ -5021,7 +4929,7 @@ author = {Yoshua Bengio and ...@@ -5021,7 +4929,7 @@ author = {Yoshua Bengio and
title = {Bilingual Dictionary Based Neural Machine Translation without Using title = {Bilingual Dictionary Based Neural Machine Translation without Using
Parallel Sentences}, Parallel Sentences},
pages = {1570--1579}, pages = {1570--1579},
publisher = {Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2020} year = {2020}
} }
...@@ -5030,7 +4938,7 @@ author = {Yoshua Bengio and ...@@ -5030,7 +4938,7 @@ author = {Yoshua Bengio and
Deyi Xiong}, Deyi Xiong},
title = {Encoding Gated Translation Memory into Neural Machine Translation}, title = {Encoding Gated Translation Memory into Neural Machine Translation},
pages = {3042--3047}, pages = {3042--3047},
publisher = {Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2018} year = {2018}
} }
@inproceedings{yang-etal-2016-hierarchical, @inproceedings{yang-etal-2016-hierarchical,
...@@ -5042,7 +4950,7 @@ author = {Yoshua Bengio and ...@@ -5042,7 +4950,7 @@ author = {Yoshua Bengio and
Eduard H. Hovy}, Eduard H. Hovy},
title = {Hierarchical Attention Networks for Document Classification}, title = {Hierarchical Attention Networks for Document Classification},
pages = {1480--1489}, pages = {1480--1489},
publisher = {The Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016} year = {2016}
} }
%%%%% chapter 10------------------------------------------------------ %%%%% chapter 10------------------------------------------------------
...@@ -5056,7 +4964,7 @@ author = {Yoshua Bengio and ...@@ -5056,7 +4964,7 @@ author = {Yoshua Bengio and
Douwe Kiela}, Douwe Kiela},
title = {Code-Switched Named Entity Recognition with Embedding Attention}, title = {Code-Switched Named Entity Recognition with Embedding Attention},
pages = {154--158}, pages = {154--158},
publisher = {Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2018} year = {2018}
} }
...@@ -5069,7 +4977,7 @@ author = {Yoshua Bengio and ...@@ -5069,7 +4977,7 @@ author = {Yoshua Bengio and
title = {Leveraging Linguistic Structures for Named Entity Recognition with title = {Leveraging Linguistic Structures for Named Entity Recognition with
Bidirectional Recursive Neural Networks}, Bidirectional Recursive Neural Networks},
pages = {2664--2669}, pages = {2664--2669},
publisher = {Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2017} year = {2017}
} }
...@@ -5077,7 +4985,7 @@ author = {Yoshua Bengio and ...@@ -5077,7 +4985,7 @@ author = {Yoshua Bengio and
author = {Xuezhe Ma and author = {Xuezhe Ma and
Eduard H. Hovy}, Eduard H. Hovy},
title = {End-to-end Sequence Labeling via Bi-directional LSTM-CNNs-CRF}, title = {End-to-end Sequence Labeling via Bi-directional LSTM-CNNs-CRF},
publisher = {The Association for Computer Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016} year = {2016}
} }
...@@ -5088,7 +4996,7 @@ author = {Yoshua Bengio and ...@@ -5088,7 +4996,7 @@ author = {Yoshua Bengio and
Andrew McCallum}, Andrew McCallum},
title = {Fast and Accurate Entity Recognition with Iterated Dilated Convolutions}, title = {Fast and Accurate Entity Recognition with Iterated Dilated Convolutions},
pages = {2670--2680}, pages = {2670--2680},
publisher = {Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2017} year = {2017}
} }
...@@ -5107,26 +5015,21 @@ author = {Yoshua Bengio and ...@@ -5107,26 +5015,21 @@ author = {Yoshua Bengio and
year = {2017} year = {2017}
} }
@article{DBLP:journals/jmlr/CollobertWBKKK11, @article{2011Natural,
author = {Ronan Collobert and title={Natural Language Processing (almost) from Scratch},
Jason Weston and author={ Collobert, Ronan and Weston, Jason and Bottou, Léon and Karlen, Michael and Kavukcuoglu, Koray and Kuksa, Pavel },
L{\'{e}}on Bottou and journal={Journal of Machine Learning Research},
Michael Karlen and volume={12},
Koray Kavukcuoglu and number={1},
Pavel P. Kuksa}, pages={2493-2537},
title = {Natural Language Processing (Almost) from Scratch}, year={2011},
journal = {J. Mach. Learn. Res.},
volume = {12},
pages = {2493--2537},
year = {2011}
} }
@inproceedings{DBLP:conf/acl/NguyenG15, @inproceedings{DBLP:conf/acl/NguyenG15,
author = {Thien Huu Nguyen and author = {Thien Huu Nguyen and
Ralph Grishman}, Ralph Grishman},
title = {Event Detection and Domain Adaptation with Convolutional Neural Networks}, title = {Event Detection and Domain Adaptation with Convolutional Neural Networks},
pages = {365--371}, pages = {365--371},
publisher = {The Association for Computer Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2015} year = {2015}
} }
...@@ -5137,7 +5040,7 @@ author = {Yoshua Bengio and ...@@ -5137,7 +5040,7 @@ author = {Yoshua Bengio and
Jun Zhao}, Jun Zhao},
title = {Recurrent Convolutional Neural Networks for Text Classification}, title = {Recurrent Convolutional Neural Networks for Text Classification},
pages = {2267--2273}, pages = {2267--2273},
publisher = {the Association for the Advance of Artificial Intelligence}, publisher = {AAAI Conference on Artificial Intelligence},
year = {2015} year = {2015}
} }
...@@ -5149,7 +5052,7 @@ author = {Yoshua Bengio and ...@@ -5149,7 +5052,7 @@ author = {Yoshua Bengio and
Jun Zhao}, Jun Zhao},
title = {Event Extraction via Dynamic Multi-Pooling Convolutional Neural Networks}, title = {Event Extraction via Dynamic Multi-Pooling Convolutional Neural Networks},
pages = {167--176}, pages = {167--176},
publisher = {The Association for Computer Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2015} year = {2015}
} }
...@@ -5159,7 +5062,7 @@ author = {Yoshua Bengio and ...@@ -5159,7 +5062,7 @@ author = {Yoshua Bengio and
Tommi S. Jaakkola}, Tommi S. Jaakkola},
title = {Molding CNNs for text: non-linear, non-consecutive convolutions}, title = {Molding CNNs for text: non-linear, non-consecutive convolutions},
pages = {1565--1575}, pages = {1565--1575},
publisher = {The Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2015} year = {2015}
} }
...@@ -5169,7 +5072,7 @@ author = {Yoshua Bengio and ...@@ -5169,7 +5072,7 @@ author = {Yoshua Bengio and
title = {Effective Use of Word Order for Text Categorization with Convolutional title = {Effective Use of Word Order for Text Categorization with Convolutional
Neural Networks}, Neural Networks},
pages = {103--112}, pages = {103--112},
publisher = {The Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2015} year = {2015}
} }
...@@ -5178,14 +5081,14 @@ author = {Yoshua Bengio and ...@@ -5178,14 +5081,14 @@ author = {Yoshua Bengio and
Ralph Grishman}, Ralph Grishman},
title = {Relation Extraction: Perspective from Convolutional Neural Networks}, title = {Relation Extraction: Perspective from Convolutional Neural Networks},
pages = {39--48}, pages = {39--48},
publisher = {The Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2015} year = {2015}
} }
@article{StahlbergNeural, @article{StahlbergNeural,
title={Neural Machine Translation: A Review}, title={Neural Machine Translation: A Review},
author={Felix Stahlberg}, author={Felix Stahlberg},
journal={journal of artificial intelligence research}, journal={Journal of Artificial Intelligence Research},
year={2020}, year={2020},
volume={69}, volume={69},
pages={343-418} pages={343-418}
...@@ -5211,7 +5114,7 @@ author = {Yoshua Bengio and ...@@ -5211,7 +5114,7 @@ author = {Yoshua Bengio and
@article{Waibel1989PhonemeRU, @article{Waibel1989PhonemeRU,
title={Phoneme recognition using time-delay neural networks}, title={Phoneme recognition using time-delay neural networks},
author={Alexander H. Waibel and Toshiyuki Hanazawa and Geoffrey E. Hinton and K. Shikano and K. Lang}, author={Alexander H. Waibel and Toshiyuki Hanazawa and Geoffrey E. Hinton and K. Shikano and K. Lang},
journal={IEEE Trans. Acoust. Speech Signal Process.}, journal={IEEE Transactions on Acoustics, Speech, and Signal Processing},
year={1989}, year={1989},
volume={37}, volume={37},
pages={328-339} pages={328-339}
...@@ -5226,7 +5129,7 @@ author = {Yoshua Bengio and ...@@ -5226,7 +5129,7 @@ author = {Yoshua Bengio and
pages={541-551} pages={541-551}
} }
@ARTICLE{726791, @article{726791,
author={Y. {Lecun} and L. {Bottou} and Y. {Bengio} and P. {Haffner}}, author={Y. {Lecun} and L. {Bottou} and Y. {Bengio} and P. {Haffner}},
journal={Proceedings of the IEEE}, journal={Proceedings of the IEEE},
title={Gradient-based learning applied to document recognition}, title={Gradient-based learning applied to document recognition},
...@@ -5234,7 +5137,6 @@ author = {Yoshua Bengio and ...@@ -5234,7 +5137,6 @@ author = {Yoshua Bengio and
volume={86}, volume={86},
number={11}, number={11},
pages={2278-2324}, pages={2278-2324},
//doi={10.1109/5.726791}
} }
@inproceedings{DBLP:journals/corr/HeZRS15, @inproceedings{DBLP:journals/corr/HeZRS15,
...@@ -5262,7 +5164,7 @@ author = {Yoshua Bengio and ...@@ -5262,7 +5164,7 @@ author = {Yoshua Bengio and
@article{Girshick2015FastR, @article{Girshick2015FastR,
title={Fast R-CNN}, title={Fast R-CNN},
author={Ross B. Girshick}, author={Ross B. Girshick},
journal={2015 IEEE International Conference on Computer Vision (ICCV)}, journal={International Conference on Computer Vision},
year={2015}, year={2015},
pages={1440-1448} pages={1440-1448}
} }
...@@ -5279,7 +5181,7 @@ author = {Yoshua Bengio and ...@@ -5279,7 +5181,7 @@ author = {Yoshua Bengio and
@inproceedings{Kalchbrenner2014ACN, @inproceedings{Kalchbrenner2014ACN,
title={A Convolutional Neural Network for Modelling Sentences}, title={A Convolutional Neural Network for Modelling Sentences},
author={Nal Kalchbrenner and Edward Grefenstette and P. Blunsom}, author={Nal Kalchbrenner and Edward Grefenstette and P. Blunsom},
booktitle={ACL}, publisher={Annual Meeting of the Association for Computational Linguistics},
pages={655--665}, pages={655--665},
year={2014} year={2014}
} }
...@@ -5287,7 +5189,7 @@ author = {Yoshua Bengio and ...@@ -5287,7 +5189,7 @@ author = {Yoshua Bengio and
@inproceedings{Kim2014ConvolutionalNN, @inproceedings{Kim2014ConvolutionalNN,
title={Convolutional Neural Networks for Sentence Classification}, title={Convolutional Neural Networks for Sentence Classification},
author={Yoon Kim}, author={Yoon Kim},
booktitle={Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing}, publisher={Conference on Empirical Methods in Natural Language Processing},
pages = {1746--1751}, pages = {1746--1751},
year={2014} year={2014}
} }
...@@ -5299,7 +5201,7 @@ author = {Yoshua Bengio and ...@@ -5299,7 +5201,7 @@ author = {Yoshua Bengio and
Bowen Zhou and Bowen Zhou and
Bing Xiang}, Bing Xiang},
pages = {174--179}, pages = {174--179},
booktitle={The Association for Computer Linguistics}, publisher={Annual Meeting of the Association for Computational Linguistics},
year={2015} year={2015}
} }
...@@ -5308,7 +5210,7 @@ author = {Yoshua Bengio and ...@@ -5308,7 +5210,7 @@ author = {Yoshua Bengio and
author = {C{\'{\i}}cero Nogueira dos Santos and author = {C{\'{\i}}cero Nogueira dos Santos and
Maira Gatti}, Maira Gatti},
pages = {69--78}, pages = {69--78},
publisher = {The Association for Computer Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year={2014} year={2014}
} }
...@@ -5318,7 +5220,7 @@ author = {Yoshua Bengio and ...@@ -5318,7 +5220,7 @@ author = {Yoshua Bengio and
Angela Fan and Angela Fan and
Michael Auli and Michael Auli and
David Grangier}, David Grangier},
booktitle={Proceedings of the 34th International Conference on Machine Learning}, publisher={International Conference on Machine Learning},
volume = {70}, volume = {70},
pages = {933--941}, pages = {933--941},
year={2017} year={2017}
...@@ -5330,7 +5232,7 @@ author = {Yoshua Bengio and ...@@ -5330,7 +5232,7 @@ author = {Yoshua Bengio and
Michael Auli and Michael Auli and
David Grangier and David Grangier and
Yann N. Dauphin}, Yann N. Dauphin},
booktitle={The Association for Computer Linguistics}, publisher={Annual Meeting of the Association for Computational Linguistics},
pages = {123--135}, pages = {123--135},
year={2017} year={2017}
} }
...@@ -5353,7 +5255,7 @@ author = {Yoshua Bengio and ...@@ -5353,7 +5255,7 @@ author = {Yoshua Bengio and
author = {Lukasz Kaiser and author = {Lukasz Kaiser and
Aidan N. Gomez and Aidan N. Gomez and
Fran{\c{c}}ois Chollet}, Fran{\c{c}}ois Chollet},
publisher = {OpenReview.net}, journal = {International Conference on Learning Representations},
year={2018}, year={2018},
} }
...@@ -5364,7 +5266,7 @@ author = {Yoshua Bengio and ...@@ -5364,7 +5266,7 @@ author = {Yoshua Bengio and
Yann N. Dauphin and Yann N. Dauphin and
Michael Auli}, Michael Auli},
title = {Pay Less Attention with Lightweight and Dynamic Convolutions}, title = {Pay Less Attention with Lightweight and Dynamic Convolutions},
publisher = {7th International Conference on Learning Representations}, publisher = {International Conference on Learning Representations},
year = {2019}, year = {2019},
} }
...@@ -5421,7 +5323,7 @@ author = {Yoshua Bengio and ...@@ -5421,7 +5323,7 @@ author = {Yoshua Bengio and
Shaoqing Ren and Shaoqing Ren and
Jian Sun}, Jian Sun},
title = {Deep Residual Learning for Image Recognition}, title = {Deep Residual Learning for Image Recognition},
publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
pages = {770--778}, pages = {770--778},
year = {2016}, year = {2016},
} }
...@@ -5432,26 +5334,26 @@ author = {Yoshua Bengio and ...@@ -5432,26 +5334,26 @@ author = {Yoshua Bengio and
Arthur Szlam and Arthur Szlam and
Jason Weston and Jason Weston and
Rob Fergus}, Rob Fergus},
booktitle={Conference and Workshop on Neural Information Processing Systems}, publisher={Conference on Neural Information Processing Systems},
pages = {2440--2448}, pages = {2440--2448},
year={2015} year={2015}
} }
@article{Islam2020HowMP, @inproceedings{Islam2020HowMP,
title={How Much Position Information Do Convolutional Neural Networks Encode?}, author = {Md. Amirul Islam and
author={Md. Amirul Islam and Sen Jia and Neil D. B. Bruce}, Sen Jia and
journal={ArXiv}, Neil D. B. Bruce},
year={2020}, title = {How much Position Information Do Convolutional Neural Networks Encode?},
volume={abs/2001.08248} publisher = {International Conference on Learning Representations},
year = {2020},
} }
@inproceedings{Sutskever2013OnTI, @inproceedings{Sutskever2013OnTI,
title={On the importance of initialization and momentum in deep learning}, title={On the importance of initialization and momentum in deep learning},
author = {Ilya Sutskever and author = {Ilya Sutskever and
James Martens and James Martens and
George E. Dahl and George E. Dahl and
Geoffrey E. Hinton}, Geoffrey E. Hinton},
booktitle={International Conference on Machine Learning}, publisher = {International Conference on Machine Learning},
pages = {1139--1147}, pages = {1139--1147},
year={2013} year={2013}
} }
...@@ -5459,7 +5361,7 @@ author = {Yoshua Bengio and ...@@ -5459,7 +5361,7 @@ author = {Yoshua Bengio and
@article{Bengio2013AdvancesIO, @article{Bengio2013AdvancesIO,
title={Advances in optimizing recurrent networks}, title={Advances in optimizing recurrent networks},
author={Yoshua Bengio and Nicolas Boulanger-Lewandowski and Razvan Pascanu}, author={Yoshua Bengio and Nicolas Boulanger-Lewandowski and Razvan Pascanu},
journal={2013 IEEE International Conference on Acoustics, Speech and Signal Processing}, journal={IEEE Transactions on Acoustics, Speech, and Signal Processing},
year={2013}, year={2013},
pages={8624-8628} pages={8624-8628}
} }
...@@ -5476,7 +5378,7 @@ author = {Yoshua Bengio and ...@@ -5476,7 +5378,7 @@ author = {Yoshua Bengio and
@article{Chollet2017XceptionDL, @article{Chollet2017XceptionDL,
title={Xception: Deep Learning with Depthwise Separable Convolutions}, title={Xception: Deep Learning with Depthwise Separable Convolutions},
author = {Fran{\c{c}}ois Chollet}, author = {Fran{\c{c}}ois Chollet},
journal={2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, journal={IEEE Conference on Computer Vision and Pattern Recognition},
year={2017}, year={2017},
pages={1800-1807} pages={1800-1807}
} }
...@@ -5512,7 +5414,7 @@ author = {Yoshua Bengio and ...@@ -5512,7 +5414,7 @@ author = {Yoshua Bengio and
title={Rotation, Scaling and Deformation Invariant Scattering for Texture Discrimination}, title={Rotation, Scaling and Deformation Invariant Scattering for Texture Discrimination},
author = {Laurent Sifre and author = {Laurent Sifre and
St{\'{e}}phane Mallat}, St{\'{e}}phane Mallat},
journal={2013 IEEE Conference on Computer Vision and Pattern Recognition}, journal={IEEE Conference on Computer Vision and Pattern Recognition},
year={2013}, year={2013},
pages={1233-1240} pages={1233-1240}
} }
...@@ -5520,7 +5422,7 @@ author = {Yoshua Bengio and ...@@ -5520,7 +5422,7 @@ author = {Yoshua Bengio and
@article{Taigman2014DeepFaceCT, @article{Taigman2014DeepFaceCT,
title={DeepFace: Closing the Gap to Human-Level Performance in Face Verification}, title={DeepFace: Closing the Gap to Human-Level Performance in Face Verification},
author={Yaniv Taigman and Ming Yang and Marc'Aurelio Ranzato and Lior Wolf}, author={Yaniv Taigman and Ming Yang and Marc'Aurelio Ranzato and Lior Wolf},
journal={2014 IEEE Conference on Computer Vision and Pattern Recognition}, journal={IEEE Conference on Computer Vision and Pattern Recognition},
year={2014}, year={2014},
pages={1701-1708} pages={1701-1708}
} }
...@@ -5533,7 +5435,7 @@ author = {Yoshua Bengio and ...@@ -5533,7 +5435,7 @@ author = {Yoshua Bengio and
Mirk{\'{o}} Visontai and Mirk{\'{o}} Visontai and
Raziel Alvarez and Raziel Alvarez and
Carolina Parada}, Carolina Parada},
booktitle={the International Speech Communication Association}, publisher={Conference of the International Speech Communication Association},
pages = {1136--1140}, pages = {1136--1140},
year={2015} year={2015}
} }
...@@ -5546,7 +5448,7 @@ author = {Yoshua Bengio and ...@@ -5546,7 +5448,7 @@ author = {Yoshua Bengio and
Dongdong Chen and Dongdong Chen and
Lu Yuan and Lu Yuan and
Zicheng Liu}, Zicheng Liu},
publisher = {Institute of Electrical and Electronics Engineers}, journal = {IEEE Conference on Computer Vision and Pattern Recognition},
year={2020}, year={2020},
pages={11027-11036} pages={11027-11036}
} }
...@@ -5563,7 +5465,7 @@ author = {Yoshua Bengio and ...@@ -5563,7 +5465,7 @@ author = {Yoshua Bengio and
Chloe Hillier and Chloe Hillier and
Timothy P. Lillicrap}, Timothy P. Lillicrap},
title = {Compressive Transformers for Long-Range Sequence Modelling}, title = {Compressive Transformers for Long-Range Sequence Modelling},
publisher = {OpenReview.net}, publisher = {International Conference on Learning Representations},
year = {2020} year = {2020}
} }
...@@ -5597,7 +5499,7 @@ author = {Yoshua Bengio and ...@@ -5597,7 +5499,7 @@ author = {Yoshua Bengio and
Yujun Lin and Yujun Lin and
Song Han}, Song Han},
title = {Lite Transformer with Long-Short Range Attention}, title = {Lite Transformer with Long-Short Range Attention},
publisher = {OpenReview.net}, publisher = {International Conference on Learning Representations},
year = {2020} year = {2020}
} }
...@@ -5610,7 +5512,7 @@ author = {Yoshua Bengio and ...@@ -5610,7 +5512,7 @@ author = {Yoshua Bengio and
title = {Analyzing Multi-Head Self-Attention: Specialized Heads Do the Heavy title = {Analyzing Multi-Head Self-Attention: Specialized Heads Do the Heavy
Lifting, the Rest Can Be Pruned}, Lifting, the Rest Can Be Pruned},
pages = {5797--5808}, pages = {5797--5808},
publisher = {Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019}, year = {2019},
} }
...@@ -5623,7 +5525,7 @@ author = {Yoshua Bengio and ...@@ -5623,7 +5525,7 @@ author = {Yoshua Bengio and
Bowen Zhou and Bowen Zhou and
Yoshua Bengio}, Yoshua Bengio},
title = {A Structured Self-Attentive Sentence Embedding}, title = {A Structured Self-Attentive Sentence Embedding},
publisher = {5th International Conference on Learning Representations}, publisher = {International Conference on Learning Representations},
year = {2017}, year = {2017},
} }
@inproceedings{Shaw2018SelfAttentionWR, @inproceedings{Shaw2018SelfAttentionWR,
...@@ -5631,8 +5533,8 @@ author = {Yoshua Bengio and ...@@ -5631,8 +5533,8 @@ author = {Yoshua Bengio and
Jakob Uszkoreit and Jakob Uszkoreit and
Ashish Vaswani}, Ashish Vaswani},
title = {Self-Attention with Relative Position Representations}, title = {Self-Attention with Relative Position Representations},
publisher = {Proceedings of the 2018 Conference of the North American Chapter of publisher = {Proceedings of the Human Language Technology Conference of
the Association for Computational Linguistics: Human Language Technologies}, the North American Chapter of the Association for Computational Linguistics},
pages = {464--468}, pages = {464--468},
year = {2018}, year = {2018},
} }
...@@ -5642,7 +5544,7 @@ author = {Yoshua Bengio and ...@@ -5642,7 +5544,7 @@ author = {Yoshua Bengio and
Shaoqing Ren and Shaoqing Ren and
Jian Sun}, Jian Sun},
title = {Deep Residual Learning for Image Recognition}, title = {Deep Residual Learning for Image Recognition},
publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
pages = {770--778}, pages = {770--778},
year = {2016}, year = {2016},
} }
...@@ -5661,7 +5563,7 @@ author = {Yoshua Bengio and ...@@ -5661,7 +5563,7 @@ author = {Yoshua Bengio and
Jonathon Shlens and Jonathon Shlens and
Zbigniew Wojna}, Zbigniew Wojna},
title = {Rethinking the Inception Architecture for Computer Vision}, title = {Rethinking the Inception Architecture for Computer Vision},
publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
pages = {2818--2826}, pages = {2818--2826},
year = {2016}, year = {2016},
} }
...@@ -5670,8 +5572,7 @@ author = {Yoshua Bengio and ...@@ -5670,8 +5572,7 @@ author = {Yoshua Bengio and
Deyi Xiong and Deyi Xiong and
Jinsong Su}, Jinsong Su},
title = {Accelerating Neural Transformer via an Average Attention Network}, title = {Accelerating Neural Transformer via an Average Attention Network},
publisher = {Proceedings of the 56th Annual Meeting of the Association for Computational publisher = {Annual Meeting of the Association for Computational Linguistics},
Linguistics},
pages = {1789--1798}, pages = {1789--1798},
year = {2018}, year = {2018},
} }
...@@ -5691,7 +5592,7 @@ author = {Yoshua Bengio and ...@@ -5691,7 +5592,7 @@ author = {Yoshua Bengio and
Yann N. Dauphin and Yann N. Dauphin and
Michael Auli}, Michael Auli},
title = {Pay Less Attention with Lightweight and Dynamic Convolutions}, title = {Pay Less Attention with Lightweight and Dynamic Convolutions},
publisher = {7th International Conference on Learning Representations}, publisher = {International Conference on Learning Representations},
year = {2019}, year = {2019},
} }
...@@ -5704,7 +5605,7 @@ author = {Yoshua Bengio and ...@@ -5704,7 +5605,7 @@ author = {Yoshua Bengio and
Ruslan Salakhutdinov}, Ruslan Salakhutdinov},
title = {Transformer-XL: Attentive Language Models beyond a Fixed-Length Context}, title = {Transformer-XL: Attentive Language Models beyond a Fixed-Length Context},
pages = {2978--2988}, pages = {2978--2988},
publisher = {Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019} year = {2019}
} }
@article{Liu2020LearningTE, @article{Liu2020LearningTE,
...@@ -5729,7 +5630,7 @@ author = {Yoshua Bengio and ...@@ -5729,7 +5630,7 @@ author = {Yoshua Bengio and
Tong Zhang}, Tong Zhang},
title = {Modeling Localness for Self-Attention Networks}, title = {Modeling Localness for Self-Attention Networks},
pages = {4449--4458}, pages = {4449--4458},
publisher = {Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2018} year = {2018}
} }
@inproceedings{DBLP:journals/corr/abs-1904-03107, @inproceedings{DBLP:journals/corr/abs-1904-03107,
...@@ -5740,7 +5641,7 @@ author = {Yoshua Bengio and ...@@ -5740,7 +5641,7 @@ author = {Yoshua Bengio and
Zhaopeng Tu}, Zhaopeng Tu},
title = {Convolutional Self-Attention Networks}, title = {Convolutional Self-Attention Networks},
pages = {4040--4045}, pages = {4040--4045},
publisher = {Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019}, year = {2019},
} }
@article{Wang2018MultilayerRF, @article{Wang2018MultilayerRF,
...@@ -5759,7 +5660,7 @@ author = {Yoshua Bengio and ...@@ -5759,7 +5660,7 @@ author = {Yoshua Bengio and
title = {Training Deeper Neural Machine Translation Models with Transparent title = {Training Deeper Neural Machine Translation Models with Transparent
Attention}, Attention},
pages = {3028--3033}, pages = {3028--3033},
publisher = {Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2018} year = {2018}
} }
@inproceedings{Dou2018ExploitingDR, @inproceedings{Dou2018ExploitingDR,
...@@ -5770,7 +5671,7 @@ author = {Yoshua Bengio and ...@@ -5770,7 +5671,7 @@ author = {Yoshua Bengio and
Tong Zhang}, Tong Zhang},
title = {Exploiting Deep Representations for Neural Machine Translation}, title = {Exploiting Deep Representations for Neural Machine Translation},
pages = {4253--4262}, pages = {4253--4262},
publisher = {Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2018} year = {2018}
} }
@inproceedings{Wang2019ExploitingSC, @inproceedings{Wang2019ExploitingSC,
...@@ -5789,13 +5690,13 @@ author = {Yoshua Bengio and ...@@ -5789,13 +5690,13 @@ author = {Yoshua Bengio and
Tong Zhang}, Tong Zhang},
title = {Dynamic Layer Aggregation for Neural Machine Translation with Routing-by-Agreement}, title = {Dynamic Layer Aggregation for Neural Machine Translation with Routing-by-Agreement},
pages = {86--93}, pages = {86--93},
publisher = {the Association for the Advance of Artificial Intelligence}, publisher = {AAAI Conference on Artificial Intelligence},
year = {2019} year = {2019}
} }
@inproceedings{Wei2020MultiscaleCD, @inproceedings{Wei2020MultiscaleCD,
title={Multiscale Collaborative Deep Models for Neural Machine Translation}, title={Multiscale Collaborative Deep Models for Neural Machine Translation},
author={Xiangpeng Wei and Heng Yu and Yue Hu and Yue Zhang and Rongxiang Weng and Weihua Luo}, author={Xiangpeng Wei and Heng Yu and Yue Hu and Yue Zhang and Rongxiang Weng and Weihua Luo},
booktitle={Annual Meeting of the Association for Computational Linguistics}, publisher={Annual Meeting of the Association for Computational Linguistics},
year={2020} year={2020}
} }
...@@ -5824,7 +5725,7 @@ author = {Yoshua Bengio and ...@@ -5824,7 +5725,7 @@ author = {Yoshua Bengio and
Lukasz Kaiser and Lukasz Kaiser and
Anselm Levskaya}, Anselm Levskaya},
title = {Reformer: The Efficient Transformer}, title = {Reformer: The Efficient Transformer},
publisher = {OpenReview.net}, journal = {International Conference on Learning Representations},
year = {2020} year = {2020}
} }
...@@ -5839,7 +5740,7 @@ author = {Yoshua Bengio and ...@@ -5839,7 +5740,7 @@ author = {Yoshua Bengio and
@article{li2020shallow, @article{li2020shallow,
title={Shallow-to-Deep Training for Neural Machine Translation}, title={Shallow-to-Deep Training for Neural Machine Translation},
author={Li, Bei and Wang, Ziyang and Liu, Hui and Jiang, Yufan and Du, Quan and Xiao, Tong and Wang, Huizhen and Zhu, Jingbo}, author={Li, Bei and Wang, Ziyang and Liu, Hui and Jiang, Yufan and Du, Quan and Xiao, Tong and Wang, Huizhen and Zhu, Jingbo},
publisher={Conference on Empirical Methods in Natural Language Processing}, journal={Conference on Empirical Methods in Natural Language Processing},
year={2020} year={2020}
} }
%%%%% chapter 12------------------------------------------------------ %%%%% chapter 12------------------------------------------------------
...@@ -6673,15 +6574,7 @@ author = {Yoshua Bengio and ...@@ -6673,15 +6574,7 @@ author = {Yoshua Bengio and
publisher = {Annual Meeting of the Annual Meeting of the Association for Computational Linguistics}, publisher = {Annual Meeting of the Annual Meeting of the Association for Computational Linguistics},
year = {2019} year = {2019}
} }
@inproceedings{DBLP:conf/naacl/MohiuddinJ19,
author = {Tasnim Mohiuddin and
Shafiq R. Joty},
title = {Revisiting Adversarial Autoencoder for Unsupervised Word Translation
with Cycle Consistency and Improved Training},
pages = {3857--3867},
publisher = {Annual Meeting of the Annual Meeting of the Association for Computational Linguistics},
year = {2019}
}
@article{DBLP:journals/corr/abs-1811-01124, @article{DBLP:journals/corr/abs-1811-01124,
author = {Jean Alaux and author = {Jean Alaux and
Edouard Grave and Edouard Grave and
...@@ -6896,394 +6789,6 @@ author = {Yoshua Bengio and ...@@ -6896,394 +6789,6 @@ author = {Yoshua Bengio and
publisher = {Annual Meeting of the Annual Meeting of the Association for Computational Linguistics}, publisher = {Annual Meeting of the Annual Meeting of the Association for Computational Linguistics},
year = {2019} year = {2019}
} }
@article{2019ADabre,
title={A Survey of Multilingual Neural Machine Translation},
author={Dabre, Raj and Chu, Chenhui and Kunchukuttan, Anoop },
year={2019},
}
@inproceedings{DBLP:conf/naacl/ZophK16,
author = {Barret Zoph and
Kevin Knight},
title = {Multi-Source Neural Translation},
pages = {30--34},
publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
year = {2016}
}
@inproceedings{DBLP:conf/naacl/FiratCB16,
author = {Orhan Firat and
Kyunghyun Cho and
Yoshua Bengio},
title = {Multi-Way, Multilingual Neural Machine Translation with a Shared Attention
Mechanism},
pages = {866--875},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016}
}
@article{DBLP:journals/tacl/JohnsonSLKWCTVW17,
author = {Melvin Johnson and
Mike Schuster and
Quoc V. Le and
Maxim Krikun and
Yonghui Wu and
Zhifeng Chen and
Nikhil Thorat and
Fernanda B. Vi{\'{e}}gas and
Martin Wattenberg and
Greg Corrado and
Macduff Hughes and
Jeffrey Dean},
title = {Google's Multilingual Neural Machine Translation System: Enabling
Zero-Shot Translation},
journal = {Trans. Assoc. Comput. Linguistics},
volume = {5},
pages = {339--351},
year = {2017}
}
@inproceedings{DBLP:conf/emnlp/KimPPKN19,
author = {Yunsu Kim and
Petre Petrov and
Pavel Petrushkov and
Shahram Khadivi and
Hermann Ney},
title = {Pivot-based Transfer Learning for Neural Machine Translation between
Non-English Languages},
pages = {866--876},
publisher = {Association for Computational Linguistics},
year = {2019}
}
@inproceedings{DBLP:conf/acl/ChenLCL17,
author = {Yun Chen and
Yang Liu and
Yong Cheng and
Victor O. K. Li},
title = {A Teacher-Student Framework for Zero-Resource Neural Machine Translation},
pages = {1925--1935},
publisher = {Association for Computational Linguistics},
year = {2017}
}
@article{DBLP:journals/mt/WuW07,
author = {Hua Wu and
Haifeng Wang},
title = {Pivot language approach for phrase-based statistical machine translation},
journal = {Mach. Transl.},
volume = {21},
number = {3},
pages = {165--181},
year = {2007}
}
@article{Farsi2010somayeh,
author = {Somayeh Bakhshaei and Shahram Khadivi and Noushin Riahi },
title = {Farsi-german statistical machine translation through bridge language},
publisher = {International Telecommunications Symposium},
pages = {165--181},
year = {2010}
}
@inproceedings{DBLP:conf/acl/ZahabiBK13,
author = {Samira Tofighi Zahabi and
Somayeh Bakhshaei and
Shahram Khadivi},
title = {Using Context Vectors in Improving a Machine Translation System with
Bridge Language},
pages = {318--322},
publisher = {The Association for Computer Linguistics},
year = {2013}
}
@inproceedings{DBLP:conf/emnlp/ZhuHWZWZ14,
author = {Xiaoning Zhu and
Zhongjun He and
Hua Wu and
Conghui Zhu and
Haifeng Wang and
Tiejun Zhao},
title = {Improving Pivot-Based Statistical Machine Translation by Pivoting
the Co-occurrence Count of Phrase Pairs},
pages = {1665--1675},
publisher = {{ACL}},
year = {2014}
}
@inproceedings{DBLP:conf/acl/MiuraNSTN15,
author = {Akiva Miura and
Graham Neubig and
Sakriani Sakti and
Tomoki Toda and
Satoshi Nakamura},
title = {Improving Pivot Translation by Remembering the Pivot},
pages = {573--577},
publisher = {The Association for Computer Linguistics},
year = {2015}
}
@inproceedings{DBLP:conf/acl/CohnL07,
author = {Trevor Cohn and
Mirella Lapata},
title = {Machine Translation by Triangulation: Making Effective Use of Multi-Parallel
Corpora},
publisher = {The Association for Computational Linguistics},
year = {2007}
}
@article{DBLP:journals/mt/WuW07,
author = {Hua Wu and
Haifeng Wang},
title = {Pivot language approach for phrase-based statistical machine translation},
journal = {Mach. Transl.},
volume = {21},
number = {3},
pages = {165--181},
year = {2007}
}
@inproceedings{DBLP:conf/acl/WuW09,
author = {Hua Wu and
Haifeng Wang},
title = {Revisiting Pivot Language Approach for Machine Translation},
pages = {154--162},
publisher = {The Association for Computer Linguistics},
year = {2009}
}
@article{DBLP:journals/corr/ChengLYSX16,
author = {Yong Cheng and
Yang Liu and
Qian Yang and
Maosong Sun and
Wei Xu},
title = {Neural Machine Translation with Pivot Languages},
journal = {CoRR},
volume = {abs/1611.04928},
year = {2016}
}
@inproceedings{DBLP:conf/interspeech/KauersVFW02,
author = {Manuel Kauers and
Stephan Vogel and
Christian F{\"{u}}gen and
Alex Waibel},
title = {Interlingua based statistical machine translation},
publisher = {International Symposium on Computer Architecture},
year = {2002}
}
@inproceedings{de2006catalan,
title={Catalan-English statistical machine translation without parallel corpus: bridging through Spanish},
author={De Gispert, Adri{\`a} and Marino, Jose B},
booktitle={Proc. of 5th International Conference on Language Resources and Evaluation (LREC)},
pages={65--68},
year={2006}
}
@inproceedings{DBLP:conf/naacl/UtiyamaI07,
author = {Masao Utiyama and
Hitoshi Isahara},
title = {A Comparison of Pivot Methods for Phrase-Based Statistical Machine
Translation},
pages = {484--491},
publisher = {The Association for Computational Linguistics},
year = {2007}
}
@inproceedings{DBLP:conf/ijcnlp/Costa-JussaHB11,
author = {Marta R. Costa-juss{\`{a}} and
Carlos A. Henr{\'{\i}}quez Q. and
Rafael E. Banchs},
title = {Enhancing scarce-resource language translation through pivot combinations},
pages = {1361--1365},
publisher = {The Association for Computer Linguistics},
year = {2011}
}
@article{DBLP:journals/corr/HintonVD15,
author = {Geoffrey E. Hinton and
Oriol Vinyals and
Jeffrey Dean},
title = {Distilling the Knowledge in a Neural Network},
journal = {CoRR},
volume = {abs/1503.02531},
year = {2015}
}
@article{gu2018meta,
title={Meta-learning for low-resource neural machine translation},
author={Gu, Jiatao and Wang, Yong and Chen, Yun and Cho, Kyunghyun and Li, Victor OK},
journal={arXiv preprint arXiv:1808.08437},
year={2018}
}
@inproceedings{DBLP:conf/naacl/GuHDL18,
author = {Jiatao Gu and
Hany Hassan and
Jacob Devlin and
Victor O. K. Li},
title = {Universal Neural Machine Translation for Extremely Low Resource Languages},
pages = {344--354},
publisher = {Association for Computational Linguistics},
year = {2018}
}
@inproceedings{DBLP:conf/icml/FinnAL17,
author = {Chelsea Finn and
Pieter Abbeel and
Sergey Levine},
title = {Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks},
series = {Proceedings of Machine Learning Research},
volume = {70},
pages = {1126--1135},
publisher = {International Conference on Machine Learning},
year = {2017}
}
@inproceedings{DBLP:conf/acl/DongWHYW15,
author = {Daxiang Dong and
Hua Wu and
Wei He and
Dianhai Yu and
Haifeng Wang},
title = {Multi-Task Learning for Multiple Language Translation},
pages = {1723--1732},
publisher = {The Association for Computational Linguistics},
year = {2015}
}
@article{DBLP:journals/tacl/LeeCH17,
author = {Jason Lee and
Kyunghyun Cho and
Thomas Hofmann},
title = {Fully Character-Level Neural Machine Translation without Explicit
Segmentation},
journal = {Trans. Assoc. Comput. Linguistics},
volume = {5},
pages = {365--378},
year = {2017}
}
@inproceedings{DBLP:conf/lrec/RiktersPK18,
author = {Matiss Rikters and
Marcis Pinnis and
Rihards Krislauks},
title = {Training and Adapting Multilingual {NMT} for Less-resourced and Morphologically
Rich Languages},
publisher = {European Language Resources Association},
year = {2018}
}
@article{DBLP:journals/tkde/PanY10,
author = {Sinno Jialin Pan and
Qiang Yang},
title = {A Survey on Transfer Learning},
journal = {{IEEE} Trans. Knowl. Data Eng.},
volume = {22},
number = {10},
pages = {1345--1359},
year = {2010}
}
@article{DBLP:journals/tacl/JohnsonSLKWCTVW17,
author = {Melvin Johnson and
Mike Schuster and
Quoc V. Le and
Maxim Krikun and
Yonghui Wu and
Zhifeng Chen and
Nikhil Thorat and
Fernanda B. Vi{\'{e}}gas and
Martin Wattenberg and
Greg Corrado and
Macduff Hughes and
Jeffrey Dean},
title = {Google's Multilingual Neural Machine Translation System: Enabling
Zero-Shot Translation},
journal = {Trans. Assoc. Comput. Linguistics},
volume = {5},
pages = {339--351},
year = {2017}
}
@book{2009Handbook,
title={Handbook Of Research On Machine Learning Applications and Trends: Algorithms, Methods and Techniques - 2 Volumes},
author={ Olivas, Emilio Soria and Guerrero, Jose David Martin and Sober, Marcelino Martinez and Benedito, Jose Rafael Magdalena and Lopez, Antonio Jose Serrano },
publisher={Information Science Reference - Imprint of: IGI Publishing},
year={2009},
}
@incollection{DBLP:books/crc/aggarwal14/Pan14,
author = {Sinno Jialin Pan},
title = {Transfer Learning},
booktitle = {Data Classification: Algorithms and Applications},
pages = {537--570},
publisher = {{CRC} Press},
year = {2014}
}
@inproceedings{DBLP:conf/iclr/TanRHQZL19,
author = {Xu Tan and
Yi Ren and
Di He and
Tao Qin and
Zhou Zhao and
Tie-Yan Liu},
title = {Multilingual Neural Machine Translation with Knowledge Distillation},
publisher = {OpenReview.net},
year = {2019}
}
@article{platanios2018contextual,
title={Contextual parameter generation for universal neural machine translation},
author={Platanios, Emmanouil Antonios and Sachan, Mrinmaya and Neubig, Graham and Mitchell, Tom},
journal={arXiv preprint arXiv:1808.08493},
year={2018}
}
@inproceedings{ji2020cross,
title={Cross-Lingual Pre-Training Based Transfer for Zero-Shot Neural Machine Translation},
author={Ji, Baijun and Zhang, Zhirui and Duan, Xiangyu and Zhang, Min and Chen, Boxing and Luo, Weihua},
booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
volume={34},
number={01},
pages={115--122},
year={2020}
}
@inproceedings{DBLP:conf/wmt/KocmiB18,
author = {Tom Kocmi and
Ond{\v{r}}ej Bojar},
title = {Trivial Transfer Learning for Low-Resource Neural Machine Translation},
pages = {244--252},
publisher = {Association for Computational Linguistics},
year = {2018}
}
@inproceedings{DBLP:conf/acl/ZhangWTS20,
author = {Biao Zhang and
Philip Williams and
Ivan Titov and
Rico Sennrich},
title = {Improving Massively Multilingual Neural Machine Translation and Zero-Shot
Translation},
pages = {1628--1639},
publisher = {Association for Computational Linguistics},
year = {2020}
}
@inproceedings{DBLP:conf/naacl/PaulYSN09,
author = {Michael Paul and
Hirofumi Yamamoto and
Eiichiro Sumita and
Satoshi Nakamura},
title = {On the Importance of Pivot Language Selection for Statistical Machine
Translation},
pages = {221--224},
publisher = {The Association for Computational Linguistics},
year = {2009}
}
@article{dabre2019brief,
title={A Brief Survey of Multilingual Neural Machine Translation},
author={Dabre, Raj and Chu, Chenhui and Kunchukuttan, Anoop},
journal={arXiv preprint arXiv:1905.05395},
year={2019}
}
@article{dabre2020survey,
title={A survey of multilingual neural machine translation},
author={Dabre, Raj and Chu, Chenhui and Kunchukuttan, Anoop},
journal={ACM Computing Surveys (CSUR)},
volume={53},
number={5},
pages={1--38},
year={2020}
}
@inproceedings{DBLP:conf/emnlp/VulicGRK19,
author = {Ivan Vulic and
Goran Glavas and
Roi Reichart and
Anna Korhonen},
title = {Do We Really Need Fully Unsupervised Cross-Lingual Embeddings?},
pages = {4406--4417},
publisher = {Association for Computational Linguistics},
year = {2019}
}
@article{DBLP:journals/corr/MikolovLS13,
author = {Tomas Mikolov and
Quoc V. Le and
Ilya Sutskever},
title = {Exploiting Similarities among Languages for Machine Translation},
journal = {CoRR},
volume = {abs/1309.4168},
year = {2013}
}
%%%%% chapter 16------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论