Commit 7b394bd1 by 单韦乔

增加第一章、第二章参考文献和句法树

parent 30db2c09
...@@ -2,35 +2,30 @@ ...@@ -2,35 +2,30 @@
%%% 句法树(层次短语) %%% 句法树(层次短语)
\begin{tikzpicture} \begin{tikzpicture}
{\small {\small
\begin{scope}[sibling distance=10pt, level distance = 20pt] \begin{scope}[sibling distance=15pt, level distance = 20pt]
{\scriptsize {\scriptsize
\Tree[.\node(n1){\textbf{zj}}; \Tree[.\node(r){IP};
[.\node(n2){\textbf{dj}}; [.\node(n11){NP}; [.\node(n21){PN}; [.\node(l1){};]]]
[.\node(n3){\textbf{np}}; \node(cw1){}; ] [.\node(n12){VP};
[.\node(n4){\textbf{vp}}; [.\node(n22){BA}; \node(l2){}; ]
[. \node(cw1){pp}; [.\node(n23){IP};
[. \node(cw2){}; ] [. \node(n31){NP};
[. \node(cw3){np}; [. \node(n41){QP};
[. \node(cw4){mp}; [. \node(n51){CD}; \node(l3){};]
[. \node(p1){}; ] [. \node(n52){CLP}; [.\node(n61){M}; [.\node(l4){};]]]
[. \node(p2){}; ]
] ]
[. \node(cw5){np}; \node(p3){}; ] [. \node(n42){NP}; [.\node(n53){NN}; [.\node(l5){};]]]
] ]
[. \node(n32){VP};
[. \node(n43){VV}; \node(l6){放在};]
[. \node(n44){LCP};
[. \node(n54){NP}; [.\node(n62){NN}; [.\node(l7){};]]]
[. \node(n55){LC}; \node(l8){};]
] ]
[. \node(cw6){vp};
[. \node(cw7){}; ]
[. \node(cw8){pp};
[. \node(cw9){}; ]
[. \node(cw10){sp};
[. \node(cw11){}; ]
[. \node(cw12){}; ]
] ]
] ]
] ]
] [.\node(n13){PU}; \node(l9){};]
]
[.\node(n5){\textbf{}}; ]
] ]
} }
\end{scope} \end{scope}
......
...@@ -9,12 +9,12 @@ ...@@ -9,12 +9,12 @@
{ {
\begin{scope} \begin{scope}
{\scriptsize {\scriptsize
\node [anchor=north west] (example1) at (0,0) {\textbf{1:} 源=他\ \ \ ?}; \node [anchor=north west] (example1) at (0,0) {\textbf{1:} 源=他\ \ };
\node [anchor=north west] (example1part2) at ([yshift=0.2em]example1.south west) {\hspace{1em} 译=\ Where is he ?}; \node [anchor=north west] (example1part2) at ([yshift=0.2em]example1.south west) {\hspace{1em} 译=\ Where is he};
\node [anchor=north west] (example2) at ([yshift=0.1em]example1part2.south west) {\textbf{2:} 源=我\ 真高兴}; \node [anchor=north west] (example2) at ([yshift=0.1em]example1part2.south west) {\textbf{2:} 源=我\ 真高兴};
\node [anchor=north west] (example2part2) at ([yshift=0.2em]example2.south west) {\hspace{1em} 译=\ I'm so happy}; \node [anchor=north west] (example2part2) at ([yshift=0.2em]example2.south west) {\hspace{1em} 译=\ I'm so happy};
\node [anchor=north west] (example3) at ([yshift=0.1em]example2part2.south west) {\textbf{3:} 源=出发\ }; \node [anchor=north west] (example3) at ([yshift=0.1em]example2part2.south west) {\textbf{3:} 源=出发};
\node [anchor=north west] (example3part2) at ([yshift=0.2em]example3.south west) {\hspace{1em} 译=\ Let's go!}; \node [anchor=north west] (example3part2) at ([yshift=0.2em]example3.south west) {\hspace{1em} 译=\ Let's go};
\node [anchor=north west] (example4) at ([yshift=0.1em]example3part2.south west) {\hspace{1em} ...}; \node [anchor=north west] (example4) at ([yshift=0.1em]example3part2.south west) {\hspace{1em} ...};
\node [anchor=north west] (example5) at ([yshift=0.1em]example4.south west) {\hspace{1em}\quad}; \node [anchor=north west] (example5) at ([yshift=0.1em]example4.south west) {\hspace{1em}\quad};
\node [anchor=north west] (example6) at ([yshift=0.1em]example5.south west) {\hspace{1em}\quad}; \node [anchor=north west] (example6) at ([yshift=0.1em]example5.south west) {\hspace{1em}\quad};
...@@ -33,9 +33,9 @@ ...@@ -33,9 +33,9 @@
{ {
\begin{scope} [yshift=-1.55in] \begin{scope} [yshift=-1.55in]
{\scriptsize {\scriptsize
\node [anchor=north west] (entry1) at (0,0) {\textbf{1:} What is NiuTrans ?\qquad \qquad }; \node [anchor=north west] (entry1) at (0,0) {\textbf{1:} What is NiuTrans\qquad \qquad };
\node [anchor=north west] (entry2) at ([yshift=0.0em]entry1.south west) {\textbf{2:} Are you fulfilled ?}; \node [anchor=north west] (entry2) at ([yshift=0.0em]entry1.south west) {\textbf{2:} Are you fulfilled};
\node [anchor=north west] (entry3) at ([yshift=0.0em]entry2.south west) {\textbf{3:} Yes, you are right .}; \node [anchor=north west] (entry3) at ([yshift=0.0em]entry2.south west) {\textbf{3:} You are right};
\node [anchor=north west] (entry4) at ([yshift=0.0em]entry3.south west) {\hspace{1em} ...}; \node [anchor=north west] (entry4) at ([yshift=0.0em]entry3.south west) {\hspace{1em} ...};
\node [anchor=north west] (entry5) at ([yshift=0.1em]entry4.south west) {\hspace{1em}{\quad}}; \node [anchor=north west] (entry5) at ([yshift=0.1em]entry4.south west) {\hspace{1em}{\quad}};
\node [anchor=north west] (entry6) at ([xshift=11.6em,yshift=0.65em]entry5.south west) {}; \node [anchor=north west] (entry6) at ([xshift=11.6em,yshift=0.65em]entry5.south west) {};
...@@ -79,15 +79,16 @@ ...@@ -79,15 +79,16 @@
\begin{scope}[xshift=1.7in,yshift=-1.55in] \begin{scope}[xshift=1.7in,yshift=-1.55in]
{\scriptsize {\scriptsize
\node [anchor=north west] (ngram1) at (0,0) {$\textrm{Pr}(\textrm{I}) = 0.0001$}; \node [anchor=north west] (ngram1) at (0,0) {$\textrm{Pr}(\textrm{I}) = 0.0001$};
\node [anchor=north west] (ngram2) at ([yshift=0.0em]ngram1.south west) {$\textrm{Pr}(\textrm{I} \to \textrm{am}) = 0.623$}; \node [anchor=north west] (ngram2) at ([yshift=0.0em]ngram1.south west) {$\textrm{Pr}(\textrm{I}\ \textrm{am}) = 0.623$};
\node [anchor=north west] (ngram3) at ([yshift=0.0em]ngram2.south west) {$\textrm{Pr}(\textrm{I} \to \textrm{was}) = 0.21$}; \node [anchor=north west] (ngram3) at ([yshift=0.0em]ngram2.south west) {$\textrm{Pr}(\textrm{I}\ \textrm{was}) = 0.21$};
\node [anchor=north west] (ngram4) at ([yshift=-0.2em]ngram3.south west) {...}; \node [anchor=north west] (ngram4) at ([yshift=-0.2em]ngram3.south west) {...};
\node [anchor=north east] (ngrame) at ([yshift=-1in]phrase5.south east) {};
\node [anchor=south west] (lmlabel) at (ngram1.north west) {{\color{ublue} \small{语言模型}}}; \node [anchor=south west] (lmlabel) at (ngram1.north west) {{\color{ublue} \small{语言模型}}};
} }
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
{ {
\node[rectangle,draw=ublue, inner sep=0mm] [fit = (ngram1) (ngram2) (ngram3) (ngram4) (lmlabel)] (langaugemodel) {}; \node[rectangle,draw=ublue, inner sep=0mm] [fit = (ngram1) (ngram2) (ngram3) (ngram4) (lmlabel) (ngrame)] (langaugemodel) {};
} }
\end{pgfonlayer} \end{pgfonlayer}
......
...@@ -10,8 +10,8 @@ ...@@ -10,8 +10,8 @@
\begin{scope} \begin{scope}
{ {
{\footnotesize {\footnotesize
\node [anchor=north west] (example1) at (0,0) {\textbf{1:} 源=什么\ 时候\ 开始\ ?}; \node [anchor=north west] (example1) at (0,0) {\textbf{1:} 源=什么\ 时候\ 开始};
\node [anchor=north west] (example1part2) at ([yshift=0.5em]example1.south west) {\hspace{1em} 译=\ When will it start ?}; \node [anchor=north west] (example1part2) at ([yshift=0.5em]example1.south west) {\hspace{1em} 译=\ When will it start};
\node [anchor=north west] (example2) at ([yshift=0.1em]example1part2.south west) {\textbf{2:} 源=我\ \ \ 感到\ 高兴}; \node [anchor=north west] (example2) at ([yshift=0.1em]example1part2.south west) {\textbf{2:} 源=我\ \ \ 感到\ 高兴};
\node [anchor=north west] (example2part2) at ([yshift=0.5em]example2.south west) {\hspace{1em} 译=\ I am happy with him}; \node [anchor=north west] (example2part2) at ([yshift=0.5em]example2.south west) {\hspace{1em} 译=\ I am happy with him};
\node [anchor=north west] (example3) at ([yshift=0.1em]example2part2.south west) {\hspace{1em} ...}; \node [anchor=north west] (example3) at ([yshift=0.1em]example2part2.south west) {\hspace{1em} ...};
...@@ -35,14 +35,12 @@ ...@@ -35,14 +35,12 @@
\node [anchor=north west] (entry3) at ([yshift=0.1em]entry2.south west) {\textbf{3:} 满意 \hspace{-0.4em} $\to$ \hspace{-0.4em} satisfy \hspace{-0.12em}$\mid$\hspace{-0.12em} satisfied ... }; \node [anchor=north west] (entry3) at ([yshift=0.1em]entry2.south west) {\textbf{3:} 满意 \hspace{-0.4em} $\to$ \hspace{-0.4em} satisfy \hspace{-0.12em}$\mid$\hspace{-0.12em} satisfied ... };
\node [anchor=north west] (entry4) at ([yshift=0.1em]entry3.south west) {\hspace{1em} ...}; \node [anchor=north west] (entry4) at ([yshift=0.1em]entry3.south west) {\hspace{1em} ...};
\node [anchor=south west] (dictionarylabel) at (entry1.north west) {{\color{ublue} 资源2:翻译词典}}; \node [anchor=south west] (dictionarylabel) at (entry1.north west) {{\color{ublue} 资源2:翻译词典}};
\node [anchor=west,opacity=0.0] (empty) at ([yshift=-0.2em]entry3.west) {\hspace{1em} 译=\ I am happy with him};
} }
} }
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
{ {
\node[rectangle,draw=ublue, inner sep=0mm] [fit = (entry1) (entry2) (entry3) (entry4) (dictionarylabel) (empty)] {}; \node[rectangle,draw=ublue, inner sep=0mm] [fit = (entry1) (entry2) (entry3) (entry4) (dictionarylabel)] {};
} }
\end{pgfonlayer} \end{pgfonlayer}
......
...@@ -111,7 +111,7 @@ ...@@ -111,7 +111,7 @@
\parinterval 人工翻译已经存在了上千年,而机器翻译又起源于什么时候呢?机器翻译跌宕起伏的发展史可以分为萌芽期、受挫期、快速成长期和爆发期四个阶段。 \parinterval 人工翻译已经存在了上千年,而机器翻译又起源于什么时候呢?机器翻译跌宕起伏的发展史可以分为萌芽期、受挫期、快速成长期和爆发期四个阶段。
\parinterval 17世纪,Descartes提出世界语言的概念\upcite{knowlson1975universal},他希望使用统一符号表示不同语言、相同含义的词汇,以此来克服语言障碍,这种想法在当时是很超前的。随着语言学、计算机科学等学科的发展,在19世纪30年代使用计算模型进行自动翻译的思想开始萌芽,如当时法国科学家Georges Artsrouni就提出用机器来进行翻译的想法。只是那时依然没有合适的实现手段,所以这种想法的合理性无法被证实。 \parinterval 早在17世纪,如Descartes、Leibniz、Cave\ Beck、Athanasius\ Kircher和Johann\ Joachim\ Becher等很多学者就提出采用机器词典(电子词典)来克服语言障碍的想法\upcite{knowlson1975universal},这种想法在当时是很超前的。随着语言学、计算机科学等学科的发展,在19世纪30年代使用计算模型进行自动翻译的思想开始萌芽,如当时法国科学家Georges Artsrouni就提出用机器来进行翻译的想法。只是那时依然没有合适的实现手段,所以这种想法的合理性无法被证实。
\parinterval 随着第二次世界大战爆发, 对文字进行加密和解密成为重要的军事需求,这也使得数学和密码学变得相当发达。在战争结束一年后,世界上第一台通用电子数字计算机于1946年研制成功(图\ref{fig:1-4}),至此使用机器进行翻译有了真正实现的可能。 \parinterval 随着第二次世界大战爆发, 对文字进行加密和解密成为重要的军事需求,这也使得数学和密码学变得相当发达。在战争结束一年后,世界上第一台通用电子数字计算机于1946年研制成功(图\ref{fig:1-4}),至此使用机器进行翻译有了真正实现的可能。
...@@ -257,7 +257,7 @@ ...@@ -257,7 +257,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\parinterval 机器翻译技术大体上可以分为三种方法,分别为基于规则的机器翻译、统计机器翻译以及神经机器翻译。第一代机器翻译技术是主要使用基于规则的机器翻译方法,其主要思想是通过形式文法定义的规则引入源语言和目标语中的语言学知识。此类方法在机器翻译技术诞生之初就被人所关注,特别是在上世纪70年代,以基于规则方法为代表的专家系统是人工智能中最具代表性的研究领域。甚至到了统计机器翻译时代,很多系统中也大量地使用了基于规则的翻译知识表达形式。 \parinterval 机器翻译技术大体上可以分为三种方法,分别为基于规则的机器翻译、统计机器翻译以及神经机器翻译。第一代机器翻译技术是主要使用基于规则的机器翻译方法,其主要思想是通过形式文法定义的规则引入源语言和目标语中的语言学知识。此类方法在机器翻译技术诞生之初就被人所关注,特别是在上世纪70年代,以基于规则方法为代表的专家系统是人工智能中最具代表性的研究领域。甚至到了统计机器翻译时代,很多系统中也大量地使用了基于规则的翻译知识表达形式。
\parinterval 早期,基于规则的机器翻译大多依赖人工定义及书写的规则。主要有两类方法\upcite{nirenburg1989knowledge}:一类是基于转换规则的机器翻译方法,简称转换法。另一类是基于中间语言的方法。它们都以词典和人工书写的规则库作为翻译知识,用一系列规则的组合完成翻译。 \parinterval 早期,基于规则的机器翻译大多依赖人工定义及书写的规则。主要有两类方法\upcite{nirenburg1989knowledge,hutchins1986machine,zarechnak1979history}:一类是基于转换规则的机器翻译方法,简称转换法。另一类是基于中间语言的方法。它们都以词典和人工书写的规则库作为翻译知识,用一系列规则的组合完成翻译。
%---------------------------------------------------------------------------------------- %----------------------------------------------------------------------------------------
% NEW SUB-SECTION % NEW SUB-SECTION
......
...@@ -925,7 +925,7 @@ c_{\textrm{KN}}(\cdot) = \left\{\begin{array}{ll} ...@@ -925,7 +925,7 @@ c_{\textrm{KN}}(\cdot) = \left\{\begin{array}{ll}
\end{figure} \end{figure}
%------------------------------------------- %-------------------------------------------
\parinterval 这样,语言模型的打分与解空间树的遍历就融合了在一起。于是,序列生成的问题可以被重新描述为:寻找所有单词序列组成的解空间树中权重总和最大的一条路径。在这个定义下,前面提到的两种枚举词序列的方法就是经典的{\small\bfnew{深度优先搜索}}\index{深度优先搜索}(Depth-first Search)\upcite{even2011graph}\index{Depth-first Search}{\small\bfnew{宽度优先搜索}}\index{宽度优先搜索}(Breadth-first Search)\upcite{lee1961an}\index{Breadth-first Search}的雏形。在后面的内容中可以看到,从遍历解空间树的角度出发,可以对原始这些搜索策略的效率进行优化。 \parinterval 这样,语言模型的打分与解空间树的遍历就融合了在一起。于是,序列生成的问题可以被重新描述为:寻找所有单词序列组成的解空间树中权重总和最大的一条路径。在这个定义下,前面提到的两种枚举词序列的方法就是经典的{\small\bfnew{深度优先搜索}}\index{深度优先搜索}(Depth-first Search)\index{Depth-first Search}{\small\bfnew{宽度优先搜索}}\index{宽度优先搜索}(Breadth-first Search)\index{Breadth-first Search}的雏形\upcite{even2011graph,tarjan1972depth}。在后面的内容中可以看到,从遍历解空间树的角度出发,可以对原始这些搜索策略的效率进行优化。
%---------------------------------------------------------------------------------------- %----------------------------------------------------------------------------------------
% NEW SUB-SECTION % NEW SUB-SECTION
...@@ -1033,7 +1033,7 @@ c_{\textrm{KN}}(\cdot) = \left\{\begin{array}{ll} ...@@ -1033,7 +1033,7 @@ c_{\textrm{KN}}(\cdot) = \left\{\begin{array}{ll}
\parinterval 束搜索也有很多的改进版本。回忆一下,在无信息搜索策略中可以使用剪枝技术来提升搜索的效率。而实际上,束搜索本身也是一种剪枝方法。因此有时也把束搜索称作{\small\bfnew{束剪枝}}\index{束剪枝}(Beam Pruning)\index{Beam Pruning}。在这里有很多其它的剪枝策略可供选择,例如可以只保留与当前最佳路径得分相差在$\theta$之内的路径,也就是搜索只保留得分差距在一定范围内的路径,这种方法也被称作{\small\bfnew{直方图剪枝}}\index{直方图剪枝}(Histogram Pruning)\index{Histogram Pruning} \parinterval 束搜索也有很多的改进版本。回忆一下,在无信息搜索策略中可以使用剪枝技术来提升搜索的效率。而实际上,束搜索本身也是一种剪枝方法。因此有时也把束搜索称作{\small\bfnew{束剪枝}}\index{束剪枝}(Beam Pruning)\index{Beam Pruning}。在这里有很多其它的剪枝策略可供选择,例如可以只保留与当前最佳路径得分相差在$\theta$之内的路径,也就是搜索只保留得分差距在一定范围内的路径,这种方法也被称作{\small\bfnew{直方图剪枝}}\index{直方图剪枝}(Histogram Pruning)\index{Histogram Pruning}
\parinterval 对于语言模型来说,当多个路径中最高得分比当前搜索到的最好的解的得分低时,可以立刻停止搜索。因为此时序列越长语言模型得分$\log \funp{P}(w_1 w_2 \ldots w_m)$会越低,继续扩展这些路径不会产生更好的结果。这个技术通常也被称为{\small\bfnew{最佳停止条件}}\index{最佳停止条件}(Optimal Stopping Criteria)\index{Optimal Stopping Criteria}。类似的思想也被用于机器翻译等任务{\color{red} (此处引用liang huang的论文)} \parinterval 对于语言模型来说,当多个路径中最高得分比当前搜索到的最好的解的得分低时,可以立刻停止搜索。因为此时序列越长语言模型得分$\log \funp{P}(w_1 w_2 \ldots w_m)$会越低,继续扩展这些路径不会产生更好的结果。这个技术通常也被称为{\small\bfnew{最佳停止条件}}\index{最佳停止条件}(Optimal Stopping Criteria)\index{Optimal Stopping Criteria}。类似的思想也被用于机器翻译等任务\upcite{DBLP:conf/emnlp/HuangZM17,DBLP:conf/emnlp/Yang0M18}
\parinterval 总的来说,虽然局部搜索由于没有遍历完整的解空间,使得这类方法无法保证找到最优解。但是,局部搜索算法大大降低了搜索过程的时间、空间复杂度。因此在语言模型生成和机器翻译的解码过程中常常使用局部搜索算法。在{\chapterseven}{\chapterten}中还将介绍这些算法的具体应用。 \parinterval 总的来说,虽然局部搜索由于没有遍历完整的解空间,使得这类方法无法保证找到最优解。但是,局部搜索算法大大降低了搜索过程的时间、空间复杂度。因此在语言模型生成和机器翻译的解码过程中常常使用局部搜索算法。在{\chapterseven}{\chapterten}中还将介绍这些算法的具体应用。
...@@ -1057,7 +1057,7 @@ c_{\textrm{KN}}(\cdot) = \left\{\begin{array}{ll} ...@@ -1057,7 +1057,7 @@ c_{\textrm{KN}}(\cdot) = \left\{\begin{array}{ll}
\vspace{0.5em} \vspace{0.5em}
\item 本章更多地关注了语言模型的基本问题和求解思路,但是基于$n$-gram的方法并不是语言建模的唯一方法。从现在自然语言处理的前沿看,端到端的深度学习方法在很多任务中都取得了领先的性能。语言模型同样可以使用这些方法\upcite{jing2019a},而且在近些年取得了巨大成功。例如,最早提出的前馈神经语言模型\upcite{bengio2003a}和后来的基于循环单元的语言模型\upcite{mikolov2010recurrent}、基于长短期记忆单元的语言模型\upcite{sundermeyer2012lstm}以及现在非常流行的Transformer\upcite{vaswani2017attention}。 关于神经语言模型的内容,会在{\chapternine}进行进一步介绍。 \item 本章更多地关注了语言模型的基本问题和求解思路,但是基于$n$-gram的方法并不是语言建模的唯一方法。从现在自然语言处理的前沿看,端到端的深度学习方法在很多任务中都取得了领先的性能。语言模型同样可以使用这些方法\upcite{jing2019a},而且在近些年取得了巨大成功。例如,最早提出的前馈神经语言模型\upcite{bengio2003a}和后来的基于循环单元的语言模型\upcite{mikolov2010recurrent}、基于长短期记忆单元的语言模型\upcite{sundermeyer2012lstm}以及现在非常流行的Transformer\upcite{vaswani2017attention}。 关于神经语言模型的内容,会在{\chapternine}进行进一步介绍。
\vspace{0.5em} \vspace{0.5em}
\item 最后,本章结合语言模型的序列生成任务对搜索技术进行了介绍。类似地,机器翻译任务也需要从大量的翻译候选中快速寻找最优译文。因此在机器翻译任务中也使用了搜索方法,这个过程通常被称作{\small\bfnew{解码}}\index{解码}(Decoding)\index{Decoding}。例如,有研究者在基于词的翻译模型中尝试使用启发式搜索\upcite{DBLP:conf/acl/OchUN01,DBLP:conf/acl/WangW97,tillmann1997a}以及贪婪搜索方法\upcite{germann2001fast}\upcite{germann2003greedy},也有研究者研究基于短语的栈解码方法{\color{red}(引用Moses)}。此外,解码方法还包括有限状态机解码\upcite{bangalore2001a}\upcite{bangalore2000stochastic}以及基于语言学约束的解码\upcite{venugopal2007an}{\color{red}(找SAMT、树到串、串到树翻译、Hiero的论文)}。相关内容将在{\chaptereight}{\chapterfourteen} 进行介绍。 \item 最后,本章结合语言模型的序列生成任务对搜索技术进行了介绍。类似地,机器翻译任务也需要从大量的翻译候选中快速寻找最优译文。因此在机器翻译任务中也使用了搜索方法,这个过程通常被称作{\small\bfnew{解码}}\index{解码}(Decoding)\index{Decoding}。例如,有研究者在基于词的翻译模型中尝试使用启发式搜索\upcite{DBLP:conf/acl/OchUN01,DBLP:conf/acl/WangW97,tillmann1997a}以及贪婪搜索方法\upcite{germann2001fast}\upcite{germann2003greedy},也有研究者研究基于短语的栈解码方法\upcite{Koehn2007Moses,DBLP:conf/amta/Koehn04}。此外,解码方法还包括有限状态机解码\upcite{bangalore2001a}\upcite{bangalore2000stochastic}以及基于语言学约束的解码\upcite{venugopal2007an,zollmann2007the,liu2006tree,galley2006scalable,chiang2005a}。相关内容将在{\chaptereight}{\chapterfourteen} 进行介绍。
\vspace{0.5em} \vspace{0.5em}
\end{itemize} \end{itemize}
\end{adjustwidth} \end{adjustwidth}
...@@ -718,6 +718,22 @@ ...@@ -718,6 +718,22 @@
publisher={Springer} publisher={Springer}
} }
% Hutchins (1986), monograph on the history and state of machine translation.
% Publisher name and publisher city are stored in separate fields.
@book{hutchins1986machine,
  author    = {Hutchins, William John},
  title     = {Machine Translation: Past, Present, Future},
  publisher = {Ellis Horwood},
  address   = {Chichester},
  year      = {1986}
}
% Zarechnak (1979), "The History of Machine Translation", pages 1--87.
% NOTE(review): recorded here as a chapter of the 1979 book "Machine Translation"
% (Mouton, The Hague) rather than a journal article with volume 1979 -- confirm
% the publisher details against a library record.
@incollection{zarechnak1979history,
  author    = {Zarechnak, Michael},
  title     = {The History of Machine Translation},
  booktitle = {Machine Translation},
  publisher = {Mouton Publishers},
  address   = {The Hague},
  pages     = {1--87},
  year      = {1979}
}
@article{jurafsky2000speech, @article{jurafsky2000speech,
title="Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics, and Speech Recognition", title="Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics, and Speech Recognition",
author="Daniel {Jurafsky} and James H. {Martin}", author="Daniel {Jurafsky} and James H. {Martin}",
...@@ -1697,16 +1713,6 @@ ...@@ -1697,16 +1713,6 @@
year={2000} year={2000}
} }
% Lee (1961), path-connection algorithm paper.
% The journal acronym IRE is brace-protected so styles cannot re-case it.
@article{lee1961an,
  author  = {Lee, C. Y.},
  title   = {An Algorithm for Path Connections and Its Applications},
  journal = {{IRE} Transactions on Electronic Computers},
  volume  = {10},
  number  = {3},
  pages   = {346--365},
  year    = {1961}
}
@book{even2011graph, @book{even2011graph,
title={Graph algorithms}, title={Graph algorithms},
author={Even, Shimon}, author={Even, Shimon},
...@@ -1714,6 +1720,49 @@ ...@@ -1714,6 +1720,49 @@
publisher={Cambridge University Press} publisher={Cambridge University Press}
} }
% Tarjan (1972), depth-first search paper in SIAM Journal on Computing.
@article{tarjan1972depth,
  author  = {Tarjan, Robert Endre},
  title   = {Depth-First Search and Linear Graph Algorithms},
  journal = {SIAM Journal on Computing},
  year    = {1972},
  volume  = {1},
  number  = {2},
  pages   = {146--160}
}
% Yang, Huang and Ma (EMNLP 2018): re-scoring methods and stopping criteria for beam search in NMT.
@inproceedings{DBLP:conf/emnlp/Yang0M18,
  author    = {Yang, Yilin and Huang, Liang and Ma, Mingbo},
  title     = {Breaking the Beam Search Curse: {A} Study of (Re-)Scoring Methods and Stopping Criteria for Neural Machine Translation},
  editor    = {Riloff, Ellen and Chiang, David and Hockenmaier, Julia and Tsujii, Jun'ichi},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
  publisher = {Association for Computational Linguistics},
  year      = {2018},
  pages     = {3054--3059}
}
% Koehn (AMTA 2004): the Pharaoh beam search decoder for phrase-based SMT.
@inproceedings{DBLP:conf/amta/Koehn04,
  author    = {Koehn, Philipp},
  title     = {Pharaoh: {A} Beam Search Decoder for Phrase-Based Statistical Machine Translation Models},
  editor    = {Frederking, Robert E. and Taylor, Kathryn},
  booktitle = {Machine Translation: From Real Users to Research, 6th Conference of the Association for Machine Translation in the Americas, {AMTA} 2004, Washington, DC, USA, September 28-October 2, 2004, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {3265},
  publisher = {Springer},
  year      = {2004},
  pages     = {115--124}
}
%%%%% chapter 2------------------------------------------------------ %%%%% chapter 2------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......
...@@ -122,6 +122,22 @@ ...@@ -122,6 +122,22 @@
publisher={Springer} publisher={Springer}
} }
% Hutchins (1986), monograph on the history and state of machine translation.
% Publisher name and publisher city are stored in separate fields.
@book{hutchins1986machine,
  author    = {Hutchins, William John},
  title     = {Machine Translation: Past, Present, Future},
  publisher = {Ellis Horwood},
  address   = {Chichester},
  year      = {1986}
}
% Zarechnak (1979), "The History of Machine Translation", pages 1--87.
% NOTE(review): recorded here as a chapter of the 1979 book "Machine Translation"
% (Mouton, The Hague) rather than a journal article with volume 1979 -- confirm
% the publisher details against a library record.
@incollection{zarechnak1979history,
  author    = {Zarechnak, Michael},
  title     = {The History of Machine Translation},
  booktitle = {Machine Translation},
  publisher = {Mouton Publishers},
  address   = {The Hague},
  pages     = {1--87},
  year      = {1979}
}
@book{冯志伟2004机器翻译研究, @book{冯志伟2004机器翻译研究,
title={机器翻译研究}, title={机器翻译研究},
author={冯志伟}, author={冯志伟},
...@@ -176,6 +192,45 @@ ...@@ -176,6 +192,45 @@
year={1993} year={1993}
} }
% Wu et al. (2016), the GNMT system description, an arXiv preprint.
% journal/volume keep the DBLP "CoRR abs/..." form so classic styles still print a venue;
% the eprint fields carry the same arXiv identifier in machine-readable form.
@article{Wu2016GooglesNM,
  author        = {Yonghui Wu and
                   Mike Schuster and
                   Zhifeng Chen and
                   Quoc V. Le and
                   Mohammad Norouzi and
                   Wolfgang Macherey and
                   Maxim Krikun and
                   Yuan Cao and
                   Qin Gao and
                   Klaus Macherey and
                   Jeff Klingner and
                   Apurva Shah and
                   Melvin Johnson and
                   Xiaobing Liu and
                   Lukasz Kaiser and
                   Stephan Gouws and
                   Yoshikiyo Kato and
                   Taku Kudo and
                   Hideto Kazawa and
                   Keith Stevens and
                   George Kurian and
                   Nishant Patil and
                   Wei Wang and
                   Cliff Young and
                   Jason Smith and
                   Jason Riesa and
                   Alex Rudnick and
                   Oriol Vinyals and
                   Greg Corrado and
                   Macduff Hughes and
                   Jeffrey Dean},
  title         = {{Google's} Neural Machine Translation System: Bridging the Gap between
                   Human and Machine Translation},
  journal       = {CoRR},
  volume        = {abs/1609.08144},
  year          = {2016},
  eprint        = {1609.08144},
  archiveprefix = {arXiv}
}
@inproceedings{DBLP:journals/corr/LuongPM15, @inproceedings{DBLP:journals/corr/LuongPM15,
author = {Thang Luong and author = {Thang Luong and
Hieu Pham and Hieu Pham and
...@@ -502,14 +557,14 @@ ...@@ -502,14 +557,14 @@
publisher={Cambridge University Press} publisher={Cambridge University Press}
} }
@article{lee1961an, @article{tarjan1972depth,
title="An Algorithm for Path Connections and Its Applications", title={Depth-First Search and Linear Graph Algorithms},
author="C. Y. {Lee}", author={Robert Endre {Tarjan}},
journal="Ire Transactions on Electronic Computers", journal={SIAM Journal on Computing},
volume="10", volume={1},
number="3", number={2},
pages="346--365", pages={146--160},
year="1961" year={1972}
} }
@article{DBLP:journals/ai/SabharwalS11, @article{DBLP:journals/ai/SabharwalS11,
...@@ -574,6 +629,37 @@ ...@@ -574,6 +629,37 @@
pages={189--211}, pages={189--211},
year={1990} year={1990}
} }
% Huang, Zhao and Ma (EMNLP 2017): stopping criterion for beam search in neural text generation.
@inproceedings{DBLP:conf/emnlp/HuangZM17,
  author    = {Huang, Liang and Zhao, Kai and Ma, Mingbo},
  title     = {When to Finish? Optimal Beam Search for Neural Text Generation (modulo beam size)},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing, {EMNLP} 2017, Copenhagen, Denmark, September 9-11, 2017},
  publisher = {Association for Computational Linguistics},
  year      = {2017},
  pages     = {2134--2139}
}
% Yang, Huang and Ma (EMNLP 2018): re-scoring methods and stopping criteria for beam search in NMT.
@inproceedings{DBLP:conf/emnlp/Yang0M18,
  author    = {Yang, Yilin and Huang, Liang and Ma, Mingbo},
  title     = {Breaking the Beam Search Curse: {A} Study of (Re-)Scoring Methods and Stopping Criteria for Neural Machine Translation},
  editor    = {Riloff, Ellen and Chiang, David and Hockenmaier, Julia and Tsujii, Jun'ichi},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
  publisher = {Association for Computational Linguistics},
  year      = {2018},
  pages     = {3054--3059}
}
%缩写 %缩写
@article{jelinek1980interpolated, @article{jelinek1980interpolated,
title={Interpolated estimation of Markov source parameters from sparse data}, title={Interpolated estimation of Markov source parameters from sparse data},
...@@ -795,6 +881,47 @@ ...@@ -795,6 +881,47 @@
year={2003} year={2003}
} }
% Koehn et al. (ACL 2007), the Moses toolkit paper.
% The editor list looks deliberately disabled in the original ("//editor"); BibTeX has no
% line comments, so it is kept under the ignored field name x-editor instead.
% Rename x-editor to editor if the editors should be printed.
@inproceedings{Koehn2007Moses,
  author    = {Philipp Koehn and
               Hieu Hoang and
               Alexandra Birch and
               Chris Callison{-}Burch and
               Marcello Federico and
               Nicola Bertoldi and
               Brooke Cowan and
               Wade Shen and
               Christine Moran and
               Richard Zens and
               Chris Dyer and
               Ondrej Bojar and
               Alexandra Constantin and
               Evan Herbst},
  title     = {Moses: Open Source Toolkit for Statistical Machine Translation},
  booktitle = {{ACL} 2007, Proceedings of the 45th Annual Meeting of the Association
               for Computational Linguistics, June 23-30, 2007, Prague, Czech Republic},
  pages     = {177--180},
  publisher = {The Association for Computational Linguistics},
  year      = {2007},
  x-editor  = {John A. Carroll and
               Antal van den Bosch and
               Annie Zaenen}
}
% Koehn (AMTA 2004): the Pharaoh beam search decoder for phrase-based SMT.
@inproceedings{DBLP:conf/amta/Koehn04,
  author    = {Koehn, Philipp},
  title     = {Pharaoh: {A} Beam Search Decoder for Phrase-Based Statistical Machine Translation Models},
  editor    = {Frederking, Robert E. and Taylor, Kathryn},
  booktitle = {Machine Translation: From Real Users to Research, 6th Conference of the Association for Machine Translation in the Americas, {AMTA} 2004, Washington, DC, USA, September 28-October 2, 2004, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {3265},
  publisher = {Springer},
  year      = {2004},
  pages     = {115--124}
}
@inproceedings{bangalore2001a, @inproceedings{bangalore2001a,
title={A finite-state approach to machine translation}, title={A finite-state approach to machine translation},
author={S. {Bangalore} and G. {Riccardi}}, author={S. {Bangalore} and G. {Riccardi}},
...@@ -819,6 +946,73 @@ ...@@ -819,6 +946,73 @@
year={2007} year={2007}
} }
% Zollmann et al. (WMT 2007), the SAMT system description.
% The editor list looks deliberately disabled in the original ("//editor"); BibTeX has no
% line comments, so it is kept under the ignored field name x-editor instead.
@inproceedings{zollmann2007the,
  author    = {Andreas Zollmann and
               Ashish Venugopal and
               Matthias Paulik and
               Stephan Vogel},
  title     = {The Syntax Augmented {MT} {(SAMT)} System at the Shared Task for the
               2007 {ACL} Workshop on Statistical Machine Translation},
  booktitle = {Proceedings of the Second Workshop on Statistical Machine Translation,
               WMT@ACL 2007, Prague, Czech Republic, June 23, 2007},
  pages     = {216--219},
  publisher = {Association for Computational Linguistics},
  year      = {2007},
  x-editor  = {Chris Callison{-}Burch and
               Philipp Koehn and
               Cameron S. Fordyce and
               Christof Monz}
}
% Liu, Liu and Lin (ACL 2006), tree-to-string alignment templates.
% The editor list looks deliberately disabled in the original ("//editor"); BibTeX has no
% line comments, so it is kept under the ignored field name x-editor instead.
% NOTE(review): page range added from memory of the proceedings -- confirm.
@inproceedings{liu2006tree,
  author    = {Yang Liu and
               Qun Liu and
               Shouxun Lin},
  title     = {Tree-to-String Alignment Template for Statistical Machine Translation},
  booktitle = {{ACL} 2006, 21st International Conference on Computational Linguistics
               and 44th Annual Meeting of the Association for Computational Linguistics,
               Proceedings of the Conference, Sydney, Australia, 17-21 July 2006},
  pages     = {609--616},
  publisher = {The Association for Computational Linguistics},
  year      = {2006},
  x-editor  = {Nicoletta Calzolari and
               Claire Cardie and
               Pierre Isabelle}
}
% Galley et al. (ACL 2006), context-rich syntactic translation models.
% The editor list looks deliberately disabled in the original ("//editor"); BibTeX has no
% line comments, so it is kept under the ignored field name x-editor instead.
% NOTE(review): page range added from memory of the proceedings -- confirm.
@inproceedings{galley2006scalable,
  author    = {Michel Galley and
               Jonathan Graehl and
               Kevin Knight and
               Daniel Marcu and
               Steve DeNeefe and
               Wei Wang and
               Ignacio Thayer},
  title     = {Scalable Inference and Training of Context-Rich Syntactic Translation
               Models},
  booktitle = {{ACL} 2006, 21st International Conference on Computational Linguistics
               and 44th Annual Meeting of the Association for Computational Linguistics,
               Proceedings of the Conference, Sydney, Australia, 17-21 July 2006},
  pages     = {961--968},
  publisher = {The Association for Computational Linguistics},
  year      = {2006},
  x-editor  = {Nicoletta Calzolari and
               Claire Cardie and
               Pierre Isabelle}
}
% Chiang (ACL 2005), the hierarchical phrase-based (Hiero) model.
% The editor list looks deliberately disabled in the original ("//editor"); BibTeX has no
% line comments, so it is kept under the ignored field name x-editor instead.
@inproceedings{chiang2005a,
  author    = {David Chiang},
  title     = {A Hierarchical Phrase-Based Model for Statistical Machine Translation},
  booktitle = {{ACL} 2005, 43rd Annual Meeting of the Association for Computational
               Linguistics, Proceedings of the Conference, 25-30 June 2005, University
               of Michigan, {USA}},
  pages     = {263--270},
  publisher = {The Association for Computational Linguistics},
  year      = {2005},
  x-editor  = {Kevin Knight and
               Hwee Tou Ng and
               Kemal Oflazer}
}
%%%%% chapter 2------------------------------------------------------ %%%%% chapter 2------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论