Commit 8b9754b1 by 单韦乔

Merge branch 'shanweiqiao' into 'caorunzhe'

Shanweiqiao

See merge request !90
parents e0b84536 0001aff6
@@ -2,35 +2,30 @@
 %%% 句法树(层次短语)
 \begin{tikzpicture}
 {\small
-\begin{scope}[sibling distance=10pt, level distance = 20pt]
+\begin{scope}[sibling distance=15pt, level distance = 20pt]
 {\scriptsize
-\Tree[.\node(n1){\textbf{zj}};
-  [.\node(n2){\textbf{dj}};
-    [.\node(n3){\textbf{np}}; \node(cw1){}; ]
-    [.\node(n4){\textbf{vp}};
-      [. \node(cw1){pp};
-        [. \node(cw2){}; ]
-        [. \node(cw3){np};
-          [. \node(cw4){mp};
-            [. \node(p1){}; ]
-            [. \node(p2){}; ]
-          ]
-          [. \node(cw5){np}; \node(p3){}; ]
-        ]
-      ]
-      [. \node(cw6){vp};
-        [. \node(cw7){}; ]
-        [. \node(cw8){pp};
-          [. \node(cw9){}; ]
-          [. \node(cw10){sp};
-            [. \node(cw11){}; ]
-            [. \node(cw12){}; ]
-          ]
-        ]
-      ]
-    ]
-  ]
-  [.\node(n5){\textbf{}}; ]
-]
+\Tree[.\node(r){IP};
+  [.\node(n11){NP}; [.\node(n21){PN}; [.\node(l1){};]]]
+  [.\node(n12){VP};
+    [.\node(n22){BA}; \node(l2){}; ]
+    [.\node(n23){IP};
+      [. \node(n31){NP};
+        [. \node(n41){QP};
+          [. \node(n51){CD}; \node(l3){};]
+          [. \node(n52){CLP}; [.\node(n61){M}; [.\node(l4){};]]]
+        ]
+        [. \node(n42){NP}; [.\node(n53){NN}; [.\node(l5){};]]]
+      ]
+      [. \node(n32){VP};
+        [. \node(n43){VV}; \node(l6){放在};]
+        [. \node(n44){LCP};
+          [. \node(n54){NP}; [.\node(n62){NN}; [.\node(l7){};]]]
+          [. \node(n55){LC}; \node(l8){};]
+        ]
+      ]
+    ]
+  ]
+  [.\node(n13){PU}; \node(l9){};]
+]
 }
 \end{scope}
......
@@ -9,12 +9,12 @@
 {
 \begin{scope}
 {\scriptsize
-\node [anchor=north west] (example1) at (0,0) {\textbf{1:} 源=他\ \ \ ?};
-\node [anchor=north west] (example1part2) at ([yshift=0.2em]example1.south west) {\hspace{1em} 译=\ Where is he ?};
+\node [anchor=north west] (example1) at (0,0) {\textbf{1:} 源=他\ \ };
+\node [anchor=north west] (example1part2) at ([yshift=0.2em]example1.south west) {\hspace{1em} 译=\ Where is he};
 \node [anchor=north west] (example2) at ([yshift=0.1em]example1part2.south west) {\textbf{2:} 源=我\ 真高兴};
 \node [anchor=north west] (example2part2) at ([yshift=0.2em]example2.south west) {\hspace{1em} 译=\ I'm so happy};
-\node [anchor=north west] (example3) at ([yshift=0.1em]example2part2.south west) {\textbf{3:} 源=出发\ };
-\node [anchor=north west] (example3part2) at ([yshift=0.2em]example3.south west) {\hspace{1em} 译=\ Let's go!};
+\node [anchor=north west] (example3) at ([yshift=0.1em]example2part2.south west) {\textbf{3:} 源=出发};
+\node [anchor=north west] (example3part2) at ([yshift=0.2em]example3.south west) {\hspace{1em} 译=\ Let's go};
 \node [anchor=north west] (example4) at ([yshift=0.1em]example3part2.south west) {\hspace{1em} ...};
 \node [anchor=north west] (example5) at ([yshift=0.1em]example4.south west) {\hspace{1em}\quad};
 \node [anchor=north west] (example6) at ([yshift=0.1em]example5.south west) {\hspace{1em}\quad};
@@ -33,9 +33,9 @@
 {
 \begin{scope} [yshift=-1.55in]
 {\scriptsize
-\node [anchor=north west] (entry1) at (0,0) {\textbf{1:} What is NiuTrans ?\qquad \qquad };
-\node [anchor=north west] (entry2) at ([yshift=0.0em]entry1.south west) {\textbf{2:} Are you fulfilled ?};
-\node [anchor=north west] (entry3) at ([yshift=0.0em]entry2.south west) {\textbf{3:} Yes, you are right .};
+\node [anchor=north west] (entry1) at (0,0) {\textbf{1:} What is NiuTrans\qquad \qquad };
+\node [anchor=north west] (entry2) at ([yshift=0.0em]entry1.south west) {\textbf{2:} Are you fulfilled};
+\node [anchor=north west] (entry3) at ([yshift=0.0em]entry2.south west) {\textbf{3:} You are right};
 \node [anchor=north west] (entry4) at ([yshift=0.0em]entry3.south west) {\hspace{1em} ...};
 \node [anchor=north west] (entry5) at ([yshift=0.1em]entry4.south west) {\hspace{1em}{\quad}};
 \node [anchor=north west] (entry6) at ([xshift=11.6em,yshift=0.65em]entry5.south west) {};
@@ -79,15 +79,16 @@
 \begin{scope}[xshift=1.7in,yshift=-1.55in]
 {\scriptsize
 \node [anchor=north west] (ngram1) at (0,0) {$\textrm{Pr}(\textrm{I}) = 0.0001$};
-\node [anchor=north west] (ngram2) at ([yshift=0.0em]ngram1.south west) {$\textrm{Pr}(\textrm{I} \to \textrm{am}) = 0.623$};
-\node [anchor=north west] (ngram3) at ([yshift=0.0em]ngram2.south west) {$\textrm{Pr}(\textrm{I} \to \textrm{was}) = 0.21$};
+\node [anchor=north west] (ngram2) at ([yshift=0.0em]ngram1.south west) {$\textrm{Pr}(\textrm{I}\ \textrm{am}) = 0.623$};
+\node [anchor=north west] (ngram3) at ([yshift=0.0em]ngram2.south west) {$\textrm{Pr}(\textrm{I}\ \textrm{was}) = 0.21$};
 \node [anchor=north west] (ngram4) at ([yshift=-0.2em]ngram3.south west) {...};
+\node [anchor=north east] (ngrame) at ([yshift=-1in]phrase5.south east) {};
 \node [anchor=south west] (lmlabel) at (ngram1.north west) {{\color{ublue} \small{语言模型}}};
 }
 \begin{pgfonlayer}{background}
 {
-\node[rectangle,draw=ublue, inner sep=0mm] [fit = (ngram1) (ngram2) (ngram3) (ngram4) (lmlabel)] (langaugemodel) {};
+\node[rectangle,draw=ublue, inner sep=0mm] [fit = (ngram1) (ngram2) (ngram3) (ngram4) (lmlabel) (ngrame)] (langaugemodel) {};
 }
 \end{pgfonlayer}
......
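An aside on the language-model box in the hunk above: the fix replaces the arrow notation Pr(I → am) with the standard bigram notation Pr(I am). As a minimal sketch of where such numbers could come from, assuming plain relative-frequency estimation over a toy corpus (all function and variable names here are illustrative, not from the book's code):

from collections import Counter

def bigram_lm(corpus):
    """Relative-frequency estimates of the kind shown in the figure:
    P(w) = count(w) / N and P(w2 | w1) = count(w1 w2) / count(w1)."""
    unigram = Counter(w for sent in corpus for w in sent)
    bigram = Counter((s[i], s[i + 1]) for s in corpus for i in range(len(s) - 1))
    total = sum(unigram.values())
    p_uni = {w: c / total for w, c in unigram.items()}
    p_bi = {pair: c / unigram[pair[0]] for pair, c in bigram.items()}
    return p_uni, p_bi

p_uni, p_bi = bigram_lm([["I", "am", "happy"], ["I", "was", "there"]])
print(p_bi[("I", "am")])   # 0.5 on this toy corpus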
@@ -10,8 +10,8 @@
 \begin{scope}
 {
 {\footnotesize
-\node [anchor=north west] (example1) at (0,0) {\textbf{1:} 源=什么\ 时候\ 开始\ ?};
-\node [anchor=north west] (example1part2) at ([yshift=0.5em]example1.south west) {\hspace{1em} 译=\ When will it start ?};
+\node [anchor=north west] (example1) at (0,0) {\textbf{1:} 源=什么\ 时候\ 开始};
+\node [anchor=north west] (example1part2) at ([yshift=0.5em]example1.south west) {\hspace{1em} 译=\ When will it start};
 \node [anchor=north west] (example2) at ([yshift=0.1em]example1part2.south west) {\textbf{2:} 源=我\ \ \ 感到\ 高兴};
 \node [anchor=north west] (example2part2) at ([yshift=0.5em]example2.south west) {\hspace{1em} 译=\ I am happy with him};
 \node [anchor=north west] (example3) at ([yshift=0.1em]example2part2.south west) {\hspace{1em} ...};
@@ -35,14 +35,12 @@
 \node [anchor=north west] (entry3) at ([yshift=0.1em]entry2.south west) {\textbf{3:} 满意 \hspace{-0.4em} $\to$ \hspace{-0.4em} satisfy \hspace{-0.12em}$\mid$\hspace{-0.12em} satisfied ... };
 \node [anchor=north west] (entry4) at ([yshift=0.1em]entry3.south west) {\hspace{1em} ...};
 \node [anchor=south west] (dictionarylabel) at (entry1.north west) {{\color{ublue} 资源2:翻译词典}};
-\node [anchor=west,opacity=0.0] (empty) at ([yshift=-0.2em]entry3.west) {\hspace{1em} 译=\ I am happy with him};
 }
 }
 \begin{pgfonlayer}{background}
 {
-\node[rectangle,draw=ublue, inner sep=0mm] [fit = (entry1) (entry2) (entry3) (entry4) (dictionarylabel) (empty)] {};
+\node[rectangle,draw=ublue, inner sep=0mm] [fit = (entry1) (entry2) (entry3) (entry4) (dictionarylabel)] {};
 }
 \end{pgfonlayer}
......
@@ -111,7 +111,7 @@
 \parinterval 人工翻译已经存在了上千年,而机器翻译又起源于什么时候呢?机器翻译跌宕起伏的发展史可以分为萌芽期、受挫期、快速成长期和爆发期四个阶段。
-\parinterval 17世纪,Descartes提出世界语言的概念\upcite{knowlson1975universal},他希望使用统一符号表示不同语言、相同含义的词汇,以此来克服语言障碍,这种想法在当时是很超前的。随着语言学、计算机科学等学科的发展,在19世纪30年代使用计算模型进行自动翻译的思想开始萌芽,如当时法国科学家Georges Artsrouni就提出用机器来进行翻译的想法。只是那时依然没有合适的实现手段,所以这种想法的合理性无法被证实。
+\parinterval 早在17世纪,如Descartes、Leibniz、Cave\ Beck、Athanasius\ Kircher和Johann\ Joachim\ Becher等很多学者就提出采用机器词典(电子词典)来克服语言障碍的想法\upcite{knowlson1975universal},这种想法在当时是很超前的。随着语言学、计算机科学等学科的发展,在19世纪30年代使用计算模型进行自动翻译的思想开始萌芽,如当时法国科学家Georges Artsrouni就提出用机器来进行翻译的想法。只是那时依然没有合适的实现手段,所以这种想法的合理性无法被证实。
 \parinterval 随着第二次世界大战爆发, 对文字进行加密和解密成为重要的军事需求,这也使得数学和密码学变得相当发达。在战争结束一年后,世界上第一台通用电子数字计算机于1946年研制成功(图\ref{fig:1-4}),至此使用机器进行翻译有了真正实现的可能。
@@ -257,7 +257,7 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \parinterval 机器翻译技术大体上可以分为三种方法,分别为基于规则的机器翻译、统计机器翻译以及神经机器翻译。第一代机器翻译技术是主要使用基于规则的机器翻译方法,其主要思想是通过形式文法定义的规则引入源语言和目标语中的语言学知识。此类方法在机器翻译技术诞生之初就被人所关注,特别是在上世纪70年代,以基于规则方法为代表的专家系统是人工智能中最具代表性的研究领域。甚至到了统计机器翻译时代,很多系统中也大量地使用了基于规则的翻译知识表达形式。
-\parinterval 早期,基于规则的机器翻译大多依赖人工定义及书写的规则。主要有两类方法\upcite{nirenburg1989knowledge}:一类是基于转换规则的机器翻译方法,简称转换法。另一类是基于中间语言的方法。它们都以词典和人工书写的规则库作为翻译知识,用一系列规则的组合完成翻译。
+\parinterval 早期,基于规则的机器翻译大多依赖人工定义及书写的规则。主要有两类方法\upcite{nirenburg1989knowledge,hutchins1986machine,zarechnak1979history}:一类是基于转换规则的机器翻译方法,简称转换法。另一类是基于中间语言的方法。它们都以词典和人工书写的规则库作为翻译知识,用一系列规则的组合完成翻译。
 %----------------------------------------------------------------------------------------
 % NEW SUB-SECTION
......
@@ -618,7 +618,7 @@ F(x)=\int_{-\infty}^x f(x)\textrm{d}x
 \subsubsection{2.古德-图灵估计法}
 \vspace{-0.5em}
-\parinterval {\small\bfnew{古德-图灵估计法}}\index{古德-图灵估计法}(Good-Turing Estimate)\index{Good-Turing Estimate}是Alan Turing和他的助手I.J.Good开发的,作为他们在二战期间破解德国密码机Enigma所使用的方法的一部分,在1953 年I.J.Good将其发表。这一方法也是很多平滑算法的核心,其基本思路是:把非零的$n$元语法单元的概率降低匀给一些低概率$n$元语法单元,以减小最大似然估计与真实概率之间的偏离\upcite{good1953population,gale1995good}
+\parinterval {\small\bfnew{古德-图灵估计法}}\index{古德-图灵估计法}(Good-Turing Estimate)\index{Good-Turing Estimate}是Alan Turing和他的助手Irving John Good开发的,作为他们在二战期间破解德国密码机Enigma所使用的方法的一部分,在1953 年Irving John Good将其发表。这一方法也是很多平滑算法的核心,其基本思路是:把非零的$n$元语法单元的概率降低匀给一些低概率$n$元语法单元,以减小最大似然估计与真实概率之间的偏离\upcite{good1953population,gale1995good}
 \parinterval 假定在语料库中出现$r$次的$n$-gram有$n_r$个,特别的,出现0次的$n$-gram(即未登录词及词串)出现的次数为$n_0$个。语料库中全部单词的总个数为$N$,显然
 \begin{eqnarray}
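An aside on the Good-Turing hunk above: the estimator replaces each raw count r by r* = (r+1) n_{r+1} / n_r, where n_r is the number of distinct n-grams seen exactly r times, and reserves n_1 / N of the probability mass for unseen events. A minimal Python sketch under exactly those definitions (the names are illustrative, not from the book's codebase):

from collections import Counter

def good_turing(counts):
    """Good-Turing adjusted counts: r* = (r + 1) * n_{r+1} / n_r."""
    n_r = Counter(counts.values())                 # frequency of frequencies
    adjusted = {}
    for ngram, r in counts.items():
        if n_r.get(r + 1):
            adjusted[ngram] = (r + 1) * n_r[r + 1] / n_r[r]
        else:
            adjusted[ngram] = float(r)             # no higher bucket: keep raw count
    total = sum(counts.values())                   # N in the text
    p_unseen = n_r[1] / total if total else 0.0    # mass reserved for the n_0 events
    return adjusted, p_unseen

adjusted, p0 = good_turing(Counter(["I am", "I am", "I was", "you are"]))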
@@ -841,7 +841,6 @@ c_{\textrm{KN}}(\cdot) = \left\{\begin{array}{ll}
 \parinterval 在这种序列生成方式的基础上,实现搜索通常有两种方法\ \dash\ 深度优先遍历和宽度优先遍历\upcite{DBLP:books/mg/CormenLR89}。在深度优先遍历中,每次从词表中可重复地选择一个单词,然后从左至右地生成序列,直到<eos>被选择,此时一个完整的单词序列被生成出来。然后从<eos>回退到上一个单词,选择之前词表中未被选择到的候选单词代替<eos>,并继续挑选下一个单词直到<eos>被选到,如果上一个单词的所有可能都被枚举过,那么回退到上上一个单词继续枚举,直到回退到<sos>,这时候枚举结束。在宽度优先遍历中,每次不是只选择一个单词,而是枚举所有单词。
-
 有一个一个简单的例子。假设词表只含两个单词\{a, b\},从<sos>开始枚举所有单词,有三种可能:
 \begin{eqnarray}
 \text{\{<sos> a, <sos> b, <sos> <eos>\}} \nonumber
 \end{eqnarray}
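An aside on the enumeration passage above: a minimal sketch of the depth-first traversal it describes, over the two-word vocabulary {a, b} from the example; the max_len cutoff is an added assumption that keeps the otherwise unbounded tree finite:

def enumerate_sequences(vocab, max_len):
    """Depth-first enumeration of all word sequences that end in <eos>."""
    results = []
    def dfs(prefix):
        if len(prefix) > max_len:
            return
        for w in vocab + ["<eos>"]:
            if w == "<eos>":
                results.append(prefix + [w])   # a complete sequence
            else:
                dfs(prefix + [w])              # extend, then backtrack
    dfs(["<sos>"])
    return results

# one-step expansions from <sos>, the three cases listed in the text:
# <sos> a, <sos> b, <sos> <eos>
print(enumerate_sequences(["a", "b"], max_len=2))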
@@ -900,14 +899,12 @@ c_{\textrm{KN}}(\cdot) = \left\{\begin{array}{ll}
 %-------------------------------------------
 \parinterval 从这个角度来看,在树的遍历中,可以很自然地引入语言模型打分:在解空间树中引入节点的权重\ \dash\ 将当前节点$i$的得分重设为语言模型打分$\log \funp{P}(w_i | w_1 w_2 \ldots w_{i-1})$,其中$w_1 w_2 \ldots w_{i-1}$是该节点的全部祖先。与先前不同的是,由于在使用语言模型打分时,词的概率通常小于1,因此句子很长时概率会非常小,容易造成浮点误差,所以这里使用概率的对数形式$\log \funp{P}(w_i | w_1 w_2 \ldots w_{i-1})$代替$\funp{P}(w_i | w_1 w_2 \ldots w_{i-1})$。此时对于图中一条包含<eos>的完整序列来说,它的最终得分$\textrm{score}(\cdot)$可以被定义为:
-
 \begin{eqnarray}
 \textrm{score}(w_1 w_2 \ldots w_m) & = & \log \funp{P}(w_1 w_2 \ldots w_m) \nonumber \\
 & = & \sum_{i=1}^{m}\log \funp{P}(w_i | w_1 w_2 \ldots w_{i-1})
 \end{eqnarray}
 通常,$\textrm{score}(\cdot)$也被称作{\small\bfnew{模型得分}}\index{模型得分}(Model Score\index{Model Score})。如图\ref{fig:2-15}所示,可知红线所示单词序列“<sos>\ I\ agree\ <eos>”的模型得分为:
-
 \begin{eqnarray}
 &&\textrm{score(<sos>\ I\ agree\ <eos>)} \nonumber \\
 & = & \log \funp{P}(\textrm{<sos>}) + \log \funp{P}(\textrm{I} | \textrm{<sos>}) + \log \funp{P}(\textrm{agree} | \textrm{<sos>\ I}) + \log \funp{P}(\textrm{<sos>}| \textrm{<sos>\ I\ agree}) \nonumber \\
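An aside on the score definition above: the model score of a sequence is just the sum of word-level log-probabilities along its path. A minimal sketch, with logprob standing in for any n-gram language model; the toy probability table below is invented purely for illustration:

import math

def model_score(sentence, logprob):
    """score(w_1 ... w_m) = sum_i log P(w_i | w_1 ... w_{i-1})."""
    score = 0.0
    for i, w in enumerate(sentence):
        score += logprob(w, sentence[:i])   # log P(w_i | history)
    return score

toy = {("<sos>",): 1.0, ("<sos>", "I"): 0.3,
       ("<sos>", "I", "agree"): 0.1, ("<sos>", "I", "agree", "<eos>"): 0.5}
logprob = lambda w, h: math.log(toy[tuple(h) + (w,)])
print(model_score(["<sos>", "I", "agree", "<eos>"], logprob))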
@@ -925,7 +922,7 @@ c_{\textrm{KN}}(\cdot) = \left\{\begin{array}{ll}
 \end{figure}
 %-------------------------------------------
-\parinterval 这样,语言模型的打分与解空间树的遍历就融合了在一起。于是,序列生成的问题可以被重新描述为:寻找所有单词序列组成的解空间树中权重总和最大的一条路径。在这个定义下,前面提到的两种枚举词序列的方法就是经典的{\small\bfnew{深度优先搜索}}\index{深度优先搜索}(Depth-first Search)\upcite{even2011graph}\index{Depth-first Search}和{\small\bfnew{宽度优先搜索}}\index{宽度优先搜索}(Breadth-first Search)\upcite{lee1961an}\index{Breadth-first Search}的雏形。在后面的内容中可以看到,从遍历解空间树的角度出发,可以对原始这些搜索策略的效率进行优化。
+\parinterval 这样,语言模型的打分与解空间树的遍历就融合了在一起。于是,序列生成的问题可以被重新描述为:寻找所有单词序列组成的解空间树中权重总和最大的一条路径。在这个定义下,前面提到的两种枚举词序列的方法就是经典的{\small\bfnew{深度优先搜索}}\index{深度优先搜索}(Depth-first Search)\index{Depth-first Search}和{\small\bfnew{宽度优先搜索}}\index{宽度优先搜索}(Breadth-first Search)\index{Breadth-first Search}的雏形\upcite{even2011graph,tarjan1972depth}。在后面的内容中可以看到,从遍历解空间树的角度出发,可以对原始这些搜索策略的效率进行优化。
 %----------------------------------------------------------------------------------------
 % NEW SUB-SECTION
@@ -1033,7 +1030,7 @@ c_{\textrm{KN}}(\cdot) = \left\{\begin{array}{ll}
 \parinterval 束搜索也有很多的改进版本。回忆一下,在无信息搜索策略中可以使用剪枝技术来提升搜索的效率。而实际上,束搜索本身也是一种剪枝方法。因此有时也把束搜索称作{\small\bfnew{束剪枝}}\index{束剪枝}(Beam Pruning)\index{Beam Pruning}。在这里有很多其它的剪枝策略可供选择,例如可以只保留与当前最佳路径得分相差在$\theta$之内的路径,也就是搜索只保留得分差距在一定范围内的路径,这种方法也被称作{\small\bfnew{直方图剪枝}}\index{直方图剪枝}(Histogram Pruning)\index{Histogram Pruning}。
-\parinterval 对于语言模型来说,当多个路径中最高得分比当前搜索到的最好的解的得分低时,可以立刻停止搜索。因为此时序列越长语言模型得分$\log \funp{P}(w_1 w_2 \ldots w_m)$会越低,继续扩展这些路径不会产生更好的结果。这个技术通常也被称为{\small\bfnew{最佳停止条件}}\index{最佳停止条件}(Optimal Stopping Criteria)\index{Optimal Stopping Criteria}。类似的思想也被用于机器翻译等任务{\color{red} (此处引用liang huang的论文)}。
+\parinterval 对于语言模型来说,当多个路径中最高得分比当前搜索到的最好的解的得分低时,可以立刻停止搜索。因为此时序列越长语言模型得分$\log \funp{P}(w_1 w_2 \ldots w_m)$会越低,继续扩展这些路径不会产生更好的结果。这个技术通常也被称为{\small\bfnew{最佳停止条件}}\index{最佳停止条件}(Optimal Stopping Criteria)\index{Optimal Stopping Criteria}。类似的思想也被用于机器翻译等任务\upcite{DBLP:conf/emnlp/HuangZM17,DBLP:conf/emnlp/Yang0M18}。
 \parinterval 总的来说,虽然局部搜索由于没有遍历完整的解空间,使得这类方法无法保证找到最优解。但是,局部搜索算法大大降低了搜索过程的时间、空间复杂度。因此在语言模型生成和机器翻译的解码过程中常常使用局部搜索算法。在{\chapterseven}和{\chapterten}中还将介绍这些算法的具体应用。
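An aside on the pruning passage above: a minimal sketch that combines beam pruning with the histogram-style rule of keeping only paths within theta of the current best score. Here logprob is an assumed language-model callable, and the whole function is an illustration under those assumptions rather than the book's decoder:

import heapq

def beam_search(logprob, vocab, beam_size=4, theta=2.0, max_len=20):
    """Breadth-first search with beam pruning and a theta score window."""
    beam = [(0.0, ["<sos>"])]
    finished = []
    for _ in range(max_len):
        candidates = []
        for score, seq in beam:
            for w in vocab + ["<eos>"]:
                cand = (score + logprob(w, seq), seq + [w])
                (finished if w == "<eos>" else candidates).append(cand)
        if not candidates:
            break
        best = max(s for s, _ in candidates)
        candidates = [c for c in candidates if c[0] >= best - theta]  # histogram pruning
        beam = heapq.nlargest(beam_size, candidates, key=lambda c: c[0])
    return max(finished, key=lambda c: c[0]) if finished else None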
@@ -1057,7 +1054,7 @@ c_{\textrm{KN}}(\cdot) = \left\{\begin{array}{ll}
 \vspace{0.5em}
 \item 本章更多地关注了语言模型的基本问题和求解思路,但是基于$n$-gram的方法并不是语言建模的唯一方法。从现在自然语言处理的前沿看,端到端的深度学习方法在很多任务中都取得了领先的性能。语言模型同样可以使用这些方法\upcite{jing2019a},而且在近些年取得了巨大成功。例如,最早提出的前馈神经语言模型\upcite{bengio2003a}和后来的基于循环单元的语言模型\upcite{mikolov2010recurrent}、基于长短期记忆单元的语言模型\upcite{sundermeyer2012lstm}以及现在非常流行的Transformer\upcite{vaswani2017attention}。 关于神经语言模型的内容,会在{\chapternine}进行进一步介绍。
 \vspace{0.5em}
-\item 最后,本章结合语言模型的序列生成任务对搜索技术进行了介绍。类似地,机器翻译任务也需要从大量的翻译后选中快速寻找最优译文。因此在机器翻译任务中也使用了搜索方法,这个过程通常被称作{\small\bfnew{解码}}\index{解码}(Decoding)\index{Decoding}。例如,有研究者在基于词的翻译模型中尝试使用启发式搜索\upcite{DBLP:conf/acl/OchUN01,DBLP:conf/acl/WangW97,tillmann1997a}以及贪婪搜索方法\upcite{germann2001fast}\upcite{germann2003greedy},也有研究者研究基于短语的栈解码方法{\color{red}(引用Moses)}。此外,解码方法还包括有限状态机解码\upcite{bangalore2001a}\upcite{bangalore2000stochastic}以及基于语言学约束的解码\upcite{venugopal2007an}{\color{red}(找SAMT、树到串、串到树翻译、Hiero的论文)}。相关内容将在{\chaptereight}和{\chapterfourteen} 进行介绍。
+\item 最后,本章结合语言模型的序列生成任务对搜索技术进行了介绍。类似地,机器翻译任务也需要从大量的翻译后选中快速寻找最优译文。因此在机器翻译任务中也使用了搜索方法,这个过程通常被称作{\small\bfnew{解码}}\index{解码}(Decoding)\index{Decoding}。例如,有研究者在基于词的翻译模型中尝试使用启发式搜索\upcite{DBLP:conf/acl/OchUN01,DBLP:conf/acl/WangW97,tillmann1997a}以及贪婪搜索方法\upcite{germann2001fast}\upcite{germann2003greedy},也有研究者研究基于短语的栈解码方法\upcite{Koehn2007Moses,DBLP:conf/amta/Koehn04}。此外,解码方法还包括有限状态机解码\upcite{bangalore2001a}\upcite{bangalore2000stochastic}以及基于语言学约束的解码\upcite{venugopal2007an,zollmann2007the,liu2006tree,galley2006scalable,chiang2005a}。相关内容将在{\chaptereight}和{\chapterfourteen} 进行介绍。
 \vspace{0.5em}
 \end{itemize}
 \end{adjustwidth}
@@ -718,6 +718,22 @@
 publisher={Springer}
 }
+@book{hutchins1986machine,
+  title={Machine translation: past, present, future},
+  author={Hutchins, William John},
+  year={1986},
+  publisher={Ellis Horwood Chichester}
+}
+
+@article{zarechnak1979history,
+  title={The history of machine translation},
+  author={Zarechnak, Michael},
+  journal={Machine Translation},
+  volume={1979},
+  pages={1--87},
+  year={1979}
+}
+
 @article{jurafsky2000speech,
 title="Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics, and Speech Recognition",
 author="Daniel {Jurafsky} and James H. {Martin}",
@@ -1697,16 +1713,6 @@
 year={2000}
 }
-@article{lee1961an,
-  title="An Algorithm for Path Connections and Its Applications",
-  author="C. Y. {Lee}",
-  journal="Ire Transactions on Electronic Computers",
-  volume="10",
-  number="3",
-  pages="346--365",
-  year="1961"
-}
-
 @book{even2011graph,
 title={Graph algorithms},
 author={Even, Shimon},
@@ -1714,6 +1720,49 @@
 publisher={Cambridge University Press}
 }
+@article{tarjan1972depth,
+  title={Depth-First Search and Linear Graph Algorithms},
+  author={Robert Endre {Tarjan}},
+  journal={SIAM Journal on Computing},
+  volume={1},
+  number={2},
+  pages={146--160},
+  year={1972}
+}
+
+@inproceedings{DBLP:conf/emnlp/Yang0M18,
+  author    = {Yilin Yang and
+               Liang Huang and
+               Mingbo Ma},
+  editor    = {Ellen Riloff and
+               David Chiang and
+               Julia Hockenmaier and
+               Jun'ichi Tsujii},
+  title     = {Breaking the Beam Search Curse: {A} Study of (Re-)Scoring Methods
+               and Stopping Criteria for Neural Machine Translation},
+  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural
+               Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
+  pages     = {3054--3059},
+  publisher = {Association for Computational Linguistics},
+  year      = {2018}
+}
+
+@inproceedings{DBLP:conf/amta/Koehn04,
+  author    = {Philipp Koehn},
+  editor    = {Robert E. Frederking and
+               Kathryn Taylor},
+  title     = {Pharaoh: {A} Beam Search Decoder for Phrase-Based Statistical Machine
+               Translation Models},
+  booktitle = {Machine Translation: From Real Users to Research, 6th Conference of
+               the Association for Machine Translation in the Americas, {AMTA} 2004,
+               Washington, DC, USA, September 28-October 2, 2004, Proceedings},
+  series    = {Lecture Notes in Computer Science},
+  volume    = {3265},
+  pages     = {115--124},
+  publisher = {Springer},
+  year      = {2004}
+}
+
 %%%%% chapter 2------------------------------------------------------
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......
@@ -122,6 +122,22 @@
 publisher={Springer}
 }
+@book{hutchins1986machine,
+  title={Machine translation: past, present, future},
+  author={Hutchins, William John},
+  year={1986},
+  publisher={Ellis Horwood Chichester}
+}
+
+@article{zarechnak1979history,
+  title={The history of machine translation},
+  author={Zarechnak, Michael},
+  journal={Machine Translation},
+  volume={1979},
+  pages={1--87},
+  year={1979}
+}
+
 @book{冯志伟2004机器翻译研究,
 title={机器翻译研究},
 author={冯志伟},
@@ -176,6 +192,45 @@
 year={1993}
 }
+@article{Wu2016GooglesNM,
+  author  = {Yonghui Wu and
+             Mike Schuster and
+             Zhifeng Chen and
+             Quoc V. Le and
+             Mohammad Norouzi and
+             Wolfgang Macherey and
+             Maxim Krikun and
+             Yuan Cao and
+             Qin Gao and
+             Klaus Macherey and
+             Jeff Klingner and
+             Apurva Shah and
+             Melvin Johnson and
+             Xiaobing Liu and
+             Lukasz Kaiser and
+             Stephan Gouws and
+             Yoshikiyo Kato and
+             Taku Kudo and
+             Hideto Kazawa and
+             Keith Stevens and
+             George Kurian and
+             Nishant Patil and
+             Wei Wang and
+             Cliff Young and
+             Jason Smith and
+             Jason Riesa and
+             Alex Rudnick and
+             Oriol Vinyals and
+             Greg Corrado and
+             Macduff Hughes and
+             Jeffrey Dean},
+  title   = {Google's Neural Machine Translation System: Bridging the Gap between
+             Human and Machine Translation},
+  journal = {CoRR},
+  volume  = {abs/1609.08144},
+  year    = {2016}
+}
+
 @inproceedings{DBLP:journals/corr/LuongPM15,
 author = {Thang Luong and
 Hieu Pham and
@@ -502,14 +557,14 @@
 publisher={Cambridge University Press}
 }
-@article{lee1961an,
-  title="An Algorithm for Path Connections and Its Applications",
-  author="C. Y. {Lee}",
-  journal="Ire Transactions on Electronic Computers",
-  volume="10",
-  number="3",
-  pages="346--365",
-  year="1961"
+@article{tarjan1972depth,
+  title={Depth-First Search and Linear Graph Algorithms},
+  author={Robert Endre {Tarjan}},
+  journal={SIAM Journal on Computing},
+  volume={1},
+  number={2},
+  pages={146--160},
+  year={1972}
 }
 @article{DBLP:journals/ai/SabharwalS11,
@@ -574,11 +629,42 @@
 pages={189--211},
 year={1990}
 }
+%缩写
+@inproceedings{DBLP:conf/emnlp/HuangZM17,
+  author    = {Liang Huang and
+               Kai Zhao and
+               Mingbo Ma},
+  title     = {When to Finish? Optimal Beam Search for Neural Text Generation (modulo
+               beam size)},
+  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural
+               Language Processing, {EMNLP} 2017, Copenhagen, Denmark, September
+               9-11, 2017},
+  pages     = {2134--2139},
+  publisher = {Association for Computational Linguistics},
+  year      = {2017}
+}
+
+@inproceedings{DBLP:conf/emnlp/Yang0M18,
+  author    = {Yilin Yang and
+               Liang Huang and
+               Mingbo Ma},
+  //editor  = {Ellen Riloff and
+               David Chiang and
+               Julia Hockenmaier and
+               Jun'ichi Tsujii},
+  title     = {Breaking the Beam Search Curse: {A} Study of (Re-)Scoring Methods
+               and Stopping Criteria for Neural Machine Translation},
+  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural
+               Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
+  pages     = {3054--3059},
+  publisher = {Association for Computational Linguistics},
+  year      = {2018}
+}
+
 @article{jelinek1980interpolated,
 title={Interpolated estimation of Markov source parameters from sparse data},
 author={F. {Jelinek}},
-journal={Proc. Workshop on Pattern Recognition in Practice, 1980},
+journal={Pattern Recognition in Practice},
 pages={381--397},
 year={1980}
 }
@@ -756,7 +842,7 @@
 @inproceedings{DBLP:conf/acl/WangW97,
 author = {Ye{-}Yi Wang and
 Alex Waibel},
-editor = {Philip R. Cohen and
+//editor = {Philip R. Cohen and
 Wolfgang Wahlster},
 title = {Decoding Algorithm in Statistical Machine Translation},
 booktitle = {35th Annual Meeting of the Association for Computational Linguistics
@@ -795,6 +881,47 @@
 year={2003}
 }
+@inproceedings{Koehn2007Moses,
+  author    = {Philipp Koehn and
+               Hieu Hoang and
+               Alexandra Birch and
+               Chris Callison{-}Burch and
+               Marcello Federico and
+               Nicola Bertoldi and
+               Brooke Cowan and
+               Wade Shen and
+               Christine Moran and
+               Richard Zens and
+               Chris Dyer and
+               Ondrej Bojar and
+               Alexandra Constantin and
+               Evan Herbst},
+  //editor  = {John A. Carroll and
+               Antal van den Bosch and
+               Annie Zaenen},
+  title     = {Moses: Open Source Toolkit for Statistical Machine Translation},
+  booktitle = {{ACL} 2007, Proceedings of the 45th Annual Meeting of the Association
+               for Computational Linguistics, June 23-30, 2007, Prague, Czech Republic},
+  publisher = {The Association for Computational Linguistics},
+  year      = {2007}
+}
+
+@inproceedings{DBLP:conf/amta/Koehn04,
+  author    = {Philipp Koehn},
+  //editor  = {Robert E. Frederking and
+               Kathryn Taylor},
+  title     = {Pharaoh: {A} Beam Search Decoder for Phrase-Based Statistical Machine
+               Translation Models},
+  booktitle = {Machine Translation: From Real Users to Research, 6th Conference of
+               the Association for Machine Translation in the Americas, {AMTA} 2004,
+               Washington, DC, USA, September 28-October 2, 2004, Proceedings},
+  series    = {Lecture Notes in Computer Science},
+  volume    = {3265},
+  pages     = {115--124},
+  publisher = {Springer},
+  year      = {2004}
+}
+
 @inproceedings{bangalore2001a,
 title={A finite-state approach to machine translation},
 author={S. {Bangalore} and G. {Riccardi}},
@@ -819,6 +946,73 @@
 year={2007}
 }
+@inproceedings{zollmann2007the,
+  author    = {Andreas Zollmann and
+               Ashish Venugopal and
+               Matthias Paulik and
+               Stephan Vogel},
+  //editor  = {Chris Callison{-}Burch and
+               Philipp Koehn and
+               Cameron S. Fordyce and
+               Christof Monz},
+  title     = {The Syntax Augmented {MT} {(SAMT)} System at the Shared Task for the
+               2007 {ACL} Workshop on Statistical Machine Translation},
+  booktitle = {Proceedings of the Second Workshop on Statistical Machine Translation,
+               WMT@ACL 2007, Prague, Czech Republic, June 23, 2007},
+  pages     = {216--219},
+  publisher = {Association for Computational Linguistics},
+  year      = {2007}
+}
+
+@inproceedings{liu2006tree,
+  author    = {Yang Liu and
+               Qun Liu and
+               Shouxun Lin},
+  //editor  = {Nicoletta Calzolari and
+               Claire Cardie and
+               Pierre Isabelle},
+  title     = {Tree-to-String Alignment Template for Statistical Machine Translation},
+  booktitle = {{ACL} 2006, 21st International Conference on Computational Linguistics
+               and 44th Annual Meeting of the Association for Computational Linguistics,
+               Proceedings of the Conference, Sydney, Australia, 17-21 July 2006},
+  publisher = {The Association for Computer Linguistics},
+  year      = {2006}
+}
+
+@inproceedings{galley2006scalable,
+  author    = {Michel Galley and
+               Jonathan Graehl and
+               Kevin Knight and
+               Daniel Marcu and
+               Steve DeNeefe and
+               Wei Wang and
+               Ignacio Thayer},
+  //editor  = {Nicoletta Calzolari and
+               Claire Cardie and
+               Pierre Isabelle},
+  title     = {Scalable Inference and Training of Context-Rich Syntactic Translation
+               Models},
+  booktitle = {{ACL} 2006, 21st International Conference on Computational Linguistics
+               and 44th Annual Meeting of the Association for Computational Linguistics,
+               Proceedings of the Conference, Sydney, Australia, 17-21 July 2006},
+  publisher = {The Association for Computer Linguistics},
+  year      = {2006}
+}
+
+@inproceedings{chiang2005a,
+  author    = {David Chiang},
+  //editor  = {Kevin Knight and
+               Hwee Tou Ng and
+               Kemal Oflazer},
+  title     = {A Hierarchical Phrase-Based Model for Statistical Machine Translation},
+  booktitle = {{ACL} 2005, 43rd Annual Meeting of the Association for Computational
+               Linguistics, Proceedings of the Conference, 25-30 June 2005, University
+               of Michigan, {USA}},
+  pages     = {263--270},
+  publisher = {The Association for Computer Linguistics},
+  year      = {2005}
+}
+
 %%%%% chapter 2------------------------------------------------------
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......