update chapter 9

12e5897f · 曹润柘 · f1e66dc5 · 12e5897f · 12e5897f · 12e5897f
Commit 12e5897f authored Sep 14, 2020 by 曹润柘
--- a/Chapter8/Figures/figure-classification-of-models-based-on-syntax.tex
+++ b/Chapter8/Figures/figure-classification-of-models-based-on-syntax.tex
@@ -7,18 +7,20 @@
 \tikzstyle{cnode} = [minimum width=7.0em,minimum height=2.5em,rounded corners=0.2em];
 \tikzstyle{xnode} = [minimum width=4.5em,minimum height=2.5em,rounded corners=0.2em];

-\node[xnode,anchor=west,fill=red!25,align=left] (itg) at (0,0) {\footnotesize{反向转录}\\\footnotesize{文法}};
-\node[xnode,anchor=west,fill=red!25,align=left] (hiero) at ([xshift=0.5em]itg.east) {\footnotesize{层次短语}\\\footnotesize{模型}};
-\node[xnode,anchor=west,fill=blue!25,align=left] (s2t) at ([xshift=0.5em]hiero.east) {\footnotesize{串到树}\\\footnotesize{模型}};
-\node[xnode,anchor=west,fill=blue!25,align=left] (t2s) at ([xshift=0.5em]s2t.east) {\footnotesize{树到串}\\\footnotesize{模型}};
-\node[xnode,anchor=west,fill=blue!25,align=left] (t2t) at ([xshift=0.5em]t2s.east) {\footnotesize{树到树}\\\footnotesize{模型}};
-
-\node[cnode,anchor=south,fill=red!25,align=left] (cat1) at ([xshift=-0.2em,yshift=2em]hiero.north west) {\footnotesize{基于形式文法}\\\footnotesize{的模型}};
-\node[cnode,anchor=south,fill=blue!25,align=left] (cat2) at ([xshift=-0.0em,yshift=2em]t2s.north) {\footnotesize{基于语言学}\\\footnotesize{句法的模型}};
-\node[cnode,anchor=south,minimum width=10.0em,fill=green!25,align=center] (cat0) at ([xshift=-3em,yshift=2em]cat2.north west) {\footnotesize{(广义上)}\\\footnotesize{基于句法的模型}};
-
-\draw [-,thick] ([yshift=0.1em,xshift=1em]cat1.north) -- ([xshift=-1.5em,yshift=-0.1em]cat0.south);
-\draw [-,thick] ([yshift=0.1em,xshift=-1em]cat2.north) -- ([xshift=1.5em,yshift=-0.1em]cat0.south);
+\node[cnode,anchor=south,minimum width=10.0em,fill=green!25,align=center] (cat0) at (0,0) {\footnotesize{（广义上）}\\\footnotesize{基于句法的模型}};
+\node[cnode,anchor=north,fill=red!25,align=left] (cat1) at ([xshift=-6.5em,yshift=-2em]cat0.south) {\footnotesize{基于形式文法}\\\footnotesize{的模型}};
+\node[cnode,anchor=north,fill=blue!25,align=left] (cat2) at ([xshift=6.5em,yshift=-2em]cat0.south) {\footnotesize{基于语言学}\\\footnotesize{句法的模型}};
+
+
+\node[xnode,anchor=north,fill=red!25,align=left] (itg) at ([xshift=-2.5em,yshift=-2.0em]cat1.south) {\footnotesize{反向转录}\\\footnotesize{文法}};
+\node[xnode,anchor=north,fill=red!25,align=left] (hiero) at ([xshift=2.5em,yshift=-2.0em]cat1.south) {\footnotesize{层次短语}\\\footnotesize{模型}};
+\node[xnode,anchor=north,fill=blue!25,align=left] (s2t) at ([xshift=-5.0em,yshift=-2.0em]cat2.south) {\footnotesize{串到树}\\\footnotesize{模型}};
+\node[xnode,anchor=north,fill=blue!25,align=left] (t2s) at ([xshift=0.0em,yshift=-2.0em]cat2.south) {\footnotesize{树到串}\\\footnotesize{模型}};
+\node[xnode,anchor=north,fill=blue!25,align=left] (t2t) at ([xshift=5.0em,yshift=-2.0em]cat2.south) {\footnotesize{树到树}\\\footnotesize{模型}};
+
+
+\draw [-,thick] ([yshift=-0.1em,xshift=1em]cat0.south) -- ([xshift=-1.5em,yshift=0.1em]cat2.north);
+\draw [-,thick] ([yshift=-0.1em,xshift=-1em]cat0.south) -- ([xshift=1.5em,yshift=0.1em]cat1.north);
 \draw [-,thick] ([yshift=0.1em]itg.north) -- ([xshift=-0.5em,yshift=-0.1em]cat1.south);
 \draw [-,thick] ([yshift=0.1em]hiero.north) -- ([xshift=0.5em,yshift=-0.1em]cat1.south);
 \draw [-,thick] ([yshift=0.1em]s2t.north) -- ([xshift=-0.8em,yshift=-0.1em]cat2.south);

--- a/Chapter8/Figures/figure-one-best-node-alignment-and-alignment-matrix.tex
+++ b/Chapter8/Figures/figure-one-best-node-alignment-and-alignment-matrix.tex
@@ -105,7 +105,7 @@
 \end{flushright}
 \begin{center}
 \vspace{-1em}
-\footnotesize{(a)节点对齐矩阵（1-best vs. Matrix）}
+\footnotesize{(a)节点对齐矩阵（1-best vs Matrix）}
 \end{center}

 \begin{center}
@@ -120,7 +120,7 @@
 \footnotesize{$r_6$} & \footnotesize{AS(了) $\rightarrow$ VBZ(have)} \\
 \footnotesize{$r_8$} & \footnotesize{VP(AD$_1$ VP(VV$_2$ AS$_3$)) $\rightarrow$} \\
                     & \footnotesize{VP(VBZ$_3$ ADVP(RB$_1$ VBN$_2$))} \\
-\rule{0pt}{11pt} \\
+\rule{0pt}{9.5pt} \\
 \\
 \\
 \end{tabular}

--- a/Chapter8/chapter8.tex
+++ b/Chapter8/chapter8.tex
@@ -190,7 +190,7 @@

 \subsubsection{1. 文法定义}

-\parinterval 层次短语模型中一个重要的概念是{\small\bfnew{同步上下文无关文法}}\index{同步上下文无关文法}（Synchronous Context-free Grammar\index{Synchronous Context-free Grammar}，简称SCFG）。SCFG可以被看作是对源语言和目标语言上下文无关文法的融合，它要求源语言和目标语言的产生式及产生式中的变量具有对应关系。具体定义如下：
+\parinterval 层次短语模型中一个重要的概念是{\small\bfnew{同步上下文无关文法}}\index{同步上下文无关文法}（Synchronous Context-free Grammar\index{Synchronous Context-free Grammar}，SCFG）。SCFG可以被看作是对源语言和目标语言上下文无关文法的融合，它要求源语言和目标语言的产生式及产生式中的变量具有对应关系。具体定义如下：

 %-------------------------------------------
 \vspace{0.5em}
@@ -201,7 +201,7 @@
 \begin{enumerate}
 \item $N$是非终结符集合。
 \item $T_s$和$T_t$分别是源语言和目标语言的终结符集合。
-\item $I \subseteq N$起始非终结符集合。
+\item $I \subseteq N$是起始非终结符集合。
 \item $R$是规则集合，每条规则$r \in R$有如下形式：
 \end{enumerate}
 \vspace{0.3em}
@@ -319,9 +319,15 @@ d = {r_1} \circ {r_2} \circ {r_3} \circ {r_4}

 \subsection{层次短语规则抽取}

-\parinterval 层次短语系统所使用的文法包括两部分：1）不含变量的层次短语规则（短语翻译）；2）含有变量的层次短语规则。短语翻译的抽取直接复用基于短语的系统即可。此处重点讨论如何抽取含有变量的层次短语规则。
-
-\parinterval 在{\chapterseven}短语抽取一节已经介绍了短语与词对齐相兼容的概念。这里，所有层次短语规则也是与词对齐相兼容（一致）的。
+\parinterval 层次短语系统所使用的文法包括两部分：
+\begin{itemize}
+\vspace{0.5em}
+\item 不含变量的层次短语规则（短语翻译）；
+\vspace{0.5em}
+\item 含有变量的层次短语规则。
+\vspace{0.5em}
+\end{itemize}
+\parinterval 短语翻译的抽取直接复用基于短语的系统即可。此处重点讨论如何抽取含有变量的层次短语规则。在{\chapterseven}短语抽取一节已经介绍了短语与词对齐相兼容的概念。这里，所有层次短语规则也是与词对齐相兼容（一致）的。

 %-------------------------------------------
 \vspace{0.5em}
@@ -400,7 +406,7 @@ h_i (d,\vectorn{\emph{t}},\vectorn{\emph{s}})=\sum_{r \in d}h_i (r)
 \label{eq:8-4}
 \end{eqnarray}

-\parinterval 公式\ref{eq:8-4}中，$r$表示推导$d$中的一条规则，$h_i (r)$表示规则$r$上的第$i$个特征。可以看出，推导$d$的特征值就是所有包含在$d$中规则的特征值的和。进一步，可以定义
+\parinterval 公式\eqref{eq:8-4}中，$r$表示推导$d$中的一条规则，$h_i (r)$表示规则$r$上的第$i$个特征。可以看出，推导$d$的特征值就是所有包含在$d$中规则的特征值的和。进一步，可以定义
 \begin{eqnarray}
 \textrm{rscore}(d,\vectorn{\emph{t}},\vectorn{\emph{s}})=\sum_{i=1}^7 \lambda_i \cdot h_i (d,\vectorn{\emph{t}},\vectorn{\emph{s}})
 \label{eq:8-5}
@@ -443,7 +449,7 @@ h_i (d,\vectorn{\emph{t}},\vectorn{\emph{s}})=\sum_{r \in d}h_i (r)
 \label{eq:8-8}
 \end{eqnarray}

-\parinterval 由于层次短语规则本质上就是CFG规则，因此公式\ref{eq:8-7}代表了一个典型的句法分析过程。需要做的是，用模型源语言端的CFG对输入句子进行分析，同时用模型目标语言端的CFG生成译文。基于CFG的句法分析是自然语言处理中的经典问题。一种广泛使用的方法是：首先把CFG转化为$\varepsilon$-free的{\small\bfnew{乔姆斯基范式}}\index{乔姆斯基范式}（Chomsky Normal Form）\index{Chomsky Normal Form}\footnote[5]{能够证明任意的CFG都可以被转换为乔姆斯基范式，即文法只包含形如A$\to$BC或A$\to$a的规则。这里，假设文法中不包含空串产生式A$\to\varepsilon$，其中$\varepsilon$表示空字符串。}，之后采用CKY方法进行分析。
+\parinterval 由于层次短语规则本质上就是CFG规则，因此公式\eqref{eq:8-7}代表了一个典型的句法分析过程。需要做的是，用模型源语言端的CFG对输入句子进行分析，同时用模型目标语言端的CFG生成译文。基于CFG的句法分析是自然语言处理中的经典问题。一种广泛使用的方法是：首先把CFG转化为$\varepsilon$-free的{\small\bfnew{乔姆斯基范式}}\index{乔姆斯基范式}（Chomsky Normal Form）\index{Chomsky Normal Form}\footnote[5]{能够证明任意的CFG都可以被转换为乔姆斯基范式，即文法只包含形如A$\to$BC或A$\to$a的规则。这里，假设文法中不包含空串产生式A$\to\varepsilon$，其中$\varepsilon$表示空字符串。}，之后采用CKY方法进行分析。

 \parinterval CKY是形式语言中一种常用的句法分析方法\upcite{cocke1969programming,younger1967recognition,kasami1966efficient}。它主要用于分析符合乔姆斯基范式的句子。由于乔姆斯基范式中每个规则最多包含两叉（或者说两个变量），因此CKY方法也可以被看作是基于二叉规则的一种分析方法。对于一个待分析的字符串，CKY方法从小的“范围”开始，不断扩大分析的“范围”，最终完成对整个字符串的分析。在CKY方法中，一个重要的概念是{\small\bfnew{跨度}}\index{跨度}（Span）\index{Span}，所谓跨度表示了一个符号串的范围。这里可以把跨度简单地理解为从一个起始位置到一个结束位置中间的部分。

@@ -726,11 +732,11 @@ span\textrm{[0,4]}&=&\textrm{“猫} \quad \textrm{喜欢} \quad \textrm{吃} \q
 \cline{3-5}
 \rule{0pt}{15pt} & & \multicolumn{1}{c|}{树到串} & \multicolumn{1}{c}{串到树} & \multicolumn{1}{|c}{树到树} \\
 \hline
-源语句法 & 否 & 是 & 否 & 是 \\
-目标语句法 & 否 & 否 & 是 & 是 \\
-基于串的解码 & 是 & 否 & 是 & 是 \\
-基于树的解码 & 否 & 是 & 否 & 是 \\
-健壮性 & 高 & 中 & 中 & 低 \\
+\rule{0pt}{15pt}源语句法 & 否 & 是 & 否 & 是 \\
+\rule{0pt}{15pt}目标语句法 & 否 & 否 & 是 & 是 \\
+\rule{0pt}{15pt}基于串的解码 & 是 & 否 & 是 & 是 \\
+\rule{0pt}{15pt}基于树的解码 & 否 & 是 & 否 & 是 \\
+\rule{0pt}{15pt}健壮性 & 高 & 中 & 中 & 低 \\
 \end{tabular}
 }
 \end{center}
@@ -815,7 +821,6 @@ span\textrm{[0,4]}&=&\textrm{“猫} \quad \textrm{喜欢} \quad \textrm{吃} \q
 \parinterval 树片段的叶子节点既可以是终结符（单词）也可以是非终结符。当叶子节点为非终结符时，表示这个非终结符会被进一步替换，因此它可以被看作是变量。而源语言树结构和目标语言树结构中的变量是一一对应的，对应关系用虚线表示。

 \parinterval 这个双语映射关系可以被表示为一个基于树结构的文法规则，套用规则的定义$\langle\  \alpha_h, \beta_h\ \rangle \to \langle\ \alpha_r, \beta_r, \sim\ \rangle$形式，可以知道：
-
 \begin{eqnarray}
 \alpha_h &=& \textrm{VP} \nonumber \\
 \beta_h &=& \textrm{VP} \nonumber \\
@@ -823,13 +828,11 @@ span\textrm{[0,4]}&=&\textrm{“猫} \quad \textrm{喜欢} \quad \textrm{吃} \q
 \beta_r &=& \textrm{VP}(\textrm{VBZ(was)}\ \ \textrm{VP(VBN:}x\ \ \textrm{PP:}x)) \nonumber \\
 \sim &=& \{1-2,2-1\} \nonumber
 \end{eqnarray}
-
 \noindent 这里，$\alpha_h$和$\beta_h$表示规则的左部，对应树片段的根节点；$\alpha_r$和$\beta_r$是两种语言的树结构（序列化表示），其中标记为$x$的非终结符是变量。$\sim = \{1-2,2-1\}$表示源语言的第一个变量对应目标语言的第二个变量，而源语言的第二个变量对应目标语言的第一个变量，这也反映出两种语言句法结构中的调序现象。类似于层次短语规则，可以把规则中变量的对应关系用下标进行表示。例如，上面的规则也可以被写为如下形式：

 \begin{eqnarray}
 \langle\ \textrm{VP}, \textrm{VP}\ \rangle\ \to\ \langle\ \textrm{VP}(\textrm{PP}_{1} \ \textrm{VP(VV(表示)}\ \textrm{NN}_{2})),\ \ \textrm{VP}(\textrm{VBZ(was)}\ \textrm{VP(VBN}_{2} \ \textrm{PP}_{1})) \ \rangle \nonumber
 \end{eqnarray}
-
 \noindent 其中，两种语言中变量的对应关系为$\textrm{PP}_1 \leftrightarrow \textrm{PP}_1$，$\textrm{NN}_2 \leftrightarrow \textrm{VBN}_2$。

 %----------------------------------------------------------------------------------------
@@ -1575,7 +1578,7 @@ d_1 = {d'} \circ {r_5}
 \vspace{0.5em}
 \end{itemize}

-\parinterval 总的来说，基于句法的解码器较为复杂。无论是算法的设计还是工程技巧的运用，对开发者的能力都有一定要求。因此开发一个优秀的基于句法的机器翻译系统是一项有挑战的工作。
+\parinterval 总的来说，基于句法的解码器较为复杂，无论是算法的设计还是工程技巧的运用，对开发者的能力都有一定要求。因此开发一个优秀的基于句法的机器翻译系统是一项有挑战的工作。

 %----------------------------------------------------------------------------------------
 %    NEW SECTION
@@ -1583,7 +1586,7 @@ d_1 = {d'} \circ {r_5}
 \sectionnewpage
 \section{小结及深入阅读}

-\parinterval 自基于规则的方法开始，如何句法信息就是机器翻译研究人员关注的热点。在统计机器翻译时代，句法信息与机器翻译的结合成为了最具时态特色的研究方向之一。句法结构具有高度的抽象性，因此可以缓解基于词串方法不善于处理句子上层结构的问题。
+\parinterval 自基于规则的方法开始，如何使用句法信息就是机器翻译研究人员关注的热点。在统计机器翻译时代，句法信息与机器翻译的结合成为了最具时代特色的研究方向之一。句法结构具有高度的抽象性，因此可以缓解基于词串方法不善于处理句子上层结构的问题。

 \parinterval 本章对基于句法的机器翻译模型进行了介绍，并重点讨论了相关的建模、翻译规则抽取以及解码问题。从某种意义上说，基于句法的模型与基于短语的模型都同属一类模型，因为二者都假设：两种语言间存在由短语或者规则构成的翻译推导，而机器翻译的目标就是找到最优的翻译推导。但是，由于句法信息有其独特的性质，因此也给机器翻译带来了新的问题。有几方面问题值得关注：


--- a/bibliography.bib
+++ b/bibliography.bib
@@ -1862,7 +1862,7 @@
               Kenneth Ward Church and
               Willian Gale},
  title     = {Robust Bilingual Word Alignment for Machine Aided Translation},
-  publisher = {Very Large Corpora},
+  publisher = {Very Large Corpora},
  year      = {1993}
 }

@@ -2130,7 +2130,7 @@
  pages     = {711--715},
  publisher = {AAAI Press},
  year      = {2000}
-}
+}
 @inproceedings{dyer2013a,
  author    = {Chris Dyer and
               Victor Chahuneau and
@@ -2142,7 +2142,7 @@
 }
 @inproceedings{taskar2005a,
  author    = {Benjamin Taskar and
-               Simon Lacoste-Julien and
+               Simon Lacoste{-}Julien and
               Dan Klein},
  title     = {A Discriminative Matching Approach to Word Alignment},
  pages     = {73--80},
@@ -2260,7 +2260,7 @@
  year      = {2002}
 }
 @inproceedings{DBLP:conf/acl/WangW98,
-  author    = {Ye-Yi Wang and
+  author    = {Ye{-}Yi Wang and
               Alex Waibel},
  title     = {Modeling with Structures in Statistical Machine Translation},
  pages     = {1357--1363},
@@ -2656,7 +2656,7 @@
 }
 @inproceedings{DBLP:conf/emnlp/SchwenkCF07,
  author    = {Holger Schwenk and
-               Marta R. Costa-juss{\`{a}} and
+               Marta R. Costa{-}juss{\`{a}} and
               Jos{\'{e}} A. R. Fonollosa},
  title     = {Smooth Bilingual N-Gram Translation},
  pages     = {430--438},
@@ -2696,7 +2696,7 @@
               Adri{\`{a}} de Gispert and
               Patrik Lambert and
               Jos{\'{e}} A. R. Fonollosa and
-               Marta R. Costa-juss{\`{a}}},
+               Marta R. Costa{-}juss{\`{a}}},
  title     = {\emph{N}-gram-based Machine Translation},
  journal   = {Computational Linguistics},
  volume    = {32},
@@ -2753,7 +2753,7 @@
  year      = {2007},
 }
 @inproceedings{DBLP:conf/acl/Callison-BurchBS05,
-  author    = {Chris Callison-Burch and
+  author    = {Chris Callison{-}Burch and
               Colin J. Bannard and
               Josh Schroeder},
  title     = {Scaling Phrase-Based Statistical Machine Translation to Larger Corpora
@@ -2863,7 +2863,7 @@
  author ={Huang, Liang and Knight, Kevin and Joshi, Aravind},
  pages ={66--73},
  year ={2006},
-  publisher ={Computationally Hard Problems \& Joint Inference in Speech \& Language Processing}
+  publisher ={Computationally Hard Problems \& Joint Inference in Speech \& Language Processing}
 }

 @inproceedings{galley2006scalable,
@@ -3387,7 +3387,7 @@
 @inproceedings{DBLP:conf/coling/TuLHLL10,
  author    = {Zhaopeng Tu and
               Yang Liu and
-               Young-Sook Hwang and
+               Young{-}Sook Hwang and
               Qun Liu and
               Shouxun Lin},
  title     = {Dependency Forest for Statistical Machine Translation},
@@ -3403,7 +3403,7 @@
  organization ={The Institute of Electrical and Electronics Engineers}
 }
 @inproceedings{rosti2007combining,
-  author    = {Antti-Veikko I. Rosti and
+  author    = {Antti{-}Veikko I. Rosti and
               Necip Fazil Ayan and
               Bing Xiang and
               Spyridon Matsoukas and
@@ -3446,7 +3446,7 @@
  year      = {2008}
 }
 @inproceedings{Li2009Incremental,
-  author    = {Chi-Ho Li and
+  author    = {Chi{-}Ho Li and
               Xiaodong He and
               Yupeng Liu and
               Ning Xi},
@@ -3469,7 +3469,7 @@
  author    = {Mu Li and
               Nan Duan and
               Dongdong Zhang and
-               Chi-Ho Li and
+               Chi{-}Ho Li and
               Ming Zhou},
  title     = {Collaborative Decoding: Partial Hypothesis Re-ranking Using Translation
               Consensus between Decoders},