wording (sec8, hiero decoding)

e444c279 · xiaotong · d92a6600 · e444c279 · e444c279 · e444c279
Commit e444c279 authored Sep 08, 2020 by xiaotong
--- a/Chapter8/Figures/figure-cky-algorithm.tex
+++ b/Chapter8/Figures/figure-cky-algorithm.tex
@@ -12,11 +12,11 @@
 \node[srcnode,anchor=north west] (c21) at ([xshift=1.5em,yshift=0.4em]c1.south west) {\normalsize{\textbf{for} $j=0$ to $ J - 1$}};
 \node[srcnode,anchor=north west] (c22) at ([xshift=1.5em,yshift=0.4em]c21.south west) {\normalsize{$span[j,j+1 ]$.Add($A \to a \in G$)}};
 \node[srcnode,anchor=north west] (c3) at ([xshift=-1.5em,yshift=0.4em]c22.south west) {\normalsize{\textbf{for} $l$ = 1 to $J$}};
-\node[srcnode,anchor=west] (c31) at ([xshift=6em]c3.east) {\normalsize{// length of span}};
+\node[srcnode,anchor=west] (c31) at ([xshift=6em]c3.east) {\normalsize{// 跨度长度}};
 \node[srcnode,anchor=north west] (c4) at ([xshift=1.5em,yshift=0.4em]c3.south west) {\normalsize{\textbf{for} $j$ = 0 to $J-l$}};
-\node[srcnode,anchor=north west] (c41) at ([yshift=0.4em]c31.south west) {\normalsize{// beginning of span}};
+\node[srcnode,anchor=north west] (c41) at ([yshift=0.4em]c31.south west) {\normalsize{// 跨度起始位置}};
 \node[srcnode,anchor=north west] (c5) at ([xshift=1.5em,yshift=0.4em]c4.south west) {\normalsize{\textbf{for} $k$ = $j$ to $j+l$}};
-\node[srcnode,anchor=north west] (c51) at ([yshift=0.4em]c41.south west) {\normalsize{// partition of span}};
+\node[srcnode,anchor=north west] (c51) at ([yshift=0.4em]c41.south west) {\normalsize{// 跨度结束位置}};
 \node[srcnode,anchor=north west] (c6) at ([xshift=1.5em,yshift=0.4em]c5.south west) {\normalsize{$hypos$ = Compose($span[j, k], span[k, j+l]$)}};
 \node[srcnode,anchor=north west] (c7) at ([yshift=0.4em]c6.south west) {\normalsize{$span[j, j+l]$.Update($hypos$)}};
 \node[srcnode,anchor=north west] (c8) at ([xshift=-4.5em,yshift=0.4em]c7.south west) {\normalsize{\textbf{return} $span[0, J]$}};

--- a/Chapter8/Figures/figure-combination-of-translation-with-different-rules.tex
+++ b/Chapter8/Figures/figure-combination-of-translation-with-different-rules.tex
@@ -22,13 +22,13 @@
 \draw[decorate,decoration={mirror,brace}]([xshift=0.5em,yshift=-1em]q2.west) --([xshift=7em,yshift=-1em]q2.west) node [xshift=0em,yshift=-1em,align=center](label1) {};	

 {\scriptsize
-\node[anchor=west] (h1) at ([xshift=1em,yshift=-12em]q2.west) {{Span[0,3]下的翻译假设：}};
+\node[anchor=west] (h1) at ([xshift=1em,yshift=-15em]q2.west) {{Span[0,3]下的翻译假设：}};
 \node[anchor=west] (h2) at ([xshift=0em,yshift=-1.3em]h1.west) {{X：imports and exports}};
 \node[anchor=west] (h6) at ([xshift=0em,yshift=-1.3em]h2.west) {{S：the import and export}};
 }

 {\scriptsize
-\node[anchor=west] (h21) at ([xshift=9em,yshift=2em]h1.east) {{替换$\textrm{X}_1$后生成的翻译假设：}};
+\node[anchor=west] (h21) at ([xshift=9em,yshift=5.0em]h1.east) {{替换$\textrm{X}_1$后生成的翻译假设：}};
 \node[anchor=west] (h22) at ([xshift=0em,yshift=-1.3em]h21.west) {{X：imports and exports have drastically fallen}};
 \node[anchor=west] (h23) at ([xshift=0em,yshift=-1.3em]h22.west) {{X：the import and export have drastically fallen}};
 \node[anchor=west] (h24) at ([xshift=0em,yshift=-1.3em]h23.west) {{X：imports and exports have drastically fallen}};
@@ -37,11 +37,14 @@
 \node[anchor=west] (h27) at ([xshift=0em,yshift=-1.3em]h26.west) {{X：the import and export has drastically fallen}};
 }

-\node [rectangle,inner sep=0.1em,rounded corners=1pt,draw] [fit = (h1) (h5) (h6)] (gl1) {};
+\node [rectangle,inner sep=0.1em,rounded corners=1pt,draw] [fit = (h1) (h2) (h6)] (gl1) {};
 \node [rectangle,inner sep=0.1em,rounded corners=1pt,draw] [fit = (h21) (h25) (h27)] (gl2) {};

-\draw [->,ublue,thick] ([xshift=0.6em,yshift=0.2em]n4.south) .. controls +(south:2em) and +(east:0em) ..   ([xshift=-0em,yshift=2em]gl2.west);
-\draw [->,ublue,thick] ([xshift=0em,yshift=0em]gl1.east) .. controls +(north:2.2em) and +(east:0em) ..   ([xshift=-0em,yshift=2em]gl2.west);
+\node [anchor=east,circle,inner sep=2pt,drop shadow,thick,draw=ublue,fill=white] (join) at ([xshift=4em,yshift=1em]gl1.north east) {\tiny{组合}};
+
+\draw [->,ublue,thick] ([xshift=0.6em,yshift=0.2em]n4.south) .. controls +(south:2em) and +(north:2em) ..   (join.90);
+\draw [->,ublue,thick] ([xshift=0em,yshift=1em]gl1.south east) .. controls +(east:2em) and +(south:2em) ..   (join.-90);
+\draw [->,ublue,thick] (join.0) -- ([xshift=2.3em]join.0);

 \end{scope}
 \end{tikzpicture}

--- a/Chapter8/Figures/figure-hierarchical-phrase-rule-match-generate.tex
+++ b/Chapter8/Figures/figure-hierarchical-phrase-rule-match-generate.tex
@@ -35,8 +35,11 @@
 \node [rectangle,inner sep=0.1em,rounded corners=1pt,draw] [fit = (h1) (h5) (h6)] (gl1) {};
 \node [rectangle,inner sep=0.1em,rounded corners=1pt,draw] [fit = (h21) (h25)] (gl2) {};

-\draw [->,ublue,thick] ([xshift=0.6em,yshift=0.2em]n2.south) .. controls +(south:2em) and +(east:0em) ..   ([xshift=-0em,yshift=2em]gl2.west);
-\draw [->,ublue,thick] ([xshift=0em,yshift=1em]gl1.east) .. controls +(north:2.2em) and +(east:0em) ..   ([xshift=-0em,yshift=2em]gl2.west);
+\node [anchor=east,circle,inner sep=2pt,drop shadow,thick,draw=ublue,fill=white] (join) at ([xshift=3em,yshift=-2em]gl1.north east) {\tiny{组合}};
+
+\draw [->,ublue,thick] ([xshift=0.6em,yshift=0.2em]n2.south) .. controls +(south:2em) and +(north:2em) ..   (join.90);
+\draw [->,ublue,thick] ([xshift=0em,yshift=1em]gl1.south east) .. controls +(east:2em) and +(south:2em) ..   (join.-90);
+\draw [->,ublue,thick] (join.0) -- ([xshift=1.7em]join.0);

 \end{scope}
 \end{tikzpicture}

--- a/Chapter8/chapter8.tex
+++ b/Chapter8/chapter8.tex
@@ -504,7 +504,7 @@ span\textrm{[0,4]}&=&\textrm{“猫} \quad \textrm{喜欢} \quad \textrm{吃} \q
 \vspace{0.5em}
 \item 层次短语模型的文法不符合乔姆斯基范式；
 \vspace{0.5em}
-\item 机器翻译中需要语言模型。由于当前词的语言模型得分需要前面的词做条件，因此机器翻译的解码过程并不是上下文无关的。
+\item 机器翻译中需要语言模型。由于计算当前词的语言模型得分需要前面的词做条件，因此机器翻译的解码过程并不是上下文无关的。
 \vspace{0.5em}
 \end{itemize}

@@ -526,7 +526,7 @@ span\textrm{[0,4]}&=&\textrm{“猫} \quad \textrm{喜欢} \quad \textrm{吃} \q

 \begin{itemize}
 \vspace{0.5em}
-\item 剪枝：在CKY中，每个跨度都可以生成非常多的推导（局部翻译假设）。理论上，这些推导的数量会和跨度大小成指数关系。显然不可能保存如此大量的翻译推导。对于这个问题，常用的办法是只保留top-$k$个推导。也就是每个局部结果只保留最好的$k$个。这种方法也被称作{\small\bfnew{束剪枝}}\index{束剪枝}（Beam Pruning）\index{Beam Pruning}。在极端情况下，当$k$=1时，这个方法就变成了贪婪的方法；
+\item 剪枝：在CKY中，每个跨度都可以生成非常多的推导（局部翻译假设）。理论上，这些推导的数量会和跨度大小成指数关系。显然不可能保存如此大量的翻译推导。对于这个问题，常用的办法是只保留top-$k$个推导。也就是每个局部结果只保留最好的$k$个，即{\small\bfnew{束剪枝}}\index{束剪枝}（Beam Pruning）\index{Beam Pruning}。在极端情况下，当$k$=1时，这个方法就变成了贪婪的方法；
 \vspace{0.5em}
 \item $n$-best结果的生成：$n$-best推导（译文）的生成是统计机器翻译必要的功能。比如，最小错误率训练中就需要最好的$n$个结果用于特征权重调优。在基于CKY的方法中，整个句子的翻译结果会被保存在最大跨度所对应的结构中。因此一种简单的$n$-best生成方法是从这个结构中取出排名最靠前的$n$个结果。另外，也可以考虑自上而下遍历CKY生成的推导空间，得到更好的$n$-best结果\upcite{huang2005better}。
 \end{itemize}
@@ -554,7 +554,7 @@ span\textrm{[0,4]}&=&\textrm{“猫} \quad \textrm{喜欢} \quad \textrm{吃} \q
 \textrm{X} & \to & \langle\ \textrm{X}_1\ \text{大幅度}\ \text{下降}\ \text{了},\ \textrm{X}_1\ \textrm{has}\ \textrm{drastically}\ \textrm{fallen}\ \rangle \nonumber
 \end{eqnarray}

-\parinterval 这也就是说，当匹配规则的源语言部分“$\textrm{X}_1$ 大幅度 下降 了”时会有三个译文可以选择。而变量$\textrm{X}_1$部分又有很多不同的局部翻译结果。不同的规则译文和不同的变量译文都可以组合出一个局部翻译结果。图\ref{fig:8-12}展示了这种情况的实例。
+\parinterval 这也就是说，当匹配规则的源语言部分“$\textrm{X}_1$\ \ 大幅度\ \ 下降\ \ 了”时会有三个译文可以选择。而变量$\textrm{X}_1$部分又有很多不同的局部翻译结果。不同的规则译文和不同的变量译文都可以组合出一个局部翻译结果。图\ref{fig:8-12}展示了这种情况的实例。

 %----------------------------------------------
 \begin{figure}[htp]
@@ -580,7 +580,7 @@ span\textrm{[0,4]}&=&\textrm{“猫} \quad \textrm{喜欢} \quad \textrm{吃} \q

 \parinterval 图\ref{fig:8-13}展示了立方剪枝的过程（规则只含有一个变量的情况）。可以看到，每个步骤中，算法只会扩展当前最好结果周围的两个点（对应两个维度，横轴对应变量被替换的内容，纵轴对应规则的目标语端）。

-\parinterval 理论上，立方剪枝最多访问$n{m}^2$个点。但是在实践中发现，如果终止条件设计的合理，搜索的代价基本上与$m$或者$n$呈线性关系。因此，立方剪枝可以大大提高解码速度。立方剪枝实际上是一种启发性的搜索方法。它把搜索空间表示为一个三维空间。它假设：如果空间中某个点的模型得分较高，那么它“周围”的点的得分也很可能较高。这也是对模型得分沿着空间中不同维度具有连续性的一种假设。这种方法也大量的使用在句法分析中，并取得了很好的效果。
+\parinterval 理论上，立方剪枝最多访问$n{m}^2$个点。但是在实践中发现，如果终止条件设计的合理，搜索的代价基本上与$m$或者$n$呈线性关系。因此，立方剪枝可以大大提高解码速度。立方剪枝实际上是一种启发性的搜索方法。它把搜索空间表示为一个三维空间。它假设：如果空间中某个点的模型得分较高，那么它“周围”的点的得分也很可能较高。这也是对模型得分沿着空间中不同维度具有连续性的一种假设。这种方法也可以使用在句法分析中，并取得了很好的效果。

 %----------------------------------------------------------------------------------------
 %    NEW SECTION