合并分支 'master' 到 'zengxin'

Master 查看合并请求 !295

合并分支 'master' 到 'zengxin'
Master 查看合并请求 !295
bd87e7ad · zengxin · e10fac8e · dee6cf16 · bd87e7ad · bd87e7ad
Commit bd87e7ad authored Nov 24, 2021 by zengxin
--- a/Book/Chapter3/Figures/figure-human-translation.tex
+++ b/Book/Chapter3/Figures/figure-human-translation.tex
@@ -8,7 +8,7 @@
 \node [anchor=west] (s1) at (0,0) {{我}};
 \node [anchor=west] (s2) at ([xshift=2em]s1.east) {{对}};
 \node [anchor=west] (s3) at ([xshift=2em]s2.east) {{你}};
-\node [anchor=west] (s4) at ([xshift=2em]s3.east) {{表示}};
+\node [anchor=west] (s4) at ([xshift=2em]s3.east) {{感到}};
 \node [anchor=west] (s5) at ([xshift=2em]s4.east) {{满意}};
 \node [anchor=south west] (sentlabel) at ([yshift=-0.5em]s1.north west) {\scriptsize{\sffamily\bfseries{\color{red}{待翻译句子(已经分词):}}}};

@@ -38,7 +38,7 @@
 \node [anchor=north west,inner sep=1pt,fill=black] (tl31) at (t31.north west) {\tiny{{\color{white} \textbf{3}}}};

 \node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3em] (t41) at ([yshift=-1em]s4.south) {$\phi$};
-\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3em] (t42) at ([yshift=-0.2em]t41.south) {show};
+\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3em] (t42) at ([yshift=-0.2em]t41.south) {feel};
 \node [anchor=north west,inner sep=1pt,fill=black] (tl41) at (t41.north west) {\tiny{{\color{white} \textbf{4}}}};
 \node [anchor=north west,inner sep=1pt,fill=black] (tl42) at (t42.north west) {\tiny{{\color{white} \textbf{4}}}};


--- a/Book/Chapter3/Figures/figure-noise-channel-model.tex
+++ b/Book/Chapter3/Figures/figure-noise-channel-model.tex
@@ -9,7 +9,8 @@
 \node [draw,red,fill=red!10,thick,anchor=center,circle,inner sep=3.5pt] (s) at (0,0) {\black{$\mathbf{s}$}};
 \node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=3.3pt] (t) at ([xshift=1.5in]s.east) {\black{$\mathbf{t}$}};

-\draw [<->,thick,] (s.east) -- (t.west) node [pos=0.5,draw,fill=white] {噪声信道};
+\draw [->,thick,] (s.east) -- (t.west) node [pos=0.5,draw,fill=white] {噪声信道};
+\draw[->,thick](s.east) -- ([xshift=2.2em]s.east);
 \node [anchor=east] at (s.west) {\scriptsize{信宿}};
 \node [anchor=west] at (t.east) {\scriptsize{信源}};


--- a/Book/Chapter3/Figures/figure-process-of-machine-translation.tex
+++ b/Book/Chapter3/Figures/figure-process-of-machine-translation.tex
@@ -5,7 +5,7 @@
 \node [anchor=west] (s1) at (0,0) {{我}};
 \node [anchor=west] (s2) at ([xshift=2em]s1.east) {{对}};
 \node [anchor=west] (s3) at ([xshift=2em]s2.east) {{你}};
-\node [anchor=west] (s4) at ([xshift=2em]s3.east) {{表示}};
+\node [anchor=west] (s4) at ([xshift=2em]s3.east) {{感到}};
 \node [anchor=west] (s5) at ([xshift=2em]s4.east) {{满意}};

 \node [anchor=south west] (sentlabel) at ([yshift=-0.5em]s1.north west) {\scriptsize{{\color{red}{待翻译句子(已经分词):}}}};
@@ -35,7 +35,7 @@
 \node [anchor=north west,inner sep=1pt,fill=black] (tl31) at (t31.north west) {\tiny{{\color{white} \textbf{3}}}};

 \node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3em] (t41) at ([yshift=-1em]s4.south) {$\phi$};
-\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3em] (t42) at ([yshift=-0.2em]t41.south) {show};
+\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3em] (t42) at ([yshift=-0.2em]t41.south) {feel};
 \node [anchor=north west,inner sep=1pt,fill=black] (tl41) at (t41.north west) {\tiny{{\color{white} \textbf{4}}}};
 \node [anchor=north west,inner sep=1pt,fill=black] (tl42) at (t42.north west) {\tiny{{\color{white} \textbf{4}}}};


--- a/Book/Chapter3/Figures/greedy-mt-decoding-process-1.tex
+++ b/Book/Chapter3/Figures/greedy-mt-decoding-process-1.tex
@@ -16,7 +16,7 @@
 \node [anchor=west] (s1) at (0,0) {{我}};
 \node [anchor=west] (s2) at ([xshift=3em]s1.east) {{对}};
 \node [anchor=west] (s3) at ([xshift=3em]s2.east) {{你}};
-\node [anchor=west] (s4) at ([xshift=2.5em]s3.east) {{表示}};
+\node [anchor=west] (s4) at ([xshift=2.5em]s3.east) {{感到}};
 \node [anchor=west] (s5) at ([xshift=2.5em]s4.east) {{满意}};

 \node [anchor=south west,inner sep=1pt] (sentlabel) at ([yshift=0.3em]s1.north west) {\scriptsize{{输入: 待翻译句子(已经分词)}}};
@@ -53,8 +53,8 @@

 {
 \node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t41) at ([yshift=-1.3em]s4.south) {$\phi$};
-\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t42) at ([yshift=-0.2em]t41.south) {show};
-\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t43) at ([yshift=-0.2em]t42.south) {shows};
+\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t42) at ([yshift=-0.2em]t41.south) {feel};
+\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t43) at ([yshift=-0.2em]t42.south) {feels};
 }

 {
@@ -121,7 +121,7 @@
 \node [anchor=west] (s1) at (0,0) {{我}};
 \node [anchor=west] (s2) at ([xshift=3em]s1.east) {{对}};
 \node [anchor=west] (s3) at ([xshift=3em]s2.east) {{你}};
-\node [anchor=west] (s4) at ([xshift=2.5em]s3.east) {{表示}};
+\node [anchor=west] (s4) at ([xshift=2.5em]s3.east) {{感到}};
 \node [anchor=west] (s5) at ([xshift=2.5em]s4.east) {{满意}};

 \node [anchor=south west,inner sep=1pt] (sentlabel) at ([yshift=0.3em]s1.north west) {\scriptsize{{输入: 待翻译句子(已经分词)}}};
@@ -160,8 +160,8 @@

 {
 \node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t41) at ([yshift=-1.3em]s4.south) {$\phi$};
-\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t42) at ([yshift=-0.2em]t41.south) {show};
-\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t43) at ([yshift=-0.2em]t42.south) {shows};
+\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t42) at ([yshift=-0.2em]t41.south) {feel};
+\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t43) at ([yshift=-0.2em]t42.south) {feels};
 }



--- a/Book/Chapter3/Figures/greedy-mt-decoding-process-3.tex
+++ b/Book/Chapter3/Figures/greedy-mt-decoding-process-3.tex
@@ -11,7 +11,7 @@
 \node [anchor=west] (s1) at (0,0) {{我}};
 \node [anchor=west] (s2) at ([xshift=3em]s1.east) {{对}};
 \node [anchor=west] (s3) at ([xshift=3em]s2.east) {{你}};
-\node [anchor=west] (s4) at ([xshift=2.5em]s3.east) {{表示}};
+\node [anchor=west] (s4) at ([xshift=2.5em]s3.east) {{感到}};
 \node [anchor=west] (s5) at ([xshift=2.5em]s4.east) {{满意}};

 \node [anchor=south west,inner sep=1pt] (sentlabel) at ([yshift=0.3em]s1.north west) {\scriptsize{{输入: 待翻译句子(已经分词)}}};
@@ -50,8 +50,8 @@

 {
 \node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t41) at ([yshift=-1.3em]s4.south) {$\phi$};
-\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t42) at ([yshift=-0.2em]t41.south) {show};
-\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t43) at ([yshift=-0.2em]t42.south) {shows};
+\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t42) at ([yshift=-0.2em]t41.south) {feel};
+\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t43) at ([yshift=-0.2em]t42.south) {feels};
 }


@@ -176,7 +176,7 @@
 \node [anchor=west] (s1) at (0,0) {{我}};
 \node [anchor=west] (s2) at ([xshift=3em]s1.east) {{对}};
 \node [anchor=west] (s3) at ([xshift=3em]s2.east) {{你}};
-\node [anchor=west] (s4) at ([xshift=2.5em]s3.east) {{表示}};
+\node [anchor=west] (s4) at ([xshift=2.5em]s3.east) {{感到}};
 \node [anchor=west] (s5) at ([xshift=2.5em]s4.east) {{满意}};

 \node [anchor=south west,inner sep=1pt] (sentlabel) at ([yshift=0.3em]s1.north west) {\scriptsize{{输入: 待翻译句子(已经分词)}}};
@@ -215,8 +215,8 @@

 {
 \node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t41) at ([yshift=-1.3em]s4.south) {$\phi$};
-\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t42) at ([yshift=-0.2em]t41.south) {show};
-\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t43) at ([yshift=-0.2em]t42.south) {shows};
+\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t42) at ([yshift=-0.2em]t41.south) {feel};
+\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3.5em] (t43) at ([yshift=-0.2em]t42.south) {feels};
 }



--- a/Book/Chapter3/chapter3.tex
+++ b/Book/Chapter3/chapter3.tex
@@ -111,7 +111,7 @@
 %----------------------------------------------
 \vspace{-0.2em}

-\parinterval 图\ref{fig:3-3}展示了人在翻译``我 对 你表示 满意''时可能会思考的内容。具体来说，有如下两方面内容。
+\parinterval 图\ref{fig:3-3}展示了人在翻译``我\;对\;你\;感到\;满意''时可能会思考的内容。具体来说，有如下两方面内容。

 \begin{itemize}
 \vspace{0.5em}
@@ -243,9 +243,9 @@
 \begin{example}
 一个汉英互译的句对

-\qquad\qquad\quad $\mathbf{s}$ = 机器\quad {\color{red}翻译}\; 就\; 是\; 用\; 计算机\; 来\; 进行\; {\color{red}翻译}
+$\mathbf{s}$ = 机器\; {\color{red}翻译}\; 就\; 是\; 用\; 计算机\; 来\; 生成\; {\color{red}翻译}\; 的\; 过程

-\qquad\qquad\quad $\mathbf{t}$ = machine\; {\color{red}translation}\; is\; just\; {\color{red}translation}\; by\; computer
+$\mathbf{t}$ = machine\; {\color{red}translation}\; is\; a\; process\; of\; generating\; a\;  {\color{red}translation}\; by\; computer
 \label{eg:3-1}
 \end{example}

@@ -253,14 +253,14 @@
 \begin{eqnarray}
 \textrm{P}(\text{``翻译''},\text{``translation''}; \mathbf{s},\mathbf{t})  & = & \frac{c(\textrm{``翻译''},\textrm{``translation''};\mathbf{s},\mathbf{t})}{\sum_{x',y'} c(x',y';\mathbf{s},\mathbf{t})} \nonumber \\
                                                                                                         & =  & \frac{4}{|\mathbf{s}|\times |\mathbf{t}|} \nonumber \\
-                                                                                                         & = & \frac{4}{63}
+                                                                                                         & = & \frac{4}{121}
 \label{eq:3-2}
 \end{eqnarray}

 \noindent 这里运算$|\cdot|$表示句子长度。类似的，可以得到``机器''和``translation''、``机器''和``look''的单词翻译概率：
 \begin{eqnarray}
-\textrm{P}(\text{``机器''},\text{``translation''}; \mathbf{s},\mathbf{t})  & = & \frac{2}{63} \\
-\textrm{P}(\text{``机器''},\text{``look''}; \mathbf{s},\mathbf{t})  & =  & \frac{0}{63}
+\textrm{P}(\text{``机器''},\text{``translation''}; \mathbf{s},\mathbf{t})  & = & \frac{2}{121} \\
+\textrm{P}(\text{``机器''},\text{``look''}; \mathbf{s},\mathbf{t})  & =  & \frac{0}{121}
 \label{eq:3-3}
 \end{eqnarray}

@@ -283,13 +283,13 @@
 \begin{example}
 两个汉英互译的句对

-\qquad\qquad \; $\mathbf{s}^1$ = 机器\quad {\color{red}翻译}\; 就\; 是\; 用\; 计算机\; 来\; 进行\; {\color{red}翻译}
+$\mathbf{s}^{[1]}$ = 机器\; {\color{red}翻译}\; 就\; 是\; 用\; 计算机\; 来\; 生成\; {\color{red}翻译}\; 的\; 过程

-\qquad\qquad\; $\mathbf{s}^1$ = Machine\; {\color{red}translation}\; is\; just\; {\color{red}translation}\; by\; computer
+$\mathbf{t}^{[1]}$ = machine\; {\color{red}translation}\; is\; a\; process\; of\; generating\; a\;  {\color{red}translation}\; by\; computer

-\qquad\qquad\; $\mathbf{s}^2$ = 那\quad 人工\quad {\color{red}翻译}\quad 呢\quad ?
+$\mathbf{s}^{[2]}$ = 那\quad 人工\quad {\color{red}翻译}\quad 呢\quad ?

-\qquad\qquad\; $\mathbf{t}^2$ = So\; ,\; what\; is\; human\; {\color{red}translation}\; ?
+$\mathbf{t}^{[2]}$ = So\; ,\; what\; is\; human\; {\color{red}translation}\; ?
 \label{eg:3-2}
 \end{example}

@@ -298,8 +298,8 @@
 \begin{eqnarray}
 {\textrm{P}(\textrm{``翻译''},\textrm{``translation''})} & = & {\frac{c(\textrm{``翻译''},\textrm{``translation''};\mathbf{s}^{[1]},\mathbf{t}^{[1]})+c(\textrm{``翻译''},\textrm{``translation''};\mathbf{s}^{[2]},\mathbf{t}^{[2]})}{\sum_{x',y'} c(x',y';\mathbf{s}^{[1]},\mathbf{t}^{[1]}) + \sum_{x',y'} c(x',y';\mathbf{s}^{[2]},\mathbf{t}^{[2]})}} \nonumber \\
                                                                            & = & \frac{4 + 1}{|\mathbf{s}^{[1]}| \times |\mathbf{t}^{[1]}| + |\mathbf{s}^{[2]}| \times |\mathbf{t}^{[2]}|} \nonumber \\
-                                                                            & = & \frac{4 + 1}{9 \times 7 + 5 \times 7} \nonumber \\
-                                                                            & = & \frac{5}{98}
+                                                                            & = & \frac{4 + 1}{11 \times 11 + 5 \times 7} \nonumber \\
+                                                                            & = & \frac{5}{156}
 \label{eq:3-5}
 \end{eqnarray}
 }

--- a/Book/Chapter4/Figures/grid-search-2.tex
+++ b/Book/Chapter4/Figures/grid-search-2.tex
-\begin{tikzpicture}
-\begin{scope}[scale=0.62] 
-{\tiny
-\draw[step=1,help lines,color=black] (0,0) grid (4,4); 
-
-\node[anchor=north] (y2) at ([xshift=-3.3em,yshift=0em]n1.north) {0.01};
-\node[anchor=north] (y1) at ([xshift=0em,yshift=-3.3em]y2.south) {0.00};
-\node[anchor=north] (y3) at ([xshift=0em,yshift=4.5em]y2.north) {0.02};
-\node[anchor=north] (y4) at ([xshift=0em,yshift=6.6em]y3.north) {$\vdots$};
-\node[anchor=north] (y5) at ([xshift=0em,yshift=2em]y4.north) {1.00};
-
-\node[anchor=north] (x1) at ([xshift=2em,yshift=-3em]n1.south) {$\lambda_1$};
-\node[anchor=north] (x2) at ([xshift=4.5em,yshift=0em]x1.north) {$\lambda_2$};
-\node[anchor=north] (x3) at ([xshift=4em,yshift=-1em]x2.north) {$...$};
-\node[anchor=north] (x4) at ([xshift=5em,yshift=1em]x3.north) {$\lambda_{M-1}$};
-\node[anchor=north] (x5) at ([xshift=5em,yshift=0em]x4.north) {$\lambda_M$};
-
-\draw [-](n1) (0,4) -- (0,4.4);
-\draw [-](n2) (1,4) -- (1,4.4);
-\draw [-](n3) (2,4) -- (2,4.4);
-\draw [-](n4) (3,4) -- (3,4.4);
-\draw [-](n5) (4,4) -- (4,4.4);
-
-\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r31) at (2,4) {};
-\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r32) at (2,0) {};
-\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r33) at (2,2) {};
-\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r35) at (2,1) {};
-
-\node [anchor=center,draw,circle,inner sep=1.5pt,ugreen!50,fill=ugreen!50] (r34) at (2,3) {};
-
-\draw [-,very thick,red!50, dashed] (1,2) -- (2,4) -- (3,2) -- (2,3) -- (1,2) -- (3,2) -- (2,1) -- (1,2) -- (2,0) -- (3,2);
-\draw [-,very thick,blue!50] (0,1) -- (1,2);
-\draw [-,very thick,blue!50] (3,2) -- (4,4);
-\draw [-,very thick,ugreen!50, dashed] (1,2) -- (2,3) -- (3,2);
-
-\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r11) at (0,1) {};
-\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r12) at (1,2) {};
-\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r14) at (3,2) {};
-\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r15) at (4,4) {};
-}
-\end{scope}
-\end{tikzpicture}
\ No newline at end of file
--- a/Book/Chapter4/Figures/grid-search-1.tex
+++ b/Book/Chapter4/Figures/grid-search-1.tex
@@ -3,13 +3,13 @@
 {\tiny
 \draw[step=1,help lines,color=black] (0,0) grid (4,4); 

-\node[anchor=north] (y2) at ([xshift=-3.3em,yshift=0em]n1.north) {0.01};
+\node[anchor=north] (y2) at (-5.3em,1.5) {0.01};
 \node[anchor=north] (y1) at ([xshift=0em,yshift=-3.3em]y2.south) {0.00};
 \node[anchor=north] (y3) at ([xshift=0em,yshift=4.5em]y2.north) {0.02};
 \node[anchor=north] (y4) at ([xshift=0em,yshift=6.6em]y3.north) {$\vdots$};
 \node[anchor=north] (y5) at ([xshift=0em,yshift=2em]y4.north) {1.00};

-\node[anchor=north] (x1) at ([xshift=2em,yshift=-3em]n1.south) {$\lambda_1$};
+\node[anchor=north] (x1) at (1em,-3em) {$\lambda_1$};
 \node[anchor=north] (x2) at ([xshift=4.5em,yshift=0em]x1.north) {$\lambda_2$};
 \node[anchor=north] (x3) at ([xshift=4em,yshift=-1em]x2.north) {$...$};
 \node[anchor=north] (x4) at ([xshift=5em,yshift=1em]x3.north) {$\lambda_{M-1}$};
@@ -44,4 +44,45 @@
 \node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r15) at (4,4) {};
 }
 \end{scope}
+
+\begin{scope}[scale=0.62,xshift=3in] 
+{\tiny
+\draw[step=1,help lines,color=black] (0,0) grid (4,4); 
+
+\node[anchor=north] (y2) at (-5.3em,1.5) {0.01};
+\node[anchor=north] (y1) at ([xshift=0em,yshift=-3.3em]y2.south) {0.00};
+\node[anchor=north] (y3) at ([xshift=0em,yshift=4.5em]y2.north) {0.02};
+\node[anchor=north] (y4) at ([xshift=0em,yshift=6.6em]y3.north) {$\vdots$};
+\node[anchor=north] (y5) at ([xshift=0em,yshift=2em]y4.north) {1.00};
+
+\node[anchor=north] (x1) at (1em,-3em) {$\lambda_1$};
+\node[anchor=north] (x2) at ([xshift=4.5em,yshift=0em]x1.north) {$\lambda_2$};
+\node[anchor=north] (x3) at ([xshift=4em,yshift=-1em]x2.north) {$...$};
+\node[anchor=north] (x4) at ([xshift=5em,yshift=1em]x3.north) {$\lambda_{M-1}$};
+\node[anchor=north] (x5) at ([xshift=5em,yshift=0em]x4.north) {$\lambda_M$};
+
+\draw [-](n1) (0,4) -- (0,4.4);
+\draw [-](n2) (1,4) -- (1,4.4);
+\draw [-](n3) (2,4) -- (2,4.4);
+\draw [-](n4) (3,4) -- (3,4.4);
+\draw [-](n5) (4,4) -- (4,4.4);
+
+\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r31) at (2,4) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r32) at (2,0) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r33) at (2,2) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r35) at (2,1) {};
+
+\node [anchor=center,draw,circle,inner sep=1.5pt,ugreen!50,fill=ugreen!50] (r34) at (2,3) {};
+
+\draw [-,very thick,red!50, dashed] (1,2) -- (2,4) -- (3,2) -- (2,3) -- (1,2) -- (3,2) -- (2,1) -- (1,2) -- (2,0) -- (3,2);
+\draw [-,very thick,blue!50] (0,1) -- (1,2);
+\draw [-,very thick,blue!50] (3,2) -- (4,4);
+\draw [-,very thick,ugreen!50, dashed] (1,2) -- (2,3) -- (3,2);
+
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r11) at (0,1) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r12) at (1,2) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r14) at (3,2) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r15) at (4,4) {};
+}
+\end{scope}
 \end{tikzpicture}
\ No newline at end of file
--- a/Book/Chapter4/Figures/search-space-representation-of-feature-weight-1.tex
+++ b/Book/Chapter4/Figures/search-space-representation-of-feature-weight-1.tex
-\begin{tikzpicture}
-\begin{scope}[scale=0.55] 
-{\tiny
-\draw[step=1,help lines,color=black] grid (4,4); 
-
-\node[anchor=north] (y2) at ([xshift=-3.3em,yshift=0em]n1.north) {0.01};
-\node[anchor=north] (y1) at ([xshift=0em,yshift=-3.3em]y2.south) {0.00};
-\node[anchor=north] (y3) at ([xshift=0em,yshift=4.5em]y2.north) {0.02};
-\node[anchor=north] (y4) at ([xshift=0em,yshift=6.6em]y3.north) {$\vdots$};
-\node[anchor=north] (y5) at ([xshift=0em,yshift=2em]y4.north) {1.00};
-
-\node[anchor=north] (x1) at ([xshift=2em,yshift=-3em]n1.south) {$\lambda_1$};
-\node[anchor=north] (x2) at ([xshift=4.5em,yshift=0em]x1.north) {$\lambda_2$};
-\node[anchor=north] (x3) at ([xshift=4em,yshift=-1em]x2.north) {$...$};
-\node[anchor=north] (x4) at ([xshift=5em,yshift=1em]x3.north) {$\lambda_{M-1}$};
-\node[anchor=north] (x5) at ([xshift=5em,yshift=0em]x4.north) {$\lambda_M$};
-
-\draw [-](n1) (0,4) -- (0,4.4);
-\draw [-](n2) (1,4) -- (1,4.4);
-\draw [-](n3) (2,4) -- (2,4.4);
-\draw [-](n4) (3,4) -- (3,4.4);
-\draw [-](n5) (4,4) -- (4,4.4);
-
-\draw[decorate,decoration={brace}](0,4.7) --(4,4.7) node [xshift=-4em,yshift=1.5em,align=center](label1) {M dimensions};	
-
-\draw[decorate,decoration={brace}](4.5,4.3) --(4.5,0) node [xshift=2.3em,yshift=5.8em,align=center](label2) {Values};	
-}
-\end{scope}
-\end{tikzpicture}
\ No newline at end of file
--- a/Book/Chapter4/Figures/search-space-representation-of-feature-weight-2.tex
+++ b/Book/Chapter4/Figures/search-space-representation-of-feature-weight-2.tex
-\begin{tikzpicture}
-\begin{scope}[scale=0.55] 
-{\tiny
-\draw[step=1,help lines,color=black] grid (4,4); 
-
-\node[anchor=north] (y2) at ([xshift=-3.3em,yshift=0em]n1.north) {0.01};
-\node[anchor=north] (y1) at ([xshift=0em,yshift=-3.3em]y2.south) {0.00};
-\node[anchor=north] (y3) at ([xshift=0em,yshift=4.5em]y2.north) {0.02};
-\node[anchor=north] (y4) at ([xshift=0em,yshift=6.6em]y3.north) {$\vdots$};
-\node[anchor=north] (y5) at ([xshift=0em,yshift=2em]y4.north) {1.00};
-
-\node[anchor=north] (x1) at ([xshift=2em,yshift=-3em]n1.south) {$\lambda_1$};
-\node[anchor=north] (x2) at ([xshift=4.5em,yshift=0em]x1.north) {$\lambda_2$};
-\node[anchor=north] (x3) at ([xshift=4em,yshift=-1em]x2.north) {$...$};
-\node[anchor=north] (x4) at ([xshift=5em,yshift=1em]x3.north) {$\lambda_{M-1}$};
-\node[anchor=north] (x5) at ([xshift=5em,yshift=0em]x4.north) {$\lambda_M$};
-
-\draw [-](n1) (0,4) -- (0,4.4);
-\draw [-](n2) (1,4) -- (1,4.4);
-\draw [-](n3) (2,4) -- (2,4.4);
-\draw [-](n4) (3,4) -- (3,4.4);
-\draw [-](n5) (4,4) -- (4,4.4);
-
-\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r11) at (0,1) {};
-\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r12) at (1,2) {};
-\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r13) at (2,1) {};
-\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r14) at (3,2) {};
-\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r15) at (4,4) {};
-
-\draw [-,very thick,blue!50] (0,1) -- (1,2) -- (2,1) -- (3,2) -- (4,4);
-
-\node[anchor=north] (p1) at ([xshift=5em,yshift=13em]n5.north) {\scriptsize{$\leftarrow$ \textbf{path}:}};
-
-\node[anchor=north] (e1) at ([xshift=0,yshift=-0.4em]p1.south) {$w_1 = 0.01$};
-\node[anchor=north] (e2) at ([xshift=0,yshift=-0.8em]e1.south) {$w_2 = 0.02$};
-\node[anchor=north] (e3) at ([xshift=0,yshift=0.4em]e2.south) {$\vdots$};
-\node[anchor=north] (e4) at ([xshift=0,yshift=-0.2em]e3.south) {$w_M = 1.00$};
-}
-\end{scope}
-\end{tikzpicture}
\ No newline at end of file
--- a/Book/Chapter4/Figures/search-space-representation-of-feature-weight-3.tex
+++ b/Book/Chapter4/Figures/search-space-representation-of-feature-weight-3.tex
@@ -3,13 +3,80 @@
 {\tiny
 \draw[step=1,help lines,color=black] grid (4,4); 

-\node[anchor=north] (y2) at ([xshift=-3.3em,yshift=0em]n1.north) {0.01};
+\draw [-](n1) (0,4) -- (0,4.4);
+\draw [-](n2) (1,4) -- (1,4.4);
+\draw [-](n3) (2,4) -- (2,4.4);
+\draw [-](n4) (3,4) -- (3,4.4);
+\draw [-](n5) (4,4) -- (4,4.4);
+
+\node[anchor=north] (y2) at (-5.3em,1.5) {0.01};
+\node[anchor=north] (y1) at ([xshift=0em,yshift=-3.3em]y2.south) {0.00};
+\node[anchor=north] (y3) at ([xshift=0em,yshift=4.5em]y2.north) {0.02};
+\node[anchor=north] (y4) at ([xshift=0em,yshift=6.6em]y3.north) {$\vdots$};
+\node[anchor=north] (y5) at ([xshift=0em,yshift=2em]y4.north) {1.00};
+
+\node[anchor=north] (x1) at (1em,-3em) {$\lambda_1$};
+\node[anchor=north] (x2) at ([xshift=4.5em,yshift=0em]x1.north) {$\lambda_2$};
+\node[anchor=north] (x3) at ([xshift=4em,yshift=-1em]x2.north) {$...$};
+\node[anchor=north] (x4) at ([xshift=5em,yshift=1em]x3.north) {$\lambda_{M-1}$};
+\node[anchor=north] (x5) at ([xshift=5em,yshift=0em]x4.north) {$\lambda_M$};
+
+\draw[decorate,decoration={brace}](0,4.7) --(4,4.7) node [xshift=-4em,yshift=1.5em,align=center](label1) {M dimensions};	
+
+\draw[decorate,decoration={brace}](4.5,4.3) --(4.5,0) node [xshift=2.3em,yshift=5.8em,align=center](label2) {Values};	
+}
+\end{scope}
+
+\begin{scope}[scale=0.55,xshift=3.2in] 
+{\tiny
+\draw[step=1,help lines,color=black] grid (4,4); 
+
+\node[anchor=north] (y2) at (-5.3em,1.5) {0.01};
+\node[anchor=north] (y1) at ([xshift=0em,yshift=-3.3em]y2.south) {0.00};
+\node[anchor=north] (y3) at ([xshift=0em,yshift=4.5em]y2.north) {0.02};
+\node[anchor=north] (y4) at ([xshift=0em,yshift=6.6em]y3.north) {$\vdots$};
+\node[anchor=north] (y5) at ([xshift=0em,yshift=2em]y4.north) {1.00};
+
+\node[anchor=north] (x1) at (1em,-3em) {$\lambda_1$};
+\node[anchor=north] (x2) at ([xshift=4.5em,yshift=0em]x1.north) {$\lambda_2$};
+\node[anchor=north] (x3) at ([xshift=4em,yshift=-1em]x2.north) {$...$};
+\node[anchor=north] (x4) at ([xshift=5em,yshift=1em]x3.north) {$\lambda_{M-1}$};
+\node[anchor=north] (x5) at ([xshift=5em,yshift=0em]x4.north) {$\lambda_M$};
+
+\draw [-](n1) (0,4) -- (0,4.4);
+\draw [-](n2) (1,4) -- (1,4.4);
+\draw [-](n3) (2,4) -- (2,4.4);
+\draw [-](n4) (3,4) -- (3,4.4);
+\draw [-](n5) (4,4) -- (4,4.4);
+
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r11) at (0,1) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r12) at (1,2) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r13) at (2,1) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r14) at (3,2) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r15) at (4,4) {};
+
+\draw [-,very thick,blue!50] (0,1) -- (1,2) -- (2,1) -- (3,2) -- (4,4);
+
+\node[anchor=north] (p1) at (5.7,4.3) {\scriptsize{$\leftarrow$ \textbf{path}:}};
+
+\node[anchor=north] (e1) at ([xshift=0,yshift=-0.4em]p1.south) {$w_1 = 0.01$};
+\node[anchor=north] (e2) at ([xshift=0,yshift=-0.8em]e1.south) {$w_2 = 0.02$};
+\node[anchor=north] (e3) at ([xshift=0,yshift=0.4em]e2.south) {$\vdots$};
+\node[anchor=north] (e4) at ([xshift=0,yshift=-0.2em]e3.south) {$w_M = 1.00$};
+}
+\end{scope}
+
+\begin{scope}[scale=0.55,xshift=6.8in] 
+{\tiny
+\draw[step=1,help lines,color=black] grid (4,4); 
+
+\node[anchor=north] (y2) at (-5.3em,1.5) {0.01};
 \node[anchor=north] (y1) at ([xshift=0em,yshift=-3.3em]y2.south) {0.00};
 \node[anchor=north] (y3) at ([xshift=0em,yshift=4.5em]y2.north) {0.02};
 \node[anchor=north] (y4) at ([xshift=0em,yshift=6.6em]y3.north) {$\vdots$};
 \node[anchor=north] (y5) at ([xshift=0em,yshift=2em]y4.north) {1.00};

-\node[anchor=north] (x1) at ([xshift=2em,yshift=-3em]n1.south) {$\lambda_1$};
+\node[anchor=north] (x1) at (1em,-3em) {$\lambda_1$};
 \node[anchor=north] (x2) at ([xshift=4.5em,yshift=0em]x1.north) {$\lambda_2$};
 \node[anchor=north] (x3) at ([xshift=4em,yshift=-1em]x2.north) {$...$};
 \node[anchor=north] (x4) at ([xshift=5em,yshift=1em]x3.north) {$\lambda_{M-1}$};

--- a/Book/Chapter4/chapter4.tex
+++ b/Book/Chapter4/chapter4.tex
@@ -701,9 +701,7 @@ dr = start_i-end_{i-1}-1
 %----------------------------------------------
 \begin{figure}[htp]
 \centering
-\begin{tabular}{l l l}
-& \subfigure{\input{./Chapter4/Figures/search-space-representation-of-feature-weight-1}} \subfigure{\input{./Chapter4/Figures/search-space-representation-of-feature-weight-2}} \subfigure{\input{./Chapter4/Figures/search-space-representation-of-feature-weight-3}} &  \\
-\end{tabular}
+\input{./Chapter4/Figures/search-space-representation-of-feature-weight}
 \caption{特征权重的搜索空间表示}
 \label{fig:4-23}
 \end{figure}
@@ -716,9 +714,7 @@ dr = start_i-end_{i-1}-1
 %----------------------------------------------
 \begin{figure}[htp]
 \centering
-\begin{tabular}{l l}
-\subfigure{\input{./Chapter4/Figures/grid-search-1}} &  \subfigure{\input{./Chapter4/Figures/grid-search-2}} \\
-\end{tabular}
+\input{./Chapter4/Figures/grid-search}
 \caption{格搜索（左侧：所有点都访问（蓝色）；右侧：避开无效点（绿色））}
 \label{fig:4-24}
 \end{figure}

--- a/Book/ChapterAppend/chapterappend.tex
+++ b/Book/ChapterAppend/chapterappend.tex
@@ -173,7 +173,7 @@
 %----------------------------------------------------------------------------------------

 \section{IBM模型3训练方法}
-\parinterval 模型3的参数估计与模型1和模型2采用相同的方法。这里直接给出辅助函数。
+\parinterval IBM模型3的参数估计与模型1和模型2采用相同的方法。这里直接给出辅助函数。
 \begin{eqnarray}
 h(t,d,n,p, \lambda,\mu, \nu, \zeta) & = &  \textrm{P}_{\theta}(\mathbf{s}|\mathbf{t})-\sum_{t}\lambda_{t}\big(\sum_{s}t(s|t)-1\big)  \nonumber \\
 & & -\sum_{i}\mu_{iml}\big(\sum_{j}d(j|i,m,l)-1\big) \nonumber \\
@@ -181,7 +181,7 @@ h(t,d,n,p, \lambda,\mu, \nu, \zeta) & = &  \textrm{P}_{\theta}(\mathbf{s}|\mathb
 \label{eq:1.1}
 \end{eqnarray}

-\parinterval 由于篇幅所限这里略去了推导步骤直接给出一些用于参数估计的等式。
+\parinterval 由于篇幅所限，这里略去了推导步骤，直接给出具体公式。
 \begin{eqnarray}
 c(s|t,\mathbf{s},\mathbf{t}) & = & \sum_{\mathbf{a}}\big[\textrm{P}_{\theta}(\mathbf{s},\mathbf{a}|\mathbf{t}) \times \sum_{j=1}^{m} (\delta(s_j,s) \cdot \delta(t_{a_{j}},t))\big] \label{eq:1.2} \\
 c(j|i,m,l;\mathbf{s},\mathbf{t}) & = & \sum_{\mathbf{a}}\big[\textrm{P}_{\theta}(\mathbf{s},\mathbf{a}|\mathbf{t}) \times \delta(i,a_j)\big] \label{eq:1.3} \\
@@ -202,9 +202,9 @@ n(\varphi|t) & = & \nu_{t}^{-1} \times \sum_{s=1}^{K}c(\varphi |t;\mathbf{s}^{[k
 p_x & = & \zeta^{-1} \sum_{k=1}^{K}c(x;\mathbf{s}^{[k]},\mathbf{t}^{[k]}) \label{eq:1.10}
 \end{eqnarray}

-\parinterval 在模型3中，因为产出率的引入，并不能像模型1和模型2那样，在保证正确性的情况下加速参数估计的过程。这就使得每次迭代过程中，都不得不面对大小为$(l+1)^m$的词对齐空间。遍历所有$(l+1)^m$个词对齐所带来的高时间复杂度显然是不能被接受的。因此就要考虑能否仅利用词对齐空间中的部分词对齐对这些参数进行估计。比较简单且直接的方法就是仅利用Viterbi对齐来进行参数估计\footnote{Viterbi词对齐可以被简单的看作搜索到的最好词对齐。}。 遗憾的是，在模型3中并没有方法直接获得Viterbi对齐。这样只能采用一种折中的策略，即仅考虑那些使得$\textrm{P}_{\theta}(\mathbf{s},\mathbf{a}|\mathbf{t})$达到较高值的词对齐。这里把这部分词对齐组成的集合记为$S$。式\ref{eq:1.2}可以被修改为：
+\parinterval 在模型3中，因为繁衍率的引入，并不能像模型1和模型2那样，在保证正确性的情况下加速参数估计的过程。这就使得每次迭代过程中，都不得不面对大小为$(l+1)^m$的词对齐空间。遍历所有$(l+1)^m$个词对齐所带来的高时间复杂度显然是不能被接受的。因此就要考虑能否仅利用词对齐空间中的部分词对齐对这些参数进行估计。比较简单的方法是仅使用Viterbi对齐来进行参数估计，这里Viterbi词对齐可以被简单地看作搜索到的最好词对齐。遗憾的是，在模型3中并没有方法直接获得Viterbi对齐。这样只能采用一种折中的策略，即仅考虑那些使得$\textrm{P}_{\theta}(\mathbf{s},\mathbf{a}|\mathbf{t})$达到较高值的词对齐。这里把这部分词对齐组成的集合记为$S$。式\ref{eq:1.2}可以被修改为：
 \begin{eqnarray}
-c(s|t,\mathbf{s},\mathbf{t}) \approx \sum_{\mathbf{a} \in \mathbf{S}}\big[\textrm{P}_{\theta}(\mathbf{s},\mathbf{a}|\mathbf{t}) \times \sum_{j=1}^{m}(\delta(s_j,\mathbf{s}) \cdot \delta(t_{a_{j}},\mathbf{t})) \big]
+c(s|t,\mathbf{s},\mathbf{t}) \approx \sum_{\mathbf{a} \in S}\big[\textrm{P}_{\theta}(\mathbf{s},\mathbf{a}|\mathbf{t}) \times \sum_{j=1}^{m}(\delta(s_j,s) \cdot \delta(t_{a_{j}},t)) \big]
 \label{eq:1.11}
 \end{eqnarray}

@@ -222,7 +222,7 @@ S = N(b^{\infty}(V(\mathbf{s}|\mathbf{t};2))) \cup (\mathop{\cup}\limits_{ij} N(
 \end{itemize}

 \vspace{0.5em}
-\parinterval 公式\ref{eq:1.12}中，$b^{\infty}(V(\mathbf{s}|\mathbf{t};2))$ 和 $b_{i \leftrightarrow j}^{\infty}(V_{i \leftrightarrow j}(\mathbf{s}|\mathbf{t},2))$ 分别是对 $V(\mathbf{s}|\mathbf{t};3)$ 和 $V_{i \leftrightarrow j}(\mathbf{s}|\mathbf{t},3)$ 的估计。在计算$S$的过程中，需要知道一个对齐$\bf{a}$的邻居$\bf{a}^{'}$的概率，即通过$\textrm{P}_{\theta}(\mathbf{a},\mathbf{s}|\mathbf{t})$计算$\textrm{p}_{\theta}(\mathbf{a}',\mathbf{s}|\mathbf{t})$。在模型3中，如果$\bf{a}$和$\bf{a}'$仅区别于某个源语单词对齐到的目标位置上（$a_j \neq a_{j}'$），那么
+\parinterval 公式\ref{eq:1.12}中，$b^{\infty}(V(\mathbf{s}|\mathbf{t};2))$ 和 $b_{i \leftrightarrow j}^{\infty}(V_{i \leftrightarrow j}(\mathbf{s}|\mathbf{t},2))$ 分别是对 $V(\mathbf{s}|\mathbf{t};3)$ 和 $V_{i \leftrightarrow j}(\mathbf{s}|\mathbf{t},3)$ 的估计。在计算$S$的过程中，需要知道一个对齐$\mathbf{a}$的邻居$\mathbf{a}'$的概率，即通过$\textrm{P}_{\theta}(\mathbf{a},\mathbf{s}|\mathbf{t})$计算$\textrm{P}_{\theta}(\mathbf{a}',\mathbf{s}|\mathbf{t})$。在模型3中，如果$\mathbf{a}$和$\mathbf{a}'$仅区别于某个源语单词对齐到的目标位置上（$a_j \neq a_{j}'$），那么

 \begin{eqnarray}
 \textrm{P}_{\theta}(\mathbf{a}',\mathbf{s}|\mathbf{t}) & = & \textrm{P}_{\theta}(\mathbf{a},\mathbf{s}|\mathbf{t}) \cdot  \nonumber \\
@@ -247,7 +247,7 @@ S = N(b^{\infty}(V(\mathbf{s}|\mathbf{t};2))) \cup (\mathop{\cup}\limits_{ij} N(

 \parinterval 模型4的参数估计基本与模型3一致。需要修改的是扭曲度的估计公式，对于目标语第$i$个cept.生成的第一单词，可以得到（假设有$K$个训练样本）：
 \begin{eqnarray}
-d_1(\Delta_j|ca,cb;\mathbf{s},\mathbf{t}) = \mu_{1cacb}^{-1} \times \sum_{k=1}^{K}c_1(\Delta_j|ca,cb;\mathbf{s}^{[k]},\mathbf{t}^{[k]})
+d_1(\Delta_j|ca,cb) = \mu_{1cacb}^{-1} \times \sum_{k=1}^{K}c_1(\Delta_j|ca,cb;\mathbf{s}^{[k]},\mathbf{t}^{[k]})
 \label{eq:1.15}
 \end{eqnarray}

@@ -255,7 +255,7 @@ d_1(\Delta_j|ca,cb;\mathbf{s},\mathbf{t}) = \mu_{1cacb}^{-1} \times \sum_{k=1}^{

 \begin{eqnarray}
 c_1(\Delta_j|ca,cb;\mathbf{s},\mathbf{t})           & = & \sum_{\mathbf{a}}\big[\textrm{P}_{\theta}(\mathbf{s},\mathbf{a}|\mathbf{t}) \times s_1(\Delta_j|ca,cb;\mathbf{a},\mathbf{s},\mathbf{t})\big] \label{eq:1.16} \\
-s_1(\Delta_j|ca,cb;\rm{a},\mathbf{s},\mathbf{t}) & = & \sum_{i=1}^l \big[\varepsilon(\phi_i) \cdot \delta(\pi_{i1}-\odot _{i},\Delta_j) \cdot \nonumber \\
+s_1(\Delta_j|ca,cb;\mathbf{a},\mathbf{s},\mathbf{t}) & = & \sum_{i=1}^l \big[\varepsilon(\varphi_i) \cdot \delta(\pi_{i1}-\odot _{i},\Delta_j) \cdot \nonumber \\
                                                                           &     & \delta(A(t_{i-1}),ca) \cdot \delta(B(\tau_{i1}),cb) \big] \label{eq:1.17}
 \end{eqnarray}

@@ -272,7 +272,7 @@ s_1(\Delta_j|ca,cb;\rm{a},\mathbf{s},\mathbf{t}) & = & \sum_{i=1}^l \big[\vareps
 对于目标语第$i$个cept.生成的其他单词（非第一个单词），可以得到：

 \begin{eqnarray}
-d_{>1}(\Delta_j|cb;\mathbf{s},\mathbf{t}) = \mu_{>1cb}^{-1} \times \sum_{k=1}^{K}c_{>1}(\Delta_j|cb;\mathbf{s}^{[k]},\mathbf{t}^{[k]})
+d_{>1}(\Delta_j|cb) = \mu_{>1cb}^{-1} \times \sum_{k=1}^{K}c_{>1}(\Delta_j|cb;\mathbf{s}^{[k]},\mathbf{t}^{[k]})
 \label{eq:1.18}
 \end{eqnarray}

@@ -280,7 +280,7 @@ d_{>1}(\Delta_j|cb;\mathbf{s},\mathbf{t}) = \mu_{>1cb}^{-1} \times \sum_{k=1}^{K

 \begin{eqnarray}
 c_{>1}(\Delta_j|cb;\mathbf{s},\mathbf{t})                  & = & \sum_{\mathbf{a}}\big[\textrm{P}_{\theta}(\mathbf{s},\mathbf{a}|\mathbf{t}) \times s_{>1}(\Delta_j|cb;\mathbf{a},\mathbf{s},\mathbf{t}) \big] \label{eq:1.19} \\
-s_{>1}(\Delta_j|cb;\mathbf{a},\mathbf{s},\mathbf{t}) & = & \sum_{i=1}^l \big[\varepsilon(\phi_i-1)\sum_{k=2}^{\phi_i}\delta(\pi_{[i]k}-\pi_{[i]k-1},\Delta_j) \cdot \nonumber ß\\
+s_{>1}(\Delta_j|cb;\mathbf{a},\mathbf{s},\mathbf{t}) & = & \sum_{i=1}^l \big[\varepsilon(\varphi_i-1)\sum_{k=2}^{\varphi_i}\delta(\pi_{[i]k}-\pi_{[i]k-1},\Delta_j) \cdot \nonumber \\
                                                                                  &    & \delta(B(\tau_{[i]k}),cb) \big] \label{eq:1.20}
 \end{eqnarray}

@@ -291,7 +291,7 @@ s_{>1}(\Delta_j|cb;\mathbf{a},\mathbf{s},\mathbf{t}) & = & \sum_{i=1}^l \big[\va
 \label{eq:1.22}
 \end{eqnarray}

-\parinterval 对于一个对齐$\mathbf{a}$，可用模型3对它的邻居进行排名，即按$\textrm{P}_{\theta}(b(\mathbf{a})|\mathbf{s},\mathbf{t};3)$排序，其中$b(\mathbf{a})$表示$\mathbf{a}$的邻居。$\tilde{b}(\mathbf{a})$ 表示这个排名表中满足$\textrm{P}_{\theta}(\mathbf{a}'|\mathbf{s},\mathbf{t};4) > \textrm{P}_{\theta}⁡(\mathbf{a}|\mathbf{s},\mathbf{t};4)$的最高排名的$\mathbf{a}'$。同理可知$\tilde{b}_{i \leftrightarrow j}^{\infty}(\mathbf{a})$ 的意义。这里之所以不用模型3中采用的方法直接利用$b^{\infty}(\mathbf{a})$得到模型4中高概率的对齐，是因为模型4中，要想获得某个对齐$\mathbf{a}$的邻居$\mathbf{a}'$，必须做很大调整，比如：调整$\tau_{[i]1}$和$\odot_{i}$等等。这个过程要比模型3的相应过程复杂得多。因此在模型4中只能借助于模型3的中间步骤来进行参数估计。
+\parinterval 对于一个对齐$\mathbf{a}$，可用模型3对它的邻居进行排名，即按$\textrm{P}_{\theta}(b(\mathbf{a})|\mathbf{s},\mathbf{t};3)$排序，其中$b(\mathbf{a})$表示$\mathbf{a}$的邻居。$\tilde{b}(\mathbf{a})$ 表示这个排名表中满足$\textrm{P}_{\theta}(\mathbf{a}'|\mathbf{s},\mathbf{t};4) > \textrm{P}_{\theta}(\mathbf{a}|\mathbf{s},\mathbf{t};4)$的最高排名的$\mathbf{a}'$。同理可知$\tilde{b}_{i \leftrightarrow j}^{\infty}(\mathbf{a})$ 的意义。这里之所以不用模型3中采用的方法直接利用$b^{\infty}(\mathbf{a})$得到模型4中高概率的对齐，是因为模型4中要想获得某个对齐$\mathbf{a}$的邻居$\mathbf{a}'$必须做很大调整，比如：调整$\tau_{[i]1}$和$\odot_{i}$等等。这个过程要比模型3的相应过程复杂得多。因此在模型4中只能借助于模型3的中间步骤来进行参数估计。
 \setlength{\belowdisplayskip}{3pt}%调整空白大小

 %----------------------------------------------------------------------------------------
@@ -299,10 +299,10 @@ s_{>1}(\Delta_j|cb;\mathbf{a},\mathbf{s},\mathbf{t}) & = & \sum_{i=1}^l \big[\va
 %----------------------------------------------------------------------------------------

 \section{IBM模型5训练方法}
-\parinterval 模型5的参数估计过程也与模型3的过程基本一致，二者的区别在于扭曲度的估计公式。在模型5中，对于目标语第$i$个cept.生成的第一单词，可以得到（假设有$K$个训练样本）：
+\parinterval 模型5的参数估计过程也与模型4的过程基本一致，二者的区别在于扭曲度的估计公式。在模型5中，对于目标语第$i$个cept.生成的第一单词，可以得到（假设有$K$个训练样本）：

 \begin{eqnarray}
-d_1(\Delta_j|cb;\mathbf{s},\mathbf{t}) = \mu_{1cb}^{-1} \times \sum_{k=1}^{K}c_1(\Delta_j|cb;\mathbf{s}^{[k]},\mathbf{t}^{[k]})
+d_1(\Delta_j|cb) = \mu_{1cb}^{-1} \times \sum_{k=1}^{K}c_1(\Delta_j|cb;\mathbf{s}^{[k]},\mathbf{t}^{[k]})
 \label{eq:1.23}
 \end{eqnarray}

@@ -310,15 +310,15 @@ d_1(\Delta_j|cb;\mathbf{s},\mathbf{t}) = \mu_{1cb}^{-1} \times \sum_{k=1}^{K}c_1

 \begin{eqnarray}
 c_1(\Delta_j|cb,v_x,v_y;\mathbf{s},\mathbf{t})                   & = & \sum_{\mathbf{a}}\Big[ \textrm{P}(\mathbf{s},\mathbf{a}|\mathbf{t}) \times s_1(\Delta_j|cb,v_x,v_y;\mathbf{a},\mathbf{s},\mathbf{t}) \Big] \label{eq:1.24} \\
-s_1(\Delta_j|cb,v_x,v_y;\mathbf{a},\mathbf{s},\mathbf{t}) & = & \sum_{i=1}^l \Big [ \varepsilon(\phi_i) \cdot \delta(v_{\pi_{i1}},\Delta_j) \cdot \delta(v_{\odot _{i-1}},v_x) \nonumber \\
-                                                                                          &    & \cdot \delta(v_m-\phi_i+1,v_y) \cdot \delta(v_{\pi_{i1}},v_{\pi_{i1}-1} )\Big] \label{eq:1.25}
+s_1(\Delta_j|cb,v_x,v_y;\mathbf{a},\mathbf{s},\mathbf{t}) & = & \sum_{i=1}^l \Big [ \varepsilon(\varphi_i) \cdot \delta(v_{\pi_{i1}},\Delta_j) \cdot \delta(v_{\odot _{i-1}},v_x) \nonumber \\
+                                                                                          &    & \cdot \delta(v_m-\varphi_i+1,v_y) \cdot \delta(v_{\pi_{i1}},v_{\pi_{i1}-1} )\Big] \label{eq:1.25}
 \end{eqnarray}


 对于目标语第$i$个cept.生成的其他单词（非第一个单词），可以得到：

 \begin{eqnarray}
-d_{>1}(\Delta_j|cb,v;\mathbf{s},\mathbf{t}) = \mu_{>1cb}^{-1} \times \sum_{k=1}^{K}c_{>1}(\Delta_j|cb,v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})
+d_{>1}(\Delta_j|cb,v) = \mu_{>1cb}^{-1} \times \sum_{k=1}^{K}c_{>1}(\Delta_j|cb,v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})
 \label{eq:1.26}
 \end{eqnarray}

@@ -326,18 +326,18 @@ d_{>1}(\Delta_j|cb,v;\mathbf{s},\mathbf{t}) = \mu_{>1cb}^{-1} \times \sum_{k=1}^

 \begin{eqnarray}
 c_{>1}(\Delta_j|cb,v;\mathbf{s},\mathbf{t})                   & =  & \sum_{\mathbf{a}}\Big[\textrm{P}(\mathbf{a},\mathbf{s}|\mathbf{t}) \times s_{>1}(\Delta_j|cb,v;\mathbf{a},\mathbf{s},\mathbf{t}) \Big] \label{eq:1.27} \\
-s_{>1}(\Delta_j|cb,v;\mathbf{a},\mathbf{s},\mathbf{t}) & = & \sum_{i=1}^l\Big[\varepsilon(\phi_i-1)\sum_{k=2}^{\phi_i} \big[\delta(v_{\pi_{ik}}-v_{\pi_{[i]k}-1},\Delta_j)  \nonumber \\
-                                                                                    &     & \cdot \delta(B(\tau_{[i]k}) ,cb) \cdot \delta(v_m-v_{\pi_{i(k-1)}}-\phi_i+k,v) \nonumber \\
+s_{>1}(\Delta_j|cb,v;\mathbf{a},\mathbf{s},\mathbf{t}) & = & \sum_{i=1}^l\Big[\varepsilon(\varphi_i-1)\sum_{k=2}^{\varphi_i} \big[\delta(v_{\pi_{ik}}-v_{\pi_{[i]k}-1},\Delta_j)  \nonumber \\
+                                                                                    &     & \cdot \delta(B(\tau_{[i]k}) ,cb) \cdot \delta(v_m-v_{\pi_{i(k-1)}}-\varphi_i+k,v) \nonumber \\
                                                                                    &     & \cdot \delta(v_{\pi_{i1}},v_{\pi_{i1}-1}) \big] \Big] \label{eq:1.28}
 \end{eqnarray}

 \vspace{0.5em}

-\parinterval 从式(\ref{eq:1.24})中可以看出因子$\delta(v_{\pi_{i1}},v_{\pi_{i1}-1})$保证了，即使对齐$\mathbf{a}$不合理（一个源语位置对应多个目标语位置）也可以避免在这个不合理的对齐上计算结果。需要注意的是因子$\delta(v_{\pi_{p1}},v_{\pi_{p1-1}})$，确保了$\mathbf{a}$中不合理的部分不产生坏的影响，而$\mathbf{a}$中其他正确的部分仍会参与迭代。
+\parinterval 从式\ref{eq:1.24}中可以看出因子$\delta(v_{\pi_{i1}},v_{\pi_{i1}-1})$保证了，即使对齐$\mathbf{a}$不合理（一个源语言位置对应多个目标语言位置）也可以避免在这个不合理的对齐上计算结果。需要注意的是因子$\delta(v_{\pi_{p1}},v_{\pi_{p1-1}})$，确保了$\mathbf{a}$中不合理的部分不产生坏的影响，而$\mathbf{a}$中其他正确的部分仍会参与迭代。

 \parinterval 不过上面的参数估计过程与IBM前4个模型的参数估计过程并不完全一样。IBM前4个模型在每次迭代中，可以在给定$\mathbf{s}$、$\mathbf{t}$和一个对齐$\mathbf{a}$的情况下直接计算并更新参数。但是在模型5的参数估计过程中（如公式\ref{eq:1.24}），需要模拟出由$\mathbf{t}$生成$\mathbf{s}$的过程才能得到正确的结果，因为从$\mathbf{t}$、$\mathbf{s}$和$\mathbf{a}$中是不能直接得到正确结果的。具体说，就是要从目标语言句子的第一个单词开始到最后一个单词结束，依次生成每个目标语言单词对应的源语言单词，每处理完一个目标语言单词就要暂停，然后才能计算式\ref{eq:1.24}中求和符号里面的内容。这也就是说即使给定了$\mathbf{s}$、$\mathbf{t}$和一个对齐$\mathbf{a}$，也不能直接在它们上进行计算，必须重新模拟$\mathbf{t}$到$\mathbf{s}$的生成过程。

-\parinterval 从前面的分析可以看出，虽然模型5比模型4更精确，但是模型5过于复杂以至于给参数估计增加了计算量（对于每组$\mathbf{t}$、$\mathbf{s}$和$\mathbf{a}$都要模拟$\mathbf{t}$生成$\mathbf{s}$的翻译过程）。因此模型5的开发对于系统实现是一个挑战。
+\parinterval 从前面的分析可以看出，虽然模型5比模型4更精确，但是模型5过于复杂以至于给参数估计增加了计算量（对于每组$\mathbf{t}$、$\mathbf{s}$和$\mathbf{a}$都要模拟$\mathbf{t}$生成$\mathbf{s}$的翻译过程）。因此模型5的系统实现是一个挑战。

 \parinterval 在模型5中同样需要定义一个词对齐集合$S$，使得每次迭代都在$S$上进行。可以对$S$进行如下定义
 \begin{eqnarray}
@@ -346,7 +346,7 @@ s_{>1}(\Delta_j|cb,v;\mathbf{a},\mathbf{s},\mathbf{t}) & = & \sum_{i=1}^l\Big[\v
 \end{eqnarray}
 \vspace{0.5em}

-\parinterval 这里$\tilde{\tilde{b}}(\mathbf{a})$借用了模型4中$\tilde{b}(\mathbf{a})$的概念。不过$\tilde{\tilde{b}}(\mathbf{a})$表示在利用模型3进行排名的列表中满足$\textrm{P}_{\theta}(\mathbf{a}'|\mathbf{s},\mathbf{t};5)$的最高排名的词对齐。
+\noindent 其中，$\tilde{\tilde{b}}(\mathbf{a})$借用了模型4中$\tilde{b}(\mathbf{a})$的概念。不过$\tilde{\tilde{b}}(\mathbf{a})$表示在利用模型3进行排名的列表中满足$\textrm{P}_{\theta}(\mathbf{a}'|\mathbf{s},\mathbf{t};5) > \textrm{P}_{\theta}(\mathbf{a}|\mathbf{s},\mathbf{t};5)$的最高排名的词对齐，这里$\mathbf{a}'$表示$\mathbf{a}$的邻居。
 \end{appendices}



--- a/Book/mt-book-xelatex.idx
+++ b/Book/mt-book-xelatex.idx
@@ -7,14 +7,14 @@
 \indexentry{数据驱动|hyperpage}{23}
 \indexentry{Data-Driven|hyperpage}{23}
 \indexentry{编码器-解码器|hyperpage}{30}
-\indexentry{encoder-decoder|hyperpage}{30}
+\indexentry{Encoder-Decoder|hyperpage}{30}
 \indexentry{质量评价|hyperpage}{32}
 \indexentry{Quality Evaluation|hyperpage}{32}
 \indexentry{无参考答案的评价|hyperpage}{32}
 \indexentry{Quality Estimation|hyperpage}{32}
 \indexentry{$n$元语法单元|hyperpage}{33}
-\indexentry{$n$-gram准确率|hyperpage}{34}
-\indexentry{$n$-gram Precision|hyperpage}{34}
+\indexentry{$n$-gram准确率|hyperpage}{33}
+\indexentry{$n$-gram Precision|hyperpage}{33}
 \indexentry{短句惩罚因子|hyperpage}{34}
 \indexentry{Brevity Penalty|hyperpage}{34}
 \indexentry{分词|hyperpage}{50}
@@ -115,10 +115,10 @@
 \indexentry{Disambiguation|hyperpage}{79}
 \indexentry{最左优先推导|hyperpage}{79}
 \indexentry{Left-most Derivation|hyperpage}{79}
-\indexentry{概率上下文无关文法|hyperpage}{81}
-\indexentry{Probabilistic Context-Free Grammar|hyperpage}{81}
-\indexentry{树库|hyperpage}{82}
-\indexentry{Treebank|hyperpage}{82}
+\indexentry{概率上下文无关文法|hyperpage}{80}
+\indexentry{Probabilistic Context-Free Grammar|hyperpage}{80}
+\indexentry{树库|hyperpage}{81}
+\indexentry{Treebank|hyperpage}{81}
 \indexentry{生成模型|hyperpage}{83}
 \indexentry{Generative Model|hyperpage}{83}
 \indexentry{判别模型|hyperpage}{83}
@@ -153,525 +153,525 @@
 \indexentry{The Lagrange Multiplier Method|hyperpage}{113}
 \indexentry{期望最大化|hyperpage}{115}
 \indexentry{Expectation Maximization|hyperpage}{115}
-\indexentry{期望频次|hyperpage}{116}
-\indexentry{Expected Count|hyperpage}{116}
-\indexentry{产出率|hyperpage}{119}
-\indexentry{繁衍率|hyperpage}{119}
-\indexentry{Fertility|hyperpage}{119}
-\indexentry{扭曲度|hyperpage}{121}
-\indexentry{Distortion|hyperpage}{121}
-\indexentry{概念单元|hyperpage}{123}
-\indexentry{概念|hyperpage}{123}
-\indexentry{Concept|hyperpage}{123}
-\indexentry{缺陷|hyperpage}{125}
-\indexentry{Deficiency|hyperpage}{125}
-\indexentry{凸函数|hyperpage}{129}
-\indexentry{Convex function|hyperpage}{129}
-\indexentry{对称化|hyperpage}{130}
-\indexentry{Symmetrization|hyperpage}{130}
-\indexentry{系统偏置|hyperpage}{131}
-\indexentry{System Bias|hyperpage}{131}
-\indexentry{组合性翻译|hyperpage}{136}
-\indexentry{Compositional Translation|hyperpage}{136}
-\indexentry{短语|hyperpage}{136}
-\indexentry{短语切分|hyperpage}{141}
-\indexentry{Phrasal Segmentation|hyperpage}{141}
-\indexentry{短语对|hyperpage}{141}
-\indexentry{推导|hyperpage}{141}
-\indexentry{Derivation|hyperpage}{141}
-\indexentry{生成式模型|hyperpage}{144}
-\indexentry{Generative Model|hyperpage}{144}
-\indexentry{判别式模型|hyperpage}{144}
-\indexentry{Discriminative Model|hyperpage}{144}
-\indexentry{对数线性模型|hyperpage}{145}
-\indexentry{Log-linear Model|hyperpage}{145}
-\indexentry{短语抽取|hyperpage}{146}
-\indexentry{Phrase Extraction|hyperpage}{146}
-\indexentry{词汇化翻译概率|hyperpage}{149}
-\indexentry{Lexical Translation Probability|hyperpage}{149}
-\indexentry{短语表|hyperpage}{150}
-\indexentry{Phrase Table|hyperpage}{150}
-\indexentry{调序|hyperpage}{150}
-\indexentry{Reordering|hyperpage}{150}
-\indexentry{模型训练|hyperpage}{154}
-\indexentry{Model Training|hyperpage}{154}
-\indexentry{权重调优|hyperpage}{154}
-\indexentry{Weight Tuning|hyperpage}{154}
-\indexentry{最小错误率训练|hyperpage}{154}
-\indexentry{Minimum Error Rate Training|hyperpage}{154}
-\indexentry{调优集合|hyperpage}{154}
-\indexentry{Tuning Set|hyperpage}{154}
-\indexentry{线搜索|hyperpage}{155}
-\indexentry{Line Search|hyperpage}{155}
-\indexentry{格搜索|hyperpage}{156}
-\indexentry{Grid Search|hyperpage}{156}
-\indexentry{覆盖度模型|hyperpage}{158}
-\indexentry{Coverage Model|hyperpage}{158}
-\indexentry{翻译候选|hyperpage}{158}
-\indexentry{Translation Candidate|hyperpage}{158}
-\indexentry{翻译假设|hyperpage}{159}
-\indexentry{Translation Hypothesis|hyperpage}{159}
-\indexentry{剪枝|hyperpage}{160}
-\indexentry{Pruning|hyperpage}{160}
-\indexentry{束剪枝|hyperpage}{160}
-\indexentry{Beam Pruning|hyperpage}{160}
-\indexentry{直方图剪枝|hyperpage}{160}
-\indexentry{Histogram Pruning|hyperpage}{160}
-\indexentry{阈值剪枝|hyperpage}{160}
-\indexentry{Threshold Pruning|hyperpage}{160}
-\indexentry{假设重组|hyperpage}{160}
-\indexentry{Hypothesis Recombination|hyperpage}{160}
-\indexentry{基于层次短语的模型|hyperpage}{164}
-\indexentry{Hierarchical Phrase-based Model|hyperpage}{164}
-\indexentry{同步上下文无关文法|hyperpage}{165}
-\indexentry{Synchronous Context-free Grammar|hyperpage}{165}
-\indexentry{基于层次短语的文法|hyperpage}{166}
-\indexentry{Hierarchical Phrase-based Grammar|hyperpage}{166}
-\indexentry{推导|hyperpage}{167}
-\indexentry{Derivation|hyperpage}{167}
-\indexentry{胶水规则|hyperpage}{167}
-\indexentry{Glue Rule|hyperpage}{167}
-\indexentry{乔姆斯基范式|hyperpage}{171}
-\indexentry{Chomsky Normal Form|hyperpage}{171}
-\indexentry{跨度|hyperpage}{171}
-\indexentry{Span|hyperpage}{171}
-\indexentry{自下而上的分析|hyperpage}{172}
-\indexentry{Top-down Parsing|hyperpage}{172}
-\indexentry{束剪枝|hyperpage}{174}
-\indexentry{Beam Pruning|hyperpage}{174}
-\indexentry{立方剪枝|hyperpage}{176}
-\indexentry{Cube Pruning|hyperpage}{176}
+\indexentry{期望频次|hyperpage}{115}
+\indexentry{Expected Count|hyperpage}{115}
+\indexentry{产出率|hyperpage}{118}
+\indexentry{繁衍率|hyperpage}{118}
+\indexentry{Fertility|hyperpage}{118}
+\indexentry{扭曲度|hyperpage}{120}
+\indexentry{Distortion|hyperpage}{120}
+\indexentry{概念单元|hyperpage}{122}
+\indexentry{概念|hyperpage}{122}
+\indexentry{Concept|hyperpage}{122}
+\indexentry{缺陷|hyperpage}{124}
+\indexentry{Deficiency|hyperpage}{124}
+\indexentry{凸函数|hyperpage}{128}
+\indexentry{Convex function|hyperpage}{128}
+\indexentry{对称化|hyperpage}{129}
+\indexentry{Symmetrization|hyperpage}{129}
+\indexentry{系统偏置|hyperpage}{130}
+\indexentry{System Bias|hyperpage}{130}
+\indexentry{组合性翻译|hyperpage}{134}
+\indexentry{Compositional Translation|hyperpage}{134}
+\indexentry{短语|hyperpage}{134}
+\indexentry{短语切分|hyperpage}{139}
+\indexentry{Phrasal Segmentation|hyperpage}{139}
+\indexentry{短语对|hyperpage}{139}
+\indexentry{推导|hyperpage}{140}
+\indexentry{Derivation|hyperpage}{140}
+\indexentry{生成式模型|hyperpage}{142}
+\indexentry{Generative Model|hyperpage}{142}
+\indexentry{判别式模型|hyperpage}{143}
+\indexentry{Discriminative Model|hyperpage}{143}
+\indexentry{对数线性模型|hyperpage}{143}
+\indexentry{Log-linear Model|hyperpage}{143}
+\indexentry{短语抽取|hyperpage}{144}
+\indexentry{Phrase Extraction|hyperpage}{144}
+\indexentry{词汇化翻译概率|hyperpage}{147}
+\indexentry{Lexical Translation Probability|hyperpage}{147}
+\indexentry{短语表|hyperpage}{148}
+\indexentry{Phrase Table|hyperpage}{148}
+\indexentry{调序|hyperpage}{148}
+\indexentry{Reordering|hyperpage}{148}
+\indexentry{模型训练|hyperpage}{152}
+\indexentry{Model Training|hyperpage}{152}
+\indexentry{权重调优|hyperpage}{152}
+\indexentry{Weight Tuning|hyperpage}{152}
+\indexentry{最小错误率训练|hyperpage}{152}
+\indexentry{Minimum Error Rate Training|hyperpage}{152}
+\indexentry{调优集合|hyperpage}{152}
+\indexentry{Tuning Set|hyperpage}{152}
+\indexentry{线搜索|hyperpage}{153}
+\indexentry{Line Search|hyperpage}{153}
+\indexentry{格搜索|hyperpage}{154}
+\indexentry{Grid Search|hyperpage}{154}
+\indexentry{覆盖度模型|hyperpage}{156}
+\indexentry{Coverage Model|hyperpage}{156}
+\indexentry{翻译候选|hyperpage}{156}
+\indexentry{Translation Candidate|hyperpage}{156}
+\indexentry{翻译假设|hyperpage}{157}
+\indexentry{Translation Hypothesis|hyperpage}{157}
+\indexentry{剪枝|hyperpage}{158}
+\indexentry{Pruning|hyperpage}{158}
+\indexentry{束剪枝|hyperpage}{158}
+\indexentry{Beam Pruning|hyperpage}{158}
+\indexentry{直方图剪枝|hyperpage}{158}
+\indexentry{Histogram Pruning|hyperpage}{158}
+\indexentry{阈值剪枝|hyperpage}{158}
+\indexentry{Threshold Pruning|hyperpage}{158}
+\indexentry{假设重组|hyperpage}{158}
+\indexentry{Hypothesis Recombination|hyperpage}{158}
+\indexentry{基于层次短语的模型|hyperpage}{163}
+\indexentry{Hierarchical Phrase-based Model|hyperpage}{163}
+\indexentry{同步上下文无关文法|hyperpage}{163}
+\indexentry{Synchronous Context-free Grammar|hyperpage}{163}
+\indexentry{基于层次短语的文法|hyperpage}{164}
+\indexentry{Hierarchical Phrase-based Grammar|hyperpage}{164}
+\indexentry{推导|hyperpage}{165}
+\indexentry{Derivation|hyperpage}{165}
+\indexentry{胶水规则|hyperpage}{165}
+\indexentry{Glue Rule|hyperpage}{165}
+\indexentry{乔姆斯基范式|hyperpage}{169}
+\indexentry{Chomsky Normal Form|hyperpage}{169}
+\indexentry{跨度|hyperpage}{169}
+\indexentry{Span|hyperpage}{169}
+\indexentry{自下而上的分析|hyperpage}{170}
+\indexentry{Bottom-up Parsing|hyperpage}{170}
+\indexentry{束剪枝|hyperpage}{172}
+\indexentry{Beam Pruning|hyperpage}{172}
+\indexentry{立方剪枝|hyperpage}{174}
+\indexentry{Cube Pruning|hyperpage}{174}
 \indexentry{序列化|hyperpage}{179}
 \indexentry{线性化|hyperpage}{179}
 \indexentry{Linearization|hyperpage}{179}
-\indexentry{树到串翻译规则|hyperpage}{181}
-\indexentry{Tree-to-String Translation Rule|hyperpage}{181}
-\indexentry{树到树翻译规则|hyperpage}{181}
-\indexentry{Tree-to-Tree Translation Rule|hyperpage}{181}
-\indexentry{树片段|hyperpage}{182}
-\indexentry{Tree Fragment|hyperpage}{182}
-\indexentry{同步树替换文法规则|hyperpage}{183}
-\indexentry{Synchronous Tree Substitution Grammar Rule|hyperpage}{183}
-\indexentry{边缘集合|hyperpage}{189}
-\indexentry{Frontier Set|hyperpage}{189}
-\indexentry{最小规则|hyperpage}{190}
-\indexentry{Minimal Rules|hyperpage}{190}
-\indexentry{二叉化|hyperpage}{194}
-\indexentry{Binarization|hyperpage}{194}
-\indexentry{基于短语的特征|hyperpage}{198}
-\indexentry{基于句法的特征|hyperpage}{198}
-\indexentry{有向超图|hyperpage}{199}
-\indexentry{Directed Hyper-graph|hyperpage}{199}
-\indexentry{超边|hyperpage}{199}
-\indexentry{Hyper-edge|hyperpage}{199}
-\indexentry{半环分析|hyperpage}{200}
-\indexentry{Semi-ring Parsing|hyperpage}{200}
-\indexentry{组合|hyperpage}{201}
-\indexentry{Composition|hyperpage}{201}
-\indexentry{基于串的解码|hyperpage}{201}
-\indexentry{String-based Decoding|hyperpage}{201}
-\indexentry{基于树的解码|hyperpage}{201}
-\indexentry{Tree-based Decoding|hyperpage}{201}
-\indexentry{Lexicalized Norm Form|hyperpage}{205}
-\indexentry{人工神经网络|hyperpage}{211}
-\indexentry{Artificial Neural Networks|hyperpage}{211}
-\indexentry{神经网络|hyperpage}{211}
-\indexentry{Neural Networks|hyperpage}{211}
-\indexentry{深度学习|hyperpage}{212}
-\indexentry{Deep Learning|hyperpage}{212}
-\indexentry{连接主义|hyperpage}{213}
-\indexentry{Connectionism|hyperpage}{213}
-\indexentry{分布式表示|hyperpage}{213}
-\indexentry{Distributed representation|hyperpage}{213}
-\indexentry{符号主义|hyperpage}{213}
-\indexentry{Symbolicism|hyperpage}{213}
-\indexentry{端到端学习|hyperpage}{215}
-\indexentry{End-to-End Learning|hyperpage}{215}
-\indexentry{表示学习|hyperpage}{215}
-\indexentry{Representation Learning|hyperpage}{215}
-\indexentry{分布式表示|hyperpage}{216}
-\indexentry{Distributed Representation|hyperpage}{216}
-\indexentry{标量|hyperpage}{217}
-\indexentry{Scalar|hyperpage}{217}
-\indexentry{向量|hyperpage}{217}
-\indexentry{Vector|hyperpage}{217}
-\indexentry{矩阵|hyperpage}{217}
-\indexentry{Matrix|hyperpage}{217}
-\indexentry{转置|hyperpage}{218}
-\indexentry{Transpose|hyperpage}{218}
-\indexentry{按元素加法|hyperpage}{218}
-\indexentry{Element-wise Addition|hyperpage}{218}
-\indexentry{数乘|hyperpage}{219}
-\indexentry{Scalar Multiplication|hyperpage}{219}
-\indexentry{按元素乘积|hyperpage}{220}
-\indexentry{Element-wise Product|hyperpage}{220}
-\indexentry{线性映射|hyperpage}{220}
-\indexentry{Linear Mapping|hyperpage}{220}
-\indexentry{线性变换|hyperpage}{220}
-\indexentry{Linear Transformation|hyperpage}{220}
-\indexentry{范数|hyperpage}{221}
-\indexentry{Norm|hyperpage}{221}
-\indexentry{欧几里得范数|hyperpage}{222}
-\indexentry{Euclidean Norm|hyperpage}{222}
-\indexentry{Frobenius 范数|hyperpage}{222}
-\indexentry{Frobenius Norm|hyperpage}{222}
-\indexentry{权重|hyperpage}{223}
-\indexentry{weight|hyperpage}{223}
-\indexentry{张量|hyperpage}{233}
-\indexentry{Tensor|hyperpage}{233}
-\indexentry{阶|hyperpage}{233}
-\indexentry{Rank|hyperpage}{233}
-\indexentry{广播机制|hyperpage}{237}
-\indexentry{向量化|hyperpage}{237}
-\indexentry{Vectorization|hyperpage}{237}
-\indexentry{前向传播|hyperpage}{241}
-\indexentry{计算图|hyperpage}{242}
-\indexentry{Computation Graph|hyperpage}{242}
-\indexentry{模型参数|hyperpage}{243}
-\indexentry{Model Parameters|hyperpage}{243}
-\indexentry{训练|hyperpage}{243}
-\indexentry{Training|hyperpage}{243}
-\indexentry{有标注数据|hyperpage}{243}
-\indexentry{Annotated Data/Labeled Data|hyperpage}{243}
-\indexentry{有指导的训练|hyperpage}{243}
-\indexentry{有监督的训练|hyperpage}{243}
-\indexentry{Supervised Training|hyperpage}{243}
-\indexentry{训练数据集合|hyperpage}{244}
-\indexentry{Training Data Set|hyperpage}{244}
-\indexentry{损失函数|hyperpage}{244}
-\indexentry{Loss Function|hyperpage}{244}
-\indexentry{目标函数|hyperpage}{244}
-\indexentry{Objective Function|hyperpage}{244}
-\indexentry{代价函数|hyperpage}{246}
-\indexentry{Cost Function|hyperpage}{246}
-\indexentry{梯度下降方法|hyperpage}{246}
-\indexentry{Gradient Descent Method|hyperpage}{246}
-\indexentry{参数更新的规则|hyperpage}{246}
-\indexentry{Update Rule|hyperpage}{246}
-\indexentry{学习率|hyperpage}{246}
-\indexentry{Learning Rate|hyperpage}{246}
-\indexentry{基于梯度的方法|hyperpage}{246}
-\indexentry{Gradient-based Method|hyperpage}{246}
-\indexentry{批量梯度下降|hyperpage}{247}
-\indexentry{Batch Gradient Descent|hyperpage}{247}
-\indexentry{随机梯度下降|hyperpage}{247}
-\indexentry{Stochastic Gradient Descent|hyperpage}{247}
-\indexentry{小批量梯度下降|hyperpage}{247}
-\indexentry{Mini-Batch Gradient Descent|hyperpage}{247}
-\indexentry{数值微分|hyperpage}{248}
-\indexentry{Numerical Differentiation|hyperpage}{248}
-\indexentry{截断误差|hyperpage}{248}
-\indexentry{Truncation Error|hyperpage}{248}
-\indexentry{舍入误差|hyperpage}{248}
-\indexentry{Round-off Error|hyperpage}{248}
-\indexentry{符号微分|hyperpage}{249}
-\indexentry{Symbolic Differentiation|hyperpage}{249}
-\indexentry{表达式膨胀|hyperpage}{249}
-\indexentry{Expression Swell|hyperpage}{249}
-\indexentry{自动微分|hyperpage}{249}
-\indexentry{Automatic Differentiation|hyperpage}{249}
-\indexentry{反向模式|hyperpage}{250}
-\indexentry{Backward Mode|hyperpage}{250}
-\indexentry{学习率|hyperpage}{251}
-\indexentry{Learning Rate|hyperpage}{251}
-\indexentry{Momentum|hyperpage}{251}
-\indexentry{AdaGrad|hyperpage}{252}
-\indexentry{衰减|hyperpage}{252}
-\indexentry{Decay|hyperpage}{252}
-\indexentry{RMSprop|hyperpage}{252}
-\indexentry{Adam|hyperpage}{253}
-\indexentry{数据并行|hyperpage}{253}
-\indexentry{同步更新|hyperpage}{254}
-\indexentry{Synchronous Update|hyperpage}{254}
-\indexentry{异步更新|hyperpage}{254}
-\indexentry{Asynchronous Update|hyperpage}{254}
-\indexentry{参数服务器|hyperpage}{254}
-\indexentry{Parameter Server|hyperpage}{254}
-\indexentry{梯度消失|hyperpage}{255}
-\indexentry{Gradient Vanishing|hyperpage}{255}
-\indexentry{梯度爆炸|hyperpage}{255}
-\indexentry{Gradient Explosion|hyperpage}{255}
-\indexentry{梯度裁剪|hyperpage}{256}
-\indexentry{Gradient Clipping|hyperpage}{256}
-\indexentry{批量归一化|hyperpage}{257}
-\indexentry{Batch Normalization|hyperpage}{257}
-\indexentry{层归一化|hyperpage}{257}
-\indexentry{Layer Normalization|hyperpage}{257}
-\indexentry{残差网络|hyperpage}{257}
-\indexentry{Residual Networks|hyperpage}{257}
-\indexentry{跳接|hyperpage}{257}
-\indexentry{Shortcut Connection|hyperpage}{257}
-\indexentry{过拟合|hyperpage}{258}
-\indexentry{Overfitting|hyperpage}{258}
-\indexentry{正则化|hyperpage}{258}
-\indexentry{Regularization|hyperpage}{258}
-\indexentry{反向传播|hyperpage}{259}
-\indexentry{back propagation|hyperpage}{259}
-\indexentry{神经语言模型|hyperpage}{265}
-\indexentry{Neural Language Model|hyperpage}{265}
-\indexentry{前馈神经网络语言模型|hyperpage}{266}
-\indexentry{Feed-forward Neural Network Language Model|hyperpage}{266}
-\indexentry{循环神经网络|hyperpage}{268}
-\indexentry{Recurrent Neural Network|hyperpage}{268}
-\indexentry{循环神经网络语言模型|hyperpage}{268}
-\indexentry{RNNLM|hyperpage}{268}
-\indexentry{循环单元|hyperpage}{268}
-\indexentry{RNN Cell|hyperpage}{268}
-\indexentry{自注意力机制|hyperpage}{270}
-\indexentry{Self-Attention Mechanism|hyperpage}{270}
-\indexentry{注意力权重|hyperpage}{270}
-\indexentry{Attention Weight|hyperpage}{270}
-\indexentry{困惑度|hyperpage}{271}
-\indexentry{Perplexity|hyperpage}{271}
-\indexentry{One-hot编码|hyperpage}{271}
-\indexentry{独热编码|hyperpage}{271}
-\indexentry{分布式表示|hyperpage}{272}
-\indexentry{Distributed Representation|hyperpage}{272}
-\indexentry{词嵌入|hyperpage}{272}
-\indexentry{Word Embedding|hyperpage}{272}
-\indexentry{句子表示模型|hyperpage}{274}
-\indexentry{句子的表示|hyperpage}{274}
-\indexentry{表示学习|hyperpage}{274}
-\indexentry{Representation Learning|hyperpage}{274}
-\indexentry{可解释机器学习|hyperpage}{278}
-\indexentry{Explainable Machine Learning|hyperpage}{278}
-\indexentry{神经机器翻译|hyperpage}{281}
-\indexentry{Neural Machine Translation|hyperpage}{281}
-\indexentry{分布式表示|hyperpage}{283}
-\indexentry{Distributed Representation|hyperpage}{283}
-\indexentry{特征工程|hyperpage}{289}
-\indexentry{Feature Engineering|hyperpage}{289}
-\indexentry{编码器-解码器模型|hyperpage}{290}
-\indexentry{Encoder-Decoder Paradigm|hyperpage}{290}
-\indexentry{编码器-解码器框架|hyperpage}{290}
-\indexentry{循环神经网络|hyperpage}{295}
-\indexentry{Recurrent Neural Network, RNN|hyperpage}{295}
-\indexentry{词嵌入|hyperpage}{297}
-\indexentry{Word Embedding|hyperpage}{297}
-\indexentry{表示学习|hyperpage}{297}
-\indexentry{Representation Learning|hyperpage}{297}
-\indexentry{生成|hyperpage}{297}
-\indexentry{Generation|hyperpage}{297}
-\indexentry{长短时记忆|hyperpage}{302}
-\indexentry{Long Short-Term Memory|hyperpage}{302}
-\indexentry{遗忘|hyperpage}{302}
-\indexentry{记忆更新|hyperpage}{303}
-\indexentry{输出|hyperpage}{303}
-\indexentry{门循环单元|hyperpage}{304}
-\indexentry{Gated Recurrent Unit，GRU|hyperpage}{304}
-\indexentry{注意力权重|hyperpage}{309}
-\indexentry{Attention Weight|hyperpage}{309}
-\indexentry{一阶矩估计|hyperpage}{315}
-\indexentry{First Moment Estimation|hyperpage}{315}
-\indexentry{二阶矩估计|hyperpage}{315}
-\indexentry{Second Moment Estimation|hyperpage}{315}
-\indexentry{学习率|hyperpage}{316}
-\indexentry{Learning Rate|hyperpage}{316}
-\indexentry{逐渐预热|hyperpage}{316}
-\indexentry{Gradual Warmup|hyperpage}{316}
-\indexentry{分段常数衰减|hyperpage}{317}
-\indexentry{Piecewise Constant Decay|hyperpage}{317}
-\indexentry{数据并行|hyperpage}{318}
-\indexentry{模型并行|hyperpage}{318}
-\indexentry{全搜索|hyperpage}{320}
-\indexentry{Full Search|hyperpage}{320}
-\indexentry{贪婪搜索|hyperpage}{320}
-\indexentry{Greedy Search|hyperpage}{320}
-\indexentry{束搜索|hyperpage}{320}
-\indexentry{Beam Search|hyperpage}{320}
-\indexentry{自回归模型|hyperpage}{320}
-\indexentry{Autoregressive Model|hyperpage}{321}
-\indexentry{非自回归模型|hyperpage}{321}
-\indexentry{Non-autoregressive Model|hyperpage}{321}
-\indexentry{自注意力机制|hyperpage}{326}
-\indexentry{Self-Attention|hyperpage}{326}
-\indexentry{特征提取|hyperpage}{327}
-\indexentry{自注意力子层|hyperpage}{328}
-\indexentry{Self-attention Sub-layer|hyperpage}{328}
-\indexentry{前馈神经网络子层|hyperpage}{328}
-\indexentry{Feed-forward Sub-layer|hyperpage}{328}
-\indexentry{残差连接|hyperpage}{328}
-\indexentry{Residual Connection|hyperpage}{328}
-\indexentry{层正则化|hyperpage}{328}
-\indexentry{Layer Normalization|hyperpage}{328}
-\indexentry{编码-解码注意力子层|hyperpage}{329}
-\indexentry{Encoder-decoder Attention Sub-layer|hyperpage}{329}
-\indexentry{词嵌入|hyperpage}{329}
-\indexentry{Word Embedding|hyperpage}{329}
-\indexentry{位置编码|hyperpage}{329}
-\indexentry{Position Embedding|hyperpage}{329}
-\indexentry{点乘注意力|hyperpage}{333}
-\indexentry{Scaled Dot-Product Attention|hyperpage}{333}
-\indexentry{多头注意力|hyperpage}{335}
-\indexentry{Multi-head Attention|hyperpage}{335}
-\indexentry{残差连接|hyperpage}{336}
-\indexentry{短连接|hyperpage}{337}
-\indexentry{Short-cut Connection|hyperpage}{337}
-\indexentry{后正则化|hyperpage}{338}
-\indexentry{Post-norm|hyperpage}{338}
-\indexentry{前正则化|hyperpage}{338}
-\indexentry{Pre-norm|hyperpage}{338}
-\indexentry{交叉熵损失|hyperpage}{339}
-\indexentry{Cross Entropy Loss|hyperpage}{339}
-\indexentry{预热|hyperpage}{339}
-\indexentry{Warmup|hyperpage}{339}
-\indexentry{小批量训练|hyperpage}{340}
-\indexentry{Mini-batch Training|hyperpage}{340}
-\indexentry{Dropout|hyperpage}{340}
-\indexentry{过拟合|hyperpage}{340}
-\indexentry{Over fitting|hyperpage}{340}
-\indexentry{标签平滑|hyperpage}{340}
-\indexentry{Label Smoothing|hyperpage}{340}
-\indexentry{序列到序列的转换/生成问题|hyperpage}{342}
-\indexentry{Sequence-to-Sequence Problem|hyperpage}{342}
-\indexentry{未登录词|hyperpage}{353}
-\indexentry{Out of Vocabulary Word，OOV Word|hyperpage}{353}
-\indexentry{子词切分|hyperpage}{353}
-\indexentry{Sub-word Segmentation|hyperpage}{353}
-\indexentry{标准化|hyperpage}{353}
-\indexentry{Normalization|hyperpage}{353}
-\indexentry{数据清洗|hyperpage}{353}
-\indexentry{Dada Cleaning|hyperpage}{353}
-\indexentry{数据选择|hyperpage}{355}
-\indexentry{Data Selection|hyperpage}{355}
-\indexentry{数据过滤|hyperpage}{355}
-\indexentry{Data Filtering|hyperpage}{355}
-\indexentry{开放词表|hyperpage}{358}
-\indexentry{Open-Vocabulary|hyperpage}{358}
-\indexentry{子词|hyperpage}{359}
-\indexentry{Sub-word|hyperpage}{359}
-\indexentry{字节对编码|hyperpage}{359}
-\indexentry{双字节编码|hyperpage}{359}
-\indexentry{Byte Pair Encoding，BPE|hyperpage}{359}
-\indexentry{正则化|hyperpage}{362}
-\indexentry{Regularization|hyperpage}{362}
-\indexentry{过拟合问题|hyperpage}{362}
-\indexentry{Overfitting Problem|hyperpage}{362}
-\indexentry{反问题|hyperpage}{362}
-\indexentry{Inverse Problem|hyperpage}{362}
-\indexentry{适定的|hyperpage}{363}
-\indexentry{Well-posed|hyperpage}{363}
-\indexentry{不适定问题|hyperpage}{363}
-\indexentry{Ill-posed Problem|hyperpage}{363}
-\indexentry{降噪|hyperpage}{363}
-\indexentry{Denoising|hyperpage}{363}
-\indexentry{泛化|hyperpage}{364}
-\indexentry{Generalization|hyperpage}{364}
-\indexentry{标签平滑|hyperpage}{365}
-\indexentry{Label Smoothing|hyperpage}{365}
-\indexentry{相互适应|hyperpage}{366}
-\indexentry{Co-Adaptation|hyperpage}{366}
-\indexentry{集成学习|hyperpage}{368}
-\indexentry{Ensemble Learning|hyperpage}{368}
-\indexentry{容量|hyperpage}{369}
-\indexentry{Capacity|hyperpage}{369}
-\indexentry{宽残差网络|hyperpage}{369}
-\indexentry{Wide Residual Network|hyperpage}{369}
-\indexentry{探测任务|hyperpage}{371}
-\indexentry{Probing Task|hyperpage}{371}
-\indexentry{表面信息|hyperpage}{371}
-\indexentry{Surface Information|hyperpage}{371}
-\indexentry{语法信息|hyperpage}{371}
-\indexentry{Syntactic Information|hyperpage}{371}
-\indexentry{语义信息|hyperpage}{371}
-\indexentry{Semantic Information|hyperpage}{371}
-\indexentry{词嵌入|hyperpage}{371}
-\indexentry{Embedding|hyperpage}{371}
-\indexentry{数据并行|hyperpage}{372}
-\indexentry{Data Parallelism|hyperpage}{372}
-\indexentry{模型并行|hyperpage}{372}
-\indexentry{Model Parallelism|hyperpage}{372}
-\indexentry{小批量训练|hyperpage}{372}
-\indexentry{Mini-batch Training|hyperpage}{372}
-\indexentry{课程学习|hyperpage}{374}
-\indexentry{Curriculum Learning|hyperpage}{374}
-\indexentry{推断|hyperpage}{375}
-\indexentry{Inference|hyperpage}{375}
-\indexentry{解码|hyperpage}{375}
-\indexentry{Decoding|hyperpage}{375}
-\indexentry{准确性|hyperpage}{375}
-\indexentry{Accuracy|hyperpage}{375}
-\indexentry{时延|hyperpage}{375}
-\indexentry{Latency|hyperpage}{375}
-\indexentry{时延|hyperpage}{375}
-\indexentry{Memory|hyperpage}{375}
-\indexentry{搜索错误|hyperpage}{375}
-\indexentry{Search Error|hyperpage}{375}
-\indexentry{模型错误|hyperpage}{375}
-\indexentry{Modeling Error|hyperpage}{375}
-\indexentry{重排序|hyperpage}{377}
-\indexentry{Re-ranking|hyperpage}{377}
-\indexentry{双向推断|hyperpage}{377}
-\indexentry{Bidirectional Inference|hyperpage}{377}
-\indexentry{批量推断|hyperpage}{381}
-\indexentry{Batch Inference|hyperpage}{381}
-\indexentry{批量处理|hyperpage}{381}
-\indexentry{Batching|hyperpage}{381}
-\indexentry{二值网络|hyperpage}{383}
-\indexentry{Binarized Neural Networks|hyperpage}{383}
-\indexentry{自回归翻译|hyperpage}{383}
-\indexentry{Autoregressive Translation|hyperpage}{383}
-\indexentry{非自回归翻译|hyperpage}{383}
-\indexentry{Regressive Translation|hyperpage}{383}
-\indexentry{繁衍率|hyperpage}{383}
-\indexentry{Fertility|hyperpage}{383}
-\indexentry{偏置|hyperpage}{385}
-\indexentry{Bias|hyperpage}{385}
-\indexentry{退化|hyperpage}{385}
-\indexentry{Degenerate|hyperpage}{385}
-\indexentry{过翻译|hyperpage}{386}
-\indexentry{Over Translation|hyperpage}{386}
-\indexentry{欠翻译|hyperpage}{386}
-\indexentry{Under Translation|hyperpage}{386}
-\indexentry{充分性|hyperpage}{387}
-\indexentry{Adequacy|hyperpage}{387}
-\indexentry{系统融合|hyperpage}{388}
-\indexentry{System Combination|hyperpage}{388}
-\indexentry{假设选择|hyperpage}{388}
-\indexentry{Hypothesis Selection|hyperpage}{388}
-\indexentry{多样性|hyperpage}{388}
-\indexentry{Diversity|hyperpage}{388}
-\indexentry{重排序|hyperpage}{389}
-\indexentry{Re-ranking|hyperpage}{389}
-\indexentry{混淆网络|hyperpage}{390}
-\indexentry{Confusion Network|hyperpage}{390}
-\indexentry{动态线性层聚合方法|hyperpage}{394}
-\indexentry{Dynamic Linear Combination of Layers，DLCL|hyperpage}{394}
-\indexentry{相互适应|hyperpage}{398}
-\indexentry{Co-adaptation|hyperpage}{398}
-\indexentry{数据增强|hyperpage}{401}
-\indexentry{Data Augmentation|hyperpage}{401}
-\indexentry{回译|hyperpage}{401}
-\indexentry{Back Translation|hyperpage}{401}
-\indexentry{迭代式回译|hyperpage}{401}
-\indexentry{Iterative Back Translation|hyperpage}{401}
-\indexentry{前向翻译|hyperpage}{402}
-\indexentry{Forward Translation|hyperpage}{402}
-\indexentry{预训练|hyperpage}{402}
-\indexentry{Pre-training|hyperpage}{402}
-\indexentry{微调|hyperpage}{402}
-\indexentry{Fine-tuning|hyperpage}{402}
-\indexentry{多任务学习|hyperpage}{404}
-\indexentry{Multitask Learning|hyperpage}{404}
-\indexentry{模型压缩|hyperpage}{405}
-\indexentry{Model Compression|hyperpage}{405}
-\indexentry{学习难度|hyperpage}{405}
-\indexentry{Learning Difficulty|hyperpage}{406}
-\indexentry{教师模型|hyperpage}{406}
-\indexentry{Teacher Model|hyperpage}{406}
-\indexentry{学生模型|hyperpage}{406}
-\indexentry{Student Model|hyperpage}{406}
-\indexentry{基于单词的知识精炼|hyperpage}{406}
-\indexentry{Word-level Knowledge Distillation|hyperpage}{406}
-\indexentry{基于序列的知识精炼|hyperpage}{407}
-\indexentry{Sequence-level Knowledge Distillation|hyperpage}{407}
-\indexentry{中间层输出|hyperpage}{408}
-\indexentry{Hint-based Knowledge Transfer|hyperpage}{408}
-\indexentry{注意力分布|hyperpage}{408}
-\indexentry{Attention To Attention Transfer|hyperpage}{408}
-\indexentry{循环一致性|hyperpage}{410}
-\indexentry{Circle Consistency|hyperpage}{410}
-\indexentry{翻译中回译|hyperpage}{411}
-\indexentry{On-the-fly Back-translation|hyperpage}{411}
-\indexentry{网络结构搜索技术|hyperpage}{414}
-\indexentry{Neural Architecture Search；NAS|hyperpage}{414}
+\indexentry{树到串翻译规则|hyperpage}{179}
+\indexentry{Tree-to-String Translation Rule|hyperpage}{179}
+\indexentry{树到树翻译规则|hyperpage}{179}
+\indexentry{Tree-to-Tree Translation Rule|hyperpage}{179}
+\indexentry{树片段|hyperpage}{180}
+\indexentry{Tree Fragment|hyperpage}{180}
+\indexentry{同步树替换文法规则|hyperpage}{181}
+\indexentry{Synchronous Tree Substitution Grammar Rule|hyperpage}{181}
+\indexentry{边缘集合|hyperpage}{187}
+\indexentry{Frontier Set|hyperpage}{187}
+\indexentry{最小规则|hyperpage}{188}
+\indexentry{Minimal Rules|hyperpage}{188}
+\indexentry{二叉化|hyperpage}{191}
+\indexentry{Binarization|hyperpage}{191}
+\indexentry{基于短语的特征|hyperpage}{195}
+\indexentry{基于句法的特征|hyperpage}{195}
+\indexentry{有向超图|hyperpage}{196}
+\indexentry{Directed Hyper-graph|hyperpage}{196}
+\indexentry{超边|hyperpage}{196}
+\indexentry{Hyper-edge|hyperpage}{196}
+\indexentry{半环分析|hyperpage}{197}
+\indexentry{Semi-ring Parsing|hyperpage}{197}
+\indexentry{组合|hyperpage}{198}
+\indexentry{Composition|hyperpage}{198}
+\indexentry{基于串的解码|hyperpage}{199}
+\indexentry{String-based Decoding|hyperpage}{199}
+\indexentry{基于树的解码|hyperpage}{199}
+\indexentry{Tree-based Decoding|hyperpage}{199}
+\indexentry{Lexicalized Norm Form|hyperpage}{202}
+\indexentry{人工神经网络|hyperpage}{207}
+\indexentry{Artificial Neural Networks|hyperpage}{207}
+\indexentry{神经网络|hyperpage}{207}
+\indexentry{Neural Networks|hyperpage}{207}
+\indexentry{深度学习|hyperpage}{208}
+\indexentry{Deep Learning|hyperpage}{208}
+\indexentry{连接主义|hyperpage}{209}
+\indexentry{Connectionism|hyperpage}{209}
+\indexentry{分布式表示|hyperpage}{209}
+\indexentry{Distributed representation|hyperpage}{209}
+\indexentry{符号主义|hyperpage}{209}
+\indexentry{Symbolicism|hyperpage}{209}
+\indexentry{端到端学习|hyperpage}{211}
+\indexentry{End-to-End Learning|hyperpage}{211}
+\indexentry{表示学习|hyperpage}{211}
+\indexentry{Representation Learning|hyperpage}{211}
+\indexentry{分布式表示|hyperpage}{212}
+\indexentry{Distributed Representation|hyperpage}{212}
+\indexentry{标量|hyperpage}{213}
+\indexentry{Scalar|hyperpage}{213}
+\indexentry{向量|hyperpage}{213}
+\indexentry{Vector|hyperpage}{213}
+\indexentry{矩阵|hyperpage}{213}
+\indexentry{Matrix|hyperpage}{213}
+\indexentry{转置|hyperpage}{214}
+\indexentry{Transpose|hyperpage}{214}
+\indexentry{按元素加法|hyperpage}{214}
+\indexentry{Element-wise Addition|hyperpage}{214}
+\indexentry{数乘|hyperpage}{215}
+\indexentry{Scalar Multiplication|hyperpage}{215}
+\indexentry{按元素乘积|hyperpage}{216}
+\indexentry{Element-wise Product|hyperpage}{216}
+\indexentry{线性映射|hyperpage}{216}
+\indexentry{Linear Mapping|hyperpage}{216}
+\indexentry{线性变换|hyperpage}{216}
+\indexentry{Linear Transformation|hyperpage}{216}
+\indexentry{范数|hyperpage}{217}
+\indexentry{Norm|hyperpage}{217}
+\indexentry{欧几里得范数|hyperpage}{218}
+\indexentry{Euclidean Norm|hyperpage}{218}
+\indexentry{Frobenius 范数|hyperpage}{218}
+\indexentry{Frobenius Norm|hyperpage}{218}
+\indexentry{权重|hyperpage}{219}
+\indexentry{weight|hyperpage}{219}
+\indexentry{张量|hyperpage}{229}
+\indexentry{Tensor|hyperpage}{229}
+\indexentry{阶|hyperpage}{229}
+\indexentry{Rank|hyperpage}{229}
+\indexentry{广播机制|hyperpage}{233}
+\indexentry{向量化|hyperpage}{233}
+\indexentry{Vectorization|hyperpage}{233}
+\indexentry{前向传播|hyperpage}{237}
+\indexentry{计算图|hyperpage}{238}
+\indexentry{Computation Graph|hyperpage}{238}
+\indexentry{模型参数|hyperpage}{239}
+\indexentry{Model Parameters|hyperpage}{239}
+\indexentry{训练|hyperpage}{239}
+\indexentry{Training|hyperpage}{239}
+\indexentry{有标注数据|hyperpage}{239}
+\indexentry{Annotated Data/Labeled Data|hyperpage}{239}
+\indexentry{有指导的训练|hyperpage}{239}
+\indexentry{有监督的训练|hyperpage}{239}
+\indexentry{Supervised Training|hyperpage}{239}
+\indexentry{训练数据集合|hyperpage}{240}
+\indexentry{Training Data Set|hyperpage}{240}
+\indexentry{损失函数|hyperpage}{240}
+\indexentry{Loss Function|hyperpage}{240}
+\indexentry{目标函数|hyperpage}{240}
+\indexentry{Objective Function|hyperpage}{240}
+\indexentry{代价函数|hyperpage}{242}
+\indexentry{Cost Function|hyperpage}{242}
+\indexentry{梯度下降方法|hyperpage}{242}
+\indexentry{Gradient Descent Method|hyperpage}{242}
+\indexentry{参数更新的规则|hyperpage}{242}
+\indexentry{Update Rule|hyperpage}{242}
+\indexentry{学习率|hyperpage}{242}
+\indexentry{Learning Rate|hyperpage}{242}
+\indexentry{基于梯度的方法|hyperpage}{242}
+\indexentry{Gradient-based Method|hyperpage}{242}
+\indexentry{批量梯度下降|hyperpage}{243}
+\indexentry{Batch Gradient Descent|hyperpage}{243}
+\indexentry{随机梯度下降|hyperpage}{243}
+\indexentry{Stochastic Gradient Descent|hyperpage}{243}
+\indexentry{小批量梯度下降|hyperpage}{243}
+\indexentry{Mini-Batch Gradient Descent|hyperpage}{243}
+\indexentry{数值微分|hyperpage}{244}
+\indexentry{Numerical Differentiation|hyperpage}{244}
+\indexentry{截断误差|hyperpage}{244}
+\indexentry{Truncation Error|hyperpage}{244}
+\indexentry{舍入误差|hyperpage}{244}
+\indexentry{Round-off Error|hyperpage}{244}
+\indexentry{符号微分|hyperpage}{245}
+\indexentry{Symbolic Differentiation|hyperpage}{245}
+\indexentry{表达式膨胀|hyperpage}{245}
+\indexentry{Expression Swell|hyperpage}{245}
+\indexentry{自动微分|hyperpage}{245}
+\indexentry{Automatic Differentiation|hyperpage}{245}
+\indexentry{反向模式|hyperpage}{246}
+\indexentry{Backward Mode|hyperpage}{246}
+\indexentry{学习率|hyperpage}{247}
+\indexentry{Learning Rate|hyperpage}{247}
+\indexentry{Momentum|hyperpage}{247}
+\indexentry{AdaGrad|hyperpage}{248}
+\indexentry{衰减|hyperpage}{248}
+\indexentry{Decay|hyperpage}{248}
+\indexentry{RMSprop|hyperpage}{248}
+\indexentry{Adam|hyperpage}{249}
+\indexentry{数据并行|hyperpage}{249}
+\indexentry{同步更新|hyperpage}{250}
+\indexentry{Synchronous Update|hyperpage}{250}
+\indexentry{异步更新|hyperpage}{250}
+\indexentry{Asynchronous Update|hyperpage}{250}
+\indexentry{参数服务器|hyperpage}{250}
+\indexentry{Parameter Server|hyperpage}{250}
+\indexentry{梯度消失|hyperpage}{251}
+\indexentry{Gradient Vanishing|hyperpage}{251}
+\indexentry{梯度爆炸|hyperpage}{251}
+\indexentry{Gradient Explosion|hyperpage}{251}
+\indexentry{梯度裁剪|hyperpage}{252}
+\indexentry{Gradient Clipping|hyperpage}{252}
+\indexentry{批量归一化|hyperpage}{253}
+\indexentry{Batch Normalization|hyperpage}{253}
+\indexentry{层归一化|hyperpage}{253}
+\indexentry{Layer Normalization|hyperpage}{253}
+\indexentry{残差网络|hyperpage}{253}
+\indexentry{Residual Networks|hyperpage}{253}
+\indexentry{跳接|hyperpage}{253}
+\indexentry{Shortcut Connection|hyperpage}{253}
+\indexentry{过拟合|hyperpage}{254}
+\indexentry{Overfitting|hyperpage}{254}
+\indexentry{正则化|hyperpage}{254}
+\indexentry{Regularization|hyperpage}{254}
+\indexentry{反向传播|hyperpage}{255}
+\indexentry{back propagation|hyperpage}{255}
+\indexentry{神经语言模型|hyperpage}{261}
+\indexentry{Neural Language Model|hyperpage}{261}
+\indexentry{前馈神经网络语言模型|hyperpage}{262}
+\indexentry{Feed-forward Neural Network Language Model|hyperpage}{262}
+\indexentry{循环神经网络|hyperpage}{264}
+\indexentry{Recurrent Neural Network|hyperpage}{264}
+\indexentry{循环神经网络语言模型|hyperpage}{264}
+\indexentry{RNNLM|hyperpage}{264}
+\indexentry{循环单元|hyperpage}{264}
+\indexentry{RNN Cell|hyperpage}{264}
+\indexentry{自注意力机制|hyperpage}{266}
+\indexentry{Self-Attention Mechanism|hyperpage}{266}
+\indexentry{注意力权重|hyperpage}{266}
+\indexentry{Attention Weight|hyperpage}{266}
+\indexentry{困惑度|hyperpage}{267}
+\indexentry{Perplexity|hyperpage}{267}
+\indexentry{One-hot编码|hyperpage}{267}
+\indexentry{独热编码|hyperpage}{267}
+\indexentry{分布式表示|hyperpage}{268}
+\indexentry{Distributed Representation|hyperpage}{268}
+\indexentry{词嵌入|hyperpage}{268}
+\indexentry{Word Embedding|hyperpage}{268}
+\indexentry{句子表示模型|hyperpage}{270}
+\indexentry{句子的表示|hyperpage}{270}
+\indexentry{表示学习|hyperpage}{270}
+\indexentry{Representation Learning|hyperpage}{270}
+\indexentry{可解释机器学习|hyperpage}{274}
+\indexentry{Explainable Machine Learning|hyperpage}{274}
+\indexentry{神经机器翻译|hyperpage}{275}
+\indexentry{Neural Machine Translation|hyperpage}{275}
+\indexentry{分布式表示|hyperpage}{277}
+\indexentry{Distributed Representation|hyperpage}{277}
+\indexentry{特征工程|hyperpage}{283}
+\indexentry{Feature Engineering|hyperpage}{283}
+\indexentry{编码器-解码器模型|hyperpage}{283}
+\indexentry{Encoder-Decoder Paradigm|hyperpage}{283}
+\indexentry{编码器-解码器框架|hyperpage}{283}
+\indexentry{循环神经网络|hyperpage}{289}
+\indexentry{Recurrent Neural Network, RNN|hyperpage}{289}
+\indexentry{词嵌入|hyperpage}{291}
+\indexentry{Word Embedding|hyperpage}{291}
+\indexentry{表示学习|hyperpage}{291}
+\indexentry{Representation Learning|hyperpage}{291}
+\indexentry{生成|hyperpage}{291}
+\indexentry{Generation|hyperpage}{291}
+\indexentry{长短时记忆|hyperpage}{295}
+\indexentry{Long Short-Term Memory|hyperpage}{295}
+\indexentry{遗忘|hyperpage}{296}
+\indexentry{记忆更新|hyperpage}{296}
+\indexentry{输出|hyperpage}{297}
+\indexentry{门循环单元|hyperpage}{298}
+\indexentry{Gated Recurrent Unit，GRU|hyperpage}{298}
+\indexentry{注意力权重|hyperpage}{303}
+\indexentry{Attention Weight|hyperpage}{303}
+\indexentry{一阶矩估计|hyperpage}{309}
+\indexentry{First Moment Estimation|hyperpage}{309}
+\indexentry{二阶矩估计|hyperpage}{309}
+\indexentry{Second Moment Estimation|hyperpage}{309}
+\indexentry{学习率|hyperpage}{309}
+\indexentry{Learning Rate|hyperpage}{309}
+\indexentry{逐渐预热|hyperpage}{310}
+\indexentry{Gradual Warmup|hyperpage}{310}
+\indexentry{分段常数衰减|hyperpage}{311}
+\indexentry{Piecewise Constant Decay|hyperpage}{311}
+\indexentry{数据并行|hyperpage}{311}
+\indexentry{模型并行|hyperpage}{312}
+\indexentry{全搜索|hyperpage}{313}
+\indexentry{Full Search|hyperpage}{313}
+\indexentry{贪婪搜索|hyperpage}{313}
+\indexentry{Greedy Search|hyperpage}{313}
+\indexentry{束搜索|hyperpage}{314}
+\indexentry{Beam Search|hyperpage}{314}
+\indexentry{自回归模型|hyperpage}{314}
+\indexentry{Autoregressive Model|hyperpage}{314}
+\indexentry{非自回归模型|hyperpage}{314}
+\indexentry{Non-autoregressive Model|hyperpage}{314}
+\indexentry{自注意力机制|hyperpage}{320}
+\indexentry{Self-Attention|hyperpage}{320}
+\indexentry{特征提取|hyperpage}{321}
+\indexentry{自注意力子层|hyperpage}{321}
+\indexentry{Self-attention Sub-layer|hyperpage}{321}
+\indexentry{前馈神经网络子层|hyperpage}{322}
+\indexentry{Feed-forward Sub-layer|hyperpage}{322}
+\indexentry{残差连接|hyperpage}{322}
+\indexentry{Residual Connection|hyperpage}{322}
+\indexentry{层正则化|hyperpage}{322}
+\indexentry{Layer Normalization|hyperpage}{322}
+\indexentry{编码-解码注意力子层|hyperpage}{322}
+\indexentry{Encoder-decoder Attention Sub-layer|hyperpage}{322}
+\indexentry{词嵌入|hyperpage}{323}
+\indexentry{Word Embedding|hyperpage}{323}
+\indexentry{位置编码|hyperpage}{323}
+\indexentry{Position Embedding|hyperpage}{323}
+\indexentry{点乘注意力|hyperpage}{326}
+\indexentry{Scaled Dot-Product Attention|hyperpage}{326}
+\indexentry{多头注意力|hyperpage}{328}
+\indexentry{Multi-head Attention|hyperpage}{328}
+\indexentry{残差连接|hyperpage}{329}
+\indexentry{短连接|hyperpage}{330}
+\indexentry{Short-cut Connection|hyperpage}{330}
+\indexentry{后正则化|hyperpage}{331}
+\indexentry{Post-norm|hyperpage}{331}
+\indexentry{前正则化|hyperpage}{331}
+\indexentry{Pre-norm|hyperpage}{331}
+\indexentry{交叉熵损失|hyperpage}{332}
+\indexentry{Cross Entropy Loss|hyperpage}{332}
+\indexentry{预热|hyperpage}{332}
+\indexentry{Warmup|hyperpage}{332}
+\indexentry{小批量训练|hyperpage}{333}
+\indexentry{Mini-batch Training|hyperpage}{333}
+\indexentry{Dropout|hyperpage}{333}
+\indexentry{过拟合|hyperpage}{333}
+\indexentry{Over fitting|hyperpage}{333}
+\indexentry{标签平滑|hyperpage}{333}
+\indexentry{Label Smoothing|hyperpage}{333}
+\indexentry{序列到序列的转换/生成问题|hyperpage}{335}
+\indexentry{Sequence-to-Sequence Problem|hyperpage}{335}
+\indexentry{未登录词|hyperpage}{345}
+\indexentry{Out of Vocabulary Word，OOV Word|hyperpage}{345}
+\indexentry{子词切分|hyperpage}{345}
+\indexentry{Sub-word Segmentation|hyperpage}{345}
+\indexentry{标准化|hyperpage}{345}
+\indexentry{Normalization|hyperpage}{345}
+\indexentry{数据清洗|hyperpage}{345}
+\indexentry{Dada Cleaning|hyperpage}{345}
+\indexentry{数据选择|hyperpage}{347}
+\indexentry{Data Selection|hyperpage}{347}
+\indexentry{数据过滤|hyperpage}{347}
+\indexentry{Data Filtering|hyperpage}{347}
+\indexentry{开放词表|hyperpage}{350}
+\indexentry{Open-Vocabulary|hyperpage}{350}
+\indexentry{子词|hyperpage}{351}
+\indexentry{Sub-word|hyperpage}{351}
+\indexentry{字节对编码|hyperpage}{351}
+\indexentry{双字节编码|hyperpage}{351}
+\indexentry{Byte Pair Encoding，BPE|hyperpage}{351}
+\indexentry{正则化|hyperpage}{354}
+\indexentry{Regularization|hyperpage}{354}
+\indexentry{过拟合问题|hyperpage}{354}
+\indexentry{Overfitting Problem|hyperpage}{354}
+\indexentry{反问题|hyperpage}{354}
+\indexentry{Inverse Problem|hyperpage}{354}
+\indexentry{适定的|hyperpage}{354}
+\indexentry{Well-posed|hyperpage}{354}
+\indexentry{不适定问题|hyperpage}{354}
+\indexentry{Ill-posed Problem|hyperpage}{354}
+\indexentry{降噪|hyperpage}{355}
+\indexentry{Denoising|hyperpage}{355}
+\indexentry{泛化|hyperpage}{355}
+\indexentry{Generalization|hyperpage}{355}
+\indexentry{标签平滑|hyperpage}{357}
+\indexentry{Label Smoothing|hyperpage}{357}
+\indexentry{相互适应|hyperpage}{358}
+\indexentry{Co-Adaptation|hyperpage}{358}
+\indexentry{集成学习|hyperpage}{359}
+\indexentry{Ensemble Learning|hyperpage}{359}
+\indexentry{容量|hyperpage}{360}
+\indexentry{Capacity|hyperpage}{360}
+\indexentry{宽残差网络|hyperpage}{361}
+\indexentry{Wide Residual Network|hyperpage}{361}
+\indexentry{探测任务|hyperpage}{362}
+\indexentry{Probing Task|hyperpage}{362}
+\indexentry{表面信息|hyperpage}{362}
+\indexentry{Surface Information|hyperpage}{362}
+\indexentry{语法信息|hyperpage}{362}
+\indexentry{Syntactic Information|hyperpage}{362}
+\indexentry{语义信息|hyperpage}{362}
+\indexentry{Semantic Information|hyperpage}{362}
+\indexentry{词嵌入|hyperpage}{362}
+\indexentry{Embedding|hyperpage}{362}
+\indexentry{数据并行|hyperpage}{363}
+\indexentry{Data Parallelism|hyperpage}{363}
+\indexentry{模型并行|hyperpage}{363}
+\indexentry{Model Parallelism|hyperpage}{363}
+\indexentry{小批量训练|hyperpage}{363}
+\indexentry{Mini-batch Training|hyperpage}{363}
+\indexentry{课程学习|hyperpage}{365}
+\indexentry{Curriculum Learning|hyperpage}{365}
+\indexentry{推断|hyperpage}{366}
+\indexentry{Inference|hyperpage}{366}
+\indexentry{解码|hyperpage}{366}
+\indexentry{Decoding|hyperpage}{366}
+\indexentry{准确性|hyperpage}{366}
+\indexentry{Accuracy|hyperpage}{366}
+\indexentry{时延|hyperpage}{366}
+\indexentry{Latency|hyperpage}{366}
+\indexentry{时延|hyperpage}{366}
+\indexentry{Memory|hyperpage}{366}
+\indexentry{搜索错误|hyperpage}{366}
+\indexentry{Search Error|hyperpage}{366}
+\indexentry{模型错误|hyperpage}{366}
+\indexentry{Modeling Error|hyperpage}{366}
+\indexentry{重排序|hyperpage}{368}
+\indexentry{Re-ranking|hyperpage}{368}
+\indexentry{双向推断|hyperpage}{368}
+\indexentry{Bidirectional Inference|hyperpage}{368}
+\indexentry{批量推断|hyperpage}{371}
+\indexentry{Batch Inference|hyperpage}{371}
+\indexentry{批量处理|hyperpage}{371}
+\indexentry{Batching|hyperpage}{371}
+\indexentry{二值网络|hyperpage}{373}
+\indexentry{Binarized Neural Networks|hyperpage}{373}
+\indexentry{自回归翻译|hyperpage}{373}
+\indexentry{Autoregressive Translation|hyperpage}{373}
+\indexentry{非自回归翻译|hyperpage}{374}
+\indexentry{Non-Autoregressive Translation|hyperpage}{374}
+\indexentry{繁衍率|hyperpage}{374}
+\indexentry{Fertility|hyperpage}{374}
+\indexentry{偏置|hyperpage}{375}
+\indexentry{Bias|hyperpage}{375}
+\indexentry{退化|hyperpage}{375}
+\indexentry{Degenerate|hyperpage}{375}
+\indexentry{过翻译|hyperpage}{377}
+\indexentry{Over Translation|hyperpage}{377}
+\indexentry{欠翻译|hyperpage}{377}
+\indexentry{Under Translation|hyperpage}{377}
+\indexentry{充分性|hyperpage}{377}
+\indexentry{Adequacy|hyperpage}{377}
+\indexentry{系统融合|hyperpage}{378}
+\indexentry{System Combination|hyperpage}{378}
+\indexentry{假设选择|hyperpage}{379}
+\indexentry{Hypothesis Selection|hyperpage}{379}
+\indexentry{多样性|hyperpage}{379}
+\indexentry{Diversity|hyperpage}{379}
+\indexentry{重排序|hyperpage}{379}
+\indexentry{Re-ranking|hyperpage}{379}
+\indexentry{混淆网络|hyperpage}{381}
+\indexentry{Confusion Network|hyperpage}{381}
+\indexentry{动态线性层聚合方法|hyperpage}{385}
+\indexentry{Dynamic Linear Combination of Layers，DLCL|hyperpage}{385}
+\indexentry{相互适应|hyperpage}{389}
+\indexentry{Co-adaptation|hyperpage}{389}
+\indexentry{数据增强|hyperpage}{391}
+\indexentry{Data Augmentation|hyperpage}{391}
+\indexentry{回译|hyperpage}{391}
+\indexentry{Back Translation|hyperpage}{391}
+\indexentry{迭代式回译|hyperpage}{392}
+\indexentry{Iterative Back Translation|hyperpage}{392}
+\indexentry{前向翻译|hyperpage}{392}
+\indexentry{Forward Translation|hyperpage}{392}
+\indexentry{预训练|hyperpage}{393}
+\indexentry{Pre-training|hyperpage}{393}
+\indexentry{微调|hyperpage}{393}
+\indexentry{Fine-tuning|hyperpage}{393}
+\indexentry{多任务学习|hyperpage}{394}
+\indexentry{Multitask Learning|hyperpage}{394}
+\indexentry{模型压缩|hyperpage}{396}
+\indexentry{Model Compression|hyperpage}{396}
+\indexentry{学习难度|hyperpage}{396}
+\indexentry{Learning Difficulty|hyperpage}{396}
+\indexentry{教师模型|hyperpage}{396}
+\indexentry{Teacher Model|hyperpage}{396}
+\indexentry{学生模型|hyperpage}{397}
+\indexentry{Student Model|hyperpage}{397}
+\indexentry{基于单词的知识精炼|hyperpage}{397}
+\indexentry{Word-level Knowledge Distillation|hyperpage}{397}
+\indexentry{基于序列的知识精炼|hyperpage}{397}
+\indexentry{Sequence-level Knowledge Distillation|hyperpage}{397}
+\indexentry{中间层输出|hyperpage}{398}
+\indexentry{Hint-based Knowledge Transfer|hyperpage}{398}
+\indexentry{注意力分布|hyperpage}{398}
+\indexentry{Attention To Attention Transfer|hyperpage}{398}
+\indexentry{循环一致性|hyperpage}{401}
+\indexentry{Circle Consistency|hyperpage}{401}
+\indexentry{翻译中回译|hyperpage}{402}
+\indexentry{On-the-fly Back-translation|hyperpage}{402}
+\indexentry{网络结构搜索技术|hyperpage}{404}
+\indexentry{Neural Architecture Search；NAS|hyperpage}{404}
--- a/Book/mt-book-xelatex.ptc
+++ b/Book/mt-book-xelatex.ptc
 \boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax 
+\babel@toc {english}{}
 \defcounter {refsection}{0}\relax 
-\select@language {english}
-\defcounter {refsection}{0}\relax 
-\contentsline {part}{\@mypartnumtocformat {I}{机器翻译基础}}{15}{part.1}
+\contentsline {part}{\@mypartnumtocformat {I}{机器翻译基础}}{15}{part.1}%
 \ttl@starttoc {default@1}
 \defcounter {refsection}{0}\relax 
-\contentsline {chapter}{\numberline {1}机器翻译简介}{17}{chapter.1}
+\contentsline {chapter}{\numberline {1}机器翻译简介}{17}{chapter.1}%
+\defcounter {refsection}{0}\relax 
+\contentsline {section}{\numberline {1.1}机器翻译的概念}{17}{section.1.1}%
+\defcounter {refsection}{0}\relax 
+\contentsline {section}{\numberline {1.2}机器翻译简史}{20}{section.1.2}%
+\defcounter {refsection}{0}\relax 
+\contentsline {subsection}{\numberline {1.2.1}人工翻译}{20}{subsection.1.2.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {1.1}机器翻译的概念}{17}{section.1.1}
+\contentsline {subsection}{\numberline {1.2.2}机器翻译的萌芽}{21}{subsection.1.2.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {1.2}机器翻译简史}{20}{section.1.2}
+\contentsline {subsection}{\numberline {1.2.3}机器翻译的受挫}{22}{subsection.1.2.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {1.2.1}人工翻译}{20}{subsection.1.2.1}
+\contentsline {subsection}{\numberline {1.2.4}机器翻译的快速成长}{23}{subsection.1.2.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {1.2.2}机器翻译的萌芽}{21}{subsection.1.2.2}
+\contentsline {subsection}{\numberline {1.2.5}机器翻译的爆发}{24}{subsection.1.2.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {1.2.3}机器翻译的受挫}{22}{subsection.1.2.3}
+\contentsline {section}{\numberline {1.3}机器翻译现状}{25}{section.1.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {1.2.4}机器翻译的快速成长}{23}{subsection.1.2.4}
+\contentsline {section}{\numberline {1.4}机器翻译方法}{27}{section.1.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {1.2.5}机器翻译的爆发}{24}{subsection.1.2.5}
+\contentsline {subsection}{\numberline {1.4.1}基于规则的机器翻译}{27}{subsection.1.4.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {1.3}机器翻译现状}{25}{section.1.3}
+\contentsline {subsection}{\numberline {1.4.2}基于实例的机器翻译}{28}{subsection.1.4.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {1.4}机器翻译方法}{27}{section.1.4}
+\contentsline {subsection}{\numberline {1.4.3}统计机器翻译}{29}{subsection.1.4.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {1.4.1}基于规则的机器翻译}{27}{subsection.1.4.1}
+\contentsline {subsection}{\numberline {1.4.4}神经机器翻译}{30}{subsection.1.4.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {1.4.2}基于实例的机器翻译}{28}{subsection.1.4.2}
+\contentsline {subsection}{\numberline {1.4.5}对比分析}{31}{subsection.1.4.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {1.4.3}统计机器翻译}{29}{subsection.1.4.3}
+\contentsline {section}{\numberline {1.5}翻译质量评价}{32}{section.1.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {1.4.4}神经机器翻译}{30}{subsection.1.4.4}
+\contentsline {subsection}{\numberline {1.5.1}人工评价}{32}{subsection.1.5.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {1.4.5}对比分析}{31}{subsection.1.4.5}
+\contentsline {subsection}{\numberline {1.5.2}自动评价}{33}{subsection.1.5.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {1.5}翻译质量评价}{32}{section.1.5}
+\contentsline {subsubsection}{BLEU}{33}{section*.17}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {1.5.1}人工评价}{32}{subsection.1.5.1}
+\contentsline {subsubsection}{TER}{34}{section*.18}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {1.5.2}自动评价}{33}{subsection.1.5.2}
+\contentsline {subsubsection}{基于检测点的评价}{35}{section*.19}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{BLEU}{33}{section*.17}
+\contentsline {section}{\numberline {1.6}机器翻译应用}{36}{section.1.6}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{TER}{35}{section*.18}
+\contentsline {section}{\numberline {1.7}开源项目与评测}{38}{section.1.7}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{基于检测点的评价}{35}{section*.19}
+\contentsline {subsection}{\numberline {1.7.1}开源机器翻译系统}{38}{subsection.1.7.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {1.6}机器翻译应用}{36}{section.1.6}
+\contentsline {subsubsection}{统计机器翻译开源系统}{38}{section*.21}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {1.7}开源项目与评测}{38}{section.1.7}
+\contentsline {subsubsection}{神经机器翻译开源系统}{40}{section*.22}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {1.7.1}开源机器翻译系统}{38}{subsection.1.7.1}
+\contentsline {subsection}{\numberline {1.7.2}常用数据集及公开评测任务}{42}{subsection.1.7.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{统计机器翻译开源系统}{39}{section*.21}
+\contentsline {section}{\numberline {1.8}推荐学习资源}{44}{section.1.8}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{神经机器翻译开源系统}{40}{section*.22}
+\contentsline {subsection}{\numberline {1.8.1}经典书籍}{44}{subsection.1.8.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {1.7.2}常用数据集及公开评测任务}{42}{subsection.1.7.2}
+\contentsline {subsection}{\numberline {1.8.2}网络资源}{45}{subsection.1.8.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {1.8}推荐学习资源}{44}{section.1.8}
+\contentsline {subsection}{\numberline {1.8.3}专业组织和会议}{45}{subsection.1.8.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {chapter}{\numberline {2}词法、语法及统计建模基础}{49}{chapter.2}
+\contentsline {chapter}{\numberline {2}词法、语法及统计建模基础}{49}{chapter.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {2.1}问题概述 }{50}{section.2.1}
+\contentsline {section}{\numberline {2.1}问题概述 }{50}{section.2.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {2.2}概率论基础}{51}{section.2.2}
+\contentsline {section}{\numberline {2.2}概率论基础}{51}{section.2.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {2.2.1}随机变量和概率}{52}{subsection.2.2.1}
+\contentsline {subsection}{\numberline {2.2.1}随机变量和概率}{52}{subsection.2.2.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {2.2.2}联合概率、条件概率和边缘概率}{53}{subsection.2.2.2}
+\contentsline {subsection}{\numberline {2.2.2}联合概率、条件概率和边缘概率}{53}{subsection.2.2.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {2.2.3}链式法则}{54}{subsection.2.2.3}
+\contentsline {subsection}{\numberline {2.2.3}链式法则}{54}{subsection.2.2.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {2.2.4}贝叶斯法则}{55}{subsection.2.2.4}
+\contentsline {subsection}{\numberline {2.2.4}贝叶斯法则}{55}{subsection.2.2.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {2.2.5}KL距离和熵}{57}{subsection.2.2.5}
+\contentsline {subsection}{\numberline {2.2.5}KL距离和熵}{57}{subsection.2.2.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{信息熵}{57}{section*.29}
+\contentsline {subsubsection}{信息熵}{57}{section*.29}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{KL距离}{58}{section*.31}
+\contentsline {subsubsection}{KL距离}{58}{section*.31}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{交叉熵}{58}{section*.32}
+\contentsline {subsubsection}{交叉熵}{58}{section*.32}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {2.3}中文分词}{59}{section.2.3}
+\contentsline {section}{\numberline {2.3}中文分词}{59}{section.2.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {2.3.1}基于词典的分词方法}{60}{subsection.2.3.1}
+\contentsline {subsection}{\numberline {2.3.1}基于词典的分词方法}{60}{subsection.2.3.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {2.3.2}基于统计的分词方法}{61}{subsection.2.3.2}
+\contentsline {subsection}{\numberline {2.3.2}基于统计的分词方法}{61}{subsection.2.3.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{统计模型的学习与推断}{61}{section*.36}
+\contentsline {subsubsection}{统计模型的学习与推断}{61}{section*.36}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{掷骰子游戏}{62}{section*.38}
+\contentsline {subsubsection}{掷骰子游戏}{62}{section*.38}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{全概率分词方法}{64}{section*.42}
+\contentsline {subsubsection}{全概率分词方法}{64}{section*.42}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {2.4}$n$-gram语言模型 }{66}{section.2.4}
+\contentsline {section}{\numberline {2.4}$n$-gram语言模型 }{66}{section.2.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {2.4.1}建模}{67}{subsection.2.4.1}
+\contentsline {subsection}{\numberline {2.4.1}建模}{67}{subsection.2.4.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {2.4.2}未登录词和平滑算法}{69}{subsection.2.4.2}
+\contentsline {subsection}{\numberline {2.4.2}未登录词和平滑算法}{69}{subsection.2.4.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{加法平滑方法}{70}{section*.48}
+\contentsline {subsubsection}{加法平滑方法}{70}{section*.48}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{古德-图灵估计法}{71}{section*.50}
+\contentsline {subsubsection}{古德-图灵估计法}{71}{section*.50}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{Kneser-Ney平滑方法}{72}{section*.52}
+\contentsline {subsubsection}{Kneser-Ney平滑方法}{72}{section*.52}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {2.5}句法分析（短语结构分析）}{74}{section.2.5}
+\contentsline {section}{\numberline {2.5}句法分析（短语结构分析）}{74}{section.2.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {2.5.1}句子的句法树表示}{74}{subsection.2.5.1}
+\contentsline {subsection}{\numberline {2.5.1}句子的句法树表示}{74}{subsection.2.5.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {2.5.2}上下文无关文法}{76}{subsection.2.5.2}
+\contentsline {subsection}{\numberline {2.5.2}上下文无关文法}{76}{subsection.2.5.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {2.5.3}规则和推导的概率}{81}{subsection.2.5.3}
+\contentsline {subsection}{\numberline {2.5.3}规则和推导的概率}{80}{subsection.2.5.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {2.6}小结及深入阅读}{83}{section.2.6}
+\contentsline {section}{\numberline {2.6}小结及深入阅读}{82}{section.2.6}%
 \defcounter {refsection}{0}\relax 
-\contentsline {part}{\@mypartnumtocformat {II}{统计机器翻译}}{85}{part.2}
+\contentsline {part}{\@mypartnumtocformat {II}{统计机器翻译}}{85}{part.2}%
 \ttl@stoptoc {default@1}
 \ttl@starttoc {default@2}
 \defcounter {refsection}{0}\relax 
-\contentsline {chapter}{\numberline {3}基于词的机器翻译模型}{87}{chapter.3}
+\contentsline {chapter}{\numberline {3}基于词的机器翻译模型}{87}{chapter.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {3.1}什么是基于词的翻译模型}{87}{section.3.1}
+\contentsline {section}{\numberline {3.1}什么是基于词的翻译模型}{87}{section.3.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {3.2}构建一个简单的机器翻译系统}{89}{section.3.2}
+\contentsline {section}{\numberline {3.2}构建一个简单的机器翻译系统}{89}{section.3.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.2.1}如何进行翻译？}{89}{subsection.3.2.1}
+\contentsline {subsection}{\numberline {3.2.1}如何进行翻译？}{89}{subsection.3.2.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{机器翻译流程}{90}{section*.65}
+\contentsline {subsubsection}{机器翻译流程}{90}{section*.65}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{人工翻译 vs. 机器翻译}{91}{section*.67}
+\contentsline {subsubsection}{人工翻译 vs. 机器翻译}{91}{section*.67}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.2.2}基本框架}{91}{subsection.3.2.2}
+\contentsline {subsection}{\numberline {3.2.2}基本框架}{91}{subsection.3.2.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.2.3}单词翻译概率}{92}{subsection.3.2.3}
+\contentsline {subsection}{\numberline {3.2.3}单词翻译概率}{92}{subsection.3.2.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{什么是单词翻译概率？}{92}{section*.69}
+\contentsline {subsubsection}{什么是单词翻译概率？}{92}{section*.69}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{如何从一个双语平行数据中学习？}{93}{section*.71}
+\contentsline {subsubsection}{如何从一个双语平行数据中学习？}{93}{section*.71}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{如何从大量的双语平行数据中学习？}{94}{section*.72}
+\contentsline {subsubsection}{如何从大量的双语平行数据中学习？}{94}{section*.72}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.2.4}句子级翻译模型}{95}{subsection.3.2.4}
+\contentsline {subsection}{\numberline {3.2.4}句子级翻译模型}{95}{subsection.3.2.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{基础模型}{95}{section*.74}
+\contentsline {subsubsection}{基础模型}{95}{section*.74}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{生成流畅的译文}{97}{section*.76}
+\contentsline {subsubsection}{生成流畅的译文}{97}{section*.76}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.2.5}解码}{99}{subsection.3.2.5}
+\contentsline {subsection}{\numberline {3.2.5}解码}{99}{subsection.3.2.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {3.3}基于词的翻译建模}{101}{section.3.3}
+\contentsline {section}{\numberline {3.3}基于词的翻译建模}{101}{section.3.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.3.1}噪声信道模型}{101}{subsection.3.3.1}
+\contentsline {subsection}{\numberline {3.3.1}噪声信道模型}{101}{subsection.3.3.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.3.2}统计机器翻译的三个基本问题}{104}{subsection.3.3.2}
+\contentsline {subsection}{\numberline {3.3.2}统计机器翻译的三个基本问题}{104}{subsection.3.3.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{词对齐}{104}{section*.86}
+\contentsline {subsubsection}{词对齐}{104}{section*.86}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{基于词对齐的翻译模型}{105}{section*.89}
+\contentsline {subsubsection}{基于词对齐的翻译模型}{105}{section*.89}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{基于词对齐的翻译实例}{107}{section*.91}
+\contentsline {subsubsection}{基于词对齐的翻译实例}{107}{section*.91}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {3.4}IBM模型1-2}{107}{section.3.4}
+\contentsline {section}{\numberline {3.4}IBM模型1-2}{107}{section.3.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.4.1}IBM模型1}{108}{subsection.3.4.1}
+\contentsline {subsection}{\numberline {3.4.1}IBM模型1}{108}{subsection.3.4.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.4.2}IBM模型2}{109}{subsection.3.4.2}
+\contentsline {subsection}{\numberline {3.4.2}IBM模型2}{109}{subsection.3.4.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.4.3}解码及计算优化}{110}{subsection.3.4.3}
+\contentsline {subsection}{\numberline {3.4.3}解码及计算优化}{110}{subsection.3.4.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.4.4}训练}{112}{subsection.3.4.4}
+\contentsline {subsection}{\numberline {3.4.4}训练}{112}{subsection.3.4.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{目标函数}{112}{section*.96}
+\contentsline {subsubsection}{目标函数}{112}{section*.96}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{优化}{113}{section*.98}
+\contentsline {subsubsection}{优化}{113}{section*.98}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {3.5}IBM模型3-5及隐马尔可夫模型}{119}{section.3.5}
+\contentsline {section}{\numberline {3.5}IBM模型3-5及隐马尔可夫模型}{118}{section.3.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.5.1}基于产出率的翻译模型}{119}{subsection.3.5.1}
+\contentsline {subsection}{\numberline {3.5.1}基于产出率的翻译模型}{118}{subsection.3.5.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.5.2}IBM 模型3}{122}{subsection.3.5.2}
+\contentsline {subsection}{\numberline {3.5.2}IBM 模型3}{120}{subsection.3.5.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.5.3}IBM 模型4}{123}{subsection.3.5.3}
+\contentsline {subsection}{\numberline {3.5.3}IBM 模型4}{122}{subsection.3.5.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.5.4} IBM 模型5}{125}{subsection.3.5.4}
+\contentsline {subsection}{\numberline {3.5.4} IBM 模型5}{124}{subsection.3.5.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.5.5}隐马尔可夫模型}{126}{subsection.3.5.5}
+\contentsline {subsection}{\numberline {3.5.5}隐马尔可夫模型}{125}{subsection.3.5.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{隐马尔可夫模型}{126}{section*.110}
+\contentsline {subsubsection}{隐马尔可夫模型}{125}{section*.110}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{词对齐模型}{127}{section*.112}
+\contentsline {subsubsection}{词对齐模型}{126}{section*.112}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.5.6}解码和训练}{128}{subsection.3.5.6}
+\contentsline {subsection}{\numberline {3.5.6}解码和训练}{127}{subsection.3.5.6}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {3.6}问题分析}{129}{section.3.6}
+\contentsline {section}{\numberline {3.6}问题分析}{128}{section.3.6}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.6.1}词对齐及对称化}{129}{subsection.3.6.1}
+\contentsline {subsection}{\numberline {3.6.1}词对齐及对称化}{128}{subsection.3.6.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.6.2}Deficiency}{130}{subsection.3.6.2}
+\contentsline {subsection}{\numberline {3.6.2}Deficiency}{129}{subsection.3.6.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.6.3}句子长度}{131}{subsection.3.6.3}
+\contentsline {subsection}{\numberline {3.6.3}句子长度}{130}{subsection.3.6.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {3.6.4}其他问题}{131}{subsection.3.6.4}
+\contentsline {subsection}{\numberline {3.6.4}其他问题}{130}{subsection.3.6.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {3.7}小结及深入阅读}{132}{section.3.7}
+\contentsline {section}{\numberline {3.7}小结及深入阅读}{131}{section.3.7}%
 \defcounter {refsection}{0}\relax 
-\contentsline {chapter}{\numberline {4}基于短语和句法的机器翻译模型}{135}{chapter.4}
+\contentsline {chapter}{\numberline {4}基于短语和句法的机器翻译模型}{133}{chapter.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {4.1}翻译中的结构信息}{135}{section.4.1}
+\contentsline {section}{\numberline {4.1}翻译中的结构信息}{133}{section.4.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.1.1}更大粒度的翻译单元}{136}{subsection.4.1.1}
+\contentsline {subsection}{\numberline {4.1.1}更大粒度的翻译单元}{134}{subsection.4.1.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.1.2}句子的结构信息}{138}{subsection.4.1.2}
+\contentsline {subsection}{\numberline {4.1.2}句子的结构信息}{136}{subsection.4.1.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {4.2}基于短语的翻译模型}{140}{section.4.2}
+\contentsline {section}{\numberline {4.2}基于短语的翻译模型}{138}{section.4.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.2.1}机器翻译中的短语}{140}{subsection.4.2.1}
+\contentsline {subsection}{\numberline {4.2.1}机器翻译中的短语}{138}{subsection.4.2.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.2.2}数学建模及判别式模型}{143}{subsection.4.2.2}
+\contentsline {subsection}{\numberline {4.2.2}数学建模及判别式模型}{141}{subsection.4.2.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{基于翻译推导的建模}{143}{section*.124}
+\contentsline {subsubsection}{基于翻译推导的建模}{141}{section*.124}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{对数线性模型}{144}{section*.125}
+\contentsline {subsubsection}{对数线性模型}{142}{section*.125}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{搭建模型的基本流程}{145}{section*.126}
+\contentsline {subsubsection}{搭建模型的基本流程}{143}{section*.126}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.2.3}短语抽取}{146}{subsection.4.2.3}
+\contentsline {subsection}{\numberline {4.2.3}短语抽取}{144}{subsection.4.2.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{与词对齐一致的短语}{147}{section*.129}
+\contentsline {subsubsection}{与词对齐一致的短语}{145}{section*.129}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{获取词对齐}{148}{section*.133}
+\contentsline {subsubsection}{获取词对齐}{146}{section*.133}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{度量双语短语质量}{149}{section*.135}
+\contentsline {subsubsection}{度量双语短语质量}{147}{section*.135}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.2.4}调序}{150}{subsection.4.2.4}
+\contentsline {subsection}{\numberline {4.2.4}调序}{148}{subsection.4.2.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{基于距离的调序}{151}{section*.139}
+\contentsline {subsubsection}{基于距离的调序}{149}{section*.139}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{基于方向的调序}{151}{section*.141}
+\contentsline {subsubsection}{基于方向的调序}{149}{section*.141}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{基于分类的调序}{152}{section*.144}
+\contentsline {subsubsection}{基于分类的调序}{151}{section*.144}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.2.5}特征}{153}{subsection.4.2.5}
+\contentsline {subsection}{\numberline {4.2.5}特征}{151}{subsection.4.2.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.2.6}最小错误率训练}{154}{subsection.4.2.6}
+\contentsline {subsection}{\numberline {4.2.6}最小错误率训练}{152}{subsection.4.2.6}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.2.7}栈解码}{157}{subsection.4.2.7}
+\contentsline {subsection}{\numberline {4.2.7}栈解码}{155}{subsection.4.2.7}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{翻译候选匹配}{158}{section*.149}
+\contentsline {subsubsection}{翻译候选匹配}{156}{section*.149}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{翻译假设扩展}{159}{section*.151}
+\contentsline {subsubsection}{翻译假设扩展}{157}{section*.151}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{剪枝}{160}{section*.153}
+\contentsline {subsubsection}{剪枝}{158}{section*.153}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{解码中的栈结构}{161}{section*.155}
+\contentsline {subsubsection}{解码中的栈结构}{159}{section*.155}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {4.3}基于层次短语的模型}{162}{section.4.3}
+\contentsline {section}{\numberline {4.3}基于层次短语的模型}{160}{section.4.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.3.1}同步上下文无关文法}{164}{subsection.4.3.1}
+\contentsline {subsection}{\numberline {4.3.1}同步上下文无关文法}{163}{subsection.4.3.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{文法定义}{165}{section*.160}
+\contentsline {subsubsection}{文法定义}{163}{section*.160}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{推导}{166}{section*.161}
+\contentsline {subsubsection}{推导}{164}{section*.161}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{胶水规则}{167}{section*.162}
+\contentsline {subsubsection}{胶水规则}{165}{section*.162}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{处理流程}{168}{section*.163}
+\contentsline {subsubsection}{处理流程}{166}{section*.163}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.3.2}层次短语规则抽取}{168}{subsection.4.3.2}
+\contentsline {subsection}{\numberline {4.3.2}层次短语规则抽取}{166}{subsection.4.3.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.3.3}翻译模型及特征}{170}{subsection.4.3.3}
+\contentsline {subsection}{\numberline {4.3.3}翻译模型及特征}{168}{subsection.4.3.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.3.4}CKY解码}{171}{subsection.4.3.4}
+\contentsline {subsection}{\numberline {4.3.4}CKY解码}{169}{subsection.4.3.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.3.5}立方剪枝}{174}{subsection.4.3.5}
+\contentsline {subsection}{\numberline {4.3.5}立方剪枝}{172}{subsection.4.3.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {4.4}基于语言学句法的模型}{177}{section.4.4}
+\contentsline {section}{\numberline {4.4}基于语言学句法的模型}{175}{section.4.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.4.1}基于句法的翻译模型分类}{179}{subsection.4.4.1}
+\contentsline {subsection}{\numberline {4.4.1}基于句法的翻译模型分类}{177}{subsection.4.4.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.4.2}基于树结构的文法}{179}{subsection.4.4.2}
+\contentsline {subsection}{\numberline {4.4.2}基于树结构的文法}{179}{subsection.4.4.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{树到树翻译规则}{182}{section*.179}
+\contentsline {subsubsection}{树到树翻译规则}{180}{section*.180}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{基于树结构的翻译推导}{183}{section*.181}
+\contentsline {subsubsection}{基于树结构的翻译推导}{181}{section*.182}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{树到串翻译规则}{185}{section*.184}
+\contentsline {subsubsection}{树到串翻译规则}{183}{section*.185}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.4.3}树到串翻译规则抽取}{186}{subsection.4.4.3}
+\contentsline {subsection}{\numberline {4.4.3}树到串翻译规则抽取}{184}{subsection.4.4.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{树的切割与最小规则}{186}{section*.186}
+\contentsline {subsubsection}{树的切割与最小规则}{184}{section*.187}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{空对齐处理}{190}{section*.192}
+\contentsline {subsubsection}{空对齐处理}{188}{section*.193}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{组合规则}{191}{section*.194}
+\contentsline {subsubsection}{组合规则}{189}{section*.195}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{SPMT规则}{191}{section*.196}
+\contentsline {subsubsection}{SPMT规则}{190}{section*.197}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{句法树二叉化}{192}{section*.198}
+\contentsline {subsubsection}{句法树二叉化}{191}{section*.199}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.4.4}树到树翻译规则抽取}{194}{subsection.4.4.4}
+\contentsline {subsection}{\numberline {4.4.4}树到树翻译规则抽取}{192}{subsection.4.4.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{基于节点对齐的规则抽取}{195}{section*.202}
+\contentsline {subsubsection}{基于节点对齐的规则抽取}{192}{section*.203}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{基于对齐矩阵的规则抽取}{195}{section*.205}
+\contentsline {subsubsection}{基于对齐矩阵的规则抽取}{194}{section*.206}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.4.5}句法翻译模型的特征}{196}{subsection.4.4.5}
+\contentsline {subsection}{\numberline {4.4.5}句法翻译模型的特征}{195}{subsection.4.4.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.4.6}基于超图的推导空间表示}{199}{subsection.4.4.6}
+\contentsline {subsection}{\numberline {4.4.6}基于超图的推导空间表示}{196}{subsection.4.4.6}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {4.4.7}基于树的解码 vs 基于串的解码}{201}{subsection.4.4.7}
+\contentsline {subsection}{\numberline {4.4.7}基于树的解码 vs 基于串的解码}{199}{subsection.4.4.7}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{基于树的解码}{202}{section*.213}
+\contentsline {subsubsection}{基于树的解码}{200}{section*.214}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{基于串的解码}{204}{section*.216}
+\contentsline {subsubsection}{基于串的解码}{201}{section*.217}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {4.5}小结及深入阅读}{206}{section.4.5}
+\contentsline {section}{\numberline {4.5}小结及深入阅读}{203}{section.4.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {part}{\@mypartnumtocformat {III}{神经机器翻译}}{209}{part.3}
+\contentsline {part}{\@mypartnumtocformat {III}{神经机器翻译}}{205}{part.3}%
 \ttl@stoptoc {default@2}
 \ttl@starttoc {default@3}
 \defcounter {refsection}{0}\relax 
-\contentsline {chapter}{\numberline {5}人工神经网络和神经语言建模}{211}{chapter.5}
+\contentsline {chapter}{\numberline {5}人工神经网络和神经语言建模}{207}{chapter.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {5.1}深度学习与人工神经网络}{212}{section.5.1}
+\contentsline {section}{\numberline {5.1}深度学习与人工神经网络}{208}{section.5.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.1.1}发展简史}{212}{subsection.5.1.1}
+\contentsline {subsection}{\numberline {5.1.1}发展简史}{208}{subsection.5.1.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{早期的人工神经网络和第一次寒冬}{212}{section*.218}
+\contentsline {subsubsection}{早期的人工神经网络和第一次寒冬}{208}{section*.219}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{神经网络的第二次高潮和第二次寒冬}{213}{section*.219}
+\contentsline {subsubsection}{神经网络的第二次高潮和第二次寒冬}{209}{section*.220}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{深度学习和神经网络方法的崛起}{214}{section*.220}
+\contentsline {subsubsection}{深度学习和神经网络方法的崛起}{210}{section*.221}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.1.2}为什么需要深度学习}{215}{subsection.5.1.2}
+\contentsline {subsection}{\numberline {5.1.2}为什么需要深度学习}{211}{subsection.5.1.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{端到端学习和表示学习}{215}{section*.222}
+\contentsline {subsubsection}{端到端学习和表示学习}{211}{section*.223}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{深度学习的效果}{216}{section*.224}
+\contentsline {subsubsection}{深度学习的效果}{212}{section*.225}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {5.2}神经网络基础}{216}{section.5.2}
+\contentsline {section}{\numberline {5.2}神经网络基础}{212}{section.5.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.2.1}线性代数基础}{216}{subsection.5.2.1}
+\contentsline {subsection}{\numberline {5.2.1}线性代数基础}{212}{subsection.5.2.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{标量、向量和矩阵}{217}{section*.226}
+\contentsline {subsubsection}{标量、向量和矩阵}{213}{section*.227}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{矩阵的转置}{218}{section*.227}
+\contentsline {subsubsection}{矩阵的转置}{214}{section*.228}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{矩阵加法和数乘}{218}{section*.228}
+\contentsline {subsubsection}{矩阵加法和数乘}{214}{section*.229}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{矩阵乘法和矩阵点乘}{219}{section*.229}
+\contentsline {subsubsection}{矩阵乘法和矩阵点乘}{215}{section*.230}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{线性映射}{220}{section*.230}
+\contentsline {subsubsection}{线性映射}{216}{section*.231}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{范数}{221}{section*.231}
+\contentsline {subsubsection}{范数}{217}{section*.232}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.2.2}人工神经元和感知机}{222}{subsection.5.2.2}
+\contentsline {subsection}{\numberline {5.2.2}人工神经元和感知机}{218}{subsection.5.2.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{感知机\ \raisebox {0.5mm}{------}\ 最简单的人工神经元模型}{223}{section*.234}
+\contentsline {subsubsection}{感知机\ \raisebox {0.5mm}{------}\ 最简单的人工神经元模型}{219}{section*.235}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{神经元内部权重}{224}{section*.237}
+\contentsline {subsubsection}{神经元内部权重}{220}{section*.238}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{神经元的输入\ \raisebox {0.5mm}{------}\ 离散 vs 连续}{225}{section*.239}
+\contentsline {subsubsection}{神经元的输入\ \raisebox {0.5mm}{------}\ 离散 vs 连续}{221}{section*.240}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{神经元内部的参数学习}{225}{section*.241}
+\contentsline {subsubsection}{神经元内部的参数学习}{221}{section*.242}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.2.3}多层神经网络}{226}{subsection.5.2.3}
+\contentsline {subsection}{\numberline {5.2.3}多层神经网络}{222}{subsection.5.2.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{线性变换和激活函数}{226}{section*.243}
+\contentsline {subsubsection}{线性变换和激活函数}{222}{section*.244}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{单层神经网络$\rightarrow $多层神经网络}{229}{section*.250}
+\contentsline {subsubsection}{单层神经网络$\rightarrow $多层神经网络}{225}{section*.251}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.2.4}函数拟合能力}{230}{subsection.5.2.4}
+\contentsline {subsection}{\numberline {5.2.4}函数拟合能力}{226}{subsection.5.2.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {5.3}神经网络的张量实现}{233}{section.5.3}
+\contentsline {section}{\numberline {5.3}神经网络的张量实现}{229}{section.5.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.3.1} 张量及其计算}{233}{subsection.5.3.1}
+\contentsline {subsection}{\numberline {5.3.1} 张量及其计算}{229}{subsection.5.3.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{\ 张量}{233}{section*.259}
+\contentsline {subsubsection}{\ 张量}{229}{section*.260}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{张量的矩阵乘法}{235}{section*.262}
+\contentsline {subsubsection}{张量的矩阵乘法}{231}{section*.263}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{张量的单元操作}{236}{section*.264}
+\contentsline {subsubsection}{张量的单元操作}{232}{section*.265}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.3.2}张量的物理存储形式}{237}{subsection.5.3.2}
+\contentsline {subsection}{\numberline {5.3.2}张量的物理存储形式}{233}{subsection.5.3.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.3.3}使用开源框架实现张量计算}{238}{subsection.5.3.3}
+\contentsline {subsection}{\numberline {5.3.3}使用开源框架实现张量计算}{234}{subsection.5.3.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.3.4}前向传播与计算图}{241}{subsection.5.3.4}
+\contentsline {subsection}{\numberline {5.3.4}前向传播与计算图}{237}{subsection.5.3.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.3.5}神经网络实例}{242}{subsection.5.3.5}
+\contentsline {subsection}{\numberline {5.3.5}神经网络实例}{238}{subsection.5.3.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {5.4}神经网络的参数训练}{243}{section.5.4}
+\contentsline {section}{\numberline {5.4}神经网络的参数训练}{239}{section.5.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.4.1}损失函数}{244}{subsection.5.4.1}
+\contentsline {subsection}{\numberline {5.4.1}损失函数}{240}{subsection.5.4.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.4.2}基于梯度的参数优化}{245}{subsection.5.4.2}
+\contentsline {subsection}{\numberline {5.4.2}基于梯度的参数优化}{241}{subsection.5.4.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{梯度下降}{246}{section*.278}
+\contentsline {subsubsection}{梯度下降}{242}{section*.279}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{梯度获取}{248}{section*.280}
+\contentsline {subsubsection}{梯度获取}{244}{section*.281}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{基于梯度的方法的变种和改进}{250}{section*.284}
+\contentsline {subsubsection}{基于梯度的方法的变种和改进}{246}{section*.285}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.4.3}参数更新的并行化策略}{253}{subsection.5.4.3}
+\contentsline {subsection}{\numberline {5.4.3}参数更新的并行化策略}{249}{subsection.5.4.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.4.4}梯度消失、梯度爆炸和稳定性训练}{255}{subsection.5.4.4}
+\contentsline {subsection}{\numberline {5.4.4}梯度消失、梯度爆炸和稳定性训练}{251}{subsection.5.4.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{易于优化的激活函数}{255}{section*.287}
+\contentsline {subsubsection}{易于优化的激活函数}{251}{section*.288}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{梯度裁剪}{256}{section*.291}
+\contentsline {subsubsection}{梯度裁剪}{252}{section*.292}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{稳定性训练}{257}{section*.292}
+\contentsline {subsubsection}{稳定性训练}{253}{section*.293}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.4.5}过拟合}{258}{subsection.5.4.5}
+\contentsline {subsection}{\numberline {5.4.5}过拟合}{254}{subsection.5.4.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.4.6}反向传播}{259}{subsection.5.4.6}
+\contentsline {subsection}{\numberline {5.4.6}反向传播}{255}{subsection.5.4.6}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{输出层的反向传播}{260}{section*.295}
+\contentsline {subsubsection}{输出层的反向传播}{256}{section*.296}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{隐藏层的反向传播}{262}{section*.299}
+\contentsline {subsubsection}{隐藏层的反向传播}{258}{section*.300}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{程序实现}{264}{section*.302}
+\contentsline {subsubsection}{程序实现}{260}{section*.303}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {5.5}神经语言模型}{265}{section.5.5}
+\contentsline {section}{\numberline {5.5}神经语言模型}{261}{section.5.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.5.1}基于神经网络的语言建模}{265}{subsection.5.5.1}
+\contentsline {subsection}{\numberline {5.5.1}基于神经网络的语言建模}{261}{subsection.5.5.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{基于前馈神经网络的语言模型}{266}{section*.305}
+\contentsline {subsubsection}{基于前馈神经网络的语言模型}{262}{section*.306}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{基于循环神经网络的语言模型}{268}{section*.308}
+\contentsline {subsubsection}{基于循环神经网络的语言模型}{264}{section*.309}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{基于自注意力机制的语言模型}{269}{section*.310}
+\contentsline {subsubsection}{基于自注意力机制的语言模型}{265}{section*.311}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{语言模型的评价}{271}{section*.312}
+\contentsline {subsubsection}{语言模型的评价}{267}{section*.313}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.5.2}单词表示模型}{271}{subsection.5.5.2}
+\contentsline {subsection}{\numberline {5.5.2}单词表示模型}{267}{subsection.5.5.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{One-hot编码}{271}{section*.313}
+\contentsline {subsubsection}{One-hot编码}{267}{section*.314}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{分布式表示}{272}{section*.315}
+\contentsline {subsubsection}{分布式表示}{268}{section*.316}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {5.5.3}句子表示模型及预训练}{273}{subsection.5.5.3}
+\contentsline {subsection}{\numberline {5.5.3}句子表示模型及预训练}{269}{subsection.5.5.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{简单的上下文表示模型}{274}{section*.319}
+\contentsline {subsubsection}{简单的上下文表示模型}{270}{section*.320}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{ELMO模型}{275}{section*.322}
+\contentsline {subsubsection}{ELMO模型}{271}{section*.323}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{GPT模型}{275}{section*.324}
+\contentsline {subsubsection}{GPT模型}{271}{section*.325}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{BERT模型}{276}{section*.326}
+\contentsline {subsubsection}{BERT模型}{272}{section*.327}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{为什么要预训练？}{277}{section*.328}
+\contentsline {subsubsection}{为什么要预训练？}{273}{section*.329}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {5.6}小结及深入阅读}{278}{section.5.6}
+\contentsline {section}{\numberline {5.6}小结及深入阅读}{274}{section.5.6}%
 \defcounter {refsection}{0}\relax 
-\contentsline {chapter}{\numberline {6}神经机器翻译模型}{281}{chapter.6}
+\contentsline {chapter}{\numberline {6}神经机器翻译模型}{275}{chapter.6}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {6.1}神经机器翻译的发展简史}{281}{section.6.1}
+\contentsline {section}{\numberline {6.1}神经机器翻译的发展简史}{275}{section.6.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.1.1}神经机器翻译的起源}{283}{subsection.6.1.1}
+\contentsline {subsection}{\numberline {6.1.1}神经机器翻译的起源}{277}{subsection.6.1.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.1.2}神经机器翻译的品质 }{285}{subsection.6.1.2}
+\contentsline {subsection}{\numberline {6.1.2}神经机器翻译的品质 }{279}{subsection.6.1.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.1.3}神经机器翻译的优势 }{288}{subsection.6.1.3}
+\contentsline {subsection}{\numberline {6.1.3}神经机器翻译的优势 }{282}{subsection.6.1.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {6.2}编码器-解码器框架}{290}{section.6.2}
+\contentsline {section}{\numberline {6.2}编码器-解码器框架}{283}{section.6.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.2.1}框架结构}{290}{subsection.6.2.1}
+\contentsline {subsection}{\numberline {6.2.1}框架结构}{284}{subsection.6.2.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.2.2}表示学习}{291}{subsection.6.2.2}
+\contentsline {subsection}{\numberline {6.2.2}表示学习}{285}{subsection.6.2.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.2.3}简单的运行实例}{292}{subsection.6.2.3}
+\contentsline {subsection}{\numberline {6.2.3}简单的运行实例}{286}{subsection.6.2.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.2.4}机器翻译范式的对比}{293}{subsection.6.2.4}
+\contentsline {subsection}{\numberline {6.2.4}机器翻译范式的对比}{287}{subsection.6.2.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {6.3}基于循环神经网络的翻译模型及注意力机制}{294}{section.6.3}
+\contentsline {section}{\numberline {6.3}基于循环神经网络的翻译模型及注意力机制}{288}{section.6.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.3.1}建模}{295}{subsection.6.3.1}
+\contentsline {subsection}{\numberline {6.3.1}建模}{289}{subsection.6.3.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.3.2}输入（词嵌入）及输出（Softmax）}{298}{subsection.6.3.2}
+\contentsline {subsection}{\numberline {6.3.2}输入（词嵌入）及输出（Softmax）}{292}{subsection.6.3.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.3.3}循环神经网络结构}{301}{subsection.6.3.3}
+\contentsline {subsection}{\numberline {6.3.3}循环神经网络结构}{295}{subsection.6.3.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{循环神经单元（RNN）}{301}{section*.351}
+\contentsline {subsubsection}{循环神经单元（RNN）}{295}{section*.352}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{长短时记忆网络（LSTM）}{302}{section*.352}
+\contentsline {subsubsection}{长短时记忆网络（LSTM）}{295}{section*.353}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{门控循环单元（GRU）}{304}{section*.355}
+\contentsline {subsubsection}{门控循环单元（GRU）}{298}{section*.356}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{双向模型}{305}{section*.357}
+\contentsline {subsubsection}{双向模型}{299}{section*.358}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{多层循环神经网络}{306}{section*.359}
+\contentsline {subsubsection}{多层循环神经网络}{299}{section*.360}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.3.4}注意力机制}{306}{subsection.6.3.4}
+\contentsline {subsection}{\numberline {6.3.4}注意力机制}{300}{subsection.6.3.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{翻译中的注意力机制}{307}{section*.362}
+\contentsline {subsubsection}{翻译中的注意力机制}{301}{section*.363}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{上下文向量的计算}{309}{section*.365}
+\contentsline {subsubsection}{上下文向量的计算}{302}{section*.366}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{注意力机制的解读}{312}{section*.370}
+\contentsline {subsubsection}{注意力机制的解读}{305}{section*.371}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.3.5}训练}{313}{subsection.6.3.5}
+\contentsline {subsection}{\numberline {6.3.5}训练}{307}{subsection.6.3.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{损失函数}{314}{section*.373}
+\contentsline {subsubsection}{损失函数}{307}{section*.374}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{参数初始化}{314}{section*.374}
+\contentsline {subsubsection}{参数初始化}{308}{section*.375}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{优化策略}{315}{section*.375}
+\contentsline {subsubsection}{优化策略}{309}{section*.376}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{梯度裁剪}{315}{section*.377}
+\contentsline {subsubsection}{梯度裁剪}{309}{section*.378}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{学习率策略}{316}{section*.378}
+\contentsline {subsubsection}{学习率策略}{309}{section*.379}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{并行训练}{317}{section*.381}
+\contentsline {subsubsection}{并行训练}{311}{section*.382}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.3.6}推断}{320}{subsection.6.3.6}
+\contentsline {subsection}{\numberline {6.3.6}推断}{313}{subsection.6.3.6}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{贪婪搜索}{321}{section*.386}
+\contentsline {subsubsection}{贪婪搜索}{314}{section*.387}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{束搜索}{321}{section*.389}
+\contentsline {subsubsection}{束搜索}{315}{section*.390}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{长度惩罚}{323}{section*.391}
+\contentsline {subsubsection}{长度惩罚}{316}{section*.392}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.3.7}实例-GNMT}{324}{subsection.6.3.7}
+\contentsline {subsection}{\numberline {6.3.7}实例-GNMT}{317}{subsection.6.3.7}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {6.4}Transformer}{324}{section.6.4}
+\contentsline {section}{\numberline {6.4}Transformer}{318}{section.6.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.4.1}自注意力模型}{326}{subsection.6.4.1}
+\contentsline {subsection}{\numberline {6.4.1}自注意力模型}{319}{subsection.6.4.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.4.2}Transformer架构}{328}{subsection.6.4.2}
+\contentsline {subsection}{\numberline {6.4.2}Transformer架构}{321}{subsection.6.4.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.4.3}位置编码}{330}{subsection.6.4.3}
+\contentsline {subsection}{\numberline {6.4.3}位置编码}{323}{subsection.6.4.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.4.4}基于点乘的注意力机制}{332}{subsection.6.4.4}
+\contentsline {subsection}{\numberline {6.4.4}基于点乘的注意力机制}{325}{subsection.6.4.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.4.5}掩码操作}{334}{subsection.6.4.5}
+\contentsline {subsection}{\numberline {6.4.5}掩码操作}{327}{subsection.6.4.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.4.6}多头注意力}{335}{subsection.6.4.6}
+\contentsline {subsection}{\numberline {6.4.6}多头注意力}{328}{subsection.6.4.6}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.4.7}残差网络和层正则化}{336}{subsection.6.4.7}
+\contentsline {subsection}{\numberline {6.4.7}残差网络和层正则化}{329}{subsection.6.4.7}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.4.8}前馈全连接网络子层}{338}{subsection.6.4.8}
+\contentsline {subsection}{\numberline {6.4.8}前馈全连接网络子层}{331}{subsection.6.4.8}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.4.9}训练}{339}{subsection.6.4.9}
+\contentsline {subsection}{\numberline {6.4.9}训练}{332}{subsection.6.4.9}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.4.10}推断}{341}{subsection.6.4.10}
+\contentsline {subsection}{\numberline {6.4.10}推断}{334}{subsection.6.4.10}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {6.5}序列到序列问题及应用}{342}{section.6.5}
+\contentsline {section}{\numberline {6.5}序列到序列问题及应用}{335}{section.6.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.5.1}自动问答}{342}{subsection.6.5.1}
+\contentsline {subsection}{\numberline {6.5.1}自动问答}{335}{subsection.6.5.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.5.2}自动文摘}{343}{subsection.6.5.2}
+\contentsline {subsection}{\numberline {6.5.2}自动文摘}{335}{subsection.6.5.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.5.3}文言文翻译}{343}{subsection.6.5.3}
+\contentsline {subsection}{\numberline {6.5.3}文言文翻译}{336}{subsection.6.5.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.5.4}对联生成}{344}{subsection.6.5.4}
+\contentsline {subsection}{\numberline {6.5.4}对联生成}{337}{subsection.6.5.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {6.5.5}古诗生成}{344}{subsection.6.5.5}
+\contentsline {subsection}{\numberline {6.5.5}古诗生成}{338}{subsection.6.5.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {6.6}小结及深入阅读}{346}{section.6.6}
+\contentsline {section}{\numberline {6.6}小结及深入阅读}{338}{section.6.6}%
 \defcounter {refsection}{0}\relax 
-\contentsline {chapter}{\numberline {7}神经机器翻译实战 \ \raisebox {0.5mm}{------}\ 参加一次比赛}{349}{chapter.7}
+\contentsline {chapter}{\numberline {7}神经机器翻译实战}{341}{chapter.7}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {7.1}神经机器翻译并不简单}{349}{section.7.1}
+\contentsline {section}{\numberline {7.1}神经机器翻译并不简单}{341}{section.7.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {7.1.1}影响神经机器翻译性能的因素}{350}{subsection.7.1.1}
+\contentsline {subsection}{\numberline {7.1.1}影响神经机器翻译性能的因素}{342}{subsection.7.1.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {7.1.2}搭建神经机器翻译系统的步骤 }{351}{subsection.7.1.2}
+\contentsline {subsection}{\numberline {7.1.2}搭建神经机器翻译系统的步骤 }{343}{subsection.7.1.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {7.1.3}架构选择 }{352}{subsection.7.1.3}
+\contentsline {subsection}{\numberline {7.1.3}架构选择 }{344}{subsection.7.1.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {7.2}数据处理}{352}{section.7.2}
+\contentsline {section}{\numberline {7.2}数据处理}{344}{section.7.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {7.2.1}分词}{353}{subsection.7.2.1}
+\contentsline {subsection}{\numberline {7.2.1}分词}{345}{subsection.7.2.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {7.2.2}标准化}{354}{subsection.7.2.2}
+\contentsline {subsection}{\numberline {7.2.2}标准化}{346}{subsection.7.2.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {7.2.3}数据清洗}{355}{subsection.7.2.3}
+\contentsline {subsection}{\numberline {7.2.3}数据清洗}{347}{subsection.7.2.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {7.2.4}子词切分}{357}{subsection.7.2.4}
+\contentsline {subsection}{\numberline {7.2.4}子词切分}{349}{subsection.7.2.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{大词表和OOV问题}{358}{section*.428}
+\contentsline {subsubsection}{大词表和OOV问题}{350}{section*.429}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{子词}{358}{section*.430}
+\contentsline {subsubsection}{子词}{350}{section*.431}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{双字节编码（BPE）}{359}{section*.432}
+\contentsline {subsubsection}{双字节编码（BPE）}{351}{section*.433}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{其他方法}{362}{section*.435}
+\contentsline {subsubsection}{其他方法}{354}{section*.436}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {7.3}建模与训练}{362}{section.7.3}
+\contentsline {section}{\numberline {7.3}建模与训练}{354}{section.7.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {7.3.1}正则化}{362}{subsection.7.3.1}
+\contentsline {subsection}{\numberline {7.3.1}正则化}{354}{subsection.7.3.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{L1/L2正则化}{364}{section*.437}
+\contentsline {subsubsection}{L1/L2正则化}{356}{section*.438}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{标签平滑}{365}{section*.438}
+\contentsline {subsubsection}{标签平滑}{357}{section*.439}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{Dropout}{366}{section*.440}
+\contentsline {subsubsection}{Dropout}{357}{section*.441}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{Layer Dropout}{368}{section*.443}
+\contentsline {subsubsection}{Layer Dropout}{359}{section*.444}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {7.3.2}增大模型容量}{369}{subsection.7.3.2}
+\contentsline {subsection}{\numberline {7.3.2}增大模型容量}{360}{subsection.7.3.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{宽网络}{369}{section*.445}
+\contentsline {subsubsection}{宽网络}{360}{section*.446}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{深网络}{370}{section*.447}
+\contentsline {subsubsection}{深网络}{361}{section*.448}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{增大输入层和输出层表示能力}{371}{section*.449}
+\contentsline {subsubsection}{增大输入层和输出层表示能力}{362}{section*.450}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{大模型的分布式计算}{372}{section*.450}
+\contentsline {subsubsection}{大模型的分布式计算}{363}{section*.451}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {7.3.3}大批量训练}{372}{subsection.7.3.3}
+\contentsline {subsection}{\numberline {7.3.3}大批量训练}{363}{subsection.7.3.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{为什么需要大批量训练}{372}{section*.451}
+\contentsline {subsubsection}{为什么需要大批量训练}{363}{section*.452}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{如何构建批次}{373}{section*.454}
+\contentsline {subsubsection}{如何构建批次}{365}{section*.455}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {7.4}推断}{375}{section.7.4}
+\contentsline {section}{\numberline {7.4}推断}{366}{section.7.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {7.4.1}推断优化}{375}{subsection.7.4.1}
+\contentsline {subsection}{\numberline {7.4.1}推断优化}{366}{subsection.7.4.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{推断系统的架构}{375}{section*.456}
+\contentsline {subsubsection}{推断系统的架构}{366}{section*.457}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{自左向右推断 vs 自右向左推断}{376}{section*.458}
+\contentsline {subsubsection}{自左向右推断 vs 自右向左推断}{367}{section*.459}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{推断加速}{377}{section*.459}
+\contentsline {subsubsection}{推断加速}{368}{section*.460}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {7.4.2}译文长度控制}{384}{subsection.7.4.2}
+\contentsline {subsection}{\numberline {7.4.2}译文长度控制}{375}{subsection.7.4.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{长度惩罚因子}{385}{section*.465}
+\contentsline {subsubsection}{长度惩罚因子}{376}{section*.466}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{译文长度范围约束}{386}{section*.467}
+\contentsline {subsubsection}{译文长度范围约束}{376}{section*.468}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{覆盖度模型}{386}{section*.468}
+\contentsline {subsubsection}{覆盖度模型}{377}{section*.469}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {7.4.3}多模型集成}{387}{subsection.7.4.3}
+\contentsline {subsection}{\numberline {7.4.3}多模型集成}{378}{subsection.7.4.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{假设选择}{388}{section*.469}
+\contentsline {subsubsection}{假设选择}{379}{section*.470}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{局部预测融合}{389}{section*.471}
+\contentsline {subsubsection}{局部预测融合}{380}{section*.472}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{译文重组}{390}{section*.473}
+\contentsline {subsubsection}{译文重组}{381}{section*.474}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {7.5}进阶技术}{391}{section.7.5}
+\contentsline {section}{\numberline {7.5}进阶技术}{382}{section.7.5}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {7.5.1}深层模型}{391}{subsection.7.5.1}
+\contentsline {subsection}{\numberline {7.5.1}深层模型}{382}{subsection.7.5.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{Post-Norm vs Pre-Norm}{392}{section*.476}
+\contentsline {subsubsection}{Post-Norm vs Pre-Norm}{382}{section*.477}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{层聚合}{394}{section*.479}
+\contentsline {subsubsection}{层聚合}{384}{section*.480}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{深层模型的训练加速}{395}{section*.481}
+\contentsline {subsubsection}{深层模型的训练加速}{386}{section*.482}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{渐进式训练}{395}{section*.482}
+\contentsline {subsubsection}{渐进式训练}{386}{section*.483}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{分组稠密连接}{396}{section*.484}
+\contentsline {subsubsection}{分组稠密连接}{387}{section*.485}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{学习率重置策略}{397}{section*.486}
+\contentsline {subsubsection}{学习率重置策略}{387}{section*.487}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{深层模型的鲁棒性训练}{398}{section*.488}
+\contentsline {subsubsection}{深层模型的鲁棒性训练}{389}{section*.489}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {7.5.2}单语数据的使用}{400}{subsection.7.5.2}
+\contentsline {subsection}{\numberline {7.5.2}单语数据的使用}{390}{subsection.7.5.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{伪数据}{401}{section*.492}
+\contentsline {subsubsection}{伪数据}{391}{section*.493}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{预训练}{402}{section*.495}
+\contentsline {subsubsection}{预训练}{393}{section*.496}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{联合训练}{404}{section*.498}
+\contentsline {subsubsection}{联合训练}{394}{section*.499}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {7.5.3}知识精炼}{404}{subsection.7.5.3}
+\contentsline {subsection}{\numberline {7.5.3}知识精炼}{395}{subsection.7.5.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{什么是知识精炼}{405}{section*.500}
+\contentsline {subsubsection}{什么是知识精炼}{396}{section*.501}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{知识精炼的基本方法}{406}{section*.501}
+\contentsline {subsubsection}{知识精炼的基本方法}{397}{section*.502}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{机器翻译中的知识精炼}{408}{section*.503}
+\contentsline {subsubsection}{机器翻译中的知识精炼}{398}{section*.504}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {7.5.4}双向训练}{408}{subsection.7.5.4}
+\contentsline {subsection}{\numberline {7.5.4}双向训练}{399}{subsection.7.5.4}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{有监督对偶学习}{410}{section*.505}
+\contentsline {subsubsection}{有监督对偶学习}{400}{section*.506}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{无监督对偶学习}{410}{section*.506}
+\contentsline {subsubsection}{无监督对偶学习}{401}{section*.507}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsubsection}{翻译中回译}{411}{section*.508}
+\contentsline {subsubsection}{翻译中回译}{402}{section*.509}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {7.6}小结及深入阅读}{412}{section.7.6}
+\contentsline {section}{\numberline {7.6}小结及深入阅读}{402}{section.7.6}%
 \defcounter {refsection}{0}\relax 
-\contentsline {part}{\@mypartnumtocformat {IV}{附录}}{417}{part.4}
+\contentsline {part}{\@mypartnumtocformat {IV}{附录}}{405}{part.4}%
 \ttl@stoptoc {default@3}
 \ttl@starttoc {default@4}
 \defcounter {refsection}{0}\relax 
-\contentsline {chapter}{\numberline {A}附录A}{419}{Appendix.1.A}
+\contentsline {chapter}{\numberline {A}附录A}{407}{appendix.1.A}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {A.1}基准数据集}{419}{section.1.A.1}
+\contentsline {section}{\numberline {A.1}基准数据集}{407}{section.1.A.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {A.2}平行语料}{420}{section.1.A.2}
+\contentsline {section}{\numberline {A.2}平行语料}{408}{section.1.A.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {A.3}相关工具}{421}{section.1.A.3}
+\contentsline {section}{\numberline {A.3}相关工具}{409}{section.1.A.3}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {A.3.1}数据预处理工具}{421}{subsection.1.A.3.1}
+\contentsline {subsection}{\numberline {A.3.1}数据预处理工具}{409}{subsection.1.A.3.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {subsection}{\numberline {A.3.2}评价工具}{422}{subsection.1.A.3.2}
+\contentsline {subsection}{\numberline {A.3.2}评价工具}{410}{subsection.1.A.3.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {chapter}{\numberline {B}附录B}{423}{Appendix.2.B}
+\contentsline {chapter}{\numberline {B}附录B}{411}{appendix.2.B}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {B.1}IBM模型3训练方法}{423}{section.2.B.1}
+\contentsline {section}{\numberline {B.1}IBM模型3训练方法}{411}{section.2.B.1}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {B.2}IBM模型4训练方法}{425}{section.2.B.2}
+\contentsline {section}{\numberline {B.2}IBM模型4训练方法}{413}{section.2.B.2}%
 \defcounter {refsection}{0}\relax 
-\contentsline {section}{\numberline {B.3}IBM模型5训练方法}{427}{section.2.B.3}
+\contentsline {section}{\numberline {B.3}IBM模型5训练方法}{415}{section.2.B.3}%
 \contentsfinish 
--- a/Section03-Word-Based-Models/section03.tex
+++ b/Section03-Word-Based-Models/section03.tex
@@ -909,12 +909,12 @@

 \begin{itemize}

-\item 很多时候，我们有多个互译句对$(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),...,(\mathbf{s}^{[n]},\mathbf{t}^{[n]})$，称之为\alert{双语平行数据(语料)}。翻译概率可以被定义为
+\item 如果有多个互译句对$\{(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),...,(\mathbf{s}^{[K]},\mathbf{t}^{[K]})\}$，称之为\alert{双语平行数据(语料)}。翻译概率可以被定义为

 \vspace{-1em}

 \begin{eqnarray}
-\textrm{P}(x,y) & = & \frac{\sum_{i=1}^{n}c(x,y;\mathbf{s}^{[i]},\mathbf{t}^{[i]})}{\sum_{i=1}^{n} \sum_{x',y'} c(x',y';\mathbf{s}^{[i]},\mathbf{t}^{[i]})} \nonumber
+\textrm{P}(x,y) & = & \frac{\sum_{k=1}^{K}c(x,y;\mathbf{s}^{[k]},\mathbf{t}^{[k]})}{\sum_{k=1}^{K} \sum_{x',y'} c(x',y';\mathbf{s}^{[k]},\mathbf{t}^{[k]})} \nonumber
 \end{eqnarray}

 \item<2-> 说白了就是计算$(x,y)$的频次时，在每个句子上累加
@@ -1414,7 +1414,7 @@ $m$ & $n$ & $n^m \cdot m!$ \\ \hline
 \node [anchor=north west,inner sep=2pt,align=left] (line4) at ([yshift=-1pt]line3.south west) {\textrm{3: \textbf{for} $i$ in $[1,m]$ \textbf{do}}};
 \node [anchor=north west,inner sep=2pt,align=left] (line5) at ([yshift=-1pt]line4.south west) {\textrm{4: \hspace{1em} $h = \phi$}};
 \node [anchor=north west,inner sep=2pt,align=left] (line6) at ([yshift=-1pt]line5.south west) {\textrm{5: \hspace{1em} \textbf{foreach} $j$ in $[1,m]$ \textbf{do}}};
-\node [anchor=north west,inner sep=2pt,align=left] (line7) at ([yshift=-1pt]line6.south west) {\textrm{6: \hspace{2em} \textbf{if} $used[j]=$ \textbf{true} \textbf{then}}};
+\node [anchor=north west,inner sep=2pt,align=left] (line7) at ([yshift=-1pt]line6.south west) {\textrm{6: \hspace{2em} \textbf{if} $used[j]=$ \textbf{false} \textbf{then}}};
 \node [anchor=north west,inner sep=2pt,align=left] (line8) at ([yshift=-1pt]line7.south west) {\textrm{7: \hspace{3em} $h = h \cup \textrm{\textsc{Join}}(best,\pi[j])$}};
 \node [anchor=north west,inner sep=2pt,align=left] (line9) at ([yshift=-1pt]line8.south west) {\textrm{8: \hspace{1em} $best = \textrm{\textsc{PruneForTop1}}(h)$}};
 \node [anchor=north west,inner sep=2pt,align=left] (line10) at ([yshift=-1pt]line9.south west) {\textrm{9: \hspace{1em} $used[best.j] = \textrm{\textsc{\textbf{true}}}$}};
@@ -2395,7 +2395,7 @@ $m$ & $n$ & $n^m \cdot m!$ \\ \hline
    \item \textbf{翻译模型参数估计} - 计算$\textrm{P}(\mathbf{s}|\mathbf{t})$所需的参数
    \end{itemize}
    \vspace{0.5em}
-\item<2-> \textbf{IBM模型的假设}：$\mathbf{s}=s_1...s_m$和$\mathbf{t}=t_1...t_n$之间有单词一级的对应，称作\alert{单词对齐}或者\alert{词对齐}。此外：
+\item<2-> \textbf{IBM模型的假设}：$\mathbf{s}=s_1...s_m$和$\mathbf{t}=t_1...t_l$之间有单词一级的对应，称作\alert{单词对齐}或者\alert{词对齐}。此外：
    \begin{itemize}
    \item \textbf{约束}：一个源语言单词只能对应一个目标语单词
    \vspace{0.5em}
@@ -2792,11 +2792,11 @@ $\mathbf{s}$ = 在 桌子 上 \ \ \ \ \ $\mathbf{t}$ = $t_0$ on the table \ \ \ 
 \textrm{P}(\mathbf{s},\mathbf{a}|\mathbf{t}) & = & \textrm{P}(m|\mathbf{t}) \prod\limits_{j=1}^{m} \textrm{P}(a_j|a_{1}^{j-1},s_{1}^{j-1},m,\mathbf{t}) \textrm{P}(s_j|a_{1}^{j},s_{1}^{j-1},m,\mathbf{t}) \nonumber \\
                  & \visible<2->{=} & \visible<2->{\textrm{P}(m=3 \mid \textrm{'$t_0$ on the table'})} \visible<3->{\times} \nonumber \\
                  &   & \visible<3->{\textrm{P}(a_1=0 \mid \phi,\phi,3,\textrm{'$t_0$ on the table'})} \visible<4->{\times} \nonumber \\
-                  &   & \visible<4->{\textrm{P}(f_1=\textrm{在} \mid \textrm{\{1-0\}},\phi,3,\textrm{'$t_0$ on the table'})} \visible<5->{\times} \nonumber \\
+                  &   & \visible<4->{\textrm{P}(s_1=\textrm{在} \mid \textrm{\{1-0\}},\phi,3,\textrm{'$t_0$ on the table'})} \visible<5->{\times} \nonumber \\
                  &   & \visible<5->{\textrm{P}(a_2=3 \mid \textrm{\{1-0\}},\textrm{'在'},3,\textrm{'$t_0$ on the table'})} \visible<6->{\times} \nonumber \\
-                  &   & \visible<6->{\textrm{P}(f_2=\textrm{桌子} \mid \textrm{\{1-0,2-3\}},\textrm{'在'},3,\textrm{'$t_0$ on the table'})} \visible<7->{\times} \nonumber \\
+                  &   & \visible<6->{\textrm{P}(s_2=\textrm{桌子} \mid \textrm{\{1-0,2-3\}},\textrm{'在'},3,\textrm{'$t_0$ on the table'})} \visible<7->{\times} \nonumber \\
                  &   & \visible<7->{\textrm{P}(a_3=1 \mid \textrm{\{1-0,2-3\}},\textrm{'在 桌子'},3,\textrm{'$t_0$ on the table'})} \visible<8->{\times} \nonumber \\
-                  &   & \visible<8->{\textrm{P}(f_3=\textrm{上} \mid \textrm{\{1-0,2-3,3-1\}},\textrm{'在 桌子'},3,\textrm{'$t_0$ on the table'})} \nonumber
+                  &   & \visible<8->{\textrm{P}(s_3=\textrm{上} \mid \textrm{\{1-0,2-3,3-1\}},\textrm{'在 桌子'},3,\textrm{'$t_0$ on the table'})} \nonumber
 \end{eqnarray}
 }

@@ -3730,7 +3730,7 @@ $\mathbf{s}$ = 在 桌子 上 \ \ \ \ \ $\mathbf{t}$ = $t_0$ on the table \ \ \ 

    {\small
    \begin{eqnarray}
-    L(f,\lambda) & = & \frac{\epsilon}{(l+1)^{m}} \prod\limits_{j=1}^{m} \sum\limits_{i=0}^{l} \prod\limits_{j=1}^{m} f(s_j|t_i) - \nonumber \\
+    L(f,\lambda) & = & \frac{\epsilon}{(l+1)^{m}} \prod\limits_{j=1}^{m} \sum\limits_{i=0}^{l} f(s_j|t_i) - \nonumber \\
                 &   & \sum_{t_y} \lambda_{t_y} (\sum_{s_x} f(s_x|t_y) -1) \nonumber
    \end{eqnarray}
    }
@@ -4190,9 +4190,9 @@ f(s_u|t_v) & = & \lambda_{t_v}^{-1} \cdot \textrm{P}(\mathbf{s}|\mathbf{t}) \cdo
 %%% scale it up to the full corpus
 \begin{frame}{在整个数据集上计算}
 \begin{itemize}
-\item \textbf{更真实的情况}：我们拥有一系列互译的句对（称作\alert{平行语料}），记为$\{(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),(\mathbf{s}^{[2]},\mathbf{t}^{[2]}),...,(\mathbf{s}^{[N]},\mathbf{t}^{[N]})\}$。对于这$N$个训练用句对，定义$f(s_u|t_v)$的期望频次为
+\item \textbf{更真实的情况}：我们拥有一系列互译的句对（称作\alert{平行语料}），记为$\{(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),(\mathbf{s}^{[2]},\mathbf{t}^{[2]}),...,(\mathbf{s}^{[K]},\mathbf{t}^{[K]})\}$。对于这$K$个训练用句对，定义$f(s_u|t_v)$的期望频次为
    \begin{displaymath}
-    c_{\mathbb{E}}(s_u|t_v) = \sum_{i=1}^{N} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[i]},\mathbf{t}^{[i]})
+    c_{\mathbb{E}}(s_u|t_v) = \sum_{k=1}^{K} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})
    \end{displaymath}
 \item<2-> \textbf{于是}
    \begin{center}
@@ -4200,8 +4200,8 @@ f(s_u|t_v) & = & \lambda_{t_v}^{-1} \cdot \textrm{P}(\mathbf{s}|\mathbf{t}) \cdo
    \node [anchor=west,inner sep=2pt] (eq1) at (0,0) {$f(s_u|t_v)$};
    \node [anchor=west] (eq2) at (eq1.east) {$=$\ };
    \draw [-] ([xshift=0.3em]eq2.east) -- ([xshift=11.6em]eq2.east);
-    \node [anchor=south west] (eq3) at ([xshift=1em]eq2.east) {$\sum_{i=1}^{N} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[i]},\mathbf{t}^{[i]})$};
-    \node [anchor=north west] (eq4) at (eq2.east) {$\sum_{s_u} \sum_{i=1}^{N} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[i]},\mathbf{t}^{[i]})$};
+    \node [anchor=south west] (eq3) at ([xshift=1em]eq2.east) {$\sum_{k=1}^{K} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})$};
+    \node [anchor=north west] (eq4) at (eq2.east) {$\sum_{s_u} \sum_{k=1}^{K} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})$};

    \visible<4->{
    \node [anchor=south] (label1) at ([yshift=-6em,xshift=3em]eq1.north west) {利用这个公式计算};
@@ -4250,16 +4250,16 @@ f(s_u|t_v) & = & \lambda_{t_v}^{-1} \cdot \textrm{P}(\mathbf{s}|\mathbf{t}) \cdo
 \label{ibmtraining}

 \begin{beamerboxesrounded}[upper=uppercolblue,lower=lowercolblue,shadow=true]{IBM模型1的训练（EM算法）}
-输入: 平行语料$\{(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),...,(\mathbf{s}^{[N]},\mathbf{t}^{[N]})\}$\\
+输入: 平行语料$\{(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),...,(\mathbf{s}^{[K]},\mathbf{t}^{[K]})\}$\\
 输出：参数$f(\cdot|\cdot)$的最优值\\
-1: \textbf{Function} \textsc{TrainItWithEM}($\{(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),...,(\mathbf{s}^{[N]},\mathbf{t}^{[N]})\}$) \\
+1: \textbf{Function} \textsc{TrainItWithEM}($\{(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),...,(\mathbf{s}^{[K]},\mathbf{t}^{[K]})\}$) \\
 2: \ \ Initialize $f(\cdot|\cdot)$ \hspace{5em} $\rhd$ 比如给$f(\cdot|\cdot)$一个均匀分布\\
 3: \ \ Loop until $f(\cdot|\cdot)$ converges\\
-4: \ \ \ \ \textbf{foreach} $k = 1$ to $N$ \textbf{do}\\
+4: \ \ \ \ \textbf{foreach} $k = 1$ to $K$ \textbf{do}\\
 5: \ \ \ \ \ \ \ \footnotesize{$c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]}) = \sum\limits_{j=1}^{|\mathbf{s}^{[k]}|} \delta(s_j,s_u) \sum\limits_{i=0}^{|\mathbf{t}^{[k]}|} \delta(t_i,t_v) \cdot \frac{f(s_u|t_v)}{\sum_{i=0}^{l}f(s_u|t_i)}$}\normalsize{}\\
-6: \ \ \ \ \textbf{foreach} $t_v$ appears at least one of $\{\mathbf{t}^{[1]},...,\mathbf{t}^{[N]}\}$ \textbf{do}\\
+6: \ \ \ \ \textbf{foreach} $t_v$ appears at least one of $\{\mathbf{t}^{[1]},...,\mathbf{t}^{[K]}\}$ \textbf{do}\\
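-7: \ \ \ \ \ \ \ $\lambda_{t_v}^{'} = \sum_{s_u} \sum_{k=1}^{N} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})$\\
+7: \ \ \ \ \ \ \ $\lambda_{t_v}^{'} = \sum_{s_u} \sum_{k=1}^{K} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})$\\
-8: \ \ \ \ \ \ \ \textbf{foreach} $s_u$ appears at least one of $\{\mathbf{s}^{[1]},...,\mathbf{s}^{[N]}\}$ \textbf{do}\\
+8: \ \ \ \ \ \ \ \textbf{foreach} $s_u$ appears at least one of $\{\mathbf{s}^{[1]},...,\mathbf{s}^{[K]}\}$ \textbf{do}\\
-9: \ \ \ \ \ \ \ \ \ $f(s_u|t_v) = \sum_{k=1}^{N} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]}) \cdot (\lambda_{t_v}^{'})^{-1}$\\
+9: \ \ \ \ \ \ \ \ \ $f(s_u|t_v) = \sum_{k=1}^{K} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]}) \cdot (\lambda_{t_v}^{'})^{-1}$\\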
 10: \ \textbf{return} $f(\cdot|\cdot)$
 \end{beamerboxesrounded}
@@ -4287,8 +4287,8 @@ c_{\mathbb{E}}(i|j,m,l;\mathbf{s},\mathbf{t}) & = & \frac{f(s_j|t_i)a(i|j,m,l)}{
 \end{eqnarray}
 \item \textbf{M-Step}
 \begin{eqnarray}
-f(s_u|t_v) & = & \frac{\sum_{k=0}^{K} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})}{\sum_{s_u} \sum_{k=0}^{K} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})}  \nonumber \\
-a(i|j,m,l) & = & \frac{\sum_{k=0}^{K} c_{\mathbb{E}}(i|j;\mathbf{s}^{[k]},\mathbf{t}^{[k]})}{\sum_{i} \sum_{k=0}^{K} c_{\mathbb{E}}(i|j;\mathbf{s}^{[k]},\mathbf{t}^{[k]})} \nonumber
+f(s_u|t_v) & = & \frac{\sum_{k=1}^{K} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})}{\sum_{s_u} \sum_{k=1}^{K} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})}  \nonumber \\
+a(i|j,m,l) & = & \frac{\sum_{k=1}^{K} c_{\mathbb{E}}(i|j;\mathbf{s}^{[k]},\mathbf{t}^{[k]})}{\sum_{i} \sum_{k=1}^{K} c_{\mathbb{E}}(i|j;\mathbf{s}^{[k]},\mathbf{t}^{[k]})} \nonumber
 \end{eqnarray}
 \end{enumerate}
 \end{frame}

--- a/Section05-Neural-Networks-and-Language-Modeling/section05.tex
+++ b/Section05-Neural-Networks-and-Language-Modeling/section05.tex
@@ -541,7 +541,7 @@ GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7}
    \end{itemize}
 \item<2-> \textbf{当然}，你是一个勇于实践的人
    \begin{itemize}
-    \item 方法很简单：不断地尝试，根据结构不断地调整权重
+    \item 方法很简单：不断地尝试，根据结果不断地调整权重
    \item<10-> 在进行了很多次实验后，发现了相对好的一组权重
    \end{itemize}
 \end{itemize}
@@ -1034,7 +1034,7 @@ T(\alpha \textbf{a}) & = & \alpha T(\textbf{a}) \nonumber

 \visible<3->{
 \node [anchor=center,fill=green!20] (w2) at (w) {\Large{$\textbf{w}$}};
-\node [anchor=north,inner sep=1pt] (wlabel) at ([yshift=-0.7em]w.south) {\small{旋转(rotation)}};
+\node [anchor=north,inner sep=1pt] (wlabel) at ([yshift=-0.7em]w.south) {\small{旋转(rotation)、扩张(dilation)、挤压(squeeze)等}};
 \draw [<-] ([yshift=-0.2em]w2.south) -- (wlabel.north);

 \tikzstyle{neuron} = [rectangle,draw,thick,fill=red!30,red!35,minimum height=2em,minimum width=2em,font=\small]