Commit 69f7c2af by xiaotong

updates of section 4

parent b361b2f5
......@@ -3,7 +3,7 @@
\begin{center}
\begin{tikzpicture}
\begin{scope}[scale=0.8, sibling distance=1pt, level distance=30pt, yshift=-1.4in]
\begin{scope}[scale=1.0, sibling distance=5pt, level distance=30pt, yshift=-1.4in]
\Tree[. S
[.NP
[.NP
......
......@@ -4,7 +4,7 @@
\begin{tikzpicture}
\begin{scope}[minimum height = 18pt]
{\scriptsize
{\small
\node[anchor=north,fill=green!20] (s1) at (0,0) {进口};
\node [anchor=north,fill=red!20] (s2) at ([xshift=4em,yshift=0em]s1.north) {大幅度};
......@@ -20,14 +20,14 @@
}
\node[anchor=south] (s0) at ([xshift=-2em,yshift=0em]s1.south) {\textbf{s:}};
\node[anchor=east] (t0) at ([xshift=0em,yshift=-2.85em]s0.east) {\textbf{t:}};
\node[anchor=east] (t0) at ([xshift=0em,yshift=-3.5em]s0.east) {\textbf{t:}};
\node[anchor=south,inner sep=0pt,yshift=-0.3em] (sp1) at (s1.north) {\scriptsize{$\bar{s}_{a_1 = 1}$}};
\node[anchor=south,inner sep=0pt,yshift=-0.3em] (sp2) at (s2.north) {\scriptsize{$\bar{s}_{a_2 = 2}$}};
\node[anchor=south,inner sep=0pt,yshift=-0.3em] (sp3) at (s3.north) {\scriptsize{$\bar{s}_{a_3 = 3}$}};
\node[anchor=north,inner sep=0pt,yshift=0.3em] (tp1) at (t1.south) {\scriptsize{$\bar{t}_1$}};
\node[anchor=north,inner sep=0pt,yshift=0.3em] (tp2) at (t2.south) {\scriptsize{$\bar{t}_2$}};
\node[anchor=north,inner sep=0pt,yshift=0.3em] (tp3) at (t3.south) {\scriptsize{$\bar{t}_3$}};
\node[anchor=south,inner sep=0pt,yshift=-0.3em] (sp1) at (s1.north) {\footnotesize{$\bar{s}_{a_1 = 1}$}};
\node[anchor=south,inner sep=0pt,yshift=-0.3em] (sp2) at (s2.north) {\footnotesize{$\bar{s}_{a_2 = 2}$}};
\node[anchor=south,inner sep=0pt,yshift=-0.3em] (sp3) at (s3.north) {\footnotesize{$\bar{s}_{a_3 = 3}$}};
\node[anchor=north,inner sep=0pt,yshift=0.3em] (tp1) at (t1.south) {\footnotesize{$\bar{t}_1$}};
\node[anchor=north,inner sep=0pt,yshift=0.3em] (tp2) at (t2.south) {\footnotesize{$\bar{t}_2$}};
\node[anchor=north,inner sep=0pt,yshift=0.3em] (tp3) at (t3.south) {\footnotesize{$\bar{t}_3$}};
\end{scope}
\end{tikzpicture}
......
......@@ -60,7 +60,7 @@
\begin{pgfonlayer}{background}
{
\node [rectangle,draw=red,thick,inner sep=0.0em,fill=white] [fit = (s3) (s4)] (sphrase1) {};
\node [rectangle,draw=black,thick,inner sep=0.0em,fill=white] [fit = (s3) (s4)] (sphrase1) {};
\node [rectangle,draw=black,thick,inner sep=0.0em,fill=white] [fit = (t3) (t4)] (tphrase1) {};
}
\end{pgfonlayer}
......
......@@ -6,14 +6,14 @@
%% example
\begin{scope}[xshift=-0.1in,yshift=-1.5in]
{\tiny
{\scriptsize
\node[anchor=west] (ref) at (0,0) {{{人工翻译:}} {\red{After}} North Korea demanded concessions from U.S. again before the start of a new round of six-nation talks ...};
\node[anchor=west] (ref) at (0,0) {{\sffamily\bfseries{人工翻译:}} {\red{After}} North Korea demanded concessions from U.S. again before the start of a new round of six-nation talks ...};
\node[anchor=north west] (hifst) at ([yshift=-0.3em]ref.south west) {{{机器翻译:}} \blue{In}\black{} the new round of six-nation talks on North Korea again demanded that U.S. in the former promise ...};
\node[anchor=north west] (hifst) at ([yshift=-0.3em]ref.south west) {{\sffamily\bfseries{机器翻译:}} \blue{In}\black{} the new round of six-nation talks on North Korea again demanded that U.S. in the former promise ...};
{
\node[anchor=north west] (synhifst) at ([yshift=-0.3em]hifst.south west) {\sffamily\bfseries{better?:}};
\node[anchor=north west] (synhifst) at ([yshift=-0.3em]hifst.south west) {\sffamily\bfseries{更好?:}};
\node[anchor=west, fill=red!20!white, inner sep=0.3em] (synhifstpart1) at ([xshift=-0.5em]synhifst.east) {After};
......@@ -22,7 +22,7 @@
\node[anchor=west] (synhifstpart3) at ([xshift=-0.2em]synhifstpart2.east) {...};
}
\node [anchor=west] (inputlabel) at ([yshift=-0.4in]synhifst.west) {\sffamily\bfseries{Input:}};
\node [anchor=west] (inputlabel) at ([yshift=-0.4in]synhifst.west) {\sffamily\bfseries{输入:}};
\node [anchor=west,minimum height=12pt] (inputseg1) at (inputlabel.east) {$_1$ };
\node [anchor=west,minimum height=12pt] (inputseg2) at ([xshift=0.2em]inputseg1.east) {北韩$_2$ 再度$_3$ 要求$_4$ 美国$_5$$_6$$_7$ 回合$_8$$_9$$_{10}$ 会谈$_{11}$$_{12}$ 承诺$_{13}$ 让步$_{14}$};
......
......@@ -4,13 +4,13 @@
\begin{tikzpicture}
\begin{scope}[minimum height = 18pt]
{\scriptsize
{\small
\node[anchor=north,fill=green!20] (s1) at (0,0) {进口};
\node [anchor=north,fill=red!20] (s2) at ([xshift=4em,yshift=0em]s1.north) {大幅度};
\node[anchor=north,fill=blue!20] (s3) at ([xshift=4.5em,yshift=0em]s2.north) {下降 了};
\node[anchor=west,fill=green!20] (t1) at ([xshift=0em,yshift=-4em]s1.west) {the imports have};
\node[anchor=west,fill=green!20] (t1) at ([xshift=0em,yshift=-4em]s1.west) {The imports have};
\node[anchor=north,fill=red!20] (t2) at ([xshift=8em,yshift=0em]t1.north) {drastically};
\node[anchor=north,fill=blue!20] (t3) at ([xshift=5.7em,yshift=0em]t2.north) {fallen};
......@@ -20,7 +20,7 @@
}
\node[anchor=south] (s0) at ([xshift=-3em,yshift=0em]s1.south) {源语:};
\node[anchor=east] (t0) at ([xshift=0em,yshift=-2.85em]s0.east) {目标语:};
\node[anchor=east] (t0) at ([xshift=0em,yshift=-3.5em]s0.east) {目标语:};
\end{scope}
\end{tikzpicture}
......
......@@ -3,11 +3,11 @@
\begin{tikzpicture}
\node[anchor=west, fill=blue!30, inner sep=0.05cm] (sp1) at (0, 0) {进口\ \ };
\node[anchor=west] (sp2) at (2.5em, 0) { 过去的 五 到 十 年};
\node[anchor=west, fill=red!30, inner sep=0.05cm] (sp3) at (14em, 0) {有了 大幅度 下降};
\node[anchor=west] (sp2) at (2.5em, 0) {\ 过去\ \ \ \ \ \ };
\node[anchor=west, fill=red!30, inner sep=0.05cm] (sp3) at (14em, 0) {有了\ 大幅度\ 下降};
\draw[->] (sp1) edge [out=15, in=170] (sp3);
\node[anchor=west, fill=blue!30, inner sep=0.05cm] (tp1) at (0, -0.8) {the imports};
\node[anchor=west, fill=blue!30, inner sep=0.05cm] (tp1) at (0, -0.8) {The imports};
\node[anchor=west, fill=red!30, inner sep=0.05cm] (tp2) at (5.3em, -0.8) {drastically fell};
\node[anchor=west] (tp3) at (11.3em, -0.8) {in the past five to ten years};
\path[->] (tp1) edge [out=30, in=150] (tp2);
......
......@@ -10,7 +10,7 @@
\node [anchor=west] (s4) at ([xshift=2em]s3.east) {\textbf{表示}};
\node [anchor=west] (s5) at ([xshift=2em]s4.east) {\textbf{满意}};
\node [anchor=south west] (sentlabel) at ([yshift=-0.5em]s1.north west) {\scriptsize{\textbf{\red{待翻译句子(已经分词):}}}};
\node [anchor=south west] (sentlabel) at ([yshift=-0.5em]s1.north west) {\scriptsize{\textbf{待翻译句子(已经分词):}}};
\draw [->,very thick,ublue] (s1.south) -- ([yshift=-0.7em]s1.south);
\draw [->,very thick,ublue] (s2.south) -- ([yshift=-0.7em]s2.south);
......
......@@ -10,7 +10,7 @@
\node [anchor=west] (s4) at ([xshift=2em]s3.east) {\textbf{表示}};
\node [anchor=west] (s5) at ([xshift=2em]s4.east) {\textbf{满意}};
\node [anchor=south west] (sentlabel) at ([yshift=-0.5em]s1.north west) {\scriptsize{\textbf{\red{待翻译句子(已经分词):}}}};
\node [anchor=south west] (sentlabel) at ([yshift=-0.5em]s1.north west) {\scriptsize{\textbf{待翻译句子(已经分词):}}};
\draw [->,very thick,ublue] (s1.south) -- ([yshift=-0.7em]s1.south);
\draw [->,very thick,ublue] (s2.south) -- ([yshift=-0.7em]s2.south);
......
......@@ -64,7 +64,7 @@
\end{figure}
%-------------------------------------------
\parinterval 一般来说,统计机器翻译的建模对应着一个两阶段的过程:首先,得到每个翻译单元所有可能的译文;然后,通过对这些译文的组合得到可能的句子翻译结果,并选择最佳的目标语言句子输出。如果基本的翻译单元被定义下来,机器翻译系统可以学习这些单元翻译所对应的翻译知识(对应训练过程),之后运用这些知识完成对新的句子的翻译(对应解码过程)。图\ref{fig:word-translation-regard-as-path}给出了一个基于单词的机器翻译过程
\parinterval 一般来说,统计机器翻译的建模对应着一个两阶段的过程:首先,得到每个翻译单元所有可能的译文;然后,通过对这些译文的组合得到可能的句子翻译结果,并选择最佳的目标语言句子输出。如果基本的翻译单元被定义下来,机器翻译系统可以学习这些单元翻译所对应的翻译知识(对应训练过程),之后运用这些知识对新的句子进行翻译(对应解码过程)。
%----------------------------------------------
% 图4.4
......@@ -76,7 +76,7 @@
\end{figure}
%-------------------------------------------
\parinterval 首先,每个单词的候选译文都被列举出来,而机器翻译就是要找到覆盖所有源语言单词的一条路径,它所对应的译文概率是最高的。比如,图\ref{fig:word-translation-regard-as-path}中的红色折线就代表了一条翻译路径,也就是一个单词译文的序列\footnote[1]{为了简化问题,这里没有描述单词译文的调序。对于调序的建模,可以把它当作是对目标语单词串的排列,这个排列的好坏需要用额外的调序模型进行描述。详细内容见\ref{subsection-4.2.4}节。}
\parinterval \ref{fig:word-translation-regard-as-path}给出了基于单词的机器翻译过程的一个示例。首先,每个单词的候选译文都被列举出来,而机器翻译系统就是要找到覆盖所有源语言单词的一条路径,它所对应的译文概率是最高的。比如,图中的红色折线就代表了一条翻译路径,也就是一个单词译文的序列\footnote[1]{为了简化问题,这里没有描述单词译文的调序。对于调序的建模,可以把它当作是对目标语单词串的排列,这个排列的好坏需要用额外的调序模型进行描述。详细内容见\ref{subsection-4.2.4}节。}
\parinterval 在引入短语翻译之后,并不需要对上述过程进行太大的修改。仍然可以把翻译当作是一条贯穿源语言所有单词译文的路径,只是这条路径中会包含短语,而非一个个单词。图\ref{fig:word-and-phrase-translation-regard-as-path}给出了一个实例,其中的蓝色折线表示包含短语的翻译路径。
......@@ -90,12 +90,12 @@
\end{figure}
%-------------------------------------------
\parinterval 实际上,单词本身也是一种短语。从这个角度说,基于单词的翻译模型是包含在基于短语的翻译模型中的。而这里的所说的短语包括多个连续的单词,可以直接捕捉翻译中的一些局部依赖。而且,由于引入了更多样翻译单元,可选择的翻译路径数量也大大增加。本质上,引入更大颗粒度的翻译单元为建模增加了灵活性,也增大了翻译假设空间。如果建模合理,更多的翻译路径会增加找到高质量译文的机会。在\ref{section-4.2}节还将看到,基于短语的模型会从多个角度对翻译问题进行描述,包括基础数学建模、调序等等。
\parinterval 实际上,单词本身也是一种短语。从这个角度说,基于单词的翻译模型是包含在基于短语的翻译模型中的。而这里所说的短语包括多个连续的单词,可以直接捕捉翻译中的一些局部依赖。而且,由于引入了更多样翻译单元,可选择的翻译路径数量也大大增加。本质上,引入更大颗粒度的翻译单元给建模增加了灵活性,同时增大了翻译假设空间。如果建模合理,更多的翻译路径会增加找到高质量译文的机会。在\ref{section-4.2}节还将看到,基于短语的模型会从多个角度对翻译问题进行描述,包括基础数学建模、调序等等。
%--4.1.2 句子的结构信息---------------------
\subsection{句子的结构信息}\index{Chapter4.1.2}
\parinterval 短语的优点在于可以捕捉具有完整意思的连续词串,因此能够对局部上下文信息进行建模。当单词之间的搭配和依赖关系出现在连续词串中时,短语都可以很好的进行描述。但是,当单词之间距离很远时,使用短语的``效率''很低。同$n$-gram语言模型一样,当短语长度变长时,数据会变得非常稀疏。比如,很多实验已经证明,测试数据中超过5个的连续单词在训练数据中往往是很低频的现象,更长的短语甚至都很难在训练数据中找到。当然,可以使用平滑算法对长短语的概率进行估计,但是使用过长的短语在实际系统研发中仍然不现实。图\ref{fig:long-distance-dependence-in-zh2en-translation}展示了一个汉语到英语的翻译实例。源语言的两个短语(蓝色和红色高亮)在译文中产生了调序。但是,这两个短语在源语言句子中横跨11个单词。如果直接使用这个11个单词构成的短语进行翻译,显然会有非常严重的数据稀疏问题,因为很难期望在训练数据中见到一模一样的短语。
\parinterval 使用短语的优点在于可以捕捉具有完整意思的连续词串,因此能够对局部上下文信息进行建模。当单词之间的搭配和依赖关系出现在连续词串中时,短语可以很好地对其进行描述。但是,当单词之间距离很远时,使用短语的``效率''很低。同$n$-gram语言模型一样,当短语长度变长时,数据会变得非常稀疏。比如,很多实验已经证明,测试数据中超过5个的连续单词在训练数据中往往是很低频的现象,更长的短语甚至都很难在训练数据中找到。当然,可以使用平滑算法对长短语的概率进行估计,但是使用过长的短语在实际系统研发中仍然不现实。图\ref{fig:long-distance-dependence-in-zh2en-translation}展示了一个汉语到英语的翻译实例。源语言的两个短语(蓝色和红色高亮)在译文中产生了调序。但是,这两个短语在源语言句子中横跨11个单词。如果直接使用这个11个单词构成的短语进行翻译,显然会有非常严重的数据稀疏问题,因为很难期望在训练数据中见到一模一样的短语。
%----------------------------------------------
% 图4.6
......@@ -126,7 +126,7 @@
\begin{figure}[htp]
\centering
\input{./Chapter4/Figures/example-of-translation-use-syntactic-structure}
\caption{使用句法结构进行机器翻译的实例,其中PP是一个包含15个词的介词短语}
\caption{使用句法结构进行机器翻译的实例}
\label{fig:example-of-translation-use-syntactic-structure}
\end{figure}
%-------------------------------------------
......@@ -141,7 +141,7 @@
%--4.2.1 机器翻译中的短语---------------------
\subsection{机器翻译中的短语}\index{Chapter4.2.1}
\parinterval 基于短语的机器翻译的基本假设是:双语句子的生成可以用短语之间的对应关系进行表示。图\ref{fig:example-of-zh2en-translation-base-phrase}展示了一个基于短语的翻译实例。可以看到,这里的翻译单元是连续的词串。比如,``进口''的译文``the imports have''就包含了三个单词,而``下降 了''也是一个包含两个单词的源语言片段。
\parinterval 基于短语的机器翻译的基本假设是:双语句子的生成可以用短语之间的对应关系进行表示。图\ref{fig:example-of-zh2en-translation-base-phrase}展示了一个基于短语的翻译实例。可以看到,这里的翻译单元是连续的词串。比如,``进口''的译文``The imports have''就包含了三个单词,而``下降\ 了''也是一个包含两个单词的源语言片段。
%----------------------------------------------
% 图4.9
......
......@@ -23,7 +23,7 @@
\indexentry{Chapter4.2.7.3|hyperpage}{31}
\indexentry{Chapter4.2.7.4|hyperpage}{32}
\indexentry{Chapter4.3|hyperpage}{33}
\indexentry{Chapter4.3.1|hyperpage}{35}
\indexentry{Chapter4.3.1|hyperpage}{36}
\indexentry{Chapter4.3.1.1|hyperpage}{36}
\indexentry{Chapter4.3.1.2|hyperpage}{37}
\indexentry{Chapter4.3.1.3|hyperpage}{38}
......
\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax
\babel@toc {english}{}
\defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {I}{机器翻译基础}}{11}{part.1}%
\ttl@starttoc {default@1}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {1}机器翻译简介}{13}{chapter.1}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.1}机器翻译的概念}{13}{section.1.1}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.2}机器翻译简史}{16}{section.1.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.1}人工翻译}{16}{subsection.1.2.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.2}机器翻译的萌芽}{17}{subsection.1.2.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.3}机器翻译的受挫}{18}{subsection.1.2.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.4}机器翻译的快速成长}{19}{subsection.1.2.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.5}机器翻译的爆发}{20}{subsection.1.2.5}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.3}机器翻译现状}{21}{section.1.3}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.4}机器翻译方法}{22}{section.1.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.1}基于规则的机器翻译}{22}{subsection.1.4.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.2}基于实例的机器翻译}{24}{subsection.1.4.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.3}统计机器翻译}{25}{subsection.1.4.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.4}神经机器翻译}{26}{subsection.1.4.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.5}对比分析}{27}{subsection.1.4.5}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.5}翻译质量评价}{28}{section.1.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.5.1}人工评价}{28}{subsection.1.5.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.5.2}自动评价}{29}{subsection.1.5.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{BLEU}{29}{section*.15}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{TER}{31}{section*.16}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于检测点的评价}{31}{section*.17}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.6}机器翻译应用}{32}{section.1.6}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.7}开源项目与评测}{34}{section.1.7}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.7.1}开源机器翻译系统}{34}{subsection.1.7.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{统计机器翻译开源系统}{34}{section*.19}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经机器翻译开源系统}{36}{section*.20}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.7.2}常用数据集及公开评测任务}{38}{subsection.1.7.2}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.8}推荐学习资源}{40}{section.1.8}%
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {2}词法、语法及统计建模基础}{45}{chapter.2}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.1}问题概述 }{46}{section.2.1}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.2}概率论基础}{47}{section.2.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.1}随机变量和概率}{47}{subsection.2.2.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.2}联合概率、条件概率和边缘概率}{49}{subsection.2.2.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.3}链式法则}{50}{subsection.2.2.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.4}贝叶斯法则}{51}{subsection.2.2.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.5}KL距离和熵}{53}{subsection.2.2.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{信息熵}{53}{section*.27}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{KL距离}{54}{section*.29}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{交叉熵}{54}{section*.30}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.3}中文分词}{55}{section.2.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.3.1}基于词典的分词方法}{56}{subsection.2.3.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.3.2}基于统计的分词方法}{57}{subsection.2.3.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{统计模型的学习与推断}{57}{section*.34}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{掷骰子游戏}{58}{section*.36}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{全概率分词方法}{60}{section*.40}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.4}$n$-gram语言模型 }{62}{section.2.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.4.1}建模}{63}{subsection.2.4.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.4.2}未登录词和平滑算法}{65}{subsection.2.4.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{加法平滑方法}{66}{section*.46}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{古德-图灵估计法}{67}{section*.48}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{Kneser-Ney平滑方法}{68}{section*.50}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.5}句法分析(短语结构分析)}{70}{section.2.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.5.1}句子的句法树表示}{70}{subsection.2.5.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.5.2}上下文无关文法}{72}{subsection.2.5.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.5.3}规则和推导的概率}{76}{subsection.2.5.3}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.6}小结及深入阅读}{78}{section.2.6}%
\defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {II}{统计机器翻译}}{81}{part.2}%
\ttl@stoptoc {default@1}
\ttl@starttoc {default@2}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {3}基于词的机器翻译模型}{83}{chapter.3}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.1}什么是基于词的翻译模型}{83}{section.3.1}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.2}构建一个简单的机器翻译系统}{85}{section.3.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.1}如何进行翻译?}{85}{subsection.3.2.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{机器翻译流程}{86}{section*.63}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{人工翻译 vs. 机器翻译}{87}{section*.65}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.2}基本框架}{87}{subsection.3.2.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.3}单词翻译概率}{88}{subsection.3.2.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{什么是单词翻译概率?}{88}{section*.67}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{如何从一个双语平行数据中学习?}{88}{section*.69}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{如何从大量的双语平行数据中学习?}{90}{section*.70}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.4}句子级翻译模型}{91}{subsection.3.2.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基础模型}{91}{section*.72}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{生成流畅的译文}{93}{section*.74}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.5}解码}{95}{subsection.3.2.5}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.3}基于词的翻译建模}{98}{section.3.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.3.1}噪声信道模型}{98}{subsection.3.3.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.3.2}统计机器翻译的三个基本问题}{100}{subsection.3.3.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{词对齐}{101}{section*.83}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于词对齐的翻译模型}{101}{section*.86}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于词对齐的翻译实例}{103}{section*.88}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.4}IBM模型1-2}{104}{section.3.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.1}IBM模型1}{104}{subsection.3.4.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.2}IBM模型2}{106}{subsection.3.4.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.3}解码及计算优化}{107}{subsection.3.4.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.4}训练}{108}{subsection.3.4.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{目标函数}{108}{section*.93}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{优化}{109}{section*.95}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.5}IBM模型3-5及隐马尔可夫模型}{115}{section.3.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.1}基于产出率的翻译模型}{115}{subsection.3.5.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.2}IBM 模型3}{118}{subsection.3.5.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.3}IBM 模型4}{119}{subsection.3.5.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.4} IBM 模型5}{121}{subsection.3.5.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.5}隐马尔可夫模型}{122}{subsection.3.5.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{隐马尔可夫模型}{123}{section*.107}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{词对齐模型}{124}{section*.109}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.6}解码和训练}{125}{subsection.3.5.6}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.6}问题分析}{125}{section.3.6}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.1}词对齐及对称化}{125}{subsection.3.6.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.2}Deficiency}{126}{subsection.3.6.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.3}句子长度}{127}{subsection.3.6.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.4}其他问题}{128}{subsection.3.6.4}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.7}小结及深入阅读}{128}{section.3.7}%
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {4}基于短语和句法的机器翻译模型}{131}{chapter.4}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.1}翻译中的结构信息}{131}{section.4.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.1.1}更大粒度的翻译单元}{132}{subsection.4.1.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.1.2}句子的结构信息}{134}{subsection.4.1.2}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.2}基于短语的翻译模型}{136}{section.4.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.1}机器翻译中的短语}{136}{subsection.4.2.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.2}数学建模及判别式模型}{139}{subsection.4.2.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于翻译推导的建模}{139}{section*.121}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{对数线性模型}{140}{section*.122}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{搭建模型的基本流程}{141}{section*.123}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.3}短语抽取}{142}{subsection.4.2.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{与词对齐一致的短语}{142}{section*.126}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{获取词对齐}{143}{section*.130}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{度量双语短语质量}{144}{section*.132}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.4}调序}{146}{subsection.4.2.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于距离的调序}{146}{section*.136}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于方向的调序}{147}{section*.138}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于分类的调序}{148}{section*.141}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.5}特征}{149}{subsection.4.2.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.6}最小错误率训练}{149}{subsection.4.2.6}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.7}栈解码}{153}{subsection.4.2.7}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译候选匹配}{154}{section*.146}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译假设扩展}{154}{section*.148}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{剪枝}{155}{section*.150}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{解码中的栈结构}{156}{section*.152}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.3}基于层次短语的模型}{157}{section.4.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.1}同步上下文无关文法}{159}{subsection.4.3.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{文法定义}{160}{section*.157}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{推导}{161}{section*.158}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{胶水规则}{162}{section*.159}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{处理流程}{163}{section*.160}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.2}层次短语规则抽取}{163}{subsection.4.3.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.3}翻译模型及特征}{165}{subsection.4.3.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.4}CYK解码}{166}{subsection.4.3.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.5}立方剪枝}{169}{subsection.4.3.5}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.4}基于语言学句法的模型}{172}{section.4.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.1}基于句法的翻译模型分类}{173}{subsection.4.4.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.2}基于树结构的文法}{176}{subsection.4.4.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{树到树翻译规则}{177}{section*.176}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于树结构的翻译推导}{178}{section*.178}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{树到串翻译规则}{180}{section*.181}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.3}树到串翻译规则抽取}{181}{subsection.4.4.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{树的切割与最小规则}{182}{section*.183}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{空对齐处理}{186}{section*.189}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{组合规则}{186}{section*.191}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{SPMT规则}{187}{section*.193}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{句法树二叉化}{188}{section*.195}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.4}树到树翻译规则抽取}{189}{subsection.4.4.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于节点对齐的规则抽取}{190}{section*.199}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于对齐矩阵的规则抽取}{191}{section*.202}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.5}句法翻译模型的特征}{193}{subsection.4.4.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.6}基于超图的推导空间表示}{194}{subsection.4.4.6}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.7}基于树的解码 vs 基于串的解码}{196}{subsection.4.4.7}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于树的解码}{197}{section*.209}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于串的解码}{198}{section*.212}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.5}小结及深入阅读}{200}{section.4.5}%
\defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {III}{神经机器翻译}}{203}{part.3}%
\ttl@stoptoc {default@2}
\ttl@starttoc {default@3}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {5}人工神经网络和神经语言建模}{205}{chapter.5}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.1}深度学习与人工神经网络}{206}{section.5.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.1.1}发展简史}{206}{subsection.5.1.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{早期的人工神经网络和第一次寒冬}{206}{section*.214}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经网络的第二次高潮和第二次寒冬}{207}{section*.215}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深度学习和神经网络方法的崛起}{208}{section*.216}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.1.2}为什么需要深度学习}{209}{subsection.5.1.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{端到端学习和表示学习}{209}{section*.218}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深度学习的效果}{210}{section*.220}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.2}神经网络基础}{210}{section.5.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.1}线性代数基础}{210}{subsection.5.2.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{标量、向量和矩阵}{211}{section*.222}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{矩阵的转置}{212}{section*.223}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{矩阵加法和数乘}{212}{section*.224}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{矩阵乘法和矩阵点乘}{213}{section*.225}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{线性映射}{214}{section*.226}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{范数}{215}{section*.227}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.2}人工神经元和感知机}{216}{subsection.5.2.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{感知机\ \raisebox {0.5mm}{------}\ 最简单的人工神经元模型}{217}{section*.230}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经元内部权重}{218}{section*.233}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经元的输入\ \raisebox {0.5mm}{------}\ 离散 vs 连续}{219}{section*.235}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经元内部的参数学习}{219}{section*.237}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.3}多层神经网络}{220}{subsection.5.2.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{线性变换和激活函数}{220}{section*.239}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{单层神经网络$\rightarrow $多层神经网络}{222}{section*.246}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.4}函数拟合能力}{223}{subsection.5.2.4}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.3}神经网络的张量实现}{227}{section.5.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.1} 张量及其计算}{228}{subsection.5.3.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{张量}{228}{section*.256}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{张量的矩阵乘法}{230}{section*.259}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{张量的单元操作}{231}{section*.261}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.2}张量的物理存储形式}{232}{subsection.5.3.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.3}使用开源框架实现张量计算}{232}{subsection.5.3.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.4}前向传播与计算图}{234}{subsection.5.3.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.5}神经网络实例}{237}{subsection.5.3.5}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.4}神经网络的参数训练}{238}{section.5.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.1}损失函数}{239}{subsection.5.4.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.2}基于梯度的参数优化}{240}{subsection.5.4.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度下降}{240}{section*.279}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度获取}{242}{section*.281}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于梯度的方法的变种和改进}{245}{section*.285}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.3}参数更新的并行化策略}{248}{subsection.5.4.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.4}梯度消失、梯度爆炸和稳定性训练}{250}{subsection.5.4.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{易于优化的激活函数}{250}{section*.288}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度裁剪}{251}{section*.292}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{稳定性训练}{252}{section*.293}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.5}过拟合}{253}{subsection.5.4.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.6}反向传播}{254}{subsection.5.4.6}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{输出层的反向传播}{255}{section*.296}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{隐藏层的反向传播}{257}{section*.300}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{程序实现}{258}{section*.303}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.5}神经语言模型}{259}{section.5.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.5.1}基于神经网络的语言建模}{260}{subsection.5.5.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于前馈神经网络的语言模型}{261}{section*.306}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于循环神经网络的语言模型}{263}{section*.309}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于自注意力机制的语言模型}{265}{section*.311}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{语言模型的评价}{266}{section*.313}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.5.2}单词表示模型}{266}{subsection.5.5.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{One-hot编码}{266}{section*.314}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{分布式表示}{267}{section*.316}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.5.3}句子表示模型及预训练}{268}{subsection.5.5.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{简单的上下文表示模型}{269}{section*.320}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{ELMo模型}{270}{section*.323}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{GPT模型}{270}{section*.325}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{BERT模型}{271}{section*.327}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{为什么要预训练?}{272}{section*.329}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.6}小结及深入阅读}{273}{section.5.6}%
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {6}神经机器翻译模型}{275}{chapter.6}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.1}神经机器翻译的发展简史}{275}{section.6.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.1.1}神经机器翻译的起源}{277}{subsection.6.1.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.1.2}神经机器翻译的品质 }{279}{subsection.6.1.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.1.3}神经机器翻译的优势 }{282}{subsection.6.1.3}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.2}编码器-解码器框架}{284}{section.6.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.1}框架结构}{284}{subsection.6.2.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.2}表示学习}{285}{subsection.6.2.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.3}简单的运行实例}{286}{subsection.6.2.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.4}机器翻译范式的对比}{287}{subsection.6.2.4}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.3}基于循环神经网络的翻译模型及注意力机制}{288}{section.6.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.1}建模}{289}{subsection.6.3.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.2}输入(词嵌入)及输出(Softmax)}{292}{subsection.6.3.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.3}循环神经网络结构}{295}{subsection.6.3.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{循环神经单元(RNN)}{295}{section*.351}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长短时记忆网络(LSTM)}{296}{section*.352}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{门控循环单元(GRU)}{297}{section*.355}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{双向模型}{300}{section*.357}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{多层循环神经网络}{300}{section*.359}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.4}注意力机制}{301}{subsection.6.3.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译中的注意力机制}{302}{section*.362}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{上下文向量的计算}{303}{section*.365}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{注意力机制的解读}{306}{section*.370}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.5}训练}{308}{subsection.6.3.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{损失函数}{308}{section*.373}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{参数初始化}{309}{section*.374}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{优化策略}{310}{section*.375}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度裁剪}{310}{section*.377}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{学习率策略}{310}{section*.378}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{并行训练}{312}{section*.381}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.6}推断}{313}{subsection.6.3.6}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{贪婪搜索}{315}{section*.385}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{束搜索}{316}{section*.388}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长度惩罚}{317}{section*.390}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.7}实例-GNMT}{318}{subsection.6.3.7}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.4}Transformer}{319}{section.6.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.1}自注意力模型}{321}{subsection.6.4.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.2}Transformer架构}{322}{subsection.6.4.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.3}位置编码}{324}{subsection.6.4.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.4}基于点乘的注意力机制}{326}{subsection.6.4.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.5}掩码操作}{328}{subsection.6.4.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.6}多头注意力}{329}{subsection.6.4.6}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.7}残差网络和层正则化}{331}{subsection.6.4.7}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.8}前馈全连接网络子层}{332}{subsection.6.4.8}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.9}训练}{333}{subsection.6.4.9}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.10}推断}{336}{subsection.6.4.10}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.5}序列到序列问题及应用}{337}{section.6.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.1}自动问答}{337}{subsection.6.5.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.2}自动文摘}{337}{subsection.6.5.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.3}文言文翻译}{337}{subsection.6.5.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.4}对联生成}{339}{subsection.6.5.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.5}古诗生成}{339}{subsection.6.5.5}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.6}小结及深入阅读}{339}{section.6.6}%
\defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {IV}{附录}}{343}{part.4}%
\ttl@stoptoc {default@3}
\ttl@starttoc {default@4}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {A}附录A}{345}{appendix.1.A}%
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {B}附录B}{347}{appendix.2.B}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {B.1}IBM模型3训练方法}{347}{section.2.B.1}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {B.2}IBM模型4训练方法}{349}{section.2.B.2}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {B.3}IBM模型5训练方法}{351}{section.2.B.3}%
\contentsfinish
% --- non-LaTeX residue (web UI scrape artifacts) commented out so the .toc remains parseable ---
% Markdown 格式
% 0%
% 您添加了 0 到此讨论。请谨慎行事。
% 请先完成此评论的编辑!
% 注册 或者 后发表评论