Commit 8d1ff05e by xiaotong

minor updates

parent ef6e3eae
...@@ -51,10 +51,10 @@ ...@@ -51,10 +51,10 @@
\end{pgfonlayer} \end{pgfonlayer}
{ {
\node [anchor=south] (rule1label) at ([xshift=1em]rule1s.north west) {\footnotesize\sffamily\bfseries{\red{正确的规则}}}; \node [anchor=south] (rule1label) at ([xshift=1em]rule1s.north west) {{\footnotesize\red{正确的规则}}};
} }
{ {
\node [anchor=north west,align=left] (rule2label) at (rule2s.north east) {\footnotesize{\sffamily\bfseries{\color{blue} 错误的规则}}\\\footnotesize{因为``satisfied''会}\\\footnotesize{对齐到规则外,}\\\footnotesize{也就是这条规则}\\\footnotesize{与词对齐不相容}}; \node [anchor=north west,align=left] (rule2label) at (rule2s.north east) {\footnotesize{{\color{blue} 错误的规则}}\\\footnotesize{因为``satisfied''会}\\\footnotesize{对齐到规则外,}\\\footnotesize{也就是这条规则}\\\footnotesize{与词对齐不相容}};
} }
\end{scope} \end{scope}
......
...@@ -986,7 +986,7 @@ f(x)=\begin{cases} 0 & x\le 0 \\x & x>0\end{cases} ...@@ -986,7 +986,7 @@ f(x)=\begin{cases} 0 & x\le 0 \\x & x>0\end{cases}
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter5/Figures/fig-save} \input{./Chapter5/Figures/fig-save}
\caption{1阶、2阶、3阶张量的物理存储} \caption{1阶(a)、2阶(b)、3阶(c)张量的物理存储}
\label{fig:save} \label{fig:save}
\end{figure} \end{figure}
%------------------------------------------- %-------------------------------------------
......
...@@ -1646,7 +1646,7 @@ L_{\textrm{seq}} = - \textrm{logP}_{\textrm{s}}(\hat{\textbf{y}} | \textbf{x}) ...@@ -1646,7 +1646,7 @@ L_{\textrm{seq}} = - \textrm{logP}_{\textrm{s}}(\hat{\textbf{y}} | \textbf{x})
\begin{itemize} \begin{itemize}
\item 无指导机器翻译。无指导机器翻译由于其不需要双语语料即可训练翻译模型的特性,在稀缺资源机器翻译的场景中有非常大的潜力而得到广泛的关注。目前无指导机器翻译主要有两种范式:第一种先得到词典的翻译,然后得到短语表的翻译和相应的统计机器翻译系统,最后使用统计机器翻译系统生成伪双语平行语料训练神经机器翻译系统\cite{DBLP:conf/acl/ArtetxeLA19};第二种是先预训练语言模型来初始化神经机器翻译系统的编码器和解码器,然后使用翻译中回译以及降噪自编码器来训练神经机器翻译系统\cite{lample2019cross}。尽管目前无指导机器翻译在富资源的语种上取得了很大进展,但是离实际应用还有很远距离。比如,目前无指导系统都依赖于大量单语数据,而实际上稀缺资源的语种不但双语语料少,单语语料也少;此外,这些系统还无法在中英这类字母表重合少、需要大范围调序的远距离语种对上取得可接受的结果;使用大量单语训练无指导系统还面临数据来自于不同领域的问题\cite{DBLP:journals/corr/abs-2004-05516}。设计更鲁棒、使用单语数据更高效的无指导机器翻译方法乃至新范式会是未来的趋势。 \item 无指导机器翻译。无指导机器翻译由于其不需要双语语料即可训练翻译模型的特性,在稀缺资源机器翻译的场景中有非常大的潜力而得到广泛的关注。目前无指导机器翻译主要有两种范式:第一种先得到词典的翻译,然后得到短语表的翻译和相应的统计机器翻译系统,最后使用统计机器翻译系统生成伪双语平行语料训练神经机器翻译系统\cite{DBLP:conf/acl/ArtetxeLA19};第二种是先预训练语言模型来初始化神经机器翻译系统的编码器和解码器,然后使用翻译中回译以及降噪自编码器来训练神经机器翻译系统\cite{lample2019cross}。尽管目前无指导机器翻译在富资源的语种上取得了很大进展,但是离实际应用还有很远距离。比如,目前无指导系统都依赖于大量单语数据,而实际上稀缺资源的语种不但双语语料少,单语语料也少;此外,这些系统还无法在中英这类字母表重合少、需要大范围调序的远距离语种对上取得可接受的结果;使用大量单语训练无指导系统还面临数据来自于不同领域的问题\cite{DBLP:journals/corr/abs-2004-05516}。设计更鲁棒、使用单语数据更高效的无指导机器翻译方法乃至新范式会是未来的趋势。
\vspace{0.5em} \vspace{0.5em}
\item 更多上下文信息的建模,多模态、基于树的模型、篇章翻译。由于人类语言潜在的歧义性,传统的神经机器翻译在单句翻译中可能会出现歧义。为此,一些研究工作在翻译过程中尝试引入更多的上下文信息,比如多模态翻译、基于树的翻译或者篇章级翻译。多模态翻译的目标就是在给定一个图片和其源语描述的情况下,生成目标语言的描述。一般做法就是通过一个额外的编码器来提取图像特征\cite{elliott2015multilingual,DBLP:conf/acl/HitschlerSR16},然后通过权重门控机制、注意力网络等融合到系统中\cite{DBLP:conf/wmt/HuangLSOD16}。 \item 更多上下文信息的建模。由于人类语言潜在的歧义性,传统的神经机器翻译在单句翻译中可能会出现歧义。为此,一些研究工作在翻译过程中尝试引入更多的上下文信息,比如多模态翻译、基于树的翻译或者篇章级翻译。多模态翻译的目标就是在给定一个图片和其源语描述的情况下,生成目标语言的描述。一般做法就是通过一个额外的编码器来提取图像特征\cite{elliott2015multilingual,DBLP:conf/acl/HitschlerSR16},然后通过权重门控机制、注意力网络等融合到系统中\cite{DBLP:conf/wmt/HuangLSOD16}。
\parinterval 基于树的翻译是指在翻译模型中引入句法结构树或依存树,从而引入更多的句法信息。一种常用的做法是将句法树进行序列化,从而保留序列到序列的模型结构\cite{DBLP:conf/emnlp/CurreyH18,DBLP:conf/acl/SaundersSGB18,DBLP:conf/wmt/NadejdeRSDJKB17}。在此基础上,一些研究工作引入了更多的解析结果\cite{DBLP:conf/acl/SumitaUZTM18,DBLP:conf/coling/ZaremoodiH18}。同时,也有一些研究工作直接使用Tree-LSTMs等网络结构\cite{DBLP:conf/acl/TaiSM15,DBLP:conf/iclr/ShenTSC19}来表示树结构,并将其应用到神经机器翻译模型中\cite{DBLP:conf/acl/EriguchiHT16,Yang2017TowardsBH,DBLP:conf/acl/ChenHCC17}。 \parinterval 基于树的翻译是指在翻译模型中引入句法结构树或依存树,从而引入更多的句法信息。一种常用的做法是将句法树进行序列化,从而保留序列到序列的模型结构\cite{DBLP:conf/emnlp/CurreyH18,DBLP:conf/acl/SaundersSGB18,DBLP:conf/wmt/NadejdeRSDJKB17}。在此基础上,一些研究工作引入了更多的解析结果\cite{DBLP:conf/acl/SumitaUZTM18,DBLP:conf/coling/ZaremoodiH18}。同时,也有一些研究工作直接使用Tree-LSTMs等网络结构\cite{DBLP:conf/acl/TaiSM15,DBLP:conf/iclr/ShenTSC19}来表示树结构,并将其应用到神经机器翻译模型中\cite{DBLP:conf/acl/EriguchiHT16,Yang2017TowardsBH,DBLP:conf/acl/ChenHCC17}。
......
\indexentry{Chapter4.1|hyperpage}{7} \indexentry{Chapter1.1|hyperpage}{15}
\indexentry{Chapter4.1.1|hyperpage}{8} \indexentry{Chapter1.2|hyperpage}{18}
\indexentry{Chapter4.1.2|hyperpage}{10} \indexentry{Chapter1.3|hyperpage}{23}
\indexentry{Chapter4.2|hyperpage}{12} \indexentry{Chapter1.4|hyperpage}{24}
\indexentry{Chapter4.2.1|hyperpage}{12} \indexentry{Chapter1.4.1|hyperpage}{26}
\indexentry{Chapter4.2.2|hyperpage}{15} \indexentry{Chapter1.4.2|hyperpage}{26}
\indexentry{Chapter4.2.2.1|hyperpage}{15} \indexentry{Chapter1.4.3|hyperpage}{27}
\indexentry{Chapter4.2.2.2|hyperpage}{16} \indexentry{Chapter1.4.4|hyperpage}{28}
\indexentry{Chapter4.2.2.3|hyperpage}{17} \indexentry{Chapter1.4.5|hyperpage}{29}
\indexentry{Chapter4.2.3|hyperpage}{18} \indexentry{Chapter1.5|hyperpage}{30}
\indexentry{Chapter4.2.3.1|hyperpage}{19} \indexentry{Chapter1.5.1|hyperpage}{30}
\indexentry{Chapter4.2.3.2|hyperpage}{20} \indexentry{Chapter1.5.2|hyperpage}{31}
\indexentry{Chapter4.2.3.3|hyperpage}{21} \indexentry{Chapter1.5.2.1|hyperpage}{31}
\indexentry{Chapter4.2.4|hyperpage}{22} \indexentry{Chapter1.5.2.2|hyperpage}{33}
\indexentry{Chapter4.2.4.1|hyperpage}{22} \indexentry{Chapter1.5.2.3|hyperpage}{33}
\indexentry{Chapter4.2.4.2|hyperpage}{23} \indexentry{Chapter1.6|hyperpage}{34}
\indexentry{Chapter4.2.4.3|hyperpage}{25} \indexentry{Chapter1.7|hyperpage}{36}
\indexentry{Chapter4.2.5|hyperpage}{25} \indexentry{Chapter1.7.1|hyperpage}{36}
\indexentry{Chapter4.2.6|hyperpage}{26} \indexentry{Chapter1.7.1.1|hyperpage}{37}
\indexentry{Chapter4.2.7|hyperpage}{29} \indexentry{Chapter1.7.1.2|hyperpage}{38}
\indexentry{Chapter4.2.7.1|hyperpage}{30} \indexentry{Chapter1.7.2|hyperpage}{40}
\indexentry{Chapter4.2.7.2|hyperpage}{30} \indexentry{Chapter1.8|hyperpage}{42}
\indexentry{Chapter4.2.7.3|hyperpage}{31} \indexentry{分词|hyperpage}{48}
\indexentry{Chapter4.2.7.4|hyperpage}{32} \indexentry{Segmentation|hyperpage}{48}
\indexentry{Chapter4.3|hyperpage}{33} \indexentry{句法分析|hyperpage}{49}
\indexentry{Chapter4.3.1|hyperpage}{36} \indexentry{Parsing|hyperpage}{49}
\indexentry{Chapter4.3.1.1|hyperpage}{37} \indexentry{预处理|hyperpage}{49}
\indexentry{Chapter4.3.1.2|hyperpage}{38} \indexentry{Pre-processing|hyperpage}{49}
\indexentry{Chapter4.3.1.3|hyperpage}{39} \indexentry{后处理|hyperpage}{49}
\indexentry{Chapter4.3.1.4|hyperpage}{40} \indexentry{Post-processing|hyperpage}{49}
\indexentry{Chapter4.3.2|hyperpage}{40} \indexentry{事件|hyperpage}{49}
\indexentry{Chapter4.3.3|hyperpage}{41} \indexentry{Event|hyperpage}{49}
\indexentry{Chapter4.3.4|hyperpage}{42} \indexentry{随机事件|hyperpage}{50}
\indexentry{Chapter4.3.5|hyperpage}{46} \indexentry{随机变量|hyperpage}{50}
\indexentry{Chapter4.4|hyperpage}{49} \indexentry{Random Variable|hyperpage}{50}
\indexentry{Chapter4.4.1|hyperpage}{51} \indexentry{概率|hyperpage}{50}
\indexentry{Chapter4.4.2|hyperpage}{51} \indexentry{Probability|hyperpage}{50}
\indexentry{Chapter4.4.2.1|hyperpage}{53} \indexentry{估计|hyperpage}{50}
\indexentry{Chapter4.4.2.2|hyperpage}{55} \indexentry{估计值|hyperpage}{50}
\indexentry{Chapter4.4.2.3|hyperpage}{57} \indexentry{Estimate|hyperpage}{50}
\indexentry{Chapter4.4.3|hyperpage}{58} \indexentry{概率分布函数|hyperpage}{51}
\indexentry{Chapter4.4.3.1|hyperpage}{59} \indexentry{概率密度函数|hyperpage}{51}
\indexentry{Chapter4.4.3.2|hyperpage}{62} \indexentry{联合概率|hyperpage}{51}
\indexentry{Chapter4.4.3.3|hyperpage}{63} \indexentry{Joint Probability|hyperpage}{51}
\indexentry{Chapter4.4.3.4|hyperpage}{64} \indexentry{条件概率|hyperpage}{51}
\indexentry{Chapter4.4.3.5|hyperpage}{65} \indexentry{Conditional Probability|hyperpage}{51}
\indexentry{Chapter4.4.4|hyperpage}{66} \indexentry{边缘概率|hyperpage}{52}
\indexentry{Chapter4.4.4.1|hyperpage}{67} \indexentry{Marginal Probability|hyperpage}{52}
\indexentry{Chapter4.4.4.2|hyperpage}{67} \indexentry{全概率公式|hyperpage}{53}
\indexentry{Chapter4.4.5|hyperpage}{68} \indexentry{Law of Total Probability|hyperpage}{53}
\indexentry{Chapter4.4.5|hyperpage}{71} \indexentry{贝叶斯法则|hyperpage}{54}
\indexentry{Chapter4.4.7|hyperpage}{73} \indexentry{Bayes' Rule|hyperpage}{54}
\indexentry{Chapter4.4.7.1|hyperpage}{74} \indexentry{熵|hyperpage}{55}
\indexentry{Chapter4.4.7.2|hyperpage}{76} \indexentry{Entropy|hyperpage}{55}
\indexentry{Chapter4.5|hyperpage}{77} \indexentry{自信息|hyperpage}{55}
\indexentry{Self-information|hyperpage}{55}
\indexentry{相对熵|hyperpage}{56}
\indexentry{Relative Entropy|hyperpage}{56}
\indexentry{交叉熵|hyperpage}{56}
\indexentry{Cross-entropy|hyperpage}{56}
\indexentry{分词|hyperpage}{57}
\indexentry{Segmentation|hyperpage}{57}
\indexentry{单词|hyperpage}{57}
\indexentry{Word|hyperpage}{57}
\indexentry{词|hyperpage}{57}
\indexentry{词法分析|hyperpage}{57}
\indexentry{Lexical Analysis|hyperpage}{57}
\indexentry{标注数据|hyperpage}{59}
\indexentry{Annotated Data|hyperpage}{59}
\indexentry{训练|hyperpage}{60}
\indexentry{Training|hyperpage}{60}
\indexentry{推断|hyperpage}{60}
\indexentry{Inference|hyperpage}{60}
\indexentry{参数估计|hyperpage}{61}
\indexentry{Parameter Estimation|hyperpage}{61}
\indexentry{偏置|hyperpage}{61}
\indexentry{Bias|hyperpage}{61}
\indexentry{语言模型|hyperpage}{65}
\indexentry{Language Model|hyperpage}{65}
\indexentry{语言建模|hyperpage}{65}
\indexentry{Language Modeling|hyperpage}{65}
\indexentry{极大似然估计|hyperpage}{66}
\indexentry{人工神经网络方法|hyperpage}{66}
\indexentry{未登录词|hyperpage}{67}
\indexentry{Out-of-Vocabulary Word, OOV Word|hyperpage}{67}
\indexentry{加法平滑|hyperpage}{68}
\indexentry{Additive Smoothing|hyperpage}{68}
\indexentry{古德-图灵估计法|hyperpage}{69}
\indexentry{Good-Turing Estimate|hyperpage}{69}
\indexentry{句法|hyperpage}{72}
\indexentry{Syntax|hyperpage}{72}
\indexentry{短语结构分析|hyperpage}{72}
\indexentry{Phrase Structure Parsing|hyperpage}{72}
\indexentry{依存分析|hyperpage}{72}
\indexentry{Dependency Parsing|hyperpage}{72}
\indexentry{成分分析|hyperpage}{73}
\indexentry{完全分析|hyperpage}{73}
\indexentry{Full Parsing|hyperpage}{73}
\indexentry{终结符|hyperpage}{73}
\indexentry{Terminal|hyperpage}{73}
\indexentry{预终结符|hyperpage}{73}
\indexentry{Pre-terminal|hyperpage}{73}
\indexentry{非终结符|hyperpage}{73}
\indexentry{Non-terminal|hyperpage}{73}
\indexentry{上下文无关文法|hyperpage}{74}
\indexentry{Context-Free Grammar|hyperpage}{74}
\indexentry{产生式规则|hyperpage}{75}
\indexentry{Production Rule|hyperpage}{75}
\indexentry{推导|hyperpage}{76}
\indexentry{Derivation|hyperpage}{76}
\indexentry{句子|hyperpage}{76}
\indexentry{Sentence|hyperpage}{76}
\indexentry{语言|hyperpage}{76}
\indexentry{Language|hyperpage}{76}
\indexentry{歧义|hyperpage}{76}
\indexentry{Ambiguity|hyperpage}{76}
\indexentry{消歧|hyperpage}{77}
\indexentry{Disambiguation|hyperpage}{77}
\indexentry{最左优先推导|hyperpage}{77}
\indexentry{Left-most Derivation|hyperpage}{77}
\indexentry{概率上下文无关文法|hyperpage}{78}
\indexentry{Probabilistic Context-Free Grammar|hyperpage}{78}
\indexentry{树库|hyperpage}{79}
\indexentry{Treebank|hyperpage}{79}
\indexentry{生成模型|hyperpage}{80}
\indexentry{Generative Model|hyperpage}{80}
\indexentry{判别模型|hyperpage}{80}
\indexentry{Discriminative Model|hyperpage}{80}
\indexentry{Chapter3.1|hyperpage}{85}
\indexentry{Chapter3.2|hyperpage}{87}
\indexentry{Chapter3.2.1|hyperpage}{87}
\indexentry{Chapter3.2.1.1|hyperpage}{87}
\indexentry{Chapter3.2.1.2|hyperpage}{88}
\indexentry{Chapter3.2.1.3|hyperpage}{89}
\indexentry{Chapter3.2.2|hyperpage}{89}
\indexentry{Chapter3.2.3|hyperpage}{90}
\indexentry{Chapter3.2.3.1|hyperpage}{90}
\indexentry{Chapter3.2.3.2|hyperpage}{90}
\indexentry{Chapter3.2.3.3|hyperpage}{92}
\indexentry{Chapter3.2.4|hyperpage}{93}
\indexentry{Chapter3.2.4.1|hyperpage}{93}
\indexentry{Chapter3.2.4.2|hyperpage}{95}
\indexentry{Chapter3.2.5|hyperpage}{97}
\indexentry{Chapter3.3|hyperpage}{100}
\indexentry{Chapter3.3.1|hyperpage}{100}
\indexentry{Chapter3.3.2|hyperpage}{102}
\indexentry{Chapter3.3.2.1|hyperpage}{103}
\indexentry{Chapter3.3.2.2|hyperpage}{103}
\indexentry{Chapter3.3.2.3|hyperpage}{105}
\indexentry{Chapter3.4|hyperpage}{106}
\indexentry{Chapter3.4.1|hyperpage}{106}
\indexentry{Chapter3.4.2|hyperpage}{108}
\indexentry{Chapter3.4.3|hyperpage}{109}
\indexentry{Chapter3.4.4|hyperpage}{110}
\indexentry{Chapter3.4.4.1|hyperpage}{110}
\indexentry{Chapter3.4.4.2|hyperpage}{111}
\indexentry{Chapter3.5|hyperpage}{117}
\indexentry{Chapter3.5.1|hyperpage}{117}
\indexentry{Chapter3.5.2|hyperpage}{120}
\indexentry{Chapter3.5.3|hyperpage}{121}
\indexentry{Chapter3.5.4|hyperpage}{123}
\indexentry{Chapter3.5.5|hyperpage}{124}
\indexentry{Chapter3.5.5|hyperpage}{127}
\indexentry{Chapter3.6|hyperpage}{127}
\indexentry{Chapter3.6.1|hyperpage}{127}
\indexentry{Chapter3.6.2|hyperpage}{128}
\indexentry{Chapter3.6.4|hyperpage}{129}
\indexentry{Chapter3.6.5|hyperpage}{130}
\indexentry{Chapter3.7|hyperpage}{130}
\indexentry{Chapter4.1|hyperpage}{133}
\indexentry{Chapter4.1.1|hyperpage}{134}
\indexentry{Chapter4.1.2|hyperpage}{136}
\indexentry{Chapter4.2|hyperpage}{138}
\indexentry{Chapter4.2.1|hyperpage}{138}
\indexentry{Chapter4.2.2|hyperpage}{141}
\indexentry{Chapter4.2.2.1|hyperpage}{141}
\indexentry{Chapter4.2.2.2|hyperpage}{142}
\indexentry{Chapter4.2.2.3|hyperpage}{143}
\indexentry{Chapter4.2.3|hyperpage}{144}
\indexentry{Chapter4.2.3.1|hyperpage}{145}
\indexentry{Chapter4.2.3.2|hyperpage}{146}
\indexentry{Chapter4.2.3.3|hyperpage}{147}
\indexentry{Chapter4.2.4|hyperpage}{148}
\indexentry{Chapter4.2.4.1|hyperpage}{148}
\indexentry{Chapter4.2.4.2|hyperpage}{149}
\indexentry{Chapter4.2.4.3|hyperpage}{151}
\indexentry{Chapter4.2.5|hyperpage}{151}
\indexentry{Chapter4.2.6|hyperpage}{152}
\indexentry{Chapter4.2.7|hyperpage}{155}
\indexentry{Chapter4.2.7.1|hyperpage}{156}
\indexentry{Chapter4.2.7.2|hyperpage}{156}
\indexentry{Chapter4.2.7.3|hyperpage}{157}
\indexentry{Chapter4.2.7.4|hyperpage}{158}
\indexentry{Chapter4.3|hyperpage}{159}
\indexentry{Chapter4.3.1|hyperpage}{162}
\indexentry{Chapter4.3.1.1|hyperpage}{163}
\indexentry{Chapter4.3.1.2|hyperpage}{164}
\indexentry{Chapter4.3.1.3|hyperpage}{165}
\indexentry{Chapter4.3.1.4|hyperpage}{166}
\indexentry{Chapter4.3.2|hyperpage}{166}
\indexentry{Chapter4.3.3|hyperpage}{167}
\indexentry{Chapter4.3.4|hyperpage}{168}
\indexentry{Chapter4.3.5|hyperpage}{172}
\indexentry{Chapter4.4|hyperpage}{175}
\indexentry{Chapter4.4.1|hyperpage}{177}
\indexentry{Chapter4.4.2|hyperpage}{177}
\indexentry{Chapter4.4.2.1|hyperpage}{179}
\indexentry{Chapter4.4.2.2|hyperpage}{181}
\indexentry{Chapter4.4.2.3|hyperpage}{183}
\indexentry{Chapter4.4.3|hyperpage}{184}
\indexentry{Chapter4.4.3.1|hyperpage}{185}
\indexentry{Chapter4.4.3.2|hyperpage}{188}
\indexentry{Chapter4.4.3.3|hyperpage}{189}
\indexentry{Chapter4.4.3.4|hyperpage}{190}
\indexentry{Chapter4.4.3.5|hyperpage}{191}
\indexentry{Chapter4.4.4|hyperpage}{192}
\indexentry{Chapter4.4.4.1|hyperpage}{193}
\indexentry{Chapter4.4.4.2|hyperpage}{193}
\indexentry{Chapter4.4.5|hyperpage}{194}
\indexentry{Chapter4.4.5|hyperpage}{197}
\indexentry{Chapter4.4.7|hyperpage}{199}
\indexentry{Chapter4.4.7.1|hyperpage}{200}
\indexentry{Chapter4.4.7.2|hyperpage}{202}
\indexentry{Chapter4.5|hyperpage}{203}
\indexentry{Chapter5.1|hyperpage}{210}
\indexentry{Chapter5.1.1|hyperpage}{210}
\indexentry{Chapter5.1.1.1|hyperpage}{210}
\indexentry{Chapter5.1.1.2|hyperpage}{211}
\indexentry{Chapter5.1.1.3|hyperpage}{212}
\indexentry{Chapter5.1.2|hyperpage}{213}
\indexentry{Chapter5.1.2.1|hyperpage}{213}
\indexentry{Chapter5.1.2.2|hyperpage}{214}
\indexentry{Chapter5.2|hyperpage}{214}
\indexentry{Chapter5.2.1|hyperpage}{214}
\indexentry{Chapter5.2.1.1|hyperpage}{215}
\indexentry{Chapter5.2.1.2|hyperpage}{216}
\indexentry{Chapter5.2.1.3|hyperpage}{216}
\indexentry{Chapter5.2.1.4|hyperpage}{217}
\indexentry{Chapter5.2.1.5|hyperpage}{218}
\indexentry{Chapter5.2.1.6|hyperpage}{219}
\indexentry{Chapter5.2.2|hyperpage}{220}
\indexentry{Chapter5.2.2.1|hyperpage}{221}
\indexentry{Chapter5.2.2.2|hyperpage}{222}
\indexentry{Chapter5.2.2.3|hyperpage}{223}
\indexentry{Chapter5.2.2.4|hyperpage}{223}
\indexentry{Chapter5.2.3|hyperpage}{224}
\indexentry{Chapter5.2.3.1|hyperpage}{224}
\indexentry{Chapter5.2.3.2|hyperpage}{226}
\indexentry{Chapter5.2.4|hyperpage}{227}
\indexentry{Chapter5.3|hyperpage}{231}
\indexentry{Chapter5.3.1|hyperpage}{232}
\indexentry{Chapter5.3.1.1|hyperpage}{232}
\indexentry{Chapter5.3.1.2|hyperpage}{234}
\indexentry{Chapter5.3.1.3|hyperpage}{235}
\indexentry{Chapter5.3.2|hyperpage}{236}
\indexentry{Chapter5.3.3|hyperpage}{236}
\indexentry{Chapter5.3.4|hyperpage}{238}
\indexentry{Chapter5.3.5|hyperpage}{241}
\indexentry{Chapter5.4|hyperpage}{242}
\indexentry{Chapter5.4.1|hyperpage}{243}
\indexentry{Chapter5.4.2|hyperpage}{243}
\indexentry{Chapter5.4.2.1|hyperpage}{244}
\indexentry{Chapter5.4.2.2|hyperpage}{246}
\indexentry{Chapter5.4.2.3|hyperpage}{249}
\indexentry{Chapter5.4.3|hyperpage}{252}
\indexentry{Chapter5.4.4|hyperpage}{254}
\indexentry{Chapter5.4.4.1|hyperpage}{254}
\indexentry{Chapter5.4.4.2|hyperpage}{255}
\indexentry{Chapter5.4.4.3|hyperpage}{256}
\indexentry{Chapter5.4.5|hyperpage}{257}
\indexentry{Chapter5.4.6|hyperpage}{258}
\indexentry{Chapter5.4.6.1|hyperpage}{259}
\indexentry{Chapter5.4.6.2|hyperpage}{261}
\indexentry{Chapter5.4.6.3|hyperpage}{262}
\indexentry{Chapter5.5|hyperpage}{263}
\indexentry{Chapter5.5.1|hyperpage}{264}
\indexentry{Chapter5.5.1.1|hyperpage}{265}
\indexentry{Chapter5.5.1.2|hyperpage}{267}
\indexentry{Chapter5.5.1.3|hyperpage}{268}
\indexentry{Chapter5.5.1.4|hyperpage}{269}
\indexentry{Chapter5.5.2|hyperpage}{270}
\indexentry{Chapter5.5.2.1|hyperpage}{270}
\indexentry{Chapter5.5.2.2|hyperpage}{270}
\indexentry{Chapter5.5.3|hyperpage}{272}
\indexentry{Chapter5.5.3.1|hyperpage}{272}
\indexentry{Chapter5.5.3.2|hyperpage}{274}
\indexentry{Chapter5.5.3.3|hyperpage}{274}
\indexentry{Chapter5.5.3.4|hyperpage}{275}
\indexentry{Chapter5.5.3.5|hyperpage}{276}
\indexentry{Chapter5.6|hyperpage}{277}
\indexentry{神经机器翻译|hyperpage}{279}
\indexentry{Neural Machine Translation|hyperpage}{279}
\indexentry{分布式表示|hyperpage}{281}
\indexentry{Distributed Representation|hyperpage}{281}
\indexentry{特征工程|hyperpage}{287}
\indexentry{Feature Engineering|hyperpage}{287}
\indexentry{编码器-解码器模型|hyperpage}{288}
\indexentry{Encoder-Decoder Paradigm|hyperpage}{288}
\indexentry{编码器-解码器框架|hyperpage}{288}
\indexentry{循环神经网络|hyperpage}{293}
\indexentry{Recurrent Neural Network, RNN|hyperpage}{293}
\indexentry{词嵌入|hyperpage}{295}
\indexentry{Word Embedding|hyperpage}{295}
\indexentry{表示学习|hyperpage}{295}
\indexentry{Representation Learning|hyperpage}{295}
\indexentry{生成|hyperpage}{295}
\indexentry{Generation|hyperpage}{295}
\indexentry{长短时记忆|hyperpage}{300}
\indexentry{Long Short-Term Memory|hyperpage}{300}
\indexentry{遗忘|hyperpage}{300}
\indexentry{记忆更新|hyperpage}{301}
\indexentry{输出|hyperpage}{301}
\indexentry{门循环单元|hyperpage}{301}
\indexentry{Gated Recurrent Unit, GRU|hyperpage}{301}
\indexentry{注意力权重|hyperpage}{307}
\indexentry{Attention Weight|hyperpage}{307}
\indexentry{一阶矩估计|hyperpage}{313}
\indexentry{First Moment Estimation|hyperpage}{313}
\indexentry{二阶矩估计|hyperpage}{313}
\indexentry{Second Moment Estimation|hyperpage}{313}
\indexentry{学习率|hyperpage}{314}
\indexentry{Learning Rate|hyperpage}{314}
\indexentry{逐渐预热|hyperpage}{315}
\indexentry{Gradual Warmup|hyperpage}{315}
\indexentry{分段常数衰减|hyperpage}{315}
\indexentry{Piecewise Constant Decay|hyperpage}{315}
\indexentry{数据并行|hyperpage}{316}
\indexentry{模型并行|hyperpage}{316}
\indexentry{全搜索|hyperpage}{318}
\indexentry{Full Search|hyperpage}{318}
\indexentry{贪婪搜索|hyperpage}{318}
\indexentry{Greedy Search|hyperpage}{318}
\indexentry{束搜索|hyperpage}{318}
\indexentry{Beam Search|hyperpage}{318}
\indexentry{自回归模型|hyperpage}{318}
\indexentry{Autoregressive Model|hyperpage}{318}
\indexentry{非自回归模型|hyperpage}{318}
\indexentry{Non-autoregressive Model|hyperpage}{318}
\indexentry{自注意力机制|hyperpage}{324}
\indexentry{Self-Attention|hyperpage}{324}
\indexentry{特征提取|hyperpage}{325}
\indexentry{自注意力子层|hyperpage}{326}
\indexentry{Self-attention Sub-layer|hyperpage}{326}
\indexentry{前馈神经网络子层|hyperpage}{326}
\indexentry{Feed-forward Sub-layer|hyperpage}{326}
\indexentry{残差连接|hyperpage}{326}
\indexentry{Residual Connection|hyperpage}{326}
\indexentry{层正则化|hyperpage}{326}
\indexentry{Layer Normalization|hyperpage}{326}
\indexentry{编码-解码注意力子层|hyperpage}{327}
\indexentry{Encoder-decoder Attention Sub-layer|hyperpage}{327}
\indexentry{词嵌入|hyperpage}{327}
\indexentry{Word Embedding|hyperpage}{327}
\indexentry{位置编码|hyperpage}{327}
\indexentry{Position Embedding|hyperpage}{327}
\indexentry{点乘注意力|hyperpage}{330}
\indexentry{Scaled Dot-Product Attention|hyperpage}{330}
\indexentry{多头注意力|hyperpage}{333}
\indexentry{Multi-head Attention|hyperpage}{333}
\indexentry{残差连接|hyperpage}{334}
\indexentry{短连接|hyperpage}{334}
\indexentry{Short-cut Connection|hyperpage}{334}
\indexentry{后正则化|hyperpage}{335}
\indexentry{Post-norm|hyperpage}{335}
\indexentry{前正则化|hyperpage}{335}
\indexentry{Pre-norm|hyperpage}{335}
\indexentry{交叉熵损失|hyperpage}{336}
\indexentry{Cross Entropy Loss|hyperpage}{336}
\indexentry{预热|hyperpage}{337}
\indexentry{Warmup|hyperpage}{337}
\indexentry{小批量训练|hyperpage}{337}
\indexentry{Mini-batch Training|hyperpage}{337}
\indexentry{Dropout|hyperpage}{337}
\indexentry{过拟合|hyperpage}{338}
\indexentry{Overfitting|hyperpage}{338}
\indexentry{标签平滑|hyperpage}{338}
\indexentry{Label Smoothing|hyperpage}{338}
\indexentry{序列到序列的转换/生成问题|hyperpage}{339}
\indexentry{Sequence-to-Sequence Problem|hyperpage}{339}
\indexentry{Chapter7.1|hyperpage}{345}
\indexentry{Chapter7.1.1|hyperpage}{346}
\indexentry{Chapter7.1.2|hyperpage}{347}
\indexentry{Chapter7.1.3|hyperpage}{348}
\indexentry{Chapter7.2|hyperpage}{349}
\indexentry{Chapter7.2.1|hyperpage}{349}
\indexentry{Chapter7.2.2|hyperpage}{350}
\indexentry{Chapter7.2.3|hyperpage}{351}
\indexentry{Chapter7.2.4|hyperpage}{354}
\indexentry{Chapter7.2.4.1|hyperpage}{354}
\indexentry{Chapter7.2.4.2|hyperpage}{355}
\indexentry{Chapter7.2.4.3|hyperpage}{356}
\indexentry{Chapter7.2.4.4|hyperpage}{358}
\indexentry{Chapter7.3|hyperpage}{359}
\indexentry{Chapter7.3.1|hyperpage}{359}
\indexentry{Chapter7.3.1.1|hyperpage}{360}
\indexentry{Chapter7.3.1.2|hyperpage}{361}
\indexentry{Chapter7.3.1.3|hyperpage}{362}
\indexentry{Chapter7.3.1.4|hyperpage}{363}
\indexentry{Chapter7.3.2|hyperpage}{364}
\indexentry{Chapter7.3.2.1|hyperpage}{365}
\indexentry{Chapter7.3.2.2|hyperpage}{366}
\indexentry{Chapter7.3.2.3|hyperpage}{367}
\indexentry{Chapter7.3.2.4|hyperpage}{367}
\indexentry{Chapter7.3.3|hyperpage}{368}
\indexentry{Chapter7.3.3.1|hyperpage}{368}
\indexentry{Chapter7.3.3.2|hyperpage}{369}
\indexentry{Chapter7.4|hyperpage}{370}
\indexentry{Chapter7.4.1|hyperpage}{370}
\indexentry{Chapter7.4.1.1|hyperpage}{371}
\indexentry{Chapter7.4.1.2|hyperpage}{372}
\indexentry{Chapter7.4.1.3|hyperpage}{373}
\indexentry{Chapter7.4.1.3.1|hyperpage}{373}
\indexentry{Chapter7.4.1.3.2|hyperpage}{373}
\indexentry{Chapter7.4.1.3.3|hyperpage}{375}
\indexentry{Chapter7.4.1.3.4|hyperpage}{376}
\indexentry{Chapter7.4.1.3.5|hyperpage}{376}
\indexentry{Chapter7.4.1.3.6|hyperpage}{378}
\indexentry{Chapter7.4.1.3.7|hyperpage}{379}
\indexentry{Chapter7.4.2|hyperpage}{379}
\indexentry{Chapter7.4.2.1|hyperpage}{380}
\indexentry{Chapter7.4.2.2|hyperpage}{381}
\indexentry{Chapter7.4.2.3|hyperpage}{381}
\indexentry{Chapter7.4.3|hyperpage}{382}
\indexentry{Chapter7.4.3.1|hyperpage}{383}
\indexentry{Chapter7.4.3.2|hyperpage}{384}
\indexentry{Chapter7.4.3.3|hyperpage}{385}
\indexentry{Chapter7.5|hyperpage}{386}
\indexentry{Chapter7.5.1|hyperpage}{386}
\indexentry{Chapter7.5.1.1|hyperpage}{386}
\indexentry{Chapter7.5.1.2|hyperpage}{389}
\indexentry{Chapter7.5.1.3|hyperpage}{390}
\indexentry{Chapter7.5.1.4|hyperpage}{393}
\indexentry{Chapter7.5.2|hyperpage}{395}
\indexentry{Chapter7.5.2.1|hyperpage}{395}
\indexentry{Chapter7.5.2.2|hyperpage}{396}
\indexentry{Chapter7.5.2.2.1|hyperpage}{397}
\indexentry{Chapter7.5.2.2.2|hyperpage}{398}
\indexentry{Chapter7.5.2.2.3|hyperpage}{398}
\indexentry{Chapter7.5.2.3|hyperpage}{399}
\indexentry{Chapter7.5.3|hyperpage}{400}
\indexentry{Chapter7.5.3.1|hyperpage}{400}
\indexentry{Chapter7.5.3.2|hyperpage}{401}
\indexentry{Chapter7.5.3.3|hyperpage}{403}
\indexentry{Chapter7.5.4|hyperpage}{404}
\indexentry{Chapter7.5.4.1|hyperpage}{404}
\indexentry{Chapter7.5.4.2|hyperpage}{405}
\indexentry{Chapter7.5.4.3|hyperpage}{406}
\indexentry{Chapter7.6|hyperpage}{407}
...@@ -2,578 +2,714 @@ ...@@ -2,578 +2,714 @@
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\select@language {english} \select@language {english}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {I}{机器翻译基础}}{11}{part.1} \contentsline {part}{\@mypartnumtocformat {I}{机器翻译基础}}{13}{part.1}
\ttl@starttoc {default@1} \ttl@starttoc {default@1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {1}机器翻译简介}{13}{chapter.1} \contentsline {chapter}{\numberline {1}机器翻译简介}{15}{chapter.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.1}机器翻译的概念}{13}{section.1.1} \contentsline {section}{\numberline {1.1}机器翻译的概念}{15}{section.1.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.2}机器翻译简史}{16}{section.1.2} \contentsline {section}{\numberline {1.2}机器翻译简史}{18}{section.1.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.1}人工翻译}{16}{subsection.1.2.1} \contentsline {subsection}{\numberline {1.2.1}人工翻译}{18}{subsection.1.2.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.2}机器翻译的萌芽}{17}{subsection.1.2.2} \contentsline {subsection}{\numberline {1.2.2}机器翻译的萌芽}{19}{subsection.1.2.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.3}机器翻译的受挫}{18}{subsection.1.2.3} \contentsline {subsection}{\numberline {1.2.3}机器翻译的受挫}{20}{subsection.1.2.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.4}机器翻译的快速成长}{19}{subsection.1.2.4} \contentsline {subsection}{\numberline {1.2.4}机器翻译的快速成长}{21}{subsection.1.2.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.5}机器翻译的爆发}{20}{subsection.1.2.5} \contentsline {subsection}{\numberline {1.2.5}机器翻译的爆发}{22}{subsection.1.2.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.3}机器翻译现状}{21}{section.1.3} \contentsline {section}{\numberline {1.3}机器翻译现状}{23}{section.1.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.4}机器翻译方法}{22}{section.1.4} \contentsline {section}{\numberline {1.4}机器翻译方法}{24}{section.1.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.1}基于规则的机器翻译}{22}{subsection.1.4.1} \contentsline {subsection}{\numberline {1.4.1}基于规则的机器翻译}{26}{subsection.1.4.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.2}基于实例的机器翻译}{24}{subsection.1.4.2} \contentsline {subsection}{\numberline {1.4.2}基于实例的机器翻译}{26}{subsection.1.4.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.3}统计机器翻译}{25}{subsection.1.4.3} \contentsline {subsection}{\numberline {1.4.3}统计机器翻译}{27}{subsection.1.4.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.4}神经机器翻译}{26}{subsection.1.4.4} \contentsline {subsection}{\numberline {1.4.4}神经机器翻译}{28}{subsection.1.4.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.5}对比分析}{27}{subsection.1.4.5} \contentsline {subsection}{\numberline {1.4.5}对比分析}{29}{subsection.1.4.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.5}翻译质量评价}{28}{section.1.5} \contentsline {section}{\numberline {1.5}翻译质量评价}{30}{section.1.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.5.1}人工评价}{28}{subsection.1.5.1} \contentsline {subsection}{\numberline {1.5.1}人工评价}{30}{subsection.1.5.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.5.2}自动评价}{29}{subsection.1.5.2} \contentsline {subsection}{\numberline {1.5.2}自动评价}{31}{subsection.1.5.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{BLEU}{29}{section*.15} \contentsline {subsubsection}{BLEU}{31}{section*.15}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{TER}{31}{section*.16} \contentsline {subsubsection}{TER}{33}{section*.16}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于检测点的评价}{31}{section*.17} \contentsline {subsubsection}{基于检测点的评价}{33}{section*.17}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.6}机器翻译应用}{32}{section.1.6} \contentsline {section}{\numberline {1.6}机器翻译应用}{34}{section.1.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.7}开源项目与评测}{34}{section.1.7} \contentsline {section}{\numberline {1.7}开源项目与评测}{36}{section.1.7}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.7.1}开源机器翻译系统}{34}{subsection.1.7.1} \contentsline {subsection}{\numberline {1.7.1}开源机器翻译系统}{36}{subsection.1.7.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{统计机器翻译开源系统}{34}{section*.19} \contentsline {subsubsection}{统计机器翻译开源系统}{37}{section*.19}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经机器翻译开源系统}{36}{section*.20} \contentsline {subsubsection}{神经机器翻译开源系统}{38}{section*.20}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.7.2}常用数据集及公开评测任务}{38}{subsection.1.7.2} \contentsline {subsection}{\numberline {1.7.2}常用数据集及公开评测任务}{40}{subsection.1.7.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.8}推荐学习资源}{40}{section.1.8} \contentsline {section}{\numberline {1.8}推荐学习资源}{42}{section.1.8}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {2}词法、语法及统计建模基础}{45}{chapter.2} \contentsline {chapter}{\numberline {2}词法、语法及统计建模基础}{47}{chapter.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.1}问题概述 }{46}{section.2.1} \contentsline {section}{\numberline {2.1}问题概述 }{48}{section.2.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.2}概率论基础}{47}{section.2.2} \contentsline {section}{\numberline {2.2}概率论基础}{49}{section.2.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.1}随机变量和概率}{47}{subsection.2.2.1} \contentsline {subsection}{\numberline {2.2.1}随机变量和概率}{49}{subsection.2.2.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.2}联合概率、条件概率和边缘概率}{49}{subsection.2.2.2} \contentsline {subsection}{\numberline {2.2.2}联合概率、条件概率和边缘概率}{51}{subsection.2.2.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.3}链式法则}{50}{subsection.2.2.3} \contentsline {subsection}{\numberline {2.2.3}链式法则}{52}{subsection.2.2.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.4}贝叶斯法则}{51}{subsection.2.2.4} \contentsline {subsection}{\numberline {2.2.4}贝叶斯法则}{53}{subsection.2.2.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.5}KL距离和熵}{53}{subsection.2.2.5} \contentsline {subsection}{\numberline {2.2.5}KL距离和熵}{55}{subsection.2.2.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{信息熵}{53}{section*.27} \contentsline {subsubsection}{信息熵}{55}{section*.27}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{KL距离}{54}{section*.29} \contentsline {subsubsection}{KL距离}{56}{section*.29}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{交叉熵}{54}{section*.30} \contentsline {subsubsection}{交叉熵}{56}{section*.30}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.3}中文分词}{55}{section.2.3} \contentsline {section}{\numberline {2.3}中文分词}{57}{section.2.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.3.1}基于词典的分词方法}{56}{subsection.2.3.1} \contentsline {subsection}{\numberline {2.3.1}基于词典的分词方法}{58}{subsection.2.3.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.3.2}基于统计的分词方法}{57}{subsection.2.3.2} \contentsline {subsection}{\numberline {2.3.2}基于统计的分词方法}{59}{subsection.2.3.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{统计模型的学习与推断}{57}{section*.34} \contentsline {subsubsection}{统计模型的学习与推断}{59}{section*.34}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{掷骰子游戏}{58}{section*.36} \contentsline {subsubsection}{掷骰子游戏}{60}{section*.36}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{全概率分词方法}{60}{section*.40} \contentsline {subsubsection}{全概率分词方法}{62}{section*.40}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.4}$n$-gram语言模型 }{62}{section.2.4} \contentsline {section}{\numberline {2.4}$n$-gram语言模型 }{64}{section.2.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.4.1}建模}{63}{subsection.2.4.1} \contentsline {subsection}{\numberline {2.4.1}建模}{65}{subsection.2.4.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.4.2}未登录词和平滑算法}{65}{subsection.2.4.2} \contentsline {subsection}{\numberline {2.4.2}未登录词和平滑算法}{67}{subsection.2.4.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{加法平滑方法}{66}{section*.46} \contentsline {subsubsection}{加法平滑方法}{68}{section*.46}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{古德-图灵估计法}{67}{section*.48} \contentsline {subsubsection}{古德-图灵估计法}{69}{section*.48}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{Kneser-Ney平滑方法}{68}{section*.50} \contentsline {subsubsection}{Kneser-Ney平滑方法}{70}{section*.50}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.5}句法分析(短语结构分析)}{70}{section.2.5} \contentsline {section}{\numberline {2.5}句法分析(短语结构分析)}{72}{section.2.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.5.1}句子的句法树表示}{70}{subsection.2.5.1} \contentsline {subsection}{\numberline {2.5.1}句子的句法树表示}{72}{subsection.2.5.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.5.2}上下文无关文法}{72}{subsection.2.5.2} \contentsline {subsection}{\numberline {2.5.2}上下文无关文法}{74}{subsection.2.5.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.5.3}规则和推导的概率}{76}{subsection.2.5.3} \contentsline {subsection}{\numberline {2.5.3}规则和推导的概率}{78}{subsection.2.5.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.6}小结及深入阅读}{78}{section.2.6} \contentsline {section}{\numberline {2.6}小结及深入阅读}{80}{section.2.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {II}{统计机器翻译}}{81}{part.2} \contentsline {part}{\@mypartnumtocformat {II}{统计机器翻译}}{83}{part.2}
\ttl@stoptoc {default@1} \ttl@stoptoc {default@1}
\ttl@starttoc {default@2} \ttl@starttoc {default@2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {3}基于词的机器翻译模型}{83}{chapter.3} \contentsline {chapter}{\numberline {3}基于词的机器翻译模型}{85}{chapter.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.1}什么是基于词的翻译模型}{83}{section.3.1} \contentsline {section}{\numberline {3.1}什么是基于词的翻译模型}{85}{section.3.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.2}构建一个简单的机器翻译系统}{85}{section.3.2} \contentsline {section}{\numberline {3.2}构建一个简单的机器翻译系统}{87}{section.3.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.1}如何进行翻译?}{85}{subsection.3.2.1} \contentsline {subsection}{\numberline {3.2.1}如何进行翻译?}{87}{subsection.3.2.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{机器翻译流程}{86}{section*.63} \contentsline {subsubsection}{机器翻译流程}{88}{section*.63}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{人工翻译 vs. 机器翻译}{87}{section*.65} \contentsline {subsubsection}{人工翻译 vs. 机器翻译}{89}{section*.65}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.2}基本框架}{87}{subsection.3.2.2} \contentsline {subsection}{\numberline {3.2.2}基本框架}{89}{subsection.3.2.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.3}单词翻译概率}{88}{subsection.3.2.3} \contentsline {subsection}{\numberline {3.2.3}单词翻译概率}{90}{subsection.3.2.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{什么是单词翻译概率?}{88}{section*.67} \contentsline {subsubsection}{什么是单词翻译概率?}{90}{section*.67}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{如何从一个双语平行数据中学习?}{88}{section*.69} \contentsline {subsubsection}{如何从一个双语平行数据中学习?}{90}{section*.69}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{如何从大量的双语平行数据中学习?}{90}{section*.70} \contentsline {subsubsection}{如何从大量的双语平行数据中学习?}{92}{section*.70}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.4}句子级翻译模型}{91}{subsection.3.2.4} \contentsline {subsection}{\numberline {3.2.4}句子级翻译模型}{93}{subsection.3.2.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基础模型}{91}{section*.72} \contentsline {subsubsection}{基础模型}{93}{section*.72}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{生成流畅的译文}{93}{section*.74} \contentsline {subsubsection}{生成流畅的译文}{95}{section*.74}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.5}解码}{95}{subsection.3.2.5} \contentsline {subsection}{\numberline {3.2.5}解码}{97}{subsection.3.2.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.3}基于词的翻译建模}{98}{section.3.3} \contentsline {section}{\numberline {3.3}基于词的翻译建模}{100}{section.3.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.3.1}噪声信道模型}{98}{subsection.3.3.1} \contentsline {subsection}{\numberline {3.3.1}噪声信道模型}{100}{subsection.3.3.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.3.2}统计机器翻译的三个基本问题}{100}{subsection.3.3.2} \contentsline {subsection}{\numberline {3.3.2}统计机器翻译的三个基本问题}{102}{subsection.3.3.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{词对齐}{101}{section*.83} \contentsline {subsubsection}{词对齐}{103}{section*.83}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于词对齐的翻译模型}{101}{section*.86} \contentsline {subsubsection}{基于词对齐的翻译模型}{103}{section*.86}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于词对齐的翻译实例}{103}{section*.88} \contentsline {subsubsection}{基于词对齐的翻译实例}{105}{section*.88}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.4}IBM模型1-2}{104}{section.3.4} \contentsline {section}{\numberline {3.4}IBM模型1-2}{106}{section.3.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.1}IBM模型1}{104}{subsection.3.4.1} \contentsline {subsection}{\numberline {3.4.1}IBM模型1}{106}{subsection.3.4.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.2}IBM模型2}{106}{subsection.3.4.2} \contentsline {subsection}{\numberline {3.4.2}IBM模型2}{108}{subsection.3.4.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.3}解码及计算优化}{107}{subsection.3.4.3} \contentsline {subsection}{\numberline {3.4.3}解码及计算优化}{109}{subsection.3.4.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.4}训练}{108}{subsection.3.4.4} \contentsline {subsection}{\numberline {3.4.4}训练}{110}{subsection.3.4.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{目标函数}{108}{section*.93} \contentsline {subsubsection}{目标函数}{110}{section*.93}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{优化}{109}{section*.95} \contentsline {subsubsection}{优化}{111}{section*.95}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.5}IBM模型3-5及隐马尔可夫模型}{115}{section.3.5} \contentsline {section}{\numberline {3.5}IBM模型3-5及隐马尔可夫模型}{117}{section.3.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.1}基于产出率的翻译模型}{115}{subsection.3.5.1} \contentsline {subsection}{\numberline {3.5.1}基于产出率的翻译模型}{117}{subsection.3.5.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.2}IBM 模型3}{118}{subsection.3.5.2} \contentsline {subsection}{\numberline {3.5.2}IBM 模型3}{120}{subsection.3.5.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.3}IBM 模型4}{119}{subsection.3.5.3} \contentsline {subsection}{\numberline {3.5.3}IBM 模型4}{121}{subsection.3.5.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.4} IBM 模型5}{121}{subsection.3.5.4} \contentsline {subsection}{\numberline {3.5.4} IBM 模型5}{123}{subsection.3.5.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.5}隐马尔可夫模型}{122}{subsection.3.5.5} \contentsline {subsection}{\numberline {3.5.5}隐马尔可夫模型}{124}{subsection.3.5.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{隐马尔可夫模型}{123}{section*.107} \contentsline {subsubsection}{隐马尔可夫模型}{125}{section*.107}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{词对齐模型}{124}{section*.109} \contentsline {subsubsection}{词对齐模型}{126}{section*.109}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.6}解码和训练}{125}{subsection.3.5.6} \contentsline {subsection}{\numberline {3.5.6}解码和训练}{127}{subsection.3.5.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.6}问题分析}{125}{section.3.6} \contentsline {section}{\numberline {3.6}问题分析}{127}{section.3.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.1}词对齐及对称化}{125}{subsection.3.6.1} \contentsline {subsection}{\numberline {3.6.1}词对齐及对称化}{127}{subsection.3.6.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.2}Deficiency}{126}{subsection.3.6.2} \contentsline {subsection}{\numberline {3.6.2}Deficiency}{128}{subsection.3.6.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.3}句子长度}{127}{subsection.3.6.3} \contentsline {subsection}{\numberline {3.6.3}句子长度}{129}{subsection.3.6.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.4}其他问题}{128}{subsection.3.6.4} \contentsline {subsection}{\numberline {3.6.4}其他问题}{130}{subsection.3.6.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.7}小结及深入阅读}{128}{section.3.7} \contentsline {section}{\numberline {3.7}小结及深入阅读}{130}{section.3.7}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {4}基于短语和句法的机器翻译模型}{131}{chapter.4} \contentsline {chapter}{\numberline {4}基于短语和句法的机器翻译模型}{133}{chapter.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.1}翻译中的结构信息}{131}{section.4.1} \contentsline {section}{\numberline {4.1}翻译中的结构信息}{133}{section.4.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.1.1}更大粒度的翻译单元}{132}{subsection.4.1.1} \contentsline {subsection}{\numberline {4.1.1}更大粒度的翻译单元}{134}{subsection.4.1.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.1.2}句子的结构信息}{134}{subsection.4.1.2} \contentsline {subsection}{\numberline {4.1.2}句子的结构信息}{136}{subsection.4.1.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.2}基于短语的翻译模型}{136}{section.4.2} \contentsline {section}{\numberline {4.2}基于短语的翻译模型}{138}{section.4.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.1}机器翻译中的短语}{136}{subsection.4.2.1} \contentsline {subsection}{\numberline {4.2.1}机器翻译中的短语}{138}{subsection.4.2.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.2}数学建模及判别式模型}{139}{subsection.4.2.2} \contentsline {subsection}{\numberline {4.2.2}数学建模及判别式模型}{141}{subsection.4.2.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于翻译推导的建模}{139}{section*.121} \contentsline {subsubsection}{基于翻译推导的建模}{141}{section*.121}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{对数线性模型}{140}{section*.122} \contentsline {subsubsection}{对数线性模型}{142}{section*.122}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{搭建模型的基本流程}{141}{section*.123} \contentsline {subsubsection}{搭建模型的基本流程}{143}{section*.123}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.3}短语抽取}{142}{subsection.4.2.3} \contentsline {subsection}{\numberline {4.2.3}短语抽取}{144}{subsection.4.2.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{与词对齐一致的短语}{143}{section*.126} \contentsline {subsubsection}{与词对齐一致的短语}{145}{section*.126}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{获取词对齐}{144}{section*.130} \contentsline {subsubsection}{获取词对齐}{146}{section*.130}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{度量双语短语质量}{145}{section*.132} \contentsline {subsubsection}{度量双语短语质量}{147}{section*.132}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.4}调序}{146}{subsection.4.2.4} \contentsline {subsection}{\numberline {4.2.4}调序}{148}{subsection.4.2.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于距离的调序}{146}{section*.136} \contentsline {subsubsection}{基于距离的调序}{148}{section*.136}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于方向的调序}{147}{section*.138} \contentsline {subsubsection}{基于方向的调序}{149}{section*.138}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于分类的调序}{149}{section*.141} \contentsline {subsubsection}{基于分类的调序}{151}{section*.141}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.5}特征}{149}{subsection.4.2.5} \contentsline {subsection}{\numberline {4.2.5}特征}{151}{subsection.4.2.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.6}最小错误率训练}{150}{subsection.4.2.6} \contentsline {subsection}{\numberline {4.2.6}最小错误率训练}{152}{subsection.4.2.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.7}栈解码}{153}{subsection.4.2.7} \contentsline {subsection}{\numberline {4.2.7}栈解码}{155}{subsection.4.2.7}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译候选匹配}{154}{section*.146} \contentsline {subsubsection}{翻译候选匹配}{156}{section*.146}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译假设扩展}{154}{section*.148} \contentsline {subsubsection}{翻译假设扩展}{156}{section*.148}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{剪枝}{155}{section*.150} \contentsline {subsubsection}{剪枝}{157}{section*.150}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{解码中的栈结构}{156}{section*.152} \contentsline {subsubsection}{解码中的栈结构}{158}{section*.152}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.3}基于层次短语的模型}{157}{section.4.3} \contentsline {section}{\numberline {4.3}基于层次短语的模型}{159}{section.4.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.1}同步上下文无关文法}{160}{subsection.4.3.1} \contentsline {subsection}{\numberline {4.3.1}同步上下文无关文法}{162}{subsection.4.3.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{文法定义}{161}{section*.157} \contentsline {subsubsection}{文法定义}{163}{section*.157}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{推导}{162}{section*.158} \contentsline {subsubsection}{推导}{164}{section*.158}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{胶水规则}{163}{section*.159} \contentsline {subsubsection}{胶水规则}{165}{section*.159}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{处理流程}{164}{section*.160} \contentsline {subsubsection}{处理流程}{166}{section*.160}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.2}层次短语规则抽取}{164}{subsection.4.3.2} \contentsline {subsection}{\numberline {4.3.2}层次短语规则抽取}{166}{subsection.4.3.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.3}翻译模型及特征}{165}{subsection.4.3.3} \contentsline {subsection}{\numberline {4.3.3}翻译模型及特征}{167}{subsection.4.3.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.4}CYK解码}{166}{subsection.4.3.4} \contentsline {subsection}{\numberline {4.3.4}CYK解码}{168}{subsection.4.3.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.5}立方剪枝}{170}{subsection.4.3.5} \contentsline {subsection}{\numberline {4.3.5}立方剪枝}{172}{subsection.4.3.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.4}基于语言学句法的模型}{173}{section.4.4} \contentsline {section}{\numberline {4.4}基于语言学句法的模型}{175}{section.4.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.1}基于句法的翻译模型分类}{175}{subsection.4.4.1} \contentsline {subsection}{\numberline {4.4.1}基于句法的翻译模型分类}{177}{subsection.4.4.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.2}基于树结构的文法}{175}{subsection.4.4.2} \contentsline {subsection}{\numberline {4.4.2}基于树结构的文法}{177}{subsection.4.4.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{树到树翻译规则}{177}{section*.176} \contentsline {subsubsection}{树到树翻译规则}{179}{section*.176}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于树结构的翻译推导}{179}{section*.178} \contentsline {subsubsection}{基于树结构的翻译推导}{181}{section*.178}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{树到串翻译规则}{181}{section*.181} \contentsline {subsubsection}{树到串翻译规则}{183}{section*.181}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.3}树到串翻译规则抽取}{182}{subsection.4.4.3} \contentsline {subsection}{\numberline {4.4.3}树到串翻译规则抽取}{184}{subsection.4.4.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{树的切割与最小规则}{183}{section*.183} \contentsline {subsubsection}{树的切割与最小规则}{185}{section*.183}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{空对齐处理}{186}{section*.189} \contentsline {subsubsection}{空对齐处理}{188}{section*.189}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{组合规则}{187}{section*.191} \contentsline {subsubsection}{组合规则}{189}{section*.191}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{SPMT规则}{188}{section*.193} \contentsline {subsubsection}{SPMT规则}{190}{section*.193}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{句法树二叉化}{189}{section*.195} \contentsline {subsubsection}{句法树二叉化}{191}{section*.195}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.4}树到树翻译规则抽取}{190}{subsection.4.4.4} \contentsline {subsection}{\numberline {4.4.4}树到树翻译规则抽取}{192}{subsection.4.4.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于节点对齐的规则抽取}{191}{section*.199} \contentsline {subsubsection}{基于节点对齐的规则抽取}{193}{section*.199}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于对齐矩阵的规则抽取}{191}{section*.202} \contentsline {subsubsection}{基于对齐矩阵的规则抽取}{193}{section*.202}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.5}句法翻译模型的特征}{192}{subsection.4.4.5} \contentsline {subsection}{\numberline {4.4.5}句法翻译模型的特征}{194}{subsection.4.4.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.6}基于超图的推导空间表示}{195}{subsection.4.4.6} \contentsline {subsection}{\numberline {4.4.6}基于超图的推导空间表示}{197}{subsection.4.4.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.7}基于树的解码 vs 基于串的解码}{197}{subsection.4.4.7} \contentsline {subsection}{\numberline {4.4.7}基于树的解码 vs 基于串的解码}{199}{subsection.4.4.7}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于树的解码}{198}{section*.209} \contentsline {subsubsection}{基于树的解码}{200}{section*.209}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于串的解码}{200}{section*.212} \contentsline {subsubsection}{基于串的解码}{202}{section*.212}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.5}小结及深入阅读}{201}{section.4.5} \contentsline {section}{\numberline {4.5}小结及深入阅读}{203}{section.4.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {III}{神经机器翻译}}{205}{part.3} \contentsline {part}{\@mypartnumtocformat {III}{神经机器翻译}}{207}{part.3}
\ttl@stoptoc {default@2} \ttl@stoptoc {default@2}
\ttl@starttoc {default@3} \ttl@starttoc {default@3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {5}人工神经网络和神经语言建模}{207}{chapter.5} \contentsline {chapter}{\numberline {5}人工神经网络和神经语言建模}{209}{chapter.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.1}深度学习与人工神经网络}{208}{section.5.1} \contentsline {section}{\numberline {5.1}深度学习与人工神经网络}{210}{section.5.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.1.1}发展简史}{208}{subsection.5.1.1} \contentsline {subsection}{\numberline {5.1.1}发展简史}{210}{subsection.5.1.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{早期的人工神经网络和第一次寒冬}{208}{section*.214} \contentsline {subsubsection}{早期的人工神经网络和第一次寒冬}{210}{section*.214}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经网络的第二次高潮和第二次寒冬}{209}{section*.215} \contentsline {subsubsection}{神经网络的第二次高潮和第二次寒冬}{211}{section*.215}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深度学习和神经网络方法的崛起}{210}{section*.216} \contentsline {subsubsection}{深度学习和神经网络方法的崛起}{212}{section*.216}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.1.2}为什么需要深度学习}{211}{subsection.5.1.2} \contentsline {subsection}{\numberline {5.1.2}为什么需要深度学习}{213}{subsection.5.1.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{端到端学习和表示学习}{211}{section*.218} \contentsline {subsubsection}{端到端学习和表示学习}{213}{section*.218}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深度学习的效果}{212}{section*.220} \contentsline {subsubsection}{深度学习的效果}{214}{section*.220}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.2}神经网络基础}{212}{section.5.2} \contentsline {section}{\numberline {5.2}神经网络基础}{214}{section.5.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.1}线性代数基础}{212}{subsection.5.2.1} \contentsline {subsection}{\numberline {5.2.1}线性代数基础}{214}{subsection.5.2.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{标量、向量和矩阵}{213}{section*.222} \contentsline {subsubsection}{标量、向量和矩阵}{215}{section*.222}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{矩阵的转置}{214}{section*.223} \contentsline {subsubsection}{矩阵的转置}{216}{section*.223}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{矩阵加法和数乘}{214}{section*.224} \contentsline {subsubsection}{矩阵加法和数乘}{216}{section*.224}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{矩阵乘法和矩阵点乘}{215}{section*.225} \contentsline {subsubsection}{矩阵乘法和矩阵点乘}{217}{section*.225}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{线性映射}{216}{section*.226} \contentsline {subsubsection}{线性映射}{218}{section*.226}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{范数}{217}{section*.227} \contentsline {subsubsection}{范数}{219}{section*.227}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.2}人工神经元和感知机}{218}{subsection.5.2.2} \contentsline {subsection}{\numberline {5.2.2}人工神经元和感知机}{220}{subsection.5.2.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{感知机\ \raisebox {0.5mm}{------}\ 最简单的人工神经元模型}{219}{section*.230} \contentsline {subsubsection}{感知机\ \raisebox {0.5mm}{------}\ 最简单的人工神经元模型}{221}{section*.230}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经元内部权重}{220}{section*.233} \contentsline {subsubsection}{神经元内部权重}{222}{section*.233}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经元的输入\ \raisebox {0.5mm}{------}\ 离散 vs 连续}{221}{section*.235} \contentsline {subsubsection}{神经元的输入\ \raisebox {0.5mm}{------}\ 离散 vs 连续}{223}{section*.235}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经元内部的参数学习}{221}{section*.237} \contentsline {subsubsection}{神经元内部的参数学习}{223}{section*.237}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.3}多层神经网络}{222}{subsection.5.2.3} \contentsline {subsection}{\numberline {5.2.3}多层神经网络}{224}{subsection.5.2.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{线性变换和激活函数}{222}{section*.239} \contentsline {subsubsection}{线性变换和激活函数}{224}{section*.239}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{单层神经网络$\rightarrow $多层神经网络}{224}{section*.246} \contentsline {subsubsection}{单层神经网络$\rightarrow $多层神经网络}{226}{section*.246}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.4}函数拟合能力}{225}{subsection.5.2.4} \contentsline {subsection}{\numberline {5.2.4}函数拟合能力}{227}{subsection.5.2.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.3}神经网络的张量实现}{229}{section.5.3} \contentsline {section}{\numberline {5.3}神经网络的张量实现}{231}{section.5.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.1} 张量及其计算}{230}{subsection.5.3.1} \contentsline {subsection}{\numberline {5.3.1} 张量及其计算}{232}{subsection.5.3.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{张量}{230}{section*.256} \contentsline {subsubsection}{张量}{232}{section*.256}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{张量的矩阵乘法}{232}{section*.259} \contentsline {subsubsection}{张量的矩阵乘法}{234}{section*.259}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{张量的单元操作}{233}{section*.261} \contentsline {subsubsection}{张量的单元操作}{235}{section*.261}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.2}张量的物理存储形式}{234}{subsection.5.3.2} \contentsline {subsection}{\numberline {5.3.2}张量的物理存储形式}{236}{subsection.5.3.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.3}使用开源框架实现张量计算}{234}{subsection.5.3.3} \contentsline {subsection}{\numberline {5.3.3}使用开源框架实现张量计算}{236}{subsection.5.3.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.4}前向传播与计算图}{236}{subsection.5.3.4} \contentsline {subsection}{\numberline {5.3.4}前向传播与计算图}{238}{subsection.5.3.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.5}神经网络实例}{239}{subsection.5.3.5} \contentsline {subsection}{\numberline {5.3.5}神经网络实例}{241}{subsection.5.3.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.4}神经网络的参数训练}{240}{section.5.4} \contentsline {section}{\numberline {5.4}神经网络的参数训练}{242}{section.5.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.1}损失函数}{241}{subsection.5.4.1} \contentsline {subsection}{\numberline {5.4.1}损失函数}{243}{subsection.5.4.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.2}基于梯度的参数优化}{241}{subsection.5.4.2} \contentsline {subsection}{\numberline {5.4.2}基于梯度的参数优化}{243}{subsection.5.4.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度下降}{242}{section*.279} \contentsline {subsubsection}{梯度下降}{244}{section*.279}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度获取}{244}{section*.281} \contentsline {subsubsection}{梯度获取}{246}{section*.281}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于梯度的方法的变种和改进}{247}{section*.285} \contentsline {subsubsection}{基于梯度的方法的变种和改进}{249}{section*.285}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.3}参数更新的并行化策略}{250}{subsection.5.4.3} \contentsline {subsection}{\numberline {5.4.3}参数更新的并行化策略}{252}{subsection.5.4.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.4}梯度消失、梯度爆炸和稳定性训练}{252}{subsection.5.4.4} \contentsline {subsection}{\numberline {5.4.4}梯度消失、梯度爆炸和稳定性训练}{254}{subsection.5.4.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{易于优化的激活函数}{252}{section*.288} \contentsline {subsubsection}{易于优化的激活函数}{254}{section*.288}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度裁剪}{253}{section*.292} \contentsline {subsubsection}{梯度裁剪}{255}{section*.292}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{稳定性训练}{254}{section*.293} \contentsline {subsubsection}{稳定性训练}{256}{section*.293}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.5}过拟合}{255}{subsection.5.4.5} \contentsline {subsection}{\numberline {5.4.5}过拟合}{257}{subsection.5.4.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.6}反向传播}{256}{subsection.5.4.6} \contentsline {subsection}{\numberline {5.4.6}反向传播}{258}{subsection.5.4.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{输出层的反向传播}{257}{section*.296} \contentsline {subsubsection}{输出层的反向传播}{259}{section*.296}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{隐藏层的反向传播}{259}{section*.300} \contentsline {subsubsection}{隐藏层的反向传播}{261}{section*.300}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{程序实现}{260}{section*.303} \contentsline {subsubsection}{程序实现}{262}{section*.303}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.5}神经语言模型}{261}{section.5.5} \contentsline {section}{\numberline {5.5}神经语言模型}{263}{section.5.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.5.1}基于神经网络的语言建模}{262}{subsection.5.5.1} \contentsline {subsection}{\numberline {5.5.1}基于神经网络的语言建模}{264}{subsection.5.5.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于前馈神经网络的语言模型}{263}{section*.306} \contentsline {subsubsection}{基于前馈神经网络的语言模型}{265}{section*.306}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于循环神经网络的语言模型}{265}{section*.309} \contentsline {subsubsection}{基于循环神经网络的语言模型}{267}{section*.309}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于自注意力机制的语言模型}{266}{section*.311} \contentsline {subsubsection}{基于自注意力机制的语言模型}{268}{section*.311}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{语言模型的评价}{267}{section*.313} \contentsline {subsubsection}{语言模型的评价}{269}{section*.313}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.5.2}单词表示模型}{268}{subsection.5.5.2} \contentsline {subsection}{\numberline {5.5.2}单词表示模型}{270}{subsection.5.5.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{One-hot编码}{268}{section*.314} \contentsline {subsubsection}{One-hot编码}{270}{section*.314}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{分布式表示}{268}{section*.316} \contentsline {subsubsection}{分布式表示}{270}{section*.316}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.5.3}句子表示模型及预训练}{270}{subsection.5.5.3} \contentsline {subsection}{\numberline {5.5.3}句子表示模型及预训练}{272}{subsection.5.5.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{简单的上下文表示模型}{270}{section*.320} \contentsline {subsubsection}{简单的上下文表示模型}{272}{section*.320}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{ELMO模型}{272}{section*.323} \contentsline {subsubsection}{ELMO模型}{274}{section*.323}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{GPT模型}{272}{section*.325} \contentsline {subsubsection}{GPT模型}{274}{section*.325}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{BERT模型}{273}{section*.327} \contentsline {subsubsection}{BERT模型}{275}{section*.327}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{为什么要预训练?}{274}{section*.329} \contentsline {subsubsection}{为什么要预训练?}{276}{section*.329}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.6}小结及深入阅读}{275}{section.5.6} \contentsline {section}{\numberline {5.6}小结及深入阅读}{277}{section.5.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {6}神经机器翻译模型}{277}{chapter.6} \contentsline {chapter}{\numberline {6}神经机器翻译模型}{279}{chapter.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.1}神经机器翻译的发展简史}{277}{section.6.1} \contentsline {section}{\numberline {6.1}神经机器翻译的发展简史}{279}{section.6.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.1.1}神经机器翻译的起源}{279}{subsection.6.1.1} \contentsline {subsection}{\numberline {6.1.1}神经机器翻译的起源}{281}{subsection.6.1.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.1.2}神经机器翻译的品质 }{281}{subsection.6.1.2} \contentsline {subsection}{\numberline {6.1.2}神经机器翻译的品质 }{283}{subsection.6.1.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.1.3}神经机器翻译的优势 }{284}{subsection.6.1.3} \contentsline {subsection}{\numberline {6.1.3}神经机器翻译的优势 }{286}{subsection.6.1.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.2}编码器-解码器框架}{286}{section.6.2} \contentsline {section}{\numberline {6.2}编码器-解码器框架}{288}{section.6.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.1}框架结构}{286}{subsection.6.2.1} \contentsline {subsection}{\numberline {6.2.1}框架结构}{288}{subsection.6.2.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.2}表示学习}{287}{subsection.6.2.2} \contentsline {subsection}{\numberline {6.2.2}表示学习}{289}{subsection.6.2.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.3}简单的运行实例}{288}{subsection.6.2.3} \contentsline {subsection}{\numberline {6.2.3}简单的运行实例}{290}{subsection.6.2.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.4}机器翻译范式的对比}{289}{subsection.6.2.4} \contentsline {subsection}{\numberline {6.2.4}机器翻译范式的对比}{291}{subsection.6.2.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.3}基于循环神经网络的翻译模型及注意力机制}{290}{section.6.3} \contentsline {section}{\numberline {6.3}基于循环神经网络的翻译模型及注意力机制}{292}{section.6.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.1}建模}{291}{subsection.6.3.1} \contentsline {subsection}{\numberline {6.3.1}建模}{293}{subsection.6.3.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.2}输入(词嵌入)及输出(Softmax)}{294}{subsection.6.3.2} \contentsline {subsection}{\numberline {6.3.2}输入(词嵌入)及输出(Softmax)}{296}{subsection.6.3.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.3}循环神经网络结构}{297}{subsection.6.3.3} \contentsline {subsection}{\numberline {6.3.3}循环神经网络结构}{299}{subsection.6.3.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{循环神经单元(RNN)}{297}{section*.351} \contentsline {subsubsection}{循环神经单元(RNN)}{299}{section*.351}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长短时记忆网络(LSTM)}{298}{section*.352} \contentsline {subsubsection}{长短时记忆网络(LSTM)}{300}{section*.352}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{门控循环单元(GRU)}{299}{section*.355} \contentsline {subsubsection}{门控循环单元(GRU)}{301}{section*.355}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{双向模型}{301}{section*.357} \contentsline {subsubsection}{双向模型}{303}{section*.357}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{多层循环神经网络}{302}{section*.359} \contentsline {subsubsection}{多层循环神经网络}{304}{section*.359}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.4}注意力机制}{302}{subsection.6.3.4} \contentsline {subsection}{\numberline {6.3.4}注意力机制}{304}{subsection.6.3.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译中的注意力机制}{304}{section*.362} \contentsline {subsubsection}{翻译中的注意力机制}{306}{section*.362}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{上下文向量的计算}{305}{section*.365} \contentsline {subsubsection}{上下文向量的计算}{307}{section*.365}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{注意力机制的解读}{308}{section*.370} \contentsline {subsubsection}{注意力机制的解读}{310}{section*.370}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.5}训练}{310}{subsection.6.3.5} \contentsline {subsection}{\numberline {6.3.5}训练}{312}{subsection.6.3.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{损失函数}{310}{section*.373} \contentsline {subsubsection}{损失函数}{312}{section*.373}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长参数初始化}{311}{section*.374} \contentsline {subsubsection}{长参数初始化}{313}{section*.374}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{优化策略}{311}{section*.375} \contentsline {subsubsection}{优化策略}{313}{section*.375}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度裁剪}{312}{section*.377} \contentsline {subsubsection}{梯度裁剪}{314}{section*.377}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{学习率策略}{312}{section*.378} \contentsline {subsubsection}{学习率策略}{314}{section*.378}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{并行训练}{313}{section*.381} \contentsline {subsubsection}{并行训练}{315}{section*.381}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.6}推断}{315}{subsection.6.3.6} \contentsline {subsection}{\numberline {6.3.6}推断}{317}{subsection.6.3.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{贪婪搜索}{317}{section*.385} \contentsline {subsubsection}{贪婪搜索}{319}{section*.385}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{束搜索}{318}{section*.388} \contentsline {subsubsection}{束搜索}{320}{section*.388}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长度惩罚}{318}{section*.390} \contentsline {subsubsection}{长度惩罚}{320}{section*.390}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.7}实例-GNMT}{320}{subsection.6.3.7} \contentsline {subsection}{\numberline {6.3.7}实例-GNMT}{322}{subsection.6.3.7}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.4}Transformer}{320}{section.6.4} \contentsline {section}{\numberline {6.4}Transformer}{322}{section.6.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.1}自注意力模型}{322}{subsection.6.4.1} \contentsline {subsection}{\numberline {6.4.1}自注意力模型}{324}{subsection.6.4.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.2}Transformer架构}{324}{subsection.6.4.2} \contentsline {subsection}{\numberline {6.4.2}Transformer架构}{326}{subsection.6.4.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.3}位置编码}{326}{subsection.6.4.3} \contentsline {subsection}{\numberline {6.4.3}位置编码}{328}{subsection.6.4.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.4}基于点乘的注意力机制}{328}{subsection.6.4.4} \contentsline {subsection}{\numberline {6.4.4}基于点乘的注意力机制}{330}{subsection.6.4.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.5}掩码操作}{330}{subsection.6.4.5} \contentsline {subsection}{\numberline {6.4.5}掩码操作}{332}{subsection.6.4.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.6}多头注意力}{331}{subsection.6.4.6} \contentsline {subsection}{\numberline {6.4.6}多头注意力}{333}{subsection.6.4.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.7}残差网络和层正则化}{332}{subsection.6.4.7} \contentsline {subsection}{\numberline {6.4.7}残差网络和层正则化}{334}{subsection.6.4.7}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.8}前馈全连接网络子层}{333}{subsection.6.4.8} \contentsline {subsection}{\numberline {6.4.8}前馈全连接网络子层}{335}{subsection.6.4.8}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.9}训练}{334}{subsection.6.4.9} \contentsline {subsection}{\numberline {6.4.9}训练}{336}{subsection.6.4.9}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.10}推断}{337}{subsection.6.4.10} \contentsline {subsection}{\numberline {6.4.10}推断}{339}{subsection.6.4.10}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.5}序列到序列问题及应用}{337}{section.6.5} \contentsline {section}{\numberline {6.5}序列到序列问题及应用}{339}{section.6.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.1}自动问答}{338}{subsection.6.5.1} \contentsline {subsection}{\numberline {6.5.1}自动问答}{340}{subsection.6.5.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.2}自动文摘}{338}{subsection.6.5.2} \contentsline {subsection}{\numberline {6.5.2}自动文摘}{340}{subsection.6.5.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.3}文言文翻译}{339}{subsection.6.5.3} \contentsline {subsection}{\numberline {6.5.3}文言文翻译}{341}{subsection.6.5.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.4}对联生成}{339}{subsection.6.5.4} \contentsline {subsection}{\numberline {6.5.4}对联生成}{341}{subsection.6.5.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.5}古诗生成}{340}{subsection.6.5.5} \contentsline {subsection}{\numberline {6.5.5}古诗生成}{342}{subsection.6.5.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.6}小结及深入阅读}{341}{section.6.6} \contentsline {section}{\numberline {6.6}小结及深入阅读}{343}{section.6.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {IV}{附录}}{343}{part.4} \contentsline {chapter}{\numberline {7}神经机器翻译实战 \ \raisebox {0.5mm}{------}\ 参加一次比赛}{345}{chapter.7}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.1}神经机器翻译并不简单}{345}{section.7.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.1.1}影响神经机器翻译性能的因素}{346}{subsection.7.1.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.1.2}搭建神经机器翻译系统的步骤 }{347}{subsection.7.1.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.1.3}架构选择 }{348}{subsection.7.1.3}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.2}数据处理}{349}{section.7.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.2.1}分词}{349}{subsection.7.2.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.2.2}标准化}{350}{subsection.7.2.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.2.3}数据清洗}{351}{subsection.7.2.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.2.4}子词切分}{354}{subsection.7.2.4}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{大词表和OOV问题}{354}{section*.428}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{子词}{355}{section*.430}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{双字节编码(BPE)}{356}{section*.432}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{其他方法}{358}{section*.435}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.3}建模与训练}{359}{section.7.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.3.1}正则化}{359}{subsection.7.3.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{L1/L2正则化}{360}{section*.437}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{标签平滑}{361}{section*.438}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{Dropout}{362}{section*.440}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{Layer Dropout}{363}{section*.443}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.3.2}增大模型容量}{364}{subsection.7.3.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{宽网络}{365}{section*.445}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深网络}{366}{section*.447}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{增大输入层和输出层表示能力}{367}{section*.449}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{大模型的分布式计算}{367}{section*.450}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.3.3}大批量训练}{368}{subsection.7.3.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{为什么需要大批量训练}{368}{section*.451}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{如何构建批次}{369}{section*.454}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.4}推断}{370}{section.7.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.4.1}推断优化}{370}{subsection.7.4.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{推断系统的架构}{371}{section*.456}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{自左向右推断 vs 自右向左推断}{372}{section*.458}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{推断加速}{373}{section*.459}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.4.2}译文长度控制}{379}{subsection.7.4.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长度惩罚因子}{380}{section*.465}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{译文长度范围约束}{381}{section*.467}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{覆盖度模型}{381}{section*.468}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.4.3}多模型集成}{382}{subsection.7.4.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{假设选择}{383}{section*.469}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{局部预测融合}{384}{section*.471}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{译文重组}{385}{section*.473}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.5}进阶技术}{386}{section.7.5}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.5.1}深层模型}{386}{subsection.7.5.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{Post-Norm vs Pre-Norm}{386}{section*.476}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{层聚合}{389}{section*.479}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深层模型的训练加速}{390}{section*.481}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{渐进式训练}{390}{section*.482}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{分组稠密连接}{390}{section*.484}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{学习率重置策略}{391}{section*.486}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深层模型的鲁棒性训练}{393}{section*.488}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.5.2}单语数据的使用}{395}{subsection.7.5.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{伪数据}{395}{section*.491}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{预训练}{396}{section*.494}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{联合训练}{399}{section*.497}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.5.3}知识精炼}{400}{subsection.7.5.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{什么是知识精炼}{400}{section*.499}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{知识精炼的基本方法}{401}{section*.500}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{机器翻译中的知识精炼}{403}{section*.502}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.5.4}双向训练}{404}{subsection.7.5.4}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{有监督对偶学习}{404}{section*.504}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{无监督对偶学习}{405}{section*.505}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译中回译}{406}{section*.507}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.6}小结及深入阅读}{407}{section.7.6}
\defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {IV}{附录}}{411}{part.4}
\ttl@stoptoc {default@3} \ttl@stoptoc {default@3}
\ttl@starttoc {default@4} \ttl@starttoc {default@4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {A}附录A}{345}{Appendix.1.A} \contentsline {chapter}{\numberline {A}附录A}{413}{Appendix.1.A}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {A.1}基准数据集}{413}{section.1.A.1}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {A.2}平行语料}{414}{section.1.A.2}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {A.3}相关工具}{415}{section.1.A.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {A.3.1}数据预处理工具}{415}{subsection.1.A.3.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {A.3.2}评价工具}{416}{subsection.1.A.3.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {B}附录B}{347}{Appendix.2.B} \contentsline {chapter}{\numberline {B}附录B}{417}{Appendix.2.B}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {B.1}IBM模型3训练方法}{347}{section.2.B.1} \contentsline {section}{\numberline {B.1}IBM模型3训练方法}{417}{section.2.B.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {B.2}IBM模型4训练方法}{349}{section.2.B.2} \contentsline {section}{\numberline {B.2}IBM模型4训练方法}{419}{section.2.B.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {B.3}IBM模型5训练方法}{351}{section.2.B.3} \contentsline {section}{\numberline {B.3}IBM模型5训练方法}{421}{section.2.B.3}
\contentsfinish \contentsfinish
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论