Commit 95e6c2de by xiaotong

minor updates of sections 5-6

parent 28b2ae5b
...@@ -49,10 +49,10 @@ ...@@ -49,10 +49,10 @@
\draw [->,thick] (rlayer3.north) -- ([yshift=1em]rlayer3.north); \draw [->,thick] (rlayer3.north) -- ([yshift=1em]rlayer3.north);
{ {
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h1) at ([yshift=1em]rlayer2.north) {\tiny{h1 = Merge($\cdot$)}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h1) at ([yshift=1em]rlayer2.north) {\scriptsize{h1 = Merge($\cdot$)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h2) at ([yshift=1em]h1.north) {\tiny{h2 = Relu($\cdot$)}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h2) at ([yshift=1em]h1.north) {\scriptsize{h2 = Relu($\cdot$)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h3) at ([yshift=1em]h2.north) {\tiny{h3 = Sum($\cdot$)}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h3) at ([yshift=1em]h2.north) {\scriptsize{h3 = Sum($\cdot$)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h4) at ([yshift=1em]h3.north) {\tiny{h4 = Softmax($\cdot$)}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h4) at ([yshift=1em]h3.north) {\scriptsize{h4 = Softmax($\cdot$)}};
\draw [->,thick] (h1.north) -- (h2.south); \draw [->,thick] (h1.north) -- (h2.south);
\draw [->,thick] (h2.north) -- (h3.south); \draw [->,thick] (h2.north) -- (h3.south);
\draw [->,thick] (h3.north) -- (h4.south); \draw [->,thick] (h3.north) -- (h4.south);
...@@ -60,7 +60,7 @@ ...@@ -60,7 +60,7 @@
} }
{ {
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (slayer) at ([yshift=1em]h4.north) {\tiny{Split($\cdot$)}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (slayer) at ([yshift=1em]h4.north) {\scriptsize{Split($\cdot$)}};
\node [anchor=south,draw,circle,inner sep=1pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (y2) at ([yshift=1em]slayer.north) {\footnotesize{$\textrm{y}_2$}}; \node [anchor=south,draw,circle,inner sep=1pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (y2) at ([yshift=1em]slayer.north) {\footnotesize{$\textrm{y}_2$}};
\node [anchor=east,draw,circle,inner sep=1pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (y1) at ([xshift=-2em]y2.west) {\footnotesize{$\textrm{y}_1$}}; \node [anchor=east,draw,circle,inner sep=1pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (y1) at ([xshift=-2em]y2.west) {\footnotesize{$\textrm{y}_1$}};
\node [anchor=west,draw,circle,inner sep=1pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (y3) at ([xshift=2em]y2.east) {\footnotesize{$\textrm{y}_3$}}; \node [anchor=west,draw,circle,inner sep=1pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (y3) at ([xshift=2em]y2.east) {\footnotesize{$\textrm{y}_3$}};
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
\node [anchor=center] (node1) at (0,0) {}; \node [anchor=center] (node1) at (0,0) {};
\node [anchor=north,draw,thick](node2)at ([yshift=-1.5em]node1.south){\small{weight layer}}; \node [anchor=north,draw,thick](node2)at ([yshift=-1.5em]node1.south){\small{\ \ layer\ \ }};
\draw[->,thick](node1.south)--(node2.north); \draw[->,thick](node1.south)--(node2.north);
\node [anchor=north](node3)at ([yshift=-1.2em]node2.south){$\bigoplus$}; \node [anchor=north](node3)at ([yshift=-1.2em]node2.south){$\bigoplus$};
......
...@@ -1550,7 +1550,7 @@ w_{t+1}&=&w_t-\frac{\eta}{\sqrt{z_t+\epsilon}} v_t ...@@ -1550,7 +1550,7 @@ w_{t+1}&=&w_t-\frac{\eta}{\sqrt{z_t+\epsilon}} v_t
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter5/Figures/fig-parallel} \input{./Chapter5/Figures/fig-parallel}
\caption{同步更新与异步更新对比} \caption{同步更新与异步更新对比}
\label{fig:parallel} \label{fig:parallel}
\end {figure} \end {figure}
%------------------------------------------- %-------------------------------------------
...@@ -1719,8 +1719,7 @@ w_{t+1}&=&w_t-\frac{\eta}{\sqrt{z_t+\epsilon}} v_t ...@@ -1719,8 +1719,7 @@ w_{t+1}&=&w_t-\frac{\eta}{\sqrt{z_t+\epsilon}} v_t
%公式-------------------------------------------------------------------- %公式--------------------------------------------------------------------
\begin{eqnarray} \begin{eqnarray}
\mathbf h^k & = & f^k(\mathbf s^k) \nonumber \nonumber \\ \mathbf h^k & = & f^k(\mathbf s^k) \nonumber \nonumber \\
& = & f^k(\mathbf h^{k-1}\mathbf w^k) \nonumber \\ & = & f^k(\mathbf h^{k-1}\mathbf w^k)
& = & f^k(\sum_{j}{h_j^{k-1}w_{j,i}^k})
\label{eqa1.46} \label{eqa1.46}
\end{eqnarray} \end{eqnarray}
%公式-------------------------------------------------------------------- %公式--------------------------------------------------------------------
......
...@@ -264,13 +264,12 @@ NMT & $ 21.7^{\ast}$ & $18.7^{\ast}$ & -1 ...@@ -264,13 +264,12 @@ NMT & $ 21.7^{\ast}$ & $18.7^{\ast}$ & -1
\subsection{简单的运行实例}\index{Chapter6.2.3}\label{chapter6.2.3} \subsection{简单的运行实例}\index{Chapter6.2.3}\label{chapter6.2.3}
\parinterval 为了对编码器-解码器框架和神经机器翻译的运行过程有一个直观的认识,这里演示一个简单的翻译实例。这里采用标准的循环神经网络作为编码器和解码器的结构。假设系统的输入和输出为: \parinterval 为了对编码器-解码器框架和神经机器翻译的运行过程有一个直观的认识,这里演示一个简单的翻译实例。这里采用标准的循环神经网络作为编码器和解码器的结构。假设系统的输入和输出为:
\begin{example} \vspace{0.5em}
\quad \parinterval \hspace{5em} 源语(中文)输入:\{``我'',\ ``很'',\ ``好'',\ ``<eos>''\}
源语(中文)输入:``我''、``很''、``好''、``<eos>''
目标语(英文)输出:``I''、``am''、``fine''、``<eos>'' \vspace{0.3em}
\end{example} \parinterval \hspace{5em} 目标语(英文)输出:\{``I'',\ ``am'',\ ``fine'',\ ``<eos>''\}
\vspace{0.5em}
%figure-a simple example for tl %figure-a simple example for tl
%---------------------------------------------- %----------------------------------------------
...@@ -369,14 +368,9 @@ NMT & $ 21.7^{\ast}$ & $18.7^{\ast}$ & -1 ...@@ -369,14 +368,9 @@ NMT & $ 21.7^{\ast}$ & $18.7^{\ast}$ & -1
\parinterval 同大多数自然语言处理任务一样,神经机器翻译要解决的一个基本问题是如何描述文字序列,称为序列表示问题。例如,处理语音数据、文本数据都可以被看作是典型的序列表示问题。如果把一个序列看作一个时序上的一系列变量,不同时刻的变量之间往往是存在相关性的。也就是说,一个时序中某个时刻变量的状态会依赖其他时刻变量的状态,即上下文的语境信息。下面是一个简单的例子,假设有一个句子,但是最后两个单词被擦掉了,如何猜测被擦掉的单词是什么? \parinterval 同大多数自然语言处理任务一样,神经机器翻译要解决的一个基本问题是如何描述文字序列,称为序列表示问题。例如,处理语音数据、文本数据都可以被看作是典型的序列表示问题。如果把一个序列看作一个时序上的一系列变量,不同时刻的变量之间往往是存在相关性的。也就是说,一个时序中某个时刻变量的状态会依赖其他时刻变量的状态,即上下文的语境信息。下面是一个简单的例子,假设有一个句子,但是最后两个单词被擦掉了,如何猜测被擦掉的单词是什么?
\begin{example} \vspace{0.5em}
\quad \centerline{中午没吃饭,又刚打了一下午篮球,我现在很饿,我想\underline{\quad \quad \quad}}
\vspace{0.5em}
中午没吃饭,又刚打了一下午篮球,我现在很饿,我想\underline{\quad \quad \quad}
\end{example}
%\vspace{0.5em}
%\centerline{中午没吃饭,又刚打了一下午篮球,我现在很饿,我想\underline{\quad \quad \quad} 。}
%\vspace{0.5em}
\parinterval 显然,根据上下文中提到的``没吃饭''、``很饿'',最佳的答案是``吃 饭''或者``吃 东西''。也就是,对序列中某个位置的答案进行预测时需要记忆当前时刻之前的序列信息,因此,循环神经网络(Recurrent Neural Network, RNN)应运而生。实际上循环神经网络有着极为广泛的应用,例如语音识别、语言建模以及即将要介绍的神经机器翻译。 \parinterval 显然,根据上下文中提到的``没吃饭''、``很饿'',最佳的答案是``吃 饭''或者``吃 东西''。也就是,对序列中某个位置的答案进行预测时需要记忆当前时刻之前的序列信息,因此,循环神经网络(Recurrent Neural Network, RNN)应运而生。实际上循环神经网络有着极为广泛的应用,例如语音识别、语言建模以及即将要介绍的神经机器翻译。
...@@ -938,7 +932,7 @@ $\textrm{a}(\cdot)$可以被看作是目标语表示和源语言表示的一种` ...@@ -938,7 +932,7 @@ $\textrm{a}(\cdot)$可以被看作是目标语表示和源语言表示的一种`
\parinterval 将公式\ref{eqC6.29}应用于神经机器翻译有几个基本问题需要考虑:1)损失函数的选择;2)参数初始化的策略,也就是如何设置$\mathbf{w}_0$;3)优化策略和学习率调整策略;4)训练加速。下面对这些问题进行讨论。 \parinterval 将公式\ref{eqC6.29}应用于神经机器翻译有几个基本问题需要考虑:1)损失函数的选择;2)参数初始化的策略,也就是如何设置$\mathbf{w}_0$;3)优化策略和学习率调整策略;4)训练加速。下面对这些问题进行讨论。
%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%
\subsubsection{损失函数}\index{Chapter6.3.5.1} \subsubsection{损失函数}\index{Chapter6.3.5.1}
\parinterval 因为神经机器翻译在每个目标语位置都会输出一个概率分布,表示这个位置上不同单词出现的可能性,因此需要知道当前位置输出的分布相比于标准答案的``损失''。对于这个问题,常用的是交叉熵损失函数\footnote{\ \ 百度百科:\url{https://baike.baidu.com/item/\%E4\%BA\%A4\%E5\%8F\%89\%E7\%86\%B5/8983241?fr=aladdin}}。令$\mathbf{y}$表示机器翻译模型输出的分布,$\hat{\mathbf{y}}$ 表示标准答案,则交叉熵损失可以被定义为$L_{\textrm{ce}}(\mathbf{y},\hat{\mathbf{y}}) = - \sum_{k=1}^{|V|} \hat{\mathbf{y}}[k] \textrm{log} (\mathbf{y}[k])$,其中$\mathbf{y}[k]$和$\hat{\mathbf{y}}[k]$分别表示向量$\mathbf{y}$和$\hat{\mathbf{y}}$的第$k$维,$|V|$表示输出向量的维度(等于词表大小)。对于一个模型输出的概率分布$\mathbf{Y} = \{ \mathbf{y}_1,\mathbf{y}_2,..., \mathbf{y}_n \}$和标准答案分布$\hat{\mathbf{Y}}=\{ \hat{\mathbf{y}}_1, \hat{\mathbf{y}}_2,...,\hat{\mathbf{y}}_n \}$,损失函数可以被定义为 \parinterval 因为神经机器翻译在每个目标语位置都会输出一个概率分布,表示这个位置上不同单词出现的可能性,因此需要知道当前位置输出的分布相比于标准答案的``损失''。对于这个问题,常用的是交叉熵损失函数。令$\mathbf{y}$表示机器翻译模型输出的分布,$\hat{\mathbf{y}}$ 表示标准答案,则交叉熵损失可以被定义为$L_{\textrm{ce}}(\mathbf{y},\hat{\mathbf{y}}) = - \sum_{k=1}^{|V|} \hat{\mathbf{y}}[k] \textrm{log} (\mathbf{y}[k])$,其中$\mathbf{y}[k]$和$\hat{\mathbf{y}}[k]$分别表示向量$\mathbf{y}$和$\hat{\mathbf{y}}$的第$k$维,$|V|$表示输出向量的维度(等于词表大小)。对于一个模型输出的概率分布$\mathbf{Y} = \{ \mathbf{y}_1,\mathbf{y}_2,..., \mathbf{y}_n \}$和标准答案分布$\hat{\mathbf{Y}}=\{ \hat{\mathbf{y}}_1, \hat{\mathbf{y}}_2,...,\hat{\mathbf{y}}_n \}$,损失函数可以被定义为
%------------- %-------------
\begin{eqnarray} \begin{eqnarray}
L(\mathbf{Y},\hat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\hat{\mathbf{y}}_j) L(\mathbf{Y},\hat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\hat{\mathbf{y}}_j)
...@@ -1024,7 +1018,7 @@ L(\mathbf{Y},\hat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\hat{ ...@@ -1024,7 +1018,7 @@ L(\mathbf{Y},\hat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\hat{
\end{figure} \end{figure}
%---------------------------------------------- %----------------------------------------------
\parinterval 图\ref{fig:6-28}展示了一种常用的学习率调整策略。它分为两个阶段:预热阶段和衰减阶段。模型训练初期梯度通常很大,如果直接使用较大的学习率很容易让模型陷入局部最优。学习率的预热阶段便是通过在训练初期使学习率从小到大逐渐增加来减缓在初始阶段模型``跑偏''的现象。一般来说,初始学习率太高会使得模型进入一种损失函数曲面非常不平滑的区域,进而使得模型进入一种混乱状态,后续的优化过程很难取得很好的效果。一个常用的学习率预热方法是逐渐预热(Gradual Warmup),如果令预热的更新次数为$T'$,初始学习率为$\alpha_0$,则预热阶段第$t$次更新的学习率为: \parinterval 图\ref{fig:6-28}展示了一种常用的学习率调整策略。它分为两个阶段:预热阶段和衰减阶段。模型训练初期梯度通常很大,如果直接使用较大的学习率很容易让模型陷入局部最优。学习率的预热阶段便是通过在训练初期使学习率从小到大逐渐增加来减缓在初始阶段模型``跑偏''的现象。一般来说,初始学习率太高会使得模型进入一种损失函数曲面非常不平滑的区域,进而使得模型进入一种混乱状态,后续的优化过程很难取得很好的效果。一个常用的学习率预热方法是{\small\bfnew{逐渐预热}}(Gradual Warmup),如果令预热的更新次数为$T'$,初始学习率为$\alpha_0$,则预热阶段第$t$次更新的学习率为:
%------------------------------- %-------------------------------
\begin{eqnarray} \begin{eqnarray}
\alpha_t = \frac{t}{T'} \alpha_0 \quad,\quad 1 \leq t \leq T' \alpha_t = \frac{t}{T'} \alpha_0 \quad,\quad 1 \leq t \leq T'
......
\indexentry{Chapter1.1|hyperpage}{13} \indexentry{Chapter6.1|hyperpage}{7}
\indexentry{Chapter1.2|hyperpage}{16} \indexentry{Chapter6.1.1|hyperpage}{9}
\indexentry{Chapter1.3|hyperpage}{21} \indexentry{Chapter6.1.2|hyperpage}{11}
\indexentry{Chapter1.4|hyperpage}{22} \indexentry{Chapter6.1.3|hyperpage}{14}
\indexentry{Chapter1.4.1|hyperpage}{22} \indexentry{Chapter6.2|hyperpage}{16}
\indexentry{Chapter1.4.2|hyperpage}{24} \indexentry{Chapter6.2.1|hyperpage}{16}
\indexentry{Chapter1.4.3|hyperpage}{25} \indexentry{Chapter6.2.2|hyperpage}{17}
\indexentry{Chapter1.4.4|hyperpage}{26} \indexentry{Chapter6.2.3|hyperpage}{18}
\indexentry{Chapter1.4.5|hyperpage}{27} \indexentry{Chapter6.2.4|hyperpage}{19}
\indexentry{Chapter1.5|hyperpage}{28} \indexentry{Chapter6.3|hyperpage}{20}
\indexentry{Chapter1.5.1|hyperpage}{28} \indexentry{Chapter6.3.1|hyperpage}{22}
\indexentry{Chapter1.5.2|hyperpage}{29} \indexentry{Chapter6.3.2|hyperpage}{24}
\indexentry{Chapter1.5.2.1|hyperpage}{29} \indexentry{Chapter6.3.3|hyperpage}{28}
\indexentry{Chapter1.5.2.2|hyperpage}{31} \indexentry{Chapter6.3.3.1|hyperpage}{28}
\indexentry{Chapter1.5.2.3|hyperpage}{31} \indexentry{Chapter6.3.3.2|hyperpage}{28}
\indexentry{Chapter1.6|hyperpage}{32} \indexentry{Chapter6.3.3.3|hyperpage}{30}
\indexentry{Chapter1.7|hyperpage}{34} \indexentry{Chapter6.3.3.4|hyperpage}{31}
\indexentry{Chapter1.7.1|hyperpage}{34} \indexentry{Chapter6.3.3.5|hyperpage}{33}
\indexentry{Chapter1.7.1.1|hyperpage}{34} \indexentry{Chapter6.3.4|hyperpage}{33}
\indexentry{Chapter1.7.1.2|hyperpage}{36} \indexentry{Chapter6.3.4.1|hyperpage}{34}
\indexentry{Chapter1.7.2|hyperpage}{38} \indexentry{Chapter6.3.4.2|hyperpage}{35}
\indexentry{Chapter1.8|hyperpage}{40} \indexentry{Chapter6.3.4.3|hyperpage}{38}
\indexentry{Chapter2.1|hyperpage}{46} \indexentry{Chapter6.3.5|hyperpage}{40}
\indexentry{Chapter2.2|hyperpage}{47} \indexentry{Chapter6.3.5.1|hyperpage}{41}
\indexentry{Chapter2.2.1|hyperpage}{47} \indexentry{Chapter6.3.5.2|hyperpage}{41}
\indexentry{Chapter2.2.2|hyperpage}{49} \indexentry{Chapter6.3.5.3|hyperpage}{42}
\indexentry{Chapter2.2.3|hyperpage}{50} \indexentry{Chapter6.3.5.4|hyperpage}{42}
\indexentry{Chapter2.2.4|hyperpage}{51} \indexentry{Chapter6.3.5.5|hyperpage}{43}
\indexentry{Chapter2.2.5|hyperpage}{53} \indexentry{Chapter6.3.5.5|hyperpage}{44}
\indexentry{Chapter2.2.5.1|hyperpage}{53} \indexentry{Chapter6.3.6|hyperpage}{45}
\indexentry{Chapter2.2.5.2|hyperpage}{54} \indexentry{Chapter6.3.6.1|hyperpage}{47}
\indexentry{Chapter2.2.5.3|hyperpage}{54} \indexentry{Chapter6.3.6.2|hyperpage}{48}
\indexentry{Chapter2.3|hyperpage}{55} \indexentry{Chapter6.3.6.3|hyperpage}{49}
\indexentry{Chapter2.3.1|hyperpage}{56} \indexentry{Chapter6.3.7|hyperpage}{50}
\indexentry{Chapter2.3.2|hyperpage}{57} \indexentry{Chapter6.4|hyperpage}{52}
\indexentry{Chapter2.3.2.1|hyperpage}{57} \indexentry{Chapter6.4.1|hyperpage}{53}
\indexentry{Chapter2.3.2.2|hyperpage}{58} \indexentry{Chapter6.4.2|hyperpage}{54}
\indexentry{Chapter2.3.2.3|hyperpage}{60} \indexentry{Chapter6.4.3|hyperpage}{56}
\indexentry{Chapter2.4|hyperpage}{62} \indexentry{Chapter6.4.4|hyperpage}{58}
\indexentry{Chapter2.4.1|hyperpage}{63} \indexentry{Chapter6.4.5|hyperpage}{60}
\indexentry{Chapter2.4.2|hyperpage}{65} \indexentry{Chapter6.4.6|hyperpage}{62}
\indexentry{Chapter2.4.2.1|hyperpage}{66} \indexentry{Chapter6.4.7|hyperpage}{63}
\indexentry{Chapter2.4.2.2|hyperpage}{67} \indexentry{Chapter6.4.8|hyperpage}{64}
\indexentry{Chapter2.4.2.3|hyperpage}{68} \indexentry{Chapter6.4.9|hyperpage}{65}
\indexentry{Chapter2.5|hyperpage}{70} \indexentry{Chapter6.4.10|hyperpage}{68}
\indexentry{Chapter2.5.1|hyperpage}{70} \indexentry{Chapter6.5|hyperpage}{68}
\indexentry{Chapter2.5.2|hyperpage}{72} \indexentry{Chapter6.5.1|hyperpage}{69}
\indexentry{Chapter2.5.3|hyperpage}{76} \indexentry{Chapter6.5.2|hyperpage}{69}
\indexentry{Chapter2.6|hyperpage}{78} \indexentry{Chapter6.5.3|hyperpage}{69}
\indexentry{Chapter3.1|hyperpage}{83} \indexentry{Chapter6.5.4|hyperpage}{71}
\indexentry{Chapter3.2|hyperpage}{85} \indexentry{Chapter6.5.5|hyperpage}{71}
\indexentry{Chapter3.2.1|hyperpage}{85} \indexentry{Chapter6.6|hyperpage}{71}
\indexentry{Chapter3.2.1.1|hyperpage}{85}
\indexentry{Chapter3.2.1.2|hyperpage}{86}
\indexentry{Chapter3.2.1.3|hyperpage}{87}
\indexentry{Chapter3.2.2|hyperpage}{87}
\indexentry{Chapter3.2.3|hyperpage}{88}
\indexentry{Chapter3.2.3.1|hyperpage}{88}
\indexentry{Chapter3.2.3.2|hyperpage}{88}
\indexentry{Chapter3.2.3.3|hyperpage}{90}
\indexentry{Chapter3.2.4|hyperpage}{91}
\indexentry{Chapter3.2.4.1|hyperpage}{91}
\indexentry{Chapter3.2.4.2|hyperpage}{93}
\indexentry{Chapter3.2.5|hyperpage}{95}
\indexentry{Chapter3.3|hyperpage}{98}
\indexentry{Chapter3.3.1|hyperpage}{98}
\indexentry{Chapter3.3.2|hyperpage}{100}
\indexentry{Chapter3.3.2.1|hyperpage}{101}
\indexentry{Chapter3.3.2.2|hyperpage}{101}
\indexentry{Chapter3.3.2.3|hyperpage}{103}
\indexentry{Chapter3.4|hyperpage}{104}
\indexentry{Chapter3.4.1|hyperpage}{104}
\indexentry{Chapter3.4.2|hyperpage}{106}
\indexentry{Chapter3.4.3|hyperpage}{107}
\indexentry{Chapter3.4.4|hyperpage}{108}
\indexentry{Chapter3.4.4.1|hyperpage}{108}
\indexentry{Chapter3.4.4.2|hyperpage}{109}
\indexentry{Chapter3.5|hyperpage}{115}
\indexentry{Chapter3.5.1|hyperpage}{115}
\indexentry{Chapter3.5.2|hyperpage}{118}
\indexentry{Chapter3.5.3|hyperpage}{119}
\indexentry{Chapter3.5.4|hyperpage}{121}
\indexentry{Chapter3.5.5|hyperpage}{122}
\indexentry{Chapter3.5.5|hyperpage}{125}
\indexentry{Chapter3.6|hyperpage}{125}
\indexentry{Chapter3.6.1|hyperpage}{125}
\indexentry{Chapter3.6.2|hyperpage}{126}
\indexentry{Chapter3.6.4|hyperpage}{127}
\indexentry{Chapter3.6.5|hyperpage}{128}
\indexentry{Chapter3.7|hyperpage}{128}
\indexentry{Chapter4.1|hyperpage}{131}
\indexentry{Chapter4.1.1|hyperpage}{132}
\indexentry{Chapter4.1.2|hyperpage}{134}
\indexentry{Chapter4.2|hyperpage}{136}
\indexentry{Chapter4.2.1|hyperpage}{136}
\indexentry{Chapter4.2.2|hyperpage}{139}
\indexentry{Chapter4.2.2.1|hyperpage}{139}
\indexentry{Chapter4.2.2.2|hyperpage}{140}
\indexentry{Chapter4.2.2.3|hyperpage}{141}
\indexentry{Chapter4.2.3|hyperpage}{142}
\indexentry{Chapter4.2.3.1|hyperpage}{142}
\indexentry{Chapter4.2.3.2|hyperpage}{143}
\indexentry{Chapter4.2.3.3|hyperpage}{144}
\indexentry{Chapter4.2.4|hyperpage}{146}
\indexentry{Chapter4.2.4.1|hyperpage}{146}
\indexentry{Chapter4.2.4.2|hyperpage}{147}
\indexentry{Chapter4.2.4.3|hyperpage}{148}
\indexentry{Chapter4.2.5|hyperpage}{149}
\indexentry{Chapter4.2.6|hyperpage}{149}
\indexentry{Chapter4.2.7|hyperpage}{153}
\indexentry{Chapter4.2.7.1|hyperpage}{154}
\indexentry{Chapter4.2.7.2|hyperpage}{154}
\indexentry{Chapter4.2.7.3|hyperpage}{155}
\indexentry{Chapter4.2.7.4|hyperpage}{156}
\indexentry{Chapter4.3|hyperpage}{157}
\indexentry{Chapter4.3.1|hyperpage}{159}
\indexentry{Chapter4.3.1.1|hyperpage}{160}
\indexentry{Chapter4.3.1.2|hyperpage}{161}
\indexentry{Chapter4.3.1.3|hyperpage}{162}
\indexentry{Chapter4.3.1.4|hyperpage}{163}
\indexentry{Chapter4.3.2|hyperpage}{163}
\indexentry{Chapter4.3.3|hyperpage}{165}
\indexentry{Chapter4.3.4|hyperpage}{166}
\indexentry{Chapter4.3.5|hyperpage}{169}
\indexentry{Chapter4.4|hyperpage}{172}
\indexentry{Chapter4.4.1|hyperpage}{173}
\indexentry{Chapter4.4.2|hyperpage}{176}
\indexentry{Chapter4.4.2.1|hyperpage}{177}
\indexentry{Chapter4.4.2.2|hyperpage}{178}
\indexentry{Chapter4.4.2.3|hyperpage}{180}
\indexentry{Chapter4.4.3|hyperpage}{181}
\indexentry{Chapter4.4.3.1|hyperpage}{182}
\indexentry{Chapter4.4.3.2|hyperpage}{186}
\indexentry{Chapter4.4.3.3|hyperpage}{186}
\indexentry{Chapter4.4.3.4|hyperpage}{187}
\indexentry{Chapter4.4.3.5|hyperpage}{188}
\indexentry{Chapter4.4.4|hyperpage}{189}
\indexentry{Chapter4.4.4.1|hyperpage}{190}
\indexentry{Chapter4.4.4.2|hyperpage}{191}
\indexentry{Chapter4.4.5|hyperpage}{193}
\indexentry{Chapter4.4.5|hyperpage}{194}
\indexentry{Chapter4.4.7|hyperpage}{196}
\indexentry{Chapter4.4.7.1|hyperpage}{197}
\indexentry{Chapter4.4.7.2|hyperpage}{198}
\indexentry{Chapter4.5|hyperpage}{200}
\indexentry{Chapter5.1|hyperpage}{206}
\indexentry{Chapter5.1.1|hyperpage}{206}
\indexentry{Chapter5.1.1.1|hyperpage}{206}
\indexentry{Chapter5.1.1.2|hyperpage}{207}
\indexentry{Chapter5.1.1.3|hyperpage}{208}
\indexentry{Chapter5.1.2|hyperpage}{209}
\indexentry{Chapter5.1.2.1|hyperpage}{209}
\indexentry{Chapter5.1.2.2|hyperpage}{210}
\indexentry{Chapter5.2|hyperpage}{210}
\indexentry{Chapter5.2.1|hyperpage}{210}
\indexentry{Chapter5.2.1.1|hyperpage}{211}
\indexentry{Chapter5.2.1.2|hyperpage}{212}
\indexentry{Chapter5.2.1.3|hyperpage}{212}
\indexentry{Chapter5.2.1.4|hyperpage}{213}
\indexentry{Chapter5.2.1.5|hyperpage}{214}
\indexentry{Chapter5.2.1.6|hyperpage}{215}
\indexentry{Chapter5.2.2|hyperpage}{216}
\indexentry{Chapter5.2.2.1|hyperpage}{217}
\indexentry{Chapter5.2.2.2|hyperpage}{218}
\indexentry{Chapter5.2.2.3|hyperpage}{219}
\indexentry{Chapter5.2.2.4|hyperpage}{219}
\indexentry{Chapter5.2.3|hyperpage}{220}
\indexentry{Chapter5.2.3.1|hyperpage}{220}
\indexentry{Chapter5.2.3.2|hyperpage}{222}
\indexentry{Chapter5.2.4|hyperpage}{223}
\indexentry{Chapter5.3|hyperpage}{227}
\indexentry{Chapter5.3.1|hyperpage}{228}
\indexentry{Chapter5.3.1.1|hyperpage}{228}
\indexentry{Chapter5.3.1.2|hyperpage}{230}
\indexentry{Chapter5.3.1.3|hyperpage}{231}
\indexentry{Chapter5.3.2|hyperpage}{232}
\indexentry{Chapter5.3.3|hyperpage}{232}
\indexentry{Chapter5.3.4|hyperpage}{234}
\indexentry{Chapter5.3.5|hyperpage}{237}
\indexentry{Chapter5.4|hyperpage}{238}
\indexentry{Chapter5.4.1|hyperpage}{239}
\indexentry{Chapter5.4.2|hyperpage}{240}
\indexentry{Chapter5.4.2.1|hyperpage}{240}
\indexentry{Chapter5.4.2.2|hyperpage}{242}
\indexentry{Chapter5.4.2.3|hyperpage}{245}
\indexentry{Chapter5.4.3|hyperpage}{248}
\indexentry{Chapter5.4.4|hyperpage}{250}
\indexentry{Chapter5.4.4.1|hyperpage}{250}
\indexentry{Chapter5.4.4.2|hyperpage}{251}
\indexentry{Chapter5.4.4.3|hyperpage}{252}
\indexentry{Chapter5.4.5|hyperpage}{253}
\indexentry{Chapter5.4.6|hyperpage}{254}
\indexentry{Chapter5.4.6.1|hyperpage}{255}
\indexentry{Chapter5.4.6.2|hyperpage}{257}
\indexentry{Chapter5.4.6.3|hyperpage}{258}
\indexentry{Chapter5.5|hyperpage}{259}
\indexentry{Chapter5.5.1|hyperpage}{260}
\indexentry{Chapter5.5.1.1|hyperpage}{261}
\indexentry{Chapter5.5.1.2|hyperpage}{263}
\indexentry{Chapter5.5.1.3|hyperpage}{265}
\indexentry{Chapter5.5.1.4|hyperpage}{266}
\indexentry{Chapter5.5.2|hyperpage}{266}
\indexentry{Chapter5.5.2.1|hyperpage}{266}
\indexentry{Chapter5.5.2.2|hyperpage}{267}
\indexentry{Chapter5.5.3|hyperpage}{268}
\indexentry{Chapter5.5.3.1|hyperpage}{269}
\indexentry{Chapter5.5.3.2|hyperpage}{270}
\indexentry{Chapter5.5.3.3|hyperpage}{270}
\indexentry{Chapter5.5.3.4|hyperpage}{271}
\indexentry{Chapter5.5.3.5|hyperpage}{272}
\indexentry{Chapter5.6|hyperpage}{273}
\indexentry{Chapter6.1|hyperpage}{275}
\indexentry{Chapter6.1.1|hyperpage}{277}
\indexentry{Chapter6.1.2|hyperpage}{279}
\indexentry{Chapter6.1.3|hyperpage}{282}
\indexentry{Chapter6.2|hyperpage}{284}
\indexentry{Chapter6.2.1|hyperpage}{284}
\indexentry{Chapter6.2.2|hyperpage}{285}
\indexentry{Chapter6.2.3|hyperpage}{286}
\indexentry{Chapter6.2.4|hyperpage}{287}
\indexentry{Chapter6.3|hyperpage}{288}
\indexentry{Chapter6.3.1|hyperpage}{290}
\indexentry{Chapter6.3.2|hyperpage}{292}
\indexentry{Chapter6.3.3|hyperpage}{296}
\indexentry{Chapter6.3.3.1|hyperpage}{296}
\indexentry{Chapter6.3.3.2|hyperpage}{296}
\indexentry{Chapter6.3.3.3|hyperpage}{298}
\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax \boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax
\babel@toc {english}{}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {I}{机器翻译基础}}{7}{part.1}% \select@language {english}
\ttl@starttoc {default@1}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {1}机器翻译简介}{9}{chapter.1}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.1}机器翻译的概念}{9}{section.1.1}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.2}机器翻译简史}{12}{section.1.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.1}人工翻译}{12}{subsection.1.2.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.2}机器翻译的萌芽}{13}{subsection.1.2.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.3}机器翻译的受挫}{14}{subsection.1.2.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.4}机器翻译的快速成长}{15}{subsection.1.2.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.5}机器翻译的爆发}{16}{subsection.1.2.5}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.3}机器翻译现状}{17}{section.1.3}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.4}机器翻译方法}{18}{section.1.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.1}基于规则的机器翻译}{18}{subsection.1.4.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.2}基于实例的机器翻译}{20}{subsection.1.4.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.3}统计机器翻译}{21}{subsection.1.4.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.4}神经机器翻译}{22}{subsection.1.4.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.5}对比分析}{23}{subsection.1.4.5}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.5}翻译质量评价}{24}{section.1.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.5.1}人工评价}{24}{subsection.1.5.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.5.2}自动评价}{25}{subsection.1.5.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{BLEU}{25}{section*.15}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{TER}{27}{section*.16}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于检测点的评价}{27}{section*.17}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.6}机器翻译应用}{28}{section.1.6}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.7}开源项目与评测}{30}{section.1.7}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.7.1}开源机器翻译系统}{30}{subsection.1.7.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{统计机器翻译开源系统}{30}{section*.19}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经机器翻译开源系统}{32}{section*.20}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.7.2}常用数据集及公开评测任务}{34}{subsection.1.7.2}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.8}推荐学习资源}{36}{section.1.8}%
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {2}词法、语法及统计建模基础}{41}{chapter.2}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.1}问题概述 }{42}{section.2.1}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.2}概率论基础}{43}{section.2.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.1}随机变量和概率}{43}{subsection.2.2.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.2}联合概率、条件概率和边缘概率}{45}{subsection.2.2.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.3}链式法则}{46}{subsection.2.2.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.4}贝叶斯法则}{47}{subsection.2.2.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.5}KL距离和熵}{49}{subsection.2.2.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{信息熵}{49}{section*.27}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{KL距离}{50}{section*.29}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{交叉熵}{50}{section*.30}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.3}中文分词}{51}{section.2.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.3.1}基于词典的分词方法}{52}{subsection.2.3.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.3.2}基于统计的分词方法}{53}{subsection.2.3.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{统计模型的学习与推断}{53}{section*.34}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{掷骰子游戏}{54}{section*.36}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{全概率分词方法}{56}{section*.40}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.4}$n$-gram语言模型 }{58}{section.2.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.4.1}建模}{59}{subsection.2.4.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.4.2}未登录词和平滑算法}{61}{subsection.2.4.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{加法平滑方法}{62}{section*.46}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{古德-图灵估计法}{63}{section*.48}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{Kneser-Ney平滑方法}{64}{section*.50}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.5}句法分析(短语结构分析)}{66}{section.2.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.5.1}句子的句法树表示}{66}{subsection.2.5.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.5.2}上下文无关文法}{68}{subsection.2.5.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.5.3}规则和推导的概率}{72}{subsection.2.5.3}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.6}小结及深入阅读}{74}{section.2.6}%
\defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {II}{统计机器翻译}}{77}{part.2}%
\ttl@stoptoc {default@1}
\ttl@starttoc {default@2}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {3}基于词的机器翻译模型}{79}{chapter.3}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.1}什么是基于词的翻译模型}{79}{section.3.1}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.2}构建一个简单的机器翻译系统}{81}{section.3.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.1}如何进行翻译?}{81}{subsection.3.2.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{机器翻译流程}{82}{section*.63}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{人工翻译 vs. 机器翻译}{83}{section*.65}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.2}基本框架}{83}{subsection.3.2.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.3}单词翻译概率}{84}{subsection.3.2.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{什么是单词翻译概率?}{84}{section*.67}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{如何从一个双语平行数据中学习?}{84}{section*.69}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{如何从大量的双语平行数据中学习?}{86}{section*.70}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.4}句子级翻译模型}{87}{subsection.3.2.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基础模型}{87}{section*.72}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{生成流畅的译文}{89}{section*.74}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.5}解码}{91}{subsection.3.2.5}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.3}基于词的翻译建模}{94}{section.3.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.3.1}噪声信道模型}{94}{subsection.3.3.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.3.2}统计机器翻译的三个基本问题}{96}{subsection.3.3.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{词对齐}{97}{section*.83}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于词对齐的翻译模型}{97}{section*.86}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于词对齐的翻译实例}{99}{section*.88}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.4}IBM模型1-2}{100}{section.3.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.1}IBM模型1}{100}{subsection.3.4.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.2}IBM模型2}{102}{subsection.3.4.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.3}解码及计算优化}{103}{subsection.3.4.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.4}训练}{104}{subsection.3.4.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{目标函数}{104}{section*.93}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{优化}{105}{section*.95}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.5}IBM模型3-5及隐马尔可夫模型}{111}{section.3.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.1}基于产出率的翻译模型}{111}{subsection.3.5.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.2}IBM 模型3}{114}{subsection.3.5.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.3}IBM 模型4}{115}{subsection.3.5.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.4} IBM 模型5}{117}{subsection.3.5.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.5}隐马尔可夫模型}{118}{subsection.3.5.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{隐马尔可夫模型}{119}{section*.107}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{词对齐模型}{120}{section*.109}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.6}解码和训练}{121}{subsection.3.5.6}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.6}问题分析}{121}{section.3.6}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.1}词对齐及对称化}{121}{subsection.3.6.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.2}Deficiency}{122}{subsection.3.6.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.3}句子长度}{123}{subsection.3.6.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.4}其他问题}{124}{subsection.3.6.4}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.7}小结及深入阅读}{124}{section.3.7}%
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {4}基于短语和句法的机器翻译模型}{127}{chapter.4}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.1}翻译中的结构信息}{127}{section.4.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.1.1}更大粒度的翻译单元}{128}{subsection.4.1.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.1.2}句子的结构信息}{130}{subsection.4.1.2}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.2}基于短语的翻译模型}{132}{section.4.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.1}机器翻译中的短语}{132}{subsection.4.2.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.2}数学建模及判别式模型}{135}{subsection.4.2.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于翻译推导的建模}{135}{section*.121}%
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{对数线性模型}{136}{section*.122}% \contentsline {part}{\@mypartnumtocformat {I}{神经机器翻译}}{7}{part.1}
\defcounter {refsection}{0}\relax \ttl@starttoc {default@1}
\contentsline {subsubsection}{搭建模型的基本流程}{137}{section*.123}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.3}短语抽取}{138}{subsection.4.2.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{与词对齐一致的短语}{138}{section*.126}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{获取词对齐}{139}{section*.130}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{度量双语短语质量}{140}{section*.132}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.4}调序}{142}{subsection.4.2.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于距离的调序}{142}{section*.136}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于方向的调序}{143}{section*.138}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于分类的调序}{144}{section*.141}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.5}特征}{145}{subsection.4.2.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.6}最小错误率训练}{145}{subsection.4.2.6}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.7}栈解码}{149}{subsection.4.2.7}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译候选匹配}{150}{section*.146}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译假设扩展}{150}{section*.148}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{剪枝}{151}{section*.150}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{解码中的栈结构}{152}{section*.152}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.3}基于层次短语的模型}{153}{section.4.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.1}同步上下文无关文法}{155}{subsection.4.3.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{文法定义}{156}{section*.157}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{推导}{157}{section*.158}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{胶水规则}{158}{section*.159}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{处理流程}{159}{section*.160}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.2}层次短语规则抽取}{159}{subsection.4.3.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.3}翻译模型及特征}{161}{subsection.4.3.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.4}CYK解码}{162}{subsection.4.3.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.5}立方剪枝}{165}{subsection.4.3.5}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.4}基于语言学句法的模型}{168}{section.4.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.1}基于句法的翻译模型分类}{169}{subsection.4.4.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.2}基于树结构的文法}{172}{subsection.4.4.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{树到树翻译规则}{173}{section*.176}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于树结构的翻译推导}{174}{section*.178}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{树到串翻译规则}{176}{section*.181}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.3}树到串翻译规则抽取}{177}{subsection.4.4.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{树的切割与最小规则}{178}{section*.183}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{空对齐处理}{182}{section*.189}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{组合规则}{182}{section*.191}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{SPMT规则}{183}{section*.193}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{句法树二叉化}{184}{section*.195}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.4}树到树翻译规则抽取}{185}{subsection.4.4.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于节点对齐的规则抽取}{186}{section*.199}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于对齐矩阵的规则抽取}{187}{section*.202}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.5}句法翻译模型的特征}{189}{subsection.4.4.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.6}基于超图的推导空间表示}{190}{subsection.4.4.6}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.7}基于树的解码 vs 基于串的解码}{192}{subsection.4.4.7}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于树的解码}{193}{section*.209}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于串的解码}{194}{section*.212}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.5}小结及深入阅读}{196}{section.4.5}%
\defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {III}{神经机器翻译}}{199}{part.3}%
\ttl@stoptoc {default@2}
\ttl@starttoc {default@3}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {5}人工神经网络和神经语言建模}{201}{chapter.5}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.1}深度学习与人工神经网络}{202}{section.5.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.1.1}发展简史}{202}{subsection.5.1.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{早期的人工神经网络和第一次寒冬}{202}{section*.214}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经网络的第二次高潮和第二次寒冬}{203}{section*.215}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深度学习和神经网络方法的崛起}{204}{section*.216}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.1.2}为什么需要深度学习}{205}{subsection.5.1.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{端到端学习和表示学习}{205}{section*.218}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深度学习的效果}{206}{section*.220}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.2}神经网络基础}{206}{section.5.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.1}线性代数基础}{206}{subsection.5.2.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{标量、向量和矩阵}{207}{section*.222}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{矩阵的转置}{208}{section*.223}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{矩阵加法和数乘}{208}{section*.224}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{矩阵乘法和矩阵点乘}{209}{section*.225}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{线性映射}{210}{section*.226}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{范数}{211}{section*.227}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.2}人工神经元和感知机}{212}{subsection.5.2.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{感知机\ \raisebox {0.5mm}{------}\ 最简单的人工神经元模型}{213}{section*.230}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经元内部权重}{214}{section*.233}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经元的输入\ \raisebox {0.5mm}{------}\ 离散 vs 连续}{215}{section*.235}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经元内部的参数学习}{215}{section*.237}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.3}多层神经网络}{216}{subsection.5.2.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{线性变换和激活函数}{216}{section*.239}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{单层神经网络$\rightarrow $多层神经网络}{218}{section*.246}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.4}函数拟合能力}{219}{subsection.5.2.4}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.3}神经网络的张量实现}{223}{section.5.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.1} 张量及其计算}{224}{subsection.5.3.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{张量}{224}{section*.256}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{张量的矩阵乘法}{226}{section*.259}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{张量的单元操作}{227}{section*.261}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.2}张量的物理存储形式}{228}{subsection.5.3.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.3}使用开源框架实现张量计算}{228}{subsection.5.3.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.4}前向传播与计算图}{230}{subsection.5.3.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.5}神经网络实例}{233}{subsection.5.3.5}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.4}神经网络的参数训练}{234}{section.5.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.1}损失函数}{235}{subsection.5.4.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.2}基于梯度的参数优化}{236}{subsection.5.4.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度下降}{236}{section*.279}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度获取}{238}{section*.281}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于梯度的方法的变种和改进}{241}{section*.285}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.3}参数更新的并行化策略}{244}{subsection.5.4.3}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.4}梯度消失、梯度爆炸和稳定性训练}{246}{subsection.5.4.4}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{易于优化的激活函数}{246}{section*.288}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度裁剪}{247}{section*.292}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{稳定性训练}{248}{section*.293}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.5}过拟合}{249}{subsection.5.4.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.6}反向传播}{250}{subsection.5.4.6}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{输出层的反向传播}{251}{section*.296}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{隐藏层的反向传播}{253}{section*.300}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{程序实现}{254}{section*.303}%
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.5}神经语言模型}{255}{section.5.5}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.5.1}基于神经网络的语言建模}{256}{subsection.5.5.1}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于前馈神经网络的语言模型}{257}{section*.306}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于循环神经网络的语言模型}{259}{section*.309}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于自注意力机制的语言模型}{261}{section*.311}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{语言模型的评价}{262}{section*.313}%
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.5.2}单词表示模型}{262}{subsection.5.5.2}%
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{One-hot编码}{262}{section*.314}%
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{分布式表示}{263}{section*.316}% \contentsline {chapter}{\numberline {1}人工神经网络和神经语言建模}{9}{chapter.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.5.3}句子表示模型及预训练}{264}{subsection.5.5.3}% \contentsline {section}{\numberline {1.1}深度学习与人工神经网络}{10}{section.1.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{简单的上下文表示模型}{265}{section*.320}% \contentsline {subsection}{\numberline {1.1.1}发展简史}{10}{subsection.1.1.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{ELMO模型}{266}{section*.323}% \contentsline {subsubsection}{早期的人工神经网络和第一次寒冬}{10}{section*.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{GPT模型}{266}{section*.325}% \contentsline {subsubsection}{神经网络的第二次高潮和第二次寒冬}{11}{section*.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{BERT模型}{267}{section*.327}% \contentsline {subsubsection}{深度学习和神经网络方法的崛起}{12}{section*.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{为什么要预训练?}{268}{section*.329}% \contentsline {subsection}{\numberline {1.1.2}为什么需要深度学习}{13}{subsection.1.1.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.6}小结及深入阅读}{269}{section.5.6}% \contentsline {subsubsection}{端到端学习和表示学习}{13}{section*.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {6}神经机器翻译模型}{271}{chapter.6}% \contentsline {subsubsection}{深度学习的效果}{14}{section*.8}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.1}神经机器翻译的发展简史}{271}{section.6.1}% \contentsline {section}{\numberline {1.2}神经网络基础}{14}{section.1.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.1.1}神经机器翻译的起源}{273}{subsection.6.1.1}% \contentsline {subsection}{\numberline {1.2.1}线性代数基础}{14}{subsection.1.2.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.1.2}神经机器翻译的品质 }{275}{subsection.6.1.2}% \contentsline {subsubsection}{标量、向量和矩阵}{15}{section*.10}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.1.3}神经机器翻译的优势 }{278}{subsection.6.1.3}% \contentsline {subsubsection}{矩阵的转置}{16}{section*.11}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.2}编码器-解码器框架}{280}{section.6.2}% \contentsline {subsubsection}{矩阵加法和数乘}{16}{section*.12}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.1}框架结构}{280}{subsection.6.2.1}% \contentsline {subsubsection}{矩阵乘法和矩阵点乘}{17}{section*.13}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.2}表示学习}{281}{subsection.6.2.2}% \contentsline {subsubsection}{线性映射}{18}{section*.14}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.3}简单的运行实例}{282}{subsection.6.2.3}% \contentsline {subsubsection}{范数}{19}{section*.15}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.4}机器翻译范式的对比}{283}{subsection.6.2.4}% \contentsline {subsection}{\numberline {1.2.2}人工神经元和感知机}{20}{subsection.1.2.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.3}基于循环神经网络的翻译模型及注意力机制}{284}{section.6.3}% \contentsline {subsubsection}{感知机\ \raisebox {0.5mm}{------}\ 最简单的人工神经元模型}{21}{section*.18}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.1}建模}{286}{subsection.6.3.1}% \contentsline {subsubsection}{神经元内部权重}{22}{section*.21}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.2}输入(词嵌入)及输出(Softmax)}{288}{subsection.6.3.2}% \contentsline {subsubsection}{神经元的输入\ \raisebox {0.5mm}{------}\ 离散 vs 连续}{23}{section*.23}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.3}循环神经网络结构}{292}{subsection.6.3.3}% \contentsline {subsubsection}{神经元内部的参数学习}{23}{section*.25}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{循环神经单元(RNN)}{292}{section*.351}% \contentsline {subsection}{\numberline {1.2.3}多层神经网络}{24}{subsection.1.2.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长短时记忆网络(LSTM)}{292}{section*.352}% \contentsline {subsubsection}{线性变换和激活函数}{24}{section*.27}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{门控循环单元(GRU)}{294}{section*.355}% \contentsline {subsubsection}{单层神经网络$\rightarrow $多层神经网络}{26}{section*.34}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{双向模型}{295}{section*.357}% \contentsline {subsection}{\numberline {1.2.4}函数拟合能力}{27}{subsection.1.2.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{多层循环神经网络}{297}{section*.359}% \contentsline {section}{\numberline {1.3}神经网络的张量实现}{31}{section.1.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.4}注意力机制}{297}{subsection.6.3.4}% \contentsline {subsection}{\numberline {1.3.1} 张量及其计算}{32}{subsection.1.3.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译中的注意力机制}{298}{section*.362}% \contentsline {subsubsection}{张量}{32}{section*.44}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{上下文向量的计算}{299}{section*.365}% \contentsline {subsubsection}{张量的矩阵乘法}{34}{section*.47}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{注意力机制的解读}{302}{section*.370}% \contentsline {subsubsection}{张量的单元操作}{35}{section*.49}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.5}训练}{304}{subsection.6.3.5}% \contentsline {subsection}{\numberline {1.3.2}张量的物理存储形式}{36}{subsection.1.3.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{损失函数}{305}{section*.373}% \contentsline {subsection}{\numberline {1.3.3}使用开源框架实现张量计算}{36}{subsection.1.3.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长参数初始化}{305}{section*.374}% \contentsline {subsection}{\numberline {1.3.4}前向传播与计算图}{38}{subsection.1.3.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{优化策略}{306}{section*.375}% \contentsline {subsection}{\numberline {1.3.5}神经网络实例}{41}{subsection.1.3.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度裁剪}{306}{section*.377}% \contentsline {section}{\numberline {1.4}神经网络的参数训练}{42}{section.1.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{学习率策略}{307}{section*.378}% \contentsline {subsection}{\numberline {1.4.1}损失函数}{43}{subsection.1.4.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{并行训练}{308}{section*.381}% \contentsline {subsection}{\numberline {1.4.2}基于梯度的参数优化}{44}{subsection.1.4.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.6}推断}{309}{subsection.6.3.6}% \contentsline {subsubsection}{梯度下降}{44}{section*.67}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{贪婪搜索}{311}{section*.385}% \contentsline {subsubsection}{梯度获取}{46}{section*.69}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{束搜索}{312}{section*.388}% \contentsline {subsubsection}{基于梯度的方法的变种和改进}{49}{section*.73}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长度惩罚}{313}{section*.390}% \contentsline {subsection}{\numberline {1.4.3}参数更新的并行化策略}{52}{subsection.1.4.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.7}实例-GNMT}{314}{subsection.6.3.7}% \contentsline {subsection}{\numberline {1.4.4}梯度消失、梯度爆炸和稳定性训练}{54}{subsection.1.4.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.4}Transformer}{316}{section.6.4}% \contentsline {subsubsection}{易于优化的激活函数}{54}{section*.76}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.1}自注意力模型}{317}{subsection.6.4.1}% \contentsline {subsubsection}{梯度裁剪}{55}{section*.80}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.2}Transformer架构}{318}{subsection.6.4.2}% \contentsline {subsubsection}{稳定性训练}{56}{section*.81}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.3}位置编码}{320}{subsection.6.4.3}% \contentsline {subsection}{\numberline {1.4.5}过拟合}{57}{subsection.1.4.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.4}基于点乘的注意力机制}{322}{subsection.6.4.4}% \contentsline {subsection}{\numberline {1.4.6}反向传播}{58}{subsection.1.4.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.5}掩码操作}{324}{subsection.6.4.5}% \contentsline {subsubsection}{输出层的反向传播}{59}{section*.84}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.6}多头注意力}{326}{subsection.6.4.6}% \contentsline {subsubsection}{隐藏层的反向传播}{61}{section*.88}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.7}残差网络和层正则化}{327}{subsection.6.4.7}% \contentsline {subsubsection}{程序实现}{62}{section*.91}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.8}前馈全连接网络子层}{328}{subsection.6.4.8}% \contentsline {section}{\numberline {1.5}神经语言模型}{63}{section.1.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.9}训练}{329}{subsection.6.4.9}% \contentsline {subsection}{\numberline {1.5.1}基于神经网络的语言建模}{64}{subsection.1.5.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.10}推断}{332}{subsection.6.4.10}% \contentsline {subsubsection}{基于前馈神经网络的语言模型}{65}{section*.94}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.5}序列到序列问题及应用}{332}{section.6.5}% \contentsline {subsubsection}{基于循环神经网络的语言模型}{67}{section*.97}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.1}自动问答}{333}{subsection.6.5.1}% \contentsline {subsubsection}{基于自注意力机制的语言模型}{69}{section*.99}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.2}自动文摘}{333}{subsection.6.5.2}% \contentsline {subsubsection}{语言模型的评价}{70}{section*.101}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.3}文言文翻译}{333}{subsection.6.5.3}% \contentsline {subsection}{\numberline {1.5.2}单词表示模型}{70}{subsection.1.5.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.4}对联生成}{335}{subsection.6.5.4}% \contentsline {subsubsection}{One-hot编码}{70}{section*.102}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.5}古诗生成}{335}{subsection.6.5.5}% \contentsline {subsubsection}{分布式表示}{71}{section*.104}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.6}小结及深入阅读}{335}{section.6.6}% \contentsline {subsection}{\numberline {1.5.3}句子表示模型及预训练}{72}{subsection.1.5.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {IV}{附录}}{339}{part.4}% \contentsline {subsubsection}{简单的上下文表示模型}{73}{section*.108}
\ttl@stoptoc {default@3}
\ttl@starttoc {default@4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {A}附录A}{341}{appendix.1.A}% \contentsline {subsubsection}{ELMO模型}{74}{section*.111}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {B}附录B}{343}{appendix.2.B}% \contentsline {subsubsection}{GPT模型}{74}{section*.113}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {B.1}IBM模型3训练方法}{343}{section.2.B.1}% \contentsline {subsubsection}{BERT模型}{75}{section*.115}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {B.2}IBM模型4训练方法}{345}{section.2.B.2}% \contentsline {subsubsection}{为什么要预训练?}{76}{section*.117}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {B.3}IBM模型5训练方法}{347}{section.2.B.3}% \contentsline {section}{\numberline {1.6}小结及深入阅读}{77}{section.1.6}
\contentsfinish \contentsfinish
...@@ -92,7 +92,7 @@ ...@@ -92,7 +92,7 @@
{\large {\large
\noindent {\color{red} 在此感谢所有为本书做出贡献的人} \\ \noindent {\color{red} 在此感谢所有为本书做出贡献的人} \\
\noindent 曹润柘、曾信、孟霞、单韦乔、姜雨帆、王子扬、刘辉、许诺、李北、刘继强、张哲旸、周书涵、周涛、张裕浩、李炎洋刘晓倩、牛蕊 \\ \noindent 曹润柘、曾信、孟霞、单韦乔、姜雨帆、王子扬、刘辉、许诺、李北、刘继强、张哲旸、周书涵、周涛、张裕浩、李炎洋、林野、刘晓倩、牛蕊 \\
} }
...@@ -112,13 +112,13 @@ ...@@ -112,13 +112,13 @@
% CHAPTERS % CHAPTERS
%---------------------------------------------------------------------------------------- %----------------------------------------------------------------------------------------
\include{Chapter1/chapter1} %\include{Chapter1/chapter1}
\include{Chapter2/chapter2} %\include{Chapter2/chapter2}
\include{Chapter3/chapter3} %\include{Chapter3/chapter3}
\include{Chapter4/chapter4} %\include{Chapter4/chapter4}
\include{Chapter5/chapter5} %\include{Chapter5/chapter5}
\include{Chapter6/chapter6} \include{Chapter6/chapter6}
\include{ChapterAppend/chapterappend} %\include{ChapterAppend/chapterappend}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论