Commit dd9acda0 by 曹润柘

Merge branch 'master' into 'caorunzhe'

Master

See merge request !49
parents 1a304483 7ae53c52
@@ -1033,7 +1033,7 @@ a(i|j,m,l) &=\frac{\sum_{k=0}^{K}c_{\mathbb{E}}(i|j;\mathbf{s}^{[k]},\mathbf{t}^
 \begin{itemize}
-\item 对每个$i\in[1,l]$的目标语单词的产出率建模({\color{red!70} 红色}),即$\varphi_i$的概率。它依赖于$\mathbf{t}$和区间$[1,i-1]$的目标语单词的产出率$\varphi_1^{i-1}$\footnote{这里约定,当$i=1$ 时,$\varphi_1^0$ 表示一个空}
+\item 对每个$i\in[1,l]$的目标语单词的产出率建模({\color{red!70} 红色}),即$\varphi_i$的概率。它依赖于$\mathbf{t}$和区间$[1,i-1]$的目标语单词的产出率$\varphi_1^{i-1}$\footnote{这里约定,当$i=1$ 时,$\varphi_1^0$ 表示空。}
 \item $i=0$时的产出率建模({\color{blue!70} 蓝色}),即空标记$t_0$的产出率的概率。它依赖于$\mathbf{t}$和区间$[1,l]$的目标语单词的产出率$\varphi_1^l$
 \item 词汇翻译建模({\color{green!70} 绿色}),目标语言单词$t_i$生成第$k$个源语言单词$\tau_{ik}$时的概率,依赖于$\mathbf{t}$、所有目标语言单词的产出率$\varphi_0^l$、区间$i\in[1,l]$的目标语言单词生成的源语言单词$\tau_1^{i-1}$和目标语单词$t_i$生成的前$k$个源语言单词$\tau_{i1}^{k-1}$
 \item 对于每个$i\in[1,l]$的目标语言单词生成的源语言单词的{\small\bfnew{扭曲度}}\index{扭曲度}(Distortion)\index{Distortion}建模({\color{yellow!70!black} 黄色}),即第$i$个译文单词生成的第$k$个源语言单词在源文中的位置$\pi_{ik}$的概率。其中$\pi_1^{i-1}$和$\pi_{i1}^{k-1}$分别表示区间$[1,i-1]$的目标语言单词生成的源语言单词的扭曲度和第$i$个译文单词生成的前$k$个源语言单词的扭曲度。
...
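The four colored factors in the hunk above together form a fertility-based generative story in the style of IBM Model 3. A hedged reconstruction of how they combine (the full equation lies outside this hunk, so the book's exact form may differ):

\begin{eqnarray}
\textrm{P}(\tau,\pi|\mathbf{t}) &=& \prod_{i=1}^{l}\textrm{P}(\varphi_i|\varphi_1^{i-1},\mathbf{t}) \times \textrm{P}(\varphi_0|\varphi_1^{l},\mathbf{t}) \times \prod_{i=0}^{l}\prod_{k=1}^{\varphi_i}\textrm{P}(\tau_{ik}|\tau_{i1}^{k-1},\tau_1^{i-1},\varphi_0^{l},\mathbf{t}) \nonumber\\
&& \times\, \prod_{i=1}^{l}\prod_{k=1}^{\varphi_i}\textrm{P}(\pi_{ik}|\pi_{i1}^{k-1},\pi_1^{i-1},\tau_0^{l},\varphi_0^{l},\mathbf{t}) \nonumber
\end{eqnarray}

Here the first (red) term models $\varphi_i$ for $i\in[1,l]$, the second (blue) term the fertility of the empty token $t_0$, the third (green) term lexical translation, and the fourth (yellow) term distortion, matching the four bullets.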
@@ -2,7 +2,7 @@
 %-------------------------------------------------------------------------
 \begin{tabular}{| l | l |}
 \hline
-& {\footnotesize{$\prod\limits_{(j,i) \in \hat{A}} \textrm{P}(s_j,t_i)$} \color{red}{{\footnotesize{$\times\textrm{P}_{lm}(\mathbf{t})$}}}} \\ \hline
+& {\footnotesize{$\prod\limits_{(j,i) \in \hat{A}} \textrm{P}(s_j,t_i)$} \color{red}{{\footnotesize{$\times\textrm{P}_{\textrm{lm}}(\mathbf{t})$}}}} \\ \hline
 \begin{tikzpicture}
...
@@ -5,9 +5,9 @@
 \begin{scope}[minimum height = 18pt]
 \node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:};
-\node[anchor=west,fill=green!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
-\node[anchor=west,fill=red!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{有}};
-\node[anchor=west,fill=blue!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
+\node[anchor=west,fill=gray!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
+\node[anchor=west,fill=gray!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{有}};
+\node[anchor=west,fill=gray!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
 \node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:};
@@ -19,9 +19,9 @@
 \begin{scope}[xshift=15em,minimum height = 18pt]
 \node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:};
-\node[anchor=west,fill=green!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
+\node[anchor=west,fill=gray!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
 \node[anchor=west,fill=red!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{有}};
-\node[anchor=west,fill=blue!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
+\node[anchor=west,fill=gray!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
 \node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:};
 {
@@ -37,17 +37,17 @@
 \begin{scope}[yshift=-9.5em,minimum height = 18pt]
 \node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:};
-\node[anchor=west,fill=green!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
-\node[anchor=west,fill=red!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{有}};
-\node[anchor=west,fill=blue!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
+\node[anchor=west,fill=gray!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
+\node[anchor=west,fill=gray!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{有}};
+\node[anchor=west,fill=red!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
 \node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:};
 {
-\node[anchor=west,fill=red!20] (t1) at (0, -1.5) {\footnotesize{There is}};
+\node[anchor=west,fill=gray!20] (t1) at (0, -1.5) {\footnotesize{There is}};
 \path[<->, thick] (s2.south) edge (t1.north);
 }
 {
-\node[anchor=west,fill=blue!20] (t2) at ([xshift=1em]t1.east) {\footnotesize{an apple}};
+\node[anchor=west,fill=red!20] (t2) at ([xshift=1em]t1.east) {\footnotesize{an apple}};
 \path[<->, thick] (s3.south) edge (t2.north);
 }
 \node[anchor=north] (l) at ([xshift=7em,yshift=-0.5em]t0.south) {\footnotesize{(c)\ }};
@@ -59,21 +59,21 @@
 \begin{scope}[xshift=15em,yshift=-9.5em,minimum height = 18pt]%[scale=0.5]
 \node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:};
-\node[anchor=west,fill=green!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
-\node[anchor=west,fill=red!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{有}};
-\node[anchor=west,fill=blue!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
+\node[anchor=west,fill=red!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
+\node[anchor=west,fill=gray!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{有}};
+\node[anchor=west,fill=gray!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
 \node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:};
 {
-\node[anchor=west,fill=red!20] (t1) at (0, -1.5) {\footnotesize{There is}};
+\node[anchor=west,fill=gray!20] (t1) at (0, -1.5) {\footnotesize{There is}};
 \path[<->, thick] (s2.south) edge (t1.north);
 }
 {
-\node[anchor=west,fill=blue!20] (t2) at ([xshift=1em]t1.east) {\footnotesize{an apple}};
+\node[anchor=west,fill=gray!20] (t2) at ([xshift=1em]t1.east) {\footnotesize{an apple}};
 \path[<->, thick] (s3.south) edge (t2.north);
 }
 {
-\node[anchor=west,fill=green!20] (t3) at ([xshift=1em]t2.east) {\footnotesize{on the table}};
+\node[anchor=west,fill=red!20] (t3) at ([xshift=1em]t2.east) {\footnotesize{on the table}};
 \path[<->, thick] (s1.south) edge (t3.north);
 }
 \node[anchor=north] (l) at ([xshift=7em,yshift=-0.5em]t0.south) {\footnotesize{(d)\ }};
...
@@ -10,7 +10,7 @@
 \node [anchor=west] (s7) at ([yshift=-1.1em]s5.west) {\small{...}};
 \node [anchor=west] (s6) at ([yshift=1.0em]s1.west) {\small{...}};
 \begin{pgfonlayer}{background}
-\node [rectangle,inner sep=0.3em,fill=red!20] [fit = (s1) (s3) (s4) (s6) (s7)] (box1) {};
+\node [rectangle,inner sep=0.3em,fill=red!10] [fit = (s1) (s3) (s4) (s6) (s7)] (box1) {};
 \end{pgfonlayer}
 \end{tikzpicture}
 \end{center}
\ No newline at end of file
@@ -19,8 +19,8 @@
 \path[<->, thick] (s3.south) edge (t3.north);
 }
 \node[anchor=south] (s0) at ([xshift=-3em,yshift=0em]s1.south) {源语:};
 \node[anchor=east] (t0) at ([xshift=0em,yshift=-3.5em]s0.east) {目标语:};
 \end{scope}
 \end{tikzpicture}
...
@@ -7,9 +7,9 @@
 \node[anchor=west, fill=red!30, inner sep=0.05cm] (sp3) at (14em, 0) {有了\ 大幅度\ 下降};
 \draw[->] (sp1) edge [out=15, in=170] (sp3);
-\node[anchor=west, fill=blue!30, inner sep=0.05cm] (tp1) at (0, -0.8) {The imports};
-\node[anchor=west, fill=red!30, inner sep=0.05cm] (tp2) at (5.3em, -0.8) {drastically fell};
-\node[anchor=west] (tp3) at (11.3em, -0.8) {in the past five to ten years};
+\node[anchor=west, fill=blue!30, inner sep=0.05cm] (tp1) at (0, -1.2) {The imports};
+\node[anchor=west, fill=red!30, inner sep=0.05cm] (tp2) at (5.3em, -1.2) {drastically fell};
+\node[anchor=west] (tp3) at (11.3em, -1.2) {in the past five to ten years};
 \path[->] (tp1) edge [out=30, in=150] (tp2);
 \end{tikzpicture}
\ No newline at end of file
@@ -1393,8 +1393,8 @@ $M=0$代表该子层被丢弃,$M=1$代表正常进行当前子层的计算
 \parinterval 除此之外,有研究者已经发现残差网络中底层的子网络通过对输入进行抽象得到的表示对最终的输出有很大的影响,上层网络是通过对底层网络得到的表示不断修正来拟合训练目标\cite{journals/corr/GreffSS16}。该结论同样适用于Transformer模型,比如,在训练中,残差支路以及底层的梯度范数通常比较大,这也间接表明底层网络在整个优化的过程中需要更大的更新。考虑到这个因素,在设计每一个子层被丢弃的概率时可以采用自底向上线性增大的策略,保证底层的网络相比于顶层更容易保留下来。这里用$L$来代表编码端块的个数,$l$代表当前的子层的编号,那么$M$可以通过以下的方式得到:
 \begin{eqnarray}
 M = \left\{\begin{array}{ll}
-0&\textrm{p} \leqslant p_l\\
-1&\textrm{p} > p_l
+0&P \leqslant p_l\\
+1&P > p_l
 \end{array}\right.
 \label{eq:7.5-11}
 \end{eqnarray}
...
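The equation above only defines the gate $M$ from a sample $P \sim \textrm{Uniform}(0,1)$ and a per-sublayer threshold $p_l$; the bottom-up linear schedule for $p_l$ sits outside this hunk. A minimal Python sketch under the assumption $p_l = (l/L)\,p_{\max}$ (the function name, the exact schedule, and $p_{\max}$ are illustrative, not taken from the book):

import random

def sublayer_gate(l: int, L: int, p_max: float) -> int:
    """Sample M for the l-th of L encoder sub-layers: 0 = drop, 1 = keep."""
    p_l = (l / L) * p_max        # assumed linear schedule: lower layers drop less
    P = random.random()          # P ~ Uniform(0, 1), as in the equation above
    return 0 if P <= p_l else 1

With L = 6 and p_max = 0.3, the bottom sub-layer is dropped with probability 0.05 and the top one with probability 0.3, so lower layers are kept more often, exactly as the paragraph requires.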
@@ -22,7 +22,7 @@
 \fill [black] (h) circle(1pt);
 % y=0.73x + 2.54
-\draw [thick,red] (-1*0.3,1.81*0.2) to (10*0.3,9.84*0.2);
+\draw [thick,red] (0.5*0.3,1.81*0.2) to (10*0.3,9.84*0.2);
 \node [font=\footnotesize] at (1.5,-0.5) {欠拟合};
 \end{tikzpicture}
...
@@ -3,7 +3,7 @@
 \node [anchor=center] (node1) at (-2.9,1) {\small{训练:}};
 \node [anchor=center] (node11) at (-2.5,1) {};
 \node [anchor=center] (node12) at (-1.7,1) {};
 \node [anchor=center] (node2) at (-2.9,0.5) {\small{}};
 \node [anchor=center] (node21) at (-2.5,0.5) {};
 \node [anchor=center] (node22) at (-1.7,0.5) {};
 \node [anchor=west,draw=black,minimum width=5.6em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node1-1) at (0,0) {\footnotesize{双语数据}};
...
@@ -3,7 +3,7 @@
 \node [anchor=center] (node1) at (-2.3,1) {\small{训练:}};
 \node [anchor=center] (node11) at (-2.0,1) {};
 \node [anchor=center] (node12) at (-1.1,1) {};
 \node [anchor=center] (node2) at (-2.3,0.5) {\small{}};
 \node [anchor=center] (node21) at (-2.0,0.5) {};
 \node [anchor=center] (node22) at (-1.1,0.5) {};
 \node [anchor=west,draw=black,minimum width=5.6em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node1-1) at (0,0) {\footnotesize{双语数据}};
...
 \begin{center}
-\begin{tikzpicture}[scale=1.3]
+\begin{tikzpicture}[scale=1.0]
 \footnotesize{
 \begin{axis}[
-width=.40\textwidth,
-height=.30\textwidth,
+width=.50\textwidth,
+height=.40\textwidth,
 legend style={at={(0.60,0.08)}, anchor=south west},
 xlabel={\scriptsize{更新次数(10k)}},
 ylabel={\scriptsize{学习率 ($10^{-3}$)}},
...
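Judging by the axis labels (updates in units of 10k against a learning rate scaled by $10^{-3}$), this plot most likely draws a warmup-then-decay learning-rate curve. A small Python sketch of the Transformer-style inverse-square-root schedule, offered as an assumption since the plotted data is not part of this hunk:

def noam_lr(step: int, d_model: int = 512, warmup: int = 4000) -> float:
    """Linear warmup for `warmup` steps, then decay proportional to step**-0.5."""
    step = max(step, 1)          # guard against step = 0
    return d_model ** -0.5 * min(step ** -0.5, step * warmup ** -1.5)

For example, noam_lr(4000) gives the peak rate; earlier steps ramp up linearly and later steps decay.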
\indexentry{源语言|hyperpage}{17}
\indexentry{Source Language|hyperpage}{17}
\indexentry{目标语言|hyperpage}{17}
\indexentry{Target Language|hyperpage}{17}
\indexentry{机器翻译|hyperpage}{18}
\indexentry{Machine Translation|hyperpage}{18}
\indexentry{数据驱动|hyperpage}{23}
\indexentry{Data-Driven|hyperpage}{23}
\indexentry{编码器-解码器|hyperpage}{30}
\indexentry{encoder-decoder|hyperpage}{30}
\indexentry{质量评价|hyperpage}{32}
\indexentry{Quality Evaluation|hyperpage}{32}
\indexentry{无参考答案的评价|hyperpage}{32}
\indexentry{Quality Estimation|hyperpage}{32}
\indexentry{$n$元语法单元|hyperpage}{33}
\indexentry{$n$-gram准确率|hyperpage}{34}
\indexentry{$n$-gram Precision|hyperpage}{34}
\indexentry{短句惩罚因子|hyperpage}{34}
\indexentry{Brevity Penalty|hyperpage}{34}
\indexentry{分词|hyperpage}{50}
\indexentry{Segmentation|hyperpage}{50}
\indexentry{句法分析|hyperpage}{51}
\indexentry{Parsing|hyperpage}{51}
\indexentry{预处理|hyperpage}{51}
\indexentry{Pre-processing|hyperpage}{51}
\indexentry{后处理|hyperpage}{51}
\indexentry{Post-processing|hyperpage}{51}
\indexentry{事件|hyperpage}{52}
\indexentry{Event|hyperpage}{52}
\indexentry{随机事件|hyperpage}{52}
\indexentry{随机变量|hyperpage}{52}
\indexentry{Random Variable|hyperpage}{52}
\indexentry{概率|hyperpage}{52}
\indexentry{Probability|hyperpage}{52}
\indexentry{估计|hyperpage}{52}
\indexentry{估计值|hyperpage}{52}
\indexentry{Estimate|hyperpage}{52}
\indexentry{概率分布函数|hyperpage}{53}
\indexentry{概率密度函数|hyperpage}{53}
\indexentry{联合概率|hyperpage}{53}
\indexentry{Joint Probability|hyperpage}{53}
\indexentry{条件概率|hyperpage}{53}
\indexentry{Conditional Probability|hyperpage}{53}
\indexentry{边缘概率|hyperpage}{54}
\indexentry{Marginal Probability|hyperpage}{54}
\indexentry{全概率公式|hyperpage}{55}
\indexentry{Law of Total Probability|hyperpage}{55}
\indexentry{贝叶斯法则|hyperpage}{56}
\indexentry{Bayes' Rule|hyperpage}{56}
\indexentry{熵|hyperpage}{57}
\indexentry{Entropy|hyperpage}{57}
\indexentry{自信息|hyperpage}{57}
\indexentry{Self-information|hyperpage}{57}
\indexentry{相对熵|hyperpage}{58}
\indexentry{Relative Entropy|hyperpage}{58}
\indexentry{交叉熵|hyperpage}{58}
\indexentry{Cross-entropy|hyperpage}{58}
\indexentry{分词|hyperpage}{59}
\indexentry{Segmentation|hyperpage}{59}
\indexentry{单词|hyperpage}{59}
\indexentry{Word|hyperpage}{59}
\indexentry{词|hyperpage}{59}
\indexentry{词法分析|hyperpage}{59}
\indexentry{Lexical Analysis|hyperpage}{59}
\indexentry{标注数据|hyperpage}{61}
\indexentry{Annotated Data|hyperpage}{61}
\indexentry{训练|hyperpage}{62}
\indexentry{Training|hyperpage}{62}
\indexentry{推断|hyperpage}{62}
\indexentry{Inference|hyperpage}{62}
\indexentry{参数估计|hyperpage}{63}
\indexentry{Parameter Estimation|hyperpage}{63}
\indexentry{偏置|hyperpage}{63}
\indexentry{Bias|hyperpage}{63}
\indexentry{语言模型|hyperpage}{67}
\indexentry{Language Model|hyperpage}{67}
\indexentry{语言建模|hyperpage}{67}
\indexentry{Language Modeling|hyperpage}{67}
\indexentry{极大似然估计|hyperpage}{68}
\indexentry{人工神经网络方法|hyperpage}{68}
\indexentry{未登录词|hyperpage}{69}
\indexentry{Out-of-Vocabulary Word,OOV Word|hyperpage}{69}
\indexentry{加法平滑|hyperpage}{70}
\indexentry{Additive Smoothing|hyperpage}{70}
\indexentry{古德-图灵估计法|hyperpage}{71}
\indexentry{Good-Turing Estimate|hyperpage}{71}
\indexentry{句法|hyperpage}{74}
\indexentry{Syntax|hyperpage}{74}
\indexentry{短语结构分析|hyperpage}{74}
\indexentry{Phrase Structure Parsing|hyperpage}{74}
\indexentry{依存分析|hyperpage}{74}
\indexentry{Dependency Parsing|hyperpage}{74}
\indexentry{成分分析|hyperpage}{75}
\indexentry{完全分析|hyperpage}{75}
\indexentry{Full Parsing|hyperpage}{75}
\indexentry{终结符|hyperpage}{75}
\indexentry{Terminal|hyperpage}{75}
\indexentry{预终结符|hyperpage}{75}
\indexentry{Pre-terminal|hyperpage}{75}
\indexentry{非终结符|hyperpage}{75}
\indexentry{Non-terminal|hyperpage}{75}
\indexentry{上下文无关文法|hyperpage}{76}
\indexentry{Context-Free Grammar|hyperpage}{76}
\indexentry{产生式规则|hyperpage}{77}
\indexentry{Production Rule|hyperpage}{77}
\indexentry{推导|hyperpage}{78}
\indexentry{Derivation|hyperpage}{78}
\indexentry{句子|hyperpage}{78}
\indexentry{Sentence|hyperpage}{78}
\indexentry{语言|hyperpage}{78}
\indexentry{Language|hyperpage}{78}
\indexentry{歧义|hyperpage}{78}
\indexentry{Ambiguity|hyperpage}{78}
\indexentry{消歧|hyperpage}{79}
\indexentry{Disambiguation|hyperpage}{79}
\indexentry{最左优先推导|hyperpage}{79}
\indexentry{Left-most Derivation|hyperpage}{79}
\indexentry{概率上下文无关文法|hyperpage}{80}
\indexentry{Probabilistic Context-Free Grammar|hyperpage}{80}
\indexentry{树库|hyperpage}{81}
\indexentry{Treebank|hyperpage}{81}
\indexentry{生成模型|hyperpage}{82}
\indexentry{Generative Model|hyperpage}{82}
\indexentry{判别模型|hyperpage}{82}
\indexentry{Discriminative Model|hyperpage}{82}
\indexentry{流畅度|hyperpage}{88}
\indexentry{Fluency|hyperpage}{88}
\indexentry{准确性|hyperpage}{88}
\indexentry{Accuracy|hyperpage}{88}
\indexentry{充分性|hyperpage}{88}
\indexentry{Adequacy|hyperpage}{88}
\indexentry{翻译候选|hyperpage}{89}
\indexentry{Translation Candidate|hyperpage}{89}
\indexentry{训练|hyperpage}{91}
\indexentry{Training|hyperpage}{91}
\indexentry{解码|hyperpage}{91}
\indexentry{Decoding|hyperpage}{91}
\indexentry{推断|hyperpage}{91}
\indexentry{Inference|hyperpage}{91}
\indexentry{词对齐|hyperpage}{96}
\indexentry{Word Alignment|hyperpage}{96}
\indexentry{词对齐连接|hyperpage}{96}
\indexentry{解码|hyperpage}{99}
\indexentry{Decoding|hyperpage}{99}
\indexentry{噪声信道模型|hyperpage}{102}
\indexentry{Noise Channel Model|hyperpage}{102}
\indexentry{词对齐|hyperpage}{105}
\indexentry{Word Alignment|hyperpage}{105}
\indexentry{非对称的词对齐|hyperpage}{105}
\indexentry{Asymmetric Word Alignment|hyperpage}{105}
\indexentry{空对齐|hyperpage}{105}
\indexentry{拉格朗日乘数法|hyperpage}{113}
\indexentry{The Lagrange Multiplier Method|hyperpage}{113}
\indexentry{期望最大化|hyperpage}{115}
\indexentry{Expectation Maximization|hyperpage}{115}
\indexentry{期望频次|hyperpage}{116}
\indexentry{Expected Count|hyperpage}{116}
\indexentry{产出率|hyperpage}{119}
\indexentry{繁衍率|hyperpage}{119}
\indexentry{Fertility|hyperpage}{119}
\indexentry{扭曲度|hyperpage}{122}
\indexentry{Distortion|hyperpage}{122}
\indexentry{概念单元|hyperpage}{124}
\indexentry{概念|hyperpage}{124}
\indexentry{Concept|hyperpage}{124}
\indexentry{缺陷|hyperpage}{125}
\indexentry{Deficiency|hyperpage}{125}
\indexentry{凸函数|hyperpage}{129}
\indexentry{Convex function|hyperpage}{129}
\indexentry{对称化|hyperpage}{130}
\indexentry{Symmetrization|hyperpage}{130}
\indexentry{系统偏置|hyperpage}{131}
\indexentry{System Bias|hyperpage}{131}
\indexentry{组合性翻译|hyperpage}{136}
\indexentry{Compositional Translation|hyperpage}{136}
\indexentry{短语|hyperpage}{136}
\indexentry{短语切分|hyperpage}{141}
\indexentry{Phrasal Segmentation|hyperpage}{141}
\indexentry{短语对|hyperpage}{141}
\indexentry{推导|hyperpage}{141}
\indexentry{Derivation|hyperpage}{141}
\indexentry{生成式模型|hyperpage}{144}
\indexentry{Generative Model|hyperpage}{144}
\indexentry{判别式模型|hyperpage}{144}
\indexentry{Discriminative Model|hyperpage}{144}
\indexentry{对数线性模型|hyperpage}{145}
\indexentry{Log-linear Model|hyperpage}{145}
\indexentry{短语抽取|hyperpage}{146}
\indexentry{Phrase Extraction|hyperpage}{146}
\indexentry{词汇化翻译概率|hyperpage}{149}
\indexentry{Lexical Translation Probability|hyperpage}{149}
\indexentry{短语表|hyperpage}{149}
\indexentry{Phrase Table|hyperpage}{149}
\indexentry{调序|hyperpage}{150}
\indexentry{Reordering|hyperpage}{150}
\indexentry{模型训练|hyperpage}{154}
\indexentry{Model Training|hyperpage}{154}
\indexentry{权重调优|hyperpage}{154}
\indexentry{Weight Tuning|hyperpage}{154}
\indexentry{最小错误率训练|hyperpage}{154}
\indexentry{Minimum Error Rate Training|hyperpage}{154}
\indexentry{调优集合|hyperpage}{154}
\indexentry{Tuning Set|hyperpage}{154}
\indexentry{线搜索|hyperpage}{155}
\indexentry{Line Search|hyperpage}{155}
\indexentry{格搜索|hyperpage}{156}
\indexentry{Grid Search|hyperpage}{156}
\indexentry{覆盖度模型|hyperpage}{158}
\indexentry{Coverage Model|hyperpage}{158}
\indexentry{翻译候选|hyperpage}{158}
\indexentry{Translation Candidate|hyperpage}{158}
\indexentry{翻译假设|hyperpage}{158}
\indexentry{Translation Hypothesis|hyperpage}{158}
\indexentry{剪枝|hyperpage}{159}
\indexentry{Pruning|hyperpage}{159}
\indexentry{束剪枝|hyperpage}{159}
\indexentry{Beam Pruning|hyperpage}{159}
\indexentry{直方图剪枝|hyperpage}{160}
\indexentry{Histogram Pruning|hyperpage}{160}
\indexentry{阈值剪枝|hyperpage}{160}
\indexentry{Threshold Pruning|hyperpage}{160}
\indexentry{假设重组|hyperpage}{160}
\indexentry{Hypothesis Recombination|hyperpage}{160}
\indexentry{基于层次短语的模型|hyperpage}{165}
\indexentry{Hierarchical Phrase-based Model|hyperpage}{165}
\indexentry{同步上下文无关文法|hyperpage}{165}
\indexentry{Synchronous Context-free Grammar|hyperpage}{165}
\indexentry{基于层次短语的文法|hyperpage}{166}
\indexentry{Hierarchical Phrase-based Grammar|hyperpage}{166}
\indexentry{推导|hyperpage}{167}
\indexentry{Derivation|hyperpage}{167}
\indexentry{胶水规则|hyperpage}{167}
\indexentry{Glue Rule|hyperpage}{167}
\indexentry{乔姆斯基范式|hyperpage}{171}
\indexentry{Chomsky Normal Form|hyperpage}{171}
\indexentry{跨度|hyperpage}{171}
\indexentry{Span|hyperpage}{171}
\indexentry{自下而上的分析|hyperpage}{172}
\indexentry{Bottom-up Parsing|hyperpage}{172}
\indexentry{束剪枝|hyperpage}{174}
\indexentry{Beam Pruning|hyperpage}{174}
\indexentry{立方剪枝|hyperpage}{176}
\indexentry{Cube Pruning|hyperpage}{176}
\indexentry{序列化|hyperpage}{179}
\indexentry{线性化|hyperpage}{179}
\indexentry{Linearization|hyperpage}{179}
\indexentry{树到串翻译规则|hyperpage}{181}
\indexentry{Tree-to-String Translation Rule|hyperpage}{181}
\indexentry{树到树翻译规则|hyperpage}{181}
\indexentry{Tree-to-Tree Translation Rule|hyperpage}{181}
\indexentry{树片段|hyperpage}{182}
\indexentry{Tree Fragment|hyperpage}{182}
\indexentry{同步树替换文法规则|hyperpage}{183}
\indexentry{Synchronous Tree Substitution Grammar Rule|hyperpage}{183}
\indexentry{边缘集合|hyperpage}{189}
\indexentry{Frontier Set|hyperpage}{189}
\indexentry{最小规则|hyperpage}{190}
\indexentry{Minimal Rules|hyperpage}{190}
\indexentry{二叉化|hyperpage}{193}
\indexentry{Binarization|hyperpage}{193}
\indexentry{基于短语的特征|hyperpage}{198}
\indexentry{基于句法的特征|hyperpage}{198}
\indexentry{有向超图|hyperpage}{199}
\indexentry{Directed Hyper-graph|hyperpage}{199}
\indexentry{超边|hyperpage}{199}
\indexentry{Hyper-edge|hyperpage}{199}
\indexentry{半环分析|hyperpage}{199}
\indexentry{Semi-ring Parsing|hyperpage}{199}
\indexentry{组合|hyperpage}{201}
\indexentry{Composition|hyperpage}{201}
\indexentry{基于串的解码|hyperpage}{202}
\indexentry{String-based Decoding|hyperpage}{202}
\indexentry{基于树的解码|hyperpage}{202}
\indexentry{Tree-based Decoding|hyperpage}{202}
\indexentry{Lexicalized Norm Form|hyperpage}{205}
\indexentry{人工神经网络|hyperpage}{211}
\indexentry{Artificial Neural Networks|hyperpage}{211}
\indexentry{神经网络|hyperpage}{211}
\indexentry{Neural Networks|hyperpage}{211}
\indexentry{深度学习|hyperpage}{212}
\indexentry{Deep Learning|hyperpage}{212}
\indexentry{连接主义|hyperpage}{213}
\indexentry{Connectionism|hyperpage}{213}
\indexentry{分布式表示|hyperpage}{213}
\indexentry{Distributed Representation|hyperpage}{213}
\indexentry{符号主义|hyperpage}{213}
\indexentry{Symbolicism|hyperpage}{213}
\indexentry{端到端学习|hyperpage}{215}
\indexentry{End-to-End Learning|hyperpage}{215}
\indexentry{表示学习|hyperpage}{215}
\indexentry{Representation Learning|hyperpage}{215}
\indexentry{分布式表示|hyperpage}{216}
\indexentry{Distributed Representation|hyperpage}{216}
\indexentry{标量|hyperpage}{217}
\indexentry{Scalar|hyperpage}{217}
\indexentry{向量|hyperpage}{217}
\indexentry{Vector|hyperpage}{217}
\indexentry{矩阵|hyperpage}{217}
\indexentry{Matrix|hyperpage}{217}
\indexentry{转置|hyperpage}{218}
\indexentry{Transpose|hyperpage}{218}
\indexentry{按元素加法|hyperpage}{218}
\indexentry{Element-wise Addition|hyperpage}{218}
\indexentry{数乘|hyperpage}{219}
\indexentry{Scalar Multiplication|hyperpage}{219}
\indexentry{按元素乘积|hyperpage}{220}
\indexentry{Element-wise Product|hyperpage}{220}
\indexentry{线性映射|hyperpage}{220}
\indexentry{Linear Mapping|hyperpage}{220}
\indexentry{线性变换|hyperpage}{220}
\indexentry{Linear Transformation|hyperpage}{220}
\indexentry{范数|hyperpage}{221}
\indexentry{Norm|hyperpage}{221}
\indexentry{欧几里得范数|hyperpage}{222}
\indexentry{Euclidean Norm|hyperpage}{222}
\indexentry{Frobenius 范数|hyperpage}{222}
\indexentry{Frobenius Norm|hyperpage}{222}
\indexentry{权重|hyperpage}{223}
\indexentry{Weight|hyperpage}{223}
\indexentry{张量|hyperpage}{234}
\indexentry{Tensor|hyperpage}{234}
\indexentry{阶|hyperpage}{234}
\indexentry{Rank|hyperpage}{234}
\indexentry{广播机制|hyperpage}{237}
\indexentry{向量化|hyperpage}{237}
\indexentry{Vectorization|hyperpage}{237}
\indexentry{前向传播|hyperpage}{240}
\indexentry{计算图|hyperpage}{243}
\indexentry{Computation Graph|hyperpage}{243}
\indexentry{模型参数|hyperpage}{244}
\indexentry{Model Parameters|hyperpage}{244}
\indexentry{训练|hyperpage}{244}
\indexentry{Training|hyperpage}{244}
\indexentry{有标注数据|hyperpage}{244}
\indexentry{Annotated Data/Labeled Data|hyperpage}{244}
\indexentry{有指导的训练|hyperpage}{244}
\indexentry{有监督的训练|hyperpage}{244}
\indexentry{Supervised Training|hyperpage}{244}
\indexentry{训练数据集合|hyperpage}{245}
\indexentry{Training Data Set|hyperpage}{245}
\indexentry{损失函数|hyperpage}{245}
\indexentry{Loss Function|hyperpage}{245}
\indexentry{目标函数|hyperpage}{245}
\indexentry{Objective Function|hyperpage}{245}
\indexentry{代价函数|hyperpage}{246}
\indexentry{Cost Function|hyperpage}{246}
\indexentry{梯度下降方法|hyperpage}{246}
\indexentry{Gradient Descent Method|hyperpage}{246}
\indexentry{参数更新的规则|hyperpage}{247}
\indexentry{Update Rule|hyperpage}{247}
\indexentry{学习率|hyperpage}{247}
\indexentry{Learning Rate|hyperpage}{247}
\indexentry{基于梯度的方法|hyperpage}{247}
\indexentry{Gradient-based Method|hyperpage}{247}
\indexentry{批量梯度下降|hyperpage}{247}
\indexentry{Batch Gradient Descent|hyperpage}{247}
\indexentry{随机梯度下降|hyperpage}{247}
\indexentry{Stochastic Gradient Descent|hyperpage}{247}
\indexentry{小批量梯度下降|hyperpage}{248}
\indexentry{Mini-Batch Gradient Descent|hyperpage}{248}
\indexentry{数值微分|hyperpage}{248}
\indexentry{Numerical Differentiation|hyperpage}{248}
\indexentry{截断误差|hyperpage}{249}
\indexentry{Truncation Error|hyperpage}{249}
\indexentry{舍入误差|hyperpage}{249}
\indexentry{Round-off Error|hyperpage}{249}
\indexentry{符号微分|hyperpage}{249}
\indexentry{Symbolic Differentiation|hyperpage}{249}
\indexentry{表达式膨胀|hyperpage}{249}
\indexentry{Expression Swell|hyperpage}{249}
\indexentry{自动微分|hyperpage}{249}
\indexentry{Automatic Differentiation|hyperpage}{249}
\indexentry{反向模式|hyperpage}{250}
\indexentry{Backward Mode|hyperpage}{250}
\indexentry{学习率|hyperpage}{251}
\indexentry{Learning Rate|hyperpage}{251}
\indexentry{Momentum|hyperpage}{251}
\indexentry{AdaGrad|hyperpage}{252}
\indexentry{衰减|hyperpage}{252}
\indexentry{Decay|hyperpage}{252}
\indexentry{RMSprop|hyperpage}{253}
\indexentry{Adam|hyperpage}{253}
\indexentry{数据并行|hyperpage}{254}
\indexentry{同步更新|hyperpage}{254}
\indexentry{Synchronous Update|hyperpage}{254}
\indexentry{异步更新|hyperpage}{254}
\indexentry{Asynchronous Update|hyperpage}{254}
\indexentry{参数服务器|hyperpage}{254}
\indexentry{Parameter Server|hyperpage}{254}
\indexentry{梯度消失|hyperpage}{256}
\indexentry{Gradient Vanishing|hyperpage}{256}
\indexentry{梯度爆炸|hyperpage}{256}
\indexentry{Gradient Explosion|hyperpage}{256}
\indexentry{梯度裁剪|hyperpage}{257}
\indexentry{Gradient Clipping|hyperpage}{257}
\indexentry{批量归一化|hyperpage}{258}
\indexentry{Batch Normalization|hyperpage}{258}
\indexentry{层归一化|hyperpage}{258}
\indexentry{Layer Normalization|hyperpage}{258}
\indexentry{残差网络|hyperpage}{258}
\indexentry{Residual Networks|hyperpage}{258}
\indexentry{跳接|hyperpage}{258}
\indexentry{Shortcut Connection|hyperpage}{258}
\indexentry{过拟合|hyperpage}{259}
\indexentry{Overfitting|hyperpage}{259}
\indexentry{正则化|hyperpage}{259}
\indexentry{Regularization|hyperpage}{259}
\indexentry{反向传播|hyperpage}{260}
\indexentry{Back Propagation|hyperpage}{260}
\indexentry{神经语言模型|hyperpage}{266}
\indexentry{Neural Language Model|hyperpage}{266}
\indexentry{前馈神经网络语言模型|hyperpage}{267}
\indexentry{Feed-forward Neural Network Language Model|hyperpage}{267}
\indexentry{循环神经网络|hyperpage}{269}
\indexentry{Recurrent Neural Network|hyperpage}{269}
\indexentry{循环神经网络语言模型|hyperpage}{269}
\indexentry{RNNLM|hyperpage}{269}
\indexentry{循环单元|hyperpage}{269}
\indexentry{RNN Cell|hyperpage}{269}
\indexentry{自注意力机制|hyperpage}{270}
\indexentry{Self-Attention Mechanism|hyperpage}{271}
\indexentry{注意力权重|hyperpage}{271}
\indexentry{Attention Weight|hyperpage}{271}
\indexentry{困惑度|hyperpage}{271}
\indexentry{Perplexity|hyperpage}{271}
\indexentry{One-hot编码|hyperpage}{272}
\indexentry{独热编码|hyperpage}{272}
\indexentry{分布式表示|hyperpage}{272}
\indexentry{Distributed Representation|hyperpage}{272}
\indexentry{词嵌入|hyperpage}{272}
\indexentry{Word Embedding|hyperpage}{272}
\indexentry{句子表示模型|hyperpage}{275}
\indexentry{句子的表示|hyperpage}{275}
\indexentry{表示学习|hyperpage}{275}
\indexentry{Representation Learning|hyperpage}{275}
\indexentry{可解释机器学习|hyperpage}{279}
\indexentry{Explainable Machine Learning|hyperpage}{279}
\indexentry{神经机器翻译|hyperpage}{281}
\indexentry{Neural Machine Translation|hyperpage}{281}
\indexentry{分布式表示|hyperpage}{283}
\indexentry{Distributed Representation|hyperpage}{283}
\indexentry{特征工程|hyperpage}{289}
\indexentry{Feature Engineering|hyperpage}{289}
\indexentry{编码器-解码器模型|hyperpage}{290}
\indexentry{Encoder-Decoder Paradigm|hyperpage}{290}
\indexentry{编码器-解码器框架|hyperpage}{290}
\indexentry{循环神经网络|hyperpage}{296}
\indexentry{Recurrent Neural Network, RNN|hyperpage}{296}
\indexentry{词嵌入|hyperpage}{298}
\indexentry{Word Embedding|hyperpage}{298}
\indexentry{表示学习|hyperpage}{298}
\indexentry{Representation Learning|hyperpage}{298}
\indexentry{生成|hyperpage}{298}
\indexentry{Generation|hyperpage}{298}
\indexentry{长短时记忆|hyperpage}{302}
\indexentry{Long Short-Term Memory|hyperpage}{302}
\indexentry{遗忘|hyperpage}{302}
\indexentry{记忆更新|hyperpage}{303}
\indexentry{输出|hyperpage}{304}
\indexentry{门循环单元|hyperpage}{304}
\indexentry{Gated Recurrent Unit,GRU|hyperpage}{304}
\indexentry{注意力权重|hyperpage}{310}
\indexentry{Attention Weight|hyperpage}{310}
\indexentry{一阶矩估计|hyperpage}{316}
\indexentry{First Moment Estimation|hyperpage}{316}
\indexentry{二阶矩估计|hyperpage}{316}
\indexentry{Second Moment Estimation|hyperpage}{316}
\indexentry{学习率|hyperpage}{316}
\indexentry{Learning Rate|hyperpage}{316}
\indexentry{逐渐预热|hyperpage}{317}
\indexentry{Gradual Warmup|hyperpage}{317}
\indexentry{分段常数衰减|hyperpage}{318}
\indexentry{Piecewise Constant Decay|hyperpage}{318}
\indexentry{数据并行|hyperpage}{318}
\indexentry{模型并行|hyperpage}{318}
\indexentry{全搜索|hyperpage}{319}
\indexentry{Full Search|hyperpage}{319}
\indexentry{贪婪搜索|hyperpage}{321}
\indexentry{Greedy Search|hyperpage}{321}
\indexentry{束搜索|hyperpage}{321}
\indexentry{Beam Search|hyperpage}{321}
\indexentry{自回归模型|hyperpage}{321}
\indexentry{Autoregressive Model|hyperpage}{321}
\indexentry{非自回归模型|hyperpage}{321}
\indexentry{Non-autoregressive Model|hyperpage}{321}
\indexentry{自注意力机制|hyperpage}{327}
\indexentry{Self-Attention|hyperpage}{327}
\indexentry{特征提取|hyperpage}{328}
\indexentry{自注意力子层|hyperpage}{328}
\indexentry{Self-attention Sub-layer|hyperpage}{328}
\indexentry{前馈神经网络子层|hyperpage}{328}
\indexentry{Feed-forward Sub-layer|hyperpage}{328}
\indexentry{残差连接|hyperpage}{328}
\indexentry{Residual Connection|hyperpage}{328}
\indexentry{层正则化|hyperpage}{329}
\indexentry{Layer Normalization|hyperpage}{329}
\indexentry{编码-解码注意力子层|hyperpage}{329}
\indexentry{Encoder-decoder Attention Sub-layer|hyperpage}{329}
\indexentry{词嵌入|hyperpage}{330}
\indexentry{Word Embedding|hyperpage}{330}
\indexentry{位置编码|hyperpage}{330}
\indexentry{Position Embedding|hyperpage}{330}
\indexentry{点乘注意力|hyperpage}{333}
\indexentry{Scaled Dot-Product Attention|hyperpage}{333}
\indexentry{多头注意力|hyperpage}{336}
\indexentry{Multi-head Attention|hyperpage}{336}
\indexentry{残差连接|hyperpage}{337}
\indexentry{短连接|hyperpage}{337}
\indexentry{Short-cut Connection|hyperpage}{337}
\indexentry{后正则化|hyperpage}{338}
\indexentry{Post-norm|hyperpage}{338}
\indexentry{前正则化|hyperpage}{338}
\indexentry{Pre-norm|hyperpage}{338}
\indexentry{交叉熵损失|hyperpage}{339}
\indexentry{Cross Entropy Loss|hyperpage}{339}
\indexentry{预热|hyperpage}{340}
\indexentry{Warmup|hyperpage}{340}
\indexentry{小批量训练|hyperpage}{340}
\indexentry{Mini-batch Training|hyperpage}{340}
\indexentry{Dropout|hyperpage}{340}
\indexentry{过拟合|hyperpage}{341}
\indexentry{Overfitting|hyperpage}{341}
\indexentry{标签平滑|hyperpage}{341}
\indexentry{Label Smoothing|hyperpage}{341}
\indexentry{序列到序列的转换/生成问题|hyperpage}{342}
\indexentry{Sequence-to-Sequence Problem|hyperpage}{342}
\indexentry{未登录词|hyperpage}{353}
\indexentry{Out-of-Vocabulary Word,OOV Word|hyperpage}{353}
\indexentry{子词切分|hyperpage}{353}
\indexentry{Sub-word Segmentation|hyperpage}{353}
\indexentry{标准化|hyperpage}{353}
\indexentry{Normalization|hyperpage}{353}
\indexentry{数据清洗|hyperpage}{353}
\indexentry{Data Cleaning|hyperpage}{353}
\indexentry{数据选择|hyperpage}{355}
\indexentry{Data Selection|hyperpage}{355}
\indexentry{数据过滤|hyperpage}{355}
\indexentry{Data Filtering|hyperpage}{355}
\indexentry{开放词表|hyperpage}{358}
\indexentry{Open-Vocabulary|hyperpage}{358}
\indexentry{子词|hyperpage}{359}
\indexentry{Sub-word|hyperpage}{359}
\indexentry{字节对编码|hyperpage}{359}
\indexentry{双字节编码|hyperpage}{359}
\indexentry{Byte Pair Encoding,BPE|hyperpage}{359}
\indexentry{正则化|hyperpage}{362}
\indexentry{Regularization|hyperpage}{362}
\indexentry{过拟合问题|hyperpage}{362}
\indexentry{Overfitting Problem|hyperpage}{362}
\indexentry{反问题|hyperpage}{362}
\indexentry{Inverse Problem|hyperpage}{362}
\indexentry{适定的|hyperpage}{362}
\indexentry{Well-posed|hyperpage}{362}
\indexentry{不适定问题|hyperpage}{362}
\indexentry{Ill-posed Problem|hyperpage}{362}
\indexentry{降噪|hyperpage}{363}
\indexentry{Denoising|hyperpage}{363}
\indexentry{泛化|hyperpage}{363}
\indexentry{Generalization|hyperpage}{363}
\indexentry{标签平滑|hyperpage}{365}
\indexentry{Label Smoothing|hyperpage}{365}
\indexentry{相互适应|hyperpage}{366}
\indexentry{Co-Adaptation|hyperpage}{366}
\indexentry{集成学习|hyperpage}{367}
\indexentry{Ensemble Learning|hyperpage}{367}
\indexentry{容量|hyperpage}{368}
\indexentry{Capacity|hyperpage}{368}
\indexentry{宽残差网络|hyperpage}{369}
\indexentry{Wide Residual Network|hyperpage}{369}
\indexentry{探测任务|hyperpage}{370}
\indexentry{Probing Task|hyperpage}{370}
\indexentry{表面信息|hyperpage}{370}
\indexentry{Surface Information|hyperpage}{370}
\indexentry{语法信息|hyperpage}{370}
\indexentry{Syntactic Information|hyperpage}{370}
\indexentry{语义信息|hyperpage}{370}
\indexentry{Semantic Information|hyperpage}{370}
\indexentry{词嵌入|hyperpage}{371}
\indexentry{Embedding|hyperpage}{371}
\indexentry{数据并行|hyperpage}{371}
\indexentry{Data Parallelism|hyperpage}{371}
\indexentry{模型并行|hyperpage}{371}
\indexentry{Model Parallelism|hyperpage}{371}
\indexentry{小批量训练|hyperpage}{371}
\indexentry{Mini-batch Training|hyperpage}{371}
\indexentry{课程学习|hyperpage}{374}
\indexentry{Curriculum Learning|hyperpage}{374}
\indexentry{推断|hyperpage}{374}
\indexentry{Inference|hyperpage}{374}
\indexentry{解码|hyperpage}{374}
\indexentry{Decoding|hyperpage}{374}
\indexentry{准确性|hyperpage}{374}
\indexentry{Accuracy|hyperpage}{374}
\indexentry{时延|hyperpage}{374}
\indexentry{Latency|hyperpage}{374}
\indexentry{存储|hyperpage}{374}
\indexentry{Memory|hyperpage}{374}
\indexentry{搜索错误|hyperpage}{374}
\indexentry{Search Error|hyperpage}{374}
\indexentry{模型错误|hyperpage}{374}
\indexentry{Modeling Error|hyperpage}{374}
\indexentry{重排序|hyperpage}{376}
\indexentry{Re-ranking|hyperpage}{376}
\indexentry{双向推断|hyperpage}{376}
\indexentry{Bidirectional Inference|hyperpage}{376}
\indexentry{批量推断|hyperpage}{380}
\indexentry{Batch Inference|hyperpage}{380}
\indexentry{批量处理|hyperpage}{380}
\indexentry{Batching|hyperpage}{380}
\indexentry{二值网络|hyperpage}{382}
\indexentry{Binarized Neural Networks|hyperpage}{382}
\indexentry{自回归翻译|hyperpage}{382}
\indexentry{Autoregressive Translation|hyperpage}{382}
\indexentry{非自回归翻译|hyperpage}{382}
\indexentry{Non-autoregressive Translation|hyperpage}{382}
\indexentry{繁衍率|hyperpage}{382}
\indexentry{Fertility|hyperpage}{382}
\indexentry{偏置|hyperpage}{384}
\indexentry{Bias|hyperpage}{384}
\indexentry{退化|hyperpage}{384}
\indexentry{Degenerate|hyperpage}{384}
\indexentry{过翻译|hyperpage}{385}
\indexentry{Over Translation|hyperpage}{385}
\indexentry{欠翻译|hyperpage}{385}
\indexentry{Under Translation|hyperpage}{385}
\indexentry{充分性|hyperpage}{386}
\indexentry{Adequacy|hyperpage}{386}
\indexentry{系统融合|hyperpage}{386}
\indexentry{System Combination|hyperpage}{386}
\indexentry{假设选择|hyperpage}{387}
\indexentry{Hypothesis Selection|hyperpage}{387}
\indexentry{多样性|hyperpage}{387}
\indexentry{Diversity|hyperpage}{387}
\indexentry{重排序|hyperpage}{388}
\indexentry{Re-ranking|hyperpage}{388}
\indexentry{混淆网络|hyperpage}{389}
\indexentry{Confusion Network|hyperpage}{389}
\indexentry{动态线性层聚合方法|hyperpage}{393}
\indexentry{Dynamic Linear Combination of Layers,DLCL|hyperpage}{393}
\indexentry{相互适应|hyperpage}{397}
\indexentry{Co-adaptation|hyperpage}{397}
\indexentry{数据增强|hyperpage}{399}
\indexentry{Data Augmentation|hyperpage}{399}
\indexentry{回译|hyperpage}{399}
\indexentry{Back Translation|hyperpage}{399}
\indexentry{迭代式回译|hyperpage}{400}
\indexentry{Iterative Back Translation|hyperpage}{400}
\indexentry{前向翻译|hyperpage}{400}
\indexentry{Forward Translation|hyperpage}{400}
\indexentry{预训练|hyperpage}{401}
\indexentry{Pre-training|hyperpage}{401}
\indexentry{微调|hyperpage}{401}
\indexentry{Fine-tuning|hyperpage}{401}
\indexentry{多任务学习|hyperpage}{402}
\indexentry{Multitask Learning|hyperpage}{402}
\indexentry{模型压缩|hyperpage}{404}
\indexentry{Model Compression|hyperpage}{404}
\indexentry{学习难度|hyperpage}{404}
\indexentry{Learning Difficulty|hyperpage}{404}
\indexentry{教师模型|hyperpage}{405}
\indexentry{Teacher Model|hyperpage}{405}
\indexentry{学生模型|hyperpage}{405}
\indexentry{Student Model|hyperpage}{405}
\indexentry{基于单词的知识精炼|hyperpage}{405}
\indexentry{Word-level Knowledge Distillation|hyperpage}{405}
\indexentry{基于序列的知识精炼|hyperpage}{405}
\indexentry{Sequence-level Knowledge Distillation|hyperpage}{405}
\indexentry{中间层输出|hyperpage}{406}
\indexentry{Hint-based Knowledge Transfer|hyperpage}{406}
\indexentry{注意力分布|hyperpage}{406}
\indexentry{Attention To Attention Transfer|hyperpage}{406}
\indexentry{循环一致性|hyperpage}{409}
\indexentry{Cycle Consistency|hyperpage}{409}
\indexentry{翻译中回译|hyperpage}{410}
\indexentry{On-the-fly Back-translation|hyperpage}{410}
\indexentry{网络结构搜索技术|hyperpage}{412}
\indexentry{Neural Architecture Search,NAS|hyperpage}{412}
@@ -623,9 +623,9 @@
 \defcounter {refsection}{0}\relax
 \contentsline {subsection}{\numberline {7.4.1}推断优化}{374}{subsection.7.4.1}
 \defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{推断系统的架构}{374}{section*.457}
+\contentsline {subsubsection}{推断系统的架构}{375}{section*.457}
 \defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{自左向右推断 vs 自右向左推断}{375}{section*.459}
+\contentsline {subsubsection}{自左向右推断 vs 自右向左推断}{376}{section*.459}
 \defcounter {refsection}{0}\relax
 \contentsline {subsubsection}{推断加速}{376}{section*.460}
 \defcounter {refsection}{0}\relax
@@ -649,7 +649,7 @@
 \defcounter {refsection}{0}\relax
 \contentsline {subsection}{\numberline {7.5.1}深层模型}{390}{subsection.7.5.1}
 \defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{Post-Norm vs Pre-Norm}{390}{section*.477}
+\contentsline {subsubsection}{Post-Norm vs Pre-Norm}{391}{section*.477}
 \defcounter {refsection}{0}\relax
 \contentsline {subsubsection}{层聚合}{393}{section*.480}
 \defcounter {refsection}{0}\relax
@@ -663,17 +663,17 @@
 \defcounter {refsection}{0}\relax
 \contentsline {subsubsection}{深层模型的鲁棒性训练}{397}{section*.489}
 \defcounter {refsection}{0}\relax
-\contentsline {subsection}{\numberline {7.5.2}单语数据的使用}{399}{subsection.7.5.2}
+\contentsline {subsection}{\numberline {7.5.2}单语数据的使用}{398}{subsection.7.5.2}
 \defcounter {refsection}{0}\relax
 \contentsline {subsubsection}{伪数据}{399}{section*.492}
 \defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{预训练}{400}{section*.495}
+\contentsline {subsubsection}{预训练}{401}{section*.495}
 \defcounter {refsection}{0}\relax
 \contentsline {subsubsection}{联合训练}{402}{section*.498}
 \defcounter {refsection}{0}\relax
 \contentsline {subsection}{\numberline {7.5.3}知识精炼}{403}{subsection.7.5.3}
 \defcounter {refsection}{0}\relax
-\contentsline {subsubsection}{什么是知识精炼}{403}{section*.500}
+\contentsline {subsubsection}{什么是知识精炼}{404}{section*.500}
 \defcounter {refsection}{0}\relax
 \contentsline {subsubsection}{知识精炼的基本方法}{405}{section*.501}
 \defcounter {refsection}{0}\relax
...
@@ -76,13 +76,13 @@
 ~\vfill
 \thispagestyle{empty}
-\noindent Copyright \copyright\ 2020 肖桐\ \ 朱靖波\\ % Copyright notice
-\noindent \textsc{东北大学自然语言处理实验室\ /\ 小牛翻译}\\ % Publisher
-\noindent \textsc{\url{https://github.com/NiuTrans/MTBook}}\\ % URL
-\noindent {\red{Licensed under the Creative Commons Attribution-NonCommercial 4.0 Unported License (the ``License''). You may not use this file except in compliance with the License. You may obtain a copy of the License at \url{http://creativecommons.org/licenses/by-nc/4.0}. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \textsc{``as is'' basis, without warranties or conditions of any kind}, either express or implied. See the License for the specific language governing permissions and limitations under the License.}}\\ % License information, replace this with your own license (if any)
+\noindent Copyright \copyright\ 2020 肖桐\ \ 朱靖波\\
+\noindent \textsc{东北大学自然语言处理实验室\ /\ 小牛翻译}\\
+\noindent \textsc{\url{https://github.com/NiuTrans/MTBook}}\\
+\noindent {\red{Licensed under the Creative Commons Attribution-NonCommercial 4.0 Unported License (the ``License''). You may not use this file except in compliance with the License. You may obtain a copy of the License at \url{http://creativecommons.org/licenses/by-nc/4.0}. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \textsc{``as is'' basis, without warranties or conditions of any kind}, either express or implied. See the License for the specific language governing permissions and limitations under the License.}}\\
 \noindent \textit{First Edition, April 2020}
@@ -121,14 +121,14 @@
 % CHAPTERS
 %----------------------------------------------------------------------------------------
-%\include{Chapter1/chapter1}
-%\include{Chapter2/chapter2}
-%\include{Chapter3/chapter3}
-%\include{Chapter4/chapter4}
-%\include{Chapter5/chapter5}
-%\include{Chapter6/chapter6}
-%\include{Chapter7/chapter7}
-%\include{ChapterAppend/chapterappend}
+\include{Chapter1/chapter1}
+\include{Chapter2/chapter2}
+\include{Chapter3/chapter3}
+\include{Chapter4/chapter4}
+\include{Chapter5/chapter5}
+\include{Chapter6/chapter6}
+\include{Chapter7/chapter7}
+\include{ChapterAppend/chapterappend}
 %----------------------------------------------------------------------------------------
...