Commit d85642f9 by zengxin

figure

parent 0b980300
...@@ -45,7 +45,7 @@ ...@@ -45,7 +45,7 @@
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter1/Figures/figure-Required-parts-of-MT} \input{./Chapter1/Figures/figure-required-parts-of-mt}
\caption{机器翻译系统的组成} \caption{机器翻译系统的组成}
\label{fig:1-2} \label{fig:1-2}
\end{figure} \end{figure}
...@@ -220,7 +220,7 @@ ...@@ -220,7 +220,7 @@
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter1/Figures/figure-Example-RBMT} \input{./Chapter1/Figures/figure-example-rbmt}
\setlength{\belowcaptionskip}{-1.5em} \setlength{\belowcaptionskip}{-1.5em}
\caption{基于规则的机器翻译的示例图(左:规则库;右:规则匹配结果)} \caption{基于规则的机器翻译的示例图(左:规则库;右:规则匹配结果)}
\label{fig:1-8} \label{fig:1-8}
...@@ -290,7 +290,7 @@ ...@@ -290,7 +290,7 @@
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter1/Figures/figure-Example-SMT} \input{./Chapter1/Figures/figure-example-smt}
\caption{统计机器翻译的示例图(左:语料资源;中:翻译模型与语言模型;右:翻译假设与翻译引擎)} \caption{统计机器翻译的示例图(左:语料资源;中:翻译模型与语言模型;右:翻译假设与翻译引擎)}
\label{fig:1-11} \label{fig:1-11}
\end{figure} \end{figure}
...@@ -311,7 +311,7 @@ ...@@ -311,7 +311,7 @@
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter1/Figures/figure-Example-NMT} \input{./Chapter1/Figures/figure-example-nmt}
\caption{神经机器翻译的示例图(左:编码器-解码器网络;右:编码器示例网络)} \caption{神经机器翻译的示例图(左:编码器-解码器网络;右:编码器示例网络)}
\label{fig:1-12} \label{fig:1-12}
\end{figure} \end{figure}
......
...@@ -35,8 +35,8 @@ ...@@ -35,8 +35,8 @@
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\subfigure[机器翻译系统被看作一个黑盒] {\input{./Chapter2/Figures/figure-MT-system-as-a-black-box} } \subfigure[机器翻译系统被看作一个黑盒] {\input{./Chapter2/Figures/figure-mt-system-as-a-black-box} }
\subfigure[机器翻译系统 = 前/后处理 + 翻译引擎] {\input{./Chapter2/Figures/figure-MT=language-analysis+translation-engine}} \subfigure[机器翻译系统 = 前/后处理 + 翻译引擎] {\input{./Chapter2/Figures/figure-mt=language-analysis+translation-engine}}
\caption{机器翻译系统的结构} \caption{机器翻译系统的结构}
\label{fig:2-1} \label{fig:2-1}
\end{figure} \end{figure}
...@@ -125,7 +125,7 @@ F(X)=\int_{-\infty}^x f(x)dx ...@@ -125,7 +125,7 @@ F(X)=\int_{-\infty}^x f(x)dx
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter2/Figures/figure-Probability-density-function&Distribution-function} \input{./Chapter2/Figures/figure-probability-density-function&distribution-function}
\caption{一个概率密度函数(左)与其对应的分布函数(右)} \caption{一个概率密度函数(左)与其对应的分布函数(右)}
\label{fig:2-3} \label{fig:2-3}
\end{figure} \end{figure}
...@@ -310,7 +310,7 @@ F(X)=\int_{-\infty}^x f(x)dx ...@@ -310,7 +310,7 @@ F(X)=\int_{-\infty}^x f(x)dx
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter2/Figures/figure-Self-information-function} \input{./Chapter2/Figures/figure-self-information-function}
\caption{自信息函数$\textrm{I}(x)$关于$\textrm{P}(x)$的曲线} \caption{自信息函数$\textrm{I}(x)$关于$\textrm{P}(x)$的曲线}
\label{fig:2-6} \label{fig:2-6}
\end{figure} \end{figure}
...@@ -429,7 +429,7 @@ F(X)=\int_{-\infty}^x f(x)dx ...@@ -429,7 +429,7 @@ F(X)=\int_{-\infty}^x f(x)dx
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter2/Figures/figure-Example-of-word-segmentation-based-on-dictionary} \input{./Chapter2/Figures/figure-example-of-word-segmentation-based-on-dictionary}
\caption{基于词典进行分词的实例} \caption{基于词典进行分词的实例}
\label{fig:2-8} \label{fig:2-8}
\end{figure} \end{figure}
...@@ -638,7 +638,7 @@ F(X)=\int_{-\infty}^x f(x)dx ...@@ -638,7 +638,7 @@ F(X)=\int_{-\infty}^x f(x)dx
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter2/Figures/figure-examples-of-Chinese-word-segmentation-based-on-1-gram-model} \input{./Chapter2/Figures/figure-examples-of-chinese-word-segmentation-based-on-1-gram-model}
\caption{基于1-gram语言模型的中文分词实例} \caption{基于1-gram语言模型的中文分词实例}
\label{fig:2-17} \label{fig:2-17}
\end{figure} \end{figure}
......
...@@ -170,7 +170,7 @@ ...@@ -170,7 +170,7 @@
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter3/Figures/figure-processes-SMT} \input{./Chapter3/Figures/figure-processes-smt}
\caption{简单的统计机器翻译流程} \caption{简单的统计机器翻译流程}
\label{fig:3-5} \label{fig:3-5}
\end{figure} \end{figure}
...@@ -472,7 +472,7 @@ g(\mathbf{s},\mathbf{t}) \equiv \prod_{j,i \in \widehat{A}}{\textrm{P}(s_j,t_i)} ...@@ -472,7 +472,7 @@ g(\mathbf{s},\mathbf{t}) \equiv \prod_{j,i \in \widehat{A}}{\textrm{P}(s_j,t_i)}
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter3/Figures/figure-greedy-MT-decoding-pseudo-code} \input{./Chapter3/Figures/figure-greedy-mt-decoding-pseudo-code}
\caption{贪婪的机器翻译解码算法的伪代码} \caption{贪婪的机器翻译解码算法的伪代码}
\label{fig:3-10} \label{fig:3-10}
\end{figure} \end{figure}
...@@ -483,8 +483,8 @@ g(\mathbf{s},\mathbf{t}) \equiv \prod_{j,i \in \widehat{A}}{\textrm{P}(s_j,t_i)} ...@@ -483,8 +483,8 @@ g(\mathbf{s},\mathbf{t}) \equiv \prod_{j,i \in \widehat{A}}{\textrm{P}(s_j,t_i)}
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\subfigure{\input{./Chapter3/Figures/greedy-MT-decoding-process-1}} \subfigure{\input{./Chapter3/Figures/greedy-mt-decoding-process-1}}
\subfigure{\input{./Chapter3/Figures/greedy-MT-decoding-process-3}} \subfigure{\input{./Chapter3/Figures/greedy-mt-decoding-process-3}}
\setlength{\belowcaptionskip}{14.0em} \setlength{\belowcaptionskip}{14.0em}
\caption{贪婪的机器翻译解码过程实例} \caption{贪婪的机器翻译解码过程实例}
\label{fig:3-11} \label{fig:3-11}
......
...@@ -2162,7 +2162,7 @@ d_1 = {d'} \circ {r_5} ...@@ -2162,7 +2162,7 @@ d_1 = {d'} \circ {r_5}
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter4/Figures/structure-of-Chart} \input{./Chapter4/Figures/structure-of-chart}
\caption{Chart结构} \caption{Chart结构}
\label{fig:4-65} \label{fig:4-65}
\end{figure} \end{figure}
...@@ -2252,7 +2252,7 @@ d_1 = {d'} \circ {r_5} ...@@ -2252,7 +2252,7 @@ d_1 = {d'} \circ {r_5}
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter4/Figures/content-of-Chart-in-tree-based-decoding} \input{./Chapter4/Figures/content-of-chart-in-tree-based-decoding}
\caption{基于树的解码中Chart的内容} \caption{基于树的解码中Chart的内容}
\label{fig:4-68} \label{fig:4-68}
\end{figure} \end{figure}
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -252,7 +252,7 @@ NMT & $ 21.7^{\ast}$ & $18.7^{\ast}$ & -1 ...@@ -252,7 +252,7 @@ NMT & $ 21.7^{\ast}$ & $18.7^{\ast}$ & -1
% 图3.6 % 图3.6
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Presentation-space} \input{./Chapter6/Figures/figure-presentation-space}
\caption{统计机器翻译和神经机器翻译的表示空间} \caption{统计机器翻译和神经机器翻译的表示空间}
\label{fig:6-6} \label{fig:6-6}
\end{figure} \end{figure}
...@@ -288,7 +288,7 @@ NMT & $ 21.7^{\ast}$ & $18.7^{\ast}$ & -1 ...@@ -288,7 +288,7 @@ NMT & $ 21.7^{\ast}$ & $18.7^{\ast}$ & -1
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-A-working-example-of-neural-machine-translation} \input{./Chapter6/Figures/figure-a-working-example-of-neural-machine-translation}
\caption{神经机器翻译的运行实例} \caption{神经机器翻译的运行实例}
\label{fig:6-7} \label{fig:6-7}
\end{figure} \end{figure}
...@@ -384,7 +384,7 @@ NMT & $ 21.7^{\ast}$ & $18.7^{\ast}$ & -1 ...@@ -384,7 +384,7 @@ NMT & $ 21.7^{\ast}$ & $18.7^{\ast}$ & -1
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Structure-of-a-recurrent-network-model} \input{./Chapter6/Figures/figure-structure-of-a-recurrent-network-model}
\caption{循环网络模型的结构} \caption{循环网络模型的结构}
\label{fig:6-9} \label{fig:6-9}
\end{figure} \end{figure}
...@@ -396,7 +396,7 @@ NMT & $ 21.7^{\ast}$ & $18.7^{\ast}$ & -1 ...@@ -396,7 +396,7 @@ NMT & $ 21.7^{\ast}$ & $18.7^{\ast}$ & -1
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Model-structure-based-on-recurrent-neural-network-translation} \input{./Chapter6/Figures/figure-model-structure-based-on-recurrent-neural-network-translation}
\caption{基于循环神经网络翻译的模型结构} \caption{基于循环神经网络翻译的模型结构}
\label{fig:6-10} \label{fig:6-10}
\end{figure} \end{figure}
...@@ -480,7 +480,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现,Sof ...@@ -480,7 +480,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现,Sof
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Word-embedding-structure} \input{./Chapter6/Figures/figure-word-embedding-structure}
\caption{词嵌入层结构} \caption{词嵌入层结构}
\label{fig:6-12} \label{fig:6-12}
\end{figure} \end{figure}
...@@ -494,7 +494,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现,Sof ...@@ -494,7 +494,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现,Sof
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Output-layer-structur} \input{./Chapter6/Figures/figure-output-layer-structur}
\caption{输出层结构} \caption{输出层结构}
\label{fig:6-13} \label{fig:6-13}
\end{figure} \end{figure}
...@@ -525,7 +525,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现,Sof ...@@ -525,7 +525,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现,Sof
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
% \includegraphics[scale=0.7]{./Chapter6/Figures/Softmax.png} % \includegraphics[scale=0.7]{./Chapter6/Figures/Softmax.png}
\input{./Chapter6/Figures/figure-Softmax} \input{./Chapter6/Figures/figure-softmax}
\caption{ Softmax函数(一维)所对应的曲线} \caption{ Softmax函数(一维)所对应的曲线}
\label{fig:6-14} \label{fig:6-14}
\end{figure} \end{figure}
...@@ -697,7 +697,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现,Sof ...@@ -697,7 +697,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现,Sof
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Double-layer-RNN} \hspace{10em} \input{./Chapter6/Figures/figure-double-layer-RNN} \hspace{10em}
\caption{双层循环神经网络} \caption{双层循环神经网络}
\label{fig:6-19} \label{fig:6-19}
\end{figure} \end{figure}
...@@ -744,7 +744,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现,Sof ...@@ -744,7 +744,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现,Sof
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Attention-of-source-and-target-words} \input{./Chapter6/Figures/figure-attention-of-source-and-target-words}
\caption{源语词和目标语词的关注度} \caption{源语词和目标语词的关注度}
\label{fig:6-21} \label{fig:6-21}
\end{figure} \end{figure}
...@@ -758,7 +758,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现,Sof ...@@ -758,7 +758,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现,Sof
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-encoder-decoder-with-Attention} \input{./Chapter6/Figures/figure-encoder-decoder-with-attention}
\caption{不使用(a)和使用(b)注意力机制的翻译模型对比} \caption{不使用(a)和使用(b)注意力机制的翻译模型对比}
\label{fig:6-22} \label{fig:6-22}
\end{figure} \end{figure}
...@@ -780,7 +780,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现,Sof ...@@ -780,7 +780,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现,Sof
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Calculation-process-of-context-vector-C} \input{./Chapter6/Figures/figure-calculation-process-of-context-vector-C}
\caption{上下文向量$\mathbf{C}_j$的计算过程} \caption{上下文向量$\mathbf{C}_j$的计算过程}
\label{fig:6-23} \label{fig:6-23}
\end{figure} \end{figure}
...@@ -824,7 +824,7 @@ a (\mathbf{s},\mathbf{h}) = \left\{ \begin{array}{ll} ...@@ -824,7 +824,7 @@ a (\mathbf{s},\mathbf{h}) = \left\{ \begin{array}{ll}
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Matrix-Representation-of-Attention-Weights-Between-Chinese-English-Sentence-Pairs} \input{./Chapter6/Figures/figure-matrix-representation-of-attention-weights-between-chinese-english-sentence-pairs}
\caption{一个汉英句对之间的注意力权重{$\alpha_{i,j}$}的矩阵表示} \caption{一个汉英句对之间的注意力权重{$\alpha_{i,j}$}的矩阵表示}
\label{fig:6-24} \label{fig:6-24}
\end{figure} \end{figure}
...@@ -837,7 +837,7 @@ a (\mathbf{s},\mathbf{h}) = \left\{ \begin{array}{ll} ...@@ -837,7 +837,7 @@ a (\mathbf{s},\mathbf{h}) = \left\{ \begin{array}{ll}
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Example-of-context-vector-calculation-process} \input{./Chapter6/Figures/figure-example-of-context-vector-calculation-process}
\caption{上下文向量计算过程实例} \caption{上下文向量计算过程实例}
\label{fig:6-25} \label{fig:6-25}
\end{figure} \end{figure}
...@@ -878,7 +878,7 @@ a (\mathbf{s},\mathbf{h}) = \left\{ \begin{array}{ll} ...@@ -878,7 +878,7 @@ a (\mathbf{s},\mathbf{h}) = \left\{ \begin{array}{ll}
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Query-model-corresponding-to-traditional-query-model-vs-attention-mechanism} \input{./Chapter6/Figures/figure-query-model-corresponding-to-traditional-query-model-vs-attention-mechanism}
\caption{传统查询模型(a)和注意力机制所对应的查询模型(b)} \caption{传统查询模型(a)和注意力机制所对应的查询模型(b)}
\label{fig:6-26} \label{fig:6-26}
\end{figure} \end{figure}
...@@ -898,7 +898,7 @@ a (\mathbf{s},\mathbf{h}) = \left\{ \begin{array}{ll} ...@@ -898,7 +898,7 @@ a (\mathbf{s},\mathbf{h}) = \left\{ \begin{array}{ll}
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Query-model-corresponding-to-attention-mechanism} \input{./Chapter6/Figures/figure-query-model-corresponding-to-attention-mechanism}
\caption{注意力机制所对应的查询模型} \caption{注意力机制所对应的查询模型}
\label{fig:6-27} \label{fig:6-27}
\end{figure} \end{figure}
...@@ -1012,7 +1012,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\ ...@@ -1012,7 +1012,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Relationship-between-learning-rate-and-number-of-updates} \input{./Chapter6/Figures/figure-relationship-between-learning-rate-and-number-of-updates}
\caption{学习率与更新次数的变化关系} \caption{学习率与更新次数的变化关系}
\label{fig:6-29} \label{fig:6-29}
\end{figure} \end{figure}
...@@ -1054,7 +1054,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\ ...@@ -1054,7 +1054,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Data-parallel-process} \input{./Chapter6/Figures/figure-data-parallel-process}
\caption{数据并行过程} \caption{数据并行过程}
\label{fig:6-30} \label{fig:6-30}
\end{figure} \end{figure}
...@@ -1112,7 +1112,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\ ...@@ -1112,7 +1112,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Decoding-process-based-on-greedy-method} \input{./Chapter6/Figures/figure-decoding-process-based-on-greedy-method}
\caption{基于贪婪方法的解码过程} \caption{基于贪婪方法的解码过程}
\label{fig:6-32} \label{fig:6-32}
\end{figure} \end{figure}
...@@ -1124,7 +1124,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\ ...@@ -1124,7 +1124,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Decode-the-word-probability-distribution-at-the-first-position} \input{./Chapter6/Figures/figure-decode-the-word-probability-distribution-at-the-first-position}
\caption{解码第一个位置输出的单词概率分布(``Have''的概率最高)} \caption{解码第一个位置输出的单词概率分布(``Have''的概率最高)}
\label{fig:6-33} \label{fig:6-33}
\end{figure} \end{figure}
...@@ -1147,7 +1147,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\ ...@@ -1147,7 +1147,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Beam-search-process} \input{./Chapter6/Figures/figure-beam-search-process}
\caption{束搜索过程} \caption{束搜索过程}
\label{fig:6-34} \label{fig:6-34}
\end{figure} \end{figure}
...@@ -1285,7 +1285,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\ ...@@ -1285,7 +1285,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Dependencies-between-words-in-a-recurrent-neural-network} \input{./Chapter6/Figures/figure-dependencies-between-words-in-a-recurrent-neural-network}
\caption{循环神经网络中单词之间的依赖关系} \caption{循环神经网络中单词之间的依赖关系}
\label{fig:6-36} \label{fig:6-36}
\end{figure} \end{figure}
...@@ -1297,7 +1297,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\ ...@@ -1297,7 +1297,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Dependencies-between-words-of-Attention} \input{./Chapter6/Figures/figure-dependencies-between-words-of-attention}
\caption{自注意力机制中单词之间的依赖关系} \caption{自注意力机制中单词之间的依赖关系}
\label{fig:6-37} \label{fig:6-37}
\end{figure} \end{figure}
...@@ -1309,7 +1309,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\ ...@@ -1309,7 +1309,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Example-of-self-attention-mechanism-calculation} \input{./Chapter6/Figures/figure-example-of-self-attention-mechanism-calculation}
\caption{自注意力计算实例} \caption{自注意力计算实例}
\label{fig:6-38} \label{fig:6-38}
\end{figure} \end{figure}
...@@ -1383,7 +1383,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\ ...@@ -1383,7 +1383,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Calculation-of-context-vector-C} \input{./Chapter6/Figures/figure-calculation-of-context-vector-C}
\caption{上下文向量$\mathbf{C}$的计算} \caption{上下文向量$\mathbf{C}$的计算}
\label{fig:6-41} \label{fig:6-41}
\end{figure} \end{figure}
...@@ -1418,7 +1418,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\ ...@@ -1418,7 +1418,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-A-combination-of-position-encoding-and-word-encoding} \input{./Chapter6/Figures/figure-a-combination-of-position-encoding-and-word-encoding}
\caption{位置编码与词编码的组合} \caption{位置编码与词编码的组合}
\label{fig:6-43} \label{fig:6-43}
\end{figure} \end{figure}
...@@ -1448,7 +1448,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\ ...@@ -1448,7 +1448,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Position-of-self-attention-mechanism-in-the-model} \input{./Chapter6/Figures/figure-position-of-self-attention-mechanism-in-the-model}
\caption{自注意力机制在模型中的位置} \caption{自注意力机制在模型中的位置}
\label{fig:6-44} \label{fig:6-44}
\end{figure} \end{figure}
...@@ -1479,7 +1479,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\ ...@@ -1479,7 +1479,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Point-product-attention-model} \input{./Chapter6/Figures/figure-point-product-attention-model}
\caption{点乘注意力模型 } \caption{点乘注意力模型 }
\label{fig:6-45} \label{fig:6-45}
\end{figure} \end{figure}
...@@ -1511,7 +1511,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\ ...@@ -1511,7 +1511,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Mask-instance-for-future-positions-in-Transformer} \input{./Chapter6/Figures/figure-mask-instance-for-future-positions-in-transformer}
\caption{Transformer中对未来位置进行屏蔽的Mask实例} \caption{Transformer中对未来位置进行屏蔽的Mask实例}
\label{fig:6-47} \label{fig:6-47}
\end{figure} \end{figure}
...@@ -1535,7 +1535,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\ ...@@ -1535,7 +1535,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Multi-Head-Attention-Model} \input{./Chapter6/Figures/figure-multi-head-attention-model}
\caption{多头注意力模型} \caption{多头注意力模型}
\label{fig:6-48} \label{fig:6-48}
\end{figure} \end{figure}
...@@ -1560,7 +1560,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\ ...@@ -1560,7 +1560,7 @@ L(\mathbf{Y},\widehat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Residual-network-structure} \input{./Chapter6/Figures/figure-residual-network-structure}
\caption{残差网络结构} \caption{残差网络结构}
\label{fig:6-49} \label{fig:6-49}
\end{figure} \end{figure}
...@@ -1579,7 +1579,7 @@ x_{l+1} = x_l + \digamma (x_l) ...@@ -1579,7 +1579,7 @@ x_{l+1} = x_l + \digamma (x_l)
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Position-of-difference-and-layer-regularization-in-the-model} \input{./Chapter6/Figures/figure-position-of-difference-and-layer-regularization-in-the-model}
\caption{残差和层正则化在模型中的位置} \caption{残差和层正则化在模型中的位置}
\label{fig:6-50} \label{fig:6-50}
\end{figure} \end{figure}
...@@ -1600,7 +1600,7 @@ x_{l+1} = x_l + \digamma (x_l) ...@@ -1600,7 +1600,7 @@ x_{l+1} = x_l + \digamma (x_l)
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Different-regularization-methods} \input{./Chapter6/Figures/figure-different-regularization-methods}
\caption{不同正则化方式 } \caption{不同正则化方式 }
\label{fig:6-51} \label{fig:6-51}
\end{figure} \end{figure}
...@@ -1613,7 +1613,7 @@ x_{l+1} = x_l + \digamma (x_l) ...@@ -1613,7 +1613,7 @@ x_{l+1} = x_l + \digamma (x_l)
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Position-of-feedforward-neural-network-in-the-model} \input{./Chapter6/Figures/figure-position-of-feedforward-neural-network-in-the-model}
\caption{前馈神经网络在模型中的位置} \caption{前馈神经网络在模型中的位置}
\label{fig:6-52} \label{fig:6-52}
\end{figure} \end{figure}
...@@ -1636,7 +1636,7 @@ x_{l+1} = x_l + \digamma (x_l) ...@@ -1636,7 +1636,7 @@ x_{l+1} = x_l + \digamma (x_l)
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Structure-of-the-network-during-Transformer-training} \input{./Chapter6/Figures/figure-structure-of-the-network-during-transformer-training}
\caption{Transformer训练时网络的结构} \caption{Transformer训练时网络的结构}
\label{fig:6-53} \label{fig:6-53}
\end{figure} \end{figure}
...@@ -1676,7 +1676,7 @@ lrate = d_{model}^{-0.5} \cdot \textrm{min} (step^{-0.5} , step \cdot warmup\_st ...@@ -1676,7 +1676,7 @@ lrate = d_{model}^{-0.5} \cdot \textrm{min} (step^{-0.5} , step \cdot warmup\_st
% 图3.10 % 图3.10
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Comparison-of-the-number-of-padding-in-batch} \input{./Chapter6/Figures/figure-comparison-of-the-number-of-padding-in-batch}
\caption{batch中padding数量对比(白色部分为padding)} \caption{batch中padding数量对比(白色部分为padding)}
\label{fig:6-55} \label{fig:6-55}
\end{figure} \end{figure}
...@@ -1752,7 +1752,7 @@ Transformer Deep(48层) & 30.2 & 43.1 & 194$\times 10^{6}$ ...@@ -1752,7 +1752,7 @@ Transformer Deep(48层) & 30.2 & 43.1 & 194$\times 10^{6}$
% 图3.6.1 % 图3.6.1
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Generate-summary} \input{./Chapter6/Figures/figure-generate-summary}
\caption{文本自动摘要实例} \caption{文本自动摘要实例}
\label{fig:6-57} \label{fig:6-57}
\end{figure} \end{figure}
...@@ -1764,7 +1764,7 @@ Transformer Deep(48层) & 30.2 & 43.1 & 194$\times 10^{6}$ ...@@ -1764,7 +1764,7 @@ Transformer Deep(48层) & 30.2 & 43.1 & 194$\times 10^{6}$
% 图3.6.1 % 图3.6.1
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Example-of-automatic-translation-of-classical-Chinese} \input{./Chapter6/Figures/figure-example-of-automatic-translation-of-classical-chinese}
\caption{文言文自动翻译实例} \caption{文言文自动翻译实例}
\label{fig:6-58} \label{fig:6-58}
\end{figure} \end{figure}
...@@ -1780,7 +1780,7 @@ Transformer Deep(48层) & 30.2 & 43.1 & 194$\times 10^{6}$ ...@@ -1780,7 +1780,7 @@ Transformer Deep(48层) & 30.2 & 43.1 & 194$\times 10^{6}$
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Automatically-generate-instances-of-couplets} \input{./Chapter6/Figures/figure-automatically-generate-instances-of-couplets}
\caption{对联自动生成实例(人工给定上联)} \caption{对联自动生成实例(人工给定上联)}
\label{fig:6-59} \label{fig:6-59}
\end{figure} \end{figure}
...@@ -1796,7 +1796,7 @@ Transformer Deep(48层) & 30.2 & 43.1 & 194$\times 10^{6}$ ...@@ -1796,7 +1796,7 @@ Transformer Deep(48层) & 30.2 & 43.1 & 194$\times 10^{6}$
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter6/Figures/figure-Automatic-generation-of-ancient-poems-based-on-encoder-decoder-framework} \input{./Chapter6/Figures/figure-automatic-generation-of-ancient-poems-based-on-encoder-decoder-framework}
\caption{基于编码器-解码器框架的古诗自动生成} \caption{基于编码器-解码器框架的古诗自动生成}
\label{fig:6-60} \label{fig:6-60}
\end{figure} \end{figure}
......
...@@ -90,7 +90,7 @@ ...@@ -90,7 +90,7 @@
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter7/Figures/figure-construction-steps-of-MT-system} \input{./Chapter7/Figures/figure-construction-steps-of-mt-system}
\caption{构建神经机器翻译系统的主要步骤} \caption{构建神经机器翻译系统的主要步骤}
\label{fig:7-2} \label{fig:7-2}
\end{figure} \end{figure}
...@@ -417,7 +417,7 @@ y = f(x) ...@@ -417,7 +417,7 @@ y = f(x)
% 图7. % 图7.
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter7/Figures/figure-Underfitting-vs-Overfitting} \input{./Chapter7/Figures/figure-underfitting-vs-overfitting}
\caption{欠拟合 vs 过拟合} \caption{欠拟合 vs 过拟合}
\label{fig:7-11} \label{fig:7-11}
\end{figure} \end{figure}
...@@ -1191,7 +1191,7 @@ b &=& \omega_{\textrm{high}}\cdot |\mathbf{x}| ...@@ -1191,7 +1191,7 @@ b &=& \omega_{\textrm{high}}\cdot |\mathbf{x}|
% 图7.5.1 % 图7.5.1
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter7/Figures/Post-Norm-vs-Pre-Norm} \input{./Chapter7/Figures/figure-post-norm-vs-pre-norm}
\caption{Post-Norm Transformer vs Pre-Norm Transformer} \caption{Post-Norm Transformer vs Pre-Norm Transformer}
\label{fig:7-28} \label{fig:7-28}
\end{figure} \end{figure}
...@@ -1273,7 +1273,7 @@ $g_l$会作为输入的一部分送入第$l+1$层。其网络的结构图\ref{fi ...@@ -1273,7 +1273,7 @@ $g_l$会作为输入的一部分送入第$l+1$层。其网络的结构图\ref{fi
% 图7.5.2 % 图7.5.2
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter7/Figures/dynamic-linear-aggregation-network-structure} \input{./Chapter7/Figures/figure-dynamic-linear-aggregation-network-structure}
\caption{动态线性层聚合网络结构图} \caption{动态线性层聚合网络结构图}
\label{fig:7-29} \label{fig:7-29}
\end{figure} \end{figure}
...@@ -1299,7 +1299,7 @@ $g_l$会作为输入的一部分送入第$l+1$层。其网络的结构图\ref{fi ...@@ -1299,7 +1299,7 @@ $g_l$会作为输入的一部分送入第$l+1$层。其网络的结构图\ref{fi
% 图7.5.3 % 图7.5.3
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter7/Figures/progressive-training} \input{./Chapter7/Figures/figure-progressive-training}
\caption{渐进式深层网络训练过程} \caption{渐进式深层网络训练过程}
\label{fig:7-30} \label{fig:7-30}
\end{figure} \end{figure}
...@@ -1316,7 +1316,7 @@ $g_l$会作为输入的一部分送入第$l+1$层。其网络的结构图\ref{fi ...@@ -1316,7 +1316,7 @@ $g_l$会作为输入的一部分送入第$l+1$层。其网络的结构图\ref{fi
% 图7.5.4 % 图7.5.4
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter7/Figures/sparse-connections-between-different-groups} \input{./Chapter7/Figures/figure-sparse-connections-between-different-groups}
\caption{不同组之间的稀疏连接} \caption{不同组之间的稀疏连接}
\label{fig:7-31} \label{fig:7-31}
\end{figure} \end{figure}
...@@ -1335,7 +1335,7 @@ $g_l$会作为输入的一部分送入第$l+1$层。其网络的结构图\ref{fi ...@@ -1335,7 +1335,7 @@ $g_l$会作为输入的一部分送入第$l+1$层。其网络的结构图\ref{fi
% 图7.5.5 % 图7.5.5
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter7/Figures/learning-rate} \input{./Chapter7/Figures/figure-learning-rate}
\caption{学习率重置vs从头训练的学习率曲线} \caption{学习率重置vs从头训练的学习率曲线}
\label{fig:7-32} \label{fig:7-32}
\end{figure} \end{figure}
...@@ -1411,7 +1411,7 @@ p_l=\frac{l}{2L}\cdot \varphi ...@@ -1411,7 +1411,7 @@ p_l=\frac{l}{2L}\cdot \varphi
% 图7.5.7 % 图7.5.7
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter7/Figures/expanded-residual-network} \input{./Chapter7/Figures/figure-expanded-residual-network}
\caption{Layer Dropout中残差网络的展开图} \caption{Layer Dropout中残差网络的展开图}
\label{fig:7-34} \label{fig:7-34}
\end{figure} \end{figure}
......
...@@ -122,13 +122,13 @@ ...@@ -122,13 +122,13 @@
% CHAPTERS % CHAPTERS
%---------------------------------------------------------------------------------------- %----------------------------------------------------------------------------------------
\include{Chapter1/chapter1} %\include{Chapter1/chapter1}
%\include{Chapter2/chapter2} %\include{Chapter2/chapter2}
%\include{Chapter3/chapter3} %\include{Chapter3/chapter3}
%\include{Chapter4/chapter4} %\include{Chapter4/chapter4}
%\include{Chapter5/chapter5} %\include{Chapter5/chapter5}
%\include{Chapter6/chapter6} %\include{Chapter6/chapter6}
%\include{Chapter7/chapter7} \include{Chapter7/chapter7}
%\include{ChapterAppend/chapterappend} %\include{ChapterAppend/chapterappend}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论