Commit 1dd53ad3 by zengxin

chapter7 fig

parent 3e2fb2f9
...@@ -364,7 +364,7 @@ ...@@ -364,7 +364,7 @@
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter7/Figures/figure-unk-of-bpe} \input{./Chapter7/Figures/figure-unk-of-bpe}
\caption{BPE中<UNK>的生成} \caption{BPE中的子词切分过程}
\label{fig:7-10} \label{fig:7-10}
\end{figure} \end{figure}
%---------------------------------------------- %----------------------------------------------
...@@ -1155,7 +1155,7 @@ b &=& \omega_{\textrm{high}}\cdot |\mathbf{x}| ...@@ -1155,7 +1155,7 @@ b &=& \omega_{\textrm{high}}\cdot |\mathbf{x}|
\parinterval 有了lattice这样的结构,多模型融合又有了新的思路。首先,可以将多个模型的译文融合为lattice。注意,这个lattice会包含这些模型无法生成的完整译文句子。之后,用一个更强的模型在lattice上搜索最优的结果。这个过程有可能找到一些``新''的译文,即结果可能是从多个模型的结果中重组而来的。lattice上的搜索模型可以基于多模型的融合,也可以使用一个简单的模型,这里需要考虑的是将神经机器翻译模型适应到lattice上进行推断\cite{DBLP:conf/aaai/SuTXJSL17}。其过程基本与原始的模型推断没有区别,只是需要把模型预测的结果附着到lattice中的每条边上,再进行推断。 \parinterval 有了lattice这样的结构,多模型融合又有了新的思路。首先,可以将多个模型的译文融合为lattice。注意,这个lattice会包含这些模型无法生成的完整译文句子。之后,用一个更强的模型在lattice上搜索最优的结果。这个过程有可能找到一些``新''的译文,即结果可能是从多个模型的结果中重组而来的。lattice上的搜索模型可以基于多模型的融合,也可以使用一个简单的模型,这里需要考虑的是将神经机器翻译模型适应到lattice上进行推断\cite{DBLP:conf/aaai/SuTXJSL17}。其过程基本与原始的模型推断没有区别,只是需要把模型预测的结果附着到lattice中的每条边上,再进行推断。
\parinterval 图\ref{fig:7-27}对比了不同模型集成方法的区别。从系统开发的角度看,假设选择和模型预测融合的复杂度较低,适合快速原型,而且性能稳定。译文重组需要更多的模块,系统调试的复杂度较高,但是由于看到了更大的搜索空间,因此系统性能提升的潜力较大\footnote{一般来说lattice上的Oracle要比$n$-best译文上的oracle的质量高。} \parinterval 图\ref{fig:7-27}对比了不同模型集成方法的区别。从系统开发的角度看,假设选择和模型预测融合的复杂度较低,适合快速原型,而且性能稳定。译文重组需要更多的模块,系统调试的复杂度较高,但是由于看到了更大的搜索空间,因此系统性能提升的潜力较大\footnote{一般来说lattice上的Oracle要比$n$-best译文上的Oracle的质量高。}
%---------------------------------------------- %----------------------------------------------
% 图7. % 图7.
...@@ -1261,7 +1261,7 @@ z_{l}=\textrm{LN}(x_{l+1}) ...@@ -1261,7 +1261,7 @@ z_{l}=\textrm{LN}(x_{l+1})
\end{eqnarray} \end{eqnarray}
注意,$z_0$表示词嵌入层的输出,$z_l(l>0)$表示Transformer网络中最终的各层输出。 注意,$z_0$表示词嵌入层的输出,$z_l(l>0)$表示Transformer网络中最终的各层输出。
\vspace{0.5em} \vspace{0.5em}
\item 定义一个维度为$(L+1)\times(L+1)$的权值矩阵$\mathbf{W}$,矩阵中每一行表示之前各层对当前层计算的贡献度,其中$L$是编码端(或解码端)的层数。令$\mathbf{W}_{l,i}$代表权值矩阵$\mathbf{W}$第$l$行第$i$列的权重,则层聚合的输出为$z_i$的线性加权和: \item 定义一个维度为$(L+1)\times(L+1)$的权值矩阵$\mathbf{W}$,矩阵中每一行表示之前各层对当前层计算的贡献度,其中$L$是编码端(或解码端)的层数。令$\mathbf{W}_{l,i}$代表权值矩阵$\mathbf{W}$第$l$行第$i$列的权重,则层聚合的输出为$z_i$的线性加权和:
\begin{eqnarray} \begin{eqnarray}
g_l=\sum_{i=0}^{l}z_i\times \mathbf{W}_{l,i} g_l=\sum_{i=0}^{l}z_i\times \mathbf{W}_{l,i}
\label{eq:7-21} \label{eq:7-21}
...@@ -1633,7 +1633,7 @@ L_{\textrm{seq}} = - \textrm{logP}_{\textrm{s}}(\hat{\textbf{y}} | \textbf{x}) ...@@ -1633,7 +1633,7 @@ L_{\textrm{seq}} = - \textrm{logP}_{\textrm{s}}(\hat{\textbf{y}} | \textbf{x})
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter7/Figures/figure-ensemble-knowledge-distillation} \input{./Chapter7/Figures/figure-ensemble-knowledge-distillation}
\caption{Ensemble知识精炼} \caption{迭代式知识精炼}
\label{fig:7-41} \label{fig:7-41}
\end{figure} \end{figure}
%------------------------------------------- %-------------------------------------------
......
\begin{tikzpicture} \begin{tikzpicture}
\begin{scope} \begin{scope}
\node [anchor=center] (node1) at (-2.9,1) {\small{训练:}}; \node [anchor=center] (node1) at (4.9,1) {\small{训练:}};
\node [anchor=center] (node11) at (-2.5,1) {}; \node [anchor=center] (node11) at (5.5,1) {};
\node [anchor=center] (node12) at (-1.7,1) {}; \node [anchor=center] (node12) at (6.7,1) {};
\node [anchor=center] (node2) at (-2.9,0.5) {\small{推理:}}; \node [anchor=center] (node2) at (4.9,0.5) {\small{推理:}};
\node [anchor=center] (node21) at (-2.5,0.5) {}; \node [anchor=center] (node21) at (5.5,0.5) {};
\node [anchor=center] (node22) at (-1.7,0.5) {}; \node [anchor=center] (node22) at (6.7,0.5) {};
\node [anchor=west,line width=0.6pt,draw=black,minimum width=5.6em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node1-1) at (0,0) {\footnotesize{双语数据}}; \node [anchor=west,line width=0.6pt,draw=black,minimum width=5.6em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node1-1) at (0,0) {\footnotesize{双语数据}};
\node [anchor=south,line width=0.6pt,draw=black,minimum width=4.5em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node1-2) at ([yshift=-5em]node1-1.south) {\footnotesize{目标语伪数据}}; \node [anchor=south,line width=0.6pt,draw=black,minimum width=4.5em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node1-2) at ([yshift=-5em]node1-1.south) {\footnotesize{目标语伪数据}};
\node [anchor=west,line width=0.6pt,draw=black,minimum width=4.5em,minimum height=2.2em,fill=red!20,rounded corners=2pt] (node2-1) at ([xshift=-8.8em,yshift=-2.5em]node1-1.west) {\footnotesize{反向NMT系统}}; \node [anchor=west,line width=0.6pt,draw=black,minimum width=4.5em,minimum height=2.2em,fill=red!20,rounded corners=2pt] (node2-1) at ([xshift=-8.8em,yshift=-2.5em]node1-1.west) {\footnotesize{反向NMT系统}};
......
\begin{tikzpicture} \begin{tikzpicture}
\tikzstyle{layer} = [rectangle,draw,rounded corners=3pt,minimum width=1cm,minimum height=0.5cm]; \tikzstyle{layer} = [rectangle,draw,rounded corners=3pt,minimum width=1cm,minimum height=0.5cm,line width=1pt];
\tikzstyle{prob} = [minimum width=0.3cm,rectangle,fill=ugreen!20!white,inner sep=0pt]; \tikzstyle{prob} = [minimum width=0.3cm,rectangle,fill=ugreen!20!white,inner sep=0pt];
\begin{scope}[local bounding box=STANDARD] \begin{scope}[local bounding box=STANDARD]
...@@ -22,8 +22,8 @@ ...@@ -22,8 +22,8 @@
\path [fill=blue!20!white,draw=white] (out1.north west) -- (prob1.south west) -- (prob9.south east) -- (out1.north east) -- (out1.north west); \path [fill=blue!20!white,draw=white] (out1.north west) -- (prob1.south west) -- (prob9.south east) -- (out1.north east) -- (out1.north west);
\draw [->] (input1) to (net1); \draw [->,line width=1pt] (input1) to (net1);
\draw [->] (net1) to (out1); \draw [->,line width=1pt] (net1) to (out1);
\node [font=\small] (label1) at ([yshift=0.6cm]out1.north) {Softmax}; \node [font=\small] (label1) at ([yshift=0.6cm]out1.north) {Softmax};
\end{scope} \end{scope}
...@@ -51,8 +51,8 @@ ...@@ -51,8 +51,8 @@
\path [fill=blue!20!white,draw=white] (out2.north west) -- (prob1.south west) -- (prob9.south east) -- (out2.north east) -- (out2.north west); \path [fill=blue!20!white,draw=white] (out2.north west) -- (prob1.south west) -- (prob9.south east) -- (out2.north east) -- (out2.north west);
\draw [->] (input2) to (net2); \draw [->,line width=1pt] (input2) to (net2);
\draw [->] (net2) to (out2); \draw [->,line width=1pt] (net2) to (out2);
\node [font=\small] (label2) at ([yshift=0.6cm]out2.north) {Softmax}; \node [font=\small] (label2) at ([yshift=0.6cm]out2.north) {Softmax};
...@@ -60,9 +60,9 @@ ...@@ -60,9 +60,9 @@
\node [anchor=north,font=\scriptsize] (input3) at ([yshift=-0.5cm]net3.south) {源语}; \node [anchor=north,font=\scriptsize] (input3) at ([yshift=-0.5cm]net3.south) {源语};
\node [anchor=south,layer,align=center,font=\scriptsize,fill=yellow!10!white] (out3) at ([yshift=0.9cm]net3.north) {Candidate\\List}; \node [anchor=south,layer,align=center,font=\scriptsize,fill=yellow!10!white] (out3) at ([yshift=0.9cm]net3.north) {Candidate\\List};
\draw [->] (input3) to (net3); \draw [->,line width=1pt] (input3) to (net3);
\draw [->] (net3) to (out3); \draw [->,line width=1pt] (net3) to (out3);
\draw [->] (out3) |- (plabel9.east); \draw [->,line width=1pt] (out3) |- (plabel9.east);
\end{scope} \end{scope}
\node [anchor=north,font=\scriptsize] () at ([yshift=-0.2em]STANDARD.south) {(a) 标准方法}; \node [anchor=north,font=\scriptsize] () at ([yshift=-0.2em]STANDARD.south) {(a) 标准方法};
......
...@@ -50,8 +50,8 @@ ...@@ -50,8 +50,8 @@
\node [rectangle,inner sep=1em,fill=black!5,rounded corners=4pt] [fit =(w4) (w6) (w9) (encoder0) ] (box) {}; \node [rectangle,inner sep=1em,fill=black!5,rounded corners=4pt] [fit =(w4) (w6) (w9) (encoder0) ] (box) {};
\end{pgfonlayer} \end{pgfonlayer}
\node [] (left) at ([yshift=-1.5em]box.south) {编码器使用单语数据预训练}; \node [font=\footnotesize] (left) at ([yshift=-1.5em]box.south) {编码器使用单语数据预训练};
\node [] (right) at ([xshift=11em]left.east) {在翻译任务上进行微调}; \node [font=\footnotesize] (right) at ([xshift=11em]left.east) {在翻译任务上进行微调};
\node[anchor=north] (arrow1) at (3.85,0.1){}; \node[anchor=north] (arrow1) at (3.85,0.1){};
......
\begin{tikzpicture} \begin{tikzpicture}
\begin{scope} \begin{scope}
\node [anchor=center] (node1) at (-2.6,1) {\small{训练:}}; \node [anchor=center] (node1) at (9.6,1) {\small{训练:}};
\node [anchor=center] (node11) at (-2.2,1) {}; \node [anchor=center] (node11) at (10.2,1) {};
\node [anchor=center] (node12) at (-1.1,1) {}; \node [anchor=center] (node12) at (11.4,1) {};
\node [anchor=center] (node2) at (-2.6,0.5) {\small{推理:}}; \node [anchor=center] (node2) at (9.6,0.5) {\small{推理:}};
\node [anchor=center] (node21) at (-2.2,0.5) {}; \node [anchor=center] (node21) at (10.2,0.5) {};
\node [anchor=center] (node22) at (-1.1,0.5) {}; \node [anchor=center] (node22) at (11.4,0.5) {};
\node [anchor=west,draw=black,line width=0.6pt,minimum width=5.6em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node1-1) at (0,0) {\footnotesize{双语数据}}; \node [anchor=west,draw=black,line width=0.6pt,minimum width=5.6em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node1-1) at (0,0) {\footnotesize{双语数据}};
\node [anchor=south,draw=black,line width=0.6pt,minimum width=4.5em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node1-2) at ([yshift=-5em]node1-1.south) {\footnotesize{目标语伪数据}}; \node [anchor=south,draw=black,line width=0.6pt,minimum width=4.5em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node1-2) at ([yshift=-5em]node1-1.south) {\footnotesize{目标语伪数据}};
\node [anchor=west,draw=black,line width=0.6pt,minimum width=4.5em,minimum height=2.2em,fill=red!20,rounded corners=2pt] (node2-1) at ([xshift=-7.7em,yshift=-2.5em]node1-1.west) {\footnotesize{前向NMT系统}}; \node [anchor=west,draw=black,line width=0.6pt,minimum width=4.5em,minimum height=2.2em,fill=red!20,rounded corners=2pt] (node2-1) at ([xshift=-7.7em,yshift=-2.5em]node1-1.west) {\footnotesize{前向NMT系统}};
......
...@@ -125,10 +125,10 @@ ...@@ -125,10 +125,10 @@
%\include{Chapter1/chapter1} %\include{Chapter1/chapter1}
%\include{Chapter2/chapter2} %\include{Chapter2/chapter2}
%\include{Chapter3/chapter3} %\include{Chapter3/chapter3}
\include{Chapter4/chapter4} %\include{Chapter4/chapter4}
%\include{Chapter5/chapter5} %\include{Chapter5/chapter5}
%\include{Chapter6/chapter6} %\include{Chapter6/chapter6}
%\include{Chapter7/chapter7} \include{Chapter7/chapter7}
%\include{ChapterAppend/chapterappend} %\include{ChapterAppend/chapterappend}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论