Commit 93fe50af by zengxin

合并分支 'caorunzhe' 到 'zengxin'

Caorunzhe

查看合并请求 !1019
parents 80911497 ff80b1bc
......@@ -43,7 +43,7 @@
\node [anchor=west,mnode] (n8) at ([xshift=5em,yshift=0em]n7.east) {};
\node [anchor=north,align=center,font=\footnotesize] (n81) at ([xshift=0em,yshift=-0.2em]n8.north) {{\small 符号合并表}\\(e,s)};
\node [anchor=west,align=left,font=\footnotesize] (nt3) at ([xshift=0.1em,yshift=0em]n6.east) {统计二元组\\[0.5ex]的频次};
\node [anchor=west,align=left,font=\footnotesize] (nt3) at ([xshift=0.1em,yshift=0em]n6.east) {统计二元组\\[0.5ex]出现的频次};
\node [anchor=west,align=left,font=\footnotesize] (nt4) at ([xshift=0em,yshift=-0.4em]n7.east) {频次最高的\\[0.5ex](e,s)加入表\\};
\draw [->,thick,ublue] ([xshift=0em,yshift=0em]n6.east)--([xshift=0em,yshift=0em]n7.west);
\draw [->,thick,ublue] ([xshift=0em,yshift=0em]n7.east)--([xshift=0em,yshift=0em]n8.west);
......@@ -63,7 +63,7 @@
\node [anchor=west,mnode] (n11) at ([xshift=5em,yshift=0em]n10.east) {};
\node [anchor=north,align=center,font=\footnotesize] (n111) at ([xshift=0em,yshift=-0.2em]n11.north) {{\small 符号合并表}\\(e,s)\\(es,t)};
\node [anchor=west,align=left,font=\footnotesize] (nt5) at ([xshift=0.1em,yshift=0em]n9.east) {统计二元组\\[0.5ex]的频次};
\node [anchor=west,align=left,font=\footnotesize] (nt5) at ([xshift=0.1em,yshift=0em]n9.east) {统计二元组\\[0.5ex]出现的频次};
\node [anchor=west,align=left,font=\footnotesize] (nt6) at ([xshift=0em,yshift=-0.4em]n10.east) {频次最高的\\[0.5ex](es,t)加入表\\};
\draw [->,thick,ublue] ([xshift=0em,yshift=0em]n9.east)--([xshift=0em,yshift=0em]n10.west);
\draw [->,thick,ublue] ([xshift=0em,yshift=0em]n10.east)--([xshift=0em,yshift=0em]n11.west);
......@@ -72,7 +72,7 @@
\node [anchor=north,ublue] (cd) at ([xshift=0em,yshift=-2.0em]n10.south) {$\cdots$};
\draw [->,thick,ublue] ([xshift=-0em,yshift=-0em]n11.south) .. controls +(south:2em) and +(north:2em) .. ([xshift=-0em,yshift=-0em]cd.north);
\node [anchor=north west,ublue,font=\footnotesize,align=left] (l2) at ([xshift=1em,yshift=-1.0em]n10.south east) {在词表中\\[0.8ex]合并(es,t)};
\node [anchor=east,ublue,align=left,font=\footnotesize] (l3) at ([xshift=-0.5em,yshift=0em]cd.west) {直至达到设定的符号合\\并表大小或无法合并};
\node [anchor=east,ublue,align=left,font=\footnotesize] (l3) at ([xshift=-0.5em,yshift=0em]cd.west) {直至达到预设的符号合并表\\大小,或没有二元组可以被\\合并};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.7em,draw,orange!40,dashed,thick,rounded corners=7pt] [fit = (n5) (n8) (l3) (cd)] (box2) {};
......
......@@ -14,7 +14,7 @@
\node [neuronnode] (neuron_z) at (1.2 * \nodespace,-1.5 * \neuronsep) {\scriptsize{$z_{i}^{l+1}$}};
\node [neuronnode] (neuron_y') at (2.4 * \nodespace,-1.5 * \neuronsep) {\scriptsize{$x_{i}^{l+1}$}};
\node [anchor=north] (standard) at ([yshift=-4em]neuron_z.south) {\scriptsize{标准网络}};
\node [anchor=north,align=left,font=\scriptsize] (standard) at ([xshift=2em,yshift=-3em]neuron_z.south) {使用Dropout前的\\一层神经网络};
\node [] (standard) at ([xshift=-1em]neuron_z.west) {\scriptsize{$\mathbi{w}_{i}^{l}$}};
\node [] (standard) at ([xshift=0.6em,yshift=0.3em]neuron_z.east) {\scriptsize{$f$}};
......@@ -40,7 +40,7 @@
\node [neuronnode] (drop_neuron_r2) at (4.4*\nodespace,-1.5*\neuronsep) {\scriptsize{$r_{2}^{l}$}};
\node [neuronnode] (drop_neuron_r1) at (4.4*\nodespace,-2.5*\neuronsep) {\scriptsize{$r_{1}^{l}$}};
\node [anchor=north] (standard) at ([xshift=2em,yshift=-4em]drop_neuron_z.south) {\scriptsize{应用Dropout后的网络}};
\node [anchor=north,align=left,font=\scriptsize] (standard) at ([xshift=2em,yshift=-3em]drop_neuron_z.south) {使用Dropout后的\\一层神经网络};
\node [] (standard) at ([xshift=-1em]drop_neuron_z.west) {\scriptsize{$\mathbi{w}_{i}^{l}$}};
\node [] (standard) at ([xshift=0.6em,yshift=0.3em]drop_neuron_z.east) {\scriptsize{$f$}};
%structure
......@@ -63,7 +63,7 @@
\node [anchor=north west,inner sep = 2pt] (line2) at (line1.south west) {$z_{i}^{l+1}=\mathbi{w}^{l} \mathbi{x}^{l} + b^{l}$};
\node [anchor=north west,inner sep = 2pt] (line3) at (line2.south west) {$x_{i}^{l+1}=f\left(z_{i}^{l+1}\right)$};
\node [anchor=north west,inner sep = 2pt] (line4) at (line3.south west) {应用Dropout:};
\node [anchor=north west,inner sep = 2pt] (line5) at (line4.south west) {$r_{j}^{l} \sim$ Bernoulli $(1-p)$};
\node [anchor=north west,inner sep = 2pt] (line5) at (line4.south west) {$r_{i}^{l} \sim$ Bernoulli $(1-p)$};
% Dropout masking step: written with \widetilde and the layer superscript l so that the symbol defined here is the same one used by the (line7) formula below.
\node [anchor=north west,inner sep = 2pt] (line6) at (line5.south west) {$\widetilde{\mathbi{x}}^{l}=\mathbi{r}^{l} * \mathbi{x}^{l}$};
\node [anchor=north west,inner sep = 2pt] (line7) at (line6.south west) {$z_{i}^{l+1}=\mathbi{w}^{l} \widetilde{\mathbi{x}}^{l} + b^{l}$};
\node [anchor=north west,inner sep = 2pt] (line8) at (line7.south west) {$x_{i}^{l+1}=f\left(z_{i}^{l+1}\right)$};
......
......@@ -4,10 +4,10 @@
\tikzstyle{node}=[inner sep=0mm, draw,thick,minimum height=3em,minimum width=6em,rounded corners=5pt]
\node[anchor=west,node,fill=ugreen!15] (n1) at (0,0) {训练集};
\node[anchor=west,node,fill=yellow!15] (n2) at ([xshift=4em,yshift=0em]n1.east) {难度评估器};
\node[anchor=west,node,fill=red!15] (n3) at ([xshift=4em,yshift=0em]n2.east) {训练调度器};
\node[anchor=west,node,fill=blue!15] (n4) at ([xshift=4em,yshift=0em]n3.east) {模型训练器};
\node[anchor=west,node,fill=ugreen!30] (n1) at (0,0) {训练集};
\node[anchor=west,node,fill=yellow!30] (n2) at ([xshift=4em,yshift=0em]n1.east) {难度评估器};
\node[anchor=west,node,fill=red!30] (n3) at ([xshift=4em,yshift=0em]n2.east) {训练调度器};
\node[anchor=west,node,fill=blue!30] (n4) at ([xshift=4em,yshift=0em]n3.east) {模型训练器};
\draw [->,very thick] ([xshift=0em,yshift=0em]n1.east) -- ([xshift=0em,yshift=0em]n2.west);
\draw [->,very thick] ([xshift=0em,yshift=0em]n2.east) -- ([xshift=0em,yshift=0em]n3.west);
......
......@@ -680,8 +680,6 @@ b &=& \omega_{\textrm{high}}\cdot |\seq{x}| \label{eq:14-4}
%----------------------------------------------------------------------
\parinterval 有了词格这样的结构,多模型集成又有了新的思路。首先,可以将多个模型的译文融合为词格。注意,这个词格会包含这些模型无法生成的完整译文句子。之后,用一个更强的模型在词格上搜索最优的结果。这个过程有可能找到一些“新”的译文,即结果可能是从多个模型的结果中重组而来的。词格上的搜索模型可以基于多模型的融合,也可以使用一个简单的模型,这里需要考虑的是将神经机器翻译模型适应到词格上进行推断\upcite{DBLP:conf/aaai/SuTXJSL17}。其过程基本与原始的模型推断没有区别,只是需要把模型预测的结果附着到词格中的每条边上,再进行推断。
\parinterval 图\ref{fig:14-11}对比了不同模型集成方法的区别。从系统开发的角度看,假设选择和模型预测融合的复杂度较低,适合快速开发原型系统,而且性能稳定。译文重组需要更多的模块,系统调试的复杂度较高,但是由于看到了更大的搜索空间,因此系统性能提升的潜力较大\footnote{一般来说词格上的Oracle 要比$n$-best译文上的Oracle 的质量高。}。
%----------------------------------------------------------------------
\begin{figure}[htp]
\centering
......@@ -691,6 +689,8 @@ b &=& \omega_{\textrm{high}}\cdot |\seq{x}| \label{eq:14-4}
\end{figure}
%----------------------------------------------------------------------
\parinterval 图\ref{fig:14-11}对比了不同模型集成方法的区别。从系统开发的角度看,假设选择和模型预测融合的复杂度较低,适合快速开发原型系统,而且性能稳定。译文重组需要更多的模块,系统调试的复杂度较高,但是由于看到了更大的搜索空间,因此系统性能提升的潜力较大\footnote{一般来说词格上的Oracle 要比$n$-best译文上的Oracle 的质量高。}。
%----------------------------------------------------------------------------------------
% NEW SECTION
%----------------------------------------------------------------------------------------
......
\begin{tikzpicture}
\tikzstyle{elementnode} = [anchor=center,draw,minimum size=0.6em,inner sep=0.1pt,gray!80]
\tikzstyle{elementnode} = [anchor=center,draw=gray,minimum size=0.6em,inner sep=0.1pt]
\begin{scope}[scale=1.0]
\foreach \i / \j in
......@@ -17,7 +17,7 @@
0/2, 1/2, 2/2, 3/2, 4/2, 5/2, 6/2, 7/2,
0/1, 1/1, 2/1, 3/1, 4/1, 5/1, 6/1, 7/1,
0/0, 1/0, 2/0, 3/0, 4/0, 5/0, 6/0, 7/0}
\node[elementnode,fill=gray!50] (b\i\j) at (0.6em*\i+5.5em,0.6em*\j) {};
\node[elementnode,fill=orange!15] (b\i\j) at (0.6em*\i+5.5em,0.6em*\j) {};
\node [anchor=south west,minimum height=0.5em,minimum width=4.8em,inner sep=0.1pt,very thick,blue!60,draw] (n1) at ([xshift=0em,yshift=0em]a01.south west) {};
......@@ -51,7 +51,7 @@
0/2, 1/2, 2/2, 3/2, 4/2, 5/2, 6/2, 7/2,
0/1, 1/1, 2/1, 3/1, 4/1, 5/1, 6/1, 7/1,
0/0, 1/0, 2/0, 3/0, 4/0, 5/0, 6/0, 7/0}
\node[elementnode,fill=gray!50] (b\i\j) at (0.6em*\i+5.5em,0.6em*\j) {};
\node[elementnode,fill=orange!15] (b\i\j) at (0.6em*\i+5.5em,0.6em*\j) {};
\node [anchor=south west,minimum height=0.5em,minimum width=3em,inner sep=0.1pt,very thick,blue!60,draw] (n1) at ([xshift=0em,yshift=0em]a01.south west) {};
......@@ -85,7 +85,7 @@
0/2, 1/2, 2/2, 3/2, 4/2, 5/2, 6/2, 7/2,
0/1, 1/1, 2/1, 3/1, 4/1, 5/1, 6/1, 7/1,
0/0, 1/0, 2/0, 3/0, 4/0, 5/0, 6/0, 7/0}
\node[elementnode,fill=gray!50] (b\i\j) at (0.6em*\i+5.5em,0.6em*\j) {};
\node[elementnode,fill=orange!15] (b\i\j) at (0.6em*\i+5.5em,0.6em*\j) {};
\node [anchor=south west,minimum height=1.8em,minimum width=3em,inner sep=0.1pt,very thick,blue!60,draw] (n1) at ([xshift=0em,yshift=0em]a00.south west) {};
......
......@@ -20,7 +20,7 @@
\node [anchor=north,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=15em,fill=purple!30,rounded corners=5pt,thick] (n9) at ([xshift=0em,yshift=-1em]n8.south) {$\mathbi{X}\ \quad \mathbi{h}^1\ \quad \mathbi{h}^2\quad \ldots \quad\ \mathbi{h}^l$};
\node [anchor=north,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=15em,fill=teal!30,rounded corners=5pt,thick] (n10) at ([xshift=0em,yshift=-2em]n9.south) {权重累加\ {\red $\mathbi{g}^l$}};
\node [anchor=north,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=15em,fill=teal!30,rounded corners=5pt,thick] (n10) at ([xshift=0em,yshift=-2em]n9.south) {权重累加\ $\mathbi{g}^l$};
\node [anchor=west,rectangle, inner sep=0mm,minimum height=1.2em, rounded corners=5pt,thick] (n11) at ([xshift=0em,yshift=-4.5em]n1.west) {聚合网络};
......
......@@ -4,7 +4,7 @@
%left
\begin{scope}
\foreach \x/\d in {1/2em, 2/8em}
\node[unit,fill=yellow!30] at (0,\d) (ln_\x) {层标准化};
\node[unit,fill=orange!30] at (0,\d) (ln_\x) {层标准化};
\foreach \x/\d in {1/4em}
\node[unit,fill=green!30] at (0,\d) (sa_\x) {8头自注意力:512};
......@@ -19,7 +19,7 @@
\node[unit,fill=red!30] at (0,\d) (conv_\x) {卷积$1 \times 1$:2048};
\foreach \x/\d in {1/12em}
\node[unit,fill=blue!30] at (0,\d) (relu_\x) {RELU};
\node[unit,fill=blue!30] at (0,\d) (relu_\x) {ReLU};
\draw[->,thick] ([yshift=-1.4em]ln_1.-90) -- ([yshift=-0.1em]ln_1.-90);
\draw[->,thick] ([yshift=0.1em]ln_1.90) -- ([yshift=-0.1em]sa_1.-90);
......@@ -44,7 +44,7 @@
\foreach \x/\d in {1/2em, 2/8em, 3/16em}
\node[unit,fill=yellow!30] at (0,\d) (ln_\x) {层标准化};
\node[unit,fill=orange!30] at (0,\d) (ln_\x) {层标准化};
\foreach \x/\d in {1/6em, 2/14em, 3/20em}
\node[draw,circle,minimum size=1em,inner sep=1pt] at (0,\d) (add_\x) {\scriptsize\bfnew{+}};
......@@ -52,8 +52,8 @@
\node[unit,fill=red!30] at (0,4em) (glu_1) {门控线性单元:512};
\node[unit,fill=red!30] at (-3em,10em) (conv_1) {卷积$1 \times 1$:2048};
\node[unit,fill=cyan!30] at (3em,10em) (conv_2) {卷积$3 \times 1$:256};
\node[unit,fill=blue!30] at (-3em,12em) (relu_1) {RELU};
\node[unit,fill=blue!30] at (3em,12em) (relu_2) {RELU};
\node[unit,fill=blue!30] at (-3em,12em) (relu_1) {ReLU};
\node[unit,fill=blue!30] at (3em,12em) (relu_2) {ReLU};
\node[unit,fill=cyan!30] at (0em,18em) (conv_3) {Sep卷积$9 \times 1$:256};
......@@ -83,7 +83,7 @@
\node[minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=blue!30] (act) at (8em, 20em){};
\node[anchor=west,font=\footnotesize] at ([xshift=0.1em]act.east){激活函数};
\node[anchor=north,minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=yellow!30] (nor) at ([yshift=-0.6em]act.south){};
\node[anchor=north,minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=orange!30] (nor) at ([yshift=-0.6em]act.south){};
\node[anchor=west,font=\footnotesize] at ([xshift=0.1em]nor.east){层标准化};
\node[anchor=north,minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=cyan!30] (wc) at ([yshift=-0.6em]nor.south){};
\node[anchor=west,font=\footnotesize] at ([xshift=0.1em]wc.east){宽卷积};
......
\begin{tikzpicture}
\tikzstyle{opnode}=[rectangle,inner sep=0mm,minimum height=2em,minimum width=4em,rounded corners=5pt,fill=teal!30,draw,thick,drop shadow]
\tikzstyle{opnode}=[rectangle,inner sep=0mm,minimum height=2em,minimum width=4em,rounded corners=5pt,fill=orange!30,draw,thick,drop shadow]
\tikzstyle{cnode}=[circle,draw,minimum size=1.2em]
\tikzstyle{mnode}=[rectangle,inner sep=0mm,minimum height=5em,minimum width=11em,rounded corners=5pt,fill=yellow!30,draw,thick,drop shadow]
\tikzstyle{wnode}=[inner sep=0mm,minimum height=1.5em]
......@@ -41,7 +41,7 @@
\node[anchor=south,cnode,fill=white] (cl1) at ([xshift=-4em,yshift=1.5em]m1.south){};
\node[anchor=north,cnode,fill=white] (cl2) at ([xshift=0em,yshift=-1em]m1.north){};
\node[anchor=south west,wnode,align=left,font=\tiny] (wl7) at ([xshift=0.5em,yshift=0em]cl1.east){使用{\color{ugreen}\bfnew{特征}}{\color{blue}\bfnew{数据}}\\中信息进行提取};
\node[anchor=south west,wnode,align=right,font=\tiny] (wl7) at ([xshift=0.5em,yshift=-1em]cl1.east){使用{\color{ugreen}\bfnew{特征}}{\color{blue}\bfnew{数据}}\\中的信息进行\\提取};
\node[anchor=west,wnode,align=right,font=\tiny] (wl8) at ([xshift=0.5em,yshift=0em]cl2.east){使用提取的信息对\\{\color{red!50}\bfnew{模型}}中的参数\\进行训练};
\draw [-,thick,dashed] ([xshift=0em,yshift=0em]ml1.west) -- ([xshift=0em,yshift=0em]ml1.east);
......@@ -65,7 +65,7 @@
\node[anchor=south,cnode,fill=white] (cc1) at ([xshift=-4em,yshift=1.5em]m2.south){};
\node[anchor=north,cnode,fill=white] (cc2) at ([xshift=0em,yshift=-1em]m2.north){};
\node[anchor=south west,wnode,align=left,font=\tiny] (wl7) at ([xshift=0.5em,yshift=0em]cc1.east){使用{\color{red!50} \bfnew{模型}}{\color{blue} \bfnew{数据}}\\中信息进行提取};
\node[anchor=south west,wnode,align=right,font=\tiny] (wl7) at ([xshift=0.5em,yshift=-0.5em]cc1.east){使用{\color{red!50} \bfnew{模型}}{\color{blue} \bfnew{数据}}\\中的信息进行\\提取};
\node[anchor=west,wnode,align=right,font=\tiny] (wl8) at ([xshift=0.5em,yshift=0em]cc2.east){使用提取的信息对\\{\color{red!50} \bfnew{模型}}中的参数\\进行训练};
\draw [-,thick,dashed] ([xshift=0em,yshift=0em]mc1.west) -- ([xshift=0em,yshift=0em]mc1.east);
......
......@@ -31,7 +31,7 @@
\addplot[blue,line width=1.25pt] coordinates {(2.9706,2) (3.1706,1.79) (3.3706,1.63) (3.4656,1.572) (3.6706,1.4602) (3.7136,1.44)};
\addplot[blue,dashed,line width=1.25pt] coordinates {(3.7136,1.44) (3.7136,2)};
\addplot[blue,line width=1.25pt] coordinates {(3.7136,2) (3.9136,1.79) (4.1136,1.63) (4.2086,1.572) (4.4136,1.4602) (4.4566,1.44) (4.7000,1.3574) (5.0000,1.2531)};
\addlegendentry{\scriptsize 调整后的学习率}
\addlegendentry{\scriptsize 重置后的学习率}
\end{axis}
}
......
......@@ -6,8 +6,8 @@
\tikzstyle{every node}=[scale=0.36]
\node[draw,very thick,rounded corners=3pt,drop shadow,fill=red!30,minimum width=40em,minimum height=25em] (rec3) at (2.25,0){};
\node[draw,very thick,rounded corners=3pt,drop shadow,fill=green!30,minimum width=22em,minimum height=25em] (rec2) at (-12.4,0){};
\node[draw,very thick,rounded corners=3pt,drop shadow,fill=yellow!30,minimum width=24em,minimum height=25em] (rec1) at (-24,0){};
\node[draw,very thick,rounded corners=3pt,drop shadow,fill=yellow!30,minimum width=22em,minimum height=25em] (rec2) at (-12.4,0){};
\node[draw,very thick,rounded corners=3pt,drop shadow,fill=orange!30,minimum width=24em,minimum height=25em] (rec1) at (-24,0){};
%left
\node[] (label1) at (-26.4,4){\Huge\bfnew{结构空间}};
......
......@@ -2,7 +2,7 @@
\begin{tikzpicture}
\begin{scope}
\tikzstyle{enode}=[rectangle,inner sep=0mm,minimum height=5em,minimum width=5em,rounded corners=7pt,fill=green!30,draw,thick]
\tikzstyle{enode}=[rectangle,inner sep=0mm,minimum height=5em,minimum width=5em,rounded corners=7pt,fill=blue!30,draw,thick]
\tikzstyle{dnode}=[rectangle,inner sep=0mm,minimum height=2em,minimum width=6.5em,rounded corners=5pt,fill=red!30,draw,thick]
\tikzstyle{wnode}=[inner sep=0mm,minimum height=2em,minimum width=4em]
......
% Figure: the "structure space" of network architectures. Model names are
% arranged in three dashed groups (attention-based, recurrent-unit-based,
% convolution-unit-based) inside one solid outer frame. Each name sits on an
% orange radially fading blob; per the legend node (l4) at the end of the
% picture, a deeper colour means stronger modelling ability on the current task.
\begin{tikzpicture}
% General-purpose styles. `node` and `labelnode` are not referenced anywhere in
% this picture; `word` is used for the group captions and the invisible spacers.
\tikzstyle{node}=[minimum height=2.5em,minimum width=8em,draw,rounded corners=2pt,thick,drop shadow]
\tikzstyle{labelnode}=[minimum height=1.8em]
\tikzstyle{word}=[minimum height=1.8em,font=\scriptsize]
% Three tiers of (fading, text style, blob style), suffixed s / n / l.
% In a fading, transparent!0 is opaque and transparent!100 is fully transparent,
% so the tiers go from faintest centre (s) to fully opaque centre (l).
% Tier s: \footnotesize text, 6em blob, centre 60% transparent.
\tikzfading[name=fadeouts, inner color=transparent!60,outer color=transparent!100]
\tikzstyle{wordnodes}=[inner sep=0mm,font=\footnotesize,text=white]
\tikzstyle{cnodes}=[path fading=fadeouts,minimum size=6em,fill=orange]
% Tier n: default-size text, 9em blob, centre 30% transparent.
\tikzfading[name=fadeoutn, inner color=transparent!30,outer color=transparent!100]
\tikzstyle{wordnoden}=[inner sep=0mm,text=white]
\tikzstyle{cnoden}=[path fading=fadeoutn,minimum size=9em,fill=orange]
% Tier l: \Large text, 12em blob, opaque centre.
\tikzfading[name=fadeoutl, inner color=transparent!0,outer color=transparent!100]
\tikzstyle{wordnodel}=[inner sep=0mm,font=\Large,text=white]
\tikzstyle{cnodel}=[path fading=fadeoutl,minimum size=12em,fill=orange]
% Per-group modifiers. `rnn` and `cnn` are appended after a cnode* style below,
% so their `minimum size` replaces the tier's blob size; `attn` is empty and is
% not referenced in this picture.
\tikzstyle{attn}=[]
\tikzstyle{rnn}=[minimum size=7em]
\tikzstyle{cnn}=[minimum size=5em]
% Title of the whole figure; everything else is positioned relative to it.
\node[anchor=north west] (label) at (0,0){\small\bfnew{结构空间}};
% --- Group 1: attention-based structures (w11..w19) ---
\node[anchor=north west,wordnodes] (w11) at ([xshift=-0em,yshift=-2.5em]label.south){Reformer};
\node[anchor=north west,wordnodel] (w12) at ([xshift=0.2em,yshift=-1em]w11.south east){Transformer-XL};
\node[anchor=north,wordnodel] (w13) at ([xshift=-1.5em,yshift=-0.5em]w12.south){Transformer-DLCL};
\node[anchor=north,wordnodes] (w14) at ([xshift=-1em,yshift=-0.5em]w13.south){Transformer};
\node[anchor=north west,wordnodel] (w15) at ([xshift=2em,yshift=-0.5em]w14.south east){BERT};
\node[anchor=north,wordnodes] (w16) at ([xshift=-2em,yshift=-1em]w14.south){Transformer-ANN};
\node[anchor=north west,wordnodes] (w17) at ([xshift=-0em,yshift=-1em]w16.south east){Transformer-SAN};
\node[anchor=north,wordnoden] (w18) at ([xshift=-0em,yshift=-1.5em]w16.south){ALBERT};
% Fixed capitalisation: the model's name is "Universal Transformer".
\node[anchor=north west,wordnodes] (w19) at ([xshift=-0em,yshift=-0.5em]w18.south east){Universal Transformer};
% Empty spacer nodes that only enlarge the fitted frame (box1) at its
% top-left and bottom-right corners.
\node[anchor=north west,word] (we1) at ([xshift=-0em,yshift=0.3em]w11.north west){};
\node[anchor=south east,word] (we2) at ([xshift=-0em,yshift=-0.3em]w19.south east){};
% Blobs and frame go on the background layer so the white labels stay on top.
\begin{pgfonlayer}{background}
% One blob centred on each label (b15 is shifted 2em to the left of BERT).
\node[anchor=center,cnodes] (b11) at ([xshift=-0em,yshift=-0em]w11.center){};
\node[anchor=center,cnodel] (b12) at ([xshift=-0em,yshift=-0em]w12.center){};
\node[anchor=center,cnodel] (b13) at ([xshift=-0em,yshift=-0em]w13.center){};
\node[anchor=center,cnodes] (b14) at ([xshift=-0em,yshift=-0em]w14.center){};
\node[anchor=center,cnodel] (b15) at ([xshift=-2em,yshift=-0em]w15.center){};
\node[anchor=center,cnodes] (b16) at ([xshift=-0em,yshift=-0em]w16.center){};
\node[anchor=center,cnodes] (b17) at ([xshift=-0em,yshift=-0em]w17.center){};
\node[anchor=center,cnoden] (b18) at ([xshift=-0em,yshift=-0em]w18.center){};
\node[anchor=center,cnodes] (b19) at ([xshift=-0em,yshift=-0em]w19.center){};
% Dashed frame fitted around the group.
\node [rectangle,inner sep=1em,draw=black,dashed,thick,rounded corners=8pt] [fit = (w11) (w15) (w18) (w19) (we1) (we2)] (box1) {};
% Additional blobs that carry no label text.
\node[anchor=center,cnodes] (bb1) at ([xshift=1em,yshift=-0em]w18.east){};
\node[anchor=center,cnodes] (bb2) at ([xshift=-0.5em,yshift=-0em]w13.west){};
\node[anchor=center,cnodes] (bb3) at ([xshift=-0.5em,yshift=0.5em]w18.west){};
\end{pgfonlayer}
% Caption of group 1, overlapping the top edge of its frame.
\node[anchor=south,word] (l1) at ([xshift=-0em,yshift=-0.5em]box1.north){基于注意力的结构};
% --- Group 2: recurrent-unit-based structures (w21..w25), placed to the right of w12 ---
\node[anchor=south west,wordnoden] (w21) at ([xshift=6.5em,yshift=1em]w12.north east){SRU};
\node[anchor=north west,wordnodel] (w22) at ([xshift=0.2em,yshift=0.5em]w21.south east){GRU};
\node[anchor=north west,wordnoden] (w23) at ([xshift=0em,yshift=0em]w22.south east){RNN};
\node[anchor=north,wordnoden] (w24) at ([xshift=0em,yshift=-1.5em]w22.south){LSTM};
\node[anchor=north,wordnodel] (w25) at ([xshift=0em,yshift=-0.5em]w24.south){Bi-LSTM};
\begin{pgfonlayer}{background}
% Blobs behind each label; `rnn` sets every blob in this group to 7em.
\node[anchor=center,cnoden,rnn] (b21) at ([xshift=-0em,yshift=-0em]w21.center){};
\node[anchor=center,cnodel,rnn] (b22) at ([xshift=-0em,yshift=-0em]w22.center){};
\node[anchor=center,cnoden,rnn] (b23) at ([xshift=-0em,yshift=-0em]w23.center){};
\node[anchor=center,cnoden,rnn] (b24) at ([xshift=-0em,yshift=-0em]w24.center){};
\node[anchor=center,cnodel,rnn] (b25) at ([xshift=-0em,yshift=-0em]w25.center){};
% Dashed frame fitted around the group.
\node [rectangle,inner sep=1em,draw=black,dashed,thick,rounded corners=8pt] [fit = (w21) (w25) (w23)] (box2) {};
% Additional blobs that carry no label text (bb6..bb9 are 4em wide).
\node[anchor=center,cnodes] (bb4) at ([xshift=-0.5em,yshift=-0em]w24.west){};
\node[anchor=center,cnodes] (bb5) at ([xshift=0.5em,yshift=-0em]w24.west){};
\node[anchor=south east,cnodel,minimum size=4em] (bb6) at ([xshift=0em,yshift=1em]w21.north west){};
\node[anchor=south west,cnodel,minimum size=4em] (bb7) at ([xshift=-0.5em,yshift=0.5em]w23.north east){};
\node[anchor=west,cnodel,minimum size=4em] (bb8) at ([xshift=-0em,yshift=0em]w23.east){};
\node[anchor=south,cnodel,minimum size=4em] (bb9) at ([xshift=0.2em,yshift=0em]bb8.north){};
\end{pgfonlayer}
% Caption of group 2.
\node[anchor=south,word] (l2) at ([xshift=-0em,yshift=-0.5em]box2.north){基于循环单元的结构};
% --- Group 3: convolution-unit-based structures (w31..w36), placed to the right of w17 ---
% Fixed spelling: the network's published name is "GoogLeNet".
\node[anchor= west,wordnoden] (w31) at ([xshift=4em,yshift=-0.2em]w17.east){GoogLeNet};
\node[anchor=north,wordnodes] (w32) at ([xshift=2em,yshift=-0.2em]w31.south){ResNet};
\node[anchor=north,wordnoden] (w33) at ([xshift=0em,yshift=-1.5em]w31.south){LeNet};
\node[anchor=east,wordnoden] (w34) at ([xshift=2.5em,yshift=0.4em]w32.east){CNN};
\node[anchor=south west,wordnoden] (w35) at ([xshift=0.5em,yshift=0.3em]w34.north east){AlexNet};
\node[anchor=north,wordnodel] (w36) at ([xshift=-1em,yshift=-2em]w35.south){VGG-Net};
\begin{pgfonlayer}{background}
% Blobs behind each label; `cnn` sets every blob in this group to 5em.
\node[anchor=center,cnodel,cnn] (b31) at ([xshift=-0.5em,yshift=-0em]w31.center){};
\node[anchor=center,cnoden,cnn] (b32) at ([xshift=-0em,yshift=-0em]w32.center){};
\node[anchor=center,cnodel,cnn] (b33) at ([xshift=-0em,yshift=-0em]w33.center){};
\node[anchor=center,cnodel,cnn] (b34) at ([xshift=-0em,yshift=-0em]w34.center){};
\node[anchor=center,cnoden,cnn] (b35) at ([xshift=-0em,yshift=-0em]w35.center){};
\node[anchor=center,cnodel,cnn] (b36) at ([xshift=-0em,yshift=-0em]w36.center){};
% Dashed frame fitted around the group (tighter 0.5em padding than box1/box2).
\node [rectangle,inner sep=0.5em,draw=black,dashed,thick,rounded corners=8pt] [fit = (w31) (w33) (w35) (w36)] (box3) {};
% Additional blobs that carry no label text.
\node[anchor=center,cnodes] (bb10) at ([xshift=1em,yshift=-0em]w31.west){};
\node[anchor=center,cnodes] (bb11) at ([xshift=0.5em,yshift=-0em]w34.west){};
\node[anchor=center,cnodes] (bb12) at ([xshift=0em,yshift=1em]w34.north){};
\end{pgfonlayer}
% Caption of group 3.
\node[anchor=south,word] (l3) at ([xshift=-0em,yshift=-0.5em]box3.north){基于卷积单元的结构};
% Solid outer frame around the title and the three groups, plus the colour legend above its top-right corner.
\node [rectangle,inner sep=1em,draw=black,very thick,rounded corners=8pt] [fit = (label) (box1) (box2) (box3)] (box4) {};
\node[anchor=south east,word,text=ublue] (l4) at ([xshift=-0em,yshift=0em]box4.north east){颜色越深表示模型对当前任务的建模能力越强};
\end{tikzpicture}
\ No newline at end of file
......@@ -4,9 +4,9 @@
\begin{tikzpicture}
\tikzstyle{node}=[minimum height=6em,inner sep=4pt,align=left,draw,font=\footnotesize,rounded corners=4pt,thick,drop shadow]
\node[node,fill=red!30] (n1) at (0,0){\scriptsize\bfnew{超网络} \\ [1ex] 模型结构参数 \\[0.4ex] 网络参数};
\node[anchor=west,node,fill=yellow!30] (n2) at ([xshift=4em]n1.east){\scriptsize\bfnew{优化后的超网络} \\ [1ex]模型{\color{red}结构参数}(已优化) \\ [0.4ex]网络参数(已优化)};
\node[anchor=west,node,fill=green!30] (n3) at ([xshift=6em]n2.east){\scriptsize\bfnew{找到的模型结构}};
\node[node,fill=orange!30] (n1) at (0,0){\scriptsize\bfnew{超网络} \\ [1ex] 模型结构参数 \\[0.4ex] 网络参数};
\node[anchor=west,node,fill=yellow!30] (n2) at ([xshift=4em]n1.east){\scriptsize\bfnew{优化后的超网络} \\ [1ex]模型{\color{red}结构参数}(已优化) \\ [0.4ex]网络参数(已优化)};
\node[anchor=west,node,fill=red!30] (n3) at ([xshift=6em]n2.east){\scriptsize\bfnew{找到的模型结构}};
\draw[-latex,thick] (n1.0) -- node[above,align=center,font=\scriptsize]{优化后的\\超网络}(n2.180);
\draw[-latex,thick] (n2.0) -- node[above,align=center,font=\scriptsize]{根据结构参数\\离散化结构}(n3.180);
......
......@@ -5,7 +5,7 @@
\tikzstyle{node}=[minimum height=2.5em,minimum width=8em,draw,rounded corners=2pt,thick,drop shadow]
\node[node,fill=red!30] (n1) at (0,0){\small\bfnew{环境}};
\node[anchor=south,node,fill=green!30] (n2) at ([yshift=5em]n1.north){\small\bfnew{智能体}};
\node[anchor=south,node,fill=blue!30] (n2) at ([yshift=5em]n1.north){\small\bfnew{智能体}};
\node[anchor=north,font=\footnotesize] at ([yshift=-0.2em]n1.south){(结构所应用于的任务)};
\node[anchor=south,font=\footnotesize] at ([yshift=0.2em]n2.north){(结构生成器)};
......
......@@ -48,6 +48,9 @@
\node [anchor=north,rotate=90] (n2) at (5.4cm,1cm) {\scriptsize 训练集\ PPL};
\node [anchor=north,rotate=90] (n3) at (4.2cm,1cm) {\scriptsize 校验集\ PPL};
\node [anchor=north,rotate=90] (n4) at (10.7cm,1cm) {\scriptsize 校验集\ PPL};
\node [anchor=north] (label1) at (1.6cm,-1.2cm) {\small (a)浅层模型};
\node [anchor=north] (label2) at (8.4cm,-1.2cm) {\small (b)深层模型};
\end{tikzpicture}
%---------------------------------------------------------------------
\ No newline at end of file
......@@ -8,7 +8,7 @@
\begin{tikzpicture}[scale=0.6]
\begin{scope}
{\footnotesize
{\small
\foreach \i in {1,...,5}{
\node [draw,thick,minimum size=10pt] at (1.2*\i,0) {1};
}
......@@ -17,7 +17,7 @@
\end{scope}
\begin{scope}[yshift=-2.5em]
{\footnotesize
{\small
\foreach \i in {1,...,4}{
\node [draw,thick,minimum size=10pt] at (1.2*\i,0) {{\color{red} 2}};
}
......@@ -26,7 +26,7 @@
\end{scope}
\begin{scope}[yshift=-5.0em]
{\footnotesize
{\small
\foreach \i in {1,...,6}{
\node [draw,thick,minimum size=10pt] at (1.2*\i,0) {{\color{ublue} 3}};
}
......@@ -35,7 +35,7 @@
\end{scope}
\begin{scope}[yshift=-7.5em]
{\footnotesize
{\small
\foreach \i in {1,...,12}{
\node [draw,thick,minimum size=10pt] at (1.2*\i,0) {{\color{ugreen} 4}};
}
......@@ -44,7 +44,7 @@
\end{scope}
\begin{scope}[yshift=-10.0em]
{\footnotesize
{\small
\foreach \i in {1,...,2}{
\node [draw,thick,minimum size=10pt] at (1.2*\i,0) {{\color{purple} 5}};
}
......@@ -53,7 +53,7 @@
\end{scope}
\begin{scope}[yshift=-12.5em]
{\footnotesize
{\small
\foreach \i in {1,...,1}{
\node [draw,thick,minimum size=10pt] at (1.2*\i,0) {{\color{orange} 6}};
}
......
......@@ -118,11 +118,11 @@ F(x)=\int_{-\infty}^x f(x)\textrm{d}x
\begin{itemize}
\vspace{0.5em}
\item 边缘概率:矩形$A$或者矩形$B$的面积。
\item {\small\bfnew{边缘概率}}:矩形$A$或者矩形$B$的面积。
\vspace{0.5em}
\item 联合概率:矩形$C$的面积。
\item {\small\bfnew{联合概率}}:矩形$C$的面积。
\vspace{0.5em}
\item 条件概率:联合概率/对应的边缘概率,如:$\funp{P}(A \mid B)$=矩形$C$的面积/矩形$B$的面积。
\item {\small\bfnew{条件概率}}:联合概率/对应的边缘概率,如:$\funp{P}(A \mid B)$=矩形$C$的面积/矩形$B$的面积。
\vspace{0.5em}
\end{itemize}
......@@ -277,9 +277,9 @@ F(x)=\int_{-\infty}^x f(x)\textrm{d}x
\begin{itemize}
\vspace{0.5em}
\item 非负性,即$\funp{D}_{\textrm{KL}} (\funp{P} \parallel \funp{Q}) \ge 0$,等号成立条件是$\funp{P}$和$\funp{Q}$相等。
\item {\small\bfnew{非负性}},即$\funp{D}_{\textrm{KL}} (\funp{P} \parallel \funp{Q}) \ge 0$,等号成立条件是$\funp{P}$和$\funp{Q}$相等。
\vspace{0.5em}
\item 不对称性,即$\funp{D}_{\textrm{KL}} (\funp{P} \parallel \funp{Q}) \neq \funp{D}_{\textrm{KL}} (\funp{Q} \parallel \funp{P})$,所以$\textrm{KL}$距离并不是常用的欧氏空间中的距离。为了消除这种不对称性,有时也会使用$\funp{D}_{\textrm{KL}} (\funp{P} \parallel \funp{Q})+\funp{D}_{\textrm{KL}} (\funp{Q} \parallel \funp{P})$作为度量两个分布差异性的函数。
\item {\small\bfnew{不对称性}},即$\funp{D}_{\textrm{KL}} (\funp{P} \parallel \funp{Q}) \neq \funp{D}_{\textrm{KL}} (\funp{Q} \parallel \funp{P})$,所以$\textrm{KL}$距离并不是常用的欧氏空间中的距离。为了消除这种不对称性,有时也会使用$\funp{D}_{\textrm{KL}} (\funp{P} \parallel \funp{Q})+\funp{D}_{\textrm{KL}} (\funp{Q} \parallel \funp{P})$作为度量两个分布差异性的函数。
\vspace{0.5em}
\end{itemize}
......@@ -850,13 +850,13 @@ c(\cdot) & \textrm{当计算最高阶模型时} \\
\begin{itemize}
\vspace{0.5em}
\item 完备性:当问题有解时,使用该策略能否找到问题的解。
\item {\small\bfnew{完备性}}:当问题有解时,使用该策略能否找到问题的解。
\vspace{0.5em}
\item 最优性:搜索策略能否找到最优解。
\item {\small\bfnew{最优性}}:搜索策略能否找到最优解。
\vspace{0.5em}
\item 时间复杂度:找到最优解需要多长时间。
\item {\small\bfnew{时间复杂度}}:找到最优解需要多长时间。
\vspace{0.5em}
\item 空间复杂度:执行策略需要多少内存。
\item {\small\bfnew{空间复杂度}}:执行策略需要多少内存。
\vspace{0.5em}
\end{itemize}
......
......@@ -496,7 +496,7 @@ g(\seq{s},\seq{t}) & \equiv & \prod_{j,i \in \widehat{A}}{\funp{P}(s_j,t_i)} \ti
%----------------------------------------------------------------------------------------
% NEW SECTION
%----------------------------------------------------------------------------------------
\sectionnewpage
\section{噪声信道模型}
\vspace{0.5em}
......@@ -571,7 +571,7 @@ g(\seq{s},\seq{t}) & \equiv & \prod_{j,i \in \widehat{A}}{\funp{P}(s_j,t_i)} \ti
%----------------------------------------------------------------------------------------
% NEW SECTION
%----------------------------------------------------------------------------------------
\sectionnewpage
\section{统计机器翻译的三个基本问题}
\parinterval 公式\eqref{eq:5-17}给出了统计机器翻译的数学描述。为了实现这个过程,面临着三个基本问题:
......
......@@ -31,7 +31,6 @@
% NEW SECTION
%----------------------------------------------------------------------------------------
\sectionnewpage
\section{基于扭曲度的模型}
下面将介绍扭曲度在机器翻译中的定义及使用方法。这也带来了两个新的翻译模型\ \dash\ IBM模型2\upcite{DBLP:journals/coling/BrownPPM94}和HMM\upcite{vogel1996hmm}。
......
......@@ -30,7 +30,7 @@
%----------------------------------------------------------------------------------------
% NEW SECTION
%----------------------------------------------------------------------------------------
\sectionnewpage
\section{翻译中的短语信息}
不难发现,基于单词的模型并不能很好地捕捉单词间的搭配关系。相比之下,使用更大颗粒度的翻译单元是一种对搭配进行处理的方法。下面来一起看看,基于单词的模型所产生的问题以及如何使用基于短语的模型来缓解该问题。
......
......@@ -30,7 +30,7 @@
%----------------------------------------------------------------------------------------
% NEW SECTION
%----------------------------------------------------------------------------------------
\sectionnewpage
\section{翻译中句法信息的使用}
\parinterval 使用短语的优点在于可以捕捉到具有完整意思的连续词串,因此能够对局部上下文信息进行建模。当单词之间的搭配和依赖关系出现在连续词串中时,短语可以很好地对其进行描述。但是,当单词之间距离很远时,使用短语的“效率”很低。同$n$-gram语言模型一样,当短语长度变长时,数据会变得非常稀疏。比如,很多实验已经证明,如果在测试数据中有一个超过5个单词的连续词串,那么它在训练数据中往往是很低频的现象,更长的短语甚至都很难在训练数据中找到。
......
......@@ -52,7 +52,7 @@
\node [secnode,anchor=south west,fill=cyan!20,minimum width=14.0em,align=center] (sec13) at ([yshift=0.5em,xshift=0.5em]part4.south west) {第十三章\hspace{1em} 神经机器翻译模型训练};
\node [secnode,anchor=west,fill=cyan!20,minimum width=14.0em,align=center] (sec14) at ([xshift=0.6em]sec13.east) {第十四章\hspace{1em} 神经机器翻译模型推断};
\node [secnode,anchor=south west,fill=green!30,minimum width=9em,minimum height=4.5em,align=center] (sec15) at ([yshift=0.8em]sec13.north west) {第十五章\\ 神经机器翻译 \\ 结构优化};
\node [secnode,anchor=south west,fill=green!30,minimum width=9em,minimum height=4.5em,align=center] (sec16) at ([xshift=0.8em]sec15.south east) {第十六章\\ 低资源 \\ 机器翻译};
\node [secnode,anchor=south west,fill=green!30,minimum width=9em,minimum height=4.5em,align=center] (sec16) at ([xshift=0.8em]sec15.south east) {第十六章\\ 低资源 \\ 神经机器翻译};
\node [secnode,anchor=south west,fill=green!30,minimum width=9em,minimum height=4.5em,align=center] (sec17) at ([xshift=0.8em]sec16.south east) {第十七章\\ 多模态、多层次 \\ 机器翻译};
\node [secnode,anchor=south west,fill=amber!25,minimum width=28.7em,align=center] (sec18) at ([yshift=0.8em]sec15.north west) {第十八章\hspace{1em} 机器翻译应用技术};
\node [rectangle,draw,dotted,thick,inner sep=0.1em,fill opacity=1] [fit = (sec13) (sec14)] (nmtbasebox) {};
......
......@@ -89,7 +89,7 @@
\noindent 顾问:姚天顺\ \ 王宝库\\
\noindent \textsc{\url{https://opensource.niutrans.com/mtbook/index.html}}\\
\noindent \textsc{\url{https://opensource.niutrans.com/mtbook/homepage.html}}\\
\noindent \textsc{\url{https://github.com/NiuTrans/MTBook}}\\
\noindent {\red{Licensed under the Creative Commons Attribution-NonCommercial 4.0 International License (the ``License''). You may not use this file except in compliance with the License. You may obtain a copy of the License at \url{http://creativecommons.org/licenses/by-nc/4.0}. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \textsc{``as is'' basis, without warranties or conditions of any kind}, either express or implied. See the License for the specific language governing permissions and limitations under the License.}}\\
......@@ -144,7 +144,7 @@
%\include{Chapter10/chapter10}
%\include{Chapter11/chapter11}
%\include{Chapter12/chapter12}
\include{Chapter13/chapter13}
%\include{Chapter13/chapter13}
%\include{Chapter14/chapter14}
%\include{Chapter15/chapter15}
%\include{Chapter16/chapter16}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论