Commit b0ec92cf by 曹润柘

合并分支 'caorunzhe' 到 'master'

add chapter7

查看合并请求 !28
parents b72acbc8 78eaebf6
...@@ -7,11 +7,11 @@ ...@@ -7,11 +7,11 @@
\begin{tikzpicture} \begin{tikzpicture}
\newlength{\mystep} %\newlength{\mystep}
\newlength{\wseg} %\newlength{\wseg}
\newlength{\hseg} %\newlength{\hseg}
\newlength{\wnode} %\newlength{\wnode}
\newlength{\hnode} %\newlength{\hnode}
\setlength{\wseg}{1.5cm} \setlength{\wseg}{1.5cm}
\setlength{\hseg}{1.0cm} \setlength{\hseg}{1.0cm}
......
This source diff could not be displayed because it is too large. You can view the blob instead.
%%%------------------------------------------------------------------------------------------------------------
%%% Residual connection combined with layer normalization (LN), two variants:
%%%   (a) top row:    x_l -> F -> (+) -> LN -> x_{l+1}   (caption: post-norm style)
%%%   (b) bottom row: x_l -> F -> LN -> (+) -> x_{l+1}   (caption: pre-norm style)
%%% NOTE(review): row (b) draws F before LN; if the usual pre-norm layout (LN before F) is intended, confirm the node order.
\begin{center}
\begin{tikzpicture}
\begin{scope}[minimum height = 20pt]
% Row (a): input, sub-layer F, adder circle, LN, output.
\node [anchor=east] (x1) at (-0.5em, 0) {$x_l$};
\node [anchor=west,draw=green,fill=green!20,inner xsep=5pt] (F1) at ([xshift=2em]x1.east){$\mathcal{F}$};
\node [anchor=west,circle,draw,minimum size=1em] (n1) at ([xshift=2em]F1.east) {};
\node [anchor=west,draw=green,fill=green!20,inner xsep=5pt] (ln1) at ([xshift=2em]n1.east){\textrm{LN}};
% The output of layer l is the input of layer l+1 (was mistyped as l+l).
\node [anchor=west] (x2) at ([xshift=2em]ln1.east) {$x_{l+1}$};
% Row (b): input, sub-layer F, LN, adder circle, output.
\node [anchor=north] (x3) at ([yshift=-5em]x1.south) {$x_l$};
\node [anchor=west,draw=green,fill=green!20,inner xsep=5pt] (F2) at ([xshift=2em]x3.east){$\mathcal{F}$};
\node [anchor=west,draw=green,fill=green!20,inner xsep=5pt] (ln2) at ([xshift=2em]F2.east){\textrm{LN}};
\node [anchor=west,circle,draw,minimum size=1em] (n2) at ([xshift=2em]ln2.east){};
\node [anchor=west] (x4) at ([xshift=2em]n2.east) {$x_{l+1}$};
% Main data flow of row (a).
\draw[->, line width=1pt] ([xshift=-0.1em]x1.east)--(F1.west);
\draw[->, line width=1pt] ([xshift=-0.1em]F1.east)--(n1.west);
\draw[->, line width=1pt] (n1.east)--node[above]{$y_l$}(ln1.west);
\draw[->, line width=1pt] ([xshift=-0.1em]ln1.east)--(x2.west);
% Main data flow of row (b).
\draw[->, line width=1pt] ([xshift=-0.1em]x3.east)--(F2.west);
\draw[->, line width=1pt] ([xshift=-0.1em]F2.east)--(ln2.west);
\draw[->, line width=1pt] ([xshift=0.1em]ln2.east)--node[above]{$y_l$}(n2.west);
\draw[->, line width=1pt] (n2.east)--(x4.west);
% Skip connections: from above each input into the top of the adder circle.
\draw[->, line width=1pt] (x1.north) -- ([yshift=1em]x1.north) -- ([yshift=1.4em]n1.north) -- (n1.north);
\draw[->, line width=1pt] (x3.north) -- ([yshift=1em]x3.north) -- ([yshift=1.4em]n2.north) -- (n2.north);
% Horizontal and vertical strokes inside each circle form the "plus" sign.
\draw[-] (n1.west)--(n1.east);
\draw[-] (n1.north)--(n1.south);
\draw[-] (n2.west)--(n2.east);
\draw[-] (n2.north)--(n2.south);
% Empty helper nodes above the inputs; they are only used to enlarge the fitted background boxes upwards.
\node [anchor=south] (k1) at ([yshift=-0.1em]x1.north){};
\node [anchor=south] (k2) at ([yshift=-0.1em]x3.north){};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.3em,fill=orange!10] [fit = (x1) (F1) (n1) (ln1) (x2) (k1)] (box0) {};
\node [rectangle,inner sep=0.3em,fill=blue!10] [fit = (x3) (F2) (n2) (ln2) (x4) (k2)] (box1) {};
\end{pgfonlayer}
% Sub-figure captions placed under each background box.
\node [anchor=north] (c1) at (box0.south){\small (a)后作方式的残差连接};
\node [anchor=north] (c2) at (box1.south){\small (b)前作方式的残差连接};
\end{scope}
\end{tikzpicture}
\end{center}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
% Node looks shared by this figure; defined inside the scope so they stay local.
\tikzset{
  dlclbase/.style={rectangle,inner sep=0mm,minimum height=1.2em,rounded corners=5pt,thick},
  dlclz/.style={dlclbase,draw,minimum width=3em,fill=orange!20},
  dlclwide/.style={dlclbase,draw,minimum width=15em}
}
% Top row: the encoder label followed by the per-layer outputs z_0 ... z_{l+1}.
\node [anchor=north,dlclbase,minimum width=2em] (n1) at (0, 0) {编码端};
\node [anchor=west,dlclbase,minimum width=0em] (n2) at ([xshift=3.5em,yshift=-0.5em]n1.east) {$z_0$};
\node [anchor=west,dlclz] (n3) at ([xshift=3.5em]n2.east) {$z_1$};
\node [anchor=west,dlclz] (n4) at ([xshift=3.5em]n3.east) {$z_2$};
\node [anchor=west,dlclbase,minimum width=1em] (n6) at ([xshift=1.5em]n4.east) {$\ldots$};
\node [anchor=west,dlclz] (n5) at ([xshift=3.5em]n6.east) {$z_{l}$};
\node [anchor=west,dlclz] (n7) at ([xshift=1.5em]n5.east) {$z_{l+1}$};
% Lower stack: three wide bars placed below z_2.
\node [anchor=north,dlclwide,fill=teal!17] (n8) at ([yshift=-3em]n4.south) {层正则化};
\node [anchor=north,dlclwide,fill=purple!17] (n9) at ([yshift=-1em]n8.south) {$L_0\ \quad L_1\ \quad L_2\quad \ldots \quad\ L_l$};
\node [anchor=north,dlclwide,fill=teal!17] (n10) at ([yshift=-2em]n9.south) {权重累加};
% Label of the lower region plus two empty helper nodes that only serve as fit targets.
\node [anchor=west,dlclbase] (n11) at ([yshift=-4.5em]n1.west) {聚合网络};
\node [anchor=east,dlclbase,minimum width=9em] (n12) at ([yshift=-4.5em]n7.east) {};
\node [anchor=south,dlclbase,minimum height=1em,minimum width=1em] (n13) at ([yshift=1em]n8.north) {};
% Tinted backgrounds behind the upper and lower regions.
\begin{pgfonlayer}{background}
\node[rectangle,inner sep=2pt,fill=blue!7] [fit = (n1) (n7) (n13)] (bg1) {};
\node[rectangle,inner sep=2pt,fill=red!7] [fit = (n10) (n8) (n11) (n12)] (bg2) {};
\end{pgfonlayer}
% Curved arrows from z_0, z_1 and z_l into the top of the first bar; z_2 sits directly above it and uses a straight arrow.
\draw[->,thick] ([xshift=0.5em]n2.south)..controls +(south:2em) and +(north:2em)..(n8.north);
\draw[->,thick] (n3.south)..controls +(south:2em) and +(north:2em)..(n8.north);
\draw[->,thick] (n5.south)..controls +(south:2em) and +(north:2em)..(n8.north);
\draw[->,thick] (n4.south) -- (n8.north);
\draw[->,thick] (n8.south) -- (n9.north);
% Four arrows fan in from points along the bottom of the L_0..L_l bar to the last bar.
\foreach \dx in {-4.5em,-2em,0em,4.5em}
  \draw[->,thick] ([xshift=\dx]n9.south)..controls +(south:0.8em) and +(north:0.8em)..(n10.north);
% The result of the last bar is routed back up into z_{l+1}.
\draw[->,thick] (n10.east)..controls +(east:5em) and +(south:1.5em)..(n7.south);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%%%------------------------------------------------------------------------------------------------------------
%%% Four rows of "sublayer" boxes, each followed by a circle node; all rows branch off one shared entry circle (c44).
%%% Solid, strongly tinted boxes and solid lines versus dashed, lightly tinted ones distinguish two kinds of sublayers/paths.
\begin{center}
\begin{tikzpicture}
\begin{scope}[scale=0.7]
% Row 1: one solid sublayer and its circle.
\node [anchor=east,fill=red!50,draw,rounded corners=3pt] (s11) at (-0.5em, 0) {sublayer1};
\node [anchor=west,draw,circle,line width=1pt] (c11) at ([xshift=2em]s11.east) {};
% Row 2: two dashed sublayers, each followed by a dashed circle.
\node [anchor=north,fill=red!10,draw,dashed,rounded corners=3pt] (s21) at ([yshift=-3em]s11.south) {sublayer1};
\node [anchor=west, draw,circle,dashed,line width=1pt] (c21) at ([xshift=2em]s21.east) {};
\node [anchor=west,fill=red!10,draw,dashed,rounded corners=3pt] (s22) at ([xshift=2em]c21.east) {sublayer2};
\node [anchor=west, draw,circle,dashed,line width=1pt] (c22) at ([xshift=2em]s22.east) {};
% Row 3: one solid sublayer and its circle.
\node [anchor=north,fill=red!50,draw,rounded corners=3pt] (s31) at ([yshift=-3em]s21.south) {sublayer1};
\node [anchor=west,draw,circle,line width=1pt] (c31) at ([xshift=2em]s31.east) {};
% Row 4: shared entry circle c44 on the left, two dashed sublayers, then a solid third sublayer.
\node [anchor=north,fill=red!10,draw,dashed,rounded corners=3pt] (s41) at ([yshift=-3em]s31.south) {sublayer1};
\node [anchor=east, draw,circle,line width=1pt] (c44) at ([xshift=-2em]s41.west) {};
\node [anchor=west, draw,circle,dashed,line width=1pt] (c41) at ([xshift=2em]s41.east) {};
\node [anchor=west,fill=red!10,draw,dashed,rounded corners=3pt] (s42) at ([xshift=2em]c41.east) {sublayer2};
\node [anchor=west, draw,circle,dashed,line width=1pt] (c42) at ([xshift=2em]s42.east) {};
\node [anchor=west,fill=red!50,draw,rounded corners=3pt] (s43) at ([xshift=2em]c42.east) {sublayer3};
\node [anchor=west, draw,circle,line width=1pt] (c43) at ([xshift=2em]s43.east) {};
% Row 1 wiring: a bypass running above the box into the top of c11, the direct input into the box,
% and the row output dropping into the top of c22.
\draw[-,rounded corners,line width=1pt] (c44.east) -- ([xshift=0.8em]c44.east) -- ([xshift=-1.2em,yshift=2.2em]s11.west) -- ([xshift=2.7em,yshift=2.2em]s11.east) -- (c11.north);
\draw[-,rounded corners,line width=1pt] (c44.east) -- ([xshift=0.8em]c44.east) -- ([xshift=-1.2em]s11.west) -- (s11.west);
\draw[-,rounded corners,line width=1pt] (s11.east) -- (c11.west);
\draw[-,rounded corners,line width=1pt] (c11.east) -- ([xshift=11.3em]c11.east) -- (c22.north);
% Row 2 wiring (dashed); the row output drops into the top of c43.
\draw[-,rounded corners,line width=1pt,dashed] (c44.east) -- ([xshift=0.8em]c44.east) -- ([xshift=-1.2em,yshift=2.2em]s21.west) -- ([xshift=2.7em,yshift=2.2em]s21.east) -- (c21.north);
\draw[-,rounded corners,line width=1pt,dashed] (c44.east) -- ([xshift=0.8em]c44.east) -- ([xshift=-1.2em]s21.west) -- (s21.west);
\draw[-,rounded corners,line width=1pt,dashed] (s21.east) -- (c21.west);
\draw[-,rounded corners,line width=1pt,dashed] (c21.east) -- (s22.west);
\draw[-,rounded corners,line width=1pt,dashed] (s22.east) -- (c22.west);
\draw[-,rounded corners,line width=1pt] (c22.east) -- ([xshift=11.3em]c22.east) -- (c43.north);
% Row 3 wiring; the row output drops into the top of c42.
\draw[-,rounded corners,line width=1pt] (c44.east) -- ([xshift=0.8em]c44.east) -- ([xshift=-1.2em,yshift=2.2em]s31.west) -- ([xshift=2.7em,yshift=2.2em]s31.east) -- (c31.north);
\draw[-,rounded corners,line width=1pt] (c44.east) -- ([xshift=0.8em]c44.east) -- ([xshift=-1.2em]s31.west) -- (s31.west);
\draw[-,rounded corners,line width=1pt] (s31.east) -- (c31.west);
\draw[-,rounded corners,line width=1pt] (c31.east) -- ([xshift=11.3em]c31.east) -- (c42.north);
% Row 4 wiring: dashed through the first two sublayers, solid through sublayer3, then the final output arrow.
\draw[-,rounded corners,line width=1pt,dashed] (c44.east) -- ([xshift=0.8em]c44.east) -- ([xshift=-1.2em,yshift=2.2em]s41.west) -- ([xshift=2.7em,yshift=2.2em]s41.east) -- (c41.north);
\draw[-,rounded corners,line width=1pt,dashed] (c44.east) -- (s41.west);
\draw[-,rounded corners,line width=1pt,dashed] (s41.east) -- (c41.west);
\draw[-,rounded corners,line width=1pt,dashed] (c41.east) -- (s42.west);
\draw[-,rounded corners,line width=1pt,dashed] (s42.east) -- (c42.west);
\draw[-,rounded corners,line width=1pt] (c42.east) -- (s43.west);
\draw[-,rounded corners,line width=1pt] (s43.east) -- (c43.west);
\draw[->,rounded corners,line width=1pt] (c43.east) -- ([xshift=2em]c43.east);
\end{scope}
\end{tikzpicture}
\end{center}
\begin{tikzpicture}
\begin{scope}
% Local styles: small token labels and the large rounded module boxes.
\tikzset{
  word/.style={font=\scriptsize},
  model/.style={rectangle,draw,minimum height=3em,minimum width=6em,rounded corners=4pt,fill=red!15!white}
}
% Attention box in the centre, Decoder box to its right.
\node [model,fill=blue!15!white] (ate) at (0,0) {Attention};
\node [model,minimum width=10.5em] (decoder) at ([xshift=8em]ate.east) {Decoder};
% Token row below the decoder; "\#" entries are the placeholder tokens.
\node [word] (w1) at ([yshift=-2em,xshift=1em]decoder.south) {$x_3$};
\node [word] (w2) at ([xshift=-1em]w1.west) {\#};
\node [word] (w3) at ([xshift=-1em]w2.west) {\#};
\node [word] (w4) at ([xshift=-1em]w3.west) {\#};
\node [word] (w5) at ([xshift=1em]w1.east) {$x_4$};
\node [word] (w6) at ([xshift=1em]w5.east) {\#};
% Token row above the decoder.
\node [word] (w7) at ([yshift=2em,xshift=1em]decoder.north) {$x_4$};
\node [word] (w8) at ([xshift=-1em]w7.west) {$x_3$};
\node [word] (w9) at ([xshift=1em]w7.east) {$x_5$};
% Upward arrows from the lower tokens; the arrow length is listed per token.
\foreach \tok/\len in {w1/1.4em,w2/1.3em,w3/1.3em,w4/1.3em,w5/1.4em,w6/1.4em}
  \draw [->] (\tok.north) -- ([yshift=\len]\tok.north);
% Upward arrows ending at the bottom of the upper tokens.
\foreach \tok in {w7,w8,w9}
  \draw [->] ([yshift=-1.4em]\tok.south) -- (\tok.south);
% Encoder box to the left of Attention, with its own token row below.
\node [model,minimum width=10.5em] (encoder) at ([xshift=-8em]ate.west) {Encoder};
\node [word] (we1) at ([yshift=-2em,xshift=1em]encoder.south) {\#};
\node [word] (we2) at ([xshift=-1em]we1.west) {\#};
\node [word] (we3) at ([xshift=-1em]we2.west) {$x_2$};
\node [word] (we4) at ([xshift=-1em]we3.west) {$x_3$};
\node [word] (we5) at ([xshift=1em]we1.east) {\#};
\node [word] (we6) at ([xshift=1em]we5.east) {$x_6$};
\foreach \tok/\len in {we1/1.3em,we2/1.3em,we3/1.4em,we4/1.4em,we5/1.3em,we6/1.4em}
  \draw [->] (\tok.north) -- ([yshift=\len]\tok.north);
% Thick arrows linking Encoder -> Attention -> Decoder.
\draw [->,very thick] ([xshift=0.5em]encoder) -- ([xshift=-0.5em]ate);
\draw [->,very thick] ([xshift=0.5em]ate) -- ([xshift=-0.5em]decoder);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tabular}{l l l}
% Panel 1: scatter points with a single straight red line (label: under-fitting).
\begin{tikzpicture}
% Axes.
\draw[->, thick] (0,0) to (3,0);
\draw[->, thick] (0,0) to (0,2);
% Data points: an empty named node plus a 1pt dot per (x,y) pair, scaled by 0.3 / 0.2.
\foreach \pt/\px/\py in {a/1/6,b/2/4,c/3/3,d/4/3,e/6/4,f/7/6,g/8/8.4,h/9/9.7} {
  \node (\pt) at (\px*0.3,\py*0.2) {};
  \fill [black] (\pt) circle(1pt);
}
% y=0.73x + 2.54
\draw [thick,red] (-1*0.3,1.81*0.2) to (10*0.3,9.84*0.2);
\node [font=\footnotesize] at (1.5,-0.5) {欠拟合};
\end{tikzpicture}
% Panel 2: same points with a smooth two-segment red curve (label: appropriate fit).
&\begin{tikzpicture}
\draw[->, thick] (0,0) to (3,0);
\draw[->, thick] (0,0) to (0,2);
\foreach \pt/\px/\py in {a/1/6,b/2/4,c/3/3,d/4/3,e/6/4,f/7/6,g/8/8.4,h/9/9.7} {
  \node (\pt) at (\px*0.3,\py*0.2) {};
  \fill [black] (\pt) circle(1pt);
}
\draw [thick,red] (0.5*0.3,6.15*0.2) to [bend right] (5*0.3,3*0.2) ;
\draw [thick,red] (5*0.3,3*0.2) to [bend right] (8.5*0.3,10*0.2) ;
\node [font=\footnotesize] at (1.5,-0.5) {拟合合适};
\end{tikzpicture}
% Panel 3: a wiggly red curve built from many short bent segments (label: over-fitting).
% The last two points sit at x = 8.4 and 9.4 here, unlike the other two panels.
&\begin{tikzpicture}
\draw[->, thick] (0,0) to (3,0);
\draw[->, thick] (0,0) to (0,2);
\foreach \pt/\px/\py in {a/1/6,b/2/4,c/3/3,d/4/3,e/6/4,f/7/6,g/8.4/8.4,h/9.4/9.7} {
  \node (\pt) at (\px*0.3,\py*0.2) {};
  \fill [black] (\pt) circle(1pt);
}
% Segment leading into point a.
\draw [thick,red] (0.2*0.3,4*0.2) to [bend left] (1*0.3,6*0.2) ;
% Between a and b.
\draw [thick,red] (1*0.3,6*0.2) to [bend left] (2*0.3,3*0.2) ;
% Between b and c.
\draw [thick,red] (2*0.3,3*0.2) to [bend right] (3*0.3,2.5*0.2) ;
% Between c and d.
\draw [thick,red] (3*0.3,2.5*0.2) to [bend left] (3.5*0.3,4*0.2) ;
\draw [thick,red] (3.5*0.3,4*0.2) to [bend left] (4.3*0.3,2*0.2) ;
\draw [thick,red] (4.3*0.3,2*0.2) to [bend right] (5*0.3,1.5*0.2) ;
% Between d and e.
\draw [thick,red] (5*0.3,1.5*0.2) to [bend right] (6.2*0.3,7*0.2) ;
\draw [thick,red] (6.2*0.3,7*0.2) to [bend right] (6.5*0.3,7*0.2) ;
% Between e and f.
\draw [thick,red] (6.5*0.3,7*0.2) to [bend left] (7*0.3,5*0.2) ;
\draw [thick,red] (7*0.3,5*0.2) to [bend right] (7.5*0.3,4*0.2) ;
\draw [thick,red] (7.5*0.3,4*0.2) to [bend right] (8*0.3,4*0.2) ;
% Between f and g.
\draw [thick,red] (8*0.3,4*0.2) to [bend right] (8*0.3,10*0.2) ;
\draw [thick,red] (8*0.3,10*0.2) to [bend left] (8.7*0.3,10*0.2) ;
% Between g and h.
\draw [thick,red] (8.7*0.3,10*0.2) to [bend left] (9.7*0.3,9.4*0.2) ;
\node [font=\footnotesize] at (1.5,-0.5) {过拟合};
\end{tikzpicture} \\
\end{tabular}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
% Local box styles: wide data box, system box, and narrow data box.
\tikzset{
  node1/.style={rectangle,draw,minimum height=2em,minimum width=8em,rounded corners=2pt,fill=orange!10},
  node2/.style={rectangle,draw,minimum height=1.3em,minimum width=10em,rounded corners=2pt,fill=blue!15!white},
  node3/.style={rectangle,draw,minimum height=2em,minimum width=4em,rounded corners=2pt,fill=orange!10}
}
% Left column: parallel data, the reverse NMT system, and the two M boxes below it.
\node [anchor=north,inner sep=0mm,node1] (n1) at (0,0) {Parallel\ Data};
\node [anchor=north,node2] (n2) at ([yshift=-2em]n1.south) {Reverse\ NMT\ System};
\node [anchor=north,node3] (n3) at ([xshift=-3em,yshift=-2em]n2.south) {M$_{\textrm{pseudo}}$};
\node [anchor=west,node3] (n31) at ([xshift=2em]n3.east) {M$_{\textrm{target}}$};
% Right column: a tall container box (n4) holding three undrawn labelled cells, and the final system below.
\node [anchor=north west,node1,minimum height=4em,minimum width=8em] (n4) at ([xshift=5em]n1.north east) {};
\node [anchor=south west,fill=orange!10,minimum height=1.6em,minimum width=3.6em] (n41) at ([xshift=0.2em,yshift=0.2em]n4.south west) {M$_{\textrm{pseudo}}$};
\node [anchor=south east,fill=orange!10,minimum height=1.6em,minimum width=3.6em] (n42) at ([xshift=-0.2em,yshift=0.2em]n4.south east) {M$_{\textrm{target}}$};
\node [anchor=north,fill=orange!10,minimum height=1.6em,minimum width=7.6em] (n43) at ([yshift=-0.2em]n4.north) {Parallel\ Data};
\node [anchor=north,node2] (n5) at ([yshift=-3em]n4.south) {Final\ NMT\ System};
% Fat grey flow arrows; the explicit 1mm line width determines their thickness.
\draw [->,black!60,line width=1mm] (n1.east) -- ([yshift=1em]n4.west);
\draw [->,black!20,line width=1mm] (n1.south) -- (n2.north);
\draw [->,black!20,line width=1mm] (n2.south) -- ([yshift=-2em]n2.south);
\draw [->,black!40,line width=1mm] (n3.east) -- (n31.west);
\draw [->,black!60,line width=1mm] (n31.north east) -- (n4.south west);
\draw [->,black!20,line width=1mm] (n4.south) -- (n5.north);
% Divider lines inside the container: one horizontal across the middle, one vertical in the lower half.
\draw [-,thick] (n4.west) -- (n4.east);
\draw [-,thick] (n4.south) -- ([yshift=2em]n4.south);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
% Two per-GPU timelines side by side. Per the in-figure labels, the left one updates after every step
% and the right one updates after accumulating two steps. Teal bars are forward/backward work,
% blue bars are gradient updates, and the uncoloured space inside a frame is idle time.
\begin{tikzpicture}
\tikzstyle{node} = [minimum height=0.8em,draw=teal,fill=teal!10]
\tikzstyle{legend} = [minimum height=0.8em,minimum width=0.8em,draw]
\tikzstyle{node2} = [minimum width=0.8em,minimum height=3.3em,draw=blue,fill=blue!10]
% Left timeline, first step: one bar per GPU, stacked top to bottom with differing widths.
\node[node,minimum width=2.8em] (node1) at (0,0) {};
\node[node,minimum width=4.0em,anchor=north west] (node2) at (node1.south west) {};
\node[node,minimum width=3.2em,anchor=north west] (node3) at (node2.south west) {};
\node[node,minimum width=3.0em,anchor=north west] (node4) at (node3.south west) {};
% Blue update bar after the first step.
\node[node2,anchor = north west] (grad1) at ([xshift=1.2em]node1.north east) {};
% Left timeline, second step.
\node[node,minimum width=3.7em,anchor=north west] (node5) at (grad1.north east) {};
\node[node,minimum width=2.8em,anchor=north west] (node6) at (node5.south west) {};
\node[node,minimum width=3.2em,anchor=north west] (node7) at (node6.south west) {};
\node[node,minimum width=4.0em,anchor=north west] (node8) at (node7.south west) {};
\node[font=\scriptsize,anchor=east] (line1) at (node1.west) {gpu1};
\node[font=\scriptsize,anchor=east] (line2) at (node2.west) {gpu2};
\node[font=\scriptsize,anchor=east] (line3) at (node3.west) {gpu3};
\node[font=\scriptsize,anchor=east] (line4) at (node4.west) {gpu4};
% Blue update bar after the second step, and the time axis of the left timeline.
\node[node2,anchor = north west] (grad2) at ([xshift=0.3em]node5.north east) {};
\draw[->] (-1.4em,-2.92em) -- (9em,-2.92em);
% Right timeline: two steps placed back to back, followed by a single update bar.
\node[node,minimum width=2.8em] (node9) at (13em,0) {};
\node[node,minimum width=4.0em,anchor=north west] (node10) at (node9.south west) {};
\node[node,minimum width=3.2em,anchor=north west] (node11) at (node10.south west) {};
\node[node,minimum width=3.0em,anchor=north west] (node12) at (node11.south west) {};
\node[node,minimum width=3.7em,anchor=north west] (node13) at (node9.north east) {};
\node[node,minimum width=2.8em,anchor=north west] (node14) at (node10.north east) {};
\node[node,minimum width=3.2em,anchor=north west] (node15) at (node11.north east) {};
\node[node,minimum width=4.0em,anchor=north west] (node16) at (node12.north east) {};
\node[node2,anchor = north west] (grad3) at ([xshift=0.5em]node13.north east) {};
% GPU labels of the right timeline; named line5..line8 so they no longer overwrite line1..line4.
\node[font=\scriptsize,anchor=east] (line5) at (node9.west) {gpu1};
\node[font=\scriptsize,anchor=east] (line6) at (node10.west) {gpu2};
\node[font=\scriptsize,anchor=east] (line7) at (node11.west) {gpu3};
\node[font=\scriptsize,anchor=east] (line8) at (node12.west) {gpu4};
\draw[->] (11.6em,-2.92em) -- (20.2em,-2.92em);
% Frames around each group of bars; the third frame is now box3 instead of a second box2.
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0em,draw] [fit = (node1) (node2) (node3) (node4)] (box1) {};
\node [rectangle,inner sep=0em,draw] [fit = (node5) (node6) (node7) (node8)] (box2) {};
\node [rectangle,inner sep=0em,draw] [fit = (node9) (node13) (node12) (node16)] (box3) {};
\end{pgfonlayer}
% Captions under each timeline and the "time" axis labels (second label renamed to time2).
\node[font=\tiny,anchor=north] (legend1) at ([xshift=3em]node4.south) {一步一更新};
\node[font=\tiny,anchor=north] (legend2) at ([xshift=2.5em]node12.south) {累积两步更新};
\node[font=\tiny,anchor=north] (time1) at (grad2.south) {time};
\node[font=\tiny,anchor=north] (time2) at (grad3.south) {time};
% Colour legend above the left timeline: idle, forward/backward, gradient update.
\node[legend] (legend3) at (2em,2em) {};
\node[font=\tiny,anchor=west] (idle) at (legend3.east) {:空闲};
\node[legend,anchor=west,draw=teal,fill=teal!10] (legend4) at ([xshift = 2em]idle.east) {};
\node[font=\tiny,anchor=west] (FB) at (legend4.east) {:前向/反向};
\node[legend,anchor=west,draw=blue,fill=blue!10] (legend5) at ([xshift = 2em]FB.east) {};
\node[font=\tiny,anchor=west] (grad_sync) at (legend5.east) {:梯度更新};
\end{tikzpicture}
\ No newline at end of file
% A pool of sentences (green bars) with white bars to their right is fed to a model box, which points to
% one large batch (top right) and to two smaller batches (bottom right). Braces mark t (time) and
% m (memory) for the batches, as explained by the legend box at the far right.
\begin{tikzpicture}
\tikzstyle{snode} = [draw,inner sep=1pt,minimum width=3em,minimum height=0.5em,rounded corners=1pt,fill=green!30!white]
\tikzstyle{pnode} = [draw,inner sep=1pt,minimum width=1em,minimum height=0.5em,rounded corners=1pt]
% Input pool: six green bars of different widths.
\node [anchor=west,snode] (s1) at (0,0) {\tiny{}};
\node [anchor=north west,snode,minimum width=6.3em] (s2) at ([yshift=-0.3em]s1.south west) {\tiny{}};
\node [anchor=north west,snode,minimum width=2em] (s3) at ([yshift=-0.3em]s2.south west) {\tiny{}};
\node [anchor=north west,snode,minimum width=5.5em] (s4) at ([yshift=-0.3em]s3.south west) {\tiny{}};
\node [anchor=north west,snode,minimum width=5.8em] (s5) at ([yshift=-0.3em]s4.south west) {\tiny{}};
\node [anchor=north west,snode,minimum width=3em] (s6) at ([yshift=-0.3em]s5.south west) {\tiny{}};
\node [anchor=east] (label1) at ([xshift=-0.8em,yshift=0.6em]s1.west) {{句子:}};
% White bars appended to the right of every green bar except the widest one (s2).
\node [anchor=west,pnode,minimum width=3em] (p1) at ([xshift=0.3em]s1.east) {\tiny{}};
\node [anchor=west,pnode,minimum width=4em] (p3) at ([xshift=0.3em]s3.east) {\tiny{}};
\node [anchor=west,pnode,minimum width=0.5em] (p4) at ([xshift=0.3em]s4.east) {\tiny{}};
\node [anchor=west,pnode,minimum width=0.2em] (p5) at ([xshift=0.3em]s5.east) {\tiny{}};
\node [anchor=west,pnode,minimum width=3em] (p6) at ([xshift=0.3em]s6.east) {\tiny{}};
\node [rectangle,inner sep=0.5em,rounded corners=2pt,very thick,dotted,draw=ugreen!80] [fit = (s1) (s6) (p1) (p6)] (box0) {};
\node[rectangle,inner sep=0.5em,rounded corners=1pt,draw,fill=blue!15] (model) at ([xshift=4em]box0.east){{Model}};
% big batch
\node [anchor=west,snode] (sbi1) at ([xshift=3em,yshift=6em]model.east) {\tiny{}};
\node [anchor=north west,snode,minimum width=6.3em] (sbi2) at ([yshift=-0.3em]sbi1.south west) {\tiny{}};
\node [anchor=north west,snode,minimum width=2em] (sbi3) at ([yshift=-0.3em]sbi2.south west) {\tiny{}};
\node [anchor=north west,snode,minimum width=5.5em] (sbi4) at ([yshift=-0.3em]sbi3.south west) {\tiny{}};
\node [anchor=north west,snode,minimum width=5.8em] (sbi5) at ([yshift=-0.3em]sbi4.south west) {\tiny{}};
\node [anchor=north west,snode,minimum width=3em] (sbi6) at ([yshift=-0.3em]sbi5.south west) {\tiny{}};
% Named label2 so it does not overwrite the pool label (label1).
\node [anchor=east] (label2) at ([xshift=-0.8em,yshift=-1em]sbi1.west) {{大batch}};
\node [anchor=west,pnode,minimum width=3em] (pbi1) at ([xshift=0.3em]sbi1.east) {\tiny{}};
\node [anchor=west,pnode,minimum width=4em] (pbi3) at ([xshift=0.3em]sbi3.east) {\tiny{}};
\node [anchor=west,pnode,minimum width=0.5em] (pbi4) at ([xshift=0.3em]sbi4.east) {\tiny{}};
\node [anchor=west,pnode,minimum width=0.2em] (pbi5) at ([xshift=0.3em]sbi5.east) {\tiny{}};
\node [anchor=west,pnode,minimum width=3em] (pbi6) at ([xshift=0.3em]sbi6.east) {\tiny{}};
\node [rectangle,inner sep=0.5em,rounded corners=2pt,very thick,dotted,draw=ugreen!80] [fit = (sbi1) (sbi6) (pbi1) (pbi6)] (box1) {};
% small batch 1: the three widest bars
\node [anchor=west,snode,minimum width=5.5em] (sma1) at ([xshift=3em,yshift=-3em]model.east) {\tiny{}};
\node [anchor=north west,snode,minimum width=5.8em] (sma2) at ([yshift=-0.3em]sma1.south west) {\tiny{}};
\node [anchor=north west,snode,minimum width=6.3em] (sma3) at ([yshift=-0.3em]sma2.south west) {\tiny{}};
% Named label3 so it does not overwrite the earlier labels.
\node [anchor=east] (label3) at ([xshift=-0.8em,yshift=-2em]sma1.west) {{小batch}};
\node [anchor=west,pnode,minimum width=0.5em] (pma1) at ([xshift=0.3em]sma1.east) {\tiny{}};
\node [anchor=west,pnode,minimum width=0.2em] (pma2) at ([xshift=0.3em]sma2.east) {\tiny{}};
\node [rectangle,inner sep=0.5em,rounded corners=2pt,very thick,dotted,draw=ugreen!80] [fit = (sma1) (sma3) (pma1) (pma2)] (box2) {};
% small batch 2: the three narrowest bars
\node [anchor=west,snode,minimum width=2em] (sma4) at ([xshift=4em,yshift=0em]sma1.east) {\tiny{}};
\node [anchor=north west,snode,minimum width=3em] (sma5) at ([yshift=-0.3em]sma4.south west) {\tiny{}};
\node [anchor=north west,snode,minimum width=3em] (sma6) at ([yshift=-0.3em]sma5.south west) {\tiny{}};
\node [anchor=west,pnode,minimum width=0.7em] (pma4) at ([xshift=0.3em]sma4.east) {\tiny{}};
\node [rectangle,inner sep=0.5em,rounded corners=2pt,very thick,dotted,draw=ugreen!80] [fit = (sma4) (sma6) (pma4)] (box3) {};
% Flow arrows: pool -> model, model -> big batch / first small batch, first -> second small batch.
\draw [->,very thick] (box0.east) -- (model.west);
\draw [->,thick] (model.east) .. controls +(east:0.5) and +(west:0.5) .. ([xshift=-1em]box1.west);
\draw [->,thick] (model.east) .. controls +(east:0.5) and +(west:0.5) .. ([xshift=-1em]box2.west);
\draw [->,very thick] (box2.east) -- (box3.west);
%%%%%
% Time labels with a horizontal brace over each batch; the midway helper nodes now carry unique names.
\node [] (t10) at ([yshift=1.5em]box1.north) {t1};
\node [] (t11) at ([yshift=1.5em]box2.north) {t1};
\node [] (t2) at ([yshift=1.5em]box3.north) {t2};
\draw [very thick,decorate,decoration={brace}] ([xshift=0em,yshift=0.3em]box1.north west) to node [midway,name=tbrace1] {} ([xshift=0em,yshift=0.3em]box1.north east);
\draw [very thick,decorate,decoration={brace}] ([xshift=0em,yshift=0.3em]box2.north west) to node [midway,name=tbrace2] {} ([xshift=0em,yshift=0.3em]box2.north east);
\draw [very thick,decorate,decoration={brace}] ([xshift=0em,yshift=0.3em]box3.north west) to node [midway,name=tbrace3] {} ([xshift=0em,yshift=0.3em]box3.north east);
% Memory labels with a vertical brace to the right of the big batch and of the second small batch.
\node [] (m1) at ([xshift=1.5em]box1.east) {m1};
\node [] (m2) at ([xshift=1.5em]box3.east) {m2};
\draw [very thick,decorate,decoration={brace}] ([xshift=3pt]box1.north east) to node [midway,name=mbrace1] {} ([xshift=3pt]box1.south east);
\draw [very thick,decorate,decoration={brace}] ([xshift=3pt]box3.north east) to node [midway,name=mbrace3] {} ([xshift=3pt]box3.south east);
% Legend box: m = memory, t = time, and the two inequalities.
\node [rectangle,inner sep=0.5em,rounded corners=2pt,draw,fill=red!5,font=\scriptsize] at ([yshift=-2em,xshift=10em]sbi1.east) {
\begin{tabular}{l}
m: 显存 \\
t: 时间 \\
$\textrm{m}_1>\textrm{m}_2$ \\
$\textrm{t}_1>\textrm{t}_2$
\end{tabular}
};
\end{tikzpicture}
\ No newline at end of file
% Walk-through of building a subword vocabulary by repeatedly merging symbol pairs:
% a word list, its character-split form with counts, three successive merge states, the resulting
% vocabulary table, and side notes explaining each stage.
\begin{tikzpicture}
% "node" is plain small text; "sentence" is small text on a light blue background.
\tikzstyle{node} =[font=\scriptsize]
\tikzstyle{sentence} =[font=\scriptsize,fill=blue!5!white]
% node1: raw word list; node2: words split into characters with the end marker <e> and a count each.
\node[sentence] (node1) at (0,0) {[`low', `lower', `newest', `widest']};
\node[sentence,anchor = north] (node2) at ([yshift = -1em]node1.south) {[`l o w $<$e$>$':5, `l o w e r $<$e$>$':2, `n e w e s t $<$e$>$':6, `w i d e s t $<$e$>$':3]};
% node3..node5: successive merge states; the newly merged unit is wrapped in \red (macro defined outside this file chunk).
\node[sentence,anchor = north] (node3) at ([yshift = -1.5em]node2.south) {[`l o w $<$e$>$':5, `l o w e r $<$e$>$':2, `n e w {\red es} t $<$e$>$':6, `w i d {\red es} t $<$e$>$':3]};
\node[sentence,anchor = north] (node4) at ([yshift = -1em]node3.south) {[`l o w $<$e$>$':5, `l o w e r $<$e$>$':2, `n e w {\red est} $<$e$>$':6, `w i d {\red est} $<$e$>$':3]};
\node[sentence,anchor = north] (node5) at ([yshift = -1em]node4.south) {[`l o w $<$e$>$':5, `l o w e r $<$e$>$':2, `n e w {\red est$<$e$>$}':6, `w i d {\red est$<$e$>$}':3]};
\node[sentence,anchor = north] (node6) at ([yshift = -1em]node5.south) {$\cdots$};
% node7: the stopping condition, written as a sentence below the merge states.
\node[node,anchor = north] (node7) at ([yshift = -1.6em]node6.south) {直到达到预设的子词词表大小或下一个最高频的字节对出现频率为1。};
% Arrows between consecutive rows; the node2 -> node3 step is drawn later as box1 -> box2.
\draw[->,line width=.03cm] ([yshift=0em]node1.south) -- ([yshift=0em]node2.north);
\draw[->,line width=.03cm] ([yshift=0em]node3.south) -- ([yshift=0em]node4.north);
\draw[->,line width=.03cm] ([yshift=0em]node4.south) -- ([yshift=0em]node5.north);
\draw[->,line width=.03cm] ([yshift=0em]node5.south) -- ([yshift=0em]node6.north);
% node8/node9: two-line note about the out-of-vocabulary word "lowest" being split into "low est".
\node[node,anchor = west] (node8) at ([xshift = 2em,yshift = 2em]node7.east) {对于词表外的词lowest};
\node[node,anchor = north west] (node9) at ([yshift = 0.3em]node8.south west) {可以被分割为low est};
% dict: the subword vocabulary table (uses \multirow for the row label).
\node[node,font=\scriptsize,anchor = north,fill=ugreen!5,drop shadow] (dict) at ([xshift = 8em,yshift = -5em]node6.south){\begin{tabular}{llllll}
\multirow{3}{*}{子词词表:} & `es' & `est' & `est$<$e$>$' & `lo' & `low' \\
& `ne' & `new'&`newest$<$e$>$' & `low$<$e$>$'& `wi'\\
& `wid' & `widest$<$e$>$' & `lowe' & `lower'& `lower$<$e$>$'
\end{tabular}};
% line1/line2: side note for the first stage (split words by character, add the end marker, count frequencies).
\node[node,anchor=west] (line1) at ([xshift = 8em]node1.south east) {按字符拆分单词,并添加};
\node[node,anchor=north west] (line2) at ([yshift=0.3em]line1.south west) {终结符$<$e$>$,统计词频。};
% line3..line5: side note for the merge stage (count every adjacent pair, merge the most frequent one).
\node[node,anchor=north west] (line3) at ([yshift=-4em]line2.south west) {统计每一个连续字节对};
\node[node,anchor=north west] (line4) at ([yshift=0.3em]line3.south west) {的出现频率,选择最高};
\node[node,anchor=north west] (line5) at ([yshift=0.3em]line4.south west) {频者合并成新的子词};
% Background layer: dotted frames around the two stages and shadowed panels behind the notes.
\begin{pgfonlayer}{background}
%\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=red!10,drop shadow,draw=red] [fit = (line1) (line2) (line3) (line4)] (box1) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,draw=purple] [fit = (node1) (node2)] (box1) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,draw=teal] [fit = (node3) (node4) (node5) (node6)] (box2) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=purple!5,drop shadow] [fit = (line1) (line2)] (box3) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=ugreen!5,drop shadow] [fit = (line3) (line4) (line5)] (box4) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=purple!5,drop shadow] [fit = (node7)] (box5) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=blue!5,drop shadow] [fit = (node8) (node9)] (box6) {};
\end{pgfonlayer}
% Stage-to-stage arrows between the fitted frames and down to the stopping condition.
\draw[->,line width=.03cm] ([yshift=0em]box2.south) -- ([yshift=0.2em]node7.north);
\draw[->,line width=.03cm] ([yshift=0em]box1.south) -- ([yshift=0em]box2.north);
% Short dotted arrows pointing left from each side note, colour-matched to the frame of its stage.
\draw [->,dotted,very thick,purple] (box3.west) -- ([xshift=-1.5em]box3.west);
\draw [->,dotted,very thick,teal] (box4.west) -- ([xshift=-1.7em]box4.west);
% Dotted curve from the vocabulary table up to the out-of-vocabulary note.
\draw [->,dotted,very thick] ([xshift=6em]dict.north) .. controls +(north:1) and +(south:1) .. (box6.south);
\end{tikzpicture}
\ No newline at end of file
%\definecolor{dblue}{cmyk}{0.99998,1,0,0 }
% xcolor's cmyk model takes components in the range [0,1]; the previous values 100 and 90 were out of range.
% NOTE(review): they are read here as percentages (100% cyan, 90% yellow) - confirm that this is the intended colour.
\definecolor{dblue}{cmyk}{1,0,0.9,0}
% Figure: attention inside two stacked layers (Layer m below, Layer n above),
% drawn three times side by side; the captions (a), (b), (c) are set at the bottom.
\begin{tikzpicture}[decoration=brace]
\begin{scope}
% Base lengths that all sizes and offsets below are expressed in.
% NOTE(review): \wseg, \hseg, \wnode and \hnode are presumably declared with \newlength elsewhere -- confirm.
\setlength{\wseg}{1.5cm}
\setlength{\hseg}{0.6cm}
\setlength{\wnode}{2cm}
\setlength{\hnode}{1.2cm}
% layernode: dotted outer frame of one layer; attnnode: solid frame of its attention block;
% thinnode: small Q/K/V boxes; fatnode: wide boxes holding the S and A formulas.
\tikzstyle{layernode} = [rectangle,draw,thick,densely dotted,inner sep=3pt,rounded corners,minimum width=2.1\wnode,minimum height=2.7\hnode]
\tikzstyle{attnnode} = [rectangle,draw,inner sep=3pt,rounded corners, minimum width=2\wnode,minimum height=2.2\hnode]
\tikzstyle{thinnode} = [rectangle,inner sep=1pt,rounded corners=1pt,minimum size=0.3\hnode,font=\scriptsize]
\tikzstyle{fatnode} = [rectangle,inner sep=1pt,rounded corners=1pt,minimum height=0.3\hnode,minimum width=\wnode,font=\small]
% 0.3\wseg here can be used to determine the distance between two adjacent blocks
% layer00..layer05 are column anchors, each shifted right from the previous one.
\coordinate (layer00) at (0,0);
\foreach \i / \j in {1/0,2/1,3/2,4/3,5/4}
\coordinate (layer0\i) at ([xshift=2.05\wnode+0.3\wseg]layer0\j);
% Column 1, upper layer: Q, K, V at full strength; S^n is computed from Q^n and K^n.
\node[layernode,anchor=north] (layer11) at ([yshift=-\hseg]layer01.south) {};
\node[attnnode,anchor=south] (attn11) at ([yshift=0.1\hnode]layer11.south) {};
\node[anchor=north west,inner sep=4pt,font=\small] () at (attn11.north west) {Attention};
\node[anchor=south,inner sep=0pt] (out11) at ([yshift=0.3\hseg]attn11.north) {$\cdots$};
\node[thinnode,anchor=south west,thick,draw=dblue,text=black] (q11) at ([xshift=0.1\wseg,yshift=0.2\hseg]attn11.south west) {$Q^n$};
\node[thinnode,anchor=south,thick,draw=orange,text=black] (k11) at ([yshift=0.2\hseg]attn11.south) {$K^n$};
\node[thinnode,anchor=south east,thick,draw=purple,text=black] (v11) at ([xshift=-0.1\wseg,yshift=0.2\hseg]attn11.south east) {$V^n$};
\node[fatnode,anchor=south,thick,draw] (s11) at ([xshift=0.5\wseg,yshift=0.8\hseg]q11.north east) {$S^n\!=\!S(Q^n\!\cdot\!K^n)$};
\node[fatnode,anchor=south,thick,draw] (a11) at ([xshift=0.45\wseg,yshift=1.3\hseg+0.6\hnode]k11.north east) {$A^n\!=\!S^n\!\cdot\!V$};
% Q and K feed S.
\begin{scope}[fill=black!100]
\draw[-latex',thick,draw=black!100] (q11.north) .. controls +(north:0.5\hseg) and +(south:0.8\hseg) .. (s11.south);
\draw[-latex',thick,draw=black!100] (k11.north) .. controls +(north:0.5\hseg) and +(south:0.8\hseg) .. (s11.south);
\end{scope}
% S and V feed A.
\begin{scope}[fill=black!100]
\draw[-latex',thick,draw=black!100] (s11.north) .. controls +(north:0.7\hseg) and +(south:0.8\hseg) ..(a11.south);
\draw[-latex',thick,draw=black!100] (v11.north) .. controls +(north:2.7\hseg) and +(south:0.9\hseg) .. (a11.south);
\end{scope}
\draw[-latex',thick] (a11.north).. controls +(north:0.3\hseg) and +(south:0.7\hseg) ..(out11.south);
% Column 2, upper layer: Q^n and K^n are drawn at 40% tint and S^n is a dashed box labelled S^n = S^m.
\node[layernode,anchor=north] (layer12) at ([yshift=-\hseg]layer02.south) {};
\node[attnnode,anchor=south] (attn12) at ([yshift=0.1\hnode]layer12.south) {};
\node[anchor=north west,inner sep=4pt,font=\small] () at (attn12.north west) {Attention};
\node[anchor=south,inner sep=0pt] (out12) at ([yshift=0.3\hseg]attn12.north) {$\cdots$};
\node[thinnode,anchor=south west,thick,draw=dblue!40,text=black!40] (q12) at ([xshift=0.1\wseg,yshift=0.2\hseg]attn12.south west) {$Q^n$};
\node[thinnode,anchor=south,thick,draw=orange!40,text=black!40] (k12) at ([yshift=0.2\hseg]attn12.south) {$K^n$};
\node[thinnode,anchor=south east,thick,draw=purple,text=black] (v12) at ([xshift=-0.1\wseg,yshift=0.2\hseg]attn12.south east) {$V^n$};
\node[fatnode,anchor=south,thick,densely dashed,draw] (s12) at ([xshift=0.5\wseg,yshift=0.8\hseg]q12.north east) {$S^n\!=\!S^m$};
\node[fatnode,anchor=south,thick,draw] (a12) at ([xshift=0.45\wseg,yshift=1.3\hseg+0.6\hnode]k12.north east) {$A^n\!=\!S^n\!\cdot\!V$};
% The Q/K -> S arrows are tinted to 40% as well.
\begin{scope}[fill=black!40]
\draw[-latex',thick,draw=black!40] (q12.north) .. controls +(north:0.5\hseg) and +(south:0.8\hseg) .. (s12.south);
\draw[-latex',thick,draw=black!40] (k12.north) .. controls +(north:0.5\hseg) and +(south:0.8\hseg) .. (s12.south);
\end{scope}
\begin{scope}[fill=black!100]
\draw[-latex',thick,draw=black!100] (s12.north).. controls +(north:0.7\hseg) and +(south:0.8\hseg) .. (a12.south);
\draw[-latex',thick,draw=black!100] (v12.north).. controls +(north:2.7\hseg) and +(south:0.9\hseg) .. (a12.south);
\end{scope}
\draw[-latex',thick] (a12.north).. controls +(north:0.3\hseg) and +(south:0.7\hseg) ..(out12.south);
% Column 3, upper layer: Q, K, V and S^n are all tinted to 40%; A^n is a dashed box labelled A^n = A^m.
\node[layernode,anchor=north] (layer13) at ([yshift=-\hseg]layer03.south) {};
\node[attnnode,anchor=south] (attn13) at ([yshift=0.1\hnode]layer13.south) {};
\node[anchor=north west,inner sep=4pt,font=\small] () at (attn13.north west) {Attention};
\node[anchor=south,inner sep=0pt] (out13) at ([yshift=0.3\hseg]attn13.north) {$\cdots$};
\node[thinnode,anchor=south west,thick,draw=dblue!40,text=black!40] (q13) at ([xshift=0.1\wseg,yshift=0.2\hseg]attn13.south west) {$Q^n$};
\node[thinnode,anchor=south,thick,draw=orange!40,text=black!40] (k13) at ([yshift=0.2\hseg]attn13.south) {$K^n$};
\node[thinnode,anchor=south east,thick,draw=purple!40,text=black!40] (v13) at ([xshift=-0.1\wseg,yshift=0.2\hseg]attn13.south east) {$V^n$};
\node[fatnode,anchor=south,thick,draw=black!40,text=black!40] (s13) at ([xshift=0.5\wseg,yshift=0.8\hseg]q13.north east) {$S^n$};
\node[fatnode,anchor=south,thick,densely dashed,draw] (a13) at ([xshift=0.45\wseg,yshift=1.3\hseg+0.6\hnode]k13.north east) {$A^n\!=\!A^m$};
\begin{scope}[fill=black!40]
\draw[-latex',thick,draw=black!40] (q13.north) .. controls +(north:0.5\hseg) and +(south:0.8\hseg) .. (s13.south);
\draw[-latex',thick,draw=black!40] (k13.north) .. controls +(north:0.5\hseg) and +(south:0.8\hseg) .. (s13.south);
\end{scope}
\begin{scope}[fill=black!40]
\draw[-latex',thick,draw=black!40] (s13.north) .. controls +(north:0.7\hseg) and +(south:0.8\hseg) .. (a13.south);
\draw[-latex',thick,draw=black!40] (v13.north) .. controls +(north:2.7\hseg) and +(south:0.9\hseg) .. (a13.south);
\end{scope}
\draw[-latex',thick] (a13.north).. controls +(north:0.3\hseg) and +(south:0.7\hseg) ..(out13.south);
% Lower layer (labelled "Layer m" further down) for all three columns.
% \i\j is the layer/column index of the new nodes, \k the layer they hang below;
% \q, \s, \t, \v are tint percentages (all 100 here, i.e. nothing is tinted).
\foreach \i / \j / \k / \q / \s / \t / \v in
{2/1/1/100/100/100/100, 2/2/1/100/100/100/100, 2/3/1/100/100/100/100}
{
\node[layernode,anchor=north] (layer\i\j) at ([yshift=-0.8\hseg]layer\k\j.south) {};
\node[attnnode,anchor=south] (attn\i\j) at ([yshift=0.1\hnode]layer\i\j.south) {};
\node[anchor=north west,inner sep=4pt,font=\small] () at (attn\i\j.north west) {Attention};
\node[anchor=south,inner sep=0pt] (out\i\j) at ([yshift=0.3\hseg]attn\i\j.north) {$\cdots$};
\node[thinnode,anchor=south west,thick,draw=dblue!\q,text=black] (q\i\j) at ([xshift=0.1\wseg,yshift=0.2\hseg]attn\i\j.south west) {$Q^m$};
\node[thinnode,anchor=south,thick,draw=orange!\q,text=black] (k\i\j) at ([yshift=0.2\hseg]attn\i\j.south) {$K^m$};
\node[thinnode,anchor=south east,thick,draw=purple!\s,text=black] (v\i\j) at ([xshift=-0.1\wseg,yshift=0.2\hseg]attn\i\j.south east) {$V^m$};
\node[fatnode,anchor=south,thick,draw=black!\s] (s\i\j) at ([xshift=0.45\wseg,yshift=0.8\hseg]q\i\j.north east) {$S^m\!=\!S(Q^m\!\cdot\!K^m)$};
\node[fatnode,anchor=south,thick,draw=black!80] (a\i\j) at ([xshift=0.45\wseg,yshift=1.3\hseg+0.6\hnode]k\i\j.north east) {$A^m\!=\!S^m\!\cdot\!V$};
\begin{scope}[fill=black!\q]
\draw[-latex',thick,draw=black!\t] (q\i\j.north) .. controls +(north:0.5\hseg) and +(south:0.8\hseg) .. (s\i\j.south);
\draw[-latex',thick,draw=black!\t] (k\i\j.north) .. controls +(north:0.5\hseg) and +(south:0.8\hseg) .. (s\i\j.south);
\end{scope}
\begin{scope}[fill=black!\s]
\draw[-latex',thick,draw=black!\v] (s\i\j.north).. controls +(north:0.7\hseg) and +(south:0.8\hseg) ..(a\i\j.south);
\draw[-latex',thick,draw=black!\v] (v\i\j.north).. controls +(north:2.7\hseg) and +(south:0.9\hseg) ..(a\i\j.south);
\end{scope}
\draw[-latex',thick] (a\i\j.north).. controls +(north:0.3\hseg) and +(south:0.7\hseg) ..(out\i\j.south);
}
% Dashed arrows from the lower layer to the upper one: S in column 2, A in column 3.
\draw[-latex',densely dashed,very thick] (s22.west) to [out=120,in=-120] (s12.west);
\draw[-latex',densely dashed,very thick] (a23.east) to [out=60,in=-60] (a13.east);
% Per-column decorations: layer labels, "..." placeholders above, between and below
% the two layers, and the straight arrows that connect them.
\foreach \i in {1,2,3}
{
\node[anchor=north west,inner sep=3pt,font=\tiny] () at ([yshift=-0.2em]layer1\i.north west) {Layer $n\!=\!m\!+\!i$};
\node[anchor=north west,inner sep=3pt,font=\tiny] () at ([yshift=-0.2em]layer2\i.north west) {Layer $m$};
\node[anchor=center,inner sep=1pt] (dot1\i) at ([yshift=0.5\hseg]layer1\i.north) {$\cdots$};
\draw[->,thick] (out1\i.north) -- ([yshift=0.1em]dot1\i.south);
\node[anchor=center,inner sep=1pt] (dot2\i) at ([yshift=-0.4\hseg]layer1\i.south) {$\cdots$};
\draw[->,thick] ([yshift=-0.15em]dot2\i.north) -- ([yshift=-0.3em]attn1\i.south);
\draw[->,thick] (out2\i.north) -- ([yshift=0.1em]dot2\i.south);
\node[anchor=center,inner sep=1pt] (dot3\i) at ([yshift=-0.4\hseg]layer2\i.south) {$\cdots$};
\draw[->,thick] ([yshift=-0.15em]dot3\i.north) -- ([yshift=-0.3em]attn2\i.south);
}
% Sub-captions, one under the bottom "..." of each column.
\node[anchor=north,align=left,inner sep=1pt,font=\footnotesize] () at (dot31.south) {(a) Standard Transformer Attention};
\node[anchor=north,align=left,inner sep=1pt,font=\footnotesize] () at (dot32.south) {(b) \textsc{San} Self-Attention};
\node[anchor=north,align=left,inner sep=1pt,font=\footnotesize] () at (dot33.south) {(c) \textsc{San} Encoder-Decoder Attention};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
% Figure: a sentence (top, unlabelled) followed by its lowercased and truecased forms.
\begin{tikzpicture}
\begin{scope}\large
% n1 fixes the origin; its label was misspelled "Lowcase".
\node [anchor=north] (n1) at (0, 0) {Lowercase\ \ :};
% Original sentence, placed one row (1.7em) above the labelled rows.
\node [anchor=west] (n2) at ([xshift=1.1em,yshift=1.7em]n1.east) {What\ \ is\ \ the\ \ WTO\ ?};
\node [anchor=west] (n3) at ([xshift=0em,yshift=-1.7em]n1.west) {Truecase\ \ :};
% n4 and n5 are stacked under n2 in 1.7em steps, next to the two labels.
\node [anchor=west] (n4) at ([xshift=0em,yshift=-1.7em]n2.west) {\,what\ \ is\ \ the\ \ \ wto\ \ \ ?};
\node [anchor=west] (n5) at ([xshift=0em,yshift=-1.7em]n4.west) {\,what\ \ is\ \ the\ \ WTO\ ?};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
% Figure: one unit of a layer without dropout (left) and with dropout (middle),
% next to the equations for both cases (right).
\begin{tikzpicture}
% Unitless spacing factors used as coordinate multipliers.
\def\neuronsep{1.5}
\def\nodespace{1}
\def\picturespace{0.8}
\tikzstyle{neuronnode} = [minimum size=1.8em,circle,draw,very thick,ublue,inner sep=0pt, fill=white,align=center]
% --- Standard unit: bias b and inputs x1..x3 feed z, which feeds the output x^{l+1}.
\node [neuronnode] (neuron_b) at (0,0) {\scriptsize{$b_{i}^{l}$}};
\node [neuronnode] (neuron_y3) at (0,-1*\neuronsep) {\scriptsize{$x_{3}^{l}$}};
\node [neuronnode] (neuron_y2) at (0,-2*\neuronsep) {\scriptsize{$x_{2}^{l}$}};
\node [neuronnode] (neuron_y1) at (0,-3*\neuronsep) {\scriptsize{$x_{1}^{l}$}};
\node [neuronnode] (neuron_z) at (1.2 * \nodespace,-1.5 * \neuronsep) {\scriptsize{$z_{i}^{l+1}$}};
\node [neuronnode] (neuron_y') at (2.4 * \nodespace,-1.5 * \neuronsep) {\scriptsize{$x_{i}^{l+1}$}};
% Text annotations; each gets its own node name (they previously all shared "standard").
\node [anchor=north,ublue] (standard_label) at ([yshift=-4em]neuron_z.south) {\scriptsize{standard}};
\node [ublue] (standard_w) at ([xshift=-1em]neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}};
\node [ublue] (standard_f) at ([xshift=0.6em,yshift=0.3em]neuron_z.east) {\scriptsize{$f$}};
\draw [->,line width=0.3mm] (neuron_b.east) -- (neuron_z.west);
\draw [->,line width=0.3mm] (neuron_y3.east) -- (neuron_z.west);
\draw [->,line width=0.3mm] (neuron_y2.east) -- (neuron_z.west);
\draw [->,line width=0.3mm] (neuron_y1.east) -- (neuron_z.west);
\draw [->,line width=0.3mm] (neuron_z.east) -- (neuron_y'.west);
% --- Dropout unit: the same structure, but z is fed by the masked inputs \tilde{x}.
\node [neuronnode] (drop_neuron_b) at (5*\nodespace,0) {\scriptsize{$b_{i}^{l}$}};
\node [neuronnode] (drop_neuron_y3') at (5*\nodespace,-1*\neuronsep) {\scriptsize{$\tilde{x}_{3}^{l}$}};
\node [neuronnode] (drop_neuron_y2') at (5*\nodespace,-2*\neuronsep) {\scriptsize{$\tilde{x}_{2}^{l}$}};
\node [neuronnode] (drop_neuron_y1') at (5*\nodespace,-3*\neuronsep) {\scriptsize{$\tilde{x}_{1}^{l}$}};
\node [neuronnode] (drop_neuron_z) at (6.2 * \nodespace,-1.5 * \neuronsep) {\scriptsize{$z_{i}^{l+1}$}};
\node [neuronnode] (drop_neuron_y') at (7.4 * \nodespace,-1.5 * \neuronsep) {\scriptsize{$x_{i}^{l+1}$}};
% Unmasked inputs, drawn to the left of their masked counterparts.
\node [neuronnode] (drop_neuron_y3) at (3.8*\nodespace,-1*\neuronsep) {\scriptsize{$x_{3}^{l}$}};
\node [neuronnode] (drop_neuron_y2) at (3.8*\nodespace,-2*\neuronsep) {\scriptsize{$x_{2}^{l}$}};
\node [neuronnode] (drop_neuron_y1) at (3.8*\nodespace,-3*\neuronsep) {\scriptsize{$x_{1}^{l}$}};
% Mask variables r, placed above each x -> \tilde{x} arrow.
\node [neuronnode] (drop_neuron_r3) at (4.4*\nodespace,-0.5*\neuronsep) {\scriptsize{$r_{3}^{l}$}};
\node [neuronnode] (drop_neuron_r2) at (4.4*\nodespace,-1.5*\neuronsep) {\scriptsize{$r_{2}^{l}$}};
\node [neuronnode] (drop_neuron_r1) at (4.4*\nodespace,-2.5*\neuronsep) {\scriptsize{$r_{1}^{l}$}};
\node [anchor=north,ublue] (dropout_label) at ([yshift=-4em]drop_neuron_z.south) {\scriptsize{dropout}};
\node [ublue] (dropout_w) at ([xshift=-1em]drop_neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}};
\node [ublue] (dropout_f) at ([xshift=0.6em,yshift=0.3em]drop_neuron_z.east) {\scriptsize{$f$}};
% Connections into and out of z.
\draw [->,line width=0.3mm] (drop_neuron_b.east) -- (drop_neuron_z.west);
\draw [->,line width=0.3mm] (drop_neuron_y3'.east) -- (drop_neuron_z.west);
\draw [->,line width=0.3mm] (drop_neuron_y2'.east) -- (drop_neuron_z.west);
\draw [->,line width=0.3mm] (drop_neuron_y1'.east) -- (drop_neuron_z.west);
\draw [->,line width=0.3mm] (drop_neuron_z.east) -- (drop_neuron_y'.west);
% Masking step: x -> \tilde{x}, with a short stub hanging down from each r node.
\draw [->,line width=0.3mm] (drop_neuron_y3.east) -- (drop_neuron_y3'.west);
\draw [->,line width=0.3mm] (drop_neuron_y2.east) -- (drop_neuron_y2'.west);
\draw [->,line width=0.3mm] (drop_neuron_y1.east) -- (drop_neuron_y1'.west);
\draw [-,line width=0.3mm] (drop_neuron_r3.south) -- ([yshift=-1em]drop_neuron_r3.south);
\draw [-,line width=0.3mm] (drop_neuron_r2.south) -- ([yshift=-1em]drop_neuron_r2.south);
\draw [-,line width=0.3mm] (drop_neuron_r1.south) -- ([yshift=-1em]drop_neuron_r1.south);
% Equations, stacked top to bottom. The activation is applied to the
% pre-activation z_i^{l+1} in both cases (previously written as f(x_i^l) and f(z_i^l)).
\node [anchor=west,inner sep = 2pt] (line1) at (9*\nodespace,0) {未应用dropout:};
\node [anchor=north west,inner sep = 2pt] (line2) at (line1.south west) {$z_{i}^{l+1}=\mathbf{w}_{i}^{l} \mathbf{x}+b_{i}^{l}$};
\node [anchor=north west,inner sep = 2pt] (line3) at (line2.south west) {$x_{i}^{l+1}=f\left(z_{i}^{l+1}\right)$};
\node [anchor=north west,inner sep = 2pt] (line4) at (line3.south west) {应用dropout:};
\node [anchor=north west,inner sep = 2pt] (line5) at (line4.south west) {$r_{j}^{l} \sim$ Bernoulli $(1-p)$};
\node [anchor=north west,inner sep = 2pt] (line6) at (line5.south west) {$\tilde{\mathbf{x}}=\mathbf{r} * \mathbf{x}$};
% Same accent as in line6 and in the node labels (was \widetilde).
\node [anchor=north west,inner sep = 2pt] (line7) at (line6.south west) {$z_{i}^{l+1}=\mathbf{w}_{i}^{l} \tilde{\mathbf{x}}+b_{i}^{l}$};
\node [anchor=north west,inner sep = 2pt] (line8) at (line7.south west) {$x_{i}^{l+1}=f\left(z_{i}^{l+1}\right)$};
\end{tikzpicture}
\ No newline at end of file
% Figure: four stages of building an NMT system (data processing, architecture design,
% training, inference), each with a text box describing it.
\begin{tikzpicture}
\begin{scope}
% Stage boxes: n1 data processing, n2 training, n3 architecture design (above n2), n4 inference (below n2).
{\small
\node [anchor=north,rectangle,draw, inner sep=0mm,minimum height=3em,minimum width=6em,rounded corners=5pt,thick,fill=blue!10!white] (n1) at (0, 0) {数据处理};
\node [anchor=west,rectangle,draw, inner sep=0mm,minimum height=3em,minimum width=6em,rounded corners=5pt,thick,fill=yellow!10!white] (n2) at ([xshift=3em,yshift=0em]n1.east) {训练};
\node [anchor=south,rectangle,draw, inner sep=0mm,minimum height=3em,minimum width=6em,rounded corners=5pt,thick,fill=red!10!white] (n3) at ([xshift=0em,yshift=2em]n2.north) {架构设计};
\node [anchor=north,rectangle,draw, inner sep=0mm,minimum height=3em,minimum width=6em,rounded corners=5pt,thick,fill=green!10!white] (n4) at ([xshift=0em,yshift=-2em]n2.south) {推断};
}
% Connector from n1 to n4, drawn as a vertical segment below n1 plus a horizontal arrow into n4.
\draw [-,very thick] ([xshift=0em,yshift=0em]n1.south) -- ([xshift=0em,yshift=-3.25em]n1.south);
\draw [->,very thick] ([xshift=-5.5em,yshift=0em]n4.west) -- ([xshift=0em,yshift=0em]n4.west);
% Straight arrows: n1 -> n2, n3 -> n2, n2 -> n4.
\draw [->,very thick] ([xshift=0em,yshift=0em]n1.east) -- ([xshift=0em,yshift=0em]n2.west);
\draw [->,very thick] ([xshift=0em,yshift=0em]n3.south) -- ([xshift=0em,yshift=0em]n2.north);
\draw [->,very thick] ([xshift=0em,yshift=0em]n2.south) -- ([xshift=0em,yshift=0em]n4.north);
% Description text, hand-wrapped into one node per line (1.5em line spacing).
{\footnotesize
% Data processing (left of n1).
\node [anchor=west] (n11) at ([xshift=-13em,yshift=2em]n1.west) {对训练和测试数据进行};
\node [anchor=west] (n12) at ([xshift=0em,yshift=-1.5em]n11.west) {处理,包括:数据清洗、};
\node [anchor=west] (n13) at ([xshift=0em,yshift=-1.5em]n12.west) {翻译单元切分、译文后};
\node [anchor=west] (n14) at ([xshift=0em,yshift=-1.5em]n13.west) {处理等};
% Architecture design (right of n3).
\node [anchor=west] (n31) at ([xshift=2em,yshift=0em]n3.north east) {神经网络模型设计,包括};
\node [anchor=west] (n32) at ([xshift=0em,yshift=-1.5em]n31.west) {编码器、解码器、注意力};
\node [anchor=west] (n33) at ([xshift=0em,yshift=-1.5em]n32.west) {机制的设计};
% Training. The last line was an empty node, which left the sentence cut off after
% "超参数的调"; it now carries the remaining words.
\node [anchor=west] (n21) at ([xshift=0em,yshift=-2em]n33.south west) {在训练数据上优化模型参};
\node [anchor=west] (n22) at ([xshift=0em,yshift=-1.5em]n21.west) {数,包括训练的策略、损};
\node [anchor=west] (n23) at ([xshift=0em,yshift=-1.5em]n22.west) {失函数设计、超参数的调};
\node [anchor=west] (n24) at ([xshift=0em,yshift=-1.5em]n23.west) {整等};
% Inference.
\node [anchor=west] (n41) at ([xshift=0em,yshift=-2em]n24.south west) {使用训练好的模型在新的};
\node [anchor=west] (n42) at ([xshift=0em,yshift=-1.5em]n41.west) {数据上进行翻译,包括解};
\node [anchor=west] (n43) at ([xshift=0em,yshift=-1.5em]n42.west) {码策略的选择、压缩、优};
\node [anchor=west] (n44) at ([xshift=0em,yshift=-1.5em]n43.west) {化等};
}
% Coloured frames fitted around each group of text lines, drawn behind the text.
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,rounded corners=1pt,thick,draw,fill=red!5!white] [fit = (n31) (n32) (n33)] (box1) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,thick,draw,fill=yellow!5!white] [fit = (n21) (n22) (n23) (n24) ] (box2) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,thick,draw,fill=green!5!white] [fit = (n41) (n42) (n43) (n44) ] (box3) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,thick,draw,fill=blue!5!white] [fit = (n11) (n12) (n13) (n14) ] (box4) {};
\end{pgfonlayer}
% Short dotted arrows pointing from each stage box towards its description.
\draw [->,dotted,very thick,red] (n3.east) -- ([xshift=1.4em]n3.east);
\draw [->,dotted,very thick] (n2.east) -- ([xshift=1.4em]n2.east);
\draw [->,dotted,very thick,ugreen] (n4.east) -- ([xshift=1.4em]n4.east);
\draw [->,dotted,very thick,blue] (n1.west) -- ([xshift=-1.4em]n1.west);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
% Figure: dual-learning loop between two agents. The primal task f maps x to y (top),
% the dual task g maps y back to x (bottom); the feedback signals are listed in the middle.
\begin{tikzpicture}
% Centre: feedback signals.
\node [rectangle,inner sep=2pt,font=\scriptsize] (center) at (0,0) {
\begin{tabular}{c}
Feedback signals during the loop:\\
$R(x,f,g)=s(x,x')$:BLEU of $x'$ given $x$ \\
$L(y)$ and $L(x')$:Language model of $y$ and $x'$
\end{tabular}
};
% Top: primal task. The arrow is typeset with \rightarrow rather than the literal "->".
\node [rectangle,inner sep=2pt,font=\scriptsize] (top) at ([yshift=3em,xshift=0em]center.north) {
\begin{tabular}{c}
En$\rightarrow$Ch translation \\
Primal Task $f:x\rightarrow y$
\end{tabular}
};
% Left: the English side, i.e. the input x and its reconstruction x' = g(y).
\node [rectangle,inner sep=2pt,font=\scriptsize] (left) at ([yshift=0em,xshift=-3em]center.west) {
\begin{tabular}{c}
English sentence $x$ \\
New English sentence \\
$x' = g(y)$
\end{tabular}
};
% Right: the Chinese side. y is produced by the primal task from x, so y = f(x)
% (the label previously read y = f(y)).
\node [rectangle,inner sep=2pt,font=\scriptsize] (right) at ([yshift=0em,xshift=3em]center.east) {
\begin{tabular}{c}
Chinese sentence \\
$y = f(x)$
\end{tabular}
};
% Bottom: dual task.
\node [rectangle,inner sep=2pt,font=\scriptsize] (down) at ([yshift=-3em,xshift=0em]center.south) {
Dual Task $g:y\rightarrow x$
};
\node [rectangle,inner sep=2pt,draw,thick,fill=green!20] (agent1) at ([xshift=-1em]left.west) {Agent};
\node [rectangle,inner sep=2pt,draw,thick,fill=blue!20] (agent2) at ([xshift=1em]right.east) {Agent};
% Each half of the loop is drawn in two pieces that meet at the task label:
% a plain curve into the label, then an arrow out of it.
\draw [-,line width=0.8pt] (left.north) .. controls +(north:0.8) and +(west:0.8) .. (top.west);
\draw [->,line width=0.8pt] (top.east) .. controls +(east:0.8) and +(north:0.8) .. (right.north);
\draw [->,line width=0.8pt] (down.west) .. controls +(west:0.8) and +(south:0.8) .. (left.south);
\draw [-,line width=0.8pt] (right.south) .. controls +(south:0.8) and +(east:0.8) .. (down.east) ;
\end{tikzpicture}
\ No newline at end of file
% Figure: translation loop between a Chinese sentence (left) and an English sentence (right),
% passing through the model P(t|s) on top and the model P(s|t) at the bottom.
\begin{tikzpicture}
% Shared look of the text labels and of the curved arrows.
\tikzstyle{looplabel} = [rectangle,inner sep=2pt,font=\scriptsize]
\tikzstyle{looparrow} = [->,line width=0.8pt]
% Empty node in the middle; the four labels are positioned relative to it.
\node [looplabel] (center) at (0,0) {};
% The two sentences.
\node [looplabel] (left) at ([xshift=-4em]center.west) {
\begin{tabular}{c}
今天天气真好。
\end{tabular}
};
\node [looplabel] (right) at ([xshift=4em]center.east) {
\begin{tabular}{c}
The weather is \\so good today.
\end{tabular}
};
% The two translation models.
\node [looplabel] (top) at ([yshift=3em]center.north) {
\begin{tabular}{c}
翻译模型 \\
$\textrm{P}(\mathbf t|\mathbf s)$
\end{tabular}
};
\node [looplabel] (down) at ([yshift=-3em]center.south) {
\begin{tabular}{c}
翻译模型 \\
$\textrm{P}(\mathbf s|\mathbf t)$
\end{tabular}
};
% Clockwise loop: left -> top -> right -> down -> left.
\draw [looparrow] (left.north) .. controls +(north:0.5) and +(west:0.5) .. (top.west);
\draw [looparrow] (top.east) .. controls +(east:0.5) and +(north:0.5) .. (right.north);
\draw [looparrow] (right.south) .. controls +(south:0.5) and +(east:0.5) .. (down.east) ;
\draw [looparrow] (down.west) .. controls +(west:0.5) and +(south:0.5) .. (left.south);
\end{tikzpicture}
\ No newline at end of file
% Figure: filtering of a parallel corpus. The upper panel (word1) lists Chinese/English
% sentence pairs, some rows with a coloured background; the lower panel (word2) repeats
% only the rows that have no background colour. An arrow connects the two panels.
% NOTE(review): the three fill colours (green, blue, yellow) presumably mark different
% reasons for removing a pair -- confirm against the surrounding text.
% NOTE(review): the English row "The success rate for this matter is $\%$ ." has no number
% although the Chinese side contains "50" -- confirm whether that is intended.
\begin{tikzpicture}
% Vertical distance between consecutive rows inside the panels.
% NOTE(review): \base is presumably declared with \newlength elsewhere -- confirm.
\setlength{\base}{1.2em}
% Upper panel: a two-column tabular whose cells are nested tikzpictures
% (left cell: Chinese side, right cell: English side), one node per row.
\node[minimum width=8em,minimum height=18em,inner sep=1pt,rounded corners=10pt,draw,thick,font=\scriptsize,fill=white!50,drop shadow,align=center] (word1) at (0,0){
\begin{tabular}{l l}
\begin{tikzpicture}
\tikzstyle{node} = [minimum width=0em,minimum height=1em,inner sep=2pt,font=\scriptsize,anchor = west,rounded corners=0pt,outer sep=0pt]
\node [node,fill=green!15] (a1) at (0,0) {天气 \ \ \ \ };
\node [node] (a2) at (0,-\base*1) {\ \ 喜欢 \ \ 下雨 \ \ \ \ };
\node [node,fill=green!15] (a3) at (0,-\base*2) {\textless{}p\textgreater \ \ 显示 \ \ 所选 \ \ \ \ \ \ \ \ \textless{}\textbackslash{}p\textgreater{} };
\node [node,fill=blue!15] (a41) at (0,-\base*3) {桃树 \ \ \ \ 杏树 \ \ \ \ 梨树 \ \ , \ \ \ \ 不让 \ \ \ \ , \ \ };
\node [node,fill=blue!15] (a42) at (0,-\base*4) {\ \ 不让 \ \ \ \ , \ \ \ \ 开满 \ \ \ \ \ \ };
\node [node] (a5) at (0,-\base*5.5) {机器 \ \ 翻译 \ \ \ \ 人们 \ \ \ \ 生活 \ \ 带来了 \ \ 便利 \ \ };
\node [node] (a6) at (0,-\base*7) {这件 \ \ 事情 \ \ \ \ 成功率 \ \ \ \ 50 \ \ $\%$ \ \ };
\node [node,fill=green!15] (a7) at (0,-\base*8) {翻译 \ \ \ \ \ \ 特别 \ \ 感兴趣 \ \ };
\node [node] (a8) at (0,-\base*9) {他说 \ \ : \ \ `` 这个 \ \ 深深 \ \ 有趣 \ \ \ \ 想法 \ \ \ \ };
\node [node] (a81) at (0,-\base*10) {\ \ 心里 \ \ 。'' };
\node [node] (a9) at (0,-\base*11) {\ \ 喜欢 \ \ 下雨 \ \ \ \ };
\node [node,fill=yellow!15] (a10) at (0,-\base*12) {\ \ 喜欢 \ \ 下雨 \ \ \ \ };
\node [node] (a11) at (0,-\base*13) {花下 \ \ 成千成百 \ \ \ \ 蜜蜂 \ \ 嗡嗡 \ \ \ \ 闹着 \ \ };
\end{tikzpicture}
&
\begin{tikzpicture}
\tikzstyle{node} = [minimum width=0em,minimum height=1em,inner sep=2pt,font=\scriptsize,anchor = west,rounded corners=0pt,outer sep=0pt]
\node [node,fill=green!15] (a1) at (0,0) {The weather today is good , but ... };
\node [node] (a2) at (0,-\base*1) {I like rainy days .};
\node [node,fill=green!15] (a3) at (0,-\base*2) {\textless{}p\textgreater to show the selected side . \textless{}\textbackslash{}p\textgreater{}};
\node [node,fill=blue!15] (a4) at (0,-\base*3.5) {Flowers bloom .};
\node [node] (a51) at (0,-\base*5) {Machine translation brings convenience to people's };
\node [node] (a52) at (0,-\base*6) {lives. };
\node [node] (a6) at (0,-\base*7) {The success rate for this matter is $\%$ . };
\node [node,fill=green!15] (a7) at (0,-\base*8) {I'm interested in translation . };
\node [node] (a8) at (0,-\base*9) {He said: `` This interesting idea is deeply };
\node [node] (a81) at (0,-\base*10) {imprinted in my heart . '' };
\node [node] (a9) at (0,-\base*11) {I like rainy days .};
\node [node,fill=yellow!15] (a10) at (0,-\base*12) {I like rainy days .};
\node [node] (a11) at (0,-\base*13) {Hundreds of bees hummed under the flowers . };
\end{tikzpicture}
\end{tabular}
};
% Lower panel: same construction, containing only the rows without a background colour above.
\node[minimum width=8em,minimum height=10.5em,inner sep=2pt,rounded corners=10pt,draw,thick,font=\scriptsize,fill=white!50,drop shadow,align=center] (word2) at (0,-6.6){
\begin{tabular}{l l}
\begin{tikzpicture}
\tikzstyle{node} = [minimum width=0em,minimum height=1em,inner sep=2pt,font=\scriptsize,anchor = west,rounded corners=0pt,outer sep=0pt]
\node [node] (a1) at (0,0) {\ \ 喜欢 \ \ 下雨 \ \ \ \ };
\node [node] (a2) at (0,-\base*1.5) {机器 \ \ 翻译 \ \ \ \ 人们 \ \ \ \ 生活 \ \ 带来了 \ \ 便利 \ \ };
\node [node] (a3) at (0,-\base*3) {这件 \ \ 事情 \ \ \ \ 成功率 \ \ \ \ 50 \ \ $\%$ \ \ };
\node [node] (a4) at (0,-\base*4) {他说 \ \ : \ \ `` 这个 \ \ 深深 \ \ 有趣 \ \ \ \ 想法 \ \ \ \ };
\node [node] (a42) at (0,-\base*5) {\ \ 心里 \ \ 。'' };
\node [node] (a5) at (0,-\base*6) {\ \ 喜欢 \ \ 下雨 \ \ \ \ };
\node [node] (a6) at (0,-\base*7) {花下 \ \ 成千成百 \ \ \ \ 蜜蜂 \ \ 嗡嗡 \ \ \ \ 闹着 \ \ };
\end{tikzpicture}
&
\begin{tikzpicture}
\tikzstyle{node} = [minimum width=0em,minimum height=1em,inner sep=2pt,font=\scriptsize,anchor = west,rounded corners=0pt,outer sep=0pt]
\node [node] (a1) at (0,0) {I like rainy days .};
\node [node] (a2) at (0,-\base*1) {Machine translation brings convenience to people's };
\node [node] (a22) at (0,-\base*2) {lives. };
\node [node] (a3) at (0,-\base*3) {The success rate for this matter is $\%$ . };
\node [node] (a4) at (0,-\base*4) {He said: `` This interesting idea is deeply };
\node [node] (a42) at (0,-\base*5) {imprinted in my heart . '' };
\node [node] (a5) at (0,-\base*6) {I like rainy days .};
\node [node] (a6) at (0,-\base*7) {Hundreds of bees hummed under the flowers . };
\end{tikzpicture}
\end{tabular}
};
% Thick arrow from the upper panel down to the lower panel.
\draw[->,line width=.1cm,blue!40 ] ([yshift=-0.3\base]word1.south) -- ([yshift=0.3\base]word2.north);
\end{tikzpicture}
\ No newline at end of file
% Figure: two side-by-side panels, captioned "(a) Word-level" and "(b) Sequence-level".
% Each panel shows four columns of five squares (one square per vocabulary word
% EOS / I / am / fine / good) as the learning target above a "Student" box, with the
% input words below the box.
\begin{tikzpicture}[scale=1]
% prob: filled square whose size is set per node via "minimum size"; word: plain text label.
\tikzstyle{prob} = [rectangle,fill=blue!40,text=white,inner sep=0pt,font=\scriptsize];
\tikzstyle{word} = [inner sep=0pt,font=\small];
% ---------------- Panel (a): the target columns contain several non-trivial values.
\begin{scope}[]
% Node naming: prob<row><column>; rows are 0.5cm apart, columns 1cm apart.
% Column 1
\node [prob,minimum size=0.1cm] (prob11) at (0,0) {};
\node [prob,minimum size=0.5cm,anchor=center] (prob21) at ([yshift=-0.5cm]prob11.center) {$.7$};
\node [prob,minimum size=0.1cm,anchor=center] (prob31) at ([yshift=-0.5cm]prob21.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob41) at ([yshift=-0.5cm]prob31.center) {};
\node [prob,minimum size=0.3cm,anchor=center] (prob51) at ([yshift=-0.5cm]prob41.center) {$.2$};
% Frame around the column; the two helper coordinates pad it by 0.25cm around the
% first and last square centres. The same pattern is repeated for every column.
\begin{pgfonlayer}{background}
\coordinate (bottomleft) at ([shift={(-0.25cm,-0.25cm)}]prob51.center);
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob11.center);
\node [draw,fit=(prob11) (prob21) (prob31) (prob41) (prob51) (topright) (bottomleft)] (prob1) {};
\end{pgfonlayer}
% Column 2
\node [prob,minimum size=0.1cm,anchor=center] (prob12) at ([xshift=1cm]prob11.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob22) at ([yshift=-0.5cm]prob12.center) {};
\node [prob,minimum size=0.4cm,anchor=center] (prob32) at ([yshift=-0.5cm]prob22.center) {$.4$};
\node [prob,minimum size=0.3cm,anchor=center] (prob42) at ([yshift=-0.5cm]prob32.center) {$.3$};
\node [prob,minimum size=0.1cm,anchor=center] (prob52) at ([yshift=-0.5cm]prob42.center) {};
\begin{pgfonlayer}{background}
\coordinate (bottomleft) at ([shift={(-0.25cm,-0.25cm)}]prob52.center);
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob12.center);
\node [draw,fit=(prob12) (prob22) (prob32) (prob42) (prob52) (topright) (bottomleft)] (prob2) {};
\end{pgfonlayer}
% Column 3
\node [prob,minimum size=0.1cm,anchor=center] (prob13) at ([xshift=1cm]prob12.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob23) at ([yshift=-0.5cm]prob13.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob33) at ([yshift=-0.5cm]prob23.center) {};
\node [prob,minimum size=0.4cm,anchor=center] (prob43) at ([yshift=-0.5cm]prob33.center) {$.6$};
\node [prob,minimum size=0.1cm,anchor=center] (prob53) at ([yshift=-0.5cm]prob43.center) {};
\begin{pgfonlayer}{background}
\coordinate (bottomleft) at ([shift={(-0.25cm,-0.25cm)}]prob53.center);
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob13.center);
\node [draw,fit=(prob13) (prob23) (prob33) (prob43) (prob53) (topright) (bottomleft)] (prob3) {};
\end{pgfonlayer}
% Column 4
\node [prob,minimum size=0.5cm,anchor=center] (prob14) at ([xshift=1cm]prob13.center) {$.8$};
\node [prob,minimum size=0.1cm,anchor=center] (prob24) at ([yshift=-0.5cm]prob14.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob34) at ([yshift=-0.5cm]prob24.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob44) at ([yshift=-0.5cm]prob34.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob54) at ([yshift=-0.5cm]prob44.center) {};
\begin{pgfonlayer}{background}
\coordinate (bottomleft) at ([shift={(-0.25cm,-0.25cm)}]prob54.center);
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob14.center);
\node [draw,fit=(prob14) (prob24) (prob34) (prob44) (prob54) (topright) (bottomleft)] (prob4) {};
\end{pgfonlayer}
% Label
% Brace across the top of all four columns.
\draw [decorate,decoration={brace}] ([yshift=0.1cm]prob1.north west) to node [midway,above,font=\small] {学习目标(Teacher输出)} ([yshift=0.1cm]prob4.north east);
% Vocab
% One word per row, to the left of column 1; the brace further left carries no visible text.
\node [word,anchor=center] () at ([xshift=-0.9cm]prob11.center) {EOS};
\node [word,anchor=center] () at ([xshift=-0.9cm]prob21.center) {I};
\node [word,anchor=center] () at ([xshift=-0.9cm]prob31.center) {am};
\node [word,anchor=center] () at ([xshift=-0.9cm]prob41.center) {fine};
\node [word,anchor=center] () at ([xshift=-0.9cm]prob51.center) {good};
\draw [decorate,decoration={brace,mirror}] ([xshift=-1cm]prob1.north west) to node [midway,left,font=\small,align=center] {\\} ([xshift=-1cm]prob1.south west);
% Model
% Rounded box spanning the width of the four columns, 0.5cm below them.
\coordinate (bottomleft) at ([yshift=-1cm]prob1.south west);
\coordinate (topright) at ([yshift=-0.5cm]prob4.south east);
\node [draw,rounded corners=3pt,fill=green!20,inner sep=0pt,fit=(bottomleft) (topright)] (model) {};
\node [word] () at (model.center) {Student};
\foreach \i in {1,2,...,4}
\draw [-latex,thick] (prob\i.south) to ([yshift=-0.5cm]prob\i.south);
% Input
% Input words below the model box, each with a short arrow pointing up towards it.
\node [word,anchor=south] (input1) at ([yshift=-1.8cm]prob1.south) {EOS};
\node [word,anchor=south] (input2) at ([yshift=-1.8cm]prob2.south) {I};
\node [word,anchor=south] (input3) at ([yshift=-1.8cm]prob3.south) {am};
\node [word,anchor=south] (input4) at ([yshift=-1.8cm]prob4.south) {fine};
\foreach \i in {1,2,...,4}
\draw [-latex,thick] ([yshift=0.3cm]input\i.south) to ([yshift=0.8cm]input\i.south);
\node [word,anchor=south] (ns) at ([xshift=-1cm]input1.south) {输入:};
\node [word,anchor=north] () at ([xshift=2.1cm,yshift=-0.5cm]ns.south) {(a)\ Word-level};
\end{scope}
% ---------------- Panel (b): same layout shifted right by 2.5in; every column has exactly
% one large square labelled "1." and the last input word is "good".
% The node names of panel (a) are reused and overwritten here.
\begin{scope}[xshift=2.5in]
% Column 1
\node [prob,minimum size=0.1cm] (prob11) at (0,0) {};
\node [prob,minimum size=0.5cm,anchor=center] (prob21) at ([yshift=-0.5cm]prob11.center) {$1.$};
\node [prob,minimum size=0.1cm,anchor=center] (prob31) at ([yshift=-0.5cm]prob21.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob41) at ([yshift=-0.5cm]prob31.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob51) at ([yshift=-0.5cm]prob41.center) {};
\begin{pgfonlayer}{background}
\coordinate (bottomleft) at ([shift={(-0.25cm,-0.25cm)}]prob51.center);
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob11.center);
\node [draw,fit=(prob11) (prob21) (prob31) (prob41) (prob51) (topright) (bottomleft)] (prob1) {};
\end{pgfonlayer}
% Column 2
\node [prob,minimum size=0.1cm,anchor=center] (prob12) at ([xshift=1cm]prob11.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob22) at ([yshift=-0.5cm]prob12.center) {};
\node [prob,minimum size=0.5cm,anchor=center] (prob32) at ([yshift=-0.5cm]prob22.center) {$1.$};
\node [prob,minimum size=0.1cm,anchor=center] (prob42) at ([yshift=-0.5cm]prob32.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob52) at ([yshift=-0.5cm]prob42.center) {};
\begin{pgfonlayer}{background}
\coordinate (bottomleft) at ([shift={(-0.25cm,-0.25cm)}]prob52.center);
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob12.center);
\node [draw,fit=(prob12) (prob22) (prob32) (prob42) (prob52) (topright) (bottomleft)] (prob2) {};
\end{pgfonlayer}
% Column 3
\node [prob,minimum size=0.1cm,anchor=center] (prob13) at ([xshift=1cm]prob12.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob23) at ([yshift=-0.5cm]prob13.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob33) at ([yshift=-0.5cm]prob23.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob43) at ([yshift=-0.5cm]prob33.center) {};
\node [prob,minimum size=0.5cm,anchor=center] (prob53) at ([yshift=-0.5cm]prob43.center) {$1.$};
\begin{pgfonlayer}{background}
\coordinate (bottomleft) at ([shift={(-0.25cm,-0.25cm)}]prob53.center);
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob13.center);
\node [draw,fit=(prob13) (prob23) (prob33) (prob43) (prob53) (topright) (bottomleft)] (prob3) {};
\end{pgfonlayer}
% Column 4
\node [prob,minimum size=0.5cm,anchor=center] (prob14) at ([xshift=1cm]prob13.center) {$1.$};
\node [prob,minimum size=0.1cm,anchor=center] (prob24) at ([yshift=-0.5cm]prob14.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob34) at ([yshift=-0.5cm]prob24.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob44) at ([yshift=-0.5cm]prob34.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob54) at ([yshift=-0.5cm]prob44.center) {};
\begin{pgfonlayer}{background}
\coordinate (bottomleft) at ([shift={(-0.25cm,-0.25cm)}]prob54.center);
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob14.center);
\node [draw,fit=(prob14) (prob24) (prob34) (prob44) (prob54) (topright) (bottomleft)] (prob4) {};
\end{pgfonlayer}
% Label
\draw [decorate,decoration={brace}] ([yshift=0.1cm]prob1.north west) to node [midway,above,font=\small] {学习目标(Teacher输出)} ([yshift=0.1cm]prob4.north east);
% Vocab
\node [word,anchor=center] () at ([xshift=-0.9cm]prob11.center) {EOS};
\node [word,anchor=center] () at ([xshift=-0.9cm]prob21.center) {I};
\node [word,anchor=center] () at ([xshift=-0.9cm]prob31.center) {am};
\node [word,anchor=center] () at ([xshift=-0.9cm]prob41.center) {fine};
\node [word,anchor=center] () at ([xshift=-0.9cm]prob51.center) {good};
\draw [decorate,decoration={brace,mirror}] ([xshift=-1cm]prob1.north west) to node [midway,left,font=\small,align=center] {\\} ([xshift=-1cm]prob1.south west);
% Model
\coordinate (bottomleft) at ([yshift=-1cm]prob1.south west);
\coordinate (topright) at ([yshift=-0.5cm]prob4.south east);
\node [draw,rounded corners=3pt,fill=green!20,inner sep=0pt,fit=(bottomleft) (topright)] (model) {};
\node [word] () at (model.center) {Student};
\foreach \i in {1,2,...,4}
\draw [-latex,thick] (prob\i.south) to ([yshift=-0.5cm]prob\i.south);
% Input
\node [word,anchor=south] (input1) at ([yshift=-1.8cm]prob1.south) {EOS};
\node [word,anchor=south] (input2) at ([yshift=-1.8cm]prob2.south) {I};
\node [word,anchor=south] (input3) at ([yshift=-1.8cm]prob3.south) {am};
% The fourth input is centre-aligned on input3 rather than south-anchored, and its arrow
% uses slightly different offsets (0.36/0.86cm instead of 0.3/0.8cm).
% NOTE(review): presumably this compensates for the descender of "g" -- confirm.
\node [word,anchor=center] (input4) at ([xshift=1cm]input3.center) {good};
\foreach \i in {1,2,3}
\draw [-latex,thick] ([yshift=0.3cm]input\i.south) to ([yshift=0.8cm]input\i.south);
\draw [-latex,thick] ([yshift=0.36cm]input4.south) to ([yshift=0.86cm]input4.south);
\node [word,anchor=south] (ns) at ([xshift=-1cm]input1.south) {输入:};
\node [word,anchor=north] () at ([xshift=2.1cm,yshift=-0.5cm]ns.south) {(b)\ Sequence-level };
\end{scope}
\end{tikzpicture}
% Sub-figure (a): three models each produce an output; one output is selected as the final output.
\centering
\hspace*{\fill}
\subfigure[假设选择]
{
\begin{tikzpicture}[scale=0.5]
  % Box styles for systems and for their outputs (local to this picture).
  \tikzset{
    system/.style={rectangle,very thick,minimum width=1cm,font=\tiny},
    output/.style={rectangle,very thick,rounded corners=3pt,minimum width=1cm,align=center,font=\tiny}
  }
  \begin{scope}
    % Models, stacked upwards starting from Model 3 at the origin.
    \node [system,draw=orange,text=orange] (model3) at (0,0) {Model $3$};
    \node [system,draw=ugreen,text=ugreen,anchor=south] (model2) at ([yshift=0.3cm]model3.north) {Model $2$};
    \node [system,draw=red,text=red,anchor=south] (model1) at ([yshift=0.3cm]model2.north) {Model $1$};
    % One output box to the right of each model, drawn in that model's colour.
    \foreach \idx/\clr in {3/orange,2/ugreen,1/red}
      \node [output,draw=\clr,text=\clr,anchor=west] (output\idx) at ([xshift=0.5cm]model\idx.east) {Output $\idx$};
    % Dashed frame around all outputs, placed on the background layer.
    \begin{pgfonlayer}{background}
      \node [draw,thick,dashed,rounded corners=3pt,inner sep=2pt,fit=(output1) (output2) (output3)] (output) {};
    \end{pgfonlayer}
    \node [output,draw=ublue,text=ublue,minimum width=1cm,right=1cm of output] (final) {Final\\Output};
    % Model -> output arrows, then the selection arrow from the framed outputs to the final output.
    \foreach \idx in {1,2,3}
      \draw [->,very thick] (model\idx) to (output\idx);
    \draw [->,very thick] (output) to node [above,pos=0.5,font=\tiny] {Selection} (final);
  \end{scope}
\end{tikzpicture}
}
% Sub-figure (b): three models are grouped into an ensemble that feeds a single model producing the final output.
\hfill
\subfigure[预测融合]
{
\begin{tikzpicture}[scale=0.5]
  % Box styles for systems and for outputs (local to this picture).
  \tikzset{
    system/.style={rectangle,very thick,minimum width=1cm,font=\tiny},
    output/.style={rectangle,very thick,rounded corners=3pt,minimum width=1cm,align=center,font=\tiny}
  }
  \begin{scope}
    % Models, stacked upwards starting from Model 3 at the origin.
    \node [system,draw=orange,text=orange] (model3) at (0,0) {Model $3$};
    \node [system,draw=ugreen,text=ugreen,anchor=south] (model2) at ([yshift=0.3cm]model3.north) {Model $2$};
    \node [system,draw=red,text=red,anchor=south] (model1) at ([yshift=0.3cm]model2.north) {Model $1$};
    % Dashed frame around the three models, placed on the background layer.
    \begin{pgfonlayer}{background}
      \node [draw,thick,dashed,inner sep=2pt,fit=(model3) (model2) (model1)] (ensemble) {};
    \end{pgfonlayer}
    % Combined model and its final output, laid out to the right of the frame.
    \node [system,draw=ugreen,text=ugreen,right=1cm of ensemble] (model) {Model};
    \node [output,draw=ublue,text=ublue,minimum width=1cm,anchor=west] (final) at ([xshift=0.5cm]model.east) {Final\\Output};
    \draw [->,very thick] (ensemble) to node [above,pos=0.5,font=\tiny] {Ensemble} (model);
    \draw [->,very thick] (model) to (final);
  \end{scope}
\end{tikzpicture}
}
% Sub-figure (c): the outputs of three models are merged into a lattice; a further model and the lattice both lead to the final output.
\hspace*{\fill}
\\
\subfigure[译文重组]
{
\begin{tikzpicture}[scale=0.5]
  % Box styles for systems/outputs and the small filled circle used for lattice states.
  \tikzset{
    system/.style={rectangle,very thick,minimum width=1cm,font=\tiny},
    output/.style={rectangle,very thick,rounded corners=3pt,minimum width=1cm,align=center,font=\tiny},
    dot/.style={circle,fill=blue!40!white,minimum size=5pt,inner sep=0pt}
  }
  \begin{scope}
    % Models, stacked upwards starting from Model 3 at the origin.
    \node [system,draw=orange,text=orange] (model3) at (0,0) {Model $3$};
    \node [system,draw=ugreen,text=ugreen,anchor=south] (model2) at ([yshift=0.3cm]model3.north) {Model $2$};
    \node [system,draw=red,text=red,anchor=south] (model1) at ([yshift=0.3cm]model2.north) {Model $1$};
    % One output box to the right of each model, drawn in that model's colour.
    \foreach \idx/\clr in {3/orange,2/ugreen,1/red}
      \node [output,draw=\clr,text=\clr,anchor=west] (output\idx) at ([xshift=0.5cm]model\idx.east) {Output $\idx$};
    \foreach \idx in {1,2,3}
      \draw [->,very thick] (model\idx) to (output\idx);
    % Dashed frame around all outputs, placed on the background layer.
    \begin{pgfonlayer}{background}
      \node [draw,thick,dashed,rounded corners=3pt,inner sep=2pt,fit=(output1) (output2) (output3)] (output) {};
    \end{pgfonlayer}
    % Lattice states: an upper row (1-3) and a lower row (4-5), both positioned relative to output2.
    \node [dot,anchor=west] (lattice1) at ([shift={(1.5cm,0.5cm)}]output2.east) {};
    \node [dot,anchor=west] (lattice2) at ([shift={(1cm,0)}]lattice1.east) {};
    \node [dot,anchor=west] (lattice3) at ([shift={(1cm,0)}]lattice2.east) {};
    \node [dot,anchor=west] (lattice4) at ([shift={(1.5cm,-0.5cm)}]output2.east) {};
    \node [dot,anchor=west] (lattice5) at ([shift={(1cm,0)}]lattice4.east) {};
    % Lattice edges; lattice5 -> lattice3 is drawn twice with different bends.
    \draw [-latex,blue] (lattice1) to [out=30,in=150] (lattice2);
    \draw [-latex,blue] (lattice2) to [out=30,in=150] (lattice3);
    \draw [-latex,blue] (lattice4) to [out=15,in=-120] (lattice2);
    \draw [-latex,blue] (lattice4) to [out=-30,in=-150] (lattice5);
    \draw [-latex,blue] (lattice5) to [out=15,in=-120] (lattice3);
    \draw [-latex,blue] (lattice5) to [out=-60,in=-90] (lattice3);
    % White, shadowed frame behind the lattice with a "Lattice" label on top.
    \begin{pgfonlayer}{background}
      \node [draw=blue,fill=white,drop shadow,thick,rounded corners=3pt,inner sep=5pt,fit=(lattice1) (lattice2) (lattice3) (lattice4) (lattice5),label={[font=\tiny,label distance=0pt]90:Lattice}] (lattice) {};
    \end{pgfonlayer}
    \draw [->,very thick] (output) to (lattice);
    % Extra model and the final output; both the model and the lattice point at the final output.
    \node [system,draw=purple,text=purple,anchor=west] (model) at ([xshift=5.3cm]output1.east) {Model};
    \node [output,draw=ublue,text=ublue,minimum width=1cm,right=1.3cm of lattice] (final) {Final Output};
    \draw [->,very thick] (model) |- (final);
    \draw [->,very thick] (lattice) -- (final);
  \end{scope}
\end{tikzpicture}
}
% Figure with two panels: (a) a network whose softmax output is drawn as nine bars,
% and (b) the same network where some bars are crossed out and a second branch
% supplies a candidate list.
\begin{tikzpicture}
% layer: rounded box for a network layer; prob: one bar of the output distribution.
\tikzstyle{layer} = [rectangle,draw,rounded corners=3pt,minimum width=1cm,minimum height=0.5cm];
\tikzstyle{prob} = [minimum width=0.3cm,rectangle,fill=ugreen!20!white,inner sep=0pt];
% Panel (a). The local bounding box STANDARD is used at the end to place the caption.
\begin{scope}[local bounding box=STANDARD]
\node [] (input1) at (0,0) {$\cdots$};
\node [anchor=south,layer,fill=orange!15!white] (net1) at ([yshift=0.5cm]input1.north) {};
\node [anchor=south,layer,fill=orange!15!white] (out1) at ([yshift=0.5cm]net1.north) {};
% Bars share a common baseline: prob5 is the tallest; prob4..prob1 extend to the
% left and prob6..prob9 to the right, each 1pt apart.
\node [anchor=south,prob,minimum height=0.9cm] (prob5) at ([yshift=1.2cm]out1.north) {};
\node [anchor=south east,prob,minimum height=0.1cm] (prob4) at ([xshift=-1pt]prob5.south west) {};
\node [anchor=south east,prob,minimum height=0.2cm] (prob3) at ([xshift=-1pt]prob4.south west) {};
\node [anchor=south east,prob,minimum height=0.5cm] (prob2) at ([xshift=-1pt]prob3.south west) {};
\node [anchor=south east,prob,minimum height=0.4cm] (prob1) at ([xshift=-1pt]prob2.south west) {};
\node [anchor=south west,prob,minimum height=0.6cm] (prob6) at ([xshift=1pt]prob5.south east) {};
\node [anchor=south west,prob,minimum height=0.3cm] (prob7) at ([xshift=1pt]prob6.south east) {};
\node [anchor=south west,prob,minimum height=0.2cm] (prob8) at ([xshift=1pt]prob7.south east) {};
\node [anchor=south west,prob,minimum height=0.1cm] (prob9) at ([xshift=1pt]prob8.south east) {};
% Filled quadrilateral between the top edge of out1 and the baseline of the bars.
\path [fill=blue!20!white,draw=white] (out1.north west) -- (prob1.south west) -- (prob9.south east) -- (out1.north east) -- (out1.north west);
\draw [->] (input1) to (net1);
\draw [->] (net1) to (out1);
% The "Softmax" label is drawn after the filled shape so it appears on top of it.
\node [font=\small] (label1) at ([yshift=0.6cm]out1.north) {Softmax};
\end{scope}
% Panel (b). Node names prob1..prob9 are reused here; from this point on they
% refer to the bars of this panel, so the statement order must not change.
\begin{scope}[local bounding box=SELECTION]
\node [] (input2) at (4.5cm,0) {$\cdots$};
\node [anchor=south,layer,fill=orange!15!white] (net2) at ([yshift=0.5cm]input2.north) {};
\node [anchor=south,layer,fill=orange!15!white] (out2) at ([yshift=0.5cm]net2.north) {};
% Bars 3, 4, 6, 7 and 9 are drawn with opacity=0 (invisible, but still usable for
% positioning) and each gets a red cross at its baseline.
\node [anchor=south,prob,minimum height=0.9cm] (prob5) at ([yshift=1.2cm]out2.north) {};
\node [anchor=south east,prob,minimum height=0.1cm,opacity=0] (prob4) at ([xshift=-1pt]prob5.south west) {};
\node [text=red,anchor=south,inner sep=1pt] () at (prob4.south) {$\times$};
\node [anchor=south east,prob,minimum height=0.2cm,opacity=0] (prob3) at ([xshift=-1pt]prob4.south west) {};
\node [text=red,anchor=south,inner sep=1pt] () at (prob3.south) {$\times$};
\node [anchor=south east,prob,minimum height=0.5cm] (prob2) at ([xshift=-1pt]prob3.south west) {};
\node [anchor=south east,prob,minimum height=0.4cm] (prob1) at ([xshift=-1pt]prob2.south west) {};
\node [anchor=south west,prob,minimum height=0.6cm,opacity=0] (prob6) at ([xshift=1pt]prob5.south east) {};
\node [text=red,anchor=south,inner sep=1pt] () at (prob6.south) {$\times$};
\node [anchor=south west,prob,minimum height=0.3cm,opacity=0] (prob7) at ([xshift=1pt]prob6.south east) {};
\node [text=red,anchor=south,inner sep=1pt] () at (prob7.south) {$\times$};
\node [anchor=south west,prob,minimum height=0.2cm] (prob8) at ([xshift=1pt]prob7.south east) {};
\node [anchor=south west,prob,minimum height=0.1cm,opacity=0] (prob9) at ([xshift=1pt]prob8.south east) {};
% The last cross is named because the candidate-list arrow below ends at it.
\node [text=red,anchor=south,inner sep=1pt] (plabel9) at (prob9.south) {$\times$};
\path [fill=blue!20!white,draw=white] (out2.north west) -- (prob1.south west) -- (prob9.south east) -- (out2.north east) -- (out2.north west);
\draw [->] (input2) to (net2);
\draw [->] (net2) to (out2);
\node [font=\small] (label2) at ([yshift=0.6cm]out2.north) {Softmax};
% Second branch: a layer fed by its own input node produces the "Candidate List"
% box, which is connected to the cross over prob9.
\node [anchor=west,layer,fill=orange!15!white] (net3) at ([xshift=2cm]net2.east) {};
\node [anchor=north,font=\scriptsize] (input3) at ([yshift=-0.5cm]net3.south) {源语};
\node [anchor=south,layer,align=center,font=\scriptsize,fill=yellow!10!white] (out3) at ([yshift=0.9cm]net3.north) {Candidate\\List};
\draw [->] (input3) to (net3);
\draw [->] (net3) to (out3);
\draw [->] (out3) |- (plabel9.east);
\end{scope}
% Panel captions, placed below the bounding box of each scope.
\node [anchor=north,font=\scriptsize] () at ([yshift=-0.2em]STANDARD.south) {(a) Standard};
\node [anchor=north,font=\scriptsize] () at (SELECTION.south) {(b) Word selection};
\end{tikzpicture}
\ No newline at end of file
% Figure: an encoder pre-trained on monolingual data (left, grey box) is reused
% in an encoder-decoder model that is fine-tuned on the translation task (right).
\begin{tikzpicture}
\begin{scope}
  % word: small token label; model: rounded box for an encoder/decoder.
  \tikzset{
    word/.style={font=\scriptsize},
    model/.style={rectangle,draw,minimum height=3em,minimum width=5em,rounded corners=4pt,fill=blue!15!white}
  }
  % Pre-training encoder with its input tokens below (x_1, x_2, #, #, #, x_6)
  % and its output tokens above (x_3, x_4, x_5).
  \node [model,minimum width=10.5em] (encoder0) at (0,0) {Encoder};
  \node [word] (w1) at ([yshift=-2em,xshift=1em]encoder0.south) {\#};
  \node [word] (w2) at ([xshift=-1em]w1.west) {\#};
  \node [word] (w3) at ([xshift=-1em]w2.west) {$x_2$};
  \node [word] (w4) at ([xshift=-1em]w3.west) {$x_1$};
  \node [word] (w5) at ([xshift=1em]w1.east) {\#};
  \node [word] (w6) at ([xshift=1em]w5.east) {$x_6$};
  \node [word] (w7) at ([yshift=2em,xshift=1em]encoder0.north) {$x_4$};
  \node [word] (w8) at ([yshift=0em,xshift=-1em]w7.west) {$x_3$};
  \node [word] (w9) at ([yshift=0em,xshift=1em]w7.east) {$x_5$};
  % Upward arrows from the input tokens; the arrow length differs per token.
  \foreach \idx/\len in {1/1.3em,2/1.3em,3/1.4em,4/1.4em,5/1.3em,6/1.4em}
    \draw [->] (w\idx.north) -- ([yshift=\len]w\idx.north);
  % Downward arrows from the output tokens.
  \foreach \idx in {7,8,9}
    \draw [->] (w\idx.south) -- ([yshift=-1.4em]w\idx.south);
  % Fine-tuning model: encoder and decoder with their input/output labels.
  \node [model] (encoder1) at ([xshift=8em]encoder0.east) {Encoder};
  \node [model,fill=red!15!white] (decoder) at ([xshift=6em]encoder1.east) {Decoder};
  \node [] (sinput) at ([yshift=-3em]encoder1.south) {source input};
  \node [] (tinput) at ([yshift=-3em]decoder.south) {target input};
  \node [] (output) at ([yshift=3em]decoder.north) {target output};
  \draw [->] (sinput) -- (encoder1);
  \draw [->] (tinput) -- (decoder);
  \draw [->] (decoder) -- (output);
  % Encoder -> decoder connection, routed over the top of the encoder in four segments.
  \coordinate (do0) at ([yshift=1em]encoder1.north);
  \coordinate (do1) at ([xshift=4em]do0.east);
  \coordinate (do2) at ([yshift=-2.5em]do1.south);
  \draw [-] (encoder1.north) -- (do0);
  \draw [-] (do0) -- (do1);
  \draw [-] (do1) -- (do2);
  \draw [->] (do2) -- (decoder.west);
  % Grey background box around the pre-training part.
  \begin{pgfonlayer}{background}
    \node [rectangle,inner sep=2em,fill=black!5,rounded corners=4pt] [fit =(w4) (w6) (w9) (encoder0) ] (box) {};
  \end{pgfonlayer}
  % Stage captions and the thick grey arrow linking the two encoders.
  \node [] (left) at ([yshift=-1.5em]box.south) {Pre-training with monolingual data};
  \node [] (right) at ([xshift=9.8em]left.east) {Fine-tune on translation task};
  \draw [->,black!50!white,line width=3pt,draw] ([xshift=1em]encoder0.east) -- ([xshift=-1em]encoder1.west);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
% Figure: three rows of five student models and two teacher models. Each row has
% an "Ensemble" arrow from the students to the teachers, and the teachers of one
% row point ("Distillation") at groups of students in the next row.
\begin{tikzpicture}
% \newlength allocates its register globally and raises an error when the name
% already exists, so the lengths are only allocated on first use. This keeps the
% figure safe to include more than once (or after another figure declared them).
\ifdefined\YShift\else\newlength{\YShift}\fi
\ifdefined\XShift\else\newlength{\XShift}\fi
% NOTE(review): \YShift and \XShift are not used inside this picture, and \base
% is assumed to be declared elsewhere in the document -- confirm before removing.
\setlength{\YShift}{0.8\base}
\setlength{\XShift}{0.8\base}
% modelnode: tall rounded box anchored at its top edge (labels are rotated by 90 degrees).
\tikzset{
  modelnode/.style={rectangle,draw,rounded corners=2pt,inner sep=0pt,minimum height=4.2em,minimum width=2em,font=\small,anchor=north}
}
% Row-0 reference coordinates: one column per student (stu01..stu05) and per teacher (tea01, tea02).
\coordinate (stu01) at (0,0);
\coordinate (stu02) at ([xshift=3em]stu01);
\coordinate (stu03) at ([xshift=3em]stu02);
\coordinate (stu04) at ([xshift=3em]stu03);
\coordinate (stu05) at ([xshift=3em]stu04);
\coordinate (tea01) at ([xshift=8em]stu05);
\coordinate (tea02) at ([xshift=3em]tea01);
% Rows: every node of row \curr hangs 2em below the node of row \prev in the same column.
\foreach \curr / \prev in {1/0,2/1,3/2}
{
  % student models (node stu<row><column>)
  \foreach \k in {1,...,5}
    \node[modelnode,fill=yellow!20] (stu\curr\k) at ([yshift=-2em]stu\prev\k.south) {\rotatebox{90}{Student $\k$}};
  % teacher models
  \node[modelnode] (tea\curr1) at ([yshift=-2em]tea\prev1.south) {\rotatebox{90}{\color{red!60} Teacher $1$}};
  \node[modelnode] (tea\curr2) at ([yshift=-2em]tea\prev2.south) {\rotatebox{90}{\color{blue!60} Teacher $2$}};
  % ensemble arrow from the last student to the first teacher of the row
  \draw[-latex'] ([xshift=2pt]stu\curr5.east) to node [auto] {\small Ensemble} ([xshift=-2pt]tea\curr1.west);
}
% Iteration labels, placed to the left of rows 2 and 3.
\node[font=\small,anchor=east,purple!80] (iterate1) at ([xshift=-1em]stu21.west) {\rotatebox{90}{Iteration $1$}};
\node[font=\small,anchor=east,purple!80] (iterate2) at ([xshift=-1em]stu31.west) {\rotatebox{90}{Iteration $2$}};
% Distillation labels, placed just above the iteration labels.
\node[font=\small,anchor=south west] (distill1) at ([yshift=0.2em]iterate1.north west) {Distillation};
\node[font=\small,anchor=south west] (distill2) at ([yshift=0.2em]iterate2.north west) {Distillation};
% Student groups (background layer): the colour matches the teacher that points at the group.
\begin{pgfonlayer}{background}
  \node[rectangle,draw,very thick,red!60,densely dotted,inner sep=2pt,rounded corners=2pt,fill=red!20] [fit = (stu21) (stu22) (stu23) ] (group21) {};
  \node[rectangle,draw,very thick,blue!60,densely dotted,inner sep=2pt,rounded corners=2pt,fill=blue!20] [fit = (stu24) (stu25) ] (group22) {};
  \node[rectangle,draw,very thick,blue!60,densely dotted,inner sep=2pt,rounded corners=2pt,fill=blue!20] [fit = (stu31) (stu32) ] (group31) {};
  \node[rectangle,draw,very thick,red!60,densely dotted,inner sep=2pt,rounded corners=2pt,fill=red!20] [fit = (stu33) (stu34) (stu35) ] (group32) {};
\end{pgfonlayer}
% Distillation arrows: teachers of row 1 -> groups of row 2, teachers of row 2 -> groups of row 3.
\draw[-latex',red!60,very thick] (tea11.south) .. controls +(south:1.5em) and +(north:2em) .. (group21.north);
\draw[-latex',blue!60,very thick] (tea12.south) .. controls +(south:2em) and +(north:1.5em) .. (group22.north);
\draw[-latex',red!60,very thick] (tea21.south) .. controls +(south:2em) and +(north:2.5em) .. (group32.north);
\draw[-latex',blue!60,very thick] (tea22.south) .. controls +(south:2em) and +(north:1.5em) .. (group31.north);
\end{tikzpicture}
\ No newline at end of file
% Figure: three columns. Parallel data trains an NMT system that turns Mono_src
% into Pseudo_tgt; the enlarged data set trains a reverse NMT system that turns
% Mono_tgt into Pseudo_src; the final data set trains the final NMT system.
\begin{tikzpicture}
\begin{scope}
  % node1: data box; node2: system box; node3: small data box;
  % slot: borderless cell inside a composite data box; flowarrow: thick grey arrow.
  \tikzset{
    node1/.style={rectangle,draw,minimum height=2em,minimum width=8em,rounded corners=2pt,fill=orange!10},
    node2/.style={rectangle,draw,minimum height=1.3em,minimum width=10em,rounded corners=2pt,fill=blue!15!white},
    node3/.style={rectangle,draw,minimum height=2em,minimum width=4em,rounded corners=2pt,fill=orange!10},
    slot/.style={fill=orange!10,minimum height=1.6em},
    flowarrow/.style={->,thick,line width=1mm}
  }
  % Column 1: parallel data -> NMT system -> (Mono_src -> Pseudo_tgt).
  \node [anchor=north,inner sep=0mm,node1] (n1) at (0,0) {Parallel\ Data};
  \node [anchor=north,node2] (n2) at ([yshift=-2em]n1.south) {NMT\ System};
  \node [anchor=north,node3] (n3) at ([xshift=-3em,yshift=-2em]n2.south) {Mono$_{src}$};
  \node [anchor=west,node3] (n31) at ([xshift=2em]n3.east) {Pseudo$_{tgt}$};
  % Column 2: composite data box (parallel data on top, M_src | P_tgt below) -> reverse NMT system.
  \node [anchor=north west,node1,minimum height=4em,minimum width=8em] (n4) at ([xshift=4.7em]n1.north east) {};
  \node [anchor=south west,slot,minimum width=3.6em] (n41) at ([xshift=0.2em,yshift=0.2em]n4.south west) {M$_{src}$};
  \node [anchor=south east,slot,minimum width=3.6em] (n42) at ([xshift=-0.2em,yshift=0.2em]n4.south east) {P$_{tgt}$};
  \node [anchor=north,slot,minimum width=7.6em] (n43) at ([yshift=-0.2em]n4.north) {Parallel\ Data};
  \node [anchor=north,node2] (n5) at ([yshift=-2em]n4.south) {Reverse\ NMT\ System};
  \node [anchor=north,node3] (n6) at ([xshift=-3.3em,yshift=-2em]n5.south) {Pseudo$_{src}$};
  \node [anchor=west,node3] (n61) at ([xshift=2em]n6.east) {Mono$_{tgt}$};
  % Column 3: composite data box (parallel data on top, P_src | M_tgt below) -> final NMT system.
  \node [anchor=north west,node1,minimum height=4em,minimum width=8em] (n7) at ([xshift=4.7em]n4.north east) {};
  \node [anchor=south west,slot,minimum width=3.6em] (n71) at ([xshift=0.2em,yshift=0.2em]n7.south west) {P$_{src}$};
  \node [anchor=south east,slot,minimum width=3.6em] (n72) at ([xshift=-0.2em,yshift=0.2em]n7.south east) {M$_{tgt}$};
  \node [anchor=north,slot,minimum width=7.6em] (n73) at ([yshift=-0.2em]n7.north) {Parallel\ Data};
  \node [anchor=north,node2] (n8) at ([yshift=-3em]n7.south) {Final\ NMT\ System};
  % Flow arrows; the grey level differs per kind of connection (black!20, black!40 or black!60).
  \draw [flowarrow,black!60] (n1.east) -- ([yshift=1em]n4.west);
  \draw [flowarrow,black!20] (n1.south) -- (n2.north);
  \draw [flowarrow,black!20] (n2.south) -- ([yshift=-2em]n2.south);
  \draw [flowarrow,black!40] (n3.east) -- (n31.west);
  \draw [flowarrow,black!60] (n31.north east) -- (n4.south west);
  \draw [flowarrow,black!60] ([yshift=1em]n4.east) -- ([yshift=1em]n7.west);
  \draw [flowarrow,black!20] (n4.south) -- (n5.north);
  \draw [flowarrow,black!20] (n5.south) -- ([yshift=-2em]n5.south);
  \draw [flowarrow,black!40] (n61.west) -- (n6.east);
  \draw [flowarrow,black!60] (n61.north east) -- (n7.south west);
  \draw [flowarrow,black!20] (n7.south) -- (n8.north);
  % Divider lines inside the two composite data boxes (drawn last, on top of the cells).
  \draw [-,thick] (n4.west) -- (n4.east);
  \draw [-,thick] (n4.south) -- ([yshift=2em]n4.south);
  \draw [-,thick] (n7.west) -- (n7.east);
  \draw [-,thick] (n7.south) -- ([yshift=2em]n7.south);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
% Figure: the word distributions of three models are averaged into a single
% distribution (e.g. Have: (0.5 + 0.2 + 0.4) / 3 = 0.37).
\begin{tikzpicture}
% Per-model distributions, laid out left to right; each is labelled P_1..P_3 so
% that the labels agree with the P_{i} used in the averaging formula below.
\node [anchor=north west] (part1) at (0,0) {\small{$\begin{bmatrix} Have \; 0.5 \\ Has \ \ \; 0.1 \\ . \\ . \\ . \\ . \\ . \end{bmatrix}$}};
\node [anchor=north](p1) at ([yshift=-0.3em]part1.south) {$P_{1}$};
\node [anchor=west](part2) at ([xshift=0.5em]part1.east){\small{$\begin{bmatrix} Have \; 0.2 \\ Has \ \ \; 0.3 \\ . \\ . \\ . \\ . \\ . \end{bmatrix}$}};
\node [anchor=north](p2) at ([yshift=-0.3em]part2.south) {$P_{2}$};
\node [anchor=west](part3) at ([xshift=0.5em]part2.east){\small{$\begin{bmatrix} Have \; 0.4 \\ Has \ \ \; 0.3 \\ . \\ . \\ . \\ . \\ . \end{bmatrix}$}};
\node [anchor=north](p3) at ([yshift=-0.3em]part3.south) {$P_{3}$};
% Arrow to the averaged distribution and the formula that produces it.
\node [anchor=west](part4) at ([xshift=0.5em]part3.east){\huge{$\Rightarrow$}};
\node [anchor=west](part5) at ([xshift=0.5em]part4.east){\small{$\begin{bmatrix} Have \; 0.37 \\ Has \ \ \; 0.23 \\ . \\ . \\ . \\ . \\ . \end{bmatrix}$}};
\node [anchor=north](p5) at (part5.south) {$P=\sum_{i=1}^{3}{\frac{1}{3}P_{i}}$};
\end{tikzpicture}
% Figure with two panels: (a) n engines each produce one output, (b) a single
% engine produces n outputs; in both panels a brace joins the outputs into the
% final output.
\begin{tikzpicture}
% Box styles for engines and for outputs.
\tikzset{
  system/.style={rectangle,very thick,minimum width=1.5cm,font=\scriptsize},
  output/.style={rectangle,very thick,rounded corners=3pt,minimum width=1.5cm,align=center,font=\scriptsize}
}
% Panel (a). The larger gap above engine3 leaves room for the vertical dots.
\begin{scope}[local bounding box=MULTIPLE]
\node [system,draw=orange,text=orange] (engine3) at (0,0) {Engine $n$};
\node [system,draw=ugreen,text=ugreen,anchor=south] (engine2) at ([yshift=0.6cm]engine3.north) {Engine $2$};
\node [system,draw=red,text=red,anchor=south] (engine1) at ([yshift=0.3cm]engine2.north) {Engine $1$};
\node [output,draw=orange,text=orange,anchor=west] (output3) at ([xshift=0.5cm]engine3.east) {Output $n$};
\node [output,draw=ugreen,text=ugreen,anchor=west] (output2) at ([xshift=0.5cm]engine2.east) {Output $2$};
\node [output,draw=red,text=red,anchor=west] (output1) at ([xshift=0.5cm]engine1.east) {Output $1$};
% Brace over all outputs; its midpoint (final) anchors the "Final Output" box.
\draw [very thick,decorate,decoration={brace}] ([xshift=3pt]output1.north east) to node [midway,name=final] {} ([xshift=3pt]output3.south east);
\node [output,draw=ublue,text=ublue,minimum width=1cm,right=0pt of final,minimum height=2.5em] () {Final\\Output};
\draw [->,very thick] (engine1) to (output1);
\draw [->,very thick] (engine2) to (output2);
\draw [->,very thick] (engine3) to (output3);
\node [] () at ([yshift=0.4cm]output3.north) {$\vdots$};
\end{scope}
% Panel (b). output1..output3 and final are redefined here, each positioned 4cm
% to the right of the node of the same name from panel (a); the statement order
% therefore matters.
\begin{scope}[local bounding box=SINGLE]
\node [output,draw=ugreen,text=ugreen,anchor=west] (output3) at ([xshift=4cm]output3.east) {Output $n$};
\node [output,draw=ugreen,text=ugreen,anchor=west] (output2) at ([xshift=4cm]output2.east) {Output $2$};
\node [output,draw=ugreen,text=ugreen,anchor=west] (output1) at ([xshift=4cm]output1.east) {Output $1$};
\node [system,draw=ugreen,text=ugreen,anchor=east,align=center,inner sep=1.9pt] (engine) at ([xshift=-0.5cm]output2.west) {Single\\Engine};
\draw [very thick,decorate,decoration={brace}] ([xshift=3pt]output1.north east) to node [midway,name=final] {} ([xshift=3pt]output3.south east);
\node [output,draw=ublue,text=ublue,minimum width=1cm,right=0pt of final,minimum height=2.5em] () {Final\\Output};
\draw [->,very thick] (engine.east) to (output1.west);
\draw [->,very thick] (engine.east) to (output2.west);
\draw [->,very thick] (engine.east) to (output3.west);
\node [] () at ([yshift=0.4cm]output3.north) {$\vdots$};
\end{scope}
% Panel captions (typo fixed: "combing" -> "combining").
\node [align=center,anchor=north,font=\small] () at ([yshift=-0.3cm]MULTIPLE.south) {(a) combining outputs from\\multiple translation engines};
\node [align=center,anchor=north,font=\small] () at ([yshift=-0.3cm]SINGLE.south) {(b) combining outputs from a\\single translation engine};
\end{tikzpicture}
\ No newline at end of file
% Figure: a chain of encoder-layer boxes (brace label "Nx") above a chain of six
% decoder-layer boxes (brace label "6x"); the last encoder box is connected to
% every decoder box.
\begin{tikzpicture}
% NOTE(review): \base is only set here, so it is assumed to be declared elsewhere.
\setlength{\base}{1.2em}
\tikzstyle{node} = [rounded corners=1pt,minimum width=1.2em,minimum height=1.2em,draw,fill=green!30!white]
\tikzstyle{node2} = [rounded corners=1pt,minimum width=1.2em,minimum height=1.2em,draw,fill=blue!30!white]
% Encoder chain; enc7 is a plain "..." placeholder, so no arrows touch it.
\node[node] (enc1) at (0,0) {};
\node[node] (enc2) at ([xshift = \base]enc1.east) {};
\node[node] (enc3) at ([xshift = \base]enc2.east) {};
\node[node] (enc4) at ([xshift = \base]enc3.east) {};
\node[node] (enc5) at ([xshift = \base]enc4.east) {};
\node[node] (enc6) at ([xshift = \base]enc5.east) {};
\node[] (enc7) at ([xshift = \base]enc6.east) {...};
\node[node] (enc8) at ([xshift = \base]enc7.east) {};
\node[node] (enc9) at ([xshift = \base]enc8.east) {};
\node[node] (enc10) at ([xshift = \base]enc9.east) {};
\node[font=\scriptsize,rotate=270] (src) at ([xshift = -\base]enc1.west) {src};
\draw [->] ([xshift=-0.75em]enc1.west) -- (enc1.west);
\draw [decorate,decoration={brace}] ([yshift=0.3em]enc1.north west) to node [auto,anchor=south,font=\scriptsize] {Nx} ([yshift=0.3em]enc10.north east);
% Arrows between neighbouring encoder boxes (none into or out of the placeholder).
\foreach \from/\to in {1/2,2/3,3/4,4/5,5/6,8/9,9/10}
  \draw [->] (enc\from.east) -- (enc\to.west);
% Decoder chain: box i sits 2em below encoder box i.
\foreach \i in {1,...,6}
  \node[node2,anchor=north] (dec\i) at ([yshift=-2em]enc\i.south) {};
% Input/output labels of the decoder. The "out" label used to reuse the node
% name tgt, which silently overwrote the "tgt" label node; it now has its own name.
\node[font=\scriptsize,rotate=270] (tgt) at ([xshift = -\base]dec1.west) {tgt};
\node[font=\scriptsize,rotate=270] (out) at ([xshift = \base]dec6.east) {out};
\draw [->] ([xshift=-0.75em]dec1.west) -- (dec1.west);
\draw [->] (dec6.east) -- ([xshift=0.75em]dec6.east);
\draw [decorate,decoration={brace,mirror}] ([yshift=-0.3em]dec1.south west) to node [auto,anchor=north,font=\scriptsize] {6x} ([yshift=-0.3em]dec6.south east);
\foreach \from/\to in {1/2,2/3,3/4,4/5,5/6}
  \draw [->] (dec\from.east) -- (dec\to.west);
% Legend; the two text nodes used to share the name line1 and are now distinct.
\node[node] (enc_legend) at ([xshift = 2\base]enc10.east) {};
\node[node2,anchor=north] (dec_legend) at ([yshift = -\base]enc_legend.south) {};
\node[font=\scriptsize,anchor=west] (line1) at (enc_legend.east) {:编码层};
\node[font=\scriptsize,anchor=west] (line2) at (dec_legend.east) {:解码层};
%\node[node] (dec1) at ([xshift=4em]enc1.east) {Decoder};
%\node[node2] (enc2) at ([xshift=4em]dec1.east) {Encoder};
%\node[node] (dec2) at ([xshift=4em]enc2.east) {Decoder};
% Connection from the last encoder box to the top of every decoder box, routed
% via c1 (right of enc10) and c2 (below c1).
\coordinate (c1) at ([xshift=1em]enc10.east);
\coordinate (c2) at ([yshift=-1.6em]c1.south);
\foreach \i in {1,...,6}
  \draw [->,rounded corners] (enc10.east) -- (c1) -- (c2)--([yshift=1em]dec\i.north) -- (dec\i.north);
\end{tikzpicture}
\ No newline at end of file
% Figure: three rows of seven values each -- a one-hot target, the model output
% and a label-smoothed target -- together with the loss formula for each target.
\begin{tikzpicture}
% Row captions, stacked around the "Model out:" caption.
\node[font=\scriptsize] (model) at (0,0) {Model out:};
\node[anchor=north west,font=\scriptsize] (label_smooth) at ([yshift=-1.8em]model.south west) {label smoothing:};
\node[anchor=south west,font=\scriptsize] (one-hot) at ([yshift=2em]model.north west) {one hot:};
% Model output: seven adjacent bars sharing a baseline, each with its p_i label
% on top; p_3 is highlighted in red. Every bar is anchored to the previous one.
\node [anchor=west,minimum width=1em,minimum height=0.2em,fill=ublue!80,inner sep=0pt] (model_label1) at ([xshift=2em,yshift=-0.5em]model.east) {};
\node [anchor=south,font=\scriptsize] (model_w1) at (model_label1.north) {$p_{1}$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.1em,fill=ublue!80,inner sep=0pt] (model_label2) at (model_label1.south east) {};
\node [anchor=south,font=\scriptsize] (model_w2) at (model_label2.north) {$p_{2}$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.7em,fill=ublue!80,inner sep=0pt] (model_label3) at (model_label2.south east) {};
\node [anchor=south,font=\scriptsize] (model_w3) at (model_label3.north) {{\color{red} $p_{3}$}};
\node [anchor=south west,minimum width=1.2em,minimum height=0.4em,fill=ublue!80,inner sep=0pt] (model_label4) at (model_label3.south east) {};
\node [anchor=south,font=\scriptsize] (model_w5) at (model_label4.north) {$p_{4}$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.1em,fill=ublue!80,inner sep=0pt] (model_label5) at (model_label4.south east) {};
\node [anchor=south,font=\scriptsize] (model_w6) at (model_label5.north) {$p_{5}$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.3em,fill=ublue!80,inner sep=0pt] (model_label6) at (model_label5.south east) {};
\node [anchor=south,font=\scriptsize] (model_w7) at (model_label6.north) {$p_{6}$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=ublue!80,inner sep=0pt] (model_label7) at (model_label6.south east) {};
\node [anchor=south,font=\scriptsize] (model_w8) at (model_label7.north) {$p_{7}$};
% One-hot target (no label smoothing): the text "0" everywhere except position 3,
% which is an orange bar labelled with a red 1.
\node [anchor=west,minimum width=1.2em,minimum height=0em,inner sep=0pt,font=\scriptsize] (one_hot_label1) at ([xshift=2em,yshift=3em]model.east) {$0$};
\node [anchor=south west,minimum width=1.2em,minimum height=0em,inner sep=0pt,font=\scriptsize] (one_hot_label2) at (one_hot_label1.south east) {$0$};
\node [anchor=south west,minimum width=1.2em,minimum height=1.5em,fill=orange!50,inner sep=0pt] (one_hot_label3) at (one_hot_label2.south east) {};
\node [anchor=south,font=\scriptsize] (one_hot_w3) at (one_hot_label3.north) {{\color{red} $1$}};
\node [anchor=south west,minimum width=1.2em,minimum height=0em,inner sep=0pt,font=\scriptsize] (one_hot_label4) at (one_hot_label3.south east) {$0$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.1em,inner sep=0pt,font=\scriptsize] (one_hot_label5) at (one_hot_label4.south east) {$0$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.3em,inner sep=0pt,font=\scriptsize] (one_hot_label6) at (one_hot_label5.south east) {$0$};
\node [anchor=south west,minimum width=1em,minimum height=0.4em,inner sep=0pt,font=\scriptsize] (one_hot_label7) at (one_hot_label6.south east) {$0$};
% Label-smoothed target: bars labelled 0.1 everywhere except position 3, which
% is taller and labelled with a red 0.4.
\node [anchor=west,minimum width=1em,minimum height=0.2em,fill=red!50,inner sep=0pt] (label1) at ([xshift=2em,yshift=-3.2em]model.east) {};
\node [anchor=south,font=\scriptsize] (w1) at (label1.north) {$0.1$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=red!50,inner sep=0pt] (label2) at (label1.south east) {};
\node [anchor=south,font=\scriptsize] (w2) at (label2.north) {$0.1$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.8em,fill=red!50,inner sep=0pt] (label3) at (label2.south east) {};
\node [anchor=south,font=\scriptsize] (w3) at (label3.north) {{\color{red} $0.4$}};
\node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=red!50,inner sep=0pt] (label4) at (label3.south east) {};
\node [anchor=south,font=\scriptsize] (w5) at (label4.north) {$0.1$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=red!50,inner sep=0pt] (label5) at (label4.south east) {};
\node [anchor=south,font=\scriptsize] (w6) at (label5.north) {$0.1$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=red!50,inner sep=0pt] (label6) at (label5.south east) {};
\node [anchor=south,font=\scriptsize] (w7) at (label6.north) {$0.1$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=red!50,inner sep=0pt] (label7) at (label6.south east) {};
\node [anchor=south,font=\scriptsize] (w8) at (label7.north) {$0.1$};
% Loss formulas: line1 belongs to the label-smoothed target (0.4 at position 3 is
% written as 0.3 plus the uniform 0.1 inside the sum), line2 to the one-hot target.
\node[font=\scriptsize] (line1) at ([xshift=9em,yshift=-1.5em]model_label7.east) {$loss =-0.3 \log p_{3}-\sum_{i=1}^{7} 0.1 \log p_{i}$};
\node[font=\scriptsize] (line2) at ([xshift=5.9em,yshift=3.5em]model_label7.east) {$loss =-\log p_{3}$};
% Background layer: a red dotted frame around the one-hot and model rows linked
% to the one-hot loss, and a green dotted frame around the model and smoothed
% rows linked to the label-smoothing loss.
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.1em,rounded corners=1pt,very thick,dotted,draw=red] [fit = (one_hot_label1) (one_hot_w3) (one_hot_label7) (model_label1) (model_label7)] (box1) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=purple!10,drop shadow,draw=purple] [fit = (line2)] (box3) {};
\draw [->,dotted,very thick,red] ([yshift=-1em]box1.east) .. controls +(east:1) and +(west:1) .. (box3.west);
\node [rectangle,inner sep=0.1em,rounded corners=1pt,very thick,dotted,draw=ugreen] [fit = (label1) (label7) (model_label1) (model_label7) (model_w3)] (box2) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=green!10,drop shadow,draw=ugreen] [fit = (line1)] (box4) {};
\draw [->,dotted,very thick,ugreen] ([yshift=1em]box2.east) .. controls +(east:1) and +(west:1) .. (box4.west);
\end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Two-axis sketch with a single rising curve. The horizontal axis carries the
% stage labels, the vertical axis is labelled with translation quality, and
% the early part of the curve is annotated.
\begin{scope}\small
  % First stage label; every other position is expressed relative to it.
  \node [anchor=north] (n1) at (0, 0) {开始};
  % Horizontal and vertical axes, both starting 1em above n1's north-west corner.
  \draw [->,very thick] ([yshift=1em]n1.north west) -- ([xshift=20em,yshift=1em]n1.north west);
  \draw [->,very thick] ([yshift=1em]n1.north west) -- ([yshift=10em]n1.north west);
  % Second stage label and the vertical-axis caption.
  \node [anchor=west] (n2) at ([xshift=15em]n1.east) {成熟};
  \node [anchor=south] (n3) at ([xshift=-4em,yshift=8em]n1.north) {翻译品质};
  % The curve itself.
  \draw [-,very thick,draw=ublue]
    ([xshift=0.7em,yshift=3em]n1.north)
      .. controls +(north:7em) and +(south:0em) ..
    ([xshift=17em,yshift=9em]n1.north);
  % Two-line annotation, set one size smaller than the rest of the picture.
  {\footnotesize
    \node [anchor=south] (n4) at ([xshift=7em,yshift=5em]n1.north) {性能快速爬升阶段};
    \node [anchor=west] (n5) at ([yshift=-2em]n4.west) {数据的作用会非常明显};
  }
  % Vertical marker line to the right of the annotation.
  \draw [-,thick] ([xshift=2.3em,yshift=-2em]n4.east) -- ([xshift=2.3em,yshift=2em]n4.north east);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Two boxes side by side: a prediction module (left) and a search module
% (right), plus the arrows entering, leaving and connecting them.
% left box: prediction module (label typo "Transsformer" corrected)
\node [anchor=west,draw=black,very thick,minimum width=6em,minimum height=3.5em,fill=blue!15,align=center,text=black] (part1) at (0,0) {\scriptsize{预测模块} \\ \tiny{(RNN/Transformer)}};
% caption placed below the left box
\node [anchor=south] (text) at ([xshift=0.5em,yshift=-3.5em]part1.south) {\scriptsize{源语言句子(编码)}};
% right box: search module
\node [anchor=east,draw=black,very thick,minimum width=6em,minimum height=3.5em,fill=blue!15,align=center,text=black] (part2) at ([xshift=10em]part1.east) {\scriptsize{搜索模块}};
% arrows into the left box from above and from below
\draw [->,draw=black, thick] ([yshift=2em]part1.north) -- ([yshift=0.1em]part1.north);
\draw [->,draw=black, thick] ([yshift=-2em]part1.south) -- ([yshift=-0.1em]part1.south);
% arrow leaving the right box upwards
\draw [->,draw=black, thick] ([yshift=0em]part2.north) -- ([yshift=2em]part2.north);
% solid arrow left -> right (lower), dashed arrow right -> left (upper)
\draw [->,draw=black,very thick] ([yshift=-0.7em]part1.east) -- ([xshift=-0.05em,yshift=-0.7em]part2.west);
\draw [->,draw=black,very thick,dashed] ([yshift=0.7em]part2.west) -- ([xshift=0.05em,yshift=0.7em]part1.east);
\end{tikzpicture}
\begin{tikzpicture}
% Left-to-right pipeline: a data cylinder, three processing boxes, and a
% second data cylinder, joined by thick stealth arrows.
\tikzset{
  % rounded box used for each processing step
  op/.style={rounded corners=1pt,thick,minimum width=4.0em,minimum height=3.0em,draw,fill=red!5!white,font=\scriptsize},
  % cylinder used for the data stores (the default anchor is overridden per node)
  data/.style={cylinder,draw=black,thick,minimum height=3em,minimum width=3em,shape border rotate=0,cylinder uses custom fill,cylinder body fill=blue!10,cylinder end fill=blue!5,anchor=east,font=\scriptsize},
  % connector between neighbouring stages
  stagearrow/.style={-stealth,line width=.05cm}
}
% processing steps
\node [op] (node1) at (0,0) {分词};
\node [op,anchor=west] (node2) at ([xshift=2.0em]node1.east) {符号标准化};
\node [op,anchor=west] (node3) at ([xshift=2.0em]node2.east) {数据过滤};
% data stores at both ends
\node [data,anchor=east] (data1) at ([xshift=-2.0em]node1.west) {原始数据};
\node [data,anchor=west] (data2) at ([xshift=2.0em]node3.east) {训练数据};
% connect each stage to its successor, leaving a 0.25em gap at either end
\foreach \src/\dst in {data1/node1, node1/node2, node2/node3, node3/data2}{
  \draw [stagearrow] ([xshift=0.25em]\src.east) -- ([xshift=-0.25em]\dst.west);
}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Two feed-forward networks side by side.
% Left ("standard"): three columns of four neurons, fully connected, feeding one output neuron.
% Right ("drop"): the same layout, but some neurons are drawn dashed and take part in no connection.
% \neuronsep is the grid unit; every coordinate is a multiple of it.
\def\neuronsep{1}
\tikzstyle{neuronnode} = [minimum size=1.0em,circle,draw,thick,ublue,inner sep=1pt, fill=white,align=center]
%standard
\foreach \n in {1,...,4}{
\node [neuronnode] (neuron0\n) at (0,\n * \neuronsep) {};
}
% The original wrote "1.2\neuronsep" without the "*", which expands to the
% literal 1.21 (and to an invalid number for any other \neuronsep value).
\foreach \n in {1,...,4}{
\node [neuronnode] (neuron1\n) at (1.2*\neuronsep ,\n * \neuronsep) {};
}
\foreach \n in {1,...,4}{
\node [neuronnode] (neuron2\n) at (2.4*\neuronsep ,\n * \neuronsep) {};
}
\node [neuronnode] (neuron3) at (3.6*\neuronsep ,2.5 * \neuronsep) {};
% full connectivity between consecutive columns
\foreach \n in {1,...,4}{
\foreach \m in {1,...,4}{
\draw [->] (neuron0\n.east) -- (neuron1\m.west);
}
}
\foreach \n in {1,...,4}{
\foreach \m in {1,...,4}{
\draw [->] (neuron1\n.east) -- (neuron2\m.west);
}
}
\foreach \n in {1,...,4}{
\draw [->] (neuron2\n.east) -- (neuron3.west);
}
%drop
%layer1: neuron 2 is dashed
\foreach \n in {1,3,4}{
\node [neuronnode] (neuron4\n) at (5*\neuronsep,\n * \neuronsep) {};
}
\node [neuronnode,dashed] (neuron42) at (5*\neuronsep,2 * \neuronsep) {};
%layer2: neuron 3 is dashed
\foreach \n in {1,2,4}{
\node [neuronnode] (neuron5\n) at (6.2*\neuronsep ,\n * \neuronsep) {};
}
\node [neuronnode,dashed] (neuron53) at (6.2*\neuronsep,3 * \neuronsep) {};
%layer3: neurons 2 and 3 are dashed
\foreach \n in {1,4}{
\node [neuronnode] (neuron6\n) at (7.4*\neuronsep ,\n * \neuronsep) {};
}
\node [neuronnode,dashed] (neuron62) at (7.4*\neuronsep ,2 * \neuronsep) {};
\node [neuronnode,dashed] (neuron63) at (7.4*\neuronsep ,3 * \neuronsep) {};
%layer4: single output neuron
\node [neuronnode] (neuron7) at (8.6*\neuronsep ,2.5 * \neuronsep) {};
% connections only between the solid (non-dashed) neurons
\foreach \n in {1,3,4}{
\foreach \m in {1,2,4}{
\draw [->] (neuron4\n.east) -- (neuron5\m.west);
}
}
\foreach \n in {1,2,4}{
\foreach \m in {1,4}{
\draw [->] (neuron5\n.east) -- (neuron6\m.west);
}
}
\foreach \n in {1,4}{
\draw [->] (neuron6\n.east) -- (neuron7.west);
}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Two rows, each a chain  x_i^l -> LN -> F -> (+) -> LN -> F -> (+) -> x_i^{l+1}
% with a skip connection looping over every LN/F pair into the following (+).
% Row (a) is captioned as the standard Transformer network; row (b) as the
% network with Layer Dropout, where a red segment and an "M=..." label sit
% just before each (+).
% Node styles: boxed sub-layer, unframed input/output text, small circle for (+).
\tikzstyle{sublayernode} = [rectangle,draw,thick,inner sep=3pt,rounded corners=2pt,align=center,minimum height=1.5em,minimum width=1.5em,font=\scriptsize]
\tikzstyle{inputnode} = [rectangle,inner sep=3pt,align=center,font=\scriptsize]
%\tikzstyle{circlenode} = [circle,draw,thick,minimum size=0.3\base,font=\small,inner sep=0pt]
\tikzstyle{mnode} = [circle,thick,minimum size=0.7em,font=\small,inner sep=0pt,draw]
% ---- row (a): nodes ----
\node[anchor=south west,inputnode] (input) at (0,0) {$x_{i}^{l}$};
\node[anchor=west,sublayernode,fill=red!10] (ln) at ([xshift=1.2em]input.east) {LN};
\node[anchor=west,sublayernode,fill=green!10] (fn) at ([xshift=1.2em]ln.east) {F};
% the circle (m) and the "+" glyph (res) are separate nodes placed at almost the same spot
\node[anchor=west,mnode] (m) at ([xshift=2em]fn.east) {};
\node[] (res) at ([xshift=2.4em]fn.east) {+};
\node[anchor=west,sublayernode,fill=red!10] (ln1) at ([xshift=2em]m.east) {LN};
\node[anchor=west,sublayernode,fill=green!10] (fn1) at ([xshift=1.2em]ln1.east) {F};
\node[anchor=west,mnode] (m1) at ([xshift=2em]fn1.east) {};
\node[] (res1) at ([xshift=2.4em]fn1.east) {+};
\node[anchor=west,inputnode] (output) at ([xshift=1.2em]res1.east) {$x_{i}^{l+1}$};
\node[anchor=west,inputnode] (legend1) at (8em,-1em) {(a) 标准Transformer网络};
%\coordinate (mend) at ([xshift=1em]m.west);
% ---- row (a): main path and first skip connection ----
\draw[-latex',thick] (input)--(ln);
\draw[-latex',thick] (ln)--(fn);
\draw[-latex',thick] (fn)--(m);
%\draw[-,thick] (mend)--(res);
% (h) is the branch point 0.7em left of LN; the skip path rises from it and drops into the circle from above
\coordinate (h) at ([xshift=-0.7em]ln.west);
\draw[-latex',thick,rounded corners] (h) -- ([yshift=1.35em]h.north) -- ([yshift=1em]m.north) -- (m.north);
%\coordinate (mend1) at ([xshift=1.0\hseg]m1.west);
% ---- row (a): second sub-layer and its skip connection ----
\draw[-latex',thick] (m)--(ln1);
\draw[-latex',thick] (ln1)--(fn1);
\draw[-latex',thick] (fn1)--(m1);
%\draw[-,thick] (mend1)--(res1);
\draw[-latex',thick] (m1)--(output);
\coordinate (h1) at ([xshift=-0.7em]ln1.west);
\draw[-latex',thick,rounded corners] (h1) -- ([yshift=1.35em]h1.north) -- ([yshift=1em]m1.north) -- (m1.north);
%--------------------------------------------------------
% ---- row (b): same node layout, shifted down to y=-4em, names suffixed with _2 ----
\node[anchor=south west,inputnode] (input_2) at (0,-4em) {$x_{i}^{l}$};
\node[anchor=west,sublayernode,fill=red!10] (ln_2) at ([xshift=1.2em]input_2.east) {LN};
\node[anchor=west,sublayernode,fill=green!10] (fn_2) at ([xshift=1.2em]ln_2.east) {F};
\node[anchor=west,mnode] (m_2) at ([xshift=2em]fn_2.east) {};
\node[] (res_2) at ([xshift=2.4em]fn_2.east) {+};
\node[anchor=west,sublayernode,fill=red!10] (ln1_2) at ([xshift=2em]m_2.east) {LN};
\node[anchor=west,sublayernode,fill=green!10] (fn1_2) at ([xshift=1.2em]ln1_2.east) {F};
\node[anchor=west,mnode] (m1_2) at ([xshift=2em]fn1_2.east) {};
\node[] (res1_2) at ([xshift=2.4em]fn1_2.east) {+};
\node[anchor=west,inputnode] (output_2) at ([xshift=1.2em]res1_2.east) {$x_{i}^{l+1}$};
\node[anchor=west,inputnode] (legend2) at (6.5em,-5.5em) {(b) 引入Layer Dropout后的Transformer网络};
% red mask labels: M=1 before the first (+), M=0 before the second
\node[anchor=south west,inputnode,red,font=\tiny] (mlable) at ([xshift=-2.2em,yshift=-0.6em]m_2.south) {M=1};
\node[anchor=south west,inputnode,red,font=\tiny] (mlable1) at ([xshift=-2.2em,yshift=-0.6em]m1_2.south) {M=0};
% end points of the short red segment between F and the first (+)
\coordinate (start_1) at ([xshift=-1.3em]m_2.west);
\coordinate (end_1) at ([xshift=-0.5em]m_2.west);
%\node[red,font=\scriptsize] (dot1) at (start_1) {$\cdot$};
\draw[-latex',thick] (input_2)--(ln_2);
\draw[-latex',thick] (ln_2)--(fn_2);
\draw[-latex',thick] (fn_2)--(start_1);
% M=1: the red segment runs straight from start_1 to end_1, so the path into (+) is unbroken
\draw[-,thick,red] (start_1)--(end_1);
\draw[-,thick] (end_1)--(m_2);
%\draw[-,thick] (mend)--(res);
\coordinate (h_2) at ([xshift=-0.7em]ln_2.west);
\draw[-latex',thick,rounded corners] (h_2) -- ([yshift=1.35em]h_2.north) -- ([yshift=1em]m_2.north) -- (m_2.north);
%\coordinate (mend1) at ([xshift=1.0\hseg]m1.west);
% end points of the red segment before the second (+)
\coordinate (start_2) at ([xshift=-1.3em]m1_2.west);
\coordinate (end_2) at ([xshift=-0.5em]m1_2.west);
\draw[-latex',thick] (m_2)--(ln1_2);
\draw[-latex',thick] (ln1_2)--(fn1_2);
\draw[-latex',thick] (fn1_2)--(start_2);
% M=0: the red segment ends 0.3em above end_2, leaving a visible gap in the path into (+)
\draw[-,thick,red] (start_2)--([yshift=0.3em]end_2);
\draw[-,thick] (end_2)--(m1_2);
%\draw[-,thick] (mend1)--(res1);
\draw[-latex',thick] (m1_2)--(output_2);
\coordinate (h1_2) at ([xshift=-0.7em]ln1_2.west);
\draw[-latex',thick,rounded corners] (h1_2) -- ([yshift=1.35em]h1_2.north) -- ([yshift=1em]m1_2.north) -- (m1_2.north);
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Two framed stacks of four bars, each bar labelled sent1..sent4 on its left.
% The left stack is captioned "随机生成" and the right one "排序生成".
% Bar widths differ per sentence (presumably standing for sentence length -- confirm against the text).
\tikzset{node/.style={minimum height=0.8em,draw=teal,fill=teal!10}}
% left stack: the first bar is placed absolutely, each later bar hangs below its predecessor
\node[node,minimum width=2.0em] (sent1) at (0,0) {};
\foreach \cur/\prev/\barwidth in {2/1/5.0em, 3/2/1.0em, 4/3/3.0em}{
  \node[node,minimum width=\barwidth,anchor=north west] (sent\cur) at (sent\prev.south west) {};
}
% right stack, built the same way starting at x=12em
\node[node,minimum width=4.0em] (sent5) at (12em,0) {};
\foreach \cur/\prev/\barwidth in {6/5/4.5em, 7/6/4.5em, 8/7/5em}{
  \node[node,minimum width=\barwidth,anchor=north west] (sent\cur) at (sent\prev.south west) {};
}
% text label to the left of every bar
\foreach \idx/\lbl in {1/sent1, 2/sent2, 3/sent3, 4/sent4, 5/sent1, 6/sent2, 7/sent3, 8/sent4}{
  \node[font=\scriptsize,anchor=east] (line\idx) at (sent\idx.west) {\lbl};
}
% frames around each stack, drawn on the background layer
\begin{pgfonlayer}{background}
  \node [rectangle,inner sep=-0.0em,draw] [fit = (sent1) (sent2) (sent3) (sent4)] (box1) {};
  \node [rectangle,inner sep=-0.0em,draw] [fit = (sent5) (sent6) (sent7) (sent8)] (box2) {};
\end{pgfonlayer}
% captions below the stacks
\node[font=\scriptsize,anchor=west] (node1) at ([yshift=-3.2em]sent1.south) {随机生成};
\node[font=\scriptsize,anchor=west] (node2) at ([xshift=7.5em]node1.east) {排序生成};
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Two encoder-decoder diagrams.
% (a) Baseline: x -> Encoder -> Decoder <- y, Decoder -> softmax -> y'.
% (b) Multi task learning: an extra LM box sits between y and the Decoder and
%     additionally feeds a second softmax that outputs z'.
% Fixes relative to the previous revision: the label "softamx" is now spelled
% "softmax" in both panels, and a stray empty option (",,") was removed.
\begin{scope}
% --- panel (a): nodes, placed top-down from the output y' ---
\node [anchor=center] (node1-1) at (0,0) {\small{$y'$}};
% legend explaining the symbols
\node [anchor=center] (node1) at (-2.3,1.5) {\small{$x,y$:双语数据}};
\node [anchor=center] (node2) at (-2.1,1) {\small{$z$}:单语数据};
\node[anchor=south,draw,rounded corners,minimum height=1.5em,minimum width=4em,fill=blue!20](node1-2) at ([yshift=-3em]node1-1.south) {\small{softmax}};
\node[anchor=south,draw,rounded corners,minimum height=2.5em,minimum width=4em,fill=red!20](node1-3) at ([yshift=-4.5em]node1-2.south) {\small{Decoder}};
\node[anchor=south](node1-4) at ([yshift=-3em]node1-3.south) {\small{$y$}};
\node[anchor=west](node2-2) at ([xshift=-5.5em]node1-4.west) {\small{$x$}};
\node [anchor=center] (labela) at ([xshift=3.5em,yshift=-1.5em]node2-2.south) {\small{(a) Baseline}};
\node[anchor=north,draw,rounded corners,minimum height=2.5em,minimum width=4em,fill=red!20](node2-1) at ([yshift=3.5em]node2-2.north) {\small{Encoder}};
% --- panel (a): arrows ---
\draw [->](node1-4.north)--(node1-3);
\draw [->](node1-3.north)--(node1-2);
\draw [->](node1-2.north)--(node1-1);
\draw [->](node2-2.north)--(node2-1);
% encoder output: up, right, down, then into the decoder's west side
\draw[->](node2-1.north)--([yshift=1em]node2-1.north)--([xshift=2.5em,yshift=1em]node2-1.north)--([xshift=2.5em,yshift=-0.4em]node2-1.north)--(node1-3.west);
\end{scope}
% --- panel (b): shifted right and up; node names are reused inside this scope ---
\begin{scope}[xshift=2.3in,yshift=0.6in]
\node [anchor=center] (node1-1) at (0,0) {\small{$y'$}};
\node[anchor=south,draw,rounded corners,minimum height=1.5em,minimum width=4em,fill=blue!20](node1-2) at ([yshift=-3em]node1-1.south) {\small{softmax}};
\node[anchor=south,draw,rounded corners,minimum height=2.5em,minimum width=4em,fill=red!20](node1-3) at ([yshift=-4.5em]node1-2.south) {\small{Decoder}};
% second softmax and its output z', to the right of the decoder
\node[anchor=south,draw,rounded corners,minimum height=1.5em,minimum width=4em,fill=blue!20](node3-1) at ([xshift=6em,yshift=0em]node1-3.south) {\small{softmax}};
\node[anchor=south](node3-2) at ([yshift=3em]node3-1.south) {\small{$z'$}};
% LM box between the input y and the decoder
\node[anchor=south,draw,rounded corners,minimum height=2em,minimum width=4em,fill=yellow!20](node1-4) at ([yshift=-4em]node1-3.south) {\small{LM}};
\node[anchor=south](node1-5) at ([yshift=-3em]node1-4.south) {\small{$y$}};
\node [anchor=center] (labelb) at ([yshift=-1.5em]node1-5.south) {\small{(b) Multi task learning}};
\node[anchor=west](node2-2) at ([xshift=-5em]node1-5.west) {\small{$x$}};
\node[anchor=north,draw,rounded corners,minimum height=2.5em,minimum width=4em,fill=red!20](node2-1) at ([yshift=5.3em]node2-2.north) {\small{Encoder}};
% --- panel (b): arrows ---
\draw [->](node1-5.north)--(node1-4);
\draw [->](node1-4.north)--(node1-3);
\draw [->](node1-3.north)--(node1-2);
\draw [->](node1-2.north)--(node1-1);
\draw [->](node2-2.north)--(node2-1);
\draw [->](node3-1.north)--(node3-2);
\draw[->](node2-1.north)--([yshift=1.8em]node2-1.north)--(node1-3.west);
% branch from just above the LM box over to the second softmax
\draw [->]([yshift=0.8em]node1-4.north)--([xshift=6em,yshift=0.8em]node1-4.north)--(node3-1.south);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% A single shadowed box containing a one-column table: a group of tokenised
% Chinese lines followed by "...", then a group of tokenised English lines
% followed by "...". Tokens are separated with explicit "\ \ " spaces, and
% every row starts with an invisible 13pt strut (\rule{0pt}{13pt}) to fix the
% row height.
% NOTE(review): several Chinese rows contain wider runs of "\ \ " than a single
% token gap (some at the very start of the row); it is not clear from this file
% whether those gaps are intentional blanks or dropped tokens -- confirm
% against the figure in the book.
\tikzstyle{op} =[rounded corners=1pt,thick,minimum width=4.0em,minimum height=3.0em,draw,fill=yellow!5!white,font=\scriptsize,drop shadow]
\node [op] {
\begin{tabular}{l}
\rule{0pt}{13pt} 这里 \ \ 来举 \ \ 几个 \ \ 例子 。\\
\rule{0pt}{13pt}\ \ 必需 \ \ \ \ 装扮成 \ \ 男人 。 \\
\rule{0pt}{13pt} 语言 \ \ 本身\ \ 不会 \ \ 发生 \ \ 那些 \ \ 我们 \ \ 跟不上 \ \ \ \ 变化 。 \\
\rule{0pt}{13pt}\ \ \ \ \ \ \ \ 缠 着 \ \ 一条 \ \ 运动衫 。 \\
\rule{0pt}{13pt}\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 一段时间 内 \ \ 偿还 \ \ \ \ \ \ \\
\rule{0pt}{13pt} Tom \ \ \ \ 非常感谢 \ \ \ \ \ \ 感觉 \ \ 真的 \ \ \ \ 棒极了 \ \ \\
\rule{0pt}{13pt} 这是 \ \ 非常 \ \ 非常 \ \ 重要 \ \ \\
\rule{0pt}{13pt} ... \\
\rule{0pt}{13pt} So \ \ let \ \ me \ \ give \ \ you \ \ a \ \ few \ \ examples \ \ here . \\
\rule{0pt}{13pt} She \ \ had \ \ to \ \ impersonate \ \ a \ \ man . \\
\rule{0pt}{13pt} The \ \ language \ \ is \ \ not \ \ going \ \ to \ \ change \ \ so \ \ fast \ \ that \ \ we \ \ can ’t \ \ keep \ \ up \ \ . \\
\rule{0pt}{13pt} With \ \ a \ \ sweatshirt \ \ there \ \ tied \ \ around \ \ his \ \ waist \ \ . \\
\rule{0pt}{13pt} You \ \ give \ \ them \ \ more \ \ money \ \ ; \ \ they \ \ repay \ \ you \ \ that \ \ over \ \ a \ \ time \ \ . \\
\rule{0pt}{13pt} Tom \ \ , \ \ thank \ \ you \ \ so \ \ much \ \ . \ \ It ’s \ \ been \ \ really \ \ , \ \ really \ \ great \ \ . \\
\rule{0pt}{13pt} It ’s \ \ very \ \ important \ \ . \\
\rule{0pt}{13pt} ... \\
\end{tabular}
};
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Left: a box listing the BPE vocabulary. Right: one row with the original
% tokens and, below it, one row with their BPE pieces; arrows link every
% original token to the piece(s) it is split into ("this" maps to <unk>).
\tikzset{bpetok/.style={font=\scriptsize,anchor=west}}
% vocabulary box
\node[rounded corners=3pt,minimum width=1.0em,minimum height=2.0em,font=\scriptsize,fill=green!5,drop shadow,thick,draw](top) at (0,0) {
\begin{tabular}{ll}
\multicolumn{2}{c}{BPE词表:} \\
errrr$<$e$>$ & tain$<$e$>$ \\
moun & est$<$e$>$ \\
high & the$<$e$>$ \\
a$<$e$>$ &
\end{tabular}
};
% upper row: original token sequence, each node glued to the east side of the previous one
\node[bpetok] (node1) at ([xshift=0.5em,yshift=1em]top.east) {原始序列:};
\node[bpetok] (this) at (node1.east) {"this$<$e$>$" ,};
\node[bpetok] (highest) at (this.east) {"highest$<$e$>$",};
\node[bpetok] (mountain) at (highest.east) { "mountain$<$e$>$"};
% lower row: result of the BPE segmentation
\node[bpetok] (node2) at ([yshift=-1.5em]node1.south west) {BPE切分:};
\node[bpetok] (unk) at (node2.east) {"$<$unk$>$",};
\node[bpetok] (high) at (unk.east) {"high",};
\node[bpetok] (est) at (high.east) {"est$<$e$>$",};
\node[bpetok] (moun) at (est.east) {"moun",};
\node[bpetok] (tain) at (moun.east) {"tain$<$e$>$"};
% token -> piece arrows; the first one starts slightly left of the token's centre
\draw[->,thick] ([xshift=-0.2em]this.south) -- (unk);
\foreach \word/\piece in {highest/high, highest/est, mountain/moun, mountain/tain}{
  \draw[->,thick] (\word.south) -- (\piece);
}
\end{tikzpicture}
\ No newline at end of file
% Two 6x6 heat maps shown side by side as sub-figures ("Self-Attention" and
% "Enc-Dec Attention"). Each heat map is a tabular whose numeric cells are
% replaced by small colour boxes through the Q column type defined below.
% set table width
% \PreserveBackslash saves \\, runs its argument (here \centering), then
% restores \\ so that it still ends a table row inside the C column.
\newcommand{\PreserveBackslash}[1]{\let\temp=\\#1\let\\=\temp}
% C{<width>}: centred paragraph column of the given width
\newcolumntype{C}[1]{>{\PreserveBackslash\centering}p{#1}}
% used for heatmap
% value range that is mapped onto the colour scale
\newcommand*{\MinNumber}{0}%
\newcommand*{\MaxNumber}{1}%
% \ApplyGradient{<value>}: turn the value into a percentage of the
% [\MinNumber,\MaxNumber] range and print an empty \colorbox mixed as
% white!<percent>!myblack, so \MinNumber gives pure myblack and \MaxNumber
% pure white. (myblack is not defined in this snippet -- it must come from the
% including document.) The line ends inside the definition are not protected
% with %, so they contribute spaces to the output.
\newcommand{\ApplyGradient}[1]{%
\pgfmathsetmacro{\PercentColor}{100.0*(#1-\MinNumber)/(\MaxNumber-\MinNumber)}
\hspace{-0.33em}\colorbox{white!\PercentColor!myblack}{}
}
% Q: centred column whose cell content is collected and handed to \ApplyGradient
\newcolumntype{Q}{>{\collectcell\ApplyGradient}c<{\endcollectcell}}
\begin{center}
% zero row stretch and a fixed \fboxsep make the colour boxes tile without gaps
\renewcommand{\arraystretch}{0}
\setlength{\tabcolsep}{5mm}
\setlength{\fboxsep}{2.2mm} % box size
% outer table: four C columns are declared, two of them are used (one per sub-figure)
\begin{tabular}{C{.20\textwidth}C{.20\textwidth}C{.20\textwidth}C{.20\textwidth}}
\setlength{\tabcolsep}{0pt}
% ---- first sub-figure ----
\subfigure [\footnotesize{Self-Attention}] {
% 2x2 layout: (empty | column indices) over (row indices | heat map)
\begin{tabular}{cc}
\setlength{\tabcolsep}{0pt}
~
&
% column indices 1..6, each placed 1.15em to the right of the previous one
\begin{tikzpicture}
\begin{scope}
\node [inner sep=1.5pt] (w1) at (0,0) {\small{$1$} };
\foreach \x/\y/\z in {2/1/$2$, 3/2/$3$, 4/3/$4$, 5/4/$5$, 6/5/$6$}
{
\node [inner sep=1.5pt,anchor=south west] (w\x) at ([xshift=1.15em]w\y.south west) {\small{\z} };
}
\end{scope}
\end{tikzpicture}
\\
% row indices 1..6 (normal row stretch restored for this cell)
\renewcommand\arraystretch{1}
\begin{tabular}{c}
\setlength{\tabcolsep}{0pt}
\small{$1\ \ $} \\
\small{$2\ $} \\
\small{$3\ $} \\
\small{$4\ $} \\
\small{$5\ $} \\
\small{$6\ $} \\
\end{tabular}
&
%\setlength{\tabcolsep}{0pt}
% heat-map values: symmetric matrix with a zero diagonal
\begin{tabular}{*{6}{Q}}
0.0000 & 0.5429 & 0.5138 & 0.4650 & 0.5005 & 0.5531 \\
0.5429 & 0.0000 & 0.0606 & 0.0630 & 0.0703 & 0.0332 \\
0.5138 & 0.0606 & 0.0000 & 0.0671 & 0.0472 & 0.0296 \\
0.4650 & 0.0630 & 0.0671 & 0.0000 & 0.0176 & 0.0552 \\
0.5005 & 0.0703 & 0.0472 & 0.0176 & 0.0000 & 0.0389 \\
0.5531 & 0.0332 & 0.0296 & 0.0552 & 0.0389 & 0.0000 \\
\end{tabular}
\end{tabular}
}
&
% ---- second sub-figure, same layout as the first ----
\subfigure [\footnotesize{Enc-Dec Attention}] {
\setlength{\tabcolsep}{0pt}
\begin{tabular}{cc}
\setlength{\tabcolsep}{0pt}
~
&
\begin{tikzpicture}
\begin{scope}
\node [inner sep=1.5pt] (w1) at (0,0) {\small{$1$} };
\foreach \x/\y/\z in {2/1/$2$, 3/2/$3$, 4/3/$4$, 5/4/$5$, 6/5/$6$}
{
\node [inner sep=1.5pt,anchor=south west] (w\x) at ([xshift=1.15em]w\y.south west) {\small{\z} };
}
\end{scope}
\end{tikzpicture}
\\
\renewcommand\arraystretch{1}
\begin{tabular}{c}
\setlength{\tabcolsep}{0pt}
\small{$1\ \ $} \\
\small{$2\ $} \\
\small{$3\ $} \\
\small{$4\ $} \\
\small{$5\ $} \\
\small{$6\ $} \\
\end{tabular}
&
%\setlength{\tabcolsep}{0pt}
% heat-map values: symmetric matrix with a zero diagonal
\begin{tabular}{*{6}{Q}}
0.0000 & 0.0175 & 0.2239 & 0.3933 & 0.7986 & 0.3603 \\
0.0175 & 0.0000 & 0.1442 & 0.3029 & 0.7295 & 0.3324 \\
0.2239 & 0.1442 & 0.0000 & 0.0971 & 0.6270 & 0.4163 \\
0.3933 & 0.3029 & 0.0971 & 0.0000 & 0.2385 & 0.2022 \\
0.7986 & 0.7295 & 0.6270 & 0.2385 & 0.0000 & 0.0658 \\
0.3603 & 0.3324 & 0.4163 & 0.2022 & 0.0658 & 0.0000 \\
\end{tabular}
\end{tabular}
}
\end{tabular}
\end{center}
% Two small pgfplots charts side by side, each plotting two series over
% epochs 15..25. The left y axis of each chart shows the plotted scale; the
% right y axis is produced with "extra y ticks" placed at the same positions
% but carrying hand-written labels.
% NOTE(review): the right-hand tick labels (3.7/3.8/3.9 and 3.5/3.6/3.7) are
% literal strings, not derived from the data -- presumably the second series
% was rescaled by hand to share the left axis; confirm before editing values.
\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\begin{tikzpicture}
% ---- left chart (carries the legend for both charts) ----
\begin{axis}
[
width=5cm, height=3.5cm,
xtick={15,17,19,21,23,25},
ytick={6.0,6.5,7.0},
xlabel={\scriptsize Epoch},
ylabel={},
ylabel style={},
x tick label style={},
y tick label style={},
tick align=inside,
legend style={anchor=north,xshift=1.7cm,yshift=1cm,legend columns =-1},
ymin=5.7,
ymax=7.3,
xmin=14.6,
xmax=25.4,
extra y ticks={6.0,6.5,7.0},
extra y tick labels={3.7,3.8,3.9},
extra y tick style={ticklabel pos=right}]
% first series (red diamonds), second series (purple triangles)
\addplot [sharp plot,very thick,red!60,mark=diamond*] coordinates{(15,6.75) (16,6.73) (17,6.70) (18,6.67) (19,6.64) (20,6.61) (21,6.59) (22,6.58) (23,6.57) (24,6.58) (25,6.59)};
\addplot [sharp plot,very thick,purple!60,mark=triangle*] coordinates{(15,6.70) (16,6.4) (17,6.20) (18,6.30) (19,6.20) (20,6.10) (21,6.15) (22,6.10) (23,6.15) (24,6.16) (25,6.17)};
% legend entries, in plot order: training set, validation set
\legend{\scriptsize {训练集},\scriptsize{校验集}}
\end{axis}
% ---- right chart: same layout, shifted 6.6cm to the right, no legend ----
\begin{axis}
[ xshift=6.6cm,
width=5cm, height=3.5cm,
xtick={15,17,19,21,23,25},
ytick={5.0,5.5,6.0},
xlabel={\scriptsize Epoch},
ylabel={},
ylabel style={},
x tick label style={},
y tick label style={},
tick align=inside,
ymin=4.7,
ymax=6.3,
xmin=14.6,
xmax=25.4,
extra y ticks={5.0,5.5,6.0},
extra y tick labels={3.5,3.6,3.7},
extra y tick style={ticklabel pos=right}]
\addplot [sharp plot,very thick,red!60,mark=diamond*] coordinates{(15,5.7) (16,5.65) (17,5.6) (18,5.55) (19,5.5) (20,5.45) (21,5.4) (22,5.38) (23,5.36) (24,5.34) (25,5.27)};
\addplot [sharp plot,very thick,purple!60,mark=triangle*] coordinates{(15,5.0) (16,4.9) (17,4.9) (18,5.05) (19,4.9) (20,5.0) (21,5.0) (22,5.1) (23,5.0) (24,5.15) (25,5.5)};
\end{axis}
% Rotated axis captions placed by absolute coordinates (the axes' own ylabel is
% left empty): n1/n2 label the left side of each chart with the training-set
% PPL, n3/n4 label the right side of each chart with the validation-set PPL.
\node [anchor=north,rotate=90] (n1) at (-1.3cm,1cm) {\scriptsize 训练集\ PPL};
\node [anchor=north,rotate=90] (n2) at (5.4cm,1cm) {\scriptsize 训练集\ PPL};
\node [anchor=north,rotate=90] (n3) at (4.2cm,1cm) {\scriptsize 校验集\ PPL};
\node [anchor=north,rotate=90] (n4) at (10.7cm,1cm) {\scriptsize 校验集\ PPL};
\end{tikzpicture}
%---------------------------------------------------------------------
\ No newline at end of file
\begin{center}
% A centred heading line followed by two shadowed boxes: the left box lists
% noun forms, the right box lists verb forms.
\centerline{以英语为例:}
\vspace{0.5em}
\begin{tikzpicture}
% look shared by both boxes
\tikzset{wordbox/.style={rounded corners=3pt,minimum width=10.0em,minimum height=2.0em,draw,thick,fill=green!5,font=\scriptsize,drop shadow,inner sep=0.5em}}
% nouns
\node[wordbox] (left) at (0,0) {
\begin{tabular}{c}
名词\\
\rule{0pt}{12pt}cat,cats 、watch,watches\\
\rule{0pt}{12pt}baby,babies、wife,wives\\
\end{tabular}
};
% verbs, centred 8em to the right of the noun box's east side
\node[wordbox] (right) at ([xshift=8em]left.east) {
\begin{tabular}{c}
动词\\
\rule{0pt}{12pt}do,did ,does,doing,done\\
\rule{0pt}{12pt}have,had,has,having\\
\end{tabular}
};
\end{tikzpicture}
\end{center}
\ No newline at end of file
\begin{tikzpicture}
% Two groups of three word forms (do/does/doing and new/newer/newest) with the
% shared part set via \red, each group pointing back to by a root word placed
% beneath its middle form.
% first group and its root
\node (do) at (0,0) {{\red do}};
\node [anchor=west] (does) at ([xshift=1em]do.east) {{\red do}es};
\node [anchor=west] (doing) at ([xshift=0.7em]does.east) {{\red do}ing};
\node [anchor=north] (do_root) at ([yshift=-1em]does.south) {do};
% second group and its root
\node [anchor=west] (new) at ([xshift=2em]doing.east) {{\red new}};
\node [anchor=west] (newer) at ([xshift=1em]new.east) {{\red new}er};
\node [anchor=west] (newest) at ([xshift=0.7em]newer.east) {{\red new}est};
\node [anchor=north] (new_root) at ([yshift=-1em]newer.south) {new};
% arrows from each root to its three forms: curved to the outer forms, straight to the middle one
\foreach \rootnode/\formA/\formB/\formC in {do_root/do/does/doing, new_root/new/newer/newest}{
  \draw [->] (\rootnode.north) .. controls +(north:0.4) and +(south:0.6) .. (\formA.south);
  \draw [->] (\rootnode.north) -- (\formB.south);
  \draw [->] (\rootnode.north) .. controls +(north:0.4) and +(south:0.6) .. (\formC.south);
}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Three stacked rows, each a two-arc chain of small circles whose arcs are
% labelled with words ("I have", "He has", "She has"). To the right of the
% middle row, a three-circle graph with several labelled arcs per step
% (I / He / She, then Have / Has) is drawn.
% Node names (pos1, pos2, ...) are reused in every scope; each scope only
% refers to the nodes it has just defined.
% ---- row 1: "I have" ----
\begin{scope}
\node [anchor=north west] (pos1) at (0,0) {$\circ$};
\node [anchor= west] (pos2) at ([xshift=3.0em]pos1.east){$\circ$};
\node [anchor= west] (pos1-2) at ([xshift=1.0em,yshift=1.0em]pos1.east){I};
\draw[->,thick](pos1.east)--(pos2.west);
\node [anchor= west] (pos3) at ([xshift=3.0em]pos2.east){$\circ$};
\node [anchor= west] (pos2-2) at ([xshift=0.1em,yshift=1.0em]pos2.east){have};
\draw[->,thick](pos2.east)--(pos3.west);
\end{scope}
% ---- row 2: "He has", plus the merged graph to its right ----
\begin{scope}[yshift=-4.0em]
\node [anchor=north west] (pos1) at (0,0) {$\circ$};
\node [anchor= west] (pos2) at ([xshift=3.0em]pos1.east){$\circ$};
\node [anchor= west] (pos1-2) at ([xshift=0.5em,yshift=1.0em]pos1.east){He};
\draw[->,thick](pos1.east)--(pos2.west);
\node [anchor= west] (pos3) at ([xshift=3.0em]pos2.east){$\circ$};
\node [anchor= west] (pos2-2) at ([xshift=0.1em,yshift=1.0em]pos2.east){has};
\draw[->,thick](pos2.east)--(pos3.west);
% three states of the merged graph
\node [anchor= west] (pos4) at ([xshift=5.0em]pos3.east){$\circ$};
\node [anchor= west] (pos5) at ([xshift=5.0em]pos4.east){$\circ$};
\node [anchor= west] (pos6) at ([xshift=5.0em]pos5.east){$\circ$};
% arc labels, positioned by hand next to the curved arcs drawn below
\node [anchor= west] (word1) at ([xshift=2.0em,yshift=2.7em]pos4.east){I};
\node [anchor= west] (word2) at ([xshift=1.5em,yshift=-1.6em]pos4.east){He};
\node [anchor= west] (word3) at ([xshift=1.4em,yshift=-3em]pos4.east){She};
\node [anchor= west] (word4) at ([xshift=1.1em,yshift=2.8em]pos5.east){Have};
\node [anchor= west] (word5) at ([xshift=1.3em,yshift=-2.8em]pos5.east){Has};
% the arcs are drawn on the background layer so they sit behind the nodes
\begin{pgfonlayer}{background}
{
% I
\draw [->,thick] (pos4.north) .. controls +(north:0.8) and +(north:0.8) .. (pos5.north);
% He
\draw [->,thick] (pos4.south) .. controls +(south:0.8) and +(south:0.8) .. (pos5.south);
% She
\draw [->,thick] (pos4.south) .. controls +(south:1.5) and +(south:1.5) .. (pos5.south);
% Have
\draw [->,thick] (pos5.north) .. controls +(north:0.8) and +(north:0.8) .. (pos6.north);
% Has
\draw [->,thick] (pos5.south) .. controls +(south:0.8) and +(south:0.8) .. (pos6.south);
}
\end{pgfonlayer}
\end{scope}
% ---- row 3: "She has" ----
\begin{scope}[yshift=-8.0em]
\node [anchor=north west] (pos1) at (0,0) {$\circ$};
\node [anchor= west] (pos2) at ([xshift=3.0em]pos1.east){$\circ$};
\node [anchor= west] (pos1-2) at ([xshift=0.4em,yshift=1.0em]pos1.east){She};
\draw[->,thick](pos1.east)--(pos2.west);
\node [anchor= west] (pos3) at ([xshift=3.0em]pos2.east){$\circ$};
\node [anchor= west] (pos2-2) at ([xshift=0.1em,yshift=1.0em]pos2.east){has};
\draw[->,thick](pos2.east)--(pos3.west);
\end{scope}
\end{tikzpicture}
%---------------------------------------------------------------------
\begin{center}
% Learning-rate curves for two runs, "Base48" (red) and "SDT48" (blue),
% plotted against the number of updates. The blue curve is assembled from
% several \addplot segments: solid pieces that decay from 2, joined by dashed
% vertical pieces where the value jumps back up to 2.
% Fix relative to the previous revision: the y-axis label opened a parenthesis
% before the 10^{-3} factor but never closed it.
\begin{tikzpicture}
\footnotesize{
\begin{axis}[
width=.40\textwidth,
height=.30\textwidth,
legend style={at={(0.60,0.08)}, anchor=south west},
xlabel={\footnotesize{更新次数(10k)}},
% the scale factor is set in \scriptsize inside its own group so that both
% parentheses are typeset in the same (\footnotesize) size
ylabel={\footnotesize{学习率 ({\scriptsize $10^{-3}$})}},
ylabel style={yshift=-1em},xlabel style={yshift=0.0em},
yticklabel style={/pgf/number format/precision=2,/pgf/number format/fixed zerofill},
ymin=0,ymax=2.2, ytick={0.5, 1, 1.5, 2},
xmin=0,xmax=5,xtick={1,2,3,4},
legend style={xshift=-8pt,yshift=-4pt, legend plot pos=right,font=\scriptsize,cells={anchor=west}}
]
% red curve: linear rise to 2 at x=1.6, then a smooth decay
\addplot[red,line width=1.25pt] coordinates {(0,0) (1.6,2) (1.8,1.888) (2,1.787) (2.5,1.606) (3,1.462) (3.5,1.3549) (4,1.266) (4.5,1.193) (5,1.131)};
\addlegendentry{\scriptsize Base48}
%\addplot[red,line width=1.25pt] coordinates {(0,0) (8000,0.002) (10000,0.00179) (12000,0.00163) (12950,0.001572)};
% blue curve, first segment: rise to 2 at x=0.8, short decay
\addplot[blue,line width=1.25pt] coordinates {(0,0) (0.8,2) (0.9906,1.7983)};
%\addplot[red,line width=1.25pt] coordinates {(0,0) (8000,0.002) (9906,0.0017983)};
% from here on: dashed vertical jump back to 2, followed by a solid decaying segment
\addplot[blue,dashed,line width=1.25pt] coordinates {(0.9906,1.7983) (0.9906,2)};
\addplot[blue,line width=1.25pt] coordinates {(0.9906,2) (1.1906,1.79) (1.3906,1.63) (1.4856,1.572)};
\addplot[blue,dashed,line width=1.25pt] coordinates {(1.4856,1.572) (1.4856,2)};
\addplot[blue,line width=1.25pt] coordinates {(1.4856,2) (1.6856,1.79) (1.8856,1.63) (1.9806,1.572)};
\addplot[blue,dashed,line width=1.25pt] coordinates {(1.9806,1.572) (1.9806,2)};
\addplot[blue,line width=1.25pt] coordinates {(1.9806,2) (2.1806,1.79) (2.3806,1.63) (2.4756,1.572)};
\addplot[blue,dashed,line width=1.25pt] coordinates {(2.4756,1.572) (2.4756,2)};
\addplot[blue,line width=1.25pt] coordinates {(2.4756,2) (2.6756,1.79) (2.8756,1.63) (2.9706,1.572)};
\addplot[blue,dashed,line width=1.25pt] coordinates {(2.9706,1.572) (2.9706,2)};
\addplot[blue,line width=1.25pt] coordinates {(2.9706,2) (3.1706,1.79) (3.3706,1.63) (3.4656,1.572) (3.6706,1.4602) (3.7136,1.44)};
\addplot[blue,dashed,line width=1.25pt] coordinates {(3.7136,1.44) (3.7136,2)};
\addplot[blue,line width=1.25pt] coordinates {(3.7136,2) (3.9136,1.79) (4.1136,1.63) (4.2086,1.572) (4.4136,1.4602) (4.4566,1.44) (4.7000,1.3574) (5.0000,1.2531)};
% second legend entry; pgfplots pairs it with the second \addplot (the first blue segment)
\addlegendentry{\scriptsize SDT48}
\end{axis}
}
\end{tikzpicture}
\end{center}
\ No newline at end of file
%%%------------------------------------------------------------------------------------------------------------
\begin{center}
\begin{tikzpicture}
% Four rows labelled step1..step4. Row k holds k orange "x h" blocks followed
% by one blue block; from row 2 on, the last orange block has a dashed red
% outline and a dashed red arrow points to it from the last orange block of
% the row above. The legend on top names orange as encoder, blue as decoder
% and the dashed red arrow as copy.
\begin{scope}
\tikzset{
  encblock/.style={fill=orange!20,draw=orange,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em},
  copyblock/.style={fill=orange!20,draw=red,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em,dashed},
  decblock/.style={fill=blue!20,draw=blue,rounded corners=3pt,minimum height=1.4em,minimum width=1.5em},
  flowarrow/.style={-latex,line width=0.8pt},
  copyarrow/.style={-latex,thick,red,dashed}
}
% row 1
\node [encblock,anchor=east] (s11) at (-0.5em, 0) {$\times h$};
\node [decblock,anchor=west] (s12) at ([xshift=1.5em]s11.east) {};
% row 2
\node [encblock,anchor=north] (s21) at ([yshift=-1.8em]s11.south) {$\times h$};
\node [copyblock,anchor=west] (s22) at ([xshift=1.5em]s21.east) {$\times h$};
\node [decblock,anchor=west] (s23) at ([xshift=1.5em]s22.east) {};
% row 3
\node [encblock,anchor=north] (s31) at ([yshift=-1.8em]s21.south) {$\times h$};
\node [encblock,anchor=west] (s32) at ([xshift=1.5em]s31.east) {$\times h$};
\node [copyblock,anchor=west] (s33) at ([xshift=1.5em]s32.east) {$\times h$};
\node [decblock,anchor=west] (s34) at ([xshift=1.5em]s33.east) {};
% row 4
\node [encblock,anchor=north] (s41) at ([yshift=-1.8em]s31.south) {$\times h$};
\node [encblock,anchor=west] (s42) at ([xshift=1.5em]s41.east) {$\times h$};
\node [encblock,anchor=west] (s43) at ([xshift=1.5em]s42.east) {$\times h$};
\node [copyblock,anchor=west] (s44) at ([xshift=1.5em]s43.east) {$\times h$};
\node [decblock,anchor=west] (s45) at ([xshift=1.5em]s44.east) {};
% step labels to the left of every row
\foreach \k in {1,...,4}{
  \node [anchor=east] (p\k) at ([xshift=-2em]s\k1.west) {step\k};
}
% legend above the first row
\node [encblock,anchor=south] (b1) at ([xshift=-0.2em,yshift=2em]p1.north) {};
\node [anchor=west] (b2) at (b1.east) {:编码器};
\node [decblock,minimum width=1.4em,anchor=west] (b3) at ([xshift=1em]b2.east) {};
\node [anchor=west] (b4) at (b3.east) {:解码器};
\node [anchor=west] (b5) at ([xshift=2.5em]b4.east) {:拷贝};
\draw [copyarrow] ([xshift=0.2em]b4.east) -- (b5.west);
% arrow entering the first block and leaving the last block of every row
\foreach \head/\tail in {s11/s12, s21/s23, s31/s34, s41/s45}{
  \draw [flowarrow] ([xshift=-1.5em]\head.west) -- (\head.west);
  \draw [flowarrow] (\tail.east) -- ([xshift=1.5em]\tail.east);
}
% arrows between neighbouring blocks inside a row
\foreach \src/\dst in {s11/s12,
                       s21/s22, s22/s23,
                       s31/s32, s32/s33, s33/s34,
                       s41/s42, s42/s43, s43/s44, s44/s45}{
  \draw [flowarrow] (\src.east) -- (\dst.west);
}
% copy arrows from one row's last orange block to the dashed block of the next row
\foreach \src/\dst in {s11/s22, s22/s33, s33/s44}{
  \draw [copyarrow] (\src.south) .. controls +(south:1em) and +(north:1.2em) .. (\dst.north);
}
\end{scope}
\end{tikzpicture}
\end{center}
%%%------------------------------------------------------------------------------------------------------------
%%% Figure: four rows of four layer boxes, labelled p=\infty, p=1, p=2 and p=4.
%%% Rows 2-4 are overlaid with dashed red frames spanning 1, 2 and 4 boxes respectively,
%%% and with curved arrows that jump over one or more boxes. A legend sits above the labels.
\begin{center}
\begin{tikzpicture}
\begin{scope}[
    layerbox/.style={fill=orange!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em},
    blockframe/.style={draw=red,rounded corners=3pt,minimum height=1.7em,dashed,line width=0.8pt},
    flowarrow/.style={-latex,line width=0.8pt}]
% --- layer boxes: row 1 starts at a fixed coordinate, every other box is placed
% --- relative to its left neighbour (columns) or to the box above it (first column)
\node [layerbox,anchor=east] (s11) at (-0.5em, 0) {};
\node [layerbox,rectangle,anchor=west] (s12) at ([xshift=2em]s11.east) {};
\node [layerbox,anchor=west] (s13) at ([xshift=2em]s12.east) {};
\node [layerbox,anchor=west] (s14) at ([xshift=2em]s13.east) {};
\foreach \currow/\uprow in {2/1,3/2,4/3} {
  \node [layerbox,anchor=north] (s\currow1) at ([yshift=-2.5em]s\uprow1.south) {};
  \foreach \curcol/\prevcol in {2/1,3/2,4/3} {
    \node [layerbox,anchor=west] (s\currow\curcol) at ([xshift=2em]s\currow\prevcol.east) {};
  }
}
% --- row labels (the first label uses a smaller shift than the others)
\node [anchor=east] (p1) at ([xshift=-3.5em]s11.west) {$p=\infty$};
\foreach \rowidx/\pval in {2/1,3/2,4/4} {
  \node [anchor=east] (p\rowidx) at ([xshift=-4em]s\rowidx1.west) {$p=\pval$};
}
\node [anchor=north] (p5) at ([yshift=-1em]p3.south) {$\cdots$};
% --- legend: one filled box for "Layer", one dashed red frame for "Block"
\node [layerbox,anchor=south] (b1) at ([xshift=-0.6em,yshift=2em]p1.north) {};
\node [anchor=west] (b2) at (b1.east) {:Layer};
\node [blockframe,minimum height=1.4em,minimum width=1.4em,anchor=west] (b3) at ([xshift=1em]b2.east) {};
\node [anchor=west] (b4) at (b3.east) {:Block};
% --- straight arrows: into the first box, between neighbours, out of the last box
\foreach \currow in {1,2,3,4} {
  \draw [flowarrow] ([xshift=-2em]s\currow1.west) -- (s\currow1.west);
  \foreach \prevcol/\curcol in {1/2,2/3,3/4} {
    \draw [flowarrow] (s\currow\prevcol.east) -- (s\currow\curcol.west);
  }
  \draw [flowarrow] (s\currow4.east) -- ([xshift=2em]s\currow4.east);
}
% --- dashed red frames: one per box in row 2, one per pair in row 3, one for all of row 4
\foreach \curcol in {1,2,3,4} {
  \node [blockframe,minimum width=1.7em] (x2\curcol) at (s2\curcol) {};
}
\node [blockframe,minimum width=5.2em] (x31) at ([xshift=1.75em]s31) {};
\node [blockframe,minimum width=5.2em] (x32) at ([xshift=1.75em]s33) {};
\node [blockframe,minimum width=12.2em] (x41) at ([xshift=1.75em]s42) {};
% --- curved arrows, grouped by how many boxes they span (taller curves span more boxes)
\foreach \curcol in {1,2,3,4} {
  \draw [flowarrow] ([xshift=-1em]s2\curcol.west) .. controls +(58:0.6) and +(122:0.6) .. ([xshift=1em]s2\curcol.east);
}
\foreach \src/\dst in {21/22,22/23,23/24,31/32,33/34} {
  \draw [flowarrow] ([xshift=-1em]s\src.west) .. controls +(65:0.8) and +(115:0.8) .. ([xshift=1em]s\dst.east);
}
\foreach \src/\dst in {21/23,22/24} {
  \draw [flowarrow] ([xshift=-1em]s\src.west) .. controls +(70:1.0) and +(110:1.0) .. ([xshift=1em]s\dst.east);
}
\foreach \src/\dst in {21/24,31/34,41/44} {
  \draw [flowarrow] ([xshift=-1em]s\src.west) .. controls +(75:1.2) and +(105:1.2) .. ([xshift=1em]s\dst.east);
}
\end{scope}
\end{tikzpicture}
\end{center}
...@@ -4691,3 +4691,1191 @@ pages ={157-166}, ...@@ -4691,3 +4691,1191 @@ pages ={157-166},
biburl = {https://dblp.org/rec/conf/emnlp/DuanLXZ09.bib}, biburl = {https://dblp.org/rec/conf/emnlp/DuanLXZ09.bib},
bibsource = {dblp computer science bibliography, https://dblp.org} bibsource = {dblp computer science bibliography, https://dblp.org}
} }
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 7------------------------------------------------------
% Yao, Xiao and Zhu (2011), Journal of Chinese Information Processing 25(2):
% training-corpus selection for statistical MT based on sentence-pair quality and coverage.
% Page range uses the double hyphen so it is typeset as an en-dash.
@article{姚树杰2011基于句对质量和覆盖度的统计机器翻译训练语料选取,
  title={基于句对质量和覆盖度的统计机器翻译训练语料选取},
  author={姚树杰 and 肖桐 and 朱靖波},
  journal={中文信息学报},
  volume={25},
  number={2},
  pages={72--78},
  year={2011},
}
%%%%%%%%%%%%%%%
% arXiv preprint 1910.13267 (cs.CL).
% The method name is braced so bibliography styles that sentence-case titles keep "BPE-Dropout" intact.
@misc{provilkov2019bpedropout,
  title={{BPE-Dropout}: Simple and Effective Subword Regularization},
  author={Ivan Provilkov and Dmitrii Emelianenko and Elena Voita},
  year={2019},
  eprint={1910.13267},
  archivePrefix={arXiv},
  primaryClass={cs.CL}
}
%%%%%%%%%%%%%%%%%%%
@article{DBLP:journals/corr/SennrichHB15,
author = {Rico Sennrich and
Barry Haddow and
Alexandra Birch},
title = {Neural Machine Translation of Rare Words with Subword Units},
journal = {CoRR},
volume = {abs/1508.07909},
year = {2015},
url = {http://arxiv.org/abs/1508.07909},
archivePrefix = {arXiv},
eprint = {1508.07909},
timestamp = {Mon, 13 Aug 2018 16:47:17 +0200},
biburl = {https://dblp.org/rec/journals/corr/SennrichHB15.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1207-0580,
author = {Geoffrey E. Hinton and
Nitish Srivastava and
Alex Krizhevsky and
Ilya Sutskever and
Ruslan Salakhutdinov},
title = {Improving neural networks by preventing co-adaptation of feature detectors},
journal = {CoRR},
volume = {abs/1207.0580},
year = {2012},
url = {http://arxiv.org/abs/1207.0580},
archivePrefix = {arXiv},
eprint = {1207.0580},
timestamp = {Mon, 13 Aug 2018 16:46:10 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1207-0580.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Hornik, Stinchcombe and White (1989), Neural Networks 2(5): universal approximation theorem.
% The citation key keeps its original spelling so existing \cite commands still resolve;
% only the author list and the page-range dash are corrected.
@article{Hornic1989Multilayer,
  title={Multilayer feedforward networks are universal approximators},
  author={Hornik, Kurt and Stinchcombe, Maxwell and White, Halbert},
  journal={Neural Networks},
  volume={2},
  number={5},
  pages={359--366},
  year={1989},
}
@article{DBLP:journals/corr/abs-1809-10853,
author = {Alexei Baevski and
Michael Auli},
title = {Adaptive Input Representations for Neural Language Modeling},
journal = {CoRR},
volume = {abs/1809.10853},
year = {2018},
url = {http://arxiv.org/abs/1809.10853},
archivePrefix = {arXiv},
eprint = {1809.10853},
timestamp = {Fri, 05 Oct 2018 11:34:52 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1809-10853.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Stahlberg and Byrne (2019), EMNLP/IJCNLP.
% The acronym is braced so sentence-casing bibliography styles do not lowercase it,
% matching how acronyms are protected in the DBLP-exported entries of this file.
@inproceedings{Stahlberg2019OnNS,
  title={On {NMT} Search Errors and Model Errors: Cat Got Your Tongue?},
  author={Felix Stahlberg and Bill Byrne},
  booktitle={EMNLP/IJCNLP},
  year={2019}
}
@article{DBLP:journals/corr/abs-1810-08398,
author = {Mingbo Ma and
Liang Huang and
Hao Xiong and
Kaibo Liu and
Chuanqiang Zhang and
Zhongjun He and
Hairong Liu and
Xing Li and
Haifeng Wang},
title = {{STACL:} Simultaneous Translation with Integrated Anticipation and
Controllable Latency},
journal = {CoRR},
volume = {abs/1810.08398},
year = {2018},
url = {http://arxiv.org/abs/1810.08398},
archivePrefix = {arXiv},
eprint = {1810.08398},
timestamp = {Thu, 01 Nov 2018 11:22:30 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-1810-08398.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/StahlbergHSB17,
author = {Felix Stahlberg and
Eva Hasler and
Danielle Saunders and
Bill Byrne},
title = {{SGNMT} - {A} Flexible {NMT} Decoding Platform for Quick Prototyping
of New Models and Search Strategies},
journal = {CoRR},
volume = {abs/1707.06885},
year = {2017},
url = {http://arxiv.org/abs/1707.06885},
archivePrefix = {arXiv},
eprint = {1707.06885},
timestamp = {Mon, 13 Aug 2018 16:48:37 +0200},
biburl = {https://dblp.org/rec/journals/corr/StahlbergHSB17.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/SennrichHB16,
author = {Rico Sennrich and
Barry Haddow and
Alexandra Birch},
title = {Edinburgh Neural Machine Translation Systems for {WMT} 16},
journal = {CoRR},
volume = {abs/1606.02891},
year = {2016},
url = {http://arxiv.org/abs/1606.02891},
archivePrefix = {arXiv},
eprint = {1606.02891},
timestamp = {Mon, 13 Aug 2018 16:46:23 +0200},
biburl = {https://dblp.org/rec/journals/corr/SennrichHB16.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Li et al. (2019), WMT19 shared-task system description (DBLP export).
% The system name in the title is braced so sentence-casing styles keep its capitalisation,
% in the same way the entry already protects WMT19.
@inproceedings{DBLP:conf/wmt/LiLXLLLWZXWFCLL19,
  author    = {Bei Li and
               Yinqiao Li and
               Chen Xu and
               Ye Lin and
               Jiqiang Liu and
               Hui Liu and
               Ziyang Wang and
               Yuhao Zhang and
               Nuo Xu and
               Zeyang Wang and
               Kai Feng and
               Hexuan Chen and
               Tengbo Liu and
               Yanyang Li and
               Qiang Wang and
               Tong Xiao and
               Jingbo Zhu},
  editor    = {Ondrej Bojar and
               Rajen Chatterjee and
               Christian Federmann and
               Mark Fishel and
               Yvette Graham and
               Barry Haddow and
               Matthias Huck and
               Antonio Jimeno{-}Yepes and
               Philipp Koehn and
               Andr{\'{e}} Martins and
               Christof Monz and
               Matteo Negri and
               Aur{\'{e}}lie N{\'{e}}v{\'{e}}ol and
               Mariana L. Neves and
               Matt Post and
               Marco Turchi and
               Karin Verspoor},
  title     = {The {NiuTrans} Machine Translation Systems for {WMT19}},
  booktitle = {Proceedings of the Fourth Conference on Machine Translation, {WMT}
               2019, Florence, Italy, August 1-2, 2019 - Volume 2: Shared Task Papers,
               Day 1},
  pages     = {257--266},
  publisher = {Association for Computational Linguistics},
  year      = {2019},
  url       = {https://doi.org/10.18653/v1/w19-5325},
  doi       = {10.18653/v1/w19-5325},
  timestamp = {Tue, 28 Jan 2020 10:30:56 +0100},
  biburl    = {https://dblp.org/rec/conf/wmt/LiLXLLLWZXWFCLL19.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/DabreF19,
author = {Raj Dabre and
Atsushi Fujita},
title = {Recurrent Stacking of Layers for Compact Neural Machine Translation
Models},
booktitle = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI}
2019, The Thirty-First Innovative Applications of Artificial Intelligence
Conference, {IAAI} 2019, The Ninth {AAAI} Symposium on Educational
Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii,
USA, January 27 - February 1, 2019},
pages = {6292--6299},
publisher = {{AAAI} Press},
year = {2019},
url = {https://doi.org/10.1609/aaai.v33i01.33016292},
doi = {10.1609/aaai.v33i01.33016292},
timestamp = {Wed, 25 Sep 2019 11:05:09 +0200},
biburl = {https://dblp.org/rec/conf/aaai/DabreF19.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1712-05877,
author = {Benoit Jacob and
Skirmantas Kligys and
Bo Chen and
Menglong Zhu and
Matthew Tang and
Andrew G. Howard and
Hartwig Adam and
Dmitry Kalenichenko},
title = {Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only
Inference},
journal = {CoRR},
volume = {abs/1712.05877},
year = {2017},
url = {http://arxiv.org/abs/1712.05877},
archivePrefix = {arXiv},
eprint = {1712.05877},
timestamp = {Mon, 13 Aug 2018 16:48:27 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1712-05877.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1910-10485,
author = {Gabriele Prato and
Ella Charlaix and
Mehdi Rezagholizadeh},
title = {Fully Quantized Transformer for Improved Translation},
journal = {CoRR},
volume = {abs/1910.10485},
year = {2019},
url = {http://arxiv.org/abs/1910.10485},
archivePrefix = {arXiv},
eprint = {1910.10485},
timestamp = {Fri, 25 Oct 2019 14:59:26 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1910-10485.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1801-05122,
author = {Xiangwen Zhang and
Jinsong Su and
Yue Qin and
Yang Liu and
Rongrong Ji and
Hongji Wang},
title = {Asynchronous Bidirectional Decoding for Neural Machine Translation},
journal = {CoRR},
volume = {abs/1801.05122},
year = {2018},
url = {http://arxiv.org/abs/1801.05122},
archivePrefix = {arXiv},
eprint = {1801.05122},
timestamp = {Mon, 15 Jul 2019 14:17:41 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1801-05122.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1809-00069,
author = {Liang Huang and
Kai Zhao and
Mingbo Ma},
title = {When to Finish? Optimal Beam Search for Neural Text Generation (modulo
beam size)},
journal = {CoRR},
volume = {abs/1809.00069},
year = {2018},
url = {http://arxiv.org/abs/1809.00069},
archivePrefix = {arXiv},
eprint = {1809.00069},
timestamp = {Fri, 05 Oct 2018 11:34:52 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1809-00069.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jcss/FreundS97,
author = {Yoav Freund and
Robert E. Schapire},
title = {A Decision-Theoretic Generalization of On-Line Learning and an Application
to Boosting},
journal = {J. Comput. Syst. Sci.},
volume = {55},
number = {1},
pages = {119--139},
year = {1997},
url = {https://doi.org/10.1006/jcss.1997.1504},
doi = {10.1006/jcss.1997.1504},
timestamp = {Wed, 14 Nov 2018 10:33:59 +0100},
biburl = {https://dblp.org/rec/journals/jcss/FreundS97.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/acl/XiaoZZW10,
author = {Tong Xiao and
Jingbo Zhu and
Muhua Zhu and
Huizhen Wang},
editor = {Jan Hajic and
Sandra Carberry and
Stephen Clark},
title = {Boosting-Based System Combination for Machine Translation},
booktitle = {{ACL} 2010, Proceedings of the 48th Annual Meeting of the Association
for Computational Linguistics, July 11-16, 2010, Uppsala, Sweden},
pages = {739--748},
publisher = {The Association for Computer Linguistics},
year = {2010},
url = {https://www.aclweb.org/anthology/P10-1076/},
timestamp = {Fri, 13 Sep 2019 13:00:43 +0200},
biburl = {https://dblp.org/rec/conf/acl/XiaoZZW10.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/SimBGSW07,
author = {Khe Chai Sim and
William J. Byrne and
Mark J. F. Gales and
Hichem Sahbi and
Philip C. Woodland},
title = {Consensus Network Decoding for Statistical Machine Translation System
Combination},
booktitle = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
and Signal Processing, {ICASSP} 2007, Honolulu, Hawaii, USA, April
15-20, 2007},
pages = {105--108},
publisher = {{IEEE}},
year = {2007},
url = {https://doi.org/10.1109/ICASSP.2007.367174},
doi = {10.1109/ICASSP.2007.367174},
timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
biburl = {https://dblp.org/rec/conf/icassp/SimBGSW07.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/acl/RostiMS07,
author = {Antti{-}Veikko I. Rosti and
Spyridon Matsoukas and
Richard M. Schwartz},
editor = {John A. Carroll and
Antal van den Bosch and
Annie Zaenen},
title = {Improved Word-Level System Combination for Machine Translation},
booktitle = {{ACL} 2007, Proceedings of the 45th Annual Meeting of the Association
for Computational Linguistics, June 23-30, 2007, Prague, Czech Republic},
publisher = {The Association for Computational Linguistics},
year = {2007},
url = {https://www.aclweb.org/anthology/P07-1040/},
timestamp = {Mon, 16 Sep 2019 13:46:41 +0200},
biburl = {https://dblp.org/rec/conf/acl/RostiMS07.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/wmt/RostiZMS08,
author = {Antti{-}Veikko I. Rosti and
Bing Zhang and
Spyros Matsoukas and
Richard M. Schwartz},
editor = {Chris Callison{-}Burch and
Philipp Koehn and
Christof Monz and
Josh Schroeder and
Cameron S. Fordyce},
title = {Incremental Hypothesis Alignment for Building Confusion Networks with
Application to Machine Translation System Combination},
booktitle = {Proceedings of the Third Workshop on Statistical Machine Translation,
WMT@ACL 2008, Columbus, Ohio, USA, June 19, 2008},
pages = {183--186},
publisher = {Association for Computational Linguistics},
year = {2008},
url = {https://www.aclweb.org/anthology/W08-0329/},
timestamp = {Fri, 13 Sep 2019 13:08:46 +0200},
biburl = {https://dblp.org/rec/conf/wmt/RostiZMS08.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/LiMJ16,
author = {Jiwei Li and
Will Monroe and
Dan Jurafsky},
title = {A Simple, Fast Diverse Decoding Algorithm for Neural Generation},
journal = {CoRR},
volume = {abs/1611.08562},
year = {2016},
url = {http://arxiv.org/abs/1611.08562},
archivePrefix = {arXiv},
eprint = {1611.08562},
timestamp = {Mon, 13 Aug 2018 16:48:46 +0200},
biburl = {https://dblp.org/rec/journals/corr/LiMJ16.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/emnlp/TrombleKOM08,
author = {Roy Tromble and
Shankar Kumar and
Franz Josef Och and
Wolfgang Macherey},
title = {Lattice Minimum Bayes-Risk Decoding for Statistical Machine Translation},
booktitle = {2008 Conference on Empirical Methods in Natural Language Processing,
{EMNLP} 2008, Proceedings of the Conference, 25-27 October 2008, Honolulu,
Hawaii, USA, {A} meeting of SIGDAT, a Special Interest Group of the
{ACL}},
pages = {620--629},
publisher = {{ACL}},
year = {2008},
url = {https://www.aclweb.org/anthology/D08-1065/},
timestamp = {Fri, 13 Sep 2019 13:08:45 +0200},
biburl = {https://dblp.org/rec/conf/emnlp/TrombleKOM08.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/aaai/SuTXJSL17,
author = {Jinsong Su and
Zhixing Tan and
Deyi Xiong and
Rongrong Ji and
Xiaodong Shi and
Yang Liu},
editor = {Satinder P. Singh and
Shaul Markovitch},
title = {Lattice-Based Recurrent Neural Network Encoders for Neural Machine
Translation},
booktitle = {Proceedings of the Thirty-First {AAAI} Conference on Artificial Intelligence,
February 4-9, 2017, San Francisco, California, {USA}},
pages = {3302--3308},
publisher = {{AAAI} Press},
year = {2017},
url = {http://aaai.org/ocs/index.php/AAAI/AAAI17/paper/view/14320},
timestamp = {Sun, 31 Mar 2019 12:09:37 +0200},
biburl = {https://dblp.org/rec/conf/aaai/SuTXJSL17.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/acl/BirdL04,
author = {Steven Bird and
Edward Loper},
title = {{NLTK:} The Natural Language Toolkit},
booktitle = {Proceedings of the 42nd Annual Meeting of the Association for Computational
Linguistics, Barcelona, Spain, July 21-26, 2004 - Poster and Demonstration},
publisher = {{ACL}},
year = {2004},
url = {https://www.aclweb.org/anthology/P04-3031/},
timestamp = {Wed, 18 Sep 2019 12:15:54 +0200},
biburl = {https://dblp.org/rec/conf/acl/BirdL04.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{wang-etal-2018-dynamic,
title = "Dynamic Sentence Sampling for Efficient Training of Neural Machine Translation",
author = "Wang, Rui and
Utiyama, Masao and
Sumita, Eiichiro",
booktitle = "Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
month = jul,
year = "2018",
address = "Melbourne, Australia",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/P18-2048",
doi = "10.18653/v1/P18-2048",
pages = "298--304",
abstract = "Traditional Neural machine translation (NMT) involves a fixed training procedure where each sentence is sampled once during each epoch. In reality, some sentences are well-learned during the initial few epochs; however, using this approach, the well-learned sentences would continue to be trained along with those sentences that were not well learned for 10-30 epochs, which results in a wastage of time. Here, we propose an efficient method to dynamically sample the sentences in order to accelerate the NMT training. In this approach, a weight is assigned to each sentence based on the measured difference between the training costs of two iterations. Further, in each epoch, a certain percentage of sentences are dynamically sampled according to their weights. Empirical results based on the NIST Chinese-to-English and the WMT English-to-German tasks show that the proposed method can significantly accelerate the NMT training and improve the NMT performance.",
}
@inproceedings{garciamartinez:hal-01433161,
TITLE = {{Factored Neural Machine Translation Architectures}},
AUTHOR = {Garcia-Martinez, Mercedes and Barrault, Lo{\"i}c and Bougares, Fethi},
URL = {https://hal.archives-ouvertes.fr/hal-01433161},
BOOKTITLE = {{International Workshop on Spoken Language Translation (IWSLT'16)}},
ADDRESS = {Seattle, United States},
YEAR = {2016},
PDF = {https://hal.archives-ouvertes.fr/hal-01433161/file/FNMTiwslt2016.pdf},
HAL_ID = {hal-01433161},
HAL_VERSION = {v1},
}
@article{DBLP:journals/corr/JeanCMB14,
author = {S{\'{e}}bastien Jean and
Kyunghyun Cho and
Roland Memisevic and
Yoshua Bengio},
title = {On Using Very Large Target Vocabulary for Neural Machine Translation},
journal = {CoRR},
volume = {abs/1412.2007},
year = {2014},
url = {http://arxiv.org/abs/1412.2007},
archivePrefix = {arXiv},
eprint = {1412.2007},
timestamp = {Mon, 13 Aug 2018 16:46:10 +0200},
biburl = {https://dblp.org/rec/journals/corr/JeanCMB14.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/LuongM16,
author = {Minh{-}Thang Luong and
Christopher D. Manning},
title = {Achieving Open Vocabulary Neural Machine Translation with Hybrid Word-Character
Models},
journal = {CoRR},
volume = {abs/1604.00788},
year = {2016},
url = {http://arxiv.org/abs/1604.00788},
archivePrefix = {arXiv},
eprint = {1604.00788},
timestamp = {Mon, 13 Aug 2018 16:47:26 +0200},
biburl = {https://dblp.org/rec/journals/corr/LuongM16.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Gage (1994): the original byte pair encoding article.
% An article entry needs a journal; venue, volume, issue and pages are filled in so the
% reference is complete and BibTeX does not warn about a missing required field.
@article{philipAlgorithmfordataCompression,
  title={A New Algorithm for Data Compression},
  author={Philip Gage},
  journal={The C Users Journal},
  volume={12},
  number={2},
  pages={23--38},
  year={1994}
}
@article{DBLP:journals/corr/abs-1804-10959,
author = {Taku Kudo},
title = {Subword Regularization: Improving Neural Network Translation Models
with Multiple Subword Candidates},
journal = {CoRR},
volume = {abs/1804.10959},
year = {2018},
url = {http://arxiv.org/abs/1804.10959},
archivePrefix = {arXiv},
eprint = {1804.10959},
timestamp = {Mon, 13 Aug 2018 16:48:57 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1804-10959.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/ZagoruykoK16,
author = {Sergey Zagoruyko and
Nikos Komodakis},
title = {Wide Residual Networks},
journal = {CoRR},
volume = {abs/1605.07146},
year = {2016},
url = {http://arxiv.org/abs/1605.07146},
archivePrefix = {arXiv},
eprint = {1605.07146},
timestamp = {Mon, 13 Aug 2018 16:46:42 +0200},
biburl = {https://dblp.org/rec/journals/corr/ZagoruykoK16.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/iet-bmt/Sepas-Moghaddam20,
author = {Alireza Sepas{-}Moghaddam and
Fernando Pereira and
Paulo Lobato Correia},
title = {Face recognition: a novel multi-level taxonomy based survey},
journal = {{IET} Biom.},
volume = {9},
number = {2},
pages = {58--67},
year = {2020},
url = {https://doi.org/10.1049/iet-bmt.2019.0001},
doi = {10.1049/iet-bmt.2019.0001},
timestamp = {Wed, 01 Apr 2020 08:42:20 +0200},
biburl = {https://dblp.org/rec/journals/iet-bmt/Sepas-Moghaddam20.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{ethayarajh-2019-contextual,
title = "How Contextual are Contextualized Word Representations? Comparing the Geometry of {BERT}, {ELM}o, and {GPT}-2 Embeddings",
author = "Ethayarajh, Kawin",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/D19-1006",
doi = "10.18653/v1/D19-1006",
pages = "55--65",
abstract = "Replacing static word embeddings with contextualized word representations has yielded significant improvements on many NLP tasks. However, just how contextual are the contextualized representations produced by models such as ELMo and BERT? Are there infinitely many context-specific representations for each word, or are words essentially assigned one of a finite number of word-sense representations? For one, we find that the contextualized representations of all words are not isotropic in any layer of the contextualizing model. While representations of the same word in different contexts still have a greater cosine similarity than those of two different words, this self-similarity is much lower in upper layers. This suggests that upper layers of contextualizing models produce more context-specific representations, much like how upper layers of LSTMs produce more task-specific representations. In all layers of ELMo, BERT, and GPT-2, on average, less than 5{\%} of the variance in a word{'}s contextualized representations can be explained by a static embedding for that word, providing some justification for the success of contextualized representations.",
}
@inproceedings{DBLP:conf/acl/JawaharSS19,
author = {Ganesh Jawahar and
Beno{\^{\i}}t Sagot and
Djam{\'{e}} Seddah},
editor = {Anna Korhonen and
David R. Traum and
Llu{\'{\i}}s M{\`{a}}rquez},
title = {What Does {BERT} Learn about the Structure of Language?},
booktitle = {Proceedings of the 57th Conference of the Association for Computational
Linguistics, {ACL} 2019, Florence, Italy, July 28- August 2, 2019,
Volume 1: Long Papers},
pages = {3651--3657},
publisher = {Association for Computational Linguistics},
year = {2019},
url = {https://doi.org/10.18653/v1/p19-1356},
doi = {10.18653/v1/p19-1356},
timestamp = {Tue, 28 Jan 2020 10:28:06 +0100},
biburl = {https://dblp.org/rec/conf/acl/JawaharSS19.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1806-00187,
author = {Myle Ott and
Sergey Edunov and
David Grangier and
Michael Auli},
title = {Scaling Neural Machine Translation},
journal = {CoRR},
volume = {abs/1806.00187},
year = {2018},
url = {http://arxiv.org/abs/1806.00187},
archivePrefix = {arXiv},
eprint = {1806.00187},
timestamp = {Mon, 13 Aug 2018 16:47:40 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1806-00187.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Bengio, Louradour, Collobert and Weston (2009), ICML: curriculum learning.
% Entry type switched to a proceedings paper, author spellings corrected, and the
% missing year, venue and pages supplied. The citation key is unchanged.
@inproceedings{bengioCurriculumlearning,
  author    = {Yoshua Bengio and
               J{\'{e}}r{\^{o}}me Louradour and
               Ronan Collobert and
               Jason Weston},
  title     = {Curriculum learning},
  booktitle = {Proceedings of the 26th Annual International Conference on Machine
               Learning, {ICML} 2009, Montreal, Quebec, Canada, June 14-18, 2009},
  pages     = {41--48},
  publisher = {{ACM}},
  year      = {2009}
}
% Hubara et al. (2016), NIPS: binarized neural networks.
@inproceedings{Hubara2016BinarizedNN,
  author    = {Itay Hubara and
               Matthieu Courbariaux and
               Daniel Soudry and
               Ran El-Yaniv and
               Yoshua Bengio},
  title     = {Binarized Neural Networks},
  booktitle = {NIPS},
  year      = {2016}
}
% Duan, Li, Xiao and Zhou (EMNLP 2009): feature subspace method for SMT system combination.
% NOTE(review): the entry that ends just before the "chapter 7" marker in this file has a
% biburl pointing at the same DBLP record (DuanLXZ09). If that entry uses the same citation
% key, BibTeX will report this one as a repeated entry - confirm and keep only one copy.
@inproceedings{DBLP:conf/emnlp/DuanLXZ09,
author = {Nan Duan and
Mu Li and
Tong Xiao and
Ming Zhou},
title = {The Feature Subspace Method for {SMT} System Combination},
booktitle = {Proceedings of the 2009 Conference on Empirical Methods in Natural
Language Processing, {EMNLP} 2009, 6-7 August 2009, Singapore, {A}
meeting of SIGDAT, a Special Interest Group of the {ACL}},
pages = {1096--1104},
publisher = {{ACL}},
year = {2009},
url = {https://www.aclweb.org/anthology/D09-1114/},
timestamp = {Fri, 13 Sep 2019 13:08:45 +0200},
biburl = {https://dblp.org/rec/conf/emnlp/DuanLXZ09.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1910-10683,
author = {Colin Raffel and
Noam Shazeer and
Adam Roberts and
Katherine Lee and
Sharan Narang and
Michael Matena and
Yanqi Zhou and
Wei Li and
Peter J. Liu},
title = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text
Transformer},
journal = {CoRR},
volume = {abs/1910.10683},
year = {2019},
url = {http://arxiv.org/abs/1910.10683},
archivePrefix = {arXiv},
eprint = {1910.10683},
timestamp = {Fri, 25 Oct 2019 14:59:26 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1910-10683.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{deeplearning,
title={Deep learning},
author={Yann LeCun and
Yoshua Bengio and
Geoffrey Hinton},
journal={Nature},
volume={521},
number={7553},
pages={436--444},
year={2015}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 7.5.1----------------------------------------------------------------
@inproceedings{DBLP:conf/cvpr/YuYR18,
author = {Xin Yu and
Zhiding Yu and
Srikumar Ramalingam},
title = {Learning Strict Identity Mappings in Deep Residual Networks},
booktitle = {2018 {IEEE} Conference on Computer Vision and Pattern Recognition,
{CVPR} 2018, Salt Lake City, UT, USA, June 18-22, 2018},
pages = {4432--4440},
publisher = {{IEEE} Computer Society},
year = {2018},
url = {http://openaccess.thecvf.com/content\_cvpr\_2018/html/Yu\_Learning\_Strict\_Identity\_CVPR\_2018\_paper.html},
doi = {10.1109/CVPR.2018.00466},
timestamp = {Wed, 16 Oct 2019 14:14:50 +0200},
biburl = {https://dblp.org/rec/conf/cvpr/YuYR18.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/emnlp/BapnaCFCW18,
author = {Ankur Bapna and
Mia Xu Chen and
Orhan Firat and
Yuan Cao and
Yonghui Wu},
editor = {Ellen Riloff and
David Chiang and
Julia Hockenmaier and
Jun'ichi Tsujii},
title = {Training Deeper Neural Machine Translation Models with Transparent
Attention},
booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural
Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
pages = {3028--3033},
publisher = {Association for Computational Linguistics},
year = {2018},
url = {https://doi.org/10.18653/v1/d18-1338},
doi = {10.18653/v1/d18-1338},
timestamp = {Tue, 28 Jan 2020 10:28:48 +0100},
biburl = {https://dblp.org/rec/conf/emnlp/BapnaCFCW18.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/emnlp/ZhangTS19,
author = {Biao Zhang and
Ivan Titov and
Rico Sennrich},
editor = {Kentaro Inui and
Jing Jiang and
Vincent Ng and
Xiaojun Wan},
title = {Improving Deep Transformer with Depth-Scaled Initialization and Merged
Attention},
booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural
Language Processing and the 9th International Joint Conference on
Natural Language Processing, {EMNLP-IJCNLP} 2019, Hong Kong, China,
November 3-7, 2019},
pages = {898--909},
publisher = {Association for Computational Linguistics},
year = {2019},
url = {https://doi.org/10.18653/v1/D19-1083},
doi = {10.18653/v1/D19-1083},
timestamp = {Thu, 12 Dec 2019 13:23:43 +0100},
biburl = {https://dblp.org/rec/conf/emnlp/ZhangTS19.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/eccv/HeZRS16,
author = {Kaiming He and
Xiangyu Zhang and
Shaoqing Ren and
Jian Sun},
editor = {Bastian Leibe and
Jiri Matas and
Nicu Sebe and
Max Welling},
title = {Identity Mappings in Deep Residual Networks},
booktitle = {Computer Vision - {ECCV} 2016 - 14th European Conference, Amsterdam,
The Netherlands, October 11-14, 2016, Proceedings, Part {IV}},
series = {Lecture Notes in Computer Science},
volume = {9908},
pages = {630--645},
publisher = {Springer},
year = {2016},
url = {https://doi.org/10.1007/978-3-319-46493-0\_38},
doi = {10.1007/978-3-319-46493-0\_38},
timestamp = {Wed, 25 Sep 2019 18:11:12 +0200},
biburl = {https://dblp.org/rec/conf/eccv/HeZRS16.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/acl/WuWXTGQLL19,
author = {Lijun Wu and
Yiren Wang and
Yingce Xia and
Fei Tian and
Fei Gao and
Tao Qin and
Jianhuang Lai and
Tie{-}Yan Liu},
editor = {Anna Korhonen and
David R. Traum and
Llu{\'{\i}}s M{\`{a}}rquez},
title = {Depth Growing for Neural Machine Translation},
booktitle = {Proceedings of the 57th Conference of the Association for Computational
Linguistics, {ACL} 2019, Florence, Italy, July 28- August 2, 2019,
Volume 1: Long Papers},
pages = {5558--5563},
publisher = {Association for Computational Linguistics},
year = {2019},
url = {https://doi.org/10.18653/v1/p19-1558},
doi = {10.18653/v1/p19-1558},
timestamp = {Tue, 28 Jan 2020 10:27:34 +0100},
biburl = {https://dblp.org/rec/conf/acl/WuWXTGQLL19.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/HuangLW16a,
author = {Gao Huang and
Zhuang Liu and
Kilian Q. Weinberger},
title = {Densely Connected Convolutional Networks},
journal = {CoRR},
volume = {abs/1608.06993},
year = {2016},
url = {http://arxiv.org/abs/1608.06993},
archivePrefix = {arXiv},
eprint = {1608.06993},
timestamp = {Mon, 10 Sep 2018 15:49:32 +0200},
biburl = {https://dblp.org/rec/journals/corr/HuangLW16a.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1810-10181,
author = {Zi{-}Yi Dou and
Zhaopeng Tu and
Xing Wang and
Shuming Shi and
Tong Zhang},
title = {Exploiting Deep Representations for Neural Machine Translation},
journal = {CoRR},
volume = {abs/1810.10181},
year = {2018},
url = {http://arxiv.org/abs/1810.10181},
archivePrefix = {arXiv},
eprint = {1810.10181},
timestamp = {Tue, 15 Jan 2019 11:48:13 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-1810-10181.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/GreffSS16,
author = {Klaus Greff and
Rupesh Kumar Srivastava and
J{\"{u}}rgen Schmidhuber},
title = {Highway and Residual Networks learn Unrolled Iterative Estimation},
journal = {CoRR},
volume = {abs/1612.07771},
year = {2016},
url = {http://arxiv.org/abs/1612.07771},
archivePrefix = {arXiv},
eprint = {1612.07771},
timestamp = {Mon, 13 Aug 2018 16:48:07 +0200},
biburl = {https://dblp.org/rec/journals/corr/GreffSS16.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/XiaQCBYL17,
author = {Yingce Xia and
Tao Qin and
Wei Chen and
Jiang Bian and
Nenghai Yu and
Tie{-}Yan Liu},
title = {Dual Supervised Learning},
journal = {CoRR},
volume = {abs/1707.00415},
year = {2017},
url = {http://arxiv.org/abs/1707.00415},
archivePrefix = {arXiv},
eprint = {1707.00415},
timestamp = {Tue, 03 Sep 2019 16:31:11 +0200},
biburl = {https://dblp.org/rec/journals/corr/XiaQCBYL17.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/HeXQWYLM16,
author = {Di He and
Yingce Xia and
Tao Qin and
Liwei Wang and
Nenghai Yu and
Tie{-}Yan Liu and
Wei{-}Ying Ma},
editor = {Daniel D. Lee and
Masashi Sugiyama and
Ulrike von Luxburg and
Isabelle Guyon and
Roman Garnett},
title = {Dual Learning for Machine Translation},
booktitle = {Advances in Neural Information Processing Systems 29: Annual Conference
on Neural Information Processing Systems 2016, December 5-10, 2016,
Barcelona, Spain},
pages = {820--828},
year = {2016},
url = {http://papers.nips.cc/paper/6469-dual-learning-for-machine-translation},
timestamp = {Fri, 06 Mar 2020 17:00:15 +0100},
biburl = {https://dblp.org/rec/conf/nips/HeXQWYLM16.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/nips/SuttonMSM99,
author = {Richard S. Sutton and
David A. McAllester and
Satinder P. Singh and
Yishay Mansour},
editor = {Sara A. Solla and
Todd K. Leen and
Klaus{-}Robert M{\"{u}}ller},
title = {Policy Gradient Methods for Reinforcement Learning with Function Approximation},
booktitle = {Advances in Neural Information Processing Systems 12, {[NIPS} Conference,
Denver, Colorado, USA, November 29 - December 4, 1999]},
pages = {1057--1063},
publisher = {The {MIT} Press},
year = {1999},
url = {http://papers.nips.cc/paper/1713-policy-gradient-methods-for-reinforcement-learning-with-function-approximation},
timestamp = {Fri, 06 Mar 2020 16:58:30 +0100},
biburl = {https://dblp.org/rec/conf/nips/SuttonMSM99.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/GulcehreFXCBLBS15,
author = {{\c{C}}aglar G{\"{u}}l{\c{c}}ehre and
Orhan Firat and
Kelvin Xu and
Kyunghyun Cho and
Lo{\"{\i}}c Barrault and
Huei{-}Chi Lin and
Fethi Bougares and
Holger Schwenk and
Yoshua Bengio},
title = {On Using Monolingual Corpora in Neural Machine Translation},
journal = {CoRR},
volume = {abs/1503.03535},
year = {2015},
url = {http://arxiv.org/abs/1503.03535},
archivePrefix = {arXiv},
eprint = {1503.03535},
timestamp = {Mon, 13 Aug 2018 16:46:37 +0200},
biburl = {https://dblp.org/rec/journals/corr/GulcehreFXCBLBS15.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/wmt/CurreyBH17,
author = {Anna Currey and
Antonio Valerio Miceli Barone and
Kenneth Heafield},
editor = {Ondrej Bojar and
Christian Buck and
Rajen Chatterjee and
Christian Federmann and
Yvette Graham and
Barry Haddow and
Matthias Huck and
Antonio Jimeno{-}Yepes and
Philipp Koehn and
Julia Kreutzer},
title = {Copied Monolingual Data Improves Low-Resource Neural Machine Translation},
booktitle = {Proceedings of the Second Conference on Machine Translation, {WMT}
2017, Copenhagen, Denmark, September 7-8, 2017},
pages = {148--156},
publisher = {Association for Computational Linguistics},
year = {2017},
url = {https://doi.org/10.18653/v1/w17-4715},
doi = {10.18653/v1/w17-4715},
timestamp = {Tue, 28 Jan 2020 10:31:00 +0100},
biburl = {https://dblp.org/rec/conf/wmt/CurreyBH17.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/acl/SennrichHB16,
author = {Rico Sennrich and
Barry Haddow and
Alexandra Birch},
title = {Improving Neural Machine Translation Models with Monolingual Data},
booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational
Linguistics, {ACL} 2016, August 7-12, 2016, Berlin, Germany, Volume
1: Long Papers},
publisher = {The Association for Computer Linguistics},
year = {2016},
url = {https://doi.org/10.18653/v1/p16-1009},
doi = {10.18653/v1/p16-1009},
timestamp = {Tue, 28 Jan 2020 10:28:01 +0100},
biburl = {https://dblp.org/rec/conf/acl/SennrichHB16.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/emnlp/EdunovOAG18,
author = {Sergey Edunov and
Myle Ott and
Michael Auli and
David Grangier},
editor = {Ellen Riloff and
David Chiang and
Julia Hockenmaier and
Jun'ichi Tsujii},
title = {Understanding Back-Translation at Scale},
booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural
Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
pages = {489--500},
publisher = {Association for Computational Linguistics},
year = {2018},
url = {https://doi.org/10.18653/v1/d18-1045},
doi = {10.18653/v1/d18-1045},
timestamp = {Tue, 28 Jan 2020 10:28:36 +0100},
biburl = {https://dblp.org/rec/conf/emnlp/EdunovOAG18.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/emnlp/DomhanH17,
author = {Tobias Domhan and
Felix Hieber},
editor = {Martha Palmer and
Rebecca Hwa and
Sebastian Riedel},
title = {Using Target-side Monolingual Data for Neural Machine Translation
through Multi-task Learning},
booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural
Language Processing, {EMNLP} 2017, Copenhagen, Denmark, September
9-11, 2017},
pages = {1500--1505},
publisher = {Association for Computational Linguistics},
year = {2017},
url = {https://doi.org/10.18653/v1/d17-1158},
doi = {10.18653/v1/d17-1158},
timestamp = {Tue, 28 Jan 2020 10:28:22 +0100},
biburl = {https://dblp.org/rec/conf/emnlp/DomhanH17.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2002-11794,
author = {Zhuohan Li and
Eric Wallace and
Sheng Shen and
Kevin Lin and
Kurt Keutzer and
Dan Klein and
Joseph E. Gonzalez},
title = {Train Large, Then Compress: Rethinking Model Size for Efficient Training
and Inference of Transformers},
journal = {CoRR},
volume = {abs/2002.11794},
year = {2020},
url = {https://arxiv.org/abs/2002.11794},
archivePrefix = {arXiv},
eprint = {2002.11794},
timestamp = {Tue, 03 Mar 2020 14:32:13 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2002-11794.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iclr/FrankleC19,
author = {Jonathan Frankle and
Michael Carbin},
title = {The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks},
booktitle = {7th International Conference on Learning Representations, {ICLR} 2019,
New Orleans, LA, USA, May 6-9, 2019},
publisher = {OpenReview.net},
year = {2019},
url = {https://openreview.net/forum?id=rJl-b3RcF7},
timestamp = {Thu, 25 Jul 2019 13:03:15 +0200},
biburl = {https://dblp.org/rec/conf/iclr/FrankleC19.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/emnlp/KimR16,
author = {Yoon Kim and
Alexander M. Rush},
editor = {Jian Su and
Xavier Carreras and
Kevin Duh},
title = {Sequence-Level Knowledge Distillation},
booktitle = {Proceedings of the 2016 Conference on Empirical Methods in Natural
Language Processing, {EMNLP} 2016, Austin, Texas, USA, November 1-4,
2016},
pages = {1317--1327},
publisher = {The Association for Computational Linguistics},
year = {2016},
url = {https://doi.org/10.18653/v1/d16-1139},
doi = {10.18653/v1/d16-1139},
timestamp = {Tue, 28 Jan 2020 10:28:22 +0100},
biburl = {https://dblp.org/rec/conf/emnlp/KimR16.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1909-10351,
author = {Xiaoqi Jiao and
Yichun Yin and
Lifeng Shang and
Xin Jiang and
Xiao Chen and
Linlin Li and
Fang Wang and
Qun Liu},
title = {TinyBERT: Distilling {BERT} for Natural Language Understanding},
journal = {CoRR},
volume = {abs/1909.10351},
year = {2019},
url = {http://arxiv.org/abs/1909.10351},
archivePrefix = {arXiv},
eprint = {1909.10351},
timestamp = {Fri, 27 Sep 2019 13:04:21 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1909-10351.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1901-09069,
author = {Felipe Almeida and
Geraldo Xex{\'{e}}o},
title = {Word Embeddings: {A} Survey},
journal = {CoRR},
volume = {abs/1901.09069},
year = {2019},
url = {http://arxiv.org/abs/1901.09069},
archivePrefix = {arXiv},
eprint = {1901.09069},
timestamp = {Sat, 02 Feb 2019 16:56:00 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-1901-09069.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2002-06823,
author = {Jinhua Zhu and
Yingce Xia and
Lijun Wu and
Di He and
Tao Qin and
Wengang Zhou and
Houqiang Li and
Tie{-}Yan Liu},
title = {Incorporating {BERT} into Neural Machine Translation},
journal = {CoRR},
volume = {abs/2002.06823},
year = {2020},
url = {https://arxiv.org/abs/2002.06823},
archivePrefix = {arXiv},
eprint = {2002.06823},
timestamp = {Mon, 02 Mar 2020 16:46:06 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2002-06823.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/Ruder17a,
author = {Sebastian Ruder},
title = {An Overview of Multi-Task Learning in Deep Neural Networks},
journal = {CoRR},
volume = {abs/1706.05098},
year = {2017},
url = {http://arxiv.org/abs/1706.05098},
archivePrefix = {arXiv},
eprint = {1706.05098},
timestamp = {Mon, 13 Aug 2018 16:48:50 +0200},
biburl = {https://dblp.org/rec/journals/corr/Ruder17a.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/iccv/ZhuPIE17,
author = {Jun{-}Yan Zhu and
Taesung Park and
Phillip Isola and
Alexei A. Efros},
title = {Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial
Networks},
booktitle = {{IEEE} International Conference on Computer Vision, {ICCV} 2017, Venice,
Italy, October 22-29, 2017},
pages = {2242--2251},
publisher = {{IEEE} Computer Society},
year = {2017},
url = {https://doi.org/10.1109/ICCV.2017.244},
doi = {10.1109/ICCV.2017.244},
timestamp = {Wed, 16 Oct 2019 14:14:51 +0200},
biburl = {https://dblp.org/rec/conf/iccv/ZhuPIE17.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{domhan2017using,
title={Using target-side monolingual data for neural machine translation through multi-task learning},
author={Domhan, Tobias and
Hieber, Felix},
booktitle={Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
pages={1500--1505},
year={2017}
}
...@@ -110,14 +110,14 @@ ...@@ -110,14 +110,14 @@
% CHAPTERS % CHAPTERS
%---------------------------------------------------------------------------------------- %----------------------------------------------------------------------------------------
%\include{Chapter1/chapter1} \include{Chapter1/chapter1}
%\include{Chapter2/chapter2} \include{Chapter2/chapter2}
%\include{Chapter3/chapter3} \include{Chapter3/chapter3}
\include{Chapter4/chapter4} \include{Chapter4/chapter4}
%\include{Chapter5/chapter5} \include{Chapter5/chapter5}
%\include{Chapter6/chapter6} \include{Chapter6/chapter6}
%\include{Chapter7/chapter7} \include{Chapter7/chapter7}
%\include{ChapterAppend/chapterappend} \include{ChapterAppend/chapterappend}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论