Commit 06a49b7c by 曹润柘

合并分支 'caorunzhe' 到 'master'

Caorunzhe

查看合并请求 !821
parents 753cd9f5 522913f2
...@@ -2,9 +2,10 @@ ...@@ -2,9 +2,10 @@
\tikzstyle{decoder} = [rectangle,thick,rounded corners,minimum width=5cm,minimum height=0.6cm,text centered,draw=black,fill=blue!15] \tikzstyle{decoder} = [rectangle,thick,rounded corners,minimum width=5cm,minimum height=0.6cm,text centered,draw=black,fill=blue!15]
\begin{scope} \begin{scope}
\node (aa)[decoder] at (0,0) {}; \node (encoder) at (0,0) {来自编码器的信息};
\node (aa)[decoder,anchor=east] at ([xshift=5.5cm]encoder.east) {};
\node (y2b)[anchor=south] at ([yshift=-2.5em]aa.south) {$y_2$}; \node (y2b)[anchor=south] at ([yshift=-2.5em]aa.south) {$y_2$};
\node (label)[anchor=south] at ([yshift=-1.8em]y2b.south) {\small{(a) 自回归解码}}; \node (label)[anchor=south] at ([xshift=-4.5em,yshift=-1.8em]y2b.south) {\small{(a) 自回归解码}};
\node (y1b)[anchor=east] at ([xshift=-2.5em]y2b.east) {$y_1$}; \node (y1b)[anchor=east] at ([xshift=-2.5em]y2b.east) {$y_1$};
\node (sos)[anchor=east] at ([xshift=-4.3em]y2b.east) {\small{<sos>}}; \node (sos)[anchor=east] at ([xshift=-4.3em]y2b.east) {\small{<sos>}};
\node (y3b)[anchor=west] at ([xshift=2.5em]y2b.west) {$y_3$}; \node (y3b)[anchor=west] at ([xshift=2.5em]y2b.west) {$y_3$};
...@@ -21,8 +22,9 @@ ...@@ -21,8 +22,9 @@
\draw [->,very thick,dotted] ([xshift=-0.3em]y3a.east) .. controls +(east:0.5) and +(west:0.5) ..([xshift=0.3em]y3b.west); \draw [->,very thick,dotted] ([xshift=-0.3em]y3a.east) .. controls +(east:0.5) and +(west:0.5) ..([xshift=0.3em]y3b.west);
\draw [->,very thick,dotted] ([xshift=-0.3em]y4a.east) .. controls +(east:0.5) and +(west:0.5) ..([xshift=0.3em]y4b.west); \draw [->,very thick,dotted] ([xshift=-0.3em]y4a.east) .. controls +(east:0.5) and +(west:0.5) ..([xshift=0.3em]y4b.west);
\node (autodecoder)[decoder] at (0,0) {自回归编码器}; \node (autodecoder)[decoder,anchor=east] at ([xshift=5.5cm]encoder.east) {自回归解码器};
%\node (encoder)[decoder,anchor=west,fill=red!20] at ([xshift=-2cm]autodecoder.west) {编码器};
\draw [->,thick](encoder.east) to (autodecoder.west);
\draw [->,thick]([yshift=0em]y1b.north) to ([yshift=1.15em]y1b.north); \draw [->,thick]([yshift=0em]y1b.north) to ([yshift=1.15em]y1b.north);
\draw [->,thick]([yshift=0em]y2b.north) to ([yshift=1.15em]y2b.north); \draw [->,thick]([yshift=0em]y2b.north) to ([yshift=1.15em]y2b.north);
\draw [->,thick]([yshift=0em]y3b.north) to ([yshift=1.15em]y3b.north); \draw [->,thick]([yshift=0em]y3b.north) to ([yshift=1.15em]y3b.north);
...@@ -37,9 +39,10 @@ ...@@ -37,9 +39,10 @@
\end{scope} \end{scope}
\begin{scope}[yshift=-1.55in] \begin{scope}[yshift=-1.55in]
\node (aa) [decoder] at (0,0) {}; \node (encoder) at (0,0) {来自编码器的信息};
\node (aa)[decoder,anchor=east] at ([xshift=5.5cm]encoder.east) {};
\node (y1y2b)[rectangle,anchor=south,inner sep=0.25em,densely dashed,draw] at ([yshift=-2.6em]aa.south) {$y_1\;y_2$}; \node (y1y2b)[rectangle,anchor=south,inner sep=0.25em,densely dashed,draw] at ([yshift=-2.6em]aa.south) {$y_1\;y_2$};
\node (label)[anchor=south] at ([yshift=-2.1em]y1y2b.south) {\small{(b) 半自回归解码}}; \node (label)[anchor=south] at ([xshift=-4em,yshift=-2.1em]y1y2b.south) {\small{(b) 半自回归解码}};
\node (sos)[anchor=east] at ([xshift=-4.55em]y1y2b.east) {\small{<sos>}}; \node (sos)[anchor=east] at ([xshift=-4.55em]y1y2b.east) {\small{<sos>}};
\node (y3y4b)[rectangle,anchor=west,inner sep=0.25em,densely dashed,draw] at ([xshift=4.7em]y1y2b.west) {$y_3\;y_4$}; \node (y3y4b)[rectangle,anchor=west,inner sep=0.25em,densely dashed,draw] at ([xshift=4.7em]y1y2b.west) {$y_3\;y_4$};
...@@ -50,7 +53,9 @@ ...@@ -50,7 +53,9 @@
\draw [->,very thick,dotted] ([xshift=-0em]y1y2a.east) .. controls +(east:0.5) and +(west:0.5) ..([xshift=0em]y1y2b.west); \draw [->,very thick,dotted] ([xshift=-0em]y1y2a.east) .. controls +(east:0.5) and +(west:0.5) ..([xshift=0em]y1y2b.west);
\draw [->,very thick,dotted] ([xshift=-0em]y3y4a.east) .. controls +(east:0.5) and +(west:0.5) ..([xshift=0em]y3y4b.west); \draw [->,very thick,dotted] ([xshift=-0em]y3y4a.east) .. controls +(east:0.5) and +(west:0.5) ..([xshift=0em]y3y4b.west);
\node (autodecoder)[decoder] at (0,0) {半自回归编码器}; \node (autodecoder)[decoder,anchor=east] at ([xshift=5.5cm]encoder.east) {半自回归解码器};
%\node (encoder)[decoder,anchor=west,fill=red!20] at ([xshift=-2cm]autodecoder.west) {编码器};
\draw [->,thick](encoder.east) to (autodecoder.west);
\draw [->,thick]([yshift=0.05em]sos.north) to ([yshift=1.38em]sos.north); \draw [->,thick]([yshift=0.05em]sos.north) to ([yshift=1.38em]sos.north);
\draw [->,thick]([yshift=0em]y1y2b.north) to ([yshift=1.38em]y1y2b.north); \draw [->,thick]([yshift=0em]y1y2b.north) to ([yshift=1.38em]y1y2b.north);
...@@ -62,22 +67,19 @@ ...@@ -62,22 +67,19 @@
\end{scope} \end{scope}
\begin{scope}[yshift=-3.1in] \begin{scope}[yshift=-3.1in]
\node (aa) [decoder]at (0,0) {非自回归模型}; \node (encoder) at (0,0) {来自编码器的信息};
\node (y2b)[anchor=south] at ([xshift=-1.5em,yshift=-2.5em]aa.south) {$y_2$}; \node (aa)[decoder,anchor=east] at ([xshift=5.5cm]encoder.east) {非自回归解码器};
\node (label)[anchor=south] at ([yshift=-4.3em]aa.south) {\small{(c) 非自回归解码}}; %\node (encoder)[decoder,anchor=west,fill=red!20] at ([xshift=-2cm]aa.west) {编码器};
\node (y1b)[anchor=east] at ([xshift=-3em]y2b.east) {$y_1$}; \draw [->,thick](encoder.east) to (aa.west);
\node (y3b)[anchor=west] at ([xshift=3em]y2b.west) {$y_3$};
\node (y4b)[anchor=west] at ([xshift=6em]y2b.west) {$y_4$}; \node (label)[anchor=south] at ([xshift=-4em,yshift=-4.3em]aa.south) {\small{(c) 非自回归解码}};
\node (y2a)[anchor=north] at ([xshift=-1.5em,yshift=2.5em]aa.north) {$y_2$}; \node (y2a)[anchor=north] at ([xshift=-1.5em,yshift=2.5em]aa.north) {$y_2$};
\node (y1a)[anchor=east] at ([xshift=-3em]y2a.east) {$y_1$}; \node (y1a)[anchor=east] at ([xshift=-3em]y2a.east) {$y_1$};
\node (y3a)[anchor=west] at ([xshift=3em]y2a.west) {$y_3$}; \node (y3a)[anchor=west] at ([xshift=3em]y2a.west) {$y_3$};
\node (y4a)[anchor=west] at ([xshift=6em]y2a.west) {$y_4$}; \node (y4a)[anchor=west] at ([xshift=6em]y2a.west) {$y_4$};
\draw [->,thick]([yshift=0em]y1b.north) to ([yshift=1.15em]y1b.north);
\draw [->,thick]([yshift=0em]y2b.north) to ([yshift=1.15em]y2b.north);
\draw [->,thick]([yshift=0em]y3b.north) to ([yshift=1.15em]y3b.north);
\draw [->,thick]([yshift=0em]y4b.north) to ([yshift=1.15em]y4b.north);
\draw [->,thick]([yshift=-1.2em]y1a.south) to (y1a.south); \draw [->,thick]([yshift=-1.2em]y1a.south) to (y1a.south);
\draw [->,thick]([yshift=-1.2em]y2a.south) to (y2a.south); \draw [->,thick]([yshift=-1.2em]y2a.south) to (y2a.south);
......
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
\tikzstyle{snode} = [draw,inner sep=1pt,minimum width=3em,minimum height=0.5em,rounded corners=1pt,fill=green!20!white] \tikzstyle{snode} = [draw,inner sep=1pt,minimum width=3em,minimum height=0.5em,rounded corners=1pt,fill=green!20!white]
\tikzstyle{pnode} = [draw,inner sep=1pt,minimum width=1em,minimum height=0.5em,rounded corners=1pt] \tikzstyle{pnode} = [draw,inner sep=1pt,minimum width=1em,minimum height=0.5em,rounded corners=1pt]
\node [anchor=west] (des) at (1.5,3) {\normalsize\bfnew{$\bm{m}$:显存\quad$\bm{t}$:时间\quad$\bm{m_1>m_2}$\quad$\bm{t_1>t_2}$}};
\node [anchor=west,snode] (s1) at (0,0) {\tiny{}}; \node [anchor=west,snode] (s1) at (0,0) {\tiny{}};
\node [anchor=north west,snode,minimum width=6.3em] (s2) at ([yshift=-0.3em]s1.south west) {\tiny{}}; \node [anchor=north west,snode,minimum width=6.3em] (s2) at ([yshift=-0.3em]s1.south west) {\tiny{}};
\node [anchor=north west,snode,minimum width=2em] (s3) at ([yshift=-0.3em]s2.south west) {\tiny{}}; \node [anchor=north west,snode,minimum width=2em] (s3) at ([yshift=-0.3em]s2.south west) {\tiny{}};
...@@ -12,6 +11,9 @@ ...@@ -12,6 +11,9 @@
\node [anchor=north west,snode,minimum width=3em] (s6) at ([yshift=-0.3em]s5.south west) {\tiny{}}; \node [anchor=north west,snode,minimum width=3em] (s6) at ([yshift=-0.3em]s5.south west) {\tiny{}};
\node [anchor=east] (label1) at ([xshift=-0.8em,yshift=-2em]s1.west) {{句子:}}; \node [anchor=east] (label1) at ([xshift=-0.8em,yshift=-2em]s1.west) {{句子:}};
\node [anchor=east,draw,dashed,minimum height=7.5cm,minimum width=7.3cm,thick] (box) at ([xshift=10.9cm]label1.east) {};
%\node [anchor=north] (label6) at ([xshift=3em,yshift=7em]label1.north) {{$m$:显存}};
%\node [anchor=north] (label7) at ([xshift=3.3em,yshift=5.5em]label1.north) {{$t$:延迟}};
\node [anchor=west,pnode,minimum width=3em] (p1) at ([xshift=0.3em]s1.east) {\tiny{}}; \node [anchor=west,pnode,minimum width=3em] (p1) at ([xshift=0.3em]s1.east) {\tiny{}};
\node [anchor=west,pnode,minimum width=4em] (p3) at ([xshift=0.3em]s3.east) {\tiny{}}; \node [anchor=west,pnode,minimum width=4em] (p3) at ([xshift=0.3em]s3.east) {\tiny{}};
\node [anchor=west,pnode,minimum width=0.5em] (p4) at ([xshift=0.3em]s4.east) {\tiny{}}; \node [anchor=west,pnode,minimum width=0.5em] (p4) at ([xshift=0.3em]s4.east) {\tiny{}};
...@@ -19,17 +21,20 @@ ...@@ -19,17 +21,20 @@
\node [anchor=west,pnode,minimum width=3em] (p6) at ([xshift=0.3em]s6.east) {\tiny{}}; \node [anchor=west,pnode,minimum width=3em] (p6) at ([xshift=0.3em]s6.east) {\tiny{}};
\node [rectangle,inner sep=0.5em,rounded corners=2pt,very thick,dotted,draw=ugreen!80] [fit = (s1) (s6) (p1) (p6)] (box0) {}; \node [rectangle,inner sep=0.5em,rounded corners=2pt,very thick,dotted,draw=ugreen!80] [fit = (s1) (s6) (p1) (p6)] (box0) {};
\node[rectangle,inner sep=0.5em,rounded corners=1pt,draw,fill=blue!20] (model) at ([xshift=3.5em]box0.east){{模型}}; \node[anchor=east] (model) at ([xshift=2em]box0.east){{}};
% big batch % big batch
\node [anchor=west,snode] (sbi1) at ([xshift=3em,yshift=6em]model.east) {\tiny{}}; \node [anchor=west,snode] (sbi1) at ([xshift=3.5em,yshift=6.7em]model.east) {\tiny{}};
\node [anchor=north west,snode,minimum width=6.3em] (sbi2) at ([yshift=-0.3em]sbi1.south west) {\tiny{}}; \node [anchor=north west,snode,minimum width=6.3em] (sbi2) at ([yshift=-0.3em]sbi1.south west) {\tiny{}};
\node [anchor=north west,snode,minimum width=2em] (sbi3) at ([yshift=-0.3em]sbi2.south west) {\tiny{}}; \node [anchor=north west,snode,minimum width=2em] (sbi3) at ([yshift=-0.3em]sbi2.south west) {\tiny{}};
\node [anchor=north west,snode,minimum width=5.5em] (sbi4) at ([yshift=-0.3em]sbi3.south west) {\tiny{}}; \node [anchor=north west,snode,minimum width=5.5em] (sbi4) at ([yshift=-0.3em]sbi3.south west) {\tiny{}};
\node [anchor=north west,snode,minimum width=5.8em] (sbi5) at ([yshift=-0.3em]sbi4.south west) {\tiny{}}; \node [anchor=north west,snode,minimum width=5.8em] (sbi5) at ([yshift=-0.3em]sbi4.south west) {\tiny{}};
\node [anchor=north west,snode,minimum width=3em] (sbi6) at ([yshift=-0.3em]sbi5.south west) {\tiny{}}; \node [anchor=north west,snode,minimum width=3em] (sbi6) at ([yshift=-0.3em]sbi5.south west) {\tiny{}};
\node [anchor=south] (label2) at ([xshift=0.3em,yshift=-3em]sbi5.south) {\footnotesize{批次1}};
\node [anchor=east] (label1) at ([xshift=-0.8em,yshift=-1em]sbi1.west) {{大批次}}; \node [anchor=west] (label12) at ([xshift=-2.3em,yshift=-0.2em]sbi3.west) {{}};
\node [anchor=north] (label11) at ([yshift=1.1em]label12.north) {{}};
\node [anchor=south] (label13) at ([yshift=-1.1em]label12.south) {{}};
\node [anchor=west,pnode,minimum width=3em] (pbi1) at ([xshift=0.3em]sbi1.east) {\tiny{}}; \node [anchor=west,pnode,minimum width=3em] (pbi1) at ([xshift=0.3em]sbi1.east) {\tiny{}};
\node [anchor=west,pnode,minimum width=4em] (pbi3) at ([xshift=0.3em]sbi3.east) {\tiny{}}; \node [anchor=west,pnode,minimum width=4em] (pbi3) at ([xshift=0.3em]sbi3.east) {\tiny{}};
\node [anchor=west,pnode,minimum width=0.5em] (pbi4) at ([xshift=0.3em]sbi4.east) {\tiny{}}; \node [anchor=west,pnode,minimum width=0.5em] (pbi4) at ([xshift=0.3em]sbi4.east) {\tiny{}};
...@@ -39,11 +44,15 @@ ...@@ -39,11 +44,15 @@
\node [rectangle,inner sep=0.5em,rounded corners=2pt,very thick,dotted,draw=ugreen!80] [fit = (sbi1) (sbi6) (pbi1) (pbi6)] (box1) {}; \node [rectangle,inner sep=0.5em,rounded corners=2pt,very thick,dotted,draw=ugreen!80] [fit = (sbi1) (sbi6) (pbi1) (pbi6)] (box1) {};
% small batch % small batch
\node [anchor=west,snode,minimum width=5.5em] (sma1) at ([xshift=3em,yshift=-3em]model.east) {\tiny{}}; \node [anchor=west,snode,minimum width=5.5em] (sma1) at ([xshift=3.5em,yshift=-3.7em]model.east) {\tiny{}};
\node [anchor=north west,snode,minimum width=5.8em] (sma2) at ([yshift=-0.3em]sma1.south west) {\tiny{}}; \node [anchor=north west,snode,minimum width=5.8em] (sma2) at ([yshift=-0.3em]sma1.south west) {\tiny{}};
\node [anchor=north west,snode,minimum width=6.3em] (sma3) at ([yshift=-0.3em]sma2.south west) {\tiny{}}; \node [anchor=north west,snode,minimum width=6.3em] (sma3) at ([yshift=-0.3em]sma2.south west) {\tiny{}};
\node [anchor=south] (label3) at ([xshift=0.3em,yshift=-3em]sma2.south) {\footnotesize{批次1}};
\node [anchor=south] (label5) at ([xshift=2.5em,yshift=-1.8em]label3.south) {{Transformer模型处理}};
\node [anchor=east] (label1) at ([xshift=-0.8em,yshift=-2em]sma1.west) {{小批次}}; \node [anchor=west] (label22) at ([xshift=-2.3em]sma2.west) {{}};
\node [anchor=north] (label21) at ([yshift=1.1em]label22.north) {{}};
\node [anchor=south] (label23) at ([yshift=-1.1em]label22.south) {{}};
\node [anchor=west,pnode,minimum width=0.5em] (pma1) at ([xshift=0.3em]sma1.east) {\tiny{}}; \node [anchor=west,pnode,minimum width=0.5em] (pma1) at ([xshift=0.3em]sma1.east) {\tiny{}};
\node [anchor=west,pnode,minimum width=0.2em] (pma2) at ([xshift=0.3em]sma2.east) {\tiny{}}; \node [anchor=west,pnode,minimum width=0.2em] (pma2) at ([xshift=0.3em]sma2.east) {\tiny{}};
...@@ -53,15 +62,18 @@ ...@@ -53,15 +62,18 @@
\node [anchor=west,snode,minimum width=2em] (sma4) at ([xshift=3.5em,yshift=0em]sma1.east) {\tiny{}}; \node [anchor=west,snode,minimum width=2em] (sma4) at ([xshift=3.5em,yshift=0em]sma1.east) {\tiny{}};
\node [anchor=north west,snode,minimum width=3em] (sma5) at ([yshift=-0.3em]sma4.south west) {\tiny{}}; \node [anchor=north west,snode,minimum width=3em] (sma5) at ([yshift=-0.3em]sma4.south west) {\tiny{}};
\node [anchor=north west,snode,minimum width=3em] (sma6) at ([yshift=-0.3em]sma5.south west) {\tiny{}}; \node [anchor=north west,snode,minimum width=3em] (sma6) at ([yshift=-0.3em]sma5.south west) {\tiny{}};
\node [anchor=south] (label4) at ([yshift=-3em]sma5.south) {\footnotesize{批次2}};
\node [anchor=west,pnode,minimum width=0.7em] (pma4) at ([xshift=0.3em]sma4.east) {\tiny{}}; \node [anchor=west,pnode,minimum width=0.7em] (pma4) at ([xshift=0.3em]sma4.east) {\tiny{}};
\node [rectangle,inner sep=0.5em,rounded corners=2pt,very thick,dotted,draw=ugreen!80] [fit = (sma4) (sma6) (pma4)] (box3) {}; \node [rectangle,inner sep=0.5em,rounded corners=2pt,very thick,dotted,draw=ugreen!80] [fit = (sma4) (sma6) (pma4)] (box3) {};
\draw [->,very thick] (box0.east) -- (model.west); \draw [->,very thick] (box0.east) -- ([xshift=0.3em]model.west);
\draw [->,thick] (model.east) .. controls +(east:0.5) and +(west:0.5) .. ([xshift=-1em]box1.west); \draw [->,thick] (model.east) .. controls +(east:0.5) and +(west:0.5) .. ([xshift=-1.8em]box1.west);
\draw [->,thick] (model.east) .. controls +(east:0.5) and +(west:0.5) .. ([xshift=-1em]box2.west); \draw [->,thick] (model.east) .. controls +(east:0.5) and +(west:0.5) .. ([xshift=-1.8em]box2.west);
\draw [->,very thick] (box2.east) -- (box3.west); \draw [-,very thick] ([xshift=0.3em]box2.east) -- ([xshift=-0.3em]box3.west);
\draw [-,very thick] ([xshift=0.782em,yshift=0.5em]box2.east) -- ([xshift=0.782em,yshift=-0.5em]box2.east);
%%%%% %%%%%
\node [] (t10) at ([yshift=1.5em]box1.north) {$t_1$}; \node [] (t10) at ([yshift=1.5em]box1.north) {$t_1$};
......
...@@ -5,19 +5,27 @@ ...@@ -5,19 +5,27 @@
% Axis geometry.
width=8cm, width=8cm,
height=5cm, height=5cm,
yticklabel style={/pgf/number format/.cd,fixed,precision=2}, yticklabel style={/pgf/number format/.cd,fixed,precision=2},
% The x tick labels are typeset by pgfplots (see xticklabels below) rather than being
% hidden with color=white and re-drawn as nodes at hard-coded cm offsets outside the axis.
xticklabel style={/pgf/number format/.cd,fixed,precision=2}, xticklabel style={/pgf/number format/.cd,fixed,precision=2},
% Closing full-width parenthesis restored in the x label.
xlabel={\footnotesize{搜索束大小(取log)}},ylabel={\footnotesize{BLEU\ (\%)}}, xlabel={\footnotesize{搜索束大小(取$\log$)}},ylabel={\footnotesize{BLEU\ \%}},
ymin=28.8,ymax=30.4, ymin=28.8,ymax=30.4,
xmin=0,xmax=7, xmin=0,xmax=7,
% Tick positions 2.32, 3.32, 4.91 and 6.64 are the base-2 logarithms of 5, 10, 30 and 100.
xtick={0,1,2,3,4,5,6,7}, xtick={0,1,2.32,3.32,4.91,6.64},
ytick={28.8,29.0,29.2,29.4,29.6,29.8,30.0,30.2,30.4}, ytick={28.8,29.0,29.2,29.4,29.6,29.8,30.0,30.2,30.4},
% Exactly one label per xtick entry, in the same order.
xticklabels={0,1,2,3,4,5,6,7}, xticklabels={$\log$1,$\log$2,$\log$5,$\log$10,$\log$30,$\log$100},
yticklabels={28.8,29.0,29.2,29.4,29.6,29.8,30.0,30.2,30.4}, yticklabels={28.8,29.0,29.2,29.4,29.6,29.8,30.0,30.2,30.4},
legend style={yshift=-5em,xshift=0em,legend cell align=left,legend plot pos=right} legend style={yshift=-5em,xshift=0em,legend cell align=left,legend plot pos=right}
] ]
% BLEU against log2(beam size). The redundant mark=square was dropped: the later mark=star always won.
\addplot[purple,mark=square,mark=star,very thick] coordinates {(0,29.3) (1,29.7) (1.58,30.05) (2.32,30.1) (2.73,30.2) (3.32,30.3) (3.84,30.2) (4.23,30.08) (4.91,29.98) (5.81,29.6)(6.64,28.8) }; \addplot[purple,mark=star,very thick] coordinates {(0,29.3) (1,29.7) (1.58,30.05) (2.32,30.1) (3,30.2) (3.32,30.3) (3.9,30.2) (4.32,30.08) (4.91,29.98) (5.91,29.6)(6.64,28.8) };
\end{axis} \end{axis}
} }
\end{tikzpicture} \end{tikzpicture}
......
...@@ -6,29 +6,29 @@ ...@@ -6,29 +6,29 @@
% Sub-figure captioned 假设选择: three model boxes, one output box per model,
% a dashed frame around the outputs, and an arrow labelled 选择 to the final output.
\subfigure[\small{假设选择}] \subfigure[\small{假设选择}]
{ {
\begin{tikzpicture}[scale=0.5] \begin{tikzpicture}[scale=0.5]
% Node styles local to this picture: `system` for model boxes, `output` for
% rounded, centre-aligned output boxes.
\tikzstyle{system} = [rectangle,very thick,minimum width=1cm,font=\tiny]; \tikzstyle{system} = [rectangle,thick,minimum width=1.2cm,font=\scriptsize];
\tikzstyle{output} = [rectangle,very thick,rounded corners=3pt,minimum width=1cm,align=center,font=\tiny]; \tikzstyle{output} = [rectangle,thick,rounded corners=3pt,minimum width=1.2cm,align=center,font=\scriptsize];
\begin{scope} \begin{scope}
% Model column, built bottom-up: model3 sits at the origin, model2 and model1
% are each anchored by their south side above the previous node's north side.
% NOTE(review): the colour `ugreen` is not defined in this hunk -- confirm it is defined elsewhere.
\node [system,draw=orange!70,text=orange] (model3) at (0,0) {模型 $3$}; \node [system,fill=orange!20,draw] (model3) at (0,0) {模型 $3$};
\node [system,draw=ugreen!70,text=ugreen,anchor=south] (model2) at ([yshift=0.3cm]model3.north) {模型 $2$}; \node [system,fill=ugreen!20,draw,anchor=south] (model2) at ([yshift=0.5cm]model3.north) {模型 $2$};
\node [system,draw=red!70,text=red,anchor=south] (model1) at ([yshift=0.3cm]model2.north) {模型 $1$}; \node [system,fill=red!20,draw,anchor=south] (model1) at ([yshift=0.5cm]model2.north) {模型 $1$};
% Output column: each output is anchored by its west side to the right of the
% east side of the model with the same index.
\node [output,draw=orange!70,text=orange,anchor=west] (output3) at ([xshift=0.5cm]model3.east) {输出 $3$}; \node [output,fill=orange!20,draw,anchor=west] (output3) at ([xshift=0.8cm]model3.east) {输出 $3$};
\node [output,draw=ugreen!70,text=ugreen,anchor=west] (output2) at ([xshift=0.5cm]model2.east) {输出 $2$}; \node [output,fill=ugreen!20,draw,anchor=west] (output2) at ([xshift=0.8cm]model2.east) {输出 $2$};
\node [output,draw=red!70,text=red,anchor=west] (output1) at ([xshift=0.5cm]model1.east) {输出 $1$}; \node [output,fill=red!20,draw,anchor=west] (output1) at ([xshift=0.8cm]model1.east) {输出 $1$};
% Dashed frame fitted around the three outputs, drawn on the `background` layer
% and named `output` so later nodes and arrows can refer to the group.
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
\node [draw,thick,dashed,rounded corners=3pt,inner sep=2pt,fit=(output1) (output2) (output3)] (output) {}; \node [draw,thick,dashed,rounded corners=3pt,inner sep=3pt,fit=(output1) (output2) (output3)] (output) {};
\end{pgfonlayer} \end{pgfonlayer}
% Final output box, placed to the right of the dashed frame.
% NOTE(review): `right=... of` presumably relies on the TikZ positioning library, and
% `cocoabrown` is not defined in this hunk -- confirm both are available in the preamble.
\node [output,draw=cocoabrown!70,text=cocoabrown,minimum width=1cm,right=1cm of output] (final) {最终\\输出}; \node [output,fill=cocoabrown!20,draw,minimum width=1.2cm,right=1cm of output] (final) {最终\\输出};
% Arrows: each model to its own output, then the output group to the final
% output with the label 选择 above the middle of the arrow.
\draw [->,very thick] (model1) to (output1); \draw [->,very thick] (model1) to (output1);
\draw [->,very thick] (model2) to (output2); \draw [->,very thick] (model2) to (output2);
\draw [->,very thick] (model3) to (output3); \draw [->,very thick] (model3) to (output3);
\draw [->,very thick] (output) to node [above,pos=0.5,font=\tiny] {选择} (final); \draw [->,very thick] (output) to node [above,pos=0.5,font=\scriptsize] {选择} (final);
\end{scope} \end{scope}
\end{tikzpicture} \end{tikzpicture}
} }
...@@ -36,23 +36,23 @@ ...@@ -36,23 +36,23 @@
\subfigure[\small{预测融合}] \subfigure[\small{预测融合}]
{ {
\begin{tikzpicture}[scale=0.5] \begin{tikzpicture}[scale=0.5]
\tikzstyle{system} = [rectangle,very thick,minimum width=1cm,font=\tiny]; \tikzstyle{system} = [rectangle,thick,minimum width=1.2cm,font=\scriptsize];
\tikzstyle{output} = [rectangle,very thick,rounded corners=3pt,minimum width=1cm,align=center,font=\tiny]; \tikzstyle{output} = [rectangle,thick,rounded corners=3pt,minimum width=1.2cm,align=center,font=\scriptsize];
\begin{scope} \begin{scope}
\node [system,draw=orange!70,text=orange] (model3) at (0,0) {模型 $3$}; \node [system,fill=orange!20,draw] (model3) at (0,0) {模型 $3$};
\node [system,draw=ugreen!70,text=ugreen,anchor=south] (model2) at ([yshift=0.3cm]model3.north) {模型 $2$}; \node [system,fill=ugreen!20,draw,anchor=south] (model2) at ([yshift=0.5cm]model3.north) {模型 $2$};
\node [system,draw=red!70,text=red,anchor=south] (model1) at ([yshift=0.3cm]model2.north) {模型 $1$}; \node [system,fill=red!20,draw,anchor=south] (model1) at ([yshift=0.5cm]model2.north) {模型 $1$};
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
\node [draw,thick,dashed,inner sep=2pt,fit=(model3) (model2) (model1)] (ensemble) {}; \node [draw,thick,dashed,inner sep=3pt,fit=(model3) (model2) (model1)] (ensemble) {};
\end{pgfonlayer} \end{pgfonlayer}
\node [system,draw=ugreen!70,text=ugreen,right=1cm of ensemble] (model) {模型}; \node [system,fill=ugreen!20,draw,right=1cm of ensemble] (model) {模型};
\node [output,draw=cocoabrown!70,text=cocoabrown,minimum width=1cm,anchor=west] (final) at ([xshift=0.5cm]model.east) {最终\\输出}; \node [output,fill=cocoabrown!20,draw,minimum width=1.2cm,anchor=west] (final) at ([xshift=0.8cm]model.east) {最终\\输出};
\draw [->,very thick] (ensemble) to node [above,pos=0.5,font=\tiny] {融合} (model); \draw [->,very thick] (ensemble) to node [above,pos=0.5,font=\scriptsize] {融合} (model);
\draw [->,very thick] (model) to (final); \draw [->,very thick] (model) to (final);
\end{scope} \end{scope}
...@@ -63,25 +63,25 @@ ...@@ -63,25 +63,25 @@
\subfigure[\small{译文重组}] \subfigure[\small{译文重组}]
{ {
\begin{tikzpicture}[scale=0.5] \begin{tikzpicture}[scale=0.5]
\tikzstyle{system} = [rectangle,very thick,minimum width=1cm,font=\tiny]; \tikzstyle{system} = [rectangle,thick,minimum width=1.2cm,font=\scriptsize];
\tikzstyle{output} = [rectangle,very thick,rounded corners=3pt,minimum width=1cm,align=center,font=\tiny]; \tikzstyle{output} = [rectangle,thick,rounded corners=3pt,minimum width=1.2cm,align=center,font=\scriptsize];
\tikzstyle{dot} = [circle,fill=blue!40!white,minimum size=5pt,inner sep=0pt]; \tikzstyle{dot} = [circle,fill=blue!40!white,minimum size=5pt,inner sep=0pt];
\begin{scope} \begin{scope}
\node [system,draw=orange!70,text=orange] (model3) at (0,0) {模型 $3$}; \node [system,fill=orange!20,draw] (model3) at (0,0) {模型 $3$};
\node [system,draw=ugreen!70,text=ugreen,anchor=south] (model2) at ([yshift=0.3cm]model3.north) {模型 $2$}; \node [system,fill=ugreen!20,draw,anchor=south] (model2) at ([yshift=0.5cm]model3.north) {模型 $2$};
\node [system,draw=red!70,text=red,anchor=south] (model1) at ([yshift=0.3cm]model2.north) {模型 $1$}; \node [system,fill=red!20,draw,anchor=south] (model1) at ([yshift=0.5cm]model2.north) {模型 $1$};
\node [output,draw=orange!70,text=orange,anchor=west] (output3) at ([xshift=0.5cm]model3.east) {输出 $3$}; \node [output,fill=orange!20,draw,anchor=west] (output3) at ([xshift=0.8cm]model3.east) {输出 $3$};
\node [output,draw=ugreen!70,text=ugreen,anchor=west] (output2) at ([xshift=0.5cm]model2.east) {输出 $2$}; \node [output,fill=ugreen!20,draw,anchor=west] (output2) at ([xshift=0.8cm]model2.east) {输出 $2$};
\node [output,draw=red!70,text=red,anchor=west] (output1) at ([xshift=0.5cm]model1.east) {输出 $1$}; \node [output,fill=red!20,draw,anchor=west] (output1) at ([xshift=0.8cm]model1.east) {输出 $1$};
\draw [->,very thick] (model1) to (output1); \draw [->,very thick] (model1) to (output1);
\draw [->,very thick] (model2) to (output2); \draw [->,very thick] (model2) to (output2);
\draw [->,very thick] (model3) to (output3); \draw [->,very thick] (model3) to (output3);
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
\node [draw,thick,dashed,rounded corners=3pt,inner sep=2pt,fit=(output1) (output2) (output3)] (output) {}; \node [draw,thick,dashed,rounded corners=3pt,inner sep=3pt,fit=(output1) (output2) (output3)] (output) {};
\end{pgfonlayer} \end{pgfonlayer}
\node [dot,anchor=west] (lattice1) at ([shift={(1.5cm,0.5cm)}]output2.east) {}; \node [dot,anchor=west] (lattice1) at ([shift={(1.5cm,0.5cm)}]output2.east) {};
...@@ -98,14 +98,14 @@ ...@@ -98,14 +98,14 @@
\draw [-latex,blue] (lattice5) to [out=-60,in=-90] (lattice3); \draw [-latex,blue] (lattice5) to [out=-60,in=-90] (lattice3);
% Background-layer panel fitted around the five lattice dots, labelled 词格 above it:
% white fill, drop shadow, rounded corners and a blue outline.
% The outline is restored with draw=blue; `fill=blue,fill=white` left the first fill
% overridden by the second and silently removed the border.
% NOTE(review): if the border was dropped on purpose, use plain `fill=white` instead.
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
\node [draw=blue,fill=white,drop shadow,thick,rounded corners=3pt,inner sep=5pt,fit=(lattice1) (lattice2) (lattice3) (lattice4) (lattice5),label={[font=\tiny,label distance=0pt]90:词格}] (lattice) {}; \node [draw=blue,fill=white,drop shadow,thick,rounded corners=3pt,inner sep=5pt,fit=(lattice1) (lattice2) (lattice3) (lattice4) (lattice5),label={[font=\scriptsize,label distance=0pt]90:词格}] (lattice) {};
\end{pgfonlayer} \end{pgfonlayer}
\draw [->,very thick] (output) to (lattice); \draw [->,very thick] (output) to (lattice);
\node [system,draw=purple,text=purple,anchor=west] (model) at ([xshift=5.3cm]output1.east) {模型}; \node [system,fill=purple!20,draw,anchor=west] (model) at ([xshift=5.3cm]output1.east) {模型};
\node [output,draw=cocoabrown!70,text=cocoabrown,minimum width=1cm,right=1.3cm of lattice] (final) {最终输出}; \node [output,fill=cocoabrown!20,draw,minimum width=1.2cm,right=1.5cm of lattice] (final) {最终输出};
\draw [->,very thick] (model) |- (final); \draw [->,very thick] (model) |- (final);
\draw [->,very thick] (lattice) -- (final); \draw [->,very thick] (lattice) -- (final);
......
\definecolor{cocoabrown}{rgb}{0.82, 0.41, 0.12} \definecolor{cocoabrown}{rgb}{0.82, 0.41, 0.12}
\begin{tikzpicture} \begin{tikzpicture}
\tikzstyle{system} = [rectangle,very thick,minimum width=1.5cm,font=\scriptsize]; \tikzstyle{system} = [rectangle,thick,minimum width=1.2cm,minimum height=0.6cm,font=\scriptsize];
\tikzstyle{output} = [rectangle,very thick,rounded corners=3pt,minimum width=1.5cm,align=center,font=\scriptsize]; \tikzstyle{output} = [rectangle,thick,rounded corners=3pt,minimum width=1.2cm,align=center,font=\scriptsize];
\begin{scope}[local bounding box=MULTIPLE] \begin{scope}[local bounding box=MULTIPLE]
\node [system,draw=orange!70,text=orange] (engine3) at (0,0) {系统 $n$}; \node [system,fill=orange!20,draw] (engine3) at (0,0) {系统 $n$};
\node [system,draw=ugreen!70,text=ugreen,anchor=south] (engine2) at ([yshift=0.6cm]engine3.north) {系统 $2$}; \node [system,fill=ugreen!20,draw,anchor=south] (engine2) at ([yshift=0.6cm]engine3.north) {系统 $2$};
\node [system,draw=red!70,text=red,anchor=south] (engine1) at ([yshift=0.3cm]engine2.north) {系统 $1$}; \node [system,fill=red!20,draw,anchor=south] (engine1) at ([yshift=0.3cm]engine2.north) {系统 $1$};
\node [output,draw=orange!70,text=orange,anchor=west] (output3) at ([xshift=0.5cm]engine3.east) {输出 $n$}; \node [output,fill=orange!20,draw,anchor=west] (output3) at ([xshift=0.5cm]engine3.east) {输出 $n$};
\node [output,draw=ugreen!70,text=ugreen,anchor=west] (output2) at ([xshift=0.5cm]engine2.east) {输出 $2$}; \node [output,fill=ugreen!20,draw,anchor=west] (output2) at ([xshift=0.5cm]engine2.east) {输出 $2$};
\node [output,draw=red!70,text=red,anchor=west] (output1) at ([xshift=0.5cm]engine1.east) {输出 $1$}; \node [output,fill=red!20,draw,anchor=west] (output1) at ([xshift=0.5cm]engine1.east) {输出 $1$};
\draw [very thick,decorate,decoration={brace}] ([xshift=3pt]output1.north east) to node [midway,name=final] {} ([xshift=3pt]output3.south east); \draw [very thick,decorate,decoration={brace}] ([xshift=3pt]output1.north east) to node [midway,name=final] {} ([xshift=3pt]output3.south east);
\node [output,draw=cocoabrown!70,text=cocoabrown,minimum width=1cm,right=0pt of final,minimum height=2.5em] () {最终\\输出}; \node [output,fill=cocoabrown!20,draw,minimum width=1cm,right=3pt of final,minimum height=2.5em] () {最终\\输出};
\draw [->,very thick] (engine1) to (output1); \draw [->,very thick] (engine1) to (output1);
\draw [->,very thick] (engine2) to (output2); \draw [->,very thick] (engine2) to (output2);
...@@ -25,15 +25,15 @@ ...@@ -25,15 +25,15 @@
\end{scope} \end{scope}
\begin{scope}[local bounding box=SINGLE] \begin{scope}[local bounding box=SINGLE]
\node [output,draw=ugreen!70,text=ugreen,anchor=west] (output3) at ([xshift=4cm]output3.east) {输出 $n$}; \node [output,fill=ugreen!20,draw,anchor=west] (output3) at ([xshift=4cm]output3.east) {输出 $n$};
\node [output,draw=ugreen!70,text=ugreen,anchor=west] (output2) at ([xshift=4cm]output2.east) {输出 $2$}; \node [output,fill=ugreen!20,draw,anchor=west] (output2) at ([xshift=4cm]output2.east) {输出 $2$};
\node [output,draw=ugreen!70,text=ugreen,anchor=west] (output1) at ([xshift=4cm]output1.east) {输出 $1$}; \node [output,fill=ugreen!20,draw,anchor=west] (output1) at ([xshift=4cm]output1.east) {输出 $1$};
\node [system,draw=ugreen!70,text=ugreen,anchor=east,align=center,inner sep=1.9pt] (engine) at ([xshift=-0.5cm]output2.west) {单系统}; \node [system,fill=ugreen!20,draw,anchor=east,align=center,inner sep=1.9pt] (engine) at ([xshift=-0.5cm]output2.west) {单系统};
\draw [very thick,decorate,decoration={brace}] ([xshift=3pt]output1.north east) to node [midway,name=final] {} ([xshift=3pt]output3.south east); \draw [very thick,decorate,decoration={brace}] ([xshift=3pt]output1.north east) to node [midway,name=final] {} ([xshift=3pt]output3.south east);
\node [output,draw=cocoabrown!70,text=cocoabrown,minimum width=1cm,right=0pt of final,minimum height=2.5em] () {最终\\输出}; \node [output,fill=cocoabrown!20,draw,minimum width=1cm,right=3pt of final,minimum height=2.5em] () {最终\\输出};
\draw [->,very thick] (engine.east) to (output1.west); \draw [->,very thick] (engine.east) to (output1.west);
\draw [->,very thick] (engine.east) to (output2.west); \draw [->,very thick] (engine.east) to (output2.west);
......
...@@ -5,37 +5,44 @@ ...@@ -5,37 +5,44 @@
\tikzstyle{er} = [rectangle,minimum width=2.5cm,minimum height=1.5cm,rounded corners,text centered,draw,drop shadow] \tikzstyle{er} = [rectangle,minimum width=2.5cm,minimum height=1.5cm,rounded corners,text centered,draw,drop shadow]
\begin{tikzpicture}[node distance = 0,scale = 0.75] \begin{tikzpicture}[node distance = 0,scale = 0.75]
\tikzstyle{every node}=[scale=0.75] \tikzstyle{every node}=[scale=0.75]
\node (encoder)[er,thick,draw,fill=ugreen!20]{\Large{编码器}}; \node (encoder)[er,thick,draw,fill=red!20,minimum width=2.8cm]{\Large{编码器}};
\node (decoder_1)[er,thick,draw,right of=encoder,xshift=4cm,fill=red!20]{\Large{解码器}}; \node (lenpre)[er,anchor=north,thick,draw,fill=yellow!20,minimum height=0.8cm] at ([yshift=1.5cm]encoder.north){\Large{长度预测器}};
\node (decoder_2)[er,thick,draw,right of=decoder_1,xshift=4cm,fill=red!20]{\Large{解码器}}; \node (decoder_1)[er,thick,draw,right of=encoder,xshift=5cm,fill=blue!20]{\Large{解码器}};
\node (point)[right of=decoder_2,xshift=2.5cm,]{\LARGE{...}}; \node (decoder_2)[er,thick,draw,right of=decoder_1,xshift=3.7cm,fill=blue!20]{\Large{解码器}};
\node (decoder_3)[er,thick,draw,right of=point,xshift=2.5cm,fill=red!20]{\Large{解码器}}; \node (point)[right of=decoder_2,xshift=3cm,]{\LARGE{...}};
\draw [->,very thick,draw=black!70]([xshift=0.2cm]encoder.east) -- ([xshift=-0.2cm]decoder_1.west); \node (decoder_3)[er,thick,draw,right of=point,xshift=3cm,fill=blue!20]{\Large{解码器}};
%\draw [->,very thick,draw=black!70]([xshift=0.2cm]decoder_1.east) -- ([xshift=-0.2cm]decoder_2.west); \draw [->,very thick,draw=black!70]([xshift=0cm]encoder.east) -- ([xshift=-0cm]decoder_1.west);
%\draw [->,very thick,draw=black!70]([xshift=0.2cm]decoder_2.east) -- ([xshift=-0.1cm]point.west);
%\draw [->,very thick,draw=black!70]([xshift=0.1cm]point.east) -- ([xshift=-0.2cm]decoder_3.west); \draw [->,very thick,draw=black!70]([xshift=0,yshift=-1cm]encoder.south) -- ([xshift=0]encoder.south);
\draw [->,very thick,draw=black!70](encoder.north) -- (lenpre.south);
\draw [->,very thick,draw=black!70]([xshift=0,yshift=-1cm]encoder.south) -- ([xshift=0,yshift=-0.2cm]encoder.south);
\draw [->,very thick,draw=black!70]([xshift=0,yshift=0.2cm]encoder.north) -- ([xshift=0,yshift=1cm]encoder.north);
\node [below of = encoder,xshift=0cm,yshift=2.2cm]{预测目标长度};
\node [below of = encoder,xshift=0cm,yshift=-2.2cm]{\Large$\seq{x}$}; \node [below of = encoder,xshift=0cm,yshift=-2.2cm]{\Large$\seq{x}$};
\draw [->,very thick,draw=black!70]([xshift=0,yshift=-1cm]decoder_1.south) -- ([xshift=0,yshift=-0.2cm]decoder_1.south); \draw [->,very thick,draw=black!70]([xshift=0,yshift=-1cm]decoder_1.south) -- ([xshift=0]decoder_1.south);
\draw [->,very thick,draw=black!70]([xshift=0,yshift=0.2cm]decoder_1.north) -- ([xshift=0,yshift=1cm]decoder_1.north); \draw [->,very thick,draw=black!70]([xshift=0]decoder_1.north) -- ([xshift=0,yshift=1cm]decoder_1.north);
\node [below of = decoder_1,xshift=0cm,yshift=-2.2cm]{\Large$\seq{x'}$}; \node (d1x) [below of = decoder_1,xshift=0cm,yshift=-2.2cm]{\Large$\seq{x'}$};
\draw [-,very thick,draw=black!70] (lenpre.east) --([xshift=1.26cm]lenpre.east);
\draw [-,very thick,draw=black!70,dashed] ([xshift=1.26cm]lenpre.east) -- ([xshift=-2cm]d1x.west);
\draw [->,very thick,draw=black!70] ([xshift=-2cm]d1x.west) -- ([xshift=0cm]d1x.west);
\node (line1_1)[below of = decoder_1,xshift=0cm,yshift=2.2cm]{\Large$\seq{y}^{[1]}$}; \node (line1_1)[below of = decoder_1,xshift=0cm,yshift=2.2cm]{\Large$\seq{y}^{[1]}$};
\draw [->,thick,]([xshift=0,yshift=-1cm]decoder_2.south) -- ([xshift=0,yshift=-0.2cm]decoder_2.south); \draw [->,thick,]([xshift=0,yshift=-1cm]decoder_2.south) -- ([xshift=0]decoder_2.south);
\draw [->,very thick,draw=black!70]([xshift=0,yshift=0.2cm]decoder_2.north) -- ([xshift=0,yshift=1cm]decoder_2.north); \draw [->,very thick,draw=black!70]([xshift=0]decoder_2.north) -- ([xshift=0,yshift=1cm]decoder_2.north);
\node (line1_2)[below of = decoder_2,xshift=0cm,yshift=-2.2cm]{\Large$\seq{y}^{[1]}$}; \node (line1_2)[below of = decoder_2,xshift=0cm,yshift=-2.2cm]{\Large$\seq{y}^{[1]}$};
\node [below of = decoder_2,xshift=0cm,yshift=2.2cm]{\Large$\seq{y}^{[2]}$}; \node (line2_1)[below of = decoder_2,xshift=0cm,yshift=2.2cm]{\Large$\seq{y}^{[2]}$};
\node (line2_2)[below of = point,xshift=0cm,yshift=-2.2cm]{};
\node (line3_1)[below of = point,xshift=0cm,yshift=2.2cm]{};
\draw [->,very thick,draw=black!70]([xshift=0,yshift=-1cm]decoder_3.south) -- ([xshift=0,yshift=-0.2cm]decoder_3.south); \draw [->,very thick,draw=black!70]([xshift=0,yshift=-1cm]decoder_3.south) -- ([xshift=0]decoder_3.south);
\draw [->,very thick,draw=black!70]([xshift=0,yshift=0.2cm]decoder_3.north) -- ([xshift=0,yshift=1cm]decoder_3.north); \draw [->,very thick,draw=black!70]([xshift=0]decoder_3.north) -- ([xshift=0,yshift=1cm]decoder_3.north);
\node (line3_2)[below of = decoder_3,xshift=0cm,yshift=-2.2cm]{\Large$\seq{y}^{[N-1]}$}; \node (line3_2)[below of = decoder_3,xshift=0cm,yshift=-2.2cm]{\Large$\seq{y}^{[N-1]}$};
\node [below of = decoder_3,xshift=0cm,yshift=2.2cm]{\Large$\seq{y}^{[N]}$}; \node [below of = decoder_3,xshift=0cm,yshift=2.2cm]{\Large$\seq{y}^{[N]}$};
\draw[->,very thick,draw=black!70, out=0, in=180,dotted] (line1_1.east) to (line1_2.west); \draw[->,very thick,draw=black!70, out=0, in=180,dotted] (line1_1.east) to (line1_2.west);
\draw[->,very thick,draw=black!70, out=0, in=180,dotted] ([xshift=4cm]line1_1.east) to ([xshift=3cm]line1_2.west); \draw[->,very thick,draw=black!70, out=0, in=180,dotted] (line2_1.east) to (line2_2.west);
\draw[->,very thick,draw=black!70, out=0, in=180,dotted] ([xshift=6cm]line1_1.east) to (line3_2.west); \draw[->,very thick,draw=black!70, out=0, in=180,dotted] (line3_1.east) to (line3_2.west);
\draw [->,very thick,draw=black!70] ([xshift=0.5cm]encoder.east) -- ([xshift=0.5cm,yshift=-2.8cm]encoder.east) --([xshift=5.55cm,yshift=-2.8cm]encoder.east) --([xshift=-0.5cm]decoder_2.west) -- (decoder_2.west);
\draw [->,very thick,draw=black!70] ([xshift=5.55cm,yshift=-2.8cm]encoder.east) -- ([xshift=9.45cm,yshift=-2.8cm]encoder.east) --([xshift=-0.5cm]point.west) -- (point.west);
\draw [->,very thick,draw=black!70] ([xshift=9.45cm,yshift=-2.8cm]encoder.east) -- ([xshift=11.55cm,yshift=-2.8cm]encoder.east) -- ([xshift=-0.5cm]decoder_3.west) -- (decoder_3.west);
\end{tikzpicture} \end{tikzpicture}
\ No newline at end of file
...@@ -2,37 +2,33 @@ ...@@ -2,37 +2,33 @@
\tikzstyle{er} = [rectangle,minimum width=7cm,minimum height=2.5cm,text centered,draw,drop shadow,rounded corners] \tikzstyle{er} = [rectangle,minimum width=7cm,minimum height=2.5cm,text centered,draw,drop shadow,rounded corners]
\begin{tikzpicture}[node distance = 0,scale = 0.55] \begin{tikzpicture}[node distance = 0,scale = 0.55]
\tikzstyle{every node}=[scale=0.55] \tikzstyle{every node}=[scale=0.55]
\node (encoder)[er,thick,minimum width=5.5cm,fill=ugreen!20]{\huge{编码器}}; \node (encoder)[er,thick,minimum width=5.5cm,fill=red!20]{\huge{编码器}};
\node (decoder)[er,thick,right of=encoder,xshift=7.75cm,fill=red!20]{\huge{解码器}}; \node (decoder)[er,thick,right of=encoder,xshift=8.75cm,fill=blue!20]{\huge{解码器}};
\node (decoder_1)[er,thick,right of=decoder,xshift=8.75cm,fill=red!20]{\huge{解码器}}; \node (decoder_1)[er,thick,right of=decoder,xshift=8.75cm,fill=blue!20]{\huge{解码器}};
\draw [->,very thick,draw=blue!70]([xshift=0.2cm]encoder.east) -- ([xshift=-0.2cm]decoder.west); \draw [->,very thick,draw=black!70]([xshift=0cm]encoder.east) -- ([xshift=-0cm]decoder.west);
\begin{pgfonlayer}{background}
\draw [->,very thick,draw=blue!70]([xshift=0.2cm,yshift=-0.8em]encoder.east) -- ([xshift=-0.2cm,yshift=-0.8em]decoder_1.west);
\end{pgfonlayer}
\foreach \x in {-2.2cm,-1.1cm,...,2.2cm} \foreach \x in {-2.2cm,-1.1cm,...,2.2cm}
\draw [->,very thick,draw=black!70]([xshift=\x,yshift=-1cm]encoder.south) -- ([xshift=\x,yshift=-0.2cm]encoder.south); \draw [->,very thick,draw=black!70]([xshift=\x,yshift=-1cm]encoder.south) -- ([xshift=\x,yshift=-0.2cm]encoder.south);
\node [below of = encoder,xshift=-2.3cm,yshift=-2.95cm,scale=1.2]{\large{<LEN>}}; \node [below of = encoder,xshift=-2.3cm,yshift=-2.92cm,scale=1.2]{\small{<LEN>}};
\node [below of = encoder,xshift=-1cm,yshift=-2.9cm,scale=1.2]{\large{hello}}; \node [below of = encoder,xshift=-1cm,yshift=-2.9cm,scale=1.2]{\large{Hello}};
\node [below of = encoder,xshift=0cm,yshift=-3.05cm,scale=1.2]{,}; \node [below of = encoder,xshift=0cm,yshift=-3.05cm,scale=1.2]{,};
\node [below of = encoder,xshift=1.1cm,yshift=-2.9cm,scale=1.2]{\large{world}}; \node [below of = encoder,xshift=1.1cm,yshift=-2.9cm,scale=1.2]{\large{world}};
\node [below of = encoder,xshift=2.2cm,yshift=-2.9cm,scale=1.2]{!}; \node [below of = encoder,xshift=2.2cm,yshift=-2.9cm,scale=1.2]{!};
\draw [->,very thick,draw=black!70]([xshift=-2.2cm,yshift=0.2cm]encoder.north) -- ([xshift=-2.2cm,yshift=1cm]encoder.north);
\node [below of = encoder,xshift=-2.2cm,yshift=2.9cm,scale=1.5]{4};
\foreach \x in {-2.7cm,-0.9cm,...,2.8cm} \foreach \x in {-2.7cm,-0.9cm,...,2.8cm}
{\draw [->,very thick,draw=black!70]([xshift=\x,yshift=-1cm]decoder.south) -- ([xshift=\x,yshift=-0.2cm]decoder.south); {\draw [->,very thick,draw=black!70]([xshift=\x,yshift=-1cm]decoder.south) -- ([xshift=\x,yshift=-0.2cm]decoder.south);
\draw [->,very thick,draw=black!70]([xshift=\x,yshift=0.2cm]decoder.north) -- ([xshift=\x,yshift=1cm]decoder.north);} \draw [->,very thick,draw=black!70]([xshift=\x,yshift=0.2cm]decoder.north) -- ([xshift=\x,yshift=1cm]decoder.north);}
\node [below of = decoder,xshift=-3cm,yshift=-2.9cm,scale=1.6]{\small{<Mask>}}; \node (mask_1) [below of = decoder,xshift=-3cm,yshift=-2.9cm,scale=1.6]{\small{<Mask>}};
\node [below of = decoder,xshift=-1cm,yshift=-2.9cm,scale=1.6]{\small{<Mask>}}; \node [below of = decoder,xshift=-1cm,yshift=-2.9cm,scale=1.6]{\small{<Mask>}};
\node [below of = decoder,xshift=1cm,yshift=-2.9cm,scale=1.6]{\small{<Mask>}}; \node [below of = decoder,xshift=1cm,yshift=-2.9cm,scale=1.6]{\small{<Mask>}};
\node [below of = decoder,xshift=3cm,yshift=-2.9cm,scale=1.6]{\small{<Mask>}}; \node [below of = decoder,xshift=3cm,yshift=-2.9cm,scale=1.6]{\small{<Mask>}};
\node [below of = decoder,xshift=-3cm,yshift=2.9cm,scale=1.6]{你好}; \node [below of = decoder,xshift=-3cm,yshift=2.9cm,scale=1.6]{你好};
\node [below of = decoder,xshift=-1cm,yshift=2.7cm,scale=1.6]{}; \node [below of = decoder,xshift=-1cm,yshift=2.7cm,scale=1.6]{};
\node [below of = decoder,xshift=1cm,yshift=2.9cm,scale=1.6]{你好}; \node [below of = decoder,xshift=1cm,yshift=2.9cm,scale=1.6]{你好};
\node [below of = decoder,xshift=2.9cm,yshift=2.9cm,scale=1.6]{}; \node (line1)[below of = decoder,xshift=2.6cm,yshift=2.9cm,scale=1.6]{};
\foreach \x in {-2.7cm,-0.9cm,...,2.8cm} \foreach \x in {-2.7cm,-0.9cm,...,2.8cm}
...@@ -41,12 +37,18 @@ ...@@ -41,12 +37,18 @@
\node [below of = decoder_1,xshift=-2.7cm,yshift=2.9cm,scale=1.6]{你好}; \node [below of = decoder_1,xshift=-2.7cm,yshift=2.9cm,scale=1.6]{你好};
\node [below of = decoder_1,xshift=-0.9cm,yshift=2.7cm,scale=1.6]{}; \node [below of = decoder_1,xshift=-0.9cm,yshift=2.7cm,scale=1.6]{};
\node [below of = decoder_1,xshift=0.9cm,yshift=2.9cm,scale=1.6]{世界}; \node [below of = decoder_1,xshift=0.9cm,yshift=2.9cm,scale=1.6]{世界};
\node [below of = decoder_1,xshift=2.7cm,yshift=2.9cm,scale=1.6]{}; \node [below of = decoder_1,xshift=2.6cm,yshift=2.8cm,scale=1.6]{};
\node [below of = decoder_1,xshift=-2.7cm,yshift=-2.9cm,scale=1.6]{你好}; \node (line2)[below of = decoder_1,xshift=-2.7cm,yshift=-2.9cm,scale=1.6]{你好};
\node [below of = decoder_1,xshift=-0.9cm,yshift=-3cm,scale=1.6]{}; \node [below of = decoder_1,xshift=-0.9cm,yshift=-3cm,scale=1.6]{};
\node [below of = decoder_1,xshift=0.9cm,yshift=-2.9cm,scale=1.6]{\small{<Mask>}}; \node [below of = decoder_1,xshift=0.9cm,yshift=-2.9cm,scale=1.6]{\small{<Mask>}};
\node [below of = decoder_1,xshift=2.7cm,yshift=-2.9cm,scale=1.6]{}; \node [below of = decoder_1,xshift=2.6cm,yshift=-2.8cm,scale=1.6]{};
\draw [-,very thick,draw=black!70]([xshift=-2.2cm]encoder.north) -- ([xshift=-2.2cm,yshift=0.5cm]encoder.north)-- ([xshift=4.1cm,yshift=0.5cm]encoder.north);
\draw [-,very thick,draw=black!70,dashed]([xshift=4.1cm,yshift=0.5cm]encoder.north) -- ([xshift=-0.5cm]mask_1.west);
\draw [->,very thick,draw=black!70]([xshift=-0.5cm]mask_1.west) -- (mask_1.west);
\draw [->,very thick,draw=black!70]([xshift=0.5cm]encoder.east) -- ([xshift=0.5cm,yshift=-3.5cm]encoder.east) -- ([xshift=10.5cm,yshift=-3.5cm]encoder.east) -- ([xshift=-0.72cm]decoder_1.west) -- (decoder_1.west);
\draw [->,very thick,dotted] (line1.east) .. controls +(east:1.2) and +(west:1.2) ..(line2.west);
\node [below of = encoder,xshift=1.2cm,yshift=2.4cm,scale=1.7]{译文长度:4};
\end{tikzpicture} \end{tikzpicture}
\ No newline at end of file
...@@ -4,23 +4,33 @@ ...@@ -4,23 +4,33 @@
%%% outline %%% outline
%------------------------------------------------------------------------- %-------------------------------------------------------------------------
\begin{tikzpicture} \begin{tikzpicture}
\tikzstyle{word} = [font=\scriptsize] \tikzstyle{emb} = [font=\scriptsize,rounded corners=1pt, fill=orange!20, minimum width=1.8em,minimum height=1.5em,draw]
\tikzstyle{po} = [font=\scriptsize,rounded corners=1pt, fill=gray!20, minimum width=1.8em,minimum height=1.5em,draw]
\tikzstyle{tgt} = [minimum height=1.6em,minimum width=5.2em,fill=black!10!yellow!30,font=\footnotesize,drop shadow={shadow xshift=0.15em,shadow yshift=-0.15em,}] \tikzstyle{tgt} = [minimum height=1.6em,minimum width=5.2em,fill=black!10!yellow!30,font=\footnotesize,drop shadow={shadow xshift=0.15em,shadow yshift=-0.15em,}]
\tikzstyle{p} = [fill=ugreen!15,minimum width=0.4em,inner sep=0pt] \tikzstyle{p} = [fill=ugreen!15,minimum width=0.4em,inner sep=0pt]
\node[ rounded corners=3pt, fill=red!20, drop shadow, minimum width=10em,minimum height=4em,draw] (encoder) at (0,0) {Transformer 编码器 }; \node[ rounded corners=3pt, fill=red!20, drop shadow, minimum width=12em,minimum height=4em,draw] (encoder) at (0,0) {编码器};
\node[anchor=west, rounded corners=3pt, fill=blue!20, drop shadow, minimum width=14em,minimum height=4em,draw] (decoder) at ([xshift=0.8cm]encoder.east) {Transformer 解码器}; \node[anchor=north,rounded corners=3pt, fill=yellow!20, drop shadow, minimum width=12em,minimum height=2em,draw] (lenpre) at([yshift=3em]encoder.north){长度预测器};
\node[anchor=north] (lable) at([xshift=3.5em,yshift=2.5em]lenpre.north){译文长度:3};
\node[anchor=north,word] (en1) at ([yshift=-1.3em,xshift=-3em]encoder.south) {}; \node[anchor=west, rounded corners=3pt, fill=blue!20, drop shadow, minimum width=13em,minimum height=4em,draw] (decoder) at ([xshift=1cm]encoder.east) {解码器};
\node[anchor=north,word] (en2) at ([yshift=-1.3em,xshift=-1em]encoder.south) {};
\node[anchor=north,word] (en3) at ([yshift=-1.3em,xshift=1em]encoder.south) {}; \node[anchor=north,emb] (en1) at ([yshift=-1.3em,xshift=-4.5em]encoder.south) {${\mathbi e}$(干)};
\node[anchor=north,word] (en4) at ([yshift=-1.3em,xshift=3em]encoder.south) {}; \node[anchor=north,emb] (en2) at ([yshift=-1.3em,xshift=-1.5em]encoder.south) {${\mathbi e}$(得)};
\node[anchor=north,emb] (en3) at ([yshift=-1.3em,xshift=1.5em]encoder.south) {${\mathbi e}$(好)};
\node[anchor=north,emb] (en4) at ([yshift=-1.3em,xshift=4.5em]encoder.south) {${\mathbi e}$(!)};
\node[anchor=north,po] (po1) at ([yshift=-1.4em]en1.south) {PE(1)};
\node[anchor=north,po] (po2) at ([yshift=-1.4em]en2.south) {PE(2)};
\node[anchor=north,po] (po3) at ([yshift=-1.4em]en3.south) {PE(3)};
\node[anchor=north,po] (po4) at ([yshift=-1.4em]en4.south) {PE(4)};
\foreach \x in {1,2,3,4}{
\node [anchor=north] (plus\x) at ([yshift=-0.04em]en\x.south) {\large{\textbf{$\oplus$}}};
}
\node[anchor=north,word] (de1) at ([yshift=-1.3em,xshift=-5.2em]decoder.south) {1}; \node[anchor=north,po] (de1) at ([yshift=-1.3em,xshift=-4.5em]decoder.south) {PE(1)};
\node[anchor=north,word] (de2) at ([yshift=-1.3em]decoder.south) {2}; \node[anchor=north,po] (de2) at ([yshift=-1.3em]decoder.south) {PE(2)};
\node[anchor=north,word] (de3) at ([yshift=-1.3em,xshift=5em]decoder.south) {3}; \node[anchor=north,po] (de3) at ([yshift=-1.3em,xshift=4.5em]decoder.south) {PE(3)};
\node[rounded corners=3pt, minimum width=12em,minimum height=2em,draw,dashed,very thick] (box0) at ([yshift=-2.05em]decoder.south) {};
\node[p,anchor=south, minimum height=0.5em] (w1_1) at ([xshift=-7em,yshift=1.5em]decoder.north){}; \node[p,anchor=south, minimum height=0.5em] (w1_1) at ([xshift=-6.5em,yshift=1.5em]decoder.north){};
\node[p,anchor=south,minimum height=2em] (w1_2) at ([xshift=0.3em]w1_1.south east){}; \node[p,anchor=south,minimum height=2em] (w1_2) at ([xshift=0.3em]w1_1.south east){};
\node[p,anchor=south,minimum height=0.7em] (w1_3) at ([xshift=0.3em]w1_2.south east){}; \node[p,anchor=south,minimum height=0.7em] (w1_3) at ([xshift=0.3em]w1_2.south east){};
\node[p,anchor=south,minimum height=0.6em] (w1_4) at ([xshift=0.3em]w1_3.south east){}; \node[p,anchor=south,minimum height=0.6em] (w1_4) at ([xshift=0.3em]w1_3.south east){};
...@@ -29,7 +39,7 @@ ...@@ -29,7 +39,7 @@
\node[p,anchor=south,minimum height=0.6em] (w1_7) at ([xshift=0.3em]w1_6.south east){}; \node[p,anchor=south,minimum height=0.6em] (w1_7) at ([xshift=0.3em]w1_6.south east){};
\node[p,anchor=south,minimum height=0.8em] (w1_8) at ([xshift=0.3em]w1_7.south east){}; \node[p,anchor=south,minimum height=0.8em] (w1_8) at ([xshift=0.3em]w1_7.south east){};
\node[p,anchor=south, minimum height=0.5em] (w2_1) at ([xshift=-1.8em,yshift=1.5em]decoder.north){}; \node[p,anchor=south, minimum height=0.5em] (w2_1) at ([xshift=-1.9em,yshift=1.5em]decoder.north){};
\node[p,anchor=south,minimum height=2em] (w2_2) at ([xshift=0.3em]w2_1.south east){}; \node[p,anchor=south,minimum height=2em] (w2_2) at ([xshift=0.3em]w2_1.south east){};
\node[p,anchor=south,minimum height=0.7em] (w2_3) at ([xshift=0.3em]w2_2.south east){}; \node[p,anchor=south,minimum height=0.7em] (w2_3) at ([xshift=0.3em]w2_2.south east){};
\node[p,anchor=south,minimum height=0.6em] (w2_4) at ([xshift=0.3em]w2_3.south east){}; \node[p,anchor=south,minimum height=0.6em] (w2_4) at ([xshift=0.3em]w2_3.south east){};
...@@ -38,7 +48,7 @@ ...@@ -38,7 +48,7 @@
\node[p,anchor=south,minimum height=0.6em] (w2_7) at ([xshift=0.3em]w2_6.south east){}; \node[p,anchor=south,minimum height=0.6em] (w2_7) at ([xshift=0.3em]w2_6.south east){};
\node[p,anchor=south,minimum height=0.8em] (w2_8) at ([xshift=0.3em]w2_7.south east){}; \node[p,anchor=south,minimum height=0.8em] (w2_8) at ([xshift=0.3em]w2_7.south east){};
\node[p,anchor=south, minimum height=0.4em] (w3_1) at ([xshift=3.2em,yshift=1.5em]decoder.north){}; \node[p,anchor=south, minimum height=0.4em] (w3_1) at ([xshift=2.7em,yshift=1.5em]decoder.north){};
\node[p,anchor=south,minimum height=0.5em] (w3_2) at ([xshift=0.3em]w3_1.south east){}; \node[p,anchor=south,minimum height=0.5em] (w3_2) at ([xshift=0.3em]w3_1.south east){};
\node[p,anchor=south,minimum height=0.7em] (w3_3) at ([xshift=0.3em]w3_2.south east){}; \node[p,anchor=south,minimum height=0.7em] (w3_3) at ([xshift=0.3em]w3_2.south east){};
\node[p,anchor=south,minimum height=2em] (w3_4) at ([xshift=0.3em]w3_3.south east){}; \node[p,anchor=south,minimum height=2em] (w3_4) at ([xshift=0.3em]w3_3.south east){};
...@@ -54,30 +64,31 @@ ...@@ -54,30 +64,31 @@
\node[inner sep=0pt,font=\scriptsize] at ([yshift=0.45em]w3_4.north){!}; \node[inner sep=0pt,font=\scriptsize] at ([yshift=0.45em]w3_4.north){!};
\draw[->, thick] ([yshift=0.1em]en1.north) -- ([xshift=-3em,yshift=-0.1em]encoder.south); \draw[->, thick] ([yshift=0.1em]en1.north) -- ([xshift=-4.5em,yshift=-0.1em]encoder.south);
\draw[->, thick] ([yshift=0.1em]en2.north) -- ([xshift=-1em,yshift=-0.1em]encoder.south); \draw[->, thick] ([yshift=0.1em]en2.north) -- ([xshift=-1.5em,yshift=-0.1em]encoder.south);
\draw[->, thick] ([yshift=0.1em]en3.north) -- ([xshift=1em,yshift=-0.1em]encoder.south); \draw[->, thick] ([yshift=0.1em]en3.north) -- ([xshift=1.5em,yshift=-0.1em]encoder.south);
\draw[->, thick] ([yshift=0.1em]en4.north) -- ([xshift=3em,yshift=-0.1em]encoder.south); \draw[->, thick] ([yshift=0.1em]en4.north) -- ([xshift=4.5em,yshift=-0.1em]encoder.south);
\draw[->, thick] ([yshift=0.1em]de1.north) -- ([xshift=-5.2em,yshift=-0.1em]decoder.south); \draw[->, thick] ([yshift=0.1em]de1.north) -- ([xshift=-4.5em,yshift=-0.1em]decoder.south);
\draw[->, thick] ([yshift=0.1em]de2.north) -- ([xshift=0em,yshift=-0.1em]decoder.south); \draw[->, thick] ([yshift=0.1em]de2.north) -- ([xshift=0em,yshift=-0.1em]decoder.south);
\draw[->, thick] ([yshift=0.1em]de3.north) -- ([xshift=5em,yshift=-0.1em]decoder.south); \draw[->, thick] ([yshift=0.1em]de3.north) -- ([xshift=4.5em,yshift=-0.1em]decoder.south);
\draw[->, line width=1.5pt] (encoder.east) -- (decoder.west); \draw[->, line width=1.5pt] (encoder.east) -- (decoder.west);
\begin{pgfonlayer}{background}
{
\node[inner sep=2pt] [fit =(w1_1)(w1_2)(w1_8)](box1) {}; \node[inner sep=2pt] [fit =(w1_1)(w1_2)(w1_8)](box1) {};
\node[inner sep=2pt] [fit =(w2_1)(w2_2)(w2_8)] (box2){}; \node[inner sep=2pt] [fit =(w2_1)(w2_2)(w2_8)] (box2){};
\node[inner sep=2pt] [fit =(w3_1)(w3_2)(w3_8)] (box3){}; \node[inner sep=2pt] [fit =(w3_1)(w3_2)(w3_8)] (box3){};
} \draw[->, thick] ([yshift=0em]encoder.north) -- ([yshift=0em]lenpre.south);
\end{pgfonlayer} \draw[-, thick] ([yshift=0em]lenpre.north) -- ([yshift=1em]lenpre.north) -- ([xshift=7.1em,yshift=1em]lenpre.north);
\draw[-, thick,dashed] ([xshift=7.1em,yshift=1em]lenpre.north) -- ([xshift=-2em]box0.west);
\draw[->, thick] ([xshift=-2em]box0.west) -- (box0.west);
\draw[->,thick] ([yshift=-1.2em]box1.south) -- (box1.south); \draw[->,thick] ([yshift=-1.2em]box1.south) -- (box1.south);
\draw[->, thick] ([yshift=-1.2em]box2.south) -- (box2.south); \draw[->, thick] ([yshift=-1.2em]box2.south) -- (box2.south);
\draw[->, thick] ([yshift=-1.2em]box3.south) -- (box3.south); \draw[->, thick] ([yshift=-1.2em]box3.south) -- (box3.south);
\node[tgt,anchor=west,align=left] (tgt1) at ([xshift=2em]box3.east) {Good job !}; \node[tgt,anchor=west,align=left] (tgt1) at ([xshift=1.5em]box3.east) {Good job !};
\node[tgt,,anchor=north,align=left](tgt2) at ([yshift=-1em]tgt1.south) {Well done !}; \node[tgt,,anchor=north,align=left](tgt2) at ([yshift=-1em]tgt1.south) {Well done !};
\node[tgt,,anchor=north,align=left] (tgt3) at ([yshift=-1em]tgt2.south) {Good done !}; \node[tgt,,anchor=north,align=left] (tgt3) at ([yshift=-1em]tgt2.south) {Good done !};
\node[tgt,,anchor=north,align=left] (tgt4) at ([yshift=-1em]tgt3.south) {Well job !}; \node[tgt,,anchor=north,align=left] (tgt4) at ([yshift=-1em]tgt3.south) {Well job !};
......
...@@ -8,15 +8,16 @@ ...@@ -8,15 +8,16 @@
\tikzstyle{cir} = [draw,circle,minimum size=1em, thick,inner sep=0pt] \tikzstyle{cir} = [draw,circle,minimum size=1em, thick,inner sep=0pt]
%encoder %encoder
\node[layer,fill=red!15] (src_emb) at (0,0){\scriptsize\textbf{Input Embedding}}; \node[layer,fill=red!15] (src_emb) at (0,0){\scriptsize\textbf{Embedding}};
\node[anchor=south,layer,fill=yellow!20] (src_sa) at ([yshift=3.7em]src_emb.north){\scriptsize\textbf{Self-attention}}; \node[anchor=south,layer,fill=yellow!20] (src_sa) at ([yshift=3.7em]src_emb.north){\scriptsize\textbf{Self-attention}};
\node[anchor=south,layer,fill=orange!20] (src_ff) at ([yshift=1em]src_sa.north){\scriptsize\textbf{Feed Forward}}; \node[anchor=south,layer,fill=orange!20] (src_ff) at ([yshift=1em]src_sa.north){\scriptsize\textbf{Feed Forward}};
\node[anchor=south,layer,fill=blue!20] (src_sf) at ([yshift=2.6em]src_ff.north){\scriptsize\textbf{Softmax}}; \node[anchor=south,layer,fill=blue!20] (src_sf) at ([yshift=2.6em]src_ff.north){\scriptsize\textbf{Softmax}};
%decoder %decoder
\node[anchor=west,layer,fill=red!15] (tgt_emb) at ([xshift=4.4em]src_emb.east){\scriptsize\textbf{Output Embedding}}; \node[anchor=west,layer,fill=red!15] (tgt_emb) at ([xshift=4.4em]src_emb.east){\scriptsize\textbf{Embedding}};
\node[anchor=south,layer,fill=yellow!20] (tgt_sa) at ([yshift=3.7em]tgt_emb.north){\scriptsize\textbf{Self-attention}}; \node[anchor=south,layer,fill=yellow!20] (tgt_sa) at ([yshift=3.7em]tgt_emb.north){\scriptsize\textbf{Self-attention}};
\node[anchor=south,layer,fill=yellow!20] (tgt_pa) at ([yshift=1.5em]tgt_sa.north){\scriptsize\textbf{Positional Attention}}; \node[anchor=south,layer,fill=yellow!20] (tgt_pa) at ([yshift=1.5em]tgt_sa.north){\scriptsize\textbf{Positional Attention}};
\node[anchor=south,layer,draw=red,dashed,line width=2pt,minimum height=1.55em] (tgt_paa) at ([yshift=1.5em]tgt_sa.north){};
\node[anchor=south,layer,fill=yellow!20] (tgt_eda) at ([yshift=1.5em]tgt_pa.north){\scriptsize\textbf{Encoder-Decoder} \\ \scriptsize\textbf{Attention}}; \node[anchor=south,layer,fill=yellow!20] (tgt_eda) at ([yshift=1.5em]tgt_pa.north){\scriptsize\textbf{Encoder-Decoder} \\ \scriptsize\textbf{Attention}};
\node[anchor=south,layer,fill=orange!20] (tgt_ff) at ([yshift=1em]tgt_eda.north){\scriptsize\textbf{Feed Forward}}; \node[anchor=south,layer,fill=orange!20] (tgt_ff) at ([yshift=1em]tgt_eda.north){\scriptsize\textbf{Feed Forward}};
\node[anchor=south,layer,fill=green!20] (tgt_linear) at ([yshift=1.4em]tgt_ff.north){\scriptsize\textbf{Linear}}; \node[anchor=south,layer,fill=green!20] (tgt_linear) at ([yshift=1.4em]tgt_ff.north){\scriptsize\textbf{Linear}};
...@@ -63,6 +64,7 @@ ...@@ -63,6 +64,7 @@
\draw[line] (src_sf.north) -- (w3.south); \draw[line] (src_sf.north) -- (w3.south);
\draw[line] (tgt_sf.north) -- (output.south); \draw[line] (tgt_sf.north) -- (output.south);
\draw[line] (src.north) -- (src_emb.south); \draw[line] (src.north) -- (src_emb.south);
\draw[line] (tgt.north) -- (tgt_emb.south);
\draw[line,<->,out=-35,in=-145] ([xshift=-2em]src_sa.south) to ([xshift=2em]src_sa.south); \draw[line,<->,out=-35,in=-145] ([xshift=-2em]src_sa.south) to ([xshift=2em]src_sa.south);
\draw[line, rounded corners=2pt] (src_ff.north) -- ([yshift=1.1em]src_ff.north) -- ([xshift=-2.4em,yshift=-0.8em]tgt_eda.south) -- ([xshift=-2.4em]tgt_eda.south); \draw[line, rounded corners=2pt] (src_ff.north) -- ([yshift=1.1em]src_ff.north) -- ([xshift=-2.4em,yshift=-0.8em]tgt_eda.south) -- ([xshift=-2.4em]tgt_eda.south);
\draw[line, rounded corners=2pt] (src_ff.north) -- ([yshift=1.1em]src_ff.north) -- ([yshift=-0.8em]tgt_eda.south) -- (tgt_eda.south); \draw[line, rounded corners=2pt] (src_ff.north) -- ([yshift=1.1em]src_ff.north) -- ([yshift=-0.8em]tgt_eda.south) -- (tgt_eda.south);
...@@ -84,8 +86,8 @@ ...@@ -84,8 +86,8 @@
\node[] at ([xshift=2em]box3.east){\normalsize{解码器}}; \node[] at ([xshift=2em]box3.east){\normalsize{解码器}};
\node[] at ([xshift=1em,yshift=-6em]box3.east){{$\times N$}}; \node[] at ([xshift=1em,yshift=-6em]box3.east){{$\times N$}};
\draw[line,dotted,rounded corners=4pt,violet] (box2.north) -- ([yshift=1em]box2.north) -- ([yshift=1em,xshift=5.8em]box2.north) -- ([xshift=-2.35em]tgt_emb.west) -- (tgt_emb.west); \draw[line,dotted,rounded corners=4pt,violet] (box2.north) -- ([yshift=1em]box2.north) -- ([yshift=1em,xshift=5.8em]box2.north) -- ([xshift=-1.8em]tgt.west) -- (tgt.west);
\draw[line,-,dotted,rounded corners=4pt,violet,] (src_emb.east) -- ([xshift=-2em]tgt_emb.west); \draw[line,-,dotted,rounded corners=4pt,violet] (src.east) -- ([xshift=-1.8em]tgt.west);
\end{tikzpicture} \end{tikzpicture}
......
...@@ -15,10 +15,10 @@ ...@@ -15,10 +15,10 @@
\node[module, minimum width=8em] (encoder) at (0,0) {编码器组件}; \node[module, minimum width=8em] (encoder) at (0,0) {编码器组件};
\node[module,anchor=west, minimum width=8em] (decoder) at ([xshift=4em]encoder.east){解码器组件}; \node[module,anchor=west, minimum width=8em] (decoder) at ([xshift=4em]encoder.east){解码器组件};
\node[module,anchor=west, minimum width=8em] (decoder2) at ([xshift=4em]decoder.east){解码器组件}; \node[module,anchor=west, minimum width=8em] (decoder2) at ([xshift=4em]decoder.east){解码器组件};
\node[module,anchor=north, minimum width=6em,font=\scriptsize,inner ysep=4pt] (deinput) at ([yshift=-2em]decoder2.south){解码输入}; \node[module,anchor=north, minimum width=6em,font=\scriptsize,inner ysep=4pt] (deinput) at ([yshift=-2em]decoder2.south){解码输入};
\node[anchor=south,font=\footnotesize] (mod1) at ([yshift=0.4em]encoder.north){\small\bfnew{编码器模块}}; \node[anchor=south,font=\footnotesize] (mod1) at ([yshift=0.4em]encoder.north){\small\bfnew{编码器模块}};
\node[anchor=south,font=\footnotesize] (mod2) at ([yshift=0.4em]decoder.north){\small\bfnew{调序模块}}; \node[anchor=south,font=\footnotesize] (mod2) at ([yshift=0.4em]decoder.north){\small\bfnew{调序模块}};
\node[anchor=south,font=\footnotesize] (mod3) at ([yshift=0.4em]decoder2.north){\small\bfnew{解码}}; \node[anchor=south,font=\footnotesize] (mod3) at ([yshift=0.4em]decoder2.north){\small\bfnew{解码器模块}};
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
{ {
...@@ -27,14 +27,14 @@ ...@@ -27,14 +27,14 @@
\node[box][fit=(decoder2)(mod3)] (box3) {}; \node[box][fit=(decoder2)(mod3)] (box3) {};
} }
\end{pgfonlayer} \end{pgfonlayer}
\node[anchor=north,font=\scriptsize,align=center] (w1) at ([yshift=-2em]encoder.south){\scriptsize\bfnew{There exist different} \\ \scriptsize\bfnew{opinions on this question}}; \node[anchor=north,font=\scriptsize,align=center] (w1) at ([yshift=-2em]encoder.south){\scriptsize\bfnew{There exist different} \\ \scriptsize\bfnew{opinions on this question .}};
\node[anchor=north,font=\scriptsize,align=center] (w2) at ([yshift=-2em]decoder.south){\scriptsize\bfnew{There exist different} \\ \scriptsize\bfnew{opinions on this question}}; \node[anchor=north,font=\scriptsize,align=center] (w2) at ([yshift=-2em]decoder.south){\scriptsize\bfnew{There exist different} \\ \scriptsize\bfnew{opinions on this question .}};
\node[anchor=north,font=\scriptsize,text=gray] (w3) at ([yshift=0.6em]w2.south){\scriptsize\bfnew{(复制源语言句子)}}; \node[anchor=north,font=\scriptsize,text=gray] (w3) at ([yshift=0.6em]w2.south){\scriptsize\bfnew{(复制源语言句子)}};
\node[anchor=south,font=\scriptsize,align=center] (w4) at ([yshift=1.6em]box2.north){\scriptsize\bfnew{on this question} \\ \scriptsize\bfnew{There exist different opinions}}; \node[anchor=south,font=\scriptsize,align=center] (w4) at ([yshift=1.6em]box2.north){\scriptsize\bfnew{on this question} \\ \scriptsize\bfnew{There exist different opinions .}};
\node[anchor=south,font=\scriptsize,align=center] (w5) at ([yshift=1.6em]box3.north){\tiny\bfnew{\ 这个 \ 问题 \ 存在 \ 不同的 \ 看法}}; \node[anchor=south,font=\scriptsize,align=center] (w5) at ([yshift=1.6em]box3.north){\tiny\bfnew{\ 这个 \ 问题 \ 存在 \ 不同的 \ 看法 \ }};
\node[font=\tiny] at ([xshift=-0.8em,yshift=-0.6em]encoder.east) {$N\times$}; \node[font=\tiny] at ([xshift=-0.8em,yshift=-0.6em]encoder.east) {$N\times$};
\node[font=\tiny] at ([xshift=-0.8em,yshift=-0.6em]decoder.east) {$1\times$}; \node[font=\tiny] at ([xshift=-0.8em,yshift=-0.6em]decoder.east) {$1\times$};
\node[font=\tiny] at ([xshift=-1em,yshift=-0.6em]decoder2.east) {$N-1\times$}; \node[font=\tiny] at ([xshift=-1.2em,yshift=-0.6em]decoder2.east) {$N-1\times$};
\draw[line] (w1.north) -- (box1.south); \draw[line] (w1.north) -- (box1.south);
\draw[line] (w2.north) -- (box2.south); \draw[line] (w2.north) -- (box2.south);
...@@ -44,7 +44,7 @@ ...@@ -44,7 +44,7 @@
\draw[line] (box1.east) -- (box2.west); \draw[line] (box1.east) -- (box2.west);
\draw[line] (box2.east) -- (box3.west); \draw[line] (box2.east) -- (box3.west);
\draw[line,rounded corners=2pt,dotted,brown(traditional)] (w1.south) -- ([yshift=-1.6em]w1.south) -- ([yshift=-2.3em]deinput.south) -- (deinput.south); \draw[line,rounded corners=2pt,dotted,brown(traditional)] (w1.south) -- ([yshift=-1.6em]w1.south) -- ([yshift=-2.3em]deinput.south) -- (deinput.south);
\draw[line,rounded corners=2pt,dotted,brown(traditional)] (w4.east) -- ([xshift=0.9em]w4.east) -- ([xshift=-3em]deinput.west) -- (deinput.west); \draw[line,rounded corners=2pt,dotted,brown(traditional)] (w4.east) -- ([xshift=0.9em]w4.east) -- ([xshift=-2.7em]deinput.west) -- (deinput.west);
\end{tikzpicture} \end{tikzpicture}
......
\begin{tikzpicture} \begin{tikzpicture}
\tikzstyle{encoder} = [rectangle,thick,rounded corners,minimum width=1.9cm,minimum height=1.2cm,text centered,draw=black,fill=red!25] \tikzstyle{encoder} = [rectangle,thick,rounded corners,minimum width=1.9cm,minimum height=1.2cm,text centered,draw=black,fill=red!25]
\tikzstyle{autodecoder} = [rectangle,thick,rounded corners,minimum width=3cm,minimum height=1.2cm,text centered,draw=black,fill=blue!15] \tikzstyle{autodecoder} = [rectangle,thick,rounded corners,minimum width=3cm,minimum height=1.2cm,text centered,draw=black,fill=blue!15]
\tikzstyle{nonautodecoder} = [rectangle,thick,rounded corners,minimum width=3.4cm,minimum height=1.2cm,text centered,draw=black!70,fill=blue!15] \tikzstyle{nonautodecoder} = [rectangle,thick,rounded corners,minimum width=4cm,minimum height=1.2cm,text centered,draw=black!70,fill=blue!15]
\node (encoder)[encoder] at (0,0) {编码器}; \node (encoder)[encoder] at (0,0) {编码器};
\node (text_left)[anchor=south] at ([yshift=-3em]encoder.south) {\footnotesize{\ \ 熟睡}}; %\node (des)[anchor=north] at ([yshift=2cm]encoder.north) {<Mask>:<Mask>};
\node (text_left)[anchor=south] at ([yshift=-3em]encoder.south) {\footnotesize{\ \ 熟睡\ }};
\node (autodecoder)[autodecoder,right of=encoder,xshift=6em ] {自回归解码器}; \node (autodecoder)[autodecoder,right of=encoder,xshift=6em ] {自回归解码器};
\node (text_mid1)[anchor=north] at ([yshift=3em]autodecoder.north) {\scriptsize{NP1\ VP3\ <eos>}}; \node (text_mid1)[anchor=north] at ([yshift=3em]autodecoder.north) {\scriptsize{NP1\ VP3\ PU1\ <eos>}};
\node (text_mid2)[anchor=south] at ([yshift=-3em]autodecoder.south) {\scriptsize{<sos>\ NP1\ VP3}}; \node (text_mid2)[anchor=south] at ([yshift=-3em]autodecoder.south) {\scriptsize{<sos>\ NP1\ VP3\ PU1}};
\node (nonautodecoder)[nonautodecoder,right of=autodecoder,xshift=10.5em ] {非自回归解码器}; \node (nonautodecoder)[nonautodecoder,right of=autodecoder,xshift=12.5em] {非自回归解码器};
\node (text_right1)[anchor=north] at ([yshift=3em]nonautodecoder.north) {\scriptsize{NP1\;Cats\;VP3\;sleep\;a\;lot}}; \node (text_right1)[anchor=north] at ([yshift=3em]nonautodecoder.north) {\scriptsize{NP1\;Cats\;VP3\;sleep\;a\;lot\;PU1\;.}};
\node (text_right2)[anchor=south] at ([yshift=-3em]nonautodecoder.south) {\scriptsize{NP1\;<Mask>\;VP3\;<Mask>\;<Mask>\;<Mask>}}; \node (text_right2)[anchor=south] at ([yshift=-3em]nonautodecoder.south) {\scriptsize{NP1\;<Mask>\;VP3\;<Mask>\;<Mask>\;<Mask>\;PU1\;<Mask>}};
\draw[->,thick] (encoder.east) to (autodecoder.west);
\draw[->,thick] ([yshift=0.1em]text_left.north) to (encoder.south); \draw[->,thick] ([yshift=0.1em]text_left.north) to (encoder.south);
\draw[->,thick] ([yshift=0.1em]text_mid2.north) to (autodecoder.south); \draw[->,thick] ([yshift=0.1em]text_mid2.north) to (autodecoder.south);
\draw[->,thick] (autodecoder.north) to ([yshift=-0.1em]text_mid1.south); \draw[->,thick] (autodecoder.north) to ([yshift=-0.1em]text_mid1.south);
\draw[->,thick] ([yshift=0.1em]text_right2.north) to (nonautodecoder.south); \draw[->,thick] ([yshift=0.1em]text_right2.north) to (nonautodecoder.south);
\draw[->,thick] (nonautodecoder.north) to ([yshift=-0.1em]text_right1.south); \draw[->,thick] (nonautodecoder.north) to ([yshift=-0.1em]text_right1.south);
\draw[->,thick] (text_mid1.east) -- ([xshift=2.1em]text_mid1.east) -- ([xshift=-1.2em]text_right2.west)-- (text_right2.west); \draw[->,thick] (text_mid1.east) -- ([xshift=1.4em]text_mid1.east) -- ([xshift=-1.2em]text_right2.west)-- (text_right2.west);
\draw[-,thick] (encoder.north) to ([yshift=0.8em]encoder.north); \draw[-,thick] (encoder.north) to ([yshift=0.8em]encoder.north);
\draw[-,thick,dashed] ([yshift=0.8em]encoder.north) -- ([xshift=-7em,yshift=0.8em]nonautodecoder.north) -- ([xshift=-2.5em]nonautodecoder.west); \draw[-,thick,dashed] ([yshift=0.8em]encoder.north) -- ([xshift=-7.7em,yshift=0.8em]nonautodecoder.north) --([xshift=-2.5em]nonautodecoder.west);
\draw[->,thick]([xshift=-2.5em]nonautodecoder.west) to (nonautodecoder.west); \draw[->,thick] ([xshift=-2.5em]nonautodecoder.west) -- (nonautodecoder.west);
\end{tikzpicture} \end{tikzpicture}
\ No newline at end of file
...@@ -54,11 +54,11 @@ ...@@ -54,11 +54,11 @@
\node[rec,anchor=center,rotate=60,fill=red!20](c1x5) at ([xshift=-2em,yshift=1.0em]circle1.east){\tiny{5}}; \node[rec,anchor=center,rotate=60,fill=red!20](c1x5) at ([xshift=-2em,yshift=1.0em]circle1.east){\tiny{5}};
%circle2 %circle2
\node[cir,anchor=center,rotate=-30,fill=blue!20] (c2a) at ([xshift=-5.3em,yshift=2.15em]circle2.east){\tiny{a}}; \node[cir,anchor=center,rotate=-30,fill=blue!20] (c2a) at ([xshift=-5.3em,yshift=2.15em]circle2.east){\tiny{$a$}};
\node[cir,anchor=east,rotate=-30,fill=blue!20] (c2b) at ([xshift=2.0em,yshift=-1.25em]c2a.east){\tiny{b}}; \node[cir,anchor=east,rotate=-30,fill=blue!20] (c2b) at ([xshift=2.0em,yshift=-1.25em]c2a.east){\tiny{$b$}};
\node[cir,anchor=east,rotate=-30,fill=blue!20] (c2c) at ([xshift=0.8em,yshift=-3.9em]c2a.south){\tiny{c}}; \node[cir,anchor=east,rotate=-30,fill=blue!20] (c2c) at ([xshift=0.8em,yshift=-3.9em]c2a.south){\tiny{$c$}};
\node[cir,anchor=east,rotate=-30,fill=blue!20] (c2x) at ([xshift=-0.3em,yshift=-1.9em]c2a.south){\tiny{x}}; \node[cir,anchor=east,rotate=-30,fill=blue!20] (c2x) at ([xshift=-0.3em,yshift=-1.9em]c2a.south){\tiny{$x$}};
\node[cir,anchor=west,rotate=-30,fill=blue!20] (c2y) at ([xshift=1.15em,yshift=-2.85em]c2a.east){\tiny{y}}; \node[cir,anchor=west,rotate=-30,fill=blue!20] (c2y) at ([xshift=1.15em,yshift=-2.85em]c2a.east){\tiny{$y$}};
%circle3 %circle3
\node[rec,anchor=center,rotate=-30,fill=red!20] (c3x1) at ([xshift=-6.7em,yshift=1.75em]circle3.east){\tiny{1}}; \node[rec,anchor=center,rotate=-30,fill=red!20] (c3x1) at ([xshift=-6.7em,yshift=1.75em]circle3.east){\tiny{1}};
...@@ -74,11 +74,11 @@ ...@@ -74,11 +74,11 @@
\node[rec,anchor=east,rotate=-30,fill=red!20] (c4x4) at ([xshift=0.35em,yshift=-2.7em]c4x1.south){\tiny{4}}; \node[rec,anchor=east,rotate=-30,fill=red!20] (c4x4) at ([xshift=0.35em,yshift=-2.7em]c4x1.south){\tiny{4}};
\node[rec,anchor=west,rotate=-30,fill=red!20] (c4x5) at ([xshift=2.35em,yshift=-3.85em]c4x1.east){\tiny{5}}; \node[rec,anchor=west,rotate=-30,fill=red!20] (c4x5) at ([xshift=2.35em,yshift=-3.85em]c4x1.east){\tiny{5}};
\node[cir,anchor=center,rotate=-30,fill=blue!20] (c4a) at ([xshift=-5.3em,yshift=2.15em]circle4.east){\tiny{a}}; \node[cir,anchor=center,rotate=-30,fill=blue!20] (c4a) at ([xshift=-5.3em,yshift=2.15em]circle4.east){\tiny{$a$}};
\node[cir,anchor=east,rotate=-30,fill=blue!20] (c4b) at ([xshift=2.0em,yshift=-1.25em]c4a.east){\tiny{b}}; \node[cir,anchor=east,rotate=-30,fill=blue!20] (c4b) at ([xshift=2.0em,yshift=-1.25em]c4a.east){\tiny{$b$}};
\node[cir,anchor=east,rotate=-30,fill=blue!20] (c4c) at ([xshift=0.8em,yshift=-3.9em]c4a.south){\tiny{c}}; \node[cir,anchor=east,rotate=-30,fill=blue!20] (c4c) at ([xshift=0.8em,yshift=-3.9em]c4a.south){\tiny{$c$}};
\node[cir,anchor=east,rotate=-30,fill=blue!20] (c4x) at ([xshift=-0.3em,yshift=-1.9em]c4a.south){\tiny{x}}; \node[cir,anchor=east,rotate=-30,fill=blue!20] (c4x) at ([xshift=-0.3em,yshift=-1.9em]c4a.south){\tiny{$x$}};
\node[cir,anchor=west,rotate=-30,fill=blue!20] (c4y) at ([xshift=1.15em,yshift=-2.85em]c4a.east){\tiny{y}}; \node[cir,anchor=west,rotate=-30,fill=blue!20] (c4y) at ([xshift=1.15em,yshift=-2.85em]c4a.east){\tiny{$y$}};
\draw [color=red,line width=0.7pt,rotate=18] ([xshift=-5.1em,yshift=3.7em]circle4.east) ellipse (1.6em and 0.9em); \draw [color=red,line width=0.7pt,rotate=18] ([xshift=-5.1em,yshift=3.7em]circle4.east) ellipse (1.6em and 0.9em);
\draw [color=red,line width=0.7pt,rotate=-5] ([xshift=-2.8em,yshift=0.6em]circle4.east) ellipse (1.6em and 0.9em); \draw [color=red,line width=0.7pt,rotate=-5] ([xshift=-2.8em,yshift=0.6em]circle4.east) ellipse (1.6em and 0.9em);
......
...@@ -22,9 +22,9 @@ ...@@ -22,9 +22,9 @@
%---------------------------------------------------------------------------------------- %----------------------------------------------------------------------------------------
\chapter{低资源神经机器翻译} \chapter{低资源神经机器翻译}
\parinterval 神经机器翻译带来的性能提升是显著的,但随之而来的问题是对海量双语训练数据的依赖。但是,不同语言可使用的数据规模是不同的。比如汉语、英语这种使用范围广泛的语言,存在着大量的双语平行句对,这些语言被称为{\small\bfnew{富资源语言}}\index{富资源语言}(High-resource Language\index{High-resource Language})。而对于其它一些使用范围稍小的语言,如斐济语、古吉拉特语等,相关的数据非常稀少,这些语言被称为{\small\bfnew{低资源语言}}\index{低资源语言}(Low-resource Language\index{Low-resource Language})。世界上现存语言超过5000种,仅有很少一部分为富资源语言,绝大多数均为低资源语言。即使在富资源语言中,对于一些特定的领域,双语平行语料也是十分稀缺的。有时,一些特殊的语种或者领域甚至会面临“零资源”的问题。因此,{\small\bfnew{低资源机器翻译}}\index{低资源机器翻译}(Low-resource Machine Translation)是当下急需解决且颇具挑战的问题。 \parinterval 神经机器翻译带来的性能提升是显著的,但随之而来的问题是对海量双语训练数据的依赖。不同语言可使用的数据规模是不同的。比如汉语、英语这种使用范围广泛的语言,存在着大量的双语平行句对,这些语言被称为{\small\bfnew{富资源语言}}\index{富资源语言}(High-resource Language\index{High-resource Language})。而对于其它一些使用范围稍小的语言,如斐济语、古吉拉特语等,相关的数据非常稀少,这些语言被称为{\small\bfnew{低资源语言}}\index{低资源语言}(Low-resource Language\index{Low-resource Language})。世界上现存语言超过5000种,仅有很少一部分为富资源语言,绝大多数均为低资源语言。即使在富资源语言中,对于一些特定的领域,双语平行语料也是十分稀缺的。有时,一些特殊的语种或者领域甚至会面临“零资源”的问题。因此,{\small\bfnew{低资源机器翻译}}\index{低资源机器翻译}(Low-resource Machine Translation)是当下急需解决且颇具挑战的问题。
\parinterval 本章将对低资源神经机器翻译的相关问题、模型和方法展开介绍,内容涉及数据的有效使用、双向翻译模型、多语言翻译建模、无监督机器翻译、领域适应五个方面。 \parinterval 本章将对低资源神经机器翻译的相关问题、模型和方法展开介绍,内容涉及数据的有效使用、双向翻译模型、多语言翻译模型、无监督机器翻译、领域适应五个方面。
%---------------------------------------------------------------------------------------- %----------------------------------------------------------------------------------------
% NEW SECTION 16.1 % NEW SECTION 16.1
...@@ -55,7 +55,7 @@ ...@@ -55,7 +55,7 @@
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter16/Figures/figure-application-process-of-back-translation} \input{./Chapter16/Figures/figure-application-process-of-back-translation}
\caption{回译方法的流程} \caption{回译方法的简要流程}
\label{fig:16-1} \label{fig:16-1}
\end{figure} \end{figure}
%---------------------------------------------- %----------------------------------------------
...@@ -553,7 +553,7 @@ ...@@ -553,7 +553,7 @@
\parinterval 微调的原理普遍基于普氏分析\upcite{DBLP:journals/corr/MikolovLS13}。假设现在有一个种子词典$D=\left\{x_{i}, y_{i}\right\}$其中${i \in\{1, n\}}$,和两个单语词嵌入$\mathbi{X}$和$\mathbi{Y}$,那么就可以将$D$作为{\small\bfnew{映射锚点}}\index{映射锚点}(Anchor\index{Anchor})学习一个转移矩阵$\mathbi{W}$,使得$\mathbi{W} \mathbi{X}$和$\mathbi{Y}$这两个空间尽可能相近,此外通过对$\mathbi{W}$施加正交约束可以显著提高性能\upcite{DBLP:conf/naacl/XingWLL15},于是这个优化问题就转变成了{\small\bfnew{普鲁克问题}}\index{普鲁克问题}(Procrustes Problem\index{Procrustes Problem})\upcite{DBLP:conf/iclr/SmithTHH17},可以通过{\small\bfnew{奇异值分解}}\index{奇异值分解}(Singular Value Decomposition,SVD\index{Singular Value Decomposition})来获得近似解。这里用$\mathbi{X}'$和$\mathbi{Y}'$表示$D$中源语言单词和目标语言单词的词嵌入矩阵,优化$\mathbi{W}$的过程可以被描述为: \parinterval 微调的原理普遍基于普氏分析\upcite{DBLP:journals/corr/MikolovLS13}。假设现在有一个种子词典$D=\left\{x_{i}, y_{i}\right\}$其中${i \in\{1, n\}}$,和两个单语词嵌入$\mathbi{X}$和$\mathbi{Y}$,那么就可以将$D$作为{\small\bfnew{映射锚点}}\index{映射锚点}(Anchor\index{Anchor})学习一个转移矩阵$\mathbi{W}$,使得$\mathbi{W} \mathbi{X}$和$\mathbi{Y}$这两个空间尽可能相近,此外通过对$\mathbi{W}$施加正交约束可以显著提高性能\upcite{DBLP:conf/naacl/XingWLL15},于是这个优化问题就转变成了{\small\bfnew{普鲁克问题}}\index{普鲁克问题}(Procrustes Problem\index{Procrustes Problem})\upcite{DBLP:conf/iclr/SmithTHH17},可以通过{\small\bfnew{奇异值分解}}\index{奇异值分解}(Singular Value Decomposition,SVD\index{Singular Value Decomposition})来获得近似解。这里用$\mathbi{X}'$和$\mathbi{Y}'$表示$D$中源语言单词和目标语言单词的词嵌入矩阵,优化$\mathbi{W}$的过程可以被描述为:
\begin{eqnarray} \begin{eqnarray}
\widehat{\mathbi{W}} & = &\underset{\mathbi{W} \in O_{d}(\mathbb{R})}{\operatorname{argmin}}\|\mathbi{W} \mathbi{X}'- \mathbi{Y}' \|_{\mathrm{F}} \nonumber \\ \widehat{\mathbi{W}} & = & \argmin_{\mathbi{W} \in O_{d}(\mathbb{R})}{\|\mathbi{W} \mathbi{X}'- \mathbi{Y}' \|_{\mathrm{F}}} \nonumber \\
& = & \mathbi{U} \mathbi{V}^{\rm{T}} \label{eq:16-9} \\ & = & \mathbi{U} \mathbi{V}^{\rm{T}} \label{eq:16-9} \\
\textrm{s.t.\ \ \ \ } \mathbi{U} \Sigma \mathbi{V}^{\rm{T}} &= &\operatorname{SVD}\left(\mathbi{Y}' \mathbi{X}'^{\rm{T}}\right) \textrm{s.t.\ \ \ \ } \mathbi{U} \Sigma \mathbi{V}^{\rm{T}} &= &\operatorname{SVD}\left(\mathbi{Y}' \mathbi{X}'^{\rm{T}}\right)
\label{eq:16-10} \label{eq:16-10}
......
...@@ -3,16 +3,16 @@ ...@@ -3,16 +3,16 @@
\begin {scope} \begin {scope}
\node[draw=white,scale=0.6] (input) at (0,0){\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-bank-without-attention.jpg}};(1.9,-1.4); \node[draw=white,scale=0.6] (input) at (0,0){\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-bank-without-attention.jpg}};(1.9,-1.4);
\node[anchor=west] (label1) at ([xshift=-3.5em]input.west) {\begin{tabular}{l}{\normalsize{图片:}}\end{tabular}}; \node[anchor=west] (label1) at ([xshift=-3.5em]input.west) {\begin{tabular}{l}{\normalsize{图片:}}\end{tabular}};
\node[anchor=south] (label2) at ([yshift=-7.15em]label1.south) {\begin{tabular}{l}{\normalsize{源文:}}\end{tabular}}; \node[anchor=south] (label2) at ([yshift=-6em]label1.south) {\begin{tabular}{l}{\normalsize{源文:}}\end{tabular}};
\node[anchor=south] (english1) at ([xshift=-0.1em,yshift=-3.5em]input.south) {\begin{tabular}{l}{\large{A\; girl\; jumps\; off\; a\; {\red{\underline{bank}}}.}}\end{tabular}}; \node[anchor=south] (english1) at ([xshift=-0.35em,yshift=-2.3em]input.south) {\begin{tabular}{l}{\large{A\; girl\; jumps\; off\; a\; {\red{\underline{bank}}}\quad .}}\end{tabular}};
\draw[decorate,decoration={brace,amplitude=4mm},very thick] ([xshift=7em]input.90) -- ([xshift=1.2em,yshift=-0.5em]english1.east); \draw[decorate,decoration={brace,amplitude=4mm},very thick] ([xshift=6.3em]input.90) -- ([xshift=0.3em,yshift=-0em]english1.east);
\node[anchor=east,rectangle,thick,rounded corners,minimum width=3.5em,minimum height=2.5em,text centered,draw=black!70,fill=red!25](trans)at ([xshift=8.0em,yshift=5.55em]english1.east){\normalsize{翻译模型}}; \node[anchor=east,rectangle,thick,rounded corners,minimum width=3.5em,minimum height=2.5em,text centered,draw=black!70,fill=red!25](trans)at ([xshift=7.4em,yshift=5.27em]english1.east){\normalsize{翻译模型}};
\draw[->,very thick]([xshift=-1.4em]trans.west) to (trans.west); \draw[->,very thick]([xshift=-1.4em]trans.west) to (trans.west);
\draw[->,very thick](trans.east) to ([xshift=1.4em]trans.east); \draw[->,very thick](trans.east) to ([xshift=1.4em]trans.east);
\node[anchor=east] (de1) at ([xshift=4.7cm,yshift=-0.1em]trans.east) {\begin{tabular}{l}{\normalsize{译文:}}{\normalsize{一个女孩从{\red{河床}}}}\end{tabular}}; \node[anchor=east] (de1) at ([xshift=4.8cm,yshift=-0.1em]trans.east) {\begin{tabular}{l}{\normalsize{译文:}}{\normalsize{一个\ \;女孩\ \;从\ \;{\red{河床}}}}\end{tabular}};
\node[anchor=south] (de2) at ([xshift=-0.4em,yshift=-1.5em]de1.south) {\begin{tabular}{l}{\normalsize{跳下来}} \end{tabular}}; \node[anchor=south] (de2) at ([xshift=-0em,yshift=-1.5em]de1.south) {\begin{tabular}{l}{\normalsize{\ \;跳下来\ \;}} \end{tabular}};
\end {scope} \end {scope}
\end{tikzpicture} \end{tikzpicture}
\ No newline at end of file
...@@ -33,9 +33,9 @@ ...@@ -33,9 +33,9 @@
\section{机器翻译需要更多的上下文} \section{机器翻译需要更多的上下文}
\parinterval 长期以来,机器翻译都是指句子级翻译。主要原因在于,句子级的翻译建模可以大大简化问题,使得机器翻译方法更容易被实践和验证。但是人类使用语言的过程并不是孤立在一个个句子上进行的。这个问题可以类比于人类学习语言的过程:小孩成长过程中会接受视觉、听觉、触觉等多种信号,这些信号的共同作用使得他们产生对客观世界的“认识”,同时促使他们使用“语言”进行表达。从这个角度说,语言能力并不是由单一因素形成的,它往往伴随着其他信息的相互作用,比如,当我们翻译一句话的时候,会用到看到的画面、听到的语调、甚至前面说过句子中的信息。 \parinterval 长期以来,机器翻译都是指句子级翻译。主要原因在于,句子级的翻译建模可以大大简化问题,使得机器翻译方法更容易被实践和验证。但是人类使用语言的过程并不是孤立地在一个个句子上进行的。这个问题可以类比于人类学习语言的过程:小孩成长过程中会接受视觉、听觉、触觉等多种信号,这些信号的共同作用使得他们产生对客观世界的“认识”,同时促使他们使用“语言”进行表达。从这个角度说,语言能力并不是由单一因素形成的,它往往伴随着其他信息的相互作用,比如,当我们翻译一句话的时候,会用到看到的画面、听到的语调、甚至前面说过的句子中的信息。
\parinterval 广义上,当前句子以外的信息都可以被看作是一种上下文。比如,图\ref{fig:17-1}中,需要把英语句子“A girl jumps off a bank”翻译为汉语。但是,其中的“bank”有多个含义,因此仅仅使用英语句子本身的信息可能会将其翻译为“银行”,而非正确的译文“河床”。但是,图\ref{fig:17-1}中也提供了这个英语句子所对应的图片,显然图片中直接展示了河床,这时“bank”是没有歧义的。通常也会把这种使用图片和文字一起进行机器翻译的任务称作{\small\bfnew{多模态机器翻译}}\index{多模态机器翻译}(Multi-Modal Machine Translation)\index{Multi-Modal Machine Translation} \parinterval 广义上,当前句子以外的信息都可以被看作一种上下文。比如,图\ref{fig:17-1}中,需要把英语句子“A girl jumps off a bank .”翻译为汉语。但是,其中的“bank”有多个含义,因此仅仅使用英语句子本身的信息可能会将其翻译为“银行”,而非正确的译文“河床”。但是,图\ref{fig:17-1}中也提供了这个英语句子所对应的图片,显然图片中直接展示了河床,这时“bank”是没有歧义的。通常也会把这种使用图片和文字一起进行机器翻译的任务称作{\small\bfnew{多模态机器翻译}}\index{多模态机器翻译}(Multi-Modal Machine Translation)\index{Multi-Modal Machine Translation}
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
...@@ -45,9 +45,9 @@ ...@@ -45,9 +45,9 @@
\label{fig:17-1} \label{fig:17-1}
\end{figure} \end{figure}
%------------------------------------------- %-------------------------------------------
\parinterval {\small\bfnew{模态}}\index{模态}(Modality)\index{Modality}是指某一种信息来源。例如,视觉、听觉、嗅觉、味觉都可以被看作是不同的模态。因此视频、语音、文字等都可以被看作是承载这些模态的媒介。在机器翻译中使用多模态这个概念,更多是为了区分某些不同于文字的信息。除了图像等视觉模态信息,机器翻译也可以利用语音模态信息。比如,直接对语音进行翻译,甚至直接用语音表达出翻译结果。 \parinterval {\small\bfnew{模态}}\index{模态}(Modality)\index{Modality}是指某一种信息来源。例如,视觉、听觉、嗅觉、味觉都可以被看作是不同的模态。因此视频、语音、文字等都可以被看作是承载这些模态的媒介。在机器翻译中使用多模态这个概念,是为了区分某些不同于文字的信息。除了图像等视觉模态信息,机器翻译也可以利用语音模态信息。比如,直接对语音进行翻译,甚至直接用语音表达出翻译结果。
\parinterval 此外,除了不同信息源所引入的上下文,机器翻译也可以利用文字本身的上下文。比如,翻译一篇文章中的某个句子时,可以根据整个篇章的内容进行翻译。显然这种篇章的语境是有助于机器翻译的。在本章接下来的内容中,会对机器翻译中使用不同上下文(多模态和篇章信息)的方法展开讨论。 \parinterval 除了不同信息源所引入的上下文,机器翻译也可以利用文字本身的上下文。比如,翻译一篇文章中的某个句子时,可以根据整个篇章的内容进行翻译。显然这种篇章的语境是有助于机器翻译的。在本章接下来的内容中,会对机器翻译中使用不同上下文(多模态和篇章信息)的方法展开讨论。
%---------------------------------------------------------------------------------------- %----------------------------------------------------------------------------------------
% NEW SECTION % NEW SECTION
...@@ -464,13 +464,13 @@ ...@@ -464,13 +464,13 @@
\parinterval “篇章”在这里是指一系列连续的段落或句子所构成的整体,其中各个句子间从形式和内容上都具有一定的连贯性和一致性\upcite{jurafsky2000speech}。这些联系主要体现在{\small\sffamily\bfseries{衔接}}\index{衔接}(Cohesion \index{Cohesion})以及连贯两个方面。其中衔接体现在显性的语言成分和结构上,包括篇章中句子间的语法和词汇的联系,而连贯体现在各个句子之间的逻辑和语义的联系上。因此,篇章级翻译就是要将这些上下文之间的联系考虑在内,从而生成比句子级翻译更连贯和准确的翻译结果。实例\ref{eg:17-1}就展示了一个使用篇章信息进行机器翻译的实例。 \parinterval “篇章”在这里是指一系列连续的段落或句子所构成的整体,其中各个句子间从形式和内容上都具有一定的连贯性和一致性\upcite{jurafsky2000speech}。这些联系主要体现在{\small\sffamily\bfseries{衔接}}\index{衔接}(Cohesion \index{Cohesion})以及连贯两个方面。其中衔接体现在显性的语言成分和结构上,包括篇章中句子间的语法和词汇的联系,而连贯体现在各个句子之间的逻辑和语义的联系上。因此,篇章级翻译就是要将这些上下文之间的联系考虑在内,从而生成比句子级翻译更连贯和准确的翻译结果。实例\ref{eg:17-1}就展示了一个使用篇章信息进行机器翻译的实例。
\begin{example} \begin{example}
上下文句子:我上周针对这个问题做出解释并咨询了他的意见 上下文句子:我\ 上周\ 针对\ 这个\ 问题\ 做出\ 解释\ 并\ 咨询\ 了\ 他的\ 意见\ 。
\hspace{2em} 待翻译句子:他也同意我的看法 \hspace{2em} 待翻译句子:他\ 也\ 同意\ 我的\ 看法\ 。
\hspace{2em} 句子级翻译结果:He also agrees with me. \hspace{2em} 句子级翻译结果:He also agrees with me .
\hspace{2em} 篇章级翻译结果:{\red{And}} he {\red{agreed}} with me. \hspace{2em} 篇章级翻译结果:{\red{And}} he {\red{agreed}} with me .
\label{eg:17-1} \label{eg:17-1}
\end{example} \end{example}
...@@ -523,16 +523,16 @@ ...@@ -523,16 +523,16 @@
\begin{example} \begin{example}
传统模型训练输入: 传统模型训练输入:
\hspace{10em}源语言:你看到了吗 \hspace{10em}源语言:你\ 看到\ 了\ 吗\ ?
\hspace{10em}目标语言:Do you see them? \hspace{10em}目标语言:Do you see them ?
\vspace{0.5em} \vspace{0.5em}
\qquad\ 改进后模型训练输入: \qquad\ 改进后模型训练输入:
\hspace{10em}源语言:{\red{他们在哪?\ <sep>\ }}你看到了吗 \hspace{10em}源语言:{\red{他们\ 在\ 哪\ ?\ <sep>\ }}\ 你\ 看到\ 了\ 吗\ ?
\hspace{10em}目标语言:Do you see them? \hspace{10em}目标语言:Do you see them ?
\label{eg:17-3-1} \label{eg:17-3-1}
\end{example} \end{example}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论