Commit 2149702b by 曹润柘

合并分支 'caorunzhe' 到 'master'

Caorunzhe

查看合并请求 !621
parents 2d87fe96 086edd8f
%%%------------------------------------------------------------------------------------------------------------
%%% Three-panel figure over the words "Bush held a talk with Sharon":
%%%   (a) bar chart captioned "original distribution",
%%%   (b) bell-shaped curve captioned "Gaussian distribution" with a span marked D,
%%%   (c) bar chart captioned "modified distribution".
%%% A cross is drawn to the right of panel (a)'s bars and an equals sign to the
%%% right of panel (b)'s bars.
%%% NOTE(review): the previous header comment read "reordering model 1:
%%% distance-based reordering", which does not match the captions -- confirm.
%%% NOTE(review): the colour `ugreen` is not defined here; presumably it comes
%%% from the document preamble.
\begin{center}
\begin{tikzpicture}
% cirnode: small drawn circle under each bar; colnode: filled bar;
% show: red dot, referenced only by the commented-out helper nodes in panel (b).
\tikzset{
  cirnode/.style={circle,inner sep=4pt,draw},
  colnode/.style={fill=ugreen!30,inner sep=0.1pt},
  show/.style={fill=red,circle,inner sep=0.5pt}
}
% ---- Panel (a) ----
\begin{scope}
\node [cirnode,anchor=north west] (c1) at (0, 0) {};
% Four dotted horizontal guide lines below c1.
\foreach \dy in {-0.5em,-2em,-3.5em,-5em}
  \draw[-,dotted] ([xshift=-1em,yshift=\dy]c1.south)--([xshift=9.3em,yshift=\dy]c1.south);
% Row of six circles c2..c7, each placed to the right of the previous one.
\node [cirnode,anchor=north] (c2) at ([yshift=-5.5em]c1.south) {};
\foreach \cur/\prev in {3/2,4/3,5/4,6/5,7/6}
  \node [cirnode,anchor=west] (c\cur) at ([xshift=0.6em]c\prev.east) {};
% One bar above each circle: bar index / circle index / bar height.
\foreach \bidx/\cidx/\hgt in {1/2/1.6em,2/3/4.1em,3/4/0.8em,4/5/0.4em,5/6/0.15em,6/7/0.15em}
  \node [colnode,anchor=south,minimum height=\hgt,minimum width=1em] (b\bidx) at ([yshift=0.5em]c\cidx.north) {};
% Word labels under the circles; the group keeps \scriptsize local.
{\scriptsize
\node [anchor=center] (n1) at ([yshift=-1em]c2.south) {\color{orange}Bush};
\node [anchor=west] (n2) at ([xshift=-0.2em]n1.east) {\color{ugreen!30}held};
\node [anchor=west] (n3) at ([xshift=0.35em]n2.east) {a};
\node [anchor=west] (n4) at ([xshift=0.5em]n3.east) {talk};
\node [anchor=west] (n5) at ([xshift=-0.3em]n4.east) {with};
\node [anchor=west] (n6) at ([xshift=-0.3em]n5.east) {Sharon};
}
\node [anchor=north] (l1) at ([xshift=1em,yshift=-1em]n3.south){\small {(a)原始分布}};
% Two crossing diagonal strokes form the cross to the right of the last bar.
\foreach \ystart/\yend in {2.4em/1.9em,1.9em/2.4em}
  \draw[-,very thick] ([xshift=1.2em,yshift=\ystart]b6.north)--([xshift=1.7em,yshift=\yend]b6.north);
\end{scope}
% ---- Panel (b) ----
\begin{scope}[xshift=4.4cm,yshift=0em]
% Here c1 is an undrawn circle used only as a positioning anchor.
\node [anchor=north west,circle,inner sep=4pt] (c1) at (0, 0) {};
\foreach \dy in {-0.5em,-2em,-3.5em,-5em}
  \draw[-,dotted] ([xshift=-1em,yshift=\dy]c1.south)--([xshift=9.3em,yshift=\dy]c1.south);
\node [cirnode,anchor=north] (c2) at ([yshift=-5.5em]c1.south) {};
\foreach \cur/\prev in {3/2,4/3,5/4,6/5,7/6}
  \node [cirnode,anchor=west] (c\cur) at ([xshift=0.6em]c\prev.east) {};
% Bars are unfilled and undrawn in this panel; they only provide anchors b1..b6.
\foreach \bidx/\cidx/\hgt in {1/2/1.6em,2/3/4.1em,3/4/0.8em,4/5/0.4em,5/6/0.15em,6/7/0.15em}
  \node [anchor=south,inner sep=0.1pt,minimum height=\hgt,minimum width=1em] (b\bidx) at ([yshift=0.5em]c\cidx.north) {};
{\scriptsize
\node [anchor=center] (n1) at ([yshift=-1em]c2.south) {\color{orange}Bush};
\node [anchor=west] (n2) at ([xshift=-0.2em]n1.east) {held};
\node [anchor=west] (n3) at ([xshift=0.35em]n2.east) {a};
\node [anchor=west] (n4) at ([xshift=0.5em]n3.east) {\color{blue!60}talk};
\node [anchor=west] (n5) at ([xshift=-0.3em]n4.east) {with};
\node [anchor=west] (n6) at ([xshift=-0.3em]n5.east) {Sharon};
}
% Disabled helper nodes and two disabled alternative curves, kept for reference.
%\node [anchor=west,show] (s1) at (-0.5em,-5.7em){};
%\node [anchor=west,show] (s2) at (-0.2em,-5.6em){};
%\node [anchor=west,show] (s3) at (1.1em,-5.5em){};
%\node [anchor=west,show] (s11) at (1.9em,-5em){};
%\node [anchor=west,show] (s4) at (3em,-4em){};
%\node [anchor=west,show] (s5) at (3.7em,-3em){};
%\node [anchor=west,show] (s12) at (4.2em,-2.2em){};
%\node [anchor=west,show] (s6) at (5.3em,-1.4em){};
%\node [anchor=west,show] (s7) at (6.3em,-2em){};
%\node [anchor=west,show] (s13) at (7.4em,-3em){};
%\node [anchor=west,show] (s8) at (8.4em,-3.9em){};
%\node [anchor=west,show] (s9) at (8.9em,-4.5em){};
%\node [anchor=west,show] (s10) at (9.3em,-5em){};
%\draw[-,blue!60,thick] (-0.5em,-5.7em)..controls (-0.2em,-5.6em) and (1.1em,-5.5em)..(1.9em,-5em)..controls (3em,-4em) and (3.7em,-3em)..(4.2em,-2.2em)..controls (5.3em,-1.4em) and (6.3em,-2em)..(7.4em,-3em)..controls (8.4em,-3.9em) and (8.9em,-4.5em)..(9.3em,-5em);
%\draw[-,blue!60,thick] (-1em,-6em)..controls (0em,-5.7em) and (1em,-5em)..(1.6em,-4.3em)..controls (5.3em,1em) and (7.4em,-2em)..(9.3em,-5em);
% The active curve: a single cubic Bezier segment across the panel.
\draw[-,blue!60,thick] ([xshift=-1em,yshift=-4.7em]c1.south)..controls (3.8em,-6em) and (3.9em,3.6em)..([xshift=9.3em,yshift=-4.3em]c1.south);
\node [anchor=north] (l1) at ([xshift=1em,yshift=-1em]n3.south){\small {(b)高斯分布}};
% Two horizontal strokes form the equals sign to the right of the last bar.
\foreach \dy in {2.2em,1.9em}
  \draw[-,very thick] ([xshift=1.2em,yshift=\dy]b6.north)--([xshift=1.7em,yshift=\dy]b6.north);
% "D" label above the word n4, with outward arrows ending in short vertical ticks.
\node [anchor=south] (t1) at ([yshift=6.7em]n4.north){$D$};
\draw[->] (t1.west)--([xshift=-1em]t1.west);
\draw[->] (t1.east)--([xshift=1em]t1.east);
\draw[-] ([xshift=1em,yshift=-0.5em]t1.east)--([xshift=1em,yshift=0.5em]t1.east);
\draw[-] ([xshift=-1em,yshift=-0.5em]t1.west)--([xshift=-1em,yshift=0.5em]t1.west);
\end{scope}
% ---- Panel (c) ----
\begin{scope}[xshift=8.8cm,yshift=0em]
\node [cirnode,anchor=north west] (c1) at (0, 0) {};
\foreach \dy in {-0.5em,-2em,-3.5em,-5em}
  \draw[-,dotted] ([xshift=-1em,yshift=\dy]c1.south)--([xshift=9.3em,yshift=\dy]c1.south);
\node [cirnode,anchor=north] (c2) at ([yshift=-5.5em]c1.south) {};
\foreach \cur/\prev in {3/2,4/3,5/4,6/5,7/6}
  \node [cirnode,anchor=west] (c\cur) at ([xshift=0.6em]c\prev.east) {};
\foreach \bidx/\cidx/\hgt in {1/2/0.15em,2/3/4.2em,3/4/3.7em,4/5/4.2em,5/6/0.8em,6/7/0.15em}
  \node [colnode,anchor=south,minimum height=\hgt,minimum width=1em] (b\bidx) at ([yshift=0.5em]c\cidx.north) {};
{\scriptsize
\node [anchor=center] (n1) at ([yshift=-1em]c2.south) {\color{orange}Bush};
\node [anchor=west] (n2) at ([xshift=-0.2em]n1.east) {\color{ugreen!30}held};
\node [anchor=west] (n3) at ([xshift=0.35em]n2.east) {\color{ugreen!30}a};
\node [anchor=west] (n4) at ([xshift=0.5em]n3.east) {\color{ugreen!30}talk};
\node [anchor=west] (n5) at ([xshift=-0.3em]n4.east) {with};
\node [anchor=west] (n6) at ([xshift=-0.3em]n5.east) {Sharon};
}
\node [anchor=north] (l1) at ([xshift=1em,yshift=-1em]n3.south){\small {(c)修改后的分布}};
\end{scope}
\end{tikzpicture}
\end{center}
\ No newline at end of file
% Three-panel figure captioned (a) original SAN, (b) 1D-convolution SAN and
% (c) 2D-convolution SAN. Each panel shows two 8x5 grids of small squares:
% grid a (left, unfilled) and grid b (right, gray-filled). The vertical label
% reads "attention heads", the horizontal label "sentence length".
% A blue and a red frame on grid a are each joined by dotted lines to a single
% framed cell on grid b (b21 and b42 respectively).
% Changes from the previous version:
%   * the "sentence length" label node is now named l3; it used to reuse the
%     name l2 and silently overwrote the "attention heads" label's name;
%   * the 40-entry coordinate lists are replaced by nested \foreach ranges that
%     visit the cells in the same order (rows 4 down to 0, columns 0 to 7).
\begin{tikzpicture}
\tikzset{elementnode/.style={anchor=center,draw,minimum size=0.6em,inner sep=0.1pt,gray!80}}
% ---- Panel (a): frames span the full grid width (4.8em = 8 cells of 0.6em) ----
\begin{scope}[scale=1.0]
\foreach \j in {4,3,...,0} {
  \foreach \i in {0,...,7} {
    \node[elementnode] (a\i\j) at (0.6em*\i,0.6em*\j) {};
  }
}
\foreach \j in {4,3,...,0} {
  \foreach \i in {0,...,7} {
    \node[elementnode,fill=gray!50] (b\i\j) at (0.6em*\i+5.5em,0.6em*\j) {};
  }
}
\node [anchor=south west,minimum height=0.5em,minimum width=4.8em,inner sep=0.1pt,very thick,blue!60,draw] (n1) at ([xshift=0em,yshift=0em]a01.south west) {};
\node [anchor=north west,minimum height=0.5em,minimum width=4.8em,inner sep=0.1pt,very thick,red!60,draw] (n2) at ([xshift=0em,yshift=0em]a02.north west) {};
\node [anchor=west,minimum height=0.6em,minimum width=0.6em,inner sep=0.1pt,very thick,blue!60,draw] (n3) at ([xshift=0em,yshift=0em]b21.west) {};
\node [anchor=west,minimum height=0.6em,minimum width=0.6em,inner sep=0.1pt,very thick,red!60,draw] (n4) at ([xshift=0em,yshift=0em]b42.west) {};
% Dotted connectors from each frame's right corners to its target cell's left corners.
\draw [-,very thick,dotted,blue!60] ([xshift=0em,yshift=0em]n1.south east) -- ([xshift=0em,yshift=0em]n3.south west);
\draw [-,very thick,dotted,blue!60] ([xshift=0em,yshift=0em]n1.north east) -- ([xshift=0em,yshift=0em]n3.north west);
\draw [-,very thick,dotted,red!60] ([xshift=0em,yshift=0em]n2.south east) -- ([xshift=0em,yshift=0em]n4.south west);
\draw [-,very thick,dotted,red!60] ([xshift=0em,yshift=0em]n2.north east) -- ([xshift=0em,yshift=0em]n4.north west);
\node [anchor=north] (l1) at ([xshift=0.5em,yshift=-1em]a70.south){\footnotesize {(a)原始SAN}};
\node [anchor=south,rotate=90] (l2) at ([xshift=0em,yshift=0em]a02.west){\scriptsize {注意力头}};
\node [anchor=south] (l3) at ([xshift=0em,yshift=0em]a44.north){\scriptsize {句子长度}};
\end{scope}
% ---- Panel (b): frames are 3em wide (5 cells) within a single row ----
\begin{scope}[scale=1.0,xshift=4.6cm]
\foreach \j in {4,3,...,0} {
  \foreach \i in {0,...,7} {
    \node[elementnode] (a\i\j) at (0.6em*\i,0.6em*\j) {};
  }
}
\foreach \j in {4,3,...,0} {
  \foreach \i in {0,...,7} {
    \node[elementnode,fill=gray!50] (b\i\j) at (0.6em*\i+5.5em,0.6em*\j) {};
  }
}
\node [anchor=south west,minimum height=0.5em,minimum width=3em,inner sep=0.1pt,very thick,blue!60,draw] (n1) at ([xshift=0em,yshift=0em]a01.south west) {};
\node [anchor=north west,minimum height=0.5em,minimum width=3em,inner sep=0.1pt,very thick,red!60,draw] (n2) at ([xshift=0em,yshift=0em]a22.north west) {};
\node [anchor=west,minimum height=0.6em,minimum width=0.6em,inner sep=0.1pt,very thick,blue!60,draw] (n3) at ([xshift=0em,yshift=0em]b21.west) {};
\node [anchor=west,minimum height=0.6em,minimum width=0.6em,inner sep=0.1pt,very thick,red!60,draw] (n4) at ([xshift=0em,yshift=0em]b42.west) {};
\draw [-,very thick,dotted,blue!60] ([xshift=0em,yshift=0em]n1.south east) -- ([xshift=0em,yshift=0em]n3.south west);
\draw [-,very thick,dotted,blue!60] ([xshift=0em,yshift=0em]n1.north east) -- ([xshift=0em,yshift=0em]n3.north west);
\draw [-,very thick,dotted,red!60] ([xshift=0em,yshift=0em]n2.south east) -- ([xshift=0em,yshift=0em]n4.south west);
\draw [-,very thick,dotted,red!60] ([xshift=0em,yshift=0em]n2.north east) -- ([xshift=0em,yshift=0em]n4.north west);
\node [anchor=north] (l1) at ([xshift=0.5em,yshift=-1em]a70.south){\footnotesize {(b)一维卷积SAN}};
\node [anchor=south,rotate=90] (l2) at ([xshift=0em,yshift=0em]a02.west){\scriptsize {注意力头}};
\node [anchor=south] (l3) at ([xshift=0em,yshift=0em]a44.north){\scriptsize {句子长度}};
\end{scope}
% ---- Panel (c): frames are 3em wide and 1.8em tall (5 cells x 3 rows) ----
\begin{scope}[scale=1.0,xshift=9.2cm]
\foreach \j in {4,3,...,0} {
  \foreach \i in {0,...,7} {
    \node[elementnode] (a\i\j) at (0.6em*\i,0.6em*\j) {};
  }
}
\foreach \j in {4,3,...,0} {
  \foreach \i in {0,...,7} {
    \node[elementnode,fill=gray!50] (b\i\j) at (0.6em*\i+5.5em,0.6em*\j) {};
  }
}
\node [anchor=south west,minimum height=1.8em,minimum width=3em,inner sep=0.1pt,very thick,blue!60,draw] (n1) at ([xshift=0em,yshift=0em]a00.south west) {};
\node [anchor=north west,minimum height=1.8em,minimum width=3em,inner sep=0.1pt,very thick,red!60,draw] (n2) at ([xshift=0em,yshift=0em]a23.north west) {};
\node [anchor=west,minimum height=0.6em,minimum width=0.6em,inner sep=0.1pt,very thick,blue!60,draw] (n3) at ([xshift=0em,yshift=0em]b21.west) {};
\node [anchor=west,minimum height=0.6em,minimum width=0.6em,inner sep=0.1pt,very thick,red!60,draw] (n4) at ([xshift=0em,yshift=0em]b42.west) {};
\draw [-,very thick,dotted,blue!60] ([xshift=0em,yshift=0em]n1.south east) -- ([xshift=0em,yshift=0em]n3.south west);
\draw [-,very thick,dotted,blue!60] ([xshift=0em,yshift=0em]n1.north east) -- ([xshift=0em,yshift=0em]n3.north west);
\draw [-,very thick,dotted,red!60] ([xshift=0em,yshift=0em]n2.south east) -- ([xshift=0em,yshift=0em]n4.south west);
\draw [-,very thick,dotted,red!60] ([xshift=0em,yshift=0em]n2.north east) -- ([xshift=0em,yshift=0em]n4.north west);
\node [anchor=north] (l1) at ([xshift=0.5em,yshift=-1em]a70.south){\footnotesize {(c)二维卷积SAN}};
\node [anchor=south,rotate=90] (l2) at ([xshift=0em,yshift=0em]a02.west){\scriptsize {注意力头}};
\node [anchor=south] (l3) at ([xshift=0em,yshift=0em]a44.north){\scriptsize {句子长度}};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
% Two vertical block diagrams side by side, read bottom to top:
%   left  -- captioned "(a) structure of several blocks in the Transformer encoder";
%   right -- captioned "(b) structure of several blocks in the Transformer encoder
%            after optimisation with an architecture-search method",
% plus a colour legend drawn on the background layer.
% NOTE(review): `drop shadow`, `fit`, the `background` layer and \bfnew are not
% defined in this file; presumably the preamble loads the TikZ shadows, fit and
% backgrounds libraries and defines \bfnew -- confirm.
% Change from the previous version: a stray leading comma in the option list
% of the ln_2.135 -> conv_1 arrow ("[,yshift=0.1em]") has been removed.
\begin{tikzpicture}
\tikzset{unit/.style={draw,rounded corners=2pt,drop shadow,font=\tiny}}
%left
\begin{scope}
% Nodes are placed at fixed heights; each list entry is index/height.
\foreach \x/\d in {1/2em, 2/8em, 3/18em, 4/24em}
  \node[unit,fill=yellow!20] at (0,\d) (ln_\x) {层正则};
\foreach \x/\d in {1/4em, 2/20em}
  \node[unit,fill=green!20] at (0,\d) (sa_\x) {8头自注意力:512};
\foreach \x/\d in {1/6em, 2/16em, 3/22em, 4/32em}
  \node[draw,circle,minimum size=1em,inner sep=1pt] at (0,\d) (add_\x) {\scriptsize\bfnew{+}};
\foreach \x/\d in {2/14em, 4/30em}
  \node[unit,fill=red!20] at (0,\d) (conv_\x) {卷积$1 \times 1$:512};
\foreach \x/\d in {1/10em,3/26em}
  \node[unit,fill=red!20] at (0,\d) (conv_\x) {卷积$1 \times 1$:2048};
\foreach \x/\d in {1/12em, 2/28em}
  \node[unit,fill=blue!20] at (0,\d) (relu_\x) {RELU};
% Input arrow entering the bottom node.
\draw[->,thick] ([yshift=-1.4em]ln_1.-90) -- ([yshift=-0.1em]ln_1.-90);
% Main vertical chain: top of each source node to bottom of the next node.
\foreach \src/\dst in {ln_1/sa_1, sa_1/add_1, add_1/ln_2, ln_2/conv_1,
  conv_1/relu_1, relu_1/conv_2, conv_2/add_2, add_2/ln_3, ln_3/sa_2,
  sa_2/add_3, add_3/ln_4, ln_4/conv_3, conv_3/relu_2, relu_2/conv_4,
  conv_4/add_4}
  \draw[->,thick] ([yshift=0.1em]\src.90) -- ([yshift=-0.1em]\dst.-90);
% Output arrow leaving the top node.
\draw[->,thick] ([yshift=0.1em]add_4.90) -- ([yshift=1em]add_4.90);
% Curved skip connections on the right-hand side, each ending at a "+" node.
\draw[->,thick] ([yshift=-0.8em]ln_1.-90) .. controls ([xshift=5em,yshift=-0.8em]ln_1.-90) and ([xshift=5em]add_1.0) .. (add_1.0);
\foreach \src/\dst in {add_1/add_2, add_2/add_3, add_3/add_4}
  \draw[->,thick] (\src.0) .. controls ([xshift=5em]\src.0) and ([xshift=5em]\dst.0) .. (\dst.0);
\node[font=\scriptsize] at (0em, -1em){(a) Transformer编码器中若干块的结构};
\end{scope}
%right
% The node names below deliberately reuse those of the left diagram; the left
% diagram is fully drawn by this point, so redefining them only affects what follows.
\begin{scope}[xshift=14em]
\foreach \x/\d in {1/2em, 2/8em, 3/16em, 4/22em, 5/28em}
  \node[unit,fill=yellow!20] at (0,\d) (ln_\x) {层正则};
\node[unit,fill=green!20] at (0,24em) (sa_1) {8头自注意力:512};
\foreach \x/\d in {1/6em, 2/14em, 3/20em, 4/26em, 5/36em}
  \node[draw,circle,minimum size=1em,inner sep=1pt] at (0,\d) (add_\x) {\scriptsize\bfnew{+}};
\node[unit,fill=red!20] at (0,30em) (conv_4) {卷积$1 \times 1$:2048};
\node[unit,fill=red!20] at (0,34em) (conv_5) {卷积$1 \times 1$:512};
\node[unit,fill=blue!20] at (0,32em) (relu_3) {RELU};
\node[unit,fill=red!20] at (0,4em) (glu_1) {门控线性单元:512};
% Two parallel branches, offset 3em left and right of the centre line.
\node[unit,fill=red!20] at (-3em,10em) (conv_1) {卷积$1 \times 1$:2048};
\node[unit,fill=cyan!20] at (3em,10em) (conv_2) {卷积$3 \times 1$:256};
\node[unit,fill=blue!20] at (-3em,12em) (relu_1) {RELU};
\node[unit,fill=blue!20] at (3em,12em) (relu_2) {RELU};
\node[unit,fill=cyan!20] at (0em,18em) (conv_3) {Sep卷积$9 \times 1$:256};
\draw[->,thick] ([yshift=-1.4em]ln_1.-90) -- ([yshift=-0.1em]ln_1.-90);
\foreach \src/\dst in {ln_1/glu_1, glu_1/add_1, add_1/ln_2}
  \draw[->,thick] ([yshift=0.1em]\src.90) -- ([yshift=-0.1em]\dst.-90);
% Fan-out from ln_2 into the two branches (anchors at 135 and 45 degrees).
\draw[->,thick] ([yshift=0.1em]ln_2.135) -- ([yshift=-0.1em]conv_1.-90);
\draw[->,thick] ([yshift=0.1em]ln_2.45) -- ([yshift=-0.1em]conv_2.-90);
\foreach \src/\dst in {conv_1/relu_1, conv_2/relu_2}
  \draw[->,thick] ([yshift=0.1em]\src.90) -- ([yshift=-0.1em]\dst.-90);
% Fan-in of the two branches into add_2 (anchors at -135 and -45 degrees).
\draw[->,thick] ([yshift=0.1em]relu_1.90) -- ([yshift=-0.1em]add_2.-135);
\draw[->,thick] ([yshift=0.1em]relu_2.90) -- ([yshift=-0.1em]add_2.-45);
\foreach \src/\dst in {add_2/ln_3, ln_3/conv_3, conv_3/add_3, add_3/ln_4,
  ln_4/sa_1, sa_1/add_4, add_4/ln_5, ln_5/conv_4, conv_4/relu_3,
  relu_3/conv_5, conv_5/add_5}
  \draw[->,thick] ([yshift=0.1em]\src.90) -- ([yshift=-0.1em]\dst.-90);
\draw[->,thick] ([yshift=0.1em]add_5.90) -- ([yshift=1em]add_5.90);
% Skip connections. The add_1 -> add_3 curve bypasses add_2 and bulges further
% out (8em instead of 5em).
\draw[->,thick] ([yshift=-0.8em]ln_1.-90) .. controls ([xshift=5em,yshift=-0.8em]ln_1.-90) and ([xshift=5em]add_1.0) .. (add_1.0);
\draw[->,thick] (add_1.0) .. controls ([xshift=8em]add_1.0) and ([xshift=8em]add_3.0) .. (add_3.0);
\foreach \src/\dst in {add_3/add_4, add_4/add_5}
  \draw[->,thick] (\src.0) .. controls ([xshift=5em]\src.0) and ([xshift=5em]\dst.0) .. (\dst.0);
\node[font=\scriptsize,align=center] at (0em, -1.5em){(b) 使用结构搜索方法优化后的 \\ Transformer编码器中若干块的结构};
% Legend: one colour swatch per row, each placed below the previous swatch,
% with a text label to its right.
\node[minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=blue!20] (act) at (5.5em, 38em){};
\node[anchor=west,font=\footnotesize] at ([xshift=0.1em]act.east){激活函数};
\node[anchor=north,minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=yellow!20] (nor) at ([yshift=-0.6em]act.south){};
\node[anchor=west,font=\footnotesize] at ([xshift=0.1em]nor.east){正则化};
\node[anchor=north,minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=cyan!20] (wc) at ([yshift=-0.6em]nor.south){};
\node[anchor=west,font=\footnotesize] at ([xshift=0.1em]wc.east){宽卷积};
\node[anchor=north,minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=green!20] (at) at ([yshift=-0.6em]wc.south){};
% Only this label is named (tag): it is listed in the fit of the legend frame below.
\node[anchor=west,font=\footnotesize] (tag) at ([xshift=0.1em]at.east){注意力机制};
\node[anchor=north,minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=red!20] (nsl) at ([yshift=-0.6em]at.south){};
\node[anchor=west,font=\footnotesize] at ([xshift=0.1em]nsl.east){非空间层};
% White framed box behind the legend, fitted around the swatches and the (tag) label.
\begin{pgfonlayer}{background}
\node[draw,drop shadow,fill=white][fit=(act)(nsl)(tag)]{};
\end{pgfonlayer}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
% A horizontal row of green nodes h_1, h_2, h_3, ..., h_n joined left to right,
% with a staircase of red nodes h_{n+1}, h_{n+2}, ..., h_{2n-1} above it.
% Each red node receives one arrow from the node it is stacked on and one
% arrow from the next green node in the row.
% NOTE(review): \mathbi and the colour ugreen are not defined here; presumably
% they come from the document preamble.
\begin{tikzpicture}
\begin{scope}
% hnode: green rounded box; tnode: red rounded box; wnode is defined but not
% used in this picture.
\tikzset{
  hnode/.style={rectangle,inner sep=0mm,minimum height=2em,minimum width=4.5em,rounded corners=5pt,fill=ugreen!30},
  tnode/.style={rectangle,inner sep=0mm,minimum height=2em,minimum width=4.5em,rounded corners=5pt,fill=red!30},
  wnode/.style={inner sep=0mm,minimum height=1.4em,minimum width=4.4em}
}
% Bottom row, each node 1em to the right of the previous one.
\node [hnode,anchor=south west] (n1) at (0,0) {$\mathbi{h}_1$};
\node [hnode,anchor=west] (n2) at ([xshift=1em]n1.east) {$\mathbi{h}_2$};
\node [hnode,anchor=west] (n3) at ([xshift=1em]n2.east) {$\mathbi{h}_3$};
\node [hnode,anchor=west] (n4) at ([xshift=1em]n3.east) {$\cdots$};
\node [hnode,anchor=west] (n5) at ([xshift=1em]n4.east) {$\mathbi{h}_n$};
% Staircase: each node sits 1em above and 2.8em to the right of the one below.
\node [tnode,anchor=south] (t1) at ([xshift=2.8em,yshift=1em]n1.north) {$\mathbi{h}_{n+1}$};
\node [tnode,anchor=south] (t2) at ([xshift=2.8em,yshift=1em]t1.north) {$\mathbi{h}_{n+2}$};
\node [tnode,anchor=south] (t3) at ([xshift=2.8em,yshift=1em]t2.north) {$\cdots$};
\node [tnode,anchor=south] (t4) at ([xshift=2.8em,yshift=1em]t3.north) {$\mathbi{h}_{2n-1}$};
% Left-to-right arrows along the bottom row.
\foreach \src/\dst in {n1/n2,n2/n3,n3/n4,n4/n5}
  \draw [->,thick] (\src.east) -- (\dst.west);
% Upward arrows into the staircase, in the original drawing order.
\foreach \src/\dst in {n1/t1,n2/t1,t1/t2,n3/t2,t2/t3,n4/t3,t3/t4,n5/t4}
  \draw [->,thick] (\src.north) -- (\dst.south);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
% Three side-by-side panels captioned "traditional machine learning",
% "deep learning" and "deep learning & network architecture search".
% The left half of each panel is an input -> model -> output column; the
% right half lists the manual steps, which shrink from three to two to one.
% Block arrows connect panel 1 to panel 2 and panel 2 to panel 3.
% NOTE(review): the `single arrow` shape, \bfnew and the colour ugreen are not
% defined here; presumably the preamble loads shapes.arrows and defines the rest.
\begin{tikzpicture}
\tikzset{
  stagebox/.style={rounded corners=4pt, minimum width=10.4em, minimum height=7em,fill=yellow!15!gray!15},
  modelbox/.style={anchor=west,draw,rounded corners=2pt, minimum width=5em, minimum height=3em,font=\scriptsize,align=center,inner sep=1pt},
  steplist/.style={anchor=east,font=\scriptsize,align=center,inner xsep=0pt},
  stagearrow/.style={draw,thick,anchor=west,single arrow,minimum height=1.6em,single arrow head extend=0.4em},
  stagecaption/.style={font=\footnotesize, anchor=north}
}
% Panel backgrounds, each 2.8em to the right of the previous one.
\node[stagebox] (box1) at (0em,0em){};
\foreach \cur/\prev in {2/1,3/2}
  \node[anchor=west,stagebox] (box\cur) at ([xshift=2.8em]box\prev.east){};
% Vertical dotted divider, 0.8em right of each panel's centre line.
\foreach \k in {1,2,3}
  \draw[densely dotted,line width=1.2pt] ([xshift=0.8em]box\k.90) -- ([xshift=0.8em]box\k.-90);
% Model boxes; only the third one is tinted green.
\node[modelbox,fill=yellow!10] (n1) at ([xshift=0.5em]box1.west){机器学习算法:\\决策树、支持 \\ 向量机$\cdots$};
\node[modelbox,fill=yellow!10] (n2) at ([xshift=0.5em]box2.west){神经网络:\\RNN、CNN、 \\ Transformer$\cdots$};
\node[modelbox,fill=ugreen!10] (n3) at ([xshift=0.5em]box3.west){神经网络:\\RNN、CNN、 \\ Transformer$\cdots$};
% Per-panel output label, "steps" heading, tinted input label and the two
% arrows input -> model -> output. \tint selects the input label's fill colour.
\foreach \x/\tint in {1/yellow,2/ugreen,3/ugreen}{
  \node[anchor=north,font=\scriptsize,inner ysep=0.1em] (output_\x)at ([xshift=-2.2em,yshift=-0.5em]box\x.north){输出};
  \node[anchor=north,inner ysep=0.1em] at ([xshift=3em,yshift=-0.5em]box\x.north){\scriptsize\bfnew{执行步骤}};
  \node[anchor=south,font=\scriptsize,inner ysep=0.1em,fill=\tint!10,rounded corners=2pt] at ([xshift=-2.2em,yshift=0.5em]box\x.south)(input_\x){输入};
  \draw[->,thick] (input_\x.90) -- (n\x.-90);
  \draw[->,thick] (n\x.90) -- (output_\x.-90);
}
% Step lists; the trailing "\\" entries in the shorter lists add empty lines.
\node[steplist] at ([xshift=-0.2em]box1.east){1.特征提取;\\2.模型设计; \\3.实验验证。};
\node[steplist] at ([xshift=-0.2em]box2.east){1.模型设计; \\2.实验验证。\\ };
\node[steplist] at ([xshift=-0.2em]box3.east){1.实验验证。 \\ \\};
% Block arrows in the gaps between consecutive panels.
\foreach \k in {1,2}
  \node [stagearrow] at ([xshift=0.6em]box\k.east) {};
% Captions below the panels.
\node[stagecaption] at ([yshift=-0.1em]box1.south){传统机器学习};
\node[stagecaption] at ([yshift=-0.1em]box2.south){深度学习};
\node[stagecaption] at ([yshift=-0.1em]box3.south){深度学习\&网络结构搜索};
\end{tikzpicture}
\ No newline at end of file
% A row of five circles h_{i-2}^l .. h_{i+2}^l with three nested frames centred
% on h_i^l (blue, green and red, from narrowest to widest). Each frame sends
% an arrow to one of three boxes head_1..head_3, and the three heads feed a
% single circle h_i^{l+1} on top.
% NOTE(review): \mathbi and the colour ugreen are not defined here; presumably
% they come from the document preamble.
\begin{tikzpicture}
\begin{scope}
\tikzset{
  cirnode/.style={circle,minimum size=3.7em,draw},
  recnode/.style={rectangle,rounded corners=2pt,inner sep=0mm,minimum height=1.5em,minimum width=4em,draw}
}
% Bottom row: the first circle, then four more, each 1em right of the previous.
\node [cirnode,anchor=west] (n1) at (0, 0) {$\mathbi{h}_{i-2}^l$};
\foreach \cur/\prev/\offs in {2/1/i-1,3/2/i,4/3/i+1,5/4/i+2}
  \node [cirnode,anchor=west] (n\cur) at ([xshift=1em]n\prev.east) {$\mathbi{h}_{\offs}^l$};
% Nested frames around the middle circle: index / colour / height / width.
\foreach \k/\clr/\hgt/\wid in {1/blue!30/4.2em/4.5em,2/ugreen!30/4.9em/14.5em,3/red!30/5.6em/24.5em}
  \node [anchor=center,color=\clr,minimum height=\hgt,minimum width=\wid,very thick,draw] (c\k) at (n3.center) {};
% Head boxes above the three inner circles (r1 over n2, r2 over n3, r3 over n4).
\foreach \k/\base in {1/2,2/3,3/4}
  \node [recnode,anchor=south] (r\k) at ([yshift=2.5em]n\base.north) {$\textrm{head}_\k$};
\node [cirnode,anchor=south] (n6) at ([yshift=1em]r2.north) {$\mathbi{h}_{i}^{l+1}$};
% Frame-to-head arrows, coloured like their frame. The 4.73em shifts move the
% start point sideways along the frame's top edge.
\draw [->,very thick,blue!30] (c1.north) -- (r2.south);
\draw [->,very thick,ugreen!30] ([xshift=4.73em]c2.north) -- (r3.south);
\draw [->,very thick,red!30] ([xshift=-4.73em]c3.north) -- (r1.south);
% Head-to-output arrows.
\draw [->] (r1.north) -- (n6.south west);
\draw [->] (r2.north) -- (n6.south);
\draw [->] (r3.north) -- (n6.south east);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{encnode}=[rectangle,inner sep=0mm,minimum height=2em,minimum width=4.5em,rounded corners=5pt,thick]
\tikzstyle{decnode}=[rectangle,inner sep=0mm,minimum height=2em,minimum width=4.5em,rounded corners=5pt,thick]
\tikzstyle{cnode}=[rectangle,draw=teal!80, inner sep=0mm,minimum height=1.4em,minimum width=4.4em,fill=teal!17,rounded corners=2pt,thick]
\node [anchor=north,encnode] (n1) at (0, 0) {编码器};
\node [anchor=north,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n2) at ([xshift=0em,yshift=-0.2em]n1.south) {$\mathbi{X}$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n3) at ([xshift=1.5em,yshift=0em]n2.east) {$\mathbi{h}_0$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n4) at ([xshift=1.5em,yshift=0em]n3.east) {$\mathbi{h}_1$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n5) at ([xshift=1.5em,yshift=0em]n4.east) {$\mathbi{h}_2$};
\node [anchor=west,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n6) at ([xshift=1em,yshift=0em]n5.east) {$\ldots$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n7) at ([xshift=1em,yshift=0em]n6.east) {$\mathbi{h}_{N-1}$};
\node [anchor=north,cnode] (cn1) at ([xshift=0em,yshift=-2.8em]n4.south) {\footnotesize{权重聚合$\mathbi{g}$}};
\node [anchor=north,cnode,opacity=0.5] (cn2) at ([xshift=0em,yshift=-2.8em]n3.south) {\footnotesize{权重聚合$\mathbi{g}$}};
\node [anchor=north,cnode,opacity=0.5] (cn3) at ([xshift=0em,yshift=-2.8em]n5.south) {\footnotesize{权重聚合$\mathbi{g}$}};
\node [anchor=north,cnode,opacity=0.5] (cn4) at ([xshift=0em,yshift=-2.8em]n7.south) {\footnotesize{权重聚合$\mathbi{g}$}};
\node [anchor=west,decnode] (n9) at ([xshift=0em,yshift=-7.2em]n1.west) {解码器};
\node [anchor=north,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n10) at ([xshift=0em,yshift=-0.2em]n9.south) {$\mathbi{y}_{<j}$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n11) at ([xshift=1.5em,yshift=0em]n10.east) {$\mathbi{s}_j^0$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n12) at ([xshift=1.5em,yshift=0em]n11.east) {$\mathbi{s}_j^1$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n13) at ([xshift=1.5em,yshift=0em]n12.east) {$\mathbi{s}_j^2$};
\node [anchor=west,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n14) at ([xshift=1em,yshift=0em]n13.east) {$\ldots$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n15) at ([xshift=1em,yshift=0em]n14.east) {$\mathbi{s}_j^{M-1}$};
\node [anchor=west,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n16) at ([xshift=1.5em,yshift=0em]n15.east) {$\mathbi{y}_{j}$};
\node [anchor=south,minimum height=1.5em,minimum width=2.5em] (n17) at ([xshift=0em,yshift=6em]n16.north) {};
\node [anchor=west,minimum height=0.5em,minimum width=4em] (n20) at ([xshift=0em,yshift=-2.5em]n2.east) {};
\node [anchor=west,minimum height=0.5em,minimum width=4em] (n21) at ([xshift=0em,yshift=-3.9em]n2.east) {};
\node [anchor=north,minimum height=0.5em,minimum width=4em] (n22) at ([xshift=0em,yshift=-0.7em]n11.south) {};
\begin{pgfonlayer}{background}
{
\node[rectangle,inner sep=2pt,fill=blue!7] [fit = (n1) (n7) (n17) (n20)] (bg1) {};
\node[rectangle,inner sep=2pt,fill=red!7] [fit = (n9) (n16) (n13) (n21) (n22)] (bg2) {};
}
\end{pgfonlayer}
\draw [->,thick] ([xshift=0em,yshift=0em]n2.east) -- ([xshift=0em,yshift=0em]n3.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n3.east) -- ([xshift=0em,yshift=0em]n4.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n4.east) -- ([xshift=0em,yshift=0em]n5.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n5.east) -- ([xshift=0em,yshift=0em]n6.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n6.east) -- ([xshift=0em,yshift=0em]n7.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n10.east) -- ([xshift=0em,yshift=0em]n11.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n11.east) -- ([xshift=0em,yshift=0em]n12.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n12.east) -- ([xshift=0em,yshift=0em]n13.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n13.east) -- ([xshift=0em,yshift=0em]n14.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n14.east) -- ([xshift=0em,yshift=0em]n15.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n15.east) -- ([xshift=0em,yshift=0em]n16.west);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]cn2.south) -- ([xshift=0em,yshift=0em]n11.north);
\draw [->,thick] ([xshift=0em,yshift=0em]cn1.south) -- ([xshift=0em,yshift=0em]n12.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]cn3.south) -- ([xshift=0em,yshift=0em]n13.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]cn4.south) -- ([xshift=0em,yshift=0em]n15.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n2.south)..controls +(south:1.4em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn2.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n3.south)..controls +(south:0.8em) and +(north:0.7em)..([xshift=0em,yshift=0em]cn2.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n4.south)..controls +(south:0.6em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn2.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n5.south)..controls +(south:0.7em) and +(north:0.7em)..([xshift=0em,yshift=0em]cn2.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n7.south)..controls +(south:1.4em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn2.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n2.south)..controls +(south:1.4em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn3.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n3.south)..controls +(south:0.8em) and +(north:0.7em)..([xshift=0em,yshift=0em]cn3.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n4.south)..controls +(south:0.6em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn3.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n5.south)..controls +(south:0.7em) and +(north:0.7em)..([xshift=0em,yshift=0em]cn3.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n7.south)..controls +(south:1.4em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn3.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n2.south)..controls +(south:1.4em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn4.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n3.south)..controls +(south:0.8em) and +(north:0.7em)..([xshift=0em,yshift=0em]cn4.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n4.south)..controls +(south:0.6em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn4.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n5.south)..controls +(south:0.7em) and +(north:0.7em)..([xshift=0em,yshift=0em]cn4.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n7.south)..controls +(south:1.4em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn4.north);
\draw [->,thick] ([xshift=0em,yshift=0em]n2.south)..controls +(south:1.4em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn1.north);
\draw [->,thick] ([xshift=0em,yshift=0em]n3.south)..controls +(south:0.8em) and +(north:0.7em)..([xshift=0em,yshift=0em]cn1.north);
\draw [->,thick] ([xshift=0em,yshift=0em]n4.south)..controls +(south:0.6em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn1.north);
\draw [->,thick] ([xshift=0em,yshift=0em]n5.south)..controls +(south:0.7em) and +(north:0.7em)..([xshift=0em,yshift=0em]cn1.north);
\draw [->,thick] ([xshift=0em,yshift=0em]n7.south)..controls +(south:1.4em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn1.north);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
...@@ -142,12 +142,21 @@ A_{ij}^{rel} &=& \underbrace{\mathbi{E}_{x_i}\mathbi{W}_Q\mathbi{W}_{K}^{T}\math ...@@ -142,12 +142,21 @@ A_{ij}^{rel} &=& \underbrace{\mathbi{E}_{x_i}\mathbi{W}_Q\mathbi{W}_{K}^{T}\math
\begin{itemize} \begin{itemize}
\vspace{0.5em} \vspace{0.5em}
\item 高斯约束\upcite{Yang2018ModelingLF}。这类方法的核心思想是引入可学习的高斯分布进行局部建模,之后与自注意力机制中计算得到的注意力分布进行融合,如下图:形式上,可以用自注意力机制中计算得到的中间表示来预测高斯分布的中心和偏差,得到高斯分布$\mathbi{G}$后累加在模型计算得到的相关性系数之上在进行归一化计算,具体的形式如下: \item 高斯约束\upcite{Yang2018ModelingLF}。这类方法的核心思想是引入可学习的高斯分布进行局部建模,之后与自注意力机制中计算得到的注意力分布进行融合,如图\ref{fig:15-1}所示。形式上,可以用自注意力机制中计算得到的中间表示来预测高斯分布的中心和偏差,得到高斯分布$\mathbi{G}$后累加在模型计算得到的相关性系数之上再进行归一化计算,具体的形式如下:{\red{下面图片中的曲线再调整一下}}
\begin{equation} \begin{equation}
\mathbi{e}_{ij} = \frac{(\mathbi{x}_i \mathbi{W}_Q){(\mathbi{x}_j \mathbi{W}_K)}^{T}}{\sqrt{d_k}} + \mathbi{G} \mathbi{e}_{ij} = \frac{(\mathbi{x}_i \mathbi{W}_Q){(\mathbi{x}_j \mathbi{W}_K)}^{T}}{\sqrt{d_k}} + \mathbi{G}
\label{eq:15-13} \label{eq:15-13}
\end{equation} \end{equation}
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter15/Figures/figure-attention-distribution-based-on-gaussian-distribution}
\caption{融合高斯分布的注意力分布}
\label{fig:15-1}
\end{figure}
%-------------------------------------------
\noindent 其中,$\mathbi{G} \in \mathbb{R}^{m\times m}$,$m$是源语言的句子长度。$\mathbi{G}$中的每个元素$G_{ij}$表示当前单词$\mathbi{x}_j$和预测的中心位置$P_i$之间的关联程度,计算公式如下: \noindent 其中,$\mathbi{G} \in \mathbb{R}^{m\times m}$,$m$是源语言的句子长度。$\mathbi{G}$中的每个元素$G_{ij}$表示当前单词$\mathbi{x}_j$和预测的中心位置$P_i$之间的关联程度,计算公式如下:
\begin{equation} \begin{equation}
G_{ij} = - \frac{{(j - P_i)}^2}{2\sigma_i^2} G_{ij} = - \frac{{(j - P_i)}^2}{2\sigma_i^2}
...@@ -160,15 +169,6 @@ G_{ij} = - \frac{{(j - P_i)}^2}{2\sigma_i^2} ...@@ -160,15 +169,6 @@ G_{ij} = - \frac{{(j - P_i)}^2}{2\sigma_i^2}
\label{eq:15-15} \label{eq:15-15}
\end{equation} \end{equation}
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter15/Figures/figure-attention-distribution-based-on-gaussian-distribution}
\caption{融合高斯分布的注意力分布}
\label{fig:15-1}
\end{figure}
%-------------------------------------------
\noindent 通过标量$m$控制得到的中心位置和偏差的数值为0和序列长度之间的实数值,$p_i$和$v_i$为网络计算的中间结果,分别用于预测中心位置和窗口大小。用如下方式计算: \noindent 通过标量$m$控制得到的中心位置和偏差的数值为0和序列长度之间的实数值,$p_i$和$v_i$为网络计算的中间结果,分别用于预测中心位置和窗口大小。用如下方式计算:
\begin{eqnarray} \begin{eqnarray}
p_i &=& \mathbi{I}_p^T\textrm{Tanh}(\mathbi{W}_p\mathbi{Q}_i) \\ p_i &=& \mathbi{I}_p^T\textrm{Tanh}(\mathbi{W}_p\mathbi{Q}_i) \\
...@@ -184,7 +184,7 @@ v_i &=& \mathbi{I}_d^T\textrm{Tanh}(\mathbi{W}_p\mathbi{Q}_i) ...@@ -184,7 +184,7 @@ v_i &=& \mathbi{I}_d^T\textrm{Tanh}(\mathbi{W}_p\mathbi{Q}_i)
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\includegraphics[scale=0.4]{./Chapter15/Figures/figure-multi-scale-local-modeling.png} \input{./Chapter15/Figures/figure-multi-scale-local-modeling}
\caption{多尺度局部建模} \caption{多尺度局部建模}
\label{fig:15-2} \label{fig:15-2}
\end{figure} \end{figure}
...@@ -227,12 +227,12 @@ C(\mathbi{x}_j \mathbi{W}_K,\omega) = (\mathbi{x}_{j-\omega},\ldots,\mathbi{x}_{ ...@@ -227,12 +227,12 @@ C(\mathbi{x}_j \mathbi{W}_K,\omega) = (\mathbi{x}_{j-\omega},\ldots,\mathbi{x}_{
\vspace{0.5em} \vspace{0.5em}
\item 使用1维卷积注意力网络(图\ref{fig:15-3}(b))。为了捕捉短距离依赖,则可以使用一维的卷积自注意力网络(1D-CSAN)将关注的范围限制在相近的元素窗口中。其形式上十分简单,只需预先设定好局部建模的窗口大小范围D,并在进行注意力权重计算和对Value值进行加权求和时,将其限制在设定好的窗口范围内即可。 \item 使用1维卷积注意力网络(图\ref{fig:15-3}(b))。为了捕捉短距离依赖,则可以使用一维的卷积自注意力网络(1D-CSAN)将关注的范围限制在相近的元素窗口中。其形式上十分简单,只需预先设定好局部建模的窗口大小范围D,并在进行注意力权重计算和对Value值进行加权求和时,将其限制在设定好的窗口范围内即可。
\vspace{0.5em} \vspace{0.5em}
\item 使用2维卷积注意力网络(图\ref{fig:15-3}(c))。在1维卷积注意力网络的基础上对多个注意力头之间的信息进行了交互建模,打破了注意力头之间的界限。 1D-CDAN的关注区域为$1\times D$,当将其扩展为2维矩形$(N×D)$,长和宽分别为局部窗口的大小和参与建模的自注意力头的个数。在这种形势下,允许模型计算某个头中的第$i$个元素和第$s$个头中的第$j$个元素之间的相关性系数。实现了对不同子空间之间关系的建模,所得到的注意力分布表示了头之间的依赖关系。 \item 使用2维卷积注意力网络(图\ref{fig:15-3}(c))。在1维卷积注意力网络的基础上对多个注意力头之间的信息进行了交互建模,打破了注意力头之间的界限。 1D-CSAN的关注区域为$1\times D$,将其扩展为2维矩形$(N\times D)$后,长和宽分别为局部窗口的大小和参与建模的自注意力头的个数。在这种形式下,允许模型计算某个头中的第$i$个元素和第$s$个头中的第$j$个元素之间的相关性系数。实现了对不同子空间之间关系的建模,所得到的注意力分布表示了头之间的依赖关系。
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\includegraphics[scale=0.3]{./Chapter15/Figures/figure-convolutional-attention-network.png} \input{./Chapter15/Figures/figure-convolutional-attention-network}
\caption{卷积注意力网络} \caption{卷积注意力网络}
\label{fig:15-3} \label{fig:15-3}
\end{figure} \end{figure}
...@@ -1096,7 +1096,7 @@ lr=d_{model}^{-0.5}\cdot step\_num^{-0.5} ...@@ -1096,7 +1096,7 @@ lr=d_{model}^{-0.5}\cdot step\_num^{-0.5}
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\includegraphics[scale=0.1]{./Chapter15/Figures/figure-encoder-structure-of-transformer-model-optimized-by-nas.jpg} \input{./Chapter15/Figures/figure-encoder-structure-of-transformer-model-optimized-by-nas}
\caption{传统Transformer以及通过网络结构搜索方法优化后的Transformer模型编码器结构} \caption{传统Transformer以及通过网络结构搜索方法优化后的Transformer模型编码器结构}
\label{fig:15-27} \label{fig:15-27}
\end{figure} \end{figure}
...@@ -1107,7 +1107,7 @@ lr=d_{model}^{-0.5}\cdot step\_num^{-0.5} ...@@ -1107,7 +1107,7 @@ lr=d_{model}^{-0.5}\cdot step\_num^{-0.5}
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\includegraphics[scale=0.6]{./Chapter15/Figures/figure-evolution-and-change-of-ml-methods.jpg} \input{./Chapter15/Figures/figure-evolution-and-change-of-ml-methods}
\caption{机器学习方法的演化与变迁} \caption{机器学习方法的演化与变迁}
\label{fig:15-28} \label{fig:15-28}
\end{figure} \end{figure}
......
...@@ -9560,6 +9560,7 @@ author = {Zhuang Liu and ...@@ -9560,6 +9560,7 @@ author = {Zhuang Liu and
journal = {CoRR}, journal = {CoRR},
year = {2019} year = {2019}
} }
@article{DBLP:journals/corr/abs-2002-06823, @article{DBLP:journals/corr/abs-2002-06823,
author = {Jinhua Zhu and author = {Jinhua Zhu and
Yingce Xia and Yingce Xia and
...@@ -9570,7 +9571,7 @@ author = {Zhuang Liu and ...@@ -9570,7 +9571,7 @@ author = {Zhuang Liu and
Houqiang Li and Houqiang Li and
Tie-Yan Liu}, Tie-Yan Liu},
title = {Incorporating {BERT} into Neural Machine Translation}, title = {Incorporating {BERT} into Neural Machine Translation},
journal = {CoRR}, journal = {International Conference on Learning Representations},
year = {2020} year = {2020}
} }
@inproceedings{song2019mass, @inproceedings{song2019mass,
...@@ -9582,7 +9583,7 @@ author = {Zhuang Liu and ...@@ -9582,7 +9583,7 @@ author = {Zhuang Liu and
title = {{MASS:} Masked Sequence to Sequence Pre-training for Language Generation}, title = {{MASS:} Masked Sequence to Sequence Pre-training for Language Generation},
volume = {97}, volume = {97},
pages = {5926--5936}, pages = {5926--5936},
publisher = {{PMLR}}, publisher = {International Conference on Machine Learning},
year = {2019} year = {2019}
} }
@article{DBLP:journals/corr/Ruder17a, @article{DBLP:journals/corr/Ruder17a,
...@@ -9602,7 +9603,7 @@ author = {Zhuang Liu and ...@@ -9602,7 +9603,7 @@ author = {Zhuang Liu and
title = {Dual Supervised Learning}, title = {Dual Supervised Learning},
volume = {70}, volume = {70},
pages = {3789--3798}, pages = {3789--3798},
publisher = {{PMLR}}, publisher = {International Conference on Machine Learning},
year = {2017} year = {2017}
} }
@inproceedings{DBLP:conf/iccv/ZhuPIE17, @inproceedings{DBLP:conf/iccv/ZhuPIE17,
...@@ -9656,12 +9657,12 @@ author = {Zhuang Liu and ...@@ -9656,12 +9657,12 @@ author = {Zhuang Liu and
title = {Analyzing Uncertainty in Neural Machine Translation}, title = {Analyzing Uncertainty in Neural Machine Translation},
volume = {80}, volume = {80},
pages = {3953--3962}, pages = {3953--3962},
publisher = {{PMLR}}, publisher = {International Conference on Machine Learning},
year = {2018} year = {2018}
} }
@inproceedings{finding2006adafre, @inproceedings{finding2006adafre,
author = {S. F. Adafre and Maarten de Rijke}, author = {S. F. Adafre and Maarten de Rijke},
title = {Finding Similar Sentences across Multiple Languages in Wikipedia }, title = {Finding Similar Sentences across Multiple Languages in Wikipedia},
publisher = {Annual Conference of the European Association for Machine Translation}, publisher = {Annual Conference of the European Association for Machine Translation},
year = {2006} year = {2006}
} }
...@@ -9732,7 +9733,7 @@ author = {Zhuang Liu and ...@@ -9732,7 +9733,7 @@ author = {Zhuang Liu and
} }
@article{2015OnGulcehre, @article{2015OnGulcehre,
title = {On Using Monolingual Corpora in Neural Machine Translation}, title = {On Using Monolingual Corpora in Neural Machine Translation},
author = { Gulcehre Caglar and author = {Gulcehre Caglar and
Firat Orhan and Firat Orhan and
Xu Kelvin and Xu Kelvin and
Cho Kyunghyun and Cho Kyunghyun and
...@@ -9740,7 +9741,7 @@ author = {Zhuang Liu and ...@@ -9740,7 +9741,7 @@ author = {Zhuang Liu and
Lin Huei Chi and Lin Huei Chi and
Bougares Fethi and Bougares Fethi and
Schwenk Holger and Schwenk Holger and
Bengio Yoshua }, Bengio Yoshua},
journal = {Computer Science}, journal = {Computer Science},
year = {2015}, year = {2015},
} }
...@@ -9777,7 +9778,7 @@ author = {Zhuang Liu and ...@@ -9777,7 +9778,7 @@ author = {Zhuang Liu and
} }
@article{1966ASchnemann, @article{1966ASchnemann,
title={A generalized solution of the orthogonal procrustes problem}, title={A generalized solution of the orthogonal procrustes problem},
author={Schnemann, Peter H. }, author={Sch{\"o}nemann, Peter H.},
journal={Psychometrika}, journal={Psychometrika},
volume={31}, volume={31},
number={1}, number={1},
...@@ -9861,7 +9862,7 @@ author = {Zhuang Liu and ...@@ -9861,7 +9862,7 @@ author = {Zhuang Liu and
Atsushi Fujita}, Atsushi Fujita},
title = {Iterative Training of Unsupervised Neural and Statistical Machine title = {Iterative Training of Unsupervised Neural and Statistical Machine
Translation Systems}, Translation Systems},
journal = {{ACM} Trans. Asian Low Resour. Lang. Inf. Process.}, journal = {ACM Transactions on Asian and Low-Resource Language Information Processing},
volume = {19}, volume = {19},
number = {5}, number = {5},
pages = {68:1--68:21}, pages = {68:1--68:21},
...@@ -9913,7 +9914,7 @@ author = {Zhuang Liu and ...@@ -9913,7 +9914,7 @@ author = {Zhuang Liu and
@article{A2020Li, @article{A2020Li,
title={A Simple and Effective Approach to Robust Unsupervised Bilingual Dictionary Induction}, title={A Simple and Effective Approach to Robust Unsupervised Bilingual Dictionary Induction},
author={Yanyang Li and Yingfeng Luo and Ye Lin and Quan Du and Huizhen Wang and Shujian Huang and Tong Xiao and Jingbo Zhu}, author={Yanyang Li and Yingfeng Luo and Ye Lin and Quan Du and Huizhen Wang and Shujian Huang and Tong Xiao and Jingbo Zhu},
publisher={International Conference on Computational Linguistics}, journal={International Conference on Computational Linguistics},
year={2020} year={2020}
} }
@inproceedings{2018When, @inproceedings{2018When,
...@@ -9955,6 +9956,7 @@ author = {Zhuang Liu and ...@@ -9955,6 +9956,7 @@ author = {Zhuang Liu and
publisher = {AAAI Conference on Artificial Intelligence}, publisher = {AAAI Conference on Artificial Intelligence},
year = {2020} year = {2020}
} }
@article{DBLP:journals/corr/abs-2001-08210, @article{DBLP:journals/corr/abs-2001-08210,
author = {Yinhan Liu and author = {Yinhan Liu and
Jiatao Gu and Jiatao Gu and
...@@ -9965,10 +9967,13 @@ author = {Zhuang Liu and ...@@ -9965,10 +9967,13 @@ author = {Zhuang Liu and
Mike Lewis and Mike Lewis and
Luke Zettlemoyer}, Luke Zettlemoyer},
title = {Multilingual Denoising Pre-training for Neural Machine Translation}, title = {Multilingual Denoising Pre-training for Neural Machine Translation},
journal = {CoRR}, journal = {Transactions of the Association for Computational Linguistics},
volume = {abs/2001.08210}, volume = {8},
pages = {726--742},
year = {2020} year = {2020}
} }
@inproceedings{DBLP:conf/aaai/JiZDZCL20, @inproceedings{DBLP:conf/aaai/JiZDZCL20,
author = {Baijun Ji and author = {Baijun Ji and
Zhirui Zhang and Zhirui Zhang and
...@@ -9997,25 +10002,25 @@ author = {Zhuang Liu and ...@@ -9997,25 +10002,25 @@ author = {Zhuang Liu and
publisher = {Annual Meeting of the Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2020} year = {2020}
} }
@article{DBLP:journals/corr/abs-2009-08088, @inproceedings{DBLP:journals/corr/abs-2009-08088,
author = {Zhen Yang and author = {Zhen Yang and
Bojie Hu and Bojie Hu and
Ambyera Han and Ambyera Han and
Shen Huang and Shen Huang and
Qi Ju}, Qi Ju},
title = {Code-switching pre-training for neural machine translation}, title = {{CSP:} Code-Switching Pre-training for Neural Machine Translation},
journal = {CoRR}, pages = {2624--2636},
volume = {abs/2009.08088}, publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2020} year = {2020}
} }
@article{DBLP:journals/corr/abs-2010-09403, @inproceedings{DBLP:journals/corr/abs-2010-09403,
author = {Dusan Varis and author = {Dusan Varis and
Ondrej Bojar}, Ondrej Bojar},
title = {Unsupervised Pretraining for Neural Machine Translation Using Elastic title = {Unsupervised Pretraining for Neural Machine Translation Using Elastic
Weight Consolidation}, Weight Consolidation},
journal = {CoRR}, pages = {130--135},
volume = {abs/2010.09403}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2020} year = {2019}
} }
@inproceedings{DBLP:conf/emnlp/LampleOCDR18, @inproceedings{DBLP:conf/emnlp/LampleOCDR18,
author = {Guillaume Lample and author = {Guillaume Lample and
...@@ -10032,7 +10037,7 @@ author = {Zhuang Liu and ...@@ -10032,7 +10037,7 @@ author = {Zhuang Liu and
author = {Connor Shorten and author = {Connor Shorten and
Taghi M. Khoshgoftaar}, Taghi M. Khoshgoftaar},
title = {A survey on Image Data Augmentation for Deep Learning}, title = {A survey on Image Data Augmentation for Deep Learning},
journal = {J. Big Data}, journal = {Journal of Big Data},
volume = {6}, volume = {6},
pages = {60}, pages = {60},
year = {2019} year = {2019}
...@@ -10163,6 +10168,7 @@ author = {Zhuang Liu and ...@@ -10163,6 +10168,7 @@ author = {Zhuang Liu and
@article{hartmann2018empirical, @article{hartmann2018empirical,
title={Empirical observations on the instability of aligning word vector spaces with GANs}, title={Empirical observations on the instability of aligning word vector spaces with GANs},
author={Hartmann, Mareike and Kementchedjhieva, Yova and S{\o}gaard, Anders}, author={Hartmann, Mareike and Kementchedjhieva, Yova and S{\o}gaard, Anders},
journal = {openreview.net},
year={2018} year={2018}
} }
@inproceedings{DBLP:conf/emnlp/Kementchedjhieva19, @inproceedings{DBLP:conf/emnlp/Kementchedjhieva19,
...@@ -10228,6 +10234,7 @@ author = {Zhuang Liu and ...@@ -10228,6 +10234,7 @@ author = {Zhuang Liu and
@article{2019ADabre, @article{2019ADabre,
title={A Survey of Multilingual Neural Machine Translation}, title={A Survey of Multilingual Neural Machine Translation},
author={Dabre, Raj and Chu, Chenhui and Kunchukuttan, Anoop }, author={Dabre, Raj and Chu, Chenhui and Kunchukuttan, Anoop },
journal={ACM Computing Surveys},
year={2019}, year={2019},
} }
@inproceedings{DBLP:conf/naacl/ZophK16, @inproceedings{DBLP:conf/naacl/ZophK16,
...@@ -10264,7 +10271,7 @@ author = {Zhuang Liu and ...@@ -10264,7 +10271,7 @@ author = {Zhuang Liu and
author = {Hua Wu and author = {Hua Wu and
Haifeng Wang}, Haifeng Wang},
title = {Pivot language approach for phrase-based statistical machine translation}, title = {Pivot language approach for phrase-based statistical machine translation},
journal = {Mach. Transl.}, journal = {Machine Translation},
volume = {21}, volume = {21},
number = {3}, number = {3},
pages = {165--181}, pages = {165--181},
...@@ -10273,7 +10280,7 @@ author = {Zhuang Liu and ...@@ -10273,7 +10280,7 @@ author = {Zhuang Liu and
@article{Farsi2010somayeh, @article{Farsi2010somayeh,
author = {Somayeh Bakhshaei and Shahram Khadivi and Noushin Riahi }, author = {Somayeh Bakhshaei and Shahram Khadivi and Noushin Riahi },
title = {Farsi-german statistical machine translation through bridge language}, title = {Farsi-german statistical machine translation through bridge language},
publisher = {International Telecommunications Symposium}, journal = {International Telecommunications Symposium},
pages = {165--181}, pages = {165--181},
year = {2010} year = {2010}
} }
...@@ -10297,7 +10304,7 @@ author = {Zhuang Liu and ...@@ -10297,7 +10304,7 @@ author = {Zhuang Liu and
title = {Improving Pivot-Based Statistical Machine Translation by Pivoting title = {Improving Pivot-Based Statistical Machine Translation by Pivoting
the Co-occurrence Count of Phrase Pairs}, the Co-occurrence Count of Phrase Pairs},
pages = {1665--1675}, pages = {1665--1675},
publisher = {{ACL}}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2014} year = {2014}
} }
@inproceedings{DBLP:conf/acl/MiuraNSTN15, @inproceedings{DBLP:conf/acl/MiuraNSTN15,
...@@ -10350,7 +10357,7 @@ author = {Zhuang Liu and ...@@ -10350,7 +10357,7 @@ author = {Zhuang Liu and
@inproceedings{de2006catalan, @inproceedings{de2006catalan,
title={Catalan-English statistical machine translation without parallel corpus: bridging through Spanish}, title={Catalan-English statistical machine translation without parallel corpus: bridging through Spanish},
author={De Gispert, Adri{\`a} and Marino, Jose B}, author={De Gispert, Adri{\`a} and Marino, Jose B},
booktitle={Proc. of 5th International Conference on Language Resources and Evaluation (LREC)}, publisher={International Conference on Language Resources and Evaluation},
pages={65--68}, pages={65--68},
year={2006} year={2006}
} }
...@@ -10381,12 +10388,19 @@ author = {Zhuang Liu and ...@@ -10381,12 +10388,19 @@ author = {Zhuang Liu and
volume = {abs/1503.02531}, volume = {abs/1503.02531},
year = {2015} year = {2015}
} }
@article{gu2018meta,
title={Meta-learning for low-resource neural machine translation}, @inproceedings{gu2018meta,
author={Gu, Jiatao and Wang, Yong and Chen, Yun and Cho, Kyunghyun and Li, Victor OK}, author = {Jiatao Gu and
journal={arXiv preprint arXiv:1808.08437}, Yong Wang and
year={2018} Yun Chen and
Victor O. K. Li and
Kyunghyun Cho},
title = {Meta-Learning for Low-Resource Neural Machine Translation},
pages = {3622--3631},
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2018}
} }
@inproceedings{DBLP:conf/naacl/GuHDL18, @inproceedings{DBLP:conf/naacl/GuHDL18,
author = {Jiatao Gu and author = {Jiatao Gu and
Hany Hassan and Hany Hassan and
...@@ -10440,7 +10454,7 @@ author = {Zhuang Liu and ...@@ -10440,7 +10454,7 @@ author = {Zhuang Liu and
} }
@book{2009Handbook, @book{2009Handbook,
title={Handbook Of Research On Machine Learning Applications and Trends: Algorithms, Methods and Techniques - 2 Volumes}, title={Handbook Of Research On Machine Learning Applications and Trends: Algorithms, Methods and Techniques - 2 Volumes},
author={ Olivas, Emilio Soria and Guerrero, Jose David Martin and Sober, Marcelino Martinez and Benedito, Jose Rafael Magdalena and Lopez, Antonio Jose Serrano }, author={Olivas, Emilio Soria and Guerrero, Jose David Martin and Sober, Marcelino Martinez and Benedito, Jose Rafael Magdalena and Lopez, Antonio Jose Serrano },
publisher={Information Science Reference - Imprint of: IGI Publishing}, publisher={Information Science Reference - Imprint of: IGI Publishing},
year={2009}, year={2009},
} }
...@@ -10463,11 +10477,17 @@ author = {Zhuang Liu and ...@@ -10463,11 +10477,17 @@ author = {Zhuang Liu and
publisher = {OpenReview.net}, publisher = {OpenReview.net},
year = {2019} year = {2019}
} }
@article{platanios2018contextual,
title={Contextual parameter generation for universal neural machine translation},
author={Platanios, Emmanouil Antonios and Sachan, Mrinmaya and Neubig, Graham and Mitchell, Tom}, @inproceedings{platanios2018contextual,
journal={arXiv preprint arXiv:1808.08493}, author = {Emmanouil Antonios Platanios and
year={2018} Mrinmaya Sachan and
Graham Neubig and
Tom M. Mitchell},
title = {Contextual Parameter Generation for Universal Neural Machine Translation},
pages = {425--435},
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2018}
} }
@inproceedings{ji2020cross, @inproceedings{ji2020cross,
title={Cross-Lingual Pre-Training Based Transfer for Zero-Shot Neural Machine Translation}, title={Cross-Lingual Pre-Training Based Transfer for Zero-Shot Neural Machine Translation},
...@@ -10517,7 +10537,7 @@ author = {Zhuang Liu and ...@@ -10517,7 +10537,7 @@ author = {Zhuang Liu and
@article{dabre2020survey, @article{dabre2020survey,
title={A survey of multilingual neural machine translation}, title={A survey of multilingual neural machine translation},
author={Dabre, Raj and Chu, Chenhui and Kunchukuttan, Anoop}, author={Dabre, Raj and Chu, Chenhui and Kunchukuttan, Anoop},
journal={ACM Computing Surveys (CSUR)}, journal={ACM Computing Surveys},
volume={53}, volume={53},
number={5}, number={5},
pages={1--38}, pages={1--38},
...@@ -10667,12 +10687,20 @@ author = {Zhuang Liu and ...@@ -10667,12 +10687,20 @@ author = {Zhuang Liu and
publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics}, publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
year = {2019} year = {2019}
} }
@article{firat2016zero,
title={Zero-resource translation with multi-lingual neural machine translation},
author={Firat, Orhan and Sankaran, Baskaran and Al-Onaizan, Yaser and Vural, Fatos T Yarman and Cho, Kyunghyun}, @inproceedings{firat2016zero,
journal={arXiv preprint arXiv:1606.04164}, author = {Orhan Firat and
year={2016} Baskaran Sankaran and
Yaser Al{-}Onaizan and
Fatos T. Yarman{-}Vural and
Kyunghyun Cho},
title = {Zero-Resource Translation with Multi-Lingual Neural Machine Translation},
pages = {268--277},
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2016}
} }
@article{DBLP:journals/corr/abs-1805-10338, @article{DBLP:journals/corr/abs-1805-10338,
author = {Lierni Sestorain and author = {Lierni Sestorain and
Massimiliano Ciaramita and Massimiliano Ciaramita and
...@@ -10759,7 +10787,7 @@ author = {Zhuang Liu and ...@@ -10759,7 +10787,7 @@ author = {Zhuang Liu and
Yoshua Bengio and Yoshua Bengio and
Pierre-Antoine Manzagol}, Pierre-Antoine Manzagol},
title = {Extracting and composing robust features with denoising autoencoders}, title = {Extracting and composing robust features with denoising autoencoders},
series = {{ACM} International Conference Proceeding Series}, series = {{ACM} International Conference Proceeding Series},
volume = {307}, volume = {307},
pages = {1096--1103}, pages = {1096--1103},
publisher = {International Conference on Machine Learning} publisher = {International Conference on Machine Learning}
...@@ -10843,7 +10871,7 @@ author = {Zhuang Liu and ...@@ -10843,7 +10871,7 @@ author = {Zhuang Liu and
Ivan Vulic and Ivan Vulic and
Anders S{\o}gaard}, Anders S{\o}gaard},
title = {A Survey of Cross-lingual Word Embedding Models}, title = {A Survey of Cross-lingual Word Embedding Models},
journal = {J. Artif. Intell. Res.}, journal = {Journal of Artificial Intelligence Research},
volume = {65}, volume = {65},
pages = {569--631}, pages = {569--631},
year = {2019} year = {2019}
...@@ -10885,12 +10913,21 @@ author = {Zhuang Liu and ...@@ -10885,12 +10913,21 @@ author = {Zhuang Liu and
publisher = {Annual Meeting of the Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2017} year = {2017}
} }
@article{ng2019facebook,
title={Facebook FAIR's WMT19 News Translation Task Submission},
author={Ng, Nathan and Yee, Kyra and Baevski, Alexei and Ott, Myle and Auli, Michael and Edunov, Sergey}, @inproceedings{ng2019facebook,
journal={arXiv preprint arXiv:1907.06616}, author = {Nathan Ng and
year={2019} Kyra Yee and
Alexei Baevski and
Myle Ott and
Michael Auli and
Sergey Edunov},
title = {Facebook FAIR's {WMT19} News Translation Task Submission},
pages = {314--319},
publisher = {Association for Computational Linguistics},
year = {2019}
} }
@inproceedings{DBLP:conf/wmt/WangLLJZLLXZ18, @inproceedings{DBLP:conf/wmt/WangLLJZLLXZ18,
author = {Qiang Wang and author = {Qiang Wang and
Bei Li and Bei Li and
...@@ -10937,7 +10974,9 @@ author = {Zhuang Liu and ...@@ -10937,7 +10974,9 @@ author = {Zhuang Liu and
publisher = {Conference and Workshop on Neural Information Processing Systems}, publisher = {Conference and Workshop on Neural Information Processing Systems},
year = {2015} year = {2015}
} }
@article{DBLP:journals/corr/abs-1802-05365,
@inproceedings{DBLP:journals/corr/abs-1802-05365,
author = {Matthew E. Peters and author = {Matthew E. Peters and
Mark Neumann and Mark Neumann and
Mohit Iyyer and Mohit Iyyer and
...@@ -10945,11 +10984,12 @@ author = {Zhuang Liu and ...@@ -10945,11 +10984,12 @@ author = {Zhuang Liu and
Christopher Clark and Christopher Clark and
Kenton Lee and Kenton Lee and
Luke Zettlemoyer}, Luke Zettlemoyer},
title = {Deep contextualized word representations}, title = {Deep Contextualized Word Representations},
journal = {CoRR}, pages = {2227--2237},
volume = {abs/1802.05365}, publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
year = {2018} year = {2018}
} }
@inproceedings{DBLP:conf/icml/CollobertW08, @inproceedings{DBLP:conf/icml/CollobertW08,
author = {Ronan Collobert and author = {Ronan Collobert and
Jason Weston}, Jason Weston},
...@@ -11037,12 +11077,18 @@ author = {Zhuang Liu and ...@@ -11037,12 +11077,18 @@ author = {Zhuang Liu and
publisher = {Springer}, publisher = {Springer},
year = {1998} year = {1998}
} }
@article{liu2019multi,
title={Multi-task deep neural networks for natural language understanding}, @inproceedings{liu2019multi,
author={Liu, Xiaodong and He, Pengcheng and Chen, Weizhu and Gao, Jianfeng}, author = {Xiaodong Liu and
journal={arXiv preprint arXiv:1901.11504}, Pengcheng He and
year={2019} Weizhu Chen and
Jianfeng Gao},
title = {Multi-Task Deep Neural Networks for Natural Language Understanding},
pages = {4487--4496},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019}
} }
@inproceedings{DBLP:journals/corr/LuongLSVK15, @inproceedings{DBLP:journals/corr/LuongLSVK15,
author = {Minh-Thang Luong and author = {Minh-Thang Luong and
Quoc V. Le and Quoc V. Le and
...@@ -11185,7 +11231,7 @@ author = {Zhuang Liu and ...@@ -11185,7 +11231,7 @@ author = {Zhuang Liu and
@article{shah2012general, @article{shah2012general,
title={A general framework to weight heterogeneous parallel data for model adaptation in statistical machine translation}, title={A general framework to weight heterogeneous parallel data for model adaptation in statistical machine translation},
author={Shah, Kashif and Barrault, Lo{\"\i}c and Schwenk, Holger}, author={Shah, Kashif and Barrault, Lo{\"\i}c and Schwenk, Holger},
journal={MT Summit, Octobre}, journal={Machine Translation Summit},
year={2012} year={2012}
} }
@inproceedings{DBLP:conf/iwslt/MansourN12, @inproceedings{DBLP:conf/iwslt/MansourN12,
...@@ -11325,7 +11371,7 @@ author = {Zhuang Liu and ...@@ -11325,7 +11371,7 @@ author = {Zhuang Liu and
@article{moore2010intelligent, @article{moore2010intelligent,
title = {Intelligent selection of language model training data}, title = {Intelligent selection of language model training data},
author = {Moore, Robert C and Lewis, Will}, author = {Moore, Robert C and Lewis, Will},
publisher = {Annual Meeting of the Association for Computational Linguistics}, journal = {Annual Meeting of the Association for Computational Linguistics},
year = {2010} year = {2010}
} }
@inproceedings{DBLP:conf/acl/UtiyamaI03, @inproceedings{DBLP:conf/acl/UtiyamaI03,
...@@ -11373,7 +11419,7 @@ author = {Zhuang Liu and ...@@ -11373,7 +11419,7 @@ author = {Zhuang Liu and
title={Integrated parallel data extraction from comparable corpora for statistical machine translation}, title={Integrated parallel data extraction from comparable corpora for statistical machine translation},
author={Chu, Chenhui}, author={Chu, Chenhui},
year={2015}, year={2015},
publisher={Kyoto University} journal={Kyoto University}
} }
@article{DBLP:journals/tit/Scudder65a, @article{DBLP:journals/tit/Scudder65a,
author = {H. J. Scudder III}, author = {H. J. Scudder III},
...@@ -11532,12 +11578,19 @@ author = {Zhuang Liu and ...@@ -11532,12 +11578,19 @@ author = {Zhuang Liu and
publisher = {Conference on Empirical Methods in Natural Language Processing}, publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2019} year = {2019}
} }
@article{barone2017regularization,
title={Regularization techniques for fine-tuning in neural machine translation},
author={Barone, Antonio Valerio Miceli and Haddow, Barry and Germann, Ulrich and Sennrich, Rico}, @inproceedings{barone2017regularization,
journal={arXiv preprint arXiv:1707.09920}, author = {Antonio Valerio Miceli Barone and
year={2017} Barry Haddow and
Ulrich Germann and
Rico Sennrich},
title = {Regularization techniques for fine-tuning in neural machine translation},
pages = {1489--1494},
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2017}
} }
@inproceedings{DBLP:conf/acl/SaundersB20, @inproceedings{DBLP:conf/acl/SaundersB20,
author = {Danielle Saunders and author = {Danielle Saunders and
Bill Byrne}, Bill Byrne},
...@@ -11594,7 +11647,7 @@ author = {Zhuang Liu and ...@@ -11594,7 +11647,7 @@ author = {Zhuang Liu and
@article{DBLP:journals/ibmrd/Luhn58, @article{DBLP:journals/ibmrd/Luhn58,
author = {Hans Peter Luhn}, author = {Hans Peter Luhn},
title = {The Automatic Creation of Literature Abstracts}, title = {The Automatic Creation of Literature Abstracts},
journal = {{IBM} J. Res. Dev.}, journal = {IBM Journal of research and development},
volume = {2}, volume = {2},
number = {2}, number = {2},
pages = {159--165}, pages = {159--165},
...@@ -11754,7 +11807,7 @@ author = {Zhuang Liu and ...@@ -11754,7 +11807,7 @@ author = {Zhuang Liu and
Le Wang and Le Wang and
Osamu Yoshie}, Osamu Yoshie},
title = {Dual supervised learning for non-native speech recognition}, title = {Dual supervised learning for non-native speech recognition},
journal = {{EURASIP} J. Audio Speech Music. Process.}, journal = {EURASIP Journal on Audio, Speech, and Music Processing},
volume = {2019}, volume = {2019},
pages = {3}, pages = {3},
year = {2019} year = {2019}
...@@ -11796,6 +11849,7 @@ author = {Zhuang Liu and ...@@ -11796,6 +11849,7 @@ author = {Zhuang Liu and
publisher = {Annual Meeting of the Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2020} year = {2020}
} }
@inproceedings{DBLP:conf/nips/YangDYCSL19, @inproceedings{DBLP:conf/nips/YangDYCSL19,
author = {Zhilin Yang and author = {Zhilin Yang and
Zihang Dai and Zihang Dai and
...@@ -11804,6 +11858,7 @@ author = {Zhuang Liu and ...@@ -11804,6 +11858,7 @@ author = {Zhuang Liu and
Ruslan Salakhutdinov and Ruslan Salakhutdinov and
Quoc V. Le}, Quoc V. Le},
title = {XLNet: Generalized Autoregressive Pretraining for Language Understanding}, title = {XLNet: Generalized Autoregressive Pretraining for Language Understanding},
publisher = {Annual Conference on Neural Information Processing Systems},
pages = {5754--5764}, pages = {5754--5764},
year = {2019} year = {2019}
} }
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论