Commit fd3fc816 by 单韦乔

15章图片

parent cad4927e
%%%------------------------------------------------------------------------------------------------------------
%%% 调序模型1:基于距离的调序
\begin{center}
\begin{tikzpicture}
\tikzstyle{cirnode}=[circle,inner sep=4pt,draw]
\tikzstyle{colnode}=[fill=ugreen!30,inner sep=0.1pt]
\tikzstyle{show}=[fill=red,circle,inner sep=0.5pt]
\begin{scope}
\node [anchor=north west,cirnode] (c1) at (0, 0) {};
\draw[-,dotted] ([xshift=-1em,yshift=-0.5em]c1.south)--([xshift=9.3em,yshift=-0.5em]c1.south);
\draw[-,dotted] ([xshift=-1em,yshift=-2em]c1.south)--([xshift=9.3em,yshift=-2em]c1.south);
\draw[-,dotted] ([xshift=-1em,yshift=-3.5em]c1.south)--([xshift=9.3em,yshift=-3.5em]c1.south);
\draw[-,dotted] ([xshift=-1em,yshift=-5em]c1.south)--([xshift=9.3em,yshift=-5em]c1.south);
\node [anchor=north,cirnode] (c2) at ([xshift=0em,yshift=-5.5em]c1.south) {};
\node [anchor=west,cirnode] (c3) at ([xshift=0.6em,yshift=0em]c2.east) {};
\node [anchor=west,cirnode] (c4) at ([xshift=0.6em,yshift=0em]c3.east) {};
\node [anchor=west,cirnode] (c5) at ([xshift=0.6em,yshift=0em]c4.east) {};
\node [anchor=west,cirnode] (c6) at ([xshift=0.6em,yshift=0em]c5.east) {};
\node [anchor=west,cirnode] (c7) at ([xshift=0.6em,yshift=0em]c6.east) {};
\node [anchor=south,colnode,minimum height=1.6em,minimum width=1em] (b1) at ([xshift=0em,yshift=0.5em]c2.north) {};
\node [anchor=south,colnode,minimum height=4.1em,minimum width=1em] (b2) at ([xshift=0em,yshift=0.5em]c3.north) {};
\node [anchor=south,colnode,minimum height=0.8em,minimum width=1em] (b3) at ([xshift=0em,yshift=0.5em]c4.north) {};
\node [anchor=south,colnode,minimum height=0.4em,minimum width=1em] (b4) at ([xshift=0em,yshift=0.5em]c5.north) {};
\node [anchor=south,colnode,minimum height=0.15em,minimum width=1em] (b5) at ([xshift=0em,yshift=0.5em]c6.north) {};
\node [anchor=south,colnode,minimum height=0.15em,minimum width=1em] (b6) at ([xshift=0em,yshift=0.5em]c7.north) {};
{\scriptsize
\node [anchor=center] (n1) at ([xshift=0em,yshift=-1em]c2.south){\color{orange}Bush};
\node [anchor=west] (n2) at ([xshift=-0.2em,yshift=0em]n1.east){\color{ugreen!30}held};
\node [anchor=west] (n3) at ([xshift=0.35em,yshift=0em]n2.east){a};
\node [anchor=west] (n4) at ([xshift=0.5em,yshift=0em]n3.east){talk};
\node [anchor=west] (n5) at ([xshift=-0.3em,yshift=0em]n4.east){with};
\node [anchor=west] (n6) at ([xshift=-0.3em,yshift=0em]n5.east){Sharon};
}
\node [anchor=north] (l1) at ([xshift=1em,yshift=-1em]n3.south){\small {(a)原始分布}};
\draw[-,very thick] ([xshift=1.2em,yshift=2.4em]b6.north)--([xshift=1.7em,yshift=1.9em]b6.north);
\draw[-,very thick] ([xshift=1.2em,yshift=1.9em]b6.north)--([xshift=1.7em,yshift=2.4em]b6.north);
\end{scope}
\begin{scope}[xshift=4.4cm,yshift=0em]
\node [anchor=north west,circle,inner sep=4pt] (c1) at (0, 0) {};
\draw[-,dotted] ([xshift=-1em,yshift=-0.5em]c1.south)--([xshift=9.3em,yshift=-0.5em]c1.south);
\draw[-,dotted] ([xshift=-1em,yshift=-2em]c1.south)--([xshift=9.3em,yshift=-2em]c1.south);
\draw[-,dotted] ([xshift=-1em,yshift=-3.5em]c1.south)--([xshift=9.3em,yshift=-3.5em]c1.south);
\draw[-,dotted] ([xshift=-1em,yshift=-5em]c1.south)--([xshift=9.3em,yshift=-5em]c1.south);
\node [anchor=north,cirnode] (c2) at ([xshift=0em,yshift=-5.5em]c1.south) {};
\node [anchor=west,cirnode] (c3) at ([xshift=0.6em,yshift=0em]c2.east) {};
\node [anchor=west,cirnode] (c4) at ([xshift=0.6em,yshift=0em]c3.east) {};
\node [anchor=west,cirnode] (c5) at ([xshift=0.6em,yshift=0em]c4.east) {};
\node [anchor=west,cirnode] (c6) at ([xshift=0.6em,yshift=0em]c5.east) {};
\node [anchor=west,cirnode] (c7) at ([xshift=0.6em,yshift=0em]c6.east) {};
\node [anchor=south,inner sep=0.1pt,minimum height=1.6em,minimum width=1em] (b1) at ([xshift=0em,yshift=0.5em]c2.north) {};
\node [anchor=south,inner sep=0.1pt,minimum height=4.1em,minimum width=1em] (b2) at ([xshift=0em,yshift=0.5em]c3.north) {};
\node [anchor=south,inner sep=0.1pt,minimum height=0.8em,minimum width=1em] (b3) at ([xshift=0em,yshift=0.5em]c4.north) {};
\node [anchor=south,inner sep=0.1pt,minimum height=0.4em,minimum width=1em] (b4) at ([xshift=0em,yshift=0.5em]c5.north) {};
\node [anchor=south,inner sep=0.1pt,minimum height=0.15em,minimum width=1em] (b5) at ([xshift=0em,yshift=0.5em]c6.north) {};
\node [anchor=south,inner sep=0.1pt,minimum height=0.15em,minimum width=1em] (b6) at ([xshift=0em,yshift=0.5em]c7.north) {};
{\scriptsize
\node [anchor=center] (n1) at ([xshift=0em,yshift=-1em]c2.south){\color{orange}Bush};
\node [anchor=west] (n2) at ([xshift=-0.2em,yshift=0em]n1.east){held};
\node [anchor=west] (n3) at ([xshift=0.35em,yshift=0em]n2.east){a};
\node [anchor=west] (n4) at ([xshift=0.5em,yshift=0em]n3.east){\color{blue!60}talk};
\node [anchor=west] (n5) at ([xshift=-0.3em,yshift=0em]n4.east){with};
\node [anchor=west] (n6) at ([xshift=-0.3em,yshift=0em]n5.east){Sharon};
}
%\node [anchor=west,show] (s1) at (-0.5em,-5.7em){};
%\node [anchor=west,show] (s2) at (-0.2em,-5.6em){};
%\node [anchor=west,show] (s3) at (1.1em,-5.5em){};
%\node [anchor=west,show] (s11) at (1.9em,-5em){};
%\node [anchor=west,show] (s4) at (3em,-4em){};
%\node [anchor=west,show] (s5) at (3.7em,-3em){};
%\node [anchor=west,show] (s12) at (4.2em,-2.2em){};
%\node [anchor=west,show] (s6) at (5.3em,-1.4em){};
%\node [anchor=west,show] (s7) at (6.3em,-2em){};
%\node [anchor=west,show] (s13) at (7.4em,-3em){};
%\node [anchor=west,show] (s8) at (8.4em,-3.9em){};
%\node [anchor=west,show] (s9) at (8.9em,-4.5em){};
%\node [anchor=west,show] (s10) at (9.3em,-5em){};
%\draw[-,blue!60,thick] (-0.5em,-5.7em)..controls (-0.2em,-5.6em) and (1.1em,-5.5em)..(1.9em,-5em)..controls (3em,-4em) and (3.7em,-3em)..(4.2em,-2.2em)..controls (5.3em,-1.4em) and (6.3em,-2em)..(7.4em,-3em)..controls (8.4em,-3.9em) and (8.9em,-4.5em)..(9.3em,-5em);
%\draw[-,blue!60,thick] (-1em,-6em)..controls (0em,-5.7em) and (1em,-5em)..(1.6em,-4.3em)..controls (5.3em,1em) and (7.4em,-2em)..(9.3em,-5em);
\draw[-,blue!60,thick] ([xshift=-1em,yshift=-4.7em]c1.south)..controls (3.8em,-6em) and (3.9em,3.6em)..([xshift=9.3em,yshift=-4.3em]c1.south);
\node [anchor=north] (l1) at ([xshift=1em,yshift=-1em]n3.south){\small {(b)高斯分布}};
\draw[-,very thick] ([xshift=1.2em,yshift=2.2em]b6.north)--([xshift=1.7em,yshift=2.2em]b6.north);
\draw[-,very thick] ([xshift=1.2em,yshift=1.9em]b6.north)--([xshift=1.7em,yshift=1.9em]b6.north);
\node [anchor=south] (t1) at ([xshift=0em,yshift=6.7em]n4.north){$D$};
\draw[->] ([xshift=0em,yshift=0em]t1.west)--([xshift=-1em,yshift=0em]t1.west);
\draw[->] ([xshift=0em,yshift=0em]t1.east)--([xshift=1em,yshift=0em]t1.east);
\draw[-] ([xshift=1em,yshift=-0.5em]t1.east)--([xshift=1em,yshift=0.5em]t1.east);
\draw[-] ([xshift=-1em,yshift=-0.5em]t1.west)--([xshift=-1em,yshift=0.5em]t1.west);
\end{scope}
\begin{scope}[xshift=8.8cm,yshift=0em]
\node [anchor=north west,cirnode] (c1) at (0, 0) {};
\draw[-,dotted] ([xshift=-1em,yshift=-0.5em]c1.south)--([xshift=9.3em,yshift=-0.5em]c1.south);
\draw[-,dotted] ([xshift=-1em,yshift=-2em]c1.south)--([xshift=9.3em,yshift=-2em]c1.south);
\draw[-,dotted] ([xshift=-1em,yshift=-3.5em]c1.south)--([xshift=9.3em,yshift=-3.5em]c1.south);
\draw[-,dotted] ([xshift=-1em,yshift=-5em]c1.south)--([xshift=9.3em,yshift=-5em]c1.south);
\node [anchor=north,cirnode] (c2) at ([xshift=0em,yshift=-5.5em]c1.south) {};
\node [anchor=west,cirnode] (c3) at ([xshift=0.6em,yshift=0em]c2.east) {};
\node [anchor=west,cirnode] (c4) at ([xshift=0.6em,yshift=0em]c3.east) {};
\node [anchor=west,cirnode] (c5) at ([xshift=0.6em,yshift=0em]c4.east) {};
\node [anchor=west,cirnode] (c6) at ([xshift=0.6em,yshift=0em]c5.east) {};
\node [anchor=west,cirnode] (c7) at ([xshift=0.6em,yshift=0em]c6.east) {};
\node [anchor=south,colnode,minimum height=0.15em,minimum width=1em] (b1) at ([xshift=0em,yshift=0.5em]c2.north) {};
\node [anchor=south,colnode,minimum height=4.2em,minimum width=1em] (b2) at ([xshift=0em,yshift=0.5em]c3.north) {};
\node [anchor=south,colnode,minimum height=3.7em,minimum width=1em] (b3) at ([xshift=0em,yshift=0.5em]c4.north) {};
\node [anchor=south,colnode,minimum height=4.2em,minimum width=1em] (b4) at ([xshift=0em,yshift=0.5em]c5.north) {};
\node [anchor=south,colnode,minimum height=0.8em,minimum width=1em] (b5) at ([xshift=0em,yshift=0.5em]c6.north) {};
\node [anchor=south,colnode,minimum height=0.15em,minimum width=1em] (b6) at ([xshift=0em,yshift=0.5em]c7.north) {};
{\scriptsize
\node [anchor=center] (n1) at ([xshift=0em,yshift=-1em]c2.south){\color{orange}Bush};
\node [anchor=west] (n2) at ([xshift=-0.2em,yshift=0em]n1.east){\color{ugreen!30}held};
\node [anchor=west] (n3) at ([xshift=0.35em,yshift=0em]n2.east){\color{ugreen!30}a};
\node [anchor=west] (n4) at ([xshift=0.5em,yshift=0em]n3.east){\color{ugreen!30}talk};
\node [anchor=west] (n5) at ([xshift=-0.3em,yshift=0em]n4.east){with};
\node [anchor=west] (n6) at ([xshift=-0.3em,yshift=0em]n5.east){Sharon};
}
\node [anchor=north] (l1) at ([xshift=1em,yshift=-1em]n3.south){\small {(c)修改后的分布}};
\end{scope}
\end{tikzpicture}
\end{center}
\ No newline at end of file
\begin{tikzpicture}
\tikzstyle{elementnode} = [anchor=center,draw,minimum size=0.6em,inner sep=0.1pt,gray!80]
\begin{scope}[scale=1.0]
\foreach \i / \j in
{0/4, 1/4, 2/4, 3/4, 4/4, 5/4, 6/4, 7/4,
0/3, 1/3, 2/3, 3/3, 4/3, 5/3, 6/3, 7/3,
0/2, 1/2, 2/2, 3/2, 4/2, 5/2, 6/2, 7/2,
0/1, 1/1, 2/1, 3/1, 4/1, 5/1, 6/1, 7/1,
0/0, 1/0, 2/0, 3/0, 4/0, 5/0, 6/0, 7/0}
\node[elementnode] (a\i\j) at (0.6em*\i,0.6em*\j) {};
\foreach \i / \j in
{0/4, 1/4, 2/4, 3/4, 4/4, 5/4, 6/4, 7/4,
0/3, 1/3, 2/3, 3/3, 4/3, 5/3, 6/3, 7/3,
0/2, 1/2, 2/2, 3/2, 4/2, 5/2, 6/2, 7/2,
0/1, 1/1, 2/1, 3/1, 4/1, 5/1, 6/1, 7/1,
0/0, 1/0, 2/0, 3/0, 4/0, 5/0, 6/0, 7/0}
\node[elementnode,fill=gray!50] (b\i\j) at (0.6em*\i+5.5em,0.6em*\j) {};
\node [anchor=south west,minimum height=0.5em,minimum width=4.8em,inner sep=0.1pt,very thick,blue!60,draw] (n1) at ([xshift=0em,yshift=0em]a01.south west) {};
\node [anchor=north west,minimum height=0.5em,minimum width=4.8em,inner sep=0.1pt,very thick,red!60,draw] (n2) at ([xshift=0em,yshift=0em]a02.north west) {};
\node [anchor=west,minimum height=0.6em,minimum width=0.6em,inner sep=0.1pt,very thick,blue!60,draw] (n3) at ([xshift=0em,yshift=0em]b21.west) {};
\node [anchor=west,minimum height=0.6em,minimum width=0.6em,inner sep=0.1pt,very thick,red!60,draw] (n4) at ([xshift=0em,yshift=0em]b42.west) {};
\draw [-,very thick,dotted,blue!60] ([xshift=0em,yshift=0em]n1.south east) -- ([xshift=0em,yshift=0em]n3.south west);
\draw [-,very thick,dotted,blue!60] ([xshift=0em,yshift=0em]n1.north east) -- ([xshift=0em,yshift=0em]n3.north west);
\draw [-,very thick,dotted,red!60] ([xshift=0em,yshift=0em]n2.south east) -- ([xshift=0em,yshift=0em]n4.south west);
\draw [-,very thick,dotted,red!60] ([xshift=0em,yshift=0em]n2.north east) -- ([xshift=0em,yshift=0em]n4.north west);
\node [anchor=north] (l1) at ([xshift=0.5em,yshift=-1em]a70.south){\footnotesize {(a)原始SAN}};
\node [anchor=south,rotate=90] (l2) at ([xshift=0em,yshift=0em]a02.west){\scriptsize {注意力头}};
\node [anchor=south] (l2) at ([xshift=0em,yshift=0em]a44.north){\scriptsize {句子长度}};
\end{scope}
\begin{scope}[scale=1.0,xshift=4.6cm]
\foreach \i / \j in
{0/4, 1/4, 2/4, 3/4, 4/4, 5/4, 6/4, 7/4,
0/3, 1/3, 2/3, 3/3, 4/3, 5/3, 6/3, 7/3,
0/2, 1/2, 2/2, 3/2, 4/2, 5/2, 6/2, 7/2,
0/1, 1/1, 2/1, 3/1, 4/1, 5/1, 6/1, 7/1,
0/0, 1/0, 2/0, 3/0, 4/0, 5/0, 6/0, 7/0}
\node[elementnode] (a\i\j) at (0.6em*\i,0.6em*\j) {};
\foreach \i / \j in
{0/4, 1/4, 2/4, 3/4, 4/4, 5/4, 6/4, 7/4,
0/3, 1/3, 2/3, 3/3, 4/3, 5/3, 6/3, 7/3,
0/2, 1/2, 2/2, 3/2, 4/2, 5/2, 6/2, 7/2,
0/1, 1/1, 2/1, 3/1, 4/1, 5/1, 6/1, 7/1,
0/0, 1/0, 2/0, 3/0, 4/0, 5/0, 6/0, 7/0}
\node[elementnode,fill=gray!50] (b\i\j) at (0.6em*\i+5.5em,0.6em*\j) {};
\node [anchor=south west,minimum height=0.5em,minimum width=3em,inner sep=0.1pt,very thick,blue!60,draw] (n1) at ([xshift=0em,yshift=0em]a01.south west) {};
\node [anchor=north west,minimum height=0.5em,minimum width=3em,inner sep=0.1pt,very thick,red!60,draw] (n2) at ([xshift=0em,yshift=0em]a22.north west) {};
\node [anchor=west,minimum height=0.6em,minimum width=0.6em,inner sep=0.1pt,very thick,blue!60,draw] (n3) at ([xshift=0em,yshift=0em]b21.west) {};
\node [anchor=west,minimum height=0.6em,minimum width=0.6em,inner sep=0.1pt,very thick,red!60,draw] (n4) at ([xshift=0em,yshift=0em]b42.west) {};
\draw [-,very thick,dotted,blue!60] ([xshift=0em,yshift=0em]n1.south east) -- ([xshift=0em,yshift=0em]n3.south west);
\draw [-,very thick,dotted,blue!60] ([xshift=0em,yshift=0em]n1.north east) -- ([xshift=0em,yshift=0em]n3.north west);
\draw [-,very thick,dotted,red!60] ([xshift=0em,yshift=0em]n2.south east) -- ([xshift=0em,yshift=0em]n4.south west);
\draw [-,very thick,dotted,red!60] ([xshift=0em,yshift=0em]n2.north east) -- ([xshift=0em,yshift=0em]n4.north west);
\node [anchor=north] (l1) at ([xshift=0.5em,yshift=-1em]a70.south){\footnotesize {(b)一维卷积SAN}};
\node [anchor=south,rotate=90] (l2) at ([xshift=0em,yshift=0em]a02.west){\scriptsize {注意力头}};
\node [anchor=south] (l2) at ([xshift=0em,yshift=0em]a44.north){\scriptsize {句子长度}};
\end{scope}
\begin{scope}[scale=1.0,xshift=9.2cm]
\foreach \i / \j in
{0/4, 1/4, 2/4, 3/4, 4/4, 5/4, 6/4, 7/4,
0/3, 1/3, 2/3, 3/3, 4/3, 5/3, 6/3, 7/3,
0/2, 1/2, 2/2, 3/2, 4/2, 5/2, 6/2, 7/2,
0/1, 1/1, 2/1, 3/1, 4/1, 5/1, 6/1, 7/1,
0/0, 1/0, 2/0, 3/0, 4/0, 5/0, 6/0, 7/0}
\node[elementnode] (a\i\j) at (0.6em*\i,0.6em*\j) {};
\foreach \i / \j in
{0/4, 1/4, 2/4, 3/4, 4/4, 5/4, 6/4, 7/4,
0/3, 1/3, 2/3, 3/3, 4/3, 5/3, 6/3, 7/3,
0/2, 1/2, 2/2, 3/2, 4/2, 5/2, 6/2, 7/2,
0/1, 1/1, 2/1, 3/1, 4/1, 5/1, 6/1, 7/1,
0/0, 1/0, 2/0, 3/0, 4/0, 5/0, 6/0, 7/0}
\node[elementnode,fill=gray!50] (b\i\j) at (0.6em*\i+5.5em,0.6em*\j) {};
\node [anchor=south west,minimum height=1.8em,minimum width=3em,inner sep=0.1pt,very thick,blue!60,draw] (n1) at ([xshift=0em,yshift=0em]a00.south west) {};
\node [anchor=north west,minimum height=1.8em,minimum width=3em,inner sep=0.1pt,very thick,red!60,draw] (n2) at ([xshift=0em,yshift=0em]a23.north west) {};
\node [anchor=west,minimum height=0.6em,minimum width=0.6em,inner sep=0.1pt,very thick,blue!60,draw] (n3) at ([xshift=0em,yshift=0em]b21.west) {};
\node [anchor=west,minimum height=0.6em,minimum width=0.6em,inner sep=0.1pt,very thick,red!60,draw] (n4) at ([xshift=0em,yshift=0em]b42.west) {};
\draw [-,very thick,dotted,blue!60] ([xshift=0em,yshift=0em]n1.south east) -- ([xshift=0em,yshift=0em]n3.south west);
\draw [-,very thick,dotted,blue!60] ([xshift=0em,yshift=0em]n1.north east) -- ([xshift=0em,yshift=0em]n3.north west);
\draw [-,very thick,dotted,red!60] ([xshift=0em,yshift=0em]n2.south east) -- ([xshift=0em,yshift=0em]n4.south west);
\draw [-,very thick,dotted,red!60] ([xshift=0em,yshift=0em]n2.north east) -- ([xshift=0em,yshift=0em]n4.north west);
\node [anchor=north] (l1) at ([xshift=0.5em,yshift=-1em]a70.south){\footnotesize {(c)二维卷积SAN}};
\node [anchor=south,rotate=90] (l2) at ([xshift=0em,yshift=0em]a02.west){\scriptsize {注意力头}};
\node [anchor=south] (l2) at ([xshift=0em,yshift=0em]a44.north){\scriptsize {句子长度}};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\tikzstyle{unit}=[draw,rounded corners=2pt,drop shadow,font=\tiny]
%left
\begin{scope}
\foreach \x/\d in {1/2em, 2/8em, 3/18em, 4/24em}
\node[unit,fill=yellow!20] at (0,\d) (ln_\x) {层正则};
\foreach \x/\d in {1/4em, 2/20em}
\node[unit,fill=green!20] at (0,\d) (sa_\x) {8头自注意力:512};
\foreach \x/\d in {1/6em, 2/16em, 3/22em, 4/32em}
\node[draw,circle,minimum size=1em,inner sep=1pt] at (0,\d) (add_\x) {\scriptsize\bfnew{+}};
\foreach \x/\d in {2/14em, 4/30em}
\node[unit,fill=red!20] at (0,\d) (conv_\x) {卷积$1 \times 1$:512};
\foreach \x/\d in {1/10em,3/26em}
\node[unit,fill=red!20] at (0,\d) (conv_\x) {卷积$1 \times 1$:2048};
\foreach \x/\d in {1/12em, 2/28em}
\node[unit,fill=blue!20] at (0,\d) (relu_\x) {RELU};
\draw[->,thick] ([yshift=-1.4em]ln_1.-90) -- ([yshift=-0.1em]ln_1.-90);
\draw[->,thick] ([yshift=0.1em]ln_1.90) -- ([yshift=-0.1em]sa_1.-90);
\draw[->,thick] ([yshift=0.1em]sa_1.90) -- ([yshift=-0.1em]add_1.-90);
\draw[->,thick] ([yshift=0.1em]add_1.90) -- ([yshift=-0.1em]ln_2.-90);
\draw[->,thick] ([yshift=0.1em]ln_2.90) -- ([yshift=-0.1em]conv_1.-90);
\draw[->,thick] ([yshift=0.1em]conv_1.90) -- ([yshift=-0.1em]relu_1.-90);
\draw[->,thick] ([yshift=0.1em]relu_1.90) -- ([yshift=-0.1em]conv_2.-90);
\draw[->,thick] ([yshift=0.1em]conv_2.90) -- ([yshift=-0.1em]add_2.-90);
\draw[->,thick] ([yshift=0.1em]add_2.90) -- ([yshift=-0.1em]ln_3.-90);
\draw[->,thick] ([yshift=0.1em]ln_3.90) -- ([yshift=-0.1em]sa_2.-90);
\draw[->,thick] ([yshift=0.1em]sa_2.90) -- ([yshift=-0.1em]add_3.-90);
\draw[->,thick] ([yshift=0.1em]add_3.90) -- ([yshift=-0.1em]ln_4.-90);
\draw[->,thick] ([yshift=0.1em]ln_4.90) -- ([yshift=-0.1em]conv_3.-90);
\draw[->,thick] ([yshift=0.1em]conv_3.90) -- ([yshift=-0.1em]relu_2.-90);
\draw[->,thick] ([yshift=0.1em]relu_2.90) -- ([yshift=-0.1em]conv_4.-90);
\draw[->,thick] ([yshift=0.1em]conv_4.90) -- ([yshift=-0.1em]add_4.-90);
\draw[->,thick] ([yshift=0.1em]add_4.90) -- ([yshift=1em]add_4.90);
\draw[->,thick] ([yshift=-0.8em]ln_1.-90) .. controls ([xshift=5em,yshift=-0.8em]ln_1.-90) and ([xshift=5em]add_1.0) .. (add_1.0);
\draw[->,thick] (add_1.0) .. controls ([xshift=5em]add_1.0) and ([xshift=5em]add_2.0) .. (add_2.0);
\draw[->,thick] (add_2.0) .. controls ([xshift=5em]add_2.0) and ([xshift=5em]add_3.0) .. (add_3.0);
\draw[->,thick] (add_3.0) .. controls ([xshift=5em]add_3.0) and ([xshift=5em]add_4.0) .. (add_4.0);
\node[font=\scriptsize] at (0em, -1em){(a) Transformer编码器中若干块的结构};
\end{scope}
%right
\begin{scope}[xshift=14em]
\foreach \x/\d in {1/2em, 2/8em, 3/16em, 4/22em, 5/28em}
\node[unit,fill=yellow!20] at (0,\d) (ln_\x) {层正则};
\node[unit,fill=green!20] at (0,24em) (sa_1) {8头自注意力:512};
\foreach \x/\d in {1/6em, 2/14em, 3/20em, 4/26em, 5/36em}
\node[draw,circle,minimum size=1em,inner sep=1pt] at (0,\d) (add_\x) {\scriptsize\bfnew{+}};
\node[unit,fill=red!20] at (0,30em) (conv_4) {卷积$1 \times 1$:2048};
\node[unit,fill=red!20] at (0,34em) (conv_5) {卷积$1 \times 1$:512};
\node[unit,fill=blue!20] at (0,32em) (relu_3) {RELU};
\node[unit,fill=red!20] at (0,4em) (glu_1) {门控线性单元:512};
\node[unit,fill=red!20] at (-3em,10em) (conv_1) {卷积$1 \times 1$:2048};
\node[unit,fill=cyan!20] at (3em,10em) (conv_2) {卷积$3 \times 1$:256};
\node[unit,fill=blue!20] at (-3em,12em) (relu_1) {RELU};
\node[unit,fill=blue!20] at (3em,12em) (relu_2) {RELU};
\node[unit,fill=cyan!20] at (0em,18em) (conv_3) {Sep卷积$9 \times 1$:256};
\draw[->,thick] ([yshift=-1.4em]ln_1.-90) -- ([yshift=-0.1em]ln_1.-90);
\draw[->,thick] ([yshift=0.1em]ln_1.90) -- ([yshift=-0.1em]glu_1.-90);
\draw[->,thick] ([yshift=0.1em]glu_1.90) -- ([yshift=-0.1em]add_1.-90);
\draw[->,thick] ([yshift=0.1em]add_1.90) -- ([yshift=-0.1em]ln_2.-90);
\draw[->,thick] ([,yshift=0.1em]ln_2.135) -- ([yshift=-0.1em]conv_1.-90);
\draw[->,thick] ([yshift=0.1em]ln_2.45) -- ([yshift=-0.1em]conv_2.-90);
\draw[->,thick] ([yshift=0.1em]conv_1.90) -- ([yshift=-0.1em]relu_1.-90);
\draw[->,thick] ([yshift=0.1em]conv_2.90) -- ([yshift=-0.1em]relu_2.-90);
\draw[->,thick] ([yshift=0.1em]relu_1.90) -- ([yshift=-0.1em]add_2.-135);
\draw[->,thick] ([yshift=0.1em]relu_2.90) -- ([yshift=-0.1em]add_2.-45);
\draw[->,thick] ([yshift=0.1em]add_2.90) -- ([yshift=-0.1em]ln_3.-90);
\draw[->,thick] ([yshift=0.1em]ln_3.90) -- ([yshift=-0.1em]conv_3.-90);
\draw[->,thick] ([yshift=0.1em]conv_3.90) -- ([yshift=-0.1em]add_3.-90);
\draw[->,thick] ([yshift=0.1em]add_3.90) -- ([yshift=-0.1em]ln_4.-90);
\draw[->,thick] ([yshift=0.1em]ln_4.90) -- ([yshift=-0.1em]sa_1.-90);
\draw[->,thick] ([yshift=0.1em]sa_1.90) -- ([yshift=-0.1em]add_4.-90);
\draw[->,thick] ([yshift=0.1em]add_4.90) -- ([yshift=-0.1em]ln_5.-90);
\draw[->,thick] ([yshift=0.1em]ln_5.90) -- ([yshift=-0.1em]conv_4.-90);
\draw[->,thick] ([yshift=0.1em]conv_4.90) -- ([yshift=-0.1em]relu_3.-90);
\draw[->,thick] ([yshift=0.1em]relu_3.90) -- ([yshift=-0.1em]conv_5.-90);
\draw[->,thick] ([yshift=0.1em]conv_5.90) -- ([yshift=-0.1em]add_5.-90);
\draw[->,thick] ([yshift=0.1em]add_5.90) -- ([yshift=1em]add_5.90);
\draw[->,thick] ([yshift=-0.8em]ln_1.-90) .. controls ([xshift=5em,yshift=-0.8em]ln_1.-90) and ([xshift=5em]add_1.0) .. (add_1.0);
\draw[->,thick] (add_1.0) .. controls ([xshift=8em]add_1.0) and ([xshift=8em]add_3.0) .. (add_3.0);
\draw[->,thick] (add_3.0) .. controls ([xshift=5em]add_3.0) and ([xshift=5em]add_4.0) .. (add_4.0);
\draw[->,thick] (add_4.0) .. controls ([xshift=5em]add_4.0) and ([xshift=5em]add_5.0) .. (add_5.0);
\node[font=\scriptsize,align=center] at (0em, -1.5em){(b) 使用结构搜索方法优化后的 \\ Transformer编码器中若干块的结构};
\node[minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=blue!20] (act) at (5.5em, 38em){};
\node[anchor=west,font=\footnotesize] at ([xshift=0.1em]act.east){激活函数};
\node[anchor=north,minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=yellow!20] (nor) at ([yshift=-0.6em]act.south){};
\node[anchor=west,font=\footnotesize] at ([xshift=0.1em]nor.east){正则化};
\node[anchor=north,minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=cyan!20] (wc) at ([yshift=-0.6em]nor.south){};
\node[anchor=west,font=\footnotesize] at ([xshift=0.1em]wc.east){宽卷积};
\node[anchor=north,minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=green!20] (at) at ([yshift=-0.6em]wc.south){};
\node[anchor=west,font=\footnotesize] (tag) at ([xshift=0.1em]at.east){注意力机制};
\node[anchor=north,minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=red!20] (nsl) at ([yshift=-0.6em]at.south){};
\node[anchor=west,font=\footnotesize] at ([xshift=0.1em]nsl.east){非空间层};
\begin{pgfonlayer}{background}
\node[draw,drop shadow,fill=white][fit=(act)(nsl)(tag)]{};
\end{pgfonlayer}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{hnode}=[rectangle,inner sep=0mm,minimum height=2em,minimum width=4.5em,rounded corners=5pt,fill=ugreen!30]
\tikzstyle{tnode}=[rectangle,inner sep=0mm,minimum height=2em,minimum width=4.5em,rounded corners=5pt,fill=red!30]
\tikzstyle{wnode}=[inner sep=0mm,minimum height=1.4em,minimum width=4.4em]
\node [anchor=south west,hnode] (n1) at (0,0) {$\mathbi{h}_1$};
\node [anchor=west,hnode] (n2) at ([xshift=1em,yshift=0em]n1.east) {$\mathbi{h}_2$};
\node [anchor=west,hnode] (n3) at ([xshift=1em,yshift=0em]n2.east) {$\mathbi{h}_3$};
\node [anchor=west,hnode] (n4) at ([xshift=1em,yshift=0em]n3.east) {$\cdots$};
\node [anchor=west,hnode] (n5) at ([xshift=1em,yshift=0em]n4.east) {$\mathbi{h}_n$};
\node [anchor=south,tnode] (t1) at ([xshift=2.8em,yshift=1em]n1.north) {$\mathbi{h}_{n+1}$};
\node [anchor=south,tnode] (t2) at ([xshift=2.8em,yshift=1em]t1.north) {$\mathbi{h}_{n+2}$};
\node [anchor=south,tnode] (t3) at ([xshift=2.8em,yshift=1em]t2.north) {$\cdots$};
\node [anchor=south,tnode] (t4) at ([xshift=2.8em,yshift=1em]t3.north) {$\mathbi{h}_{2n-1}$};
\draw [->,thick] ([xshift=0em,yshift=0em]n1.east) -- ([xshift=0em,yshift=0em]n2.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n2.east) -- ([xshift=0em,yshift=0em]n3.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n3.east) -- ([xshift=0em,yshift=0em]n4.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n4.east) -- ([xshift=0em,yshift=0em]n5.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n1.north) -- ([xshift=0em,yshift=0em]t1.south);
\draw [->,thick] ([xshift=0em,yshift=0em]n2.north) -- ([xshift=0em,yshift=0em]t1.south);
\draw [->,thick] ([xshift=0em,yshift=0em]t1.north) -- ([xshift=0em,yshift=0em]t2.south);
\draw [->,thick] ([xshift=0em,yshift=0em]n3.north) -- ([xshift=0em,yshift=0em]t2.south);
\draw [->,thick] ([xshift=0em,yshift=0em]t2.north) -- ([xshift=0em,yshift=0em]t3.south);
\draw [->,thick] ([xshift=0em,yshift=0em]n4.north) -- ([xshift=0em,yshift=0em]t3.south);
\draw [->,thick] ([xshift=0em,yshift=0em]t3.north) -- ([xshift=0em,yshift=0em]t4.south);
\draw [->,thick] ([xshift=0em,yshift=0em]n5.north) -- ([xshift=0em,yshift=0em]t4.south);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\node[rounded corners=4pt, minimum width=10.4em, minimum height=7em,fill=yellow!15!gray!15] (box1) at (0em,0em){};
\node[anchor=west,rounded corners=4pt, minimum width=10.4em, minimum height=7em,fill=yellow!15!gray!15] (box2) at ([xshift=2.8em]box1.east){};
\node[anchor=west,rounded corners=4pt, minimum width=10.4em, minimum height=7em,fill=yellow!15!gray!15] (box3) at ([xshift=2.8em]box2.east){};
\draw[densely dotted,line width=1.2pt] ([xshift=0.8em]box1.90) -- ([xshift=0.8em]box1.-90);
\draw[densely dotted,line width=1.2pt] ([xshift=0.8em]box2.90) -- ([xshift=0.8em]box2.-90);
\draw[densely dotted,line width=1.2pt] ([xshift=0.8em]box3.90) -- ([xshift=0.8em]box3.-90);
\node[anchor=west,draw,rounded corners=2pt, minimum width=5em, minimum height=3em,font=\scriptsize,align=center,inner sep=1pt,fill=yellow!10] (n1) at ([xshift=0.5em]box1.west){机器学习算法:\\决策树、支持 \\ 向量机$\cdots$};
\node[anchor=west,draw,rounded corners=2pt, minimum width=5em, minimum height=3em,font=\scriptsize,align=center,inner sep=1pt,fill=yellow!10] (n2) at ([xshift=0.5em]box2.west){神经网络:\\RNN、CNN、 \\ Transformer$\cdots$};
\node[anchor=west,draw,rounded corners=2pt, minimum width=5em, minimum height=3em,font=\scriptsize,align=center,inner sep=1pt,fill=ugreen!10] (n3) at ([xshift=0.5em]box3.west){神经网络:\\RNN、CNN、 \\ Transformer$\cdots$};
\foreach \x/\c in {1/yellow,2/ugreen,3/ugreen}{
\node[anchor=north,font=\scriptsize,inner ysep=0.1em] (output_\x)at ([xshift=-2.2em,yshift=-0.5em]box\x.north){输出};
\node[anchor=north,inner ysep=0.1em] at ([xshift=3em,yshift=-0.5em]box\x.north){\scriptsize\bfnew{执行步骤}};
\node[anchor=south,font=\scriptsize,inner ysep=0.1em,fill=\c!10,rounded corners=2pt] at ([xshift=-2.2em,yshift=0.5em]box\x.south)(input_\x){输入};
\draw[->,thick] (input_\x.90) -- (n\x.-90);
\draw[->,thick] (n\x.90) -- (output_\x.-90);
}
\node[anchor=east,font=\scriptsize,align=center,inner xsep=0pt] at ([xshift=-0.2em]box1.east){1.特征提取;\\2.模型设计; \\3.实验验证。};
\node[anchor=east,font=\scriptsize,align=center,inner xsep=0pt] at ([xshift=-0.2em]box2.east){1.模型设计; \\2.实验验证。\\ };
\node[anchor=east,font=\scriptsize,align=center,inner xsep=0pt] at ([xshift=-0.2em]box3.east){1.实验验证。 \\ \\};
\node [draw,thick,anchor=west,single arrow,minimum height=1.6em,single arrow head extend=0.4em] at ([xshift=0.6em]box1.east) {};
\node [draw,thick,anchor=west,single arrow,minimum height=1.6em,single arrow head extend=0.4em] at ([xshift=0.6em]box2.east) {};
\node[font=\footnotesize, anchor=north] at ([yshift=-0.1em]box1.south){传统机器学习};
\node[font=\footnotesize, anchor=north] at ([yshift=-0.1em]box2.south){深度学习};
\node[font=\footnotesize, anchor=north] at ([yshift=-0.1em]box3.south){深度学习\&网络结构搜索};
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{cirnode}=[circle,minimum size=3.7em,draw]
\tikzstyle{recnode}=[rectangle,rounded corners=2pt,inner sep=0mm,minimum height=1.5em,minimum width=4em,draw]
\node [anchor=west,cirnode] (n1) at (0, 0) {$\mathbi{h}_{i-2}^l$};
\node [anchor=west,cirnode] (n2) at ([xshift=1em,yshift=0em]n1.east) {$\mathbi{h}_{i-1}^l$};
\node [anchor=west,cirnode] (n3) at ([xshift=1em,yshift=0em]n2.east) {$\mathbi{h}_{i}^l$};
\node [anchor=west,cirnode] (n4) at ([xshift=1em,yshift=0em]n3.east) {$\mathbi{h}_{i+1}^l$};
\node [anchor=west,cirnode] (n5) at ([xshift=1em,yshift=0em]n4.east) {$\mathbi{h}_{i+2}^l$};
\node [anchor=center,blue!30,minimum height=4.2em,minimum width=4.5em,very thick,draw] (c1) at ([xshift=0em,yshift=0em]n3.center) {};
\node [anchor=center,ugreen!30,minimum height=4.9em,minimum width=14.5em,very thick,draw] (c2) at ([xshift=0em,yshift=0em]n3.center) {};
\node [anchor=center,red!30,minimum height=5.6em,minimum width=24.5em,very thick,draw] (c3) at ([xshift=0em,yshift=0em]n3.center) {};
\node [anchor=south,recnode] (r1) at ([xshift=0em,yshift=2.5em]n2.north) {$\textrm{head}_1$};
\node [anchor=south,recnode] (r2) at ([xshift=0em,yshift=2.5em]n3.north) {$\textrm{head}_2$};
\node [anchor=south,recnode] (r3) at ([xshift=0em,yshift=2.5em]n4.north) {$\textrm{head}_3$};
\node [anchor=south,cirnode] (n6) at ([xshift=0em,yshift=1em]r2.north) {$\mathbi{h}_{i}^{l+1}$};
\draw [->,very thick,blue!30] ([xshift=0em,yshift=0em]c1.north) -- ([xshift=0em,yshift=0em]r2.south);
\draw [->,very thick,ugreen!30] ([xshift=4.73em,yshift=0em]c2.north) -- ([xshift=0em,yshift=0em]r3.south);
\draw [->,very thick,red!30] ([xshift=-4.73em,yshift=0em]c3.north) -- ([xshift=0em,yshift=0em]r1.south);
\draw [->] ([xshift=0em,yshift=0em]r1.north) -- ([xshift=0em,yshift=0em]n6.south west);
\draw [->] ([xshift=0em,yshift=0em]r2.north) -- ([xshift=0em,yshift=0em]n6.south);
\draw [->] ([xshift=0em,yshift=0em]r3.north) -- ([xshift=0em,yshift=0em]n6.south east);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{encnode}=[rectangle,inner sep=0mm,minimum height=2em,minimum width=4.5em,rounded corners=5pt,thick]
\tikzstyle{decnode}=[rectangle,inner sep=0mm,minimum height=2em,minimum width=4.5em,rounded corners=5pt,thick]
\tikzstyle{cnode}=[rectangle,draw=teal!80, inner sep=0mm,minimum height=1.4em,minimum width=4.4em,fill=teal!17,rounded corners=2pt,thick]
\node [anchor=north,encnode] (n1) at (0, 0) {编码器};
\node [anchor=north,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n2) at ([xshift=0em,yshift=-0.2em]n1.south) {$\mathbi{X}$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n3) at ([xshift=1.5em,yshift=0em]n2.east) {$\mathbi{h}_0$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n4) at ([xshift=1.5em,yshift=0em]n3.east) {$\mathbi{h}_1$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n5) at ([xshift=1.5em,yshift=0em]n4.east) {$\mathbi{h}_2$};
\node [anchor=west,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n6) at ([xshift=1em,yshift=0em]n5.east) {$\ldots$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n7) at ([xshift=1em,yshift=0em]n6.east) {$\mathbi{h}_{N-1}$};
\node [anchor=north,cnode] (cn1) at ([xshift=0em,yshift=-2.8em]n4.south) {\footnotesize{权重聚合$\mathbi{g}$}};
\node [anchor=north,cnode,opacity=0.5] (cn2) at ([xshift=0em,yshift=-2.8em]n3.south) {\footnotesize{权重聚合$\mathbi{g}$}};
\node [anchor=north,cnode,opacity=0.5] (cn3) at ([xshift=0em,yshift=-2.8em]n5.south) {\footnotesize{权重聚合$\mathbi{g}$}};
\node [anchor=north,cnode,opacity=0.5] (cn4) at ([xshift=0em,yshift=-2.8em]n7.south) {\footnotesize{权重聚合$\mathbi{g}$}};
\node [anchor=west,decnode] (n9) at ([xshift=0em,yshift=-7.2em]n1.west) {解码器};
\node [anchor=north,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n10) at ([xshift=0em,yshift=-0.2em]n9.south) {$\mathbi{y}_{<j}$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n11) at ([xshift=1.5em,yshift=0em]n10.east) {$\mathbi{s}_j^0$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n12) at ([xshift=1.5em,yshift=0em]n11.east) {$\mathbi{s}_j^1$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n13) at ([xshift=1.5em,yshift=0em]n12.east) {$\mathbi{s}_j^2$};
\node [anchor=west,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n14) at ([xshift=1em,yshift=0em]n13.east) {$\ldots$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n15) at ([xshift=1em,yshift=0em]n14.east) {$\mathbi{s}_j^{M-1}$};
\node [anchor=west,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n16) at ([xshift=1.5em,yshift=0em]n15.east) {$\mathbi{y}_{j}$};
\node [anchor=south,minimum height=1.5em,minimum width=2.5em] (n17) at ([xshift=0em,yshift=6em]n16.north) {};
\node [anchor=west,minimum height=0.5em,minimum width=4em] (n20) at ([xshift=0em,yshift=-2.5em]n2.east) {};
\node [anchor=west,minimum height=0.5em,minimum width=4em] (n21) at ([xshift=0em,yshift=-3.9em]n2.east) {};
\node [anchor=north,minimum height=0.5em,minimum width=4em] (n22) at ([xshift=0em,yshift=-0.7em]n11.south) {};
\begin{pgfonlayer}{background}
{
\node[rectangle,inner sep=2pt,fill=blue!7] [fit = (n1) (n7) (n17) (n20)] (bg1) {};
\node[rectangle,inner sep=2pt,fill=red!7] [fit = (n9) (n16) (n13) (n21) (n22)] (bg2) {};
}
\end{pgfonlayer}
\draw [->,thick] ([xshift=0em,yshift=0em]n2.east) -- ([xshift=0em,yshift=0em]n3.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n3.east) -- ([xshift=0em,yshift=0em]n4.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n4.east) -- ([xshift=0em,yshift=0em]n5.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n5.east) -- ([xshift=0em,yshift=0em]n6.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n6.east) -- ([xshift=0em,yshift=0em]n7.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n10.east) -- ([xshift=0em,yshift=0em]n11.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n11.east) -- ([xshift=0em,yshift=0em]n12.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n12.east) -- ([xshift=0em,yshift=0em]n13.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n13.east) -- ([xshift=0em,yshift=0em]n14.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n14.east) -- ([xshift=0em,yshift=0em]n15.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n15.east) -- ([xshift=0em,yshift=0em]n16.west);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]cn2.south) -- ([xshift=0em,yshift=0em]n11.north);
\draw [->,thick] ([xshift=0em,yshift=0em]cn1.south) -- ([xshift=0em,yshift=0em]n12.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]cn3.south) -- ([xshift=0em,yshift=0em]n13.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]cn4.south) -- ([xshift=0em,yshift=0em]n15.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n2.south)..controls +(south:1.4em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn2.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n3.south)..controls +(south:0.8em) and +(north:0.7em)..([xshift=0em,yshift=0em]cn2.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n4.south)..controls +(south:0.6em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn2.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n5.south)..controls +(south:0.7em) and +(north:0.7em)..([xshift=0em,yshift=0em]cn2.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n7.south)..controls +(south:1.4em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn2.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n2.south)..controls +(south:1.4em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn3.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n3.south)..controls +(south:0.8em) and +(north:0.7em)..([xshift=0em,yshift=0em]cn3.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n4.south)..controls +(south:0.6em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn3.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n5.south)..controls +(south:0.7em) and +(north:0.7em)..([xshift=0em,yshift=0em]cn3.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n7.south)..controls +(south:1.4em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn3.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n2.south)..controls +(south:1.4em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn4.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n3.south)..controls +(south:0.8em) and +(north:0.7em)..([xshift=0em,yshift=0em]cn4.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n4.south)..controls +(south:0.6em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn4.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n5.south)..controls +(south:0.7em) and +(north:0.7em)..([xshift=0em,yshift=0em]cn4.north);
\draw [->,thick,gray!70,opacity=0.5] ([xshift=0em,yshift=0em]n7.south)..controls +(south:1.4em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn4.north);
\draw [->,thick] ([xshift=0em,yshift=0em]n2.south)..controls +(south:1.4em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn1.north);
\draw [->,thick] ([xshift=0em,yshift=0em]n3.south)..controls +(south:0.8em) and +(north:0.7em)..([xshift=0em,yshift=0em]cn1.north);
\draw [->,thick] ([xshift=0em,yshift=0em]n4.south)..controls +(south:0.6em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn1.north);
\draw [->,thick] ([xshift=0em,yshift=0em]n5.south)..controls +(south:0.7em) and +(north:0.7em)..([xshift=0em,yshift=0em]cn1.north);
\draw [->,thick] ([xshift=0em,yshift=0em]n7.south)..controls +(south:1.4em) and +(north:0.6em)..([xshift=0em,yshift=0em]cn1.north);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
......@@ -142,12 +142,21 @@ A_{ij}^{rel} &=& \underbrace{\mathbi{E}_{x_i}\mathbi{W}_Q\mathbi{W}_{K}^{T}\math
\begin{itemize}
\vspace{0.5em}
\item 高斯约束\upcite{Yang2018ModelingLF}。这类方法的核心思想是引入可学习的高斯分布进行局部建模,之后与自注意力机制中计算得到的注意力分布进行融合,如下图:形式上,可以用自注意力机制中计算得到的中间表示来预测高斯分布的中心和偏差,得到高斯分布$\mathbi{G}$后累加在模型计算得到的相关性系数之上在进行归一化计算,具体的形式如下:
\item 高斯约束\upcite{Yang2018ModelingLF}。这类方法的核心思想是引入可学习的高斯分布进行局部建模,之后与自注意力机制中计算得到的注意力分布进行融合,如\ref{fig:15-1}。形式上,可以用自注意力机制中计算得到的中间表示来预测高斯分布的中心和偏差,得到高斯分布$\mathbi{G}$后累加在模型计算得到的相关性系数之上在进行归一化计算,具体的形式如下:{\red{下面图片中的曲线再调整一下}}
\begin{equation}
\mathbi{e}_{ij} = \frac{(\mathbi{x}_i \mathbi{W}_Q){(\mathbi{x}_j \mathbi{W}_K)}^{T}}{\sqrt{d_k}} + \mathbi{G}
\label{eq:15-13}
\end{equation}
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter15/Figures/figure-attention-distribution-based-on-gaussian-distribution}
\caption{融合高斯分布的注意力分布}
\label{fig:15-1}
\end{figure}
%-------------------------------------------
\noindent 其中,$\mathbi{G} \in \mathbb{R}^{m\times m}$$m$是源语言的句子长度。$\mathbi{G}$中的每个元素$G_{ij}$表示当前单词$\mathbi{x}_j$和预测的中心位置$P_i$之间的关联程度,计算公式如下:
\begin{equation}
G_{ij} = - \frac{{(j - P_i)}^2}{2\sigma_i^2}
......@@ -160,15 +169,6 @@ G_{ij} = - \frac{{(j - P_i)}^2}{2\sigma_i^2}
\label{eq:15-15}
\end{equation}
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter15/Figures/figure-attention-distribution-based-on-gaussian-distribution}
\caption{融合高斯分布的注意力分布}
\label{fig:15-1}
\end{figure}
%-------------------------------------------
\noindent 通过标量$m$控制得到的中心位置和偏差的数值为0和序列长度之间的实数值,$p_i$$v_i$为网络计算的中间结果,分别用于预测中心位置和窗口大小。用如下方式计算:
\begin{eqnarray}
p_i &=& \mathbi{I}_p^T\textrm{Tanh}(\mathbi{W}_p\mathbi{Q}_i) \\
......@@ -184,7 +184,7 @@ v_i &=& \mathbi{I}_d^T\textrm{Tanh}(\mathbi{W}_p\mathbi{Q}_i)
%----------------------------------------------
\begin{figure}[htp]
\centering
\includegraphics[scale=0.4]{./Chapter15/Figures/figure-multi-scale-local-modeling.png}
\input{./Chapter15/Figures/figure-multi-scale-local-modeling}
\caption{多尺度局部建模}
\label{fig:15-2}
\end{figure}
......@@ -227,12 +227,12 @@ C(\mathbi{x}_j \mathbi{W}_K,\omega) = (\mathbi{x}_{j-\omega},\ldots,\mathbi{x}_{
\vspace{0.5em}
\item 使用1维卷积注意力网络(图\ref{fig:15-3}(b))。为了捕捉短距离依赖,则可以使用一维的卷积自注意力网络(1D-CSAN)将关注的范围限制在相近的元素窗口中。其形式上十分简单,只需预先设定好局部建模的窗口大小范围D,并在进行注意力权重计算和对Value值进行加权求和时,将其限制在设定好的窗口范围内即可。
\vspace{0.5em}
\item 使用2维卷积注意力网络(图\ref{fig:15-3}(c))。在1维卷积注意力网络的基础上对多个注意力头之间的信息进行了交互建模,打破了注意力头之间的界限。 1D-CDAN的关注区域为$1\times D$,当将其扩展为2维矩形$(N×D)$,长和宽分别为局部窗口的大小和参与建模的自注意力头的个数。在这种形势下,允许模型计算某个头中的第$i$个元素和第$s$个头中的第$j$个元素之间的相关性系数。实现了对不同子空间之间关系的建模,所得到的注意力分布表示了头之间的依赖关系。
\item 使用2维卷积注意力网络(图\ref{fig:15-3}(c))。在1维卷积注意力网络的基础上对多个注意力头之间的信息进行了交互建模,打破了注意力头之间的界限。 1D-CDAN的关注区域为$1\times D$,当将其扩展为2维矩形$(N\times D)$,长和宽分别为局部窗口的大小和参与建模的自注意力头的个数。在这种形势下,允许模型计算某个头中的第$i$个元素和第$s$个头中的第$j$个元素之间的相关性系数。实现了对不同子空间之间关系的建模,所得到的注意力分布表示了头之间的依赖关系。
%----------------------------------------------
\begin{figure}[htp]
\centering
\includegraphics[scale=0.3]{./Chapter15/Figures/figure-convolutional-attention-network.png}
\input{./Chapter15/Figures/figure-convolutional-attention-network}
\caption{卷积注意力网络}
\label{fig:15-3}
\end{figure}
......@@ -1096,7 +1096,7 @@ lr=d_{model}^{-0.5}\cdot step\_num^{-0.5}
%----------------------------------------------
\begin{figure}[htp]
\centering
\includegraphics[scale=0.1]{./Chapter15/Figures/figure-encoder-structure-of-transformer-model-optimized-by-nas.jpg}
\input{./Chapter15/Figures/figure-encoder-structure-of-transformer-model-optimized-by-nas}
\caption{传统Transformer以及通过网络结构搜索方法优化后的Transformer模型编码器结构}
\label{fig:15-27}
\end{figure}
......@@ -1107,7 +1107,7 @@ lr=d_{model}^{-0.5}\cdot step\_num^{-0.5}
%----------------------------------------------
\begin{figure}[htp]
\centering
\includegraphics[scale=0.6]{./Chapter15/Figures/figure-evolution-and-change-of-ml-methods.jpg}
\input{./Chapter15/Figures/figure-evolution-and-change-of-ml-methods}
\caption{机器学习方法的演化与变迁}
\label{fig:15-28}
\end{figure}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论