Commit 569f7cf8 by 单韦乔

合并分支 'shanweiqiao' 到 'caorunzhe'

十五章现有内容整理以及部分格式修改

查看合并请求 !286
parents 7f08f351 7f29f545
\begin{tikzpicture}
\begin{scope}
\node [anchor=north,rectangle, inner sep=0mm,minimum height=1.2em,minimum width=2em,rounded corners=5pt,thick] (n1) at (0, 0) {编码端};
\node [anchor=west,rectangle, inner sep=0mm,minimum height=1.2em,minimum width=0em,rounded corners=5pt,thick] (n2) at ([xshift=3.5em,yshift=-0.5em]n1.east) {$z_0$};
\node [anchor=west,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=3em,fill=orange!20,rounded corners=5pt,thick] (n3) at ([xshift=3.5em,yshift=0em]n2.east) {$z_1$};
\node [anchor=west,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=3em,fill=orange!20,rounded corners=5pt,thick] (n4) at ([xshift=3.5em,yshift=0em]n3.east) {$z_2$};
\node [anchor=west,rectangle, inner sep=0mm,minimum height=1.2em,minimum width=1em,rounded corners=5pt,thick] (n6) at ([xshift=1.5em,yshift=0em]n4.east) {$\ldots$};
\node [anchor=west,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=3em,fill=orange!20,rounded corners=5pt,thick] (n5) at ([xshift=3.5em,yshift=0em]n6.east) {$z_{l}$};
\node [anchor=west,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=3em,fill=orange!20,rounded corners=5pt,thick] (n7) at ([xshift=1.5em,yshift=0em]n5.east) {$z_{l+1}$};
\node [anchor=north,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=15em,fill=teal!17,rounded corners=5pt,thick] (n8) at ([xshift=0em,yshift=-3em]n4.south) {层正则化};
\node [anchor=north,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=15em,fill=purple!17,rounded corners=5pt,thick] (n9) at ([xshift=0em,yshift=-1em]n8.south) {$L_0\ \quad L_1\ \quad L_2\quad \ldots \quad\ L_l$};
\node [anchor=north,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=15em,fill=teal!17,rounded corners=5pt,thick] (n10) at ([xshift=0em,yshift=-2em]n9.south) {权重累加};
\node [anchor=west,rectangle, inner sep=0mm,minimum height=1.2em, rounded corners=5pt,thick] (n11) at ([xshift=0em,yshift=-4.5em]n1.west) {聚合网络};
\node [anchor=east,rectangle, inner sep=0mm,minimum height=1.2em,minimum width=9em,rounded corners=5pt,thick] (n12) at ([xshift=0em,yshift=-4.5em]n7.east) {};
\node [anchor=south,rectangle, inner sep=0mm,minimum height=1em,minimum width=1em,rounded corners=5pt,thick] (n13) at ([xshift=0em,yshift=1em]n8.north) {};
\begin{pgfonlayer}{background}
{
\node[rectangle,inner sep=2pt,fill=blue!7] [fit = (n1) (n7) (n13)] (bg1) {};
\node[rectangle,inner sep=2pt,fill=red!7] [fit = (n10) (n8) (n11) (n12)] (bg2) {};
}
\end{pgfonlayer}
\draw[->,thick] ([xshift=0.5em,yshift=-0em]n2.south)..controls +(south:2em) and +(north:2em)..([xshift=-0em,yshift=-0em]n8.north) ;
\draw[->,thick] ([xshift=-0em,yshift=-0em]n3.south)..controls +(south:2em) and +(north:2em)..([xshift=-0em,yshift=-0em]n8.north) ;
\draw[->,thick] ([xshift=-0em,yshift=-0em]n5.south)..controls +(south:2em) and +(north:2em)..([xshift=-0em,yshift=-0em]n8.north) ;
\draw [->,thick] ([xshift=0em,yshift=0em]n4.south) -- ([xshift=0em,yshift=0em]n8.north);
\draw [->,thick] ([xshift=0em,yshift=0em]n8.south) -- ([xshift=0em,yshift=0em]n9.north);
\draw[->,thick] ([xshift=-4.5em,yshift=-0em]n9.south)..controls +(south:0.8em) and +(north:0.8em)..([xshift=-0em,yshift=-0em]n10.north) ;
\draw[->,thick] ([xshift=-2em,yshift=-0em]n9.south)..controls +(south:0.8em) and +(north:0.8em)..([xshift=-0em,yshift=-0em]n10.north) ;
\draw[->,thick] ([xshift=0em,yshift=-0em]n9.south)..controls +(south:0.8em) and +(north:0.8em)..([xshift=-0em,yshift=-0em]n10.north) ;
\draw[->,thick] ([xshift=4.5em,yshift=-0em]n9.south)..controls +(south:0.8em) and +(north:0.8em)..([xshift=-0em,yshift=-0em]n10.north) ;
\draw[->,thick] ([xshift=0em,yshift=-0em]n10.east)..controls +(east:5em) and +(south:1.5em)..([xshift=-0em,yshift=-0em]n7.south) ;
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%%
\begin{center}
\begin{tikzpicture}
\begin{scope}[scale=0.6]
\node [anchor=east,fill=red!50,draw,rounded corners=3pt] (s11) at (-0.5em, 0) {\footnotesize{sublayer1}};
\node [anchor=west,draw,circle,line width=1pt] (c11) at ([xshift=2em]s11.east) {};
\node [anchor=north,fill=red!10,draw,dotted,rounded corners=3pt] (s21) at ([yshift=-3em]s11.south) {\footnotesize{sublayer1}};
\node [anchor=west, draw,circle,dotted,line width=1pt] (c21) at ([xshift=2em]s21.east) {};
\node [anchor=west,fill=red!10,draw,dotted,rounded corners=3pt] (s22) at ([xshift=2em]c21.east) {\footnotesize{sublayer2}};
\node [anchor=west, draw,circle,dotted,line width=1pt] (c22) at ([xshift=2em]s22.east) {};
\node [anchor=north,fill=red!50,draw,rounded corners=3pt] (s31) at ([yshift=-3em]s21.south) {\footnotesize{sublayer1}};
\node [anchor=west,draw,circle,line width=1pt] (c31) at ([xshift=2em]s31.east) {};
\node [anchor=north,fill=red!10,draw,dotted,rounded corners=3pt] (s41) at ([yshift=-3em]s31.south) {\footnotesize{sublayer1}};
\node [anchor=east, draw,circle,line width=1pt] (c44) at ([xshift=-2em]s41.west) {};
\node [anchor=west, draw,circle,dotted,line width=1pt] (c41) at ([xshift=2em]s41.east) {};
\node [anchor=west,fill=red!10,draw,dotted,rounded corners=3pt] (s42) at ([xshift=2em]c41.east) {\footnotesize{sublayer2}};
\node [anchor=west, draw,circle,dotted,line width=1pt] (c42) at ([xshift=2em]s42.east) {};
\node [anchor=west,fill=red!50,draw,rounded corners=3pt] (s43) at ([xshift=2em]c42.east) {\footnotesize{sublayer3}};
\node [anchor=west, draw,circle,line width=1pt] (c43) at ([xshift=2em]s43.east) {};
\draw[-,rounded corners,line width=1pt] (c44.east) -- ([xshift=0.8em]c44.east) -- ([xshift=-1.2em,yshift=2.3em]s11.west) -- ([xshift=2.8em,,yshift=2.3em]s11.east) -- (c11.north);
\draw[-,rounded corners,line width=1pt] (c44.east) -- ([xshift=0.8em]c44.east) -- ([xshift=-1.2em]s11.west) -- (s11.west);
\draw[-,rounded corners,line width=1pt] (s11.east) -- (c11.west);
\draw[-,rounded corners,line width=1pt] (c11.east) -- ([xshift=11.3em]c11.east) -- (c22.north);
\draw[-,rounded corners,line width=1pt,dotted] (c44.east) -- ([xshift=0.8em]c44.east) -- ([xshift=-1.2em,yshift=2.3em]s21.west) -- ([xshift=2.7em,,yshift=2.3em]s21.east) -- (c21.north);
\draw[-,rounded corners,line width=1pt,dotted] (c44.east) -- ([xshift=0.8em]c44.east) -- ([xshift=-1.2em]s21.west) -- (s21.west);
\draw[-,rounded corners,line width=1pt,dotted] (s21.east) -- (c21.west);
\draw[-,rounded corners,line width=1pt,dotted] (c21.east) -- (s22.west);
\draw[-,rounded corners,line width=1pt,dotted] (s22.east) -- (c22.west);
\draw[-,rounded corners,line width=1pt] (c22.east) -- ([xshift=11.3em]c22.east) -- (c43.north);
\draw[-,rounded corners,line width=1pt] (c44.east) -- ([xshift=0.8em]c44.east) -- ([xshift=-1.2em,yshift=2.3em]s31.west) -- ([xshift=2.7em,,yshift=2.3em]s31.east) -- (c31.north);
\draw[-,rounded corners,line width=1pt] (c44.east) -- ([xshift=0.8em]c44.east) -- ([xshift=-1.2em]s31.west) -- (s31.west);
\draw[-,rounded corners,line width=1pt] (s31.east) -- (c31.west);
\draw[-,rounded corners,line width=1pt] (c31.east) -- ([xshift=11.3em]c31.east) -- (c42.north);
\draw[-,rounded corners,line width=1pt,dotted] (c44.east) -- ([xshift=0.8em]c44.east) -- ([xshift=-1.2em,yshift=2.3em]s41.west) -- ([xshift=2.7em,,yshift=2.3em]s41.east) -- (c41.north);
\draw[-,rounded corners,line width=1pt,dotted] (c44.east) -- (s41.west);
\draw[-,rounded corners,line width=1pt,dotted] (s41.east) -- (c41.west);
\draw[-,rounded corners,line width=1pt,dotted] (c41.east) -- (s42.west);
\draw[-,rounded corners,line width=1pt,dotted] (s42.east) -- (c42.west);
\draw[-,rounded corners,line width=1pt] (c42.east) -- (s43.west);
\draw[-,rounded corners,line width=1pt] (s43.east) -- (c43.west);
\draw[->,rounded corners,line width=1pt] (c43.east) -- ([xshift=2em]c43.east);
\end{scope}
\end{tikzpicture}
\end{center}
\begin{center}
\begin{tikzpicture}[scale=1.0]
\footnotesize{
\begin{axis}[
width=.50\textwidth,
height=.40\textwidth,
legend style={at={(0.60,0.08)}, anchor=south west},
xlabel={\scriptsize{更新次数(10k)}},
ylabel={\scriptsize{学习率 ($10^{-3}$}},
ylabel style={yshift=-1em},xlabel style={yshift=0.0em},
yticklabel style={/pgf/number format/precision=2,/pgf/number format/fixed zerofill},
ymin=0,ymax=2.2, ytick={0.5, 1, 1.5, 2},
xmin=0,xmax=5,xtick={1,2,3,4},
legend style={xshift=-8pt,yshift=-4pt, legend plot pos=right,font=\scriptsize,cells={anchor=west}}
]
\addplot[red,line width=1.25pt] coordinates {(0,0) (1.6,2) (1.8,1.888) (2,1.787) (2.5,1.606) (3,1.462) (3.5,1.3549) (4,1.266) (4.5,1.193) (5,1.131)};
\addlegendentry{\scriptsize Base48}
%\addplot[red,line width=1.25pt] coordinates {(0,0) (8000,0.002) (10000,0.00179) (12000,0.00163) (12950,0.001572)};
\addplot[blue,line width=1.25pt] coordinates {(0,0) (0.8,2) (0.9906,1.7983)};
%\addplot[red,line width=1.25pt] coordinates {(0,0) (8000,0.002) (9906,0.0017983)};
\addplot[blue,dashed,line width=1.25pt] coordinates {(0.9906,1.7983) (0.9906,2)};
\addplot[blue,line width=1.25pt] coordinates {(0.9906,2) (1.1906,1.79) (1.3906,1.63) (1.4856,1.572)};
\addplot[blue,dashed,line width=1.25pt] coordinates {(1.4856,1.572) (1.4856,2)};
\addplot[blue,line width=1.25pt] coordinates {(1.4856,2) (1.6856,1.79) (1.8856,1.63) (1.9806,1.572)};
\addplot[blue,dashed,line width=1.25pt] coordinates {(1.9806,1.572) (1.9806,2)};
\addplot[blue,line width=1.25pt] coordinates {(1.9806,2) (2.1806,1.79) (2.3806,1.63) (2.4756,1.572)};
\addplot[blue,dashed,line width=1.25pt] coordinates {(2.4756,1.572) (2.4756,2)};
\addplot[blue,line width=1.25pt] coordinates {(2.4756,2) (2.6756,1.79) (2.8756,1.63) (2.9706,1.572)};
\addplot[blue,dashed,line width=1.25pt] coordinates {(2.9706,1.572) (2.9706,2)};
\addplot[blue,line width=1.25pt] coordinates {(2.9706,2) (3.1706,1.79) (3.3706,1.63) (3.4656,1.572) (3.6706,1.4602) (3.7136,1.44)};
\addplot[blue,dashed,line width=1.25pt] coordinates {(3.7136,1.44) (3.7136,2)};
\addplot[blue,line width=1.25pt] coordinates {(3.7136,2) (3.9136,1.79) (4.1136,1.63) (4.2086,1.572) (4.4136,1.4602) (4.4566,1.44) (4.7000,1.3574) (5.0000,1.2531)};
\addlegendentry{\scriptsize SDT48}
\end{axis}
}
\end{tikzpicture}
\end{center}
\ No newline at end of file
%%%------------------------------------------------------------------------------------------------------------
%%% 调序模型1:基于距离的调序
\begin{center}
\begin{tikzpicture}
\begin{scope}[minimum height = 20pt]
\node [anchor=east] (x1) at (-0.5em, 0) {$x_l$};
\node [anchor=west,draw,fill=red!20,inner xsep=5pt,rounded corners=2pt] (F1) at ([xshift=2em]x1.east){\small{$\mathcal{F}$}};
\node [anchor=west,circle,draw,minimum size=1em] (n1) at ([xshift=2em]F1.east) {};
\node [anchor=west,draw,fill=green!20,inner xsep=5pt,rounded corners=2pt] (ln1) at ([xshift=2em]n1.east){\small{\textrm{LN}}};
\node [anchor=west] (x2) at ([xshift=2em]ln1.east) {$x_{l+1}$};
\node [anchor=north] (x3) at ([yshift=-5em]x1.south) {$x_l$};
\node [anchor=west,draw,fill=green!20,inner xsep=5pt,rounded corners=2pt] (F2) at ([xshift=2em]x3.east){\small{\textrm{LN}}};
\node [anchor=west,draw,fill=red!20,inner xsep=5pt,rounded corners=2pt] (ln2) at ([xshift=2em]F2.east){\small{$\mathcal{F}$}};
\node [anchor=west,circle,draw,,minimum size=1em] (n2) at ([xshift=2em]ln2.east){};
\node [anchor=west] (x4) at ([xshift=2em]n2.east) {$x_{l+1}$};
\draw[->, line width=1pt] ([xshift=-0.1em]x1.east)--(F1.west);
\draw[->, line width=1pt] ([xshift=-0.1em]F1.east)--(n1.west);
\draw[->, line width=1pt] (n1.east)--node[above]{$y_l$}(ln1.west);
\draw[->, line width=1pt] ([xshift=-0.1em]ln1.east)--(x2.west);
\draw[->, line width=1pt] ([xshift=-0.1em]x3.east)--(F2.west);
\draw[->, line width=1pt] ([xshift=-0.1em]F2.east)--(ln2.west);
\draw[->, line width=1pt] ([xshift=0.1em]ln2.east)--node[above]{$y_l$}(n2.west);
\draw[->, line width=1pt] (n2.east)--(x4.west);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x1.north) -- ([yshift=1em]x1.north) -- ([yshift=1.4em]n1.north) -- (n1.north);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x3.north) -- ([yshift=1em]x3.north) -- ([yshift=1.4em]n2.north) -- (n2.north);
\draw[-] (n1.west)--(n1.east);
\draw[-] (n1.north)--(n1.south);
\draw[-] (n2.west)--(n2.east);
\draw[-] (n2.north)--(n2.south);
\node [anchor=south] (k1) at ([yshift=-0.1em]x1.north){};
\node [anchor=south] (k2) at ([yshift=-0.1em]x3.north){};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.3em,fill=orange!10] [fit = (x1) (F1) (n1) (ln1) (x2) (k1)] (box0) {};
\node [rectangle,inner sep=0.3em,fill=blue!10] [fit = (x3) (F2) (n2) (ln2) (x4) (k2)] (box1) {};
\end{pgfonlayer}
\node [anchor=north] (c1) at (box0.south){\footnotesize {(a)后作方式的残差连接}};
\node [anchor=north] (c2) at (box1.south){\footnotesize {(b)前作方式的残差连接}};
\end{scope}
\end{tikzpicture}
\end{center}
\ No newline at end of file
%%%------------------------------------------------------------------------------------------------------------
\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=east,fill=orange!20,draw,rounded corners=3pt,minimum height=1.6em,minimum width=1.6em] (s11) at (-0.5em, 0) {\footnotesize{$\times h$}};
\node [rectangle,anchor=west,fill=blue!20,draw,rounded corners=3pt,minimum height=1.6em,minimum width=1.6em] (s12) at ([xshift=1.2em]s11.east) {};
\node [anchor=north,fill=orange!20,draw,rounded corners=3pt,minimum height=1.6em,minimum width=1.6em] (s21) at ([yshift=-1.2em]s11.south) {\footnotesize{$\times h$}};
\node [anchor=west,fill=orange!20,draw=red,rounded corners=3pt,minimum height=1.6em,minimum width=1.6em,dashed] (s22) at ([xshift=1.2em]s21.east) {\footnotesize{$\times h$}};
\node [anchor=west,fill=blue!20,draw,rounded corners=3pt,minimum height=1.6em,minimum width=1.6em] (s23) at ([xshift=1.2em]s22.east) {};
\node [anchor=north,fill=orange!20,draw,rounded corners=3pt,minimum height=1.6em,minimum width=1.6em] (s31) at ([yshift=-1.2em]s21.south) {\footnotesize{$\times h$}};
\node [anchor=west,fill=orange!20,draw,rounded corners=3pt,minimum height=1.6em,minimum width=1.6em] (s32) at ([xshift=1.2em]s31.east) {\footnotesize{$\times h$}};
\node [anchor=west,fill=orange!20,draw=red,rounded corners=3pt,minimum height=1.6em,minimum width=1.6em,dashed] (s33) at ([xshift=1.2em]s32.east) {\footnotesize{$\times h$}};
\node [anchor=west,fill=blue!20,draw,rounded corners=3pt,minimum height=1.6em,minimum width=1.6em] (s34) at ([xshift=1.2em]s33.east) {};
\node [anchor=north,fill=orange!20,draw,rounded corners=3pt,minimum height=1.6em,minimum width=1.6em] (s41) at ([yshift=-1.2em]s31.south) {\footnotesize{$\times h$}};
\node [anchor=west,fill=orange!20,draw,rounded corners=3pt,minimum height=1.6em,minimum width=1.6em] (s42) at ([xshift=1.2em]s41.east) {\footnotesize{$\times h$}};
\node [anchor=west,fill=orange!20,draw,rounded corners=3pt,minimum height=1.6em,minimum width=1.6em] (s43) at ([xshift=1.2em]s42.east) {\footnotesize{$\times h$}};
\node [anchor=west,fill=orange!20,draw=red,rounded corners=3pt,minimum height=1.6em,minimum width=1.6em,dashed] (s44) at ([xshift=1.2em]s43.east) {\footnotesize{$\times h$}};
\node [anchor=west,fill=blue!20,draw,rounded corners=3pt,minimum height=1.6em,minimum width=1.6em] (s45) at ([xshift=1.2em]s44.east) {};
\node [anchor=east] (p1) at ([xshift=-2em]s11.west) {\footnotesize{step 1}};
\node [anchor=east] (p2) at ([xshift=-2em]s21.west) {\footnotesize{step 2}};
\node [anchor=east] (p3) at ([xshift=-2em]s31.west) {\footnotesize{step 3}};
\node [anchor=east] (p4) at ([xshift=-2em]s41.west) {\footnotesize{step 4}};
\node [anchor=south,fill=orange!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (b1) at ([xshift=-0.2em,yshift=1.6em]p1.north) {};
\node [anchor=west] (b2) at (b1.east) {\footnotesize{:编码器}};
\node [anchor=west,fill=blue!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (b3) at ([xshift=1em]b2.east) {};
\node [anchor=west] (b4) at (b3.east) {\footnotesize{:解码器}};
\node [anchor=west] (b5) at ([xshift=2em]b4.east) {\footnotesize{:拷贝}};
\draw[-latex,thick,red,dashed] ([xshift=0.5em]b4.east) -- (b5.west);
\draw [-latex, line width=0.8pt] ([xshift=-1.2em]s11.west) -- (s11.west);
\draw [-latex, line width=0.8pt] (s11.east) -- (s12.west);
\draw [-latex, line width=0.8pt] (s12.east) -- ([xshift=1.2em]s12.east);
\draw [-latex, line width=0.8pt] ([xshift=-1.2em]s21.west) -- (s21.west);
\draw [-latex, line width=0.8pt] (s21.east) -- (s22.west);
\draw [-latex, line width=0.8pt] (s22.east) -- (s23.west);
\draw [-latex, line width=0.8pt] (s23.east) -- ([xshift=1.2em]s23.east);
\draw [-latex, line width=0.8pt] ([xshift=-1.2em]s31.west) -- (s31.west);
\draw [-latex, line width=0.8pt] (s31.east) -- (s32.west);
\draw [-latex, line width=0.8pt] (s32.east) -- (s33.west);
\draw [-latex, line width=0.8pt] (s33.east) -- (s34.west);
\draw [-latex, line width=0.8pt] (s34.east) -- ([xshift=1.2em]s34.east);
\draw [-latex, line width=0.8pt] ([xshift=-1.2em]s41.west) -- (s41.west);
\draw [-latex, line width=0.8pt] (s41.east) -- (s42.west);
\draw [-latex, line width=0.8pt] (s42.east) -- (s43.west);
\draw [-latex, line width=0.8pt] (s43.east) -- (s44.west);
\draw [-latex, line width=0.8pt] (s44.east) -- (s45.west);
\draw [-latex, line width=0.8pt] (s45.east) -- ([xshift=1.2em]s45.east);
\draw[-latex,thick,red,dashed] (s11.south)..controls +(south:1em) and +(north:1.2em)..(s22.north);
\draw[-latex,thick,red,dashed] (s22.south)..controls +(south:1em) and +(north:1.2em)..(s33.north);
\draw[-latex,thick,red,dashed] (s33.south)..controls +(south:1em) and +(north:1.2em)..(s44.north);
\end{scope}
\end{tikzpicture}
\end{center}
%%%------------------------------------------------------------------------------------------------------------
\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=east,fill=orange!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (s11) at (-0.5em, 0) {};
\node [rectangle,anchor=west,fill=orange!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (s12) at ([xshift=2em]s11.east) {};
\node [anchor=west,fill=orange!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (s13) at ([xshift=2em]s12.east) {};
\node [anchor=west,fill=orange!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (s14) at ([xshift=2em]s13.east) {};
\node [anchor=north,fill=orange!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (s21) at ([yshift=-2.5em]s11.south) {};
\node [anchor=west,fill=orange!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (s22) at ([xshift=2em]s21.east) {};
\node [anchor=west,fill=orange!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (s23) at ([xshift=2em]s22.east) {};
\node [anchor=west,fill=orange!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (s24) at ([xshift=2em]s23.east) {};
\node [anchor=north,fill=orange!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (s31) at ([yshift=-2.5em]s21.south) {};
\node [anchor=west,fill=orange!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (s32) at ([xshift=2em]s31.east) {};
\node [anchor=west,fill=orange!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (s33) at ([xshift=2em]s32.east) {};
\node [anchor=west,fill=orange!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (s34) at ([xshift=2em]s33.east) {};
\node [anchor=north,fill=orange!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (s41) at ([yshift=-2.5em]s31.south) {};
\node [anchor=west,fill=orange!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (s42) at ([xshift=2em]s41.east) {};
\node [anchor=west,fill=orange!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (s43) at ([xshift=2em]s42.east) {};
\node [anchor=west,fill=orange!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (s44) at ([xshift=2em]s43.east) {};
\node [anchor=east] (p1) at ([xshift=-3.5em]s11.west) {$p=\infty$};
\node [anchor=east] (p2) at ([xshift=-4em]s21.west) {$p=1$};
\node [anchor=east] (p3) at ([xshift=-4em]s31.west) {$p=2$};
\node [anchor=east] (p4) at ([xshift=-4em]s41.west) {$p=4$};
\node [anchor=north] (p5) at ([yshift=-1em]p3.south) {$\cdots$};
\node [anchor=south,fill=orange!20,draw,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (b1) at ([xshift=-0.6em,yshift=1.2em]p1.north) {};
\node [anchor=west] (b2) at (b1.east) {\footnotesize{:Layer}};
\node [anchor=west,draw=red,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em,dashed,line width=0.8pt] (b3) at ([xshift=1em]b2.east) {};
\node [anchor=west] (b4) at (b3.east) {\footnotesize{:Block}};
\draw [-latex, line width=0.8pt] ([xshift=-2em]s11.west) -- (s11.west);
\draw [-latex, line width=0.8pt] (s11.east) -- (s12.west);
\draw [-latex, line width=0.8pt] (s12.east) -- (s13.west);
\draw [-latex, line width=0.8pt] (s13.east) -- (s14.west);
\draw [-latex, line width=0.8pt] (s14.east) -- ([xshift=2em]s14.east);
\draw [-latex, line width=0.8pt] ([xshift=-2em]s21.west) -- (s21.west);
\draw [-latex, line width=0.8pt] (s21.east) -- (s22.west);
\draw [-latex, line width=0.8pt] (s22.east) -- (s23.west);
\draw [-latex, line width=0.8pt] (s23.east) -- (s24.west);
\draw [-latex, line width=0.8pt] (s24.east) -- ([xshift=2em]s24.east);
\draw [-latex, line width=0.8pt] ([xshift=-2em]s31.west) -- (s31.west);
\draw [-latex, line width=0.8pt] (s31.east) -- (s32.west);
\draw [-latex, line width=0.8pt] (s32.east) -- (s33.west);
\draw [-latex, line width=0.8pt] (s33.east) -- (s34.west);
\draw [-latex, line width=0.8pt] (s34.east) -- ([xshift=2em]s34.east);
\draw [-latex, line width=0.8pt] ([xshift=-2em]s41.west) -- (s41.west);
\draw [-latex, line width=0.8pt] (s41.east) -- (s42.west);
\draw [-latex, line width=0.8pt] (s42.east) -- (s43.west);
\draw [-latex, line width=0.8pt] (s43.east) -- (s44.west);
\draw [-latex, line width=0.8pt] (s44.east) -- ([xshift=2em]s44.east);
\node [draw=red,rounded corners=3pt,minimum height=1.7em,minimum width=1.7em,dashed,line width=0.8pt] (x21) at (s21) {};
\node [draw=red,rounded corners=3pt,minimum height=1.7em,minimum width=1.7em,dashed,line width=0.8pt] (x22) at (s22) {};
\node [draw=red,rounded corners=3pt,minimum height=1.7em,minimum width=1.7em,dashed,line width=0.8pt] (x23) at (s23) {};
\node [draw=red,rounded corners=3pt,minimum height=1.7em,minimum width=1.7em,dashed,line width=0.8pt] (x24) at (s24) {};
\node [draw=red,rounded corners=3pt,minimum height=1.7em,minimum width=5.2em,dashed,line width=0.8pt] (x31) at ([xshift=1.75em]s31) {};
\node [draw=red,rounded corners=3pt,minimum height=1.7em,minimum width=5.2em,dashed,line width=0.8pt] (x32) at ([xshift=1.75em]s33) {};
\node [draw=red,rounded corners=3pt,minimum height=1.7em,minimum width=12.2em,dashed,line width=0.8pt] (x41) at ([xshift=1.75em]s42) {};
{
\draw [-latex, line width=0.8pt] ([xshift=-1em]s21.west).. controls +(58:0.6) and +(122:0.6) .. ([xshift=1em]s21.east);
\draw [-latex, line width=0.8pt] ([xshift=-1em]s22.west).. controls +(58:0.6) and +(122:0.6) .. ([xshift=1em]s22.east);
\draw [-latex, line width=0.8pt] ([xshift=-1em]s23.west).. controls +(58:0.6) and +(122:0.6) .. ([xshift=1em]s23.east);
\draw [-latex, line width=0.8pt] ([xshift=-1em]s24.west).. controls +(58:0.6) and +(122:0.6) .. ([xshift=1em]s24.east);
}
{
\draw [-latex, line width=0.8pt] ([xshift=-1em]s21.west).. controls +(65:0.8) and +(115:0.8) .. ([xshift=1em]s22.east);
\draw [-latex, line width=0.8pt] ([xshift=-1em]s22.west).. controls +(65:0.8) and +(115:0.8) .. ([xshift=1em]s23.east);
\draw [-latex, line width=0.8pt] ([xshift=-1em]s23.west).. controls +(65:0.8) and +(115:0.8) .. ([xshift=1em]s24.east);
\draw [-latex, line width=0.8pt] ([xshift=-1em]s31.west).. controls +(65:0.8) and +(115:0.8) .. ([xshift=1em]s32.east);
\draw [-latex, line width=0.8pt] ([xshift=-1em]s33.west).. controls +(65:0.8) and +(115:0.8) .. ([xshift=1em]s34.east);
}
{
\draw [-latex, line width=0.8pt] ([xshift=-1em]s21.west).. controls +(70:1.0) and +(110:1.0) .. ([xshift=1em]s23.east);
\draw [-latex, line width=0.8pt] ([xshift=-1em]s22.west).. controls +(70:1.0) and +(110:1.0) .. ([xshift=1em]s24.east);
}
{
\draw [-latex, line width=0.8pt] ([xshift=-1em]s21.west).. controls +(75:1.2) and +(105:1.2) .. ([xshift=1em]s24.east);
\draw [-latex, line width=0.8pt] ([xshift=-1em]s31.west).. controls +(75:1.2) and +(105:1.2) .. ([xshift=1em]s34.east);
\draw [-latex, line width=0.8pt] ([xshift=-1em]s41.west).. controls +(75:1.2) and +(105:1.2) .. ([xshift=1em]s44.east);
}
\end{scope}
\end{tikzpicture}
\end{center}
%%%------------------------------------------------------------------------------------------------------------
%%% 调序模型1:基于距离的调序
\begin{center}
\begin{tikzpicture}
\begin{scope}[minimum height = 20pt]
\node [anchor=east] (x1) at (-0.5em, 0) {$x_l$};
\node [anchor=west,draw,fill=red!20,inner xsep=5pt,rounded corners=2pt] (ln1) at ([xshift=1em]x1.east){\small{\textrm{LN}}};
\node [anchor=west,draw,fill=green!20,inner xsep=5pt,rounded corners=2pt] (f1) at ([xshift=0.6em]ln1.east){\small{$\mathcal{F}$}};
\node [anchor=west,circle,draw,,minimum size=1em] (n1) at ([xshift=3em]f1.east){};
\node [anchor=west] (x2) at ([xshift=1em]n1.east) {$x_{l+1}$};
\node [anchor=west,draw,fill=red!20,inner xsep=5pt,rounded corners=2pt] (ln12) at ([xshift=1em]x2.east){\small{\textrm{LN}}};
\node [anchor=west,draw,fill=green!20,inner xsep=5pt,rounded corners=2pt] (f12) at ([xshift=0.6em]ln12.east){\small{$\mathcal{F}$}};
\node [anchor=west,circle,draw,,minimum size=1em] (n12) at ([xshift=3em]f12.east){};
\node [anchor=west] (x22) at ([xshift=1em]n12.east) {$x_{l+2}$};
\node [anchor=north] (x3) at ([yshift=-5em]x1.south) {$x_l$};
\node [anchor=west,draw,fill=red!20,inner xsep=5pt,rounded corners=2pt] (ln2) at ([xshift=1em]x3.east){\small{\textrm{LN}}};
\node [anchor=west,draw,fill=green!20,inner xsep=5pt,rounded corners=2pt] (f2) at ([xshift=0.6em]ln2.east){\small{$\mathcal{F}$}};
\node [anchor=west,minimum size=1em] (p1) at ([xshift=1em]f2.east){};
\node [anchor=north] (m1) at ([yshift=0.6em]p1.south){\tiny{\red{$M=1$}}};
\node [anchor=west,circle,draw,,minimum size=1em] (n2) at ([xshift=3em]f2.east){};
\node [anchor=west] (x4) at ([xshift=1em]n2.east) {$x_{l+1}$};
\node [anchor=west,draw,fill=red!20,inner xsep=5pt,rounded corners=2pt] (ln22) at ([xshift=1em]x4.east){\small{\textrm{LN}}};
\node [anchor=west,draw,fill=green!20,inner xsep=5pt,rounded corners=2pt] (f22) at ([xshift=0.6em]ln22.east){\small{$\mathcal{F}$}};
\node [anchor=west,minimum size=1em] (p2) at ([xshift=1em]f22.east){};
\node [anchor=north] (m2) at ([yshift=0.6em]p2.south){\tiny{\red{$M=0$}}};
\node [anchor=west,circle,draw,,minimum size=1em] (n22) at ([xshift=3em]f22.east){};
\node [anchor=west] (x42) at ([xshift=1em]n22.east) {$x_{l+2}$};
\draw[->, line width=1pt] ([xshift=-0.1em]x1.east)--(ln1.west);
\draw[->, line width=1pt] ([xshift=-0.1em]ln1.east)--(f1.west);
\draw[->, line width=1pt] ([xshift=0.1em]f1.east)--(n1.west);
\draw[->, line width=1pt] (n1.east)--(x2.west);
\draw[->, line width=1pt] ([xshift=-0.1em]x3.east)--(ln2.west);
\draw[->, line width=1pt] ([xshift=-0.1em]ln2.east)--(f2.west);
\draw[-, line width=1pt] ([xshift=0.1em]f2.east)--(p1.west);
\draw[*-,red,line width=0.6pt] (p1.west) -- (p1.east);
\draw[->, line width=1pt] (p1.east)--(n2.west);
\draw[->, line width=1pt] (n2.east)--(x4.west);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x1.north) -- ([yshift=1em]x1.north) -- ([yshift=1.4em]n1.north) -- (n1.north);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x3.north) -- ([yshift=1em]x3.north) -- ([yshift=1.4em]n2.north) -- (n2.north);
\draw[-] (n1.west)--(n1.east);
\draw[-] (n1.north)--(n1.south);
\draw[-] (n2.west)--(n2.east);
\draw[-] (n2.north)--(n2.south);
\draw[->, line width=1pt] ([xshift=-0.1em]x2.east)--(ln12.west);
\draw[->, line width=1pt] ([xshift=-0.1em]ln12.east)--(f12.west);
\draw[->, line width=1pt] ([xshift=0.1em]f12.east)--(n12.west);
\draw[->, line width=1pt] (n12.east)--(x22.west);
\draw[->, line width=1pt] ([xshift=-0.1em]x4.east)--(ln22.west);
\draw[->, line width=1pt] ([xshift=-0.1em]ln22.east)--(f22.west);
\draw[-, line width=1pt] ([xshift=0.1em]f22.east)--(p2.west);
\draw[*-,red,line width=0.6pt] ([yshift=-0.1em]p2.west) -- (p2.north east);
\draw[->, line width=1pt] (p2.east)--(n22.west);
\draw[->, line width=1pt] (n22.east)--(x42.west);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x2.north) -- ([yshift=1em]x2.north) -- ([yshift=1.4em]n12.north) -- (n12.north);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x4.north) -- ([yshift=1em]x4.north) -- ([yshift=1.4em]n22.north) -- (n22.north);
\draw[-] (n12.west)--(n12.east);
\draw[-] (n12.north)--(n12.south);
\draw[-] (n22.west)--(n22.east);
\draw[-] (n22.north)--(n22.south);
\node [anchor=south] (k1) at ([yshift=-0.1em]x1.north){};
\node [anchor=south] (k2) at ([yshift=-0.1em]x3.north){};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.3em,fill=orange!10] [fit = (x1) (f1) (n1) (ln1) (x2) (k1) (f12) (n12) (ln12) (x22)] (box0) {};
\node [rectangle,inner sep=0.3em,fill=blue!10] [fit = (x3) (f2) (n2) (ln2) (x4) (k2) (f22) (n22) (ln22) (x42)] (box1) {};
\end{pgfonlayer}
\node [anchor=north] (c1) at (box0.south){\footnotesize {(a)标准的Pre-Norm}};
\node [anchor=north] (c2) at (box1.south){\footnotesize {(b)基于随机子层跳跃的Pre-Norm}};
\end{scope}
\end{tikzpicture}
\end{center}
\ No newline at end of file
\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\begin{tikzpicture}
\begin{axis}
[
width=5cm, height=3.5cm,
xtick={15,17,19,21,23,25},
ytick={6.0,6.5,7.0},
xlabel={\scriptsize Epoch},
ylabel={},
ylabel style={},
x tick label style={},
y tick label style={},
tick align=inside,
legend style={anchor=north,xshift=1.7cm,yshift=1cm,legend columns =-1},
ymin=5.7,
ymax=7.3,
xmin=14.6,
xmax=25.4,
extra y ticks={6.0,6.5,7.0},
extra y tick labels={3.7,3.8,3.9},
extra y tick style={ticklabel pos=right}]
\addplot [sharp plot,very thick,red!60,mark=diamond*] coordinates{(15,6.75) (16,6.73) (17,6.70) (18,6.67) (19,6.64) (20,6.61) (21,6.59) (22,6.58) (23,6.57) (24,6.58) (25,6.59)};
\addplot [sharp plot,very thick,purple!60,mark=triangle*] coordinates{(15,6.70) (16,6.4) (17,6.20) (18,6.30) (19,6.20) (20,6.10) (21,6.15) (22,6.10) (23,6.15) (24,6.16) (25,6.17)};
\legend{\scriptsize {训练集},\scriptsize{校验集}}
\end{axis}
\begin{axis}
[ xshift=6.6cm,
width=5cm, height=3.5cm,
xtick={15,17,19,21,23,25},
ytick={5.0,5.5,6.0},
xlabel={\scriptsize Epoch},
ylabel={},
ylabel style={},
x tick label style={},
y tick label style={},
tick align=inside,
ymin=4.7,
ymax=6.3,
xmin=14.6,
xmax=25.4,
extra y ticks={5.0,5.5,6.0},
extra y tick labels={3.5,3.6,3.7},
extra y tick style={ticklabel pos=right}]
\addplot [sharp plot,very thick,red!60,mark=diamond*] coordinates{(15,5.7) (16,5.65) (17,5.6) (18,5.55) (19,5.5) (20,5.45) (21,5.4) (22,5.38) (23,5.36) (24,5.34) (25,5.27)};
\addplot [sharp plot,very thick,purple!60,mark=triangle*] coordinates{(15,5.0) (16,4.9) (17,4.9) (18,5.05) (19,4.9) (20,5.0) (21,5.0) (22,5.1) (23,5.0) (24,5.15) (25,5.5)};
\end{axis}
\node [anchor=north,rotate=90] (n1) at (-1.3cm,1cm) {\scriptsize 训练集\ PPL};
\node [anchor=north,rotate=90] (n2) at (5.4cm,1cm) {\scriptsize 训练集\ PPL};
\node [anchor=north,rotate=90] (n3) at (4.2cm,1cm) {\scriptsize 校验集\ PPL};
\node [anchor=north,rotate=90] (n4) at (10.7cm,1cm) {\scriptsize 校验集\ PPL};
\end{tikzpicture}
%---------------------------------------------------------------------
\ No newline at end of file
......@@ -5086,6 +5086,102 @@ pages ={157-166},
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 15------------------------------------------------------
@inproceedings{DBLP:conf/cvpr/YuYR18,
author = {Xin Yu and
Zhiding Yu and
Srikumar Ramalingam},
title = {Learning Strict Identity Mappings in Deep Residual Networks},
pages = {4432--4440},
publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
year = {2018}
}
@inproceedings{DBLP:conf/emnlp/ZhangTS19,
author = {Biao Zhang and
Ivan Titov and
Rico Sennrich},
title = {Improving Deep Transformer with Depth-Scaled Initialization and Merged
Attention},
pages = {898--909},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019}
}
@inproceedings{DBLP:conf/eccv/HeZRS16,
author = {Kaiming He and
Xiangyu Zhang and
Shaoqing Ren and
Jian Sun},
title = {Identity Mappings in Deep Residual Networks},
volume = {9908},
pages = {630--645},
publisher = {European Conference on Computer Vision},
year = {2016}
}
@inproceedings{Ottfairseq,
author = {Myle Ott and
Sergey Edunov and
Alexei Baevski and
Angela Fan and
Sam Gross and
Nathan Ng and
David Grangier and
Michael Auli},
title = {fairseq: {A} Fast, Extensible Toolkit for Sequence Modeling},
pages = {48--53},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019}
}
@inproceedings{KleinOpenNMT,
author = {Guillaume Klein and
Yoon Kim and
Yuntian Deng and
Jean Senellart and
Alexander M. Rush},
title = {OpenNMT: Open-Source Toolkit for Neural Machine Translation},
pages = {67--72},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2017}
}
@inproceedings{DBLP:conf/acl/WuWXTGQLL19,
author = {Lijun Wu and
Yiren Wang and
Yingce Xia and
Fei Tian and
Fei Gao and
Tao Qin and
Jianhuang Lai and
Tie{-}Yan Liu},
title = {Depth Growing for Neural Machine Translation},
pages = {5558--5563},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019}
}
@inproceedings{DBLP:conf/cvpr/HuangLMW17,
author = {Gao Huang and
Zhuang Liu and
Laurens van der Maaten and
Kilian Q. Weinberger},
title = {Densely Connected Convolutional Networks},
pages = {2261--2269},
publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
year = {2017}
}
@article{DBLP:journals/corr/GreffSS16,
author = {Klaus Greff and
Rupesh Kumar Srivastava and
J{\"{u}}rgen Schmidhuber},
title = {Highway and Residual Networks learn Unrolled Iterative Estimation},
publisher = {International Conference on Learning Representations},
year = {2017}
}
%%%%% chapter 15------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论