Commit 95a9677f by zengxin

合并分支 'caorunzhe' 到 'zengxin'

Caorunzhe

查看合并请求 !998
parents 98ecd151 5c38b917
......@@ -15,7 +15,7 @@
\node [neuronnode] (neuron_y') at (2.4 * \nodespace,-1.5 * \neuronsep) {\scriptsize{$x_{i}^{l+1}$}};
\node [anchor=north] (standard) at ([yshift=-4em]neuron_z.south) {\scriptsize{标准网络}};
\node [] (standard) at ([xshift=-1em]neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}};
\node [] (standard) at ([xshift=-1em]neuron_z.west) {\scriptsize{$\mathbi{w}_{i}^{l}$}};
\node [] (standard) at ([xshift=0.6em,yshift=0.3em]neuron_z.east) {\scriptsize{$f$}};
\draw [->,line width=0.3mm] (neuron_b.east) -- (neuron_z.130);
......@@ -41,7 +41,7 @@
\node [neuronnode] (drop_neuron_r1) at (4.4*\nodespace,-2.5*\neuronsep) {\scriptsize{$r_{1}^{l}$}};
\node [anchor=north] (standard) at ([xshift=2em,yshift=-4em]drop_neuron_z.south) {\scriptsize{应用Dropout后的网络}};
\node [] (standard) at ([xshift=-1em]drop_neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}};
\node [] (standard) at ([xshift=-1em]drop_neuron_z.west) {\scriptsize{$\mathbi{w}_{i}^{l}$}};
\node [] (standard) at ([xshift=0.6em,yshift=0.3em]drop_neuron_z.east) {\scriptsize{$f$}};
%structure
\draw [->,line width=0.3mm] (drop_neuron_b.east) -- (drop_neuron_z.130);
......@@ -60,12 +60,12 @@
%equ
\node [anchor=west,inner sep = 2pt] (line1) at (9*\nodespace,0) {未应用Dropout:};
\node [anchor=north west,inner sep = 2pt] (line2) at (line1.south west) {$z_{i}^{l+1}=\mathbf{w}^{l} \mathbf{x}^{l} + b^{l}$};
\node [anchor=north west,inner sep = 2pt] (line2) at (line1.south west) {$z_{i}^{l+1}=\mathbi{w}^{l} \mathbi{x}^{l} + b^{l}$};
\node [anchor=north west,inner sep = 2pt] (line3) at (line2.south west) {$x_{i}^{l+1}=f\left(z_{i}^{l+1}\right)$};
\node [anchor=north west,inner sep = 2pt] (line4) at (line3.south west) {应用Dropout:};
\node [anchor=north west,inner sep = 2pt] (line5) at (line4.south west) {$r_{j}^{l} \sim$ Bernoulli $(1-p)$};
\node [anchor=north west,inner sep = 2pt] (line6) at (line5.south west) {$\tilde{\mathbf{x}}=\mathbf{r} * \mathbf{x}$};
\node [anchor=north west,inner sep = 2pt] (line7) at (line6.south west) {$z_{i}^{l+1}=\mathbf{w}^{l} \widetilde{\mathbf{x}}^{l} + b^{l}$};
\node [anchor=north west,inner sep = 2pt] (line6) at (line5.south west) {$\tilde{\mathbi{x}}=\mathbi{r} * \mathbi{x}$};
\node [anchor=north west,inner sep = 2pt] (line7) at (line6.south west) {$z_{i}^{l+1}=\mathbi{w}^{l} \widetilde{\mathbi{x}}^{l} + b^{l}$};
\node [anchor=north west,inner sep = 2pt] (line8) at (line7.south west) {$x_{i}^{l+1}=f\left(z_{i}^{l+1}\right)$};
\end{tikzpicture}
\ No newline at end of file
......@@ -4,10 +4,10 @@
\begin{scope}[]
% Column 1
\node [prob,minimum size=0.1cm] (prob11) at (0,0) {};
\node [prob,minimum size=0.5cm,anchor=center] (prob21) at ([yshift=-0.5cm]prob11.center) {$.7$};
\node [prob,minimum size=0.5cm,anchor=center] (prob21) at ([yshift=-0.5cm]prob11.center) {$0.7$};
\node [prob,minimum size=0.1cm,anchor=center] (prob31) at ([yshift=-0.5cm]prob21.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob41) at ([yshift=-0.5cm]prob31.center) {};
\node [prob,minimum size=0.3cm,anchor=center] (prob51) at ([yshift=-0.5cm]prob41.center) {$.2$};
\node [prob,minimum size=0.3cm,anchor=center,font=\tiny] (prob51) at ([yshift=-0.5cm]prob41.center) {$0.2$};
\begin{pgfonlayer}{background}
\coordinate (bottomleft) at ([shift={(-0.25cm,-0.25cm)}]prob51.center);
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob11.center);
......@@ -18,8 +18,8 @@
% Column 2
\node [prob,minimum size=0.1cm,anchor=center] (prob12) at ([xshift=1cm]prob11.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob22) at ([yshift=-0.5cm]prob12.center) {};
\node [prob,minimum size=0.4cm,anchor=center] (prob32) at ([yshift=-0.5cm]prob22.center) {$.4$};
\node [prob,minimum size=0.3cm,anchor=center] (prob42) at ([yshift=-0.5cm]prob32.center) {$.3$};
\node [prob,minimum size=0.4cm,anchor=center,font=\tiny] (prob32) at ([yshift=-0.5cm]prob22.center) {$0.4$};
\node [prob,minimum size=0.3cm,anchor=center,font=\tiny] (prob42) at ([yshift=-0.5cm]prob32.center) {$0.3$};
\node [prob,minimum size=0.1cm,anchor=center] (prob52) at ([yshift=-0.5cm]prob42.center) {};
\begin{pgfonlayer}{background}
\coordinate (bottomleft) at ([shift={(-0.25cm,-0.25cm)}]prob52.center);
......@@ -33,7 +33,7 @@
\node [prob,minimum size=0.1cm,anchor=center] (prob23) at ([yshift=-0.5cm]prob13.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob33) at ([yshift=-0.5cm]prob23.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob43) at ([yshift=-0.5cm]prob33.center) {};
\node [prob,minimum size=0.4cm,anchor=center] (prob53) at ([yshift=-0.5cm]prob43.center) {$.6$};
\node [prob,minimum size=0.4cm,anchor=center,font=\tiny] (prob53) at ([yshift=-0.5cm]prob43.center) {$0.6$};
\begin{pgfonlayer}{background}
\coordinate (bottomleft) at ([shift={(-0.25cm,-0.25cm)}]prob53.center);
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob13.center);
......@@ -42,7 +42,7 @@
% \node [anchor=center] (word13) at ([yshift=0.7cm]prob13.center) {fine};
% Column 4
\node [prob,minimum size=0.5cm,anchor=center] (prob14) at ([xshift=1cm]prob13.center) {$.8$};
\node [prob,minimum size=0.5cm,anchor=center] (prob14) at ([xshift=1cm]prob13.center) {$0.8$};
\node [prob,minimum size=0.1cm,anchor=center] (prob24) at ([yshift=-0.5cm]prob14.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob34) at ([yshift=-0.5cm]prob24.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob44) at ([yshift=-0.5cm]prob34.center) {};
......@@ -92,7 +92,7 @@
\begin{scope}[xshift=2.7in]
% Column 1
\node [prob,minimum size=0.1cm] (prob11) at (0,0) {};
\node [prob,minimum size=0.5cm,anchor=center] (prob21) at ([yshift=-0.5cm]prob11.center) {$1.$};
\node [prob,minimum size=0.5cm,anchor=center] (prob21) at ([yshift=-0.5cm]prob11.center) {$1$};
\node [prob,minimum size=0.1cm,anchor=center] (prob31) at ([yshift=-0.5cm]prob21.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob41) at ([yshift=-0.5cm]prob31.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob51) at ([yshift=-0.5cm]prob41.center) {};
......@@ -106,7 +106,7 @@
% Column 2
\node [prob,minimum size=0.1cm,anchor=center] (prob12) at ([xshift=1cm]prob11.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob22) at ([yshift=-0.5cm]prob12.center) {};
\node [prob,minimum size=0.5cm,anchor=center] (prob32) at ([yshift=-0.5cm]prob22.center) {$1.$};
\node [prob,minimum size=0.5cm,anchor=center] (prob32) at ([yshift=-0.5cm]prob22.center) {$1$};
\node [prob,minimum size=0.1cm,anchor=center] (prob42) at ([yshift=-0.5cm]prob32.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob52) at ([yshift=-0.5cm]prob42.center) {};
\begin{pgfonlayer}{background}
......@@ -121,7 +121,7 @@
\node [prob,minimum size=0.1cm,anchor=center] (prob23) at ([yshift=-0.5cm]prob13.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob33) at ([yshift=-0.5cm]prob23.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob43) at ([yshift=-0.5cm]prob33.center) {};
\node [prob,minimum size=0.5cm,anchor=center] (prob53) at ([yshift=-0.5cm]prob43.center) {$1.$};
\node [prob,minimum size=0.5cm,anchor=center] (prob53) at ([yshift=-0.5cm]prob43.center) {$1$};
\begin{pgfonlayer}{background}
\coordinate (bottomleft) at ([shift={(-0.25cm,-0.25cm)}]prob53.center);
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob13.center);
......@@ -130,7 +130,7 @@
\node [anchor=center] (word13) at ([yshift=0.68cm]prob13.center) {good};
% Column 4
\node [prob,minimum size=0.5cm,anchor=center] (prob14) at ([xshift=1cm]prob13.center) {$1.$};
\node [prob,minimum size=0.5cm,anchor=center] (prob14) at ([xshift=1cm]prob13.center) {$1$};
\node [prob,minimum size=0.1cm,anchor=center] (prob24) at ([yshift=-0.5cm]prob14.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob34) at ([yshift=-0.5cm]prob24.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob44) at ([yshift=-0.5cm]prob34.center) {};
......
......@@ -20,21 +20,21 @@
\foreach \curr / \prev in {1/0,2/1,3/2}
{
% models
\node[modelnode,fill=yellow!20] (stu\curr1) at ([yshift=-3em]stu\prev1.south) {\rotatebox{90}{学生模型 $1$}};
\node[modelnode,fill=yellow!20] (stu\curr2) at ([yshift=-3em]stu\prev2.south) {\rotatebox{90}{学生模型 $2$}};
\node[modelnode,fill=yellow!20] (stu\curr3) at ([yshift=-3em]stu\prev3.south) {\rotatebox{90}{学生模型 $3$}};
\node[modelnode,fill=yellow!20] (stu\curr4) at ([yshift=-3em]stu\prev4.south) {\rotatebox{90}{学生模型 $4$}};
\node[modelnode,fill=yellow!20] (stu\curr5) at ([yshift=-3em]stu\prev5.south) {\rotatebox{90}{学生模型 $5$}};
\node[modelnode] (tea\curr1) at ([yshift=-3em]tea\prev1.south) {\rotatebox{90}{\color{red!60} 教师模型 $1$}};
\node[modelnode] (tea\curr2) at ([yshift=-3em]tea\prev2.south) {\rotatebox{90}{\color{blue!60} 教师模型 $2$}};
\node[modelnode,fill=yellow!20,align=center] (stu\curr1) at ([yshift=-3em]stu\prev1.south) {学\\[-0.5ex]生\\[-0.5ex]模\\[-0.5ex]型\\[-0.5ex]$1$};
\node[modelnode,fill=yellow!20,align=center] (stu\curr2) at ([yshift=-3em]stu\prev2.south) {学\\[-0.5ex]生\\[-0.5ex]模\\[-0.5ex]型\\[-0.5ex]$2$};
\node[modelnode,fill=yellow!20,align=center] (stu\curr3) at ([yshift=-3em]stu\prev3.south) {学\\[-0.5ex]生\\[-0.5ex]模\\[-0.5ex]型\\[-0.5ex]$3$};
\node[modelnode,fill=yellow!20,align=center] (stu\curr4) at ([yshift=-3em]stu\prev4.south) {学\\[-0.5ex]生\\[-0.5ex]模\\[-0.5ex]型\\[-0.5ex]$4$};
\node[modelnode,fill=yellow!20,align=center] (stu\curr5) at ([yshift=-3em]stu\prev5.south) {学\\[-0.5ex]生\\[-0.5ex]模\\[-0.5ex]型\\[-0.5ex]$5$};
\node[modelnode,align=center,text=red!60] (tea\curr1) at ([yshift=-3em]tea\prev1.south) {教\\[-0.5ex]师\\[-0.5ex]模\\[-0.5ex]型\\[-0.5ex]$1$};
\node[modelnode,align=center,text=blue!60] (tea\curr2) at ([yshift=-3em]tea\prev2.south) {教\\[-0.5ex]师\\[-0.5ex]模\\[-0.5ex]型\\[-0.5ex]$2$};
% 集成 labels
\draw[->,very thick] ([xshift=2pt]stu\curr5.east) to node [auto] {\small 集成} ([xshift=-2pt]tea\curr1.west);
}
% iteration labels
\node[font=\small,anchor=east,purple!80] (iterate1) at ([xshift=-1em]stu21.west) {\rotatebox{90}{轮数 $1$}};
\node[font=\small,anchor=east,purple!80] (iterate2) at ([xshift=-1em]stu31.west) {\rotatebox{90}{轮数 $2$}};
\node[font=\small,anchor=east,purple!80,align=center] (iterate1) at ([xshift=-1em]stu21.west) {轮\\数\\$1$};
\node[font=\small,anchor=east,purple!80,align=center] (iterate2) at ([xshift=-1em]stu31.west) {轮\\数\\$2$};
% distillation labels
\node[font=\small,anchor=south west] (distill1) at ([yshift=1.2em]iterate1.north west) {知识蒸馏};
......
......@@ -55,8 +55,8 @@
\node [anchor=south,font=\scriptsize] (w8) at (label7.north) {$0.1$};
\node[font=\scriptsize] (line1) at ([xshift=13em,yshift=-1.5em]model_label7.east) {$Loss =-0.3 \log p_{3}-\sum_{i=1}^{7} 0.1 \log p_{i}$};
\node[font=\scriptsize] (line2) at ([xshift=9.5em,yshift=3em]model_label7.east) {$Loss =-\log p_{3}$};
\node[font=\scriptsize] (line1) at ([xshift=13em,yshift=-1.5em]model_label7.east) {$\textrm{Loss} =-0.3 \log p_{3}-\sum_{i=1}^{7} 0.1 \log p_{i}$};
\node[font=\scriptsize] (line2) at ([xshift=9.5em,yshift=3em]model_label7.east) {$\textrm{Loss} =-\log p_{3}$};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.5em,rounded corners=1pt,very thick,dotted,draw=red] [fit =(model_w3) (model_label1) (model_label7) (one_hot_w3)] (box1) {};
......
......@@ -90,12 +90,12 @@
\draw[-,dotted] ([xshift=-1em,yshift=-3.5em]c1.south)--([xshift=9.3em,yshift=-3.5em]c1.south);
\draw[-,dotted] ([xshift=-1em,yshift=-5em]c1.south)--([xshift=9.3em,yshift=-5em]c1.south);
\node [anchor=south,colnode,minimum height=0.15em,minimum width=1em] (b1) at ([xshift=0em,yshift=-5em]c1.south) {};
\node [anchor=south,colnode,minimum height=0.85em,minimum width=1em] (b1) at ([xshift=0em,yshift=-5em]c1.south) {};
\node [anchor=south,colnode,minimum height=4.2em,minimum width=1em] (b2) at ([xshift=1.67em,yshift=0em]b1.south) {};
\node [anchor=south,colnode,minimum height=3.7em,minimum width=1em] (b3) at ([xshift=1.67em,yshift=0em]b2.south) {};
\node [anchor=south,colnode,minimum height=3.2em,minimum width=1em] (b3) at ([xshift=1.67em,yshift=0em]b2.south) {};
\node [anchor=south,colnode,minimum height=4.2em,minimum width=1em] (b4) at ([xshift=1.67em,yshift=0em]b3.south) {};
\node [anchor=south,colnode,minimum height=0.8em,minimum width=1em] (b5) at ([xshift=1.67em,yshift=0em]b4.south) {};
\node [anchor=south,colnode,minimum height=0.15em,minimum width=1em] (b6) at ([xshift=1.67em,yshift=0em]b5.south) {};
\node [anchor=south,colnode,minimum height=1.4em,minimum width=1em] (b5) at ([xshift=1.67em,yshift=0em]b4.south) {};
\node [anchor=south,colnode,minimum height=0.35em,minimum width=1em] (b6) at ([xshift=1.67em,yshift=0em]b5.south) {};
{\scriptsize
\node [anchor=center] (n1) at ([xshift=0em,yshift=-1em]b1.south){\color{orange}It};
......
......@@ -20,7 +20,7 @@
\node [anchor=north,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=15em,fill=purple!30,rounded corners=5pt,thick] (n9) at ([xshift=0em,yshift=-1em]n8.south) {$\mathbi{X}\ \quad \mathbi{h}^1\ \quad \mathbi{h}^2\quad \ldots \quad\ \mathbi{h}^l$};
\node [anchor=north,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=15em,fill=teal!30,rounded corners=5pt,thick] (n10) at ([xshift=0em,yshift=-2em]n9.south) {权重累加};
\node [anchor=north,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=15em,fill=teal!30,rounded corners=5pt,thick] (n10) at ([xshift=0em,yshift=-2em]n9.south) {权重累加\ {\red $\mathbi{g}^l$}};
\node [anchor=west,rectangle, inner sep=0mm,minimum height=1.2em, rounded corners=5pt,thick] (n11) at ([xshift=0em,yshift=-4.5em]n1.west) {聚合网络};
......
......@@ -4,7 +4,7 @@
%left
\begin{scope}
\foreach \x/\d in {1/2em, 2/8em}
\node[unit,fill=yellow!30] at (0,\d) (ln_\x) {正则};
\node[unit,fill=yellow!30] at (0,\d) (ln_\x) {标准};
\foreach \x/\d in {1/4em}
\node[unit,fill=green!30] at (0,\d) (sa_\x) {8头自注意力:512};
......@@ -35,7 +35,7 @@
\draw[->,thick] ([yshift=-0.8em]ln_1.-90) .. controls ([xshift=5em,yshift=-0.8em]ln_1.-90) and ([xshift=5em]add_1.0) .. (add_1.0);
\draw[->,thick] (add_1.0) .. controls ([xshift=5em]add_1.0) and ([xshift=5em]add_2.0) .. (add_2.0);
\node[font=\scriptsize] at (0em, -1em){(a) Transformer编码器中若干块的结构};
\node[font=\scriptsize] at (0em, -1.1em){(a) Transformer编码器中若干块的结构};
\end{scope}
%right
......@@ -44,7 +44,7 @@
\foreach \x/\d in {1/2em, 2/8em, 3/16em}
\node[unit,fill=yellow!30] at (0,\d) (ln_\x) {正则};
\node[unit,fill=yellow!30] at (0,\d) (ln_\x) {标准};
\foreach \x/\d in {1/6em, 2/14em, 3/20em}
\node[draw,circle,minimum size=1em,inner sep=1pt] at (0,\d) (add_\x) {\scriptsize\bfnew{+}};
......@@ -84,7 +84,7 @@
\node[minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=blue!30] (act) at (8em, 20em){};
\node[anchor=west,font=\footnotesize] at ([xshift=0.1em]act.east){激活函数};
\node[anchor=north,minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=yellow!30] (nor) at ([yshift=-0.6em]act.south){};
\node[anchor=west,font=\footnotesize] at ([xshift=0.1em]nor.east){正则};
\node[anchor=west,font=\footnotesize] at ([xshift=0.1em]nor.east){标准};
\node[anchor=north,minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=cyan!30] (wc) at ([yshift=-0.6em]nor.south){};
\node[anchor=west,font=\footnotesize] at ([xshift=0.1em]wc.east){宽卷积};
\node[anchor=north,minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=green!30] (at) at ([yshift=-0.6em]wc.south){};
......
......@@ -9,15 +9,15 @@
\node [anchor=north,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n2) at ([xshift=0em,yshift=-0.2em]n1.south) {$\mathbi{X}$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n3) at ([xshift=1.5em,yshift=0em]n2.east) {$\mathbi{h}^0$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n3) at ([xshift=1.5em,yshift=0em]n2.east) {$\mathbi{h}^1$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n4) at ([xshift=1.5em,yshift=0em]n3.east) {$\mathbi{h}^1$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n4) at ([xshift=1.5em,yshift=0em]n3.east) {$\mathbi{h}^2$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n5) at ([xshift=1.5em,yshift=0em]n4.east) {$\mathbi{h}^2$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n5) at ([xshift=1.5em,yshift=0em]n4.east) {$\mathbi{h}^3$};
\node [anchor=west,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n6) at ([xshift=1em,yshift=0em]n5.east) {$\ldots$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n7) at ([xshift=1em,yshift=0em]n6.east) {$\mathbi{h}^{L-1}$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n7) at ([xshift=1em,yshift=0em]n6.east) {$\mathbi{h}^{L}$};
\node [anchor=north,rectangle,draw=teal!80, inner sep=0mm,minimum height=2em,minimum width=8em,fill=teal!17,rounded corners=5pt,thick] (n8) at ([xshift=3em,yshift=-1.5em]n4.south) {权重聚合$\mathbi{g}$};
......@@ -27,15 +27,15 @@
\node [anchor=north,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n10) at ([xshift=0em,yshift=-0.2em]n9.south) {$\mathbi{y}_{<j}$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n11) at ([xshift=1.5em,yshift=0em]n10.east) {$\mathbi{s}^0_{j}$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n11) at ([xshift=1.5em,yshift=0em]n10.east) {$\mathbi{s}^1_{j}$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n12) at ([xshift=1.5em,yshift=0em]n11.east) {$\mathbi{s}^1_{j}$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n12) at ([xshift=1.5em,yshift=0em]n11.east) {$\mathbi{s}^2_{j}$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n13) at ([xshift=1.5em,yshift=0em]n12.east) {$\mathbi{s}^2_{j}$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n13) at ([xshift=1.5em,yshift=0em]n12.east) {$\mathbi{s}^3_{j}$};
\node [anchor=west,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n14) at ([xshift=1em,yshift=0em]n13.east) {$\ldots$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n15) at ([xshift=1em,yshift=0em]n14.east) {$\mathbi{s}^{M-1}_{j}$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n15) at ([xshift=1em,yshift=0em]n14.east) {$\mathbi{s}^{M}_{j}$};
\node [anchor=west,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n16) at ([xshift=1.5em,yshift=0em]n15.east) {$\mathbi{y}_{j}$};
......
\begin{tikzpicture}
\tikzstyle{node}=[draw,minimum height=1.4em,minimum width=2em,rounded corners=1pt,thick]
%violet
\begin{scope}[scale=0.36]
\tikzstyle{every node}=[scale=0.36]
\node[draw=ublue,very thick,rounded corners=3pt,drop shadow,fill=white,minimum width=40em,minimum height=25em] (rec3) at (2.25,0){};
\node[draw=ublue,very thick,rounded corners=3pt,drop shadow,fill=white,minimum width=22em,minimum height=25em] (rec2) at (-12.4,0){};
\node[draw=ublue,very thick,rounded corners=3pt,drop shadow,fill=white,minimum width=24em,minimum height=25em] (rec1) at (-24,0){};
\node[draw,very thick,rounded corners=3pt,drop shadow,fill=red!30,minimum width=40em,minimum height=25em] (rec3) at (2.25,0){};
\node[draw,very thick,rounded corners=3pt,drop shadow,fill=green!30,minimum width=22em,minimum height=25em] (rec2) at (-12.4,0){};
\node[draw,very thick,rounded corners=3pt,drop shadow,fill=yellow!30,minimum width=24em,minimum height=25em] (rec1) at (-24,0){};
%left
\node[text=ublue] (label1) at (-26.4,4){\Huge\bfnew{结构空间}};
\node[align=left] at (-24,-0.5){\Huge\bfnew{1.前馈神经网络} \\ [4ex] \Huge\bfnew{2.卷积神经网络} \\ [4ex] \Huge\bfnew{3.循环神经网络} \\ [4ex] \Huge\bfnew{4. Transformer网络} \\ [4ex] \Huge\bfnew{...}};
\node[] (label1) at (-26.4,4){\Huge\bfnew{结构空间}};
\node[align=left] at (-24,-0.5){\Huge{1.前馈神经网络} \\ [4ex] \Huge{2.卷积神经网络} \\ [4ex] \Huge{3.循环神经网络} \\ [4ex] \Huge{4. Transformer网络} \\ [4ex] \Huge{...}};
\draw[ublue,very thick,-latex] (rec1.0) -- node[align=center,above,text=violet]{\huge{设计} \\ \huge{搜索} \\ \huge{空间}}(rec2.180);
\draw[very thick,-latex] (rec1.0) -- node[align=center,above,text=ublue]{\huge\bfnew{设计} \\ \huge\bfnew{搜索} \\ \huge\bfnew{空间}}(rec2.180);
%mid
\node[text=ublue] (label2) at (-14.4,4){\Huge\bfnew{搜索空间}};
\node[align=left] at (-12.4,-0.5){\Huge\bfnew{循环神经网络} \\ [4ex] \Huge\bfnew{1.普通RNN网络} \\ [4ex] \Huge\bfnew{2. LSTM网络} \\ [4ex] \Huge\bfnew{3. GRU网络} \\ [4ex] \Huge\bfnew{...}};
\node[] (label2) at (-14.4,4){\Huge\bfnew{搜索空间}};
\node[align=left] at (-12.4,-0.5){\Huge{循环神经网络} \\ [4ex] \Huge{1.普通RNN网络} \\ [4ex] \Huge{2. LSTM网络} \\ [4ex] \Huge{3. GRU网络} \\ [4ex] \Huge{...}};
\draw[ublue,very thick,-latex] (rec2.0) -- node[align=center,above,text=violet]{\huge{选择} \\ \huge{搜索} \\ \huge{策略}}(rec3.180);
\draw[very thick,-latex] (rec2.0) -- node[align=center,above,text=ublue]{\huge\bfnew{选择} \\ \huge\bfnew{搜索} \\ \huge\bfnew{策略}}(rec3.180);
\draw[ublue,very thick,-latex,out=-150,in=-30] (rec3.-90) to node[above,text=violet,yshift=1em]{\huge{迭代结构搜索的过程}}(rec2.-90);
\draw[very thick,-latex,out=-150,in=-30] (rec3.-90) to node[above,text=ublue,yshift=1em]{\huge\bfnew{迭代结构搜索的过程}}(rec2.-90);
\draw[ublue,very thick,-latex,out=60,in=130] ([xshift=-8em]rec3.90) to node[above,text=violet]{\huge{性能评估}}([xshift=8em]rec3.90);
\draw[very thick,-latex,out=60,in=130] ([xshift=-8em]rec3.90) to node[above,text=ublue]{\huge\bfnew{性能评估}}([xshift=8em]rec3.90);
%right
\node[node] (n1) at (0,0){};
\node[node] (n2) at (1.5,0){};
......@@ -52,7 +52,7 @@
\node[font=\Huge] at (9,0){$\cdots$};
\node[font=\Huge] at (-4.5,0){$\cdots$};
\node[text=ublue] (label3) at (-2,4){\Huge\bfnew{找到的模型结构}};
\node[] (label3) at (-2,4){\Huge\bfnew{找到的模型结构}};
\node[draw,rounded corners=6pt,very thick,minimum width=16em,minimum height=15em] (box1) at (2.25,0){};
......
......@@ -27,8 +27,8 @@
{0/0/8, 1/0/9, 2/0/10, 3/0/11, 4/0/12, 5/0/13, 6/0/14}
\node[anchor=north] (n\k) at ([xshift=-0em,yshift=-0.5em]a\i\j.south) {\i};
\node [anchor=east] (l1) at ([xshift=-0.3em,yshift=0em]n8.west) {$i$};
\node [anchor=north] (l2) at ([xshift=0em,yshift=-0em]n7.south) {$j$};
\node [anchor=east] (l1) at ([xshift=-0.3em,yshift=0em]n8.west) {$j$};
\node [anchor=north] (l2) at ([xshift=0em,yshift=-0em]n7.south) {$i$};
%\node [anchor=north] (n1) at ([xshift=0em,yshift=0em]a00.south west) {};
......
......@@ -873,7 +873,7 @@
% NEW SECTION
%----------------------------------------------------------------------------------------
\section{小结及扩展阅读}
\section{小结及拓展阅读}
低资源机器翻译是机器翻译大规模应用所面临的挑战之一,因此也备受关注。一方面,小样本学习技术的发展,使得研究人员可以有更多的手段对问题求解;另一方面,从多语言之间的联系出发,也可以进一步挖掘不同语言背后的知识,并应用于低资源机器翻译任务。本章从多个方面介绍了低资源机器翻译方法,并结合多语言、零资源翻译等问题给出了不同场景下解决问题的思路。除此之外,还有几方面工作值得进一步关注:
......
......@@ -52,6 +52,7 @@
%----------------------------------------------------------------------------------------
% NEW SECTION
%----------------------------------------------------------------------------------------
\sectionnewpage
\section{语音翻译}
\parinterval 语音,是人类交流中最常用的一种信息载体。从日常聊天、出国旅游,到国际会议、跨国合作,对于语音翻译的需求不断增加。甚至在有些场景下,用语音进行交互要比用文本进行交互频繁得多。因此,{\small\bfnew{语音翻译}}\index{语音翻译}(Speech Translation)\index{Speech Translation}也成为了语音处理和机器翻译相结合的重要产物。根据目标语言的载体类型,可以将语音翻译分为{\small\bfnew{语音到文本翻译}}\index{语音到文本翻译}(Speech-to-Text Translation)\index{Speech-to-Text Translation}和{\small\bfnew{语音到语音翻译}}\index{语音到语音翻译}(Speech-to-Speech Translation)\index{Speech-to-Speech Translation};基于翻译的实时性,还可以分为{\small\bfnew{实时语音翻译}}\index{实时语音翻译}(即同声传译,Simultaneous Translation)\index{Simultaneous Translation}和{\small\bfnew{离线语音翻译}}(Offline Speech Translation)\index{离线语音翻译}\index{Offline Speech Translation}。本节主要关注离线语音到文本翻译方法(简称为语音翻译),分别从音频处理、级联语音翻译和端到端语音翻译几个角度开展讨论。
......@@ -253,7 +254,7 @@
%----------------------------------------------------------------------------------------
% NEW SECTION
%----------------------------------------------------------------------------------------
\sectionnewpage
\section{图像翻译}
\parinterval 在人类所接受的信息中,视觉信息的比重往往不亚于语音和文本信息,甚至更多。视觉信息通常以图像的形式存在,近几年,结合图像的多模态机器翻译受到了广泛的关注。多模态机器翻译(图\ref{fig:17-11} (a))简单来说就是结合源语言和其他模态(例如图像等)的信息生成目标语言的过程。这种结合图像的机器翻译还是一种狭义上的“翻译”,它本质上还是从源语言到目标语言或者说从文本到文本的翻译。事实上从图像到文本(图\ref{fig:17-11}(b))的转换,即给定图像,生成与图像内容相关的描述,也可以被称为广义上的“翻译”。例如,{\small\bfnew{图片描述生成}}\index{图片描述生成}(Image Captioning)\index{Image Captioning}就是一种典型的图像到文本的翻译。当然,这种广义上的翻译形式不仅仅包括图像到文本的转换,还可以包括从图像到图像的转换(图\ref{fig:17-11}(c)),甚至是从文本到图像的转换(图\ref{fig:17-11}(d))等等。这里将这些与图像相关的翻译任务统称为图像翻译。
......@@ -423,7 +424,7 @@
%----------------------------------------------------------------------------------------
% NEW SECTION
%----------------------------------------------------------------------------------------
\sectionnewpage
\section{篇章级翻译}
\parinterval 目前大多数机器翻译系统是句子级的。由于缺少了对篇章上下文信息的建模,在需要依赖上下文的翻译场景中,模型的翻译效果总是不尽如人意。篇章级翻译的目的就是对篇章上下文信息进行建模,进而改善机器翻译在整个篇章上的翻译质量。篇章级翻译的概念在很早就已经出现\upcite{DBLP:journals/ac/Bar-Hillel60},随着近几年神经机器翻译取得了巨大进展,篇章级神经机器翻译也成为了重要的方向\upcite{DBLP:journals/corr/abs-1912-08494,DBLP:journals/corr/abs-1901-09115}。基于此,本节将对篇章级神经机器翻译的若干问题展开讨论。
......@@ -635,8 +636,8 @@
%----------------------------------------------------------------------------------------
% NEW SECTION
%----------------------------------------------------------------------------------------
\section{小结及扩展阅读}
\sectionnewpage
\section{小结及拓展阅读}
\parinterval 使用更大上下文进行机器翻译建模是极具潜力的研究方向,包括多模态翻译在内的多个领域也非常活跃。有许多问题值得进一步思考与讨论:
......
......@@ -596,7 +596,7 @@ x_1\cdot w_1+x_2\cdot w_2+x_3\cdot w_3 & = & 0\cdot 1+0\cdot 1+1\cdot 1 \nonumbe
\parinterval $ x_3 $:女朋友是否喜欢
\parinterval 在新修改的模型中,$ x_0 $和$ x_1 $变成了连续变量,$ x_2 $仍然是离散变量,如图\ref{fig:9-8}所示。
\parinterval 在新修改的模型中,$ x_1 $和$ x_2 $变成了连续变量,$ x_3 $仍然是离散变量,如图\ref{fig:9-8}所示。
%----------------------------------------------
\begin{figure}[htp]
......@@ -1732,7 +1732,7 @@ z_t&=&\gamma z_{t-1}+(1-\gamma) \frac{\partial J}{\partial {\theta}_t} \cdot \f
\parinterval 这个过程可以得到$ {\mathbi{s}}^K $节点处的梯度$ {\bm \pi}^K= \frac{\partial L}{\partial {\mathbi{s}}^K} $,在后续的过程中可以直接使用其作为前一层提供的梯度计算结果,而不需要从$ {\mathbi{h}}^K $节点处重新计算。这也体现了自动微分与符号微分的差别,对于计算图的每一个阶段,并不需要得到完整的微分表达式,而是通过前一层提供的梯度,直接计算当前的梯度即可,这样避免了大量的重复计算。
\parinterval 在得到$ {\bm \pi}^K= \frac{\partial L}{\partial {\mathbi{s}}^K} $之后,下一步的目标是:1)计算损失函数$ L $相对于第$ K-1 $层与输出层之间连接权重$ {\mathbi{W}}^K $的梯度;2)计算损失函数$ L $相对于神经网络网络$ K-1 $层输出结果$ {\mathbi{h}}^{K-1} $的梯度。这部分内容如图\ref{fig:9-55}所示。
\parinterval 在得到$ {\bm \pi}^K= \frac{\partial L}{\partial {\mathbi{s}}^K} $之后,下一步的目标是:1)计算损失函数$ L $相对于第$ K-1 $层与输出层之间连接权重$ {\mathbi{W}}^K $的梯度;2)计算损失函数$ L $相对于神经网络第$ K-1 $层输出结果$ {\mathbi{h}}^{K-1} $的梯度。这部分内容如图\ref{fig:9-55}所示。
%----------------------------------------------
\begin{figure}[htp]
......

222 KB | W: | H:

378 KB | W: | H:

Figures/fig-cover.jpg
Figures/fig-cover.jpg
Figures/fig-cover.jpg
Figures/fig-cover.jpg
  • 2-up
  • Swipe
  • Onion skin
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论