Commit 97538212 by 曹润柘

update

parent becc8fa0
\begin{tikzpicture}
% End-to-end speech translation model drawn as a Transformer: an encoder
% column (CNN -> add position -> self-attention -> FFN) on the left and a
% decoder column (add position -> masked self-attention -> attention -> FFN
% -> Softmax -> STLoss) on the right.
% Requires the `fit` and `backgrounds` TikZ libraries and the colour `ugreen`,
% all of which are expected to be provided by the including document.
\tikzstyle{layer}=[draw,rounded corners=2pt,font=\scriptsize,align=center,minimum width=5em]
\tikzstyle{word}=[font=\scriptsize]
% --- encoder column ---
\node[layer,fill=red!20] (en_sa) at (0,0){Multi-Head \\ Attention};
\node[layer,anchor=south,fill=green!20] (en_ffn) at ([yshift=1.4em]en_sa.north){Feed Forward \\ Network};
% Empty circle plus two crossing strokes form the "add" symbol.
\node[draw,circle,inner sep=0pt, minimum size=1em,anchor=north] (en_add) at ([yshift=-1.4em]en_sa.south){};
\draw[] (en_add.90) -- (en_add.-90);
\draw[] (en_add.0) -- (en_add.180);
\node[layer,anchor=north,fill=yellow!20] (en_cnn) at ([yshift=-1.4em]en_add.south){CNN};
% --- decoder column, placed 7em to the right of the encoder "add" symbol ---
\node[draw,circle,inner sep=0pt, minimum size=1em,anchor=west] (de_add) at ([xshift=7em]en_add.east){};
\draw[] (de_add.90) -- (de_add.-90);
\draw[] (de_add.0) -- (de_add.180);
\node[layer,anchor=south,fill=red!20] (de_sa) at ([yshift=1.4em]de_add.north){Masked \\Multi-Head\\Attention};
\node[layer,anchor=south,fill=red!20] (de_ca) at ([yshift=1.4em]de_sa.north){Multi-Head \\ Attention};
\node[layer,anchor=south,fill=green!20] (de_ffn) at ([yshift=1.4em]de_ca.north){Feed Forward \\ Network};
\node[layer,anchor=south,fill=blue!20] (sf) at ([yshift=2em]de_ffn.north){Softmax};
\node[layer,anchor=south,fill=orange!20] (output) at ([yshift=1.4em]sf.north){STLoss};
% --- input and position labels ---
\node[anchor=north,font=\scriptsize,align=center] (en_input) at ([yshift=-1em]en_cnn.south){语音特征\\(FilterBank/MFCC)};
\node[anchor=north,font=\scriptsize,align=center] (de_input) at ([yshift=-1em]de_add.south){目标文本\\(Embedding)};
\node[anchor=east,font=\scriptsize,align=center] (en_pos) at ([xshift=-2em]en_add.west){Position\\(Embedding)};
\node[anchor=west,font=\scriptsize,align=center] (de_pos) at ([xshift=2em]de_add.east){Position\\(Embedding)};
% --- vertical data-flow arrows (0.1em gap keeps arrow tips off the borders) ---
\draw[->] (en_input.90) -- ([yshift=-0.1em]en_cnn.-90);
\draw[->] ([yshift=0.1em]en_cnn.90) -- ([yshift=-0.1em]en_add.-90);
\draw[->] ([yshift=0.1em]en_add.90) -- ([yshift=-0.1em]en_sa.-90);
\draw[->] ([yshift=0.1em]en_sa.90) -- ([yshift=-0.1em]en_ffn.-90);
\draw[->] (de_input.90) -- ([yshift=-0.1em]de_add.-90);
\draw[->] ([yshift=0.1em]de_add.90) -- ([yshift=-0.1em]de_sa.-90);
\draw[->] ([yshift=0.1em]de_sa.90) -- ([yshift=-0.1em]de_ca.-90);
\draw[->] ([yshift=0.1em]de_ca.90) -- ([yshift=-0.1em]de_ffn.-90);
\draw[->] ([yshift=0.1em]de_ffn.90) -- ([yshift=-0.1em]sf.-90);
\draw[->] ([yshift=0.1em]sf.90) -- ([yshift=-0.1em]output.-90);
% --- position embeddings feed the "add" symbols from the outside ---
\draw[->] ([xshift=0.1em]en_pos.0) -- ([xshift=-0.1em]en_add.180);
\draw[->] ([xshift=-0.1em]de_pos.180) -- ([xshift=0.1em]de_add.0);
% --- encoder output routed up, right, and into the decoder's second attention layer ---
\draw[->,rounded corners=2pt] ([yshift=0.1em]en_ffn.90) -- ([yshift=2em]en_ffn.90) -- ([xshift=4em,yshift=2em]en_ffn.90) -- ([xshift=-1.5em]de_ca.west) -- ([xshift=-0.1em]de_ca.west);
% --- frames around the repeated encoder/decoder stacks ---
% The frames must be named: the "N x" labels below are anchored to box1/box2.
\begin{pgfonlayer}{background}
\node[draw=ugreen,rounded corners=2pt,inner xsep=6pt,inner ysep=8pt][fit=(en_sa)(en_ffn)](box1){};
\node[draw=red,rounded corners=2pt,inner xsep=6pt,inner ysep=8pt][fit=(de_sa)(de_ca)(de_ffn)](box2){};
\end{pgfonlayer}
% --- repetition counts ---
\node[anchor=east,font=\scriptsize,text=ugreen] at ([xshift=-0.1em]box1.west){$N \times$};
\node[anchor=west,font=\scriptsize,text=red] at ([xshift=0.1em]box2.east){$\times N$};
\node[anchor=east,font=\scriptsize] at ([xshift=-0.1em]en_cnn.west){$2 \times$};
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Three-row illustration below an audio image: a row of per-frame letters
% (w1..w11), a row of merged groups (m1..m8), and a final output row
% (o1..o6), connected by translucent blue ribbons. The side labels a1..a4
% name the rows (audio input, predicted letter sequence, merge repeated
% letters and drop epsilon, final output).
\node[draw=white] (input) at (0,0){\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-hello-audio.png}};
\node[anchor=east,font=\scriptsize,align=center] (a1) at ([xshift=2.0em]input.west) {音频数据输入};
% Thick white frame drawn over the image; presumably masks the bitmap's own
% border -- TODO confirm against the rendered figure.
\node[minimum width=17.4em,minimum height=2.9em,draw=white,line width=3pt] at (0.3em,-0.02em){};
% Yellow strip that holds the per-frame letters.
\node[anchor=north,draw,rounded corners=2pt,minimum width=16em, minimum height=2.2em,fill=yellow!20] (box) at ([xshift=0.4em]input.south){};
% Row 1: eleven cells chained left to right, each 0.2em after the previous one.
\node[anchor=west,minimum width=1.2em,minimum height=2.2em] (w1) at ([xshift=0.2em]box.west){{h}};
\node[anchor=west,minimum width=1.2em,minimum height=2.2em] (w2) at ([xshift=0.2em]w1.east){{e}};
\node[anchor=west,minimum width=1.2em,minimum height=2.2em] (w3) at ([xshift=0.2em]w2.east){{e}};
\node[anchor=west,minimum width=1.2em,minimum height=2.2em] (w4) at ([xshift=0.2em]w3.east){{$\epsilon$}};
\node[anchor=west,minimum width=1.2em,minimum height=2.2em] (w5) at ([xshift=0.2em]w4.east){{l}};
\node[anchor=west,minimum width=1.2em,minimum height=2.2em] (w6) at ([xshift=0.2em]w5.east){{$\epsilon$}};
\node[anchor=west,minimum width=1.2em,minimum height=2.2em] (w7) at ([xshift=0.2em]w6.east){{l}};
\node[anchor=west,minimum width=1.2em,minimum height=2.2em] (w8) at ([xshift=0.2em]w7.east){{l}};
\node[anchor=west,minimum width=1.2em,minimum height=2.2em] (w9) at ([xshift=0.2em]w8.east){{o}};
\node[anchor=west,minimum width=1.2em,minimum height=2.2em] (w10) at ([xshift=0.2em]w9.east){{o}};
\node[anchor=west,minimum width=1.2em,minimum height=2.2em] (w11) at ([xshift=0.2em]w10.east){{!}};
% Underline every letter cell; the two epsilon cells (w4, w6) are skipped.
\draw[very thick] (w1.south west) -- (w1.south east);
\draw[very thick] (w2.south west) -- (w2.south east);
\draw[very thick] (w3.south west) -- (w3.south east);
\draw[very thick] (w5.south west) -- (w5.south east);
\draw[very thick] (w7.south west) -- (w7.south east);
\draw[very thick] (w8.south west) -- (w8.south east);
\draw[very thick] (w9.south west) -- (w9.south east);
\draw[very thick] (w10.south west) -- (w10.south east);
\draw[very thick] (w11.south west) -- (w11.south east);
% Row 2: merged groups. The 2.64em-wide cells span two row-1 cells
% (2 x 1.2em + 2 x 0.2em gap... shifted 0.72em to sit under the pair);
% m3 and m5 are the empty slots under the epsilon cells.
\node[anchor=north,minimum width=1.2em,minimum height=1.4em,fill=gray!30] (m1) at ([yshift=-1em]w1.south){{h}};
\node[anchor=north,minimum width=2.64em,minimum height=1.4em,fill=gray!30] (m2) at ([yshift=-1em,xshift=0.72em]w2.south){{e}};
\node[anchor=north,minimum width=1.2em,minimum height=1.4em,fill=gray!30] (m3) at ([yshift=-1em]w4.south){};
\node[anchor=north,minimum width=1.2em,minimum height=1.4em,fill=gray!30] (m4) at ([yshift=-1em]w5.south){{l}};
\node[anchor=north,minimum width=1.2em,minimum height=1.4em,fill=gray!30] (m5) at ([yshift=-1em]w6.south){};
\node[anchor=north,minimum width=2.64em,minimum height=1.4em,fill=gray!30] (m6) at ([yshift=-1em,xshift=0.72em]w7.south){{l}};
\node[anchor=north,minimum width=2.64em,minimum height=1.4em,fill=gray!30] (m7) at ([yshift=-1em,xshift=0.72em]w9.south){{o}};
\node[anchor=north,minimum width=1.2em,minimum height=1.4em,fill=gray!30] (m8) at ([yshift=-1em]w11.south){{!}};
% Row 3: final output letters packed into the first six columns.
\node[anchor=north,minimum width=1.2em,minimum height=1.4em,fill=gray!30] (o1) at ([yshift=-3.8em]w1.south){{h}};
\node[anchor=north,minimum width=1.2em,minimum height=1.4em,fill=gray!30] (o2) at ([yshift=-3.8em]w2.south){{e}};
\node[anchor=north,minimum width=1.2em,minimum height=1.4em,fill=gray!30] (o3) at ([yshift=-3.8em]w3.south){{l}};
\node[anchor=north,minimum width=1.2em,minimum height=1.4em,fill=gray!30] (o4) at ([yshift=-3.8em]w4.south){{l}};
\node[anchor=north,minimum width=1.2em,minimum height=1.4em,fill=gray!30] (o5) at ([yshift=-3.8em]w5.south){{o}};
\node[anchor=north,minimum width=1.2em,minimum height=1.4em,fill=gray!30] (o6) at ([yshift=-3.8em]w6.south){{!}};
% Unnamed empty grey cells fill the remaining five columns of row 3.
\node[anchor=north,minimum width=1.2em,minimum height=1.4em,fill=gray!30] at ([yshift=-3.8em]w7.south){};
\node[anchor=north,minimum width=1.2em,minimum height=1.4em,fill=gray!30] at ([yshift=-3.8em]w8.south){};
\node[anchor=north,minimum width=1.2em,minimum height=1.4em,fill=gray!30] at ([yshift=-3.8em]w9.south){};
\node[anchor=north,minimum width=1.2em,minimum height=1.4em,fill=gray!30] at ([yshift=-3.8em]w10.south){};
\node[anchor=north,minimum width=1.2em,minimum height=1.4em,fill=gray!30] at ([yshift=-3.8em]w11.south){};
% Translucent ribbons: each closed path starts at the row-1 cell(s), runs
% down past the row-2 group and bends with Bezier segments into the row-3
% cell, tying one output letter to the frames it came from.
% Ribbon h: w1 -> o1 (straight, same column).
\draw[blue!40,fill=blue!30,opacity=0.7] (w1.south west) -- (w1.south east) -- (o1.south east) -- (o1.south west) -- (w1.south west);
% Ribbon e: w2..w3 -> m2 -> o2.
\draw[blue!40,fill=blue!30,opacity=0.7] (w2.south west) -- (w3.south east) -- (m2.south east) .. controls ([yshift=-0.3em]m2.south east) and ([yshift=0.3em]o2.north east) .. (o2.north east) -- (o2.south east) -- (o2.south west) -- (w2.south west);
% Ribbon l (first): w5 -> m4 -> o3.
\draw[blue!40,fill=blue!30,opacity=0.7] (w5.south west) -- (w5.south east) -- (m4.south east) .. controls ([yshift=-0.3em]m4.south east) and ([yshift=0.3em]o3.north east) .. (o3.north east) -- (o3.south east) -- (o3.south west) -- (o3.north west) .. controls ([yshift=0.3em]o3.north west) and ([yshift=-0.3em]m4.south west) .. (m4.south west) -- (w5.south west);
% Ribbon l (second): w7..w8 -> m6 -> o4.
\draw[blue!40,fill=blue!30,opacity=0.7] (w7.south west) -- (w8.south east) -- (m6.south east) .. controls ([yshift=-0.3em]m6.south east) and ([yshift=0.3em]o4.north east) .. (o4.north east) -- (o4.south east) -- (o4.south west) -- (o4.north west) .. controls ([yshift=0.3em]o4.north west) and ([yshift=-0.3em]m6.south west) .. (m6.south west) -- (w7.south west);
% Ribbon o: w9..w10 -> m7 -> o5.
\draw[blue!40,fill=blue!30,opacity=0.7] (w9.south west) -- (w10.south east) -- (m7.south east) .. controls ([yshift=-0.1em]m7.south east) and ([yshift=0.2em]o5.north east) .. (o5.north east) -- (o5.south east) -- (o5.south west) -- (o5.north west) .. controls ([yshift=0.1em]o5.north west) and ([yshift=-0.5em]m7.south west) .. (m7.south west) -- (w9.south west);
% Ribbon !: w11 -> m8 -> o6.
\draw[blue!40,fill=blue!30,opacity=0.7] (w11.south west) -- (w11.south east) -- (m8.south east) .. controls ([yshift=-0.4em]m8.south east) and ([yshift=0.1em]o6.north east) .. (o6.north east) -- (o6.south east) -- (o6.south west) -- (o6.north west) .. controls ([yshift=0.1em]o6.north west) and ([yshift=-0.5em]m8.south west) .. (m8.south west) -- (w11.south west);
% Row captions stacked under the audio-input caption a1.
\node[anchor=north,font=\scriptsize,align=center] (a2) at ([yshift=-1.4em]a1.south) {预测字母序列};
\node[anchor=north,font=\scriptsize,align=center] (a3) at ([yshift=-1.8em]a2.south) {合并重复字母 \\ 并丢弃$\epsilon$};
\node[anchor=north,font=\scriptsize,align=center] (a4) at ([yshift=-0.6em]a3.south) {最终结果输出};
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}[scale=0.8]
% Two stacked sub-figures, each: picture icon -> CNN -> column of feature
% nodes h_1..h_n -> decoder. Sub-figure (a) feeds the whole feature box to
% the decoder; sub-figure (b) routes the features through a crossed-circle
% node that exchanges arrows with the decoder.
% Node contents are scaled with the picture so text matches the geometry.
\tikzstyle{every node}=[scale=0.8]
% Sub-figure 1: reference points of the picture icon, all relative to A1
% (top-left corner of the icon frame).
\coordinate (A1) at (0, 0);
\coordinate (B1) at ([xshift=1.5em,yshift=-0.4em]A1);
\coordinate (C1) at ([xshift=0.3em,yshift=-2.6em]A1);
\coordinate (D1) at ([xshift=2.7em,yshift=-2.6em]A1);
\coordinate (E1) at ([xshift=2.4em,yshift=-1.5em]A1);
\coordinate (F1) at ([xshift=0.3em]D1);
% Sub-figure 2: same icon layout, 15em below sub-figure 1.
\coordinate (A2) at ([yshift=-15em]A1);
\coordinate (B2) at ([xshift=1.5em,yshift=-0.4em]A2);
\coordinate (C2) at ([xshift=0.3em,yshift=-2.6em]A2);
\coordinate (D2) at ([xshift=2.7em,yshift=-2.6em]A2);
\coordinate (E2) at ([xshift=2.4em,yshift=-1.5em]A2);
\coordinate (F2) at ([xshift=0.3em]D2);
% Draw the icon, the CNN box and the feature column once per sub-figure.
\foreach \x in {1,2}{
% Icon frame; the extra segment after returning to A\x overlaps the first
% edge, presumably to get a clean join at the starting corner.
\draw[-,line width=2pt] (A\x) -- ([xshift=3.6em]A\x) -- ([xshift=3.6em,yshift=-3em]A\x) -- ([yshift=-3em]A\x) -- (A\x) -- ([xshift=1em]A\x);
% Triangle outline B-C-D inside the frame.
\draw[-, very thick] (B\x) -- (C\x) -- (D\x) -- (B\x);
% Filled lower part of the triangle with a zig-zag upper edge.
\draw[-, very thick,fill=black] ([xshift=-0.6em,yshift=-1.2em]B\x) -- ([xshift=-0.3em,yshift=-1em]B\x) -- ([yshift=-1.2em]B\x) --([xshift=0.3em,yshift=-1em]B\x) -- ([xshift=0.6em,yshift=-1.2em]B\x) -- (D\x) -- (C\x) -- ([xshift=-0.6em,yshift=-1.2em]B\x);
% Small filled shape at the right foot of the triangle.
\draw[-, very thick,fill=black] (E\x) -- ([xshift=0.2em,yshift=0.3em]E\x) -- ([xshift=0.33em]F\x) -- (F\x) -- (E\x);
% Filled dot in the upper-left area of the icon.
\node[circle,inner sep=0pt,minimum size=0.4em,fill=black] at ([xshift=-0.7em,yshift=-0.2em]B\x){};
\node[draw,rounded corners=2pt,fill=yellow!20,minimum width=2.3cm,minimum height=1cm](cnn\x) at ([xshift=9.5em,yshift=-1.5em]A\x){CNN};
% Feature column: h_2 is placed first, h_1 above it, dots and h_n below.
% Nodes are named h<fig>_<idx>, e.g. h1_2, h2_n.
\node[draw,circle,fill=green!20,font=\footnotesize,anchor=west,inner sep=3pt] (h\x_2) at ([xshift=3em,yshift=0.66em]cnn\x.east){$h_2$};
\node[draw,circle,fill=green!20,font=\footnotesize,anchor=south,inner sep=3pt] (h\x_1) at ([yshift=1em]h\x_2.north){$h_1$};
\node[font=\footnotesize,anchor=north] (h\x_c) at ([yshift=-0.6em]h\x_2.south){$\cdots$};
\node[draw,circle,fill=green!20,font=\footnotesize,anchor=north,inner sep=3pt] (h\x_n) at ([yshift=-0.6em]h\x_c.south){$h_n$};
}
% Dashed frames around each feature column, labelled "image feature vector".
\begin{pgfonlayer}{background}
\node[draw,thick,rounded corners=2pt,densely dashed,inner ysep=1.2em,inner xsep=0.4em,label={above:图像特征向量}][fit=(h1_1)(h1_2)(h1_n)](box1){};
\node[draw,thick,rounded corners=2pt,densely dashed,inner ysep=1.2em,inner xsep=0.4em,label={above:图像特征向量}][fit=(h2_1)(h2_2)(h2_n)](box2){};
\end{pgfonlayer}
% Decoder of sub-figure (a), directly right of the feature box.
\node[anchor=west,draw,rounded corners=2pt,fill=blue!20,minimum width=2.3cm,minimum height=1cm] (decoder1)at ([xshift=3em]box1.east){解码器};
% Crossed circle of sub-figure (b), between the feature box and the decoder.
\node[anchor=west,draw,circle,inner sep=0pt,minimum size=1.4em] (add)at ([xshift=2em,yshift=1.6em]box2.east){};
\draw[] (add.0) -- (add.180);
\draw[] (add.90) -- (add.-90);
\node[anchor=west,draw,rounded corners=2pt,fill=blue!20,minimum width=2.3cm,minimum height=1cm] (decoder2)at ([xshift=6em]box2.east){解码器};
% Icon -> CNN arrows (the start point lies 2.7em left of the CNN box).
\draw[->,thick] ([xshift=-2.7em]cnn1.180) -- ([xshift=-0.1em]cnn1.180);
\draw[->,thick] ([xshift=-2.7em]cnn2.180) -- ([xshift=-0.1em]cnn2.180);
% CNN -> feature box arrows.
\draw[->,thick] ([xshift=0.1em]cnn1.0) -- ([xshift=-0.1em]box1.180);
\draw[->,thick] ([xshift=0.1em]cnn2.0) -- ([xshift=-0.1em]box2.180);
% (a): whole feature box -> decoder.
\draw[->,thick] ([xshift=0.1em]box1.0) -- ([xshift=-0.1em]decoder1.180);
% (b): every feature node (and the dots placeholder) -> crossed circle.
\draw[->,thick] ([xshift=0.1em]h2_1.0) -- (add.180);
\draw[->,thick] ([xshift=0.1em]h2_2.0) -- (add.180);
\draw[->,thick] ([xshift=0.1em]h2_c.0) -- (add.180);
\draw[->,thick] ([xshift=0.1em]h2_n.0) -- (add.180);
% (b): curved arrow circle -> decoder, and curved arrow decoder -> circle.
\draw[->,thick,out=20,in=130] ([xshift=0.1em]add.45) to ([xshift=-0em,yshift=0.1em]decoder2.north west);
\draw[->,thick,out=200,in=-45] ([xshift=-0.1em]decoder2.west) to ([yshift=-0.1em]add.-90);
% Sub-figure captions: (a) without attention, (b) with attention.
\node [anchor=north](pos1) at ([yshift=-1.0em]box1.south) {(a)未引入注意力机制};
\node [anchor=north](pos2) at ([yshift=-1.0em]box2.south) {(b)引入注意力机制};
\end{tikzpicture}
%------------------------------------------------------------------------------------------------------------
\begin{tikzpicture}[scale=0.6]
% Four panels laid out left to right and separated by vertical rules. Each
% panel shows icons on both sides of a block arrow; two icon kinds are
% used: a picture icon (anchored at A<n>) and a page icon with text lines
% (anchored at G<n>). Captions (a)-(d) are placed under the arrows.
\tikzstyle{every node}=[scale=0.6]
% Panel (a), left side: picture icon 1 (A1..F1) with page icon 1 (G1..L1)
% 5em below it.
\coordinate (A1) at (0, 0);
\coordinate (B1) at ([xshift=1.5em,yshift=-0.4em]A1);
\coordinate (C1) at ([xshift=0.3em,yshift=-2.6em]A1);
\coordinate (D1) at ([xshift=2.7em,yshift=-2.6em]A1);
\coordinate (E1) at ([xshift=2.4em,yshift=-1.5em]A1);
\coordinate (F1) at ([xshift=0.3em]D1);
\coordinate (G1) at ([xshift=0.3em,yshift=-5em]A1);
\coordinate (H1) at ([xshift=0.4em,yshift=-1.6em]G1);
\coordinate (I1) at ([xshift=0.4em,yshift=-2.0em]G1);
\coordinate (J1) at ([xshift=0.4em,yshift=-2.5em]G1);
\coordinate (K1) at ([xshift=0.4em,yshift=-3.0em]G1);
\coordinate (L1) at ([xshift=0.4em,yshift=-3.5em]G1);
% Panel (a), right side: page icon 2.
\coordinate (G2) at ([xshift=8em,yshift=-2.5em]A1);
\coordinate (H2) at ([xshift=0.4em,yshift=-1.6em]G2);
\coordinate (I2) at ([xshift=0.4em,yshift=-2.0em]G2);
\coordinate (J2) at ([xshift=0.4em,yshift=-2.5em]G2);
\coordinate (K2) at ([xshift=0.4em,yshift=-3.0em]G2);
\coordinate (L2) at ([xshift=0.4em,yshift=-3.5em]G2);
% Panel (b): picture icon 2 on the left, page icon 3 on the right.
\coordinate (A2) at ([yshift=-0.5em,xshift=7em]G2);
\coordinate (B2) at ([xshift=1.5em,yshift=-0.4em]A2);
\coordinate (C2) at ([xshift=0.3em,yshift=-2.6em]A2);
\coordinate (D2) at ([xshift=2.7em,yshift=-2.6em]A2);
\coordinate (E2) at ([xshift=2.4em,yshift=-1.5em]A2);
\coordinate (F2) at ([xshift=0.3em]D2);
\coordinate (G3) at ([xshift=8em,yshift=0.5em]A2);
\coordinate (H3) at ([xshift=0.4em,yshift=-1.6em]G3);
\coordinate (I3) at ([xshift=0.4em,yshift=-2.0em]G3);
\coordinate (J3) at ([xshift=0.4em,yshift=-2.5em]G3);
\coordinate (K3) at ([xshift=0.4em,yshift=-3.0em]G3);
\coordinate (L3) at ([xshift=0.4em,yshift=-3.5em]G3);
% Panel (c): picture icon 3 on the left, picture icon 4 on the right.
\coordinate (A3) at ([yshift=-0.5em,xshift=7em]G3);
\coordinate (B3) at ([xshift=1.5em,yshift=-0.4em]A3);
\coordinate (C3) at ([xshift=0.3em,yshift=-2.6em]A3);
\coordinate (D3) at ([xshift=2.7em,yshift=-2.6em]A3);
\coordinate (E3) at ([xshift=2.4em,yshift=-1.5em]A3);
\coordinate (F3) at ([xshift=0.3em]D3);
\coordinate (A4) at ([xshift=8em]A3);
\coordinate (B4) at ([xshift=1.5em,yshift=-0.4em]A4);
\coordinate (C4) at ([xshift=0.3em,yshift=-2.6em]A4);
\coordinate (D4) at ([xshift=2.7em,yshift=-2.6em]A4);
\coordinate (E4) at ([xshift=2.4em,yshift=-1.5em]A4);
\coordinate (F4) at ([xshift=0.3em]D4);
% Panel (d): page icon 4 on the left, picture icon 5 on the right.
\coordinate (G4) at ([xshift=7.6em,yshift=0.5em]A4);
\coordinate (H4) at ([xshift=0.4em,yshift=-1.6em]G4);
\coordinate (I4) at ([xshift=0.4em,yshift=-2.0em]G4);
\coordinate (J4) at ([xshift=0.4em,yshift=-2.5em]G4);
\coordinate (K4) at ([xshift=0.4em,yshift=-3.0em]G4);
\coordinate (L4) at ([xshift=0.4em,yshift=-3.5em]G4);
\coordinate (A5) at ([yshift=-0.5em,xshift=8em]G4);
\coordinate (B5) at ([xshift=1.5em,yshift=-0.4em]A5);
\coordinate (C5) at ([xshift=0.3em,yshift=-2.6em]A5);
\coordinate (D5) at ([xshift=2.7em,yshift=-2.6em]A5);
\coordinate (E5) at ([xshift=2.4em,yshift=-1.5em]A5);
\coordinate (F5) at ([xshift=0.3em]D5);
% Draw the five picture icons: frame, triangle outline, filled lower part
% with zig-zag edge, small filled shape at the right foot, and a dot.
\foreach \x in {1,2,3,4,5}{
\draw[-,line width=2pt] (A\x) -- ([xshift=3.6em]A\x) -- ([xshift=3.6em,yshift=-3em]A\x) -- ([yshift=-3em]A\x) -- (A\x) -- ([xshift=1em]A\x);
\draw[-, very thick] (B\x) -- (C\x) -- (D\x) -- (B\x);
\draw[-, very thick,fill=black] ([xshift=-0.6em,yshift=-1.2em]B\x) -- ([xshift=-0.3em,yshift=-1em]B\x) -- ([yshift=-1.2em]B\x) --([xshift=0.3em,yshift=-1em]B\x) -- ([xshift=0.6em,yshift=-1.2em]B\x) -- (D\x) -- (C\x) -- ([xshift=-0.6em,yshift=-1.2em]B\x);
\draw[-, very thick,fill=black] (E\x) -- ([xshift=0.2em,yshift=0.3em]E\x) -- ([xshift=0.33em]F\x) -- (F\x) -- (E\x);
\node[circle,inner sep=0pt,minimum size=0.4em,fill=black] at ([xshift=-0.7em,yshift=-0.2em]B\x){};
}
% Draw the four page icons: outline with a cut top-right corner, the fold
% of that corner, one short line (H) and four long lines (I..L).
\foreach \y in {1,2,3,4}{
\draw[-,line width=2pt] (G\y) -- ([xshift=1.6em]G\y) -- ([xshift=3em,yshift=-1.4em]G\y) -- ([xshift=3em,yshift=-4em]G\y) -- ([yshift=-4em]G\y) -- (G\y) -- ([xshift=1em]G\y);
\draw[-,line width=2pt] ([xshift=1.6em]G\y) -- ([xshift=1.5em,yshift=-1.4em]G\y) -- ([xshift=3em,yshift=-1.4em]G\y) ;
\draw[-,line width=1.6pt] (H\y) -- ([xshift=0.6em]H\y);
\draw[-,line width=1.6pt] (I\y) -- ([xshift=2em]I\y);
\draw[-,line width=1.6pt] (J\y) -- ([xshift=2em]J\y);
\draw[-,line width=1.6pt] (K\y) -- ([xshift=2em]K\y);
\draw[-,line width=1.6pt] (L\y) -- ([xshift=2em]L\y);
}
% Vertical rules separating the four panels.
\draw[-,thick] ([yshift=4em,xshift=5em]G2) -- ([yshift=-8em,xshift=5em]G2);
\draw[-,thick] ([yshift=4em,xshift=5em]G3) -- ([yshift=-8em,xshift=5em]G3);
\draw[-,thick] ([yshift=4.5em,xshift=5.6em]A4) -- ([yshift=-7.5em,xshift=5.6em]A4);
% Block arrow in each panel, placed just left of the panel's right-hand
% icon (needs the shapes.arrows library for `single arrow`).
\node [draw,single arrow,minimum height=2.4em,single arrow head extend=0.4em] (arrow1) at ([xshift=-2.4em,yshift=-2em]G2) {};
\node [draw,single arrow,minimum height=2.4em,single arrow head extend=0.4em] (arrow2) at ([xshift=-2.4em,yshift=-2em]G3) {};
\node [draw,single arrow,minimum height=2.4em,single arrow head extend=0.4em] (arrow3) at ([xshift=-2.4em,yshift=-1.5em]A4) {};
\node [draw,single arrow,minimum height=2.4em,single arrow head extend=0.4em] (arrow4) at ([xshift=-2.5em,yshift=-1.5em]A5) {};
% Panel captions; scale=1.5 is set on the node itself, which takes
% precedence over the 0.6 node scale set by `every node`.
% (a) multimodal MT, (b) image-to-text, (c) image-to-image, (d) text-to-image.
\node[anchor=north,font=\small,scale=1.5] at ([yshift=-6em]arrow1.south){(a)多模态机器翻译};
\node[anchor=north,font=\small,scale=1.5] at ([yshift=-6em]arrow2.south){(b)图像到文本翻译};
\node[anchor=north,font=\small,scale=1.5] at ([yshift=-6em]arrow3.south){(c)图像到图像翻译};
\node[anchor=north,font=\small,scale=1.5] at ([yshift=-6em]arrow4.south){(d)文本到图像翻译};
\end{tikzpicture}
%------------------------------------------------------------------------------------------------------------
\begin{tikzpicture}[scale=0.8]
% Encoder-decoder pair, each fed from below by a picture icon through a CNN
% box and by a text symbol (x for the encoder, y for the decoder); the
% decoder emits y' at the top. Uses the project macro \seq, which is not
% defined in this file.
\tikzstyle{every node}=[scale=0.8]
% Picture icon 1 (under the encoder): reference points relative to A1.
\coordinate (A1) at (0, 0);
\coordinate (B1) at ([xshift=1.5em,yshift=-0.4em]A1);
\coordinate (C1) at ([xshift=0.3em,yshift=-2.6em]A1);
\coordinate (D1) at ([xshift=2.7em,yshift=-2.6em]A1);
\coordinate (E1) at ([xshift=2.4em,yshift=-1.5em]A1);
\coordinate (F1) at ([xshift=0.3em]D1);
% Picture icon 2 (under the decoder): same layout, 15em to the right.
\coordinate (A2) at ([xshift=15em]A1);
\coordinate (B2) at ([xshift=1.5em,yshift=-0.4em]A2);
\coordinate (C2) at ([xshift=0.3em,yshift=-2.6em]A2);
\coordinate (D2) at ([xshift=2.7em,yshift=-2.6em]A2);
\coordinate (E2) at ([xshift=2.4em,yshift=-1.5em]A2);
\coordinate (F2) at ([xshift=0.3em]D2);
% Draw each icon (frame, triangle outline, filled lower part, small filled
% shape, dot) and the CNN box above it.
\foreach \x in {1,2}{
\draw[-,line width=2pt] (A\x) -- ([xshift=3.6em]A\x) -- ([xshift=3.6em,yshift=-3em]A\x) -- ([yshift=-3em]A\x) -- (A\x) -- ([xshift=1em]A\x);
\draw[-, very thick] (B\x) -- (C\x) -- (D\x) -- (B\x);
\draw[-, very thick,fill=black] ([xshift=-0.6em,yshift=-1.2em]B\x) -- ([xshift=-0.3em,yshift=-1em]B\x) -- ([yshift=-1.2em]B\x) --([xshift=0.3em,yshift=-1em]B\x) -- ([xshift=0.6em,yshift=-1.2em]B\x) -- (D\x) -- (C\x) -- ([xshift=-0.6em,yshift=-1.2em]B\x);
\draw[-, very thick,fill=black] (E\x) -- ([xshift=0.2em,yshift=0.3em]E\x) -- ([xshift=0.33em]F\x) -- (F\x) -- (E\x);
\node[circle,inner sep=0pt,minimum size=0.4em,fill=black] at ([xshift=-0.7em,yshift=-0.2em]B\x){};
\node[draw,rounded corners=2pt,fill=yellow!20,minimum width=2.3cm,minimum height=1cm](cnn\x) at ([xshift=1.8em,yshift=3.6em]A\x){CNN};
}
% Encoder/decoder boxes sit above their CNN, shifted 2.2em right so the CNN
% feeds the left part and the text symbol feeds the right part.
\node[draw,anchor=south,rounded corners=2pt,minimum width=4.0cm,minimum height=1cm,fill=red!20](encoder) at ([yshift=2.6em,xshift=2.2em]cnn1.north){编码器};
\node[anchor=north,font=\Large](x) at ([xshift=2.5em,yshift=-3.4em]encoder.south){$\seq{x}$};
\node[draw,anchor=south,rounded corners=2pt,minimum width=4.0cm,minimum height=1cm,fill=blue!20](decoder) at ([yshift=2.6em,xshift=2.2em]cnn2.north){解码器};
\node[anchor=north,font=\Large](y) at ([xshift=2.5em,yshift=-3.4em]decoder.south){$\seq{y}$};
\node[anchor=south,font=\Large](y_1) at ([yshift=3em]decoder.north){$\seq{y}'$};
% Icon -> CNN arrows.
\draw[->,thick] ([yshift=-2.1em]cnn1.south) -- ([yshift=-0.1em]cnn1.south);
\draw[->,thick] ([yshift=-2.1em]cnn2.south) -- ([yshift=-0.1em]cnn2.south);
% CNN -> encoder/decoder arrows.
\draw[->,thick] ([yshift=0.1em]cnn1.north) -- ([yshift=2.4em]cnn1.north);
\draw[->,thick] ([yshift=0.1em]cnn2.north) -- ([yshift=2.4em]cnn2.north);
% Text symbol -> encoder/decoder arrows; end points are given as offsets
% from the symbol's south anchor (hand-tuned lengths).
\draw[->,thick] ([yshift=0.3em]x.north) -- ([yshift=4.5em]x.south);
\draw[->,thick] ([yshift=0.3em]y.north) -- ([yshift=4.7em]y.south);
% Encoder -> decoder, decoder -> output.
\draw[->,thick] ([xshift=0.1em]encoder.east) -- ([xshift=-0.1em]decoder.west);
\draw[->,thick] ([yshift=0.1em]decoder.north) -- ([yshift=-0.1em]y_1.south);
% Legend: "x, y: bilingual data".
\node [anchor=south,scale=1.2] (node1) at ([xshift=-2.0em,yshift=2.5em]encoder.north) {{$x,y$:双语数据}};
\end{tikzpicture}
%------------------------------------------------------------------------------------------------------------
\begin{tikzpicture}
% ASR model drawn as a Transformer: encoder column on the left, decoder
% column on the right, joined by one routed arrow into the decoder's second
% attention layer. Needs the `fit` and `backgrounds` libraries and the
% colour `ugreen` from the including document.
\tikzset{
  % coloured layer boxes
  layer/.style={draw,rounded corners=2pt,font=\scriptsize,align=center,minimum width=5em},
  word/.style={font=\scriptsize},
  % empty circle that becomes the "add" symbol once the cross is drawn
  sum circle/.style={draw,circle,inner sep=0pt,minimum size=1em},
  % small multi-line annotation text
  caption text/.style={font=\scriptsize,align=center},
}
% ---- encoder column: every node is placed relative to en_sa ----
\node[layer,fill=red!20] (en_sa) at (0,0) {Multi-Head \\ Attention};
\node[layer,anchor=south,fill=green!20] (en_ffn) at ([yshift=1.4em]en_sa.north) {Feed Forward \\ Network};
\node[sum circle,anchor=north] (en_add) at ([yshift=-1.4em]en_sa.south) {};
\node[layer,anchor=north,fill=yellow!20] (en_cnn) at ([yshift=-1.4em]en_add.south) {CNN};
% ---- decoder column: starts 7em right of the encoder "add" symbol ----
\node[sum circle,anchor=west] (de_add) at ([xshift=7em]en_add.east) {};
\node[layer,anchor=south,fill=red!20] (de_sa) at ([yshift=1.4em]de_add.north) {Masked \\Multi-Head\\Attention};
\node[layer,anchor=south,fill=red!20] (de_ca) at ([yshift=1.4em]de_sa.north) {Multi-Head \\ Attention};
\node[layer,anchor=south,fill=green!20] (de_ffn) at ([yshift=1.4em]de_ca.north) {Feed Forward \\ Network};
\node[layer,anchor=south,fill=blue!20] (sf) at ([yshift=2em]de_ffn.north) {Softmax};
\node[layer,anchor=south,fill=orange!20] (output) at ([yshift=1.4em]sf.north) {Output Probabilities};
% ---- crosses inside both "add" circles ----
\foreach \circ in {en_add,de_add}{
  \draw (\circ.90) -- (\circ.-90);
  \draw (\circ.0) -- (\circ.180);
}
% ---- input and position annotations ----
\node[caption text,anchor=north] (en_input) at ([yshift=-1em]en_cnn.south) {Speech Feature\\(FilterBank/MFCC)};
\node[caption text,anchor=north] (de_input) at ([yshift=-1em]de_add.south) {Transcription\\(Embedding)};
\node[caption text,anchor=east] (en_pos) at ([xshift=-2em]en_add.west) {Position\\(Embedding)};
\node[caption text,anchor=west] (de_pos) at ([xshift=2em]de_add.east) {Position\\(Embedding)};
% ---- arrows leaving the input annotations (no gap at the tail) ----
\foreach \src/\dst in {en_input/en_cnn,de_input/de_add}{
  \draw[->] (\src.90) -- ([yshift=-0.1em]\dst.-90);
}
% ---- layer-to-layer arrows, 0.1em clear of the borders on both ends ----
\foreach \src/\dst in {en_cnn/en_add,en_add/en_sa,en_sa/en_ffn,de_add/de_sa,de_sa/de_ca,de_ca/de_ffn,de_ffn/sf,sf/output}{
  \draw[->] ([yshift=0.1em]\src.90) -- ([yshift=-0.1em]\dst.-90);
}
% ---- position embeddings enter the "add" symbols horizontally ----
\draw[->] ([xshift=0.1em]en_pos.0) -- ([xshift=-0.1em]en_add.180);
\draw[->] ([xshift=-0.1em]de_pos.180) -- ([xshift=0.1em]de_add.0);
% ---- encoder output: up, right, then into the left side of de_ca ----
\draw[->,rounded corners=2pt]
  ([yshift=0.1em]en_ffn.90)
  -- ([yshift=2em]en_ffn.90)
  -- ([xshift=4em,yshift=2em]en_ffn.90)
  -- ([xshift=-1.5em]de_ca.west)
  -- ([xshift=-0.1em]de_ca.west);
% ---- frames around the repeated stacks, drawn behind the layers ----
\begin{pgfonlayer}{background}
  \node[draw=ugreen,rounded corners=2pt,inner xsep=6pt,inner ysep=8pt][fit=(en_sa)(en_ffn)] (box1) {};
  \node[draw=red,rounded corners=2pt,inner xsep=6pt,inner ysep=8pt][fit=(de_sa)(de_ca)(de_ffn)] (box2) {};
\end{pgfonlayer}
% ---- repetition counts next to the frames and the CNN ----
\node[anchor=east,font=\scriptsize,text=ugreen] at ([xshift=-0.1em]box1.west) {$N \times$};
\node[anchor=west,font=\scriptsize,text=red] at ([xshift=0.1em]box2.east) {$\times N$};
\node[anchor=east,font=\scriptsize] at ([xshift=-0.1em]en_cnn.west) {$2 \times$};
% ---- component names, offset upwards from the frame edges ----
\node[caption text,anchor=east,text=ugreen] at ([xshift=-0.1em,yshift=3em]box1.west) {ASR \\ Encoder};
\node[caption text,anchor=west,text=red] at ([xshift=0.1em,yshift=5em]box2.east) {ASR \\ Decoder};
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Speech translation model with an extra CTC branch: the encoder column
% (CNN -> add position -> self-attention -> FFN) additionally feeds its own
% Softmax and a "CTC output" box, while the decoder column ends in Softmax
% and a "speech translation output" box.
% Requires the `fit` and `backgrounds` TikZ libraries and the colour `ugreen`,
% all of which are expected to be provided by the including document.
\tikzstyle{layer}=[draw,rounded corners=2pt,font=\scriptsize,align=center,minimum width=5em]
\tikzstyle{word}=[font=\scriptsize]
% --- encoder column ---
\node[layer,fill=red!20] (en_sa) at (0,0){Multi-Head \\ Attention};
\node[layer,anchor=south,fill=green!20] (en_ffn) at ([yshift=1.4em]en_sa.north){Feed Forward \\ Network};
% Empty circle plus two crossing strokes form the "add" symbol.
\node[draw,circle,inner sep=0pt, minimum size=1em,anchor=north] (en_add) at ([yshift=-1.4em]en_sa.south){};
\draw[] (en_add.90) -- (en_add.-90);
\draw[] (en_add.0) -- (en_add.180);
\node[layer,anchor=north,fill=yellow!20] (en_cnn) at ([yshift=-1.4em]en_add.south){CNN};
% --- decoder column, placed 7em to the right of the encoder "add" symbol ---
\node[draw,circle,inner sep=0pt, minimum size=1em,anchor=west] (de_add) at ([xshift=7em]en_add.east){};
\draw[] (de_add.90) -- (de_add.-90);
\draw[] (de_add.0) -- (de_add.180);
\node[layer,anchor=south,fill=red!20] (de_sa) at ([yshift=1.4em]de_add.north){Masked \\Multi-Head\\Attention};
\node[layer,anchor=south,fill=red!20] (de_ca) at ([yshift=1.4em]de_sa.north){Multi-Head \\ Attention};
\node[layer,anchor=south,fill=green!20] (de_ffn) at ([yshift=1.4em]de_ca.north){Feed Forward \\ Network};
% --- output heads: CTC head on the encoder, translation head on the decoder ---
\node[layer,anchor=south,fill=blue!20] (en_sf) at ([yshift=3em]en_ffn.north){Softmax};
\node[layer,anchor=south,fill=blue!20] (sf) at ([yshift=2em]de_ffn.north){Softmax};
\node[layer,anchor=south,fill=orange!20] (en_output) at ([yshift=1.4em]en_sf.north){CTC输出};
\node[layer,anchor=south,fill=orange!20] (output) at ([yshift=1.4em]sf.north){语音翻译输出};
% --- input and position labels ---
\node[anchor=north,font=\scriptsize,align=center] (en_input) at ([yshift=-1em]en_cnn.south){语音特征\\(FilterBank/MFCC)};
\node[anchor=north,font=\scriptsize,align=center] (de_input) at ([yshift=-1em]de_add.south){目标文本\\(Embedding)};
\node[anchor=east,font=\scriptsize,align=center] (en_pos) at ([xshift=-2em]en_add.west){Position\\(Embedding)};
\node[anchor=west,font=\scriptsize,align=center] (de_pos) at ([xshift=2em]de_add.east){Position\\(Embedding)};
% --- vertical data-flow arrows (0.1em gap keeps arrow tips off the borders) ---
\draw[->] (en_input.90) -- ([yshift=-0.1em]en_cnn.-90);
\draw[->] ([yshift=0.1em]en_cnn.90) -- ([yshift=-0.1em]en_add.-90);
\draw[->] ([yshift=0.1em]en_add.90) -- ([yshift=-0.1em]en_sa.-90);
\draw[->] ([yshift=0.1em]en_sa.90) -- ([yshift=-0.1em]en_ffn.-90);
\draw[->] (de_input.90) -- ([yshift=-0.1em]de_add.-90);
\draw[->] ([yshift=0.1em]de_add.90) -- ([yshift=-0.1em]de_sa.-90);
\draw[->] ([yshift=0.1em]de_sa.90) -- ([yshift=-0.1em]de_ca.-90);
\draw[->] ([yshift=0.1em]de_ca.90) -- ([yshift=-0.1em]de_ffn.-90);
\draw[->] ([yshift=0.1em]en_ffn.90) -- ([yshift=-0.1em]en_sf.-90);
\draw[->] ([yshift=0.1em]en_sf.90) -- ([yshift=-0.1em]en_output.-90);
\draw[->] ([yshift=0.1em]de_ffn.90) -- ([yshift=-0.1em]sf.-90);
\draw[->] ([yshift=0.1em]sf.90) -- ([yshift=-0.1em]output.-90);
% --- position embeddings feed the "add" symbols from the outside ---
\draw[->] ([xshift=0.1em]en_pos.0) -- ([xshift=-0.1em]en_add.180);
\draw[->] ([xshift=-0.1em]de_pos.180) -- ([xshift=0.1em]de_add.0);
% --- branch to the decoder: starts 2em above en_ffn, i.e. on the
% en_ffn -> en_sf arrow, then runs right and into the left side of de_ca ---
\draw[->,rounded corners=2pt] ([yshift=2em]en_ffn.90) -- ([xshift=4em,yshift=2em]en_ffn.90) -- ([xshift=-1.5em]de_ca.west) -- ([xshift=-0.1em]de_ca.west);
% --- frames around the repeated encoder/decoder stacks ---
% The frames must be named: every label below is anchored to box1/box2.
\begin{pgfonlayer}{background}
\node[draw=ugreen,rounded corners=2pt,inner xsep=6pt,inner ysep=8pt][fit=(en_sa)(en_ffn)](box1){};
\node[draw=red,rounded corners=2pt,inner xsep=6pt,inner ysep=8pt][fit=(de_sa)(de_ca)(de_ffn)](box2){};
\end{pgfonlayer}
% --- repetition counts ---
\node[anchor=east,font=\scriptsize,text=ugreen] at ([xshift=-0.1em]box1.west){$N \times$};
\node[anchor=west,font=\scriptsize,text=red] at ([xshift=0.1em]box2.east){$\times N$};
\node[anchor=east,font=\scriptsize] at ([xshift=-0.1em]en_cnn.west){$2 \times$};
% --- component names, offset upwards from the frame edges ---
\node[anchor=east,font=\scriptsize,align=center,text=ugreen] at ([xshift=-0.1em,yshift=3em]box1.west){语音翻译\\编码器};
\node[anchor=west,font=\scriptsize,align=center,text=red] at ([xshift=0.1em,yshift=5em]box2.east){语音翻译\\解码器};
\end{tikzpicture}
\ No newline at end of file
......@@ -15,7 +15,9 @@
\draw[->,thick](decoder_2.north)to(y.south);
\draw[->,thick](encoder.north)--([yshift=0.7cm]encoder.north)--([xshift=-4.16em,yshift=0.7cm]encoder.north)--(decoder_1.south);
\draw[->,thick](encoder.north)--([yshift=0.7cm]encoder.north)--([xshift=4.16em,yshift=0.7cm]encoder.north)--(decoder_2.south);
\node [anchor=north](pos1) at (s.south) {(a) 单编码器-双解码器方式};
\node [anchor=north,scale = 1.2](pos1) at (s.south) {(a) 单编码器-双解码器方式};
\node [anchor=south,scale=1.2] (node1) at ([xshift=-2.0em,yshift=6em]decoder_1.north) {{$x,y$:语言数据}};
\node [anchor=north,scale=1.2] (node2) at ([xshift=0.6em]node1.south){{$s$:语音数据}};
%%%%%%%%%%%%%%%%%%%%%%%%级联
\node(encoder-2)[coder]at ([xshift=10.0em]encoder.east){\large{编码器}};
\node(decoder_1-2)[coder,above of =encoder-2,yshift=1.4cm,fill=blue!20]{\large{解码器}};
......@@ -27,7 +29,7 @@
\draw[->,thick](encoder-2.north)to(decoder_1-2.south);
\draw[->,thick](decoder_1-2.north)to(decoder_2-2.south);
\draw[->,thick](decoder_2-2.north)to(y-2.south);
\node [anchor=north](pos2) at (s-2.south) {(b) 级联编码器方式};
\node [anchor=north,scale = 1.2](pos2) at (s-2.south) {(b) 级联编码器方式};
%%%%%%%%%%%%%%%%%%%%%%%%联合
\node(encoder-3)[coder]at([xshift=10.0em]encoder-2.east){\large{编码器}};
\node(decoder_1-3)[coder,above of =encoder-3,xshift=-1.6cm,yshift=2.8cm,fill=blue!20]{\large{解码器}};
......@@ -40,5 +42,5 @@
\draw[->,thick](decoder_2-3.north)to(y-3.south);
\draw[->,thick](encoder-3.north)--([yshift=0.7cm]encoder-3.north)--([xshift=-4.16em,yshift=0.7cm]encoder-3.north)--(decoder_1-3.south);
\draw[->,thick](encoder-3.north)--([yshift=0.7cm]encoder-3.north)--([xshift=4.16em,yshift=0.7cm]encoder-3.north)--(decoder_2-3.south);
\node [anchor=north](pos3) at (s-3.south) {(c) 联合编码器方式};
\node [anchor=north,scale = 1.2](pos3) at (s-3.south) {(c) 联合编码器方式};
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Word lattice: numbered states n0..n11 connected by arrows, each labelled
% "word /weight". State n0 is the left-most state and n9 the right-most.
% `node` is the style of the lattice states, `word` the style of edge labels.
\tikzstyle{node}=[circle,minimum size=1.2em,draw,inner sep=0pt,fill=yellow!20,font=\footnotesize,thick]
\tikzstyle{word}=[font=\scriptsize]
% --- states on the main horizontal axis ---
\node[node] (n0) at (0,0) {0};
\node[anchor=west,node] (n2) at ([xshift=3em]n0.east){2};
\node[anchor=west,node] (n6) at ([xshift=13em]n2.east){6};
\node[anchor=west,node] (n8) at ([xshift=2.4em]n6.east){8};
\node[anchor=west,node] (n9) at ([xshift=2.4em]n8.east){9};
% --- states above and below the axis, positioned relative to n2/n7/n3 ---
\node[anchor=south,node] (n1) at ([xshift=0.6em,yshift=3.2em]n2.north){1};
\node[anchor=north,node] (n3) at ([xshift=2.2em,yshift=-1.6em]n2.south){3};
\node[anchor=north,node] (n7) at ([xshift=5.2em,yshift=-0.8em]n2.south){7};
\node[anchor=west,node] (n10) at ([xshift=4em]n7.east){10};
\node[anchor=south,node] (n11) at ([yshift=3.0em]n7.north){11};
\node[anchor=south,node] (n5) at ([yshift=3.0em]n10.north){5};
\node[anchor=north,node] (n4) at ([xshift=6em,yshift=-1.6em]n3.south){4};
% --- edges leaving n0 ---
\draw[->] (n0.0) -- node[word,above]{of /0.343}(n2.180);
\draw[->] (n0.60) -- node[word,above,rotate=40]{a /0.499}(n1.-150);
\draw[->] (n0.-50) -- node[word,above,rotate=-20]{our /0.116}(n3.150);
\draw[->] (n0.-70) .. controls ([xshift=-8em]n4.180) and ([xshift=-8em]n4.180) .. node[above,word,xshift=3em,yshift=-0.6em]{that /0.039} (n4.180);
% --- edges through the lower part of the lattice ---
\draw[->] (n4.0) .. node[word,above,xshift=-2em,yshift=-0.4em]{hostage /1} controls ([xshift=5em]n4.0) and ([yshift=-6em]n6.-90) .. (n6.-90);
\draw[->] (n2.-90) -- node[word,above,rotate=-18,pos=0.55]{house /0.125}(n7.180);
\draw[->] (n3.-10) node[word,above,xshift=3.6em,yshift=-0.8em]{conference /1} .. controls ([xshift=4.6em,yshift=-1.8em]n3.-10) and ([yshift=-1.6em,xshift=-3em]n10.-135) .. (n10.-135);
\draw[->] (n7.0) -- node[word,above]{which /1}(n10.180);
% --- edges leaving n2 towards the axis and the upper part ---
\draw[->] (n2.0) -- node[word,above,pos=0.5]{hostages /0.300}(n6.180);
\draw[->] (n2.45) -- node[word,above,rotate=18,pos=0.3]{a /0.573}(n11.-135);
% --- edges leaving n1 and n11 ---
\draw[->,rounded corners=1em] (n1.-45) node[word,above,xshift=1.4em,yshift=-1.3em,rotate=-43]{house /0.078} -- ([yshift=-0.4em,xshift=-1em]n11.-90) -- (n7.100);
\draw[->] (n1.20) node[word,above,xshift=4em]{conference /0.734} .. controls ([xshift=8em]n1.20) and ([xshift=-0.6em,yshift=2.2em]n5.110) .. (n5.110);
\draw[->] (n11.0) -- node[word,above]{conference /1}(n5.180);
% --- edges from n5 into n6 ---
% The second control point is a plain node border anchor, `n6.150`.
\draw[->] (n5.-90) ..node[word,above,xshift=1.4em]{is /0.773} controls ([yshift=-1.6em]n5.-90) and ([xshift=-3em]n6.150) .. (n6.150);
\draw[->] (n5.0) node[word, above,xshift=1.4em]{as /0.226}.. controls ([xshift=2.6em]n5.0) and ([xshift=-0.6em,yshift=2em]n6.120) .. (n6.120);
% --- long arc n1 -> n6 over the top, bent through the helper point (a) ---
\coordinate (a) at ([xshift=6em,yshift=3em]n1);
\draw[->] (n1.60) .. controls ([xshift=3em,yshift=2em]n1.60) and ([xshift=-2em]a) .. (a) node[word,above,xshift=1em]{hostage /0.187}.. controls ([xshift=8em]a) and ([xshift=-0.6em,yshift=6em]n6.90) .. (n6.90);
% --- tail of the lattice: n10 -> n6 -> n8 -> n9 ---
\draw[->] (n10.0) -- node[above,word,pos=0.4,rotate=30]{is /1}(n6.-135);
\draw[->] (n6.0) -- node[above,word,yshift=0.2em]{being /1}(n8.180);
\draw[->] (n8.0) -- node[above,word,yshift=0.3em]{recorded /1}(n9.180);
\end{tikzpicture}
\ No newline at end of file
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论