% Encoder column, bottom to top. src_emb sits at the origin; every further
% layer is placed at the north anchor of the node below it, shifted up by
% the given yshift.
% Each node is declared exactly once: a second \node with the same name
% draws a second box and rebinds the name to it.
\node[layer,fill=red!15] (src_emb) at (0,0){\scriptsize\textbf{Input Embedding}};
\node[anchor=south,layer,fill=yellow!20] (src_sa) at ([yshift=3.7em]src_emb.north){\scriptsize\textbf{Self-attention}};
\node[anchor=south,layer,fill=orange!20] (src_ff) at ([yshift=1em]src_sa.north){\scriptsize\textbf{Feed Forward}};
% yshift=2.6em is the offset of the declaration that src_sf resolved to
% (the later of the two former declarations).
\node[anchor=south,layer,fill=blue!20] (src_sf) at ([yshift=2.6em]src_ff.north){\scriptsize\textbf{Softmax}};
% Decoder column, bottom to top. tgt_emb is placed to the right of src_emb
% (4.4em from its east anchor); every further layer is placed at the north
% anchor of the node below it, shifted up by the given yshift.
% Each node is declared exactly once: a second \node with the same name
% draws a second box and rebinds the name to it.
\node[anchor=west,layer,fill=red!15] (tgt_emb) at ([xshift=4.4em]src_emb.east){\scriptsize\textbf{Output Embedding}};
\node[anchor=south,layer,fill=yellow!20] (tgt_sa) at ([yshift=3.7em]tgt_emb.north){\scriptsize\textbf{Self-attention}};
% yshift=1.5em is the offset of the declaration that tgt_pa resolved to
% (the later of the two former declarations).
\node[anchor=south,layer,fill=yellow!20] (tgt_pa) at ([yshift=1.5em]tgt_sa.north){\scriptsize\textbf{Positional Attention}};
% Two-line label: \\ breaks the line and \scriptsize is repeated for the second line.
\node[anchor=south,layer,fill=yellow!20] (tgt_eda) at ([yshift=1.5em]tgt_pa.north){\scriptsize\textbf{Encoder-Decoder}\\\scriptsize\textbf{Attention}};
\node[anchor=south,layer,fill=orange!20] (tgt_ff) at ([yshift=1em]tgt_eda.north){\scriptsize\textbf{Feed Forward}};
\node[anchor=south,layer,fill=green!20] (tgt_linear) at ([yshift=1.4em]tgt_ff.north){\scriptsize\textbf{Linear}};
% ...
% @@ -36,6 +36,11 @@
% ...
% @@ -36,6 +36,11 @@
% Small empty circle nodes (style `cir`) next to the decoder column.
% tgt_add and tgt_pos use absolute coordinates; tgt_pos2 is placed relative
% to the north anchor of tgt_pa. Each node is declared exactly once.
\node[cir] (tgt_add) at (12em,2.5em) {};
\node[cir,fill=orange!7] (tgt_pos) at (14.5em,2.5em) {};
\node[cir,fill=orange!7] (tgt_pos2) at ([xshift=4.5em,yshift=-2.45em]tgt_pa.north) {};
% Encoder column, bottom to top. src_emb is at the origin; each following
% node is positioned at the north anchor of the node below it, raised by
% the stated yshift.
\node[layer, fill=red!15]
  (src_emb) at (0,0)
  {\scriptsize\textbf{Input Embedding}};
\node[anchor=south, layer, fill=yellow!20]
  (src_sa) at ([yshift=2.8em]src_emb.north)
  {\scriptsize\textbf{Self-Attention}};
\node[anchor=south, layer, fill=gray!20]
  (src_norm1) at ([yshift=0.6em]src_sa.north)
  {\scriptsize\textbf{Add \& LayerNorm}};
% Two-line label: \\ breaks the line, \scriptsize is repeated for line two.
\node[anchor=south, layer, fill=orange!20]
  (src_ff) at ([yshift=0.6em]src_norm1.north)
  {\scriptsize\textbf{Feed Forward}\\\scriptsize\textbf{Network}};
\node[anchor=south, layer, fill=gray!20]
  (src_norm2) at ([yshift=0.6em]src_ff.north)
  {\scriptsize\textbf{Add \& LayerNorm}};
\node[anchor=south, layer, fill=blue!20]
  (src_sf) at ([yshift=1.6em]src_norm2.north)
  {\scriptsize\textbf{Softmax}};
% Decoder column, bottom to top. tgt_emb starts 4.4em to the right of
% src_emb's east anchor; each following node is positioned at the north
% anchor of the node below it, raised by the stated yshift.
\node[anchor=west, layer, fill=red!15]
  (tgt_emb) at ([xshift=4.4em]src_emb.east)
  {\scriptsize\textbf{Output Embedding}};
\node[anchor=south, layer, fill=yellow!20]
  (tgt_sa) at ([yshift=2.8em]tgt_emb.north)
  {\scriptsize\textbf{Self-Attention}};
\node[anchor=south, layer, fill=yellow!20]
  (tgt_pa) at ([yshift=1.4em]tgt_sa.north)
  {\scriptsize\textbf{Positional Attention}};
\node[anchor=south, layer, fill=gray!20]
  (tgt_norm1) at ([yshift=0.6em]tgt_pa.north)
  {\scriptsize\textbf{Add \& LayerNorm}};
% Two-line label: \\ breaks the line, \scriptsize is repeated for line two.
\node[anchor=south, layer, fill=yellow!20]
  (tgt_eda) at ([yshift=1.4em]tgt_norm1.north)
  {\scriptsize\textbf{Encoder-Decoder}\\\scriptsize\textbf{Attention}};
\node[anchor=south, layer, fill=gray!20]
  (tgt_norm2) at ([yshift=0.6em]tgt_eda.north)
  {\scriptsize\textbf{Add \& LayerNorm}};
\node[anchor=south, layer, fill=orange!20]
  (tgt_ff) at ([yshift=0.6em]tgt_norm2.north)
  {\scriptsize\textbf{Feed Forward}\\\scriptsize\textbf{Network}};
\node[anchor=south, layer, fill=gray!20]
  (tgt_norm3) at ([yshift=0.6em]tgt_ff.north)
  {\scriptsize\textbf{Add \& LayerNorm}};
\node[anchor=south, layer, fill=green!20]
  (tgt_linear) at ([yshift=1.1em]tgt_norm3.north)
  {\scriptsize\textbf{Linear}};
\node[anchor=south, layer, fill=blue!20]
  (tgt_sf) at ([yshift=0.6em]tgt_linear.north)
  {\scriptsize\textbf{Softmax}};
% Row of five numbers above the encoder Softmax, left to right: 1 1 2 0 1.
% w3 is anchored above src_sf; w2 and w1 chain to its left, w4 and w5 to
% its right, each 0.5em from its neighbour.
% NOTE(review): presumably one count per source token (the decoder input
% below repeats "accept" and omits "it", matching 1 1 2 0 1) -- confirm
% against the surrounding text.
\node[font=\footnotesize, anchor=south]
  (w3) at ([yshift=0.8em]src_sf.north)
  {\scriptsize\textbf{2}};
\node[font=\footnotesize, anchor=east]
  (w2) at ([xshift=-0.5em]w3.west)
  {\scriptsize\textbf{1}};
\node[font=\footnotesize, anchor=east]
  (w1) at ([xshift=-0.5em]w2.west)
  {\scriptsize\textbf{1}};
\node[font=\footnotesize, anchor=west]
  (w4) at ([xshift=0.5em]w3.east)
  {\scriptsize\textbf{0}};
\node[font=\footnotesize, anchor=west]
  (w5) at ([xshift=0.5em]w4.east)
  {\scriptsize\textbf{1}};
% Sentence shown above the decoder Softmax.
\node[font=\footnotesize, anchor=south]
  (output) at ([yshift=0.8em]tgt_sf.north)
  {\scriptsize\sffamily\bfseries{我们\quad 完全\quad 接受\quad 它\quad 。}};
% Sentences shown below the two embedding boxes.
\node[font=\footnotesize, anchor=north]
  (src) at ([yshift=-0.8em]src_emb.south)
  {\scriptsize\textbf{We totally accept it .}};
\node[font=\footnotesize, anchor=north]
  (tgt) at ([yshift=-0.8em]tgt_emb.south)
  {\scriptsize\textbf{We totally accept accept .}};
% Small empty circle nodes (style `cir`). The *_add / *_pos pairs use
% absolute coordinates at height 1.6em; tgt_pos2 is placed relative to the
% north anchor of tgt_pa.
\node[cir]
  (src_add) at (0,1.6em) {};
\node[cir, fill=orange!7]
  (src_pos) at (-2.5em,1.6em) {};
\node[cir]
  (tgt_add) at (9.7em,1.6em) {};
\node[cir, fill=orange!7]
  (tgt_pos) at (12.2em,1.6em) {};
\node[cir, fill=orange!7]
  (tgt_pos2) at ([xshift=3em,yshift=-1.74em]tgt_pa.north) {};