\node[emb,fill=gray!30] (deemb_\x) at (24em+4em*\x,0){\small\bfnew{Emb}};
% Encoder column. src_emb is placed at the origin; every later layer is
% anchored by its south edge at a fixed yshift above the north edge of the
% layer below it (3.7em, 1em, 2.4em).
\node[layer,fill=red!15]
  (src_emb) at (0,0)
  {\scriptsize\textbf{Input Embedding}};
\node[anchor=south,layer,fill=yellow!20]
  (src_sa) at ([yshift=3.7em]src_emb.north)
  {\scriptsize\textbf{Self-attention}};
\node[anchor=south,layer,fill=orange!20]
  (src_ff) at ([yshift=1em]src_sa.north)
  {\scriptsize\textbf{Feed Forward}};
\node[anchor=south,layer,fill=blue!20]
  (src_sf) at ([yshift=2.4em]src_ff.north)
  {\scriptsize\textbf{Softmax}};
% Multi-head self-attention bars: a1 stands alone at x=12em, while a2..a4 are
% stacked at x=36em with 4em between consecutive rows (5em, 9em, 13em).
\node[attention] (a1) at (12em, 5em) {\small\bfnew{Multi-Head Self-Attention}};
\foreach \k/\row in {2/5em, 3/9em, 4/13em}
  \node[attention] (a\k) at (36em, \row) {\small\bfnew{Multi-Head Self-Attention}};
% Decoder column. tgt_emb starts 4.4em to the right of src_emb; each further
% layer is anchored by its south edge at a yshift above the previous layer.
\node[anchor=west,layer,fill=red!15]
  (tgt_emb) at ([xshift=4.4em]src_emb.east)
  {\scriptsize\textbf{Output Embedding}};
% Three attention sub-layers share the same yellow fill.
\node[anchor=south,layer,fill=yellow!20]
  (tgt_sa) at ([yshift=3.7em]tgt_emb.north)
  {\scriptsize\textbf{Self-attention}};
\node[anchor=south,layer,fill=yellow!20]
  (tgt_pa) at ([yshift=1.1em]tgt_sa.north)
  {\scriptsize\textbf{Positional Attention}};
\node[anchor=south,layer,fill=yellow!20]
  (tgt_eda) at ([yshift=1.5em]tgt_pa.north)
  {\scriptsize\textbf{Encoder-Decoder}\\\scriptsize\textbf{Attention}};
% Feed-forward, linear projection and softmax on top.
\node[anchor=south,layer,fill=orange!20]
  (tgt_ff) at ([yshift=1em]tgt_eda.north)
  {\scriptsize\textbf{Feed Forward}};
\node[anchor=south,layer,fill=green!20]
  (tgt_linear) at ([yshift=1.4em]tgt_ff.north)
  {\scriptsize\textbf{Linear}};
\node[anchor=south,layer,fill=blue!20]
  (tgt_sf) at ([yshift=1em]tgt_linear.north)
  {\scriptsize\textbf{Softmax}};
% Row of five "MLP" cells (enmlp_1 .. enmlp_5) at height 9em, one every 4em.
\foreach \col in {1,...,5} {
  \node[emb,fill=cyan!35] (enmlp_\col) at (0+4em*\col,9em) {\small\bfnew{MLP}};
}
% Digit labels above the encoder softmax, reading 1 1 2 0 1 from left to right.
% w3 is centred 0.8em above src_sf; the others are chained outwards from it at
% 0.5em spacing (presumably one count per source token -- confirm with the
% figure caption).
\node[font=\footnotesize,anchor=south] (w3) at ([yshift=0.8em]src_sf.north) {\scriptsize\textbf{2}};
% Left-hand neighbours: each one is anchored east against the west side of the previous node.
\foreach \name/\nbr/\digit in {w2/w3/1, w1/w2/1}
  \node[font=\footnotesize,anchor=east] (\name) at ([xshift=-0.5em]\nbr.west) {\scriptsize\textbf{\digit}};
% Right-hand neighbours: mirrored, anchored west against the east side.
\foreach \name/\nbr/\digit in {w4/w3/0, w5/w4/1}
  \node[font=\footnotesize,anchor=west] (\name) at ([xshift=0.5em]\nbr.east) {\scriptsize\textbf{\digit}};
% Sentence labels: one 1em above the decoder softmax, and one 1em below each
% embedding layer.
\node[font=\footnotesize,anchor=south]
  (output) at ([yshift=1em]tgt_sf.north)
  {\scriptsize\textbf{Wir akzeptieren das voll und ganz}};
\node[font=\footnotesize,anchor=north]
  (src) at ([yshift=-1em]src_emb.south)
  {\scriptsize\textbf{We totally accept it .}};
\node[font=\footnotesize,anchor=north]
  (tgt) at ([yshift=-1em]tgt_emb.south)
  {\scriptsize\textbf{We totally accept accept .}};
% Row of five "MLP" cells (demlp_1 .. demlp_5) at height 17em; the row starts
% 24em to the right and advances 4em per cell.
\foreach \col in {1,...,5} {
  \node[emb,fill=cyan!35] (demlp_\col) at (24em+4em*\col,17em) {\small\bfnew{MLP}};
}
% Two empty circle nodes at height 2.5em: src_add on the x=0 axis and src_pos
% (tinted orange!7) 2.5em to its left. Their inner glyphs are drawn separately.
\node[cir]
  (src_add) at (0,2.5em) {};
\node[cir,fill=orange!7]
  (src_pos) at (-2.5em,2.5em) {};
% Row of five "SoftMax" cells (ensf_1 .. ensf_5) at height 17em, one every 4em.
% inner sep is reduced to 1pt for these cells.
\foreach \col in {1,...,5} {
  \node[emb,fill=red!25,inner sep=1pt] (ensf_\col) at (0+4em*\col,17em) {\small\bfnew{SoftMax}};
}
% Two empty circle nodes at height 2.5em: tgt_add at x=12em and tgt_pos
% (tinted orange!7) 2.5em to its right. Their inner glyphs are drawn separately.
\node[cir]
  (tgt_add) at (12em,2.5em) {};
\node[cir,fill=orange!7]
  (tgt_pos) at (14.5em,2.5em) {};
% Row of five "SoftMax" cells (desf_1 .. desf_5) at height 22em; the row starts
% 24em to the right and advances 4em per cell.
% NOTE(review): the node text switches to \small itself, so the font=\scriptsize
% option looks redundant -- kept unchanged here.
\foreach \col in {1,...,5} {
  \node[emb,fill=ugreen!25!white,inner sep=1pt, font=\scriptsize] (desf_\col) at (24em+4em*\col,22em) {\small\bfnew{SoftMax}};
}
% Glyphs inside the circle nodes, drawn for the src side first and then the
% tgt side:
%   *_add : a plus sign (vertical stroke 90 -> -90, horizontal stroke 0 -> 180)
%   *_pos : an S-shaped Bezier curve from the 180 anchor to the 0 anchor, with
%           control points offset by +/-0.8em from the two ends.
\foreach \side in {src,tgt} {
  \draw[-,thick] (\side_add.90) -- (\side_add.-90);
  \draw[-,thick] (\side_add.0) -- (\side_add.180);
  \draw[-,thick] (\side_pos.180)
    .. controls ([xshift=0.8em,yshift=0.8em]\side_pos.180)
            and ([xshift=-0.8em,yshift=-0.8em]\side_pos.0)
    .. (\side_pos.0);
}
% Unframed digit cells enout_1 .. enout_5 at height 21em, one every 4em,
% showing 1 1 2 0 1 from left to right. Each list entry is <column>/<digit>.
\foreach \col/\digit in {1/1, 2/1, 3/2, 4/0, 5/1}
  \node[minimum height=2.4em,minimum width=3.6em] (enout_\col) at (0+4em*\col,21em) {\small\bfnew{\digit}};
% Vertical connectors, each running from the north anchor of the lower node to
% the south anchor of the upper node. Each list entry is <lower>/<upper>.
% NOTE(review): within this span no arrow leaves tgt_pa and none enters
% tgt_emb -- confirm they are drawn elsewhere or omitted on purpose.
\foreach \tail/\head in {src_emb/src_add, src_add/src_sa, src_sa/src_ff, src_ff/src_sf,
                         tgt_emb/tgt_add, tgt_add/tgt_sa, tgt_sa/tgt_pa,
                         tgt_eda/tgt_ff, tgt_ff/tgt_linear, tgt_linear/tgt_sf}
  \draw[line] (\tail.north) -- (\head.south);
% Horizontal connectors from each *_pos circle into its *_add circle
% (src_pos lies to the left of src_add, tgt_pos to the right of tgt_add).
\draw[line] (src_pos.0) -- (src_add.180);
\draw[line] (tgt_pos.180) -- (tgt_add.0);
% Links between the layer stacks and the text labels above/below them.
\draw[line] (src_sf.north) -- (w3.south);
\draw[line] (tgt_sf.north) -- (output.south);
\draw[line] (src.north) -- (src_emb.south);
% Double-headed arc below src_sa, joining two points 2em either side of its
% south anchor.
\draw[line,<->,out=-35,in=-145]
  ([xshift=-2em]src_sa.south) to ([xshift=2em]src_sa.south);
% Three module boxes in a row, 8em wide each, with 4em between neighbours.
\node[module, minimum width=8em] (encoder) at (0,0) {编码器组件};
% decoder and decoder2 carry the same label; each is anchored west against the
% east side of the module before it. Each list entry is <new node>/<left neighbour>.
\foreach \cur/\prev in {decoder/encoder, decoder2/decoder}
  \node[module,anchor=west, minimum width=8em] (\cur) at ([xshift=4em]\prev.east) {解码器组件};
% Narrower input box 2em below decoder2.
\node[module,anchor=north, minimum width=6em,font=\scriptsize,inner ysep=4pt]
  (deinput) at ([yshift=-2em]decoder2.south)
  {解码端输入};
% Title placed 0.4em above each module box.
\node[anchor=south,font=\footnotesize]
  (mod1) at ([yshift=0.4em]encoder.north)
  {\small\bfnew{编码器模块}};
\node[anchor=south,font=\footnotesize]
  (mod2) at ([yshift=0.4em]decoder.north)
  {\small\bfnew{重排序模块}};
\node[anchor=south,font=\footnotesize]
  (mod3) at ([yshift=0.4em]decoder2.north)
  {\small\bfnew{解码端}};
% Frames placed on the layer named `background`: each `box` node is fitted
% around one module together with the title above it.
\begin{pgfonlayer}{background}
  \node[box,fit=(encoder)(mod1)] (box1) {};
  \node[box,fit=(decoder)(mod2)] (box2) {};
  \node[box,fit=(decoder2)(mod3)] (box3) {};
\end{pgfonlayer}
% Two-line sentence captions hung 2em below the encoder and decoder modules;
% both carry the same text. Each list entry is <label node>/<module above it>.
\foreach \name/\base in {w1/encoder, w2/decoder}
  \node[anchor=north,font=\scriptsize,align=center] (\name) at ([yshift=-2em]\base.south)
    {\scriptsize\bfnew{There exist different}\\\scriptsize\bfnew{opinions on this question}};
% Grey remark under the decoder caption; the +0.6em shift places its north
% anchor slightly above w2's south edge.
\node[anchor=north,font=\scriptsize,text=gray]
  (w3) at ([yshift=0.6em]w2.south)
  {\scriptsize\bfnew{(copy source sentence)}};
% Two-line caption 1.6em above the frame around the decoder module.
\node[anchor=south,font=\scriptsize,align=center]
  (w4) at ([yshift=1.6em]box2.north)
  {\scriptsize\bfnew{on this question}\\\scriptsize\bfnew{There exist different opinions}};