% Named colours (rgb components in the range 0..1).
% Only `aliceblue` is referenced by the styles below; the others are kept
% because they may be used elsewhere in the document.
\definecolor{Melon}{rgb}{0.99, 0.74, 0.71}
\definecolor{Goldenrod}{rgb}{0.85, 0.65, 0.13}
\definecolor{Cerulean}{rgb}{0, 0.48, 0.65}
\definecolor{Gray}{rgb}{0.5, 0.5, 0.5}
\definecolor{aliceblue}{rgb}{0.94, 0.97, 1.0}

% Box styles for the layer diagram.
% \tikzset with /.style replaces the deprecated \tikzstyle command; the style
% names and the options they expand to are unchanged.
\tikzset{
  % Shared geometry and outline of every layer box.
  transformer layer box/.style = {
    rectangle, very thick, rounded corners,
    minimum width=3cm, text centered, draw=black!70
  },
  % Embedding box.
  emb/.style     = {transformer layer box, minimum height=0.85cm, fill=red!15},
  % Self-attention box.
  sa/.style      = {transformer layer box, minimum height=1cm, fill=yellow!20},
  % Encoder-decoder attention box; align=center allows a two-line label.
  edsa/.style    = {transformer layer box, minimum height=1.5cm, align=center, fill=yellow!20},
  % Add & Norm box.
  an/.style      = {transformer layer box, minimum height=0.7cm, fill=aliceblue},
  % Feed-forward box; align=center allows a two-line label.
  ff/.style      = {transformer layer box, minimum height=1cm, align=center, fill=orange!20},
  % Linear projection box.
  linear/.style  = {transformer layer box, minimum height=0.7cm, fill=green!20},
  % Softmax box.
  softmax/.style = {transformer layer box, minimum height=0.7cm, fill=blue!20},
}
% Three-column layer diagram: a left stack fed by "Input Embedding" and two
% stacks (middle, right) fed by "Parser Embedding" that additionally contain an
% "Encoder-Decoder Attention" layer, a Linear layer and a Softmax layer.
% All offsets are hand-tuned; node names are referenced across sections, so
% keep them stable.
\begin{tikzpicture}[
  node distance = 0,
  scale = 0.7,
  every node/.style = {scale=0.7},              % replaces the deprecated \tikzstyle{every node}
  % Picture-local helper styles (scoped to this tikzpicture only).
  wire/.style = {very thick,draw=black!70},     % plain connector stroke
  flow/.style = {->,very thick,draw=black!70},  % connector ending in an arrow head
  op circle/.style = {circle,very thick,minimum width=0.5cm,draw=black!70},
  module frame/.style = {rectangle,very thick,rounded corners,minimum width=4.3cm,text centered,draw=black!70},
]

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Left column
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\node(left_Emb)[emb]{\footnotesize{\textbf{Input Embedding}}};
% Circle with a cross drawn inside (the two short strokes form a plus sign).
\node(left_cir)[op circle,above of = left_Emb,yshift=1.1cm]{};
\draw[wire]([xshift=0.03cm]left_cir.west)--([xshift=-0.03cm]left_cir.east);
\draw[wire]([yshift=-0.03cm]left_cir.north)--([yshift=0.03cm]left_cir.south);
% Circle with one sine period drawn inside (presumably the positional-encoding
% symbol -- confirm against the figure caption), feeding into the plus circle.
\node(left_cir2)[op circle,left of = left_cir,xshift=-1.5cm]{};
\draw[wire]([xshift=0.04cm]left_cir2.west)
  sin([xshift=0.14cm,yshift=0.08cm]left_cir2.west)
  cos([xshift=0.25cm]left_cir2.west)
  sin([xshift=0.36cm,yshift=-0.08cm]left_cir2.west)
  cos([xshift=-0.03cm]left_cir2.east);
\draw[flow](left_cir2.east)--(left_cir.west);
% Layer stack, bottom to top.
\node(left_Self)[sa,above of = left_cir,yshift=1.6cm]{\textbf{Self-attention}};
\node(left_Add_bottom)[an,above of = left_Self,yshift=1.1cm]{\textbf{Add \& Norm}};
\node(left_Feed)[ff,above of = left_Add_bottom,yshift=1.2cm]{\textbf{Feed}\\\textbf{Forward}};
\node(left_Add_top)[an,above of = left_Feed,yshift=1.1cm]{\textbf{Add \& Norm}};
% Source-sentence label below the embedding box.
\node(left_text_bottom)[below of = left_Emb,xshift=0cm,yshift=-1.2cm,scale=1]{\small\sffamily\bfseries{我\quad 爱\quad  我的\quad  狗}};
% Main vertical flow.
\draw[flow]([yshift=-0.5cm]left_Emb.south)--(left_Emb.south);
\draw[flow](left_Emb.north)--(left_cir.south);
\draw[flow](left_cir.north)--(left_Self.south);
\draw[flow](left_Self.north)--(left_Add_bottom.south);
\draw[flow](left_Add_bottom.north)--(left_Feed.south);
\draw[flow](left_Feed.north)--(left_Add_top.south);
% Bypass connections that branch off the main flow and re-enter each
% Add & Norm box from the left side.
\draw[flow]([yshift=0.35cm]left_cir.north)--([xshift=-2cm,yshift=0.35cm]left_cir.north)--([xshift=-0.5cm]left_Add_bottom.west)--(left_Add_bottom.west);
\draw[flow]([yshift=0.1cm]left_Add_bottom.north)--([xshift=-2cm,yshift=0.1cm]left_Add_bottom.north)--([xshift=-0.5cm]left_Add_top.west)--(left_Add_top.west);
% Two curved side inputs so that Self-attention receives three arrows in total.
\draw[flow,in=250,out=0]([yshift=0.5cm]left_cir.north)to([xshift=0.9cm]left_Self.south);
\draw[flow,in=290,out=180]([yshift=0.5cm]left_cir.north)to([xshift=-0.9cm]left_Self.south);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Middle column
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\node(Emb)[emb,right of = left_Emb,xshift=5cm]{\footnotesize{\textbf{Parser Embedding}}};
% Plus circle and sine circle, mirrored: the sine circle sits on the right here.
\node(cir)[op circle,above of = Emb,yshift=1.1cm]{};
\draw[wire]([xshift=0.03cm]cir.west)--([xshift=-0.03cm]cir.east);
\draw[wire]([yshift=-0.03cm]cir.north)--([yshift=0.03cm]cir.south);
\node(cir2)[op circle,right of = cir,xshift=1.5cm]{};
\draw[wire]([xshift=0.04cm]cir2.west)
  sin([xshift=0.14cm,yshift=0.08cm]cir2.west)
  cos([xshift=0.25cm]cir2.west)
  sin([xshift=0.36cm,yshift=-0.08cm]cir2.west)
  cos([xshift=-0.03cm]cir2.east);
% Layer stack, bottom to top.
\node(Self)[sa,above of = cir,yshift=1.6cm]{\textbf{Self-attention}};
\node(Add_bottom)[an,above of = Self,yshift=1.1cm]{\textbf{Add \& Norm}};
\node(ED_Self)[edsa,above of = Add_bottom,yshift=1.8cm]{\textbf{Encoder-Decoder}\\ \textbf{Attention}};
\node(Add_mid)[an,above of = ED_Self,yshift=1.35cm]{\textbf{Add \& Norm}};
\node(Feed)[ff,above of = Add_mid,yshift=1.2cm]{\textbf{Feed}\\ \textbf{Forward}};
\node(Add_top)[an,above of = Feed,yshift=1.1cm]{\textbf{Add \& Norm}};
\node(Linear)[linear,above of = Add_top,yshift=1.3cm]{\textbf{Linear}};
\node(Softmax)[softmax,above of = Linear,yshift=1cm]{\textbf{Softmax}};
% Input and output token labels.
% NOTE(review): the literal < and > only print as angle brackets with a T1 or
% Unicode font encoding -- confirm the document's engine/encoding.
\node(text_bottom)[below of = Emb,xshift=0.2cm,yshift=-1.2cm,scale=0.9]{\textbf{VP1\ \ VP3\ \  <eos>}};
\node(text_top)[above of = Softmax,xshift=0.2cm,yshift=1.2cm,scale=0.9]{\textbf{VP1\ \ VP3\ \  <eos>}};
% Three input arrows into the embedding box.
\draw[flow]([yshift=-0.5cm]Emb.south)--(Emb.south);
\draw[flow]([xshift=0.9cm,yshift=-0.5cm]Emb.south)--([xshift=0.9cm]Emb.south);
\draw[flow]([xshift=-0.9cm,yshift=-0.5cm]Emb.south)--([xshift=-0.9cm]Emb.south);
% Main vertical flow up to Self-attention, plus the sine-circle input.
\draw[flow](Emb.north)--(cir.south);
\draw[flow](cir.north)--(Self.south);
\draw[flow](cir2.west)--(cir.east);
\draw[flow,in=250,out=0]([yshift=0.5cm]cir.north)to([xshift=0.9cm]Self.south);
\draw[flow,in=290,out=180]([yshift=0.5cm]cir.north)to([xshift=-0.9cm]Self.south);
% Main vertical flow through the remaining layers and out of the top.
\draw[flow](Self.north)--(Add_bottom.south);
\draw[flow](ED_Self.north)--(Add_mid.south);
\draw[flow](Add_mid.north)--(Feed.south);
\draw[flow](Feed.north)--(Add_top.south);
\draw[flow](Add_top.north)--(Linear.south);
\draw[flow](Linear.north)--(Softmax.south);
\draw[flow](Softmax.north)--([yshift=0.5cm]Softmax.north);
% Bypass connections entering the Add & Norm boxes from the right side.
\draw[flow]([yshift=0.35cm]cir.north)--([xshift=2cm,yshift=0.35cm]cir.north)--([xshift=0.5cm]Add_bottom.east)--(Add_bottom.east);
\draw[flow]([yshift=0.1cm]Add_mid.north)--([xshift=2cm,yshift=0.1cm]Add_mid.north)--([xshift=0.5cm]Add_top.east)--(Add_top.east);
% Output of the left column routed over and down into Encoder-Decoder Attention.
\draw[flow](left_Add_top.north)--([yshift=0.6cm]left_Add_top.north)--([xshift=2.2cm,yshift=0.6cm]left_Add_top.north)--([xshift=2.2cm,yshift=-2cm]left_Add_top.north)--([xshift=5cm,yshift=-2cm]left_Add_top.north)--(ED_Self.south);
% Two short side arrows into Encoder-Decoder Attention (start offsets differ
% on purpose in the original: 0.45cm on the right, 0.35cm on the left).
\draw[flow]([xshift=0.9cm,yshift=-0.45cm]ED_Self.south)--([xshift=0.9cm]ED_Self.south);
\draw[flow]([xshift=-0.9cm,yshift=-0.35cm]ED_Self.south)--([xshift=-0.9cm]ED_Self.south);
% Bypass from the lower Add & Norm to the middle Add & Norm.
\draw[flow](Add_bottom.north)--([yshift=0.2cm]Add_bottom.north)--([xshift=2cm,yshift=0.2cm]Add_bottom.north)--([xshift=0.5cm]Add_mid.east)--(Add_mid.east);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Right column (same layer stack as the middle column)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\node(right_Emb)[emb,right of = Emb,xshift=5.5cm]{\footnotesize{\textbf{Parser Embedding}}};
\node(right_cir)[op circle,above of = right_Emb,yshift=1.1cm]{};
\draw[wire]([xshift=0.03cm]right_cir.west)--([xshift=-0.03cm]right_cir.east);
\draw[wire]([yshift=-0.03cm]right_cir.north)--([yshift=0.03cm]right_cir.south);
\node(right_cir2)[op circle,right of = right_cir,xshift=1.5cm]{};
\draw[wire]([xshift=0.04cm]right_cir2.west)
  sin([xshift=0.14cm,yshift=0.08cm]right_cir2.west)
  cos([xshift=0.25cm]right_cir2.west)
  sin([xshift=0.36cm,yshift=-0.08cm]right_cir2.west)
  cos([xshift=-0.03cm]right_cir2.east);
% Layer stack, bottom to top.
\node(right_Self)[sa,above of = right_cir,yshift=1.6cm]{\textbf{Self-attention}};
\node(right_Add_bottom)[an,above of = right_Self,yshift=1.1cm]{\textbf{Add \& Norm}};
\node(right_ED_Self)[edsa,above of = right_Add_bottom,yshift=1.8cm]{\textbf{Encoder-Decoder}\\\textbf{Attention}};
\node(right_Add_mid)[an,above of = right_ED_Self,yshift=1.35cm]{\textbf{Add \& Norm}};
\node(right_Feed)[ff,above of = right_Add_mid,yshift=1.2cm]{\textbf{Feed}\\\textbf{Forward}};
\node(right_Add_top)[an,above of = right_Feed,yshift=1.1cm]{\textbf{Add \& Norm}};
\node(right_Linear)[linear,above of = right_Add_top,yshift=1.3cm]{\textbf{Linear}};
\node(right_Softmax)[softmax,above of = right_Linear,yshift=1cm]{\textbf{Softmax}};
% Input and output token labels (the fourth <Mask> token was missing its
% closing bracket in the original).
\node(right_text_bottom)[below of = right_Emb,xshift=1.2cm,yshift=-1.2cm,scale=0.8]{\textbf{VP1 <Mask> VP3 <Mask> <Mask> <Mask>}};
\node(right_text_top)[above of = right_Softmax,xshift=0cm,yshift=1.2cm,scale=0.9]{\textbf{VP1 I VP3 love my dog}};
% Main vertical flow up to Self-attention, plus the sine-circle input.
\draw[flow]([yshift=-0.5cm]right_Emb.south)--(right_Emb.south);
\draw[flow](right_Emb.north)--(right_cir.south);
\draw[flow](right_cir.north)--(right_Self.south);
\draw[flow](right_cir2.west)--(right_cir.east);
\draw[flow,in=250,out=0]([yshift=0.5cm]right_cir.north)to([xshift=0.9cm]right_Self.south);
\draw[flow,in=290,out=180]([yshift=0.5cm]right_cir.north)to([xshift=-0.9cm]right_Self.south);
% Main vertical flow through the remaining layers and out of the top.
\draw[flow](right_Self.north)--(right_Add_bottom.south);
\draw[flow](right_ED_Self.north)--(right_Add_mid.south);
\draw[flow](right_Add_mid.north)--(right_Feed.south);
\draw[flow](right_Feed.north)--(right_Add_top.south);
\draw[flow](right_Add_top.north)--(right_Linear.south);
\draw[flow](right_Linear.north)--(right_Softmax.south);
\draw[flow](right_Softmax.north)--([yshift=0.5cm]right_Softmax.north);
% Bypass connections entering the Add & Norm boxes from the right side.
\draw[flow]([yshift=0.35cm]right_cir.north)--([xshift=2cm,yshift=0.35cm]right_cir.north)--([xshift=0.5cm]right_Add_bottom.east)--(right_Add_bottom.east);
\draw[flow]([yshift=0.1cm]right_Add_mid.north)--([xshift=2cm,yshift=0.1cm]right_Add_mid.north)--([xshift=0.5cm]right_Add_top.east)--(right_Add_top.east);
% Two short side arrows into Encoder-Decoder Attention.
\draw[flow]([xshift=0.9cm,yshift=-0.45cm]right_ED_Self.south)--([xshift=0.9cm]right_ED_Self.south);
\draw[flow]([xshift=-0.9cm,yshift=-0.35cm]right_ED_Self.south)--([xshift=-0.9cm]right_ED_Self.south);
% Dashed detour: continues the left column's output line up and over the
% middle column, then down to the height where it enters the right column.
\draw[wire,dashed]([xshift=2.2cm,yshift=0.6cm]left_Add_top.north)--([xshift=2.2cm,yshift=3.5cm]left_Add_top.north)--([xshift=8cm,yshift=3.5cm]left_Add_top.north)--([xshift=8cm,yshift=-2cm]left_Add_top.north);
% Bypass from the lower Add & Norm to the middle Add & Norm.
\draw[flow](right_Add_bottom.north)--([yshift=0.2cm]right_Add_bottom.north)--([xshift=2cm,yshift=0.2cm]right_Add_bottom.north)--([xshift=0.5cm]right_Add_mid.east)--(right_Add_mid.east);
% Solid end of the dashed detour, entering the right Encoder-Decoder Attention.
\draw[flow]([xshift=8cm,yshift=-2cm]left_Add_top.north)--([yshift=0.3cm]right_Add_bottom.north)--(right_ED_Self.south);
% Middle Softmax output routed right and down; the 12.75cm drop is hand-tuned
% (presumably to the height of the right column's input label -- confirm when
% changing any vertical spacing).
\draw[flow](Softmax.east)--([xshift=1.2cm]Softmax.east)--([xshift=1.2cm,yshift=-12.75cm]Softmax.east)--([xshift=2cm,yshift=-12.75cm]Softmax.east);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Module frames and repetition labels
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Empty rounded rectangles drawn around each column's layer stack.
\node(left_module)[module frame,minimum height=5.3cm,above of = left_Emb,xshift=-0.25cm,yshift=4.1cm]{};
\node(module)[module frame,minimum height=8.4cm,above of = Emb,xshift=0.25cm,yshift=5.65cm]{};
\node(right_module)[module frame,minimum height=8.4cm,above of = right_Emb,xshift=0.25cm,yshift=5.65cm]{};
% Repetition counts placed beside the frames.
\node(N)[right of = right_ED_Self,xshift=3cm,scale=1.3]{\textbf{$N\times$}};
\node(left_N)[left of = left_Feed,xshift=-3cm,scale=1.3]{\textbf{$N\times$}};
\node(M)[left of = ED_Self,xshift=-2.3cm,scale=1.3]{\textbf{$M\times$}};
\end{tikzpicture}