\begin{tikzpicture}

% Rows of small filled circles, spaced 1.6em apart, named c<group>_<index>.
% Each table entry reads: group / node count / x offset / y position / fill colour.
%   c1_*, c2_*, c3_* sit on the baseline (y = 0em) in ublue;
%   c4_* repeats the x positions of c3_* at y = 9.4em in orange.
\foreach \grp/\cnt/\xo/\yo/\clr in {1/4/0em/0em/ublue,2/6/8.4em/0em/ublue,3/5/20em/0em/ublue,4/5/20em/9.4em/orange}
{
	\foreach \x in {1,...,\cnt}
	{
		\node[circle,fill=\clr,minimum size=1em,inner sep=0pt] (c\grp_\x) at (\xo+1.6em*\x, \yo){};
	}
}

% Green (ugreen) circles: c5 above the centre, and the two slightly larger
% nodes qs / qw stacked at x = 18.6em.
\node[circle,fill=ugreen,minimum size=1em,inner sep=0pt] (c5) at (9em, 7em){};
\foreach \nm/\py in {qs/5em,qw/3em}
{
	\node[circle,fill=ugreen,minimum size=1.2em,inner sep=0pt] (\nm) at (18.6em, \py){};
}

% Orange circle labelled sigma, placed in the column x = 24.8em.
\node[circle,fill=orange,text=white,minimum size=1.2em,inner sep=0pt] (sigma) at (24.8em, 7em){\small\bfnew{$\sigma$}};

% Three green "+" circles: add1 and add2 at y = 3em, add3 between them at y = 5em.
\foreach \idx/\px/\py in {1/4em/3em,2/14em/3em,3/9em/5em}
{
	\node[circle,fill=ugreen,text=white,minimum size=1.2em,inner sep=0pt] (add\idx) at (\px, \py){\small\bfnew{+}};
}
% White, shadowed, rounded frames placed on the background layer so that the
% circles drawn earlier stay on top of them.
\begin{pgfonlayer}{background}
% box<g> is fitted around the first and last node of row c<g>_*.
\foreach \grp/\last in {1/4,2/6,3/5,4/5}
{
	\node[draw,rounded corners=2pt,drop shadow,fill=white,fit=(c\grp_1)(c\grp_\last)] (box\grp) {};
}
% box5 wraps the single node c5, with extra horizontal padding.
\node[draw,rounded corners=2pt,inner xsep=6pt,drop shadow,fill=white,fit=(c5)] (box5) {};
\end{pgfonlayer}

% Dashed, very thick frames at x = 24.8em (= 20em + 3*1.6em, i.e. the position
% of c3_3 and c4_3): n1 on the bottom row, n2 on the top row.
\node[draw,dash pattern=on 3pt off 1pt,minimum width=1.6em, minimum height=2em,very thick] (n1) at (24.8em,0em){};
\node[draw,dash pattern=on 3pt off 1pt,minimum width=1.6em, minimum height=2em,very thick] (n2) at (24.8em,9.4em){};
% Label below n1. The subscript is braced so that the macro call is passed to
% "_" as one group instead of relying on token-by-token expansion.
\node[] at (24.8em, -1.5em){$\mathbi{x}_{\mathbi{t}}$};
% Dots in the gap between the c1 row (ends at 6.4em) and the c2 row (starts at 10em).
\node[text=ublue] at (8.2em, 0em) {\small\bfnew{...}};

% Curved arrows from the top of every c1_* node into the bottom of add1.
% The first edge also carries the label; its sub- and superscript are braced so
% each \mathbi call is handed to "_" / "^" as a single group.
\draw[-latex, out=70, in=-120] (c1_1.90) node[xshift=-0.4em,yshift=1.2em]{$\mathbi{h}_{\mathbi{i}}^{\mathbi{j}}$} to (add1.-90);
\draw[-latex, out=80, in=-100] (c1_2.90) to (add1.-90);
\draw[-latex, out=100, in=-80] (c1_3.90) to (add1.-90);
\draw[-latex, out=110, in=-60] (c1_4.90) to (add1.-90);

% Curved arrows from the top of every c2_* node into the bottom of add2.
% Each table entry reads: node index / leaving angle / arriving angle.
\foreach \idx/\oa/\ia in {1/60/-140,2/70/-120,3/80/-100,4/100/-80,5/110/-60,6/120/-40}
{
	\draw[-latex, out=\oa, in=\ia] (c2_\idx.90) to (add2.-90);
}

% add1 and add2 both feed add3; add3 feeds box5; box5 feeds sigma.
% All sub-/superscripts below are braced so that each \mathbi call is passed
% to "_" / "^" as one group.
\draw[-latex, out=20, in=-150] (add1.90) node[xshift=-0.4em,yshift=1.2em]{$\mathbi{s}^{\mathbi{j}}$} to (add3.-90);
\draw[-latex, out=160, in=-30] (add2.90) to (add3.-90);
\draw[-latex] (add3.90) -- (box5.-90);
\draw[-latex] (box5.0) -- node[xshift=-3em,above]{$\mathbi{d}_{\mathbi{t}}$}(sigma.180);
% Lighter green edges leaving qs and qw; their labels are forced back to black.
\draw[-latex, ugreen!60] (qs.180) node[xshift=-1em,above,text=black]{$\mathbi{q}_{\mathbi{s}}$}-- (add3.0);
% NOTE(review): this edge has no arrow tip, unlike the qs edge above. Presumably
% intentional, since the next edge continues from add2 on to add1 -- confirm.
\draw[-, ugreen!60] (qw.180) node[xshift=-1em,above,text=black]{$\mathbi{q}_{\mathbi{w}}$}-- (add2.0);
\draw[-latex, ugreen!60] (add2.180) -- (add1.0);

% Edges leaving the dashed frame n1: two towards qw and qs, one straight up to sigma.
\draw[-latex] (n1.130) -- (qw.0);
\draw[-latex] (n1.120) -- (qs.0);
% Subscripts are braced so each \mathbi call is passed to "_" as one group.
\draw[-latex] (n1.90) node[yshift=1em,right]{$\mathbi{h}_{\mathbi{t}}$}-- (sigma.-90);
% sigma feeds the upper dashed frame n2, which emits a 2em arrow upwards.
\draw[-latex] (sigma.90) -- (n2.-90);
\draw[-latex] (n2.90) -- node[right]{$\widetilde{\mathbi{h}}_{\mathbi{t}}$}([yshift=2em]n2.90);

% Grey mirrored braces 2em below the boxes, each with a caption underneath.
% Each table entry reads: left box / right box / caption.
% The first brace spans box1 through box2, the second spans box3 only.
\foreach \lhs/\rhs/\lbl in {box1/box2/前几句,box3/box3/当前句}
{
	\draw[decorate,decoration={brace, mirror},gray, thick] ([yshift=-2em]\lhs.-180) -- node[font=\scriptsize,text=black,below]{\lbl}([yshift=-2em]\rhs.0);
}


% Legend: one colour swatch plus caption per node colour, chained left to right.
% The two styles below are local to this picture.
\tikzset{
	legend swatch/.style={rounded corners=1pt,inner sep=0pt,minimum size=1em},
	legend caption/.style={anchor=west,font=\footnotesize}
}

% First swatch is positioned absolutely (centre anchor); everything after it is
% anchored west, relative to the east side of the previous element.
\node[legend swatch,fill=ublue] (a1) at (2em,-4.5em) {};
\node[legend caption] (w1) at ([xshift=0.4em]a1.east) {编码表示};

\node[legend swatch,anchor=west,fill=ugreen] (a2) at ([xshift=2em]w1.east) {};
\node[legend caption] (w2) at ([xshift=0.4em]a2.east) {层次注意力};

\node[legend swatch,anchor=west,fill=orange] (a3) at ([xshift=2em]w2.east) {};
\node[legend caption] at ([xshift=0.4em]a3.east) {融合上下文信息的编码表示};
\end{tikzpicture}