% Diagram layout (all coordinates in em, picture scaled by 0.8):
%   - bottom row (y = 0): three groups of red cells c1_*, c2_*, c3_*
%   - top row (y = 10.4em): blue cells c4_*, at the same x positions as c3_*
%   - orange circles add1/add2 collect arrows from c1_* / c2_*, add3 collects
%     arrows from add1 and add2
%   - green circles qs/qw receive arrows from the dotted frame n1 and send
%     arrows to add3 (qs) and to add1/add2 (qw)
%   - sigma (circle with a cross) receives arrows from n1 and add3 and feeds n2
% Depends on macros/colours defined outside this snippet: \mathbi, \bfnew,
% ublue, ugreen. Requires the fit, shadows, decorations.pathreplacing and
% backgrounds TikZ libraries (or equivalent layer declarations).
\begin{tikzpicture}[
    node distance=0,
    scale=0.8,
    % Applied to every node; replaces the deprecated \tikzstyle form.
    every node/.style={scale=0.8},
    % Picture-local styles for the option lists that were repeated verbatim.
    state cell/.style={draw,inner sep=0pt,minimum height=1em,minimum width=1.6em,rounded corners=1pt},
    round unit/.style={draw,circle,inner sep=0pt,minimum size=1.2em},
    sentence box/.style={draw,rounded corners=2pt,drop shadow,fill=white},
    dotted frame/.style={draw=violet,densely dotted,very thick},
    legend swatch/.style={rounded corners=1pt,inner sep=0pt,minimum size=1em},
  ]

  % --- Cell rows -----------------------------------------------------------
  \foreach \x in {1,2,3,4}
    \node[state cell,fill=red!30] (c1_\x) at (0em+2em*\x, 0em) {};
  \foreach \x in {1,2,3}
    \node[state cell,fill=red!30] (c2_\x) at (11em+2em*\x, 0em) {};
  \foreach \x in {1,2,3,4,5}
    \node[state cell,fill=red!30] (c3_\x) at (18.4em+2em*\x, 0em) {};
  \foreach \x in {1,2,3,4,5}
    \node[state cell,fill=blue!30] (c4_\x) at (18.4em+2em*\x, 10.4em) {};
  %\node[inner sep=0pt,minimum size=1em,fill=ugreen,circle] (c5) at (9em, 7em){};

  % --- Circle nodes --------------------------------------------------------
  \node[round unit,fill=green!20] (qs) at (18.6em, 6.4em) {};
  \node[round unit,fill=green!20] (qw) at (18.6em, 4.4em) {};

  % sigma: empty circle with a horizontal and a vertical stroke through it.
  \node[round unit,thick] (sigma) at (24.4em, 8em) {};
  \draw[-,thick] (sigma.0) -- (sigma.180);
  \draw[-,thick] (sigma.90) -- (sigma.-90);

  \node[round unit,fill=orange!30] (add1) at (5em, 3em) {};
  \node[round unit,fill=orange!30] (add2) at (15em, 3em) {};
  \node[round unit,fill=orange!30] (add3) at (10em, 5.2em) {};

  % --- Shadowed boxes behind each cell group (drawn on the background layer)
  \begin{pgfonlayer}{background}
    \node[sentence box,minimum width=8.3em,fit=(c1_1)(c1_4)] (box1) {};
    \node[sentence box,minimum width=6.4em,fit=(c2_1)(c2_3)] (box2) {};
    \node[sentence box,minimum width=10.5em,fit=(c3_1)(c3_5)] (box3) {};
    \node[sentence box,minimum width=10.3em,fit=(c4_1)(c4_5)] (box4) {};
    %\node[draw,rounded corners=2pt,inner xsep=6pt,drop shadow,fill=white][fit=(c5)](box5){};
  \end{pgfonlayer}

  % --- Dotted frames at x = 24.4em, i.e. around c3_3 (n1) and c4_3 (n2) -----
  \node[dotted frame,minimum width=1.9em,minimum height=2.1em] (n1) at (24.4em,0em) {};
  \node[dotted frame,minimum width=1.8em,minimum height=2em] (n2) at (24.4em,10.4em) {};
  \node[] at (24.4em, -1.5em) {$\mathbi{x}_\mathbi{t}$};

  % Ellipsis placed between the first and second cell groups.
  \node[text=ublue] at (10.5em, 0em) {\small\bfnew{...}};

  % --- c1_* -> add1 ---------------------------------------------------------
  \draw[->,thick, out=70, in=-120] ([yshift=0.1em]c1_1.90) node[xshift=-0.4em,yshift=1.2em]{$ \mathbi{h}_ \mathbi{i}^ \mathbi{j}$}to ([yshift=-0.1em]add1.-90);
  \draw[->,thick, out=80, in=-100] ([yshift=0.1em]c1_2.90) to ([yshift=-0.1em]add1.-90);
  \draw[->,thick, out=100, in=-80] ([yshift=0.1em]c1_3.90) to ([yshift=-0.1em]add1.-90);
  \draw[->,thick, out=110, in=-60] ([yshift=0.1em]c1_4.90) to ([yshift=-0.1em]add1.-90);

  % --- c2_* -> add2 ---------------------------------------------------------
  \draw[->,thick, out=70, in=-110] ([yshift=0.1em]c2_1.90) to ([yshift=-0.1em]add2.-90);
  \draw[->,thick, out=90, in=-90] ([yshift=0.1em]c2_2.90) to ([yshift=-0.1em]add2.-90);
  \draw[->,thick, out=110, in=-70] ([yshift=0.1em]c2_3.90) to ([yshift=-0.1em]add2.-90);

  % --- add1, add2 -> add3 ---------------------------------------------------
  \draw[->,thick, out=30, in=-130] ([yshift=0.1em]add1.90) node[xshift=-0.4em,yshift=1.1em]{$ \mathbi{s}^ \mathbi{j}$} to ([yshift=-0.1em]add3.-120);
  \draw[->,thick, out=150, in=-50] ([yshift=0.1em]add2.90) to ([yshift=-0.1em]add3.-70);

  % --- qs -> add3, qw -> add2 and add1 (green arrows) ------------------------
  \draw[->,thick, ugreen!60,out=160,in=-10] ([xshift=-0.1em]qs.160) node[xshift=-0.3em,yshift=0.1em,above,text=black]{$ \mathbi{q}_\mathbi{s}$} to ([xshift=0.1em]add3.0);
  \draw[->,thick, ugreen!60,out=180,in=0] ([xshift=-0.1em]qw.180) node[xshift=-0.3em,yshift=0.4em,above,text=black]{$ \mathbi{q}_\mathbi{w}$} to ([xshift=0.1em]add2.0);
  \draw[->,thick, ugreen!60,out=170,in=-10] ([xshift=-0.1em]qw.160) to ([xshift=0.1em]add1.0);

  % --- n1 -> qw, qs (Bezier curves), n1 -> sigma -> n2 -> upward exit --------
  \draw[->,thick] ([yshift=0.1em]n1.135) .. controls ([xshift=-2em]n1.130) and ([xshift=2em]qw.0) .. ([xshift=0.1em]qw.0);
  \draw[->,thick] ([yshift=0.1em]n1.120) .. controls ([xshift=-2em,yshift=1em]n1.120) and ([xshift=3em]qs.0) .. ([xshift=0.1em]qs.0);
  \draw[->,thick] ([yshift=0.1em]n1.90) node[yshift=1em,right]{$ \mathbi{h}_\mathbi{t}$}-- ([yshift=-0.1em]sigma.-90);
  \draw[->,thick] ([yshift=0.1em]sigma.90) -- ([yshift=-0.1em]n2.-90);
  \draw[->,thick] ([yshift=0.1em]n2.90) -- node[right]{$ \widetilde{\mathbi{h}}_\mathbi{t}$}([yshift=2em]n2.90);

  % --- Braces under the boxes ------------------------------------------------
  % Label under box1..box2 reads "previous sentences".
  \draw[decorate,decoration={brace, mirror},gray, thick] ([yshift=-2em]box1.-180) -- node[font=\scriptsize,text=black,below]{前几句}([yshift=-2em]box2.0);
  % Label under box3 reads "current sentence".
  \draw[decorate,decoration={brace, mirror},gray, thick] ([yshift=-2em]box3.-180) -- node[font=\scriptsize,text=black,below]{当前句}([yshift=-2em]box3.0);

  % --- add3 -> sigma (up, then across with a rounded corner) -----------------
  \draw[->, thick, rounded corners=2pt] ([yshift=0.1em]add3.90) -- ([yshift=2.1em]add3.90) -- ([xshift=-0.1em]sigma.180);

  % --- Legend (annotation) ---------------------------------------------------
  % Swatch texts, in order: "encoded representation", "hierarchical attention",
  % "encoded representation fused with context information".
  \node[legend swatch,fill=red!30] (a1) at (2em,-4.5em) {};
  \node[anchor=west,font=\footnotesize] (w1) at ([xshift=0.4em]a1.east) {编码表示};
  \node[anchor=west,legend swatch,fill=orange!30] (a2) at ([xshift=2em]w1.east) {};
  \node[anchor=west,font=\footnotesize] (w2) at ([xshift=0.4em]a2.east) {层次注意力};
  \node[anchor=west,legend swatch,fill=blue!30] (a3) at ([xshift=2em]w2.east) {};
  \node[anchor=west,font=\footnotesize] at ([xshift=0.4em]a3.east) {融合上下文信息的编码表示};
\end{tikzpicture}