\node(encoder)[coder, above of = x,yshift=4em]{\large{Encoder}};
\node(decoder_left)[coder, above of = encoder, yshift=6em,fill=blue!20]{\large{Decoder}};
\node(y_hat)[above of = decoder_left, yshift=4em]{{$y$}};
\node(y)[above of = decoder_left, xshift=-6em]{{$y_{<}$}};
\node(decoder_right)[coder, above of = encoder, xshift=11em,fill=yellow!20]{\large{Decoder}};
\node(figure)[draw=white,above of = decoder_right,yshift=6.5em,scale=0.25] {\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-bank-without-attention.jpg}};
\node [anchor=south,scale=1.2] (node1) at ([xshift=-2.5em,yshift=4.5em]y.north) {{$x$: source-language text data}};
\node [anchor=north,scale=1.2] (node2) at ([xshift=0.57em]node1.south){{$y$: target-language text data}};
\node[draw=white,scale=0.6] (input) at (0,0){\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-bank-without-attention.jpg}};
\node[anchor=west] (label1) at ([xshift=-3.5em]input.west) {\begin{tabular}{l}{\normalsize{Image:}}\end{tabular}};
\node[anchor=south] (label2) at ([yshift=-6em]label1.south) {\begin{tabular}{l}{\normalsize{Source language:}}\end{tabular}};
\node[anchor=south] (english1) at ([xshift=-0.28em,yshift=-2.3em]input.south) {\begin{tabular}{l}{\large{A\; girl\; jumps\; off\; a\;{\red{\underline{bank}}}\quad .}}\end{tabular}};
\draw[->,thick]([xshift=-1.4em]trans.west) to (trans.west);
\node[anchor=east] (de1) at ([xshift=5.2cm,yshift=-0.1em]trans.east) {\begin{tabular}{l}{\normalsize{Target language:}}{\normalsize{一个/女孩/从/{\red{河床}}/}}\end{tabular}};
\node[anchor=south] (de2) at ([xshift=1.1em,yshift=-1.5em]de1.south) {\begin{tabular}{l}{\normalsize{上/跳下来/。}}\end{tabular}};
\draw[->,thick](trans.east) to ([xshift=0.5em,yshift=0.1em]de1.west);
\parinterval After obtaining ${\bm\pi}^K=\frac{\partial L}{\partial{\mathbi{s}}^K}$, the next goals are: 1) to compute the gradient of the loss function $ L $ with respect to the connection weights ${\mathbi{W}}^K $ between layer $ K-1$ and the output layer; 2) to compute the gradient of the loss function $ L $ with respect to the output ${\mathbi{h}}^{K-1}$ of layer $ K-1$ of the neural network. This step is illustrated in Figure~\ref{fig:9-55}.
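\parinterval As a brief worked sketch (assuming the row-vector convention ${\mathbi{s}}^K={\mathbi{h}}^{K-1}{\mathbi{W}}^K$, which is an assumption here and should be checked against the layer definition used earlier), applying the chain rule to these two goals gives
% Sketch only: both equalities rely on the assumed form s^K = h^{K-1} W^K.
\begin{eqnarray}
\frac{\partial L}{\partial {\mathbi{W}}^K} & = & \left[{\mathbi{h}}^{K-1}\right]^{\textrm{T}} {\bm\pi}^K \nonumber \\
\frac{\partial L}{\partial {\mathbi{h}}^{K-1}} & = & {\bm\pi}^K \left[{\mathbi{W}}^K\right]^{\textrm{T}} \nonumber
\end{eqnarray}
where ${\bm\pi}^K=\frac{\partial L}{\partial{\mathbi{s}}^K}$ is the quantity obtained in the previous step.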