Commit 5e63b9ba by 曹润柘

合并分支 'caorunzhe' 到 'master'

Caorunzhe

查看合并请求 !34
parents 3ec71235 a41c6a4f
%------------------------------------------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.9cm}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum height=0.5\base,minimum width=1\base,draw,inner sep=0pt,outer sep=0pt]
\tikzstyle{wordnode} = [font=\tiny]
% RNN translation model
\begin{scope}[local bounding box=RNNMT]
% RNN Encoder
\coordinate (eemb0) at (0,0);
\foreach \x [count=\y from 0] in {1,2,...,3}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\x) at ([xshift=0.4\base]eemb\y.east) {\tiny{$e_x()$}};
\foreach \x in {1,2,...,3}
\node[rnnnode,fill=blue!30!white,anchor=south] (enc\x) at ([yshift=0.3\base]eemb\x.north) {};
\node[] (enclabel1) at (enc1) {\tiny{$\textbf{h}_{m-2}$}};
\node[] (enclabel2) at (enc2) {\tiny{$\textbf{h}_{m-1}$}};
\node[rnnnode,fill=purple!30!white] (enclabel3) at (enc3) {\tiny{$\textbf{h}_{m}$}};
\node[wordnode,left=0.4\base of enc1] (init1) {$\cdots$};
\node[wordnode,left=0.4\base of eemb1] (init2) {$\cdots$};
\node[wordnode,below=0pt of eemb1] () {};
\node[wordnode,below=0pt of eemb2] () {};
\node[wordnode,below=0pt of eemb3] () {$\langle$eos$\rangle$};
% RNN Decoder
\foreach \x in {1,2,...,3}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([yshift=\base]enc\x.north) {\tiny{$e_y()$}};
\foreach \x in {1,2,...,3}
\node[rnnnode,fill=blue!30!white,anchor=south] (dec\x) at ([yshift=0.3\base]demb\x.north) {{\tiny{$\textbf{s}_\x$}}};
\foreach \x in {1,2,...,3}
\node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax\x) at ([yshift=0.3\base]dec\x.north) {\tiny{Softmax}};
\node[wordnode,right=0.4\base of demb3] (end1) {$\cdots$};
\node[wordnode,right=0.4\base of dec3] (end2) {$\cdots$};
\node[wordnode,right=0.4\base of softmax3] (end3) {$\cdots$};
% Decoder input words
\node[wordnode,below=0pt of demb1] (decwordin) {$\langle$sos$\rangle$};
\ExtractX{$(demb2.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Do};
\ExtractX{$(demb3.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
% Decoder output words
\node[wordnode,above=0pt of softmax1] (decwordout) {Do};
\ExtractX{$(softmax2.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
\ExtractX{$(softmax3.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {know};
% Connections
\draw[-latex'] (init1.east) to (enc1.west);
\draw[-latex'] (dec3.east) to (end2.west);
\foreach \x in {1,2,...,3}
\draw[-latex'] (eemb\x) to (enc\x);
\foreach \x in {1,2,...,3}
\draw[-latex'] (demb\x) to (dec\x);
\foreach \x in {1,2,...,3}
\draw[-latex'] (dec\x.north) to (softmax\x.south);
\foreach \x [count=\y from 2] in {1,2}
{
\draw[-latex'] (enc\x.east) to (enc\y.west);
\draw[-latex'] (dec\x.east) to (dec\y.west);
}
\coordinate (bridge) at ([yshift=0.4\base]enc2.north west);
\draw[-latex'] (enc3.north) .. controls +(north:0.3\base) and +(east:\base) .. (bridge) .. controls +(west:2.7\base) and +(west:0.3\base) .. (dec1.west);
{
\node [anchor=east] (line1) at ([xshift=-3em,yshift=0.5em]softmax1.west) {\scriptsize{基于RNN的隐层状态$\textbf{s}_i$}};
\node [anchor=north west] (line2) at ([yshift=0.3em]line1.south west) {\scriptsize{预测目标词的概率}};
\node [anchor=north west] (line3) at ([yshift=0.3em]line2.south west) {\scriptsize{通常,用Softmax函数}};
\node [anchor=north west] (line4) at ([yshift=0.3em]line3.south west) {\scriptsize{实现 $\textrm{P}(y_i|...)$}};
}
{
\node [anchor=north west] (line11) at ([yshift=-1.8em]line4.west) {\scriptsize{每个词的one-hot}};
\node [anchor=north west] (line12) at ([yshift=0.3em]line11.south west) {\scriptsize{离散化表示都被转化为}};
\node [anchor=north west] (line13) at ([yshift=0.3em]line12.south west) {\scriptsize{实数向量,即词嵌入}};
\node [anchor=north west] (line14) at ([yshift=0.3em]line13.south west) {\scriptsize{($e_x()$$e_y()$函数)}};
}
{
\node [anchor=west] (line21) at ([xshift=1.3em,yshift=1.5em]enc3.east) {\scriptsize{源语编码器最后一个}};
\node [anchor=north west] (line22) at ([yshift=0.3em]line21.south west) {\scriptsize{循环单元的输出被}};
\node [anchor=north west] (line23) at ([yshift=0.3em]line22.south west) {\scriptsize{看作是句子的表示,}};
\node [anchor=north west] (line24) at ([yshift=0.3em]line23.south west) {\scriptsize{记为$\textbf{C}$}};
}
\begin{pgfonlayer}{background}
{
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=red!10,drop shadow,draw=red] [fit = (line1) (line2) (line3) (line4)] (box1) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,draw=red] [fit = (softmax1) (softmax2) (softmax3)] (box4) {};
\draw [->,dotted,very thick,red] ([yshift=1em,xshift=2.5em]box1.east) -- ([yshift=1em,xshift=0.1em]box1.east);
}
{
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=green!10,drop shadow,draw=ugreen] [fit = (line11) (line12) (line13) (line14)] (box2) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,draw=ugreen] [fit = (eemb1) (eemb2) (eemb3)] (box5) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,draw=ugreen] [fit = (demb1) (demb2) (demb3)] (box6) {};
\draw [->,dotted,very thick,ugreen] ([yshift=-1.3em,xshift=2.5em]box2.east) -- ([yshift=-1.3em,xshift=0.1em]box2.east);
\draw [->,dotted,very thick,ugreen] ([xshift=0.1em]box6.west) .. controls +(west:1) and +(east:1) .. ([yshift=1.0em]box2.east) ;
}
{
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=purple!10,drop shadow,draw=purple] [fit = (line21) (line22) (line23) (line24)] (box3) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,draw=purple] [fit = (enc3)] (box7) {};
\draw [->,dotted,very thick,purple] ([xshift=0.1em]box7.east) -- ([xshift=0.8em]box7.east) ;
}
\end{pgfonlayer}
\end{scope}
\end{tikzpicture}
%%% outline
%-------------------------------------------------------------------------
\begin{tikzpicture}
%第一段----------------------------------------------
%原文-------------
\node [pos=0.4,left,xshift=-36em,yshift=7em,font=\small] (original0) {\quad 源语(中文)输入:};
\node [pos=0.4,left,xshift=-22em,yshift=7em,font=\small] (original1) {
\begin{tabular}[t]{l}
\parbox{14em}{``我''、``很''、``好''、``<eos>'' }
\end{tabular}
};
%译文1--------------mt1
\node[font=\small] (mt1) at ([xshift=0em,yshift=-1em]original0.south) {目标语(英文)输出:};
\node[font=\small] (ts1) at ([xshift=0em,yshift=-1em]original1.south) {
\begin{tabular}[t]{l}
\parbox{14em}{``I''、``am''、``fine''、``<eos>''}
\end{tabular}
};
\begin{pgfonlayer}{background}
{
\node[rectangle,draw=ublue, inner sep=0mm] [fit =(original0)(mt1)(ts1)(ts1)(original1)(original1)] {};
}
\end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\setlength{\base}{1cm}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum size=0.5\base,draw,inner sep=0pt,outer sep=0pt]
\tikzstyle{wordnode} = [font=\footnotesize]
% RNN translation model
\begin{scope}[local bounding box=RNNMT]
% RNN Encoder
\coordinate (eemb0) at (0,0);
\foreach \x [count=\y from 0] in {1,2,...,4}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\x) at ([xshift=0.4\base]eemb\y.east) {};
\foreach \x in {1,2,...,4}
\node[rnnnode,fill=blue!30!white,anchor=south] (enc\x) at ([yshift=0.5\base]eemb\x.north) {};
\node[wordnode,left=0.4\base of enc1,font=\footnotesize] (init) {0};
\node[wordnode,anchor=east] (init2) at ([xshift=-3.0em]init.west){};
{
\node[rnnnode,fill=purple] (repr) at (enc4) {};
\node[wordnode] (label) at ([yshift=2.5em]enc4.north) {
\begin{tabular}{c}
源语言句\\子表示
\end{tabular}
};
\draw[->,dashed,thick] (label.south) -- (enc4.north);
}
\node[wordnode,below=0pt of eemb1,font=\footnotesize] (encwordin1) {};
\node[wordnode,below=0pt of eemb2,font=\footnotesize] (encwordin2) {};
\node[wordnode,below=0pt of eemb3,font=\footnotesize] (encwordin3) {};
\node[wordnode,below=0pt of eemb4,font=\footnotesize] (encwordin4) {$\langle$eos$\rangle$};
%大括号
\draw[decorate,thick,decoration={mirror,brace}]([xshift=0.0em,yshift=-1.5em]eemb1.south west) --([xshift=0.0em,yshift=-1.5em]eemb4.south east) node [font=\footnotesize,xshift=-3.8em,yshift=-1.0em,align=center](label2) {编码器};
% RNN Decoder
\foreach \x in {1,2,...,4}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([xshift=9.5em,yshift=-3.9em]enc\x.north) {};
\foreach \x in {1,2,...,4}
\node[rnnnode,fill=blue!30!white,anchor=south] (dec\x) at ([yshift=0.5\base]demb\x.north) {};
\foreach \x in {1,2,...,4}
\node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax\x) at ([yshift=0.5\base]dec\x.north) {};
% Decoder input words
\node[wordnode,below=0pt of demb1,font=\footnotesize] (decwordin) {$\langle$sos$\rangle$};
\ExtractX{$(demb2.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base,font=\footnotesize] () at (\XCoord,\YCoord) {I};
\ExtractX{$(demb3.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base,font=\footnotesize] () at (\XCoord,\YCoord) {am};
\ExtractX{$(demb4.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base,font=\footnotesize] () at (\XCoord,\YCoord) {fine};
% Decoder output words
\node[wordnode,above=0pt of softmax1,font=\footnotesize] (decwordout1) {I};
\ExtractX{$(softmax2.north)$}
\ExtractY{$(decwordout1.base)$}
\node[wordnode,anchor=base,font=\footnotesize] (decwordout2) at (\XCoord,\YCoord) {am};
\ExtractX{$(softmax3.north)$}
\ExtractY{$(decwordout1.base)$}
\node[wordnode,anchor=base,font=\footnotesize] (decwordout3) at (\XCoord,\YCoord) {fine};
\ExtractX{$(softmax4.north)$}
\ExtractY{$(decwordout1.base)$}
\node[wordnode,anchor=base,font=\footnotesize] (decwordout4) at (\XCoord,\YCoord) {$\langle$eos$\rangle$};
%大括号
\draw[decorate,thick,decoration={brace}]([xshift=0.0em,yshift=1.3em]softmax1.north west) --([xshift=0.0em,yshift=1.3em]softmax4.north east) node [font=\footnotesize,xshift=-3.8em,yshift=1.0em,align=center](label1) {解码器};
% Connections
\draw[-latex'] (init.east) to (enc1.west);
\foreach \x in {1,2,...,4}
\draw[-latex'] (eemb\x) to (enc\x);
\foreach \x in {1,2,...,4}
\draw[-latex'] (demb\x) to (dec\x);
\foreach \x in {1,2,...,4}
\draw[-latex'] (dec\x.north) to ([yshift=0.5\base]dec\x.north);
\foreach \x [count=\y from 2] in {1,2,...,3}
{
\draw[-latex'] (enc\x.east) to (enc\y.west);
\draw[-latex'] (dec\x.east) to (dec\y.west);
}
\coordinate (bridge) at ([yshift=-1.15\base]demb2);
\draw[-latex'] (enc4.east) -- (dec1.west);
\node[] at ([xshift=1.5cm]dec4.east){};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%
%---------------------------------------
\begin{tikzpicture}
\begin{scope}
%\newlength{\mystep}
%\setlength{\mystep}{1.6em}
\foreach \x in {1,2,...,6}
\node[] (s\x) at (\x * 1.6em,0) {};
\node [] (ws1) at (s1) {\scriptsize{}};
\node [] (ws2) at (s2) {\scriptsize{}};
\node [] (ws3) at (s3) {\scriptsize{}};
\node [] (ws4) at (s4) {\scriptsize{很长}};
\node [] (ws5) at (s5) {\scriptsize{}};
\node [] (ws6) at (s6) {\scriptsize{句子}};
\foreach \x in {1,2,...,6}
\node[] (t\x) at (\x * 1.6em + 2.4in,0) {};
\node [] (wt1) at (t1) {\scriptsize{This}};
\node [] (wt2) at (t2) {\scriptsize{is}};
\node [] (wt3) at ([yshift=-1pt]t3) {\scriptsize{a}};
\node [] (wt4) at ([yshift=-0.1em]t4) {\scriptsize{very}};
\node [] (wt5) at (t5) {\scriptsize{long}};
\node [] (wt6) at ([xshift=1em]t6) {\scriptsize{sentence}};
\node [anchor=south west,fill=red!30,minimum width=1.6in,minimum height=1.5em] (encoder) at ([yshift=1.0em]ws1.north west) {\footnotesize{Encoder}};
\node [anchor=west,fill=blue!30,minimum width=1.9in,minimum height=1.5em] (decoder) at ([xshift=4.5em]encoder.east) {\footnotesize{Decoder}};
\node [anchor=west,fill=green!30,minimum height=1.5em] (representation) at ([xshift=1em]encoder.east) {\footnotesize{表示}};
\draw [->,thick] ([xshift=1pt]encoder.east)--([xshift=-1pt]representation.west);
\draw [->,thick] ([xshift=1pt]representation.east)--([xshift=-1pt]decoder.west);
\foreach \x in {1,2,...,6}
\draw[->] ([yshift=0.1em]s\x.north) -- ([yshift=1.2em]s\x.north);
\foreach \x in {1,2,...,5}
\draw[<-] ([yshift=0.1em]t\x.north) -- ([yshift=1.2em]t\x.north);
\draw[<-] ([yshift=0.1em,xshift=1em]t6.north) -- ([yshift=1.2em,xshift=1em]t6.north);
{
\draw [<->,ublue,thick] ([xshift=0.3em]ws4.south) .. controls +(-60:1) and +(south:1) .. (wt4.south);
\draw [<->,ublue,thick] (ws4.south) .. controls +(south:1.0) and +(south:1.5) .. (wt5.south);
}
{
\node [anchor=north,fill=green!30] (attentionlabel) at ([yshift=-3.4em]representation.south) {\footnotesize{词语的关注度}};
\draw [->,dotted,very thick,ublue] ([yshift=0.1em]attentionlabel.north)--([yshift=-0.1em]representation.south);
}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{lnode} = [minimum height=2em,minimum width=8em,inner sep=3pt,rounded corners=2pt,draw,fill=red!20];
\tikzstyle{standard} = [rounded corners=3pt]
\node [lnode,anchor=west] (l1) at (0,0) {编码器};
\node [lnode,anchor=west,fill=blue!20] (l2) at ([xshift=3em]l1.east) {解码器};
\node [anchor=north] (inputs) at ([xshift=-1.5em,yshift=-1em]l1.south) {Inputs: 五 星 红 旗};
\node [anchor=south] (outputs) at ([xshift=-3.5em,yshift=2em]l2.north) {Outputs: {\color{red}}云深处小蓬莱\ \ {\color{red}}斗阑干次第开};
\node [anchor=south] (outputs1) at ([xshift=-1.5em,yshift=1em]l2.north) {{\color{red}}旆壁幢春色里\ \ {\color{red}}亭鼓吹乐声来};
\draw [->,very thick] ([yshift=-1em]l1.south) -- ([yshift=-0.1em]l1.south);
\draw [->,very thick] ([yshift=0.1em]l2.north) -- ([yshift=1em]l2.north);
\draw [->,very thick] ([xshift=0.1em]l1.east) -- ([xshift=-0.1em]l2.west);
\node [rectangle,inner sep=0.5em,rounded corners=1pt,very thick,dotted,draw=ugreen!80] [fit = (l1) (l2)] (box0) {};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{lnode} = [minimum height=2.5em,minimum width=12em,inner sep=3pt,rounded corners=2pt,draw=red!75!black,fill=red!5];
\tikzstyle{rnode} = [minimum height=2.5em,minimum width=12em,inner sep=3pt,rounded corners=2pt,draw=blue!75!black,fill=blue!5];
\tikzstyle{standard} = [rounded corners=3pt]
\node [lnode,anchor=west] (l1) at (0,0) {上联:翠竹千支歌盛世};
\node [rnode,anchor=west] (l2) at ([xshift=1em]l1.east) {下联:红梅万点报新春};
\node [lnode,anchor=north] (l3) at ([yshift=-0.8em]l1.south) {上联:一帆风顺年年好};
\node [rnode,anchor=west] (l4) at ([xshift=1em]l3.east) {下联:万事如意步步高};
\node [lnode,anchor=north] (l5) at ([yshift=-0.8em]l3.south) {上联:佳节迎春春生笑脸};
\node [rnode,anchor=west] (l6) at ([xshift=1em]l5.east) {下联:新年纳福富华满堂};
\node [lnode,anchor=north] (l7) at ([yshift=-0.8em]l5.south) {上联:腊梅吐芳迎红日};
\node [rnode,anchor=west] (l8) at ([xshift=1em]l7.east) {下联:绿柳展枝舞春风};
\node [lnode,anchor=north] (l9) at ([yshift=-0.8em]l7.south) {上联:雪兆丰年丛岭翠};
\node [rnode,anchor=west] (l10) at ([xshift=1em]l9.east) {下联:春回大地满园红};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnnnode} = [minimum height=1.1em,minimum width=3.5em,inner sep=2pt,rounded corners=1pt,draw,fill=red!20];
\tikzstyle{wnode} = [minimum height=1.0em,minimum width=3.5em,inner sep=2pt,rounded corners=1pt,draw,fill=white];
{
\node [rnnnode,anchor=west,fill=green!20] (t1) at (0,0) {\scriptsize{$e_y()$}};
}
{
\node [rnnnode,anchor=west,fill=green!20] (t2) at ([xshift=2.2em]t1.east) {\scriptsize{$e_y()$ ($\times 3$)}};
}
{
\node [rnnnode,anchor=west,fill=green!20] (t3) at ([xshift=2.2em]t2.east) {\scriptsize{$e_y()$ ($\times 3$)}};
\node [anchor=west,inner sep=2pt] (t4) at ([xshift=0.3em]t3.east) {\scriptsize{...}};
}
{
\node [rnnnode,anchor=south] (s1) at ([yshift=1em]t1.north) {\scriptsize{$\textbf{s}_1$}};
}
{
\node [rnnnode,anchor=south] (s2) at ([yshift=1em]t2.north) {\scriptsize{$\textbf{s}_2$ ($\times 3$)}};
}
{
\node [rnnnode,anchor=south] (s3) at ([yshift=1em]t3.north) {\scriptsize{$\textbf{s}_3$ ($\times 3$)}};
\node [anchor=west,inner sep=2pt] (s4) at ([xshift=0.3em]s3.east) {\scriptsize{...}};
}
{
\node [rnnnode,anchor=south,fill=blue!20] (o1) at ([yshift=1em]s1.north) {\scriptsize{Softmax}};
}
{
\node [rnnnode,anchor=south,fill=blue!20] (o2) at ([yshift=1em]s2.north) {\scriptsize{Softmax ($\times 3$)}};
}
{
\node [rnnnode,anchor=south,fill=blue!20] (o3) at ([yshift=1em]s3.north) {\scriptsize{Softmax ($\times 3$)}};
\node [anchor=west,inner sep=2pt] (o4) at ([xshift=0.3em]o3.east) {\scriptsize{...}};
}
\node [wnode,anchor=north] (wt1) at ([yshift=-0.8em]t1.south) {\scriptsize{$\langle$sos$\rangle$}};
{
\node [wnode,anchor=north] (wt2) at ([yshift=-0.8em]t2.south) {\scriptsize{}};
\node [wnode,anchor=north] (wt2copy1) at ([xshift=-0.2em,yshift=-0.2em]wt2.north) {\scriptsize{}};
\node [wnode,anchor=north] (wt2copy2) at ([xshift=-0.4em,yshift=-0.4em]wt2.north) {\scriptsize{Have}};
}
{
\node [wnode,anchor=north,inner sep=2pt] (wt3) at ([yshift=-0.8em]t3.south) {\scriptsize{}};
\node [wnode,anchor=north] (wt3copy1) at ([xshift=-0.2em,yshift=-0.2em]wt3.north) {\scriptsize{}};
\node [wnode,anchor=north] (wt3copy2) at ([xshift=-0.4em,yshift=-0.4em]wt3.north) {\scriptsize{you}};
}
{
\node [wnode,anchor=center,inner sep=2pt] (wo1) at ([xshift=0.4em,yshift=1.8em]o1.north) {\scriptsize{}};
\node [wnode,anchor=north] (wo1copy1) at ([xshift=-0.2em,yshift=-0.2em]wo1.north) {\scriptsize{}};
\node [wnode,anchor=north] (wo1copy2) at ([xshift=-0.4em,yshift=-0.4em]wo1.north) {\scriptsize{Have}};
}
{
\node [wnode,anchor=center,inner sep=2pt] (wo2) at ([xshift=0.4em,yshift=1.8em]o2.north) {\scriptsize{}};
\node [wnode,anchor=north] (wo2copy1) at ([xshift=-0.2em,yshift=-0.2em]wo2.north) {\scriptsize{}};
\node [wnode,anchor=north] (wo2copy2) at ([xshift=-0.4em,yshift=-0.4em]wo2.north) {\scriptsize{you}};
}
{
\node [wnode,anchor=center,inner sep=2pt] (wo3) at ([xshift=0.4em,yshift=1.8em]o3.north) {\scriptsize{}};
\node [wnode,anchor=north] (wo3copy1) at ([xshift=-0.2em,yshift=-0.2em]wo3.north) {\scriptsize{}};
\node [wnode,anchor=north] (wo3copy2) at ([xshift=-0.4em,yshift=-0.4em]wo3.north) {\scriptsize{learned}};
}
{
\foreach \x in {1}{
\draw [->] ([yshift=-0.7em]t\x.south) -- ([yshift=-0.1em]t\x.south);
\draw [->] ([yshift=0.1em]t\x.north) -- ([yshift=-0.1em]s\x.south);
\draw [->] ([yshift=0.1em]s\x.north) -- ([yshift=-0.1em]o\x.south);
}
}
{
\draw [->] ([yshift=0.1em]o1.north) -- ([yshift=0.8em]o1.north) node [pos=0.5,right] {\scriptsize{top-3}};
}
{
\foreach \x in {2}{
\draw [->] ([yshift=-0.7em]t\x.south) -- ([yshift=-0.1em]t\x.south);
\draw [->] ([yshift=0.1em]t\x.north) -- ([yshift=-0.1em]s\x.south);
\draw [->] ([yshift=0.1em]s\x.north) -- ([yshift=-0.1em]o\x.south);
\draw [->] ([yshift=0.1em]o\x.north) -- ([yshift=0.8em]o\x.north) node [pos=0.5,right] {\scriptsize{top-3}};
}
}
{
\foreach \x in {3}{
\draw [->] ([yshift=-0.7em]t\x.south) -- ([yshift=-0.1em]t\x.south);
\draw [->] ([yshift=0.1em]t\x.north) -- ([yshift=-0.1em]s\x.south);
\draw [->] ([yshift=0.1em]s\x.north) -- ([yshift=-0.1em]o\x.south);
\draw [->] ([yshift=0.1em]o\x.north) -- ([yshift=0.8em]o\x.north) node [pos=0.5,right] {\scriptsize{top-3}};
}
}
{
\draw [->] ([xshift=-0.5em]s1.west) -- ([xshift=-0.1em]s1.west) node [pos=0,left,inner sep=1pt] {\scriptsize{0}};
}
{
\draw [->] ([xshift=0.1em]s1.east) -- ([xshift=-0.1em]s2.west);
}
{
\draw [->] ([xshift=0.1em]s2.east) -- ([xshift=-0.1em]s3.west);
}
{
\draw [->,very thick,dotted] (wo1.east) .. controls +(east:0.6) and +(west:0.8) ..(wt2copy2.west);
}
{
\draw [->,very thick,dotted] (wo2.east) .. controls +(east:0.6) and +(west:0.8) ..(wt3copy2.west);
}
{
\node [circle,draw,anchor=north,inner sep=2pt,fill=orange!20,text=orange!20] (c2) at ([yshift=-2.5em]t1.south) {\scriptsize{$\textbf{C}_2$}};
\node [circle,draw,inner sep=2pt,fill=orange!20] (c2copy1) at ([yshift=-0.1em,xshift=-0.1em]c2) {\scriptsize{$\textbf{C}_2$}};
\node [circle,draw,inner sep=2pt,fill=orange!20] (c2copy2) at ([yshift=-0.2em,xshift=-0.2em]c2) {\scriptsize{$\textbf{C}_2$}};
\draw [->] ([xshift=-0.9em]c2.west) -- ([xshift=-0.3em]c2.west);
\draw [->] ([xshift=0.1em]c2.east) .. controls +(east:1.5) and +(west:0.8) ..([yshift=-0.3em,xshift=-0.1em]s2.west);
}
{
\node [circle,draw,anchor=north,inner sep=2pt,fill=orange!20,text=orange!20] (c3) at ([yshift=-2.5em]t2.south) {\scriptsize{$\textbf{C}_3$}};
\node [circle,draw,inner sep=2pt,fill=orange!20,text=orange!20] (c3copy1) at ([yshift=-0.1em,xshift=-0.1em]c3) {\scriptsize{$\textbf{C}_3$}};
\node [circle,draw,inner sep=2pt,fill=orange!20] (c3copy2) at ([yshift=-0.2em,xshift=-0.2em]c3) {\scriptsize{$\textbf{C}_3$}};
\draw [->] ([xshift=-0.9em]c3.west) -- ([xshift=-0.3em]c3.west);
\draw [->] ([xshift=0.1em]c3.east) .. controls +(east:1.5) and +(west:0.8) ..([yshift=-0.3em,xshift=-0.1em]s3.west);
}
{
\node [anchor=east] (vocab) at ([xshift=-5em]s1.west) {\tiny{$\begin{bmatrix} \textrm{Have} & 0.50 \\ \textrm{I} & 0.02 \\ \textrm{it} & 0.03 \\ \textrm{has} & 0.30 \\ \textrm{you} & 0.01 \\ \textrm{the} & 0.01 \\ \textrm{a} & 0.01 \\ \textrm{an} & 0.02 \\ \textrm{he} & 0.03 \\ \textrm{she} & 0.01 \\ \textrm{are} & 0.00 \\ \textrm{am} & 0.01 \\ ... & ... \end{bmatrix}$}};
\node [anchor=south] (vocablabel) at (vocab.north) {\scriptsize{单词的概率分布}};
\draw [->,red,very thick,dotted] (o1.west) .. controls +(west:1) and +(east:2) .. ([yshift=1em]vocab.south east);
}
{
\node [anchor=east,inner sep=1pt] (vocabtopn) at ([xshift=-0.5em,yshift=-0.5em]wo1.west) {\scriptsize{$\begin{bmatrix} \textrm{Have} \\ \textrm{has} \\ \textrm{it} \end{bmatrix}$}};
\draw [->] ([yshift=-1.6em,xshift=-0.4em]vocab.north east) .. controls +(east:1) and +(west:1) .. ([xshift=0.1em,yshift=0.4em]vocabtopn.west) node [pos=0.3,below] (topnlabel) {\scriptsize{top-3}};
{
\node [anchor=north] (cap) at (vocab.south east) {\scriptsize{\textbf{束搜索($b=3$)}}};
}
}
\end{scope}
\end{tikzpicture}
%-------------------------------------------
\ No newline at end of file
%--------------------------------------------------------------------------------
\begin{tikzpicture}
%\newlength{\base}
\setlength{\base}{0.9cm}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum size=0.5\base,draw,inner sep=0pt,outer sep=0pt]
\tikzstyle{wordnode} = [font=\tiny]
% RNN translation model
\begin{scope}[local bounding box=RNNMT]
% RNN Encoder
\coordinate (eemb0) at (0,0);
\foreach \x [count=\y from 0] in {1,2,...,10}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\x) at ([xshift=0.4\base]eemb\y.east) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,fill=blue!30!white,anchor=south] (backenc\x) at ([yshift=0.5\base]eemb\x.north) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,fill=blue!30!white,anchor=south] (enc\x) at ([yshift=0.5\base]backenc\x.north) {};
\node[wordnode,left=0.4\base of enc1] (init) {$0$};
\node[wordnode,right=0.4\base of backenc10] (backinit) {$0$};
\node [rnnnode,fill=purple!30!white] at (enc10) {};
\node [rnnnode,fill=purple!30!white] at (backenc1) {};
\node[wordnode,below=0pt of eemb1] () {};
\node[wordnode,below=0pt of eemb2] () {知道};
\node[wordnode,below=0pt of eemb3] () {};
\node[wordnode,below=0pt of eemb4] () {北京站};
\node[wordnode,below=0pt of eemb5] () {};
\node[wordnode,below=0pt of eemb6] () {};
\node[wordnode,below=0pt of eemb7] () {怎么};
\node[wordnode,below=0pt of eemb8] () {};
\node[wordnode,below=0pt of eemb9] () {};
\node[wordnode,below=0pt of eemb10] () {$\langle$eos$\rangle$};
% RNN Decoder
\foreach \x in {1,2,...,10}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([yshift=1.5\base]enc\x.north) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,fill=blue!30!white,anchor=south] (dec\x) at ([yshift=0.5\base]demb\x.north) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax\x) at ([yshift=0.5\base]dec\x.north) {};
% Decoder input words
\node[wordnode,below=0pt of demb1] (decwordin) {$\langle$eos$\rangle$};
\ExtractX{$(demb2.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Do};
\ExtractX{$(demb3.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
\ExtractX{$(demb4.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {know};
\ExtractX{$(demb5.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {the};
\ExtractX{$(demb6.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {way};
\ExtractX{$(demb7.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {to};
\ExtractX{$(demb8.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Beijing};
\ExtractX{$(demb9.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Railway};
\ExtractX{$(demb10.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Station};
% Decoder output words
\node[wordnode,above=0pt of softmax1] (decwordout) {Do};
\ExtractX{$(softmax2.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
\ExtractX{$(softmax3.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {know};
\ExtractX{$(softmax4.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {the};
\ExtractX{$(softmax5.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {way};
\ExtractX{$(softmax6.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {to};
\ExtractX{$(softmax7.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Beijing};
\ExtractX{$(softmax8.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Railway};
\ExtractX{$(softmax9.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Station};
\ExtractX{$(softmax10.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {$\langle$eos$\rangle$};
% Connections
\draw[-latex'] (init.east) to (enc1.west);
\draw[-latex'] (backinit.west) to (backenc10.east);
\foreach \x in {1,2,...,10}
\draw[-latex'] (eemb\x) to (backenc\x);
\foreach \x in {1,2,...,10}
\draw[-latex'] (eemb\x.north) to [out=15,in=-15] (enc\x.south);
\foreach \x in {1,2,...,10}
\draw[-latex'] (demb\x) to (dec\x);
\foreach \x in {1,2,...,10}
\draw[-latex'] (dec\x.north) to ([yshift=0.5\base]dec\x.north);
\foreach \x [count=\y from 2] in {1,2,...,9}
{
\draw[-latex'] (enc\x.east) to (enc\y.west);
\draw[-latex'] (dec\x.east) to (dec\y.west);
}
\def\y{0}
\foreach \x in {10,9,...,2}
{
\pgfmathtruncatemacro{\y}{\x - 1}
\draw[-latex'] (backenc\x.west) to (backenc\y.east);
}
\coordinate (bridge) at ([yshift=-1.2\base]demb2);
\draw[-latex'] (enc10.north) .. controls +(north:0.7\base) and +(east:1.5\base) .. (bridge) .. controls +(west:2.5\base) and +(west:0.6\base) .. (dec1.west);
\draw[-latex'] (backenc1) to [out=180,in=180] (dec1.west);
% Backward RNN
\begin{pgfonlayer}{background}
\node[draw=red,thick,densely dashed,inner sep=5pt] [fit = (backinit) (backenc1) (backenc10)] (backrnn) {};
\end{pgfonlayer}
\node[font=\scriptsize,anchor=south] (backrnnlabel) at ([xshift=-0.5\base,yshift=\base]backrnn.north east) {反向RNN};
\draw[->,dashed] (backrnnlabel.south) to ([xshift=-0.5\base]backrnn.north east);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\node [anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h1) at (0,0) {\scriptsize{$\textbf{h}_1$}};
\node [anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h2) at ([xshift=1em]h1.east) {\scriptsize{$\textbf{h}_2$}};
\node [anchor=west,inner sep=0pt,minimum width=3em] (h3) at ([xshift=0.5em]h2.east) {\scriptsize{...}};
\node [anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h4) at ([xshift=0.5em]h3.east) {\scriptsize{$\textbf{h}_m$}};
\node [anchor=south,circle,minimum size=1.0em,draw,ublue,thick] (sum) at ([yshift=2em]h2.north east) {};
\draw [thick,-,ublue] (sum.north) -- (sum.south);
\draw [thick,-,ublue] (sum.west) -- (sum.east);
\node [anchor=south,draw,fill=green!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (th1) at ([yshift=2em,xshift=-1em]sum.north west) {\scriptsize{$\textbf{s}_{j-1}$}};
\node [anchor=west,draw,fill=green!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (th2) at ([xshift=2em]th1.east) {\scriptsize{$\textbf{s}_{j}$}};
\draw [->] (h1.north) .. controls +(north:0.8) and +(west:1) .. (sum.190) node [pos=0.3,left] {\scriptsize{$\alpha_{1,j}$}};
\draw [->] (h2.north) .. controls +(north:0.6) and +(220:0.2) .. (sum.220) node [pos=0.2,right] {\scriptsize{$\alpha_{2,j}$}};
\draw [->] (h4.north) .. controls +(north:0.8) and +(east:1) .. (sum.-10) node [pos=0.1,left] (alphan) {\scriptsize{$\alpha_{m,j}$}};
\draw [->] ([xshift=-1.5em]th1.west) -- ([xshift=-0.1em]th1.west);
\draw [->] ([xshift=0.1em]th1.east) -- ([xshift=-0.1em]th2.west);
\draw [->] ([xshift=0.1em]th2.east) -- ([xshift=1.5em]th2.east);
\draw [->] (sum.north) .. controls +(north:0.8) and +(west:0.2) .. ([yshift=-0.4em,xshift=-0.1em]th2.west) node [pos=0.2,right] (ci) {\scriptsize{$\textbf{C}_{j}$}};
\node [anchor=south,inner sep=1pt] (output) at ([yshift=0.8em]th2.north) {\scriptsize{输出层}};
\draw [->] ([yshift=0.1em]th2.north) -- ([yshift=-0.1em]output.south);
\node [anchor=north] (enc1) at (h1.south west) {\scriptsize{编码器输出}};
\node [anchor=north] (enc12) at ([yshift=0.5em]enc1.south) {\scriptsize{(位置$1$)}};
\node [anchor=north] (enc2) at (h2.south) {\scriptsize{编码器输出}};
\node [anchor=north] (enc22) at ([yshift=0.5em]enc2.south) {\scriptsize{(位置$2$)}};
\node [anchor=north] (enc4) at (h4.south) {\scriptsize{编码器输出}};
\node [anchor=north] (enc42) at ([yshift=0.5em]enc4.south) {\scriptsize{(位置$4$)}};
{
\node [anchor=west] (math1) at ([xshift=5em,yshift=1em]th2.east) {$\textbf{C}_j = \sum_{i} \alpha_{i,j} \textbf{h}_i \ \ $};
}
{
\node [anchor=north west] (math2) at ([yshift=-2em]math1.south west) {$\alpha_{i,j} = \frac{\exp(\beta_{i,j})}{\sum_{i'} \exp(\beta_{i',j})}$};
\node [anchor=north west] (math3) at ([yshift=-0em]math2.south west) {$\beta_{i,j} = a(\textbf{s}_{j-1}, \textbf{h}_i)$};
}
\begin{pgfonlayer}{background}
{
\node [rectangle,inner sep=0.4em,rounded corners=1pt,fill=blue!10,drop shadow] [fit = (math1)] (box1) {};
}
{
\node [rectangle,inner sep=0.4em,rounded corners=1pt,fill=orange!10,drop shadow] [fit = (math2) (math3)] (box2) {};
}
\end{pgfonlayer}
{
\draw [->,dotted,thick,blue] (box1.west) .. controls +(west:1.2) and +(east:2.0) .. ([xshift=-0.3em]ci.east);
}
{
\draw [->,dotted,thick,orange] ([yshift=1em]box2.west) .. controls +(west:1.2) and +(east:1.0) .. ([xshift=-0.35em]alphan.east);
}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\begin{tikzpicture}
\begin{axis}[
name=s1,
width=7cm, height=4.5cm,
xtick={-4,-3,-2,-1,0,1,2,3,4},
ytick={0,1,...,4},
xticklabel style={opacity=0},
yticklabel style={opacity=0},
xlabel={$w$},
ylabel={$L(w)$},
axis line style={->,very thick},
xlabel style={xshift=2.2cm,yshift=1.2cm},
ylabel style={rotate=-90,xshift=1.5cm,yshift=1.6cm},
tick align=inside,
axis y line*=left,
axis x line*=bottom,
tick style={opacity=0},
xmin=-4,
xmax=4,
ymin=0,
ymax=4.5]
\addplot [dashed,ublue,thick] {x^2/4};
\addplot [quiver={u=1,v=x/2,scale arrows = 0.25},domain=-4:-0.3,->,samples=10,red!60,ultra thick] {x^2/4};
\addplot [draw=ublue,fill=red,mark=*] coordinates{(0,0)};
\end{axis}
\begin{axis}[
at={(s1.south)},
anchor=south,
xshift=6cm,
yshift=0cm,
width=7cm, height=4.5cm,
xtick={-4,-3,-2,-1,0,1,2,3,4},
ytick={0,1,...,4},
xticklabel style={opacity=0},
yticklabel style={opacity=0},
xlabel={$w$},
ylabel={$L(w)$},
axis line style={->,very thick},
xlabel style={xshift=2.2cm,yshift=1.2cm},
ylabel style={rotate=-90,xshift=1.5cm,yshift=1.6cm},
tick align=inside,
axis y line*=left,
axis x line*=bottom,
tick style={opacity=0},
xmin=-4,
xmax=4,
ymin=0,
ymax=4.5]
\addplot [dashed,ublue,thick] {x^2/4};
\addplot [quiver={u=-x-(x/abs(x))*(1+x^2-4)^(1/2),v=-0.7},domain=-4:3.6,->,samples=2,red!60,ultra thick] {x^2/4};
\addplot [quiver={u=-x-(x/abs(x))*(1+x^2-4)^(1/2),v=-0.7},domain=-3.13:2.6,->,samples=2,red!60,ultra thick] {x^2/4};
\addplot [draw=ublue,fill=red,mark=*] coordinates{(0,0)};
\end{axis}
\end{tikzpicture}
\ No newline at end of file
%----------------------------------
\begin{tikzpicture}
\setlength{\base}{1.5em}
\tikzstyle{samplenode} = [rounded corners=1pt,minimum size=1\base,draw,inner sep=3pt,outer sep=0pt,fill=green!30!white]
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum size=1\base,draw,inner sep=0pt,outer sep=0pt,fill=blue!30!white]
\tikzstyle{wordnode} = [font=\footnotesize,align=center]
\begin{scope}
\coordinate (batch0) at (0,0);
\foreach \i [count=\j from 0,evaluate=\i as \k using int(4-\i)] in {1,2}
\node [samplenode,anchor=south west,font=\scriptsize] (batch\i) at ([shift={(-1em,-0.5em)}]batch\j.south west) {\qquad \k};
\foreach \i [count=\j from 2,evaluate=\i as \k using int(4-\i)] in {3}
\node [samplenode,anchor=south west,font=\scriptsize] (batch\i) at ([shift={(-1em,-0.5em)}]batch\j.south west) {句子 \k};
\draw [decorate,decoration={brace}] (batch1.south east) to node [auto,rotate=30,anchor=north,font=\scriptsize] {batch大小} (batch3.south east);
\node [samplenode,anchor=west,font=\scriptsize] (sample2) at ([xshift=4em]batch2.east) {句子2};
\node [samplenode,anchor=south,font=\scriptsize] (sample3) at ([yshift=3em]sample2.north) {句子3};
\node [samplenode,anchor=north,font=\scriptsize] (sample1) at ([yshift=-3em]sample2.south) {句子1};
\foreach \i in {1,2,3}
\draw [->,thick] ([xshift=1.5em]batch2.east) -- ([xshift=-3pt]sample\i.west);
\foreach \i in {1,2,3}
{
\coordinate (start) at ([xshift=2em]sample\i.east);
\node [wordnode,anchor=west] (rnn0) at (start) {$0$};
\foreach \j [count=\k from 0] in {1,2,3}
{
\node [rnnnode,anchor=west] (rnn\j) at ([xshift=1em]rnn\k.east) {};
\draw [-latex'] (rnn\k) to (rnn\j);
\coordinate (in\j) at ([yshift=-1em]rnn\j.south);
\draw [-latex'] (in\j) to (rnn\j.south);
\coordinate (out\j) at ([yshift=1em]rnn\j.north);
\draw [-latex'] (rnn\j.north) to (out\j);
}
\node [wordnode,anchor=west] (rnn4) at ([xshift=1em]rnn3.east) {$\cdots$};
\draw [-latex'] (rnn3) to (rnn4);
\node [draw,densely dashed,thick,rounded corners=0.3em,fit=(start) (in3) (out3) (rnn4),label={[font=\footnotesize,rotate=90,anchor=north]0:设备\i}] (rnn) {};
\draw [->,double] ([xshift=3pt]sample\i.east) -- ([xshift=-3pt]rnn.west);
}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnnnode} = [minimum height=1.1em,minimum width=3.5em,inner sep=2pt,rounded corners=1pt,draw,fill=red!20];
\tikzstyle{wnode} = [minimum height=1.0em,minimum width=3.5em,inner sep=2pt,rounded corners=1pt,draw,fill=white];
{
\node [rnnnode,anchor=west,fill=green!20] (t1) at (0,0) {\scriptsize{$e_y()$}};
}
{
\node [rnnnode,anchor=south] (s1) at ([yshift=1em]t1.north) {\scriptsize{$\textbf{s}_1$}};
}
\node [wnode,anchor=north] (wt1) at ([yshift=-0.8em]t1.south) {\scriptsize{$\langle$sos$\rangle$}};
{
\node [rnnnode,anchor=south,fill=blue!20] (o1) at ([yshift=1em]s1.north) {\scriptsize{Softmax}};
}
{
\foreach \x in {1}{
\draw [->] ([yshift=-0.7em]t\x.south) -- ([yshift=-0.1em]t\x.south);
\draw [->] ([yshift=0.1em]t\x.north) -- ([yshift=-0.1em]s\x.south);
\draw [->] ([yshift=0.1em]s\x.north) -- ([yshift=-0.1em]o\x.south);
}
}
{
\draw [->] ([xshift=-0.5em]s1.west) -- ([xshift=-0.1em]s1.west) node [pos=0,left,inner sep=1pt] {\scriptsize{0}};
}
{
\node [anchor=center,inner sep=2pt] (wo1) at ([yshift=1.2em]o1.north) {};
}
{
\node [anchor=east] (vocab) at ([xshift=-5em]s1.west) {\tiny{$\begin{bmatrix} \textrm{Have} & 0.50 \\ \textrm{I} & 0.02 \\ \textrm{it} & 0.03 \\ \textrm{has} & 0.30 \\ \textrm{you} & 0.01 \\ \textrm{the} & 0.01 \\ \textrm{a} & 0.01 \\ \textrm{an} & 0.02 \\ \textrm{he} & 0.03 \\ \textrm{she} & 0.01 \\ \textrm{are} & 0.00 \\ \textrm{am} & 0.01 \\ ... & ... \end{bmatrix}$}};
\node [anchor=south] (vocablabel) at (vocab.north) {\scriptsize{单词的概率分布}};
\draw [->,red,very thick,dotted] (o1.west) .. controls +(west:1) and +(east:2) .. ([yshift=1em]vocab.south east);
}
{
\node [anchor=east,inner sep=1pt] (vocabtopn) at ([xshift=-0.5em,yshift=-0.5em]wo1.west) {\scriptsize{$\begin{bmatrix} \textrm{Have} \end{bmatrix}$}};
\draw [->] ([yshift=-1.6em,xshift=-0.4em]vocab.north east) .. controls +(east:1) and +(west:1) .. ([xshift=0.1em,yshift=0.4em]vocabtopn.west) node [pos=0.3,below] (topnlabel) {\scriptsize{top-1}};
}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnnnode} = [minimum height=1.1em,minimum width=2.1em,inner sep=2pt,rounded corners=1pt,draw,fill=red!20];
\node [rnnnode,anchor=west] (h1) at (0,0) {\tiny{$\textbf{h}_1$}};
\node [anchor=west] (h2) at ([xshift=1em]h1.east) {\tiny{...}};
\node [rnnnode,anchor=west] (h3) at ([xshift=1em]h2.east) {\tiny{$\textbf{h}_m$}};
\node [rnnnode,anchor=north,fill=green!20] (e1) at ([yshift=-1em]h1.south) {\tiny{$e_x()$}};
\node [anchor=west] (e2) at ([xshift=1em]e1.east) {\tiny{...}};
\node [rnnnode,anchor=west,fill=green!20] (e3) at ([xshift=1em]e2.east) {\tiny{$e_x()$}};
\node [anchor=north,inner sep=2pt] (w1) at ([yshift=-0.6em]e1.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w2) at ([yshift=-0.8em]e2.south) {\tiny{...}};
\node [anchor=north,inner sep=2pt] (w3) at ([yshift=-0.6em]e3.south) {\tiny{$\langle$eos$\rangle$}};
\draw [->] (w1.north) -- ([yshift=-0.1em]e1.south);
\draw [->] (w3.north) -- ([yshift=-0.1em]e3.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]h1.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]h3.south);
\draw [->] ([xshift=0.1em]h1.east) -- ([xshift=-0.1em]h2.west);
\draw [->] ([xshift=0.1em]h2.east) -- ([xshift=-0.1em]h3.west);
\draw [->] ([xshift=-0.8em]h1.west) -- ([xshift=-0.1em]h1.west) node [pos=0,left,inner sep=2pt] {\tiny{0}};
\node [anchor=south] (encoder) at ([xshift=-0.2em]h1.north west) {\scriptsize{\textbf{编码器}}};
{
\node [rnnnode,anchor=west,fill=green!20] (t1) at ([xshift=3em]h3.east) {\tiny{$e_y()$}};
}
{
\node [rnnnode,anchor=west,fill=green!20] (t2) at ([xshift=1.5em]t1.east) {\tiny{$e_y()$}};
}
{
\node [rnnnode,anchor=west,fill=green!20] (t3) at ([xshift=1.5em]t2.east) {\tiny{$e_y()$}};
\node [rnnnode,anchor=west,fill=green!20] (t4) at ([xshift=1.5em]t3.east) {\tiny{$e_y()$}};
\node [anchor=west,inner sep=2pt] (t5) at ([xshift=0.3em]t4.east) {\tiny{...}};
}
{
\node [rnnnode,anchor=south] (s1) at ([yshift=1em]t1.north) {\tiny{$\textbf{s}_1$}};
}
{
\node [rnnnode,anchor=south] (s2) at ([yshift=1em]t2.north) {\tiny{$\textbf{s}_2$}};
}
{
\node [rnnnode,anchor=south] (s3) at ([yshift=1em]t3.north) {\tiny{$\textbf{s}_3$}};
\node [rnnnode,anchor=south] (s4) at ([yshift=1em]t4.north) {\tiny{$\textbf{s}_4$}};
\node [anchor=west,inner sep=2pt] (s5) at ([xshift=0.3em]s4.east) {\tiny{...}};
}
{
\node [rnnnode,anchor=south,fill=blue!20] (o1) at ([yshift=1em]s1.north) {\tiny{Softmax}};
\node [anchor=east] (decoder) at ([xshift=-0.5em]o1.north west) {\scriptsize{\textbf{解码器}}};
}
{
\node [rnnnode,anchor=south,fill=blue!20] (o2) at ([yshift=1em]s2.north) {\tiny{Softmax}};
}
{
\node [rnnnode,anchor=south,fill=blue!20] (o3) at ([yshift=1em]s3.north) {\tiny{Softmax}};
\node [rnnnode,anchor=south,fill=blue!20] (o4) at ([yshift=1em]s4.north) {\tiny{Softmax}};
\node [anchor=west,inner sep=2pt] (o5) at ([xshift=0.3em]o4.east) {\tiny{...}};
}
{
\node [anchor=north,inner sep=2pt] (wt1) at ([yshift=-0.6em]t1.south) {\tiny{$\langle$sos$\rangle$}};
}
{
\node [anchor=north,inner sep=2pt] (wt2) at ([yshift=-0.6em]t2.south) {\tiny{Have}};
}
{
\node [anchor=north,inner sep=2pt] (wt3) at ([yshift=-0.8em]t3.south) {\tiny{you}};
\node [anchor=north,inner sep=2pt] (wt4) at ([yshift=-0.6em]t4.south) {\tiny{learned}};
}
{
\node [anchor=center,inner sep=2pt] (wo1) at ([yshift=1.2em]o1.north) {\tiny{Have}};
}
{
\node [anchor=south,inner sep=2pt] (wos1) at (wo1.north) {\tiny{\textbf{[step 1]}}};
}
{
\node [anchor=center,inner sep=2pt] (wo2) at ([yshift=1.2em]o2.north) {\tiny{you}};
}
{
\node [anchor=south,inner sep=2pt] (wos2) at (wo2.north) {\tiny{\textbf{[step 2]}}};
}
{
\node [anchor=center,inner sep=2pt] (wo3) at ([yshift=1.2em]o3.north) {\tiny{learned}};
\node [anchor=south,inner sep=2pt] (wos3) at (wo3.north) {\tiny{\textbf{[step 3]}}};
\node [anchor=center,inner sep=2pt] (wo4) at ([yshift=1.2em]o4.north) {\tiny{nothing}};
\node [anchor=south,inner sep=2pt] (wos4) at (wo4.north) {\tiny{\textbf{[step 4]}}};
}
{
\foreach \x in {1}{
\draw [->] ([yshift=-0.7em]t\x.south) -- ([yshift=-0.1em]t\x.south);
\draw [->] ([yshift=0.1em]t\x.north) -- ([yshift=-0.1em]s\x.south);
\draw [->] ([yshift=0.1em]s\x.north) -- ([yshift=-0.1em]o\x.south);
\draw [->] ([yshift=0.1em]o\x.north) -- ([yshift=0.8em]o\x.north) node [pos=0.5,right] {\tiny{top1}};
}
}
{
\foreach \x in {2}{
\draw [->] ([yshift=-0.7em]t\x.south) -- ([yshift=-0.1em]t\x.south);
\draw [->] ([yshift=0.1em]t\x.north) -- ([yshift=-0.1em]s\x.south);
\draw [->] ([yshift=0.1em]s\x.north) -- ([yshift=-0.1em]o\x.south);
\draw [->] ([yshift=0.1em]o\x.north) -- ([yshift=0.8em]o\x.north) node [pos=0.5,right] {\tiny{top1}};
}
}
{
\foreach \x in {3,4}{
\draw [->] ([yshift=-0.7em]t\x.south) -- ([yshift=-0.1em]t\x.south);
\draw [->] ([yshift=0.1em]t\x.north) -- ([yshift=-0.1em]s\x.south);
\draw [->] ([yshift=0.1em]s\x.north) -- ([yshift=-0.1em]o\x.south);
\draw [->] ([yshift=0.1em]o\x.north) -- ([yshift=0.8em]o\x.north) node [pos=0.5,right] {\tiny{top1}};
}
}
{
\draw [->] ([xshift=-0.8em]s1.west) -- ([xshift=-0.1em]s1.west) node [pos=0,left] {\tiny{0}};
}
{
\draw [->] ([xshift=0.1em]s1.east) -- ([xshift=-0.1em]s2.west);
}
{
\draw [->] ([xshift=0.1em]s2.east) -- ([xshift=-0.1em]s3.west);
\draw [->] ([xshift=0.1em]s3.east) -- ([xshift=-0.1em]s4.west);
}
{
\draw [->,thick,dotted] (wo1.east) .. controls +(east:1.0) and +(west:1.0) ..(wt2.west);
}
{
\draw [->,thick,dotted] (wo2.east) .. controls +(east:1.3) and +(west:1.1) ..(wt3.west);
\draw [->,thick,dotted] (wo3.east) .. controls +(east:0.9) and +(west:0.9) ..(wt4.west);
}
{
\node [circle,draw,anchor=south,inner sep=3pt,fill=orange!20] (c2) at ([yshift=2em]h2.north) {\tiny{$\textbf{C}_2$}};
\node [anchor=south] (c2label) at (c2.north) {\tiny{\textbf{注意力机制:上下文}}};
\node [anchor=south] (c2more) at ([yshift=-1.5em]c2.south) {\tiny{...}};
\draw [->] (h1.north) .. controls +(north:0.6) and +(250:0.9) .. (c2.250);
\draw [->] (h3.north) .. controls +(north:0.6) and +(290:0.9) .. (c2.290);
\draw [->] ([yshift=-0.3em]s1.west) .. controls +(west:2) and +(-50:0.3) .. (c2.-40);
}
{
\draw [->] (c2.0) -- ([xshift=1.2in]c2.0) -- ([yshift=0.3em,xshift=-1.2em]s2.west) -- ([yshift=0.3em,xshift=-0.1em]s2.west);
}
{
\node [circle,draw,anchor=north,inner sep=3pt,fill=orange!20] (c3) at ([yshift=-2em]t2.south) {\tiny{$\textbf{C}_3$}};
\draw [->] ([xshift=-0.7em]c3.west) -- ([xshift=-0.1em]c3.west);
\draw [->] ([xshift=0.1em]c3.east) .. controls +(east:0.6) and +(west:0.8) ..([yshift=-0.3em,xshift=-0.1em]s3.west);
}
{
\node [circle,draw,anchor=north,inner sep=3pt,fill=orange!20] (c4) at ([yshift=-2em]t3.south) {\tiny{$\textbf{C}_4$}};
\draw [->] ([xshift=-0.7em]c4.west) -- ([xshift=-0.1em]c4.west);
\draw [->] ([xshift=0.1em]c4.east) .. controls +(east:0.6) and +(west:0.8) ..([yshift=-0.3em,xshift=-0.1em]s4.west);
}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%--------------------------------------------------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.9cm}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum size=0.5\base,draw,inner sep=0pt,outer sep=0pt]
\tikzstyle{wordnode} = [font=\tiny]
% RNN translation model
\begin{scope}[local bounding box=RNNMT]
% RNN Encoder
\coordinate (eemb0) at (0,0);
\foreach \x [count=\y from 0] in {1,2,...,10}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\x) at ([xshift=0.4\base]eemb\y.east) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,fill=blue!30!white,anchor=south] (enc1\x) at ([yshift=0.3\base]eemb\x.north) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,fill=blue!30!white,anchor=south] (enc2\x) at ([yshift=0.5\base]enc1\x.north) {};
\node[wordnode,left=0.4\base of enc11] (init1) {$0$};
\node[wordnode,left=0.4\base of enc21] (init2) {$0$};
\node[wordnode,below=0pt of eemb1] () {};
\node[wordnode,below=0pt of eemb2] () {知道};
\node[wordnode,below=0pt of eemb3] () {};
\node[wordnode,below=0pt of eemb4] () {北京站};
\node[wordnode,below=0pt of eemb5] () {};
\node[wordnode,below=0pt of eemb6] () {};
\node[wordnode,below=0pt of eemb7] () {怎么};
\node[wordnode,below=0pt of eemb8] () {};
\node[wordnode,below=0pt of eemb9] () {};
\node[wordnode,below=0pt of eemb10] () {$\langle$eos$\rangle$};
% RNN Decoder
\foreach \x in {1,2,...,10}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([yshift=1.5\base]enc2\x.north) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,fill=blue!30!white,anchor=south] (dec1\x) at ([yshift=0.3\base]demb\x.north) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,fill=blue!30!white,anchor=south] (dec2\x) at ([yshift=0.5\base]dec1\x.north) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax\x) at ([yshift=0.5\base]dec2\x.north) {};
% Decoder input words
\node[wordnode,below=0pt of demb1] (decwordin) {$\langle$sos$\rangle$};
\ExtractX{$(demb2.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Do};
\ExtractX{$(demb3.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
\ExtractX{$(demb4.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {know};
\ExtractX{$(demb5.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {the};
\ExtractX{$(demb6.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {way};
\ExtractX{$(demb7.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {to};
\ExtractX{$(demb8.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Beijing};
\ExtractX{$(demb9.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Railway};
\ExtractX{$(demb10.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Station};
% Decoder output words
\node[wordnode,above=0pt of softmax1] (decwordout) {Do};
\ExtractX{$(softmax2.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
\ExtractX{$(softmax3.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {know};
\ExtractX{$(softmax4.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {the};
\ExtractX{$(softmax5.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {way};
\ExtractX{$(softmax6.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {to};
\ExtractX{$(softmax7.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Beijing};
\ExtractX{$(softmax8.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Railway};
\ExtractX{$(softmax9.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Station};
\ExtractX{$(softmax10.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {$\langle$eos$\rangle$};
% Connections
\draw[-latex'] (init1.east) to (enc11.west);
\draw[-latex'] (init2.east) to (enc21.west);
\foreach \x in {1,2,...,10}
\draw[-latex'] (eemb\x) to (enc1\x);
\foreach \x in {1,2,...,10}
\draw[-latex'] (enc1\x) to (enc2\x);
\foreach \x in {1,2,...,10}
\draw[-latex'] (demb\x) to (dec1\x);
\foreach \x in {1,2,...,10}
\draw[-latex'] (dec1\x) to (dec2\x);
\foreach \x in {1,2,...,10}
\draw[-latex'] (dec2\x.north) to ([yshift=0.5\base]dec2\x.north);
\foreach \x [count=\y from 2] in {1,2,...,9}
{
\draw[-latex'] (enc1\x.east) to (enc1\y.west);
\draw[-latex'] (enc2\x.east) to (enc2\y.west);
\draw[-latex'] (dec1\x.east) to (dec1\y.west);
\draw[-latex'] (dec2\x.east) to (dec2\y.west);
}
\coordinate (bridge) at ([yshift=1.4\base]enc16.north west);
\draw[-latex'] (enc210.north) .. controls +(north:0.4\base) and +(east:1.5\base) .. (bridge) .. controls +(west:8.0\base) and +(south west:0.8\base) .. (dec21.west);
\coordinate (bridge) at ([yshift=1.6\base]enc16.north west);
\draw[-latex'] (enc110.east) .. controls +(east:0.5\base) and +(east:8\base) .. (bridge) .. controls +(west:7.5\base) and +(south west:0.1\base) .. (dec11.west);
% stack RNN
\begin{pgfonlayer}{background}
\node[draw=red,thick,densely dashed,inner sep=5pt] [fit = (init2) (enc21) (enc210)] (enc2) {};
\node[draw=red,thick,densely dashed,inner sep=5pt] [fit = (dec21) (dec210)] (dec2) {};
\end{pgfonlayer}
\node[font=\scriptsize,anchor=west] (label) at ([xshift=0.4\base]demb10.east) {堆叠RNN};
\draw[->,dashed] (label.north) to (dec2.east);
\draw[->,dashed] (label.south) to (enc2.east);
\end{scope}
\end{tikzpicture}
\begin{tikzpicture}
\begin{scope}
\small{
\node [anchor=south west,minimum width=15em] (source) at (0,0) {\textbf{source}: 我\ \ \ \ \ \ \ \ \ \ \ \ 感到\ \ \ \ 满意};
{
\node [anchor=south west,minimum width=15em] (target) at ([yshift=12em]source.north west) {\textbf{target}: I\ \ am\ \ \ satisfied\ \ \ with\ \ \ you};
}
{
\node [anchor=center,minimum width=9.6em,minimum height=1.8em,draw,rounded corners=0.3em] (hidden) at ([yshift=6em]source.north) {};
\node [anchor=west,minimum width=1.5em,minimum size=1.5em,fill=ugreen!20] (cell01) at ([xshift=0.2em]hidden.west) {\footnotesize{.2}};
\node [anchor=west,minimum width=1.5em,minimum size=1.5em,fill=ugreen!10] (cell02) at (cell01.east) {\footnotesize{-1}};
\node [anchor=west,minimum width=1.5em,minimum size=1.5em,fill=ugreen!70] (cell03) at (cell02.east) {\footnotesize{6}};
\node [anchor=west,minimum width=1.5em,minimum size=1.5em,fill=ugreen!50] (cell04) at (cell03.east) {\footnotesize{5}};
\node [anchor=west,minimum width=1.5em,minimum size=1.5em,fill=ugreen!30] (cell05) at (cell04.east) {\footnotesize{.7}};
\node [anchor=west,minimum width=1.5em,minimum size=1.5em,fill=ugreen!10] (cell06) at (cell05.east) {\footnotesize{-2}};
}
%占位
\node (cell010) at ([xshift=-9em,yshift=0em]cell01.west){\quad};
%\rightarrow {}
\node [anchor=west,minimum width=1.5em,minimum size=1.5em] (cell07) at (cell06.east) {\hspace{0.07em}\footnotesize{$\longrightarrow$}};
\node [anchor=west,minimum width=1.5em,minimum size=1.5em] (cell08) at (cell06.east){\small{
\hspace{0.6em}
\begin{tabular}{l}
源语言句子的``表示''
\end{tabular}
}
};
{
\filldraw [fill=red!20,draw=white] (source.north west) -- (source.north east) -- ([xshift=-0.2em,yshift=-0.1em]hidden.south east) -- ([xshift=0.2em,yshift=-0.1em]hidden.south west);
}
{
\filldraw [fill=blue!20,draw=white] (target.south west) -- (target.south east) -- ([xshift=-0.2em,yshift=0.1em]hidden.north east) -- ([xshift=0.2em,yshift=0.1em]hidden.north west);
}
{
\draw [->,thick] (source.north west) -- ([xshift=0.2em,yshift=-0.1em]hidden.south west);
\draw [->,thick] (source.north east) -- ([xshift=-0.2em,yshift=-0.1em]hidden.south east);
}
{
\draw [->,thick] ([xshift=0.2em,yshift=0.1em]hidden.north west) -- (target.south west);
\draw [->,thick] ([xshift=-0.2em,yshift=0.1em]hidden.north east) -- (target.south east);
}
}
{
\node [anchor=south] (enclabel) at ([yshift=2em]source.north) {\small{\textbf{Encoder}}};
\node [anchor=north] (declabel) at ([yshift=-2em]target.south) {\small{\textbf{Decoder}}};
}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%---------------------------------------------------------
\begin{tikzpicture}
%\setlength{\mystep}{1.6em}
%%% a simple encoder-decoder model
\begin{scope}
\foreach \x in {1,2,...,6}
\node[] (s\x) at (\x * 1.6em,0) {};
\node [] (ws1) at (s1) {\scriptsize{}};
\node [] (ws2) at (s2) {\scriptsize{}};
\node [] (ws3) at (s3) {\scriptsize{}};
\node [] (ws4) at (s4) {\scriptsize{很长}};
\node [] (ws5) at (s5) {\scriptsize{}};
\node [] (ws6) at (s6) {\scriptsize{句子}};
\foreach \x in {1,2,...,6}
\node[] (t\x) at (\x * 1.6em + 2.4in,0) {};
\node [] (wt1) at (t1) {\scriptsize{This}};
\node [] (wt2) at (t2) {\scriptsize{is}};
\node [] (wt3) at ([yshift=-1pt]t3) {\scriptsize{a}};
\node [] (wt4) at ([yshift=-0.1em]t4) {\scriptsize{very}};
\node [] (wt5) at (t5) {\scriptsize{long}};
\node [] (wt6) at ([xshift=1em]t6) {\scriptsize{sentence}};
\node [anchor=south west,fill=red!30,minimum width=1.6in,minimum height=1.5em] (encoder) at ([yshift=1.0em]ws1.north west) {\footnotesize{Encoder}};
\node [anchor=west,fill=blue!30,minimum width=1.9in,minimum height=1.5em] (decoder) at ([xshift=4.5em]encoder.east) {\footnotesize{Decoder}};
\node [anchor=west,fill=green!30,minimum height=1.5em] (representation) at ([xshift=1em]encoder.east) {\footnotesize{表示}};
\draw [->,thick] ([xshift=1pt]encoder.east)--([xshift=-1pt]representation.west);
\draw [->,thick] ([xshift=1pt]representation.east)--([xshift=-1pt]decoder.west);
\foreach \x in {1,2,...,6}
\draw[->] ([yshift=0.1em]s\x.north) -- ([yshift=1.2em]s\x.north);
\foreach \x in {1,2,...,5}
\draw[<-] ([yshift=0.1em]t\x.north) -- ([yshift=1.2em]t\x.north);
\draw[<-] ([yshift=0.1em,xshift=1em]t6.north) -- ([yshift=1.2em,xshift=1em]t6.north);
\node [anchor=north] (cap) at ([xshift=2em,yshift=-2.5em]encoder.south east) {\small{(a) 简单的编码器-解码器框架}};
\end{scope}
%%% a encoder-decoder model with attention
\begin{scope}[yshift=-1.7in]
\foreach \x in {1,2,...,6}
\node[] (s\x) at (\x * 1.6em,0) {};
\node [] (ws1) at (s1) {\scriptsize{}};
\node [] (ws2) at (s2) {\scriptsize{}};
\node [] (ws3) at (s3) {\scriptsize{}};
\node [] (ws4) at (s4) {\scriptsize{很长}};
\node [] (ws5) at (s5) {\scriptsize{}};
\node [] (ws6) at (s6) {\scriptsize{句子}};
\foreach \x in {1,2,...,6}
\node[] (t\x) at (\x * 1.6em + 2.4in,0) {};
\node [] (wt1) at (t1) {\scriptsize{This}};
\node [] (wt2) at (t2) {\scriptsize{is}};
\node [] (wt3) at ([yshift=-1pt]t3) {\scriptsize{a}};
\node [] (wt4) at ([yshift=-0.1em]t4) {\scriptsize{very}};
\node [] (wt5) at (t5) {\scriptsize{long}};
\node [] (wt6) at ([xshift=1em]t6) {\scriptsize{sentence}};
\node [anchor=south west,fill=red!30,minimum width=1.6in,minimum height=1.5em] (encoder) at ([yshift=1.0em]ws1.north west) {\footnotesize{Encoder}};
\node [anchor=west,fill=blue!30,minimum width=1.9in,minimum height=1.5em] (decoder) at ([xshift=4.5em]encoder.east) {\footnotesize{Decoder}};
\foreach \x in {1,2,...,6}
\draw[->] ([yshift=0.1em]s\x.north) -- ([yshift=1.2em]s\x.north);
\foreach \x in {1,2,...,5}
\draw[<-] ([yshift=0.1em]t\x.north) -- ([yshift=1.2em]t\x.north);
\draw[<-] ([yshift=0.1em,xshift=1em]t6.north) -- ([yshift=1.2em,xshift=1em]t6.north);
\draw [->] ([yshift=3em]s6.north) -- ([yshift=4em]s6.north) -- ([yshift=4em]t1.north) node [pos=0.5,fill=green!30,inner sep=2pt] (c1) {\scriptsize{表示$\textbf{C}_1$}} -- ([yshift=3em]t1.north) ;
\draw [->] ([yshift=3em]s5.north) -- ([yshift=5.3em]s5.north) -- ([yshift=5.3em]t2.north) node [pos=0.5,fill=green!30,inner sep=2pt] (c2) {\scriptsize{表示$\textbf{C}_2$}} -- ([yshift=3em]t2.north) ;
\draw [->] ([yshift=3.5em]s3.north) -- ([yshift=6.6em]s3.north) -- ([yshift=6.6em]t4.north) node [pos=0.5,fill=green!30,inner sep=2pt] (c3) {\scriptsize{表示$\textbf{C}_i$}} -- ([yshift=3.5em]t4.north) ;
\node [anchor=north] (smore) at ([yshift=3.5em]s3.north) {...};
\node [anchor=north] (tmore) at ([yshift=3.5em]t4.north) {...};
\node [anchor=north] (cap) at ([xshift=2em,yshift=-2.5em]encoder.south east) {\small{(b) 引入注意力机制的编码器-解码器框架}};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%---------------------------------------
\begin{frame}{}
\begin{tcolorbox}[size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,boxrule=1pt]
{
\small{古文:侍卫步军都指挥使、彰信节度使李继勋营于寿州城南,唐刘仁赡伺继勋无备,出兵击之,杀士卒数百人,焚其攻具。}
}
\end{tcolorbox}
\vspace{-0.4em}
\begin{tcolorbox}[size=normal,left=2mm,right=1mm,colback=blue!5!white,colframe=blue!75!black,boxrule=1pt]
{
\small{现代文:侍卫步军都指挥使、彰信节度使李继勋在寿州城南扎营,唐刘仁赡窥伺李继勋没有防备,出兵攻打他,杀死士兵几百人,烧毁李继勋的攻城器}
}
\end{tcolorbox}
\vspace{0.2em}
\begin{tcolorbox}[size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,boxrule=1pt]
{
\small{古文:其后人稍稍识之,多延至其家,使为弟子论学。}
}
\end{tcolorbox}
\vspace{-0.4em}
\begin{tcolorbox}[size=normal,left=2mm,right=1mm,colback=blue!5!white,colframe=blue!75!black,boxrule=1pt]
{
\small{现代文:后来的人渐渐认识他,多把他请到家里,让他为弟子讲授学问。}
}
\end{tcolorbox}
\vspace{-0.8em}
\end{frame}
\ No newline at end of file
%-------------------------------------------
\begin{tikzpicture}
%\newlength{\mystep}
%\newlength{\wseg}
%\newlength{\hseg}
%\newlength{\wnode}
%\newlength{\hnode}
\setlength{\wseg}{1.5cm}
\setlength{\hseg}{1.0cm}
\setlength{\wnode}{3.75cm}
\setlength{\hnode}{1.0cm}
\tikzstyle{elementnode} = [rectangle,text=white,anchor=center]
\tikzstyle{srcnode} = [rotate=45,font=\small,anchor=south west]
\tikzstyle{tgtnode} = [left,font=\small,anchor=north east]
\tikzstyle{alignmentnode} = [rectangle,draw,minimum height=3.6\hnode,minimum width=0.36\hnode]
\tikzstyle{probnode} = [fill=blue!30,minimum width=0.4\hnode]
\tikzstyle{labelnode} = [above]
% alignment matrix
\begin{scope}[scale=0.9,yshift=0.12in]
\foreach \i / \j / \c in
{0/7/0.2, 1/7/0.45, 2/7/0.15, 3/7/0.15, 4/7/0.15, 5/7/0.15,
0/6/0.35, 1/6/0.45, 2/6/0.15, 3/6/0.15, 4/6/0.15, 5/6/0.15,
0/5/0.25, 1/5/0.15, 2/5/0.15, 3/5/0.35, 4/5/0.15, 5/5/0.15,
0/4/0.15, 1/4/0.25, 2/4/0.2, 3/4/0.30, 4/4/0.15, 5/4/0.15,
0/3/0.15, 1/3/0.15, 2/3/0.8, 3/3/0.25, 4/3/0.15, 5/3/0.25,
0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.25, 5/2/0.3,
0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.8, 5/1/0.15,
0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.25, 5/0/0.60}
\node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*\hnode*\i-5.4*0.5*\hnode,0.5*\hnode*\j-1.05*\hnode) {};
%attention score labels
\node[align=center] (l17) at (a17) {\scriptsize{{\color{white} .4}}};
\node[align=center] (l26) at (a06) {\scriptsize{{\color{white} .3}}};
\node[align=center] (l26) at (a16) {\scriptsize{{\color{white} .4}}};
\node[align=center] (l17) at (a35) {\scriptsize{{\color{white} .3}}};
\node[align=center] (l17) at (a34) {\tiny{{\color{white} .3}}};
\node[align=center] (l17) at (a23) {\small{{\color{white} .8}}};
\node[align=center] (l17) at (a41) {\small{{\color{white} .8}}};
\node[align=center] (l17) at (a50) {\small{{\color{white} .7}}};
% source
\node[srcnode] (src1) at (-5.4*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{Have}};
\node[srcnode] (src2) at ([xshift=0.5\hnode]src1.south west) {\scriptsize{you}};
\node[srcnode] (src3) at ([xshift=0.5\hnode]src2.south west) {\scriptsize{learned}};
\node[srcnode] (src4) at ([xshift=0.5\hnode]src3.south west) {\scriptsize{nothing}};
\node[srcnode] (src5) at ([xshift=0.5\hnode]src4.south west) {\scriptsize{?}};
\node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {\scriptsize{$\langle$eos$\rangle$}};
% target
\node[tgtnode] (tgt1) at (-6.0*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{}};
\node[tgtnode] (tgt2) at ([yshift=-0.5\hnode]tgt1.north east) {\scriptsize{什么}};
\node[tgtnode] (tgt3) at ([yshift=-0.5\hnode]tgt2.north east) {\scriptsize{}};
\node[tgtnode] (tgt4) at ([yshift=-0.5\hnode]tgt3.north east) {\scriptsize{}};
\node[tgtnode] (tgt5) at ([yshift=-0.5\hnode]tgt4.north east) {\scriptsize{}};
\node[tgtnode] (tgt6) at ([yshift=-0.5\hnode]tgt5.north east) {\scriptsize{}};
\node[tgtnode] (tgt7) at ([yshift=-0.5\hnode]tgt6.north east) {\scriptsize{?}};
\node[tgtnode] (tgt8) at ([yshift=-0.5\hnode]tgt7.north east) {\scriptsize{$\langle$eos$\rangle$}};
\end{scope}
%\visible<2->
{
% alignment rectangle 2
\node[alignmentnode, ugreen, anchor=north west] (alignment1) at ([xshift=-0.3em,yshift=0.4em]a07.north west) {};
}
%\visible<3->
{
% alignment rectangle 1
\node[alignmentnode, red, anchor=north west] (alignment2) at ([xshift=-0.1em,yshift=0.2em]a17.north west) {};
}
%\visible<3->
{
% alignment bars 2
\node[probnode,anchor=south west,minimum height=0.4\hnode,inner sep=0.1pt,fill=red!40,label=below:\scriptsize{$0.4$}] (attn21) at ([xshift=2.3\hnode,yshift=0.5\hnode]alignment2.east) {};
\node[probnode,anchor=south west,minimum height=0.4\hnode,inner sep=0.1pt,fill=red!40,label=below:\scriptsize{$0.4$}] (attn22) at ([xshift=1pt]attn21.south east) {};
\node[probnode,anchor=south west,minimum height=0.05\hnode,inner sep=0.1pt,fill=red!40,label=below:\scriptsize{$0$}] (attn23) at ([xshift=1pt]attn22.south east) {};
\node[probnode,anchor=south west,minimum height=0.1\hnode,inner sep=0.1pt,fill=red!40,label=below:\scriptsize{$0.1$}] (attn24) at ([xshift=1pt]attn23.south east) {};
\node[probnode,anchor=south west,minimum height=0.05\hnode,inner sep=0.1pt,fill=red!40,label=below:\scriptsize{$0$}] (attn25) at ([xshift=1pt]attn24.south east) {};
\node[probnode,anchor=south west,minimum height=0.05\hnode,inner sep=0.1pt,fill=red!40,label=below:\scriptsize{$...$}] (attn26) at ([xshift=1pt]attn25.south east) {};
}
%\visible<2->
{
% alignment bars 1
\node[probnode,anchor=south,minimum height=0.2\hnode,inner sep=0.1pt,fill=ugreen!40,label=below:\scriptsize{$0.2$}] (attn11) at ([xshift=2.5\hnode,yshift=-1em]alignment2.north east) {};
\node[probnode,anchor=south west,minimum height=0.3\hnode,inner sep=0.1pt,fill=ugreen!40,label=below:\scriptsize{$0.3$}] (attn12) at ([xshift=1pt]attn11.south east) {};
\node[probnode,anchor=south west,minimum height=0.2\hnode,inner sep=0.1pt,fill=ugreen!40,label=below:\scriptsize{$0.2$}] (attn13) at ([xshift=1pt]attn12.south east) {};
\node[probnode,anchor=south west,minimum height=0.05\hnode,inner sep=0.1pt,fill=ugreen!40,label=below:\scriptsize{$0$}] (attn14) at ([xshift=1pt]attn13.south east) {};
\node[probnode,anchor=south west,minimum height=0.05\hnode,inner sep=0.1pt,fill=ugreen!40,label=below:\scriptsize{$0$}] (attn15) at ([xshift=1pt]attn14.south east) {};
\node[probnode,anchor=south west,minimum height=0.05\hnode,inner sep=0.1pt,fill=ugreen!40,label=below:\scriptsize{$...$}] (attn16) at ([xshift=1pt]attn15.south east) {};
}
%\visible<3->
{
% coverage score formula node
\node [anchor=north west] (formula) at ([xshift=-0.3\hnode,yshift=-1.5\hnode]attn11.south) {\small{不同$\textbf{C}_j$所对应的源语言词的权重是不同的}};
\node [anchor=north west] (example) at (formula.south west) {\footnotesize{$\textbf{C}_2=0.4 \times \textbf{h}(\textrm{``你''}) + 0.4 \times \textbf{h}(\textrm{``什么''}) +$}};
\node [anchor=north west] (example2) at ([yshift=0.4em]example.south west) {\footnotesize{$\ \ \ \ \ \ \ \ 0 \times \textbf{h}(\textrm{``都''}) + 0.1 \times \textbf{h}(\textrm{`` 没''}) + ..$}};
}
%\visible<3->
{
% matrix -> attn2
\draw[->,red] ([xshift=0.1em,yshift=2.3em]alignment2.east).. controls +(east:1.9cm) and +(west:1.0cm) ..([xshift=-0.15\hnode,yshift=-1em]attn21.north west);
}
%\visible<2->
{
\draw[->,ugreen] ([xshift=0.1em,yshift=-1.2em]alignment1.north east)--([xshift=2.2\hnode,yshift=-1.2em]alignment2.north east);
}
%\visible<3->
{
% attn2 -> cov2
\draw[->] ([xshift=0.2\hnode,yshift=0.0\hnode]attn26.east)--([xshift=0.7\hnode,yshift=0]attn26.east) node[pos=0.5,above] (sum2) {\small{$\sum$}}; % 0.3 - 0.5 height of the
}
%\visible<2->
{
% attn1 -> cov1
\draw[->] ([xshift=0.2\hnode]attn16.east)--([xshift=0.7\hnode]attn16.east) node[pos=0.5,above] (sum1) {\small{$\sum$}};
}
% coverage score for each source word
%\visible<2->
{
\node[anchor=west] (sc1) at ([xshift=0.9\hnode]attn16.east) {$\textbf{C}_1 = \sum_{i=1}^{8} \alpha_{i1} \textbf{h}_{i}$};
}
%\visible<3->
{
\node[anchor=west] (sc2) at ([xshift=0.9\hnode,yshift=0.0\hnode]attn26.east) {$\textbf{C}_2 = \sum_{i=1}^{8} \alpha_{i2} \textbf{h}_{i}$};
}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
%第一段----------------------------------------------
%原文-------------
\node [pos=0.4,left,xshift=-36em,yshift=7.3em,font=\small] (original0) {原文:};
\node [pos=0.4,left,xshift=-2em,yshift=3.3em,font=\small] (original1) {
\begin{tabular}[t]{l}
\parbox{36em}{During Soviet times, if a city’s population topped one million, it would become eligible for its own metro. Planners wanted to brighten the lives of everyday Soviet citizens, and saw the metros, with their tens of thousands of daily passengers, as a singular opportunity to do so. In 1977, Tashkent, the capital of Uzbekistan, became the seventh Soviet city to have a metro built. Grand themes celebrating the history of Uzbekistan and the Soviet Union were brought to life, as art was commissioned and designers set to work. The stations reflected different themes, some with domed ceilings and painted tiles reminiscent of Uzbekistan’s Silk Road mosques, while others ...}
\end{tabular}
};
%译文1--------------mt1
\node[font=\small] (mt1) at ([xshift=0em,yshift=-9.1em]original0.south) {译文1:};
\node[font=\small] (ts1) at ([xshift=0em,yshift=-4em]original1.south) {
\begin{tabular}[t]{l}
\parbox{36em}{在苏联时代,如果一个城市的人口突破一百万,这将成为合资格为自己的地铁。规划者想去照亮每天的苏联公民的生命,看到地铁,与他们的数十每天数千乘客,作为一个独特的机会来这样做。1977年,塔什干,乌兹别克斯坦的首都,成了苏联第七城市建有地铁。宏大主题,庆祝乌兹别克斯坦和苏联的历史被带到生活,因为艺术是委托和设计师开始工作。车站反映了不同的主题,有的圆顶天花板和绘瓷砖让人想起乌兹别克斯坦是丝绸之路的清真寺,而另一些则装饰着...}
\end{tabular}
};
%{
%\draw[dotted,thick,ublue] ([xshift=10.3em,yshift=0.3em]mt8.south west)--%([xshift=-5.2em,yshift=-0.3em]ht8.north);
%}
\begin{pgfonlayer}{background}
{
\node[rectangle,draw=ublue, inner sep=0mm] [fit =(original0)(mt1)(mt1)(ts1)(original1)] {};
}
\end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
%-------译文2
\node [pos=0.4,left,xshift=-36em,yshift=7.3em,font=\small] (original0) {译文2:};
\node [pos=0.4,left,xshift=-2em,yshift=4.5em,font=\small] (original1) {
\begin{tabular}[t]{l}
\parbox{36em}{在苏联时期,如果一个城市的人口超过一百万,它就有资格拥有自己的地铁。 规划者想要照亮日常苏联公民的生活,并把拥有数万名每日乘客的地铁看作是这样做的一个绝佳机会。 1977年,乌兹别克斯坦首都塔什干成为苏联第七个修建地铁的城市。 随着艺术的委托和设计师们的工作,乌兹别克斯坦和苏联历史的宏伟主题被赋予了生命力。 这些电台反映了不同的主题,有的有穹顶和彩砖,让人想起乌兹别克斯坦的丝绸之路清真寺,有的则用...}
\end{tabular}
};
\begin{pgfonlayer}{background}
{
\node[rectangle,draw=ublue, inner sep=0mm] [fit =(original0)(original1)] {};
}
\end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
%第一段----------------------------------------------
%原文-------------
\node [pos=0.4,left,xshift=-36em,yshift=7.3em,font=\small] (original0) {原文:};
\node [pos=0.4,left,xshift=-2em,yshift=3.3em,font=\small] (original1) {
\begin{tabular}[t]{l}
\parbox{36em}{During Soviet times, if a city’s population topped one million, it would become eligible for its own metro. Planners wanted to brighten the lives of everyday Soviet citizens, and saw the metros, with their tens of thousands of daily passengers, as a singular opportunity to do so. In 1977, Tashkent, the capital of Uzbekistan, became the seventh Soviet city to have a metro built. Grand themes celebrating the history of Uzbekistan and the Soviet Union were brought to life, as art was commissioned and designers set to work. The stations reflected different themes, some with domed ceilings and painted tiles reminiscent of Uzbekistan’s Silk Road mosques, while others ...}
\end{tabular}
};
%译文1--------------mt1
\node[font=\small] (mt1) at ([xshift=0em,yshift=-9.1em]original0.south) {译文1:};
\node[font=\small] (ts1) at ([xshift=0em,yshift=-4em]original1.south) {
\begin{tabular}[t]{l}
\parbox{36em}{在苏联时代,如果一个城市的人口突破一百万,这将成为合资格为自己的地铁。规划者想去照亮每天的苏联公民的生命,看到地铁,与他们的数十每天数千乘客,作为一个独特的机会来这样做。1977年,塔什干,乌兹别克斯坦的首都,成了苏联第七城市建有地铁。宏大主题,庆祝乌兹别克斯坦和苏联的历史被带到生活,因为艺术是委托和设计师开始工作。车站反映了不同的主题,有的圆顶天花板和绘瓷砖让人想起乌兹别克斯坦是丝绸之路的清真寺,而另一些则装饰着...}
\end{tabular}
};
%译文2---------------mt2
\node[font=\small] (mt2) at ([xshift=0em,yshift=-6.7em]mt1.south) {译文2:};
\node[font=\small] (mt3) at ([xshift=0em,yshift=-4em]ts1.south) {
\begin{tabular}[t]{l}
\parbox{36em}{在苏联时期,如果一个城市的人口超过一百万,它就有资格拥有自己的地铁。 规划者想要照亮日常苏联公民的生活,并把拥有数万名每日乘客的地铁看作是这样做的一个绝佳机会。 1977年,乌兹别克斯坦首都塔什干成为苏联第七个修建地铁的城市。 随着艺术的委托和设计师们的工作,乌兹别克斯坦和苏联历史的宏伟主题被赋予了生命力。 这些电台反映了不同的主题,有的有穹顶和彩砖,让人想起乌兹别克斯坦的丝绸之路清真寺,有的则用...}
\end{tabular}
};
%{
%\draw[dotted,thick,ublue] ([xshift=10.3em,yshift=0.3em]mt8.south west)--%([xshift=-5.2em,yshift=-0.3em]ht8.north);
%}
\begin{pgfonlayer}{background}
{
\node[rectangle,draw=ublue, inner sep=0mm] [fit =(original0)(mt1)(mt3)(mt1)(ts1)(mt2)(original1)] {};
}
\end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
%%% outline
%-------------------------------------------------------------------------
\begin{tikzpicture}
%第一段----------------------------------------------
%原文-------------
\node [pos=0.4,left,xshift=-32em,yshift=10em,font=\small] (original0) {输入文本:};
\node [pos=0.4,left,xshift=-2em,yshift=2.3em,font=\small] (original1) {
\begin{tabular}[t]{l}
\parbox{32em}{
Jenson Button was denied his 100th race for McLaren after an ERS prevented him from making it to the startline. It capped a miserable weekend for the Briton; his time in Bahrain plagued by reliability issues. Button spent much of the race on Twitter delivering his verdict as the action unfolded. ’Kimi is the man to watch,’ and ’loving the sparks’, were among his pearls of wisdom, but the tweet which courted the most attention was a rather mischievous one: ’Ooh is Lewis backing his team mate into Vettel?’ he quizzed after Rosberg accused Hamilton of pulling off such a manoeuvre in China. Jenson Button waves to the crowd ahead of the Bahrain Grand Prix which he failed to start Perhaps a career in the media beckons... Lewis Hamilton has out-qualified and finished ahead of Nico Rosberg at every race this season. Indeed Rosberg has now beaten his Mercedes team-mate only once in the 11 races since the pair infamously collided in Belgium last year. Hamilton secured the 36th win of his career in Bahrain and his 21st from pole position. Only Michael Schumacher (40), Ayrton Senna (29) and Sebastian Vettel (27) have more. (...)
}
\end{tabular}
};
%译文1--------------mt1
\node[font=\small] (mt1) at ([xshift=0em,yshift=-16.8em]original0.south) {系统生成\quad};
\node[font=\small] (mt-2) at ([xshift=0em,yshift=-0.5em]mt1.south) {的摘要:\quad };
\node[font=\small] (ts1) at ([xshift=0em,yshift=-3em]original1.south) {
\begin{tabular}[t]{l}
\parbox{32em}{
Button was denied his 100th race for McLaren. The ERS prevented him from making it to the start-line. Button was his team mate in the 11 races in Bahrain. He quizzed after Nico Rosberg accused Lewis Hamilton of pulling off such a manoeuvre in China.
}
\end{tabular}
};
%{
%\draw[dotted,thick,ublue] ([xshift=10.3em,yshift=0.3em]mt8.south west)--%([xshift=-5.2em,yshift=-0.3em]ht8.north);
%}
\begin{pgfonlayer}{background}
{
\node[rectangle,draw=ublue, inner sep=0mm] [fit =(original0)(mt1)(mt-2)(ts1)(original1)] {};
}
\end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
%----------------------------------------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.6cm}
\tikzstyle{auxnode} = [inner sep=0pt,outer sep=0pt,opacity=0,draw=red,fill=red,circle,minimum size=3pt]
\tikzstyle{wordnode} = [font=\scriptsize]
\tikzstyle{opnode} = [inner sep=0pt,outer sep=0pt,draw,fill=green!30!white,font=\scriptsize,minimum size=10pt]
\tikzstyle{standard} = [rounded corners=5pt,thick]
\tikzstyle{emph} = [rounded corners=5pt,thick,draw=red]
\tikzstyle{formulanode} = [font=\scriptsize,align=left,draw=red,rectangle,fill=red!10!white,rounded corners=2pt,drop shadow]
% Skeleton
\begin{scope}[every label/.append style={label distance=1pt,font=\tiny,inner sep=0pt,opacity=0}]
\coordinate (aux12) at (0,0);
%\node[auxnode,label={-45:12}] () at (aux12) {};
\coordinate (aux22) at ([yshift=\base]aux12);
%\node[auxnode,label={-45:22}] () at (aux22) {};
\coordinate (aux23) at ([xshift=0.5\base]aux22);
%\node[auxnode,label={-45:23}] () at (aux23) {};
\coordinate (aux32) at ([yshift=0.4\base]aux22);
%\node[auxnode,label={-45:32}] () at (aux32) {};
\ExtractX{$([xshift=\base]aux23)$}
\ExtractY{$([yshift=\base]aux32)$}
\coordinate (aux44) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:44}] () at (aux44) {};
\coordinate (aux45) at ([xshift=\base]aux44);
%\node[auxnode,label={-45:45}] () at (aux45) {};
\coordinate (aux46) at ([xshift=1.3\base]aux45);
%\node[auxnode,label={-45:46}] () at (aux46) {};
\ExtractX{$(aux23)$}
\ExtractY{$([yshift=\base]aux44)$}
\coordinate (aux53) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:53}] () at (aux53) {};
\coordinate (aux56) at ([yshift=\base]aux46);
%\node[auxnode,label={-45:56}] () at (aux56) {};
\ExtractX{$(aux45)$}
\ExtractY{$([yshift=0.5\base]aux56)$}
\coordinate (aux65) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:65}] () at (aux65) {};
\ExtractX{$([xshift=-\base]aux12)$}
\ExtractY{$([yshift=\base]aux65)$}
\coordinate (aux71) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:71}] () at (aux71) {};
\coordinate (aux75) at ([yshift=\base]aux65);
%\node[auxnode,label={-45:75}] () at (aux75) {};
\ExtractX{$(aux56)$}
\ExtractY{$(aux75)$}
\coordinate (aux76) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:76}] () at (aux76) {};
\coordinate (aux78) at ([xshift=1.7\base]aux76);
%\node[auxnode,label={-45:78}] () at (aux78) {};
\coordinate (aux87) at ([shift={(0.7\base,1.3\base)}]aux76);
%\node[auxnode,label={-45:87}] () at (aux87) {};
\end{scope}
\begin{scope}
\node[opnode,circle,opacity=0] (r53) at (aux53) {};
\node[opnode,circle,opacity=0] (z56) at (aux56) {};
\node[opnode,circle,opacity=0] (z75) at (aux75) {};
\node[opnode,circle,opacity=0] (z76) at (aux76) {};
% reset gate
{
\draw[-latex,emph] (aux12) -- (aux32) -| (aux44) |- (r53);
\draw[emph] (aux71) -| (aux32) -| (aux44);
\node[opnode,circle,draw=red,thick] () at (aux44) {$\sigma$};
}
\end{scope}
\begin{scope}
\node[wordnode,anchor=south] () at (aux71) {$\mathbf{h}_{t-1}$};
\node[wordnode,anchor=west] () at (aux12) {$\mathbf{x}_t$};
\end{scope}
\node[] (tanh) at (aux46){};
\begin{pgfonlayer}{background}
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=6pt,fit=(aux22) (aux76) (z76) (tanh)] (GRU) {};
\end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
%----------------------------------------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.6cm}
\tikzstyle{auxnode} = [inner sep=0pt,outer sep=0pt,opacity=0,draw=red,fill=red,circle,minimum size=3pt]
\tikzstyle{wordnode} = [font=\scriptsize]
\tikzstyle{opnode} = [inner sep=0pt,outer sep=0pt,draw,fill=green!30!white,font=\scriptsize,minimum size=10pt]
\tikzstyle{standard} = [rounded corners=5pt,thick]
\tikzstyle{emph} = [rounded corners=5pt,thick,draw=red]
\tikzstyle{formulanode} = [font=\scriptsize,align=left,draw=red,rectangle,fill=red!10!white,rounded corners=2pt,drop shadow]
% Skeleton
\begin{scope}[every label/.append style={label distance=1pt,font=\tiny,inner sep=0pt,opacity=0}]
\coordinate (aux12) at (0,0);
%\node[auxnode,label={-45:12}] () at (aux12) {};
\coordinate (aux22) at ([yshift=\base]aux12);
%\node[auxnode,label={-45:22}] () at (aux22) {};
\coordinate (aux23) at ([xshift=0.5\base]aux22);
%\node[auxnode,label={-45:23}] () at (aux23) {};
\coordinate (aux32) at ([yshift=0.4\base]aux22);
%\node[auxnode,label={-45:32}] () at (aux32) {};
\ExtractX{$([xshift=\base]aux23)$}
\ExtractY{$([yshift=\base]aux32)$}
\coordinate (aux44) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:44}] () at (aux44) {};
\coordinate (aux45) at ([xshift=\base]aux44);
%\node[auxnode,label={-45:45}] () at (aux45) {};
\coordinate (aux46) at ([xshift=1.3\base]aux45);
%\node[auxnode,label={-45:46}] () at (aux46) {};
\ExtractX{$(aux23)$}
\ExtractY{$([yshift=\base]aux44)$}
\coordinate (aux53) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:53}] () at (aux53) {};
\coordinate (aux56) at ([yshift=\base]aux46);
%\node[auxnode,label={-45:56}] () at (aux56) {};
\ExtractX{$(aux45)$}
\ExtractY{$([yshift=0.5\base]aux56)$}
\coordinate (aux65) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:65}] () at (aux65) {};
\ExtractX{$([xshift=-\base]aux12)$}
\ExtractY{$([yshift=\base]aux65)$}
\coordinate (aux71) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:71}] () at (aux71) {};
\coordinate (aux75) at ([yshift=\base]aux65);
%\node[auxnode,label={-45:75}] () at (aux75) {};
\ExtractX{$(aux56)$}
\ExtractY{$(aux75)$}
\coordinate (aux76) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:76}] () at (aux76) {};
\coordinate (aux78) at ([xshift=1.7\base]aux76);
%\node[auxnode,label={-45:78}] () at (aux78) {};
\coordinate (aux87) at ([shift={(0.7\base,1.3\base)}]aux76);
%\node[auxnode,label={-45:87}] () at (aux87) {};
\end{scope}
\begin{scope}
\node[opnode,circle,opacity=0] (r53) at (aux53) {};
\node[opnode,circle,opacity=0] (z56) at (aux56) {};
\node[opnode,circle,opacity=0] (z75) at (aux75) {};
\node[opnode,circle,opacity=0] (z76) at (aux76) {};
% reset gate
{
\draw[-latex,emph] (aux12) -- (aux32) -| (aux44) |- (r53);
\draw[emph] (aux71) -| (aux32) -| (aux44);
\node[opnode,circle,draw=red,thick] () at (aux44) {$\sigma$};
}
{
\draw[-latex,standard] (aux12) -- (aux32) -| (aux44) |- (r53);
\draw[standard] (aux71) -| (aux32) -| (aux44);
\node[opnode,circle] () at (aux44) {$\sigma$};
}
% update gate
{
\draw[-latex,emph] (aux12) -- (aux32) -| (aux45) -- (z75);
\draw[-latex,emph] (aux71) -| (aux32) -| (aux45) |- (z56);
\node[opnode,circle,draw=red,thick] () at (aux45) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,font=\tiny,draw=red,thick] () at (aux65) {$1-$};
}
\end{scope}
\begin{scope}
\node[wordnode,anchor=south] () at (aux71) {$\mathbf{h}_{t-1}$};
\node[wordnode,anchor=west] () at (aux12) {$\mathbf{x}_t$};
\end{scope}
\node[] (tanh) at (aux46){};
\begin{pgfonlayer}{background}
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=6pt,fit=(aux22) (aux76) (z76) (tanh)] (GRU) {};
\end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
%----------------------------------------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.6cm}
\tikzstyle{auxnode} = [inner sep=0pt,outer sep=0pt,opacity=0,draw=red,fill=red,circle,minimum size=3pt]
\tikzstyle{wordnode} = [font=\scriptsize]
\tikzstyle{opnode} = [inner sep=0pt,outer sep=0pt,draw,fill=green!30!white,font=\scriptsize,minimum size=10pt]
\tikzstyle{standard} = [rounded corners=5pt,thick]
\tikzstyle{emph} = [rounded corners=5pt,thick,draw=red]
\tikzstyle{formulanode} = [font=\scriptsize,align=left,draw=red,rectangle,fill=red!10!white,rounded corners=2pt,drop shadow]
% Skeleton
\begin{scope}[every label/.append style={label distance=1pt,font=\tiny,inner sep=0pt,opacity=0}]
\coordinate (aux12) at (0,0);
%\node[auxnode,label={-45:12}] () at (aux12) {};
\coordinate (aux22) at ([yshift=\base]aux12);
%\node[auxnode,label={-45:22}] () at (aux22) {};
\coordinate (aux23) at ([xshift=0.5\base]aux22);
%\node[auxnode,label={-45:23}] () at (aux23) {};
\coordinate (aux32) at ([yshift=0.4\base]aux22);
%\node[auxnode,label={-45:32}] () at (aux32) {};
\ExtractX{$([xshift=\base]aux23)$}
\ExtractY{$([yshift=\base]aux32)$}
\coordinate (aux44) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:44}] () at (aux44) {};
\coordinate (aux45) at ([xshift=\base]aux44);
%\node[auxnode,label={-45:45}] () at (aux45) {};
\coordinate (aux46) at ([xshift=1.3\base]aux45);
%\node[auxnode,label={-45:46}] () at (aux46) {};
\ExtractX{$(aux23)$}
\ExtractY{$([yshift=\base]aux44)$}
\coordinate (aux53) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:53}] () at (aux53) {};
\coordinate (aux56) at ([yshift=\base]aux46);
%\node[auxnode,label={-45:56}] () at (aux56) {};
\ExtractX{$(aux45)$}
\ExtractY{$([yshift=0.5\base]aux56)$}
\coordinate (aux65) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:65}] () at (aux65) {};
\ExtractX{$([xshift=-\base]aux12)$}
\ExtractY{$([yshift=\base]aux65)$}
\coordinate (aux71) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:71}] () at (aux71) {};
\coordinate (aux75) at ([yshift=\base]aux65);
%\node[auxnode,label={-45:75}] () at (aux75) {};
\ExtractX{$(aux56)$}
\ExtractY{$(aux75)$}
\coordinate (aux76) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:76}] () at (aux76) {};
\coordinate (aux78) at ([xshift=1.7\base]aux76);
%\node[auxnode,label={-45:78}] () at (aux78) {};
\coordinate (aux87) at ([shift={(0.7\base,1.3\base)}]aux76);
%\node[auxnode,label={-45:87}] () at (aux87) {};
\end{scope}
\begin{scope}
\node[opnode,circle,opacity=0] (r53) at (aux53) {};
\node[opnode,circle,opacity=0] (z56) at (aux56) {};
\node[opnode,circle,opacity=0] (z75) at (aux75) {};
\node[opnode,circle,opacity=0] (z76) at (aux76) {};
% reset gate
{
\draw[-latex,emph] (aux12) -- (aux32) -| (aux44) |- (r53);
\draw[emph] (aux71) -| (aux32) -| (aux44);
\node[opnode,circle,draw=red,thick] () at (aux44) {$\sigma$};
}
{
\draw[-latex,standard] (aux12) -- (aux32) -| (aux44) |- (r53);
\draw[standard] (aux71) -| (aux32) -| (aux44);
\node[opnode,circle] () at (aux44) {$\sigma$};
}
% update gate
{
\draw[-latex,emph] (aux12) -- (aux32) -| (aux45) -- (z75);
\draw[-latex,emph] (aux71) -| (aux32) -| (aux45) |- (z56);
\node[opnode,circle,draw=red,thick] () at (aux45) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,font=\tiny,draw=red,thick] () at (aux65) {$1-$};
}
{
\draw[-latex,standard] (aux12) -- (aux32) -| (aux45) -- (z75);
\draw[-latex,standard] (aux71) -| (aux32) -| (aux45) |- (z56);
\node[opnode,circle] () at (aux45) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,font=\tiny] () at (aux65) {$1-$};
}
% hidden update
{
\draw[-latex,emph] (aux71) -- (aux78);
\draw[-latex,emph] (aux71) -| (aux87);
\draw[-latex,emph] (aux71) -| (aux53) -- (aux23) -| (aux46) -- (z76);
\draw[emph] (aux12) |- (aux23) -| (aux46);
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] (tanh) at (aux46) {$\mathrm{tanh}$};
\node[opnode,circle,draw=red,thick] (a1) at (aux53) {};
\node[opnode,circle,draw=red,thick] (a2) at (aux56) {};
\node[opnode,circle,draw=red,thick] (a3) at (aux75) {};
\node[opnode,circle,draw=red,thick] (a4) at (aux76) {};
}
\end{scope}
\begin{scope}
\node[wordnode,anchor=south] () at (aux71) {$\mathbf{h}_{t-1}$};
\node[wordnode,anchor=west] () at (aux12) {$\mathbf{x}_t$};
{
\node[wordnode,anchor=east] () at (aux87) {$\mathbf{h}_{t}$};
\node[wordnode,anchor=south] () at (aux78) {$\mathbf{h}_{t}$};
}
\end{scope}
\begin{pgfonlayer}{background}
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=6pt,fit=(aux22) (aux76) (z76) (tanh)] (GRU) {};
\end{pgfonlayer}
%%new
\draw[-] (a1.north west) -- (a1.south east);
\draw[-] (a1.north east) -- (a1.south west);
\draw[-] (a2.north west) -- (a2.south east);
\draw[-] (a2.north east) -- (a2.south west);
\draw[-] (a3.north west) -- (a3.south east);
\draw[-] (a3.north east) -- (a3.south west);
\draw [-,line width=0.8pt] (a4.north) -- (a4.south);
\draw [-,line width=0.8pt] (a4.west) -- (a4.east);
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.6cm}
\tikzstyle{wordnode} = [font=\scriptsize]
\tikzstyle{auxnode} = [inner sep=0pt,outer sep=0pt,opacity=0,draw=red,fill=red,circle,minimum size=3pt]
\tikzstyle{opnode} = [inner sep=0pt,outer sep=0pt,draw,fill=green!30!white,font=\scriptsize,minimum size=10pt]
\tikzstyle{standard} = [rounded corners=5pt,thick]
\tikzstyle{emph} = [rounded corners=5pt,thick,draw=red]
\tikzstyle{formulanode} = [font=\scriptsize,align=left,draw=red,rectangle,fill=red!10!white,rounded corners=2pt,drop shadow]
% Skeleton
\begin{scope}[every label/.append style={label distance=1pt,font=\tiny,inner sep=0pt,opacity=0}]
\coordinate (aux12) at (0,0);
%\node[auxnode,label={-45:12}] () at (aux12) {};
\coordinate (aux22) at ([yshift=\base]aux12);
%\node[auxnode,label={-45:22}] () at (aux22) {};
\coordinate (aux21) at ([xshift=-2\base]aux22);
%\node[auxnode,label={-45:21}] () at (aux21) {};
\coordinate (aux23) at ([xshift=\base]aux22);
%\node[auxnode,label={-45:23}] () at (aux23) {};
\coordinate (aux24) at ([xshift=\base]aux23);
%\node[auxnode,label={-45:24}] () at (aux24) {};
\coordinate (aux25) at ([xshift=\base]aux24);
%\node[auxnode,label={-45:25}] () at (aux25) {};
\coordinate (aux26) at ([xshift=\base]aux25);
%\node[auxnode,label={-45:26}] () at (aux26) {};
\coordinate (aux27) at ([xshift=\base]aux26);
%\node[auxnode,label={-45:27}] () at (aux27) {};
\coordinate (aux28) at ([xshift=\base]aux27);
%\node[auxnode,label={-45:28}] () at (aux28) {};
\coordinate (aux29) at ([xshift=2\base]aux28);
%\node[auxnode,label={-45:29}] () at (aux29) {};
\coordinate (aux33) at ([yshift=\base]aux23);
%\node[auxnode,label={-45:33}] () at (aux33) {};
\coordinate (aux34) at ([yshift=\base]aux24);
%\node[auxnode,label={-45:34}] () at (aux34) {};
\coordinate (aux35) at ([yshift=\base]aux25);
%\node[auxnode,label={-45:35}] () at (aux35) {};
\coordinate (aux37) at ([yshift=\base]aux27);
%\node[auxnode,label={-45:37}] () at (aux37) {};
\coordinate (aux45) at ([yshift=\base]aux35);
%\node[auxnode,label={-45:45}] () at (aux45) {};
\coordinate (aux55) at ([yshift=\base]aux45);
%\node[auxnode,label={-45:55}] () at (aux55) {};
\ExtractX{$(aux21)$}
\ExtractY{$(aux55)$}
\coordinate (aux51) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:51}] () at (aux51) {};
\ExtractX{$(aux23)$}
\ExtractY{$(aux55)$}
\coordinate (aux53) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:53}] () at (aux53) {};
\ExtractX{$(aux28)$}
\ExtractY{$(aux55)$}
\coordinate (aux58) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:58}] () at (aux58) {};
\ExtractX{$(aux29)$}
\ExtractY{$(aux55)$}
\coordinate (aux59) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:59}] () at (aux59) {};
\coordinate (aux68) at ([yshift=\base]aux58);
%\node[auxnode,label={-45:68}] () at (aux68) {};
\end{scope}
\begin{scope}
\node[opnode,circle,opacity=0] (f53) at (aux53) {};
\node[opnode,circle,opacity=0] (u55) at (aux55) {};
% forget gate
{
\draw[emph] (aux21) -- (aux23) -- (aux33);
\draw[-latex,emph] (aux12) -- (aux22) -- (aux23) -- (f53);
\node[opnode,circle,draw=red,thick] () at (aux33) {$\sigma$};
}
\end{scope}
\begin{scope}
\node[wordnode,anchor=south] () at ([xshift=0.5\base]aux21) {$\mathbf{h}_{t-1}$};
\node[wordnode,anchor=west] () at (aux12) {$\mathbf{x}_t$};
\node[wordnode,anchor=south] () at ([xshift=0.5\base]aux51) {$\mathbf{c}_{t-1}$};
\end{scope}
\node[ ] (o27) at (aux27) { };
\begin{pgfonlayer}{background}
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=4pt,fit=(aux22) (aux58) (u55) (o27)] (LSTM) {};
\end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.6cm}
\tikzstyle{wordnode} = [font=\scriptsize]
\tikzstyle{auxnode} = [inner sep=0pt,outer sep=0pt,opacity=0,draw=red,fill=red,circle,minimum size=3pt]
\tikzstyle{opnode} = [inner sep=0pt,outer sep=0pt,draw,fill=green!30!white,font=\scriptsize,minimum size=10pt]
\tikzstyle{standard} = [rounded corners=5pt,thick]
\tikzstyle{emph} = [rounded corners=5pt,thick,draw=red]
\tikzstyle{formulanode} = [font=\scriptsize,align=left,draw=red,rectangle,fill=red!10!white,rounded corners=2pt,drop shadow]
% Skeleton
\begin{scope}[every label/.append style={label distance=1pt,font=\tiny,inner sep=0pt,opacity=0}]
\coordinate (aux12) at (0,0);
%\node[auxnode,label={-45:12}] () at (aux12) {};
\coordinate (aux22) at ([yshift=\base]aux12);
%\node[auxnode,label={-45:22}] () at (aux22) {};
\coordinate (aux21) at ([xshift=-2\base]aux22);
%\node[auxnode,label={-45:21}] () at (aux21) {};
\coordinate (aux23) at ([xshift=\base]aux22);
%\node[auxnode,label={-45:23}] () at (aux23) {};
\coordinate (aux24) at ([xshift=\base]aux23);
%\node[auxnode,label={-45:24}] () at (aux24) {};
\coordinate (aux25) at ([xshift=\base]aux24);
%\node[auxnode,label={-45:25}] () at (aux25) {};
\coordinate (aux26) at ([xshift=\base]aux25);
%\node[auxnode,label={-45:26}] () at (aux26) {};
\coordinate (aux27) at ([xshift=\base]aux26);
%\node[auxnode,label={-45:27}] () at (aux27) {};
\coordinate (aux28) at ([xshift=\base]aux27);
%\node[auxnode,label={-45:28}] () at (aux28) {};
\coordinate (aux29) at ([xshift=2\base]aux28);
%\node[auxnode,label={-45:29}] () at (aux29) {};
\coordinate (aux33) at ([yshift=\base]aux23);
%\node[auxnode,label={-45:33}] () at (aux33) {};
\coordinate (aux34) at ([yshift=\base]aux24);
%\node[auxnode,label={-45:34}] () at (aux34) {};
\coordinate (aux35) at ([yshift=\base]aux25);
%\node[auxnode,label={-45:35}] () at (aux35) {};
\coordinate (aux37) at ([yshift=\base]aux27);
%\node[auxnode,label={-45:37}] () at (aux37) {};
\coordinate (aux45) at ([yshift=\base]aux35);
%\node[auxnode,label={-45:45}] () at (aux45) {};
\coordinate (aux55) at ([yshift=\base]aux45);
%\node[auxnode,label={-45:55}] () at (aux55) {};
\ExtractX{$(aux21)$}
\ExtractY{$(aux55)$}
\coordinate (aux51) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:51}] () at (aux51) {};
\ExtractX{$(aux23)$}
\ExtractY{$(aux55)$}
\coordinate (aux53) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:53}] () at (aux53) {};
\ExtractX{$(aux28)$}
\ExtractY{$(aux55)$}
\coordinate (aux58) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:58}] () at (aux58) {};
\ExtractX{$(aux29)$}
\ExtractY{$(aux55)$}
\coordinate (aux59) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:59}] () at (aux59) {};
\coordinate (aux68) at ([yshift=\base]aux58);
%\node[auxnode,label={-45:68}] () at (aux68) {};
\end{scope}
\begin{scope}
\node[opnode,circle,opacity=0] (f53) at (aux53) {};
\node[opnode,circle,opacity=0] (u55) at (aux55) {};
% forget gate
{
\draw[emph] (aux21) -- (aux23) -- (aux33);
\draw[-latex,emph] (aux12) -- (aux22) -- (aux23) -- (f53);
\node[opnode,circle,draw=red,thick] () at (aux33) {$\sigma$};
}
{
\draw[standard] (aux21) -- (aux23) -- (aux33);
\draw[-latex,standard] (aux12) -- (aux22) -- (aux23) -- (f53);
\node[opnode,circle] () at (aux33) {$\sigma$};
}
% input gate
{
\node[opnode,circle] (i45) at (aux45) {};
\draw[-latex,emph] (aux21) -- (aux24) |- (i45);
\draw[-latex,emph] (aux21) -- (aux25) -- (u55);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,circle,draw=red,thick] (i45) at (aux45) {};
}
\end{scope}
\begin{scope}
\node[wordnode,anchor=south] () at ([xshift=0.5\base]aux21) {$\mathbf{h}_{t-1}$};
\node[wordnode,anchor=west] () at (aux12) {$\mathbf{x}_t$};
\node[wordnode,anchor=south] () at ([xshift=0.5\base]aux51) {$\mathbf{c}_{t-1}$};
\end{scope}
\node[ ] (o27) at (aux27) { };
\begin{pgfonlayer}{background}
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=4pt,fit=(aux22) (aux58) (u55) (o27)] (LSTM) {};
\end{pgfonlayer}
%%new
\draw[-] (i45.north west) -- (i45.south east);
\draw[-] (i45.north east) -- (i45.south west);
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.6cm}
\tikzstyle{wordnode} = [font=\scriptsize]
\tikzstyle{auxnode} = [inner sep=0pt,outer sep=0pt,opacity=0,draw=red,fill=red,circle,minimum size=3pt]
\tikzstyle{opnode} = [inner sep=0pt,outer sep=0pt,draw,fill=green!30!white,font=\scriptsize,minimum size=10pt]
\tikzstyle{standard} = [rounded corners=5pt,thick]
\tikzstyle{emph} = [rounded corners=5pt,thick,draw=red]
\tikzstyle{formulanode} = [font=\scriptsize,align=left,draw=red,rectangle,fill=red!10!white,rounded corners=2pt,drop shadow]
% Skeleton
\begin{scope}[every label/.append style={label distance=1pt,font=\tiny,inner sep=0pt,opacity=0}]
\coordinate (aux12) at (0,0);
%\node[auxnode,label={-45:12}] () at (aux12) {};
\coordinate (aux22) at ([yshift=\base]aux12);
%\node[auxnode,label={-45:22}] () at (aux22) {};
\coordinate (aux21) at ([xshift=-2\base]aux22);
%\node[auxnode,label={-45:21}] () at (aux21) {};
\coordinate (aux23) at ([xshift=\base]aux22);
%\node[auxnode,label={-45:23}] () at (aux23) {};
\coordinate (aux24) at ([xshift=\base]aux23);
%\node[auxnode,label={-45:24}] () at (aux24) {};
\coordinate (aux25) at ([xshift=\base]aux24);
%\node[auxnode,label={-45:25}] () at (aux25) {};
\coordinate (aux26) at ([xshift=\base]aux25);
%\node[auxnode,label={-45:26}] () at (aux26) {};
\coordinate (aux27) at ([xshift=\base]aux26);
%\node[auxnode,label={-45:27}] () at (aux27) {};
\coordinate (aux28) at ([xshift=\base]aux27);
%\node[auxnode,label={-45:28}] () at (aux28) {};
\coordinate (aux29) at ([xshift=2\base]aux28);
%\node[auxnode,label={-45:29}] () at (aux29) {};
\coordinate (aux33) at ([yshift=\base]aux23);
%\node[auxnode,label={-45:33}] () at (aux33) {};
\coordinate (aux34) at ([yshift=\base]aux24);
%\node[auxnode,label={-45:34}] () at (aux34) {};
\coordinate (aux35) at ([yshift=\base]aux25);
%\node[auxnode,label={-45:35}] () at (aux35) {};
\coordinate (aux37) at ([yshift=\base]aux27);
%\node[auxnode,label={-45:37}] () at (aux37) {};
\coordinate (aux45) at ([yshift=\base]aux35);
%\node[auxnode,label={-45:45}] () at (aux45) {};
\coordinate (aux55) at ([yshift=\base]aux45);
%\node[auxnode,label={-45:55}] () at (aux55) {};
\ExtractX{$(aux21)$}
\ExtractY{$(aux55)$}
\coordinate (aux51) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:51}] () at (aux51) {};
\ExtractX{$(aux23)$}
\ExtractY{$(aux55)$}
\coordinate (aux53) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:53}] () at (aux53) {};
\ExtractX{$(aux28)$}
\ExtractY{$(aux55)$}
\coordinate (aux58) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:58}] () at (aux58) {};
\ExtractX{$(aux29)$}
\ExtractY{$(aux55)$}
\coordinate (aux59) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:59}] () at (aux59) {};
\coordinate (aux68) at ([yshift=\base]aux58);
%\node[auxnode,label={-45:68}] () at (aux68) {};
\end{scope}
\begin{scope}
\node[opnode,circle,opacity=0] (f53) at (aux53) {};
\node[opnode,circle,opacity=0] (u55) at (aux55) {};
% forget gate
{
\draw[emph] (aux21) -- (aux23) -- (aux33);
\draw[-latex,emph] (aux12) -- (aux22) -- (aux23) -- (f53);
\node[opnode,circle,draw=red,thick] () at (aux33) {$\sigma$};
}
{
\draw[standard] (aux21) -- (aux23) -- (aux33);
\draw[-latex,standard] (aux12) -- (aux22) -- (aux23) -- (f53);
\node[opnode,circle] () at (aux33) {$\sigma$};
}
% input gate
{
\node[opnode,circle] (i45) at (aux45) {};
\draw[-latex,emph] (aux21) -- (aux24) |- (i45);
\draw[-latex,emph] (aux21) -- (aux25) -- (u55);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,circle,draw=red,thick] (i45) at (aux45) {X};
}
{
\node[opnode,circle] (i45) at (aux45) {};
\draw[-latex,standard] (aux21) -- (aux24) |- (i45);
\draw[-latex,standard] (aux21) -- (aux25) -- (u55);
\node[opnode,circle] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,circle] (i45) at (aux45) {};
}
% cell update
{
\draw[-latex,emph] (aux51) -- (aux59);
\node[opnode,circle,draw=red,thick] (f53) at (aux53) {};
\node[opnode,circle,draw=red,thick] (u55) at (aux55) {};
}
\end{scope}
\begin{scope}
\node[wordnode,anchor=south] () at ([xshift=0.5\base]aux21) {$\mathbf{h}_{t-1}$};
\node[wordnode,anchor=west] () at (aux12) {$\mathbf{x}_t$};
\node[wordnode,anchor=south] () at ([xshift=0.5\base]aux51) {$\mathbf{c}_{t-1}$};
{
\node[wordnode,anchor=south] () at ([xshift=-0.5\base]aux59) {$\mathbf{c}_{t}$};
}
\end{scope}
\node[ ] (o27) at (aux27) { };
\begin{pgfonlayer}{background}
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=4pt,fit=(aux22) (aux58) (u55) (o27)] (LSTM) {};
\end{pgfonlayer}
%%new
\draw[-] (i45.north west) -- (i45.south east);
\draw[-] (i45.north east) -- (i45.south west);
\draw[-] (f53.north west) -- (f53.south east);
\draw[-] (f53.north east) -- (f53.south west);
\draw [-,line width=0.8pt] (u55.north) -- (u55.south);
\draw [-,line width=0.8pt] (u55.west) -- (u55.east);
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.6cm}
\tikzstyle{wordnode} = [font=\scriptsize]
\tikzstyle{auxnode} = [inner sep=0pt,outer sep=0pt,opacity=0,draw=red,fill=red,circle,minimum size=3pt]
\tikzstyle{opnode} = [inner sep=0pt,outer sep=0pt,draw,fill=green!30!white,font=\scriptsize,minimum size=10pt]
\tikzstyle{standard} = [rounded corners=5pt,thick]
\tikzstyle{emph} = [rounded corners=5pt,thick,draw=red]
\tikzstyle{formulanode} = [font=\scriptsize,align=left,draw=red,rectangle,fill=red!10!white,rounded corners=2pt,drop shadow]
% Skeleton
\begin{scope}[every label/.append style={label distance=1pt,font=\tiny,inner sep=0pt,opacity=0}]
\coordinate (aux12) at (0,0);
%\node[auxnode,label={-45:12}] () at (aux12) {};
\coordinate (aux22) at ([yshift=\base]aux12);
%\node[auxnode,label={-45:22}] () at (aux22) {};
\coordinate (aux21) at ([xshift=-2\base]aux22);
%\node[auxnode,label={-45:21}] () at (aux21) {};
\coordinate (aux23) at ([xshift=\base]aux22);
%\node[auxnode,label={-45:23}] () at (aux23) {};
\coordinate (aux24) at ([xshift=\base]aux23);
%\node[auxnode,label={-45:24}] () at (aux24) {};
\coordinate (aux25) at ([xshift=\base]aux24);
%\node[auxnode,label={-45:25}] () at (aux25) {};
\coordinate (aux26) at ([xshift=\base]aux25);
%\node[auxnode,label={-45:26}] () at (aux26) {};
\coordinate (aux27) at ([xshift=\base]aux26);
%\node[auxnode,label={-45:27}] () at (aux27) {};
\coordinate (aux28) at ([xshift=\base]aux27);
%\node[auxnode,label={-45:28}] () at (aux28) {};
\coordinate (aux29) at ([xshift=2\base]aux28);
%\node[auxnode,label={-45:29}] () at (aux29) {};
\coordinate (aux33) at ([yshift=\base]aux23);
%\node[auxnode,label={-45:33}] () at (aux33) {};
\coordinate (aux34) at ([yshift=\base]aux24);
%\node[auxnode,label={-45:34}] () at (aux34) {};
\coordinate (aux35) at ([yshift=\base]aux25);
%\node[auxnode,label={-45:35}] () at (aux35) {};
\coordinate (aux37) at ([yshift=\base]aux27);
%\node[auxnode,label={-45:37}] () at (aux37) {};
\coordinate (aux45) at ([yshift=\base]aux35);
%\node[auxnode,label={-45:45}] () at (aux45) {};
\coordinate (aux55) at ([yshift=\base]aux45);
%\node[auxnode,label={-45:55}] () at (aux55) {};
\ExtractX{$(aux21)$}
\ExtractY{$(aux55)$}
\coordinate (aux51) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:51}] () at (aux51) {};
\ExtractX{$(aux23)$}
\ExtractY{$(aux55)$}
\coordinate (aux53) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:53}] () at (aux53) {};
\ExtractX{$(aux28)$}
\ExtractY{$(aux55)$}
\coordinate (aux58) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:58}] () at (aux58) {};
\ExtractX{$(aux29)$}
\ExtractY{$(aux55)$}
\coordinate (aux59) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:59}] () at (aux59) {};
\coordinate (aux68) at ([yshift=\base]aux58);
%\node[auxnode,label={-45:68}] () at (aux68) {};
\end{scope}
\begin{scope}
\node[opnode,circle,opacity=0] (f53) at (aux53) {};
\node[opnode,circle,opacity=0] (u55) at (aux55) {};
% forget gate
{
\draw[emph] (aux21) -- (aux23) -- (aux33);
\draw[-latex,emph] (aux12) -- (aux22) -- (aux23) -- (f53);
\node[opnode,circle,draw=red,thick] () at (aux33) {$\sigma$};
}
{
\draw[standard] (aux21) -- (aux23) -- (aux33);
\draw[-latex,standard] (aux12) -- (aux22) -- (aux23) -- (f53);
\node[opnode,circle] () at (aux33) {$\sigma$};
}
% input gate
{
\node[opnode,circle] (i45) at (aux45) {};
\draw[-latex,emph] (aux21) -- (aux24) |- (i45);
\draw[-latex,emph] (aux21) -- (aux25) -- (u55);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,circle,draw=red,thick] (i45) at (aux45) {X};
}
{
\node[opnode,circle] (i45) at (aux45) {};
\draw[-latex,standard] (aux21) -- (aux24) |- (i45);
\draw[-latex,standard] (aux21) -- (aux25) -- (u55);
\node[opnode,circle] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,circle] (i45) at (aux45) {};
}
% cell update
{
\draw[-latex,emph] (aux51) -- (aux59);
\node[opnode,circle,draw=red,thick] (f53) at (aux53) {X};
\node[opnode,circle,draw=red,thick] (u55) at (aux55) {\textbf{+}};
}
{
\draw[-latex,standard] (aux51) -- (aux59);
\node[opnode,circle] (f53) at (aux53) {};
\node[opnode,circle] (u55) at (aux55) {};
}
% output gate
{
\node[opnode,circle,draw=red,thick] (o27) at (aux27) {};
\draw[-latex,emph] (u55) -| (o27);
\draw[-latex,emph] (aux21) -- (o27);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux26) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux37) {$\mathrm{tanh}$};
\draw[-latex,emph] (o27) -- (aux29);
\draw[-latex,emph] (o27) -| (aux68);
}
\end{scope}
\begin{scope}
\node[wordnode,anchor=south] () at ([xshift=0.5\base]aux21) {$\mathbf{h}_{t-1}$};
\node[wordnode,anchor=west] () at (aux12) {$\mathbf{x}_t$};
\node[wordnode,anchor=south] () at ([xshift=0.5\base]aux51) {$\mathbf{c}_{t-1}$};
{
\node[wordnode,anchor=south] () at ([xshift=-0.5\base]aux59) {$\mathbf{c}_{t}$};
}
{
\node[wordnode,anchor=east] () at (aux68) {$\mathbf{h}_{t}$};
\node[wordnode,anchor=south] () at ([xshift=-0.5\base]aux29) {$\mathbf{h}_{t}$};
}
\end{scope}
\begin{pgfonlayer}{background}
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=4pt,fit=(aux22) (aux58) (u55) (o27)] (LSTM) {};
\end{pgfonlayer}
%%new
\draw[-] (i45.north west) -- (i45.south east);
\draw[-] (i45.north east) -- (i45.south west);
\draw[-] (f53.north west) -- (f53.south east);
\draw[-] (f53.north east) -- (f53.south west);
\draw [-,line width=0.8pt] (u55.north) -- (u55.south);
\draw [-,line width=0.8pt] (u55.west) -- (u55.east);
\draw[-] (o27.north west) -- (o27.south east);
\draw[-] (o27.north east) -- (o27.south west);
\end{tikzpicture}
\ No newline at end of file
%-------------------------------------------
\begin{tikzpicture}
%\setlength{\hnode}{1.2cm}
\tikzstyle{elementnode} = [rectangle,text=white,anchor=center]
\tikzstyle{srcnode} = [rotate=45,font=\small,anchor=south west]
\tikzstyle{tgtnode} = [left,font=\small,anchor=north east]
\tikzstyle{alignmentnode} = [rectangle,draw,minimum height=3.6cm,minimum width=0.36cm]
\tikzstyle{probnode} = [fill=blue!30,minimum width=0.4cm]
\tikzstyle{labelnode} = [above]
% alignment matrix
\begin{scope}[scale=0.9,yshift=0.12in]
\foreach \i / \j / \c in
{0/7/0.2, 1/7/0.45, 2/7/0.15, 3/7/0.15, 4/7/0.15, 5/7/0.15,
0/6/0.35, 1/6/0.45, 2/6/0.15, 3/6/0.15, 4/6/0.15, 5/6/0.15,
0/5/0.25, 1/5/0.15, 2/5/0.15, 3/5/0.35, 4/5/0.15, 5/5/0.15,
0/4/0.15, 1/4/0.25, 2/4/0.2, 3/4/0.30, 4/4/0.15, 5/4/0.15,
0/3/0.15, 1/3/0.15, 2/3/0.8, 3/3/0.25, 4/3/0.15, 5/3/0.25,
0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.25, 5/2/0.3,
0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.8, 5/1/0.15,
0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.25, 5/0/0.60}
\node[elementnode,minimum size=0.6*1.2cm*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*1.2cm*\i-5.4*0.5*1.2cm,0.5*1.2cm*\j-1.05*1.2cm) {};
%attention score labels
\node[align=center] (l17) at (a17) {\scriptsize{{\color{white} .4}}};
\node[align=center] (l26) at (a06) {\scriptsize{{\color{white} .3}}};
\node[align=center] (l26) at (a16) {\scriptsize{{\color{white} .4}}};
\node[align=center] (l17) at (a35) {\scriptsize{{\color{white} .3}}};
\node[align=center] (l17) at (a34) {\tiny{{\color{white} .3}}};
\node[align=center] (l17) at (a23) {\small{{\color{white} .8}}};
\node[align=center] (l17) at (a41) {\small{{\color{white} .8}}};
\node[align=center] (l17) at (a50) {\small{{\color{white} .7}}};
% source
\node[srcnode] (src1) at (-5.4*0.5*1.2cm,-1.05*1.2cm+7.5*0.5*1.2cm) {\scriptsize{Have}};
\node[srcnode] (src2) at ([xshift=0.6cm]src1.south west) {\scriptsize{you}};
\node[srcnode] (src3) at ([xshift=0.6cm]src2.south west) {\scriptsize{learned}};
\node[srcnode] (src4) at ([xshift=0.6cm]src3.south west) {\scriptsize{nothing}};
\node[srcnode] (src5) at ([xshift=0.6cm]src4.south west) {\scriptsize{?}};
\node[srcnode] (src6) at ([xshift=0.6cm]src5.south west) {\scriptsize{$\langle$eos$\rangle$}};
% target
\node[tgtnode] (tgt1) at (-6.0*0.5*1.2cm,-1.05*1.2cm+7.5*0.5*1.2cm) {\scriptsize{}};
\node[tgtnode] (tgt2) at ([yshift=-0.6cm]tgt1.north east) {\scriptsize{什么}};
\node[tgtnode] (tgt3) at ([yshift=-0.6cm]tgt2.north east) {\scriptsize{}};
\node[tgtnode] (tgt4) at ([yshift=-0.6cm]tgt3.north east) {\scriptsize{}};
\node[tgtnode] (tgt5) at ([yshift=-0.6cm]tgt4.north east) {\scriptsize{}};
\node[tgtnode] (tgt6) at ([yshift=-0.6cm]tgt5.north east) {\scriptsize{}};
\node[tgtnode] (tgt7) at ([yshift=-0.6cm]tgt6.north east) {\scriptsize{?}};
\node[tgtnode] (tgt8) at ([yshift=-0.6cm]tgt7.north east) {\scriptsize{$\langle$eos$\rangle$}};
\end{scope}
\end{tikzpicture}
%-------------------------------------------
\ No newline at end of file
%--------------------------------------------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.9cm}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum size=0.5\base,draw,inner sep=0pt,outer sep=0pt]
\tikzstyle{wordnode} = [font=\tiny]
% RNN translation model
\begin{scope}[local bounding box=RNNMT]
% RNN Encoder
\coordinate (eemb0) at (0,0);
\foreach \x [count=\y from 0] in {1,2,...,10}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\x) at ([xshift=0.4\base]eemb\y.east) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,fill=blue!30!white,anchor=south] (enc\x) at ([yshift=0.5\base]eemb\x.north) {};
\node[wordnode,left=0.4\base of enc1] (init) {$0$};
\node[wordnode,below=0pt of eemb1] () {};
\node[wordnode,below=0pt of eemb2] () {知道};
\node[wordnode,below=0pt of eemb3] () {};
\node[wordnode,below=0pt of eemb4] () {北京站};
\node[wordnode,below=0pt of eemb5] () {};
\node[wordnode,below=0pt of eemb6] () {};
\node[wordnode,below=0pt of eemb7] () {怎么};
\node[wordnode,below=0pt of eemb8] () {};
\node[wordnode,below=0pt of eemb9] () {};
\node[wordnode,below=0pt of eemb10] () {$\langle$eos$\rangle$};
% RNN Decoder
\foreach \x in {1,2,...,10}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([yshift=2\base]enc\x.north) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,fill=blue!30!white,anchor=south] (dec\x) at ([yshift=0.5\base]demb\x.north) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax\x) at ([yshift=0.5\base]dec\x.north) {};
% Decoder input words
\node[wordnode,below=0pt of demb1] (decwordin) {$\langle$sos$\rangle$};
\ExtractX{$(demb2.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Do};
\ExtractX{$(demb3.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
\ExtractX{$(demb4.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {know};
\ExtractX{$(demb5.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {the};
\ExtractX{$(demb6.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {way};
\ExtractX{$(demb7.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {to};
\ExtractX{$(demb8.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Beijing};
\ExtractX{$(demb9.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Railway};
\ExtractX{$(demb10.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Station};
% Decoder output words
\node[wordnode,above=0pt of softmax1] (decwordout) {Do};
\ExtractX{$(softmax2.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
\ExtractX{$(softmax3.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {know};
\ExtractX{$(softmax4.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {the};
\ExtractX{$(softmax5.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {way};
\ExtractX{$(softmax6.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {to};
\ExtractX{$(softmax7.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Beijing};
\ExtractX{$(softmax8.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Railway};
\ExtractX{$(softmax9.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Station};
\ExtractX{$(softmax10.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {$\langle$eos$\rangle$};
% Connections
\draw[-latex'] (init.east) to (enc1.west);
\foreach \x in {1,2,...,10}
\draw[-latex'] (eemb\x) to (enc\x);
\foreach \x in {1,2,...,10}
\draw[-latex'] (demb\x) to (dec\x);
\foreach \x in {1,2,...,10}
\draw[-latex'] (dec\x.north) to ([yshift=0.5\base]dec\x.north);
\foreach \x [count=\y from 2] in {1,2,...,9}
{
\draw[-latex'] (enc\x.east) to (enc\y.west);
\draw[-latex'] (dec\x.east) to (dec\y.west);
}
\coordinate (bridge) at ([yshift=-1.2\base]demb2);
\draw[-latex'] (enc10.north) .. controls +(north:\base) and +(east:1.5\base) .. (bridge) .. controls +(west:2.5\base) and +(west:0.6\base) .. (dec1.west);
\end{scope}
% legend
\begin{scope}[shift={(10\base,2.5\base)}]
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,label={[label distance=3pt,font=\scriptsize]0:词嵌入层}] (emb) at (0,0) {};
\node[rnnnode,fill=blue!30!white,anchor=north west,label={[label distance=3pt,font=\scriptsize]0:循环单元}] (rnn) at ([yshift=2\base]emb.south west) {};
\node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=north west,label={[label distance=3pt,font=\scriptsize]0:输出层}] (softmax) at ([yshift=2\base]rnn.south west) {};
\node [anchor=north west] (softmax2) at ([xshift=0.6\base]softmax.south west) {\scriptsize{Softmax}};
\node [anchor=north west] (rnn2) at ([xshift=0.6\base]rnn.south west) {\scriptsize{LSTM}};
\node [anchor=west] (reprlabel) at ([xshift=1em]enc10.east) {\scriptsize{句子表示}};
\draw [->,dashed] (reprlabel.west) -- ([xshift=0.1em]enc10.east);
\node [rnnnode,fill=purple!30!white] at (enc10) {};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
% not compatible with [scale=?]
%----------------------------------------------------
\begin{tikzpicture}
\begin{scope}[local bounding box=WMT]
\draw[->,thick] (0.4,0) to (9.5,0);
\draw[->,thick] (0.4,-0) to (0.4,4.3);
\draw[thick] (0.4,2) to (0.6,2);
\draw[thick] (0.4,4) to (0.6,4);
\node[font=\scriptsize] at (0,2) {10};
\node[font=\scriptsize] at (0,4) {20};
% 2015
\node[minimum width=0.5cm,thick,minimum height=7*0.2cm,fill=blue!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (smt2015) at (1.5*0.7,0.5pt) {};
\node[minimum width=0.5cm,thick,minimum height=2*0.2cm,fill=red!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (nmt2015) at (smt2015.south east) {};
\node[font=\scriptsize,anchor=north] () at ([yshift=-0.2em]smt2015.south east) {2015};
% 2016
\node[minimum width=0.5cm,thick,minimum height=3*0.2cm,fill=blue!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (smt2016) at ($(nmt2015.south east)+(0.7,0)$) {};
\node[minimum width=0.5cm,thick,minimum height=8*0.2cm,fill=red!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (nmt2016) at (smt2016.south east) {};
\node[font=\scriptsize,anchor=north] () at ([yshift=-0.2em]smt2016.south east) {2016};
% 2017
\node[minimum width=0.5cm,thick,minimum height=3*0.2cm,fill=blue!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (smt2017) at ($(nmt2016.south east)+(0.7,0)$) {};
\node[minimum width=0.5cm,thick,minimum height=13*0.2cm,fill=red!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (nmt2017) at (smt2017.south east) {};
\node[font=\scriptsize,anchor=north] () at ([yshift=-0.2em]smt2017.south east) {2017};
% 2018
\node[minimum width=0.5cm,thick,minimum height=0cm,draw,fill=blue!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (smt2018) at ($(nmt2017.south east)+(0.7,0)$) {};
\node[minimum width=0.5cm,thick,minimum height=14*0.2cm,fill=red!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (nmt2018) at (smt2018.south east) {};
\node[font=\scriptsize,anchor=north] () at ([yshift=-0.2em]smt2018.south east) {2018};
% 2019
\node[minimum width=0.5cm,thick,minimum height=0cm,draw,fill=blue!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (smt2019) at ($(nmt2018.south east)+(0.7,0)$) {};
\node[minimum width=0.5cm,thick,minimum height=21*0.2cm,fill=red!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (nmt2019) at (smt2019.south east) {};
\node[font=\scriptsize,anchor=north] () at ([yshift=-0.2em]smt2019.south east) {2019};
\end{scope}
% legend
\ExtractX{$(nmt2015.west)$}
\ExtractY{$(WMT.north)$}
\node[minimum width=0.5cm,rectangle,fill=blue!30!white,anchor=north west,label={[label distance=1pt,font=\scriptsize]0:统计机器翻译}] () at (\XCoord,\YCoord) {};
\ExtractX{$(nmt2017.west)$}
\node[minimum width=0.5cm,rectangle,fill=red!30!white,anchor=north west,label={[label distance=1pt,font=\scriptsize]0:神经机器翻译}] () at (\XCoord,\YCoord) {};
% \node[font=\normalsize,rotate=90] () at ([xshift=-1em]WMT.west) {数量};
\node[font=\normalsize] () at (0.4,4.5) {数量};
\node[font=\normalsize] () at (9.5,-0.3) {年份};
\end{tikzpicture}
\ No newline at end of file
% not compatible with [scale=?]
%-----------------------------------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.9cm}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum height=0.5\base,minimum width=1\base,draw,inner sep=0pt,outer sep=0pt]
\tikzstyle{wordnode} = [font=\tiny]
% RNN translation model
\begin{scope}[local bounding box=RNNMT]
% RNN Encoder
\coordinate (eemb0) at (0,0);
\foreach \x [count=\y from 0] in {1,2,...,3}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\x) at ([xshift=0.4\base]eemb\y.east) {\tiny{$e_x()$}};
\foreach \x in {1,2,...,3}
\node[rnnnode,fill=blue!30!white,anchor=south] (enc\x) at ([yshift=0.3\base]eemb\x.north) {};
\node[] (enclabel1) at (enc1) {\tiny{$\textbf{h}_{m-2}$}};
\node[] (enclabel2) at (enc2) {\tiny{$\textbf{h}_{m-1}$}};
\node[rnnnode,fill=purple!30!white] (enclabel3) at (enc3) {\tiny{$\textbf{h}_{m}$}};
\node[wordnode,left=0.4\base of enc1] (init1) {$\cdots$};
\node[wordnode,left=0.4\base of eemb1] (init2) {$\cdots$};
\node[wordnode,below=0pt of eemb1] () {};
\node[wordnode,below=0pt of eemb2] () {};
\node[wordnode,below=0pt of eemb3] () {$\langle$eos$\rangle$};
% RNN Decoder
\foreach \x in {1,2,...,3}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([yshift=\base]enc\x.north) {\tiny{$e_y()$}};
\foreach \x in {1,2,...,3}
\node[rnnnode,fill=blue!30!white,anchor=south] (dec\x) at ([yshift=0.3\base]demb\x.north) {{\tiny{$\textbf{s}_\x$}}};
\foreach \x in {1,2,...,3}
\node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax\x) at ([yshift=0.3\base]dec\x.north) {\tiny{Softmax}};
\node[wordnode,right=0.4\base of demb3] (end1) {$\cdots$};
\node[wordnode,right=0.4\base of dec3] (end2) {$\cdots$};
\node[wordnode,right=0.4\base of softmax3] (end3) {$\cdots$};
% Decoder input words
\node[wordnode,below=0pt of demb1] (decwordin) {$\langle$sos$\rangle$};
\ExtractX{$(demb2.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Do};
\ExtractX{$(demb3.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
% Decoder output words
\node[wordnode,above=0pt of softmax1] (decwordout) {Do};
\ExtractX{$(softmax2.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
\ExtractX{$(softmax3.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {know};
% Connections
\draw[-latex'] (init1.east) to (enc1.west);
\draw[-latex'] (dec3.east) to (end2.west);
\foreach \x in {1,2,...,3}
\draw[-latex'] (eemb\x) to (enc\x);
\foreach \x in {1,2,...,3}
\draw[-latex'] (demb\x) to (dec\x);
\foreach \x in {1,2,...,3}
\draw[-latex'] (dec\x.north) to (softmax\x.south);
\foreach \x [count=\y from 2] in {1,2}
{
\draw[-latex'] (enc\x.east) to (enc\y.west);
\draw[-latex'] (dec\x.east) to (dec\y.west);
}
\coordinate (bridge) at ([yshift=0.4\base]enc2.north west);
\draw[-latex'] (enc3.north) .. controls +(north:0.3\base) and +(east:\base) .. (bridge) .. controls +(west:2.7\base) and +(west:0.3\base) .. (dec1.west);
\end{scope}
\begin{scope}
\coordinate (start) at (8.5\base,0.1\base);
\node [anchor=center,minimum width=5.7em,minimum height=1.3em,draw,rounded corners=0.3em] (hidden) at (start) {};
\node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!20] (cell01) at ([xshift=0.2em]hidden.west) {\scriptsize{.2}};
\node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!10] (cell02) at (cell01.east) {\scriptsize{-1}};
\node [anchor=west,minimum width=1em,minimum size=1em,fill=white] (cell03) at (cell02.east) {\scriptsize{$\cdots$}};
\node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!50] (cell04) at (cell03.east) {\scriptsize{5}};
{
\node [anchor=south,minimum width=10.9em,minimum height=1.3em,draw,rounded corners=0.3em] (target) at ([yshift=1.5em]hidden.north) {};
\node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!10] (cell11) at ([xshift=0.2em]target.west) {\scriptsize{-2}};
\node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!10] (cell12) at (cell11.east) {\scriptsize{-1}};
\node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!30] (cell13) at (cell12.east) {\scriptsize{.7}};
\node [anchor=west,minimum width=1em,minimum size=1em,fill=white] (cell14) at (cell13.east) {\scriptsize{$\cdots$}};
\node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!70] (cell15) at (cell14.east) {\scriptsize{6}};
\node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!10] (cell16) at (cell15.east) {\scriptsize{-3}};
\node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!10] (cell17) at (cell16.east) {\scriptsize{-1}};
\node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!20] (cell18) at (cell17.east) {\scriptsize{.2}};
}
{
\node [anchor=south,minimum width=1em,minimum height=0.2em,fill=ublue!80,inner sep=0pt] (label1) at ([yshift=2.5em]cell11.north) {};
\node [anchor=west,rotate=90,font=\tiny] (w1) at (label1.north) {$\langle$eos$\rangle$};
\node [anchor=south,minimum width=1em,minimum height=0.3em,fill=ublue!80,inner sep=0pt] (label2) at ([yshift=2.5em]cell12.north) {};
\node [anchor=west,rotate=90,font=\tiny] (w2) at (label2.north) {$\langle$sos$\rangle$};
\node [anchor=south,minimum width=1em,minimum height=0.5em,fill=ublue!80,inner sep=0pt] (label3) at ([yshift=2.5em]cell13.north) {};
\node [anchor=west,rotate=90,font=\tiny] (w3) at (label3.north) {Do};
\node [anchor=south,font=\scriptsize] (w4) at ([yshift=2.5em]cell14.north) {$\cdots$};
\node [anchor=south,minimum width=1em,minimum height=1em,fill=ublue!80,inner sep=0pt] (label5) at ([yshift=2.5em]cell15.north) {};
{
\node [anchor=west,rotate=90,font=\tiny] (w5) at (label5.north) {{\color{red} know}};
}
\node [anchor=south,minimum width=1em,minimum height=0.1em,fill=ublue!80,inner sep=0pt] (label6) at ([yshift=2.5em]cell16.north) {};
\node [anchor=west,rotate=90,font=\tiny] (w6) at (label6.north) {you};
\node [anchor=south,minimum width=1em,minimum height=0.3em,fill=ublue!80,inner sep=0pt] (label7) at ([yshift=2.5em]cell17.north) {};
\node [anchor=west,rotate=90,font=\tiny] (w7) at (label7.north) {?};
\node [anchor=south,minimum width=1em,minimum height=0.4em,fill=ublue!80,inner sep=0pt] (label8) at ([yshift=2.5em]cell18.north) {};
\node [anchor=west,rotate=90,font=\tiny] (w8) at (label8.north) {have};
}
{
\filldraw [fill=red!20,draw=white] (target.south west) -- (target.south east) -- ([xshift=-0.2em,yshift=0.1em]hidden.north east) -- ([xshift=0.2em,yshift=0.1em]hidden.north west);
\draw [->,thick] ([xshift=0.2em,yshift=0.1em]hidden.north west) -- (target.south west);
\draw [->,thick] ([xshift=-0.2em,yshift=0.1em]hidden.north east) -- (target.south east);
\node [anchor=south] () at ([yshift=0.3em]hidden.north) {\scriptsize{$\hat{\mathbf{s}}=\mathbf{Ws}$}};
}
{
\node [rounded corners=0.3em] (softmax) at ([yshift=1.25em]target.north) {\scriptsize{$p(\hat{s}_i)=\frac{e^{\hat{s}_i}}{\sum_j e^{\hat{s}_j}}$}};
\filldraw [fill=blue!20,draw=white] ([yshift=0.1em]cell11.north west) {[rounded corners=0.3em] -- (softmax.west)} -- (label1.south west) -- (label8.south east) {[rounded corners=0.3em] -- (softmax.east)} -- ([yshift=0.1em]cell18.north east) -- ([yshift=0.1em]cell11.north west);
\node [rounded corners=0.3em] (softmax) at ([yshift=1.25em]target.north) {\scriptsize{$p(\hat{s}_i)=\frac{e^{\hat{s}_i}}{\sum_j e^{\hat{s}_j}}$}};
}
\draw [-latex'] ([yshift=-0.3cm]hidden.south) to (hidden.south);
{
\draw [-latex'] (w5.east) to ([yshift=0.3cm]w5.east);
}
\coordinate (tmp) at ([yshift=-3pt]w5.east);
\node [draw=red,thick,densely dashed,rounded corners=3pt,inner sep=5pt,fit=(cell01) (cell11) (label1) (label8) (target) (hidden) (tmp)] (output) {};
\end{scope}
\draw [->,thick,densely dashed,red] ([yshift=-0.2em]softmax3.east) .. controls +(east:2\base) and +(west:\base) .. (output.west);
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\node [anchor=south west,draw,thick,red,minimum width=0.9in,minimum height=0.7in] (space1) at (0,0) {};
\node [anchor=south west,fill=blue,minimum width=0.1in,minimum height=0.1in] (unit1) at (0.2,0.8) {};
\node [anchor=south west,fill=ugreen,minimum width=0.1in,minimum height=0.1in] (unit2) at (0.7,0.3) {};
\node [anchor=south west,fill=blue,minimum width=0.1in,minimum height=0.1in] (unit3) at (1.3,1.3) {};
\node [anchor=south west,fill=ugreen,minimum width=0.1in,minimum height=0.1in] (unit4) at ([xshift=0.1em]unit3.south east) {};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=2pt,rounded corners=1pt,draw,thick] [fit = (unit3) (unit4)] (unitbox) {};
\end{pgfonlayer}
\draw [->] ([yshift=1pt]unit1.north) .. controls +(north:0.23) and +(west:0.2) .. ([yshift=0.2em,xshift=-1pt]unitbox.west);
\draw [->] ([xshift=1pt]unit2.east) .. controls +(east:0.5) and +(south:0.2) .. ([xshift=0.2em,yshift=-1pt]unitbox.south);
\node [anchor=south] (spacelabel1) at (space1.north) {\scriptsize{离散表示空间}};
\node [anchor=north] (captain1) at ([yshift=-0.5em]space1.south) {\scriptsize{(a) \textbf{统计机器翻译}}};
\end{scope}
\begin{scope}[xshift=1.3in]
\node [anchor=south west,draw,thick,red,minimum width=0.9in,minimum height=0.7in] (space1) at (0,0) {};
\node [anchor=south west,fill=blue,minimum width=0.1in,minimum height=0.1in] (unit1) at (0.2,0.8) {};
\node [anchor=south west,fill=ugreen,minimum width=0.1in,minimum height=0.1in] (unit2) at (0.7,0.3) {};
\node [anchor=south west,draw,thick,red,minimum width=0.9in,minimum height=0.7in] (space2) at (1.1in,0) {};
\node [anchor=south west,circle,fill=orange,minimum width=0.1in,minimum height=0.1in] (unit3) at (1.5in,1.3) {};
\draw [->] ([yshift=1pt]unit1.north) .. controls +(north:0.4) and +(west:2) .. ([yshift=0.0em,xshift=-1pt]unit3.west);
\draw [->] ([xshift=1pt]unit2.east) .. controls +(east:1.5) and +(south:1) .. ([xshift=0.0em,yshift=-1pt]unit3.south);
\node [anchor=south] (spacelabel1) at (space1.north) {\scriptsize{离散表示空间}};
\node [anchor=south] (spacelabel2) at (space2.north) {\scriptsize{连续表示空间}};
\node [anchor=north] (captain1) at ([yshift=-0.5em,xshift=1em]space1.south east) {\scriptsize{(b) \textbf{神经机器翻译}}};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\setlength{\base}{1.0em}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum size=1\base,draw,inner sep=0pt,outer sep=0pt,fill=blue!30!white]
\tikzstyle{wordnode} = [font=\footnotesize,align=center]
\begin{scope}
% rnn[layer][step]
\coordinate (rnn00) at (0,0);
\foreach \i [count=\j from 0] in {1,2,3}
\node[wordnode,font=\tiny] (rnn\i0) at ([yshift=2\base]rnn\j0) {$0$};
\foreach \i [count=\j from 0] in {1,2,...,4}
\coordinate (rnn0\i) at ([xshift=2\base]rnn0\j);
% step 1
{
\node[rnnnode] (rnn11) at ([xshift=2\base]rnn10) {};
\draw[-latex'] ([yshift=0.5\base]rnn01) to (rnn11);
\draw[-latex'] ([xshift=0.5\base]rnn10) to (rnn11);
}
% step 2
{
\node[rnnnode] (rnn12) at ([xshift=2\base]rnn11) {};
\node[rnnnode] (rnn21) at ([yshift=2\base]rnn11) {};
\draw[-latex'] ([yshift=0.5\base]rnn02) to (rnn12);
\draw[-latex'] ([xshift=0.5\base]rnn20) to (rnn21);
\draw[-latex'] (rnn11) to (rnn12);
\draw[-latex'] (rnn11) to (rnn21);
}
% step 3
{\footnotesize
\node[rnnnode] (rnn13) at ([xshift=2\base]rnn12) {};
\node[rnnnode] (rnn31) at ([yshift=2\base]rnn21) {};
\node[rnnnode] (rnn22) at ([xshift=2\base]rnn21) {};
\node[wordnode,anchor=south,font=\scriptsize] (o1) at ([yshift=\base]rnn31.north) {};
\draw[-latex'] ([yshift=0.5\base]rnn03) to (rnn13);
\draw[-latex'] ([xshift=0.5\base]rnn30) to (rnn31);
\draw[-latex'] (rnn12) to (rnn13);
\draw[-latex'] (rnn21) to (rnn31);
\draw[-latex'] (rnn12) to (rnn22);
\draw[-latex'] (rnn21) to (rnn22);
\draw[-latex'] (rnn31) to (o1);
}
% step 4
{\footnotesize
\node[rnnnode] (rnn14) at ([xshift=2\base]rnn13) {};
\node[rnnnode] (rnn23) at ([xshift=2\base]rnn22) {};
\node[rnnnode] (rnn32) at ([xshift=2\base]rnn31) {};
\node[wordnode,anchor=south,font=\scriptsize] (o2) at ([yshift=\base]rnn32.north) {不错};
\draw[-latex'] ([yshift=0.5\base]rnn04) to (rnn14);
\draw[-latex'] (rnn13) to (rnn14);
\draw[-latex'] (rnn13) to (rnn23);
\draw[-latex'] (rnn22) to (rnn23);
\draw[-latex'] (rnn22) to (rnn32);
\draw[-latex'] (rnn31) to (rnn32);
\draw[-latex'] (rnn32) to (o2);
}
% step 5
{\footnotesize
\node[rnnnode] (rnn24) at ([xshift=2\base]rnn23) {};
\node[rnnnode] (rnn33) at ([xshift=2\base]rnn32) {};
\node[wordnode,anchor=south,font=\scriptsize] (o3) at ([yshift=\base]rnn33.north) {};
\draw[-latex'] (rnn14) to (rnn24);
\draw[-latex'] (rnn23) to (rnn24);
\draw[-latex'] (rnn23) to (rnn33);
\draw[-latex'] (rnn32) to (rnn33);
\draw[-latex'] (rnn33) to (o3);
}
% step 6
{
\node[rnnnode] (rnn34) at ([xshift=2\base]rnn33) {};
\node[wordnode,anchor=south,font=\scriptsize] (o4) at ([yshift=\base]rnn34.north) {EOS};
\draw[-latex'] (rnn33) to (rnn34);
\draw[-latex'] (rnn24) to (rnn34);
\draw[-latex'] (rnn34) to (o4);
}
{
% frontier
\node[rnnnode,fill=purple] () at (rnn34) {};
\node[draw=red,thick,inner sep=7pt,rounded corners=0.3em,rotate fit=-45,label={[font=\scriptsize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn34)] () {};
}
% labels
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn10.west) to node[wordnode,align=right,left,font=\scriptsize] {空闲的\\设备1} ([yshift=\base]rnn10.west);
}
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn20.west) to node[wordnode,align=right,left,font=\scriptsize] {空闲的\\设备2} ([yshift=\base]rnn20.west);
}
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn30.west) to node[wordnode,align=right,left,text=red,font=\scriptsize] {正在使用的\\设备3} ([yshift=\base]rnn30.west);
}
\foreach \i in {1,2,3}
\node[wordnode,font=\tiny,anchor=south west] () at ([yshift=-0.25em]rnn\i0.north west) {\i};
\node[wordnode,font=\scriptsize] () at (rnn01) {};
\node[wordnode,font=\scriptsize] () at (rnn02) {};
\node[wordnode,font=\scriptsize] () at (rnn03) {不错};
\node[wordnode,font=\scriptsize] () at ([xshift=0.25em]rnn04) {};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.9em}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum size=1\base,draw,inner sep=0pt,outer sep=0pt,fill=blue!30!white]
\tikzstyle{wordnode} = [font=\footnotesize,align=center]
\tikzstyle{test} = [rounded corners=1pt,minimum size=1\base,inner sep=0pt,outer sep=0pt]
\begin{scope}
% rnn[layer][step]
\coordinate (rnn00) at (0,0);
\foreach \i [count=\j from 0] in {1,2,3}
\node[wordnode] (rnn\i0) at ([yshift=2\base]rnn\j0) {$0$};
\foreach \i [count=\j from 0] in {1,2,...,4}
\coordinate (rnn0\i) at ([xshift=2\base]rnn0\j);
% step 1
{
\node[rnnnode] (rnn11) at ([xshift=2\base]rnn10) {};
\draw[-latex'] ([yshift=0.5\base]rnn01) to (rnn11);
\draw[-latex'] ([xshift=0.5\base]rnn10) to (rnn11);
}
{
% frontier
\node[rnnnode,fill=purple] () at (rnn11) {};
\node[draw=red,thick,inner sep=5pt,rounded corners=0.3em,rotate fit=-45,label={[font=\scriptsize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn11)] () {};
}
% 占位
{
\node[test] (z2) at ([xshift=2\base]rnn10) {};
\node[test] (z3) at ([xshift=2\base]z2) {};
\node[test] (z4) at ([xshift=2\base]z3) {};
\node[test] (z5) at ([xshift=2\base]z4) {};
\node[test] (z6) at ([xshift=2\base]z5) {};
\node[test] (z7) at ([xshift=2\base]z6) {};
%\node[test] (z8) at ([xshift=2\base]z7) {};
}
% labels
%\alt<1-4>
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn10.west) to node[wordnode,align=right,left,text=red,font=\scriptsize] {正在使用的\\设备1} ([yshift=\base]rnn10.west);
}
%\alt<2-5>
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn20.west) to node[wordnode,align=right,left,font=\scriptsize] {空闲的\\设备2} ([yshift=\base]rnn20.west);
}
%\alt<3-6>
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn30.west) to node[wordnode,align=right,left,font=\scriptsize] {空闲的\\设备3} ([yshift=\base]rnn30.west);
}
\foreach \i in {1,2,3}
\node[wordnode,font=\tiny,anchor=south west] () at ([yshift=-0.25em]rnn\i0.north west) {\i};
\node[wordnode,font=\scriptsize] () at (rnn01) {};
\node[wordnode,font=\scriptsize] () at (rnn02) {};
\node[wordnode,font=\scriptsize] () at (rnn03) {不错};
\node[wordnode,font=\scriptsize] () at ([xshift=0.25em]rnn04) {};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.9em}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum size=1\base,draw,inner sep=0pt,outer sep=0pt,fill=blue!30!white]
\tikzstyle{wordnode} = [font=\footnotesize,align=center]
\tikzstyle{test} = [rounded corners=1pt,minimum size=1\base,inner sep=0pt,outer sep=0pt]
\begin{scope}
% rnn[layer][step]
\coordinate (rnn00) at (0,0);
\foreach \i [count=\j from 0] in {1,2,3}
\node[wordnode] (rnn\i0) at ([yshift=2\base]rnn\j0) {$0$};
\foreach \i [count=\j from 0] in {1,2,...,4}
\coordinate (rnn0\i) at ([xshift=2\base]rnn0\j);
% step 1
{
\node[rnnnode] (rnn11) at ([xshift=2\base]rnn10) {};
\draw[-latex'] ([yshift=0.5\base]rnn01) to (rnn11);
\draw[-latex'] ([xshift=0.5\base]rnn10) to (rnn11);
}
% step 2
{
\node[rnnnode] (rnn12) at ([xshift=2\base]rnn11) {};
\node[rnnnode] (rnn21) at ([yshift=2\base]rnn11) {};
\draw[-latex'] ([yshift=0.5\base]rnn02) to (rnn12);
\draw[-latex'] ([xshift=0.5\base]rnn20) to (rnn21);
\draw[-latex'] (rnn11) to (rnn12);
\draw[-latex'] (rnn11) to (rnn21);
}
{
% frontier
\node[rnnnode,fill=purple] () at (rnn12) {};
\node[rnnnode,fill=purple] () at (rnn21) {};
\node[draw=red,thick,inner sep=5pt,rounded corners=0.3em,rotate fit=-45,label={[font=\scriptsize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn12) (rnn21)] () {};
}
% 占位
{
\node[test] (z2) at ([xshift=2\base]rnn11) {};
\node[test] (z3) at ([xshift=2\base]z2) {};
\node[test] (z4) at ([xshift=2\base]z3) {};
\node[test] (z5) at ([xshift=2\base]z4) {};
\node[test] (z6) at ([xshift=2\base]z5) {};
\node[test] (z7) at ([xshift=2\base]z6) {};
}
% labels
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn10.west) to node[wordnode,align=right,left,text=red,font=\scriptsize] {正在使用的\\设备1} ([yshift=\base]rnn10.west);
}
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn20.west) to node[wordnode,align=right,left,text=red,font=\scriptsize] {正在使用的\\设备2} ([yshift=\base]rnn20.west);
}
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn30.west) to node[wordnode,align=right,left,font=\scriptsize] {空闲的\\设备3} ([yshift=\base]rnn30.west);
}
\foreach \i in {1,2,3}
\node[wordnode,font=\tiny,anchor=south west] () at ([yshift=-0.25em]rnn\i0.north west) {\i};
\node[wordnode,font=\scriptsize] () at (rnn01) {};
\node[wordnode,font=\scriptsize] () at (rnn02) {};
\node[wordnode,font=\scriptsize] () at (rnn03) {不错};
\node[wordnode,font=\scriptsize] () at ([xshift=0.25em]rnn04) {};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.9em}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum size=1\base,draw,inner sep=0pt,outer sep=0pt,fill=blue!30!white]
\tikzstyle{wordnode} = [font=\footnotesize,align=center]
\begin{scope}
% rnn[layer][step]
\coordinate (rnn00) at (0,0);
\foreach \i [count=\j from 0] in {1,2,3}
\node[wordnode] (rnn\i0) at ([yshift=2\base]rnn\j0) {$0$};
\foreach \i [count=\j from 0] in {1,2,...,4}
\coordinate (rnn0\i) at ([xshift=2\base]rnn0\j);
% step 1
{
\node[rnnnode] (rnn11) at ([xshift=2\base]rnn10) {};
\draw[-latex'] ([yshift=0.5\base]rnn01) to (rnn11);
\draw[-latex'] ([xshift=0.5\base]rnn10) to (rnn11);
}
% step 2
{
\node[rnnnode] (rnn12) at ([xshift=2\base]rnn11) {};
\node[rnnnode] (rnn21) at ([yshift=2\base]rnn11) {};
\draw[-latex'] ([yshift=0.5\base]rnn02) to (rnn12);
\draw[-latex'] ([xshift=0.5\base]rnn20) to (rnn21);
\draw[-latex'] (rnn11) to (rnn12);
\draw[-latex'] (rnn11) to (rnn21);
}
% step 3
{
\node[rnnnode] (rnn13) at ([xshift=2\base]rnn12) {};
\node[rnnnode] (rnn31) at ([yshift=2\base]rnn21) {};
\node[rnnnode] (rnn22) at ([xshift=2\base]rnn21) {};
\node[wordnode,anchor=south] (o1) at ([yshift=\base]rnn31.north) {};
\draw[-latex'] ([yshift=0.5\base]rnn03) to (rnn13);
\draw[-latex'] ([xshift=0.5\base]rnn30) to (rnn31);
\draw[-latex'] (rnn12) to (rnn13);
\draw[-latex'] (rnn21) to (rnn31);
\draw[-latex'] (rnn12) to (rnn22);
\draw[-latex'] (rnn21) to (rnn22);
\draw[-latex'] (rnn31) to (o1);
}
{
% frontier
\node[rnnnode,fill=purple] () at (rnn13) {};
\node[rnnnode,fill=purple] () at (rnn31) {};
\node[rnnnode,fill=purple] () at (rnn22) {};
\node[draw=red,thick,inner sep=5pt,rounded corners=0.3em,rotate fit=-45,label={[font=\scriptsize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn13) (rnn31) (rnn22)] () {};
}
% labels
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn10.west) to node[wordnode,align=right,left,text=red,font=\scriptsize] {正在使用的\\设备1} ([yshift=\base]rnn10.west);
}
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn20.west) to node[wordnode,align=right,left,text=red,font=\scriptsize] {正在使用的\\设备2} ([yshift=\base]rnn20.west);
}
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn30.west) to node[wordnode,align=right,left,text=red,font=\scriptsize] {正在使用的\\设备3} ([yshift=\base]rnn30.west);
}
\foreach \i in {1,2,3}
\node[wordnode,font=\tiny,anchor=south west] () at ([yshift=-0.25em]rnn\i0.north west) {\i};
\node[wordnode,font=\scriptsize] () at (rnn01) {};
\node[wordnode,font=\scriptsize] () at (rnn02) {};
\node[wordnode,font=\scriptsize] () at (rnn03) {不错};
\node[wordnode,font=\scriptsize] () at ([xshift=0.25em]rnn04) {};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.9em}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum size=1\base,draw,inner sep=0pt,outer sep=0pt,fill=blue!30!white]
\tikzstyle{wordnode} = [font=\footnotesize,align=center]
\begin{scope}
% rnn[layer][step]
\coordinate (rnn00) at (0,0);
\foreach \i [count=\j from 0] in {1,2,3}
\node[wordnode] (rnn\i0) at ([yshift=2\base]rnn\j0) {$0$};
\foreach \i [count=\j from 0] in {1,2,...,4}
\coordinate (rnn0\i) at ([xshift=2\base]rnn0\j);
% step 1
{
\node[rnnnode] (rnn11) at ([xshift=2\base]rnn10) {};
\draw[-latex'] ([yshift=0.5\base]rnn01) to (rnn11);
\draw[-latex'] ([xshift=0.5\base]rnn10) to (rnn11);
}
% step 2
{
\node[rnnnode] (rnn12) at ([xshift=2\base]rnn11) {};
\node[rnnnode] (rnn21) at ([yshift=2\base]rnn11) {};
\draw[-latex'] ([yshift=0.5\base]rnn02) to (rnn12);
\draw[-latex'] ([xshift=0.5\base]rnn20) to (rnn21);
\draw[-latex'] (rnn11) to (rnn12);
\draw[-latex'] (rnn11) to (rnn21);
}
% step 3
{
\node[rnnnode] (rnn13) at ([xshift=2\base]rnn12) {};
\node[rnnnode] (rnn31) at ([yshift=2\base]rnn21) {};
\node[rnnnode] (rnn22) at ([xshift=2\base]rnn21) {};
\node[wordnode,anchor=south] (o1) at ([yshift=\base]rnn31.north) {};
\draw[-latex'] ([yshift=0.5\base]rnn03) to (rnn13);
\draw[-latex'] ([xshift=0.5\base]rnn30) to (rnn31);
\draw[-latex'] (rnn12) to (rnn13);
\draw[-latex'] (rnn21) to (rnn31);
\draw[-latex'] (rnn12) to (rnn22);
\draw[-latex'] (rnn21) to (rnn22);
\draw[-latex'] (rnn31) to (o1);
}
% step 4
{
\node[rnnnode] (rnn14) at ([xshift=2\base]rnn13) {};
\node[rnnnode] (rnn23) at ([xshift=2\base]rnn22) {};
\node[rnnnode] (rnn32) at ([xshift=2\base]rnn31) {};
\node[wordnode,anchor=south] (o2) at ([yshift=\base]rnn32.north) {不错};
\draw[-latex'] ([yshift=0.5\base]rnn04) to (rnn14);
\draw[-latex'] (rnn13) to (rnn14);
\draw[-latex'] (rnn13) to (rnn23);
\draw[-latex'] (rnn22) to (rnn23);
\draw[-latex'] (rnn22) to (rnn32);
\draw[-latex'] (rnn31) to (rnn32);
\draw[-latex'] (rnn32) to (o2);
}
{
% frontier
\node[rnnnode,fill=purple] () at (rnn14) {};
\node[rnnnode,fill=purple] () at (rnn23) {};
\node[rnnnode,fill=purple] () at (rnn32) {};
\node[draw=red,thick,inner sep=5pt,rounded corners=0.3em,rotate fit=-45,label={[font=\scriptsize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn14) (rnn23) (rnn32)] () {};
}
% labels
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn10.west) to node[wordnode,align=right,left,text=red,font=\scriptsize] {正在使用的\\设备1} ([yshift=\base]rnn10.west);
}
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn20.west) to node[wordnode,align=right,left,text=red,font=\scriptsize] {正在使用的\\设备2} ([yshift=\base]rnn20.west);
}
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn30.west) to node[wordnode,align=right,left,text=red,font=\scriptsize] {正在使用的\\设备3} ([yshift=\base]rnn30.west);
}
\foreach \i in {1,2,3}
\node[wordnode,font=\tiny,anchor=south west] () at ([yshift=-0.25em]rnn\i0.north west) {\i};
\node[wordnode,font=\scriptsize] () at (rnn01) {};
\node[wordnode,font=\scriptsize] () at (rnn02) {};
\node[wordnode,font=\scriptsize] () at (rnn03) {不错};
\node[wordnode,font=\scriptsize] () at ([xshift=0.25em]rnn04) {};
\node[wordnode,font=\scriptsize] () at ([xshift=0.25em]rnn04) {};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.9em}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum size=1\base,draw,inner sep=0pt,outer sep=0pt,fill=blue!30!white]
\tikzstyle{wordnode} = [font=\footnotesize,align=center]
\begin{scope}
% rnn[layer][step]
\coordinate (rnn00) at (0,0);
\foreach \i [count=\j from 0] in {1,2,3}
\node[wordnode] (rnn\i0) at ([yshift=2\base]rnn\j0) {$0$};
\foreach \i [count=\j from 0] in {1,2,...,4}
\coordinate (rnn0\i) at ([xshift=2\base]rnn0\j);
% step 1
{
\node[rnnnode] (rnn11) at ([xshift=2\base]rnn10) {};
\draw[-latex'] ([yshift=0.5\base]rnn01) to (rnn11);
\draw[-latex'] ([xshift=0.5\base]rnn10) to (rnn11);
}
% step 2
{
\node[rnnnode] (rnn12) at ([xshift=2\base]rnn11) {};
\node[rnnnode] (rnn21) at ([yshift=2\base]rnn11) {};
\draw[-latex'] ([yshift=0.5\base]rnn02) to (rnn12);
\draw[-latex'] ([xshift=0.5\base]rnn20) to (rnn21);
\draw[-latex'] (rnn11) to (rnn12);
\draw[-latex'] (rnn11) to (rnn21);
}
% step 3
{
\node[rnnnode] (rnn13) at ([xshift=2\base]rnn12) {};
\node[rnnnode] (rnn31) at ([yshift=2\base]rnn21) {};
\node[rnnnode] (rnn22) at ([xshift=2\base]rnn21) {};
\node[wordnode,anchor=south] (o1) at ([yshift=\base]rnn31.north) {};
\draw[-latex'] ([yshift=0.5\base]rnn03) to (rnn13);
\draw[-latex'] ([xshift=0.5\base]rnn30) to (rnn31);
\draw[-latex'] (rnn12) to (rnn13);
\draw[-latex'] (rnn21) to (rnn31);
\draw[-latex'] (rnn12) to (rnn22);
\draw[-latex'] (rnn21) to (rnn22);
\draw[-latex'] (rnn31) to (o1);
}
% step 4
{
\node[rnnnode] (rnn14) at ([xshift=2\base]rnn13) {};
\node[rnnnode] (rnn23) at ([xshift=2\base]rnn22) {};
\node[rnnnode] (rnn32) at ([xshift=2\base]rnn31) {};
\node[wordnode,anchor=south] (o2) at ([yshift=\base]rnn32.north) {不错};
\draw[-latex'] ([yshift=0.5\base]rnn04) to (rnn14);
\draw[-latex'] (rnn13) to (rnn14);
\draw[-latex'] (rnn13) to (rnn23);
\draw[-latex'] (rnn22) to (rnn23);
\draw[-latex'] (rnn22) to (rnn32);
\draw[-latex'] (rnn31) to (rnn32);
\draw[-latex'] (rnn32) to (o2);
}
% step 5
{
\node[rnnnode] (rnn24) at ([xshift=2\base]rnn23) {};
\node[rnnnode] (rnn33) at ([xshift=2\base]rnn32) {};
\node[wordnode,anchor=south] (o3) at ([yshift=\base]rnn33.north) {};
\draw[-latex'] (rnn14) to (rnn24);
\draw[-latex'] (rnn23) to (rnn24);
\draw[-latex'] (rnn23) to (rnn33);
\draw[-latex'] (rnn32) to (rnn33);
\draw[-latex'] (rnn33) to (o3);
}
{
% frontier
\node[rnnnode,fill=purple] () at (rnn24) {};
\node[rnnnode,fill=purple] () at (rnn33) {};
\node[draw=red,thick,inner sep=5pt,rounded corners=0.3em,rotate fit=-45,label={[font=\scriptsize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn24) (rnn33)] () {};
}
% labels
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn10.west) to node[wordnode,align=right,left,font=\scriptsize] {空闲的\\设备1} ([yshift=\base]rnn10.west);
}
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn20.west) to node[wordnode,align=right,left,text=red,font=\scriptsize] {正在使用的\\设备2} ([yshift=\base]rnn20.west);
}
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn30.west) to node[wordnode,align=right,left,text=red,font=\scriptsize] {正在使用的\\设备3} ([yshift=\base]rnn30.west);
}
\foreach \i in {1,2,3}
\node[wordnode,font=\tiny,anchor=south west] () at ([yshift=-0.25em]rnn\i0.north west) {\i};
\node[wordnode,font=\scriptsize] () at (rnn01) {};
\node[wordnode,font=\scriptsize] () at (rnn02) {};
\node[wordnode,font=\scriptsize] () at (rnn03) {不错};
\node[wordnode,font=\scriptsize] () at ([xshift=0.25em]rnn04) {};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.9em}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum size=1\base,draw,inner sep=0pt,outer sep=0pt,fill=blue!30!white]
\tikzstyle{wordnode} = [font=\footnotesize,align=center]
\begin{scope}
% rnn[layer][step]
\coordinate (rnn00) at (0,0);
\foreach \i [count=\j from 0] in {1,2,3}
\node[wordnode] (rnn\i0) at ([yshift=2\base]rnn\j0) {$0$};
\foreach \i [count=\j from 0] in {1,2,...,4}
\coordinate (rnn0\i) at ([xshift=2\base]rnn0\j);
% step 1
{
\node[rnnnode] (rnn11) at ([xshift=2\base]rnn10) {};
\draw[-latex'] ([yshift=0.5\base]rnn01) to (rnn11);
\draw[-latex'] ([xshift=0.5\base]rnn10) to (rnn11);
}
% step 2
{
\node[rnnnode] (rnn12) at ([xshift=2\base]rnn11) {};
\node[rnnnode] (rnn21) at ([yshift=2\base]rnn11) {};
\draw[-latex'] ([yshift=0.5\base]rnn02) to (rnn12);
\draw[-latex'] ([xshift=0.5\base]rnn20) to (rnn21);
\draw[-latex'] (rnn11) to (rnn12);
\draw[-latex'] (rnn11) to (rnn21);
}
% step 3
{
\node[rnnnode] (rnn13) at ([xshift=2\base]rnn12) {};
\node[rnnnode] (rnn31) at ([yshift=2\base]rnn21) {};
\node[rnnnode] (rnn22) at ([xshift=2\base]rnn21) {};
\node[wordnode,anchor=south] (o1) at ([yshift=\base]rnn31.north) {};
\draw[-latex'] ([yshift=0.5\base]rnn03) to (rnn13);
\draw[-latex'] ([xshift=0.5\base]rnn30) to (rnn31);
\draw[-latex'] (rnn12) to (rnn13);
\draw[-latex'] (rnn21) to (rnn31);
\draw[-latex'] (rnn12) to (rnn22);
\draw[-latex'] (rnn21) to (rnn22);
\draw[-latex'] (rnn31) to (o1);
}
% step 4
{
\node[rnnnode] (rnn14) at ([xshift=2\base]rnn13) {};
\node[rnnnode] (rnn23) at ([xshift=2\base]rnn22) {};
\node[rnnnode] (rnn32) at ([xshift=2\base]rnn31) {};
\node[wordnode,anchor=south] (o2) at ([yshift=\base]rnn32.north) {不错};
\draw[-latex'] ([yshift=0.5\base]rnn04) to (rnn14);
\draw[-latex'] (rnn13) to (rnn14);
\draw[-latex'] (rnn13) to (rnn23);
\draw[-latex'] (rnn22) to (rnn23);
\draw[-latex'] (rnn22) to (rnn32);
\draw[-latex'] (rnn31) to (rnn32);
\draw[-latex'] (rnn32) to (o2);
}
% step 5
{
\node[rnnnode] (rnn24) at ([xshift=2\base]rnn23) {};
\node[rnnnode] (rnn33) at ([xshift=2\base]rnn32) {};
\node[wordnode,anchor=south] (o3) at ([yshift=\base]rnn33.north) {};
\draw[-latex'] (rnn14) to (rnn24);
\draw[-latex'] (rnn23) to (rnn24);
\draw[-latex'] (rnn23) to (rnn33);
\draw[-latex'] (rnn32) to (rnn33);
\draw[-latex'] (rnn33) to (o3);
}
% step 6
{
\node[rnnnode] (rnn34) at ([xshift=2\base]rnn33) {};
\node[wordnode,anchor=south] (o4) at ([yshift=\base]rnn34.north) {$\langle$eos$\rangle$};
\draw[-latex'] (rnn33) to (rnn34);
\draw[-latex'] (rnn24) to (rnn34);
\draw[-latex'] (rnn34) to (o4);
}
{
% frontier
\node[rnnnode,fill=purple] () at (rnn34) {};
\node[draw=red,thick,inner sep=5pt,rounded corners=0.3em,rotate fit=-45,label={[font=\scriptsize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn34)] () {};
}
% labels
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn10.west) to node[wordnode,align=right,left,font=\scriptsize] {空闲的\\设备1} ([yshift=\base]rnn10.west);
}
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn20.west) to node[wordnode,align=right,left,font=\scriptsize] {空闲的\\设备2} ([yshift=\base]rnn20.west);
}
{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn30.west) to node[wordnode,align=right,left,text=red,font=\scriptsize] {正在使用的\\设备3} ([yshift=\base]rnn30.west);
}
\foreach \i in {1,2,3}
\node[wordnode,font=\tiny,anchor=south west] () at ([yshift=-0.25em]rnn\i0.north west) {\i};
\node[wordnode,font=\scriptsize] () at (rnn01) {};
\node[wordnode,font=\scriptsize] () at (rnn02) {};
\node[wordnode,font=\scriptsize] () at (rnn03) {不错};
\node[wordnode,font=\scriptsize] () at ([xshift=0.25em]rnn04) {};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnode} = [draw,minimum width=3.5em,minimum height=1.2em]
\node [rnode,anchor=south west,fill=red!20!white] (value1) at (0,0) {\scriptsize{$\textbf{h}(\textrm{``你''})$}};
\node [rnode,anchor=south west,fill=red!20!white] (value2) at ([xshift=1em]value1.south east) {\scriptsize{$\textbf{h}(\textrm{``什么''})$}};
\node [rnode,anchor=south west,fill=red!20!white] (value3) at ([xshift=1em]value2.south east) {\scriptsize{$\textbf{h}(\textrm{``也''})$}};
\node [rnode,anchor=south west,fill=red!20!white] (value4) at ([xshift=1em]value3.south east) {\scriptsize{$\textbf{h}(\textrm{``没''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key1) at ([yshift=0.2em]value1.north west) {\scriptsize{$\textbf{h}(\textrm{``你''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key2) at ([yshift=0.2em]value2.north west) {\scriptsize{$\textbf{h}(\textrm{``什么''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key3) at ([yshift=0.2em]value3.north west) {\scriptsize{$\textbf{h}(\textrm{``也''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key4) at ([yshift=0.2em]value4.north west) {\scriptsize{$\textbf{h}(\textrm{``没''})$}};
\node [rnode,anchor=east] (query) at ([xshift=-2em]key1.west) {\scriptsize{$\textbf{s}(\textrm{``you''})$}};
\node [anchor=east] (querylabel) at ([xshift=-0.2em]query.west) {\scriptsize{query}};
\draw [->] ([yshift=1pt,xshift=6pt]query.north) .. controls +(90:1em) and +(90:1em) .. ([yshift=1pt]key1.north);
\draw [->] ([yshift=1pt,xshift=3pt]query.north) .. controls +(90:1.5em) and +(90:1.5em) .. ([yshift=1pt]key2.north);
\draw [->] ([yshift=1pt]query.north) .. controls +(90:2em) and +(90:2em) .. ([yshift=1pt]key3.north);
\draw [->] ([yshift=1pt,xshift=-3pt]query.north) .. controls +(90:2.5em) and +(90:2.5em) .. ([yshift=1pt]key4.north);
\node [anchor=south east] (alpha1) at ([xshift=1em]key1.north east) {\scriptsize{$\alpha_1=.4$}};
\node [anchor=south east] (alpha2) at ([xshift=1em]key2.north east) {\scriptsize{$\alpha_2=.4$}};
\node [anchor=south east] (alpha3) at ([xshift=1em]key3.north east) {\scriptsize{$\alpha_3=0$}};
\node [anchor=south east] (alpha4) at ([xshift=1em]key4.north east) {\scriptsize{$\alpha_4=.1$}};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------------------
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnode} = [draw,minimum width=3em,minimum height=1.2em]
\node [rnode,anchor=south west,fill=blue!20!white] (value1) at (0,0) {\scriptsize{value$_1$}};
\node [rnode,anchor=south west,fill=blue!20!white] (value2) at ([xshift=1em]value1.south east) {\scriptsize{value$_2$}};
\node [rnode,anchor=south west,fill=red!20!white] (value3) at ([xshift=1em]value2.south east) {\scriptsize{value$_3$}};
\node [rnode,anchor=south west,fill=blue!20!white] (value4) at ([xshift=1em]value3.south east) {\scriptsize{value$_4$}};
\node [rnode,anchor=south west,pattern=north east lines] (key1) at ([yshift=0.2em]value1.north west) {};
\node [rnode,anchor=south west,pattern=dots] (key2) at ([yshift=0.2em]value2.north west) {};
\node [rnode,anchor=south west,pattern=horizontal lines] (key3) at ([yshift=0.2em]value3.north west) {};
\node [rnode,anchor=south west,pattern=crosshatch dots] (key4) at ([yshift=0.2em]value4.north west) {};
\node [fill=white,inner sep=1pt] (key1label) at (key1) {\scriptsize{key$_1$}};
\node [fill=white,inner sep=1pt] (key1label) at (key2) {\scriptsize{key$_2$}};
\node [fill=white,inner sep=1pt] (key1label) at (key3) {\scriptsize{key$_3$}};
\node [fill=white,inner sep=1pt] (key1label) at (key4) {\scriptsize{key$_4$}};
\node [rnode,anchor=east,pattern=horizontal lines] (query) at ([xshift=-3em]key1.west) {};
\node [anchor=east] (querylabel) at ([xshift=-0.2em]query.west) {\scriptsize{query}};
\draw [->] ([yshift=1pt]query.north) .. controls +(90:2em) and +(90:2em) .. ([yshift=1pt]key3.north) node [pos=0.5,below,yshift=0.2em] {\scriptsize{匹配}};
\node [anchor=north] (result) at (value3.south) {\scriptsize{ {\red 返回结果} }};
\end{scope}
\end{tikzpicture}
%-----------------------------------------------------
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnode} = [draw,minimum width=3em,minimum height=1.2em]
\node [rnode,anchor=south west,fill=red!20!white] (value1) at (0,0) {\scriptsize{value$_1$}};
\node [rnode,anchor=south west,fill=red!20!white] (value2) at ([xshift=1em]value1.south east) {\scriptsize{value$_2$}};
\node [rnode,anchor=south west,fill=red!20!white] (value3) at ([xshift=1em]value2.south east) {\scriptsize{value$_3$}};
\node [rnode,anchor=south west,fill=red!20!white] (value4) at ([xshift=1em]value3.south east) {\scriptsize{value$_4$}};
\node [rnode,anchor=south west,pattern=north east lines] (key1) at ([yshift=0.2em]value1.north west) {};
\node [rnode,anchor=south west,pattern=dots] (key2) at ([yshift=0.2em]value2.north west) {};
\node [rnode,anchor=south west,pattern=horizontal lines] (key3) at ([yshift=0.2em]value3.north west) {};
\node [rnode,anchor=south west,pattern=crosshatch dots] (key4) at ([yshift=0.2em]value4.north west) {};
\node [fill=white,inner sep=1pt] (key1label) at (key1) {\scriptsize{key$_1$}};
\node [fill=white,inner sep=1pt] (key1label) at (key2) {\scriptsize{key$_2$}};
\node [fill=white,inner sep=1pt] (key1label) at (key3) {\scriptsize{key$_3$}};
\node [fill=white,inner sep=1pt] (key1label) at (key4) {\scriptsize{key$_4$}};
\node [rnode,anchor=east,pattern=vertical lines] (query) at ([xshift=-3em]key1.west) {};
\node [anchor=east] (querylabel) at ([xshift=-0.2em]query.west) {\scriptsize{query}};
\draw [->] ([yshift=1pt,xshift=6pt]query.north) .. controls +(90:1em) and +(90:1em) .. ([yshift=1pt]key1.north);
\draw [->] ([yshift=1pt,xshift=3pt]query.north) .. controls +(90:1.5em) and +(90:1.5em) .. ([yshift=1pt]key2.north);
\draw [->] ([yshift=1pt]query.north) .. controls +(90:2em) and +(90:2em) .. ([yshift=1pt]key3.north);
\draw [->] ([yshift=1pt,xshift=-3pt]query.north) .. controls +(90:2.5em) and +(90:2.5em) .. ([yshift=1pt]key4.north);
\node [anchor=south east] (alpha1) at (key1.north east) {\scriptsize{$\alpha_1$}};
\node [anchor=south east] (alpha2) at (key2.north east) {\scriptsize{$\alpha_2$}};
\node [anchor=south east] (alpha3) at (key3.north east) {\scriptsize{$\alpha_3$}};
\node [anchor=south east] (alpha4) at (key4.north east) {\scriptsize{$\alpha_4$}};
\node [anchor=north] (result) at ([xshift=-1.5em]value2.south east) {\scriptsize{{\red 返回结果}=$\alpha_1 \cdot \textrm{value}_1 + \alpha_2 \cdot \textrm{value}_2 + \alpha_3 \cdot \textrm{value}_3 + \alpha_4 \cdot \textrm{value}_4$}};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\footnotesize{
\begin{axis}[
width=.60\textwidth,
height=.40\textwidth,
legend style={at={(0.60,0.08)}, anchor=south west},
xlabel={\scriptsize{更新次数}},
ylabel={\scriptsize{学习率}},
xtick=\empty,
ytick=\empty,
ylabel style={yshift=-2.5em},xlabel style={yshift=1.5em},
legend style={yshift=-6pt, legend plot pos=right,font=\scriptsize,cells={anchor=west}}
]
\addplot[orange,line width=1.25pt] coordinates {(329,0.000045) (447,0.000078) (540,0.00012) (661,0.0002) (752,0.00032) (856,0.00051) (975,0.00089) (996,0.001) (6599,0.001) (6624,0.0005) (7200,0.0005) (7218,0.00025) (7784,0.00025) (7821,0.000125) (8398,0.000125)};
\end{axis}
}
\end{tikzpicture}
\ No newline at end of file
%\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\begin{tikzpicture}
\begin{axis}[
width=10cm, height=5cm,
symbolic x coords={1-15,16-25,26-35,>35},
xtick=data,
ytick={6,12,...,28},
xlabel={句子长度(范围)},
ylabel={$\%$\footnotesize{mTER}},
xlabel style={align=center},
ylabel style={},
y tick style={opacity=0},
x tick label style={font=\small},
y tick label style={font=\small},
tick align=inside,
%major grid style={draw=blue,dashed},
legend pos=outer north east,
%legend style={anchor=north west,yshift=-1cm},
legend style={yshift=-4.5em,xshift=-6em,legend cell align=left,legend plot pos=right},
ymin=6,
ymax=28]
\addplot [sharp plot,very thick,ublue,mark=diamond*] coordinates{(1-15,11.3) (16-25,16.4) (26-35,17) (>35,19.8)};
\addplot [sharp plot,very thick,red,mark=triangle*] coordinates{(1-15,14.4) (16-25,22.6) (26-35,23.8) (>35,25.9)};
\addplot [sharp plot,very thick,ugreen,mark=square*] coordinates{(1-15,14.9) (16-25,23.7) (26-35,24.7) (>35,26.4)};
\addplot [sharp plot,very thick,orange,mark=*] coordinates{(1-15,17.5) (16-25,24) (26-35,25) (>35,27)};
\legend{\tiny{NMT},\tiny{SPB},\tiny{HPB},\tiny{PBSY}}
\end{axis}
\end{tikzpicture}
%---------------------------------------------------------------------
\ No newline at end of file
\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\begin{tikzpicture}
\begin{axis}[
width=8cm, height=5cm,
xtick={-6,-4,...,6},
ytick={0,0.5,1},
xlabel={\small{$x$}},
ylabel={\small{Softmax($x$)}},
xlabel style={xshift=3.0cm,yshift=1cm},
axis y line=middle,
ylabel style={xshift=-2.4cm,yshift=-0.2cm},
x axis line style={->},
axis line style={very thick},
% ymajorgrids,
%xmajorgrids,
axis x line*=bottom,
xmin=-6,
xmax=6,
ymin=0,
ymax=1]
\addplot[draw=ublue,very thick]{(tanh(x/2) + 1)/2};
\end{axis}
\end{tikzpicture}
%---------------------------------------------------------------------
\ No newline at end of file
%------------------------------------------------------------
\begin{tikzpicture}
\begin{scope}[scale=0.9]
{\Large
\tikzstyle{rnnnode} = [draw,inner sep=5pt,minimum width=3em,minimum height=0.8em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}]
\node [anchor=west,rnnnode] (node11) at (0,0) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node12) at ([xshift=1em]node11.east) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node13) at ([xshift=1em]node12.east) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node14) at ([xshift=1em]node13.east) {\scriptsize{RNN Cell}};
\node [anchor=north,rnnnode,fill=blue!30!white] (e1) at ([yshift=-1em]node11.south) {\scriptsize{}};
\node [anchor=north,rnnnode,fill=blue!30!white] (e2) at ([yshift=-1em]node12.south) {\scriptsize{}};
\node [anchor=north,rnnnode,fill=blue!30!white] (e3) at ([yshift=-1em]node13.south) {\scriptsize{}};
\node [anchor=north,rnnnode,fill=blue!30!white] (e4) at ([yshift=-1em]node14.south) {\scriptsize{}};
\node [anchor=north,inner sep=2pt] (w1) at ([yshift=-1em]e1.south) {\footnotesize{$<$sos$>$}};
\node [anchor=north,inner sep=2pt] (w2) at ([yshift=-1em]e2.south) {\footnotesize{}};
\node [anchor=north,inner sep=2pt] (w3) at ([yshift=-1em]e3.south) {\footnotesize{我们}};
\node [anchor=north,inner sep=2pt] (w4) at ([yshift=-1em]e4.south) {\footnotesize{开始}};
\draw [->,thick] ([yshift=0.1em]w1.north)--([yshift=-0.1em]e1.south);
\draw [->,thick] ([yshift=0.1em]w2.north)--([yshift=-0.1em]e2.south);
\draw [->,thick] ([yshift=0.1em]w3.north)--([yshift=-0.1em]e3.south);
\draw [->,thick] ([yshift=0.1em]w4.north)--([yshift=-0.1em]e4.south);
\draw [->,thick] ([yshift=0.1em]e1.north)--([yshift=-0.1em]node11.south);
\draw [->,thick] ([yshift=0.1em]e2.north)--([yshift=-0.1em]node12.south);
\draw [->,thick] ([yshift=0.1em]e3.north)--([yshift=-0.1em]node13.south);
\draw [->,thick] ([yshift=0.1em]e4.north)--([yshift=-0.1em]node14.south);
\node [anchor=south,rnnnode,fill=blue!30!white] (node21) at ([yshift=1.0em]node11.north) {\scriptsize{}};
\node [anchor=south,rnnnode,fill=blue!30!white] (node22) at ([yshift=1.0em]node12.north) {\scriptsize{}};
\node [anchor=south,rnnnode,fill=blue!30!white] (node23) at ([yshift=1.0em]node13.north) {\scriptsize{}};
\node [anchor=south,rnnnode,fill=blue!30!white] (node24) at ([yshift=1.0em]node14.north) {\scriptsize{}};
\node [anchor=south] (output1) at ([yshift=1em]node21.north) {\Large{\textbf{}}};
\node [anchor=south] (output2) at ([yshift=1em]node22.north) {\Large{\textbf{}}};
\node [anchor=south] (output3) at ([yshift=1em]node23.north) {\Large{\textbf{}}};
\node [anchor=south] (output4) at ([yshift=1em]node24.north) {\Large{\textbf{}}};
\draw [->,thick] ([yshift=0.1em]node21.north)--([yshift=-0.1em]output1.south);
\draw [->,thick] ([yshift=0.1em]node22.north)--([yshift=-0.1em]output2.south);
\draw [->,thick] ([yshift=0.1em]node23.north)--([yshift=-0.1em]output3.south);
\draw [->,thick] ([yshift=0.1em]node24.north)--([yshift=-0.1em]output4.south);
\draw [->,thick] ([yshift=0.1em]node11.north)--([yshift=-0.1em]node21.south);
\draw [->,thick] ([yshift=0.1em]node12.north)--([yshift=-0.1em]node22.south);
\draw [->,thick] ([yshift=0.1em]node13.north)--([yshift=-0.1em]node23.south);
\draw [->,thick] ([yshift=0.1em]node14.north)--([yshift=-0.1em]node24.south);
\draw [->,thick] ([xshift=-1em]node11.west)--([xshift=-0.1em]node11.west);
\draw [->,thick] ([xshift=0.1em]node11.east)--([xshift=-0.1em]node12.west);
\draw [->,thick] ([xshift=0.1em]node12.east)--([xshift=-0.1em]node13.west);
\draw [->,thick] ([xshift=0.1em]node13.east)--([xshift=-0.1em]node14.west);
\draw [->,thick] ([xshift=0.1em]node14.east)--([xshift=1em]node14.east);
}
\end{scope}
\end{tikzpicture}
%-------------------------------------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.25cm}
\tikzstyle{rnnnode} = [minimum height=1.1em,minimum width=1.1em,inner sep=2pt,rounded corners=1pt,draw,fill=red!20,font=\scriptsize];
\tikzstyle{wnode} = [minimum height=1.2em,inner sep=3pt,rounded corners=1pt,font=\scriptsize];
% Encoder
\begin{scope}
\node[rnnnode,fill=green!20] (encemb1) at (0,0) {};
\node[rnnnode,fill=green!20,right=\base of encemb1] (encemb2) {};
\node[rnnnode,draw=white,fill=white,right=\base of encemb2] (encemb3) {$\cdots$};
\node[rnnnode,fill=green!20,right=\base of encemb3] (encemb4) {};
\node[rnnnode,above=\base of encemb1] (enc11) {};
\node[rnnnode,above=\base of encemb2] (enc12) {};
\node[rnnnode,draw=white,fill=white,above=\base of encemb3] (enc13) {$\cdots$};
\node[rnnnode,above=\base of encemb4] (enc14) {};
\foreach \cur [count=\prev from 1] in {2,...,4}
{
\node[rnnnode,above=\base of enc\prev1] (enc\cur1) {};
\node[rnnnode,above=\base of enc\prev2] (enc\cur2) {};
\node[rnnnode,draw=white,fill=white,above=\base of enc\prev3] (enc\cur3) {$\cdots$};
\node[rnnnode,above=\base of enc\prev4] (enc\cur4) {};
}
\node[rnnnode,draw=white,fill=white,above=\base of enc41] (enc51) {$\cdots$};
\node[rnnnode,draw=white,fill=white,above=\base of enc42] (enc52) {$\cdots$};
\node[rnnnode,draw=white,fill=white,above=\base of enc43] (enc53) {};
\node[rnnnode,draw=white,fill=white,above=\base of enc44] (enc54) {$\cdots$};
\node[rnnnode,above=\base of enc51] (enc61) {};
\node[rnnnode,above=\base of enc52] (enc62) {};
\node[rnnnode,draw=white,fill=white,above=\base of enc53] (enc63) {$\cdots$};
\node[rnnnode,above=\base of enc54] (enc64) {};
% words
\node[wnode,below=0pt of encemb1] (encword1) {};
\node[wnode,below=0pt of encemb2] (encword2) {什么};
\node[wnode,below=0pt of encemb4] (encword4) {$\langle$eos$\rangle$};
% connections
\draw[-latex'] (enc11) to (enc12);
\draw[-latex'] (enc12) to (enc13);
\draw[-latex'] (enc13) to (enc14);
\draw[-latex'] (enc24) to (enc23);
\draw[-latex'] (enc23) to (enc22);
\draw[-latex'] (enc22) to (enc21);
\draw[-latex'] (enc31) to (enc32);
\draw[-latex'] (enc32) to (enc33);
\draw[-latex'] (enc33) to (enc34);
\draw[-latex'] (enc41) to (enc42);
\draw[-latex'] (enc42) to (enc43);
\draw[-latex'] (enc43) to (enc44);
\draw[-latex'] (enc61) to (enc62);
\draw[-latex'] (enc62) to (enc63);
\draw[-latex'] (enc63) to (enc64);
\draw[-latex'] (encemb1) to (enc11);
\draw[-latex'] (encemb2) to (enc12);
\draw[-latex'] (encemb4) to (enc14);
\draw[-latex'] ([xshift=2pt]encemb1.north) to [out=30,in=-30] ([xshift=2pt]enc21.south);
\draw[-latex'] ([xshift=2pt]encemb2.north) to [out=30,in=-30] ([xshift=2pt]enc22.south);
\draw[-latex'] ([xshift=2pt]encemb4.north) to [out=30,in=-30] ([xshift=2pt]enc24.south);
\draw[-latex'] ([xshift=-2pt]enc11.north) to [out=150,in=-150] ([xshift=-2pt]enc31.south);
\draw[-latex'] ([xshift=-2pt]enc12.north) to [out=150,in=-150] ([xshift=-2pt]enc32.south);
\draw[-latex'] ([xshift=-2pt]enc14.north) to [out=150,in=-150] ([xshift=-2pt]enc34.south);
\draw[-latex'] (enc22) to (enc32);
\draw[-latex'] (enc21) to (enc31);
\draw[-latex'] (enc24) to (enc34);
\draw[-latex'] ([xshift=-2pt]enc31.north) to [out=150,in=-150] ([xshift=-2pt]enc51.south);
\draw[-latex'] ([xshift=-2pt]enc32.north) to [out=150,in=-150] ([xshift=-2pt]enc52.south);
\draw[-latex'] ([xshift=-2pt]enc34.north) to [out=150,in=-150] ([xshift=-2pt]enc54.south);
\draw[-latex'] (enc31) to (enc41);
\draw[-latex'] (enc32) to (enc42);
\draw[-latex'] (enc34) to (enc44);
\draw[-latex'] (enc41) to (enc51);
\draw[-latex'] (enc42) to (enc52);
\draw[-latex'] (enc44) to (enc54);
\draw[-latex'] (enc51) to (enc61);
\draw[-latex'] (enc52) to (enc62);
\draw[-latex'] (enc54) to (enc64);
\draw[-latex'] (enc61) to ([yshift=\base]enc61.north);
\draw[-latex'] (enc62) to ([yshift=\base]enc62.north);
\draw[-latex'] (enc64) to ([yshift=\base]enc64.north);
\end{scope}
\node[rnnnode,fill=orange!20,minimum width=3.5cm,anchor=south west] (attention) at ([yshift=\base]enc61.north west) {注意力机制};
\begin{scope}
\node[rnnnode,fill=green!20,right=2.5cm of encemb4] (decemb1) {};
\node[rnnnode,fill=green!20,right=\base of decemb1] (decemb2) {};
\node[rnnnode,draw=white,fill=white,right=\base of decemb2] (decemb3) {$\cdots$};
\node[rnnnode,fill=green!20,right=\base of decemb3] (decemb4) {};
\node[rnnnode,above=\base of decemb1] (dec11) {};
\node[rnnnode,above=\base of decemb2] (dec12) {};
\node[rnnnode,draw=white,fill=white,above=\base of decemb3] (dec13) {$\cdots$};
\node[rnnnode,above=\base of decemb4] (dec14) {};
\node[rnnnode,above=\base of dec11] (dec21) {};
\node[rnnnode,above=\base of dec12] (dec22) {};
\node[rnnnode,draw=white,fill=white,above=\base of dec13] (dec23) {$\cdots$};
\node[rnnnode,above=\base of dec14] (dec24) {};
\node[rnnnode,above=\base of dec21] (dec31) {};
\node[rnnnode,above=\base of dec22] (dec32) {};
\node[rnnnode,draw=white,fill=white,above=\base of dec23] (dec33) {$\cdots$};
\node[rnnnode,above=\base of dec24] (dec34) {};
\node[rnnnode,draw=white,fill=white,above=\base of dec31] (dec41) {$\cdots$};
\node[rnnnode,draw=white,fill=white,above=\base of dec32] (dec42) {$\cdots$};
\node[rnnnode,draw=white,fill=white,above=\base of dec33] (dec43) {};
\node[rnnnode,draw=white,fill=white,above=\base of dec34] (dec44) {$\cdots$};
\node[rnnnode,above=\base of dec41] (dec51) {};
\node[rnnnode,above=\base of dec42] (dec52) {};
\node[rnnnode,draw=white,fill=white,above=\base of dec43] (dec53) {$\cdots$};
\node[rnnnode,above=\base of dec44] (dec54) {};
\node[rnnnode,fill=blue!20,above=\base of dec51] (softmax1) {};
\node[rnnnode,fill=blue!20,above=\base of dec52] (softmax2) {};
\node[rnnnode,draw=white,fill=white,above=\base of dec53] (softmax3) {$\cdots$};
\node[rnnnode,fill=blue!20,above=\base of dec54] (softmax4) {};
% words
\node[wnode,below=0pt of decemb1] (decinword1) {$\langle$sos$\rangle$};
\node[wnode,below=0pt of decemb2] (decinword2) {Have};
\node[wnode,below=0pt of decemb4] (decinword4) {?};
\node[wnode,above=0pt of softmax1] (decoutword1) {Have};
\ExtractX{$(softmax2.north)$}
\ExtractY{$(decoutword1.base)$}
\node[wnode,anchor=base] (decoutword2) at (\XCoord,\YCoord) {you};
\ExtractX{$(softmax4.north)$}
\ExtractY{$(decoutword1.base)$}
\node[wnode,anchor=base] (decoutword4) at (\XCoord,\YCoord) {$\langle$eos$\rangle$};
% connections
\draw[-latex'] (dec11) to (dec12);
\draw[-latex'] (dec12) to (dec13);
\draw[-latex'] (dec13) to (dec14);
\draw[-latex'] (dec21) to (dec22);
\draw[-latex'] (dec22) to (dec23);
\draw[-latex'] (dec23) to (dec24);
\draw[-latex'] (dec31) to (dec32);
\draw[-latex'] (dec32) to (dec33);
\draw[-latex'] (dec33) to (dec34);
\draw[-latex'] (dec51) to (dec52);
\draw[-latex'] (dec52) to (dec53);
\draw[-latex'] (dec53) to (dec54);
\draw[-latex'] (decemb1) to (dec11);
\draw[-latex'] (decemb2) to (dec12);
\draw[-latex'] (decemb4) to (dec14);
\foreach \cur [count=\prev from 1] in {2,...,5}
{
\draw[-latex'] (dec\prev1) to (dec\cur1);
\draw[-latex'] (dec\prev2) to (dec\cur2);
\draw[-latex'] (dec\prev4) to (dec\cur4);
}
\draw[-latex'] ([xshift=-2pt]dec21.north) to [out=150,in=-150] ([xshift=-2pt]dec41.south);
\draw[-latex'] ([xshift=-2pt]dec22.north) to [out=150,in=-150] ([xshift=-2pt]dec42.south);
\draw[-latex'] ([xshift=-2pt]dec24.north) to [out=150,in=-150] ([xshift=-2pt]dec44.south);
\draw[-latex'] (dec51) to (softmax1);
\draw[-latex'] (dec52) to (softmax2);
\draw[-latex'] (dec54) to (softmax4);
\end{scope}
% attention connections
\draw[-latex',rounded corners=2pt] (dec11) -| ([xshift=-0.4cm]attention.south east);
\ExtractX{$([xshift=9pt]attention.east)$}
\ExtractY{$([yshift=2pt]dec11.north)$}
\coordinate (tmp1) at (\XCoord,\YCoord);
\ExtractX{$([xshift=-5pt]dec12.west)$}
\coordinate (tmp2) at (\XCoord,\YCoord);
\draw[-latex',rounded corners=2pt] ([yshift=-3pt]attention.east) -| (tmp1) -- (tmp2) |- ([yshift=3pt]dec12.west);
\ExtractX{$([xshift=11pt]attention.east)$}
\ExtractY{$([yshift=2pt]dec21.north)$}
\coordinate (tmp1) at (\XCoord,\YCoord);
\ExtractX{$([xshift=-5pt]dec22.west)$}
\coordinate (tmp2) at (\XCoord,\YCoord);
\draw[-latex',rounded corners=2pt] ([yshift=-1pt]attention.east) -| (tmp1) -- (tmp2) |- ([yshift=3pt]dec22.west);
\ExtractX{$([xshift=13pt]attention.east)$}
\ExtractY{$([yshift=2pt]dec31.north)$}
\coordinate (tmp1) at (\XCoord,\YCoord);
\ExtractX{$([xshift=-5pt]dec32.west)$}
\coordinate (tmp2) at (\XCoord,\YCoord);
\draw[-latex',rounded corners=2pt] ([yshift=1pt]attention.east) -| (tmp1) -- (tmp2) |- ([yshift=3pt]dec32.west);
\ExtractX{$([xshift=15pt]attention.east)$}
\ExtractY{$([yshift=2pt]dec51.north)$}
\coordinate (tmp1) at (\XCoord,\YCoord);
\ExtractX{$([xshift=-5pt]dec52.west)$}
\coordinate (tmp2) at (\XCoord,\YCoord);
\draw[-latex',rounded corners=2pt] ([yshift=3pt]attention.east) -| (tmp1) -- (tmp2) |- ([yshift=3pt]dec52.west);
% label
\draw[decorate,decoration={brace}] ([xshift=-5pt]enc11.south west) to node [auto,font=\scriptsize,name=label1] {8层} ([xshift=-5pt]enc61.north west);
\draw[decorate,decoration={brace,mirror}] ([xshift=5pt]dec14.south east) to node [auto,swap,font=\scriptsize,name=label2] {8层} ([xshift=5pt]dec54.north east);
\begin{pgfonlayer}{background}
\coordinate (tmp) at ([xshift=-4pt]label1.west);
\node[draw,densely dashed,rounded corners=2pt,inner sep=2pt,fit=(label1) (encword1) (attention) (tmp)] (encoder) {};
\ExtractX{$([xshift=4pt]label2.east)$}
\ExtractY{$([yshift=6pt]decoutword4.north)$}
\coordinate (tmp) at (\XCoord,\YCoord);
\node[draw,densely dashed,rounded corners=2pt,inner sep=2pt,fit=(label2) (decinword1) (decoutword4) (tmp)] (decoder) {};
\end{pgfonlayer}
\node[wnode,anchor=north west] () at (encoder.north west) {编码器};
\node[wnode,anchor=north east] () at (decoder.north east) {解码器};
\end{tikzpicture}
\ No newline at end of file
%--------------------------------------------------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.6cm}
\tikzstyle{wordnode} = [font=\scriptsize]
\tikzstyle{auxnode} = [inner sep=0pt,outer sep=0pt,opacity=0,draw=red,fill=red,circle,minimum size=3pt]
\tikzstyle{opnode} = [inner sep=0pt,outer sep=0pt,draw,fill=green!30!white,font=\scriptsize,minimum size=10pt]
\tikzstyle{standard} = [rounded corners=5pt,thick]
\tikzstyle{emph} = [rounded corners=5pt,thick,draw=red]
\tikzstyle{formulanode} = [font=\scriptsize,align=left,draw=red,rectangle,fill=red!10!white,rounded corners=2pt,drop shadow]
% Skeleton
\begin{scope}[every label/.append style={label distance=1pt,font=\tiny,inner sep=0pt,opacity=0}]
\coordinate (aux12) at (0,0);
%\node[auxnode,label={-45:12}] () at (aux12) {};
\coordinate (aux22) at ([yshift=\base]aux12);
%\node[auxnode,label={-45:22}] () at (aux22) {};
\coordinate (aux21) at ([xshift=-2\base]aux22);
%\node[auxnode,label={-45:21}] () at (aux21) {};
\coordinate (aux23) at ([xshift=\base]aux22);
%\node[auxnode,label={-45:23}] () at (aux23) {};
\coordinate (aux24) at ([xshift=\base]aux23);
%\node[auxnode,label={-45:24}] () at (aux24) {};
\coordinate (aux25) at ([xshift=\base]aux24);
%\node[auxnode,label={-45:25}] () at (aux25) {};
\coordinate (aux26) at ([xshift=\base]aux25);
%\node[auxnode,label={-45:26}] () at (aux26) {};
\coordinate (aux27) at ([xshift=\base]aux26);
%\node[auxnode,label={-45:27}] () at (aux27) {};
\coordinate (aux28) at ([xshift=\base]aux27);
%\node[auxnode,label={-45:28}] () at (aux28) {};
\coordinate (aux29) at ([xshift=2\base]aux28);
%\node[auxnode,label={-45:29}] () at (aux29) {};
\coordinate (aux33) at ([yshift=\base]aux23);
%\node[auxnode,label={-45:33}] () at (aux33) {};
\coordinate (aux34) at ([yshift=\base]aux24);
%\node[auxnode,label={-45:34}] () at (aux34) {};
\coordinate (aux35) at ([yshift=\base]aux25);
%\node[auxnode,label={-45:35}] () at (aux35) {};
\coordinate (aux37) at ([yshift=\base]aux27);
%\node[auxnode,label={-45:37}] () at (aux37) {};
\coordinate (aux45) at ([yshift=\base]aux35);
%\node[auxnode,label={-45:45}] () at (aux45) {};
\coordinate (aux55) at ([yshift=\base]aux45);
%\node[auxnode,label={-45:55}] () at (aux55) {};
\ExtractX{$(aux21)$}
\ExtractY{$(aux55)$}
\coordinate (aux51) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:51}] () at (aux51) {};
\ExtractX{$(aux23)$}
\ExtractY{$(aux55)$}
\coordinate (aux53) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:53}] () at (aux53) {};
\ExtractX{$(aux28)$}
\ExtractY{$(aux55)$}
\coordinate (aux58) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:58}] () at (aux58) {};
\ExtractX{$(aux29)$}
\ExtractY{$(aux55)$}
\coordinate (aux59) at (\XCoord,\YCoord);
%\node[auxnode,label={-45:59}] () at (aux59) {};
\coordinate (aux68) at ([yshift=\base]aux58);
%\node[auxnode,label={-45:68}] () at (aux68) {};
\end{scope}
\begin{scope}
\node[opnode,circle,opacity=0] (f53) at (aux53) {};
\node[opnode,circle,opacity=0] (u55) at (aux55) {};
% forget gate
{
\draw[emph] (aux21) -- (aux23) -- (aux33);
\draw[-latex,emph] (aux12) -- (aux22) -- (aux23) -- (f53);
\node[opnode,circle,draw=red,thick] () at (aux33) {$\sigma$};
}
{
\draw[standard] (aux21) -- (aux23) -- (aux33);
\draw[-latex,standard] (aux12) -- (aux22) -- (aux23) -- (f53);
\node[opnode,circle] () at (aux33) {$\sigma$};
}
% input gate
{
\node[opnode,circle] (i45) at (aux45) {};
\draw[-latex,emph] (aux21) -- (aux24) |- (i45);
\draw[-latex,emph] (aux21) -- (aux25) -- (u55);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,circle,draw=red,thick] (i45) at (aux45) {};
}
{
\node[opnode,circle] (i45) at (aux45) {};
\draw[-latex,standard] (aux21) -- (aux24) |- (i45);
\draw[-latex,standard] (aux21) -- (aux25) -- (u55);
\node[opnode,circle] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,circle] (i45) at (aux45) {};
}
% cell update
{
\draw[-latex,emph] (aux51) -- (aux59);
\node[opnode,circle,draw=red,thick] (f53) at (aux53) {};
\node[opnode,circle,draw=red,thick] (u55) at (aux55) {};
}
{
\draw[-latex,standard] (aux51) -- (aux59);
\node[opnode,circle] (f53) at (aux53) {};
\node[opnode,circle] (u55) at (aux55) {};
}
% output gate
{
\node[opnode,circle,draw=red,thick] (o27) at (aux27) {};
\draw[-latex,emph] (u55) -| (o27);
\draw[-latex,emph] (aux21) -- (o27);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux26) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux37) {$\mathrm{tanh}$};
\draw[-latex,emph] (o27) -- (aux29);
\draw[-latex,emph] (o27) -| (aux68);
}
{
\node[opnode,circle] (o27) at (aux27) {};
\draw[-latex,standard] (u55) -| (o27);
\draw[-latex,standard] (aux21) -- (o27);
\node[opnode,circle] () at (aux26) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux37) {$\mathrm{tanh}$};
\draw[-latex,standard] (o27) -- (aux29);
\draw[-latex,standard] (o27) -| (aux68);
}
\end{scope}
\begin{scope}
\node[wordnode,anchor=south] () at ([xshift=0.5\base]aux21) {$\mathbf{h}_{t-1}$};
\node[wordnode,anchor=west] () at (aux12) {$\mathbf{x}_t$};
\node[wordnode,anchor=south] () at ([xshift=0.5\base]aux51) {$\mathbf{c}_{t-1}$};
{
\node[wordnode,anchor=south] () at ([xshift=-0.5\base]aux59) {$\mathbf{c}_{t}$};
}
{
\node[wordnode,anchor=east] () at (aux68) {$\mathbf{h}_{t}$};
\node[wordnode,anchor=south] () at ([xshift=-0.5\base]aux29) {$\mathbf{h}_{t}$};
}
\end{scope}
\begin{pgfonlayer}{background}
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=4pt,fit=(aux22) (aux58) (u55) (o27)] (LSTM) {};
\end{pgfonlayer}
%%new
\draw[-] (i45.north west) -- (i45.south east);
\draw[-] (i45.north east) -- (i45.south west);
\draw[-] (f53.north west) -- (f53.south east);
\draw[-] (f53.north east) -- (f53.south west);
\draw [-,line width=0.8pt] (u55.north) -- (u55.south);
\draw [-,line width=0.8pt] (u55.west) -- (u55.east);
\draw[-] (o27.north west) -- (o27.south east);
\draw[-] (o27.north east) -- (o27.south west);
\begin{scope}
{
% forget gate formula
\node[formulanode,anchor=south east,text width=10em] () at ([shift={(4\base,1.5\base)}]aux51) {遗忘门\\$\mathbf{f}_t=\sigma(\mathbf{W}_f[\mathbf{h}_{t-1},\mathbf{x}_t]+\mathbf{b}_f)$};
}
{
% input gate formula
\node[formulanode,anchor=north east,text width=10em] () at ([shift={(4\base,-1.5\base)}]aux21) {输入门\\$\mathbf{i}_t=\sigma(\mathbf{W}_i[\mathbf{h}_{t-1},\mathbf{x}_t]+\mathbf{b}_i)$\\$\hat{\mathbf{c}}_t=\mathrm{tanh}(\mathbf{W}_c[\mathbf{h}_{t-1},\mathbf{x}_t]+\mathbf{b}_c)$};
}
{
% cell update formula
\node[formulanode,anchor=south west,text width=10em] () at ([shift={(-4\base,1.5\base)}]aux59) {记忆更新\\$\mathbf{c}_{t}=\mathbf{f}_t\cdot \mathbf{c}_{t-1}+\mathbf{i}_t\cdot \hat{\mathbf{c}}_t$};
}
{
% output gate formula
\node[formulanode,anchor=north west,text width=10em] () at ([shift={(-4\base,-1.5\base)}]aux29) {输出门\\$\mathbf{o}_t=\sigma(\mathbf{W}_o[\mathbf{h}_{t-1},\mathbf{x}_t]+\mathbf{b}_o)$\\$\mathbf{h}_{t}=\mathbf{o}_t\cdot \mathrm{tanh}(\mathbf{c}_{t})$};
}
\end{scope}
\end{tikzpicture}
%------------------------------------------------------
\begin{tikzpicture}
\setlength{\base}{0.9cm}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum height=0.5\base,minimum width=1\base,draw,inner sep=0pt,outer sep=0pt]
\tikzstyle{wordnode} = [font=\tiny]
% RNN translation model
\begin{scope}[local bounding box=RNNMT]
% RNN Encoder
\coordinate (eemb0) at (0,0);
\foreach \x [count=\y from 0] in {1,2,...,3}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\x) at ([xshift=0.4\base]eemb\y.east) {\tiny{$e_x()$}};
\foreach \x in {1,2,...,3}
\node[rnnnode,fill=blue!30!white,anchor=south] (enc\x) at ([yshift=0.3\base]eemb\x.north) {};
\node[] (enclabel1) at (enc1) {\tiny{$\textbf{h}_{m-2}$}};
\node[] (enclabel2) at (enc2) {\tiny{$\textbf{h}_{m-1}$}};
\node[rnnnode,fill=purple!30!white] (enclabel3) at (enc3) {\tiny{$\textbf{h}_{m}$}};
\node[wordnode,left=0.4\base of enc1] (init1) {$\cdots$};
\node[wordnode,left=0.4\base of eemb1] (init2) {$\cdots$};
\node[wordnode,below=0pt of eemb1] () {};
\node[wordnode,below=0pt of eemb2] () {};
\node[wordnode,below=0pt of eemb3] () {$\langle$eos$\rangle$};
% RNN Decoder
\foreach \x in {1,2,...,3}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([yshift=\base]enc\x.north) {\tiny{$e_y()$}};
\foreach \x in {1,2,...,3}
\node[rnnnode,fill=blue!30!white,anchor=south] (dec\x) at ([yshift=0.3\base]demb\x.north) {{\tiny{$\textbf{s}_\x$}}};
\foreach \x in {1,2,...,3}
\node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax\x) at ([yshift=0.3\base]dec\x.north) {\tiny{Softmax}};
\node[wordnode,right=0.4\base of demb3] (end1) {$\cdots$};
\node[wordnode,right=0.4\base of dec3] (end2) {$\cdots$};
\node[wordnode,right=0.4\base of softmax3] (end3) {$\cdots$};
% Decoder input words
\node[wordnode,below=0pt of demb1] (decwordin) {$\langle$sos$\rangle$};
\ExtractX{$(demb2.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Do};
\ExtractX{$(demb3.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
% Decoder output words
\node[wordnode,above=0pt of softmax1] (decwordout) {Do};
\ExtractX{$(softmax2.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
\ExtractX{$(softmax3.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {know};
% Connections
\draw[-latex'] (init1.east) to (enc1.west);
\draw[-latex'] (dec3.east) to (end2.west);
\foreach \x in {1,2,...,3}
\draw[-latex'] (eemb\x) to (enc\x);
\foreach \x in {1,2,...,3}
\draw[-latex'] (demb\x) to (dec\x);
\foreach \x in {1,2,...,3}
\draw[-latex'] (dec\x.north) to (softmax\x.south);
\foreach \x [count=\y from 2] in {1,2}
{
\draw[-latex'] (enc\x.east) to (enc\y.west);
\draw[-latex'] (dec\x.east) to (dec\y.west);
}
\coordinate (bridge) at ([yshift=0.4\base]enc2.north west);
\draw[-latex'] (enc3.north) .. controls +(north:0.3\base) and +(east:\base) .. (bridge) .. controls +(west:2.7\base) and +(west:0.3\base) .. (dec1.west);
\end{scope}
\begin{scope}
\coordinate (start) at (5.8\base,0.3\base);
{
\node [anchor=south west] (one) at (start) {\scriptsize{$\begin{bmatrix} 0 \\ 0 \\ 0 \\ \vdots \\ 0 \\ {\color{ugreen} 1} \\ 0 \\ 0 \end{bmatrix}$}};
\node [anchor=south west,inner sep=0pt] (T) at ([yshift=-0.5em,xshift=-0.5em]one.north east) {\tiny{T}};
}
{
\node [draw=ugreen,fill=green!20!white,rounded corners=0.3em,minimum width=3.8cm,minimum height=0.9em,anchor=south west] (emb) at ([shift={(1.25cm,0.8cm)}]start) {};
}
\node [anchor=north] (w) at ([yshift=3pt]one.south) {\scriptsize{\color{ugreen} you}};
\node [anchor=north west] (words) at ([xshift=10pt]one.north east) {\scriptsize{$\begin{matrix} \langle\textrm{eos}\rangle \\ \langle\textrm{sos}\rangle \\ \textrm{Do} \\ \vdots \\ \textrm{know} \\ \textrm{you} \\ \textrm{?} \\ \textrm{have} \end{matrix}$}};
\node [anchor=north west] (mat) at ([xshift=-6pt]words.north east) {\scriptsize{$
\begin{bmatrix}
.1 & -4 & \cdots & 2 \\
5 & 2 & \cdots & .2 \\
2 & .1 & \cdots & .3 \\
\vdots & \vdots & \ddots & \vdots \\
0 & .8 & \cdots & 4 \\
-1 & -2 & \cdots & -3 \\
.7 & .5 & \cdots & 3 \\
-2 & .3 & \cdots & .1
\end{bmatrix}
$}};
\draw [decorate,decoration={brace,mirror}] ([shift={(6pt,2pt)}]mat.south west) to node [auto,swap,font=\scriptsize] {词嵌入矩阵} ([shift={(-6pt,2pt)}]mat.south east);
{
\draw [-latex'] ([xshift=-2pt,yshift=-0.65cm]one.east) to ([yshift=-0.65cm]words.west);
}
{
\draw [-latex'] (emb.east) -| ([yshift=0.4cm]mat.north east) node [pos=1,above] {\scriptsize{RNN输入}};
}
\draw [-latex'] ([yshift=-0.4cm]w.south) to ([yshift=2pt]w.south);
\node [anchor=north] (wlabel) at ([yshift=-0.6em]w.south) {\scriptsize{输入的单词}};
\node [draw=ugreen,densely dashed,thick,rounded corners=3pt,fit=(one) (words) (mat) (w)] (input) {};
\end{scope}
\draw [->,thick,densely dashed,ugreen] ([yshift=-0.2em]demb3.east) to [out=0,in=180] ([yshift=-1cm]input.west);
\end{tikzpicture}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnode} = [draw,minimum width=3.5em,minimum height=1.2em]
\node [rnode,anchor=south west,fill=red!20!white] (e1) at (0,0) {\scriptsize{$\textbf{e}(\textrm{``沈阳''})$}};
\node [rnode,anchor=south west,fill=red!20!white] (e2) at ([xshift=1em]e1.south east) {\scriptsize{$\textbf{e}(\textrm{``到''})$}};
\node [rnode,anchor=south west,fill=red!20!white] (e3) at ([xshift=1em]e2.south east) {\scriptsize{$\textbf{e}(\textrm{``广州''})$}};
\node [rnode,anchor=south west,fill=red!20!white] (e4) at ([xshift=1em]e3.south east) {\scriptsize{$\textbf{e}(\textrm{``的''})$}};
\node [rnode,anchor=south west,fill=red!20!white] (e5) at ([xshift=1em]e4.south east) {\scriptsize{$\textbf{e}(\textrm{``机票''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (h1) at ([yshift=1.5em]e1.north west) {\scriptsize{$\textbf{h}(\textrm{``沈阳''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (h2) at ([yshift=1.5em]e2.north west) {\scriptsize{$\textbf{h}(\textrm{``到''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (h3) at ([yshift=1.5em]e3.north west) {\scriptsize{$\textbf{h}(\textrm{``广州''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (h4) at ([yshift=1.5em]e4.north west) {\scriptsize{$\textbf{h}(\textrm{``的''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (h5) at ([yshift=1.5em]e5.north west) {\scriptsize{$\textbf{h}(\textrm{``机票''})$}};
\foreach \x in {1,2,3,4,5}{
\node [anchor=north] (plus\x) at ([yshift=-0em]e\x.south) {\scriptsize{$\mathbf{\oplus}$}};
}
\node [rnode,anchor=north,fill=yellow!20!white] (pos1) at ([yshift=-1.1em]e1.south) {\scriptsize{$\textbf{PE}(1)$}};
\node [rnode,anchor=north,fill=yellow!20!white] (pos2) at ([yshift=-1.1em]e2.south) {\scriptsize{$\textbf{PE}(2)$}};
\node [rnode,anchor=north,fill=yellow!20!white] (pos3) at ([yshift=-1.1em]e3.south) {\scriptsize{$\textbf{PE}(3)$}};
\node [rnode,anchor=north,fill=yellow!20!white] (pos4) at ([yshift=-1.1em]e4.south) {\scriptsize{$\textbf{PE}(4)$}};
\node [rnode,anchor=north,fill=yellow!20!white] (pos5) at ([yshift=-1.1em]e5.south) {\scriptsize{$\textbf{PE}(5)$}};
\foreach \x in {1,2,3,4,5}{
\node [rectangle,inner sep=0.1em,rounded corners=1pt,very thick,dotted,draw=red!60] [fit = (e\x) (pos\x)] (box\x) {};
}
\node [anchor=north] (inputs1) at ([yshift=-1em]pos1.south) {\scriptsize{沈阳}};
\node [anchor=north] (inputs2) at ([yshift=-1em]pos2.south) {\scriptsize{}};
\node [anchor=north] (inputs3) at ([yshift=-1em]pos3.south) {\scriptsize{广州}};
\node [anchor=north] (inputs4) at ([yshift=-1em]pos4.south) {\scriptsize{}};
\node [anchor=north] (inputs5) at ([yshift=-1em]pos5.south) {\scriptsize{机票}};
\draw [->] ([yshift=0.1em]e1.north) .. controls +(north:0.5) and +(south:0.5) .. ([xshift=-1em,yshift=-0.1em]h3.south);
\draw [->] ([yshift=0.1em]e2.north) .. controls +(north:0.3) and +(south:0.6) .. ([xshift=-0.5em,yshift=-0.1em]h3.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]h3.south);
\draw [->] ([yshift=0.1em]e4.north) .. controls +(north:0.3) and +(south:0.6) .. ([xshift=0.5em,yshift=-0.1em]h3.south);
\draw [->] ([yshift=0.1em]e5.north) .. controls +(north:0.5) and +(south:0.5) .. ([xshift=1em,yshift=-0.1em]h3.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]h1.south);
\draw [->] ([yshift=0.1em]e2.north) -- ([yshift=-0.1em]h2.south);
\draw [->] ([yshift=0.1em]e4.north) -- ([yshift=-0.1em]h4.south);
\draw [->] ([yshift=0.1em]e5.north) -- ([yshift=-0.1em]h5.south);
\foreach \x in {1,2,3,4,5}{
\draw [->] ([yshift=-0.1em]inputs\x.north) -- ([yshift=-0.2em]pos\x.south);
}
\node [anchor=north] (dot1) at ([xshift=0.4em,yshift=-0.2em]h1.south) {\tiny{...}};
\node [anchor=north] (dot2) at ([xshift=0.4em,yshift=-0.2em]h2.south) {\tiny{...}};
\node [anchor=north] (dot4) at ([xshift=-0.4em,yshift=-0.2em]h4.south) {\tiny{...}};
\node [anchor=north] (dot5) at ([xshift=-0.4em,yshift=-0.2em]h5.south) {\tiny{...}};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnode} = [draw,minimum width=3.5em,minimum height=1.2em]
\node [rnode,anchor=south west,fill=green!20!white] (key1) at (0,0) {\scriptsize{$\textbf{h}(\textrm{``沈阳''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key2) at ([xshift=1em]key1.south east) {\scriptsize{$\textbf{h}(\textrm{``到''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key3) at ([xshift=1em]key2.south east) {\scriptsize{$\textbf{h}(\textrm{``广州''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key4) at ([xshift=2em]key3.south east) {\scriptsize{$\textbf{h}(\textrm{``机票''})$}};
\node [rnode,anchor=south west] (key5) at ([xshift=1em]key4.south east) {\scriptsize{$\textbf{h}(\textrm{``机票''})$}};
\node [anchor=west] (sep1) at ([xshift=0.3em]key3.east) {\scriptsize{$\textbf{...}$}};
\draw [->] ([yshift=1pt,xshift=-3pt]key5.north) .. controls +(90:1em) and +(90:0.7em) .. ([yshift=1pt]key4.north);
\draw [->] ([yshift=1pt,xshift=0pt]key5.north) .. controls +(90:1.4em) and +(90:1.4em) .. ([yshift=1pt]key3.north);
\draw [->] ([yshift=1pt,xshift=3pt]key5.north) .. controls +(90:1.8em) and +(90:1.8em) .. ([yshift=1pt]key2.north);
\draw [->] ([yshift=1pt,xshift=6pt]key5.north) .. controls +(90:2.2em) and +(90:2.2em) .. ([yshift=1pt]key1.north);
\node [anchor=south west] (alpha1) at ([xshift=-1em]key1.north west) {\scriptsize{$\alpha_1=.2$}};
\node [anchor=south west] (alpha2) at ([xshift=-1em]key2.north west) {\scriptsize{$\alpha_2=.3$}};
\node [anchor=south west] (alpha3) at ([xshift=-1em]key3.north west) {\scriptsize{$\alpha_3=.1$}};
\node [anchor=south west] (alpha4) at ([xshift=-1em]key4.north west) {\scriptsize{$\alpha_4=.3$}};
\vspace{0.5em}
\node [rnode,anchor=south west,fill=green!20!white] (key6) at ([yshift=2em]key1.north west) {\scriptsize{$\textbf{h}(\textrm{``广州''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key7) at ([yshift=2em]key2.north west) {\scriptsize{$\textbf{h}(\textrm{``到''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key8) at ([yshift=2em]key3.north west) {\scriptsize{$\textbf{h}(\textrm{``沈阳''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key9) at ([yshift=2em]key4.north west) {\scriptsize{$\textbf{h}(\textrm{``机票''})$}};
\node [rnode,anchor=south west] (key10) at ([yshift=2em]key5.north west) {\scriptsize{$\textbf{h}(\textrm{``机票''})$}};
\node [anchor=west] (sep1) at ([xshift=0.3em]key8.east) {\scriptsize{$\textbf{...}$}};
\draw [->] ([yshift=1pt,xshift=-3pt]key10.north) .. controls +(90:1em) and +(90:0.7em) .. ([yshift=1pt]key9.north);
\draw [->] ([yshift=1pt,xshift=0pt]key10.north) .. controls +(90:1.4em) and +(90:1.4em) .. ([yshift=1pt]key8.north);
\draw [->] ([yshift=1pt,xshift=3pt]key10.north) .. controls +(90:1.8em) and +(90:1.8em) .. ([yshift=1pt]key7.north);
\draw [->] ([yshift=1pt,xshift=6pt]key10.north) .. controls +(90:2.2em) and +(90:2.2em) .. ([yshift=1pt]key6.north);
\node [anchor=south west] (alpha5) at ([xshift=-1em]key6.north west) {\scriptsize{$\alpha_1=.1$}};
\node [anchor=south west] (alpha6) at ([xshift=-1em]key7.north west) {\scriptsize{$\alpha_2=.3$}};
\node [anchor=south west] (alpha7) at ([xshift=-1em]key8.north west) {\scriptsize{$\alpha_3=.2$}};
\node [anchor=south west] (alpha8) at ([xshift=-1em]key9.north west) {\scriptsize{$\alpha_4=.3$}};
\end{scope}
\end{tikzpicture}
\vspace{-1.0em}
\footnotesize{
\begin{eqnarray}
\tilde{\mathbf{h}} (\textrm{''机票''}) & = & 0.2 \times \textbf{h}(\textrm{``沈阳''}) + 0.3 \times \textbf{h}(\textrm{``到''}) + \nonumber \\
& & 0.1 \times \textbf{h}(\textrm{``广州''}) + ... + 0.3 \times \textbf{h}(\textrm{``机票''}) \nonumber
\end{eqnarray}
}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}[scale=1.5]
{\Large
\tikzstyle{snode} = [draw,inner sep=1pt,minimum width=3em,minimum height=0.5em,rounded corners=1pt,fill=green!30!white]
\tikzstyle{pnode} = [draw,inner sep=1pt,minimum width=1em,minimum height=0.5em,rounded corners=1pt]
\node [anchor=west,snode] (s1) at (0,0) {};
\node [anchor=north west,snode,minimum width=6.5em] (s2) at ([yshift=-0.3em]s1.south west) {};
\node [anchor=north west,snode,minimum width=2em] (s3) at ([yshift=-0.3em]s2.south west) {};
\node [anchor=east] (label1) at ([xshift=-0.8em,yshift=0.6em]s1.west) {\scriptsize{Shuffled:}};
\node [anchor=west,pnode,minimum width=3em] (p1) at ([xshift=0.3em]s1.east) {};
\node [anchor=west,pnode,minimum width=4em] (p3) at ([xshift=0.3em]s3.east) {};
\node [anchor=west,snode,minimum width=5em] (s4) at ([xshift=4em]p1.east) {};
\node [anchor=north west,snode,minimum width=5em] (s5) at ([yshift=-0.3em]s4.south west) {};
\node [anchor=north west,snode,minimum width=6.5em] (s6) at ([yshift=-0.3em]s5.south west) {};
\node [anchor=east] (label2) at ([xshift=-0.8em,yshift=0.6em]s4.west) {\scriptsize{Sorted:}};
\node [anchor=west,pnode,minimum width=1em] (p4) at ([xshift=0.3em]s4.east) {};
\node [anchor=west,pnode,minimum width=1em] (p5) at ([xshift=0.3em]s5.east) {};
\node [rectangle,inner sep=0.5em,rounded corners=2pt,very thick,dotted,draw=ugreen!80] [fit = (s1) (s3) (p1) (p3)] (box0) {};
\node [rectangle,inner sep=0.5em,rounded corners=2pt,very thick,dotted,draw=ugreen!80] [fit = (s4) (s6) (p4) (p5)] (box0) {};
}
\end{scope}
\end{tikzpicture}
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnnnode} = [minimum height=1.1em,minimum width=2.1em,inner sep=2pt,rounded corners=1pt,draw,fill=red!20];
\node [rnnnode,anchor=west] (h1) at (0,0) {\tiny{$\textbf{h}_1$}};
\node [rnnnode,anchor=west] (h2) at ([xshift=1em]h1.east) {\tiny{$\textbf{h}_2$}};
\node [rnnnode,anchor=west] (h3) at ([xshift=1em]h2.east) {\tiny{$\textbf{h}_3$}};
\node [rnnnode,anchor=north,fill=green!20] (e1) at ([yshift=-1em]h1.south) {\tiny{$e_x()$}};
\node [rnnnode,anchor=west,fill=green!20] (e2) at ([xshift=1em]e1.east) {\tiny{$e_x()$}};
\node [rnnnode,anchor=west,fill=green!20] (e3) at ([xshift=1em]e2.east) {\tiny{$e_x()$}};
\node [anchor=north,inner sep=2pt] (w1) at ([yshift=-0.6em]e1.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w2) at ([yshift=-0.6em]e2.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w3) at ([yshift=-0.6em]e3.south) {\tiny{$\langle$eos$\rangle$}};
%\node [anchor=south] (dot1) at ([xshift=0.4em,yshift=-0.7em]h1.south) {\tiny{...}};
%\node [anchor=south] (dot2) at ([xshift=-0.4em,yshift=-0.7em]h3.south) {\tiny{...}};
\draw [->] (w1.north) -- ([yshift=-0.1em]e1.south);
\draw [->] (w2.north) -- ([yshift=-0.1em]e2.south);
\draw [->] (w3.north) -- ([yshift=-0.1em]e3.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]h1.south);
\draw [->] ([yshift=0.1em]e2.north) -- ([yshift=-0.1em]h2.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]h3.south);
\draw [->] ([xshift=0.2em,yshift=0.1em]e1.north) .. controls +(north:0.3) and +(south:0.4) .. ([xshift=-0.3em,yshift=-0.1em]h2.south);
\draw [->] ([xshift=-0.2em,yshift=0.1em]e3.north) .. controls +(north:0.3) and +(south:0.4) .. ([xshift=0.3em,yshift=-0.1em]h2.south);
\draw [->] ([xshift=0.4em,yshift=-0.4em]h1.south) -- ([xshift=0.3em,yshift=-0.1em]h1.south);
\draw [->] ([xshift=0.8em,yshift=-0.4em]h1.south) -- ([xshift=0.6em,yshift=-0.1em]h1.south);
\draw [->] ([xshift=-0.4em,yshift=-0.4em]h3.south) -- ([xshift=-0.3em,yshift=-0.1em]h3.south);
\draw [->] ([xshift=-0.8em,yshift=-0.4em]h3.south) -- ([xshift=-0.6em,yshift=-0.1em]h3.south);
\node [anchor=south] (encoder) at ([xshift=-0.2em]h1.north west) {\scriptsize{\textbf{编码器}}};
{
\node [rnnnode,anchor=west,fill=green!20] (t1) at ([xshift=3em]e3.east) {\tiny{$e_y()$}};
}
{
\node [rnnnode,anchor=west,fill=green!20] (t2) at ([xshift=1.5em]t1.east) {\tiny{$e_y()$}};
}
{
\node [rnnnode,anchor=west,fill=green!20] (t3) at ([xshift=1.5em]t2.east) {\tiny{$e_y()$}};
\node [rnnnode,anchor=west,fill=green!20] (t4) at ([xshift=1.5em]t3.east) {\tiny{$e_y()$}};
%\node [anchor=west,inner sep=2pt] (t5) at ([xshift=0.3em]t4.east) {\tiny{...}};
}
{
\node [rnnnode,anchor=south] (s1) at ([yshift=1em]t1.north) {\tiny{$\textbf{s}_1$}};
\node [rnnnode,anchor=south] (f1) at ([yshift=1em]s1.north) {\tiny{$\textbf{f}_1$}};
}
{
\node [rnnnode,anchor=south] (s2) at ([yshift=1em]t2.north) {\tiny{$\textbf{s}_2$}};
\node [rnnnode,anchor=south] (f2) at ([yshift=1em]s2.north) {\tiny{$\textbf{f}_2$}};
}
{
\node [rnnnode,anchor=south] (s3) at ([yshift=1em]t3.north) {\tiny{$\textbf{s}_3$}};
\node [rnnnode,anchor=south] (f3) at ([yshift=1em]s3.north) {\tiny{$\textbf{f}_3$}};
\node [rnnnode,anchor=south] (s4) at ([yshift=1em]t4.north) {\tiny{$\textbf{s}_4$}};
\node [rnnnode,anchor=south] (f4) at ([yshift=1em]s4.north) {\tiny{$\textbf{f}_4$}};
%\node [anchor=west,inner sep=2pt] (s5) at ([xshift=0.3em]s4.east) {\tiny{...}};
%\node [anchor=south] (dot3) at ([xshift=-0.4em,yshift=-0.7em]s3.south) {\tiny{...}};
\node [anchor=south] (dot4) at ([xshift=-0.4em,yshift=-0.7em]s4.south) {\tiny{...}};
}
{
\node [rnnnode,anchor=south,fill=blue!20] (o1) at ([yshift=1em]f1.north) {\tiny{softmax}};
\node [anchor=east] (decoder) at ([xshift=-0.3em,yshift=0.5em]o1.north west) {\scriptsize{\textbf{解码器}}};
}
{
\node [rnnnode,anchor=south,fill=blue!20] (o2) at ([yshift=1em]f2.north) {\tiny{softmax}};
}
{
\node [rnnnode,anchor=south,fill=blue!20] (o3) at ([yshift=1em]f3.north) {\tiny{softmax}};
\node [rnnnode,anchor=south,fill=blue!20] (o4) at ([yshift=1em]f4.north) {\tiny{softmax}};
%\node [anchor=west,inner sep=2pt] (o5) at ([xshift=0.3em]o4.east) {\tiny{...}};
}
{
\node [anchor=north,inner sep=2pt] (wt1) at ([yshift=-0.6em]t1.south) {\tiny{$\langle$eos$\rangle$}};
}
{
\node [anchor=north,inner sep=2pt] (wt2) at ([yshift=-0.6em]t2.south) {\tiny{How}};
}
{
\node [anchor=north,inner sep=2pt] (wt3) at ([yshift=-0.8em]t3.south) {\tiny{are}};
\node [anchor=north,inner sep=2pt] (wt4) at ([yshift=-0.8em]t4.south) {\tiny{you}};
}
{
\node [anchor=center,inner sep=2pt] (wo1) at ([yshift=1.2em]o1.north) {\tiny{How}};
}
{
\node [anchor=south,inner sep=2pt] (wos1) at (wo1.north) {\tiny{\textbf{[step 1]}}};
}
{
\node [anchor=center,inner sep=2pt] (wo2) at ([yshift=1.2em]o2.north) {\tiny{are}};
}
{
\node [anchor=south,inner sep=2pt] (wos2) at (wo2.north) {\tiny{\textbf{[step 2]}}};
}
{
\node [anchor=center,inner sep=2pt] (wo3) at ([yshift=1.2em]o3.north) {\tiny{you}};
\node [anchor=south,inner sep=2pt] (wos3) at (wo3.north) {\tiny{\textbf{[step 3]}}};
\node [anchor=center,inner sep=2pt] (wo4) at ([yshift=1.2em]o4.north) {\tiny{$\langle$eos$\rangle$}};
\node [anchor=south,inner sep=2pt] (wos4) at (wo4.north) {\tiny{\textbf{[step 4]}}};
}
{
\foreach \x in {1}{
\draw [->] ([yshift=-0.7em]t\x.south) -- ([yshift=-0.1em]t\x.south);
\draw [->] ([yshift=0.1em]t\x.north) -- ([yshift=-0.1em]s\x.south);
\draw [->] ([yshift=0.1em]s\x.north) -- ([yshift=-0.1em]f\x.south);
\draw [->] ([yshift=0.1em]f\x.north) -- ([yshift=-0.1em]o\x.south);
\draw [->] ([yshift=0.1em]o\x.north) -- ([yshift=0.8em]o\x.north) node [pos=0.5,right] {\tiny{top1}};
}
}
{
\foreach \x in {2}{
\draw [->] ([yshift=-0.7em]t\x.south) -- ([yshift=-0.1em]t\x.south);
\draw [->] ([yshift=0.1em]t\x.north) -- ([yshift=-0.1em]s\x.south);
\draw [->] ([yshift=0.1em]s\x.north) -- ([yshift=-0.1em]f\x.south);
\draw [->] ([yshift=0.1em]f\x.north) -- ([yshift=-0.1em]o\x.south);
\draw [->] ([yshift=0.1em]o\x.north) -- ([yshift=0.8em]o\x.north) node [pos=0.5,right] {\tiny{top1}};
\draw [->] ([xshift=0.2em,yshift=0.1em]t1.north) .. controls +(north:0.3) and +(south:0.3) .. ([xshift=-0.3em,yshift=-0.1em]s2.south);
}
}
{
\foreach \x in {3,4}{
\draw [->] ([yshift=-0.7em]t\x.south) -- ([yshift=-0.1em]t\x.south);
\draw [->] ([yshift=0.1em]t\x.north) -- ([yshift=-0.1em]s\x.south);
\draw [->] ([yshift=0.1em]s\x.north) -- ([yshift=-0.1em]f\x.south);
\draw [->] ([yshift=0.1em]f\x.north) -- ([yshift=-0.1em]o\x.south);
\draw [->] ([yshift=0.1em]o\x.north) -- ([yshift=0.8em]o\x.north) node [pos=0.5,right] {\tiny{top1}};
%\draw [->] ([xshift=0.4em,yshift=0.1em]t1.north) .. controls +(north:0.25) and +(south:0.3) .. ([xshift=-0.6em,yshift=-0.1em]s3.south);
%\draw [->] ([xshift=0.2em,yshift=0.1em]t2.north) .. controls +(north:0.2) and +(south:0.4) .. ([xshift=-0.3em,yshift=-0.1em]s3.south);
\draw [->] ([xshift=-0.6em,yshift=-0.5em]s3.south) .. controls +(north:0) and +(south:0.2) .. ([xshift=-0.3em,yshift=-0.1em]s3.south);
\draw [->] ([xshift=-1.5em,yshift=-0.5em]s3.south) .. controls +(north:0) and +(south:0.15) .. ([xshift=-0.6em,yshift=-0.1em]s3.south);
}
}
{
\draw [->,thick,dotted] (wo1.east) .. controls +(east:1.0) and +(west:1.0) ..(wt2.west);
}
{
\draw [->,thick,dotted] (wo2.east) .. controls +(east:1.3) and +(west:1.1) ..(wt3.west);
\draw [->,thick,dotted] (wo3.east) .. controls +(east:1.1) and +(west:0.9) ..(wt4.west);
}
{
\node [circle,draw,anchor=south,inner sep=3pt,fill=orange!20] (c1) at ([yshift=2em]h2.north) {\tiny{$\textbf{C}_1$}};
\node [anchor=south] (c1label) at (c1.north) {\tiny{\textbf{编码-解码注意力机制:上下文}}};
\draw [->] (h1.north) .. controls +(north:0.6) and +(250:0.9) .. (c1.250);
\draw [->] (h2.north) .. controls +(north:0.6) and +(270:0.9) .. (c1.270);
\draw [->] (h3.north) .. controls +(north:0.6) and +(290:0.9) .. (c1.290);
\draw [->] ([yshift=0.3em]s1.west) .. controls +(west:1) and +(east:1) .. (c1.-30);
\draw [->] (c1.0) .. controls +(east:1) and +(west:1) .. ([yshift=0em]f1.west);
}
{
\node [circle,draw,anchor=north,inner sep=3pt,fill=orange!20] (c2) at ([yshift=-2em]t1.south) {\tiny{$\textbf{C}_2$}};
\draw [->] ([xshift=-0.7em]c2.west) -- ([xshift=-0.1em]c2.west);
\draw [->] ([xshift=0.1em]c2.east) .. controls +(east:0.6) and +(west:0.8) ..([yshift=-0.3em,xshift=-0.1em]f2.west);
}
{
\node [circle,draw,anchor=north,inner sep=3pt,fill=orange!20] (c3) at ([yshift=-2em]t2.south) {\tiny{$\textbf{C}_3$}};
\draw [->] ([xshift=-0.7em]c3.west) -- ([xshift=-0.1em]c3.west);
\draw [->] ([xshift=0.1em]c3.east) .. controls +(east:0.6) and +(west:0.8) ..([yshift=-0.3em,xshift=-0.1em]f3.west);
}
{
\node [circle,draw,anchor=north,inner sep=3pt,fill=orange!20] (c4) at ([yshift=-2em]t3.south) {\tiny{$\textbf{C}_4$}};
\draw [->] ([xshift=-0.7em]c4.west) -- ([xshift=-0.1em]c4.west);
\draw [->] ([xshift=0.1em]c4.east) .. controls +(east:0.6) and +(west:0.8) ..([yshift=-0.3em,xshift=-0.1em]f4.west);
}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\node [anchor=west] (w0) at (0,0) {$w_1$};
\node [anchor=west] (w1) at ([xshift=0.5em]w0.east) {$w_2$};
\node [anchor=west] (w2) at ([xshift=0.5em]w1.east) {$w_3$};
\node [anchor=west] (w3) at ([xshift=0.5em]w2.east) {$...$};
\node [anchor=west] (w4) at ([xshift=0.5em]w3.east) {$w_{m-1}$};
\node [anchor=west,fill=green!20!white] (w5) at ([xshift=0.5em]w4.east) {$w_{m}$};
\draw [->,thick,red] (w1.north).. controls +(130:0.5) and +(50:0.5) .. (w0.north);
\draw [->,thick,red] (w2.north).. controls +(130:0.5) and +(50:0.5) .. (w1.north);
\draw [->,thick,red] ([yshift=0.2em]w3.north).. controls +(130:0.5) and +(50:0.5) .. (w2.north);
\draw [->,thick,red] (w4.north).. controls +(130:0.5) and +(50:0.5) .. ([yshift=0.2em]w3.north);
\draw [->,thick,red] (w5.north).. controls +(130:0.5) and +(50:0.5) .. (w4.north);
\draw [->,very thick,red] ([xshift=-5em]w0.west) -- ([xshift=-6.5em]w0.west) node [pos=0,right] {\scriptsize{信息传递}};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\node [anchor=west] (w0) at (0,-2) {$w_1$};
\node [anchor=west] (w1) at ([xshift=0.5em]w0.east) {$w_2$};
\node [anchor=west] (w2) at ([xshift=0.5em]w1.east) {$w_3$};
\node [anchor=west] (w3) at ([xshift=0.5em]w2.east) {$...$};
\node [anchor=west] (w4) at ([xshift=0.5em]w3.east) {$w_{m-1}$};
\node [anchor=west,fill=green!20!white] (w5) at ([xshift=0.5em]w4.east) {$w_{m}$};
\draw [->,thick,red] (w5.north).. controls +(100:0.85) and +(50:0.85) .. (w0.north);
\draw [->,thick,red] (w5.north).. controls +(110:0.75) and +(50:0.75) .. (w1.north);
\draw [->,thick,red] (w5.north).. controls +(120:0.6) and +(50:0.6) .. ([yshift=0.2em]w3.north);
\draw [->,thick,red] (w5.north).. controls +(130:0.5) and +(50:0.5) .. (w4.north);
\draw [->,very thick,red] ([xshift=-5em]w0.west) -- ([xshift=-6.5em]w0.west) node [pos=0,right] {\scriptsize{信息传递}};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{lnode} = [minimum height=1.5em,minimum width=3em,inner sep=3pt,rounded corners=1.5pt,draw,fill=orange!20];
\tikzstyle{standard} = [rounded corners=3pt]
\node [lnode,anchor=west] (l1) at (0,0) {\scriptsize{子层$n$}};
\node [lnode,anchor=west] (l2) at ([xshift=3em]l1.east) {\scriptsize{层正则化}};
\node [lnode,anchor=west] (l3) at ([xshift=4em]l2.east) {\scriptsize{层正则化}};
\node [lnode,anchor=west] (l4) at ([xshift=1.5em]l3.east) {\scriptsize{子层$n$}};
\node [anchor=west] (plus1) at ([xshift=0.9em]l1.east) {\scriptsize{$\mathbf{\oplus}$}};
\node [anchor=west] (plus2) at ([xshift=0.9em]l4.east) {\scriptsize{$\mathbf{\oplus}$}};
\node [anchor=north] (label1) at ([xshift=3em,yshift=-0.5em]l1.south) {\scriptsize{(a)后正则化}};
\node [anchor=north] (label2) at ([xshift=3em,yshift=-0.5em]l3.south) {\scriptsize{(b)前正则化}};
\draw [->,thick] ([xshift=-1.5em]l1.west) -- ([xshift=-0.1em]l1.west);
\draw [->,thick] ([xshift=0.1em]l1.east) -- ([xshift=0.2em]plus1.west);
\draw [->,thick] ([xshift=-0.2em]plus1.east) -- ([xshift=-0.1em]l2.west);
\draw [->,thick] ([xshift=0.1em]l2.east) -- ([xshift=1em]l2.east);
\draw [->,thick] ([xshift=-1.5em]l3.west) -- ([xshift=-0.1em]l3.west);
\draw [->,thick] ([xshift=0.1em]l3.east) -- ([xshift=-0.1em]l4.west);
\draw [->,thick] ([xshift=0.1em]l4.east) -- ([xshift=0.2em]plus2.west);
\draw [->,thick] ([xshift=-0.2em]plus2.east) -- ([xshift=1em]plus2.east);
\draw[->,standard,thick] ([xshift=-0.8em]l1.west) -- ([xshift=-0.8em,yshift=2em]l1.west) -- ([yshift=2em]plus1.center) -- ([yshift=-0.2em]plus1.north);
\draw[->,standard,thick] ([xshift=-0.8em]l3.west) -- ([xshift=-0.8em,yshift=2em]l3.west) -- ([yshift=2em]plus2.center) -- ([yshift=-0.2em]plus2.north);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnode} = [draw,minimum width=2.8em,minimum height=1.2em]
\node [rnode,anchor=south west,fill=green!20!white] (key11) at (0,0) {\scriptsize{$\textbf{h}(\textrm{``你''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key12) at ([xshift=0.8em]key11.south east) {\scriptsize{$\textbf{h}(\textrm{``什么''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key13) at ([xshift=0.8em]key12.south east) {\scriptsize{$\textbf{h}(\textrm{``也''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key14) at ([xshift=0.8em]key13.south east) {\scriptsize{$\textbf{h}(\textrm{``没''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key15) at ([xshift=0.8em]key14.south east) {\scriptsize{$\textbf{h}(\textrm{``学''})$}};
\node [rnode,anchor=east] (query1) at ([xshift=-1em]key11.west) {\scriptsize{$\textbf{h}(\textrm{``你''})$}};
\draw [->] ([yshift=1pt,xshift=4pt]query1.north) .. controls +(90:0.6em) and +(90:0.6em) .. ([yshift=1pt]key11.north);
\draw [->] ([yshift=1pt,xshift=0pt]query1.north) .. controls +(90:1.0em) and +(90:1.0em) .. ([yshift=1pt]key12.north);
\draw [->] ([yshift=1pt,xshift=-4pt]query1.north) .. controls +(90:1.4em) and +(90:1.4em) .. ([yshift=1pt]key13.north);
\draw [->] ([yshift=1pt,xshift=-8pt]query1.north) .. controls +(90:1.8em) and +(90:1.8em) .. ([yshift=1pt]key14.north);
\draw [->] ([yshift=1pt,xshift=-12pt]query1.north) .. controls +(90:2.2em) and +(90:2.2em) .. ([yshift=1pt]key15.north);
\node [anchor=south west] (alpha11) at ([xshift=0.3em]key11.north) {\scriptsize{$\alpha_1$}};
\node [anchor=south west] (alpha12) at ([xshift=0.3em]key12.north) {\scriptsize{$\alpha_2$}};
\node [anchor=south west] (alpha13) at ([xshift=0.3em]key13.north) {\scriptsize{$\alpha_3$}};
\node [anchor=south west] (alpha14) at ([xshift=0.3em]key14.north) {\scriptsize{$\alpha_4$}};
\node [anchor=south west] (alpha15) at ([xshift=0.3em]key15.north) {\scriptsize{$\alpha_5$}};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\footnotesize{
\begin{axis}[
width=.60\textwidth,
height=.40\textwidth,
legend style={at={(0.60,0.08)}, anchor=south west},
xlabel={\footnotesize{更新步数 (10k)}},
ylabel={\footnotesize{学习率 (\scriptsize{$10^{-3}$)}}},
ylabel style={yshift=-1em},xlabel style={yshift=0.0em},
yticklabel style={/pgf/number format/precision=2,/pgf/number format/fixed zerofill},
ymin=0,ymax=0.9, ytick={0.2, 0.4, 0.6, 0.8},
xmin=0,xmax=12,xtick={2,4,6,8,10},
legend style={yshift=-6pt, legend plot pos=right,font=\scriptsize,cells={anchor=west}}
]
\addplot[orange,line width=1.25pt] coordinates {(0,0) (4,0.7) (5,0.63) (6,0.57) (7,0.525) (8,0.49) (9,0.465) (10,0.44) (11,0.42) (12,0.4)};
\end{axis}
}
\end{tikzpicture}
\begin{tikzpicture}
\begin{scope}
\tikzstyle{attnode} = [minimum size=1.5em,inner sep=0pt,rounded corners=1pt,draw]
\tikzstyle{srcnode} = [rotate=45,font=\small,anchor=south west]
\tikzstyle{tgtnode} = [left,font=\small,anchor=north east]
\tikzstyle{masknode} = [minimum size=5.8em,inner sep=0pt,rounded corners=1pt,draw]
\tikzstyle{elementnode} = [rectangle,text=white,anchor=center]
%\setlength{\hnode}{1.0cm}
%\node [anchor=west,attnode] (node1) at (0,0) {\tiny{}};
%\node [anchor=west,attnode] (node2) at ([xshift=1em]node1.east) {\tiny{}};
{
\foreach \i / \j / \c in
{0/5/0.25, 1/5/0.15, 2/5/0.15, 3/5/0.35, 4/5/0.25, 5/5/0.15,
0/4/0.15, 1/4/0.25, 2/4/0.2, 3/4/0.30, 4/4/0.15, 5/4/0.15,
0/3/0.15, 1/3/0.15, 2/3/0.5, 3/3/0.25, 4/3/0.15, 5/3/0.25,
0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.25, 5/2/0.3,
0/1/0.25, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.5, 5/1/0.15,
0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.25, 5/0/0.40}
\node[elementnode,minimum size=0.6*1.0cm*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*1.0cm*\i-5.4*0.5*1.0cm,0.5*1.0cm*\j-1.05*1.0cm) {};
% source
\node[srcnode] (src1) at (-5.4*0.5*1.0cm,-1.05*1.0cm+5.5*0.5*1.0cm) {\scriptsize{Have}};
\node[srcnode] (src2) at ([xshift=0.5cm]src1.south west) {\scriptsize{you}};
\node[srcnode] (src3) at ([xshift=0.5cm]src2.south west) {\scriptsize{learned}};
\node[srcnode] (src4) at ([xshift=0.5cm]src3.south west) {\scriptsize{nothing}};
\node[srcnode] (src5) at ([xshift=0.5cm]src4.south west) {\scriptsize{?}};
\node[srcnode] (src6) at ([xshift=0.5cm]src5.south west) {\scriptsize{$\langle$eos$\rangle$}};
% target
\node[tgtnode] (tgt1) at (-6.0*0.5*1.0cm,-1.05*1.0cm+5.5*0.5*1.0cm) {\scriptsize{Have}};
\node[tgtnode] (tgt2) at ([yshift=-0.5cm]tgt1.north east) {\scriptsize{you}};
\node[tgtnode] (tgt3) at ([yshift=-0.5cm]tgt2.north east) {\scriptsize{learned}};
\node[tgtnode] (tgt4) at ([yshift=-0.5cm]tgt3.north east) {\scriptsize{nothing}};
\node[tgtnode] (tgt5) at ([yshift=-0.5cm]tgt4.north east) {\scriptsize{?}};
\node[tgtnode] (tgt6) at ([yshift=-0.5cm]tgt5.north east) {\scriptsize{$\langle$eos$\rangle$}};
{
\filldraw [fill=blue!20,draw,thick,fill opacity=0.85] ([xshift=-0.9em,yshift=0.5em]a15.north west) -- ([xshift=0.5em,yshift=-0.9em]a51.south east) -- ([xshift=0.5em,yshift=0.5em]a55.north east) -- ([xshift=-0.9em,yshift=0.5em]a15.north west);
\node[anchor=west] (labelmask) at ([xshift=0.3em,yshift=0.5em]a23.north east) {Masked};
}
{
\foreach \i / \j / \c in
{0/5/0.25,
0/4/0.15, 1/4/0.25,
0/3/0.15, 1/3/0.15, 2/3/0.5,
0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15,
0/1/0.25, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.5,
0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.25, 5/0/0.40}
\node[elementnode,minimum size=0.6*1.0cm*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*1.0cm*\i+6*0.5*1.0cm,0.5*1.0cm*\j-1.05*1.0cm) {};
% source
\node[srcnode] (src1) at (6*0.5*1.0cm,-1.05*1.0cm+5.5*0.5*1.0cm) {\scriptsize{Have}};
\node[srcnode] (src2) at ([xshift=0.5cm]src1.south west) {\scriptsize{you}};
\node[srcnode] (src3) at ([xshift=0.5cm]src2.south west) {\scriptsize{learned}};
\node[srcnode] (src4) at ([xshift=0.5cm]src3.south west) {\scriptsize{nothing}};
\node[srcnode] (src5) at ([xshift=0.5cm]src4.south west) {\scriptsize{?}};
\node[srcnode] (src6) at ([xshift=0.5cm]src5.south west) {\scriptsize{$\langle$eos$\rangle$}};
% target
\node[tgtnode] (tgt1) at (5.4*0.5*1.0cm,-1.05*1.0cm+5.5*0.5*1.0cm) {\scriptsize{Have}};
\node[tgtnode] (tgt2) at ([yshift=-0.5cm]tgt1.north east) {\scriptsize{you}};
\node[tgtnode] (tgt3) at ([yshift=-0.5cm]tgt2.north east) {\scriptsize{learned}};
\node[tgtnode] (tgt4) at ([yshift=-0.5cm]tgt3.north east) {\scriptsize{nothing}};
\node[tgtnode] (tgt5) at ([yshift=-0.5cm]tgt4.north east) {\scriptsize{?}};
\node[tgtnode] (tgt6) at ([yshift=-0.5cm]tgt5.north east) {\scriptsize{$\langle$eos$\rangle$}};
}
}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\node [anchor=west,draw=black!30,inner sep=4pt,fill=ugreen!20!white,text=ugreen!20!white] (Linear0) at (0,0) {\footnotesize{Linear}};
\node [anchor=south west,draw=black!50,fill=ugreen!20!white,draw,inner sep=4pt,text=ugreen!20!white] (Linear01) at ([shift={(-0.2em,-0.2em)}]Linear0.south west) {\footnotesize{Linear}};
\node [anchor=south west,fill=ugreen!20!white,draw,inner sep=4pt] (Linear02) at ([shift={(-0.2em,-0.2em)}]Linear01.south west) {\footnotesize{Linear}};
\node [anchor=north] (Q) at ([xshift=0em,yshift=-1em]Linear02.south) {\footnotesize{$\mathbf{Q}$}};
\node [anchor=west,draw=black!30,inner sep=4pt,fill=ugreen!20!white,text=ugreen!20!white] (Linear1) at ([xshift=1.5em]Linear0.east) {\footnotesize{Linear}};
\node [anchor=south west,draw=black!50,fill=ugreen!20!white,draw,inner sep=4pt,text=ugreen!20!white] (Linear11) at ([shift={(-0.2em,-0.2em)}]Linear1.south west) {\footnotesize{Linear}};
\node [anchor=south west,fill=ugreen!20!white,draw,inner sep=4pt] (Linear12) at ([shift={(-0.2em,-0.2em)}]Linear11.south west) {\footnotesize{Linear}};
\node [anchor=north] (K) at ([xshift=0em,yshift=-1em]Linear12.south) {\footnotesize{$\mathbf{K}$}};
\node [anchor=west,draw=black!30,inner sep=4pt,fill=ugreen!20!white,text=ugreen!20!white] (Linear2) at ([xshift=1.5em]Linear1.east) {\footnotesize{Linear}};
\node [anchor=south west,draw=black!50,fill=ugreen!20!white,draw,inner sep=4pt,text=ugreen!20!white] (Linear21) at ([shift={(-0.2em,-0.2em)}]Linear2.south west) {\footnotesize{Linear}};
\node [anchor=south west,fill=ugreen!20!white,draw,inner sep=4pt] (Linear22) at ([shift={(-0.2em,-0.2em)}]Linear21.south west) {\footnotesize{Linear}};
\node [anchor=north] (V) at ([xshift=0em,yshift=-1em]Linear22.south) {\footnotesize{$\mathbf{V}$}};
\node [anchor=south,draw=black!30,minimum width=12em,minimum height=2em,inner sep=4pt,fill=blue!20!white] (Scale) at ([yshift=1em]Linear1.north) {\footnotesize{}};
\node [anchor=south west,draw=black!50,minimum width=12em,minimum height=2em,fill=blue!20!white,draw,inner sep=4pt] (Scale1) at ([shift={(-0.2em,-0.2em)}]Scale.south west) {\footnotesize{}};
\node [anchor=south west,fill=blue!20!white,draw,minimum width=12em,minimum height=2em,inner sep=4pt] (Scale2) at ([shift={(-0.2em,-0.2em)}]Scale1.south west) {\footnotesize{Scaled Dot-Product Attention}};
\node [anchor=south,draw,minimum width=4em,inner sep=4pt,fill=yellow!30] (Concat) at ([yshift=1em]Scale2.north) {\footnotesize{Concat}};
\node [anchor=south,draw,minimum width=4em,inner sep=4pt,fill=ugreen!20!white] (Linear) at ([yshift=1em]Concat.north) {\footnotesize{Linear}};
\draw [->] ([yshift=0.1em]Q.north) -- ([yshift=-0.1em]Linear02.south);
\draw [-,draw=black!50] ([yshift=0.1em]Q.north) -- ([xshift=0.2em,yshift=-0.1em]Linear02.south);
\draw [-,draw=black!30] ([yshift=0.1em]Q.north) -- ([xshift=0.4em,yshift=-0.1em]Linear02.south);
\draw [->] ([yshift=0.1em]K.north) -- ([yshift=-0.1em]Linear12.south);
\draw [-,draw=black!50] ([yshift=0.1em]K.north) -- ([xshift=0.2em,yshift=-0.1em]Linear12.south);
\draw [-,draw=black!30] ([yshift=0.1em]K.north) -- ([xshift=0.4em,yshift=-0.1em]Linear12.south);
\draw [->] ([yshift=0.1em]V.north) -- ([yshift=-0.1em]Linear22.south);
\draw [-,draw=black!50] ([yshift=0.1em]V.north) -- ([xshift=0.2em,yshift=-0.1em]Linear22.south);
\draw [-,draw=black!30] ([yshift=0.1em]V.north) -- ([xshift=0.4em,yshift=-0.1em]Linear22.south);
\draw [->] ([yshift=0em]Linear02.north) -- ([yshift=1em]Linear02.north);
\draw [-,draw=black!50] ([yshift=0em]Linear01.north) -- ([yshift=0.8em]Linear01.north);
\draw [-,draw=black!30] ([yshift=0em]Linear0.north) -- ([yshift=0.6em]Linear0.north);
\draw [->] ([yshift=0em]Linear12.north) -- ([yshift=1em]Linear12.north);
\draw [-,draw=black!50] ([yshift=0em]Linear11.north) -- ([yshift=0.8em]Linear11.north);
\draw [-,draw=black!30] ([yshift=0em]Linear1.north) -- ([yshift=0.6em]Linear1.north);
\draw [->] ([yshift=0em]Linear22.north) -- ([yshift=1em]Linear22.north);
\draw [-,draw=black!50] ([yshift=0em]Linear21.north) -- ([yshift=0.8em]Linear21.north);
\draw [-,draw=black!30] ([yshift=0em]Linear2.north) -- ([yshift=0.6em]Linear2.north);
\draw [->] ([yshift=0em]Scale2.north) -- ([yshift=0em]Concat.south);
\draw [-,draw=black!50] ([yshift=0em]Scale1.north) -- ([yshift=0.8em]Scale1.north);
\draw [-,draw=black!30] ([yshift=0em]Scale.north) -- ([yshift=0.6em]Scale.north);
\draw [->] ([yshift=0em]Concat.north) -- ([yshift=0em]Linear.south);
\draw [->] ([yshift=0em]Linear.north) -- ([yshift=1em]Linear.north);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\node [anchor=south west,fill=white,draw,inner sep=4pt,minimum width=4em,fill=blue!20!white] (MatMul) at (0,0) {\tiny{MatMul}};
\node [anchor=north] (Q1) at ([xshift=-1.4em,yshift=-1em]MatMul.south) {\footnotesize{$\mathbf{Q}$}};
\node [anchor=north] (K1) at ([xshift=1.4em,yshift=-1em]MatMul.south) {\footnotesize{$\mathbf{K}$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2.5em] (Scale3) at ([yshift=1em]MatMul.north) {\tiny{Scale}};
\node [anchor=south,draw,inner sep=4pt,fill=purple!20,minimum width=3.5em] (Mask) at ([yshift=0.8em]Scale3.north) {\tiny{Mask(opt.)}};
\node [anchor=south,draw,inner sep=4pt,fill=ugreen!20!white] (SoftMax) at ([yshift=1em]Mask.north) {\tiny{SoftMax}};
\node [anchor=south,draw,minimum width=4em,inner sep=4pt,fill=blue!20!white] (MatMul1) at ([xshift=1.7em,yshift=1em]SoftMax.north) {\tiny{MatMul}};
\node [anchor=north] (V1) at ([xshift=2em]K1.north) {\footnotesize{$\mathbf{V}$}};
\node [anchor=north] (null) at ([yshift=0.8em]MatMul1.north) {};
\draw [->] ([yshift=0.1em]Q1.north) -- ([xshift=-1.4em,yshift=-0.1em]MatMul.south);
\draw [->] ([yshift=0.1em]K1.north) -- ([xshift=1.4em,yshift=-0.1em]MatMul.south);
\draw [->] ([yshift=0.1em]MatMul.north) -- ([yshift=-0.1em]Scale3.south);
\draw [->] ([yshift=0.1em]Scale3.north) -- ([yshift=-0.1em]Mask.south);
\draw [->] ([yshift=0.1em]Mask.north) -- ([yshift=-0.1em]SoftMax.south);
\draw [->] ([yshift=0.1em]SoftMax.north) -- ([yshift=0.9em]SoftMax.north);
\draw [->] ([yshift=0.1em]V1.north) -- ([yshift=9.3em]V1.north);
\draw [->] ([yshift=0.1em]MatMul1.north) -- ([yshift=0.8em]MatMul1.north);
{
\node [anchor=east] (line1) at ([xshift=-4em,yshift=1em]MatMul.west) {\scriptsize{自注意力机制的Query}};
\node [anchor=north west] (line2) at ([yshift=0.3em]line1.south west) {\scriptsize{Key和Value均来自同一句子}};
\node [anchor=north west] (line3) at ([yshift=0.3em]line2.south west) {\scriptsize{编码-解码注意力机制}};
\node [anchor=north west] (line4) at ([yshift=0.3em]line3.south west) {\scriptsize{与前面讲的一样}};
}
{
\node [anchor=west] (line11) at ([xshift=3em,yshift=0em]MatMul.east) {\scriptsize{Query和Key的转置}};
\node [anchor=north west] (line12) at ([yshift=0.3em]line11.south west) {\scriptsize{进行点积,得到句子内部}};
\node [anchor=north west] (line13) at ([yshift=0.3em]line12.south west) {\scriptsize{各个位置的相关性}};
}
{
\node [anchor=west] (line21) at ([yshift=5em]line11.west) {\scriptsize{相关性矩阵在训练中}};
\node [anchor=north west] (line22) at ([yshift=0.3em]line21.south west) {\scriptsize{方差变大,不利于训练}};
\node [anchor=north west] (line23) at ([yshift=0.3em]line22.south west) {\scriptsize{所以对其进行缩放}};
}
{
\node [anchor=west] (line31) at ([yshift=6em]line1.west) {\scriptsize{在编码端,对句子补齐}};
\node [anchor=north west] (line32) at ([yshift=0.3em]line31.south west) {\scriptsize{填充的部分进行屏蔽}};
\node [anchor=north west] (line33) at ([yshift=0.3em]line32.south west) {\scriptsize{解码时看不到未来的信息}};
\node [anchor=north west] (line34) at ([yshift=0.3em]line33.south west) {\scriptsize{需要对未来的信息进行屏蔽}};
}
{
\node [anchor=west] (line41) at ([yshift=4em]line21.west) {\scriptsize{用归一化的相关性打分}};
\node [anchor=north west] (line42) at ([yshift=0.3em]line41.south west) {\scriptsize{对Value进行加权求和}};
}
\begin{pgfonlayer}{background}
{
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=green!10,drop shadow,draw=ugreen] [fit = (line1) (line2) (line3) (line4)] (box1) {};
\node [rectangle,inner sep=0.1em,rounded corners=1pt,very thick,dotted,draw=ugreen] [fit = (Q1) (K1) (V1)] (box0) {};
\draw [->,dotted,very thick,ugreen] ([yshift=-1.5em,xshift=1.2em]box1.east) -- ([yshift=-1.5em,xshift=0.1em]box1.east);
}
{
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=blue!20!white,drop shadow,draw=blue] [fit = (line11) (line12) (line13)] (box2) {};
\draw [->,dotted,very thick,blue] ([yshift=1em,xshift=-2.8em]box2.west) -- ([yshift=1em,xshift=-0.1em]box2.west);
}
{
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=yellow!20,drop shadow,draw=black] [fit = (line21) (line22) (line23)] (box3) {};
\draw [->,dotted,very thick,black] ([xshift=0.1em]Scale3.east) .. controls +(east:1) and +(west:1) .. ([yshift=1.0em]box3.west) ;
}
{
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=red!10,drop shadow,draw=red] [fit = (line31) (line32) (line33) (line34)] (box4) {};
\draw [->,dotted,very thick,red] ([yshift=-1.2em,xshift=2.2em]box4.east) -- ([yshift=-1.2em,xshift=0.1em]box4.east);
}
{
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=blue!20!white,drop shadow,draw=blue] [fit = (line41) (line42)] (box5) {};
\draw [->,dotted,very thick,blue] ([yshift=-0.3em,xshift=-1em]box5.west) -- ([yshift=-0.3em,xshift=-0.1em]box5.west);
}
\end{pgfonlayer}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{Sanode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw,fill=orange!20];
\tikzstyle{Resnode} = [minimum height=1.1em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw,fill=yellow!20];
\tikzstyle{ffnnode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw];
\tikzstyle{outputnode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw];
\tikzstyle{inputnode} = [minimum height=1.4em,minimum width=3.5em,inner sep=3pt,rounded corners=1.5pt,draw,fill=red!10];
\tikzstyle{posnode} = [minimum height=1.4em,minimum width=3.5em,inner sep=3pt,rounded corners=1.5pt,draw,fill=black!5!white];
\tikzstyle{standard} = [rounded corners=3pt]
\node [Sanode,anchor=west] (sa1) at (0,0) {\tiny{$\textbf{Self-Attention}$}};
\node [Resnode,anchor=south] (res1) at ([yshift=0.3em]sa1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn1) at ([yshift=1em]res1.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res2) at ([yshift=0.3em]ffn1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [inputnode,anchor=north west] (input1) at ([yshift=-1em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos1) at ([yshift=-1em]sa1.south east) {\tiny{$\textbf{Postion}$}};
\node [anchor=north] (inputs) at ([yshift=-3em]sa1.south) {\scriptsize{$\textbf{编码器输入: 我\ \ \ \ }$}};
\node [anchor=south] (encoder) at ([xshift=0.2em,yshift=0.6em]res2.north west) {\scriptsize{\textbf{编码器}}};
\draw [->] (sa1.north) -- (res1.south);
\draw [->] (res1.north) -- (ffn1.south);
\draw [->] (ffn1.north) -- (res2.south);
\draw [->] ([yshift=-1em]sa1.south) -- (sa1.south);
\draw [->] ([yshift=-0.3em]inputs.north) -- ([yshift=0.6em]inputs.north);
\node [Sanode,anchor=west] (sa2) at ([xshift=3em]sa1.east) {\tiny{$\textbf{Self-Attention}$}};
\node [Resnode,anchor=south] (res3) at ([yshift=0.3em]sa2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [Sanode,anchor=south] (ed1) at ([yshift=1em]res3.north) {\tiny{$\textbf{Encoder-Decoder Attention}$}};
\node [Resnode,anchor=south] (res4) at ([yshift=0.3em]ed1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn2) at ([yshift=1em]res4.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res5) at ([yshift=0.3em]ffn2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Postion}$}};
\node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\scriptsize{$\textbf{解码器输入: $<$sos$>$ I am fine}$}};
\node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
\node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\scriptsize{$\textbf{解码器输出: I am fine $<$eos$>$ }$}};
\draw [->] (sa2.north) -- (res3.south);
\draw [->] (res3.north) -- (ed1.south);
\draw [->] (ed1.north) -- (res4.south);
\draw [->] (res4.north) -- (ffn2.south);
\draw [->] (ffn2.north) -- (res5.south);
\draw [->] (res5.north) -- (o1.south);
\draw [->] (o1.north) -- ([yshift=0.5em]o1.north);
\draw [->] ([yshift=-1em]sa2.south) -- (sa2.south);
\draw [->] ([yshift=-0.3em]outputs.north) -- ([yshift=0.6em]outputs.north);
\draw[->,standard] ([yshift=-0.5em]sa1.south) -- ([xshift=-4em,yshift=-0.5em]sa1.south) -- ([xshift=-4em,yshift=2.3em]sa1.south) -- ([xshift=-3.5em,yshift=2.3em]sa1.south);
\draw[->,standard] ([yshift=0.5em]res1.north) -- ([xshift=-4em,yshift=0.5em]res1.north) -- ([xshift=-4em,yshift=3.3em]res1.north) -- ([xshift=-3.5em,yshift=3.3em]res1.north);
\draw[->,standard] ([yshift=-0.5em]sa2.south) -- ([xshift=4em,yshift=-0.5em]sa2.south) -- ([xshift=4em,yshift=2.3em]sa2.south) -- ([xshift=3.5em,yshift=2.3em]sa2.south);
\draw[->,standard] ([yshift=0.5em]res3.north) -- ([xshift=4em,yshift=0.5em]res3.north) -- ([xshift=4em,yshift=3.3em]res3.north) -- ([xshift=3.5em,yshift=3.3em]res3.north);
\draw[->,standard] ([yshift=0.5em]res4.north) -- ([xshift=4em,yshift=0.5em]res4.north) -- ([xshift=4em,yshift=3.3em]res4.north) -- ([xshift=3.5em,yshift=3.3em]res4.north);
\draw[->,standard] (res2.north) -- ([yshift=0.5em]res2.north) -- ([xshift=5em,yshift=0.5em]res2.north) -- ([xshift=5em,yshift=-2.2em]res2.north) -- ([xshift=6.5em,yshift=-2.2em]res2.north);
%\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=ugreen!70] [fit = (sa1) (res1) (ffn1) (res2)] (box0) {};
%\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=red!60] [fit = (sa2) (res3) (res5)] (box1) {};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40] [fit = (res1)] (box1) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40] [fit = (res2)] (box2) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40] [fit = (res3)] (box3) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40] [fit = (res4)] (box4) {};
\end{pgfonlayer}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{Sanode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw,fill=orange!20];
\tikzstyle{Resnode} = [minimum height=1.1em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw,fill=yellow!20];
\tikzstyle{ffnnode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw,fill=blue!20];
\tikzstyle{outputnode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw];
\tikzstyle{inputnode} = [minimum height=1.4em,minimum width=3.5em,inner sep=3pt,rounded corners=1.5pt,draw,fill=red!10];
\tikzstyle{posnode} = [minimum height=1.4em,minimum width=3.5em,inner sep=3pt,rounded corners=1.5pt,draw,fill=black!5!white];
\tikzstyle{standard} = [rounded corners=3pt]
\node [Sanode,anchor=west] (sa1) at (0,0) {\tiny{$\textbf{Self-Attention}$}};
\node [Resnode,anchor=south] (res1) at ([yshift=0.3em]sa1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn1) at ([yshift=1em]res1.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res2) at ([yshift=0.3em]ffn1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [inputnode,anchor=north west] (input1) at ([yshift=-1em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos1) at ([yshift=-1em]sa1.south east) {\tiny{$\textbf{Postion}$}};
\node [anchor=north] (inputs) at ([yshift=-3em]sa1.south) {\scriptsize{$\textbf{编码器输入: 我\ \ \ \ }$}};
\node [anchor=south] (encoder) at ([xshift=0.2em,yshift=0.6em]res2.north west) {\scriptsize{\textbf{编码器}}};
\draw [->] (sa1.north) -- (res1.south);
\draw [->] (res1.north) -- (ffn1.south);
\draw [->] (ffn1.north) -- (res2.south);
\draw [->] ([yshift=-1em]sa1.south) -- (sa1.south);
\draw [->] ([yshift=-0.3em]inputs.north) -- ([yshift=0.6em]inputs.north);
\node [Sanode,anchor=west] (sa2) at ([xshift=3em]sa1.east) {\tiny{$\textbf{Self-Attention}$}};
\node [Resnode,anchor=south] (res3) at ([yshift=0.3em]sa2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [Sanode,anchor=south] (ed1) at ([yshift=1em]res3.north) {\tiny{$\textbf{Encoder-Decoder Attention}$}};
\node [Resnode,anchor=south] (res4) at ([yshift=0.3em]ed1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn2) at ([yshift=1em]res4.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res5) at ([yshift=0.3em]ffn2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Postion}$}};
\node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\scriptsize{$\textbf{解码器输入: $<$sos$>$ I am fine}$}};
\node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
\node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\scriptsize{$\textbf{解码器输出: I am fine $<$eos$>$ }$}};
\draw [->] (sa2.north) -- (res3.south);
\draw [->] (res3.north) -- (ed1.south);
\draw [->] (ed1.north) -- (res4.south);
\draw [->] (res4.north) -- (ffn2.south);
\draw [->] (ffn2.north) -- (res5.south);
\draw [->] (res5.north) -- (o1.south);
\draw [->] (o1.north) -- ([yshift=0.5em]o1.north);
\draw [->] ([yshift=-1em]sa2.south) -- (sa2.south);
\draw [->] ([yshift=-0.3em]outputs.north) -- ([yshift=0.6em]outputs.north);
\draw[->,standard] ([yshift=-0.5em]sa1.south) -- ([xshift=-4em,yshift=-0.5em]sa1.south) -- ([xshift=-4em,yshift=2.3em]sa1.south) -- ([xshift=-3.5em,yshift=2.3em]sa1.south);
\draw[->,standard] ([yshift=0.5em]res1.north) -- ([xshift=-4em,yshift=0.5em]res1.north) -- ([xshift=-4em,yshift=3.3em]res1.north) -- ([xshift=-3.5em,yshift=3.3em]res1.north);
\draw[->,standard] ([yshift=-0.5em]sa2.south) -- ([xshift=4em,yshift=-0.5em]sa2.south) -- ([xshift=4em,yshift=2.3em]sa2.south) -- ([xshift=3.5em,yshift=2.3em]sa2.south);
\draw[->,standard] ([yshift=0.5em]res3.north) -- ([xshift=4em,yshift=0.5em]res3.north) -- ([xshift=4em,yshift=3.3em]res3.north) -- ([xshift=3.5em,yshift=3.3em]res3.north);
\draw[->,standard] ([yshift=0.5em]res4.north) -- ([xshift=4em,yshift=0.5em]res4.north) -- ([xshift=4em,yshift=3.3em]res4.north) -- ([xshift=3.5em,yshift=3.3em]res4.north);
\draw[->,standard] (res2.north) -- ([yshift=0.5em]res2.north) -- ([xshift=5em,yshift=0.5em]res2.north) -- ([xshift=5em,yshift=-2.2em]res2.north) -- ([xshift=6.5em,yshift=-2.2em]res2.north);
%\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=ugreen!70] [fit = (sa1) (res1) (ffn1) (res2)] (box0) {};
%\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=red!60] [fit = (sa2) (res3) (res5)] (box1) {};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40] [fit = (ffn1)] (box1) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40] [fit = (ffn2)] (box2) {};
\end{pgfonlayer}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{Sanode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw,fill=orange!20];
\tikzstyle{Resnode} = [minimum height=1.1em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw];
\tikzstyle{ffnnode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw];
\tikzstyle{outputnode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw];
\tikzstyle{inputnode} = [minimum height=1.4em,minimum width=3.5em,inner sep=3pt,rounded corners=1.5pt,draw,fill=red!10];
\tikzstyle{posnode} = [minimum height=1.4em,minimum width=3.5em,inner sep=3pt,rounded corners=1.5pt,draw,fill=black!5!white];
\tikzstyle{standard} = [rounded corners=3pt]
\node [Sanode,anchor=west] (sa1) at (0,0) {\tiny{$\textbf{Self-Attention}$}};
\node [Resnode,anchor=south] (res1) at ([yshift=0.3em]sa1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn1) at ([yshift=1em]res1.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res2) at ([yshift=0.3em]ffn1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [inputnode,anchor=north west] (input1) at ([yshift=-1em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos1) at ([yshift=-1em]sa1.south east) {\tiny{$\textbf{Postion}$}};
\node [anchor=north] (inputs) at ([yshift=-3em]sa1.south) {\scriptsize{$\textbf{编码器输入: 我\ \ \ \ }$}};
\node [anchor=south] (encoder) at ([xshift=0.2em,yshift=0.6em]res2.north west) {\scriptsize{\textbf{编码器}}};
\draw [->] (sa1.north) -- (res1.south);
\draw [->] (res1.north) -- (ffn1.south);
\draw [->] (ffn1.north) -- (res2.south);
\draw [->] ([yshift=-1em]sa1.south) -- (sa1.south);
\draw [->] ([yshift=-0.3em]inputs.north) -- ([yshift=0.6em]inputs.north);
\node [Sanode,anchor=west] (sa2) at ([xshift=3em]sa1.east) {\tiny{$\textbf{Self-Attention}$}};
\node [Resnode,anchor=south] (res3) at ([yshift=0.3em]sa2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [Sanode,anchor=south] (ed1) at ([yshift=1em]res3.north) {\tiny{$\textbf{Encoder-Decoder Attention}$}};
\node [Resnode,anchor=south] (res4) at ([yshift=0.3em]ed1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn2) at ([yshift=1em]res4.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res5) at ([yshift=0.3em]ffn2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Postion}$}};
\node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\scriptsize{$\textbf{解码器输入: $<$sos$>$ I am fine}$}};
\node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
\node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\scriptsize{$\textbf{解码器输出: I am fine $<$eos$>$ }$}};
\draw [->] (sa2.north) -- (res3.south);
\draw [->] (res3.north) -- (ed1.south);
\draw [->] (ed1.north) -- (res4.south);
\draw [->] (res4.north) -- (ffn2.south);
\draw [->] (ffn2.north) -- (res5.south);
\draw [->] (res5.north) -- (o1.south);
\draw [->] (o1.north) -- ([yshift=0.5em]o1.north);
\draw [->] ([yshift=-1em]sa2.south) -- (sa2.south);
\draw [->] ([yshift=-0.3em]outputs.north) -- ([yshift=0.6em]outputs.north);
\draw[->,standard] ([yshift=-0.5em]sa1.south) -- ([xshift=-4em,yshift=-0.5em]sa1.south) -- ([xshift=-4em,yshift=2.3em]sa1.south) -- ([xshift=-3.5em,yshift=2.3em]sa1.south);
\draw[->,standard] ([yshift=0.5em]res1.north) -- ([xshift=-4em,yshift=0.5em]res1.north) -- ([xshift=-4em,yshift=3.3em]res1.north) -- ([xshift=-3.5em,yshift=3.3em]res1.north);
\draw[->,standard] ([yshift=-0.5em]sa2.south) -- ([xshift=4em,yshift=-0.5em]sa2.south) -- ([xshift=4em,yshift=2.3em]sa2.south) -- ([xshift=3.5em,yshift=2.3em]sa2.south);
\draw[->,standard] ([yshift=0.5em]res3.north) -- ([xshift=4em,yshift=0.5em]res3.north) -- ([xshift=4em,yshift=3.3em]res3.north) -- ([xshift=3.5em,yshift=3.3em]res3.north);
\draw[->,standard] ([yshift=0.5em]res4.north) -- ([xshift=4em,yshift=0.5em]res4.north) -- ([xshift=4em,yshift=3.3em]res4.north) -- ([xshift=3.5em,yshift=3.3em]res4.north);
\draw[->,standard] (res2.north) -- ([yshift=0.5em]res2.north) -- ([xshift=5em,yshift=0.5em]res2.north) -- ([xshift=5em,yshift=-2.2em]res2.north) -- ([xshift=6.5em,yshift=-2.2em]res2.north);
%\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=ugreen!70] [fit = (sa1) (res1) (ffn1) (res2)] (box0) {};
%\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=red!60] [fit = (sa2) (res3) (res5)] (box1) {};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40] [fit = (sa1)] (box1) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40] [fit = (sa2)] (box2) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40] [fit = (ed1)] (box3) {};
\end{pgfonlayer}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\node(atten) at (0,0){Attention(};
%%%% Q
\node(tbq) at ([xshift=0.5em,yshift=0]atten.east){
\begin{tabular}{|c|}
\hline
\rowcolor{yellow!20} \\ \hline
\rowcolor{yellow!20} \\ \hline
\rowcolor{yellow!20} \\ \hline
\end{tabular}
};
\node at ([xshift=0em,yshift=0.5em]tbq.north){$\mathbf{Q}$};
\node(comma1) at ([xshift=0.15em,yshift=-2em]tbq.east){,};
%%%% k
\node(tbk) at ([xshift=1em,yshift=0]tbq.east){
\begin{tabular}{|c|}
\hline
\rowcolor{blue!20} \\ \hline
\rowcolor{blue!20} \\ \hline
\rowcolor{blue!20} \\ \hline
\end{tabular}
};
\node at ([xshift=0em,yshift=0.5em]tbk.north){$\mathbf{K}$};
\node(comma2) at ([xshift=0.15em,yshift=-2em]tbk.east){,};
%%%% v
\node(tbv) at ([xshift=1em,yshift=0]tbk.east){
\begin{tabular}{|c|}
\hline
\rowcolor{orange!20} \\ \hline
\rowcolor{orange!20} \\ \hline
\rowcolor{orange!20} \\ \hline
\end{tabular}
};
\node at ([xshift=0em,yshift=0.5em]tbv.north){$\mathbf{V}$};
\node(bra) at ([xshift=0.3em,yshift=0]tbv.east){)};
\node(eq1) at ([xshift=0.5em,yshift=0]bra.east){=};
\node(sof1) at ([xshift=2em,yshift=0]eq1.east){Softmax(};
%-----------------------------------------------------------
%QK+MASK
\node(tbq2) at ([xshift=0.5em,yshift=2em]sof1.east){
\begin{tabular}{|c|}
\hline
\rowcolor{yellow!20} \\ \hline
\rowcolor{yellow!20} \\ \hline
\rowcolor{yellow!20} \\ \hline
\end{tabular}
};
\node at ([xshift=0em,yshift=0.5em]tbq2.north){$\mathbf{Q}$};
% x
\node (times) at ([xshift=1em,yshift=0em]tbq2.east){$\times$};
%k
\node(tbk2) at ([xshift=2em,yshift=0em]times.east){
\begin{tabular}{|l|l|l|}
\hline
\cellcolor{blue!20} & \cellcolor{blue!20} &\cellcolor{blue!20} \\ \hline
\end{tabular}
};
\node at ([xshift=0em,yshift=0.5em]tbk2.north){$\mathbf{K}^{\mathrm{T}}$};
\draw [-] (5.6,-0.2) -- (8,-0.2);
\node at ([xshift=0em,yshift=-3em]times.south){$\sqrt{d_k}$};
% MASK
\node(mask) at ([xshift=3em,yshift=-2em]tbk2.east){
\begin{tabular}{|l|l|l|}
\hline
\cellcolor{green!20} &\cellcolor{green!20} &\cellcolor{green!20} \\ \hline
\cellcolor{green!20} &\cellcolor{green!20} &\cellcolor{green!20} \\ \hline
\cellcolor{green!20} &\cellcolor{green!20} &\cellcolor{green!20} \\ \hline
\end{tabular}
};
\node at ([xshift=0em,yshift=0.5em]mask.north){$\mathbf{Mask}$};
%+
\node at ([xshift=-0.6em,yshift=0em]mask.west){$+$};
%)
\node at ([xshift=0.2em,yshift=0em]mask.east){)};
%%%% v
\node(tbv2) at ([xshift=1.2em,yshift=0]mask.east){
\begin{tabular}{|c|}
\hline
\rowcolor{orange!20} \\ \hline
\rowcolor{orange!20} \\ \hline
\rowcolor{orange!20} \\ \hline
\end{tabular}
};
\node at ([xshift=0em,yshift=0.5em]tbv2.north){$\mathbf{V}$};
%------------------------------
%第二行
\node(eq2) at ([xshift=0em,yshift=-6em]eq1.south){=};
\node(sof2) at ([xshift=2em,yshift=0]eq2.east){Softmax(};
%中间粉色矩阵
\node(mid) at ([xshift=1.5em,yshift=0em]sof2.east){
\begin{tabular}{|l|l|l|}
\hline
\cellcolor{pink!30} &\cellcolor{pink!30} &\cellcolor{pink!30} \\ \hline
\cellcolor{pink!30} &\cellcolor{pink!30} &\cellcolor{pink!30} \\ \hline
\cellcolor{pink!30} &\cellcolor{pink!30} &\cellcolor{pink!30} \\ \hline
\end{tabular}
};
% )
\node(bra2) at ([xshift=0.2em,yshift=0]mid.east){)};
%红色框
\node[rectangle,minimum width=4.0em,minimum height=1.5em,draw=red](p222) at([xshift=0em,yshift=-1.0em]mid.north) {};
%%%% v
\node(tbv3) at ([xshift=0.5em,yshift=0]bra2.east){
\begin{tabular}{|c|}
\hline
\rowcolor{orange!20} \\ \hline
\rowcolor{orange!20} \\ \hline
\rowcolor{orange!20} \\ \hline
\end{tabular}
};
\node at ([xshift=0em,yshift=0.5em]tbv3.north){$\mathbf{V}$};
%------------------------------------
%第三行
\node(eq3) at ([xshift=0em,yshift=-6em]eq2.south){=};
%%%% softmax结果 红色矩阵
\node(result) at ([xshift=2em,yshift=0]eq3.east){
\begin{tabular}{|l|l|l|}
\hline
\cellcolor{red!20} &\cellcolor{red!20} &\cellcolor{red!20} \\ \hline
\cellcolor{red!20}&\cellcolor{red!20} &\cellcolor{red!20} \\ \hline
\cellcolor{red!20} &\cellcolor{red!20} &\cellcolor{red!20} \\ \hline
\end{tabular}
};
% x
\node (times) at ([xshift=0.5em,yshift=0em]result.east){$\times$};
%%%% v
\node(tbv4) at ([xshift=0.5em,yshift=0]times.east){
\begin{tabular}{|c|}
\hline
\rowcolor{orange!20} \\ \hline
\rowcolor{orange!20} \\ \hline
\rowcolor{orange!20} \\ \hline
\end{tabular}
};
\node at ([xshift=0em,yshift=0.5em]tbv4.north){$\mathbf{V}$};
%=
\node(eq4) at ([xshift=0.5em,yshift=0em]tbv4.east){=};
%%%% 灰色矩阵
\node(gre) at ([xshift=0.5em,yshift=0]eq4.east){
\begin{tabular}{|c|}
\hline
\rowcolor{black!15} \\ \hline
\rowcolor{black!15} \\ \hline
\rowcolor{black!15} \\ \hline
\end{tabular}
};
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{lnode} = [minimum height=1.5em,minimum width=3em,inner sep=3pt,rounded corners=1.5pt,draw,fill=orange!20];
\tikzstyle{standard} = [rounded corners=3pt]
\node [lnode,anchor=west] (l1) at (0,0) {\scriptsize{子层1}};
\node [lnode,anchor=west] (l2) at ([xshift=3em]l1.east) {\scriptsize{子层2}};
\node [lnode,anchor=west] (l3) at ([xshift=3em]l2.east) {\scriptsize{子层3}};
\node [anchor=west,inner sep=2pt] (dot1) at ([xshift=1em]l3.east) {\scriptsize{$\textbf{...}$}};
\node [lnode,anchor=west] (l4) at ([xshift=1em]dot1.east) {\scriptsize{子层$n$}};
\node [anchor=west] (plus1) at ([xshift=0.9em]l1.east) {\scriptsize{$\mathbf{\oplus}$}};
\node [anchor=west] (plus2) at ([xshift=0.9em]l2.east) {\scriptsize{$\mathbf{\oplus}$}};
\draw [->,thick] ([xshift=-1.5em]l1.west) -- ([xshift=-0.1em]l1.west);
\draw [->,thick] ([xshift=0.1em]l1.east) -- ([xshift=0.2em]plus1.west);
\draw [->,thick] ([xshift=-0.2em]plus1.east) -- ([xshift=-0.1em]l2.west);
\draw [->,thick] ([xshift=0.1em]l2.east) -- ([xshift=0.2em]plus2.west);
\draw [->,thick] ([xshift=-0.2em]plus2.east) -- ([xshift=-0.1em]l3.west);
\draw [->,thick] ([xshift=0.1em]l3.east) -- ([xshift=-0.1em]dot1.west);
\draw [->,thick] ([xshift=0.1em]dot1.east) -- ([xshift=-0.1em]l4.west);
\draw [->,thick] ([xshift=0.1em]l4.east) -- ([xshift=1.5em]l4.east);
\draw[->,standard,thick] ([xshift=-0.8em]l1.west) -- ([xshift=-0.8em,yshift=2em]l1.west) -- ([yshift=2em]plus1.center) -- ([yshift=-0.2em]plus1.north);
\draw[->,standard,thick] ([xshift=-0.8em]l2.west) -- ([xshift=-0.8em,yshift=2em]l2.west) -- ([yshift=2em]plus2.center) -- ([yshift=-0.2em]plus2.north);
\draw [->,very thick,red] ([xshift=1.5em,yshift=-0.3em]l4.east) -- ([xshift=0.1em,,yshift=-0.3em]l4.east);
\draw [->,very thick,red] ([xshift=-0.1em,yshift=-0.3em]l4.west) -- ([xshift=0.1em,yshift=-0.3em]dot1.east);
\draw [->,very thick,red] ([xshift=-0.1em,yshift=-0.3em]dot1.west) -- ([xshift=0.1em,yshift=-0.3em]l3.east);
\draw[->,standard,very thick,red] ([xshift=-0.3em,yshift=-0.2em]plus2.north) -- ([xshift=-0.3em,yshift=1.8em]plus2.center) -- ([xshift=-0.5em,yshift=1.8em]l2.west) -- ([xshift=-0.5em,yshift=0.2em]l2.west);
\draw[->,standard,very thick,red] ([xshift=-0.3em,yshift=-0.2em]plus1.north) -- ([xshift=-0.3em,yshift=1.8em]plus1.center) -- ([xshift=-0.5em,yshift=1.8em]l1.west) -- ([xshift=-0.5em,yshift=0.2em]l1.west);
\node [anchor=west] (label1) at ([xshift=1em,yshift=1.5em]l3.north) {\tiny{前向计算}};
\draw [->,thick] ([xshift=-1.5em]label1.west) -- ([xshift=-0.1em]label1.west);
\node [anchor=west] (label2) at ([xshift=2.5em]label1.east) {\tiny{反向传播}};
\draw [->,thick,red] ([xshift=-1.5em]label2.west) -- ([xshift=-0.1em]label2.west);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\node[rounded corners=1pt,minimum width=11.0em,minimum height=2.0em,fill=pink!30,draw=black](p1) at (0,0) {\small{Self-Attention}};
\node[anchor=north](word1) at ([xshift=0.0em,yshift=-2.0em]p1.south) {\small \textbf{K}};
\node[anchor=west](word2) at ([xshift=2.2em]word1.east) {\small \textbf{V}};
\node[anchor=east](word3) at ([xshift=-2.2em]word1.west) {\small \textbf{Q}};
\draw[->,thick](word1.north)--(p1.south);
\draw[->,thick]([xshift=-3.6em]word1.north)--([xshift=-3.6em]p1.south);
\draw[->,thick]([xshift=3.6em]word1.north)--([xshift=3.6em]p1.south);
\node[anchor=north,rounded corners=1pt,minimum width=11.0em,minimum height=3.5em,draw=ugreen!70,very thick,dotted](p1-1) at ([yshift=-5.2em]p1.south) {\small{解码端每个位置的表示}};
\draw [->,thick,dashed] (word3.south) .. controls +(south:1.5em) and +(north:1.5em) .. ([xshift=-0.4em]p1-1.north);
\draw [->,thick,dashed](word1.south) --(p1-1.north);
\draw [->,thick,dashed] (word2.south) .. controls +(south:1.0em) and +(north:1.5em) .. ([xshift=0.4em]p1-1.north);
\node[anchor=north](caption1) at ([xshift=0.0em,yshift=-9.5em]p1.south){\small{(a) Self-Attention的输入}};
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\node[anchor=west,rounded corners=1pt,minimum width=14.0em,minimum height=2.0em,fill=pink!30,draw=black](p2) at ([xshift=5.0em]p1.east){\small{Encoder-Decoder Attention}};
\node[anchor=north](word1-2) at ([xshift=0.0em,yshift=-2.0em]p2.south) {\small \textbf{K}};
\node[anchor=west](word2-2) at ([xshift=2.2em]word1-2.east) {\small \textbf{V}};
\node[anchor=east](word3-2) at ([xshift=-2.2em]word1-2.west) {\small \textbf{Q}};
\draw[->,thick](word1-2.north)--(p2.south);
\draw[->,thick]([xshift=-3.6em]word1-2.north)--([xshift=-3.6em]p2.south);
\draw[->,thick]([xshift=3.6em]word1-2.north)--([xshift=3.6em]p2.south);
\node[anchor=north,rounded corners=1pt](p2-1) at ([xshift=-3.55em,yshift=-5.5em]p2.south) {\small{解码端每个}};
\node[anchor=north,rounded corners=1pt](p2-2) at ([xshift=-3.55em,yshift=-6.8em]p2.south) {\small{位置的表示}};
\begin{pgfonlayer}{background}
{
\node[rounded corners=1pt,draw=ugreen!70,very thick,dotted] [fit = (p2-1) (p2-2)] (p2-12) {};
}
\end{pgfonlayer}
\node[anchor=north,rounded corners=1pt](p2-3) at ([xshift=3.55em,yshift=-5.5em]p2.south) {\small{编码端每个}};
\node[anchor=north,rounded corners=1pt](p2-4) at ([xshift=3.55em,yshift=-6.8em]p2.south) {\small{位置的表示}};
\begin{pgfonlayer}{background}
{
\node[rounded corners=1pt,draw=ugreen!70,very thick,dotted] [fit = (p2-3) (p2-4)] (p2-34) {};
}
\end{pgfonlayer}
\draw[<-,thick,dashed]([xshift=-3.6em,yshift=-3.2em]word1-2.north)--([xshift=-3.6em,yshift=-3.2em]p2.south);
\draw[<-,thick,dashed]([xshift=3.6em,yshift=-3.2em]word1-2.north)--([xshift=3.6em,yshift=-3.2em]p2.south);
\draw [->,thick,dashed] (word1-2.south) .. controls +(south:1em) and +(north:1.5em) .. ([yshift=0.3em,xshift=-0.4em]p2-3.north);
\node[anchor=north](caption2) at ([xshift=0.0em,yshift=-9.5em]p2.south){\small{(b) Encoder-Decoder Attention的输入}};
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnnnode} = [minimum height=1.1em,minimum width=2.1em,inner sep=2pt,rounded corners=1pt,draw,fill=red!20];
\tikzstyle{lossnode} = [minimum height=1.1em,minimum width=6em,inner sep=2pt,rounded corners=1pt,draw,fill=red!20];
\node [rnnnode,anchor=west] (h1) at (0,0) {\tiny{$\textbf{h}_1$}};
\node [rnnnode,anchor=west] (h2) at ([xshift=1em]h1.east) {\tiny{$\textbf{h}_2$}};
\node [rnnnode,anchor=west] (h3) at ([xshift=1em]h2.east) {\tiny{$\textbf{h}_3$}};
\node [rnnnode,anchor=north,fill=green!20] (e1) at ([yshift=-1em]h1.south) {\tiny{$e_x()$}};
\node [rnnnode,anchor=west,fill=green!20] (e2) at ([xshift=1em]e1.east) {\tiny{$e_x()$}};
\node [rnnnode,anchor=west,fill=green!20] (e3) at ([xshift=1em]e2.east) {\tiny{$e_x()$}};
\node [anchor=north,inner sep=2pt] (w1) at ([yshift=-0.6em]e1.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w2) at ([yshift=-0.6em]e2.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w3) at ([yshift=-0.6em]e3.south) {\tiny{$\langle$eos$\rangle$}};
\node [anchor=south] (dot1) at ([xshift=0.4em,yshift=-0.7em]h1.south) {\tiny{...}};
\node [anchor=south] (dot2) at ([xshift=-0.4em,yshift=-0.7em]h3.south) {\tiny{...}};
\draw [->] (w1.north) -- ([yshift=-0.1em]e1.south);
\draw [->] (w2.north) -- ([yshift=-0.1em]e2.south);
\draw [->] (w3.north) -- ([yshift=-0.1em]e3.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]h1.south);
\draw [->] ([yshift=0.1em]e2.north) -- ([yshift=-0.1em]h2.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]h3.south);
\draw [->] ([xshift=0.2em,yshift=0.1em]e1.north) .. controls +(north:0.3) and +(south:0.4) .. ([xshift=-0.3em,yshift=-0.1em]h2.south);
\draw [->] ([xshift=-0.2em,yshift=0.1em]e3.north) .. controls +(north:0.3) and +(south:0.4) .. ([xshift=0.3em,yshift=-0.1em]h2.south);
\node [anchor=south] (encoder) at ([xshift=-0.2em]h1.north west) {\scriptsize{\textbf{编码器}}};
{
\node [rnnnode,anchor=west,fill=green!20] (t1) at ([xshift=3em]e3.east) {\tiny{$e_y()$}};
\node [rnnnode,anchor=west,fill=green!20] (t2) at ([xshift=1.5em]t1.east) {\tiny{$e_y()$}};
\node [rnnnode,anchor=west,fill=green!20] (t3) at ([xshift=1.5em]t2.east) {\tiny{$e_y()$}};
\node [rnnnode,anchor=west,fill=green!20] (t4) at ([xshift=1.5em]t3.east) {\tiny{$e_y()$}};
}
{
\node [rnnnode,anchor=south] (s1) at ([yshift=1em]t1.north) {\tiny{$\textbf{s}_1$}};
\node [rnnnode,anchor=south] (s2) at ([yshift=1em]t2.north) {\tiny{$\textbf{s}_2$}};
\node [rnnnode,anchor=south] (s3) at ([yshift=1em]t3.north) {\tiny{$\textbf{s}_3$}};
\node [rnnnode,anchor=south] (s4) at ([yshift=1em]t4.north) {\tiny{$\textbf{s}_4$}};
%\node [anchor=south] (dot3) at ([xshift=-0.4em,yshift=-0.7em]s3.south) {\tiny{...}};
\node [anchor=south] (dot4) at ([xshift=-0.4em,yshift=-0.7em]s4.south) {\tiny{...}};
\draw [->] ([xshift=-0.6em,yshift=-0.5em]s3.south) .. controls +(north:0) and +(south:0.2) .. ([xshift=-0.3em,yshift=-0.1em]s3.south);
\draw [->] ([xshift=-1.5em,yshift=-0.5em]s3.south) .. controls +(north:0) and +(south:0.15) .. ([xshift=-0.6em,yshift=-0.1em]s3.south);
}
{
\node [rnnnode,anchor=south] (f1) at ([yshift=1em]s1.north) {\tiny{$\textbf{f}_1$}};
\node [rnnnode,anchor=south] (f2) at ([yshift=1em]s2.north) {\tiny{$\textbf{f}_2$}};
\node [rnnnode,anchor=south] (f3) at ([yshift=1em]s3.north) {\tiny{$\textbf{f}_3$}};
\node [rnnnode,anchor=south] (f4) at ([yshift=1em]s4.north) {\tiny{$\textbf{f}_4$}};
\node [rnnnode,anchor=south,fill=blue!20] (o1) at ([yshift=1em]f1.north) {\tiny{softmax}};
\node [rnnnode,anchor=south,fill=blue!20] (o2) at ([yshift=1em]f2.north) {\tiny{softmax}};
\node [rnnnode,anchor=south,fill=blue!20] (o3) at ([yshift=1em]f3.north) {\tiny{softmax}};
\node [rnnnode,anchor=south,fill=blue!20] (o4) at ([yshift=1em]f4.north) {\tiny{softmax}};
\node [anchor=east] (decoder) at ([xshift=-0.3em,yshift=0.5em]o1.north west) {\scriptsize{\textbf{解码器}}};
\node [anchor=south,fill=black!5!white,minimum height=1.1em,minimum width=13em,inner sep=2pt,rounded corners=1pt,draw] (loss) at ([xshift=1.8em,yshift=1em]o2.north) {\scriptsize{\textbf{Cross Entropy Loss}}};
}
{
\node [anchor=north,inner sep=2pt] (wt1) at ([yshift=-0.6em]t1.south) {\tiny{$\langle$eos$\rangle$}};
\node [anchor=north,inner sep=2pt] (wt2) at ([yshift=-0.6em]t2.south) {\tiny{How}};
\node [anchor=north,inner sep=2pt] (wt3) at ([yshift=-0.8em]t3.south) {\tiny{are}};
\node [anchor=north,inner sep=2pt] (wt4) at ([yshift=-0.8em]t4.south) {\tiny{you}};
}
{
\foreach \x in {1,2,3,4}{
\draw [->] ([yshift=-0.7em]t\x.south) -- ([yshift=-0.1em]t\x.south);
\draw [->] ([yshift=0.1em]t\x.north) -- ([yshift=-0.1em]s\x.south);
\draw [->] ([xshift=0.2em,yshift=0.1em]t1.north) .. controls +(north:0.3) and +(south:0.3) .. ([xshift=-0.3em,yshift=-0.1em]s2.south);
}
}
{
\foreach \x in {1,2,3,4}{
\draw [->] ([yshift=0.1em]s\x.north) -- ([yshift=-0.1em]f\x.south);
\draw [->] ([yshift=0.1em]f\x.north) -- ([yshift=-0.1em]o\x.south);
\draw [->] ([yshift=0.1em]o\x.north) -- ([yshift=0.8em]o\x.north);
}
}
{
\node [circle,draw,anchor=south,inner sep=3pt,fill=orange!20] (c1) at ([yshift=2em]h2.north) {\tiny{$\textbf{C}_1$}};
\node [anchor=south] (c1label) at (c1.north) {\tiny{\textbf{编码-解码注意力机制:上下文}}};
\draw [->] (h1.north) .. controls +(north:0.6) and +(250:0.9) .. (c1.250);
\draw [->] (h2.north) .. controls +(north:0.6) and +(270:0.9) .. (c1.270);
\draw [->] (h3.north) .. controls +(north:0.6) and +(290:0.9) .. (c1.290);
\draw [->] ([yshift=0.3em]s1.west) .. controls +(west:1) and +(east:1) .. (c1.-30);
\draw [->] (c1.0) .. controls +(east:1) and +(west:1) .. ([yshift=0em]f1.west);
}
{
\node [circle,draw,anchor=north,inner sep=3pt,fill=orange!20] (c2) at ([yshift=-2em]t1.south) {\tiny{$\textbf{C}_2$}};
\draw [->] ([xshift=-0.7em]c2.west) -- ([xshift=-0.1em]c2.west);
\draw [->] ([xshift=0.1em]c2.east) .. controls +(east:0.6) and +(west:0.8) ..([yshift=-0.3em,xshift=-0.1em]f2.west);
\node [circle,draw,anchor=north,inner sep=3pt,fill=orange!20] (c3) at ([yshift=-2em]t2.south) {\tiny{$\textbf{C}_3$}};
\draw [->] ([xshift=-0.7em]c3.west) -- ([xshift=-0.1em]c3.west);
\draw [->] ([xshift=0.1em]c3.east) .. controls +(east:0.6) and +(west:0.8) ..([yshift=-0.3em,xshift=-0.1em]f3.west);
\node [circle,draw,anchor=north,inner sep=3pt,fill=orange!20] (c4) at ([yshift=-2em]t3.south) {\tiny{$\textbf{C}_4$}};
\draw [->] ([xshift=-0.7em]c4.west) -- ([xshift=-0.1em]c4.west);
\draw [->] ([xshift=0.1em]c4.east) .. controls +(east:0.6) and +(west:0.8) ..([yshift=-0.3em,xshift=-0.1em]f4.west);
}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{Sanode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw];
\tikzstyle{Resnode} = [minimum height=1.1em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw];
\tikzstyle{ffnnode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw];
\tikzstyle{outputnode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw];
\tikzstyle{inputnode} = [minimum height=1.4em,minimum width=3.5em,inner sep=3pt,rounded corners=1.5pt,draw,fill=red!10];
\tikzstyle{posnode} = [minimum height=1.4em,minimum width=3.5em,inner sep=3pt,rounded corners=1.5pt,draw,fill=black!5!white];
\tikzstyle{standard} = [rounded corners=3pt]
\node [Sanode,anchor=west] (sa1) at (0,0) {\tiny{$\textbf{Self-Attention}$}};
\node [Resnode,anchor=south] (res1) at ([yshift=0.3em]sa1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn1) at ([yshift=1em]res1.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res2) at ([yshift=0.3em]ffn1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [inputnode,anchor=north west] (input1) at ([yshift=-1em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos1) at ([yshift=-1em]sa1.south east) {\tiny{$\textbf{Postion}$}};
\node [anchor=north] (inputs) at ([yshift=-3em]sa1.south) {\scriptsize{$\textbf{编码器输入: 我\ \ \ \ }$}};
\node [anchor=south] (encoder) at ([xshift=0.2em,yshift=0.6em]res2.north west) {\scriptsize{\textbf{编码器}}};
\draw [->] (sa1.north) -- (res1.south);
\draw [->] (res1.north) -- (ffn1.south);
\draw [->] (ffn1.north) -- (res2.south);
\draw [->] ([yshift=-1em]sa1.south) -- (sa1.south);
\draw [->] ([yshift=-0.3em]inputs.north) -- ([yshift=0.6em]inputs.north);
\node [Sanode,anchor=west] (sa2) at ([xshift=3em]sa1.east) {\tiny{$\textbf{Self-Attention}$}};
\node [Resnode,anchor=south] (res3) at ([yshift=0.3em]sa2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [Sanode,anchor=south] (ed1) at ([yshift=1em]res3.north) {\tiny{$\textbf{Encoder-Decoder Attention}$}};
\node [Resnode,anchor=south] (res4) at ([yshift=0.3em]ed1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn2) at ([yshift=1em]res4.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res5) at ([yshift=0.3em]ffn2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Postion}$}};
\node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\scriptsize{$\textbf{解码器输入: $<$sos$>$ I am fine}$}};
\node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
\node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\scriptsize{$\textbf{解码器输出: I am fine $<$eos$>$ }$}};
\draw [->] (sa2.north) -- (res3.south);
\draw [->] (res3.north) -- (ed1.south);
\draw [->] (ed1.north) -- (res4.south);
\draw [->] (res4.north) -- (ffn2.south);
\draw [->] (ffn2.north) -- (res5.south);
\draw [->] (res5.north) -- (o1.south);
\draw [->] (o1.north) -- ([yshift=0.5em]o1.north);
\draw [->] ([yshift=-1em]sa2.south) -- (sa2.south);
\draw [->] ([yshift=-0.3em]outputs.north) -- ([yshift=0.6em]outputs.north);
\draw[->,standard] ([yshift=-0.5em]sa1.south) -- ([xshift=-4em,yshift=-0.5em]sa1.south) -- ([xshift=-4em,yshift=2.3em]sa1.south) -- ([xshift=-3.5em,yshift=2.3em]sa1.south);
\draw[->,standard] ([yshift=0.5em]res1.north) -- ([xshift=-4em,yshift=0.5em]res1.north) -- ([xshift=-4em,yshift=3.3em]res1.north) -- ([xshift=-3.5em,yshift=3.3em]res1.north);
\draw[->,standard] ([yshift=-0.5em]sa2.south) -- ([xshift=4em,yshift=-0.5em]sa2.south) -- ([xshift=4em,yshift=2.3em]sa2.south) -- ([xshift=3.5em,yshift=2.3em]sa2.south);
\draw[->,standard] ([yshift=0.5em]res3.north) -- ([xshift=4em,yshift=0.5em]res3.north) -- ([xshift=4em,yshift=3.3em]res3.north) -- ([xshift=3.5em,yshift=3.3em]res3.north);
\draw[->,standard] ([yshift=0.5em]res4.north) -- ([xshift=4em,yshift=0.5em]res4.north) -- ([xshift=4em,yshift=3.3em]res4.north) -- ([xshift=3.5em,yshift=3.3em]res4.north);
\draw[->,standard] (res2.north) -- ([yshift=0.5em]res2.north) -- ([xshift=5em,yshift=0.5em]res2.north) -- ([xshift=5em,yshift=-2.2em]res2.north) -- ([xshift=6.5em,yshift=-2.2em]res2.north);
%\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=ugreen!70] [fit = (sa1) (res1) (ffn1) (res2)] (box0) {};
%\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=red!60] [fit = (sa2) (res3) (res5)] (box1) {};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40] [fit = (input1) (pos1)] (box1) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40] [fit = (input2) (pos2)] (box2) {};
\end{pgfonlayer}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{Sanode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw,fill=orange!20];
\tikzstyle{Resnode} = [minimum height=1.1em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw,fill=yellow!20];
\tikzstyle{ffnnode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw,fill=blue!10];
\tikzstyle{outputnode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw,fill=blue!30];
\tikzstyle{inputnode} = [minimum height=1.4em,minimum width=3.5em,inner sep=3pt,rounded corners=1.5pt,draw,fill=red!10];
\tikzstyle{posnode} = [minimum height=1.4em,minimum width=3.5em,inner sep=3pt,rounded corners=1.5pt,draw,fill=black!5!white];
\tikzstyle{standard} = [rounded corners=3pt]
\node [Sanode,anchor=west] (sa1) at (0,0) {\tiny{$\textbf{Self-Attention}$}};
\node [Resnode,anchor=south] (res1) at ([yshift=0.3em]sa1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn1) at ([yshift=1em]res1.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res2) at ([yshift=0.3em]ffn1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [inputnode,anchor=north west] (input1) at ([yshift=-1em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos1) at ([yshift=-1em]sa1.south east) {\tiny{$\textbf{Postion}$}};
\node [anchor=north] (inputs) at ([yshift=-3em]sa1.south) {\scriptsize{$\textbf{编码器输入: 我\ \ \ \ }$}};
\node [anchor=south] (encoder) at ([xshift=0.2em,yshift=0.6em]res2.north west) {\scriptsize{\textbf{编码器}}};
\draw [->] (sa1.north) -- (res1.south);
\draw [->] (res1.north) -- (ffn1.south);
\draw [->] (ffn1.north) -- (res2.south);
\draw [->] ([yshift=-1em]sa1.south) -- (sa1.south);
\draw [->] ([yshift=-0.3em]inputs.north) -- ([yshift=0.6em]inputs.north);
\node [Sanode,anchor=west] (sa2) at ([xshift=3em]sa1.east) {\tiny{$\textbf{Self-Attention}$}};
\node [Resnode,anchor=south] (res3) at ([yshift=0.3em]sa2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [Sanode,anchor=south] (ed1) at ([yshift=1em]res3.north) {\tiny{$\textbf{Encoder-Decoder Attention}$}};
\node [Resnode,anchor=south] (res4) at ([yshift=0.3em]ed1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn2) at ([yshift=1em]res4.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res5) at ([yshift=0.3em]ffn2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Postion}$}};
\node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\scriptsize{$\textbf{解码器输入: $<$sos$>$ I am fine}$}};
\node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
\node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\scriptsize{$\textbf{解码器输出: I am fine $<$eos$>$ }$}};
\draw [->] (sa2.north) -- (res3.south);
\draw [->] (res3.north) -- (ed1.south);
\draw [->] (ed1.north) -- (res4.south);
\draw [->] (res4.north) -- (ffn2.south);
\draw [->] (ffn2.north) -- (res5.south);
\draw [->] (res5.north) -- (o1.south);
\draw [->] (o1.north) -- ([yshift=0.5em]o1.north);
\draw [->] ([yshift=-1em]sa2.south) -- (sa2.south);
\draw [->] ([yshift=-0.3em]outputs.north) -- ([yshift=0.6em]outputs.north);
\draw[->,standard] ([yshift=-0.5em]sa1.south) -- ([xshift=-4em,yshift=-0.5em]sa1.south) -- ([xshift=-4em,yshift=2.3em]sa1.south) -- ([xshift=-3.5em,yshift=2.3em]sa1.south);
\draw[->,standard] ([yshift=0.5em]res1.north) -- ([xshift=-4em,yshift=0.5em]res1.north) -- ([xshift=-4em,yshift=3.3em]res1.north) -- ([xshift=-3.5em,yshift=3.3em]res1.north);
\draw[->,standard] ([yshift=-0.5em]sa2.south) -- ([xshift=4em,yshift=-0.5em]sa2.south) -- ([xshift=4em,yshift=2.3em]sa2.south) -- ([xshift=3.5em,yshift=2.3em]sa2.south);
\draw[->,standard] ([yshift=0.5em]res3.north) -- ([xshift=4em,yshift=0.5em]res3.north) -- ([xshift=4em,yshift=3.3em]res3.north) -- ([xshift=3.5em,yshift=3.3em]res3.north);
\draw[->,standard] ([yshift=0.5em]res4.north) -- ([xshift=4em,yshift=0.5em]res4.north) -- ([xshift=4em,yshift=3.3em]res4.north) -- ([xshift=3.5em,yshift=3.3em]res4.north);
\draw[->,standard] (res2.north) -- ([yshift=0.5em]res2.north) -- ([xshift=5em,yshift=0.5em]res2.north) -- ([xshift=5em,yshift=-2.2em]res2.north) -- ([xshift=6.5em,yshift=-2.2em]res2.north);
\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=ugreen!70] [fit = (sa1) (res1) (ffn1) (res2)] (box0) {};
\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=red!60] [fit = (sa2) (res3) (res5)] (box1) {};
\node [ugreen,font=\scriptsize] (count) at ([xshift=-1.5em,yshift=-1em]encoder.south) {$6\times$};
\node [red,font=\scriptsize] (count) at ([xshift=10.8em,yshift=0em]decoder.south) {$\times 6$};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
......@@ -21,10 +21,531 @@
% CHAPTER 11
%----------------------------------------------------------------------------------------
\chapter{基于自注意力机制的模型}
\chapter{基于自注意力或卷积的模型}
%----------------------------------------------------------------------------------------
% NEW SECTION
% NEW SECTION 11.1
%----------------------------------------------------------------------------------------
\section{自注意力机制}
\vspace{0.5em}
\label{sec:11.4.1}
\section{}
\parinterval 首先,再回顾一下循环神经网络处理文字序列的过程。如图\ref{fig:11-36}所示,对于单词序列$\{ w_1,...,w_m \}$,处理第$m$个单词$w_m$时(绿色方框部分),需要输入前一时刻的信息(即处理单词$w_{m-1}$),而$w_{m-1}$又依赖于$w_{m-2}$,以此类推。也就是说,如果想建立$w_m$$w_1$之间的关系,需要$m-1$次信息传递。对于长序列来说,词汇之间信息传递距离过长会导致信息在传递过程中丢失,同时这种按顺序建模的方式也使得系统对序列的处理十分缓慢。
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-dependencies-between-words-in-a-recurrent-neural-network}
\caption{循环神经网络中单词之间的依赖关系}
\label{fig:11-36}
\end{figure}
%----------------------------------------------
\parinterval 那么能否摆脱这种顺序传递信息的方式,直接对不同位置单词之间的关系进行建模,即将信息传递的距离拉近为1?{\small\sffamily\bfseries{自注意力机制}}\index{自注意力机制}(Self-Attention)\index{Self-Attention}的提出便有效解决了这个问题\cite{DBLP:journals/corr/LinFSYXZB17}。图\ref{fig:11-37}给出了自注意力机制对序列进行建模的示例。对于单词$w_m$,自注意力机制直接建立它与前$m-1$个单词之间的关系。也就是说,$w_m$与序列中所有其他单词的距离都是1。这种方式很好地解决了长距离依赖问题,同时由于单词之间的联系都是相互独立的,因此也大大提高了模型的并行度。
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-dependencies-between-words-of-attention}
\caption{自注意力机制中单词之间的依赖关系}
\label{fig:11-37}
\end{figure}
%----------------------------------------------
\parinterval 自注意力机制也可以被看做是一个序列表示模型。比如,对于每个目标位置$j$,都生成一个与之对应的源语言句子表示,它的形式为:$\mathbf{C}_j = \sum_i \alpha_{i,j}\mathbf{h}_i$,其中$\mathbf{h}_i$ 为源语言句子每个位置的表示结果,$\alpha_{i,j}$是目标位置$j$$\mathbf{h}_i$的注意力权重。而自注意力机制不仅可以处理两种语言句子之间的对应,它也可以对单语句子进行表示。以源语言句子为例,同时参考\ref{sec:11.3.4.3} 节的内容,自注意力机制将序列中每个位置的表示$\mathbf{h}_i$看作$\mathrm{query}$(查询),并且将所有位置的表示看作$\mathrm{key}$(键)和$\mathrm{value}$(值)。自注意力模型通过计算当前位置与所有位置的匹配程度,也就是在注意力机制中提到的注意力权重,来对各个位置的$\mathrm{value}$进行加权求和。得到的结果可以被看作是在这个句子中当前位置的抽象表示。这个过程,可以叠加多次,形成多层注意力模型,对输入序列中各个位置进行更深层的表示。
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-example-of-self-attention-mechanism-calculation}
\caption{自注意力计算实例}
\label{fig:11-38}
\end{figure}
%----------------------------------------------
\parinterval 举个例子,如图\ref{fig:11-38}所示,一个汉语句子包含5个词。这里,用$\mathbf{h}$(``你'')表示``你''当前的表示结果。如果把``你''看作目标,这时$\mathrm{query}$就是$\mathbf{h}$(``你''),$\mathrm{key}$$\mathrm{value}$是图中所有位置的表示,即:{$\mathbf{h}$(``你'')、$\mathbf{h}$(``什么'')、$\mathbf{h}$(``也'')、$\mathbf{h}$(``没'')、$\mathbf{h}$(`` 学'')}。在自注意力模型中,首先计算$\mathrm{query}$$\mathrm{key}$的相关度,这里用$\alpha_i$表示$\mathbf{h}$(``你'')和位置$i$的表示之间的相关性。然后,把$\alpha_i$作为权重,对不同位置上的$\mathrm{value}$进行加权求和。最终,得到新的表示结果$\tilde{\mathbf{h}}$ (``你'' ):
\begin{eqnarray}
\tilde{\mathbf{h}} (\textrm{``你''} ) = \alpha_1 {\mathbf{h}} (\textrm{``你''} )
+ \alpha_2 {\mathbf{h}} (\textrm{``什么 ''})
+ \alpha_3 {\mathbf{h}} (\textrm{``也''} )
+ \alpha_4 {\mathbf{h}} (\textrm{``没''} )
+\alpha_5 {\mathbf{h}} (\textrm{``学''} )
\label{eq:11-42}
\end{eqnarray}
\parinterval 同理,也可以用同样的方法处理这个句子中的其他单词。可以看出,在注意力机制中,并不是使用类似于循环神经网络的记忆能力去访问历史信息。序列中所有单词之间的信息都是通过同一种操作($\mathrm{query}$$\mathrm{key}$的相关度)进行处理。这样,表示结果$\tilde{\mathbf{h}} (\textrm{``你''})$在包含``你''这个单词的信息的同时,也包含了序列中其他词的信息。也就是,序列中每一个位置的表示结果中,都包含了其他位置的信息。从这个角度说,$\tilde{\mathbf{h}} (\textrm{``你''})$已经不再是单词''你''自身的表示结果,而是一种在单词``你''的位置上的全局信息的表示。
\parinterval 通常,也把生成\{ $\tilde{\mathbf{h}}(\mathbf{w}_i)$ \}的过程称为{\small\sffamily\bfseries{特征提取}}\index{特征提取},而实现这个过程的模型被称为特征提取器。循环神经网络、自注意力模型都是典型的特征提取器。特征提取是神经机器翻译系统的关键步骤,在随后的内容中可以看到自注意力模型是一个非常适合机器翻译任务的特征提取器。
%----------------------------------------------------------------------------------------
% NEW SECTION 11.2
%----------------------------------------------------------------------------------------
\sectionnewpage
\section{Transformer}
\parinterval 编码器-解码器框架提供了一个非常灵活的机制,因为开发者只需要设计编码器和解码器的结构就能完成机器翻译。但是,架构的设计是深度学习中最具挑战的工作,优秀的架构往往需要长时间的探索和大量的实验验证,而且还需要一点点``灵感''。
\parinterval 前面介绍的基于循环神经网络的翻译模型和注意力机制就是研究人员通过长期的实践发现的神经网络架构。除了神经机器翻译,它们也被广泛地应用于语音处理、图像处理等领域。虽然循环神经网络很强大,但是人们也发现了一些弊端。一个突出的问题是,循环神经网络每个循环单元都有向前依赖性,也就是当前时间步的处理依赖前一时间步处理的结果。这个性质可以使序列的``历史''信息不断被传递,但是也造成模型运行效率的下降。特别是对于自然语言处理任务,序列往往较长,无论是传统的RNN结构,还是更为复杂的LSTM结构,都需要很多次循环单元的处理才能够捕捉到单词之间的长距离依赖。由于需要多个循环单元的处理,距离较远的两个单词之间的信息传递变得很复杂。
\parinterval 针对这些问题,谷歌的研究人员提出了一种全新的模型$\ \dash\ $Transformer\cite{NIPS2017_7181}。与循环神经网络等传统模型不同,Transformer模型仅仅使用一种被称作自注意力机制的方法和标准的前馈神经网络,完全不依赖任何循环单元或者卷积操作。自注意力机制的优点在于可以直接对序列中任意两个单元之间的关系进行建模,这使得长距离依赖等问题可以更好地被求解。此外,自注意力机制非常适合在GPU 上进行并行化,因此模型训练的速度更快。表\ref{tab:11-11}对比了RNN、CNN、Transformer三种模型的时间复杂度。
%----------------------------------------------
\begin{table}[htp]
\centering
\caption{ RNN、CNN、Transformer的对比\cite{NIPS2017_7181}$n$表示序列长度,$d$表示隐层大小,$k$表示卷积核大小) }
\label{tab:11-11}
\begin{tabular}{l | l l l}
\rule{0pt}{20pt} Layer Type & \begin{tabular}[l]{@{}l@{}}Complexity\\ per Layer\end{tabular} & \begin{tabular}[l]{@{}l@{}}Sequential\\ Operations\end{tabular} & \begin{tabular}[l]{@{}l@{}}Maximum\\ Path Length\end{tabular} \\ \hline
\rule{0pt}{13pt}Self-Attention &$O(n^2\cdot d)$ &$O(1)$ &$O(1)$ \\
\rule{0pt}{13pt}Recurrent &$O(n \cdot d^2)$ &$O(n)$ &$O(n)$ \\
\rule{0pt}{13pt}Convolutional &$O(k\cdot n \cdot d^2)$ &$O(1)$ &$O(\mathrm{log}_k(n))$
\end{tabular}
\end{table}
%----------------------------------------------
\parinterval Transformer在被提出之后,很快就席卷了整个自然语言处理领域。实际上,Transformer也可以当作一种表示模型,因此也被大量地使用在自然语言处理的其他领域,甚至图像处理和语音处理中也能看到它的影子。比如,目前非常流行的BERT等预训练模型就是基于Transformer。表\ref{tab:11-12}展示了Transformer在WMT英德和英法机器翻译任务上的性能。它能用更少的计算量(FLOPS)达到比其他模型更好的翻译品质\footnote{FLOPS = floating-point operations per second,即每秒浮点运算次数。它是度量计算机运算规模的常用单位}
%----------------------------------------------
\begin{table}[htp]
\centering
\caption{ 不同翻译模型性能对比\cite{NIPS2017_7181}}
\label{tab:11-12}
\begin{tabular}{l l l l}
\multicolumn{1}{l|}{\multirow{2}{*}{\#}} & \multicolumn{2}{c}{BLEU} & \multirow{2}{*}{\parbox{6em}{Training Cost (FLOPs)}} \\
\multicolumn{1}{l|}{} & EN-DE & EN-FR & \\ \hline
\multicolumn{1}{l|}{GNMT+RL} & 24.6 & 39.92 & 1.4$\times 10^{20}$ \\
\multicolumn{1}{l|}{ConvS2S} & 25.16 & 40.46 & 1.5$\times 10^{20}$ \\
\multicolumn{1}{l|}{MoE} & 26.03 & 40.56 & 1.2$\times 10^{20}$ \\
\multicolumn{1}{l|}{Transformer (Big)} & {\small\sffamily\bfseries{28.4}} & {\small\sffamily\bfseries{41.8}} & 2.3$\times 10^{19}$ \\
\end{tabular}
\end{table}
%----------------------------------------------
\parinterval 注意,Transformer并不简单等同于自注意力机制。Transformer模型还包含了很多优秀的技术,比如:多头注意力、新的训练学习率调整策略等等。这些因素一起组成了真正的Transformer。下面就一起看一看自注意力机制和Transformer是如何工作的。
%----------------------------------------------------------------------------------------
% NEW SUB-SECTION 11.2.1
%----------------------------------------------------------------------------------------
\subsection{Transformer架构}
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-transformer}
\caption{ Transformer结构}
\label{fig:11-39}
\end{figure}
%----------------------------------------------
\parinterval\ref{fig:11-39}展示了经典的Transformer结构。解码器由若干层组成(绿色虚线框就代表一层)。每一层(layer)的输入都是一个向量序列,输出是同样大小的向量序列,而Transformer层的作用是对输入进行进一步的抽象,得到新的表示结果。不过这里的层并不是指单一的神经网络结构,它里面由若干不同的模块组成,包括:
\begin{itemize}
\vspace{0.5em}
\item {\small\sffamily\bfseries{自注意力子层}}\index{自注意力子层}(Self-attention Sub-layer)\index{Self-attention Sub-layer}:使用自注意力机制对输入的序列进行新的表示;
\vspace{0.5em}
\item {\small\sffamily\bfseries{前馈神经网络子层}}\index{前馈神经网络子层}(Feed-forward Sub-layer)\index{Feed-forward Sub-layer}:使用全连接的前馈神经网络对输入向量序列进行进一步变换;
\vspace{0.5em}
\item {\small\sffamily\bfseries{残差连接}}\index{残差连接}(Residual Connection,标记为``Add'')\index{Residual Connection}:对于自注意力子层和前馈神经网络子层,都有一个从输入直接到输出的额外连接,也就是一个跨子层的直连。残差连接可以使深层网络的信息传递更为有效;
\vspace{0.5em}
\item {\small\sffamily\bfseries{层正则化}}\index{层正则化}(Layer Normalization)\index{Layer Normalization}:自注意力子层和前馈神经网络子层进行最终输出之前,会对输出的向量进行层正则化,规范结果向量取值范围,这样易于后面进一步的处理。
\vspace{0.5em}
\end{itemize}
\parinterval 以上操作就构成了Transformer的一层,各个模块执行的顺序可以简单描述为:Self-Attention $\to$ Residual Connection $\to$ Layer Normalization $\to$ Feed Forward Network $\to$ Residual Connection $\to$ Layer Normalization。编码器可以包含多个这样的层,比如,可以构建一个六层编码器,每层都执行上面的操作。最上层的结果作为整个编码的结果,会被传入解码器。
\parinterval 解码器的结构与编码器十分类似。它也是由若干层组成,每一层包含编码器中的所有结构,即:自注意力子层、前馈神经网络子层、残差连接和层正则化模块。此外,为了捕捉源语言的信息,解码器又引入了一个额外的{\small\sffamily\bfseries{编码-解码注意力子层}}\index{编码-解码注意力子层}(Encoder-decoder Attention Sub-layer)\index{Encoder-decoder Attention Sub-layer}。这个新的子层,可以帮助模型使用源语言句子的表示信息生成目标语不同位置的表示。编码-解码注意力子层仍然基于自注意力机制,因此它和自注意力子层的结构是相同的,只是$\mathrm{query}$$\mathrm{key}$$\mathrm{value}$的定义不同。比如,在解码端,自注意力子层的$\mathrm{query}$$\mathrm{key}$$\mathrm{value}$是相同的,它们都等于解码端每个位置的表示。而在编码-解码注意力子层中,$\mathrm{query}$是解码端每个位置的表示,此时$\mathrm{key}$$\mathrm{value}$是相同的,等于编码端每个位置的表示。图\ref{fig:11-40}给出了这两种不同注意力子层输入的区别。
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-self-att-vs-enco-deco-att}
\caption{ 注意力模型的输入(自注意力子层 vs 编码-解码注意力子层)}
\label{fig:11-40}
\end{figure}
%----------------------------------------------
\parinterval 此外,编码端和解码端都有输入的词序列。编码端的词序列输入是为了对其进行表示,进而解码端能从编码端访问到源语言句子的全部信息。解码端的词序列输入是为了进行目标语的生成,本质上它和语言模型是一样的,在得到前$n-1$个单词的情况下输出第$n$个单词。除了输入的词序列的词嵌入,Transformer中也引入了位置嵌入,以表示每个位置信息。原因是,自注意力机制没有显性地对位置进行表示,因此也无法考虑词序。在输入中引入位置信息可以让自注意力机制间接地感受到每个词的位置,进而保证对序列表示的合理性。最终,整个模型的输出由一个Softmax层完成,它和循环神经网络中的输出层是完全一样的(\ref{sec:11.3.2}节)。
\parinterval 在进行更详细的介绍前,先利用图\ref{fig:11-39}简单了解一下Transformer模型是如何进行翻译的。首先,Transformer将源语``我\ \ 好''的{\small\bfnew{词嵌入}}\index{词嵌入}(Word Embedding)\index{Word Embedding}融合{\small\bfnew{位置编码}}\index{位置编码}(Position Embedding)\index{Position Embedding}后作为输入。然后,编码器对输入的源语言句子进行逐层抽象,得到包含丰富的上下文信息的源语表示并传递给解码器。解码器的每一层,使用自注意力子层对输入解码端的表示进行加工,之后再使用编码-解码注意力子层融合源语言句子的表示信息。就这样逐词生成目标语译文单词序列。解码器的每个位置的输入是当前单词(比如,``I''),而这个位置输出是下一个单词(比如,``am''),这个设计和标准的神经语言模型是完全一样的。
\parinterval 了解到这里,可能大家还有很多疑惑,比如,什么是位置编码?Transformer的自注意力机制具体是怎么进行计算的,其结构是怎样的?Add\& LayerNorm又是什么?等等。下面就一一展开介绍。
%----------------------------------------------------------------------------------------
% NEW SUB-SECTION 11.2.2
%----------------------------------------------------------------------------------------
\subsection{位置编码}
\parinterval 在使用循环神经网络进行序列的信息提取时,每个时刻的运算都要依赖前一个时刻的输出,具有一定的时序性,这也与语言具有顺序的特点相契合。而采用自注意力机制对源语言和目标语言序列进行处理时,直接对当前位置和序列中的任意位置进行建模,忽略了词之间的顺序关系,例如图\ref{fig:11-41}中两个语义不同的句子,通过自注意力得到的表示$\tilde{\mathbf{h}}$(``机票'')却是相同的。
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-calculation-of-context-vector-c}
\caption{``机票''的更进一步抽象表示$\tilde{\mathbf{h}}$的计算}
\label{fig:11-41}
\end{figure}
%----------------------------------------------
\parinterval 为了解决这个问题,Transformer在原有的词向量输入基础上引入了位置编码,来表示单词之间的顺序关系。位置编码在Transformer结构中的位置如图\ref{fig:11-42},它是Transformer成功的一个重要因素。
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-transformer-input-and-position-encoding}
\caption{Transformer输入与位置编码}
\label{fig:11-42}
\end{figure}
%----------------------------------------------
\parinterval 位置编码的计算方式有很多种,Transformer使用不同频率的正余弦函数:
\begin{eqnarray}
\textrm{PE}(pos,2i) = \textrm{sin} (\frac{pos}{10000^{2i/d_{model}}})
\label{eq:11-43}
\end{eqnarray}
\begin{eqnarray}
\textrm{PE}(pos,2i+1) = \textrm{cos} (\frac{pos}{10000^{2i/d_{model}}})
\label{eq:11-44}
\end{eqnarray}
\noindent 式中PE($\cdot$)表示位置编码的函数,$pos$表示单词的位置,$i$代表位置编码向量中的第几维,$d_{model}$是Transformer的一个基础参数,表示每个位置的隐层大小。因为,正余弦函数的编码各占一半,因此当位置编码的维度为512 时,$i$ 的范围是0-255。 在Transformer中,位置编码的维度和词嵌入向量的维度相同(均为$d_{model}$),模型通过将二者相加作为模型输入,如图\ref{fig:11-43}所示。
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-a-combination-of-position-encoding-and-word-encoding}
\caption{位置编码与词编码的组合}
\label{fig:11-43}
\end{figure}
%----------------------------------------------
\parinterval 那么为什么通过这种计算方式可以很好的表示位置信息?有几方面原因。首先,正余弦函数是具有上下界的周期函数,用正余弦函数可将长度不同的序列的位置编码的范围都固定到[-1,1],这样在与词的编码进行相加时,不至于产生太大差距。另外位置编码的不同维度对应不同的正余弦曲线,这为多维的表示空间赋予一定意义。最后,根据三角函数的性质:
\begin{eqnarray}
\textrm{sin}(\alpha + \beta) &=& \textrm{sin}\alpha \cdot \textrm{cos} \beta + \textrm{cos} \alpha \cdot \textrm{sin} \beta \nonumber \\
\textrm{cos}(\alpha + \beta) &=& \textrm{cos} \alpha \cdot \textrm{cos} \beta - \textrm{sin} \alpha \cdot \textrm{sin} \beta
\label{eq:11-45}
\end{eqnarray}
\parinterval 可以得到``$pos+k$''的位置编码为:
\begin{eqnarray}
\textrm{PE}(pos+k,2i) &=& \textrm{PE}(pos,2i) \times \textrm{PE}(k,2i+1) + \nonumber \\
& & \textrm{PE}(pos,2i+1) \times \textrm{PE}(k,2i)\\
\textrm{PE}(pos+k ,2i+1) &=& \textrm{PE}(pos,2i+1) \times \textrm{PE}(k,2i+1) - \nonumber \\
& & \textrm{PE}(pos,2i) \times \textrm{PE}(k,2i)
\label{eq:11-46}
\end{eqnarray}
\noindent 即对于任意固定的偏移量$k$$\textrm{PE}(pos+k)$能被表示成$\textrm{PE}(pos)$的线性函数,换句话说,位置编码可以表示词之间的距离。在实践中发现,位置编码对Transformer系统的性能有很大影响。对其进行改进也会带来进一步的性能提升\cite{Shaw2018SelfAttentionWR}
%----------------------------------------------------------------------------------------
% NEW SUB-SECTION 11.2.3
%----------------------------------------------------------------------------------------
\subsection{基于点乘的注意力机制}
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-position-of-self-attention-mechanism-in-the-model}
\caption{自注意力机制在模型中的位置}
\label{fig:11-44}
\end{figure}
%----------------------------------------------
\parinterval Transformer模型摒弃了循环单元和卷积等结构,完全基于注意力机制来构造模型,其中包含着大量的注意力计算。比如,可以通过自注意力机制对源语言和目标语言序列进行信息提取,并通过编码-解码注意力对双语句对之间的关系进行提取。图\ref{fig:11-44}中红色方框部分是Transformer中使用自注意力机制的模块。
\parinterval\ref{sec:11.4.1}节中已经介绍,自注意力机制中,至关重要的是获取相关性系数,也就是在融合不同位置的表示向量时各位置的权重。在\ref{sec:11.3}节基于循环神经网络的机器翻译模型中,注意力机制的相关性系数有很多种计算方式,如余弦相似度等。而在Transformer模型中,则采用了一种基于点乘的方法来计算相关性系数。这种方法也称为{\small\bfnew{点乘注意力}}\index{点乘注意力}(Scaled Dot-Product Attention)\index{Scaled Dot-Product Attention}机制。它的运算并行度高,同时并不消耗太多的存储空间。
\parinterval 具体来看,在注意力机制的计算过程中,包含三个重要的参数,分别是Query,\\Key和Value。在下面的描述中,分别用$\mathbf{Q}$$\mathbf{K}$$\mathbf{V}$对它们进行表示,其中$\mathbf{Q}$$\mathbf{K}$的维度为$L\times d_k$$\mathbf{V}$的维度为$L\times d_v$。这里,$L$为序列的长度,$d_k$$d_v$分别表示每个Key和Value的大小,通常设置为$d_k=d_v=d_{model}$
\parinterval 在自注意力机制中,$\mathbf{Q}$$\mathbf{K}$$\mathbf{V}$都是相同的,对应着源语言或目标语言的表示。而在编码-解码注意力机制中,由于要对双语之间的信息进行建模,因此,将目标语每个位置的表示视为编码-解码注意力机制的$\mathbf{Q}$,源语言句子的表示视为$\mathbf{K}$$\mathbf{V}$
\parinterval 在得到$\mathbf{Q}$$\mathbf{K}$$\mathbf{V}$后,便可以进行注意力机制的运算,这个过程可以被形式化为:
\begin{eqnarray}
\textrm{Attention}(\mathbf{Q},\mathbf{K},\mathbf{V}) = \textrm{Softmax}
( \frac{\mathbf{Q}\mathbf{K}^{T}} {\sqrt{d_k}} + \mathbf{Mask} ) \mathbf{V}
\label{eq:11-47}
\end{eqnarray}
\noindent 首先,通过对$\mathbf{Q}$$\mathbf{K}$的转置进行点乘操作,计算得到一个维度大小为$L \times L$的相关性矩阵,即$\mathbf{Q}\mathbf{K}^{T}$,它表示一个序列上任意两个位置的相关性。再通过系数1/$\sqrt{d_k}$进行放缩操作,放缩可以尽量减少相关性矩阵的方差,具体体现在运算过程中实数矩阵中的数值不会过大,有利于模型训练。
\parinterval 在此基础上,通过对相关性矩阵累加一个掩码矩阵,来屏蔽掉矩阵中的无用信息。比如,在编码端对句子的补齐,在解码端则屏蔽掉未来信息,这一部分内容将在下一小节进行详细介绍。随后,使用Softmax函数对相关性矩阵在行的维度上进行归一化操作,这可以理解为对第$i$行进行归一化,结果对应了$\mathbf{V}$中的不同位置上向量的注意力权重。对于$\mathrm{value}$的加权求和,可以直接用相关性系数和$\mathbf{V}$进行矩阵乘法得到,即$\textrm{Softmax}
( \frac{\mathbf{Q}\mathbf{K}^{T}} {\sqrt{d_k}} + \mathbf{Mask} )$$\mathbf{V}$进行矩阵乘。最终得到自注意力的输出,它和输入的$\mathbf{V}$的大小是一模一样的。图\ref{fig:11-45}展示了点乘注意力计算的全过程。
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-point-product-attention-model}
\caption{点乘注意力力模型 }
\label{fig:11-45}
\end{figure}
%----------------------------------------------
\parinterval 下面举个简单的例子介绍点乘注意力的具体计算过程。如图\ref{fig:11-46}所示,用黄色、蓝色和橙色的矩阵分别表示$\mathbf{Q}$$\mathbf{K}$$\mathbf{V}$$\mathbf{Q}$$\mathbf{K}$$\mathbf{V}$中的每一个小格都对应一个单词在模型中的表示(即一个向量)。首先,通过点乘、放缩、掩码等操作得到相关性矩阵,即粉色部分。其次,将得到的中间结果矩阵(粉色)的每一行使用Softmax激活函数进行归一化操作,得到最终的权重矩阵,也就是图中的红色矩阵。红色矩阵中的每一行都对应一个注意力分布。最后,按行对$\mathbf{V}$进行加权求和,便得到了每个单词通过点乘注意力机制计算得到的表示。这里面,主要的计算消耗是两次矩阵乘法,即$\mathbf{Q}$$\mathbf{K}^{T}$的乘法、相关性矩阵和$\mathbf{V}$的乘法。这两个操作都可以在GPU上高效地完成,因此可以一次性计算出序列中所有单词之间的注意力权重,并完成所有位置表示的加权求和过程,这样大大提高了模型的计算速度。
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-process-of-5}
\caption{\ref{eq:11-47}的执行过程示例}
\label{fig:11-46}
\end{figure}
%----------------------------------------------
%----------------------------------------------------------------------------------------
% NEW SUB-SECTION 11.2.4
%----------------------------------------------------------------------------------------
\subsection{掩码操作}
\parinterval 在公式\ref{eq:11-47}中提到了Mask(掩码),它的目的是对向量中某些值进行掩盖,避免无关位置的数值对运算造成影响。Transformer中的Mask主要应用在注意力机制中的相关性系数计算,具体方式是在相关性系数矩阵上累加一个Mask矩阵。该矩阵在需要Mask的位置的值为负无穷-inf(具体实现时是一个非常小的数,比如-1e-9),其余位置为0,这样在进行了Softmax归一化操作之后,被掩码掉的位置计算得到的权重便近似为0,也就是说对无用信息分配的权重为0,从而避免了其对结果产生影响。Transformer包含两种Mask:
\begin{itemize}
\vspace{0.5em}
\item Padding Mask。在批量处理多个样本时(训练或解码),由于要对源语言和目标语言的输入进行批次化处理,而每个批次内序列的长度不一样,为了方便对批次内序列进行矩阵表示,需要进行对齐操作,即在较短的序列后面填充0来占位(padding操作)。而这些填充的位置没有意义,不参与注意力机制的计算,因此,需要进行Mask操作,屏蔽其影响。
\vspace{0.5em}
\item Future Mask。对于解码器来说,由于在预测的时候是自左向右进行的,即第$t$时刻解码器的输出只能依赖于$t$时刻之前的输出。且为了保证训练解码一致,避免在训练过程中观测到目标语端每个位置未来的信息,因此需要对未来信息进行屏蔽。具体的做法是:构造一个上三角值全为-inf的Mask矩阵,也就是说,在解码端计算中,在当前位置,通过Future Mask把序列之后的信息屏蔽掉了,避免了$t$之后的位置对当前的计算产生影响。图\ref{fig:11-47}给出了一个具体的实例。
%----------------------------------------------
% 图3.10
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-mask-instance-for-future-positions-in-transformer}
\caption{Transformer中对于未来位置进行的屏蔽的Mask实例}
\label{fig:11-47}
\end{figure}
%----------------------------------------------
\vspace{0.5em}
\end{itemize}
%----------------------------------------------------------------------------------------
% NEW SUB-SECTION 11.2.5
%----------------------------------------------------------------------------------------
\subsection{多头注意力}
\parinterval Transformer中使用的另一项重要技术是{\small\sffamily\bfseries{多头注意力}}\index{多头注意力}(Multi-head Attention)\index{Multi-head Attention}。``多头''可以理解成将原来的$\mathbf{Q}$$\mathbf{K}$$\mathbf{V}$按照隐层维度平均切分成多份。假设切分$h$份,那么最终会得到$\mathbf{Q} = \{ \mathbf{q}_1, \mathbf{q}_2,...,\mathbf{q}_h \}$$\mathbf{K}=\{ \mathbf{k}_1,\mathbf{k}_2,...,\mathbf{k}_h \}$$\mathbf{V}=\{ \mathbf{v}_1, \mathbf{v}_2,...,\mathbf{v}_h \}$。多头注意力机制就是用每一个切分得到的$\mathbf{q}$$\mathbf{k}$$\mathbf{v}$独立的进行注意力计算。即第$i$个头的注意力计算结果$\mathbf{head}_i = \textrm{Attention}(\mathbf{q}_i,\mathbf{k}_i, \mathbf{v}_i)$
\parinterval 下面根据图\ref{fig:11-48}详细介绍多头注意力的计算过程:
\begin{itemize}
\vspace{0.5em}
\item 首先将$\mathbf{Q}$$\mathbf{K}$$\mathbf{V}$分别通过线性变换的方式映射为$h$个子集(机器翻译任务中,$h$一般为8)。即$\mathbf{q}_i = \mathbf{Q}\mathbf{W}_i^Q $$\mathbf{k}_i = \mathbf{K}\mathbf{W}_i^K $$\mathbf{v}_i = \mathbf{V}\mathbf{W}_i^V $,其中$i$表示第$i$个头, $\mathbf{W}_i^Q \in \mathbb{R}^{d_{model} \times d_k}$, $\mathbf{W}_i^K \in \mathbb{R}^{d_{model} \times d_k}$, $\mathbf{W}_i^V \in \mathbb{R}^{d_{model} \times d_v}$是参数矩阵; $d_k=d_v=d_{model} / h$,对于不同的头采用不同的变换矩阵,这里$d_{model}$是Transformer的一个参数,表示每个隐层向量的维度;
\vspace{0.5em}
\item 其次对每个头分别执行点乘注意力操作,并得到每个头的注意力操作的输出$\mathbf{head}_i$
\vspace{0.5em}
\item 最后将$h$个头的注意力输出在最后一维$d_v$进行拼接(Concat)重新得到维度为$h \times d_v$的输出,并通过对其左乘一个权重矩阵$\mathbf{W}^o$进行线性变换,从而对多头计算得到的信息进行融合,且将多头注意力输出的维度映射为模型的隐层大小(即$d_{model}$),这里参数矩阵$\mathbf{W}^o \in \mathbb{R}^{h \times d_v \times d_{model}}$
\vspace{0.5em}
\end{itemize}
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-multi-head-attention-model}
\caption{多头注意力模型}
\label{fig:11-48}
\end{figure}
%----------------------------------------------
\parinterval 多头机制具体的计算公式如下:
\begin{eqnarray}
\textrm{MultiHead}(\mathbf{Q}, \mathbf{K} , \mathbf{V})& = & \textrm{Concat} (\mathbf{head}_1, ... , \mathbf{head}_h ) \mathbf{W}^o \label{eq:11-48} \\
\mathbf{head}_i & = &\textrm{Attention} (\mathbf{Q}\mathbf{W}_i^Q , \mathbf{K}\mathbf{W}_i^K , \mathbf{V}\mathbf{W}_i^V )
\label{eq:11-49}
\end{eqnarray}
\parinterval 多头机制的好处是允许模型在不同的表示子空间里学习。在很多实验中发现,不同表示空间的头捕获的信息是不同的,比如,在使用Transformer处理自然语言时,有的头可以捕捉句法信息,有头可以捕捉词法信息。
%----------------------------------------------------------------------------------------
% NEW SUB-SECTION 11.2.6
%----------------------------------------------------------------------------------------
\subsection{残差网络和层正则化}
\parinterval Transformer编码器、解码器分别由多层网络组成(通常为6层),每层网络又包含多个子层(自注意力网络、前馈神经网络)。因此Transformer实际上是一个很深的网络结构。再加上前面介绍的点乘注意力机制,包含很多线性和非线性变换;另外,注意力函数Attention($\cdot$)的计算也涉及多层网络,整个网络的信息传递非常复杂。从反向传播的角度来看,每次回传的梯度都会经过若干步骤,容易产生梯度爆炸或者消失。
\parinterval 解决这个问题的一种办法就是使用{\small\sffamily\bfseries{残差连接}}\index{残差连接}\cite{DBLP:journals/corr/HeZRS15}。残差连接是一种用来训练深层网络的技术,其结构如图\ref{fig:11-49},即在子层之前通过增加直接连接的方式,将底层信息直接传递给上层。
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-residual-network-structure}
\caption{残差网络结构}
\label{fig:11-49}
\end{figure}
%----------------------------------------------
\parinterval 残差连接从广义上讲也叫{\small\bfnew{短连接}}\index{短连接}(Short-cut Connection)\index{Short-cut Connection},指的是这种短距离的连接。它的思想很简单,就是把层和层之间的距离拉近。如图\ref{fig:11-49}所示,子层1通过残差连接跳过了子层2,直接和子层3进行信息传递。使信息传递变得更高效,有效解决了深层网络训练过程中容易出现的梯度消失/爆炸问题,使得深层网络的训练更加容易。其计算公式为:
\begin{eqnarray}
x_{l+1} = x_l + \mathcal{F} (x_l)
\label{eq:11-50}
\end{eqnarray}
\noindent 其中$\mathcal{F} (x_l)$是子层运算。如果$l=2$,那么公式\ref{eq:11-50}可以解释为,第3层的输出等于第2层的输出加上第二层的输入。图\ref{fig:11-50}中的红色方框展示了Transformer中残差连接的位置。
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-position-of-difference-and-layer-regularization-in-the-model}
\caption{残差和层正则化在模型中的位置}
\label{fig:11-50}
\end{figure}
%----------------------------------------------
\parinterval 在Transformer的训练过程中,由于引入了残差操作,将前面所有层的输出加到一起。这样会导致不同层(或子层)的结果之间的差异性很大,造成训练过程不稳定、训练时间较长。为了避免这种情况,在每层中加入了层正则化操作\cite{Ba2016LayerN}。层正则化的计算公式如下:
\begin{eqnarray}
\textrm{LN}(x) = g \cdot \frac{x- \mu} {\sigma} + b
\label{eq:11-51}
\end{eqnarray}
\noindent 该公式使用均值$\mu$和方差$\sigma$对样本进行平移缩放,将数据规范化为均值为0,方差为1的标准分布。$g$$b$是可学习的参数。
\parinterval 在Transformer中经常使用的层正则化操作有两种结构,分别是{\small\bfnew{后正则化}}\index{后正则化}(Post-norm)\index{Post-norm}{\small\bfnew{前正则化}}\index{前正则化}(Pre-norm)\index{Pre-norm},结构如图\ref{fig:11-51}所示。后正则化中先进行残差连接再进行层正则化,而前正则化则是在子层输入之前进行层正则化操作。在很多实践中已经发现,前正则化的方式更有利于信息传递,因此适合训练深层的Transformer模型\cite{WangLearning}
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-different-regularization-methods}
\caption{不同正则化方式 }
\label{fig:11-51}
\end{figure}
%----------------------------------------------
%----------------------------------------------------------------------------------------
% NEW SUB-SECTION 11.2.7
%----------------------------------------------------------------------------------------
\subsection{前馈全连接网络子层}
\parinterval 在Transformer的结构中,每一个编码层或者解码层中都包含一个前馈神经网络,它在模型中的位置如图\ref{fig:11-52}中红色方框所示。
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-position-of-feedforward-neural-network-in-the-model}
\caption{前馈神经网络在模型中的位置}
\label{fig:11-52}
\end{figure}
%----------------------------------------------
\parinterval Transformer使用了全连接网络。全连接网络的作用主要体现在将经过注意力操作之后的表示映射到新的空间中,新的空间会有利于接下来的非线性变换等操作。实验证明,去掉全连接网络会对模型的性能造成影响。Transformer的全连接前馈神经网络包含两次线性变换和一次非线性变换(ReLU激活函数:ReLU$(x)=\textrm{max}(0,x)$),每层的前馈神经网络参数不共享,计算公式如下:
\begin{eqnarray}
\textrm{FFN}(x) = \textrm{max} (0,\mathbf{x}\mathbf{W}_1 + \mathbf{b}_1)\mathbf{W}_2 + \mathbf{b}_2
\label{eq:11-52}
\end{eqnarray}
\noindent 其中,$\mathbf{W}_1$$\mathbf{W}_2$$\mathbf{b}_1$$\mathbf{b}_2$为模型的参数。通常情况下,前馈神经网络的隐层维度要比注意力部分的隐层维度大,而且研究人员发现这种设置对Transformer是至关重要的。 比如,注意力部分的隐层维度为512,前馈神经网络部分的隐层维度为2048。当然,继续增大前馈神经网络的隐层大小,比如设为4096,甚至8192,还可以带来性能的增益,但是前馈部分的存储消耗较大,需要更大规模GPU 设备的支持。因此在具体实现时,往往需要在翻译准确性和存储/速度之间找到一个平衡。
%----------------------------------------------------------------------------------------
% NEW SUB-SECTION 11.2.8
%----------------------------------------------------------------------------------------
\subsection{训练}
\parinterval 与前面介绍的神经机器翻译模型的训练一样,Transformer的训练流程为:首先对模型进行初始化,然后在编码器输入包含结束符的源语言单词序列。前面已经介绍过,解码端每个位置单词的预测都要依赖已经生成的序列。在解码端输入包含起始符号的目标语序列,通过起始符号预测目标语的第一个单词,用真实的目标语的第一个单词去预测第二个单词,以此类推,然后用真实的目标语序列和预测的结果比较,计算它的损失。Transformer使用了{\small\bfnew{交叉熵损失}}\index{交叉熵损失}(Cross Entropy Loss)\index{Cross Entropy Loss}函数,损失越小说明模型的预测越接近真实输出。然后利用反向传播来调整模型中的参数。由于Transformer 将任意时刻输入信息之间的距离拉近为1,摒弃了RNN中每一个时刻的计算都要基于前一时刻的计算这种具有时序性的训练方式,因此Transformer中训练的不同位置可以并行化训练,大大提高了训练效率。
%----------------------------------------------
%\begin{figure}[htp]
%\centering
%\input{./Chapter11/Figures/figure-structure-of-the-network-during-transformer-training}
%\caption{Transformer训练时网络的结构}
%\label{fig:11-53}
%\end{figure}
%----------------------------------------------
\parinterval 需要注意的是,Transformer也包含很多工程方面的技巧。首先,在训练优化器方面,需要注意以下几点:
\begin{itemize}
\vspace{0.5em}
\item Transformer使用Adam优化器优化参数,并设置$\beta_1=0.9$$\beta_2=0.98$$\epsilon=10^{-9}$
\vspace{0.5em}
\item Transformer在学习率中同样应用了学习率{\small\bfnew{预热}}\index{预热}(Warmup)\index{Warmup}策略,其计算公式如下:
\begin{eqnarray}
lrate = d_{model}^{-0.5} \cdot \textrm{min} (step^{-0.5} , step \cdot warmup\_steps^{-1.5})
\label{eq:11-53}
\vspace{0.5em}
\end{eqnarray}
其中,$step$表示更新的次数(或步数)。通常设置网络更新的前4000步为预热阶段即$warmup\_steps=4000$。Transformer的学习率曲线如图\ref{fig:11-54}所示。在训练初期,学习率从一个较小的初始值逐渐增大(线性增长),当到达一定的步数,学习率再逐渐减小。这样做可以减缓在训练初期的不稳定现象,同时在模型达到相对稳定之后,通过逐渐减小的学习率让模型进行更细致的调整。这种学习率的调整方法是Transformer的一个很大的工程贡献。
\vspace{0.5em}
\end{itemize}
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-lrate-of-transformer}
\caption{Transformer模型的学习率曲线}
\label{fig:11-54}
\end{figure}
%----------------------------------------------
\parinterval 另外,Transformer为了提高模型训练的效率和性能,还进行了以下几方面的操作:
\begin{itemize}
\vspace{0.5em}
\item {\small\bfnew{小批量训练}}\index{小批量训练}(Mini-batch Training)\index{Mini-batch Training}:每次使用一定数量的样本进行训练,即每次从样本中选择一小部分数据进行训练。这种方法的收敛较快,同时易于提高设备的利用率。批次大小通常设置为2048/4096(token数即每个批次中的单词个数)。每一个批次中的句子并不是随机选择的,模型通常会根据句子长度进行排序,选取长度相近的句子组成一个批次。这样做可以减少padding数量,提高训练效率,如图\ref{fig:11-55}
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-comparison-of-the-number-of-padding-in-batch}
\caption{batch中padding数量对比(白色部分为padding)}
\label{fig:11-55}
\end{figure}
%----------------------------------------------
\vspace{0.5em}
\item {\small\bfnew{Dropout}}\index{Dropout}:由于Transformer模型网络结构较为复杂,会导致过度拟合训练数据,从而对未见数据的预测结果变差。这种现象也被称作{\small\sffamily\bfseries{过拟合}}\index{过拟合}(Over Fitting)\index{Over fitting}。为了避免这种现象,Transformer加入了Dropout操作\cite{JMLR:v15:srivastava14a}。Transformer中这四个地方用到了Dropout:词嵌入和位置编码、残差连接、注意力操作和前馈神经网络。Dropout比例通常设置为$0.1$
\vspace{0.5em}
\item {\small\bfnew{标签平滑}}\index{标签平滑}(Label Smoothing)\index{Label Smoothing}:在计算损失的过程中,需要用预测概率去拟合真实概率。在分类任务中,往往使用One-hot向量代表真实概率,即真实答案位置那一维对应的概率为1,其余维为0,而拟合这种概率分布会造成两个问题:1)无法保证模型的泛化能力,容易造成过拟合;2) 1和0概率鼓励所属类别和其他类别之间的差距尽可能加大,会造成模型过于相信预测的类别。因此Transformer里引入标签平滑\cite{Szegedy_2016_CVPR}来缓解这种现象,简单的说就是给正确答案以外的类别分配一定的概率,而不是采用非0即1的概率。这样,可以学习一个比较平滑的概率分布,从而提升泛化能力。
\vspace{0.5em}
\end{itemize}
\parinterval 不同的Transformer可以适应不同的任务,常见的Transformer模型有Transformer Base、Transformer Big和Transformer Deep\cite{NIPS2017_7181,WangLearning},具体设置如下:
\begin{itemize}
\vspace{0.5em}
\item Transformer Base:标准的Transformer结构,解码器编码器均包含6层,隐层维度为512,前馈神经网络维度为2048,多头注意力机制为8头,Dropout设为0.1。
\vspace{0.5em}
\item Transformer Big:为了提升网络的容量,使用更宽的网络。在Base的基础上增大隐层维度至1024,前馈神经网络的维度变为4096,多头注意力机制为16头,Dropout设为0.3。
\vspace{0.5em}
\item Transformer Deep:加深编码器网络层数可以进一步提升网络的性能,它的参数设置与Transformer Base基本一致,但是层数增加到48层,同时使用Pre-Norm作为层正则化的结构。
\vspace{0.5em}
\end{itemize}
\parinterval 在WMT'16数据 上的实验对比如表\ref{tab:6-13}所示。可以看出,Transformer Base的BLE\\U得分虽不如另外两种模型,但其参数量是最少的。而Transformer Deep的性能整体好于Transformer Big。
%----------------------------------------------
\begin{table}[htp]
\centering
\caption{三种Transformer模型的对比}
\label{tab:11-13}
\begin{tabular}{l | l l l}
\multirow{2}{*}{系统} & \multicolumn{2}{c}{BLEU[\%]} & \# of \\
& EN-DE & EN-FR & params \\ \hline
Transformer Base & 27.3 & 38.1 & 65$\times 10^{6}$ \\
Transformer Big & 28.4 & 41.8 & 213$\times 10^{6}$ \\
Transformer Deep(48层) & 30.2 & 43.1 & 194$\times 10^{6}$ \\
\end{tabular}
\end{table}
%----------------------------------------------
%----------------------------------------------------------------------------------------
% NEW SUB-SECTION 11.2.9
%----------------------------------------------------------------------------------------
\subsection{推断}
\parinterval Transformer解码器生成目标语的过程和前面介绍的循环网络翻译模型类似,都是从左往右生成,且下一个单词的预测依赖已经生成的上一个单词。其具体推断过程如图\ref{fig:11-56}所示,其中$\mathbf{C}_i$是编码-解码注意力的结果,解码器首先根据``<eos>''和$\mathbf{C}_1$生成第一个单词``how'',然后根据``how''和$\mathbf{C}_2$生成第二个单词``are'',以此类推,当解码器生成``<eos>''时结束推断。
\parinterval 但是,Transformer在推断阶段无法对所有位置进行并行化操作,因为对于每一个目标语单词都需要对前面所有单词进行注意力操作,因此它推断速度非常慢。可以采用的加速手段有:低精度\cite{DBLP:journals/corr/CourbariauxB16}、Cache(缓存需要重复计算的变量)\cite{DBLP:journals/corr/abs-1805-00631}、共享注意力网络等\cite{Xiao2019SharingAW}。关于Transformer模型的推断技术将会在第七章进一步深入介绍。
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter11/Figures/figure-decode-of-transformer}
\caption{Transformer推断过程示例}
\label{fig:11-56}
\end{figure}
%----------------------------------------------
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论