Commit 93e5393c by 姜雨帆

Update Mask

parent 98598b4b
......@@ -2874,7 +2874,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\foreach \i [count=\j from 0,evaluate=\i as \k using int(4-\i)] in {1,2,3}
\node [samplenode,anchor=south west] (batch\i) at ([shift={(-1em,-0.5em)}]batch\j.south west) {句子\k};
\draw [decorate,decoration={brace}] (batch1.south east) to node [auto,rotate=30,anchor=north,font=\scriptsize] {batch大小} (batch3.south east);
\node [samplenode,anchor=west] (sample2) at ([xshift=4em]batch2.east) {句子2};
\node [samplenode,anchor=south] (sample3) at ([yshift=3em]sample2.north) {句子3};
\node [samplenode,anchor=north] (sample1) at ([yshift=-3em]sample2.south) {句子1};
......@@ -2915,7 +2915,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\setlength{\base}{1.5em}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum size=1\base,draw,inner sep=0pt,outer sep=0pt,fill=blue!30!white]
\tikzstyle{wordnode} = [font=\footnotesize,align=center]
\begin{scope}
% rnn[layer][step]
\coordinate (rnn00) at (0,0);
......@@ -2923,7 +2923,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\node[wordnode] (rnn\i0) at ([yshift=2\base]rnn\j0) {$0$};
\foreach \i [count=\j from 0] in {1,2,...,4}
\coordinate (rnn0\i) at ([xshift=2\base]rnn0\j);
% step 1
\visible<1->{
\node[rnnnode] (rnn11) at ([xshift=2\base]rnn10) {};
......@@ -2935,7 +2935,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\node[rnnnode,fill=purple] () at (rnn11) {};
\node[draw=red,thick,inner sep=7pt,rounded corners=0.3em,rotate fit=-45,label={[font=\footnotesize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn11)] () {};
}
% step 2
\visible<2->{
\node[rnnnode] (rnn12) at ([xshift=2\base]rnn11) {};
......@@ -2951,7 +2951,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\node[rnnnode,fill=purple] () at (rnn21) {};
\node[draw=red,thick,inner sep=7pt,rounded corners=0.3em,rotate fit=-45,label={[font=\footnotesize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn12) (rnn21)] () {};
}
% step 3
\visible<3->{
\node[rnnnode] (rnn13) at ([xshift=2\base]rnn12) {};
......@@ -2973,7 +2973,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\node[rnnnode,fill=purple] () at (rnn22) {};
\node[draw=red,thick,inner sep=7pt,rounded corners=0.3em,rotate fit=-45,label={[font=\footnotesize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn13) (rnn31) (rnn22)] () {};
}
% step 4
\visible<4->{
\node[rnnnode] (rnn14) at ([xshift=2\base]rnn13) {};
......@@ -2995,7 +2995,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\node[rnnnode,fill=purple] () at (rnn32) {};
\node[draw=red,thick,inner sep=7pt,rounded corners=0.3em,rotate fit=-45,label={[font=\footnotesize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn14) (rnn23) (rnn32)] () {};
}
% step 5
\visible<5->{
\node[rnnnode] (rnn24) at ([xshift=2\base]rnn23) {};
......@@ -3013,7 +3013,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\node[rnnnode,fill=purple] () at (rnn33) {};
\node[draw=red,thick,inner sep=7pt,rounded corners=0.3em,rotate fit=-45,label={[font=\footnotesize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn24) (rnn33)] () {};
}
% step 6
\visible<6->{
\node[rnnnode] (rnn34) at ([xshift=2\base]rnn33) {};
......@@ -3027,7 +3027,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\node[rnnnode,fill=purple] () at (rnn34) {};
\node[draw=red,thick,inner sep=7pt,rounded corners=0.3em,rotate fit=-45,label={[font=\footnotesize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn34)] () {};
}
% labels
\alt<1-4>{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn10.west) to node[wordnode,align=right,left,text=red] {正在使用的\\设备1} ([yshift=\base]rnn10.west);
......@@ -3044,10 +3044,10 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
}{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn30.west) to node[wordnode,align=right,left] {空闲的\\设备3} ([yshift=\base]rnn30.west);
}
\foreach \i in {1,2,3}
\node[wordnode,font=\scriptsize,anchor=south west] () at (rnn\i0.north west) {\i};
\node[wordnode] () at (rnn01) {};
\node[wordnode] () at (rnn02) {};
\node[wordnode] () at (rnn03) {不错};
......@@ -3680,13 +3680,13 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\begin{frame}{效果}
%% 使用注意力机制带来的提升
%% 各大评测比赛没有不使用注意力机制的系统,注意力机制已经成为标配
\begin{itemize}
\item 在引入注意力机制之前,神经机器翻译(RNNSearch)的性能要弱于统计机器翻译(PBMT)
\item 加入注意力机制和深层网络之后,神经机器翻译性能有了很大幅度的提升
\item 虽然网络深度增加了,但是通过相应的结构设计和解码策略保证了解码速度
\end{itemize}
{
\footnotesize
\begin{center}
......@@ -3712,7 +3712,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\end{tabular}
\end{center}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
......@@ -4478,6 +4478,95 @@ PE_{(pos,2i+1)} = cos(pos/10000^{2i/d_{model}})
% 为什么要加mask
% self-attention的mask
% enc-dec的mask (也可以加页)
\begin{itemize}
\item 对于源语和目标语的输入,由于需要进行batch处理,有些部分是填充的(Padding),需要用Mask进行屏蔽
\item 对于解码器来说,由于在预测的时候是自左向右进行的,为了保持{\color{red} \textbf{训练解码一致}},需要对未来信息进行屏蔽
\end{itemize}
%%% 运行实例的图
\begin{center}
\begin{tikzpicture}
% Step-by-step illustration of the attention mask, driven by Beamer overlays:
%   slide 2 onwards: the full grid of weights (left) and the formula
%                    Softmax( QK^T / sqrt(d_k) + Mask )
%   slide 3 onwards: a translucent triangle over the upper-right cells, and
%                    the "Mask" term of the formula highlighted
%   slide 4 onwards: the lower-triangular grid that is left (right), and the
%                    "Softmax" term of the formula highlighted
\begin{scope}
  % Styles are set up inside the scope, so they stay local to this picture.
  % attnode is only referenced by the commented-out nodes below.
  \tikzstyle{attnode} = [minimum size=1.5em,inner sep=0pt,rounded corners=1pt,draw]
  % Column labels (rotated, above the grid) and row labels (left of the grid).
  \tikzstyle{srcnode} = [rotate=45,font=\scriptsize,anchor=south west]
  \tikzstyle{tgtnode} = [left,font=\scriptsize,anchor=north east]
  % One grid cell; its size is set per cell from the weight.
  \tikzstyle{elementnode} = [rectangle,text=white,anchor=center]
  % Pieces of the formula drawn above the left grid.
  \tikzstyle{formulanode} = [rounded corners=0.3em,font=\large]
  % Grid unit: neighbouring cells are 0.5\hnode apart.
  \setlength{\hnode}{1.0cm}
  %\node [anchor=west,attnode] (node1) at (0,0) {\tiny{}};
  %\node [anchor=west,attnode] (node2) at ([xshift=1em]node1.east) {\tiny{}};

  % ---- slide 2 onwards: unmasked weights and the formula --------------------
  \visible<2->{
    % Each entry is column/row/weight; the square's side grows with the weight.
    % Row 5 is the top row of the grid, row 0 the bottom row.
    \foreach \i / \j / \c in
      {0/5/0.25, 1/5/0.15, 2/5/0.15, 3/5/0.35, 4/5/0.25, 5/5/0.15,
       0/4/0.15, 1/4/0.25, 2/4/0.2, 3/4/0.30, 4/4/0.15, 5/4/0.15,
       0/3/0.15, 1/3/0.15, 2/3/0.5, 3/3/0.25, 4/3/0.15, 5/3/0.25,
       0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.25, 5/2/0.3,
       0/1/0.25, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.5, 5/1/0.15,
       0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.25, 5/0/0.40}
      \node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*\hnode*\i-5.4*0.5*\hnode,0.5*\hnode*\j-1.05*\hnode) {};

    % source: column labels, each placed 0.5\hnode to the right of the previous one
    \node[srcnode] (src1) at (-5.4*0.5*\hnode,-1.05*\hnode+5.5*0.5*\hnode) {Have};
    \node[srcnode] (src2) at ([xshift=0.5\hnode]src1.south west) {you};
    \node[srcnode] (src3) at ([xshift=0.5\hnode]src2.south west) {learned};
    \node[srcnode] (src4) at ([xshift=0.5\hnode]src3.south west) {nothing};
    \node[srcnode] (src5) at ([xshift=0.5\hnode]src4.south west) {?};
    \node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {EOS};

    % target: row labels, each placed 0.5\hnode below the previous one
    \node[tgtnode] (tgt1) at (-6.0*0.5*\hnode,-1.05*\hnode+5.5*0.5*\hnode) {Have};
    \node[tgtnode] (tgt2) at ([yshift=-0.5\hnode]tgt1.north east) {you};
    \node[tgtnode] (tgt3) at ([yshift=-0.5\hnode]tgt2.north east) {learned};
    \node[tgtnode] (tgt4) at ([yshift=-0.5\hnode]tgt3.north east) {nothing};
    \node[tgtnode] (tgt5) at ([yshift=-0.5\hnode]tgt4.north east) {?};
    \node[tgtnode] (tgt6) at ([yshift=-0.5\hnode]tgt5.north east) {EOS};

    % Formula, assembled from separate nodes so that single terms can be
    % highlighted on later slides: Softmax ( QK^T/sqrt(d_k) + Mask )
    \node [formulanode,fill=yellow!30] (qk) at ([xshift=2.5em,yshift=5em]a55.north) {$\frac{QK^{T}}{\sqrt{d_k}}$};
    \node [formulanode,anchor=west] (add) at ([xshift=0.1em]qk.east) {+};
    % "Mask" is a word, not a product of variables, so it is set as text like
    % the neighbouring "Softmax" node and the "Mask" label on the grid.
    \node [formulanode,anchor=west] (mask) at ([xshift=0.1em]add.east) {Mask};
    \node [formulanode,anchor=east] (left) at ([xshift=-0em]qk.west) {$($};
    \node [formulanode,anchor=west] (right) at ([xshift=0em]mask.east) {$)$};
    \node [formulanode,anchor=west] (softmax) at ([xshift=-6em]left.east) {Softmax};
  }

  % ---- slide 3 onwards: the mask ---------------------------------------------
  % This group refers to the left grid's cells (a15, a51, a55), so it has to
  % stay before the slide-4 group, which re-uses the same node names.
  \visible<3->{
    % Triangle over the upper-right cells; "cycle" closes the outline with a
    % proper line join at the starting corner.
    \filldraw [fill=blue!20,draw,thick,fill opacity=0.85] ([xshift=-0.9em,yshift=0.5em]a15.north west) -- ([xshift=0.5em,yshift=-0.9em]a51.south east) -- ([xshift=0.5em,yshift=0.5em]a55.north east) -- cycle;
    \node[anchor=west] (labelmask) at ([xshift=0.3em,yshift=0.5em]a23.north east) {Mask};
    % Draw the "Mask" term again at the same position, now with a fill colour.
    \node [formulanode,anchor=west,fill=blue!20] (mask) at ([xshift=0.1em]add.east) {Mask};
  }

  % ---- slide 4 onwards: what remains after masking ---------------------------
  % Second grid to the right of the first one. It re-uses the node names
  % a.., src.. and tgt..; from here on those names refer to the right grid.
  \visible<4->{
    % Only the cells on and below the diagonal are listed (row 5 keeps one
    % cell, row 0 keeps all six); the weights are copied from the left grid.
    \foreach \i / \j / \c in
      {0/5/0.25,
       0/4/0.15, 1/4/0.25,
       0/3/0.15, 1/3/0.15, 2/3/0.5,
       0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15,
       0/1/0.25, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.5,
       0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.25, 5/0/0.40}
      \node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*\hnode*\i+6*0.5*\hnode,0.5*\hnode*\j-1.05*\hnode) {};

    % source
    \node[srcnode] (src1) at (6*0.5*\hnode,-1.05*\hnode+5.5*0.5*\hnode) {Have};
    \node[srcnode] (src2) at ([xshift=0.5\hnode]src1.south west) {you};
    \node[srcnode] (src3) at ([xshift=0.5\hnode]src2.south west) {learned};
    \node[srcnode] (src4) at ([xshift=0.5\hnode]src3.south west) {nothing};
    \node[srcnode] (src5) at ([xshift=0.5\hnode]src4.south west) {?};
    \node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {EOS};

    % target
    \node[tgtnode] (tgt1) at (5.4*0.5*\hnode,-1.05*\hnode+5.5*0.5*\hnode) {Have};
    \node[tgtnode] (tgt2) at ([yshift=-0.5\hnode]tgt1.north east) {you};
    \node[tgtnode] (tgt3) at ([yshift=-0.5\hnode]tgt2.north east) {learned};
    \node[tgtnode] (tgt4) at ([yshift=-0.5\hnode]tgt3.north east) {nothing};
    \node[tgtnode] (tgt5) at ([yshift=-0.5\hnode]tgt4.north east) {?};
    \node[tgtnode] (tgt6) at ([yshift=-0.5\hnode]tgt5.north east) {EOS};

    % Draw the "Softmax" term again at the same position, now with a fill colour.
    \node [formulanode,anchor=west,fill=green!30] (softmax) at ([xshift=-6em]left.east) {Softmax};
  }
\end{scope}
\end{tikzpicture}
\end{center}
\end{frame}
......@@ -5230,7 +5319,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnnnode} = [minimum height=1.1em,minimum width=2.1em,inner sep=2pt,rounded corners=1pt,draw,fill=red!20];
\node [rnnnode,anchor=west] (h1) at (0,0) {\tiny{$\textbf{h}_1$}};
\node [rnnnode,anchor=west] (h2) at ([xshift=1em]h1.east) {\tiny{$\textbf{h}_2$}};
\node [rnnnode,anchor=west] (h3) at ([xshift=1em]h2.east) {\tiny{$\textbf{h}_3$}};
......@@ -5242,7 +5331,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\node [anchor=north,inner sep=2pt] (w3) at ([yshift=-0.6em]e3.south) {\tiny{EOS}};
%\node [anchor=south] (dot1) at ([xshift=0.4em,yshift=-0.7em]h1.south) {\tiny{...}};
%\node [anchor=south] (dot2) at ([xshift=-0.4em,yshift=-0.7em]h3.south) {\tiny{...}};
\draw [->] (w1.north) -- ([yshift=-0.1em]e1.south);
\draw [->] (w2.north) -- ([yshift=-0.1em]e2.south);
\draw [->] (w3.north) -- ([yshift=-0.1em]e3.south);
......@@ -5255,9 +5344,9 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\draw [->] ([xshift=0.8em,yshift=-0.4em]h1.south) -- ([xshift=0.6em,yshift=-0.1em]h1.south);
\draw [->] ([xshift=-0.4em,yshift=-0.4em]h3.south) -- ([xshift=-0.3em,yshift=-0.1em]h3.south);
\draw [->] ([xshift=-0.8em,yshift=-0.4em]h3.south) -- ([xshift=-0.6em,yshift=-0.1em]h3.south);
\node [anchor=south] (encoder) at ([xshift=-0.2em]h1.north west) {\scriptsize{\textbf{编码器}}};
\visible<5->{
\node [rnnnode,anchor=west,fill=green!20] (t1) at ([xshift=3em]e3.east) {\tiny{$e_y()$}};
}
......@@ -5326,7 +5415,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\node [anchor=center,inner sep=2pt] (wo4) at ([yshift=1.2em]o4.north) {\tiny{EOS}};
\node [anchor=south,inner sep=2pt] (wos4) at (wo4.north) {\tiny{\textbf{[step 4]}}};
}
\visible<5->{
\foreach \x in {1}{
\draw [->] ([yshift=-0.7em]t\x.south) -- ([yshift=-0.1em]t\x.south);
......@@ -5336,7 +5425,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\draw [->] ([yshift=0.1em]o\x.north) -- ([yshift=0.8em]o\x.north) node [pos=0.5,right] {\tiny{top1}};
}
}
\visible<7->{
\foreach \x in {2}{
\draw [->] ([yshift=-0.7em]t\x.south) -- ([yshift=-0.1em]t\x.south);
......@@ -5347,7 +5436,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\draw [->] ([xshift=0.2em,yshift=0.1em]t1.north) .. controls +(north:0.3) and +(south:0.3) .. ([xshift=-0.3em,yshift=-0.1em]s2.south);
}
}
\visible<8->{
\foreach \x in {3,4}{
\draw [->] ([yshift=-0.7em]t\x.south) -- ([yshift=-0.1em]t\x.south);
......@@ -5361,7 +5450,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\draw [->] ([xshift=-1.5em,yshift=-0.5em]s3.south) .. controls +(north:0) and +(south:0.15) .. ([xshift=-0.6em,yshift=-0.1em]s3.south);
}
}
\visible<6->{
\draw [->,thick,dotted] (wo1.east) .. controls +(east:1.0) and +(west:1.0) ..(wt2.west);
}
......@@ -5369,7 +5458,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\draw [->,thick,dotted] (wo2.east) .. controls +(east:1.3) and +(west:1.1) ..(wt3.west);
\draw [->,thick,dotted] (wo3.east) .. controls +(east:1.1) and +(west:0.9) ..(wt4.west);
}
\visible<5->{
\node [circle,draw,anchor=south,inner sep=3pt,fill=orange!20] (c1) at ([yshift=2em]h2.north) {\tiny{$\textbf{C}_1$}};
\node [anchor=south] (c1label) at (c1.north) {\tiny{\textbf{编码-解码注意力机制:上下文}}};
......@@ -5379,25 +5468,25 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\draw [->] ([yshift=0.3em]s1.west) .. controls +(west:1) and +(east:1) .. (c1.-30);
\draw [->] (c1.0) .. controls +(east:1) and +(west:1) .. ([yshift=0em]f1.west);
}
% Context-vector circles C_2..C_4. All three groups follow the same pattern:
%   - an orange circle whose top sits 2em below the south anchor of t1/t2/t3
%   - a short arrow pointing into the circle's west side
%   - a curved arrow from the circle's east side to a point just below and
%     left of the west anchor of f2/f3/f4
% The t* and f* nodes are defined outside this excerpt.
% C_2 appears from slide 7 onwards.
\visible<7->{
\node [circle,draw,anchor=north,inner sep=3pt,fill=orange!20] (c2) at ([yshift=-2em]t1.south) {\tiny{$\textbf{C}_2$}};
\draw [->] ([xshift=-0.7em]c2.west) -- ([xshift=-0.1em]c2.west);
\draw [->] ([xshift=0.1em]c2.east) .. controls +(east:0.6) and +(west:0.8) ..([yshift=-0.3em,xshift=-0.1em]f2.west);
}
% C_3 appears from slide 8 onwards.
\visible<8->{
\node [circle,draw,anchor=north,inner sep=3pt,fill=orange!20] (c3) at ([yshift=-2em]t2.south) {\tiny{$\textbf{C}_3$}};
\draw [->] ([xshift=-0.7em]c3.west) -- ([xshift=-0.1em]c3.west);
\draw [->] ([xshift=0.1em]c3.east) .. controls +(east:0.6) and +(west:0.8) ..([yshift=-0.3em,xshift=-0.1em]f3.west);
}
% C_4 appears on the same slide as C_3 (slide 8 onwards).
\visible<8->{
\node [circle,draw,anchor=north,inner sep=3pt,fill=orange!20] (c4) at ([yshift=-2em]t3.south) {\tiny{$\textbf{C}_4$}};
\draw [->] ([xshift=-0.7em]c4.west) -- ([xshift=-0.1em]c4.west);
\draw [->] ([xshift=0.1em]c4.east) .. controls +(east:0.6) and +(west:0.8) ..([yshift=-0.3em,xshift=-0.1em]f4.west);
}
\end{scope}
\end{tikzpicture}
\end{center}
......@@ -5750,9 +5839,9 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\item https://github.com/NiuTrans/NiuTensor
\end{itemize}
\vspace{0.2em}
\item 其他优秀的开源NMT系统:OpenNMT、THUMT\\Sockeye、Marian、Nematus、SGNMT、Neural Monkey...
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论