Commit 93e5393c by 姜雨帆

Update Mask

parent 98598b4b
......@@ -4478,6 +4478,95 @@ PE_{(pos,2i+1)} = cos(pos/10000^{2i/d_{model}})
% 为什么要加mask
% self-attention的mask
% enc-dec的mask (也可以加页)
\begin{itemize}
\item 对于源语和目标语的输入,由于需要进行batch处理,有些部分是填充的(Padding),需要用Mask进行屏蔽
\item 对于解码器来说,由于在预测的时候是自左向右进行的,为了保持{\color{red} \textbf{训练解码一致}},需要对未来信息进行屏蔽
\end{itemize}
%%% 运行实例的图
\begin{center}
\begin{tikzpicture}
\begin{scope}
\tikzstyle{attnode} = [minimum size=1.5em,inner sep=0pt,rounded corners=1pt,draw]
\tikzstyle{srcnode} = [rotate=45,font=\small,anchor=south west]
\tikzstyle{tgtnode} = [left,font=\small,anchor=north east]
\tikzstyle{masknode} = [minimum size=5.8em,inner sep=0pt,rounded corners=1pt,draw]
\tikzstyle{elementnode} = [rectangle,text=white,anchor=center]
\setlength{\hnode}{1.0cm}
%\node [anchor=west,attnode] (node1) at (0,0) {\tiny{}};
%\node [anchor=west,attnode] (node2) at ([xshift=1em]node1.east) {\tiny{}};
\visible<2->{
\foreach \i / \j / \c in
{0/5/0.25, 1/5/0.15, 2/5/0.15, 3/5/0.35, 4/5/0.25, 5/5/0.15,
0/4/0.15, 1/4/0.25, 2/4/0.2, 3/4/0.30, 4/4/0.15, 5/4/0.15,
0/3/0.15, 1/3/0.15, 2/3/0.5, 3/3/0.25, 4/3/0.15, 5/3/0.25,
0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.25, 5/2/0.3,
0/1/0.25, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.5, 5/1/0.15,
0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.25, 5/0/0.40}
\node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*\hnode*\i-5.4*0.5*\hnode,0.5*\hnode*\j-1.05*\hnode) {};
% source
\node[srcnode] (src1) at (-5.4*0.5*\hnode,-1.05*\hnode+5.5*0.5*\hnode) {\scriptsize{Have}};
\node[srcnode] (src2) at ([xshift=0.5\hnode]src1.south west) {\scriptsize{you}};
\node[srcnode] (src3) at ([xshift=0.5\hnode]src2.south west) {\scriptsize{learned}};
\node[srcnode] (src4) at ([xshift=0.5\hnode]src3.south west) {\scriptsize{nothing}};
\node[srcnode] (src5) at ([xshift=0.5\hnode]src4.south west) {\scriptsize{?}};
\node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {\scriptsize{EOS}};
% target
\node[tgtnode] (tgt1) at (-6.0*0.5*\hnode,-1.05*\hnode+5.5*0.5*\hnode) {\scriptsize{Have}};
\node[tgtnode] (tgt2) at ([yshift=-0.5\hnode]tgt1.north east) {\scriptsize{you}};
\node[tgtnode] (tgt3) at ([yshift=-0.5\hnode]tgt2.north east) {\scriptsize{learned}};
\node[tgtnode] (tgt4) at ([yshift=-0.5\hnode]tgt3.north east) {\scriptsize{nothing}};
\node[tgtnode] (tgt5) at ([yshift=-0.5\hnode]tgt4.north east) {\scriptsize{?}};
\node[tgtnode] (tgt6) at ([yshift=-0.5\hnode]tgt5.north east) {\scriptsize{EOS}};
\node [rounded corners=0.3em,fill=yellow!30] (qk) at ([xshift=2.5em,yshift=5em]a55.north) {\large{$\frac{QK^{T}}{\sqrt{d_k}}$}};
\node [rounded corners=0.3em,anchor=west] (add) at ([xshift=0.1em]qk.east) {\large{+}};
\node [rounded corners=0.3em,anchor=west] (mask) at ([xshift=0.1em]add.east) {\large{$Mask$}};
\node [rounded corners=0.3em,anchor=east] (left) at ([xshift=-0em]qk.west) {\large{$($}};
\node [rounded corners=0.3em,anchor=west] (right) at ([xshift=0em]mask.east) {\large{$)$}};
\node [rounded corners=0.3em,anchor=west] (softmax) at ([xshift=-6em]left.east) {\large{Softmax}};
}
\visible<3->{
\filldraw [fill=blue!20,draw,thick,fill opacity=0.85] ([xshift=-0.9em,yshift=0.5em]a15.north west) -- ([xshift=0.5em,yshift=-0.9em]a51.south east) -- ([xshift=0.5em,yshift=0.5em]a55.north east) -- ([xshift=-0.9em,yshift=0.5em]a15.north west);
\node[anchor=west] (labelmask) at ([xshift=0.3em,yshift=0.5em]a23.north east) {Mask};
\node [rounded corners=0.3em,anchor=west,fill=blue!20] (mask) at ([xshift=0.1em]add.east) {\large{$Mask$}};
}
\visible<4->{
\foreach \i / \j / \c in
{0/5/0.25,
0/4/0.15, 1/4/0.25,
0/3/0.15, 1/3/0.15, 2/3/0.5,
0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15,
0/1/0.25, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.5,
0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.25, 5/0/0.40}
\node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*\hnode*\i+6*0.5*\hnode,0.5*\hnode*\j-1.05*\hnode) {};
% source
\node[srcnode] (src1) at (6*0.5*\hnode,-1.05*\hnode+5.5*0.5*\hnode) {\scriptsize{Have}};
\node[srcnode] (src2) at ([xshift=0.5\hnode]src1.south west) {\scriptsize{you}};
\node[srcnode] (src3) at ([xshift=0.5\hnode]src2.south west) {\scriptsize{learned}};
\node[srcnode] (src4) at ([xshift=0.5\hnode]src3.south west) {\scriptsize{nothing}};
\node[srcnode] (src5) at ([xshift=0.5\hnode]src4.south west) {\scriptsize{?}};
\node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {\scriptsize{EOS}};
% target
\node[tgtnode] (tgt1) at (5.4*0.5*\hnode,-1.05*\hnode+5.5*0.5*\hnode) {\scriptsize{Have}};
\node[tgtnode] (tgt2) at ([yshift=-0.5\hnode]tgt1.north east) {\scriptsize{you}};
\node[tgtnode] (tgt3) at ([yshift=-0.5\hnode]tgt2.north east) {\scriptsize{learned}};
\node[tgtnode] (tgt4) at ([yshift=-0.5\hnode]tgt3.north east) {\scriptsize{nothing}};
\node[tgtnode] (tgt5) at ([yshift=-0.5\hnode]tgt4.north east) {\scriptsize{?}};
\node[tgtnode] (tgt6) at ([yshift=-0.5\hnode]tgt5.north east) {\scriptsize{EOS}};
\node [rounded corners=0.3em,anchor=west,fill=green!30] (softmax) at ([xshift=-6em]left.east) {\large{Softmax}};
}
\end{scope}
\end{tikzpicture}
\end{center}
\end{frame}
......@@ -5750,7 +5839,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\item https://github.com/NiuTrans/NiuTensor
\end{itemize}
\vspace{0.2em}
\item 其他优秀的开源NMT系统:OpenNMT、THUMT
\item 其他优秀的开源NMT系统:OpenNMT、THUMT\\Sockeye、Marian、Nematus、SGNMT、Neural Monkey...
\end{itemize}
\end{frame}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论