Commit f7c5d4e6 by Lee

Update softmax figure

parent bdcfc769
...@@ -145,28 +145,82 @@ ...@@ -145,28 +145,82 @@
\subsection{注意力机制} \subsection{注意力机制}
%%%------------------------------------------------------------------------------------------------------------ %%%------------------------------------------------------------------------------------------------------------
%%% 解码 - 长度惩罚和覆盖度 \begin{frame}{模块2:输出层}
\begin{frame}{推断 - 其它特征}
\begin{itemize}
\item 直接用$\textrm{P}(\textbf{y}|\textbf{x})$进行解码,面临两方面问题
\begin{itemize} \begin{itemize}
\item$\textrm{P}(y_j|\textbf{y}_{<j},\textbf{x})$进行乘积会导致长句的概率很低 \item Softmax
\item 模型本身并没有考虑每个源语言单词被使用的程度,比如一个单词可能会被翻译了很多``次''
\end{itemize} \end{itemize}
\item<2-> 因此,解码时会使用其它特征与$\textrm{P}(\textbf{y}|\textbf{x})$一起组成模型得分$score(\textbf{y},\textbf{x})$,$score(\textbf{y},\textbf{x})$也作为beam search的排序依据 %%% 图
\begin{eqnarray} \begin{center}
score(\textbf{y},\textbf{x}) & = & \textrm{P}(\textbf{y}|\textbf{x})/\textrm{lp}(\textbf{y}) + \textrm{cp}(\textbf{y},\textbf{x}) \nonumber \\ \begin{tikzpicture}
\textrm{lp}(\textbf{y}) & = & \frac{(5 + |\textbf{y}|)^\alpha}{(5 + 1)^\alpha} \nonumber \\ \setlength{\base}{0.9cm}
\textrm{cp}(\textbf{y},\textbf{x}) & = & \beta \cdot \sum\nolimits_{i=1}^{|\textbf{x}|} \log (\min(\sum\nolimits_{j}^{|\textbf{y}|} a_{ij}, 1)) \nonumber
\end{eqnarray}
\vspace{-0.5em} \tikzstyle{rnnnode} = [rounded corners=1pt,minimum height=0.5\base,minimum width=1\base,draw,inner sep=0pt,outer sep=0pt]
\begin{itemize} \tikzstyle{wordnode} = [font=\tiny]
\item lp会惩罚译文过短的结果(长度惩罚);cp会惩罚把某些源语单词对应到很多目标语单词的情况(覆盖度),被覆盖的程度用$\sum\nolimits_{j}^{|\textbf{y}|} a_{ij}$度量;$\alpha$和$\beta$是超参,需要经验性设置
\end{itemize} % RNN translation model
\end{itemize} \begin{scope}[local bounding box=RNNMT]
% RNN Encoder
\end{frame} \node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb1) at (0,0) {\tiny{$e_x()$}};
% Encoder hidden-state box, placed 0.3\base above the source embedding node (eemb1).
% It is drawn empty; the visible label comes from the node that follows.
\node[rnnnode,fill=blue!30!white,anchor=south] (enc1) at ([yshift=0.3\base]eemb1.north) {};
% Purple box centred on enc1 (drawn later, so it paints over the blue box) carrying the label h_m.
\node[rnnnode,fill=purple!30!white] (enclabel1) at (enc1) {\tiny{$\textbf{h}_{m}$}};
% Ellipses to the left of the encoder column; presumably they stand for the earlier encoder steps.
% NOTE(review): the `left=... of` syntax needs the TikZ `positioning` library -- confirm it is loaded in the preamble.
\node[wordnode,left=0.4\base of enc1] (init1) {$\cdots$};
\node[wordnode,left=0.4\base of eemb1] (init2) {$\cdots$};
% Source-side input token shown directly below the source embedding node.
\node[wordnode,below=0pt of eemb1] () {$\langle$eos$\rangle$};
% RNN decoder: a vertical stack embedding -> hidden state s_1 -> Softmax,
% starting one \base above the encoder hidden state.
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb1) at ([yshift=\base]enc1.north) {\tiny{$e_y()$}};
\node[rnnnode,fill=blue!30!white,anchor=south] (dec1) at ([yshift=0.3\base]demb1.north) {{\tiny{$\textbf{s}_1$}}};
\node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax1) at ([yshift=0.3\base]dec1.north) {\tiny{Softmax}};
% Ellipses to the right of each decoder layer; presumably they stand for the following decoder steps.
\node[wordnode,right=0.4\base of demb1] (end1) {$\cdots$};
\node[wordnode,right=0.4\base of dec1] (end2) {$\cdots$};
\node[wordnode,right=0.4\base of softmax1] (end3) {$\cdots$};
% Decoder input word, shown below the target embedding node.
\node[wordnode,below=0pt of demb1] (decwordin) {$\langle$sos$\rangle$};
% Decoder output word, shown above the Softmax node.
\node[wordnode,above=0pt of softmax1] (decwordout) {Do};
% Connections. Only the hidden-state row gets horizontal arrows (init1 -> enc1, dec1 -> end2);
% the other ellipses (init2, end1, end3) are left unconnected.
% NOTE(review): the `latex'` arrow tip comes from the TikZ `arrows` library -- confirm it is loaded in the preamble.
\draw[-latex'] (init1.east) to (enc1.west);
\draw[-latex'] (dec1.east) to (end2.west);
% Vertical arrows inside each column.
\draw[-latex'] (eemb1) to (enc1);
\draw[-latex'] (demb1) to (dec1);
\draw[-latex'] (dec1.north) to (softmax1.south);
% Encoder-to-decoder link: leave enc1 upwards, bend through a helper point 0.4\base above
% enc1's north-west corner, and enter dec1 from the left.
\coordinate (bridge) at ([yshift=0.4\base]enc1.north west);
\draw[-latex'] (enc1.north) to [out=90,in=0] (bridge) to [out=180,in=180] (dec1.west);
\end{scope}
% Output-layer illustration: a framed row of 4 cells (bottom, node "hidden") is mapped
% to a framed row of 8 cells (top, node "target"); the shaded trapezoid between the two
% rows carries the label y = Wx.
%
% Local styles (visible only inside this scope):
%   vcell  - one square of a vector. `minimum size` sets both width and height, so the
%            separate `minimum width` the cells used to carry was redundant. The text size
%            is set with `font=`, matching the `wordnode` style, rather than by writing
%            \footnotesize{...} as if the size switch took an argument.
%   vframe - the rounded outline drawn around a row of cells.
\begin{scope}[
    vcell/.style={anchor=west,minimum size=1.5em,font=\footnotesize},
    vframe/.style={minimum height=1.8em,draw,rounded corners=0.3em}]
\coordinate (start) at (7\base,0);

% Bottom row: the frame is 6.7em wide and holds 4 cells of 1.5em, the first one
% inset by 0.2em; each further cell is anchored to the east side of its predecessor.
\node [vframe,anchor=center,minimum width=6.7em] (hidden) at (start) {};
\node [vcell,fill=ugreen!20] (cell01) at ([xshift=0.2em]hidden.west) {.2};
\node [vcell,fill=ugreen!10] (cell02) at (cell01.east) {-1};
\node [vcell,fill=white] (cell03) at (cell02.east) {$\cdots$};
\node [vcell,fill=ugreen!50] (cell04) at (cell03.east) {5};

% Top row: the frame is 12.9em wide and holds 8 cells, placed 3em above the bottom frame.
\node [vframe,anchor=south,minimum width=12.9em] (target) at ([yshift=3em]hidden.north) {};
\node [vcell,fill=ugreen!10] (cell11) at ([xshift=0.2em]target.west) {-2};
\node [vcell,fill=ugreen!10] (cell12) at (cell11.east) {-1};
\node [vcell,fill=ugreen!70] (cell13) at (cell12.east) {6};
\node [vcell,fill=white] (cell14) at (cell13.east) {$\cdots$};
\node [vcell,fill=ugreen!30] (cell15) at (cell14.east) {.7};
\node [vcell,fill=ugreen!10] (cell16) at (cell15.east) {-3};
\node [vcell,fill=ugreen!10] (cell17) at (cell16.east) {-1};
\node [vcell,fill=ugreen!20] (cell18) at (cell17.east) {.2};

% Lower corners of the trapezoid: just above the bottom frame, pulled 0.2em inwards.
% Named once here because the fill and both arrows share them.
\coordinate (hiddenNW) at ([xshift=0.2em,yshift=0.1em]hidden.north west);
\coordinate (hiddenNE) at ([xshift=-0.2em,yshift=0.1em]hidden.north east);

% Shaded trapezoid widening from the bottom row to the top row, with an arrow along each slanted edge.
\filldraw [fill=red!20,draw=white] (target.south west) -- (target.south east) -- (hiddenNE) -- (hiddenNW);
\draw [->,thick] (hiddenNW) -- (target.south west);
\draw [->,thick] (hiddenNE) -- (target.south east);

% Formula label, drawn last so that it sits on top of the shaded area.
\node [anchor=south] at ([yshift=0.9em]hidden.north) {$y=Wx$};
\end{scope}
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------ %%%------------------------------------------------------------------------------------------------------------
\section{Transformer} \section{Transformer}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论