Commit 822f6186 by zengxin

合并分支 'zengxin' 到 'caorunzhe'

slide 1 2 4 5 6

查看合并请求 !271
parents 67b18d5d a784b416
...@@ -104,7 +104,7 @@ ...@@ -104,7 +104,7 @@
%\visible<3-> %\visible<3->
{ {
% coverage score formula node % coverage score formula node
\node [anchor=north west] (formula) at ([xshift=-0.3\hnode,yshift=-1.5\hnode]attn11.south) {\small{不同$\textbf{C}_i$所对应的源语言词的权重是不同的}}; \node [anchor=north west] (formula) at ([xshift=-0.3\hnode,yshift=-1.5\hnode]attn11.south) {\small{不同$\textbf{C}_j$所对应的源语言词的权重是不同的}};
\node [anchor=north west] (example) at (formula.south west) {\footnotesize{$\textbf{C}_2=0.4 \times \textbf{h}(\textrm{``你''}) + 0.4 \times \textbf{h}(\textrm{``什么''}) +$}}; \node [anchor=north west] (example) at (formula.south west) {\footnotesize{$\textbf{C}_2=0.4 \times \textbf{h}(\textrm{``你''}) + 0.4 \times \textbf{h}(\textrm{``什么''}) +$}};
\node [anchor=north west] (example2) at ([yshift=0.4em]example.south west) {\footnotesize{$\ \ \ \ \ \ \ \ 0 \times \textbf{h}(\textrm{``都''}) + 0.1 \times \textbf{h}(\textrm{`` 没''}) + ..$}}; \node [anchor=north west] (example2) at ([yshift=0.4em]example.south west) {\footnotesize{$\ \ \ \ \ \ \ \ 0 \times \textbf{h}(\textrm{``都''}) + 0.1 \times \textbf{h}(\textrm{`` 没''}) + ..$}};
} }
......
...@@ -304,8 +304,8 @@ ...@@ -304,8 +304,8 @@
\visible<3->{ \visible<3->{
\begin{center} \begin{center}
\begin{tikzpicture} \begin{tikzpicture}
\node [anchor=south west, fill=red, minimum width=1.5cm, minimum height=2.3cm] (mt) at (1,0) {{\color{white} \textbf{机器}}}; \node [anchor=south west, fill=red!50, minimum width=1.5cm, minimum height=2.3cm] (mt) at (1,0) {{\color{white} \textbf{机器}}};
\node [anchor=south west, fill=ugreen, minimum width=1.5cm, minimum height=2.7cm] (human) at ([xshift=0.5cm]mt.south east) {{\color{white} \textbf{}}}; \node [anchor=south west, fill=blue!50, minimum width=1.5cm, minimum height=2.7cm] (human) at ([xshift=0.5cm]mt.south east) {{\color{white} \textbf{}}};
\node [anchor=south] (mtscore) at (mt.north) {3.9}; \node [anchor=south] (mtscore) at (mt.north) {3.9};
\node [anchor=south] (humanscore) at (human.north) {4.7}; \node [anchor=south] (humanscore) at (human.north) {4.7};
\draw [->,thick] ([xshift=-0.5cm]mt.south west) -- ([xshift=0.5cm]human.south east); \draw [->,thick] ([xshift=-0.5cm]mt.south west) -- ([xshift=0.5cm]human.south east);
...@@ -321,8 +321,8 @@ ...@@ -321,8 +321,8 @@
\visible<4->{ \visible<4->{
\begin{center} \begin{center}
\begin{tikzpicture} \begin{tikzpicture}
\node [anchor=south west, fill=red, minimum width=1.5cm, minimum height=1.5cm] (mt) at (1,0) {{\color{white} \textbf{机器}}}; \node [anchor=south west, fill=red!50, minimum width=1.5cm, minimum height=1.5cm] (mt) at (1,0) {{\color{white} \textbf{机器}}};
\node [anchor=south west, fill=ugreen, minimum width=1.5cm, minimum height=2.7cm] (human) at ([xshift=0.5cm]mt.south east) {{\color{white} \textbf{}}}; \node [anchor=south west, fill=blue!50, minimum width=1.5cm, minimum height=2.7cm] (human) at ([xshift=0.5cm]mt.south east) {{\color{white} \textbf{}}};
\node [anchor=south] (mtscore) at (mt.north) {47\%}; \node [anchor=south] (mtscore) at (mt.north) {47\%};
\node [anchor=south] (humanscore) at (human.north) {100\%}; \node [anchor=south] (humanscore) at (human.north) {100\%};
\draw [->,thick] ([xshift=-0.5cm]mt.south west) -- ([xshift=0.5cm]human.south east); \draw [->,thick] ([xshift=-0.5cm]mt.south west) -- ([xshift=0.5cm]human.south east);
......
...@@ -3706,8 +3706,8 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4 ...@@ -3706,8 +3706,8 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4
\subsection{基于chart的解码} \subsection{基于chart的解码}
%%%------------------------------------------------------------------------------------------------------------ %%%------------------------------------------------------------------------------------------------------------
%%% CYK解码 %%% CKY解码
\begin{frame}{CYK解码} \begin{frame}{CKY解码}
% 看NiuTrans Manual % 看NiuTrans Manual
\begin{itemize} \begin{itemize}
\item 基于层次短语的翻译解码与基于短语的模型类似,都是要找到使$\textrm{score}(d)$达到最大的翻译推导$d$ \item 基于层次短语的翻译解码与基于短语的模型类似,都是要找到使$\textrm{score}(d)$达到最大的翻译推导$d$
...@@ -3717,8 +3717,8 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4 ...@@ -3717,8 +3717,8 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4
\end{displaymath} \end{displaymath}
\vspace{-0.8em} \vspace{-0.8em}
\begin{itemize} \begin{itemize}
\item 由于翻译推导由SCFG构成,使用CYK算法进行解码 \item 由于翻译推导由SCFG构成,使用CKY算法进行解码
\item CYK算法解码是一个用来判定任意给定的字符串 是否属于一个上下文无关文法的算法,具体流程如下 \item CKY算法解码是一个用来判定任意给定的字符串 是否属于一个上下文无关文法的算法,具体流程如下
\end{itemize} \end{itemize}
\vspace{0.5em} \vspace{0.5em}
\begin{center} \begin{center}
...@@ -3740,16 +3740,16 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4 ...@@ -3740,16 +3740,16 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4
\end{tikzpicture} \end{tikzpicture}
\end{center} \end{center}
\vspace{0.3em} \vspace{0.3em}
%\item 由于对文法中的非终结符进行了限制,可以直接使用CYK算法进行解码,无需转换成乔姆斯基范式 %\item 由于对文法中的非终结符进行了限制,可以直接使用CKY算法进行解码,无需转换成乔姆斯基范式
\end{itemize} \end{itemize}
\end{frame} \end{frame}
%%%------------------------------------------------------------------------------------------------------------ %%%------------------------------------------------------------------------------------------------------------
%%% CYK解码 %%% CKY解码
\begin{frame}{CYK算法} \begin{frame}{CKY算法}
% 看NiuTrans Manual % 看NiuTrans Manual
\begin{itemize} \begin{itemize}
\item CYK算法通过遍历不同\alert{span}来判断字符串是否符合文法 \item CKY算法通过遍历不同\alert{span}来判断字符串是否符合文法
\begin{itemize} \begin{itemize}
\item 输入:源语串\textbf{s =} $s_1 ... s_J$,以及CNF文法$G$ \item 输入:源语串\textbf{s =} $s_1 ... s_J$,以及CNF文法$G$
\item 输出:判断字符串是否符合G \item 输出:判断字符串是否符合G
...@@ -3762,7 +3762,7 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4 ...@@ -3762,7 +3762,7 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4
\tikzstyle{srcnode} = [anchor=south west] \tikzstyle{srcnode} = [anchor=south west]
\begin{scope}[scale=0.85] \begin{scope}[scale=0.85]
\node[srcnode] (c1) at (0,0) {\small{\textbf{Function} CYK-Algorithm($\textbf{s},G$)}}; \node[srcnode] (c1) at (0,0) {\small{\textbf{Function} CKY-Algorithm($\textbf{s},G$)}};
\node[srcnode,anchor=north west] (c21) at ([xshift=1.5em,yshift=0.4em]c1.south west) {\small{\textbf{for} $j=0$ to $ J - 1$}}; \node[srcnode,anchor=north west] (c21) at ([xshift=1.5em,yshift=0.4em]c1.south west) {\small{\textbf{for} $j=0$ to $ J - 1$}};
\node[srcnode,anchor=north west] (c22) at ([xshift=1.5em,yshift=0.4em]c21.south west) {\small{$span[j,j+1 ]$.Add($A \to a \in G$)}}; \node[srcnode,anchor=north west] (c22) at ([xshift=1.5em,yshift=0.4em]c21.south west) {\small{$span[j,j+1 ]$.Add($A \to a \in G$)}};
\node[srcnode,anchor=north west] (c3) at ([xshift=-1.5em,yshift=0.4em]c22.south west) {\small{\textbf{for} $l$ = 1 to $J$}}; \node[srcnode,anchor=north west] (c3) at ([xshift=-1.5em,yshift=0.4em]c22.south west) {\small{\textbf{for} $l$ = 1 to $J$}};
...@@ -3810,11 +3810,11 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4 ...@@ -3810,11 +3810,11 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4
\end{frame} \end{frame}
%%%------------------------------------------------------------------------------------------------------------ %%%------------------------------------------------------------------------------------------------------------
%%% CYK解码 %%% CKY解码
\begin{frame}{CYK算法} \begin{frame}{CKY算法}
% 看NiuTrans Manual % 看NiuTrans Manual
\begin{itemize} \begin{itemize}
\item 我们来看一个CYK算法的具体例子,给定一个上下文无关文法以及一个单词\alert{aabbc},来判断该单词是否属于此文法,解析流程如下 \item 我们来看一个CKY算法的具体例子,给定一个上下文无关文法以及一个单词\alert{aabbc},来判断该单词是否属于此文法,解析流程如下
\vspace{-0.3em} \vspace{-0.3em}
\begin{center} \begin{center}
\begin{tikzpicture} \begin{tikzpicture}
...@@ -3946,11 +3946,11 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4 ...@@ -3946,11 +3946,11 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4
\end{frame} \end{frame}
%%%------------------------------------------------------------------------------------------------------------ %%%------------------------------------------------------------------------------------------------------------
%%% CYK解码 %%% CKY解码
\begin{frame}{CYK解码(续)} \begin{frame}{CKY解码(续)}
% 看NiuTrans Manual % 看NiuTrans Manual
\begin{itemize} \begin{itemize}
\item 实际上,在层次短语解码的时候,不能直接使用CYK算法,需要先转化为乔姆斯基范式,才能进行解码 \item 实际上,在层次短语解码的时候,不能直接使用CKY算法,需要先转化为乔姆斯基范式,才能进行解码
\begin{itemize} \begin{itemize}
\item<2-> 对于每个源语句子,使用短语规则表初始化它的span \item<2-> 对于每个源语句子,使用短语规则表初始化它的span
\item<3-> 自底向上对span中的每个子span进行重新组合(正、反向) \item<3-> 自底向上对span中的每个子span进行重新组合(正、反向)
...@@ -4166,7 +4166,7 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4 ...@@ -4166,7 +4166,7 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4
% 实验结果 % 实验结果
\begin{itemize} \begin{itemize}
\item 从实验结果中可以看出,基于层次短语的翻译模型性能要优于基于短语的翻译模型 \item 从实验结果中可以看出,基于层次短语的翻译模型性能要优于基于短语的翻译模型
\item 选择使用层次短语信息实际上增加了模型的复杂度,但是可以通过借鉴基于短语的翻译模型以及CYK解码和立方剪枝等技术来解决 \item 选择使用层次短语信息实际上增加了模型的复杂度,但是可以通过借鉴基于短语的翻译模型以及CKY解码和立方剪枝等技术来解决
\item 可以考虑加入更多句法信息来进一步提升模型性能 \item 可以考虑加入更多句法信息来进一步提升模型性能
\end{itemize} \end{itemize}
%\vspace{-1em} %\vspace{-1em}
...@@ -6785,7 +6785,7 @@ NP-BAR(NN$_1$ NP-BAR$_2$) $\to$ NN$_1$ NP-BAR$_2$ ...@@ -6785,7 +6785,7 @@ NP-BAR(NN$_1$ NP-BAR$_2$) $\to$ NN$_1$ NP-BAR$_2$
搜索空间 & 与输入的源语句法树 & 所有推导$D$ \\ 搜索空间 & 与输入的源语句法树 & 所有推导$D$ \\
& 兼容的推导$D_{\textrm{tree}}$ & \\ \hline & 兼容的推导$D_{\textrm{tree}}$ & \\ \hline
适用模型 & 树到串、树到树 & 所有句法模型 \\ \hline 适用模型 & 树到串、树到树 & 所有句法模型 \\ \hline
解码算法 & chart解码 & CYK + 规则二叉化 \\ \hline 解码算法 & chart解码 & CKY + 规则二叉化 \\ \hline
速度 && 一般较慢 速度 && 一般较慢
\end{tabular} \end{tabular}
...@@ -7358,7 +7358,7 @@ NP-BAR(NN$_1$ NP-BAR$_2$) $\to$ NN$_1$ NP-BAR$_2$ ...@@ -7358,7 +7358,7 @@ NP-BAR(NN$_1$ NP-BAR$_2$) $\to$ NN$_1$ NP-BAR$_2$
\end{frame} \end{frame}
%%%------------------------------------------------------------------------------------------------------------ %%%------------------------------------------------------------------------------------------------------------
%%% 基于串的解码 - CYK + 规则二叉化 %%% 基于串的解码 - CKY + 规则二叉化
\begin{frame}{基于串的解码 - CKY + 规则二叉化} \begin{frame}{基于串的解码 - CKY + 规则二叉化}
\begin{itemize} \begin{itemize}
......
...@@ -5031,6 +5031,10 @@ GPT-2 (Transformer) & Radford et al. & 2019 & 35.7 ...@@ -5031,6 +5031,10 @@ GPT-2 (Transformer) & Radford et al. & 2019 & 35.7
\node [anchor=west,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e2) at ([xshift=1em]e1.east) {\scriptsize{$\textbf{e}_2$}}; \node [anchor=west,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e2) at ([xshift=1em]e1.east) {\scriptsize{$\textbf{e}_2$}};
\node [anchor=west,inner sep=4pt] (sep5) at ([xshift=1em]e2.east) {\scriptsize{...}}; \node [anchor=west,inner sep=4pt] (sep5) at ([xshift=1em]e2.east) {\scriptsize{...}};
\node [anchor=west,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e3) at ([xshift=1em]sep5.east) {\scriptsize{$\textbf{e}_m$}}; \node [anchor=west,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e3) at ([xshift=1em]sep5.east) {\scriptsize{$\textbf{e}_m$}};
\node [anchor=south] (word1) at ([yshift=-1.5em]e1.south) {\footnotesize {Once}};
\node [anchor=south] (word2) at ([yshift=-1.6em]e2.south) {\footnotesize {upon}};
\node [anchor=south] (wordseq) at ([yshift=-1.5em]sep5.south) {\footnotesize{...}};
\node [anchor=south] (word3) at ([yshift=-1.5em]e3.south) {\footnotesize {island}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t1) at ([xshift=-2em,yshift=1em]Lstm5.north) {\scriptsize{$\textbf{h}_1$}}; \node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t1) at ([xshift=-2em,yshift=1em]Lstm5.north) {\scriptsize{$\textbf{h}_1$}};
\node [anchor=west,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t2) at ([xshift=1em]t1.east) {\scriptsize{$\textbf{h}_2$}}; \node [anchor=west,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t2) at ([xshift=1em]t1.east) {\scriptsize{$\textbf{h}_2$}};
...@@ -5130,6 +5134,12 @@ GPT-2 (Transformer) & Radford et al. & 2019 & 35.7 ...@@ -5130,6 +5134,12 @@ GPT-2 (Transformer) & Radford et al. & 2019 & 35.7
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e4) at ([yshift=-1em]Trm3.south) {\scriptsize{$\textbf{e}_4$}}; \node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e4) at ([yshift=-1em]Trm3.south) {\scriptsize{$\textbf{e}_4$}};
\node [anchor=north,inner sep=4pt] (sep5) at ([yshift=-1em]sep.south) {\scriptsize{...}}; \node [anchor=north,inner sep=4pt] (sep5) at ([yshift=-1em]sep.south) {\scriptsize{...}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e5) at ([yshift=-1em]Trm4.south) {\scriptsize{$\textbf{e}_m$}}; \node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e5) at ([yshift=-1em]Trm4.south) {\scriptsize{$\textbf{e}_m$}};
\node [anchor=south] (word1) at ([yshift=-1.5em]e1.south) {\footnotesize {Once}};
\node [anchor=south] (word2) at ([yshift=-1.6em]e2.south) {\footnotesize {upon}};
\node [anchor=south] (word3) at ([yshift=-1.5em]e3.south) {\footnotesize {a}};
\node [anchor=south] (word4) at ([yshift=-1.5em]e4.south) {\footnotesize {time}};
\node [anchor=south] (wordseq) at ([yshift=-2.0em]sep5.south) {\footnotesize{...}};
\node [anchor=south] (word4) at ([yshift=-1.5em]e5.south) {\footnotesize {island}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t1) at ([yshift=1em]Trm5.north) {\scriptsize{$\textbf{h}_1$}}; \node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t1) at ([yshift=1em]Trm5.north) {\scriptsize{$\textbf{h}_1$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t2) at ([yshift=1em]Trm6.north) {\scriptsize{$\textbf{h}_2$}}; \node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t2) at ([yshift=1em]Trm6.north) {\scriptsize{$\textbf{h}_2$}};
...@@ -5214,6 +5224,12 @@ GPT-2 (Transformer) & Radford et al. & 2019 & 35.7 ...@@ -5214,6 +5224,12 @@ GPT-2 (Transformer) & Radford et al. & 2019 & 35.7
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e4) at ([yshift=-1em]Trm3.south) {\scriptsize{$\textbf{e}_4$}}; \node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e4) at ([yshift=-1em]Trm3.south) {\scriptsize{$\textbf{e}_4$}};
\node [anchor=north,inner sep=4pt] (sep5) at ([yshift=-1em]sep.south) {\scriptsize{...}}; \node [anchor=north,inner sep=4pt] (sep5) at ([yshift=-1em]sep.south) {\scriptsize{...}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e5) at ([yshift=-1em]Trm4.south) {\scriptsize{$\textbf{e}_m$}}; \node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e5) at ([yshift=-1em]Trm4.south) {\scriptsize{$\textbf{e}_m$}};
\node [anchor=south] (word1) at ([yshift=-1.5em]e1.south) {\footnotesize {Once}};
\node [anchor=south] (word2) at ([yshift=-1.7em]e2.south) {\footnotesize {[MASK]}};
\node [anchor=south] (word3) at ([yshift=-1.5em]e3.south) {\footnotesize {a}};
\node [anchor=south] (word4) at ([yshift=-1.5em]e4.south) {\footnotesize {time}};
\node [anchor=south] (wordseq) at ([yshift=-2.0em]sep5.south) {\footnotesize{...}};
\node [anchor=south] (word4) at ([yshift=-1.5em]e5.south) {\footnotesize {island}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t1) at ([yshift=1em]Trm5.north) {\scriptsize{$\textbf{h}_1$}}; \node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t1) at ([yshift=1em]Trm5.north) {\scriptsize{$\textbf{h}_1$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t2) at ([yshift=1em]Trm6.north) {\scriptsize{$\textbf{h}_2$}}; \node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t2) at ([yshift=1em]Trm6.north) {\scriptsize{$\textbf{h}_2$}};
......
...@@ -520,12 +520,12 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -520,12 +520,12 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\node[rnnnode,fill=blue!30!white,right=\base of rnn3] (rnn4) {}; \node[rnnnode,fill=blue!30!white,right=\base of rnn3] (rnn4) {};
\node[rnnnode,fill=green!30!white,below=\base of rnn4] (emb4) {}; \node[rnnnode,fill=green!30!white,below=\base of rnn4] (emb4) {};
\node[wordnode,below=0pt of emb4] (word4) {EOS}; \node[wordnode,below=0pt of emb4] (word4) {$\langle$eos$\rangle$};
\draw[-latex'] (emb4.north) to (rnn4.south); \draw[-latex'] (emb4.north) to (rnn4.south);
\draw[-latex'] (rnn3.east) to (rnn4.west); \draw[-latex'] (rnn3.east) to (rnn4.west);
} }
\visible<4->{ \visible<4->{
\draw[decoration={mirror,brace},decorate] (word1.south west) to node [auto,anchor=north,align=center] {编码器} ([yshift=-0.2em]word4.south east); \draw[decoration={mirror,brace},decorate] ([yshift=-0.2em]word1.south west) to node [auto,anchor=north,align=center] {编码器} ([yshift=-0.2em]word4.south east);
} }
\visible<5->{ \visible<5->{
\node[rnnnode,fill=purple] (repr) at (rnn4) {}; \node[rnnnode,fill=purple] (repr) at (rnn4) {};
...@@ -535,7 +535,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -535,7 +535,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\visible<6->{ \visible<6->{
\node[rnnnode,fill=blue!30!white,right=\base of rnn4] (rnn5) {}; \node[rnnnode,fill=blue!30!white,right=\base of rnn4] (rnn5) {};
\node[rnnnode,fill=green!30!white,below=\base of rnn5] (emb5) {}; \node[rnnnode,fill=green!30!white,below=\base of rnn5] (emb5) {};
\node[wordnode,below=0pt of emb5] (word5) {SOS}; \node[wordnode,below=0pt of emb5] (word5) {$\langle$sos$\rangle$};
\draw[-latex'] (emb5.north) to (rnn5.south); \draw[-latex'] (emb5.north) to (rnn5.south);
\draw[-latex'] (rnn4.east) to (rnn5.west); \draw[-latex'] (rnn4.east) to (rnn5.west);
\node[rnnnode,fill=red!30!white,above=\base of rnn5] (softmax1) {}; \node[rnnnode,fill=red!30!white,above=\base of rnn5] (softmax1) {};
...@@ -578,7 +578,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -578,7 +578,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\node[wordnode,anchor=base] (word8) at (\XCoord,\YCoord) {fine}; \node[wordnode,anchor=base] (word8) at (\XCoord,\YCoord) {fine};
\ExtractX{$(emb8)$} \ExtractX{$(emb8)$}
\ExtractY{$(out1.base)$} \ExtractY{$(out1.base)$}
\node[wordnode,anchor=base] (out4) at (\XCoord,\YCoord) {EOS}; \node[wordnode,anchor=base] (out4) at (\XCoord,\YCoord) {$\langle$eos$\rangle$};
\draw[-latex'] (emb8.north) to (rnn8.south); \draw[-latex'] (emb8.north) to (rnn8.south);
\draw[-latex'] (rnn7.east) to (rnn8.west); \draw[-latex'] (rnn7.east) to (rnn8.west);
\draw[-latex'] (rnn8.north) to (softmax4.south); \draw[-latex'] (rnn8.north) to (softmax4.south);
...@@ -720,7 +720,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -720,7 +720,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\node [anchor=north,rnnnode,fill=blue!30!white] (e2) at ([yshift=-1em]node12.south) {\tiny{}}; \node [anchor=north,rnnnode,fill=blue!30!white] (e2) at ([yshift=-1em]node12.south) {\tiny{}};
\node [anchor=north,rnnnode,fill=blue!30!white] (e3) at ([yshift=-1em]node13.south) {\tiny{}}; \node [anchor=north,rnnnode,fill=blue!30!white] (e3) at ([yshift=-1em]node13.south) {\tiny{}};
\node [anchor=north,rnnnode,fill=blue!30!white] (e4) at ([yshift=-1em]node14.south) {\tiny{}}; \node [anchor=north,rnnnode,fill=blue!30!white] (e4) at ([yshift=-1em]node14.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w1) at ([yshift=-1em]e1.south) {\tiny{$<$s$>$}}; \node [anchor=north,inner sep=2pt] (w1) at ([yshift=-1em]e1.south) {\tiny{$<$sos$>$}};
\node [anchor=north,inner sep=2pt] (w2) at ([yshift=-1em]e2.south) {\tiny{}}; \node [anchor=north,inner sep=2pt] (w2) at ([yshift=-1em]e2.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w3) at ([yshift=-1em]e3.south) {\tiny{我们}}; \node [anchor=north,inner sep=2pt] (w3) at ([yshift=-1em]e3.south) {\tiny{我们}};
\node [anchor=north,inner sep=2pt] (w4) at ([yshift=-1em]e4.south) {\tiny{开始}}; \node [anchor=north,inner sep=2pt] (w4) at ([yshift=-1em]e4.south) {\tiny{开始}};
...@@ -1072,10 +1072,10 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -1072,10 +1072,10 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\draw[-latex'] (enc3.north) .. controls +(north:0.3\base) and +(east:\base) .. (bridge) .. controls +(west:2.7\base) and +(west:0.3\base) .. (dec1.west); \draw[-latex'] (enc3.north) .. controls +(north:0.3\base) and +(east:\base) .. (bridge) .. controls +(west:2.7\base) and +(west:0.3\base) .. (dec1.west);
\visible<2->{ \visible<2->{
\node [anchor=east] (line1) at ([xshift=-3em,yshift=0.5em]softmax1.west) {\scriptsize{基于RNN的隐层状态$\textbf{s}_i$}}; \node [anchor=east] (line1) at ([xshift=-3em,yshift=0.5em]softmax1.west) {\scriptsize{基于RNN的隐层状态$\textbf{s}_j$}};
\node [anchor=north west] (line2) at ([yshift=0.3em]line1.south west) {\scriptsize{预测目标词的概率}}; \node [anchor=north west] (line2) at ([yshift=0.3em]line1.south west) {\scriptsize{预测目标词的概率}};
\node [anchor=north west] (line3) at ([yshift=0.3em]line2.south west) {\scriptsize{通常,用Softmax函数}}; \node [anchor=north west] (line3) at ([yshift=0.3em]line2.south west) {\scriptsize{通常,用Softmax函数}};
\node [anchor=north west] (line4) at ([yshift=0.3em]line3.south west) {\scriptsize{实现 $\textrm{P}(y_i|...)$}}; \node [anchor=north west] (line4) at ([yshift=0.3em]line3.south west) {\scriptsize{实现 $\textrm{P}(y_j|...)$}};
} }
\visible<3->{ \visible<3->{
...@@ -1833,7 +1833,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -1833,7 +1833,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax\x) at ([yshift=0.5\base]dec\x.north) {}; \node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax\x) at ([yshift=0.5\base]dec\x.north) {};
% Decoder input words % Decoder input words
\node[wordnode,below=0pt of demb1] (decwordin) {EOS}; \node[wordnode,below=0pt of demb1] (decwordin) {$\langle$sos$\rangle$};
\ExtractX{$(demb2.south)$} \ExtractX{$(demb2.south)$}
\ExtractY{$(decwordin.base)$} \ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Do}; \node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Do};
...@@ -1890,7 +1890,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -1890,7 +1890,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Station}; \node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Station};
\ExtractX{$(softmax10.north)$} \ExtractX{$(softmax10.north)$}
\ExtractY{$(decwordout.base)$} \ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {EOS}; \node[wordnode,anchor=base] () at (\XCoord,\YCoord) {$\langle$eos$\rangle$};
% Connections % Connections
\draw[-latex'] (init.east) to (enc1.west); \draw[-latex'] (init.east) to (enc1.west);
...@@ -1971,7 +1971,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -1971,7 +1971,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\node[wordnode,below=0pt of eemb7] () {怎么}; \node[wordnode,below=0pt of eemb7] () {怎么};
\node[wordnode,below=0pt of eemb8] () {}; \node[wordnode,below=0pt of eemb8] () {};
\node[wordnode,below=0pt of eemb9] () {}; \node[wordnode,below=0pt of eemb9] () {};
\node[wordnode,below=0pt of eemb10] () {EOS}; \node[wordnode,below=0pt of eemb10] () {$\langle$eos$\rangle$};
% RNN Decoder % RNN Decoder
\foreach \x in {1,2,...,10} \foreach \x in {1,2,...,10}
...@@ -2041,7 +2041,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -2041,7 +2041,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Station}; \node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Station};
\ExtractX{$(softmax10.north)$} \ExtractX{$(softmax10.north)$}
\ExtractY{$(decwordout.base)$} \ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {EOS}; \node[wordnode,anchor=base] () at (\XCoord,\YCoord) {$\langle$eos$\rangle$};
% Connections % Connections
\draw[-latex'] (init1.east) to (enc11.west); \draw[-latex'] (init1.east) to (enc11.west);
...@@ -2187,7 +2187,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -2187,7 +2187,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\begin{itemize} \begin{itemize}
\item 在注意力机制中,每个目标语单词的生成会使用一个动态的源语表示,而非一个统一的固定表示 \item 在注意力机制中,每个目标语单词的生成会使用一个动态的源语表示,而非一个统一的固定表示
\begin{itemize} \begin{itemize}
\item 这里$\textbf{C}_i$表示第$i$个目标语单词所使用的源语表示 \item 这里$\textbf{C}_j$表示第$j$个目标语单词所使用的源语表示
\end{itemize} \end{itemize}
\end{itemize} \end{itemize}
...@@ -2286,15 +2286,15 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -2286,15 +2286,15 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
%%%------------------------------------------------------------------------------------------------------------ %%%------------------------------------------------------------------------------------------------------------
%%% C_i的定义 %%% C_i的定义
\begin{frame}{上下文向量$\textbf{C}_i$} \begin{frame}{上下文向量$\textbf{C}_j$}
\begin{itemize} \begin{itemize}
\item 对于目标语位置$i$,$\textbf{C}_i$是目标语$i$使用的上下文向量 \item 对于目标语位置$j$,$\textbf{C}_j$是目标语$j$使用的上下文向量
\begin{itemize} \begin{itemize}
\item $\textbf{h}_j$表示编码器第$j$个位置的隐层状态 \item $\textbf{h}_i$表示编码器第$i$个位置的隐层状态
\item $\textbf{s}_i$表示解码器第$i$个位置的隐层状态 \item $\textbf{s}_j$表示解码器第$j$个位置的隐层状态
\item<2-> $\alpha_{i,j}$表示注意力权重,表示目标语第$i$个位置与源语第$j$个位置之间的相关性大小 \item<2-> $\alpha_{i,j}$表示注意力权重,表示目标语第$j$个位置与源语第$i$个位置之间的相关性大小
\item<2-> $a(\cdot)$表示注意力函数,计算$\textbf{s}_{i-1}$和$\textbf{h}_j$之间的相关性 \item<2-> $a(\cdot)$表示注意力函数,计算$\textbf{s}_{j-1}$和$\textbf{h}_i$之间的相关性
\item<3-> $\textbf{C}_i$是所有源语编码表示$\{\textbf{h}_j\}$的加权求和,权重为$\{\alpha_{i,j}\}$ \item<3-> $\textbf{C}_j$是所有源语编码表示$\{\textbf{h}_i\}$的加权求和,权重为$\{\alpha_{i,j}\}$
\end{itemize} \end{itemize}
\end{itemize} \end{itemize}
...@@ -2306,23 +2306,23 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -2306,23 +2306,23 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\node [anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h1) at (0,0) {\scriptsize{$\textbf{h}_1$}}; \node [anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h1) at (0,0) {\scriptsize{$\textbf{h}_1$}};
\node [anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h2) at ([xshift=1em]h1.east) {\scriptsize{$\textbf{h}_2$}}; \node [anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h2) at ([xshift=1em]h1.east) {\scriptsize{$\textbf{h}_2$}};
\node [anchor=west,inner sep=0pt,minimum width=3em] (h3) at ([xshift=0.5em]h2.east) {\scriptsize{...}}; \node [anchor=west,inner sep=0pt,minimum width=3em] (h3) at ([xshift=0.5em]h2.east) {\scriptsize{...}};
\node [anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h4) at ([xshift=0.5em]h3.east) {\scriptsize{$\textbf{h}_n$}}; \node [anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h4) at ([xshift=0.5em]h3.east) {\scriptsize{$\textbf{h}_m$}};
\node [anchor=south,circle,minimum size=1.0em,draw,ublue,thick] (sum) at ([yshift=2em]h2.north east) {}; \node [anchor=south,circle,minimum size=1.0em,draw,ublue,thick] (sum) at ([yshift=2em]h2.north east) {};
\draw [thick,-,ublue] (sum.north) -- (sum.south); \draw [thick,-,ublue] (sum.north) -- (sum.south);
\draw [thick,-,ublue] (sum.west) -- (sum.east); \draw [thick,-,ublue] (sum.west) -- (sum.east);
\node [anchor=south,draw,fill=green!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (th1) at ([yshift=2em,xshift=-1em]sum.north west) {\scriptsize{$\textbf{s}_{i-1}$}}; \node [anchor=south,draw,fill=green!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (th1) at ([yshift=2em,xshift=-1em]sum.north west) {\scriptsize{$\textbf{s}_{j-1}$}};
\node [anchor=west,draw,fill=green!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (th2) at ([xshift=2em]th1.east) {\scriptsize{$\textbf{s}_{i}$}}; \node [anchor=west,draw,fill=green!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (th2) at ([xshift=2em]th1.east) {\scriptsize{$\textbf{s}_{j}$}};
\draw [->] (h1.north) .. controls +(north:0.8) and +(west:1) .. (sum.190) node [pos=0.3,left] {\tiny{$\alpha_{i,1}$}}; \draw [->] (h1.north) .. controls +(north:0.8) and +(west:1) .. (sum.190) node [pos=0.3,left] {\tiny{$\alpha_{1,j}$}};
\draw [->] (h2.north) .. controls +(north:0.6) and +(220:0.2) .. (sum.220) node [pos=0.2,right] {\tiny{$\alpha_{i,2}$}}; \draw [->] (h2.north) .. controls +(north:0.6) and +(220:0.2) .. (sum.220) node [pos=0.2,right] {\tiny{$\alpha_{2,j}$}};
\draw [->] (h4.north) .. controls +(north:0.8) and +(east:1) .. (sum.-10) node [pos=0.1,left] (alphan) {\tiny{$\alpha_{i,n}$}}; \draw [->] (h4.north) .. controls +(north:0.8) and +(east:1) .. (sum.-10) node [pos=0.1,left] (alphan) {\tiny{$\alpha_{m,j}$}};
\draw [->] ([xshift=-1.5em]th1.west) -- ([xshift=-0.1em]th1.west); \draw [->] ([xshift=-1.5em]th1.west) -- ([xshift=-0.1em]th1.west);
\draw [->] ([xshift=0.1em]th1.east) -- ([xshift=-0.1em]th2.west); \draw [->] ([xshift=0.1em]th1.east) -- ([xshift=-0.1em]th2.west);
\draw [->] ([xshift=0.1em]th2.east) -- ([xshift=1.5em]th2.east); \draw [->] ([xshift=0.1em]th2.east) -- ([xshift=1.5em]th2.east);
\draw [->] (sum.north) .. controls +(north:0.8) and +(west:0.2) .. ([yshift=-0.4em,xshift=-0.1em]th2.west) node [pos=0.2,right] (ci) {\scriptsize{$\textbf{C}_{i}$}}; \draw [->] (sum.north) .. controls +(north:0.8) and +(west:0.2) .. ([yshift=-0.4em,xshift=-0.1em]th2.west) node [pos=0.2,right] (ci) {\scriptsize{$\textbf{C}_{j}$}};
\node [anchor=south,inner sep=1pt] (output) at ([yshift=0.8em]th2.north) {\tiny{输出层}}; \node [anchor=south,inner sep=1pt] (output) at ([yshift=0.8em]th2.north) {\tiny{输出层}};
\draw [->] ([yshift=0.1em]th2.north) -- ([yshift=-0.1em]output.south); \draw [->] ([yshift=0.1em]th2.north) -- ([yshift=-0.1em]output.south);
...@@ -2334,11 +2334,11 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -2334,11 +2334,11 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\node [anchor=north] (enc42) at ([yshift=0.5em]enc4.south) {\tiny{(位置$4$)}}; \node [anchor=north] (enc42) at ([yshift=0.5em]enc4.south) {\tiny{(位置$4$)}};
\visible<2->{ \visible<2->{
\node [anchor=west] (math1) at ([xshift=5em,yshift=1em]th2.east) {$\textbf{C}_i = \sum_{j} \alpha_{i,j} \textbf{h}_j \ \ $}; \node [anchor=west] (math1) at ([xshift=5em,yshift=1em]th2.east) {$\textbf{C}_j = \sum_{i} \alpha_{i,j} \textbf{h}_i \ \ $};
} }
\visible<3->{ \visible<3->{
\node [anchor=north west] (math2) at ([yshift=-2em]math1.south west) {$\alpha_{i,j} = \frac{\exp(\beta_{i,j})}{\sum_{j'} \exp(\beta_{i,j'})}$}; \node [anchor=north west] (math2) at ([yshift=-2em]math1.south west) {$\alpha_{i,j} = \frac{\exp(\beta_{i,j})}{\sum_{i'} \exp(\beta_{i',j})}$};
\node [anchor=north west] (math3) at ([yshift=-0em]math2.south west) {$\beta_{i,j} = a(\textbf{s}_{i-1}, \textbf{h}_j)$}; \node [anchor=north west] (math3) at ([yshift=-0em]math2.south west) {$\beta_{i,j} = a(\textbf{s}_{j-1}, \textbf{h}_i)$};
} }
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
...@@ -2418,7 +2418,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -2418,7 +2418,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\node[srcnode] (src3) at ([xshift=0.5\hnode]src2.south west) {\scriptsize{learned}}; \node[srcnode] (src3) at ([xshift=0.5\hnode]src2.south west) {\scriptsize{learned}};
\node[srcnode] (src4) at ([xshift=0.5\hnode]src3.south west) {\scriptsize{nothing}}; \node[srcnode] (src4) at ([xshift=0.5\hnode]src3.south west) {\scriptsize{nothing}};
\node[srcnode] (src5) at ([xshift=0.5\hnode]src4.south west) {\scriptsize{?}}; \node[srcnode] (src5) at ([xshift=0.5\hnode]src4.south west) {\scriptsize{?}};
\node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {\scriptsize{EOS}}; \node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {\scriptsize{$\langle$eos$\rangle$}};
% target % target
\node[tgtnode] (tgt1) at (-6.0*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{}}; \node[tgtnode] (tgt1) at (-6.0*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{}};
...@@ -2428,7 +2428,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -2428,7 +2428,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\node[tgtnode] (tgt5) at ([yshift=-0.5\hnode]tgt4.north east) {\scriptsize{}}; \node[tgtnode] (tgt5) at ([yshift=-0.5\hnode]tgt4.north east) {\scriptsize{}};
\node[tgtnode] (tgt6) at ([yshift=-0.5\hnode]tgt5.north east) {\scriptsize{}}; \node[tgtnode] (tgt6) at ([yshift=-0.5\hnode]tgt5.north east) {\scriptsize{}};
\node[tgtnode] (tgt7) at ([yshift=-0.5\hnode]tgt6.north east) {\scriptsize{?}}; \node[tgtnode] (tgt7) at ([yshift=-0.5\hnode]tgt6.north east) {\scriptsize{?}};
\node[tgtnode] (tgt8) at ([yshift=-0.5\hnode]tgt7.north east) {\scriptsize{EOS}}; \node[tgtnode] (tgt8) at ([yshift=-0.5\hnode]tgt7.north east) {\scriptsize{$\langle$eos$\rangle$}};
\end{scope} \end{scope}
...@@ -2464,7 +2464,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -2464,7 +2464,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\visible<3->{ \visible<3->{
% coverage score formula node % coverage score formula node
\node [anchor=north west] (formula) at ([xshift=-0.3\hnode,yshift=-1.5\hnode]attn11.south) {\small{不同$\textbf{C}_i$所对应的源语言词的权重是不同的}}; \node [anchor=north west] (formula) at ([xshift=-0.3\hnode,yshift=-1.5\hnode]attn11.south) {\small{不同$\textbf{C}_j$所对应的源语言词的权重是不同的}};
\node [anchor=north west] (example) at (formula.south west) {\footnotesize{$\textbf{C}_2=0.4 \times \textbf{h}(\textrm{``你''}) + 0.4 \times \textbf{h}(\textrm{``什么''}) +$}}; \node [anchor=north west] (example) at (formula.south west) {\footnotesize{$\textbf{C}_2=0.4 \times \textbf{h}(\textrm{``你''}) + 0.4 \times \textbf{h}(\textrm{``什么''}) +$}};
\node [anchor=north west] (example2) at ([yshift=0.4em]example.south west) {\footnotesize{$\ \ \ \ \ \ \ \ 0 \times \textbf{h}(\textrm{``都''}) + 0.1 \times \textbf{h}(\textrm{``没''}) + \dots$}}; \node [anchor=north west] (example2) at ([yshift=0.4em]example.south west) {\footnotesize{$\ \ \ \ \ \ \ \ 0 \times \textbf{h}(\textrm{``都''}) + 0.1 \times \textbf{h}(\textrm{``没''}) + \dots$}};
} }
...@@ -2526,7 +2526,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{ ...@@ -2526,7 +2526,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{
\item 再来看一下注意力权重的定义。这个过程实际上是对$a(\cdot,\cdot)$做指数归一化:\\ \item 再来看一下注意力权重的定义。这个过程实际上是对$a(\cdot,\cdot)$做指数归一化:\\
\vspace{-0.3em} \vspace{-0.3em}
\begin{displaymath} \begin{displaymath}
\alpha_{i,j} = \frac{\exp(a(\textbf{s}_{i-1}, \textbf{h}_j))}{\sum_{j'} \exp(a(\textbf{s}_{i-1}, \textbf{h}_{j'}))} \alpha_{i,j} = \frac{\exp(a(\textbf{s}_{j-1}, \textbf{h}_i))}{\sum_{i'} \exp(a(\textbf{s}_{j-1}, \textbf{h}_{i'}))}
\end{displaymath} \end{displaymath}
\item<2-> 注意力函数$a(\textbf{s},\textbf{h})$的目的是捕捉$\textbf{s}$$\textbf{h}$之间的\alert{相似性},这也可以被看作是目标语表示和源语言表示的一种``统一化'',即把源语言和目标语表示在同一个语义空间,进而语义相近的内容有更大的相似性。\visible<3->{定义$a(\textbf{s},\textbf{h})$的方式:} \item<2-> 注意力函数$a(\textbf{s},\textbf{h})$的目的是捕捉$\textbf{s}$$\textbf{h}$之间的\alert{相似性},这也可以被看作是目标语表示和源语言表示的一种``统一化'',即把源语言和目标语表示在同一个语义空间,进而语义相近的内容有更大的相似性。\visible<3->{定义$a(\textbf{s},\textbf{h})$的方式:}
...@@ -2572,7 +2572,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{ ...@@ -2572,7 +2572,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{
ymin=-0.5,ymax=5.5, ymin=-0.5,ymax=5.5,
xmin=-0.5,xmax=2.5, xmin=-0.5,xmax=2.5,
ytick={0,1,...,5}, ytick={0,1,...,5},
yticklabels={The,New,York,Times,comments,EOS}, yticklabels={The,New,York,Times,comments,$\langle$eos$\rangle$},
yticklabel style={font=\scriptsize}, yticklabel style={font=\scriptsize},
xtick={0,1,2}, xtick={0,1,2},
xticklabels={纽约时报,发表,评论}, xticklabels={纽约时报,发表,评论},
...@@ -2593,7 +2593,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{ ...@@ -2593,7 +2593,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{
ymin=-0.5,ymax=5.5, ymin=-0.5,ymax=5.5,
xmin=-0.5,xmax=3.5, xmin=-0.5,xmax=3.5,
ytick={0,1,...,5}, ytick={0,1,...,5},
yticklabels={I,came,to,this,world,EOS}, yticklabels={I,came,to,this,world,$\langle$eos$\rangle$},
yticklabel style={font=\scriptsize}, yticklabel style={font=\scriptsize},
xtick={0,1,2,3}, xtick={0,1,2,3},
xticklabels={我,来到,这个,世界}, xticklabels={我,来到,这个,世界},
...@@ -2715,7 +2715,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{ ...@@ -2715,7 +2715,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{
%%% 如何进一步理解注意力机制 - 回到机器翻译任务 %%% 如何进一步理解注意力机制 - 回到机器翻译任务
\begin{frame}{重新解释注意力机制(续)} \begin{frame}{重新解释注意力机制(续)}
\begin{itemize} \begin{itemize}
\item 回到机器翻译,如果把目标语状态$\textbf{s}_{i-1}$看做query,而把源语言所有位置的最上层RNN表示$\textbf{h}_{j}$看做{\color{ugreen} \textbf{key}}{\color{red} \textbf{value}} \item 回到机器翻译,如果把目标语状态$\textbf{s}_{j-1}$看做query,而把源语言所有位置的最上层RNN表示$\textbf{h}_{i}$看做{\color{ugreen} \textbf{key}}{\color{red} \textbf{value}}
\end{itemize} \end{itemize}
\vspace{-1.5em} \vspace{-1.5em}
...@@ -3084,7 +3084,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{ ...@@ -3084,7 +3084,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{
% step 6 % step 6
\visible<6->{ \visible<6->{
\node[rnnnode] (rnn34) at ([xshift=2\base]rnn33) {}; \node[rnnnode] (rnn34) at ([xshift=2\base]rnn33) {};
\node[wordnode,anchor=south] (o4) at ([yshift=\base]rnn34.north) {EOS}; \node[wordnode,anchor=south] (o4) at ([yshift=\base]rnn34.north) {$\langle$eos$\rangle$};
\draw[-latex'] (rnn33) to (rnn34); \draw[-latex'] (rnn33) to (rnn34);
\draw[-latex'] (rnn24) to (rnn34); \draw[-latex'] (rnn24) to (rnn34);
\draw[-latex'] (rnn34) to (o4); \draw[-latex'] (rnn34) to (o4);
...@@ -3136,7 +3136,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{ ...@@ -3136,7 +3136,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{
\hat{\textbf{y}} = \argmax_{\textbf{y}} \log\textrm{P}(\textbf{y}|\textbf{x}) = \argmax_{\textbf{y}} \sum_{j=1}^{n} \log\textrm{P}(y_j|\textbf{y}_{<j}, \textbf{x}) \hat{\textbf{y}} = \argmax_{\textbf{y}} \log\textrm{P}(\textbf{y}|\textbf{x}) = \argmax_{\textbf{y}} \sum_{j=1}^{n} \log\textrm{P}(y_j|\textbf{y}_{<j}, \textbf{x})
\end{displaymath} \end{displaymath}
\item<2-> 由于$y_i$的生成需要依赖$y_{i-1}$,因此无法同时生成$\{y_1,...,y_n\}$。常用的方法是自左向右逐个单词生成 \item<2-> 由于$y_j$的生成需要依赖$y_{j-1}$,因此无法同时生成$\{y_1,...,y_n\}$。常用的方法是自左向右逐个单词生成
\end{itemize} \end{itemize}
...@@ -3156,7 +3156,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{ ...@@ -3156,7 +3156,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{
\node [rnnnode,anchor=west,fill=green!20] (e3) at ([xshift=1em]e2.east) {\tiny{$e_x()$}}; \node [rnnnode,anchor=west,fill=green!20] (e3) at ([xshift=1em]e2.east) {\tiny{$e_x()$}};
\node [anchor=north,inner sep=2pt] (w1) at ([yshift=-0.6em]e1.south) {\tiny{}}; \node [anchor=north,inner sep=2pt] (w1) at ([yshift=-0.6em]e1.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w2) at ([yshift=-0.8em]e2.south) {\tiny{...}}; \node [anchor=north,inner sep=2pt] (w2) at ([yshift=-0.8em]e2.south) {\tiny{...}};
\node [anchor=north,inner sep=2pt] (w3) at ([yshift=-0.6em]e3.south) {\tiny{EOS}}; \node [anchor=north,inner sep=2pt] (w3) at ([yshift=-0.6em]e3.south) {\tiny{$\langle$eos$\rangle$}};
\draw [->] (w1.north) -- ([yshift=-0.1em]e1.south); \draw [->] (w1.north) -- ([yshift=-0.1em]e1.south);
\draw [->] (w3.north) -- ([yshift=-0.1em]e3.south); \draw [->] (w3.north) -- ([yshift=-0.1em]e3.south);
...@@ -3202,7 +3202,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{ ...@@ -3202,7 +3202,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{
\node [anchor=west,inner sep=2pt] (o5) at ([xshift=0.3em]o4.east) {\tiny{...}}; \node [anchor=west,inner sep=2pt] (o5) at ([xshift=0.3em]o4.east) {\tiny{...}};
} }
\visible<4->{ \visible<4->{
\node [anchor=north,inner sep=2pt] (wt1) at ([yshift=-0.6em]t1.south) {\tiny{EOS}}; \node [anchor=north,inner sep=2pt] (wt1) at ([yshift=-0.6em]t1.south) {\tiny{$\langle$sos$\rangle$}};
} }
\visible<7->{ \visible<7->{
\node [anchor=north,inner sep=2pt] (wt2) at ([yshift=-0.6em]t2.south) {\tiny{Have}}; \node [anchor=north,inner sep=2pt] (wt2) at ([yshift=-0.6em]t2.south) {\tiny{Have}};
...@@ -3355,7 +3355,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{ ...@@ -3355,7 +3355,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{
\node [anchor=west,inner sep=2pt] (o4) at ([xshift=0.3em]o3.east) {\tiny{...}}; \node [anchor=west,inner sep=2pt] (o4) at ([xshift=0.3em]o3.east) {\tiny{...}};
} }
\node [wnode,anchor=north] (wt1) at ([yshift=-0.8em]t1.south) {\tiny{EOS}}; \node [wnode,anchor=north] (wt1) at ([yshift=-0.8em]t1.south) {\tiny{$\langle$sos$\rangle$}};
\visible<6->{ \visible<6->{
\node [wnode,anchor=north] (wt2) at ([yshift=-0.8em]t2.south) {\tiny{Have}}; \node [wnode,anchor=north] (wt2) at ([yshift=-0.8em]t2.south) {\tiny{Have}};
...@@ -3546,7 +3546,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{ ...@@ -3546,7 +3546,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{
% words % words
\node[wnode,below=0pt of encemb1] (encword1) {}; \node[wnode,below=0pt of encemb1] (encword1) {};
\node[wnode,below=0pt of encemb2] (encword2) {什么}; \node[wnode,below=0pt of encemb2] (encword2) {什么};
\node[wnode,below=0pt of encemb4] (encword4) {EOS}; \node[wnode,below=0pt of encemb4] (encword4) {$\langle$eos$\rangle$};
% connections % connections
\draw[-latex'] (enc11) to (enc12); \draw[-latex'] (enc11) to (enc12);
...@@ -3645,7 +3645,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{ ...@@ -3645,7 +3645,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{
\node[rnnnode,fill=blue!20,above=\base of dec54] (softmax4) {}; \node[rnnnode,fill=blue!20,above=\base of dec54] (softmax4) {};
% words % words
\node[wnode,below=0pt of decemb1] (decinword1) {SOS}; \node[wnode,below=0pt of decemb1] (decinword1) {$\langle$sos$\rangle$};
\node[wnode,below=0pt of decemb2] (decinword2) {Have}; \node[wnode,below=0pt of decemb2] (decinword2) {Have};
\node[wnode,below=0pt of decemb4] (decinword4) {?}; \node[wnode,below=0pt of decemb4] (decinword4) {?};
...@@ -3655,7 +3655,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{ ...@@ -3655,7 +3655,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{
\node[wnode,anchor=base] (decoutword2) at (\XCoord,\YCoord) {you}; \node[wnode,anchor=base] (decoutword2) at (\XCoord,\YCoord) {you};
\ExtractX{$(softmax4.north)$} \ExtractX{$(softmax4.north)$}
\ExtractY{$(decoutword1.base)$} \ExtractY{$(decoutword1.base)$}
\node[wnode,anchor=base] (decoutword4) at (\XCoord,\YCoord) {EOS}; \node[wnode,anchor=base] (decoutword4) at (\XCoord,\YCoord) {$\langle$eos$\rangle$};
% connections % connections
\draw[-latex'] (dec11) to (dec12); \draw[-latex'] (dec11) to (dec12);
...@@ -3810,7 +3810,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{ ...@@ -3810,7 +3810,7 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{
\node [anchor=north,rnnnode,fill=blue!30!white] (e2) at ([yshift=-2em]node12.south) {\tiny{}}; \node [anchor=north,rnnnode,fill=blue!30!white] (e2) at ([yshift=-2em]node12.south) {\tiny{}};
\node [anchor=north,rnnnode,fill=blue!30!white] (e3) at ([yshift=-2em]node13.south) {\tiny{}}; \node [anchor=north,rnnnode,fill=blue!30!white] (e3) at ([yshift=-2em]node13.south) {\tiny{}};
\node [anchor=north,rnnnode,fill=blue!30!white] (e4) at ([yshift=-2em]node14.south) {\tiny{}}; \node [anchor=north,rnnnode,fill=blue!30!white] (e4) at ([yshift=-2em]node14.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w1) at ([yshift=-1em]e1.south) {\tiny{$<$s$>$}}; \node [anchor=north,inner sep=2pt] (w1) at ([yshift=-1em]e1.south) {\tiny{$\langle$sos$\rangle$}};
\node [anchor=north,inner sep=2pt] (w2) at ([yshift=-1em]e2.south) {\tiny{}}; \node [anchor=north,inner sep=2pt] (w2) at ([yshift=-1em]e2.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w3) at ([yshift=-1em]e3.south) {\tiny{我们}}; \node [anchor=north,inner sep=2pt] (w3) at ([yshift=-1em]e3.south) {\tiny{我们}};
\node [anchor=north,inner sep=2pt] (w4) at ([yshift=-1em]e4.south) {\tiny{开始}}; \node [anchor=north,inner sep=2pt] (w4) at ([yshift=-1em]e4.south) {\tiny{开始}};
...@@ -4100,9 +4100,9 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{ ...@@ -4100,9 +4100,9 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{
\node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}}; \node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}}; \node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}}; \node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}};
\node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\tiny{$\textbf{解码器输入: $<$SOS$>$ I am fine}$}}; \node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\tiny{$\textbf{解码器输入: $<$sos$>$ I am fine}$}};
\node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}}; \node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
\node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\tiny{$\textbf{解码器输出: I am fine $<$EOS$>$ }$}}; \node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\tiny{$\textbf{解码器输出: I am fine $<$eos$>$ }$}};
\draw [->] (sa2.north) -- (res3.south); \draw [->] (sa2.north) -- (res3.south);
\draw [->] (res3.north) -- (ed1.south); \draw [->] (res3.north) -- (ed1.south);
...@@ -4127,6 +4127,9 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{ ...@@ -4127,6 +4127,9 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{
\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=ugreen!70] [fit = (sa1) (res1) (ffn1) (res2)] (box0) {}; \node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=ugreen!70] [fit = (sa1) (res1) (ffn1) (res2)] (box0) {};
\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=red!60] [fit = (sa2) (res3) (res5)] (box1) {}; \node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=red!60] [fit = (sa2) (res3) (res5)] (box1) {};
\node [ugreen,font=\scriptsize] (count) at ([xshift=-1.5em,yshift=-1em]encoder.south) {$6\times$};
\node [red,font=\scriptsize] (count) at ([xshift=10.8em,yshift=0em]decoder.south) {$\times 6$};
\end{scope} \end{scope}
\end{tikzpicture} \end{tikzpicture}
\end{center} \end{center}
...@@ -4180,9 +4183,9 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{ ...@@ -4180,9 +4183,9 @@ $\textrm{``you''} = \argmax_{y_2} \textrm{P}(y_2|\textbf{s}_1, y_1)$ & $\textrm{
\node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}}; \node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}}; \node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}}; \node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}};
\node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\tiny{$\textbf{解码器输入: $<$SOS$>$ I am fine}$}}; \node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\tiny{$\textbf{解码器输入: $<$sos$>$ I am fine}$}};
\node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}}; \node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
\node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\tiny{$\textbf{解码器输出: I am fine $<$EOS$>$ }$}}; \node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\tiny{$\textbf{解码器输出: I am fine $<$eos$>$ }$}};
\draw [->] (sa2.north) -- (res3.south); \draw [->] (sa2.north) -- (res3.south);
\draw [->] (res3.north) -- (ed1.south); \draw [->] (res3.north) -- (ed1.south);
...@@ -4414,9 +4417,9 @@ PE_{(pos,2i+1)} = cos(pos/10000^{2i/d_{model}}) ...@@ -4414,9 +4417,9 @@ PE_{(pos,2i+1)} = cos(pos/10000^{2i/d_{model}})
\node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}}; \node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}}; \node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}}; \node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}};
\node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\tiny{$\textbf{解码器输入: $<$SOS$>$ I am fine}$}}; \node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\tiny{$\textbf{解码器输入: $<$sos$>$ I am fine}$}};
\node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}}; \node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
\node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\tiny{$\textbf{解码器输出: I am fine $<$EOS$>$ }$}}; \node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\tiny{$\textbf{解码器输出: I am fine $<$eos$>$ }$}};
\draw [->] (sa2.north) -- (res3.south); \draw [->] (sa2.north) -- (res3.south);
\draw [->] (res3.north) -- (ed1.south); \draw [->] (res3.north) -- (ed1.south);
...@@ -4591,7 +4594,7 @@ PE_{(pos,2i+1)} = cos(pos/10000^{2i/d_{model}}) ...@@ -4591,7 +4594,7 @@ PE_{(pos,2i+1)} = cos(pos/10000^{2i/d_{model}})
\node[srcnode] (src3) at ([xshift=0.5\hnode]src2.south west) {\scriptsize{learned}}; \node[srcnode] (src3) at ([xshift=0.5\hnode]src2.south west) {\scriptsize{learned}};
\node[srcnode] (src4) at ([xshift=0.5\hnode]src3.south west) {\scriptsize{nothing}}; \node[srcnode] (src4) at ([xshift=0.5\hnode]src3.south west) {\scriptsize{nothing}};
\node[srcnode] (src5) at ([xshift=0.5\hnode]src4.south west) {\scriptsize{?}}; \node[srcnode] (src5) at ([xshift=0.5\hnode]src4.south west) {\scriptsize{?}};
\node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {\scriptsize{EOS}}; \node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {\scriptsize{$\langle$eos$\rangle$}};
% target % target
\node[tgtnode] (tgt1) at (-6.0*0.5*\hnode,-1.05*\hnode+5.5*0.5*\hnode) {\scriptsize{Have}}; \node[tgtnode] (tgt1) at (-6.0*0.5*\hnode,-1.05*\hnode+5.5*0.5*\hnode) {\scriptsize{Have}};
...@@ -4599,7 +4602,7 @@ PE_{(pos,2i+1)} = cos(pos/10000^{2i/d_{model}}) ...@@ -4599,7 +4602,7 @@ PE_{(pos,2i+1)} = cos(pos/10000^{2i/d_{model}})
\node[tgtnode] (tgt3) at ([yshift=-0.5\hnode]tgt2.north east) {\scriptsize{learned}}; \node[tgtnode] (tgt3) at ([yshift=-0.5\hnode]tgt2.north east) {\scriptsize{learned}};
\node[tgtnode] (tgt4) at ([yshift=-0.5\hnode]tgt3.north east) {\scriptsize{nothing}}; \node[tgtnode] (tgt4) at ([yshift=-0.5\hnode]tgt3.north east) {\scriptsize{nothing}};
\node[tgtnode] (tgt5) at ([yshift=-0.5\hnode]tgt4.north east) {\scriptsize{?}}; \node[tgtnode] (tgt5) at ([yshift=-0.5\hnode]tgt4.north east) {\scriptsize{?}};
\node[tgtnode] (tgt6) at ([yshift=-0.5\hnode]tgt5.north east) {\scriptsize{EOS}}; \node[tgtnode] (tgt6) at ([yshift=-0.5\hnode]tgt5.north east) {\scriptsize{$\langle$eos$\rangle$}};
\node [rounded corners=0.3em,fill=yellow!30] (qk) at ([xshift=2.5em,yshift=5em]a55.north) {\large{$\frac{QK^{T}}{\sqrt{d_k}}$}}; \node [rounded corners=0.3em,fill=yellow!30] (qk) at ([xshift=2.5em,yshift=5em]a55.north) {\large{$\frac{QK^{T}}{\sqrt{d_k}}$}};
\node [rounded corners=0.3em,anchor=west] (add) at ([xshift=0.1em]qk.east) {\large{+}}; \node [rounded corners=0.3em,anchor=west] (add) at ([xshift=0.1em]qk.east) {\large{+}};
...@@ -4630,7 +4633,7 @@ PE_{(pos,2i+1)} = cos(pos/10000^{2i/d_{model}}) ...@@ -4630,7 +4633,7 @@ PE_{(pos,2i+1)} = cos(pos/10000^{2i/d_{model}})
\node[srcnode] (src3) at ([xshift=0.5\hnode]src2.south west) {\scriptsize{learned}}; \node[srcnode] (src3) at ([xshift=0.5\hnode]src2.south west) {\scriptsize{learned}};
\node[srcnode] (src4) at ([xshift=0.5\hnode]src3.south west) {\scriptsize{nothing}}; \node[srcnode] (src4) at ([xshift=0.5\hnode]src3.south west) {\scriptsize{nothing}};
\node[srcnode] (src5) at ([xshift=0.5\hnode]src4.south west) {\scriptsize{?}}; \node[srcnode] (src5) at ([xshift=0.5\hnode]src4.south west) {\scriptsize{?}};
\node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {\scriptsize{EOS}}; \node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {\scriptsize{$\langle$eos$\rangle$}};
% target % target
\node[tgtnode] (tgt1) at (5.4*0.5*\hnode,-1.05*\hnode+5.5*0.5*\hnode) {\scriptsize{Have}}; \node[tgtnode] (tgt1) at (5.4*0.5*\hnode,-1.05*\hnode+5.5*0.5*\hnode) {\scriptsize{Have}};
...@@ -4638,7 +4641,7 @@ PE_{(pos,2i+1)} = cos(pos/10000^{2i/d_{model}}) ...@@ -4638,7 +4641,7 @@ PE_{(pos,2i+1)} = cos(pos/10000^{2i/d_{model}})
\node[tgtnode] (tgt3) at ([yshift=-0.5\hnode]tgt2.north east) {\scriptsize{learned}}; \node[tgtnode] (tgt3) at ([yshift=-0.5\hnode]tgt2.north east) {\scriptsize{learned}};
\node[tgtnode] (tgt4) at ([yshift=-0.5\hnode]tgt3.north east) {\scriptsize{nothing}}; \node[tgtnode] (tgt4) at ([yshift=-0.5\hnode]tgt3.north east) {\scriptsize{nothing}};
\node[tgtnode] (tgt5) at ([yshift=-0.5\hnode]tgt4.north east) {\scriptsize{?}}; \node[tgtnode] (tgt5) at ([yshift=-0.5\hnode]tgt4.north east) {\scriptsize{?}};
\node[tgtnode] (tgt6) at ([yshift=-0.5\hnode]tgt5.north east) {\scriptsize{EOS}}; \node[tgtnode] (tgt6) at ([yshift=-0.5\hnode]tgt5.north east) {\scriptsize{$\langle$eos$\rangle$}};
\node [rounded corners=0.3em,anchor=west,fill=green!30] (softmax) at ([xshift=-6em]left.east) {\large{Softmax}}; \node [rounded corners=0.3em,anchor=west,fill=green!30] (softmax) at ([xshift=-6em]left.east) {\large{Softmax}};
...@@ -4800,9 +4803,9 @@ PE_{(pos,2i+1)} = cos(pos/10000^{2i/d_{model}}) ...@@ -4800,9 +4803,9 @@ PE_{(pos,2i+1)} = cos(pos/10000^{2i/d_{model}})
\node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}}; \node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}}; \node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}}; \node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}};
\node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\tiny{$\textbf{解码器输入: $<$SOS$>$ I am fine}$}}; \node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\tiny{$\textbf{解码器输入: $<$sos$>$ I am fine}$}};
\node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}}; \node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
\node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\tiny{$\textbf{解码器输出: I am fine $<$EOS$>$ }$}}; \node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\tiny{$\textbf{解码器输出: I am fine $<$eos$>$ }$}};
\draw [->] (sa2.north) -- (res3.south); \draw [->] (sa2.north) -- (res3.south);
\draw [->] (res3.north) -- (ed1.south); \draw [->] (res3.north) -- (ed1.south);
...@@ -5030,9 +5033,9 @@ x_{l+1} = x_l+\mathcal{F}(x_l) ...@@ -5030,9 +5033,9 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}}; \node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}}; \node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}}; \node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}};
\node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\tiny{$\textbf{解码器输入: $<$SOS$>$ I am fine}$}}; \node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\tiny{$\textbf{解码器输入: $<$sos$>$ I am fine}$}};
\node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}}; \node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
\node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\tiny{$\textbf{解码器输出: I am fine $<$EOS$>$ }$}}; \node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\tiny{$\textbf{解码器输出: I am fine $<$eos$>$ }$}};
\draw [->] (sa2.north) -- (res3.south); \draw [->] (sa2.north) -- (res3.south);
\draw [->] (res3.north) -- (ed1.south); \draw [->] (res3.north) -- (ed1.south);
...@@ -5170,7 +5173,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l) ...@@ -5170,7 +5173,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\node [rnnnode,anchor=west,fill=green!20] (e3) at ([xshift=1em]e2.east) {\tiny{$e_x()$}}; \node [rnnnode,anchor=west,fill=green!20] (e3) at ([xshift=1em]e2.east) {\tiny{$e_x()$}};
\node [anchor=north,inner sep=2pt] (w1) at ([yshift=-0.6em]e1.south) {\tiny{}}; \node [anchor=north,inner sep=2pt] (w1) at ([yshift=-0.6em]e1.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w2) at ([yshift=-0.6em]e2.south) {\tiny{}}; \node [anchor=north,inner sep=2pt] (w2) at ([yshift=-0.6em]e2.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w3) at ([yshift=-0.6em]e3.south) {\tiny{EOS}}; \node [anchor=north,inner sep=2pt] (w3) at ([yshift=-0.6em]e3.south) {\tiny{$\langle$eos$\rangle$}};
\node [anchor=south] (dot1) at ([xshift=0.4em,yshift=-0.7em]h1.south) {\tiny{...}}; \node [anchor=south] (dot1) at ([xshift=0.4em,yshift=-0.7em]h1.south) {\tiny{...}};
\node [anchor=south] (dot2) at ([xshift=-0.4em,yshift=-0.7em]h3.south) {\tiny{...}}; \node [anchor=south] (dot2) at ([xshift=-0.4em,yshift=-0.7em]h3.south) {\tiny{...}};
...@@ -5212,7 +5215,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l) ...@@ -5212,7 +5215,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\node [anchor=south,fill=black!5!white,minimum height=1.1em,minimum width=13em,inner sep=2pt,rounded corners=1pt,draw] (loss) at ([xshift=1.8em,yshift=1em]o2.north) {\scriptsize{\textbf{Cross Entropy Loss}}}; \node [anchor=south,fill=black!5!white,minimum height=1.1em,minimum width=13em,inner sep=2pt,rounded corners=1pt,draw] (loss) at ([xshift=1.8em,yshift=1em]o2.north) {\scriptsize{\textbf{Cross Entropy Loss}}};
} }
\visible<3->{ \visible<3->{
\node [anchor=north,inner sep=2pt] (wt1) at ([yshift=-0.6em]t1.south) {\tiny{EOS}}; \node [anchor=north,inner sep=2pt] (wt1) at ([yshift=-0.6em]t1.south) {\tiny{$\langle$sos$\rangle$}};
\node [anchor=north,inner sep=2pt] (wt2) at ([yshift=-0.6em]t2.south) {\tiny{How}}; \node [anchor=north,inner sep=2pt] (wt2) at ([yshift=-0.6em]t2.south) {\tiny{How}};
\node [anchor=north,inner sep=2pt] (wt3) at ([yshift=-0.8em]t3.south) {\tiny{are}}; \node [anchor=north,inner sep=2pt] (wt3) at ([yshift=-0.8em]t3.south) {\tiny{are}};
\node [anchor=north,inner sep=2pt] (wt4) at ([yshift=-0.8em]t4.south) {\tiny{you}}; \node [anchor=north,inner sep=2pt] (wt4) at ([yshift=-0.8em]t4.south) {\tiny{you}};
...@@ -5413,7 +5416,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l) ...@@ -5413,7 +5416,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\node [rnnnode,anchor=west,fill=green!20] (e3) at ([xshift=1em]e2.east) {\tiny{$e_x()$}}; \node [rnnnode,anchor=west,fill=green!20] (e3) at ([xshift=1em]e2.east) {\tiny{$e_x()$}};
\node [anchor=north,inner sep=2pt] (w1) at ([yshift=-0.6em]e1.south) {\tiny{}}; \node [anchor=north,inner sep=2pt] (w1) at ([yshift=-0.6em]e1.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w2) at ([yshift=-0.6em]e2.south) {\tiny{}}; \node [anchor=north,inner sep=2pt] (w2) at ([yshift=-0.6em]e2.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w3) at ([yshift=-0.6em]e3.south) {\tiny{EOS}}; \node [anchor=north,inner sep=2pt] (w3) at ([yshift=-0.6em]e3.south) {\tiny{$\langle$eos$\rangle$}};
%\node [anchor=south] (dot1) at ([xshift=0.4em,yshift=-0.7em]h1.south) {\tiny{...}}; %\node [anchor=south] (dot1) at ([xshift=0.4em,yshift=-0.7em]h1.south) {\tiny{...}};
%\node [anchor=south] (dot2) at ([xshift=-0.4em,yshift=-0.7em]h3.south) {\tiny{...}}; %\node [anchor=south] (dot2) at ([xshift=-0.4em,yshift=-0.7em]h3.south) {\tiny{...}};
...@@ -5473,7 +5476,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l) ...@@ -5473,7 +5476,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
%\node [anchor=west,inner sep=2pt] (o5) at ([xshift=0.3em]o4.east) {\tiny{...}}; %\node [anchor=west,inner sep=2pt] (o5) at ([xshift=0.3em]o4.east) {\tiny{...}};
} }
\visible<4->{ \visible<4->{
\node [anchor=north,inner sep=2pt] (wt1) at ([yshift=-0.6em]t1.south) {\tiny{EOS}}; \node [anchor=north,inner sep=2pt] (wt1) at ([yshift=-0.6em]t1.south) {\tiny{$\langle$sos$\rangle$}};
} }
\visible<6->{ \visible<6->{
\node [anchor=north,inner sep=2pt] (wt2) at ([yshift=-0.6em]t2.south) {\tiny{How}}; \node [anchor=north,inner sep=2pt] (wt2) at ([yshift=-0.6em]t2.south) {\tiny{How}};
...@@ -5497,7 +5500,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l) ...@@ -5497,7 +5500,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\visible<8->{ \visible<8->{
\node [anchor=center,inner sep=2pt] (wo3) at ([yshift=1.2em]o3.north) {\tiny{you}}; \node [anchor=center,inner sep=2pt] (wo3) at ([yshift=1.2em]o3.north) {\tiny{you}};
\node [anchor=south,inner sep=2pt] (wos3) at (wo3.north) {\tiny{\textbf{[step 3]}}}; \node [anchor=south,inner sep=2pt] (wos3) at (wo3.north) {\tiny{\textbf{[step 3]}}};
\node [anchor=center,inner sep=2pt] (wo4) at ([yshift=1.2em]o4.north) {\tiny{EOS}}; \node [anchor=center,inner sep=2pt] (wo4) at ([yshift=1.2em]o4.north) {\tiny{$\langle$eos$\rangle$}};
\node [anchor=south,inner sep=2pt] (wos4) at (wo4.north) {\tiny{\textbf{[step 4]}}}; \node [anchor=south,inner sep=2pt] (wos4) at (wo4.north) {\tiny{\textbf{[step 4]}}};
} }
...@@ -5606,7 +5609,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l) ...@@ -5606,7 +5609,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\node [anchor=north,rnnnode,fill=blue!30!white] (e2) at ([yshift=-2em]node12.south) {\tiny{}}; \node [anchor=north,rnnnode,fill=blue!30!white] (e2) at ([yshift=-2em]node12.south) {\tiny{}};
\node [anchor=north,rnnnode,fill=blue!30!white] (e3) at ([yshift=-2em]node13.south) {\tiny{}}; \node [anchor=north,rnnnode,fill=blue!30!white] (e3) at ([yshift=-2em]node13.south) {\tiny{}};
\node [anchor=north,rnnnode,fill=blue!30!white] (e4) at ([yshift=-2em]node14.south) {\tiny{}}; \node [anchor=north,rnnnode,fill=blue!30!white] (e4) at ([yshift=-2em]node14.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w1) at ([yshift=-1em]e1.south) {\tiny{$<$s$>$}}; \node [anchor=north,inner sep=2pt] (w1) at ([yshift=-1em]e1.south) {\tiny{$\langle$sos$\rangle$}};
\node [anchor=north,inner sep=2pt] (w2) at ([yshift=-1em]e2.south) {\tiny{}}; \node [anchor=north,inner sep=2pt] (w2) at ([yshift=-1em]e2.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w3) at ([yshift=-1em]e3.south) {\tiny{我们}}; \node [anchor=north,inner sep=2pt] (w3) at ([yshift=-1em]e3.south) {\tiny{我们}};
\node [anchor=north,inner sep=2pt] (w4) at ([yshift=-1em]e4.south) {\tiny{开始}}; \node [anchor=north,inner sep=2pt] (w4) at ([yshift=-1em]e4.south) {\tiny{开始}};
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论