Commit 95f50640 by xiaotong

new pages

parent 58a3a27c
......@@ -138,55 +138,28 @@
\subsection{注意力机制}
%%%------------------------------------------------------------------------------------------------------------
%%% 注意力机制的简单示例
\begin{frame}{神经机器翻译的注意力机制}
%%% C_i的定义
\begin{frame}{上下文向量$C_i$}
\begin{itemize}
\item 在注意力机制中,每个目标语单词的生成会使用一个动态的源语表示,而非一个统一的固定表示
\begin{itemize}
\item 这里$C_i$表示第$i$个目标语单词所使用的源语表示
\end{itemize}
\item 对于目标语位置$i$,$C_i$是目标语$i$使用的上下文向量
\end{itemize}
\begin{center}
\begin{tikzpicture}
\begin{scope}
\setlength{\mystep}{1.6em}
\foreach \x in {1,2,...,6}
\node[] (s\x) at (\x * \mystep,0) {};
\node [] (ws1) at (s1) {\scriptsize{这}};
\node [] (ws2) at (s2) {\scriptsize{是}};
\node [] (ws3) at (s3) {\scriptsize{一个}};
\node [] (ws4) at (s4) {\scriptsize{很长}};
\node [] (ws5) at (s5) {\scriptsize{的}};
\node [] (ws6) at (s6) {\scriptsize{句子}};
\foreach \x in {1,2,...,6}
\node[] (t\x) at (\x * \mystep + 2.4in,0) {};
\node [] (wt1) at (t1) {\scriptsize{This}};
\node [] (wt2) at (t2) {\scriptsize{is}};
\node [] (wt3) at ([yshift=-1pt]t3) {\scriptsize{a}};
\node [] (wt4) at ([yshift=-0.1em]t4) {\scriptsize{very}};
\node [] (wt5) at (t5) {\scriptsize{long}};
\node [] (wt6) at ([xshift=1em]t6) {\scriptsize{sentence}};
\node [anchor=south west,fill=red!30,minimum width=1.6in,minimum height=1.5em] (encoder) at ([yshift=1.0em]ws1.north west) {\footnotesize{Encoder}};
\node [anchor=west,fill=blue!30,minimum width=1.9in,minimum height=1.5em] (decoder) at ([xshift=4.5em]encoder.east) {\footnotesize{Decoder}};
\node [anchor=west,fill=green!30,minimum height=1.5em] (representation) at ([xshift=1em]encoder.east) {\footnotesize{表示}};
\draw [->,thick] ([xshift=1pt]encoder.east)--([xshift=-1pt]representation.west);
\draw [->,thick] ([xshift=1pt]representation.east)--([xshift=-1pt]decoder.west);
\begin{scope}
\foreach \x in {1,2,...,6}
\draw[->] ([yshift=0.1em]s\x.north) -- ([yshift=1.2em]s\x.north);
\node [anchor=west,fill=red!20!white,inner sep=2pt,minimum width=2em] (h1) at (0,0) {\scriptsize{$h_1$}};
\node [anchor=west,fill=red!20!white,inner sep=2pt,minimum width=2em] (h2) at ([xshift=1em]h1.east) {\scriptsize{$h_2$}};
\node [anchor=west,inner sep=0pt,minimum width=2em] (h3) at ([xshift=0.7em]h2.east) {\scriptsize{...}};
\node [anchor=west,fill=red!20!white,inner sep=2pt,minimum width=2em] (h4) at ([xshift=0.7em]h3.east) {\scriptsize{$h_n$}};
\foreach \x in {1,2,...,5}
\draw[<-] ([yshift=0.1em]t\x.north) -- ([yshift=1.2em]t\x.north);
\node [anchor=south,circle,minimum size=1.5em,draw,ublue,thick] (sum) at ([yshift=2em]h2.north east) {};
\draw[<-] ([yshift=0.1em,xshift=1em]t6.north) -- ([yshift=1.2em,xshift=1em]t6.north);
\node [anchor=south,fill=green!20!white,inner sep=3pt,minimum width=2em] (th1) at ([yshift=2em]sum.north) {\scriptsize{$s_{i-1}$}};
\end{scope}
\end{tikzpicture}
\end{center}
......
......@@ -1036,8 +1036,15 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
%%%------------------------------------------------------------------------------------------------------------
%%% 一些变种
\begin{frame}{改进 - 多层网络}
%%% 图
\vspace*{-1cm}
\begin{itemize}
\item 堆叠更多层的网络,可以提升模型的表示能力
\begin{itemize}
\item 常见的NMT系统有2--8层
\end{itemize}
\end{itemize}
\vspace*{-0.6cm}
\begin{center}
\hspace*{-2.5cm}
\begin{tikzpicture}
......@@ -1053,7 +1060,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\foreach \x [count=\y from 0] in {1,2,...,10}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\x) at ([xshift=0.4\base]eemb\y.east) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,fill=blue!30!white,anchor=south] (enc1\x) at ([yshift=0.5\base]eemb\x.north) {};
\node[rnnnode,fill=blue!30!white,anchor=south] (enc1\x) at ([yshift=0.3\base]eemb\x.north) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,fill=blue!30!white,anchor=south] (enc2\x) at ([yshift=0.5\base]enc1\x.north) {};
\node[wordnode,left=0.4\base of enc11] (init1) {$0$};
......@@ -1074,7 +1081,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\foreach \x in {1,2,...,10}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([yshift=1.5\base]enc2\x.north) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,fill=blue!30!white,anchor=south] (dec1\x) at ([yshift=0.5\base]demb\x.north) {};
\node[rnnnode,fill=blue!30!white,anchor=south] (dec1\x) at ([yshift=0.3\base]demb\x.north) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,fill=blue!30!white,anchor=south] (dec2\x) at ([yshift=0.5\base]dec1\x.north) {};
\foreach \x in {1,2,...,10}
......@@ -1269,7 +1276,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\begin{itemize}
\item 关注的顺序:大狗的帽子 $\to$ 大狗 $\to$ 小狗的帽子 $\to$ 小狗
\end{itemize}
\item 人往往不是``均匀地''看图像中的所有位置,翻译是一个道理,生成一个目标语单词时参考的源语单词不会太多
\item 人往往不是``均匀地''看图像中的所有区域,翻译是一个道理,生成一个目标语单词时参考的源语单词不会太多
\end{itemize}
\begin{center}
......@@ -1285,6 +1292,109 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 注意力机制的简单示例
\begin{frame}{神经机器翻译的注意力机制}
\begin{itemize}
\item 在注意力机制中,每个目标语单词的生成会使用一个动态的源语表示,而非一个统一的固定表示
\begin{itemize}
\item 这里$C_i$表示第$i$个目标语单词所使用的源语表示
\end{itemize}
\end{itemize}
\vspace{0.4em}
\begin{center}
\begin{tikzpicture}
\setlength{\mystep}{1.6em}
%%% (a) a simple encoder-decoder model: one fixed representation for all target words
\begin{scope}
% source words on a horizontal grid; "这 是 一个 很长 的 句子" parallels "This is a very long sentence"
\foreach \x in {1,2,...,6}
\node[] (s\x) at (\x * \mystep,0) {};
\node [] (ws1) at (s1) {\scriptsize{这}};
\node [] (ws2) at (s2) {\scriptsize{是}};
\node [] (ws3) at (s3) {\scriptsize{一个}};
\node [] (ws4) at (s4) {\scriptsize{很长}};
\node [] (ws5) at (s5) {\scriptsize{的}};
\node [] (ws6) at (s6) {\scriptsize{句子}};
% target words, shifted 2.4in to the right of the source side
\foreach \x in {1,2,...,6}
\node[] (t\x) at (\x * \mystep + 2.4in,0) {};
\node [] (wt1) at (t1) {\scriptsize{This}};
\node [] (wt2) at (t2) {\scriptsize{is}};
\node [] (wt3) at ([yshift=-1pt]t3) {\scriptsize{a}};
\node [] (wt4) at ([yshift=-0.1em]t4) {\scriptsize{very}};
\node [] (wt5) at (t5) {\scriptsize{long}};
\node [] (wt6) at ([xshift=1em]t6) {\scriptsize{sentence}};
% encoder box -> single fixed representation -> decoder box
\node [anchor=south west,fill=red!30,minimum width=1.6in,minimum height=1.5em] (encoder) at ([yshift=1.0em]ws1.north west) {\footnotesize{Encoder}};
\node [anchor=west,fill=blue!30,minimum width=1.9in,minimum height=1.5em] (decoder) at ([xshift=4.5em]encoder.east) {\footnotesize{Decoder}};
\node [anchor=west,fill=green!30,minimum height=1.5em] (representation) at ([xshift=1em]encoder.east) {\footnotesize{表示}};
\draw [->,thick] ([xshift=1pt]encoder.east)--([xshift=-1pt]representation.west);
\draw [->,thick] ([xshift=1pt]representation.east)--([xshift=-1pt]decoder.west);
% arrows: source words feed the encoder; decoder emits the target words
\foreach \x in {1,2,...,6}
\draw[->] ([yshift=0.1em]s\x.north) -- ([yshift=1.2em]s\x.north);
\foreach \x in {1,2,...,5}
\draw[<-] ([yshift=0.1em]t\x.north) -- ([yshift=1.2em]t\x.north);
% "sentence" label is x-shifted, so its arrow carries the same xshift
\draw[<-] ([yshift=0.1em,xshift=1em]t6.north) -- ([yshift=1.2em,xshift=1em]t6.north);
\node [anchor=north] (cap) at ([xshift=2em,yshift=-2.5em]encoder.south east) {\scriptsize{(a) 简单的编码器-解码器框架}};
\end{scope}
%%% (b) an encoder-decoder model with attention: a distinct context vector C_i per target position
\begin{scope}[yshift=-1.7in]
% same source/target layout as (a)
\foreach \x in {1,2,...,6}
\node[] (s\x) at (\x * \mystep,0) {};
\node [] (ws1) at (s1) {\scriptsize{这}};
\node [] (ws2) at (s2) {\scriptsize{是}};
\node [] (ws3) at (s3) {\scriptsize{一个}};
\node [] (ws4) at (s4) {\scriptsize{很长}};
\node [] (ws5) at (s5) {\scriptsize{的}};
\node [] (ws6) at (s6) {\scriptsize{句子}};
\foreach \x in {1,2,...,6}
\node[] (t\x) at (\x * \mystep + 2.4in,0) {};
\node [] (wt1) at (t1) {\scriptsize{This}};
\node [] (wt2) at (t2) {\scriptsize{is}};
\node [] (wt3) at ([yshift=-1pt]t3) {\scriptsize{a}};
\node [] (wt4) at ([yshift=-0.1em]t4) {\scriptsize{very}};
\node [] (wt5) at (t5) {\scriptsize{long}};
\node [] (wt6) at ([xshift=1em]t6) {\scriptsize{sentence}};
\node [anchor=south west,fill=red!30,minimum width=1.6in,minimum height=1.5em] (encoder) at ([yshift=1.0em]ws1.north west) {\footnotesize{Encoder}};
\node [anchor=west,fill=blue!30,minimum width=1.9in,minimum height=1.5em] (decoder) at ([xshift=4.5em]encoder.east) {\footnotesize{Decoder}};
\foreach \x in {1,2,...,6}
\draw[->] ([yshift=0.1em]s\x.north) -- ([yshift=1.2em]s\x.north);
\foreach \x in {1,2,...,5}
\draw[<-] ([yshift=0.1em]t\x.north) -- ([yshift=1.2em]t\x.north);
\draw[<-] ([yshift=0.1em,xshift=1em]t6.north) -- ([yshift=1.2em,xshift=1em]t6.north);
% per-target-position contexts: C_1, C_2, ..., C_i each attend to different source positions,
% drawn at increasing heights so the connector paths do not overlap
\draw [->] ([yshift=3em]s6.north) -- ([yshift=4em]s6.north) -- ([yshift=4em]t1.north) node [pos=0.5,fill=green!30,inner sep=2pt] (c1) {\scriptsize{表示$C_1$}} -- ([yshift=3em]t1.north) ;
\draw [->] ([yshift=3em]s5.north) -- ([yshift=5.3em]s5.north) -- ([yshift=5.3em]t2.north) node [pos=0.5,fill=green!30,inner sep=2pt] (c2) {\scriptsize{表示$C_2$}} -- ([yshift=3em]t2.north) ;
\draw [->] ([yshift=3.5em]s3.north) -- ([yshift=6.6em]s3.north) -- ([yshift=6.6em]t4.north) node [pos=0.5,fill=green!30,inner sep=2pt] (c3) {\scriptsize{表示$C_i$}} -- ([yshift=3.5em]t4.north) ;
% ellipses indicating the remaining (omitted) context vectors
\node [anchor=north] (smore) at ([yshift=3.5em]s3.north) {...};
\node [anchor=north] (tmore) at ([yshift=3.5em]t4.north) {...};
\node [anchor=north] (cap) at ([xshift=2em,yshift=-2.5em]encoder.south east) {\scriptsize{(b) 引入注意力机制的编码器-解码器框架}};
\end{scope}
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\section{Transformer}
%%%------------------------------------------------------------------------------------------------------------
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论