new pages

f9ae7c63 · xiaotong · a7a807cb · f9ae7c63 · f9ae7c63 · f9ae7c63
Commit f9ae7c63 authored Nov 12, 2019 by xiaotong
--- a/Section06-Neural-Machine-Translation/Figures/dog-hat.jpg
+++ b/Section06-Neural-Machine-Translation/Figures/dog-hat.jpg
--- a/Section06-Neural-Machine-Translation/section06-test.tex
+++ b/Section06-Neural-Machine-Translation/section06-test.tex
@@ -82,6 +82,7 @@
 \newcounter{mycount2}
 \newcounter{mycount3}
 \newcounter{mycount4}
+\newlength{\mystep} % length register for the horizontal spacing of word nodes in the TikZ figure; declared outside the frame, which now only calls \setlength

 \usefonttheme[onlylarge]{structurebold}

@@ -137,29 +138,22 @@
 \subsection{注意力机制}

 %%%------------------------------------------------------------------------------------------------------------
-%%% 注意力机制
-\begin{frame}{简单的编码器-解码器足够了？}
+%%% 注意力机制的简单示例
+\begin{frame}{神经机器翻译的注意力机制}
 \begin{itemize}
-\item 将源语言句子编码为一个实数向量确实很神奇，但是也有明显问题
+\item 在注意力机制中，每个目标语单词的生成会使用一个动态的源语表示，而非一个统一的固定表示
    \begin{itemize}
-    \item 整个句子编码到一个向量里可能会有信息丢失
-    \item 缺少源语-目标语词之间词语的对应。某种意义上讲，一个目标语单词的生成无法区分不同源语单词的贡献
-    \end{itemize}
-\item 但是，翻译是具有很强的局部性的，有些词之间会有更紧密的关系，这种关系可以在建模中考虑
-    \begin{itemize}
-    \item 源语词和目标语词的对应并不是均匀的，甚至非常稀疏
-    \item 比如，一些短语的生成仅依赖于源文中的少数词
+    \item 这里$C_i$表示第$i$个目标语单词所使用的源语表示
    \end{itemize}
 \end{itemize}

 \begin{center}
 \begin{tikzpicture}
 \begin{scope}
-\newlength{\step}
-\setlength{\step}{1.6em}
+\setlength{\mystep}{1.6em}

 \foreach \x in {1,2,...,6}
-    \node[] (s\x) at (\x * \step,0) {};
+    \node[] (s\x) at (\x * \mystep,0) {};

 \node [] (ws1) at (s1) {\scriptsize{这}};
 \node [] (ws2) at (s2) {\scriptsize{是}};
@@ -169,7 +163,7 @@
 \node [] (ws6) at (s6) {\scriptsize{句子}};

 \foreach \x in {1,2,...,6}
-    \node[] (t\x) at (\x * \step + 2.4in,0) {};
+    \node[] (t\x) at (\x * \mystep + 2.4in,0) {};

 \node [] (wt1) at (t1) {\scriptsize{This}};
 \node [] (wt2) at (t2) {\scriptsize{is}};
@@ -190,15 +184,11 @@
 \foreach \x in {1,2,...,5}
    \draw[<-] ([yshift=0.1em]t\x.north) -- ([yshift=1.2em]t\x.north);

- \draw[<-] ([yshift=0.1em,xshift=1em]t6.north) -- ([yshift=1.2em,xshift=1em]t6.north);
-
+\draw[<-] ([yshift=0.1em,xshift=1em]t6.north) -- ([yshift=1.2em,xshift=1em]t6.north);

 \end{scope}
 \end{tikzpicture}
 \end{center}
-\vspace{-1.5em}
-
-

 \end{frame}


--- a/Section06-Neural-Machine-Translation/section06.tex
+++ b/Section06-Neural-Machine-Translation/section06.tex
@@ -81,6 +81,8 @@
 \newcounter{mycount2}
 \newcounter{mycount3}
 \newcounter{mycount4}
+\newlength{\mystep} % length register for the horizontal spacing of word nodes in TikZ figures; frames set it with \setlength before use
+

 \usefonttheme[onlylarge]{structurebold}

@@ -702,6 +704,9 @@ NLP问题的隐含结构假设 & 无隐含结构假设，端到端学习 \\
 \end{frame}

 %%%------------------------------------------------------------------------------------------------------------
+\subsection{模型结构}
+
+%%%------------------------------------------------------------------------------------------------------------
 %%% 模型结构
 \begin{frame}{基于循环神经网络的翻译模型}
 \begin{itemize}
@@ -1041,10 +1046,102 @@ NLP问题的隐含结构假设 & 无隐含结构假设，端到端学习 \\
 \end{frame}

 %%%------------------------------------------------------------------------------------------------------------
-\subsection{模型结构}
+\subsection{注意力机制}

 %%%------------------------------------------------------------------------------------------------------------
-\subsection{注意力机制}
+%%% Problems with the simple encoder-decoder framework (content is revealed over overlays 1-3)
+\begin{frame}{简单的编码器-解码器就足够了？}
+\begin{itemize}
+\item 将源语言句子编码为一个实数向量确实很神奇，但是也有明显问题
+    \begin{itemize}
+    \item 整个句子编码到一个向量里可能会有信息丢失
+    \item 缺少源语单词与目标语单词之间的对应。某种意义上讲，一个目标语单词的生成无法区分不同源语单词的贡献
+    \end{itemize}
+\item<2-> 但是，翻译是具有很强的\alert{局部性}的，有些词之间会有更紧密的关系
+    \begin{itemize}
+    \item 源语词和目标语词的对应并不是均匀的，甚至非常稀疏
+    \item 比如，一些短语的生成仅依赖于源文中的少数词
+    \item<3-> 这些关系可以在表示模型中考虑
+    \end{itemize}
+\end{itemize}
+
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}
+\setlength{\mystep}{1.6em} % horizontal distance between neighbouring word anchors
+
+\foreach \x in {1,2,...,6} % empty anchor nodes s1..s6 for the source words, \mystep apart on y=0
+    \node[] (s\x) at (\x * \mystep,0) {};
+
+\node [] (ws1) at (s1) {\scriptsize{这}};
+\node [] (ws2) at (s2) {\scriptsize{是}};
+\node [] (ws3) at (s3) {\scriptsize{个}};
+\node [] (ws4) at (s4) {\scriptsize{很长}};
+\node [] (ws5) at (s5) {\scriptsize{的}};
+\node [] (ws6) at (s6) {\scriptsize{句子}};
+
+\foreach \x in {1,2,...,6} % empty anchor nodes t1..t6 for the target words, same spacing shifted 2.4in to the right
+    \node[] (t\x) at (\x * \mystep + 2.4in,0) {};
+
+\node [] (wt1) at (t1) {\scriptsize{This}};
+\node [] (wt2) at (t2) {\scriptsize{is}};
+\node [] (wt3) at ([yshift=-1pt]t3) {\scriptsize{a}};
+\node [] (wt4) at ([yshift=-0.1em]t4) {\scriptsize{very}};
+\node [] (wt5) at (t5) {\scriptsize{long}};
+\node [] (wt6) at ([xshift=1em]t6) {\scriptsize{sentence}}; % last word is nudged 1em right of its anchor
+
+\node [anchor=south west,fill=red!30,minimum width=1.6in,minimum height=1.5em] (encoder) at ([yshift=1.0em]ws1.north west) {\footnotesize{Encoder}}; % encoder box, 1em above the first source word
+\node [anchor=west,fill=blue!30,minimum width=1.9in,minimum height=1.5em] (decoder) at ([xshift=4.5em]encoder.east) {\footnotesize{Decoder}}; % decoder box, 4.5em right of the encoder
+\node [anchor=west,fill=green!30,minimum height=1.5em] (representation) at ([xshift=1em]encoder.east) {\footnotesize{表示}}; % representation box, 1em right of the encoder
+\draw [->,thick] ([xshift=1pt]encoder.east)--([xshift=-1pt]representation.west);
+\draw [->,thick] ([xshift=1pt]representation.east)--([xshift=-1pt]decoder.west);
+
+\foreach \x in {1,2,...,6} % short upward arrow above every source anchor
+    \draw[->] ([yshift=0.1em]s\x.north) -- ([yshift=1.2em]s\x.north);
+
+\foreach \x in {1,2,...,5} % short downward-pointing arrow above target anchors t1..t5
+    \draw[<-] ([yshift=0.1em]t\x.north) -- ([yshift=1.2em]t\x.north);
+
+\draw[<-] ([yshift=0.1em,xshift=1em]t6.north) -- ([yshift=1.2em,xshift=1em]t6.north); % t6 arrow drawn separately with the same 1em shift as wt6
+
+\visible<2->{ % overlay 2 onward: curved double-headed links from source word ws4 to target words wt4 and wt5
+\draw [<->,ublue,thick] ([xshift=0.3em]ws4.south) .. controls +(-60:1) and +(south:1) .. (wt4.south);
+\draw [<->,ublue,thick] (ws4.south) .. controls +(south:1.0) and +(south:1.5) .. (wt5.south);
+}
+
+\visible<3->{ % overlay 3 onward: label below the representation box with a dotted arrow pointing up to it
+\node [anchor=north,fill=green!30,draw=ublue] (attentionlabel) at ([yshift=-3.4em]representation.south) {\footnotesize{词语的关注度}};
+\draw [->,dotted,very thick,ublue] ([yshift=0.1em]attentionlabel.north)--([yshift=-0.1em]representation.south);
+}
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
+
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% Attention mechanism: image-attention analogy, picture, then a paper reference on overlay 2
+\begin{frame}{注意力机制}
+\begin{itemize}
+\item 关注的``局部性''在图像处理、语音识别等领域也有广泛讨论，比如，对于下图
+    \begin{itemize}
+    \item 关注的顺序：大狗的帽子 $\to$ 大狗 $\to$ 小狗的帽子 $\to$ 小狗
+    \end{itemize}
+\item 人往往不是``均匀地''看图像中的所有位置，翻译是一个道理，生成一个目标语单词时参考的源语单词不会太多
+\end{itemize}
+
+\begin{center}
+\includegraphics[scale=0.20]{./Figures/dog-hat.jpg} % image file added in this same commit, drawn at scale factor 0.20
+\end{center}
+
+\begin{itemize} % single item, revealed from overlay 2 onward
+\item<2-> \alert{注意力机制}在机器翻译中已经成功应用，经典的论文\\
+\textbf{Neural Machine Translation by Jointly Learning to Align and Translate}\\
+\textbf{Bahdanau et al., 2015, In Proc of ICLR}
+\end{itemize}
+
+\end{frame}

 %%%------------------------------------------------------------------------------------------------------------
 \section{Transformer}