Merge branch 'master' of http://47.105.50.196/NiuTrans/Toy-MT-Introduction

9974fde7 · Lee · ed397f52 · 2b827edc · 9974fde7 · 9974fde7
Commit 9974fde7 authored Nov 14, 2019 by Lee
--- a/Section06-Neural-Machine-Translation/section06-test.tex
+++ b/Section06-Neural-Machine-Translation/section06-test.tex
--- a/Section06-Neural-Machine-Translation/section06.tex
+++ b/Section06-Neural-Machine-Translation/section06.tex
@@ -851,9 +851,176 @@ NLP问题的隐含结构假设 & 无隐含结构假设，端到端学习 \\
 %%%------------------------------------------------------------------------------------------------------------
 %%% NMT的数学描述
 \begin{frame}{数学建模}
+\begin{itemize}
+\item 对于源语言序列$\textbf{x} = \{x_1,x_2,...,x_m\}$，生成目标语序列$\textbf{y} = \{y_1,y_2,...,y_n\}$的概率可以被描述为
+
+\begin{displaymath}
+\log\textrm{P}(\textbf{y}|\textbf{x}) = \sum_{j=1}^{n} \log\textrm{P}(y_j|\textbf{y}_{<j}, \textbf{x})
+\end{displaymath}
+
+根据源语句子$\textbf{x}$和已生成的译文$\textbf{y}_{<j} = \{y_1,y_2,...,y_{j-1}\}$生成第$j$个译文$y_j$
+
+\item<2-> \textbf{核心}：如何求解$\textrm{P}(y_j|\textbf{y}_{<j}, \textbf{x})$。在这个循环神经网络模型中，有三个步骤
+    \begin{enumerate}
+    \item 输入的单词用分布式表示，如$\textbf{x}$被表示为词向量序列$e_x(\textbf{x})$，同理$\textbf{y}_{<j}$被表示为$e_y(\textbf{y}_{<j})$
+    \item 源语言句子被一个RNN编码为一个表示$C$，如前面的例子中是一个实数向量
+    \item 目标端解码用另一个RNN，因此生成$y_j$时只考虑前一个状态$s_{j-1}$（这里，$s_{j-1}$表示RNN第$j-1$步骤的隐层状态）
+    \end{enumerate}
+    
+\end{itemize}
 \end{frame}

 %%%------------------------------------------------------------------------------------------------------------
+%%% 各部分的解释
+\begin{frame}{数学建模(续)}
+
+\vspace{-1.5em}
+	\begin{center}
+		% \hspace*{-1.5cm}
+		\begin{tikzpicture}
+			\setlength{\base}{0.9cm}
+	
+			\tikzstyle{rnnnode} = [rounded corners=1pt,minimum height=0.5\base,minimum width=1\base,draw,inner sep=0pt,outer sep=0pt]
+			\tikzstyle{wordnode} = [font=\tiny]
+	
+			% RNN translation model
+			\begin{scope}[local bounding box=RNNMT]
+				% RNN Encoder
+				\coordinate (eemb0) at (0,0);
+				\foreach \x [count=\y from 0] in {1,2,...,3}
+					\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\x) at ([xshift=0.4\base]eemb\y.east) {\tiny{$e_x()$}};
+				\foreach \x in {1,2,...,3}
+					\node[rnnnode,fill=blue!30!white,anchor=south] (enc\x) at ([yshift=0.3\base]eemb\x.north) {};
+			        \node[] (enclabel1) at (enc1) {\tiny{$h_{m-2}$}};
+			        \node[] (enclabel2) at (enc2) {\tiny{$h_{m-1}$}};
+			        \node[rnnnode,fill=purple!30!white] (enclabel3) at (enc3) {\tiny{$h_{m}$}};
+				\node[wordnode,left=0.4\base of enc1] (init1) {$\cdots$};
+				\node[wordnode,left=0.4\base of eemb1] (init2) {$\cdots$};
+
+				\node[wordnode,below=0pt of eemb1] () {走};
+				\node[wordnode,below=0pt of eemb2] () {吗};
+				\node[wordnode,below=0pt of eemb3] () {$\langle$eos$\rangle$};
+	
+				% RNN Decoder
+				\foreach \x in {1,2,...,3}
+					\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([yshift=\base]enc\x.north) {\tiny{$e_y()$}};
+				\foreach \x in {1,2,...,3}
+					\node[rnnnode,fill=blue!30!white,anchor=south] (dec\x) at ([yshift=0.3\base]demb\x.north) {{\tiny{$s_\x$}}};
+				\foreach \x in {1,2,...,3}
+					\node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax\x) at ([yshift=0.3\base]dec\x.north) {\tiny{Softmax}};
+				\node[wordnode,right=0.4\base of demb3] (end1) {$\cdots$};
+				\node[wordnode,right=0.4\base of dec3] (end2) {$\cdots$};
+				\node[wordnode,right=0.4\base of softmax3] (end3) {$\cdots$};
+
+				% Decoder input words
+				\node[wordnode,below=0pt of demb1] (decwordin) {$\langle$sos$\rangle$};
+				\ExtractX{$(demb2.south)$}
+				\ExtractY{$(decwordin.base)$}
+				\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Do};
+				\ExtractX{$(demb3.south)$}
+				\ExtractY{$(decwordin.base)$}
+				\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
+
+				% Decoder output words
+				\node[wordnode,above=0pt of softmax1] (decwordout) {Do};
+				\ExtractX{$(softmax2.north)$}
+				\ExtractY{$(decwordout.base)$}
+				\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
+				\ExtractX{$(softmax3.north)$}
+				\ExtractY{$(decwordout.base)$}
+				\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {know};
+	
+				% Connections
+				\draw[-latex'] (init1.east) to (enc1.west);
+				\draw[-latex'] (dec3.east) to (end2.west);
+				\foreach \x in {1,2,...,3}
+					\draw[-latex'] (eemb\x) to (enc\x);
+				\foreach \x in {1,2,...,3}
+					\draw[-latex'] (demb\x) to (dec\x);
+				\foreach \x in {1,2,...,3}
+					\draw[-latex'] (dec\x.north) to (softmax\x.south);
+				\foreach \x [count=\y from 2] in {1,2}
+				{
+					\draw[-latex'] (enc\x.east) to (enc\y.west);
+					\draw[-latex'] (dec\x.east) to (dec\y.west);
+				}
+	
+				\coordinate (bridge) at ([yshift=0.4\base]enc2.north west);
+				\draw[-latex'] (enc3.north) .. controls +(north:0.3\base) and +(east:\base) .. (bridge) .. controls +(west:2.7\base) and +(west:0.3\base) .. (dec1.west);
+				
+				\visible<2->{
+				\node [anchor=east] (line1) at ([xshift=-3em,yshift=0.5em]softmax1.west) {\scriptsize{基于RNN的隐层状态$s_i$}};
+				\node [anchor=north west] (line2) at ([yshift=0.3em]line1.south west) {\scriptsize{预测目标词的概率}};
+				\node [anchor=north west] (line3) at ([yshift=0.3em]line2.south west) {\scriptsize{通常，用Softmax函数}};
+				\node [anchor=north west] (line4) at ([yshift=0.3em]line3.south west) {\scriptsize{实现 $\textrm{P}(y_i|...)$}};
+				}
+				
+				\visible<3->{
+				\node [anchor=north west] (line11) at ([yshift=-1.8em]line4.west) {\scriptsize{每个词的one-hot}};
+				\node [anchor=north west] (line12) at ([yshift=0.3em]line11.south west) {\scriptsize{离散化表示都被转化为}};
+				\node [anchor=north west] (line13) at ([yshift=0.3em]line12.south west) {\scriptsize{实数向量，即词嵌入}};
+				\node [anchor=north west] (line14) at ([yshift=0.3em]line13.south west) {\scriptsize{($e_x()$和$e_y()$函数)}};
+				}
+				
+				\visible<4->{
+				\node [anchor=west] (line21) at ([xshift=1.3em,yshift=1.5em]enc3.east)  {\scriptsize{源语编码器最后一个}};
+				\node [anchor=north west] (line22) at ([yshift=0.3em]line21.south west) {\scriptsize{循环单元的输出被}};
+				\node [anchor=north west] (line23) at ([yshift=0.3em]line22.south west) {\scriptsize{看作是句子的表示，}};
+				\node [anchor=north west] (line24) at ([yshift=0.3em]line23.south west) {\scriptsize{记为$C$}};
+				}
+				
+				\begin{pgfonlayer}{background}
+				\visible<2->{
+				\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=red!10,drop shadow,draw=red] [fit = (line1) (line2) (line3) (line4)] (box1) {};
+				\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,draw=red] [fit = (softmax1) (softmax2) (softmax3)] (box4) {};
+				\draw [->,dotted,very thick,red] ([yshift=1em,xshift=2.5em]box1.east) -- ([yshift=1em,xshift=0.1em]box1.east);
+				}
+				
+				\visible<3->{
+				\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=green!10,drop shadow,draw=ugreen] [fit = (line11) (line12) (line13) (line14)] (box2) {};
+				\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,draw=ugreen] [fit = (eemb1) (eemb2) (eemb3)] (box5) {};
+				\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,draw=ugreen] [fit = (demb1) (demb2) (demb3)] (box6) {};
+				\draw [->,dotted,very thick,ugreen] ([yshift=-1.3em,xshift=2.5em]box2.east) -- ([yshift=-1.3em,xshift=0.1em]box2.east);
+				\draw [->,dotted,very thick,ugreen] ([xshift=0.1em]box6.west) .. controls +(west:1) and +(east:1) .. ([yshift=1.0em]box2.east) ;
+				}
+				
+				\visible<4->{
+				\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=purple!10,drop shadow,draw=purple] [fit = (line21) (line22) (line23) (line24)] (box3) {};
+				\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,draw=purple] [fit = (enc3)] (box7) {};
+				\draw [->,dotted,very thick,purple] ([xshift=0.1em]box7.east) -- ([xshift=0.8em]box7.east) ;
+				}
+							
+				\end{pgfonlayer}
+	
+			\end{scope}
+		\end{tikzpicture}
+	\end{center}
+
+\visible<5->{
+\vspace{-1.5em}
+\begin{itemize}
+\item 可以重新定义\\
+\vspace{-0.8em}
+\begin{displaymath}
+\textrm{P}(y_j|\textbf{y}_{<j}, \textbf{x}) \triangleq \textrm{P}(y_j|s_{j-1}, C)
+\end{displaymath}
+
+对于上图中的模型，进一步化简为：\\
+\vspace{-0.3em}
+
+\begin{displaymath}
+\textrm{P}(y_j|\textbf{y}_{<j}, \textbf{x}) \triangleq \left\{ 
+    \begin{matrix}
+        \textrm{P}(y_j|C)\ \ \ \  & j = 1 \\
+        \textrm{P}(y_j|s_{j-1}) & j > 1
+    \end{matrix} \right.
+\end{displaymath}
+
+\end{itemize}
+}
+
+\end{frame}
+%%%------------------------------------------------------------------------------------------------------------
 %%% 词嵌入
 \begin{frame}{模块1：词嵌入层}
 \begin{itemize}