Commit 4f5d290b by xiaotong

new update

parent bf0af520
......@@ -21,6 +21,7 @@
\usepackage{tikz-3dplot}
\usepackage{esvect}
\usepackage{CJKulem}
\usepackage{booktabs}
\usepackage{tcolorbox}
\tcbuselibrary{skins}
......@@ -68,6 +69,15 @@
%\usetheme{Boadilla}
%\usecolortheme{dolphin}
% not compatible with [scale=?]
% Scratch dimension registers used by the coordinate-extraction helpers below.
\newdimen\XCoord
\newdimen\YCoord
\newdimen\TMP
% Store both coordinates of TikZ point #1 into \XCoord/\YCoord.
% (\path places the point; \pgfgetlastxy reads back the last computed position.)
\newcommand*{\ExtractCoordinate}[1]{\path (#1); \pgfgetlastxy{\XCoord}{\YCoord};}%
% Store only the x coordinate of point #1 in \XCoord (y is discarded into \TMP).
\newcommand*{\ExtractX}[1]{\path (#1); \pgfgetlastxy{\XCoord}{\TMP};}%
% Store only the y coordinate of point #1 in \YCoord (x is discarded into \TMP).
\newcommand*{\ExtractY}[1]{\path (#1); \pgfgetlastxy{\TMP}{\YCoord};}%
% General-purpose counters; their users are not visible in this chunk -- TODO confirm use sites.
\newcounter{mycount1}
\newcounter{mycount2}
\newcounter{mycount3}
......@@ -121,94 +131,145 @@
\subsection{起源}
%%%------------------------------------------------------------------------------------------------------------
%%% 神经机器翻译的历史
%%% 模型结构
% NOTE(review): the scraped diff had merged two versions of this frame (an older
% frame titled "最初的神经机器翻译" holding a table of papers, plus its replacement),
% leaving two \begin{frame} lines for a single \end{frame}. Only the newer,
% self-consistent frame is kept; its structure matches the later hunk of this
% same commit (itemize -> \vspace -> center -> tikzpicture).
\begin{frame}{基于循环神经网络的翻译模型}
\begin{itemize}
\item 神经网络在机器翻译中并不新鲜,在很多模块中早有实现,比如,翻译候选打分、语言模型等
\item 一种简单的模型:用循环神经网络进行编码和解码
\begin{itemize}
\item 但是,整个框架仍然是统计机器翻译
\item 编码端是一个RNN,最后一个隐层状态被看做句子表示
\item 解码端也是一个RNN,利用编码结果逐词解码出译文
\end{itemize}
\end{itemize}
\vspace{-0.5em}
\begin{center}
\begin{tikzpicture}
% \base is the single scale unit for every length in this picture
\newlength{\base}
\setlength{\base}{0.9cm}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum size=0.5\base,draw,inner sep=0pt,outer sep=0pt]
\tikzstyle{wordnode} = [font=\tiny]
% RNN translation model
\begin{scope}[local bounding box=RNNMT]
% RNN Encoder: embedding layer (eemb*) feeding recurrent units (enc*)
\coordinate (eemb0) at (0,0);
\foreach \x [count=\y from 0] in {1,2,...,10}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\x) at ([xshift=0.4\base]eemb\y.east) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,fill=blue!30!white,anchor=south] (enc\x) at ([yshift=0.5\base]eemb\x.north) {};
\node[wordnode,left=0.4\base of enc1] (init) {$0$};
% Source sentence. Several CJK tokens were lost in the scraped diff (empty {});
% restored from the English target "Do you know the way to Beijing Railway
% Station" -- TODO confirm against the original file.
\node[wordnode,below=0pt of eemb1] () {你};
\node[wordnode,below=0pt of eemb2] () {知道};
\node[wordnode,below=0pt of eemb3] () {去};
\node[wordnode,below=0pt of eemb4] () {北京站};
\node[wordnode,below=0pt of eemb5] () {的};
\node[wordnode,below=0pt of eemb6] () {路};
\node[wordnode,below=0pt of eemb7] () {怎么};
\node[wordnode,below=0pt of eemb8] () {走};
\node[wordnode,below=0pt of eemb9] () {吗};
\node[wordnode,below=0pt of eemb10] () {$\langle$eos$\rangle$};
% RNN Decoder: embeddings (demb*), recurrent units (dec*), output layer (softmax*)
\foreach \x in {1,2,...,10}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([yshift=2\base]enc\x.north) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,fill=blue!30!white,anchor=south] (dec\x) at ([yshift=0.5\base]demb\x.north) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax\x) at ([yshift=0.5\base]dec\x.north) {};
% Decoder input words (outputs shifted right by one position, teacher forcing)
\node[wordnode,below=0pt of demb1] (decwordin) {$\langle$sos$\rangle$};
\ExtractX{$(demb2.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Do};
\ExtractX{$(demb3.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
\ExtractX{$(demb4.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {know};
\ExtractX{$(demb5.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {the};
\ExtractX{$(demb6.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {way};
\ExtractX{$(demb7.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {to};
\ExtractX{$(demb8.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Beijing};
\ExtractX{$(demb9.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Railway};
\ExtractX{$(demb10.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Station};
% Decoder output words
\node[wordnode,above=0pt of softmax1] (decwordout) {Do};
\ExtractX{$(softmax2.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
\ExtractX{$(softmax3.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {know};
\ExtractX{$(softmax4.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {the};
\ExtractX{$(softmax5.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {way};
\ExtractX{$(softmax6.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {to};
\ExtractX{$(softmax7.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Beijing};
\ExtractX{$(softmax8.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Railway};
\ExtractX{$(softmax9.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Station};
\ExtractX{$(softmax10.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {$\langle$eos$\rangle$};
% Connections
\draw[-latex'] (init.east) to (enc1.west);
\foreach \x in {1,2,...,10}
\draw[-latex'] (eemb\x) to (enc\x);
\foreach \x in {1,2,...,10}
\draw[-latex'] (demb\x) to (dec\x);
\foreach \x in {1,2,...,10}
\draw[-latex'] (dec\x.north) to ([yshift=0.5\base]dec\x.north);
\foreach \x [count=\y from 2] in {1,2,...,9}
{
\draw[-latex'] (enc\x.east) to (enc\y.west);
\draw[-latex'] (dec\x.east) to (dec\y.west);
}
% the last encoder state is the sentence representation passed to the decoder
\coordinate (bridge) at ([yshift=-1.2\base]demb2);
\draw[-latex'] (enc10.north) .. controls +(north:\base) and +(east:1.5\base) .. (bridge) .. controls +(west:2.5\base) and +(west:0.6\base) .. (dec1.west);
\end{scope}
% legend
\begin{scope}[shift={(10\base,2.5\base)}]
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,label={[label distance=3pt,font=\scriptsize]0:词嵌入层}] (emb) at (0,0) {};
\node[rnnnode,fill=blue!30!white,anchor=north west,label={[label distance=3pt,font=\scriptsize]0:循环单元}] (rnn) at ([yshift=2\base]emb.south west) {};
\node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=north west,label={[label distance=3pt,font=\scriptsize]0:输出层}] (softmax) at ([yshift=2\base]rnn.south west) {};
\node [anchor=north west] (softmax2) at ([xshift=0.6\base]softmax.south west) {\scriptsize{Softmax}};
\node [anchor=north west] (rnn2) at ([xshift=0.6\base]rnn.south west) {\scriptsize{LSTM}};
\node [anchor=west] (reprlabel) at ([xshift=1em]enc10.east) {\scriptsize{句子表示}};
\draw [->,dashed] (reprlabel.west) -- ([xshift=0.1em]enc10.east);
\node [rnnnode,fill=purple!30!white] at (enc10) {};
\end{scope}
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 逐渐崛起的NMT
% Rise of NMT: SMT dominance pre-2015, turning point with GNMT in 2016.
% Wording aligned with the corrected duplicate of this frame later in the
% commit ("初步"->"初级", "NMT年代"->"NMT时代").
\begin{frame}{崛起}
\begin{itemize}
\item 2015年前统计机器翻译(SMT)在NLP是具有统治力的
\begin{itemize}
\item 当时的NMT系统还很初级,被SMT碾压
\item 大多数的认知还没有进化到NMT时代,甚至Kalchbrenner等人早期的报告也被人质疑
\end{itemize}
\item 2016年情况大有改变,当时非常受关注的一项工作是Google上线了神经机器翻译系统GNMT
\begin{itemize}
\item 在GNMT前后,百度、微软、小牛翻译等也分别推出了自己的神经机器翻译系统,出现了百花齐放的局面
\end{itemize}
\end{itemize}
\begin{center}
\includegraphics[scale=0.35]{./Figures/google-news.png}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 模型结构
% Placeholder frame; duplicates the title of the fully-worked RNN frame above,
% presumably superseded by it -- TODO confirm whether this stub should be removed.
\begin{frame}{基于循环神经网络的翻译模型}
\begin{itemize}
\item 一种简单的模型
\end{itemize}
%%% figure placeholder
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% LSTM
% Placeholder frame: LSTM content still to be written (possibly two slides).
\begin{frame}{长短时记忆模型(LSTM) (2页?)}
\begin{itemize}
\item LSTM
\end{itemize}
%%% figure placeholder
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% GRU
% Placeholder frame: GRU content still to be written.
\begin{frame}{门循环单元(GRU)}
\begin{itemize}
\item GRU
\end{itemize}
%%% figure placeholder
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 一些变种
% Placeholder frame: further refinements (stacked/multi-layer RNNs, fine-tuning).
\begin{frame}{进一步的改进}
\begin{itemize}
\item 多层网络
\item fine-tuning
\end{itemize}
%%% figure placeholder
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
......
......@@ -198,7 +198,7 @@
%%% 神经机器翻译的性能增长
\begin{frame}{神经机器翻译的进展(续)}
\begin{itemize}
\item 神经机器翻译在大部分场景下已经超越统计机器翻译!
\item 神经机器翻译在很多场景下已经超越统计机器翻译
{
\footnotesize
\begin{center}
......@@ -223,7 +223,7 @@
\end{tabular}
\end{center}
}
\item 微软报道在部分场景下机器翻译质量已经超越人类!
\item 微软的报道:在部分场景下机器翻译质量已经接近甚至超过人工翻译
{
\footnotesize
\begin{center}
......@@ -239,7 +239,6 @@
\specialrule{0.6pt}{1pt}{1pt}
人工翻译 & 68.6 & REFERENCE-HT \\
& 67.6 & REFERENCE-PE \\
& 62.1 & REFERENCE-WMT \\
\specialrule{1pt}{1pt}{1pt}
\end{tabular}\\
\addlinespace[-0.3ex]
......@@ -651,7 +650,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\begin{itemize}
\item 2015年前统计机器翻译(SMT)在NLP是具有统治力的
\begin{itemize}
\item 当时NMT的系统还很初级,被SMT碾压
\item 当时的NMT系统还很初级,被SMT碾压
\item 大多数的认知还没有进化到NMT时代,甚至Kalchbrenner等人早期的报告也被人质疑
\end{itemize}
\item 2016年情况大有改变,当时非常受关注的一项工作是Google上线了神经机器翻译系统GNMT
......@@ -674,7 +673,8 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\item 解码端也是一个RNN,利用编码结果逐词解码出译文
\end{itemize}
\end{itemize}
%%% 图
\vspace{-0.5em}
\begin{center}
\begin{tikzpicture}
\newlength{\base}
......@@ -688,7 +688,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
% RNN Encoder
\coordinate (eemb0) at (0,0);
\foreach \x [count=\y from 0] in {1,2,...,10}
\node[rnnnode,minimum height=0.1\base,fill=green!30!white,anchor=west] (eemb\x) at ([xshift=0.4\base]eemb\y.east) {};
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\x) at ([xshift=0.4\base]eemb\y.east) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,fill=blue!30!white,anchor=south] (enc\x) at ([yshift=0.5\base]eemb\x.north) {};
\node[wordnode,left=0.4\base of enc1] (init) {$0$};
......@@ -706,11 +706,11 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
% RNN Decoder
\foreach \x in {1,2,...,10}
\node[rnnnode,minimum height=0.1\base,fill=green!30!white,anchor=south] (demb\x) at ([yshift=2\base]enc\x.north) {};
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([yshift=2\base]enc\x.north) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,fill=blue!30!white,anchor=south] (dec\x) at ([yshift=0.5\base]demb\x.north) {};
\foreach \x in {1,2,...,10}
\node[rnnnode,minimum height=0.1\base,fill=red!30!white,anchor=south] (softmax\x) at ([yshift=0.5\base]dec\x.north) {};
\node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax\x) at ([yshift=0.5\base]dec\x.north) {};
% Decoder input words
\node[wordnode,below=0pt of demb1] (decwordin) {$\langle$sos$\rangle$};
......@@ -790,10 +790,16 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\end{scope}
% legend
\begin{scope}[shift={(-2.3\base,0)}]
\node[rnnnode,minimum height=0.1\base,fill=green!30!white,label={[label distance=3pt,font=\scriptsize]0:词嵌入层}] (emb) at (0,0) {};
\node[rnnnode,fill=blue!30!white,anchor=north west,label={[label distance=3pt,font=\scriptsize]0:循环单元}] (rnn) at ([yshift=2.7\base]emb.south west) {};
\node[rnnnode,minimum height=0.1\base,fill=red!30!white,anchor=north west,label={[label distance=3pt,font=\scriptsize]0:输出层}] (softmax) at ([yshift=2.6\base]rnn.south west) {};
\begin{scope}[shift={(10\base,2.5\base)}]
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,label={[label distance=3pt,font=\scriptsize]0:词嵌入层}] (emb) at (0,0) {};
\node[rnnnode,fill=blue!30!white,anchor=north west,label={[label distance=3pt,font=\scriptsize]0:循环单元}] (rnn) at ([yshift=2\base]emb.south west) {};
\node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=north west,label={[label distance=3pt,font=\scriptsize]0:输出层}] (softmax) at ([yshift=2\base]rnn.south west) {};
\node [anchor=north west] (softmax2) at ([xshift=0.6\base]softmax.south west) {\scriptsize{Softmax}};
\node [anchor=north west] (rnn2) at ([xshift=0.6\base]rnn.south west) {\scriptsize{LSTM}};
\node [anchor=west] (reprlabel) at ([xshift=1em]enc10.east) {\scriptsize{句子表示}};
\draw [->,dashed] (reprlabel.west) -- ([xshift=0.1em]enc10.east);
\node [rnnnode,fill=purple!30!white] at (enc10) {};
\end{scope}
\end{tikzpicture}
\end{center}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论