new page

6d1b84b9 · xiaotong · 7f78e16e · 6d1b84b9
Commit 6d1b84b9 authored Oct 28, 2019 by xiaotong
--- a/Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
+++ b/Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
@@ -120,103 +120,60 @@
 \subsection{词嵌入}

 %%%------------------------------------------------------------------------------------------------------------
-%%% 上下文表示模型
-\begin{frame}{表示更长的片段 - 上下文表示模型}
+%%% 更强大的表示模型 - ELMO
+\begin{frame}{更强的表示模型 - ELMO}
 \begin{itemize}
-\item 在语言模型中已经包含了每个位置的上下文表示信息
+\item \textbf{ELMO}(Embeddings from Language Models)可以说是掀起了基于语言模型的预训练的热潮
    \begin{itemize}
-    \item 以RNN LM为例，位置$i$的隐层输出就是一种$w_1...w_i$的表示
+    \item 仍然使用RNN结构，不过循环单元换成了LSTM
+    \item 同时考虑自左向右和自右向左的建模方式，同时表示一个词左端和右端的上下文
+    \item 融合所有层的输出，送给下游应用，提供了更丰富的信息
    \end{itemize}
 \end{itemize}
+\end{frame}

-\vspace{-0.5em}
-\begin{center}
-\begin{tikzpicture}
-\begin{scope}
-\tikzstyle{rnnnode} = [draw,inner sep=5pt,minimum width=4em,minimum height=1.5em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}]
-\node [anchor=west,rnnnode] (node11) at (0,0) {\scriptsize{RNN Cell}};
-\node [anchor=west,rnnnode] (node12) at ([xshift=2em]node11.east) {\scriptsize{RNN Cell}};
-\node [anchor=west,rnnnode] (node13) at ([xshift=2em]node12.east) {\scriptsize{RNN Cell}};
-\node [anchor=west,rnnnode] (node14) at ([xshift=2em]node13.east) {\scriptsize{RNN Cell}};
-
-\node [anchor=north,rnnnode,fill=red!30!white] (e1) at ([yshift=-1.2em]node11.south) {\scriptsize{embedding}};
-\node [anchor=north,rnnnode,fill=red!30!white] (e2) at ([yshift=-1.2em]node12.south) {\scriptsize{embedding}};
-\node [anchor=north,rnnnode,fill=red!30!white] (e3) at ([yshift=-1.2em]node13.south) {\scriptsize{embedding}};
-\node [anchor=north,rnnnode,fill=red!30!white] (e4) at ([yshift=-1.2em]node14.south) {\scriptsize{embedding}};
-\node [anchor=north] (w1) at ([yshift=-1em]e1.south) {\footnotesize{乔布斯}};
-\node [anchor=north] (w2) at ([yshift=-1em]e2.south) {\footnotesize{任职}};
-\node [anchor=north] (w3) at ([yshift=-1em]e3.south) {\footnotesize{于}};
-\node [anchor=north] (w4) at ([yshift=-1em]e4.south) {\footnotesize{苹果}};
-
-\draw [->,thick] ([yshift=0.1em]w1.north)--([yshift=-0.1em]e1.south);
-\draw [->,thick] ([yshift=0.1em]w2.north)--([yshift=-0.1em]e2.south);
-\draw [->,thick] ([yshift=0.1em]w3.north)--([yshift=-0.1em]e3.south);
-\draw [->,thick] ([yshift=0.1em]w4.north)--([yshift=-0.1em]e4.south);
-
-\draw [->,thick] ([yshift=0.1em]e1.north)--([yshift=-0.1em]node11.south);
-\draw [->,thick] ([yshift=0.1em]e2.north)--([yshift=-0.1em]node12.south);
-\draw [->,thick] ([yshift=0.1em]e3.north)--([yshift=-0.1em]node13.south);
-\draw [->,thick] ([yshift=0.1em]e4.north)--([yshift=-0.1em]node14.south);
-
-\node [anchor=south,rnnnode] (node21) at ([yshift=1.5em]node11.north) {\scriptsize{RNN Cell}};
-\node [anchor=south,rnnnode] (node22) at ([yshift=1.5em]node12.north) {\scriptsize{RNN Cell}};
-\node [anchor=south,rnnnode] (node23) at ([yshift=1.5em]node13.north) {\scriptsize{RNN Cell}};
-\node [anchor=south,rnnnode] (node24) at ([yshift=1.5em]node14.north) {\scriptsize{RNN Cell}};
-
-\node [anchor=south] (node31) at ([yshift=1.0em]node21.north) {\scriptsize{的表示}};
-\node [anchor=south west] (node31new) at ([yshift=-0.3em]node31.north west) {\scriptsize{``乔布斯''}};
-\node [anchor=south] (node32) at ([yshift=1.0em]node22.north) {\scriptsize{的表示\ \ \ }};
-\node [anchor=south west] (node32new) at ([yshift=-0.3em]node32.north west) {\scriptsize{``乔布斯 任职''}};
-\node [anchor=south] (node33) at ([yshift=1.0em]node23.north) {\scriptsize{的表示\ \ \ \ \ \ \ \ }};
-\node [anchor=south west] (node33new) at ([yshift=-0.3em]node33.north west) {\scriptsize{``乔布斯 任职 于''}};
-\node [anchor=south] (node34) at ([yshift=1.0em]node24.north) {\scriptsize{的表示\ \ \ \ \ \ \ \ }};
-\node [anchor=south west] (node34new) at ([yshift=-0.3em]node34.north west) {\scriptsize{``乔布斯 任职 于 苹果''}};
-
-\draw [->,thick] ([yshift=0.1em]node21.north)--([yshift=-0.1em]node31.south);
-\draw [->,thick] ([yshift=0.1em]node22.north)--([yshift=-0.1em]node32.south);
-\draw [->,thick] ([yshift=0.1em]node23.north)--([yshift=-0.1em]node33.south);
-\draw [->,thick] ([yshift=0.1em]node24.north)--([yshift=-0.1em]node34.south);
-
-\draw [->,thick] ([xshift=-1em]node21.west)--([xshift=-0.1em]node21.west);
-\draw [->,thick] ([xshift=0.1em]node21.east)--([xshift=-0.1em]node22.west);
-\draw [->,thick] ([xshift=0.1em]node22.east)--([xshift=-0.1em]node23.west);
-\draw [->,thick] ([xshift=0.1em]node23.east)--([xshift=-0.1em]node24.west);
-\draw [->,thick] ([xshift=0.1em]node24.east)--([xshift=1em]node24.east);
-
-\draw [->,thick] ([yshift=0.1em]node11.north)--([yshift=-0.1em]node21.south);
-\draw [->,thick] ([yshift=0.1em]node12.north)--([yshift=-0.1em]node22.south);
-\draw [->,thick] ([yshift=0.1em]node13.north)--([yshift=-0.1em]node23.south);
-\draw [->,thick] ([yshift=0.1em]node14.north)--([yshift=-0.1em]node24.south);
-
-\draw [->,thick] ([xshift=-1em]node11.west)--([xshift=-0.1em]node11.west);
-\draw [->,thick] ([xshift=0.1em]node11.east)--([xshift=-0.1em]node12.west);
-\draw [->,thick] ([xshift=0.1em]node12.east)--([xshift=-0.1em]node13.west);
-\draw [->,thick] ([xshift=0.1em]node13.east)--([xshift=-0.1em]node14.west);
-\draw [->,thick] ([xshift=0.1em]node14.east)--([xshift=1em]node14.east);
-
-\visible<2->{
-\node [anchor=south] (toplabel1) at ([yshift=2em,xshift=-2em]node32new.north) {\footnotesize{``苹果''的表示：}};
-\node [anchor=west,fill=blue!20!white,minimum width=3em] (toplabel2) at (toplabel1.east) {\footnotesize{上下文}};
-}
-\visible<3->{
-\node [anchor=west,fill=red!20!white,minimum width=3em] (toplabel3) at (toplabel2.east) {\footnotesize{词}};
-}
+%%%------------------------------------------------------------------------------------------------------------
+%%% 更强大的表示模型 - GPT
+\begin{frame}{更强的表示模型 - GPT}
+\begin{itemize}
+\item \textbf{GPT}(Generative Pre-Training)也是一种基于语言模型的表示模型
+    \begin{itemize}
+    \item 架构换成了Transformer，特征抽取能力更强
+    \item 基于Pre-training + Fine-tuning的框架，预训练作为下游系统部件的参数初始值，因此可以更好地适应目标任务
+    \end{itemize}
+\end{itemize}
+\end{frame}

-\begin{pgfonlayer}{background}
-\visible<3->{
-\node [rectangle,inner sep=2pt,draw,thick,dashed,red] [fit = (e4)] (r2) {};
-\draw [->,thick,red] (r2.west) .. controls +(west:0.8) and +(south:2) .. ([xshift=1.3em]toplabel3.south);
-}
-\visible<2->{
-\node [rectangle,inner sep=2pt,draw,thick,dashed,ublue,fill=white] [fit = (node33) (node33new)] (r1) {};
-\draw [->,thick,ublue] ([xshift=-2em]r1.north) .. controls +(north:0.7) and +(south:0.7) .. ([xshift=-0.5em]toplabel2.south);
-}
-\end{pgfonlayer}
+%%%------------------------------------------------------------------------------------------------------------
+%%% 更强大的表示模型 - BERT
+\begin{frame}{更强的表示模型 - BERT}
+\begin{itemize}
+\item \textbf{BERT}(Bidirectional Encoder Representations from Transformers)是最近非常火爆的表示模型
+    \begin{itemize}
+    \item 仍然基于Transformer但是考虑了左右两端的上下文(可以对比GPT)
+    \item 使用了Mask方法来增加训练得到模型的健壮性，这个方法几乎成为了预训练表示模型的新范式
+    \end{itemize}
+\end{itemize}
+\end{frame}

-\end{scope}
-\end{tikzpicture}
-\end{center}
+%%%------------------------------------------------------------------------------------------------------------
+%%% 更强大的表示模型 - BERT
+\begin{frame}{更强的表示模型 - BERT}
+\begin{itemize}
+\item \textbf{BERT}(Bidirectional Encoder Representations from Transformers)是最近非常火爆的表示模型
+    \begin{itemize}
+    \item 仍然基于Transformer但是考虑了左右两端的上下文(可以对比GPT)
+    \item 使用了Mask方法来增加训练得到模型的健壮性，这个方法几乎成为了预训练表示模型的新范式
+    \end{itemize}
+\end{itemize}
+\end{frame}

+%%%------------------------------------------------------------------------------------------------------------
+%%% 预训练
+\begin{frame}{预训练}
+\begin{itemize}
+\item 语言模型可以使用大量无标注数据进行训练
+\end{itemize}
 \end{frame}

 %%%------------------------------------------------------------------------------------------------------------