NiuTrans / Toy-MT-Introduction
Commit ce425042, authored Oct 29, 2019 by xiaotong
a complete version
parent b8e95e14
Showing 2 changed files with 262 additions and 7 deletions:
Section05-Neural-Networks-and-Language-Modeling/section05-test.tex (+103, -2)
Section05-Neural-Networks-and-Language-Modeling/section05.tex (+159, -5)
Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
@@ -124,7 +124,7 @@
%%% 预训练带来的新思路
\begin{frame}{预训练带来的新思路}
\begin{itemize}
- \item 预训练模型刷榜各种任务的同时,引发了一些思考: \\
+ \item 预训练模型刷榜各种任务的同时,引发了一些思考: \\
预训练究竟给我们带来了什么?
\begin{itemize}
\item 有标注数据量有限,预训练提供使用超大规模数据的方法
@@ -168,14 +168,115 @@
}
\begin{itemize}
- \item 机器翻译中的预训练
+ \item <5-> 机器翻译中的预训练
\begin{itemize}
\item 机器翻译中预训练还没有屠榜,一方面由于很多机器翻译任务训练数据量并不小,另一方面也反应出翻译的双语建模对预训练也提出了新的要求
\end{itemize}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 总结
\begin{frame}{总结 - 长出一口气}
\begin{itemize}
\item 讲了很多,累呀累,再整理一下主要观点
\begin{itemize}
\item 神经网络没有那么复杂,入门不能
\item 简单的网络结构可以组合成强大的模型
\item 语言模型可以用神经网络实现,效果很好,最近出现的预训练等范式证明了神经语言模型的潜力
\end{itemize}
\item <2-> 仍然有很多问题需要讨论
\begin{itemize}
\item 常见的神经网络结构(面向NLP) \\
google一下LSTM、GRU、CNN
\item 深层模型和训练方法。深度学习如何体现``深''? \\
深层网络可以带来什么? \\
如何有效的训练深层模型?
\item 如何把神经网络用于包括机器翻译在内的其它NLP任务? \\
比如encoder-decoder框架
\item 深度学习的实践技巧 \\
``炼金术''了解下,因为不同任务调参和模型设计都有技巧 \\
...
\end{itemize}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% last slide
\begin{frame}{又结束一章内容~}
\vspace{2em}
\begin{center}
\textbf{内容很多,开个了个头} \\
\textbf{学习深度学习技术需要实践和经验的积累!}
\vspace{2em}
\begin{tikzpicture}
\tikzstyle{rnnnode} = [draw,inner sep=5pt,minimum width=4em,minimum height=1.5em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}]
\node [anchor=west,rnnnode] (node11) at (0,0) {\tiny{RNN Cell}};
\node [anchor=west,rnnnode] (node12) at ([xshift=2em]node11.east) {\tiny{RNN Cell}};
\node [anchor=west,rnnnode] (node13) at ([xshift=2em]node12.east) {\tiny{RNN Cell}};
\node [anchor=west,rnnnode] (node14) at ([xshift=2em]node13.east) {\tiny{RNN Cell}};
\node [anchor=north,rnnnode,fill=red!30!white] (e1) at ([yshift=-1.2em]node11.south) {\tiny{embedding}};
\node [anchor=north,rnnnode,fill=red!30!white] (e2) at ([yshift=-1.2em]node12.south) {\tiny{embedding}};
\node [anchor=north,rnnnode,fill=red!30!white] (e3) at ([yshift=-1.2em]node13.south) {\tiny{embedding}};
\node [anchor=north,rnnnode,fill=red!30!white] (e4) at ([yshift=-1.2em]node14.south) {\tiny{embedding}};
\node [anchor=north] (w1) at ([yshift=-1em]e1.south) {\footnotesize{$<$s$>$}};
\node [anchor=north] (w2) at ([yshift=-1em]e2.south) {\footnotesize{谢谢}};
\node [anchor=north] (w3) at ([yshift=-1em]e3.south) {\footnotesize{大家}};
\node [anchor=north] (w4) at ([yshift=-1em]e4.south) {\footnotesize{聆听}};
\draw [->,thick] ([yshift=0.1em]w1.north)--([yshift=-0.1em]e1.south);
\draw [->,thick] ([yshift=0.1em]w2.north)--([yshift=-0.1em]e2.south);
\draw [->,thick] ([yshift=0.1em]w3.north)--([yshift=-0.1em]e3.south);
\draw [->,thick] ([yshift=0.1em]w4.north)--([yshift=-0.1em]e4.south);
\draw [->,thick] ([yshift=0.1em]e1.north)--([yshift=-0.1em]node11.south);
\draw [->,thick] ([yshift=0.1em]e2.north)--([yshift=-0.1em]node12.south);
\draw [->,thick] ([yshift=0.1em]e3.north)--([yshift=-0.1em]node13.south);
\draw [->,thick] ([yshift=0.1em]e4.north)--([yshift=-0.1em]node14.south);
\node [anchor=south,rnnnode,fill=red!30!white] (node21) at ([yshift=1.0em]node11.north) {\tiny{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=red!30!white] (node22) at ([yshift=1.0em]node12.north) {\tiny{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=red!30!white] (node23) at ([yshift=1.0em]node13.north) {\tiny{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=red!30!white] (node24) at ([yshift=1.0em]node14.north) {\tiny{Softmax($\cdot$)}};
\node [anchor=south] (output1) at ([yshift=1em]node21.north) {\Large{\textbf{谢谢}}};
\node [anchor=south] (output2) at ([yshift=1em]node22.north) {\Large{\textbf{大家}}};
\node [anchor=south] (output3) at ([yshift=1em]node23.north) {\Large{\textbf{聆听}}};
\node [anchor=south] (output4) at ([yshift=1em]node24.north) {\Large{\textbf{$<$/s$>$}}};
\draw [->,thick] ([yshift=0.1em]node21.north)--([yshift=-0.1em]output1.south);
\draw [->,thick] ([yshift=0.1em]node22.north)--([yshift=-0.1em]output2.south);
\draw [->,thick] ([yshift=0.1em]node23.north)--([yshift=-0.1em]output3.south);
\draw [->,thick] ([yshift=0.1em]node24.north)--([yshift=-0.1em]output4.south);
\draw [->,thick] ([yshift=0.1em]node11.north)--([yshift=-0.1em]node21.south);
\draw [->,thick] ([yshift=0.1em]node12.north)--([yshift=-0.1em]node22.south);
\draw [->,thick] ([yshift=0.1em]node13.north)--([yshift=-0.1em]node23.south);
\draw [->,thick] ([yshift=0.1em]node14.north)--([yshift=-0.1em]node24.south);
\draw [->,thick] ([xshift=-1em]node11.west)--([xshift=-0.1em]node11.west);
\draw [->,thick] ([xshift=0.1em]node11.east)--([xshift=-0.1em]node12.west);
\draw [->,thick] ([xshift=0.1em]node12.east)--([xshift=-0.1em]node13.west);
\draw [->,thick] ([xshift=0.1em]node13.east)--([xshift=-0.1em]node14.west);
\draw [->,thick] ([xshift=0.1em]node14.east)--([xshift=1em]node14.east);
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 深度学习带来的问题及思考 - 并不是无所不能
\end{CJK}
\end{document}
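The closing slide added above draws an RNN language model that reads the sequence <s> 谢谢 大家 聆听 through an embedding layer, an RNN cell, and a Softmax output at each position. As a reading aid for that figure only, here is a minimal NumPy sketch of the depicted forward pass; every name, dimension, and parameter below is an illustrative assumption of mine and is not taken from the Toy-MT-Introduction sources.

import numpy as np

rng = np.random.default_rng(0)
vocab = ["<s>", "谢谢", "大家", "聆听", "</s>"]
word2id = {w: i for i, w in enumerate(vocab)}
V, d_emb, d_hid = len(vocab), 8, 16

# Hypothetical, randomly initialized (untrained) parameters.
C = rng.normal(scale=0.1, size=(V, d_emb))        # embedding table
W_x = rng.normal(scale=0.1, size=(d_emb, d_hid))  # input-to-hidden weights
W_h = rng.normal(scale=0.1, size=(d_hid, d_hid))  # hidden-to-hidden weights
U = rng.normal(scale=0.1, size=(d_hid, V))        # hidden-to-output weights

def softmax(x):
    x = x - x.max()
    e = np.exp(x)
    return e / e.sum()

def rnn_lm_step(h_prev, word):
    # One column of the figure: word -> embedding -> RNN Cell -> Softmax(.)
    e = C[word2id[word]]                 # embedding lookup
    h = np.tanh(e @ W_x + h_prev @ W_h)  # simple (Elman-style) RNN cell
    p = softmax(h @ U)                   # distribution over the next word
    return h, p

h = np.zeros(d_hid)
for w in ["<s>", "谢谢", "大家", "聆听"]:
    h, p = rnn_lm_step(h, w)
    print(w, "-> next-word distribution:", np.round(p, 3))

With random, untrained parameters the printed distributions are close to uniform; the figure assumes a trained model that puts high probability on the next word at each step.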
Section05-Neural-Networks-and-Language-Modeling/section05.tex
@@ -24,6 +24,7 @@
\usepackage
{
tcolorbox
}
\tcbuselibrary
{
skins
}
\usetikzlibrary
{
calc,intersections
}
\usetikzlibrary
{
matrix
}
\usetikzlibrary
{
arrows,decorations.pathreplacing
}
\usetikzlibrary
{
shadows
}
% LATEX and plain TEX when using Tik Z
@@ -3855,7 +3856,7 @@ NLP问题的\alert{隐含结构}假设 & 无隐含结构假设,\alert{端到
- \item <3-> 最具代表性的方法是前馈神经网络(FFN)语言模型
+ \item <3-> 最具代表性的方法是前馈神经网络(FNN)语言模型
\begin{itemize}
\item 经典中的经典,对现代神经语言模型的设计产生深远影响
\end{itemize}
@@ -3973,7 +3974,7 @@ NLP问题的\alert{隐含结构}假设 & 无隐含结构假设,\alert{端到
%%%------------------------------------------------------------------------------------------------------------
%%% FNNLM implementation
- \begin{frame}{前馈神经网络语言模型(FFN LM)的实现}
+ \begin{frame}{前馈神经网络语言模型(FNN LM)的实现}
\begin{itemize}
\item 实现非常简单,几行代码
@@ -4061,7 +4062,7 @@ NLP问题的\alert{隐含结构}假设 & 无隐含结构假设,\alert{端到
激活函数如何选择? \\
...
\end{enumerate}
- \item <2-> 从FFN LM得到的启发
+ \item <2-> 从FNN LM得到的启发
\begin{itemize}
\item 重新定义词是什么 - 非词典里的一项,而是一个实数向量
\item 多层神经网络可以很好的表示单词之间的(短距离)依赖
@@ -4457,7 +4458,7 @@ $\textbf{V}, \textbf{U}, \textbf{W}$: 参数
\node [anchor=south,draw,inner sep=3pt] (h0) at ([xshift=-0.5em, yshift=1.5em]e0.north) {\tiny{$h_{0}=\textrm{SelfAtt}(e_0,e_3)$}};
\node [anchor=south,draw,inner sep=3pt] (h1) at ([xshift=0.5em, yshift=1.5em]e1.north) {\tiny{$h_{1}=\textrm{SelfAtt}(e_1,e_3)$}};
\node [anchor=south,draw,inner sep=3pt] (h2) at ([xshift=1.5em, yshift=1.5em]e2.north) {\tiny{$h_{2}=\textrm{SelfAtt}(e_2,e_3)$}};
- \node [anchor=south,draw,minimum width=9em,inner sep=3pt] (f1) at ([xshift=0.5em, yshift=1.5em]h2.north) {\tiny{$f_3=\textrm{FFN}([h_0,h_1,h_2,e_3])$}};
+ \node [anchor=south,draw,minimum width=9em,inner sep=3pt] (f1) at ([xshift=0.5em, yshift=1.5em]h2.north) {\tiny{$f_3=\textrm{FNN}([h_0,h_1,h_2,e_3])$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt] (o1) at ([yshift=1em]f1.north) {\tiny{$y=\textrm{Softmax}(f_3\textbf{U})$}};
\node [anchor=south] (ylabel) at ([yshift=1em]o1.north) {\footnotesize{$\textrm{P}(w_4|w_{0}w_{1}w_{2}w_{3})$}};
@@ -4507,7 +4508,7 @@ $\textbf{V}, \textbf{U}, \textbf{W}$: 参数
}
\visible<4->{
- \node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (f1) at ([xshift=0.5em, yshift=1.5em]h2.north) {\tiny{$f_3=\textrm{FFN}([h_0,h_1,h_2,e_3])$}};
+ \node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (f1) at ([xshift=0.5em, yshift=1.5em]h2.north) {\tiny{$f_3=\textrm{FNN}([h_0,h_1,h_2,e_3])$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (o1) at ([yshift=1em]f1.north) {\tiny{$y=\textrm{Softmax}(f_3\textbf{U})$}};
}
@@ -5279,5 +5280,158 @@ GPT-2 (Transformer) & Radford et al. & 2019 & 35.7
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 预训练带来的新思路
\begin{frame}{预训练带来的新思路}
\begin{itemize}
\item 预训练模型刷榜各种任务的同时,引发了一些思考: \\
预训练究竟给我们带来了什么?
\begin{itemize}
\item 有标注数据量有限,预训练提供使用超大规模数据的方法
\item 从大规模无标注数据中学习通用知识,提升泛化能力
\item 神经网络复杂且不容易训练,预训练可以使模型关注优质解的高密度区域
\end{itemize}
\end{itemize}
\visible<2->{
\begin{center}
\begin{tikzpicture}
\draw [name path=ellipse,thick] (0,0) circle[x radius = 2, y radius = 1];
\node [rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p1) at (0.2,0.5) {};
\node [rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p2) at (0.3,0.6) {};
\node [rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p3) at (0.1,-0.1) {};
\node [rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p4) at (0.4,0) {};
\node [rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p5) at (0.5,0.3) {};
\node [rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p6) at (0.6,0.1) {};
\node [rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p7) at (0.7,-0.1) {};
\node [rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p8) at (-1.2,0.4) {};
\node [rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p9) at (-1.0,-0.3) {};
\node [rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p10) at (-0.1,-0.8) {};
\begin{pgfonlayer}{background}
\visible<4->{
\node [rectangle,inner sep=0.4em,draw,blue] [fit = (p1) (p2) (p3) (p4) (p5) (p6)] (area) {};
}
\end{pgfonlayer}
\draw [->] (2.5,-0.7) -- (1.8,-0.5) node [pos=0,right] {\scriptsize{模型参数解空间}};
\visible<4->{
\draw [->] (2.0,0.7) -- (area.20) node [pos=0,right] {\scriptsize{优质解高密度区域(预训练)}};
}
\visible<3->{
\draw [->] (-2.0,0.7) -- (p8.west) node [pos=0,left] {\scriptsize{游离的解}};
}
\end{tikzpicture}
\end{center}
}
\begin{itemize}
\item <5-> 机器翻译中的预训练
\begin{itemize}
\item 机器翻译中预训练还没有屠榜,一方面由于很多机器翻译任务训练数据量并不小,另一方面也反应出翻译的双语建模对预训练也提出了新的要求
\end{itemize}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 总结
\begin{frame}{总结 - 长出一口气}
\begin{itemize}
\item 讲了很多,累呀累,再整理一下主要观点
\begin{itemize}
\item 神经网络没有那么复杂,入门不能
\item 简单的网络结构可以组合成强大的模型
\item 语言模型可以用神经网络实现,效果很好,最近出现的预训练等范式证明了神经语言模型的潜力
\end{itemize}
\item <2-> 仍然有很多问题需要讨论
\begin{itemize}
\item 常见的神经网络结构(面向NLP) \\
google一下LSTM、GRU、CNN
\item 深层模型和训练方法。深度学习如何体现``深''? \\
深层网络可以带来什么? \\
如何有效的训练深层模型?
\item 如何把神经网络用于包括机器翻译在内的其它NLP任务? \\
比如encoder-decoder框架
\item 深度学习的实践技巧 \\
``炼金术''了解下,因为不同任务调参和模型设计都有技巧 \\
...
\end{itemize}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% last slide
\begin{frame}{又结束一章内容~}
\vspace{2em}
\begin{center}
\textbf{内容很多,开个了个头} \\
\textbf{学习深度学习技术需要实践和经验的积累!}
\vspace{2em}
\begin{tikzpicture}
\tikzstyle{rnnnode} = [draw,inner sep=5pt,minimum width=4em,minimum height=1.5em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}]
\node [anchor=west,rnnnode] (node11) at (0,0) {\tiny{RNN Cell}};
\node [anchor=west,rnnnode] (node12) at ([xshift=2em]node11.east) {\tiny{RNN Cell}};
\node [anchor=west,rnnnode] (node13) at ([xshift=2em]node12.east) {\tiny{RNN Cell}};
\node [anchor=west,rnnnode] (node14) at ([xshift=2em]node13.east) {\tiny{RNN Cell}};
\node [anchor=north,rnnnode,fill=red!30!white] (e1) at ([yshift=-1.2em]node11.south) {\tiny{embedding}};
\node [anchor=north,rnnnode,fill=red!30!white] (e2) at ([yshift=-1.2em]node12.south) {\tiny{embedding}};
\node [anchor=north,rnnnode,fill=red!30!white] (e3) at ([yshift=-1.2em]node13.south) {\tiny{embedding}};
\node [anchor=north,rnnnode,fill=red!30!white] (e4) at ([yshift=-1.2em]node14.south) {\tiny{embedding}};
\node [anchor=north] (w1) at ([yshift=-1em]e1.south) {\footnotesize{$<$s$>$}};
\node [anchor=north] (w2) at ([yshift=-1em]e2.south) {\footnotesize{谢谢}};
\node [anchor=north] (w3) at ([yshift=-1em]e3.south) {\footnotesize{大家}};
\node [anchor=north] (w4) at ([yshift=-1em]e4.south) {\footnotesize{聆听}};
\draw [->,thick] ([yshift=0.1em]w1.north)--([yshift=-0.1em]e1.south);
\draw [->,thick] ([yshift=0.1em]w2.north)--([yshift=-0.1em]e2.south);
\draw [->,thick] ([yshift=0.1em]w3.north)--([yshift=-0.1em]e3.south);
\draw [->,thick] ([yshift=0.1em]w4.north)--([yshift=-0.1em]e4.south);
\draw [->,thick] ([yshift=0.1em]e1.north)--([yshift=-0.1em]node11.south);
\draw [->,thick] ([yshift=0.1em]e2.north)--([yshift=-0.1em]node12.south);
\draw [->,thick] ([yshift=0.1em]e3.north)--([yshift=-0.1em]node13.south);
\draw [->,thick] ([yshift=0.1em]e4.north)--([yshift=-0.1em]node14.south);
\node [anchor=south,rnnnode,fill=red!30!white] (node21) at ([yshift=1.0em]node11.north) {\tiny{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=red!30!white] (node22) at ([yshift=1.0em]node12.north) {\tiny{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=red!30!white] (node23) at ([yshift=1.0em]node13.north) {\tiny{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=red!30!white] (node24) at ([yshift=1.0em]node14.north) {\tiny{Softmax($\cdot$)}};
\node [anchor=south] (output1) at ([yshift=1em]node21.north) {\Large{\textbf{谢谢}}};
\node [anchor=south] (output2) at ([yshift=1em]node22.north) {\Large{\textbf{大家}}};
\node [anchor=south] (output3) at ([yshift=1em]node23.north) {\Large{\textbf{聆听}}};
\node [anchor=south] (output4) at ([yshift=1em]node24.north) {\Large{\textbf{$<$/s$>$}}};
\draw [->,thick] ([yshift=0.1em]node21.north)--([yshift=-0.1em]output1.south);
\draw [->,thick] ([yshift=0.1em]node22.north)--([yshift=-0.1em]output2.south);
\draw [->,thick] ([yshift=0.1em]node23.north)--([yshift=-0.1em]output3.south);
\draw [->,thick] ([yshift=0.1em]node24.north)--([yshift=-0.1em]output4.south);
\draw [->,thick] ([yshift=0.1em]node11.north)--([yshift=-0.1em]node21.south);
\draw [->,thick] ([yshift=0.1em]node12.north)--([yshift=-0.1em]node22.south);
\draw [->,thick] ([yshift=0.1em]node13.north)--([yshift=-0.1em]node23.south);
\draw [->,thick] ([yshift=0.1em]node14.north)--([yshift=-0.1em]node24.south);
\draw [->,thick] ([xshift=-1em]node11.west)--([xshift=-0.1em]node11.west);
\draw [->,thick] ([xshift=0.1em]node11.east)--([xshift=-0.1em]node12.west);
\draw [->,thick] ([xshift=0.1em]node12.east)--([xshift=-0.1em]node13.west);
\draw [->,thick] ([xshift=0.1em]node13.east)--([xshift=-0.1em]node14.west);
\draw [->,thick] ([xshift=0.1em]node14.east)--([xshift=1em]node14.east);
\end{tikzpicture}
\end{center}
\end{frame}
\end{CJK}
\end{document}
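The @@ -4457 and @@ -4507 hunks above only relabel FFN as FNN inside a figure whose nodes spell out a small attention-based language model: h_i = SelfAtt(e_i, e_3), f_3 = FNN([h_0, h_1, h_2, e_3]), y = Softmax(f_3 U), read as P(w_4 | w_0 w_1 w_2 w_3). The sketch below works through those four formulas in NumPy; the scaled dot-product form chosen for SelfAtt, the single-hidden-layer FNN, and all sizes and parameters are assumptions for illustration, not definitions taken from the slides.

import numpy as np

rng = np.random.default_rng(0)
V, d = 5, 8                                  # toy vocabulary size and embedding width (assumed)

E = rng.normal(scale=0.1, size=(4, d))       # e_0 .. e_3: embeddings of w_0 .. w_3 (hypothetical)
W1 = rng.normal(scale=0.1, size=(4 * d, d))  # weights of a one-hidden-layer FNN (assumed form)
b1 = np.zeros(d)
U = rng.normal(scale=0.1, size=(d, V))       # output projection, as in y = Softmax(f_3 U)

def softmax(x):
    x = x - x.max()
    e = np.exp(x)
    return e / e.sum()

def self_att(e_i, e_query, E):
    # SelfAtt(e_i, e_3): score every position against the query e_3
    # (scaled dot-product attention assumed), then combine with e_i.
    scores = E @ e_query / np.sqrt(len(e_query))
    alpha = softmax(scores)
    return alpha @ E + e_i

h = [self_att(E[i], E[3], E) for i in range(3)]     # h_0, h_1, h_2
f3 = np.tanh(np.concatenate(h + [E[3]]) @ W1 + b1)  # f_3 = FNN([h_0, h_1, h_2, e_3])
y = softmax(f3 @ U)                                 # P(w_4 | w_0 w_1 w_2 w_3)
print("P(w_4 | w_0 w_1 w_2 w_3) =", np.round(y, 3))

Only the last position (e_3) queries the other positions here, which matches the figure's prediction of a single next word w_4.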