Commit ce425042 by xiaotong

a complete version

parent b8e95e14
......@@ -124,7 +124,7 @@
%%% 预训练带来的新思路
\begin{frame}{预训练带来的新思路}
\begin{itemize}
\item 预训练模型刷榜各种任务的同时,引发了一些思考:\\
\item 预训练模型刷榜各种任务的同时,引发了一些思考:\\
预训练究竟给我们带来了什么?
\begin{itemize}
\item 有标注数据量有限,预训练提供使用超大规模数据的方法
......@@ -168,14 +168,115 @@
}
\begin{itemize}
\item 机器翻译中的预训练
\item<5-> 机器翻译中的预训练
\begin{itemize}
\item 机器翻译中预训练还没有屠榜,一方面由于很多机器翻译任务训练数据量并不小,另一方面也反映出翻译的双语建模对预训练也提出了新的要求
\end{itemize}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 总结
\begin{frame}{总结 - 长出一口气}
\begin{itemize}
\item 讲了很多,累呀累,再整理一下主要观点
\begin{itemize}
\item 神经网络没有那么复杂,入门不难
\item 简单的网络结构可以组合成强大的模型
\item 语言模型可以用神经网络实现,效果很好,最近出现的预训练等范式证明了神经语言模型的潜力
\end{itemize}
\item<2-> 仍然有很多问题需要讨论
\begin{itemize}
\item 常见的神经网络结构(面向NLP)\\
google一下LSTM、GRU、CNN
\item 深层模型和训练方法。深度学习如何体现``深''?\\
深层网络可以带来什么?\\
如何有效地训练深层模型?
\item 如何把神经网络用于包括机器翻译在内的其它NLP任务?\\
比如encoder-decoder框架
\item 深度学习的实践技巧\\
``炼金术''了解下,因为不同任务调参和模型设计都有技巧\\
...
\end{itemize}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% last slide
\begin{frame}{又结束一章内容$\sim$}
\vspace{2em}
\begin{center}
\textbf{内容很多,开了个头}\\
\textbf{学习深度学习技术需要实践和经验的积累!}
\vspace{2em}
\begin{tikzpicture}
\tikzstyle{rnnnode} = [draw,inner sep=5pt,minimum width=4em,minimum height=1.5em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}]
\node [anchor=west,rnnnode] (node11) at (0,0) {\tiny{RNN Cell}};
\node [anchor=west,rnnnode] (node12) at ([xshift=2em]node11.east) {\tiny{RNN Cell}};
\node [anchor=west,rnnnode] (node13) at ([xshift=2em]node12.east) {\tiny{RNN Cell}};
\node [anchor=west,rnnnode] (node14) at ([xshift=2em]node13.east) {\tiny{RNN Cell}};
\node [anchor=north,rnnnode,fill=red!30!white] (e1) at ([yshift=-1.2em]node11.south) {\tiny{embedding}};
\node [anchor=north,rnnnode,fill=red!30!white] (e2) at ([yshift=-1.2em]node12.south) {\tiny{embedding}};
\node [anchor=north,rnnnode,fill=red!30!white] (e3) at ([yshift=-1.2em]node13.south) {\tiny{embedding}};
\node [anchor=north,rnnnode,fill=red!30!white] (e4) at ([yshift=-1.2em]node14.south) {\tiny{embedding}};
\node [anchor=north] (w1) at ([yshift=-1em]e1.south) {\footnotesize{$<$s$>$}};
\node [anchor=north] (w2) at ([yshift=-1em]e2.south) {\footnotesize{谢谢}};
\node [anchor=north] (w3) at ([yshift=-1em]e3.south) {\footnotesize{大家}};
\node [anchor=north] (w4) at ([yshift=-1em]e4.south) {\footnotesize{聆听}};
\draw [->,thick] ([yshift=0.1em]w1.north)--([yshift=-0.1em]e1.south);
\draw [->,thick] ([yshift=0.1em]w2.north)--([yshift=-0.1em]e2.south);
\draw [->,thick] ([yshift=0.1em]w3.north)--([yshift=-0.1em]e3.south);
\draw [->,thick] ([yshift=0.1em]w4.north)--([yshift=-0.1em]e4.south);
\draw [->,thick] ([yshift=0.1em]e1.north)--([yshift=-0.1em]node11.south);
\draw [->,thick] ([yshift=0.1em]e2.north)--([yshift=-0.1em]node12.south);
\draw [->,thick] ([yshift=0.1em]e3.north)--([yshift=-0.1em]node13.south);
\draw [->,thick] ([yshift=0.1em]e4.north)--([yshift=-0.1em]node14.south);
\node [anchor=south,rnnnode,fill=red!30!white] (node21) at ([yshift=1.0em]node11.north) {\tiny{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=red!30!white] (node22) at ([yshift=1.0em]node12.north) {\tiny{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=red!30!white] (node23) at ([yshift=1.0em]node13.north) {\tiny{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=red!30!white] (node24) at ([yshift=1.0em]node14.north) {\tiny{Softmax($\cdot$)}};
\node [anchor=south] (output1) at ([yshift=1em]node21.north) {\Large{\textbf{谢谢}}};
\node [anchor=south] (output2) at ([yshift=1em]node22.north) {\Large{\textbf{大家}}};
\node [anchor=south] (output3) at ([yshift=1em]node23.north) {\Large{\textbf{聆听}}};
\node [anchor=south] (output4) at ([yshift=1em]node24.north) {\Large{\textbf{$<$/s$>$}}};
\draw [->,thick] ([yshift=0.1em]node21.north)--([yshift=-0.1em]output1.south);
\draw [->,thick] ([yshift=0.1em]node22.north)--([yshift=-0.1em]output2.south);
\draw [->,thick] ([yshift=0.1em]node23.north)--([yshift=-0.1em]output3.south);
\draw [->,thick] ([yshift=0.1em]node24.north)--([yshift=-0.1em]output4.south);
\draw [->,thick] ([yshift=0.1em]node11.north)--([yshift=-0.1em]node21.south);
\draw [->,thick] ([yshift=0.1em]node12.north)--([yshift=-0.1em]node22.south);
\draw [->,thick] ([yshift=0.1em]node13.north)--([yshift=-0.1em]node23.south);
\draw [->,thick] ([yshift=0.1em]node14.north)--([yshift=-0.1em]node24.south);
\draw [->,thick] ([xshift=-1em]node11.west)--([xshift=-0.1em]node11.west);
\draw [->,thick] ([xshift=0.1em]node11.east)--([xshift=-0.1em]node12.west);
\draw [->,thick] ([xshift=0.1em]node12.east)--([xshift=-0.1em]node13.west);
\draw [->,thick] ([xshift=0.1em]node13.east)--([xshift=-0.1em]node14.west);
\draw [->,thick] ([xshift=0.1em]node14.east)--([xshift=1em]node14.east);
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 深度学习带来的问题及思考 - 并不是无所不能
\end{CJK}
\end{document}
......@@ -24,6 +24,7 @@
\usepackage{tcolorbox}
\tcbuselibrary{skins}
\usetikzlibrary{calc,intersections}
\usetikzlibrary{matrix}
\usetikzlibrary{arrows,decorations.pathreplacing}
\usetikzlibrary{shadows} % LATEX and plain TEX when using Tik Z
......@@ -3855,7 +3856,7 @@ NLP问题的\alert{隐含结构}假设 & 无隐含结构假设,\alert{端到
\item<3-> 最具代表性的方法是前馈神经网络(FFN)语言模型
\item<3-> 最具代表性的方法是前馈神经网络(FNN)语言模型
\begin{itemize}
\item 经典中的经典,对现代神经语言模型的设计产生深远影响
\end{itemize}
......@@ -3973,7 +3974,7 @@ NLP问题的\alert{隐含结构}假设 & 无隐含结构假设,\alert{端到
%%%------------------------------------------------------------------------------------------------------------
%%% FNNLM implementation
\begin{frame}{前馈神经网络语言模型(FFN LM)的实现}
\begin{frame}{前馈神经网络语言模型(FNN LM)的实现}
\begin{itemize}
\item 实现非常简单,几行代码
......@@ -4061,7 +4062,7 @@ NLP问题的\alert{隐含结构}假设 & 无隐含结构假设,\alert{端到
激活函数如何选择?\\
...
\end{enumerate}
\item<2-> 从FFN LM得到的启发
\item<2-> 从FNN LM得到的启发
\begin{itemize}
\item 重新定义词是什么 - 非词典里的一项,而是一个实数向量
\item 多层神经网络可以很好地表示单词之间的(短距离)依赖
......@@ -4457,7 +4458,7 @@ $\textbf{V}, \textbf{U}, \textbf{W}$: 参数
\node [anchor=south,draw,inner sep=3pt] (h0) at ([xshift=-0.5em, yshift=1.5em]e0.north) {\tiny{$h_{0}=\textrm{SelfAtt}(e_0,e_3)$}};
\node [anchor=south,draw,inner sep=3pt] (h1) at ([xshift=0.5em, yshift=1.5em]e1.north) {\tiny{$h_{1}=\textrm{SelfAtt}(e_1,e_3)$}};
\node [anchor=south,draw,inner sep=3pt] (h2) at ([xshift=1.5em, yshift=1.5em]e2.north) {\tiny{$h_{2}=\textrm{SelfAtt}(e_2,e_3)$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt] (f1) at ([xshift=0.5em, yshift=1.5em]h2.north) {\tiny{$f_3=\textrm{FFN}([h_0,h_1,h_2,e_3])$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt] (f1) at ([xshift=0.5em, yshift=1.5em]h2.north) {\tiny{$f_3=\textrm{FNN}([h_0,h_1,h_2,e_3])$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt] (o1) at ([yshift=1em]f1.north) {\tiny{$y=\textrm{Softmax}(f_3 \textbf{U})$}};
\node [anchor=south] (ylabel) at ([yshift=1em]o1.north) {\footnotesize{$\textrm{P}(w_4|w_{0}w_{1}w_{2}w_{3})$}};
......@@ -4507,7 +4508,7 @@ $\textbf{V}, \textbf{U}, \textbf{W}$: 参数
}
\visible<4->{
\node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (f1) at ([xshift=0.5em, yshift=1.5em]h2.north) {\tiny{$f_3=\textrm{FFN}([h_0,h_1,h_2,e_3])$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (f1) at ([xshift=0.5em, yshift=1.5em]h2.north) {\tiny{$f_3=\textrm{FNN}([h_0,h_1,h_2,e_3])$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (o1) at ([yshift=1em]f1.north) {\tiny{$y=\textrm{Softmax}(f_3 \textbf{U})$}};
}
......@@ -5279,5 +5280,158 @@ GPT-2 (Transformer) & Radford et al. & 2019 & 35.7
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 预训练带来的新思路
\begin{frame}{预训练带来的新思路}
\begin{itemize}
\item 预训练模型刷榜各种任务的同时,引发了一些思考:\\
预训练究竟给我们带来了什么?
\begin{itemize}
\item 有标注数据量有限,预训练提供使用超大规模数据的方法
\item 从大规模无标注数据中学习通用知识,提升泛化能力
\item 神经网络复杂且不容易训练,预训练可以使模型关注优质解的高密度区域
\end{itemize}
\end{itemize}
\visible<2->{
\begin{center}
\begin{tikzpicture}
\draw[name path=ellipse,thick] (0,0) circle[x radius = 2, y radius = 1];
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p1) at (0.2,0.5) {};
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p2) at (0.3,0.6) {};
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p3) at (0.1,-0.1) {};
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p4) at (0.4,0) {};
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p5) at (0.5,0.3) {};
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p6) at (0.6,0.1) {};
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p7) at (0.7,-0.1) {};
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p8) at (-1.2,0.4) {};
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p9) at (-1.0,-0.3) {};
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p10) at (-0.1,-0.8) {};
\begin{pgfonlayer}{background}
\visible<4->{
\node [rectangle,inner sep=0.4em,draw,blue] [fit = (p1) (p2) (p3) (p4) (p5) (p6)] (area) {};
}
\end{pgfonlayer}
\draw [->] (2.5,-0.7) -- (1.8,-0.5) node [pos=0,right] {\scriptsize{模型参数解空间}};
\visible<4->{
\draw [->] (2.0,0.7) -- (area.20) node [pos=0,right] {\scriptsize{优质解高密度区域(预训练)}};
}
\visible<3->{
\draw [->] (-2.0,0.7) -- (p8.west) node [pos=0,left] {\scriptsize{游离的解}};
}
\end{tikzpicture}
\end{center}
}
\begin{itemize}
\item<5-> 机器翻译中的预训练
\begin{itemize}
\item 机器翻译中预训练还没有屠榜,一方面由于很多机器翻译任务训练数据量并不小,另一方面也反映出翻译的双语建模对预训练也提出了新的要求
\end{itemize}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 总结
\begin{frame}{总结 - 长出一口气}
\begin{itemize}
\item 讲了很多,累呀累,再整理一下主要观点
\begin{itemize}
\item 神经网络没有那么复杂,入门不难
\item 简单的网络结构可以组合成强大的模型
\item 语言模型可以用神经网络实现,效果很好,最近出现的预训练等范式证明了神经语言模型的潜力
\end{itemize}
\item<2-> 仍然有很多问题需要讨论
\begin{itemize}
\item 常见的神经网络结构(面向NLP)\\
google一下LSTM、GRU、CNN
\item 深层模型和训练方法。深度学习如何体现``深''?\\
深层网络可以带来什么?\\
如何有效地训练深层模型?
\item 如何把神经网络用于包括机器翻译在内的其它NLP任务?\\
比如encoder-decoder框架
\item 深度学习的实践技巧\\
``炼金术''了解下,因为不同任务调参和模型设计都有技巧\\
...
\end{itemize}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% last slide
\begin{frame}{又结束一章内容$\sim$}
\vspace{2em}
\begin{center}
\textbf{内容很多,开了个头}\\
\textbf{学习深度学习技术需要实践和经验的积累!}
\vspace{2em}
\begin{tikzpicture}
\tikzstyle{rnnnode} = [draw,inner sep=5pt,minimum width=4em,minimum height=1.5em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}]
\node [anchor=west,rnnnode] (node11) at (0,0) {\tiny{RNN Cell}};
\node [anchor=west,rnnnode] (node12) at ([xshift=2em]node11.east) {\tiny{RNN Cell}};
\node [anchor=west,rnnnode] (node13) at ([xshift=2em]node12.east) {\tiny{RNN Cell}};
\node [anchor=west,rnnnode] (node14) at ([xshift=2em]node13.east) {\tiny{RNN Cell}};
\node [anchor=north,rnnnode,fill=red!30!white] (e1) at ([yshift=-1.2em]node11.south) {\tiny{embedding}};
\node [anchor=north,rnnnode,fill=red!30!white] (e2) at ([yshift=-1.2em]node12.south) {\tiny{embedding}};
\node [anchor=north,rnnnode,fill=red!30!white] (e3) at ([yshift=-1.2em]node13.south) {\tiny{embedding}};
\node [anchor=north,rnnnode,fill=red!30!white] (e4) at ([yshift=-1.2em]node14.south) {\tiny{embedding}};
\node [anchor=north] (w1) at ([yshift=-1em]e1.south) {\footnotesize{$<$s$>$}};
\node [anchor=north] (w2) at ([yshift=-1em]e2.south) {\footnotesize{谢谢}};
\node [anchor=north] (w3) at ([yshift=-1em]e3.south) {\footnotesize{大家}};
\node [anchor=north] (w4) at ([yshift=-1em]e4.south) {\footnotesize{聆听}};
\draw [->,thick] ([yshift=0.1em]w1.north)--([yshift=-0.1em]e1.south);
\draw [->,thick] ([yshift=0.1em]w2.north)--([yshift=-0.1em]e2.south);
\draw [->,thick] ([yshift=0.1em]w3.north)--([yshift=-0.1em]e3.south);
\draw [->,thick] ([yshift=0.1em]w4.north)--([yshift=-0.1em]e4.south);
\draw [->,thick] ([yshift=0.1em]e1.north)--([yshift=-0.1em]node11.south);
\draw [->,thick] ([yshift=0.1em]e2.north)--([yshift=-0.1em]node12.south);
\draw [->,thick] ([yshift=0.1em]e3.north)--([yshift=-0.1em]node13.south);
\draw [->,thick] ([yshift=0.1em]e4.north)--([yshift=-0.1em]node14.south);
\node [anchor=south,rnnnode,fill=red!30!white] (node21) at ([yshift=1.0em]node11.north) {\tiny{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=red!30!white] (node22) at ([yshift=1.0em]node12.north) {\tiny{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=red!30!white] (node23) at ([yshift=1.0em]node13.north) {\tiny{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=red!30!white] (node24) at ([yshift=1.0em]node14.north) {\tiny{Softmax($\cdot$)}};
\node [anchor=south] (output1) at ([yshift=1em]node21.north) {\Large{\textbf{谢谢}}};
\node [anchor=south] (output2) at ([yshift=1em]node22.north) {\Large{\textbf{大家}}};
\node [anchor=south] (output3) at ([yshift=1em]node23.north) {\Large{\textbf{聆听}}};
\node [anchor=south] (output4) at ([yshift=1em]node24.north) {\Large{\textbf{$<$/s$>$}}};
\draw [->,thick] ([yshift=0.1em]node21.north)--([yshift=-0.1em]output1.south);
\draw [->,thick] ([yshift=0.1em]node22.north)--([yshift=-0.1em]output2.south);
\draw [->,thick] ([yshift=0.1em]node23.north)--([yshift=-0.1em]output3.south);
\draw [->,thick] ([yshift=0.1em]node24.north)--([yshift=-0.1em]output4.south);
\draw [->,thick] ([yshift=0.1em]node11.north)--([yshift=-0.1em]node21.south);
\draw [->,thick] ([yshift=0.1em]node12.north)--([yshift=-0.1em]node22.south);
\draw [->,thick] ([yshift=0.1em]node13.north)--([yshift=-0.1em]node23.south);
\draw [->,thick] ([yshift=0.1em]node14.north)--([yshift=-0.1em]node24.south);
\draw [->,thick] ([xshift=-1em]node11.west)--([xshift=-0.1em]node11.west);
\draw [->,thick] ([xshift=0.1em]node11.east)--([xshift=-0.1em]node12.west);
\draw [->,thick] ([xshift=0.1em]node12.east)--([xshift=-0.1em]node13.west);
\draw [->,thick] ([xshift=0.1em]node13.east)--([xshift=-0.1em]node14.west);
\draw [->,thick] ([xshift=0.1em]node14.east)--([xshift=1em]node14.east);
\end{tikzpicture}
\end{center}
\end{frame}
\end{CJK}
\end{document}
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论