Commit b8e95e14 by xiaotong

new pages

parent e4ecbafb
@@ -24,6 +24,7 @@
\usepackage{tcolorbox}
\tcbuselibrary{skins}
\usetikzlibrary{calc,intersections}
\usetikzlibrary{matrix}
\usetikzlibrary{arrows,decorations.pathreplacing}
\usetikzlibrary{shadows} % LaTeX and plain TeX when using TikZ
@@ -120,24 +121,56 @@
\subsection{Word Embeddings}
%%%------------------------------------------------------------------------------------------------------------
%%% Pre-training
\begin{frame}{Pre-training}
%%% New ideas brought by pre-training
\begin{frame}{New ideas brought by pre-training}
\begin{itemize}
\item A language model can be trained on large amounts of unlabeled data, and the resulting model can be used directly in downstream systems; take sequence-to-sequence tasks as an example
\item While pre-trained models top the leaderboards on all kinds of tasks, they have also prompted some reflection:\\
what exactly does pre-training give us?
\begin{itemize}
\item Labeled data is limited; pre-training offers a way to exploit extremely large-scale data
\item General knowledge can be learned from large-scale unlabeled data, improving generalization
\item Neural networks are complex and hard to train; pre-training steers the model toward high-density regions of good solutions
\end{itemize}
\end{itemize}
\visible<2->{
\begin{center}
\begin{tikzpicture}
\node [anchor=south,minimum width=17em,fill=red!20!white] (encoder) at (0,0) {Encoder (pre-trained as a language model)};
\node [anchor=south,minimum width=17em,fill=blue!20!white] (decoder) at (encoder.north) {Decoder (trained normally with target-task labels)};
\draw[name path=ellipse,thick] (0,0) circle[x radius = 2, y radius = 1];
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p1) at (0.2,0.5) {};
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p2) at (0.3,0.6) {};
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p3) at (0.1,-0.1) {};
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p4) at (0.4,0) {};
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p5) at (0.5,0.3) {};
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p6) at (0.6,0.1) {};
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p7) at (0.7,-0.1) {};
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p8) at (-1.2,0.4) {};
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p9) at (-1.0,-0.3) {};
\node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (p10) at (-0.1,-0.8) {};
\begin{pgfonlayer}{background}
\visible<4->{
\node [rectangle,inner sep=0.4em,draw,blue] [fit = (p1) (p2) (p3) (p4) (p5) (p6)] (area) {};
}
\end{pgfonlayer}
\draw [->] (2.5,-0.7) -- (1.8,-0.5) node [pos=0,right] {\scriptsize{solution space of model parameters}};
\visible<4->{
\draw [->] (2.0,0.7) -- (area.20) node [pos=0,right] {\scriptsize{high-density region of good solutions (pre-training)}};
}
\visible<3->{
\draw [->] (-2.0,0.7) -- (p8.west) node [pos=0,left] {\scriptsize{stray solutions}};
}
\end{tikzpicture}
\end{center}
}
\item This has given rise to an extremely popular \alert{new paradigm}: large-scale language model pre-training + target-task fine-tuning
\begin{itemize}
\item Many NLP tasks can be framed as language modeling; a language model trained externally is plugged into the target system as a module (parameter initialization)
\end{itemize}
\begin{itemize}
\item Pre-training in machine translation (see the sketch after this frame)
\end{itemize}
\end{frame}
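The sequence-to-sequence case on this frame pairs an encoder that is pre-trained purely as a language model on unlabeled text with a decoder trained normally on labeled data. Below is a minimal, hypothetical PyTorch-style sketch of that split; the class LMEncoder, the random toy data, and all hyperparameters are illustrative assumptions, not the chapter's actual setup.

import torch
import torch.nn as nn

class LMEncoder(nn.Module):
    """Encoder that can first be trained as a left-to-right language model."""
    def __init__(self, vocab_size=1000, d_model=64):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, d_model)
        self.rnn = nn.GRU(d_model, d_model, batch_first=True)
        self.lm_head = nn.Linear(d_model, vocab_size)  # used only during pre-training

    def forward(self, x):
        h, _ = self.rnn(self.embed(x))
        return h                                       # contextual states for a decoder

    def lm_logits(self, x):
        return self.lm_head(self.forward(x))           # next-word prediction

encoder = LMEncoder()
opt = torch.optim.Adam(encoder.parameters(), lr=1e-3)

# 1) Pre-training: only unlabeled token sequences are needed (random toy data here).
for _ in range(100):
    batch = torch.randint(0, 1000, (8, 20))            # 8 sequences of 20 token ids
    logits = encoder.lm_logits(batch[:, :-1])
    loss = nn.functional.cross_entropy(
        logits.reshape(-1, logits.size(-1)), batch[:, 1:].reshape(-1))
    opt.zero_grad(); loss.backward(); opt.step()

# 2) Downstream: a seq2seq model would now reuse `encoder` as-is (or as an
#    initialization), and only the decoder side requires labeled training pairs.

The only point of the sketch is that the pre-training loop needs no task labels; the same encoder object (or its saved weights) is then handed to the sequence-to-sequence model.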
%%%------------------------------------------------------------------------------------------------------------
@@ -2873,7 +2873,7 @@ $\textbf{w}^*$ denotes the parameters that minimize the average loss on the training set
\item<2-> Core problem: \textbf{solving the $\argmin$, i.e., finding the minimum point of the cost function}
\begin{itemize}
\item This is a very common problem; recall the IBM models of Chapter 3, where the EM algorithm was used
\item But here the model is not a generative model
\item A more general solution method is needed (see the sketch after this list)
\end{itemize}
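The more general method alluded to above is gradient-based optimization of the cost function. As a reminder of what solving the argmin looks like in the simplest case, here is a minimal NumPy sketch of vanilla gradient descent on a toy least-squares cost; the data, step size, and iteration count are illustrative assumptions.

import numpy as np

# Toy cost: J(w) = (1/n) * ||Xw - y||^2, minimized by plain gradient descent.
rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + 0.01 * rng.normal(size=100)

w = np.zeros(3)          # start from an arbitrary point of the parameter space
lr = 0.1                 # learning rate (step size)
for _ in range(200):
    grad = (2.0 / len(y)) * X.T @ (X @ w - y)   # gradient of the mean squared error
    w -= lr * grad                              # move against the gradient
print(w)                 # close to the minimizer [1.0, -2.0, 0.5]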
@@ -5216,6 +5216,68 @@ GPT-2 (Transformer) & Radford et al. & 2019 & 35.7
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Pre-training
\begin{frame}{Pre-training}
\begin{itemize}
\item A language model can be trained on large amounts of unlabeled data, and the resulting model can be used directly in downstream systems; take sequence-to-sequence tasks as an example
\begin{center}
\begin{tikzpicture}
\node [anchor=south,minimum width=17em,fill=red!20!white] (encoder) at (0,0) {Encoder (pre-trained as a language model)};
\node [anchor=south,minimum width=17em,fill=blue!20!white] (decoder) at (encoder.north) {Decoder (trained normally on the target task)};
\end{tikzpicture}
\end{center}
\item<2-> This has given rise to the extremely popular \alert{paradigm}: large-scale language model pre-training + target-task fine-tuning
\begin{itemize}
\item Many NLP tasks can be framed as language modeling; a language model trained externally is plugged into the target system as a module (parameter initialization); see the sketch after this frame
\end{itemize}
\end{itemize}
\visible<2->{
\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=west,draw,thick,minimum width=4em,minimum height=1.7em,fill=blue!20] (encoder) at (0,0) {Module};
\node [anchor=south,minimum width=4em,minimum height=1.7em] (space) at ([yshift=0.3em]encoder.north) {\footnotesize{Target system}};
\begin{pgfonlayer}{background}
\node [rectangle,draw,thick,fill=red!20] [fit = (encoder) (space)] (system) {};
\end{pgfonlayer}
\node [anchor=north] (data) at ([yshift=-1em]system.south) {\scriptsize{\textbf{Labeled data for the target task}}};
\draw [->,thick] (data.north) -- ([yshift=-0.1em]system.south);
\node [anchor=north] (label) at ([yshift=-0em]data.south) {\scriptsize{(a) standard method}};
\end{scope}
\begin{scope}[xshift=2.8in]
\node [anchor=west,draw,dashed,thick,minimum width=4em,minimum height=1.7em,fill=blue!20] (encoder) at (0,0) {Module};
\node [anchor=south,minimum width=4em,minimum height=1.7em] (space) at ([yshift=0.3em]encoder.north) {\footnotesize{Target system}};
\node [anchor=center,draw,thick,minimum width=4em,minimum height=1.7em,fill=green!20] (encoderpre) at ([xshift=-7em]encoder.center) {\footnotesize{Language model}};
\draw [->,thick] (encoderpre.east) -- (encoder.west);
\begin{pgfonlayer}{background}
\node [rectangle,draw,thick,fill=red!20] [fit = (encoder) (space)] (system) {};
\end{pgfonlayer}
\node [anchor=north] (data) at ([yshift=-1em]system.south) {\scriptsize{\textbf{Labeled data for the target task}}};
\draw [->,thick] (data.north) -- ([yshift=-0.1em]system.south);
\node [anchor=north] (data2) at ([yshift=-1em,xshift=-7em]system.south) {\scriptsize{\textbf{Large-scale unlabeled data}}};
\draw [->,thick] (data2.north) -- ([yshift=-0.1em]encoderpre.south);
\node [anchor=north] (label) at ([yshift=-0em,xshift=-4em]data.south) {\scriptsize{(b) pre-training + fine-tuning}};
\end{scope}
\end{tikzpicture}
\end{center}
}
\end{frame}
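Figure (b) amounts to parameter initialization: the weights of the externally pre-trained language model are copied into the matching module of the target system, which is then fine-tuned on the (smaller) labeled data. A minimal, hypothetical PyTorch-style sketch of that hand-over; the class and variable names (TargetSystem, build_module, the sizes) are assumptions for illustration only.

import torch
import torch.nn as nn

def build_module(vocab_size=1000, d_model=64):
    # The "module" shared between the language model and the target system.
    return nn.Sequential(nn.Embedding(vocab_size, d_model),
                         nn.GRU(d_model, d_model, batch_first=True))

# Pretend these weights came from large-scale unlabeled pre-training;
# in practice they would be loaded from a saved checkpoint.
pretrained_lm = build_module()

class TargetSystem(nn.Module):
    def __init__(self):
        super().__init__()
        self.module = build_module()      # the dashed "Module" box in figure (b)
        self.head = nn.Linear(64, 2)      # task-specific part, randomly initialized

    def forward(self, x):
        out, _ = self.module(x)           # nn.GRU returns (outputs, last_state)
        return self.head(out[:, -1])      # predict from the last hidden state

system = TargetSystem()

# Parameter initialization: copy the pre-trained weights into the shared module.
system.module.load_state_dict(pretrained_lm.state_dict())

# Fine-tuning: ordinary supervised training on the target task's labeled data,
# typically with a smaller learning rate for the pre-trained part.
optimizer = torch.optim.Adam([
    {"params": system.module.parameters(), "lr": 1e-4},
    {"params": system.head.parameters(),   "lr": 1e-3},
])

A common design choice during fine-tuning, reflected in the optimizer above, is to give the pre-trained module a smaller learning rate than the freshly initialized task-specific head.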
\end{CJK}
\end{document}