Commit a1c93eed by xiaotong

FFN pages

parent a10f27e4
@@ -120,76 +120,42 @@
%%%------------------------------------------------------------------------------------------------------------
%%% FNNLM implementation
\begin{frame}{Implementing the Feed-forward Neural Network LM}
%%% Recurrent neural networks
\begin{frame}{Recurrent Neural Networks}
\begin{itemize}
\item The implementation is very simple: just a few lines of code
\begin{itemize}
\item Detail 1: for batching, $w[i]$ can be extended to hold multiple words at once
\item Detail 2: TanH is usually implemented as HardTanH, since TanH overflows easily (see the sketch below this list)
\end{itemize}
\end{itemize}
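As a quick reference (the standard definition, not tied to any particular toolkit), HardTanH simply clips its input to $[-1,1]$ and so avoids the overflow-prone exponentials inside TanH:
\begin{displaymath}
\textrm{HardTanH}(x) = \max(-1, \min(1, x))
\end{displaymath}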
\begin{tcolorbox}
[bicolor,sidebyside,righthand width=3.8cm,size=title,frame engine=empty,
colback=blue!10!white,colbacklower=black!5!white]
{\scriptsize
\begin{tabbing}
\texttt{XTensor w[3], e[3], h0, y;} \\
\texttt{XTensor C, H, d, U;} \\
\texttt{...}\\
\texttt{} \\
\texttt{for(unsigned i = 0; i < 3; i++)\{} \\
\texttt{\ \ \ \ e[i] = MMul(w[i], C);}\\
\texttt{\}}\\
\texttt{e01 = Concatenate(e[0], e[1], -1);}\\
\texttt{e = Concatenate(e01, e[2], -1);}\\
\texttt{} \\
\texttt{h0 = TanH(MMul(e, H) + d);}\\
\texttt{y = Softmax(MMul(h0, U));}\\
\texttt{} \\
\texttt{for(unsigned k = 0; k < size; k++)\{} \\
\texttt{} \ \ \ \ ... // the $k$-th element of \alert{\texttt{y}} is $\textrm{P}(w|...)$\\
\texttt{} \ \ \ \ ... // where $w$ is the $k$-th word in the vocabulary\\
\texttt{\}}
\end{tabbing}
}
\tcblower
\item The FFN LM is effective, but like a traditional $n$-gram LM it relies on the \alert{limited-context} assumption
\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=west] (w0) at (0,0) {\scriptsize{$w_{i-3}$}};
\node [anchor=west] (w1) at ([xshift=0.5em]w0.east) {\scriptsize{$w_{i-2}$}};
\node [anchor=west] (w2) at ([xshift=0.5em]w1.east) {\scriptsize{$w_{i-1}$}};
\node [anchor=north] (index0) at ([yshift=0.5em]w0.south) {\tiny(index)};
\node [anchor=north] (index1) at ([yshift=0.5em]w1.south) {\tiny(index)};
\node [anchor=north] (index2) at ([yshift=0.5em]w2.south) {\tiny(index)};
\node [anchor=south,draw,inner sep=3pt,align=left] (e0) at ([yshift=1.0em]w0.north) {\tiny{$e_0:$}\\\tiny{$w_{i-3} \textbf{C}$}};
\node [anchor=south,draw,inner sep=3pt,align=left] (e1) at ([yshift=1.0em]w1.north) {\tiny{$e_1:$}\\\tiny{$w_{i-2} \textbf{C}$}};
\node [anchor=south,draw,inner sep=3pt,align=left] (e2) at ([yshift=1.0em]w2.north) {\tiny{$e_2:$}\\\tiny{$w_{i-1} \textbf{C}$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt] (h0) at ([yshift=1.5em]e1.north) {\tiny{$h_0=\textrm{Tanh}([e_0,e_1,e_2] \textbf{H} + \textbf{d})$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt] (h1) at ([yshift=1.5em]h0.north) {\tiny{$y=\textrm{Softmax}(h_0 \textbf{U})$}};
\node [anchor=south] (ylabel) at ([yshift=1em]h1.north) {\scriptsize{$\textrm{P}(w_i|w_{i-3}w_{i-2}w_{i-1})$}};
\draw [->] ([yshift=0.1em]w0.north) -- ([yshift=-0.1em]e0.south);
\draw [->] ([yshift=0.1em]w1.north) -- ([yshift=-0.1em]e1.south);
\draw [->] ([yshift=0.1em]w2.north) -- ([yshift=-0.1em]e2.south);
\draw [->] ([yshift=0.1em]e0.north) -- ([xshift=-2em,yshift=-0.1em]h0.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]h0.south);
\draw [->] ([yshift=0.1em]e2.north) -- ([xshift=2em,yshift=-0.1em]h0.south);
\draw [->] ([yshift=0.1em]h0.north) -- ([yshift=-0.1em]h1.south);
\draw [->] ([yshift=0.1em]h1.north) -- ([yshift=-0.1em]ylabel.south);
\node [anchor=west] (w0) at (0,0) {$w_1$};
\node [anchor=west] (w1) at ([xshift=0.5em]w0.east) {$w_2$};
\node [anchor=west] (w2) at ([xshift=0.5em]w1.east) {$...$};
\node [anchor=west] (w3) at ([xshift=0.5em]w2.east) {$w_{m-n+1}$};
\node [anchor=west] (w4) at ([xshift=0.5em]w3.east) {$...$};
\node [anchor=west,fill=green!20!white] (w5) at ([xshift=0.5em]w4.east) {$w_{m}$};
\draw [->,thick,ublue] (w5.south).. controls +(210:0.5) and +(-30:0.5) .. (w3.south);
\draw [->,thick,red] (w5.north).. controls +(150:1) and +(30:1) .. (w1.north);
\draw [->,very thick,ublue] ([xshift=-5em,yshift=1em]w0.west) -- ([xshift=-6.5em,yshift=1em]w0.west) node [pos=0,right] {\scriptsize{depends on}};
\draw [->,very thick,red] ([xshift=-5em,yshift=-0.5em]w0.west) -- ([xshift=-6.5em,yshift=-0.5em]w0.west) node [pos=0,right] {\scriptsize{does not depend on}};
\end{scope}
\end{tikzpicture}
\end{center}
\end{tcolorbox}
\vspace{-0.5em}
\footnotesize{Note: size denotes the vocabulary size}
\item<2-> Can we model the original problem directly, i.e., define a function $g$ such that for any $w_{1} ... w_{m}$
\vspace{-0.5em}
\begin{displaymath}
g(w_{1} ... w_{m}) \approx \textrm{P}(w_m | w_{1} ... w_{m-1})
\end{displaymath}
\item<3-> \textbf{Recurrent neural networks (RNNs)} address this problem well and have thus been successfully applied to language modeling
\begin{itemize}
\item They assume that generating each word depends on all previously generated words
\item The generation probability at every position is described by the same function
\end{itemize}
\textbf{Recurrent Neural Network Based Language Model}\\
\textbf{Mikolov et al., 2010, In Proc. of Interspeech, 1045-1048}
\end{itemize}
\end{frame}
......
@@ -3847,14 +3847,14 @@ \alert{latent structure} assumption for NLP problems & no latent-structure assumption, \alert{end-to
\item As $n$ grows, \alert{data sparsity} becomes severe, since the vast majority of $n$-grams are never observed (a rough calculation follows below this list)
\item Maintaining an index over all $n$-grams is costly in storage
\end{itemize}
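A quick back-of-the-envelope calculation (illustrative numbers, not taken from the slides) shows why: with a vocabulary of $|V| = 10^5$ words and $n = 4$, there are
\begin{displaymath}
|V|^n = (10^5)^4 = 10^{20}
\end{displaymath}
possible $4$-grams, far more than any training corpus could ever contain.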
\item<2-> Another idea is to model $\textrm{P}(w_m | w_{m-n+1} ... w_{m-1})$ directly in continuous space, i.e., to define a function $g$ such that for any $w_{m-n+1} ... w_{m}$
\item<2-> Another idea is to model $\textrm{P}(w_m | w_{m-n+1} ... w_{m-1})$ directly in continuous space, i.e., to define a function $g$ where, for any $w_{m-n+1} ... w_{m}$
\begin{displaymath}
g(w_{m-n+1} ... w_{m}) \approx \textrm{P}(w_m | w_{m-n+1} ... w_{m-1})
\end{displaymath}
\item<3-> The most representative approach is the language model based on feed-forward neural networks
\item<3-> The most representative approach is the feed-forward neural network (FFN) language model
\begin{itemize}
\item A classic among classics, with a profound influence on the design of modern neural language models
\end{itemize}
@@ -3972,7 +3972,7 @@ \alert{latent structure} assumption for NLP problems & no latent-structure assumption, \alert{end-to
%%%------------------------------------------------------------------------------------------------------------
%%% FNNLM implementation
\begin{frame}{Implementing the Feed-forward Neural Network LM}
\begin{frame}{Implementing the Feed-forward Neural Network LM (FFN LM)}
\begin{itemize}
\item The implementation is very simple: just a few lines of code
@@ -3992,9 +3992,8 @@ \alert{latent structure} assumption for NLP problems & no latent-structure assumption, \alert{end-to
\texttt{...}\\
\texttt{} \\
\texttt{for(unsigned i = 0; i < 3; i++)\{} \\
\texttt{for(unsigned i = 0; i < 3; i++)} \\
\texttt{\ \ \ \ e[i] = MMul(w[i], C);}\\
\texttt{\}}\\
\texttt{e01 = Concatenate(e[0], e[1], -1);}\\
\texttt{e = Concatenate(e01, e[2], -1);}\\
@@ -4045,6 +4044,73 @@ \alert{latent structure} assumption for NLP problems & no latent-structure assumption, \alert{end-to
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% What neural language models bring us
\begin{frame}{The Significance of Neural Language Modeling}
\begin{itemize}
\item Open questions raised in Bengio et al. (2003)
\begin{enumerate}
\item What exactly does each layer of the network learn? \\
Lexical knowledge, syntax, or something else? How can it be interpreted?
\item What happens as the network grows deeper: networks of 10, 20, or 100 layers \\
\# of layers: 10 $\to$ 20 $\to$ 100 $\to$ 1000
\item How to choose hyper-parameters (e.g., hidden layer size): the optimal settings differ across tasks\\
How large should the distributed word representations be?\\
How large should the hidden layers be?\\
Which activation function should be chosen?\\
...
\end{enumerate}
\item<2-> Lessons learned from the FFN LM
\begin{itemize}
\item Redefining what a word is: not an entry in a dictionary, but a vector of real numbers (see the sketch after this list)
\item Multi-layer neural networks can represent (short-range) dependencies between words well
\item $n$-gram probabilities can be described by a continuous-space function, easing data sparsity: the model does not need to store every $n$-gram explicitly
\end{itemize}
\end{itemize}
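As a small illustration (using the $e_i = w_i \textbf{C}$ notation that already appears in these slides), looking up a word's vector is just a matrix product between a one-hot row vector and the embedding matrix $\textbf{C}$:
\begin{displaymath}
e = w\textbf{C}, \quad w = (0, ..., 0, 1, 0, ..., 0)
\end{displaymath}
where the single $1$ sits at the word's index in the vocabulary.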
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Recurrent neural networks
\begin{frame}{Recurrent Neural Networks}
\begin{itemize}
\item The FFN LM is effective, but like a traditional $n$-gram LM it relies on the \alert{limited-context} assumption
\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=west] (w0) at (0,0) {$w_1$};
\node [anchor=west] (w1) at ([xshift=0.5em]w0.east) {$w_2$};
\node [anchor=west] (w2) at ([xshift=0.5em]w1.east) {$...$};
\node [anchor=west] (w3) at ([xshift=0.5em]w2.east) {$w_{m-n+1}$};
\node [anchor=west] (w4) at ([xshift=0.5em]w3.east) {$...$};
\node [anchor=west,fill=green!20!white] (w5) at ([xshift=0.5em]w4.east) {$w_{m}$};
\draw [->,thick,ublue] (w5.south).. controls +(210:0.5) and +(-30:0.5) .. (w3.south);
\draw [->,thick,red] (w5.north).. controls +(150:1) and +(30:1) .. (w1.north);
\draw [->,very thick,ublue] ([xshift=-5em,yshift=1em]w0.west) -- ([xshift=-6.5em,yshift=1em]w0.west) node [pos=0,right] {\scriptsize{depends on}};
\draw [->,very thick,red] ([xshift=-5em,yshift=-0.5em]w0.west) -- ([xshift=-6.5em,yshift=-0.5em]w0.west) node [pos=0,right] {\scriptsize{does not depend on}};
\end{scope}
\end{tikzpicture}
\end{center}
\item<2-> Can we model the original problem directly, i.e., define a function $g$ such that for any $w_{1} ... w_{m}$
\vspace{-0.5em}
\begin{displaymath}
g(w_{1} ... w_{m}) \approx \textrm{P}(w_m | w_{1} ... w_{m-1})
\end{displaymath}
\item<3-> \textbf{Recurrent neural networks (RNNs)} address this problem well and have thus been successfully applied to language modeling
\begin{itemize}
\item They assume that generating each word depends on all previously generated words (one common formulation is sketched below this list)
\item The generation probability at every position is described by the same function
\end{itemize}
\textbf{Recurrent Neural Network Based Language Model}\\
\textbf{Mikolov et al., 2010, In Proc. of Interspeech, 1045-1048}
\end{itemize}
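One common way to realize this idea is an Elman-style recurrence (a sketch; Mikolov et al. (2010) use a sigmoid activation in the hidden layer): a hidden state $h_t$ summarizes all words read so far and is updated by the same function at every position:
\begin{displaymath}
h_t = \textrm{TanH}(x_t \textbf{W} + h_{t-1} \textbf{U}), \quad \textrm{P}(w_{t+1} | w_1 ... w_t) = \textrm{Softmax}(h_t \textbf{V})
\end{displaymath}
where $x_t$ is the embedding of $w_t$, and $\textbf{W}$, $\textbf{U}$, $\textbf{V}$ are parameter matrices.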
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{Word Embedding}
%%%------------------------------------------------------------------------------------------------------------
......