Commit b14247bd by zengxin

10 11

parent 866b08ab
......@@ -12,7 +12,7 @@
% RNN Encoder
\coordinate (eemb0) at (0,0);
\foreach \x [count=\y from 0] in {1,2,...,3}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\x) at ([xshift=0.4\base]eemb\y.east) {\tiny{$e_x()$}};
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\x) at ([xshift=0.4\base]eemb\y.east) {\tiny{$\textrm{e}_x()$}};
\foreach \x in {1,2,...,3}
\node[rnnnode,fill=blue!30!white,anchor=south] (enc\x) at ([yshift=0.3\base]eemb\x.north) {};
\node[] (enclabel1) at (enc1) {\tiny{$\mathbi{h}_{m-2}$}};
......@@ -27,7 +27,7 @@
% RNN Decoder
\foreach \x in {1,2,...,3}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([yshift=\base]enc\x.north) {\tiny{$e_y()$}};
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([yshift=\base]enc\x.north) {\tiny{$\textrm{e}_y()$}};
\foreach \x in {1,2,...,3}
\node[rnnnode,fill=blue!30!white,anchor=south] (dec\x) at ([yshift=0.3\base]demb\x.north) {{\tiny{$\mathbi{s}_\x$}}};
\foreach \x in {1,2,...,3}
......@@ -80,10 +80,10 @@
}
{
\node [anchor=north west] (line11) at ([yshift=-1.8em]line4.west) {\scriptsize{每个词的one-hot}};
\node [anchor=north west] (line11) at ([yshift=-1.8em]line4.west) {\scriptsize{每个词的One-hot}};
\node [anchor=north west] (line12) at ([yshift=0.3em]line11.south west) {\scriptsize{离散化表示都被转化为\ \ \ \ }};
\node [anchor=north west] (line13) at ([yshift=0.3em]line12.south west) {\scriptsize{实数向量,即词嵌入}};
\node [anchor=north west] (line14) at ([yshift=0.3em]line13.south west) {\scriptsize{($e_x()$$e_y()$函数)}};
\node [anchor=north west] (line14) at ([yshift=0.3em]line13.south west) {\scriptsize{($\textrm{e}_x()$$\textrm{e}_y()$函数)}};
}
{
......
......@@ -43,7 +43,7 @@
\node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax\x) at ([yshift=0.5\base]dec\x.north) {};
% Decoder input words
\node[wordnode,below=0pt of demb1] (decwordin) {$\langle$eos$\rangle$};
\node[wordnode,below=0pt of demb1] (decwordin) {$\langle$sos$\rangle$};
\ExtractX{$(demb2.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Do};
......
......@@ -83,10 +83,10 @@
\end{scope}
\node[] (tanh) at (aux46){};
\node[] (Tanh) at (aux46){};
\begin{pgfonlayer}{background}
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=6pt,fit=(aux22) (aux76) (z76) (tanh)] (GRU) {};
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=6pt,fit=(aux22) (aux76) (z76) (Tanh)] (GRU) {};
\end{pgfonlayer}
......
......@@ -96,10 +96,10 @@
\end{scope}
\node[] (tanh) at (aux46){};
\node[] (Tanh) at (aux46){};
\begin{pgfonlayer}{background}
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=6pt,fit=(aux22) (aux76) (z76) (tanh)] (GRU) {};
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=6pt,fit=(aux22) (aux76) (z76) (Tanh)] (GRU) {};
\end{pgfonlayer}
......
......@@ -99,7 +99,7 @@
\draw[-latex,emph] (aux71) -| (aux87);
\draw[-latex,emph] (aux71) -| (aux53) -- (aux23) -| (aux46) -- (z76);
\draw[emph] (aux12) |- (aux23) -| (aux46);
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] (tanh) at (aux46) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] (Tanh) at (aux46) {$\mathrm{Tanh}$};
\node[opnode,circle,draw=red,thick] (a1) at (aux53) {};
\node[opnode,circle,draw=red,thick] (a2) at (aux56) {};
\node[opnode,circle,draw=red,thick] (a3) at (aux75) {};
......@@ -118,7 +118,7 @@
\end{scope}
\begin{pgfonlayer}{background}
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=6pt,fit=(aux22) (aux76) (z76) (tanh)] (GRU) {};
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=6pt,fit=(aux22) (aux76) (z76) (Tanh)] (GRU) {};
\end{pgfonlayer}
%%new
......
......@@ -93,7 +93,7 @@
\draw[-latex,emph] (aux21) -- (aux25) -- (u55);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{Tanh}$};
\node[opnode,circle,draw=red,thick] (i45) at (aux45) {};
}
\end{scope}
......
......@@ -92,7 +92,7 @@
\draw[-latex,emph] (aux21) -- (aux25) -- (u55);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{Tanh}$};
\node[opnode,circle,draw=red,thick] (i45) at (aux45) {X};
}
{
......@@ -100,7 +100,7 @@
\draw[-latex,standard] (aux21) -- (aux24) |- (i45);
\draw[-latex,standard] (aux21) -- (aux25) -- (u55);
\node[opnode,circle] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux35) {$\mathrm{Tanh}$};
\node[opnode,circle] (i45) at (aux45) {};
}
% cell update
......
......@@ -93,7 +93,7 @@
\draw[-latex,emph] (aux21) -- (aux25) -- (u55);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{Tanh}$};
\node[opnode,circle,draw=red,thick] (i45) at (aux45) {X};
}
{
......@@ -101,7 +101,7 @@
\draw[-latex,standard] (aux21) -- (aux24) |- (i45);
\draw[-latex,standard] (aux21) -- (aux25) -- (u55);
\node[opnode,circle] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux35) {$\mathrm{Tanh}$};
\node[opnode,circle] (i45) at (aux45) {};
}
% cell update
......@@ -122,7 +122,7 @@
\draw[-latex,emph] (aux21) -- (o27);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux26) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux37) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux37) {$\mathrm{Tanh}$};
\draw[-latex,emph] (o27) -- (aux29);
\draw[-latex,emph] (o27) -| (aux68);
......
......@@ -94,7 +94,7 @@
\draw[-latex,emph] (aux21) -- (aux25) -- (u55);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{Tanh}$};
\node[opnode,circle,draw=red,thick] (i45) at (aux45) {};
}
{
......@@ -102,7 +102,7 @@
\draw[-latex,standard] (aux21) -- (aux24) |- (i45);
\draw[-latex,standard] (aux21) -- (aux25) -- (u55);
\node[opnode,circle] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux35) {$\mathrm{Tanh}$};
\node[opnode,circle] (i45) at (aux45) {};
}
% cell update
......@@ -123,7 +123,7 @@
\draw[-latex,emph] (aux21) -- (o27);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux26) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux37) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux37) {$\mathrm{Tanh}$};
\draw[-latex,emph] (o27) -- (aux29);
\draw[-latex,emph] (o27) -| (aux68);
......@@ -133,7 +133,7 @@
\draw[-latex,standard] (u55) -| (o27);
\draw[-latex,standard] (aux21) -- (o27);
\node[opnode,circle] () at (aux26) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux37) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux37) {$\mathrm{Tanh}$};
\draw[-latex,standard] (o27) -- (aux29);
\draw[-latex,standard] (o27) -| (aux68);
......@@ -174,7 +174,7 @@
}
{
% input gate formula
\node[formulanode,anchor=north east,text width=10em] () at ([shift={(4\base,-1.5\base)}]aux21) {输入门\\$\mathbi{i}_t=\sigma(\mathbi{W}_i[\mathbi{h}_{t-1},\mathbi{x}_t]+\mathbi{b}_i)$\\$\hat{\mathbi{c}}_t=\mathrm{tanh}(\mathbi{W}_c[\mathbi{h}_{t-1},\mathbi{x}_t]+\mathbi{b}_c)$};
\node[formulanode,anchor=north east,text width=10em] () at ([shift={(4\base,-1.5\base)}]aux21) {输入门\\$\mathbi{i}_t=\sigma(\mathbi{W}_i[\mathbi{h}_{t-1},\mathbi{x}_t]+\mathbi{b}_i)$\\$\hat{\mathbi{c}}_t=\mathrm{Tanh}(\mathbi{W}_c[\mathbi{h}_{t-1},\mathbi{x}_t]+\mathbi{b}_c)$};
}
{
% cell update formula
......@@ -182,7 +182,7 @@
}
{
% output gate formula
\node[formulanode,anchor=north west,text width=10em] () at ([shift={(-4\base,-1.5\base)}]aux29) {输出门\\$\mathbi{o}_t=\sigma(\mathbi{W}_o[\mathbi{h}_{t-1},\mathbi{x}_t]+\mathbi{b}_o)$\\$\mathbi{h}_{t}=\mathbi{o}_t\cdot \mathrm{tanh}(\mathbi{c}_{t})$};
\node[formulanode,anchor=north west,text width=10em] () at ([shift={(-4\base,-1.5\base)}]aux29) {输出门\\$\mathbi{o}_t=\sigma(\mathbi{W}_o[\mathbi{h}_{t-1},\mathbi{x}_t]+\mathbi{b}_o)$\\$\mathbi{h}_{t}=\mathbi{o}_t\cdot \mathrm{Tanh}(\mathbi{c}_{t})$};
}
\end{scope}
\end{tikzpicture}
......
......@@ -43,7 +43,7 @@
\node [draw=ugreen!30,rectangle,inner ysep=5pt,inner xsep=1.8em,rounded corners=4pt,line width=2pt,fill=ugreen!10] [fit = (tgt_1) (i_0)(tgt_2)(i_5) ] (group1_2) {};
\end{pgfonlayer}
\node[anchor=east, word] (l_0) at ([xshift=-2em,yshift=-0.5em]i_0.west){\sffamily\bfnew{词嵌入}};
\node[anchor=east, word] (l_0) at ([xshift=-2em,yshift=-0.5em]i_0.west){\sffamily\bfnew{目标语词嵌入}};
\node[anchor=south, word] (l_1) at ([yshift=2em]l_0.north){\sffamily\bfnew{卷积}};
\node[anchor=south, word] (l_2) at ([yshift=2.4em]l_1.north){\sffamily\bfnew{门控}};
\node[anchor=south, word] (l_3) at ([yshift=0.06em]l_2.north){\sffamily\bfnew{线性单元}};
......@@ -144,7 +144,7 @@
\node[anchor=south,word] (src_2) at ([xshift=2em,yshift=0.4em]r_2.north){$<$p$>$};
\node[anchor=east, word] (t_1) at ([xshift=-4em,yshift=0.5em]r_0.west){\sffamily\bfnew{词嵌入}};
\node[anchor=east, word] (t_1) at ([xshift=-4em,yshift=0.5em]r_0.west){\sffamily\bfnew{源语词嵌入}};
\node[anchor=north, word] (t_2) at ([yshift=-2em]t_1.south){\sffamily\bfnew{卷积}};
\node[anchor=north, word] (t_3) at ([yshift=-2.8em]t_2.south){\sffamily\bfnew{门控}};
\node[anchor=north, word] (t_4) at ([yshift=-0.06em]t_3.south){\sffamily\bfnew{线性单元}};
......
......@@ -39,7 +39,7 @@
\node[anchor=north,word] at ([yshift=-0.4em]i_4.south){to};
\node[anchor=north,word] at ([yshift=-0.4em]i_5.south){school};
\node[anchor=east, word] (l_0) at ([xshift=-2em,yshift=-0.5em]i_0.west){\sffamily\bfnew{词嵌入}};
\node[anchor=east, word] (l_0) at ([xshift=-2em,yshift=-0.5em]i_0.west){\sffamily\bfnew{目标语词嵌入}};
\node[anchor=south, word] (l_1) at ([yshift=2em]l_0.north){\sffamily\bfnew{卷积}};
\node[anchor=south, word] (l_2) at ([yshift=2.4em]l_1.north){\sffamily\bfnew{门控}};
\node[anchor=south, word] (l_3) at ([yshift=0.06em]l_2.north){\sffamily\bfnew{线性单元}};
......@@ -139,7 +139,7 @@
\node[anchor=south,word] (src_2) at ([xshift=2em,yshift=0.4em]r_2.north){$<$p$>$};
\node[anchor=east, word] (t_1) at ([xshift=-4em,yshift=0.5em]r_0.west){\sffamily\bfnew{词嵌入}};
\node[anchor=east, word] (t_1) at ([xshift=-4em,yshift=0.5em]r_0.west){\sffamily\bfnew{源语词嵌入}};
\node[anchor=north, word] (t_2) at ([yshift=-2em]t_1.south){\sffamily\bfnew{卷积}};
\node[anchor=north, word] (t_3) at ([yshift=-2.8em]t_2.south){\sffamily\bfnew{门控}};
\node[anchor=north, word] (t_4) at ([yshift=-0.06em]t_3.south){\sffamily\bfnew{线性单元}};
......
......@@ -39,7 +39,7 @@
\node[anchor=north,word] at ([yshift=-0.4em]i_4.south){to};
\node[anchor=north,word] at ([yshift=-0.4em]i_5.south){school};
\node[anchor=east, word] (l_0) at ([xshift=-2em,yshift=-0.5em]i_0.west){\sffamily\bfnew{词嵌入}};
\node[anchor=east, word] (l_0) at ([xshift=-2em,yshift=-0.5em]i_0.west){\sffamily\bfnew{目标语词嵌入}};
\node[anchor=south, word] (l_1) at ([yshift=2em]l_0.north){\sffamily\bfnew{卷积}};
\node[anchor=south, word] (l_2) at ([yshift=2.4em]l_1.north){\sffamily\bfnew{门控}};
\node[anchor=south, word] (l_3) at ([yshift=0.06em]l_2.north){\sffamily\bfnew{线性单元}};
......@@ -140,7 +140,7 @@
\node[anchor=south,word] (src_2) at ([xshift=2em,yshift=0.4em]r_2.north){$<$p$>$};
\node[anchor=east, word] (t_1) at ([xshift=-4em,yshift=0.5em]r_0.west){\sffamily\bfnew{词嵌入}};
\node[anchor=east, word] (t_1) at ([xshift=-4em,yshift=0.5em]r_0.west){\sffamily\bfnew{源语词嵌入}};
\node[anchor=north, word] (t_2) at ([yshift=-2em]t_1.south){\sffamily\bfnew{卷积}};
\node[anchor=north, word] (t_3) at ([yshift=-2.8em]t_2.south){\sffamily\bfnew{门控}};
\node[anchor=north, word] (t_4) at ([yshift=-0.06em]t_3.south){\sffamily\bfnew{线性单元}};
......
......@@ -104,7 +104,7 @@
\label{eq:11-2-new}
\end{eqnarray}
\parinterval 在图像处理中,可以通过设计特定的卷积核来进行特征提取,比如图像边缘信息(图\ref{fig:11-5})。而在卷积神经网络中,只需要指定卷积层中卷积核的数量及大小,卷积核参数通过梯度下降等手段学习得到。这样可以让模型自己学习需要提取哪些特征
\parinterval 卷积计算的作用是提取特征,用不同的卷积核计算可以获取不同的特征,比如图\ref{fig:11-5},通过设计的特定卷积核就可以获取图像边缘信息。在卷积神经网络中,不需要手动设计卷积核,只需要指定卷积层中卷积核的数量及大小,模型就可以自己学习卷积核具体的参数
%----------------------------------------------
% 图4.
......@@ -203,7 +203,7 @@
\parinterval 针对不定长序列,一种可行的方法是使用之前介绍过的循环神经网络,其本质也是基于权重共享的想法,在不同的时间步复用相同的循环神经网络单元进行处理。但是,循环神经网络最大的弊端在于每一时刻的计算都依赖于上一时刻的结果,因此只能对序列进行串行处理,无法充分利用硬件设备进行并行计算,导致效率相对较低。此外,在处理较长的序列时,这种串行的方式很难捕捉长距离的依赖关系。相比之下,卷积神经网络采用共享参数的方式处理固定大小窗口内的信息,且不同位置的卷积操作之间没有相互依赖,因此可以对序列进行高效地并行处理。同时,针对序列中距离较长的依赖关系,可以通过堆叠多层卷积层来扩大{\small\bfnew{感受野}}\index{感受野} (Receptive Field)\index{Receptive Field} ,这里感受野指能够影响神经元输出的原始输入数据区域的大小。图\ref{fig:11-9}对比了这两种结构,可以看出,为了捕捉$\mathbi{e}_2$$\mathbi{e}_8$ 之间的联系,串行结构需要顺序的6次操作,和序列长度相关。而该卷积神经网络中,卷积操作每次对三个词进行计算,仅需要4层卷积计算就能得到$\mathbi{e}_2$$\mathbi{e}_8$之间的联系,其操作数和卷积核的大小相关,相比于串行的方式具有更短的路径和更少的非线性计算,更容易进行训练。因此,也有许多研究人员在许多自然语言处理任务上尝试使用卷积神经网络进行序列建模\upcite{Kim2014ConvolutionalNN,Santos2014DeepCN,Kalchbrenner2014ACN,DBLP:conf/naacl/Johnson015,DBLP:conf/naacl/NguyenG15}
\parinterval 区别于传统图像上的卷积操作,在面向序列的卷积操作中,卷积核只在序列这一维度进行移动,用来捕捉连续的多个词之间的特征。需要注意的是,由于单词通常由一个实数向量表示(词嵌入),因此可以将词嵌入的维度看作是卷积操作中的通道数。图\ref{fig:11-10}就是一个基于序列卷积的文本分类模型,模型的输入是维度大小为$m\times O $的句子表示,$m$表示句子长度,$O$表示词嵌入维度,模型使用多个不同(对应图中不同的颜色)的卷积核来对序列进行特征提取,得到了多个不同的特征序列。然后使用池化层降低表示维度,得到了一组和序列长度无关的特征表示。基于这组压缩过的特征表示,模型再通过全连接网络和Softmax函数作为相应类别的预测。在这其中卷积层和池化层分别起到了特征提取和状态压缩的作用,将一个不定长的序列转化到一组固定大小的特征表示。
\parinterval 区别于传统图像上的卷积操作,在面向序列的卷积操作中,卷积核只在序列这一维度进行移动,用来捕捉连续的多个词之间的特征。需要注意的是,由于单词通常由一个实数向量表示(词嵌入),因此可以将词嵌入的维度看作是卷积操作中的通道数。图\ref{fig:11-10}就是一个基于序列卷积的文本分类模型,模型的输入是维度大小为$m\times O $的句子表示,$m$表示句子长度,$O$表示卷积核通道数,其值等于词嵌入维度,模型使用多个不同(对应图中不同的颜色)的卷积核来对序列进行特征提取,得到了多个不同的特征序列。然后使用池化层降低表示维度,得到了一组和序列长度无关的特征表示。基于这组压缩过的特征表示,模型再通过全连接网络和Softmax函数作为相应类别的预测。在这其中卷积层和池化层分别起到了特征提取和状态压缩的作用,将一个不定长的序列转化到一组固定大小的特征表示。
%----------------------------------------------
% 图10.
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论