Commit 525e4614 by 孟霞

Merge branch 'mengxia' into 'caorunzhe'

Chapter 9 figures and references

See merge request !313
parents dcd23f27 736fb9f4
@@ -3,47 +3,47 @@
\node [anchor=west,minimum width=1.5em,minimum height=1.5em] (part1) at (0,0) {\footnotesize{$y$}};
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part1-2) at ([xshift=-1.2em,yshift=-0.3em]part1.south) {\scriptsize {$1\times1$}};
\node [anchor=north,draw,minimum width=4.0em,minimum height=1.5em,rounded corners] (part2) at ([yshift=-1.5em]part1.south) {\footnotesize {$\rm{Sigmoid}$}};
\node [anchor=north,draw,minimum width=4.0em,minimum height=1.5em,rounded corners,ublue,thick] (part2) at ([yshift=-1.5em]part1.south) {\black{\footnotesize {$\rm{Sigmoid}$}}};
\draw [-,thick](part1.south)--(part2.north);
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part2-2) at ([xshift=-1.2em,yshift=-0.3em]part2.south) {\scriptsize {$1\times1$}};
\node [anchor=north,draw,minimum width=4.0em,minimum height=1.5em,rounded corners] (part3) at ([yshift=-1.5em]part2.south) {\footnotesize {$\rm{ADD}$}};
\node [anchor=north,draw,minimum width=4.0em,minimum height=1.5em,rounded corners,ublue,thick] (part3) at ([yshift=-1.5em]part2.south) {\black{\footnotesize {$\rm{ADD}$}}};
\draw [-,thick](part2.south)--(part3.north);
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part3-2) at ([xshift=-1.2em,yshift=-0.3em]part3.south) {\scriptsize {$1\times1$}};
\node [anchor=north,draw,minimum width=4.0em,minimum height=1.5em,rounded corners] (part4) at ([yshift=-1.5em]part3.south) {\footnotesize {$\rm{MUL}$}};
\node[anchor=north,draw,minimum width=4.0em,minimum height=1.5em,rounded corners,ublue,thick] (part4) at ([yshift=-1.5em]part3.south) {\black{\footnotesize {$\rm{MUL}$}}};
\draw [-,thick](part3.south)--(part4.north);
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part4-2) at ([xshift=-1.2em,yshift=-0.2em]part4.south) {\scriptsize {$1\times 2$}};
\node [anchor=north,minimum width=4.0em,minimum height=1.5em] (part5) at ([yshift=-1.4em]part4.south) {\footnotesize {${\vectorn{\emph{a}}}$}};
\draw [-,thick](part4.south)--([yshift=-0.1em]part5.north);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (part5-3) at ([xshift=0.0em,yshift=0.1em]part5.east) {\footnotesize {${\vectorn{\emph{W}}}^{[2]}$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (part5-3) at ([xshift=-0.8em,yshift=0.2em]part5.east) {\footnotesize {${\vectorn{\emph{W}}}^{[2]}$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (part5-4) at ([xshift=2.0em,yshift=0.0em]part5-3.east) {\footnotesize {$ b^{[2]}$}};
\draw[-,thick](part4.south)--(part5-3.north);
\draw[-,thick](part4.south)--([xshift=-0.5em]part5-3.north);
\draw[-,thick](part3.south)--(part5-4.north);
\node [anchor=south,minimum width=1.5em,minimum height=1.5em] (part5-3-1) at ([xshift=1.1em,yshift=-0.45em]part5-3.north) {\scriptsize {$1\times 2$}};
\node [anchor=south,minimum width=1.5em,minimum height=1.5em] (part5-4-1) at ([xshift=1.1em,yshift=-0.45em]part5-4.north) {\scriptsize {$1\times1$}};
%%%%%%%%%%%%%%%%%%%%%%%%%%
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part5-2) at ([xshift=-1.2em,yshift=-0.2em]part5.south) {\scriptsize {$1\times 2$}};
\node [anchor=north,draw,minimum width=4.0em,minimum height=1.5em,rounded corners] (part6) at ([yshift=-1.4em]part5.south) {\footnotesize {$\rm{Tanh}$}};
\node [anchor=north,draw,minimum width=4.0em,minimum height=1.5em,rounded corners,ublue,thick] (part6) at ([yshift=-1.4em]part5.south) {\black{\footnotesize {$\rm{Tanh}$}}};
\draw [-,thick]([yshift=0.1em]part5.south)--(part6.north);
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part6-2) at ([xshift=-1.2em,yshift=-0.3em]part6.south) {\scriptsize {$1\times 2$}};
\node [anchor=north,draw,minimum width=4.0em,minimum height=1.5em,rounded corners] (part7) at ([yshift=-1.5em]part6.south) {\footnotesize {$\rm{ADD}$}};
\node[anchor=north,draw,minimum width=4.0em,minimum height=1.5em,rounded corners,ublue,thick] (part7) at ([yshift=-1.5em]part6.south) {\black{\footnotesize {$\rm{ADD}$}}};
\draw [-,thick](part6.south)--(part7.north);
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part7-2) at ([xshift=-1.2em,yshift=-0.3em]part7.south) {\scriptsize {$1\times 2$}};
\node [anchor=north,draw,minimum width=4.0em,minimum height=1.5em,rounded corners] (part8) at ([yshift=-1.5em]part7.south) {\footnotesize {$\rm{MUL}$}};
\node[anchor=north,draw,minimum width=4.0em,minimum height=1.5em,rounded corners,ublue,thick] (part8) at ([yshift=-1.5em]part7.south) {\black{\footnotesize {$\rm{MUL}$}}};
\draw [-,thick](part7.south)--(part8.north);
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part8-2) at ([xshift=-1.2em,yshift=-0.2em]part8.south) {\scriptsize{$1\times 2$}};
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part8-2) at ([xshift=-1.2em,yshift=-0.2em]part8.south) {\scriptsize{$1\times 3$}};
\node [anchor=north,minimum width=4.0em,minimum height=1.5em] (part9) at ([yshift=-1.4em]part8.south) {\footnotesize {${\vectorn{\emph{x}}}$}};
\draw [-,thick](part8.south)--([yshift=-0.1em]part9.north);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (part9-3) at ([xshift=0.0em,yshift=0.1em]part9.east) {\footnotesize {${\vectorn{\emph{W}}}^{[1]}$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (part9-3) at ([xshift=-0.8em,yshift=0.1em]part9.east) {\footnotesize {${\vectorn{\emph{W}}}^{[1]}$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (part9-4) at ([xshift=2.0em,yshift=0.0em]part9-3.east) {\footnotesize {${\vectorn{\emph{b}}}^{[1]}$}};
\draw[-,thick](part8.south)--(part9-3.north);
\draw[-,thick](part8.south)--([xshift=-0.5em]part9-3.north);
\draw[-,thick](part7.south)--(part9-4.north);
\node [anchor=south,minimum width=1.5em,minimum height=1.5em] (part9-3-1) at ([xshift=1.1em,yshift=-0.45em]part9-3.north) {\scriptsize {$3\times 2$}};
\node [anchor=south,minimum width=1.5em,minimum height=1.5em] (part9-4-1) at ([xshift=1.1em,yshift=-0.45em]part9-4.north) {\scriptsize {$1\times 2$}};
......
%%%------------------------------------------------------------------------------------------------------------
\begin{tikzpicture}
\begin{scope}
% left
\node [anchor=west,draw=ublue,minimum width=3.55em,fill=yellow!20] (part1-1) at (0,0) {\scriptsize{sky condition}};
\node [anchor=north] (inputlabel) at ([yshift=2em]part1-1.north) {\scriptsize{input}};
\node [anchor=north,draw=ublue,minimum width=3.55em,fill=yellow!20] (part1-2) at ([yshift=-2.0em]part1-1.south) {\scriptsize {low-altitude temperature}};
\node [anchor=north,draw=ublue,minimum width=3.55em,fill=yellow!20] (part1-3) at ([yshift=-2.0em]part1-2.south) {\scriptsize {air pressure}};
\node [rectangle,rounded corners,draw=black!50,densely dashed,inner sep=0.4em] [fit = (part1-1) (part1-2) (part1-3) (inputlabel)] (inputshadow) {};
\node [anchor=north,draw=ublue,minimum width=3.55em,fill=yellow!20] (part1-4) at ([yshift=-2.0em]part1-3.south) {\scriptsize {bias 1}};
\node [anchor=north,minimum width=2.5em] (part1-5) at ([yshift=-0.5em]part1-4.south) {\scriptsize {input layer}};
% middle
\node [circle,anchor=west,draw=ublue,minimum width=2.5em,fill=blue!20] (part2-1) at ([xshift=2.0em,yshift=1.7em]part1-2.east) {\scriptsize {temperature}};
\node [anchor=north] (hidlabel) at ([yshift=3.1em]part2-1.north) {\scriptsize{features}};
\node [circle,anchor=west,draw=ublue,minimum width=2.5em,fill=blue!20] (part2-2) at ([xshift=2.0em,yshift=-1.7em]part1-2.east) {\scriptsize {wind speed}};
\node [rectangle,rounded corners,draw=black!50,densely dashed,inner sep=0.4em] [fit = (part2-1) (part2-2) (hidlabel) ] (inputshadow) {};
\node [circle,anchor=west,draw=ublue,minimum width=2.5em,fill=blue!20,inner sep=2pt] (part2-3) at ([xshift=2.0em,yshift=-1.7em]part1-3.east) {\scriptsize {bias 2}};
\node [anchor=north,minimum width=3.0em] (part2-4) at ([xshift=0.0em,yshift=-1.6em]part2-3.south) {\scriptsize{hidden layer}};
\node [anchor=north] (labela) at ([xshift=0.0em,yshift=-3em]part2-3.south) {\footnotesize {(a)}};
% right
\node [anchor=west,draw=ublue,minimum width=3.0em,fill=purple!20] (part3-1) at ([xshift=2em,yshift=0.0em]part2-2.east) {\scriptsize {clothing index}};
\node [anchor=north,minimum width=3.0em] (part3-2) at ([yshift=-5.55em]part3-1.south) {\scriptsize{output layer}};
%\node[anchor=south,minimum height=18em,minimum width=16.0em,draw=ublue,dotted,thick] (part2out) at ([xshift=4.8em,yshift=-11em]part1-2.north) {};
% edges
\draw [->,line width=0.2mm,ublue](part1-1.east)--([xshift=-0.05em]part2-1.170);
\draw [->,line width=0.2mm,ublue](part1-1.east)--([xshift=-0.05em]part2-2.165);
\draw [->,line width=0.2mm,ublue](part1-2.east)--([xshift=-0.05em]part2-1.175);
\draw [->,line width=0.2mm,ublue](part1-2.east)--([xshift=-0.05em]part2-2.175);
\draw [->,line width=0.2mm,ublue](part1-3.east)--([xshift=-0.05em]part2-1.185);
\draw [->,line width=0.2mm,ublue](part1-3.east)--([xshift=-0.05em]part2-2.185);
\draw [->,line width=0.2mm,ublue](part1-4.east)--([xshift=-0.05em]part2-1.195);
\draw [->,line width=0.2mm,ublue](part1-4.east)--([xshift=-0.05em]part2-2.195);
\draw [->,line width=0.2mm,ublue](part2-1.east)--([xshift=-0.05em,yshift=0.2em]part3-1.west);
\draw [->,line width=0.2mm,ublue](part2-2.east)--([xshift=-0.05em]part3-1.west);
\draw [->,line width=0.2mm,ublue](part2-3.east)--([xshift=-0.05em,yshift=-0.2em]part3-1.west);
\end{scope}
\begin{scope}[xshift=3.0in]
% left
\node [anchor=west,align=center,draw=ublue,minimum width=3.55em,minimum height=1.33em,fill=yellow!20] (part1-1) at (0,0) {\normalsize{$x_1$}};
\node [anchor=north] (inputlabel) at ([yshift=2em]part1-1.north) {\scriptsize{input $\mathbf x $}};
\node [anchor=north,draw=ublue,minimum width=3.55em,minimum height=1.33em,fill=yellow!20] (part1-2) at ([yshift=-2.0em]part1-1.south) {\normalsize{$x_2$}};
\node [anchor=north,draw=ublue,minimum width=3.55em,minimum height=1.33em,fill=yellow!20] (part1-3) at ([yshift=-2.0em]part1-2.south) {\normalsize{$x_3$}};
\node [rectangle,rounded corners,draw=black!50,densely dashed,inner sep=0.4em] [fit = (part1-1) (part1-2) (part1-3) (inputlabel)] (inputshadow) {};
\node [anchor=north,draw=ublue,minimum width=3.55em,fill=yellow!20] (part1-4) at ([yshift=-2.0em]part1-3.south) {\footnotesize {$\mathbf b^{[1]} $}};
\node [anchor=north,minimum width=2.5em] (part1-5) at ([yshift=-0.5em]part1-4.south) {\scriptsize {input layer}};
% middle
\node [circle,anchor=west,draw=ublue,minimum width=2.5em,fill=blue!20] (part2-1) at ([xshift=2.0em,yshift=1.7em]part1-2.east) {\large{$a_1$}};
\node [anchor=north] (hidlabel) at ([yshift=3.1em]part2-1.north) {\scriptsize{features $\mathbf a $}};
\node [circle,anchor=west,draw=ublue,minimum width=2.5em,fill=blue!20] (part2-2) at ([xshift=2.0em,yshift=-1.7em]part1-2.east) {\large{$a_2$}};
\node [rectangle,rounded corners,draw=black!50,densely dashed,inner sep=0.4em] [fit = (part2-1) (part2-2) (hidlabel) ] (inputshadow) {};
\node [circle,anchor=west,draw=ublue,minimum width=2.5em,fill=blue!20,inner sep=2pt] (part2-3) at ([xshift=2.0em,yshift=-1.7em]part1-3.east) {\large {$b^{[2]} $}};
\node [anchor=north,minimum width=3.0em] (part2-4) at ([xshift=0.0em,yshift=-1.6em]part2-3.south) {\scriptsize{hidden layer}};
\node [anchor=north] (labelb) at ([xshift=0.0em,yshift=-3em]part2-3.south) {\footnotesize {(b)}};
% right
\node [anchor=west,draw=ublue,minimum width=3.0em,fill=purple!20] (part3-1) at ([xshift=2em,yshift=0.0em]part2-2.east) {\large{$y$}};
\node [anchor=north,minimum width=3.0em] (part3-2) at ([yshift=-5.55em]part3-1.south) {\scriptsize{output layer}};
%\node[anchor=south,minimum height=18em,minimum width=16.0em,draw=ublue,dotted,thick] (part2out) at ([xshift=4.8em,yshift=-11em]part1-2.north) {};
% edges
\draw [->,line width=0.2mm,ublue](part1-1.east)--([xshift=-0.05em]part2-1.170);
\draw [->,line width=0.2mm,ublue](part1-1.east)--([xshift=-0.05em]part2-2.165);
\draw [->,line width=0.2mm,ublue](part1-2.east)--([xshift=-0.05em]part2-1.175);
\draw [->,line width=0.2mm,ublue](part1-2.east)--([xshift=-0.05em]part2-2.175);
\draw [->,line width=0.2mm,ublue](part1-3.east)--([xshift=-0.05em]part2-1.185);
\draw [->,line width=0.2mm,ublue](part1-3.east)--([xshift=-0.05em]part2-2.185);
\draw [->,line width=0.2mm,ublue](part1-4.east)--([xshift=-0.05em]part2-1.195);
\draw [->,line width=0.2mm,ublue](part1-4.east)--([xshift=-0.05em]part2-2.195);
\draw [->,line width=0.2mm,ublue](part2-1.east)--([xshift=-0.05em,yshift=0.2em]part3-1.west);
\draw [->,line width=0.2mm,ublue](part2-2.east)--([xshift=-0.05em]part3-1.west);
\draw [->,line width=0.2mm,ublue](part2-3.east)--([xshift=-0.05em,yshift=-0.2em]part3-1.west);
\end{scope}
\tikzstyle{neuronnode} = [minimum size=2.2em,circle,draw,ublue,very thick,inner sep=1pt, fill=white,align=center,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}]
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (bias10) at (0,0.05) {\footnotesize{${\vectorn{\emph{b}}}^{[1]}$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (bias11) at ([xshift=-1.5em,yshift=-0.3em]bias10.south) {\footnotesize{bias 1}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (input10) at (2,0) {\footnotesize {$x_1$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (input11) at ([xshift=-2.1em,yshift=-0.3em]input10.south) {\footnotesize {sky condition}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (input20) at (4,0) {\footnotesize {$x_2$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (input21) at ([xshift=-2.1em,yshift=-0.3em]input20.south) {\footnotesize {low-altitude temperature}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (input30) at (6,0) {\footnotesize {$x_3$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (input31) at ([xshift=-2.1em,yshift=-0.3em]input30.south) {\footnotesize {air pressure}};
\node [neuronnode] (n10) at ([xshift=1.5em,yshift=4em]input10.east) {\tiny{$f$}\\[-1ex] \tiny{$\sum$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (bias20) at ([xshift=-4em,yshift=0.5em]n10.west) {\footnotesize {$b^{[2]}$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (bias21) at ([xshift=-1.5em,yshift=-0.3em]bias20.south) {\footnotesize {bias 2}};
\node [neuronnode] (n11) at ([xshift=1.5em,yshift=4em]input20.east){\tiny{$f$}\\[-1ex] \tiny{$\sum$}};
\draw [-,ublue] (n10.west) -- (n10.east);
\draw [-,ublue] (n11.west) -- (n11.east);
\node [neuronnode] (n20) at ([xshift=1.5em,yshift=8em]input10.east) {\tiny{$f$}\\[-1ex] \tiny{$\sum$}};
\draw [-,ublue] (n20.west) -- (n20.east);
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (output) at ([xshift=0.5em,yshift=12em]input10.east) {\footnotesize {$y$}};
\draw [->,thick] (input10.north) -- (n10.south);
\draw [->,thick] (input20.north) -- (n10.south);
\draw [->,thick] (input20.north) -- (n11.south);
\draw [->,thick] (input30.north) -- (n11.south);
\draw [->,thick] (n10.north) -- (n20.south);
\draw [->,thick] (n11.north) -- (n20.south);
\draw [->,thick] (bias20.north) -- (n20.south);
\draw [->,thick] (n20.north) -- (output.south);
\draw [->,thick] (bias10.north) -- (n10.south);
\end{tikzpicture}
%%%------------------------------------------------------------------------------------------------------------
%%------------------------------------------------------------------------------------------------------------
......
@@ -1053,65 +1053,7 @@ f(x)=\begin{cases} 0 & x\le 0 \\x & x>0\end{cases}
\parinterval There are many open-source systems for implementing neural networks. For instance, one can build on the classic Python package NumPy, or use a mature deep learning framework: TensorFlow and PyTorch are very popular deep learning toolkits, and there are many other excellent frameworks besides, such as CNTK, MXNet, PaddlePaddle, Keras, Chainer, DL4J, and NiuTensor. Developers can choose a framework according to their own preferences and the requirements of the project at hand.
\parinterval This section uses NiuTensor to describe tensor computation. NiuTensor is a tensor library designed for natural language processing tasks and supports a rich set of tensor computation interfaces. Figure \ref{fig:9-30} shows C++ code that declares and defines a tensor with NiuTensor:
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter9/Figures/fig-code-tensor-define}
\caption{Declaring and defining a tensor with NiuTensor}
\label{fig:9-30}
\end{figure}
%-------------------------------------------
\parinterval This program defines an order-2 ($ \textrm{order}=2 $) tensor with shape $ 2\times 2 $ (${\textrm{dimsize}}=2,2 $), single-precision floating-point data type ($ \textrm{dtype=X\_FLOAT} $), and no sparsity ($ \textrm{dense}=1.00 $). Running the program prints the value of every element of the tensor, as shown in Figure \ref{fig:9-31}.
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter9/Figures/fig-code-out}
\caption{Program output}
\label{fig:9-31}
\end{figure}
%-------------------------------------------
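\parinterval For reference, a minimal sketch of a program of this kind is given below. Only XTensor, InitTensor, and X\_FLOAT come from the description in the text; the header path, namespace, and the SetZeroAll/Dump helpers are assumptions and may differ from the real NiuTensor API.
\begin{verbatim}
// Sketch: declare and define a 2 x 2 single-precision tensor.
#include "XTensor.h"   // NiuTensor tensor class (assumed header path)
using namespace nts;   // NiuTensor namespace (assumed)

int main()
{
    XTensor tensor;                            // declare the tensor
    int dimSize[2] = {2, 2};                   // shape: 2 x 2
    InitTensor(&tensor, 2, dimSize, X_FLOAT);  // order 2, single precision
    tensor.SetZeroAll();   // fill with zeros (assumed helper)
    tensor.Dump(stderr);   // print every element (assumed helper)
    return 0;
}
\end{verbatim}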
\parinterval In NiuTensor, a tensor is represented by the class XTensor and defined with InitTensor, which takes four main parameters (a code sketch follows the list):
\begin{itemize}
\vspace{0.5em}
\item A pointer to a variable of type XTensor, e.g., $ \& $tensor.
\vspace{0.5em}
\item The order of the tensor, e.g., 6.
\vspace{0.5em}
\item The size of each dimension; by convention this parameter takes the same form as a conventional multi-dimensional array, e.g., $ \{2,3,4,2,3,4\} $.
\vspace{0.5em}
\item The data type of the tensor; this parameter has a default value.
\vspace{0.5em}
\end{itemize}
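\parinterval Lined up against this list, a call might look as follows (a sketch continuing the one above; only InitTensor and X\_FLOAT are taken from the text):
\begin{verbatim}
// Sketch: the four InitTensor parameters from the list above.
XTensor tensor;                            // variable the pointer refers to
int dimSize[6] = {2, 3, 4, 2, 3, 4};       // size of each dimension
InitTensor(&tensor, 6, dimSize, X_FLOAT);  // pointer, order, sizes, data type
\end{verbatim}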
\parinterval An example program that defines an XTensor is shown in Figure \ref{fig:9-32}(a):
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter9/Figures/fig-code-tensor-define-2}
\caption{Example programs that define tensors in NiuTensor}
\label{fig:9-32}
\end{figure}
%-------------------------------------------
\parinterval Beyond this, NiuTensor offers a more convenient way to define tensors, shown in Figure \ref{fig:9-32}(b), and tensors can also be defined on the GPU, as shown in Figure \ref{fig:9-32}(c). NiuTensor supports all kinds of algebraic operations on tensors and a variety of unary operators, such as $ + $, $ - $, $ \ast $, $ / $, Log (logarithm), Exp (exponentiation), Power (raising to a power), and Absolute (absolute value), as well as activation functions such as Sigmoid and Softmax. Figure \ref{fig:9-35}(a) is an example program that applies first-order operations to tensors.
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter9/Figures/fig-code-tensor-operation}
\caption{Example program of tensor algebra operations}
\label{fig:9-35}
\end{figure}
%-------------------------------------------
\parinterval In addition to the unary operators above, NiuTensor supports higher-order operations between tensors, the most common being matrix multiplication; Figure \ref{fig:9-35}(b) is an example program that performs matrix multiplication between tensors. Table \ref{tab:5-2} lists some further functions supported by NiuTensor.
\parinterval NiuTensor is a tensor library designed for natural language processing tasks and supports a rich set of tensor computation interfaces, covering the declaration and definition of tensors, all kinds of algebraic operations on them, and a variety of unary operators, such as $ + $, $ - $, $ \ast $, $ / $, Log (logarithm), Exp (exponentiation), Power (raising to a power), and Absolute (absolute value), as well as activation functions such as Sigmoid and Softmax. Beyond these unary operators, NiuTensor also supports higher-order operations between tensors, the most common being matrix multiplication. Table \ref{tab:5-2} lists some further functions supported by NiuTensor.
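\parinterval As a rough sketch of how these interfaces compose (the operator overloads and the names Sigmoid and MatrixMul are assumed from the description above; exact signatures may differ):
\begin{verbatim}
// Sketch: elementwise algebra, an activation, and matrix multiplication.
XTensor a, b, c;
int sz[2] = {2, 2};
InitTensor(&a, 2, sz, X_FLOAT);
InitTensor(&b, 2, sz, X_FLOAT);

c = a + b;            // elementwise addition via an overloaded operator
c = Sigmoid(c);       // activation function applied elementwise
c = MatrixMul(a, b);  // 2 x 2 matrix product (assumed function name)
\end{verbatim}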
%--------------------------------------------------------------------
\begin{table}[htp]
@@ -1173,7 +1115,7 @@ y&=&{\textrm{Sigmoid}}({\textrm{Tanh}}({\vectorn{\emph{x}}}\cdot {\vectorn{\emph{W}}}^{[1]}+{\vectorn{\emph{b}}}^{[1]})\cdot {\vectorn{\emph{W}}}^{[2]}+b^{[2]})
\begin{figure}[htp]
\centering
\input{./Chapter9/Figures/fig-weather-forward}
\caption{Forward computation example (computation graph)\red{(figure needs revision)}}
\caption{Forward computation example (computation graph)}
\label{fig:9-38}
\end{figure}
%-------------------------------------------
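\parinterval In code, the computation graph of Figure \ref{fig:9-38} is just a nest of calls evaluated from the inside out (a sketch reusing the assumed interfaces above; x, w1, b1, w2, b2 are pre-initialized tensors with the shapes annotated in the graph):
\begin{verbatim}
// Sketch: forward pass y = Sigmoid(Tanh(x * W1 + b1) * W2 + b2).
XTensor a = Tanh(MatrixMul(x, w1) + b1);     // hidden features a
XTensor y = Sigmoid(MatrixMul(a, w2) + b2);  // output y
\end{verbatim}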
@@ -1915,36 +1857,6 @@ z_t&=&\gamma z_{t-1}+(1-\gamma) \frac{\partial J}{\partial {\theta}_t} \cdot \frac{\partial J}{\partial {\theta}_t}
\parinterval Combining the backpropagation rules for the output layer and the hidden layers, the gradient of any parameter at any position in a neural network can be obtained: one simply visits every node in reverse order according to the network's topology and performs the backward computation described above.
%----------------------------------------------------------------------------------------
% NEW SUBSUB-SECTION
%----------------------------------------------------------------------------------------
\subsubsection{3. {\red Example}}
\parinterval To give a better sense of the backpropagation process, Figure \ref{fig:9-58} shows an example backpropagation program for a simple neural network.
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter9/Figures/fig-code-back-propagation-1}
\caption{Hand-written backpropagation code (NiuTensor)}
\label{fig:9-58}
\end{figure}
%-------------------------------------------
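\parinterval Independent of any toolkit, the bookkeeping in such code is just the chain rule applied node by node in reverse. A self-contained scalar sketch in plain C++ (the one-weight network and all numeric values are made up for illustration):
\begin{verbatim}
// Manual backpropagation for y = sigmoid(w*x + b), loss L = (y - t)^2.
#include <cstdio>
#include <cmath>

int main()
{
    double x = 0.5, t = 1.0;   // input and target (made-up values)
    double w = 0.8, b = -0.2;  // parameters

    // Forward pass, keeping intermediates for the backward pass.
    double s = w * x + b;                   // pre-activation
    double y = 1.0 / (1.0 + std::exp(-s));  // sigmoid activation
    double L = (y - t) * (y - t);           // squared error

    // Backward pass: chain rule, visiting nodes in reverse order.
    double dL_dy = 2.0 * (y - t);  // dL/dy
    double dy_ds = y * (1.0 - y);  // sigmoid'(s)
    double dL_ds = dL_dy * dy_ds;  // dL/ds
    double dL_dw = dL_ds * x;      // ds/dw = x
    double dL_db = dL_ds;          // ds/db = 1

    std::printf("L = %f, dL/dw = %f, dL/db = %f\n", L, dL_dw, dL_db);
    return 0;
}
\end{verbatim}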
\parinterval Moreover, many tensor computation toolkits ship with a ready-made backpropagation routine. As shown in Figure \ref{fig:9-59}, once the network has been built, whatever the forward computation looks like, a single call to the Backward function performs backpropagation through the whole network, and system developers need not concern themselves with how the gradients are computed.
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter9/Figures/fig-code-back-propagation-2}
\caption{Backpropagation via automatic differentiation (NiuTensor)}
\label{fig:9-59}
\end{figure}
%-------------------------------------------
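\parinterval A sketch of what such a call might look like (the text only names the Backward function; the XNet wrapper and the CrossEntropy helper used here are assumptions):
\begin{verbatim}
// Sketch: automatic backpropagation after an arbitrary forward pass.
XTensor output = Sigmoid(MatrixMul(x, w) + b);  // any forward computation
XTensor loss = CrossEntropy(output, gold);      // loss helper (assumed name)
XNet net;                                       // autodiff wrapper (assumed)
net.Backward(loss);  // gradients of all parameters w.r.t. the loss
\end{verbatim}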
%----------------------------------------------------------------------------------------
% NEW SECTION
%----------------------------------------------------------------------------------------
@@ -2079,16 +1991,7 @@ z_t&=&\gamma z_{t-1}+(1-\gamma) \frac{\partial J}{\partial {\theta}_t} \cdot \frac{\partial J}{\partial {\theta}_t}
\parinterval In FNNLM, all parameters, inputs, and outputs are continuous variables, so FNNLM is a typical continuous-space model. With a loss function such as cross-entropy, FNNLM is easy to optimize; for example, its parameters can be trained with gradient descent.
\parinterval {\red FNNLM is also very simple to implement; Figure \ref{fig:9-61} shows a simple implementation of FNNLM}. Although the model is simple in form, it offers a completely new perspective on processing natural language. First, it redefines what a word is\ \dash \ no longer an entry in a dictionary, but a computable ``quantity'' that can be represented by a continuous real-valued vector. Moreover, since $n$-grams are no longer discrete symbol sequences, the model does not need to store $n$-grams explicitly, which greatly alleviates the data sparsity problem mentioned above and also makes the model far smaller.
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter9/Figures/fig-code-fnnlm}
\caption{FNNLM example code (NiuTensor){\red (delete!!)}}
\label{fig:9-61}
\end{figure}
%-------------------------------------------
\parinterval Although the FNNLM model is simple in form, it offers a completely new perspective on processing natural language. First, it redefines what a word is\ \dash \ no longer an entry in a dictionary, but a computable ``quantity'' that can be represented by a continuous real-valued vector. Moreover, since $n$-grams are no longer discrete symbol sequences, the model does not need to store $n$-grams explicitly, which greatly alleviates the data sparsity problem mentioned above and also makes the model far smaller.
\parinterval Of course, FNNLM has also prompted much subsequent reflection, for example: What does each layer of the network learn? Morphology, syntax, or some other kind of knowledge? How should distributed word representations be understood? As the following sections will show, with the progress of deep learning and natural language processing in recent years, some of these questions have been answered well, but many others still await further exploration.
@@ -2139,11 +2042,11 @@ z_t&=&\gamma z_{t-1}+(1-\gamma) \frac{\partial J}{\partial {\theta}_t} \cdot \frac{\partial J}{\partial {\theta}_t}
% NEW SUBSUB-SECTION
%----------------------------------------------------------------------------------------
\subsubsection{2. Other types of language models}
\subsubsection{2. Other types of language models}\label{sec:9.5.2.2}
\parinterval By introducing the ability to remember history, RNNLM alleviates the limited-context problem of $n$-gram models, but some issues remain. As a sequence grows longer, the paths along which information travels between words grow longer too, and information is propagated less efficiently; for long sequences, it is hard to preserve distant history through many applications of the recurrent unit. Overly long sequences also easily cause vanishing and exploding gradients (see Section \ref{sec:9.4.4}), which makes the model harder to train.
\parinterval One way to address this problem is to use convolutional neural networks{\color{red} cite papers on convolution-based language models}. Convolutional neural networks jointly model the consecutive words inside a window of a given size, which makes it easy to capture the dependencies between the words in the window and to represent them as a whole. Furthermore, convolutions can be stacked, and adding more convolutional layers captures dependencies over larger ranges. Convolutional neural networks and their application to machine translation are treated in detail in {\chaptereleven}.
\parinterval One way to address this problem is to use convolutional neural networks\upcite{Pham2016ConvolutionalNN}. Convolutional neural networks jointly model the consecutive words inside a window of a given size, which makes it easy to capture the dependencies between the words in the window and to represent them as a whole. Furthermore, convolutions can be stacked, and adding more convolutional layers captures dependencies over larger ranges. Convolutional neural networks and their application to machine translation are treated in detail in {\chaptereleven}.
\parinterval In addition, researchers have proposed another new structure$\ \dash \ $the {\small\bfnew{self-attention mechanism}}\index{Self-attention Mechanism}. Self-attention is a special neural network structure that directly models the interaction between any two words in a sequence, thereby avoiding the drawback of recurrent networks that the number of information-transfer steps grows with distance. In natural language processing, the self-attention mechanism has been applied with great success to machine translation, giving rise to the well-known Transformer model\upcite{vaswani2017attention}. {\chaptertwelve} introduces the self-attention mechanism and the Transformer model systematically.
@@ -2285,7 +2188,7 @@ Jobs was the CEO of {\red{\underline{apple}}}.
\vspace{0.5em}
\item End-to-end learning is one of the hallmarks of neural network methods. With it, system developers need not design latent structures for inputs and outputs, and even feature engineering becomes unnecessary. On the other hand, because end-to-end learning is carried out entirely by the neural network, with no human prior knowledge guiding the process, the learned structures and parameters are hard to interpret. Many researchers have therefore studied {\small\sffamily\bfseries{explainable machine learning}}\index{Explainable Machine Learning}\upcite{DBLP:journals/corr/abs-1905-09418,moraffah2020causal,blodgett2020language}. For natural language processing, interpretability of methods is essential. From another angle, how to use prior knowledge to improve end-to-end learning has also drawn much attention\upcite{arthur2016incorporating,zhang-etal-2017-prior,yang2017improving}, for example, using syntactic knowledge to improve natural language processing models\upcite{stahlberg2016syntactically,currey2019incorporating,currey2018multi,marevcek2018extracting,blevins2018deep}.
\vspace{0.5em}
\item To further improve the performance of neural language models, besides refining the model itself, one can introduce new structures or other useful information, and there is much notable work in this area. Examples include feeding the neural language model word features beyond word embeddings, such as linguistic features (morphological, syntactic, and semantic features, etc.)\upcite{Wu2012FactoredLM,Adel2015SyntacticAS}, contextual information\upcite{mikolov2012context,Wang2015LargerContextLM}, or external knowledge such as knowledge graphs\upcite{Ahn2016ANK}; introducing character-level information, fed into the model either as standalone character features\upcite{Kim2016CharacterAwareNL,Hwang2017CharacterlevelLM} or together with word features\upcite{Onoe2016GatedWR,Verwimp2017CharacterWordLL}; introducing bidirectional models, which exploit both past and future text when predicting a word\upcite{Graves2013HybridSR,bahdanau2014neural,Peters2018DeepCW}; and introducing attention mechanisms, which markedly improve performance\ \dash \ Section 1.5.2 gives a brief introduction, and besides the Transformer model, GPT\upcite{radford2018improving} and BERT\upcite{devlin2019bert} are also notable works.
\item To further improve the performance of neural language models, besides refining the model itself, one can introduce new structures or other useful information, and there is much notable work in this area. Examples include feeding the neural language model word features beyond word embeddings, such as linguistic features (morphological, syntactic, and semantic features, etc.)\upcite{Wu2012FactoredLM,Adel2015SyntacticAS}, contextual information\upcite{mikolov2012context,Wang2015LargerContextLM}, or external knowledge such as knowledge graphs\upcite{Ahn2016ANK}; introducing character-level information, fed into the model either as standalone character features\upcite{Kim2016CharacterAwareNL,Hwang2017CharacterlevelLM} or together with word features\upcite{Onoe2016GatedWR,Verwimp2017CharacterWordLL}; introducing bidirectional models, which exploit both past and future text when predicting a word\upcite{Graves2013HybridSR,bahdanau2014neural,Peters2018DeepCW}; and introducing attention mechanisms, which markedly improve performance\ \dash \ Section \ref{sec:9.5.2.2} gives a brief introduction, and besides the Transformer model, GPT\upcite{radford2018improving} and BERT\upcite{devlin2019bert} are also notable works.
\vspace{0.5em}
\item Word embedding is one of the important advances in natural language processing in recent years. ``Embedding'' refers to a class of methods: in principle, any process that yields a distributed representation of an object can be viewed as embedding in the broad sense. Representation learning based on this idea has become a frontier method in natural language processing; for instance, distributed representations of tree structures and even graph structures\upcite{DBLP:journals/corr/abs-1809-01854,Yin2018StructVAETL,Aharoni2017TowardsSN} have become important tools for analyzing natural language. Besides language modeling, there are many other ways to learn word embeddings, such as SENNA\upcite{collobert2011natural}, word2vec\upcite{DBLP:journals/corr/abs-1301-3781,mikolov2013distributed}, GloVe\upcite{DBLP:conf/emnlp/PenningtonSM14}, and CoVe\upcite{mccann2017learned}.
\vspace{0.5em}
......
@@ -4199,6 +4199,14 @@ year = {2012}
publisher={International Conference on Computational Linguistics},
year={2012}
}
@inproceedings{Pham2016ConvolutionalNN,
title={Convolutional Neural Network Language Models},
author={Ngoc-Quan Pham and
German Kruszewski and
Gemma Boleda},
publisher={Conference on Empirical Methods in Natural Language Processing},
year={2016}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%% further-reading revisions and additions, to be checked %%%%%%%%%%%%%%%%%%%
@article{moraffah2020causal,
......