Commit 590b3448 by 孟霞

合并分支 'mengxia' 到 'caorunzhe'

Mengxia

查看合并请求 !114
parents 264e8595 dbec4f5b
%%%------------------------------------------------------------------------------------------------------------
\begin{tikzpicture}
\begin{tikzpicture}
\begin{scope}
\node [anchor=west] (w0) at (0,0) {\footnotesize{$w_{0}$}};
\node [anchor=west] (w1) at ([xshift=5em]w0.east) {\footnotesize{$w_{1}$}};
\node [anchor=west] (w2) at ([xshift=5em]w1.east) {\footnotesize{$w_{2}$}};
\node [anchor=west] (w3) at ([xshift=5em]w2.east) {\footnotesize{$w_{3}$}};
\node [anchor=north] (index0) at ([yshift=0.5em]w0.south) {\tiny(index)};
\node [anchor=north] (index1) at ([yshift=0.5em]w1.south) {\tiny(index)};
\node [anchor=north] (index2) at ([yshift=0.5em]w2.south) {\tiny(index)};
\node [anchor=north] (index3) at ([yshift=0.5em]w3.south) {\tiny(index)};
\node [anchor=south,draw,inner sep=3pt] (e0) at ([yshift=1em]w0.north) {\tiny{$\textbf{e}_0=w_{0} \textbf{C} + \textrm{PE}(0)$}};
\node [anchor=south,draw,inner sep=3pt] (e1) at ([yshift=1em]w1.north) {\tiny{$\textbf{e}_1=w_{1} \textbf{C} + \textrm{PE}(1)$}};
\node [anchor=south,draw,inner sep=3pt] (e2) at ([yshift=1em]w2.north) {\tiny{$\textbf{e}_2=w_{2} \textbf{C} + \textrm{PE}(2)$}};
\node [anchor=south,draw,inner sep=3pt] (e3) at ([yshift=1em]w3.north) {\tiny{$\textbf{e}_3=w_{3} \textbf{C} + \textrm{PE}(3)$}};
\node [anchor=south,draw,inner sep=3pt] (h0) at ([xshift=-0.5em, yshift=1.5em]e0.north) {\tiny{$\textbf{h}_{0}=\textrm{SelfAtt}(\textbf{e}_0,\textbf{e}_3)$}};
\node [anchor=south,draw,inner sep=3pt] (h1) at ([xshift=0.5em, yshift=1.5em]e1.north) {\tiny{$\textbf{h}_{1}=\textrm{SelfAtt}(\textbf{e}_1,\textbf{e}_3)$}};
\node [anchor=south,draw,inner sep=3pt] (h2) at ([xshift=1.5em, yshift=1.5em]e2.north) {\tiny{$\textbf{h}_{2}=\textrm{SelfAtt}(\textbf{e}_2,\textbf{e}_3)$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt] (f1) at ([xshift=0.5em, yshift=1.5em]h2.north) {\tiny{$\textbf{f}_3=\textrm{FNN}([\textbf{h}_0,\textbf{h}_1,\textbf{h}_2,\textbf{e}_3])$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt] (o1) at ([yshift=1em]f1.north) {\tiny{$\textbf{y}=\textrm{Softmax}(f_3 \textbf{U})$}};
\node [anchor=south] (ylabel) at ([yshift=1em]o1.north) {\footnotesize{$\textrm{P}(w_4|w_{0}w_{1}w_{2}w_{3})$}};
\draw [->] ([yshift=0.1em]w0.north) -- ([yshift=-0.1em]e0.south);
\draw [->] ([yshift=0.1em]w1.north) -- ([yshift=-0.1em]e1.south);
\draw [->] ([yshift=0.1em]w2.north) -- ([yshift=-0.1em]e2.south);
\draw [->] ([yshift=0.1em]w3.north) -- ([yshift=-0.1em]e3.south);
\draw [->] ([yshift=0.1em]e0.north) -- ([xshift=0em,yshift=-0.1em]h0.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([xshift=-0.5em,yshift=-0.1em]h1.south);
\draw [->] ([yshift=0.1em]e2.north) -- ([xshift=-1em,yshift=-0.1em]h2.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([xshift=1em,yshift=-0.1em]h0.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([xshift=1em,yshift=-0.1em]h1.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([xshift=1em,yshift=-0.1em]h2.south);
\draw [->] ([yshift=0.1em]h0.north) -- ([xshift=-2em,yshift=-0.1em]f1.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([xshift=2em,yshift=-0.1em]f1.south);
\draw [->] ([yshift=0.1em]h1.north) -- ([xshift=-1em,yshift=-0.1em]f1.south);
\draw [->] ([yshift=0.1em]h2.north) -- ([xshift=0em,yshift=-0.1em]f1.south);
\draw [->] ([yshift=0.1em]f1.north) -- ([yshift=-0.1em]o1.south);
\draw [->] ([yshift=0.1em]o1.north) -- ([yshift=-0.1em]ylabel.south);
\node [anchor=west] (w1) at ([xshift=6.5em]w0.east) {\footnotesize{$w_{1}$}};
\node [anchor=west] (w2) at ([xshift=6.5em]w1.east) {\footnotesize{$w_{2}$}};
\node [anchor=west] (w3) at ([xshift=6.5em]w2.east) {\footnotesize{$w_{3}$}};
\node [anchor=north] (index0) at ([yshift=0.5em]w0.south) {\footnotesize(index)};
\node [anchor=north] (index1) at ([yshift=0.5em]w1.south) {\footnotesize(index)};
\node [anchor=north] (index2) at ([yshift=0.5em]w2.south) {\footnotesize(index)};
\node [anchor=north] (index3) at ([yshift=0.5em]w3.south) {\footnotesize(index)};
{
\node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e0) at ([yshift=1em]w0.north) {\tiny{$\textbf{e}_0=w_{0} \textbf{C} + \textrm{PE}(0)$}};
\node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e1) at ([yshift=1em]w1.north) {\tiny{$\textbf{e}_1=w_{1} \textbf{C} + \textrm{PE}(1)$}};
\node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e2) at ([yshift=1em]w2.north) {\tiny{$\textbf{e}_2=w_{2} \textbf{C} + \textrm{PE}(2)$}};
\node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e3) at ([yshift=1em]w3.north) {\tiny{$\textbf{e}_3=w_{3} \textbf{C} + \textrm{PE}(3)$}};
\node [anchor=south,line width=0.8pt,draw,inner sep=3pt,fill=blue!20!white] (e0) at ([yshift=1em]w0.north) {\footnotesize{$\textbf{e}_0=w_{0} \textbf{C} + \textrm{PE}(0)$}};
\node [anchor=south,line width=0.8pt,draw,inner sep=3pt,fill=blue!20!white] (e1) at ([yshift=1em]w1.north) {\footnotesize{$\textbf{e}_1=w_{1} \textbf{C} + \textrm{PE}(1)$}};
\node [anchor=south,line width=0.8pt,draw,inner sep=3pt,fill=blue!20!white] (e2) at ([yshift=1em]w2.north) {\footnotesize{$\textbf{e}_2=w_{2} \textbf{C} + \textrm{PE}(2)$}};
\node [anchor=south,line width=0.8pt,draw,inner sep=3pt,fill=blue!20!white] (e3) at ([yshift=1em]w3.north) {\footnotesize{$\textbf{e}_3=w_{3} \textbf{C} + \textrm{PE}(3)$}};
}
{
\node [anchor=south,draw,inner sep=3pt,fill=ugreen!20!white] (h0) at ([xshift=-0.5em, yshift=1.5em]e0.north) {\tiny{$\textbf{h}_{0}=\textrm{SelfAtt}(\textbf{e}_0,\textbf{e}_3)$}};
\node [anchor=south,draw,inner sep=3pt,fill=ugreen!20!white] (h1) at ([xshift=0.5em, yshift=1.5em]e1.north) {\tiny{$\textbf{h}_{1}=\textrm{SelfAtt}(\textbf{e}_1,\textbf{e}_3)$}};
\node [anchor=south,draw,inner sep=3pt,fill=ugreen!20!white] (h2) at ([xshift=1.5em, yshift=1.5em]e2.north) {\tiny{$\textbf{h}_{2}=\textrm{SelfAtt}(\textbf{e}_2,\textbf{e}_3)$}};
\node [anchor=south,line width=0.8pt,draw,inner sep=3pt,fill=ugreen!20!white] (h0) at ([xshift=-0.5em, yshift=2.0em]e0.north) {\footnotesize{$\textbf{h}_{0}=\textrm{SelfAtt}(\textbf{e}_0,\textbf{e}_3)$}};
\node [anchor=south,line width=0.8pt,draw,inner sep=3pt,fill=ugreen!20!white] (h1) at ([xshift=0.5em, yshift=2.0em]e1.north) {\footnotesize{$\textbf{h}_{1}=\textrm{SelfAtt}(\textbf{e}_1,\textbf{e}_3)$}};
\node [anchor=south,line width=0.8pt,draw,inner sep=3pt,fill=ugreen!20!white] (h2) at ([xshift=1.5em, yshift=2.0em]e2.north) {\footnotesize{$\textbf{h}_{2}=\textrm{SelfAtt}(\textbf{e}_2,\textbf{e}_3)$}};
}
{
\node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (f1) at ([xshift=0.5em, yshift=1.5em]h2.north) {\tiny{$\textbf{f}_3=\textrm{FNN}([\textbf{h}_0,\textbf{h}_1,\textbf{h}_2,\textbf{e}_3])$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (o1) at ([yshift=1em]f1.north) {\tiny{$\textbf{y}=\textrm{Softmax}(f_3 \textbf{U})$}};
\node [anchor=south,line width=0.8pt,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (f1) at ([xshift=0.5em, yshift=2.0em]h2.north) {\footnotesize{$\textbf{f}_3=\textrm{FNN}([\textbf{h}_0,\textbf{h}_1,\textbf{h}_2,\textbf{e}_3])$}};
\node [anchor=south,line width=0.8pt,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (o1) at ([yshift=1.2em]f1.north) {\footnotesize{$\textbf{y}=\textrm{Softmax}(f_3 \textbf{U})$}};
}
\node [anchor=south] (ylabel) at ([yshift=1.2em]o1.north) {\footnotesize{$\textrm{P}(w_4|w_{0}w_{1}w_{2}w_{3})$}};
\draw [->,line width=0.6pt] ([yshift=0.1em]w0.north) -- ([yshift=-0.1em]e0.south);
\draw [->,line width=0.6pt] ([yshift=0.1em]w1.north) -- ([yshift=-0.1em]e1.south);
\draw [->,line width=0.6pt] ([yshift=0.1em]w2.north) -- ([yshift=-0.1em]e2.south);
\draw [->,line width=0.6pt] ([yshift=0.1em]w3.north) -- ([yshift=-0.1em]e3.south);
\draw [->,line width=0.6pt] ([yshift=0.1em]e0.north) -- ([xshift=0em,yshift=-0.1em]h0.south);
\draw [->,line width=0.6pt] ([yshift=0.1em]e1.north) -- ([xshift=-0.5em,yshift=-0.1em]h1.south);
\draw [->,line width=0.6pt] ([yshift=0.1em]e2.north) -- ([xshift=-1em,yshift=-0.1em]h2.south);
\draw [->,line width=0.6pt] ([yshift=0.1em]e3.north) -- ([xshift=1em,yshift=-0.1em]h0.south);
\draw [->,line width=0.6pt] ([yshift=0.1em]e3.north) -- ([xshift=1em,yshift=-0.1em]h1.south);
\draw [->,line width=0.6pt] ([yshift=0.1em]e3.north) -- ([xshift=1em,yshift=-0.1em]h2.south);
\draw [->,line width=0.6pt] ([yshift=0.1em]h0.north) -- ([xshift=-2em,yshift=-0.1em]f1.south);
\draw [->,line width=0.6pt] ([yshift=0.1em]e3.north) -- ([xshift=2em,yshift=-0.1em]f1.south);
\draw [->,line width=0.6pt] ([yshift=0.1em]h1.north) -- ([xshift=-1em,yshift=-0.1em]f1.south);
\draw [->,line width=0.6pt] ([yshift=0.1em]h2.north) -- ([xshift=0em,yshift=-0.1em]f1.south);
\draw [->,line width=0.6pt] ([yshift=0.1em]f1.north) -- ([yshift=-0.1em]o1.south);
\draw [->,line width=0.6pt] ([yshift=0.1em]o1.north) -- ([yshift=-0.1em]ylabel.south);
\end{scope}
\end{tikzpicture}
%%%------------------------------------------------------------------------------------------------------------
\end{tikzpicture}
\ No newline at end of file
......@@ -969,7 +969,7 @@ x_0\cdot w_0+x_1\cdot w_1+x_2\cdot w_2 & = & 0\cdot 1+0\cdot 1+1\cdot 1 \nonumbe
\parinterval 张量的严格定义是利用线性映射来描述的。与矢量相类似,定义由若干坐标系改变时满足一定坐标转化关系的有序数组成的集合为张量。从几何角度讲,它是一个真正的几何量,也就是说,它是不随参照系的坐标变换而变化的,是若干向量和协向量通过张量乘法定义的量。
\parinterval 不过,更广泛接受的定义是:张量是多重线性函数,是定义在一些向量空间和笛卡尔积上的多重线性映射。张量的多重线性表现在,对于每一个输入函数都是线性的。比如,张量$ \mathbf T(v_0,v_1,\dots,v_r) $,其输入是$r$个向量$ \{v_0,v_1,\dots,v_r\} $,对于张量$ \mathbf T $的任意一个$ v_i $,都有$ \mathbf T(v_0,\dots,v_i+c\cdot u,\dots,v_r)=\mathbf T(v_0,\dots,v_i,\dots,v_r)+c\cdot{\mathbf T(v_0,\dots,u,\dots,v_r)} $,其中,$ c $为任意实数。这个性质非常重要,根据这个性质可以推导出张量的其他定义。
\parinterval 不过,更广泛接受的定义是:张量是多重线性函数,是定义在一些向量空间和笛卡尔积上的多重线性映射。张量的多重线性表现在,对于每一个输入变量都是线性的。比如,张量$ \mathbf T(v_0,v_1,\dots,v_r) $,其输入是$r$个向量$ \{v_0,v_1,\dots,v_r\} $,对于张量$ \mathbf T $的任意一个$ v_i $,都有$ \mathbf T(v_0,\dots,v_i+c\cdot u,\dots,v_r)=\mathbf T(v_0,\dots,v_i,\dots,v_r)+c\cdot{\mathbf T(v_0,\dots,u,\dots,v_r)} $,其中,$ c $为任意实数。这个性质非常重要,根据这个性质可以推导出张量的其他定义。
\parinterval 从我们的物理世界看,如果一个物理量在物体的某个位置上只是一个单值,那么它是一个标量,例如密度;如果一个物理量在同一个位置、从多个方向上看,有不同的值,那么这个物理量就是一个张量。比如物理学中常用的应力的描述就是一个典型的张量。举一个简单的例子:$ \mathbf T(\mathbf v,\mathbf u) $是一个三维空间$(\textrm{x},\textrm{y},\textrm{z})$上的2阶张量,其中$ \mathbf v $$ \mathbf u $ 是两个向量,如图\ref{fig:5-26}所示,向量$ \mathbf v $在某个两两垂直的三维坐标系中可以表示为$ {(\begin{array}{ccc} a & b & c\end{array})}^{\rm T} $,同理向量$ \mathbf u $在某个两两垂直的三维坐标系中可以表示为$ {(\begin{array}{ccc} a' & b' & c' \end{array})}^{\rm T} $。但在三维空间$(\textrm{x},\textrm{y},\textrm{z})$中,向量$ \mathbf v $和向量$ \mathbf u $分别被表示为$ {(\begin{array}{ccc} v_x & v_y & v_z\end{array})}^{\rm T} $$ {(\begin{array}{ccc} u_x & u_y & u_z\end{array})}^{\rm T} $
......@@ -2442,7 +2442,7 @@ Jobs was the CEO of {\red{\underline{apple}}}.
\subsubsection{BERT模型}
\parinterval BERT(Bidirectional Encoder Representations from Transformers)是另一个非常有代表性的基于预训练的句子表示模型\cite{devlin2018bert}。某种意义上,BERT把基于预训练的句子表示模型推向了新的高潮。BERT的论文也获得了NAACL2019最佳论文奖。
\parinterval BERT(Bidirectional Encoder Representations from Transformers)是另一个非常有代表性的基于预训练的句子表示模型\cite{DBLP:conf/naacl/DevlinCLT19}。某种意义上,BERT把基于预训练的句子表示模型推向了新的高潮。BERT的论文也获得了NAACL2019最佳论文奖。
\parinterval 与传统语言模型的训练目标不同,BERT不使用预测下一个词作为训练目标,而是提出了两个新的任务。
......@@ -2499,7 +2499,7 @@ Jobs was the CEO of {\red{\underline{apple}}}.
\sectionnewpage
\section{小结及深入阅读}
\parinterval 神经网络为解决自然语言处理问题提供了全新的思路。而所谓深度学习也是建立在多层神经网络结构之上的一系列模型和方法。本章从神经网络的基本概念到其在语言建模中的应用进行了概述。由于篇幅所限,这里无法覆盖所有神经网络和深度学习的相关内容,感兴趣的读者可以进一步阅读《Neural Network Methods in Natural Language Processing》\cite{goldberg2017neural}和《Deep Learning》\cite{deeplearning}。此外,也有很多研究方向值得关注:
\parinterval 神经网络为解决自然语言处理问题提供了全新的思路。而所谓深度学习也是建立在多层神经网络结构之上的一系列模型和方法。本章从神经网络的基本概念到其在语言建模中的应用进行了概述。由于篇幅所限,这里无法覆盖所有神经网络和深度学习的相关内容,感兴趣的读者可以进一步阅读《Neural Network Methods in Natural Language Processing》\cite{goldberg2017neural}和《Deep Learning》\cite{Goodfellow-et-al-2016}。此外,也有很多研究方向值得关注:
\begin{itemize}
\vspace{0.5em}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论