Commit 7a19b5c0 by zengxin

11

parent 56e2b861
...@@ -51,8 +51,8 @@ ...@@ -51,8 +51,8 @@
%\node[minimum width = 1.8cm] (sub) at ([xshift=-5.5cm,yshift=2cm]num9_9.east) {}; %\node[minimum width = 1.8cm] (sub) at ([xshift=-5.5cm,yshift=2cm]num9_9.east) {};
\draw[decorate,decoration={brace,mirror,raise=0pt,amplitude=0.3cm},black,thick] ([yshift=0.4cm,xshift=-0.1cm]num1_1.west) -- node[att,xshift=-0.5cm]{$q$} ([yshift=-0.4cm,xshift=-0.1cm]num3_3.west); \draw[decorate,decoration={brace,mirror,raise=0pt,amplitude=0.3cm},black,thick] ([yshift=0.4cm,xshift=-0.1cm]num1_1.west) -- node[att,xshift=-0.5cm]{$k$} ([yshift=-0.4cm,xshift=-0.1cm]num3_3.west);
\draw[decorate,decoration={brace,raise=0pt,amplitude=0.3cm},black,thick] ([xshift=-0.4cm,yshift=0.1cm]num1.north) -- node[att,yshift=0.5cm]{$k$}([xshift=0.4cm,yshift=0.1cm]num7.north); \draw[decorate,decoration={brace,raise=0pt,amplitude=0.3cm},black,thick] ([xshift=-0.4cm,yshift=0.1cm]num1.north) -- node[att,yshift=0.5cm]{$q$}([xshift=0.4cm,yshift=0.1cm]num7.north);
\draw[decorate,decoration={brace,mirror,raise=0pt,amplitude=0.3cm},black,thick] ([xshift=0.5cm,yshift=0.00cm]num9_9.south) -- node[att,xshift=0.5cm,yshift=-0.3cm]{$o$}([xshift=0.5cm,yshift=0.00cm]num9.south); \draw[decorate,decoration={brace,mirror,raise=0pt,amplitude=0.3cm},black,thick] ([xshift=0.5cm,yshift=0.00cm]num9_9.south) -- node[att,xshift=0.5cm,yshift=-0.3cm]{$o$}([xshift=0.5cm,yshift=0.00cm]num9.south);
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
\begin{tikzpicture}[node distance = 0cm] \begin{tikzpicture}[node distance = 0cm]
\node(num1_0)[num, fill = blue!40]{\textbf{\textcolor{white}0}}; \node(num1_0)[num, fill = blue!40]{\textcolor{white}{$\mathbi{0}$}};
\node(num1_1)[num,right of = num1_0,xshift = 1.2cm]{$\mathbi{e}_1$}; \node(num1_1)[num,right of = num1_0,xshift = 1.2cm]{$\mathbi{e}_1$};
\node(num1_2)[num,right of = num1_1,xshift = 1.2cm]{\textcolor{blue!70}{$\mathbi{e}_2$}}; \node(num1_2)[num,right of = num1_1,xshift = 1.2cm]{\textcolor{blue!70}{$\mathbi{e}_2$}};
\node(num1_3)[num,right of = num1_2,xshift = 1.2cm]{\textcolor{blue!70}{$\mathbi{e}_3$}}; \node(num1_3)[num,right of = num1_2,xshift = 1.2cm]{\textcolor{blue!70}{$\mathbi{e}_3$}};
...@@ -12,11 +12,11 @@ ...@@ -12,11 +12,11 @@
\node(num1_7)[num,right of = num1_6,xshift = 1.2cm]{\textcolor{blue!70}{$\mathbi{e}_7$}}; \node(num1_7)[num,right of = num1_6,xshift = 1.2cm]{\textcolor{blue!70}{$\mathbi{e}_7$}};
\node(num1_8)[num,right of = num1_7,xshift = 1.2cm]{\textcolor{blue!70}{$\mathbi{e}_8$}}; \node(num1_8)[num,right of = num1_7,xshift = 1.2cm]{\textcolor{blue!70}{$\mathbi{e}_8$}};
\node(num1_9)[num,right of = num1_8,xshift = 1.2cm]{$\mathbi{e}_9$}; \node(num1_9)[num,right of = num1_8,xshift = 1.2cm]{$\mathbi{e}_9$};
\node(num1_10)[num,right of = num1_9,xshift = 1.2cm, fill = blue!40]{\textbf0}; \node(num1_10)[num,right of = num1_9,xshift = 1.2cm, fill = blue!40]{$\mathbi{0}$};
\node(A)[below of = num2,yshift = -0.6cm]{A}; \node(A)[below of = num2,yshift = -0.6cm]{A};
\node(B)[below of = num8,yshift = -0.6cm]{B}; \node(B)[below of = num8,yshift = -0.6cm]{B};
\node(num2_0)[num,above of = num1_0,yshift = 1.2cm, fill = blue!40]{\textbf{\textcolor{white}0}}; \node(num2_0)[num,above of = num1_0,yshift = 1.2cm, fill = blue!40]{\textcolor{white}{$\mathbi{0}$}};
\node(num2_1)[num,right of = num2_0,xshift = 1.2cm]{\textbf2}; \node(num2_1)[num,right of = num2_0,xshift = 1.2cm]{\textbf2};
\node(num2_2)[num,right of = num2_1,xshift = 1.2cm]{\textbf2}; \node(num2_2)[num,right of = num2_1,xshift = 1.2cm]{\textbf2};
\node(num2_3)[num,right of = num2_2,xshift = 1.2cm]{\textbf{\textcolor{blue!70}2}}; \node(num2_3)[num,right of = num2_2,xshift = 1.2cm]{\textbf{\textcolor{blue!70}2}};
...@@ -26,9 +26,9 @@ ...@@ -26,9 +26,9 @@
\node(num2_7)[num,right of = num2_6,xshift = 1.2cm]{\textbf{\textcolor{blue!70}2}}; \node(num2_7)[num,right of = num2_6,xshift = 1.2cm]{\textbf{\textcolor{blue!70}2}};
\node(num2_8)[num,right of = num2_7,xshift = 1.2cm]{\textbf2}; \node(num2_8)[num,right of = num2_7,xshift = 1.2cm]{\textbf2};
\node(num2_9)[num,right of = num2_8,xshift = 1.2cm]{\textbf2}; \node(num2_9)[num,right of = num2_8,xshift = 1.2cm]{\textbf2};
\node(num2_10)[num,right of = num2_9,xshift = 1.2cm, fill = blue!40]{\textbf0}; \node(num2_10)[num,right of = num2_9,xshift = 1.2cm, fill = blue!40]{$\mathbi{0}$};
\node(num3_0)[num,above of = num2_0,yshift = 1.2cm, fill = blue!40]{\textbf{\textcolor{white}0}}; \node(num3_0)[num,above of = num2_0,yshift = 1.2cm, fill = blue!40]{\textcolor{white}{$\mathbi{0}$}};
\node(num3_1)[num,right of = num3_0,xshift = 1.2cm]{\textbf3}; \node(num3_1)[num,right of = num3_0,xshift = 1.2cm]{\textbf3};
\node(num3_2)[num,right of = num3_1,xshift = 1.2cm]{\textbf3}; \node(num3_2)[num,right of = num3_1,xshift = 1.2cm]{\textbf3};
\node(num3_3)[num,right of = num3_2,xshift = 1.2cm]{\textbf3}; \node(num3_3)[num,right of = num3_2,xshift = 1.2cm]{\textbf3};
...@@ -38,9 +38,9 @@ ...@@ -38,9 +38,9 @@
\node(num3_7)[num,right of = num3_6,xshift = 1.2cm]{\textbf3}; \node(num3_7)[num,right of = num3_6,xshift = 1.2cm]{\textbf3};
\node(num3_8)[num,right of = num3_7,xshift = 1.2cm]{\textbf3}; \node(num3_8)[num,right of = num3_7,xshift = 1.2cm]{\textbf3};
\node(num3_9)[num,right of = num3_8,xshift = 1.2cm]{\textbf3}; \node(num3_9)[num,right of = num3_8,xshift = 1.2cm]{\textbf3};
\node(num3_10)[num,right of = num3_9,xshift = 1.2cm, fill = blue!40]{\textbf0}; \node(num3_10)[num,right of = num3_9,xshift = 1.2cm, fill = blue!40]{$\mathbi{0}$};
\node(num4_0)[num,above of = num3_0,yshift = 1.2cm, fill = blue!40]{\textbf{\textcolor{white}0}}; \node(num4_0)[num,above of = num3_0,yshift = 1.2cm, fill = blue!40]{\textcolor{white}{$\mathbi{0}$}};
\node(num4_1)[num,right of = num4_0,xshift = 1.2cm]{\textbf4}; \node(num4_1)[num,right of = num4_0,xshift = 1.2cm]{\textbf4};
\node(num4_2)[num,right of = num4_1,xshift = 1.2cm]{\textbf4}; \node(num4_2)[num,right of = num4_1,xshift = 1.2cm]{\textbf4};
\node(num4_3)[num,right of = num4_2,xshift = 1.2cm]{\textbf4}; \node(num4_3)[num,right of = num4_2,xshift = 1.2cm]{\textbf4};
...@@ -50,7 +50,7 @@ ...@@ -50,7 +50,7 @@
\node(num4_7)[num,right of = num4_6,xshift = 1.2cm]{\textbf4}; \node(num4_7)[num,right of = num4_6,xshift = 1.2cm]{\textbf4};
\node(num4_8)[num,right of = num4_7,xshift = 1.2cm]{\textbf4}; \node(num4_8)[num,right of = num4_7,xshift = 1.2cm]{\textbf4};
\node(num4_9)[num,right of = num4_8,xshift = 1.2cm]{\textbf4}; \node(num4_9)[num,right of = num4_8,xshift = 1.2cm]{\textbf4};
\node(num4_10)[num,right of = num4_9,xshift = 1.2cm, fill = blue!40]{\textbf0}; \node(num4_10)[num,right of = num4_9,xshift = 1.2cm, fill = blue!40]{$\mathbi{0}$};
\draw [->, thick](num1_0.north)--([xshift=-0.1em,yshift=-0.1em]num2_1.south); \draw [->, thick](num1_0.north)--([xshift=-0.1em,yshift=-0.1em]num2_1.south);
\draw [->, thick](num2_0.north)--([xshift=-0.1em,yshift=-0.1em]num3_1.south); \draw [->, thick](num2_0.north)--([xshift=-0.1em,yshift=-0.1em]num3_1.south);
......
...@@ -176,7 +176,7 @@ ...@@ -176,7 +176,7 @@
%\input{./Chapter11/Figures/figure-f } %\input{./Chapter11/Figures/figure-f }
\subfigure[循环神经网络的串行结构($O(n)$)]{\input{./Chapter11/Figures/figure-structural-comparison-a}} \subfigure[循环神经网络的串行结构($O(n)$)]{\input{./Chapter11/Figures/figure-structural-comparison-a}}
\subfigure[卷积神经网络的层级结构($O(n/k)$)]{\input{./Chapter11/Figures/figure-structural-comparison-b}} \subfigure[卷积神经网络的层级结构($O(n/k)$)]{\input{./Chapter11/Figures/figure-structural-comparison-b}}
\caption{串行及层级结构对比($\mathbi{e}_i$表示词嵌入,0表示0向量,2,3,4表示第几层)} \caption{串行及层级结构对比($\mathbi{e}_i$表示词嵌入,$\mathbi{0}$表示$\mathbi{0}$向量,2,3,4表示第几层)}
\label{fig:11-9} \label{fig:11-9}
\end{figure} \end{figure}
%---------------------------------------------- %----------------------------------------------
...@@ -440,7 +440,7 @@ ...@@ -440,7 +440,7 @@
\section{局部模型的改进} \section{局部模型的改进}
\parinterval 在序列建模中,卷积神经网络可以通过参数共享,高效地捕捉局部上下文特征,如图\ref{fig:11-11}所示。但是通过进一步分析可以发现,在标准卷积操作中包括了不同词和不同通道之间两种信息的交互,每个卷积核都是对相邻词的不同通道进行卷积,参数量为$K \times O$。其中$K$为卷积核大小,$O$为输入的通道数,即单词表示的维度大小。因此$N$个卷积核总共的参数量为$K \times O \times N$。这里涉及卷积核大小、输入通道数和输出通道数三个维度,因此计算复杂度较高。为了进一步提升计算效率,降低参数量,一些研究人员提出{\small\bfnew{深度可分离卷积}}\index{深度可分离卷积}(Depthwise Separable Convolution)\index{Depthwise Separable Convolution},将空间维度和通道间的信息交互分离成深度卷积(也叫逐通道卷积,Depthwise Convolution)\index{逐通道卷积,Depthwise Convolution}{\small\bfnew{逐点卷积}} \index{逐点卷积}(Pointwise Convolution)\index{Pointwise Convolution} 两部分\upcite{Chollet2017XceptionDL,Howard2017MobileNetsEC}。除了直接将深度可分离卷积应用到神经机器翻译中\upcite{Kaiser2018DepthwiseSC},研究人员提出使用更高效的{\small\bfnew{轻量卷积}}\index{轻量卷积}(Lightweight Convolution)\index{Lightweight Convolution}{\small\bfnew{动态卷积}}\index{动态卷积}(Dynamic convolution)\index{Dynamic convolution}来进行不同词之间的特征提取\upcite{Wu2019PayLA}。本节将主要介绍这些改进的卷积操作。在后续章节中也会看到这些模型在神经机器翻译中的应用。 \parinterval 在序列建模中,卷积神经网络可以通过参数共享,高效地捕捉局部上下文特征,如图\ref{fig:11-11}所示。但是通过进一步分析可以发现,在标准卷积操作中包括了不同词和不同通道之间两种信息的交互,每个卷积核都是对相邻词的不同通道进行卷积,参数量为$K \times O$。其中$K$为卷积核大小,$O$为输入的通道数,即单词表示的维度大小。因此$N$个卷积核总共的参数量为$K \times O \times N$。这里涉及卷积核大小、输入通道数和输出通道数三个维度,因此计算复杂度较高。为了进一步提升计算效率,降低参数量,一些研究人员提出{\small\bfnew{深度可分离卷积}}\index{深度可分离卷积}(Depthwise Separable Convolution)\index{Depthwise Separable Convolution},将空间维度和通道间的信息交互分离成深度卷积(也叫逐通道卷积,Depthwise Convolution)\index{逐通道卷积,Depthwise Convolution}{\small\bfnew{逐点卷积}} \index{逐点卷积}(Pointwise Convolution)\index{Pointwise Convolution} 两部分\upcite{Chollet2017XceptionDL,Howard2017MobileNetsEC}。除了直接将深度可分离卷积应用到神经机器翻译中\upcite{Kaiser2018DepthwiseSC},研究人员提出使用更高效的{\small\bfnew{轻量卷积}}\index{轻量卷积}(Lightweight Convolution)\index{Lightweight Convolution}{\small\bfnew{动态卷积}}\index{动态卷积}(Dynamic Convolution)\index{Dynamic convolution}来进行不同词之间的特征提取\upcite{Wu2019PayLA}。本节将主要介绍这些改进的卷积操作。在后续章节中也会看到这些模型在神经机器翻译中的应用。
%---------------------------------------------------------------------------------------- %----------------------------------------------------------------------------------------
% NEW SUB-SECTION % NEW SUB-SECTION
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论