Commit 6273cf64 by 孟霞

合并分支 'master' 到 'mengxia'

Master

查看合并请求 !925
parents 9b395a80 dc9540cc
......@@ -13,6 +13,7 @@
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob11.center);
\node [draw,fit=(prob11) (prob21) (prob31) (prob41) (prob51) (topright) (bottomleft)] (prob1) {};
\end{pgfonlayer}
% \node [anchor=center] (word11) at ([yshift=0.7cm]prob11.center) {I};
% Column 2
\node [prob,minimum size=0.1cm,anchor=center] (prob12) at ([xshift=1cm]prob11.center) {};
......@@ -25,6 +26,7 @@
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob12.center);
\node [draw,fit=(prob12) (prob22) (prob32) (prob42) (prob52) (topright) (bottomleft)] (prob2) {};
\end{pgfonlayer}
% \node [anchor=center] (word12) at ([yshift=0.68cm]prob12.center) {am};
% Column 3
\node [prob,minimum size=0.1cm,anchor=center] (prob13) at ([xshift=1cm]prob12.center) {};
......@@ -37,6 +39,7 @@
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob13.center);
\node [draw,fit=(prob13) (prob23) (prob33) (prob43) (prob53) (topright) (bottomleft)] (prob3) {};
\end{pgfonlayer}
% \node [anchor=center] (word13) at ([yshift=0.7cm]prob13.center) {fine};
% Column 4
\node [prob,minimum size=0.5cm,anchor=center] (prob14) at ([xshift=1cm]prob13.center) {$.8$};
......@@ -49,9 +52,10 @@
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob14.center);
\node [draw,fit=(prob14) (prob24) (prob34) (prob44) (prob54) (topright) (bottomleft)] (prob4) {};
\end{pgfonlayer}
% \node [anchor=center] (word14) at ([yshift=0.68cm]prob14.center) {$\langle$eos$\rangle$};
% Label
\draw [decorate,decoration={brace}] ([yshift=0.1cm]prob1.north west) to node [midway,above,font=\small] {学习目标(Teacher输出)} ([yshift=0.1cm]prob4.north east);
\draw [decorate,decoration={brace}] ([xshift=0.1cm]prob4.north east) to node [midway,right,font=\small,align=center] {教师\\模型\\输出\\分布} ([xshift=0.1cm]prob4.south east);
% Vocab
\node [word,anchor=center] () at ([xshift=-0.9cm]prob11.center) {$\langle$eos$\rangle$};
......@@ -84,7 +88,7 @@
\node [word,anchor=north] () at ([xshift=2.1cm,yshift=-0.5cm]ns.south) {(a)\ Word-level};
\end{scope}
\begin{scope}[xshift=2.5in]
\begin{scope}[xshift=2.7in]
% Column 1
\node [prob,minimum size=0.1cm] (prob11) at (0,0) {};
\node [prob,minimum size=0.5cm,anchor=center] (prob21) at ([yshift=-0.5cm]prob11.center) {$1.$};
......@@ -96,6 +100,7 @@
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob11.center);
\node [draw,fit=(prob11) (prob21) (prob31) (prob41) (prob51) (topright) (bottomleft)] (prob1) {};
\end{pgfonlayer}
\node [anchor=center] (word11) at ([yshift=0.7cm]prob11.center) {I};
% Column 2
\node [prob,minimum size=0.1cm,anchor=center] (prob12) at ([xshift=1cm]prob11.center) {};
......@@ -108,6 +113,7 @@
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob12.center);
\node [draw,fit=(prob12) (prob22) (prob32) (prob42) (prob52) (topright) (bottomleft)] (prob2) {};
\end{pgfonlayer}
\node [anchor=center] (word12) at ([yshift=0.68cm]prob12.center) {am};
% Column 3
\node [prob,minimum size=0.1cm,anchor=center] (prob13) at ([xshift=1cm]prob12.center) {};
......@@ -120,6 +126,7 @@
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob13.center);
\node [draw,fit=(prob13) (prob23) (prob33) (prob43) (prob53) (topright) (bottomleft)] (prob3) {};
\end{pgfonlayer}
\node [anchor=center] (word13) at ([yshift=0.68cm]prob13.center) {good};
% Column 4
\node [prob,minimum size=0.5cm,anchor=center] (prob14) at ([xshift=1cm]prob13.center) {$1.$};
......@@ -132,9 +139,10 @@
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob14.center);
\node [draw,fit=(prob14) (prob24) (prob34) (prob44) (prob54) (topright) (bottomleft)] (prob4) {};
\end{pgfonlayer}
\node [anchor=center] (word14) at ([yshift=0.68cm]prob14.center) {$\langle$eos$\rangle$};
% Label
\draw [decorate,decoration={brace}] ([yshift=0.1cm]prob1.north west) to node [midway,above,font=\small] {学习目标(Teacher输出)} ([yshift=0.1cm]prob4.north east);
\draw [decorate,decoration={brace}] ([xshift=0.1cm]prob4.north east) to node [midway,right,font=\small,align=center] {教师\\模型\\输出\\译文} ([xshift=0.1cm]prob4.south east);
% Vocab
\node [word,anchor=center] () at ([xshift=-0.9cm]prob11.center) {$\langle$eos$\rangle$};
......
......@@ -7,7 +7,7 @@
\end{tabular}
};
\node[font=\footnotesize,anchor=north] (l1) at ([xshift=0em,yshift=-1em]top.south) {(a) 符号合并表};
\node[font=\scriptsize,anchor=west] (n1) at ([xshift=-6em,yshift=-6em]top.west) {l\ o\ w\ e\ r\ $<$e$>$};
\node[font=\scriptsize,anchor=west] (n1) at ([xshift=-4.5em,yshift=-6em]top.west) {l\ o\ w\ e\ r\ $<$e$>$};
\node[font=\scriptsize,anchor=west] (n2) at ([xshift=2.6em]n1.east) {l\ o\ w\ e\ {\red r$<$e$>$}};
\node[font=\scriptsize,anchor=west] (n3) at ([xshift=2.6em]n2.east) {{\red lo}\ w\ e\ r$<$e$>$};
\node[font=\scriptsize,anchor=west] (n4) at ([xshift=2.6em]n3.east) {{\red low}\ e\ r$<$e$>$};
......@@ -20,7 +20,7 @@
\node[font=\scriptsize,anchor=west] (t5) at ([xshift=0.8em]t4.east) {{\red low}\ est\ $<$e$>$};
\node[font=\scriptsize,anchor=west] (t6) at ([xshift=0.8em]t5.east) {low\ {\red est$<$e$>$}};
\node[font=\footnotesize,anchor=north] (l2) at ([xshift=1.5em,yshift=-1em]t3.south) {(b) 合并样例};
\node[font=\footnotesize,anchor=north] (l2) at ([xshift=2em,yshift=-1em]t3.south) {(b) 合并样例};
\draw[->,thick](n1.east) -- (n2.west);
\draw[->,thick](n2.east) -- (n3.west);
......
%%%------------------------------------------------------------------------------------------------------------
%%% 调序模型1:基于距离的调序
\begin{center}
\begin{tikzpicture}
\tikzstyle{tnode}=[rectangle,inner sep=0mm,minimum height=1.8em,minimum width=4em,rounded corners=5pt,thick,draw,fill=teal!20]
\tikzstyle{stnode}=[rectangle,inner sep=0mm,minimum height=1.8em,minimum width=4em,rounded corners=5pt,thick,fill=red!20,draw]
\tikzstyle{rnnnode}=[rectangle,inner sep=0mm,minimum height=2em,minimum width=6.5em,rounded corners=5pt,thick,fill=green!20,draw]
\tikzstyle{wnode}=[inner sep=0mm,minimum height=2em,minimum width=5em]
\begin{scope}[]
\node [anchor=west,rnnnode] (r1) at (0, 0) {循环单元$\mathbi{h}_1$};
\node [anchor=south,rnnnode] (r2) at ([xshift=0em,yshift=1em]r1.north){循环单元$\mathbi{h}_2$};
\node [anchor=south] (r3) at ([xshift=0em,yshift=1em]r2.north){$\cdots$};
\node [anchor=south,rnnnode] (r4) at ([xshift=0em,yshift=1em]r3.north){循环单元$\mathbi{h}_n$};
\node [anchor=east,wnode,font=\footnotesize] (wr1) at ([xshift=-1em,yshift=0em]r1.west) {1时刻输入};
\node [anchor=east,wnode,font=\footnotesize] (wr2) at ([xshift=-1em,yshift=0em]r2.west) {2时刻输入};
\node [anchor=east,wnode,font=\footnotesize] (wr3) at ([xshift=-1em,yshift=0em]r4.west) {$n$时刻输入};
\node [anchor=north,wnode] (input) at ([xshift=0em,yshift=-1em]r1.south) {$\mathbi{h}_0$};
\node [anchor=south,wnode] (output) at ([xshift=0em,yshift=1em]r4.north) {输出};
\draw[->,very thick] ([xshift=-0.3em,yshift=-1em]wr1.west)--([xshift=-0.3em,yshift=1em]wr3.west);
\draw[->,thick] ([xshift=0em,yshift=0em]input.north)--([xshift=0em,yshift=0em]r1.south);
\draw[->,thick] ([xshift=0em,yshift=0em]r1.north)--([xshift=0em,yshift=0em]r2.south);
\draw[->,thick] ([xshift=0em,yshift=0em]r2.north)--([xshift=0em,yshift=0em]r3.south);
\draw[->,thick] ([xshift=0em,yshift=0em]r3.north)--([xshift=0em,yshift=0em]r4.south);
\draw[->,thick] ([xshift=0em,yshift=0em]r4.north)--([xshift=0em,yshift=0em]output.south);
\draw[->,thick] ([xshift=0em,yshift=0em]wr1.east)--([xshift=0em,yshift=0em]r1.west);
\draw[->,thick] ([xshift=0em,yshift=0em]wr2.east)--([xshift=0em,yshift=0em]r2.west);
\draw[->,thick] ([xshift=0em,yshift=0em]wr3.east)--([xshift=0em,yshift=0em]r4.west);
\node [anchor=north,font=\small] (label) at ([xshift=-4em,yshift=-0.5em]input.south) {(a)RNN};
\end{scope}
\begin{scope}[xshift=1.9in]
\node [anchor=west,stnode] (r1) at (0, 0) {第1层};
\node [anchor=south,tnode] (r2) at ([xshift=0em,yshift=1em]r1.north){第2层};
\node [anchor=south] (r3) at ([xshift=0em,yshift=1em]r2.north){$\cdots$};
\node [anchor=south,tnode] (r4) at ([xshift=0em,yshift=1em]r3.north){$n$};
\node [anchor=east,wnode,font=\footnotesize,minimum width=3em] (wr1) at ([xshift=-0.3em,yshift=0em]r1.west) {1时刻};
\node [anchor=east,wnode,font=\footnotesize,minimum width=3em] (wr2) at ([xshift=-0.3em,yshift=0em]r2.west) {2时刻};
\node [anchor=east,wnode,font=\footnotesize,minimum width=3em] (wr3) at ([xshift=-0.3em,yshift=0em]r4.west) {$n$时刻};
\node [anchor=north,wnode] (input) at ([xshift=0em,yshift=-1em]r1.south) {$\mathbi{h}_0$};
\node [anchor=south,wnode] (output) at ([xshift=0em,yshift=1em]r4.north) {输出};
\draw[->,very thick] ([xshift=-0.3em,yshift=-1em]wr1.west)--([xshift=-0.3em,yshift=1em]wr3.west);
\draw[->,thick] ([xshift=0em,yshift=0em]input.north)--([xshift=0em,yshift=0em]r1.south);
\draw[->,thick] ([xshift=0em,yshift=0em]r1.north)--([xshift=0em,yshift=0em]r2.south);
\draw[->,thick] ([xshift=0em,yshift=0em]r2.north)--([xshift=0em,yshift=0em]r3.south);
\draw[->,thick] ([xshift=0em,yshift=0em]r3.north)--([xshift=0em,yshift=0em]r4.south);
\draw[->,thick] ([xshift=0em,yshift=0em]r4.north)--([xshift=0em,yshift=0em]output.south);
\node [anchor=north,font=\small] (label) at ([xshift=-2em,yshift=-0.5em]input.south) {(b)原始Transformer模型};
\end{scope}
\begin{scope}[xshift=3.7in]
\node [anchor=west,stnode] (r1) at (0, 0) {第1层};
\node [anchor=south,stnode] (r2) at ([xshift=0em,yshift=1em]r1.north){第2层};
\node [anchor=south] (r3) at ([xshift=0em,yshift=1em]r2.north){$\cdots$};
\node [anchor=south,stnode] (r4) at ([xshift=0em,yshift=1em]r3.north){$n$};
\node [anchor=east,wnode,font=\footnotesize,align=left,minimum width=4em] (wr1) at ([xshift=-1em,yshift=0em]r1.west) {1时刻编\\ 码向量};
\node [anchor=east,wnode,font=\footnotesize,align=left,minimum width=4em] (wr2) at ([xshift=-1em,yshift=0em]r2.west) {2时刻编\\ 码向量};
\node [anchor=east,wnode,font=\footnotesize,align=left,minimum width=4em] (wr3) at ([xshift=-1em,yshift=0em]r4.west) {$n$时刻编\\ 码向量};
\node [anchor=north,wnode] (input) at ([xshift=0em,yshift=-1em]r1.south) {$\mathbi{h}_0$};
\node [anchor=south,wnode] (output) at ([xshift=0em,yshift=1em]r4.north) {输出};
\draw[->,very thick] ([xshift=-0.3em,yshift=-1em]wr1.west)--([xshift=-0.3em,yshift=1em]wr3.west);
\draw[->,thick] ([xshift=0em,yshift=0em]input.north)--([xshift=0em,yshift=0em]r1.south);
\draw[->,thick] ([xshift=0em,yshift=0em]r1.north)--([xshift=0em,yshift=0em]r2.south);
\draw[->,thick] ([xshift=0em,yshift=0em]r2.north)--([xshift=0em,yshift=0em]r3.south);
\draw[->,thick] ([xshift=0em,yshift=0em]r3.north)--([xshift=0em,yshift=0em]r4.south);
\draw[->,thick] ([xshift=0em,yshift=0em]r4.north)--([xshift=0em,yshift=0em]output.south);
\draw[->,thick] ([xshift=0em,yshift=0em]wr1.east)--([xshift=0em,yshift=0em]r1.west);
\draw[->,thick] ([xshift=0em,yshift=0em]wr2.east)--([xshift=0em,yshift=0em]r2.west);
\draw[->,thick] ([xshift=0em,yshift=0em]wr3.east)--([xshift=0em,yshift=0em]r4.west);
\node [anchor=north,font=\small,align=left] (label) at ([xshift=-2em,yshift=0.2em]input.south) {(c)共享权重的\\ Transformer模型};
\end{scope}
\end{tikzpicture}
\end{center}
\ No newline at end of file
......@@ -51,7 +51,7 @@
%----------------------------------------------
\begin{figure}[htp]
\centering
\includegraphics[scale=0.5]{./Chapter15/Figures/figure-relative-position-coding-and-absolute-position-coding.jpg}
\input{./Chapter15/Figures/figure-relative-position-coding-and-absolute-position-coding}
\caption{绝对位置编码(左)和相对位置编码(右)}
\label{fig:15-1}
\end{figure}
......@@ -322,7 +322,7 @@ C(\mathbi{x}_j \mathbi{W}_K,\omega) &=& (\mathbi{x}_{j-\omega},\ldots,\mathbi{x}
%----------------------------------------------
\begin{figure}[htp]
\centering
\includegraphics[scale=0.5]{./Chapter15/Figures/figure-introducing-rnn-mechanism-into-transformer.jpg}
\input{./Chapter15/Figures/figure-introducing-rnn-mechanism-into-transformer}
\caption{在Transformer中引入循环机制}
\label{fig:15-8}
\end{figure}
......@@ -867,17 +867,17 @@ lr &=& d_{\textrm{model}}^{-0.5}\cdot step\_num^{-0.5}
\sectionnewpage
\section{基于句法的神经机器翻译模型}
\parinterval 在统计机器翻译时代,使用句法信息是一种非常有效的机器翻译建模手段(见{\chaptereight})。由于句法是人类运用语言的高级抽象结果,使用句法信息(如句法树)可以帮助机器翻译系统对句子结构进行建模例如,利用句法树提升译文语法结构的正确性。在神经机器翻译中,大多数框架均基于词串进行建模,因此在模型中引入句法树等结构也很有潜力\upcite{DBLP:conf/acl/LiXTZZZ17}。 具体来说,由于传统神经机器翻译模型缺少对句子结构的理解,会导致一些翻译问题:
\parinterval 在统计机器翻译时代,使用句法信息是一种非常有效的机器翻译建模手段(见{\chaptereight})。由于句法是人类运用语言的高级抽象结果,使用句法信息(如句法树)可以帮助机器翻译系统对句子结构进行建模例如,利用句法树提升译文语法结构的正确性。在神经机器翻译中,大多数框架均基于词串进行建模,因此在模型中引入句法树等结构也很有潜力\upcite{DBLP:conf/acl/LiXTZZZ17}。 具体来说,由于传统神经机器翻译模型缺少对句子结构的理解,会导致一些翻译问题:
\begin{itemize}
\vspace{0.5em}
\item 过度翻译问题,如:
\item {\small\bfnew{过度翻译问题}},如:
\begin{equation}
\textrm{“ 两/个/女孩 ”}\ \to \ \textrm{“ two girls and two girls ”} \nonumber
\end{equation}
\vspace{0.5em}
\item 翻译不连贯问题,如:
\item {\small\bfnew{翻译不连贯问题}},如:
\begin{equation}
\textrm{“ 新生/银行/申请/上市 ”}\ \to \ \textrm{“ new listing bank ”} \nonumber
\end{equation}
......@@ -885,15 +885,15 @@ lr &=& d_{\textrm{model}}^{-0.5}\cdot step\_num^{-0.5}
\vspace{0.5em}
\end{itemize}
\parinterval 显然,神经机器翻译系统并没有按照合理的句法结构生成译文。也就是说,模型并没有理解句子的结构\upcite{DBLP:conf/acl/LiXTZZZ17}。甚至对于一些语言差异很大的语言对,会出现将介词短语翻译成一个词的情况。虽然可以通过很多手段对上述问题进行求解,但是使用句法树是最直接的解决问题的方法\upcite{DBLP:conf/acl/EriguchiHT16}
\parinterval 显然,神经机器翻译系统并没有按照合理的句法结构生成译文。也就是说,模型并没有理解句子的结构\upcite{DBLP:conf/acl/LiXTZZZ17}。甚至对于一些语言差异很大的语言对,会出现将介词短语翻译成一个词的情况。虽然可以通过很多手段对上述问题进行求解,但是使用句法树是解决该问题的一种最直接的方法\upcite{DBLP:conf/acl/EriguchiHT16}
\parinterval 那么在神经机器翻译中,如何将这种离散化的树结构融入到基于分布式表示的翻译模型中呢?有以下两种策略:
\begin{itemize}
\vspace{0.5em}
\item 将句法信息加入到编码器,使得编码器更加充分地表示源语言句子。
\item {\small\bfnew{将句法信息加入到编码器}},使得编码器更加充分地表示源语言句子。
\vspace{0.5em}
\item 将句法信息加入到解码器,使得翻译模型能生成更符合句法的译文。
\item {\small\bfnew{将句法信息加入到解码器}},使得翻译模型能生成更符合句法的译文。
\vspace{0.5em}
\end{itemize}
......@@ -903,7 +903,7 @@ lr &=& d_{\textrm{model}}^{-0.5}\cdot step\_num^{-0.5}
\subsection{编码器使用句法信息}
\parinterval 编码器中使用句法信息有两种思路,一种思路是在编码器中显性使用树结构进行建模,另一种思路是把句法信息作为特征输入到传统的序列编码器中。这两种思路与统计机器翻译中基于句法树结构的模型和基于句法特征的模型十分相似(见{\chaptereight})。
\parinterval 编码器中使用句法信息有两种思路,一种思路是在编码器中显性使用树结构进行建模,另一种思路是把句法信息作为特征输入到传统的序列编码器中。这两种思路与统计机器翻译中基于句法树结构的模型和基于句法特征的模型十分相似(见{\chaptereight})。
%----------------------------------------------------------------------------------------
% NEW SUBSUB-SECTION
......@@ -911,7 +911,7 @@ lr &=& d_{\textrm{model}}^{-0.5}\cdot step\_num^{-0.5}
\subsubsection{1. 基于句法树结构的编码}
\parinterval 一种简单的方法是将源语言句子编码成一个二叉树结构\footnote[6]{所有句法树都可以通过二叉化方法转化为二叉树(见{\chaptereight})。},树节点的信息是由左子树和右子树变换而来,如下所示:
\parinterval 使用句法信息的一种简单的方法是将源语言句子编码成一个二叉树结构\footnote[6]{所有句法树都可以通过二叉化方法转化为二叉树(见{\chaptereight})。},树节点的信息是由左子树和右子树变换而来,如下所示:
\begin{eqnarray}
\mathbi{h}_p &=& f_\textrm{tree}(\mathbi{h}_l,\mathbi{h}_r)
\label{eq:15-50}
......
\tikzstyle{coder} = [rectangle,rounded corners,minimum height=2.2em,minimum width=4.3em,text centered,draw=black,fill=red!25]
\tikzstyle{coder} = [rectangle,thick,rounded corners,minimum height=2.2em,minimum width=4.3em,text centered,draw=black,fill=red!20]
\begin{tikzpicture}[node distance = 0,scale = 0.75]
\tikzstyle{every node}=[scale=0.75]
\node(x)[]{$x$};
\node(encoder)[coder, above of = x,yshift=4em]{{编码器}};
\node(decoder_left)[coder, above of = encoder, yshift=6em,fill=blue!25]{{解码器}};
\node(encoder)[coder, above of = x,yshift=4em]{\large{编码器}};
\node(decoder_left)[coder, above of = encoder, yshift=6em,fill=blue!20]{\large{解码器}};
\node(y_hat)[above of = decoder_left, yshift=4em]{{$y$}};
\node(y)[above of = decoder_left, xshift=-6em]{{$y_{<}$}};
\node(decoder_right)[coder, above of = encoder, xshift=11em,fill=yellow!25]{{解码器}};
\node(decoder_right)[coder, above of = encoder, xshift=11em,fill=yellow!20]{\large{解码器}};
\node(figure)[draw=white,above of = decoder_right,yshift=6.5em,scale=0.25] {\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-bank-without-attention.jpg}};
\node [anchor=south,scale=1.2] (node1) at ([xshift=-2.5em,yshift=4.5em]y.north) {\small{$x$:源语言文本数据}};
\node [anchor=north,scale=1.2] (node2) at ([xshift=0.57em]node1.south){\small{$y$:目标语言文本数据}};
\node [anchor=south,scale=1.2] (node1) at ([xshift=-2.5em,yshift=4.5em]y.north) {{$x$:源语言文本数据}};
\node [anchor=north,scale=1.2] (node2) at ([xshift=0.57em]node1.south){{$y$:目标语言文本数据}};
\draw[->,thick](x)to(encoder);
\draw[->,thick](encoder)to(decoder_left)node[right,xshift=-0.1cm,yshift=-1.25cm,scale=1.2]{\small{翻译}};
\draw[->,thick](encoder)to(decoder_left)node[right,xshift=-0.1cm,yshift=-1.25cm,scale=1.2]{{翻译}};
\draw[->,thick](decoder_left)to(y_hat);
\draw[->,thick](y)to(decoder_left);
\draw[->,thick](encoder)to(decoder_right)node[left,xshift=-3.1em,yshift=0.25cm,scale=1.2]{\small{生成图片}};
\draw[->,thick](encoder)to(decoder_right)node[left,xshift=-3.1em,yshift=0.25cm,scale=1.2]{{生成图片}};
\draw[->,thick](decoder_right)to(figure);
\end{tikzpicture}
\ No newline at end of file
......@@ -7,7 +7,6 @@
\node[layer,anchor=south,fill=green!20] (en_ffn) at ([yshift=1.0em]en_add1.north){Feed Forward \\ Network};
\node[anchor=south,layer,fill=yellow!20](en_add2) at ([yshift=1.0em]en_ffn.north) {Add \& LayerNorm};
\node[layer,anchor=south,fill=blue!20] (en_sf) at ([yshift=2.4em]en_add2.north){Softmax};
\node[layer,anchor=south,fill=orange!20] (en_output) at ([yshift=1.0em]en_sf.north){CTC Output};
\node[draw,circle,inner sep=0pt, minimum size=1em,anchor=north,thick] (en_add) at ([yshift=-1.4em]en_sa.south){};
\draw[thick] (en_add.90) -- (en_add.-90);
\draw[thick] (en_add.0) -- (en_add.180);
......@@ -22,7 +21,7 @@
\draw[->,thick] ([yshift=0.1em]en_add1.90) -- ([yshift=-0.1em]en_ffn.-90);
\draw[->,thick] ([yshift=0.1em]en_ffn.90) --([yshift=-0.1em]en_add2.-90);
\draw[->,thick] ([yshift=0.1em]en_add2.90) -- ([yshift=-0.1em]en_sf.-90);
\draw[->,thick] ([yshift=0.1em]en_sf.90) -- ([yshift=-0.1em]en_output.-90);
\draw[->,thick] ([yshift=0.1em]en_sf.90) -- ([yshift=1.5em]en_sf.90)node[left,pos=0.5]{\scriptsize{CTC Loss}};
\draw[->,rounded corners=2pt,thick] ([yshift=-0.6em]en_sa.south)--([yshift=-0.6em,xshift=-4.0em]en_sa.south)--([xshift=-0.43em]en_add1.west)--(en_add1.west);
\draw[->,rounded corners=2pt,thick] ([yshift=-0.6em]en_ffn.south)--([yshift=-0.6em,xshift=-4.0em]en_ffn.south)--([xshift=-0.43em]en_add2.west)--(en_add2.west);
......
\tikzstyle{coder} = [rectangle,thick,rounded corners,minimum height=2.2em,minimum width=4.3em,text centered,draw=black!70,fill=red!20]
\tikzstyle{coder} = [rectangle,thick,rounded corners,minimum height=2.2em,minimum width=4.3em,text centered,draw=black,fill=red!20]
\begin{tikzpicture}[node distance = 0,scale = 0.75]
\tikzstyle{every node}=[scale=0.75]
......
......@@ -2,31 +2,31 @@
\definecolor{color_green}{rgb}{0.663,0.82,0.557}
\definecolor{color_orange}{rgb}{0.957,0.694,0.514}
\definecolor{color_blue}{rgb}{0.335,0.708,0.735}
\tikzstyle{description} = [rectangle,rounded corners=1mm, minimum width=3cm,minimum height=0.6cm,text centered]
\tikzstyle{description} = [rectangle,rounded corners=1mm, minimum width=3cm,minimum height=0.6cm,text centered,draw,thick]
\begin{tikzpicture}[node distance = 0,scale = 0.8]
\tikzstyle{every node}=[scale=0.8]
\node(figure-1)[draw=white,scale=0.25] at (0,0){\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-dog-with-hat.png}};
\node(ground-1)[rectangle,rounded corners, minimum width=5cm, minimum height=3.5cm,right of = figure-1, xshift=5cm,fill=blue!20]{};
\node(text-1)[right of = figure-1, xshift=3.6cm,yshift=2cm,scale=1.2]{\textcolor{color_gray}{描述候选池}};
\node(text_1-1)[description, right of = figure-1, xshift=4.2cm,yshift=1.2cm,fill=color_gray!50]{\textcolor{white}{天空中有很多鸟。}};
\node(text_2-1)[description, right of = figure-1, xshift=5.3cm,yshift=0.5cm,fill=color_green]{\textcolor{white}{孩子从河岸上跳下来。}};
\node(text_3-1)[description, right of = figure-1, xshift=4.5cm,yshift=-0.2cm,fill=color_orange]{\textcolor{white}{狗在吐舌头。}};
\node(ground-1)[rectangle,rounded corners, minimum width=5cm, minimum height=3.5cm,right of = figure-1, xshift=5cm,fill=gray!10,draw,thick,drop shadow]{};
\node(text-1)[right of = figure-1, xshift=3.6cm,yshift=2.1cm,scale=1.2]{{描述候选池}};
\node(text_1-1)[description, right of = figure-1, xshift=4.2cm,yshift=1.2cm,fill=gray!20]{{天空中有很多鸟。}};
\node(text_2-1)[description, right of = figure-1, xshift=5.3cm,yshift=0.5cm,fill=green!20]{{孩子从河岸上跳下来。}};
\node(text_3-1)[description, right of = figure-1, xshift=4.5cm,yshift=-0.2cm,fill=orange!20]{{狗在吐舌头。}};
\node(surd-1)[right of = text_3-1, xshift=2cm,scale=1.5]{\textcolor{red}{$\surd$}};
\node(text_4-1)[description, right of = figure-1, xshift=5.2cm,yshift=-0.9cm,fill=color_blue]{\textcolor{white}{男人戴着眼镜。}};
\node(text_4-1)[description, right of = figure-1, xshift=5.2cm,yshift=-0.9cm,fill=blue!20]{{男人戴着眼镜。}};
\node(point-1)[right of = figure-1, xshift=5cm,yshift=-1.4cm,scale=1.5]{...};
\draw[->,thick](figure-1)to([xshift=-0.1cm]ground-1.west);
\node(figure)[draw=white,scale=0.25]at ([xshift=20.0em]figure-1.east){\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-dog-with-hat.png}};
\node(ground)[rectangle,rounded corners, minimum width=5cm, minimum height=1.5cm,right of = figure, xshift=5cm,yshift=-2.6em,fill=blue!20]{\large{图片中有\underline{\textcolor{red}{}}\underline{\textcolor{red}{帽子}}\underline{\quad\ }}};
\node(dog)[rectangle,rounded corners, minimum width=1cm, minimum height=0.7cm,right of = figure, xshift=3cm,yshift=1.5cm,thick, draw=color_orange,fill=color_orange!50]{};
\node(hat)[rectangle,rounded corners, minimum width=1.5cm, minimum height=0.7cm,right of = figure, xshift=4.5cm,yshift=1.5cm,thick, draw=color_green,fill=color_green!50]{帽子};
\draw[->, thick,color=black!60](figure.east)to([xshift=-0.1cm]dog.west)node[left,xshift=-0.2cm,yshift=-0.1cm,color=black]{图片检测};
\draw[->, thick,color=black!60]([yshift=-0.1cm]hat.south)to([yshift=0.1cm]ground.north)node[right,xshift=-0.2cm,yshift=0.5cm,color=black]{模板填充};
\node(ground)[rectangle,rounded corners, minimum width=5cm, minimum height=1.5cm,right of = figure, xshift=5cm,yshift=-2.6em,fill=gray!10,draw,thick,drop shadow]{\large{图片中有\underline{\textcolor{red}{}}\underline{\textcolor{red}{帽子}}\underline{\quad\ }}};
\node(dog)[rectangle,rounded corners, minimum width=1cm, minimum height=0.7cm,right of = figure, xshift=3cm,yshift=1.5cm,thick, draw,fill=orange!20,thick]{};
\node(hat)[rectangle,rounded corners, minimum width=1.5cm, minimum height=0.7cm,right of = figure, xshift=4.5cm,yshift=1.5cm,thick, draw,fill=green!20,thick]{帽子};
\draw[->, thick](figure.east)to([xshift=-0.1cm]dog.west)node[left,xshift=-0.2cm,yshift=-0.1cm,color=black]{图片检测};
\draw[->, thick]([yshift=-0.1cm]hat.south)to([yshift=0.1cm]ground.north)node[right,xshift=-0.2cm,yshift=0.5cm,color=black]{模板填充};
\node [anchor=north](pos1)at ([xshift=-3.8em,yshift=-0.5em]ground-1.south){(a) 基于检索的图像描述生成};
\node [anchor=north](pos2)at ([xshift=-3.8em,yshift=-0.5em]ground.south){(b) 基于模板的图像描述生成};
\node [anchor=north](pos1)at ([xshift=-3.8em,yshift=-1em]ground-1.south){(a) 基于检索的图像描述生成};
\node [anchor=north](pos2)at ([xshift=-3.8em,yshift=-1em]ground.south){(b) 基于模板的图像描述生成};
\end{tikzpicture}
\ No newline at end of file
......@@ -62,7 +62,7 @@
\subsection{音频处理}
\parinterval 为了保证对相关内容描述的完整性,这里对语音处理的基本知识作简要介绍。不同于文本,音频本质上是经过若干信号处理之后的{\small\bfnew{波形}}(Waveform)\index{Waveform}。具体来说,声音是一种空气的震动,因此可以被转换为模拟信号。模拟信号是一段连续的信号,经过采样变为离散的数字信号。采样是每隔固定的时间记录一下声音的振幅,采样率表示每秒的采样点数,单位是赫兹(Hz)。采样率越高,结果的损失则越小。通常来说,采样的标准是能够通过离散化的数字信号重现原始语音。日常生活中使用的手机和电脑设备的采样率一般为16kHz,表示每秒16000个采样点;而音频CD的采样率可以达到44.1kHz。 经过进一步的量化,将采样点的值转换为整型数值保存,从而减少占用的存储空间,通常采用的是16位量化。将采样率和量化位数相乘,就可以得到{\small\bfnew{比特率}}\index{比特率}(Bits Per Second,BPS)\index{Bits Per Second},表示音频每秒占用的位数。例如,16kHz采样率和16位量化的音频,比特率为256kb/s。音频处理的整体流程如图\ref{fig:17-2}所示\upcite{洪青阳2020语音识别原理与应用,陈果果2020语音识别实战}
\parinterval 为了保证对相关内容描述的完整性,这里对语音处理的基本知识作简要介绍。不同于文本,音频本质上是经过若干信号处理之后的{\small\bfnew{波形}}(Waveform)\index{Waveform}。具体来说,声音是一种空气的震动,因此可以被转换为模拟信号。模拟信号是一段连续的信号,经过采样变为离散的数字信号。采样是每隔固定的时间记录一下声音的振幅,采样率表示每秒的采样点数,单位是赫兹(Hz)。采样率越高,采样的结果与原始的语音越相像。通常来说,采样的标准是能够通过离散化的数字信号重现原始语音。日常生活中使用的手机和电脑设备的采样率一般为16kHz,表示每秒16000个采样点;而音频CD的采样率可以达到44.1kHz。 经过进一步的量化,将采样点的值转换为整型数值保存,从而减少占用的存储空间,通常采用的是16位量化。将采样率和量化位数相乘,就可以得到{\small\bfnew{比特率}}\index{比特率}(Bits Per Second,BPS)\index{Bits Per Second},表示音频每秒占用的位数。例如,16kHz采样率和16位量化的音频,比特率为256kb/s。音频处理的整体流程如图\ref{fig:17-2}所示\upcite{洪青阳2020语音识别原理与应用,陈果果2020语音识别实战}
%----------------------------------------------------------------------------------------------------
\begin{figure}[htp]
......@@ -385,7 +385,7 @@
\parinterval 要想使编码器-解码器框架在图像描述生成中充分发挥作用,编码器也要更好的表示图像信息。对于编码器的改进,通常体现在向编码器中添加图像的语义信息\upcite{DBLP:conf/cvpr/YouJWFL16,DBLP:conf/cvpr/ChenZXNSLC17,DBLP:journals/pami/FuJCSZ17}和位置信息\upcite{DBLP:conf/cvpr/ChenZXNSLC17,DBLP:conf/ijcai/LiuSWWY17}
\parinterval 图像的语义信息一般是指图像中存在的实体、属性、场景等等。如图\ref{fig:17-17}所示,从图像中利用属性或实体检测器提取出“girl”、“river”、“bank”等属性词和实体词,将他们作为图像的语义信息编码的一部分,再利用注意力机制计算目标语言单词与这些属性词或实体词之间的注意力权重\upcite{DBLP:conf/cvpr/YouJWFL16}。当然,除了图像中的实体和属性作为语义信息外,也可以将图片的场景信息加入到编码器当中\upcite{DBLP:journals/pami/FuJCSZ17}。有关如何做属性、实体和场景的检测,涉及到目标检测任务的工作,例如Faster-RCNN\upcite{DBLP:journals/pami/RenHG017}、YOLO\upcite{DBLP:journals/corr/abs-1804-02767,DBLP:journals/corr/abs-2004-10934}等等,这里不再赘述。
\parinterval 图像的语义信息一般是指图像中存在的实体、属性、场景等等。如图\ref{fig:17-17}所示,从图像中利用属性或实体检测器提取出“jump”、“girl”、“river”、“bank”等属性词和实体词,将他们作为图像的语义信息编码的一部分,再利用注意力机制计算目标语言单词与这些属性词或实体词之间的注意力权重\upcite{DBLP:conf/cvpr/YouJWFL16}。当然,除了图像中的实体和属性作为语义信息外,也可以将图片的场景信息加入到编码器当中\upcite{DBLP:journals/pami/FuJCSZ17}。有关如何做属性、实体和场景的检测,涉及到目标检测任务的工作,例如Faster-RCNN\upcite{DBLP:journals/pami/RenHG017}、YOLO\upcite{DBLP:journals/corr/abs-1804-02767,DBLP:journals/corr/abs-2004-10934}等等,这里不再赘述。
%----------------------------------------------------------------------------------------------------
\begin{figure}[htp]
......@@ -418,7 +418,7 @@
\parinterval 在计算机视觉中,图像风格变换、图像超分辨率重建等任务,都可以被视为{\small\bfnew{图像到图像的翻译}}\index{图像到图像的翻译}(Image-to-Image Translation)\index{Image-to-Image Translation}问题。与机器翻译类似,这些问题的共同目标是学习从一个对象到另一个对象的映射,只不过这里的对象是指图像,而非机器翻译中的文字。例如,给定物体的轮廓生成真实物体图片,或者给定白天照片生成夜晚的照片等。图像到图像的翻译有广阔的应用场景,如图片补全、风格迁移等。{\small\bfnew{文本到图像的翻译}}\index{文本到图像的翻译}(Text-to-Image Translation)\index{Text-to-Image Translation}是指给定描述物体颜色和形状等细节的自然语言文字,生成对应的图像。该任务也可以看作是图像描述生成的逆任务。
\parinterval 无论是图像到图像的生成,还是文本到图像的生成,均可直接使用编码器-解码器框架进行实现。比如,在文本到图像生成中,可以使用机器翻译中的编码器对输入文本进行编码,之后用反卷积神经网络将编码结果转化为图像。近些年,图像生成类任务也取得了很大的进展,这主要得益于生成对抗网络的使用\upcite{DBLP:conf/nips/GoodfellowPMXWOCB14,DBLP:journals/corr/abs-1908-06616,DBLP:conf/icml/ReedAYLSL16,DBLP:journals/corr/DashGALA17}。在{\chapterthirteen}已经介绍了生成对抗网络,而且图像生成也不是本书的重点,感兴趣的读者可以参考{\chapterthirteen}的内容或者自行查阅相关文献进行了解。
\parinterval 无论是图像到图像的生成,还是文本到图像的生成,均可直接使用编码器-解码器框架进行实现。比如,在文本到图像生成中,可以使用机器翻译中的编码器对输入文本进行编码,之后用对抗生成网络将编码结果转化为图像\upcite{DBLP:conf/icml/ReedAYLSL16}。近些年,图像生成类任务也取得了很大的进展,这主要得益于生成对抗网络的使用\upcite{DBLP:conf/nips/GoodfellowPMXWOCB14,DBLP:journals/corr/abs-1908-06616,DBLP:journals/corr/DashGALA17}。在{\chapterthirteen}已经介绍了生成对抗网络,而且图像生成也不是本书的重点,感兴趣的读者可以参考{\chapterthirteen}的内容或者自行查阅相关文献进行了解。
%----------------------------------------------------------------------------------------
% NEW SECTION
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论