Commit 7a6878f6 by 曹润柘

16 and 17

parent 5e1bdbd4
\begin{tikzpicture}
\begin{scope}
\node [anchor=center] (node1-1) at (0,0) {\small{$y$}};
\node[anchor=north,line width=0.6pt,draw,rounded corners,minimum height=2.2em,minimum width=4.3em,fill=blue!20](node1-3) at ([yshift=-2.0em]node1-1.south) {\small{解码器}};
\node[anchor=north,line width=0.6pt,draw,rounded corners,minimum height=2.2em,minimum width=4.3em,fill=yellow!20](node3-3) at ([yshift=-2.0em]node1-3.south) {\small{语言模型}};
\node [anchor=west] (node3-1) at ([xshift=4.0em]node3-3.east) {\small{$z$}};
\node[anchor=north](node3-41) at ([yshift=-2em]node3-3.south) {\small{$y_{<}+z_{<}$}};
\node[anchor=east,line width=0.6pt,draw,rounded corners,minimum height=2.2em,minimum width=4.3em,fill=red!20](node2-1) at ([xshift=-2em]node1-3.west) {\small{编码器}};
\node[anchor=north](node2-2) at ([yshift=-2em]node2-1.south) {\small{$x$}};
% Shared box style for the rounded module boxes that list `rec` in their options.
% Defined with \tikzset because \tikzstyle is deprecated in current PGF/TikZ.
\tikzset{rec/.style={line width=0.6pt,draw,rounded corners,minimum height=2.2em,minimum width=4.3em}}
\node [rectangle,rounded corners,draw=red,line width=0.2mm,densely dashed,inner sep=0.4em] [fit = (node3-1) (node3-3)] (inputshadow) {};
\draw [->,thick](node1-3.north)--(node1-1)node[pos=0.5,left,font=\scriptsize]{Softmax};
\draw [->,thick](node2-2.north)--(node2-1);
\draw[->,thick](node2-1.east)--(node1-3.west);
\draw [->,thick](node3-41.north)--(node3-3.south);
\draw [->,thick](node3-3.north)--(node1-3.south);
\draw[->,thick](node3-3.east)--(node3-1.west)node[pos=0.5,above,font=\scriptsize]{Softmax};
\node [anchor=center] (node1-1) at (0,0) {\small{$y$}};
\node[anchor=north,rec,fill=blue!20](node1-2) at ([yshift=-2.0em]node1-1.south) {\small{解码器}};
\node[anchor=north,rec,fill=red!20](node1-3) at ([yshift=-2em]node1-2.south) {\small{编码器}};
\node[anchor=east](node1-5) at ([xshift=-2em]node1-2.west) {\small{$y_{<}$}};
\node[anchor=north](node1-4) at ([yshift=-2em]node1-3.south) {\small{$x$}};
\draw [->,thick](node1-4.north)--(node1-3.south);
\draw [->,thick](node1-5.east)--(node1-2.west);
\draw [->,thick](node1-3.north)--(node1-2.south);
\draw [->,thick](node1-2.north)--(node1-1.south);
\node [anchor=center] (node2-1) at ([xshift=12.0em]node1-1.east) {\small{$y$}};
\node[anchor=north,rec,fill=blue!20](node2-2) at ([yshift=-2.0em]node2-1.south) {\small{解码器}};
\node[anchor=north,rec,fill=red!20](node2-3) at ([yshift=-2em]node2-2.south) {\small{编码器}};
\node[anchor=north](node2-4) at ([yshift=-2em]node2-3.south) {\small{$x$}};
\node[anchor=west,rec,fill=yellow!20](node2-6) at ([xshift=3.0em]node2-2.east) {\small{语言模型}};
\node[anchor=north](node2-5) at ([yshift=-2em]node2-6.south) {\small{$y_{<}+z_{<}$}};
% Label placed 2em above node2-6; the variable z is set in math mode so it
% matches the other variable labels of the figure ($x$, $y$).
\node[anchor=south](node2-7) at ([yshift=2em]node2-6.north) {\small{$z$}};
\node [rectangle,rounded corners,draw=red,line width=0.2mm,densely dashed,inner sep=0.4em] [fit = (node2-6) (node2-7)] (inputshadow) {};
\draw [->,thick](node2-4.north)--(node2-3.south);
\draw [->,thick](node2-5.north)--(node2-6.south);
\draw [->,thick](node2-3.north)--(node2-2.south);
\draw [->,thick](node2-2.north)--(node2-1.south);
\draw [->,thick](node2-2.east)--(node2-6.west);
\draw [->,thick](node2-6.north)--(node2-7.south);
\node [anchor=east] (node1) at ([yshift=1.6em,xshift=-2.0em]node1-1.west) {\small{$x,y$:双语数据}};
\node [anchor=south] (node2) at ([xshift=1.96em]node1.north) {\small{$y_{<}$:目标语言文本数据}};
\node [anchor=north] (node3) at ([xshift=0.45em]node1.south) {\small{$z$}:单语数据};
\node [anchor=north](pos1) at ([yshift=0em]node1-4.south) {\small{(a)单任务学习}};
\node [anchor=west](pos2) at ([xshift=10.0em]pos1.east) {\small{(b)多任务学习}};
\node [anchor=east] (node2-1-1) at ([xshift=-12.0em,yshift=-4.25em]node1-1.west) {\small{$y$}};
\node[anchor=north,line width=0.6pt,draw,rounded corners,minimum height=2.2em,minimum width=4.3em,fill=blue!20](node2-1-3) at ([yshift=-2.0em]node2-1-1.south) {\small{解码器}};
\node[anchor=east,line width=0.6pt,draw,rounded corners,minimum height=2.2em,minimum width=4.3em,fill=red!20](node2-2-1) at ([xshift=-2em]node2-1-3.west) {\small{编码器}};
\node[anchor=north](node2-2-2) at ([yshift=-2em]node2-2-1.south) {\small{$x$}};
\node[anchor=north](node2-2-3) at ([yshift=-2em]node2-1-3.south) {\small{$y_{<}$}};
\draw [->,thick](node2-2-2.north)--(node2-2-1);
\draw[->,thick](node2-2-1.east)--(node2-1-3.west);
\draw [->,thick](node2-1-3.north)--(node2-1-1)node[pos=0.5,left,font=\scriptsize]{Softmax};
\draw [->,thick](node2-2-3.north)--(node2-1-3);
\node [anchor=east] (node1) at ([xshift=-2.0em,yshift=3em]node2-1-1.west) {\small{$x,y$:双语数据}};
\node [anchor=south] (node3) at ([xshift=1.96em]node1.north) {\small{$y_{<}$:目标语言文本数据}};
\node [anchor=north] (node2) at ([xshift=0.45em]node1.south) {\small{$z$}:单语数据};
\node [anchor=north](pos1) at ([yshift=-3.5em]node3-3.south) {\small{(b)多任务学习}};
\node [anchor=east](pos2) at ([xshift=-10.0em]pos1.west) {\small{(a)单任务学习}};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
......@@ -8,7 +8,7 @@
\node(y)[above of = decoder_left, xshift=-6em]{{$y_{<}$}};
\node(decoder_right)[coder, above of = encoder, xshift=11em,fill=yellow!25]{{解码器}};
\node(figure)[draw=white,above of = decoder_right,yshift=6.5em,scale=0.25] {\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-bank-without-attention.png}};
\node(figure)[draw=white,above of = decoder_right,yshift=6.5em,scale=0.25] {\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-bank-without-attention.jpg}};
\draw[->,thick](x)to(encoder);
\draw[->,thick](encoder)to(decoder_left)node[right,xshift=-0.1cm,yshift=-1.25cm,scale=1.2]{\small{翻译}};
......
\begin{tikzpicture}[node distance = 0,scale = 0.7]
\tikzstyle{every node}=[scale=0.7]
\node[draw=white] (input) at (0,0){\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-bank-without-attention.png}};(1.9,-1.4);
\node[draw=white] (input) at (10,0){\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-bank-with-attention.png}};(1.9,-1.4);
% Two photographs side by side: "without attention" at x=0 and "with attention" at x=10.
% The stray "(1.9,-1.4);" that followed each terminating semicolon was dead text
% inside the picture (it only produced "Missing character" log noise) and is removed.
% NOTE(review): both nodes are named `input`; a later reference to `input` would
% resolve to the second node. The names are kept as in the original.
\node[draw=white] (input) at (0,0){\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-bank-without-attention.jpg}};
\node[draw=white] (input) at (10,0){\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-bank-with-attention.jpg}};
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}[node distance = 0]
\tikzstyle{every node}=[scale=0.85]
\begin {scope}
\node[draw=white,scale=0.6] (input) at (0,0){\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-bank-without-attention.png}};(1.9,-1.4);
% Source photograph anchoring the figure; the labels and sentence nodes below are
% positioned relative to `input`. The stray "(1.9,-1.4);" after the terminating
% semicolon was dead text inside the picture and is removed.
\node[draw=white,scale=0.6] (input) at (0,0){\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-bank-without-attention.jpg}};
\node[anchor=west] (label1) at ([xshift=-3.5em]input.west) {\begin{tabular}{l}{\normalsize{图片:}}\end{tabular}};
\node[anchor=south] (label2) at ([yshift=-7.15em]label1.south) {\begin{tabular}{l}{\normalsize{源文:}}\end{tabular}};
\node[anchor=south] (english1) at ([xshift=-0.1em,yshift=-3.5em]input.south) {\begin{tabular}{l}{\large{A\,medium\,sized\,child\,jumps\,off}}\end{tabular}};
\node[anchor=south] (english2) at ([xshift=-3.3em,yshift=-1.2em]english1.south) {\begin{tabular}{l}{\large{a dusty {\red{\underline{bank}}}.}} \end{tabular}};
\draw[decorate,decoration={brace,amplitude=4mm},very thick] ([xshift=7em]input.90) -- ([xshift=10.4em,yshift=0.5em]english2.270);
\node[anchor=south] (english1) at ([xshift=-0.1em,yshift=-3.5em]input.south) {\begin{tabular}{l}{\large{A\; girl\; jumps\; off\; a\; {\red{\underline{bank}}}.}}\end{tabular}};
\node[anchor=east,rectangle,thick,rounded corners,minimum width=3.5em,minimum height=2.5em,text centered,draw=black!70,fill=red!25](trans)at ([xshift=7.5em,yshift=5.1em]english1.east){\normalsize{翻译模型}};
\draw[decorate,decoration={brace,amplitude=4mm},very thick] ([xshift=7em]input.90) -- ([xshift=1.2em,yshift=-0.5em]english1.east);
\node[anchor=east,rectangle,thick,rounded corners,minimum width=3.5em,minimum height=2.5em,text centered,draw=black!70,fill=red!25](trans)at ([xshift=8.0em,yshift=5.55em]english1.east){\normalsize{翻译模型}};
\draw[->,very thick]([xshift=-1.4em]trans.west) to (trans.west);
\draw[->,very thick](trans.east) to ([xshift=1.4em]trans.east);
\node[anchor=east] (de1) at ([xshift=4.9cm,yshift=-0.1em]trans.east) {\begin{tabular}{l}{\normalsize{译文:}}{\normalsize{一个半大孩子从尘土}}\end{tabular}};
\node[anchor=south] (de2) at ([xshift=1.65em,yshift=-1.5em]de1.south) {\begin{tabular}{l}{\normalsize{飞扬的{\red{\underline{河床}}}跳下来。}} \end{tabular}};
\node[anchor=east] (de1) at ([xshift=4.7cm,yshift=-0.1em]trans.east) {\begin{tabular}{l}{\normalsize{译文:}}{\normalsize{一个女孩从{\red{河床}}}}\end{tabular}};
\node[anchor=south] (de2) at ([xshift=-0.4em,yshift=-1.5em]de1.south) {\begin{tabular}{l}{\normalsize{跳下来。}} \end{tabular}};
\end {scope}
\end{tikzpicture}
\ No newline at end of file
% Style for the word boxes (river, mountain, child, ...): fixed-size rectangle with
% centred text. Defined with \tikzset because \tikzstyle is deprecated; the dangling
% trailing comma of the original option list is dropped.
\tikzset{word/.style={rectangle,thick,minimum width=2cm,minimum height=0.7cm,text centered}}
\begin{tikzpicture}[node distance = 0,scale = 0.9]
\tikzstyle{every node}=[scale=0.9]
\node(figure)[draw=white,scale=0.4] {\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-bank-without-attention.png}};
\node(figure)[draw=white,scale=0.4] {\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-bank-without-attention.jpg}};
\node(river)[word, right of = figure, xshift=5cm, yshift=0.35cm, fill=blue!45]{river};
\node(mountain)[word, above of = river, yshift=0.75cm, fill=blue!45]{mountain};
\node(child)[word, above of = mountain, yshift=0.75cm, fill=blue!15]{child};
\node(man)[word, above of = child, yshift=0.75cm, fill=blue!25]{man};
\node(man)[word, above of = child, yshift=0.75cm, fill=blue!25]{girl};
\node(jump)[word, below of = river, yshift=-0.75cm, fill=blue!30]{jump};
\node(bank)[word, below of = jump, yshift=-0.75cm, fill=blue!65]{bank};
\node(sky)[word, below of = bank, yshift=-0.75cm, fill=blue!30]{sky};
......@@ -13,7 +13,7 @@
\node(cir)[circle,thick, minimum width=0.6cm, xshift=8cm, draw=black]{};
\node(decoder)[rectangle, rounded corners, minimum height=2.2em,minimum width=4.3em, right of = cir,xshift=3cm, draw=black, fill=blue!25]{\large{解码器}};
\node(yn_1)[below of = decoder,yshift=-2cm,scale=1.2]{$y_{<j}$};
\node(yn_2)[above of = decoder,yshift=2cm,scale=1.2]{$y_{j}$};
\node(yn_2)[above of = decoder,yshift=2cm,scale=1.2]{$y_{j}$(bank)};
\draw[->, thick]([xshift=0.1cm]figure.east)to([xshift=2cm]figure.east);
\draw[-,thick]([xshift=-0.03cm]cir.east)to([xshift=0.03cm]cir.west);
......
......@@ -35,7 +35,7 @@
\parinterval 长期以来,机器翻译都是指句子级翻译。主要原因在于,句子级的翻译建模可以大大简化问题,使得机器翻译方法更容易被实践和验证。但是人类使用语言的过程并不是孤立在一个个句子上进行的。这个问题可以类比于人类学习语言的过程:小孩成长过程中会接受视觉、听觉、触觉等多种信号,这些信号的共同作用使得他们产生对客观世界的“认识”,同时促使他们使用“语言”进行表达。从这个角度说,语言能力并不是由单一因素形成的,它往往伴随着其他信息的相互作用,比如,当我们翻译一句话的时候,会用到看到的画面、听到的语调、甚至前面说过的句子中的信息。
\parinterval 广义上,当前句子以外的信息都可以被看作是一种上下文。比如,图\ref{fig:17-1}中,需要把英语句子“A medium sized child jumps off a dusty bank”翻译为汉语。但是,其中的“bank”有多个含义,因此仅仅使用英语句子本身的信息可能会将其翻译为“银行”,而非正确的译文“河床”。但是,图\ref{fig:17-1}中也提供了这个英语句子所对应的图片,显然图片中直接展示了河床,这时“bank”是没有歧义的。通常也会把这种使用图片和文字一起进行机器翻译的任务称作{\small\bfnew{多模态机器翻译}}\index{多模态机器翻译}(Multi-Modal Machine Translation)\index{Multi-Modal Machine Translation}
\parinterval 广义上,当前句子以外的信息都可以被看作是一种上下文。比如,图\ref{fig:17-1}中,需要把英语句子“A girl jumps off a bank”翻译为汉语。但是,其中的“bank”有多个含义,因此仅仅使用英语句子本身的信息可能会将其翻译为“银行”,而非正确的译文“河床”。但是,图\ref{fig:17-1}中也提供了这个英语句子所对应的图片,显然图片中直接展示了河床,这时“bank”是没有歧义的。通常也会把这种使用图片和文字一起进行机器翻译的任务称作{\small\bfnew{多模态机器翻译}}\index{多模态机器翻译}(Multi-Modal Machine Translation)\index{Multi-Modal Machine Translation}。
%----------------------------------------------
\begin{figure}[htp]
......@@ -412,7 +412,7 @@
\parinterval 要想使编码器-解码器框架在图像描述生成中充分发挥作用,编码器也要更好地表示图像信息。对于编码器的改进,通常体现在向编码器中添加图像的语义信息\upcite{DBLP:conf/cvpr/YouJWFL16,DBLP:conf/cvpr/ChenZXNSLC17,DBLP:journals/pami/FuJCSZ17}和位置信息\upcite{DBLP:conf/cvpr/ChenZXNSLC17,DBLP:conf/ijcai/LiuSWWY17}。
\parinterval 图像的语义信息一般是指图像中存在的实体、属性、场景等等。如图\ref{fig:17-17}所示,从图像中利用属性或实体检测器提取出“child”、“river”、“bank”等属性词和实体词,将他们作为图像的语义信息编码的一部分,再利用注意力机制计算目标语言单词与这些属性词或实体词之间的注意力权重\upcite{DBLP:conf/cvpr/YouJWFL16}。当然,除了图像中的实体和属性作为语义信息外,也可以将图片的场景信息加入到编码器当中\upcite{DBLP:journals/pami/FuJCSZ17}。有关如何做属性、实体和场景的检测,涉及到目标检测任务的工作,例如Faster-RCNN\upcite{DBLP:journals/pami/RenHG017}、YOLO\upcite{DBLP:journals/corr/abs-1804-02767,DBLP:journals/corr/abs-2004-10934}等等,这里不过多赘述。
\parinterval 图像的语义信息一般是指图像中存在的实体、属性、场景等等。如图\ref{fig:17-17}所示,从图像中利用属性或实体检测器提取出“girl”、“river”、“bank”等属性词和实体词,将它们作为图像的语义信息编码的一部分,再利用注意力机制计算目标语言单词与这些属性词或实体词之间的注意力权重\upcite{DBLP:conf/cvpr/YouJWFL16}。当然,除了图像中的实体和属性作为语义信息外,也可以将图片的场景信息加入到编码器当中\upcite{DBLP:journals/pami/FuJCSZ17}。有关如何做属性、实体和场景的检测,涉及到目标检测任务的工作,例如Faster-RCNN\upcite{DBLP:journals/pami/RenHG017}、YOLO\upcite{DBLP:journals/corr/abs-1804-02767,DBLP:journals/corr/abs-2004-10934}等等,这里不过多赘述。
%----------------------------------------------------------------------------------------------------
\begin{figure}[htp]
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论