add picture

d4c2adbd · 曹润柘 · d011b48d · d4c2adbd · d4c2adbd · d4c2adbd
Commit d4c2adbd authored Dec 06, 2020 by 曹润柘
--- a/Chapter16/Figures/figure-multi-language-single-model-system-diagram.tex
+++ b/Chapter16/Figures/figure-multi-language-single-model-system-diagram.tex
@@ -18,21 +18,15 @@

 \node[font=\footnotesize] (train) at (11em,7em) {\small\bfnew{训练阶段：}};
 \node[anchor=north,font=\footnotesize] (pair1) at ([yshift=-1em,xshift=1em]train.south) {双语句对1：};
-%\node[anchor=west,draw=black,lan,minimum width=9.8em,fill=red!20,line width=0.6pt] (box1) at ([yshift=.7em,xshift=0.4em]pair1.east) {};
 \node[anchor=west,lan](train1) at ([yshift=.7em,xshift=0.4em]pair1.east) {英语：{\color{red}<spanish>} \ hello};
-%\node[anchor=west,draw=black,lan,minimum width=9.8em,fill=blue!20,line width=0.6pt] (box2) at ([yshift=-.7em,xshift=0.4em]pair1.east) {};
 \node[anchor=west,lan](train2) at ([yshift=-.7em,xshift=0.4em]pair1.east) {西班牙语：hola};
 \node[anchor=north,font=\footnotesize] (pair2) at ([yshift=-4.5em,xshift=1em]train.south) {双语句对2：};
-%\node[anchor=west,draw=black,lan,minimum width=9.8em,fill=red!20,line width=0.6pt] (box3) at ([yshift=.7em,xshift=0.4em]pair2.east) {};
 \node[anchor=west,lan](train3) at ([yshift=.7em,xshift=0.4em]pair2.east) {法语：{\color{red}<german>} \ Bonjour};
-%\node[anchor=west,draw=black,lan,minimum width=9.8em,fill=blue!20,line width=0.6pt] (box4) at ([yshift=-.7em,xshift=0.4em]pair2.east) {};
 \node[anchor=west,lan](train4) at ([yshift=-.7em,xshift=0.4em]pair2.east) {德语：Hallo};
 \node[anchor=north,font=\footnotesize] (decode) at ([yshift=-8em]train.south) {\small\bfnew{解码阶段：}};
 \node[anchor=north,font=\footnotesize] (input) at ([xshift=2.13em,yshift=-0.6em]decode.south) {输入：};
-%\node[anchor=west,draw=black,lan,minimum width=9.8em,fill=red!20,line width=0.6pt] (box5) at ([xshift=0.4em]input.east) {};
 \node[anchor=west,lan](decode2) at ([xshift=0.4em]input.east) {英语：{\color{red}<german>} \ hello};
 \node[anchor=north,font=\footnotesize] (output) at ([xshift=2.13em,yshift=-2.6em]decode.south) {输出：};
-%\node[anchor=west,draw=black,lan,minimum width=9.8em,fill=blue!20,line width=0.6pt] (box6) at ([xshift=0.4em]output.east) {};
 \node[anchor=west,lan](decode3) at ([xshift=0.4em]output.east) {德语：Hallo};
 \node[anchor=north,lan,minimum width=9.8em] (box7) at ([yshift=-4em]train3.south) {};


--- a/Chapter16/Figures/figure-multitask-learning-in-machine-translation-1.tex
+++ b/Chapter16/Figures/figure-multitask-learning-in-machine-translation-1.tex
+
+%%% outline
+%-------------------------------------------------------------------------
+% Two panels side by side:
+%   (a) one encoder box feeding one decoder box;
+%   (b) the same stack plus a second decoder box attached to the encoder.
+\begin{tikzpicture}
+% Shared look of the encoder/decoder boxes.
+\tikzstyle{rec} = [draw,line width=0.6pt,rounded corners,minimum width=4.3em,minimum height=2.2em]
+
+% ---- panel (a): nodes, placed downwards starting from the output y' ----
+\node [anchor=center] (node1-1) at (0,0) {\small{$y'$}};
+\node [rec,fill=blue!20,anchor=north] (node1-2) at ([yshift=-2em]node1-1.south) {\small{解码器}};
+\node [rec,fill=red!20,anchor=north] (node1-3) at ([yshift=-2em]node1-2.south) {\small{编码器}};
+\node [anchor=north] (node1-4) at ([yshift=-2em]node1-3.south) {\small{$x$}};
+\node [anchor=east] (node1-5) at ([xshift=-2em]node1-2.west) {\small{$y$}};
+
+% ---- panel (a): arrows (none of them carries a label) ----
+\foreach \src/\dst in {node1-4.north/node1-3.south,node1-5.east/node1-2.west,node1-3.north/node1-2.south,node1-2.north/node1-1.south}
+  \draw [->,thick] (\src) -- (\dst);
+
+% ---- panel (b): nodes, anchored 12em to the right of panel (a)'s y' ----
+\node [anchor=center] (node2-1) at ([xshift=12em]node1-1.east) {\small{$y'$}};
+\node [rec,fill=blue!20,anchor=north] (node2-2) at ([yshift=-2em]node2-1.south) {\small{解码器}};
+\node [rec,fill=red!20,anchor=north] (node2-3) at ([yshift=-2em]node2-2.south) {\small{编码器}};
+\node [anchor=north] (node2-4) at ([yshift=-2em]node2-3.south) {\small{$x$}};
+\node [anchor=east] (node2-5) at ([xshift=-2em]node2-2.west) {\small{$y$}};
+% Second decoder, to the right of the encoder, with its own output x'.
+\node [rec,fill=yellow!20,anchor=west] (node2-6) at ([xshift=3em]node2-3.east) {\small{解码器}};
+\node [anchor=south] (node2-7) at ([yshift=2em]node2-6.north) {\small{$x'$}};
+
+% ---- panel (b): unlabelled arrows ----
+\foreach \src/\dst in {node2-4.north/node2-3.south,node2-5.east/node2-2.west,node2-2.north/node2-1.south,node2-6.north/node2-7.south}
+  \draw [->,thick] (\src) -- (\dst);
+% ---- panel (b): the two encoder outputs, each tagged at mid-segment ----
+\draw [->,thick] (node2-3.north) -- (node2-2.south) node [left,pos=0.5,font=\scriptsize] {翻译};
+\draw [->,thick] (node2-3.east) -- (node2-6.west) node [above,pos=0.5,font=\scriptsize] {重排序};
+
+% ---- sub-captions; (b) is positioned relative to (a) ----
+\node [anchor=north] (pos1) at (node1-4.south) {\small{(a)单任务学习}};
+\node [anchor=west] (pos2) at ([xshift=10em]pos1.east) {\small{(b)多任务学习}};
+
+\end{tikzpicture}
\ No newline at end of file
--- a/Chapter16/Figures/figure-target-side-multi-task-learning.tex
+++ b/Chapter16/Figures/figure-target-side-multi-task-learning.tex
--- a/Chapter16/Figures/figure-optimization-of-the-model-initialization-method.tex
+++ b/Chapter16/Figures/figure-optimization-of-the-model-initialization-method.tex
@@ -3,16 +3,16 @@
 \begin{tikzpicture}
 \begin{scope}
 % ,minimum height =1em,minimum width=2em
-\tikzstyle{circle} = [draw,black,very thick,inner sep=3.5pt,rounded corners=4pt,minimum width=2em]
+\tikzstyle{circle} = [draw,black,line width=0.6pt,inner sep=3.5pt,rounded corners=4pt,minimum width=2em]
 \tikzstyle{word} = [inner sep=3.5pt]

 \node[circle,fill=red!20](data) at (0,0) {数据};
 \node[circle,fill=blue!20](model) at ([xshift=5em]data.east) {模型};
 \node[word] (init) at ([xshift=-5em]data.west){初始化};

-\draw[->,very thick] (init.east) -- ([xshift=-0.2em]data.west);
-\draw [->,very thick] ([yshift=1pt]data.north) .. controls +(90:2em) and +(90:2em) .. ([yshift=1pt]model.north) node[above,midway] {参数优化};
-\draw [->,very thick] ([yshift=1pt]model.south) .. controls +(-90:2em) and +(-90:2em) .. ([yshift=1pt]data.south) node[below,midway] {数据优化};
+\draw[->,thick] (init.east) -- ([xshift=-0.2em]data.west);
+\draw [->,thick] ([yshift=1pt]data.north) .. controls +(90:2em) and +(90:2em) .. ([yshift=1pt]model.north) node[above,midway] {参数优化};
+\draw [->,thick] ([yshift=1pt]model.south) .. controls +(-90:2em) and +(-90:2em) .. ([yshift=1pt]data.south) node[below,midway] {数据优化};

 \node[word] at ([xshift=-0.5em,yshift=-5em]data.south){（a）思路1};

@@ -22,16 +22,16 @@
 \begin{tikzpicture}
 \begin{scope}
 % ,minimum height =1em,minimum width=2em
-\tikzstyle{circle} = [draw,black,very thick,inner sep=3.5pt,rounded corners=4pt,minimum width=2em]
+\tikzstyle{circle} = [draw,black,line width=0.6pt,inner sep=3.5pt,rounded corners=4pt,minimum width=2em]
 \tikzstyle{word} = [inner sep=3.5pt]

 \node[circle,fill=red!20](data) at (0,0) {数据};
 \node[circle,fill=blue!20](model) at ([xshift=5em]data.east) {模型};
 \node[word] (init) at ([xshift=5em]model.east){初始化};

-\draw[->,very thick] (init.west) -- ([xshift=0.2em]model.east);
-\draw [->,very thick] ([yshift=1pt]data.north) .. controls +(90:2em) and +(90:2em) .. ([yshift=1pt]model.north) node[above,midway] {参数优化};
-\draw [->,very thick] ([yshift=1pt]model.south) .. controls +(-90:2em) and +(-90:2em) .. ([yshift=1pt]data.south) node[below,midway] {数据优化};
+\draw[->,thick] (init.west) -- ([xshift=0.2em]model.east);
+\draw [->,thick] ([yshift=1pt]data.north) .. controls +(90:2em) and +(90:2em) .. ([yshift=1pt]model.north) node[above,midway] {参数优化};
+\draw [->,thick] ([yshift=1pt]model.south) .. controls +(-90:2em) and +(-90:2em) .. ([yshift=1pt]data.south) node[below,midway] {数据优化};

 \node[word] at ([xshift=-0.5em,yshift=-5em]model.south){（b）思路2};


--- a/Chapter16/Figures/figure-parameter-initialization-method-diagram.tex
+++ b/Chapter16/Figures/figure-parameter-initialization-method-diagram.tex
@@ -24,7 +24,7 @@
 \draw[->,thick] (w4.-90) -- (encoder3.90);

 \node [anchor=north,single arrow,minimum height=2.2em,fill=blue!20,rotate=-90] (arrow1) at ([yshift=-1.4em,xshift=0.4em]encoder1.south) {};
-\node [anchor=north,single arrow,minimum height=2.2em,fill=blue!20,rotate=-90] (arrow2) at ([yshift=-1.4em,xshift=0.4em]encoder2.south) {};
+\node [anchor=north,single arrow,minimum height=2.2em,fill=red!20,rotate=-90] (arrow2) at ([yshift=-1.4em,xshift=0.4em]encoder2.south) {};
 \node [anchor=north,single arrow,minimum height=2.2em,fill=red!20,rotate=-90] (arrow3) at ([yshift=-1.4em,xshift=0.4em]encoder3.south) {};

 \node[anchor=south,yshift=3.4em] at (encoder1.north){\small\bfnew{父模型}};

--- a/Chapter16/Figures/figure-schematic-of-the-domain-discriminator.tex
+++ b/Chapter16/Figures/figure-schematic-of-the-domain-discriminator.tex
@@ -9,8 +9,8 @@
 \draw [->,thick](node1.east)--(node2.west);
 \draw [->,thick](node2.east)--([xshift=1.5em]node2.east)--([xshift=1.5em,yshift=2.0em]node2.east)--(node3.west);
 \draw [->,thick](node2.east)--([xshift=1.5em]node2.east)--([xshift=1.5em,yshift=-2.0em]node2.east)--(node4.west);
-\node [anchor=west](node5) at ([xshift=2.0em]node3.east) {目标语言};
-\node [anchor=west](node6) at ([xshift=2.0em]node4.east) {< 领域 >};
+\node [anchor=west,minimum width=5.0em](node5) at ([xshift=2.0em]node3.east) {目标语言};
+\node [anchor=west,minimum width=5.0em](node6) at ([xshift=2.0em]node4.east) {< 领域 >};
 \draw [->,thick](node3.east)--(node5.west);
 \draw [->,thick](node4.east)--(node6.west);
 \end{tikzpicture}
\ No newline at end of file
--- a/Chapter16/Figures/figure-unmt-process.tex
+++ b/Chapter16/Figures/figure-unmt-process.tex

 \begin{tikzpicture}
 \begin{scope}
-\tikzstyle{circle} = [draw,black,very thick,inner sep=3.5pt,rounded corners=4pt,minimum width=2em,align=center,fill=blue!20]
+\tikzstyle{circle} = [draw,black,line width=0.6pt,inner sep=3.5pt,rounded corners=4pt,minimum width=2em,align=center,fill=blue!20]
 \tikzstyle{word} = [inner sep=3.5pt]

 \node[circle](center) at (0,0) {
@@ -17,14 +17,14 @@ $x\rightarrow y$ & $y\rightarrow x$ \\

 \node[circle,fill=red!20] (down) at ([yshift=-8em]center.south) {$x,y$ \\ 数据};

-\draw[->,very thick] (init.south) -- ([yshift=0.2em]center.north);
-\draw[->,very thick] ([yshift=0.2em]down.north) -- ([yshift=-0.2em]center.south) node[pos=0.6,midway,align=left,xshift=-2.5em,yshift=0.5em] {语言模型\\目标函数};
+\draw[->,thick] (init.south) -- ([yshift=0.2em]center.north);
+\draw[->,thick] ([yshift=0.2em]down.north) -- ([yshift=-0.2em]center.south) node[pos=0.6,midway,align=left,xshift=-2.5em,yshift=0.5em] {语言模型\\目标函数};
 \node [anchor=center] at ([yshift=2.0em,xshift=-2.5em]down.north){（模型优化）};
-\draw[->,very thick] ([yshift=1pt]left.north) .. controls +(90:2em) and +(90:2em) .. ([yshift=1pt,xshift=-2.2em]center.north) node[above,midway,align=center] {翻译模型目标函数\\（模型优化）};
-\draw[->,very thick] ([yshift=1pt,xshift=-1.8em]center.north) .. controls +(90:2em) and +(90:2em) .. ([yshift=1pt]right.north) node[above,pos=0.6,align=center] {回译\\（数据优化）};
+\draw[->,thick] ([yshift=1pt]left.north) .. controls +(90:2em) and +(90:2em) .. ([yshift=1pt,xshift=-2.2em]center.north) node[above,midway,align=center] {翻译模型目标函数\\（模型优化）};
+\draw[->,thick] ([yshift=1pt,xshift=-1.8em]center.north) .. controls +(90:2em) and +(90:2em) .. ([yshift=1pt]right.north) node[above,pos=0.6,align=center] {回译\\（数据优化）};

-\draw [->,very thick] ([yshift=1pt]right.south) .. controls +(-90:2em) and +(-90:2em) .. ([yshift=1pt,xshift=2.2em]center.south) node[below,midway,align=center] {翻译模型目标函数\\（模型优化）};
-\draw [->,very thick] ([yshift=1pt,xshift=1.8em]center.south) .. controls +(-90:2em) and +(-90:2em) .. ([yshift=1pt]left.south) node[below,pos=0.6,align=center] {回译\\（数据优化）};
+\draw [->,thick] ([yshift=1pt]right.south) .. controls +(-90:2em) and +(-90:2em) .. ([yshift=1pt,xshift=2.2em]center.south) node[below,midway,align=center] {翻译模型目标函数\\（模型优化）};
+\draw [->,thick] ([yshift=1pt,xshift=1.8em]center.south) .. controls +(-90:2em) and +(-90:2em) .. ([yshift=1pt]left.south) node[below,pos=0.6,align=center] {回译\\（数据优化）};

 \end{scope}
 \end{tikzpicture}
--- a/Chapter16/Figures/figure-unsupervised-dual-learning-process.tex
+++ b/Chapter16/Figures/figure-unsupervised-dual-learning-process.tex
@@ -16,9 +16,9 @@

 \node[anchor=north,circle,fill=red!20,minimum width=6.8em](node2) at ([xshift=-6.0em,yshift=-2.0em]remark1.south) {源语言句子$\seq{x}$};
 \node[anchor=north,circle,fill=red!20,minimum width=6.8em](node2-2) at ([yshift=-0.2em]node2.south) {新生成句子$\seq{x'}$};
-\draw [->,thick]([yshift=0.2em]node2.north).. controls (-1.95,-1.5) and (-1.95,-0.2)..([xshift=-0.2em]remark1.west);
+\draw [->,thick]([yshift=0.2em]node2.north).. controls (-1.93,-1.5) and (-2.0,-0.2)..([xshift=-0.2em]remark1.west);
 \node[anchor=north,circle,fill=red!20](node3) at ([xshift=6.5em,yshift=-2.0em]remark1.south) {目标语言句子$\seq{x}$};
-\draw [->,thick]([xshift=0.2em]remark1.east).. controls (2.9,-0.2) and (2.9,-0.7) ..([yshift=0.2em]node3.north);
+\draw [->,thick]([xshift=0.2em]remark1.east).. controls (2.9,-0.25) and (2.9,-0.7) ..([yshift=0.2em]node3.north);


 \node [anchor=north] (node4-1) at ([xshift=-1.0em,yshift=-7.0em]remark1.south) {\small{\seq{y}}};

--- a/Chapter16/chapter16.tex
+++ b/Chapter16/chapter16.tex
@@ -233,12 +233,12 @@

 \parinterval 在训练一个神经网络的时候，往往会给定模型一个训练目标，希望模型通过不断训练在这个目标上表现地越来越好。我们希望模型在训练过程中可以自动提取到与训练目标相关的所有信息。然而，过分地关注单个训练目标，可能使模型忽略掉其他可能有帮助的信息，这些信息可能来自于一些其他相关的任务\upcite{DBLP:journals/corr/Ruder17a}。通过联合多个独立但相关的任务共同学习，任务之间相互``促进''，就是{\small\sffamily\bfnew{多任务学习}}\index{多任务学习}（Multitask Learning）\index{Multitask Learning}方法\upcite{DBLP:journals/corr/Ruder17a,DBLP:books/sp/98/Caruana98,liu2019multi}。多任务学习的常用做法是针对多个相关的任务，共享模型的部分参数来学习不同任务之间相似的特征，并通过特定的模块来学习每个任务独立的特征。常用的策略是对底层的模型参数进行共享，顶层的模型参数用于独立学习各个不同的任务，可以参考{\red{15.4.2小节多任务结构图}}。

-\parinterval 在神经机器翻译中，应用多任务学习的主要策略是将翻译任务作为主任务，同时设置一些仅使用单语数据的子任务，通过这些子任务来捕捉单语数据中的语言知识\upcite{DBLP:conf/emnlp/DomhanH17,DBLP:conf/emnlp/ZhangZ16,DBLP:journals/corr/LuongLSVK15}。一种多任务学习的方法是利用源语言单语数据，通过单个编码器对源语言数据进行建模，然后分别使用两个解码器来学习源语言排序和翻译任务。源语言排序任务是指对句子的顺序进行调整，可以通过单语数据来构造训练数据，从而使编码器训练地更加充分\upcite{DBLP:conf/emnlp/ZhangZ16}，如图所示。({\color{red}许： 加个图，单编码器，双解码器，可以参考论文\upcite{DBLP:conf/emnlp/ZhangZ16}，和下面的图一左一右应该就可以，两种多任务的方式})
+\parinterval 在神经机器翻译中，应用多任务学习的主要策略是将翻译任务作为主任务，同时设置一些仅使用单语数据的子任务，通过这些子任务来捕捉单语数据中的语言知识\upcite{DBLP:conf/emnlp/DomhanH17,DBLP:conf/emnlp/ZhangZ16,DBLP:journals/corr/LuongLSVK15}。一种多任务学习的方法是利用源语言单语数据，通过单个编码器对源语言数据进行建模，然后分别使用两个解码器来学习源语言排序和翻译任务。源语言排序任务是指对句子的顺序进行调整，可以通过单语数据来构造训练数据，从而使编码器训练得更加充分\upcite{DBLP:conf/emnlp/ZhangZ16}，如图\ref{fig:16-7-xc}所示。
 %----------------------------------------------
 \begin{figure}[htp]
    \centering
-    \input{./Chapter16/Figures/figure-examples-of-comparable-corpora}
-    \caption{占位置用}
+    \input{./Chapter16/Figures/figure-multitask-learning-in-machine-translation-1}
+    \caption{利用源语言单语数据的多任务学习}
    \label{fig:16-7-xc}
 \end{figure}
 %----------------------------------------------
@@ -248,8 +248,8 @@
 %----------------------------------------------
 \begin{figure}[htp]
 \centering
-\input{./Chapter16/Figures/figure-target-side-multi-task-learning}
-\caption{机器翻译中的多任务学习}
+\input{./Chapter16/Figures/figure-multitask-learning-in-machine-translation-2}
+\caption{利用语言模型的多任务学习}
 \label{fig:16-9-xc}
 \end{figure}
 %----------------------------------------------