16

61e7c918 · 曹润柘 · a7333a18 · 61e7c918 · 61e7c918
Commit 61e7c918 authored Jan 06, 2021 by 曹润柘
--- a/Chapter16/Figures/figure-data-based-domain-adaptation-approach.tex
+++ b/Chapter16/Figures/figure-data-based-domain-adaptation-approach.tex
+
+\begin{tikzpicture}[scale=0.8]
+% Three side-by-side panels illustrating data-based domain adaptation:
+%   (a) data weighting, (b) data selection, (c) pseudo data.
+% The styles below are shared by all panels; they are declared once and stay
+% local to this tikzpicture.
+\tikzset{
+  data/.style={draw,black,very thick,inner sep=2pt,rounded corners=0pt,minimum width=2.5em,minimum height=1.5em,anchor=west},
+  model/.style={draw,black,very thick,inner sep=3.5pt,rounded corners=0pt,fill=red!20,minimum width=3em,minimum height=1.5em,font=\footnotesize},
+  word/.style={inner sep=3.5pt,align=left,font=\scriptsize}
+}
+
+% ---------------- panel (a): data weighting ----------------
+\begin{scope}
+% top row: original samples, equal width, placed edge to edge
+\node[data,fill=blue!20] (one) at (0,0) {};
+\node[data,fill=green!20] (two) at (one.east) {};
+\node[data,fill=yellow!20] (three) at (two.east) {};
+
+% bottom row: the same three samples drawn with different widths
+\node[data,fill=blue!20,minimum width=1em] (one_) at ([yshift=-6em]one.south west) {};
+\node[data,fill=green!20,minimum width=4.5em] (two_) at (one_.east) {};
+\node[data,fill=yellow!20,minimum width=2em] (three_) at (two_.east) {};
+
+\node[model] (mo) at ([xshift=0.5em,yshift=-5em]two_.south){模型};
+
+% row captions on the left
+\node[word] at ([xshift=-1.5em]one.west) {原始 \\ 数据};
+\node[word] at ([xshift=-1.5em]one_.west) {加权};
+
+% sample labels above each box
+\node[word,font=\tiny] at ([yshift=1em]one.north) {$(x_1,y_1)$};
+\node[word,font=\tiny] at ([yshift=1em]two.north) {$(x_2,y_2)$};
+\node[word,font=\tiny] at ([yshift=1em]three.north) {$(x_3,y_3)$};
+
+\node[word,font=\tiny] at ([yshift=1em]one_.north) {$(x_1,y_1)$};
+\node[word,font=\tiny] at ([yshift=1em]two_.north) {$(x_2,y_2)$};
+\node[word,font=\tiny] at ([yshift=1em]three_.north) {$(x_3,y_3)$};
+
+% arrows from each original sample to its counterpart (ending above the label)
+\draw [->,thick] ([yshift=-0.2em]one.south) .. controls +(south:2.5em) and +(north:2.5em) .. ([yshift=1.5em]one_.north);
+\draw [->,thick] ([yshift=-0.2em]two.south) .. controls +(south:2.5em) and +(north:2.5em) .. ([yshift=1.5em]two_.north);
+\draw [->,thick] ([yshift=-0.2em]three.south) .. controls +(south:2.5em) and +(north:2.5em) .. ([yshift=1.5em]three_.north);
+
+% training arrow into the model box
+\draw [->,thick] ([xshift=0.5em,yshift=-0.2em]two_.south) -- ([yshift=0.2em]mo.north) node[pos=0.5,left,align=center,font=\footnotesize]{训练};
+
+\node[font=\small] at ([yshift=-4em]mo.south){(a)数据加权};
+\end{scope}
+
+% ---------------- panel (b): data selection ----------------
+% positioned relative to node (one) of panel (a)
+\begin{scope}
+\node[data,fill=blue!20] (one-2) at ([xshift=10.0em]one.east) {};
+\node[data,fill=green!20] (two-2) at (one-2.east) {};
+\node[data,fill=yellow!20] (three-2) at (two-2.east) {};
+
+% bottom row: only the first and third samples are kept
+\node[data,fill=blue!20] (one_-2) at ([yshift=-6em]one-2.south west) {};
+\node[data,fill=yellow!20] (three_-2) at (one_-2.east) {};
+
+\node[model] (mo-2) at ([xshift=1.7em,yshift=-5em]one_-2.south){模型};
+
+\node[word] at ([xshift=-1.5em]one-2.west) {原始 \\ 数据};
+\node[word] at ([xshift=-1.5em]one_-2.west) {选择};
+
+\node[word,font=\tiny] at ([yshift=1em]one-2.north) {$(x_1,y_1)$};
+\node[word,font=\tiny] at ([yshift=1em]two-2.north) {$(x_2,y_2)$};
+\node[word,font=\tiny] at ([yshift=1em]three-2.north) {$(x_3,y_3)$};
+
+\node[word,font=\tiny] at ([yshift=1em]one_-2.north) {$(x_1,y_1)$};
+\node[word,font=\tiny] at ([yshift=1em]three_-2.north) {$(x_3,y_3)$};
+
+\draw [->,thick] ([yshift=-0.2em]one-2.south) .. controls +(south:2.5em) and +(north:2.5em) .. ([yshift=1.5em]one_-2.north);
+\draw [->,thick] ([yshift=-0.2em]three-2.south) .. controls +(south:2.5em) and +(north:2.5em) .. ([yshift=1.5em]three_-2.north);
+
+\draw [->,thick] ([xshift=1.7em,yshift=-0.2em]one_-2.south) -- ([yshift=0.2em]mo-2.north) node[pos=0.5,left,align=center,font=\footnotesize]{训练};
+\node[font=\small] at ([yshift=-4em]mo-2.south){(b)数据选择};
+\end{scope}
+
+% ---------------- panel (c): pseudo data ----------------
+% positioned relative to node (one-2) of panel (b)
+\begin{scope}
+\node[data,fill=blue!20] (one-3) at ([xshift=10.0em]one-2.east) {};
+\node[data,fill=green!20] (two-3) at (one-3.east) {};
+\node[data,fill=yellow!20] (three-3) at (two-3.east) {};
+
+% bottom row: the three original samples plus one extra grey box (new_-3)
+\node[data,fill=blue!20] (one_-3) at ([yshift=-6em]one-3.south west) {};
+\node[data,fill=green!20] (two_-3) at (one_-3.east) {};
+\node[data,fill=yellow!20] (three_-3) at (two_-3.east) {};
+\node[data,fill=black!10] (new_-3) at (three_-3.east) {};
+
+\node[model] (mo-3) at ([xshift=1.7em,yshift=-5em]two_-3.south){模型};
+
+\node[word] at ([xshift=-1.5em]one-3.west) {原始 \\ 数据};
+\node[word] at ([xshift=-1.5em]one_-3.west) {伪数据};
+
+\node[word,font=\tiny] at ([yshift=1em]one-3.north) {$(x_1,y_1)$};
+\node[word,font=\tiny] at ([yshift=1em]two-3.north) {$(x_2,y_2)$};
+\node[word,font=\tiny] at ([yshift=1em]three-3.north) {$(x_3,y_3)$};
+% source of the extra sample: labelled x_4 only, drawn to the right of the top row
+\node[word,font=\scriptsize] (monolingual-3) at ([xshift=5em]three-3.south) {$x_4$};
+
+\node[word,font=\tiny] at ([yshift=1em]one_-3.north) {$(x_1,y_1)$};
+\node[word,font=\tiny] at ([yshift=1em]two_-3.north) {$(x_2,y_2)$};
+\node[word,font=\tiny] at ([yshift=1em]three_-3.north) {$(x_3,y_3)$};
+\node[word,font=\tiny] at ([yshift=1em]new_-3.north) {{\red $(x_4,y_4^*)$}};
+
+\draw [->,thick] ([yshift=-0.2em]one-3.south) .. controls +(south:2.5em) and +(north:2.5em) .. ([yshift=1.5em]one_-3.north);
+\draw [->,thick] ([yshift=-0.2em]two-3.south) .. controls +(south:2.5em) and +(north:2.5em) .. ([yshift=1.5em]two_-3.north);
+\draw [->,thick] ([yshift=-0.2em]three-3.south) .. controls +(south:2.5em) and +(north:2.5em) .. ([yshift=1.5em]three_-3.north);
+\draw [->,thick] ([yshift=-0.2em]monolingual-3.south) .. controls +(south:2.5em) and +(north:2.5em) .. ([xshift=0.8em,yshift=1.6em]new_-3.north);
+
+\draw [->,thick] ([xshift=1.7em,yshift=-0.2em]two_-3.south) -- ([yshift=0.2em]mo-3.north) node[pos=0.5,left,align=center,font=\footnotesize]{训练};
+\node[font=\small] at ([yshift=-4em]mo-3.south){(c)伪数据};
+\end{scope}
+\end{tikzpicture}
--- a/Chapter16/chapter16.tex
+++ b/Chapter16/chapter16.tex
@@ -767,7 +767,16 @@
 \label{sec:domain-adaptatoin-data-based-method}

 \parinterval 在统计机器翻译时代，如何有效利用外部数据来改善目标领域的翻译效果已经备受关注
-。其中的绝大多数思想和方法和翻译模型无关，因此这些方法也同样适用于神经机器翻译。基于数据的领域适应可以分为基于数据加权的方法、基于数据选择的方法、基于伪数据的方法和使用多领域数据。图XX展示了这几种方法的示意图。
+。其中的绝大多数思想和方法与翻译模型无关，因此这些方法也同样适用于神经机器翻译。基于数据的领域适应可以分为基于数据加权的方法、基于数据选择的方法、基于伪数据的方法和使用多领域数据的方法。图\ref{fig:16-19-2}展示了其中前三种方法的示意图。
+
+%----------------------------------------------
+\begin{figure}[htbp] % a lone [h] is rewritten to [ht] with a warning; give LaTeX the full set of placements
+\centering
+\input{Chapter16/Figures/figure-data-based-domain-adaptation-approach}
+\caption{基于数据的领域适应方法}
+\label{fig:16-19-2}
+\end{figure}
+%----------------------------------------------

 %----------------------------------------------------------------------------------------
 %    NEW SUB-SUB-SECTION
@@ -787,8 +796,6 @@

 \parinterval 数据选择方法可以从源领域中选择出和目标领域相似的训练数据用于训练，但可用的数据是较为有限的。因此，另外一种思路是对现有的双语数据进行修改\upcite{DBLP:conf/acl/UtiyamaI03}（如抽取双语短语对等）或通过单语数据生成伪数据来增加数据量\upcite{DBLP:conf/wmt/BertoldiF09}。这个问题和\ref{effective-use-of-data}小节所面临的场景是基本一致的，因此可以直接复用\ref{effective-use-of-data}小节所描述的方法。

-{\color{red} 图XX：基于数据的领域适应方法，三个子图分别描述上面三个方法，找孟霞、曾信讨论}
-
 %----------------------------------------------------------------------------------------
 %    NEW SUB-SUB-SECTION
 %----------------------------------------------------------------------------------------