Commit 9eb3620a by 孟霞

合并分支 'master' 到 'mengxia'

Master

查看合并请求 !812
parents 5cef847e b2f1d7cc
\begin{tikzpicture}
\tikzstyle{node1}=[inner sep=0mm,minimum height=1em,minimum width=3em,fill=ugreen!10!blue!10]
\tikzstyle{node2}=[inner sep=0mm,minimum height=1em,minimum width=3em,fill=ugreen!20!blue!20]
\tikzstyle{node3}=[inner sep=0mm,minimum height=1em,minimum width=3em,fill=ugreen!30!blue!30]
\tikzstyle{node4}=[inner sep=0mm,minimum height=1em,minimum width=3em,fill=ugreen!40!blue!40]
\tikzstyle{node5}=[inner sep=0mm,minimum height=1em,minimum width=3em,fill=ugreen!50!blue!50]
\tikzstyle{node1}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=ugreen!10!blue!10]
\tikzstyle{node2}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=ugreen!20!blue!20]
\tikzstyle{node3}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=ugreen!30!blue!30]
\tikzstyle{node4}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=ugreen!40!blue!40]
\tikzstyle{node5}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=ugreen!50!blue!50]
\begin{scope}
\node[anchor=north west] (l1) at (0,0) {};
\node[anchor=west] (l2) at ([xshift=10.3em,yshift=0em]l1.east) {};
\draw [->,thick] ([xshift=-2em,yshift=-1em]l1.south) -- ([xshift=2em,yshift=-1em]l2.south);
\node[anchor=north,node1] (c1) at ([xshift=0em,yshift=-2em]l1.south) {};
\node[anchor=west,node2] (c2) at ([xshift=0em,yshift=0em]c1.east) {};
\node[anchor=west,node3] (c3) at ([xshift=0em,yshift=0em]c2.east) {};
\node[anchor=west,node4] (c4) at ([xshift=0em,yshift=0em]c3.east) {};
\node[anchor=west,node5] (c5) at ([xshift=0em,yshift=0em]c4.east) {};
\node[anchor=north] (lb1) at ([xshift=0em,yshift=-1.5em]c3.south) {(a)样本难易程度图示};
\end{scope}
\begin{scope}[yshift = -1.7in]
\foreach \i / \j / \z in
{0/0/node1, 1/0/node1, 2/0/node1, 3/0/node1, 4/0/node1, 5/0/node1, 6/0/node1,
1/1/node2, 2/1/node2, 3/1/node2, 4/1/node2, 5/1/node2, 6/1/node2,
2/2/node3, 3/2/node3, 4/2/node3, 5/2/node3, 6/2/node3,
3/3/node4, 4/3/node4, 5/3/node4, 6/3/node4,
4/4/node5, 5/4/node5, 6/4/node5,
{0/0/node1,
1/0/node1, 1/1/node2,
2/0/node1, 2/1/node2, 2/2/node3,
3/0/node1, 3/1/node2, 3/2/node3, 3/3/node4,
4/0/node1, 4/1/node2, 4/2/node3, 4/3/node4, 4/4/node5,
5/0/node1, 5/1/node2, 5/2/node3, 5/3/node4, 5/4/node5,
}
\node[anchor=north west,\z,draw=white](n\i\j) at (1.0*3em*\j + 0*0em,-1.0*1em*\i + 0*0em){};
\node[anchor=south west,\z,draw=white](n\i\j) at (1.0*3.2em*\i + 0*0em,1.5*1em*\j + 0*0em){};
\node[anchor=south west,node1,draw=white](nc) at (0,0){};
\node[anchor=north west,node1,draw=white](nc) at (0,0){};
\draw [->,very thick] ([xshift=-1em,yshift=-1em]n00.south west) -- ([xshift=19.7em,yshift=-1em]n00.south west);
\draw [->,very thick] ([xshift=-1em,yshift=-1em]n00.south west) -- ([xshift=-1em,yshift=9em]n00.south west);
\draw [->,very thick] ([xshift=-1em,yshift=1em]n00.north west) -- ([xshift=16em,yshift=1em]n00.north west);
\draw [->,very thick] ([xshift=-1em,yshift=1em]n00.north west) -- ([xshift=-1em,yshift=-9em]n00.north west);
\node[anchor=east] (x1) at ([xshift=-1em,yshift=8em]n00.south west) {数据块};
\node[anchor=north west] (y1) at ([xshift=14.5em,yshift=-1.3em]n00.south west) {训练时长};
\node[anchor=west,font=\small] (t1) at ([xshift=0.5em,yshift=0em]n52.east) {$\cdots$};
\node[anchor=west] (x1) at ([xshift=12em,yshift=2em]n00.north west) {数据块};
\node[anchor=east] (y1) at ([xshift=-1em,yshift=-7.5em]n00.north west) {轮次};
\node[anchor=west,font=\small] (t1) at ([xshift=0em,yshift=-1em]n60.south west) {$\ldots$直到模型收敛};
\node[anchor=west,node1,minimum width=0.5em] (c1) at ([xshift=3em,yshift=-2.5em]t1.east) {};
\node[anchor=south,node2,minimum width=0.5em] (c2) at ([xshift=0em,yshift=0em]c1.north) {};
\node[anchor=south,node3,minimum width=0.5em] (c3) at ([xshift=0em,yshift=0em]c2.north) {};
\node[anchor=south,node4,minimum width=0.5em] (c4) at ([xshift=0em,yshift=0em]c3.north) {};
\node[anchor=south,node5,minimum width=0.5em] (c5) at ([xshift=0em,yshift=0em]c4.north) {};
\node[anchor=north] (lb2) at ([xshift=0em,yshift=-3em]n62.south) {(b)不同训练阶段使用的数据};
\node[anchor=north] (l1) at ([xshift=0em,yshift=-0.3em]c1.south) {};
\node[anchor=south] (l2) at ([xshift=0em,yshift=0.3em]c5.north) {};
\end{scope}
......
......@@ -4,7 +4,7 @@
\def\nodespace{1}
\def\picturespace{0.8}
\tikzstyle{neuronnode} = [minimum size=1.8em,circle,draw,very thick,ublue,inner sep=0pt, fill=white,align=center]
\tikzstyle{neuronnode} = [minimum size=1.8em,circle,draw=ublue,very thick,inner sep=0pt, fill=white,align=center]
%standard
\node [neuronnode] (neuron_b) at (0,0) {\scriptsize{$b^{l}$}};
......@@ -14,9 +14,9 @@
\node [neuronnode] (neuron_z) at (1.2 * \nodespace,-1.5 * \neuronsep) {\scriptsize{$z_{i}^{l+1}$}};
\node [neuronnode] (neuron_y') at (2.4 * \nodespace,-1.5 * \neuronsep) {\scriptsize{$x_{i}^{l+1}$}};
\node [anchor=north,ublue] (standard) at ([yshift=-4em]neuron_z.south) {\scriptsize{标准网络}};
\node [ublue] (standard) at ([xshift=-1em]neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}};
\node [ublue] (standard) at ([xshift=0.6em,yshift=0.3em]neuron_z.east) {\scriptsize{$f$}};
\node [anchor=north] (standard) at ([yshift=-4em]neuron_z.south) {\scriptsize{标准网络}};
\node [] (standard) at ([xshift=-1em]neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}};
\node [] (standard) at ([xshift=0.6em,yshift=0.3em]neuron_z.east) {\scriptsize{$f$}};
\draw [->,line width=0.3mm] (neuron_b.east) -- (neuron_z.west);
\draw [->,line width=0.3mm] (neuron_y3.east) -- (neuron_z.west);
......@@ -40,9 +40,9 @@
\node [neuronnode] (drop_neuron_r2) at (4.4*\nodespace,-1.5*\neuronsep) {\scriptsize{$r_{2}^{l}$}};
\node [neuronnode] (drop_neuron_r1) at (4.4*\nodespace,-2.5*\neuronsep) {\scriptsize{$r_{1}^{l}$}};
\node [anchor=north,ublue] (standard) at ([xshift=2em,yshift=-4em]drop_neuron_z.south) {\scriptsize{应用Dropout后的网络}};
\node [ublue] (standard) at ([xshift=-1em]drop_neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}};
\node [ublue] (standard) at ([xshift=0.6em,yshift=0.3em]drop_neuron_z.east) {\scriptsize{$f$}};
\node [anchor=north] (standard) at ([xshift=2em,yshift=-4em]drop_neuron_z.south) {\scriptsize{应用Dropout后的网络}};
\node [] (standard) at ([xshift=-1em]drop_neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}};
\node [] (standard) at ([xshift=0.6em,yshift=0.3em]drop_neuron_z.east) {\scriptsize{$f$}};
%structure
\draw [->,line width=0.3mm] (drop_neuron_b.east) -- (drop_neuron_z.west);
\draw [->,line width=0.3mm] (drop_neuron_y3'.east) -- (drop_neuron_z.west);
......
......@@ -54,7 +54,7 @@
\draw [decorate,decoration={brace}] ([yshift=0.1cm]prob1.north west) to node [midway,above,font=\small] {学习目标(Teacher输出)} ([yshift=0.1cm]prob4.north east);
% Vocab
\node [word,anchor=center] () at ([xshift=-0.9cm]prob11.center) {EOS};
\node [word,anchor=center] () at ([xshift=-0.9cm]prob11.center) {$\langle$eos$\rangle$};
\node [word,anchor=center] () at ([xshift=-0.9cm]prob21.center) {I};
\node [word,anchor=center] () at ([xshift=-0.9cm]prob31.center) {am};
\node [word,anchor=center] () at ([xshift=-0.9cm]prob41.center) {fine};
......@@ -72,7 +72,7 @@
\draw [-latex,thick] (prob\i.south) to ([yshift=-0.5cm]prob\i.south);
% Input
\node [word,anchor=south] (input1) at ([yshift=-1.8cm]prob1.south) {EOS};
\node [word,anchor=south] (input1) at ([yshift=-1.8cm]prob1.south) {$\langle$eos$\rangle$};
\node [word,anchor=south] (input2) at ([yshift=-1.8cm]prob2.south) {I};
\node [word,anchor=south] (input3) at ([yshift=-1.8cm]prob3.south) {am};
\node [word,anchor=south] (input4) at ([yshift=-1.8cm]prob4.south) {fine};
......@@ -137,7 +137,7 @@
\draw [decorate,decoration={brace}] ([yshift=0.1cm]prob1.north west) to node [midway,above,font=\small] {学习目标(Teacher输出)} ([yshift=0.1cm]prob4.north east);
% Vocab
\node [word,anchor=center] () at ([xshift=-0.9cm]prob11.center) {EOS};
\node [word,anchor=center] () at ([xshift=-0.9cm]prob11.center) {$\langle$eos$\rangle$};
\node [word,anchor=center] () at ([xshift=-0.9cm]prob21.center) {I};
\node [word,anchor=center] () at ([xshift=-0.9cm]prob31.center) {am};
\node [word,anchor=center] () at ([xshift=-0.9cm]prob41.center) {fine};
......@@ -155,7 +155,7 @@
\draw [-latex,thick] (prob\i.south) to ([yshift=-0.5cm]prob\i.south);
% Input
\node [word,anchor=south] (input1) at ([yshift=-1.8cm]prob1.south) {EOS};
\node [word,anchor=south] (input1) at ([yshift=-1.8cm]prob1.south) {$\langle$eos$\rangle$};
\node [word,anchor=south] (input2) at ([yshift=-1.8cm]prob2.south) {I};
\node [word,anchor=south] (input3) at ([yshift=-1.8cm]prob3.south) {am};
\node [word,anchor=center] (input4) at ([xshift=1cm]input3.center) {good};
......
\begin{tikzpicture}
\node[font=\scriptsize,align=left] (model) at (0,0) {模型输出:\\(未使用\\标签平滑)};
\node[anchor=north west,font=\scriptsize,align=left] (label_smooth) at ([yshift=-0.3em]model.south west) {模型输出:\\(使用标\\签平滑)};
\node[anchor=south west,font=\scriptsize] (one-hot) at ([yshift=1em]model.north west) {One-hot分布:};
\node[font=\scriptsize,align=left] (model) at (0,0) {模型输出:};
\node[anchor=north west,font=\scriptsize,align=left] (label_smooth) at ([yshift=-1em]model.south west) {学习目标:\\(使用标\\签平滑)};
\node[anchor=south west,font=\scriptsize,align=left] (one-hot) at ([yshift=1em]model.north west) {标准答案:\\(未使用\\标签平滑)};
%model out
\node [anchor=west,minimum width=1.2em,minimum height=0.2em,fill=ublue!80,inner sep=0pt] (model_label1) at ([xshift=1.5em,yshift=-0.8em]model.east) {};
\node [anchor=west,minimum width=1.2em,minimum height=0.2em,fill=ublue!80,inner sep=0pt] (model_label1) at ([xshift=1.5em,yshift=-0.4em]model.east) {};
\node [anchor=south,font=\scriptsize] (model_w1) at (model_label1.north) {$p_{1}$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.1em,fill=ublue!80,inner sep=0pt] (model_label2) at (model_label1.south east) {};
\node [anchor=south,font=\scriptsize] (model_w2) at (model_label2.north) {$p_{2}$};
......@@ -59,14 +59,14 @@
\node[font=\scriptsize] (line2) at ([xshift=5.9em,yshift=3em]model_label7.east) {$Loss =-\log p_{3}$};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.5em,rounded corners=1pt,very thick,dotted,draw=red] [fit =(model_w3) (model_label1) (model_label7)] (box1) {};
\node [rectangle,inner sep=0.5em,rounded corners=1pt,very thick,dotted,draw=red] [fit =(model_w3) (model_label1) (model_label7) (one_hot_w3)] (box1) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=purple!10,drop shadow,draw=purple] [fit = (line2)] (box3) {};
\draw [->,dotted,very thick,red] ([yshift=-0.5em]box1.east) .. controls +(east:1) and +(west:1) .. (box3.west);
\draw [->,dotted,very thick,red] ([yshift=1em]box1.east) .. controls +(east:1) and +(west:1) .. (box3.west);
\node [rectangle,inner sep=0.5em,rounded corners=1pt,very thick,dotted,draw=ugreen] [fit =(w3) (label1) (label7) ] (box2) {};
\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=ugreen] [fit =(w3) (label1) (label7) (model_w3)] (box2) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=green!10,drop shadow,draw=ugreen] [fit = (line1)] (box4) {};
\draw [->,dotted,very thick,ugreen] ([yshift=-0.5em]box2.east) .. controls +(east:1) and +(west:1) .. (box4.west);
\draw [->,dotted,very thick,ugreen] ([yshift=-1.5em]box2.east) .. controls +(east:1) and +(west:1) .. (box4.west);
\end{pgfonlayer}
......
......@@ -100,7 +100,7 @@
\draw [->,thick] ([yshift=-0.2em]one-3.south) .. controls +(south:2.5em) and +(north:2.5em) .. ([yshift=1.5em]one_-3.north);
\draw [->,thick] ([yshift=-0.2em]two-3.south) .. controls +(south:2.5em) and +(north:2.5em) .. ([yshift=1.5em]two_-3.north);
\draw [->,thick] ([yshift=-0.2em]three-3.south) .. controls +(south:2.5em) and +(north:2.5em) .. ([yshift=1.5em]three_-3.north);
\draw [->,thick] ([yshift=-0.0em]monolingual-3.south) .. controls +(south:2.5em) and +(north:2.5em) .. ([xshift=0.8em,yshift=1.7em]new_-3.north) node[pos=0.5,left,align=center,font=\tiny]{解码};
\draw [->,thick] ([yshift=-0.0em]monolingual-3.south) .. controls +(south:2.5em) and +(north:2.5em) .. ([xshift=0.8em,yshift=1.7em]new_-3.north) node[pos=0.5,left,align=center,font=\tiny]{翻译};
\draw [->,thick] ([xshift=1.7em,yshift=-0.2em]two_-3.south) -- ([yshift=0.2em]mo-3.north) node[pos=0.5,left,align=center,font=\footnotesize]{训练};
\node[font=\small] at ([yshift=-4em]mo-3.south){(c)伪数据};
......@@ -110,13 +110,13 @@
\node [rectangle,rounded corners=1pt,fill=cyan!10] [fit = (w4-3) (new_-3)] (box2) {};
\end{pgfonlayer}
\node[word,draw=orange!50,dotted,very thick,inner sep=2.5pt] (realdata-3) at ([xshift=-4.5em,yshift=-2em]box1.south) {真实数据};
\node[word,draw=cyan!50,dotted,very thick,inner sep=2.5pt] (fake-3) at ([xshift=1em,yshift=-2em]box2.south) {伪数据};
\node[word,draw,dotted,very thick,inner sep=2.5pt] (monodata-3) at ([xshift=-0.5em,yshift=2em]monolingual-3.north) {单语数据};
\node[word,draw=orange!50,dotted,very thick,inner sep=2.5pt] (realdata-3) at ([xshift=-3.5em,yshift=-2em]box1.south) {真实数据};
\node[word,draw=cyan!50,dotted,very thick,inner sep=2.5pt] (fake-3) at ([xshift=0em,yshift=-2em]box2.south) {伪数据};
\node[word,draw,dotted,very thick,inner sep=2.5pt] (monodata-3) at ([xshift=0em,yshift=2em]monolingual-3.north) {单语数据};
\draw[->,dotted,very thick] ([yshift=0.0em]monolingual-3.north)-- ([yshift=-0.2em,xshift=0.45em]monodata-3.south);
\draw[->,dotted,very thick,cyan] (box2.south) -- ([xshift=-1em,yshift=0.2em]fake-3.north);
\draw[->,dotted,very thick,orange] ([xshift=-3.5em]box1.south) -- ([xshift=1em,yshift=0.2em]realdata-3.north);
\draw[->,dotted,very thick] ([yshift=0.0em]monolingual-3.north)-- ([yshift=-0.2em,xshift=0.0em]monodata-3.south);
\draw[->,dotted,very thick,cyan] (box2.south) -- ([xshift=-0em,yshift=0.2em]fake-3.north);
\draw[->,dotted,very thick,orange] ([xshift=-3.5em]box1.south) -- ([xshift=0em,yshift=0.2em]realdata-3.north);
\end{scope}
\end{tikzpicture}
......@@ -6,32 +6,33 @@
\node [anchor=center] (node1-1) at (0,0) {\small{$y'$}};
\node [anchor=center] (node1-1) at (0,0) {\small{$y$}};
\node[anchor=north,rec,fill=blue!20](node1-2) at ([yshift=-2.0em]node1-1.south) {\small{解码器}};
\node[anchor=north,rec,fill=red!20](node1-3) at ([yshift=-2em]node1-2.south) {\small{编码器}};
\node[anchor=east](node1-5) at ([xshift=-2em]node1-2.west) {\small{$y$}};
\node[anchor=east](node1-5) at ([xshift=-2em]node1-2.west) {\small{$y_{<}$}};
\node[anchor=north](node1-4) at ([yshift=-2em]node1-3.south) {\small{$x$}};
\draw [->,thick](node1-4.north)--(node1-3.south);
\draw [->,thick](node1-5.east)--(node1-2.west);
\draw [->,thick](node1-3.north)--(node1-2.south);
\draw [->,thick](node1-2.north)--(node1-1.south);
\node [anchor=center] (node2-1) at ([xshift=12.0em]node1-1.east) {\small{$y'$}};
\node [anchor=center] (node2-1) at ([xshift=12.0em]node1-1.east) {\small{$y$}};
\node[anchor=north,rec,fill=blue!20](node2-2) at ([yshift=-2.0em]node2-1.south) {\small{解码器}};
\node[anchor=north,rec,fill=red!20](node2-3) at ([yshift=-2em]node2-2.south) {\small{编码器}};
\node[anchor=east](node2-5) at ([xshift=-2em]node2-2.west) {\small{$y$}};
\node[anchor=east](node2-5) at ([xshift=-2em]node2-2.west) {\small{$y_{<}$}};
\node[anchor=north](node2-4) at ([yshift=-2em]node2-3.south) {\small{$x$}};
\node[anchor=west,rec,fill=yellow!20](node2-6) at ([xshift=3.0em]node2-3.east) {\small{解码器}};
\node[anchor=south](node2-7) at ([yshift=2em]node2-6.north) {\small{$x'$}};
\node[anchor=south](node2-7) at ([yshift=2em]node2-6.north) {\small{$\hat{x}$}};
\draw [->,thick](node2-4.north)--(node2-3.south);
\draw [->,thick](node2-5.east)--(node2-2.west);
\draw [->,thick](node2-3.north)--(node2-2.south)node[pos=0.5,left,font=\scriptsize]{翻译};
\draw [->,thick](node2-2.north)--(node2-1.south);
\draw [->,thick](node2-3.east)--(node2-6.west)node[pos=0.5,above,font=\scriptsize]{重排序};
\draw [->,thick](node2-6.north)--(node2-7.south);
\draw [->,thick](node2-3.north)--(node2-2.south);
\draw [->,thick](node2-2.north)--(node2-1.south)node[pos=0.5,left,font=\scriptsize]{翻译};
\draw [->,thick](node2-3.east)--(node2-6.west);
\draw [->,thick](node2-6.north)--(node2-7.south)node[pos=0.5,left,font=\scriptsize]{调整语序};
\node [anchor=east] (node1) at ([xshift=-2.0em]node1-1.west) {\small{$x,y$:双语数据}};
\node [anchor=south] (node2) at ([xshift=1.96em]node1.north) {\small{$y_{<}$:目标语言文本数据}};
\node [anchor=north](pos1) at ([yshift=0em]node1-4.south) {\small{(a)单任务学习}};
\node [anchor=west](pos2) at ([xshift=10.0em]pos1.east) {\small{(b)多任务学习}};
......
\begin{tikzpicture}
\begin{scope}
\node [anchor=center] (node1-1) at (0,0) {\small{$y'$}};
\node[anchor=south,line width=0.6pt,draw,rounded corners,minimum height=1.5em,minimum width=4.3em,fill=blue!20](node1-2) at ([yshift=-3em]node1-1.south) {\small{Softmax}};
\node [anchor=center] (node1-1) at (0,0) {\small{$y$}};
\node[anchor=north,line width=0.6pt,draw,rounded corners,minimum height=2.2em,minimum width=4.3em,fill=blue!20](node1-3) at ([yshift=-2.0em]node1-2.south) {\small{解码器}};
\node[anchor=north,line width=0.6pt,draw,rounded corners,minimum height=2.2em,minimum width=4.3em,fill=blue!20](node1-3) at ([yshift=-2.0em]node1-1.south) {\small{解码器}};
\node[anchor=north,line width=0.6pt,draw,rounded corners,minimum height=2.2em,minimum width=4.3em,fill=yellow!20](node3-3) at ([yshift=-2.0em]node1-3.south) {\small{语言模型}};
\node [anchor=west] (node3-1) at ([xshift=4.0em]node3-3.east) {\small{$z$}};
\node[anchor=west,line width=0.6pt,draw,rounded corners,minimum height=1.5em,minimum width=4.3em,fill=blue!20](node3-2) at ([xshift=2em]node3-3.east) {\small{Softmax}};
\node [anchor=north] (node3-1) at ([yshift=3.0em]node3-2.north) {\small{$z'$}};
\node[anchor=north](node3-41) at ([xshift=-0.6em,yshift=-2em]node3-3.south) {\small{$y$}};
\node[anchor=north](node3-42) at ([xshift=0.6em,yshift=-2em]node3-3.south) {\small{$z$}};
\node[anchor=north](node3-41) at ([yshift=-2em]node3-3.south) {\small{$y_{<}+z_{<}$}};
\node[anchor=east,line width=0.6pt,draw,rounded corners,minimum height=2.2em,minimum width=4.3em,fill=red!20](node2-1) at ([xshift=-2em]node1-3.west) {\small{编码器}};
\node[anchor=north](node2-2) at ([yshift=-2em]node2-1.south) {\small{$x$}};
\node [rectangle,rounded corners,draw=red,line width=0.2mm,densely dashed,inner sep=0.4em] [fit = (node3-2) (node3-3)] (inputshadow) {};
\draw [->,thick](node1-3.north)--(node1-2);
\draw [->,thick](node1-2.north)--(node1-1);
\node [rectangle,rounded corners,draw=red,line width=0.2mm,densely dashed,inner sep=0.4em] [fit = (node3-1) (node3-3)] (inputshadow) {};
\draw [->,thick](node1-3.north)--(node1-1)node[pos=0.5,left,font=\scriptsize]{Softmax};
\draw [->,thick](node2-2.north)--(node2-1);
\draw[->,thick](node2-1.east)--(node1-3.west);
\draw [->,thick](node3-41.north)--([xshift=-0.6em]node3-3.south);
\draw [->,thick](node3-42.north)--([xshift=0.6em]node3-3.south);
\draw [->,thick](node3-41.north)--(node3-3.south);
\draw [->,thick](node3-3.north)--(node1-3.south);
\draw [->,thick](node3-2.north)--(node3-1);
\draw[->,thick](node3-3.east)--(node3-2.west);
\draw[->,thick](node3-3.east)--(node3-1.west)node[pos=0.5,above,font=\scriptsize]{Softmax};
\node [anchor=east] (node2-1-1) at ([xshift=-12.0em,yshift=-4.25em]node1-1.west) {\small{$y'$}};
\node[anchor=south,line width=0.6pt,draw,rounded corners,minimum height=1.5em,minimum width=4.3em,fill=blue!20](node2-1-2) at ([yshift=-3em]node2-1-1.south) {\small{Softmax}};
\node[anchor=north,line width=0.6pt,draw,rounded corners,minimum height=2.2em,minimum width=4.3em,fill=blue!20](node2-1-3) at ([yshift=-2.0em]node2-1-2.south) {\small{解码器}};
\node [anchor=east] (node2-1-1) at ([xshift=-12.0em,yshift=-4.25em]node1-1.west) {\small{$y$}};
\node[anchor=north,line width=0.6pt,draw,rounded corners,minimum height=2.2em,minimum width=4.3em,fill=blue!20](node2-1-3) at ([yshift=-2.0em]node2-1-1.south) {\small{解码器}};
\node[anchor=east,line width=0.6pt,draw,rounded corners,minimum height=2.2em,minimum width=4.3em,fill=red!20](node2-2-1) at ([xshift=-2em]node2-1-3.west) {\small{编码器}};
\node[anchor=north](node2-2-2) at ([yshift=-2em]node2-2-1.south) {\small{$x$}};
\node[anchor=north](node2-2-3) at ([yshift=-2em]node2-1-3.south) {\small{$y$}};
\node[anchor=north](node2-2-3) at ([yshift=-2em]node2-1-3.south) {\small{$y_{<}$}};
\draw [->,thick](node2-1-2.north)--(node2-1-1);
\draw [->,thick](node2-2-2.north)--(node2-2-1);
\draw[->,thick](node2-2-1.east)--(node2-1-3.west);
\draw [->,thick](node2-1-3.north)--(node2-1-2.south);
\draw [->,thick](node2-1-3.north)--(node2-1-1)node[pos=0.5,left,font=\scriptsize]{Softmax};
\draw [->,thick](node2-2-3.north)--(node2-1-3);
\node [anchor=east] (node1) at ([xshift=-2.0em,yshift=4em]node2-1-1.west) {\small{$x,y$:双语数据}};
\node [anchor=east] (node1) at ([xshift=-2.0em,yshift=3em]node2-1-1.west) {\small{$x,y$:双语数据}};
\node [anchor=south] (node3) at ([xshift=1.96em]node1.north) {\small{$y_{<}$:目标语言文本数据}};
\node [anchor=north] (node2) at ([xshift=0.45em]node1.south) {\small{$z$}:单语数据};
\node [anchor=north](pos1) at ([yshift=-3.5em]node3-3.south) {\small{(b)多任务学习}};
......
......@@ -14,7 +14,7 @@
\draw [->,thick] ([yshift=1pt]data.north) .. controls +(90:2em) and +(90:2em) .. ([yshift=1pt]model.north) node[above,midway] {\small{参数优化}};
\draw [->,thick] ([yshift=1pt]model.south) .. controls +(-90:2em) and +(-90:2em) .. ([yshift=1pt]data.south) node[below,midway] {\small{数据优化}};
\node[word] at ([xshift=-0.5em,yshift=-4em]data.south){\small{(a) 基于数据的初始化方法}};
\node[word] at ([xshift=-0.5em,yshift=-4em]data.south){\small{(a) 基于数据的初始化}};
\end{scope}
\end{tikzpicture}
......@@ -33,7 +33,7 @@
\draw [->,thick] ([yshift=1pt]data.north) .. controls +(90:2em) and +(90:2em) .. ([yshift=1pt]model.north) node[above,midway] {\small{参数优化}};
\draw [->,thick] ([yshift=1pt]model.south) .. controls +(-90:2em) and +(-90:2em) .. ([yshift=1pt]data.south) node[below,midway] {\small{数据优化}};
\node[word] at ([xshift=-0.5em,yshift=-4em]model.south){\small{(b) 基于模型的初始化方法}};
\node[word] at ([xshift=-0.5em,yshift=-4em]model.south){\small{(b) 基于模型的初始化}};
\end{scope}
\end{tikzpicture}
......
......@@ -5,35 +5,33 @@
\tikzstyle{node}=[rounded corners=4pt,draw,minimum height=3em,drop shadow,font=\footnotesize]
\node[node,minimum width=6em,minimum height=2.4em,fill=red!20,line width=0.6pt] (encoder1) at (0,0){\small 编码器};
\node[node,anchor=west,minimum width=6em,minimum height=2.4em,fill=red!20,line width=0.6pt] (encoder2) at ([xshift=4em,yshift=0em]encoder1.east){\small 编码器};
\node[node,anchor=west,minimum width=6em,minimum height=2.4em,fill=red!30,line width=0.6pt] (encoder3) at ([xshift=3em]encoder2.east){\small 编码器};
\node[node,anchor=west,minimum width=6em,minimum height=2.4em,fill=red!30,line width=0.6pt] (encoder2) at ([xshift=7em,yshift=0em]encoder1.east){\small 编码器};
\node[node,anchor=north,minimum width=6em,minimum height=2.4em,fill=blue!20,line width=0.6pt] (decoder1) at ([yshift=-2em]encoder1.south){\small 解码器};
\node[node,anchor=west,minimum width=6em,minimum height=2.4em,fill=blue!30,line width=0.6pt] (decoder2) at ([xshift=7em,yshift=0em]decoder1.east){\small 解码器};
\node[node,anchor=north,minimum width=6em,minimum height=2.4em,fill=blue!20,line width=0.6pt] (decoder1) at ([yshift=-3em]encoder1.south){\small 解码器};
\node[node,anchor=west,minimum width=6em,minimum height=2.4em,fill=blue!20,line width=0.6pt] (decoder2) at ([xshift=4em,yshift=0em]decoder1.east){\small 解码器};
\node[node,anchor=west,minimum width=6em,minimum height=2.4em,fill=blue!30,line width=0.6pt] (decoder3) at ([xshift=3em]decoder2.east){\small 解码器};
\node[anchor=north,font=\scriptsize,fill=yellow!20,drop shadow,draw] (w1) at ([yshift=-1.6em]decoder1.south){知识 \ 就是 \ 力量 \ \ <eos>};
\node[anchor=north,font=\scriptsize,fill=green!20,drop shadow,draw] (w3) at ([yshift=-1.6em]decoder3.south){El conocimiento es poder . <eos>};
\node[anchor=north,font=\scriptsize,fill=green!20,drop shadow,draw] (w3) at ([yshift=-1.6em]decoder2.south){El conocimiento es poder . <eos>};
\node[anchor=south,font=\scriptsize,fill=orange!20,drop shadow,draw] (w2) at ([yshift=1.6em]encoder1.north){Knowledge \ is \ power \ . };
\node[anchor=south,font=\scriptsize,fill=orange!20,drop shadow,draw] (w4) at ([yshift=1.6em]encoder3.north){Knowledge \ is \ power \ . };
\node[anchor=south,font=\scriptsize,fill=orange!20,drop shadow,draw] (w4) at ([yshift=1.6em]encoder2.north){Knowledge \ is \ power \ . };
\draw[->,thick] (decoder1.-90) -- (w1.north);
\draw[->,thick] (decoder3.-90) -- (w3.north);
\draw[->,thick] (decoder2.-90) -- (w3.north);
\draw[->,thick] (w2.-90) -- (encoder1.90);
\draw[->,thick] (w4.-90) -- (encoder3.90);
\draw[->,thick] (w4.-90) -- (encoder2.90);
\draw[->,thick](encoder1.south)--(decoder1.north);
\draw[->,thick](encoder2.south)--(decoder2.north);
\node [anchor=north,single arrow,minimum height=2.2em,fill=blue!20,rotate=-90] (arrow1) at ([yshift=-1.4em,xshift=0.4em]encoder1.south) {};
\node [anchor=north,single arrow,minimum height=2.2em,fill=red!20,rotate=-90] (arrow2) at ([yshift=-1.4em,xshift=0.4em]encoder2.south) {};
\node [anchor=north,single arrow,minimum height=2.2em,fill=red!20,rotate=-90] (arrow3) at ([yshift=-1.4em,xshift=0.4em]encoder3.south) {};
\node[anchor=south,yshift=3.4em] at (encoder1.north){\small\bfnew{父模型}};
\node[anchor=south,yshift=3.4em] at (encoder3.north){\small\bfnew{子模型}};
\node[anchor=south,yshift=3.4em] at (encoder2.north){\small\bfnew{子模型}};
\draw[->,dash pattern=on 3pt off 2pt,thick] ([yshift=0em]encoder1.0) -- node[above,font=\scriptsize]{参数复用}(encoder2.180);
\draw[->,dash pattern=on 3pt off 2pt,thick] (encoder2.0) -- node[above,font=\scriptsize]{微调}(encoder3.180);
\draw[->,dash pattern=on 3pt off 2pt,thick] ([yshift=0em]decoder1.0) -- node[above,font=\scriptsize]{参数复用}(decoder2.180);
\draw[->,dash pattern=on 3pt off 2pt,thick] (decoder2.0) -- node[above,font=\scriptsize]{微调}(decoder3.180);
\draw[->,dash pattern=on 3pt off 2pt,thick] ([yshift=0em]encoder1.0) -- node[above,font=\scriptsize]{参数复用\&微调}(encoder2.180);
\draw[->,dash pattern=on 3pt off 2pt,thick] ([yshift=0em]decoder1.0) -- node[above,font=\scriptsize]{参数复用\&微调}(decoder2.180);
\end{tikzpicture}
......
......@@ -4,8 +4,8 @@
\node(x)[]{$x$};
\node(encoder)[coder, above of = x,yshift=4em]{{编码器}};
\node(decoder_left)[coder, above of = encoder, yshift=6em,fill=blue!25]{{解码器}};
\node(y_hat)[above of = decoder_left, yshift=4em]{{$y'$}};
\node(y)[above of = decoder_left, xshift=-6em]{{$y$}};
\node(y_hat)[above of = decoder_left, yshift=4em]{{$y$}};
\node(y)[above of = decoder_left, xshift=-6em]{{$y_{<}$}};
\node(decoder_right)[coder, above of = encoder, xshift=11em,fill=yellow!25]{{解码器}};
\node(figure)[draw=white,above of = decoder_right,yshift=6.5em,scale=0.25] {\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-bank-without-attention.png}};
......
......@@ -15,7 +15,7 @@
\node[anchor=south,font=\footnotesize,inner sep=0pt] at ([yshift=0.1em]key.north){key};
\node[anchor=south,font=\footnotesize,inner sep=0pt] at ([yshift=0.2em]value.north){value};
\node[anchor=south,font=\footnotesize,inner sep=0pt] (cache)at ([yshift=2em,xshift=1.5em]key.north){\small\bfnew{Cache}};
\node[anchor=south,font=\footnotesize,inner sep=0pt] (cache)at ([yshift=2em,xshift=1.5em]key.north){\small\bfnew{缓存}};
\node[draw,anchor=east,minimum size=1.8em,fill=orange!15] (dt) at ([yshift=2.1em,xshift=-4em]key.west){${\mathbi{d}}_{t}$};
\node[anchor=north,font=\footnotesize] (readlab) at ([xshift=2.8em,yshift=0.3em]dt.north){\red{读取}};
......
......@@ -4,7 +4,7 @@
\foreach \x in {1,2,3,4}
\node[draw,inner sep=0pt,minimum height=1em,minimum width=1.6em,fill=red!30,rounded corners=1pt] (c1_\x) at (0em+2em*\x, 0em){};
\node[anchor=north] (hpre) at ([yshift=1.8em]c1_1.north) {${\mathbi{h}}^ {\textrm{pre}j}$};
\node[anchor=north] (hpre) at ([yshift=1.8em]c1_1.north) {${\mathbi{h}}^ {\textrm{pre}k}$};
\foreach \x in {1,2,3}
\node[draw,inner sep=0pt,minimum height=1em,minimum width=1.6em,fill=red!30,rounded corners=1pt] (c2_\x) at (11em+2em*\x, 0em){};
\node[anchor=north] (hpre) at ([yshift=1.8em]c2_1.north) {${\mathbi{h}}^ {\textrm{pre}1}$};
......@@ -29,7 +29,7 @@
\node[draw,fill=orange!30,inner sep=0pt, minimum size=1.2em, circle] (add3) at (10em, 5.2em){};
\node[anchor=north] (cond) at ([xshift=-1em,yshift=0.5em]add3.north) {${\mathbi{d}}$};
\node[anchor=north] (cons1) at ([xshift=-1em,yshift=0.5em]add2.north) {${\mathbi{s}}^1$};
\node[anchor=north] (consj) at ([xshift=-1em,yshift=0.5em]add1.north) {${\mathbi{s}}^j$};
\node[anchor=north] (consj) at ([xshift=-1em,yshift=0.5em]add1.north) {${\mathbi{s}}^k$};
\begin{pgfonlayer}{background}
\node[draw,rounded corners=2pt,drop shadow,fill=white, minimum width=8.3em][fit=(c1_1)(c1_4)](box1){};
\node[draw,rounded corners=2pt,drop shadow,fill=white,minimum width=6.4em][fit=(c2_1)(c2_3)](box2){};
......@@ -67,8 +67,8 @@
\draw[->,thick] ([yshift=0.1em]sigma.90) -- ([yshift=-0.1em]n2.-90);
\draw[->,thick] ([yshift=0.1em]n2.90) -- node[right]{$ \widetilde{\mathbi{h}}_{\textrm{t}}$}([yshift=2em]n2.90);
\draw[decorate,decoration={brace, mirror},gray, thick] ([yshift=-2em]hh.-180) -- node[font=\scriptsize,text=black,below]{前几}([yshift=-2em]box2.0);
\draw[decorate,decoration={brace, mirror},gray, thick] ([yshift=-2em]box3.-180) -- node[font=\scriptsize,text=black,below]{当前句}([yshift=-2em]box3.0);
\draw[decorate,decoration={brace, mirror},gray, thick] ([yshift=-2em]hh.-180) -- node[font=\scriptsize,text=black,below]{前几个句子}([yshift=-2em]box2.0);
\draw[decorate,decoration={brace, mirror},gray, thick] ([yshift=-2em]box3.-180) -- node[font=\scriptsize,text=black,below]{当前句}([yshift=-2em]box3.0);
\draw[->, thick, rounded corners=2pt] ([yshift=0.1em]add3.90) -- ([yshift=2.1em]add3.90) -- ([xshift=-0.1em]sigma.180);
......
......@@ -30,8 +30,8 @@
\node[anchor=north,font=\Large](x) at ([xshift=2.5em,yshift=-3.4em]encoder.south){$\seq{x}$};
\node[draw,anchor=south,rounded corners=2pt,minimum width=4.0cm,minimum height=2.2em,fill=blue!20](decoder) at ([yshift=2.6em,xshift=2.2em]cnn2.north){解码器};
\node[anchor=north,font=\Large](y) at ([xshift=2.5em,yshift=-3.4em]decoder.south){$\seq{y}$};
\node[anchor=south,font=\Large](y_1) at ([yshift=3em]decoder.north){$\seq{y}'$};
\node[anchor=north,font=\Large](y) at ([xshift=2.5em,yshift=-3.4em]decoder.south){$\seq{y}_{<}$};
\node[anchor=south,font=\Large](y_1) at ([yshift=3em]decoder.north){$\seq{y}$};
\draw[->,thick] ([yshift=-2.1em]cnn1.south) -- ([yshift=-0.1em]cnn1.south);
\draw[->,thick] ([yshift=-2.1em]cnn2.south) -- ([yshift=-0.1em]cnn2.south);
......
......@@ -6,21 +6,22 @@
\tikzstyle{every node}=[scale=0.7]
\node(encoder_c)[coder]{\large{编码器}};
\node(encoder_s)[coder, right of = encoder_c, xshift=3.5cm, fill=red!25]{\large{编码器}};
\node(h_pre)[above of = encoder_c, yshift=1.3cm,scale=1.3]{${\mathbi{h}}_{pre}$};
\node(h_pre)[above of = encoder_c, yshift=1.3cm,scale=1.3]{${\mathbi{h}}_{\rm pre}$};
\node(h)[above of = encoder_s, yshift=1.3cm,scale=1.3]{$\mathbi{h}$};
\node(cir)[circle,very thick, right of = h, draw=black!90,minimum width=0.5cm,xshift=1.1cm]{};
\draw[-,very thick,draw=black!90]([xshift=0.04cm]cir.west)--([xshift=-0.04cm]cir.east);
\draw[-,very thick,draw=black!90]([yshift=-0.04cm]cir.north)--([yshift=0.04cm]cir.south);
\node(last)[below of = encoder_c, yshift=-1.3cm]{\large{前一}};
\node(current)[below of = encoder_s, yshift=-1.3cm]{\large{当前句}};
\node(last)[below of = encoder_c, yshift=-1.3cm]{\large{前一个句子}};
\node(current)[below of = encoder_s, yshift=-1.3cm]{\large{当前句}};
\node(attention_left)[attention, above of = encoder_c, xshift=2.4cm,yshift=3.1cm]{\large{注意力机制}};
\node(d)[above of = attention_left, yshift=1.1cm,scale=1.3]{$\mathbi{d}$};
\node(ground)[rectangle, thick, rounded corners, minimum width=5cm, minimum height=5.5cm, right of = encoder_s, xshift=4.4cm,yshift=2.2cm, draw=black!70, fill=gray!10]{};
\node(decoder)[above of = encoder_s, xshift=3.1cm]{\large{解码器}};
\node(attention_right)[attention, right of = attention_left, xshift=5.4cm,yshift=-0.4cm]{\large{注意力机制}};
\node(target)[right of = current, xshift=5.3cm]{\large{目标}};
\node(target)[right of = current, xshift=5.3cm]{\large{目标语言句子(位置$j$之前)}};
\node(point_below)[right of = encoder_s, xshift=5.3cm]{\Huge{...}};
\node(point_above)[above of = attention_right, yshift=1.8cm]{\Huge{...}};
\node(target_above)[above of = attention_right, yshift=3.3cm]{\large{目标语言句子}};
\draw[->, very thick](last)to([yshift=-0.05cm]encoder_c.south);
\draw[->, very thick](current)to([yshift=-0.05cm]encoder_s.south);
......
......@@ -12,8 +12,8 @@
\node(tree)[word, below of = sky, yshift=-0.75cm, fill=blue!15]{tree};
\node(cir)[circle,thick, minimum width=0.6cm, xshift=8cm, draw=black]{};
\node(decoder)[rectangle, rounded corners, minimum height=2.2em,minimum width=4.3em, right of = cir,xshift=3cm, draw=black, fill=blue!25]{\large{解码器}};
\node(yn_1)[below of = decoder,yshift=-2cm,scale=1.2]{$\rm y_{n-1}$};
\node(yn_2)[above of = decoder,yshift=2cm,scale=1.2]{$\rm y'_{n-1}$(bank)};
\node(yn_1)[below of = decoder,yshift=-2cm,scale=1.2]{$y_{<j}$};
\node(yn_2)[above of = decoder,yshift=2cm,scale=1.2]{$y_{j}$};
\draw[->, thick]([xshift=0.1cm]figure.east)to([xshift=2cm]figure.east);
\draw[-,thick]([xshift=-0.03cm]cir.east)to([xshift=0.03cm]cir.west);
......
......@@ -9,17 +9,17 @@
\node(encoder_left)[encoder, above of = decoder_left, yshift=1.6cm]{\normalsize{编码器}};
\node(encoder_right)[encoder, above of = decoder_right, yshift=1.6cm]{\normalsize{编码器}};
\node(text_left)[below of = encoder_left, yshift=1.5cm]{\normalsize{前文}};
\node(text_right)[below of = encoder_right, yshift=1.5cm]{\normalsize{源语}};
\node(text_right)[below of = encoder_right, yshift=1.5cm]{\normalsize{源语言句子}};
\node(text_top)[above of = decoder_right, yshift=-1.6cm]{\normalsize{句子级翻译结果}};
\node(title_1)[above of = text_left, xshift=1.1cm, yshift=3cm]{\large\bfnew{一阶段解码}};
\node(title_1)[above of = text_left, xshift=1.1cm, yshift=3cm]{\large\bfnew{一阶段翻译}};
\node(ground2)[rectangle,very thick,rounded corners,minimum width=5cm,minimum height=5.8cm,right of = decoder_right,xshift=5.3cm,yshift=1.6cm,draw=black,dashed]{};
\node(ground1)[rectangle,thick,rounded corners,minimum width=3.3cm,minimum height=5cm,right of = decoder_right,xshift=4.8cm,yshift=1.58cm,draw=black,fill=yellow!15]{};
\node(attention_below)[attention, right of = decoder_right, xshift=4.8cm]{\normalsize{注意力机制}};
\node(attention_above)[attention, above of = attention_below, yshift=1.6cm]{\normalsize{注意力机制}};
\node(ffn)[attention, above of = attention_above, yshift=1.6cm, fill=blue!8]{\normalsize{前馈神经网络}};
\node(n)[right of = attention_above, xshift=2.4cm,scale=1.5]{$\times N$};
\node(text_2)[above of = ffn, yshift=1.9cm]{\normalsize{上下文修正结果}};
\node(title_2)[right of = title_1, xshift=6.3cm]{\large\bfnew{二阶段解码}};
\node(text_2)[above of = ffn, yshift=1.9cm]{\normalsize{基于上下文的修正结果}};
\node(title_2)[right of = title_1, xshift=6.3cm]{\large\bfnew{二阶段翻译}};
%\node(text_rright)[right of = text_right, xshift=5.5cm]{\normalsize{句子级翻译结果}};
\draw[->,very thick]([yshift=-0.1cm]text_left.south)to(encoder_left.north);
......
......@@ -148,7 +148,7 @@
\parinterval 交互式机器翻译体现了一种用户的行为“干预”机器翻译结果的思想。实际上,在机器翻译出现错误时,人们总是希望用一种直接有效的方式“改变”译文,最短时间内达到改善翻译质量的目的。比如,如果机器翻译系统可以输出多个候选译文,用户可以在其中挑选最好的译文进行输出。也就是,人干预了译文候选的排序过程。另一个例子是{\small\bfnew{翻译记忆}}\index{翻译记忆}(Translation Memory\index{Translation Memory})。翻译记忆记录了高质量的源语言-目标语言句对,有时也可以被看作是一种先验知识或“记忆”。因此,当进行机器翻译时,使用翻译记忆指导翻译过程也可以被看作是一种干预手段\upcite{DBLP:conf/acl/WangZS13,DBLP:conf/aaai/XiaHLS19}
\parinterval 虽然干预机器翻译系统的方式很多,最常用的还是对源语言特定片段翻译的干预,以期望最终句子的译文满足某些约束。这个问题也被称作{\small\bfnew{基于约束的翻译}}\index{基于约束的翻译} (Constraint-based Translation\index{Constraint-based Translation})。比如,在翻译网页时,需要保持译文中的网页标签与源文一致。另一个典型例子是术语翻译。在实际应用中,经常会遇到公司名称、品牌名称、产品名称等专有名词和行业术语,以及不同含义的缩写,比如,对于“小牛翻译”这个专有名词,不同的机器翻译系统给出的结果不一样:“Maverick translation”、“Calf translation”、“The mavericks translation”…… 而它正确的翻译应该为“NiuTrans”。 对于这些类似的特殊词汇,机器翻译引擎很难翻译得准确。一方面,因为模型大多是在通用数据集上训练出来的,并不能保证数据集能涵盖所有的语言现象。另一方面,即使是这些术语在训练数据中出现,它们通常也是低频的,模型不容易捕捉它们的规律。为了保证翻译的准确性,对术语翻译进行干预是十分有必要的,这对领域适应等问题的求解也是非常有意义的。
\parinterval 虽然干预机器翻译系统的方式很多,最常用的还是对源语言特定片段翻译的干预,以期望最终句子的译文满足某些约束。这个问题也被称作{\small\bfnew{基于约束的翻译}}\index{基于约束的翻译} (Constraint-based Translation\index{Constraint-based Translation})。比如,在翻译网页时,需要保持译文中的网页标签与源文一致。另一个典型例子是术语翻译。在实际应用中,经常会遇到公司名称、品牌名称、产品名称等专有名词和行业术语,以及不同含义的缩写,比如,对于“小牛翻译”这个专有名词,不同的机器翻译系统给出的结果不一样:“Maverick translation”、“Calf translation”、“The mavericks translation”等等,而它正确的翻译应该为“NiuTrans”。 对于这些类似的特殊词汇,机器翻译引擎很难翻译得准确。一方面,因为模型大多是在通用数据集上训练出来的,并不能保证数据集能涵盖所有的语言现象。另一方面,即使是这些术语在训练数据中出现,它们通常也是低频的,模型不容易捕捉它们的规律。为了保证翻译的准确性,对术语翻译进行干预是十分有必要的,这对领域适应等问题的求解也是非常有意义的。
\parinterval{\small\bfnew 词汇约束翻译}\index{词汇约束翻译}(Lexically Constrained Translation)\index{Lexically Constrained Translation}而言,在不干预的情况下让模型直接翻译出正确术语是很难的,因为术语的译文很可能是未登录词,因此必须人为提供额外的术语词典,那么我们的目标就是让模型的翻译输出遵守用户提供的术语约束。这个过程如图\ref{fig:18-3}所示。
%----------------------------------------------
......
......@@ -6619,13 +6619,13 @@ author = {Yoshua Bengio and
year = {2017}
}
@inproceedings{DBLP:journals/corr/SuGMRUVWY16a,
author = {Pei{-}Hao Su and
author = {Pei-Hao Su and
Milica Gasic and
Nikola Mrksic and
Lina Maria Rojas{-}Barahona and
Lina Maria Rojas-Barahona and
Stefan Ultes and
David Vandyke and
Tsung{-}Hsien Wen and
Tsung-Hsien Wen and
Steve J. Young},
title = {Continuously Learning Neural Dialogue Management},
publisher = {CoRR},
......@@ -6661,7 +6661,7 @@ author = {Yoshua Bengio and
Fei Tian and
Tao Qin and
Jianhuang Lai and
Tie{-}Yan Liu},
Tie-Yan Liu},
title = {A Study of Reinforcement Learning for Neural Machine Translation},
pages = {3612--3621},
publisher = {Annual Meeting of the Association for Computational Linguistics},
......@@ -9213,6 +9213,36 @@ author = {Zhuang Liu and
publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
year = {2017}
}
@inproceedings{DBLP:conf/coling/XuHJFWHJXZ20,
author = {Chen Xu and
Bojie Hu and
Yufan Jiang and
Kai Feng and
Zeyang Wang and
Shen Huang and
Qi Ju and
Tong Xiao and
Jingbo Zhu},
title = {Dynamic Curriculum Learning for Low-Resource Neural Machine Translation},
pages = {3977--3989},
publisher = {International Conference on Computational Linguistics},
year = {2020}
}
@inproceedings{DBLP:conf/acml/WuXTZQLL18,
author = {Lijun Wu and
Yingce Xia and
Fei Tian and
Li Zhao and
Tao Qin and
Jianhuang Lai and
Tie-Yan Liu},
title = {Adversarial Neural Machine Translation},
series = {Proceedings of Machine Learning Research},
volume = {95},
pages = {534--549},
publisher = {Asian Conference on Machine Learning},
year = {2018}
}
%%%%% chapter 15------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......@@ -13088,6 +13118,23 @@ author = {Zhuang Liu and
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019}
}
@inproceedings{DBLP:conf/ijcai/BernardiCEEEIKM17,
author = {Raffaella Bernardi and
Ruket {\c{C}}akici and
Desmond Elliott and
Aykut Erdem and
Erkut Erdem and
Nazli Ikizler{-}Cinbis and
Frank Keller and
Adrian Muscat and
Barbara Plank},
title = {Automatic Description Generation from Images: {A} Survey of Models,
Datasets, and Evaluation Measures (Extended Abstract)},
pages = {4970--4974},
publisher = {International Joint Conference on Artificial Intelligence},
year = {2017}
}
%%%%% chapter 17------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%cha
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论