Commit 423cdabb by 曹润柘

update

parent a87caa3e
\begin{tikzpicture} \begin{tikzpicture}
\tikzstyle{bignode} = [line width=0.6pt,draw=black,minimum width=6.3em,minimum height=2.2em,fill=white] \tikzstyle{bignode} = [,inner sep=0.3em,draw=black,line width=0.6pt,rounded corners=2pt,minimum width=3.0em]
\tikzstyle{middlenode} = [line width=0.6pt,draw=black,minimum width=5.6em,minimum height=2.2em,fill=white]
\node [anchor=center] (node1-1) at (0,0) {\scriptsize{汉语}}; \node [anchor=center] (node1-1) at (0,0) {{汉语}};
\node [anchor=west] (node1-2) at ([xshift=0.8em]node1-1.east) {\scriptsize{英语}}; \node [anchor=west] (node1-2) at ([xshift=0.8em]node1-1.east) {{英语}};
\node [anchor=north] (node1-3) at ([xshift=1.45em]node1-1.south) {\scriptsize{反向翻译模型}}; \node [anchor=north] (node1-3) at ([xshift=1.75em]node1-1.south) {{反向翻译模型}};
\draw [->,line width=0.6pt](node1-1.east)--(node1-2.west); \draw [->,thick](node1-1.east)--(node1-2.west);
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
{ {
\node[fill=blue!20,inner sep=0.1em,draw=black,line width=0.6pt,minimum width=6.0em,drop shadow,rounded corners=2pt] [fit =(node1-1)(node1-2)(node1-3)] (remark1) {}; \node[fill=blue!20,inner sep=0.3em,draw=black,line width=0.6pt,minimum width=6.0em,drop shadow,rounded corners=2pt] [fit =(node1-1)(node1-2)(node1-3)] (remark1) {};
} }
\end{pgfonlayer} \end{pgfonlayer}
\node [anchor=north,fill=green!20,inner sep=0.1em,minimum width=3em,draw=black,line width=0.6pt,rounded corners=2pt](node2-1) at ([xshift=-1.5em,yshift=-1.95em]remark1.south){\scriptsize{汉语}}; \node [anchor=north,fill=green!20,bignode](node2-1) at ([yshift=-3em]node1-3.south){{汉语}};
\node [anchor=west,fill=green!20,inner sep=0.1em,minimum width=3em,draw=black,line width=0.6pt,rounded corners=2pt](node2-2) at (node2-1.east){\scriptsize{英语}}; \node [anchor=north,fill=green!20,bignode](node2-2) at (node2-1.south){{英语}};
\draw [->,line width=0.6pt]([yshift=-2.0em]remark1.south)--(remark1.south) node [pos=0.5,right] (pos1) {\scriptsize{训练}}; \draw [->,thick](node2-1.north)--(remark1.south) node [pos=0.5,right] (pos1) {{训练}};
\node [anchor=west,fill=yellow!20,inner sep=0.1em,minimum width=3em,draw=black,line width=0.6pt,rounded corners=2pt](node3-1) at ([xshift=5.0em,yshift=0.0em]node1-2.east){\scriptsize{汉语}}; \node [anchor=west,fill=yellow!20,bignode](node3-1) at ([xshift=6.5em,yshift=0.0em]node1-2.east){{汉语}};
\node [anchor=north,fill=red!20,inner sep=0.1em,minimum width=3em,draw=black,line width=0.6pt,rounded corners=2pt](node3-2) at ([yshift=-2.15em]node3-1.south){\scriptsize{英语}}; \node [anchor=north,fill=red!20,bignode](node3-2) at ([yshift=-2.5em]node3-1.south){{英语}};
\node [anchor=center](node3-3) at ([xshift=0.4em]node3-2.east){};
\draw [->,line width=0.6pt](node3-1.south)--(node3-2.north) node [pos=0.5,right] (pos2) {\scriptsize{翻译}}; \draw [->,thick](node3-1.south)--(node3-2.north) node [pos=0.5,right] (pos2) {{翻译}};
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
{ {
\node[rounded corners=2pt,inner sep=0.3em,draw=black,line width=0.6pt,dotted] [fit =(node3-1)(node3-2)] (remark2) {}; \node[rounded corners=2pt,inner sep=0.3em,draw=black,line width=0.6pt,dotted] [fit =(node3-1)(node3-2)(node3-3)] (remark2) {};
} }
\end{pgfonlayer} \end{pgfonlayer}
\draw [->,line width=0.6pt](remark1.east)--([yshift=0.85em]remark2.west) node [pos=0.5,above] (pos2) {\scriptsize{模型翻译}}; \draw [->,thick](remark1.east)--([xshift=5.5em]remark1.east) node [pos=0.5,above] (pos2) {{模型翻译}};
\node [anchor=south](pos2-2) at ([yshift=-0.5em]pos2.north){\scriptsize{使用反向}}; \node [anchor=south](pos2-2) at ([yshift=-0.5em]pos2.north){{使用反向}};
\draw[decorate,thick,decoration={brace,amplitude=5pt}] ([yshift=1.3em,xshift=1.0em]node3-1.east) -- ([yshift=-5.2em,xshift=1.0em]node3-1.east) node [pos=0.1,right,xshift=0.0em,yshift=0.0em] (label1) {\scriptsize{{混合}}}; \draw[decorate,thick,decoration={brace,amplitude=5pt}] ([yshift=1.5em,xshift=1.5em]node3-1.east) -- ([yshift=-8.6em,xshift=1.5em]node3-1.east) node [pos=0.1,right,xshift=0.0em,yshift=0.0em] (label1) {{{混合}}};
\node [anchor=west,fill=red!20,inner sep=0.1em,minimum width=3em,draw=black,line width=0.6pt,rounded corners=2pt](node4-1) at ([xshift=2.0em,yshift=1.6em]node3-2.east){\scriptsize{英语}}; \node [anchor=west,fill=red!20,bignode](node4-1) at ([xshift=2.5em,yshift=1.3em]node3-2.east){{英语}};
\node [anchor=north,fill=green!20,inner sep=0.1em,minimum width=3em,draw=black,line width=0.6pt,rounded corners=2pt](node4-2) at (node4-1.south){\scriptsize{}}; \node [anchor=north,fill=yellow!20,bignode](node4-2) at (node4-1.south){{}};
\node [anchor=west,fill=yellow!20,inner sep=0.1em,minimum width=3em,draw=black,line width=0.6pt,rounded corners=2pt](node4-3) at (node4-1.east){\scriptsize{}}; \node [anchor=west,fill=green!20,bignode](node4-3) at (node4-1.east){{}};
\node [anchor=north,fill=green!20,inner sep=0.1em,minimum width=3em,draw=black,line width=0.6pt,rounded corners=2pt](node4-4) at (node4-3.south){\scriptsize{汉语}}; \node [anchor=north,fill=green!20,bignode](node4-4) at (node4-3.south){{汉语}};
\node [anchor=center] (node5-1) at ([xshift=3.4em,yshift=0.25em]node4-3.east) {\scriptsize{英语}}; \node [anchor=center] (node5-1) at ([xshift=5em,yshift=0.02em]node4-3.east) {{英语}};
\node [anchor=west] (node5-2) at ([xshift=0.8em]node5-1.east) {\scriptsize{汉语}}; \node [anchor=west] (node5-2) at ([xshift=0.8em]node5-1.east) {{汉语}};
\node [anchor=north] (node5-3) at ([xshift=1.65em]node5-1.south) {\scriptsize{正向翻译模型}}; \node [anchor=north] (node5-3) at ([xshift=1.65em]node5-1.south) {{正向翻译模型}};
\draw [->,line width=0.6pt](node5-1.east)--(node5-2.west); \draw [->,thick](node5-1.east)--(node5-2.west);
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
{ {
\node[fill=blue!20,inner sep=0.1em,draw=black,line width=0.6pt,minimum width=6.0em,drop shadow,rounded corners=2pt] [fit =(node5-1)(node5-2)(node5-3)] (remark3) {}; \node[fill=blue!20,inner sep=0.3em,draw=black,line width=0.6pt,minimum width=6.0em,drop shadow,rounded corners=2pt] [fit =(node5-1)(node5-2)(node5-3)] (remark3) {};
} }
\end{pgfonlayer} \end{pgfonlayer}
\draw [->,line width=0.6pt]([xshift=-2em]remark3.west)--(remark3.west) node [pos=0.5,above] (pos3) {\scriptsize{训练}}; \draw [->,thick]([xshift=-3.2em]remark3.west)--(remark3.west) node [pos=0.5,above] (pos3) {{训练}};
\node [anchor=south](d1) at ([xshift=0.0em,yshift=2em]remark3.north){\scriptsize{真实数据:}};
\node [anchor=north](d2) at ([xshift=0.35em]d1.south){\scriptsize{伪数据:}};
\node [anchor=south](d3) at ([xshift=0.0em,yshift=0em]d1.north){\scriptsize{额外数据:}};
\node [anchor=west,fill=green!20,minimum width=1em](d1-1) at ([xshift=-0.0em]d1.east){};
\node [anchor=west,fill=red!20,minimum width=1em](d2-1) at ([xshift=-0.0em]d2.east){};
\node [anchor=west,fill=yellow!20,minimum width=1em](d3-1) at ([xshift=-0.0em]d3.east){};
\node [anchor=south](d1) at ([xshift=-1.5em,yshift=1em]remark1.north){{真实数据:}};
\node [anchor=west](d2) at ([xshift=2.0em]d1.east){{伪数据:}};
\node [anchor=west](d3) at ([xshift=2.0em]d2.east){{额外数据:}};
\node [anchor=west,fill=green!20,minimum width=1.5em](d1-1) at ([xshift=-0.0em]d1.east){};
\node [anchor=west,fill=red!20,minimum width=1.5em](d2-1) at ([xshift=-0.0em]d2.east){};
\node [anchor=west,fill=yellow!20,minimum width=1.5em](d3-1) at ([xshift=-0.0em]d3.east){};
\end{tikzpicture} \end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture} \begin{tikzpicture}
\begin{scope} \tikzstyle{rec} = [inner sep=0.3em,minimum width=4em,draw=black,line width=0.6pt,rounded corners=2pt]
\node [anchor=center] (node1) at (9.6,1) {\small{训练:}}; \node [anchor=north,fill=green!20,rec](node1-1) at (0,0){{汉语}};
\node [anchor=center] (node11) at (10.2,1) {}; \node [anchor=north,fill=green!20,rec](node1-2) at (node1-1.south){{英语}};
\node [anchor=center] (node12) at (11.4,1) {}; \node [anchor=north,fill=yellow!20,rec](node2-1) at ([yshift=-5.0em]node1-1.south){{汉语}};
\node [anchor=center] (node2) at (9.6,0.5) {\small{推理:}}; \node [anchor=north,fill=red!20,rec](node2-2) at (node2-1.south){{英语}};
\node [anchor=center] (node21) at (10.2,0.5) {}; \node [anchor=east] (node3-1) at ([xshift=-4.0em,yshift=-3.5em]node1-1.west) {{正向}};
\node [anchor=center] (node22) at (11.4,0.5) {}; \node [anchor=north] (node3-2) at ([yshift=0.5em]node3-1.south) {{翻译模型}};
\node [anchor=west,draw=black,line width=0.6pt,minimum width=5.6em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node1-1) at (0,0) {\footnotesize{双语数据}}; \begin{pgfonlayer}{background}
\node [anchor=south,draw=black,line width=0.6pt,minimum width=4.5em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node1-2) at ([yshift=-5em]node1-1.south) {\footnotesize{目标语伪数据}}; {
\node [anchor=west,draw=black,line width=0.6pt,minimum width=4.5em,minimum height=2.2em,fill=red!20,rounded corners=2pt] (node2-1) at ([xshift=-7.7em,yshift=-2.5em]node1-1.west) {\footnotesize{前向NMT系统}}; \node[fill=blue!20,inner sep=0.3em,draw=black,line width=0.6pt,minimum width=3.0em,drop shadow,rounded corners=2pt] [fit =(node3-1)(node3-2)] (remark1) {};
\node [anchor=west,draw=black,line width=0.6pt,minimum width=4.5em,minimum height=2.2em,fill=red!20,rounded corners=2pt] (node3-1) at ([xshift=1.5em,yshift=-2.5em]node1-1.east) {\footnotesize{反向NMT系统}}; }
\end{pgfonlayer}
\draw [->,thick]([yshift=-0.75em]node1-1.west)--(remark1.north east);
\draw [->,thick,dashed](remark1.south east)--([yshift=-0.75em]node2-1.west);
\node [anchor=east,draw=black,line width=0.6pt,minimum width=5.6em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node4-1) at ([xshift=18em]node1-1) {\footnotesize{双语数据}}; \node [anchor=west] (node4-1) at ([xshift=4.0em,yshift=-3.5em]node1-1.east) {{反向}};
\node [anchor=south,draw=black,line width=0.6pt,minimum width=4.5em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node4-2) at ([yshift=-5em]node4-1.south) {\footnotesize{目标语伪数据}}; \node [anchor=north] (node4-2) at ([yshift=0.5em]node4-1.south) {{翻译模型}};
\begin{pgfonlayer}{background}
{
\node[fill=blue!20,inner sep=0.3em,draw=black,line width=0.6pt,minimum width=3.0em,drop shadow,rounded corners=2pt] [fit =(node4-1)(node4-2)] (remark2) {};
}
\end{pgfonlayer}
\draw [->,thick]([yshift=-0.75em]node1-1.east)--(remark2.north west);
\draw [->,thick]([yshift=-0.75em]node2-1.east)--(remark2.south west);
\node [anchor=east,draw=black,line width=0.6pt,minimum width=4.5em,minimum height=2.2em,fill=red!20,rounded corners=2pt] (node5-1) at ([xshift=15.2em]node3-1.east) {\footnotesize{前向NMT系统}}; \node [anchor=west,fill=green!20,rec](node5-1) at ([xshift=4.0em,yshift=3.48em]node4-1.east){{英语}};
\node [anchor=north,fill=green!20,rec](node5-2) at (node5-1.south){{汉语 }};
\node [anchor=north,fill=yellow!20,rec](node6-1) at ([yshift=-5.0em]node5-1.south){{英语}};
\node [anchor=north,fill=red!20,rec](node6-2) at (node6-1.south){{汉语}};
\draw [->,thick,dashed](remark2.south east)--([yshift=-0.75em]node6-1.west);
\node [anchor=west] (node7-1) at ([xshift=4.0em,yshift=-3.5em]node5-1.east) {{正向}};
\node [anchor=north] (node7-2) at ([yshift=0.5em]node7-1.south) {{翻译模型}};
\begin{pgfonlayer}{background}
{
\node[fill=blue!20,inner sep=0.3em,draw=black,line width=0.6pt,minimum width=3.0em,drop shadow,rounded corners=2pt] [fit =(node7-1)(node7-2)] (remark3) {};
}
\end{pgfonlayer}
\draw [->,thick]([yshift=-0.75em]node5-1.east)--(remark3.north west);
\draw [->,thick]([yshift=-0.75em]node6-1.east)--(remark3.south west);
\node [anchor=south](d1) at ([xshift=-0.7em,yshift=4em]remark1.north){{真实数据:}};
\node [anchor=west](d2) at ([xshift=2.0em]d1.east){{伪数据:}};
\node [anchor=west](d3) at ([xshift=2.0em]d2.east){{额外数据:}};
\node [anchor=west,fill=green!20,minimum width=1.5em](d1-1) at ([xshift=-0.0em]d1.east){};
\node [anchor=west,fill=red!20,minimum width=1.5em](d2-1) at ([xshift=-0.0em]d2.east){};
\node [anchor=west,fill=yellow!20,minimum width=1.5em](d3-1) at ([xshift=-0.0em]d3.east){};
\node [anchor=south] (d4) at ([xshift=1em]d1.north) {{训练:}};
\node [anchor=south] (d5) at ([xshift=0.5em]d2.north) {{推理:}};
\draw [->,thick] ([xshift=0em]d4.east)--([xshift=1.5em]d4.east);
\draw [->,thick,dashed] ([xshift=0em]d5.east)--([xshift=1.5em]d5.east);
\draw [->,line width=1pt](node1-1.west)--([xshift=3em]node2-1.north);
\draw [->,line width=1pt](node1-1.east)--([xshift=-3em]node3-1.north);
\draw [->,line width=1pt](node1-2.east)--([xshift=-3em]node3-1.south);
\draw [->,line width=1pt](node11.east)--(node12.west);
\draw [->,line width=1pt,dashed](node21.east)--(node22.west);
\draw [->,line width=1pt,dashed]([xshift=3em]node2-1.south)--([xshift=-0.1em]node1-2.west);
\draw [->,line width=1pt,dashed]([xshift=3em]node3-1.south)--([xshift=-0.1em]node4-2.west);
\draw [->,line width=1pt](node4-1.east)--([xshift=-3em]node5-1.north);
\draw [->,line width=1pt](node4-2.east)--([xshift=-3em]node5-1.south);
\end{scope}
\end{tikzpicture} \end{tikzpicture}
\ No newline at end of file
% Figure: iterative back-translation flow.
% Three corpora (monolingual X, bilingual D, monolingual Y) sit in the top row.
% Two model columns (x->y on the left, y->x on the right) are stacked below them;
% each iteration is: model -> decoding -> pseudo-parallel pairs -> next model.
% NOTE(review): the colour `ugreen` is not defined in this file -- presumably it is
% provided by the including document's preamble; confirm before compiling standalone.
\definecolor{color1}{rgb}{1,0.725,0.058}

% Shared node styles (\tikzset replaces the deprecated \tikzstyle; same keys, same values).
\tikzset{
  % corpus box and its offset dark "shadow" twin
  data/.style={rectangle,very thick,rounded corners,minimum width=2.3cm,minimum height=0.83cm,text centered,draw=black!70,fill=color1!25},
  data_shadow/.style={rectangle,very thick,rounded corners,minimum width=2.3cm,minimum height=0.83cm,text centered,draw=black!70,fill=black!70},
  % model / decoding step box
  process/.style={rectangle,thick,rounded corners,minimum width=2cm,minimum height=0.7cm,text centered,draw=black!80,fill=gray!25},
  % wider box holding a set of generated sentence pairs
  state/.style={rectangle,thick,rounded corners,minimum width=3cm,minimum height=0.7cm,text centered,draw=black!80,fill=gray!25}
}

\begin{tikzpicture}[node distance = 0,scale = 1]
\tikzset{every node/.style={scale=1}}

% --- top row: corpora (shadows first so the coloured boxes are painted over them) ---
\node(monolingual_X_shadow)[data_shadow]{};
\node(bilingual_D_shadow)[data_shadow, right of = monolingual_X_shadow, xshift=5cm]{};
\node(monolingual_Y_shadow)[data_shadow, right of = bilingual_D_shadow, xshift=5cm]{};
\node(monolingual_X)[data,right of = monolingual_X_shadow,xshift=-0.08cm,yshift=0.08cm]{单语语料X};
\node(bilingual_D)[data, right of = monolingual_X, xshift=5cm, fill=ugreen!25]{双语语料D};
\node(monolingual_Y)[data, right of = bilingual_D, xshift=5cm, fill=blue!25]{单语语料Y};

% --- iteration 0: initial models, decoding, generated pairs ---
\node(process_1_1)[process, right of = monolingual_X, xshift=2.5cm, yshift=-1.5cm]{\textbf{$M^0_{x\to y}$}};
\node(process_1_2)[process, right of = process_1_1, xshift=5cm, fill=red!25]{$M^0_{y\to x}$};
\node(process_2_1)[process, below of = process_1_1, yshift=-1.2cm]{解码过程};
\node(process_2_2)[process, below of = process_1_2, yshift=-1.2cm, fill=red!25]{解码过程};
\node(process_3_1)[state, below of = process_2_1, yshift=-1.2cm, fill=color1!25]{\{$x_i,\hat{y}^0_i$\}};
\node(process_3_2)[state, below of = process_2_2, yshift=-1.2cm, fill=blue!25]{\{$\hat{x}^0_i,{y_i}$\}};

% --- iteration 1 ---
\node(process_4_1)[process, below of = process_3_1, yshift=-1.2cm]{\textbf{$M^1_{x\to y}$}};
\node(process_4_2)[process, below of = process_3_2, yshift=-1.2cm, fill=red!25]{$M^1_{y\to x}$};
\node(process_5_1)[process, below of = process_4_1, yshift=-1.2cm]{解码过程};
\node(process_5_2)[process, below of = process_4_2, yshift=-1.2cm, fill=red!25]{解码过程};
\node(process_6_1)[state, below of = process_5_1, yshift=-1.2cm, fill=color1!25]{\{$x_i,\hat{y}^1_i$\}};
\node(process_6_2)[state, below of = process_5_2, yshift=-1.2cm, fill=blue!25]{\{$\hat{x}^1_i,{y_i}$\}};

% --- iteration 2: only the models are drawn, the rest is elided ---
\node(process_7_1)[process, below of = process_6_1, yshift=-1.2cm]{\textbf{$M^2_{x\to y}$}};
\node(process_7_2)[process, below of = process_6_2, yshift=-1.2cm, fill=red!25]{$M^2_{y\to x}$};

% --- ellipses terminating the three corpus lines and the two model columns ---
\node(ellipsis_1)[below of = monolingual_X, yshift=-9.9cm,scale=1.5]{$...$};
\node(ellipsis_2)[below of = process_7_1, yshift=-1.2cm,scale=1.5]{$...$};
\node(ellipsis_3)[below of = bilingual_D, yshift=-9.9cm,scale=1.5]{$...$};
\node(ellipsis_4)[below of = process_7_2, yshift=-1.2cm,scale=1.5]{$...$};
\node(ellipsis_5)[below of = monolingual_Y, yshift=-9.9cm,scale=1.5]{$...$};

% --- iteration labels in the left margin ---
\node(text_1)[left of = process_2_1, xshift=-4cm,scale=0.8]{第0轮迭代};
\node(text_2)[left of = process_5_1, xshift=-4cm,scale=0.8]{第1轮迭代};
\node(text_3)[left of = ellipsis_2, xshift=-4cm, scale=0.8]{第2轮迭代};

% --- vertical corpus lines running from each corpus box down to its ellipsis ---
\draw[->, very thick, color=color1!40](monolingual_X.south)--(ellipsis_1.north);
\draw[->, very thick, color=ugreen!55](bilingual_D.south)--(ellipsis_3.north);
\draw[->, very thick, color=blue!55](monolingual_Y.south)--(ellipsis_5.north);

% --- monolingual data entering each decoding step from the side ---
\draw[->, very thick, color=color1!40]([xshift=-1.5cm]process_2_1.west)--(process_2_1.west);
\draw[->, very thick, color=color1!40]([xshift=-1.5cm]process_5_1.west)--(process_5_1.west);
\draw[->, very thick, color=blue!55]([xshift=1.5cm]process_2_2.east)--(process_2_2.east);
\draw[->, very thick, color=blue!55]([xshift=1.5cm]process_5_2.east)--(process_5_2.east);

% --- vertical flow inside each model column ---
\draw[->, thick](process_1_1.south)--(process_2_1.north);
\draw[->, thick](process_1_2.south)--(process_2_2.north);
\draw[->, thick](process_2_1.south)--(process_3_1.north);
\draw[->, thick](process_2_2.south)--(process_3_2.north);
\draw[->, thick](process_4_1.south)--(process_5_1.north);
\draw[->, thick](process_4_2.south)--(process_5_2.north);
\draw[->, thick](process_5_1.south)--(process_6_1.north);
\draw[->, thick](process_5_2.south)--(process_6_2.north);
\draw[->, thick](process_7_1.south)--(ellipsis_2.north);
\draw[->, thick](process_7_2.south)--(ellipsis_4.north);

% --- cross arrows: pairs generated by one direction feed the next model of the other ---
% NOTE(review): all four arrows use color1!40, including the two leaving the
% blue-filled y-side states -- confirm whether blue!55 was intended for those.
\draw[->, very thick, color=color1!40](process_3_1.east)--([yshift=0.35cm]process_4_2.west);
\draw[->, very thick, color=color1!40](process_3_2.west)--([yshift=0.35cm]process_4_1.east);
\draw[->, very thick, color=color1!40](process_6_1.east)--([yshift=0.35cm]process_7_2.west);
\draw[->, very thick, color=color1!40](process_6_2.west)--([yshift=0.35cm]process_7_1.east);

% --- bilingual corpus D feeding both models of every iteration ---
% (the original drew the last arrow twice; the duplicate is removed)
\draw[->, very thick, color=ugreen!55,in=0,out=270]([xshift=-0.3cm]bilingual_D.south)to(process_1_1.east);
\draw[->, very thick, color=ugreen!55,in=180,out=270]([xshift=0.3cm]bilingual_D.south)to(process_1_2.west);
\draw[->, very thick, color=ugreen!55,in=0,out=270]([yshift=-3.7cm]bilingual_D.south)to(process_4_1.east);
\draw[->, very thick, color=ugreen!55,in=180,out=270]([yshift=-3.7cm]bilingual_D.south)to(process_4_2.west);
\draw[->, very thick, color=ugreen!55,in=0,out=270]([yshift=-7.3cm]bilingual_D.south)to(process_7_1.east);
\draw[->, very thick, color=ugreen!55,in=180,out=270]([yshift=-7.3cm]bilingual_D.south)to(process_7_2.west);

% --- dashed horizontal separators drawn below each iteration label ---
\draw[-, very thick, dashed, color=blue!55]([xshift=-1cm,yshift=-0.35cm]text_1.south)--([xshift=12.7cm,yshift=-0.35cm]text_1.south);
\draw[-, very thick, dashed, color=blue!55]([xshift=-1cm,yshift=-0.35cm]text_2.south)--([xshift=12.7cm,yshift=-0.35cm]text_2.south);
\draw[-, very thick, dashed, color=blue!55]([xshift=-1cm,yshift=-0.35cm]text_3.south)--([xshift=12.7cm,yshift=-0.35cm]text_3.south);
\end{tikzpicture}
\ No newline at end of file
...@@ -66,7 +66,7 @@ ...@@ -66,7 +66,7 @@
\begin{figure}[htp] \begin{figure}[htp]
\centering \centering
\input{./Chapter16/Figures/figure-example-of-iterative-back-translation} \input{./Chapter16/Figures/figure-example-of-iterative-back-translation}
\caption{\red{迭代式回译方法的流程,未修改} {\color{blue} 这个图的逻辑我觉得是ok的,主要是这些线和过程需要再清晰一下,再找我讨论下!}} \caption{迭代式回译方法的流程}
\label{fig:16-2-xc} \label{fig:16-2-xc}
\end{figure} \end{figure}
%---------------------------------------------- %----------------------------------------------
...@@ -288,7 +288,7 @@ $\funp{P}(\seq{y}|\seq{x})$和$\funp{P}(\seq{x}|\seq{y})$是否真的没有关 ...@@ -288,7 +288,7 @@ $\funp{P}(\seq{y}|\seq{x})$和$\funp{P}(\seq{x}|\seq{y})$是否真的没有关
%---------------------------------------------- %----------------------------------------------
\begin{figure}[h] \begin{figure}[h]
\centering \centering
\includegraphics[scale=0.7]{Chapter16/Figures/figure-the-iterative-process-of-bidirectional-training.png} \input{Chapter16/Figures/figure-the-iterative-process-of-bidirectional-training}
\caption{双向训练的迭代过程} \caption{双向训练的迭代过程}
\label{fig:16-1-fk} \label{fig:16-1-fk}
\end{figure} \end{figure}
...@@ -315,7 +315,7 @@ $\funp{P}(\seq{y}|\seq{x})$和$\funp{P}(\seq{x}|\seq{y})$是否真的没有关 ...@@ -315,7 +315,7 @@ $\funp{P}(\seq{y}|\seq{x})$和$\funp{P}(\seq{x}|\seq{y})$是否真的没有关
\label{eq:16-7-xc} \label{eq:16-7-xc}
\end{eqnarray} \end{eqnarray}
\parinterval 公式\ref{eq:16-7-xc}很自然地把两个方向的翻译模型$\funp{P}(\seq{y}|\seq{x})$$\funp{P}(\seq{x}|\seq{y})$以及两个语言模型$\funp{P}(\seq{x})$$\funp{P}(\seq{y})$联系起来:$\funp{P}(\seq{x})\funp{P}(\seq{y}|\seq{x})$应该与$\funp{P}(\seq{y})\funp{P}(\seq{x}|\seq{y})$接近,因为它们都表达了同一个联合分布$\funp{P}(\seq{x},\seq{y})$。因此,在构建训练两个方向的翻译模型的目标函数时,除了它们单独训练时各自使用的极大似然估计目标函数,可以额外增加一个目标项来鼓励两个方向的翻译模型: \parinterval 公式\eqref{eq:16-7-xc}很自然地把两个方向的翻译模型$\funp{P}(\seq{y}|\seq{x})$$\funp{P}(\seq{x}|\seq{y})$以及两个语言模型$\funp{P}(\seq{x})$$\funp{P}(\seq{y})$联系起来:$\funp{P}(\seq{x})\funp{P}(\seq{y}|\seq{x})$应该与$\funp{P}(\seq{y})\funp{P}(\seq{x}|\seq{y})$接近,因为它们都表达了同一个联合分布$\funp{P}(\seq{x},\seq{y})$。因此,在构建训练两个方向的翻译模型的目标函数时,除了它们单独训练时各自使用的极大似然估计目标函数,可以额外增加一个目标项来鼓励两个方向的翻译模型:
\begin{eqnarray} \begin{eqnarray}
{L}_{\rm{dual}} & = & (\log{\funp{P}(\seq{x})} + \log{\funp{P}(\seq{y}|\seq{x})} - \log{\funp{P}(\seq{y})} - \log{\funp{P}(\seq{x}|\seq{y})})^{2} {L}_{\rm{dual}} & = & (\log{\funp{P}(\seq{x})} + \log{\funp{P}(\seq{y}|\seq{x})} - \log{\funp{P}(\seq{y})} - \log{\funp{P}(\seq{x}|\seq{y})})^{2}
\label{eq:16-8-xc} \label{eq:16-8-xc}
...@@ -323,7 +323,7 @@ $\funp{P}(\seq{y}|\seq{x})$和$\funp{P}(\seq{x}|\seq{y})$是否真的没有关 ...@@ -323,7 +323,7 @@ $\funp{P}(\seq{y}|\seq{x})$和$\funp{P}(\seq{x}|\seq{y})$是否真的没有关
\parinterval 通过该正则化项,我们将互为对偶的两个任务放在一块学习,通过任务对偶性加强监督学习的过程,就是有监督对偶学习\upcite{DBLP:conf/icml/XiaQCBYL17,qin2020dual}。这里,$\funp{P}(\seq{x})$$\funp{P}(\seq{y})$这两个语言模型是预先训练好的,并不参与翻译模型的训练。可以看到,对于单独的一个模型来说,其目标函数增加了与另外一个方向的模型相关的项。这样的形式与L1/L2正则化非常类似(见{\chapternine}),因此可以把这个方法看作是一种任务特定的正则化的手段(由翻译任务本身的性质所启发而来)。有监督对偶学习实际上要优化下面这个损失函数: \parinterval 通过该正则化项,我们将互为对偶的两个任务放在一块学习,通过任务对偶性加强监督学习的过程,就是有监督对偶学习\upcite{DBLP:conf/icml/XiaQCBYL17,qin2020dual}。这里,$\funp{P}(\seq{x})$$\funp{P}(\seq{y})$这两个语言模型是预先训练好的,并不参与翻译模型的训练。可以看到,对于单独的一个模型来说,其目标函数增加了与另外一个方向的模型相关的项。这样的形式与L1/L2正则化非常类似(见{\chapternine}),因此可以把这个方法看作是一种任务特定的正则化的手段(由翻译任务本身的性质所启发而来)。有监督对偶学习实际上要优化下面这个损失函数:
\begin{eqnarray} \begin{eqnarray}
{L} & = & \log{\funp{P}(\seq{y}|\seq{x})}+\log{\funp{P}(\seq{x}|\seq{y})}+\mathcal{L}_{\rm{dual}} {L} & = & \log{\funp{P}(\seq{y}|\seq{x})}+\log{\funp{P}(\seq{x}|\seq{y})}+{L}_{\rm{dual}}
\label{eq:16-2-fk} \label{eq:16-2-fk}
\end{eqnarray} \end{eqnarray}
...@@ -346,7 +346,7 @@ $\funp{P}(\seq{y}|\seq{x})$和$\funp{P}(\seq{x}|\seq{y})$是否真的没有关 ...@@ -346,7 +346,7 @@ $\funp{P}(\seq{y}|\seq{x})$和$\funp{P}(\seq{x}|\seq{y})$是否真的没有关
\label{eq:16-9-xc} \label{eq:16-9-xc}
\end{eqnarray} \end{eqnarray}
\parinterval 公式\ref{eq:16-9-xc}假设$\funp{P}(\seq{x}|\seq{y})=\funp{P}(\seq{x}|\seq{x},\seq{y})$。这个假设显然是成立的,因为当知道一个句子的译文时,并不需要知道它的源文就可以把它翻译回去。如果直接优化(最大化)公式\ref{eq:16-9-xc}右侧,相当于对这个等式$\funp{P}(\seq{x}|\seq{y})$$\funp{P}(\seq{y}|\seq{x})$施加了{\small\sffamily\bfnew{循环一致性}}\index{循环一致性}(Cycle Consistency)\index{Cycle Consistency}的约束\upcite{DBLP:conf/iccv/ZhuPIE17},也就是对于一个句子$\seq{x}$,通过$\funp{P}(\seq{y}|\seq{x})$把它翻译成$\seq{y}$后,根据$\funp{P}(\seq{x}|\seq{y})$应该能重新翻译出$\seq{x}$,如图\ref{fig:16-10-xc}所示。公式\ref{eq:16-9-xc}给出了同时优化$\funp{P}(\seq{x}|\seq{y})$$\funp{P}(\seq{y}|\seq{x})$的一个目标函数形式。这个目标函数的一个额外的好处是它本质上是在学习一个由$\funp{P}(\seq{x}|\seq{y})$$\funp{P}(\seq{y}|\seq{x})$组成的语言模型$\funp{P}(\seq{x})$,而$\funp{P}(\seq{x})$的学习依赖于单语数据,这意味着这个目标函数可以很自然地直接使用大量单语数据来同时训练两个翻译模型。相同的结论可以推广到$\funp{P}(\seq{y})$\upcite{DBLP:conf/nips/HeXQWYLM16} \parinterval 公式\eqref{eq:16-9-xc}假设$\funp{P}(\seq{x}|\seq{y})=\funp{P}(\seq{x}|\seq{x},\seq{y})$。这个假设显然是成立的,因为当知道一个句子的译文时,并不需要知道它的源文就可以把它翻译回去。如果直接优化(最大化)公式\eqref{eq:16-9-xc}右侧,相当于对这个等式$\funp{P}(\seq{x}|\seq{y})$$\funp{P}(\seq{y}|\seq{x})$施加了{\small\sffamily\bfnew{循环一致性}}\index{循环一致性}(Cycle Consistency)\index{Cycle Consistency}的约束\upcite{DBLP:conf/iccv/ZhuPIE17},也就是对于一个句子$\seq{x}$,通过$\funp{P}(\seq{y}|\seq{x})$把它翻译成$\seq{y}$后,根据$\funp{P}(\seq{x}|\seq{y})$应该能重新翻译出$\seq{x}$,如图\ref{fig:16-10-xc}所示。公式\eqref{eq:16-9-xc}给出了同时优化$\funp{P}(\seq{x}|\seq{y})$$\funp{P}(\seq{y}|\seq{x})$的一个目标函数形式。这个目标函数的一个额外的好处是它本质上是在学习一个由$\funp{P}(\seq{x}|\seq{y})$$\funp{P}(\seq{y}|\seq{x})$组成的语言模型$\funp{P}(\seq{x})$,而$\funp{P}(\seq{x})$的学习依赖于单语数据,这意味着这个目标函数可以很自然地直接使用大量单语数据来同时训练两个翻译模型。相同的结论可以推广到$\funp{P}(\seq{y})$\upcite{DBLP:conf/nips/HeXQWYLM16}
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
...@@ -357,14 +357,14 @@ $\funp{P}(\seq{y}|\seq{x})$和$\funp{P}(\seq{x}|\seq{y})$是否真的没有关 ...@@ -357,14 +357,14 @@ $\funp{P}(\seq{y}|\seq{x})$和$\funp{P}(\seq{x}|\seq{y})$是否真的没有关
\end{figure} \end{figure}
%---------------------------------------------- %----------------------------------------------
\parinterval 但是直接使用公式\ref{eq:16-9-xc}作为目标函数需要解决两个问题: \parinterval 但是直接使用公式\eqref{eq:16-9-xc}作为目标函数需要解决两个问题:
\begin{itemize} \begin{itemize}
\vspace{0.5em} \vspace{0.5em}
\item 计算公式\ref{eq:16-9-xc}要枚举所有可能的隐变量$\seq{y}$的取值,也就是所有可能产生的目标语句子,而这是不可能的,因此一般会通过平均多个随机产生的$\seq{y}$对应的损失来近似真正的目标函数值; \item 计算公式\eqref{eq:16-9-xc}要枚举所有可能的隐变量$\seq{y}$的取值,也就是所有可能产生的目标语句子,而这是不可能的,因此一般会通过平均多个随机产生的$\seq{y}$对应的损失来近似真正的目标函数值;
\vspace{0.5em} \vspace{0.5em}
\item 从公式\ref{eq:16-9-xc}可以看到,在$\funp{P}(\seq{x})$上计算完目标函数值后,得到的梯度首先传递给$\funp{P}(\seq{x}|\seq{y})$,然后通过$\funp{P}(\seq{x}|\seq{y})$传递给$\funp{P}(\seq{y}|\seq{x})$。由于$\funp{P}(\seq{x}|\seq{y})$的输入$\seq{y}$$\funp{P}(\seq{y}|\seq{x})$采样得到,而采样操作不可导,导致梯度的传播在$\funp{P}(\seq{y}|\seq{x})$的输出处断开了,因此$\funp{P}(\seq{y}|\seq{x})$接收不到任何梯度来进行更新。常见的解决方案是使用策略梯度\upcite{DBLP:conf/nips/SuttonMSM99}。策略梯度的基本思想如下:如果在执行某个动作之后,获得了一个不错的反馈,那么可以调整策略来增加这个状态下执行该动作的概率;反之,如果采取某个动作后获得了一个负反馈,就需要调整策略来降低这个状态下执行该动作的概率。在算法的实现上,首先对两个翻译模型求梯度,然后在策略调整时选择将梯度加到模型上(获得正反馈)或者减去该梯度(获得负反馈)。 \item 从公式\eqref{eq:16-9-xc}可以看到,在$\funp{P}(\seq{x})$上计算完目标函数值后,得到的梯度首先传递给$\funp{P}(\seq{x}|\seq{y})$,然后通过$\funp{P}(\seq{x}|\seq{y})$传递给$\funp{P}(\seq{y}|\seq{x})$。由于$\funp{P}(\seq{x}|\seq{y})$的输入$\seq{y}$$\funp{P}(\seq{y}|\seq{x})$采样得到,而采样操作不可导,导致梯度的传播在$\funp{P}(\seq{y}|\seq{x})$的输出处断开了,因此$\funp{P}(\seq{y}|\seq{x})$接收不到任何梯度来进行更新。常见的解决方案是使用策略梯度\upcite{DBLP:conf/nips/SuttonMSM99}。策略梯度的基本思想如下:如果在执行某个动作之后,获得了一个不错的反馈,那么可以调整策略来增加这个状态下执行该动作的概率;反之,如果采取某个动作后获得了一个负反馈,就需要调整策略来降低这个状态下执行该动作的概率。在算法的实现上,首先对两个翻译模型求梯度,然后在策略调整时选择将梯度加到模型上(获得正反馈)或者减去该梯度(获得负反馈)。
\vspace{0.5em} \vspace{0.5em}
\end{itemize} \end{itemize}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论