Commit c39382ec by 曹润柘

合并分支 'caorunzhe' 到 'master'

Caorunzhe

查看合并请求 !629
parents 4865f376 9efd050d
\begin{tikzpicture}
% Four text labels placed around an empty anchor node (center) and joined by
% curved arrows in the order left -> top -> right -> down -> left.
\tikzset{
  cycle label/.style={rectangle,inner sep=2pt,font=\scriptsize},
  cycle arrow/.style={->,line width=0.8pt}
}

% Invisible reference point; every label is positioned relative to it.
\node [cycle label] (center) at (0,0) {};

% Label above the centre.
\node [cycle label] (top) at ([yshift=3em,xshift=0em]center.north) {
  \begin{tabular}{c}
    翻译模型 \\
    $\textrm{P}(\ \mathbi{y}|\ \mathbi{x})$
  \end{tabular}
};

% Sentence on the left.
\node [cycle label] (left) at ([yshift=0em,xshift=-4em]center.west) {
  \begin{tabular}{c}
    今天天气真好。
  \end{tabular}
};

% Sentence on the right (two rows).
\node [cycle label] (right) at ([yshift=0em,xshift=4em]center.east) {
  \begin{tabular}{c}
    The weather is \\so good today.
  \end{tabular}
};

% Label below the centre.
\node [cycle label] (down) at ([yshift=-3em,xshift=0em]center.south) {
  \begin{tabular}{c}
    翻译模型 \\
    $\textrm{P}(\ \mathbi{x}|\ \mathbi{y})$
  \end{tabular}
};

% Upper half of the loop: left -> top -> right.
\draw [cycle arrow] (left.north) .. controls +(north:0.5) and +(west:0.5) .. (top.west);
\draw [cycle arrow] (top.east) .. controls +(east:0.5) and +(north:0.5) .. (right.north);
% Lower half of the loop: right -> down -> left.
\draw [cycle arrow] (down.west) .. controls +(west:0.5) and +(south:0.5) .. (left.south);
\draw [cycle arrow] (right.south) .. controls +(south:0.5) and +(east:0.5) .. (down.east) ;
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Two rows of LSTM boxes split into two dashed groups. In the left group
% (columns 1-4) the horizontal arrows point left-to-right; in the right group
% (columns 5-8) they point right-to-left. Embedding boxes e_1, e_2, ..., e_n sit
% below and feed both groups; output boxes P_1, P_2, ..., P_n sit above and are
% fed by both groups.
%
% Styles are declared with \tikzset (the form recommended by the TikZ manual;
% \tikzstyle is deprecated). They remain local to this picture.
\tikzset{
  embedding/.style={line width=0.6pt,draw=black,minimum width=2.5em,minimum height=1.6em,fill=green!20},
  model/.style={line width=0.6pt,draw=black,minimum width=3.0em,minimum height=1.6em,fill=blue!20,rounded corners=2pt}
}

% --- Lower LSTM row (node1-*): columns 1-4 on the left, 5-8 on the right.
\node [anchor=center,model] (node1-1) at (0,0) {\footnotesize{LSTM}};
\node [anchor=west,model] (node1-2) at ([xshift=1.8em]node1-1.east) {\footnotesize{LSTM}};
\node [anchor=west,scale=1.8] (node1-3) at ([xshift=1.0em]node1-2.east) {...};
\node [anchor=west,model] (node1-4) at ([xshift=1.0em]node1-3.east) {\footnotesize{LSTM}};
\node [anchor=west,model] (node1-5) at ([xshift=2.0em]node1-4.east) {\footnotesize{LSTM}};
\node [anchor=west,model] (node1-6) at ([xshift=1.8em]node1-5.east) {\footnotesize{LSTM}};
\node [anchor=west,scale=1.8] (node1-7) at ([xshift=1.0em]node1-6.east) {...};
\node [anchor=west,model] (node1-8) at ([xshift=1.0em]node1-7.east) {\footnotesize{LSTM}};

% --- Upper LSTM row (node2-*), stacked directly above the lower row.
\node [anchor=south,model](node2-1) at ([yshift=1.8em]node1-1.north){\footnotesize{LSTM}};
\node [anchor=south,model](node2-2) at ([yshift=1.8em]node1-2.north){\footnotesize{LSTM}};
\node [anchor=west,scale=1.8](node2-3) at ([xshift=1.0em]node2-2.east){...};
\node [anchor=south,model](node2-4) at ([yshift=1.8em]node1-4.north){\footnotesize{LSTM}};
\node [anchor=south,model](node2-5) at ([yshift=1.8em]node1-5.north){\footnotesize{LSTM}};
\node [anchor=south,model](node2-6) at ([yshift=1.8em]node1-6.north){\footnotesize{LSTM}};
\node [anchor=west,scale=1.8](node2-7) at ([xshift=1.0em]node2-6.east){...};
\node [anchor=south,model](node2-8) at ([yshift=1.8em]node1-8.north){\footnotesize{LSTM}};

% --- Horizontal arrows, lower row. The 0.5em shifts pull the arrow tips
% slightly into the scaled "..." nodes.
\draw [->,thick](node1-1.east)--(node1-2.west);
\draw [->,thick](node1-2.east)--([xshift=0.5em]node1-3.west);
\draw [->,thick]([xshift=-0.5em]node1-3.east)--(node1-4.west);
\draw [<-,thick](node1-5.east)--(node1-6.west);
\draw [<-,thick](node1-6.east)--([xshift=0.5em]node1-7.west);
\draw [<-,thick]([xshift=-0.5em]node1-7.east)--(node1-8.west);

% --- Vertical arrows from the lower row to the upper row.
\draw [->,thick](node1-1.north)--(node2-1.south);
\draw [->,thick](node1-2.north)--(node2-2.south);
\draw [->,thick](node1-4.north)--(node2-4.south);
\draw [->,thick](node1-5.north)--(node2-5.south);
\draw [->,thick](node1-6.north)--(node2-6.south);
\draw [->,thick](node1-8.north)--(node2-8.south);

% --- Horizontal arrows, upper row (same directions as the lower row).
\draw [->,thick](node2-1.east)--(node2-2.west);
\draw [->,thick](node2-2.east)--([xshift=0.5em]node2-3.west);
\draw [->,thick]([xshift=-0.5em]node2-3.east)--(node2-4.west);
\draw [<-,thick](node2-5.east)--(node2-6.west);
\draw [<-,thick](node2-6.east)--([xshift=0.5em]node2-7.west);
\draw [<-,thick]([xshift=-0.5em]node2-7.east)--(node2-8.west);

% --- Dashed boxes around each group, drawn on the background layer so they
% sit behind the LSTM boxes. Each box has its own node name (the second box
% previously reused the name remark1).
\begin{pgfonlayer}{background}
\node[fill=white,inner sep=0.5em,draw=black,line width=0.6pt,minimum width=6.0em,rounded corners=2pt,dashed,fit=(node1-1)(node1-2)(node1-3)(node1-4)(node2-1)] (remark1) {};
\node[fill=white,inner sep=0.5em,draw=black,line width=0.6pt,minimum width=6.0em,rounded corners=2pt,dashed,fit=(node1-5)(node1-6)(node1-7)(node1-8)(node2-8)] (remark2) {};
\end{pgfonlayer}

% --- Embedding boxes below the LSTM rows, placed between the two groups.
\node [anchor=north,embedding] (node0-2) at ([yshift=-2em]node1-4.south){\footnotesize{$\mathbi{e}_2$}};
\node [anchor=east,embedding] (node0-1) at ([xshift=-1.4em]node0-2.west){\footnotesize{$\mathbi{e}_1$}};
\node [anchor=north,scale=1.8] (node0-3) at ([yshift=-2em]node1-5.south){...};
\node [anchor=north,embedding] (node0-4) at ([yshift=-2em]node1-6.south){\footnotesize{$\mathbi{e}_n$}};

% Each embedding feeds one lower-row box in the left group and one in the right group.
\draw [->,thick](node0-1.north)--(node1-1.south);
\draw [->,thick](node0-1.north)--(node1-5.south);
\draw [->,thick](node0-2.north)--(node1-2.south);
\draw [->,thick](node0-2.north)--(node1-6.south);
\draw [->,thick](node0-4.north)--(node1-4.south);
\draw [->,thick](node0-4.north)--(node1-8.south);

% --- Output boxes above the LSTM rows (embedding style with a yellow fill).
\node [anchor=south,embedding,fill=yellow!20](node3-2) at ([yshift=2em]node2-4.north){\footnotesize{$\seq{P}_2$}};
\node [anchor=east,embedding,fill=yellow!20] (node3-1) at ([xshift=-1.4em]node3-2.west){\footnotesize{$\seq{P}_1$}};
\node [anchor=south,scale=1.8] (node3-3) at ([yshift=2em]node2-5.north){...};
\node [anchor=south,embedding,fill=yellow!20](node3-4) at ([yshift=2em]node2-6.north){\footnotesize{$\seq{P}_n$}};

% Each output receives one upper-row box from the left group and one from the
% right group (paths are written output-first, hence the <- arrow tips).
\draw [<-,thick](node3-1.south)--(node2-1.north);
\draw [<-,thick](node3-1.south)--(node2-5.north);
\draw [<-,thick](node3-2.south)--(node2-2.north);
\draw [<-,thick](node3-2.south)--(node2-6.north);
\draw [<-,thick](node3-4.south)--(node2-4.north);
\draw [<-,thick](node3-4.south)--(node2-8.north);
\end{tikzpicture}
\begin{tikzpicture}
% Two sub-figures separated by a vertical rule:
%   (a) 浅融合 -- columns a0 (dashed y_{t-1}) and a1 (y_t);
%   (b) 深融合 -- columns a2 (y_t) and a3 (y_{t+1}).
% Each full column stacks, from top to bottom: output circle, "+" box,
% "\times" circle (with a side weight node), P^{LM} node, P^{TM} node and a
% diamond C node.
%
% Styles are declared with \tikzset (the form recommended by the TikZ manual;
% \tikzstyle is deprecated). They remain local to this picture.
\tikzset{
  cir/.style={draw,inner sep=2pt,line width=1pt,align=center,minimum height=2em,minimum width=2em,circle,fill=white},
  add/.style={draw,inner sep=2pt,line width=1pt,align=center,minimum height=1em,minimum width=1em,fill=white},
  minicir/.style={draw,inner sep=2pt,line width=1pt,align=center,minimum height=1em,minimum width=1em,fill=white,circle},
  rec/.style={draw,inner sep=2pt,line width=1pt,align=center,minimum height=1.5em,minimum width=2.5em,fill=white},
  dia/.style={draw,inner sep=2pt,line width=1pt,align=center,fill=white,diamond,minimum height=2em,minimum width=2em}
}

% ---------------- (a): column a1, with the dashed previous output a0 ----------------
\node [cir,anchor=north,dashed] (a0) at (0,0) {\tiny{$y_{t-1}$}};
\node [cir,anchor=west] (a1) at ([xshift=4.0em]a0.east) {\tiny{$y_t$}};
\node [add,anchor=north] (a11) at ([yshift=-1em]a1.south) {\tiny{$+$}};
\node [minicir,anchor=north] (a12) at ([yshift=-1em]a11.south) {\tiny{$\times$}};
\node [minicir,anchor=west] (a11-1) at ([xshift=0.8em]a12.east) {\tiny{$\beta$}};
\node [rec,anchor=north] (a13) at ([yshift=-1.0em]a12.south) {\tiny{${\funp{P}}_{t}^{LM}$}};
\node [rec,anchor=north] (a14) at ([yshift=-2.0em]a13.south) {\tiny{${\funp{P}}_{t}^{TM}$}};
\node [dia,anchor=north] (a15) at ([yshift=-1em]a14.south) {\tiny{$\funp{C}_{t}$}};

% Ellipsis markers to the left of each row.
\node [anchor=west] (a13-2) at ([xshift=-4em]a13.west) {\tiny{$\cdots$}};
\node [anchor=west] (a14-2) at ([xshift=-4em]a14.west) {\tiny{$\cdots$}};
\node [anchor=west] (a15-2) at ([xshift=-4.25em]a15.west) {\tiny{$\cdots$}};

% Row captions, each split over two stacked nodes.
% The first caption reads "语言模型 / 隐藏层"; it previously had the words
% swapped ("模型语言").
\node [anchor=east] (a13-3) at ([yshift=0.8em]a13-2.west) {\small{语言模型}};
\node [anchor=north] (a13-4) at ([xshift=0em]a13-3.south) {\small{隐藏层}};
\node [anchor=east] (a14-3) at ([yshift=0.8em]a14-2.west) {\small{神经机器翻译}};
\node [anchor=north] (a14-4) at ([xshift=0.5em]a14-3.south) {\small{模型隐藏层}};
\node [anchor=east] (a15-3) at ([xshift=0em]a15-2.west) {\small{上下文向量}};

% Arrows inside column a1.
\draw[->,thick](a11.north) -- (a1.south);
\draw[->,thick](a12.north) -- (a11.south);
\draw[->,thick](a13.north) -- (a12.south);
\draw[->,thick](a11-1.west) -- (a12.east);
% Dashed arrows from the previous output a0 into both P nodes.
\draw[->,dashed](a0.south) -- (a13.north west);
\draw[->,dashed](a0.south) -- (a14.north west);
\draw[->,thick](a15.north) -- (a14.south);
% Short dashed stubs entering the P nodes from the left.
\draw[->,dashed]([xshift=-2.0em]a13.west) -- (a13.west);
\draw[->,dashed]([xshift=-2.0em]a14.west) -- (a14.west);
% Curved arrow from the P^{TM} node around the right side into the "+" box.
\draw [->,thick] (a14.east) ..controls + (east:1em) and +(east:4.1em).. (a11.east);
% Dashed arrows leaving y_t towards the lower right.
\draw[->,dashed](a1.south east) -- ([xshift=6.0em,yshift=-4.0em]a1.south);
\draw[->,dashed](a1.south east) -- ([xshift=6.0em,yshift=-7.5em]a1.south);
% Vertical rule separating sub-figure (a) from sub-figure (b).
\draw[-]([xshift=5.9em,yshift=1.05em]a1.east) -- ([xshift=5.9em,yshift=-14.7em]a1.east);
%%%%%%%%%%%%%%%%%%%%%%
% ---------------- (b): column a2 ----------------
\node [cir,anchor=west] (a2) at ([xshift=10.0em]a1.east) {\tiny{$y_{t}$}};
\node [add,anchor=north] (a21) at ([yshift=-1em]a2.south) {\tiny{$+$}};
\node [minicir,anchor=north] (a22) at ([yshift=-1em]a21.south) {\tiny{$\times$}};
\node [minicir,anchor=west] (a21-1) at ([xshift=0.8em]a22.east) {\tiny{$g_{t}$}};
\node [cir,anchor=north] (a23) at ([yshift=-0.6125em]a22.south) {\tiny{${\funp{P}}_{t}^{LM}$}};
\node [cir,anchor=north] (a24) at ([yshift=-1.217em]a23.south) {\tiny{${\funp{P}}_{t}^{TM}$}};
\node [dia,anchor=north] (a25) at ([yshift=-0.6044em]a24.south) {\tiny{$\funp{C}_{t}$}};
\node [anchor=west] (a23-2) at ([xshift=-3.5em]a23.west) {\tiny{$\cdots$}};
\node [anchor=west] (a24-2) at ([xshift=-3.5em]a24.west) {\tiny{$\cdots$}};
\node [anchor=west] (a25-2) at ([xshift=-3.65em]a25.west) {\tiny{$\cdots$}};
\draw[->,thick](a21.north) -- (a2.south);
\draw[->,thick](a22.north) -- (a21.south);
\draw[->,thick](a23.north) -- (a22.south);
\draw[->,thick](a21-1.west) -- (a22.east);
\draw[->,thick](a25.north) -- (a24.south);
% Curved arrows into the "+" box: from P^{TM} on the right, from C on the left.
\draw [->,thick] (a24.east) ..controls + (east:1em) and +(east:4.2em).. (a21.east);
\draw [->,thick] (a25.west) ..controls + (west:1em) and +(west:2em).. (a21.west);
\draw[->,dashed]([xshift=-1.5em]a23.west) -- (a23.west);
\draw[->,dashed]([xshift=-1.5em]a24.west) -- (a24.west);

% ---------------- (b): column a3 ----------------
% NOTE(review): this column is headed y_{t+1} but its g, P^{LM}, P^{TM} and C
% nodes carry subscript t, the same as column a2 -- confirm whether t+1 was
% intended.
\node [cir,anchor=west] (a3) at ([xshift=4.0em]a2.east) {\tiny{$y_{t+1}$}};
\node [add,anchor=north] (a31) at ([yshift=-1em]a3.south) {\tiny{$+$}};
\node [minicir,anchor=north] (a32) at ([yshift=-1em]a31.south) {\tiny{$\times$}};
\node [minicir,anchor=west] (a31-1) at ([xshift=0.8em]a32.east) {\tiny{$g_{t}$}};
\node [cir,anchor=north] (a33) at ([yshift=-0.6125em]a32.south) {\tiny{${\funp{P}}_{t}^{LM}$}};
\node [cir,anchor=north] (a34) at ([yshift=-1.217em]a33.south) {\tiny{${\funp{P}}_{t}^{TM}$}};
\node [dia,anchor=north] (a35) at ([yshift=-0.6044em]a34.south) {\tiny{$\funp{C}_{t}$}};
\draw[->,thick](a31.north) -- (a3.south);
\draw[->,thick](a32.north) -- (a31.south);
\draw[->,thick](a33.north) -- (a32.south);
\draw[->,thick](a31-1.west) -- (a32.east);
\draw[->,thick](a35.north) -- (a34.south);
% Dashed links from column a2 to column a3 along the two P rows.
\draw[->,dashed](a23.east) -- (a33.west);
\draw[->,dashed](a24.east) -- (a34.west);
\draw [->,thick] (a34.east) ..controls + (east:1em) and +(east:4.2em).. (a31.east);
\draw [->,thick] (a35.west) ..controls + (west:1em) and +(west:2em).. (a31.west);
% Dashed stubs continuing to the right of column a3.
\draw[->,dashed](a33.east) -- ([xshift=2em]a33.east);
\draw[->,dashed](a34.east) -- ([xshift=2em]a34.east);
\draw[->,dashed](a3.south east) -- ([xshift=6.0em,yshift=-4.0em]a3.south);
\draw[->,dashed](a3.south east) -- ([xshift=6.0em,yshift=-7.5em]a3.south);

% Sub-figure captions.
\node[anchor=north](pos1) at ([xshift=-1.5em,yshift=-0.5em]a15.south) {(a) 浅融合};
\node[anchor=north](pos2) at ([xshift=-2.0em,yshift=-0.5em]a35.south) {(b) 深融合};
\end{tikzpicture}
......@@ -372,7 +372,7 @@
\begin{itemize}
\vspace{0.5em}
\item 基于枢轴语言的方法,即以资源丰富的语言(通常为英语、汉语等)为中心,在语言对之间进行翻译\upcite{DBLP:conf/emnlp/KimPPKN19}{\color{red} 参考文献:Pivot Language Approach for Phrase-Based Statistical Machine Translation}
\item 基于枢轴语言的方法,即以资源丰富的语言(通常为英语、汉语等)为中心,在语言对之间进行翻译\upcite{DBLP:conf/emnlp/KimPPKN19,DBLP:journals/mt/WuW07}
\vspace{0.5em}
\item 基于知识蒸馏的方法,即用枢轴语言到目标语言的训练指导源语言到目标语言的训练\upcite{DBLP:conf/acl/ChenLCL17}
\vspace{0.5em}
......@@ -388,7 +388,7 @@
\subsection{基于枢轴语言的方法}
\parinterval 传统的多语言翻译中,广泛使用的是{\small\bfnew{基于枢轴语言的翻译}}(Pivot-based Translation)\upcite{DBLP:conf/emnlp/KimPPKN19}{\color{red} 参考文献:Pivot Language Approach for Phrase-Based Statistical Machine Translation}。在这种方法中,会使用一种数据丰富语言作为{\small\bfnew{中介语言}}\index{中介语言}或者{\small\bfnew{枢轴语言}}\index{枢轴语言}(Pivot Language)\index{Pivot Language},之后让源语言和目标语言向枢轴语言进行翻译。这样,通过资源丰富的中介语言将源语言和目标语言桥接在一起,达到解决源语言-目标语言双语数据缺乏的问题。比如,想要得到泰语到波兰语的翻译,可以通过英语做枢轴语言。通过“泰语$\to$英语$\to$波兰语”的翻译过程完成泰语到波兰语的转换。
\parinterval 传统的多语言翻译中,广泛使用的是{\small\bfnew{基于枢轴语言的翻译}}(Pivot-based Translation)\upcite{DBLP:conf/emnlp/KimPPKN19,DBLP:journals/mt/WuW07}。在这种方法中,会使用一种数据丰富的语言作为{\small\bfnew{中介语言}}\index{中介语言}或者{\small\bfnew{枢轴语言}}\index{枢轴语言}(Pivot Language)\index{Pivot Language},之后让源语言和目标语言向枢轴语言进行翻译。这样,通过资源丰富的中介语言将源语言和目标语言桥接在一起,从而解决源语言-目标语言双语数据缺乏的问题。比如,想要得到泰语到波兰语的翻译,可以通过英语做枢轴语言。通过“泰语$\to$英语$\to$波兰语”的翻译过程完成泰语到波兰语的转换。
\parinterval 基于枢轴语言的方法很早就出现在统计机器翻译中。在基于短语的机器翻译中,已经有很多方法建立了源到枢轴和枢轴到目标的短语/单词级别特征,并基于这些特征开发了源语言到目标语言的系统\upcite{DBLP:conf/naacl/UtiyamaI07,DBLP:journals/mt/WuW07,DBLP:conf/acl/ZahabiBK13,DBLP:conf/emnlp/ZhuHWZWZ14,DBLP:conf/acl/MiuraNSTN15},这些系统也已经广泛用于翻译稀缺资源语言对\upcite{DBLP:conf/acl/CohnL07,DBLP:journals/mt/WuW07,DBLP:conf/acl/WuW09,de2006catalan}。由于基于枢轴语言的方法与模型结构无关,因此该方法也很快被应用于神经机器翻译,并且取得了不错的效果\upcite{DBLP:conf/emnlp/KimPPKN19,DBLP:journals/corr/ChengLYSX16}。
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论