Commit 8dd2623a by 曹润柘

update chapter 6

parent 6ad7094d
%%% outline
%-------------------------------------------------------------------------
% Word-alignment matrices for two Chinese-to-English sentence pairs.
% Rows are source (Chinese) words, top to bottom; columns are target
% (English) words, left to right. A black cell marks an aligned word pair.
% \bc is the side length of one matrix cell.
\newlength{\bc}
\setlength{\bc}{0.5cm}
\begin{tikzpicture}[
  % Source-word labels sit to the left of each row.
  rowlabel/.style={anchor=east,inner sep=0pt,font=\footnotesize},
  % Target-word labels sit above each column, rotated so long words fit.
  collabel/.style={anchor=west,inner sep=0pt,font=\footnotesize,rotate=45},
  % Sub-figure tags "(a)" / "(b)" placed below each matrix.
  paneltag/.style={anchor=east,inner sep=0pt,font=\footnotesize}]

%%% (a) 5x5 matrix
\begin{scope}
\filldraw [fill=white,drop shadow] (0,0) rectangle (\bc*5,\bc*5);
% Aligned cells given as column/row, with row 0 at the BOTTOM of the matrix.
\foreach \x/\y in {2/0,1/1,4/2,3/3,0/4} {
  \filldraw [fill=black] (\bc*\x,\bc*\y) rectangle (\bc*\x+\bc,\bc*\y+\bc);
}
\draw [step=\bc, very thin, color=gray] (0,0) grid (\bc*5,\bc*5);
% Closed rectangle so that all four corners of the thick frame are joined.
\draw [line width=1pt] (0,0) rectangle (\bc*5,\bc*5);

% Top-left corner of the matrix; all labels are positioned relative to it.
\coordinate (o) at (0,\bc*5);

% Source words: -0.25cm (half a cell) centres each label on its row.
\node[rowlabel] at([xshift=-0.4em,yshift=-0.25cm]o.west){我};
\node[rowlabel] at([xshift=-0.4em,yshift=-0.25cm-\bc]o.west){对};
\node[rowlabel] at([xshift=-0.4em,yshift=-0.25cm-\bc*2]o.west){你};
\node[rowlabel] at([xshift=-0.4em,yshift=-0.25cm-\bc*3]o.west){感到};
\node[rowlabel] at([xshift=-0.4em,yshift=-0.25cm-\bc*4]o.west){满意};

% Target words. The first label uses 0.2cm instead of 0.1cm, as in the
% original figure (presumably a manual nudge for the narrow glyph "I").
\node[collabel] at([xshift=0.2cm,yshift=0.4em]o.east){I};
\node[collabel] at([xshift=0.1cm+\bc,yshift=0.4em]o.east){am};
\node[collabel] at([xshift=0.1cm+\bc*2,yshift=0.4em]o.east){satisfied};
\node[collabel] at([xshift=0.1cm+\bc*3,yshift=0.4em]o.east){with};
\node[collabel] at([xshift=0.1cm+\bc*4,yshift=0.4em]o.east){you};

\node[paneltag] at([xshift=\bc*3,yshift=-1.0cm-\bc*4]o.west){(a)};
\end{scope}

%%% (b) 8 columns x 6 rows
\begin{scope}[xshift=15.0em]
\filldraw [fill=white,drop shadow] (0,0) rectangle (\bc*8,\bc*6);
% Aligned cells given as column/row, with row 0 at the BOTTOM of the matrix.
\foreach \x/\y in {0/5,1/3,2/2,3/2,4/1,5/0,6/4,7/4} {
  \filldraw [fill=black] (\bc*\x,\bc*\y) rectangle (\bc*\x+\bc,\bc*\y+\bc);
}
\draw [step=\bc, very thin, color=gray] (0,0) grid (\bc*8,\bc*6);
\draw [line width=1pt] (0,0) rectangle (\bc*8,\bc*6);

% Top-left corner of this matrix (redefines (o) for the second panel).
\coordinate (o) at (0,\bc*6);

% Source words, top to bottom.
\node[rowlabel] at([xshift=-0.4em,yshift=-0.25cm]o.west){他};
\node[rowlabel] at([xshift=-0.4em,yshift=-0.25cm-\bc]o.west){每天};
\node[rowlabel] at([xshift=-0.4em,yshift=-0.25cm-\bc*2]o.west){乘坐};
\node[rowlabel] at([xshift=-0.4em,yshift=-0.25cm-\bc*3]o.west){地铁};
\node[rowlabel] at([xshift=-0.4em,yshift=-0.25cm-\bc*4]o.west){去};
\node[rowlabel] at([xshift=-0.4em,yshift=-0.25cm-\bc*5]o.west){上班};

% Target words, left to right.
\node[collabel] at([xshift=0.1cm,yshift=0.4em]o.east){He};
\node[collabel] at([xshift=0.1cm+\bc,yshift=0.4em]o.east){takes};
\node[collabel] at([xshift=0.1cm+\bc*2,yshift=0.4em]o.east){the};
\node[collabel] at([xshift=0.1cm+\bc*3,yshift=0.4em]o.east){subway};
\node[collabel] at([xshift=0.1cm+\bc*4,yshift=0.4em]o.east){to};
\node[collabel] at([xshift=0.1cm+\bc*5,yshift=0.4em]o.east){work};
\node[collabel] at([xshift=0.1cm+\bc*6,yshift=0.4em]o.east){every};
\node[collabel] at([xshift=0.1cm+\bc*7,yshift=0.4em]o.east){day};

\node[paneltag] at([xshift=\bc*4.5,yshift=-1.0cm-\bc*5]o.west){(b)};
\end{scope}
\end{tikzpicture}
%---------------------------------------------------------------------
\ No newline at end of file
%%% outline
%-------------------------------------------------------------------------
% Two stacked panels comparing word-by-word (sequential) translation with
% reordered translation of the same source sentence. Dashed lines connect
% each source word box to the target word box it is translated into.
\begin{tikzpicture}[
  % Shared look of every word box.
  wordbox/.style={draw,inner sep=4pt,line width=1pt,align=center,drop shadow,minimum height=1.6em,minimum width=4.2em},
  cand/.style={wordbox,fill=green!30},  % source-language word
  ref/.style={wordbox,fill=red!30},     % target-language word
  % Unframed row caption placed to the left of each row of boxes.
  rowtitle/.style={align=center,minimum height=1.6em,minimum width=6em},
  % Dashed source-to-target correspondence line.
  alignlink/.style={line width=1.2pt,dashed}]

%%% Top panel: sequential translation (target words keep the source order)
\begin{scope}
\node[rowtitle] (n11) at (0,0){源语};
\node[cand,anchor=west] (n12) at ([xshift=0.0em]n11.east){我};
\node[cand,anchor=west] (n13) at ([xshift=1em]n12.east){对};
\node[cand,anchor=west] (n14) at ([xshift=1em]n13.east){你};
\node[cand,anchor=west] (n15) at ([xshift=1em]n14.east){感到};
\node[cand,anchor=west] (n16) at ([xshift=1em]n15.east){满意};

\node[rowtitle,anchor=north] (n21) at ([yshift=-4em]n11.south){顺序翻译};
\node[ref,anchor=west] (n22) at ([xshift=0.0em]n21.east){I};
\node[ref,anchor=west] (n23) at ([xshift=1em]n22.east){with};
\node[ref,anchor=west] (n24) at ([xshift=1em]n23.east){you};
\node[ref,anchor=west] (n25) at ([xshift=1em]n24.east){am};
\node[ref,anchor=west] (n26) at ([xshift=1em]n25.east){satisfied};

% Every source word maps straight down to the box below it.
\draw[alignlink] ([yshift=-0.3em]n12.south) -- ([yshift=0.2em]n22.north);
\draw[alignlink] ([yshift=-0.3em]n13.south) -- ([yshift=0.2em]n23.north);
\draw[alignlink] ([yshift=-0.3em]n14.south) -- ([yshift=0.2em]n24.north);
\draw[alignlink] ([yshift=-0.3em]n15.south) -- ([yshift=0.2em]n25.north);
\draw[alignlink] ([yshift=-0.3em]n16.south) -- ([yshift=0.2em]n26.north);
\end{scope}

%%% Bottom panel: reordered translation (node names n11..n26 are reused)
\begin{scope}[yshift=-10.0em]
\node[rowtitle] (n11) at (0,0){源语};
\node[cand,anchor=west] (n12) at ([xshift=0.0em]n11.east){我};
\node[cand,anchor=west] (n13) at ([xshift=1em]n12.east){对};
\node[cand,anchor=west] (n14) at ([xshift=1em]n13.east){你};
\node[cand,anchor=west] (n15) at ([xshift=1em]n14.east){感到};
\node[cand,anchor=west] (n16) at ([xshift=1em]n15.east){满意};

\node[rowtitle,anchor=north] (n21) at ([yshift=-4em]n11.south){调序翻译};
\node[ref,anchor=west] (n22) at ([xshift=0.0em]n21.east){I};
\node[ref,anchor=west] (n23) at ([xshift=1em]n22.east){am};
\node[ref,anchor=west] (n24) at ([xshift=1em]n23.east){satisfied};
\node[ref,anchor=west] (n25) at ([xshift=1em]n24.east){with};
\node[ref,anchor=west] (n26) at ([xshift=1em]n25.east){you};

% Source boxes 2-3 link to target boxes 4-5 and source boxes 4-5 link to
% target boxes 2-3, so the correspondence lines cross.
\draw[alignlink] ([yshift=-0.3em]n12.south) -- ([yshift=0.2em]n22.north);
\draw[alignlink,out=-40,in=140] ([yshift=-0.3em]n13.south) to ([yshift=0.2em]n25.north);
\draw[alignlink,out=-40,in=140] ([yshift=-0.3em]n14.south) to ([yshift=0.2em]n26.north);
\draw[alignlink,out=-140,in=40] ([yshift=-0.3em]n15.south) to ([yshift=0.2em]n23.north);
\draw[alignlink,out=-140,in=40] ([yshift=-0.3em]n16.south) to ([yshift=0.2em]n24.north);
\end{scope}
\end{tikzpicture}
%---------------------------------------------------------------------
\ No newline at end of file
......@@ -41,7 +41,30 @@
%----------------------------------------------------------------------------------------
\subsection{什么是扭曲度}
% 写作备注:可以考虑先用例子描述一下翻译中的调序,之后说单词调序对应着``调序距离'',这种距离实际上可以被看做是一种扭曲度……
\parinterval {\small\sffamily\bfseries{调序}}(Reordering)是自然语言翻译中特有的语言现象。造成这个现象的主要原因在于不同语言之间语序的差异,比如,汉语是“主谓宾”结构,而日语是“主宾谓”结构。即使在句子整体结构相似的语言上进行翻译,调序也是频繁出现的现象。如图\ref{fig:6-1}所示,当一个主动语态的汉语句子翻译为一个被动语态的英语句子时,如果直接顺序翻译,那么翻译结果“I with you am satisfied”很明显不符合英语语法。这里就需要采取一些方法和手段在翻译过程中对词或短语进行调序,从而得到正确的翻译结果。
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter6/Figures/figure-examples-of-sequential-translation-and-reorder-translation}
\caption{顺序翻译和调序翻译的实例对比}
\label{fig:6-1}
\end{figure}
%----------------------------------------------
\parinterval 既然调序是翻译中的基本现象,机器翻译自然就需要一种方式对其进行描述。其中,最常见的是基于“调序距离”的方法。这里,可以假设完全进行顺序翻译时,调序的“代价”是最低的。当调序出现时,可以用调序相对于顺序翻译产生的位置偏移来度量调序的程度,也被称为“调序距离”。图\ref{fig:6-2}展示了翻译时两种语言中词的对齐矩阵。比如,在图\ref{fig:6-2}(a)中,系统需要跳过“对”和“你”来翻译“感到”和“满意”,之后再回过头翻译“对”和“你”,这样就完成了对单词的调序。这时可以简单地把调序时需要跳过的单词数看作一种距离。
\parinterval 可以看到,调序距离实际上是在度量译文词序相对于源文词序的一种扭曲程度。因此,也常常把这种调序距离称作{\small\sffamily\bfseries{扭曲度}}(Distortion)。调序距离越大对应的扭曲度也越大。比如,可以明显看出图\ref{fig:6-2}(b)中调序的扭曲度要比图\ref{fig:6-2}(a)中调序的扭曲度大,因此图\ref{fig:6-2}(b)实例的调序代价也更大。
\parinterval 在机器翻译中使用扭曲度进行翻译建模是一种十分自然的想法。接下来,会介绍两个基于扭曲度的翻译模型,分别是IBM模型2和隐马尔可夫模型。不同于IBM模型1,它们利用了单词的位置信息定义了扭曲度,并将扭曲度融入翻译模型中,使得对翻译问题的建模更加合理。
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter6/Figures/figure-alignment-matrix-for-zh-to-en-translation}
\caption{汉语到英语翻译的对齐矩阵}
\label{fig:6-2}
\end{figure}
%----------------------------------------------
%----------------------------------------------------------------------------------------
% NEW SUB-SECTION
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论