Commit e1d9c36f by 单韦乔

更新第四章参考文献,cky,以及部分图片名称

parent 285843eb
%------------------------------------------------------------------------------------------------------------
%%% CKY decoding: pseudo-code of the CKY algorithm
% See the NiuTrans Manual.
\begin{center}
\begin{tikzpicture}
% Default anchor for the pseudo-code lines; individual nodes override it where needed.
\tikzstyle{srcnode} = [anchor=south west]
\begin{scope}[scale=0.85]
% Function header; every other node is positioned (directly or indirectly) relative to it.
\node[srcnode] (c1) at (0,0) {\normalsize{\textbf{Function} CKY-Algorithm($\textbf{s},G$)}};
% Initialisation loop: add the matching terminal rules to every span of length 1.
\node[srcnode,anchor=north west] (c21) at ([xshift=1.5em,yshift=0.4em]c1.south west) {\normalsize{\textbf{for} $j=0$ to $ J - 1$}};
\node[srcnode,anchor=north west] (c22) at ([xshift=1.5em,yshift=0.4em]c21.south west) {\normalsize{$span[j,j+1 ]$.Add($A \to a \in G$)}};
% Main triple loop. The "//" nodes (c31, c41, c51) form a comment column to the right of the loops.
\node[srcnode,anchor=north west] (c3) at ([xshift=-1.5em,yshift=0.4em]c22.south west) {\normalsize{\textbf{for} $l$ = 1 to $J$}};
\node[srcnode,anchor=west] (c31) at ([xshift=6em]c3.east) {\normalsize{// length of span}};
\node[srcnode,anchor=north west] (c4) at ([xshift=1.5em,yshift=0.4em]c3.south west) {\normalsize{\textbf{for} $j$ = 0 to $J-l$}};
\node[srcnode,anchor=north west] (c41) at ([yshift=0.4em]c31.south west) {\normalsize{// beginning of span}};
% NOTE(review): the split range "k = j to j+l" includes both end points, which yields an
% empty sub-span; a strict CKY split would be j+1 .. j+l-1 -- confirm this simplification is intended.
\node[srcnode,anchor=north west] (c5) at ([xshift=1.5em,yshift=0.4em]c4.south west) {\normalsize{\textbf{for} $k$ = $j$ to $j+l$}};
\node[srcnode,anchor=north west] (c51) at ([yshift=0.4em]c41.south west) {\normalsize{// partition of span}};
% Loop body: combine the two sub-spans and store the result on the enclosing span.
\node[srcnode,anchor=north west] (c6) at ([xshift=1.5em,yshift=0.4em]c5.south west) {\normalsize{$hypos$ = Compose($span[j, k], span[k, j+l]$)}};
\node[srcnode,anchor=north west] (c7) at ([yshift=0.4em]c6.south west) {\normalsize{$span[j, j+l]$.Update($hypos$)}};
% Return statement, shifted back by 4.5em so it lines up with the outermost loops.
\node[srcnode,anchor=north west] (c8) at ([xshift=-4.5em,yshift=0.4em]c7.south west) {\normalsize{\textbf{return} $span[0, J]$}};
% Description lines stacked above the function header: parameters (c9), output (c10), input (c11).
\node[anchor=west] (c9) at ([xshift=-3.2em,yshift=1.7em]c1.west) {\small{\textrm{参数:}\textbf{s}为输入字符串。$G$为输入CFG。$J$为待分析字符串长度。}};
\node[anchor=west] (c10) at ([xshift=0em,yshift=1.3em]c9.west) {\small{\textrm{输出:字符串全部可能的语法分析结果}}};
\node[anchor=west] (c11) at ([xshift=0em,yshift=1.3em]c10.west) {\small{\textrm{输入:符合乔姆斯基范式的待分析字符串和一个上下文无关文法(CFG)}}};
% Shaded box behind the listing; it is fitted around the listed nodes on the background layer.
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=blue!10!white] [fit = (c1) (c21) (c3) (c6) (c7) (c8) (c11)] (gl1) {};
\end{pgfonlayer}
\end{scope}
\end{tikzpicture}
\end{center}
%------------------------------------------------------------------------------------------------------------
%%% Tree-based decoding method - chart-based decoding
% Grammar box: two rows of production rules (each of the form X -> YZ or X -> terminal)
% typeset on a framed, shaded background.
\begin{center}
\begin{tikzpicture}\footnotesize
\begin{scope}[scale=0.2]
% First row of rules.
\node[anchor=south east] (g1) at (0,0) {\small{$\textrm{S} \to \textrm{AB}\ \ \ \textrm{A} \to \textrm{CD}\ \vert \ \textrm{CF}\ \ \ \textrm{B} \to \textrm{c}\ \vert \ \textrm{BE}$}};
% Second row, left-aligned under the first one.
\node[anchor=north west] (g2) at ([yshift=0.3em]g1.south west) {\small{$\textrm{C} \to \textrm{a}\ \ \ \ \textrm{D} \to \textrm{b}\ \ \ \ \textrm{E} \to \textrm{c}\ \ \ \ \textrm{F} \to \textrm{AD}$}};
% Frame fitted around both rows on the background layer.
% The colour "ugreen" and the "drop shadow" key are not defined in this part of the file.
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.1em,rounded corners=1pt,fill=green!10,drop shadow,draw=ugreen] [fit = (g1) (g2)] (gl1) {};
\end{pgfonlayer}
\end{scope}
\end{tikzpicture}
\end{center}
%------------------------------------------------------------------------------------------------------------
%%% CKY decoding: execution example drawn as four panels (a)-(d)
\begin{tikzpicture}\scriptsize
% alignmentnode: small blue square used for every (not yet highlighted) chart cell.
\tikzstyle{alignmentnode} = [rectangle,fill=blue!20,minimum size=0.5em,text=white,inner sep=0.1pt]
% selectnode: larger box drawn on top of a chart cell; the panels override its fill with red!20.
\tikzstyle{selectnode} = [rectangle,fill=green!20,minimum height=1.5em,minimum width=1.5em,inner sep=1.2pt]
% srcnode and chartnode are defined here but not referenced by any node of this picture.
\tikzstyle{srcnode} = [anchor=south west]
\tikzstyle{chartnode}=[rectangle,minimum size=1.3em,draw]
% Panel (a): chart after step 1 only.
\begin{scope}[scale=0.5]
% Input symbols a a b b c, stacked from top to bottom.
\node [anchor=east] (s1) at (0,0) {a};
\node [anchor=north west] (s2) at ([yshift=-2.5em]s1.south west) {a};
\node [anchor=north west] (s3) at ([yshift=-2.4em]s2.south west) {b};
\node [anchor=north west] (s4) at ([yshift=-2.3em]s3.south west) {b};
\node [anchor=north west] (s5) at ([yshift=-2.2em]s4.south west) {c};
% Triangular chart: cellRC lies in the row of input symbol sR, in column C (labelled l=C below).
\node [alignmentnode,anchor=west] (cell11) at ([xshift=1.9em]s1.east) {};
\node [alignmentnode,anchor=west] (cell21) at ([xshift=1.9em]s2.east) {};
\node [alignmentnode,anchor=west] (cell22) at ([xshift=3.5em]cell21.east) {};
\node [alignmentnode,anchor=west] (cell31) at ([xshift=2em]s3.east) {};
\node [alignmentnode,anchor=west] (cell32) at ([xshift=3.5em]cell31.east) {};
\node [alignmentnode,anchor=west] (cell33) at ([xshift=3.5em]cell32.east) {};
\node [alignmentnode,anchor=west] (cell41) at ([xshift=1.9em]s4.east) {};
\node [alignmentnode,anchor=west] (cell42) at ([xshift=3.5em]cell41.east) {};
\node [alignmentnode,anchor=west] (cell43) at ([xshift=3.5em]cell42.east) {};
\node [alignmentnode,anchor=west] (cell44) at ([xshift=3.5em]cell43.east) {};
\node [alignmentnode,anchor=west] (cell51) at ([xshift=1.9em]s5.east) {};
\node [alignmentnode,anchor=west] (cell52) at ([xshift=3.5em]cell51.east) {};
\node [alignmentnode,anchor=west] (cell53) at ([xshift=3.5em]cell52.east) {};
\node [alignmentnode,anchor=west] (cell54) at ([xshift=3.5em]cell53.east) {};
\node [alignmentnode,anchor=west] (cell55) at ([xshift=3.5em]cell54.east) {};
% Column labels (span length l) under the bottom row, followed by the panel caption.
\node [anchor=north] (l1) at ([yshift=-1em]cell51.south) {\tiny{$l$=1}};
\node [anchor=north] (l2) at ([yshift=-1em]cell52.south) {\tiny{$l$=2}};
\node [anchor=north] (l3) at ([yshift=-1em]cell53.south) {\tiny{$l$=3}};
\node [anchor=north] (l4) at ([yshift=-1em]cell54.south) {\tiny{$l$=4}};
\node [anchor=north] (l5) at ([yshift=-1em]cell55.south) {\tiny{$l$=5}};
\node [anchor=north] (caption1) at ([xshift=0.0em,yshift=0.0em]l5.south) {(a)};
% Position indices 0..5 (blue), placed left of the chart: above each row, and below the last row.
\node [anchor=center] (y1) at ([xshift=-2.1em,yshift=2em]cell11.center) {\tiny{\blue 0}};
\node [anchor=center] (y2) at ([xshift=-2.1em,yshift=2em]cell21.center) {\tiny{\blue 1}};
\node [anchor=center] (y3) at ([xshift=-2.1em,yshift=2em]cell31.center) {\tiny{\blue 2}};
\node [anchor=center] (y4) at ([xshift=-2.1em,yshift=2em]cell41.center) {\tiny{\blue 3}};
\node [anchor=center] (y5) at ([xshift=-2.1em,yshift=2em]cell51.center) {\tiny{\blue 4}};
\node [anchor=center] (y6) at ([xshift=-2.1em,yshift=-2em]cell51.center) {\tiny{\blue 5}};
% Header of the step table to the right of the chart: index / span / derivation, with a rule below.
\node [anchor=west] (num) at ([xshift=22.3em,yshift=2em]s1.east) {\scriptsize{序号}};
\node [anchor=west] (kua) at ([xshift=0em]num.east) {\scriptsize{跨度}};
\node [anchor=west] (tui) at ([xshift=0.4em]kua.east) {\scriptsize{推导}};
\draw[-] ([yshift=-0.1em]num.south west)--([xshift=17em,yshift=-0.1em]num.south west);
% Step 1: span [0,1], rule C -> a; cell11 is highlighted and labelled C.
{
\node [anchor=west] (n1) at ([xshift=1.4em,yshift=-1.5em]num.south west) {\scriptsize{1}};
\node [anchor=west] (k1) at ([yshift=-1.8em]kua.south west) {\scriptsize{[{\blue 0},{\blue 1}]}};
\node [anchor=west] (t1) at ([xshift=-0.8em,yshift=-1.5em]tui.south west) {\scriptsize{C $\to$ a}};
\node [anchor=center,selectnode,fill=red!20] (alig11) at (cell11.center) {\tiny{C}};
}
\end{scope}
% Panel (b): chart after steps 1-5 (all spans of length 1); placed to the right of panel (a).
\begin{scope}[xshift=22.5em,scale=0.5]
% Input symbols a a b b c, stacked from top to bottom.
\node [anchor=east] (s1) at (0,0) {a};
\node [anchor=north west] (s2) at ([yshift=-2.5em]s1.south west) {a};
\node [anchor=north west] (s3) at ([yshift=-2.4em]s2.south west) {b};
\node [anchor=north west] (s4) at ([yshift=-2.3em]s3.south west) {b};
\node [anchor=north west] (s5) at ([yshift=-2.2em]s4.south west) {c};
% Triangular chart: cellRC lies in the row of input symbol sR, in column C (labelled l=C below).
\node [alignmentnode,anchor=west] (cell11) at ([xshift=1.9em]s1.east) {};
\node [alignmentnode,anchor=west] (cell21) at ([xshift=1.9em]s2.east) {};
\node [alignmentnode,anchor=west] (cell22) at ([xshift=3.5em]cell21.east) {};
\node [alignmentnode,anchor=west] (cell31) at ([xshift=2em]s3.east) {};
\node [alignmentnode,anchor=west] (cell32) at ([xshift=3.5em]cell31.east) {};
\node [alignmentnode,anchor=west] (cell33) at ([xshift=3.5em]cell32.east) {};
\node [alignmentnode,anchor=west] (cell41) at ([xshift=1.9em]s4.east) {};
\node [alignmentnode,anchor=west] (cell42) at ([xshift=3.5em]cell41.east) {};
\node [alignmentnode,anchor=west] (cell43) at ([xshift=3.5em]cell42.east) {};
\node [alignmentnode,anchor=west] (cell44) at ([xshift=3.5em]cell43.east) {};
\node [alignmentnode,anchor=west] (cell51) at ([xshift=1.9em]s5.east) {};
\node [alignmentnode,anchor=west] (cell52) at ([xshift=3.5em]cell51.east) {};
\node [alignmentnode,anchor=west] (cell53) at ([xshift=3.5em]cell52.east) {};
\node [alignmentnode,anchor=west] (cell54) at ([xshift=3.5em]cell53.east) {};
\node [alignmentnode,anchor=west] (cell55) at ([xshift=3.5em]cell54.east) {};
% Column labels (span length l) under the bottom row, followed by the panel caption.
\node [anchor=north] (l1) at ([yshift=-1em]cell51.south) {\tiny{$l$=1}};
\node [anchor=north] (l2) at ([yshift=-1em]cell52.south) {\tiny{$l$=2}};
\node [anchor=north] (l3) at ([yshift=-1em]cell53.south) {\tiny{$l$=3}};
\node [anchor=north] (l4) at ([yshift=-1em]cell54.south) {\tiny{$l$=4}};
\node [anchor=north] (l5) at ([yshift=-1em]cell55.south) {\tiny{$l$=5}};
\node [anchor=north] (caption2) at ([xshift=0.0em,yshift=0.0em]l5.south) {(b)};
% Position indices 0..5 (blue), placed left of the chart: above each row, and below the last row.
\node [anchor=center] (y1) at ([xshift=-2.1em,yshift=2em]cell11.center) {\tiny{\blue 0}};
\node [anchor=center] (y2) at ([xshift=-2.1em,yshift=2em]cell21.center) {\tiny{\blue 1}};
\node [anchor=center] (y3) at ([xshift=-2.1em,yshift=2em]cell31.center) {\tiny{\blue 2}};
\node [anchor=center] (y4) at ([xshift=-2.1em,yshift=2em]cell41.center) {\tiny{\blue 3}};
\node [anchor=center] (y5) at ([xshift=-2.1em,yshift=2em]cell51.center) {\tiny{\blue 4}};
\node [anchor=center] (y6) at ([xshift=-2.1em,yshift=-2em]cell51.center) {\tiny{\blue 5}};
% Header of the step table to the right of the chart: index / span / derivation, with a rule below.
\node [anchor=west] (num) at ([xshift=22.3em,yshift=2em]s1.east) {\scriptsize{序号}};
\node [anchor=west] (kua) at ([xshift=0em]num.east) {\scriptsize{跨度}};
\node [anchor=west] (tui) at ([xshift=0.4em]kua.east) {\scriptsize{推导}};
\draw[-] ([yshift=-0.1em]num.south west)--([xshift=17em,yshift=-0.1em]num.south west);
% Step 1: span [0,1], rule C -> a; cell11 is highlighted and labelled C.
{
\node [anchor=west] (n1) at ([xshift=1.4em,yshift=-1.5em]num.south west) {\scriptsize{1}};
\node [anchor=west] (k1) at ([yshift=-1.8em]kua.south west) {\scriptsize{[{\blue 0},{\blue 1}]}};
\node [anchor=west] (t1) at ([xshift=-0.8em,yshift=-1.5em]tui.south west) {\scriptsize{C $\to$ a}};
\node [anchor=center,selectnode,fill=red!20] (alig11) at (cell11.center) {\tiny{C}};
}
% Step 2: span [1,2], rule C -> a; cell21 is highlighted and labelled C.
{
\node [anchor=center] (n2) at ([yshift=-2.2em]n1.center) {\scriptsize{2}};
\node [anchor=center] (k2) at ([yshift=-2.2em]k1.center) {\scriptsize{[{\blue 1},{\blue 2}]}};
\node [anchor=west] (t2) at ([yshift=-2.2em]t1.west) {\scriptsize{C $\to$ a}};
\node [anchor=center,selectnode,fill=red!20] (alig21) at (cell21.center) {\tiny{C}};
}
% Step 3: span [2,3], rule D -> b; cell31 is highlighted and labelled D.
{
\node [anchor=center] (n3) at ([yshift=-2.2em]n2.center) {\scriptsize{3}};
\node [anchor=center] (k3) at ([yshift=-2.2em]k2.center) {\scriptsize{[{\blue 2},{\blue 3}]}};
\node [anchor=west] (t3) at ([yshift=-2.2em]t2.west) {\scriptsize{D $\to$ b}};
\node [anchor=center,selectnode,fill=red!20] (alig31) at (cell31.center) {\tiny{D}};
}
% Step 4: span [3,4], rule D -> b; cell41 is highlighted and labelled D.
{
\node [anchor=center] (n4) at ([yshift=-2.2em]n3.center) {\scriptsize{4}};
\node [anchor=center] (k4) at ([yshift=-2.2em]k3.center) {\scriptsize{[{\blue 3},{\blue 4}]}};
\node [anchor=west] (t4) at ([yshift=-2.2em]t3.west) {\scriptsize{D $\to$ b}};
\node [anchor=center,selectnode,fill=red!20] (alig41) at (cell41.center) {\tiny{D}};
}
% Step 5: span [4,5], two rules (B -> c and E -> c, the second on its own line t52); cell51 shows B,E.
{
\node [anchor=center] (n5) at ([yshift=-2.2em]n4.center) {\scriptsize{5}};
\node [anchor=center] (k5) at ([yshift=-2.2em]k4.center) {\scriptsize{[{\blue 4},{\blue 5}]}};
\node [anchor=west] (t5) at ([yshift=-2.2em]t4.west) {\scriptsize{B $\to$ c , }};
\node [anchor=east] (t52) at ([xshift=-1em,yshift=-2em]t5.east) {\scriptsize{E $\to$ c}};
\node [anchor=center,selectnode,fill=red!20] (alig51) at (cell51.center) {\tiny{B,E}};
}
\end{scope}
% Panel (c): chart after steps 1-7 (first spans of length 2); placed below panel (a).
\begin{scope}[yshift=-16.0em,scale=0.5]
% Input symbols a a b b c, stacked from top to bottom.
\node [anchor=east] (s1) at (0,0) {a};
\node [anchor=north west] (s2) at ([yshift=-2.5em]s1.south west) {a};
\node [anchor=north west] (s3) at ([yshift=-2.4em]s2.south west) {b};
\node [anchor=north west] (s4) at ([yshift=-2.3em]s3.south west) {b};
\node [anchor=north west] (s5) at ([yshift=-2.2em]s4.south west) {c};
% Triangular chart: cellRC lies in the row of input symbol sR, in column C (labelled l=C below).
\node [alignmentnode,anchor=west] (cell11) at ([xshift=1.9em]s1.east) {};
\node [alignmentnode,anchor=west] (cell21) at ([xshift=1.9em]s2.east) {};
\node [alignmentnode,anchor=west] (cell22) at ([xshift=3.5em]cell21.east) {};
\node [alignmentnode,anchor=west] (cell31) at ([xshift=2em]s3.east) {};
\node [alignmentnode,anchor=west] (cell32) at ([xshift=3.5em]cell31.east) {};
\node [alignmentnode,anchor=west] (cell33) at ([xshift=3.5em]cell32.east) {};
\node [alignmentnode,anchor=west] (cell41) at ([xshift=1.9em]s4.east) {};
\node [alignmentnode,anchor=west] (cell42) at ([xshift=3.5em]cell41.east) {};
\node [alignmentnode,anchor=west] (cell43) at ([xshift=3.5em]cell42.east) {};
\node [alignmentnode,anchor=west] (cell44) at ([xshift=3.5em]cell43.east) {};
\node [alignmentnode,anchor=west] (cell51) at ([xshift=1.9em]s5.east) {};
\node [alignmentnode,anchor=west] (cell52) at ([xshift=3.5em]cell51.east) {};
\node [alignmentnode,anchor=west] (cell53) at ([xshift=3.5em]cell52.east) {};
\node [alignmentnode,anchor=west] (cell54) at ([xshift=3.5em]cell53.east) {};
\node [alignmentnode,anchor=west] (cell55) at ([xshift=3.5em]cell54.east) {};
% Column labels (span length l) under the bottom row, followed by the panel caption.
\node [anchor=north] (l1) at ([yshift=-1em]cell51.south) {\tiny{$l$=1}};
\node [anchor=north] (l2) at ([yshift=-1em]cell52.south) {\tiny{$l$=2}};
\node [anchor=north] (l3) at ([yshift=-1em]cell53.south) {\tiny{$l$=3}};
\node [anchor=north] (l4) at ([yshift=-1em]cell54.south) {\tiny{$l$=4}};
\node [anchor=north] (l5) at ([yshift=-1em]cell55.south) {\tiny{$l$=5}};
\node [anchor=north] (caption3) at ([xshift=0.0em,yshift=0.0em]l5.south) {(c)};
% Position indices 0..5 (blue), placed left of the chart: above each row, and below the last row.
\node [anchor=center] (y1) at ([xshift=-2.1em,yshift=2em]cell11.center) {\tiny{\blue 0}};
\node [anchor=center] (y2) at ([xshift=-2.1em,yshift=2em]cell21.center) {\tiny{\blue 1}};
\node [anchor=center] (y3) at ([xshift=-2.1em,yshift=2em]cell31.center) {\tiny{\blue 2}};
\node [anchor=center] (y4) at ([xshift=-2.1em,yshift=2em]cell41.center) {\tiny{\blue 3}};
\node [anchor=center] (y5) at ([xshift=-2.1em,yshift=2em]cell51.center) {\tiny{\blue 4}};
\node [anchor=center] (y6) at ([xshift=-2.1em,yshift=-2em]cell51.center) {\tiny{\blue 5}};
% Header of the step table to the right of the chart: index / span / derivation, with a rule below.
\node [anchor=west] (num) at ([xshift=22.3em,yshift=2em]s1.east) {\scriptsize{序号}};
\node [anchor=west] (kua) at ([xshift=0em]num.east) {\scriptsize{跨度}};
\node [anchor=west] (tui) at ([xshift=0.4em]kua.east) {\scriptsize{推导}};
\draw[-] ([yshift=-0.1em]num.south west)--([xshift=17em,yshift=-0.1em]num.south west);
% Step 1: span [0,1], rule C -> a; cell11 is highlighted and labelled C.
{
\node [anchor=west] (n1) at ([xshift=1.4em,yshift=-1.5em]num.south west) {\scriptsize{1}};
\node [anchor=west] (k1) at ([yshift=-1.8em]kua.south west) {\scriptsize{[{\blue 0},{\blue 1}]}};
\node [anchor=west] (t1) at ([xshift=-0.8em,yshift=-1.5em]tui.south west) {\scriptsize{C $\to$ a}};
\node [anchor=center,selectnode,fill=red!20] (alig11) at (cell11.center) {\tiny{C}};
}
% Step 2: span [1,2], rule C -> a; cell21 is highlighted and labelled C.
{
\node [anchor=center] (n2) at ([yshift=-2.2em]n1.center) {\scriptsize{2}};
\node [anchor=center] (k2) at ([yshift=-2.2em]k1.center) {\scriptsize{[{\blue 1},{\blue 2}]}};
\node [anchor=west] (t2) at ([yshift=-2.2em]t1.west) {\scriptsize{C $\to$ a}};
\node [anchor=center,selectnode,fill=red!20] (alig21) at (cell21.center) {\tiny{C}};
}
% Step 3: span [2,3], rule D -> b; cell31 is highlighted and labelled D.
{
\node [anchor=center] (n3) at ([yshift=-2.2em]n2.center) {\scriptsize{3}};
\node [anchor=center] (k3) at ([yshift=-2.2em]k2.center) {\scriptsize{[{\blue 2},{\blue 3}]}};
\node [anchor=west] (t3) at ([yshift=-2.2em]t2.west) {\scriptsize{D $\to$ b}};
\node [anchor=center,selectnode,fill=red!20] (alig31) at (cell31.center) {\tiny{D}};
}
% Step 4: span [3,4], rule D -> b; cell41 is highlighted and labelled D.
{
\node [anchor=center] (n4) at ([yshift=-2.2em]n3.center) {\scriptsize{4}};
\node [anchor=center] (k4) at ([yshift=-2.2em]k3.center) {\scriptsize{[{\blue 3},{\blue 4}]}};
\node [anchor=west] (t4) at ([yshift=-2.2em]t3.west) {\scriptsize{D $\to$ b}};
\node [anchor=center,selectnode,fill=red!20] (alig41) at (cell41.center) {\tiny{D}};
}
% Step 5: span [4,5], two rules (B -> c and E -> c, the second on its own line t52); cell51 shows B,E.
{
\node [anchor=center] (n5) at ([yshift=-2.2em]n4.center) {\scriptsize{5}};
\node [anchor=center] (k5) at ([yshift=-2.2em]k4.center) {\scriptsize{[{\blue 4},{\blue 5}]}};
\node [anchor=west] (t5) at ([yshift=-2.2em]t4.west) {\scriptsize{B $\to$ c , }};
\node [anchor=east] (t52) at ([xshift=-1em,yshift=-2em]t5.east) {\scriptsize{E $\to$ c}};
\node [anchor=center,selectnode,fill=red!20] (alig51) at (cell51.center) {\tiny{B,E}};
}
% Step 6: span [0,2], no rule applies ("none"); cell22 is highlighted but left empty.
% The 4em offset leaves room for the second line (t52) of step 5.
{
\node [anchor=center] (n6) at ([yshift=-4em]n5.center) {\scriptsize{6}};
\node [anchor=center] (k6) at ([yshift=-4em]k5.center) {\scriptsize{[{\blue 0},{\blue 2}]}};
\node [anchor=west] (t6) at ([xshift=0.2em,yshift=-4em]t5.west) {\scriptsize{none}};
\node [anchor=center,selectnode,fill=red!20] (alig22) at (cell22.center) {\tiny{}};
}
% Step 7: span [1,3], rule A -> CD; cell32 is highlighted and labelled A.
{
\node [anchor=center] (n7) at ([yshift=-2.2em]n6.center) {\scriptsize{7}};
\node [anchor=center] (k7) at ([yshift=-2.2em]k6.center) {\scriptsize{[{\blue 1},{\blue 3}]}};
\node [anchor=west] (t7) at ([yshift=-2.2em]t6.west) {\scriptsize{A $\to$ CD}};
\node [anchor=center,selectnode,fill=red!20] (alig32) at (cell32.center) {\tiny{A}};
}
\end{scope}
% Panel (d): final chart with every cell processed; placed below panel (b).
\begin{scope}[xshift=22.5em,yshift=-16.0em,scale=0.5]
% Input symbols a a b b c, stacked from top to bottom.
\node [anchor=east] (s1) at (0,0) {a};
\node [anchor=north west] (s2) at ([yshift=-2.5em]s1.south west) {a};
\node [anchor=north west] (s3) at ([yshift=-2.4em]s2.south west) {b};
\node [anchor=north west] (s4) at ([yshift=-2.3em]s3.south west) {b};
\node [anchor=north west] (s5) at ([yshift=-2.2em]s4.south west) {c};
% Triangular chart: cellRC lies in the row of input symbol sR, in column C (labelled l=C below).
\node [alignmentnode,anchor=west] (cell11) at ([xshift=1.9em]s1.east) {};
\node [alignmentnode,anchor=west] (cell21) at ([xshift=1.9em]s2.east) {};
\node [alignmentnode,anchor=west] (cell22) at ([xshift=3.5em]cell21.east) {};
\node [alignmentnode,anchor=west] (cell31) at ([xshift=2em]s3.east) {};
\node [alignmentnode,anchor=west] (cell32) at ([xshift=3.5em]cell31.east) {};
\node [alignmentnode,anchor=west] (cell33) at ([xshift=3.5em]cell32.east) {};
\node [alignmentnode,anchor=west] (cell41) at ([xshift=1.9em]s4.east) {};
\node [alignmentnode,anchor=west] (cell42) at ([xshift=3.5em]cell41.east) {};
\node [alignmentnode,anchor=west] (cell43) at ([xshift=3.5em]cell42.east) {};
\node [alignmentnode,anchor=west] (cell44) at ([xshift=3.5em]cell43.east) {};
\node [alignmentnode,anchor=west] (cell51) at ([xshift=1.9em]s5.east) {};
\node [alignmentnode,anchor=west] (cell52) at ([xshift=3.5em]cell51.east) {};
\node [alignmentnode,anchor=west] (cell53) at ([xshift=3.5em]cell52.east) {};
\node [alignmentnode,anchor=west] (cell54) at ([xshift=3.5em]cell53.east) {};
\node [alignmentnode,anchor=west] (cell55) at ([xshift=3.5em]cell54.east) {};
% Column labels (span length l) under the bottom row, followed by the panel caption.
\node [anchor=north] (l1) at ([yshift=-1em]cell51.south) {\tiny{$l$=1}};
\node [anchor=north] (l2) at ([yshift=-1em]cell52.south) {\tiny{$l$=2}};
\node [anchor=north] (l3) at ([yshift=-1em]cell53.south) {\tiny{$l$=3}};
\node [anchor=north] (l4) at ([yshift=-1em]cell54.south) {\tiny{$l$=4}};
\node [anchor=north] (l5) at ([yshift=-1em]cell55.south) {\tiny{$l$=5}};
\node [anchor=north] (caption4) at ([xshift=0.0em,yshift=0.0em]l5.south) {(d)};
% Position indices 0..5 (blue), placed left of the chart: above each row, and below the last row.
\node [anchor=center] (y1) at ([xshift=-2.1em,yshift=2em]cell11.center) {\tiny{\blue 0}};
\node [anchor=center] (y2) at ([xshift=-2.1em,yshift=2em]cell21.center) {\tiny{\blue 1}};
\node [anchor=center] (y3) at ([xshift=-2.1em,yshift=2em]cell31.center) {\tiny{\blue 2}};
\node [anchor=center] (y4) at ([xshift=-2.1em,yshift=2em]cell41.center) {\tiny{\blue 3}};
\node [anchor=center] (y5) at ([xshift=-2.1em,yshift=2em]cell51.center) {\tiny{\blue 4}};
\node [anchor=center] (y6) at ([xshift=-2.1em,yshift=-2em]cell51.center) {\tiny{\blue 5}};
% Header of the step table to the right of the chart: index / span / derivation, with a rule below.
\node [anchor=west] (num) at ([xshift=22.3em,yshift=2em]s1.east) {\scriptsize{序号}};
\node [anchor=west] (kua) at ([xshift=0em]num.east) {\scriptsize{跨度}};
\node [anchor=west] (tui) at ([xshift=0.4em]kua.east) {\scriptsize{推导}};
\draw[-] ([yshift=-0.1em]num.south west)--([xshift=17em,yshift=-0.1em]num.south west);
% Step 1: span [0,1], rule C -> a; cell11 is highlighted and labelled C.
{
\node [anchor=west] (n1) at ([xshift=1.4em,yshift=-1.5em]num.south west) {\scriptsize{1}};
\node [anchor=west] (k1) at ([yshift=-1.8em]kua.south west) {\scriptsize{[{\blue 0},{\blue 1}]}};
\node [anchor=west] (t1) at ([xshift=-0.8em,yshift=-1.5em]tui.south west) {\scriptsize{C $\to$ a}};
\node [anchor=center,selectnode,fill=red!20] (alig11) at (cell11.center) {\tiny{C}};
}
% Step 2: span [1,2], rule C -> a; cell21 is highlighted and labelled C.
{
\node [anchor=center] (n2) at ([yshift=-2.2em]n1.center) {\scriptsize{2}};
\node [anchor=center] (k2) at ([yshift=-2.2em]k1.center) {\scriptsize{[{\blue 1},{\blue 2}]}};
\node [anchor=west] (t2) at ([yshift=-2.2em]t1.west) {\scriptsize{C $\to$ a}};
\node [anchor=center,selectnode,fill=red!20] (alig21) at (cell21.center) {\tiny{C}};
}
% Step 3: span [2,3], rule D -> b; cell31 is highlighted and labelled D.
{
\node [anchor=center] (n3) at ([yshift=-2.2em]n2.center) {\scriptsize{3}};
\node [anchor=center] (k3) at ([yshift=-2.2em]k2.center) {\scriptsize{[{\blue 2},{\blue 3}]}};
\node [anchor=west] (t3) at ([yshift=-2.2em]t2.west) {\scriptsize{D $\to$ b}};
\node [anchor=center,selectnode,fill=red!20] (alig31) at (cell31.center) {\tiny{D}};
}
% Step 4: span [3,4], rule D -> b; cell41 is highlighted and labelled D.
{
\node [anchor=center] (n4) at ([yshift=-2.2em]n3.center) {\scriptsize{4}};
\node [anchor=center] (k4) at ([yshift=-2.2em]k3.center) {\scriptsize{[{\blue 3},{\blue 4}]}};
\node [anchor=west] (t4) at ([yshift=-2.2em]t3.west) {\scriptsize{D $\to$ b}};
\node [anchor=center,selectnode,fill=red!20] (alig41) at (cell41.center) {\tiny{D}};
}
% Step 5: span [4,5], two rules (B -> c and E -> c, the second on its own line t52); cell51 shows B,E.
{
\node [anchor=center] (n5) at ([yshift=-2.2em]n4.center) {\scriptsize{5}};
\node [anchor=center] (k5) at ([yshift=-2.2em]k4.center) {\scriptsize{[{\blue 4},{\blue 5}]}};
\node [anchor=west] (t5) at ([yshift=-2.2em]t4.west) {\scriptsize{B $\to$ c , }};
\node [anchor=east] (t52) at ([xshift=-1em,yshift=-2em]t5.east) {\scriptsize{E $\to$ c}};
\node [anchor=center,selectnode,fill=red!20] (alig51) at (cell51.center) {\tiny{B,E}};
}
% Step 6: span [0,2], no rule applies ("none"); cell22 is highlighted but left empty.
% NOTE(review): t6 uses yshift=-4.2em here, whereas n6/k6 (and t6 in panel (c)) use -4em --
% left untouched; confirm whether the extra 0.2em is a deliberate visual tweak.
{
\node [anchor=center] (n6) at ([yshift=-4em]n5.center) {\scriptsize{6}};
\node [anchor=center] (k6) at ([yshift=-4em]k5.center) {\scriptsize{[{\blue 0},{\blue 2}]}};
\node [anchor=west] (t6) at ([xshift=0.2em,yshift=-4.2em]t5.west) {\scriptsize{none}};
\node [anchor=center,selectnode,fill=red!20] (alig22) at (cell22.center) {\tiny{}};
}
% Step 7: span [1,3], rule A -> CD; cell32 is highlighted and labelled A.
{
\node [anchor=center] (n7) at ([yshift=-2.2em]n6.center) {\scriptsize{7}};
\node [anchor=center] (k7) at ([yshift=-2.2em]k6.center) {\scriptsize{[{\blue 1},{\blue 3}]}};
\node [anchor=west] (t7) at ([yshift=-2.2em]t6.west) {\scriptsize{A $\to$ CD}};
\node [anchor=center,selectnode,fill=red!20] (alig32) at (cell32.center) {\tiny{A}};
}
% Steps 8-14 are elided ("..."). Step 15: span [0,5], rule S -> AB.
% All remaining cells are highlighted: cell43 shows F, cell44 shows A, cell55 shows S, the rest stay empty.
{
\node [anchor=center] (sep1) at ([yshift=-1.7em]n7.center) {\scriptsize{...}};
\node [anchor=center] (n8) at ([yshift=-3.4em]n7.center) {\scriptsize{15}};
\node [anchor=center] (k8) at ([yshift=-3.4em]k7.center) {\scriptsize{[{\blue 0},{\blue 5}]}};
% Same font size as the other derivation entries (t1-t7), which all use \scriptsize.
\node [anchor=west] (t8) at ([yshift=-3.4em]t7.west) {\scriptsize{S $\to$ AB}};
\node [anchor=center,selectnode,fill=red!20] (alig33) at (cell33.center) {\tiny{}};
\node [anchor=center,selectnode,fill=red!20] (alig42) at (cell42.center) {\tiny{}};
\node [anchor=center,selectnode,fill=red!20] (alig43) at (cell43.center) {\tiny{F}};
\node [anchor=center,selectnode,fill=red!20] (alig44) at (cell44.center) {\tiny{A}};
\node [anchor=center,selectnode,fill=red!20] (alig52) at (cell52.center) {\tiny{}};
\node [anchor=center,selectnode,fill=red!20] (alig53) at (cell53.center) {\tiny{}};
\node [anchor=center,selectnode,fill=red!20] (alig54) at (cell54.center) {\tiny{}};
\node [anchor=center,selectnode,fill=red!20] (alig55) at (cell55.center) {\tiny{S}};
}
\end{scope}
\end{tikzpicture}
......@@ -468,7 +468,7 @@ d = {(\bar{s}_{\bar{a}_1},\bar{t}_1)} \circ {(\bar{s}_{\bar{a}_2},\bar{t}_2)} \c
\end{figure}
%-------------------------------------------
\parinterval 除此之外,一些外部工具也可以用来获取词对齐,如Fastalign\cite{dyer2013a}、Berkeley Word Aligner\cite{taskar2005a}等。词对齐的质量通常使用词对齐错误率(AER)来评价\cite{OchA}。但是词对齐并不是一个独立的系统,它一般会服务于其他任务。因此,也可以使用下游任务来评价词对齐的好坏。比如,改进词对齐后观察机器翻译系统性能的变化。
\parinterval 除此之外,一些外部工具也可以用来获取词对齐,如Fastalign\cite{dyer2013a}、Berkeley Word Aligner\cite{taskar2005a}等。词对齐的质量通常使用词对齐错误率(AER)来评价\cite{DBLP:conf/coling/OchN00}。但是词对齐并不是一个独立的系统,它一般会服务于其他任务。因此,也可以使用下游任务来评价词对齐的好坏。比如,改进词对齐后观察机器翻译系统性能的变化。
%----------------------------------------------------------------------------------------
% NEW SUBSUB-SECTION
......@@ -537,13 +537,13 @@ d = {(\bar{s}_{\bar{a}_1},\bar{t}_1)} \circ {(\bar{s}_{\bar{a}_2},\bar{t}_2)} \c
\parinterval 基于距离的调序是最简单的一种调序模型。很多时候,语言的翻译基本上都是顺序的,也就是,译文单词出现的顺序和源语言单词的顺序基本上是一致的。反过来说,如果译文和源语言单词(或短语)的顺序差别很大,就认为出现了调序。
\parinterval 基于距离的调序方法的核心思想就是度量当前翻译结果与顺序翻译之间的差距。对于译文中的第$i$个短语,令$\textrm{start}_i$表示它所对应的源语言短语中第一个词所在的位置,$\textrm{end}_i$是这个短语中最后一个词所在的位置。于是,这个短语(相对于前一个短语)的调序距离为:
\parinterval 基于距离的调序方法的核心思想就是度量当前翻译结果与顺序翻译之间的差距。对于译文中的第$i$个短语,令$start_i$表示它所对应的源语言短语中第一个词所在的位置,$end_i$是这个短语中最后一个词所在的位置。于是,这个短语(相对于前一个短语)的调序距离为:
\begin{eqnarray}
dr = \textrm{start}_i-\textrm{end}_{i-1}-1
dr = start_i-end_{i-1}-1
\label{eq:4-15}
\end{eqnarray}
\parinterval 在图\ref{fig:4-20}的例子中,``the apple''所对应的调序距离为4,``在桌子上的''所对应的调序距离为-5。显然,如果两个源语短语按顺序翻译,则$\textrm{start}_i = \textrm{end}_{i-1} + 1$,这时调序距离为0。
\parinterval 在图\ref{fig:4-20}的例子中,``the apple''所对应的调序距离为4,``在桌子上的''所对应的调序距离为-5。显然,如果两个源语短语按顺序翻译,则$start_i = end_{i-1} + 1$,这时调序距离为0。
%----------------------------------------------
\begin{figure}[htp]
......@@ -737,7 +737,7 @@ dr = \textrm{start}_i-\textrm{end}_{i-1}-1
\vspace{0.5em}
\end{itemize}
\parinterval MERT最大的优点在于可以用于目标函数不可微、甚至不连续的情况。对于优化线性模型, MERT是一种很好的选择。但是,也有研究发现,简单使用MERT无法处理特征数量过多的情况。比如,用MERT优化10000个稀疏特征的权重时,优化效果可能会不理想,而且收敛速度慢。这时也可以考虑使用在线学习等技术对大量特征的权重进行调优,比较有代表性的方法包括MIRA\cite{crammer2003ultraconservative}和PRO\cite{Hopkins2011Tuning}。由于篇幅所限,这里不对这些方法做深入讨论,感兴趣的读者可以参考\ref{section-4.5}节的内容,对相关文献进行查阅。
\parinterval MERT最大的优点在于可以用于目标函数不可微、甚至不连续的情况。对于优化线性模型, MERT是一种很好的选择。但是,也有研究发现,简单使用MERT无法处理特征数量过多的情况。比如,用MERT优化10000个稀疏特征的权重时,优化效果可能会不理想,而且收敛速度慢。这时也可以考虑使用在线学习等技术对大量特征的权重进行调优,比较有代表性的方法包括MIRA\cite{DBLP:conf/emnlp/ChiangMR08}和PRO\cite{Hopkins2011Tuning}。由于篇幅所限,这里不对这些方法做深入讨论,感兴趣的读者可以参考\ref{section-4.5}节的内容,对相关文献进行查阅。
%----------------------------------------------------------------------------------------
% NEW SUB-SECTION
......@@ -797,7 +797,7 @@ dr = \textrm{start}_i-\textrm{end}_{i-1}-1
\subsubsection{翻译假设扩展}
\parinterval 下一步,需要使用这些翻译候选生成完整的译文。在机器翻译中,一个很重要的概念是{\small\bfnew{翻译假设}}\index{翻译假设}(Translation Hypothesis)\index{Translation Hypothesis}。它可以被当作是一个局部译文所对应的短语翻译推导。在解码开始时,只有一个空假设,也就是任何译文单词都没有被生成出来。接着,可以挑选翻译选项来扩展当前的翻译假设。图\ref{fig:4-28}展示了翻译假设扩展的过程。在翻译假设扩展时,需要保证新加入的翻译候选放置在旧翻译假设译文的右侧,也就是要确保翻译自左向右的连续性。而且,同一个翻译假设可以使用不同的翻译候选进行扩展。例如,扩展第一个翻译假设时,可以选择``桌子''的翻译候选``table'';也可以选择``有''的翻译候选``There is''。扩展完之后需要记录输入句子中已翻译的短语,同时计算当前所有翻译假设的模型得分。这个过程相当于生成了一个图的结构,每个节点代表了一个翻译假设。当翻译假设覆盖了输入句子所有的短语,不能被继续扩展时,就生成了一个完整的翻译假设(译文)。最后需要找到得分最高的完整翻译假设,它对应了搜索图中的最优路径。
\parinterval 下一步,需要使用这些翻译候选生成完整的译文。在机器翻译中,一个很重要的概念是{\small\bfnew{翻译假设}}\index{翻译假设}(Translation Hypothesis)\index{Translation Hypothesis}。它可以被当作是一个局部译文所对应的短语翻译推导。在解码开始时,只有一个空假设,也就是任何译文单词都没有被生成出来。接着,可以挑选翻译选项来扩展当前的翻译假设。图\ref{fig:4-28}展示了翻译假设扩展的过程。在翻译假设扩展时,需要保证新加入的翻译候选放置在旧翻译假设译文的右侧,也就是要确保翻译自左向右的连续性。而且,同一个翻译假设可以使用不同的翻译候选进行扩展。例如,扩展第一个翻译假设时,可以选择``桌子''的翻译候选``table'';也可以选择``有''的翻译候选``There is''。扩展完之后需要记录输入句子中已翻译的短语,同时计算当前所有翻译假设的模型得分。这个过程相当于生成了一个图的结构,每个节点代表了一个翻译假设。当翻译假设覆盖了输入句子所有的短语,不能被继续扩展时,就生成了一个完整的翻译假设(译文)。最后需要找到得分最高的完整翻译假设,它对应了搜索图中的最优路径。
%----------------------------------------------
\begin{figure}[htp]
......@@ -1220,7 +1220,7 @@ h_i (d,\textbf{t},\textbf{s})=\sum_{r \in d}h_i (r)
% NEW SUB-SECTION
%----------------------------------------------------------------------------------------
\subsection{CYK解码}\label{subsection-4.3.4}
\subsection{CKY解码}\label{subsection-4.3.4}
\parinterval 层次短语模型解码的目标是找到模型得分最高的推导,即:
\begin{eqnarray}
......@@ -1234,9 +1234,9 @@ h_i (d,\textbf{t},\textbf{s})=\sum_{r \in d}h_i (r)
\label{eq:4-29}
\end{eqnarray}
\parinterval 由于层次短语规则本质上就是CFG规则,因此公式\ref{eq:4-28}代表了一个典型的句法分析过程。需要做的是,用模型源语言端的CFG对输入句子进行分析,同时用模型目标语言端的CFG生成译文。基于CFG的句法分析是自然语言处理中的经典问题。一种广泛使用的方法是:首先把CFG转化为$\varepsilon$-free的{\small\bfnew{乔姆斯基范式}}\index{乔姆斯基范式}(Chomsky Normal Form)\index{Chomsky Normal Form}\footnote[5]{能够证明任意的CFG都可以被转换为乔姆斯基范式,即文法只包含形如A$\to$BC或A$\to$a的规则。这里,假设文法中不包含空串产生式A$\to\varepsilon$,其中$\varepsilon$表示空字符串。},之后采用CYK方法进行分析。
\parinterval 由于层次短语规则本质上就是CFG规则,因此公式\ref{eq:4-28}代表了一个典型的句法分析过程。需要做的是,用模型源语言端的CFG对输入句子进行分析,同时用模型目标语言端的CFG生成译文。基于CFG的句法分析是自然语言处理中的经典问题。一种广泛使用的方法是:首先把CFG转化为$\varepsilon$-free的{\small\bfnew{乔姆斯基范式}}\index{乔姆斯基范式}(Chomsky Normal Form)\index{Chomsky Normal Form}\footnote[5]{能够证明任意的CFG都可以被转换为乔姆斯基范式,即文法只包含形如A$\to$BC或A$\to$a的规则。这里,假设文法中不包含空串产生式A$\to\varepsilon$,其中$\varepsilon$表示空字符串。},之后采用CKY方法进行分析。
\parinterval CYK是形式语言中一种常用的句法分析方法\cite{cocke1969programming,younger1967recognition,kasami1966efficient}。它主要用于分析符合乔姆斯基范式的句子。由于乔姆斯基范式中每个规则最多包含两叉(或者说两个变量),因此CYK方法也可以被看作是基于二叉规则的一种分析方法。对于一个待分析的字符串,CYK方法从小的``范围''开始,不断扩大分析的``范围'',最终完成对整个字符串的分析。在CYK方法中,一个重要的概念是{\small\bfnew{跨度}}\index{跨度}(Span)\index{Span},所谓跨度表示了一个符号串的范围。这里可以把跨度简单的理解为从一个起始位置到一个结束位置中间的部分。
\parinterval CKY是形式语言中一种常用的句法分析方法\cite{cocke1969programming,younger1967recognition,kasami1966efficient}。它主要用于分析符合乔姆斯基范式的句子。由于乔姆斯基范式中每个规则最多包含两叉(或者说两个变量),因此CKY方法也可以被看作是基于二叉规则的一种分析方法。对于一个待分析的字符串,CKY方法从小的``范围''开始,不断扩大分析的``范围'',最终完成对整个字符串的分析。在CKY方法中,一个重要的概念是{\small\bfnew{跨度}}\index{跨度}(Span)\index{Span},所谓跨度表示了一个符号串的范围。这里可以把跨度简单的理解为从一个起始位置到一个结束位置中间的部分。
%----------------------------------------------
\begin{figure}[htp]
......@@ -1255,7 +1255,7 @@ span\textrm{[2,4]}&=&\textrm{``吃} \quad \textrm{鱼''} \nonumber \\
span\textrm{[0,4]}&=&\textrm{``猫} \quad \textrm{喜欢} \quad \textrm{吃} \quad \textrm{鱼''} \nonumber
\end{eqnarray}
\parinterval CYK方法是按跨度由小到大的次序执行的,这也对应了一种{\small\bfnew{自下而上的分析}}\index{自下而上的分析}(Bottom-up Parsing)\index{Bottom-up Parsing}过程。对于每个跨度,检查:
\parinterval CKY方法是按跨度由小到大的次序执行的,这也对应了一种{\small\bfnew{自下而上的分析}}\index{自下而上的分析}(Bottom-up Parsing)\index{Bottom-up Parsing}过程。对于每个跨度,检查:
\begin{itemize}
\vspace{0.5em}
......@@ -1265,30 +1265,30 @@ span\textrm{[0,4]}&=&\textrm{``猫} \quad \textrm{喜欢} \quad \textrm{吃} \qu
\vspace{0.5em}
\end{itemize}
\parinterval 对于第一种情况,简单匹配字符串即可;对于第二种情况,需要把当前的跨度进一步分割为两部分,并检查左半部分是否已经被归纳为B,右半部分是否已经被归纳为C。如果可以匹配,会在这个跨度上保存匹配结果。后面,可以访问这个结果(也就是A)来生成更大跨度上的分析结果。CYK算法的伪代码如图\ref{fig:4-36}所示。整个算法的执行顺序是按跨度的长度($l$)组织的。对于每个$span[j,j + l]$,会在位置$k$进行切割。之后,判断$span[j,k]$和$span[k,j +l]$是否可以形成一个规则的右部。也就是判断$span[j,k]$是否生成了B,同时判断$span[k,j + l]$是否生成了C,如果文法中有规则A$\to$BC,则把这个规则放入$span[j,j+l]$。这个过程由Compose函数完成。如果$span[j,j + l]$可以匹配多条规则,所有生成的推导都会被记录在$span[j,j + l]$所对应的一个列表里\footnote[6]{通常,这个列表会用优先队列实现。这样可以对推导按模型得分进行排序,方便后续的剪枝操作。}。
\parinterval 对于第一种情况,简单匹配字符串即可;对于第二种情况,需要把当前的跨度进一步分割为两部分,并检查左半部分是否已经被归纳为B,右半部分是否已经被归纳为C。如果可以匹配,会在这个跨度上保存匹配结果。后面,可以访问这个结果(也就是A)来生成更大跨度上的分析结果。CKY算法的伪代码如图\ref{fig:4-36}所示。整个算法的执行顺序是按跨度的长度($l$)组织的。对于每个$span[j,j + l]$,会在位置$k$进行切割。之后,判断$span[j,k]$和$span[k,j +l]$是否可以形成一个规则的右部。也就是判断$span[j,k]$是否生成了B,同时判断$span[k,j + l]$是否生成了C,如果文法中有规则A$\to$BC,则把这个规则放入$span[j,j+l]$。这个过程由Compose函数完成。如果$span[j,j + l]$可以匹配多条规则,所有生成的推导都会被记录在$span[j,j + l]$所对应的一个列表里\footnote[6]{通常,这个列表会用优先队列实现。这样可以对推导按模型得分进行排序,方便后续的剪枝操作。}。
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter4/Figures/CYK-algorithm}
\caption{CYK算法}
\input{./Chapter4/Figures/cky-algorithm}
\caption{CKY算法}
\label{fig:4-36}
\end{figure}
%-------------------------------------------
\parinterval 图\ref{fig:4-37}展示了CYK方法的一个运行实例(输入词串是aabbc)。算法在处理完最后一个跨度后会得到覆盖整个词串的分析结果,即句法树的根结点S。
\parinterval 图\ref{fig:4-37}展示了CKY方法的一个运行实例(输入词串是aabbc)。算法在处理完最后一个跨度后会得到覆盖整个词串的分析结果,即句法树的根结点S。
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter4/Figures/example-of-cyk-algorithm-execution-label}
\input{./Chapter4/Figures/example-of-cyk-algorithm-execution}
\caption{CYK算法执行实例}
\input{./Chapter4/Figures/example-of-cky-algorithm-execution-label}
\input{./Chapter4/Figures/example-of-cky-algorithm-execution}
\caption{CKY算法执行实例}
\label{fig:4-37}
\end{figure}
%----------------------------------------------
\parinterval 不过,CYK方法并不能直接用于层次短语模型。有两个问题:
\parinterval 不过,CKY方法并不能直接用于层次短语模型。有两个问题:
\begin{itemize}
\vspace{0.5em}
......@@ -1302,21 +1302,21 @@ span\textrm{[0,4]}&=&\textrm{``猫} \quad \textrm{喜欢} \quad \textrm{吃} \qu
\begin{itemize}
\vspace{0.5em}
\item 把层次短语文法转化为乔姆斯基范式,这样可以直接使用原始的CYK方法进行分析;
\item 把层次短语文法转化为乔姆斯基范式,这样可以直接使用原始的CKY方法进行分析;
\vspace{0.5em}
\item 对CYK方法进行改造。解码的核心任务要知道每个跨度是否能匹配规则的源语言部分。实际上,层次短语模型的文法是一种特殊的文法。这种文法规则的源语言部分最多包含两个变量,而且变量不能连续。这样的规则会对应一种特定类型的模版,比如,对于包含两个变量的规则,它的源语言部分形如$\alpha_0 \textrm{X}_1 \alpha_1 \textrm{X}_2 \alpha_2$。其中,$\alpha_0$$\alpha_1$$\alpha_2$表示终结符串,$\textrm{X}_1$$\textrm{X}_2$是变量。显然,如果$\alpha_0$$\alpha_1$$\alpha_2$确定下来那么$\textrm{X}_1$$\textrm{X}_2$的位置也就确定了下来。因此,对于每一个词串,都可以很容易的生成这种模版,进而完成匹配。而$\textrm{X}_1$$\textrm{X}_2$和原始CYK中匹配二叉规则本质上是一样的。由于这种方法并不需要对CYK方法进行过多的调整,因此层次短语系统中广泛使用这种改造的CYK方法进行解码。
\item 对CKY方法进行改造。解码的核心任务是要知道每个跨度是否能匹配规则的源语言部分。实际上,层次短语模型的文法是一种特殊的文法。这种文法规则的源语言部分最多包含两个变量,而且变量不能连续。这样的规则会对应一种特定类型的模版,比如,对于包含两个变量的规则,它的源语言部分形如$\alpha_0 \textrm{X}_1 \alpha_1 \textrm{X}_2 \alpha_2$。其中,$\alpha_0$、$\alpha_1$和$\alpha_2$表示终结符串,$\textrm{X}_1$和$\textrm{X}_2$是变量。显然,如果$\alpha_0$、$\alpha_1$和$\alpha_2$确定下来,那么$\textrm{X}_1$和$\textrm{X}_2$的位置也就确定了下来。因此,对于每一个词串,都可以很容易地生成这种模版,进而完成匹配。而$\textrm{X}_1$和$\textrm{X}_2$的匹配与原始CKY中匹配二叉规则本质上是一样的。由于这种方法并不需要对CKY方法进行过多的调整,因此层次短语系统中广泛使用这种改造的CKY方法进行解码。
\vspace{0.5em}
\end{itemize}
\parinterval 对于语言模型在解码中的集成问题,一种简单的办法是:在CYK分析的过程中,用语言模型对每个局部的翻译结果进行评价,并计算局部翻译(推导)的模型得分。注意,局部的语言模型得分可能是不准确的,比如,局部翻译片段最左边单词的概率计算需要依赖前面的单词。但是由于每个跨度下生成的翻译是局部的,当前跨度下看不到前面的译文。这时会用1-gram语言模型的得分代替真实的高阶语言模型得分。等这个局部翻译片段和其他片段组合之后,可以知道前文的内容,这时才会得出最终的语言模型得分。另一种解决问题的思路是,先不加入语言模型,这样可以直接使用CYK方法进行分析。在得到最终的结果后,对最好的多个推导用含有语言模型的完整模型进行打分,选出最终的最优推导。不过,在实践中发现,由于语言模型在机器翻译中起到至关重要的作用,因此对最终结果进行重排序会带来一定的性能损失。不过这种方法的优势在于速度快,而且容易实现。
\parinterval 对于语言模型在解码中的集成问题,一种简单的办法是:在CKY分析的过程中,用语言模型对每个局部的翻译结果进行评价,并计算局部翻译(推导)的模型得分。注意,局部的语言模型得分可能是不准确的,比如,局部翻译片段最左边单词的概率计算需要依赖前面的单词。但是由于每个跨度下生成的翻译是局部的,当前跨度下看不到前面的译文。这时会用1-gram语言模型的得分代替真实的高阶语言模型得分。等这个局部翻译片段和其他片段组合之后,可以知道前文的内容,这时才会得出最终的语言模型得分。另一种解决问题的思路是,先不加入语言模型,这样可以直接使用CKY方法进行分析。在得到最终的结果后,对最好的多个推导用含有语言模型的完整模型进行打分,选出最终的最优推导。不过,在实践中发现,由于语言模型在机器翻译中起到至关重要的作用,因此对最终结果进行重排序会带来一定的性能损失。不过这种方法的优势在于速度快,而且容易实现。
\parinterval 另外,在实践时,还需要考虑两方面问题:
\begin{itemize}
\vspace{0.5em}
\item 剪枝:在CYK中,每个跨度都可以生成非常多的推导(局部翻译假设)。理论上,这些推导的数量会和跨度大小成指数关系。显然不可能保存如此大量的翻译推导。对于这个问题,常用的办法是只保留top-$k$个推导。也就是每个局部结果只保留最好的$k$个。这种方法也被称作{\small\bfnew{束剪枝}}\index{束剪枝}(Beam Pruning)\index{Beam Pruning}。在极端情况下,当$k$=1时,这个方法就变成了贪婪的方法;
\item 剪枝:在CKY中,每个跨度都可以生成非常多的推导(局部翻译假设)。理论上,这些推导的数量会和跨度大小成指数关系。显然不可能保存如此大量的翻译推导。对于这个问题,常用的办法是只保留top-$k$个推导。也就是每个局部结果只保留最好的$k$个。这种方法也被称作{\small\bfnew{束剪枝}}\index{束剪枝}(Beam Pruning)\index{Beam Pruning}。在极端情况下,当$k$=1时,这个方法就变成了贪婪的方法;
\vspace{0.5em}
\item $n$-best结果的生成:$n$-best推导(译文)的生成是统计机器翻译必要的功能。比如,最小错误率训练中就需要最好的$n$个结果用于特征权重调优。在基于CYK的方法中,整个句子的翻译结果会被保存在最大跨度所对应的结构中。因此一种简单的$n$-best生成方法是从这个结构中取出排名最靠前的$n$个结果。另外,也可以考虑自上而下遍历CYK生成的推导空间,得到更好的$n$-best结果\cite{huang2005better}
\item $n$-best结果的生成:$n$-best推导(译文)的生成是统计机器翻译必要的功能。比如,最小错误率训练中就需要最好的$n$个结果用于特征权重调优。在基于CKY的方法中,整个句子的翻译结果会被保存在最大跨度所对应的结构中。因此一种简单的$n$-best生成方法是从这个结构中取出排名最靠前的$n$个结果。另外,也可以考虑自上而下遍历CKY生成的推导空间,得到更好的$n$-best结果\cite{huang2005better}。
\end{itemize}
%----------------------------------------------------------------------------------------
......@@ -1926,7 +1926,7 @@ r_9: \quad \textrm{IP(}\textrm{NN}_1\ \textrm{VP}_2) \rightarrow \textrm{S(}\tex
\subsubsection{句法树二叉化}
\parinterval 句法树是使用人类语言学知识归纳出来的一种解释句子结构的工具。比如, CTB、PTB等语料就是常用的训练句法分析器的数据\cite{xue2005building,DBLP:journals/coling/MarcusSM94}。但是,这些数据的标注中会含有大量的扁平结构,如图\ref{fig:4-58}所示,多个分句可能会导致一个根节点下有很多个分支。
\parinterval 句法树是使用人类语言学知识归纳出来的一种解释句子结构的工具。比如, CTB\cite{xue2005building}、PTB\cite{DBLP:journals/coling/MarcusSM94}等语料就是常用的训练句法分析器的数据。但是,这些数据的标注中会含有大量的扁平结构,如图\ref{fig:4-58}所示,多个分句可能会导致一个根节点下有很多个分支。
%----------------------------------------------
\begin{figure}[htp]
......@@ -1957,7 +1957,7 @@ r_9: \quad \textrm{IP(}\textrm{NN}_1\ \textrm{VP}_2) \rightarrow \textrm{S(}\tex
&& \textrm{NP-BAR(}\textrm{NN}_1\ \textrm{NP-}\textrm{BAR}_2) \rightarrow \textrm{NN}_1\ \textrm{NP-}\textrm{BAR}_2 \nonumber
\end{eqnarray}
\parinterval 由于树二叉化可以帮助规则抽取得到更细颗粒度的规则,提高规则抽取的召回率,因此成为了基于句法的机器翻译中的常用方法。二叉化方法也有很多不同的实现策略,比如:左二叉化、右二叉化、基于中心词的二叉化等\cite{Tong2009Better,DBLP:conf/naacl/ZhangHGK06}。具体实现时可以根据实际情况进行选择。
\parinterval 由于树二叉化可以帮助规则抽取得到更细颗粒度的规则,提高规则抽取的召回率,因此成为了基于句法的机器翻译中的常用方法。二叉化方法也有很多不同的实现策略,比如:左二叉化\cite{DBLP:conf/naacl/ZhangHGK06}、右二叉化\cite{Tong2009Better}、基于中心词的二叉化\cite{DBLP:conf/acl/KleinM03,charniak2006multilevel}。具体实现时可以根据实际情况进行选择。
%----------------------------------------------
\begin{figure}[htp]
......@@ -2015,7 +2015,7 @@ r_9: \quad \textrm{IP(}\textrm{NN}_1\ \textrm{VP}_2) \rightarrow \textrm{S(}\tex
\end{figure}
%-------------------------------------------
\parinterval 可以看到,节点对齐可以避免词对齐错误造成的影响。不过,节点对齐需要开发额外的工具。有很多方法可以参考,比如可以基于启发性规则、基于分类模型、基于无指导的方法等\cite{xiao2013unsupervised,tinsley2007robust}
\parinterval 可以看到,节点对齐可以避免词对齐错误造成的影响。不过,节点对齐需要开发额外的工具。有很多方法可以参考,比如可以基于启发性规则\cite{DBLP:conf/coling/GrovesHW04}、基于分类模型\cite{DBLP:conf/coling/SunZT10}、基于无指导的方法\cite{xiao2013unsupervised}等。
%----------------------------------------------------------------------------------------
% NEW SUBSUB-SECTION
......@@ -2211,7 +2211,7 @@ d_1 = {d'} \circ {r_5}
\rule{0pt}{15pt}解码方法 & $\hat{d} = \arg\max_{d \in D_{\textrm{tree}}} \textrm{score} (d)$ & $\hat{d} = \arg\max_{d \in D} \textrm{score} (d)$ \\
\rule{0pt}{15pt}搜索空间 & 与输入的源语句法树兼容的推导$D_{\textrm{tree}}$ & 所有的推导$D$ \\
\rule{0pt}{15pt}适用模型 & 树到串、树到树 & 所有的句法模型 \\
\rule{0pt}{15pt}解码算法 & Chart解码 & CYK + 规则二叉化 \\
\rule{0pt}{15pt}解码算法 & Chart解码 & CKY + 规则二叉化 \\
\rule{0pt}{15pt}速度 & 快 & 一般较慢
\end{tabular}
}
......@@ -2293,7 +2293,7 @@ d_1 = {d'} \circ {r_5}
\vspace{0.5em}
\item 对文法进行限制。比如,可以限制规则中变量的数量;或者不允许连续的变量,这样的规则也被称作满足{\small\bfnew{Lexicalized Norm Form}}\index{Lexicalized Norm Form}(LNF)的规则。比如,层次短语规则就是LNF规则。由于LNF 中单词(终结符)可以作为锚点,因此规则匹配时所有变量的匹配范围是固定的;
\vspace{0.5em}
\item 对规则进行二叉化,使用CYK方法进行分析。这个方法也是句法分析中常用的策略。所谓规则二叉化是把规则转化为最多只含两个变量或连续词串的规则(串到树规则)。比如,对于如下的规则:
\item 对规则进行二叉化,使用CKY方法进行分析。这个方法也是句法分析中常用的策略。所谓规则二叉化是把规则转化为最多只含两个变量或连续词串的规则(串到树规则)。比如,对于如下的规则:
\begin{eqnarray}
\textrm{喜欢}\ \textrm{VP}_1\ \textrm{NP}_2 \rightarrow \textrm{VP(VBZ(likes)}\ \textrm{VP}_1\ \textrm{NP}_2 ) \nonumber
\end{eqnarray}
......@@ -2303,7 +2303,7 @@ d_1 = {d'} \circ {r_5}
\textrm{喜欢}\ \textrm{V103} &\rightarrow& \textrm{VP}(\textrm{VBZ}(\textrm{likes})\ \textrm{V103} ) \nonumber \\
\textrm{VP}_1\ \textrm{NP}_2 &\rightarrow& \textrm{V103(}\ \textrm{VP}_1\ \textrm{NP}_2 ) \nonumber
\end{eqnarray}
\noindent 可以看到,这两条新的规则源语言端只有两个部分,代表两个分叉。V103是一个新的标签,它没有任何句法含义。不过,为了保证二叉化后规则目标语部分的连续性,需要考虑源语言和目标语二叉化的同步性\cite{zhang2006synchronous,Tong2009Better}。这样的规则与CYK方法一起使用完成解码,具体内容可以参考\ref{subsection-4.3.4}节的内容。
\noindent 可以看到,这两条新的规则源语言端只有两个部分,代表两个分叉。V103是一个新的标签,它没有任何句法含义。不过,为了保证二叉化后规则目标语部分的连续性,需要考虑源语言和目标语二叉化的同步性\cite{zhang2006synchronous,Tong2009Better}。这样的规则与CKY方法一起使用完成解码,具体内容可以参考\ref{subsection-4.3.4}节的内容。
\vspace{0.5em}
\end{itemize}
......
......@@ -1295,18 +1295,23 @@
//bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{crammer2003ultraconservative,
author = {Koby Crammer and
Yoram Singer},
title = {Ultraconservative Online Algorithms for Multiclass Problems},
journal = {Journal of Machine Learning Research},
volume = {3},
pages = {951--991},
year = {2003},
//url = {http://jmlr.org/papers/v3/crammer03a.html},
//timestamp = {Wed, 10 Jul 2019 15:28:37 +0200},
//biburl = {https://dblp.org/rec/journals/jmlr/CrammerS03.bib},
//bibsource = {dblp computer science bibliography, https://dblp.org}
% Chiang, Marton and Resnik, EMNLP 2008: online large-margin training of
% syntactic and structural translation features.
@inproceedings{DBLP:conf/emnlp/ChiangMR08,
  author    = {Chiang, David and Marton, Yuval and Resnik, Philip},
  title     = {Online Large-Margin Training of Syntactic and Structural
               Translation Features},
  booktitle = {2008 Conference on Empirical Methods in Natural Language
               Processing, {EMNLP} 2008, Proceedings of the Conference,
               25-27 October 2008, Honolulu, Hawaii, USA, {A} meeting of
               SIGDAT, a Special Interest Group of the {ACL}},
  year      = {2008},
  pages     = {224--233},
  publisher = {{ACL}},
  url       = {https://www.aclweb.org/anthology/D08-1024/},
  timestamp = {Fri, 13 Sep 2019 13:08:45 +0200},
  biburl    = {https://dblp.org/rec/conf/emnlp/ChiangMR08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{dreyer2015apro,
......@@ -1804,21 +1809,23 @@ year ={2008},
//biburl = {https://dblp.org/rec/conf/naacl/OchGKSYFKSSEJJR04.bib},
//bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{OchA,
@inproceedings{DBLP:conf/coling/OchN00,
author = {Franz Josef Och and
Hermann Ney},
title = {A Systematic Comparison of Various Statistical Alignment Models},
journal = {Computational Linguistics},
volume = {29},
number = {1},
pages = {19--51},
year = {2003},
//url = {https://doi.org/10.1162/089120103321337421},
//doi = {10.1162/089120103321337421},
//timestamp = {Tue, 21 May 2019 18:03:32 +0200},
//biburl = {https://dblp.org/rec/journals/coling/OchN03.bib},
//bibsource = {dblp computer science bibliography, https://dblp.org}
title = {A Comparison of Alignment Models for Statistical Machine Translation},
booktitle = {{COLING} 2000, 18th International Conference on Computational Linguistics,
Proceedings of the Conference, 2 Volumes, July 31 - August 4, 2000,
Universit{\"{a}}t des Saarlandes, Saarbr{\"{u}}cken, Germany},
pages = {1086--1090},
publisher = {Morgan Kaufmann},
year = {2000},
url = {https://www.aclweb.org/anthology/C00-2163/},
timestamp = {Mon, 16 Sep 2019 17:08:53 +0200},
biburl = {https://dblp.org/rec/conf/coling/OchN00.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{powell1964an,
author = {M. J. D. Powell},
title = {An efficient method for finding the minimum of a function of several
......@@ -1948,14 +1955,40 @@ year ={2008},
//biburl = {https://dblp.org/rec/conf/naacl/TaskarLK05.bib},
//bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{tinsley2007robust,
author ={Tinsley, John and Zhechev, Ventsislav and Hearne, Mary and Way, Andy},
title ={Robust language pair-independent sub-tree alignment},
journal ={Machine Translation Summit XI},
pages ={467–474},
year ={2007},
//url ={http://doras.dcu.ie/15230/},
% Groves, Hearne and Way, COLING 2004: sub-sentential alignment of
% phrase-structure trees (cited in the text for heuristic node alignment).
@inproceedings{DBLP:conf/coling/GrovesHW04,
  author    = {Groves, Declan and Hearne, Mary and Way, Andy},
  title     = {Robust Sub-Sentential Alignment of Phrase-Structure Trees},
  booktitle = {{COLING} 2004, 20th International Conference on Computational
               Linguistics, Proceedings of the Conference, 23-27 August 2004,
               Geneva, Switzerland},
  year      = {2004},
  url       = {https://www.aclweb.org/anthology/C04-1154/},
  timestamp = {Mon, 16 Sep 2019 17:08:53 +0200},
  biburl    = {https://dblp.org/rec/conf/coling/GrovesHW04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Sun, Zhang and Tan, COLING 2010: discriminative sub-tree alignment
% (cited in the text for classifier-based node alignment).
@inproceedings{DBLP:conf/coling/SunZT10,
  author    = {Sun, Jun and Zhang, Min and Tan, Chew Lim},
  editor    = {Huang, Chu{-}Ren and Jurafsky, Dan},
  title     = {Discriminative Induction of Sub-Tree Alignment using Limited
               Labeled Data},
  booktitle = {{COLING} 2010, 23rd International Conference on Computational
               Linguistics, Proceedings of the Conference, 23-27 August 2010,
               Beijing, China},
  year      = {2010},
  pages     = {1047--1055},
  publisher = {Tsinghua University Press},
  url       = {https://www.aclweb.org/anthology/C10-1118/},
  timestamp = {Mon, 16 Sep 2019 17:08:53 +0200},
  biburl    = {https://dblp.org/rec/conf/coling/SunZT10.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{Tong2009Better,
author = {Tong Xiao and
Mu Li and
......@@ -1974,6 +2007,33 @@ year ={2008},
//biburl = {https://dblp.org/rec/conf/emnlp/XiaoLZZZ09.bib},
//bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Klein and Manning, ACL 2003: accurate unlexicalized parsing
% (cited in the text for head-based binarization).
@inproceedings{DBLP:conf/acl/KleinM03,
  author    = {Klein, Dan and Manning, Christopher D.},
  editor    = {Hinrichs, Erhard W. and Roth, Dan},
  title     = {Accurate Unlexicalized Parsing},
  booktitle = {Proceedings of the 41st Annual Meeting of the Association for
               Computational Linguistics, 7-12 July 2003, Sapporo Convention
               Center, Sapporo, Japan},
  year      = {2003},
  pages     = {423--430},
  publisher = {{ACL}},
  url       = {https://www.aclweb.org/anthology/P03-1054/},
  timestamp = {Mon, 19 Aug 2019 18:09:53 +0200},
  biburl    = {https://dblp.org/rec/conf/acl/KleinM03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Charniak et al., HLT-NAACL 2006: multilevel coarse-to-fine PCFG parsing
% (cited in the text for head-based binarization).
% The acronym in the title is brace-protected so sentence-casing styles do not
% lowercase it. `notes` is deliberately left as a non-standard (ignored) field
% so the provenance remark is not printed in the bibliography.
@inproceedings{charniak2006multilevel,
  author    = {Charniak, Eugene and
               Johnson, Mark and
               Elsner, Micha and
               Austerweil, Joseph and
               Ellis, David and
               Haxton, Isaac and
               Hill, Catherine and
               Shrivaths, R. and
               Moore, Jeremy and
               Pozar, Michael and
               Vu, Theresa},
  title     = {Multilevel Coarse-to-Fine {PCFG} Parsing},
  booktitle = {Proceedings of the Human Language Technology Conference of the
               {NAACL}, Main Conference},
  pages     = {168--175},
  year      = {2006},
  notes     = {Sourced from Microsoft Academic - https://academic.microsoft.com/paper/2101714644}
}
@inproceedings{Tong2016Syntactic,
author = {Tong Xiao and
Jingbo Zhu and
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论