Commit d25e5421 by xiaotong

new pages of rule extraction

parent 93e5393c
......@@ -146,42 +146,23 @@
\subsection{翻译规则抽取}
%%%------------------------------------------------------------------------------------------------------------
%%% 基于树结构的翻译文法 - 树到串/串到树
\begin{frame}{树到串规则抽取 - GHKM方法}
%%% 规则抽取
\begin{frame}{规则抽取}
\begin{itemize}
\item 基于句法的翻译系统的核心有两个部分
\begin{enumerate}
\item \textbf{文法归纳}:从带有句法分析结果的双语数据中自动学习翻译规则
\item \textbf{解码}:使用学习到的翻译规则对新的句子进行翻译
\end{enumerate}
\item<2-> 首先要解决的是如何获取翻译规则,即\alert{规则抽取} - 这里先从GHKM方法开始,它是经典的树到串翻译规则的抽取方法(Galley et al., 2004; 2006)
\item 可信节点本质上定义了规则的边界,规则需要满足
\begin{itemize}
\item 方法的名字由四位作者(Galley、Hopkins、Knight、Marcu)的姓氏首字母构成 :)
\end{itemize}
\item<3-> GHKM方法的输入包括
\begin{itemize}
\item 源语言句子和它的短语分析树
\item 目标语句子
\item 源语和目标语句子之间的词对齐
\end{itemize}
\item<3-> 注意:
\begin{itemize}
\item 句法树可以由句法分析器自动生成
\item 词对齐可以由词对齐系统(如IBM模型)自动生成
\item 左部树片段的根节点是可信节点
\item 左部树片段的叶子节点是终结符或者可信节点
\end{itemize}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 翻译规则抽取本质上是要完成对树结构的切割
\begin{frame}{树片段}
\begin{itemize}
\item 树到串翻译规则实际上是一个树片段到一个词串的映射。一个合理的树到串翻译规则,不应该违反任何的词对齐信息
\begin{itemize}
\item 显然这种树片段可以有很多
\item 一棵句法树也可以被切割成多个树片段
\end{itemize}
\end{itemize}
\begin{center}
{\footnotesize
\visible<2->{
例如:$\textrm{VP(PP(P(对) NP(NN(回答))) VP}_1) \to \textrm{VP}_1\ \textrm{with the answer}$
}
}
\end{center}
\begin{center}
\begin{tikzpicture}
......@@ -205,12 +186,12 @@
]
]
\node[anchor=north,minimum size=18pt] (tw1) at ([yshift=-6.0em]cw1.south){he};
\node[anchor=west,minimum size=18pt] (tw2) at ([yshift=-0.1em,xshift=1.1em]tw1.east){was};
\node[anchor=west,minimum size=18pt] (tw3) at ([yshift=0.1em,xshift=1.1em]tw2.east){satisfied};
\node[anchor=west,minimum size=18pt] (tw4) at ([xshift=1.1em]tw3.east){with};
\node[anchor=west,minimum size=18pt] (tw5) at ([xshift=1.1em]tw4.east){the};
\node[anchor=west,minimum size=18pt] (tw6) at ([yshift=-0.1em,xshift=1.1em]tw5.east){answer};
\node[anchor=north,minimum size=18pt,align=center] (tw1) at ([yshift=-6.0em]cw1.south){he\\\scriptsize{1}};
\node[anchor=west,minimum size=18pt,align=center] (tw2) at ([yshift=-0.1em,xshift=1.1em]tw1.east){was\\\scriptsize{2}};
\node[anchor=west,minimum size=18pt,align=center] (tw3) at ([yshift=0.1em,xshift=1.1em]tw2.east){satisfied\\\scriptsize{3}};
\node[anchor=west,minimum size=18pt,align=center] (tw4) at ([xshift=1.1em]tw3.east){with\\\scriptsize{4}};
\node[anchor=west,minimum size=18pt,align=center] (tw5) at ([xshift=1.1em]tw4.east){the\\\scriptsize{5}};
\node[anchor=west,minimum size=18pt,align=center] (tw6) at ([yshift=-0.1em,xshift=1.1em]tw5.east){answer\\\scriptsize{6}};
\draw[dashed] (cw1.south) -- ([yshift=-0.4em]tw1.north);
\draw[dashed] (cw2.south) .. controls +(south:1.6) and +(north:0.6) .. ([yshift=-0.4em]tw4.north);
......@@ -219,23 +200,37 @@
\draw[dashed] (cw4.south) .. controls +(south:2.0) and +(north:0.6) .. ([yshift=-0.4em]tw3.north);
\draw[dashed] (cw5.south) .. controls +(south:2.0) and +(north:0.6) .. ([yshift=-0.4em]tw3.north);
\begin{pgfonlayer}{background}
\visible<2->{
\node [rectangle,inner sep=0em,fill=red!20] [fit = (cw2) (cw3) (n5)] (rule1s) {};
\node [rectangle,inner sep=0em,fill=red!20] [fit = (tw4) (tw5) (tw6)] (rule1t) {};
}
\visible<3->{
\node [rectangle,inner sep=0em,fill=blue!20] [fit = (cw5) (n11)] (rule2s) {};
\node [rectangle,inner sep=0em,fill=blue!20] [fit = (tw3)] (rule2t) {};
\node [fill=blue!20] [fit = (n5) (cw2) (cw3) (n8)] (rule1box1) {};
\node [fill=blue!20] [fit = (n4) (n5) (n9)] (rule1box2) {};
\node [fill=blue!20] [fit = (tw4) (tw5) (tw6)] (rule1box3) {};
}
\node [rectangle,fill=orange!20,inner sep=0] [fit = (n11)] (n11box) {};
\node [rectangle,fill=green!20,inner sep=0] [fit = (n4)] (n4box) {};
\node [rectangle,fill=green!20,inner sep=0] [fit = (n1)] (n1box) {};
\node [rectangle,fill=green!20,inner sep=0] [fit = (n2)] (n2box) {};
\node [rectangle,fill=green!20,inner sep=0] [fit = (n3)] (n3box) {};
\node [rectangle,fill=green!20,inner sep=0] [fit = (n5)] (n5box) {};
\node [rectangle,fill=green!20,inner sep=0] [fit = (n6)] (n6box) {};
\node [rectangle,fill=green!20,inner sep=0] [fit = (n7)] (n7box) {};
\node [rectangle,fill=green!20,inner sep=0] [fit = (n8)] (n8box) {};
\node [rectangle,fill=green!20,inner sep=0] [fit = (n9)] (n9box) {};
\node [rectangle,fill=orange!20,inner sep=0] [fit = (n10)] (n10box) {};
\end{pgfonlayer}
\visible<2->{
\node [anchor=south] (rule1label) at ([xshift=1em]rule1s.north west) {\scriptsize{\textbf{\alert{正确的规则}}}};
}
\node [anchor=north west, minimum size=1.2em, fill=green!20] (land1) at ([xshift=1.5em,yshift=-1em]cw5.south east) {};
\node [anchor=west] (land1label) at (land1.east) {\scriptsize{可信}};
\node [anchor=north west, minimum size=1.2em, fill=orange!20] (land2) at ([yshift=-0.3em]land1.south west) {};
\node [anchor=west] (land2label) at (land2.east) {\scriptsize{不可信}};
\visible<3->{
\node [anchor=north west,align=left] (rule2label) at (rule2s.north east) {\scriptsize{\textbf{\color{blue} 错误的规则}}\\\scriptsize{因为``satisfied''会}\\\scriptsize{对齐到规则外,}\\\scriptsize{也就是这条规则}\\\scriptsize{与词对齐不相容}};
\node [draw,thick,red,fill=red!20] [fit = (n9)] (var1) {{\color{black} VP}};
\node [draw,thick,red,fill=red!20] [fit = (tw3)] (var1v2) {{\color{black} \large{VP}}};
\node [anchor=west] (var1label) at ([yshift=0.5em]var1.east) {\tiny{\alert{变量}}};
\node [anchor=south] (var1v2label) at ([xshift=-2em]var1v2.north) {\tiny{\alert{变量}}};
}
\end{scope}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论