Commit c5d54017 by xiaotong

updates

parent 8e704c9d
......@@ -146,130 +146,99 @@
\subsection{翻译规则抽取}
%%%------------------------------------------------------------------------------------------------------------
%%% SPMT规则
\begin{frame}{更多的规则 - SPMT规则}
%%% tree binarization
\begin{frame}{更多的规则 - 句法树二叉化}
\begin{itemize}
\item 任意大小的规则都可以通过组合的方式获取,但是组合过多数量的规则会大大增加规则集大小。SPMT是一种高效地获得大颗粒度规则的方法 (Marcu et al., 2006)
\item 句法分析器生成的句法树可能会非常平坦,这会导致抽取的规则很``大''而且规则无法继续被分解
\begin{itemize}
\item 先抽取短语,之后找到覆盖这个短语的可信节点
\item 以这个可信节点做根,生成包含该短语的规则
\item 比如,在CTB中经常会看到很宽的子树结构
\end{itemize}
\end{itemize}
\vspace{-0.3em}
\begin{minipage}[b]{0.47\textwidth}
{\footnotesize
\visible<3->{
对于任意一个与词对齐兼容的短语,可以找到包含它的``最小''翻译规则,即SPMT规则,比如\\
\vspace{-0.4em}
对 形势 $\to$ about the situation \\
\vspace{-0.4em}
可以很容易得到它的SPMT规则 \\
\vspace{-0.4em}
VP(P(对) NP(NN(局势)) VP$_1$) $\to$ VP$_1$ about the situation \\
}
\visible<4->{
\vspace{-0.4em}
但是,如果用组合的方式,需要三条最小规则才能得到这条规则\\
}
\vfill
}
\end{minipage}
\hfill
\begin{minipage}[t]{0.47\textwidth}
\begin{center}
\begin{tikzpicture}
{\scriptsize
\begin{scope}
\begin{scope}[scale = 0.9, sibling distance=20pt, level distance=30pt]
\visible<2->{
\begin{scope}[level distance=20pt]
\node[scale=0.8, inner sep=0.1cm,align=center,draw] (cfrag1) at
(0,0.25) {\Tree[.\node(sn1){NP}; [.\node(sn2){PN}; 他 ]]};
\end{scope}
\begin{scope}[level distance=20pt]
\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag2) at
([xshift=1.2em]cfrag1.south east) {\Tree[.\node(sn3){P}; 对 ]};
{\footnotesize
\Tree[.IP
[.NP ]
[.VP ]
[., ]
[.VP ]
[., ]
[.VP ]
[., ]
[.VP ]
[.{.{\color{white} V}} ]
]
}
\end{scope}
}
\begin{scope}[level distance=20pt]
\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag3) at
([xshift=1.2em]cfrag2.south east) {\Tree[.\node(sn4){NP}; [.NN 形式 ]]};
\end{scope}
\end{tikzpicture}
\end{center}
\begin{scope}[sibling distance=15pt,level distance=20pt]
\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag4) at
([xshift=1.4em]cfrag3.south east) {\Tree[.\node(sn5){VP}; [.\node(sn6){VV}; 表示 ] [.\node(sn7){NN}; 担心 ]]};
\end{scope}
\begin{itemize}
\item<2-> 一个具体的例子
\end{itemize}
\begin{scope}[sibling distance=32pt,level distance=20pt]
\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag6) at
([xshift=0.3em,yshift=4em]cfrag2.north west) {\Tree[.\node(sn11){VP}; [.\node(sn9){P}; ] [.\node(sn10){NP}; ] [.\node(sn13){VP}; ]]};
\end{scope}
\vspace{-1.0em}
\begin{center}
\begin{tikzpicture}
\begin{scope}[sibling distance=65pt,level distance=18pt]
\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south east,draw] (cfrag7) at
([xshift=-4.5em,yshift=0.5em]cfrag6.north east) {\Tree[.\node(sn14){IP}; [.\node(sn15){NP}; ] [.\node(sn16){VP}; ]]};
\end{scope}
\visible<2->{
{\scriptsize
\begin{scope}[sibling distance=4pt, level distance=25pt]
\node[scale=0.9,anchor=north,minimum size=18pt] (tw11) at ([xshift=-0.3em,yshift=-1.2em]cfrag1.south){he};
\node[scale=0.9,anchor=west,minimum size=18pt] (tw12) at ([yshift=-0.1em,xshift=0.5em]tw11.east){was};
\node[scale=0.9,anchor=west,minimum size=18pt] (tw13) at ([yshift=0.1em,xshift=0.5em]tw12.east){worried};
\node[scale=0.9,anchor=west,minimum size=18pt] (tw14) at ([xshift=0.5em]tw13.east){about};
\node[scale=0.9,anchor=west,minimum size=18pt] (tw15) at ([xshift=0.5em]tw14.east){the};
\node[scale=0.9,anchor=west,minimum size=18pt] (tw16) at ([yshift=-0.1em,xshift=0.5em]tw15.east){situation};
\draw[dashed] ([xshift=-0.3em]cfrag1.south) -- ([yshift=-0.3em]tw11.north);
\draw[dashed] (cfrag2.south) -- ([yshift=-0.4em]tw14.north);
\draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw15.north);
\draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw16.north);
\draw[dashed] (cfrag4.south) .. controls +(south:0.6) and +(north:0.6) .. ([yshift=-0.4em]tw13.north);
\visible<3->{
\draw[dashed,red] (cfrag2.south) -- ([yshift=-0.4em]tw14.north);
\draw[dashed,red] (cfrag3.south) -- ([yshift=-0.4em]tw15.north);
\draw[dashed,red] (cfrag3.south) -- ([yshift=-0.4em]tw16.north);
{\footnotesize
\Tree[.\node(n1){NP};
[.NNP \node(sw1){美国}; ]
[.NN \node(sw2){总统}; ]
[.NN \node(sw3){唐纳德}; ]
[.NN \node(sw4){特朗普}; ]
]
}
\draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag1.north) -- ([xshift=0.0em,yshift=6.5em]cfrag1.north);
\draw[*-*] ([xshift=-0.1em,yshift=-0.2em]cfrag2.north) -- ([xshift=-0.1em,yshift=4.4em]cfrag2.north);
\draw[*-*] ([xshift=0.1em,yshift=-0.4em]cfrag3.north) .. controls +(north:2.4em) and +(south:2.4em) .. ([xshift=1.1em,yshift=2.5em]cfrag3.north);
\draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag4.north) -- ([xshift=0.0em,yshift=2.5em]cfrag4.north);
\draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag6.north) -- ([xshift=0.0em,yshift=0.9em]cfrag6.north);
\node [anchor=north] (tw1) at ([yshift=-2em]sw1.south) {U.S.};
\node [anchor=north] (tw2) at ([yshift=-2em]sw2.south) {President};
\node [anchor=north] (tw3) at ([yshift=-2em]sw3.south) {Trump};
\visible<4->{
\node [fill=blue,circle,inner sep=2pt] (rlabel2) at (cfrag2.north east) {{\color{white} \tiny{2}}};
\node [fill=blue,circle,inner sep=2pt] (rlabel3) at (cfrag3.north east) {{\color{white} \tiny{3}}};
\node [fill=blue,circle,inner sep=2pt] (rlabel6) at (cfrag6.north east) {{\color{white} \tiny{5}}};
}
\draw [-,dashed] (sw1.south) -- (tw1.north);
\draw [-] (sw2.south) -- (tw2.north);
\draw [-] (sw3.south) -- (tw3.north);
\draw [-] (sw4.south) -- (tw3.north);
\begin{pgfonlayer}{background}
\visible<4->{
\node [fill=green!20,inner sep=0pt] (cfrag2back) [fit = (cfrag2)] {};
\node [fill=green!20,inner sep=0pt] (cfrag3back) [fit = (cfrag3)] {};
\node [fill=green!20,inner sep=0pt] (cfrag6back) [fit = (cfrag6)] {};
}
\node [anchor=west] (rulelabel1) at ([xshift=1in,yshift=0em]n1.east) {\footnotesize{\textbf{抽取到的规则:}}};
\node [anchor=north west] (rule1) at (rulelabel1.south west) {NP(NNP$_1$ NN$_2$ NN(唐纳德) NN(特朗普))};
\node [anchor=north west] (rule1t) at ([yshift=0.2em]rule1.south west) {$\to$ NNP$_1$ NN$_2$ Trump};
\node [anchor=north west] (rule2) at (rule1t.south west) {NP(NNP$_1$ NN(总统) NN(唐纳德) NN(特朗普))};
\node [anchor=north west] (rule2t) at ([yshift=0.2em]rule2.south west) {$\to$ NNP$_1$ President Trump};
\node [anchor=north west] (rulelabel2) at (rule2t.south west) {\footnotesize{\textbf{\alert{不能}抽取到的规则:}}};
\node [anchor=north west] (rule3) at (rulelabel2.south west) {NP(NN(唐纳德) NN(特朗普)) $\to$ Trump};
\visible<3->{
\node [anchor=south west,draw=red,thick,fill=red!20,inner sep=0pt,minimum height = 2em, minimum width=6em] (ps) at ([xshift=-0.2em,yshift=-0.2em]cfrag2.south west) {};
\node [anchor=south west,draw=red,thick,fill=red!20,inner sep=0pt] (pt) [fit = (tw14) (tw15) (tw16)] {};
}
\end{pgfonlayer}
}
\end{scope}
}
}
\end{tikzpicture}
\end{minipage}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Tree binarization (continued): follow-up slide restating the flat-tree problem.
\begin{frame}{更多的规则 - 句法树二叉化(续)}
  \begin{itemize}
    % Motivation: parser output can be very flat, so the extracted rules are
    % "large" and cannot be decomposed any further.
    \item 句法分析器生成的句法树可能会非常平坦,这会导致抽取的规则很``大''而且规则无法继续被分解
      \begin{itemize}
        % Example: very wide subtree structures are frequently seen in CTB.
        \item 比如,在CTB中经常会看到很宽的子树结构
      \end{itemize}
  \end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{规则匹配}
\end{CJK}
......
......@@ -3727,7 +3727,7 @@ VP(P(对) NP(NN(局势)) VP$_1$) $\to$ VP$_1$ about the situation \\
}
\visible<4->{
\vspace{-0.4em}
\vspace{-0.3em}
但是,如果用组合的方式,需要三条最小规则才能得到这条规则\\
}
\vfill
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论