Commit c5d54017 by xiaotong

updates

parent 8e704c9d
...@@ -146,130 +146,99 @@ ...@@ -146,130 +146,99 @@
\subsection{翻译规则抽取} \subsection{翻译规则抽取}
%%%------------------------------------------------------------------------------------------------------------ %%%------------------------------------------------------------------------------------------------------------
%%% SPMT规则 %%% tree binarization
\begin{frame}{更多的规则 - SPMT规则} \begin{frame}{更多的规则 - 句法树二叉化}
\begin{itemize} \begin{itemize}
\item 任意大小的规则都可以通过组合的方式获取,但是组合过多数量的规则会大大增加规则集大小。SPMT是一种高效地获得大颗粒度规则的方法 (Marcu et al., 2006) \item 句法分析器生成的句法树可能会非常平坦,这会导致抽取的规则很``大''而且规则无法继续被分解
\begin{itemize} \begin{itemize}
\item 先抽取短语,之后找到覆盖这个短语的可信节点 \item 比如,在CTB中经常会看到很宽的子树结构
\item 以这个可信节点做根,生成包含该短语的规则
\end{itemize} \end{itemize}
\end{itemize} \end{itemize}
\vspace{-0.3em} \begin{center}
\begin{minipage}[b]{0.47\textwidth}
{\footnotesize
\visible<3->{
对于任意一个与词对齐兼容的短语,可以找到包含它的``最小''翻译规则,即SPMT规则,比如\\
\vspace{-0.4em}
对 局势 $\to$ about the situation \\
\vspace{-0.4em}
可以很容易得到它的SPMT规则 \\
\vspace{-0.4em}
VP(P(对) NP(NN(局势)) VP$_1$) $\to$ VP$_1$ about the situation \\
}
\visible<4->{
\vspace{-0.4em}
但是,如果用组合的方式,需要三条最小规则才能得到这条规则\\
}
\vfill
}
\end{minipage}
\hfill
\begin{minipage}[t]{0.47\textwidth}
\begin{tikzpicture} \begin{tikzpicture}
{\scriptsize {\scriptsize
\begin{scope} \begin{scope}[scale = 0.9, sibling distance=20pt, level distance=30pt]
\visible<2->{ {\footnotesize
\begin{scope}[level distance=20pt] \Tree[.IP
\node[scale=0.8, inner sep=0.1cm,align=center,draw] (cfrag1) at [.NP ]
(0,0.25) {\Tree[.\node(sn1){NP}; [.\node(sn2){PN}; 他 ]]}; [.VP ]
\end{scope} [., ]
[.VP ]
\begin{scope}[level distance=20pt] [., ]
\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag2) at [.VP ]
([xshift=1.2em]cfrag1.south east) {\Tree[.\node(sn3){P}; 对 ]}; [., ]
[.VP ]
[.{.{\color{white} V}} ]
]
}
\end{scope} \end{scope}
}
\begin{scope}[level distance=20pt] \end{tikzpicture}
\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag3) at \end{center}
([xshift=1.2em]cfrag2.south east) {\Tree[.\node(sn4){NP}; [.NN 局势 ]]};
\end{scope}
\begin{scope}[sibling distance=15pt,level distance=20pt] \begin{itemize}
\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag4) at \item<2-> 一个具体的例子
([xshift=1.4em]cfrag3.south east) {\Tree[.\node(sn5){VP}; [.\node(sn6){VV}; 表示 ] [.\node(sn7){NN}; 担心 ]]}; \end{itemize}
\end{scope}
\begin{scope}[sibling distance=32pt,level distance=20pt] \vspace{-1.0em}
\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag6) at \begin{center}
([xshift=0.3em,yshift=4em]cfrag2.north west) {\Tree[.\node(sn11){VP}; [.\node(sn9){P}; ] [.\node(sn10){NP}; ] [.\node(sn13){VP}; ]]}; \begin{tikzpicture}
\end{scope}
\begin{scope}[sibling distance=65pt,level distance=18pt] \visible<2->{
\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south east,draw] (cfrag7) at {\scriptsize
([xshift=-4.5em,yshift=0.5em]cfrag6.north east) {\Tree[.\node(sn14){IP}; [.\node(sn15){NP}; ] [.\node(sn16){VP}; ]]}; \begin{scope}[sibling distance=4pt, level distance=25pt]
\end{scope}
\node[scale=0.9,anchor=north,minimum size=18pt] (tw11) at ([xshift=-0.3em,yshift=-1.2em]cfrag1.south){he}; {\footnotesize
\node[scale=0.9,anchor=west,minimum size=18pt] (tw12) at ([yshift=-0.1em,xshift=0.5em]tw11.east){was}; \Tree[.\node(n1){NP};
\node[scale=0.9,anchor=west,minimum size=18pt] (tw13) at ([yshift=0.1em,xshift=0.5em]tw12.east){worried}; [.NNP \node(sw1){美国}; ]
\node[scale=0.9,anchor=west,minimum size=18pt] (tw14) at ([xshift=0.5em]tw13.east){about}; [.NN \node(sw2){总统}; ]
\node[scale=0.9,anchor=west,minimum size=18pt] (tw15) at ([xshift=0.5em]tw14.east){the}; [.NN \node(sw3){唐纳德}; ]
\node[scale=0.9,anchor=west,minimum size=18pt] (tw16) at ([yshift=-0.1em,xshift=0.5em]tw15.east){situation}; [.NN \node(sw4){特朗普}; ]
]
\draw[dashed] ([xshift=-0.3em]cfrag1.south) -- ([yshift=-0.3em]tw11.north);
\draw[dashed] (cfrag2.south) -- ([yshift=-0.4em]tw14.north);
\draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw15.north);
\draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw16.north);
\draw[dashed] (cfrag4.south) .. controls +(south:0.6) and +(north:0.6) .. ([yshift=-0.4em]tw13.north);
\visible<3->{
\draw[dashed,red] (cfrag2.south) -- ([yshift=-0.4em]tw14.north);
\draw[dashed,red] (cfrag3.south) -- ([yshift=-0.4em]tw15.north);
\draw[dashed,red] (cfrag3.south) -- ([yshift=-0.4em]tw16.north);
} }
\draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag1.north) -- ([xshift=0.0em,yshift=6.5em]cfrag1.north); \node [anchor=north] (tw1) at ([yshift=-2em]sw1.south) {U.S.};
\draw[*-*] ([xshift=-0.1em,yshift=-0.2em]cfrag2.north) -- ([xshift=-0.1em,yshift=4.4em]cfrag2.north); \node [anchor=north] (tw2) at ([yshift=-2em]sw2.south) {President};
\draw[*-*] ([xshift=0.1em,yshift=-0.4em]cfrag3.north) .. controls +(north:2.4em) and +(south:2.4em) .. ([xshift=1.1em,yshift=2.5em]cfrag3.north); \node [anchor=north] (tw3) at ([yshift=-2em]sw3.south) {Trump};
\draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag4.north) -- ([xshift=0.0em,yshift=2.5em]cfrag4.north);
\draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag6.north) -- ([xshift=0.0em,yshift=0.9em]cfrag6.north);
\visible<4->{ \draw [-,dashed] (sw1.south) -- (tw1.north);
\node [fill=blue,circle,inner sep=2pt] (rlabel2) at (cfrag2.north east) {{\color{white} \tiny{2}}}; \draw [-] (sw2.south) -- (tw2.north);
\node [fill=blue,circle,inner sep=2pt] (rlabel3) at (cfrag3.north east) {{\color{white} \tiny{3}}}; \draw [-] (sw3.south) -- (tw3.north);
\node [fill=blue,circle,inner sep=2pt] (rlabel6) at (cfrag6.north east) {{\color{white} \tiny{5}}}; \draw [-] (sw4.south) -- (tw3.north);
}
\begin{pgfonlayer}{background} \node [anchor=west] (rulelabel1) at ([xshift=1in,yshift=0em]n1.east) {\footnotesize{\textbf{抽取到的规则:}}};
\visible<4->{ \node [anchor=north west] (rule1) at (rulelabel1.south west) {NP(NNP$_1$ NN$_2$ NN(唐纳德) NN(特朗普))};
\node [fill=green!20,inner sep=0pt] (cfrag2back) [fit = (cfrag2)] {}; \node [anchor=north west] (rule1t) at ([yshift=0.2em]rule1.south west) {$\to$ NNP$_1$ NN$_2$ Trump};
\node [fill=green!20,inner sep=0pt] (cfrag3back) [fit = (cfrag3)] {}; \node [anchor=north west] (rule2) at (rule1t.south west) {NP(NNP$_1$ NN(总统) NN(唐纳德) NN(特朗普))};
\node [fill=green!20,inner sep=0pt] (cfrag6back) [fit = (cfrag6)] {}; \node [anchor=north west] (rule2t) at ([yshift=0.2em]rule2.south west) {$\to$ NNP$_1$ President Trump};
} \node [anchor=north west] (rulelabel2) at (rule2t.south west) {\footnotesize{\textbf{\alert{不能}抽取到的规则:}}};
\node [anchor=north west] (rule3) at (rulelabel2.south west) {NP(NN(唐纳德) NN(特朗普)) $\to$ Trump};
\visible<3->{
\node [anchor=south west,draw=red,thick,fill=red!20,inner sep=0pt,minimum height = 2em, minimum width=6em] (ps) at ([xshift=-0.2em,yshift=-0.2em]cfrag2.south west) {};
\node [anchor=south west,draw=red,thick,fill=red!20,inner sep=0pt] (pt) [fit = (tw14) (tw15) (tw16)] {};
}
\end{pgfonlayer}
}
\end{scope} \end{scope}
} }
}
\end{tikzpicture} \end{tikzpicture}
\end{minipage} \end{center}
\end{frame} \end{frame}
%%%------------------------------------------------------------------------------------------------------------ %%%------------------------------------------------------------------------------------------------------------
%%% tree binarization (cont.)
\begin{frame}{更多的规则 - 句法树二叉化(续)}
\begin{itemize}
\item 句法分析器生成的句法树可能会非常平坦,这会导致抽取的规则很``大''而且规则无法继续被分解
\begin{itemize}
\item 比如,在CTB中经常会看到很宽的子树结构
\end{itemize}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{规则匹配} \subsection{规则匹配}
\end{CJK} \end{CJK}
......
...@@ -3727,7 +3727,7 @@ VP(P(对) NP(NN(局势)) VP$_1$) $\to$ VP$_1$ about the situation \\ ...@@ -3727,7 +3727,7 @@ VP(P(对) NP(NN(局势)) VP$_1$) $\to$ VP$_1$ about the situation \\
} }
\visible<4->{ \visible<4->{
\vspace{-0.4em} \vspace{-0.3em}
但是,如果用组合的方式,需要三条最小规则才能得到这条规则\\ 但是,如果用组合的方式,需要三条最小规则才能得到这条规则\\
} }
\vfill \vfill
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论