Commit e1a8ace5 by xiaotong

binarized trees

parent c5d54017
...@@ -181,7 +181,7 @@ ...@@ -181,7 +181,7 @@
\end{center} \end{center}
\begin{itemize} \begin{itemize}
\item<2-> 一个具体的例子 \item<2-> 一个例子
\end{itemize} \end{itemize}
\vspace{-1.0em} \vspace{-1.0em}
...@@ -206,16 +206,16 @@ ...@@ -206,16 +206,16 @@
\node [anchor=north] (tw3) at ([yshift=-2em]sw3.south) {Trump}; \node [anchor=north] (tw3) at ([yshift=-2em]sw3.south) {Trump};
\draw [-,dashed] (sw1.south) -- (tw1.north); \draw [-,dashed] (sw1.south) -- (tw1.north);
\draw [-] (sw2.south) -- (tw2.north); \draw [-,dashed] (sw2.south) -- (tw2.north);
\draw [-] (sw3.south) -- (tw3.north); \draw [-,dashed] (sw3.south) -- (tw3.north);
\draw [-] (sw4.south) -- (tw3.north); \draw [-,dashed] (sw4.south) -- (tw3.north);
\node [anchor=west] (rulelabel1) at ([xshift=1in,yshift=0em]n1.east) {\footnotesize{\textbf{抽取到的规则:}}}; \node [anchor=west] (rulelabel1) at ([xshift=1in,yshift=0.3em]n1.east) {\footnotesize{\textbf{抽取到的规则:}}};
\node [anchor=north west] (rule1) at (rulelabel1.south west) {NP(NNP$_1$ NN$_2$ NN(唐纳德) NN(特朗普))}; \node [anchor=north west] (rule1) at (rulelabel1.south west) {NP(NNP$_1$ NN$_2$ NN(唐纳德) NN(特朗普))};
\node [anchor=north west] (rule1t) at ([yshift=0.2em]rule1.south west) {$\to$ NNP$_1$ NN$_2$ Trump}; \node [anchor=north west] (rule1t) at ([yshift=0.2em]rule1.south west) {$\to$ NNP$_1$ NN$_2$ Trump};
\node [anchor=north west] (rule2) at (rule1t.south west) {NP(NNP$_1$ NN(总统) NN(唐纳德) NN(特朗普))}; \node [anchor=north west] (rule2) at (rule1t.south west) {NP(NNP$_1$ NN(总统) NN(唐纳德) NN(特朗普))};
\node [anchor=north west] (rule2t) at ([yshift=0.2em]rule2.south west) {$\to$ NNP$_1$ President Trump}; \node [anchor=north west] (rule2t) at ([yshift=0.2em]rule2.south west) {$\to$ NNP$_1$ President Trump};
\node [anchor=north west] (rulelabel2) at (rule2t.south west) {\footnotesize{\textbf{\alert{不能}抽取到的规则:}}}; \node [anchor=north west] (rulelabel2) at ([yshift=-0.3em]rule2t.south west) {\footnotesize{\textbf{\alert{不能}抽取到的规则:}}};
\node [anchor=north west] (rule3) at (rulelabel2.south west) {NP(NN(唐纳德) NN(特朗普)) $\to$ Trump}; \node [anchor=north west] (rule3) at (rulelabel2.south west) {NP(NN(唐纳德) NN(特朗普)) $\to$ Trump};
\end{scope} \end{scope}
...@@ -231,11 +231,87 @@ ...@@ -231,11 +231,87 @@
%%% tree binarization (cont.) %%% tree binarization (cont.)
\begin{frame}{更多的规则 - 句法树二叉化(续)} \begin{frame}{更多的规则 - 句法树二叉化(续)}
\begin{itemize} \begin{itemize}
\item 句法分析器生成的句法树可能会非常平坦,这会导致抽取的规则很``大''而且规则无法继续被分解 \item 一种解决问题的思路是用二叉化方法把树结构变得更深
\vspace{-1.5em}
\begin{center}
\begin{tikzpicture}
{\scriptsize
\begin{scope}[sibling distance=4pt, level distance=25pt]
\Tree[.\node(n1){NP};
[.NNP \node(sw1){美国}; ]
[.NN \node(sw2){总统}; ]
[.NN \node(sw3){唐纳德}; ]
[.NN \node(sw4){特朗普}; ]
]
\node [anchor=north] (tw1) at ([yshift=-2em]sw1.south) {U.S.};
\node [anchor=north] (tw2) at ([yshift=-2em]sw2.south) {President};
\node [anchor=north] (tw3) at ([yshift=-2em]sw3.south) {Trump};
\draw [-,dashed] (sw1.south) -- (tw1.north);
\draw [-,dashed] (sw2.south) -- (tw2.north);
\draw [-,dashed] (sw3.south) -- (tw3.north);
\draw [-,dashed] (sw4.south) -- (tw3.north);
\draw [->,very thick] ([xshift=1em]sw4.east) -- ([xshift=5em]sw4.east) node [pos=0.5,above] {\tiny{二叉化}};
\end{scope}
\begin{scope}[xshift=2.2in,sibling distance=10pt, level distance=15pt]
\Tree[.\node(n1){NP};
[.NNP \node(sw1){美国}; ]
[.NP-BAR
[.NN \node(sw2){总统}; ]
[.NP-BAR
[.NN \node(sw3){唐纳德}; ]
[.NN \node(sw4){特朗普}; ]
]
]
]
\node [anchor=north] (tw1) at ([yshift=-4.5em]sw1.south) {U.S.};
\node [anchor=north] (tw2) at ([yshift=-2.75em]sw2.south) {President};
\node [anchor=north] (tw3) at ([yshift=-1em]sw3.south) {Trump};
\draw [-,dashed] (sw1.south) -- (tw1.north);
\draw [-,dashed] (sw2.south) -- (tw2.north);
\draw [-,dashed] (sw3.south) -- (tw3.north);
\draw [-,dashed] (sw4.south) -- (tw3.north);
\end{scope}
}
\end{tikzpicture}
\end{center}
\visible<2->{
\small{二叉化增加了更多的可信节点,这也带来了新的规则}
\begin{center}
{\footnotesize
\vspace{0.3em}
NP-BAR(NN(唐纳德) NN(特朗普)) $\to$ Trump \\
\vspace{0.3em}
NP-BAR(NN$_1$ NP-BAR$_2$) $\to$ NN$_1$ NP-BAR$_2$
\vspace{0.3em}
}
\end{center}
}
\item<3-> 树二叉化已经成为基于句法机器翻译模型的常用方法
\begin{itemize} \begin{itemize}
\item 比如,在CTB中经常会看到很宽的子树结构 \item 有很多策略:左优先、右优先、head优先等等
\item 二叉化可以得到更多(细粒度)规则,保证规则的覆盖度
\end{itemize} \end{itemize}
\end{itemize} \end{itemize}
\end{frame} \end{frame}
%%%------------------------------------------------------------------------------------------------------------ %%%------------------------------------------------------------------------------------------------------------
......
...@@ -3823,6 +3823,175 @@ VP(P(对) NP(NN(局势)) VP$_1$) $\to$ VP$_1$ about the situation \\ ...@@ -3823,6 +3823,175 @@ VP(P(对) NP(NN(局势)) VP$_1$) $\to$ VP$_1$ about the situation \\
\end{frame} \end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% tree binarization
% Slide: motivates tree binarization. States that flat parse trees yield large,
% non-decomposable rules, draws a wide flat IP tree, then (from overlay 2) shows
% a worked NP example together with the rules that can and cannot be extracted.
\begin{frame}{更多的规则 - 句法树二叉化}
\begin{itemize}
\item 句法分析器生成的句法树可能会非常平坦,这会导致抽取的规则很``大''而且规则无法继续被分解
\begin{itemize}
\item 比如,在CTB中经常会看到很宽的子树结构
\end{itemize}
\end{itemize}
% Picture 1: a flat IP node with the children NP, VP, ",", VP, ",", VP, ",", VP
% and a final node whose label is a period followed by a letter V coloured white.
% NOTE(review): the white V presumably acts as an invisible spacer on a white
% background -- confirm against the slide theme.
\begin{center}
\begin{tikzpicture}
{\scriptsize
\begin{scope}[scale = 0.9, sibling distance=20pt, level distance=30pt]
{\footnotesize
\Tree[.IP
[.NP ]
[.VP ]
[., ]
[.VP ]
[., ]
[.VP ]
[., ]
[.VP ]
[.{.{\color{white} V}} ]
]
}
\end{scope}
}
\end{tikzpicture}
\end{center}
% The bullet below and the example picture both carry the overlay
% specification <2->, so they appear together from the second overlay onwards.
\begin{itemize}
\item<2-> 一个例子
\end{itemize}
\vspace{-1.0em}
\begin{center}
\begin{tikzpicture}
\visible<2->{
{\scriptsize
\begin{scope}[sibling distance=4pt, level distance=25pt]
% Picture 2: flat NP tree. The root is named n1 and the four leaves sw1..sw4 so
% that the nodes drawn afterwards can be positioned relative to them. The tree
% itself is set in \footnotesize; everything after it inherits \scriptsize.
{\footnotesize
\Tree[.\node(n1){NP};
[.NNP \node(sw1){美国}; ]
[.NN \node(sw2){总统}; ]
[.NN \node(sw3){唐纳德}; ]
[.NN \node(sw4){特朗普}; ]
]
}
% English words, each placed 2em below the leaf it is anchored to. Only three
% target nodes exist: both sw3 and sw4 are linked to tw3 (Trump).
\node [anchor=north] (tw1) at ([yshift=-2em]sw1.south) {U.S.};
\node [anchor=north] (tw2) at ([yshift=-2em]sw2.south) {President};
\node [anchor=north] (tw3) at ([yshift=-2em]sw3.south) {Trump};
% Dashed links between the Chinese leaves and the English words.
\draw [-,dashed] (sw1.south) -- (tw1.north);
\draw [-,dashed] (sw2.south) -- (tw2.north);
\draw [-,dashed] (sw3.south) -- (tw3.north);
\draw [-,dashed] (sw4.south) -- (tw3.north);
% Rule list, started 1in to the right of the root node n1. Each following node
% is anchored at the south-west corner of the previous one, so the entries
% stack into a left-aligned column; the small yshift values adjust the gaps.
\node [anchor=west] (rulelabel1) at ([xshift=1in,yshift=0.3em]n1.east) {\footnotesize{\textbf{抽取到的规则:}}};
\node [anchor=north west] (rule1) at (rulelabel1.south west) {NP(NNP$_1$ NN$_2$ NN(唐纳德) NN(特朗普))};
\node [anchor=north west] (rule1t) at ([yshift=0.2em]rule1.south west) {$\to$ NNP$_1$ NN$_2$ Trump};
\node [anchor=north west] (rule2) at (rule1t.south west) {NP(NNP$_1$ NN(总统) NN(唐纳德) NN(特朗普))};
\node [anchor=north west] (rule2t) at ([yshift=0.2em]rule2.south west) {$\to$ NNP$_1$ President Trump};
% Second heading, with the negation highlighted via \alert, followed by the
% rule that cannot be extracted from the flat tree.
\node [anchor=north west] (rulelabel2) at ([yshift=-0.3em]rule2t.south west) {\footnotesize{\textbf{\alert{不能}抽取到的规则:}}};
\node [anchor=north west] (rule3) at (rulelabel2.south west) {NP(NN(唐纳德) NN(特朗普)) $\to$ Trump};
\end{scope}
}
}
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% tree binarization (cont.)
% Slide: continuation of the binarization topic. Shows the flat NP tree next to
% its binarized form, then (overlay 2) the rules that become extractable, then
% (overlay 3) closing remarks on binarization strategies.
\begin{frame}{更多的规则 - 句法树二叉化(续)}
\begin{itemize}
\item 一种解决问题的思路是用二叉化方法把树结构变得更深
\vspace{-1.5em}
\begin{center}
\begin{tikzpicture}
{\scriptsize
% Left-hand side: the original flat NP tree with its English words.
\begin{scope}[sibling distance=4pt, level distance=25pt]
\Tree[.\node(n1){NP};
[.NNP \node(sw1){美国}; ]
[.NN \node(sw2){总统}; ]
[.NN \node(sw3){唐纳德}; ]
[.NN \node(sw4){特朗普}; ]
]
\node [anchor=north] (tw1) at ([yshift=-2em]sw1.south) {U.S.};
\node [anchor=north] (tw2) at ([yshift=-2em]sw2.south) {President};
\node [anchor=north] (tw3) at ([yshift=-2em]sw3.south) {Trump};
\draw [-,dashed] (sw1.south) -- (tw1.north);
\draw [-,dashed] (sw2.south) -- (tw2.north);
\draw [-,dashed] (sw3.south) -- (tw3.north);
\draw [-,dashed] (sw4.south) -- (tw3.north);
% Arrow labelled "binarization", starting right of the last leaf of the left
% tree. It must stay in this scope: the node names are defined again below, and
% TikZ resolves a name to its most recent definition.
\draw [->,very thick] ([xshift=1em]sw4.east) -- ([xshift=5em]sw4.east) node [pos=0.5,above] {\tiny{二叉化}};
\end{scope}
% Right-hand side: the binarized tree, shifted 2.2in to the right. The names
% n1, sw1..sw4 and tw1..tw3 are reused for the new nodes.
\begin{scope}[xshift=2.2in,sibling distance=10pt, level distance=15pt]
\Tree[.\node(n1){NP};
[.NNP \node(sw1){美国}; ]
[.NP-BAR
[.NN \node(sw2){总统}; ]
[.NP-BAR
[.NN \node(sw3){唐纳德}; ]
[.NN \node(sw4){特朗普}; ]
]
]
]
% The leaves now sit at different depths, so each English word gets its own
% downward shift.
% NOTE(review): the three shifts presumably place all English words on one
% common line -- confirm in the rendered slide.
\node [anchor=north] (tw1) at ([yshift=-4.5em]sw1.south) {U.S.};
\node [anchor=north] (tw2) at ([yshift=-2.75em]sw2.south) {President};
\node [anchor=north] (tw3) at ([yshift=-1em]sw3.south) {Trump};
\draw [-,dashed] (sw1.south) -- (tw1.north);
\draw [-,dashed] (sw2.south) -- (tw2.north);
\draw [-,dashed] (sw3.south) -- (tw3.north);
\draw [-,dashed] (sw4.south) -- (tw3.north);
\end{scope}
}
\end{tikzpicture}
\end{center}
\visible<2->{
% \small is a declaration, not a one-argument command: writing \small{...}
% leaves the size switch active after the closing brace. The enclosing group
% keeps it from reaching the \item<3-> that follows.
{\small 二叉化增加了更多的可信节点,这也带来了新的规则}
\begin{center}
{\footnotesize
\vspace{0.3em}
% The gap between the two rules is requested through the optional argument of
% \\ because a \vspace placed after \\ is executed in horizontal mode and only
% takes effect below the line that follows.
NP-BAR(NN(唐纳德) NN(特朗普)) $\to$ Trump \\[0.3em]
NP-BAR(NN$_1$ NP-BAR$_2$) $\to$ NN$_1$ NP-BAR$_2$
\vspace{0.3em}
}
\end{center}
}
\item<3-> 树二叉化已经成为基于句法机器翻译模型的常用方法
\begin{itemize}
\item 有很多策略:左优先、右优先、head优先等等
\item 二叉化可以得到更多(细粒度)规则,保证规则的覆盖度
\end{itemize}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------ %%%------------------------------------------------------------------------------------------------------------
%%% 基于树结构的翻译文法 - 树到树 %%% 基于树结构的翻译文法 - 树到树
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论