% !Mode:: "TeX:UTF-8"
% !TEX encoding = UTF-8 Unicode
\def\CTeXPreproc{Created by ctex v0.2.13, don't edit!}
%
% Beamer slides, chapter 4: phrase-based and syntax-based statistical
% translation models (chapter title taken from the navigation footer below).
%
% NOTE: each `%' comment must end at a physical line break.  When several
% statements share one line, the first `%' comments out everything after it.
\documentclass[cjk,t,compress,12pt]{beamer}

%% ---- packages ---------------------------------------------------------
\usepackage{pstricks}
\usepackage{etex}
\usepackage{eso-pic,graphicx}
\usepackage{fancybox}
\usepackage{amsmath,amssymb}
\usepackage{setspace}
\usepackage{xcolor}
\usepackage{CJK}
\usepackage{tikz}
\usepackage{tikz-qtree}
\usepackage{hyperref}
\usepackage{ulem}
\usepackage{tcolorbox}
\tcbuselibrary{skins}

%% ---- TikZ libraries (each library is loaded exactly once) -------------
\usetikzlibrary{arrows,decorations.pathreplacing}
\usetikzlibrary{shadows}
\usetikzlibrary{decorations}
\usetikzlibrary{shapes}
\usetikzlibrary{positioning,fit,calc}
\usetikzlibrary{mindmap,backgrounds} % mind map

%% ---- math operators ---------------------------------------------------
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}

%% ---- fonts, colours and beamer colour sets ----------------------------
\setbeamertemplate{items}[ball]
\usefonttheme[onlymath]{serif} % font of math
\definecolor{ugreen}{rgb}{0,0.5,0}
\definecolor{lgreen}{rgb}{0.9,1,0.8}
\definecolor{xtgreen1}{rgb}{0.824,0.898,0.8}
\definecolor{xtgreen}{rgb}{0.914,0.945,0.902}
\definecolor{lightgray}{gray}{0.85}
\setbeamercolor{uppercol}{fg=white,bg=ugreen}
\setbeamercolor{lowercol}{fg=black,bg=xtgreen}
\definecolor{darkred}{rgb}{0.55, 0.0, 0.0}
\definecolor{alizarin}{rgb}{0.82, 0.1, 0.26}
\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\setbeamercolor{uppercolblue}{fg=white,bg=ublue}
\setbeamercolor{lowercolblue}{fg=black,bg=blue!10}

%% ---- scratch lengths (not used in this part of the file) --------------
\newlength{\mystep}
\newlength{\base}
\newlength{\wseg}
\newlength{\hseg}
\newlength{\wnode}
\newlength{\hnode}

\usefonttheme[onlylarge]{structurebold}

%% ---- CJK font family: `you' if C:/WINDOWS/win.ini exists, else `gbsn' --
\IfFileExists{C:/WINDOWS/win.ini}
  {\newcommand{\mycfont}{you}}
  {\newcommand{\mycfont}{gbsn}}
% NOTE(review): the empty CJK environment presumably preloads the font
% definitions before the first frame -- confirm before removing it.
\begin{CJK}{UTF8}{\mycfont} \end{CJK}

\setbeamerfont*{frametitle}{size=\large,series=\bfseries}

%% ---- footer: chapter title, authors, date, frame counter --------------
\setbeamertemplate{navigation symbols}{%
  \begin{CJK}{UTF8}{\mycfont} 第四章 基于短语和句法的统计翻译模型
  \hspace*{2em} 肖桐\&朱靖波 \end{CJK}
  \hspace*{2em} \today
  \hspace*{2em} \insertframenumber{}/\inserttotalframenumber}

\setbeamertemplate{itemize items}[circle] % if you want a circle
\setbeamertemplate{itemize subitem}[triangle] % if you want a triangle
\setbeamertemplate{itemize subsubitem}[ball] % if you want a ball

\begin{document}
\begin{CJK}{UTF8}{\mycfont}

%% ---- title page -------------------------------------------------------
\title{\Large{基于短语和句法的统计翻译模型}}
\author{\large{\textbf{肖桐\ \ 朱靖波}}}
% \blue / \black are colour switches: \blue{...} stays in effect until the
% following \black{} resets the colour.
\institute{
  \blue{\url{xiaotong@mail.neu.edu.cn}} \black{} \\
  \blue{\url{zhujingbo@mail.neu.edu.cn}} \black{} \\
  \vspace{1.0em}
  东北大学 自然语言处理实验室 \\
  \blue{\underline{\url{http://www.nlplab.com}}} \black{} \\
  \vspace{0.2cm} \hspace{0.1cm}
  \includegraphics[scale=0.1]{../Figures/logo.pdf}
}
\date{}
\maketitle

% Tighten the indentation of first- and second-level lists on every frame.
\setlength{\leftmargini}{1em}
\setlength{\leftmarginii}{1em}

%%%------------------------------------------------------------------------------------------------------------
\section{使用更大的翻译单元}
%%%------------------------------------------------------------------------------------------------------------
%%% Problems with word-based models
\begin{frame}{基于单词有哪些问题}
\begin{itemize}
\item 基于单词的翻译模型有哪些\alert{优点}?
\begin{itemize}
  \item 比较符合人的思维、简单直接、易于实现
\end{itemize}
\vspace{0.3em}
% Left column: toy word translation table (source word -> target word, P).
\begin{minipage}[t]{0.40\linewidth}
  \begin{center}
    {\scriptsize
    \begin{tabular}{l | l}
      单词翻译表 & P \\
      \hline
      我 $\to$ I & 0.6 \\
      喜欢 $\to$ like & 0.3 \\
      绿 $\to$ green & 0.9 \\
      茶 $\to$ tea & 0.8 \\
    \end{tabular}
    }
  \end{center}
\end{minipage}
% Right column: source sentence s; the target t and the word links are
% uncovered from slide 2 onwards.
\begin{minipage}[c]{0.58\linewidth}
  \begin{tikzpicture}
    \begin{scope}
      {\footnotesize
      \node [anchor=west] (s1) at (0,0) {我};
      \node [anchor=west] (s2) at ([xshift=1.0em]s1.east) {喜欢};
      \node [anchor=west] (s3) at ([xshift=1.0em]s2.east) {{\color{ugreen} 绿}};
      \node [anchor=west] (s4) at ([xshift=1.0em]s3.east) {茶};
      \node [anchor=east] (s) at (s1.west) {$\textbf{s}=$};
      }
    \end{scope}
    \visible<2->{
      \begin{scope}[yshift=-2.0em]
        {\footnotesize
        \node [anchor=west] (t1) at (0.35em,0) {I};
        \node [anchor=west] (t2) at ([xshift=0.8em,yshift=-0.0em]t1.east) {like};
        \node [anchor=west] (t3) at ([xshift=1.0em,yshift=-0.2em]t2.east) {green};
        \node [anchor=west] (t4) at ([xshift=0.8em,yshift=0.1em]t3.east) {tea};
        \node [anchor=east] (t) at ([xshift=-0.3em]t1.west) {$\textbf{t}=$};
        }
      \end{scope}
      \draw [-] (s1.south) -- (t1.north);
      \draw [-] (s2.south) ..controls +(south:0.5em) and +(north:0.5em).. (t2.north);
      \draw [-] (s3.south) ..controls +(south:0.5em) and +(north:0.5em).. (t3.north);
      \draw [-] (s4.south) -- (t4.north);
    }% end of \visible<2->
  \end{tikzpicture}
\end{minipage}
\vspace{0.5em}
\item<3-> 基于单词的翻译模型有哪些\alert{不足}?
\begin{itemize}
  \item 需要定义词是什么
  \item 独立性假设:单词之间相对独立,没有考虑搭配
  \item 调序:较弱的调序建模
  \item \dots
\end{itemize}
\vspace{-0.5em}
% Left column: the same kind of table, now with two competing entries for 红.
\begin{minipage}[t]{0.40\linewidth}
  \begin{center}
    {\scriptsize
    \begin{tabular}{l | l}
      单词翻译表 & P \\
      \hline
      我 $\to$ I & 0.6 \\
      喜欢 $\to$ like & 0.3 \\
      红 $\to$ red & 0.8 \\
      红 $\to$ black & 0.1 \\
      茶 $\to$ tea & 0.8\\
    \end{tabular}
    }
  \end{center}
\end{minipage}
% Right column: word-by-word output (slide 4) and the highlighted problem
% phrase with its explanation (slide 5).
\begin{minipage}[c]{0.58\linewidth}
  \vspace{1em}
  \begin{tikzpicture}
    \begin{scope}
      {\footnotesize
      \node [anchor=west] (s1) at (0,0) {我};
      \node [anchor=west] (s2) at ([xshift=1.0em]s1.east) {喜欢};
      \node [anchor=west] (s3) at ([xshift=1.0em]s2.east) {\alert{红}};
      \node [anchor=west] (s4) at ([xshift=1.0em]s3.east) {茶};
      \node [anchor=east] (s) at (s1.west) {$\textbf{s}=$};
      }
    \end{scope}
    \visible<4->{
      \begin{scope}[yshift=-2.0em]
        {\footnotesize
        \node [anchor=west] (t1) at (0.35em,0) {I};
        \node [anchor=west] (t2) at ([xshift=0.8em,yshift=-0.0em]t1.east) {like};
        \node [anchor=west] (t3) at ([xshift=1.0em,yshift=-0.0em]t2.east) {red};
        \node [anchor=west] (t4) at ([xshift=1.0em,yshift=-0.1em]t3.east) {tea};
        \node [anchor=east] (t) at ([xshift=-0.3em]t1.west) {$\textbf{t}=$};
        }
      \end{scope}
      \draw [-] (s1.south) -- (t1.north);
      \draw [-] (s2.south) ..controls +(south:0.5em) and +(north:0.5em).. (t2.north);
      \draw [-] (s3.south) ..controls +(south:0.5em) and +(north:0.5em).. (t3.north);
      \draw [-] (s4.south) ..controls +(south:0.5em) and +(north:0.5em).. (t4.north);
    }% end of \visible<4->
    % The red box is fitted around t3 and t4 and drawn on the background layer.
    \begin{pgfonlayer}{background}
      \visible<5->{
        \node [rectangle,draw=red,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (t3) (t4)] (problemphrase) {};
        \node [anchor=north,red,text width=8em,align=left] (problemlabel) at (problemphrase.south) {\begin{spacing}{0.8}\scriptsize{``红 茶''为一种搭配,应该翻译为``black tea''}\end{spacing}};
      }% end of \visible<5->
    \end{pgfonlayer}
  \end{tikzpicture}
\end{minipage}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Introducing phrase translation
\begin{frame}{引入更大的翻译单元}
\begin{itemize}
\item 简单的单词翻译似乎不行?\visible<2->{\alert{- 引入更大的翻译单元}}
% Left column: the word table; its title changes and word-string entries are
% appended from slide 2 onwards.
\begin{minipage}[t]{0.40\linewidth}
  \begin{center}
    {\scriptsize
    \begin{tabular}{l | l}
      \only<1>{单词翻译表}\only<2->{\alert{\sout{单词}}词串翻译表} & P \\
      \hline
      我 $\to$ I & 0.6 \\
      喜欢 $\to$ like & 0.3 \\
      红 $\to$ red & 0.8 \\
      红 $\to$ black & 0.1 \\
      茶 $\to$ tea & 0.8\\
      \visible<2->{我 喜欢 $\to$ I like & 0.3\\
      我 喜欢 $\to$ I liked & 0.2\\
      绿 茶 $\to$ green tea & 0.5\\
      绿 茶 $\to$ the green tea & 0.1\\
      红 茶 $\to$ black tea & 0.6\\
      \dots
& } \end{tabular} } \end{center} \end{minipage} \begin{minipage}[c]{0.55\linewidth} \vspace{1em} \begin{center} \begin{tikzpicture} \begin{scope} \begin{scope} {\footnotesize \node [anchor=west] (s1) at (0,0) {我}; \node [anchor=west] (s2) at ([xshift=1.0em]s1.east) {喜欢}; \node [anchor=west] (s3) at ([xshift=1.0em]s2.east) {\alert{红}}; \node [anchor=west] (s4) at ([xshift=1.0em]s3.east) {茶}; \node [anchor=east] (s) at (s1.west) {$\textbf{s}=$}; } \end{scope} \begin{scope}[yshift=-3.0em] {\small \node [anchor=west] (t1) at (0.30em,0) {I}; \node [anchor=west] (t2) at ([xshift=0.8em,yshift=-0.0em]t1.east) {like}; \node [anchor=west] (t3) at ([xshift=0.6em,yshift=-0.0em]t2.east) {red}; \node [anchor=west] (t4) at ([xshift=1.0em,yshift=-0.1em]t3.east) {tea}; \node [anchor=east] (t) at ([xshift=-0.3em]t1.west) {$\textbf{t}=$}; } \end{scope} \draw [-] (s1.south) -- (t1.north); \draw [-] (s2.south) ..controls +(south:1.0em) and +(north:1.0em).. (t2.north); \draw [-] (s3.south) ..controls +(south:1.0em) and +(north:1.0em).. 
(t3.north); \draw [-] (s4.south) -- (t4.north); \node [anchor=center,draw=ublue,circle,thick,fill=white,inner sep=2pt,circular drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] (head1) at ([xshift=2.5em,yshift=-0.5em]s4.south east) {{\color{red} \small{\textbf{No}}}}; \end{scope} \visible<3->{ \begin{scope}[yshift=-5.0em] \begin{scope} {\footnotesize \node [anchor=west] (s1) at (0,0) {我}; \node [anchor=west] (s2) at ([xshift=1.0em]s1.east) {喜欢}; \node [anchor=west] (s3) at ([xshift=1.0em]s2.east) {\alert{红}}; \node [anchor=west] (s4) at ([xshift=1.0em]s3.east) {茶}; \node [anchor=east] (s) at (s1.west) {$\textbf{s}=$}; } \end{scope} \begin{scope}[yshift=-3.0em] {\small \node [anchor=west] (t1) at (0.30em,0) {I}; \node [anchor=west] (t2) at ([xshift=0.8em,yshift=-0.0em]t1.east) {like}; \node [anchor=west] (t3) at ([xshift=0.6em,yshift=-0.0em]t2.east) {black}; \node [anchor=west] (t4) at ([xshift=1.0em,yshift=-0.1em]t3.east) {tea}; \node [anchor=east] (t) at ([xshift=-0.3em]t1.west) {$\textbf{t}=$}; } \end{scope} \begin{pgfonlayer}{background} \visible<3->{ \node [rectangle,draw=red,thick,inner sep=0.0em,fill=white] [fit = (s3) (s4)] (sphrase1) {}; \node [rectangle,draw=black,thick,inner sep=0.0em,fill=white] [fit = (t3) (t4)] (tphrase1) {}; } \end{pgfonlayer} \draw [-] (s1.south) -- (t1.north); \draw [-] (s2.south) ..controls +(south:1.0em) and +(north:1.0em).. (t2.north); \draw [-] (sphrase1.south) ..controls +(south:1.0em) and +(north:1.0em).. ([xshift=-1em]tphrase1.north); \node [anchor=center,draw=ublue,circle,thick,fill=white,inner sep=1pt,circular drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] (head1) at ([xshift=2.5em,yshift=-0.5em]s4.south east) {{\color{ugreen} \small{\textbf{Yes}}}}; \end{scope} } \end{tikzpicture} \end{center} \end{minipage} \item<4-> \alert{优点}: \begin{itemize} \item 翻译时候可以考虑更大范围的上下文信息\\ 比如:"红茶"中的"红"如果和"茶"搭配 ... \item 更好的局部调序,比如:短语中有"的"字 $\to$ ... of ... 
结构
  \item 更大范围的目标语连续词串的使用,有利于$n$-gram语言模型选择译文
\end{itemize}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\section{翻译中为什么要使用短语}
%%%------------------------------------------------------------------------------------------------------------
%%% Review of the word-based translation model
%%%
%%% This frame draws the same candidate lattice as the following
%%% phrase-based frame.  Here the phrase candidates and phrase paths are
%%% wrapped in \invisible, but their nodes still exist: later nodes are
%%% positioned relative to them (e.g. wtranslabel is anchored at t15).
\begin{frame}{回顾 - 基于词的机器翻译}
\begin{itemize}
  \item 对每个\alert{单词}的翻译进行(任意)组合
\end{itemize}
\vspace{-2em}
\begin{flushright}
\begin{tikzpicture}
  \begin{scope}
    % ---- source sentence (already segmented) ----
    \node [anchor=west] (s1) at (0,0) {\textbf{我}};
    \node [anchor=west] (s2) at ([xshift=2em]s1.east) {\textbf{对}};
    \node [anchor=west] (s3) at ([xshift=2em]s2.east) {\textbf{你}};
    \node [anchor=west] (s4) at ([xshift=2em]s3.east) {\textbf{表示}};
    \node [anchor=west] (s5) at ([xshift=2em]s4.east) {\textbf{满意}};
    \node [anchor=south west] (sentlabel) at ([yshift=-0.5em]s1.north west) {\scriptsize{\textbf{\alert{待翻译句子(已经分词):}}}};
    \draw [->,very thick,ublue] (s1.south) -- ([yshift=-0.7em]s1.south);
    \draw [->,very thick,ublue] (s2.south) -- ([yshift=-0.7em]s2.south);
    \draw [->,very thick,ublue] (s3.south) -- ([yshift=-0.7em]s3.south);
    \draw [->,very thick,ublue] (s4.south) -- ([yshift=-0.7em]s4.south);
    \draw [->,very thick,ublue] (s5.south) -- ([yshift=-0.7em]s5.south);
    % Styles local to this scope (the scope is a TeX group):
    %   cand    - coloured candidate-translation box
    %   spantag - black tag with the covered source span, top-left of a box
    %   probtag - rotated black probability tag on the right edge of a box
    \tikzset{
      cand/.style={anchor=north,inner sep=2pt,minimum height=1.5em},
      spantag/.style={anchor=north west,inner sep=1pt,fill=black},
      probtag/.style={anchor=north,rotate=90,inner sep=1pt,minimum width=2.55em,fill=black}
    }
    {\small
    % ---- candidates for source word 1 ----
    \node [cand,fill=red!20,minimum width=2.5em] (t11) at ([yshift=-1em]s1.south) {I};
    \node [cand,fill=red!20,minimum width=2.5em] (t12) at ([yshift=-0.2em]t11.south) {me};
    \node [cand,fill=red!20,minimum width=2.5em] (t13) at ([yshift=-0.2em]t12.south) {I'm};
    \node [spantag] (tl11) at (t11.north west) {\tiny{{\color{white} \textbf{1}}}};
    \node [spantag] (tl12) at (t12.north west) {\tiny{{\color{white} \textbf{1}}}};
    \node [spantag] (tl13) at (t13.north west) {\tiny{{\color{white} \textbf{1}}}};
    \invisible{
      \node [cand,anchor=north west,fill=red!20,minimum width=6.55em] (t14) at ([yshift=-0.2em]t13.south west) {I'm};
      \node [cand,anchor=north west,fill=red!20,minimum width=6.55em] (t15) at ([yshift=-0.2em]t14.south west) {I};
      \node [spantag] (tl14) at (t14.north west) {\tiny{{\color{white} \textbf{1-2}}}};
      \node [spantag] (tl15) at (t15.north west) {\tiny{{\color{white} \textbf{1-2}}}};
    }
    % ---- candidates for source word 2 ----
    \node [cand,fill=green!20,minimum width=2.5em] (t21) at ([yshift=-1em]s2.south) {to};
    \node [cand,fill=green!20,minimum width=2.5em] (t22) at ([yshift=-0.2em]t21.south) {with};
    \node [cand,fill=green!20,minimum width=2.5em] (t23) at ([yshift=-0.2em]t22.south) {for};
    \node [spantag] (tl21) at (t21.north west) {\tiny{{\color{white} \textbf{2}}}};
    \node [spantag] (tl22) at (t22.north west) {\tiny{{\color{white} \textbf{2}}}};
    \node [spantag] (tl23) at (t23.north west) {\tiny{{\color{white} \textbf{2}}}};
    \invisible{
      \node [cand,anchor=north west,fill=green!20,minimum width=6.55em] (t24) at ([yshift=-0.2em,xshift=-2.6em]t15.south east) {for you};
      \node [cand,anchor=north west,fill=green!20,minimum width=6.55em] (t25) at ([yshift=-0.2em]t24.south west) {with you};
      \node [spantag] (tl24) at (t24.north west) {\tiny{{\color{white} \textbf{2-3}}}};
      \node [spantag] (tl25) at (t25.north west) {\tiny{{\color{white} \textbf{2-3}}}};
    }
    % ---- candidates for source word 3 ----
    \node [cand,fill=blue!20,minimum width=2.5em] (t31) at ([yshift=-1em]s3.south) {you};
    \node [spantag] (tl31) at (t31.north west) {\tiny{{\color{white} \textbf{3}}}};
    \invisible{
      \node [cand,anchor=west,fill=blue!20,minimum width=13.35em] (t32) at ([xshift=1.4em]t14.east) {you are satisfied};
      \node [cand,anchor=north west,fill=blue!20,minimum width=7.45em] (t33) at ([yshift=-0.2em]t32.south west) {$\phi$};
      \node [spantag] (tl32) at (t32.north west) {\tiny{{\color{white} \textbf{3-5}}}};
      \node [spantag] (tl33) at (t33.north west) {\tiny{{\color{white} \textbf{3-4}}}};
    }
    % ---- candidates for source word 4 ----
    \node [cand,fill=orange!20,minimum width=3em] (t41) at ([yshift=-1em]s4.south) {$\phi$};
    \node [cand,fill=orange!20,minimum width=3em] (t42) at ([yshift=-0.2em]t41.south) {show};
    \node [spantag] (tl41) at (t41.north west) {\tiny{{\color{white} \textbf{4}}}};
    \node [spantag] (tl42) at (t42.north west) {\tiny{{\color{white} \textbf{4}}}};
    \invisible{
      \node [cand,anchor=west,fill=red!20,minimum width=9.00em] (t43) at ([xshift=1.75em]t24.east) {satisfied};
      \node [cand,anchor=north west,fill=red!20,minimum width=9.00em] (t44) at ([yshift=-0.2em]t43.south west) {satisfactory};
      \node [spantag] (tl43) at (t43.north west) {\tiny{{\color{white} \textbf{4-5}}}};
      \node [spantag] (tl44) at (t44.north west) {\tiny{{\color{white} \textbf{4-5}}}};
    }
    % ---- candidates for source word 5 ----
    \node [cand,fill=purple!20,minimum width=4.5em] (t51) at ([yshift=-1em]s5.south) {satisfy};
    \node [cand,fill=purple!20,minimum width=4.5em] (t52) at ([yshift=-0.2em]t51.south) {satisfied};
    \node [cand,fill=purple!20,minimum width=4.5em] (t53) at ([yshift=-0.2em]t52.south) {satisfies};
    \node [spantag] (tl51) at (t51.north west) {\tiny{{\color{white} \textbf{5}}}};
    \node [spantag] (tl52) at (t52.north west) {\tiny{{\color{white} \textbf{5}}}};
    \node [spantag] (tl53) at (t53.north west) {\tiny{{\color{white} \textbf{5}}}};
    }
    % ---- probability tags, one per candidate box ----
    {\tiny
    \node [probtag] (pt11) at (t11.east) {{\color{white} \textbf{P=.4}}};
    \node [probtag] (pt12) at (t12.east) {{\color{white} \textbf{P=.2}}};
    \node [probtag] (pt13) at (t13.east) {{\color{white} \textbf{P=.4}}};
    \invisible{
      \node [probtag] (pt14) at (t14.east) {{\color{white} \textbf{P=.1}}};
      \node [probtag] (pt15) at (t15.east) {{\color{white} \textbf{P=.2}}};
    }
    \node [probtag] (pt21) at (t21.east) {{\color{white} \textbf{P=.4}}};
    \node [probtag] (pt22) at (t22.east) {{\color{white} \textbf{P=.3}}};
    \node [probtag] (pt23) at (t23.east) {{\color{white} \textbf{P=.3}}};
    \invisible{
      \node [probtag] (pt24) at (t24.east) {{\color{white} \textbf{P=.2}}};
      \node [probtag] (pt25) at (t25.east) {{\color{white} \textbf{P=.1}}};
    }
    \node [probtag] (pt31) at (t31.east) {{\color{white} \textbf{P=1}}};
    \invisible{
      % The tag on t32 is named pt32 (it used to share the name pt33 with
      % the tag on t33).
      \node [probtag] (pt32) at (t32.east) {{\color{white} \textbf{P=.4}}};
      \node [probtag] (pt33) at (t33.east) {{\color{white} \textbf{P=.3}}};
    }
    \node [probtag] (pt41) at (t41.east) {{\color{white} \textbf{P=.5}}};
    \node [probtag] (pt42) at (t42.east) {{\color{white} \textbf{P=.5}}};
    \invisible{
      \node [probtag] (pt43) at (t43.east) {{\color{white} \textbf{P=.3}}};
      \node [probtag] (pt44) at (t44.east) {{\color{white} \textbf{P=.2}}};
    }
    \node [probtag] (pt51) at (t51.east) {{\color{white} \textbf{P=.5}}};
    \node [probtag] (pt52) at (t52.east) {{\color{white} \textbf{P=.4}}};
    \node [probtag] (pt53) at (t53.east) {{\color{white} \textbf{P=.1}}};
    }
  \end{scope}
  % ---- braces, translation paths and legend ----
  % `line width' is given explicitly on the path arrows, so no named
  % thickness style (which it would override anyway) is listed.
  \begin{scope}
    \draw[decorate,thick,decoration={brace,amplitude=5pt,mirror}] ([yshift=0em,xshift=-0.5em]t11.north west) -- ([xshift=-0.5em]t13.south west) node [pos=0.5,left,xshift=1.0em,yshift=0.0em,text width=5em,align=left] (label2) {\footnotesize{\textbf{单词翻译}}};
    % Word-only path, shown from slide 2.
    \visible<2->{
      \draw [->,red,line width=2pt,opacity=0.7]
        ([xshift=-0.5em]t13.west) -- ([xshift=0.8em]t13.east)
        -- ([xshift=-0.2em]t22.west) -- ([xshift=0.8em]t22.east)
        -- ([xshift=-0.2em]t31.west) -- ([xshift=0.8em]t31.east)
        -- ([xshift=-0.2em]t41.west) -- ([xshift=0.8em]t41.east)
        -- ([xshift=-0.2em]t52.west) -- ([xshift=1.2em]t52.east);
    }
    % Phrase paths, never shown on this frame.
    \invisible{
      \draw [->,ublue,line width=2pt,opacity=0.7]
        ([xshift=-0.5em]t15.west) -- ([xshift=0.8em]t15.east)
        -- ([xshift=-0.2em]t32.west) -- ([xshift=1.2em]t32.east);
      \draw [->,ublue,line width=2pt,opacity=0.7]
        ([xshift=-0.5em,yshift=0.1em]t13.west) -- ([xshift=0.8em,yshift=0.1em]t13.east)
        -- ([xshift=-0.2em]t25.west) -- ([xshift=0.8em]t25.east)
        -- ([xshift=-0.2em,yshift=0.1em]t41.west) -- ([xshift=0.8em,yshift=0.1em]t41.east)
        -- ([xshift=-0.2em,yshift=0.1em]t52.west) -- ([xshift=1.2em,yshift=0.1em]t52.east);
    }
    \invisible{
      \draw[decorate,thick,decoration={brace,amplitude=5pt,mirror}] ([yshift=-0.2em,xshift=-0.5em]t13.south west) -- ([yshift=-6.3em,xshift=-0.5em]t13.south west) node [pos=0.5,left,xshift=1.0em,yshift=0.0em,text width=5em,align=left] (label3) {\footnotesize{\textbf{短语翻译}}};
    }
    \visible<2->{
      \node [anchor=north west] (wtranslabel) at ([yshift=-4em]t15.south west) {\scriptsize{翻译路径(仅含有单词):}};
      \draw [->,red,line width=1.5pt,opacity=0.7] (wtranslabel.east) -- ([xshift=1em]wtranslabel.east);
    }
    \invisible{
      \node [anchor=north west] (ptranslabel) at ([yshift=-5.5em]t15.south west) {\scriptsize{翻译路径(含有短语):}};
      \draw [->,ublue,line width=1.5pt,opacity=0.7] ([xshift=0.65em]ptranslabel.east) -- ([xshift=1.65em]ptranslabel.east);
    }
  \end{scope}
\end{tikzpicture}
\end{flushright}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Phrase-based translation model
\begin{frame}{基于``短语''的机器翻译}
\begin{itemize}
  \item 对每个\alert{单词}及\alert{连续词串}的翻译进行(任意)组合
\end{itemize}
\vspace{-2em}
\begin{flushright}
\begin{tikzpicture}
  \begin{scope}
    % ---- source sentence (already segmented) ----
    \node [anchor=west] (s1) at (0,0) {\textbf{我}};
    \node [anchor=west] (s2) at ([xshift=2em]s1.east) {\textbf{对}};
    \node [anchor=west] (s3) at ([xshift=2em]s2.east) {\textbf{你}};
    \node [anchor=west] (s4) at ([xshift=2em]s3.east) {\textbf{表示}};
    \node [anchor=west] (s5) at ([xshift=2em]s4.east) {\textbf{满意}};
    \node [anchor=south west] (sentlabel) at ([yshift=-0.5em]s1.north west) {\scriptsize{\textbf{\alert{待翻译句子(已经分词):}}}};
    \draw [->,very thick,ublue] (s1.south) -- ([yshift=-0.7em]s1.south);
    \draw [->,very thick,ublue] (s2.south) --
([yshift=-0.7em]s2.south);
    \draw [->,very thick,ublue] (s3.south) -- ([yshift=-0.7em]s3.south);
    \draw [->,very thick,ublue] (s4.south) -- ([yshift=-0.7em]s4.south);
    \draw [->,very thick,ublue] (s5.south) -- ([yshift=-0.7em]s5.south);
    % Styles local to the enclosing scope (the scope is a TeX group):
    %   cand    - coloured candidate-translation box
    %   spantag - black tag with the covered source span, top-left of a box
    %   probtag - rotated black probability tag on the right edge of a box
    \tikzset{
      cand/.style={anchor=north,inner sep=2pt,minimum height=1.5em},
      spantag/.style={anchor=north west,inner sep=1pt,fill=black},
      probtag/.style={anchor=north,rotate=90,inner sep=1pt,minimum width=2.55em,fill=black}
    }
    % Single-word candidates are always shown; multi-word (phrase)
    % candidates are uncovered from slide 2.
    {\small
    % ---- candidates for source word 1 ----
    \node [cand,fill=red!20,minimum width=2.5em] (t11) at ([yshift=-1em]s1.south) {I};
    \node [cand,fill=red!20,minimum width=2.5em] (t12) at ([yshift=-0.2em]t11.south) {me};
    \node [cand,fill=red!20,minimum width=2.5em] (t13) at ([yshift=-0.2em]t12.south) {I'm};
    \node [spantag] (tl11) at (t11.north west) {\tiny{{\color{white} \textbf{1}}}};
    \node [spantag] (tl12) at (t12.north west) {\tiny{{\color{white} \textbf{1}}}};
    \node [spantag] (tl13) at (t13.north west) {\tiny{{\color{white} \textbf{1}}}};
    \visible<2->{
      \node [cand,anchor=north west,fill=red!20,minimum width=6.55em] (t14) at ([yshift=-0.2em]t13.south west) {I'm};
      \node [cand,anchor=north west,fill=red!20,minimum width=6.55em] (t15) at ([yshift=-0.2em]t14.south west) {I};
      \node [spantag] (tl14) at (t14.north west) {\tiny{{\color{white} \textbf{1-2}}}};
      \node [spantag] (tl15) at (t15.north west) {\tiny{{\color{white} \textbf{1-2}}}};
    }
    % ---- candidates for source word 2 ----
    \node [cand,fill=green!20,minimum width=2.5em] (t21) at ([yshift=-1em]s2.south) {to};
    \node [cand,fill=green!20,minimum width=2.5em] (t22) at ([yshift=-0.2em]t21.south) {with};
    \node [cand,fill=green!20,minimum width=2.5em] (t23) at ([yshift=-0.2em]t22.south) {for};
    \node [spantag] (tl21) at (t21.north west) {\tiny{{\color{white} \textbf{2}}}};
    \node [spantag] (tl22) at (t22.north west) {\tiny{{\color{white} \textbf{2}}}};
    \node [spantag] (tl23) at (t23.north west) {\tiny{{\color{white} \textbf{2}}}};
    \visible<2->{
      \node [cand,anchor=north west,fill=green!20,minimum width=6.55em] (t24) at ([yshift=-0.2em,xshift=-2.6em]t15.south east) {for you};
      \node [cand,anchor=north west,fill=green!20,minimum width=6.55em] (t25) at ([yshift=-0.2em]t24.south west) {with you};
      \node [spantag] (tl24) at (t24.north west) {\tiny{{\color{white} \textbf{2-3}}}};
      \node [spantag] (tl25) at (t25.north west) {\tiny{{\color{white} \textbf{2-3}}}};
    }
    % ---- candidates for source word 3 ----
    \node [cand,fill=blue!20,minimum width=2.5em] (t31) at ([yshift=-1em]s3.south) {you};
    \node [spantag] (tl31) at (t31.north west) {\tiny{{\color{white} \textbf{3}}}};
    \visible<2->{
      \node [cand,anchor=west,fill=blue!20,minimum width=13.35em] (t32) at ([xshift=1.4em]t14.east) {you are satisfied};
      \node [cand,anchor=north west,fill=blue!20,minimum width=7.45em] (t33) at ([yshift=-0.2em]t32.south west) {$\phi$};
      \node [spantag] (tl32) at (t32.north west) {\tiny{{\color{white} \textbf{3-5}}}};
      \node [spantag] (tl33) at (t33.north west) {\tiny{{\color{white} \textbf{3-4}}}};
    }
    % ---- candidates for source word 4 ----
    \node [cand,fill=orange!20,minimum width=3em] (t41) at ([yshift=-1em]s4.south) {$\phi$};
    \node [cand,fill=orange!20,minimum width=3em] (t42) at ([yshift=-0.2em]t41.south) {show};
    \node [spantag] (tl41) at (t41.north west) {\tiny{{\color{white} \textbf{4}}}};
    \node [spantag] (tl42) at (t42.north west) {\tiny{{\color{white} \textbf{4}}}};
    \visible<2->{
      \node [cand,anchor=west,fill=red!20,minimum width=9.00em] (t43) at ([xshift=1.75em]t24.east) {satisfied};
      \node [cand,anchor=north west,fill=red!20,minimum width=9.00em] (t44) at ([yshift=-0.2em]t43.south west) {satisfactory};
      \node [spantag] (tl43) at (t43.north west) {\tiny{{\color{white} \textbf{4-5}}}};
      \node [spantag] (tl44) at (t44.north west) {\tiny{{\color{white} \textbf{4-5}}}};
    }
    % ---- candidates for source word 5 ----
    \node [cand,fill=purple!20,minimum width=4.5em] (t51) at ([yshift=-1em]s5.south) {satisfy};
    \node [cand,fill=purple!20,minimum width=4.5em] (t52) at ([yshift=-0.2em]t51.south) {satisfied};
    \node [cand,fill=purple!20,minimum width=4.5em] (t53) at ([yshift=-0.2em]t52.south) {satisfies};
    \node [spantag] (tl51) at (t51.north west) {\tiny{{\color{white} \textbf{5}}}};
    \node [spantag] (tl52) at (t52.north west) {\tiny{{\color{white} \textbf{5}}}};
    \node [spantag] (tl53) at (t53.north west) {\tiny{{\color{white} \textbf{5}}}};
    }
    % ---- probability tags, one per candidate box ----
    {\tiny
    \node [probtag] (pt11) at (t11.east) {{\color{white} \textbf{P=.4}}};
    \node [probtag] (pt12) at (t12.east) {{\color{white} \textbf{P=.2}}};
    \node [probtag] (pt13) at (t13.east) {{\color{white} \textbf{P=.4}}};
    \visible<2->{
      \node [probtag] (pt14) at (t14.east) {{\color{white} \textbf{P=.1}}};
      \node [probtag] (pt15) at (t15.east) {{\color{white} \textbf{P=.2}}};
    }
    \node [probtag] (pt21) at (t21.east) {{\color{white} \textbf{P=.4}}};
    \node [probtag] (pt22) at (t22.east) {{\color{white} \textbf{P=.3}}};
    \node [probtag] (pt23) at (t23.east) {{\color{white} \textbf{P=.3}}};
    \visible<2->{
      \node [probtag] (pt24) at (t24.east) {{\color{white} \textbf{P=.2}}};
      \node [probtag] (pt25) at (t25.east) {{\color{white} \textbf{P=.1}}};
    }
    \node [probtag] (pt31) at (t31.east) {{\color{white} \textbf{P=1}}};
    \visible<2->{
      % The tag on t32 is named pt32 (it used to share the name pt33 with
      % the tag on t33).
      \node [probtag] (pt32) at (t32.east) {{\color{white} \textbf{P=.4}}};
      \node [probtag] (pt33) at (t33.east) {{\color{white} \textbf{P=.3}}};
    }
    \node [probtag] (pt41) at (t41.east) {{\color{white} \textbf{P=.5}}};
    \node [probtag] (pt42) at (t42.east) {{\color{white} \textbf{P=.5}}};
    \visible<2->{
      \node [probtag] (pt43) at (t43.east) {{\color{white} \textbf{P=.3}}};
      \node [probtag] (pt44) at (t44.east) {{\color{white} \textbf{P=.2}}};
    }
    \node [probtag] (pt51) at (t51.east) {{\color{white} \textbf{P=.5}}};
    \node [probtag] (pt52) at (t52.east) {{\color{white} \textbf{P=.4}}};
    \node [probtag] (pt53) at (t53.east) {{\color{white} \textbf{P=.1}}};
    }
  \end{scope}
  % ---- braces, translation paths and legend ----
  % `line width' is given explicitly on the path arrows, so no named
  % thickness style (which it would override anyway) is listed.
  \begin{scope}
    \draw[decorate,thick,decoration={brace,amplitude=5pt,mirror}] ([yshift=0em,xshift=-0.5em]t11.north west) -- ([xshift=-0.5em]t13.south west) node [pos=0.5,left,xshift=1.0em,yshift=0.0em,text width=5em,align=left] (label2) {\footnotesize{\textbf{单词翻译}}};
    % Word-only path (red), shown on every slide of this frame.
    \draw [->,red,line width=2pt,opacity=0.7]
      ([xshift=-0.5em]t13.west) -- ([xshift=0.8em]t13.east)
      -- ([xshift=-0.2em]t22.west) -- ([xshift=0.8em]t22.east)
      -- ([xshift=-0.2em]t31.west) -- ([xshift=0.8em]t31.east)
      -- ([xshift=-0.2em]t41.west) -- ([xshift=0.8em]t41.east)
      -- ([xshift=-0.2em]t52.west) -- ([xshift=1.2em]t52.east);
    % First phrase path (blue), from slide 3: t15 then t32.
    \visible<3->{
      \draw [->,ublue,line width=2pt,opacity=0.7]
        ([xshift=-0.5em]t15.west) -- ([xshift=0.8em]t15.east)
        -- ([xshift=-0.2em]t32.west) -- ([xshift=1.2em]t32.east);
    }
    % Second phrase path (blue), from slide 4: t13, t25, t41, t52.
    \visible<4->{
      \draw [->,ublue,line width=2pt,opacity=0.7]
        ([xshift=-0.5em,yshift=0.1em]t13.west) -- ([xshift=0.8em,yshift=0.1em]t13.east)
        -- ([xshift=-0.2em]t25.west) -- ([xshift=0.8em]t25.east)
        -- ([xshift=-0.2em,yshift=0.1em]t41.west) -- ([xshift=0.8em,yshift=0.1em]t41.east)
        -- ([xshift=-0.2em,yshift=0.1em]t52.west) -- ([xshift=1.2em,yshift=0.1em]t52.east);
    }
    \visible<2->{
      \draw[decorate,thick,decoration={brace,amplitude=5pt,mirror}] ([yshift=-0.2em,xshift=-0.5em]t13.south west) -- ([yshift=-6.3em,xshift=-0.5em]t13.south west) node [pos=0.5,left,xshift=1.0em,yshift=0.0em,text width=5em,align=left] (label3) {\footnotesize{\textbf{短语翻译}}};
    }
    \node [anchor=north west] (wtranslabel) at ([yshift=-4em]t15.south west) {\scriptsize{翻译路径(仅含有单词):}};
    \draw [->,red,line width=1.5pt,opacity=0.7] (wtranslabel.east) -- ([xshift=1em]wtranslabel.east);
    \visible<3->{
      \node [anchor=north west] (ptranslabel) at ([yshift=-5.5em]t15.south west) {\scriptsize{翻译路径(含有短语):}};
      \draw [->,ublue,line width=1.5pt,opacity=0.7] ([xshift=0.65em]ptranslabel.east) -- ([xshift=1.65em]ptranslabel.east);
    }
  \end{scope}
\end{tikzpicture}
\end{flushright}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% From phrases to syntax
\begin{frame}{使用短语就够了?}
\begin{itemize}
\item 短语是具有完整意思的连续词串,因此可以捕捉更多的上下文信息
\begin{itemize}
  \item 不过过大的短语会造成数据稀疏、长距离依赖等问题
  \item 而且单纯的词串也缺乏句法功能表示能力
\end{itemize}
% Sentence pair: matching fill colours mark corresponding segments, and the
% arrows link the subject to its predicate on each side.
\begin{tikzpicture}
  \node[anchor=west, fill=blue!50, inner sep=0.05cm] (sp1) at (0, 0) {进口\ \ };
  \node[anchor=west] (sp2) at (2.5em, 0) {在 过去的 五 到 十 年 间};
  \node[anchor=west, fill=red!50, inner sep=0.05cm] (sp3) at (14em, 0) {有了 大幅度 下降};
  \draw[->] (sp1) edge [out=15, in=170] (sp3);
  \node[anchor=west, fill=blue!50, inner sep=0.05cm] (tp1) at (0, -0.8) {the imports};
  \node[anchor=west, fill=red!50, inner sep=0.05cm] (tp2) at (5.3em, -0.8) {drastically fell};
  \node[anchor=west] (tp3) at (11.3em, -0.8) {in the past five to ten years};
  \path[->] (tp1) edge [out=30, in=150] (tp2);
\end{tikzpicture}
\item<2-> 另一种方式是考虑句子的句法结构,这样更容易描述句子的层次结构和长距离依赖关系
\end{itemize}
% Parse tree (slide 2 onwards); the curved arrow links leaf n1 to leaf n2.
\visible<2->{
\begin{center}
\begin{tikzpicture}
  \begin{scope}[scale=0.8, sibling distance=1pt, level distance=20pt, yshift=-1.4in]
    % tikz-qtree reads a node label directly after `[.', so the root is
    % written `[.S' with no space in between.
    \Tree[.S
      [.NP
        [.NP [.DT the ] [.\node[fill=ugreen!50]{NN}; \node[](n1){import}; ] ]
        [.\node[fill=ugreen!50]{IN}; in ]
        [.NP \edge[roof]; {North Korea} ]
      ]
      [.VP
        [.\node[fill=ugreen!50]{VBZ}; \node[](n2){have}; ]
        [.ADVP [.RB drastically ] [.VBN fallen ] ]
      ]
    ]
    \draw[-latex] (n1.south) .. controls +(south east:1) and +(south:1) ..
(n2.south);
\end{scope}
\end{tikzpicture}
\end{center}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Syntax-based models
\begin{frame}{引入句法信息}
\begin{itemize}
  \item 句法树是句子的更高层次的抽象,相比短语,句法树具有更加丰富的句法功能标记,对语言结构的转换很有帮助
  \begin{itemize}
    \item 更容易捕捉翻译中的远距离调序
    \item 使用句法更容易对大范围的上下文建模
  \end{itemize}
  \item<2-> 看一个真实的例子
  \begin{itemize}
    \item 长介词短语的翻译,需要完整地看到这个结构才能准确翻译介词
  \end{itemize}
\end{itemize}
\visible<2->{
\vspace{-1.2em}
\begin{center}
\begin{tikzpicture}
%% example: reference translation (ref), MT output (hifst) and, from overlay 4 on, the "better?" variant (synhifst*)
\begin{scope}[xshift=-0.1in,yshift=-1.5in]
{\tiny
\node[anchor=west] (ref) at (0,0) {\textbf{人工翻译:} \alert{After} North Korea demanded concessions from U.S. again before the start of a new round of six-nation talks ...};
\node[anchor=north west] (hifst) at ([yshift=-0.3em]ref.south west) {\textbf{机器翻译:} \blue{In}\black{} the new round of six-nation talks on North Korea again demanded that U.S. in the former promise ...};
\visible<4->{
\node[anchor=north west] (synhifst) at ([yshift=-0.3em]hifst.south west) {\textbf{better?:}};
\node[anchor=west, fill=red!20!white, inner sep=0.3em] (synhifstpart1) at ([xshift=-0.5em]synhifst.east) {After};
\node[anchor=west, fill=blue!20!white, inner sep=0.25em] (synhifstpart2) at ([xshift=0.1em,yshift=-0.05em]synhifstpart1.east) {North Korea again demanded that U.S.
promised concessions before the new round of six-nation talks}; \node[anchor=west] (synhifstpart3) at ([xshift=-0.2em]synhifstpart2.east) {...}; } \node [anchor=west] (inputlabel) at ([yshift=-0.4in]synhifst.west) {\textbf{Input:}}; \node [anchor=west,minimum height=12pt] (inputseg1) at (inputlabel.east) {在$_1$ }; \node [anchor=west,minimum height=12pt] (inputseg2) at ([xshift=0.2em]inputseg1.east) {北韩$_2$ 再度$_3$ 要求$_4$ 美国$_5$ 于$_6$ 新$_7$ 回合$_8$ 六$_9$ 国$_{10}$ 会谈$_{11}$ 前$_{12}$ 承诺$_{13}$ 让步$_{14}$}; \node [anchor=west,minimum height=12pt] (inputseg3) at ([xshift=0.2em]inputseg2.east) {后$_{15}$}; \node [anchor=west,minimum height=12pt] (inputseg4) at ([xshift=0.2em]inputseg3.east) {,$_{16}$}; \node [anchor=west,minimum height=12pt] (inputseg5) at ([xshift=0.2em]inputseg4.east) {...}; \visible<3->{ \node [anchor=north,inner sep=2pt] (synlabel1) at ([yshift=-0.34in]inputseg2.south) {\scriptsize{PP}}; \node [anchor=north,inner sep=2pt] (synlabel2) at ([yshift=-0.34in]inputseg4.south) {\scriptsize{PU}}; \node [anchor=north,inner sep=2pt] (synlabel3) at ([yshift=-0.34in]inputseg5.south) {\scriptsize{VP}}; \node [anchor=north,inner sep=2pt] (synlabel4) at ([xshift=1.6in,yshift=-0.35in]synlabel1.south) {\scriptsize{VP}}; \draw [-] (inputseg1.south west) -- (inputseg3.south east) -- (synlabel1.north) -- cycle; \draw [-] (inputseg4.south) -- (synlabel2.north); \draw [-] (inputseg5.south) -- (synlabel3.north); \draw [-] (synlabel1.south) -- (synlabel4.north); \draw [-] (synlabel2.south) -- (synlabel4.north); \draw [-] (synlabel3.south) -- (synlabel4.north); } \visible<3->{ \node [anchor=north east,align=left] (nolimitlabel) at (synlabel1.south west) {\scriptsize{短语结构树很容易捕捉}\\\scriptsize{这种介词短语结构}}; } \visible<4->{ \node [anchor=west,minimum height=12pt,fill=red!20] (inputseg1) at (inputlabel.east) {在$_1$ }; \node [anchor=west,minimum height=12pt,fill=blue!20] (inputseg2) at ([xshift=0.2em]inputseg1.east) {北韩$_2$ 再度$_3$ 要求$_4$ 美国$_5$ 于$_6$ 新$_7$ 回合$_8$ 六$_9$ 国$_{10}$ 
会谈$_{11}$ 前$_{12}$ 承诺$_{13}$ 让步$_{14}$}; \node [anchor=west,minimum height=12pt,fill=red!20] (inputseg3) at ([xshift=0.2em]inputseg2.east) {后$_{15}$}; \path [draw,->,dashed] (inputseg1.north) .. controls +(north:0.2) and +(south:0.3) .. ([xshift=1em]synhifstpart1.south); \path [draw,->,dashed] (inputseg3.north) .. controls +(north:0.2) and +(south:0.6) .. ([xshift=1em]synhifstpart1.south); \path [draw,->,dashed] ([xshift=-0.5in]inputseg2.north) -- ([xshift=-0.6in]synhifstpart2.south); } } \end{scope} %% end of example \end{tikzpicture} \end{center} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 本章的核心问题 \begin{frame}{核心问题} \vspace{6em} \begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=blue!5!white,colframe=blue!75!black,drop fuzzy shadow] {\LARGE \textbf{如何使用短语、句法等}\\ \\ \textbf{结构信息进行机器翻译建模?} } \end{tcolorbox} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 本章的内容 \begin{frame}{Outline} \begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow] {\normalsize \textbf{基于短语的模型} \vspace{-0.2em}\\ \small{1. 建模} \vspace{-0.2em}\\ \small{2. 短语获取和调序} \vspace{-0.2em}\\ \small{3. 翻译特征和最小错误率训练} \vspace{-0.2em}\\ \small{4. 栈解码} } \end{tcolorbox} \vspace{0em} \begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow] {\normalsize \textbf{基于层次短语的模型} \vspace{-0.2em}\\ \small{1. 同步上下文无关文法} \vspace{-0.2em}\\ \small{2. 层次短语规则及翻译特征} \vspace{-0.2em}\\ \small{3. 基于chart的解码和剪枝} } \end{tcolorbox} \vspace{0em} \begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow] {\normalsize \textbf{基于语言学句法的模型} \vspace{-0.2em}\\ \small{1. 基于树结构的文法} \vspace{-0.2em}\\ \small{2. 翻译规则抽取} \vspace{-0.2em}\\ \small{3. 
规则匹配}
}
\end{tcolorbox}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\section{基于短语的模型}
%%%------------------------------------------------------------------------------------------------------------
\subsection{建模}
%%%------------------------------------------------------------------------------------------------------------
%%% What is a phrase
\begin{frame}{何为短语?}
\begin{itemize}
  \item 句对可以用短语对的组合进行表示,比如下图的例子包含三个短语翻译:
  \begin{itemize}
    \item 进口 $\leftrightarrow$ the imports have
    \item 大幅度 $\leftrightarrow$ drastically
    \item 下降 了 $\leftrightarrow$ fallen
  \end{itemize}
  \begin{center}
  \begin{tikzpicture}
  \begin{scope}[minimum height = 18pt]
    % source side: each phrase is placed once without fill and then again, at the same position, with its colour
    \node[anchor=east] (s0) at (-0.5em, 0) {源语:};
    \node[anchor=west] (s1) at (0, 0) {进口};
    \node[anchor=west] (s2) at (3.5em, 0) {大幅度};
    \node[anchor=west] (s3) at (7.9em, 0) {下降 了};
    \node[anchor=west,fill=ugreen!50] (s1) at (0, 0) {进口};
    \node [anchor=west,fill=red!50] (s2) at (3.5em, 0) {大幅度};
    \node[anchor=west,fill=blue!50] (s3) at (7.9em, 0) {下降 了};
    % target side, same scheme
    \node[anchor=east] (t0) at (-0.5em, -1) {目标语:};
    \node[anchor=west] (t1) at (0, -1) {the imports have};
    \node[anchor=west] (t2) at (8.4em, -1) {drastically};
    \node[anchor=west] (t3) at (14.0em, -1) {fallen};
    \node[anchor=west,fill=ugreen!50] (t1) at (0, -1) {the imports have};
    \node[anchor=west,fill=red!50] (t2) at (8.4em, -1) {drastically};
    \node[anchor=west,fill=blue!50] (t3) at (14.0em, -1) {fallen};
    % phrase-to-phrase links (same colour on both ends)
    \path[<->, thick] (s1.south) edge (t1.north);
    \path[<->, thick] (s2.south) edge (t2.north);
    \path[<->, thick] (s3.south) edge (t3.north);
  \end{scope}
  \end{tikzpicture}
  \end{center}
  \item<2-> 显然上图中的短语并\alert{不是}语言学上的短语。这里有:\\
  \vspace{0.3em}
  % word indices run from 1 to n, hence 1 <= i <= j <= n
  \begin{beamerboxesrounded}[upper=uppercolblue,lower=lowercolblue,shadow=true]{定义 - 短语}
  对于一个句子$\textbf{w} = w_1...w_n$,任意子串$w_i...w_j$($i \le j$, $1 \le i$, $j \le n$)都是句子$\textbf{w}$的一个\alert{短语}
  \end{beamerboxesrounded}
  \begin{itemize}
    \item $n$个词构成的句子可以有$\frac{n(n+1)}{2}$个短语
\end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 什么是短语翻译推导 \begin{frame}{双语短语} \begin{itemize} \item 进一步,可以定义 \\ \begin{beamerboxesrounded}[upper=uppercolblue,lower=lowercolblue,shadow=true]{定义 - 句子的短语切分} 对于一个句子$\textbf{w} = w_1...w_n$,可以被切分为$m$个子串,则称$\textbf{w}$由$m$个短语组成,记为$\textbf{w} = p_1...p_m$,其中$p_i$是$\textbf{w}$的一个短语, $p_1...p_m$也被称作句子$\textbf{w}$的一个\alert{短语切分} \end{beamerboxesrounded} \vspace{0.5em} \item<2-> 对于双语的情况 \\ \begin{beamerboxesrounded}[upper=uppercolblue,lower=lowercolblue,shadow=true]{定义 - 双语短语(或短语对)} 对于源语和目标语句对($\textbf{s}, \textbf{t}$),$\textbf{s}$中短语$\bar{s}_i$和$\textbf{t}$中的短语$\bar{t}_j$可以构成一个双语短语对$(\bar{s}_i,\bar{t}_j)$,简称\alert{短语对}$(\bar{s}_i,\bar{t}_j)$ \end{beamerboxesrounded} \begin{itemize} \item 比如,句对``进口 大幅度 下降 了 $\leftrightarrow$ the imports have drastically fallen'',有很多短语对,比如 \begin{itemize} \item 大幅度 $\leftrightarrow$ drastically \item 大幅度 下降 $\leftrightarrow$ have drastically fallen \end{itemize} \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 融合用双语短语描述翻译 \begin{frame}{基于短语的翻译推导} \begin{beamerboxesrounded}[upper=uppercolblue,lower=lowercolblue,shadow=true]{定义 - 基于短语的翻译推导} {\small 对于源语和目标语句对($\textbf{s}, \textbf{t}$),有$l$个短语对$\{(\bar{s}_i,\bar{t}_j)\}$,且所有源语言短语$\{\bar{s}_i\}$和所有目标语短语$\{\bar{t}_j\}$分别构成$\textbf{s}$和$\textbf{t}$ 的切分,则称这些短语对$\{(\bar{s}_i,\bar{t}_j)\}$构成了$\textbf{s}$到$\textbf{t}$的\alert{基于短语的翻译推导}(简称推导),记为$d(\{(\bar{s}_i,\bar{t}_j)\},\textbf{s},\textbf{t})$(简记为$d(\{(\bar{s}_i,\bar{t}_j)\})$或$d$)。 } \end{beamerboxesrounded} \vspace{-0.5em} \begin{center} \begin{tikzpicture} \begin{scope}[minimum height = 18pt] \node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:}; \node[anchor=west] (s1) at (0, 0) {进口}; \node[anchor=west] (s2) at (3.5em, 0) {大幅度}; \node[anchor=west] (s3) at (7.9em, 0) {下降 了}; 
\node[anchor=west,fill=ugreen!50] (s1) at (0, 0) {进口}; \node[anchor=west,fill=red!50] (s2) at (3.5em, 0) {大幅度}; \node[anchor=west,fill=blue!50] (s3) at (7.9em, 0) {下降 了}; \node[anchor=east] (t0) at (-0.5em, -1) {$\textbf{t}$:}; \node[anchor=west] (t1) at (0, -1) {the imports have}; \node[anchor=west] (t2) at (8.4em, -1) {drastically}; \node[anchor=west] (t3) at (14.0em, -1) {fallen}; \node[anchor=west,fill=ugreen!50] (t1) at (0, -1) {the imports have}; \node[anchor=west,fill=red!50] (t2) at (8.4em, -1) {drastically}; \node[anchor=west,fill=blue!50] (t3) at (14.0em, -1) {fallen}; \path[<->, thick] (s1.south) edge (t1.north); \path[<->, thick] (s2.south) edge (t2.north); \path[<->, thick] (s3.south) edge (t3.north); \node[anchor=south,inner sep=0pt,yshift=-0.3em] (sp1) at (s1.north) {\scriptsize{$\bar{s}_1$}}; \node[anchor=south,inner sep=0pt,yshift=-0.3em] (sp2) at (s2.north) {\scriptsize{$\bar{s}_2$}}; \node[anchor=south,inner sep=0pt,yshift=-0.3em] (sp3) at (s3.north) {\scriptsize{$\bar{s}_3$}}; \node[anchor=north,inner sep=0pt,yshift=0.3em] (tp1) at (t1.south) {\scriptsize{$\bar{t}_1$}}; \node[anchor=north,inner sep=0pt,yshift=0.3em] (tp2) at (t2.south) {\scriptsize{$\bar{t}_2$}}; \node[anchor=north,inner sep=0pt,yshift=0.3em] (tp3) at (t3.south) {\scriptsize{$\bar{t}_3$}}; \end{scope} \end{tikzpicture} \end{center} \vspace{-1.0em} \begin{itemize} \item $\{(\bar{s}_k,\bar{t}_k)\}$构成了$(\textbf{s},\textbf{t})$的一个基于短语的翻译推导 \item 需要在建模中描述的两个问题: \begin{itemize} \item $\bar{s}_k$是如何被翻译成$\bar{t}_k$的? \item $\bar{t}_k$在目标语中位置是如何决定的? 
\end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 数学模型 \begin{frame}{数学模型} \begin{itemize} \item \textbf{机器翻译}:对于输入的源语言句子$\textbf{s}$,找到最佳译文$\hat{\textbf{t}}$ \begin{displaymath} \hat{\textbf{t}} = \argmax_{\textbf{t}} \textrm{P}(\textbf{t}|\textbf{s}) \end{displaymath} 其中$\textrm{P}(\textbf{t}|\textbf{s})$表示$\textbf{s}$到$\textbf{t}$的翻译概率 \item 三个基本问题(回忆一下第三章) \begin{enumerate} \item 如何定义$\textrm{P}(\textbf{t}|\textbf{s})$ - 建模问题 \item 如何学习$\textrm{P}(\textbf{t}|\textbf{s})$的统计模型 - 训练问题 \item 如何找到最优译文 - 解码问题 \end{enumerate} \vspace{0.3em} \item<2-> 先看建模问题。可以把$\textrm{P}(\textbf{t}|\textbf{s})$表示成所有翻译推导的概率 \begin{displaymath} \textrm{P}(\textbf{t}|\textbf{s}) = \sum_{d} \textrm{P}(d,\textbf{t}|\textbf{s}) \end{displaymath} $d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导,$\textrm{P}(d,\textbf{t}|\textbf{s})$表示翻译推导$d$的概率 \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 数学模型 \begin{frame}{数学模型(续)} \begin{itemize} \item 但是,上式提到的翻译推导的样本空间是巨大的,很难枚举所有推导并进行求和。通常使用采样的方法选取搜索空间的一部分样本代表整个搜索空间 \vspace{0.3em} \begin{center} \begin{tikzpicture} \node [anchor=west] (s1) at (0,0) {$\textrm{P}(\textbf{t}|\textbf{s}) = $}; \node [anchor=west,inner sep=3pt,fill=red!20] (s2) at ([xshift=0.1em]s1.east) {$\sum_{d} \textrm{P}(d,\textbf{t}|\textbf{s})$}; \node [anchor=west,inner sep=3pt,fill=green!20,minimum width=7.5em] (s3) at ([xshift=2.5em,yshift=1.5em]s2.east) {$\textrm{Max}\ \textrm{P}(d,\textbf{t}|\textbf{s})$}; \node [anchor=west,inner sep=3pt,fill=green!20,minimum width=7.5em] (s4) at ([xshift=2.5em,yshift=-1.5em]s2.east) {$\sum_{d_{nbest}} \textrm{P}(d,\textbf{t}|\textbf{s})$}; \draw[->,thick] ([xshift=-0.1em]s3.west) -- ([xshift=0.1em,yshift=0.3em]s2.east); \draw[->,thick] ([xshift=-0.1em]s4.west) -- ([xshift=0.1em,yshift=-0.3em]s2.east); \end{tikzpicture} \end{center} \vspace{0.3em} 
如1-best(Viterbi)或者n-best的和来近似所有的和 \item<2-> 若采用Viterbi的方法,机器翻译也可看作对于输入的源语言句子$\textbf{s}$,找到最佳翻译推导$\hat{d}$ \begin{displaymath} \hat{d} = \argmax_{d} \textrm{P}(d,\textbf{t}|\textbf{s}) \end{displaymath} 在后面的内容中出现的 $\hat{d}$ 和 $\hat{t}$ 都可以看作是等价的 \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 翻译推导的建模 \begin{frame}{对翻译推导进行建模} \vspace{-0.4em} \begin{itemize} \item $\textrm{P}(\textbf{t}|\textbf{s}) = \sum_{d} \textrm{P}(d,\textbf{t}|\textbf{s})$带来新的问题:如何描述$\textrm{P}(d,\textbf{t}|\textbf{s})$ \\ \vspace{0.0em} \begin{center} \begin{tikzpicture} \begin{scope}[minimum height = 18pt] \node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:}; \node[anchor=west,fill=ugreen!50] (s1) at (0, 0) {在}; \node[anchor=west,fill=red!50] (s2) at ([xshift=1em]s1.east) {桌子 上 的}; \node[anchor=west,fill=blue!50] (s3) at ([xshift=1em]s2.east) {苹果}; \node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:}; \node[anchor=west,fill=blue!50] (t1) at (0, -1.5) {the apple}; \node[anchor=west,fill=ugreen!50] (t2) at ([xshift=1em]t1.east) {on}; \node[anchor=west,fill=red!50] (t3) at ([xshift=1em]t2.east) {the table}; \path[<->, thick] (s1.south) edge (t3.north); \path[<->, thick] (s2.south) edge (t2.north); \path[<->, thick] (s3.south) edge (t1.north); \end{scope} \end{tikzpicture} \end{center} 上图体现了三方面问题 \begin{enumerate} \item 短语获取:确定哪些是``可用''的短语 \item 翻译模型:描述短语翻译的好坏 \item 调序模型:描述翻译中的调序现象 \end{enumerate} \item<2-> 希望有这样一种模型可以对任意的因素进行方便的建模。经典的判别式模型成为了不二的选择\\ \vspace{0.5em} \visible<2->{ \textbf{Discriminative Training and Maximum Entropy Models for Statistical Machine Translation}\\ \textbf{Franz Och and Hermann Ney, 2002, In Proc of ACL} } \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 判别式模型 \begin{frame}{判别式模型} \begin{itemize} \item 判别式模型的形式: \begin{displaymath} \textrm{P}(d,\textbf{t}|\textbf{s}) = 
\frac{\exp(\sum_{i=1}^{M} \lambda_i \cdot h_i(d,\textbf{s},\textbf{t}))}{\sum_{d',t'}\exp(\sum_{i=1}^{M} \lambda_i \cdot h_i(d',\textbf{s},\textbf{t}'))} \end{displaymath} \begin{itemize} \item $\{h_i(\cdot)\}$是$M$个\alert{特征},每个$h_i(d,\textbf{s},\textbf{t})$把$d$映射为一个实数值 \item $\{\lambda_i\}$是这些特征对应权重,权重越大表示特征越重要 \item $\sum_{i=1}^{M} \lambda_i \cdot h_i(d,\textbf{s},\textbf{t})$描述了$d$的整体质量,值约大$d$越``好'' \end{itemize} \item<2-> 判别式模型的优点在于,它可以很方便的引入各种特征。我们只需要设计不同的特征函数$h_i(\cdot)$即可。 \begin{itemize} \item 比如,可以定义短语翻译概率作为特征,也可以定义调序的程度作为一个特征 \end{itemize} \item<2-> \textbf{两个问题}: \begin{itemize} \item \textbf{特征定义}:定义短语翻译特征和调序特征(马上) \item \textbf{权重调优}:得到最好的特征权重(后面) \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 短语系统的架构 \begin{frame}{短语系统的架构} \begin{itemize} \item \textbf{训练阶段},需要得到三个子模型 \begin{enumerate} \item 短语表:短语翻译及每个短语对应的特征值 \item 调序模型:短语调序的模型 \item 语言模型:评价译文流畅度的$n$-gram语言模型 \end{enumerate} \item \textbf{解码阶段}利用以上模型对新的句子进行翻译 \end{itemize} \begin{center} \begin{tikzpicture} \begin{scope} \tikzstyle{datanode} = [minimum width=7em,minimum height=1.7em,fill=ublue,rounded corners=0.7em]; \tikzstyle{modelnode} = [minimum width=7em,minimum height=1.7em,fill=darkred,rounded corners=0.2em]; \tikzstyle{decodingnode} = [minimum width=7em,minimum height=1.7em,fill=ugreen,rounded corners=0.2em]; \node [datanode,anchor=north west] (bitext) at (0,0) {{\color{white} \scriptsize{训练用双语数据}}}; \node [modelnode,anchor=north] (phrase) at ([yshift=-1.5em]bitext.south) {{\color{white} \scriptsize{短语抽取及打分}}}; \node [modelnode,anchor=west] (reorder) at ([xshift=1.5em]phrase.east) {{\color{white} \scriptsize{调序建模}}}; \node [modelnode,anchor=west] (lm) at ([xshift=1.5em]reorder.east) {{\color{white} \scriptsize{语言建模}}}; \node [datanode,anchor=south] (monotext) at ([yshift=1.5em]lm.north) {{\color{white} \scriptsize{目标语单语数据}}}; \node [datanode,anchor=north] (phrasetable) at 
([yshift=-1.5em]phrase.south) {{\color{white} \scriptsize{短语表}}}; \node [datanode,anchor=north] (reordertable) at ([yshift=-1.5em]reorder.south) {{\color{white} \scriptsize{调序模型}}}; \node [datanode,anchor=north] (lmtable) at ([yshift=-1.5em]lm.south) {{\color{white} \scriptsize{语言模型}}}; \node [decodingnode,anchor=north] (decoding) at ([yshift=-2em]reordertable.south) {{\color{white} \scriptsize{解码器}}}; \draw [->,very thick] ([yshift=-0.1em]bitext.south) -- ([yshift=0.1em]phrase.north); \draw [->,very thick] (bitext.south east) -- ([yshift=0.1em]reorder.north west); \draw [->,very thick] ([yshift=-0.1em]monotext.south) -- ([yshift=0.1em]lm.north); \draw [->,very thick] ([yshift=-0.1em]phrase.south) -- ([yshift=0.1em]phrasetable.north); \draw [->,very thick] ([yshift=-0.1em]reorder.south) -- ([yshift=0.1em]reordertable.north); \draw [->,very thick] ([yshift=-0.1em]lm.south) -- ([yshift=0.1em]lmtable.north); \draw [->,very thick] ([yshift=-0.1em]phrasetable.south east) -- ([yshift=0.1em,xshift=-3em]decoding.north); \draw [->,very thick] ([yshift=-0.1em]reordertable.south) -- ([yshift=0.1em,xshift=0em]decoding.north); \draw [->,very thick] ([yshift=-0.1em]lmtable.south west) -- ([yshift=0.1em,xshift=3em]decoding.north); \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ \subsection{短语抽取} %%%------------------------------------------------------------------------------------------------------------ %%% 短语获取 \begin{frame}{短语获取} \begin{itemize} \item 回到最开始的问题: 给定$\textbf{s}$和$\textbf{t}$,\alert{如何获得双语短语} \begin{itemize} \item 如果没有限制,$\textbf{s}$和$\textbf{t}$之间任何子串映射都可以看做双语短语 \end{itemize} \end{itemize} \vspace{-0.7em} \begin{center} \begin{tikzpicture} \setlength{\wseg}{1.5cm} \setlength{\hseg}{1.0cm} \setlength{\wnode}{3.75cm} \setlength{\hnode}{1.0cm} \tikzstyle{elementnode} = [rectangle,text=white,anchor=center] \tikzstyle{srcnode} = 
[rotate=45,font=\small,anchor=south west] \tikzstyle{tgtnode} = [left,font=\small,anchor=north east] \tikzstyle{alignmentnode} = [rectangle,draw,minimum height=3.6\hnode,minimum width=0.36\hnode] \tikzstyle{probnode} = [fill=blue!30,minimum width=0.4\hnode] \tikzstyle{labelnode} = [above] % alignment matrix \begin{scope}[scale=0.85,yshift=0.12in] \foreach \i / \j / \c in {0/7/0.15, 1/7/0.15, 2/7/0.15, 3/7/0.15, 4/7/0.15, 5/7/0.15, 0/6/0.15, 1/6/0.15, 2/6/0.15, 3/6/0.15, 4/6/0.15, 5/6/0.15, 0/5/0.15, 1/5/0.15, 2/5/0.15, 3/5/0.15, 4/5/0.15, 5/5/0.15, 0/4/0.15, 1/4/0.15, 2/4/0.15, 3/4/0.15, 4/4/0.15, 5/4/0.15, 0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15, 4/3/0.15, 5/3/0.15, 0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.15, 5/2/0.15, 0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.15, 5/1/0.15, 0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.15, 5/0/0.15} \node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*\hnode*\i-5.4*0.5*\hnode,0.5*\hnode*\j-1.05*\hnode) {}; % source \node[srcnode] (src1) at (-5.4*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{Have}}; \node[srcnode] (src2) at ([xshift=0.5\hnode]src1.south west) {\scriptsize{you}}; \node[srcnode] (src3) at ([xshift=0.5\hnode]src2.south west) {\scriptsize{learned}}; \node[srcnode] (src4) at ([xshift=0.5\hnode]src3.south west) {\scriptsize{nothing}}; \node[srcnode] (src5) at ([xshift=0.5\hnode]src4.south west) {\scriptsize{?}}; \node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {\scriptsize{EOS}}; % target \node[tgtnode] (tgt1) at (-6.0*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{你}}; \node[tgtnode] (tgt2) at ([yshift=-0.5\hnode]tgt1.north east) {\scriptsize{什么}}; \node[tgtnode] (tgt3) at ([yshift=-0.5\hnode]tgt2.north east) {\scriptsize{都}}; \node[tgtnode] (tgt4) at ([yshift=-0.5\hnode]tgt3.north east) {\scriptsize{没}}; \node[tgtnode] (tgt5) at ([yshift=-0.5\hnode]tgt4.north east) {\scriptsize{学}}; \node[tgtnode] (tgt6) at ([yshift=-0.5\hnode]tgt5.north east) 
{\scriptsize{到}}; \node[tgtnode] (tgt7) at ([yshift=-0.5\hnode]tgt6.north east) {\scriptsize{?}}; \node[tgtnode] (tgt8) at ([yshift=-0.5\hnode]tgt7.north east) {\scriptsize{EOS}}; \node [anchor=west] (p1line1) at ([xshift=4em,yshift=1em]a57.east) {\footnotesize{$\bar{s}_i$: 什么\ \ \ 都\ \ \ 没}}; \node [anchor=north west] (p1line2) at ([xshift=0]p1line1.south west) {\footnotesize{$\bar{t}_i$: learned\ \ \ nothing\ \ \ ? \ \ \ \ \ \ \ \ \ \ \ \ }}; \node [anchor=west] (p2line1) at ([xshift=4em]a53.east) {\footnotesize{$\bar{s}_j$: 到\ \ \ ?}}; \node [anchor=north west] (p2line2) at ([xshift=0]p2line1.south west) {\footnotesize{$\bar{t}_j$: Have\ \ \ you\ \ \ learned\ \ \ nothing}}; \begin{pgfonlayer}{background} \node [rectangle,draw=red,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (a26) (a44)] (phrase1) {}; \node [rectangle,draw=ugreen,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (a01) (a32)] (phrase2) {}; \node [rectangle,inner sep=0.2em,fill=red!10] [fit = (p1line1) (p1line2)] (box1) {}; \node [rectangle,inner sep=0.2em,fill=green!10] [fit = (p2line1) (p2line2)] (box2) {}; \end{pgfonlayer} \draw [->,thick,dotted] ([yshift=-0.8em]phrase1.east) .. controls +(east:1.5) and +(west:1) .. (box1.west); \draw [->,thick,dotted] ([yshift=-0.0em]phrase2.east) .. controls +(east:2.0) and +(west:1) .. ([yshift=1em]box2.west); \end{scope} \end{tikzpicture} \end{center} \begin{itemize} \item<2-> \textbf{显然},不加限制的定义短语会带来很多问题 \begin{itemize} \item 短语数量随句子长度增加急剧膨胀 \item 大量噪声,如``到 ? 
$\leftrightarrow$ Have you learned nothing''
  \end{itemize}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Compatibility with word alignment
\begin{frame}{与词对齐的兼容性}
\begin{itemize}
  \item 基于短语的翻译系统性能很大程度取决于短语表的好坏
  \begin{itemize}
    \item 在前面一章,我们提到过\alert{词对齐}的概念,在源语和目标语之间存在着单词级别的对应关系
    \item 借助词对齐信息可以提高抽取双语短语的效率和质量
  \end{itemize}
\end{itemize}
\vspace{-0.2em}
\begin{center}
\begin{tikzpicture}
\setlength{\wseg}{1.5cm}
\setlength{\hseg}{1.0cm}
\setlength{\wnode}{3.75cm}
\setlength{\hnode}{1.1cm}
\tikzstyle{elementnode} = [rectangle,text=white,anchor=center]
\tikzstyle{srcnode} = [rotate=45,font=\small,anchor=south west]
\tikzstyle{tgtnode} = [left,font=\small,anchor=north east]
\tikzstyle{alignmentnode} = [rectangle,draw,minimum height=3.6\hnode,minimum width=0.36\hnode]
\tikzstyle{probnode} = [fill=blue!30,minimum width=0.4\hnode]
\tikzstyle{labelnode} = [above]
% alignment matrix: an 8-column by 6-row grid of small squares named a<column><row>
\begin{scope}[scale=0.85,yshift=0.12in]
\foreach \i / \j / \c in
  {0/5/0.15, 1/5/0.15, 2/5/0.15, 3/5/0.15, 4/5/0.15, 5/5/0.15, 6/5/0.15, 7/5/0.15,
   0/4/0.15, 1/4/0.15, 2/4/0.15, 3/4/0.15, 4/4/0.15, 5/4/0.15, 6/4/0.15, 7/4/0.15,
   0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15, 4/3/0.15, 5/3/0.15, 6/3/0.15, 7/3/0.15,
   0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.15, 5/2/0.15, 6/2/0.15, 7/2/0.15,
   0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.15, 5/1/0.15, 6/1/0.15, 7/1/0.15,
   0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.15, 5/0/0.15, 6/0/0.15, 7/0/0.15}
  \node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*\hnode*\i-5.4*0.5*\hnode,0.5*\hnode*\j-0.05*\hnode) {};
% source (column labels, each shifted 0.5\hnode to the right of the previous one)
\node[srcnode] (src1) at (-5.4*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{The}};
\node[srcnode] (src2) at ([xshift=0.5\hnode]src1.south west) {\scriptsize{weather}};
\node[srcnode] (src3) at ([xshift=0.5\hnode]src2.south west) {\scriptsize{is}};
\node[srcnode] (src4) at ([xshift=0.5\hnode]src3.south west) {\scriptsize{very}};
\node[srcnode] (src5) at ([xshift=0.5\hnode]src4.south west) {\scriptsize{good}};
\node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {\scriptsize{today}};
\node[srcnode] (src7) at ([xshift=0.5\hnode]src6.south west) {\scriptsize{.}};
\node[srcnode] (src8) at ([xshift=0.5\hnode]src7.south west) {\scriptsize{EOS}};
% target (row labels, each shifted 0.5\hnode below the previous one)
\node[tgtnode] (tgt1) at (-6.0*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{今天}};
\node[tgtnode] (tgt2) at ([yshift=-0.5\hnode]tgt1.north east) {\scriptsize{天气}};
\node[tgtnode] (tgt3) at ([yshift=-0.5\hnode]tgt2.north east) {\scriptsize{真}};
\node[tgtnode] (tgt4) at ([yshift=-0.5\hnode]tgt3.north east) {\scriptsize{好}};
\node[tgtnode] (tgt5) at ([yshift=-0.5\hnode]tgt4.north east) {\scriptsize{。}};
\node[tgtnode] (tgt6) at ([yshift=-0.5\hnode]tgt5.north east) {\scriptsize{EOS}};
% word alignment: larger squares drawn on top of the aligned grid cells
\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l04) at (a04) {};
\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l14) at (a14) {};
\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l55) at (a55) {};
\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l33) at (a33) {};
\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l42) at (a42) {};
\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l61) at (a61) {};
\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l70) at (a70) {};
% from overlay 2 on: the two example phrase pairs shown to the right of the matrix
\visible<2->{
\node [anchor=west] (p1line1) at ([xshift=4em,yshift=1em]a75.east) {\footnotesize{$\bar{s}_i$: 天气\ \ \ \ \ \ }};
\node [anchor=north west] (p1line2) at ([xshift=0]p1line1.south west) {\footnotesize{$\bar{t}_i$: The\ \ \ weather\ \ \ \ \ }};
\node [anchor=west] (p2line1) at ([xshift=4em]a72.east) {\footnotesize{$\bar{s}_j$: 真\ \ \ 好 \ \ }};
\node [anchor=north west] (p2line2) at ([xshift=0]p2line1.south west) {\footnotesize{$\bar{t}_j$: very\ \ \ 
good\ \ \ \ \ \ \ \ }};
\begin{pgfonlayer}{background}
\visible<2->{
% frames around the matrix cells of each example phrase pair, and tinted boxes behind their text
\node [rectangle,draw=red,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (a04) (a14)] (phrase1) {};
\node [rectangle,draw=ugreen,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (a33) (a42)] (phrase2) {};
\node [rectangle,inner sep=0.2em,fill=red!10] [fit = (p1line1) (p1line2)] (box1) {};
\node [rectangle,inner sep=0.2em,fill=green!10] [fit = (p2line1) (p2line2)] (box2) {};
}
\end{pgfonlayer}
\draw [->,thick,dotted] ([yshift=0.3em]phrase1.east) .. controls +(east:3.5) and +(west:1) .. (box1.west);
\draw [->,thick,dotted] ([yshift=-0.0em]phrase2.east) .. controls +(east:2.0) and +(west:1) .. ([yshift=1em]box2.west);
}
\end{scope}
\end{tikzpicture}
\end{center}
\begin{itemize}
  \item<2-> \textbf{如何}使用词对齐信息来抽取短语对?
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Phrase extraction method
\begin{frame}{基于词对齐的短语抽取} % describe the algorithm; show the word alignment and the extracted phrases
\begin{itemize}
  \item 抽取短语要与词对齐保持一致
  \begin{itemize}
    % consistency: links stay inside the pair in both directions, and at least one link exists
    \item 如果短语$\bar{s}$中的词在词对齐$A$中只与短语$\bar{t}$中的词对齐,反之亦然,并且$\bar{s}$和$\bar{t}$之间至少有一个对齐点,那么称短语对$(\bar{t},\bar{s})$与词对齐$A$一致。即
  \end{itemize}
\end{itemize}
\begin{center}
\begin{tikzpicture}
\setlength{\wseg}{1.5cm}
\setlength{\hseg}{1.0cm}
\setlength{\wnode}{3.75cm}
\setlength{\hnode}{1.1cm}
\tikzstyle{elementnode} = [rectangle,text=white,anchor=center]
\tikzstyle{srcnode} = [font=\small,anchor=south west]
\tikzstyle{tgtnode} = [left,font=\small,anchor=north east]
\tikzstyle{alignmentnode} = [rectangle,draw,minimum height=3.6\hnode,minimum width=0.36\hnode]
\tikzstyle{probnode} = [fill=blue!30,minimum width=0.4\hnode]
\tikzstyle{labelnode} = [above]
% alignment matrix1: 4x4 grid, cells named a<column><row>
\begin{scope}[scale=1,yshift=0.12in]
\foreach \i / \j / \c in
  {0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15,
   0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15,
   0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15,
   0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15}
  \node[elementnode,minimum size=0.6*\hnode*\c,inner
sep=0.1pt,fill=blue] (a\i\j) at (0.5*\hnode*\i-5.4*0.5*\hnode,0.5*\hnode*\j-1.05*\hnode) {};
% source (column labels of matrix 1)
\node[srcnode] (src01) at (-5.9*0.5*\hnode,-1.05*\hnode+3.4*0.5*\hnode) {\scriptsize{$t_1$}};
\node[srcnode] (src02) at ([xshift=0.5\hnode]src01.south west) {\scriptsize{$t_2$}};
\node[srcnode] (src03) at ([xshift=0.5\hnode]src02.south west) {\scriptsize{$t_3$}};
\node[srcnode] (src04) at ([xshift=0.5\hnode]src03.south west) {\scriptsize{$t_4$}};
% target (row labels of matrix 1)
\node[tgtnode] (tgt01) at (-6.0*0.5*\hnode,-1.05*\hnode+3.3*0.5*\hnode) {\scriptsize{$s_1$}};
\node[tgtnode] (tgt02) at ([yshift=-0.5\hnode]tgt01.north east) {\scriptsize{$s_2$}};
\node[tgtnode] (tgt03) at ([yshift=-0.5\hnode]tgt02.north east) {\scriptsize{$s_3$}};
\node[tgtnode] (tgt04) at ([yshift=-0.5\hnode]tgt03.north east) {\scriptsize{$s_4$}};
% alignment matrix2: same grid with a different x offset, cells named b<column><row>
\foreach \i / \j / \c in
  {0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15,
   0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15,
   0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15,
   0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15}
  \node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (b\i\j) at (0.5*\hnode*\i+0.6*0.5*\hnode,0.5*\hnode*\j-1.05*\hnode) {};
% source (column labels of matrix 2)
\node[srcnode] (src11) at (0.1*0.5*\hnode,-1.05*\hnode+3.4*0.5*\hnode) {\scriptsize{$t_1$}};
\node[srcnode] (src12) at ([xshift=0.5\hnode]src11.south west) {\scriptsize{$t_2$}};
\node[srcnode] (src13) at ([xshift=0.5\hnode]src12.south west) {\scriptsize{$t_3$}};
\node[srcnode] (src14) at ([xshift=0.5\hnode]src13.south west) {\scriptsize{$t_4$}};
% target (row labels of matrix 2)
\node[tgtnode] (tgt11) at (0.2*0.5*\hnode,-1.05*\hnode+3.3*0.5*\hnode) {\scriptsize{$s_1$}};
\node[tgtnode] (tgt12) at ([yshift=-0.5\hnode]tgt11.north east) {\scriptsize{$s_2$}};
\node[tgtnode] (tgt13) at ([yshift=-0.5\hnode]tgt12.north east) {\scriptsize{$s_3$}};
\node[tgtnode] (tgt14) at ([yshift=-0.5\hnode]tgt13.north east) {\scriptsize{$s_4$}};
% alignment matrix3: cells named c<column><row> (the node statement follows the list)
\foreach \i / \j / \c in
  {0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15,
   0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15,
0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15} \node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (c\i\j) at (0.5*\hnode*\i+6.6*0.5*\hnode,0.5*\hnode*\j-1.05*\hnode) {}; source \node[srcnode] (src21) at (6.1*0.5*\hnode,-1.05*\hnode+3.4*0.5*\hnode) {\scriptsize{$t_1$}}; \node[srcnode] (src22) at ([xshift=0.5\hnode]src21.south west) {\scriptsize{$t_2$}}; \node[srcnode] (src23) at ([xshift=0.5\hnode]src22.south west) {\scriptsize{$t_3$}}; \node[srcnode] (src24) at ([xshift=0.5\hnode]src23.south west) {\scriptsize{$t_4$}}; target \node[tgtnode] (tgt21) at (6.2*0.5*\hnode,-1.05*\hnode+3.3*0.5*\hnode) {\scriptsize{$s_1$}}; \node[tgtnode] (tgt22) at ([yshift=-0.5\hnode]tgt21.north east) {\scriptsize{$s_2$}}; \node[tgtnode] (tgt23) at ([yshift=-0.5\hnode]tgt22.north east) {\scriptsize{$s_3$}}; \node[tgtnode] (tgt24) at ([yshift=-0.5\hnode]tgt23.north east) {\scriptsize{$s_4$}}; word alignment \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la1) at (a03) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la2) at (a12) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la3) at (a11) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lb1) at (b03) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lb2) at (b12) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=red!50] (lb3) at (b11) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lc1) at (c03) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lc2) at (c12) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lc3) at (c11) {}; \begin{pgfonlayer}{background} \node [rectangle,draw=ugreen,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (a03) (a11)] (phrase1) {}; \node 
[rectangle,draw=red,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (b03) (b12)] (phrase2) {}; \node [rectangle,draw=ugreen,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (c03) (c21)] (phrase3) {}; \end{pgfonlayer} \node[anchor=north] (l1) at ([xshift=0.5em,yshift=-0.5em]a10.south) {一致}; \node[anchor=north] (l2) at ([xshift=0.5em,yshift=-0.5em]b10.south) {不一致}; \node[anchor=north] (l3) at ([xshift=0.5em,yshift=-0.5em]c10.south) {一致}; \node[anchor=north] (formula1) at ([xshift=-0.5em,yshift=8.0em]a33.north) {$(\bar{t},\bar{s})$与$A$一致$\Leftrightarrow$}; \node[anchor=west] (formula2) at ([xshift=0.3em,yshift=-1.5em]formula1.east) {$\forall t_i \in \bar{t}:(t_i,s_j) \in A \Rightarrow s_j \in \bar{s}$}; \node[anchor=west] (formula3) at ([xshift=-2.4em,yshift=-1.5em]formula2.west) {AND $\forall s_j \in \bar{s}:(t_i,s_j) \in A \Rightarrow t_i \in \bar{t}$}; \node[anchor=west] (formula4) at ([yshift=-1.5em]formula3.west) {AND $\exists t_i \in \bar{t},\exists s_j \in \bar{s}:(t_i,s_j) \in A $}; \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 短语抽取方法 \begin{frame}{基于词对齐的短语抽取(续)} % 描述算法,给出词对齐及短语抽取结果 \begin{itemize} \item 短语抽取算法 \begin{itemize} \item 遍历所有可能的目标语短语,搜索与它们中每一个相匹配的(最小)源语短语 \item<6-> 需要对抽取短语的长度进行限制,否则抽取出来的短语对数量十分庞大,导致短语表质量下降 \end{itemize} \end{itemize} \vspace{-2.0em} \begin{center} \begin{tikzpicture} \setlength{\wseg}{1.5cm} \setlength{\hseg}{1.0cm} \setlength{\wnode}{3.75cm} \setlength{\hnode}{1.1cm} \tikzstyle{elementnode} = [rectangle,text=white,anchor=center] \tikzstyle{srcnode} = [rotate=45,font=\small,anchor=south west] \tikzstyle{tgtnode} = [left,font=\small,anchor=north east] \tikzstyle{alignmentnode} = [rectangle,draw,minimum height=3.6\hnode,minimum width=0.36\hnode] \tikzstyle{probnode} = [fill=blue!30,minimum width=0.4\hnode] \tikzstyle{labelnode} = [above] % alignment matrix
\begin{scope}[scale=0.85,yshift=0.12in] \foreach \i / \j / \c in {0/5/0.15, 1/5/0.15, 2/5/0.15, 3/5/0.15, 4/5/0.15, 5/5/0.15, 6/5/0.15, 7/5/0.15, 0/4/0.15, 1/4/0.15, 2/4/0.15, 3/4/0.15, 4/4/0.15, 5/4/0.15, 6/4/0.15, 7/4/0.15, 0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15, 4/3/0.15, 5/3/0.15, 6/3/0.15, 7/3/0.15, 0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.15, 5/2/0.15, 6/2/0.15, 7/2/0.15, 0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.15, 5/1/0.15, 6/1/0.15, 7/1/0.15, 0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.15, 5/0/0.15, 6/0/0.15, 7/0/0.15} \node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*\hnode*\i-5.4*0.5*\hnode,0.5*\hnode*\j-0.05*\hnode) {}; % source \node[srcnode] (src1) at (-5.4*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{The}}; \node[srcnode] (src2) at ([xshift=0.5\hnode]src1.south west) {\scriptsize{weather}}; \node[srcnode] (src3) at ([xshift=0.5\hnode]src2.south west) {\scriptsize{is}}; \node[srcnode] (src4) at ([xshift=0.5\hnode]src3.south west) {\scriptsize{very}}; \node[srcnode] (src5) at ([xshift=0.5\hnode]src4.south west) {\scriptsize{good}}; \node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {\scriptsize{today}}; \node[srcnode] (src7) at ([xshift=0.5\hnode]src6.south west) {\scriptsize{.}}; \node[srcnode] (src8) at ([xshift=0.5\hnode]src7.south west) {\scriptsize{EOS}}; % target \node[tgtnode] (tgt1) at (-6.0*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{今天}}; \node[tgtnode] (tgt2) at ([yshift=-0.5\hnode]tgt1.north east) {\scriptsize{天气}}; \node[tgtnode] (tgt3) at ([yshift=-0.5\hnode]tgt2.north east) {\scriptsize{真}}; \node[tgtnode] (tgt4) at ([yshift=-0.5\hnode]tgt3.north east) {\scriptsize{好}}; \node[tgtnode] (tgt5) at ([yshift=-0.5\hnode]tgt4.north east) {\scriptsize{。}}; \node[tgtnode] (tgt6) at ([yshift=-0.5\hnode]tgt5.north east) {\scriptsize{EOS}}; % word alignment \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l14) at (a14) {}; 
\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l55) at (a55) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l33) at (a33) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l42) at (a42) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l61) at (a61) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l70) at (a70) {}; \node [anchor=north west,fill=blue!30,minimum width=1.3in,minimum height=1.5em] (alig) at ([xshift=3em,yshift=-1.5em]tgt6.south west) {\footnotesize{与词对齐保持一致?}}; \visible<2>{ \begin{pgfonlayer}{background} \visible<2>{ \node [rectangle,draw=red,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (a04) (a14)] (phrase1) {}; } \end{pgfonlayer} \draw [->,thick,dotted] ([yshift=-0.1em]phrase1.south) .. controls +(south:3.5) and +(north:1.5) .. ([yshift=0.1em]alig.north); } \visible<3>{ \begin{pgfonlayer}{background} \visible<3>{ \node [rectangle,draw=red,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (a04) (a14) (a24)] (phrase2) {}; } \end{pgfonlayer} \draw [->,thick,dotted] ([xshift=0.5em,yshift=-0.1em]phrase2.south) .. controls +(south:3.5) and +(north:1.5) .. ([yshift=0.1em]alig.north); } \visible<4>{ \begin{pgfonlayer}{background} \visible<4>{ \node [rectangle,draw=red,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (a04) (a14) (a24) (a33)] (phrase3) {}; } \end{pgfonlayer} \draw [->,thick,dotted] ([yshift=-0.1em]phrase3.south) .. controls +(south:2.8) and +(north:1.4) .. ([yshift=0.1em]alig.north); } \visible<5>{ \begin{pgfonlayer}{background} \visible<5>{ \node [rectangle,draw=red,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (a04) (a14) (a24) (a33) (a42)] (phrase4) {}; } \end{pgfonlayer} \draw [->,thick,dotted] ([xshift=0.5em,yshift=-0.1em]phrase4.south) .. controls +(south:2.0) and +(north:1.2) .. 
([yshift=0.1em]alig.north); } \end{scope} \begin{scope}[xshift = 1.5in, yshift = 1.3in] {\scriptsize \node (rules) {\textbf{抽取得到的短语:}}; \draw[-] (rules.south west)--([xshift=2.0in]rules.south west); \visible<2->{ \node[anchor=north west] (r1) at ([yshift=-0.3em]rules.south west) {天气 -- The weather}; } \visible<3->{ \node[anchor=north west] (r2) at ([yshift=-0.4em]r1.south west) {天气 -- The weather is}; } \visible<4->{ \node[anchor=north west] (r3) at ([yshift=-0.4em]r2.south west) {天气真 -- The weather is very}; } \visible<5->{ \node[anchor=north west] (r4) at ([yshift=-0.4em]r3.south west) {天气真好 -- The weather is very good}; } \visible<6->{ \node[anchor=north west] (r5) at ([yshift=-0.4em]r4.south west) {今天天气真好 -- The weather is very}; \node[anchor=north west] (r51) at ([xshift=7em,yshift=-0.4em]r5.south west) {good today}; \node[anchor=north west] (r6) at ([xshift=-7em,yshift=-0.8em]r51.south west) {今天 -- today/真好 -- very good......}; } \visible<2>{ \begin{pgfonlayer}{background} \visible<2>{ \node [rectangle,thick,inner sep=0.1em,fill=ugreen!20] [fit = (r1)] (p1) {}; } \end{pgfonlayer} \draw [->,thick,dotted] ([xshift=0.1em]alig.east) .. controls +(east:2) and +(west:1.5) .. ([xshift=-0.1em]p1.west); } \visible<3>{ \begin{pgfonlayer}{background} \visible<3>{ \node [rectangle,thick,inner sep=0.1em,fill=ugreen!20] [fit = (r2)] (p2) {}; } \end{pgfonlayer} \draw [->,thick,dotted] ([xshift=0.1em]alig.east) .. controls +(east:2) and +(west:1.5) .. ([xshift=-0.1em]p2.west); } \visible<4>{ \begin{pgfonlayer}{background} \visible<4>{ \node [rectangle,thick,inner sep=0.1em,fill=ugreen!20] [fit = (r3)] (p3) {}; } \end{pgfonlayer} \draw [->,thick,dotted] ([xshift=0.1em]alig.east) .. controls +(east:2) and +(west:1.5) .. ([xshift=-0.1em]p3.west); } \visible<5>{ \begin{pgfonlayer}{background} \visible<5>{ \node [rectangle,thick,inner sep=0.1em,fill=ugreen!20] [fit = (r4)] (p4) {}; } \end{pgfonlayer} \draw [->,thick,dotted] ([xshift=0.1em]alig.east) ..
controls +(east:2) and +(west:1.5) .. ([xshift=-0.1em]p4.west); } \begin{pgfonlayer}{background} \visible<6>{ \node [rectangle,draw=ugreen,thick,inner sep=0.1em,fill=white,drop shadow] [fit = (r1) (r6)] (p5) {}; } \end{pgfonlayer} } \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 词对齐系统 \begin{frame}{如何获得词对齐} % GIZA++ + 对称化 % FastAlign % ... \begin{itemize} \item 基于词的翻译模型(IBM模型)为每个句对建立了词对齐关系,但是IBM模型有一个根本问题,每个目标语单词只对齐到一个源语词(GIZA++??) \begin{itemize} \item 词对齐对称化:从两个方向运行IBM模型,合并词对齐结果(交集包含相对可靠的对齐点,并集包含大多数对齐点) \end{itemize} \end{itemize} \vspace{-0.3em} \begin{center} \begin{tikzpicture} \setlength{\wseg}{1.5cm} \setlength{\hseg}{1.0cm} \setlength{\wnode}{3.75cm} \setlength{\hnode}{1.1cm} \tikzstyle{elementnode} = [rectangle,text=white,anchor=center] \tikzstyle{srcnode} = [font=\small,anchor=south west] \tikzstyle{tgtnode} = [left,font=\small,anchor=north east] \tikzstyle{alignmentnode} = [rectangle,draw,minimum height=3.6\hnode,minimum width=0.36\hnode] \tikzstyle{probnode} = [fill=blue!30,minimum width=0.4\hnode] \tikzstyle{labelnode} = [above] % alignment matrix1 \begin{scope}[scale=0.9,yshift=0.12in] \foreach \i / \j / \c in {0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15, 0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15} \node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*\hnode*\i-5.4*0.5*\hnode,0.5*\hnode*\j-1.05*\hnode) {}; % source \node[srcnode] (src01) at (-5.9*0.5*\hnode,-1.05*\hnode+3.4*0.5*\hnode) {\scriptsize{$t_1$}}; \node[srcnode] (src02) at ([xshift=0.5\hnode]src01.south west) {\scriptsize{$t_2$}}; \node[srcnode] (src03) at ([xshift=0.5\hnode]src02.south west) {\scriptsize{$t_3$}}; \node[srcnode] (src04) at ([xshift=0.5\hnode]src03.south west) {\scriptsize{$t_4$}}; % target \node[tgtnode] (tgt01) at 
(-6.0*0.5*\hnode,-1.05*\hnode+3.3*0.5*\hnode) {\scriptsize{$s_1$}}; \node[tgtnode] (tgt02) at ([yshift=-0.5\hnode]tgt01.north east) {\scriptsize{$s_2$}}; \node[tgtnode] (tgt03) at ([yshift=-0.5\hnode]tgt02.north east) {\scriptsize{$s_3$}}; \node[tgtnode] (tgt04) at ([yshift=-0.5\hnode]tgt03.north east) {\scriptsize{$s_4$}}; % alignment matrix2 \foreach \i / \j / \c in {0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15, 0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15} \node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (b\i\j) at (0.5*\hnode*\i+0.6*0.5*\hnode,0.5*\hnode*\j-1.05*\hnode) {}; % source \node[srcnode] (src11) at (0.1*0.5*\hnode,-1.05*\hnode+3.4*0.5*\hnode) {\scriptsize{$t_1$}}; \node[srcnode] (src12) at ([xshift=0.5\hnode]src11.south west) {\scriptsize{$t_2$}}; \node[srcnode] (src13) at ([xshift=0.5\hnode]src12.south west) {\scriptsize{$t_3$}}; \node[srcnode] (src14) at ([xshift=0.5\hnode]src13.south west) {\scriptsize{$t_4$}}; % target \node[tgtnode] (tgt11) at (0.2*0.5*\hnode,-1.05*\hnode+3.3*0.5*\hnode) {\scriptsize{$s_1$}}; \node[tgtnode] (tgt12) at ([yshift=-0.5\hnode]tgt11.north east) {\scriptsize{$s_2$}}; \node[tgtnode] (tgt13) at ([yshift=-0.5\hnode]tgt12.north east) {\scriptsize{$s_3$}}; \node[tgtnode] (tgt14) at ([yshift=-0.5\hnode]tgt13.north east) {\scriptsize{$s_4$}}; % alignment matrix3 \foreach \i / \j / \c in {0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15, 0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15} \node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (c\i\j) at (0.5*\hnode*\i+6.6*0.5*\hnode,0.5*\hnode*\j-1.05*\hnode) {}; % source \node[srcnode] (src21) at (6.1*0.5*\hnode,-1.05*\hnode+3.4*0.5*\hnode) {\scriptsize{$t_1$}}; \node[srcnode] (src22) at ([xshift=0.5\hnode]src21.south west) {\scriptsize{$t_2$}}; \node[srcnode] (src23) at ([xshift=0.5\hnode]src22.south west) 
{\scriptsize{$t_3$}}; \node[srcnode] (src24) at ([xshift=0.5\hnode]src23.south west) {\scriptsize{$t_4$}}; % target \node[tgtnode] (tgt21) at (6.2*0.5*\hnode,-1.05*\hnode+3.3*0.5*\hnode) {\scriptsize{$s_1$}}; \node[tgtnode] (tgt22) at ([yshift=-0.5\hnode]tgt21.north east) {\scriptsize{$s_2$}}; \node[tgtnode] (tgt23) at ([yshift=-0.5\hnode]tgt22.north east) {\scriptsize{$s_3$}}; \node[tgtnode] (tgt24) at ([yshift=-0.5\hnode]tgt23.north east) {\scriptsize{$s_4$}}; % word alignment \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la1) at (a03) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la2) at (a12) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la3) at (a22) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la4) at (a30) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lb1) at (b03) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lb2) at (b12) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lb3) at (b11) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lb4) at (b30) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lc1) at (c03) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lc2) at (c12) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=red!50] (lc3) at (c11) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=red!50] (lc4) at (c22) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lc5) at (c30) {}; \node[anchor=north] (l1) at ([xshift=0.5em,yshift=-0.5em]a10.south) {\scriptsize{S - T}}; \node[anchor=north] (l2) at ([xshift=0.5em,yshift=-0.5em]b10.south) {\scriptsize{T - S}}; \node[anchor=north] (l3) at 
([xshift=0.5em,yshift=-0.5em]c10.south) {\scriptsize{交集/并集}}; \end{scope} \end{tikzpicture} \end{center} \vspace{-1.2em} \begin{itemize} \item<2-> 其他词对齐方法:FastAlign、Berkeley Word Aligner \item<3-> 如何评价词对齐? \begin{enumerate} \item<3-> 自动指标:词对齐错误率(AER) \item<3-> 下游系统:短语抽取、机器翻译... \end{enumerate} \end{itemize} % 如何评价词对齐 - 1) 自动指标 2) 下游系统(短语抽取、机器翻译) \end{frame} %%------------------------------------------------------------------------------------------------------------ %% 改进方法 %\begin{frame}{更好更多的短语} % 提高词对齐性能 -> 增加短语质量? -> 提高翻译质量? % 提高Recall,对于不同任务,词对齐的密度更重要? %\end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 如何度量短语的好坏 \begin{frame}{短语打分 - 翻译概率} % 正向、反向翻译概率 \begin{itemize} \item 抽取到短语之后,如何将这些短语对转化成概率化的短语表?这里使用极大似然估计的方法(MLE)对翻译概率进行估计: \vspace{-0.5em} \begin{displaymath} \textrm{P}(\bar{t}|\bar{s}) = \frac{count(\bar{s},\bar{t})}{count(\bar{s})} \end{displaymath} \vspace{-1.0em} \begin{itemize} \item count($\bar{s},\bar{t}$)表示短语对($\bar{s},\bar{t}$)出现的次数 \item count($\bar{s}$)表示短语$\bar{s}$出现的次数 \end{itemize} \vspace{0.8em} \begin{tikzpicture} {\scriptsize \node [anchor=north west,inner sep=1pt] (entry1) at (0,0) {\tiny{\textbf{1:} 这 是 数据 $\leftrightarrow$ This is data}}; \node [anchor=north west,inner sep=1pt] (entry2) at ([yshift=-0.1em]entry1.south west) {\tiny{\textbf{2:} 小心 !$\leftrightarrow$ Look out !}}; \node [anchor=north west,inner sep=1pt] (entry3) at ([yshift=-0.1em]entry2.south west) {\tiny{\textbf{3:} 你 是 谁 $\leftrightarrow$ Who are you}}; \node [anchor=north west,inner sep=2pt] (entry4) at ([yshift=-0.1em]entry3.south west) {...}; \node [anchor=south west] (corpuslabel) at (entry1.north west) {{\color{ublue} \textbf{双语平行数据}}}; \begin{pgfonlayer}{background} \node[rectangle,draw=ublue,thick,inner sep=0.2em,fill=white,drop shadow,minimum height=1.6cm] [fit = (entry1) (entry2) (entry3) (entry4) (corpuslabel)] (corpus) {}; \end{pgfonlayer} } \visible<2->{ {\scriptsize \node 
[anchor=west] (P1) at ([xshift=15em]entry1.west){\tiny{\textbf{1:} 数据 -- data}}; \node [anchor=west] (P2) at ([xshift=15em]entry2.west){\tiny{\textbf{2:} 小心 -- Look out}}; \node [anchor=west] (P3) at ([xshift=15em]entry3.west){\tiny{\textbf{3:} 谁 -- you}}; \node [anchor=west] (P4) at ([xshift=15em]entry4.west){...}; \node [anchor=west] (plabel) at ([xshift=15em]corpuslabel.west) {{\color{ublue} \textbf{短语对}}}; } \begin{pgfonlayer}{background} \visible<2->{ \node[rectangle,draw=ublue,thick,inner sep=0.2em,fill=white,drop shadow,minimum height=1.6cm] [fit = (P1) (P4) (P2) (P3) (plabel)] (model) {}; } \end{pgfonlayer} \draw [->,very thick,ublue] ([xshift=0.2em]corpus.east) -- ([xshift=3.2em]corpus.east) node [pos=0.5, above] {\alert{\scriptsize{短语抽取}}}; } \visible<3->{ \draw [->,very thick,ublue] ([xshift=0.4em]model.east) -- ([xshift=3.4em]model.east) node [inner sep=0pt,pos=0.5, above,yshift=0.3em] (decodingarrow) {\alert{\scriptsize{统计}}}; {\scriptsize \node [anchor=north west,inner sep=2pt] (sentlabel) at ([xshift=5.5em,yshift=-0.3em]model.north east) {{\color{ublue} \textbf{统计出现次数}}}; \node [anchor=north west] (sent) at ([yshift=-0.5em]sentlabel.south west) {\textbf{count($\bar{s},\bar{t}$)}}; \node [anchor=north west] (sentpart2) at ([yshift=-0.1em]sent.south west) {\textbf{count($\bar{s}$)}}; } } \begin{pgfonlayer}{background} \visible<3->{ \node[rectangle,draw=ublue,thick,inner sep=0.2em,fill=white,drop shadow,minimum height=1.6cm] [fit = (sentlabel) (sent) (sentpart2)] (segsystem) {}; } \end{pgfonlayer} \end{tikzpicture} \vspace{-0.2em} \item<4-> 在实际使用中,还可以加入反向翻译概率即$\textrm{P}(\bar{s}|\bar{t})$来提升机器翻译模型性能 \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 如何度量短语的好坏 \begin{frame}{短语打分 - 词汇翻译概率} % 正向、反向翻译概率 \begin{itemize} \item 对于不常出现的短语可能会产生一些问题,可以将短语分解成词,计算他们的匹配程度。计算公式如下: \vspace{-0.5em} \begin{displaymath} \textrm{$\textrm{P}_{lex}$}(\bar{t}|\bar{s}) = \prod_{j=1}^{J} 
\frac{1}{|\{j|a(j,i) = 1\}|} \sum_{\forall(j,i):a(j,i) = 1} w(t_i|s_j) \end{displaymath} \vspace{-1em} \begin{itemize} \item 源语短语$\bar{s}=s_1...s_J$, 目标语短语$\bar{t}=t_1...t_I$,词对齐矩阵 $\mathbf{a}$ \end{itemize} \visible<2->{ \begin{center} \begin{tikzpicture} \setlength{\wseg}{1.5cm} \setlength{\hseg}{1.0cm} \setlength{\wnode}{3.75cm} \setlength{\hnode}{1.1cm} \tikzstyle{elementnode} = [rectangle,text=white,anchor=center] \tikzstyle{srcnode} = [font=\small,anchor=south west] \tikzstyle{tgtnode} = [left,font=\small,anchor=north east] \tikzstyle{alignmentnode} = [rectangle,draw,minimum height=3.6\hnode,minimum width=0.36\hnode] \tikzstyle{probnode} = [fill=blue!30,minimum width=0.4\hnode] \tikzstyle{labelnode} = [above] % alignment matrix1 \begin{scope}[scale=0.9,yshift=0.12in] \foreach \i / \j / \c in {0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15, 4/3/0.15, 0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.15, 0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.15, 0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.15} \node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*\hnode*\i-5.4*0.5*\hnode,0.5*\hnode*\j-1.05*\hnode) {}; % source \node[srcnode] (tgt01) at (-5.9*0.5*\hnode,-1.05*\hnode+3.4*0.5*\hnode) {\scriptsize{$t_1$}}; \node[srcnode] (tgt02) at ([xshift=0.5\hnode]tgt01.south west) {\scriptsize{$t_2$}}; \node[srcnode] (tgt03) at ([xshift=0.5\hnode]tgt02.south west) {\scriptsize{$t_3$}}; \node[srcnode] (tgt04) at ([xshift=0.5\hnode]tgt03.south west) {\scriptsize{$t_4$}}; \node[srcnode] (tgt05) at ([xshift=0.5\hnode]tgt04.south west) {\scriptsize{$N$}}; % target \node[tgtnode] (src01) at (-6.0*0.5*\hnode,-1.05*\hnode+3.3*0.5*\hnode) {\scriptsize{$s_1$}}; \node[tgtnode] (src02) at ([yshift=-0.5\hnode]src01.north east) {\scriptsize{$s_2$}}; \node[tgtnode] (src03) at ([yshift=-0.5\hnode]src02.north east) {\scriptsize{$s_3$}}; \node[tgtnode] (src04) at ([yshift=-0.5\hnode]src03.north east) {\scriptsize{$s_4$}}; % word alignment 
\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la1) at (a03) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la2) at (a12) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la3) at (a22) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la4) at (a41) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la5) at (a30) {}; \node[anchor=west] (f1) at ([xshift=3em,yshift=0.8em]a43.east) {\scriptsize{$\textrm{P}_{lex}(\bar{t}|\bar{s})=w(t_1|s_1)\times$}}; \node[anchor=north] (f2) at ([xshift=6em]f1.south) {\scriptsize{$\frac{1}{2}(w(t_2|s_2)+w(t_3|s_2))\times$}}; \node[anchor=north west] (f3) at (f2.south west) {\scriptsize{$w(N|s_3)\times$}}; \node[anchor=north west] (f4) at (f3.south west) {\scriptsize{$w(t_4|s_4)$}}; \end{scope} \end{tikzpicture} \end{center} \begin{itemize} \item 词对齐概率$w(t_i|s_j)$可以从平行语料中获取 \item 如果对空则使用概率$w(N|s_j)$ \end{itemize} } \item<3-> 同翻译概率一样,在模型中可以使用双向词汇翻译概率 \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 展示短语表的内容 \begin{frame}{短语表实例} \begin{itemize} \item 下面来看一个真实的短语表例子 \vspace{-0.5em} \begin{center} \begin{tikzpicture} \node [anchor=west] (s1) at (0,0) {\scriptsize{报告 认为 $\vert\vert\vert$ report holds that $\vert\vert\vert$ -2.62 -5.81 -0.91 -2.85 1 0 $\vert\vert\vert$ 4 $\vert\vert\vert$ 0-0 1-1 1-2}}; \node [anchor=west] (s2) at ([yshift=-1.2em]s1.west) {\scriptsize{,悲伤 $\vert\vert\vert$ , sadness $\vert\vert\vert$ -1.946 -3.659 0 -3.709 1 0 $\vert\vert\vert$ 1 $\vert\vert\vert$ 0-0 1-1}}; \node [anchor=west] (s3) at ([yshift=-1.2em]s2.west) {\scriptsize{,北京 等 $\vert\vert\vert$ , beijing , and other $\vert\vert\vert$ 0 -7.98 0 -3.84 1 0 $\vert\vert\vert$ 2 $\vert\vert\vert$ 0-0 1-1 2-2 2-3 2-4}}; \node [anchor=west] (s4) at ([yshift=-1.2em]s3.west) {\scriptsize{,北京 及
$\vert\vert\vert$ , beijing , and $\vert\vert\vert$ -0.69 -1.45 -0.92 -4.80 1 0 $\vert\vert\vert$ 2 $\vert\vert\vert$ 0-0 1-1 2-2}}; \node [anchor=west] (s5) at ([yshift=-1.2em]s4.west) {\scriptsize{一个 中国 $\vert\vert\vert$ one china $\vert\vert\vert$ 0 -1.725 0 -1.636 1 0 $\vert\vert\vert$ 2 $\vert\vert\vert$ 1-1 2-2}}; \node [anchor=west] (s7) at ([yshift=-1.1em]s5.west) {\scriptsize{...}}; \node [anchor=west] (s6) at ([yshift=1.0em]s1.west) {\scriptsize{...}}; \begin{pgfonlayer}{background} \node [rectangle,inner sep=0.3em,fill=red!20] [fit = (s1) (s3) (s4) (s6) (s7)] (box1) {}; \end{pgfonlayer} \end{tikzpicture} \end{center} \item 在短语表的例子中,每行使用 $\vert\vert\vert$ 划分为五个部分 \begin{itemize} \item 第一部分为源语端的短语 \item 第二部分为目标语端的短语 \item 第三部分为多个特征的值,包含了短语翻译概率、词汇翻译概率等特征 \item 第四部分为短语对在短语抽取集合中出现的频率 \item 第五部分为词对齐信息 \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ \subsection{调序} %%%------------------------------------------------------------------------------------------------------------ %%% 调序是翻译中的常见现象 \begin{frame}{翻译中的调序} % 用一个调序比较多的实例说明调序是翻译中的普遍现象 % 需要设计调序特征来对问题进行建模 % 参考NiuTrans Manual \begin{itemize} \item 通过短语表可以找到每个短语正确的翻译结果,我们仍需要对这些短语进行调序来获取流利的翻译结果 \vspace{0.3em} \begin{center} \begin{tikzpicture} \begin{scope}[minimum height = 18pt] \node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:}; \node[anchor=west,fill=ugreen!50] (s1) at (0, 0) {在}; \node[anchor=west,fill=red!50] (s2) at ([xshift=1em]s1.east) {桌子 上 的}; \node[anchor=west,fill=blue!50] (s3) at ([xshift=1em]s2.east) {苹果}; \node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:}; \node[anchor=west,fill=blue!50] (t1) at (0, -1.5) {the apple}; \node[anchor=west,fill=ugreen!50] (t2) at ([xshift=1em]t1.east) {on}; \node[anchor=west,fill=red!50] (t3) at ([xshift=1em]t2.east) {the table}; \path[<->, thick] (s1.south) edge (t2.north); \path[<->, thick] (s2.south) edge (t3.north); \path[<->, thick] (s3.south) edge
(t1.north); \end{scope} \end{tikzpicture} \end{center} \begin{itemize} \item 将调序的程度作为特征加入判别式模型 \item<2-> 也可以引入约束化简调序问题,如BTG \end{itemize} \vspace{0.5em} \visible<2->{ \begin{flushright} \begin{tabular}{l l l l r} $X$ & $\to$ & $X_1$ $X_2,$ & $X_1$ $X_2$ & \hspace{4em} (R1)\\ $X$ & $\to$ & $X_1$ $X_2,$ & $X_2$ $X_1$ & (R2)\\ $X$ & $\to$ & $\bar{s},$ & $\bar{t}$ & (R3)\\ \end{tabular} \end{flushright} } \item<3-> 常见的调序模型有基于距离的调序模型、词汇化调序模型(MSD)、最大熵调序模型(ME) \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 调序模型1:基于距离的调序 \begin{frame}{调序模型1:基于距离的调序} % 参考Moses \begin{itemize} \item 参考前一个短语,来判断当前短语是否需要进行调序,调序距离设为$\textrm{start}_i-\textrm{end}_{i-1}-1$ \begin{itemize} \item $\textrm{start}_i$是指翻译成第$i$个目标语短语的源语短语中的第一个词,而$\textrm{end}_i$为源语短语最后一个词($\textrm{end}_0$为0) \end{itemize} \vspace{0.0em} \visible<2->{ \begin{center} \begin{tikzpicture} \begin{scope}[minimum height = 20pt] \node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:}; \node[anchor=west,fill=ugreen!50] (s1) at (0, 0) {在 桌子 上 的}; \node[anchor=south] (n1) at ([xshift=-2.5em,yshift=-0.5em]s1.north) {\scriptsize{1}}; \node[anchor=south] (n2) at ([xshift=-0.8em,yshift=-0.5em]s1.north) {\scriptsize{2}}; \node[anchor=south] (n3) at ([xshift=1.1em,yshift=-0.5em]s1.north) {\scriptsize{3}}; \node[anchor=south] (n4) at ([xshift=2.5em,yshift=-0.5em]s1.north) {\scriptsize{4}}; \node[anchor=west,fill=blue!50] (s2) at ([xshift=1em]s1.east) {苹果}; \node[anchor=south] (n5) at ([yshift=-0.5em]s2.north) {\scriptsize{5}}; \node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:}; \node[anchor=west,fill=blue!50] (t1) at (0, -1.5) {the apple}; \node[anchor=west,fill=ugreen!50] (t2) at ([xshift=1em]t1.east) {on the table}; \path[<->, thick] (s1.south) edge (t2.north); \path[<->, thick] (s2.south) edge (t1.north); \node[anchor=west] (target) at ([xshift=3em,yshift=2.3em]n5.east) {\scriptsize{目标短语}}; \node[anchor=west] (source) at
([xshift=0.7em]target.east) {\scriptsize{源短语}}; \node[anchor=west] (distance) at ([xshift=0.7em]source.east) {\scriptsize{距离}}; \node[anchor=north] (t1) at ([yshift=-0.1em]target.south) {1}; \node[anchor=north] (t2) at ([yshift=-1.8em]t1.south) {2}; \node[anchor=north] (so1) at ([yshift=-0.1em]source.south) {5}; \node[anchor=north] (so2) at ([yshift=-1.8em]so1.south) {1-4}; \node[anchor=north] (d1) at ([yshift=-0.1em]distance.south) {+4}; \node[anchor=north] (d2) at ([yshift=-1.8em]d1.south) {-5}; \node[anchor=north west,fill=red!20] (m1) at ([xshift=-1em,yshift=-0.0em]t1.south west) {\scriptsize{$\textrm{start}_1-\textrm{end}_{0}-1$ = 5 - 0 - 1}}; \node[anchor=north west,fill=red!20] (m2) at ([xshift=-1em,yshift=-0.0em]t2.south west) {\scriptsize{$\textrm{start}_2-\textrm{end}_{1}-1$ = 1 - 5 - 1}}; \draw[-] (target.south west)--([xshift=1.6in]target.south west); \draw[-,thick] (s1.north west)--([yshift=0.3in]s1.north west); \draw[->,densely dotted,thick] ([yshift=0.3in]s1.north west)--([xshift=0.3in,yshift=0.3in]s1.north west); \draw[-,thick] (s2.north west)--([yshift=0.3in]s2.north west); \draw[->,densely dotted,thick] ([yshift=0.3in]s2.north west)--([xshift=-0.3in,yshift=0.3in]s2.north west); \node[anchor=south] (ld1) at ([xshift=-0.1em,yshift=0.4em]n1.north) {\scriptsize{x=-5}}; \node[anchor=south] (ld2) at ([xshift=6em,yshift=0.4em]n1.north) {\scriptsize{x=+4}}; \end{scope} \end{tikzpicture} \end{center} } \vspace{0.3em} \item<3-> 代价函数选择指数衰减函数$c(x)=\alpha^{|x|}$,其中$\alpha$通过近似估计得到,$\alpha \in$[0,1] \begin{itemize} \item<3-> 调序距离越大,调序代价越大 \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 调序模型2:MSD模型 \begin{frame}{调序模型2:MSD模型} % 参考NiuTrans Manual \begin{itemize} \item 基于距离的调序模型仅仅以词语移动的距离为条件,词汇化调序模型以实际短语为条件 \begin{itemize} \item 在词汇化调序模型中仅考虑三种调序类型:M、S、D \item 三种调序类型仅针对每个短语的前面一个短语 \end{itemize} \end{itemize} \vspace{-0.2em} \begin{center} \begin{tikzpicture} 
\setlength{\wseg}{1.5cm} \setlength{\hseg}{1.0cm} \setlength{\wnode}{3.75cm} \setlength{\hnode}{1.1cm} \tikzstyle{elementnode} = [rectangle,text=white,anchor=center] \tikzstyle{srcnode} = [left,font=\small,anchor=south west] \tikzstyle{tgtnode} = [left,font=\small,anchor=north east] \tikzstyle{alignmentnode} = [rectangle,draw,minimum height=3.6\hnode,minimum width=0.36\hnode] \tikzstyle{probnode} = [fill=blue!30,minimum width=0.4\hnode] \tikzstyle{labelnode} = [above] alignment matrix \begin{scope}[scale=0.92,yshift=0.12in] \foreach \i / \j / \c in {0/5/0.15, 1/5/0.15, 2/5/0.15, 3/5/0.15, 4/5/0.15, 5/5/0.15, 6/5/0.15, 7/5/0.15, 0/4/0.15, 1/4/0.15, 2/4/0.15, 3/4/0.15, 4/4/0.15, 5/4/0.15, 6/4/0.15, 7/4/0.15, 0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15, 4/3/0.15, 5/3/0.15, 6/3/0.15, 7/3/0.15, 0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.15, 5/2/0.15, 6/2/0.15, 7/2/0.15, 0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.15, 5/1/0.15, 6/1/0.15, 7/1/0.15, 0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.15, 5/0/0.15, 6/0/0.15, 7/0/0.15} \node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*\hnode*\i-5.4*0.5*\hnode,0.5*\hnode*\j-0.05*\hnode) {}; source \node[srcnode] (src1) at (-5.9*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{$t_1$}}; \node[srcnode] (src2) at ([xshift=0.5\hnode]src1.south west) {\scriptsize{$t_2$}}; \node[srcnode] (src3) at ([xshift=0.5\hnode]src2.south west) {\scriptsize{$t_3$}}; \node[srcnode] (src4) at ([xshift=0.5\hnode]src3.south west) {\scriptsize{$t_4$}}; \node[srcnode] (src5) at ([xshift=0.5\hnode]src4.south west) {\scriptsize{$t_5$}}; \node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {\scriptsize{$t_6$}}; \node[srcnode] (src7) at ([xshift=0.5\hnode]src6.south west) {\scriptsize{$t_7$}}; \node[srcnode] (src8) at ([xshift=0.5\hnode]src7.south west) {\scriptsize{$t_8$}}; target \node[tgtnode] (tgt1) at (-6.0*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{$s_1$}}; \node[tgtnode] (tgt2) at 
([yshift=-0.5\hnode]tgt1.north east) {\scriptsize{$s_2$}}; \node[tgtnode] (tgt3) at ([yshift=-0.5\hnode]tgt2.north east) {\scriptsize{$s_3$}}; \node[tgtnode] (tgt4) at ([yshift=-0.5\hnode]tgt3.north east) {\scriptsize{$s_4$}}; \node[tgtnode] (tgt5) at ([yshift=-0.5\hnode]tgt4.north east) {\scriptsize{$s_5$}}; \node[tgtnode] (tgt6) at ([yshift=-0.5\hnode]tgt5.north east) {\scriptsize{$s_6$}}; word alignment \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l05) at (a05) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l14) at (a14) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l24) at (a24) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l31) at (a31) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l32) at (a32) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l43) at (a43) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l53) at (a53) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l63) at (a63) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l70) at (a70) {}; \visible<2->{ \node [anchor=west] (p1line1) at ([xshift=3.5em,yshift=0.5em]a75.east) {\footnotesize{M(monotone):单调调序}}; \node [anchor=north west] (p1line2) at ([xshift=0,yshift=-1em]p1line1.south west) {\footnotesize{S(swap): 与前面一个短语}}; \node [anchor=north west] (p1line3) at ([xshift=3.8em]p1line2.south west) {\footnotesize{位置进行交换}}; \node [anchor=north west] (p1line4) at ([xshift=-3.8em,yshift=-1em]p1line3.south west) {\footnotesize{D(discontinuous):非连续调序}}; %\node [anchor=west] (p2line1) at ([xshift=4em]a73.east) {\footnotesize{$\bar{s}_j$: 真\ \ \ 好 \ \ }}; %\node [anchor=north west] (p2line2) at ([xshift=0]p2line1.south west) {\footnotesize{$\bar{t}_j$: very\ \ \ good\ \ \ \ \ \ \ \ }}; 
\begin{pgfonlayer}{background} \visible<2->{ \node [rectangle,thick,inner sep=0.3em,fill=blue!40,drop shadow,fill opacity=0.85] [fit = (a05)] (phrase1) {}; \node [rectangle,thick,inner sep=0.3em,fill=blue!40,drop shadow,fill opacity=0.85] [fit = (a14) (a24)] (phrase2) {}; \node [rectangle,thick,inner sep=0.3em,fill=blue!40,drop shadow,fill opacity=0.85] [fit = (a31) (a32)] (phrase3) {}; \node [rectangle,thick,inner sep=0.3em,fill=blue!40,drop shadow,fill opacity=0.85] [fit = (a43) (a63)] (phrase4) {}; \node [rectangle,thick,inner sep=0.3em,fill=blue!40,drop shadow,fill opacity=0.85] [fit = (a70)] (phrase5) {}; \node [rectangle,inner sep=0.2em,fill=red!10] [fit = (p1line1)] (box1) {}; \node [rectangle,inner sep=0.2em,fill=ugreen!10] [fit = (p1line2) (p1line3)] (box2) {}; \node [rectangle,inner sep=0.2em,fill=orange!10] [fit = (p1line4)] (box3) {}; } \end{pgfonlayer} \node [circle,draw,anchor=south,inner sep=1pt,fill=red!20] (c1) at ([xshift=-0.5em]a05.north) {\scriptsize{m}}; \node [circle,draw,anchor=south,inner sep=1pt,fill=red!20] (c2) at ([xshift=-0.5em]a14.north) {\scriptsize{m}}; \node [circle,draw,anchor=north,inner sep=1pt,fill=orange!20] (c3) at ([xshift=0.1em]a24.south) {\scriptsize{d}}; \node [circle,draw,anchor=south,inner sep=2pt,fill=ugreen!20] (c4) at ([xshift=0.5em]a32.north) {\scriptsize{s}}; \node [circle,draw,anchor=north,inner sep=1pt,fill=orange!20] (c5) at ([xshift=0.5em]a63.south) {\scriptsize{d}}; \draw [->,thick] (a05.south east) -- (c1.315); \draw [->,thick] ([xshift=-0.5em]a24.west) -- (c2.315); \draw [->,thick] ([yshift=-0.7em]a32.south) .. controls +(west:1.3) and +(east:1.3) .. (c3.340); \draw [->,thick] (a53.center) -- (c4.15); \draw [->,thick] (a70.center) .. controls +(west:1.3) and +(east:1.3) .. 
(c5.340); } \end{scope} \end{tikzpicture} \end{center} \vspace{-0.6em} \begin{itemize} \item<3-> 引入调序模型来预测调序的方向类型,计算公式如下 \end{itemize} \visible<3->{ \begin{displaymath} \Pr(\textbf{o}|\textbf{s},\textbf{t},\textbf{a}) = \prod_{i=1}^{K} \Pr(o_i| \bar{s}_{a_i}, \bar{t}_i, a_{i-1}, a_i) \end{displaymath} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 调序模型2:MSD模型 \begin{frame}{调序模型2:MSD模型(续)} % 参考NiuTrans Manual \begin{itemize} \item 来详细地分析一下 \vspace{-1em} \begin{displaymath} \Pr(\textbf{o}|\textbf{s},\textbf{t},\textbf{a}) = \prod_{i=1}^{K} \Pr(o_i| \bar{s}_{a_i}, \bar{t}_i, a_{i-1}, a_i) \end{displaymath} \vspace{-1em} \begin{itemize} \item 其中$a_i$为$\bar{t}_i$与$\bar{s}_{a_i}$的词对齐,$\bar{t}_1 \dots \bar{t}_K$为目标语短语序列 \item $o_i$为相应的调序类型,$O=\{M,S,D\}$,与$a_i$和$a_{i-1}$有关 \end{itemize} \vspace{-0.5em} \begin{displaymath} o_i = \left\{ \begin{array}{ll} M & \textrm{if $a_i - a_{i-1} = 1$}\\ S & \textrm{if $a_i - a_{i-1} = -1$}\\ D & \textrm{otherwise}\\ \end{array} \right. 
\nonumber \end{displaymath} \vspace{-0.5em} \begin{itemize} \item 针对不同的调序类型,调序模型如下 \end{itemize} \vspace{0.1em} \begin{displaymath} f_{\textrm{M-pre}}(d) = \prod_{i=1}^{K} \Pr(o_i = M| \bar{s}_{a_i}, \bar{t}_i, a_{i-1}, a_i) \end{displaymath} \vspace{-0.8em} \begin{itemize} \item 我们还可以得到$f_{\textrm{S-pre}}(d)$和$f_{\textrm{D-pre}}(d)$,此外将$a_{i-1}$换成$a_{i+1}$,还可以得到每个短语与后面短语的调序类型 \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 调序模型2:MSD模型 \begin{frame}{调序模型2:MSD模型(续)} % 参考NiuTrans Manual \begin{itemize} \item 统计每种调序方向中每个抽取短语对的频率,使用极大似然估计对其概率分布进行估计 \begin{itemize} \item 基于词的调序模型和基于短语的调序模型 \item<2-> 假设$\bar{t}_i$由$(t_u,...,t_v)$组成,$\bar{s}_{a_i}$由$(s_x,...,s_y)$组成 \end{itemize} \end{itemize} \vspace{-0.2em} \visible<2->{ \begin{center} \begin{tikzpicture} \setlength{\wseg}{1.5cm} \setlength{\hseg}{1.0cm} \setlength{\wnode}{3.75cm} \setlength{\hnode}{1.1cm} \tikzstyle{elementnode} = [rectangle,anchor=center] \tikzstyle{srcnode} = [font=\small,anchor=south west] \tikzstyle{tgtnode} = [left,font=\small,anchor=north east] \tikzstyle{alignmentnode} = [rectangle,draw,minimum height=3.6\hnode,minimum width=0.36\hnode] \tikzstyle{probnode} = [fill=blue!30,minimum width=0.4\hnode] \tikzstyle{labelnode} = [above] % alignment matrix1 \begin{scope}[scale=0.9,yshift=0.12in] \foreach \i / \j / \c in {0/5/0.15, 1/5/0.15, 2/5/0.15, 3/5/0.15, 4/5/0.15, 5/5/0.15, 0/4/0.15, 1/4/0.15, 2/4/0.15, 3/4/0.15, 4/4/0.15, 5/4/0.15, 0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15, 4/3/0.15, 5/3/0.15, 0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.15, 5/2/0.15, 0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.15, 5/1/0.15, 0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.15, 5/0/0.15} \node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*\hnode*\i-5.4*0.5*\hnode,0.5*\hnode*\j-1.05*\hnode) {}; % source \node[srcnode] (src01) at (-5.9*0.5*\hnode,-1.05*\hnode+5.4*0.5*\hnode) 
{\scriptsize{$t_1$}}; \node[srcnode] (src02) at ([xshift=0.5\hnode]src01.south west) {\scriptsize{$t_2$}}; \node[srcnode] (src03) at ([xshift=0.5\hnode]src02.south west) {\scriptsize{$t_3$}}; \node[srcnode] (src04) at ([xshift=0.5\hnode]src03.south west) {\scriptsize{$t_4$}}; \node[srcnode] (src05) at ([xshift=0.5\hnode]src04.south west) {\scriptsize{$t_5$}}; \node[srcnode] (src06) at ([xshift=0.5\hnode]src05.south west) {\scriptsize{$t_6$}}; % target \node[tgtnode] (tgt01) at (-6.0*0.5*\hnode,-1.05*\hnode+5.4*0.5*\hnode) {\scriptsize{$s_1$}}; \node[tgtnode] (tgt02) at ([yshift=-0.5\hnode]tgt01.north east) {\scriptsize{$s_2$}}; \node[tgtnode] (tgt03) at ([yshift=-0.5\hnode]tgt02.north east) {\scriptsize{$s_3$}}; \node[tgtnode] (tgt04) at ([yshift=-0.5\hnode]tgt03.north east) {\scriptsize{$s_4$}}; \node[tgtnode] (tgt05) at ([yshift=-0.5\hnode]tgt04.north east) {\scriptsize{$s_5$}}; \node[tgtnode] (tgt06) at ([yshift=-0.5\hnode]tgt05.north east) {\scriptsize{$s_6$}}; %% alignment matrix2 \foreach \i / \j / \c in {0/5/0.15, 1/5/0.15, 2/5/0.15, 3/5/0.15, 4/5/0.15, 5/5/0.15, 0/4/0.15, 1/4/0.15, 2/4/0.15, 3/4/0.15, 4/4/0.15, 5/4/0.15, 0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15, 4/3/0.15, 5/3/0.15, 0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.15, 5/2/0.15, 0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.15, 5/1/0.15, 0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.15, 5/0/0.15} \node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (b\i\j) at (0.5*\hnode*\i+4*0.5*\hnode,0.5*\hnode*\j-1.05*\hnode) {}; % source \node[srcnode] (src11) at (3.5*0.5*\hnode,-1.05*\hnode+5.4*0.5*\hnode) {\scriptsize{$t_1$}}; \node[srcnode] (src12) at ([xshift=0.5\hnode]src11.south west) {\scriptsize{$t_2$}}; \node[srcnode] (src13) at ([xshift=0.5\hnode]src12.south west) {\scriptsize{$t_3$}}; \node[srcnode] (src14) at ([xshift=0.5\hnode]src13.south west) {\scriptsize{$t_4$}}; \node[srcnode] (src15) at ([xshift=0.5\hnode]src14.south west) {\scriptsize{$t_5$}}; \node[srcnode] (src16) at 
([xshift=0.5\hnode]src15.south west) {\scriptsize{$t_6$}}; % target \node[tgtnode] (tgt11) at (3.4*0.5*\hnode,-1.05*\hnode+5.4*0.5*\hnode) {\scriptsize{$s_1$}}; \node[tgtnode] (tgt12) at ([yshift=-0.5\hnode]tgt11.north east) {\scriptsize{$s_2$}}; \node[tgtnode] (tgt13) at ([yshift=-0.5\hnode]tgt12.north east) {\scriptsize{$s_3$}}; \node[tgtnode] (tgt14) at ([yshift=-0.5\hnode]tgt13.north east) {\scriptsize{$s_4$}}; \node[tgtnode] (tgt15) at ([yshift=-0.5\hnode]tgt14.north east) {\scriptsize{$s_5$}}; \node[tgtnode] (tgt16) at ([yshift=-0.5\hnode]tgt15.north east) {\scriptsize{$s_6$}}; % word alignment \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la1) at (a23) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la2) at (a22) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la3) at (a31) {}; \visible<3->{ \node[align=center,elementnode,minimum size=0.5cm,inner sep=0.1pt,fill=red!50] (la4) at (a14) {M}; } \visible<4->{ \node[align=center,elementnode,minimum size=0.5cm,inner sep=0.1pt,fill=ugreen!50] (la5) at (a44) {S}; } \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lb1) at (b23) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lb2) at (b22) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lb3) at (b31) {}; \visible<3->{ \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=red!50] (lb4) at (b05) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=red!50] (lb5) at (b14) {}; } \visible<4->{ \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=ugreen!50] (lb6) at (b45) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=ugreen!50] (lb7) at (b54) {}; } \begin{pgfonlayer}{background} \node [rectangle,thick,inner sep=0.4em,fill=blue!40,drop shadow,fill opacity=0.85] [fit = (a23) (a31)] 
(phrase1) {}; \node [rectangle,thick,inner sep=0.4em,fill=blue!40,drop shadow,fill opacity=0.85] [fit = (b23) (b31)] (phrase2) {}; \visible<3->{ \node [rectangle,thick,inner sep=0.4em,fill=red!40,drop shadow,fill opacity=0.85] [fit = (b05) (b14)] (phrase3) {}; } \visible<4->{ \node [rectangle,thick,inner sep=0.4em,fill=ugreen!40,drop shadow,fill opacity=0.85] [fit = (b45) (b54)] (phrase4) {}; } \end{pgfonlayer} \visible<4->{ \node[anchor=north] (s1) at ([xshift=0.6em,yshift=0.1em]b45.east) {S}; } \visible<3->{ \node[anchor=north] (m1) at ([xshift=0.6em,yshift=0.1em]b05.east) {M}; } \node[anchor=north] (l1) at ([xshift=1.8em,yshift=-0.5em]a10.south) {\scriptsize{基于词}}; \node[anchor=north] (l2) at ([xshift=2.2em,yshift=-0.5em]b10.south) {\scriptsize{基于短语}}; \visible<3->{ \node [anchor=south west] (p1line1) at ([xshift=-4em,yshift=2em]a05.north west) {\footnotesize{如果在(x-1,u-1)存在对齐点,或者存在可抽取的短语,则$o_i$=M}}; } \visible<4->{ \node [anchor=north west] (p1line2) at ([yshift=-11.2em]p1line1.south west) {\footnotesize{如果在(x-1,v+1)存在对齐点,或者存在可抽取的短语,则$o_i$=S}}; } \begin{pgfonlayer}{background} \visible<3->{ \node [rectangle,inner sep=0.2em,fill=red!10] [fit = (p1line1)] (box1) {}; } \visible<4->{ \node [rectangle,inner sep=0.2em,fill=ugreen!10] [fit = (p1line2)] (box2) {}; } \end{pgfonlayer} %\draw [->,dotted,very thick,red] ([yshift=0.1em]la4.north) .. controls +(north:1) and +(south:1) .. 
([xshift=3em,yshift=-0.1em]box1.south west) ; \end{scope} \end{tikzpicture} \end{center} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 调序模型3:分类模型 \begin{frame}{调序模型3:分类模型} % 参考NiuTrans Manual 的最大熵分类 \begin{itemize} \item 还有一种基于最大熵的调序模型,将调序问题看成是一个二分类问题 \begin{itemize} \item 对于相邻的两个短语块,将其合成一个更大的块 \item 根据合成后的顺序$o$,可以分为正序和反序,因此可以看作是一个二分类问题 \end{itemize} \vspace{0.1em} \begin{center} \begin{tikzpicture} \begin{scope}[minimum height = 15pt] \node[anchor=west,fill=ugreen!50,minimum width=3em] (x1) at (0, 0) {\footnotesize{$X_1$}}; \node[anchor=west,fill=blue!50] (x2) at ([xshift=1em]x1.east) {\footnotesize{$X_2$}}; \node[anchor=west,fill=ugreen!50,minimum width=3em] (x3) at ([xshift=3em,yshift=1.2em]x2.east) {\footnotesize{$X_1$}}; \node[anchor=west,fill=blue!50] (x4) at ([xshift=1em]x3.east) {\footnotesize{$X_2$}}; \node[anchor=west,fill=blue!50] (x5) at ([xshift=3em,yshift=-1.2em]x2.east) {\footnotesize{$X_2$}}; \node[anchor=west,fill=ugreen!50,minimum width=3em] (x6) at ([xshift=1em]x5.east) {\footnotesize{$X_1$}}; \draw [->,thick] ([xshift=0.1em]x2.east) -- ([xshift=-0.1em]x3.west); \draw [->,thick] ([xshift=0.1em]x2.east) -- ([xshift=-0.1em]x5.west); \node[anchor=west] (l1) at ([xshift=1em]x4.east) {正序}; \node[anchor=west] (l2) at ([xshift=1em]x6.east) {反序}; \end{scope} \end{tikzpicture} \end{center} \begin{itemize} \item 但是,对于非连续的调序该模型无法处理 \end{itemize} \item 对于每一种翻译推导$d$,基于最大熵的调序模型的得分计算公式如下 \begin{displaymath} f_{\textrm{ME}}(d) = \prod_{\langle o,X_1,X_2 \rangle \in d} \Pr(o|X_1, X_2) \end{displaymath} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ \subsection{特征及最小错误率训练} %%%------------------------------------------------------------------------------------------------------------ %%% 模型得分 \begin{frame}{模型得分} % 定义模型得分 - 不需要算归一化的分母 % 判别式模型使用了三类特征:1)翻译模型特征;2)调序模型特征;3)语言模型 \begin{itemize} \item 
基于短语的统计机器翻译模型包括三个子模型 \begin{enumerate} \item 短语表:短语翻译及每个短语对应的特征值 \item 调序模型:短语调序的模型 \item 语言模型:评价译文流畅度的$n$-gram语言模型 \end{enumerate} \item 把每个子模型当作一个特征,为每个模型添加一个权重,然后使用对数线性模型对这些子模型进行建模,对数线性模型的形式如下: \vspace{-0.8em} \begin{displaymath} \textrm{P}(d,\textbf{t}|\textbf{s}) \propto \exp(\sum_{i=1}^{M} \lambda_i \cdot h_i(d,\textbf{s},\textbf{t})) \end{displaymath} \vspace{-1.2em} \begin{itemize} \item 将三个子模型作为具体的特征代入有 \end{itemize} \vspace{0.8em} \begin{displaymath} \textrm{P}(d,\textbf{t}|\textbf{s}) = \prod_{(\bar{s},\bar{t}) \in d} \Pr(\bar{t}|\bar{s})^{\lambda_{1}} \times f(d)^{\lambda_{2}} \times \Pr\nolimits_{\textrm{lm}}(\mathbf{t})^{\lambda_{lm}} \end{displaymath} \item 可以引入更多的特征来提高翻译质量(下面介绍) \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 特征的具体形式 \begin{frame}{特征} % 给出特征列表 \begin{itemize} \item \textbf{特征1-2: 短语翻译概率},即正向翻译概率$\Pr(\bar{t}|\bar{s})$和反向翻译概率$\Pr(\bar{s}|\bar{t})$。是基于短语的统计机器翻译模型中最主要的特征。 \item \textbf{特征3-4: 词汇翻译概率},即正向词汇翻译概率$\Pr_{\textrm{lex}}(\bar{t}|\bar{s})$和反向词汇翻译概率$\Pr_{\textrm{lex}}(\bar{s}|\bar{t})$。用来描述短语对中源语端单词和目标语端单词的对应关系 \item<2-> \textbf{特征5: $n$-gram语言模型},即$\textrm{P}_{\textrm{lm}}(\textbf{t})$。度量译文的流畅度,可以使用大规模目标语单语数据得到。 \item<2-> \textbf{特征6:译文长度},即$|\textbf{t}|$。避免模型倾向于短译文,同时让系统自动学习对译文长度的偏好。 \item<2-> \textbf{特征7:翻译规则数量}。这个特征是为了避免模型仅仅使用少量特征构成翻译推导(因为翻译概率相乘,因子少结果一般会大一些),同时让系统自动学习对使用规则数量的偏好。 \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 特征的具体形式 \begin{frame}{特征(续)} % 给出特征列表 \begin{itemize} \item \textbf{特征8:源语言被翻译为空的单词数量}。注意,空翻译规则(或特征)有时也被称作evil feature,这类特征在一些数据集上对BLEU有很好的提升作用,但是会造成人工评价的下降,因此需要谨慎使用。 \item \textbf{特征9:基于最大熵的调序模型},$f_{\textrm{ME}}(d)$。 \item \textbf{特征10:基于MSD的调序模型},包括与前一个短语的调序$f_{\textrm{M-pre}}(d)$、$f_{\textrm{S-pre}}(d)$、$f_{\textrm{D-pre}}(d)$, 和后一个短语的调序$f_{\textrm{M-fol}}(d)$、$f_{\textrm{S-fol}}(d)$、$f_{\textrm{D-fol}}(d)$ \item 
\textbf{最终模型得分} \vspace{0.3em} \begin{center} \begin{tikzpicture} \begin{scope}[minimum height = 15pt] \node[anchor=west,minimum width=3em] (x1) at (0, 0) {\footnotesize{$\textrm{P}(d,\textbf{t}|\textbf{s}) = \prod_{(\bar{s},\bar{t}) \in d} \textrm{score}(\bar{s},\bar{t}) \times f_{\textrm{ME}}(d)^{\lambda_{ME}} \times f_{\textrm{MSD}}(d)^{\lambda_{MSD}} \times$}}; \node[anchor=north west] (x2) at ([xshift=4em,yshift=0.1em]x1.south west) {\footnotesize{$\Pr\nolimits_{\textrm{lm}}(\mathbf{t})^{\lambda_{lm}} \times \exp(\lambda_{TWB} \cdot \textrm{length}(\mathbf{t})) / Z(\mathbf{s})$}}; \node[anchor=north west] (x3) at ([yshift=-1.8em]x1.south west) {\footnotesize{$\textrm{score}(\bar{s},\bar{t}) = \Pr(\bar{t}|\bar{s})^{\lambda_{1}} \times \Pr(\bar{s}|\bar{t})^{\lambda_{2}} \times \Pr\nolimits_{\textrm{lex}}(\bar{t}|\bar{s})^{\lambda_{3}} \times \Pr\nolimits_{\textrm{lex}}(\bar{s}|\bar{t})^{\lambda_{4}} \times$}}; \node[anchor=north west] (x4) at ([xshift=5em,yshift=0.1em]x3.south west) {\footnotesize{$\exp(\lambda_{PB}) \times \exp(\lambda_{WDB} \cdot \delta(\bar{s} \to \textrm{null}))$}}; \end{scope} \end{tikzpicture} \end{center} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 特征权重调优 \begin{frame}{特征权重调优} % 看我MERT的slides \begin{itemize} \item 对于训练样本$S=\{(f_1,r_1),...,(f_s,r_s)\}$,有 \begin{itemize} \item $f_s$为样本中的第$s$个源语句子,$r_s$为相应的译文,通常使用$R=\{r_1,...,r_s\}$来表示训练样本的参考译文 \item 针对每个源语句子,解码器可以生成一个n-best结果$\{d_{ij}\}$ \end{itemize} \item 对于模型参数$\lambda$,最佳的翻译推导为$D^*=\{d_{1}^{*},...,d_{s}^{*}\}$ \vspace{-0.5em} \begin{displaymath} d_{i}^{*} = \argmax_{d_{ij}} \sum_{k=1}^{M} \lambda_k \cdot h_k(d_{ij}) \end{displaymath} \vspace{-0.9em} \item<2-> 最小错误率训练(MERT) \vspace{0.1em} \begin{itemize} \item<2-> 定义一个错误函数Err$(D^*, R)$来衡量推导$D^*$得到的译文与参考答案$R$之间的差距,通过调整权重$\lambda$来最小化错误率 \item<2-> 常见的错误函数有词错误率(WER)、位置错误率(PER)、BLEU值以及NIST值 \end{itemize} \vspace{0.3em} \visible<2->{ \begin{displaymath} \mathbf{\lambda}^* = 
\argmin_{\mathbf{\lambda}} \mathbf{Err}(D^*, R) \end{displaymath} } \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 特征权重调优 \begin{frame}{特征权重调优(续)} % 看我MERT的slides \begin{itemize} \item 如何得到最优的$\lambda^*$ \begin{itemize} \item 最简单的方法是枚举所有可能的$\lambda$值,但是这样做效率很低。可以只考虑最优译文发生变化的点:) \item 对于每个训练样本,假设有2-best个推导$\mathbf{d}=\{d_1,d_2\}$,每个推导$d$的得分modelscore($d$)可以表示成关于权重$\lambda_i$的函数 \end{itemize} \vspace{0.2em} \begin{displaymath} \textrm{modelscore}(d) = \lambda_i \cdot h_i(d) + \sum_{k{\ne}i}^{M} \lambda_k \cdot h_k(d) = a \cdot \lambda_i + b \end{displaymath} \vspace{-0.7em} \begin{center} \begin{tikzpicture} \begin{scope} \node[anchor=west] (x0) at (0, 0) {}; \draw[->,thick] (x0.center) -- ([xshift=8.2em]x0.east); \draw[->,thick] (x0.center) -- ([yshift=5.6em]x0.center); \visible<1-2>{ \draw[thick] ([yshift=1em]x0.center) -- ([xshift=8em,yshift=5em]x0.center); \draw[thick] ([yshift=2em]x0.center) -- ([xshift=8em,yshift=4em]x0.center); \node[anchor=north] (e1) at ([xshift=6em,yshift=6em]x0.south) {\footnotesize{$d_1$}}; \node[anchor=north] (e2) at ([xshift=7em,yshift=4em]x0.south) {\footnotesize{$d_2$}}; \node[anchor=north,rotate=90] (e2) at ([xshift=-1.3em,yshift=3.6em]x0.south) {\footnotesize{model score}}; } \visible<2>{ \node [anchor=center,draw=red,circle,inner sep=2pt,thick] (x1) at ([xshift=4em,yshift=3em]x0.center) {}; \draw[thick,dotted] ([xshift=4em]x0.center) -- ([xshift=4em,yshift=3em]x0.center); } \node[anchor=north] (zero) at ([yshift=0.1em]x0.south) {\footnotesize{0}}; \node[anchor=north] (wx) at ([xshift=4em,yshift=0.1em]x0.south) {\footnotesize{$w_x$}}; \node[anchor=north] (wi) at ([xshift=8em,yshift=0.1em]x0.south) {\footnotesize{$w_i$}}; \visible<3->{ \draw[thick] ([yshift=2em]x0.center) -- ([xshift=4em,yshift=2em]x0.center); \draw[thick] ([xshift=4em,yshift=4em]x0.center) -- ([xshift=8em,yshift=4em]x0.center); \draw[thick,dotted] ([xshift=4em]x0.center) 
-- ([xshift=4em,yshift=5.5em]x0.center); \node[anchor=north] (e1) at ([xshift=2em,yshift=3em]x0.north) {\footnotesize{$d^*=d_1$}}; \node[anchor=north] (e2) at ([xshift=6.2em,yshift=5em]x0.north) {\footnotesize{$d^*=d_2$}}; \node[anchor=north,rotate=90] (e2) at ([xshift=-1.3em,yshift=3.6em]x0.south) {\footnotesize{BLEU}}; \draw[decorate,decoration={brace,amplitude=0.4em},red,thick] ([xshift=3.8em,yshift=0.5em]x0.south) -- ([xshift=8.2em,yshift=0.5em]x0.south); \node[anchor=north] (wi) at ([xshift=6.1em,yshift=2em]x0.south) {\tiny{\alert{挑选$w_i$}}}; } \visible<2->{ \node [anchor=east] (line1) at ([xshift=-2.2em,yshift=4.5em]x0.west) {\footnotesize{1.找到最优译文$E^*$发生变化的位置}}; } \visible<3->{ \node [anchor=north west] (line2) at ([yshift=-0.8em]line1.south west) {\footnotesize{2.对译文按照BLEU值进行排序,\ \ \ }}; \node [anchor=north west] (line3) at ([xshift=0.6em,yshift=-0.1em]line2.south west) {\footnotesize{在分数高的范围中挑选新的$\lambda_i$}}; } \begin{pgfonlayer}{background} \visible<2->{ \node [rectangle,inner sep=0.2em,fill=red!10] [fit = (line1)] (box1) {}; } \visible<3->{ \node [rectangle,inner sep=0.2em,fill=green!10] [fit = (line2) (line3)] (box2) {}; } \end{pgfonlayer} \end{scope} \end{tikzpicture} \end{center} \vspace{-0.3em} \begin{itemize} \item<4-> 还有一些技巧可以防止训练出现过拟合和陷入局部最优 \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ \subsection{栈解码} %%%------------------------------------------------------------------------------------------------------------ %%% 什么是解码 \begin{frame}{解码问题} % 定义解码是啥 \begin{itemize} \item 解码是根据模型以及输入原文,找到得分最高的推导 ${d}^*$ \begin{displaymath} d^* = \argmax_{d \in D} \textrm{score}(d) \end{displaymath} \vspace{-0.8em} \begin{itemize} \item 其中 $D$表示所有可能的推导构成的搜索空间。 \item $\textrm{score}(d)$即$\Pr(d, \mathbf{t}|\mathbf{s})$,表示前面提到的所有特征的得分 \end{itemize} \item 实际解码过程中,通常按从左到右的顺序生成译文,递增地计算翻译概率,同时对已翻译的原文进行标记 \vspace{1em} \begin{center} \begin{tikzpicture} \begin{scope}[minimum height = 
18pt] \node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:}; \node[anchor=west,fill=ugreen!50] (s1) at (0, 0) {桌子 上}; \node[anchor=west,fill=red!50] (s2) at ([xshift=1em]s1.east) {有}; \node[anchor=west,fill=blue!50] (s3) at ([xshift=1em]s2.east) {一个 苹果}; \node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:}; \visible<2->{ \node[anchor=west,fill=red!50] (t1) at (0, -1.5) {There is}; \path[<->, thick] (s2.south) edge (t1.north); } \visible<3->{ \node[anchor=west,fill=blue!50] (t2) at ([xshift=1em]t1.east) {an apple}; \path[<->, thick] (s3.south) edge (t2.north); } \visible<4->{ \node[anchor=west,fill=ugreen!50] (t3) at ([xshift=1em]t2.east) {on the table}; \path[<->, thick] (s1.south) edge (t3.north); } \end{scope} \end{tikzpicture} \end{center} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 什么是解码 \begin{frame}{解码问题 - 翻译选项} % 定义解码是啥 \begin{itemize} \item 对于每个输入的源语句子$\textbf{s}$,可以从短语表中查询到所有可能的翻译选项,用来翻译 \vspace{0.1em} \begin{center} \begin{tikzpicture} \begin{scope}[minimum height = 16pt] \node[anchor=east] (s0) at (-0.8em, 0) {$\textbf{s}$:}; \node[anchor=west] (s1) at (0, 0) {桌子}; \node[anchor=west] (s2) at ([xshift=2em]s1.east) {上}; \node[anchor=west] (s3) at ([xshift=2.3em]s2.east) {有}; \node[anchor=west] (s4) at ([xshift=2em]s3.east) {一个}; \node[anchor=west] (s5) at ([xshift=1.6em]s4.east) {苹果}; \node [anchor=north,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=3em] (t11) at ([yshift=-0.5em]s1.south) {table}; \node [anchor=north,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=3em] (t12) at ([yshift=-0.2em]t11.south) {desk}; \node [anchor=north,inner sep=2pt,fill=green!20,minimum height=1.5em,minimum width=3em] (t21) at ([yshift=-0.5em]s2.south) {on}; \node [anchor=north,inner sep=2pt,fill=green!20,minimum height=1.5em,minimum width=3em] (t22) at ([yshift=-0.2em]t21.south) {up}; \node [anchor=north,inner sep=2pt,fill=blue!20,minimum 
height=1.5em,minimum width=3.4em] (t31) at ([yshift=-0.5em]s3.south) {have}; \node [anchor=north,inner sep=2pt,fill=blue!20,minimum height=1.5em,minimum width=3.4em] (t32) at ([yshift=-0.2em]t31.south) {there is}; \node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3em] (t41) at ([yshift=-0.5em]s4.south) {one}; \node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3em] (t42) at ([yshift=-0.2em]t41.south) {an}; \node [anchor=north,inner sep=2pt,fill=purple!20,minimum height=1.5em,minimum width=3em] (t51) at ([yshift=-0.5em]s5.south) {apple}; \node [anchor=north,inner sep=2pt,fill=purple!20,minimum height=1.5em,minimum width=3em] (t52) at ([yshift=-0.2em]t51.south) {apples}; \node [anchor=north west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=7.2em] (t13) at ([yshift=-3.7em]t12.south west) {on table}; \node [anchor=north west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=7.2em] (t14) at ([yshift=-0.2em]t13.south west) {on the table}; \node [anchor=north west,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=7.35em] (t43) at ([yshift=-0.2em]t42.south west) {one apple}; \node [anchor=north west,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=7.35em] (t44) at ([yshift=-0.2em]t43.south west) {an apple}; \node [anchor=north west,inner sep=2pt,fill=green!20,minimum height=1.5em,minimum width=7.25em] (t23) at ([yshift=-0.2em]t22.south west) {upon there}; \node [anchor=north west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=11.5em] (t15) at ([yshift=-1.95em]t12.south west) {upon the table}; \node [anchor=north west,inner sep=2pt,fill=blue!20,minimum height=1.5em,minimum width=11.8em] (t33) at ([yshift=-3.7em]t32.south west) {there is an apple}; \node [anchor=north west,inner sep=2pt,fill=blue!20,minimum height=1.5em,minimum width=11.8em] (t34) at ([yshift=-0.2em]t33.south west) {have an apple...}; \end{scope} \end{tikzpicture} 
\end{center} \vspace{0.3em} \item 正确的翻译都在翻译选项中,但是里面也包含了很多其他翻译选项,接下来介绍如何找到最优的译文? \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 什么是解码 \begin{frame}{解码问题 - 假设扩展} % 定义解码是啥 \begin{itemize} \item 从翻译选项中挑选合适的选项,顺序地构建输出,构建的局部翻译称为翻译假设 \begin{enumerate} \item<1-> 翻译的起点是空假设,局部概率得分是1 \item<2-> 挑选一个翻译选项扩展为新的翻译假设,同时记录已翻译的原文并计算翻译代价(可以同时生成多种翻译假设) \item<3-> 对未覆盖的源语应用上述方法 \item<4-> 当翻译假设覆盖了所有的原文时,就得到了一个完整的翻译假设,从所有的翻译假设中找到一个概率最高的翻译 \end{enumerate} \vspace{0.5em} \begin{center} \begin{tikzpicture} \begin{scope} \visible<1->{ \node [anchor=north,inner sep=2pt,fill=red!20,minimum height=1.7em,minimum width=3.2em] (h0) at (0,0) {\scriptsize{null}}; \node [anchor=north west,inner sep=1.5pt,fill=black] (hl0) at (h0.north west) {\tiny{{\color{white} \textbf{0}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.7em,fill=black] (pt0) at (h0.east) {\tiny{{\color{white} \textbf{P=1}}}}; } \visible<2->{ \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.7em,minimum width=3.2em] (h1) at ([xshift=2.5em]h0.east) {\scriptsize{on}}; \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.7em,minimum width=3.2em] (h2) at ([xshift=2.5em,yshift=2.5em]h0.east) {\scriptsize{table}}; \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.7em,minimum width=3.2em] (h3) at ([xshift=2.5em,yshift=-2.5em]h0.east) {\scriptsize{there is}}; \node [anchor=north west,inner sep=1.5pt,fill=black] (hl1) at (h1.north west) {\tiny{{\color{white} \textbf{2}}}}; \node [anchor=north west,inner sep=1.5pt,fill=black] (hl2) at (h2.north west) {\tiny{{\color{white} \textbf{1}}}}; \node [anchor=north west,inner sep=1.5pt,fill=black] (hl3) at (h3.north west) {\tiny{{\color{white} \textbf{3}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.7em,fill=black] (pt1) at (h1.east) {\tiny{{\color{white} \textbf{P=.2}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum 
width=1.7em,fill=black] (pt2) at (h2.east) {\tiny{{\color{white} \textbf{P=.3}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.7em,fill=black] (pt3) at (h3.east) {\tiny{{\color{white} \textbf{P=.5}}}}; \draw [->,very thick,ublue] ([xshift=0.1em]pt0.south) -- ([xshift=-0.1em]h1.west); \draw [->,very thick,ublue] ([xshift=0.1em]pt0.south) -- ([xshift=-0.1em]h2.west); \draw [->,very thick,ublue] ([xshift=0.1em]pt0.south) -- ([xshift=-0.1em]h3.west); } \visible<3->{ \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.7em,minimum width=3.2em] (h4) at ([xshift=2.5em,yshift=-1.8em]h3.east) {\scriptsize{one}}; \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.7em,minimum width=3.2em] (h5) at ([xshift=2.5em,yshift=0.7em]h3.east) {\scriptsize{an apple}}; \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.7em,minimum width=3.2em] (h6) at ([xshift=2.5em,yshift=0.7em]h1.east) {\scriptsize{table}}; \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.7em,minimum width=3.5em] (h7) at ([xshift=2.5em,yshift=0.7em]h5.east) {\scriptsize{on the table}}; \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.7em,minimum width=3.2em] (h8) at ([xshift=2.5em,yshift=-2em]h5.east) {\scriptsize{apple}}; \node [anchor=north west,inner sep=1.5pt,fill=black] (hl4) at (h4.north west) {\tiny{{\color{white} \textbf{4}}}}; \node [anchor=north west,inner sep=1.5pt,fill=black] (hl5) at (h5.north west) {\tiny{{\color{white} \textbf{4-5}}}}; \node [anchor=north west,inner sep=1.5pt,fill=black] (hl6) at (h6.north west) {\tiny{{\color{white} \textbf{1}}}}; \node [anchor=north west,inner sep=1.5pt,fill=black] (hl7) at (h7.north west) {\tiny{{\color{white} \textbf{1-2}}}}; \node [anchor=north west,inner sep=1.5pt,fill=black] (hl8) at (h8.north west) {\tiny{{\color{white} \textbf{5}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.7em,fill=black] (pt4) at (h4.east) {\tiny{{\color{white} \textbf{P=.1}}}}; \node 
[anchor=north,rotate=90,inner sep=1pt,minimum width=1.7em,fill=black] (pt5) at (h5.east) {\tiny{{\color{white} \textbf{P=.4}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.7em,fill=black] (pt6) at (h6.east) {\tiny{{\color{white} \textbf{P=.3}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.7em,fill=black] (pt7) at (h7.east) {\tiny{{\color{white} \textbf{P=.4}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.7em,fill=black] (pt8) at (h8.east) {\tiny{{\color{white} \textbf{P=.2}}}}; \draw [->,very thick,ublue] ([xshift=0.1em]pt1.south) -- ([xshift=1em,yshift=0.7em]pt1.south); \draw [->,very thick,ublue] ([xshift=0.1em]pt2.south) -- ([xshift=1em,yshift=-0.7em]pt2.south); \draw [->,very thick,ublue] ([xshift=0.1em]pt2.south) -- ([xshift=1em,yshift=0.7em]pt2.south); \draw [->,very thick,ublue] ([xshift=0.1em]pt6.south) -- ([xshift=1em,yshift=-0.7em]pt6.south); \draw [->,very thick,ublue] ([xshift=0.1em]pt6.south) -- ([xshift=1em,yshift=0.7em]pt6.south); \draw [->,very thick,ublue] ([xshift=0.1em]pt3.south) -- ([xshift=-0.1em]h4.west); \draw [->,very thick,ublue] ([xshift=0.1em]pt3.south) -- ([xshift=-0.1em]h5.west); \draw [->,very thick,ublue] ([xshift=0.1em]pt3.south) -- ([xshift=-0.1em]h6.west); \draw [->,very thick,ublue] ([xshift=0.1em]pt5.south) -- ([xshift=-0.1em]h7.west); \draw [->,very thick,ublue] ([xshift=0.1em]pt5.south) -- ([xshift=1em,yshift=-0.7em]pt5.south); \draw [->,very thick,ublue] ([xshift=0.1em]pt4.south) -- ([xshift=-0.1em]h8.west); \draw [->,very thick,ublue] ([xshift=0.1em]pt4.south) -- ([xshift=1em,yshift=-0.7em]pt4.south); } \visible<4->{ \draw [->,ultra thick,red,line width=2pt,opacity=0.7] ([xshift=-0.2em]h0.west) -- ([xshift=0.7em]h0.east) -- ([xshift=-0.2em]h3.west) -- ([xshift=0.8em]h3.east) -- ([xshift=-0.2em]h5.west) -- ([xshift=0.8em]h5.east) -- ([xshift=-0.2em]h7.west) -- ([xshift=0.8em]h7.east); \node [anchor=north west] (wtranslabel) at ([yshift=-3em]h0.south west) {\scriptsize{翻译路径:}}; 
\draw [->,ultra thick,red,line width=1.5pt,opacity=0.7] (wtranslabel.east) -- ([xshift=1.5em]wtranslabel.east); } \end{scope} \end{tikzpicture} \end{center} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 假设重组 \begin{frame}{假设重组} % 描述过程,可能需要几页,注意提到coverage,beam search,hypothesis recombination \begin{itemize} \item 随着源语长度的增加,搜索空间将变得十分巨大,其中相同的翻译假设可以通过不同的搜索路径得到 \begin{itemize} \item 可以通过\alert{假设重组},来减少翻译假设的数量 \end{itemize} \vspace{0.8em} \begin{center} \begin{tikzpicture} \begin{scope} \visible<2->{ \node [anchor=north,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h0) at (0,0) {\tiny{null}}; \node [anchor=north west,inner sep=1.0pt,fill=black] (hl0) at (h0.north west) {\tiny{{\color{white} \textbf{0}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt0) at (h0.east) {\tiny{{\color{white} \textbf{P=1}}}}; \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h2) at ([xshift=1.8em,yshift=2.5em]h0.east) {\tiny{an}}; \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h3) at ([xshift=1.8em]h2.east) {\tiny{apple}}; \node [anchor=north west,inner sep=1.0pt,fill=black] (hl2) at (h2.north west) {\tiny{{\color{white} \textbf{1}}}}; \node [anchor=north west,inner sep=1.0pt,fill=black] (hl3) at (h3.north west) {\tiny{{\color{white} \textbf{2}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt2) at (h2.east) {\tiny{{\color{white} \textbf{P=.3}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt3) at (h3.east) {\tiny{{\color{white} \textbf{P=.5}}}}; \draw [->,very thick,ublue] ([xshift=0.1em]pt0.south) -- ([xshift=-0.1em]h2.west); \draw [->,very thick,ublue] ([xshift=0.1em]pt2.south) -- ([xshift=-0.1em]h3.west); \visible<2>{ \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum 
width=2.0em] (h1) at ([xshift=5.3em]h0.east) {\tiny{an apple}}; \node [anchor=north west,inner sep=1.0pt,fill=black] (hl1) at (h1.north west) {\tiny{{\color{white} \textbf{1-2}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt1) at (h1.east) {\tiny{{\color{white} \textbf{P=.5}}}}; \draw [->,very thick,ublue] ([xshift=0.1em]pt0.south) -- ([xshift=-0.1em]h1.west); } \visible<3->{ \draw [->,ultra thick,red,line width=2pt,opacity=0.7] ([xshift=0.1em]pt0.south) -- ([xshift=5em]pt0.south) -- ([yshift=-0.1em]h3.south); } } \visible<4->{ \node [anchor=north west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h4) at ([yshift=-4em]h0.south west) {\tiny{null}}; \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h5) at ([xshift=1.8em]h4.east) {\tiny{he}}; \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h6) at ([xshift=1.8em,yshift=2.5em]h4.east) {\tiny{it}}; \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h8) at ([xshift=1.8em]h6.east) {\tiny{is not}}; \node [anchor=north west,inner sep=1.0pt,fill=black] (hl4) at (h4.north west) {\tiny{{\color{white} \textbf{0}}}}; \node [anchor=north west,inner sep=1.0pt,fill=black] (hl5) at (h5.north west) {\tiny{{\color{white} \textbf{1}}}}; \node [anchor=north west,inner sep=1.0pt,fill=black] (hl5) at (h6.north west) {\tiny{{\color{white} \textbf{1}}}}; \node [anchor=north west,inner sep=1.0pt,fill=black] (hl5) at (h8.north west) {\tiny{{\color{white} \textbf{2}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt4) at (h4.east) {\tiny{{\color{white} \textbf{P=1}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt5) at (h5.east) {\tiny{{\color{white} \textbf{P=.3}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt6) at (h6.east) {\tiny{{\color{white} \textbf{P=.4}}}}; \node 
[anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt8) at (h8.east) {\tiny{{\color{white} \textbf{P=.2}}}}; \draw [->,very thick,ublue] ([xshift=0.1em]pt4.south) -- ([xshift=-0.1em]h5.west); \draw [->,very thick,ublue] ([xshift=0.1em]pt4.south) -- ([xshift=-0.1em]h6.west); \draw [->,very thick,ublue] ([xshift=0.1em]pt6.south) -- ([xshift=-0.1em]h8.west); \visible<4>{ \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h7) at ([xshift=1.8em]h5.east) {\tiny{is not}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt7) at (h7.east) {\tiny{{\color{white} \textbf{P=.2}}}}; \node [anchor=north west,inner sep=1.0pt,fill=black] (hl5) at (h7.north west) {\tiny{{\color{white} \textbf{2}}}}; \draw [->,very thick,ublue] ([xshift=0.1em]pt5.south) -- ([xshift=-0.1em]h7.west); } \visible<5->{ \draw [->,ultra thick,red,line width=2pt,opacity=0.7] ([xshift=0.1em]pt5.south) -- ([xshift=1.5em]pt5.south) -- ([yshift=-0.1em]h8.south); } } \visible<2->{ \node [anchor=west] (line1) at ([xshift=2em,yshift=-0.5em]h3.east) {\footnotesize{1.翻译过相同数量的源语,输出}}; \node [anchor=north west] (line2) at ([xshift=0.6em,yshift=-0.1em]line1.south west) {\footnotesize{相同的翻译。舍弃概率低的假设}}; } \visible<4->{ \node [anchor=west] (line3) at ([xshift=2em,yshift=-0.5em]h8.east) {\footnotesize{2.输出结果略有不同也可以重新}}; \node [anchor=north west] (line4) at ([xshift=0.6em,yshift=-0.1em]line3.south west) {\footnotesize{组合,只要有相同后续代价即可}}; } \begin{pgfonlayer}{background} \visible<2->{ \node [rectangle,inner sep=0.2em,fill=red!10] [fit = (line1) (line2)] (box1) {}; } \visible<4->{ \node [rectangle,inner sep=0.2em,fill=green!10] [fit = (line3) (line4)] (box2) {}; } \end{pgfonlayer} \end{scope} \end{tikzpicture} \end{center} \vspace{0.3em} \item<5-> 重组假设可以减少内部表示不同,如不同短语切分的情况,能够更严格和高效地搜索 \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 栈解码 
\begin{frame}{栈解码} % 描述过程,可能需要几页,注意提到coverage,beam search,hypothesis recombination \begin{itemize} \item 还有一种减小搜索空间的方法,如果早期发现较差的翻译假设,则将它舍弃,并忽略由它扩展出来的翻译假设 \begin{itemize} \item 整理翻译假设,放进\alert{假设堆栈}中 \item 堆栈按照已翻译的词数进行分类 \item 如果栈过大,则删掉栈里面最差的那些假设 \end{itemize} \vspace{0.2em} \begin{center} \begin{tikzpicture} \begin{scope} \visible<2->{ \node [anchor=north,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.2em] (h0) at (0,0) {\tiny{null}}; \node [anchor=north west,inner sep=1.5pt,fill=black] (hl0) at (h0.north west) {\tiny{{\color{white} \textbf{0}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt0) at (h0.east) {\tiny{{\color{white} \textbf{P=1}}}}; } \visible<3->{ \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.2em] (h13) at ([xshift=2.1em,yshift=5em]h0.east) {\tiny{there is}}; %\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.2em] (h12) at ([xshift=2.1em,yshift=2.5em]h0.east) {\tiny{on}}; \node [anchor=west,inner sep=2pt,minimum height=1.5em,minimum width=2.2em] (h12) at ([xshift=2.1em,yshift=3em]h0.east) {\footnotesize{\textbf{...}}}; \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.2em] (h1) at ([xshift=2.1em]h0.east) {\tiny{table}}; \node [anchor=north west,inner sep=1.0pt,fill=black] (hl1) at (h1.north west) {\tiny{{\color{white} \textbf{1}}}}; %\node [anchor=north west,inner sep=1.0pt,fill=black] (hl2) at (h12.north west) {\tiny{{\color{white} \textbf{2}}}}; \node [anchor=north west,inner sep=1.0pt,fill=black] (hl3) at (h13.north west) {\tiny{{\color{white} \textbf{3}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt1) at (h1.east) {\tiny{{\color{white} \textbf{P=.2}}}}; %\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt2) at (h12.east) {\tiny{{\color{white} \textbf{P=.3}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] 
(pt3) at (h13.east) {\tiny{{\color{white} \textbf{P=.5}}}}; \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.2em] (h2) at ([xshift=2.1em]h1.east) {\tiny{have}}; \node [anchor=west,inner sep=2pt,minimum height=1.5em,minimum width=2.2em] (h22) at ([xshift=2.1em]h12.east) {\footnotesize{\textbf{...}}}; \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.2em] (h23) at ([xshift=2.1em]h13.east) {\tiny{an}}; \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.2em] (h3) at ([xshift=2.1em]h2.east) {\tiny{there is}}; \node [anchor=west,inner sep=2pt,minimum height=1.5em,minimum width=2.2em] (h32) at ([xshift=2.1em]h22.east) {\footnotesize{\textbf{...}}}; \node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.2em] (h33) at ([xshift=2.1em]h23.east) {\tiny{an apple}}; \node [anchor=north west,inner sep=1.0pt,fill=black] (hl2) at (h2.north west) {\tiny{{\color{white} \textbf{3}}}}; \node [anchor=north west,inner sep=1.0pt,fill=black] (hl23) at (h23.north west) {\tiny{{\color{white} \textbf{4}}}}; \node [anchor=north west,inner sep=1.0pt,fill=black] (hl3) at (h3.north west) {\tiny{{\color{white} \textbf{2}}}}; \node [anchor=north west,inner sep=1.0pt,fill=black] (hl33) at (h33.north west) {\tiny{{\color{white} \textbf{4-5}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt2) at (h2.east) {\tiny{{\color{white} \textbf{P=.5}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt23) at (h23.east) {\tiny{{\color{white} \textbf{P=.5}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt3) at (h3.east) {\tiny{{\color{white} \textbf{P=.5}}}}; \node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt33) at (h33.east) {\tiny{{\color{white} \textbf{P=.5}}}}; } \node [anchor=north] (l0) at ([xshift=0.2em,yshift=-0.7em]h0.south) {\scriptsize{\textbf{未译词}}}; 
\node [anchor=north] (l1) at ([xshift=0.3em,yshift=-0.7em]h1.south) {\scriptsize{\textbf{已译1词}}}; \node [anchor=north] (l2) at ([xshift=0.3em,yshift=-0.7em]h2.south) {\scriptsize{\textbf{已译2词}}}; \node [anchor=north] (l3) at ([xshift=0.3em,yshift=-0.7em]h3.south) {\scriptsize{\textbf{已译3词}}}; \begin{pgfonlayer}{background} \node [rectangle,inner sep=0.3em,fill=blue!10] [fit = (h0) (pt0)] (box0) {}; \node [rectangle,inner sep=0.3em,fill=blue!10] [fit = (h1) (pt1) (h13)] (box1) {}; \node [rectangle,inner sep=0.3em,fill=blue!10] [fit = (h2) (pt2) (h23)] (box2) {}; \node [rectangle,inner sep=0.3em,fill=blue!10] [fit = (h3) (pt3) (h33)] (box3) {}; \end{pgfonlayer} \visible<3->{ \draw [->,thick,red] (h13.north).. controls +(60:0.5) and +(120:0.5) .. (h23.north); \draw [->,thick,red] (h13.north).. controls +(58:0.8) and +(122:0.8) .. (h33.north); \draw [->,thick,red] (h1.north).. controls +(60:0.5) and +(120:0.5) .. (h2.north); \draw [->,thick,red] (h2.north).. controls +(60:0.5) and +(120:0.5) .. 
(h3.north); } \node [anchor=south east] (wtranslabel) at ([xshift=-2em,yshift=-2em]h0.south west) {\scriptsize{\textbf{:假设堆栈}}}; \node [anchor=east,inner sep=2pt,fill=blue!10,minimum height=1em,minimum width=2em] (stacklabel) at ([xshift=-0.1em]wtranslabel.west) {}; \visible<2->{ \node [anchor=east] (line1) at ([xshift=-1.0em,yshift=0em]h0.west) {\footnotesize{0号栈包含空假设}}; } \visible<3->{ \node [anchor=east] (line2) at ([xshift=-1.5em,yshift=0em]h13.west) {\footnotesize{通过假设扩展产生新的假设}}; \node [anchor=north west] (line3) at ([yshift=0.1em]line2.south west) {\footnotesize{并不断的被存入假设堆栈中}}; } \begin{pgfonlayer}{background} \visible<2->{ \node [rectangle,inner sep=0.1em,fill=ugreen!10] [fit = (line1)] (box1) {}; } \visible<3->{ \node [rectangle,inner sep=0.1em,fill=red!10] [fit = (line2) (line3)] (box2) {}; } \end{pgfonlayer} \end{scope} \end{tikzpicture} \end{center} \item<3-> 使用栈解码可以很大程度上提高解码效率 \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ \section{基于层次短语的模型} %%%------------------------------------------------------------------------------------------------------------ %%% 第二节的内容 \begin{frame}{Outline} \vspace{5em} \begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow] {\Large \begin{center} \textbf{基于层次短语的翻译模型} \end{center} } \end{tcolorbox} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 短语系统的问题 - 数据稀疏和无法处理长距离依赖 \begin{frame}{基于短语的方法的不足} \begin{itemize} \item 短语可以很好的捕捉词语之间的局部搭配和调序,但是长距离依赖需要更长的短语 \begin{itemize} \item 实践中发现使用超过长度3的短语作用不大 \item 短语非常稀疏,包含多个词的短语大多非常低频 \end{itemize} \vspace{0.5em} \begin{center} \begin{tabular}{l | r} 短语(中文) & 训练数据中出现频次 \\ \hline 包含 & 3341 \\ 包含 多个 & 213 \\ 包含 多个 词 & 12 \\ 包含 多个 词 的 & 8 \\ 包含 多个 词 的 短语 & 0 \\ 包含 多个 词 的 短语 大多 & 0 \end{tabular} \end{center} \vspace{0.5em} \item<2-> 简单使用短语和$n$-gram语言模型无法处理长距离的调序 
\begin{itemize} \item 引入独立的调序模型,比如简单的基于距离的调序 \item 当然,也可以设计更加复杂的调序模型 \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 短语系统的问题 - 一个实例 \begin{frame}{基于短语的方法的不足 - 一个实例} \begin{itemize} \item 一个短语翻译不成功的例子(Chiang, 2005) \end{itemize} \vspace{-1.5em} \begin{center} \begin{tikzpicture} \begin{scope} \node [anchor=east] (shead) at (0,0) {源语:}; \node [anchor=west] (swords) at (shead.east) {澳洲\ \ 是\ \ 与\ \ 北韩\ \ 有\ \ 邦交\ \ 的\ \ 少数\ \ 国家\ \ 之一}; \node [anchor=north east] (thead) at ([yshift=-0.8em]shead.south east) {短语系统:}; \node [anchor=west] (twords) at (thead.east) {Australia is diplomatic relations with North Korea}; \node [anchor=north west] (twords2) at ([yshift=-0.2em]twords.south west) {is one of the few countries}; \node [anchor=north east] (rhead) at ([yshift=-2.2em]thead.south east) {参考译文:}; \node [anchor=west] (rwords) at (rhead.east) {Australia is one of the few countries that have}; \node [anchor=north west] (rwords2) at ([yshift=-0.2em]rwords.south west) {diplomatic relations with North Korea}; \begin{pgfonlayer}{background} \visible<2->{ \draw[fill=red!20,draw=white] ([xshift=-5.6em]twords.north) rectangle ([xshift=11em]twords.south); \draw[fill=blue!20,draw=white] ([xshift=-4.8em]twords2.north) rectangle ([xshift=6.3em]twords2.south); \node [anchor=south east,inner sep=1pt,fill=black] (l1) at ([xshift=11em]twords.south) {\tiny{{\color{white} 1}}}; \node [anchor=south east,inner sep=1pt,fill=black] (l2) at ([xshift=6.3em]twords2.south) {\tiny{{\color{white} 2}}}; } \end{pgfonlayer} \end{scope} \end{tikzpicture} \end{center} \begin{itemize} \item<2-> 从短语系统翻译结果可以看出 \begin{itemize} \item diplomatic relations with North Korea能够进行正确调序 \item one of the few countries能够进行正确调序 \item \textbf{但是},两个短语(\tikz{\node[fill=black,inner sep=2pt] {\tiny{{\color{white} 1}}};} 和 \tikz{\node[fill=black,inner sep=2pt] {\tiny{{\color{white} 2}}};})没有正确调序 - 怎么办? 
\end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 引入层次短语规则 \begin{frame}{引入新的翻译单元} \begin{itemize} \item 显然,通过由连续单词构成的短语拼装出理想的译文需要比较复杂的机制。但是,语言是有``结构''的,我们可以用一种新的方式描述翻译: \begin{displaymath} \langle\ \textrm{与}\ \textrm{X}_1\ \textrm{有}\ \textrm{X}_2,\ \ \textrm{have}\ \textrm{X}_2\ \textrm{with}\ \textrm{X}_1\ \rangle \end{displaymath} 这里,$\textrm{X}_1$和$\textrm{X}_2$表示变量,源语和目标语相同的变量表示对应关系,变量可以被其它连续词串替换。这样,这种源语言和目标语言的对应构成了一种翻译规则或模版,相当于把``$\textrm{与}\ \textrm{X}_1\ \textrm{有}\ \textrm{X}_2$''翻译为``$\textrm{have}\ \textrm{X}_2\ \textrm{with}\ \textrm{X}_1$'',调序信息就隐含在变量的编号里 \vspace{0.5em} \item<2-> 类似的,可以写出很多这样的翻译规则 \vspace{-1.5em} \begin{eqnarray} \langle\ \textrm{X}_1\ \textrm{是}\ \textrm{X}_2, & & \textrm{X}_1\ \textrm{is}\ \textrm{X}_2\ \rangle \nonumber \\ \langle\ \textrm{X}_1\ \textrm{之一},& & \textrm{one\ \ of\ \ }\textrm{X}_1\ \rangle \nonumber \\ \langle\ \textrm{X}_1\ \textrm{的}\ \textrm{X}_2,& & \textrm{X}_2\ \textrm{that\ \ have\ \ }\textrm{X}_1\ \rangle \nonumber \end{eqnarray} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 使用翻译规则 \begin{frame}{使用翻译规则描述双语句子生成过程} \begin{itemize} \item 翻译过程可以用上述规则描述 \begin{itemize} \item 每个变量在源语和目标语端可以被同步替换 \end{itemize} \end{itemize} \begin{center} \begin{tikzpicture} \begin{scope} \visible<3->{ % rule 1 (source) \node [anchor=west] (rule1s1) at (0,0) {与}; \node [anchor=west,inner sep=2pt,fill=black] (rule1s2) at ([xshift=0.5em]rule1s1.east) {\scriptsize{{\color{white} $\textrm{X}_1$}}}; \node [anchor=west] (rule1s3) at ([xshift=0.5em]rule1s2.east) {有}; \node [anchor=west,inner sep=2pt,fill=black] (rule1s4) at ([xshift=0.5em]rule1s3.east) {\scriptsize{{\color{white} $\textrm{X}_2$}}}; % rule 1 (target) \node [anchor=west] (rule1t1) at ([xshift=0.8in]rule1s4.east) {have}; \node [anchor=west,inner sep=2pt,fill=black] (rule1t2) 
at ([xshift=0.5em]rule1t1.east) {\scriptsize{{\color{white} $\textrm{X}_2$}}}; \node [anchor=west] (rule1t3) at ([xshift=0.5em]rule1t2.east) {with}; \node [anchor=west,inner sep=2pt,fill=black] (rule1t4) at ([xshift=0.5em]rule1t3.east) {\scriptsize{{\color{white} $\textrm{X}_1$}}}; } \visible<1->{ % phrase 1 (source and target) \node [anchor=north] (phrase1s1) at ([yshift=-1em]rule1s2.south) {北韩}; \node [anchor=north] (phrase1t1) at ([yshift=-1em]rule1t4.south) {North Korea}; } \visible<2->{ % phrase 2 (source and target) \node [anchor=north] (phrase2s1) at ([yshift=-3em]rule1s4.south) {邦交}; \node [anchor=north] (phrase2t1) at ([yshift=-3em]rule1t2.south) {diplomatic relations}; } \begin{pgfonlayer}{background} \visible<3->{ \node [rectangle,draw=red,inner sep=1pt,thick,fill=white,drop shadow] [fit = (rule1s1) (rule1s4)] (rule1s) {}; \node [rectangle,draw=red,inner sep=2pt,thick,fill=white,drop shadow] [fit = (rule1t1) (rule1t4)] (rule1t) {}; \draw [<->,dotted,thick,red] ([xshift=0.1em]rule1s.east) -- ([xshift=-0.1em]rule1t.west); } \end{pgfonlayer} \visible<3->{ % edges (phrases 1-2 to rule1) \draw [->] (phrase1s1.north) -- ([yshift=-0.1em]rule1s2.south); \draw [->] (phrase1t1.north) -- ([yshift=-0.1em]rule1t4.south); \draw [->] (phrase2s1.north) -- ([yshift=-0.1em]rule1s4.south); \draw [->] (phrase2t1.north) -- ([yshift=-0.1em]rule1t2.south); } \visible<4->{ % rule 2 (source) \node [anchor=west,inner sep=2pt,fill=black] (rule2s1) at ([yshift=3.5em,xshift=-0.5em]rule1s1.north west) {\scriptsize{{\color{white} $\textrm{X}_1$}}}; \node [anchor=west] (rule2s2) at ([xshift=0.5em]rule2s1.east) {的}; \node [anchor=west,inner sep=2pt,fill=black] (rule2s3) at ([xshift=0.5em]rule2s2.east) {\scriptsize{{\color{white} $\textrm{X}_2$}}}; % rule 2 (target) \node [anchor=west,inner sep=2pt,fill=black] (rule2t1) at ([xshift=1.8in]rule2s3.east) {\scriptsize{{\color{white} $\textrm{X}_2$}}}; \node [anchor=west] (rule2t2) at ([xshift=0.5em]rule2t1.east) {that}; \node 
[anchor=west,inner sep=2pt,fill=black] (rule2t3) at ([xshift=0.5em]rule2t2.east) {\scriptsize{{\color{white} $\textrm{X}_1$}}}; % phrase 3 (source and target) \node [anchor=north] (phrase3s1) at ([yshift=-0.8em]rule2s3.south) {\footnotesize{少数 国家}}; \node [anchor=north] (phrase3t1) at ([yshift=-0.8em]rule2t1.south) {\footnotesize{the few countries}}; % edges (phrase 3 to rule 2 and rule1 to rule2) \draw [->] (phrase3s1.north) -- ([yshift=-0.1em]rule2s3.south); \draw [->] (phrase3t1.north) -- ([yshift=-0.1em]rule2t1.south); \draw [->] ([xshift=2em]rule1s.north west) ..controls +(north:1.5em) and +(south:1.5em).. ([yshift=-0.1em]rule2s1.south); \draw [->] ([xshift=-2em]rule1t.north east) ..controls +(north:1.5em) and +(south:1.5em).. ([yshift=-0.1em]rule2t3.south); } \begin{pgfonlayer}{background} \visible<4->{ \node [rectangle,draw=blue,inner sep=4pt,thick,fill=white,drop shadow] [fit = (rule2s1) (rule2s3)] (rule2s) {}; \node [rectangle,draw=blue,inner sep=4pt,thick,fill=white,drop shadow] [fit = (rule2t1) (rule2t3)] (rule2t) {}; \draw [<->,dotted,thick,blue] ([xshift=0.1em]rule2s.east) -- ([xshift=-0.1em]rule2t.west); } \end{pgfonlayer} \visible<5->{ % rule 3 (source) \node [anchor=west,inner sep=2pt,fill=black] (rule3s1) at ([yshift=2.5em,xshift=4em]rule2s1.north west) {\scriptsize{{\color{white} $\textrm{X}_1$}}}; \node [anchor=west] (rule3s2) at ([xshift=0.5em]rule3s1.east) {之一}; % rule 3 (target) \node [anchor=west] (rule3t1) at ([xshift=1.0in]rule3s2.east) {one of}; \node [anchor=west,inner sep=2pt,fill=black] (rule3t2) at ([xshift=0.5em]rule3t1.east) {\scriptsize{{\color{white} $\textrm{X}_1$}}}; % edges: rule 2 to rule 3 \draw [->] ([xshift=-1em]rule2s.north) ..controls +(north:1.2em) and +(south:1.2em).. ([yshift=-0.1em]rule3s1.south); \draw [->] ([xshift=1em]rule2t.north) ..controls +(north:1.2em) and +(south:1.2em).. 
([yshift=-0.1em]rule3t2.south); } \begin{pgfonlayer}{background} \visible<5->{ \node [rectangle,draw=ugreen,inner sep=1pt,thick,fill=white,drop shadow] [fit = (rule3s1) (rule3s2)] (rule3s) {}; \node [rectangle,draw=ugreen,inner sep=2pt,thick,fill=white,drop shadow] [fit = (rule3t1) (rule3t2)] (rule3t) {}; \draw [<->,dotted,thick,ugreen] ([xshift=0.1em]rule3s.east) -- ([xshift=-0.1em]rule3t.west); } \end{pgfonlayer} \visible<6->{ % rule 4 (source) \node [anchor=west,inner sep=2pt,fill=black] (rule4s1) at ([yshift=3.5em,xshift=-3.5em]rule3s1.north west) {\scriptsize{{\color{white} $\textrm{X}_1$}}}; \node [anchor=west] (rule4s2) at ([xshift=0.5em]rule4s1.east) {是}; \node [anchor=west,inner sep=2pt,fill=black] (rule4s3) at ([xshift=0.5em]rule4s2.east) {\scriptsize{{\color{white} $\textrm{X}_2$}}}; % rule 2 (target) \node [anchor=west,inner sep=2pt,fill=black] (rule4t1) at ([xshift=2.0in]rule4s2.east) {\scriptsize{{\color{white} $\textrm{X}_1$}}}; \node [anchor=west] (rule4t2) at ([xshift=0.5em]rule4t1.east) {is}; \node [anchor=west,inner sep=2pt,fill=black] (rule4t3) at ([xshift=0.5em]rule4t2.east) {\scriptsize{{\color{white} $\textrm{X}_2$}}}; % phrase 4 (source and target) \node [anchor=north] (phrase4s1) at ([yshift=-0.8em]rule4s1.south) {\footnotesize{澳洲}}; \node [anchor=north] (phrase4t1) at ([yshift=-0.8em]rule4t1.south) {\footnotesize{Australia}}; % edges (phrase 4 to rule 4 and rule3 to rule4) \draw [->] (phrase4s1.north) -- ([yshift=-0.1em]rule4s1.south); \draw [->] (phrase4t1.north) -- ([yshift=-0.1em]rule4t1.south); \draw [->] ([xshift=1em]rule3s.north) ..controls +(north:1.5em) and +(south:1.5em).. ([yshift=-0.1em]rule4s3.south); \draw [->] ([xshift=-1em]rule3t.north east) ..controls +(north:1.5em) and +(south:1.5em).. 
([yshift=-0.1em]rule4t3.south); } \begin{pgfonlayer}{background} \visible<6->{ \node [rectangle,draw=orange,inner sep=4pt,thick,fill=white,drop shadow] [fit = (rule4s1) (rule4s3)] (rule4s) {}; \node [rectangle,draw=orange,inner sep=4pt,thick,fill=white,drop shadow] [fit = (rule4t1) (rule4t3)] (rule4t) {}; \draw [<->,dotted,thick,orange] ([xshift=0.1em]rule4s.east) -- ([xshift=-0.1em]rule4t.west); } \end{pgfonlayer} \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ \subsection{同步上下文无关文法} %%%------------------------------------------------------------------------------------------------------------ %%% 上下文无关文法 \begin{frame}{同步上下文无关文法} \begin{itemize} \item 以上这种对翻译的描述方式,可以用同步上下文无关文法来定义,记为Synchronous Context-Free Grammar(SCFG) \end{itemize} \begin{beamerboxesrounded}[upper=uppercolblue,lower=lowercolblue,shadow=true]{定义 - 同步上下文无关文法} {\small 一个同步上下文无关文法由五部分构成$(N, T_s, T_t, I, R)$,其中 \\ 1. $N$是非终结符集合 \\ 2. $T_s$和$T_t$分别是源语言和目标语终结符集合\\ 3. $I \subseteq N$是起始非终结符集合\\ 4. 
$R$是规则集合,每条规则$r \in R$有如下形式 \begin{displaymath} \textrm{LHS} \to \langle\ \alpha, \beta, \sim\ \rangle \end{displaymath} 其中,$\textrm{LHS} \in N$表示规则的左部,它是一个非终结符;规则右部由三部分组成,$\alpha \in (N \cup T_s)^{*}$表示由源语言终结符和非终结符组成的串;$\beta \in (N \cup T_t)^{*}$ 表示由目标语言终结符和非终结符组成的串;$\sim$表示$\alpha$和$\beta$中非终结符的1-1对应关系 } \end{beamerboxesrounded} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 上下文无关文法 - 一些实例 \begin{frame}{同步上下文无关文法 - 实例} \begin{itemize} \item SCFG可以被看做是对CFG的扩展,相当于把单语的CFG扩展到双语,如下是一些SCFG规则,其中每个规则非终结符的对应用非终结符的标号表示 \begin{eqnarray} \textrm{S} & \to & \langle\ \textrm{NP}_1\ \textrm{希望}\ \textrm{VP}_2,\ \textrm{NP}_1\ \textrm{wish to}\ \textrm{VP}_2\ \rangle \nonumber \\ \textrm{VP}& \to & \langle\ \textrm{对}\ \textrm{NP}_1\ \textrm{感到}\ \textrm{VP}_2,\ \textrm{be}\ \textrm{VP}_2\ \textrm{with}\ \textrm{NP}_1\ \rangle \nonumber \\ \textrm{NN} & \to & \langle\ \textrm{强大},\ \textrm{strong}\ \rangle \nonumber \end{eqnarray} \item<2-> 这里NP、VP等是有语言学意义的非终结符。当然,在机器翻译中这些并不是必要的,可以使用更简单的文法,只包含一种非终结符 \begin{eqnarray} \textrm{X} & \to & \langle\ \textrm{X}_1\ \textrm{希望}\ \textrm{X}_2,\ \textrm{X}_1\ \textrm{wish to}\ \textrm{X}_2\ \rangle \nonumber \\ \textrm{X}& \to & \langle\ \textrm{对}\ \textrm{X}_1\ \textrm{感到}\ \textrm{X}_2,\ \textrm{be}\ \textrm{X}_2\ \textrm{with}\ \textrm{X}_1\ \rangle \nonumber \\ \textrm{X} & \to & \langle\ \textrm{强大},\ \textrm{strong}\ \rangle \nonumber \end{eqnarray} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 一个完整文法的例子 \begin{frame}{一个完整的文法} \begin{itemize} \item 对于一个中文-英文句对,假设可以得到如下同步上下文无关文法\\ \vspace{0.3em} \textbf{源语}:\ \ \ 进口 大幅度 下降 了 \\ \vspace{0.3em} \textbf{目标语}:The imports have drastically fallen \\ \vspace{1.0em} \textbf{SCFG}:\\ \vspace{-1.5em} \begin{eqnarray} r_1:\ \ \ \textrm{X} & \to & \langle\ \textrm{进口}\ \textrm{X}_1,\ \textrm{The imports}\ \textrm{X}_1\ \rangle \nonumber \\ r_2:\ \ \ \textrm{X}& \to & \langle\ \textrm{X}_1\ \textrm{下降}\ \textrm{X}_2,\ 
\textrm{X}_2\ \textrm{X}_1\ \textrm{fallen}\ \rangle \nonumber \\ r_3:\ \ \ \textrm{X} & \to & \langle\ \textrm{大幅度},\ \textrm{drastically}\ \rangle \nonumber \\ r_4:\ \ \ \textrm{X} & \to & \langle\ \textrm{了},\ \textrm{have}\ \rangle \nonumber \end{eqnarray} 其中,规则$r_1$和$r_2$是右部含有变量的规则,这些变量可以被其它规则的右部替换;规则$r_2$是调序规则;规则$r_3$和$r_4$是纯词汇化规则,表示单词或者短语的翻译 \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 推导 \begin{frame}{翻译规则的推导} \begin{itemize} \item 使用SCFG规则的过程构成了一个\alert{推导},每次规则的使用都会同步替换源语言和目标语言串中的一个非终结符\\ \vspace{-2.0em} \begin{eqnarray} & & \langle\ \textrm{X}_1, \textrm{X}_1\ \rangle \nonumber \\ \visible<2->{& \xrightarrow[]{r_1} & \langle\ \alert{\textrm{进口}\ \textrm{X}_2},\ \alert{\textrm{The imports}\ \textrm{X}_2}\ \rangle \nonumber \\} \visible<3->{& \xrightarrow[]{r_2} & \langle\ \textrm{进口}\ \alert{\textrm{X}_3\ \textrm{下降}\ \textrm{X}_4},\ \textrm{The imports}\ \alert{\textrm{X}_4\ \textrm{X}_3\ \textrm{fallen}}\ \rangle \nonumber \\} \visible<4->{& \xrightarrow[]{r_3} & \langle\ \textrm{进口}\ \alert{\textrm{大幅度}}\ \textrm{下降}\ \textrm{X}_4, \nonumber \\} \visible<4->{& & \ \textrm{The imports}\ \textrm{X}_4\ \alert{\textrm{drastically}}\ \textrm{fallen}\ \rangle \nonumber \\} \visible<5->{& \xrightarrow[]{r_4} & \langle\ \textrm{进口}\ \textrm{大幅度}\ \textrm{下降}\ \alert{\textrm{了}}, \nonumber \\} \visible<5->{& & \ \textrm{The imports}\ \alert{\textrm{have}}\ \textrm{drastically}\ \textrm{fallen}\ \rangle \nonumber} \end{eqnarray} \visible<6->{ 这里把$d$定义为由规则$r_1, r_2, r_3, r_4$构成的SCFG推导,记作 \begin{displaymath} d = r_1 \circ r_2 \circ r_3 \circ r_4 \end{displaymath} 显然$d$定义了从源语句子生成目标语译文的一个过程 } \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% glue rules \begin{frame}{胶水规则} \begin{itemize} \item 在实际系统中往往会遇到需要把两个局部翻译线性的拼接到一起的情况,因此可以在文法中引入胶水规则(glue rule)来处理,形式如下 \begin{eqnarray} 
\textrm{S} & \to & \langle\ \textrm{S}_1\ \textrm{X}_2,\ \textrm{S}_1\ \textrm{X}_2\ \rangle \nonumber \\ \textrm{S} & \to & \langle\ \textrm{X}_1,\ \textrm{X}_1\ \rangle \nonumber \end{eqnarray} \item<2-> 本质上,胶水规则会顺序的拼接若干片段,最后整个句子会被归纳为$\textrm{S}$ \begin{eqnarray} \textrm{S} & \to & \langle\ \textrm{S}_1\ \textrm{X}_2,\ \textrm{S}_1\ \textrm{X}_2\ \rangle \nonumber \\ & \to & \langle\ \textrm{S}_3\ \textrm{X}_4\ \textrm{X}_2,\ \textrm{S}_3\ \textrm{X}_4\ \textrm{X}_2\ \rangle \nonumber \\ & \to & ... \nonumber \\ & \to & \langle\ \textrm{X}_n\ ...\ \textrm{X}_4\ \textrm{X}_2,\ \textrm{X}_n\ ...\ \textrm{X}_4\ \textrm{X}_2\ \rangle \nonumber \end{eqnarray} \item<2-> 胶水规则大大提高了系统的健壮性(即使没有复杂规则,翻译可以顺序拼接),也体现了语言翻译单调性的假设 \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 文法驱动的统计机器翻译流程 \begin{frame}{文法驱动的机器翻译流程} \begin{itemize} \item 同步翻译文法给我们带来了新的思路:可以通过不断使用文法规则完成翻译过程。这类模型的基本流程如下: \end{itemize} \begin{center} \begin{tikzpicture} \begin{scope} \tikzstyle{datanode} = [minimum width=7em,minimum height=1.7em,fill=ublue,rounded corners=0.7em]; \tikzstyle{modelnode} = [minimum width=7em,minimum height=1.7em,fill=darkred,rounded corners=0.2em]; \tikzstyle{decodingnode} = [minimum width=7em,minimum height=1.7em,fill=ugreen,rounded corners=0.2em]; \node [datanode,anchor=north west] (bitext) at (0,0) {{\color{white} \scriptsize{训练用双语数据}}}; \node [modelnode, anchor=north west] (gi) at ([xshift=2em,yshift=-0.2em]bitext.south east) {{\color{white} \scriptsize{文法(规则)抽取}}}; \node [datanode,anchor=north east] (birules) at ([xshift=-2em,yshift=-0.2em]gi.south west) {{\color{white} \scriptsize{同步翻译文法}}}; \node [modelnode, anchor=north west] (training) at ([xshift=2em,yshift=-0.2em]birules.south east) {{\color{white} \scriptsize{特征值学习}}}; \node [datanode,anchor=north east] (model) at ([xshift=-2em,yshift=-0.2em]training.south west) {{\color{white} \scriptsize{翻译模型}}}; \node [decodingnode, 
anchor=north west] (tuning) at ([xshift=2em,yshift=-0.2em]model.south east) {{\color{white} \scriptsize{特征权重调优}}}; \node [datanode,anchor=north east] (tuningdata) at ([xshift=-2em,yshift=-0.2em]tuning.south west) {{\color{white} \scriptsize{调优用双语数据}}}; \node [decodingnode, anchor=north west] (decoding) at ([xshift=2em,yshift=-0.2em]tuningdata.south east) {{\color{white} \scriptsize{解码新句子}}}; \node [datanode,anchor=south west] (monotext) at ([xshift=2em,yshift=0.2em]training.north east) {{\color{white} \scriptsize{目标语数据}}}; \node [modelnode,anchor=south west] (lm) at ([xshift=2em,yshift=0.2em]tuning.north east) {{\color{white} \scriptsize{$n$-gram语言建模}}}; \node [datanode,anchor=south west] (lmmodel) at ([xshift=2em,yshift=0.2em]decoding.north east) {{\color{white} \scriptsize{语言模型}}}; \draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]bitext.east) -- ([yshift=0.1em]gi.north west); \draw [->,very thick] ([yshift=-0.1em]gi.south west) -- ([yshift=0.3em,xshift=0.1em]birules.east); \draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]birules.east) -- ([yshift=0.1em]training.north west); \draw [->,very thick] ([yshift=-0.1em]training.south west) -- ([yshift=0.3em,xshift=0.1em]model.east); \draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]model.east) -- ([yshift=0.1em]tuning.north west); \draw [->,very thick] ([yshift=-0.1em]tuning.south) -- ([yshift=0.1em]decoding.north); \draw [->,very thick] ([yshift=0.3em,xshift=0.1em]tuningdata.east) -- ([yshift=-0.1em]tuning.south west); \draw [->,very thick] ([yshift=-0.1em]monotext.south) -- ([yshift=0.1em]lm.north); \draw [->,very thick] ([yshift=-0.1em]lm.south) -- ([yshift=0.1em]lmmodel.north); \draw [->,very thick] ([yshift=0.3em,xshift=-0.1em]lmmodel.west) -- ([yshift=-0.1em]tuning.south east); \draw [->,very thick] ([yshift=-0.3em,xshift=-0.1em]lmmodel.west) -- ([yshift=0.1em]decoding.north east); \end{scope} \end{tikzpicture} \end{center} \end{frame} 
%%%------------------------------------------------------------------------------------------------------------ \subsection{层次短语规则及翻译特征} %%%------------------------------------------------------------------------------------------------------------ %%% 翻译规则抽取 \begin{frame}{层次短语规则抽取} % 还是David Chiang的论文 \begin{itemize} \item 如何抽取规则 \begin{itemize} \item 首先进行短语抽取,从词对齐矩阵中抽取得到所有与词对齐保持一致的短语对 \item<3-> 在抽取得到的短语中,找到其中包含的子短语,使用非终结符进行替换,就得到了一条规则 \end{itemize} \end{itemize} \vspace{-1.5em} \begin{center} \begin{tikzpicture} \setlength{\wseg}{1.5cm} \setlength{\hseg}{1.0cm} \setlength{\wnode}{3.75cm} \setlength{\hnode}{1.1cm} \tikzstyle{elementnode} = [rectangle,text=white,anchor=center] \tikzstyle{srcnode} = [rotate=45,font=\small,anchor=south west] \tikzstyle{tgtnode} = [left,font=\small,anchor=north east] \tikzstyle{alignmentnode} = [rectangle,draw,minimum height=3.6\hnode,minimum width=0.36\hnode] \tikzstyle{probnode} = [fill=blue!30,minimum width=0.4\hnode] \tikzstyle{labelnode} = [above] % alignment matrix \begin{scope}[scale=1.0,yshift=0.12in] \foreach \i / \j / \c in {0/5/0.15, 1/5/0.15, 2/5/0.15, 3/5/0.15, 4/5/0.15, 5/5/0.15, 6/5/0.15, 7/5/0.15, 0/4/0.15, 1/4/0.15, 2/4/0.15, 3/4/0.15, 4/4/0.15, 5/4/0.15, 6/4/0.15, 7/4/0.15, 0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15, 4/3/0.15, 5/3/0.15, 6/3/0.15, 7/3/0.15, 0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.15, 5/2/0.15, 6/2/0.15, 7/2/0.15, 0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.15, 5/1/0.15, 6/1/0.15, 7/1/0.15, 0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.15, 5/0/0.15, 6/0/0.15, 7/0/0.15} \node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*\hnode*\i-5.4*0.5*\hnode,0.5*\hnode*\j-0.05*\hnode) {}; % source \node[srcnode] (src1) at (-5.4*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{The}}; \node[srcnode] (src2) at ([xshift=0.5\hnode]src1.south west) {\scriptsize{weather}}; \node[srcnode] (src3) at ([xshift=0.5\hnode]src2.south west) {\scriptsize{is}}; \node[srcnode] (src4) at 
([xshift=0.5\hnode]src3.south west) {\scriptsize{very}}; \node[srcnode] (src5) at ([xshift=0.5\hnode]src4.south west) {\scriptsize{good}}; \node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {\scriptsize{today}}; \node[srcnode] (src7) at ([xshift=0.5\hnode]src6.south west) {\scriptsize{.}}; \node[srcnode] (src8) at ([xshift=0.5\hnode]src7.south west) {\scriptsize{EOS}}; % target \node[tgtnode] (tgt1) at (-6.0*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{今天}}; \node[tgtnode] (tgt2) at ([yshift=-0.5\hnode]tgt1.north east) {\scriptsize{天气}}; \node[tgtnode] (tgt3) at ([yshift=-0.5\hnode]tgt2.north east) {\scriptsize{真}}; \node[tgtnode] (tgt4) at ([yshift=-0.5\hnode]tgt3.north east) {\scriptsize{好}}; \node[tgtnode] (tgt5) at ([yshift=-0.5\hnode]tgt4.north east) {\scriptsize{。}}; \node[tgtnode] (tgt6) at ([yshift=-0.5\hnode]tgt5.north east) {\scriptsize{EOS}}; % word alignment \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l04) at (a04) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l14) at (a14) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l55) at (a55) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l33) at (a33) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l42) at (a42) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l61) at (a61) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l70) at (a70) {}; \begin{pgfonlayer}{background} \visible<2->{ \node [rectangle,draw=red,thick,inner sep=0.6em,fill=white,drop shadow] [fit = (a04) (a14) (a24) (a33) (a42)] (phrase1) {}; } \visible<3->{ \node [rectangle,draw=ugreen,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (a04) (a14)] (phrase2) {}; } \end{pgfonlayer} \end{scope} \begin{scope}[xshift = 1.3in, yshift = 1.8in] {\scriptsize \node (phrase) 
{\textbf{抽取得到的短语:}}; \draw[-] (phrase.south west)--([xshift=1.9in]phrase.south west); \node[anchor=north west] (rules) at ([yshift=-7.5em]phrase.south west) {\textbf{抽取得到的规则:}}; \draw[-] (rules.south west)--([xshift=1.9in]rules.south west); \visible<2->{ \node[anchor=north west] (p1) at ([yshift=-0.3em]phrase.south west) {天气真好 -- The weather is very good}; } \visible<3->{ \node[anchor=north west] (r1) at ([yshift=-0.3em]rules.south west) {$\mathrm{X_1}$真好 -- $\mathrm{X_1}$ is very good}; } \visible<4->{ \node[anchor=north west] (p2) at ([yshift=-0.4em]p1.south west) {天气 -- The weather is}; \node[anchor=north west] (p3) at ([yshift=-0.4em]p2.south west) {天气真 -- The weather is very}; \node[anchor=north west] (p4) at ([yshift=-0.4em]p3.south west) {...}; \node[anchor=north west] (r2) at ([yshift=-0.4em]r1.south west) {天气真$\mathrm{X_1}$ -- The weather is very $\mathrm{X_1}$}; \node[anchor=north west] (r3) at ([yshift=-0.4em]r2.south west) {$\mathrm{X_1}$真$\mathrm{X_2}$ -- $\mathrm{X_1}$ is very $\mathrm{X_2}$}; \node[anchor=north west] (r4) at ([yshift=-0.4em]r3.south west) {...}; } \begin{pgfonlayer}{background} \visible<2-3>{ \node [rectangle,thick,inner sep=0.1em,fill=red!20] [fit = (p1)] (pb1) {}; } \visible<2>{ \draw [->,thick,dotted] ([xshift=0.1em]phrase1.east) .. controls +(east:2) and +(west:1.5) .. ([xshift=-0.1em]p1.west); } \visible<3>{ \node [rectangle,thick,inner sep=0.1em,fill=ugreen!20] [fit = (r1)] (rb1) {}; \draw [->,thick,dotted] ([xshift=0.1em]phrase1.east) .. controls +(east:2) and +(west:1.5) .. 
([xshift=-0.1em]r1.west); } \visible<4->{ \node [rectangle,thick,inner sep=0.1em,fill=red!20] [fit = (p1) (p4)] (pb2) {}; \node [rectangle,thick,inner sep=0.1em,fill=ugreen!20] [fit = (r1) (r2) (r4)] (rb2) {}; } \end{pgfonlayer} } \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 关于文法的约束 \begin{frame}{文法的约束} % 把文法限制在合理的范围内,使得系统可以处理 \begin{itemize} \item 按照上面的方法可以抽取到大量的规则,规则太多会降低训练和解码的效率,甚至影响翻译性能,因此需要加入一些\alert{约束}来限制文法规则的数目 \vspace{0.3em} \begin{center} \begin{tikzpicture} \begin{scope}[minimum height = 18pt] %\node[anchor=east] (s0) at (0, 0) {$\textrm{X} \ \to \ <\textrm{X}_1\ \textrm{希望}\ \textrm{X}_2,\ \textrm{X}_1\ \textrm{wish to}\ \textrm{X}_2>$}; \node[anchor=north west] (s1) at (0, 0) {$\textrm{X} \ \to \ <\textrm{X}_1\ \textrm{X}_2 \ \textrm{之一},\ \textrm{one of}\ \textrm{X}_1\ \textrm{X}_2>$}; \node[anchor=north west] (s2) at ([yshift=0.1em]s1.south west) {$\textrm{X} \ \to \ <\textrm{X}_1\ \textrm{X}_2\ \textrm{是}\ \textrm{X}_3,\ \textrm{X}_1\ \textrm{X}_2\ \textrm{is}\ \textrm{X}_3>$}; \node[anchor=north west] (s3) at ([yshift=0.1em]s2.south west) {$\textrm{X} \ \to \ <\textrm{X}_1\ \textrm{希望...}\ \textrm{X}_2,\ \textrm{X}_1\ \textrm{wish to...}\ \textrm{X}_2>$}; \draw[decorate,decoration={brace,amplitude=0.4em,mirror},red,thick] ([xshift=1.3em,yshift=0.3em]s3.south) -- ([xshift=8.0em,yshift=0.3em]s3.south); \node[anchor=north] (wi) at ([xshift=4.7em,yshift=-0.0em]s3.south) {\scriptsize{\alert{超过10个词}}}; \visible<2->{ \draw[-,red] (s1.west)--(s1.east); \draw[-,red] (s2.west)--(s2.east); \draw[-,red] (s3.west)--(s3.east); } \end{scope} \end{tikzpicture} \end{center} \vspace{-0.5em} \visible<2->{ 具体包含如下约束 } \begin{enumerate} \item<2-> 规则中的非终结符不可以连续的出现 \item<2-> 每条规则最多包含两个非终结符 \item<2-> 抽取规则最多可以跨越10个单词 \end{enumerate} \item<3-> 除此之外,不同的语言会有不同的文法约束,可参考 \visible<3->{ \textbf{Hierarchical Phrase-Based Translation}\\ \textbf{Chiang, 
Computational Linguistics, 2007}
}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 特征
\begin{frame}{特征}
% 还是David Chiang的论文
\begin{itemize}
\item 与短语模型一样,层次短语模型也使用判别式模型进行建模 - $\textrm{P}(d,\textbf{t}|\textbf{s}) = \frac{\exp(\sum_{i=1}^{M} \lambda_i \cdot h_i(d,\textbf{s},\textbf{t}))}{\sum_{d',t'}\exp(\sum_{i=1}^{M} \lambda_i \cdot h_i(d',\textbf{s},\textbf{t}'))}$。其中特征权重$\{\lambda_i\}$可以使用最小错误率训练进行调优,特征函数$\{h_i\}$需要用户定义。
\item<2-> 这里,所有层次短语规则满足$\langle\ \alpha, \beta, \sim\ \rangle$的形式
  \begin{itemize}
  \item $\alpha$和$\beta$表示源语和目标语的规则串,$\sim$表示它们的对应关系
  \item 此外,定义$\tau(\alpha)$和$\tau(\beta)$为源语端和目标语端的规则序列。例如
    \vspace{-0.8em}
    \begin{eqnarray}
    \tau(\alpha) & = & \textrm{对}\ \textrm{X}_1\ \textrm{感到}\ \textrm{X}_2 \nonumber \\
    \tau(\beta) & = & \textrm{be}\ \textrm{X}_2\ \textrm{with}\ \textrm{X}_1 \nonumber
    \end{eqnarray}
  \end{itemize}
\item<3-> \textbf{特征1-2: 短语翻译概率},即正向翻译概率$\textrm{P}(\tau(\beta)|\tau(\alpha))$和反向翻译概率$\textrm{P}(\tau(\alpha)|\tau(\beta))$。这里,$\tau(\alpha)$和$\tau(\beta)$ 都被看做短语,因此可以直接复用短语系统的方法,使用极大似然估计进行计算。
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 特征
\begin{frame}{特征(续)}
% 给出特征列表
\begin{itemize}
\item \textbf{特征3-4: 词汇翻译概率},即正向词汇翻译概率$\Pr_{lex}(\bar{t}|\bar{s})$和反向词汇翻译概率$\Pr_{lex}(\bar{s}|\bar{t})$。用来描述短语对中源语端单词和目标语端单词的对应关系
\item \textbf{特征5: $n$-gram语言模型},即$\textrm{P}_{\textrm{lm}}(\textbf{t})$。度量译文的流畅度,可以使用大规模目标语单语数据得到。
\item<2-> \textbf{特征6:译文长度},即$|\textbf{t}|$。避免模型倾向于短译文,同时让系统自动学习对译文长度的偏好。
\item<2-> \textbf{特征7:翻译规则数量}。这个特征是为了避免模型仅仅使用少量规则构成翻译推导(因为翻译概率相乘,因子少结果一般会大一些),同时让系统自动学习对使用规则数量的偏好。
\item<2-> \textbf{特征8:源语言被翻译为空的单词数量}。注意,空翻译规则(或特征)有时也被称作evil feature,这类特征在一些数据集上对BLEU有很好的提升作用,但是会造成人工评价的下降,因此需要谨慎使用。
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{基于chart的解码}
%%%------------------------------------------------------------------------------------------------------------
%%% CYK解码
\begin{frame}{CYK解码}
% 看NiuTrans Manual
\begin{itemize}
\item 基于层次短语的翻译解码与基于短语的模型类似,都是要找到使$\textrm{score}(d)$达到最大的翻译推导$d$
  \vspace{-0.5em}
  \begin{displaymath}
  \hat{d} = \argmax_{d \in D} \textrm{score}(d)
  \end{displaymath}
  \vspace{-0.8em}
  \begin{itemize}
  \item 由于翻译推导由SCFG构成,使用CYK算法进行解码
  \item CYK算法是一个用来判定任意给定的字符串是否属于一个上下文无关文法的算法,具体流程如下
  \end{itemize}
  \vspace{0.5em}
  \begin{center}
  \begin{tikzpicture}
  \node [anchor=south west,rectangle,draw=ublue,thick,inner sep=0.4em,fill=white,drop shadow] (sourceG) at (0,0) {{\color{ublue} \footnotesize{\textbf{S端文法}}}};
  \node [anchor=west,rectangle,draw=ublue,thick,inner sep=0.4em,fill=white,drop shadow] (chom) at ([xshift=3.5em]sourceG.east) {{\color{ublue} \footnotesize{\textbf{乔姆斯基范式}}}};
  \node [anchor=west,rectangle,draw=ublue,thick,inner sep=0.4em,fill=white,drop shadow] (targetG) at ([xshift=3.5em]chom.east) {{\color{ublue} \footnotesize{\textbf{T端文法}}}};
  \draw[->,very thick] ([xshift=0.1em]sourceG.east) -- ([xshift=-0.1em]chom.west);
  \draw[->,very thick] ([xshift=-0.1em]targetG.west) -- ([xshift=0.1em]chom.east);
  \node [anchor=north west,rectangle,draw=ublue,thick,inner sep=0.4em,fill=white,drop shadow,minimum height=1.2cm] (sourceS) at ([yshift=-1em]sourceG.south west) {{\color{ublue} \footnotesize{\textbf{S端句子}}}};
  \node [anchor=north west,rectangle,draw=ublue,thick,inner sep=0.4em,fill=white,drop shadow,minimum height=1.2cm] (targetS) at ([yshift=-1em]targetG.south west) {{\color{ublue} \footnotesize{\textbf{T端句子}}}};
  \node [anchor=north,rectangle,draw=ublue,thick,inner sep=0.4em,fill=white,drop shadow,minimum height=1.2cm] (parse) at ([yshift=-1em]chom.south) {{\color{ublue} \footnotesize{\textbf{解析得到最好的翻译}}}};
  \draw[->,very thick] ([xshift=0.1em]sourceS.east) -- ([xshift=-0.1em]parse.west);
  \draw[->,very thick] ([xshift=0.1em]parse.east) -- ([xshift=-0.1em]targetS.west);
\draw[->,very thick] ([yshift=-0.1em]chom.south) -- ([yshift=0.1em]parse.north); \end{tikzpicture} \end{center} \vspace{0.3em} \item 由于对文法中的非终结符进行了限制,可以直接使用CYK算法进行解码,无需转换成乔姆斯基范式 \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% CYK解码 \begin{frame}{CYK算法} % 看NiuTrans Manual \begin{itemize} \item CYK算法通过遍历不同\alert{span}来判断字符串是否符合文法 \begin{itemize} \item 输入:源语串\textbf{s =} $s_1 ... s_J$,以及上下文无关文法$G$ \item 输出:判断字符串是否符合上下文无关文法 \end{itemize} %\vspace{-0.5em} \begin{center} \begin{tikzpicture} \tikzstyle{alignmentnode} = [rectangle,fill=blue!30,minimum size=0.45em,text=white,inner sep=0.1pt] \tikzstyle{selectnode} = [rectangle,fill=green!20,minimum height=1.5em,minimum width=1.5em,inner sep=1.2pt] \tikzstyle{srcnode} = [anchor=south west] \begin{scope}[scale=0.85] \node[srcnode] (c1) at (0,0) {\small{\textbf{Function} CKY-Algorithm($s,G$)}}; \node[srcnode,anchor=north west] (c21) at ([xshift=2em,yshift=0.4em]c1.south west) {\small{\textbf{foreach} ($j_1, j_2$): 1$ \leq j_1 \leq J$ and 1$ \leq j_2 \leq J$}}; \node[srcnode,anchor=north west] (c22) at ([xshift=2em,yshift=0.4em]c21.south west) {\small{Initialize $cell[j_1,j_2 ]$}}; \node[srcnode,anchor=north west] (c3) at ([xshift=-2em,yshift=0.4em]c22.south west) {\small{\textbf{for} $j_1$ = 1 to $J$}}; \node[srcnode,anchor=west] (c31) at ([xshift=5em]c3.east) {\small{// beginning of span}}; \node[srcnode,anchor=north west] (c4) at ([xshift=2em,yshift=0.4em]c3.south west) {\small{\textbf{for} $j_2$ = $j_1$ to $J$}}; \node[srcnode,anchor=north west] (c41) at ([yshift=0.4em]c31.south west) {\small{// ending of span}}; \node[srcnode,anchor=north west] (c5) at ([xshift=2em,yshift=0.4em]c4.south west) {\small{\textbf{for} $k$ = $j_1$ to $j_2$}}; \node[srcnode,anchor=north west] (c51) at ([yshift=0.4em]c41.south west) {\small{// partition of span}}; \node[srcnode,anchor=north west] (c6) at ([xshift=2em,yshift=0.4em]c5.south west) 
{\small{$hypos$ = Compose($cell[j_1, k], cell[k, j_2]$)}}; \node[srcnode,anchor=north west] (c7) at ([yshift=0.4em]c6.south west) {\small{$cell[j_1, j_2]$.update($hypos$)}}; \node[srcnode,anchor=north west] (c8) at ([xshift=-6em,yshift=0.4em]c7.south west) {\small{\textbf{return} $cell[1, J]$}}; \node[srcnode] (s1) at ([yshift=-2.5em]c8.south west) {\textbf{s:}}; \node[srcnode] (s2) at ([xshift=1em]s1.south east) {$s_1$}; \node[srcnode] (s3) at ([xshift=1em]s2.south east) {$s_2$}; \node[srcnode] (s4) at ([xshift=1em]s3.south east) {$s_3$}; \node[srcnode] (s5) at ([xshift=1em]s4.south east) {$s_4$}; \node[srcnode] (s6) at ([xshift=1em]s5.south east) {$s_5$}; \node[srcnode] (s7) at ([xshift=1em]s6.south east) {$s_6$}; \node[srcnode] (s8) at ([xshift=1em]s7.south east) {$s_7$}; \node[srcnode,anchor=center] (j1) at ([yshift=-1.4em]s3.south) {$j_1$}; \node[srcnode,anchor=center] (j2) at ([yshift=-1.4em]s7.south) {$j_2$}; \node[srcnode,anchor=center] (k) at ([xshift=1.5em,yshift=-1.5em]s4.south) {$k$}; \draw[->,thick] ([yshift=-0.1em]j1.north)--([yshift=0.1em]s3.south); \draw[->,thick] ([yshift=-0.1em]j2.north)--([yshift=0.1em]s7.south); \draw[->,thick] ([yshift=-0.1em]k.north)--([xshift=1.5em,yshift=0.1em]s4.south); \node [rectangle,inner sep=0.3em,rounded corners=1pt,very thick,dotted,draw=ugreen] [fit = (s3) (s7)] (box1) {}; \begin{pgfonlayer}{background} \node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=blue!10!white] [fit = (c1) (c21) (c3) (c6) (c7) (c8)] (gl1) {}; \node [rectangle,inner sep=0.3em,rounded corners=1pt,fill=green!10!white] [fit = (s3) (s4)] (box2) {}; \node [rectangle,inner sep=0.3em,rounded corners=1pt,fill=red!10!white] [fit = (s5) (s7)] (box3) {}; \end{pgfonlayer} \end{scope} \end{tikzpicture} \end{center} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% CYK解码 \begin{frame}{CYK算法} % 看NiuTrans Manual \begin{itemize} \item 
我们来看一个CYK算法的具体例子,给定一个上下文无关文法以及一个单词\alert{aabbc},来判断该单词是否属于此文法,解析流程如下
  \vspace{-0.3em}
  \begin{center}
  \begin{tikzpicture}
  \tikzstyle{alignmentnode} = [rectangle,fill=blue!30,minimum size=0.5em,text=white,inner sep=0.1pt]
  \tikzstyle{selectnode} = [rectangle,fill=green!20,minimum height=1.5em,minimum width=1.5em,inner sep=1.2pt]
  \tikzstyle{srcnode} = [anchor=south west]
  \tikzstyle{chartnode}=[rectangle,minimum size=1.3em,draw]
  \begin{scope}[scale=0.85]
  \node[anchor=south east] (g1) at (0,0) {\small{$\textrm{S} \to \textrm{AB}\ \ \ \textrm{A} \to \textrm{CD}\ \vert \ \textrm{CF}\ \ \ \textrm{B} \to \textrm{c}\ \vert \ \textrm{BE}$}};
  \node[anchor=north west] (g2) at ([yshift=0.3em]g1.south west) {\small{$\textrm{C} \to \textrm{a}\ \ \ \ \textrm{D} \to \textrm{b}\ \ \ \ \textrm{E} \to \textrm{c}\ \ \ \ \textrm{F} \to \textrm{AD}$}};
  \begin{pgfonlayer}{background}
  \node [rectangle,inner sep=0.1em,rounded corners=1pt,fill=green!10,drop shadow,draw=ugreen] [fit = (g1) (g2)] (gl1) {};
  \end{pgfonlayer}
  \vspace{0.5em}
  \node [anchor=east] (s1) at ([xshift=-3em,yshift=-3em]g2.west) {a};
  \node [anchor=north west] (s2) at ([yshift=-1em]s1.south west) {a};
  \node [anchor=north west] (s3) at ([yshift=-1em]s2.south west) {b};
  \node [anchor=north west] (s4) at ([yshift=-1em]s3.south west) {b};
  \node [anchor=north west] (s5) at ([yshift=-1em]s4.south west) {c};
  \node [alignmentnode,anchor=west] (cell11) at ([xshift=1em]s1.east) {};
  \node [alignmentnode,anchor=west] (cell21) at ([xshift=1em]s2.east) {};
  \node [alignmentnode,anchor=west] (cell22) at ([xshift=2em]cell21.east) {};
  \node [alignmentnode,anchor=west] (cell31) at ([xshift=1em]s3.east) {};
  \node [alignmentnode,anchor=west] (cell32) at ([xshift=2em]cell31.east) {};
  \node [alignmentnode,anchor=west] (cell33) at ([xshift=2em]cell32.east) {};
  \node [alignmentnode,anchor=west] (cell41) at ([xshift=1em]s4.east) {};
  \node [alignmentnode,anchor=west] (cell42) at ([xshift=2em]cell41.east) {};
  \node [alignmentnode,anchor=west] (cell43) at
([xshift=2em]cell42.east) {}; \node [alignmentnode,anchor=west] (cell44) at ([xshift=2em]cell43.east) {}; \node [alignmentnode,anchor=west] (cell51) at ([xshift=1em]s5.east) {}; \node [alignmentnode,anchor=west] (cell52) at ([xshift=2em]cell51.east) {}; \node [alignmentnode,anchor=west] (cell53) at ([xshift=2em]cell52.east) {}; \node [alignmentnode,anchor=west] (cell54) at ([xshift=2em]cell53.east) {}; \node [alignmentnode,anchor=west] (cell55) at ([xshift=2em]cell54.east) {}; \node [anchor=north] (l1) at ([yshift=-0.5em]cell51.south) {\scriptsize{$l$=1}}; \node [anchor=north] (l2) at ([yshift=-0.5em]cell52.south) {\scriptsize{$l$=2}}; \node [anchor=north] (l3) at ([yshift=-0.5em]cell53.south) {\scriptsize{$l$=3}}; \node [anchor=north] (l4) at ([yshift=-0.5em]cell54.south) {\scriptsize{$l$=4}}; \node [anchor=north] (l5) at ([yshift=-0.5em]cell55.south) {\scriptsize{$l$=5}}; \node [anchor=center] (y1) at ([xshift=-1.2em,yshift=1.3em]cell11.center) {\scriptsize{\blue 0}}; \node [anchor=center] (y2) at ([xshift=-1.2em,yshift=1.3em]cell21.center) {\scriptsize{\blue 1}}; \node [anchor=center] (y3) at ([xshift=-1.2em,yshift=1.3em]cell31.center) {\scriptsize{\blue 2}}; \node [anchor=center] (y4) at ([xshift=-1.2em,yshift=1.3em]cell41.center) {\scriptsize{\blue 3}}; \node [anchor=center] (y5) at ([xshift=-1.2em,yshift=1.3em]cell51.center) {\scriptsize{\blue 4}}; \node [anchor=center] (y6) at ([xshift=-1.2em,yshift=-1em]cell51.center) {\scriptsize{\blue 5}}; \node [anchor=west] (num) at ([xshift=15em,yshift=0.5em]s1.east) {\footnotesize{序号}}; \node [anchor=west] (kua) at ([xshift=1em]num.east) {\footnotesize{跨度}}; \node [anchor=west] (tui) at ([xshift=1em]kua.east) {\footnotesize{推导}}; \draw[-] ([yshift=-0.1em]num.south west)--([xshift=13em,yshift=-0.1em]num.south west); \visible<2->{ \node [anchor=west] (n1) at ([yshift=-1em]num.south west) {\footnotesize{1}}; \node [anchor=west] (k1) at ([yshift=-1em]kua.south west) {\footnotesize{[{\blue 0},{\blue 1}]}}; \node 
[anchor=west] (t1) at ([yshift=-1em]tui.south west) {\footnotesize{C $\to$ a}}; \node [anchor=center,selectnode,fill=black!10] (alig11) at (cell11.center) {\footnotesize{C}}; } \visible<3->{ \node [anchor=center] (n2) at ([yshift=-1.3em]n1.center) {\footnotesize{2}}; \node [anchor=center] (k2) at ([yshift=-1.3em]k1.center) {\footnotesize{[{\blue 1},{\blue 2}]}}; \node [anchor=west] (t2) at ([yshift=-1.3em]t1.west) {\footnotesize{C $\to$ a}}; \node [anchor=center,selectnode,fill=black!10] (alig21) at (cell21.center) {\footnotesize{C}}; } \visible<4->{ \node [anchor=center] (n3) at ([yshift=-1.3em]n2.center) {\footnotesize{3}}; \node [anchor=center] (k3) at ([yshift=-1.3em]k2.center) {\footnotesize{[{\blue 2},{\blue 3}]}}; \node [anchor=west] (t3) at ([yshift=-1.3em]t2.west) {\footnotesize{D $\to$ b}}; \node [anchor=center,selectnode,fill=black!10] (alig31) at (cell31.center) {\footnotesize{D}}; } \visible<5->{ \node [anchor=center] (n4) at ([yshift=-1.3em]n3.center) {\footnotesize{4}}; \node [anchor=center] (k4) at ([yshift=-1.3em]k3.center) {\footnotesize{[{\blue 3},{\blue 4}]}}; \node [anchor=west] (t4) at ([yshift=-1.3em]t3.west) {\footnotesize{D $\to$ b}}; \node [anchor=center,selectnode,fill=black!10] (alig41) at (cell41.center) {\footnotesize{D}}; } \visible<6->{ \node [anchor=center] (n5) at ([yshift=-1.3em]n4.center) {\footnotesize{5}}; \node [anchor=center] (k5) at ([yshift=-1.3em]k4.center) {\footnotesize{[{\blue 4},{\blue 5}]}}; \node [anchor=west] (t5) at ([yshift=-1.3em]t4.west) {\footnotesize{B $\to$ c , E $\to$ c}}; \node [anchor=center,selectnode,fill=black!10] (alig51) at (cell51.center) {\footnotesize{B,E}}; } \visible<7->{ \node [anchor=center] (n6) at ([yshift=-1.3em]n5.center) {\footnotesize{6}}; \node [anchor=center] (k6) at ([yshift=-1.3em]k5.center) {\footnotesize{[{\blue 0},{\blue 2}]}}; \node [anchor=west] (t6) at ([yshift=-1.3em]t5.west) {\footnotesize{none}}; \node [anchor=center,selectnode,fill=black!10] (alig22) at (cell22.center) 
{\footnotesize{}}; } \visible<8->{ \node [anchor=center] (n7) at ([yshift=-1.3em]n6.center) {\footnotesize{7}}; \node [anchor=center] (k7) at ([yshift=-1.3em]k6.center) {\footnotesize{[{\blue 1},{\blue 3}]}}; \node [anchor=west] (t7) at ([yshift=-1.3em]t6.west) {\footnotesize{A $\to$ CD}}; \node [anchor=center,selectnode,fill=black!10] (alig32) at (cell32.center) {\footnotesize{A}}; } \visible<9->{ \node [anchor=center] (sep1) at ([yshift=-0.8em]n7.center) {\footnotesize{...}}; \node [anchor=center] (n8) at ([yshift=-1.8em]n7.center) {\footnotesize{15}}; \node [anchor=center] (k8) at ([yshift=-1.8em]k7.center) {\footnotesize{[{\blue 0},{\blue 5}]}}; \node [anchor=west] (t8) at ([yshift=-1.8em]t7.west) {\footnotesize{S $\to$ AB}}; \node [anchor=center,selectnode,fill=black!10] (alig33) at (cell33.center) {\footnotesize{}}; \node [anchor=center,selectnode,fill=black!10] (alig42) at (cell42.center) {\footnotesize{}}; \node [anchor=center,selectnode,fill=black!10] (alig43) at (cell43.center) {\footnotesize{F}}; \node [anchor=center,selectnode,fill=black!10] (alig44) at (cell44.center) {\footnotesize{A}}; \node [anchor=center,selectnode,fill=black!10] (alig52) at (cell52.center) {\footnotesize{}}; \node [anchor=center,selectnode,fill=black!10] (alig53) at (cell53.center) {\footnotesize{}}; \node [anchor=center,selectnode,fill=black!10] (alig54) at (cell54.center) {\footnotesize{}}; \node [anchor=center,selectnode,fill=black!10] (alig55) at (cell55.center) {\footnotesize{S}}; } \end{scope} \end{tikzpicture} \end{center} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% CYK解码 \begin{frame}{CYK解码(续)} % 看NiuTrans Manual \begin{itemize} \item CYK解码提出了一种cell的数据结构,用来记录所有可能出现的翻译假设。 \begin{itemize} \item<2-> 对于每个源语句子,使用短语规则表初始化它的cell \item<3-> 自底向上对cell中的每个子cell进行重新组合(正向、反向) \item<4-> 计算每个推导的得分并记录下来,最终选择最优推导所对应的译文作为输出 \end{itemize} \end{itemize} \vspace{-0.8em} \begin{center} 
\begin{tikzpicture} \visible<2->{ \node [anchor=west] (s1) at (0,0) {\small{\textbf{进口}}}; \node [anchor=west] (s2) at ([xshift=4em]s1.east) {\small{\textbf{大幅度}}}; \node [anchor=west] (s3) at ([xshift=2.8em]s2.east) {\small{\textbf{下降}}}; \node [anchor=west] (s4) at ([xshift=2.8em]s3.east) {\small{\textbf{了}}}; \node [anchor=north] (t11) at ([yshift=-0.4em]s1.south) {\scriptsize{Imports}}; \node [anchor=west] (t12) at ([yshift=-1em]t11.west) {\scriptsize{The imports}}; \node [anchor=west] (t13) at ([yshift=-0.7em]t12.west) {\scriptsize{...}}; \node [anchor=north] (t21) at ([yshift=-0.4em]s2.south) {\scriptsize{drastically}}; \node [anchor=west] (t22) at ([yshift=-1em]t21.west) {\scriptsize{substantially}}; \node [anchor=west] (t23) at ([yshift=-0.7em]t22.west) {\scriptsize{...}}; \node [anchor=north] (t31) at ([yshift=-0.4em]s3.south) {\scriptsize{fall}}; \node [anchor=west] (t32) at ([yshift=-1em]t31.west) {\scriptsize{fallen}}; \node [anchor=west] (t33) at ([yshift=-0.7em]t32.west) {\scriptsize{...}}; \node [anchor=north] (t41) at ([yshift=-0.4em]s4.south) {\scriptsize{have}}; \node [anchor=west] (t42) at ([yshift=-1em]t41.west) {\scriptsize{had}}; \node [anchor=west] (t43) at ([yshift=-0.7em]t42.west) {\scriptsize{...}}; } \visible<3->{ \node [anchor=west] (t51) at ([xshift=2em,yshift=-1.8em]t23.west) {\scriptsize{drastically \ fallen}}; \node [anchor=west] (t53) at ([yshift=-0.7em]t51.west) {\scriptsize{...}}; } \visible<4->{ \node [anchor=west] (t61) at ([xshift=3em,yshift=-1.8em]t53.west) {\scriptsize{have \ drastically \ fallen}}; \node [anchor=west] (t62) at ([yshift=-0.7em]t61.west) {\scriptsize{...}}; \node [anchor=west] (t71) at ([xshift=-6em,yshift=-1.8em]t62.west) {\scriptsize{The \ imports}}; \node [anchor=west] (t72) at ([xshift=0.2em]t71.east) {\scriptsize{have \ drastically \ fallen}}; \node [anchor=west] (t73) at ([yshift=-0.7em]t71.west) {\scriptsize{...}}; } \begin{pgfonlayer}{background} \visible<2->{ \node [rectangle,inner 
sep=0.05em,fill=red!20] [fit = (t11) (t12) (t13)] (box1) {}; \node [rectangle,inner sep=0.05em,fill=green!20] [fit = (t21) (t22) (t23)] (box2) {}; \node [rectangle,inner sep=0.05em,fill=blue!20,minimum width=3em] [fit = (t31) (t32) (t33)] (box3) {}; \node [rectangle,inner sep=0.05em,fill=orange!20,minimum width=3em] [fit = (t41) (t42) (t43)] (box4) {}; } \visible<3->{ \node [rectangle,inner sep=0.05em,fill=purple!20] [fit = (t51) (t53)] (box5) {}; } \visible<4->{ \node [rectangle,inner sep=0.05em,fill=yellow!20] [fit = (t61) (t62)] (box6) {}; \node [rectangle,inner sep=0.05em,fill=black!10] [fit = (t71) (t72) (t73)] (box7) {}; } \end{pgfonlayer} \visible<2->{ \node [anchor=south east,inner sep=1pt,fill=black] (tl1) at (box1.south east) {\tiny{{\color{white} \textbf{1}}}}; \node [anchor=south east,inner sep=1pt,fill=black] (tl2) at (box2.south east) {\tiny{{\color{white} \textbf{2}}}}; \node [anchor=south east,inner sep=1pt,fill=black] (tl3) at (box3.south east) {\tiny{{\color{white} \textbf{3}}}}; \node [anchor=south east,inner sep=1pt,fill=black] (tl4) at (box4.south east) {\tiny{{\color{white} \textbf{4}}}}; } \visible<3->{ \draw [->,thick] (t22.south) .. controls +(south:0.5) and +(north:0.5) .. (t51.north); \draw [->,thick] (t32.south) .. controls +(south:0.5) and +(north:0.5) .. (t51.north); \node [anchor=south east,inner sep=1pt,fill=black] (tl5) at (box5.south east) {\tiny{{\color{white} \textbf{2-3}}}}; } \visible<4->{ \draw [->,thick] (t51.south) .. controls +(south:0.5) and +(north:0.5) .. (t61.north); \draw [->,thick] (t42.south) .. controls +(south:2) and +(north:0.5) .. (t61.north); \draw [->,thick] (t61.south) .. controls +(south:0.5) and +(north:0.5) .. (t72.north); \draw [->,thick] (t12.south) .. controls +(south:2.5) and +(north:0.8) .. 
(t71.north); \node [anchor=south east,inner sep=1pt,fill=black] (tl6) at (box6.south east) {\tiny{{\color{white} \textbf{2-4}}}}; \node [anchor=south east,inner sep=1pt,fill=black] (tl7) at (box7.south east) {\tiny{{\color{white} \textbf{1-4}}}}; } \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ \subsection{剪枝} %%%------------------------------------------------------------------------------------------------------------ %%% 立方剪枝 \begin{frame}{立方剪枝(Cube Pruning)} % 问题 % 解决方法 \begin{itemize} \item 前面介绍的解码方法由于搜索空间非常大,速度很慢,通常使用剪枝的方法来加速这个过程 \begin{itemize} \item 解码时对来自不同的cell进行合并时,第一个cell包含$n$个条目,第二个cell包含$m$个条目,就会产生$n \times m$个新条目 \vspace{0.8em} \begin{center} \begin{tikzpicture} \node [anchor=west] (s1) at (0,0) {\footnotesize{$\textrm{X} \to <\textrm{从}\ \textrm{X}_1,\ \textrm{from}\ \textrm{X}_1>$}}; \node [anchor=east] (s2) at ([yshift=-2em]s1.east) {\footnotesize{$\textrm{X} \to <\textrm{从}\ \textrm{X}_1,\ \textrm{since}\ \textrm{X}_1>$}}; \node [anchor=east] (s3) at ([yshift=-2em]s2.east) {\footnotesize{$\textrm{X} \to <\textrm{从}\ \textrm{X}_1,\ \textrm{from the}\ \textrm{X}_1>$}}; \node [anchor=east] (s4) at ([yshift=-2em]s3.east) {\footnotesize{$\textrm{X} \to <\textrm{从}\ \textrm{X}_1,\ \textrm{through}\ \textrm{X}_1>$}}; \node [anchor=west] (t1) at ([xshift=2.5em]s1.east) {\footnotesize{$\textrm{X} \to <\textrm{计划},\ \textrm{plan}>$}}; \node [anchor=west] (t2) at ([xshift=2.5em]s2.east) {\footnotesize{$\textrm{X} \to <\textrm{方案},\ \textrm{scheme}>$}}; \node [anchor=west] (t3) at ([xshift=2.5em]s3.east) {\footnotesize{$\textrm{X} \to <\textrm{项目},\ \textrm{project}>$}}; \node [anchor=west] (t4) at ([xshift=2.5em]s4.east) {\footnotesize{$\textrm{X} \to <\textrm{时期},\ \textrm{times}>$}}; \foreach \x in {1,2,...,4} \foreach \y in {1,2,...,4} \draw[->] ([xshift=0.1em]s\x.east) -- ([xshift=-0.1em]t\y.west); \begin{pgfonlayer}{background} \node 
[rectangle,inner sep=0.3em,fill=red!20] [fit = (s1) (s3) (s4)] (box1) {}; \node [rectangle,inner sep=0.3em,fill=green!20] [fit = (t1) (t2) (t3) (t4)] (box2) {}; \end{pgfonlayer} \end{tikzpicture} \end{center} \vspace{0.3em} \item 当$n$和$m$都很大时,会产生很多新的条目,可以通过限制栈的大小来舍弃大部分条目 \item 另一个思路就是如何只生成有机会被选中的条目,这就是\alert{立方剪枝(cube pruning)}的思想 \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 立方剪枝 \begin{frame}{立方剪枝(Cube Pruning)(续)} % 问题 % 解决方法 \begin{itemize} \item 不是考虑所有可能的组合,而是从\alert{最优}候选开始选择 \begin{itemize} \item 根据代价估计对每个cell中的条目进行排序,最好的候选在最顶层,次优候选排在后面 \item<2-> 计算所有邻居的代价,选择最优的条目,再计算其邻居的代价,不断进行迭代 \item<3-> 直到产生的条目达到一定的数量(如栈容量)或者新加入的条目被其他剪枝策略丢弃,则终止算法 \end{itemize} \end{itemize} \begin{center} \begin{tikzpicture} \tikzstyle{alignmentnode} = [rectangle,fill=blue!30,minimum size=0.4em,text=white,inner sep=0.1pt] \tikzstyle{selectnode} = [rectangle,fill=green!20,minimum height=1.5em,minimum width=1.5em,inner sep=1.2pt] \tikzstyle{srcnode} = [rotate=45,anchor=south west] \begin{scope}[scale=0.85] \node [anchor=west] (s1) at (0,0) {\footnotesize{$\textrm{X} \to <\textrm{从}\ \textrm{X}_1,\ \textrm{from}\ \textrm{X}_1>$}}; \node [anchor=east] (s2) at ([yshift=-2em]s1.east) {\footnotesize{$\textrm{X} \to <\textrm{从}\ \textrm{X}_1,\ \textrm{since}\ \textrm{X}_1>$}}; \node [anchor=east] (s3) at ([yshift=-2em]s2.east) {\footnotesize{$\textrm{X} \to <\textrm{从}\ \textrm{X}_1,\ \textrm{from the}\ \textrm{X}_1>$}}; \node [anchor=east] (s4) at ([yshift=-2em]s3.east) {\footnotesize{$\textrm{X} \to <\textrm{从}\ \textrm{X}_1,\ \textrm{through}\ \textrm{X}_1>$}}; \node [anchor=center,alignmentnode] (alig1) at ([xshift=2.5em]s1.east) {}; \node [anchor=center,alignmentnode] (alig11) at ([xshift=2.2em]alig1.center) {}; \node [anchor=center,alignmentnode] (alig12) at ([xshift=2.2em]alig11.center) {}; \node [anchor=center,alignmentnode] (alig13) at ([xshift=2.2em]alig12.center) {}; \node 
[anchor=center,alignmentnode] (alig2) at ([xshift=2.5em]s2.east) {}; \node [anchor=center,alignmentnode] (alig21) at ([xshift=2.2em]alig2.center) {}; \node [anchor=center,alignmentnode] (alig22) at ([xshift=2.2em]alig21.center) {}; \node [anchor=center,alignmentnode] (alig23) at ([xshift=2.2em]alig22.center) {}; \node [anchor=center,alignmentnode] (alig3) at ([xshift=2.5em]s3.east) {}; \node [anchor=center,alignmentnode] (alig31) at ([xshift=2.2em]alig3.center) {}; \node [anchor=center,alignmentnode] (alig32) at ([xshift=2.2em]alig31.center) {}; \node [anchor=center,alignmentnode] (alig33) at ([xshift=2.2em]alig32.center) {}; \node [anchor=center,alignmentnode] (alig4) at ([xshift=2.5em]s4.east) {}; \node [anchor=center,alignmentnode] (alig41) at ([xshift=2.2em]alig4.center) {}; \node [anchor=center,alignmentnode] (alig42) at ([xshift=2.2em]alig41.center) {}; \node [anchor=center,alignmentnode] (alig43) at ([xshift=2.2em]alig42.center) {}; \node[srcnode] (c1) at ([yshift=1em]alig1.north) {\footnotesize{plan}}; \node[srcnode] (c2) at ([yshift=1em]alig11.north) {\footnotesize{scheme}}; \node[srcnode] (c3) at ([yshift=1em]alig12.north) {\footnotesize{project}}; \node[srcnode] (c4) at ([yshift=1em]alig13.north) {\footnotesize{times}}; \visible<2->{ \node [anchor=center,selectnode] (c1) at (alig1.center) {\footnotesize{2.1}}; } \visible<3->{ \node [anchor=center,selectnode,fill=red!20] (c2) at (alig11.center) {\footnotesize{5.1}}; \node [anchor=center,selectnode,fill=red!20] (c3) at (alig2.center) {\footnotesize{5.5}}; } \visible<4->{ \node [anchor=center,selectnode] (c2) at (alig11.center) {\footnotesize{5.1}}; \node [anchor=center,selectnode] (c3) at (alig2.center) {\footnotesize{5.5}}; \node [anchor=center,selectnode,fill=red!20] (c4) at (alig12.center) {\footnotesize{8.2}}; \node [anchor=center,selectnode,fill=red!20] (c5) at (alig21.center) {\footnotesize{8.5}}; \node [anchor=center,selectnode,fill=red!20] (c6) at (alig3.center) {\footnotesize{7.7}}; } \end{scope} 
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% BLEU等评价
\begin{frame}{效果}
% 实验结果
\begin{itemize}
\item 从实验结果中可以看出,基于层次短语的翻译模型性能要优于基于短语的翻译模型
\item 选择使用层次短语信息实际上增加了模型的复杂度,但是可以通过借鉴基于短语的翻译模型以及CYK解码和立方剪枝等技术来解决
\item 可以考虑加入更多句法信息来进一步提升模型性能
\end{itemize}
%\vspace{-1em}
\begin{center}
\begin{tabular}{l | l | l | l}
\multicolumn{2}{c|}{模型} & 开发集 & 测试集 \\
\multicolumn{2}{c|}{} & (BLEU[\%]) & (BLEU[\%]) \\ \hline
\multicolumn{2}{l|}{短语(Moses)} & 36.51 & 34.93 \\
\multicolumn{2}{l|}{短语(NiuTrans)} & 36.99 & 35.29 \\ \hline
\multicolumn{2}{l|}{层次短语(Moses)} & 36.65 & 34.79 \\
\multicolumn{2}{l|}{层次短语(NiuTrans)} & 37.41 & 35.35 \\ \hline
\end{tabular}
\end{center}
\vspace{-0.5em}
\scriptsize{* 以上结果来自 NiuTrans: An Open Source Toolkit for Phrase-based Machine Translation}\\
\scriptsize{* 开发集:NIST MT03,测试集:NIST MT05}\\
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\section{基于语言学句法的模型}
%%%------------------------------------------------------------------------------------------------------------
%%% 第三节的内容
\begin{frame}{Outline}
\vspace{5em}
\begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow]
{\Large
\begin{center}
\textbf{基于语言学句法的翻译模型}
\end{center}
}
\end{tcolorbox}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 句法树(层次短语)
\begin{frame}{层次短语规则推导的句法树表示}
\begin{itemize}
\item 层次短语规则的使用本质上是对(源语)句子建立树状结构,这反映了文法对于字符串分析得到的推导\\
  \vspace{-0.4em}
  \begin{displaymath}
  \visible<2->{d = r_3}\visible<3->{\circ r_1}\visible<4->{ \circ r_4}\visible<5->{ \circ r_2}\visible<6->{ \circ r_5}\visible<7->{ \circ r_2}\visible<8->{ \circ r_7}\visible<9->{ \circ r_6}\visible<10->{ \circ r_2}
  \end{displaymath}
\end{itemize}
\vspace{-1em}
\begin{center}
\begin{tikzpicture}
{\scriptsize \begin{scope}[sibling distance=0pt, level distance = 27pt] {\scriptsize \Tree[.\node(n1){\textbf{S}}; [.\node(n2){\textbf{S}}; [.\node(n3){\textbf{S}}; [.\node(n4){\textbf{S}}; [.\node(n5){\textbf{X}}; \node(cw1){但}; ] ] [.\node(n6){\textbf{X}}; \node(cw2){美国}; ] ] [.\node(n7){\textbf{X}}; [. \node(cw3){并没有}; ] [. \node(cw4){执行}; ] ] ] [.\node(n8){\textbf{X}}; [. \node(cw5){世贸}; ] [.\node(n9){\textbf{X}}; [. \node(cw6){组织}; ] [. \node(cw7){的}; ] ] [. \node(cw8){裁决}; ] ] ] \node<10->[circle, inner sep = 0.5pt, fill=blue!90!white, anchor = west] (l1) at ([xshift=-0.3em]n1.north east) {{\color{white} \tiny{$r_2$}}}; \node<7->[circle, inner sep = 0.5pt, fill=blue!90!white, anchor = east] (l2) at ([xshift=0.3em]n2.north west) {{\color{white} \tiny{$r_2$}}}; \node<5->[circle, inner sep = 0.5pt, fill=blue!90!white, anchor = east] (l2) at ([xshift=0.3em]n3.north west) {{\color{white} \tiny{$r_2$}}}; \node<3->[circle, inner sep = 0.5pt, fill=blue!90!white, anchor = east] (l2) at ([xshift=0.3em]n4.north west) {{\color{white} \tiny{$r_1$}}}; \visible<2->{\node[circle, inner sep = 0.5pt, fill=blue!90!white, anchor = east] (l2) at ([xshift=0.3em]n5.north west) {{\color{white} \tiny{$r_3$}}};} \node<4->[circle, inner sep = 0.5pt, fill=blue!90!white, anchor = west] (l1) at ([xshift=-0.3em]n6.south east) {{\color{white} \tiny{$r_4$}}}; \node<6->[circle, inner sep = 0.5pt, fill=blue!90!white, anchor = west] (l1) at ([xshift=-0.3em]n7.north east) {{\color{white} \tiny{$r_5$}}}; \node<9->[circle, inner sep = 0.5pt, fill=blue!90!white, anchor = west] (l1) at ([xshift=-0.3em]n8.north east) {{\color{white} \tiny{$r_6$}}}; \node<8->[circle, inner sep = 0.5pt, fill=blue!90!white, anchor = west] (l1) at ([xshift=-0.3em]n9.north east) {{\color{white} \tiny{$r_7$}}}; } \end{scope} \begin{scope}[xshift = 1.8in, yshift = 0.1in] \node (rules) {\textbf{层次短语翻译规则:}}; \draw[-] (rules.south west)--([xshift=1.8in]rules.south west); \node[anchor=north west] (r1) at 
([yshift=-0.2em]rules.south west) {$r_1$}; \node[anchor=west] (rc1) at ([xshift=0.0em]r1.east) {$\textrm{S} \; \to \; \langle\ \textrm{X}_1, \; \; \textrm{X}_1\ \rangle$}; \node[anchor=north west] (r2) at ([yshift=-0.4em]r1.south west) {$r_2$}; \node[anchor=west] (rc2) at ([xshift=0em]r2.east) {$\textrm{S} \; \to \; \langle\ \textrm{S}_1 \; \textrm{X}_2, \; \; \textrm{S}_1 \; \textrm{X}_2\ \rangle$}; \node[anchor=north west] (r3) at ([yshift=-0.4em]r2.south west) {$r_3$}; \node[anchor=west] (rc3) at ([xshift=0em]r3.east) {$\textrm{X} \; \to \; \langle\ \text{但}, \; \; \text{but}\ \rangle$}; \node[anchor=north west] (r4) at ([yshift=-0.4em]r3.south west) {$r_4$}; \node[anchor=west] (rc4) at ([xshift=0em]r4.east) {$\textrm{X} \; \to \; \langle\ \text{美国}, \; \; \text{the U.S.}\ \rangle$}; \node[anchor=north west] (r5) at ([yshift=-0.4em]r4.south west) {$r_5$}; \node[anchor=west] (rc5) at ([xshift=0em]r5.east) {$\textrm{X} \; \to \; \langle\ \text{并没有} \; \text{执行}, \; \; \text{}$}; \node[anchor=north west] (r52) at ([yshift=-0.4em]r5.south west) {{\color{white} $r_5$}}; \node[anchor=west] (rc52) at ([xshift=2.9em]r52.east) {$\text{has not implemented}\ \rangle$}; \node[anchor=north west] (r6) at ([yshift=-0.4em]r52.south west) {$r_6$}; \node[anchor=west] (rc6) at ([xshift=0em]r6.east) {$\textrm{X} \; \to \; \langle\ \text{世贸} \; \textrm{X}_1 \; \text{裁决}, $}; \node[anchor=north west] (r61) at ([yshift=-0.4em]r6.south west) {{\color{white} $r_6$}}; \node[anchor=west] (rc61) at ([xshift=2.9em]r61.east) {$\text{the decision} \; \textrm{X}_1 \; \text{the WTO}\ \rangle$}; \node[anchor=north west] (r7) at ([yshift=-0.4em]r61.south west) {$r_7$}; \node[anchor=west] (rc7) at ([xshift=0em]r7.east) {$\textrm{X} \; \to \; \langle\ \text{组织 的}, \; \; \text{of}\ \rangle$}; \end{scope} } \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 句法树(语言学) \begin{frame}{语言学句法树} 
\begin{itemize} \item 但是,层次短语的分析过程还是一种形式文法的推导,另一种自然的想法是把句子表示成语言学上的句法树,这种表示更符合人类对语言的认知 \begin{itemize} \item 比如,``执行''后面接名词短语NP \item 还有,``并没有''作用于后面的动词短语``执行...'' \end{itemize} \end{itemize} \begin{center} \begin{tikzpicture} \begin{scope}[xshift = -0.3in, sibling distance=5pt, level distance = 24pt] {\footnotesize \Tree[.IP [.ADVP [.AD \node(cw1){但}; ] ] [.NP [.NR \node(cw2){美国}; ] ] [.VP [.ADVP [.AD \node(cw3){并没有}; ] ] [.VP [.VP [.VV \node(cw4){执行}; ] ] [.NP [.NP [.NN \node(cw5){世贸}; ] [.NN \node(cw6){组织}; ] ] [.DEC \node(cw7){的}; ] [.NP [.NN \node(cw8){裁决}; ] ] ] ] ] ] } \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 在MT中使用句法带来的好处 \begin{frame}{使用句法的好处} \begin{itemize} \item 短语结构树可以很容易地进行远距离调序的建模,同时丰富的句法功能标记有助于翻译中的消歧和生成 \end{itemize} \vspace{-1.0em} \begin{center} \begin{tikzpicture} \begin{scope}[yshift=-0in] {\tiny \node[anchor=west] (ref) at (0,0) {\textbf{参考答案:} The Chinese star performance troupe presented a wonderful Peking opera as well as singing and dancing }; \node[anchor=north west] (ref2) at (ref.south west) {{\color{white} \textbf{Reference:}} performance to Hong Kong audience .}; \node[anchor=north west] (hifst) at (ref2.south west) {\textbf{层次短语系统:} Star troupe of China, highlights of Peking opera and dance show to the audience of Hong Kong .}; \node[anchor=north west] (synhifst) at (hifst.south west) {\textbf{句法系统:} Chinese star troupe}; \node[anchor=west, fill=green!20!white, inner sep=0.25em] (synhifstpart2) at (synhifst.east) {presented}; \node[anchor=west, fill=blue!20!white, inner sep=0.25em] (synhifstpart3) at ([xshift=0.2em]synhifstpart2.east) {a wonderful Peking opera singing and dancing}; \node[anchor=west, fill=red!20!white, inner sep=0.40em] (synhifstpart4) at ([xshift=0.2em]synhifstpart3.east) {to}; \node[anchor=west, fill=purple!20!white, inner sep=0.25em] (synhifstpart5) at ([xshift=0.2em]synhifstpart4.east) 
{Hong Kong audience}; \node[anchor=west] (synhifstpart6) at (synhifstpart5.east) {.}; \node[anchor=north west] (input) at ([yshift=-10em]synhifst.south west) {\textbf{源语句法树:}}; \node<2->[anchor=north west] (rule) at ([yshift=-2em]synhifst.south west) {句法翻译规则:}; \node<2->[anchor=north west] (rule2) at ([yshift=0.2em]rule.south west) {(VP BA(将) $x_1$:NP $x_2$:VP PP(P(给) $x_3$:NP))}; \node<2->[anchor=north west] (rule3) at ([yshift=0.2em]rule2.south west) {$\to$ $x_2$ $x_1$ to $x_3$ }; \begin{scope}[scale = 0.9, grow'=up, sibling distance=15pt, level distance=23pt, xshift=2.8in, yshift=-2.8in] \Tree[.\node(tn1){IP}; [.\node(tn2){NP}; \edge[roof]; \node[](seg1){中国$_1$ 明星$_2$ 艺术团$_3$}; ] [.\node(tn3){VP}; [.\node(tn4){BA}; \node[fill=red!20!white](seg2){将$_4$}; ] [.\node(tn5){NP}; \edge[roof]; \node[fill=blue!20!white](seg3){一$_5$ 台$_6$ 精彩$_7$ 的$_8$ 京剧$_9$ 歌舞$_{10}$}; ] [.\node(tn6){VP}; [.\node(tn7){VV}; \node[fill=green!20!white](seg4){呈现$_{11}$}; ] ] [.\node(tn8){PP}; [.\node(tn9){P}; \node[fill=red!20!white](seg5){给$_{12}$}; ] [.\node(tn10){NP}; \edge[roof]; \node[fill=purple!20!white](seg6){香港$_{13}$ 观众$_{14}$}; ] ] ] [.\node(tn11){.}; ] ] \end{scope} \path<2-> [draw,dotted,thick,blue] (tn3.south west) -- (tn4.south west) -- (seg2.north west) -- (seg2.north east) -- ([xshift=0.1em]tn4.north east) -- ([xshift=2.8em]tn6.north east) -- (seg5.north west) -- (seg5.north east) -- ([yshift=-1.7em]seg5.south east) -- (tn10.north east) -- (tn10.south east) -- (tn8.south east) -- (tn3.south east) -- (tn3.south west); \path [draw,thick,->,dashed] (seg2.north) .. controls +(north:1.0) and +(south:1.5) .. (synhifstpart4.south); \path [draw,thick,->,dashed] (seg3.north) -- (synhifstpart3.south); \path [draw,thick,->,dashed] (seg4.north) -- (synhifstpart2.south); \path [draw,thick,->,dashed] (seg5.north) .. controls +(north:0.5) .. 
(synhifstpart4.south); \path [draw,thick,->,dashed] (seg6.north) -- (synhifstpart5.south); \path<2-> [draw,<->,dotted,blue,thick] ([yshift=-0.5em]seg2.south west) .. controls +(west:1.0) and +(south:0.5) .. ([xshift=-1.5em]rule2.south); } \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 使用句法信息的一些思考 \begin{frame}{有很多句法知识可以使用} \begin{itemize} \item \textbf{句法树形式选择} - 句法的表示形式有很多,侧重点也不同,机器翻译需要何种句法信息? \begin{center} \begin{tikzpicture} {\footnotesize \begin{scope}[xshift = -0.3in, sibling distance=3pt, level distance = 18pt] \Tree[.S [.NP [.NNP \node(w1){Messi}; ] ] [.VP [.VBD \node(w2){hit}; ] [.NP [.DT \node(w3){the}; ] [.NN \node(w4){ball}; ] ] ] ] \node [anchor=north west] (cap1) at ([yshift=-2.0em,xshift=2.0em]w1.south west) {\footnotesize{(a) 短语结构树}}; \node [anchor=west] (t1) at ([xshift=3em,]w4.east) {Messi}; \node [anchor=west] (t2) at ([xshift=0.5em,]t1.east) {hit}; \node [anchor=west] (t3) at ([xshift=0.5em,]t2.east) {the}; \node [anchor=west] (t4) at ([xshift=0.5em,]t3.east) {ball}; \draw [->] ([xshift=0em]t3.north) .. controls +(north:1em) and +(north:1em) .. ([xshift=-0.2em]t4.north); \draw [->] ([xshift=0.2em]t4.north) .. controls +(north:2.5em) and +(north:2.5em) .. ([xshift=0.2em]t2.north); \draw [->] ([xshift=0.0em]t1.north) .. controls +(north:2.5em) and +(north:2.5em) .. ([xshift=-0.2em]t2.north); \node [anchor=north west] (cap2) at ([yshift=-0.2em,xshift=-0.5em]t2.south west) {\footnotesize{(b) 依存树}}; \end{scope} } \end{tikzpicture} \end{center} \item<2-> \textbf{如何获取句法知识} - 句法信息由谁提供? 
\begin{itemize} \item 自动句法分析器可以提供句法信息,分析器可以从树库中自动学习(回忆第二章),因此得到的句法分析结果和树库标注是一致的 \item 不使用句法分析器,而是让机器翻译自动学习适合翻译任务的句法结构 \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 句法树中的一些基本概念 \begin{frame}{术语} \begin{itemize} \item 这里我们\alert{假设}: \begin{itemize} \item 使用短语结构树表示句法信息 \item 句法树由句法分析器自动生成 \end{itemize} \item 涉及的术语 \end{itemize} \vspace{-0.8em} {\small \begin{center} \begin{tabular}{l | l} 术语 & 说明 \\ \hline 翻译规则 & 翻译的最小单元(或步骤) \\ \hline 推导 & 由一系列规则组成的分析或翻译过程,推导可以 \\ & 被看做是规则的序列 \\ \hline 规则表 & 翻译规则的存储表示形式,可以高效进行查询\\ \hline 层次短语模型 & 基于同步上下文无关文法的翻译模型,非终结符\\ & 只有S和X两种,规则和文法并不需要符合语言学\\ & 句法约束\\ \hline 树到串模型 & 一类翻译模型,它使用源语语言学句法树,因此\\ & 翻译可以被看做从一棵句法树到词串的转换\\ \hline 串到树模型 & 一类翻译模型,它使用目标语语言学句法树,因\\ & 此翻译可以被看做从词串到句法树的转换\\ \end{tabular} \end{center} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 句法树中的一些基本概念(续) \begin{frame}{术语(续)} \vspace{-0.5em} {\small \begin{center} \begin{tabular}{l | l} 术语 & 说明 \\ \hline 树到树模型 & 一类翻译模型,它同时使用源语和目标语语言学\\ & 句法树,因此翻译可以被看做从句法树到句法\\ & 树的转换 \\ \hline 基于句法 & 使用语言学句法 \\ \hline 基于树 & (源语言)使用树结构(大多指句法树)\\ \hline 基于串 & (源语言)使用词串,比如串到树的翻译系统的\\ & 解码器一般都是基于串的解码方法 \\ \hline 基于森林 & (源语言)使用句法森林,这里森林只是对多个\\ & 句法树的一种压缩表示 \\ \hline 词汇化规则 & 含有终结符的规则 \\ \hline 非词汇规则 & 不含有终结符的规则 \\ \hline 句法软约束 & 不强制规则推导匹配语言学句法树,通常把 \\ & 句法信息作为特征使用 \\ \hline 句法硬约束 & 强制推导必须符合语言学句法树,不符合的 \\ & 推导会被过滤掉 \end{tabular} \end{center} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 句法模型的分类 \begin{frame}{句法模型的分类} \begin{center} \begin{tikzpicture} \begin{scope} \tikzstyle{cnode} = [minimum width=7.0em,minimum height=2.5em,rounded corners=0.2em]; \tikzstyle{xnode} = [minimum width=4.5em,minimum height=2.5em,rounded corners=0.2em]; \node[xnode,anchor=west,fill=red!30,align=left] (itg) at (0,0) 
{\footnotesize{反向转录}\\\footnotesize{文法}}; \node[xnode,anchor=west,fill=red!30,align=left] (hiero) at ([xshift=0.5em]itg.east) {\footnotesize{层次短语}\\\footnotesize{模型}}; \node[xnode,anchor=west,fill=blue!30,align=left] (s2t) at ([xshift=0.5em]hiero.east) {\footnotesize{串到树}\\\footnotesize{模型}}; \node[xnode,anchor=west,fill=blue!30,align=left] (t2s) at ([xshift=0.5em]s2t.east) {\footnotesize{树到串}\\\footnotesize{模型}}; \node[xnode,anchor=west,fill=blue!30,align=left] (t2t) at ([xshift=0.5em]t2s.east) {\footnotesize{树到树}\\\footnotesize{模型}}; \node[cnode,anchor=south,fill=red!30,align=left] (cat1) at ([xshift=-0.2em,yshift=2em]hiero.north west) {\footnotesize{基于形式文法}\\\footnotesize{的模型}}; \node[cnode,anchor=south,fill=blue!30,align=left] (cat2) at ([xshift=-0.0em,yshift=2em]t2s.north) {\footnotesize{基于语言学}\\\footnotesize{句法的模型}}; \node[cnode,anchor=south,minimum width=10.0em,fill=green!30,align=center] (cat0) at ([xshift=-3em,yshift=2em]cat2.north west) {\footnotesize{(广义上)}\\\footnotesize{基于句法的模型}}; \draw [-,thick] ([yshift=0.1em,xshift=1em]cat1.north) -- ([xshift=-1.5em,yshift=-0.1em]cat0.south); \draw [-,thick] ([yshift=0.1em,xshift=-1em]cat2.north) -- ([xshift=1.5em,yshift=-0.1em]cat0.south); \draw [-,thick] ([yshift=0.1em]itg.north) -- ([xshift=-0.5em,yshift=-0.1em]cat1.south); \draw [-,thick] ([yshift=0.1em]hiero.north) -- ([xshift=0.5em,yshift=-0.1em]cat1.south); \draw [-,thick] ([yshift=0.1em]s2t.north) -- ([xshift=-0.8em,yshift=-0.1em]cat2.south); \draw [-,thick] ([yshift=0.1em]t2s.north) -- ([xshift=-0.0em,yshift=-0.1em]cat2.south); \draw [-,thick] ([yshift=0.1em]t2t.north) -- ([xshift=0.8em,yshift=-0.1em]cat2.south); \node [anchor=north] (itglabel) at (itg.south) {\scriptsize{(Wu, 1995)}}; \node [anchor=north] (hierolabel) at (hiero.south) {\scriptsize{(Chiang, 2005)}}; \node [anchor=north,align=left] (s2tlabel) at (s2t.south) {\scriptsize{(Galley et al.,}\\\scriptsize{\ 2004; 2006)}}; \node [anchor=north,align=left] (t2slabel) at (t2s.south) {\scriptsize{(Liu 
et al.,}\\\scriptsize{\ 2006)}}; \node [anchor=north,align=left] (t2tlabel) at (t2t.south) {\scriptsize{(Eisner, 2003)}}; \end{scope} \end{tikzpicture} \end{center} \vspace{-0.8em} \begin{itemize} \item 实际上,上面仅仅只是一种分类方法,还有很多其它分类标准,比如:句法软约束 vs 句法硬约束,基于树 vs. 基于串,等等 \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 对比 \begin{frame}{句法模型对比} \begin{tabular}{l | l | l | l | l} & 形式句法 & \multicolumn{3}{c}{语言学句法} \\ \cline{3-5} & & 树到串 & 串到树 & 树到树 \\ \hline 源语句法 & No & Yes & No & Yes \\ 目标语句法 & No & No & Yes & Yes \\ 基于串的解码 & Yes & No & Yes & Yes \\ 基于树的解码 & No & Yes & No & Yes \\ 健壮性 & High & Mid & Mid & Low \end{tabular} \vspace{0.3em} \begin{itemize} \item<2-> 下面,以树到串/串到树为例进行介绍,一些代表性论文\\ \begin{itemize} \item \textbf{What's in a translation rule?}\\ \textbf{Galley et al., 2004, In Proc. of HLT-NAACL} \item \textbf{Scalable Inference and Training of Context-Rich Syntactic Translation Models}\\ \textbf{Galley et al., 2006, In Proc. of ACL} \item \textbf{Tree-to-String Alignment Template for Statistical Machine Translation}\\ \textbf{Liu et al., 2006, In Proc. 
of ACL} \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ \subsection{基于树结构的文法} %%%------------------------------------------------------------------------------------------------------------ %%% 树结构的表示 \begin{frame}{树结构的表示} \begin{itemize} \item 基于句法的翻译模型核心是对\alert{树结构}进行建模。对于树到串和串到树模型,本质上是要找到树和串的对应关系;而对于树到树模型,本质上是找到树到树的对应 \begin{tikzpicture} {\footnotesize \begin{scope}[sibling distance=3pt, level distance = 22pt] \Tree[.S [.NN ] [.VP [.AD ] [.VP [.VV ] [.AS ] ] ] ] \end{scope} \begin{scope}[xshift=1in,yshift=0.25in] \node [anchor=north west,align=left] (string1) at (0,0) {[S \\\hspace{1em}NN\\\hspace{1em}VP[\\\hspace{2.5em}AD \\\hspace{2.5em}VP[\\\hspace{4em}VV \\\hspace{4em}AS]]]}; \end{scope} \begin{scope}[xshift=2.5in,yshift=-0.80in] \node [anchor=west,align=left] (string2) at (0,0) {(S NN VP(AD \\ VP(VV AS)))}; \end{scope} \node [anchor=north west] (cap1) at (-1.5em,-1in) {\scriptsize{(a) 树状表示}}; \node [anchor=west] (cap2) at ([xshift=0.5in]cap1.east) {\scriptsize{(b) 序列表示(缩进)}}; \node [anchor=west] (cap3) at ([xshift=0.5in]cap2.east) {\scriptsize{(c) 序列表示}}; } \end{tikzpicture} \item<2-> 通常,可以用基于树结构的翻译规则来描述上述过程,有两种情况: \begin{enumerate} \item 树到串翻译规则 - 对应树到串、串到树模型 \item 树到树翻译规则 - 对应树到树模型 \end{enumerate} 这里用一种统一的形式描述上述规则 \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 基于树结构的文法 \begin{frame}{基于树结构的文法} \begin{itemize} \item 为了描述任意树和串之间的转换,可以定义如下文法 \end{itemize} \begin{beamerboxesrounded}[upper=uppercolblue,lower=lowercolblue,shadow=true]{定义 - 基于树结构的文法} {\small 一个基于树结构的文法由七部分构成$(N_s, N_t, T_s, T_t, I_s, I_t, R)$,其中 \\ 1. $N_s$和$N_t$是源语和目标语非终结符集合 \\ 2. $T_s$和$T_t$是源语言和目标语终结符集合\\ 3. $I_s \subseteq N_s$和$I_t \subseteq N_t$是源语言和目标语起始非终结符集合\\ 4. 
$R$是规则集合,每条规则$r \in R$有如下形式 \begin{displaymath} \langle\ \alpha_h, \beta_h\ \rangle \to \langle\ \alpha_r, \beta_r, \sim\ \rangle \end{displaymath} 其中,规则左部由非终结符$\alpha_h \in N_s$和$\beta_h \in N_t$构成;规则右部由三部分组成,$\alpha_r$表示由源语言终结符和非终结符组成的树结构;$\beta_r$ 表示由目标语言终结符和非终结符组成的树结构;$\sim$表示$\alpha_r$和$\beta_r$中叶子非终结符的1-1对应关系 } \end{beamerboxesrounded} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 规则实例(树到树) \begin{frame}{基于树的翻译规则} \begin{itemize} \item 上述文法定义了一种树结构到树结构的映射,例子: \end{itemize} \begin{center} \begin{tikzpicture} {\scriptsize \begin{scope}[sibling distance=5pt, level distance = 22pt] \Tree[.\node(s1){VP}; [.\node(s2){PP}; ] [.\node(s3){VP}; [.\node(s4){VV}; \node[fill=white](w1){表示}; ] [.\node(s5){NN}; ] ] ] \end{scope} \begin{scope}[xshift=2in,sibling distance=5pt, level distance = 22pt] \Tree[.\node(t1){VP}; [.\node(t2){VBZ}; \node(w2){was}; ] [.\node(t3){VP}; [.\node(t4){VBN}; ] [.\node(t5){PP}; ] ] ] \end{scope} \begin{pgfonlayer}{background} \node [anchor=west] (arrow) at ([xshift=3em]s5.east) {\Large{\textbf{$\to$}}}; \visible<2->{ \node [inner sep=0,fill=orange!20] [fit = (s2)] (snode1) {}; \node [inner sep=0,fill=green!20] [fit = (s5)] (snode2) {}; \node [inner sep=0,fill=green!20] [fit = (t4)] (tnode1) {}; \node [inner sep=0,fill=orange!20] [fit = (t5)] (tnode2) {}; \draw [<->,dotted,thick] (snode2.south) ..controls +(south:2.5em) and +(south:2.5em).. (tnode1.south); \draw [<->,dotted,thick] (snode1.south) ..controls +(south:8em) and +(south:4.5em).. 
(tnode2.south); } \end{pgfonlayer} } \end{tikzpicture} \end{center} \vspace{-2.0em} \begin{itemize} \item 由规则定义$\langle\ \alpha_h, \beta_h\ \rangle \to \langle\ \alpha_r, \beta_r, \sim\ \rangle$知道 \vspace{-1.3em} {\small \begin{eqnarray} \langle\ \alpha_h,\beta_h\ \rangle & = & \langle\ \textrm{VP}, \textrm{VP}\ \rangle \nonumber \\ \alpha_r & = & \textrm{VP(PP:}x\ \textrm{VP(VV(表示) NN:}x)) \nonumber \\ \beta_r & = & \textrm{VP(VBZ(was) VP(VBN:}x\ \textrm{PP:}x)) \nonumber \\ \sim & = & \{1-2,2-1\} \nonumber \end{eqnarray} } \vspace{-1.7em} \visible<2->{ $x$表示叶子非终结符(可替换的变量),显然这是调序规则 } \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 规则实例(树到树) \begin{frame}{基于树的翻译规则(续)} \begin{itemize} \item 可以省略``$\sim$'',把规则重新写为易于计算机处理的格式 \\ \vspace{-1em} \begin{eqnarray} \langle\ \textrm{VP}, \textrm{VP}\ \rangle & \to & \langle\ \textrm{VP(PP}_{\alert{1}}\ \textrm{VP(VV(表示) NN}_{\alert{2}})), \nonumber \\ & & \ \ \textrm{VP(VBZ(was) VP(VBN}_{\alert{2}}\ \textrm{PP}_{\alert{1}}))\ \rangle \nonumber \end{eqnarray} 其中变量的对应关系用下标数字表示,比如:$\textrm{PP}_1 \leftrightarrow \textrm{PP}_1$,$\textrm{NN}_2 \leftrightarrow \textrm{VBN}_2$ \item<2-> 在这个规则的树结构中,每个叶子非终结符本质上定义了一个变量,这个节点也被称作边缘节点(frontier node)。边缘节点可以被其它树结构替换,组合为更大的树结构,这个操作被称作组合(composition) 或树替换 \end{itemize} \visible<2->{ \begin{center} \begin{tikzpicture} {\scriptsize \begin{scope}[sibling distance=5pt, level distance = 22pt] \Tree[.\node(s1){VP}; [.\node(s2){PP}; ] [.\node(s3){VP}; [.\node(s4){VV}; \node[fill=white](w1){表示}; ] [.\node(s5){NN}; ] ] ] \end{scope} \begin{scope}[xshift=1.2in, yshift=-0.2in, sibling distance=5pt, level distance = 22pt] \Tree[.\node(ws1){NN}; \node(ws2){满意}; ] \end{scope} \node [anchor=west] (arrow) at ([xshift=0.4in]ws2.east) {\large{$\Rightarrow$}}; \begin{pgfonlayer}{background} \node [inner sep=0,fill=green!20] [fit = (ws1)] (snode1) {}; \node [inner sep=0,fill=green!20] [fit = (s5)] (snode2) {}; \draw 
[<-,thick,dotted] ([xshift=0.1em]s5.east) ..controls +(east:3em) and +(west:3em).. ([xshift=-0.1em]ws1.west); \end{pgfonlayer} \begin{scope}[xshift=2.5in, sibling distance=5pt, level distance = 22pt] \Tree[.\node(s1){VP}; [.\node(s2){PP}; ] [.\node(s3){VP}; [.\node(s4){VV}; \node[fill=white](w1){表示}; ] [.\node(s5){NN}; \node(w2){满意}; ] ] ] \end{scope} } \end{tikzpicture} \end{center} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 规则的组装对应翻译过程 \begin{frame}{使用规则定义翻译过程} \begin{itemize} \item 规则的推导描述双语句子同步生成(和分析)的过程 \begin{itemize} \item \raisebox{0.3em}{\tikz{\draw[*-*] (0,0)--(0.5,0);}}表示对边缘节点(变量)的替换操作 \end{itemize} \end{itemize} \begin{tikzpicture} % target side \begin{scope}[xshift=-1em, level distance=25pt] \node[scale=0.8] at (1.5, 2.2) {目标语言}; \node[scale=0.6, inner sep=0.1cm, draw,xshift=1em] (tfrag1) at (0,0.05) {\Tree[.NP [.DT the ] [.NNS imports ]]}; \visible<4->{ \node[scale=0.6, inner sep=0.1cm, draw, sibling distance=100pt] (tfrag2) at (1.4,1.4) {\Tree[.S [.NP ] [.VP ]]}; } \visible<3->{ \node[scale=0.6, inner sep=0.1cm, draw, sibling distance=30pt] (tfrag3) at (3,-0.2) {\Tree[.VP [.VBZ have ] [.ADVP [.RB ] [.VBN fallen ]]]}; } \visible<2->{ \node[scale=0.6, inner sep=0.1cm, draw,xshift=-1.5em] (tfrag4) at (2.8,-1.8) {\Tree[.RB drastically ]}; } \visible<4->{\draw[*-*] (0.15,0.7) -- (0.15,1.05);} \visible<4->{\draw[*-*] (2.7,0.7) -- (2.7,1.05);} \visible<3->{\draw[*-*] (2.4,-1.4) .. controls +(north:0.6) and +(south:0.6) .. 
(3.0,-0.6);} \end{scope} % source side \begin{scope}[scale=0.8, level distance=25pt, xshift=-20em, yshift=1em] \node[scale=0.8] at (2.1, 2.45) {源语言}; \visible<1->{ \node[scale=0.6, inner sep=0.1cm, draw] (sfrag1) at (0,0) {\Tree[.NN 进口 ]}; } \visible<4->{ \node[scale=0.6, inner sep=0.1cm, draw, sibling distance=135pt] (sfrag2) at (2.0,1.4) {\Tree[.IP [.NN ] [.VP ]]}; } \visible<3->{ \node[scale=0.6, inner sep=0.1cm, xshift=1em, draw] (sfrag3) at (4,-0.7) {\Tree[.VP [.AD ] [.ADVP [.VV 下降 ] [.AS 了 ]]]}; } \visible<2->{ \node[scale=0.6, inner sep=0.1cm, draw] (sfrag4) at (1.3,-1.3) {\Tree[.AD 大幅度 ]}; } \visible<4->{\draw[*-*] (0.05,0.5) -- (0.05,0.95);} \visible<4->{\draw[*-*] (4.15,0.5) -- (4.15,0.95);} \visible<3->{\draw[*-*] (1.55,-0.95) .. controls +(east:1.0) and +(south:0.4) .. (3.5,-0.6);} \end{scope} % rule 1 \begin{scope}[scale=0.6, xshift=-22em, yshift=-11em, level distance=20pt] \begin{scope}[anchor=north east, xshift=-6em] \Tree[.NN 进口 ] \end{scope} \draw[->] (-1.7,0.1) -- (-0.9,0.1); \begin{scope}[anchor=north west] \Tree[.NP [.DT the ] [.NNS imports ]] \end{scope} \end{scope} % rule 2 \begin{scope}[scale=0.6, xshift=-11em, yshift=-11em, level distance=20pt] \begin{scope}[anchor=north east, xshift=-6em] \Tree[.AD 大幅度 ] \end{scope} \draw[->] (-1.7,0.1) -- (-0.9,0.1); \begin{scope}[anchor=north west] \Tree[.RB drastically ] \end{scope} \end{scope} % rule 3 \begin{scope}[scale=0.6, xshift=2em, yshift=-11em, level distance=20pt] \begin{scope}[anchor=north east, xshift=-7em] \Tree[.VP [.AD ] [.ADVP [.VV 下降 ] [.AS 了 ]]] \end{scope} \draw[->] (-1.7,0.1) -- (-0.9,0.1); \begin{scope}[anchor=north west, xshift=1em] \Tree[.VP [.VBZ have ] [.ADVP [.RB ] [.VBN fallen ]]] \end{scope} \end{scope} % rule 4 \begin{scope}[scale=0.6, xshift=16em, yshift=-11em, level distance=20pt] \begin{scope}[anchor=north east, xshift=-6em] \Tree[.IP [.NN ] [.VP ]] \end{scope} \draw[->] (-1.7,0.1) -- (-0.9,0.1); \begin{scope}[anchor=north west,xshift=-1em] \Tree[.S [.NP ] [.VP ]] 
\end{scope} \end{scope} % red rule 1 \begin{scope}[red, scale=0.6, xshift=-22em, yshift=-11em, level distance=20pt] \visible<1>{ \begin{scope}[anchor=north east, xshift=-6em] \Tree[.NN 进口 ] \end{scope} \draw[->] (-1.7,0.1) -- (-0.9,0.1); \begin{scope}[anchor=north west] \Tree[.NP [.DT the ] [.NNS imports ]] \end{scope} } \end{scope} \visible<2>{ % red rule 2 \begin{scope}[red, scale=0.6, xshift=-11em, yshift=-11em, level distance=20pt] \begin{scope}[anchor=north east, xshift=-6em] \Tree[.AD 大幅度 ] \end{scope} \draw[->] (-1.7,0.1) -- (-0.9,0.1); \begin{scope}[anchor=north west] \Tree[.RB drastically ] \end{scope} \end{scope} } \visible<3>{ % red rule 3 \begin{scope}[red, scale=0.6, xshift=2em, yshift=-11em, level distance=20pt] \begin{scope}[anchor=north east, xshift=-7em] \Tree[.VP [.AD ] [.ADVP [.VV 下降 ] [.AS 了 ]]] \end{scope} \draw[->] (-1.7,0.1) -- (-0.9,0.1); \begin{scope}[anchor=north west, xshift=1em] \Tree[.VP [.VBZ have ] [.ADVP [.RB ] [.VBN fallen ]]] \end{scope} \end{scope} } \visible<4>{ % rule 4 \begin{scope}[red, scale=0.6, xshift=16em, yshift=-11em, level distance=20pt] \begin{scope}[anchor=north east, xshift=-6em] \Tree[.IP [.NN ] [.VP ]] \end{scope} \draw[->] (-1.7,0.1) -- (-0.9,0.1); \begin{scope}[anchor=north west,xshift=-1em] \Tree[.S [.NP ] [.VP ]] \end{scope} \end{scope} } \end{tikzpicture} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 树到串规则 \begin{frame}{树到串翻译规则} \begin{itemize} \item 对于只有一端使用句法树的情况,仍然可以用上述规则定义进行描述 \begin{itemize} \item 树到串翻译可以看做是句法树到词串的转换,串到树类似,只是反过来看 \end{itemize} \end{itemize} \begin{center} \begin{tikzpicture} \begin{scope}[scale=0.9] \Tree[.\node(sn1){VP}; [.\node(sn2){VV}; \node(scw1){提高}; ] [.\node(sn3){NN}; ] ] \path [draw, ->, thick] ([xshift=1em]sn3.east) -- ([xshift=2.5em]sn3.east); \node [anchor=west] (tw1) at ([xshift=3.5em]sn3.east) {increases}; \node [anchor=west] (tw2) at ([xshift=0.3em]tw1.east) {NN}; \draw[dotted,thick] 
([yshift=-0.1em]sn3.south)..controls +(south:1.2) and +(south: 1.2)..([yshift=-0.1em]tw2.south); \end{scope} \end{tikzpicture} \end{center} \vspace{-0.3em} \begin{itemize} \item<2-> 由规则定义$\langle\ \alpha_h, \beta_h\ \rangle \to \langle\ \alpha_r, \beta_r, \sim\ \rangle$,可得 \vspace{-1.3em} {\small \begin{eqnarray} \alpha_h & = & \textrm{VP} \nonumber \\ \beta_h & = & \textrm{VP}\ (=\alpha_h) \nonumber \\ \alpha_r & = & \textrm{VP(VV(提高) NN:}x) \nonumber \\ \beta_r & = & \textrm{increases\ NN:}x \nonumber \\ \sim & = & \{1-1\} \nonumber \end{eqnarray} } \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 树到串规则 \begin{frame}{树到串翻译规则} \begin{itemize} \item 这里,目标端是一个词串,因此 \begin{itemize} \item $\beta_h$实际上并不是树的根节点标记,直接复制$\alpha_h$即可,也就是说目标语端和源语端共享同一个非终结符集合 \item $\beta_r$是一个词串,可以被看作是树的叶子节点序列,或者一个单层的树结构 \end{itemize} \item<2-> 可以把这条规则简记为 \begin{center} \vspace{0.5em} $\textrm{VP} \to \langle\ \textrm{VP(VV(提高) NN}_1),\ \textrm{increases\ NN}_1\ \rangle$ \\ \vspace{0.5em} \end{center} 或 \\ \begin{center} \vspace{0.5em} $\textrm{VP(VV(提高) NN}_1) \to \textrm{increases\ NN}_1$ \\ \end{center} \vspace{0.5em} 上述规则也被称作\alert{树到串翻译规则} \item<3-> 类似的,层次短语规则也可以被看作是一种特殊的基于树结构的规则,它的源语和目标语都是由单层树结构构成,且源语和目标语共享同一个非终结符集合 \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ \subsection{树到串翻译规则抽取} %%%------------------------------------------------------------------------------------------------------------ %%% 基于树结构的翻译文法 - 树到串/串到树 \begin{frame}{树到串规则抽取 - GHKM方法} \begin{itemize} \item 基于句法的翻译系统的核心有两个部分 \begin{enumerate} \item \textbf{文法归纳}:从带有句法分析结果的双语数据中自动学习翻译规则 \item \textbf{解码}:使用学习到的翻译规则对新的句子进行翻译 \end{enumerate} \item<2-> 首先要解决的是如何获取翻译规则,即\alert{规则抽取} - 这里先从GHKM方法开始,它是经典的树到串翻译规则的抽取方法(Galley et al., 2004; 2006) \begin{itemize} \item 方法的名字是由四位作者的名字首字母构成 :) \end{itemize} \item<3-> GHKM方法的输入包括 \begin{itemize} \item 
源语言句子和它的短语分析树 \item 目标语句子 \item 源语和目标语句子之间的词对齐 \end{itemize} \item<3-> 注意: \begin{itemize} \item 句法树可以由句法分析器自动生成 \item 词对齐可以由词对齐系统(如IBM模型)自动生成 \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 翻译规则抽取本质上是要完成对树结构的切割 \begin{frame}{树片段} \begin{itemize} \item 树到串翻译规则实际上是一个树片段到一个词串的映射。一个合理的树到串翻译规则,不应该违反任何的词对齐信息 \begin{itemize} \item 显然这种树片段可以有很多 \item 一棵句法树也可以被切割成多个树片段 \end{itemize} \end{itemize} \begin{center} \begin{tikzpicture} {\footnotesize \begin{scope}[sibling distance=25pt, level distance=20pt] \Tree[.\node(n1){IP}; [.\node(n2){NP}; [.\node(n3){PN}; \node(cw1){他}; ]] [.\node(n4){VP}; [.\node(n5){PP}; [.\node(n6){P}; \node(cw2){对}; ] [.\node(n7){NP}; [.\node(n8){NN}; \node(cw3){回答}; ] ] ] [.\node(n9){VP}; [.\node(n10){VV}; \node(cw4){表示}; ] [.\node(n11){NN}; \node(cw5){满意}; ] ] ] ] \node[anchor=north,minimum size=18pt] (tw1) at ([yshift=-6.0em]cw1.south){he}; \node[anchor=west,minimum size=18pt] (tw2) at ([yshift=-0.1em,xshift=1.1em]tw1.east){was}; \node[anchor=west,minimum size=18pt] (tw3) at ([yshift=0.1em,xshift=1.1em]tw2.east){satisfied}; \node[anchor=west,minimum size=18pt] (tw4) at ([xshift=1.1em]tw3.east){with}; \node[anchor=west,minimum size=18pt] (tw5) at ([xshift=1.1em]tw4.east){the}; \node[anchor=west,minimum size=18pt] (tw6) at ([yshift=-0.1em,xshift=1.1em]tw5.east){answer}; \draw[dashed] (cw1.south) -- ([yshift=-0.4em]tw1.north); \draw[dashed] (cw2.south) .. controls +(south:1.6) and +(north:0.6) .. ([yshift=-0.4em]tw4.north); \draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw5.north); \draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw6.north); \draw[dashed] (cw4.south) .. controls +(south:2.0) and +(north:0.6) .. ([yshift=-0.4em]tw3.north); \draw[dashed] (cw5.south) .. controls +(south:2.0) and +(north:0.6) .. 
([yshift=-0.4em]tw3.north); \begin{pgfonlayer}{background} \visible<2->{ \node [rectangle,inner sep=0em,fill=red!20] [fit = (cw2) (cw3) (n5)] (rule1s) {}; \node [rectangle,inner sep=0em,fill=red!20] [fit = (tw4) (tw5) (tw6)] (rule1t) {}; } \visible<3->{ \node [rectangle,inner sep=0em,fill=blue!20] [fit = (cw5) (n11)] (rule2s) {}; \node [rectangle,inner sep=0em,fill=blue!20] [fit = (tw3)] (rule2t) {}; } \end{pgfonlayer} \visible<2->{ \node [anchor=south] (rule1label) at ([xshift=1em]rule1s.north west) {\scriptsize{\textbf{\alert{正确的规则}}}}; } \visible<3->{ \node [anchor=north west,align=left] (rule2label) at (rule2s.north east) {\scriptsize{\textbf{\color{blue} 错误的规则}}\\\scriptsize{因为``satisfied''会}\\\scriptsize{对齐到规则外,}\\\scriptsize{也就是这条规则}\\\scriptsize{与词对齐不相容}}; } \end{scope} } \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% Span的定义 \begin{frame}{Span} \begin{itemize} \item 为了描述每个树节点对齐到目标语的情况,定义概念 \end{itemize} \begin{beamerboxesrounded}[upper=uppercolblue,lower=lowercolblue,shadow=true]{定义 - Span} {\small 源语树节点$n$的Span是它所对应到的目标语的第一个单词和最后一个单词所构成的索引范围 } \end{beamerboxesrounded} \begin{center} \begin{tikzpicture} {\footnotesize \begin{scope}[sibling distance=25pt, level distance=20pt] \Tree[.\node(n1){IP}; [.\node(n2){NP}; [.\node(n3){PN}; \node(cw1){他}; ]] [.\node(n4){VP}; [.\node(n5){PP}; [.\node(n6){P}; \node(cw2){对}; ] [.\node(n7){NP}; [.\node(n8){NN}; \node(cw3){回答}; ] ] ] [.\node(n9){VP}; [.\node(n10){VV}; \node(cw4){表示}; ] [.\node(n11){NN}; \node(cw5){满意}; ] ] ] ] \node[anchor=north,minimum size=18pt,align=center] (tw1) at ([yshift=-6.0em]cw1.south){he\\\scriptsize{1}}; \node[anchor=west,minimum size=18pt,align=center] (tw2) at ([yshift=-0.1em,xshift=1.1em]tw1.east){was\\\scriptsize{2}}; \node[anchor=west,minimum size=18pt,align=center] (tw3) at ([yshift=0.1em,xshift=1.1em]tw2.east){satisfied\\\scriptsize{3}}; \node[anchor=west,minimum 
size=18pt,align=center] (tw4) at ([xshift=1.1em]tw3.east){with\\\scriptsize{4}}; \node[anchor=west,minimum size=18pt,align=center] (tw5) at ([xshift=1.1em]tw4.east){the\\\scriptsize{5}}; \node[anchor=west,minimum size=18pt,align=center] (tw6) at ([yshift=-0.1em,xshift=1.1em]tw5.east){answer\\\scriptsize{6}}; \draw[dashed] (cw1.south) -- ([yshift=-0.4em]tw1.north); \draw[dashed] (cw2.south) .. controls +(south:1.6) and +(north:0.6) .. ([yshift=-0.4em]tw4.north); \draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw5.north); \draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw6.north); \draw[dashed] (cw4.south) .. controls +(south:2.0) and +(north:0.6) .. ([yshift=-0.4em]tw3.north); \draw[dashed] (cw5.south) .. controls +(south:2.0) and +(north:0.6) .. ([yshift=-0.4em]tw3.north); \visible<2->{ \node [anchor=south west,align=left,fill=orange!20,drop shadow] (label1) at ([xshift=0.5em]n11.north east) {\scriptsize{span=\{3\}}\\\scriptsize{{\color{orange!20} c-span=\{1,3-6\}}}}; } \visible<3->{ \node [anchor=south west,align=left,fill=green!20,drop shadow] (label1) at ([xshift=0.5em,yshift=-0.5em]n4.north east) {\scriptsize{span=\{3-6\}}\\\scriptsize{{\color{green!20} c-span=\{1\}}}}; } \begin{pgfonlayer}{background} \visible<2->{ \node [rectangle,fill=orange!20,inner sep=0] [fit = (n11)] (n11box) {}; } \visible<3->{ \node [rectangle,fill=green!20,inner sep=0] [fit = (n4)] (n4box) {}; } \end{pgfonlayer} \end{scope} } \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% Complement Span的定义 \begin{frame}{Complement Span} \begin{itemize} \item 进一步,引入Complement Span的概念 \end{itemize} \vspace{-0.2em} \begin{beamerboxesrounded}[upper=uppercolblue,lower=lowercolblue,shadow=true]{定义 - Complement Span} {\small 源语树节点$n$的Complement Span是除了它的祖先和子孙节点外的其它节点Span的并集 } \end{beamerboxesrounded} \begin{center} \begin{tikzpicture} {\footnotesize \begin{scope}[sibling distance=25pt, level 
distance=20pt] \Tree[.\node(n1){IP}; [.\node(n2){NP}; [.\node(n3){PN}; \node(cw1){他}; ]] [.\node(n4){VP}; [.\node(n5){PP}; [.\node(n6){P}; \node(cw2){对}; ] [.\node(n7){NP}; [.\node(n8){NN}; \node(cw3){回答}; ] ] ] [.\node(n9){VP}; [.\node(n10){VV}; \node(cw4){表示}; ] [.\node(n11){NN}; \node(cw5){满意}; ] ] ] ] \node[anchor=north,minimum size=18pt,align=center] (tw1) at ([yshift=-6.0em]cw1.south){he\\\scriptsize{1}}; \node[anchor=west,minimum size=18pt,align=center] (tw2) at ([yshift=-0.1em,xshift=1.1em]tw1.east){was\\\scriptsize{2}}; \node[anchor=west,minimum size=18pt,align=center] (tw3) at ([yshift=0.1em,xshift=1.1em]tw2.east){satisfied\\\scriptsize{3}}; \node[anchor=west,minimum size=18pt,align=center] (tw4) at ([xshift=1.1em]tw3.east){with\\\scriptsize{4}}; \node[anchor=west,minimum size=18pt,align=center] (tw5) at ([xshift=1.1em]tw4.east){the\\\scriptsize{5}}; \node[anchor=west,minimum size=18pt,align=center] (tw6) at ([yshift=-0.1em,xshift=1.1em]tw5.east){answer\\\scriptsize{6}}; \draw[dashed] (cw1.south) -- ([yshift=-0.4em]tw1.north); \draw[dashed] (cw2.south) .. controls +(south:1.6) and +(north:0.6) .. ([yshift=-0.4em]tw4.north); \draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw5.north); \draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw6.north); \draw[dashed] (cw4.south) .. controls +(south:2.0) and +(north:0.6) .. ([yshift=-0.4em]tw3.north); \draw[dashed] (cw5.south) .. controls +(south:2.0) and +(north:0.6) .. 
([yshift=-0.4em]tw3.north); \node [anchor=south west,align=left,fill=orange!20,drop shadow] (label1) at ([xshift=0.5em]n11.north east) {\scriptsize{span=\{3\}}\\\scriptsize{c-span=\{1,3-6\}}}; \node [anchor=south west,align=left,fill=green!20,drop shadow] (label1) at ([xshift=0.5em,yshift=-0.5em]n4.north east) {\scriptsize{span=\{3-6\}}\\\scriptsize{c-span=\{1\}}}; \begin{pgfonlayer}{background} \node [rectangle,fill=orange!20,inner sep=0] [fit = (n11)] (n11box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n4)] (n4box) {}; \end{pgfonlayer} \end{scope} } \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% Admissible Node的定义 \begin{frame}{可信节点} \begin{itemize} \item 基于Span和Complement Span,可以定义可信节点 \end{itemize} \vspace{-0.3em} \begin{beamerboxesrounded}[upper=uppercolblue,lower=lowercolblue,shadow=true]{定义 - 可信节点(Admissible Node)} {\small 对于源语树节点$n$,如果它的Span和Complement Span不相交,节点$n$就是一个可信节点,否则是一个不可信节点 } \end{beamerboxesrounded} \begin{center} \begin{tikzpicture} {\footnotesize \begin{scope}[sibling distance=25pt, level distance=20pt] \Tree[.\node(n1){IP}; [.\node(n2){NP}; [.\node(n3){PN}; \node(cw1){他}; ]] [.\node(n4){VP}; [.\node(n5){PP}; [.\node(n6){P}; \node(cw2){对}; ] [.\node(n7){NP}; [.\node(n8){NN}; \node(cw3){回答}; ] ] ] [.\node(n9){VP}; [.\node(n10){VV}; \node(cw4){表示}; ] [.\node(n11){NN}; \node(cw5){满意}; ] ] ] ] \node[anchor=north,minimum size=18pt,align=center] (tw1) at ([yshift=-6.0em]cw1.south){he\\\scriptsize{1}}; \node[anchor=west,minimum size=18pt,align=center] (tw2) at ([yshift=-0.1em,xshift=1.1em]tw1.east){was\\\scriptsize{2}}; \node[anchor=west,minimum size=18pt,align=center] (tw3) at ([yshift=0.1em,xshift=1.1em]tw2.east){satisfied\\\scriptsize{3}}; \node[anchor=west,minimum size=18pt,align=center] (tw4) at ([xshift=1.1em]tw3.east){with\\\scriptsize{4}}; \node[anchor=west,minimum size=18pt,align=center] (tw5) at 
([xshift=1.1em]tw4.east){the\\\scriptsize{5}}; \node[anchor=west,minimum size=18pt,align=center] (tw6) at ([yshift=-0.1em,xshift=1.1em]tw5.east){answer\\\scriptsize{6}}; \draw[dashed] (cw1.south) -- ([yshift=-0.4em]tw1.north); \draw[dashed] (cw2.south) .. controls +(south:1.6) and +(north:0.6) .. ([yshift=-0.4em]tw4.north); \draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw5.north); \draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw6.north); \draw[dashed] (cw4.south) .. controls +(south:2.0) and +(north:0.6) .. ([yshift=-0.4em]tw3.north); \draw[dashed] (cw5.south) .. controls +(south:2.0) and +(north:0.6) .. ([yshift=-0.4em]tw3.north); \node [anchor=south west,align=left,fill=orange!20,drop shadow] (label1) at ([xshift=0.5em]n11.north east) {\scriptsize{span=\{3\}}\\\scriptsize{c-span=\{1,3-6\}}}; \node [anchor=south west,align=left,fill=green!20,drop shadow] (label2) at ([xshift=0.5em,yshift=-0.5em]n4.north east) {\scriptsize{span=\{3-6\}}\\\scriptsize{c-span=\{1\}}}; \begin{pgfonlayer}{background} \node [rectangle,fill=orange!20,inner sep=0] [fit = (n11)] (n11box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n4)] (n4box) {}; \visible<3->{ \node [rectangle,fill=green!20,inner sep=0] [fit = (n1)] (n1box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n2)] (n2box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n3)] (n3box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n5)] (n5box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n6)] (n6box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n7)] (n7box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n8)] (n8box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n9)] (n9box) {}; \node [rectangle,fill=orange!20,inner sep=0] [fit = (n10)] (n10box) {}; } \end{pgfonlayer} \visible<2->{ \node [anchor=north] (n11boxlabel) at (label1.south) {\tiny{\textbf{\alert{不可信}}}}; \node [anchor=north] (n4boxlabel) at (label2.south) {\tiny{\textbf{\alert{可信}}}}; } 
\visible<3->{ \node [anchor=north west, minimum size=1.2em, fill=green!20] (land1) at ([xshift=1.5em,yshift=-1em]cw5.south east) {}; \node [anchor=west] (land1label) at (land1.east) {\scriptsize{可信}}; \node [anchor=north west, minimum size=1.2em, fill=orange!20] (land2) at ([yshift=-0.3em]land1.south west) {}; \node [anchor=west] (land2label) at (land2.east) {\scriptsize{不可信}}; } \end{scope} } \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 规则抽取 \begin{frame}{规则抽取} \begin{itemize} \item 可信节点本质上定义了规则的边界,规则需要满足 \begin{itemize} \item 左部树片段的根节点是可信节点 \item 左部树片段的叶子节点是终结符或者可信节点 \end{itemize} \end{itemize} \begin{center} {\footnotesize \visible<2->{ 例如:$\textrm{VP(PP(P(对) NP(NN(回答))) VP}_1) \to \textrm{VP}_1\ \textrm{with the answer}$ } } \end{center} \begin{center} \begin{tikzpicture} {\footnotesize \begin{scope}[sibling distance=25pt, level distance=20pt] \Tree[.\node(n1){IP}; [.\node(n2){NP}; [.\node(n3){PN}; \node(cw1){他}; ]] [.\node(n4){VP}; [.\node(n5){PP}; [.\node(n6){P}; \node(cw2){对}; ] [.\node(n7){NP}; [.\node(n8){NN}; \node(cw3){回答}; ] ] ] [.\node(n9){VP}; [.\node(n10){VV}; \node(cw4){表示}; ] [.\node(n11){NN}; \node(cw5){满意}; ] ] ] ] \node[anchor=north,minimum size=18pt,align=center] (tw1) at ([yshift=-6.0em]cw1.south){he\\\scriptsize{1}}; \node[anchor=west,minimum size=18pt,align=center] (tw2) at ([yshift=-0.1em,xshift=1.1em]tw1.east){was\\\scriptsize{2}}; \node[anchor=west,minimum size=18pt,align=center] (tw3) at ([yshift=0.1em,xshift=1.1em]tw2.east){satisfied\\\scriptsize{3}}; \node[anchor=west,minimum size=18pt,align=center] (tw4) at ([xshift=1.1em]tw3.east){with\\\scriptsize{4}}; \node[anchor=west,minimum size=18pt,align=center] (tw5) at ([xshift=1.1em]tw4.east){the\\\scriptsize{5}}; \node[anchor=west,minimum size=18pt,align=center] (tw6) at ([yshift=-0.1em,xshift=1.1em]tw5.east){answer\\\scriptsize{6}}; \draw[dashed] (cw1.south) -- 
([yshift=-0.4em]tw1.north); \draw[dashed] (cw2.south) .. controls +(south:1.6) and +(north:0.6) .. ([yshift=-0.4em]tw4.north); \draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw5.north); \draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw6.north); \draw[dashed] (cw4.south) .. controls +(south:2.0) and +(north:0.6) .. ([yshift=-0.4em]tw3.north); \draw[dashed] (cw5.south) .. controls +(south:2.0) and +(north:0.6) .. ([yshift=-0.4em]tw3.north); \begin{pgfonlayer}{background} \visible<2->{ \node [fill=blue!30] [fit = (cw2) (cw3) (n7) (n8)] (rule1box1) {}; \node [fill=blue!30] [fit = (n4) (n9)] (rule1box2) {}; \node [fill=blue!30] [fit = (tw3) (tw4) (tw5) (tw6)] (rule1box3) {}; \path [fill=blue!30] ([xshift=0.05em]rule1box2.north west) -- ([xshift=-0.3em,yshift=0.3em]n5.north west) -- ([yshift=-0.05em]rule1box1.north west) -- ([xshift=0.05em,yshift=-0.05em]rule1box2.south west) -- ([xshift=0.05em]rule1box2.north west); } \node [rectangle,fill=orange!20,inner sep=0] [fit = (n11)] (n11box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n4)] (n4box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n1)] (n1box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n2)] (n2box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n3)] (n3box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n5)] (n5box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n6)] (n6box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n7)] (n7box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n8)] (n8box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n9)] (n9box) {}; \node [rectangle,fill=orange!20,inner sep=0] [fit = (n10)] (n10box) {}; \end{pgfonlayer} \node [anchor=north west, minimum size=1.2em, fill=green!20] (land1) at ([xshift=1.5em,yshift=-1em]cw5.south east) {}; \node [anchor=west] (land1label) at (land1.east) {\scriptsize{可信}}; \node [anchor=north west, minimum size=1.2em, fill=orange!20] (land2) at ([yshift=-0.3em]land1.south west) 
{}; \node [anchor=west] (land2label) at (land2.east) {\scriptsize{不可信}}; \visible<3->{ \node [draw,thick,red,fill=red!20] [fit = (n9)] (var1) {{\color{black} VP}}; \node [draw,thick,red,fill=red!20] [fit = (tw3)] (var1v2) {{\color{black} \large{VP}}}; \node [anchor=west] (var1label) at ([yshift=0.5em]var1.east) {\tiny{\alert{变量}}}; \node [anchor=south] (var1v2label) at ([xshift=-2em]var1v2.north) {\tiny{\alert{变量}}}; } \end{scope} } \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 规则抽取 \begin{frame}{规则抽取 - 树的切割} \begin{itemize} \item 所有可信节点构成了句法树的边缘集合(frontier set),进而得到树的一种\alert{切割}以及切割得到的树片段(或规则) \end{itemize} \begin{center} \begin{tikzpicture} {\scriptsize \begin{scope}[scale = 0.9, sibling distance=20pt, level distance=30pt] {\footnotesize \Tree[.\node(n1){IP}; [.\node(n2){NP}; [.\node(n3){PN}; \node(cw1){他}; ]] [.\node(n4){VP}; [.\node(n5){PP}; [.\node(n6){P}; \node(cw2){对}; ] [.\node(n7){NP}; [.\node(n8){NN}; \node(cw3){回答}; ] ] ] [.\node(n9){VP}; [.\node(n10){VV}; \node(cw4){表示}; ] [.\node(n11){NN}; \node(cw5){满意}; ] ] ] ] } \begin{pgfonlayer}{background} \visible<2->{ \node [rectangle,fill=orange!20,inner sep=0] [fit = (n11)] (n11box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n4)] (n4box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n1)] (n1box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n2)] (n2box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n3)] (n3box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n5)] (n5box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n6)] (n6box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n7)] (n7box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n8)] (n8box) {}; \node [rectangle,fill=green!20,inner sep=0] [fit = (n9)] (n9box) {}; \node [rectangle,fill=orange!20,inner sep=0] [fit = (n10)] (n10box) {}; \node [anchor=north west, 
minimum size=1.2em, fill=green!20] (land1) at ([xshift=7.0em,yshift=0em]n1.north east) {}; \node [anchor=west] (land1label) at (land1.east) {\scriptsize{可信}}; \node [anchor=north west, minimum size=1.2em, fill=orange!20] (land2) at ([yshift=-0.3em]land1.south west) {}; \node [anchor=west] (land2label) at (land2.east) {\scriptsize{不可信}}; } \end{pgfonlayer} \node[anchor=north,minimum size=18pt] (tw1) at ([yshift=-10.0em]cw1.south){he}; \node[anchor=west,minimum size=18pt] (tw2) at ([yshift=-0.1em,xshift=0.3em]tw1.east){was}; \node[anchor=west,minimum size=18pt] (tw3) at ([yshift=0.1em,xshift=0.3em]tw2.east){satisfied}; \node[anchor=west,minimum size=18pt] (tw4) at ([xshift=0.3em]tw3.east){with}; \node[anchor=west,minimum size=18pt] (tw5) at ([xshift=0.3em]tw4.east){the}; \node[anchor=west,minimum size=18pt] (tw6) at ([yshift=-0.1em,xshift=0.3em]tw5.east){answer}; \draw[dashed] (cw1.south) -- ([yshift=-0.4em]tw1.north); \draw[dashed] (cw2.south) .. controls +(south:2.0) and +(north:0.6) .. ([yshift=-0.4em]tw4.north); \draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw5.north); \draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw6.north); \draw[dashed] (cw4.south) .. controls +(south:2.5) and +(north:0.6) .. ([yshift=-0.4em]tw3.north); \draw[dashed] (cw5.south) .. controls +(south:2.5) and +(north:0.6) .. 
([yshift=-0.4em]tw3.north); \end{scope} \begin{scope} [yshift = -1.87in, xshift = 1.9in] \visible<3->{ \begin{scope}[level distance=20pt] \node[scale=0.8, inner sep=0.1cm,align=center,draw] (cfrag1) at (0,0.25) {\Tree[.\node(sn1){NP}; [.\node(sn2){PN}; 他 ]]}; \end{scope} \begin{scope}[level distance=20pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag2) at ([xshift=1.2em]cfrag1.south east) {\Tree[.\node(sn3){P}; 对 ]}; \end{scope} \begin{scope}[level distance=20pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag3) at ([xshift=1.2em]cfrag2.south east) {\Tree[.\node(sn4){NP}; [.NN 回答 ]]}; \end{scope} \begin{scope}[sibling distance=15pt,level distance=20pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag4) at ([xshift=1.4em]cfrag3.south east) {\Tree[.\node(sn5){VP}; [.\node(sn6){VV}; 表示 ] [.\node(sn7){NN}; 满意 ]]}; \end{scope} \begin{scope}[sibling distance=20pt,,level distance=25pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag5) at ([xshift=0.3em,yshift=2.2em]cfrag2.north west) {\Tree[.\node(sn8){PP}; [.\node(sn9){P}; ] [.\node(sn10){NP}; ]]}; \end{scope} \begin{scope}[sibling distance=60pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag6) at ([xshift=1.6em,yshift=0.5em]cfrag5.north west) {\Tree[.\node(sn11){VP}; [.\node(sn12){PP}; ] [.\node(sn13){VP}; ]]}; \end{scope} \begin{scope}[sibling distance=80pt,level distance=18pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south east,draw] (cfrag7) at ([xshift=-3.6em,yshift=0.5em]cfrag6.north east) {\Tree[.\node(sn14){IP}; [.\node(sn15){NP}; ] [.\node(sn16){VP}; ]]}; \end{scope} \node[scale=0.9,anchor=north,minimum size=18pt] (tw11) at ([xshift=-0.3em,yshift=-1.2em]cfrag1.south){he}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw12) at ([yshift=-0.1em,xshift=0.5em]tw11.east){was}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw13) at 
([yshift=0.1em,xshift=0.5em]tw12.east){satisfied};
          \node[scale=0.9,anchor=west,minimum size=18pt] (tw14) at ([xshift=0.5em]tw13.east){with};
          \node[scale=0.9,anchor=west,minimum size=18pt] (tw15) at ([xshift=0.5em]tw14.east){the};
          \node[scale=0.9,anchor=west,minimum size=18pt] (tw16) at ([yshift=-0.1em,xshift=0.5em]tw15.east){answer};
          % Dashed links from the bottom of fragment boxes 1-4 to target words.
          \draw[dashed] ([xshift=-0.3em]cfrag1.south) -- ([yshift=-0.3em]tw11.north);
          \draw[dashed] (cfrag2.south) -- ([yshift=-0.4em]tw14.north);
          \draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw15.north);
          \draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw16.north);
          \draw[dashed] (cfrag4.south) .. controls +(south:0.6) and +(north:0.6) .. ([yshift=-0.4em]tw13.north);
          % Vertical *-* connectors rising from the top of fragment boxes 1-6.
          \draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag1.north) -- ([xshift=0.0em,yshift=10.0em]cfrag1.north);
          \draw[*-*] ([xshift=0.1em,yshift=-0.2em]cfrag2.north) -- ([xshift=0.1em,yshift=2.5em]cfrag2.north);
          \draw[*-*] ([xshift=0.1em,yshift=-0.4em]cfrag3.north) -- ([xshift=0.1em,yshift=0.7em]cfrag3.north);
          \draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag4.north) -- ([xshift=0.0em,yshift=5.1em]cfrag4.north);
          \draw[*-*] ([xshift=0.1em,yshift=-0.2em]cfrag5.north) -- ([xshift=0.1em,yshift=0.9em]cfrag5.north);
          \draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag6.north) -- ([xshift=0.0em,yshift=0.9em]cfrag6.north);
        }
      \end{scope}
    }
  \end{tikzpicture}
\end{center}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% Minimal rules
\begin{frame}{规则抽取 - 最小规则}
  \begin{itemize}
    \item 边缘集合所对应的树切割,产生了若干树片段,这些树片段内部不包含其它的切割点,每个树片段都对应了一条最小翻译规则(即无法继续分解的规则)。见如下示例
  \end{itemize}
  \begin{minipage}[b]{0.47\textwidth}
    {\footnotesize
      \renewcommand*{\arraystretch}{1.3}
      % Rule table; each row is uncovered on the slide given in its \visible spec.
      \begin{tabular}{l l}
        \visible<2->{$r_1$} & \visible<2->{NP(PN(他)) $\to$ he} \\
        \visible<3->{$r_2$} & \visible<3->{P(对) $\to$ with} \\
        \visible<4->{$r_3$} & \visible<4->{NP(NN(回答)) $\to$ the answer} \\
        \visible<5->{$r_4$} & \visible<5->{VP(VV(表示) NN(满意)) $\to$} \\
        & \visible<5->{satisfied} \\
\visible<6->{$r_5$} & \visible<6->{PP(P$_1$ NP$_2$) $\to$} \\ & \visible<6->{P$_1$ NP$_2$} \\ \visible<6->{$r_6$} & \visible<6->{VP(PP$_1$ VP$_2$) $\to$} \\ & \visible<6->{VP$_2$ PP$_1$} \\ \visible<6->{$r_7$} & \visible<6->{IP(NP$_1$ VP$_2$) $\to$} \\ & \visible<6->{NP$_1$ VP$_2$} \\ & \\ \end{tabular} \renewcommand*{\arraystretch}{1.0} } \end{minipage} \hfill \begin{minipage}[t]{0.47\textwidth} \begin{tikzpicture} {\scriptsize \begin{scope} \visible<1->{ \begin{scope}[level distance=20pt] \node[scale=0.8, inner sep=0.1cm,align=center,draw] (cfrag1) at (0,0.25) {\Tree[.\node(sn1){NP}; [.\node(sn2){PN}; 他 ]]}; \end{scope} \begin{scope}[level distance=20pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag2) at ([xshift=1.2em]cfrag1.south east) {\Tree[.\node(sn3){P}; 对 ]}; \end{scope} \begin{scope}[level distance=20pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag3) at ([xshift=1.2em]cfrag2.south east) {\Tree[.\node(sn4){NP}; [.NN 回答 ]]}; \end{scope} \begin{scope}[sibling distance=15pt,level distance=20pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag4) at ([xshift=1.4em]cfrag3.south east) {\Tree[.\node(sn5){VP}; [.\node(sn6){VV}; 表示 ] [.\node(sn7){NN}; 满意 ]]}; \end{scope} \begin{scope}[sibling distance=20pt,,level distance=25pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag5) at ([xshift=0.3em,yshift=2.2em]cfrag2.north west) {\Tree[.\node(sn8){PP}; [.\node(sn9){P}; ] [.\node(sn10){NP}; ]]}; \end{scope} \begin{scope}[sibling distance=60pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag6) at ([xshift=1.6em,yshift=0.5em]cfrag5.north west) {\Tree[.\node(sn11){VP}; [.\node(sn12){PP}; ] [.\node(sn13){VP}; ]]}; \end{scope} \begin{scope}[sibling distance=80pt,level distance=18pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south east,draw] (cfrag7) at ([xshift=-3.6em,yshift=0.5em]cfrag6.north east) 
{\Tree[.\node(sn14){IP}; [.\node(sn15){NP}; ] [.\node(sn16){VP}; ]]}; \end{scope} \node[scale=0.9,anchor=north,minimum size=18pt] (tw11) at ([xshift=-0.3em,yshift=-1.2em]cfrag1.south){he}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw12) at ([yshift=-0.1em,xshift=0.5em]tw11.east){was}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw13) at ([yshift=0.1em,xshift=0.5em]tw12.east){satisfied}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw14) at ([xshift=0.5em]tw13.east){with}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw15) at ([xshift=0.5em]tw14.east){the}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw16) at ([yshift=-0.1em,xshift=0.5em]tw15.east){answer}; \draw[dashed] ([xshift=-0.3em]cfrag1.south) -- ([yshift=-0.3em]tw11.north); \draw[dashed] (cfrag2.south) -- ([yshift=-0.4em]tw14.north); \draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw15.north); \draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw16.north); \draw[dashed] (cfrag4.south) .. controls +(south:0.6) and +(north:0.6) .. 
([yshift=-0.4em]tw13.north);
          % Vertical *-* connectors rising from the top of fragment boxes 1-6.
          \draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag1.north) -- ([xshift=0.0em,yshift=10.0em]cfrag1.north);
          \draw[*-*] ([xshift=0.1em,yshift=-0.2em]cfrag2.north) -- ([xshift=0.1em,yshift=2.5em]cfrag2.north);
          \draw[*-*] ([xshift=0.1em,yshift=-0.4em]cfrag3.north) -- ([xshift=0.1em,yshift=0.7em]cfrag3.north);
          \draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag4.north) -- ([xshift=0.0em,yshift=5.1em]cfrag4.north);
          \draw[*-*] ([xshift=0.1em,yshift=-0.2em]cfrag5.north) -- ([xshift=0.1em,yshift=0.9em]cfrag5.north);
          \draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag6.north) -- ([xshift=0.0em,yshift=0.9em]cfrag6.north);
          % Numbered blue badges on the fragment boxes, uncovered on slides 2-6.
          \visible<2->{
            \node [fill=blue,circle,inner sep=2pt] (rlabel1) at (cfrag1.south east) {{\color{white} \tiny{1}}};
          }
          \visible<3->{
            \node [fill=blue,circle,inner sep=2pt] (rlabel2) at (cfrag2.south east) {{\color{white} \tiny{2}}};
          }
          \visible<4->{
            \node [fill=blue,circle,inner sep=2pt] (rlabel3) at (cfrag3.south east) {{\color{white} \tiny{3}}};
          }
          \visible<5->{
            \node [fill=blue,circle,inner sep=2pt] (rlabel4) at (cfrag4.south east) {{\color{white} \tiny{4}}};
          }
          \visible<6->{
            \node [fill=blue,circle,inner sep=2pt] (rlabel5) at (cfrag5.north west) {{\color{white} \tiny{5}}};
            \node [fill=blue,circle,inner sep=2pt] (rlabel6) at (cfrag6.north east) {{\color{white} \tiny{6}}};
            \node [fill=blue,circle,inner sep=2pt] (rlabel7) at (cfrag7.south west) {{\color{white} \tiny{7}}};
          }
        }
      \end{scope}
    }
  \end{tikzpicture}
\end{minipage}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% Unary rules
\begin{frame}{一元规则}
  \begin{itemize}
    \item 上述例子中,词性生成单词的过程对应一种\alert{一元规则},即树的孩子节点只有一个,例如 \\
    \vspace{0.5em}
    \begin{center}
      NP(PN(他)) $\to$ he
    \end{center}
    \vspace{0.5em}
    显然,NP只有一个孩子PN,PN只有一个孩子他。实际上,这条规则对应两条规则的组合
    \vspace{0.5em}
    \begin{center}
      PN(他) $\to$ he\\
      \vspace{0.3em}
      NP(PN$_1$) $\to$ PN$_1$
    \end{center}
    \vspace{0.5em}
    这里,把两条一元规则合并成``NP(PN(他)) $\to$
he''主要是出于系统实现的考虑,因为允许任意词性到句法标记的一元规则会大大增加翻译假设数量,但是本质上这些一元规则并没有带来太多新的信息\\
    \vspace{0.2em}
    这也是为什么上图中PN虽然是可信节点,但是没有作为树切割的边界
  \end{itemize}
\end{frame}

\subsection{更多样的规则}

%%%------------------------------------------------------------------------------------------------------------
%%% Handling unaligned (null-aligned) words
\begin{frame}{更多的规则 - 处理空对齐}
  \begin{itemize}
    \item 句法翻译系统成功的前提是规则可以覆盖尽可能多的语言现象。为了得到覆盖度更高的规则集,需要处理空对齐的情况 - 把空对齐单词附着在所有可能的规则上
  \end{itemize}
  \begin{minipage}[b]{0.47\textwidth}
    {\footnotesize
      \renewcommand*{\arraystretch}{1.3}
      % Rows r_1..r_7 appear on slide 2; r_8..r_11 (the variants containing "was") on slides 4-7.
      \begin{tabular}{l l}
        \visible<2->{$r_1$} & \visible<2->{NP(PN(他)) $\to$ he} \\
        \visible<2->{$r_4$} & \visible<2->{VP(VV(表示) NN(满意)) $\to$} \\
        & \visible<2->{satisfied} \\
        \visible<2->{$r_6$} & \visible<2->{VP(PP$_1$ VP$_2$) $\to$ VP$_2$ PP$_1$} \\
        \visible<2->{$r_7$} & \visible<2->{IP(NP$_1$ VP$_2$) $\to$ NP$_1$ VP$_2$} \\
        \visible<4->{$r_8$} & \visible<4->{NP(PN(他)) $\to$ he \alert{was}} \\
        \visible<5->{$r_9$} & \visible<5->{VP(VV(表示) NN(满意)) $\to$} \\
        & \visible<5->{\alert{was} satisfied} \\
        \visible<6->{$r_{10}$} & \visible<6->{VP(PP$_1$ VP$_2$) $\to$} \\
        & \visible<6->{\alert{was} VP$_2$ PP$_1$} \\
        \visible<7->{$r_{11}$} & \visible<7->{IP(NP$_1$ VP$_2$) $\to$} \\
        & \visible<7->{NP$_1$ \alert{was} VP$_2$} \\
      \end{tabular}
      \renewcommand*{\arraystretch}{1.0}
    }
  \end{minipage}
  \hfill
  \begin{minipage}[t]{0.47\textwidth}
    \begin{tikzpicture}
      {\scriptsize
        \begin{scope}
          \visible<2->{
            % Boxed tree fragments cfrag1..cfrag3, each placed to the right of the previous one.
            \begin{scope}[level distance=20pt]
              \node[scale=0.8, inner sep=0.1cm,align=center,draw] (cfrag1) at (0,0.25) {\Tree[.\node(sn1){NP}; [.\node(sn2){PN}; 他 ]]};
            \end{scope}
            \begin{scope}[level distance=20pt]
              \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag2) at ([xshift=1.2em]cfrag1.south east) {\Tree[.\node(sn3){P}; 对 ]};
            \end{scope}
            \begin{scope}[level distance=20pt]
              \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag3) at ([xshift=1.2em]cfrag2.south east) {\Tree[.\node(sn4){NP}; [.NN 回答 ]]};
            \end{scope}
            \begin{scope}[sibling
distance=15pt,level distance=20pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag4) at ([xshift=1.4em]cfrag3.south east) {\Tree[.\node(sn5){VP}; [.\node(sn6){VV}; 表示 ] [.\node(sn7){NN}; 满意 ]]}; \end{scope} \begin{scope}[sibling distance=20pt,,level distance=25pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag5) at ([xshift=0.3em,yshift=2.2em]cfrag2.north west) {\Tree[.\node(sn8){PP}; [.\node(sn9){P}; ] [.\node(sn10){NP}; ]]}; \end{scope} \begin{scope}[sibling distance=60pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag6) at ([xshift=1.6em,yshift=0.5em]cfrag5.north west) {\Tree[.\node(sn11){VP}; [.\node(sn12){PP}; ] [.\node(sn13){VP}; ]]}; \end{scope} \begin{scope}[sibling distance=80pt,level distance=18pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south east,draw] (cfrag7) at ([xshift=-3.6em,yshift=0.5em]cfrag6.north east) {\Tree[.\node(sn14){IP}; [.\node(sn15){NP}; ] [.\node(sn16){VP}; ]]}; \end{scope} \node[scale=0.9,anchor=north,minimum size=18pt] (tw11) at ([xshift=-0.3em,yshift=-1.2em]cfrag1.south){he}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw12) at ([yshift=-0.1em,xshift=0.5em]tw11.east){was}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw13) at ([yshift=0.1em,xshift=0.5em]tw12.east){satisfied}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw14) at ([xshift=0.5em]tw13.east){with}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw15) at ([xshift=0.5em]tw14.east){the}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw16) at ([yshift=-0.1em,xshift=0.5em]tw15.east){answer}; \draw[dashed] ([xshift=-0.3em]cfrag1.south) -- ([yshift=-0.3em]tw11.north); \draw[dashed] (cfrag2.south) -- ([yshift=-0.4em]tw14.north); \draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw15.north); \draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw16.north); \draw[dashed] (cfrag4.south) .. controls +(south:0.6) and +(north:0.6) .. 
([yshift=-0.4em]tw13.north); \draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag1.north) -- ([xshift=0.0em,yshift=10.0em]cfrag1.north); \draw[*-*] ([xshift=0.1em,yshift=-0.2em]cfrag2.north) -- ([xshift=0.1em,yshift=2.5em]cfrag2.north); \draw[*-*] ([xshift=0.1em,yshift=-0.4em]cfrag3.north) -- ([xshift=0.1em,yshift=0.7em]cfrag3.north); \draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag4.north) -- ([xshift=0.0em,yshift=5.1em]cfrag4.north); \draw[*-*] ([xshift=0.1em,yshift=-0.2em]cfrag5.north) -- ([xshift=0.1em,yshift=0.9em]cfrag5.north); \draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag6.north) -- ([xshift=0.0em,yshift=0.9em]cfrag6.north); \visible<4->{ \node [fill=blue,circle,inner sep=2pt] (rlabel1) at (cfrag1.south east) {{\color{white} \tiny{1}}}; } %\node [fill=blue,circle,inner sep=2pt] (rlabel2) at (cfrag2.south east) {{\color{white} \tiny{2}}}; %\node [fill=blue,circle,inner sep=2pt] (rlabel3) at (cfrag3.south east) {{\color{white} \tiny{3}}}; \visible<5->{ \node [fill=blue,circle,inner sep=2pt] (rlabel4) at (cfrag4.south east) {{\color{white} \tiny{4}}}; } %\node [fill=blue,circle,inner sep=2pt] (rlabel5) at (cfrag5.north west) {{\color{white} \tiny{5}}}; \visible<6->{ \node [fill=blue,circle,inner sep=2pt] (rlabel6) at (cfrag6.north east) {{\color{white} \tiny{6}}}; } \visible<7->{ \node [fill=blue,circle,inner sep=2pt] (rlabel7) at (cfrag7.south west) {{\color{white} \tiny{7}}}; } \visible<3->{ \node [fill=white,draw=red,thick] (tw12label) at (tw12) {\alert{was}}; } \visible<4->{ \draw [->,red] ([xshift=0.2em]tw12label.north west) .. controls +(north:0.4) and +(south:0.4) .. ([xshift=0em]cfrag1.south); } \visible<7->{ \draw [->,red] ([xshift=0.8em]tw12label.north west) -- ([xshift=0.8em,yshift=16.8em]tw12label.north west); } \visible<6->{ \draw [->,red] ([xshift=0.2em]tw12label.north) .. controls +(north:7em) and +(south:11em) .. 
([xshift=0em]cfrag6.south);
            }
            \visible<5->{
              \draw [->,red] ([xshift=0.6em]tw12label.north) -- ([xshift=-2em]cfrag4.south);
            }
            % Green backdrops behind fragments 1, 4, 6 and 7, uncovered on slides 4-7.
            % Each fit node has its own name so that none overwrites another.
            \begin{pgfonlayer}{background}
              \visible<4->{
                \node [fill=green!20,inner sep=0pt] (cfrag1back) [fit = (cfrag1)] {};
              }
              \visible<5->{
                \node [fill=green!20,inner sep=0pt] (cfrag4back) [fit = (cfrag4)] {};
              }
              \visible<6->{
                \node [fill=green!20,inner sep=0pt] (cfrag6back) [fit = (cfrag6)] {};
              }
              \visible<7->{
                \node [fill=green!20,inner sep=0pt] (cfrag7back) [fit = (cfrag7)] {};
              }
            \end{pgfonlayer}
          }
        \end{scope}
      }
    \end{tikzpicture}
  \end{minipage}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% Composed rules
\begin{frame}{更多的规则 - 组合规则}
  \begin{itemize}
    \item 最小规则之间可以进行组合,得到更大粒度的组合规则,可以使用更多上下文信息,并进行更复杂的调序。比如,三条最小规则组合成一条composed-3规则
  \end{itemize}
  \begin{minipage}[b]{0.47\textwidth}
    {\footnotesize
      \renewcommand*{\arraystretch}{1.3}
      % Minimal rules on slide 2, then composed rules r_{1,7}, r_{1,6,7}, r_{1,5,6,7} on slides 3-5.
      % In r_{1,5,6,7} the PP level is kept: r_5 is substituted for PP_1 inside r_6.
      \begin{tabular}{l l}
        \visible<2->{$r_1$} & \visible<2->{NP(PN(他)) $\to$ he} \\
        \visible<2->{$r_5$} & \visible<2->{PP(P$_1$ NP$_2$) $\to$ P$_1$ NP$_2$} \\
        \visible<2->{$r_6$} & \visible<2->{VP(PP$_1$ VP$_2$) $\to$ VP$_2$ PP$_1$} \\
        \visible<2->{$r_7$} & \visible<2->{IP(NP$_1$ VP$_2$) $\to$ NP$_1$ VP$_2$} \\
        \visible<3->{\alert{$r_{1,7}$}} & \visible<3->{IP(NP(PN(他)) VP$_1$) $\to$} \\
        & \visible<3->{he VP$_1$}\\
        \visible<4->{\alert{$r_{1,6}$}} & \visible<4->{IP(NP(PN(他)) VP(PP$_1$ VP$_2$))} \\
        \visible<4->{\alert{\ $^{,7}$}} & \visible<4->{$\to$ he VP$_2$ PP$_1$}\\
        \visible<5->{\alert{$r_{1,5}$}} & \visible<5->{IP(NP(PN(他)) } \\
        \visible<5->{\alert{\ $^{,6,7}$}} & \visible<5->{VP(PP(P$_1$ NP$_2$) VP$_3$))}\\
        & \visible<5->{$\to$ he VP$_3$ P$_1$ NP$_2$}\\
        & \\
      \end{tabular}
      \renewcommand*{\arraystretch}{1.0}
    }
  \end{minipage}
  \hfill
  \begin{minipage}[t]{0.47\textwidth}
    \begin{tikzpicture}
      {\scriptsize
        \begin{scope}
          \visible<2->{
            \begin{scope}[level distance=20pt]
              \node[scale=0.8, inner sep=0.1cm,align=center,draw] (cfrag1) at (0,0.25) {\Tree[.\node(sn1){NP};
[.\node(sn2){PN}; 他 ]]}; \end{scope} \begin{scope}[level distance=20pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag2) at ([xshift=1.2em]cfrag1.south east) {\Tree[.\node(sn3){P}; 对 ]}; \end{scope} \begin{scope}[level distance=20pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag3) at ([xshift=1.2em]cfrag2.south east) {\Tree[.\node(sn4){NP}; [.NN 回答 ]]}; \end{scope} \begin{scope}[sibling distance=15pt,level distance=20pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag4) at ([xshift=1.4em]cfrag3.south east) {\Tree[.\node(sn5){VP}; [.\node(sn6){VV}; 表示 ] [.\node(sn7){NN}; 满意 ]]}; \end{scope} \begin{scope}[sibling distance=20pt,,level distance=25pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag5) at ([xshift=0.3em,yshift=2.2em]cfrag2.north west) {\Tree[.\node(sn8){PP}; [.\node(sn9){P}; ] [.\node(sn10){NP}; ]]}; \end{scope} \begin{scope}[sibling distance=60pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag6) at ([xshift=1.6em,yshift=0.5em]cfrag5.north west) {\Tree[.\node(sn11){VP}; [.\node(sn12){PP}; ] [.\node(sn13){VP}; ]]}; \end{scope} \begin{scope}[sibling distance=80pt,level distance=18pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south east,draw] (cfrag7) at ([xshift=-3.6em,yshift=0.5em]cfrag6.north east) {\Tree[.\node(sn14){IP}; [.\node(sn15){NP}; ] [.\node(sn16){VP}; ]]}; \end{scope} \node[scale=0.9,anchor=north,minimum size=18pt] (tw11) at ([xshift=-0.3em,yshift=-1.2em]cfrag1.south){he}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw12) at ([yshift=-0.1em,xshift=0.5em]tw11.east){was}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw13) at ([yshift=0.1em,xshift=0.5em]tw12.east){satisfied}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw14) at ([xshift=0.5em]tw13.east){with}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw15) at ([xshift=0.5em]tw14.east){the}; 
\node[scale=0.9,anchor=west,minimum size=18pt] (tw16) at ([yshift=-0.1em,xshift=0.5em]tw15.east){answer}; \draw[dashed] ([xshift=-0.3em]cfrag1.south) -- ([yshift=-0.3em]tw11.north); \draw[dashed] (cfrag2.south) -- ([yshift=-0.4em]tw14.north); \draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw15.north); \draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw16.north); \draw[dashed] (cfrag4.south) .. controls +(south:0.6) and +(north:0.6) .. ([yshift=-0.4em]tw13.north); \draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag1.north) -- ([xshift=0.0em,yshift=10.0em]cfrag1.north); \draw[*-*] ([xshift=0.1em,yshift=-0.2em]cfrag2.north) -- ([xshift=0.1em,yshift=2.5em]cfrag2.north); \draw[*-*] ([xshift=0.1em,yshift=-0.4em]cfrag3.north) -- ([xshift=0.1em,yshift=0.7em]cfrag3.north); \draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag4.north) -- ([xshift=0.0em,yshift=5.1em]cfrag4.north); \draw[*-*] ([xshift=0.1em,yshift=-0.2em]cfrag5.north) -- ([xshift=0.1em,yshift=0.9em]cfrag5.north); \draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag6.north) -- ([xshift=0.0em,yshift=0.9em]cfrag6.north); \visible<3->{ \node [fill=blue,circle,inner sep=2pt] (rlabel1) at (cfrag1.south east) {{\color{white} \tiny{1}}}; \draw[*-*,red,thick] ([xshift=0.0em,yshift=-0.2em]cfrag1.north) -- ([xshift=0.0em,yshift=10.0em]cfrag1.north); } \visible<5->{ \node [fill=blue,circle,inner sep=2pt] (rlabel5) at (cfrag5.north west) {{\color{white} \tiny{5}}}; \draw[*-*,red] ([xshift=0.1em,yshift=-0.2em]cfrag5.north) -- ([xshift=0.1em,yshift=0.9em]cfrag5.north); } \visible<4->{ \node [fill=blue,circle,inner sep=2pt] (rlabel6) at (cfrag6.north east) {{\color{white} \tiny{6}}}; \draw[*-*,red] ([xshift=0.0em,yshift=-0.2em]cfrag6.north) -- ([xshift=0.0em,yshift=0.9em]cfrag6.north); } \visible<3->{ \node [fill=blue,circle,inner sep=2pt] (rlabel7) at (cfrag7.south west) {{\color{white} \tiny{7}}}; } \begin{pgfonlayer}{background} \visible<3->{ \node [fill=green!20,inner sep=0pt] (cfrag1back) [fit = (cfrag1)] {}; \node [fill=green!20,inner 
sep=0pt] (cfrag1back) [fit = (cfrag7)] {};
              }
              \visible<4->{
                \node [fill=green!20,inner sep=0pt] (cfrag1back) [fit = (cfrag6)] {};
              }
              \visible<5->{
                \node [fill=green!20,inner sep=0pt] (cfrag1back) [fit = (cfrag5)] {};
              }
            \end{pgfonlayer}
          }
        \end{scope}
      }
    \end{tikzpicture}
  \end{minipage}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% SPMT rules
\begin{frame}{更多的规则 - SPMT规则}
  \begin{itemize}
    \item 任意大小的规则都可以通过组合的方式获取,但是组合过多数量的规则会大大增加规则集大小。SPMT是一种高效地获得大颗粒度规则的方法 (Marcu et al., 2006)
    \begin{itemize}
      \item 先抽取短语,之后找到覆盖这个短语的可信节点
      \item 以这个可信节点做根,生成包含该短语的规则
    \end{itemize}
  \end{itemize}
  \vspace{-0.1em}
  \begin{minipage}[b]{0.47\textwidth}
    {\footnotesize
      % The source word is written 形势 in the phrase pair, the rule and the tree leaf below.
      \visible<3->{
        对于任意一个与词对齐兼容的短语,可以找到包含它的``最小''翻译规则,即SPMT规则,比如\\
        \vspace{-0.4em}
        对 形势 $\to$ about the situation \\
        \vspace{-0.4em}
        可以很容易得到它的SPMT规则 \\
        \vspace{-0.4em}
        VP(P(对) NP(NN(形势)) VP$_1$) $\to$ VP$_1$ about the situation \\
      }
      \visible<4->{
        \vspace{-0.3em}
        但是,如果用组合的方式,需要三条最小规则才能得到这条规则\\
      }
      \vfill
    }
  \end{minipage}
  \hfill
  \begin{minipage}[t]{0.47\textwidth}
    \begin{tikzpicture}
      {\scriptsize
        \begin{scope}
          \visible<2->{
            % Boxed tree fragments cfrag1..cfrag4, each placed to the right of the previous one.
            \begin{scope}[level distance=20pt]
              \node[scale=0.8, inner sep=0.1cm,align=center,draw] (cfrag1) at (0,0.25) {\Tree[.\node(sn1){NP}; [.\node(sn2){PN}; 他 ]]};
            \end{scope}
            \begin{scope}[level distance=20pt]
              \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag2) at ([xshift=1.2em]cfrag1.south east) {\Tree[.\node(sn3){P}; 对 ]};
            \end{scope}
            \begin{scope}[level distance=20pt]
              \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag3) at ([xshift=1.2em]cfrag2.south east) {\Tree[.\node(sn4){NP}; [.NN 形势 ]]};
            \end{scope}
            \begin{scope}[sibling distance=15pt,level distance=20pt]
              \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag4) at ([xshift=1.4em]cfrag3.south east) {\Tree[.\node(sn5){VP}; [.\node(sn6){VV}; 表示 ] [.\node(sn7){NN}; 担心 ]]};
            \end{scope}
            \begin{scope}[sibling distance=32pt,level
distance=20pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag6) at ([xshift=0.3em,yshift=4em]cfrag2.north west) {\Tree[.\node(sn11){VP}; [.\node(sn9){P}; ] [.\node(sn10){NP}; ] [.\node(sn13){VP}; ]]}; \end{scope} \begin{scope}[sibling distance=65pt,level distance=18pt] \node[scale=0.8, inner sep=0.1cm,align=center,anchor=south east,draw] (cfrag7) at ([xshift=-4.5em,yshift=0.5em]cfrag6.north east) {\Tree[.\node(sn14){IP}; [.\node(sn15){NP}; ] [.\node(sn16){VP}; ]]}; \end{scope} \node[scale=0.9,anchor=north,minimum size=18pt] (tw11) at ([xshift=-0.3em,yshift=-1.2em]cfrag1.south){he}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw12) at ([yshift=-0.1em,xshift=0.5em]tw11.east){was}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw13) at ([yshift=0.1em,xshift=0.5em]tw12.east){worried}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw14) at ([xshift=0.5em]tw13.east){about}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw15) at ([xshift=0.5em]tw14.east){the}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw16) at ([yshift=-0.1em,xshift=0.5em]tw15.east){situation}; \draw[dashed] ([xshift=-0.3em]cfrag1.south) -- ([yshift=-0.3em]tw11.north); \draw[dashed] (cfrag2.south) -- ([yshift=-0.4em]tw14.north); \draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw15.north); \draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw16.north); \draw[dashed] (cfrag4.south) .. controls +(south:0.6) and +(north:0.6) .. ([yshift=-0.4em]tw13.north); \visible<3->{ \draw[dashed,red] (cfrag2.south) -- ([yshift=-0.4em]tw14.north); \draw[dashed,red] (cfrag3.south) -- ([yshift=-0.4em]tw15.north); \draw[dashed,red] (cfrag3.south) -- ([yshift=-0.4em]tw16.north); } \draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag1.north) -- ([xshift=0.0em,yshift=6.5em]cfrag1.north); \draw[*-*] ([xshift=-0.1em,yshift=-0.2em]cfrag2.north) -- ([xshift=-0.1em,yshift=4.4em]cfrag2.north); \draw[*-*] ([xshift=0.1em,yshift=-0.4em]cfrag3.north) .. controls +(north:2.4em) and +(south:2.4em) .. 
([xshift=1.1em,yshift=2.5em]cfrag3.north); \draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag4.north) -- ([xshift=0.0em,yshift=2.5em]cfrag4.north); \draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag6.north) -- ([xshift=0.0em,yshift=0.9em]cfrag6.north); \visible<4->{ \node [fill=blue,circle,inner sep=2pt] (rlabel2) at (cfrag2.north east) {{\color{white} \tiny{2}}}; \node [fill=blue,circle,inner sep=2pt] (rlabel3) at (cfrag3.north east) {{\color{white} \tiny{3}}}; \node [fill=blue,circle,inner sep=2pt] (rlabel6) at (cfrag6.north east) {{\color{white} \tiny{5}}}; } \begin{pgfonlayer}{background} \visible<4->{ \node [fill=green!20,inner sep=0pt] (cfrag2back) [fit = (cfrag2)] {}; \node [fill=green!20,inner sep=0pt] (cfrag3back) [fit = (cfrag3)] {}; \node [fill=green!20,inner sep=0pt] (cfrag6back) [fit = (cfrag6)] {}; } \visible<3->{ \node [anchor=south west,draw=red,thick,fill=red!20,inner sep=0pt,minimum height = 2em, minimum width=6em] (ps) at ([xshift=-0.2em,yshift=-0.2em]cfrag2.south west) {}; \node [anchor=south west,draw=red,thick,fill=red!20,inner sep=0pt] (pt) [fit = (tw14) (tw15) (tw16)] {}; } \end{pgfonlayer} } \end{scope} } \end{tikzpicture} \end{minipage} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% tree binarization \begin{frame}{更多的规则 - 句法树二叉化} \begin{itemize} \item 句法分析器生成的句法树可能会非常平坦,这会导致抽取的规则很``大''而且规则无法继续被分解 \begin{itemize} \item 比如,在CTB中经常会看到很宽的子树结构 \end{itemize} \end{itemize} \begin{center} \begin{tikzpicture} {\scriptsize \begin{scope}[scale = 0.9, sibling distance=20pt, level distance=30pt] {\footnotesize \Tree[.IP [.NP ] [.VP ] [., ] [.VP ] [., ] [.VP ] [., ] [.VP ] [.{.{\color{white} V}} ] ] } \end{scope} } \end{tikzpicture} \end{center} \begin{itemize} \item<2-> 一个例子 \end{itemize} \vspace{-1.0em} \begin{center} \begin{tikzpicture} \visible<2->{ {\scriptsize \begin{scope}[sibling distance=4pt, level distance=25pt] {\footnotesize \Tree[.\node(n1){NP}; [.NNP \node(sw1){美国}; ] 
[.NN \node(sw2){总统}; ] [.NN \node(sw3){唐纳德}; ] [.NN \node(sw4){特朗普}; ] ] } \node [anchor=north] (tw1) at ([yshift=-2em]sw1.south) {U.S.}; \node [anchor=north] (tw2) at ([yshift=-2em]sw2.south) {President}; \node [anchor=north] (tw3) at ([yshift=-2em]sw3.south) {Trump}; \draw [-,dashed] (sw1.south) -- (tw1.north); \draw [-,dashed] (sw2.south) -- (tw2.north); \draw [-,dashed] (sw3.south) -- (tw3.north); \draw [-,dashed] (sw4.south) -- (tw3.north); \node [anchor=west] (rulelabel1) at ([xshift=1in,yshift=0.3em]n1.east) {\footnotesize{\textbf{抽取到的规则:}}}; \node [anchor=north west] (rule1) at (rulelabel1.south west) {NP(NNP$_1$ NN$_2$ NN(唐纳德) NN(特朗普))}; \node [anchor=north west] (rule1t) at ([yshift=0.2em]rule1.south west) {$\to$ NNP$_1$ NN$_2$ Trump}; \node [anchor=north west] (rule2) at (rule1t.south west) {NP(NNP$_1$ NN(总统) NN(唐纳德) NN(特朗普))}; \node [anchor=north west] (rule2t) at ([yshift=0.2em]rule2.south west) {$\to$ NNP$_1$ President Trump}; \node [anchor=north west] (rulelabel2) at ([yshift=-0.3em]rule2t.south west) {\footnotesize{\textbf{\alert{不能}抽取到的规则:}}}; \node [anchor=north west] (rule3) at (rulelabel2.south west) {NP(NN(唐纳德) NN(特朗普)) $\to$ Trump}; \end{scope} } } \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% tree binarization (cont.) 
\begin{frame}{更多的规则 - 句法树二叉化(续)} \begin{itemize} \item 一种解决问题的思路是用二叉化方法把树结构变得更深 \vspace{-0.5em} \begin{center} \begin{tikzpicture} {\scriptsize \begin{scope}[sibling distance=4pt, level distance=25pt] \Tree[.\node(n1){NP}; [.NNP \node(sw1){美国}; ] [.NN \node(sw2){总统}; ] [.NN \node(sw3){唐纳德}; ] [.NN \node(sw4){特朗普}; ] ] \node [anchor=north] (tw1) at ([yshift=-2em]sw1.south) {U.S.}; \node [anchor=north] (tw2) at ([yshift=-2em]sw2.south) {President}; \node [anchor=north] (tw3) at ([yshift=-2em]sw3.south) {Trump}; \draw [-,dashed] (sw1.south) -- (tw1.north); \draw [-,dashed] (sw2.south) -- (tw2.north); \draw [-,dashed] (sw3.south) -- (tw3.north); \draw [-,dashed] (sw4.south) -- (tw3.north); \draw [->,very thick] ([xshift=1em]sw4.east) -- ([xshift=5em]sw4.east) node [pos=0.5,above] {\tiny{二叉化}}; \end{scope} \begin{scope}[xshift=2.2in,sibling distance=10pt, level distance=15pt] \Tree[.\node(n1){NP}; [.NNP \node(sw1){美国}; ] [.NP-BAR [.NN \node(sw2){总统}; ] [.NP-BAR [.NN \node(sw3){唐纳德}; ] [.NN \node(sw4){特朗普}; ] ] ] ] \node [anchor=north] (tw1) at ([yshift=-4.5em]sw1.south) {U.S.}; \node [anchor=north] (tw2) at ([yshift=-2.75em]sw2.south) {President}; \node [anchor=north] (tw3) at ([yshift=-1em]sw3.south) {Trump}; \draw [-,dashed] (sw1.south) -- (tw1.north); \draw [-,dashed] (sw2.south) -- (tw2.north); \draw [-,dashed] (sw3.south) -- (tw3.north); \draw [-,dashed] (sw4.south) -- (tw3.north); \end{scope} } \end{tikzpicture} \end{center} \visible<2->{ \small{二叉化增加了更多的可信节点,这也带来了新的规则} \begin{center} {\footnotesize \vspace{0.3em} NP-BAR(NN(唐纳德) NN(特朗普)) $\to$ Trump \\ \vspace{0.3em} NP-BAR(NN$_1$ NP-BAR$_2$) $\to$ NN$_1$ NP-BAR$_2$ \vspace{0.3em} } \end{center} } \item<3-> 树二叉化已经成为基于句法机器翻译模型的常用方法 \begin{itemize} \item 有很多策略:左优先、右优先、head优先等等 \item 二叉化可以得到更多(细粒度)规则,保证规则的覆盖度 \end{itemize} \end{itemize} \end{frame} \subsection{引入双语句法信息} %%%------------------------------------------------------------------------------------------------------------ %%% 树到树规则抽取 \begin{frame}{引入双语句法信息} 
\begin{itemize} \item 对于树到树模型,源语和目标语端都有句法树,需要使用树片段到树片段的映射来描述翻译过程,这种映射关系被描述为树到树翻译规则。这里,把\\ \vspace{-1.3em} \begin{eqnarray} \langle\ \textrm{VP}, \textrm{VP}\ \rangle & \to & \langle\ \textrm{VP(PP}_{1}\ \textrm{VP(VV(表示) NN}_{2})), \nonumber \\ & & \ \ \textrm{VP(VBZ(was) VP(VBZ}_{2}\ \textrm{PP}_{1}))\ \rangle \nonumber \end{eqnarray} 表示为\alert{树片段到树片段}的映射形式\\ \vspace{-1.3em} \begin{eqnarray} & & \textrm{VP(PP}_{1}\ \textrm{VP(VV(表示) NN}_{2})) \nonumber \\ & \to & \textrm{VP(VBZ(was) VP(VBZ}_{2}\ \textrm{PP}_{1})) \nonumber \end{eqnarray} \item<2-> 可以通过扩展GHKM方法进行树到树规则抽取 \begin{itemize} \item 双语端进行可信节点的识别,之后找到节点之间的对应 \item 基于对应的节点获得树片段的对应,即抽取树到树规则 \item 规则组合、SPMT等方法同样适用 \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 方法1:利用词对齐归纳句法映射 \begin{frame}{方法1:利用词对齐归纳树到树规则} \begin{itemize} \item 简单直接的方法是把GHKM方法扩展到双语的情况,利用词对齐归纳树到树映射 \begin{itemize} \item<3-> 但是词对齐的错误往往会导致很多规则无法抽取 \end{itemize} \end{itemize} \begin{minipage}[c][5cm][t]{0.47\textwidth} \begin{center} \begin{tikzpicture} \begin{scope} \begin{scope}[scale=0.65, level distance=27pt] \Tree[.S [.NP [.DT \node(ew1){the}; ] [.NNS \node(ew2){imports}; ] ] [.VP [.VBZ \node(ew3){have}; ] [.ADVP [.RB \node(ew4){drastically}; ] [.VBN \node(ew5){fallen}; ] ] ] ] \end{scope} \begin{scope}[scale=0.65, level distance=27pt, grow'=up, xshift=-13pt, yshift=-3.5in, sibling distance=22pt] \Tree[.IP [.NN \node(cw1){进口}; ] [.VP [.AD \node(cw2){大幅度}; ] [.VP [.VV \node(cw3){下降}; ] [.AS \node(cw4){了}; ] ] ] ] \end{scope} \visible<2->{ \draw[-, dashed] (cw1) -- (ew2); \draw[-, dashed] (cw2) -- (ew4); \draw[-, dashed] (cw3) -- (ew5); \draw[-, dashed] (cw4) .. controls +(north:1.0) and +(south:1.6) .. (ew1); } \visible<3->{ \draw[-, red, dashed,thick] (cw4) .. controls +(north:1.0) and +(south:1.6) .. 
(ew1); } \end{scope} \end{tikzpicture} \end{center} \end{minipage} \begin{minipage}[c][5cm][t]{0.50\textwidth} \visible<2->{ \renewcommand*{\arraystretch}{0.95} \begin{tabular}{l l} \multicolumn{2}{l}{\textbf{\scriptsize{抽取得到的规则}}} \\ \hline \scriptsize{$r_1$} & \scriptsize{AS(了) $\rightarrow$ DT(the)} \\ \scriptsize{$r_2$} & \scriptsize{NN(进口) $\rightarrow$ NNS(imports)} \\ \scriptsize{$r_3$} & \scriptsize{AD(大幅度) $\rightarrow$ RB(drastically)} \\ \scriptsize{$r_4$} & \scriptsize{VV(下降) $\rightarrow$ VBN(fallen)} \\ \scriptsize{$r_5$} & \scriptsize{IP(NN$_1$ VP(AD$_2$ VP(VV$_3$ AS$_4$))) $\rightarrow$} \\ \multicolumn{2}{l}{\tiny{S(NP(DT$_4$ NNS$_1$) VP(VBZ(have) ADVP(RB$_2$ VBN$_3$)))}} \\ \end{tabular} \renewcommand*{\arraystretch}{1.0} } \visible<3->{ \vspace{0.5em} \renewcommand*{\arraystretch}{0.95} \begin{tabular}{l l} \multicolumn{2}{l}{\textbf{\scriptsize{无法得到的规则}}} \\ \hline \scriptsize{$r_{?}$} & \scriptsize{AS(了) $\rightarrow$ VBZ(have)} \\ \scriptsize{$r_{?}$} & \scriptsize{NN(进口) $\rightarrow$} \\ & \scriptsize{NP(DT(the) NNS(imports))} \\ \scriptsize{$r_{?}$} & \scriptsize{IP(NN$_1$ VP$_2$) $\rightarrow$ S(NP$_1$ VP$_2$)} \\ \end{tabular} \renewcommand*{\arraystretch}{1.0} } \end{minipage} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 方法2:直接进行节点对齐然后归纳句法映射 \begin{frame}{方法2:利用节点对齐抽取树到树规则} \begin{itemize} \item 另一种思路是直接获取源语言树节点到目标语树节点的对应关系,然后直接抽取规则,这样可避免词对齐错误 \begin{itemize} \item 节点对齐可以更准确地捕捉双语结构的对应 \end{itemize} \end{itemize} \begin{minipage}[c][5cm][t]{0.47\textwidth} \begin{center} \begin{tikzpicture} \only<1>{ \begin{scope} \begin{scope}[scale=0.65, level distance=27pt] \Tree[.S [.NP [.DT \node(ew1){the}; ] [.NNS \node(ew2){imports}; ] ] [.VP [.VBZ \node(ew3){have}; ] [.ADVP [.RB \node(ew4){drastically}; ] [.VBN \node(ew5){fallen}; ] ] ] ] \end{scope} \begin{scope}[scale=0.65, level distance=27pt, grow'=up, xshift=-13pt, yshift=-3.5in, sibling distance=22pt] \Tree[.IP 
[.NN \node(cw1){进口}; ] [.VP [.AD \node(cw2){大幅度}; ] [.VP [.VV \node(cw3){下降}; ] [.AS \node(cw4){了}; ] ] ] ] \end{scope} \draw[-, dashed] (cw1) -- (ew2); \draw[-, dashed] (cw2) -- (ew4); \draw[-, dashed] (cw3) -- (ew5); \draw[-, dashed] (cw4) .. controls +(north:1.0) and +(south:1.6) .. (ew1); \end{scope} } \begin{scope} \visible<2->{ \begin{scope}[scale=0.65, level distance=27pt] \Tree[.\node[draw](en1){S}; [.\node[draw](en2){NP}; [.DT the ] [.NNS imports ] ] [.\node[draw](en3){VP}; [.\node[draw](en4){VBZ}; have ] [.ADVP [.\node[draw](en5){RB}; drastically ] [.\node[draw](en6){VBN}; fallen ] ] ] ] \end{scope} \begin{scope}[scale=0.65, level distance=27pt, grow'=up, xshift=-13pt, yshift=-3.5in, sibling distance=22pt] \Tree[.\node[draw](cn1){\ \ IP\ \ }; [.\node[draw](cn2){NN}; 进口 ] [.\node[draw](cn3){VP}; [.\node[draw](cn4){AD}; 大幅度 ] [.VP [.\node[draw](cn5){VV}; 下降 ] [.\node[draw](cn6){AS}; 了 ] ] ] ] \end{scope} } \visible<3->{ \draw[latex-latex, dotted, thick, red] (cn4.east) .. controls +(east:0.5) and +(west:0.5) .. (en5.west); \draw[latex-latex, dotted, thick, red] (cn5.east) .. controls +(east:0.5) and +(south:0.5) .. (en6.south west); \draw[latex-latex, dotted, thick, red] (cn6.north west) .. controls +(north:1.5) and +(south:2.5) .. (en4.south west); \draw[latex-latex, dotted, thick, red] (cn3.north west) -- (en3.south west); \draw[latex-latex, dotted, thick, red] (cn2.west) .. controls +(west:0.6) and +(west:0.6) .. (en2.west); \draw[latex-latex, dotted, thick, red] (cn1.north west) .. controls +(north:4) and +(south:5.5) .. 
(en1.south west); } \end{scope} \end{tikzpicture} \end{center} \end{minipage} \begin{minipage}[c][5cm][t]{0.50\textwidth} \only<1>{ \renewcommand*{\arraystretch}{0.9} \begin{tabular}{l l} \multicolumn{2}{l}{\textbf{\scriptsize{抽取得到的规则(词对齐)}}} \\ \hline \scriptsize{$r_1$} & \scriptsize{AS(了) $\rightarrow$ DT(the)} \\ \scriptsize{$r_2$} & \scriptsize{NN(进口) $\rightarrow$ NNS(imports)} \\ \scriptsize{$r_3$} & \scriptsize{AD(大幅度) $\rightarrow$ RB(drastically)} \\ \scriptsize{$r_4$} & \scriptsize{VV(下降) $\rightarrow$ VBN(fallen)} \\ \scriptsize{$r_5$} & \scriptsize{IP(NN$_1$ VP(AD$_2$ VP(VV$_3$ AS$_4$))) $\rightarrow$} \\ \multicolumn{2}{l}{\tiny{S(NP(DT$_4$ NNS$_1$) VP(VBZ(have) ADVP(RB$_2$ VBN$_3$)))}} \\ \end{tabular} \renewcommand*{\arraystretch}{1.0} } \visible<4->{ \renewcommand*{\arraystretch}{0.9} \begin{tabular}{l l} \multicolumn{2}{l}{\textbf{\scriptsize{抽取得到的规则(子树对齐)}}} \\ \hline {\color{gray!70} \scriptsize{$r_1$}} & {\color{gray!70} \scriptsize{AS(了) $\rightarrow$ DT(the)}} \\ {\color{gray!70} \scriptsize{$r_2$}} & {\color{gray!70}\scriptsize{NN(进口) $\rightarrow$ NNS(imports)}} \\ \scriptsize{$r_3$} & \scriptsize{AD(大幅度) $\rightarrow$ RB(drastically)} \\ \scriptsize{$r_4$} & \scriptsize{VV(下降) $\rightarrow$ VBN(fallen)} \\ {\color{gray!70} \scriptsize{$r_5$}} & {\color{gray!70} \scriptsize{IP(NN$_1$ VP(AD$_2$ VP(VV$_3$ AS$_4$))) $\rightarrow$}} \\ \multicolumn{2}{l}{{\color{gray!70} \tiny{S(NP(DT$_4$ NNS$_1$) VP(VBZ(have) ADVP(RB$_2$ VBN$_3$)))}}} \\ \alert{\scriptsize{$r_6$}} & \alert{\scriptsize{AS(了) $\rightarrow$ VBZ(have)}} \\ \alert{\scriptsize{$r_7$}} & \alert{\scriptsize{NN(进口) $\rightarrow$ }} \\ & \alert{\scriptsize{NP(DT(the) NNS(imports))}}\\ \alert{\scriptsize{$r_8$}} & \alert{\scriptsize{VP(AD$_1$ VP(VV$_2$ AS$_3$)) $\rightarrow$}} \\ & \alert{\scriptsize{VP(VBZ$_3$ ADVP(RB$_1$ VBN$_2$))}} \\ \alert{\scriptsize{$r_9$}} & \alert{\scriptsize{IP(NN$_1$ VP$_2$) $\rightarrow$ S(NP$_1$ VP$_2$)}} \\ \end{tabular} \renewcommand*{\arraystretch}{1.0} } 
\end{minipage} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 抽取更多的规则:节点对齐矩阵 \begin{frame}{节点对齐矩阵} \begin{itemize} \item 节点对齐的自动获取:1)基于分类模型的方法;2)无指导节点对齐的方法 \item 使用节点对齐的另一个好处是,我们可以直接用节点对齐矩阵进行规则抽取,而不是用单一的对齐结果 \begin{itemize} \item 对齐矩阵可以帮助抽取更多样的规则 \end{itemize} \end{itemize} \vspace{-0.2em} \centering \begin{tikzpicture} \begin{scope}[scale=0.7] \begin{scope}[sibling distance=17pt, level distance=25pt] \Tree[.\node(en1){VP$^{[1]}$}; [.\node(en2){VBZ$^{[2]}$}; have ] [.\node(en3){ADVP$^{[3]}$}; [.\node(en4){RB$^{[4]}$}; drastically ] [.\node(en5){VBN$^{[5]}$}; fallen ] ] ] \end{scope} \begin{scope}[grow'=up, yshift=-2.7in, sibling distance=32pt, level distance=25pt] \Tree[.\node(cn1){VP$^{[1]}$}; [.\node(cn2){AD$^{[2]}$}; 大幅度 ] [.\node(cn3){VP$^{[3]}$}; [.\node(cn4){VV$^{[4]}$}; 下降 ] [.\node(cn5){AS$^{[5]}$}; 了 ] ] ] \end{scope} \begin{scope}[xshift=1.7in, yshift=-0.4in] {\footnotesize \node[anchor=west, rotate=60] at (0.8,-0.6) {VP$^{[1]}$}; \node[anchor=west, rotate=60] at (1.8,-0.6) {VBZ$^{[2]}$}; \node[anchor=west, rotate=60] at (2.8,-0.6) {ADVP$^{[3]}$}; \node[anchor=west, rotate=60] at (3.8,-0.6) {RB$^{[4]}$}; \node[anchor=west, rotate=60] at (4.8,-0.6) {VBN$^{[5]}$}; \node[] at (6.2,-1) {VP$^{[1]}$}; \node[] at (6.2,-2) {AD$^{[2]}$}; \node[] at (6.2,-3) {VP$^{[3]}$}; \node[] at (6.2,-4) {VV$^{[4]}$}; \node[] at (6.2,-5) {AS$^{[5]}$}; } \foreach \i in {1,...,5}{ \foreach \j in {-5,...,-1}{ \node[fill=blue,scale=0.2] at (\i,\j) {}; } } \visible<2-3>{ \node[fill=blue, scale=1.2] at (1,-1) {}; \node[fill=blue, scale=1.2] at (4,-2) {}; \node[fill=blue, scale=1.2] at (2,-5) {}; } \visible<2>{ \node[fill=blue, scale=1.2] at (5,-4) {}; } \visible<3>{ \node[fill=red, scale=1.2] at (5,-4) {}; } \visible<4-5>{ \node[fill=blue, scale=1.1] at (1,-1) {}; \node[fill=blue, scale=0.5] at (1,-3) {}; \node[fill=blue, scale=0.6] at (2,-2) {}; \node[fill=blue, scale=0.7] at (2,-3) {}; \node[fill=blue, 
scale=0.7] at (2,-5) {}; \node[fill=blue, scale=0.4] at (3,-1) {}; \node[fill=blue, scale=0.6] at (3,-2) {}; \node[fill=blue, scale=0.5] at (3,-3) {}; \node[fill=blue, scale=0.9] at (4,-2) {}; \node[fill=blue, scale=0.7] at (5,-3) {}; \node[fill=blue, scale=0.4] at (5,-5) {}; } \visible<4>{ \node[fill=blue, scale=0.6] at (3,-4) {}; \node[fill=blue, scale=0.8] at (5,-4) {}; } \visible<5>{ \node[fill=red, scale=0.6] at (3,-4) {}; \node[fill=red, scale=0.8] at (5,-4) {}; } \visible<2-3>{ \node[] at (4,-5.8) {\footnotesize{{\color{blue} $\blacksquare$} = extractable node-pair}}; } \visible<4-5>{ \node[] at (4,-5.8) {\footnotesize{{\color{blue} $\blacksquare$} = possible alignment}}; } \end{scope} \visible<3>{\draw[<->, red, thick] (cn4.east) .. controls +(east:0.9) and +(west:0.9) .. (en5.west);} \visible<5>{\draw[<->, red, dotted, very thick] (cn4.east) .. controls +(east:0.9) and +(west:0.9) .. (en5.west);} \visible<5>{\draw[<->, red, dotted, very thick] (cn4.west) .. controls +(west:1.0) and +(west:2) .. 
(en3.west);} \end{scope} \end{tikzpicture} \end{frame} \subsection{翻译特征} %%%------------------------------------------------------------------------------------------------------------ %%% 翻译特征 \begin{frame}{特征} \begin{itemize} \item 与短语和层次短语模型一样,句法模型也使用判别式模型进行建模 - $\textrm{P}(d,\textbf{t}|\textbf{s}) = \frac{\exp(\sum_{i=1}^{M} \lambda_i \cdot h_i(d,\textbf{s},\textbf{t}))}{\sum_{d',\textbf{t}'}\exp(\sum_{i=1}^{M} \lambda_i \cdot h_i(d',\textbf{s},\textbf{t}'))}$。其中特征权重$\{\lambda_i\}$可以使用最小错误率训练进行调优,特征函数$\{h_i\}$需要用户定义。 \item<2-> 这里,所有规则满足$\langle\ \alpha_h, \beta_h\ \rangle \to \langle\ \alpha_r, \beta_r, \sim\ \rangle$的形式 \begin{itemize} \item $\alpha_h$和$\beta_h$是规则左部的源语和目标语部分,对应树结构的根节点 \item $\alpha_r$和$\beta_r$是规则右部的源语和目标语部分,对应树结构 \item $\sim$表示$\alpha_r$和$\beta_r$中叶子非终结符的对应 \item 此外,定义$\tau(\alpha_r)$和$\tau(\beta_r)$为源语和目标语树结构的叶子节点序列。例如,对于规则$\langle\ \textrm{VP}, \textrm{VP}\ \rangle \to \langle\ \textrm{VP(PP}_{1}\ \textrm{VP(VV(表示) NN}_{2})), \textrm{VP(VBZ(was) VP(VBZ}_{2}\ \textrm{PP}_{1}))\ \rangle$,有 \\ \vspace{-1.5em} \begin{eqnarray} \tau(\alpha_r) & = & \textrm{PP}_1\ \textrm{表示 NN}_2 \nonumber \\ \tau(\beta_r) & = & \textrm{was}\ \textrm{VBZ}_2\ \textrm{PP}_1\nonumber \end{eqnarray} \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 翻译特征(续) \begin{frame}{特征(续)} \begin{itemize} \item \textbf{特征1-2: 短语翻译概率},即正向翻译概率$\textrm{P}(\tau(\beta_r)|\tau(\alpha_r))$和反向翻译概率$\textrm{P}(\tau(\alpha_r)|\tau(\beta_r))$。这里,$\tau(\alpha_r)$和$\tau(\beta_r)$ 都被看做短语,因此可以直接复用短语系统的方法进行计算。 \item \textbf{特征3-4: 词汇翻译概率},即$\textrm{P}_{\textrm{lex}}(\tau(\beta_r)|\tau(\alpha_r))$和$\textrm{P}_{\textrm{lex}}(\tau(\alpha_r)|\tau(\beta_r))$。可以用短语系统中的词汇翻译概率描述源语和目标语单词对应的情况。 \item<2-> \textbf{特征5: $n$-gram语言模型},即$\textrm{P}_{\textrm{lm}}(\textbf{t})$。度量译文的流畅度,可以使用大规模目标语单语数据得到。 \item<2-> \textbf{特征6:译文长度},即$|\textbf{t}|$。避免模型倾向于短译文,同时让系统自动学习对译文长度的偏好。 \item<2-> 
\textbf{特征7:翻译规则数量}。这个特征是为了避免模型仅仅使用少量规则构成翻译推导(因为翻译概率相乘,因子少结果一般会大一些),同时让系统自动学习对使用规则数量的偏好。 \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 翻译特征(续) \begin{frame}{特征(续2)} \begin{itemize} \item \textbf{特征8:源语言被翻译为空的单词数量}。注意,空翻译规则(或特征)有时也被称作evil feature,这类特征在一些数据集上对BLEU有很好的提升作用,但是会造成人工评价的下降,因此需要谨慎使用。 \item<2-> \textbf{特征9: 翻译规则生成概率},即$\textrm{P}_{\textrm{rule}}(\alpha_r,\beta_r,\sim|\alpha_h,\beta_h)$。这个特征可以被看做是生成翻译推导的概率。 \item<2-> \textbf{特征10:组合规则的数量}。学习使用组合规则(或最小规则)的偏好。 \item<2-> \textbf{特征11:词汇化规则的数量}。学习使用含有终结符规则的偏好。 \item<2-> \textbf{特征12:低频规则的数量}。学习使用训练数据中出现频次低于3的规则的偏好。低频规则大多并不可靠,这个特征本质上也是为了区分不同质量规则。 \item<3-> \alert{注意!}特征9-12也被看做是句法特征。当然,还有很多很多特征,感兴趣可以自己设计或查阅相关论文。 \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ \subsection{基于树和基于串的解码方法} %%%------------------------------------------------------------------------------------------------------------ %%% 解码模型 \begin{frame}{解码} \begin{itemize} \item 翻译时可以用如下式子计算每个推导的模型得分\\ \vspace{-1em} \begin{eqnarray} \textrm{score}(d,\textbf{t},\textbf{s}) & = & \sum_{r\in d} \log(\textrm{score}(r)) + \lambda_{\textrm{lm}}\log(\textrm{P}_{\textrm{lm}}(\textbf{t})) + \lambda_{l} |\textbf{t}| \nonumber \end{eqnarray} 其中,$\textrm{score}(r)$表示每条规则的得分,由特征1-4和特征7-12共同计算得到,因此也可以把特征1-4和特征7-12看做是规则特征 \item<2-> \alert{解码}是要找到使$\textrm{score}(d)$达到最大的翻译推导$d$\\ \begin{displaymath} \hat{d} = \argmax_{d \in D} \textrm{score}(d) \end{displaymath} 其中 $D$表示所有可能的推导构成的搜索空间。广义上来说,由于句法系统引入了非终结符和复杂的规则,它的推导空间会远大于短语系统,因此句法模型的解码器的好坏对性能影响很大 \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 基于树的解码 vs 基于串的解码 \begin{frame}{基于树的解码 vs 基于串的解码} \begin{itemize} \item 前面的公式本质上描述了一种基于串的解码,即对输入的源语言句子通过句法模型进行翻译,得到译文串。不过,搜索所有的推导导致巨大的解码空间。对于树到串和树到树翻译来说,源语言句法树是可见的,因此可以使用另一种解码方法 - 
基于树的解码,即把输入的源语句法树翻译为目标语串\\ \end{itemize} \centering \begin{tabular} {l | l l} \textbf{对比} & \textbf{基于树的解码} & \textbf{基于串的解码} \\ \hline 解码方法 & \parbox{0.35\textwidth}{ \begin{equation} \hat{d} = \argmax_{d \in D_{\textrm{tree}}} \textrm{score}(d) \nonumber \end{equation} } & \parbox{0.35\textwidth}{ \begin{equation} \hat{d} = \argmax_{d \in D} \textrm{score}(d) \nonumber \end{equation} } \\ \hline 搜索空间 & 与输入的源语句法树 & 所有推导$D$ \\ & 兼容的推导$D_{\textrm{tree}}$ & \\ \hline 适用模型 & 树到串、树到树 & 所有句法模型 \\ \hline 解码算法 & chart解码 & CYK + 规则二叉化 \\ \hline 速度 & 快 & 一般较慢 \end{tabular} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 基于树的解码方法 - 超图 \begin{frame}{基于树的解码 - 超图} \begin{itemize} \item 如果源语言输入的是句法树,\alert{基于树的解码}会找到一个推导覆盖整个句法树,之后输出所对应的目标语词串作为译文 \item 比如,可以从树的叶子结点开始,找到所有能匹配到这个节点的规则,当所有节点匹配完之后,本质上获得了一个超图 \begin{itemize} \item<2-> 图的节点对应一个句法树句法节点 \item<2-> 图的边(或者叫超边)对应规则,边的头指向规则左部(源语言端)所对应图节点,边可以有多个尾,每个尾对应规则右部(源语言端)中的一个变量 \end{itemize} \end{itemize} \visible<2->{ \begin{center} \begin{tikzpicture} {\scriptsize \begin{scope}[sibling distance=5pt, level distance=20pt] \Tree[.\node(cn1){VP$^{[1]}$}; [.\node(cn2){AD$^{[2]}$}; 大幅度 ] [.\node(cn3){VP$^{[3]}$}; [.\node(cn4){VV$^{[4]}$}; 下降 ] [.\node(cn5){AS$^{[5]}$}; 了 ] ] ] \node [anchor=south] (treelabel) at (cn1.north) {\scriptsize{源语句法树}}; \end{scope} \begin{scope}[xshift=1.3in,sibling distance=5pt, level distance=20pt] \Tree[.\node(sn1){VP}; [.\node(sn2){AD}; \node(sw1){大幅度}; ] [.\node(sn3){VP};] ] \node [anchor=south] (rulelabel) at ([yshift=0.2em,xshift=4em]sn1.north) {\scriptsize{匹配的翻译规则}}; \end{scope} \draw [->,thick] (sn3.east) -- ([xshift=1em]sn3.east); \node [anchor=west] (rr) at ([xshift=1em]sn3.east) {drastically VP}; \node [anchor=west,circle,draw,inner sep=2pt] (tail) at ([yshift=-3em,xshift=3em]rr.south east) {\tiny{[3]}}; \node [anchor=west,circle,draw,inner sep=2pt] (head) at ([yshift=2.5em,xshift=5em]rr.south east) {\tiny{[1]}}; \draw 
[-latex,thick] ([yshift=0.1em]tail.60) -- ([yshift=-0.1em]head.260); \node [anchor=north] (headlabel) at ([xshift=0.5em]head.south) {\tiny{头}}; \node [anchor=west] (taillabel) at ([yshift=1.2em,xshift=-0.3em]tail.east) {\tiny{尾}}; \node [anchor=south] (graphlabel) at ([yshift=0.6em]head.north) {\scriptsize{超图}}; \begin{pgfonlayer}{background} \node [fill=green!20,inner sep=2pt,drop shadow] (rulebox) [fit = (sn1) (sn2) (sn3) (sw1) (rr)] {}; \draw [->,dotted,thick] ([yshift=2em]rulebox.east) ..controls +(east:3.5em) and +(west:1.5em).. ([yshift=1em,xshift=0.3em]tail.north); \node [fill=red!20,inner sep=0pt] (nodebox1) [fit = (cn1)] {}; \node [fill=red!20,inner sep=0pt] (nodebox2) [fit = (sn1)] {}; \draw [<->,red] (nodebox1) edge [out=15, in=160] (nodebox2); \end{pgfonlayer} } \end{tikzpicture} \end{center} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 基于树的解码方法 - chart \begin{frame}{基于树的解码 - chart} \begin{itemize} \item 规则匹配后形成的超图,每个节点可以由两部分信息决定:节点的句法标记 + 跨度 \begin{itemize} \item 这本质上和单语句法分析中的表示方法是一致的 \item 存储形式有很多种,这里采用常用的chart结构,即,用一个二维表存储,其中每一个单元对应一个跨度(span)。同一个跨度的节点都可以放到同一个表单元中,同一表单元的节点用句法标记区分 \end{itemize} \end{itemize} \begin{center} \begin{tikzpicture} \visible<2->{ \begin{scope} \node [anchor=west,circle,inner sep=2pt,draw,fill=red!20] (node1) at (0,0) {\tiny{[1,1]}}; \node [anchor=west,circle,inner sep=2pt,draw,fill=blue!20] (node2) at ([xshift=3.3em]node1.east) {\tiny{[3,3]}}; \node [anchor=north,circle,inner sep=2pt,draw,fill=green!20] (node3) at ([xshift=2.5em,yshift=5em]node1.north) {\tiny{[1,3]}}; \node [anchor=north] (rulelabel) at ([yshift=-0.5em,xshift=2em]node1.south east) {\tiny{规则$r$所对应的超图}}; \draw [-latex] (node1.90) ..controls +(north:3em) and +(south:3em).. (node3.-90); \draw [-latex] (node2.90) ..controls +(north:3em) and +(south:3em).. 
(node3.-90); \node [anchor=west] (rule1) at ([xshift=6em]node3.east) {\footnotesize{VP(AD$_1$ VP(VV(下降)) AS$_2$)}}; \node [anchor=north west] (rule2) at (rule1.south west) {\footnotesize{$\to$ AS$_2$ AD$_1$ fallen}}; \node [anchor=south] (rulelabel) at ([yshift=-0.4em]rule1.north) {\tiny{规则$r$}}; \begin{pgfonlayer}{background} \visible<2->{ \node [anchor=north west,fill=green!20,inner sep=2pt,minimum height=1.5em,minimum width=1.3em] (nodebox1) at ([xshift=0.2em]rule1.north west) {}; \node [anchor=north west,fill=red!20,inner sep=2pt,minimum height=1.5em,minimum width=1.8em] (nodebox2) at ([xshift=0.2em]nodebox1.north east) {}; \node [anchor=north east,fill=blue!20,inner sep=2pt,minimum height=1.5em,minimum width=1.6em] (nodebox1) at ([xshift=-0.6em]rule1.north east) {}; } \end{pgfonlayer} \tikzstyle{chartnode}=[rectangle,minimum size=1.3em,draw] \node [chartnode,anchor=north west] (cell11) at ([yshift=-2em]rule2.south west) {}; \node [chartnode,anchor=north west] (cell21) at ([yshift=-0em]cell11.south west) {}; \node [chartnode,anchor=west] (cell22) at (cell21.east) {}; \node [chartnode,anchor=north west] (cell31) at ([yshift=-0em]cell21.south west) {}; \node [chartnode,anchor=west] (cell32) at (cell31.east) {}; \node [chartnode,anchor=west] (cell33) at (cell32.east) {}; \node [anchor=north] (chartlabel) at (cell32.south) {\scriptsize{chart}}; \draw [<-,dotted,thick] ([xshift=0.1em]node3.60) ..controls +(60:0.5) and +(120:0.5).. (rule1.north west); \visible<3->{ \node [chartnode,anchor=north west,fill=red!20] (cell11) at ([yshift=-2em]rule2.south west) {}; \node [chartnode,anchor=north west,fill=blue!20] (cell31) at ([yshift=-0em]cell21.south west) {}; \node [chartnode,anchor=west,fill=green!20] (cell33) at (cell32.east) {}; \draw [->,dotted] ([xshift=0.1em]node3.-60)..controls +(-60:3em) and +(north:7em).. (cell33.center); \draw [->,dotted] ([xshift=0.1em]node2.0)..controls +(east:2em) and +(west:3em).. 
(cell31.center); \draw [->,dotted] ([xshift=0.1em]node1.60)..controls +(60:1.5em) and +(120:2em).. (cell11.center); } \visible<4->{ \node [anchor=north west,fill=green!20,drop shadow,align=left] (cellbox) at ([yshift=4em,xshift=1em]cell33.north east) {\scriptsize{表单元表示覆盖单词1-3的跨度}\\\scriptsize{保存跨度[1,3]上的所有节点}\\\scriptsize{VP [1,3]}\\\scriptsize{NP [1,3]}\\\scriptsize{...}}; \draw [->] ([xshift=-0.2em,yshift=-0.2em]cell33.north east)..controls +(north:2.5em) and +(west:1em).. ([yshift=2em]cellbox.west); } \end{scope} } \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 基于树的解码方法 - chart-based decoding \begin{frame}{基于树的解码 - 基于chart的方法} \begin{itemize} \item 基于chart这种结构,可以很容易的构建解码所用的超图。常用的方法是自底向上解码: \begin{itemize} \item 从源语言句法树的叶子节点开始,自下而上访问树的节点 \item 对于每个跨度,如果对应一个树节点,则匹配相应的规则 \item 从树的根节点可以得到翻译推导,最终选择最优推导所对应的译文输出 \end{itemize} \end{itemize} \visible<2->{ \begin{minipage}[b]{0.42\linewidth} \begin{center} \begin{tikzpicture} \begin{scope} \tikzstyle{chartnode}=[rectangle,minimum size=1.3em,draw] \node [chartnode,anchor=north west] (cell11) at (0,0) {}; \node [chartnode,anchor=north west] (cell21) at ([yshift=-0em]cell11.south west) {}; \node [chartnode,anchor=west] (cell22) at (cell21.east) {}; \node [chartnode,anchor=north west] (cell31) at ([yshift=-0em]cell21.south west) {}; \node [chartnode,anchor=west] (cell32) at (cell31.east) {}; \node [chartnode,anchor=west] (cell33) at (cell32.east) {}; \node [chartnode,anchor=north west] (cell41) at ([yshift=-0em]cell31.south west) {}; \node [chartnode,anchor=west] (cell42) at (cell41.east) {}; \node [chartnode,anchor=west] (cell43) at (cell42.east) {}; \node [chartnode,anchor=west] (cell44) at (cell43.east) {}; \tikzstyle{chartnode2}=[rectangle,minimum size=1.3em,fill=orange!20] \node<3-> [chartnode2,anchor=north west] (cell11) at (0,0) {}; \node<4-> [chartnode2,anchor=north west] (cell21) at 
([yshift=-0em]cell11.south west) {}; \node<7-> [chartnode2,anchor=west] (cell22) at (cell21.east) {}; \node<5-> [chartnode2,anchor=north west] (cell31) at ([yshift=-0em]cell21.south west) {}; \node<8-> [chartnode2,anchor=west] (cell32) at (cell31.east) {}; \node<10-> [chartnode2,anchor=west] (cell33) at (cell32.east) {}; \node<6-> [chartnode2,anchor=north west] (cell41) at ([yshift=-0em]cell31.south west) {}; \node<9-> [chartnode2,anchor=west] (cell42) at (cell41.east) {}; \node<11-> [chartnode2,anchor=west] (cell43) at (cell42.east) {}; \node<12-> [chartnode2,anchor=west] (cell44) at (cell43.east) {}; \node [anchor=east] (s1) at (cell11.west) {\scriptsize{猫}}; \node [anchor=east] (s2) at (cell21.west) {\scriptsize{喜欢}}; \node [anchor=east] (s3) at (cell31.west) {\scriptsize{吃}}; \node [anchor=east] (s4) at (cell41.west) {\scriptsize{鱼}}; \node [anchor=north] (t5) at (cell41.south) {\tiny{$l$=1}}; \node [anchor=north] (t5) at (cell42.south) {\tiny{$l$=2}}; \node [anchor=north] (t5) at (cell43.south) {\tiny{$l$=3}}; \node [anchor=north] (t5) at (cell44.south) {\tiny{$l$=4}}; \node [anchor=north] (chartlabel) at ([yshift=-1em]cell42.south east) {\footnotesize{\textbf{chart}}}; \node [anchor=north west] (w1) at ([yshift=-2.5em,xshift=-2.0em]cell41.south west) {猫}; \node [anchor=west] (w2) at ([xshift=0.3em]w1.east) {喜欢}; \node [anchor=west] (w3) at ([xshift=0.3em]w2.east) {吃}; \node [anchor=west] (w4) at ([xshift=0.3em]w3.east) {鱼}; \node [anchor=north east] (p0) at ([xshift=0.3em]w1.south west) {\blue{0}}; \node [anchor=north east] (p1) at ([xshift=0.3em]w2.south west) {\blue{1}}; \node [anchor=north east] (p2) at ([xshift=0.3em]w3.south west) {\blue{2}}; \node [anchor=north east] (p3) at ([xshift=0.3em]w4.south west) {\blue{3}}; \node [anchor=north west] (p4) at ([xshift=-0.4em]w4.south east) {\blue{4}}; \node [anchor=north] (slabel) at (p2.south) {\scriptsize{\textbf{源语言句子}}}; \end{scope} \end{tikzpicture} \end{center} \end{minipage} 
\begin{minipage}[b]{0.55\linewidth} {\footnotesize \visible<3->{ \begin{tabular}{l l l l} \visible<3->{序号} & \visible<3->{跨度} & \visible<3->{标记} & \visible<3->{源语句子片段} \\ \hline \visible<3->{1} & \visible<3->{[{\blue 0},{\blue 1}]} & \visible<3->{NN \& NP} & \visible<3->{猫} \\ \visible<4->{2} & \visible<4->{[{\blue 1},{\blue 2}]} & \visible<4->{VV} & \visible<4->{喜欢} \\ \visible<5->{3} & \visible<5->{[{\blue 2},{\blue 3}]} & \visible<5->{VV} & \visible<5->{吃} \\ \visible<6->{4} & \visible<6->{[{\blue 3},{\blue 4}]} & \visible<6->{NN \& NP} & \visible<6->{鱼} \\ \visible<7->{5} & \visible<7->{[{\blue 0},{\blue 2}]} & \visible<7->{N/A} & \visible<7->{猫 喜欢} \\ \visible<8->{6} & \visible<8->{[{\blue 1},{\blue 3}]} & \visible<8->{N/A} & \visible<8->{喜欢 吃} \\ \visible<9->{7} & \visible<9->{[{\blue 2},{\blue 4}]} & \visible<9->{VP} & \visible<9->{吃 鱼} \\ \visible<10->{8} & \visible<10->{[{\blue 0},{\blue 3}]} & \visible<10->{N/A} & \visible<10->{猫 喜欢 吃} \\ \visible<11->{9} & \visible<11->{[{\blue 1},{\blue 4}]} & \visible<11->{VP} & \visible<11->{喜欢 吃 鱼} \\ \visible<12->{10} & \visible<12->{[{\blue 0},{\blue 4}]} & \visible<12->{IP (\alert{root})} & \visible<12->{猫 喜欢 吃 鱼} \\ \\ \end{tabular} } } \end{minipage} } \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%% 规则使用 - 基于树的匹配 \begin{frame}{使用树结构匹配树到串规则} \begin{itemize} \item 对于规则的源语言部分,可以使用树片段的匹配找到可以使用这条规则的位置 \begin{itemize} \item 匹配的规则会被存入相应的表格单元中 \end{itemize} \end{itemize} \vspace{-2em} \begin{center} \begin{tikzpicture} \begin{scope} {\scriptsize \begin{scope}[sibling distance=2pt,level distance=20pt,grow'=up] \Tree[.\node(treeroot){IP}; [.NP [.NR 阿都拉$_1$ ]] [.\node(tn1){VP}; [.\node(tn2){PP}; [.\node(tn3){P}; \node(cw1){对$_2$}; ] [.\node(tn4){NP}; \edge[roof]; {自己$_3$ 四$_4$\ 个$_5$\ 多$_6$\ 月$_7$\ 以来$_8$\ 的$_9$\ 施政$_{10}$\ 表现$_{11}$} ] ] [.\node(tn5){VP}; [.VV 感到$_{12}$ ] [.NN 满意$_{13}$ ] ] ] ] \end{scope} \visible<2->{ \node [anchor=west,fill=green!20!white] (rulepart1) at 
([yshift=2.0in,xshift=-1.3in]treeroot.east) {VP(PP(P(对) NP$_1$) VP$_2$)}; \node [anchor=north west] (rulepart2) at (rulepart1.south west) {$\to$ VP$_2$ with NP$_1$}; } \begin{pgfonlayer}{background} \visible<2->{ \node[rectangle,draw,inner sep=2pt] [fit = (rulepart1) (rulepart2)] (rulemark) {}; } \end{pgfonlayer} \visible<2->{ \node [anchor=south west] at (rulemark.north west) {\tiny{\textbf{树到串翻译规则}}}; } } \begin{scope}[xshift=1.5in,yshift=1.6in] \visible<3->{ \node[anchor=center, minimum size=10pt,draw] (cell1and1) at (0,0) {}; \node[anchor=center, minimum size=10pt,draw] (cell2and1) at ([xshift=10pt]cell1and1.center) {}; \node[anchor=center, minimum size=10pt,draw] (cell3and1) at ([xshift=10pt]cell2and1.center) {}; \node[anchor=center, minimum size=10pt] (cell4and1) at ([xshift=15pt]cell3and1.center) {\tiny{$\dots$}}; \node[anchor=center, minimum size=10pt,draw] (cellnand1) at ([xshift=13pt]cell4and1.center) {}; \node[anchor=center, minimum size=10pt,draw] (cell1and2) at ([yshift=10pt]cell1and1.center) {}; \node[anchor=center, minimum size=10pt,draw] (cell2and2) at ([xshift=10pt]cell1and2.center) {}; \node[anchor=center, minimum size=10pt,draw] (cell1and3) at ([yshift=10pt]cell1and2.center) {}; \node[anchor=center, minimum size=10pt] (cell1and4) at ([yshift=20pt]cell1and3.center) {\tiny{$\vdots$}}; \node[anchor=center, minimum size=10pt] (cell2and3) at ([yshift=20pt]cell2and2.center) {\tiny{$\vdots$}}; \node[anchor=center, minimum size=10pt] (cell3and2) at ([yshift=20pt]cell3and1.center) {\tiny{$\vdots$}}; \node[anchor=center, minimum size=10pt,draw] (cell4andn) at ([yshift=20pt,xshift=10pt]cell3and1.center) {}; %\node[anchor=center, minimum size=10pt] (cell4and2) at ([yshift=20pt]cell4and1.center) {\tiny{$\ddots$}}; \node[anchor=center, minimum size=10pt,draw] (cell1andn) at ([yshift=10pt]cell1and4.center) {}; \node[anchor=center, minimum size=10pt,draw] (cell2andn) at ([xshift=10pt,yshift=-10pt]cell1andn.center) {}; %\node[anchor=center, minimum 
size=10pt,draw] (cell3andn) at ([xshift=10pt,yshift=-10pt]cell2andn.center) {}; \node[anchor=west] (chartlabel) at ([xshift=-0em,yshift=-0.8em]cell1and1.south east) {\scriptsize{Chart}}; } \end{scope} \visible<3->{ \path [draw,thick,blue,->,dashed] (rulemark.north east) .. controls +(60:2.0) and +(north east:1.5) .. ([yshift=0.05em,xshift=0.05em]cell4andn.north east) node[pos=0.25, below,yshift=-0.2em] (spanlabel) {}; } \begin{pgfonlayer}{background} \visible<2->{ \path [fill=green!20] (tn1.south west) -- ([yshift=0.2em]tn2.south west) -- ([yshift=0.2em,xshift=-0.30em]tn3.south west) -- (cw1.north west) -- (cw1.north east) -- ([xshift=0.30em]tn3.north east) -- (tn4.north east) -- (tn4.south east) -- ([xshift=0.5em]tn2.north east) -- (tn5.north east)-- ([yshift=0.2em]tn5.south east) -- (tn1.south east) -- (tn1.south west); } \end{pgfonlayer} \visible<2->{ \path [draw,thick,blue,<->] ([xshift=-2em]rulepart1.south east) .. controls +(south:1.7) and +(north:1.3) .. ([xshift=0em,yshift=0.10em]cw1.north) node[pos=0.5, below,xshift=0.3in] {\scriptsize{树片段的匹配}}; } \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 基于串的解码方法 \begin{frame}{基于串的解码} \begin{itemize} \item 不同于基于树的解码,\alert{基于串的解码}方法并不要求输入句法树,它直接对输入词串进行翻译,最终得到译文。 \begin{itemize} \item 这种方法适用于树到串、串到树、树到树等多种模型 \item 本质上,由于并不受固定输入的句法树约束,基于串的解码可以探索更多潜在的树结构,这也增大了搜索空间(相比基于树的解码),因此该方法更有可能找到高质量翻译结果 \end{itemize} \item<2-> 在基于串的方法中,句法结构被看做是翻译的隐含变量,而非线性的输入和输出。比如,层次短语翻译解码就是一种典型的基于串的解码方法,所有的翻译推导在翻译过程里动态生成,但是并不需要输入或者输出这些推导所对应的层次结构 \end{itemize} \visible<2->{ \begin{center} \begin{tikzpicture} \begin{scope}[scale=0.9,level distance=15pt,sibling distance=0pt] {\scriptsize \Tree[.\node(bsn0){IP}; [.\node(bsn1){NP}; [.\node(bsn2){NN}; \node(bsw1){猫}; ] ] [.\node(bsn3){VP}; [.\node(bsn4){VV}; \node(bsw2){喜欢}; ] [.\node(bsn5){VP}; \edge[roof]; \node(bsw3){吃 \ 鱼}; ] ] ] \node [anchor=west] (target) at 
([xshift=1em]bsw3.east) {Cats like eating fish}; \node [anchor=north,inner sep=3pt] (cap1) at (target.south west) {(a) 基于树的解码}; \draw [->,thick] (bsw3.east) -- (target.west); \node [anchor=west] (sourcelabel) at ([xshift=2em]bsn0.east) {显式输入的结构}; \node [anchor=west] (source2) at ([xshift=2em]target.east) {猫 喜欢 吃 鱼}; \node [anchor=west] (target2) at ([xshift=1em]source2.east) {Cats like eating fish}; \node [anchor=north,inner sep=3pt] (cap2) at (target2.south west) {(b) 基于串的解码}; \draw [->,thick] (source2.east) -- (target2.west); \begin{scope}[xshift=2.45in,sibling distance=3pt] \Tree[.\node(bsn0){IP}; [.\node(bsn1){NP}; [.\node(bsn2){NN}; ] ] [.\node(bsn3){VP}; [.\node(bsn4){VV}; ] [.\node(bsn5){VP}; ] ] ] \begin{pgfonlayer}{background} \node [draw,dashed,inner sep=2pt] (box) [fit = (bsn0) (bsn1) (bsn2) (bsn3) (bsn4) (bsn5)] {}; \node [anchor=north west] (boxlabel) at (box.north east) {隐含结构}; \end{pgfonlayer} \end{scope} } \end{scope} \end{tikzpicture} \end{center} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 基于串的解码方法本质上和句法分析一样 \begin{frame}{基于串的解码 $\approx$ 句法分析} \begin{itemize} \item 基于串的翻译和传统\alert{句法分析}的任务很像:对于一个输入的词串,找到生成这个词串的最佳推导。唯一不同的地方,在于机器翻译需要考虑译文的生成(语言模型的引入会使问题稍微复杂一些),但是源语言部分的处理和句法分析一模一样 \item<2-> 这个过程仍然可以用基于chart的方法实现,即对于每一个源语言片段,都匹配可能的翻译规则,之后填入相应的表格单元,这也构成了一个超图,最佳推导可以从这个超图得到 \end{itemize} \visible<2->{ \begin{center} \begin{tikzpicture} \begin{scope} {\scriptsize \node [anchor=west] (sw1) at (0,0) {阿都拉$_1$}; \node [anchor=west] (sw2) at ([xshift=0.1em]sw1.east) {对$_2$}; \node [anchor=west] (sw3) at ([xshift=0.1em]sw2.east) {自己$_3$ 四$_4$\ 个$_5$\ 多$_6$\ 月$_7$\ 以来$_8$\ 的$_9$\ 施政$_{10}$\ 表现$_{11}$}; \node [anchor=west] (sw4) at ([xshift=0.2em]sw3.east) {感到$_{12}$ 满意$_{13}$}; \begin{pgfonlayer}{background} \visible<3->{ \node [fill=red!20,inner sep=0pt] (box1) [fit = (sw1)] {}; \node [fill=green!20,inner sep=0pt] (box2) [fit = (sw3)] {}; \node [fill=orange!20,inner sep=0pt] 
(box3) [fit = (sw4)] {}; \node [anchor=south,align=center] (box1label) at (box1.north) {[{\blue 0},{\blue 1}]\\NP}; \node [anchor=south,align=center] (box2label) at (box2.north) {[{\blue 2},{\blue 11}]\\NP}; \node [anchor=south,align=center] (box3label) at (box3.north) {[{\blue 11},{\blue 13}]\\VP}; } \end{pgfonlayer} \draw[decorate,decoration={brace,mirror,amplitude=3mm}] (sw1.south west) -- (sw4.south east); \node [anchor=north] (label) at ([yshift=-1em]sw3.south) {在跨度[{\blue 0},{\blue 13}]上进行规则匹配}; \node [anchor=north] (rule) at ([yshift=-0.3em]label.south) {{\footnotesize 比如:IP({\color{red} NP$_1$} VP(PP(P(对) {\color{ugreen} NP$_2$}) {\color{orange} VP$_3$}))}}; \node [anchor=north west] (rule2) at ([yshift=0.2em]rule.south west) {{\footnotesize \hspace{2.8em} $\to$ NP$_1$ VP$_3$ with NP$_2$}}; } \end{scope} \end{tikzpicture} \end{center} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 基于串的解码 - 规则匹配 \begin{frame}{基于串的解码 - 规则匹配} \begin{itemize} \item 相比基于树的解码,基于串的解码的实现要复杂许多,因为对于每一个片段,需要判断每条规则是否能匹配 \begin{itemize} \item 就是匹配树片段的叶子节点序列,即单词和变量构成的串 \item<2-> 匹配单词可以直接完成 \item<3-> 匹配变量需要检查相应跨度节点上是否有相应标记的推导 \end{itemize} \end{itemize} \vspace{-1em} \begin{center} \begin{tikzpicture} \begin{scope} {\scriptsize \node [anchor=west] (sw1) at (0,0) {阿都拉$_1$}; \node [anchor=west] (sw2) at ([xshift=0.1em]sw1.east) {对$_2$}; \node [anchor=west] (sw3) at ([xshift=0.1em]sw2.east) {自己$_3$ 四$_4$\ 个$_5$\ 多$_6$\ 月$_7$\ 以来$_8$\ 的$_9$\ 施政$_{10}$\ 表现$_{11}$}; \node [anchor=west] (sw4) at ([xshift=0.2em]sw3.east) {感到$_{12}$ 满意$_{13}$}; \begin{pgfonlayer}{background} \visible<3->{ \node [fill=red!20,inner sep=0pt] (box1) [fit = (sw1)] {}; \node [fill=green!20,inner sep=0pt] (box2) [fit = (sw3)] {}; \node [fill=orange!20,inner sep=0pt] (box3) [fit = (sw4)] {}; \node [anchor=south,align=center] (box1label) at (box1.north) {[{\blue 0},{\blue 1}]\\NP}; \node [anchor=south,align=center] (box2label) at 
(box2.north) {[{\blue 2},{\blue 11}]\\NP}; \node [anchor=south,align=center] (box3label) at (box3.north) {[{\blue 11},{\blue 13}]\\VP}; } \visible<2->{ \node [draw,thick,purple,inner sep=0pt] (box4) [fit = (sw2)] {}; } \end{pgfonlayer} \draw[decorate,decoration={brace,mirror,amplitude=3mm}] (sw1.south west) -- (sw4.south east); \node [anchor=north] (label) at ([yshift=-1em]sw3.south) {在跨度[{\blue 0},{\blue 13}]上匹配``NP 对 NP VP''}; } \end{scope} \end{tikzpicture} \end{center} \vspace{-1em} \begin{itemize} \item<4-> 如果待匹配的单词和变量序列中,没有连续的变量,这样的规则符合lexicalized normal form (LNF)。因为LNF中单词(终结符)可以作为锚点,因此规则匹配较容易实现 \begin{itemize} \item 比如层次短语系统的规则就符合LNF,因此规则匹配非常容易实现 \item 显然上面例子中的规则不符合LNF \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 基于串的解码 - 连续变量的匹配,复杂度增加 \begin{frame}{基于串的解码 - 连续变量的匹配} \begin{itemize} \item 但是,如果待匹配串中有连续变量,问题会变得复杂:因为确定两个变量之间的边界需要增加一重循环 \end{itemize} \vspace{-0.5em} \begin{center} \begin{tikzpicture} \begin{scope} {\scriptsize \node [anchor=west] (sw11) at (0,0) {阿都拉$_1$}; \node [anchor=west] (sw12) at ([xshift=0.1em]sw11.east) {对$_2$}; \node [anchor=west,fill=green!20] (sw13) at ([xshift=0.1em]sw12.east) {自己$_3$ 四$_4$\ 个$_5$\ 多$_6$\ 月$_7$\ 以来$_8$\ 的$_9$\ 施政$_{10}$\ 表现$_{11}$ 感到$_{12}$ }; \node [anchor=west,fill=orange!20] (sw14) at ([xshift=0.2em]sw13.east) {满意$_{13}$}; \node [anchor=north west] (sw21) at ([yshift=-0.3em]sw11.south west) {阿都拉$_1$}; \node [anchor=west] (sw22) at ([xshift=0.1em]sw21.east) {对$_2$}; \node [anchor=west,fill=green!20] (sw23) at ([xshift=0.1em]sw22.east) {自己$_3$ 四$_4$\ 个$_5$\ 多$_6$\ 月$_7$\ 以来$_8$\ 的$_9$\ 施政$_{10}$\ 表现$_{11}$}; \node [anchor=west,fill=orange!20] (sw24) at ([xshift=0.2em]sw23.east) {感到$_{12}$ 满意$_{13}$}; \node [anchor=north west] (sw31) at ([yshift=-0.3em]sw21.south west) {阿都拉$_1$}; \node [anchor=west] (sw32) at ([xshift=0.1em]sw31.east) {对$_2$}; \node [anchor=west,fill=green!20] (sw33) at 
([xshift=0.1em]sw32.east) {自己$_3$ 四$_4$\ 个$_5$\ 多$_6$\ 月$_7$\ 以来$_8$\ 的$_9$\ 施政$_{10}$}; \node [anchor=west,fill=orange!20] (sw34) at ([xshift=0.2em]sw33.east) {表现$_{11}$ 感到$_{12}$ 满意$_{13}$}; \node [anchor=north] (dots) at ([yshift=-0.5em]sw33.south) {...}; \node [anchor=north west] (sw41) at ([yshift=-1.8em]sw31.south west) {阿都拉$_1$}; \node [anchor=west] (sw42) at ([xshift=0.1em]sw41.east) {对$_2$}; \node [anchor=west,fill=green!20] (sw43) at ([xshift=0.1em]sw42.east) {自己$_3$ }; \node [anchor=west,fill=orange!20] (sw44) at ([xshift=0.2em]sw43.east) {四$_4$\ 个$_5$\ 多$_6$\ 月$_7$\ 以来$_8$\ 的$_9$ 施政$_{10}$ 表现$_{11}$ 感到$_{12}$ 满意$_{13}$}; \node [anchor=south] (label) at ([yshift=0.3em]sw13.north) {\footnotesize{在跨度[{\blue 0},{\blue 13}]上匹配``NP 对 NP VP''}}; \node [anchor=north west,minimum size=1.2em,fill=green!20] (np) at ([yshift=-1.0em,xshift=0.3em]sw41.south west) {}; \node [anchor=west] (nplabel) at (np.east) {NP(第二个)}; \node [anchor=west,minimum size=1.2em,fill=orange!20] (vp) at ([xshift=1.0em]nplabel.east) {}; \node [anchor=west] (vplabel) at (vp.east) {VP}; } \end{scope} \end{tikzpicture} \end{center} \vspace{-0.5em} \begin{itemize} \item<2-> 理论上,对于长度为$n$的词串,匹配$m$个连续变量的时间复杂度是O($n^{m-1}$) \begin{itemize} \item 这也会导致含有多个变量的非词汇化规则的匹配大大增加系统的运行时间,但这种规则在句法系统中也很常见 \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 基于串的解码 - CYK + 规则二叉化 \begin{frame}{基于串的解码 - CKY + 规则二叉化} \begin{itemize} \item 对于这个问题,常用的解决办法是进行规则二叉化,这样右端最多只有两个连续变量,规则匹配的复杂度降为O($n$)。例如,对于如下串到树规则\\ \vspace{0.4em} \begin{center} 喜欢 VP$_1$ NP$_2$ $\to$ VP(VBZ(likes) VP$_1$ NP$_2$) \\ \end{center} \vspace{0.4em} 二叉化之后变为 \vspace{0.4em} \begin{center} \hspace{1em} 喜欢 V103 $\to$ VP(VBZ(likes) V103) \\ \vspace{0.4em} VP$_1$ NP$_2$ $\to$ V103(VP$_1$ NP$_2$) \\ \end{center} \vspace{0.4em} 其中,二叉化后的规则源语言端最多有两个非终结符。V103是一个虚拟符号,用于表示临时生成的规则 \item<2-> 
对于二叉化后的规则,可以使用CKY方法完成解码,它也是一种基于chart的分析方法,对于每个源语言片段,匹配规则两个枝杈的左分支和右分支,整个过程和其它chart方法没有区别 \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 基于串的解码 - chart parsing examples \begin{frame}{基于串的解码 - 规则使用的实例} \begin{itemize} \item 使用规则可以构建句子的分析图 \end{itemize} \centering \includegraphics[scale=0.63]{./Figures/figure-chart-parsing-example.pdf} \end{frame} %%%------------------------------------------------------------------------------------------------------------ \subsection{改进方法} %%%------------------------------------------------------------------------------------------------------------ %%% 对于句法系统的改进 \begin{frame}{改进方法} \begin{itemize} \item \textbf{基于森林的翻译模型}。句法分析会出现错误,因此只使用一棵句法树进行规则抽取和解码会放大句法分析错误的影响。一种解决方法是使用多棵句法树增加覆盖度,句法森林是一种有效的数据结构表示指数级树结构,因此也被用于基于句法的机器翻译。 \vspace{0.3em} \item \textbf{句法软约束和规则模糊匹配}。前面提到的模型都要求模型严格遵循句法结构,很多时候由于句法结构可能不完全适合翻译任务甚至有错误,这种模型过``硬''。因此可以使用句法软约束或者放松规则匹配时的约束。 \vspace{0.3em} \item \textbf{控制句法使用的程度}。句法模型比较适合捕捉句法上层的表示,而短语模型更适合处理局部依赖。因此可以使用二者的混合来达到更好的效果,比如,可以让句法模型处理上层骨架的翻译,之后让短语模型处理简单短语片段的翻译。 \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 实验结果 \begin{frame}{翻译效果} \vspace{-1em} \begin{center} \begin{tabular}{l | l | l | l} \multicolumn{2}{c|}{模型} & 开发集 & 测试集 \\ \multicolumn{2}{c|}{} & (BLEU[\%]) & (BLEU[\%]) \\ \hline \multicolumn{2}{l|}{短语(Moses)} & 36.51 & 34.93 \\ \multicolumn{2}{l|}{短语(NiuTrans)} & 36.99 & 35.29 \\ \hline \multicolumn{2}{l|}{层次短语(Moses)} & 36.65 & 34.79 \\ \multicolumn{2}{l|}{层次短语(NiuTrans)} & 37.41 & 35.35 \\ \hline & 基于串的解码 & 36.48 & 34.71 \\ 树到串 & 基于树的解码 & 35.54 & 33.99 \\ (NiuTrans) & 基于森林的解码 & 36.14 & 34.25 \\ \hline & 基于串的解码 & 35.99 & 34.01 \\ 树到树 & 基于树的解码 & 35.04 & 33.21 \\ (NiuTrans) & 基于森林的解码 & 35.56 & 33.45 \\ \hline 串到树 & 基于串的解码 & 37.63 & 35.65 \\ (NiuTrans) & & & \end{tabular} \end{center} \vspace{-0.5em} \scriptsize{* 以上结果来自 
NiuTrans: An Open Source Toolkit for Phrase-based and Syntax-based Machine Translation}\\ \scriptsize{* 开发集:NIST MT03,测试集:NIST MT05}\\ \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% 总结 \begin{frame}{小结一下} \begin{itemize} \item 基于短语和基于句法的模型是机器翻译2000年之后的重要进展之一 \begin{itemize} \item 研究热度持续十余年,至今仍有使用(无指导机器翻译中使用SMT做初始模型) \item 一些方法代表了NLP中的原始创新,比如最小错误率训练 \end{itemize} \item 相关技术和方法对现在研究仍然有很好的借鉴意义 \begin{itemize} \item 对翻译的统计建模方法,比如,基于翻译文法和推导的机器翻译建模思想 \item 翻译调序等机器翻译特有问题的描述方法 \item 先验知识的使用,句法结构、篇章等等 \end{itemize} \item<2-> 在深度学习时代下重新审视统计机器翻译 \begin{itemize} \item 注意,统计机器翻译并不是简单几套系统,更重要的是思想,这种建模方法更接近人类对翻译的认知 \item 深度学习方法从另一个视角看待机器翻译,二者必然存在结合的可能,只是结合的方法需要探索 \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% last slide \begin{frame}{Last Slide} \vspace{1em} \begin{center} \begin{tikzpicture} \begin{scope}[level distance=25pt] \node[] (x) at (-4,0) {}; \begin{scope}[sibling distance=15pt] \Tree[.\node(en1){VP}; [.\node(en2){VP}; [.VB \textbf{Thank} ] [.PRP \textbf{you} ] ] [.\node(en3){ADJP}; [.RB \textbf{very} ] [.JJ \textbf{much} ] ] ] \end{scope} \begin{scope}[grow'=up, xshift=-0.2in, yshift=-2.6in, sibling distance=34pt] \Tree[.\node(cn1){VP}; [.\node(cn2){ADVP}; [.AD \textbf{非常} ] ] [.\node(cn3){VP}; [.VV \textbf{感谢} ] [.PR \textbf{大家} ] ] ] \end{scope} \draw[<->, very thick, dotted, red] (cn1.east) .. controls +(east:4.5) and +(east:4.5) .. (en1.east); \draw[<->, very thick, dotted, ugreen] (cn2.east) .. controls +(east:2) and +(west:2) .. (en3.west); \draw[<->, very thick, dotted, blue] (cn3.west) .. controls +(west:2) and +(south:2) .. (en2.south); \end{scope} \end{tikzpicture} \end{center} \end{frame} \end{CJK} \end{document}