Commit a2a024e2 by xiaotong

new pages of the introduction

parent a3d52954
......@@ -8,3 +8,4 @@
*.toc
*.blg
*.sav
*.pdf
......@@ -23,7 +23,7 @@
\usetikzlibrary{matrix}
\usetikzlibrary{patterns}
\usetikzlibrary{arrows,decorations.pathreplacing}
\usetikzlibrary{shadows}
\usetikzlibrary{shadows}
\usetikzlibrary{shadows.blur}
\usepgflibrary{arrows}
\usetikzlibrary{arrows}
......@@ -38,7 +38,7 @@
\DeclareMathOperator*{\argmin}{arg\,min}
\setbeamertemplate{items}[ball]
\usefonttheme[onlymath]{serif}
\usefonttheme[onlymath]{serif}
\definecolor{ugreen}{rgb}{0,0.5,0}
\definecolor{lgreen}{rgb}{0.9,1,0.8}
......@@ -74,7 +74,7 @@
\begin{CJK}{UTF8}{\mycfont}
\title{\Large{基于短语和句法的翻译模型}}
\title{\Large{基于短语和句法的统计翻译模型}}
\author{\large{\textbf{肖桐\ \ 朱靖波}}}
\institute{
\blue{\url{xiaotong@mail.neu.edu.cn}} \black{} \\
......@@ -102,12 +102,55 @@
\begin{itemize}
\item 短语是具有完整意思的连续词串,因此可以捕捉更多的上下文信息
\begin{itemize}
\item 不过过大的短语也会造成问题,比如数据稀疏
\item 不过过大的短语会造成数据稀疏、长距离依赖等问题
\item 而且单纯的词串也缺乏句法功能表示能力
\end{itemize}
\item 另一种方式是考虑句子的句法结构
\begin{tikzpicture}
\node[anchor=west, fill=blue!50, inner sep=0.05cm] (sp1) at (0, 0) {进口\ \ };
\node[anchor=west] (sp2) at (2.5em, 0) {在 过去的 五 到 十 年 间};
\node[anchor=west, fill=red!50, inner sep=0.05cm] (sp3) at (14em, 0) {有了 大幅度 下降};
\draw[->] (sp1) edge [out=15, in=170] (sp3);
\node[anchor=west, fill=blue!50, inner sep=0.05cm] (tp1) at (0, -0.8) {the imports};
\node[anchor=west, fill=red!50, inner sep=0.05cm] (tp2) at (5.3em, -0.8) {drastically fell};
\node[anchor=west] (tp3) at (11.3em, -0.8) {in the past five to ten years};
\path[->] (tp1) edge [out=30, in=150] (tp2);
\end{tikzpicture}
\item<2-> 另一种方式是考虑句子的句法结构,这样更容易描述句子的层次结构和长距离依赖关系
\end{itemize}
\visible<2->{
\begin{center}
\begin{tikzpicture}
\begin{scope}[scale=0.8, sibling distance=1pt, level distance=20pt, yshift=-1.4in]
\Tree[. S
[.NP
[.NP
[.DT the ]
[.\node[fill=ugreen!50]{NN}; \node[](n1){import}; ]
]
[.\node[fill=ugreen!50]{IN}; in ]
[.NP \edge[roof]; {North Korea} ]
]
[.VP
[.\node[fill=ugreen!50]{VBZ}; \node[](n2){have}; ]
[.ADVP
[.RB drastically ]
[.VBN fallen ]
]
]
]
\draw[-latex] (n1.south) .. controls +(south east:1) and +(south:1) .. (n2.south);
\end{scope}
\end{tikzpicture}
\end{center}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
......@@ -119,7 +162,95 @@
\item 更容易捕捉翻译中的远距离调序
\item 使用句法更容易对大范围的上下文建模
\end{itemize}
\item<2-> 看一个真实的例子
\begin{itemize}
\item 长介词短语的翻译,需要完整的看到这个结构才能准确翻译介词
\end{itemize}
\end{itemize}
\visible<2->{
\vspace{-1.2em}
\begin{center}
\begin{tikzpicture}
%% example
\begin{scope}[xshift=-0.1in,yshift=-1.5in]
{\tiny
\node[anchor=west] (ref) at (0,0) {\textbf{人工翻译:} \alert{After} North Korea demanded concessions from U.S. again before the start of a new round of six-nation talks ...};
\node[anchor=north west] (hifst) at ([yshift=-0.3em]ref.south west) {\textbf{机器翻译:} \blue{In}\black{} the new round of six-nation talks on North Korea again demanded that U.S. in the former promise ...};
\visible<4->{
\node[anchor=north west] (synhifst) at ([yshift=-0.3em]hifst.south west) {\textbf{better?:}};
\node[anchor=west, fill=red!20!white, inner sep=0.3em] (synhifstpart1) at ([xshift=-0.5em]synhifst.east) {After};
\node[anchor=west, fill=blue!20!white, inner sep=0.25em] (synhifstpart2) at ([xshift=0.1em,yshift=-0.05em]synhifstpart1.east) {North Korea again demanded that U.S. promised concessions before the new round of six-nation talks};
\node[anchor=west] (synhifstpart3) at ([xshift=-0.2em]synhifstpart2.east) {...};
}
\node [anchor=west] (inputlabel) at ([yshift=-0.4in]synhifst.west) {\textbf{Input:}};
\node [anchor=west,minimum height=12pt] (inputseg1) at (inputlabel.east) {$_1$ };
\node [anchor=west,minimum height=12pt] (inputseg2) at ([xshift=0.2em]inputseg1.east) {北韩$_2$ 再度$_3$ 要求$_4$ 美国$_5$$_6$$_7$ 回合$_8$$_9$$_{10}$ 会谈$_{11}$$_{12}$ 承诺$_{13}$ 让步$_{14}$};
\node [anchor=west,minimum height=12pt] (inputseg3) at ([xshift=0.2em]inputseg2.east) {$_{15}$};
\node [anchor=west,minimum height=12pt] (inputseg4) at ([xshift=0.2em]inputseg3.east) {,$_{16}$};
\node [anchor=west,minimum height=12pt] (inputseg5) at ([xshift=0.2em]inputseg4.east) {...};
\visible<3->{
\node [anchor=north,inner sep=2pt] (synlabel1) at ([yshift=-0.34in]inputseg2.south) {\scriptsize{PP}};
\node [anchor=north,inner sep=2pt] (synlabel2) at ([yshift=-0.34in]inputseg4.south) {\scriptsize{PU}};
\node [anchor=north,inner sep=2pt] (synlabel3) at ([yshift=-0.34in]inputseg5.south) {\scriptsize{VP}};
\node [anchor=north,inner sep=2pt] (synlabel4) at ([xshift=1.6in,yshift=-0.35in]synlabel1.south) {\scriptsize{VP}};
\draw [-] (inputseg1.south west) -- (inputseg3.south east) -- (synlabel1.north) -- cycle;
\draw [-] (inputseg4.south) -- (synlabel2.north);
\draw [-] (inputseg5.south) -- (synlabel3.north);
\draw [-] (synlabel1.south) -- (synlabel4.north);
\draw [-] (synlabel2.south) -- (synlabel4.north);
\draw [-] (synlabel3.south) -- (synlabel4.north);
}
\visible<3->{
\node [anchor=north east,align=left] (nolimitlabel) at (synlabel1.south west) {\scriptsize{短语结构树很容易捕捉}\\\scriptsize{这种介词短语结构}};
}
\visible<4->{
\node [anchor=west,minimum height=12pt,fill=red!20] (inputseg1) at (inputlabel.east) {$_1$ };
\node [anchor=west,minimum height=12pt,fill=blue!20] (inputseg2) at ([xshift=0.2em]inputseg1.east) {北韩$_2$ 再度$_3$ 要求$_4$ 美国$_5$$_6$$_7$ 回合$_8$$_9$$_{10}$ 会谈$_{11}$$_{12}$ 承诺$_{13}$ 让步$_{14}$};
\node [anchor=west,minimum height=12pt,fill=red!20] (inputseg3) at ([xshift=0.2em]inputseg2.east) {$_{15}$};
\path [draw,->,dashed] (inputseg1.north) .. controls +(north:0.2) and +(south:0.3) .. ([xshift=1em]synhifstpart1.south);
\path [draw,->,dashed] (inputseg3.north) .. controls +(north:0.2) and +(south:0.6) .. ([xshift=1em]synhifstpart1.south);
\path [draw,->,dashed] ([xshift=-0.5in]inputseg2.north) -- ([xshift=-0.6in]synhifstpart2.south);
}
}
\end{scope}
%% end of example
\end{tikzpicture}
\end{center}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Core question of this chapter
% A single slide: one shadowed blue box holding the chapter's guiding question.
\begin{frame}{核心问题}
  % Push the box down from the frame title.
  \vspace{6em}
  \begin{tcolorbox}[
      enhanced,
      size=normal,
      left=2mm,
      right=1mm,
      colback=blue!5!white,
      colframe=blue!75!black,
      drop fuzzy shadow,
    ]
    {\LARGE
      \textbf{如何使用短语、句法等}\\
      \textbf{结构信息进行机器翻译建模?}
    }
  \end{tcolorbox}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
......
......@@ -16,6 +16,9 @@
\usepackage{hyperref}
\usepackage{ulem}
\usepackage{tcolorbox}
\tcbuselibrary{skins}
\usetikzlibrary{arrows,decorations.pathreplacing}
\usetikzlibrary{shadows} % LATEX and plain TEX when using Tik Z
......@@ -78,7 +81,7 @@
\begin{CJK}{UTF8}{\mycfont}
\title{\Large{基于短语和句法的翻译模型}}
\title{\Large{基于短语和句法的统计翻译模型}}
\author{\large{\textbf{肖桐\ \ 朱靖波}}}
\institute{
\blue{\url{xiaotong@mail.neu.edu.cn}} \black{} \\
......@@ -688,6 +691,202 @@
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% From phrases to syntax
% Frame with two overlay steps:
%   slide 1  - bullet list on the limits of phrases plus a flat aligned sentence pair;
%   slide 2+ - a further bullet (\item<2->) and a constituency tree (\visible<2->).
\begin{frame}{使用短语就够了?}
\begin{itemize}
\item 短语是具有完整意思的连续词串,因此可以捕捉更多的上下文信息
\begin{itemize}
\item 不过过大的短语会造成数据稀疏、长距离依赖等问题
\item 而且单纯的词串也缺乏句法功能表示能力
\end{itemize}
% Sentence-pair picture. Top row (y=0) is the source sentence as nodes sp1..sp3,
% bottom row (y=-0.8) is the target sentence as nodes tp1..tp3.
% Matching fill colours pair the segments: blue = sp1/tp1, red = sp3/tp2.
% sp2 and tp3 are unfilled and are not referenced again inside this picture.
\begin{tikzpicture}
\node[anchor=west, fill=blue!50, inner sep=0.05cm] (sp1) at (0, 0) {进口\ \ };
\node[anchor=west] (sp2) at (2.5em, 0) {在 过去的 五 到 十 年 间};
\node[anchor=west, fill=red!50, inner sep=0.05cm] (sp3) at (14em, 0) {有了 大幅度 下降};
% Curved arrow over the unfilled middle segment, from the blue to the red source node.
\draw[->] (sp1) edge [out=15, in=170] (sp3);
\node[anchor=west, fill=blue!50, inner sep=0.05cm] (tp1) at (0, -0.8) {the imports};
\node[anchor=west, fill=red!50, inner sep=0.05cm] (tp2) at (5.3em, -0.8) {drastically fell};
\node[anchor=west] (tp3) at (11.3em, -0.8) {in the past five to ten years};
% Same relation on the target side, where the two filled segments are adjacent.
\path[->] (tp1) edge [out=30, in=150] (tp2);
\end{tikzpicture}
\item<2-> 另一种方式是考虑句子的句法结构,这样更容易描述句子的层次结构和长距离依赖关系
\end{itemize}
% Constituency tree, shown from slide 2 onwards.
% The \Tree body uses tikz-qtree bracket notation, where whitespace delimits labels,
% so no comments are placed inside it.
% The NN, IN and VBZ labels are filled with ugreen!50; the leaves "import" and "have"
% are named n1 and n2 so the arrow drawn after the tree can connect them.
% NOTE(review): the colour `ugreen` is not defined in this block - presumably it comes
% from the preamble; confirm it is defined in this file.
\visible<2->{
\begin{center}
\begin{tikzpicture}
\begin{scope}[scale=0.8, sibling distance=1pt, level distance=20pt, yshift=-1.4in]
\Tree[. S
[.NP
[.NP
[.DT the ]
[.\node[fill=ugreen!50]{NN}; \node[](n1){import}; ]
]
[.\node[fill=ugreen!50]{IN}; in ]
[.NP \edge[roof]; {North Korea} ]
]
[.VP
[.\node[fill=ugreen!50]{VBZ}; \node[](n2){have}; ]
[.ADVP
[.RB drastically ]
[.VBN fallen ]
]
]
]
% Arrow from below "import" (n1) to below "have" (n2), curving underneath the leaves.
\draw[-latex] (n1.south) .. controls +(south east:1) and +(south:1) .. (n2.south);
\end{scope}
\end{tikzpicture}
\end{center}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Syntax-based models
% Frame with four overlay steps:
%   slide 1  - first bullet group only;
%   slide 2+ - second bullet (\item<2->) and the example picture (\visible<2->):
%              reference translation, MT output and the segmented input sentence;
%   slide 3+ - syntactic labels (PP / PU / VP) drawn under the input, plus a caption;
%   slide 4+ - a "better?" translation, coloured input segments and dashed links
%              from the input segments to that translation.
\begin{frame}{引入句法信息}
\begin{itemize}
\item 句法树是句子的更高层次的抽象,相比短语句法树具有更加丰富的句法功能标记,对语言结构的转换很有帮助
\begin{itemize}
\item 更容易捕捉翻译中的远距离调序
\item 使用句法更容易对大范围的上下文建模
\end{itemize}
\item<2-> 看一个真实的例子
\begin{itemize}
\item 长介词短语的翻译,需要完整的看到这个结构才能准确翻译介词
\end{itemize}
\end{itemize}
\visible<2->{
\vspace{-1.2em}
\begin{center}
\begin{tikzpicture}
%% example
\begin{scope}[xshift=-0.1in,yshift=-1.5in]
% Everything in the example is typeset at \tiny; the syntax labels and the caption
% switch to \scriptsize locally.
{\tiny
% Row 1: human reference. Row 2: machine output, anchored below the reference.
\node[anchor=west] (ref) at (0,0) {\textbf{人工翻译:} \alert{After} North Korea demanded concessions from U.S. again before the start of a new round of six-nation talks ...};
\node[anchor=north west] (hifst) at ([yshift=-0.3em]ref.south west) {\textbf{机器翻译:} \blue{In}\black{} the new round of six-nation talks on North Korea again demanded that U.S. in the former promise ...};
% Row 3 (slide 4+): the improved translation, split into a red part ("After"),
% a blue part (the clause) and a trailing ellipsis.
% These nodes are used as anchors further down, outside this \visible group. This
% relies on \visible still typesetting its content invisibly on earlier slides, so
% the node names exist from slide 2 on - NOTE(review): confirm against the beamer manual.
\visible<4->{
\node[anchor=north west] (synhifst) at ([yshift=-0.3em]hifst.south west) {\textbf{better?:}};
\node[anchor=west, fill=red!20!white, inner sep=0.3em] (synhifstpart1) at ([xshift=-0.5em]synhifst.east) {After};
\node[anchor=west, fill=blue!20!white, inner sep=0.25em] (synhifstpart2) at ([xshift=0.1em,yshift=-0.05em]synhifstpart1.east) {North Korea again demanded that U.S. promised concessions before the new round of six-nation talks};
\node[anchor=west] (synhifstpart3) at ([xshift=-0.2em]synhifstpart2.east) {...};
}
% Input sentence, laid out left to right as five nodes inputseg1..inputseg5,
% placed 0.4in below the "better?" label. Each token carries its position as a subscript.
% NOTE(review): inputseg1 and inputseg3 contain only a subscript, and inputseg2 has
% runs of subscripts with no word before them ($_5$$_6$$_7$, $_8$$_9$$_{10}$,
% $_{11}$$_{12}$). The source words for those positions appear to be missing -
% verify against the original slide source.
\node [anchor=west] (inputlabel) at ([yshift=-0.4in]synhifst.west) {\textbf{Input:}};
\node [anchor=west,minimum height=12pt] (inputseg1) at (inputlabel.east) {$_1$ };
\node [anchor=west,minimum height=12pt] (inputseg2) at ([xshift=0.2em]inputseg1.east) {北韩$_2$ 再度$_3$ 要求$_4$ 美国$_5$$_6$$_7$ 回合$_8$$_9$$_{10}$ 会谈$_{11}$$_{12}$ 承诺$_{13}$ 让步$_{14}$};
\node [anchor=west,minimum height=12pt] (inputseg3) at ([xshift=0.2em]inputseg2.east) {$_{15}$};
\node [anchor=west,minimum height=12pt] (inputseg4) at ([xshift=0.2em]inputseg3.east) {,$_{16}$};
\node [anchor=west,minimum height=12pt] (inputseg5) at ([xshift=0.2em]inputseg4.east) {...};
% Slide 3+: syntactic labels under the input. synlabel1 (PP) sits under inputseg2,
% synlabel2 (PU) under inputseg4, synlabel3 (VP) under inputseg5; synlabel4 (VP) is
% placed below and to the right of synlabel1 and is joined to all three.
\visible<3->{
\node [anchor=north,inner sep=2pt] (synlabel1) at ([yshift=-0.34in]inputseg2.south) {\scriptsize{PP}};
\node [anchor=north,inner sep=2pt] (synlabel2) at ([yshift=-0.34in]inputseg4.south) {\scriptsize{PU}};
\node [anchor=north,inner sep=2pt] (synlabel3) at ([yshift=-0.34in]inputseg5.south) {\scriptsize{VP}};
\node [anchor=north,inner sep=2pt] (synlabel4) at ([xshift=1.6in,yshift=-0.35in]synlabel1.south) {\scriptsize{VP}};
% Closed triangle spanning inputseg1..inputseg3 with its apex at the PP label.
\draw [-] (inputseg1.south west) -- (inputseg3.south east) -- (synlabel1.north) -- cycle;
\draw [-] (inputseg4.south) -- (synlabel2.north);
\draw [-] (inputseg5.south) -- (synlabel3.north);
\draw [-] (synlabel1.south) -- (synlabel4.north);
\draw [-] (synlabel2.south) -- (synlabel4.north);
\draw [-] (synlabel3.south) -- (synlabel4.north);
}
% Slide 3+: two-line caption placed to the lower left of the PP label.
\visible<3->{
\node [anchor=north east,align=left] (nolimitlabel) at (synlabel1.south west) {\scriptsize{短语结构树很容易捕捉}\\\scriptsize{这种介词短语结构}};
}
% Slide 4+: inputseg1..inputseg3 are declared again with the same names, anchors and
% text but with a fill (red, blue, red), so they are painted over the plain versions.
% The fills match the colours of the two parts of the "better?" translation.
\visible<4->{
\node [anchor=west,minimum height=12pt,fill=red!20] (inputseg1) at (inputlabel.east) {$_1$ };
\node [anchor=west,minimum height=12pt,fill=blue!20] (inputseg2) at ([xshift=0.2em]inputseg1.east) {北韩$_2$ 再度$_3$ 要求$_4$ 美国$_5$$_6$$_7$ 回合$_8$$_9$$_{10}$ 会谈$_{11}$$_{12}$ 承诺$_{13}$ 让步$_{14}$};
\node [anchor=west,minimum height=12pt,fill=red!20] (inputseg3) at ([xshift=0.2em]inputseg2.east) {$_{15}$};
% Dashed links: both red input segments end at the same point under "After"
% (synhifstpart1); the blue segment links to the blue clause (synhifstpart2).
\path [draw,->,dashed] (inputseg1.north) .. controls +(north:0.2) and +(south:0.3) .. ([xshift=1em]synhifstpart1.south);
\path [draw,->,dashed] (inputseg3.north) .. controls +(north:0.2) and +(south:0.6) .. ([xshift=1em]synhifstpart1.south);
\path [draw,->,dashed] ([xshift=-0.5in]inputseg2.north) -- ([xshift=-0.6in]synhifstpart2.south);
}
}
\end{scope}
%% end of example
\end{tikzpicture}
\end{center}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Core question of this chapter
% A single slide: one shadowed blue box holding the chapter's guiding question.
\begin{frame}{核心问题}
  % Push the box down from the frame title.
  \vspace{6em}
  \begin{tcolorbox}[
      enhanced,
      size=normal,
      left=2mm,
      right=1mm,
      colback=blue!5!white,
      colframe=blue!75!black,
      drop fuzzy shadow,
    ]
    {\LARGE
      \textbf{如何使用短语、句法等}\\
      \textbf{结构信息进行机器翻译建模?}
    }
  \end{tcolorbox}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Contents of this chapter
% Outline slide: three stacked red boxes, one per model family, each listing its topics.
\begin{frame}{Outline}
  % The three boxes share one look, so it is declared once as a tcolorbox style.
  % Option order is kept as in the per-box lists (left/right follow size=normal).
  \tcbset{outlinebox/.style={enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow}}
  % Box 1: phrase-based models.
  \begin{tcolorbox}[outlinebox]
    {\normalsize
      \textbf{基于短语的模型} \vspace{-0.2em}\\
      \small{1. 建模} \vspace{-0.2em}\\
      \small{2. 如何获取短语} \vspace{-0.2em}\\
      \small{3. 判别式模型和最小错误率训练} \vspace{-0.2em}\\
      \small{4. 栈解码}
    }
  \end{tcolorbox}
  \vspace{0em}
  % Box 2: hierarchical phrase-based models.
  \begin{tcolorbox}[outlinebox]
    {\normalsize
      \textbf{基于层次短语的模型} \vspace{-0.2em}\\
      \small{1. 同步上下文无关文法} \vspace{-0.2em}\\
      \small{2. 层次短语规则及特征} \vspace{-0.2em}\\
      \small{3. 基于chart的解码和立方剪枝}
    }
  \end{tcolorbox}
  \vspace{0em}
  % Box 3: linguistically syntax-based models.
  \begin{tcolorbox}[outlinebox]
    {\normalsize
      \textbf{基于语言学句法的模型} \vspace{-0.2em}\\
      \small{1. 基于树结构的文法} \vspace{-0.2em}\\
      \small{2. 翻译规则抽取} \vspace{-0.2em}\\
      \small{3. 规则匹配}
    }
  \end{tcolorbox}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\section{基于短语的模型}
%%%------------------------------------------------------------------------------------------------------------
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论