Commit fb4867d8 by xiaotong

the architecture of phrase-based models

parent 822074f3
...@@ -125,36 +125,49 @@ ...@@ -125,36 +125,49 @@
\section{基于层次短语的模型} \section{基于层次短语的模型}
%%%------------------------------------------------------------------------------------------------------------ %%%------------------------------------------------------------------------------------------------------------
%%% 文法驱动的统计机器翻译流程 %%% 基于短语的系统的架构
\begin{frame}{文法驱动的机器翻译流程} \begin{frame}{基于短语的系统的架构}
\begin{itemize} \begin{itemize}
\item 同步翻译文法给我们带来了新的思路:可以通过不断使用文法规则完成翻译过程。这类模型的基本流程如下: \item \textbf{训练阶段},需要得到三个子模型
\begin{enumerate}
\item 短语表:短语翻译及每个短语对应的特征值
\item 调序模型:短语调序的模型
\item 语言模型:评价译文流畅度的$n$-gram语言模型
\end{enumerate}
\item \textbf{解码阶段}利用以上模型对新的句子进行翻译
\end{itemize} \end{itemize}
\begin{center} \begin{center}
\begin{tikzpicture} \begin{tikzpicture}
\begin{scope} \begin{scope}
\tikzstyle{datanode} = [minimum width=10em,minimum height=1.7em,fill=ublue,rounded corners=0.7em]; \tikzstyle{datanode} = [minimum width=7em,minimum height=1.7em,fill=ublue,rounded corners=0.7em];
\tikzstyle{modelnode} = [minimum width=10em,minimum height=1.7em,fill=darkred,rounded corners=0.2em]; \tikzstyle{modelnode} = [minimum width=7em,minimum height=1.7em,fill=darkred,rounded corners=0.2em];
\tikzstyle{decodingnode} = [minimum width=10em,minimum height=1.7em,fill=ugreen,rounded corners=0.2em]; \tikzstyle{decodingnode} = [minimum width=7em,minimum height=1.7em,fill=ugreen,rounded corners=0.2em];
\node [datanode,anchor=north west] (bitext) at (0,0) {{\color{white} \scriptsize{训练用双语数据}}}; \node [datanode,anchor=north west] (bitext) at (0,0) {{\color{white} \scriptsize{训练用双语数据}}};
\node [modelnode, anchor=north west] (gi) at ([xshift=2em,yshift=-0.2em]bitext.south east) {{\color{white} \scriptsize{文法(规则)抽取}}}; \node [modelnode,anchor=north] (phrase) at ([yshift=-1.5em]bitext.south) {{\color{white} \scriptsize{短语抽取及打分}}};
\node [datanode,anchor=north east] (birules) at ([xshift=-2em,yshift=-0.2em]gi.south west) {{\color{white} \scriptsize{同步翻译文法}}}; \node [modelnode,anchor=west] (reorder) at ([xshift=1.5em]phrase.east) {{\color{white} \scriptsize{调序建模}}};
\node [modelnode, anchor=north west] (training) at ([xshift=2em,yshift=-0.2em]birules.south east) {{\color{white} \scriptsize{特征值学习}}}; \node [modelnode,anchor=west] (lm) at ([xshift=1.5em]reorder.east) {{\color{white} \scriptsize{语言建模}}};
\node [datanode,anchor=north east] (model) at ([xshift=-2em,yshift=-0.2em]training.south west) {{\color{white} \scriptsize{翻译模型}}}; \node [datanode,anchor=south] (monotext) at ([yshift=1.5em]lm.north) {{\color{white} \scriptsize{目标语单语数据}}};
\node [decodingnode, anchor=north west] (tuning) at ([xshift=2em,yshift=-0.2em]model.south east) {{\color{white} \scriptsize{特征权重调优}}};
\node [datanode,anchor=north east] (tuningdata) at ([xshift=-2em,yshift=-0.2em]tuning.south west) {{\color{white} \scriptsize{调优用双语数据}}}; \node [datanode,anchor=north] (phrasetable) at ([yshift=-1.5em]phrase.south) {{\color{white} \scriptsize{短语表}}};
\node [decodingnode, anchor=north west] (decoding) at ([xshift=2em,yshift=-0.2em]tuningdata.south east) {{\color{white} \scriptsize{解码新句子}}}; \node [datanode,anchor=north] (reordertable) at ([yshift=-1.5em]reorder.south) {{\color{white} \scriptsize{调序模型}}};
\node [datanode,anchor=north] (lmtable) at ([yshift=-1.5em]lm.south) {{\color{white} \scriptsize{语言模型}}};
\draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]bitext.east) -- ([yshift=0.1em]gi.north west);
\draw [->,very thick] ([yshift=-0.1em]gi.south west) -- ([yshift=0.3em,xshift=0.1em]birules.east); \node [decodingnode,anchor=north] (decoding) at ([yshift=-2em]reordertable.south) {{\color{white} \scriptsize{解码器}}};
\draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]birules.east) -- ([yshift=0.1em]training.north west);
\draw [->,very thick] ([yshift=-0.1em]training.south west) -- ([yshift=0.3em,xshift=0.1em]model.east); \draw [->,very thick] ([yshift=-0.1em]bitext.south) -- ([yshift=0.1em]phrase.north);
\draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]model.east) -- ([yshift=0.1em]tuning.north west); \draw [->,very thick] (bitext.south east) -- ([yshift=0.1em]reorder.north west);
\draw [->,very thick] ([yshift=-0.1em]tuning.south) -- ([yshift=0.1em]decoding.north); \draw [->,very thick] ([yshift=-0.1em]monotext.south) -- ([yshift=0.1em]lm.north);
\draw [->,very thick] ([yshift=0.3em,xshift=0.1em]tuningdata.east) -- ([yshift=-0.1em]tuning.south west); \draw [->,very thick] ([yshift=-0.1em]phrase.south) -- ([yshift=0.1em]phrasetable.north);
\draw [->,very thick] ([yshift=-0.1em]reorder.south) -- ([yshift=0.1em]reordertable.north);
\draw [->,very thick] ([yshift=-0.1em]lm.south) -- ([yshift=0.1em]lmtable.north);
\draw [->,very thick] ([yshift=-0.1em]phrasetable.south east) -- ([yshift=0.1em,xshift=-3em]decoding.north);
\draw [->,very thick] ([yshift=-0.1em]reordertable.south) -- ([yshift=0.1em,xshift=0em]decoding.north);
\draw [->,very thick] ([yshift=-0.1em]lmtable.south west) -- ([yshift=0.1em,xshift=3em]decoding.north);
\end{scope} \end{scope}
\end{tikzpicture} \end{tikzpicture}
\end{center} \end{center}
......
...@@ -1126,7 +1126,7 @@ $d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导,$\textrm{P ...@@ -1126,7 +1126,7 @@ $d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导,$\textrm{P
\begin{itemize} \begin{itemize}
\item $\{h_i(\cdot)\}$是$M$个\alert{特征},每个$h_i(d,\textbf{s},\textbf{t})$将$d$映射为一个实数值 \item $\{h_i(\cdot)\}$是$M$个\alert{特征},每个$h_i(d,\textbf{s},\textbf{t})$将$d$映射为一个实数值
\item $\{\lambda_i\}$是这些特征对应权重,权重越大表示特征越重要 \item $\{\lambda_i\}$是这些特征对应权重,权重越大表示特征越重要
\item $\sum_{i=1}^{M} \lambda_i \cdot h_i(d,\textbf{s},\textbf{t}))$描述了$d$的整体质量,值越大$d$越``好'' \item $\sum_{i=1}^{M} \lambda_i \cdot h_i(d,\textbf{s},\textbf{t})$描述了$d$的整体质量,值越大$d$越``好''
\end{itemize} \end{itemize}
\item<2-> 判别式模型的优点在于,它可以很方便地引入各种特征。我们只需要设计不同的特征函数$h_i(\cdot)$即可。 \item<2-> 判别式模型的优点在于,它可以很方便地引入各种特征。我们只需要设计不同的特征函数$h_i(\cdot)$即可。
\begin{itemize} \begin{itemize}
...@@ -1142,6 +1142,57 @@ $d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导,$\textrm{P ...@@ -1142,6 +1142,57 @@ $d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导,$\textrm{P
\end{frame} \end{frame}
%%%------------------------------------------------------------------------------------------------------------ %%%------------------------------------------------------------------------------------------------------------
%%% Architecture of the phrase-based system
% One beamer frame: a two-item summary (training stage / decoding stage)
% followed by a TikZ flow chart with the same content.
\begin{frame}{短语系统的架构}
  \begin{itemize}
    \item \textbf{训练阶段},需要得到三个子模型
      \begin{enumerate}
        \item 短语表:短语翻译及每个短语对应的特征值
        \item 调序模型:短语调序的模型
        \item 语言模型:评价译文流畅度的$n$-gram语言模型
      \end{enumerate}
    \item \textbf{解码阶段}利用以上模型对新的句子进行翻译
  \end{itemize}
  \begin{center}
    \begin{tikzpicture}
      \begin{scope}
        % Styles are declared with \tikzset (the \tikzstyle syntax is deprecated)
        % and stay local to this scope. The white \scriptsize label formatting
        % lives in the shared base style, so node texts are plain strings:
        % \scriptsize is a declaration, not a one-argument command, and the
        % former "{\color{white} \scriptsize{...}}" wrapper also put a leading
        % space into every label.
        \tikzset{
          archnode/.style={minimum width=7em,minimum height=1.7em,text=white,font=\scriptsize},
          datanode/.style={archnode,fill=ublue,rounded corners=0.7em},
          modelnode/.style={archnode,fill=darkred,rounded corners=0.2em},
          decodingnode/.style={archnode,fill=ugreen,rounded corners=0.2em},
        }

        % Row of processing steps (modelnode), anchored below the bilingual
        % data node; the monolingual data node sits above the LM step.
        \node [datanode,anchor=north west] (bitext) at (0,0) {训练用双语数据};
        \node [modelnode,anchor=north] (phrase) at ([yshift=-1.5em]bitext.south) {短语抽取及打分};
        \node [modelnode,anchor=west] (reorder) at ([xshift=1.5em]phrase.east) {调序建模};
        \node [modelnode,anchor=west] (lm) at ([xshift=1.5em]reorder.east) {语言建模};
        \node [datanode,anchor=south] (monotext) at ([yshift=1.5em]lm.north) {目标语单语数据};

        % Output of each processing step, placed directly below it.
        \node [datanode,anchor=north] (phrasetable) at ([yshift=-1.5em]phrase.south) {短语表};
        \node [datanode,anchor=north] (reordertable) at ([yshift=-1.5em]reorder.south) {调序模型};
        \node [datanode,anchor=north] (lmtable) at ([yshift=-1.5em]lm.south) {语言模型};

        % The decoder is centred under the middle column.
        \node [decodingnode,anchor=north] (decoding) at ([yshift=-2em]reordertable.south) {解码器};

        % Data -> processing steps (the bilingual data feeds two steps).
        \draw [->,very thick] ([yshift=-0.1em]bitext.south) -- ([yshift=0.1em]phrase.north);
        \draw [->,very thick] (bitext.south east) -- ([yshift=0.1em]reorder.north west);
        \draw [->,very thick] ([yshift=-0.1em]monotext.south) -- ([yshift=0.1em]lm.north);

        % Processing steps -> their outputs.
        \draw [->,very thick] ([yshift=-0.1em]phrase.south) -- ([yshift=0.1em]phrasetable.north);
        \draw [->,very thick] ([yshift=-0.1em]reorder.south) -- ([yshift=0.1em]reordertable.north);
        \draw [->,very thick] ([yshift=-0.1em]lm.south) -- ([yshift=0.1em]lmtable.north);

        % Outputs -> decoder; the arrow heads are spread 3em apart along the
        % decoder's top edge so the three arrows do not meet in one point.
        \draw [->,very thick] ([yshift=-0.1em]phrasetable.south east) -- ([yshift=0.1em,xshift=-3em]decoding.north);
        \draw [->,very thick] ([yshift=-0.1em]reordertable.south) -- ([yshift=0.1em,xshift=0em]decoding.north);
        \draw [->,very thick] ([yshift=-0.1em]lmtable.south west) -- ([yshift=0.1em,xshift=3em]decoding.north);
      \end{scope}
    \end{tikzpicture}
  \end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{短语抽取} \subsection{短语抽取}
%%%------------------------------------------------------------------------------------------------------------ %%%------------------------------------------------------------------------------------------------------------
...@@ -1732,16 +1783,16 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4 ...@@ -1732,16 +1783,16 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4
%%% 文法驱动的统计机器翻译流程 %%% 文法驱动的统计机器翻译流程
\begin{frame}{文法驱动的机器翻译流程} \begin{frame}{文法驱动的机器翻译流程}
\begin{itemize} \begin{itemize}
\item 同步翻译文法给我们带来了新的思路:可以通过不断使用文法规则完成翻译过程。这类方法的基本流程如下: \item 同步翻译文法给我们带来了新的思路:可以通过不断使用文法规则完成翻译过程。这类模型的基本流程如下:
\end{itemize} \end{itemize}
\begin{center} \begin{center}
\begin{tikzpicture} \begin{tikzpicture}
\begin{scope} \begin{scope}
\tikzstyle{datanode} = [minimum width=10em,minimum height=1.7em,fill=ublue,rounded corners=0.7em]; \tikzstyle{datanode} = [minimum width=7em,minimum height=1.7em,fill=ublue,rounded corners=0.7em];
\tikzstyle{modelnode} = [minimum width=10em,minimum height=1.7em,fill=darkred,rounded corners=0.2em]; \tikzstyle{modelnode} = [minimum width=7em,minimum height=1.7em,fill=darkred,rounded corners=0.2em];
\tikzstyle{decodingnode} = [minimum width=10em,minimum height=1.7em,fill=ugreen,rounded corners=0.2em]; \tikzstyle{decodingnode} = [minimum width=7em,minimum height=1.7em,fill=ugreen,rounded corners=0.2em];
\node [datanode,anchor=north west] (bitext) at (0,0) {{\color{white} \scriptsize{训练用双语数据}}}; \node [datanode,anchor=north west] (bitext) at (0,0) {{\color{white} \scriptsize{训练用双语数据}}};
\node [modelnode, anchor=north west] (gi) at ([xshift=2em,yshift=-0.2em]bitext.south east) {{\color{white} \scriptsize{文法(规则)抽取}}}; \node [modelnode, anchor=north west] (gi) at ([xshift=2em,yshift=-0.2em]bitext.south east) {{\color{white} \scriptsize{文法(规则)抽取}}};
...@@ -1752,6 +1803,10 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4 ...@@ -1752,6 +1803,10 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4
\node [datanode,anchor=north east] (tuningdata) at ([xshift=-2em,yshift=-0.2em]tuning.south west) {{\color{white} \scriptsize{调优用双语数据}}}; \node [datanode,anchor=north east] (tuningdata) at ([xshift=-2em,yshift=-0.2em]tuning.south west) {{\color{white} \scriptsize{调优用双语数据}}};
\node [decodingnode, anchor=north west] (decoding) at ([xshift=2em,yshift=-0.2em]tuningdata.south east) {{\color{white} \scriptsize{解码新句子}}}; \node [decodingnode, anchor=north west] (decoding) at ([xshift=2em,yshift=-0.2em]tuningdata.south east) {{\color{white} \scriptsize{解码新句子}}};
\node [datanode,anchor=south west] (monotext) at ([xshift=2em,yshift=0.2em]training.north east) {{\color{white} \scriptsize{目标语数据}}};
\node [modelnode,anchor=south west] (lm) at ([xshift=2em,yshift=0.2em]tuning.north east) {{\color{white} \scriptsize{$n$-gram语言建模}}};
\node [datanode,anchor=south west] (lmmodel) at ([xshift=2em,yshift=0.2em]decoding.north east) {{\color{white} \scriptsize{语言模型}}};
\draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]bitext.east) -- ([yshift=0.1em]gi.north west); \draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]bitext.east) -- ([yshift=0.1em]gi.north west);
\draw [->,very thick] ([yshift=-0.1em]gi.south west) -- ([yshift=0.3em,xshift=0.1em]birules.east); \draw [->,very thick] ([yshift=-0.1em]gi.south west) -- ([yshift=0.3em,xshift=0.1em]birules.east);
\draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]birules.east) -- ([yshift=0.1em]training.north west); \draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]birules.east) -- ([yshift=0.1em]training.north west);
...@@ -1759,6 +1814,11 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4 ...@@ -1759,6 +1814,11 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4
\draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]model.east) -- ([yshift=0.1em]tuning.north west); \draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]model.east) -- ([yshift=0.1em]tuning.north west);
\draw [->,very thick] ([yshift=-0.1em]tuning.south) -- ([yshift=0.1em]decoding.north); \draw [->,very thick] ([yshift=-0.1em]tuning.south) -- ([yshift=0.1em]decoding.north);
\draw [->,very thick] ([yshift=0.3em,xshift=0.1em]tuningdata.east) -- ([yshift=-0.1em]tuning.south west); \draw [->,very thick] ([yshift=0.3em,xshift=0.1em]tuningdata.east) -- ([yshift=-0.1em]tuning.south west);
\draw [->,very thick] ([yshift=-0.1em]monotext.south) -- ([yshift=0.1em]lm.north);
\draw [->,very thick] ([yshift=-0.1em]lm.south) -- ([yshift=0.1em]lmmodel.north);
\draw [->,very thick] ([yshift=0.3em,xshift=-0.1em]lmmodel.west) -- ([yshift=-0.1em]tuning.south east);
\draw [->,very thick] ([yshift=-0.3em,xshift=-0.1em]lmmodel.west) -- ([yshift=-0.1em]decoding.north east);
\end{scope} \end{scope}
\end{tikzpicture} \end{tikzpicture}
\end{center} \end{center}
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
请注册或者登录后发表评论