the architecture of phrase-based models

fb4867d8 · xiaotong · 822074f3 · fb4867d8 · fb4867d8
Commit fb4867d8 authored Dec 23, 2019 by xiaotong
--- a/Section04-Phrasal-and-Syntactic-Models/section04-test.tex
+++ b/Section04-Phrasal-and-Syntactic-Models/section04-test.tex
@@ -125,36 +125,49 @@
 \section{基于层次短语的模型}
 %%%------------------------------------------------------------------------------------------------------------
-%%%  文法驱动的统计机器翻译流程
+%%%  基于短语的系统的架构
-\begin{frame}{文法驱动的机器翻译流程}
+\begin{frame}{基于短语的系统的架构}
 \begin{itemize}
-\item 同步翻译文法给我们带来了新的思路：可以通过不断使用文法规则完成翻译过程。这类模型的基本流程如下：
+\item \textbf{训练阶段}，需要得到三个子模型
+	\begin{enumerate}
+	\item 短语表：短语翻译及每个短语对应的特征值
+	\item 调序模型：短语调序的模型
+	\item 语言模型：评价译文流畅度的$n$-gram语言模型
+	\end{enumerate}
+\item \textbf{解码阶段}利用以上模型对新的句子进行翻译
 \end{itemize}
 \begin{center}
 \begin{tikzpicture}
 \begin{scope}
-\tikzstyle{datanode} = [minimum width=10em,minimum height=1.7em,fill=ublue,rounded corners=0.7em];
+\tikzstyle{datanode} = [minimum width=7em,minimum height=1.7em,fill=ublue,rounded corners=0.7em];
-\tikzstyle{modelnode} = [minimum width=10em,minimum height=1.7em,fill=darkred,rounded corners=0.2em];
+\tikzstyle{modelnode} = [minimum width=7em,minimum height=1.7em,fill=darkred,rounded corners=0.2em];
-\tikzstyle{decodingnode} = [minimum width=10em,minimum height=1.7em,fill=ugreen,rounded corners=0.2em];
+\tikzstyle{decodingnode} = [minimum width=7em,minimum height=1.7em,fill=ugreen,rounded corners=0.2em];
 \node [datanode,anchor=north west] (bitext) at (0,0) {{\color{white} \scriptsize{训练用双语数据}}};
-\node [modelnode, anchor=north west] (gi) at ([xshift=2em,yshift=-0.2em]bitext.south east) {{\color{white} \scriptsize{文法(规则)抽取}}};
+\node [modelnode,anchor=north] (phrase) at ([yshift=-1.5em]bitext.south) {{\color{white} \scriptsize{短语抽取及打分}}};
-\node [datanode,anchor=north east] (birules) at ([xshift=-2em,yshift=-0.2em]gi.south west) {{\color{white} \scriptsize{同步翻译文法}}};
+\node [modelnode,anchor=west] (reorder) at ([xshift=1.5em]phrase.east) {{\color{white} \scriptsize{调序建模}}};
-\node [modelnode, anchor=north west] (training) at ([xshift=2em,yshift=-0.2em]birules.south east) {{\color{white} \scriptsize{特征值学习}}};
+\node [modelnode,anchor=west] (lm) at ([xshift=1.5em]reorder.east) {{\color{white} \scriptsize{语言建模}}};
-\node [datanode,anchor=north east] (model) at ([xshift=-2em,yshift=-0.2em]training.south west) {{\color{white} \scriptsize{翻译模型}}};
+\node [datanode,anchor=south] (monotext) at ([yshift=1.5em]lm.north) {{\color{white} \scriptsize{目标语单语数据}}};
-\node [decodingnode, anchor=north west] (tuning) at ([xshift=2em,yshift=-0.2em]model.south east) {{\color{white} \scriptsize{特征权重调优}}};
-\node [datanode,anchor=north east] (tuningdata) at ([xshift=-2em,yshift=-0.2em]tuning.south west) {{\color{white} \scriptsize{调优用双语数据}}};
+\node [datanode,anchor=north] (phrasetable) at ([yshift=-1.5em]phrase.south) {{\color{white} \scriptsize{短语表}}};
-\node [decodingnode, anchor=north west] (decoding) at ([xshift=2em,yshift=-0.2em]tuningdata.south east) {{\color{white} \scriptsize{解码新句子}}};
+\node [datanode,anchor=north] (reordertable) at ([yshift=-1.5em]reorder.south) {{\color{white} \scriptsize{调序模型}}};
+\node [datanode,anchor=north] (lmtable) at ([yshift=-1.5em]lm.south) {{\color{white} \scriptsize{语言模型}}};
-\draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]bitext.east) -- ([yshift=0.1em]gi.north west);
-\draw [->,very thick] ([yshift=-0.1em]gi.south west) -- ([yshift=0.3em,xshift=0.1em]birules.east);
+\node [decodingnode,anchor=north] (decoding) at ([yshift=-2em]reordertable.south) {{\color{white} \scriptsize{解码器}}};
-\draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]birules.east) -- ([yshift=0.1em]training.north west);
-\draw [->,very thick] ([yshift=-0.1em]training.south west) -- ([yshift=0.3em,xshift=0.1em]model.east);
+\draw [->,very thick] ([yshift=-0.1em]bitext.south) -- ([yshift=0.1em]phrase.north);
-\draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]model.east) -- ([yshift=0.1em]tuning.north west);
+\draw [->,very thick] (bitext.south east) -- ([yshift=0.1em]reorder.north west);
-\draw [->,very thick] ([yshift=-0.1em]tuning.south) -- ([yshift=0.1em]decoding.north);
+\draw [->,very thick] ([yshift=-0.1em]monotext.south) -- ([yshift=0.1em]lm.north);
-\draw [->,very thick] ([yshift=0.3em,xshift=0.1em]tuningdata.east) -- ([yshift=-0.1em]tuning.south west);
+\draw [->,very thick] ([yshift=-0.1em]phrase.south) -- ([yshift=0.1em]phrasetable.north);
+\draw [->,very thick] ([yshift=-0.1em]reorder.south) -- ([yshift=0.1em]reordertable.north);
+\draw [->,very thick] ([yshift=-0.1em]lm.south) -- ([yshift=0.1em]lmtable.north);
+\draw [->,very thick] ([yshift=-0.1em]phrasetable.south east) -- ([yshift=0.1em,xshift=-3em]decoding.north);
+\draw [->,very thick] ([yshift=-0.1em]reordertable.south) -- ([yshift=0.1em,xshift=0em]decoding.north);
+\draw [->,very thick] ([yshift=-0.1em]lmtable.south west) -- ([yshift=0.1em,xshift=3em]decoding.north);
 \end{scope}
 \end{tikzpicture}
 \end{center}

--- a/Section04-Phrasal-and-Syntactic-Models/section04.tex
+++ b/Section04-Phrasal-and-Syntactic-Models/section04.tex
@@ -1126,7 +1126,7 @@ $d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导，$\textrm{P
    \begin{itemize}
    \item $\{h_i(\cdot)\}$是$M$个\alert{特征}，每个$h_i(d,\textbf{s},\textbf{t})$把$d$映射为一个实数值
    \item $\{\lambda_i\}$是这些特征对应权重，权重越大表示特征越重要
-    \item $\sum_{i=1}^{M} \lambda_i \cdot h_i(d,\textbf{s},\textbf{t}))$描述了$d$的整体质量，值约大$d$越``好''
+    \item $\sum_{i=1}^{M} \lambda_i \cdot h_i(d,\textbf{s},\textbf{t})$描述了$d$的整体质量，值越大$d$越``好''
    \end{itemize}
 \item<2-> 判别式模型的优点在于，它可以很方便的引入各种特征。我们只需要设计不同的特征函数$h_i(\cdot)$即可。
    \begin{itemize}
@@ -1142,6 +1142,57 @@ $d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导，$\textrm{P
 \end{frame}
 %%%------------------------------------------------------------------------------------------------------------
+%%%  Architecture of the phrase-based system: three trained sub-models (phrase table, reordering model, language model) feeding one decoder
+\begin{frame}{短语系统的架构}
+\begin{itemize}
+\item \textbf{训练阶段}，需要得到三个子模型
+	\begin{enumerate}
+	\item 短语表：短语翻译及每个短语对应的特征值
+	\item 调序模型：短语调序的模型
+	\item 语言模型：评价译文流畅度的$n$-gram语言模型
+	\end{enumerate}
+\item \textbf{解码阶段}利用以上模型对新的句子进行翻译
+\end{itemize}
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}
+\tikzstyle{datanode} = [minimum width=7em,minimum height=1.7em,fill=ublue,rounded corners=0.7em];% style for corpora and resulting models: ublue fill, 0.7em rounded corners
+\tikzstyle{modelnode} = [minimum width=7em,minimum height=1.7em,fill=darkred,rounded corners=0.2em];% style for the three modeling steps: darkred fill, 0.2em rounded corners
+\tikzstyle{decodingnode} = [minimum width=7em,minimum height=1.7em,fill=ugreen,rounded corners=0.2em];% style for the decoder: ugreen fill, 0.2em rounded corners
+\node [datanode,anchor=north west] (bitext) at (0,0) {{\color{white} \scriptsize{训练用双语数据}}};% bilingual training data; placed at the origin, all other nodes are positioned relative to it
+\node [modelnode,anchor=north] (phrase) at ([yshift=-1.5em]bitext.south) {{\color{white} \scriptsize{短语抽取及打分}}};% phrase extraction and scoring, 1.5em below bitext
+\node [modelnode,anchor=west] (reorder) at ([xshift=1.5em]phrase.east) {{\color{white} \scriptsize{调序建模}}};% reordering modeling, 1.5em to the right of phrase
+\node [modelnode,anchor=west] (lm) at ([xshift=1.5em]reorder.east) {{\color{white} \scriptsize{语言建模}}};% language modeling, 1.5em to the right of reorder
+\node [datanode,anchor=south] (monotext) at ([yshift=1.5em]lm.north) {{\color{white} \scriptsize{目标语单语数据}}};% target-language monolingual data, 1.5em above lm
+\node [datanode,anchor=north] (phrasetable) at ([yshift=-1.5em]phrase.south) {{\color{white} \scriptsize{短语表}}};% phrase table, 1.5em below phrase
+\node [datanode,anchor=north] (reordertable) at ([yshift=-1.5em]reorder.south) {{\color{white} \scriptsize{调序模型}}};% reordering model, 1.5em below reorder
+\node [datanode,anchor=north] (lmtable) at ([yshift=-1.5em]lm.south) {{\color{white} \scriptsize{语言模型}}};% language model, 1.5em below lm
+\node [decodingnode,anchor=north] (decoding) at ([yshift=-2em]reordertable.south) {{\color{white} \scriptsize{解码器}}};% decoder, 2em below the middle model (reordertable)
+\draw [->,very thick] ([yshift=-0.1em]bitext.south) -- ([yshift=0.1em]phrase.north);% bilingual data -> phrase extraction and scoring
+\draw [->,very thick] (bitext.south east) -- ([yshift=0.1em]reorder.north west);% bilingual data -> reordering modeling (corner-to-corner diagonal arrow)
+\draw [->,very thick] ([yshift=-0.1em]monotext.south) -- ([yshift=0.1em]lm.north);% monolingual data -> language modeling
+\draw [->,very thick] ([yshift=-0.1em]phrase.south) -- ([yshift=0.1em]phrasetable.north);% phrase extraction -> phrase table
+\draw [->,very thick] ([yshift=-0.1em]reorder.south) -- ([yshift=0.1em]reordertable.north);% reordering modeling -> reordering model
+\draw [->,very thick] ([yshift=-0.1em]lm.south) -- ([yshift=0.1em]lmtable.north);% language modeling -> language model
+\draw [->,very thick] ([yshift=-0.1em]phrasetable.south east) -- ([yshift=0.1em,xshift=-3em]decoding.north);% phrase table -> decoder, arriving 3em left of the decoder's top centre
+\draw [->,very thick] ([yshift=-0.1em]reordertable.south) -- ([yshift=0.1em,xshift=0em]decoding.north);% reordering model -> decoder, arriving at the top centre
+\draw [->,very thick] ([yshift=-0.1em]lmtable.south west) -- ([yshift=0.1em,xshift=3em]decoding.north);% language model -> decoder, arriving 3em right of the top centre
+\end{scope}
+\end{tikzpicture}
+\end{center}
+\end{frame}
+%%%------------------------------------------------------------------------------------------------------------
 \subsection{短语抽取}
 %%%------------------------------------------------------------------------------------------------------------
@@ -1732,16 +1783,16 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4
 %%%  文法驱动的统计机器翻译流程
 \begin{frame}{文法驱动的机器翻译流程}
 \begin{itemize}
-\item 同步翻译文法给我们带来了新的思路：可以通过不断使用文法规则完成翻译过程。这类方法的基本流程如下：
+\item 同步翻译文法给我们带来了新的思路：可以通过不断使用文法规则完成翻译过程。这类模型的基本流程如下：
 \end{itemize}
 \begin{center}
 \begin{tikzpicture}
 \begin{scope}
-\tikzstyle{datanode} = [minimum width=10em,minimum height=1.7em,fill=ublue,rounded corners=0.7em];
+\tikzstyle{datanode} = [minimum width=7em,minimum height=1.7em,fill=ublue,rounded corners=0.7em];
-\tikzstyle{modelnode} = [minimum width=10em,minimum height=1.7em,fill=darkred,rounded corners=0.2em];
+\tikzstyle{modelnode} = [minimum width=7em,minimum height=1.7em,fill=darkred,rounded corners=0.2em];
-\tikzstyle{decodingnode} = [minimum width=10em,minimum height=1.7em,fill=ugreen,rounded corners=0.2em];
+\tikzstyle{decodingnode} = [minimum width=7em,minimum height=1.7em,fill=ugreen,rounded corners=0.2em];
 \node [datanode,anchor=north west] (bitext) at (0,0) {{\color{white} \scriptsize{训练用双语数据}}};
 \node [modelnode, anchor=north west] (gi) at ([xshift=2em,yshift=-0.2em]bitext.south east) {{\color{white} \scriptsize{文法(规则)抽取}}};
@@ -1752,6 +1803,10 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4
 \node [datanode,anchor=north east] (tuningdata) at ([xshift=-2em,yshift=-0.2em]tuning.south west) {{\color{white} \scriptsize{调优用双语数据}}};
 \node [decodingnode, anchor=north west] (decoding) at ([xshift=2em,yshift=-0.2em]tuningdata.south east) {{\color{white} \scriptsize{解码新句子}}};
+\node [datanode,anchor=south west] (monotext) at ([xshift=2em,yshift=0.2em]training.north east) {{\color{white} \scriptsize{目标语数据}}};
+\node [modelnode,anchor=south west] (lm) at ([xshift=2em,yshift=0.2em]tuning.north east) {{\color{white} \scriptsize{$n$-gram语言建模}}};
+\node [datanode,anchor=south west] (lmmodel) at ([xshift=2em,yshift=0.2em]decoding.north east) {{\color{white} \scriptsize{语言模型}}};
 \draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]bitext.east) -- ([yshift=0.1em]gi.north west);
 \draw [->,very thick] ([yshift=-0.1em]gi.south west) -- ([yshift=0.3em,xshift=0.1em]birules.east);
 \draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]birules.east) -- ([yshift=0.1em]training.north west);
@@ -1759,6 +1814,11 @@ d = r_1 \circ r_2 \circ r_3 \circ r_4
 \draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]model.east) -- ([yshift=0.1em]tuning.north west);
 \draw [->,very thick] ([yshift=-0.1em]tuning.south) -- ([yshift=0.1em]decoding.north);
 \draw [->,very thick] ([yshift=0.3em,xshift=0.1em]tuningdata.east) -- ([yshift=-0.1em]tuning.south west);
+\draw [->,very thick] ([yshift=-0.1em]monotext.south) -- ([yshift=0.1em]lm.north);
+\draw [->,very thick] ([yshift=-0.1em]lm.south) -- ([yshift=0.1em]lmmodel.north);
+\draw [->,very thick] ([yshift=0.3em,xshift=-0.1em]lmmodel.west) -- ([yshift=-0.1em]tuning.south east);
+\draw [->,very thick] ([yshift=-0.3em,xshift=-0.1em]lmmodel.west) -- ([yshift=-0.1em]decoding.north east);
 \end{scope}
 \end{tikzpicture}
 \end{center}