Commit 9c2f9cb7 by xiaotong

complete pages of modeling phrase-based derivations

parent 7b8b7e9a
...@@ -56,6 +56,13 @@ ...@@ -56,6 +56,13 @@
\usefonttheme[onlylarge]{structurebold} \usefonttheme[onlylarge]{structurebold}
\newlength{\mystep}
\newlength{\base}
\newlength{\wseg}
\newlength{\hseg}
\newlength{\wnode}
\newlength{\hnode}
\IfFileExists{C:/WINDOWS/win.ini} \IfFileExists{C:/WINDOWS/win.ini}
{\newcommand{\mycfont}{you}} {\newcommand{\mycfont}{you}}
{\newcommand{\mycfont}{gbsn}} {\newcommand{\mycfont}{gbsn}}
...@@ -135,12 +142,85 @@ $d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导,$\textrm{P ...@@ -135,12 +142,85 @@ $d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导,$\textrm{P
\item \textbf{翻译建模}:如何描述$\textrm{P}(d,\textbf{t}|\textbf{s})$ \item \textbf{翻译建模}:如何描述$\textrm{P}(d,\textbf{t}|\textbf{s})$
\item \textbf{模型简化}:如何对所有$d$进行$\textrm{P}(d,\textbf{t}|\textbf{s})$的求和 \item \textbf{模型简化}:如何对所有$d$进行$\textrm{P}(d,\textbf{t}|\textbf{s})$的求和
\end{itemize} \end{itemize}
下面会分别展开讨论 后面会分别展开讨论
\item 回到一开始的问题: 给定$\textbf{s}$$\textbf{t}$,如何获得双语短语 \vspace{0.3em}
\item<2-> 回到一开始的问题: 给定$\textbf{s}$$\textbf{t}$\alert{如何获得双语短语}
\begin{itemize} \begin{itemize}
\item 如果没有限制,$\textbf{s}$$\textbf{t}$之间任何子串映射都可以看做双语短语 \item 如果没有限制,$\textbf{s}$$\textbf{t}$之间任何子串映射都可以看做双语短语
\end{itemize} \end{itemize}
\end{itemize} \end{itemize}
\vspace{-0.7em}
\visible<2->{
\begin{center}
\begin{tikzpicture}
\setlength{\wseg}{1.5cm}
\setlength{\hseg}{1.0cm}
\setlength{\wnode}{3.75cm}
\setlength{\hnode}{1.0cm}
\tikzstyle{elementnode} = [rectangle,text=white,anchor=center]
\tikzstyle{srcnode} = [rotate=45,font=\small,anchor=south west]
\tikzstyle{tgtnode} = [left,font=\small,anchor=north east]
\tikzstyle{alignmentnode} = [rectangle,draw,minimum height=3.6\hnode,minimum width=0.36\hnode]
\tikzstyle{probnode} = [fill=blue!30,minimum width=0.4\hnode]
\tikzstyle{labelnode} = [above]
% alignment matrix
\begin{scope}[scale=0.85,yshift=0.12in]
\foreach \i / \j / \c in
{0/7/0.15, 1/7/0.15, 2/7/0.15, 3/7/0.15, 4/7/0.15, 5/7/0.15,
0/6/0.15, 1/6/0.15, 2/6/0.15, 3/6/0.15, 4/6/0.15, 5/6/0.15,
0/5/0.15, 1/5/0.15, 2/5/0.15, 3/5/0.15, 4/5/0.15, 5/5/0.15,
0/4/0.15, 1/4/0.15, 2/4/0.15, 3/4/0.15, 4/4/0.15, 5/4/0.15,
0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15, 4/3/0.15, 5/3/0.15,
0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.15, 5/2/0.15,
0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.15, 5/1/0.15,
0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.15, 5/0/0.15}
\node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*\hnode*\i-5.4*0.5*\hnode,0.5*\hnode*\j-1.05*\hnode) {};
% source
\node[srcnode] (src1) at (-5.4*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{Have}};
\node[srcnode] (src2) at ([xshift=0.5\hnode]src1.south west) {\scriptsize{you}};
\node[srcnode] (src3) at ([xshift=0.5\hnode]src2.south west) {\scriptsize{learned}};
\node[srcnode] (src4) at ([xshift=0.5\hnode]src3.south west) {\scriptsize{nothing}};
\node[srcnode] (src5) at ([xshift=0.5\hnode]src4.south west) {\scriptsize{?}};
\node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {\scriptsize{EOS}};
% target
\node[tgtnode] (tgt1) at (-6.0*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{}};
\node[tgtnode] (tgt2) at ([yshift=-0.5\hnode]tgt1.north east) {\scriptsize{什么}};
\node[tgtnode] (tgt3) at ([yshift=-0.5\hnode]tgt2.north east) {\scriptsize{}};
\node[tgtnode] (tgt4) at ([yshift=-0.5\hnode]tgt3.north east) {\scriptsize{}};
\node[tgtnode] (tgt5) at ([yshift=-0.5\hnode]tgt4.north east) {\scriptsize{}};
\node[tgtnode] (tgt6) at ([yshift=-0.5\hnode]tgt5.north east) {\scriptsize{}};
\node[tgtnode] (tgt7) at ([yshift=-0.5\hnode]tgt6.north east) {\scriptsize{?}};
\node[tgtnode] (tgt8) at ([yshift=-0.5\hnode]tgt7.north east) {\scriptsize{EOS}};
\node [anchor=west] (p1line1) at ([xshift=4em,yshift=1em]a57.east) {\footnotesize{$\tilde{s}_i$: 什么\ \ \ \ \ \ }};
\node [anchor=north west] (p1line2) at ([xshift=0]p1line1.south west) {\footnotesize{$\tilde{t}_i$: learned\ \ \ nothing\ \ \ ? \ \ \ \ \ \ \ \ \ \ \ \ }};
\node [anchor=west] (p2line1) at ([xshift=4em]a53.east) {\footnotesize{$\tilde{s}_j$: 到\ \ \ ?}};
\node [anchor=north west] (p2line2) at ([xshift=0]p2line1.south west) {\footnotesize{$\tilde{t}_j$: Have\ \ \ you\ \ \ learned\ \ \ nothing}};
\begin{pgfonlayer}{background}
\node [rectangle,draw=red,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (a26) (a44)] (phrase1) {};
\node [rectangle,draw=ugreen,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (a01) (a32)] (phrase2) {};
\node [rectangle,inner sep=0.2em,fill=red!10] [fit = (p1line1) (p1line2)] (box1) {};
\node [rectangle,inner sep=0.2em,fill=green!10] [fit = (p2line1) (p2line2)] (box2) {};
\end{pgfonlayer}
\draw [->,thick,dotted] ([yshift=-0.8em]phrase1.east) .. controls +(east:1.5) and +(west:1) .. (box1.west);
\draw [->,thick,dotted] ([yshift=-0.0em]phrase2.east) .. controls +(east:2.0) and +(west:1) .. ([yshift=1em]box2.west);
\end{scope}
\end{tikzpicture}
\end{center}
}
\end{frame} \end{frame}
%%%------------------------------------------------------------------------------------------------------------ %%%------------------------------------------------------------------------------------------------------------
......
...@@ -51,15 +51,12 @@ ...@@ -51,15 +51,12 @@
\setbeamercolor{uppercolblue}{fg=white,bg=ublue} \setbeamercolor{uppercolblue}{fg=white,bg=ublue}
\setbeamercolor{lowercolblue}{fg=black,bg=blue!10} \setbeamercolor{lowercolblue}{fg=black,bg=blue!10}
\newlength{\mystep}
%\usetheme{default} \newlength{\base}
%\usetheme{Darmstadt} \newlength{\wseg}
%\usetheme{Madrid} \newlength{\hseg}
%\usetheme{Frankfurt} \newlength{\wnode}
%\usetheme{Dresden} \newlength{\hnode}
%\usetheme{Boadilla}
%\usecolortheme{dolphin}
\usefonttheme[onlylarge]{structurebold} \usefonttheme[onlylarge]{structurebold}
...@@ -1035,6 +1032,127 @@ ...@@ -1035,6 +1032,127 @@
\end{frame} \end{frame}
%%%------------------------------------------------------------------------------------------------------------ %%%------------------------------------------------------------------------------------------------------------
%%% Mathematical model: translation as an argmax over P(t|s), then P(t|s)
%%% written as a sum over translation derivations d.
\begin{frame}{数学模型}
\begin{itemize}
% Task statement: for a source sentence s, pick the translation that maximizes P(t|s).
% NOTE(review): \argmax is not defined in the visible part of this file;
% presumably it is declared in the preamble -- confirm.
\item \textbf{机器翻译}:对于输入的源语言句子$\textbf{s}$,找到最佳译文$\hat{\textbf{t}}$
\begin{displaymath}
\hat{\textbf{t}} = \argmax_{\textbf{t}} \textrm{P}(\textbf{t}|\textbf{s})
\end{displaymath}
其中$\textrm{P}(\textbf{t}|\textbf{s})$表示$\textbf{s}$到$\textbf{t}$的翻译概率
% The three classic sub-problems: modeling, training, decoding.
\item 三个基本问题(回忆一下第三章)
\begin{enumerate}
\item 如何定义$\textrm{P}(\textbf{t}|\textbf{s})$ - 建模问题
\item 如何学习$\textrm{P}(\textbf{t}|\textbf{s})$的统计模型 - 训练问题
\item 如何找到最优译文 - 解码问题
\end{enumerate}
\vspace{0.3em}
% Shown from the second overlay step on: marginalize the joint P(d,t|s) over derivations d.
\item<2-> 先看建模问题。可以把$\textrm{P}(\textbf{t}|\textbf{s})$表示成所有翻译推导的概率
\begin{displaymath}
\textrm{P}(\textbf{t}|\textbf{s}) = \sum_{d} \textrm{P}(d,\textbf{t}|\textbf{s})
\end{displaymath}
$d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导,$\textrm{P}(d,\textbf{t}|\textbf{s})$表示翻译推导$d$的概率
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Modeling translation derivations: lists the open problems raised by summing
%%% over derivations, then (overlay step 2) shows a word grid with two arbitrary
%%% bilingual phrase pairs boxed and called out on the right.
\begin{frame}{对翻译推导进行建模}
\begin{itemize}
\item $\textrm{P}(\textbf{t}|\textbf{s}) = \sum_{d} \textrm{P}(d,\textbf{t}|\textbf{s})$带来新的问题:
\begin{itemize}
\item \textbf{短语获取}:如何获取双语短语,以构成$d$
\item \textbf{翻译建模}:如何描述$\textrm{P}(d,\textbf{t}|\textbf{s})$
\item \textbf{模型简化}:如何对所有$d$进行$\textrm{P}(d,\textbf{t}|\textbf{s})$的求和
\end{itemize}
后面会分别展开讨论
\vspace{0.3em}
% Second overlay step: this bullet appears together with the figure below.
\item<2-> 回到一开始的问题: 给定$\textbf{s}$和$\textbf{t}$,\alert{如何获得双语短语}
\begin{itemize}
\item 如果没有限制,$\textbf{s}$和$\textbf{t}$之间任何子串映射都可以看做双语短语
\end{itemize}
\end{itemize}
\vspace{-0.7em}
\visible<2->{
\begin{center}
\begin{tikzpicture}
% \hnode is the only length this picture uses; grid cells are spaced 0.5\hnode apart.
\setlength{\hnode}{1.0cm}
% Styles are set with \tikzset (the \tikzstyle form is deprecated); they stay local to this picture.
\tikzset{
  elementnode/.style={rectangle,text=white,anchor=center},
  srcnode/.style={rotate=45,font=\small,anchor=south west},
  tgtnode/.style={left,font=\small,anchor=north east}
}
% Word grid: 6 columns x 8 rows of equal-sized dots named a<i><j>,
% where i is the column (left to right) and j is the row (row 7 is the top row).
% Rows are emitted top to bottom, columns left to right.
\begin{scope}[scale=0.85,yshift=0.12in]
\foreach \j in {7,6,...,0}
\foreach \i in {0,1,...,5}
\node[elementnode,minimum size=0.6*\hnode*0.15,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*\hnode*\i-5.4*0.5*\hnode,0.5*\hnode*\j-1.05*\hnode) {};
% Column labels (English words), rotated 45 degrees; each one is placed 0.5\hnode to the right of the previous.
\node[srcnode] (src1) at (-5.4*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{Have}};
\node[srcnode] (src2) at ([xshift=0.5\hnode]src1.south west) {\scriptsize{you}};
\node[srcnode] (src3) at ([xshift=0.5\hnode]src2.south west) {\scriptsize{learned}};
\node[srcnode] (src4) at ([xshift=0.5\hnode]src3.south west) {\scriptsize{nothing}};
\node[srcnode] (src5) at ([xshift=0.5\hnode]src4.south west) {\scriptsize{?}};
\node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {\scriptsize{EOS}};
% Row labels (Chinese words), top to bottom; each one is placed 0.5\hnode below the previous.
\node[tgtnode] (tgt1) at (-6.0*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{你}};
\node[tgtnode] (tgt2) at ([yshift=-0.5\hnode]tgt1.north east) {\scriptsize{什么}};
\node[tgtnode] (tgt3) at ([yshift=-0.5\hnode]tgt2.north east) {\scriptsize{都}};
\node[tgtnode] (tgt4) at ([yshift=-0.5\hnode]tgt3.north east) {\scriptsize{没}};
\node[tgtnode] (tgt5) at ([yshift=-0.5\hnode]tgt4.north east) {\scriptsize{学}};
\node[tgtnode] (tgt6) at ([yshift=-0.5\hnode]tgt5.north east) {\scriptsize{到}};
\node[tgtnode] (tgt7) at ([yshift=-0.5\hnode]tgt6.north east) {\scriptsize{?}};
\node[tgtnode] (tgt8) at ([yshift=-0.5\hnode]tgt7.north east) {\scriptsize{EOS}};
% Call-out text for the two example phrase pairs, anchored to the right of the grid.
% Pair 1 covers rows 6..4 and columns 2..4; pair 2 covers rows 2..1 and columns 0..3.
\node [anchor=west] (p1line1) at ([xshift=4em,yshift=1em]a57.east) {\footnotesize{$\tilde{s}_i$: 什么\ \ \ 都\ \ \ 没}};
\node [anchor=north west] (p1line2) at ([xshift=0]p1line1.south west) {\footnotesize{$\tilde{t}_i$: learned\ \ \ nothing\ \ \ ? \ \ \ \ \ \ \ \ \ \ \ \ }};
\node [anchor=west] (p2line1) at ([xshift=4em]a53.east) {\footnotesize{$\tilde{s}_j$: 到\ \ \ ?}};
\node [anchor=north west] (p2line2) at ([xshift=0]p2line1.south west) {\footnotesize{$\tilde{t}_j$: Have\ \ \ you\ \ \ learned\ \ \ nothing}};
% Boxes go on the background layer so they sit behind the dots and the call-out text.
\begin{pgfonlayer}{background}
\node [rectangle,draw=red,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (a26) (a44)] (phrase1) {};
\node [rectangle,draw=ugreen,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (a01) (a32)] (phrase2) {};
\node [rectangle,inner sep=0.2em,fill=red!10] [fit = (p1line1) (p1line2)] (box1) {};
\node [rectangle,inner sep=0.2em,fill=green!10] [fit = (p2line1) (p2line2)] (box2) {};
\end{pgfonlayer}
% Dotted arrows from each boxed grid region to its call-out.
\draw [->,thick,dotted] ([yshift=-0.8em]phrase1.east) .. controls +(east:1.5) and +(west:1) .. (box1.west);
\draw [->,thick,dotted] ([yshift=-0.0em]phrase2.east) .. controls +(east:2.0) and +(west:1) .. ([yshift=1em]box2.west);
\end{scope}
\end{tikzpicture}
\end{center}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{短语抽取} \subsection{短语抽取}
%%%------------------------------------------------------------------------------------------------------------ %%%------------------------------------------------------------------------------------------------------------
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论