reorganization

52ecf09c · xiaotong · 9c2f9cb7 · 52ecf09c · 52ecf09c · 52ecf09c
Commit 52ecf09c authored Nov 22, 2019 by xiaotong
--- a/Section04-Phrasal-and-Syntactic-Models/section04-test.tex
+++ b/Section04-Phrasal-and-Syntactic-Models/section04-test.tex
@@ -136,15 +136,81 @@ $d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导，$\textrm{P
 %%% 翻译推导的建模
 \begin{frame}{对翻译推导进行建模}
 \begin{itemize}
-\item $\textrm{P}(\textbf{t}|\textbf{s}) = \sum_{d} \textrm{P}(d,\textbf{t}|\textbf{s})$带来新的问题：
+\item $\textrm{P}(\textbf{t}|\textbf{s}) = \sum_{d} \textrm{P}(d,\textbf{t}|\textbf{s})$带来新的问题：如何描述$\textrm{P}(d,\textbf{t}|\textbf{s})$ \\
+
+\vspace{0.5em}
+\begin{center}
+\begin{tikzpicture}
+
+\begin{scope}[minimum height = 18pt]
+
+\node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:};
+\node[anchor=west,fill=ugreen!50] (s1) at (0, 0) {在};
+\node[anchor=west,fill=red!50] (s2) at ([xshift=1em]s1.east) {桌子 上 的};
+\node[anchor=west,fill=blue!50] (s3) at ([xshift=1em]s2.east) {苹果};
+
+\node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:};
+\node[anchor=west,fill=blue!50] (t1) at (0, -1.5) {the apple};
+\node[anchor=west,fill=ugreen!50] (t2) at ([xshift=1em]t1.east) {on};
+\node[anchor=west,fill=red!50] (t3) at ([xshift=1em]t2.east) {the table};
+
+\path[<->, thick] (s1.south) edge (t3.north);
+\path[<->, thick] (s2.south) edge (t2.north);
+\path[<->, thick] (s3.south) edge (t1.north);
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
+
+上图体现了三方面问题
+
+    \begin{enumerate}
+    \item 确定哪些是``可用''的短语
+    \item 描述短语翻译的好坏
+    \item 描述翻译中的调序现象
+    \end{enumerate}
+
+\item<2-> 希望有这样一种模型可以对任意的因素进行方便的建模。经典的判别式模型成为了不二的选择
+\end{itemize}
+
+\visible<2->{
+\textbf{Discriminative Training and Maximum Entropy Models for Statistical Machine Translation}\\
+\textbf{Franz Och and Hermann Ney, 2002, In Proc of ACL}
+}
+
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% 判别式模型
+\begin{frame}{判别式模型}
+\begin{itemize}
+\item 判别式模型的形式：
+\begin{displaymath}
+\textrm{P}(d,\textbf{t}|\textbf{s}) = \frac{\exp(\sum_{i=1}^{M} \lambda_i \cdot h_i(d,\textbf{s},\textbf{t}))}{\sum_{d',\textbf{t}'}\exp(\sum_{i=1}^{M} \lambda_i \cdot h_i(d',\textbf{s},\textbf{t}'))}
+\end{displaymath}
    \begin{itemize}
-    \item \textbf{短语获取}：如何获取双语短语，以构成$d$
-    \item \textbf{翻译建模}：如何描述$\textrm{P}(d,\textbf{t}|\textbf{s})$
-    \item \textbf{模型简化}：如何对所有$d$进行$\textrm{P}(d,\textbf{t}|\textbf{s})$的求和
+    \item $\{h_i(\cdot)\}$是$M$个特征，每个$h_i(d,\textbf{s},\textbf{t})$把$d$映射为一个实数值
+    \item $\{\lambda_i\}$是这些特征对应的权重，权重越大表示特征越重要
+    \item $\sum_{i=1}^{M} \lambda_i \cdot h_i(d,\textbf{s},\textbf{t})$描述了$d$的整体质量，值越大$d$越``好''
    \end{itemize}
-    后面会分别展开讨论
-\vspace{0.3em}
-\item<2-> 回到一开始的问题: 给定$\textbf{s}$和$\textbf{t}$，\alert{如何获得双语短语}
+\item 判别式模型的优点在于，它可以很方便地引入各种特征。我们只需要设计不同的特征函数$h_i(\cdot)$即可。
+    \begin{itemize}
+    \item 比如，可以定义短语翻译概率作为特征，也可以定义调序的程度作为一个特征
+    \end{itemize}
+\item \textbf{两个问题}：
+    \begin{itemize}
+    \item 特征定义：定义短语翻译特征和调序特征（马上）
+    \item 权重调优：得到最好的特征权重（后面）
+    \end{itemize}
+
+\end{itemize}
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% 翻译推导的建模
+\begin{frame}{对翻译推导进行建模}
+\begin{itemize}
+\item 回到最开始的问题: 给定$\textbf{s}$和$\textbf{t}$，\alert{如何获得双语短语}
    \begin{itemize}
    \item 如果没有限制，$\textbf{s}$和$\textbf{t}$之间任何子串映射都可以看做双语短语
    \end{itemize}
@@ -152,7 +218,6 @@ $d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导，$\textrm{P

 \vspace{-0.7em}

-\visible<2->{
 \begin{center}
 \begin{tikzpicture}

@@ -219,7 +284,14 @@ $d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导，$\textrm{P

 \end{tikzpicture}
 \end{center}
-}
+
+\begin{itemize}
+\item<2-> \textbf{显然}，不加限制地定义短语会带来很多问题
+    \begin{itemize}
+    \item 短语数量随句子长度增加急剧膨胀
+    \item 大量噪声，如``到 ? $\leftrightarrow$ Have you learned nothing''
+    \end{itemize}
+\end{itemize}

 \end{frame}


--- a/Section04-Phrasal-and-Syntactic-Models/section04.tex
+++ b/Section04-Phrasal-and-Syntactic-Models/section04.tex
--- a/Section06-Neural-Machine-Translation/section06.tex
+++ b/Section06-Neural-Machine-Translation/section06.tex
@@ -1188,7 +1188,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设，端到端学习 \\
            \node[auxnode,label={-45:28}] () at (aux28) {};
            \coordinate (aux29) at ([xshift=2\base]aux28);
            \node[auxnode,label={-45:29}] () at (aux29) {};
-            
+
            \coordinate (aux33) at ([yshift=\base]aux23);
            \node[auxnode,label={-45:33}] () at (aux33) {};
            \coordinate (aux34) at ([yshift=\base]aux24);
@@ -1197,10 +1197,10 @@ NLP问题的隐含结构假设 & 无隐含结构假设，端到端学习 \\
            \node[auxnode,label={-45:35}] () at (aux35) {};
            \coordinate (aux37) at ([yshift=\base]aux27);
            \node[auxnode,label={-45:37}] () at (aux37) {};
-            
+
            \coordinate (aux45) at ([yshift=\base]aux35);
            \node[auxnode,label={-45:45}] () at (aux45) {};
-            
+
            \coordinate (aux55) at ([yshift=\base]aux45);
            \node[auxnode,label={-45:55}] () at (aux55) {};
            \ExtractX{$(aux21)$}
@@ -1219,7 +1219,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设，端到端学习 \\
            \ExtractY{$(aux55)$}
            \coordinate (aux59) at (\XCoord,\YCoord);
            \node[auxnode,label={-45:59}] () at (aux59) {};
-            
+
            \coordinate (aux68) at ([yshift=\base]aux58);
            \node[auxnode,label={-45:68}] () at (aux68) {};
        \end{scope}
@@ -2244,8 +2244,8 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$ 

 \vspace{-2.5em}
 \begin{eqnarray}
-\textbf{C}_3 & = & 0.4 \times \textbf{h}(\textrm{``什么''}) + 0.4 \times \textbf{h}(\textrm{``也''}) + \nonumber \\
-             &   & 0 \times \textbf{h}(\textrm{``没''}) + 0.1 \times \textbf{h}(\textrm{``学''}) \nonumber
+\textbf{C}_3 & = & 0.4 \times \textbf{h}(\textrm{``你''}) + 0.4 \times \textbf{h}(\textrm{``什么''}) + \nonumber \\
+             &   & 0 \times \textbf{h}(\textrm{``也''}) + 0.1 \times \textbf{h}(\textrm{``没''}) \nonumber
 \end{eqnarray}

 \vspace{-0.5em}