new update

c03143f2 · xiaotong · a2a024e2 · c03143f2 · c03143f2 · c03143f2
Commit c03143f2 authored Nov 19, 2019 by xiaotong
--- a/Section04-Phrasal-and-Syntactic-Models/section04-test.tex
+++ b/Section04-Phrasal-and-Syntactic-Models/section04-test.tex
@@ -96,209 +96,95 @@
 \section{使用更大的翻译单元}

 %%%------------------------------------------------------------------------------------------------------------
-%%% 短语 -> 句法
-\begin{frame}{使用短语就够了？}
-
-\begin{itemize}
-\item 短语是具有完整意思的连续词串，因此可以捕捉更多的上下文信息
-	\begin{itemize}
-	\item 不过过大的短语会造成数据稀疏、长距离依赖等问题
-	\item 而且单纯的词串也缺乏句法功能表示能力
-	\end{itemize}
-    \begin{tikzpicture}
-
-    \node[anchor=west, fill=blue!50, inner sep=0.05cm] (sp1) at (0, 0) {进口\ \ };
-    \node[anchor=west] (sp2) at (2.5em, 0) {在 过去的 五 到 十 年 间};
-    \node[anchor=west, fill=red!50, inner sep=0.05cm] (sp3) at (14em, 0) {有了 大幅度 下降};
-    \draw[->] (sp1) edge [out=15, in=170] (sp3);
-
-    \node[anchor=west, fill=blue!50, inner sep=0.05cm] (tp1) at (0, -0.8) {the imports};
-    \node[anchor=west, fill=red!50, inner sep=0.05cm] (tp2) at (5.3em, -0.8) {drastically fell};
-    \node[anchor=west] (tp3) at (11.3em, -0.8) {in the past five to ten years};
-    \path[->] (tp1) edge [out=30, in=150] (tp2);
-
-    \end{tikzpicture}
-\item<2-> 另一种方式是考虑句子的句法结构，这样更容易描述句子的层次结构和长距离依赖关系
-\end{itemize}
-
-\visible<2->{
-\begin{center}
-\begin{tikzpicture}
-
-\begin{scope}[scale=0.8, sibling distance=1pt, level distance=20pt, yshift=-1.4in]
-\Tree[. S
-        [.NP
-            [.NP
-                [.DT the ]
-                [.\node[fill=ugreen!50]{NN}; \node[](n1){import}; ]
-            ]
-            [.\node[fill=ugreen!50]{IN}; in ]
-            [.NP \edge[roof]; {North Korea} ]
-        ]
-        [.VP
-            [.\node[fill=ugreen!50]{VBZ}; \node[](n2){have}; ]
-            [.ADVP
-                [.RB drastically ]
-                [.VBN fallen ]
-            ]
-        ]
-     ]
-
-\draw[-latex] (n1.south) .. controls +(south east:1) and +(south:1) .. (n2.south);
-\end{scope}
+%%% 基于句法的模型
+\begin{frame}{引入句法信息}

-\end{tikzpicture}
-\end{center}
-}

-\end{frame}

-%%%------------------------------------------------------------------------------------------------------------
-%%% 基于句法的模型
-\begin{frame}{引入句法信息}
 \begin{itemize}
-\item 句法树是句子的更高层次的抽象，相比短语句法树具有更加丰富的句法功能标记，对语言结构的转换很有帮助
-	\begin{itemize}
-	\item 更容易捕捉翻译中的远距离调序
-	\item 使用句法更容易对大范围的上下文建模
-	\end{itemize}
-\item<2-> 看一个真实的例子
-    \begin{itemize}
-    \item 长介词短语的翻译，需要完整的看到这个结构才能准确翻译介词
-    \end{itemize}
-\end{itemize}
+\item<1-> Brown等人(1993)及Koehn等人(2003)提出了基于词串的翻译模型

-\visible<2->{
-\vspace{-1.2em}
 \begin{center}
 \begin{tikzpicture}

-%% example
-\begin{scope}[xshift=-0.1in,yshift=-1.5in]
-
-{\tiny
-
-\node[anchor=west] (ref) at (0,0) {\textbf{人工翻译:} \alert{After} North Korea demanded concessions from U.S. again before the start of a new round of six-nation talks ...};
-
-\node[anchor=north west] (hifst) at ([yshift=-0.3em]ref.south west) {\textbf{机器翻译:} \blue{In}\black{} the new round of six-nation talks on North Korea again demanded that U.S. in the former promise ...};
+\begin{scope}[minimum height = 18pt]

-\visible<4->{
-\node[anchor=north west] (synhifst) at ([yshift=-0.3em]hifst.south west) {\textbf{better?:}};
+\node[anchor=east] (s0) at (-0.5em, 0) {源语:};
+\node[anchor=west] (s1) at (0, 0) {进口};
+\node[anchor=west] (s2) at (3.5em, 0) {大幅度};
+\node[anchor=west] (s3) at (7.9em, 0) {下降 了};

-\node[anchor=west, fill=red!20!white, inner sep=0.3em] (synhifstpart1) at ([xshift=-0.5em]synhifst.east) {After};
+\node<2->[anchor=west,fill=ugreen!50] (s1) at (0, 0) {进口};
+\node<3-> [anchor=west,fill=red!50] (s2) at (3.5em, 0) {大幅度};
+\node<4-> [anchor=west,fill=blue!50] (s3) at (7.9em, 0) {下降 了};

-\node[anchor=west, fill=blue!20!white, inner sep=0.25em] (synhifstpart2) at ([xshift=0.1em,yshift=-0.05em]synhifstpart1.east) {North Korea again demanded that U.S. promised concessions before the new round of six-nation talks};
+\node[anchor=east] (t0) at (-0.5em, -1) {目标语:};
+\node[anchor=west] (t1) at (0, -1) {the imports have};
+\node[anchor=west] (t2) at (8.4em, -1) {drastically};
+\node[anchor=west] (t3) at (14.0em, -1) {fallen};

-\node[anchor=west] (synhifstpart3) at ([xshift=-0.2em]synhifstpart2.east) {...};
-}
-
-\node [anchor=west] (inputlabel) at ([yshift=-0.4in]synhifst.west) {\textbf{Input:}};
-
-\node [anchor=west,minimum height=12pt] (inputseg1) at (inputlabel.east) {在$_1$ };
-\node [anchor=west,minimum height=12pt] (inputseg2) at ([xshift=0.2em]inputseg1.east) {北韩$_2$ 再度$_3$ 要求$_4$ 美国$_5$ 于$_6$ 新$_7$ 回合$_8$ 六$_9$ 国$_{10}$ 会谈$_{11}$ 前$_{12}$ 承诺$_{13}$ 让步$_{14}$};
-\node [anchor=west,minimum height=12pt] (inputseg3) at ([xshift=0.2em]inputseg2.east) {后$_{15}$};
-\node [anchor=west,minimum height=12pt] (inputseg4) at ([xshift=0.2em]inputseg3.east) {,$_{16}$};
-\node [anchor=west,minimum height=12pt] (inputseg5) at ([xshift=0.2em]inputseg4.east) {...};
-
-\visible<3->{
-\node [anchor=north,inner sep=2pt] (synlabel1) at ([yshift=-0.34in]inputseg2.south) {\scriptsize{PP}};
-\node [anchor=north,inner sep=2pt] (synlabel2) at ([yshift=-0.34in]inputseg4.south) {\scriptsize{PU}};
-\node [anchor=north,inner sep=2pt] (synlabel3) at ([yshift=-0.34in]inputseg5.south) {\scriptsize{VP}};
-\node [anchor=north,inner sep=2pt] (synlabel4) at ([xshift=1.6in,yshift=-0.35in]synlabel1.south) {\scriptsize{VP}};
-
-\draw [-] (inputseg1.south west) -- (inputseg3.south east) -- (synlabel1.north) -- cycle;
-\draw [-] (inputseg4.south) -- (synlabel2.north);
-\draw [-] (inputseg5.south) -- (synlabel3.north);
-\draw [-] (synlabel1.south) -- (synlabel4.north);
-\draw [-] (synlabel2.south) -- (synlabel4.north);
-\draw [-] (synlabel3.south) -- (synlabel4.north);
-}
+\node<2-> [anchor=west,fill=ugreen!50] (t1) at (0, -1) {the imports have};
+\node<3-> [anchor=west,fill=red!50] (t2) at (8.4em, -1) {drastically};
+\node<4-> [anchor=west,fill=blue!50] (t3) at (14.0em, -1) {fallen};

-\visible<3->{
-\node [anchor=north east,align=left] (nolimitlabel) at (synlabel1.south west) {\scriptsize{短语结构树很容易捕捉}\\\scriptsize{这种介词短语结构}};
-}
+\path[<->, thick]<2-> (s1.south) edge (t1.north);
+\path[<->, thick]<3-> (s2.south) edge (t2.north);
+\path[<->, thick]<4-> (s3.south) edge (t3.north);

-\visible<4->{
-\node [anchor=west,minimum height=12pt,fill=red!20] (inputseg1) at (inputlabel.east) {在$_1$ };
-\node [anchor=west,minimum height=12pt,fill=blue!20] (inputseg2) at ([xshift=0.2em]inputseg1.east) {北韩$_2$ 再度$_3$ 要求$_4$ 美国$_5$ 于$_6$ 新$_7$ 回合$_8$ 六$_9$ 国$_{10}$ 会谈$_{11}$ 前$_{12}$ 承诺$_{13}$ 让步$_{14}$};
-\node [anchor=west,minimum height=12pt,fill=red!20] (inputseg3) at ([xshift=0.2em]inputseg2.east) {后$_{15}$};
-
-\path [draw,->,dashed] (inputseg1.north) .. controls +(north:0.2) and +(south:0.3) ..  ([xshift=1em]synhifstpart1.south);
-\path [draw,->,dashed] (inputseg3.north) .. controls +(north:0.2) and +(south:0.6) ..  ([xshift=1em]synhifstpart1.south);
-\path [draw,->,dashed] ([xshift=-0.5in]inputseg2.north) --  ([xshift=-0.6in]synhifstpart2.south);
-}
-
-}
+%\visible<5>{\draw[<->] (t3.south) -- (s3.north);}

 \end{scope}
-%% end of example

 \end{tikzpicture}
 \end{center}
-}

+\end{itemize}
 \end{frame}

 %%%------------------------------------------------------------------------------------------------------------
-%%% 本章的核心问题
-\begin{frame}{核心问题}
-
-\vspace{6em}
+\section{基于短语的模型}

- \begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=blue!5!white,colframe=blue!75!black,drop fuzzy shadow]
-{\LARGE
-\textbf{如何使用短语、句法等}\\ \textbf{结构信息进行机器翻译建模？}
-}
-\end{tcolorbox}
+%%%------------------------------------------------------------------------------------------------------------
+\subsection{建模}

-\end{frame}
+%%%------------------------------------------------------------------------------------------------------------
+\subsection{短语抽取}

 %%%------------------------------------------------------------------------------------------------------------
-%%% 本章的内容
-\begin{frame}{Outline}
-
- \begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow]
-{\normalsize
-\textbf{基于短语的模型} \vspace{-0.2em}\\
-\small{1. 建模} \vspace{-0.2em}\\
-\small{2. 如何获取短语} \vspace{-0.2em}\\
-\small{3. 判别式模型和最小错误率训练} \vspace{-0.2em}\\
-\small{4. 栈解码}
-}
-\end{tcolorbox}
+\subsection{判别式模型及特征}

-\vspace{0em}
+%%%------------------------------------------------------------------------------------------------------------
+\subsection{最小错误率训练}

- \begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow]
-{\normalsize
-\textbf{基于层次短语的模型} \vspace{-0.2em}\\
-\small{1. 同步上下文无关文法} \vspace{-0.2em}\\
-\small{2. 层次短语规则及特征} \vspace{-0.2em}\\
-\small{3. 基于chart的解码和立方剪枝}
-}
-\end{tcolorbox}
+%%%------------------------------------------------------------------------------------------------------------
+\subsection{栈解码}

-\vspace{0em}
+%%%------------------------------------------------------------------------------------------------------------
+\section{基于层次短语的模型}

-\begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow]
-{\normalsize
-\textbf{基于语言学句法的模型} \vspace{-0.2em}\\
-\small{1. 基于树结构的文法} \vspace{-0.2em}\\
-\small{2. 翻译规则抽取} \vspace{-0.2em}\\
-\small{3. 规则匹配}
-}
-\end{tcolorbox}
+%%%------------------------------------------------------------------------------------------------------------
+\subsection{同步上下文无关文法}

-\end{frame}
+%%%------------------------------------------------------------------------------------------------------------
+\subsection{层次短语规则及翻译特征}

 %%%------------------------------------------------------------------------------------------------------------
-\section{基于短语的模型}
+\subsection{基于chart的解码}

 %%%------------------------------------------------------------------------------------------------------------
-\section{基于层次短语的模型}
+\subsection{剪枝}

 %%%------------------------------------------------------------------------------------------------------------
 \section{基于语言学句法的模型}

+%%%------------------------------------------------------------------------------------------------------------
+\subsection{基于树结构的文法}
+
+%%%------------------------------------------------------------------------------------------------------------
+\subsection{翻译规则抽取}
+
+%%%------------------------------------------------------------------------------------------------------------
+\subsection{规则匹配}
+
 \end{CJK}
 \end{document}
--- a/Section04-Phrasal-and-Syntactic-Models/section04.tex
+++ b/Section04-Phrasal-and-Syntactic-Models/section04.tex
@@ -868,8 +868,8 @@
 {\normalsize
 \textbf{基于层次短语的模型} \vspace{-0.2em}\\
 \small{1. 同步上下文无关文法} \vspace{-0.2em}\\
-\small{2. 层次短语规则及特征} \vspace{-0.2em}\\
-\small{3. 基于chart的解码和立方剪枝}
+\small{2. 层次短语规则及翻译特征} \vspace{-0.2em}\\
+\small{3. 基于chart的解码和剪枝}
 }
 \end{tcolorbox}

@@ -890,7 +890,10 @@
 \section{基于短语的模型}

 %%%------------------------------------------------------------------------------------------------------------
-\section{基于句法的模型}
+\section{基于层次短语的模型}
+
+%%%------------------------------------------------------------------------------------------------------------
+\section{基于语言学句法的模型}

 \end{CJK}
 \end{document}
--- a/Section06-Neural-Machine-Translation/section06.tex
+++ b/Section06-Neural-Machine-Translation/section06.tex
@@ -30,7 +30,7 @@
 \usetikzlibrary{matrix}
 \usetikzlibrary{patterns}
 \usetikzlibrary{arrows,decorations.pathreplacing}
-\usetikzlibrary{shadows} 
+\usetikzlibrary{shadows}
 \usetikzlibrary{shadows.blur}
 \usepgflibrary{arrows}
 \usetikzlibrary{arrows}
@@ -45,7 +45,7 @@
 \DeclareMathOperator*{\argmin}{arg\,min}

 \setbeamertemplate{items}[ball]
-\usefonttheme[onlymath]{serif} 
+\usefonttheme[onlymath]{serif}

 \definecolor{ugreen}{rgb}{0,0.5,0}
 \definecolor{lgreen}{rgb}{0.9,1,0.8}
@@ -2274,7 +2274,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$ 
 \begin{frame}{推断 - Beam Search}
 \begin{itemize}
 \item \textbf{Greedy Search}: 目标语每一个位置，输出层的Softmax可以得到所有单词的概率，然后选择一个概率最大单词输出，下一个位置的预测就基于这一步输出的单词
-\item \textbf{Beach Search}: 为了避免贪婪方法造成的错误累加，可以每次对$b$个单词进行扩展，而不是只使用一个单词，其中$b$称做束的宽度 - 这样可以搜索更多可能的译文
+\item \textbf{Beam Search}: 为了避免贪婪方法造成的错误累加，可以每次对$b$个单词进行扩展，而不是只使用一个单词，其中$b$称做束的宽度 - 这样可以搜索更多可能的译文
 \end{itemize}

 \vspace{-0.3em}