update

6342f26c · xiaotong · 54fa983b · 6342f26c · 6342f26c
Commit 6342f26c authored Jan 07, 2020 by xiaotong
--- a/Section04-Phrasal-and-Syntactic-Models/section04-test.tex
+++ b/Section04-Phrasal-and-Syntactic-Models/section04-test.tex
@@ -152,60 +152,43 @@
 \subsection{规则匹配}
 %%%------------------------------------------------------------------------------------------------------------
-%%%  基于树的解码方法 - 超图
+%%%  基于树的解码方法 - chart
-\begin{frame}{基于树的解码 - 超图}
+\begin{frame}{基于树的解码 - chart}
 \begin{itemize}
-\item 如果源语言输入的是句法树，基于树的解码会找到一个推导覆盖整个句法树，之后输出所对应的目标语词串作为译文
+\item 规则匹配后形成的超图，每个节点可以由两部分信息决定：节点的句法标记 + 跨度
-\item 比如，可以从树的叶子结点开始，找到所有能匹配到这个节点的规则，当所有节点匹配完之后，本质上获得了一个超图
+    \begin{itemize}
-	\begin{itemize}
+    \item 这本质上和单语句法分析中的表示方法是一致的
-	\item<2-> 图的节点对应一个句法树句法节点
+    \item 存储形式有很多种，这里采用常用的chart结构，即，用一个二维表存储，其中每一个单元对应一个跨度(span)。同一个跨度的节点都可以放到同一个表单元中，同一表单元的节点用句法标记区分
-	\item<2-> 图的边(或者叫超边)对应规则，边的头指向规则左部(源语言端)所对应图节点，边可以有多个尾，每个尾对应规则右部(源语言端)中的一个变量
+    \end{itemize}
-	\end{itemize}
 \end{itemize}
-\visible<2->{
 \begin{center}
 \begin{tikzpicture}
-{\scriptsize
-\begin{scope}[sibling distance=5pt, level distance=20pt]
-\Tree[.\node(cn1){VP$^{[1]}$};
-        [.\node(cn2){AD$^{[2]}$}; 大幅度 ]
-        [.\node(cn3){VP$^{[3]}$};
-            [.\node(cn4){VV$^{[4]}$}; 下降 ]
-            [.\node(cn5){AS$^{[5]}$}; 了 ]
-        ]
-     ]
-\node [anchor=south] (treelabel) at (cn1.north) {\scriptsize{源语句法树}};
-\end{scope}
-\begin{scope}[xshift=1.3in,sibling distance=5pt, level distance=20pt]
+\begin{scope}
-\Tree[.\node(sn1){VP};  [.\node(sn2){AD}; \node(sw1){大幅度}; ] [.\node(sn3){VP};] ]
+\node [anchor=west,circle,inner sep=2pt,draw,fill=red!20] (node1) at (0,0) {\tiny{[1,1]}};
-\node [anchor=south] (rulelabel) at ([yshift=0.2em,xshift=4em]sn1.north) {\scriptsize{匹配的翻译规则}};
+\node [anchor=west,circle,inner sep=2pt,draw,fill=blue!20] (node2) at ([xshift=3.3em]node1.east) {\tiny{[3,3]}};
-\end{scope}
+\node [anchor=north,circle,inner sep=2pt,draw,fill=green!20] (node3) at ([xshift=2.5em,yshift=5em]node1.north) {\tiny{[1,3]}};
-\draw [->,thick] (sn3.east) -- ([xshift=1em]sn3.east);
+\draw [-latex] (node1.90) ..controls +(north:3em) and +(south:3em).. (node3.-90);
-\node [anchor=west] (rr) at ([xshift=1em]sn3.east) {drastically VP};
+\draw [-latex] (node2.90) ..controls +(north:3em) and +(south:3em).. (node3.-90);
-\node [anchor=west,circle,draw,inner sep=2pt] (tail) at ([yshift=-3em,xshift=3em]rr.south east) {\tiny{[3]}};
+\node [anchor=west] (rule1) at ([xshift=2em]node3.east) {\footnotesize{VP(AD$_1$ VP(VV(下降)) AS$_2$)}};
-\node [anchor=west,circle,draw,inner sep=2pt] (head) at ([yshift=2.5em,xshift=5em]rr.south east) {\tiny{[1]}};
+\node [anchor=north west] (rule2) at (rule1.south west) {\footnotesize{$\to$ AS$_2$ AD$_1$ fallen}};
-\draw [-latex,thick] ([yshift=0.1em]tail.60) -- ([yshift=-0.1em]head.260);
-\node [anchor=north] (headlabel) at ([xshift=0.5em]head.south) {\tiny{头}};
-\node [anchor=west] (taillabel) at ([yshift=1.2em,xshift=-0.3em]tail.east) {\tiny{尾}};
-\node [anchor=south] (graphlabel) at ([yshift=0.6em]head.north) {\scriptsize{超图}};
 \begin{pgfonlayer}{background}
-\node [fill=green!20,inner sep=2pt,drop shadow] (rulebox) [fit = (sn1) (sn2) (sn3) (sw1) (rr)] {};
+\node [anchor=north west,fill=green!20,inner sep=2pt,minimum height=1.5em,minimum width=1.3em] (nodebox1) at ([xshift=0.2em]rule1.north west) {};
-\draw [->,dotted,thick] ([yshift=2em]rulebox.east) ..controls +(east:3.5em) and +(west:1.5em).. ([yshift=1em,xshift=0.3em]tail.north);
+\node [anchor=north west,fill=red!20,inner sep=2pt,minimum height=1.5em,minimum width=1.8em] (nodebox2) at ([xshift=0.2em]nodebox1.north east) {};
+\node [anchor=north east,fill=blue!20,inner sep=2pt,minimum height=1.5em,minimum width=1.6em] (nodebox3) at ([xshift=-0.6em]rule1.north east) {};
-\node [fill=red!20,inner sep=0pt] (nodebox1) [fit = (cn1)] {};
-\node [fill=red!20,inner sep=0pt] (nodebox2) [fit = (sn1)] {};
-\draw [<->,red] (nodebox1) edge [out=15, in=160] (nodebox2);
 \end{pgfonlayer}
-}
+\end{scope}
+\begin{scope}
+\end{scope}
 \end{tikzpicture}
 \end{center}
-}
 \end{frame}

--- a/Section06-Neural-Machine-Translation/section06.tex
+++ b/Section06-Neural-Machine-Translation/section06.tex
@@ -2863,7 +2863,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$ 
            \item<2-> 学习率预热：模型训练初期，梯度通常很大，直接使用很大的学习率很容易让模型跑偏，因此需要学习率有一个从小到大的过程
            \item<2-> 学习率衰减：模型训练接近收敛的时候，使用大学习率会很容易让模型错过局部极小，因此需要学习率逐渐变小来逼近局部最小
        \end{itemize}
        \visible<2->{
        \begin{center}
            \begin{tikzpicture}
@@ -3987,7 +3987,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$ 
 \end{frame}
 %%%------------------------------------------------------------------------------------------------------------
-\subsection{Transformer模型}
+\subsection{模型架构}
 %%%------------------------------------------------------------------------------------------------------------
 \begin{frame}{Transformer 介绍}