Commit 6342f26c by xiaotong

update

parent 54fa983b
......@@ -152,60 +152,43 @@
\subsection{规则匹配}
%%%------------------------------------------------------------------------------------------------------------
%%% 基于树的解码方法 - 超图
\begin{frame}{基于树的解码 - 超图}
%%% 基于树的解码方法 - chart
\begin{frame}{基于树的解码 - chart}
\begin{itemize}
\item 如果源语言输入的是句法树,基于树的解码会找到一个推导覆盖整个句法树,之后输出所对应的目标语词串作为译文
\item 比如,可以从树的叶子节点开始,找到所有能匹配到这个节点的规则,当所有节点匹配完之后,本质上获得了一个超图
\begin{itemize}
\item<2-> 图的每个节点对应句法树中的一个句法节点
\item<2-> 图的边(或者叫超边)对应规则,边的头指向规则左部(源语言端)所对应图节点,边可以有多个尾,每个尾对应规则右部(源语言端)中的一个变量
\end{itemize}
\item 规则匹配后形成的超图,每个节点可以由两部分信息决定:节点的句法标记 + 跨度
\begin{itemize}
\item 这本质上和单语句法分析中的表示方法是一致的
\item 存储形式有很多种,这里采用常用的chart结构,即,用一个二维表存储,其中每一个单元对应一个跨度(span)。同一个跨度的节点都可以放到同一个表单元中,同一表单元的节点用句法标记区分
\end{itemize}
\end{itemize}
\visible<2->{
\begin{center}
% Figure: a source-language syntax tree, one matched translation rule, and
% circle-node graph fragments linked by arrows (labelled as a hypergraph).
% NOTE(review): this text comes from a diff view in which removed and added
% lines appear interleaved; confirm against the real file which lines survive.
\begin{tikzpicture}
% NOTE(review): this brace group is opened before the first scope and is
% closed by the lone `}` that sits inside the third scope, just before its
% \end{scope}. Verify the group/environment nesting in the actual file.
{\scriptsize
% Scope 1: the source syntax tree. Each node is named cn1..cn5 and carries a
% superscript index [1]..[5]; the leaves are the source words.
\begin{scope}[sibling distance=5pt, level distance=20pt]
\Tree[.\node(cn1){VP$^{[1]}$};
[.\node(cn2){AD$^{[2]}$}; 大幅度 ]
[.\node(cn3){VP$^{[3]}$};
[.\node(cn4){VV$^{[4]}$}; 下降 ]
[.\node(cn5){AS$^{[5]}$}; 了 ]
]
]
% Caption placed directly above the root node of the tree.
\node [anchor=south] (treelabel) at (cn1.north) {\scriptsize{源语句法树}};
\end{scope}
% Scope 2 (shifted right by 1.3in): the tree fragment of the matched rule,
% VP over AD(大幅度) and a childless VP node (sn3).
\begin{scope}[xshift=1.3in,sibling distance=5pt, level distance=20pt]
\Tree[.\node(sn1){VP}; [.\node(sn2){AD}; \node(sw1){大幅度}; ] [.\node(sn3){VP};] ]
% Caption for the rule, offset up and to the right of the fragment's root.
\node [anchor=south] (rulelabel) at ([yshift=0.2em,xshift=4em]sn1.north) {\scriptsize{匹配的翻译规则}};
\end{scope}
% Scope 3: graph nodes, rule text, and the background highlights.
\begin{scope}
% Three filled circle nodes labelled with index pairs [1,1], [3,3], [1,3];
% node3 is placed above and between node1 and node2.
\node [anchor=west,circle,inner sep=2pt,draw,fill=red!20] (node1) at (0,0) {\tiny{[1,1]}};
\node [anchor=west,circle,inner sep=2pt,draw,fill=blue!20] (node2) at ([xshift=3.3em]node1.east) {\tiny{[3,3]}};
\node [anchor=north,circle,inner sep=2pt,draw,fill=green!20] (node3) at ([xshift=2.5em,yshift=5em]node1.north) {\tiny{[1,3]}};
% Arrow from the rule fragment's VP leaf to the text "drastically VP".
\draw [->,thick] (sn3.east) -- ([xshift=1em]sn3.east);
\node [anchor=west] (rr) at ([xshift=1em]sn3.east) {drastically VP};
% Two curved arrows, from node1 and node2, both ending at the bottom of node3.
\draw [-latex] (node1.90) ..controls +(north:3em) and +(south:3em).. (node3.-90);
\draw [-latex] (node2.90) ..controls +(north:3em) and +(south:3em).. (node3.-90);
% A second pair of circle nodes, [3] (tail) and [1] (head), positioned
% relative to the rule text, with one arrow from tail to head.
\node [anchor=west,circle,draw,inner sep=2pt] (tail) at ([yshift=-3em,xshift=3em]rr.south east) {\tiny{[3]}};
\node [anchor=west,circle,draw,inner sep=2pt] (head) at ([yshift=2.5em,xshift=5em]rr.south east) {\tiny{[1]}};
\draw [-latex,thick] ([yshift=0.1em]tail.60) -- ([yshift=-0.1em]head.260);
% These two label nodes have empty text, so they typeset nothing visible.
\node [anchor=north] (headlabel) at ([xshift=0.5em]head.south) {\tiny{}};
\node [anchor=west] (taillabel) at ([yshift=1.2em,xshift=-0.3em]tail.east) {\tiny{}};
% Caption above the head node.
\node [anchor=south] (graphlabel) at ([yshift=0.6em]head.north) {\scriptsize{超图}};
% A rule written as two lines of text to the right of node3; the second line
% is aligned to the first line's south-west corner.
\node [anchor=west] (rule1) at ([xshift=2em]node3.east) {\footnotesize{VP(AD$_1$ VP(VV(下降)) AS$_2$)}};
\node [anchor=north west] (rule2) at (rule1.south west) {\footnotesize{$\to$ AS$_2$ AD$_1$ fallen}};
% Everything below is drawn on the "background" layer, behind the nodes above.
\begin{pgfonlayer}{background}
% Green shadowed box fitted around the rule fragment and its target text.
\node [fill=green!20,inner sep=2pt,drop shadow] (rulebox) [fit = (sn1) (sn2) (sn3) (sw1) (rr)] {};
% Dotted arrow from the right side of the rule box to a point just above tail.
\draw [->,dotted,thick] ([yshift=2em]rulebox.east) ..controls +(east:3.5em) and +(west:1.5em).. ([yshift=1em,xshift=0.3em]tail.north);
% Red highlights on the tree root (cn1) and the rule root (sn1), joined by a
% red double-headed curved edge.
\node [fill=red!20,inner sep=0pt] (nodebox1) [fit = (cn1)] {};
\node [fill=red!20,inner sep=0pt] (nodebox2) [fit = (sn1)] {};
\draw [<->,red] (nodebox1) edge [out=15, in=160] (nodebox2);
% Three coloured rectangles placed over parts of the rule1 text.
% NOTE(review): these reuse the names nodebox1/nodebox2 already assigned
% above (nodebox1 is assigned twice here); later references to those names
% would resolve to the most recent node. Confirm this is intended.
\node [anchor=north west,fill=green!20,inner sep=2pt,minimum height=1.5em,minimum width=1.3em] (nodebox1) at ([xshift=0.2em]rule1.north west) {};
\node [anchor=north west,fill=red!20,inner sep=2pt,minimum height=1.5em,minimum width=1.8em] (nodebox2) at ([xshift=0.2em]nodebox1.north east) {};
\node [anchor=north east,fill=blue!20,inner sep=2pt,minimum height=1.5em,minimum width=1.6em] (nodebox1) at ([xshift=-0.6em]rule1.north east) {};
\end{pgfonlayer}
}
\end{scope}
% Empty scope: contains no drawing commands.
\begin{scope}
\end{scope}
\end{tikzpicture}
\end{center}
}
\end{frame}
......
......@@ -2863,7 +2863,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\item<2-> 学习率预热:模型训练初期,梯度通常很大,直接使用很大的学习率很容易让模型跑偏,因此需要学习率有一个从小到大的过程
\item<2-> 学习率衰减:模型训练接近收敛的时候,使用大学习率会很容易让模型错过局部极小值,因此需要学习率逐渐变小来逼近局部极小值
\end{itemize}
\visible<2->{
\begin{center}
\begin{tikzpicture}
......@@ -3987,7 +3987,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{Transformer模型}
\subsection{模型架构}
%%%------------------------------------------------------------------------------------------------------------
\begin{frame}{Transformer 介绍}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论