Commit 08a23d17 by xiaotong

new pages

parent 95f50640
......@@ -142,6 +142,13 @@
\begin{frame}{上下文向量$C_i$}
\begin{itemize}
\item 对于目标语位置$i$,$C_i$是目标语$i$使用的上下文向量
\begin{itemize}
\item $h_j$表示编码器第$j$个位置的隐层状态
\item $s_i$表示解码器第$i$个位置的隐层状态
\item<2-> $\alpha_{i,j}$表示注意力权重,表示目标语第$i$个位置与源语第$j$个位置之间的相关性大小
\item<2-> $a(\cdot)$表示注意力函数,计算$s_{i-1}$和$h_j$之间的相关性
\item<3-> $C_i$是所有源语编码表示$\{h_j\}$的加权求和,权重为$\{\alpha_{i,j}\}$
\end{itemize}
\end{itemize}
\begin{center}
......@@ -149,14 +156,59 @@
\begin{scope}
\node [anchor=west,fill=red!20!white,inner sep=2pt,minimum width=2em] (h1) at (0,0) {\scriptsize{$h_1$}};
\node [anchor=west,fill=red!20!white,inner sep=2pt,minimum width=2em] (h2) at ([xshift=1em]h1.east) {\scriptsize{$h_2$}};
\node [anchor=west,inner sep=0pt,minimum width=2em] (h3) at ([xshift=0.7em]h2.east) {\scriptsize{...}};
\node [anchor=west,fill=red!20!white,inner sep=2pt,minimum width=2em] (h4) at ([xshift=0.7em]h3.east) {\scriptsize{$h_n$}};
\node [anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h1) at (0,0) {\scriptsize{$h_1$}};
\node [anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h2) at ([xshift=1em]h1.east) {\scriptsize{$h_2$}};
\node [anchor=west,inner sep=0pt,minimum width=3em] (h3) at ([xshift=0.5em]h2.east) {\scriptsize{...}};
\node [anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h4) at ([xshift=0.5em]h3.east) {\scriptsize{$h_n$}};
\node [anchor=south,circle,minimum size=1.0em,draw,ublue,thick] (sum) at ([yshift=2em]h2.north east) {};
\draw [thick,-,ublue] (sum.north) -- (sum.south);
\draw [thick,-,ublue] (sum.west) -- (sum.east);
\node [anchor=south,draw,fill=green!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (th1) at ([yshift=2em,xshift=-1em]sum.north west) {\scriptsize{$s_{i-1}$}};
\node [anchor=west,draw,fill=green!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (th2) at ([xshift=2em]th1.east) {\scriptsize{$s_{i}$}};
\draw [->] (h1.north) .. controls +(north:0.8) and +(west:1) .. (sum.190) node [pos=0.3,left] {\tiny{$\alpha_{i,1}$}};
\draw [->] (h2.north) .. controls +(north:0.6) and +(220:0.2) .. (sum.220) node [pos=0.2,right] {\tiny{$\alpha_{i,2}$}};
\draw [->] (h4.north) .. controls +(north:0.8) and +(east:1) .. (sum.-10) node [pos=0.1,left] (alphan) {\tiny{$\alpha_{i,n}$}};
\draw [->] ([xshift=-1.5em]th1.west) -- ([xshift=-0.1em]th1.west);
\draw [->] ([xshift=0.1em]th1.east) -- ([xshift=-0.1em]th2.west);
\draw [->] ([xshift=0.1em]th2.east) -- ([xshift=1.5em]th2.east);
\draw [->] (sum.north) .. controls +(north:0.8) and +(west:0.2) .. ([yshift=-0.4em,xshift=-0.1em]th2.west) node [pos=0.2,right] (ci) {\scriptsize{$C_{i}$}};
\node [anchor=south,inner sep=1pt] (output) at ([yshift=0.8em]th2.north) {\tiny{输出层}};
\draw [->] ([yshift=0.1em]th2.north) -- ([yshift=-0.1em]output.south);
\node [anchor=north] (enc1) at (h1.south) {\tiny{编码器输出}};
\node [anchor=north] (enc12) at ([yshift=0.5em]enc1.south) {\tiny{(位置$1$)}};
\node [anchor=north] (enc2) at (h2.south) {\tiny{编码器输出}};
\node [anchor=north] (enc22) at ([yshift=0.5em]enc2.south) {\tiny{(位置$2$)}};
\node [anchor=north] (enc4) at (h4.south) {\tiny{编码器输出}};
\node [anchor=north] (enc42) at ([yshift=0.5em]enc4.south) {\tiny{(位置$n$)}};
\visible<2->{
\node [anchor=west] (math1) at ([xshift=5em,yshift=1em]th2.east) {$C_i = \sum_{j} \alpha_{i,j} h_j \ \ $};
}
\visible<3->{
\node [anchor=north west] (math2) at ([yshift=-2em]math1.south west) {$\alpha_{i,j} = \frac{\exp(\beta_{i,j})}{\sum_j \exp(\beta_{i,j})}$};
\node [anchor=north west] (math3) at ([yshift=-0em]math2.south west) {$\beta_{i,j} = a(s_{i-1}, h_j)$};
}
\node [anchor=south,circle,minimum size=1.5em,draw,ublue,thick] (sum) at ([yshift=2em]h2.north east) {};
\begin{pgfonlayer}{background}
\visible<2->{
\node [rectangle,inner sep=0.4em,rounded corners=1pt,fill=blue!10,drop shadow] [fit = (math1)] (box1) {};
}
\visible<3->{
\node [rectangle,inner sep=0.4em,rounded corners=1pt,fill=orange!10,drop shadow] [fit = (math2) (math3)] (box2) {};
}
\end{pgfonlayer}
\node [anchor=south,fill=green!20!white,inner sep=3pt,minimum width=2em] (th1) at ([yshift=2em]sum.north) {\scriptsize{$s_{i_1}$}};
\visible<2->{
\draw [->,dotted,thick,blue] (box1.west) .. controls +(west:1.2) and +(east:2.0) .. ([xshift=-0.3em]ci.east);
}
\visible<3->{
\draw [->,dotted,thick,orange] ([yshift=1em]box2.west) .. controls +(west:1.2) and +(east:1.0) .. ([xshift=-0.35em]alphan.east);
}
\end{scope}
......
......@@ -849,6 +849,11 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Mathematical description of NMT
% Placeholder frame: only the title is set; the body is still empty.
\begin{frame}{数学建模}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 词嵌入
\begin{frame}{模块1:词嵌入层}
\begin{itemize}
......@@ -1395,6 +1400,86 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% C_i的定义
\begin{frame}{上下文向量$C_i$}
  % Slide on the attention context vector C_i.
  % Top: bullet list introducing the notation (h_j, s_i, alpha_{i,j}, a(.)).
  % Bottom: diagram of the encoder states h_1..h_n feeding a weighted sum
  % whose result C_i enters decoder state s_i, plus the defining formulas,
  % which are uncovered on overlays 2 and 3.
  \begin{itemize}
    \item 对于目标语位置$i$,$C_i$是目标语$i$使用的上下文向量
    \begin{itemize}
      \item $h_j$表示编码器第$j$个位置的隐层状态
      \item $s_i$表示解码器第$i$个位置的隐层状态
      \item<2-> $\alpha_{i,j}$表示注意力权重,表示目标语第$i$个位置与源语第$j$个位置之间的相关性大小
      \item<2-> $a(\cdot)$表示注意力函数,计算$s_{i-1}$和$h_j$之间的相关性
      \item<3-> $C_i$是所有源语编码表示$\{h_j\}$的加权求和,权重为$\{\alpha_{i,j}\}$
    \end{itemize}
  \end{itemize}
  \begin{center}
    \begin{tikzpicture}
      % Scope-local styles: one shared box shape, coloured red for encoder
      % states and green for decoder states. Each node supplies its own anchor.
      \begin{scope}[
          statebox/.style={draw,inner sep=3pt,minimum width=2em,minimum height=1.2em},
          encstate/.style={statebox,fill=red!20!white},
          decstate/.style={statebox,fill=green!20!white}]

        % --- Encoder hidden states h_1, h_2, ..., h_n (h3 is the ellipsis) ---
        \node [anchor=west,encstate] (h1) at (0,0) {\scriptsize{$h_1$}};
        \node [anchor=west,encstate] (h2) at ([xshift=1em]h1.east) {\scriptsize{$h_2$}};
        \node [anchor=west,inner sep=0pt,minimum width=3em] (h3) at ([xshift=0.5em]h2.east) {\scriptsize{\dots}};
        \node [anchor=west,encstate] (h4) at ([xshift=0.5em]h3.east) {\scriptsize{$h_n$}};

        % --- Weighted-sum node: a circle with a "+" drawn from two strokes ---
        \node [anchor=south,circle,minimum size=1.0em,draw,ublue,thick] (sum) at ([yshift=2em]h2.north east) {};
        \draw [thick,-,ublue] (sum.north) -- (sum.south);
        \draw [thick,-,ublue] (sum.west) -- (sum.east);

        % --- Decoder hidden states s_{i-1} and s_i ---
        \node [anchor=south,decstate] (th1) at ([yshift=2em,xshift=-1em]sum.north west) {\scriptsize{$s_{i-1}$}};
        \node [anchor=west,decstate] (th2) at ([xshift=2em]th1.east) {\scriptsize{$s_{i}$}};

        % --- Attention-weighted edges from each encoder state into the sum ---
        % (alphan is named so the overlay-3 arrow can point at it.)
        \draw [->] (h1.north) .. controls +(north:0.8) and +(west:1) .. (sum.190) node [pos=0.3,left] {\tiny{$\alpha_{i,1}$}};
        \draw [->] (h2.north) .. controls +(north:0.6) and +(220:0.2) .. (sum.220) node [pos=0.2,right] {\tiny{$\alpha_{i,2}$}};
        \draw [->] (h4.north) .. controls +(north:0.8) and +(east:1) .. (sum.-10) node [pos=0.1,left] (alphan) {\tiny{$\alpha_{i,n}$}};

        % --- Decoder recurrence arrows (into s_{i-1}, s_{i-1} -> s_i, out of s_i) ---
        \draw [->] ([xshift=-1.5em]th1.west) -- ([xshift=-0.1em]th1.west);
        \draw [->] ([xshift=0.1em]th1.east) -- ([xshift=-0.1em]th2.west);
        \draw [->] ([xshift=0.1em]th2.east) -- ([xshift=1.5em]th2.east);

        % --- Context vector C_i flows from the sum into s_i ---
        % (ci is named so the overlay-2 arrow can point at it.)
        \draw [->] (sum.north) .. controls +(north:0.8) and +(west:0.2) .. ([yshift=-0.4em,xshift=-0.1em]th2.west) node [pos=0.2,right] (ci) {\scriptsize{$C_{i}$}};

        % --- Output layer above s_i ---
        \node [anchor=south,inner sep=1pt] (output) at ([yshift=0.8em]th2.north) {\tiny{输出层}};
        \draw [->] ([yshift=0.1em]th2.north) -- ([yshift=-0.1em]output.south);

        % --- Two-line captions under each encoder state ---
        \node [anchor=north] (enc1) at (h1.south) {\tiny{编码器输出}};
        \node [anchor=north] (enc12) at ([yshift=0.5em]enc1.south) {\tiny{(位置$1$)}};
        \node [anchor=north] (enc2) at (h2.south) {\tiny{编码器输出}};
        \node [anchor=north] (enc22) at ([yshift=0.5em]enc2.south) {\tiny{(位置$2$)}};
        \node [anchor=north] (enc4) at (h4.south) {\tiny{编码器输出}};
        % The last state is h_n, so its caption refers to position n.
        \node [anchor=north] (enc42) at ([yshift=0.5em]enc4.south) {\tiny{(位置$n$)}};

        % --- Formulas, uncovered step by step ---
        \visible<2->{
          \node [anchor=west] (math1) at ([xshift=5em,yshift=1em]th2.east) {$C_i = \sum_{j} \alpha_{i,j} h_j \ \ $};
        }
        \visible<3->{
          % The denominator sums over its own index j' so that it does not
          % capture the free index j of alpha_{i,j}.
          \node [anchor=north west] (math2) at ([yshift=-2em]math1.south west) {$\alpha_{i,j} = \frac{\exp(\beta_{i,j})}{\sum_{j'} \exp(\beta_{i,j'})}$};
          \node [anchor=north west] (math3) at (math2.south west) {$\beta_{i,j} = a(s_{i-1}, h_j)$};
        }

        % --- Coloured boxes behind the formulas (drawn on the background layer) ---
        \begin{pgfonlayer}{background}
          \visible<2->{
            \node [rectangle,inner sep=0.4em,rounded corners=1pt,fill=blue!10,drop shadow] [fit = (math1)] (box1) {};
          }
          \visible<3->{
            \node [rectangle,inner sep=0.4em,rounded corners=1pt,fill=orange!10,drop shadow] [fit = (math2) (math3)] (box2) {};
          }
        \end{pgfonlayer}

        % --- Dotted pointers from each formula box to the symbol it defines ---
        \visible<2->{
          \draw [->,dotted,thick,blue] (box1.west) .. controls +(west:1.2) and +(east:2.0) .. ([xshift=-0.3em]ci.east);
        }
        \visible<3->{
          \draw [->,dotted,thick,orange] ([yshift=1em]box2.west) .. controls +(west:1.2) and +(east:1.0) .. ([xshift=-0.35em]alphan.east);
        }
      \end{scope}
    \end{tikzpicture}
  \end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\section{Transformer}
%%%------------------------------------------------------------------------------------------------------------
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论