new update

18b714cf · xiaotong · 6d1b84b9 · 18b714cf · 18b714cf
Commit 18b714cf authored Oct 28, 2019 by xiaotong
--- a/Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
+++ b/Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
@@ -120,52 +120,36 @@
 \subsection{词嵌入}

 %%%------------------------------------------------------------------------------------------------------------
-%%% 更强大的表示模型 - ELMO
-\begin{frame}{更强的表示模型 - ELMO}
+%%% Language model evaluation metric - perplexity
+\begin{frame}{语言模型的评价指标}
 \begin{itemize}
-\item \textbf{ELMO}(Embedding from Language Models)可以说是掀起了基于语言模型的预训练的热潮
-    \begin{itemize}
-    \item 仍然使用RNN结构，不过循环单元换成了LSTM
-    \item 同时考虑自左向右和自右向左的建模方式，同时表示一个词左端和右端的上下文
-    \item 融合所有层的输出，送给下游应用，提供了更丰富的信息
-    \end{itemize}
-\end{itemize}
-\end{frame}
-
-%%%------------------------------------------------------------------------------------------------------------
-%%% 更强大的表示模型 - GTP
-\begin{frame}{更强的表示模型 - GPT}
+\item 困惑度(ppl)
 \begin{itemize}
-\item \textbf{GPT}(Generative Pre-Training)也是一种基于语言模型的表示模型
-    \begin{itemize}
-    \item 架构换成了Transformer，特征抽取能力更强
-    \item 基于Pre-training + Fine-tuning的框架，预训练作为下游系统部件的参数初始值，因此可以更好的适应目标任务
-    \end{itemize}
+\item 语言模型预测一个语言样本的能力
+\item 困惑度越低，建模的效果越好
 \end{itemize}
-\end{frame}
-
-%%%------------------------------------------------------------------------------------------------------------
-%%% 更强大的表示模型 - BERT
-\begin{frame}{更强的表示模型 - BERT}
-\begin{itemize}
-\item \textbf{BERT}( Bidirectional Encoder Representations from Transformers)是最近非常火爆的表示模型
-    \begin{itemize}
-    \item 仍然基于Transformer但是考虑了左右两端的上下文(可以对比GPT)
-    \item 使用了Mask方法来增加训练得到模型的健壮性，这个方法几乎成为了预训练表示模型的新范式
-    \end{itemize}
 \end{itemize}
-\end{frame}

-%%%------------------------------------------------------------------------------------------------------------
-%%% 更强大的表示模型 - BERT
-\begin{frame}{更强的表示模型 - BERT}
-\begin{itemize}
-\item \textbf{BERT}( Bidirectional Encoder Representations from Transformers)是最近非常火爆的表示模型
-    \begin{itemize}
-    \item 仍然基于Transformer但是考虑了左右两端的上下文(可以对比GPT)
-    \item 使用了Mask方法来增加训练得到模型的健壮性，这个方法几乎成为了预训练表示模型的新范式
-    \end{itemize}
-\end{itemize}
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}
+\node [anchor=west] (eq) at (0,0) {$\textrm{perplexity}(s)=p(w_1,w_2,w_3,...,w_m)^{-1/m}$};
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
+\vspace{0.5em}
+\begin{tabular}{l | l | l | r}
+模型 & 作者 & 年份 & PPL \\ \hline
+Feed-forward Neural LM & Bengio et al. & 2003 & 162.2 \\
+Recurrent NN-based LM & Mikolov et al. & 2010 & 124.7 \\
+Recurrent NN-LDA & Mikolov et al. & 2012 & 92.0 \\
+LSTM & Zaremba et al. & 2014 & 78.4 \\
+RHN & Zilly et al. & 2016 & 65.4 \\
+AWD-LSTM & Merity et al. & 2018 & 58.8 \\
+GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7}
+\end{tabular}
+
 \end{frame}

 %%%------------------------------------------------------------------------------------------------------------

--- a/Section05-Neural-Networks-and-Language-Modeling/section05.tex
+++ b/Section05-Neural-Networks-and-Language-Modeling/section05.tex
@@ -4368,6 +4368,273 @@ $\textbf{V}, \textbf{U}, \textbf{W}$: 参数
 \end{frame}

 %%%------------------------------------------------------------------------------------------------------------
+%%% 自注意力机制
+\begin{frame}{自注意力机制(Self-Attention)}
+
+\begin{itemize}
+\item RNN LM效果很好，但是当序列过长，词汇之间信息传递路径过长，容易出现梯度消失、梯度爆炸的问题。
+\vspace{0.5em}
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}
+\node [anchor=west] (w0) at (0,0) {$w_1$};
+\node [anchor=west] (w1) at ([xshift=0.5em]w0.east) {$w_2$};
+\node [anchor=west] (w2) at ([xshift=0.5em]w1.east) {$w_3$};
+\node [anchor=west] (w3) at ([xshift=0.5em]w2.east) {$...$};
+\node [anchor=west] (w4) at ([xshift=0.5em]w3.east) {$w_{m-1}$};
+\node [anchor=west,fill=green!20!white] (w5) at ([xshift=0.5em]w4.east) {$w_{m}$};
+\draw [->,thick,red] (w1.north).. controls +(130:0.5) and +(50:0.5) .. (w0.north);
+\draw [->,thick,red] (w2.north).. controls +(130:0.5) and +(50:0.5) .. (w1.north);
+\draw [->,thick,red] ([yshift=0.2em]w3.north).. controls +(130:0.5) and +(50:0.5) .. (w2.north);
+\draw [->,thick,red] (w4.north).. controls +(130:0.5) and +(50:0.5) .. ([yshift=0.2em]w3.north);
+\draw [->,thick,red] (w5.north).. controls +(130:0.5) and +(50:0.5) .. (w4.north);
+\draw [->,very thick,red] ([xshift=-5em]w0.west) -- ([xshift=-6.5em]w0.west) node [pos=0,right] {\scriptsize{信息传递}};
+
+
+\end{scope}
+
+\end{tikzpicture}
+\end{center}
+\item<2-> 能否将不同位置之间的词汇间信息传递的距离拉近为1？
+
+
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}
+\node [anchor=west] (w0) at (0,-2) {$w_1$};
+\node [anchor=west] (w1) at ([xshift=0.5em]w0.east) {$w_2$};
+\node [anchor=west] (w2) at ([xshift=0.5em]w1.east) {$w_3$};
+\node [anchor=west] (w3) at ([xshift=0.5em]w2.east) {$...$};
+\node [anchor=west] (w4) at ([xshift=0.5em]w3.east) {$w_{m-1}$};
+\node [anchor=west,fill=green!20!white] (w5) at ([xshift=0.5em]w4.east) {$w_{m}$};
+\draw [->,thick,red] (w5.north).. controls +(100:0.8) and +(50:0.8) .. (w0.north);
+\draw [->,thick,red] (w5.north).. controls +(110:0.7) and +(50:0.7) .. (w1.north);
+\draw [->,thick,red] (w5.north).. controls +(120:0.6) and +(50:0.6) .. ([yshift=0.2em]w3.north);
+\draw [->,thick,red] (w5.north).. controls +(130:0.5) and +(50:0.5) .. (w4.north);
+\draw [->,very thick,red] ([xshift=-5em]w0.west) -- ([xshift=-6.5em]w0.west) node [pos=0,right] {\scriptsize{信息传递}};
+
+
+\end{scope}
+
+\end{tikzpicture}
+\end{center}
+\item<3-> \textbf{自注意力机制(Self-Attention)}可以很好地解决长距离依赖问题，在长距离语言建模任务上取得了很好的效果
+	\begin{itemize}
+	\item 更充分地表示序列不同位置之间的复杂关系
+	\item 并行训练，提高效率
+	\end{itemize}
+	
+        \textbf{Attention Is All You Need}\\
+        \textbf{Vaswani et al., 2017, In Proc. of Neural Information Processing Systems, 6000--6010}
+\end{itemize}
+
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% Transformer architecture
+\begin{frame}{Transformer语言模型(Vaswani et al., 2017)}
+\begin{itemize}
+\item 一个简单的例子
+\end{itemize}
+
+\vspace{-2em}
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}
+\node [anchor=west] (w0) at (0,0) {\footnotesize{$w_{0}$}};
+\node [anchor=west] (w1) at ([xshift=4em]w0.east) {\footnotesize{$w_{1}$}};
+\node [anchor=west] (w2) at ([xshift=4em]w1.east) {\footnotesize{$w_{2}$}};
+\node [anchor=west] (w3) at ([xshift=4em]w2.east) {\footnotesize{$w_{3}$}};
+\node [anchor=north] (index0) at ([yshift=0.5em]w0.south) {\tiny(index)};
+\node [anchor=north] (index1) at ([yshift=0.5em]w1.south) {\tiny(index)};
+\node [anchor=north] (index2) at ([yshift=0.5em]w2.south) {\tiny(index)};
+\node [anchor=north] (index3) at ([yshift=0.5em]w3.south) {\tiny(index)};
+\node [anchor=south,draw,inner sep=3pt] (e0) at ([yshift=1em]w0.north) {\tiny{$e_0=w_{0} \textbf{C}$}};
+\node [anchor=south,draw,inner sep=3pt] (e1) at ([yshift=1em]w1.north) {\tiny{$e_1=w_{1} \textbf{C}$}};
+\node [anchor=south,draw,inner sep=3pt] (e2) at ([yshift=1em]w2.north) {\tiny{$e_2=w_{2} \textbf{C}$}};
+\node [anchor=south,draw,inner sep=3pt] (e3) at ([yshift=1em]w3.north) {\tiny{$e_3=w_{3} \textbf{C}$}};
+
+\node [anchor=south,draw,inner sep=3pt] (h0) at ([xshift=-0.5em, yshift=1.5em]e0.north) {\tiny{$h_{0}=\textrm{SelfAtt}(e_0,e_3)$}};
+\node [anchor=south,draw,inner sep=3pt] (h1) at ([xshift=0.5em, yshift=1.5em]e1.north) {\tiny{$h_{1}=\textrm{SelfAtt}(e_1,e_3)$}};
+\node [anchor=south,draw,inner sep=3pt] (h2) at ([xshift=1.5em, yshift=1.5em]e2.north) {\tiny{$h_{2}=\textrm{SelfAtt}(e_2,e_3)$}};
+\node [anchor=south,draw,minimum width=9em,inner sep=3pt] (f1) at ([xshift=0.5em, yshift=1.5em]h2.north) {\tiny{$f_3=\textrm{FFN}([h_0,h_1,h_2,e_3])$}};
+\node [anchor=south,draw,minimum width=9em,inner sep=3pt] (o1) at ([yshift=1em]f1.north) {\tiny{$y=\textrm{Softmax}(f_3 \textbf{U})$}};
+\node [anchor=south] (ylabel) at ([yshift=1em]o1.north) {\footnotesize{$\textrm{P}(w_4|w_{0}w_{1}w_{2}w_{3})$}};
+
+\draw [->] ([yshift=0.1em]w0.north) -- ([yshift=-0.1em]e0.south);
+\draw [->] ([yshift=0.1em]w1.north) -- ([yshift=-0.1em]e1.south);
+\draw [->] ([yshift=0.1em]w2.north) -- ([yshift=-0.1em]e2.south);
+\draw [->] ([yshift=0.1em]w3.north) -- ([yshift=-0.1em]e3.south);
+\draw [->] ([yshift=0.1em]e0.north) -- ([xshift=0em,yshift=-0.1em]h0.south);
+\draw [->] ([yshift=0.1em]e1.north) -- ([xshift=-0.5em,yshift=-0.1em]h1.south);
+\draw [->] ([yshift=0.1em]e2.north) -- ([xshift=-1em,yshift=-0.1em]h2.south);
+\draw [->] ([yshift=0.1em]e3.north) -- ([xshift=1em,yshift=-0.1em]h0.south);
+\draw [->] ([yshift=0.1em]e3.north) -- ([xshift=1em,yshift=-0.1em]h1.south);
+\draw [->] ([yshift=0.1em]e3.north) -- ([xshift=1em,yshift=-0.1em]h2.south);
+
+\draw [->] ([yshift=0.1em]h0.north) -- ([xshift=-2em,yshift=-0.1em]f1.south);
+\draw [->] ([yshift=0.1em]e3.north) -- ([xshift=2em,yshift=-0.1em]f1.south);
+\draw [->] ([yshift=0.1em]h1.north) -- ([xshift=-1em,yshift=-0.1em]f1.south);
+\draw [->] ([yshift=0.1em]h2.north) -- ([xshift=0em,yshift=-0.1em]f1.south);
+\draw [->] ([yshift=0.1em]f1.north) -- ([yshift=-0.1em]o1.south);
+\draw [->] ([yshift=0.1em]o1.north) -- ([yshift=-0.1em]ylabel.south);
+
+\visible<2->{
+\node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e0) at ([yshift=1em]w0.north) {\tiny{$e_0=w_{0} \textbf{C}$}};
+\node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e1) at ([yshift=1em]w1.north) {\tiny{$e_1=w_{1} \textbf{C}$}};
+\node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e2) at ([yshift=1em]w2.north) {\tiny{$e_2=w_{2} \textbf{C}$}};
+\node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e3) at ([yshift=1em]w3.north) {\tiny{$e_3=w_{3} \textbf{C}$}};
+}
+
+\visible<2->{
+\node [anchor=west] (embedinglabel0) at ([xshift=-5em,yshift=-2em]w0.south) {\scriptsize{{\blue \textbf{词的分布式表示}}}};
+\node [anchor=north west] (embedinglabel1) at ([yshift=0.3em]embedinglabel0.south west) {\scriptsize{前面已经介绍过！}};
+\node [anchor=north west] (embedinglabel2) at ([yshift=0.3em]embedinglabel1.south west) {\scriptsize{基于One-hot表示获得}};
+\node [anchor=north west] (embedinglabel3) at ([yshift=0.3em]embedinglabel2.south west) {\scriptsize{新加入位置向量}};
+}
+
+\visible<3->{
+\node [anchor=south,draw,inner sep=3pt,fill=ugreen!20!white] (h0) at ([xshift=-0.5em, yshift=1.5em]e0.north) {\tiny{$h_{0}=\textrm{SelfAtt}(e_0,e_3)$}};
+\node [anchor=south,draw,inner sep=3pt,fill=ugreen!20!white] (h1) at ([xshift=0.5em, yshift=1.5em]e1.north) {\tiny{$h_{1}=\textrm{SelfAtt}(e_1,e_3)$}};
+\node [anchor=south,draw,inner sep=3pt,fill=ugreen!20!white] (h2) at ([xshift=1.5em, yshift=1.5em]e2.north) {\tiny{$h_{2}=\textrm{SelfAtt}(e_2,e_3)$}};
+}
+
+\visible<3->{
+\node [anchor=west] (selfattlabel0) at ([xshift=3em]embedinglabel0.east) {\scriptsize{{\color{ugreen} \textbf{自注意力机制}}}};
+\node [anchor=west] (selfattlabel1) at ([yshift=-0.3em]selfattlabel0.south west) {\scriptsize{计算词汇之间的相关度}};
+\node [anchor=west] (selfattlabel2) at ([yshift=-0.3em]selfattlabel1.south west) {\scriptsize{多头自注意力机制}};
+\node [anchor=west] (directlabel0) at ([yshift=-0.3em]selfattlabel2.south west) {\scriptsize{\alert{\textbf{后面将会介绍}}}};
+}
+
+\visible<4->{
+\node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (f1) at ([xshift=0.5em, yshift=1.5em]h2.north) {\tiny{$f_3=\textrm{FFN}([h_0,h_1,h_2,e_3])$}};
+\node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (o1) at ([yshift=1em]f1.north) {\tiny{$y=\textrm{Softmax}(f_3 \textbf{U})$}};
+}
+
+\visible<4->{
+\node [anchor=west] (ffnlabel0) at ([xshift=3em]selfattlabel0.east) {\scriptsize{{\color{orange} \textbf{前馈神经网络和输出层}}}};
+\node [anchor=west] (ffnlabel1) at ([yshift=-0.3em]ffnlabel0.south west) {\scriptsize{双层全连接网络}};
+\node [anchor=west] (ffnlabel2) at ([yshift=-0.3em]ffnlabel1.south west) {\scriptsize{激活函数为ReLU}};
+\node [anchor=west] (ffnlabel3) at ([yshift=-0.3em]ffnlabel2.south west) {\scriptsize{最后通过Softmax输出}};
+}
+
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
+
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% Transformer architecture
+\begin{frame}{Transformer语言模型(Vaswani et al., 2017)}
+\begin{itemize}
+\item 多头注意力机制
+\end{itemize}
+
+\vspace{-1.5em}
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}
+
+\node [anchor=west,draw=black!30,inner sep=4pt,fill=ugreen!20!white] (Linear0) at (0,0) {\tiny{Linear}};
+\node [anchor=south west,draw=black!50,fill=ugreen!20!white,draw,inner sep=4pt] (Linear01) at ([shift={(-0.2em,-0.2em)}]Linear0.south west) {\tiny{Linear}};
+\node [anchor=south west,fill=ugreen!20!white,draw,inner sep=4pt] (Linear02) at ([shift={(-0.2em,-0.2em)}]Linear01.south west) {\tiny{Linear}};
+\node [anchor=north] (Q) at ([xshift=0em,yshift=-1em]Linear02.south) {\footnotesize{$Q$}};
+
+\node [anchor=west,draw=black!30,inner sep=4pt,fill=ugreen!20!white] (Linear1) at ([xshift=1.5em]Linear0.east) {\tiny{Linear}};
+\node [anchor=south west,draw=black!50,fill=ugreen!20!white,draw,inner sep=4pt] (Linear11) at ([shift={(-0.2em,-0.2em)}]Linear1.south west) {\tiny{Linear}};
+\node [anchor=south west,fill=ugreen!20!white,draw,inner sep=4pt] (Linear12) at ([shift={(-0.2em,-0.2em)}]Linear11.south west) {\tiny{Linear}};
+\node [anchor=north] (K) at ([xshift=0em,yshift=-1em]Linear12.south) {\footnotesize{$K$}};
+
+\node [anchor=west,draw=black!30,inner sep=4pt,fill=ugreen!20!white] (Linear2) at ([xshift=1.5em]Linear1.east) {\tiny{Linear}};
+\node [anchor=south west,draw=black!50,fill=ugreen!20!white,draw,inner sep=4pt] (Linear21) at ([shift={(-0.2em,-0.2em)}]Linear2.south west) {\tiny{Linear}};
+\node [anchor=south west,fill=ugreen!20!white,draw,inner sep=4pt] (Linear22) at ([shift={(-0.2em,-0.2em)}]Linear21.south west) {\tiny{Linear}};
+\node [anchor=north] (V) at ([xshift=0em,yshift=-1em]Linear22.south) {\footnotesize{$V$}};
+
+\node [anchor=south,draw=black!30,minimum width=9em,inner sep=4pt,fill=blue!20!white] (Scale) at ([yshift=1em]Linear1.north) {\tiny{Scaled Dot-Product Attention}};
+\node [anchor=south west,draw=black!50,minimum width=9em,fill=blue!20!white,draw,inner sep=4pt] (Scale1) at ([shift={(-0.2em,-0.2em)}]Scale.south west) {\tiny{Scaled Dot-Product Attention}};
+\node [anchor=south west,fill=blue!20!white,draw,minimum width=9em,inner sep=4pt] (Scale2) at ([shift={(-0.2em,-0.2em)}]Scale1.south west) {\tiny{Scaled Dot-Product Attention}};
+
+\node [anchor=south,draw,minimum width=4em,inner sep=4pt,fill=yellow!30] (Concat) at ([yshift=1em]Scale2.north) {\tiny{Concat}};
+
+\node [anchor=south,draw,minimum width=4em,inner sep=4pt,fill=ugreen!20!white] (Linear) at ([yshift=1em]Concat.north) {\tiny{Linear}};
+
+
+\draw [->] ([yshift=0.1em]Q.north) -- ([yshift=-0.1em]Linear02.south);
+\draw [-,draw=black!50] ([yshift=0.1em]Q.north) -- ([xshift=0.2em,yshift=-0.1em]Linear02.south);
+\draw [-,draw=black!30] ([yshift=0.1em]Q.north) -- ([xshift=0.4em,yshift=-0.1em]Linear02.south);
+
+\draw [->] ([yshift=0.1em]K.north) -- ([yshift=-0.1em]Linear12.south);
+\draw [-,draw=black!50] ([yshift=0.1em]K.north) -- ([xshift=0.2em,yshift=-0.1em]Linear12.south);
+\draw [-,draw=black!30] ([yshift=0.1em]K.north) -- ([xshift=0.4em,yshift=-0.1em]Linear12.south);
+
+\draw [->] ([yshift=0.1em]V.north) -- ([yshift=-0.1em]Linear22.south);
+\draw [-,draw=black!50] ([yshift=0.1em]V.north) -- ([xshift=0.2em,yshift=-0.1em]Linear22.south);
+\draw [-,draw=black!30] ([yshift=0.1em]V.north) -- ([xshift=0.4em,yshift=-0.1em]Linear22.south);
+
+\draw [->] ([yshift=0em]Linear02.north) -- ([yshift=1em]Linear02.north);
+\draw [-,draw=black!50] ([yshift=0em]Linear01.north) -- ([yshift=0.8em]Linear01.north);
+\draw [-,draw=black!30] ([yshift=0em]Linear0.north) -- ([yshift=0.6em]Linear0.north);
+
+\draw [->] ([yshift=0em]Linear12.north) -- ([yshift=1em]Linear12.north);
+\draw [-,draw=black!50] ([yshift=0em]Linear11.north) -- ([yshift=0.8em]Linear11.north);
+\draw [-,draw=black!30] ([yshift=0em]Linear1.north) -- ([yshift=0.6em]Linear1.north);
+
+\draw [->] ([yshift=0em]Linear22.north) -- ([yshift=1em]Linear22.north);
+\draw [-,draw=black!50] ([yshift=0em]Linear21.north) -- ([yshift=0.8em]Linear21.north);
+\draw [-,draw=black!30] ([yshift=0em]Linear2.north) -- ([yshift=0.6em]Linear2.north);
+
+\draw [->] ([yshift=0em]Scale2.north) -- ([yshift=0em]Concat.south);
+\draw [-,draw=black!50] ([yshift=0em]Scale1.north) -- ([yshift=0.8em]Scale1.north);
+\draw [-,draw=black!30] ([yshift=0em]Scale.north) -- ([yshift=0.6em]Scale.north);
+
+\draw [->] ([yshift=0em]Concat.north) -- ([yshift=0em]Linear.south);
+\draw [->] ([yshift=0em]Linear.north) -- ([yshift=1em]Linear.north);
+
+\node [anchor=west] (Multiheadlabel0) at ([xshift=-5em,yshift=-1.2em]Q.south) {\scriptsize{{\blue \textbf{多头注意力}}}};
+\node [anchor=north west] (Multiheadlabel1) at ([yshift=0em]Multiheadlabel0.south west) {\scriptsize{$MultiHead(Q,K,V)=Concat(head_1,...,head_n)W^O$}};
+\node [anchor=north west] (Multiheadlabel2) at ([yshift=0.2em]Multiheadlabel1.south west) {\scriptsize{把输入压缩成多个维度较小的输出，分别做自注意力}};
+\node [anchor=north west] (Multiheadlabel3) at ([yshift=0.2em]Multiheadlabel2.south west) {\scriptsize{再把结果级联，经过线性变换得到最终输出}};
+
+
+\visible<2->{
+\node [anchor=south west,fill=white,draw,inner sep=4pt,minimum width=3.5em,fill=blue!20!white] (MatMul) at ([xshift=8em]Linear22.south west) {\tiny{MatMul}};
+\node [anchor=north] (Q1) at ([xshift=-1em,yshift=-1em]MatMul.south) {\footnotesize{$Q$}};
+\node [anchor=north] (K1) at ([xshift=1em,yshift=-1em]MatMul.south) {\footnotesize{$K$}};
+\node [anchor=south,draw,inner sep=4pt,fill=yellow!30] (Scale3) at ([yshift=1em]MatMul.north) {\tiny{Scale}};
+\node [anchor=south,draw,inner sep=4pt,fill=purple!20,minimum width=3.5em] (Mask) at ([yshift=0.8em]Scale3.north) {\tiny{Mask(opt.)}};
+\node [anchor=south,draw,inner sep=4pt,fill=ugreen!20!white] (SoftMax) at ([yshift=1em]Mask.north) {\tiny{SoftMax}};
+\node [anchor=south,draw,minimum width=3.5em,inner sep=4pt,fill=blue!20!white] (MatMul1) at ([xshift=1.5em,yshift=1em]SoftMax.north) {\tiny{MatMul}};
+\node [anchor=north] (V1) at ([xshift=2em]K1.north) {\footnotesize{$V$}};
+\node [anchor=north] (null) at ([yshift=0.8em]MatMul1.north) {};
+
+\node [rectangle,draw, densely dashed,inner sep=0.4em] [fit = (MatMul) (MatMul1) (Q1) (K1) (V1) (null)] (inputshadow) {};
+
+\draw [->] ([yshift=0.1em]Q1.north) -- ([xshift=-1em,yshift=-0.1em]MatMul.south);
+\draw [->] ([yshift=0.1em]K1.north) -- ([xshift=1em,yshift=-0.1em]MatMul.south);
+\draw [->] ([yshift=0.1em]MatMul.north) -- ([yshift=-0.1em]Scale3.south);
+\draw [->] ([yshift=0.1em]Scale3.north) -- ([yshift=-0.1em]Mask.south);
+\draw [->] ([yshift=0.1em]Mask.north) -- ([yshift=-0.1em]SoftMax.south);
+\draw [->] ([yshift=0.1em]SoftMax.north) -- ([yshift=0.9em]SoftMax.north);
+\draw [->] ([yshift=0.1em]V1.north) -- ([yshift=9.1em]V1.north);
+\draw [->] ([yshift=0.1em]MatMul1.north) -- ([yshift=0.8em]MatMul1.north);
+
+\draw [->,dashed,red,thick] ([xshift=0.1em]Scale.east) .. controls +(east:1) and +(west:1) .. ([xshift=-0.1em,yshift=1em]inputshadow.west);
+
+\node [anchor=west] (Attentionlabel0) at ([xshift=-2em,yshift=-1.2em]Q1.south) {\scriptsize{{\color{ugreen} \textbf{基于点乘的自注意力}}}};
+\node [anchor=north west] (Attentionlabel1) at ([yshift=0.3em]Attentionlabel0.south west) {\scriptsize{$head_i=softmax(\frac{QK^{T}}{\sqrt{d_k}})V$}};
+\node [anchor=north west] (Attentionlabel2) at ([yshift=0.6em]Attentionlabel1.south west) {\scriptsize{计算得到位置向量的加权和}};
+\node [anchor=north west] (Attentionlabel3) at ([yshift=0.2em]Attentionlabel2.south west) {\scriptsize{Q,K,V都是相同的}};
+}
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
+
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
 \subsection{词嵌入}

 %%%------------------------------------------------------------------------------------------------------------
@@ -4657,5 +4924,266 @@ $\textbf{V}, \textbf{U}, \textbf{W}$: 参数

 \end{frame}

+%%%------------------------------------------------------------------------------------------------------------
+%%% 更强大的表示模型 - ELMO
+\begin{frame}{更强的表示模型 - ELMO}
+\begin{itemize}
+\item \textbf{ELMO}(Embeddings from Language Models)可以说是掀起了基于语言模型的预训练的热潮
+    \begin{itemize}
+    \item 仍然使用RNN结构，不过循环单元换成了LSTM
+    \item 同时考虑自左向右和自右向左的建模方式，同时表示一个词左端和右端的上下文
+    \item 融合所有层的输出，送给下游应用，提供了更丰富的信息
+    \end{itemize}
+\end{itemize}
+\vspace{0.5em}
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}[scale=1.2]
+
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm0) at (0,0) {\scriptsize{Lstm}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm1) at ([xshift=1em]Lstm0.east) {\scriptsize{Lstm}};
+\node [anchor=west,inner sep=4pt] (sep) at ([xshift=1em]Lstm1.east) {\scriptsize{...}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm2) at ([xshift=1em]sep.east) {\scriptsize{Lstm}};
+
+\node [anchor=south,draw,inner sep=4pt,fill=blue!20!white] (Lstm3) at ([yshift=1em]Lstm0.north) {\scriptsize{Lstm}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm4) at ([xshift=1em]Lstm3.east) {\scriptsize{Lstm}};
+\node [anchor=west,inner sep=4pt] (sep1) at ([xshift=1em]Lstm4.east) {\scriptsize{...}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm5) at ([xshift=1em]sep1.east) {\scriptsize{Lstm}};
+
+\node [rectangle,rounded corners,draw=black!50,densely dashed,inner sep=0.4em] [fit = (Lstm0) (Lstm2) (Lstm3) (Lstm5)] (inputshadow) {};
+
+\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e1) at ([xshift=-2em,yshift=-1em]Lstm2.south) {\scriptsize{$E_1$}};
+\node [anchor=west,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e2) at ([xshift=1em]e1.east) {\scriptsize{$E_2$}};
+\node [anchor=west,inner sep=4pt] (sep5) at ([xshift=1em]e2.east) {\scriptsize{...}};
+\node [anchor=west,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e3) at ([xshift=1em]sep5.east) {\scriptsize{$E_3$}};
+
+\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t1) at ([xshift=-2em,yshift=1em]Lstm5.north) {\scriptsize{$T_1$}};
+\node [anchor=west,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t2) at ([xshift=1em]t1.east) {\scriptsize{$T_2$}};
+\node [anchor=west,inner sep=4pt] (sep6) at ([xshift=1em]t2.east) {\scriptsize{...}};
+\node [anchor=west,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t3) at ([xshift=1em]sep6.east) {\scriptsize{$T_3$}};
+
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm6) at ([xshift=1.5em]Lstm2.east) {\scriptsize{Lstm}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm7) at ([xshift=1em]Lstm6.east) {\scriptsize{Lstm}};
+\node [anchor=west,inner sep=4pt] (sep3) at ([xshift=1em]Lstm7.east) {\scriptsize{...}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm8) at ([xshift=1em]sep3.east) {\scriptsize{Lstm}};
+
+\node [anchor=south,draw,inner sep=4pt,fill=blue!20!white] (Lstm9) at ([yshift=1em]Lstm6.north) {\scriptsize{Lstm}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm10) at ([xshift=1em]Lstm9.east) {\scriptsize{Lstm}};
+\node [anchor=west,inner sep=4pt] (sep4) at ([xshift=1em]Lstm10.east) {\scriptsize{...}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm11) at ([xshift=1em]sep4.east) {\scriptsize{Lstm}};
+
+\node [rectangle,rounded corners,draw=black!50,densely dashed,inner sep=0.4em] [fit = (Lstm6) (Lstm8) (Lstm9) (Lstm11)] (inputshadow) {};
+
+\draw [->] ([xshift=0.1em]Lstm0.east) -- ([xshift=-0.1em]Lstm1.west);
+\draw [->] ([xshift=0.1em]Lstm1.east) -- ([xshift=-0.1em]sep.west);
+\draw [->] ([xshift=0.1em]sep.east) -- ([xshift=-0.1em]Lstm2.west);
+
+\draw [->] ([xshift=0.1em]Lstm3.east) -- ([xshift=-0.1em]Lstm4.west);
+\draw [->] ([xshift=0.1em]Lstm4.east) -- ([xshift=-0.1em]sep1.west);
+\draw [->] ([xshift=0.1em]sep1.east) -- ([xshift=-0.1em]Lstm5.west);
+
+\draw [->] ([yshift=0.1em]Lstm0.north) -- ([yshift=-0.1em]Lstm3.south);
+\draw [->] ([yshift=0.1em]Lstm1.north) -- ([yshift=-0.1em]Lstm4.south);
+\draw [->] ([yshift=0.1em]Lstm2.north) -- ([yshift=-0.1em]Lstm5.south);
+
+\draw [->] ([xshift=0.1em]Lstm6.east) -- ([xshift=-0.1em]Lstm7.west);
+\draw [->] ([xshift=0.1em]Lstm7.east) -- ([xshift=-0.1em]sep3.west);
+\draw [->] ([xshift=0.1em]sep3.east) -- ([xshift=-0.1em]Lstm8.west);
+
+\draw [->] ([xshift=0.1em]Lstm9.east) -- ([xshift=-0.1em]Lstm10.west);
+\draw [->] ([xshift=0.1em]Lstm10.east) -- ([xshift=-0.1em]sep4.west);
+\draw [->] ([xshift=0.1em]sep4.east) -- ([xshift=-0.1em]Lstm11.west);
+
+\draw [->] ([yshift=0.1em]Lstm6.north) -- ([yshift=-0.1em]Lstm9.south);
+\draw [->] ([yshift=0.1em]Lstm7.north) -- ([yshift=-0.1em]Lstm10.south);
+\draw [->] ([yshift=0.1em]Lstm8.north) -- ([yshift=-0.1em]Lstm11.south);
+
+\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Lstm0.south);
+\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Lstm6.south);
+\draw [->] ([yshift=0.1em]e2.north) -- ([yshift=-0.1em]Lstm1.south);
+\draw [->] ([yshift=0.1em]e2.north) -- ([yshift=-0.1em]Lstm7.south);
+\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Lstm2.south);
+\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Lstm8.south);
+
+\draw [->] ([yshift=0.1em]Lstm3.north) -- ([xshift=-0.05em,yshift=-0.1em]t1.south);
+\draw [->] ([yshift=0.1em]Lstm9.north) -- ([yshift=-0.1em]t1.south);
+\draw [->] ([yshift=0.1em]Lstm4.north) -- ([xshift=-0.05em,yshift=-0.1em]t2.south);
+\draw [->] ([yshift=0.1em]Lstm10.north) -- ([yshift=-0.1em]t2.south);
+\draw [->] ([yshift=0.1em]Lstm5.north) -- ([xshift=-0.05em,yshift=-0.1em]t3.south);
+\draw [->] ([yshift=0.1em]Lstm11.north) -- ([yshift=-0.1em]t3.south);
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
+
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% 更强大的表示模型 - GPT
+\begin{frame}{更强的表示模型 - GPT}
+\begin{itemize}
+\item \textbf{GPT}(Generative Pre-Training)也是一种基于语言模型的表示模型
+    \begin{itemize}
+    \item 架构换成了Transformer，特征抽取能力更强
+    \item 基于Pre-training + Fine-tuning的框架，预训练作为下游系统部件的参数初始值，因此可以更好地适应目标任务
+    \end{itemize}
+\end{itemize}
+\vspace{0.5em}
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}[scale=1.2]
+
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm0) at (0,0) {\scriptsize{Trm}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm1) at ([xshift=1em]Trm0.east) {\scriptsize{Trm}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm2) at ([xshift=1em]Trm1.east) {\scriptsize{Trm}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm3) at ([xshift=1em]Trm2.east) {\scriptsize{Trm}};
+\node [anchor=west,inner sep=4pt] (sep) at ([xshift=1em]Trm3.east) {\scriptsize{...}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm4) at ([xshift=1em]sep.east) {\scriptsize{Trm}};
+
+\node [anchor=south,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm5) at ([yshift=1em]Trm0.north) {\scriptsize{Trm}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm6) at ([xshift=1em]Trm5.east) {\scriptsize{Trm}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm7) at ([xshift=1em]Trm6.east) {\scriptsize{Trm}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm8) at ([xshift=1em]Trm7.east) {\scriptsize{Trm}};
+\node [anchor=west,inner sep=4pt] (sep1) at ([xshift=1em]Trm8.east) {\scriptsize{...}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm9) at ([xshift=1em]sep1.east) {\scriptsize{Trm}};
+
+\node [rectangle,rounded corners,draw=black!50,densely dashed,inner sep=0.4em] [fit = (Trm0) (Trm4) (Trm5) (Trm9)] (inputshadow) {};
+
+\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e1) at ([yshift=-1em]Trm0.south) {\scriptsize{$E_1$}};
+\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e2) at ([yshift=-1em]Trm1.south) {\scriptsize{$E_2$}};
+\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e3) at ([yshift=-1em]Trm2.south) {\scriptsize{$E_3$}};
+\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e4) at ([yshift=-1em]Trm3.south) {\scriptsize{$E_4$}};
+\node [anchor=north,inner sep=4pt] (sep5) at ([yshift=-1em]sep.south) {\scriptsize{...}};
+\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e5) at ([yshift=-1em]Trm4.south) {\scriptsize{$E_5$}};
+
+\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t1) at ([yshift=1em]Trm5.north) {\scriptsize{$T_1$}};
+\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t2) at ([yshift=1em]Trm6.north) {\scriptsize{$T_2$}};
+\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t3) at ([yshift=1em]Trm7.north) {\scriptsize{$T_3$}};
+\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t4) at ([yshift=1em]Trm8.north) {\scriptsize{$T_4$}};
+\node [anchor=south,inner sep=4pt] (sep6) at ([yshift=1em]sep1.north) {\scriptsize{...}};
+\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t5) at ([yshift=1em]Trm9.north) {\scriptsize{$T_5$}};
+
+\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm0.south);
+\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm1.south);
+\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm2.south);
+\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm3.south);
+\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm4.south);
+\draw [->] ([yshift=0.1em]e2.north) -- ([yshift=-0.1em]Trm1.south);
+\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Trm2.south);
+\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Trm3.south);
+\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Trm4.south);
+\draw [->] ([yshift=0.1em]e4.north) -- ([yshift=-0.1em]Trm3.south);
+\draw [->] ([yshift=0.1em]e5.north) -- ([yshift=-0.1em]Trm4.south);
+
+\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm5.south);
+\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm6.south);
+\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm7.south);
+\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm8.south);
+\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm9.south);
+\draw [->] ([yshift=0.1em]Trm1.north) -- ([yshift=-0.1em]Trm6.south);
+\draw [->] ([yshift=0.1em]Trm2.north) -- ([yshift=-0.1em]Trm7.south);
+\draw [->] ([yshift=0.1em]Trm2.north) -- ([yshift=-0.1em]Trm8.south);
+\draw [->] ([yshift=0.1em]Trm2.north) -- ([yshift=-0.1em]Trm9.south);
+\draw [->] ([yshift=0.1em]Trm3.north) -- ([yshift=-0.1em]Trm8.south);
+\draw [->] ([yshift=0.1em]Trm4.north) -- ([yshift=-0.1em]Trm9.south);
+
+\draw [->] ([yshift=0.1em]Trm5.north) -- ([yshift=-0.1em]t1.south);
+\draw [->] ([yshift=0.1em]Trm6.north) -- ([yshift=-0.1em]t2.south);
+\draw [->] ([yshift=0.1em]Trm7.north) -- ([yshift=-0.1em]t3.south);
+\draw [->] ([yshift=0.1em]Trm8.north) -- ([yshift=-0.1em]t4.south);
+\draw [->] ([yshift=0.1em]Trm9.north) -- ([yshift=-0.1em]t5.south);
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
+
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% 更强大的表示模型 - BERT
+\begin{frame}{更强的表示模型 - BERT}
+\begin{itemize}
+\item \textbf{BERT}(Bidirectional Encoder Representations from Transformers)是最近非常火爆的表示模型
+    \begin{itemize}
+    \item 仍然基于Transformer但是考虑了左右两端的上下文(可以对比GPT)
+    \item 使用了Mask方法来增加训练得到模型的健壮性，这个方法几乎成为了预训练表示模型的新范式
+    \end{itemize}
+\end{itemize}
+\vspace{0.5em}
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}[scale=1.2]
+
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm0) at (0,0) {\scriptsize{Trm}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm1) at ([xshift=1em]Trm0.east) {\scriptsize{Trm}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm2) at ([xshift=1em]Trm1.east) {\scriptsize{Trm}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm3) at ([xshift=1em]Trm2.east) {\scriptsize{Trm}};
+\node [anchor=west,inner sep=4pt] (sep) at ([xshift=1em]Trm3.east) {\scriptsize{...}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm4) at ([xshift=1em]sep.east) {\scriptsize{Trm}};
+
+\node [anchor=south,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm5) at ([yshift=1em]Trm0.north) {\scriptsize{Trm}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm6) at ([xshift=1em]Trm5.east) {\scriptsize{Trm}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm7) at ([xshift=1em]Trm6.east) {\scriptsize{Trm}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm8) at ([xshift=1em]Trm7.east) {\scriptsize{Trm}};
+\node [anchor=west,inner sep=4pt] (sep1) at ([xshift=1em]Trm8.east) {\scriptsize{...}};
+\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm9) at ([xshift=1em]sep1.east) {\scriptsize{Trm}};
+
+\node [rectangle,rounded corners,draw=black!50,densely dashed,inner sep=0.4em] [fit = (Trm0) (Trm4) (Trm5) (Trm9)] (inputshadow) {};
+
+\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e1) at ([yshift=-1em]Trm0.south) {\scriptsize{$E_1$}};
+\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e2) at ([yshift=-1em]Trm1.south) {\scriptsize{$E_2$}};
+\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e3) at ([yshift=-1em]Trm2.south) {\scriptsize{$E_3$}};
+\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e4) at ([yshift=-1em]Trm3.south) {\scriptsize{$E_4$}};
+\node [anchor=north,inner sep=4pt] (sep5) at ([yshift=-1em]sep.south) {\scriptsize{...}};
+\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e5) at ([yshift=-1em]Trm4.south) {\scriptsize{$E_5$}};
+
+\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t1) at ([yshift=1em]Trm5.north) {\scriptsize{$T_1$}};
+\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t2) at ([yshift=1em]Trm6.north) {\scriptsize{$T_2$}};
+\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t3) at ([yshift=1em]Trm7.north) {\scriptsize{$T_3$}};
+\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t4) at ([yshift=1em]Trm8.north) {\scriptsize{$T_4$}};
+\node [anchor=south,inner sep=4pt] (sep6) at ([yshift=1em]sep1.north) {\scriptsize{...}};
+\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t5) at ([yshift=1em]Trm9.north) {\scriptsize{$T_5$}};
+
+\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm0.south);
+\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm1.south);
+\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm2.south);
+\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm3.south);
+\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm4.south);
+\draw [->] ([yshift=0.1em]e2.north) -- ([yshift=-0.1em]Trm1.south);
+\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Trm0.south);
+\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Trm1.south);
+\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Trm2.south);
+\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Trm3.south);
+\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Trm4.south);
+\draw [->] ([yshift=0.1em]e4.north) -- ([yshift=-0.1em]Trm3.south);
+\draw [->] ([yshift=0.1em]e5.north) -- ([yshift=-0.1em]Trm4.south);
+
+\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm5.south);
+\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm6.south);
+\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm7.south);
+\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm8.south);
+\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm9.south);
+\draw [->] ([yshift=0.1em]Trm1.north) -- ([yshift=-0.1em]Trm6.south);
+\draw [->] ([yshift=0.1em]Trm2.north) -- ([yshift=-0.1em]Trm5.south);
+\draw [->] ([yshift=0.1em]Trm2.north) -- ([yshift=-0.1em]Trm6.south);
+\draw [->] ([yshift=0.1em]Trm2.north) -- ([yshift=-0.1em]Trm7.south);
+\draw [->] ([yshift=0.1em]Trm2.north) -- ([yshift=-0.1em]Trm8.south);
+\draw [->] ([yshift=0.1em]Trm2.north) -- ([yshift=-0.1em]Trm9.south);
+\draw [->] ([yshift=0.1em]Trm3.north) -- ([yshift=-0.1em]Trm8.south);
+\draw [->] ([yshift=0.1em]Trm4.north) -- ([yshift=-0.1em]Trm9.south);
+
+\draw [->] ([yshift=0.1em]Trm5.north) -- ([yshift=-0.1em]t1.south);
+\draw [->] ([yshift=0.1em]Trm6.north) -- ([yshift=-0.1em]t2.south);
+\draw [->] ([yshift=0.1em]Trm7.north) -- ([yshift=-0.1em]t3.south);
+\draw [->] ([yshift=0.1em]Trm8.north) -- ([yshift=-0.1em]t4.south);
+\draw [->] ([yshift=0.1em]Trm9.north) -- ([yshift=-0.1em]t5.south);
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
+
+\end{frame}
+
+
 \end{CJK}
 \end{document}