\def\CTeXPreproc{Created by ctex v0.2.13, don't edit!}
\documentclass[cjk,t,compress,12pt]{beamer}
\usepackage{pstricks}
\usepackage{etex}
\usepackage{eso-pic,graphicx}
\usepackage{fancybox}
\usepackage{amsmath,amssymb}
\usepackage{setspace}
\usepackage{xcolor}
\usepackage{array,multirow}
\usepackage{CJK}
\usepackage{tikz}
\usepackage{tikz-qtree}
\usepackage{hyperref}
\usepackage{changepage}
\usepackage{pgfplots}
\usepackage{subfigure}
\usepackage{tikz-3dplot}
\usepackage{esvect}

\usepackage{tcolorbox}
\tcbuselibrary{skins}

% TikZ/PGF libraries needed by the figures in this deck; each library is
% requested exactly once.
\usepgflibrary{arrows} % pure-PGF arrow tips
\usetikzlibrary{calc,intersections}
\usetikzlibrary{matrix}
\usetikzlibrary{arrows,decorations.pathreplacing}
\usetikzlibrary{shadows,shadows.blur} % drop shadows under the neuron nodes
\usetikzlibrary{decorations,shapes}
\usetikzlibrary{positioning,fit}
\usetikzlibrary{mindmap,backgrounds} % mind map

\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}

\setbeamertemplate{items}[ball]
\usefonttheme[onlymath]{serif}  % serif font for math only

\definecolor{ugreen}{rgb}{0,0.5,0}
\definecolor{lgreen}{rgb}{0.9,1,0.8}
\definecolor{xtgreen1}{rgb}{0.824,0.898,0.8}
\definecolor{xtgreen}{rgb}{0.914,0.945,0.902}
\definecolor{lightgray}{gray}{0.85}

\setbeamercolor{uppercol}{fg=white,bg=ugreen}
\setbeamercolor{lowercol}{fg=black,bg=xtgreen}

\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\setbeamercolor{uppercolblue}{fg=white,bg=ublue}
\setbeamercolor{lowercolblue}{fg=black,bg=blue!10}


%\usetheme{default}
%\usetheme{Darmstadt}
%\usetheme{Madrid}
%\usetheme{Frankfurt}
%\usetheme{Dresden}
%\usetheme{Boadilla}
%\usecolortheme{dolphin}

\newcounter{mycount1}
\newcounter{mycount2}
\newcounter{mycount3}
\newcounter{mycount4}

\usefonttheme[onlylarge]{structurebold}

\IfFileExists{C:/WINDOWS/win.ini}
{\newcommand{\mycfont}{you}}
{\newcommand{\mycfont}{gbsn}}

\begin{CJK}{GBK}{\mycfont}
\end{CJK}

\setbeamerfont*{frametitle}{size=\large,series=\bfseries}
\setbeamertemplate{navigation symbols}{\begin{CJK}{GBK}{\mycfont} ������ �����������ģ�� \hspace*{2em} Фͩ\&�쾸�� \end{CJK} \hspace*{2em} \today \hspace*{2em} \insertframenumber{}/\inserttotalframenumber}

\setbeamertemplate{itemize items}[circle] % first-level bullets: circles
\setbeamertemplate{itemize subitem}[triangle] % second-level bullets: triangles
\setbeamertemplate{itemize subsubitem}[ball] % third-level bullets: balls

\begin{document}

\begin{CJK}{GBK}{\mycfont}

\title{\Large{�����������ģ��}}
\author{\large{\textbf{Фͩ\ \ �쾸��}}}
\institute{
\blue{\url{xiaotong@mail.neu.edu.cn}} \black{} \\
\blue{\url{zhujingbo@mail.neu.edu.cn}} \black{} \\
\vspace{1.0em}
������ѧ ��Ȼ���Դ���ʵ���� \\
\blue{\underline{\url{http://www.nlplab.com}}} \black{} \\
\vspace{0.2cm}
\hspace{0.1cm} \includegraphics[scale=0.1]{../Figures/logo.pdf}
}
\date{}

\maketitle

\setlength{\leftmargini}{1em}
\setlength{\leftmarginii}{1em}

%%%------------------------------------------------------------------------------------------------------------
\section{ΪʲôҪ̸������}

%%%------------------------------------------------------------------------------------------------------------
\subsection{��ʷ}

%%%------------------------------------------------------------------------------------------------------------
%%% ΪʲôҪ̸������
\begin{frame}{ΪʲôҪ̸������}
\begin{itemize}
\item ��Щ��\textbf{���ѧϰ��Deep Learning��}�����˾޴��DZ��
    \begin{itemize}
    \item ϯ���˰��������������ڵĺܶ�NLP����
    \item �Ѿ���Ϊ��NLP�з������·�ʽ
    \item ������\textbf{�񾭻�������}����һ����������һ�����ݣ�
    \end{itemize}
\vspace{0.2em}
\begin{center}
\includegraphics[scale=0.45]{./Figures/deeplearning.jpg}
\end{center}
\vspace{0.5em}

\item<2-> \textbf{�˹������磨Artificial Neural Network��}�����ѧϰ��ʵ������

\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% �򵥵���ʷ
\begin{frame}{����������ѧϰ�ĸ��1940s-1970s��}
\begin{itemize}
\item \textbf{������}��������ڿ������У�Cybernetics��������������������壨Connectionism���б��ἰ
    \begin{itemize}
    \item \textbf{������뷨}��ģ����Ե�����ѧϰ���ƽ��м������ģ
    \item<2-> ����ʹ�����Լ�Ȩ��������������$\textbf{x}$�ͽ��$\textbf{y}$֮�����ϵ
    \vspace{-0.5em}
    % Linear model: weighted sum of the inputs x_i with weights w_i.
    % \cdots (centred dots) is the correct ellipsis between binary operators.
    % NOTE(review): the trailing \\ after the display appears to add an empty
    % line; it is kept so the slide's vertical spacing stays as it was.
    \begin{displaymath}
    f(\textbf{x},\textbf{w})=x_1 \cdot w_1 + \cdots + x_n \cdot w_n
    \end{displaymath}\\
    \vspace{-0.5em}
    ����$\textbf{w}$��Ȩ�ء�����ģ��ҲӰ��������ݶ��½������ڻ���ѧϰ�����ķ�չ��
    \item<3-> ���෽���ľ���Ҳ�����ԣ��޷��������������⣬���������������XOR��ѧϰ����
    \end{itemize}

\end{itemize}

\vspace{-0.5em}
\begin{center}
\includegraphics[scale=0.21]{./Figures/concept-history.jpg}\\
\scriptsize{ͼƬ���ԡ�Deep Learning��}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ���ѧϰ�ķ�չ
\begin{frame}{����������ѧϰ�ķ�չ��1980s-1990s��}
\begin{itemize}
\item ���ڣ�����ѧ�����Ѿ������������Ψһ�����Դ�����ѧϰҲ�������µķ�չ�׶Ρ�����˼��Ӱ��޴�
    \begin{itemize}
    \item<2-> \textbf{�������壨Connectionism��}������֪ѧ���У����ڵķ������壨Symbolicism�����ѽ��ʹ������ʹ����Ԫ������������������ĺ���˼���ǣ��������򵥵ļ��㵥Ԫ���ӵ�һ�����ʵ��������Ϊ����\\
        ��Ҳ�ƶ��˷��򴫲���ѵ����������緽����Ӧ�ã�����չ�˰�������ʱ����ģ�����ڵľ��佨ģ������
    \item<3-> \textbf{�ֲ�ʽ��ʾ��Distributed representation��}��һ������ϵͳ���κβ��ֵ����붼Ӧ���Ƕ��������ͬ��ʾ�Ľ�������磬һ�����ʲ���һ�������������ɳɰ���ǧ��������ͬ������������ÿ������������������ʵ�"ij��"���档
    \end{itemize}
\item<4-> \alert{�ź�����}��������90������ڣ��ںܶ�Ӧ�������Ƕ������緽���������ߣ����ǽ����û�дﵽԤ�ڡ��ر��ǣ��˷�����ͼģ�͵Ȼ���ѧϰ����ȡ���˺ܺõ�Ч�����������о�������һ�ε͹ȡ�

\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ���ѧϰ�ĵ������˳�
\begin{frame}{�������˳���2000s-now��}
\begin{itemize}
\item \textbf{���ѧϰ�ı���}Դ��2006��Hinton���˳ɹ�ѵ����һ������������磨deep belief network����֮�����ѧϰ���˳���ϯ���˻���ѧϰ���˹�����Ӧ���������������ִ����ѧϰ�ijɹ���������ԭ��
    \begin{enumerate}
    \item \textbf{ģ�ͺ��㷨}��������Ľ�
    \item \textbf{���м�������}������ʹ���ģʵ����Ϊ�˿���
    \item ��Hinton����Ϊ������ѧ�ߵ�\textbf{��������Ͷ��}
    \end{enumerate}
\item<2-> \textbf{��Ӧ�õĽǶ�}���������Ŀ���������ģ������������ҲΪ���ѧϰ�ijɹ��ṩ������
\end{itemize}

\visible<2->{
\begin{center}
% Semi-log chart of bilingual corpus size (sentences) per year.
% The second series and its legend entry appear from overlay 3 onwards.
\begin{tikzpicture}
\scriptsize{
\begin{semilogyaxis}[
    width=.95\textwidth,
    height=.38\textwidth,
    yticklabel style={/pgf/number format/precision=1,/pgf/number format/fixed zerofill},
    xticklabel style={/pgf/number format/1000 sep=},
    xlabel style={yshift=0.5em},
    xlabel={\footnotesize{Year}},ylabel={\footnotesize{\# of sents.}},
    ymin=1,ymax=1000000000000,
    xmin=1999,xmax=2020,xtick={2000,2005,2010,2015,2020},
    legend style={yshift=-5em,xshift=0em,legend cell align=left,legend plot pos=right}
]

% Only one mark can be active per plot; the original listed mark=square and
% then mark=star, so star was the one actually used.
\addplot[purple,mark=star,very thick] coordinates {(2001,10000) (2005,2000000) (2008,8000000) (2009,9000000) (2011,10000000) (2012,12000000) (2014,20000000) (2016,30000000) (2018,40000000) };
\addlegendentry{\tiny{Bi-text used in MT papers}\ \ \ \ \ \ \ \ \ \ }
\only<3->{
\addplot[ublue,mark=otimes*,very thick] coordinates {(2005,10000000) (2008,100000000) (2012,3000000000) (2016,5000000000) (2019,10000000000) };
\addlegendentry{\tiny{Bi-text used in practical systems}}
}

\end{semilogyaxis}
}
\end{tikzpicture}
\end{center}
}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
\subsection{���ѧϰ������}

%%%------------------------------------------------------------------------------------------------------------
%%% �˵���ѧϰ
\begin{frame}{�˵���ѧϰ}
\begin{itemize}
\item ���������������ṩ��һ�ֻ��ƣ�����ֱ�Ӵ�ѧϰ���뵽����Ĺ�ϵ����֮Ϊ\alert{�˵���ѧϰ}
    \begin{itemize}
    \item<2-> \textbf{�����������̵ķ���}����Ҫ�����˹�������������������������������������������
    \item<3-> \textbf{���ڶ˵���ѧϰ�ķ���}��û���˹����������������������ȫ�������罨ģ
    \end{itemize}
\end{itemize}
\vspace{-0.5em}
\begin{center}
\visible<2->{
\includegraphics[scale=0.31]{./Figures/end2end-learning-1.jpg}\\
}
\visible<3->{
\Large{\textbf{VS.}}\\
\vspace{0.3em}
\includegraphics[scale=0.31]{./Figures/end2end-learning-2.jpg}\\
}
\end{center}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ���ѧϰ�ı���
\begin{frame}{���ѧϰ�ı��� - �����Խ�ģΪ��}
\begin{itemize}
\item \textbf{����}�������Խ�ģ��LM�������ϣ���������������ѧϰ�ķ��������˾޴����ƣ���PTB������PPLֵ�Ѿ��õ����˵��½���PPLԽ��Խ�ã�
	\begin{itemize}
	\item ��ͳ$n$Ԫ�﷨ģ����������ϡ�������
	\item<2-> ������ģ�Ϳ��Ը��õ�����������������
	\end{itemize}
\end{itemize}
\begin{tabular}{l | l | l | r}
ģ�� & ���� & ��� & PPL  \\ \hline
3-gram LM & Brown et al. & 1992 & 178.0 \pause \\ \hline
Feed-forward Neural LM & Bengio et al. & 2003 & 162.2 \\
Recurrent NN-based LM & Mikolov et al. & 2010 & 124.7 \\
Recurrent NN-LDA & Mikolov et al. & 2012 & 92.0 \\
LSTM & Zaremba et al. & 2014 & 78.4 \\
RHN & Zilly et al. & 2016 & 65.4 \\
AWD-LSTM & Merity et al. & 2018 & 58.8 \\
GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7}
\end{tabular}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
\section{���������}

%%%------------------------------------------------------------------------------------------------------------
\subsection{�򵥵�����}

%%%------------------------------------------------------------------------------------------------------------
%%% ��Ԫ
\begin{frame}{������Ļ�����Ԫ - ��Ԫ}
\begin{itemize}
\item ����ѧ�ϣ���Ԫ����ϵͳ�Ļ�����ɵ�Ԫ���ܶ��������������Ӧ����������\\
\begin{center}
\includegraphics[scale=0.25]{./Figures/neuron-real.jpg}\\
\end{center}
\item<2-> ����������˵����\textbf{�˹���Ԫ}��ʵ������������ :)
    \begin{itemize}
    \item ����$\textbf{x}$����$\textbf{w}$�������Ա仯��֮�����ƫ��$\textbf{b}$���ھ��������$f$�����õ�$\textbf{y}$ - ɶ����������
    \end{itemize}
{\Large
\begin{displaymath}
\textbf{y} = f(\textbf{x} \cdot \textbf{w} + \textbf{b})
\end{displaymath}
}
\\
\vspace{-0.5em}
\end{itemize}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ��֪��
\begin{frame}{��򵥵��˹���Ԫģ�� - ��֪����Perceptron��}
\begin{itemize}
\item ��֪�����˹���Ԫ��һ��ʵ������������50-60���������󣬶��������о���������ԶӰ�졣
    \begin{itemize}
    \item<2-> \textbf{����}�����ɸ���ֵ������$x_i=0$ or $1$
    \item<3-> ÿһ�����������Ӧһ��\textbf{Ȩ��}$w_i$��ʵ����
    \item<4-> \textbf{���}Ҳ��һ����ֵ�����$y=0$ or $1$�� �жϵ������ǣ�����ͼ�Ȩ���Ƿ���ڣ�����С�ڣ�һ����ֵ$\sigma$��
    % Perceptron decision rule: y is 1 when the weighted input sum reaches the
    % threshold sigma, and 0 otherwise. Uses the amsmath cases environment
    % instead of a hand-built \left\{ array \right. construct.
    \begin{displaymath}
    y = \begin{cases}
    0 & \sum_i {w_i \cdot x_i} < \sigma \\
    1 & \sum_i {w_i \cdot x_i} \ge \sigma
    \end{cases}
    \end{displaymath}
    \end{itemize}
\end{itemize}

\begin{center}
% Perceptron diagram: inputs x_0..x_2 feed a single neuron that emits y.
% Overlays: <2> input arrows in red, <3> weight labels in red,
% <4-> threshold test shown inside the neuron, <5-> worked firing example.
\begin{tikzpicture}
\begin{scope}[
    perceptron unit/.style={circle,draw,ublue,very thick,minimum size=3.5em,fill=white,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}},
    signal/.style={->,thick}]

% Neuron in the middle, inputs stacked on the left, output on the right.
\node [anchor=center,perceptron unit] (neuron) at (0,0) {};
\node [anchor=east] (x1) at ([xshift=-6em]neuron.west) {\Large{$x_1$}};
\node [anchor=center] (x0) at ([yshift=3em]x1.center) {\Large{$x_0$}};
\node [anchor=center] (x2) at ([yshift=-3em]x1.center) {\Large{$x_2$}};
\node [anchor=west] (y) at ([xshift=6em]neuron.east) {\Large{$y$}};

% Base layer: one weighted connection per input (index / entry angle on the neuron).
\foreach \idx/\port in {0/150,1/180,2/210}
    \draw [signal] (x\idx.east) -- (neuron.\port) node [pos=0.5,above] {$w_\idx$};
\draw [signal] (neuron.east) -- (y.west);

% Overlay 2: repaint the connections in red, keeping the labels black.
\visible<2>{
\foreach \idx/\port in {0/150,1/180,2/210}
    \draw [signal,red] (x\idx.east) -- (neuron.\port) node [pos=0.5,above] {\black{$w_\idx$}};
}

% Overlay 3: repaint the connections with red weight labels.
\visible<3>{
\foreach \idx/\port in {0/150,1/180,2/210}
    \draw [signal] (x\idx.east) -- (neuron.\port) node [pos=0.5,above] {\red{$w_\idx$}};
}

% Overlay 4 onwards: threshold comparison written inside the neuron.
\visible<4->{
\node [anchor=center] (neuronmath) at (neuron.center) {\red{\small{$\sum \ge \sigma$}}};
}

% Overlay 5 onwards: the condition is met, so the output arrow turns red and y = 1.
\visible<5->{
\node [anchor=south] (prediction) at ([xshift=-2em,yshift=1em]y.north west) {\footnotesize{\red{$x_0 w_0 + x_1 w_1 + x_2 w_2 \ge \sigma$}}};
\draw [signal,red] (neuron.east) -- (y.west);
\node [anchor=west] (yvalue) at ([yshift=0.2em]y.east) {\Large{$=1$}};
}

\end{scope}
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ��֪�� - һ������
\begin{frame}{һ������}
\begin{itemize}
\item һ���dz��򵥵����ӡ����磬��һ�����ֻᣬ�����ھ����Ƿ�ȥ�μӣ����������ػ�Ӱ����ľ���
    \begin{itemize}
    \item $x_0$���糡�Ƿ������㹻����
    \item $x_1$��Ʊ���Ƿ����300Ԫ��
    \item $x_2$��Ů�����Ƿ�ϲ�����ֻ᣿
    \end{itemize}
\item<2-> ��ξ��������磬Ů���Ѻ�ϣ������һ�𣬵��Ǿ糡��Զ����Ʊ��500Ԫ�������Щ���ض���ľ��߶���ͬ����Ҫ�ģ���ô����һ���ۺϵ÷֣�
    \begin{displaymath}
    x_0 \cdot w_0 + x_1 \cdot w_1 + x_2 \cdot w_2 = 0 \cdot 1 + 0 \cdot 1 + 1 \cdot 1 = 1
    \end{displaymath}
\item<3-> ����㲻��ʮ�־��ᣬ�ܹ����ܲ����������飬����ܻ���$\sigma=1$������
    \begin{displaymath}
    \sum_i x_i \cdot w_i \ge \sigma
    \end{displaymath}
    \textbf{��ô}�����ȥ�μ����ֻ�
\end{itemize}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ��֪�� - һ�����ӣ�Ȩ��
\begin{frame}{һ������ - Ȩ��}
\begin{itemize}
\item ���Կ�����ʵ����������߹��̱����Ͼ���һ����֪��
\item<2-> ���ǣ��˲����������ģ���������Щ����������һЩ���������\textbf{�ز�ū}����˻��Ʊ�ۿ��ĸ���һЩ����ʱ����ò����ȵ�Ȩ�ؼ���ÿ�����ص�Ӱ�죬���磺$w_0=0.5$��$w_1=2$��$w_2=0.5$
\item<3-> Ů�Ѻ�ϣ������һ�𣬵��Ǿ糡��Զ����Ʊ��500Ԫ���ᵼ����\alert{ѡ��ȥ}�����ֻᣨŮ���Ѷ���Ҫ�ˣ�զ����
    \begin{displaymath}
    \sum_i x_i \cdot w_i = 0 \cdot 0.5 + 0 \cdot 2 + 1 \cdot 0.5 = 0.5 < \sigma = 1
    \end{displaymath}
\end{itemize}

\vspace{-1.8em}
\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=center,circle,draw,ublue,very thick,minimum size=3.5em,fill=white,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] (neuron) at (0,0) {};
\node [anchor=east] (x1) at ([xshift=-6em]neuron.west) {$x_1$:Ʊ�۹��ͣ�};
\node [anchor=center] (x0) at ([yshift=3em]x1.center) {$x_0$:���빻����};
\node [anchor=center] (x2) at ([yshift=-3em]x1.center) {$x_2$:Ů��ϲ����};
\node [anchor=west] (y) at ([xshift=2em]neuron.east) {$y$:ȥ�����Dz�ȥ��};

\visible<1>{
\draw [->,thick] (x0.east) -- (neuron.150) node [pos=0.5,above,yshift=0.2em] {\small{$w_0=1$}};
\draw [->,thick] (x1.east) -- (neuron.180) node [pos=0.5,above,yshift=-0.1em] {\small{$w_1=1$}};
\draw [->,thick] (x2.east) -- (neuron.210) node [pos=0.5,above,yshift=0.1em] {\small{$w_2=1$}};
}
\draw [->,thick] (neuron.east) -- (y.west);

\node [anchor=center] (neuronmath) at (neuron.center) {\small{$\sum \ge \sigma$}};

\visible<2->{
\draw [->,thin,red] (x0.east) -- (neuron.150) node [pos=0.5,above,yshift=0.2em] {\small{$w_0=.5$}};
\draw [->,line width=0.8mm,red] (x1.east) -- (neuron.180) node [pos=0.5,above,yshift=-0.1em] {\textbf{\small{$w_1=2$}}};
\draw [->,thin,red] (x2.east) -- (neuron.210) node [pos=0.5,above,yshift=0.1em] {\small{$w_2=.5$}};
}

\visible<3->{
\node [anchor=south] (ylabel) at (y.north) {\red{\textbf{��ȥ�ˣ�}}};
}

\end{scope}
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ��֪�� - һ�����ӣ�������ʽ
\begin{frame}{һ������ - ������ʽ}

\begin{itemize}
\item ��������Ů��һ����˺�֮������ʶ�����߲�Ӧ��ֻ���Ƿ�0��1�����أ�Ӧ�ð�``�̶�''���ǽ�����
    \begin{itemize}
    \item $x_0$��10/����
    \item $x_1$��150/Ʊ��
    \item $x_2$��Ů�����Ƿ�ϲ����(�������Ҹ�)
    \end{itemize}
\item<2-> ��ģ���У�$x_0$��$x_1$������������$x_2$��һ����ɢ����
\end{itemize}

\visible<2->{
\begin{tikzpicture}

\begin{scope}
\draw [->,thick] (0,0) -- (2.5,0);
\draw [->,thick] (0,0) -- (0, 1.5);
\draw [-,very thick,ublue,domain=0.6:2,samples=100] plot (\x,{ 1/\x - 0.2});
\node [anchor=east] (ylabel) at (0, 3.2em) {\footnotesize{$x_0$}};
\node [anchor=north] (xlabel) at (5em, 0em) {\scriptsize{����(km)}};
\end{scope}

\begin{scope}[xshift=9em]
\draw [->,thick] (0,0) -- (2.5,0);
\draw [->,thick] (0,0) -- (0, 1.5);
\draw [-,very thick,ublue,domain=0.4:2,samples=100] plot (\x,{ 0.5/\x});
\node [anchor=east] (ylabel) at (0, 3.2em) {\footnotesize{$x_1$}};
\node [anchor=north] (xlabel) at (5em, 0em) {\scriptsize{Ʊ��(Ԫ)}};
\end{scope}

\begin{scope}[xshift=18em]
\draw [->,thick] (0,0) -- (2.5,0);
\draw [->,thick] (0,0) -- (0, 1.5);
\node [anchor=east] (ylabel) at (0, 3.2em) {\footnotesize{$x_2$}};
\node [anchor=south, fill=ublue, minimum width=1.5em, minimum height=0.1em, inner sep=0] (histogram1) at (1.5em, 0) {};
\node [anchor=south, fill=ublue, minimum width=1.5em, minimum height=3em, inner sep=0] (histogram2) at (4.0em, 0) {};
\node [anchor=north] (hlabel1) at (histogram1.south) {\tiny{Ů�Ѳ�ȥ}};
\node [anchor=north] (hlabel2) at (histogram2.south) {\tiny{Ů��ȥ}};
\end{scope}

\end{tikzpicture}
}

\begin{itemize}
\item<3-> Ů���Ѻ�ϣ������һ�𣬵��Ǿ糡��20kmԶ����Ʊ��500Ԫ��������\ $x_0 = 10/20 = 0.5$��$x_1=150/500 = 0.3$, $x_2=1$���ۺ�����$\sum_i x_i \cdot w_i \ge \sigma$������{\color{red} ȥ�����ֻ�} :)
    \begin{displaymath}
    \sum_i x_i \cdot w_i = 0.5 \cdot 0.5 + 0.3 \cdot 2 + 1 \cdot 0.5 = 1.35 \ge \sigma = 1
    \end{displaymath}
\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ��֪�� - һ������ - ѧϰ
\begin{frame}{һ������ - ѧϰ}

\begin{itemize}
\item һ�γɹ������ֻ�֮�����ƺ�������������ֻҪŮ���ѿ��ľͺã�Ϊ�β���������ص�Ȩ�ص�����򵥵ķ�ʽ�ǰ�$w_0$��$w_1$��Ȩ�ض���0��ͬʱ��$w_2 > 0$
\item<3-> �ܿ�����һ�����ֻᣬ����1000���Ʊ�ۣ�����·�ѣ�3000Ԫ����Ȼ��Ů����һֱ��ϲ�����ֻ�ġ������µľ���ģ�ͣ������޷��˵�\alert{����ȥ��}�ⳡ���ֻ�
\item<4-> \textbf{֮��}����Ů�����ָ�����1����˺���ʹ��������
    \begin{itemize}
    \item \alert{����㷢��}��Ů�Ѽ�Ҫ������ͬʱҲ����
    \end{itemize}
\end{itemize}

\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=center,circle,draw,ublue,very thick,minimum size=3.5em,fill=white,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] (neuron) at (0,0) {};
\node [anchor=east] (x1) at ([xshift=-6em]neuron.west) {$x_1$:���˳̶�\ \ \ \ };
\node [anchor=center] (x0) at ([yshift=3em]x1.center) {$x_0$:Զ���̶�\ \ \ \ };
\node [anchor=center] (x2) at ([yshift=-3em]x1.center) {$x_2$:Ů��ϲ����};
\node [anchor=west] (y) at ([xshift=2em]neuron.east) {$y$:ȥ�����Dz�ȥ��};

\draw [->,thick] (neuron.east) -- (y.west);

\node [anchor=center] (neuronmath) at (neuron.center) {\small{$\sum \ge \sigma$}};

\visible<1>{
\draw [->,thin] (x0.east) -- (neuron.150) node [pos=0.5,above,yshift=0.2em] {\small{$w_0=.5$}};
\draw [->,line width=0.8mm] (x1.east) -- (neuron.180) node [pos=0.5,above,yshift=-0.1em] {\textbf{\small{$w_1=2$}}};
\draw [->,thin] (x2.east) -- (neuron.210) node [pos=0.5,above,yshift=0.1em] {\small{$w_2=.5$}};
}

\visible<2->{
\draw [->,dotted] (x0.east) -- (neuron.150) node [pos=0.5,above,yshift=0.2em] {\small{$w_0=0$}};
\draw [->,dotted] (x1.east) -- (neuron.180) node [pos=0.5,above,yshift=-0.1em] {\textbf{\small{$w_1=0$}}};
\draw [->,line width=1mm] (x2.east) -- (neuron.210) node [pos=0.5,above,yshift=0.1em] {\small{$w_2=10$}};
}


\end{scope}
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ��֪�� - һ������ - ѧϰ(cont)
\begin{frame}{һ������ - Ȩ��ѧϰ}
\begin{itemize}
\item \textbf{ʹ��˼ʹ}���㷢��ÿ�����ص�Ȩ����Ҫ׼ȷ�����ò��ܴﵽ��õľ���Ч��
    \begin{itemize}
    \item ���ȷ����õ�Ȩ�أ�
    \end{itemize}
\item<2-> \textbf{��Ȼ}������һ������ʵ������
    \begin{itemize}
    \item �����ܼ򵥣����ϵس��ԣ����ݽṹ���ϵص���Ȩ��
    \item<10-> �ڽ����˺ܶ��ʵ��󣬷�������Ժõ�һ��Ȩ��
    \end{itemize}
\end{itemize}

\visible<2->{
\begin{center}
\begin{tikzpicture}

\begin{scope}[scale=0.6]
\visible<3->{
\draw [->,thick] (0,0) -- (2.5,0);
\draw [->,thick] (0,0) -- (0, 1.5);
\draw [-,very thick,ublue,domain=0.6:2,samples=100] plot (\x,{ 1/\x - 0.2});
\node [anchor=east] (ylabel) at (0, 3.2em) {\footnotesize{$x_0$}};
\node [anchor=north] (xlabel) at (5em, 0em) {\tiny{����(km)}};
}

\visible<4->{
\draw [-,thick] (0.25,-1.5) -- (2.25,-1.5);
\node [anchor=east] (wlabel) at (0.25,-1.5) {\footnotesize{$w_0$}};
}

\visible<5>{\node [anchor=north,fill=ugreen,minimum height=0.5em,minimum width=1.5em] (w0) at (1.25,-1.5) {};}
\visible<6>{\node [anchor=north,fill=ugreen,minimum height=0.3em,minimum width=1.5em] (w0) at (1.25,-1.5) {};}
\visible<7>{\node [anchor=north,fill=ugreen,minimum height=1.8em,minimum width=1.5em] (w0) at (1.25,-1.5) {};}
\visible<8>{\node [anchor=north,fill=ugreen,minimum height=0.3em,minimum width=1.5em] (w0) at (1.25,-1.5) {};}
\visible<9>{\node [anchor=north,fill=ugreen,minimum height=0.3em,minimum width=1.5em] (w0) at (1.25,-1.5) {};}
\visible<10->{\node [anchor=north,fill=ugreen,minimum height=0.8em,minimum width=1.5em] (w0) at (1.25,-1.5) {};}

\end{scope}

\begin{scope}[scale=0.6,xshift=12em]
\visible<3->{
\draw [->,thick] (0,0) -- (2.5,0);
\draw [->,thick] (0,0) -- (0, 1.5);
\draw [-,very thick,ublue,domain=0.4:2,samples=100] plot (\x,{ 0.5/\x});
\node [anchor=east] (ylabel) at (0, 3.2em) {\footnotesize{$x_1$}};
\node [anchor=north] (xlabel) at (5em, 0em) {\tiny{Ʊ��(Ԫ)}};
}

\visible<4->{
\draw [-,thick] (0.25,-1.5) -- (2.25,-1.5);
\node [anchor=east] (wlabel) at (0.25,-1.5) {\footnotesize{$w_1$}};
}

\visible<5>{\node [anchor=north,fill=ugreen,minimum height=0.5em,minimum width=1.5em] (w1) at (1.25,-1.5) {};}
\visible<6>{\node [anchor=north,fill=ugreen,minimum height=1.5em,minimum width=1.5em] (w1) at (1.25,-1.5) {};}
\visible<7>{\node [anchor=north,fill=ugreen,minimum height=0.8em,minimum width=1.5em] (w1) at (1.25,-1.5) {};}
\visible<8>{\node [anchor=north,fill=ugreen,minimum height=0.1em,minimum width=1.5em] (w1) at (1.25,-1.5) {};}
\visible<9>{\node [anchor=north,fill=ugreen,minimum height=1.0em,minimum width=1.5em] (w1) at (1.25,-1.5) {};}
\visible<10->{\node [anchor=north,fill=ugreen,minimum height=0.3em,minimum width=1.5em] (w1) at (1.25,-1.5) {};}

\end{scope}

\begin{scope}[scale=0.6,xshift=24em]
\visible<3->{
\draw [->,thick] (0,0) -- (2.5,0);
\draw [->,thick] (0,0) -- (0, 1.5);
\node [anchor=east] (ylabel) at (0, 3.2em) {\footnotesize{$x_2$}};
\node [anchor=south, fill=ublue, minimum width=0.8em, minimum height=0.1em, inner sep=0] (histogram1) at (1.5em, 0) {};
\node [anchor=south, fill=ublue, minimum width=0.8em, minimum height=2em, inner sep=0] (histogram2) at (4.0em, 0) {};
\node [anchor=north,align=left] (hlabel1) at (histogram1.south) {\tiny{��no}};
\node [anchor=north,align=left] (hlabel2) at ([xshift=0.5em]histogram2.south) {\tiny{��yes}};
}

\visible<4->{
\draw [-,thick] (0.25,-1.5) -- (2.25,-1.5);
\node [anchor=east] (wlabel) at (0.25,-1.5) {\footnotesize{$w_2$}};
}

\visible<5>{\node [anchor=north,fill=ugreen,minimum height=0.5em,minimum width=1.5em] (w2) at (1.25,-1.5) {};}
\visible<6>{\node [anchor=north,fill=ugreen,minimum height=1.2em,minimum width=1.5em] (w2) at (1.25,-1.5) {};}
\visible<7>{\node [anchor=north,fill=ugreen,minimum height=0.8em,minimum width=1.5em] (w2) at (1.25,-1.5) {};}
\visible<8>{\node [anchor=north,fill=ugreen,minimum height=1.2em,minimum width=1.5em] (w2) at (1.25,-1.5) {};}
\visible<9>{\node [anchor=north,fill=ugreen,minimum height=1.5em,minimum width=1.5em] (w2) at (1.25,-1.5) {};}
\visible<10->{\node [anchor=north,fill=ugreen,minimum height=1.3em,minimum width=1.5em] (w2) at (1.25,-1.5) {};}

\end{scope}

\end{tikzpicture}
\end{center}

}

\visible<5->{
\begin{center}
\begin{tabular}{c<{\onslide<5->}c<{\onslide<6->}c<{\onslide<7->}c<{\onslide<8->}c<{\onslide<9->}c<{\onslide<10->}c<{\onslide}}
ʵ�� & 1 & 2 & 3 & 4 & ... & 10k \\
��� & ʧ�� & �ɹ� & ʧ�� & ʧ�� & ... & �ɹ�
\end{tabular}
\end{center}
}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ��֪�� - һ������ - �ܽ�
\begin{frame}{һ������ - �ܽ�}
\begin{itemize}
\item �������һ�������⣬������һ�ֺ���������׼ȷ�Ľ��о��߲����򵥡����������ģ���У�����һЩ\alert{����}��Ҫ�ش�
    \begin{itemize}
    \item<2-> �����⽨ģ��������������$\{x_i\}$����ʽ
    \item<3-> �����Ч�ľ���ģ�ͣ���������$y$
    \item<4-> ����ģ�����漰�IJ�������Ȩ��$\{w_i\}$��������ֵ
    \end{itemize}
\end{itemize}

\vspace{-2em}
\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=center,circle,draw,ublue,very thick,minimum size=3.5em,fill=white,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] (neuron) at (0,0) {};

\visible<2->{
\node [anchor=east] (x1) at ([xshift=-6em]neuron.west) {$x_1$:���˳̶�\ \ \ \ };
\node [anchor=center] (x0) at ([yshift=3em]x1.center) {$x_0$:Զ���̶�\ \ \ \ };
\node [anchor=center] (x2) at ([yshift=-3em]x1.center) {$x_2$:Ů��ϲ����};
}

\visible<3->{
\node [anchor=west] (y) at ([xshift=2em]neuron.east) {$y$:ȥ�����Dz�ȥ��};
\node [anchor=center] (neuronmath) at (neuron.center) {\small{$\sum \ge \sigma$}};
}

\draw [->,thick] (neuron.east) -- (y.west);

\draw [->,thick] (x0.east) -- (neuron.150);
\draw [->,thick] (x1.east) -- (neuron.180);
\draw [->,thick] (x2.east) -- (neuron.210);

\visible<4->{
\draw [->,thick] (x0.east) -- (neuron.150) node [pos=0.5,above,yshift=0.2em] {$w_0$};
\draw [->,thick] (x1.east) -- (neuron.180) node [pos=0.5,above,yshift=-0.1em] {$w_1$};
\draw [->,thick] (x2.east) -- (neuron.210) node [pos=0.5,above,yshift=0.1em] {$w_2$};
}

\end{scope}
\end{tikzpicture}
\end{center}

\vspace{-0.5em}

\begin{itemize}
\item<5-> \textbf{��Ȼ}����������ݻ��漰��������⣬���Ҳ�ֹ��Щ :)
\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
\subsection{�˹���Ԫ}

%%%------------------------------------------------------------------------------------------------------------
%%% outline
\begin{frame}{�����˹�������(���ѧϰ)��������������}

\vspace{1em}

\begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow]
{\Large
\textbf{1. �˹�������Ļ�����Ԫ��ʲô,}

\vspace{0.4em}
\textbf{\hspace{0.9em} �����ϳ���ǿ���ģ�ͣ�}
}
\end{tcolorbox}

\vspace{0.5em}

\begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow]
{\Large
\textbf{2. �˹����������ѧ������ʲô,}

\vspace{0.4em}
\textbf{\hspace{0.9em} ��α��ʵ��������ѧģ�ͣ�}
}
\end{tcolorbox}

\vspace{0.5em}

\begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow]
{\Large
\textbf{3. ��ζ�ģ���еIJ�������ѧϰ,}

\vspace{0.4em}
\textbf{\hspace{0.9em} ֮��ʹ��ѧϰ����ģ�ͽ����ƶϣ�}
}
\end{tcolorbox}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% outline: problem 1
\begin{frame}{����}

\vspace{6em}

\begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow]
{\Large
\textbf{�˹�������Ļ�����Ԫ��ʲô,}

\vspace{0.4em}
\textbf{�����ϳ���ǿ���ģ�ͣ�}
}
\end{tcolorbox}

\vspace{2em}
\begin{center}
% Single highlighted node (light-blue fill) showing the formula y = ?(x).
\begin{tikzpicture}
\node [fill=blue!10] (label) at (0,0) {\Large{$\textbf{y} = ?(\textbf{x})$ }};
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ���Դ�������
\begin{frame}{Ԥ�� - ���Դ���֪ʶ}
\begin{itemize}
\item \textbf{����}��������$a$��ʾһ������(һ����)���ô���$\textbf{a}$��ʾһ������(������)������$a_{ij}$��ʾ$\textbf{a}$��$i$�С���$j$�е�Ԫ��\\
    \begin{displaymath}
    a = 5 \hspace{3em} \textbf{a} = \begin{pmatrix} a_{11} & a_{12} \\ a_{21} & a_{22} \end{pmatrix} = \begin{pmatrix} 1 & 2 \\ 3 & 4 \end{pmatrix}
    \end{displaymath}
\item \textbf{����}��һ������ľ���ֻ��һ�л���һ�У�����Ĭ��ʹ��������������$\textbf{a} = (a_1,a_2,a_3) = (10, 20, 30)$��$\textbf{a}$��Ӧ����������Ϊ$\textbf{a}^T$
\item<2-> \textbf{��������}��������԰�λ����+��-�ȴ������㣬����$\textbf{a} = \begin{pmatrix} 1 & 2 \\ 3 & 4 \end{pmatrix}$��$\textbf{b} = \begin{pmatrix} 1 & 1 \\ 1 & 1 \end{pmatrix}$����$\textbf{a} + \textbf{b} = \begin{pmatrix} 2 & 3 \\ 4 & 5 \end{pmatrix}$
\item<3-> \textbf{�����΢��}����λ���У����ھ���$\textbf{c}$�ͱ���$x$��
    \begin{displaymath}
    \frac{\partial \textbf{c}}{\partial x} = \begin{pmatrix} \frac{\partial c_{11}}{\partial x} & \frac{\partial c_{12}}{\partial x} \\ \frac{\partial c_{21}}{\partial x} & \frac{\partial c_{22}}{\partial x} \end{pmatrix} \hspace{2em} \frac{\partial x}{\partial \textbf{c}} = \begin{pmatrix} \frac{\partial x}{\partial c_{11}} & \frac{\partial x}{\partial c_{12}} \\ \frac{\partial x}{\partial c_{21}} & \frac{\partial x}{\partial c_{22}} \end{pmatrix}
    \end{displaymath}
\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ���Դ�������
\begin{frame}{Ԥ�� - ���Դ���֪ʶ(��)}
\begin{itemize}


\item \textbf{����ij˷�}������$\textbf{a} \in \mathbb{R}^{n \times k}$��$\textbf{b} \in \mathbb{R}^{k \times m}$����$\textbf{c} = \textbf{a} \textbf{b} \in \mathbb{R}^{n \times m}$��ʾ\textbf{a}��\textbf{b}�ľ���˷�������
    \begin{displaymath}
    c_{pq} = \sum_{i = 1}^k a_{pi} b_{iq}
    \end{displaymath}
    ���ڷ���$\left\{ \begin{array}{l} 5x_{1} + 2x_{2} = y_{1} \\ 3x_{1} + x_{2} = y_{2}\end{array} \right.$�����Ա�ʾΪ$\textbf{a} \textbf{x}^T = \textbf{y}^T$ ����$\textbf{a}=\begin{pmatrix} 5 & 2 \\ 3 & 1 \end{pmatrix}$��$\textbf{x}^T =\begin{pmatrix} x_1 \\ x_2 \end{pmatrix}$��$\textbf{y}^T =\begin{pmatrix} y_1 \\ y_2 \end{pmatrix}$
\item<2-> \textbf{����}
    \begin{itemize}
    \item \textbf{��λ����}������$\textbf{I}$��$I_{ij} = 1$���ҽ���$i=j$������$I_{ij} = 0$
    \item \textbf{ת��}��$\textbf{a}$��ת�ü�Ϊ$\textbf{a}^T$����$a^T_{ji}=a_{ij}$
    \item \textbf{�����}������$\textbf{a}$��������Ϊ$\textbf{a}^{-1}$����$\textbf{a} \textbf{a}^{-1} = \textbf{a}^{-1} \textbf{a} = \textbf{I}$
    \item \textbf{����(����)�ķ���}��$||\textbf{a}||_p = \big( \sum_i |a_i|^p \big)^{\frac{1}{p}}$
    \end{itemize}

\end{itemize}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% �˹���Ԫ�ĺ�����ʽ
\begin{frame}{�˹���Ԫ��һ������}

\begin{itemize}
\item ��Ԫ��
\end{itemize}

\vspace{-1em}
\begin{center}
\begin{tikzpicture}

\node [anchor=center] (y) at (0,0) {\LARGE{$\textbf{y}$}};
\node [anchor=west] (eq) at (y.east) {\LARGE{$=$}};
\node [anchor=west] (func) at (eq.east) {\LARGE{$f$}};
\node [anchor=west] (brace01) at (func.east) {\LARGE{$($}};
\node [anchor=west] (x) at (brace01.east) {\LARGE{$\textbf{x}$}};
\node [anchor=west] (dot) at (x.east) {\LARGE{$\cdot$}};
\node [anchor=west] (w) at (dot.east) {\LARGE{$\textbf{w}$}};
\node [anchor=west] (plus) at (w.east) {\LARGE{$+$}};
\node [anchor=west] (b) at (plus.east) {\LARGE{$\textbf{b}$}};
\node [anchor=west] (brace02) at (b.east) {\LARGE{$)$}};

\visible<2->{
\node [anchor=center,fill=yellow!30] (x2) at (x) {\LARGE{$\textbf{x}$}};
\node [anchor=south] (xlabel) at ([yshift=1.5em]x.north) {����};
\draw [<-] ([yshift=0.2em]x2.north) -- (xlabel.south);
}

\visible<3->{
\node [anchor=center,fill=green!20] (w2) at (w) {\LARGE{$\textbf{w}$}};
\node [anchor=north] (wlabel) at ([yshift=-1.5em]w.south) {����(Ȩ��)};
\draw [<-] ([yshift=-0.2em]w2.south) -- (wlabel.north);
}

\visible<4->{
\node [anchor=center,fill=purple!20] (b2) at (b) {\LARGE{$\textbf{b}$}};
\node [anchor=south] (blabel) at ([yshift=1.3em]b.north) {ƫ��};
\draw [<-] ([yshift=0.2em]b2.north) -- (blabel.south);
}

\visible<5->{
\node [anchor=center,fill=blue!20] (func2) at (func) {\LARGE{$f$}};
\node [anchor=north] (funclabel) at ([yshift=-1.1em]func.south) {�����};
\draw [<-] ([yshift=-0.2em]func2.south) -- (funclabel.north);
}

\visible<6->{
\node [anchor=center,fill=red!20] (y2) at (y) {\LARGE{$\textbf{y}$}};
\node [anchor=south] (ylabel) at ([yshift=1.3em]y.north) {���};
\draw [<-] ([yshift=0.2em]y2.north) -- (ylabel.south);
}

\end{tikzpicture}
\end{center}

\vspace{-1em}
\begin{itemize}
\item<7-> �Ը�֪��Ϊ��
	\begin{itemize}
	\item ���룺$\textbf{x}=(x_0,...,x_n)$
	\item Ȩ�أ�$\textbf{w}=(w_0,...,w_n)$
	\item ƫ�ƣ�$\textbf{b} = (-\sigma)$
	\item �������$f(z)=1$ ��$z \ge 0$, �������$f(z)=0$
	\item �����$\textbf{y}=f(\textbf{x} \cdot \textbf{w} - \sigma)$
	\end{itemize}
\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ��ĸ���
\begin{frame}{``��"�ĸ���}
\begin{itemize}
\item ����һ�����⣨��ͬ���룩�����ܻ��ж���������ʱ���԰�\alert{�����ͬ����Ԫ��������}������һ\alert{``��"}
    \begin{itemize}
    \item ���磬����Ԥ����ҪͬʱԤ��ʪ�Ⱥ��¶�
    \end{itemize}
\end{itemize}

\vspace{-2em}

\begin{center}
\begin{tikzpicture}
\begin{scope}

\tikzstyle{neuronnode} = [minimum size=1.5em,circle,draw,ublue,very thick,fill=white,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}]

\node [anchor=center,neuronnode] (neuron00) at (0,0) {};
\visible<2->{
\node [anchor=center,neuronnode] (neuron01) at ([yshift=-3em]neuron00) {};
}
\visible<3->{
\node [anchor=center,neuronnode] (neuron02) at ([yshift=-3em]neuron01) {};
}

\node [anchor=east] (x0) at ([xshift=-6em]neuron00.west) {$x_0$};
\node [anchor=east] (x1) at ([xshift=-6em]neuron01.west) {$x_1$};
\node [anchor=east] (x2) at ([xshift=-6em]neuron02.west) {$b$};

\node [anchor=west] (y0) at ([xshift=4em]neuron00.east) {$y_0$};

\draw [->] (x0.east) -- (neuron00.180) node [pos=0.1,above] {\tiny{$w_{00}$}};
\draw [->] (x1.east) -- (neuron00.200) node [pos=0.1,above] {\tiny{$w_{10}$}};
\draw [->] (x2.east) -- (neuron00.220) node [pos=0.05,above,yshift=0.3em] {\tiny{$b_{0}$}};
\draw [->] (neuron00.east) -- (y0.west);

\visible<2->{
\node [anchor=west] (y1) at ([xshift=4em]neuron01.east) {$y_1$};
\draw [->] (x0.east) -- (neuron01.160) node [pos=0.4,above] {\tiny{$w_{01}$}};
\draw [->] (x1.east) -- (neuron01.180) node [pos=0.35,above,yshift=-0.2em] {\tiny{$w_{11}$}};
\draw [->] (x2.east) -- (neuron01.200) node [pos=0.3,below,yshift=0.2em] {\tiny{$b_{1}$}};
\draw [->] (neuron01.east) -- (y1.west);
}

\visible<3->{
\node [anchor=west] (y2) at ([xshift=4em]neuron02.east) {$y_2$};
\draw [->] (x0.east) -- (neuron02.140) node [pos=0.1,below,yshift=-0.2em] {\tiny{$w_{02}$}};
\draw [->] (x1.east) -- (neuron02.160) node [pos=0.1,below] {\tiny{$w_{12}$}};
\draw [->] (x2.east) -- (neuron02.180) node [pos=0.3,below] {\tiny{$b_{2}$}};
\draw [->] (neuron02.east) -- (y2.west);
}

\visible<4->{
\node [anchor=east,align=left] (inputlabel) at ([xshift=-0.1em]x1.west) {��������:\\\small{$\textbf{x}=(x_0,x_1)$}};
}
\visible<5->{
\node [anchor=west,align=left] (outputlabel) at ([xshift=0.1em]y1.east) {�������:\\\small{$\textbf{y}=(y_0,y_1,y_2)$}};
}

\begin{pgfonlayer}{background}
\visible<6->{
\node [rectangle,inner sep=0.4em,fill=red!20] [fit = (neuron00) (neuron01) (neuron02)] (layer) {};
\node [anchor=south] (layerlabel) at ([yshift=0.2em]layer.north) {һ����Ԫ};
}

\visible<4->{
\node [rectangle,inner sep=0.1em,fill=ugreen!20] [fit = (x0) (x1)] (inputshadow) {};
}
\visible<5->{
\node [rectangle,inner sep=0.1em,fill=blue!20] [fit = (y0) (y1) (y2)] (outputshadow) {};
}
\end{pgfonlayer}

\visible<7->{
\node [anchor=north west] (wlabel) at ([yshift=-1em,xshift=-7em]x2.south) {����(����):$\textbf{w} = \Big( \begin{array}{lll} w_{00} & w_{01} & w_{02} \\ w_{10} & w_{11} & w_{12} \end{array} \Big)$};
}
\visible<8->{
\node [anchor=west] (blabel) at (wlabel.east) {����(����):$\textbf{b} = (b_0, b_1, b_2)$};
}

\end{scope}
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% �����������
\begin{frame}{�����磺���Ա任 + �����}
\begin{itemize}
\item ��������$\textbf{x} \in \mathbb{R}^m$��һ�����������Ȱ�������\textbf{\alert{���Ա任}}ӳ�䵽$\mathbb{R}^m$��֮�󾭹�\textbf{{\color{blue}�����}}�任��$\textbf{y} \in \mathbb{R}^n$
\end{itemize}

\vspace{1em}

\begin{center}
\begin{tikzpicture}

\node [anchor=center] (y) at (0,0) {\Large{$\textbf{y}$}};
\node [anchor=west] (eq) at (y.east) {\Large{$=$}};
\node [anchor=west] (func) at (eq.east) {\Large{$f$}};
\node [anchor=west] (brace01) at (func.east) {\Large{$($}};
\node [anchor=west] (x) at (brace01.east) {\Large{$\textbf{x}$}};
\node [anchor=west] (dot) at (x.east) {\Large{$\cdot$}};
\node [anchor=west] (w) at (dot.east) {\Large{$\textbf{w}$}};
\node [anchor=west] (plus) at (w.east) {\Large{$+$}};
\node [anchor=west] (b) at (plus.east) {\Large{$\textbf{b}$}};
\node [anchor=west] (brace02) at (b.east) {\Large{$)$}};

\node [anchor=center,fill=blue!20] (func2) at (func) {\LARGE{$f$}};
\node [anchor=north] (funclabel) at ([yshift=-1.1em]func.south) {\blue{�����}};
\draw [<-] ([yshift=-0.2em]func2.south) -- (funclabel.north);

\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,fill=red!20] [fit = (x) (w) (b)] (linear) {};
\node [anchor=north] (linearlabel) at ([yshift=-1.1em]linear.south) {\alert{���Ա任}};
\draw [<-] ([yshift=-0.2em]linear.south) -- (linearlabel.north);
\end{pgfonlayer}

\end{tikzpicture}

\begin{figure}[htp!]

\includegraphics[scale=0.24]{./Figures/wf.png}
% \begin{tikzpicture}
%     \node [rectangle,inner sep=0.2em,fill=red!20] [fit = (x) (w) (b)] (linear) {};
%     \node [anchor=north] (linearlabel) at ([yshift=-1.1em]linear.south) {\alert{���Ա任}}
\end{figure}
\tikz {\node () at (0,0) {}; \node () at (0,10) {};}
\end{center}
\end{frame}


%%%------------------------------------------------------------------------------------------------------------
%%% ���Ա任
\begin{frame}{���Ա任}
\begin{itemize}
\item �������Կռ�$V$������$\textbf{a}$��$\textbf{b} \in V$�������е�����$\alpha$�����Ա任$T(\cdot)$������
% Defining properties of a linear map (additivity and homogeneity).
% align* (amsmath) is used instead of the obsolete eqnarray: it spaces the
% relation symbol correctly and is unnumbered, so no \nonumber is needed.
\begin{align*}
T(\textbf{a} + \textbf{b}) & = T(\textbf{a}) + T(\textbf{b}) \\
T(\alpha \textbf{a}) & = \alpha T(\textbf{a})
\end{align*}
\item<2-> ���Ա任��һ�ּ��ν��ͣ�
\end{itemize}

\vspace{-0.5em}
\visible<2->{
\begin{center}
\begin{tikzpicture}

\node [anchor=west] (x) at (0,0) {\Large{$\textbf{x}$}};
\node [anchor=west] (dot) at (x.east) {\Large{$\cdot$}};
\node [anchor=west] (w) at (dot.east) {\Large{$\textbf{w}$}};
\node [anchor=west] (plus) at (w.east) {\Large{$+$}};
\node [anchor=west] (b) at (plus.east) {\Large{$\textbf{b}$}};

\tikzstyle{neuron} = [rectangle,draw,thick,fill=red!30,red!35,minimum height=2em,minimum width=2em,font=\small]
\node[neuron,anchor=north] (a1) at ([xshift=-6em,yshift=-4em]x.south) {};
\draw[->,thick] ([xshift=-2em,yshift=0em]a1.south) to ([xshift=3em,yshift=0em]a1.south);
\draw[->,thick] ([xshift=0em,yshift=-4em]a1.west) to ([xshift=0em,yshift=2em]a1.west);
\node[below] at ([xshift=0.5em,yshift=-1em]a1.west){0};
\node[below] at ([xshift=2em,yshift=-1em]a1.west){1};
\node[below] at ([xshift=-0.5em,yshift=2em]a1.west){1};
\node [anchor=west] (x) at ([xshift=-0.7em,yshift=1em]a1.south) {\Large{$\textbf{F}$}};

\visible<3->{
\node [anchor=center,fill=green!20] (w2) at (w) {\Large{$\textbf{w}$}};
\node [anchor=north,inner sep=1pt] (wlabel) at ([yshift=-0.7em]w.south) {\small{��ת(rotation)}};
\draw [<-] ([yshift=-0.2em]w2.south) -- (wlabel.north);

\tikzstyle{neuron} = [rectangle,draw,thick,fill=red!30,red!35,minimum height=2em,minimum width=2em,font=\small]
\node[neuron,anchor=north] (a2) at ([xshift=10em,yshift=0em]a1.south) {};
\draw[->,thick] ([xshift=-2em,yshift=0em]a2.north) to ([xshift=3em,yshift=0em]a2.north);
\draw[->,thick] ([xshift=0em,yshift=-2em]a2.west) to ([xshift=0em,yshift=4em]a2.west);
\node[above] at ([xshift=0.5em,yshift=1em]a2.west){0};
\node[above] at ([xshift=2em,yshift=1em]a2.west){1};
\node[below] at ([xshift=-0.5em,yshift=0em]a2.west){-1};
\node [anchor=west] (x) at ([xshift=-3.5cm,yshift=2em]a2.north) {\scriptsize{
    $w=\begin{bmatrix}
    1&0&0\\
    0&-1&0\\
    0&0&1
    \end{bmatrix}$}
    };

\node [anchor=west,rotate = 180] (x) at ([xshift=0.7em,yshift=1em]a2.south) {\Large{$\textbf{F}$}};


\draw[-stealth, line width=2pt,dashed] ([xshift=4em,yshift=0em]a1.south) to ([xshift=-3em,yshift=0em]a2.north);
}

\visible<4->{
\node [anchor=center,fill=purple!20] (b2) at (b) {\Large{$\textbf{b}$}};
\node [anchor=west] (blabel) at ([xshift=1.5em]b2.east) {ƽ��(shift)};
\draw [<-] ([xshift=0.2em]b2.east) -- (blabel.west);

\tikzstyle{neuron} = [rectangle,draw,thick,fill=red!30,red!35,minimum height=2em,minimum width=2em,font=\small]
\node[neuron,anchor=north] (a3) at ([xshift=11em,yshift=2.05em]a2.south) {};
\draw[->,thick] ([xshift=-3em,yshift=0em]a3.north) to ([xshift=2em,yshift=0em]a3.north);
\draw[->,thick] ([xshift=-1em,yshift=-2em]a3.west) to ([xshift=-1em,yshift=4em]a3.west);
\node[above] at ([xshift=-0.5em,yshift=1em]a3.west){0};
\node[above] at ([xshift=1em,yshift=1em]a3.west){1};
\node[left] at ([xshift=-0.75em,yshift=-0.5em]a3.west){-1};
\node [anchor=west,rotate = 180] (x) at ([xshift=0.7em,yshift=1em]a3.south) {\Large{$\textbf{F}$}};


\node [anchor=west] (x) at ([xshift=-4cm,yshift=2em]a3.north) {\scriptsize{
    $b=\begin{bmatrix}
    0.5&0&0\\
    0&0&0\\
    0&0&0
    \end{bmatrix}$}
    };
\draw[-stealth, line width=2pt,dashed] ([xshift=3em,yshift=1em]a2.east) to ([xshift=-3em,yshift=1em]a3.west);
}

\end{tikzpicture}
\end{center}
}


\end{frame}


%%%------------------------------------------------------------------------------------------------------------
%%% ���Ա任�������ӵ�ʵ��
\begin{frame}[fragile]{���Ա任������}
\begin{itemize}
\item ���Ա任Ҳ�����ڸ��Ӹ��ӵ��������Ҳ���������ṩ����ϲ�ͬ���ݷֲ�������
    \begin{itemize}
    \item ���磬���ǿ��԰���άͼ��ͶӰ����άƽ����
    \item �ٱ��磬����Ҳ���԰Ѷ�άƽ���ϵ�ͼ��ӳ�䵽��άƽ��
    \end{itemize}
\end{itemize}
\begin{tiny}
% Display: a braced group of 3x3 identity matrices times the all-ones column
% vector equals a braced group of all-ones column vectors.  The "5" under each
% \underbrace is produced by the extra "\\5" row of the outer smallmatrix.
% \[ ... \] is used rather than plain-TeX $$ ... $$, and every array declares
% exactly the number of columns it fills.
\[
\begin{smallmatrix}  \underbrace{
    \left\{
        \begin{smallmatrix}
            \left[
            \begin{array}{ccc}
             1& 0 &0 \\
             0& 1 &0 \\
             0& 0 &1
            \end{array}
            \right ]
            \cdots
            \left[
            \begin{array}{ccc}
                1& 0 &0 \\
                0& 1 &0 \\
                0& 0 &1
            \end{array}
            \right]
        \end{smallmatrix}
        \right\}
     }\\5
\end{smallmatrix}
\times
\begin{smallmatrix}
\left[
    \begin{array}{c}
    1\\
    1\\
    1
    \end{array}
\right ]
\end{smallmatrix}
=
\begin{smallmatrix}  \underbrace{
    \left\{
        \begin{smallmatrix}
            \left[
            \begin{array}{c}
             1 \\
             1 \\
             1
            \end{array}
            \right ]
            \cdots
            \left[
            \begin{array}{c}
                1 \\
                1 \\
                1
            \end{array}
            \right]
        \end{smallmatrix}
        \right\}
     }\\5
\end{smallmatrix}
\]
\end{tiny}
%\vspace{1em}

% \plane{<y>}: closed parallelogram path in TikZ xyz coordinates.  The first
% corner is (-1.95, <y>, 1.35); the other corners are reached by relative moves,
% so changing <y> translates the whole shape along the y direction.
\newcommand{\plane}[1]{
  (-1.95, #1, 1.35)
    -- ++(3.6, 0.6, 0)
    -- ++(0.3, -1.8, -2.7)
    -- ++(-3.6, -0.6, 0)
    -- cycle}
% \nullspacepicture: five translated copies of \plane plus a line drawn in two
% segments.  Drawing order matters: the first segment is painted before the
% planes (so they cover it) and the second segment after them (so it covers them).
\newcommand{\nullspacepicture}{
% bottom part of the row space line, painted underneath the planes
\draw (0,0,0) -- (0.3,-1.8,1.233);
% the five planes in painting order; only the middle one (offset 0.6) is blue
\foreach \planeoffset/\planefill in {-0.2/gray!20, 0.2/gray!20, 0.6/blue!70!gray, 1/gray!20, 1.4/gray!20}{
  \draw[fill=\planefill]\plane{\planeoffset};
}
% top part of the row space line, painted over the planes
\draw (-.094,.562,-.385) -- (-0.3,1.8,-1.233);
}
% \rangepicture{<fill>}: 2-D axes, the line from (1,-2) to (-1,2), and five dots
% on that line.  The dot at the origin is filled with <fill>; the two dots on
% each side of it are gray.
\newcommand{\rangepicture}[1]{
% coordinate axes
\draw[help lines,->] (-2,0) -- (2,0);
\draw[help lines,->] (0,-2) -- (0,2);
% the line carrying the dots
\draw (1,-2) -- (-1,2);
% highlighted dot first, then the four gray neighbours
\draw[fill=#1] (0,0) circle (2.5pt);
\foreach \dotx/\doty in {0.2/-0.4, 0.4/-0.8, -0.2/0.4, -0.4/0.8}{
  \draw[fill=gray!50] (\dotx,\doty) circle (2.5pt);
}
}

\begin{tikzpicture}[scale=0.95]
\centering
\nullspacepicture
% the label
\node at (-2,1.8) {$\mathbb{R}^3$};
% arrow between diagrams
\path[->] (3,0) edge[bend left] node[above] {���Ա任} (4.5,0);
\begin{scope}[xshift=7cm]
\rangepicture{blue!70!gray}
\node at (1.8,1.8) {$\mathbb{R}^2$};
\end{scope}
\end{tikzpicture}
\end{frame}


%%%------------------------------------------------------------------------------------------------------------
%%% �����
\begin{frame}{�����}
\begin{itemize}
\item ������������Ϊ�˽��ʵ�������е�\alert{������}�任
    \begin{itemize}
    \item �����Բ����ṩ��������⺯�����������Ժ���ܣ�
    \end{itemize}
\end{itemize}

\vspace{-1em}
\begin{center}
\begin{tikzpicture}
\begin{scope}
\draw [line width=3pt,ublue,-](0,0) -- (-2.0,1);
\node [anchor=north] (linelabel) at (-1.0,-0.5) {\footnotesize{����һ������}};
\end{scope}

\begin{scope}[xshift=10em]
\draw [line width=3pt,ublue,-,line cap=round](0,0) .. controls (-0.5,-0.25) and (-0.5,1).. (-1.3,0.3) .. controls (-2.3,-0.3) and (-1.1,1.8).. (-2.0,1);
\node [] at (-2,1) {\white{$\cdot$}};
\node [anchor=north] (linelabel) at (-1.0,-0.5) {\footnotesize{����һֻ���}};
\end{scope}
\end{tikzpicture}
\end{center}

\begin{itemize}
\item<2-> �򵥵ķ����Ժ���
\end{itemize}

\vspace{-1em}

\visible<2->{
\begin{center}
\begin{tikzpicture}
\begin{scope}[]
\draw [->,thick] (-1.5,0) -- (1.5,0);
\draw [->,thick] (0,-0.1) -- (0,1.5);
\draw [-,very thick,ublue,domain=-1.2:1.2,samples=100] plot (\x,{0.5 * (\x -0.3)^2 + 0.2});
\node [anchor=west] (ylabel) at (0,1.3) {$y$};
\node [anchor=north] (xlabel) at (1.3,0) {$x$};
\node [anchor=north] (func) at (0,-0.8) {\footnotesize{$y = \frac{1}{2}  (x - 0.3)^2 + 0.2$}};
\node [anchor=south west] (flabel) at (func.north west) {\footnotesize{Quadratic:}};
\end{scope}

\begin{scope}[xshift=9.5em]
\draw [->,thick] (-1.5,0) -- (1.5,0);
\draw [->,thick] (0,-0.1) -- (0,1.5);
\draw [-,very thick,ublue,domain=-1.2:1.2,samples=100] plot (\x, {0.5 * exp(\x)});
\node [anchor=west] (ylabel) at (0,1.3) {$y$};
\node [anchor=north] (xlabel) at (1.3,0) {$x$};
\node [anchor=north] (func) at (0,-0.8) {\footnotesize{$y = 0.5 \cdot  \exp(x)$}};
\node [anchor=south west] (flabel) at ([xshift=-1.8em]func.north west) {\footnotesize{Exponential:}};
\end{scope}

\begin{scope}[xshift=19em]
% Third example panel: absolute-value curve with its kink at x = 0.2.
\draw [->,thick] (-1.5,0) -- (1.5,0);
\draw [->,thick] (0,-0.1) -- (0,1.5);
\draw [-,very thick,ublue,domain=-1.1:1.2,samples=100] plot (\x,{abs(\x -0.2) + 0.1});
\node [anchor=west] (ylabel) at (0,1.3) {$y$};
\node [anchor=north] (xlabel) at (1.3,0) {$x$};
% The printed formula uses the same offset (0.2) as the plotted expression.
\node [anchor=north] (func) at (0,-0.8) {\footnotesize{$y = |x - 0.2| + 0.1$}};
\node [anchor=south west] (flabel) at ([xshift=-0.4em]func.north west) {\footnotesize{Absolute:}};
\end{scope}
\end{tikzpicture}
\end{center}
}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ���õļ����
\begin{frame}{���õļ����}
    \begin{itemize}
    \item �ö�ö࣬�оٲ�ȫ ...
    \end{itemize}
    \vspace{-1em}
    \begin{figure}
    % Softplus panel: y = ln(1 + e^x), plotted for x in [-1.2, 1].
    \subfigure[softplus]{
    \centering
    \begin{minipage}{.2\textwidth}
        \begin{tikzpicture}
        % axes
        \draw[->](-1.2,0)--(1.2,0)node[left,below,font=\tiny]{$x$};
        \draw[->](0,-1.2)--(0,1.2)node[right,font=\tiny]{$y$};
        % tick marks with their numeric labels
        \foreach \x in {-1.0,-0.5,0.0,0.5,1.0}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\tiny]at(\x,0){\x};}
        \foreach \y in {1.0,0.5}{\draw(0,\y)--(0.05,\y)node[left,outer sep=2pt,font=\tiny]at(0,\y){\y};}
        % pgfmath expression for softplus; parentheses must balance
        \draw[color=red ,domain=-1.2:1]plot(\x,{ln(1+exp(\x))});
        % \ln typesets the operator upright
        \node[black,anchor=south] at (0,1.2) {\small $y = \ln(1+e^x)$};
        \end{tikzpicture}
    \end{minipage}%
    }
    \hfill
    % Sigmoid panel.  The x axis is compressed by a factor of 5: the curve is
    % drawn as 1/(1+exp(-5x)) in canvas units and each x tick label is the tick
    % position multiplied by 5, so the plot matches the printed formula.
    \subfigure[sigmoid]{
    \centering
    \begin{minipage}{.2\textwidth}
        \begin{tikzpicture}
        % axes
        \draw[->](-1.2,0)--(1.2,0)node[left,below,font=\tiny]{$x$};
        \draw[->](0,-1.2)--(0,1.2)node[right,font=\tiny]{$y$};
        % dashed guide line at y = 1
        \draw[dashed](-1.2,1)--(1.2,1);
        % x ticks: label text is computed as 5 * position via \pgfmathparse
        \foreach \x in {-1,-0.5,0,0.5,1}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\tiny]at(\x,0){
            \pgfmathparse{(\x)*5}
            \pgfmathresult};}
        % y ticks are labelled with their own position
        \foreach \y in {0.5,1.0}{\draw(0,\y)--(0.05,\y)node[left,outer sep=2pt,font=\tiny]at(0,\y){\y};}
        % the curve, using the same factor 5 as the x tick labels
        \draw[color=red,domain=-1.2:1.2]plot(\x,{1/(1+(exp(-5*\x)))});
        \node[black,anchor=south] at (0,1.2) {\small $y = \frac{1}{1+e^{-x}}$};
        \end{tikzpicture}
    \end{minipage}%
    }
    \hfill
    \subfigure[tanh]{
    \centering
    \begin{minipage}{.2\textwidth}
        \begin{tikzpicture}
        \draw[->](-1.2,0)--(1.2,0)node[left,below,font=\tiny]{$x$};
        \draw[->](0,-1.2)--(0,1.2)node[right,font=\tiny]{$y$};
        \draw[dashed](-1.2,1)--(1.2,1);
        \draw[dashed](-1.2,-1)--(1.2,-1);
        \foreach \x in {-1.0,-0.5,0.0,0.5,1.0}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\tiny]at(\x,0){\x};}
        \foreach \y in {0.5,1.0}{\draw(0,\y)--(0.05,\y)node[left,outer sep=2pt,font=\tiny]at(0,\y){\y};}
        \draw[color=red ,domain=-1.2:1.2]plot(\x,{tanh(\x)});
        \node[black,anchor=south] at (0,1.2) {\small $y = \frac{e^{x}-e^{-x}}{e^{x}+e^{-x}}$};
        \end{tikzpicture}
    \end{minipage}
    }
    \end{figure}
    \vspace{-1em}
    \begin{figure}
    \subfigure[relu]{
    \centering
    \begin{minipage}{.2\textwidth}
        \begin{tikzpicture}
        \draw[->](-1.2,0)--(1.2,0)node[left,below,font=\tiny]{$x$};
        \draw[->](0,-1.2)--(0,1.2)node[right,font=\tiny]{$y$};
        \draw[dashed](-1.2,1)--(1.2,1);
        \draw[dashed](-1.2,-1)--(1.2,-1);
        \foreach \x in {-1.0,-0.5,0.0,0.5,1.0}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\tiny]at(\x,0){\x};}
        \foreach \y in {0.5,1.0}{\draw(0,\y)--(0.05,\y)node[left,outer sep=2pt,font=\tiny]at(0,\y){\y};}
        \draw[color=red ,domain=-1.2:1.2]plot(\x,{max(\x,0)});
        \node[black,anchor=south] at (0,1.2) {\small $y =\max (0, x)$};
        \end{tikzpicture}
    \end{minipage}%
    }
    \hfill
    \subfigure[gaussian]{
    \centering
    \begin{minipage}{.2\textwidth}
        \begin{tikzpicture}
        \draw[->](-1.2,0)--(1.2,0)node[left,below,font=\tiny]{$x$};
        \draw[->](0,-1.2)--(0,1.2)node[right,font=\tiny]{$y$};
        \draw[dashed](-1.2,1)--(1.2,1);
        \foreach \x in {-1.0,-0.5,0.0,0.5,1.0}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\tiny]at(\x,0){\x};}
        \foreach \y in {0.5,1.0}{\draw(0,\y)--(0.05,\y)node[left,outer sep=2pt,font=\tiny]at(0,\y){\y};}
        \draw[color=red ,domain=-1.2:1.2]plot(\x,{exp(-1*((\x)^2))});
        \node[black,anchor=south] at (0,1.2) {\small $y =e^{-x^2}$};
        \end{tikzpicture}
    \end{minipage}%
    }
    \hfill
    \subfigure[identity]{
    \centering
    \begin{minipage}{.2\textwidth}
        \begin{tikzpicture}
        \draw[->](-1.2,0)--(1.2,0)node[left,below,font=\tiny]{$x$};
        \draw[->](0,-1.2)--(0,1.2)node[right,font=\tiny]{$y$};
        \foreach \x in {-1.0,-0.5,0.0,0.5,1.0}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\tiny]at(\x,0){\x};}
        \foreach \y in {0.5,1.0}{\draw(0,\y)--(0.05,\y)node[left,outer sep=2pt,font=\tiny]at(0,\y){\y};}
        \draw[color=red ,domain=-1:1]plot(\x,\x);
        \node[black,anchor=south] at (0,1.2) {\small $y =x$};
        \end{tikzpicture}
    \end{minipage}
    }

    \end{figure}

\end{frame}


%%%------------------------------------------------------------------------------------------------------------
\subsection{���������}

%%%------------------------------------------------------------------------------------------------------------
%%% һ�� -> ���
\begin{frame}{����IJ�}
\begin{itemize}
\item \textbf{����������}�����Ա任 + ������������ԣ�
\item ���ǿ����ظ�����Ĺ��̣�����\textbf{���������}
\end{itemize}

\vspace{-1.0em}
\begin{center}
\begin{tikzpicture}
\begin{scope}[]

\def\neuronsep{1.6}

\tikzstyle{neuronnode} = [minimum size=1.7em,circle,draw,ublue,very thick,inner sep=1pt, fill=white,align=center,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}]

%%% layer 1
\foreach \n in {1,...,5}{
    \node [neuronnode] (neuron0\n) at (\n * \neuronsep,0) {\tiny{$f_1$}\\[-1ex] \tiny{$\sum$}};
    \draw [-,ublue] (neuron0\n.east) -- (neuron0\n.west);
}

\foreach \n in {1,...,5}{
    \foreach \m in {1,...,5}{
        \draw [<-] (neuron0\m.south) -- ([yshift=-2em]neuron0\n.south);
    }
    \node [anchor=north] (x\n) at ([yshift=-2em]neuron0\n.south) {$x_\n$};
    \visible<1>{
    \draw [<-,thick] ([yshift=1.5em]neuron0\n.north) -- (neuron0\n.north);
    \node [anchor=south] (y\n) at ([yshift=1.5em]neuron0\n.north) {$y_\n$};
    }
}

\node [anchor=west] (w1label) at ([xshift=-0.5em,yshift=0.5em]x5.north east) {$\textbf{w}_1$};

\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,fill=red!20] [fit = (neuron01) (neuron05)] (layer01) {};
\end{pgfonlayer}

\node [anchor=west] (layer00label) at ([xshift=1.25em]x5.east) {\alert{�����}};

\visible<2->{
\node [anchor=west] (layer01label) at ([xshift=1em]layer01.east) {�ڶ���};
}
\visible<4->{
\node [anchor=west] (layer01label2) at (layer01label.east) {(\alert{����})};
}

%%% layer 2
\visible<2->{
\foreach \n in {2,...,4}{
    \node [neuronnode] (neuron1\n) at (\n * \neuronsep,4em) {\tiny{$f_2$}\\[-1ex] \tiny{$\sum$}};
    \draw [-,ublue] (neuron1\n.east) -- (neuron1\n.west);
}

\foreach \n in {2,...,4}{
    \foreach \m in {1,...,5}{
        \draw [<-] (neuron1\n.south) -- (neuron0\m.north);
    }
    \visible<2>{
    \draw [<-,thick] ([yshift=1.5em]neuron1\n.north) -- (neuron1\n.north);
    \node [anchor=south] (y\n) at ([yshift=1.5em]neuron1\n.north) {$y_\n$};
    }
}

\node [anchor=west] (w2label) at ([xshift=-2.5em,yshift=5.0em]x5.north east) {$\textbf{w}_2$};

\begin{pgfonlayer}{background}
\visible<2->{
\node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron12) (neuron14)] (layer02) {};
}
\end{pgfonlayer}

\node [anchor=west] (layer02label) at ([xshift=4.9em]layer02.east) {������};
\visible<4->{
\node [anchor=west] (layer02label2) at (layer02label.east) {(\alert{����})};
}
}

%%% layer 3
\visible<3->{
\foreach \n in {1,...,5}{
    \node [neuronnode] (neuron2\n) at (\n * \neuronsep,8em) {\tiny{$f_3$}\\[-1ex] \tiny{$\sum$}};
    \draw [-,ublue] (neuron2\n.east) -- (neuron2\n.west);
}

\foreach \n in {1,...,5}{
    \foreach \m in {2,...,4}{
        \draw [<-] (neuron2\n.south) -- (neuron1\m.north);
    }

    \node [anchor=south] (y\n) at ([yshift=1.5em]neuron2\n.north) {$y_\n$};
    \draw [<-,thick] ([yshift=1.5em]neuron2\n.north) -- (neuron2\n.north);
}

\node [anchor=west] (w3label) at ([xshift=-2.5em,yshift=8.5em]x5.north east) {$\textbf{w}_3$};

\begin{pgfonlayer}{background}
\visible<3->{
\node [rectangle,inner sep=0.2em,fill=blue!20] [fit = (neuron21) (neuron25)] (layer03) {};
}
\end{pgfonlayer}

\node [anchor=west] (layer03label) at ([xshift=1em]layer03.east) {���IJ�};
\visible<4->{
\node [anchor=west] (layer03label2) at (layer03label.east) {(\alert{�����})};
}
}

\end{scope}
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ������������Աƽ��κκ���
\begin{frame}{�����������Աƽ����⺯��}
\begin{itemize}
\item ��һ���򵥵���������Ϊ�������㼤�����sigmoid��
\end{itemize}

\begin{center}
\begin{tikzpicture}

%% a two-layer neural network
\begin{scope}
\tikzstyle{neuronnode} = [minimum size=1.7em,circle,draw,ublue,very thick,inner sep=1pt, fill=white,align=center,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}]

%% input and hidden layers
\node [neuronnode] (n10) at (0,0) {\tiny{$f$}\\[-1ex] \tiny{$\sum$}};
\node [neuronnode] (n11) at (1.5,0) {\tiny{$f$}\\[-1ex] \tiny{$\sum$}};
\draw [-,ublue] (n10.west) -- (n10.east);
\draw [-,ublue] (n11.west) -- (n11.east);
\node [anchor=north] (x1) at ([yshift=-6em]n11.south) {$x_1$};
\node [anchor=north] (b) at ([yshift=-6em]n10.south) {$b$};
\visible<1-10>{
\draw [->,thick] (b.north) -- ([yshift=-0.1em]n10.south);
\draw [->,thick] (x1.north) -- ([yshift=-0.1em]n10.290);
}
\visible<1>{
\draw [->,thick] (b.north) -- ([yshift=-0.1em]n11.250);
\draw [->,thick] (x1.north) -- ([yshift=-0.1em]n11.south);
}

\visible<11->{
\draw [->,thick,red] (b.north) -- ([yshift=-0.1em]n10.south);
\draw [->,thick,ugreen] (x1.north) -- ([yshift=-0.1em]n10.290);
}

\visible<2->{
\draw [->,thick,blue] (b.north) -- ([yshift=-0.1em]n11.250);
\draw [->,thick,purple] (x1.north) -- ([yshift=-0.1em]n11.south);
}

\visible<15->{
\node [neuronnode] (n12) at (2.7,0) {\tiny{$f$}\\[-1ex] \tiny{$\sum$}};
\node [neuronnode] (n13) at (3.8,0) {\tiny{$f$}\\[-1ex] \tiny{$\sum$}};
\draw [-,ublue] (n12.west) -- (n12.east);
\draw [-,ublue] (n13.west) -- (n13.east);
\draw [->,thick] (b.north) -- ([yshift=-0.1em]n12.250);
\draw [->,thick] (x1.north) -- ([yshift=-0.1em]n12.270);
\draw [->,thick] (b.north) -- ([yshift=-0.1em]n13.230);
\draw [->,thick] (x1.north) -- ([yshift=-0.1em]n13.250);
}

\visible<16->{
\node [anchor=west] (morenodes) at (n13.east) {...};
}

%% output layers
\node [neuronnode] (n20) at (0.75,5em) {\scriptsize{$\sum$}};
\visible<1-10>{\draw [->,thick] ([yshift=0.1em]n10.north) -- ([yshift=-0.1em]n20.250);}
\visible<1-8>{\draw [->,thick] ([yshift=0.1em]n11.north) -- ([yshift=-0.1em]n20.290);}

\visible<11->{\draw [->,thick,brown] ([yshift=0.1em]n10.north) -- ([yshift=-0.1em]n20.250);}
\visible<9->{\draw [->,thick,orange] ([yshift=0.1em]n11.north) -- ([yshift=-0.1em]n20.290);}

\node [] (y) at ([yshift=3em]n20.north) {$y$};
\draw [->,thick] ([yshift=0.1em]n20.north) -- (y.south);

\visible<15->{
\draw [->,thick] ([yshift=0.1em]n12.north) -- ([yshift=-0.1em]n20.310);
\draw [->,thick] ([yshift=0.1em]n13.north) -- ([yshift=-0.1em]n20.330);
}

%% weight and bias
\visible<11->{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (b0) at ([yshift=3em,xshift=-0.5em]b.north) {\tiny{$b=-6$}};}
\visible<11->{\node [anchor=center,rotate=-59,fill=white,inner sep=1pt] (w2) at ([yshift=1.2em,xshift=-1.2em]x1.north) {\tiny{$w=100$}};}

\visible<2-6>{\node [anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north) {\tiny{$b=0$}};}
\visible<7>{\node [anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north) {\tiny{$b=-2$}};}
\visible<8->{\node [anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north) {\tiny{$b=-4$}};}
\visible<2-4>{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north) {\tiny{$w=1$}};}
\visible<5>{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north) {\tiny{$w=10$}};}
\visible<6->{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north) {\tiny{$w=100$}};}

\visible<11>{\node [anchor=center,rotate=62,fill=white,inner sep=1pt] (w21) at ([yshift=2em,xshift=0.5em]n10.north) {\tiny{$w'=0.7$}};}
\visible<12->{\node [anchor=center,rotate=62,fill=white,inner sep=1pt] (w21) at ([yshift=2em,xshift=0.5em]n10.north) {\tiny{$w'=-0.7$}};}

\visible<2-8>{\node [anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north) {\tiny{$w'=1$}};}
\visible<9>{\node [anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north) {\tiny{$w'=0.9$}};}
\visible<10->{\node [anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north) {\tiny{$w'=0.7$}};}


%% sigmoid box
\begin{scope}
\visible<3->{
\node [anchor=west] (flabel) at ([xshift=1.2in]y.east) {\footnotesize{sigmoid:}};
\node [anchor=north east] (slabel) at ([xshift=0]flabel.south east) {\footnotesize{sum:}};

\node [anchor=west,inner sep=2pt] (flabel2) at (flabel.east) {\footnotesize{$f(s)=1/(1+e^{-s})$}};
\node [anchor=west,inner sep=2pt] (flabel3) at (slabel.east) {\footnotesize{$s=x_1 \cdot w + b$}};
\draw [->,thick,dotted] ([yshift=-0.3em,xshift=-0.1em]n11.60)  .. controls +(east:1) and +(west:2) ..  ([xshift=-0.2em]flabel.west) ;

\begin{pgfonlayer}{background}
\visible<3->{
\node [rectangle,inner sep=0.2em,fill=blue!20,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] [fit = (flabel) (flabel2) (flabel3)] (funcbox) {};
}
\end{pgfonlayer}
}
\end{scope}

%% output illustration
\begin{scope}[xshift=2.8in,yshift=0.1in]
\visible<4->{
\draw [->,thick] (-2.2,0) -- (2.2,0);
\draw [->,thick] (0,0) -- (0,2);
\draw [-] (-0.05,1) -- (0.05,1);
\node [anchor=east,inner sep=1pt] (label1) at (0,1) {\tiny{1}};
\node [anchor=south east,inner sep=1pt] (label2) at (0,0) {\tiny{0}};
}

\visible<4>{\draw [-,very thick,ublue,domain=-2:2,samples=100] plot (\x,{1/(1+exp(-2*\x))});}
\visible<5>{\draw [-,very thick,ublue,domain=-2:2,samples=100] plot (\x,{1/(1+exp(-4*\x))});}
\visible<6>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0,0) -- (0,1) -- (2,1);}
\visible<7>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.25,0) -- (0.25,1) -- (2,1);}
\visible<8>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,1) -- (2,1);}
\visible<9>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.9) -- (2,0.9);}
\visible<10>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.7) -- (2,0.7);}
\visible<11>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.7) -- (0.7,0.7) -- (0.7,1.4) -- (2,1.4);}
\visible<12->{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.7) -- (0.7,0.7) -- (0.7,0) -- (2,0);}
\visible<15->{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.7,0) -- (0.7,0.6) -- (0.9,0.6) -- (0.9,0) -- (2,0);}

\visible<14>{\draw [->,dashed] (0.6,-0.05) -- (0.6,-0.96in);}
\visible<15->{\draw [->,dashed] (0.8,-0.05) -- (0.8,-0.98in);}

\visible<4>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\scriptsize{$w_1=1$}\\[-0ex] \scriptsize{\ $b_1=0$}};}
\visible<5>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\alert{\scriptsize{$w_1=10$}}\\[-0ex] \scriptsize{\ $b_1=0$}};}
\visible<6>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\alert{\scriptsize{$w_1=100$}}\\[-0ex] \scriptsize{\ $b_1=0$}};}
\visible<7>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\scriptsize{$w_1=100$}\\[-0ex] \alert{\scriptsize{\ $b_1=-2$}}};}
\visible<8>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\scriptsize{$w_1=100$}\\[-0ex] \alert{\scriptsize{\ $b_1=-4$}}};}
\visible<9>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\alert{\scriptsize{$w'_1=0.9$}}};}
\visible<10>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\alert{\scriptsize{$w'_1=0.7$}}};}
\visible<11>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\alert{\scriptsize{$w_2=100$}}\\[-0ex] \alert{\scriptsize{\ $b_2=-6$}}\\[-0ex] \alert{\scriptsize{\ $w'_2=0.7$}}};}
\visible<12>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\scriptsize{$w_2=100$}\\[-0ex] \scriptsize{\ $b_2=-6$}\\[-0ex] \alert{\scriptsize{\ $w'_2=-0.7$}}};}
\visible<13->{\node [anchor=north west,align=left] (wblabel) at (-2.5,2) {\scriptsize{����һ��}\\[-1ex] \scriptsize{step function}};}
\end{scope}

\begin{scope}[xshift=2.8in,yshift=-1.2in]

\visible<13->{
\draw [->,thick] (-2.2,0) -- (2.2,0);
\draw [->,thick] (0,0) -- (0,2);
\draw [-,very thick,red,domain=-1.98:2,samples=100] plot (\x,{0.2 * (\x +0.4)^3 + 1.2 - 0.3 *(\x + 0.8)^2});
}

% One bar of the step-function approximation: 0.2 wide, left edge at x = \n
% (the enclosing \foreach variable), height equal to the red target curve
% evaluated at the bar's midpoint \n + 0.1.  Parameterless on purpose, since this
% frame is not [fragile]; the definition is local to the surrounding scope.
\newcommand{\stepapproxbar}{%
    \pgfmathsetmacro{\result}{0.2 * (\n + 0.1 + 0.4)^3 + 1.2 - 0.3 *(\n + 0.1 + 0.8)^2}%
    \draw [-,ublue,thick] (\n,0) -- (\n, \result) -- (\n + 0.2, \result) -- (\n + 0.2, 0);%
}

% From slide 14: the first bar, starting at x = 0.5.
\visible<14->{
\foreach \n in {0.5}{\stepapproxbar}
}

% From slide 15: a second bar, starting at x = 0.7.
\visible<15->{
\foreach \n in {0.7}{\stepapproxbar}
}

% From slide 16: bars across the whole plotted range.
\visible<16->{
\foreach \n in {-1.9,-1.7,...,1.9}{\stepapproxbar}
}

\visible<14>{\node [anchor=north west,align=left] (wblabel) at (-2.5,2.5) {\scriptsize{������ÿһ�ζ���}\\[-1ex] \scriptsize{��step function}\\[-1ex] \scriptsize{����}};}
\visible<15>{\node [anchor=north west,align=left] (wblabel) at (-2.5,2.5) {\scriptsize{���������Ԫ}\\[-1ex] \scriptsize{������ϸ����}\\[-1ex] \scriptsize{����}};}
\visible<16>{\node [anchor=north west,align=left] (wblabel) at (-2.5,2.5) {\scriptsize{�������㹻���}\\[-1ex] \scriptsize{������Ԫ����}\\[-1ex] \scriptsize{���\alert{���⺯��}}};}

\end{scope}

\end{scope}

\end{tikzpicture}
\end{center}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
\subsection{������ļ�ʵ�� - ��������}

%%%------------------------------------------------------------------------------------------------------------
%%% outline: problem 2
\begin{frame}{Ȼ��}

\vspace{6em}
\begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow]
{\Large
\textbf{�˹����������ѧ������ʲô,}

\vspace{0.4em}
\textbf{��α��ʵ��������ѧģ�ͣ�}
}
\end{tcolorbox}

\vspace{1em}
\begin{center}
\begin{tikzpicture}[
    gridcell/.style={inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm}]
% Four 3x3 grids, each shifted by half an em diagonally so that they read as a
% stack (later grids are painted over earlier ones). Each loop item supplies:
% frame colour / cell fill / counter used for numbering / yshift / xshift /
% number printed in the top-left cell. Cells are numbered row by row.
\foreach \gridEdge/\gridFill/\gridCtr/\gridDy/\gridDx/\gridStart in {%
    orange/orange/mycount1/6.5em/1em/1,%
    blue/blue/mycount2/6em/0.5em/2,%
    ugreen/green/mycount3/5.5em/0em/3,%
    red/red/mycount4/5em/-0.5em/4}{
  \begin{scope}[yshift=\gridDy,xshift=\gridDx]
    \setcounter{\gridCtr}{\gridStart}
    \draw[step=0.5cm,color=\gridEdge,thick] (-1,-1) grid (0.5,0.5);
    \foreach \cellY in {+0.25,-0.25,-0.75}
      \foreach \cellX in {-0.75,-0.25,0.25}{
        \node [gridcell,fill=\gridFill!20] at (\cellX,\cellY) {\arabic{\gridCtr}};
        \stepcounter{\gridCtr}
      }
  \end{scope}
}
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ����
\begin{frame}{������������� - ��������}
\begin{itemize}
\item ���������磬����$\textbf{x}$�����$\textbf{y}$����ʽ��������������
\end{itemize}

\begin{center}
\begin{tikzpicture}

% The formula y = f(x . w + b) is typeset as a chain of individually named
% nodes: every node after (y) is glued with its west anchor to the east
% anchor of its predecessor, so each symbol can be addressed by name later.
\node [anchor=center] (y) at (0,0) {\LARGE $\textbf{y}$};
\begin{scope}[every node/.append style={anchor=west}]
  \node (eq)      at (y.east)       {\LARGE $=$};
  \node (func)    at (eq.east)      {\LARGE $f$};
  \node (brace01) at (func.east)    {\LARGE $($};
  \node (x)       at (brace01.east) {\LARGE $\textbf{x}$};
  \node (dot)     at (x.east)       {\LARGE $\cdot$};
  \node (w)       at (dot.east)     {\LARGE $\textbf{w}$};
  \node (plus)    at (w.east)       {\LARGE $+$};
  \node (b)       at (plus.east)    {\LARGE $\textbf{b}$};
  \node (brace02) at (b.east)       {\LARGE $)$};
\end{scope}

\visible<2->{
\node [anchor=center,fill=yellow!30] (x2) at (x) {\LARGE{$\textbf{x}$}};
\node [anchor=south] (xlabel) at ([xshift=-3em,yshift=1.5em]x.north) {\alert{����������...}};
\draw [<-] ([yshift=0.2em,xshift=-0.5em]x2.north) -- ([xshift=1em]xlabel.south);

\node [anchor=center,fill=red!20] (y2) at (y) {\LARGE{$\textbf{y}$}};
\draw [<-] ([yshift=0.2em,xshift=0.5em]y2.north) -- ([xshift=-1em]xlabel.south);

\node [anchor=center,fill=green!20] (w2) at (w) {\LARGE{$\textbf{w}$}};
\node [anchor=north] (wlabel) at ([yshift=-1.0em]w.south) {���� e.g.,};
\draw [<-] ([yshift=-0.2em]w2.south) -- (wlabel.north);
\node [anchor=west] (wsample) at ([xshift=-0.5em]wlabel.east) {\footnotesize{$\left(\begin{array}{c c} 1 & 2 \\ 3 & 4 \end{array}\right)$}};

\node [anchor=center,fill=purple!20] (b2) at (b) {\LARGE{$\textbf{b}$}};
\node [anchor=south] (blabel) at ([yshift=1.3em]b.north) {���� e.g.,};
\draw [<-] ([yshift=0.2em]b2.north) -- (blabel.south);
\node [anchor=west] (bsample) at ([xshift=-0.5em]blabel.east) {\footnotesize{$(1, 3)$}};
}

\end{tikzpicture}
\end{center}

\begin{itemize}
\item<3-> $\textbf{x}$��$\textbf{y}$ʵ������һ����tensor�Ķ�������\textbf{����}�����磬
\end{itemize}

\begin{center}
\begin{tikzpicture}
\begin{scope}
\visible<4->{\node [anchor=west] (vector) at (0,0) {$\textbf{x} = (1,  3)$};}
\visible<5->{\node [anchor=west] (matrix) at ([xshift=0.1in]vector.east) {$\textbf{x} = \left(\begin{array}{c c} -1 & 3 \\ 0.2 & 2 \end{array}\right)$};}
\visible<6->{\node [anchor=west] (tensor3d) at ([xshift=0.1in]matrix.east) {ɶ��$\textbf{x} = \left(\begin{array}{c} \left(\begin{array}{c c} -1 & 3 \\ 0.2 & 2 \end{array}\right) \\ \left(\begin{array}{c c} -1 & 3 \\ 0.2 & 2 \end{array}\right) \end{array}\right)$};}
\end{scope}
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% �����ļ򵥶���
\begin{frame}{������ʲô}
\begin{itemize}
\item \textbf{���ѧϰ}�У�������``��"�ض���Ϊ\alert{��ά����}
    \begin{itemize}
    \item �����Ľף�rank����ʾ�ж��ٸ������ķ���ÿ����������ɶ��ά�ȱ�ʾ
    \end{itemize}
\end{itemize}

\begin{center}
\begin{tikzpicture}

\begin{scope}
\visible<2->{
\node [anchor=north] (label) at (0,0) {����};
\node [anchor=center] (label2) at ([yshift=-0.7em]label.south) {scalar};
\node [anchor=center] (rank) at ([yshift=-1.5em]label2.center) {(rank=0)};
\node [anchor=center] (scalar) at ([yshift=5em]label.north) {\Huge{3}};
}
\end{scope}

\begin{scope}[xshift=1in]
\visible<3->{
\node [anchor=north] (label) at (0,0) {����};
\node [anchor=center] (label2) at ([yshift=-0.7em]label.south) {vector};
\node [anchor=center] (rank) at ([yshift=-1.5em]label2.center) {(rank=1)};
\node [anchor=center] (scalar) at ([yshift=5em]label.north) {$\begin{pmatrix} 2 \\ .3 \\ -8 \\ .2\end{pmatrix}$};
}
\end{scope}

\begin{scope}[xshift=2in]
\visible<4->{
\node [anchor=north] (label) at (0,0) {����};
\node [anchor=center] (label2) at ([yshift=-0.7em]label.south) {matrix};
\node [anchor=center] (rank) at ([yshift=-1.5em]label2.center) {(rank=2)};
\node [anchor=center] (scalar) at ([yshift=5em]label.north) {$\begin{pmatrix} 1 & 1 & 9 \\ 1 & 0 & 0 \\ 1 & -4 & 7 \end{pmatrix}$};
}
\end{scope}

\begin{scope}[xshift=3.2in]
\visible<5->{
\node [anchor=north] (label) at (0,0) {3������};
\node [anchor=center] (label2) at ([yshift=-0.7em]label.south) {tensor};
\node [anchor=center] (rank) at ([yshift=-1.5em]label2.center) {(rank=3)};
}
% Rank-3 illustration: four 4x4 slices, each shifted by half an em diagonally
% so they read as a stack (later slices are painted over earlier ones).
% Every slice is numbered 1..16 row by row. Shown from slide 5 onwards.
% Loop item: frame colour / cell fill / numbering counter / yshift / xshift.
\visible<5->{
\foreach \sliceEdge/\sliceFill/\sliceCtr/\sliceDy/\sliceDx in {%
    orange/orange/mycount1/6.5em/1em,%
    blue/blue/mycount2/6em/0.5em,%
    ugreen/green/mycount3/5.5em/0em,%
    red/red/mycount4/5em/-0.5em}{
  \begin{scope}[yshift=\sliceDy,xshift=\sliceDx]
    \setcounter{\sliceCtr}{1}
    \draw[step=0.5cm,color=\sliceEdge,thick] (-1,-1) grid (1,1);
    \foreach \cellY in {+0.75,+0.25,-0.25,-0.75}
      \foreach \cellX in {-0.75,-0.25,0.25,0.75}{
        \node [fill=\sliceFill!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\cellX,\cellY) {\arabic{\sliceCtr}};
        \stepcounter{\sliceCtr}
      }
  \end{scope}
}
}

\end{scope}
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ������һ����ά���Ժ���
\begin{frame}{��ʵ�ϣ��������Ǽ򵥵Ķ�ά���� - ��ţ��˽��� :)}
\begin{itemize}
\item \textbf{�dz������ε�˵}������\alert{����}�����;���ļ���չ������˵����ά����\alert{Ҳ����}����������ı�����ʽ
\item<2-> �ϸ������ϣ������ǣ�
    \begin{enumerate}
    \item<2-> \textbf{�������Ķ���}������������ϵ�ı�ʱ����һ������ת����ϵ�ij����������һ���������ϵ������任���仯�ļ����������ζ��壩
    \item<3-> \textbf{���ǿ������Ķ���}������������Э����ͨ�������˷�����������������壩
    \item<4-> \textbf{�����Խ��͵Ķ���}��\alert{�����Ƕ������Ժ���}���Ƕ�����һЩ�����ռ�͵ѿ������ϵĶ�������ӳ��
        \begin{itemize}
        \item ������Ϊ$T(v_0,...,v_r)$������������$r$������$\{v_0,...,v_r\}$
        \item ����������ָ������ÿ�����룬�����������Եģ����磬����һ��$v_i$��������
        \vspace{-0.3em}
        \begin{displaymath}
        T(v_0,...,v_i+c \cdot u,...,v_r) = T(v_0,...,v_i,...,v_r) + c \cdot T(v_0,...,u,...,v_r)
        \end{displaymath}
        ���У�$c$Ϊ��������������ʷdz���Ҫ���������Ƶ���ǰ����������塣
        \end{itemize}
    \end{enumerate}
\end{itemize}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ��һ������һ�������Ķ���
\begin{frame}{������һ��``��''������``����''}
\begin{itemize}
\item ������һ�£�
    \begin{itemize}
    \item ���һ�����������������ij��λ����ֻ��һ����ֵ�������DZ����������ܶ�
    \item �������ͬһ��λ�á��Ӷ���ķ����Ͽ����в�ͬ��ֵ�����������ǡ���þ���˹۲췽�������������������(rank$>$1)������Ӧ������
    \end{itemize}
\end{itemize}

\vspace{-0.8em}
\begin{center}
\tdplotsetmaincoords{50}{140}
\begin{tikzpicture}[scale=2,tdplot_main_coords]
% Main coordinate frame with unit axes a, b, c; revealed from slide 3.
\visible<3->{
\draw[thick,->] (0,0,0) -- (1,0,0) node[anchor=north east]{$a$};
\draw[thick,->] (0,0,0) -- (0,1,0) node[anchor=north west]{$b$};
\draw[thick,->] (0,0,0) -- (0,0,1) node[anchor=south]{$c$};
}
% Second frame a', b', c': the rotated coordinate system is configured here
% (outside any overlay) and then used through tdplot_rotated_coords.
% Its shorter axes (length 0.7) are revealed from slide 4.
\tdplotsetrotatedcoords{20}{40}{00}
\visible<4->{
\draw[thick,color=red,tdplot_rotated_coords,->] (0,0,0)
        -- (.7,0,0) node[anchor=east]{$a'$};
\draw[thick,color=green!50!black,tdplot_rotated_coords,->] (0,0,0)
        -- (0,.7,0) node[anchor=west]{$b'$};
\draw[thick,color=blue,tdplot_rotated_coords,->] (0,0,0)
        -- (0,0,.7) node[anchor=south]{$c'$};
}

\visible<3->{\node [anchor=west,inner sep=2pt] (coord1) at (-0.40in,-0.4in) {\footnotesize{����$v=(a,b,c)$}};}
\visible<4->{\node [anchor=north west,inner sep=2pt] (coord2) at (coord1.south west) {\footnotesize{����$u=(\red{a'}\black{,}{\color{ugreen} b'}\black{,}\blue{c'}\black{)}$}};}

\begin{scope}[xshift=0.4in,yshift=0.35in]
\visible<2->{
\node [anchor=west,inner sep = 2pt] (description) at (0,0) {\small{$T(v,u)$��һ����ά�ռ�$(x,y,z)$�ϵ�}};
\node [anchor=north west,inner sep = 2pt] (description2) at (description.south west) {\small{2������������$v$��$u$����������}};
}

% Matrix form of T(v,u), revealed from slide 5: the label (T) is placed 2em
% below the node (description2), followed left-to-right by a transposed
% column vector of v components (T2), a 3x3 matrix of T components (T3) and
% a column vector of u components (T4).
\visible<5->{
\node [anchor=north west,inner sep=2pt] (T) at ([yshift=-2em]description2.south west) {\small{$T(v,u)=$}};
\node [anchor=west,inner sep=1pt] (T2) at (T.east) {\footnotesize{$\begin{pmatrix} v_x \\ v_y \\ v_z \end{pmatrix}^T$}};
\node [anchor=west,inner sep=1pt] (T3) at ([xshift=2pt]T2.east) {\footnotesize{$\begin{pmatrix} T_{xx} & T_{xy} & T_{xz} \\ T_{yx} & T_{yy} & T_{yz} \\ T_{zx} & T_{zy} & T_{zz} \end{pmatrix}$}};
\node [anchor=west,inner sep=1pt] (T4) at ([xshift=2pt]T3.east) {\footnotesize{$\begin{pmatrix} u_x \\ u_y \\ u_z \end{pmatrix}$}};
}
% Coloured highlight boxes are fitted around the three factors and drawn on
% the background layer so that they sit behind the formulas.
\begin{pgfonlayer}{background}
% Slide 7 onwards: red box behind the 3x3 matrix (T3).
\visible<7->{
\node [rectangle,inner sep=0pt,fill=red!20,minimum height=3.5em,minimum width=7em] [fit = (T3) ] (TBox) {};
}
% Slide 6 onwards: green box behind the v vector (T2), blue box behind the
% u vector (T4).
\visible<6->{
\node [rectangle,inner sep=0pt,fill=green!20,minimum height=3.5em,minimum width=3em] [fit = (T2) ] (VBox) {};
\node [rectangle,inner sep=0pt,fill=blue!20,minimum height=3.5em,minimum width=2.5em] [fit = (T4) ] (UBox) {};
}
\end{pgfonlayer}

\visible<6->{
\draw [<-] (VBox.north) -- ([yshift=0.3em]VBox.north);
\node [anchor=south,align=left] (Vlabel) at ([yshift=0.3em]VBox.north) {\scriptsize{$v$�ڻ������ϵ�ͶӰ}};
\draw [<-] (UBox.north) -- ([yshift=0.3em]UBox.north);
\node [anchor=south,align=left] (Ulabel) at ([yshift=0.3em,xshift=-1em]UBox.north) {\scriptsize{$u$�ڻ������ϵ�ͶӰ}};
}
\visible<7->{
\draw [<-] (TBox.south) -- ([yshift=-0.3em]TBox.south);
\node [anchor=north,align=left] (Vlabel) at ([xshift=-0.5em,yshift=-0.3em]TBox.south) {\scriptsize{������$3 \times 3$�������ϵķ�����ǡ����``����''��ʾ��}};
\node [anchor=north west,align=left] (Vlabel2) at ([yshift=0.2em]Vlabel.south west) {\scriptsize{��Ϊ$[T]$������һ������ϵ����ת}};
}
\end{scope}
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ��������ѧϰ�ж���һ������
\begin{frame}{``����''��``����''����չ������������ʹ������}
\begin{itemize}
\item ����ǰ��Ŀ��Ժ��� - �����``\alert{�������Ƕ�ά����}''
    \begin{itemize}
    \item ���������󶼿��Կ�����ѧ�ϵ�``����''����չ
    \end{itemize}

\item<2-> ����$T(1:3)$��ʾһ��������������Ԫ��\\
\vspace{0.5em}
\begin{tikzpicture}
\begin{scope}
\node [anchor=north east, inner sep=1pt] (label) at (0,0) {�����洢��};
% One row of three storage cells, numbered 1..3 via mycount1.
\draw[step=0.5cm,thick] (0,-0.5) grid (1.5,0);
\setcounter{mycount1}{1}
\foreach \cellX in {0.25,0.75,1.25}{
  \node [fill=green!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\cellX,-0.25) {$\arabic{mycount1}$};
  \stepcounter{mycount1}
}
\end{scope}
\end{tikzpicture}

\item<3-> ����$T(1:2,1:3)$��ʾһ��$3 \times 2$�ľ���\\
\vspace{0.5em}
\begin{tikzpicture}
\begin{scope}
\node [anchor=north east, inner sep=1pt] (label) at (0,0) {�����洢��};
% One row of six storage cells, numbered 1..6 via mycount2. The cells form
% two runs of three: the first run is filled green, the second red.
% Loop item: x position of the run's first cell / fill colour.
\draw[step=0.5cm,thick] (0,-0.5) grid (3.0,0);
\setcounter{mycount2}{1}
\foreach \runStart/\runFill in {0.25/green,1.75/red}
  \foreach \runOffset in {0,0.5,1.0}{
    \node [fill=\runFill!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\runStart+\runOffset,-0.25) {$\arabic{mycount2}$};
    \stepcounter{mycount2}
  }
\end{scope}
\end{tikzpicture}

\item<4-> ����$T(1:2,1:2,1:3)$��ʾһ��������������С��$3 \times 2 \times 2$\\
\vspace{0.5em}
\begin{tikzpicture}
\begin{scope}
\node [anchor=north east, inner sep=1pt] (label) at (0,0) {�����洢��};
% One row of twelve storage cells, numbered 1..12 via mycount3. The cells
% form four runs of three whose fill alternates green / red.
% Loop item: x position of the run's first cell / fill colour.
\draw[step=0.5cm,thick] (0,-0.5) grid (6.0,0);
\setcounter{mycount3}{1}
\foreach \runStart/\runFill in {0.25/green,1.75/red,3.25/green,4.75/red}
  \foreach \runOffset in {0,0.5,1.0}{
    \node [fill=\runFill!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\runStart+\runOffset,-0.25) {$\arabic{mycount3}$};
    \stepcounter{mycount3}
  }
% Braces under each half of the strip; each half is labelled as one
% 3 x 2 sub-tensor. The two labels carry distinct node names so that the
% second no longer overwrites the first.
\draw[decorate,thick,decoration={brace,mirror,raise=0.2em}] (0,-0.50) -- (2.95,-0.50);
\draw[decorate,thick,decoration={brace,mirror,raise=0.2em}] (3.05,-0.50) -- (6,-0.50);
\node [anchor=north] (subtensor1) at (1.5,-0.6) {\footnotesize{$3 \times 2$ sub-tensor}};
\node [anchor=north] (subtensor2) at (4.5,-0.6) {\footnotesize{$3 \times 2$ sub-tensor}};

\end{scope}
\end{tikzpicture}

\item<5-> �߽����������飡���飡���飡
    \begin{itemize}
    \item ��C++��Python�еĶ�ά����һģһ��
    \end{itemize}
\end{itemize}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ��������˷�
\begin{frame}{�����ľ���˷�}
\begin{itemize}
\item ����������$\textbf{y}=f(\textbf{x}\cdot \textbf{w} + \textbf{b})$��$\textbf{x} \cdot \textbf{w}$��$\textbf{x} \times \textbf{w}$�����Ա任������$\textbf{x}$������������$\textbf{w}$��һ������
    \begin{itemize}
    \item $\textbf{x} \cdot \textbf{w}$��ʾ���Ǿ���˷������Ϊ$\times$��
    \item ע�⣬���ﲻ�������˷�����Ϊ�����˷�������������
    \item $\textbf{w}$��$n \times m$�ľ���$\textbf{x}$����״��$... \times n$����$\textbf{x}$�ĵ�һά����Ҫ��$\textbf{w}$��������С���\\
    \vspace{0.5em}
    $\textbf{x}(1:4,1:4,\alert{1:4}) \times \textbf{w}(\alert{1:4},1:2) = \textbf{s}(1:4,1:4,1:2)$
    \end{itemize}
\end{itemize}

\begin{center}
\begin{tikzpicture}

% Input tensor x: four 4x4 slices, each shifted by half an em diagonally so
% they read as a stack (later slices are painted over earlier ones).
% Cells are numbered row by row; the slices start at 1, 2, 3 and 4.
% Shown from slide 2 onwards.
% Loop item: frame colour / cell fill / numbering counter / yshift / xshift /
% number printed in the top-left cell.
\visible<2->{
\foreach \sliceEdge/\sliceFill/\sliceCtr/\sliceDy/\sliceDx/\sliceStart in {%
    orange/orange/mycount1/6.5em/1em/1,%
    blue/blue/mycount2/6em/0.5em/2,%
    ugreen/green/mycount3/5.5em/0em/3,%
    red/red/mycount4/5em/-0.5em/4}{
  \begin{scope}[yshift=\sliceDy,xshift=\sliceDx]
    \setcounter{\sliceCtr}{\sliceStart}
    \draw[step=0.5cm,color=\sliceEdge,thick] (-1,-1) grid (1,1);
    \foreach \cellY in {+0.75,+0.25,-0.25,-0.75}
      \foreach \cellX in {-0.75,-0.25,0.25,0.75}{
        \node [fill=\sliceFill!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\cellX,\cellY) {$\arabic{\sliceCtr}$};
        \stepcounter{\sliceCtr}
      }
  \end{scope}
}
% Caption under the front (red) slice, placed in that slice's coordinates.
\begin{scope}[yshift=5em,xshift=-0.5em]
  \node [anchor=north] (xlabel) at (0,-1.2) {$\textbf{x}$};
\end{scope}
}

\begin{scope}[yshift=5em,xshift=1.5in]
% Weight matrix w drawn as a 4-row, 2-column grid, shown from slide 2 on.
% Column 1 reads (-1, 0, 1, 0) top to bottom, column 2 reads (0, -1, 1, 0);
% the two-character entries "-1" are set in \small.
\visible<2->{
\tikzset{wcell/.style={fill=black!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm}}
\draw[step=0.5cm,thick] (-0.5,-1) grid (0.5,1.0);
% left column
\node [wcell] at (-0.25,0.75)  {\small{$-1$}};
\node [wcell] at (-0.25,0.25)  {$0$};
\node [wcell] at (-0.25,-0.25) {$1$};
\node [wcell] at (-0.25,-0.75) {$0$};
% right column
\node [wcell] at (0.25,0.75)   {$0$};
\node [wcell] at (0.25,0.25)   {\small{$-1$}};
\node [wcell] at (0.25,-0.25)  {$1$};
\node [wcell] at (0.25,-0.75)  {$0$};
% caption below the grid
\node [anchor=north] (xlabel) at (0,-1.2) {$\textbf{w}$};
}

\visible<3>{\draw [->,thick,dashed] (-1.5in+2em+1.5em,-0.3) .. controls +(east:2) and +(west:1) .. (-0.55,0.8) node [pos=0.5,left] {\scriptsize{\textbf{�����}}};}
\visible<4>{\draw [->,thick,dashed] (-1.5in+2em+1.0em,-0.5) .. controls +(east:2) and +(west:1) .. (-0.55,0.8) node [pos=0.5,left] {\scriptsize{\textbf{�����}}};}
\visible<5>{\draw [->,thick,dashed] (-1.5in+2em+0.5em,-0.7) .. controls +(east:2.5) and +(west:1) .. (-0.55,0.8) node [pos=0.5,left] {\scriptsize{\textbf{�����}}};}
\visible<6->{\draw [->,thick,dashed] (-1.5in+2em,-0.9) .. controls +(east:3) and +(west:1) .. (-0.55,0.8) node [pos=0.5,left] {\scriptsize{\textbf{�����}}};}
\end{scope}

% Result x . w: four 4-row, 2-column slices stacked with the same diagonal
% offsets as the input slices, moved 3in to the right. The slices are
% revealed one at a time from slides 3, 4, 5 and 6 respectively.
% Every row of every slice prints "2 1": mycount1 is reset to 2 at the start
% of each row and decremented after each cell.
% Loop item: frame colour / cell fill / yshift / xshift / first slide.
\foreach \sliceEdge/\sliceFill/\sliceDy/\sliceDx/\sliceFrom in {%
    orange/orange/6.5em/1em+3in/3,%
    blue/blue/6em/0.5em+3in/4,%
    ugreen/green/5.5em/0em+3in/5,%
    red/red/5.0em/-0.5em+3in/6}{
  \begin{scope}[yshift=\sliceDy,xshift=\sliceDx]
  \visible<\sliceFrom->{
    \draw[step=0.5cm,color=\sliceEdge,thick] (-0.5,-1) grid (0.5,1.0);
    \foreach \cellY in {+0.75,+0.25,-0.25,-0.75}{
      \setcounter{mycount1}{2}
      \foreach \cellX in {-0.25,0.25}{
        \node [fill=\sliceFill!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\cellX,\cellY) {$\arabic{mycount1}$};
        \addtocounter{mycount1}{-1}
      }
    }
  }
  \end{scope}
}

% Caption and the large "=" sign, positioned in the coordinates of the front
% (red) slice but already shown from slide 3 onwards.
\begin{scope}[yshift=5.0em,xshift=-0.5em+3in]
\visible<3->{
  \node [anchor=north] (xlabel) at (0,-1.2) {$\textbf{x} \cdot \textbf{w}$};
  \node [anchor=center] (elabel) at (-0.7in,0) {\Huge{$\textbf{=}$}};
}
\end{scope}

\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% �����ĵ�Ԫ����
\begin{frame}{�����ĵ�Ԫ����}
\begin{itemize}
\item ������$\textbf{y}=f(\textbf{x}\cdot \textbf{w} + \textbf{b})$Ҳ����һЩ�����ĵ�Ԫ������element-wise operation��
	\begin{itemize}
	\item �ӷ���$\textbf{s}+\textbf{b}$������$\textbf{s} = \textbf{x}\cdot \textbf{w}$
	\item �������$f(\cdot)$
	\end{itemize}
\item<2-> \textbf{��Ԫ��}���Ƕ������е�ÿ��λ�ö����мӷ�
	\begin{itemize}
	\item<3-> ��չ���ӷ���\textbf{�㲥}���ظ�����һ���������мӷ�������Ҫ������������״��ͬ
	\end{itemize}
\end{itemize}

\vspace{-1.5em}
\begin{center}
\begin{tikzpicture}
% Picture of s + b = (s+b), shown from slide 3 onwards.
\visible<3->{
% Left operand s: 2x4 grid numbered 1..8 row by row.
\begin{scope}
\setcounter{mycount1}{1}
\draw[step=0.5cm,color=orange,thick] (-1,-0.5) grid (1,0.5);
\foreach \cellY in {+0.25,-0.25}
  \foreach \cellX in {-0.75,-0.25,0.25,0.75}{
    \node [fill=orange!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\cellX,\cellY) {$\arabic{mycount1}$};
    \stepcounter{mycount1}
  }
\node [anchor=south] (varlabel) at (0,0.6) {$\textbf{s}$};
\end{scope}
% Right operand b: a single row of four cells, each holding the literal 1.
% No counter is involved here, so none is set or advanced.
\begin{scope}[xshift=1.5in]
\draw[step=0.5cm,color=ugreen,thick] (-1,-0) grid (1,0.5);
\foreach \cellX in {-0.75,-0.25,0.25,0.75}{
  \node [fill=green!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\cellX,+0.25) {$1$};
}
\node [anchor=center] (plabel) at (-4.5em,0) {\huge{$\textbf{+}$}};
\node [anchor=south] (varlabel) at (0,0.6) {$\textbf{b}$};
\end{scope}
% Result s+b: 2x4 grid numbered 2..9, i.e. every entry of s increased by 1.
\begin{scope}[xshift=3in]
\setcounter{mycount1}{2}
\draw[step=0.5cm,color=orange,thick] (-1,-0.5) grid (1,0.5);
\foreach \cellY in {+0.25,-0.25}
  \foreach \cellX in {-0.75,-0.25,0.25,0.75}{
    \node [fill=orange!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\cellX,\cellY) {$\arabic{mycount1}$};
    \stepcounter{mycount1}
  }
\node [anchor=center] (plabel) at (-4.5em,0) {\huge{$\textbf{=}$}};
\node [anchor=south] (varlabel) at (0,0.6) {$\textbf{s+b}$};
\end{scope}
}

\end{tikzpicture}
\end{center}

\vspace{-0.3em}

\begin{itemize}
\item<4-> ���Ƶģ����ǿ������������˷���Ҳ�������������Ҳ��������������������vectorization��
\end{itemize}

\vspace{-0.5em}
\visible<4->{
\begin{displaymath}
\textrm{Relu} \Big( \begin{pmatrix} 2 \\ -.3 \end{pmatrix} \Big) = \begin{pmatrix} 2 \\ 0 \end{pmatrix}
\end{displaymath}
}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ���ѧϰ���߰�
\begin{frame}{���ʵ�֣�- ��Դ����������}
\begin{itemize}
\item ʵ��������Ŀ�Դϵͳ�ܶ࣬һ���򵥺��õĹ��߰�NumPy \url{https://numpy.org/}
    \begin{itemize}
    \item Python�ӿڣ���ά����Ķ���ʹ�÷���
    \item �ṩ��������ʾ��ʹ�õķ�ʽ
    \end{itemize}
\item<2-> ������ܻ��������ܣ�TensorFlow��PyTorch
    \begin{itemize}
    \item Google��Facebook��Ʒ�������б�֤
    \item ����ǿ�󣬽ӿڷḻ
    \item ���Խ��д��ģ�����Ӧ��
    \item �����ɲο���ʵ��
    \end{itemize}

    \includegraphics[scale=0.13]{./Figures/tensorflowpytorch.jpg}
\item<3-> �����������ڸ��µ������ܣ� CNTK��MXNet��PaddlePaddle��Keras��Chainer�� dl4j��NiuTensor��
\end{itemize}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% NiuTrans.Tensor���߰�
\begin{frame}{NiuTensor}
\begin{itemize}
\item ����ʹ���������е�NiuTensor���߰����н�ѧ \url{http://www.niutrans.com/opensource/niutensor/index.html}
    \begin{itemize}
    \item ��С�ɣ������޸�
    \item C++���Ա�д������߶��Ż�
    \item ͬʱ֧��CPU��GPU�豸
    \item �ḻ����������ӿ�
    \end{itemize}
\end{itemize}

\includegraphics[scale=0.35]{./Figures/niutensor.jpg}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ʹ��NiuTensor
\begin{frame}{ʹ��NiuTensor}
\begin{itemize}
\item NiuTensor��ʹ�úܼ򵥣�������һ��C++����
\end{itemize}

\begin{tcolorbox}[enhanced,frame engine=empty,boxrule=0.1mm,size=title,colback=blue!10!white]
\begin{flushleft}
{\scriptsize
\begin{tabbing}
\texttt{\#include "source/tensor/XTensor.h"} \hspace{4em} \= // ����XTensor�����ͷ�ļ� \\

\texttt{using namespace nts;} \> // ����nts�����ռ� \\
\ \\

\texttt{int main(int argc, const char ** argv)\{} \\
\ \ \ \ \texttt{XTensor tensor;} \> // ��������tensor \\

\ \ \ \ \texttt{InitTensor2D(\&tensor, 2, 2, X\_FLOAT);} \> // ��������Ϊ2*2�ľ��� \\

\ \ \ \ \texttt{tensor.SetDataRand();} \> // [0,1]���ȷֲ���ʼ������ \\

\ \ \ \ \texttt{tensor.Dump(stdout);} \> // ����������� \\

\ \ \ \ \texttt{return 0;}\\
\texttt{\}}

\end{tabbing}
}
\end{flushleft}
\end{tcolorbox}

\begin{itemize}
\item<2-> ��������������ʾ����ÿ��Ԫ�ص�ֵ
\begin{itemize}
\item<2-> ��������(order=2)����״��$2 \times 2$ (dimsize=2,2)�����������ǵ����ȸ���(dtype=X\_FLOAT)����ϡ��(dense=1.00)
\end{itemize}
\end{itemize}

\vspace{-0em}
\visible<2->{
\begin{tcolorbox}[enhanced,frame engine=empty,boxrule=0.1mm,size=title,colback=black!10!white]
\begin{flushleft}
{\scriptsize
\begin{tabbing}
\texttt{order=2 dimsize=2,2 dtype=X\_FLOAT dense=1.000000} \\
\texttt{3.605762e-001 2.992340e-001 1.393780e-001 7.301248e-001}
\end{tabbing}
}
\end{flushleft}
\end{tcolorbox}
}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ����XTensor
\begin{frame}{����XTensor}
\begin{itemize}
\item ��������XTensor��ʾ������InitTensor���壬������
    \begin{itemize}
    \item ָ��XTensor���ͱ�����ָ��
    \item �����Ľ�
    \item ��������ά�ȵĴ�С���봫ͳ��ά����Լ��һ����
    \item �������������͵ȣ���ȱʡֵ��
    \end{itemize}
\end{itemize}

\vspace{-0.3em}
\begin{tcolorbox}[enhanced,frame engine=empty,boxrule=0.1mm,size=title,colback=blue!10!white]
\begin{flushleft}
{\scriptsize
\begin{tabbing}
\texttt{XTensor tensor;} \hspace{12em} \= // ��������tensor \\
\texttt{int sizes[6] = \{2,3,4,2,3,4\};} \> // ��������״Ϊ2*3*4*2*3*4 \\
\texttt{InitTensor(\&tensor, 6, sizes, X\_FLOAT);} \> // ������״Ϊsizes��6������
\end{tabbing}
}
\end{flushleft}
\end{tcolorbox}

\visible<2->{
\begin{itemize}
\item �����Ķ��巽ʽ
\end{itemize}

\vspace{-0.2em}
\begin{tcolorbox}[enhanced,frame engine=empty,boxrule=0.1mm,size=title,colback=blue!10!white]
\begin{flushleft}
{\scriptsize
\begin{tabbing}
\texttt{XTensor a, b, c;} \hspace{11.5em} \= // ��������tensor \\
\texttt{InitTensor1D(\&a, 10, X\_INT);} \> // 10������������\\
\texttt{InitTensor1D(\&b, 10);} \> // 10ά��������ȱʡ����(����)\\
\texttt{InitTensor4D(\&c, 10, 20, 30, 40);} \> // 10*20*30*40��4������(����)
\end{tabbing}
}
\end{flushleft}
\end{tcolorbox}
}

\visible<3->{
\begin{itemize}
\item ֱ����GPU�϶�������
\end{itemize}

\vspace{-0.2em}
\begin{tcolorbox}[enhanced,frame engine=empty,boxrule=0.1mm,size=title,colback=blue!10!white]
\begin{flushleft}
{\scriptsize
\begin{tabbing}
\texttt{XTensor tensorGPU;} \hspace{10.5em} \= // ��������tensor \\
\texttt{InitTensor2D(\&tensorGPU, 10, 20,} $\backslash$ \> // �ڱ��Ϊ0��GPU�϶������� \\
\hspace{6.7em} \texttt{X\_FLOAT, 0);}
\end{tabbing}
}
\end{flushleft}
\end{tcolorbox}
}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% XTensor�Ĵ�������
\begin{frame}{��������}
\begin{itemize}
\item ���ֵ�Ԫ���ӣ�1�����㣩+��-��*��$\backslash$��Log��Exp�� Power��Absolute�ȣ�����Sigmoid��Softmax�ȼ����
\end{itemize}

\vspace{-0.2em}
\begin{tcolorbox}[enhanced,frame engine=empty,boxrule=0.1mm,size=title,colback=blue!10!white]
\begin{flushleft}
{\scriptsize
\begin{tabbing}
\texttt{XTensor a, b, c, d, e;} \hspace{7em} \= // ��������tensor \\
\texttt{InitTensor3D(\&a, 2, 3, 4);} \> // aΪ2*3*4��3������ \\
\texttt{InitTensor3D(\&b, 2, 3, 4);} \> // bΪ2*3*4��3������ \\
\texttt{InitTensor3D(\&c, 2, 3, 4);} \> // cΪ2*3*4��3������ \\
\texttt{a.SetDataRand();} \> // �����ʼ��a \\
\texttt{b.SetDataRand();} \> // �����ʼ��b \\
\texttt{c.SetDataRand();} \> // �����ʼ��c \\
\texttt{d = a + b * c;} \> // d����ֵΪ a + b * c \\
\texttt{d = ((a + b) * d - b / c ) * d;} \> // d���Ա�Ƕ��ʹ�� \\
\texttt{e = Sigmoid(d);} \> // d���������Sigmoid��ֵ��e
\end{tabbing}
}
\end{flushleft}
\end{tcolorbox}

\visible<2->{
\begin{itemize}
\item �߽����㣬��õ��Ǿ���˷�(MMul)
\end{itemize}

\vspace{-0.2em}
\begin{tcolorbox}[enhanced,frame engine=empty,boxrule=0.1mm,size=title,colback=blue!10!white]
\begin{flushleft}
{\scriptsize
\begin{tabbing}
\texttt{XTensor a, b, c;} \hspace{10.0em} \= // ��������tensor \\
\texttt{InitTensor4D(\&a, 2, 2, 3, 4);} \> // aΪ2*2*3*4��4������ \\
\texttt{InitTensor2D(\&b, 4, 5);} \> // bΪ4*5�ľ��� \\
\texttt{a.SetDataRand();} \> // �����ʼ��a \\
\texttt{b.SetDataRand();} \> // �����ʼ��b \\
\texttt{c = MMul(a, b);} \> // ����˵Ľ��Ϊ2*2*3*5��4������
\end{tabbing}
}
\end{flushleft}
\end{tcolorbox}
}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% XTensor����������
\begin{frame}{�������ú���}
\begin{itemize}
\item �����������в�ȫ�����Բο���վ�ϵ���ϸ˵��
\end{itemize}

\footnotesize{
\begin{center}
\begin{tabular}{l|l}
���� & ���� \\ \hline
\texttt{a.Reshape(o, s)} & ��a�任Ϊ��Ϊo����״Ϊs������\\
\texttt{a.Get(pos)} & ȡ������λ��Ϊpos��Ԫ�� \\
\texttt{a.Set(v, pos)} & ��������λ��Ϊpos��Ԫ�ص�ֵ��Ϊv \\
\texttt{a.Dump(file)} & �������浽file�У�fileΪ�ļ���� \\
\texttt{a.Read(file)} & ��file�ж�ȡ������fileΪ�ļ���� \\ \hline
\texttt{Power(a, p)}  & ����ָ��$\textrm{a}^{\textrm{p}}$ \\
\texttt{Linear(a, s, b)}  & ���� a * s + b��s��b����һ���� \\
\texttt{CopyValues(a)} & ����a��һ������ \\
\texttt{ReduceMax(a, d)} & ��a���ŷ���d���й�Լ���õ����ֵ \\
\texttt{ReduceSum(a, d)} & ��a���ŷ���d���й�Լ���õ��� \\
\texttt{Concatenate(a, b, d)} & ����������a��b��d������\\
\texttt{Merge(a, d)} & ������a��d����ϲ�\\
\texttt{Split(a, d, n)} & ������a��d������ѳ�n��\\ \hline
\texttt{Sigmoid(a)}  & ��a����Sigmoid�任 \\
\texttt{Softmax(a)}  & ��a����Softmax�任�������һ������ \\
\texttt{HardTanH(a)} & ��a����hard tanh�任(˫�����еĽ���)\\
\texttt{Relu(a)}     & ��a����Relu�任\\
\end{tabular}
\end{center}
}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ����XTensor����������
\begin{frame}{����������}
\begin{itemize}
\item ���Ժܷ���Ĺ���һ����������
\end{itemize}

\begin{tcolorbox}
[bicolor,sidebyside,righthand width=4cm,size=title,frame engine=empty,
 colback=blue!10!white,colbacklower=black!5!white]
 {\scriptsize
\begin{tabbing}
\texttt{XTensor x, y, w, b;} \\
\texttt{InitTensor3D(\&x, 3, 4, 5);} \\
\texttt{InitTensor2D(\&w, 5, 3);} \\
\texttt{InitTensor1D(\&b, 3);} \\
\texttt{...} \\
\texttt{y = Sigmoid(MMul(x, w) + b);}
\end{tabbing}
}
\tcblower
\begin{center}
\begin{tikzpicture}[
  softshadow/.style={blur shadow={shadow xshift=1pt,shadow yshift=-1pt}},
  tensornode/.style={draw,circle,inner sep=2pt,fill=red!30!white,softshadow},
  layerbox/.style={draw,rounded corners,inner sep=2pt,minimum width=4em,fill=green!30!white,softshadow},
  shapenote/.style={anchor=west,align=left}]
% Input tensor sits at the origin; the layer and the output are stacked above it.
\node [tensornode] (x) at (0,0) {\footnotesize{$\textrm{x}$}};
\node [anchor=south,layerbox] (layer) at ([yshift=0.7em]x.north) {\scriptsize{layer}};
\node [anchor=south,tensornode] (y) at ([yshift=0.7em]layer.north) {\scriptsize{$\textrm{y}$}};
% Arrows run bottom-up: x -> layer -> y.
\foreach \src/\dst in {x/layer,layer/y}{
  \draw [thick,->] (\src.north) -- (\dst.south);
}
% Shape annotations placed to the right of the input and the output.
\node [shapenote] (xshape) at (x.east) {\tiny{shape: 3*4*5}};
\node [shapenote] (yshape) at (y.east) {\tiny{shape: 3*4*3}};
\end{tikzpicture}
\end{center}
\end{tcolorbox}

\visible<2->{
\begin{itemize}
\item һ���������
\end{itemize}

\begin{tcolorbox}
[bicolor,sidebyside,righthand width=4cm,size=title,frame engine=empty,
 colback=blue!10!white,colbacklower=black!5!white]
 {\scriptsize
\begin{tabbing}
\texttt{XTensor x, y, h1, h2;} \\
\texttt{XTensor w1, b1, w2, w3;} \\
\texttt{InitTensor3D(\&x, 3, 4, 5);} \\
\texttt{InitTensor2D(\&w1, 5, 3);} \\
\texttt{InitTensor1D(\&b1, 3);} \\
\texttt{InitTensor2D(\&w2, 3, 6);} \\
\texttt{InitTensor2D(\&w3, 6, 4);} \\
\texttt{...} \\
\texttt{h1 = Sigmoid(MMul(x, w1) + b1);} \\
\texttt{h2 = HardTanH(MMul(h1, w2));} \\
\texttt{y = Relu(MMul(h2, w3));}
\end{tabbing}
}
\tcblower
\begin{center}
% Three-layer network sketch: x -> layer1 -> layer2 -> layer3 -> y, drawn bottom-up.
\begin{tikzpicture}
\node [draw,circle,inner sep=2pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (x) at (0,0) {\footnotesize{$\textrm{x}$}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=4em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (layer1) at ([yshift=0.7em]x.north) {\scriptsize{layer1}};
% layer2 is anchored to layer1, the node defined in this picture (not to a node of another picture).
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=4em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (layer2) at ([yshift=1.0em]layer1.north) {\scriptsize{layer2}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=4em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (layer3) at ([yshift=1.0em]layer2.north) {\scriptsize{layer3}};
\node [anchor=south,draw,circle,inner sep=2pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (y) at ([yshift=0.7em]layer3.north) {\scriptsize{$\textrm{y}$}};
% Arrows between consecutive nodes.
\draw [thick,->] (x.north) -- (layer1.south);
\draw [thick,->] (layer1.north) -- (layer2.south);
\draw [thick,->] (layer2.north) -- (layer3.south);
\draw [thick,->] (layer3.north) -- (y.south);
% Shape annotations: input/output to the right of x and y, intermediate shapes above layer1 and layer2.
\node [anchor=west,align=left] (xshape) at (x.east) {\tiny{shape: 3*4*5}};
\node [anchor=west,align=left] (yshape) at (y.east) {\tiny{shape: 3*4*4}};
\node [anchor=south west,align=left,inner sep=2pt] (l1shape) at (layer1.north) {\tiny{shape: 3*4*3}};
\node [anchor=south west,align=left,inner sep=2pt] (l2shape) at (layer2.north) {\tiny{shape: 3*4*6}};
\end{tikzpicture}
\end{tcolorbox}
}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ����XTensor���������ӵ�������
\begin{frame}{������һ�������}
\begin{itemize}
\item �κ����綼���Թ���������RNN��Transformer ��
\end{itemize}

\begin{tcolorbox}
[bicolor,sidebyside,righthand width=4cm,size=title,frame engine=empty,
 colback=blue!10!white,colbacklower=black!5!white]
 {\scriptsize
\begin{tabbing}
\texttt{XTensor x[3], y[3], r, wh;} \\
\texttt{XTensor h1, h2, w1, b1, h3, h4;} \\
\texttt{XList splits;} \\
\texttt{...} \\
\texttt{for(unsigned i = 0; i < 3; i++)\{} \\
\texttt{\hspace{2em}r = Concatenate(x[i] + r) * wh;}\\
\texttt{\hspace{2em}splits.Add(\&r);}\\
\texttt{\}}\\
\visible<2->{
\texttt{} \\
\texttt{h1 = Merge(splits, 0);}\\
\texttt{h2 = Relu(h1 * w1 + b1);}\\
\texttt{h3 = h1 + h2;} \\
\texttt{h4 = Softmax(h3);} \\
}
\visible<3->{
\texttt{} \\
\texttt{Split(h4, splits, 0);} \\
\texttt{} \\
\texttt{for(unsigned i = 0; i < 3; i++)\{} \\
\texttt{\hspace{2em}y[i] = *(XTensor*)splits.Get(i);}\\
\texttt{\hspace{2em}y[i].Dump(stdout);}\\
\texttt{\}}
}
\end{tabbing}
}
\tcblower
\begin{center}
\begin{tikzpicture}
\node [draw,circle,inner sep=1pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (x1) at (0,0) {\footnotesize{$\textrm{x}_1$}};
\node [anchor=west,draw,circle,inner sep=1pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (x2) at ([xshift=2em]x1.east) {\footnotesize{$\textrm{x}_2$}};
\node [anchor=west,draw,circle,inner sep=1pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (x3) at ([xshift=2em]x2.east) {\footnotesize{$\textrm{x}_3$}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=2.5em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (rlayer1) at ([yshift=1em]x1.north) {\tiny{rlayer}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=2.5em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (rlayer2) at ([yshift=1em]x2.north) {\tiny{rlayer}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=2.5em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (rlayer3) at ([yshift=1em]x3.north) {\tiny{rlayer}};
\draw [->,thick] (x1.north) -- (rlayer1.south);
\draw [->,thick] (x2.north) -- (rlayer2.south);
\draw [->,thick] (x3.north) -- (rlayer3.south);
\draw [->,thick] (rlayer1.east) -- (rlayer2.west);
\draw [->,thick] (rlayer2.east) -- (rlayer3.west);
\draw [->,thick] (rlayer1.north) -- ([yshift=1em]rlayer1.north);
\draw [->,thick] (rlayer2.north) -- ([yshift=1em]rlayer2.north);
\draw [->,thick] (rlayer3.north) -- ([yshift=1em]rlayer3.north);

\visible<2->{
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h1) at ([yshift=1em]rlayer2.north) {\tiny{h1 = Merge($\cdot$)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h2) at ([yshift=1em]h1.north) {\tiny{h2 = Relu($\cdot$)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h3) at ([yshift=1em]h2.north) {\tiny{h3 = Sum($\cdot$)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h4) at ([yshift=1em]h3.north) {\tiny{h4 = Softmax($\cdot$)}};
\draw [->,thick] (h1.north) -- (h2.south);
\draw [->,thick] (h2.north) -- (h3.south);
\draw [->,thick] (h3.north) -- (h4.south);
\draw [->,thick,rounded corners] (h1.east) -- ([xshift=0.5em]h1.east) -- ([xshift=0.5em,yshift=0.5em]h2.north east) -- ([xshift=-2em,yshift=0.5em]h2.north east) -- ([xshift=-2em,yshift=1em]h2.north east);
}

\visible<3->{
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (slayer) at ([yshift=1em]h4.north) {\tiny{Split($\cdot$)}};
\node [anchor=south,draw,circle,inner sep=1pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (y2) at ([yshift=1em]slayer.north) {\footnotesize{$\textrm{y}_2$}};
\node [anchor=east,draw,circle,inner sep=1pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (y1) at ([xshift=-2em]y2.west) {\footnotesize{$\textrm{y}_1$}};
\node [anchor=west,draw,circle,inner sep=1pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (y3) at ([xshift=2em]y2.east) {\footnotesize{$\textrm{y}_3$}};
\draw [<-,thick] (y1.south) -- ([yshift=-1em]y1.south);
\draw [<-,thick] (y2.south) -- ([yshift=-1em]y2.south);
\draw [<-,thick] (y3.south) -- ([yshift=-1em]y3.south);
}

% Arrow leaving h4 towards the Split node (slayer).
% It is shown from slide 2, whereas slayer itself only becomes visible on slide 3,
% so on slide 2 the arrow ends in empty space.
% NOTE(review): if the arrow should appear together with slayer, use <3-> here -- confirm intent.
\visible<2->{
\draw [->,thick] (h4.north) -- (slayer.south);
}

\end{tikzpicture}
\end{center}
\end{tcolorbox}

\end{frame}


%%%------------------------------------------------------------------------------------------------------------
\subsection{����ѧϰ - ���򴫲�}

%%%------------------------------------------------------------------------------------------------------------
%%% outline: problem 3
\begin{frame}{����һ������}

\vspace{6em}
\begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow]
{\Large
\textbf{��ζ�ģ���еIJ�������ѧϰ,}

\vspace{0.4em}
\textbf{֮��ʹ��ѧϰ����ģ�ͽ����ƶϣ�}
}
\end{tcolorbox}

\vspace{2em}
\begin{center}
\begin{tikzpicture}
\node [fill=blue!10] (label) at (0,0) {\LARGE{$\frac{\partial L(\textbf{w})}{\partial \textbf{w}} = $ ? }};
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ������ = ����ʽ
\begin{frame}{������ = ��������ʽ}
\begin{itemize}
\item ���е������綼���Կ����ɱ����ͺ�����ɵı���ʽ\\
\end{itemize}

\begin{center}
\begin{tikzpicture}
\node [anchor=north west] (eq1) at (0,0) {$\textbf{y} = \textbf{x} + \textbf{b}$};
\node [anchor=north west] (eq2) at (eq1.south west) {$\textbf{y} = \textrm{Relu}(\textbf{x} \cdot \textbf{w} + \textbf{b})$};
\node [anchor=north west] (eq3) at (eq2.south west) {$\textbf{y} = (\textrm{Relu}(\textbf{x} \cdot \textbf{w}_1 + \textbf{b}) + \textbf{x}) \cdot \textbf{w}_2$};
\node [anchor=north west] (eq4) at (eq3.south west) {$\textbf{y} = \textrm{Sigmoid}(\textrm{Relu}(\textbf{x} \cdot \textbf{w}_1 + \textbf{b}_1) + \textbf{x}) \cdot \textbf{w}_2 + \textbf{b}_2$};

\visible<2->{
\node [anchor=north west,minimum height=1.2em,minimum width=1.2em,fill=green!30!white] (xlabel) at ([yshift=-0.5em,xshift=0.3em]eq4.south west) {};
\node [anchor=west] (xlabel2) at (xlabel.east) {������� - ���û�ָ��};
}

\begin{pgfonlayer}{background}
\visible<2->{
\node [anchor=south, minimum height=1.6em,minimum width=0.8em,fill=green!30!white] (x1) at ([xshift=-1.3em]eq4.south) {};
\node [anchor=south, minimum height=1.6em,minimum width=0.8em,fill=green!30!white] (x2) at ([xshift=4.9em]eq4.south) {};
}
\end{pgfonlayer}

\visible<3->{
\node [anchor=north west,minimum height=1.2em,minimum width=1.2em,fill=red!30!white] (wlabel) at ([yshift=-0.3em]xlabel.south west) {};
\node [anchor=west] (wlabel2) at (wlabel.east) {ģ�Ͳ��� - ��ô���ã�����};
}

\begin{pgfonlayer}{background}
\visible<3->{
\node [anchor=south, minimum height=1.6em,minimum width=1.2em,fill=red!30!white] (w1) at ([xshift=0.2em]eq4.south) {};
\node [anchor=south, minimum height=1.6em,minimum width=1.2em,fill=red!30!white] (b1) at ([xshift=2.5em]eq4.south) {};
\node [anchor=south, minimum height=1.6em,minimum width=1.2em,fill=red!30!white] (w2) at ([xshift=6.85em]eq4.south) {};
\node [anchor=south, minimum height=1.6em,minimum width=1.2em,fill=red!30!white] (b2) at ([xshift=9.2em]eq4.south) {};
}
\end{pgfonlayer}

\end{tikzpicture}
\end{center}

\visible<4->{
\begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=blue!5!white,colframe=blue!75!black,drop fuzzy shadow]
{\Large
\textbf{�������ˣ�}

\vspace{0.4em}
\textbf{���ȷ��w��b��ʹx��y��Ӧ�ø��ã�}
}
\end{tcolorbox}
}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ѧϰ��Ŀ����ʲô
\begin{frame}{Ŀ�꺯������ʧ����}
\begin{itemize}
\item ����һ�����͵��Ż����⣬��������������\\
    \begin{enumerate}
    \item �Ż���Ŀ����ʲô��
    \item ��ε�������$\textbf{w}$��$\textbf{b}$���Ŀ�ꣿ
    \end{enumerate}
\item<2-> \textbf{����Ŀ��}�����ڸ���$\textbf{x}$��ʲô����$\textbf{y}$�Ǻõ�
    \begin{itemize}
    \item ���裺�����������$\{\textbf{x}_1,...,\textbf{x}_n\}$��ÿ��$\textbf{x}_i$����Ӧ\alert{��ȷ��}$\hat{\textbf{y}}_i$
    \item ����һ��������$\textbf{y}=f(\textbf{x})$��ÿ��$\textbf{x}_i$Ҳ����һ�����$\textbf{y}_i$
    \item ������Զ�����$\hat{\textbf{y}}_i$���������$\textbf{y}_i$֮���ƫ������������������С����ƫ��Ϳ��Եõ����õ�ģ��
    \end{itemize}
\end{itemize}

\visible<3->{
\vspace{-0.7em}
\begin{center}
\begin{tikzpicture}
\begin{scope}[yscale=0.2,xscale=0.8]
\draw[-,very thick,ublue,domain=-4.2:3.5,samples=100] plot (\x,{ - 1/14 * (\x + 4) * (\x + 1) * (\x - 1) * (\x - 3)});
\visible<4->{
\draw[-,very thick,ugreen,domain=-3.8:3.0,samples=100] plot (\x,{ - 1/14 * (4*\x*\x*\x + 3*\x*\x - 26*\x - 1)});
}
\draw[->,thick] (-6,0) -- (5,0);
\draw[->,thick] (-5,-4) -- (-5,5);

\draw [<-] (-2.5,4) -- (-2,5) node [pos=1,right,inner sep=2pt] {\footnotesize{��$\hat{\textbf{y}}_i$}};
\visible<4->{
\draw [<-] (-3,-3) -- (-2.5,-2) node [pos=0,left,inner sep=2pt] {\footnotesize{Ԥ��$\textbf{y}_i$}};}

\visible<5->{
\draw [<-] (2.3,1) -- (3.3,2) node [pos=1,right,inner sep=2pt] {\footnotesize{ƫ��$|\hat{\textbf{y}}_i - \textbf{y}_i|$}};
\foreach \x in {-3.8,-3.7,...,3.0}{
    \pgfmathsetmacro{\p}{- 1/14 * (\x + 4) * (\x + 1) * (\x - 1) * (\x - 3)};
    \pgfmathsetmacro{\q}{- 1/14 * (4*\x*\x*\x + 3*\x*\x - 26*\x - 1)};
    \draw [-] (\x,\p) -- (\x, \q);
}
}

\end{scope}
\end{tikzpicture}
\end{center}
}

\vspace{-0.3em}
\begin{itemize}
\item<6-> ������̾���\alert{�����Ż�/ѵ��}����$\hat{\textbf{y}}_i$��$\textbf{y}_i$֮��ƫ��Ķ�������һ��\alert{��ʧ����}��Ҳ����ѵ����\alert{Ŀ�꺯��}�����Ż���Ŀ�����\textbf{��С����ʧ����}
\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ������Ŀ�꺯��
\begin{frame}{��������ʧ����}

\begin{itemize}
\item ��ʧ������Ϊ$Loss(\hat{\textbf{y}}_i,\textbf{y}_i)$�����Ϊ$L$�������dz��õĶ���
\end{itemize}

\vspace{0.5em}

\footnotesize{
\renewcommand{\arraystretch}{1.2}
\begin{tabular}{l | l | l | l}
���� & ���� & NiuTensorʵ��(\texttt{yh}��ʾ$\hat{\textbf{y}}_i$) & Ӧ�� \\ \hline
0-1 & $L = \left\{ \begin{array}{ll} 0 & \hat{\textbf{y}}_i = \textbf{y}_i \\ 1 & \hat{\textbf{y}}_i \ne \textbf{y}_i \end{array} \right.$ & \scriptsize{\texttt{L = Sign(Absolute(yh - y))}} & ��֪�� \\
Hinge & $L=\max(0,1-\hat{\textbf{y}}_i \cdot \textbf{y}_i)$ & \scriptsize{\texttt{L = Max(0, 1 - yh * y)}} & SVM \\
����ֵ & $L=|\hat{\textbf{y}}_i - \textbf{y}_i|$ & \scriptsize{\texttt{L = Absolute(yh - y)}} & �ع� \\
Logistic & $L=\log(1 + \hat{\textbf{y}}_i \cdot \textbf{y}_i)$ & \scriptsize{\texttt{L = Log(1 + yh * y)}} & �ع� \\
ƽ�� & $L=(\hat{\textbf{y}}_i - \textbf{y}_i)^2$ & \scriptsize{\texttt{L = Power(yh - y, 2)}} & �ع� \\
ָ�� & $L=\exp(- \hat{\textbf{y}}_i \cdot \textbf{y}_i) $ & \scriptsize{\texttt{L = Exp(Negate(yh * y))}} & \scriptsize{AdaBoost} \\
������ & $L=-\sum_k \textbf{y}_i^{[k]} \log \hat{\textbf{y}}_i^{[k]} $ & \scriptsize{\texttt{L = CrossEntropy(y, yh)}} & ����� \\
       & \scriptsize{$\textbf{y}_i^{[k]}$: $\textbf{y}_i$�ĵ�$k$ά} & & \\
\end{tabular}
\renewcommand{\arraystretch}{1.0}
}

\vspace{-0.5em}
\begin{itemize}
\item ע�⣺
    \begin{itemize}
    \item ��ʧ�������Ը������ⲻͬ����ѡ��û�й̶�Ҫ��
    \item ��Щ��ʧ���������������Լ�������罻����Ҫ��$\hat{\textbf{y}}_i$��$\textbf{y}_i$���Ǹ��ʷֲ�
    \end{itemize}
\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% �Ż�Ŀ�꺯��
\begin{frame}{�����Ż�}

\begin{itemize}
\item ���ڵ�$i$����������($\textbf{x}_i$,$\hat{\textbf{y}}_i$)���������ʧ������������$\textbf{w}$�ĺ�������$\textbf{b}$Ҳ��Ϊһ��$\textbf{w}$������Ϊ$L(\textbf{x}_i,\hat{\textbf{y}}_i;\textbf{w})$�������ѧϰ���Ա�����Ϊ��\\

\begin{displaymath}
\textbf{w}^* = \argmin_{\textbf{w}} \frac{1}{n} \sum_{i=1}^{n} L(\textbf{x}_i,\hat{\textbf{y}}_i;\textbf{w})
\end{displaymath}

$\textbf{w}^*$��ʾ��ѵ������ʹ����ʧ��ƽ��ֵ�ﵽ��С�IJ�����$\frac{1}{n} \sum_{i=1}^{n} L(\textbf{x}_i,\hat{\textbf{y}}_i;\textbf{w})$���������ۺ���(cost function)��������ʧ������ֵ�����Ĺ��ơ�

\vspace{0.5em}

\item<2-> �������⣺\textbf{���$\argmin$�����ҵ����ۺ�����Сֵ��}
    \begin{itemize}
    \item ���Ƿdz����������⣬����һ�µ����µ�IBMģ�ͣ���ʱʹ�õ���EM�㷨
    \item �������ﲢ����һ������ģ��
    \item ��Ҫһ�ָ���ͨ�õ���ⷽ��
    \end{itemize}

\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% �ݶ��½�
\begin{frame}{�ݶ��½���Gradient Descent��}

\begin{itemize}
\item �����Ŀ�꺯�������Dz���$\textbf{w}$�ĺ�������Ϊ$J(\textbf{w})$���Ż�Ŀ���ǣ��ҵ�ʹ$J(\textbf{w})$�ﵽ��С��$\textbf{w}$
\item ע�⣬$\textbf{w}$���ܰ������ڸ�ʵ����������SMT��MERT֮��ĵ��η�����������Կ���һ�ָ����ʺϴ���ʵ���������Ż������������˼����\alert{�ݶ��½�}��
    \begin{itemize}
    \item<2-> ���$J(\textbf{w})$����$\textbf{w}$��΢�֣�$\frac{\partial J(\textbf{w})}{\partial \textbf{w}}$��ʾ$J$��$\textbf{w}$���仯���ķ���
    \item<2-> $\textbf{w}$�����ݶȷ�����£��µ�$\textbf{w}$����ʹ�������ӽ���ֵ
    \end{itemize}
\end{itemize}

\pgfplotsset{%
  colormap={whitered}{color(-1cm)=(orange!75!red);color(1cm)=(white)}
}


\begin{center}
\begin{tikzpicture}[
  % Centre (mu) and spread (sigma) of the plotted surface along the two weight axes.
  declare function = {mu1=1;},
  declare function = {mu2=2;},
  declare function = {sigma1=0.5;},
  declare function = {sigma2=1;},
  % 1-D Gaussian density 1/(s*sqrt(2*pi)) * exp(-(x-m)^2/(2*s^2)); not called in this picture.
  declare function = {normal(\m,\s)=1/(\s*sqrt(2*pi))*exp(-(x-\m)^2/(2*\s^2));},
  % Bell-shaped surface over (x,y). The trailing /2 divides the whole value (it is outside exp);
  % it is kept exactly as before so the rendered plot does not change.
  declare function = {bivar(\ma,\sa,\mb,\sb)=1/(2*pi*\sa*\sb) * exp(-((x-\ma)^2/\sa^2 + (y-\mb)^2/\sb^2))/2;}]
  \footnotesize{
  % The whole plot is revealed from slide 2 on.
  \visible<2->{
  \begin{scope}
  \begin{axis}[
    colormap name  = whitered,
    width          = 8cm,
    height         = 5cm,
    view           = {20}{45},
    enlargelimits  = false,
    grid           = major,
    domain         = -1:3,
    y domain       = 0:4,
    samples        = 30,
    xlabel         = $\textbf{w}^{[1]}$,
    ylabel         = $\textbf{w}^{[2]}$,
    xlabel style   = {xshift=0em,yshift=0.8em},
    ylabel style   = {xshift=0.2em,yshift=0.8em},
    zlabel         = {$J(\textbf{w})$},
    ztick          = {-0.1},
    colorbar,
    colorbar style = {
      at     = {(1.2,0.5)},
      anchor = north west,
      ytick  = {0,-0.1},
      height = 0.25*\pgfkeysvalueof{/pgfplots/parent axis height},
      title  = {}
    }
  ]

    % Negated bell surface, so the objective J(w) has its minimum at (mu1, mu2).
    \addplot3 [surf] {-bivar(mu1,sigma1,mu2,sigma2)};

    % Current parameter point on the surface.
    \node [circle,fill=red,minimum size=3pt,inner sep=1.5pt] () at (axis cs:0.5,2,-0.01) {};

    % Solid arrow: negative-gradient direction (label uses a math minus sign);
    % dotted arrows: two other candidate directions from the same point.
    \draw [->,very thick,ublue] (axis cs:0.5,2,-0.01) -- (axis cs:0.8,1.6,-0.03) node [pos=1,right,inner sep=2pt] {\tiny{$-\frac{\partial J(\textbf{w})}{\partial \textbf{w}}$}};
    \draw [->,very thick,dotted] (axis cs:0.5,2,-0.01) -- (axis cs:0.2,1.5,-0.03);
    \draw [->,very thick,dotted] (axis cs:0.5,2,-0.01) -- (axis cs:0.2,3.5,-0.03);
    %\draw [black!50] (axis cs:0,-1,0) -- (axis cs:0,4,0);

  \end{axis}
  \end{scope}
  }
  }
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% �ݶ��½��ı���
\begin{frame}{�ݶ��½��IJ�ͬʵ�ַ�ʽ}

\begin{itemize}
\item \textbf{�ݶ��½�}�����ǿ��������ݶȷ������$\textbf{w}$һС����֮��õ����õ�$\textbf{w}$��֮�����¼����ݶȣ������ظ���������

\begin{displaymath}
\textbf{w}_{t+1} = \textbf{w}_t - \alpha \cdot \frac{\partial J(\textbf{w}_t)}{\partial \textbf{w}_t}
\end{displaymath}

����$t$��ʾ���µIJ�����$\alpha$��һ����������ʾ���²����Ĵ�С��$\alpha$��������Ҫ����������е�������$J(\textbf{w}_t)$����ʽ�����˾�����㷨�����ʵ�֡�

\item<2-> \textbf{�����ݶ��½�(Batch Gradient Descent)}��

\begin{displaymath}
J(\textbf{w}_t) = \frac{1}{n} \sum_{i=1}^{n} L(\textbf{x}_i,\hat{\textbf{y}}_i;\textbf{w}_t)
\end{displaymath}

���ַ���ѵ���ȶ�����������ÿ�θ�����Ҫ������ѵ���������б�����Ч�ʵͣ�����$n$�ܴ󣩣����ģ�����Ϻ���ʹ��

\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% �ݶ��½��ı���
\begin{frame}{�ݶ��½��IJ�ͬʵ�ַ�ʽ(��)}

\begin{itemize}
\item \textbf{����ݶ��½�(Stochastic Gradient Descent)}��

\begin{displaymath}
J(\textbf{w}_t) = L(\textbf{x}_i,\hat{\textbf{y}}_i;\textbf{w}_t)
\end{displaymath}

����������SGD�����л���ѧϰ�Ŀγ��X�����н��ܡ�ÿ�����ѡȡһ�����������ݶȼ���Ͳ������£����µļ�����۵ͣ�������������������������������ѧϰ(online learning)����������������


\vspace{0.3em}

\item<2-> \textbf{С�����ݶ��½�(Mini-batch Gradient Descent)}��

\begin{displaymath}
J(\textbf{w}_t) = \frac{1}{m} \sum_{i=j}^{j+m-1} L(\textbf{x}_i,\hat{\textbf{y}}_i;\textbf{w}_t)
\end{displaymath}

ÿ�����ʹ�������������в�������(���������ر��)������һ�����з�����������õķ���֮һ

\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% �����ݶ��½��ĸĽ�
\begin{frame}{һЩ�Ľ�}

\begin{itemize}
\item \textbf{���ֺ͸Ľ�}����߻����ݶȵķ����������ٶȡ�ѵ���ȶ��Եȣ�����googleһ��
    \begin{itemize}
    \item Momentum, Adagrad, Adadelta, RMSprop, Adam, AdaMax, Nadam, AMSGrad�ȵ�
    \item \footnotesize{\url{http://ruder.io/optimizing-gradient-descent}}
    \end{itemize}
\item<2-> \textbf{���л�}�����ģ���ݴ�����Ҫ�ֲ�ʽ���㣬�ݶȸ��µIJ�����Ҫ���
    \begin{itemize}
    \item \textbf{ͬ������}�����м���ڵ���ɼ����ͳһ���ܲ����²�����Ч���ȶ������Dz��жȵ�
    \item \textbf{�첽����}��ÿ���ڵ������ʱ���¡����жȸߣ��������ڽڵ��������ܲ�ͬ����������ʮ���ȶ�
    \end{itemize}
\item<3-> \textbf{����}
    \begin{itemize}
    \item ��������ݶ���ʧ�ͱ�ը�����⣬ʹ���ݶȲü����в����ӵ�
    \item �����������ӣ����Զ��ⲿ֪ʶ��ģ����������������ѵ�����ȶ�
    \end{itemize}
\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ��μ����ݶ�
\begin{frame}{��μ����ݶ�? - ��ֵ΢��}

\begin{itemize}
\item \textbf{����һ����������}����μ����ݶ�
\begin{displaymath}
\frac{\partial L(\textbf{w})}{\partial \textbf{w}} = ?
\end{displaymath}

\vspace{0.5em}

\item<2-> \textbf{��ֵ΢��} - �򵥴ֱ��ķ���
\begin{displaymath}
\frac{\partial L(\textbf{w})}{\partial \textbf{w}} = \lim_{\Delta \textbf{w} \to 0} \frac{L(\textbf{w} + \Delta \textbf{w}) - L(\textbf{w} - \Delta \textbf{w}) }{2\Delta \textbf{w}}
\end{displaymath}

�������΢�ֹ�ʽ�����ǿ��Խ�$\textbf{w}$�仯һ�������$\Delta \textbf{w}$��ʾ����֮��$L(\cdot)$�ı仯

    \begin{itemize}
    \item<3-> \textbf{�ŵ������}��������ķdz��򵥣�����ʵ��
    \item<3-> \textbf{ȱ��Ҳ������}��Ч��̫�ͣ����ڸ������硢��������΢��һЩ��ģ�ͻ������޷�ʹ��
    \end{itemize}

\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ��μ����ݶ� - ����΢��
\begin{frame}{��μ����ݶ�? - ����΢��}

\begin{itemize}
\item \textbf{����΢��}����������д��΢�ֱ���ʽ�������������ֵ���õ�΢�ֽ�������磬�������±���ʽ
\begin{displaymath}
L(\textbf{w}) = \textbf{x} \cdot \textbf{w} + 2 \textbf{w}^2
\end{displaymath}

\visible<2->{
\vspace{0.5em}
�����ֶ��Ƶ���΢�ֱ���ʽ

\begin{displaymath}
\frac{\partial L(\textbf{w})}{\partial \textbf{w}} = \textbf{x} + 4 \textbf{w}
\end{displaymath}
}

\visible<3->{
\vspace{0.5em}
��󣬴���$\textbf{x} = \begin{pmatrix} 2 \\ -3 \end{pmatrix}$��$\textbf{w} = \begin{pmatrix} -1 \\ 1 \end{pmatrix}$���õ�΢�ֽ��\\

\vspace{1em}

\begin{displaymath}
\frac{\partial L(\textbf{w})}{\partial \textbf{w}} =  \begin{pmatrix} 2 \\ -3 \end{pmatrix} + 4 \begin{pmatrix} -1 \\ 1 \end{pmatrix} = \begin{pmatrix} -2 \\ 1 \end{pmatrix}
\end{displaymath}
}

\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ����΢�ֵ���������
\begin{frame}{����΢�ֵ���������}

\begin{itemize}
\item \textbf{Expression Swell}����㺯����΢�ֱ���ʽ��dz�����
	\begin{itemize}
	\item ����ʽ�߳����״洢�͹���
	\item ������Ҫ����΢�ֵ�\alert{���ֵ}��������΢�ֱ���ʽ
	\end{itemize}
\end{itemize}

\vspace{0.5em}

{\small
\begin{tabular} {l | l | l}
���� & ΢�ֱ���ʽ & �����΢�ֱ���ʽ \\ \hline
$x$ & $1$ & $1$ \\ \hline
$x(x+1)$ & $(x+1)+x$ & $2x + 1$ \\ \hline
$x(x+1)$ & $(x+1)(x^2+x+1)$ & $4x^3+6x^2$ \\
$(x^2+x+1)$ & $+x(x^2+x+1)$ & $+4x+1$ \\
                     & $+x(x+1)(2x+1)$ & \\ \hline
$(x^2+x)$ & $(2x+1)(x^2+x+1)$ & $8x^7+28x^6$ \\
$(x^2+x+1)$ & $(x^4+2x^3+2x^2+x+1)$ & $+48x^5+50x^4$ \\
$(x^4+2x^3$ & $+(2x+1)(x^2+x)$ & $+36x^3+18x^2$ \\
$+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\
 & $+(x^2+x)(x^2+x+1)$ & \\
 & \ \ $(4x^3+6x^2+4x+1)$ & \\


\end{tabular}
}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% �Զ�΢��
\begin{frame}{��μ����ݶ�? - �Զ�΢��}

\begin{itemize}
\item \textbf{�Զ�΢��}�����ӵ�΢�ֱ�ɼ򵥵IJ��裬��Щ������ȫ�Զ������������׽��д洢�����㡣�������һ�ַ���ģʽ����������Ҳ����\alert{���򴫲�}˼�룩����������
	\begin{enumerate}
	\item \textbf{ǰ�����}��������������룬������ÿ����������ֵ����Ҳ��������ı�׼ʹ�÷�ʽ
	\item \textbf{�������}��������������������������ÿ���������루���������Ӧ��΢��
	\end{enumerate}
\end{itemize}

\visible<2->{
\vspace{-1em}
\begin{center}
\begin{tikzpicture}
\begin{scope}
\tikzstyle{layernode} = [draw,thick,fill=ugreen!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}];

\node [anchor=center,layernode,minimum height=4em,minimum width=1em] (layer01) at (0,0) {};
\node [anchor=north west,layernode,minimum height=3em,minimum width=1em] (layer02) at ([xshift=3em]layer01.north east) {};
\node [anchor=south west,layernode,minimum height=3em,minimum width=1em] (layer03) at ([xshift=7em]layer01.south east) {};
\node [anchor=south west,layernode,minimum height=4em,minimum width=1em] (layer04) at ([xshift=11em]layer01.south east) {};
\node [anchor=south west,layernode,minimum height=4em,minimum width=1em] (layer05) at ([xshift=3em]layer04.south east) {};

\node [anchor=east] (input) at ([xshift=-1em]layer01.west){\scriptsize{����}};
\node [anchor=west] (output) at ([xshift=1em]layer05.east){\scriptsize{���}};

\draw [->] ([xshift=-1em]layer01.west) -- ([xshift=-0.1em]layer01.west);
\draw [->] ([xshift=0.1em,yshift=-0.5em]layer01.north east) -- ([xshift=-0.1em,yshift=-0.5em]layer02.north west);
\draw [->] ([xshift=0.1em,yshift=0.5em]layer01.south east) -- ([xshift=-0.1em,yshift=0.5em]layer03.south west);
\draw [->] ([xshift=0.1em,yshift=-0.5em]layer02.north east) -- ([xshift=-0.1em,yshift=-0.5em]layer04.north west);
\draw [->] ([xshift=0.1em,yshift=0.5em]layer03.south east) -- ([xshift=-0.1em,yshift=0.5em]layer04.south west);
\draw [->] ([xshift=0.1em]layer04.east) -- ([xshift=-0.1em]layer05.west);
\draw [->] ([xshift=0.1em]layer05.east) -- ([xshift=1.0em]layer05.east);

\visible<3->{
\draw [->,very thick,ublue] ([xshift=-1em]layer01.west) -- ([xshift=-0.1em]layer01.west);
}
\visible<4->{
\draw [->,very thick,ublue] ([xshift=0.1em,yshift=-0.5em]layer01.north east) -- ([xshift=-0.1em,yshift=-0.5em]layer02.north west);
}
\visible<5->{
\draw [->,very thick,ublue] ([xshift=0.1em,yshift=0.5em]layer01.south east) -- ([xshift=-0.1em,yshift=0.5em]layer03.south west);
}
\visible<6->{
\draw [->,very thick,ublue] ([xshift=0.1em,yshift=-0.5em]layer02.north east) -- ([xshift=-0.1em,yshift=-0.5em]layer04.north west);
\draw [->,very thick,ublue] ([xshift=0.1em,yshift=0.5em]layer03.south east) -- ([xshift=-0.1em,yshift=0.5em]layer04.south west);
\draw [->,very thick,ublue] ([xshift=0.1em]layer04.east) -- ([xshift=-0.1em]layer05.west);
\draw [->,very thick,ublue] ([xshift=0.1em]layer05.east) -- ([xshift=1.0em]layer05.east);
}

\visible<8->{
\draw [<-,very thick,red] ([xshift=-1em,yshift=-0.3em]layer01.west) -- ([xshift=-0.1em,yshift=-0.3em]layer01.west);
\draw [<-,very thick,red] ([xshift=0.1em,yshift=-0.8em]layer01.north east) -- ([xshift=-0.1em,yshift=-0.8em]layer02.north west);
\draw [<-,very thick,red] ([xshift=0.1em,yshift=0.2em]layer01.south east) -- ([xshift=-0.1em,yshift=0.2em]layer03.south west);
\draw [<-,very thick,red] ([xshift=0.1em,yshift=-0.8em]layer02.north east) -- ([xshift=-0.1em,yshift=-0.8em]layer04.north west);
\draw [<-,very thick,red] ([xshift=0.1em,yshift=0.2em]layer03.south east) -- ([xshift=-0.1em,yshift=0.2em]layer04.south west);
\draw [<-,very thick,red] ([xshift=0.1em,yshift=-0.3em]layer04.east) -- ([xshift=-0.1em,yshift=-0.3em]layer05.west);
\draw [<-,very thick,red] ([xshift=0.1em,yshift=-0.3em]layer05.east) -- ([xshift=1.0em,yshift=-0.3em]layer05.east);
}

\visible<7->{
\draw [<-,thin] ([xshift=0.3em,yshift=0.3em]layer04.east) .. controls +(35:1) and +(215:1) .. ([xshift=-2em,yshift=0.3em]layer05.north west) node [pos=1,above] {\scriptsize{ǰ�򣺲�$i$ �����$h_{i}$}};
}
\visible<9->{
\draw [<-,thin] ([xshift=0.3em,yshift=-0.7em]layer04.east) .. controls +(-35:1) and +(145:1) .. ([xshift=-2em,yshift=-0.3em]layer05.south west) node [pos=1,below] {\scriptsize{����$h_{i}$ �����ݶ�$\frac{\partial L}{\partial h_i}$}};
}

\end{scope}
\end{tikzpicture}
\end{center}
}

\vspace{-1em}
\begin{itemize}
\item<10-> �Զ�΢�ֿ�����\alert{����ͼ}ʵ��(TensorFlow�� NiuTensor ��)����������ͼ�����˿γ̵ķ�Χ������������ѧϰ
\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ���򴫲� - ����˵��
\begin{frame}{����˵��}

\begin{itemize}
\item ��һ��$K$��������Ϊ��������ȷһ�·���
    \begin{itemize}
    \item �������ÿ���������ж�����ƫ�������$\textbf{b}$��
    \end{itemize}
\end{itemize}

\vspace{-1em}
\begin{center}
\begin{tikzpicture}
\begin{scope}

\def\neuronsep{1}
\tikzstyle{neuronnode} = [minimum size=1.2em,circle,draw,ublue,very thick,inner sep=1pt, fill=white,align=center,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}];

%%% layer 1
\foreach \n in {1,...,4}{
    \node [neuronnode] (neuron0\n) at (\n * \neuronsep,0) {};
    \draw [->] ([yshift=-0.8em]neuron0\n.south) -- ([yshift=-0.1em]neuron0\n.south) node [pos=0,below] {\tiny{...}};
}


\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,fill=red!20] [fit = (neuron01) (neuron04)] (layer01) {};
\node [anchor=east] (layer01label) at (layer01.west) {\scriptsize{��$l-1$}};
\end{pgfonlayer}

%%% layer 2
\foreach \n in {1,...,4}{
    \node [neuronnode] (neuron1\n) at (\n * \neuronsep,3em) {};
}

\foreach \n in {1,...,4}{
    \foreach \m in {1,...,4}{
        \draw [<-] (neuron1\n.south) -- (neuron0\m.north);
    }
}

\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron11) (neuron14)] (layer02) {};
\node [anchor=east] (layer02label) at (layer02.west) {\scriptsize{��$l$}};
\end{pgfonlayer}

%%% layer 3
\foreach \n in {1,...,4}{
    \node [neuronnode] (neuron2\n) at (\n * \neuronsep,6em) {};
    \draw [<-] ([yshift=0.8em]neuron2\n.north) -- ([yshift=0.0em]neuron2\n.north) node [pos=0,above] {\tiny{...}};
}

\foreach \n in {1,...,4}{
    \foreach \m in {1,...,4}{
        \draw [<-] (neuron2\n.south) -- (neuron1\m.north);
    }
}

\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,fill=blue!20] [fit = (neuron21) (neuron24)] (layer03) {};
\node [anchor=east] (layer03label) at (layer03.west) {\scriptsize{��$l+1$}};
\end{pgfonlayer}

%%% output layer
\foreach \n in {1,...,4}{
    \node [neuronnode] (neuron3\n) at (\n * \neuronsep,9.4em) {};
    \visible<1-3,5->{
    \draw [<-] ([yshift=0.6em]neuron3\n.north) -- ([yshift=0.0em]neuron3\n.north) node [pos=0,above] {\tiny{output}};
    }
    \visible<4>{
    \draw [<-,red,very thick] ([yshift=0.6em]neuron3\n.north) -- ([yshift=0.0em]neuron3\n.north) node [pos=0,above] {\tiny{output}};
    }
    \draw [->] ([yshift=-0.6em]neuron3\n.south) -- ([yshift=0.0em]neuron3\n.south);
}

\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron31) (neuron34)] (layer04) {};
\node [anchor=east] (layer04label) at (layer04.west) {\scriptsize{��$L$(���)}};
\end{pgfonlayer}

\visible<2->{
\node [neuronnode,draw=red,fill=red!20!white,inner sep=1pt] (neuron12new) at (2 * \neuronsep,3em) {};
\node [anchor=east] (neuronsamplelabel) at ([yshift=-1em]layer02label.south east) {\alert{\textbf{\tiny{��$l$��, ��$i$����Ԫ}}}};
\draw [->,dashed,very thick,red] ([xshift=-0.2em,yshift=0.2em]neuronsamplelabel.east) .. controls +(30:1) and +(220:1) .. ([xshift=-0em,yshift=-0em]neuron12new.210);
}

\visible<3>{
\foreach \n in {1,...,4}{
\draw [<-,thick,red] (neuron2\n.south) -- (neuron12.north);
}
}

\visible<5->{
\draw [<-,thick,red] (neuron14.south) -- (neuron04.north);
\node [anchor=north] (wlabel) at (layer02.south east) {\alert{\scriptsize{$w_{4,4}^{l}$}}};
}

\visible<3->{
\node [anchor=west,align=left] (line01) at ([xshift=1em,yshift=1em]layer04.east) {\footnotesize{$h_{i}^{k}$����$l$��, ��$i$����Ԫ�����}};
\node [anchor=north west,align=left] (line02) at (line01.south west) {\footnotesize{$\textbf{h}^{k}$����$k$������}};
\node [anchor=north west,align=left] (line03) at (line02.south west) {\footnotesize{$\textbf{s}^{k}$����$k$������Ա任$\textbf{s}^k=\textbf{h}^{k-1}\textbf{w}^k$}};
\node [anchor=north west,align=left] (line04) at (line03.south west) {\footnotesize{$f^{k}$����$k$��ļ����$\textbf{h}^k=f^k(\textbf{s}^k)$}};
}
\visible<4->{
\node [anchor=north west,align=left] (line05) at (line04.south west) {\footnotesize{$\textbf{h}^{K}$�������������}};
}
\visible<5->{
\node [anchor=north west,align=left] (line06) at (line05.south west) {\footnotesize{$w_{j,i}^{k}$����$k-1$����Ԫ$j$��}\\\footnotesize{��$k$����Ԫ$i$������Ȩ��}};
\node [anchor=north west,align=left] (line07) at (line06.south west) {\footnotesize{$\textbf{w}^{k}$����$k-1$�����$k$���}\\\footnotesize{����Ȩ��}};
}

\end{scope}
\end{tikzpicture}
\end{center}

\vspace{-1.5em}

\visible<6->{
\begin{displaymath} \textrm{���ڵ�}k\textrm{��}: \textbf{h}^k = f^k(\textbf{s}^k) = f^k(\sum_j h_{j}^{k-1}w_{j,i}^k) = f^k(\textbf{h}^{k-1} \textbf{w}^k) \end{displaymath}
}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% �����ķ��򴫲�
\begin{frame}{���򴫲� - �����}

\begin{itemize}
\item �����(�����׶�)
\end{itemize}

\vspace{-1.0em}
\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=center,minimum height=1.7em,fill=yellow!20,draw] (h) at (0,0) {$\textbf{h}^{K-1}$};
\node [anchor=west,minimum height=1.7em,fill=blue!20,draw] (s) at ([xshift=5.5em]h.east) {$\textbf{s}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=green!20,draw] (h2) at ([xshift=5.5em]s.east) {$\textbf{h}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=orange!20,draw] (l) at ([xshift=5.5em]h2.east) {$L$};
\draw [->] (h.east) -- (s.west);
\draw [->] (s.east) -- (h2.west);
\draw [->] (h2.east) -- (l.west) node [pos=0.5,above] {\tiny{��ʧ}};

\node [anchor=south west,inner sep=2pt] (step100) at ([xshift=0.5em,yshift=-0.8em]h.north east) {\tiny{$\textbf{s}^K = \textbf{h}^{K-1} \textbf{w}^K$}};
\node [anchor=south west,inner sep=2pt] (step101) at (step100.north west) {\tiny{���Ա任}};

\node [anchor=south west,inner sep=2pt] (step200) at ([xshift=0.5em,yshift=-0.8em]s.north east) {\tiny{$\textbf{h}^K = f^K(\textbf{s}^K)$}};
\node [anchor=south west,inner sep=2pt] (step201) at (step200.north west) {\tiny{�����}};

\node [anchor=south,inner sep=1pt] (outputlabel) at ([yshift=0.0em]h2.north) {\tiny{\textbf{�����}}};

\visible<2->{
\draw[decorate,thick,decoration={brace,mirror,raise=0.4em,amplitude=2mm}] (h.south west) -- (s.south west) node [pos=0.5,below,yshift=-1em] {\scriptsize{\textbf{��һ�׶Σ����Ա任}}};
}
\visible<3->{
\draw[decorate,thick,decoration={brace,mirror,raise=0.4em,amplitude=2mm}] ([xshift=0.2em]s.south west) -- (l.south east) node [pos=0.5,below,yshift=-1em] (step2) {\scriptsize{\textbf{�ڶ��׶Σ������+��ʧ����}}};
}

\visible<4->{
\draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]l.north) -- ([yshift=1em,xshift=0.1em]s.north) node [pos=0.5,above] {\tiny{�������ݶ�\alert{$\frac{\partial L}{\partial \textbf{s}^K} = ?$}}};
\draw [-,very thick,red] ([yshift=0.5em]l.north) -- ([yshift=1.5em]l.north);
\draw [-,very thick,red] ([yshift=0.5em]s.north) -- ([yshift=1.5em]s.north);
}

\end{scope}

\end{tikzpicture}
\end{center}

\begin{itemize}
\item<4-> ���򴫲�����������봫���ݶȣ���������ȿ��ǽ׶ζ�\visible<5->{����$\pi^k = \frac{\partial L}{\partial \textbf{s}^k}$��ʾ��ʧ$L$�ڵ�$k$�㼤������봦���ݶȣ�������ʽ����}

\vspace{-1.5em}
\visible<5->{
\begin{eqnarray}
\pi^K & = & \frac{\partial L}{\partial \textbf{s}^K} \nonumber \\
         & = & \frac{\partial L}{\partial \textbf{h}^K} \cdot \frac{\partial \textbf{h}^K}{\partial \textbf{s}^K}  \nonumber \\
         & = & \frac{\partial L}{\partial \textbf{h}^K} \cdot \frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K}  \nonumber
%\frac{\partial L}{\partial \textbf{w}^K} & = & \frac{\partial L}{\partial \textbf{h}^K} \cdot \frac{\partial \textbf{h}^K}{\partial \textbf{w}^K} \nonumber \\
%                                                         & \visible<4->{=} & \visible<4->{\frac{\partial L}{\partial \textbf{h}^K} \cdot \frac{\partial f^K(\textbf{h}^{K-1} \textbf{w}^K)}{\partial \textbf{w}^K} \ \ \ \ (\textrm{��Ϊ}\textbf{h}^K=f^K(\textbf{h}^{K-1} \textbf{w}^K))} \nonumber \\
%                                                         & \visible<5->{=} & \visible<5->{\frac{\partial L}{\partial \textbf{h}^K} \cdot \frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K} \cdot \frac{\partial \textbf{s}^K}{\partial \textbf{w}^K}  \ \ \ (\textrm{��Ϊ}\textbf{s}^K=\textbf{h}^{K-1} \textbf{w}^K)} \nonumber
\end{eqnarray}
}

\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% �����ķ��򴫲� - �������ӵ�����
\begin{frame}{���򴫲� - �����($\textbf{s}^K$�����ݶ�)}
\begin{center}
\begin{tikzpicture}

% Output-layer gradient written as a product of two factors. The four nodes
% are chained left to right (each anchored to the east side of the previous
% one) so that every factor can be highlighted on its own.
\begin{scope}
\node [anchor=center] (factor00) at (0,0) {${\displaystyle \pi^K \ = }$};
\node [anchor=west] (factor01) at (factor00.east) {${\displaystyle \frac{\partial L}{\partial \textbf{h}^K}}$};
\node [anchor=west,inner sep=1pt] (factor02) at (factor01.east) {${\displaystyle \cdot}$};
\node [anchor=west] (factor03) at (factor02.east) {${\displaystyle \frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K}}$};

% Highlights are placed on the background layer so they sit behind the formula.
\begin{pgfonlayer}{background}
% overlays 2-4: red box fitted around the loss derivative
\visible<2-4>{\node [rectangle,inner sep=0em,fill=red!20,fit=(factor01)] (p1) {};}
% overlays 3-4: blue box fitted around the activation derivative
\visible<3-4>{\node [rectangle,inner sep=0em,fill=blue!20,fit=(factor03)] (p2) {};}
% overlay 5 onwards: green circle fitted around the product dot
\visible<5->{\node [circle,inner sep=0em,fill=green!20,fit=(factor02)] (p3) {};}
\end{pgfonlayer}

\end{scope}
\end{tikzpicture}
\end{center}

\begin{itemize}
\item<2-> \raisebox{-0.7em}{\tikz{\node [anchor=west,fill=red!20] (factor01) at (factor00.east) {$\frac{\partial L}{\partial \textbf{h}^K}$};}} ��ʾ��ʧ$L$�����������ı仯�ʣ����磬����$L = \frac{1}{2} ||\hat{\textbf{y}} - \textbf{h}^K||^2$����$\frac{\partial L}{\partial \textbf{h}^K} = \textbf{h}^K - \hat{\textbf{y}}$
\item<3-> \raisebox{-0.7em}{\tikz{\node [anchor=west,fill=blue!20] (factor01) at (factor00.east) {$\frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K}$};}} ��ʾ�������������Լ�������ı仯�ʣ����磬����$f(\textbf{s}) = \frac{1}{1+\exp(-\textbf{s})}$����$\frac{\partial f(\textbf{s})}{\partial \textbf{s}} = f(\textbf{s})(1-f(\textbf{s}))$
\item<4-> ����������ֱ������$s^K$�����ݶ��൱������ʧ����΢��($\frac{\partial L}{\partial \textbf{h}^K}$)�ͼ����΢��($\frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K}$) �ij˻�\visible<5->{��ע���������в������ǵ�Ԫ����������������Ԫ�˷�}

\end{itemize}

\visible<4->{
\vspace{-0.5em}
\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=west,minimum height=1.7em,fill=blue!20,draw] (s) at (0,0) {$\textbf{s}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=green!20,draw] (h2) at ([xshift=5.5em]s.east) {$\textbf{h}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=orange!20,draw] (l) at ([xshift=5.5em]h2.east) {$L$};
\draw [->] (s.east) -- (h2.west);
\draw [->] (h2.east) -- (l.west);

\draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]l.north) -- ([yshift=1em,xshift=0.1em]h2.north) node [pos=0.5,above] {\tiny{���ݶ�\alert{$\frac{\partial L}{\partial \textbf{h}^K} = ?$}}};
\draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]h2.north) -- ([yshift=1em,xshift=0.1em]s.north) node [pos=0.5,above] {\tiny{���ݶ�\alert{$\frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K} = ?$}}};
\draw [-,very thick,red] ([yshift=0.5em]l.north) -- ([yshift=1.5em]l.north);
\draw [-,very thick,red] ([yshift=0.5em]h2.north) -- ([yshift=1.5em]h2.north);
\draw [-,very thick,red] ([yshift=0.5em]s.north) -- ([yshift=1.5em]s.north);

\end{scope}

\end{tikzpicture}
\end{center}
}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% �����ķ��򴫲� - �� dL/dw
\begin{frame}{���򴫲� - �����($\textbf{h}^{K-1}$�����ݶ�)}

\begin{itemize}
\item �Ѿ��õ�$\textbf{s}^K$�����ݶ�\visible<2->{�����������������}
	\begin{enumerate}
	\item<2-> ������ʧ$L$���ڵ�$K$���������$\textbf{w}^K$���ݶȣ�$\frac{\partial L}{\partial \textbf{w}^K}$
	\item<2-> ������ʧ$L$���ڵ�$K$������$\textbf{h}^{K-1}$���ݶȣ�$\frac{\partial L}{\partial \textbf{h}^{K-1}}$
	\end{enumerate}
\end{itemize}

\vspace{-0.8em}
\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=center,minimum height=1.7em,fill=yellow!20,draw] (h) at (0,0) {$\textbf{h}^{K-1}$};
\node [anchor=west,minimum height=1.7em,fill=blue!20,draw] (s) at ([xshift=5.5em]h.east) {$\textbf{s}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=green!20,draw] (h2) at ([xshift=5.5em]s.east) {$\textbf{h}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=orange!20,draw] (l) at ([xshift=5.5em]h2.east) {$L$};
\draw [->] (h.east) -- (s.west);
\draw [->] (s.east) -- (h2.west);
\draw [->] (h2.east) -- (l.west) node [pos=0.5,above] {\tiny{��ʧ}};

\node [anchor=south west,inner sep=2pt] (step100) at ([xshift=0.5em,yshift=-0.8em]h.north east) {\tiny{$\textbf{s}^K = \textbf{h}^{K-1} \textbf{w}^K$}};

\node [anchor=south west,inner sep=2pt] (step200) at ([xshift=0.5em,yshift=-0.8em]s.north east) {\tiny{$\textbf{h}^K = f^K(\textbf{s}^K)$}};

\node [anchor=south,inner sep=1pt] (outputlabel) at ([yshift=0.0em]h2.north) {\tiny{\textbf{�����}}};
\node [anchor=south west] (slabel) at ([yshift=1em,xshift=0.3em]s.north) {\scriptsize{\textbf{\alert{�Ѿ��õ���$\pi^K = \frac{\partial L}{\partial \textbf{s}^K}$}}}};
\draw [->,red] ([yshift=0.3em]slabel.south) .. controls +(south:0.5) and +(north:0.5) .. ([xshift=0.5em]s.north);

\visible<2->{
\draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]s.north) -- ([yshift=1em,xshift=0.1em]h.north) node [pos=0.5,above] {\tiny{\alert{$\frac{\partial L}{\partial \textbf{w}^K} = ?$, $\frac{\partial L}{\partial \textbf{h}^{K-1}} = ?$}}};
\draw [-,very thick,red] ([yshift=0.5em]h.north) -- ([yshift=1.5em]h.north);
\draw [-,very thick,red] ([yshift=0.5em]s.north) -- ([yshift=1.5em]s.north);
}

\end{scope}

\end{tikzpicture}
\end{center}

\begin{itemize}
\item<3-> ����$\textbf{s}^K = \textbf{h}^{K-1} \textbf{w}^K$������$\pi^K = \frac{\partial L}{\partial \textbf{s}^K}$�Ѿ���⣬���Եõ�(��ҪһЩ��ѧ���������Դ�����֪ʶ���Ƶ�һ��!)��

\vspace{-1.2em}

\begin{eqnarray}
\frac{\partial L}{\partial \textbf{w}^K}      & = & [\textbf{h}^{K-1}]^T \pi^K \nonumber \\
\frac{\partial L}{\partial \textbf{h}^{K-1}} & = & \pi^K  [\textbf{w}^K]^T\nonumber
\end{eqnarray}

���$[\textbf{A}]^T$��ʾ$\textbf{A}$��ת�ã�$\pi^K  [\textbf{w}^K]^T$��ʾ����$\pi^K$\alert{�����}$\textbf{w}^K$��ת��
\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ����ķ��򴫲�
\begin{frame}{���򴫲� - ����}
\begin{itemize}
\item ������������$k$��$\textbf{h}^k = f^k(\textbf{s}^k) = f^k(\textbf{h}^{k-1}\textbf{w}^k)$��������������������ݶ�$\pi^k=\frac{\partial L}{\partial \textbf{h}^{k}}$����Ҫ
	\begin{enumerate}
	\item ������ʧ$L$���ڵ�$k$���������$\textbf{w}^k$���ݶȣ�$\frac{\partial L}{\partial \textbf{w}^k}$
	\item ������ʧ$L$���ڵ�$k$������$\textbf{h}^{k-1}$���ݶȣ�$\frac{\partial L}{\partial \textbf{h}^{k-1}}$
	\end{enumerate}
\item<2-> ֱ��������һҳ�ķ��������Խ�$\pi^k=\frac{\partial L}{\partial \textbf{h}^{k}} \frac{\partial f^k(\textbf{s}^k)}{\partial \textbf{s}^{k}}$���򴫲�
\vspace{-0.0em}
\begin{eqnarray}
\frac{\partial L}{\partial \textbf{w}^k}      & = & [\textbf{h}^{k-1}]^T \pi^k \nonumber \\
\frac{\partial L}{\partial \textbf{h}^{k-1}} & = & \pi^k  [\textbf{w}^k]^T\nonumber
\end{eqnarray}

\end{itemize}

\visible<3->{
\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=center,draw,fill=red!20,minimum height=1.8em,minimum width=2.5em] (h) at (0,0) {$\textbf{h}^{k-1}$};
\node [anchor=west,draw,fill=blue!20,minimum height=1.8em,minimum width=2.5em] (s) at ([xshift=6em]h.east) {$\textbf{s}^{k}$};
\node [anchor=west,draw,fill=green!20,minimum height=1.8em,minimum width=2.5em] (h2) at ([xshift=6em]s.east) {$\textbf{h}^{k}$};
\node [anchor=east] (prev) at ([xshift=-2em]h.west) {...};
\node [anchor=west] (next) at ([xshift=2em]h2.east) {...};
\draw [->,thick] ([xshift=0.1em]prev.east) -- ([xshift=-0.1em]h.west);
\draw [->,thick] ([xshift=0.1em]h.east) -- ([xshift=-0.1em]s.west) node [pos=0.5,below] {\tiny{$\textbf{s}^k = \textbf{h}^{k-1}\textbf{w}^k$}};
\draw [->,thick] ([xshift=0.1em]s.east) -- ([xshift=-0.1em]h2.west) node [pos=0.5,below] {\tiny{$\textbf{h}^k = f^k(\textbf{s}^{k})$}};
\draw [->,thick] ([xshift=0.1em]h2.east) -- ([xshift=-0.1em]next.west);

\visible<4->{
\draw [<-,thick,red] ([xshift=0.1em,yshift=0.4em]h2.east) -- ([xshift=-0.1em,yshift=0.4em]next.west) node [pos=0.8,above] {\tiny{���򴫲�}};
}

\visible<5->{
\draw [<-,thick,red] ([xshift=0.1em,yshift=0.4em]s.east) -- ([xshift=-0.1em,yshift=0.4em]h2.west) node [pos=0.5,above] {\tiny{���򴫲�}};
}

\visible<6->{
\draw [<-,thick,red] ([xshift=0.1em,yshift=0.4em]h.east) -- ([xshift=-0.1em,yshift=0.4em]s.west) node [pos=0.5,above] {\tiny{���򴫲�}};
}

\visible<7->{
\draw [->,thick,red,dashed] ([yshift=-0.1em]h.south) -- ([yshift=-1em]h.south) -- ([yshift=-1em]h2.south) -- ([yshift=-0.1em]h2.south);
\node [anchor=north,red] (recur) at ([yshift=-1em]s.south) {\scriptsize{$k=k-1$�ظ���������}};
}

\visible<4->{
\node [anchor=south] (h2label) at (h2.north) {$\frac{\partial L}{\partial \textbf{h}^{k}}$};
}

\visible<5->{
\node [anchor=south] (slabel) at (s.north) {$\pi^k = \frac{\partial L}{\partial \textbf{s}^{k}}$};
}

\visible<6->{
\node [anchor=south] (hlabel) at (h.north) {$\frac{\partial L}{\partial \textbf{h}^{k-1}}$, $\frac{\partial L}{\partial \textbf{w}^{k}}$};
}

\end{scope}
\end{tikzpicture}
\end{center}
}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ���򴫲�ʵ��
\begin{frame}{���򴫲���ʵ��}
\begin{itemize}
\item ����һ����������������ʵ�ַ��򴫲�
\end{itemize}

\begin{tcolorbox}
[bicolor,sidebyside,righthand width=3.5cm,size=title,frame engine=empty,
 colback=blue!10!white,colbacklower=black!5!white]
 {\scriptsize
\begin{tabbing}
\texttt{XTensor x, y, gold, h[5], w[5], s[5];} \\
\texttt{XTensor dh[5], dw[5], ds[5];} \\
\texttt{...} // ǰ����� \\
\texttt{h[0] = x;} \\
\texttt{y = h[4];} \\

\visible<2->{
\texttt{} \\
\texttt{CrossEntropyBackward(dh[4], y, gold);} \\
\texttt{SoftmaxBackward(y, s[4], dh[4], ds[4]);}\\
\texttt{MMul(h[3], {\tiny X\_TRANS}, ds[4], {\tiny X\_NOTRANS}, dw[4]);}\\
}

\visible<3->{
\texttt{} \\
\texttt{dh[2] = dh[3];}\\
\texttt{dh[1] = dh[3];}\\
}

\visible<4->{
\texttt{} \\
\texttt{ReluBackward(h[2], s[2], dh[2], ds[2]);}\\
\texttt{MMul(h[1], {\tiny X\_TRANS}, ds[2], {\tiny X\_NOTRANS}, dw[2]);}\\
\texttt{} \\
\texttt{ReluBackward(h[1], s[1], dh[1], ds[1]);}\\
\texttt{MMul(h[0], {\tiny X\_TRANS}, ds[1], {\tiny X\_NOTRANS}, dw[1]);}\\
}

\visible<5->{
\texttt{} \\
\texttt{for(unsigned i = 0; i < 5; i++)\{} \\
\texttt{} \ \ \ \ ... // ͨ��\alert{\texttt{dw[i]}}���ʲ������ݶ�\\
\texttt{\}}
}

\end{tabbing}
}
\tcblower
\begin{center}
% Network diagram for the hand-written backward pass: four stacked layer boxes
% (h1..h4) plus the softmax output box (slayer). Black arrows show the forward
% direction; they are swapped for thick red reversed arrows overlay by overlay.
\begin{tikzpicture}[
    layerbox/.style={anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em},
    layershadow/.style={blur shadow={shadow xshift=1pt,shadow yshift=-1pt}},
    fwd/.style={->,thick},
    bwd/.style={<-,very thick,red}]

% layer boxes, stacked bottom-up with a 1.5em gap
\node [layerbox,fill=red!30!white,layershadow] (h1) at (0,0) {\tiny{x (input)}};
\node [layerbox,fill=green!30!white,layershadow] (h2) at ([yshift=1.5em]h1.north) {\tiny{h1 = Relu(x * w1)}};
\node [layerbox,fill=green!30!white,layershadow] (h3) at ([yshift=1.5em]h2.north) {\tiny{h2 = Relu(h1 * w2)}};
\node [layerbox,fill=green!30!white,layershadow] (h4) at ([yshift=1.5em]h3.north) {\tiny{h3 = h2 + h1}};

% forward edges: the two lower ones stay until overlay 3, the upper one and
% the skip connection around the h3 box only until overlay 2
\visible<1-3>{
\draw [fwd] (h1.north) -- (h2.south);
\draw [fwd] (h2.north) -- (h3.south);
}
\visible<1-2>{
\draw [fwd] (h3.north) -- (h4.south);
\draw [fwd,rounded corners] (h2.east) -- ([xshift=0.5em]h2.east) -- ([xshift=0.5em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=1.5em]h3.north east);
}

% backward edges: same paths with reversed arrow tips
\visible<4->{
\draw [bwd] (h1.north) -- (h2.south);
\draw [bwd] (h2.north) -- (h3.south);
}
\visible<3->{
\draw [bwd] (h3.north) -- (h4.south);
\draw [bwd,rounded corners] (h2.east) -- ([xshift=0.5em]h2.east) -- ([xshift=0.5em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=1.5em]h3.north east);
}

% output layer and loss caption
\node [layerbox,fill=red!30!white,layershadow] (slayer) at ([yshift=1.5em]h4.north) {\tiny{h4 = Softmax(h3 * w4) (output)}};
\node [anchor=south] (losslabel) at (slayer.north) {\scriptsize{\textbf{Cross Entropy Loss}}};

% edge into the output layer: forward on overlay 1, backward afterwards
\visible<1>{\draw [fwd] (h4.north) -- (slayer.south);}
\visible<2->{\draw [bwd] (h4.north) -- (slayer.south);}

\end{tikzpicture}
\end{center}
\end{tcolorbox}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% �Զ�΢�ֵ�ʵ��
\begin{frame}{���򵥵�ʵ��}
\begin{itemize}
\item ���˵��ǣ����ڼ������е��������ѧϰ��ܶ�ʵ�����Զ�΢�֣�һ���������Ը㶨
\end{itemize}

\begin{tcolorbox}
[bicolor,sidebyside,righthand width=3.5cm,size=title,frame engine=empty,
 colback=blue!10!white,colbacklower=black!5!white]
 {\scriptsize
\begin{tabbing}
\texttt{XTensor x, loss, gold, h[5], w[5], b[5];} \\
\texttt{...} \\

\texttt{} \\
\texttt{h[1] = Relu(MMul(x, w[1]) + b[1]);} \\
\texttt{h[2] = Relu(MMul(h[1], w[2]) + b[2]);} \\
\texttt{h[3] = HardTanH(h[2]);} \\
\texttt{h[4] = Softmax(MMul(h[3], w[4]));} \\
\texttt{loss = CrossEntropy(h[4], gold);} \\

\texttt{} \\
\texttt{XNet net;}\\
\alert{\texttt{net.Backward(loss);} //һ�д���ʵ���Զ�΢��}\\

\texttt{} \\
\texttt{for(unsigned i = 0; i < 5; i++)\{} \\
\texttt{} \ \ \ \ ... // ͨ��\alert{\texttt{w[i].grad}}���ʲ������ݶ�\\
\texttt{\}}

\end{tabbing}
}
\tcblower
\begin{center}
% Static network diagram shown beside the auto-differentiation listing: four
% stacked layer boxes (h1..h4) and the softmax output box (slayer), joined by
% forward arrows only.
\begin{tikzpicture}[
    layerbox/.style={anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.0em},
    layershadow/.style={blur shadow={shadow xshift=1pt,shadow yshift=-1pt}},
    fwd/.style={->,thick}]

% layer boxes, stacked bottom-up with a 1.0em gap
\node [layerbox,fill=red!30!white,layershadow] (h1) at (0,0) {\tiny{x (input)}};
\node [layerbox,fill=green!30!white,layershadow] (h2) at ([yshift=1.0em]h1.north) {\tiny{h1 = Relu(x * w1 + b1)}};
\node [layerbox,fill=green!30!white,layershadow] (h3) at ([yshift=1.0em]h2.north) {\tiny{h2 = Relu(h1 * w2 + b2)}};
\node [layerbox,fill=green!30!white,layershadow] (h4) at ([yshift=1.0em]h3.north) {\tiny{h3 = HardTanh(h2)}};

% forward edges between consecutive layers
\draw [fwd] (h1.north) -- (h2.south);
\draw [fwd] (h2.north) -- (h3.south);
\draw [fwd] (h3.north) -- (h4.south);

% output layer and loss caption
\node [layerbox,fill=red!30!white,layershadow] (slayer) at ([yshift=1.0em]h4.north) {\tiny{h4 = Softmax(h3 * w4) (output)}};
\node [anchor=south] (losslabel) at (slayer.north) {\scriptsize{\textbf{Cross Entropy Loss}}};

\draw [fwd] (h4.north) -- (slayer.south);

\end{tikzpicture}
\end{center}
\end{tcolorbox}

\begin{itemize}
\item ����������Զ�΢��ʵ��Ҳ���Բο�TensorFlow�� PyTorch�ȹ���
\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ǰ�������̼�����ֵ�ù�ע������
\begin{frame}{ǰ����㼰��������}
\begin{itemize}
\item \alert{ǰ�����}ʵ���Ͼ������繹���Ĺ��̣������ֳ��÷�ʽ
    \begin{itemize}
    \item \textbf{��̬ͼ}(��PyTorch��NiuTensor)��д�꺯������ʽ��ǰ����㼴��ɣ����ڵ���
    \item \textbf{��̬ͼ}(��TensorFlow)����������ʽ��ɺ󣬲����ܵõ�ǰ�����������Ҫ���Ե���һ��Forward���������Ǽ���ͼ���Խ�������Ż���ִ��Ч�ʽϸ�
    \end{itemize}
\item<2-> ����һЩ���ѧϰϵͳʵ�ֵ����⣬ֵ�ù�ע��������Щ�������˱��γ̵ķ�Χ
    \begin{itemize}
    \item \textbf{�ֲ�ʽѵ��}�����ڸ���ģ�͵ĺ�������ѵ������Ҫ���ö���豸��������࿨��ͬʱѵ��
    \item \textbf{�;��ȼ���}��Ϊ�����Ч�ʿ��Բ��ð뾫�Ȼ��߶��������м���
    \item \textbf{ģ��ѹ��}���������࣬����ѹ��ģ�ͣ�ʹ��ģ�����ڴ洢ͬʱ���ϵͳ����Ч��
    \item \textbf{ѵ�������ͳ���ѡ��}����ͬ����������Ҫ��ͬ��ѵ�����ԣ������������ã��Ӻܶ࣬��Ҫ���۾���
    \end{itemize}
\end{itemize}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
\section{������ģ��}

%%%------------------------------------------------------------------------------------------------------------
%%% outline: neural language modeling
\begin{frame}{��������}

\vspace{6em}
\begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow]
{\Large
\textbf{��ν���Ԫ����Ӧ�õ�NLP��}

\vspace{0.4em}
\textbf{- ����ģ�͵������罨ģ}
}
\end{tcolorbox}

\vspace{2em}
\begin{center}
\begin{tikzpicture}
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
\subsection{ǰ����ѭ������ע����������}

%%%------------------------------------------------------------------------------------------------------------
%%% ��NLP���������ܸ�ʲô��
\begin{frame}{��Ȼ���Դ�������������}
\begin{itemize}
\item �����緽������Ȼ���Դ���(NLP)�������µ�˼·
\end{itemize}

\begin{tabular} {l | l}
\textbf{��ͳ����ͳ�Ƶķ���} & \textbf{���ѧϰ����} \\ \hline
����\alert{��ɢ}�ռ�ı�ʾģ�� & ����\alert{����}�ռ�ı�ʾģ�� \\
NLP�����\alert{�����ṹ}���� & �������ṹ���裬\alert{�˵���}ѧϰ \\
\alert{��������}Ϊ�� & ����������������Ҫ\alert{�������} \\
�����������\alert{�洢����Դ} & ģ�ʹ洢���С����\alert{������}
\end{tabular}

\vspace{0em}

\begin{itemize}
\item<2-> ����ģ������Ҳ����ʹ�����ѧϰ����(Ч���dz���)
    \begin{itemize}
    \item ����ģ��Ҫ�ش���������������һ���ʴ��ĺû�
    \item ���Ի���һ�µڶ����ᵽ��$n$Ԫ�﷨ģ��
    \end{itemize}
    \vspace{0.5em}
    \begin{displaymath}
    \textrm{P}(w_0 w_1 ... w_m) = ?
    \end{displaymath}

\end{itemize}

\visible<3->{
\begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=blue!5!white,colframe=blue!75!black,drop fuzzy shadow]
{\Large
\textbf{��ζԴʴ������ɸ��ʽ��н�ģ��}
}
\end{tcolorbox}
}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% n-gram����ģ��
\begin{frame}{$n$-gram����ģ��}
\begin{itemize}
\item \textbf{��ʽ����}
\begin{eqnarray}
\textrm{P}(w_1 w_2 ... w_m)  & = & \textrm{P}(w_1) \textrm{P}(w_2|w_1) \textrm{P}(w_3 | w_1 w_2) ... \nonumber \\
                                               &    & \textrm{P}(w_m|w_1...w_{m-1}) \nonumber
\end{eqnarray}
\item<2-> \textbf{��ͳ$n$-gram����ģ��}����ǰ�ʽ�������ǰ��$n-1$����
\begin{eqnarray}
\textrm{P}(w_1 w_2 ... w_m)  & = & \textrm{P}(w_1) \textrm{P}(w_2|w_1) \textrm{P}(w_3 | w_1 w_2) ... \nonumber \\
                                               &    & \textrm{P}(w_m|\underbrace{w_{m-n+1}...w_{m-1}}_{\text{ǰ��$n-1$����}}) \nonumber
\end{eqnarray}
\vspace{-1.0em}
\ \ \ \ \ \ ����
\begin{displaymath}
\textrm{P}(w_m | w_{m-n+1} ... w_{m-1})  = \frac{\textrm{count}(w_{m-n+1}...w_{m})}{\textrm{count}(w_{m-n+1}...w_{m-1})}
\end{displaymath}
\ \ \ \ \ \ $\textrm{count}(\cdot)$��ʾ��ѵ��������ͳ�Ƶ�Ƶ��
\end{itemize}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% n-gram lm => neural lm
\begin{frame}{$n$-gram���ɸ��ʵ������罨ģ}
\begin{itemize}
\item ��ͳ��$n$-gram����ģ��ʵ���Ͼ���һ����ѯ������$w_{m-n+1} ... w_{m}$��ѯ$n$-gram����$\textrm{P}(w_m | w_{m-n+1} ... w_{m-1})$
    \begin{itemize}
    \item ���ű���������һ��$w_{m-n+1} ... w_{m}$��\alert{��ɢ��ʾ}
    \item ����$n$������\alert{����ϡ��}�����dz����أ���Ϊ�������$n$-gram��û������
    \item ��ΪҪά��$n$-gram���������洢���Ĵ�
    \end{itemize}
\item<2-> ��һ��˼·��ֱ�Ӷ�$\textrm{P}(w_m | w_{m-n+1} ... w_{m-1})$���������ռ佨ģ�������庯��$g$�����������$w_{m-n+1} ... w_{m}$��
    \begin{displaymath}
    g(w_{m-n+1} ... w_{m}) \approx \textrm{P}(w_m | w_{m-n+1} ... w_{m-1})
    \end{displaymath}



\item<3-> ��ߴ����Եķ�����ǰ��������(FNN)����ģ��
    \begin{itemize}
    \item �����еľ��䣬���ִ�������ģ�͵���Ʋ�����ԶӰ��
    \end{itemize}

    \textbf{A Neural Probabilistic Language Model}\\
    \textbf{Bengio et al., 2003, Journal of Machine Learning Research 3: 1137--1155}
\end{itemize}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% FNNLM architecture
\begin{frame}{ǰ������������ģ��(Bengio et al., 2003)}
\begin{itemize}
\item ��4-gram����ģ��Ϊ��
\end{itemize}

\vspace{-1em}
\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=west] (w0) at (0,0) {\footnotesize{$w_{i-3}$}};
\node [anchor=west] (w1) at ([xshift=2em]w0.east) {\footnotesize{$w_{i-2}$}};
\node [anchor=west] (w2) at ([xshift=2em]w1.east) {\footnotesize{$w_{i-1}$}};
\node [anchor=north] (index0) at ([yshift=0.5em]w0.south) {\tiny(index)};
\node [anchor=north] (index1) at ([yshift=0.5em]w1.south) {\tiny(index)};
\node [anchor=north] (index2) at ([yshift=0.5em]w2.south) {\tiny(index)};
\node [anchor=south,draw,inner sep=3pt] (e0) at ([yshift=1em]w0.north) {\tiny{$e_0=w_{i-3} \textbf{C}$}};
\node [anchor=south,draw,inner sep=3pt] (e1) at ([yshift=1em]w1.north) {\tiny{$e_1=w_{i-2} \textbf{C}$}};
\node [anchor=south,draw,inner sep=3pt] (e2) at ([yshift=1em]w2.north) {\tiny{$e_2=w_{i-1} \textbf{C}$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt] (h0) at ([yshift=1.5em]e1.north) {\tiny{$h_0=\textrm{Tanh}([e_0,e_1,e_2] \textbf{H} + \textbf{d})$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt] (h1) at ([yshift=1.5em]h0.north) {\tiny{$y=\textrm{Softmax}(h_0 \textbf{U})$}};
\node [anchor=south] (ylabel) at ([yshift=1em]h1.north) {\footnotesize{$\textrm{P}(w_i|w_{i-3}w_{i-2}w_{i-1})$}};

\draw [->] ([yshift=0.1em]w0.north) -- ([yshift=-0.1em]e0.south);
\draw [->] ([yshift=0.1em]w1.north) -- ([yshift=-0.1em]e1.south);
\draw [->] ([yshift=0.1em]w2.north) -- ([yshift=-0.1em]e2.south);
\draw [->] ([yshift=0.1em]e0.north) -- ([xshift=-2em,yshift=-0.1em]h0.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]h0.south);
\draw [->] ([yshift=0.1em]e2.north) -- ([xshift=2em,yshift=-0.1em]h0.south);
\draw [->] ([yshift=0.1em]h0.north) -- ([yshift=-0.1em]h1.south);
\draw [->] ([yshift=0.1em]h1.north) -- ([yshift=-0.1em]ylabel.south);

\visible<6->{
\draw [->,dashed,red,thick] ([xshift=1em,yshift=0.1em]e1.north) -- ([xshift=1em,yshift=-0.1em]h1.south);
\draw [->,dashed,red,thick] ([xshift=-1em,yshift=0.1em]e0.north) .. controls +(north:2) and +(south:1) .. ([xshift=-3em,yshift=-0.1em]h1.south);
\draw [->,dashed,red,thick] ([xshift=1em,yshift=0.1em]e2.north) .. controls +(north:2) and +(south:1) .. ([xshift=3em,yshift=-0.1em]h1.south);
}

\begin{pgfonlayer}{background}
\visible<2->{
\node [rectangle,inner sep=0.1em,fill=ugreen!20!white] [fit = (w0) (index0)] (wordbox0) {};
\node [rectangle,inner sep=0.1em,fill=ugreen!20!white] [fit = (w1) (index1)] (wordbox1) {};
\node [rectangle,inner sep=0.1em,fill=ugreen!20!white] [fit = (w2) (index2)] (wordbox2) {};
}
\end{pgfonlayer}

\visible<3->{
\node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e0) at ([yshift=1em]w0.north) {\tiny{$e_0=w_{i-3} \textbf{C}$}};
\node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e1) at ([yshift=1em]w1.north) {\tiny{$e_1=w_{i-2} \textbf{C}$}};
\node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e2) at ([yshift=1em]w2.north) {\tiny{$e_2=w_{i-1} \textbf{C}$}};
}
\visible<5->{
\node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (h0) at ([yshift=1.5em]e1.north) {\tiny{$h_0=\textrm{Tanh}([e_0,e_1,e_2] \textbf{H} + \textbf{d})$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (h1) at ([yshift=1.5em]h0.north) {\tiny{$y=\textrm{Softmax}(h_0 \textbf{U})$}};
}

\visible<2->{
\node [anchor=north west] (indexlabel0) at ([yshift=-0.5em,xshift=-1.2em]index0.south west) {\scriptsize{{\color{ugreen} \textbf{One-hot��ʾ}}}};
\node [anchor=north west] (indexlabel1) at ([yshift=0.3em]indexlabel0.south west) {\scriptsize{ÿ������һ���ʻ����С��0-1������ʾ��}};
\node [anchor=north west] (indexlabel2) at ([yshift=0.3em]indexlabel1.south west) {\scriptsize{��һλΪ1������Ϊ0�����磺}};
\node [anchor=north west] (indexlabel3) at ([yshift=0.0em]indexlabel2.south west) {\scriptsize{$(0,0,{\red 1},0,0,0,0,0,0,0,0,0)$}};
\node [anchor=north west] (indexlabel4) at ([xshift=1em,yshift=0.0em]indexlabel3.south west) {\scriptsize{�ʱ��е�3����}};
\draw [->] ([xshift=1.2em,yshift=-0.2em]indexlabel4.north west) -- ([xshift=1.2em,yshift=0.3em]indexlabel4.north west);
}

\visible<3->{
\node [anchor=west] (embedinglabel0) at ([xshift=1em,yshift=-1em]e2.east) {\scriptsize{{\blue \textbf{�ʵķֲ�ʽ��ʾ}}}};
\node [anchor=north west] (embedinglabel1) at ([yshift=0.3em]embedinglabel0.south west) {\scriptsize{�ʵ�0-1��ʾ��һ������$\textbf{C}$���������}};
\node [anchor=north west] (embedinglabel2) at ([yshift=0.3em]embedinglabel1.south west) {\scriptsize{��$\textbf{C}$����һ����ѯ��}};
}

\visible<4->{
\node [anchor=north west] (wordvector) at ([yshift=-1em]embedinglabel2.south west) {\tiny{$(0,0,{\red 1},...)$}};
\node [anchor=west] (timeslabel) at ([xshift=-0.3em]wordvector.east) {\footnotesize{$\times$}};
\node [anchor=north west,inner sep=2pt] (embeddingmatrix) at ([xshift=1em]wordvector.north east) {\tiny{$\begin{pmatrix} 0 & 1 & 3 \\ .2 & -1 & .3 \\ 1 & 7 & .3 \\ ... \end{pmatrix}$}};
\node [anchor=south,inner sep=1pt] (wordvectorlabel) at (wordvector.north) {\scriptsize{$w_{i-1}$}};
\node [anchor=south,inner sep=1pt] (embeddingmatrixlabel) at (embeddingmatrix.north) {\scriptsize{$\textbf{C}$}};
\node [anchor=north west] (selectedlabel) at ([yshift=-2em]wordvector.south west) {\scriptsize{�ڰ�$\textbf{C}$���������������(i.e., $e_{i-1}$)}};

\begin{pgfonlayer}{background}
\visible<4->{
\node [anchor=north west,fill=blue!20!white,minimum height=0.6em,minimum width=5.0em] (selected) at ([yshift=-1.3em]embeddingmatrix.north west) {};
}
\end{pgfonlayer}
\draw [->] ([xshift=0.15em,yshift=0.3em]wordvector.south) .. controls +(south:0.3) and +(west:0.5) .. (selected.west);
}

\visible<5->{
\node [anchor=south west] (hiddenlabel0) at ([yshift=5em]embedinglabel0.north west) {\scriptsize{{\color{orange} \textbf{���������}}}};
\node [anchor=north west] (hiddenlabel1) at ([yshift=0.3em]hiddenlabel0.south west) {\scriptsize{$[e_0,e_1,e_2]$��ʾ����������������һ��}};
\node [anchor=north west] (hiddenlabel2) at ([yshift=0.3em]hiddenlabel1.south west) {\scriptsize{֮�󾭹��������磬���ͨ��Softmax���}};
\node [anchor=north west] (hiddenlabel3) at ([yshift=0.3em]hiddenlabel2.south west) {\scriptsize{ע�⣬$h_0\textbf{U}$�õ����дʵı�ʾ(����)��}};
\node [anchor=north west] (hiddenlabel4) at ([yshift=0.3em]hiddenlabel3.south west) {\scriptsize{Softmaxȷ������ʻ���ϵ�һ���ֲ�}};
}

\visible<6->{
\node [anchor=south west] (directlabel0) at ([yshift=1em]hiddenlabel0.north west) {\scriptsize{\alert{\textbf{�ײ����ϲ��ֱ������(��ѡ)}}}};
}

\end{scope}
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% FNNLM implementation
\begin{frame}{ǰ������������ģ��(FNN LM)��ʵ��}

\begin{itemize}
\item ʵ�ַdz��򵥣����д���
    \begin{itemize}
    \item ϸ��1����batchingʱ���԰�$w[i]$������չ�������������
    \item ϸ��2��TanHһ�����HardTanHʵ�֣���ΪTanH�������
    \end{itemize}
\end{itemize}

\begin{tcolorbox}
[bicolor,sidebyside,righthand width=3.8cm,size=title,frame engine=empty,
 colback=blue!10!white,colbacklower=black!5!white]
 {\scriptsize
\begin{tabbing}
% Code listing for the feed-forward LM (typeset line by line inside tabbing).
% Every tensor that is assigned below is declared up front: e01 holds the
% concatenation of e[0] and e[1], e012 the concatenation of all three
% embeddings. (The array e[3] itself cannot be reused as the result name.)
\texttt{XTensor w[3], e[3], e01, e012, h0, y;} \\
\texttt{XTensor C, H, d, U;} \\
\texttt{...}\\

\texttt{} \\
% Embedding lookup for each of the three context words.
\texttt{for(unsigned i = 0; i < 3; i++)} \\
\texttt{\ \ \ \ e[i] = MMul(w[i], C);}\\
\texttt{e01 = Concatenate(e[0], e[1], -1);}\\
\texttt{e012 = Concatenate(e01, e[2], -1);}\\

\texttt{} \\
% Hidden layer on the concatenated embeddings, then the output layer.
\texttt{h0 = TanH(MMul(e012, H) + d);}\\
\texttt{y = Softmax(MMul(h0, U));}\\

\texttt{} \\
\texttt{for(unsigned k = 0; k < size; k++)\{} \\
\texttt{} \ \ \ \ ... // \alert{\texttt{y}}�ĵ�$k$Ԫ�ر�ʾ $\textrm{P}(w|...)$\\
\texttt{} \ \ \ \ ... // $w$Ϊ�ʻ�����$k$����\\
\texttt{\}}

\end{tabbing}
}
\tcblower
\begin{center}
\begin{tikzpicture}
% Feed-forward LM diagram: three context words -> embedding boxes ->
% hidden layer h_0 -> softmax layer y -> probability label.
\begin{scope}[embbox/.style={anchor=south,draw,inner sep=3pt,align=left},
              layerbox/.style={anchor=south,draw,minimum width=9em,inner sep=3pt}]
% Context words, chained left to right with a 0.5em gap.
\node [anchor=west] (w0) at (0,0) {\scriptsize{$w_{i-3}$}};
\node [anchor=west] (w1) at ([xshift=0.5em]w0.east) {\scriptsize{$w_{i-2}$}};
\node [anchor=west] (w2) at ([xshift=0.5em]w1.east) {\scriptsize{$w_{i-1}$}};
% For word slot k: "(index)" caption below, embedding box e_k above, and the arrow joining them.
% \lag is the offset of the word from position i (slot 0 is w_{i-3}, slot 2 is w_{i-1}).
\foreach \slot/\lag in {0/3,1/2,2/1} {
  \node [anchor=north] (index\slot) at ([yshift=0.5em]w\slot.south) {\tiny(index)};
  \node [embbox] (e\slot) at ([yshift=1.0em]w\slot.north) {\tiny{$e_\slot:$}\\\tiny{$w_{i-\lag} \textbf{C}$}};
  \draw [->] ([yshift=0.1em]w\slot.north) -- ([yshift=-0.1em]e\slot.south);
}
% Hidden layer (centred over e1), output layer, and the final probability label.
\node [layerbox] (h0) at ([yshift=1.5em]e1.north) {\tiny{$h_0=\textrm{Tanh}([e_0,e_1,e_2] \textbf{H} + \textbf{d})$}};
\node [layerbox] (h1) at ([yshift=1.5em]h0.north) {\tiny{$y=\textrm{Softmax}(h_0 \textbf{U})$}};
\node [anchor=south] (ylabel) at ([yshift=1em]h1.north) {\scriptsize{$\textrm{P}(w_i|w_{i-3}w_{i-2}w_{i-1})$}};
% The three embeddings enter the hidden layer at -2em, 0em and +2em from the centre of its lower edge.
\foreach \slot/\shift in {0/-2em,1/0em,2/2em} {
  \draw [->] ([yshift=0.1em]e\slot.north) -- ([xshift=\shift,yshift=-0.1em]h0.south);
}
\draw [->] ([yshift=0.1em]h0.north) -- ([yshift=-0.1em]h1.south);
\draw [->] ([yshift=0.1em]h1.north) -- ([yshift=-0.1em]ylabel.south);
\end{scope}
\end{tikzpicture}
\end{center}
\end{tcolorbox}
\vspace{-0.5em}
\footnotesize{ע: size��ʾ�ʻ����С}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ������ģ�͸����Ǵ�����ʲô
\begin{frame}{�����Խ�ģ������}

\begin{itemize}
\item Bengio el al. (2003)���д����۵�����
    \begin{enumerate}
    \item ������ÿһ�㾿��ѧ����ʲô \\
    �ʻ㡢�䷨����������һЩ֪ʶ����ν��ͣ�
    \item ����IJ����������� - 10�㡢20�㡢100������� \\
    \# of layers: 10 $\to$ 20 $\to$ 100 $\to$ 1000
    \item ����(�������ز��С)���ѡ�� - ��ͬ�������������\\
    ���ʵķֲ�ʽ��ʾά�ȶ��ã�\\
    ������ã�\\
    ��������ѡ��\\
    ...
    \end{enumerate}
\item<2-> ��FNN LM�õ�������
    \begin{itemize}
    \item ���¶������ʲô - �Ǵʵ����һ�����һ��ʵ������
    \item �����������Ժܺõı�ʾ����֮���(�̾���)����
    \item $n$-gram�����ɸ��ʿ���ʹ�������ռ亯����������������ϡ�����⣬ģ�Ͳ�����Ҫ��¼������$n$-gram
    \end{itemize}
\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ѭ��������
\begin{frame}{ѭ��������(Recurrent Neural Networks)}

\begin{itemize}
\item FNN LM��Ȼ��Ч�����Ǻʹ�ͳ��$n$-gram LMһ������Ҫ����\alert{����������}����
\begin{center}
\begin{tikzpicture}
\begin{scope}
% Token row w_1, w_2, ..., w_{m-n+1}, ..., w_m; each node is placed 0.5em east of its left neighbour.
\node [anchor=west] (w0) at (0,0) {$w_1$};
\node [anchor=west] (w1) at ([xshift=0.5em]w0.east) {$w_2$};
\node [anchor=west] (w2) at ([xshift=0.5em]w1.east) {$...$};
\node [anchor=west] (w3) at ([xshift=0.5em]w2.east) {$w_{m-n+1}$};
\node [anchor=west] (w4) at ([xshift=0.5em]w3.east) {$...$};
% The last token w_m is the only node with a (green) background.
\node [anchor=west,fill=green!20!white] (w5) at ([xshift=0.5em]w4.east) {$w_{m}$};
% Blue arc below the row: from w_m back to w_{m-n+1} (node w3).
\draw [->,thick,ublue] (w5.south).. controls +(210:0.5) and +(-30:0.5) .. (w3.south);
% Red arc above the row: from w_m back to w_2 (node w1).
\draw [->,thick,red] (w5.north).. controls +(150:1) and +(30:1) .. (w1.north);
\draw [->,very thick,ublue] ([xshift=-5em,yshift=1em]w0.west) -- ([xshift=-6.5em,yshift=1em]w0.west) node [pos=0,right] {\scriptsize{����}};
\draw [->,very thick,red] ([xshift=-5em,yshift=-0.5em]w0.west) -- ([xshift=-6.5em,yshift=-0.5em]w0.west) node [pos=0,right] {\scriptsize{������}};

\end{scope}
\end{tikzpicture}
\end{center}
\item<2-> �ܷ�ֱ�Ӷ�ԭʼ���⽨ģ�������庯��$g$�����������$w_{1} ... w_{m}$��
    \vspace{-0.5em}
    \begin{displaymath}
    g(w_{1} ... w_{m}) \approx \textrm{P}(w_m | w_{1} ... w_{m-1})
    \end{displaymath}
\item<3-> \textbf{ѭ��������(RNNs)}���ԺܺõĽ���������⣬���Ҳ���ɹ���Ӧ�������Խ�ģ����
	\begin{itemize}
	\item ������ÿ���ʵ����ɶ������Ѿ����ɵ����д�
	\item ���ڲ�ͬλ�õĴʵ����ɸ��ʶ�������ͬһ����������
	\end{itemize}
	
        \textbf{Recurrent Neural Network Based Language Model}\\
        \textbf{Mikolov et al., 2010, In Proc. of Interspeech, 1045-1048}
\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ѭ��������Ľṹ
\begin{frame}{ѭ����Ԫ}

\begin{itemize}
\item ����������$(\textbf{x}_0,\textbf{x}_1,...,\textbf{x}_t,...)$������$\textbf{x}_t$��ʾ�����е�$t$��Ԫ�أ�Ҳ������\alert{ʱ��$t$}���롣������Ӧ�����������$(\textbf{y}_0,\textbf{y}_1,...,\textbf{y}_t,...)$�� ��ѭ���������У�ÿ��ʱ�̵������������ͬһ��\alert{ѭ����Ԫ}��������\visible<2->{��������ģ�ͣ�һ�ּ򵥵Ľṹ��}

\visible<2->{
{\small
\begin{tcolorbox}
[bicolor,sidebyside,righthand width=4.3cm,size=title,frame engine=empty,
 colback=blue!10!white,colbacklower=black!5!white]

\begin{eqnarray}
\textbf{y}_t & = & \textrm{Softmax}(\textbf{h}_t \textbf{V}) \nonumber \\
\textbf{h}_t & = & \textrm{TanH}(\textbf{x}_t \textbf{U} + \textbf{h}_{t-1} \textbf{W}) \nonumber
\end{eqnarray}
\footnotesize{$\textbf{h}_t$: $t$ʱ�̵�����״̬\\
$\textbf{h}_{t-1}$: $t-1$ʱ�̵�����״̬\\
$\textbf{V}, \textbf{U}, \textbf{W}$: ����
}
\tcblower
\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=west,inner sep=3pt,minimum width=8em] (h) at (0,0) {\tiny{$\textbf{h}_t  =  \textrm{TanH}(\textbf{x}_t \textbf{U} + \textbf{h}_{t-1} \textbf{W})$}};
\node [anchor=south west,inner sep=3pt] (r) at ([yshift=-0.2em]h.north west) {\tiny{ѭ����Ԫ:}};
\begin{pgfonlayer}{background}
\node [rectangle,draw,inner sep=0em,fill=green!20!white] [fit = (r) (h)] (rbox) {};
\end{pgfonlayer}
\node [anchor=south,draw,minimum width=8em,fill=green!20!white] (y) at ([yshift=1.5em]rbox.north) {\tiny{$\textbf{y}_t = \textrm{Softmax}(\textbf{h}_t \textbf{V})$}};
\node [anchor=south,inner sep=2pt] (output) at ([yshift=1em]y.north) {\scriptsize{$\textbf{y}_t$}};
\node [anchor=north,inner sep=2pt] (input) at ([yshift=-1em]h.south) {\scriptsize{$\textbf{x}_t$}};
\draw [->,thick] (input.north) -- ([yshift=-0.1em]rbox.south);
\draw [->,thick] ([yshift=0.1em]rbox.north) -- ([yshift=-0.1em]y.south) node [pos=0.5,left] {\tiny{$\textbf{h}_t$}};
\draw [->,thick] ([yshift=0.1em]y.north) -- (output.south);
\draw [->,thick] ([xshift=0.1em]rbox.east) -- ([xshift=1em]rbox.east) node [pos=1,above] {\tiny{$\textbf{h}_t$}};
\draw [->,thick] ([xshift=-1em]rbox.west) -- ([xshift=-0.1em]rbox.west) node [pos=0,above] {\tiny{$\textbf{h}_{t-1}$}};

\end{scope}
\end{tikzpicture}
\end{center}
\end{tcolorbox}
}
}

\item<3-> \textbf{�������ѭ����}$t$ʱ�̵�״̬��$t-1$ʱ��״̬�ĺ�����������̿��Բ��ϱ�ִ��
\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ѭ��������ġ����䡱
\begin{frame}{ѭ���������``����''}
\begin{itemize}
\item ѭ����������Լ������ⳤ�ȵ���ʷ����˿��Էdz��ʺϴ��������������У�������Ȼ���Ծ���
    \begin{itemize}
    \item ע�⣺$\textbf{h}_{t-1}$���Ա����ݵ�����״̬
    \end{itemize}
\end{itemize}

\vspace{-1em}
% Unrolls the recurrence h_t = TanH(x_t U + h_{t-1} W) for two further steps.
% h_{t-1} is wrapped in \alert in every expansion so the same term can be
% followed through the nesting. Each row carries \nonumber, so no equation
% numbers are printed.
\begin{eqnarray}
\textbf{h}_t & = & \textrm{TanH}(\textbf{x}_t \textbf{U} + \alert{\textbf{h}_{t-1}} \textbf{W}) \nonumber \\
% From overlay 2: h_{t+1}, first in terms of h_t, then with h_t substituted.
\visible<2->{
\textbf{h}_{t+1} & = & \textrm{TanH}(\textbf{x}_{t+1} \textbf{U} + \textbf{h}_{t} \textbf{W}) \nonumber \\
                 & = & \textrm{TanH}(\textbf{x}_{t+1} \textbf{U} + \textrm{TanH}(\textbf{x}_t \textbf{U} + \alert{\textbf{h}_{t-1}} \textbf{W}) \textbf{W}) \nonumber \\
                 }
% From overlay 3: h_{t+2}; the fully substituted form is broken over two rows.
\visible<3->{
\textbf{h}_{t+2} & = & \textrm{TanH}(\textbf{x}_{t+2} \textbf{U} + \textbf{h}_{t+1} \textbf{W}) \nonumber \\
                 & = & \textrm{TanH}(\textbf{x}_{t+2} \textbf{U} + \nonumber \\
                 &   & \textrm{TanH}(\textbf{x}_{t+1} \textbf{U} + \textrm{TanH}(\textbf{x}_t \textbf{U} + \alert{\textbf{h}_{t-1}} \textbf{W}) \textbf{W}) \textbf{W}) \nonumber
                 }
\end{eqnarray}

\vspace{-1em}
\begin{center}
\begin{tikzpicture}
% Unrolled RNN: one cell per time step. Step t is always shown; the material
% for steps t+1 and t+2 is uncovered from overlays 2 and 3 respectively.
\begin{scope}[rnnnode/.style={draw,inner sep=5pt,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}},
              flow/.style={->,thick}]
% The three cells are painted first, so their blur shadows stay underneath every arrow.
\node [anchor=west,rnnnode] (node1) at (0,0) {\scriptsize{RNN Cell}};
\visible<2->{
\node [anchor=west,rnnnode] (node2) at ([xshift=4.5em]node1.east) {\scriptsize{RNN Cell}};
}
\visible<3->{
\node [anchor=west,rnnnode] (node3) at ([xshift=4.5em]node2.east) {\scriptsize{RNN Cell}};
}
% Step t: input x_t, output h_t, incoming state h_{t-1}, and the state handed on to the next cell.
\node [anchor=north] (x1) at ([yshift=-1em]node1.south) {\footnotesize{$\textbf{x}_{t}$}};
\node [anchor=south] (h1) at ([yshift=1em]node1.north) {\footnotesize{$\textbf{h}_{t}$}};
\draw [flow] ([xshift=-1.0em]node1.west)--([xshift=-0.1em]node1.west) node [pos=0,left] {\scriptsize{$\alert{\textbf{h}_{t-1}}$}};
\draw [flow] ([xshift=0.1em]node1.east)--([xshift=-0.1em]node2.west) node [pos=0.5,above] {\tiny{$\textbf{h}_{t}(\alert{\textbf{h}_{t-1}})$}};
\draw [flow] (x1.north)--([yshift=-0.1em]node1.south);
\draw [flow] ([yshift=0.1em]node1.north)--(h1.south);
% Step t+1 (from overlay 2).
\visible<2->{
\node [anchor=north] (x2) at ([yshift=-1em]node2.south) {\footnotesize{$\textbf{x}_{t+1}$}};
\node [anchor=south] (h2) at ([yshift=1em]node2.north) {\footnotesize{$\textbf{h}_{t+1}$}};
\draw [flow] ([xshift=0.1em]node2.east)--([xshift=-0.1em]node3.west) node [pos=0.5,above] {\tiny{$\textbf{h}_{t+1}(\textbf{h}_{t}(\alert{\textbf{h}_{t-1}}))$}};
\draw [flow] (x2.north)--([yshift=-0.1em]node2.south);
\draw [flow] ([yshift=0.1em]node2.north)--(h2.south);
}
% Step t+2 (from overlay 3), including the state leaving the last cell.
\visible<3->{
\node [anchor=north] (x3) at ([yshift=-1em]node3.south) {\footnotesize{$\textbf{x}_{t+2}$}};
\node [anchor=south] (h3) at ([yshift=1em]node3.north) {\footnotesize{$\textbf{h}_{t+2}$}};
\draw [flow] ([xshift=0.1em]node3.east)--([xshift=1.0em]node3.east) node [pos=1,right] {\scriptsize{$\textbf{h}_{t+2}$}};
\draw [flow] (x3.north)--([yshift=-0.1em]node3.south);
\draw [flow] ([yshift=0.1em]node3.north)--(h3.south);
}
\end{scope}
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ����ѭ�������������ģ��
\begin{frame}{����ѭ�������������ģ��(RNN LM)}
\begin{itemize}
\item ѭ����������Ա�ֱ����������ģ��
    \begin{itemize}
    \item<2-> ��FNN LM���ƣ����ȰѴʴ�one-hot��ʾת���ɷֲ�ʽ��ʾ
    \item<3-> $t$ʱ��Ԥ��$\textrm{P}(x_{t+1}|x_1...x_{t})$
    \item<4-> ���Ե��Ӹ���IJ�
    \end{itemize}
\end{itemize}

\visible<2->{
\begin{center}
\begin{tikzpicture}
% RNN language model, built up over the overlays:
%   overlay 2  : input words w_1..w_4 and their embedding boxes e_k = w_k C
%   overlay 3  : one row of RNN cells with a Softmax row directly on top
%   overlay 4+ : a second row of RNN cells takes the place of that Softmax row,
%                and the Softmax row sits one level higher
% Column k has consumed w_1..w_k, so its output label is P(w_{k+1} | w_1 ... w_k).
\begin{scope}
\tikzstyle{rnnnode} = [draw,inner sep=5pt,minimum width=4em,minimum height=1.5em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}]
% First row of RNN cells (from overlay 3).
\visible<3->{
\node [anchor=west,rnnnode] (node11) at (0,0) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node12) at ([xshift=2em]node11.east) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node13) at ([xshift=2em]node12.east) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node14) at ([xshift=2em]node13.east) {\scriptsize{RNN Cell}};
}
% Embedding boxes and input words; positioned relative to the first RNN row.
\node [anchor=north,rnnnode,fill=red!30!white] (e1) at ([yshift=-1.2em]node11.south) {\tiny{$e_1=w_1\textbf{C}$}};
\node [anchor=north,rnnnode,fill=red!30!white] (e2) at ([yshift=-1.2em]node12.south) {\tiny{$e_2=w_2\textbf{C}$}};
\node [anchor=north,rnnnode,fill=red!30!white] (e3) at ([yshift=-1.2em]node13.south) {\tiny{$e_3=w_3\textbf{C}$}};
\node [anchor=north,rnnnode,fill=red!30!white] (e4) at ([yshift=-1.2em]node14.south) {\tiny{$e_4=w_4\textbf{C}$}};
\node [anchor=north] (w1) at ([yshift=-1em]e1.south) {\footnotesize{$w_1$}};
\node [anchor=north] (w2) at ([yshift=-1em]e2.south) {\footnotesize{$w_2$}};
\node [anchor=north] (w3) at ([yshift=-1em]e3.south) {\footnotesize{$w_3$}};
\node [anchor=north] (w4) at ([yshift=-1em]e4.south) {\footnotesize{$w_4$}};

\draw [->,thick] ([yshift=0.1em]w1.north)--([yshift=-0.1em]e1.south);
\draw [->,thick] ([yshift=0.1em]w2.north)--([yshift=-0.1em]e2.south);
\draw [->,thick] ([yshift=0.1em]w3.north)--([yshift=-0.1em]e3.south);
\draw [->,thick] ([yshift=0.1em]w4.north)--([yshift=-0.1em]e4.south);

\draw [->,thick] ([yshift=0.1em]e1.north)--([yshift=-0.1em]node11.south);
\draw [->,thick] ([yshift=0.1em]e2.north)--([yshift=-0.1em]node12.south);
\draw [->,thick] ([yshift=0.1em]e3.north)--([yshift=-0.1em]node13.south);
\draw [->,thick] ([yshift=0.1em]e4.north)--([yshift=-0.1em]node14.south);

% Overlay 4+: second row of RNN cells (node2x) and the Softmax row above it (node3x).
\visible<4->{
\node [anchor=south,rnnnode] (node21) at ([yshift=1.5em]node11.north) {\scriptsize{RNN Cell}};
\node [anchor=south,rnnnode] (node22) at ([yshift=1.5em]node12.north) {\scriptsize{RNN Cell}};
\node [anchor=south,rnnnode] (node23) at ([yshift=1.5em]node13.north) {\scriptsize{RNN Cell}};
\node [anchor=south,rnnnode] (node24) at ([yshift=1.5em]node14.north) {\scriptsize{RNN Cell}};

\node [anchor=south,rnnnode,fill=blue!30!white] (node31) at ([yshift=1.5em]node21.north) {\scriptsize{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=blue!30!white] (node32) at ([yshift=1.5em]node22.north) {\scriptsize{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=blue!30!white] (node33) at ([yshift=1.5em]node23.north) {\scriptsize{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=blue!30!white] (node34) at ([yshift=1.5em]node24.north) {\scriptsize{Softmax($\cdot$)}};
}

% Overlay 3 only: Softmax boxes at the node2x positions (the names node21..node24
% are declared a second time here, at the same coordinates), plus the output
% arrows with their probability labels.
\visible<3>{
\node [anchor=south,rnnnode,fill=blue!30!white] (node21) at ([yshift=1.5em]node11.north) {\scriptsize{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=blue!30!white] (node22) at ([yshift=1.5em]node12.north) {\scriptsize{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=blue!30!white] (node23) at ([yshift=1.5em]node13.north) {\scriptsize{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=blue!30!white] (node24) at ([yshift=1.5em]node14.north) {\scriptsize{Softmax($\cdot$)}};

\draw [->,thick] ([yshift=0.1em]node21.north)--([yshift=-0.1em]node31.south) node[pos=1,above] {\scriptsize{$\textrm{P}(w_2|w_1)$}};
\draw [->,thick] ([yshift=0.1em]node22.north)--([yshift=-0.1em]node32.south) node[pos=1,above] {\scriptsize{$\textrm{P}(w_3|w_1 w_2)$}};
\draw [->,thick] ([yshift=0.1em]node23.north)--([yshift=-0.1em]node33.south) node[pos=1,above] {\scriptsize{$\textrm{P}(w_4|w_1 w_2 w_3)$}};
\draw [->,thick] ([yshift=0.1em]node24.north)--([yshift=-0.1em]node34.south) node[pos=1,above] {\scriptsize{$\textrm{P}(w_5|w_1 w_2 w_3 w_4)$}};
}

% Overlay 4+: output arrows above the Softmax row, the links from the second
% RNN row up to it, and the left-to-right state flow along the second RNN row.
\visible<4->{
\draw [->,thick] ([yshift=0.1em]node31.north)--([yshift=1em]node31.north) node[pos=1,above] {\scriptsize{$\textrm{P}(w_2|w_1)$}};
\draw [->,thick] ([yshift=0.1em]node32.north)--([yshift=1em]node32.north) node[pos=1,above] {\scriptsize{$\textrm{P}(w_3|w_1 w_2)$}};
\draw [->,thick] ([yshift=0.1em]node33.north)--([yshift=1em]node33.north) node[pos=1,above] {\scriptsize{$\textrm{P}(w_4|w_1 w_2 w_3)$}};
\draw [->,thick] ([yshift=0.1em]node34.north)--([yshift=1em]node34.north) node[pos=1,above] {\scriptsize{$\textrm{P}(w_5|w_1 w_2 w_3 w_4)$}};

\draw [->,thick] ([yshift=0.1em]node21.north)--([yshift=-0.1em]node31.south);
\draw [->,thick] ([yshift=0.1em]node22.north)--([yshift=-0.1em]node32.south);
\draw [->,thick] ([yshift=0.1em]node23.north)--([yshift=-0.1em]node33.south);
\draw [->,thick] ([yshift=0.1em]node24.north)--([yshift=-0.1em]node34.south);

\draw [->,thick] ([xshift=-1em]node21.west)--([xshift=-0.1em]node21.west);
\draw [->,thick] ([xshift=0.1em]node21.east)--([xshift=-0.1em]node22.west);
\draw [->,thick] ([xshift=0.1em]node22.east)--([xshift=-0.1em]node23.west);
\draw [->,thick] ([xshift=0.1em]node23.east)--([xshift=-0.1em]node24.west);
\draw [->,thick] ([xshift=0.1em]node24.east)--([xshift=1em]node24.east);
}

% Overlay 3+: links from the first RNN row up to the row above it, and the
% left-to-right state flow along the first RNN row.
\visible<3->{
\draw [->,thick] ([yshift=0.1em]node11.north)--([yshift=-0.1em]node21.south);
\draw [->,thick] ([yshift=0.1em]node12.north)--([yshift=-0.1em]node22.south);
\draw [->,thick] ([yshift=0.1em]node13.north)--([yshift=-0.1em]node23.south);
\draw [->,thick] ([yshift=0.1em]node14.north)--([yshift=-0.1em]node24.south);

\draw [->,thick] ([xshift=-1em]node11.west)--([xshift=-0.1em]node11.west);
\draw [->,thick] ([xshift=0.1em]node11.east)--([xshift=-0.1em]node12.west);
\draw [->,thick] ([xshift=0.1em]node12.east)--([xshift=-0.1em]node13.west);
\draw [->,thick] ([xshift=0.1em]node13.east)--([xshift=-0.1em]node14.west);
\draw [->,thick] ([xshift=0.1em]node14.east)--([xshift=1em]node14.east);
}

\end{scope}
\end{tikzpicture}
\end{center}
}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ѭ����Ԫ����ơ��ݶ���ʧ��ѵ��������
\begin{frame}{��һ��������}
\begin{itemize}
\item \textbf{ѭ����Ԫ���}��ѭ����Ԫ����һ�������������ǰʱ�̵��������һʱ�̵�״̬�����ɵ�ǰʱ�̵�״̬
    \begin{displaymath}
    \textbf{h}_t = g(\textbf{x}_t, \textbf{h}_{t-1}; \theta)
    \end{displaymath}
    �ܶ��ַ�ʽ���$g(\cdot)$����������LSTM��GRU��
\item<2-> \textbf{�ݶ���ʧ/��ը}���������б䳤���ڷ��򴫲�ʱѭ����������������ľֲ��ݶ���˼��㣬��ᵼ��\alert{�ݶ���ʧ/��ը����}
    \begin{displaymath}
    \underbrace{0.2 \times 0.3 \times ... \times 0.2 \times 0.1}_{\text{100��}} \approx 0
    \end{displaymath}
    \vspace{-0.8em}
    \begin{itemize}
    \item ���Կ����ݶȲü��������ݶȵĴ�С
    \item Ҳ��������short-cut connection����в�����
    \end{itemize}
\item<2-> \textbf{ѵ��}�������Զ�΢�֣��ⲻ�Ǹ������� :)
\end{itemize}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% ��ע��������
\begin{frame}{��ע��������(Self-Attention)}

\begin{itemize}
\item RNN LMЧ���ܺã����ǵ����й���,�ʻ�֮����Ϣ����·�����������׳����ݶ���ʧ���ݶȱ�ը�����⡣
\vspace{0.5em}
\begin{center}
\begin{tikzpicture}
\begin{scope}
% Token row w_1 w_2 w_3 ... w_{m-1} w_m (w_m on a green background) with red
% arcs stepping from each token to its left neighbour.
\tikzset{tokennode/.style={anchor=west},
         backstep/.style={->,thick,red}}
\node [tokennode] (w0) at (0,0) {$w_1$};
\node [tokennode] (w1) at ([xshift=0.5em]w0.east) {$w_2$};
\node [tokennode] (w2) at ([xshift=0.5em]w1.east) {$w_3$};
\node [tokennode] (w3) at ([xshift=0.5em]w2.east) {$...$};
\node [tokennode] (w4) at ([xshift=0.5em]w3.east) {$w_{m-1}$};
\node [tokennode,fill=green!20!white] (w5) at ([xshift=0.5em]w4.east) {$w_{m}$};
% Arcs whose two ends sit exactly on the north anchors.
\foreach \src/\dst in {1/0,2/1,5/4} {
  \draw [backstep] (w\src.north).. controls +(130:0.5) and +(50:0.5) .. (w\dst.north);
}
% The two arcs touching the "..." node (w3) are raised by 0.2em at that end.
\draw [backstep] ([yshift=0.2em]w3.north).. controls +(130:0.5) and +(50:0.5) .. (w2.north);
\draw [backstep] (w4.north).. controls +(130:0.5) and +(50:0.5) .. ([yshift=0.2em]w3.north);
\draw [->,very thick,red] ([xshift=-5em]w0.west) -- ([xshift=-6.5em]w0.west) node [pos=0,right] {\scriptsize{��Ϣ����}};


\end{scope}

\end{tikzpicture}
\end{center}
\item<2-> �ܷ񽫲�ͬλ��֮��Ĵʻ����Ϣ���ݵľ�������Ϊ1��


\begin{center}
\begin{tikzpicture}
\begin{scope}
% Token row w_1 w_2 w_3 ... w_{m-1} w_m (w_m on a green background).
\node [anchor=west] (w0) at (0,-2) {$w_1$};
\node [anchor=west] (w1) at ([xshift=0.5em]w0.east) {$w_2$};
\node [anchor=west] (w2) at ([xshift=0.5em]w1.east) {$w_3$};
\node [anchor=west] (w3) at ([xshift=0.5em]w2.east) {$...$};
\node [anchor=west] (w4) at ([xshift=0.5em]w3.east) {$w_{m-1}$};
\node [anchor=west,fill=green!20!white] (w5) at ([xshift=0.5em]w4.east) {$w_{m}$};
% One direct arc from w_m to every earlier token. The departure angle grows and
% the control length shrinks as the target gets closer, so the arcs fan out
% instead of lying on top of each other.
\draw [->,thick,red] (w5.north).. controls +(100:0.8) and +(50:0.8) .. (w0.north);
\draw [->,thick,red] (w5.north).. controls +(110:0.7) and +(50:0.7) .. (w1.north);
% Arc to w_3 (node w2), with control values halfway between its two neighbours.
\draw [->,thick,red] (w5.north).. controls +(115:0.65) and +(50:0.65) .. (w2.north);
\draw [->,thick,red] (w5.north).. controls +(120:0.6) and +(50:0.6) .. ([yshift=0.2em]w3.north);
\draw [->,thick,red] (w5.north).. controls +(130:0.5) and +(50:0.5) .. (w4.north);
\draw [->,very thick,red] ([xshift=-5em]w0.west) -- ([xshift=-6.5em]w0.west) node [pos=0,right] {\scriptsize{��Ϣ����}};


\end{scope}

\end{tikzpicture}
\end{center}
\item<3-> \textbf{��ע��������(Self-Attention)}���ԺܺõĽ���������������⣬�ڳ��������Խ�ģ����ȡ���˺ܺõ�Ч��
	\begin{itemize}
	\item ����ֵı�ʾ���в�ͬλ��֮��ĸ��ӹ�ϵ
	\item ����ѵ�������Ч��
	\end{itemize}
	
        \textbf{Attention Is All You Need}\\
        \textbf{Vaswani et al., 2017, In Proc. of Neural Information Processing Systems, 6000-6010}
\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% Transformer architecture
\begin{frame}{Transformer����ģ��(Vaswani et al., 2017)}
\begin{itemize}
\item һ���򵥵�����
\end{itemize}

\vspace{-2em}
\begin{center}
\begin{tikzpicture}
\begin{scope}
% Base layer of the Transformer LM example: four input words w_0..w_3, their
% embeddings e_k = w_k C, three SelfAtt boxes pairing e_0/e_1/e_2 with e_3,
% an FNN box over [h_0,h_1,h_2,e_3], a Softmax box, and the probability label.
% Input words, 4em apart, each with an "(index)" caption underneath.
\node [anchor=west] (w0) at (0,0) {\footnotesize{$w_{0}$}};
\node [anchor=west] (w1) at ([xshift=4em]w0.east) {\footnotesize{$w_{1}$}};
\node [anchor=west] (w2) at ([xshift=4em]w1.east) {\footnotesize{$w_{2}$}};
\node [anchor=west] (w3) at ([xshift=4em]w2.east) {\footnotesize{$w_{3}$}};
\node [anchor=north] (index0) at ([yshift=0.5em]w0.south) {\tiny(index)};
\node [anchor=north] (index1) at ([yshift=0.5em]w1.south) {\tiny(index)};
\node [anchor=north] (index2) at ([yshift=0.5em]w2.south) {\tiny(index)};
\node [anchor=north] (index3) at ([yshift=0.5em]w3.south) {\tiny(index)};
% Embedding boxes, one above each word (unfilled in this base layer).
\node [anchor=south,draw,inner sep=3pt] (e0) at ([yshift=1em]w0.north) {\tiny{$e_0=w_{0} \textbf{C}$}};
\node [anchor=south,draw,inner sep=3pt] (e1) at ([yshift=1em]w1.north) {\tiny{$e_1=w_{1} \textbf{C}$}};
\node [anchor=south,draw,inner sep=3pt] (e2) at ([yshift=1em]w2.north) {\tiny{$e_2=w_{2} \textbf{C}$}};
\node [anchor=south,draw,inner sep=3pt] (e3) at ([yshift=1em]w3.north) {\tiny{$e_3=w_{3} \textbf{C}$}};

% Self-attention boxes above e0..e2 (with different horizontal offsets), then
% the FNN box, the Softmax box and the output label stacked above h2.
\node [anchor=south,draw,inner sep=3pt] (h0) at ([xshift=-0.5em, yshift=1.5em]e0.north) {\tiny{$h_{0}=\textrm{SelfAtt}(e_0,e_3)$}};
\node [anchor=south,draw,inner sep=3pt] (h1) at ([xshift=0.5em, yshift=1.5em]e1.north) {\tiny{$h_{1}=\textrm{SelfAtt}(e_1,e_3)$}};
\node [anchor=south,draw,inner sep=3pt] (h2) at ([xshift=1.5em, yshift=1.5em]e2.north) {\tiny{$h_{2}=\textrm{SelfAtt}(e_2,e_3)$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt] (f1) at ([xshift=0.5em, yshift=1.5em]h2.north) {\tiny{$f_3=\textrm{FNN}([h_0,h_1,h_2,e_3])$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt] (o1) at ([yshift=1em]f1.north) {\tiny{$y=\textrm{Softmax}(f_3 \textbf{U})$}};
\node [anchor=south] (ylabel) at ([yshift=1em]o1.north) {\footnotesize{$\textrm{P}(w_4|w_{0}w_{1}w_{2}w_{3})$}};

% Word -> embedding arrows.
\draw [->] ([yshift=0.1em]w0.north) -- ([yshift=-0.1em]e0.south);
\draw [->] ([yshift=0.1em]w1.north) -- ([yshift=-0.1em]e1.south);
\draw [->] ([yshift=0.1em]w2.north) -- ([yshift=-0.1em]e2.south);
\draw [->] ([yshift=0.1em]w3.north) -- ([yshift=-0.1em]e3.south);
% Embedding -> self-attention arrows: each h_k receives its own e_k ...
\draw [->] ([yshift=0.1em]e0.north) -- ([xshift=0em,yshift=-0.1em]h0.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([xshift=-0.5em,yshift=-0.1em]h1.south);
\draw [->] ([yshift=0.1em]e2.north) -- ([xshift=-1em,yshift=-0.1em]h2.south);
% ... and e_3, which enters every SelfAtt box 1em right of the centre of its lower edge.
\draw [->] ([yshift=0.1em]e3.north) -- ([xshift=1em,yshift=-0.1em]h0.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([xshift=1em,yshift=-0.1em]h1.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([xshift=1em,yshift=-0.1em]h2.south);

% h_0, h_1, h_2 and e_3 into the FNN box, then FNN -> Softmax -> output label.
\draw [->] ([yshift=0.1em]h0.north) -- ([xshift=-2em,yshift=-0.1em]f1.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([xshift=2em,yshift=-0.1em]f1.south);
\draw [->] ([yshift=0.1em]h1.north) -- ([xshift=-1em,yshift=-0.1em]f1.south);
\draw [->] ([yshift=0.1em]h2.north) -- ([xshift=0em,yshift=-0.1em]f1.south);
\draw [->] ([yshift=0.1em]f1.north) -- ([yshift=-0.1em]o1.south);
\draw [->] ([yshift=0.1em]o1.north) -- ([yshift=-0.1em]ylabel.south);

% From overlay 2: the embedding boxes are drawn again at the same positions and
% under the same names, now with a light blue fill, on top of the unfilled ones.
\visible<2->{
\node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e0) at ([yshift=1em]w0.north) {\tiny{$e_0=w_{0} \textbf{C}$}};
\node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e1) at ([yshift=1em]w1.north) {\tiny{$e_1=w_{1} \textbf{C}$}};
\node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e2) at ([yshift=1em]w2.north) {\tiny{$e_2=w_{2} \textbf{C}$}};
\node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e3) at ([yshift=1em]w3.north) {\tiny{$e_3=w_{3} \textbf{C}$}};
}

\visible<2->{
\node [anchor=west] (embedinglabel0) at ([xshift=-5em,yshift=-2em]w0.south) {\scriptsize{{\blue \textbf{�ʵķֲ�ʽ��ʾ}}}};
\node [anchor=north west] (embedinglabel1) at ([yshift=0.3em]embedinglabel0.south west) {\scriptsize{ǰ���Ѿ����ܹ���}};
\node [anchor=north west] (embedinglabel2) at ([yshift=0.3em]embedinglabel1.south west) {\scriptsize{����One-hot��ʾ���}};
\node [anchor=north west] (embedinglabel3) at ([yshift=0.3em]embedinglabel2.south west) {\scriptsize{�¼���λ������}};
}

\visible<3->{
\node [anchor=south,draw,inner sep=3pt,fill=ugreen!20!white] (h0) at ([xshift=-0.5em, yshift=1.5em]e0.north) {\tiny{$h_{0}=\textrm{SelfAtt}(e_0,e_3)$}};
\node [anchor=south,draw,inner sep=3pt,fill=ugreen!20!white] (h1) at ([xshift=0.5em, yshift=1.5em]e1.north) {\tiny{$h_{1}=\textrm{SelfAtt}(e_1,e_3)$}};
\node [anchor=south,draw,inner sep=3pt,fill=ugreen!20!white] (h2) at ([xshift=1.5em, yshift=1.5em]e2.north) {\tiny{$h_{2}=\textrm{SelfAtt}(e_2,e_3)$}};
}

\visible<3->{
\node [anchor=west] (selfattlabel0) at ([xshift=3em]embedinglabel0.east) {\scriptsize{{\color{ugreen} \textbf{��ע��������}}}};
\node [anchor=west] (selfattlabel1) at ([yshift=-0.3em]selfattlabel0.south west) {\scriptsize{����ʻ�֮�����ض�}};
\node [anchor=west] (selfattlabel2) at ([yshift=-0.3em]selfattlabel1.south west) {\scriptsize{��ͷ��ע��������}};
\node [anchor=west] (directlabel0) at ([yshift=-0.3em]selfattlabel2.south west) {\scriptsize{\alert{\textbf{���潫�����}}}};
}

\visible<4->{
\node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (f1) at ([xshift=0.5em, yshift=1.5em]h2.north) {\tiny{$f_3=\textrm{FNN}([h_0,h_1,h_2,e_3])$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (o1) at ([yshift=1em]f1.north) {\tiny{$y=\textrm{Softmax}(f_3 \textbf{U})$}};
}

\visible<4->{
\node [anchor=west] (ffnlabel0) at ([xshift=3em]selfattlabel0.east) {\scriptsize{{\color{orange} \textbf{ǰ��������������}}}};
\node [anchor=west] (ffnlabel1) at ([yshift=-0.3em]ffnlabel0.south west) {\scriptsize{˫��ȫ��������}};
\node [anchor=west] (ffnlabel2) at ([yshift=-0.3em]ffnlabel1.south west) {\scriptsize{�����ΪRelu}};
\node [anchor=west] (ffnlabel3) at ([yshift=-0.3em]ffnlabel2.south west) {\scriptsize{���ͨ��Softmax���}};
}


\end{scope}
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% Transformer architecture
\begin{frame}{Transformer����ģ��(Vaswani et al., 2017)}
\begin{itemize}
\item ��ͷע��������
\end{itemize}

\vspace{-1.5em}
\begin{center}
\begin{tikzpicture}
\begin{scope}

\node [anchor=west,draw=black!30,inner sep=4pt,fill=ugreen!20!white] (Linear0) at (0,0) {\tiny{Linear}};
\node [anchor=south west,draw=black!50,fill=ugreen!20!white,draw,inner sep=4pt] (Linear01) at ([shift={(-0.2em,-0.2em)}]Linear0.south west) {\tiny{Linear}};
\node [anchor=south west,fill=ugreen!20!white,draw,inner sep=4pt] (Linear02) at ([shift={(-0.2em,-0.2em)}]Linear01.south west) {\tiny{Linear}};
\node [anchor=north] (Q) at ([xshift=0em,yshift=-1em]Linear02.south) {\footnotesize{$Q$}};

\node [anchor=west,draw=black!30,inner sep=4pt,fill=ugreen!20!white] (Linear1) at ([xshift=1.5em]Linear0.east) {\tiny{Linear}};
\node [anchor=south west,draw=black!50,fill=ugreen!20!white,draw,inner sep=4pt] (Linear11) at ([shift={(-0.2em,-0.2em)}]Linear1.south west) {\tiny{Linear}};
\node [anchor=south west,fill=ugreen!20!white,draw,inner sep=4pt] (Linear12) at ([shift={(-0.2em,-0.2em)}]Linear11.south west) {\tiny{Linear}};
\node [anchor=north] (K) at ([xshift=0em,yshift=-1em]Linear12.south) {\footnotesize{$K$}};

\node [anchor=west,draw=black!30,inner sep=4pt,fill=ugreen!20!white] (Linear2) at ([xshift=1.5em]Linear1.east) {\tiny{Linear}};
\node [anchor=south west,draw=black!50,fill=ugreen!20!white,draw,inner sep=4pt] (Linear21) at ([shift={(-0.2em,-0.2em)}]Linear2.south west) {\tiny{Linear}};
\node [anchor=south west,fill=ugreen!20!white,draw,inner sep=4pt] (Linear22) at ([shift={(-0.2em,-0.2em)}]Linear21.south west) {\tiny{Linear}};
\node [anchor=north] (V) at ([xshift=0em,yshift=-1em]Linear22.south) {\footnotesize{$V$}};

\node [anchor=south,draw=black!30,minimum width=9em,inner sep=4pt,fill=blue!20!white] (Scale) at ([yshift=1em]Linear1.north) {\tiny{Scaled Dot-Product Attention}};
\node [anchor=south west,draw=black!50,minimum width=9em,fill=blue!20!white,draw,inner sep=4pt] (Scale1) at ([shift={(-0.2em,-0.2em)}]Scale.south west) {\tiny{Scaled Dot-Product Attention}};
\node [anchor=south west,fill=blue!20!white,draw,minimum width=9em,inner sep=4pt] (Scale2) at ([shift={(-0.2em,-0.2em)}]Scale1.south west) {\tiny{Scaled Dot-Product Attention}};

\node [anchor=south,draw,minimum width=4em,inner sep=4pt,fill=yellow!30] (Concat) at ([yshift=1em]Scale2.north) {\tiny{Concat}};

\node [anchor=south,draw,minimum width=4em,inner sep=4pt,fill=ugreen!20!white] (Linear) at ([yshift=1em]Concat.north) {\tiny{Linear}};


\draw [->] ([yshift=0.1em]Q.north) -- ([yshift=-0.1em]Linear02.south);
\draw [-,draw=black!50] ([yshift=0.1em]Q.north) -- ([xshift=0.2em,yshift=-0.1em]Linear02.south);
\draw [-,draw=black!30] ([yshift=0.1em]Q.north) -- ([xshift=0.4em,yshift=-0.1em]Linear02.south);

\draw [->] ([yshift=0.1em]K.north) -- ([yshift=-0.1em]Linear12.south);
\draw [-,draw=black!50] ([yshift=0.1em]K.north) -- ([xshift=0.2em,yshift=-0.1em]Linear12.south);
\draw [-,draw=black!30] ([yshift=0.1em]K.north) -- ([xshift=0.4em,yshift=-0.1em]Linear12.south);

\draw [->] ([yshift=0.1em]V.north) -- ([yshift=-0.1em]Linear22.south);
\draw [-,draw=black!50] ([yshift=0.1em]V.north) -- ([xshift=0.2em,yshift=-0.1em]Linear22.south);
\draw [-,draw=black!30] ([yshift=0.1em]V.north) -- ([xshift=0.4em,yshift=-0.1em]Linear22.south);

\draw [->] ([yshift=0em]Linear02.north) -- ([yshift=1em]Linear02.north);
\draw [-,draw=black!50] ([yshift=0em]Linear01.north) -- ([yshift=0.8em]Linear01.north);
\draw [-,draw=black!30] ([yshift=0em]Linear0.north) -- ([yshift=0.6em]Linear0.north);

\draw [->] ([yshift=0em]Linear12.north) -- ([yshift=1em]Linear12.north);
\draw [-,draw=black!50] ([yshift=0em]Linear11.north) -- ([yshift=0.8em]Linear11.north);
\draw [-,draw=black!30] ([yshift=0em]Linear1.north) -- ([yshift=0.6em]Linear1.north);

\draw [->] ([yshift=0em]Linear22.north) -- ([yshift=1em]Linear22.north);
\draw [-,draw=black!50] ([yshift=0em]Linear21.north) -- ([yshift=0.8em]Linear21.north);
\draw [-,draw=black!30] ([yshift=0em]Linear2.north) -- ([yshift=0.6em]Linear2.north);

\draw [->] ([yshift=0em]Scale2.north) -- ([yshift=0em]Concat.south);
\draw [-,draw=black!50] ([yshift=0em]Scale1.north) -- ([yshift=0.8em]Scale1.north);
\draw [-,draw=black!30] ([yshift=0em]Scale.north) -- ([yshift=0.6em]Scale.north);

\draw [->] ([yshift=0em]Concat.north) -- ([yshift=0em]Linear.south);
\draw [->] ([yshift=0em]Linear.north) -- ([yshift=1em]Linear.north);

\node [anchor=west] (Multiheadlabel0) at ([xshift=-5em,yshift=-1.2em]Q.south) {\scriptsize{{\blue \textbf{��ͷע����}}}};
\node [anchor=north west] (Multiheadlabel1) at ([yshift=0em]Multiheadlabel0.south west) {\scriptsize{$MultiHead(Q,K,V)=Concat(head_1,...head_n)W^0$}};
\node [anchor=north west] (Multiheadlabel2) at ([yshift=0.2em]Multiheadlabel1.south west) {\scriptsize{������ѹ���ɶ��ά�Ƚ�С��������ֱ�����ע����}};
\node [anchor=north west] (Multiheadlabel3) at ([yshift=0.2em]Multiheadlabel2.south west) {\scriptsize{�ٰѽ���������������Ա任�õ��������}};


\visible<2->{
\node [anchor=south west,fill=white,draw,inner sep=4pt,minimum width=3.5em,fill=blue!20!white] (MatMul) at ([xshift=8em]Linear22.south west) {\tiny{MatMul}};
\node [anchor=north] (Q1) at ([xshift=-1em,yshift=-1em]MatMul.south) {\footnotesize{$Q$}};
\node [anchor=north] (K1) at ([xshift=1em,yshift=-1em]MatMul.south) {\footnotesize{$K$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30] (Scale3) at ([yshift=1em]MatMul.north) {\tiny{Scale}};
\node [anchor=south,draw,inner sep=4pt,fill=purple!20,minimum width=3.5em] (Mask) at ([yshift=0.8em]Scale3.north) {\tiny{Mask(opt.)}};
\node [anchor=south,draw,inner sep=4pt,fill=ugreen!20!white] (SoftMax) at ([yshift=1em]Mask.north) {\tiny{SoftMax}};
\node [anchor=south,draw,minimum width=3.5em,inner sep=4pt,fill=blue!20!white] (MatMul1) at ([xshift=1.5em,yshift=1em]SoftMax.north) {\tiny{MatMul}};
\node [anchor=north] (V1) at ([xshift=2em]K1.north) {\footnotesize{$V$}};
\node [anchor=north] (null) at ([yshift=0.8em]MatMul1.north) {};

\node [rectangle,draw, densely dashed,inner sep=0.4em] [fit = (MatMul) (MatMul1) (Q1) (K1) (V1) (null)] (inputshadow) {};

\draw [->] ([yshift=0.1em]Q1.north) -- ([xshift=-1em,yshift=-0.1em]MatMul.south);
\draw [->] ([yshift=0.1em]K1.north) -- ([xshift=1em,yshift=-0.1em]MatMul.south);
\draw [->] ([yshift=0.1em]MatMul.north) -- ([yshift=-0.1em]Scale3.south);
\draw [->] ([yshift=0.1em]Scale3.north) -- ([yshift=-0.1em]Mask.south);
\draw [->] ([yshift=0.1em]Mask.north) -- ([yshift=-0.1em]SoftMax.south);
\draw [->] ([yshift=0.1em]SoftMax.north) -- ([yshift=0.9em]SoftMax.north);
\draw [->] ([yshift=0.1em]V1.north) -- ([yshift=9.1em]V1.north);
\draw [->] ([yshift=0.1em]MatMul1.north) -- ([yshift=0.8em]MatMul1.north);

\draw [->,dashed,red,thick] ([xshift=0.1em]Scale.east) .. controls +(east:1) and +(west:1) .. ([xshift=-0.1em,yshift=1em]inputshadow.west);

\node [anchor=west] (Attentionlabel0) at ([xshift=-2em,yshift=-1.2em]Q1.south) {\scriptsize{{\color{ugreen} \textbf{���ڵ�˵���ע����}}}};
\node [anchor=north west] (Attentionlabel1) at ([yshift=0.3em]Attentionlabel0.south west) {\scriptsize{$head_i=softmax(\frac{QK^{T}}{\sqrt{d_k}})V$}};
\node [anchor=north west] (Attentionlabel2) at ([yshift=0.6em]Attentionlabel1.south west) {\scriptsize{����õ�λ�������ļ�Ȩ��}};
\node [anchor=north west] (Attentionlabel3) at ([yshift=0.2em]Attentionlabel2.south west) {\scriptsize{Q,K,V������ͬ��}};
}

\end{scope}
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% evaluation
\begin{frame}{����ģ������}
\begin{itemize}
\item ����ģ�͵�����ָ�� - �����(Perplexity, PPL)
\begin{itemize}
\item ����ģ��Ԥ��һ����������������
\item �����Խ�ͣ���ģ��Ч��Խ��
\end{itemize}
\vspace{0.5em}
% Perplexity of a word sequence w_1 .. w_m: its probability raised to the power -1/m.
% \dots (amsmath) is used instead of three literal periods so the ellipsis is spaced correctly in math mode.
\begin{displaymath}
\textrm{PPL}(w_1 \dots w_m)=\textrm{P}(w_1 \dots w_m)^{-1/m}
\end{displaymath}
\vspace{-0.5em}
\item<2-> Penn Treebank(PTB)�ϵ����۽��
\end{itemize}
\vspace{0.0em}
\visible<2->{
\begin{tabular}{l | l | l | r}
ģ�� & ���� & ��� & PPL \\ \hline
FNN LM & Bengio et al. & 2003 & 162.2 \\
RNN LM & Mikolov et al. & 2010 & 124.7 \\
RNN-LDA LM & Mikolov et al. & 2012 & 92.0 \\
RNN(LSTM) LM & Zaremba et al. & 2014 & 78.4 \\
RHN & Zilly et al. & 2016 & 65.4 \\
RNN(AWD-LSTM) LM & Merity et al. & 2018 & 58.8 \\
GPT-2 (Transformer) & Radford et al. & 2019 & 35.7
\end{tabular}
}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
\subsection{��Ƕ��}

%%%------------------------------------------------------------------------------------------------------------
%%% one-hot and distributed representations of words
\begin{frame}{���ʵı�ʾ}
\begin{itemize}
\item ��α�ʾһ�����ʣ�
    \begin{itemize}
    \item \textbf{One-hot}: ������һ���ʵ�$V$���������10k�����ʣ������б�š�ÿ�����ʶ����Ա�ʾΪ10kά��one-hot���������ڱ���Ǹ�ά��Ϊ1������Ϊ0
    \item<2-> \textbf{Distributed}: ������������ģ�ͣ�ÿ�����ʿ��Ա���ʾΪһ��ʵ��������ÿһά����Ӧһ��``����'' - \alert{��Ƕ��}
    \end{itemize}
\end{itemize}

\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=north west] (o1) at (0,0) {\footnotesize{$\begin{bmatrix} 0 \\ 1 \\ 0 \\ 0 \\ 0 \\ ... \\ 0 \end{bmatrix}$}};
\node [anchor=north west] (o2) at ([xshift=1em]o1.north east) {\footnotesize{$\begin{bmatrix} 0 \\ 0 \\ 0 \\ 1 \\ 0 \\ ... \\ 0 \end{bmatrix}$}};
\node [anchor=north east] (v) at ([xshift=-0em]o1.north west) {\footnotesize{$\begin{matrix} \textrm{\ \ \ \ \ ��}_1 \\ \textrm{\ \ ����}_2 \\ \textrm{\ \ \ \ \ ��}_3 \\ \textrm{\ \ ����}_4 \\ \textrm{\ \ ����}_5 \\ ... \\ \textrm{���}_{10k} \end{matrix}$}};
\node [anchor=south] (w1) at (o1.north) {\footnotesize{����}};
\node [anchor=south] (w2) at (o2.north) {\footnotesize{����}};
\node [anchor=north] (label) at (o1.south) {\footnotesize{���ʵ�one-hot��ʾ}};
\visible<3->{
\node [anchor=south,fill=red!20!white] (cosine) at (w1.north) {\footnotesize{$cosine(\textrm{`����'},\textrm{`����'})=0$}};
}
\end{scope}

\visible<2->{
\begin{scope}[xshift=2in]
\node [anchor=north west] (o1) at (0,0) {\footnotesize{$\begin{bmatrix} .1 \\ -1 \\ 2 \\ ... \\ 0 \end{bmatrix}$}};
\node [anchor=north west] (o2) at ([xshift=1em]o1.north east) {\footnotesize{$\begin{bmatrix} 1 \\ 2 \\ .2 \\ ... \\ -1 \end{bmatrix}$}};
\node [anchor=north east] (v) at ([xshift=-0em]o1.north west) {\footnotesize{$\begin{matrix} \textrm{\ \ \ ����}_1 \\ \textrm{\ \ \ ����}_2 \\ \textrm{\ \ \ ����}_3 \\ ... \\ \textrm{����}_{512} \end{matrix}$}};
\node [anchor=south] (w1) at (o1.north) {\footnotesize{����}};
\node [anchor=south] (w2) at (o2.north) {\footnotesize{����}};
\node [anchor=north] (label) at ([yshift=-2em]o1.south) {\footnotesize{���ʵķֲ�ʽ��ʾ(��Ƕ��)}};
\visible<3->{
\node [anchor=south,fill=red!20!white] (cosine) at (w1.north) {\footnotesize{$cosine(\textrm{`����'},\textrm{`����'})=0.5$}};
}
\end{scope}
}
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% advantages of distributed representations
\begin{frame}{Ϊʲô��Ҫ�ֲ�ʽ��ʾ��}
\begin{itemize}
\item \textbf{һ����Ȼ������}���ֲ�ʽ��ʾ��ÿһά����ʲô��˼
    \begin{itemize}
    \item ���԰�ÿһά������Ϊһ�����ԣ����磺�Ա����ߵ�
    \item ���ǣ�ģ�͸�����ǰ�һ��ά�ȿ����������һ��``�̻�''����һ��ͳ�������ϵ�``����''�������˹����ɵ�����
    \end{itemize}
\item<2-> �����ַ�����ʲô�ô���
    \begin{itemize}
    \item �����׿̻�����֮���\alert{������}
    \item �����ռ��ʾģ�Ϳ��Ը�׼ȷ�Ŀ̻��͹���������Ƿ��㼴һ���ж�
    \end{itemize}
\item<2-> Ԥ����һ��������
    \begin{itemize}
    \item �ֲ�ʽ��ʾ������ָ��``����''��``����''�����Ƶ�
    \item ��ʹ``����''û����������г��ֹ���ϵͳ��Ȼ����ͨ������``����''�������Խ���Ԥ��
    \end{itemize}
    \begin{tabular}{l | l}
    ���� Ҫ �ڷ� һ�� \_\_\_\_\_ & Ԥ���¸��� \\ \hline
    ���� Ҫ �ڷ� һ�� \alert{����} & ���� \\
    ���� Ҫ �ڷ� һ�� \blue{����} & û������������Ȼ�Ǻ���Ԥ��
    \end{tabular}
\end{itemize}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% distributed word representations: analogy example and visualization
\begin{frame}{�ֲ�ʽ��ʾ�Ŀ��ӻ�}
\begin{itemize}
\item \textbf{һ������������}������ $\to$ ����\\
    \begin{displaymath}
    \vv{\textrm{����}} - \vv{\textrm{����}} + \vv{\textrm{��}} = \vv{\textrm{����}}
    \end{displaymath}
    ���$\vv{\textrm{word}}$��ʾ���ʵķֲ�ʽ������ʾ
\item ����ĴʵĿ��ӻ������ƵĴʾ���һ��
\end{itemize}
\begin{center}
\includegraphics[scale=0.4]{./Figures/word-graph.png}
\end{center}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% word embeddings inside the language model
\begin{frame}{������ģ���еĴ�Ƕ��}
\begin{itemize}
\item ��������ģ���У���Ҫ�Ѵʱ�ʾ�����ķֲ�ʽ��ʾ
    \begin{itemize}
    \item<2-> ����$\textbf{C}$�Ǵ�Ƕ�����ÿһ�ж�Ӧһ���ʵķֲ�ʽ��ʾ
    \item<3-> $\textbf{C}$����������ģ��ѵ����Ҳ������������ģ��ѵ�����̶���Ƕ�룬������ģ��רע��Ƭ�ε�ѧϰ
    \end{itemize}
\end{itemize}

\vspace{-0.5em}
\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=center,inner sep=2pt] (e) at (0,0) {\small{$e=w$}};
\node [anchor=west,inner sep=2pt] (c) at (e.east) {\small{$\textbf{C}$}};

\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.4em,draw,fill=blue!20!white] [fit = (e) (c)] (box) {};
\end{pgfonlayer}

\draw [->,thick] ([yshift=-1em]box.south)--([yshift=-0.1em]box.south) node [pos=0,below] (bottom1) {\small{����$w$}};
\draw [->,thick] ([yshift=0.1em]box.north)--([yshift=1em]box.north) node [pos=1,above] (top1) {\scriptsize{$e$=(8,.2,-1,.9,...,1)}};
\node [anchor=north] (bottom2) at ([yshift=0.3em]bottom1.south) {\scriptsize{$w$=(0,0,1,0,...,0)}};
\node [anchor=south] (top2) at ([yshift=-0.3em]top1.north) {\small{$w$�ķֲ�ʽ��ʾ}};

% From overlay 2 on: reveal the embedding matrix C and highlight the C inside the "e = w C" box.
\visible<2->{
% Row 3 ends in 1 so that it equals the e=(8,.2,-1,.9,...,1) printed above the box,
% i.e. the row that the one-hot input w=(0,0,1,0,...,0) selects.
\node [anchor=north west,fill=red!20!white] (cmatrix) at ([xshift=3em,yshift=1.0em]c.north east) {\scriptsize{$\begin{pmatrix} 1 & .2 & -.2 & 8 & ... & 0 \\ .6 & .8 & -2 & 1 & ... & -.2 \\ 8 & .2 & -1 & .9 & ... & 1 \\ 1 & 1.2 & -.9 & 3 & ... & .2 \\ ... & ... & ... & ... & ... & ... \\ 1 & .3 & 3 & .9 & ... & 5.1 \end{pmatrix}$}};
% Re-declare node (c) at the same anchor with a red fill; it is painted over the plain C drawn earlier.
\node [anchor=west,inner sep=2pt,fill=red!30!white] (c) at (e.east) {\small{$\textbf{C}$}};
% 3em arrow whose tip sits on the east side of the highlighted C (it comes from the matrix side).
\draw [<-,thick] (c.east) -- ([xshift=3em]c.east);
}

\visible<3->{
\node [anchor=south,draw,fill=green!20!white] (e2) at ([yshift=1.5em]cmatrix.north) {\scriptsize{�ⲿ��Ƕ��ϵͳ�õ���$\textbf{C}$}};
\draw [->,very thick,dashed] (e2.south) -- (cmatrix.north);
}

\end{scope}
\end{tikzpicture}
\end{center}

\vspace{-1.0em}

\begin{itemize}
\item<4-> ��Ƕ�����ѧϰ�õ���
    \begin{itemize}
    \item ���Ժ�����ģ�͵���������һ��ѵ���������ٶȽ���
    \item Ҳ���Կ���ʹ��Ч�ʸ��ߵ��ⲿģ�ͣ���word2vec�� Glove�ȣ���������ʹ�ø����ģ������
    \end{itemize}
\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
\subsection{���ӱ�ʾģ�ͼ�Ԥѵ��}

%%%------------------------------------------------------------------------------------------------------------
%%% limitations of word embeddings (one vector per word, no context)
\begin{frame}{������``��''}
\begin{itemize}
\item ��Ƕ���Ѿ���Ϊ���NLPϵͳ�ı��䣬��ȻҲ���������ֻ�ʽ�淨��������``embed everything''�Ŀںţ����Ǵ�Ƕ��Ҳ������
    \begin{itemize}
    \item ÿ���ʶ���ӦΨһ��������ʾ�����Ƕ���һ�ʶ������󣬴�����Ҫͨ�������Ľ������֡�һ�����������ӣ�
    \end{itemize}
    \vspace{0.3em}
    \hspace{6em} Jobs was the CEO of \alert{\underline{apple}}.\\
    \hspace{6em} He finally ate the \alert{\underline{apple}}.
\item<2-> ������������Ϣ
    \begin{itemize}
    \item ���������������µ�˼�������ܼ򵥵ؿ��Ǵʵı�ʾ��Ӧͬʱ��������������Ϣ
    \item ���ھ����е�һ����(����λ��)��ͬʱ��ʾ�ʺ�������
    \end{itemize}
\end{itemize}

\visible<2->{
\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=west] (node1) at (0,0) {\footnotesize{Jobs was the CEO of}};
\node [anchor=west] (node2) at ([xshift=-0.2em,yshift=-0.05em]node1.east) {\footnotesize{\alert{\underline{apple}}}};
\node [anchor=west] (node3) at ([xshift=-0.2em,yshift=-0.1em]node2.east) {\footnotesize{.}};
\node [anchor=south,inner sep=2pt,minimum width=2.4em,fill=red!20!white] (node4) at ([yshift=1.5em]node2.north) {\scriptsize{��}};
\node [anchor=north] (label) at ([xshift=1em]node1.south) {\scriptsize{\textbf{�ʱ�ʾģ��}}};
\draw [->,thick] (node2.north) -- (node4.south);
\end{scope}
\begin{scope}[xshift=2in]
\node [anchor=west] (node1) at (0,0) {\footnotesize{Jobs was the CEO of}};
\node [anchor=west] (node2) at ([xshift=-0.2em,yshift=-0.05em]node1.east) {\footnotesize{\alert{\underline{apple}}}};
\node [anchor=west] (node3) at ([xshift=-0.2em,yshift=-0.1em]node2.east) {\footnotesize{.}};
\node [anchor=south,inner sep=2pt,minimum width=2.4em,fill=red!20!white] (node4) at ([yshift=1.5em]node2.north) {\scriptsize{��}};
\node [anchor=south,inner sep=2pt,minimum width=2.4em,fill=blue!20!white] (node5) at (node4.north) {\scriptsize{������}};
\node [anchor=north] (label) at ([xshift=1em]node1.south) {\scriptsize{\textbf{��+�����ı�ʾģ��}}};
\draw [->,thick] (node2.north) -- (node4.south);
\draw [->] ([xshift=1em]node1.north west) .. controls +(north:1) and +(west:2) .. ([yshift=0.2em]node5.west);
\draw [->] ([xshift=3em]node1.north west) .. controls +(north:0.8) and +(west:1.5) .. ([yshift=-0.2em]node5.west);
\node [anchor=east] (morelines) at ([xshift=-1.5em]node4.west) {...};
\end{scope}
\end{tikzpicture}
\end{center}
}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% context representation model
\begin{frame}{��ʾ������Ƭ�� - �����ı�ʾģ��}
\begin{itemize}
\item ������ģ�����Ѿ�������ÿ��λ�õ������ı�ʾ��Ϣ
    \begin{itemize}
    \item ��RNN LMΪ����λ��$i$�������������һ��$w_1...w_i$�ı�ʾ
    \end{itemize}
\end{itemize}

\vspace{-0.5em}
\begin{center}
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnnnode} = [draw,inner sep=5pt,minimum width=4em,minimum height=1.5em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}]
\node [anchor=west,rnnnode] (node11) at (0,0) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node12) at ([xshift=2em]node11.east) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node13) at ([xshift=2em]node12.east) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node14) at ([xshift=2em]node13.east) {\scriptsize{RNN Cell}};

\node [anchor=north,rnnnode,fill=red!30!white] (e1) at ([yshift=-1.2em]node11.south) {\scriptsize{embedding}};
\node [anchor=north,rnnnode,fill=red!30!white] (e2) at ([yshift=-1.2em]node12.south) {\scriptsize{embedding}};
\node [anchor=north,rnnnode,fill=red!30!white] (e3) at ([yshift=-1.2em]node13.south) {\scriptsize{embedding}};
\node [anchor=north,rnnnode,fill=red!30!white] (e4) at ([yshift=-1.2em]node14.south) {\scriptsize{embedding}};
\node [anchor=north] (w1) at ([yshift=-1em]e1.south) {\footnotesize{�Dz�˹}};
\node [anchor=north] (w2) at ([yshift=-1em]e2.south) {\footnotesize{��ְ}};
\node [anchor=north] (w3) at ([yshift=-1em]e3.south) {\footnotesize{��}};
\node [anchor=north] (w4) at ([yshift=-1em]e4.south) {\footnotesize{ƻ��}};

% Bottom-up wiring of the four columns: word label (w) -> embedding box (e).
\foreach \col in {1,...,4} {
    \draw [->,thick] ([yshift=0.1em]w\col.north)--([yshift=-0.1em]e\col.south);
}

% Embedding box (e) -> first-layer RNN cell (node1x) of the same column.
\foreach \col in {1,...,4} {
    \draw [->,thick] ([yshift=0.1em]e\col.north)--([yshift=-0.1em]node1\col.south);
}

\node [anchor=south,rnnnode] (node21) at ([yshift=1.5em]node11.north) {\scriptsize{RNN Cell}};
\node [anchor=south,rnnnode] (node22) at ([yshift=1.5em]node12.north) {\scriptsize{RNN Cell}};
\node [anchor=south,rnnnode] (node23) at ([yshift=1.5em]node13.north) {\scriptsize{RNN Cell}};
\node [anchor=south,rnnnode] (node24) at ([yshift=1.5em]node14.north) {\scriptsize{RNN Cell}};

\node [anchor=south] (node31) at ([yshift=1.0em]node21.north) {\scriptsize{�ı�ʾ}};
\node [anchor=south west] (node31new) at ([yshift=-0.3em]node31.north west) {\scriptsize{``�Dz�˹''}};
\node [anchor=south] (node32) at ([yshift=1.0em]node22.north) {\scriptsize{�ı�ʾ\ \ \ }};
\node [anchor=south west] (node32new) at ([yshift=-0.3em]node32.north west) {\scriptsize{``�Dz�˹ ��ְ''}};
\node [anchor=south] (node33) at ([yshift=1.0em]node23.north) {\scriptsize{�ı�ʾ\ \ \ \ \ \ \ \ }};
\node [anchor=south west] (node33new) at ([yshift=-0.3em]node33.north west) {\scriptsize{``�Dz�˹ ��ְ ��''}};
\node [anchor=south] (node34) at ([yshift=1.0em]node24.north) {\scriptsize{�ı�ʾ\ \ \ \ \ \ \ \ }};
\node [anchor=south west] (node34new) at ([yshift=-0.3em]node34.north west) {\scriptsize{``�Dz�˹ ��ְ �� ƻ��''}};

% Vertical wiring, one arrow per column:
% second-layer cell (node2x) -> its output label (node3x) ...
\foreach \col in {1,...,4} {
    \draw [->,thick] ([yshift=0.1em]node2\col.north)--([yshift=-0.1em]node3\col.south);
}
% ... and first-layer cell (node1x) -> second-layer cell (node2x).
\foreach \col in {1,...,4} {
    \draw [->,thick] ([yshift=0.1em]node1\col.north)--([yshift=-0.1em]node2\col.south);
}

% Horizontal wiring, identical for both layers: a 1em stub entering the first cell,
% arrows between neighbouring cells, and a 1em stub leaving the last cell.
\foreach \layer in {2,1} {
    \draw [->,thick] ([xshift=-1em]node\layer1.west)--([xshift=-0.1em]node\layer1.west);
    \foreach \lft/\rgt in {1/2,2/3,3/4} {
        \draw [->,thick] ([xshift=0.1em]node\layer\lft.east)--([xshift=-0.1em]node\layer\rgt.west);
    }
    \draw [->,thick] ([xshift=0.1em]node\layer4.east)--([xshift=1em]node\layer4.east);
}

\visible<2->{
\node [anchor=south] (toplabel1) at ([yshift=2em,xshift=-2em]node32new.north) {\footnotesize{``ƻ��''�ı�ʾ��}};
\node [anchor=west,fill=blue!20!white,minimum width=3em] (toplabel2) at (toplabel1.east) {\footnotesize{������}};
}
\visible<3->{
\node [anchor=west,fill=red!20!white,minimum width=3em] (toplabel3) at (toplabel2.east) {\footnotesize{��}};
}

% Highlight frames are put on the background layer so they are painted beneath the nodes they surround.
\begin{pgfonlayer}{background}
% Overlay 3 on: dashed red frame around the embedding box of the fourth column (e4),
% linked by a curved red arrow to the red-filled label toplabel3.
\visible<3->{
\node [rectangle,inner sep=2pt,draw,thick,dashed,red] [fit = (e4)] (r2) {};
\draw [->,thick,red] (r2.west) .. controls +(west:0.8) and +(south:2) .. ([xshift=1.3em]toplabel3.south);
}
% Overlay 2 on: dashed blue, white-filled frame around the two output labels of the third column
% (node33, node33new), linked by a curved blue arrow to the blue-filled label toplabel2.
\visible<2->{
\node [rectangle,inner sep=2pt,draw,thick,dashed,ublue,fill=white] [fit = (node33) (node33new)] (r1) {};
\draw [->,thick,ublue] ([xshift=-2em]r1.north) .. controls +(north:0.7) and +(south:0.7) .. ([xshift=-0.5em]toplabel2.south);
}
\end{pgfonlayer}

\end{scope}
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% a stronger representation model - ELMO
\begin{frame}{��ǿ�ı�ʾģ�� - ELMO}
\begin{itemize}
\item \textbf{ELMO}(Embedding from Language Models)����˵�������˻�������ģ�͵�Ԥѵ�����ȳ�
    \begin{itemize}
    \item ��Ȼʹ��RNN�ṹ������ѭ����Ԫ������LSTM
    \item ͬʱ�����������Һ���������Ľ�ģ��ʽ��ͬʱ��ʾһ������˺��Ҷ˵�������
    \item �ں����в��������͸�����Ӧ�ã��ṩ�˸��ḻ����Ϣ
    \end{itemize}
\end{itemize}
\vspace{0.5em}
\begin{center}
% ELMO architecture sketch: two independent two-layer LSTM stacks share the input
% embeddings E and both feed the output representations T.
% The left stack is drawn with left-to-right recurrence, the right stack with
% right-to-left recurrence, so the picture shows both reading directions.
\begin{tikzpicture}
\begin{scope}[scale=1.2]

% --- left stack, lower layer
\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm0) at (0,0) {\scriptsize{Lstm}};
\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm1) at ([xshift=1em]Lstm0.east) {\scriptsize{Lstm}};
\node [anchor=west,inner sep=4pt] (sep) at ([xshift=1em]Lstm1.east) {\scriptsize{...}};
\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm2) at ([xshift=1em]sep.east) {\scriptsize{Lstm}};

% --- left stack, upper layer
\node [anchor=south,draw,inner sep=4pt,fill=blue!20!white] (Lstm3) at ([yshift=1em]Lstm0.north) {\scriptsize{Lstm}};
\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm4) at ([xshift=1em]Lstm3.east) {\scriptsize{Lstm}};
\node [anchor=west,inner sep=4pt] (sep1) at ([xshift=1em]Lstm4.east) {\scriptsize{...}};
\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm5) at ([xshift=1em]sep1.east) {\scriptsize{Lstm}};

% Dashed frame around the left stack (own node name; the right stack gets a separate one).
\node [rectangle,rounded corners,draw=black!50,densely dashed,inner sep=0.4em] [fit = (Lstm0) (Lstm2) (Lstm3) (Lstm5)] (fwdshadow) {};

% --- input embeddings, placed below and between the two stacks
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e1) at ([xshift=-2em,yshift=-1em]Lstm2.south) {\scriptsize{$E_1$}};
\node [anchor=west,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e2) at ([xshift=1em]e1.east) {\scriptsize{$E_2$}};
\node [anchor=west,inner sep=4pt] (sep5) at ([xshift=1em]e2.east) {\scriptsize{...}};
\node [anchor=west,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e3) at ([xshift=1em]sep5.east) {\scriptsize{$E_3$}};

% --- output representations, placed above and between the two stacks
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t1) at ([xshift=-2em,yshift=1em]Lstm5.north) {\scriptsize{$T_1$}};
\node [anchor=west,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t2) at ([xshift=1em]t1.east) {\scriptsize{$T_2$}};
\node [anchor=west,inner sep=4pt] (sep6) at ([xshift=1em]t2.east) {\scriptsize{...}};
\node [anchor=west,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t3) at ([xshift=1em]sep6.east) {\scriptsize{$T_3$}};

% --- right stack, lower layer
\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm6) at ([xshift=1.5em]Lstm2.east) {\scriptsize{Lstm}};
\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm7) at ([xshift=1em]Lstm6.east) {\scriptsize{Lstm}};
\node [anchor=west,inner sep=4pt] (sep3) at ([xshift=1em]Lstm7.east) {\scriptsize{...}};
\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm8) at ([xshift=1em]sep3.east) {\scriptsize{Lstm}};

% --- right stack, upper layer
\node [anchor=south,draw,inner sep=4pt,fill=blue!20!white] (Lstm9) at ([yshift=1em]Lstm6.north) {\scriptsize{Lstm}};
\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm10) at ([xshift=1em]Lstm9.east) {\scriptsize{Lstm}};
\node [anchor=west,inner sep=4pt] (sep4) at ([xshift=1em]Lstm10.east) {\scriptsize{...}};
\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white] (Lstm11) at ([xshift=1em]sep4.east) {\scriptsize{Lstm}};

% Dashed frame around the right stack.
\node [rectangle,rounded corners,draw=black!50,densely dashed,inner sep=0.4em] [fit = (Lstm6) (Lstm8) (Lstm9) (Lstm11)] (bwdshadow) {};

% Left stack recurrence: left-to-right in both layers.
\draw [->] ([xshift=0.1em]Lstm0.east) -- ([xshift=-0.1em]Lstm1.west);
\draw [->] ([xshift=0.1em]Lstm1.east) -- ([xshift=-0.1em]sep.west);
\draw [->] ([xshift=0.1em]sep.east) -- ([xshift=-0.1em]Lstm2.west);

\draw [->] ([xshift=0.1em]Lstm3.east) -- ([xshift=-0.1em]Lstm4.west);
\draw [->] ([xshift=0.1em]Lstm4.east) -- ([xshift=-0.1em]sep1.west);
\draw [->] ([xshift=0.1em]sep1.east) -- ([xshift=-0.1em]Lstm5.west);

% Left stack: lower layer feeds upper layer.
\draw [->] ([yshift=0.1em]Lstm0.north) -- ([yshift=-0.1em]Lstm3.south);
\draw [->] ([yshift=0.1em]Lstm1.north) -- ([yshift=-0.1em]Lstm4.south);
\draw [->] ([yshift=0.1em]Lstm2.north) -- ([yshift=-0.1em]Lstm5.south);

% Right stack recurrence: right-to-left in both layers (arrow tip at the left end, hence <-).
\draw [<-] ([xshift=0.1em]Lstm6.east) -- ([xshift=-0.1em]Lstm7.west);
\draw [<-] ([xshift=0.1em]Lstm7.east) -- ([xshift=-0.1em]sep3.west);
\draw [<-] ([xshift=0.1em]sep3.east) -- ([xshift=-0.1em]Lstm8.west);

\draw [<-] ([xshift=0.1em]Lstm9.east) -- ([xshift=-0.1em]Lstm10.west);
\draw [<-] ([xshift=0.1em]Lstm10.east) -- ([xshift=-0.1em]sep4.west);
\draw [<-] ([xshift=0.1em]sep4.east) -- ([xshift=-0.1em]Lstm11.west);

% Right stack: lower layer feeds upper layer.
\draw [->] ([yshift=0.1em]Lstm6.north) -- ([yshift=-0.1em]Lstm9.south);
\draw [->] ([yshift=0.1em]Lstm7.north) -- ([yshift=-0.1em]Lstm10.south);
\draw [->] ([yshift=0.1em]Lstm8.north) -- ([yshift=-0.1em]Lstm11.south);

% Every embedding feeds the matching cell of both stacks.
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Lstm0.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Lstm6.south);
\draw [->] ([yshift=0.1em]e2.north) -- ([yshift=-0.1em]Lstm1.south);
\draw [->] ([yshift=0.1em]e2.north) -- ([yshift=-0.1em]Lstm7.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Lstm2.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Lstm8.south);

% Every output collects the top-layer cell of both stacks; the left-stack arrow is
% nudged by -0.05em so the two arrow tips do not land on exactly the same point.
\draw [->] ([yshift=0.1em]Lstm3.north) -- ([xshift=-0.05em,yshift=-0.1em]t1.south);
\draw [->] ([yshift=0.1em]Lstm9.north) -- ([yshift=-0.1em]t1.south);
\draw [->] ([yshift=0.1em]Lstm4.north) -- ([xshift=-0.05em,yshift=-0.1em]t2.south);
\draw [->] ([yshift=0.1em]Lstm10.north) -- ([yshift=-0.1em]t2.south);
\draw [->] ([yshift=0.1em]Lstm5.north) -- ([xshift=-0.05em,yshift=-0.1em]t3.south);
\draw [->] ([yshift=0.1em]Lstm11.north) -- ([yshift=-0.1em]t3.south);

\end{scope}
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% a stronger representation model - GPT
\begin{frame}{��ǿ�ı�ʾģ�� - GPT}
\begin{itemize}
\item \textbf{GPT}(Generative Pre-Training)Ҳ��һ�ֻ�������ģ�͵ı�ʾģ��
    \begin{itemize}
    \item �ܹ�������Transformer��������ȡ������ǿ
    \item ����Pre-training + Fine-tuning�Ŀ�ܣ�Ԥѵ����Ϊ����ϵͳ�����IJ�����ʼֵ����˿��Ը��õ���ӦĿ������
    \end{itemize}
\end{itemize}
\vspace{0.5em}
\begin{center}
% GPT architecture sketch: two rows of Transformer blocks (Trm0-4 below, Trm5-9 above),
% input embeddings E underneath and output representations T on top.
% Every edge drawn runs from a position to the same position or to one further right.
\begin{tikzpicture}
\begin{scope}[scale=1.2,
    trmcell/.style={draw,inner sep=4pt,fill=blue!20!white,minimum width=3em},
    embcell/.style={draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em},
    outcell/.style={draw,inner sep=4pt,fill=yellow!30,minimum width=2em},
    dotscell/.style={inner sep=4pt}]

% Lower row of Transformer blocks, chained to the right with 1em gaps.
\node [anchor=west,trmcell] (Trm0) at (0,0) {\scriptsize{Trm}};
\foreach \lft/\rgt in {0/1,1/2,2/3} {
    \node [anchor=west,trmcell] (Trm\rgt) at ([xshift=1em]Trm\lft.east) {\scriptsize{Trm}};
}
\node [anchor=west,dotscell] (sep) at ([xshift=1em]Trm3.east) {\scriptsize{...}};
\node [anchor=west,trmcell] (Trm4) at ([xshift=1em]sep.east) {\scriptsize{Trm}};

% Upper row, started 1em above Trm0 and chained the same way.
\node [anchor=south,trmcell] (Trm5) at ([yshift=1em]Trm0.north) {\scriptsize{Trm}};
\foreach \lft/\rgt in {5/6,6/7,7/8} {
    \node [anchor=west,trmcell] (Trm\rgt) at ([xshift=1em]Trm\lft.east) {\scriptsize{Trm}};
}
\node [anchor=west,dotscell] (sep1) at ([xshift=1em]Trm8.east) {\scriptsize{...}};
\node [anchor=west,trmcell] (Trm9) at ([xshift=1em]sep1.east) {\scriptsize{Trm}};

% Dashed frame around both rows.
\node [rectangle,rounded corners,draw=black!50,densely dashed,inner sep=0.4em] [fit = (Trm0) (Trm4) (Trm5) (Trm9)] (inputshadow) {};

% Input embeddings, each 1em below the lower-row block of its column.
\foreach \trm/\idx in {0/1,1/2,2/3,3/4} {
    \node [anchor=north,embcell] (e\idx) at ([yshift=-1em]Trm\trm.south) {\scriptsize{$E_{\idx}$}};
}
\node [anchor=north,dotscell] (sep5) at ([yshift=-1em]sep.south) {\scriptsize{...}};
\node [anchor=north,embcell] (e5) at ([yshift=-1em]Trm4.south) {\scriptsize{$E_5$}};

% Output representations, each 1em above the upper-row block of its column.
\foreach \trm/\idx in {5/1,6/2,7/3,8/4} {
    \node [anchor=south,outcell] (t\idx) at ([yshift=1em]Trm\trm.north) {\scriptsize{$T_{\idx}$}};
}
\node [anchor=south,dotscell] (sep6) at ([yshift=1em]sep1.north) {\scriptsize{...}};
\node [anchor=south,outcell] (t5) at ([yshift=1em]Trm9.north) {\scriptsize{$T_5$}};

% Embedding -> lower-row block edges, listed as <embedding index>/<block index>.
\foreach \src/\dst in {1/0,1/1,1/2,1/3,1/4,2/1,3/2,3/3,3/4,4/3,5/4} {
    \draw [->] ([yshift=0.1em]e\src.north) -- ([yshift=-0.1em]Trm\dst.south);
}

% Lower-row -> upper-row block edges, listed as <lower block>/<upper block>.
\foreach \src/\dst in {0/5,0/6,0/7,0/8,0/9,1/6,2/7,2/8,2/9,3/8,4/9} {
    \draw [->] ([yshift=0.1em]Trm\src.north) -- ([yshift=-0.1em]Trm\dst.south);
}

% Upper-row block -> output edges, one per column.
\foreach \src/\dst in {5/1,6/2,7/3,8/4,9/5} {
    \draw [->] ([yshift=0.1em]Trm\src.north) -- ([yshift=-0.1em]t\dst.south);
}

\end{scope}
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% a stronger representation model - BERT
\begin{frame}{��ǿ�ı�ʾģ�� - BERT}
\begin{itemize}
\item \textbf{BERT}( Bidirectional Encoder Representations from Transformers)������dz��𱬵ı�ʾģ��
    \begin{itemize}
    \item ��Ȼ����Transformer���ǿ������������˵�������(���ԶԱ�GPT)
    \item ʹ����Mask����������ѵ���õ�ģ�͵Ľ�׳�ԣ��������������Ϊ��Ԥѵ����ʾģ�͵��·�ʽ
    \end{itemize}
\end{itemize}
\vspace{0.5em}
\begin{center}
\begin{tikzpicture}
% Two stacked rows of "Trm" boxes, a row of E_i boxes underneath and a row of
% T_i boxes on top. Paint order: Trm rows, dashed frame, E row, T row, arrows.
\begin{scope}[
  scale=1.2,
  trm box/.style={draw,inner sep=4pt,fill=blue!20!white,minimum width=3em},
  emb box/.style={draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em},
  tok box/.style={draw,inner sep=4pt,fill=yellow!30,minimum width=2em},
  dots box/.style={inner sep=4pt}]

% lower Trm row: Trm0..Trm3, an ellipsis, then Trm4 (each chained to the previous east anchor)
\node [trm box,anchor=west] (Trm0) at (0,0) {\scriptsize{Trm}};
\foreach \cur/\prev in {1/0,2/1,3/2}
  {\node [trm box,anchor=west] (Trm\cur) at ([xshift=1em]Trm\prev.east) {\scriptsize{Trm}};}
\node [dots box,anchor=west] (sep) at ([xshift=1em]Trm3.east) {\scriptsize{...}};
\node [trm box,anchor=west] (Trm4) at ([xshift=1em]sep.east) {\scriptsize{Trm}};

% upper Trm row: Trm5 sits above Trm0, the rest are chained to the right
\node [trm box,anchor=south] (Trm5) at ([yshift=1em]Trm0.north) {\scriptsize{Trm}};
\foreach \cur/\prev in {6/5,7/6,8/7}
  {\node [trm box,anchor=west] (Trm\cur) at ([xshift=1em]Trm\prev.east) {\scriptsize{Trm}};}
\node [dots box,anchor=west] (sep1) at ([xshift=1em]Trm8.east) {\scriptsize{...}};
\node [trm box,anchor=west] (Trm9) at ([xshift=1em]sep1.east) {\scriptsize{Trm}};

% dashed rounded frame around both Trm rows
\node [rectangle,rounded corners,draw=black!50,densely dashed,inner sep=0.4em] [fit = (Trm0) (Trm4) (Trm5) (Trm9)] (inputshadow) {};

% E row, hanging below the lower Trm row
\foreach \idx/\above in {1/0,2/1,3/2,4/3}
  {\node [emb box,anchor=north] (e\idx) at ([yshift=-1em]Trm\above.south) {\scriptsize{$E_\idx$}};}
\node [dots box,anchor=north] (sep5) at ([yshift=-1em]sep.south) {\scriptsize{...}};
\node [emb box,anchor=north] (e5) at ([yshift=-1em]Trm4.south) {\scriptsize{$E_5$}};

% T row, sitting on top of the upper Trm row
\foreach \idx/\below in {1/5,2/6,3/7,4/8}
  {\node [tok box,anchor=south] (t\idx) at ([yshift=1em]Trm\below.north) {\scriptsize{$T_\idx$}};}
\node [dots box,anchor=south] (sep6) at ([yshift=1em]sep1.north) {\scriptsize{...}};
\node [tok box,anchor=south] (t5) at ([yshift=1em]Trm9.north) {\scriptsize{$T_5$}};

% E row -> lower Trm row: e1 and e3 fan out to every Trm box, the others go straight up
\foreach \dst in {0,...,4}
  {\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm\dst.south);}
\draw [->] ([yshift=0.1em]e2.north) -- ([yshift=-0.1em]Trm1.south);
\foreach \dst in {0,...,4}
  {\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Trm\dst.south);}
\draw [->] ([yshift=0.1em]e4.north) -- ([yshift=-0.1em]Trm3.south);
\draw [->] ([yshift=0.1em]e5.north) -- ([yshift=-0.1em]Trm4.south);

% lower Trm row -> upper Trm row: same pattern (Trm0 and Trm2 fan out)
\foreach \dst in {5,...,9}
  {\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm\dst.south);}
\draw [->] ([yshift=0.1em]Trm1.north) -- ([yshift=-0.1em]Trm6.south);
\foreach \dst in {5,...,9}
  {\draw [->] ([yshift=0.1em]Trm2.north) -- ([yshift=-0.1em]Trm\dst.south);}
\draw [->] ([yshift=0.1em]Trm3.north) -- ([yshift=-0.1em]Trm8.south);
\draw [->] ([yshift=0.1em]Trm4.north) -- ([yshift=-0.1em]Trm9.south);

% upper Trm row -> T row, one-to-one
\foreach \src/\dst in {5/1,6/2,7/3,8/4,9/5}
  {\draw [->] ([yshift=0.1em]Trm\src.north) -- ([yshift=-0.1em]t\dst.south);}

\end{scope}
\end{tikzpicture}
\end{center}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% Ԥѵ��
\begin{frame}{Ԥѵ��}
\begin{itemize}
\item ����ģ�Ϳ���ʹ�ô����ޱ�ע���ݽ���ѵ�����õ���ģ�Ϳ��Ա�ֱ����������ϵͳ�������е���������Ϊ��

\begin{center}
\begin{tikzpicture}[bar box/.style={anchor=south,minimum width=17em}]
% Two filled bars of equal width: the Encoder bar at the origin and the
% Decoder bar resting directly on its north anchor.
\node [bar box,fill=red!20!white] (encoder) at (0,0) {Encoder (����ģ��Ԥ��ѵ��)};
\node [bar box,fill=blue!20!white] (decoder) at (encoder.north) {Decoder (Ŀ����������ѵ��)};
\end{tikzpicture}
\end{center}

\item<2-> �������˷dz��𱬵�\alert{��ʽ}�����ģ����ģ��pre-training + Ŀ������fine-tuning
	\begin{itemize}
	\item ����NLP���񶼿��Ա�����Ϊ���Խ�ģ�����ⲿѵ���õ�������ģ����Ϊģ�����Ŀ��ϵͳ��(������ʼ��)
	\end{itemize}
\end{itemize}

\visible<2->{
\begin{center}
\begin{tikzpicture}[
  module box/.style={anchor=west,draw,thick,minimum width=4em,minimum height=1.7em,fill=blue!20},
  title slot/.style={anchor=south,minimum width=4em,minimum height=1.7em},
  system frame/.style={rectangle,draw,thick,fill=red!20},
  flow/.style={->,thick}]

% (a) left diagram: a module box inside a red system frame, fed by one data label
\begin{scope}

\node [module box] (encoder) at (0,0) {ģ��};
\node [title slot] (space) at ([yshift=0.3em]encoder.north) {\footnotesize{Ŀ��ϵͳ}};

% the frame is fitted around both nodes and painted behind them
\begin{pgfonlayer}{background}
\node [system frame,fit=(encoder) (space)] (system) {};
\end{pgfonlayer}

\node [anchor=north] (data) at ([yshift=-1em]system.south) {\scriptsize{\textbf{Ŀ�������б�ע����}}};
\draw [flow] (data.north) -- ([yshift=-0.1em]system.south);
\node [anchor=north] (label) at (data.south) {\scriptsize{(a) standard method}};

\end{scope}

% (b) right diagram, shifted 2.8in: same frame with a dashed module box, plus a
% green box 7em to the left that feeds into it and has its own data label
\begin{scope}[xshift=2.8in]

\node [module box,dashed] (encoder) at (0,0) {ģ��};
\node [title slot] (space) at ([yshift=0.3em]encoder.north) {\footnotesize{Ŀ��ϵͳ}};
\node [module box,anchor=center,fill=green!20] (encoderpre) at ([xshift=-7em]encoder.center) {\footnotesize{����ģ��}};
\draw [flow] (encoderpre.east) -- (encoder.west);

\begin{pgfonlayer}{background}
\node [system frame,fit=(encoder) (space)] (system) {};
\end{pgfonlayer}

\node [anchor=north] (data) at ([yshift=-1em]system.south) {\scriptsize{\textbf{Ŀ�������б�ע����}}};
\draw [flow] (data.north) -- ([yshift=-0.1em]system.south);
\node [anchor=north] (data2) at ([yshift=-1em,xshift=-7em]system.south) {\scriptsize{\textbf{���ģ�ޱ�ע����}}};
\draw [flow] (data2.north) -- ([yshift=-0.1em]encoderpre.south);
\node [anchor=north] (label) at ([xshift=-4em]data.south) {\scriptsize{(b) pre-training + fine-tuning}};

\end{scope}

\end{tikzpicture}
\end{center}
}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% Ԥѵ����������˼·
\begin{frame}{Ԥѵ����������˼·}
\begin{itemize}
\item Ԥѵ��ģ��ˢ����������ͬʱ��������һЩ˼����\\
      Ԥѵ�����������Ǵ�����ʲô��
    \begin{itemize}
    \item �б�ע���������ޣ�Ԥѵ���ṩʹ�ó����ģ���ݵķ���
    \item �Ӵ��ģ�ޱ�ע������ѧϰͨ��֪ʶ��������������
    \item �����縴���Ҳ�����ѵ����Ԥѵ������ʹģ�͹�ע���ʽ�ĸ��ܶ�����
    \end{itemize}
\end{itemize}

\visible<2->{
\begin{center}
\begin{tikzpicture}[sample pt/.style={rectangle,minimum size=0.1em,inner sep=2pt,fill=red}]
% An ellipse with ten small red square markers p1..p10 inside it; later overlays
% add a blue box around p1..p6 and two extra labelled arrows.
\draw[name path=ellipse,thick] (0,0) circle[x radius = 2, y radius = 1];

% markers are numbered p1..p10 in list order
\foreach \px/\py [count=\idx] in {0.2/0.5,0.3/0.6,0.1/-0.1,0.4/0,0.5/0.3,0.6/0.1,0.7/-0.1,-1.2/0.4,-1.0/-0.3,-0.1/-0.8}
  {\node [sample pt] (p\idx) at (\px,\py) {};}

% blue box around the p1..p6 cluster, painted behind the markers (overlay 4 onwards)
\begin{pgfonlayer}{background}
\visible<4->{
\node [rectangle,inner sep=0.4em,draw,blue] [fit = (p1) (p2) (p3) (p4) (p5) (p6)] (area) {};
}
\end{pgfonlayer}

% label arrow pointing at the ellipse from the lower right
\draw [->] (2.5,-0.7) -- (1.8,-0.5) node [pos=0,right] {\scriptsize{ģ�Ͳ�����ռ�}};

% label arrow pointing at the blue box (overlay 4 onwards)
\visible<4->{
\draw [->] (2.0,0.7) -- (area.20) node [pos=0,right] {\scriptsize{���ʽ���ܶ�����(Ԥѵ��)}};
}
% label arrow pointing at the isolated marker p8 (overlay 3 onwards)
\visible<3->{
\draw [->] (-2.0,0.7) -- (p8.west) node [pos=0,left] {\scriptsize{����Ľ�}};
}

\end{tikzpicture}
\end{center}
}

\begin{itemize}
\item<5-> ���������е�Ԥѵ��
    \begin{itemize}
    \item ����������Ԥѵ����û������һ�������ںܶ������������ѵ������������С����һ����Ҳ��Ӧ�������˫�ィģ��Ԥѵ��Ҳ������µ�Ҫ��
    \end{itemize}
\end{itemize}

\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% �ܽ�
\begin{frame}{�ܽ� - ����һ����}
\begin{itemize}
\item ���˺ܶ࣬��ѽ�ۣ�������һ����Ҫ�۵�
    \begin{itemize}
    \item ������û����ô���ӣ����Ų�����
    \item �򵥵�����ṹ������ϳ�ǿ���ģ��
    \item ����ģ�Ϳ�����������ʵ�֣�Ч���ܺã�������ֵ�Ԥѵ���ȷ�ʽ֤����������ģ�͵�DZ��
    \end{itemize}
\item<2-> ��Ȼ�кܶ�������Ҫ����
    \begin{itemize}
    \item ������������ṹ(����NLP)\\
          googleһ��LSTM��GRU��CNN
    \item ���ģ�ͺ�ѵ�����������ѧϰ�������``��''?\\
          ���������Դ���ʲô��\\
          �����Ч��ѵ�����ģ�ͣ�
    \item ��ΰ����������ڰ��������������ڵ�����NLP����\\
          ����encoder-decoder���
    \item ���ѧϰ��ʵ������\\
          ``������''�˽��£���Ϊ��ͬ������κ�ģ����ƶ��м���\\
          ...
    \end{itemize}
\end{itemize}
\end{frame}

%%%------------------------------------------------------------------------------------------------------------
%%% last slide
\begin{frame}{�ֽ���һ������~}

\vspace{2em}

\begin{center}
\textbf{���ݺܶ࣬�����˸�ͷ}\\
\textbf{ѧϰ���ѧϰ������Ҫʵ���;���Ļ��ۣ�}

\vspace{2em}

\begin{tikzpicture}
% Closing diagram: four columns, each stacking (bottom to top) an input label,
% an "embedding" box, an "RNN Cell" box, a "Softmax" box and an output label.
% The RNN cells are additionally chained left to right.

% Styles are declared with \tikzset; \tikzstyle is deprecated in PGF/TikZ.
% Both are local to this picture, so the rendering is unchanged.
\tikzset{
  rnnnode/.style={draw,inner sep=5pt,minimum width=4em,minimum height=1.5em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}},
  flow/.style={->,thick}}

% RNN row: node11 at the origin, node12..node14 chained 2em to the right
\node [anchor=west,rnnnode] (node11) at (0,0) {\tiny{RNN Cell}};
\foreach \cur/\prev in {12/11,13/12,14/13}
  {\node [anchor=west,rnnnode] (node\cur) at ([xshift=2em]node\prev.east) {\tiny{RNN Cell}};}

% embedding row below the RNN row (the later fill= overrides the green of rnnnode)
\foreach \idx in {1,...,4}
  {\node [anchor=north,rnnnode,fill=red!30!white] (e\idx) at ([yshift=-1.2em]node1\idx.south) {\tiny{embedding}};}

% input labels; kept as explicit nodes so the CJK text is not passed through a \foreach list
\node [anchor=north] (w1) at ([yshift=-1em]e1.south) {\footnotesize{$<$s$>$}};
\node [anchor=north] (w2) at ([yshift=-1em]e2.south) {\footnotesize{лл}};
\node [anchor=north] (w3) at ([yshift=-1em]e3.south) {\footnotesize{���}};
\node [anchor=north] (w4) at ([yshift=-1em]e4.south) {\footnotesize{����}};

% input label -> embedding
\foreach \idx in {1,...,4}
  {\draw [flow] ([yshift=0.1em]w\idx.north)--([yshift=-0.1em]e\idx.south);}

% embedding -> RNN cell
\foreach \idx in {1,...,4}
  {\draw [flow] ([yshift=0.1em]e\idx.north)--([yshift=-0.1em]node1\idx.south);}

% softmax row above the RNN row
\foreach \idx in {1,...,4}
  {\node [anchor=south,rnnnode,fill=red!30!white] (node2\idx) at ([yshift=1.0em]node1\idx.north) {\tiny{Softmax($\cdot$)}};}

% output labels (explicit for the same reason as the input labels)
\node [anchor=south] (output1) at ([yshift=1em]node21.north) {\Large{\textbf{лл}}};
\node [anchor=south] (output2) at ([yshift=1em]node22.north) {\Large{\textbf{���}}};
\node [anchor=south] (output3) at ([yshift=1em]node23.north) {\Large{\textbf{����}}};
\node [anchor=south] (output4) at ([yshift=1em]node24.north) {\Large{\textbf{$<$/s$>$}}};

% softmax -> output label
\foreach \idx in {1,...,4}
  {\draw [flow] ([yshift=0.1em]node2\idx.north)--([yshift=-0.1em]output\idx.south);}

% RNN cell -> softmax
\foreach \idx in {1,...,4}
  {\draw [flow] ([yshift=0.1em]node1\idx.north)--([yshift=-0.1em]node2\idx.south);}

% horizontal chain: a stub arrow into the first cell, cell-to-cell links, a stub arrow out of the last cell
\draw [flow] ([xshift=-1em]node11.west)--([xshift=-0.1em]node11.west);
\foreach \src/\dst in {11/12,12/13,13/14}
  {\draw [flow] ([xshift=0.1em]node\src.east)--([xshift=-0.1em]node\dst.west);}
\draw [flow] ([xshift=0.1em]node14.east)--([xshift=1em]node14.east);

\end{tikzpicture}

\end{center}

\end{frame}

\end{CJK}
\end{document}