\def\CTeXPreproc{Created by ctex v0.2.13, don't edit!}
% Preamble of the beamer slide deck (CJK text, GBK-encoded source).
% Keep one statement per physical line: a `%` comment swallows everything
% that follows it on the same line, so code must never trail a comment.
\documentclass[cjk,t,compress,12pt]{beamer}

% --- Packages (original load order preserved) ---------------------------
\usepackage{pstricks}
\usepackage{etex}
\usepackage{eso-pic,graphicx}
\usepackage{fancybox}
\usepackage{amsmath,amssymb}
\usepackage{setspace}
\usepackage{xcolor}
\usepackage{array,multirow}
\usepackage{CJK}
\usepackage{tikz}
\usepackage{tikz-qtree}
\usepackage{hyperref}
\usepackage{changepage}
\usepackage{pgfplots}
% NOTE(review): `subfigure` is deprecated in favour of `subcaption`; it is kept
% because this preamble cannot tell whether later slides use its commands.
\usepackage{subfigure}
\usepackage{tikz-3dplot}
\usepackage{esvect}
\usepackage{tcolorbox}
\tcbuselibrary{skins}

% --- TikZ libraries ------------------------------------------------------
% Each library is loaded exactly once (the original requested `arrows` four
% times and `calc` twice). The TikZ `arrows` library already pulls in the pgf
% library of the same name, so no separate \usepgflibrary call is needed.
\usetikzlibrary{calc,intersections,matrix,positioning,fit}
\usetikzlibrary{arrows,shapes}
\usetikzlibrary{decorations,decorations.pathreplacing}
\usetikzlibrary{shadows,shadows.blur}
\usetikzlibrary{mindmap,backgrounds}

% --- Math operators ------------------------------------------------------
% Starred form: subscripts are set below the operator in display math.
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}

% --- Beamer appearance ---------------------------------------------------
\setbeamertemplate{items}[ball]
% Serif font for math only.
\usefonttheme[onlymath]{serif}

% Named colours used by the slides.
\definecolor{ugreen}{rgb}{0,0.5,0}
\definecolor{lgreen}{rgb}{0.9,1,0.8}
\definecolor{xtgreen1}{rgb}{0.824,0.898,0.8}
\definecolor{xtgreen}{rgb}{0.914,0.945,0.902}
\definecolor{lightgray}{gray}{0.85}
\definecolor{ublue}{rgb}{0.152,0.250,0.545}

% Beamer colour pairs built from the colours above.
\setbeamercolor{uppercol}{fg=white,bg=ugreen}
\setbeamercolor{lowercol}{fg=black,bg=xtgreen}
\setbeamercolor{uppercolblue}{fg=white,bg=ublue}
\setbeamercolor{lowercolblue}{fg=black,bg=blue!10}

% Alternative themes, all disabled.
%\usetheme{default}
%\usetheme{Darmstadt}
%\usetheme{Madrid}
%\usetheme{Frankfurt}
%\usetheme{Dresden}
%\usetheme{Boadilla}
%\usecolortheme{dolphin}

% --- Counters ------------------------------------------------------------
\newcounter{mycount1}
\newcounter{mycount2}
\newcounter{mycount3}
\newcounter{mycount4}

\usefonttheme[onlylarge]{structurebold}

% --- CJK font selection --------------------------------------------------
% \mycfont becomes `you` when C:/WINDOWS/win.ini exists, and `gbsn` otherwise.
\IfFileExists{C:/WINDOWS/win.ini}
  {\newcommand{\mycfont}{you}}
  {\newcommand{\mycfont}{gbsn}}

% Opens a CJK environment; the matching \end{CJK} is the first token of the
% next line of the file.
\begin{CJK}{GBK}{\mycfont}
\end{CJK} \setbeamerfont*{frametitle}{size=\large,series=\bfseries} \setbeamertemplate{navigation symbols}{\begin{CJK}{GBK}{\mycfont} ������ �����������ģ�� \hspace*{2em} Фͩ\&�쾸�� \end{CJK} \hspace*{2em} \today \hspace*{2em} \insertframenumber{}/\inserttotalframenumber} \setbeamertemplate{itemize items}[circle] % if you want a circle \setbeamertemplate{itemize subitem}[triangle] % if you wnat a triangle \setbeamertemplate{itemize subsubitem}[ball] % if you want a ball \begin{document} \begin{CJK}{GBK}{\mycfont} \title{\Large{�����������ģ��}} \author{\large{\textbf{Фͩ\ \ �쾸��}}} \institute{ \blue{\url{xiaotong@mail.neu.edu.cn}} \black{} \\ \blue{\url{zhujingbo@mail.neu.edu.cn}} \black{} \\ \vspace{1.0em} ������ѧ ��Ȼ���Դ���ʵ���� \\ \blue{\underline{\url{http://www.nlplab.com}}} \black{} \\ \vspace{0.2cm} \hspace{0.1cm} \includegraphics[scale=0.1]{../Figures/logo.pdf} } \date{} \maketitle \setlength{\leftmargini}{1em} \setlength{\leftmarginii}{1em} %%%------------------------------------------------------------------------------------------------------------ \section{ΪʲôҪ̸������} %%%------------------------------------------------------------------------------------------------------------ \subsection{��ʷ} %%%------------------------------------------------------------------------------------------------------------ %%% ΪʲôҪ̸������ \begin{frame}{ΪʲôҪ̸������} \begin{itemize} \item ��Щ��\textbf{���ѧϰ��Deep Learning��}�����˾��DZ�� \begin{itemize} \item ϯ���˰��������������ڵĺܶ�NLP���� \item �Ѿ���Ϊ��NLP�з������·�ʽ \item ������\textbf{��������}����һ����������һ�����ݣ� \end{itemize} \vspace{0.2em} \begin{center} \includegraphics[scale=0.45]{./Figures/deeplearning.jpg} \end{center} \vspace{0.5em} \item<2-> \textbf{�˹������磨Artificial Neural Network��}�����ѧϰ��ʵ������ \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ����ʷ \begin{frame}{����������ѧϰ�ĸ��1940s-1970s��} \begin{itemize} \item 
\textbf{������}��������ڿ������У�Cybernetics��������������������壨Connectionism���б��ἰ \begin{itemize} \item \textbf{������뷨}��ģ����Ե�����ѧϰ���ƽ��м������ģ \item<2-> ����ʹ�����Լ�Ȩ��������������$\textbf{x}$�ͽ��$\textbf{y}$֮�����ϵ \vspace{-0.5em} \begin{displaymath} f(\textbf{x},\textbf{w})=x_1 \cdot w_1 + ... + x_n \cdot w_n \end{displaymath}\\ \vspace{-0.5em} ����$\textbf{w}$��Ȩ�ء�����ģ��ҲӰ��������ݶ��½������ڻ���ѧϰ�����ķ�չ�� \item<3-> ������ľ���Ҳ�����ԣ����������������⣬���������������XOR��ѧϰ���� \end{itemize} \end{itemize} \vspace{-0.5em} \begin{center} \includegraphics[scale=0.21]{./Figures/concept-history.jpg}\\ \scriptsize{ͼƬ���ԡ�Deep Learning��} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ���ѧϰ�ķ�չ \begin{frame}{����������ѧϰ�ķ�չ��1980s-1990s��} \begin{itemize} \item ���ڣ�����ѧ�����Ѿ������������Ψһ�����Դ�����ѧϰҲ�������µķ�չ�Ρ�����˼��Ӱ��� \begin{itemize} \item<2-> \textbf{�������壨Connectionism��}������֪ѧ���У����ڵķ������壨Symbolicism�����ѽ��ʹ������ʹ����Ԫ������������������ĺ���˼���ǣ��������ļ��㵥Ԫ���ӵ�һ�����ʵ��������Ϊ����\\ ��Ҳ�ƶ��˷�����ѵ����������緽����Ӧ�ã�����չ�˰�������ʱ����ģ�����ڵľ��佨ģ������ \item<3-> \textbf{�ֲ�ʽ��ʾ��Distributed representation��}��һ������ϵͳ���κβ��ֵ����붼Ӧ���Ƕ��������ͬ��ʾ�Ľ�������磬һ�����ʲ���һ�������������ɳɰ���ǧ��������ͬ������������ÿ������������������ʵ�"ij��"���档 \end{itemize} \item<4-> \alert{�ź�����}��������90������ڣ��ںܶ�Ӧ�������Ƕ������緽���������ߣ����ǽ����û�дﵽԤ�ڡ��ر��ǣ��˷�����ͼģ�͵Ȼ���ѧϰ����ȡ���˺ܺõ�Ч�����������о�������һ�εȡ� \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ���ѧϰ�ĵ������˳� \begin{frame}{�������˳���2000s-now��} \begin{itemize} \item \textbf{���ѧϰ�ı���}Դ��2006��Hinton���˳ɹ�ѵ����һ������������磨deep belief network����֮�����ѧϰ���˳���ϯ���˻���ѧϰ���˹�����Ӧ���������������ִ����ѧϰ�ijɹ���������ԭ�� \begin{enumerate} \item \textbf{ģ�ͺ��㷨}��������Ľ� \item 
\textbf{���м�������}������ʹ���ģʵ����Ϊ�˿��� \item ��Hinton����Ϊ������ѧ�ߵ�\textbf{��������Ͷ��} \end{enumerate} \item<2-> \textbf{��Ӧ�õĽǶ�}���������Ŀ���������ģ������������ҲΪ���ѧϰ�ijɹ��ṩ������ \end{itemize} \visible<2->{ \begin{center} \begin{tikzpicture} \scriptsize{ \begin{semilogyaxis}[ width=.95\textwidth, height=.38\textwidth, yticklabel style={/pgf/number format/precision=1,/pgf/number format/fixed zerofill}, xticklabel style={/pgf/number format/1000 sep=}, xlabel style={yshift=0.5em}, xlabel={\footnotesize{Year}},ylabel={\footnotesize{\# of sents.}}, ymin=1,ymax=1000000000000, xmin=1999,xmax=2020,xtick={2000,2005,2010,2015,2020}, legend style={yshift=-5em,xshift=0em,legend cell align=left,legend plot pos=right} ] \addplot[purple,mark=square,mark=star,very thick] coordinates {(2001,10000) (2005,2000000) (2008,8000000) (2009,9000000) (2011,10000000) (2012,12000000) (2014,20000000) (2016,30000000) (2018,40000000) }; \addlegendentry{\tiny{Bi-text used in MT papers}\ \ \ \ \ \ \ \ \ \ } \only<3->{ \addplot[ublue,mark=otimes*,very thick] coordinates {(2005,10000000) (2008,100000000) (2012,3000000000) (2016,5000000000) (2019,10000000000) }; \addlegendentry{\tiny{Bi-text used in practical systems}} } \end{semilogyaxis} } \end{tikzpicture} \end{center} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ \subsection{���ѧϰ������} %%%------------------------------------------------------------------------------------------------------------ %%% �˵���ѧϰ \begin{frame}{�˵���ѧϰ} \begin{itemize} \item ���������������ṩ��һ�ֻ��ƣ�����ֱ�Ӵ�ѧϰ���뵽����Ĺ�ϵ����֮Ϊ\alert{�˵���ѧϰ} \begin{itemize} \item<2-> \textbf{�����������̵ķ���}����Ҫ�����˹������������������������������������������� \item<3-> \textbf{���ڶ˵���ѧϰ�ķ���}��û���˹����������������������ȫ�������罨ģ \end{itemize} \end{itemize} \vspace{-0.5em} \begin{center} \visible<2->{ \includegraphics[scale=0.31]{./Figures/end2end-learning-1.jpg}\\ } \visible<3->{ 
\Large{\textbf{VS.}}\\ \vspace{0.3em} \includegraphics[scale=0.31]{./Figures/end2end-learning-2.jpg}\\ } \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ���ѧϰ�ı��� \begin{frame}{���ѧϰ�ı��� - �����Խ�ģΪ��} \begin{itemize} \item \textbf{����}�������Խ�ģ��LM�������ϣ���������������ѧϰ�ķ��������˾����ƣ���PTB������PPLֵ�Ѿ��õ����˵��½���PPLԽ��Խ�ã� \begin{itemize} \item ��ͳ$n$Ԫ�ģ����������ϡ������� \item<2-> ������ģ�Ϳ��Ը��õ����������������� \end{itemize} \end{itemize} \begin{tabular}{l | l | l | r} ģ�� & ���� & ��� & PPL \\ \hline 3-gram LM & Brown et al. & 1992 & 178.0 \pause \\ \hline Feed-forward Neural LM & Bengio et al. & 2003 & 162.2 \\ Recurrent NN-based LM & Mikolov et al. & 2010 & 124.7 \\ Recurrent NN-LDA & Mikolov et al. & 2012 & 92.0 \\ LSTM & Zaremba et al. & 2014 & 78.4 \\ RHN & Zilly et al. & 2016 & 65.4 \\ AWD-LSTM & Merity et al. & 2018 & 58.8 \\ GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7} \end{tabular} \end{frame} %%%------------------------------------------------------------------------------------------------------------ \section{���������} %%%------------------------------------------------------------------------------------------------------------ \subsection{������} %%%------------------------------------------------------------------------------------------------------------ %%% ��Ԫ \begin{frame}{������Ļ�����Ԫ - ��Ԫ} \begin{itemize} \item ����ѧ�ϣ���Ԫ����ϵͳ�Ļ�����ɵ�Ԫ���ܶ��������������Ӧ����������\\ \begin{center} \includegraphics[scale=0.25]{./Figures/neuron-real.jpg}\\ \end{center} \item<2-> ����������˵����\textbf{�˹���Ԫ}��ʵ������������ :) \begin{itemize} \item ����$\textbf{x}$����$\textbf{w}$�������Ա仯��֮�����ƫ��$\textbf{b}$���ھ��������$f$�����õ�$\textbf{y}$ - ɶ���������� \end{itemize} {\Large \begin{displaymath} \textbf{y} = f(\textbf{x} \cdot \textbf{w} + \textbf{b}) \end{displaymath} } \\ \vspace{-0.5em} \end{itemize} \end{frame} 
%%%------------------------------------------------------------------------------------------------------------ %%% ��֪�� \begin{frame}{����˹���Ԫģ�� - ��֪����Perceptron��} \begin{itemize} \item ��֪�����˹���Ԫ��һ��ʵ������������50-60�����������������о���������ԶӰ�졣 \begin{itemize} \item<2-> \textbf{����}�����ɸ���ֵ������$x_i=0$ or $1$ \item<3-> ÿһ�����������Ӧһ��\textbf{Ȩ��}$w_i$��ʵ���� \item<4-> \textbf{���}Ҳ��һ����ֵ�����$y=0$ or $1$�� �жϵ������ǣ�����ͼ�Ȩ���Ƿ���ڣ�����С�ڣ�һ����ֵ$\sigma$�� \begin{displaymath} y = \left\{ \begin{array}{ll} 0 & \sum_i {w_i \cdot x_i} < \sigma \\ 1 & \sum_i {w_i \cdot x_i} \ge \sigma \end{array} \right. \end{displaymath} \end{itemize} \end{itemize} \begin{center} \begin{tikzpicture} \begin{scope} \node [anchor=center,circle,draw,ublue,very thick,minimum size=3.5em,fill=white,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] (neuron) at (0,0) {}; \node [anchor=east] (x1) at ([xshift=-6em]neuron.west) {\Large{$x_1$}}; \node [anchor=center] (x0) at ([yshift=3em]x1.center) {\Large{$x_0$}}; \node [anchor=center] (x2) at ([yshift=-3em]x1.center) {\Large{$x_2$}}; \node [anchor=west] (y) at ([xshift=6em]neuron.east) {\Large{$y$}}; \draw [->,thick] (x0.east) -- (neuron.150) node [pos=0.5,above] {$w_0$}; \draw [->,thick] (x1.east) -- (neuron.180) node [pos=0.5,above] {$w_1$}; \draw [->,thick] (x2.east) -- (neuron.210) node [pos=0.5,above] {$w_2$}; \draw [->,thick] (neuron.east) -- (y.west); \visible<2>{ \draw [->,thick,red] (x0.east) -- (neuron.150) node [pos=0.5,above] {\black{$w_0$}}; \draw [->,thick,red] (x1.east) -- (neuron.180) node [pos=0.5,above] {\black{$w_1$}}; \draw [->,thick,red] (x2.east) -- (neuron.210) node [pos=0.5,above] {\black{$w_2$}}; } \visible<3>{ \draw [->,thick] (x0.east) -- (neuron.150) node [pos=0.5,above] {\red{$w_0$}}; \draw [->,thick] (x1.east) -- (neuron.180) node [pos=0.5,above] {\red{$w_1$}}; \draw [->,thick] (x2.east) -- (neuron.210) node [pos=0.5,above] {\red{$w_2$}}; } \visible<4->{ \node [anchor=center] 
(neuronmath) at (neuron.center) {\red{\small{$\sum \ge \sigma$}}}; } \visible<5->{ \node [anchor=south] (prediction) at ([xshift=-2em,yshift=1em]y.north west) {\footnotesize{\red{$x_0 w_0 + x_1 w_1 + x_2 w_2 \ge \sigma$}}}; \draw [->,thick,red] (neuron.east) -- (y.west); \node [anchor=west] (yvalue) at ([yshift=0.2em]y.east) {\Large{$=1$}}; } \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ��֪�� - һ������ \begin{frame}{һ������} \begin{itemize} \item һ���dz������ӡ����磬��һ�����ֻᣬ�����ھ����Ƿ�ȥ�μӣ����������ػ�Ӱ����ľ��� \begin{itemize} \item $x_0$���糡�Ƿ������㹻���� \item $x_1$��Ʊ���Ƿ����300Ԫ�� \item $x_2$��Ů�����Ƿ�ϲ�����ֻ \end{itemize} \item<2-> ��ξ��������磬Ů���Ѻ�ϣ������һ�𣬵��Ǿ糡��Զ����Ʊ��500Ԫ�������Щ���ض���ľ��߶���ͬ����Ҫ�ģ���ô����һ���ۺϵ÷֣� \begin{displaymath} x_0 \cdot w_0 + x_1 \cdot w_1 + x_2 \cdot w_2 = 0 \cdot 1 + 0 \cdot 1 + 1 \cdot 1 = 1 \end{displaymath} \item<3-> ����㲻��ʮ�־��ᣬ�ܹ����ܲ����������飬����ܻ���$\sigma=1$������ \begin{displaymath} \sum_i x_i \cdot w_i \ge \sigma \end{displaymath} \textbf{��ô}�����ȥ�μ����ֻ� \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ��֪�� - һ�����ӣ�Ȩ�� \begin{frame}{һ������ - Ȩ��} \begin{itemize} \item ���Կ�����ʵ����������߹��̱����Ͼ���һ����֪�� \item<2-> ���ǣ��˲����������ģ���������Щ����������һЩ���������\textbf{�ز�ū}����˻��Ʊ�ۿ��ĸ���һЩ����ʱ����ò����ȵ�Ȩ�ؼ���ÿ�����ص�Ӱ�죬���磺$w_0=0.5$��$w_1=2$��$w_2=0.5$ \item<3-> Ů�Ѻ�ϣ������һ�𣬵��Ǿ糡��Զ����Ʊ��500Ԫ���ᵼ����\alert{ѡ��ȥ}�����ֻᣨŮ���Ѷ���Ҫ�ˣ�զ���� \begin{displaymath} \sum_i x_i \cdot w_i = 0 \cdot 0.5 + 0 \cdot 2 + 1 \cdot 0.5 = 0.5 < \sigma = 1 \end{displaymath} \end{itemize} \vspace{-1.8em} \begin{center} \begin{tikzpicture} \begin{scope} \node [anchor=center,circle,draw,ublue,very thick,minimum size=3.5em,fill=white,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] (neuron) at (0,0) 
{}; \node [anchor=east] (x1) at ([xshift=-6em]neuron.west) {$x_1$:Ʊ�۹��ͣ�}; \node [anchor=center] (x0) at ([yshift=3em]x1.center) {$x_0$:���빻����}; \node [anchor=center] (x2) at ([yshift=-3em]x1.center) {$x_2$:Ů��ϲ����}; \node [anchor=west] (y) at ([xshift=2em]neuron.east) {$y$:ȥ�����Dz�ȥ��}; \visible<1>{ \draw [->,thick] (x0.east) -- (neuron.150) node [pos=0.5,above,yshift=0.2em] {\small{$w_0=1$}}; \draw [->,thick] (x1.east) -- (neuron.180) node [pos=0.5,above,yshift=-0.1em] {\small{$w_1=1$}}; \draw [->,thick] (x2.east) -- (neuron.210) node [pos=0.5,above,yshift=0.1em] {\small{$w_2=1$}}; } \draw [->,thick] (neuron.east) -- (y.west); \node [anchor=center] (neuronmath) at (neuron.center) {\small{$\sum \ge \sigma$}}; \visible<2->{ \draw [->,thin,red] (x0.east) -- (neuron.150) node [pos=0.5,above,yshift=0.2em] {\small{$w_0=.5$}}; \draw [->,line width=0.8mm,red] (x1.east) -- (neuron.180) node [pos=0.5,above,yshift=-0.1em] {\textbf{\small{$w_1=2$}}}; \draw [->,thin,red] (x2.east) -- (neuron.210) node [pos=0.5,above,yshift=0.1em] {\small{$w_2=.5$}}; } \visible<3->{ \node [anchor=south] (ylabel) at (y.north) {\red{\textbf{��ȥ�ˣ�}}}; } \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ��֪�� - һ�����ӣ�������ʽ \begin{frame}{һ������ - ������ʽ} \begin{itemize} \item ��������Ů��һ����˺�֮������ʶ�����߲�Ӧ��ֻ���Ƿ�0��1�����أ�Ӧ�ð�``�̶�''���ǽ����� \begin{itemize} \item $x_0$��10/���� \item $x_1$��150/Ʊ�� \item $x_2$��Ů�����Ƿ�ϲ����(�������Ҹ�) \end{itemize} \item<2-> ��ģ���У�$x_0$��$x_1$������������$x_2$��һ����ɢ���� \end{itemize} \visible<2->{ \begin{tikzpicture} \begin{scope} \draw [->,thick] (0,0) -- (2.5,0); \draw [->,thick] (0,0) -- (0, 1.5); \draw [-,very thick,ublue,domain=0.6:2,samples=100] plot (\x,{ 1/\x - 0.2}); \node [anchor=east] (ylabel) at (0, 3.2em) {\footnotesize{$x_0$}}; \node [anchor=north] (xlabel) at (5em, 0em) {\scriptsize{����(km)}}; \end{scope} 
\begin{scope}[xshift=9em] \draw [->,thick] (0,0) -- (2.5,0); \draw [->,thick] (0,0) -- (0, 1.5); \draw [-,very thick,ublue,domain=0.4:2,samples=100] plot (\x,{ 0.5/\x}); \node [anchor=east] (ylabel) at (0, 3.2em) {\footnotesize{$x_1$}}; \node [anchor=north] (xlabel) at (5em, 0em) {\scriptsize{Ʊ��(Ԫ)}}; \end{scope} \begin{scope}[xshift=18em] \draw [->,thick] (0,0) -- (2.5,0); \draw [->,thick] (0,0) -- (0, 1.5); \node [anchor=east] (ylabel) at (0, 3.2em) {\footnotesize{$x_2$}}; \node [anchor=south, fill=ublue, minimum width=1.5em, minimum height=0.1em, inner sep=0] (histogram1) at (1.5em, 0) {}; \node [anchor=south, fill=ublue, minimum width=1.5em, minimum height=3em, inner sep=0] (histogram2) at (4.0em, 0) {}; \node [anchor=north] (hlabel1) at (histogram1.south) {\tiny{Ů�Ѳ�ȥ}}; \node [anchor=north] (hlabel2) at (histogram2.south) {\tiny{Ů��ȥ}}; \end{scope} \end{tikzpicture} } \begin{itemize} \item<3-> Ů���Ѻ�ϣ������һ�𣬵��Ǿ糡��20kmԶ����Ʊ��500Ԫ��������\ $x_0 = 10/20 = 0.5$��$x_1=150/500 = 0.3$, $x_2=1$���ۺ�����$\sum_i x_i \cdot w_i \ge \sigma$������{\color{red} ȥ�����ֻ�} :) \begin{displaymath} \sum_i x_i \cdot w_i = 0.5 \cdot 0.5 + 0.3 \cdot 2 + 1 \cdot 0.5 = 1.35 \ge \sigma = 1 \end{displaymath} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ��֪�� - һ������ - ѧϰ \begin{frame}{һ������ - ѧϰ} \begin{itemize} \item һ�γɹ������ֻ�֮�����ƺ�������������ֻҪŮ���ѿ��ľͺã�Ϊ�β���������ص�Ȩ�ص�����ķ�ʽ�ǰ�$w_0$��$w_1$��Ȩ�ض���0��ͬʱ��$w_2 > 0$ \item<3-> �ܿ�����һ�����ֻᣬ����1000���Ʊ�ۣ�����·�ѣ�3000Ԫ����Ȼ��Ů����һֱ��ϲ�����ֻ�ġ������µľ���ģ�ͣ��������˵�\alert{����ȥ��}�ⳡ���ֻ� \item<4-> \textbf{֮��}����Ů�����ָ�����1����˺���ʹ�������� \begin{itemize} \item \alert{����㷢��}��Ů�Ѽ�Ҫ������ͬʱҲ���� \end{itemize} \end{itemize} \begin{center} \begin{tikzpicture} \begin{scope} \node [anchor=center,circle,draw,ublue,very thick,minimum size=3.5em,fill=white,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] (neuron) at 
(0,0) {}; \node [anchor=east] (x1) at ([xshift=-6em]neuron.west) {$x_1$:���˳̶�\ \ \ \ }; \node [anchor=center] (x0) at ([yshift=3em]x1.center) {$x_0$:Զ���̶�\ \ \ \ }; \node [anchor=center] (x2) at ([yshift=-3em]x1.center) {$x_2$:Ů��ϲ����}; \node [anchor=west] (y) at ([xshift=2em]neuron.east) {$y$:ȥ�����Dz�ȥ��}; \draw [->,thick] (neuron.east) -- (y.west); \node [anchor=center] (neuronmath) at (neuron.center) {\small{$\sum \ge \sigma$}}; \visible<1>{ \draw [->,thin] (x0.east) -- (neuron.150) node [pos=0.5,above,yshift=0.2em] {\small{$w_0=.5$}}; \draw [->,line width=0.8mm] (x1.east) -- (neuron.180) node [pos=0.5,above,yshift=-0.1em] {\textbf{\small{$w_1=2$}}}; \draw [->,thin] (x2.east) -- (neuron.210) node [pos=0.5,above,yshift=0.1em] {\small{$w_2=.5$}}; } \visible<2->{ \draw [->,dotted] (x0.east) -- (neuron.150) node [pos=0.5,above,yshift=0.2em] {\small{$w_0=0$}}; \draw [->,dotted] (x1.east) -- (neuron.180) node [pos=0.5,above,yshift=-0.1em] {\textbf{\small{$w_1=0$}}}; \draw [->,line width=1mm] (x2.east) -- (neuron.210) node [pos=0.5,above,yshift=0.1em] {\small{$w_2=10$}}; } \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ��֪�� - һ������ - ѧϰ(cont) \begin{frame}{һ������ - Ȩ��ѧϰ} \begin{itemize} \item \textbf{ʹ��˼ʹ}���㷢��ÿ�����ص�Ȩ����Ҫȷ�����ò��ܴﵽ��õľ���Ч�� \begin{itemize} \item ���ȷ����õ�Ȩ�أ� \end{itemize} \item<2-> \textbf{��Ȼ}������һ������ʵ������ \begin{itemize} \item �����ܼ����ϵس��ԣ����ݽṹ���ϵص���Ȩ�� \item<10-> �ڽ����˺ܶ��ʵ���������Ժõ�һ��Ȩ�� \end{itemize} \end{itemize} \visible<2->{ \begin{center} \begin{tikzpicture} \begin{scope}[scale=0.6] \visible<3->{ \draw [->,thick] (0,0) -- (2.5,0); \draw [->,thick] (0,0) -- (0, 1.5); \draw [-,very thick,ublue,domain=0.6:2,samples=100] plot (\x,{ 1/\x - 0.2}); \node [anchor=east] (ylabel) at (0, 3.2em) {\footnotesize{$x_0$}}; \node [anchor=north] (xlabel) at (5em, 0em) {\tiny{����(km)}}; } \visible<4->{ 
\draw [-,thick] (0.25,-1.5) -- (2.25,-1.5); \node [anchor=east] (wlabel) at (0.25,-1.5) {\footnotesize{$w_0$}}; } \visible<5>{\node [anchor=north,fill=ugreen,minimum height=0.5em,minimum width=1.5em] (w0) at (1.25,-1.5) {};} \visible<6>{\node [anchor=north,fill=ugreen,minimum height=0.3em,minimum width=1.5em] (w0) at (1.25,-1.5) {};} \visible<7>{\node [anchor=north,fill=ugreen,minimum height=1.8em,minimum width=1.5em] (w0) at (1.25,-1.5) {};} \visible<8>{\node [anchor=north,fill=ugreen,minimum height=0.3em,minimum width=1.5em] (w0) at (1.25,-1.5) {};} \visible<9>{\node [anchor=north,fill=ugreen,minimum height=0.3em,minimum width=1.5em] (w0) at (1.25,-1.5) {};} \visible<10->{\node [anchor=north,fill=ugreen,minimum height=0.8em,minimum width=1.5em] (w0) at (1.25,-1.5) {};} \end{scope} \begin{scope}[scale=0.6,xshift=12em] \visible<3->{ \draw [->,thick] (0,0) -- (2.5,0); \draw [->,thick] (0,0) -- (0, 1.5); \draw [-,very thick,ublue,domain=0.4:2,samples=100] plot (\x,{ 0.5/\x}); \node [anchor=east] (ylabel) at (0, 3.2em) {\footnotesize{$x_1$}}; \node [anchor=north] (xlabel) at (5em, 0em) {\tiny{Ʊ��(Ԫ)}}; } \visible<4->{ \draw [-,thick] (0.25,-1.5) -- (2.25,-1.5); \node [anchor=east] (wlabel) at (0.25,-1.5) {\footnotesize{$w_1$}}; } \visible<5>{\node [anchor=north,fill=ugreen,minimum height=0.5em,minimum width=1.5em] (w1) at (1.25,-1.5) {};} \visible<6>{\node [anchor=north,fill=ugreen,minimum height=1.5em,minimum width=1.5em] (w1) at (1.25,-1.5) {};} \visible<7>{\node [anchor=north,fill=ugreen,minimum height=0.8em,minimum width=1.5em] (w1) at (1.25,-1.5) {};} \visible<8>{\node [anchor=north,fill=ugreen,minimum height=0.1em,minimum width=1.5em] (w1) at (1.25,-1.5) {};} \visible<9>{\node [anchor=north,fill=ugreen,minimum height=1.0em,minimum width=1.5em] (w1) at (1.25,-1.5) {};} \visible<10->{\node [anchor=north,fill=ugreen,minimum height=0.3em,minimum width=1.5em] (w1) at (1.25,-1.5) {};} \end{scope} \begin{scope}[scale=0.6,xshift=24em] \visible<3->{ \draw [->,thick] 
(0,0) -- (2.5,0); \draw [->,thick] (0,0) -- (0, 1.5); \node [anchor=east] (ylabel) at (0, 3.2em) {\footnotesize{$x_2$}}; \node [anchor=south, fill=ublue, minimum width=0.8em, minimum height=0.1em, inner sep=0] (histogram1) at (1.5em, 0) {}; \node [anchor=south, fill=ublue, minimum width=0.8em, minimum height=2em, inner sep=0] (histogram2) at (4.0em, 0) {}; \node [anchor=north,align=left] (hlabel1) at (histogram1.south) {\tiny{Ů��no}}; \node [anchor=north,align=left] (hlabel2) at ([xshift=0.5em]histogram2.south) {\tiny{Ů��yes}}; } \visible<4->{ \draw [-,thick] (0.25,-1.5) -- (2.25,-1.5); \node [anchor=east] (wlabel) at (0.25,-1.5) {\footnotesize{$w_2$}}; } \visible<5>{\node [anchor=north,fill=ugreen,minimum height=0.5em,minimum width=1.5em] (w2) at (1.25,-1.5) {};} \visible<6>{\node [anchor=north,fill=ugreen,minimum height=1.2em,minimum width=1.5em] (w2) at (1.25,-1.5) {};} \visible<7>{\node [anchor=north,fill=ugreen,minimum height=0.8em,minimum width=1.5em] (w2) at (1.25,-1.5) {};} \visible<8>{\node [anchor=north,fill=ugreen,minimum height=1.2em,minimum width=1.5em] (w2) at (1.25,-1.5) {};} \visible<9>{\node [anchor=north,fill=ugreen,minimum height=1.5em,minimum width=1.5em] (w2) at (1.25,-1.5) {};} \visible<10->{\node [anchor=north,fill=ugreen,minimum height=1.3em,minimum width=1.5em] (w2) at (1.25,-1.5) {};} \end{scope} \end{tikzpicture} \end{center} } \visible<5->{ \begin{center} \begin{tabular}{c<{\onslide<5->}c<{\onslide<6->}c<{\onslide<7->}c<{\onslide<8->}c<{\onslide<9->}c<{\onslide<10->}c<{\onslide}} ʵ�� & 1 & 2 & 3 & 4 & ... & 10k \\ ��� & ʧ�� & �ɹ� & ʧ�� & ʧ�� & ... 
& �ɹ� \end{tabular} \end{center} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ��֪�� - һ������ - �ܽ� \begin{frame}{һ������ - �ܽ�} \begin{itemize} \item �������һ�������⣬������һ�ֺ���������ȷ�Ľ��о��߲��������������ģ���У�����һЩ\alert{����}��Ҫ�ش� \begin{itemize} \item<2-> �����⽨ģ��������������$\{x_i\}$����ʽ \item<3-> �����Ч�ľ���ģ�ͣ���������$y$ \item<4-> ����ģ�����漰�IJ�������Ȩ��$\{w_i\}$��������ֵ \end{itemize} \end{itemize} \vspace{-2em} \begin{center} \begin{tikzpicture} \begin{scope} \node [anchor=center,circle,draw,ublue,very thick,minimum size=3.5em,fill=white,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] (neuron) at (0,0) {}; \visible<2->{ \node [anchor=east] (x1) at ([xshift=-6em]neuron.west) {$x_1$:���˳̶�\ \ \ \ }; \node [anchor=center] (x0) at ([yshift=3em]x1.center) {$x_0$:Զ���̶�\ \ \ \ }; \node [anchor=center] (x2) at ([yshift=-3em]x1.center) {$x_2$:Ů��ϲ����}; } \visible<3->{ \node [anchor=west] (y) at ([xshift=2em]neuron.east) {$y$:ȥ�����Dz�ȥ��}; \node [anchor=center] (neuronmath) at (neuron.center) {\small{$\sum \ge \sigma$}}; } \draw [->,thick] (neuron.east) -- (y.west); \draw [->,thick] (x0.east) -- (neuron.150); \draw [->,thick] (x1.east) -- (neuron.180); \draw [->,thick] (x2.east) -- (neuron.210); \visible<4->{ \draw [->,thick] (x0.east) -- (neuron.150) node [pos=0.5,above,yshift=0.2em] {$w_0$}; \draw [->,thick] (x1.east) -- (neuron.180) node [pos=0.5,above,yshift=-0.1em] {$w_1$}; \draw [->,thick] (x2.east) -- (neuron.210) node [pos=0.5,above,yshift=0.1em] {$w_2$}; } \end{scope} \end{tikzpicture} \end{center} \vspace{-0.5em} \begin{itemize} \item<5-> \textbf{��Ȼ}����������ݻ��漰��������⣬���Ҳ�ֹ��Щ :) \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ \subsection{�˹���Ԫ} %%%------------------------------------------------------------------------------------------------------------ 
%%% outline \begin{frame}{�����˹�������(���ѧϰ)��������������} \vspace{1em} \begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow] {\Large \textbf{1. �˹�������Ļ�����Ԫ��ʲô,} \vspace{0.4em} \textbf{\hspace{0.9em} �����ϳ���ǿ���ģ�ͣ�} } \end{tcolorbox} \vspace{0.5em} \begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow] {\Large \textbf{2. �˹����������ѧ������ʲô,} \vspace{0.4em} \textbf{\hspace{0.9em} ��α��ʵ��������ѧģ�ͣ�} } \end{tcolorbox} \vspace{0.5em} \begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow] {\Large \textbf{3. ��ζ�ģ���еIJ�������ѧϰ,} \vspace{0.4em} \textbf{\hspace{0.9em} ֮��ʹ��ѧϰ����ģ�ͽ����ƶϣ�} } \end{tcolorbox} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% outline: problem 1 \begin{frame}{����} \vspace{6em} \begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow] {\Large \textbf{�˹�������Ļ�����Ԫ��ʲô,} \vspace{0.4em} \textbf{�����ϳ���ǿ���ģ�ͣ�} } \end{tcolorbox} \vspace{2em} \begin{center} \begin{tikzpicture} \node [fill=blue!10] (label) at (0,0) {\Large{$\textbf{y} = ?(\textbf{x})$ }}; \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ���Դ������� \begin{frame}{Ԥ�� - ���Դ���֪ʶ} \begin{itemize} \item \textbf{����}��������$a$��ʾһ������(һ����)���ô���$\textbf{a}$��ʾһ������(������)������$a_{ij}$��ʾ$\textbf{a}$��$i$�С���$j$�е�Ԫ��\\ \begin{displaymath} a = 5 \hspace{3em} \textbf{a} = \begin{pmatrix} a_{11} & a_{12} \\ a_{21} & a_{22} \end{pmatrix} = \begin{pmatrix} 1 & 2 \\ 3 & 4 \end{pmatrix} \end{displaymath} \item \textbf{����}��һ������ľ���ֻ��һ�л���һ�У�����Ĭ��ʹ��������������$\textbf{a} = (a_1,a_2,a_3) = (10, 20, 
30)$��$\textbf{a}$��Ӧ����������Ϊ$\textbf{a}^T$ \item<2-> \textbf{��������}���������λ����+��-�ȴ������㣬����$\textbf{a} = \begin{pmatrix} 1 & 2 \\ 3 & 4 \end{pmatrix}$��$\textbf{b} = \begin{pmatrix} 1 & 1 \\ 1 & 1 \end{pmatrix}$����$\textbf{a} + \textbf{b} = \begin{pmatrix} 2 & 3 \\ 4 & 5 \end{pmatrix}$ \item<3-> \textbf{�������}����λ���У����ھ���$\textbf{c}$�ͱ���$x$�� \begin{displaymath} \frac{\partial \textbf{c}}{\partial x} = \begin{pmatrix} \frac{\partial c_{11}}{\partial x} & \frac{\partial c_{12}}{\partial x} \\ \frac{\partial c_{21}}{\partial x} & \frac{\partial c_{22}}{\partial x} \end{pmatrix} \hspace{2em} \frac{\partial x}{\partial \textbf{c}} = \begin{pmatrix} \frac{\partial x}{\partial c_{11}} & \frac{\partial x}{\partial c_{12}} \\ \frac{\partial x}{\partial c_{21}} & \frac{\partial x}{\partial c_{22}} \end{pmatrix} \end{displaymath} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ���Դ������� \begin{frame}{Ԥ�� - ���Դ���֪ʶ(��)} \begin{itemize} \item \textbf{����ij˷�}������$\textbf{a} \in \mathbb{R}^{n \times k}$��$\textbf{b} \in \mathbb{R}^{k \times m}$����$\textbf{c} = \textbf{a} \textbf{b} \in \mathbb{R}^{n \times m}$��ʾ\textbf{a}��\textbf{b}�ľ���˷������� \begin{displaymath} c_{pq} = \sum_{i = 1}^k a_{pi} b_{iq} \end{displaymath} ���ڷ���$\left\{ \begin{array}{l} 5x_{1} + 2x_{2} = y_{1} \\ 3x_{1} + x_{2} = y_{2}\end{array} \right.$�����Ա�ʾΪ$\textbf{a} \textbf{x}^T = \textbf{y}^T$ ����$\textbf{a}=\begin{pmatrix} 5 & 2 \\ 3 & 1 \end{pmatrix}$��$\textbf{x}^T =\begin{pmatrix} x_1 \\ x_2 \end{pmatrix}$��$\textbf{y}^T =\begin{pmatrix} y_1 \\ y_2 \end{pmatrix}$ \item<2-> \textbf{����} \begin{itemize} \item \textbf{��λ����}������$\textbf{I}$��$I_{ij} = 1$���ҽ���$i=j$������$I_{ij} = 0$ \item \textbf{ת��}��$\textbf{a}$��ת�ü�Ϊ$\textbf{a}^T$����$a^T_{ji}=a_{ij}$ \item \textbf{�����}������$\textbf{a}$��������Ϊ$\textbf{a}^{-1}$����$\textbf{a} \textbf{a}^{-1} = \textbf{a}^{-1} 
\textbf{a} = \textbf{I}$ \item \textbf{����(����)�ķ���}��$||\textbf{a}||_p = \big( \sum_i |a_i|^p \big)^{\frac{1}{p}}$ \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% �˹���Ԫ�ĺ�����ʽ \begin{frame}{�˹���Ԫ��һ������} \begin{itemize} \item ��Ԫ�� \end{itemize} \vspace{-1em} \begin{center} \begin{tikzpicture} \node [anchor=center] (y) at (0,0) {\LARGE{$\textbf{y}$}}; \node [anchor=west] (eq) at (y.east) {\LARGE{$=$}}; \node [anchor=west] (func) at (eq.east) {\LARGE{$f$}}; \node [anchor=west] (brace01) at (func.east) {\LARGE{$($}}; \node [anchor=west] (x) at (brace01.east) {\LARGE{$\textbf{x}$}}; \node [anchor=west] (dot) at (x.east) {\LARGE{$\cdot$}}; \node [anchor=west] (w) at (dot.east) {\LARGE{$\textbf{w}$}}; \node [anchor=west] (plus) at (w.east) {\LARGE{$+$}}; \node [anchor=west] (b) at (plus.east) {\LARGE{$\textbf{b}$}}; \node [anchor=west] (brace02) at (b.east) {\LARGE{$)$}}; \visible<2->{ \node [anchor=center,fill=yellow!30] (x2) at (x) {\LARGE{$\textbf{x}$}}; \node [anchor=south] (xlabel) at ([yshift=1.5em]x.north) {����}; \draw [<-] ([yshift=0.2em]x2.north) -- (xlabel.south); } \visible<3->{ \node [anchor=center,fill=green!20] (w2) at (w) {\LARGE{$\textbf{w}$}}; \node [anchor=north] (wlabel) at ([yshift=-1.5em]w.south) {����(Ȩ��)}; \draw [<-] ([yshift=-0.2em]w2.south) -- (wlabel.north); } \visible<4->{ \node [anchor=center,fill=purple!20] (b2) at (b) {\LARGE{$\textbf{b}$}}; \node [anchor=south] (blabel) at ([yshift=1.3em]b.north) {ƫ��}; \draw [<-] ([yshift=0.2em]b2.north) -- (blabel.south); } \visible<5->{ \node [anchor=center,fill=blue!20] (func2) at (func) {\LARGE{$f$}}; \node [anchor=north] (funclabel) at ([yshift=-1.1em]func.south) {�����}; \draw [<-] ([yshift=-0.2em]func2.south) -- (funclabel.north); } \visible<6->{ \node [anchor=center,fill=red!20] (y2) at (y) {\LARGE{$\textbf{y}$}}; \node [anchor=south] (ylabel) at ([yshift=1.3em]y.north) {���}; 
\draw [<-] ([yshift=0.2em]y2.north) -- (ylabel.south); } \end{tikzpicture} \end{center} \vspace{-1em} \begin{itemize} \item<7-> �Ը�֪��Ϊ�� \begin{itemize} \item ���룺$\textbf{x}=(x_0,...,x_n)$ \item Ȩ�أ�$\textbf{w}=(w_0,...,w_n)$ \item ƫ�ƣ�$\textbf{b} = (-\sigma)$ \item �������$f(z)=1$ ��$z \ge 0$, �������$f(z)=0$ \item �����$\textbf{y}=f(\textbf{x} \cdot \textbf{w} - \sigma)$ \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ��ĸ��� \begin{frame}{``��"�ĸ���} \begin{itemize} \item ����һ�����⣨��ͬ���룩�����ܻ��ж���������ʱ����\alert{�����ͬ����Ԫ��������}������һ\alert{``��"} \begin{itemize} \item ���磬����Ԥ����ҪͬʱԤ��ʪ�Ⱥ��¶� \end{itemize} \end{itemize} \vspace{-2em} \begin{center} \begin{tikzpicture} \begin{scope} \tikzstyle{neuronnode} = [minimum size=1.5em,circle,draw,ublue,very thick,fill=white,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] \node [anchor=center,neuronnode] (neuron00) at (0,0) {}; \visible<2->{ \node [anchor=center,neuronnode] (neuron01) at ([yshift=-3em]neuron00) {}; } \visible<3->{ \node [anchor=center,neuronnode] (neuron02) at ([yshift=-3em]neuron01) {}; } \node [anchor=east] (x0) at ([xshift=-6em]neuron00.west) {$x_0$}; \node [anchor=east] (x1) at ([xshift=-6em]neuron01.west) {$x_1$}; \node [anchor=east] (x2) at ([xshift=-6em]neuron02.west) {$b$}; \node [anchor=west] (y0) at ([xshift=4em]neuron00.east) {$y_0$}; \draw [->] (x0.east) -- (neuron00.180) node [pos=0.1,above] {\tiny{$w_{00}$}}; \draw [->] (x1.east) -- (neuron00.200) node [pos=0.1,above] {\tiny{$w_{10}$}}; \draw [->] (x2.east) -- (neuron00.220) node [pos=0.05,above,yshift=0.3em] {\tiny{$b_{0}$}}; \draw [->] (neuron00.east) -- (y0.west); \visible<2->{ \node [anchor=west] (y1) at ([xshift=4em]neuron01.east) {$y_1$}; \draw [->] (x0.east) -- (neuron01.160) node [pos=0.4,above] {\tiny{$w_{01}$}}; \draw [->] (x1.east) -- (neuron01.180) node [pos=0.35,above,yshift=-0.2em] 
{\tiny{$w_{11}$}};
        % Bias edge into the second neuron and that neuron's output arrow.
        \draw [->] (x2.east) -- (neuron01.200) node [pos=0.3,below,yshift=0.2em] {\tiny{$b_{1}$}};
        \draw [->] (neuron01.east) -- (y1.west);
    }
    % Slide 3 onwards: third neuron's output label plus its three incoming edges (two weights, one bias).
    \visible<3->{
        \node [anchor=west] (y2) at ([xshift=4em]neuron02.east) {$y_2$};
        \draw [->] (x0.east) -- (neuron02.140) node [pos=0.1,below,yshift=-0.2em] {\tiny{$w_{02}$}};
        \draw [->] (x1.east) -- (neuron02.160) node [pos=0.1,below] {\tiny{$w_{12}$}};
        \draw [->] (x2.east) -- (neuron02.180) node [pos=0.3,below] {\tiny{$b_{2}$}};
        \draw [->] (neuron02.east) -- (y2.west);
    }
    % Slide 4 onwards: caption to the left of the inputs, showing x = (x_0, x_1).
    \visible<4->{
        \node [anchor=east,align=left] (inputlabel) at ([xshift=-0.1em]x1.west) {��������:\\\small{$\textbf{x}=(x_0,x_1)$}};
    }
    % Slide 5 onwards: caption to the right of the outputs, showing y = (y_0, y_1, y_2).
    \visible<5->{
        \node [anchor=west,align=left] (outputlabel) at ([xshift=0.1em]y1.east) {�������:\\\small{$\textbf{y}=(y_0,y_1,y_2)$}};
    }
    % Coloured boxes are drawn on the background layer so they sit behind the nodes they enclose.
    \begin{pgfonlayer}{background}
        % Slide 6 onwards: box around the three neurons, with a caption above it.
        \visible<6->{
            \node [rectangle,inner sep=0.4em,fill=red!20] [fit = (neuron00) (neuron01) (neuron02)] (layer) {};
            \node [anchor=south] (layerlabel) at ([yshift=0.2em]layer.north) {һ����Ԫ};
        }
        % The input box covers x0 and x1 only; the bias node (x2) is left outside.
        \visible<4->{
            \node [rectangle,inner sep=0.1em,fill=ugreen!20] [fit = (x0) (x1)] (inputshadow) {};
        }
        \visible<5->{
            \node [rectangle,inner sep=0.1em,fill=blue!20] [fit = (y0) (y1) (y2)] (outputshadow) {};
        }
    \end{pgfonlayer}
    % Slide 7 onwards: the 2x3 weight matrix w, placed below the input column.
    \visible<7->{
        \node [anchor=north west] (wlabel) at ([yshift=-1em,xshift=-7em]x2.south) {����(����):$\textbf{w} = \Big( \begin{array}{lll} w_{00} & w_{01} & w_{02} \\ w_{10} & w_{11} & w_{12} \end{array} \Big)$};
    }
    % Slide 8 onwards: the bias vector b, placed to the right of the weight matrix.
    \visible<8->{
        \node [anchor=west] (blabel) at (wlabel.east) {����(����):$\textbf{b} = (b_0, b_1, b_2)$};
    }
\end{scope}
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Frame: a layer as a linear transformation followed by an activation function.
%%% NOTE(review): the CJK title and bullet text are mis-encoded in this copy of the file;
%%% they are reproduced unchanged below and should be restored from a correctly decoded original.
\begin{frame}{�����磺���Ա任 + �����}
\begin{itemize}
    % Fix: the intermediate space is R^n, not R^m. An element-wise activation cannot change
    % the dimension, so the linear map x.w + b must already produce a vector of y's dimension.
    \item ��������$\textbf{x} \in \mathbb{R}^m$��һ�����������Ȱ�������\textbf{\alert{���Ա任}}ӳ�䵽$\mathbb{R}^n$��֮��\textbf{{\color{blue}�����}}�任��$\textbf{y} \in \mathbb{R}^n$
\end{itemize}
\vspace{1em}
\begin{center} \begin{tikzpicture} \node [anchor=center] (y) at (0,0) {\Large{$\textbf{y}$}}; \node [anchor=west] (eq) at (y.east) {\Large{$=$}}; \node [anchor=west] (func) at (eq.east) {\Large{$f$}}; \node [anchor=west] (brace01) at (func.east) {\Large{$($}}; \node [anchor=west] (x) at (brace01.east) {\Large{$\textbf{x}$}}; \node [anchor=west] (dot) at (x.east) {\Large{$\cdot$}}; \node [anchor=west] (w) at (dot.east) {\Large{$\textbf{w}$}}; \node [anchor=west] (plus) at (w.east) {\Large{$+$}}; \node [anchor=west] (b) at (plus.east) {\Large{$\textbf{b}$}}; \node [anchor=west] (brace02) at (b.east) {\Large{$)$}}; \node [anchor=center,fill=blue!20] (func2) at (func) {\LARGE{$f$}}; \node [anchor=north] (funclabel) at ([yshift=-1.1em]func.south) {\blue{�����}}; \draw [<-] ([yshift=-0.2em]func2.south) -- (funclabel.north); \begin{pgfonlayer}{background} \node [rectangle,inner sep=0.2em,fill=red!20] [fit = (x) (w) (b)] (linear) {}; \node [anchor=north] (linearlabel) at ([yshift=-1.1em]linear.south) {\alert{���Ա任}}; \draw [<-] ([yshift=-0.2em]linear.south) -- (linearlabel.north); \end{pgfonlayer} \end{tikzpicture} \begin{figure}[htp!] 
\includegraphics[scale=0.24]{./Figures/wf.png} % \begin{tikzpicture} % \node [rectangle,inner sep=0.2em,fill=red!20] [fit = (x) (w) (b)] (linear) {}; % \node [anchor=north] (linearlabel) at ([yshift=-1.1em]linear.south) {\alert{���Ա任}} \end{figure} \tikz {\node () at (0,0) {}; \node () at (0,10) {};} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ���Ա任 \begin{frame}{���Ա任} \begin{itemize} \item �������Կռ�$V$������$\textbf{a}$��$\textbf{b} \in V$�������е�����$\alpha$�����Ա任$T(\cdot)$������ \begin{eqnarray} T(\textbf{a} + \textbf{b}) & = & T(\textbf{a}) + T(\textbf{b}) \nonumber \\ T(\alpha \textbf{a}) & = & \alpha T(\textbf{a}) \nonumber \end{eqnarray} \item<2-> ���Ա任��һ�ּ��ν��ͣ� \end{itemize} \vspace{-0.5em} \visible<2->{ \begin{center} \begin{tikzpicture} \node [anchor=west] (x) at (0,0) {\Large{$\textbf{x}$}}; \node [anchor=west] (dot) at (x.east) {\Large{$\cdot$}}; \node [anchor=west] (w) at (dot.east) {\Large{$\textbf{w}$}}; \node [anchor=west] (plus) at (w.east) {\Large{$+$}}; \node [anchor=west] (b) at (plus.east) {\Large{$\textbf{b}$}}; \tikzstyle{neuron} = [rectangle,draw,thick,fill=red!30,red!35,minimum height=2em,minimum width=2em,font=\small] \node[neuron,anchor=north] (a1) at ([xshift=-6em,yshift=-4em]x.south) {}; \draw[->,thick] ([xshift=-2em,yshift=0em]a1.south) to ([xshift=3em,yshift=0em]a1.south); \draw[->,thick] ([xshift=0em,yshift=-4em]a1.west) to ([xshift=0em,yshift=2em]a1.west); \node[below] at ([xshift=0.5em,yshift=-1em]a1.west){0}; \node[below] at ([xshift=2em,yshift=-1em]a1.west){1}; \node[below] at ([xshift=-0.5em,yshift=2em]a1.west){1}; \node [anchor=west] (x) at ([xshift=-0.7em,yshift=1em]a1.south) {\Large{$\textbf{F}$}}; \visible<3->{ \node [anchor=center,fill=green!20] (w2) at (w) {\Large{$\textbf{w}$}}; \node [anchor=north,inner sep=1pt] (wlabel) at ([yshift=-0.7em]w.south) {\small{��ת(rotation)}}; \draw [<-] ([yshift=-0.2em]w2.south) -- 
(wlabel.north); \tikzstyle{neuron} = [rectangle,draw,thick,fill=red!30,red!35,minimum height=2em,minimum width=2em,font=\small] \node[neuron,anchor=north] (a2) at ([xshift=10em,yshift=0em]a1.south) {}; \draw[->,thick] ([xshift=-2em,yshift=0em]a2.north) to ([xshift=3em,yshift=0em]a2.north); \draw[->,thick] ([xshift=0em,yshift=-2em]a2.west) to ([xshift=0em,yshift=4em]a2.west); \node[above] at ([xshift=0.5em,yshift=1em]a2.west){0}; \node[above] at ([xshift=2em,yshift=1em]a2.west){1}; \node[below] at ([xshift=-0.5em,yshift=0em]a2.west){-1}; \node [anchor=west] (x) at ([xshift=-3.5cm,yshift=2em]a2.north) {\scriptsize{ $w=\begin{bmatrix} 1&0&0\\ 0&-1&0\\ 0&0&1 \end{bmatrix}$} }; \node [anchor=west,rotate = 180] (x) at ([xshift=0.7em,yshift=1em]a2.south) {\Large{$\textbf{F}$}}; \draw[-stealth, line width=2pt,dashed] ([xshift=4em,yshift=0em]a1.south) to ([xshift=-3em,yshift=0em]a2.north); } \visible<4->{ \node [anchor=center,fill=purple!20] (b2) at (b) {\Large{$\textbf{b}$}}; \node [anchor=west] (blabel) at ([xshift=1.5em]b2.east) {ƽ��(shift)}; \draw [<-] ([xshift=0.2em]b2.east) -- (blabel.west); \tikzstyle{neuron} = [rectangle,draw,thick,fill=red!30,red!35,minimum height=2em,minimum width=2em,font=\small] \node[neuron,anchor=north] (a3) at ([xshift=11em,yshift=2.05em]a2.south) {}; \draw[->,thick] ([xshift=-3em,yshift=0em]a3.north) to ([xshift=2em,yshift=0em]a3.north); \draw[->,thick] ([xshift=-1em,yshift=-2em]a3.west) to ([xshift=-1em,yshift=4em]a3.west); \node[above] at ([xshift=-0.5em,yshift=1em]a3.west){0}; \node[above] at ([xshift=1em,yshift=1em]a3.west){1}; \node[left] at ([xshift=-0.75em,yshift=-0.5em]a3.west){-1}; \node [anchor=west,rotate = 180] (x) at ([xshift=0.7em,yshift=1em]a3.south) {\Large{$\textbf{F}$}}; \node [anchor=west] (x) at ([xshift=-4cm,yshift=2em]a3.north) {\scriptsize{ $b=\begin{bmatrix} 0.5&0&0\\ 0&0&0\\ 0&0&0 \end{bmatrix}$} }; \draw[-stealth, line width=2pt,dashed] ([xshift=3em,yshift=1em]a2.east) to ([xshift=-3em,yshift=1em]a3.west); } 
\end{tikzpicture} \end{center} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ���Ա任�������ӵ�ʵ�� \begin{frame}[fragile]{���Ա任������} \begin{itemize} \item ���Ա任Ҳ�����ڸ��Ӹ��ӵ��������Ҳ���������ṩ����ϲ�ͬ���ݷֲ������� \begin{itemize} \item ���磬���ǿ�����άͼ��ͶӰ����άƽ���� \item �ٱ��磬����Ҳ���Ѷ�άƽ���ϵ�ͼ��ӳ�䵽��άƽ�� \end{itemize} \end{itemize} \begin{tiny} $$ \begin{smallmatrix} \underbrace{ \left\{ \begin{smallmatrix} \left[ \begin{array}{cccc} 1& 0 &0 \\ 0& 1 &0 \\ 0& 0 &1 \end{array} \right ] \cdots \left[ \begin{array}{cccc} 1& 0 &0 \\ 0& 1 &0 \\ 0& 0 &1 \end{array} \right] \end{smallmatrix} \right\} }\\5 \end{smallmatrix} \times \begin{smallmatrix} \left[ \begin{array}{cccc} 1\\ 1\\ 1 \end{array} \right ] \end{smallmatrix} = \begin{smallmatrix} \underbrace{ \left\{ \begin{smallmatrix} \left[ \begin{array}{cccc} 1 \\ 1 \\ 1 \end{array} \right ] \cdots \left[ \begin{array}{cccc} 1 \\ 1 \\ 1 \end{array} \right] \end{smallmatrix} \right\} }\\5 \end{smallmatrix} $$ \end{tiny} %\vspace{1em} \newcommand{\plane}[1]{ (-1.95, #1, 1.35) -- ++(3.6, 0.6, 0.0) -- ++(0.3, -1.8, -2.7) -- ++(-3.6, -0.6, -0.0) -- cycle} \newcommand{\nullspacepicture}{ % bottom part of the row space line \draw (0,0,0) -- (0.3,-1.8,1.233); % five planes \draw[fill=gray!20]\plane{-0.2}; \draw[fill=gray!20]\plane{0.2}; \draw[fill=blue!70!gray]\plane{0.6}; \draw[fill=gray!20]\plane{1}; \draw[fill=gray!20]\plane{1.4}; % top part of the row space line \draw (-.094,.562,-.385) -- (-0.3,1.8,-1.233); } \newcommand{\rangepicture}[1]{ % axes \draw[help lines,->] (-2,0) -- (2,0); \draw[help lines,->] (0,-2) -- (0,2); % the line and circles \draw (1,-2) -- (-1,2); \draw[fill=#1] (0,0) circle (2.5pt); \draw[fill=gray!50] (0.2,-0.4) circle (2.5pt); \draw[fill=gray!50] (0.4,-0.8) circle (2.5pt); \draw[fill=gray!50] (-0.2,0.4) circle (2.5pt); \draw[fill=gray!50] (-0.4,0.8) circle (2.5pt); } \begin{tikzpicture}[scale=0.95] \centering 
\nullspacepicture % the label \node at (-2,1.8) {$\mathbb{R}^3$}; % arrow between diagrams \path[->] (3,0) edge[bend left] node[above] {���Ա任} (4.5,0); \begin{scope}[xshift=7cm] \rangepicture{blue!70!gray} \node at (1.8,1.8) {$\mathbb{R}^2$}; \end{scope} \end{tikzpicture} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ����� \begin{frame}{�����} \begin{itemize} \item ������������Ϊ�˽��ʵ�������е�\alert{������}�任 \begin{itemize} \item �����Բ����ṩ��������⺯�����������Ժ���ܣ� \end{itemize} \end{itemize} \vspace{-1em} \begin{center} \begin{tikzpicture} \begin{scope} \draw [line width=3pt,ublue,-](0,0) -- (-2.0,1); \node [anchor=north] (linelabel) at (-1.0,-0.5) {\footnotesize{����һ������}}; \end{scope} \begin{scope}[xshift=10em] \draw [line width=3pt,ublue,-,line cap=round](0,0) .. controls (-0.5,-0.25) and (-0.5,1).. (-1.3,0.3) .. controls (-2.3,-0.3) and (-1.1,1.8).. (-2.0,1); \node [] at (-2,1) {\white{$\cdot$}}; \node [anchor=north] (linelabel) at (-1.0,-0.5) {\footnotesize{����һֻ���}}; \end{scope} \end{tikzpicture} \end{center} \begin{itemize} \item<2-> �ķ����Ժ��� \end{itemize} \vspace{-1em} \visible<2->{ \begin{center} \begin{tikzpicture} \begin{scope}[] \draw [->,thick] (-1.5,0) -- (1.5,0); \draw [->,thick] (0,-0.1) -- (0,1.5); \draw [-,very thick,ublue,domain=-1.2:1.2,samples=100] plot (\x,{0.5 * (\x -0.3)^2 + 0.2}); \node [anchor=west] (ylabel) at (0,1.3) {$y$}; \node [anchor=north] (xlabel) at (1.3,0) {$x$}; \node [anchor=north] (func) at (0,-0.8) {\footnotesize{$y = \frac{1}{2} (x - 0.3)^2 + 0.2$}}; \node [anchor=south west] (flabel) at (func.north west) {\footnotesize{Quadratic:}}; \end{scope} \begin{scope}[xshift=9.5em] \draw [->,thick] (-1.5,0) -- (1.5,0); \draw [->,thick] (0,-0.1) -- (0,1.5); \draw [-,very thick,ublue,domain=-1.2:1.2,samples=100] plot (\x, {0.5 * exp(\x)}); \node [anchor=west] (ylabel) at (0,1.3) {$y$}; \node [anchor=north] (xlabel) at (1.3,0) {$x$}; \node 
[anchor=north] (func) at (0,-0.8) {\footnotesize{$y = 0.5 \cdot \exp(x)$}};
        \node [anchor=south west] (flabel) at ([xshift=-1.8em]func.north west) {\footnotesize{Exponential:}};
    \end{scope}
    % Third example plot: absolute value.
    \begin{scope}[xshift=19em]
        \draw [->,thick] (-1.5,0) -- (1.5,0);
        \draw [->,thick] (0,-0.1) -- (0,1.5);
        \draw [-,very thick,ublue,domain=-1.1:1.2,samples=100] plot (\x,{abs(\x -0.2) + 0.1});
        \node [anchor=west] (ylabel) at (0,1.3) {$y$};
        \node [anchor=north] (xlabel) at (1.3,0) {$x$};
        % Fix: the caption previously read |x - 0.3|, while the curve drawn above is |x - 0.2| + 0.1.
        \node [anchor=north] (func) at (0,-0.8) {\footnotesize{$y = |x - 0.2| + 0.1$}};
        \node [anchor=south west] (flabel) at ([xshift=-0.4em]func.north west) {\footnotesize{Absolute:}};
    \end{scope}
\end{tikzpicture}
\end{center}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Frame: gallery of activation-function plots; each plot is a subfigure holding a small tikzpicture.
%%% NOTE(review): the CJK title and bullet text are mis-encoded in this copy of the file;
%%% they are reproduced unchanged below and should be restored from a correctly decoded original.
\begin{frame}{���õļ����}
\begin{itemize}
    \item �ö�ö࣬�оٲ�ȫ ...
\end{itemize}
\vspace{-1em}
\begin{figure}
\subfigure[softplus]{
    \centering
    \begin{minipage}{.2\textwidth}
    \begin{tikzpicture}
        % Axes, then tick marks labelled with the loop value itself.
        \draw[->](-1.2,0)--(1.2,0)node[left,below,font=\tiny]{$x$};
        \draw[->](0,-1.2)--(0,1.2)node[right,font=\tiny]{$y$};
        \foreach \x in {-1.0,-0.5,0.0,0.5,1.0}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\tiny]at(\x,0){\x};}
        \foreach \y in {1.0,0.5}{\draw(0,\y)--(0.05,\y)node[left,outer sep=2pt,font=\tiny]at(0,\y){\y};}
        % Fix: the expression was {ln(1+(exp(\x))}, which opens three parentheses and closes two.
        \draw[color=red ,domain=-1.2:1]plot(\x,{ln(1+exp(\x))});
        % Fix: \ln typesets the operator upright; a bare "ln" is set as the italic product l*n.
        \node[black,anchor=south] at (0,1.2) {\small $y = \ln(1+e^x)$};
    \end{tikzpicture}
    \end{minipage}%
}
\hfill
\subfigure[sigmoid]{
    \centering
    \begin{minipage}{.2\textwidth}
    \begin{tikzpicture}
        \draw[->](-1.2,0)--(1.2,0)node[left,below,font=\tiny]{$x$};
        \draw[->](0,-1.2)--(0,1.2)node[right,font=\tiny]{$y$};
        \draw[dashed](-1.2,1)--(1.2,1);
        % Each x tick is labelled with 5 times its drawing coordinate.
        \foreach \x in {-1,-0.5,0,0.5,1}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\tiny]at(\x,0){ \pgfmathparse{(\x)*5} \pgfmathresult};}
        \foreach \y in {0.5,1.0}{\draw(0,\y)--(0.05,\y)node[left,outer sep=2pt,font=\tiny]at(0,\y){\y};}
\draw[color=red,domain=-1.2:1.2]plot(\x,{1/(1+(exp(-5*\x)))}); \node[black,anchor=south] at (0,1.2) {\small $y = \frac{1}{1+e^{-x}}$}; \end{tikzpicture} \end{minipage}% } \hfill \subfigure[tanh]{ \centering \begin{minipage}{.2\textwidth} \begin{tikzpicture} \draw[->](-1.2,0)--(1.2,0)node[left,below,font=\tiny]{$x$}; \draw[->](0,-1.2)--(0,1.2)node[right,font=\tiny]{$y$}; \draw[dashed](-1.2,1)--(1.2,1); \draw[dashed](-1.2,-1)--(1.2,-1); \foreach \x in {-1.0,-0.5,0.0,0.5,1.0}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\tiny]at(\x,0){\x};} \foreach \y in {0.5,1.0}{\draw(0,\y)--(0.05,\y)node[left,outer sep=2pt,font=\tiny]at(0,\y){\y};} \draw[color=red ,domain=-1.2:1.2]plot(\x,{tanh(\x)}); \node[black,anchor=south] at (0,1.2) {\small $y = \frac{e^{x}-e^{-x}}{e^{x}+e^{-x}}$}; \end{tikzpicture} \end{minipage} } \end{figure} \vspace{-1em} \begin{figure} \subfigure[relu]{ \centering \begin{minipage}{.2\textwidth} \begin{tikzpicture} \draw[->](-1.2,0)--(1.2,0)node[left,below,font=\tiny]{$x$}; \draw[->](0,-1.2)--(0,1.2)node[right,font=\tiny]{$y$}; \draw[dashed](-1.2,1)--(1.2,1); \draw[dashed](-1.2,-1)--(1.2,-1); \foreach \x in {-1.0,-0.5,0.0,0.5,1.0}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\tiny]at(\x,0){\x};} \foreach \y in {0.5,1.0}{\draw(0,\y)--(0.05,\y)node[left,outer sep=2pt,font=\tiny]at(0,\y){\y};} \draw[color=red ,domain=-1.2:1.2]plot(\x,{max(\x,0)}); \node[black,anchor=south] at (0,1.2) {\small $y =\max (0, x)$}; \end{tikzpicture} \end{minipage}% } \hfill \subfigure[gaussian]{ \centering \begin{minipage}{.2\textwidth} \begin{tikzpicture} \draw[->](-1.2,0)--(1.2,0)node[left,below,font=\tiny]{$x$}; \draw[->](0,-1.2)--(0,1.2)node[right,font=\tiny]{$y$}; \draw[dashed](-1.2,1)--(1.2,1); \foreach \x in {-1.0,-0.5,0.0,0.5,1.0}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\tiny]at(\x,0){\x};} \foreach \y in {0.5,1.0}{\draw(0,\y)--(0.05,\y)node[left,outer sep=2pt,font=\tiny]at(0,\y){\y};} \draw[color=red ,domain=-1.2:1.2]plot(\x,{exp(-1*((\x)^2))}); 
\node[black,anchor=south] at (0,1.2) {\small $y =e^{-x^2}$}; \end{tikzpicture} \end{minipage}% } \hfill \subfigure[identity]{ \centering \begin{minipage}{.2\textwidth} \begin{tikzpicture} \draw[->](-1.2,0)--(1.2,0)node[left,below,font=\tiny]{$x$}; \draw[->](0,-1.2)--(0,1.2)node[right,font=\tiny]{$y$}; \foreach \x in {-1.0,-0.5,0.0,0.5,1.0}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\tiny]at(\x,0){\x};} \foreach \y in {0.5,1.0}{\draw(0,\y)--(0.05,\y)node[left,outer sep=2pt,font=\tiny]at(0,\y){\y};} \draw[color=red ,domain=-1:1]plot(\x,\x); \node[black,anchor=south] at (0,1.2) {\small $y =x$}; \end{tikzpicture} \end{minipage} } \end{figure} \end{frame} %%%------------------------------------------------------------------------------------------------------------ \subsection{���������} %%%------------------------------------------------------------------------------------------------------------ %%% һ�� -> ��� \begin{frame}{����IJ�} \begin{itemize} \item \textbf{����������}�����Ա任 + ������������ԣ� \item ���ǿ����ظ�����Ĺ��̣�����\textbf{���������} \end{itemize} \vspace{-1.0em} \begin{center} \begin{tikzpicture} \begin{scope}[] \def\neuronsep{1.6} \tikzstyle{neuronnode} = [minimum size=1.7em,circle,draw,ublue,very thick,inner sep=1pt, fill=white,align=center,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] %%% layer 1 \foreach \n in {1,...,5}{ \node [neuronnode] (neuron0\n) at (\n * \neuronsep,0) {\tiny{$f_1$}\\[-1ex] \tiny{$\sum$}}; \draw [-,ublue] (neuron0\n.east) -- (neuron0\n.west); } \foreach \n in {1,...,5}{ \foreach \m in {1,...,5}{ \draw [<-] (neuron0\m.south) -- ([yshift=-2em]neuron0\n.south); } \node [anchor=north] (x\n) at ([yshift=-2em]neuron0\n.south) {$x_\n$}; \visible<1>{ \draw [<-,thick] ([yshift=1.5em]neuron0\n.north) -- (neuron0\n.north); \node [anchor=south] (y\n) at ([yshift=1.5em]neuron0\n.north) {$y_\n$}; } } \node [anchor=west] (w1label) at ([xshift=-0.5em,yshift=0.5em]x5.north east) {$\textbf{w}_1$}; \begin{pgfonlayer}{background} 
\node [rectangle,inner sep=0.2em,fill=red!20] [fit = (neuron01) (neuron05)] (layer01) {}; \end{pgfonlayer} \node [anchor=west] (layer00label) at ([xshift=1.25em]x5.east) {\alert{�����}}; \visible<2->{ \node [anchor=west] (layer01label) at ([xshift=1em]layer01.east) {�ڶ���}; } \visible<4->{ \node [anchor=west] (layer01label2) at (layer01label.east) {(\alert{����})}; } %%% layer 2 \visible<2->{ \foreach \n in {2,...,4}{ \node [neuronnode] (neuron1\n) at (\n * \neuronsep,4em) {\tiny{$f_2$}\\[-1ex] \tiny{$\sum$}}; \draw [-,ublue] (neuron1\n.east) -- (neuron1\n.west); } \foreach \n in {2,...,4}{ \foreach \m in {1,...,5}{ \draw [<-] (neuron1\n.south) -- (neuron0\m.north); } \visible<2>{ \draw [<-,thick] ([yshift=1.5em]neuron1\n.north) -- (neuron1\n.north); \node [anchor=south] (y\n) at ([yshift=1.5em]neuron1\n.north) {$y_\n$}; } } \node [anchor=west] (w2label) at ([xshift=-2.5em,yshift=5.0em]x5.north east) {$\textbf{w}_2$}; \begin{pgfonlayer}{background} \visible<2->{ \node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron12) (neuron14)] (layer02) {}; } \end{pgfonlayer} \node [anchor=west] (layer02label) at ([xshift=4.9em]layer02.east) {������}; \visible<4->{ \node [anchor=west] (layer02label2) at (layer02label.east) {(\alert{����})}; } } %%% layer 3 \visible<3->{ \foreach \n in {1,...,5}{ \node [neuronnode] (neuron2\n) at (\n * \neuronsep,8em) {\tiny{$f_3$}\\[-1ex] \tiny{$\sum$}}; \draw [-,ublue] (neuron2\n.east) -- (neuron2\n.west); } \foreach \n in {1,...,5}{ \foreach \m in {2,...,4}{ \draw [<-] (neuron2\n.south) -- (neuron1\m.north); } \node [anchor=south] (y\n) at ([yshift=1.5em]neuron2\n.north) {$y_\n$}; \draw [<-,thick] ([yshift=1.5em]neuron2\n.north) -- (neuron2\n.north); } \node [anchor=west] (w3label) at ([xshift=-2.5em,yshift=8.5em]x5.north east) {$\textbf{w}_3$}; \begin{pgfonlayer}{background} \visible<3->{ \node [rectangle,inner sep=0.2em,fill=blue!20] [fit = (neuron21) (neuron25)] (layer03) {}; } \end{pgfonlayer} \node [anchor=west] (layer03label) 
at ([xshift=1em]layer03.east) {���IJ�}; \visible<4->{ \node [anchor=west] (layer03label2) at (layer03label.east) {(\alert{�����})}; } } \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ������������Աƽ��κκ��� \begin{frame}{�����������Աƽ����⺯��} \begin{itemize} \item ��һ������������Ϊ�������㼤�����sigmoid�� \end{itemize} \begin{center} \begin{tikzpicture} %% a two-layer neural network \begin{scope} \tikzstyle{neuronnode} = [minimum size=1.7em,circle,draw,ublue,very thick,inner sep=1pt, fill=white,align=center,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] %% input and hidden layers \node [neuronnode] (n10) at (0,0) {\tiny{$f$}\\[-1ex] \tiny{$\sum$}}; \node [neuronnode] (n11) at (1.5,0) {\tiny{$f$}\\[-1ex] \tiny{$\sum$}}; \draw [-,ublue] (n10.west) -- (n10.east); \draw [-,ublue] (n11.west) -- (n11.east); \node [anchor=north] (x1) at ([yshift=-6em]n11.south) {$x_1$}; \node [anchor=north] (b) at ([yshift=-6em]n10.south) {$b$}; \visible<1-10>{ \draw [->,thick] (b.north) -- ([yshift=-0.1em]n10.south); \draw [->,thick] (x1.north) -- ([yshift=-0.1em]n10.290); } \visible<1>{ \draw [->,thick] (b.north) -- ([yshift=-0.1em]n11.250); \draw [->,thick] (x1.north) -- ([yshift=-0.1em]n11.south); } \visible<11->{ \draw [->,thick,red] (b.north) -- ([yshift=-0.1em]n10.south); \draw [->,thick,ugreen] (x1.north) -- ([yshift=-0.1em]n10.290); } \visible<2->{ \draw [->,thick,blue] (b.north) -- ([yshift=-0.1em]n11.250); \draw [->,thick,purple] (x1.north) -- ([yshift=-0.1em]n11.south); } \visible<15->{ \node [neuronnode] (n12) at (2.7,0) {\tiny{$f$}\\[-1ex] \tiny{$\sum$}}; \node [neuronnode] (n13) at (3.8,0) {\tiny{$f$}\\[-1ex] \tiny{$\sum$}}; \draw [-,ublue] (n12.west) -- (n12.east); \draw [-,ublue] (n13.west) -- (n13.east); \draw [->,thick] (b.north) -- ([yshift=-0.1em]n12.250); \draw [->,thick] (x1.north) -- ([yshift=-0.1em]n12.270); \draw [->,thick] (b.north) -- 
([yshift=-0.1em]n13.230); \draw [->,thick] (x1.north) -- ([yshift=-0.1em]n13.250); } \visible<16->{ \node [anchor=west] (morenodes) at (n13.east) {...}; } %% output layers \node [neuronnode] (n20) at (0.75,5em) {\scriptsize{$\sum$}}; \visible<1-10>{\draw [->,thick] ([yshift=0.1em]n10.north) -- ([yshift=-0.1em]n20.250);} \visible<1-8>{\draw [->,thick] ([yshift=0.1em]n11.north) -- ([yshift=-0.1em]n20.290);} \visible<11->{\draw [->,thick,brown] ([yshift=0.1em]n10.north) -- ([yshift=-0.1em]n20.250);} \visible<9->{\draw [->,thick,orange] ([yshift=0.1em]n11.north) -- ([yshift=-0.1em]n20.290);} \node [] (y) at ([yshift=3em]n20.north) {$y$}; \draw [->,thick] ([yshift=0.1em]n20.north) -- (y.south); \visible<15->{ \draw [->,thick] ([yshift=0.1em]n12.north) -- ([yshift=-0.1em]n20.310); \draw [->,thick] ([yshift=0.1em]n13.north) -- ([yshift=-0.1em]n20.330); } %% weight and bias \visible<11->{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (b0) at ([yshift=3em,xshift=-0.5em]b.north) {\tiny{$b=-6$}};} \visible<11->{\node [anchor=center,rotate=-59,fill=white,inner sep=1pt] (w2) at ([yshift=1.2em,xshift=-1.2em]x1.north) {\tiny{$w=100$}};} \visible<2-6>{\node [anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north) {\tiny{$b=0$}};} \visible<7>{\node [anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north) {\tiny{$b=-2$}};} \visible<8->{\node [anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north) {\tiny{$b=-4$}};} \visible<2-4>{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north) {\tiny{$w=1$}};} \visible<5>{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north) {\tiny{$w=10$}};} \visible<6->{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north) {\tiny{$w=100$}};} \visible<11>{\node 
[anchor=center,rotate=62,fill=white,inner sep=1pt] (w21) at ([yshift=2em,xshift=0.5em]n10.north) {\tiny{$w'=0.7$}};} \visible<12->{\node [anchor=center,rotate=62,fill=white,inner sep=1pt] (w21) at ([yshift=2em,xshift=0.5em]n10.north) {\tiny{$w'=-0.7$}};} \visible<2-8>{\node [anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north) {\tiny{$w'=1$}};} \visible<9>{\node [anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north) {\tiny{$w'=0.9$}};} \visible<10->{\node [anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north) {\tiny{$w'=0.7$}};} %% sigmoid box \begin{scope} \visible<3->{ \node [anchor=west] (flabel) at ([xshift=1.2in]y.east) {\footnotesize{sigmoid:}}; \node [anchor=north east] (slabel) at ([xshift=0]flabel.south east) {\footnotesize{sum:}}; \node [anchor=west,inner sep=2pt] (flabel2) at (flabel.east) {\footnotesize{$f(s)=1/(1+e^{-s})$}}; \node [anchor=west,inner sep=2pt] (flabel3) at (slabel.east) {\footnotesize{$s=x_1 \cdot w + b$}}; \draw [->,thick,dotted] ([yshift=-0.3em,xshift=-0.1em]n11.60) .. controls +(east:1) and +(west:2) .. 
([xshift=-0.2em]flabel.west) ; \begin{pgfonlayer}{background} \visible<3->{ \node [rectangle,inner sep=0.2em,fill=blue!20,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] [fit = (flabel) (flabel2) (flabel3)] (funcbox) {}; } \end{pgfonlayer} } \end{scope} %% output illustration \begin{scope}[xshift=2.8in,yshift=0.1in] \visible<4->{ \draw [->,thick] (-2.2,0) -- (2.2,0); \draw [->,thick] (0,0) -- (0,2); \draw [-] (-0.05,1) -- (0.05,1); \node [anchor=east,inner sep=1pt] (label1) at (0,1) {\tiny{1}}; \node [anchor=south east,inner sep=1pt] (label2) at (0,0) {\tiny{0}}; } \visible<4>{\draw [-,very thick,ublue,domain=-2:2,samples=100] plot (\x,{1/(1+exp(-2*\x))});} \visible<5>{\draw [-,very thick,ublue,domain=-2:2,samples=100] plot (\x,{1/(1+exp(-4*\x))});} \visible<6>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0,0) -- (0,1) -- (2,1);} \visible<7>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.25,0) -- (0.25,1) -- (2,1);} \visible<8>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,1) -- (2,1);} \visible<9>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.9) -- (2,0.9);} \visible<10>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.7) -- (2,0.7);} \visible<11>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.7) -- (0.7,0.7) -- (0.7,1.4) -- (2,1.4);} \visible<12->{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.7) -- (0.7,0.7) -- (0.7,0) -- (2,0);} \visible<15->{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.7,0) -- (0.7,0.6) -- (0.9,0.6) -- (0.9,0) -- (2,0);} \visible<14>{\draw [->,dashed] (0.6,-0.05) -- (0.6,-0.96in);} \visible<15->{\draw [->,dashed] (0.8,-0.05) -- (0.8,-0.98in);} \visible<4>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\scriptsize{$w_1=1$}\\[-0ex] \scriptsize{\ $b_1=0$}};} \visible<5>{\node [anchor=north west,align=left] (wblabel) at (-2,2) 
{\alert{\scriptsize{$w_1=10$}}\\[-0ex] \scriptsize{\ $b_1=0$}};} \visible<6>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\alert{\scriptsize{$w_1=100$}}\\[-0ex] \scriptsize{\ $b_1=0$}};} \visible<7>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\scriptsize{$w_1=100$}\\[-0ex] \alert{\scriptsize{\ $b_1=-2$}}};} \visible<8>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\scriptsize{$w_1=100$}\\[-0ex] \alert{\scriptsize{\ $b_1=-4$}}};} \visible<9>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\alert{\scriptsize{$w'_1=0.9$}}};} \visible<10>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\alert{\scriptsize{$w'_1=0.7$}}};} \visible<11>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\alert{\scriptsize{$w_2=100$}}\\[-0ex] \alert{\scriptsize{\ $b_2=-6$}}\\[-0ex] \alert{\scriptsize{\ $w'_2=0.7$}}};} \visible<12>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\scriptsize{$w_2=100$}\\[-0ex] \scriptsize{\ $b_2=-6$}\\[-0ex] \alert{\scriptsize{\ $w'_2=-0.7$}}};} \visible<13->{\node [anchor=north west,align=left] (wblabel) at (-2.5,2) {\scriptsize{����һ��}\\[-1ex] \scriptsize{step function}};} \end{scope} \begin{scope}[xshift=2.8in,yshift=-1.2in] \visible<13->{ \draw [->,thick] (-2.2,0) -- (2.2,0); \draw [->,thick] (0,0) -- (0,2); \draw [-,very thick,red,domain=-1.98:2,samples=100] plot (\x,{0.2 * (\x +0.4)^3 + 1.2 - 0.3 *(\x + 0.8)^2}); } \visible<14->{ \foreach \n in {0.5}{ \pgfmathsetmacro{\result}{0.2 * (\n + 0.1 + 0.4)^3 + 1.2 - 0.3 *(\n + 0.1 + 0.8)^2}; \draw [-,ublue,thick] (\n,0) -- (\n, \result) -- (\n + 0.2, \result) -- (\n + 0.2, 0); } } \visible<15->{ \foreach \n in {0.7}{ \pgfmathsetmacro{\result}{0.2 * (\n + 0.1 + 0.4)^3 + 1.2 - 0.3 *(\n + 0.1 + 0.8)^2}; \draw [-,ublue,thick] (\n,0) -- (\n, \result) -- (\n + 0.2, \result) -- (\n + 0.2, 0); } } \visible<16->{ \foreach \n in {-1.9,-1.7,...,1.9}{ \pgfmathsetmacro{\result}{0.2 * (\n + 0.1 + 0.4)^3 + 1.2 - 0.3 *(\n + 0.1 + 0.8)^2}; \draw 
[-,ublue,thick] (\n,0) -- (\n, \result) -- (\n + 0.2, \result) -- (\n + 0.2, 0); } } \visible<14>{\node [anchor=north west,align=left] (wblabel) at (-2.5,2.5) {\scriptsize{������ÿһ�ζ���}\\[-1ex] \scriptsize{��step function}\\[-1ex] \scriptsize{����}};} \visible<15>{\node [anchor=north west,align=left] (wblabel) at (-2.5,2.5) {\scriptsize{���������Ԫ}\\[-1ex] \scriptsize{������ϸ����}\\[-1ex] \scriptsize{����}};} \visible<16>{\node [anchor=north west,align=left] (wblabel) at (-2.5,2.5) {\scriptsize{�������㹻���}\\[-1ex] \scriptsize{������Ԫ����}\\[-1ex] \scriptsize{���\alert{���⺯��}}};} \end{scope} \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ \subsection{������ļ�ʵ�� - ��������} %%%------------------------------------------------------------------------------------------------------------ %%% outline: problem 2 \begin{frame}{Ȼ��} \vspace{6em} \begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow] {\Large \textbf{�˹����������ѧ������ʲô,} \vspace{0.4em} \textbf{��α��ʵ��������ѧģ�ͣ�} } \end{tcolorbox} \vspace{1em} \begin{center} \begin{tikzpicture} \begin{scope}[yshift=6.5em,xshift=1em] \setcounter{mycount1}{1} \draw[step=0.5cm,color=orange,thick] (-1,-1) grid (0.5,0.5); \foreach \y in {+0.25,-0.25,-0.75} \foreach \x in {-0.75,-0.25,0.25}{ \node [fill=orange!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {\number\value{mycount1}}; \addtocounter{mycount1}{1}; } \end{scope} \begin{scope}[yshift=6em,xshift=0.5em] \setcounter{mycount2}{2} \draw[step=0.5cm,color=blue,thick] (-1,-1) grid (0.5,0.5); \foreach \y in {+0.25,-0.25,-0.75} \foreach \x in {-0.75,-0.25,0.25}{ \node [fill=blue!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {\number\value{mycount2}}; \addtocounter{mycount2}{1}; } \end{scope} \begin{scope}[yshift=5.5em,xshift=0em] 
\setcounter{mycount3}{3} \draw[step=0.5cm,color=ugreen,thick] (-1,-1) grid (0.5,0.5); \foreach \y in {+0.25,-0.25,-0.75} \foreach \x in {-0.75,-0.25,0.25}{ \node [fill=green!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {\number\value{mycount3}}; \addtocounter{mycount3}{1}; } \end{scope} \begin{scope}[yshift=5em,xshift=-0.5em] \setcounter{mycount4}{4} \draw[step=0.5cm,color=red,thick] (-1,-1) grid (0.5,0.5); \foreach \y in {+0.25,-0.25,-0.75} \foreach \x in {-0.75,-0.25,0.25}{ \node [fill=red!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {\number\value{mycount4}}; \addtocounter{mycount4}{1}; } \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ���� \begin{frame}{������������� - ��������} \begin{itemize} \item ���������磬����$\textbf{x}$�����$\textbf{y}$����ʽ�������������� \end{itemize} \begin{center} \begin{tikzpicture} \node [anchor=center] (y) at (0,0) {\LARGE{$\textbf{y}$}}; \node [anchor=west] (eq) at (y.east) {\LARGE{$=$}}; \node [anchor=west] (func) at (eq.east) {\LARGE{$f$}}; \node [anchor=west] (brace01) at (func.east) {\LARGE{$($}}; \node [anchor=west] (x) at (brace01.east) {\LARGE{$\textbf{x}$}}; \node [anchor=west] (dot) at (x.east) {\LARGE{$\cdot$}}; \node [anchor=west] (w) at (dot.east) {\LARGE{$\textbf{w}$}}; \node [anchor=west] (plus) at (w.east) {\LARGE{$+$}}; \node [anchor=west] (b) at (plus.east) {\LARGE{$\textbf{b}$}}; \node [anchor=west] (brace02) at (b.east) {\LARGE{$)$}}; \visible<2->{ \node [anchor=center,fill=yellow!30] (x2) at (x) {\LARGE{$\textbf{x}$}}; \node [anchor=south] (xlabel) at ([xshift=-3em,yshift=1.5em]x.north) {\alert{����������...}}; \draw [<-] ([yshift=0.2em,xshift=-0.5em]x2.north) -- ([xshift=1em]xlabel.south); \node [anchor=center,fill=red!20] (y2) at (y) {\LARGE{$\textbf{y}$}}; \draw [<-] ([yshift=0.2em,xshift=0.5em]y2.north) -- 
([xshift=-1em]xlabel.south); \node [anchor=center,fill=green!20] (w2) at (w) {\LARGE{$\textbf{w}$}}; \node [anchor=north] (wlabel) at ([yshift=-1.0em]w.south) {���� e.g.,}; \draw [<-] ([yshift=-0.2em]w2.south) -- (wlabel.north); \node [anchor=west] (wsample) at ([xshift=-0.5em]wlabel.east) {\footnotesize{$\left(\begin{array}{c c} 1 & 2 \\ 3 & 4 \end{array}\right)$}}; \node [anchor=center,fill=purple!20] (b2) at (b) {\LARGE{$\textbf{b}$}}; \node [anchor=south] (blabel) at ([yshift=1.3em]b.north) {���� e.g.,}; \draw [<-] ([yshift=0.2em]b2.north) -- (blabel.south); \node [anchor=west] (bsample) at ([xshift=-0.5em]blabel.east) {\footnotesize{$(1, 3)$}}; } \end{tikzpicture} \end{center} \begin{itemize} \item<3-> $\textbf{x}$��$\textbf{y}$ʵ������һ����tensor�Ķ�������\textbf{����}�����磬 \end{itemize} \begin{center} \begin{tikzpicture} \begin{scope} \visible<4->{\node [anchor=west] (vector) at (0,0) {$\textbf{x} = (1, 3)$};} \visible<5->{\node [anchor=west] (matrix) at ([xshift=0.1in]vector.east) {$\textbf{x} = \left(\begin{array}{c c} -1 & 3 \\ 0.2 & 2 \end{array}\right)$};} \visible<6->{\node [anchor=west] (tensor3d) at ([xshift=0.1in]matrix.east) {ɶ��$\textbf{x} = \left(\begin{array}{c} \left(\begin{array}{c c} -1 & 3 \\ 0.2 & 2 \end{array}\right) \\ \left(\begin{array}{c c} -1 & 3 \\ 0.2 & 2 \end{array}\right) \end{array}\right)$};} \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% �����ļ��� \begin{frame}{������ʲô} \begin{itemize} \item \textbf{���ѧϰ}�У�������``��"�ض���Ϊ\alert{��ά����} \begin{itemize} \item �����Ľף�rank����ʾ�ж��ٸ������ķ���ÿ����������ɶ��ά�ȱ�ʾ \end{itemize} \end{itemize} \begin{center} \begin{tikzpicture} \begin{scope} \visible<2->{ \node [anchor=north] (label) at (0,0) {����}; \node [anchor=center] (label2) at ([yshift=-0.7em]label.south) {scalar}; \node [anchor=center] (rank) at ([yshift=-1.5em]label2.center) {(rank=0)}; \node 
[anchor=center] (scalar) at ([yshift=5em]label.north) {\Huge{3}}; } \end{scope} \begin{scope}[xshift=1in] \visible<3->{ \node [anchor=north] (label) at (0,0) {����}; \node [anchor=center] (label2) at ([yshift=-0.7em]label.south) {vector}; \node [anchor=center] (rank) at ([yshift=-1.5em]label2.center) {(rank=1)}; \node [anchor=center] (scalar) at ([yshift=5em]label.north) {$\begin{pmatrix} 2 \\ .3 \\ -8 \\ .2\end{pmatrix}$}; } \end{scope} \begin{scope}[xshift=2in] \visible<4->{ \node [anchor=north] (label) at (0,0) {����}; \node [anchor=center] (label2) at ([yshift=-0.7em]label.south) {matrix}; \node [anchor=center] (rank) at ([yshift=-1.5em]label2.center) {(rank=2)}; \node [anchor=center] (scalar) at ([yshift=5em]label.north) {$\begin{pmatrix} 1 & 1 & 9 \\ 1 & 0 & 0 \\ 1 & -4 & 7 \end{pmatrix}$}; } \end{scope} \begin{scope}[xshift=3.2in] \visible<5->{ \node [anchor=north] (label) at (0,0) {3������}; \node [anchor=center] (label2) at ([yshift=-0.7em]label.south) {tensor}; \node [anchor=center] (rank) at ([yshift=-1.5em]label2.center) {(rank=3)}; } \begin{scope}[yshift=6.5em,xshift=1em] \visible<5->{ \setcounter{mycount1}{1} \draw[step=0.5cm,color=orange,thick] (-1,-1) grid (1,1); \foreach \y in {+0.75,+0.25,-0.25,-0.75} \foreach \x in {-0.75,-0.25,0.25,0.75}{ \node [fill=orange!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {\number\value{mycount1}}; \addtocounter{mycount1}{1}; } } \end{scope} \begin{scope}[yshift=6em,xshift=0.5em] \visible<5->{ \setcounter{mycount2}{1} \draw[step=0.5cm,color=blue,thick] (-1,-1) grid (1,1); \foreach \y in {+0.75,+0.25,-0.25,-0.75} \foreach \x in {-0.75,-0.25,0.25,0.75}{ \node [fill=blue!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {\number\value{mycount2}}; \addtocounter{mycount2}{1}; } } \end{scope} \begin{scope}[yshift=5.5em,xshift=0em] \visible<5->{ \setcounter{mycount3}{1} \draw[step=0.5cm,color=ugreen,thick] (-1,-1) grid (1,1); \foreach \y in {+0.75,+0.25,-0.25,-0.75} \foreach 
\x in {-0.75,-0.25,0.25,0.75}{ \node [fill=green!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {\number\value{mycount3}}; \addtocounter{mycount3}{1}; } } \end{scope} \begin{scope}[yshift=5em,xshift=-0.5em] \visible<5->{ \setcounter{mycount4}{1} \draw[step=0.5cm,color=red,thick] (-1,-1) grid (1,1); \foreach \y in {+0.75,+0.25,-0.25,-0.75} \foreach \x in {-0.75,-0.25,0.25,0.75}{ \node [fill=red!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {\number\value{mycount4}}; \addtocounter{mycount4}{1}; } } \end{scope} \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ������һ����ά���Ժ��� \begin{frame}{��ʵ�ϣ��������ǼĶ�ά���� - ��ţ��˽��� :)} \begin{itemize} \item \textbf{�dz������ε�˵}������\alert{����}�����;���ļ���չ������˵����ά����\alert{Ҳ����}����������ı�����ʽ \item<2-> �ϸ������ϣ������ǣ� \begin{enumerate} \item<2-> \textbf{�������Ķ���}������������ϵ�ı�ʱ����һ������ת����ϵ�ij����������һ���������ϵ������任���仯�ļ����������ζ��壩 \item<3-> \textbf{���ǿ������Ķ���}������������Э����ͨ�������˷�����������������壩 \item<4-> \textbf{�����Խ��͵Ķ���}��\alert{�����Ƕ������Ժ���}���Ƕ�����һЩ�����ռ�͵ѿ������ϵĶ�������ӳ�� \begin{itemize} \item ������Ϊ$T(v_0,...,v_r)$������������$r$������$\{v_0,...,v_r\}$ \item ����������ָ������ÿ�����룬�����������Եģ����磬����һ��$v_i$�������� \vspace{-0.3em} \begin{displaymath} T(v_0,...,v_i+c \cdot u,...,v_r) = T(v_0,...,v_i,...,v_r) + c \cdot T(v_0,...,u,...,v_r) \end{displaymath} ���У�$c$Ϊ��������������ʷdz���Ҫ���������Ƶ���ǰ����������塣 \end{itemize} \end{enumerate} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ��һ������һ�������Ķ��� \begin{frame}{������һ��``��''������``����''} \begin{itemize} \item ������һ�£� \begin{itemize} \item ���һ�����������������ij��λ����ֻ��һ����ֵ�������DZ����������ܶ� \item 
�������ͬһ��λ�á��Ӷ���ķ����Ͽ����в�ͬ��ֵ�����������ǡ���þ���˹۲췽�������������������(rank$>$1)������Ӧ������ \end{itemize} \end{itemize} \vspace{-0.8em} \begin{center} \tdplotsetmaincoords{50}{140} \begin{tikzpicture}[scale=2,tdplot_main_coords] \visible<3->{ \draw[thick,->] (0,0,0) -- (1,0,0) node[anchor=north east]{$a$}; \draw[thick,->] (0,0,0) -- (0,1,0) node[anchor=north west]{$b$}; \draw[thick,->] (0,0,0) -- (0,0,1) node[anchor=south]{$c$}; } \pgfmathsetmacro{\ax}{2} \pgfmathsetmacro{\ay}{2} \pgfmathsetmacro{\az}{1} \tdplotsetrotatedcoords{20}{40}{00} \visible<4->{ \draw[thick,color=red,tdplot_rotated_coords,->] (0,0,0) -- (.7,0,0) node[anchor=east]{$a'$}; \draw[thick,color=green!50!black,tdplot_rotated_coords,->] (0,0,0) -- (0,.7,0) node[anchor=west]{$b'$}; \draw[thick,color=blue,tdplot_rotated_coords,->] (0,0,0) -- (0,0,.7) node[anchor=south]{$c'$}; } \tdplottransformmainrot{\ax}{\ay}{\az} \visible<3->{\node [anchor=west,inner sep=2pt] (coord1) at (-0.40in,-0.4in) {\footnotesize{����$v=(a,b,c)$}};} \visible<4->{\node [anchor=north west,inner sep=2pt] (coord2) at (coord1.south west) {\footnotesize{����$u=(\red{a'}\black{,}{\color{ugreen} b'}\black{,}\blue{c'}\black{)}$}};} \begin{scope}[xshift=0.4in,yshift=0.35in] \visible<2->{ \node [anchor=west,inner sep = 2pt] (description) at (0,0) {\small{$T(v,u)$��һ����ά�ռ�$(x,y,z)$�ϵ�}}; \node [anchor=north west,inner sep = 2pt] (description2) at (description.south west) {\small{2������������$v$��$u$����������}}; } \visible<5->{ \node [anchor=north west,inner sep=2pt] (T) at ([yshift=-2em]description2.south west) {\small{$T(v,u)=$}}; \node [anchor=west,inner sep=1pt] (T2) at (T.east) {\footnotesize{$\begin{pmatrix} v_x \\ v_y \\ v_z \end{pmatrix}^T$}}; \node [anchor=west,inner sep=1pt] (T3) at ([xshift=2pt]T2.east) {\footnotesize{$\begin{pmatrix} T_{xx} & T_{xy} & T_{xz} \\ T_{yx} & T_{yy} & T_{yz} \\ T_{zx} & T_{zy} & T_{zz} \end{pmatrix}$}}; \node [anchor=west,inner sep=1pt] (T4) at ([xshift=2pt]T3.east) 
{\footnotesize{$\begin{pmatrix} u_x \\ u_y \\ u_z \end{pmatrix}$}}; } \begin{pgfonlayer}{background} \visible<7->{ \node [rectangle,inner sep=0pt,fill=red!20,minimum height=3.5em,minimum width=7em] [fit = (T3) ] (TBox) {}; } \visible<6->{ \node [rectangle,inner sep=0pt,fill=green!20,minimum height=3.5em,minimum width=3em] [fit = (T2) ] (VBox) {}; \node [rectangle,inner sep=0pt,fill=blue!20,minimum height=3.5em,minimum width=2.5em] [fit = (T4) ] (UBox) {}; } \end{pgfonlayer} \visible<6->{ \draw [<-] (VBox.north) -- ([yshift=0.3em]VBox.north); \node [anchor=south,align=left] (Vlabel) at ([yshift=0.3em]VBox.north) {\scriptsize{$v$�ڻ������ϵ�ͶӰ}}; \draw [<-] (UBox.north) -- ([yshift=0.3em]UBox.north); \node [anchor=south,align=left] (Ulabel) at ([yshift=0.3em,xshift=-1em]UBox.north) {\scriptsize{$u$�ڻ������ϵ�ͶӰ}}; } \visible<7->{ \draw [<-] (TBox.south) -- ([yshift=-0.3em]TBox.south); \node [anchor=north,align=left] (Vlabel) at ([xshift=-0.5em,yshift=-0.3em]TBox.south) {\scriptsize{������$3 \times 3$�������ϵķ�����ǡ����``����''��ʾ��}}; \node [anchor=north west,align=left] (Vlabel2) at ([yshift=0.2em]Vlabel.south west) {\scriptsize{��Ϊ$[T]$������һ������ϵ����ת}}; } \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ��������ѧϰ�ж���һ������ \begin{frame}{``����''��``����''����չ������������ʹ������} \begin{itemize} \item ����ǰ��Ŀ��Ժ��� - �����``\alert{�������Ƕ�ά����}'' \begin{itemize} \item �����������Կ�����ѧ�ϵ�``����''����չ \end{itemize} \item<2-> ����$T(1:3)$��ʾһ��������������Ԫ��\\ \vspace{0.5em} \begin{tikzpicture} \begin{scope} \node [anchor=north east, inner sep=1pt] (label) at (0,0) {�����洢��}; \draw[step=0.5cm,thick] (0,-0.5) grid (1.5,0); \setcounter{mycount1}{1} \foreach \x in {0.25,0.75,1.25}{ \node [fill=green!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,-0.25) {$\number\value{mycount1}$}; \addtocounter{mycount1}{1}; } 
\end{scope}
\end{tikzpicture}
\item<3-> ����$T(1:2,1:3)$��ʾһ��$3 \times 2$�ľ���\\
\vspace{0.5em}
\begin{tikzpicture}
  % One row of 6 cells, numbered 1..6 via mycount2; the first three cells are
  % green and the last three are red.
  \begin{scope}
    \node [anchor=north east, inner sep=1pt] (label) at (0,0) {�����洢��};
    \draw[step=0.5cm,thick] (0,-0.5) grid (3.0,0);
    \setcounter{mycount2}{1}
    \foreach \x in {0.25,0.75,1.25}{
      \node [fill=green!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,-0.25) {$\number\value{mycount2}$};
      \addtocounter{mycount2}{1};
    }
    \foreach \x in {1.75,2.25,2.75}{
      \node [fill=red!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,-0.25) {$\number\value{mycount2}$};
      \addtocounter{mycount2}{1};
    }
  \end{scope}
\end{tikzpicture}
\item<4-> ����$T(1:2,1:2,1:3)$��ʾһ��������������С��$3 \times 2 \times 2$\\
\vspace{0.5em}
\begin{tikzpicture}
  % One row of 12 cells, numbered 1..12 via mycount3, in alternating
  % green/red groups of three; each half is braced as a "3 x 2 sub-tensor".
  \begin{scope}
    \node [anchor=north east, inner sep=1pt] (label) at (0,0) {�����洢��};
    \draw[step=0.5cm,thick] (0,-0.5) grid (6.0,0);
    \setcounter{mycount3}{1}
    \foreach \x in {0.25,0.75,1.25}{
      \node [fill=green!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,-0.25) {$\number\value{mycount3}$};
      \addtocounter{mycount3}{1};
    }
    \foreach \x in {1.75,2.25,2.75}{
      \node [fill=red!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,-0.25) {$\number\value{mycount3}$};
      \addtocounter{mycount3}{1};
    }
    \foreach \x in {3.25,3.75,4.25}{
      \node [fill=green!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,-0.25) {$\number\value{mycount3}$};
      \addtocounter{mycount3}{1};
    }
    \foreach \x in {4.75,5.25,5.75}{
      \node [fill=red!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,-0.25) {$\number\value{mycount3}$};
      \addtocounter{mycount3}{1};
    }
    \draw[decorate,thick,decoration={brace,mirror,raise=0.2em}] (0,-0.50) -- (2.95,-0.50);
    \draw[decorate,thick,decoration={brace,mirror,raise=0.2em}] (3.05,-0.50) -- (6,-0.50);
    % The two brace labels get distinct node names (the second one used to
    % reuse subtensor1 and silently overwrite it).
    \node [anchor=north] (subtensor1) at (1.5,-0.6) {\footnotesize{$3 \times 2$ sub-tensor}};
    \node [anchor=north] (subtensor2) at (4.5,-0.6) {\footnotesize{$3 \times 2$ sub-tensor}};
  \end{scope}
\end{tikzpicture}
\item<5-> �߽����������飡���飡���飡
\begin{itemize}
  \item ��C++��Python�еĶ�ά����һģһ��
\end{itemize}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Matrix multiplication x * w on tensors (x is drawn as four stacked 4x4 grids)
\begin{frame}{�����ľ���˷�}
\begin{itemize}
  \item ����������$\textbf{y}=f(\textbf{x}\cdot \textbf{w} + \textbf{b})$��$\textbf{x} \cdot \textbf{w}$��$\textbf{x} \times \textbf{w}$�����Ա任������$\textbf{x}$������������$\textbf{w}$��һ������
  \begin{itemize}
    \item $\textbf{x} \cdot \textbf{w}$��ʾ���Ǿ���˷������Ϊ$\times$��
    \item ע�⣬���ﲻ�������˷�����Ϊ�����˷�������������
    \item $\textbf{w}$��$n \times m$�ľ���$\textbf{x}$����״��$... \times n$����$\textbf{x}$�ĵ�һά����Ҫ��$\textbf{w}$��������С���\\
    \vspace{0.5em}
    $\textbf{x}(1:4,1:4,\alert{1:4}) \times \textbf{w}(\alert{1:4},1:2) = \textbf{s}(1:4,1:4,1:2)$
  \end{itemize}
\end{itemize}
\begin{center}
\begin{tikzpicture}
% The slices of x are drawn back to front, each shifted by (-0.5em,-0.5em);
% slice k starts counting at k, so every row reads a, a+1, a+2, a+3.
\begin{scope}[yshift=6.5em,xshift=1em]
  \visible<2->{
    \setcounter{mycount1}{1}
    \draw[step=0.5cm,color=orange,thick] (-1,-1) grid (1,1);
    \foreach \y in {+0.75,+0.25,-0.25,-0.75}
      \foreach \x in {-0.75,-0.25,0.25,0.75}{
        \node [fill=orange!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$};
        \addtocounter{mycount1}{1};
      }
  }
\end{scope}
\begin{scope}[yshift=6em,xshift=0.5em]
  \visible<2->{
    \setcounter{mycount2}{2}
    \draw[step=0.5cm,color=blue,thick] (-1,-1) grid (1,1);
    \foreach \y in {+0.75,+0.25,-0.25,-0.75}
      \foreach \x in {-0.75,-0.25,0.25,0.75}{
        \node [fill=blue!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount2}$};
        \addtocounter{mycount2}{1};
      }
  }
\end{scope}
\begin{scope}[yshift=5.5em,xshift=0em]
  \visible<2->{
    \setcounter{mycount3}{3}
    \draw[step=0.5cm,color=ugreen,thick] (-1,-1) grid (1,1);
    \foreach \y in {+0.75,+0.25,-0.25,-0.75}
      \foreach \x in {-0.75,-0.25,0.25,0.75}{
        \node [fill=green!20,inner sep=0pt,minimum
height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount3}$}; \addtocounter{mycount3}{1}; } } \end{scope} \begin{scope}[yshift=5em,xshift=-0.5em] \visible<2->{ \setcounter{mycount4}{4} \draw[step=0.5cm,color=red,thick] (-1,-1) grid (1,1); \foreach \y in {+0.75,+0.25,-0.25,-0.75} \foreach \x in {-0.75,-0.25,0.25,0.75}{ \node [fill=red!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount4}$}; \addtocounter{mycount4}{1}; } \node [anchor=north] (xlabel) at (0,-1.2) {$\textbf{x}$}; } \end{scope} \begin{scope}[yshift=5em,xshift=1.5in] \visible<2->{ \draw[step=0.5cm,thick] (-0.5,-1) grid (0.5,1.0); \node [fill=black!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (-0.25,0.75) {\small{$-1$}}; \node [fill=black!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (-0.25,0.25) {$0$}; \node [fill=black!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (-0.25,-0.25) {$1$}; \node [fill=black!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (-0.25,-0.75) {$0$}; \node [fill=black!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (0.25,0.75) {$0$}; \node [fill=black!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (0.25,0.25) {\small{$-1$}}; \node [fill=black!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (0.25,-0.25) {$1$}; \node [fill=black!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (0.25,-0.75) {$0$}; \node [anchor=north] (xlabel) at (0,-1.2) {$\textbf{w}$}; } \visible<3>{\draw [->,thick,dashed] (-1.5in+2em+1.5em,-0.3) .. controls +(east:2) and +(west:1) .. (-0.55,0.8) node [pos=0.5,left] {\scriptsize{\textbf{�����}}};} \visible<4>{\draw [->,thick,dashed] (-1.5in+2em+1.0em,-0.5) .. controls +(east:2) and +(west:1) .. (-0.55,0.8) node [pos=0.5,left] {\scriptsize{\textbf{�����}}};} \visible<5>{\draw [->,thick,dashed] (-1.5in+2em+0.5em,-0.7) .. controls +(east:2.5) and +(west:1) .. 
(-0.55,0.8) node [pos=0.5,left] {\scriptsize{\textbf{�����}}};} \visible<6->{\draw [->,thick,dashed] (-1.5in+2em,-0.9) .. controls +(east:3) and +(west:1) .. (-0.55,0.8) node [pos=0.5,left] {\scriptsize{\textbf{�����}}};} \end{scope} \begin{scope}[yshift=6.5em,xshift=1em+3in] \visible<3->{ \draw[step=0.5cm,color=orange,thick] (-0.5,-1) grid (0.5,1.0); \foreach \y in {+0.75,+0.25,-0.25,-0.75}{ \setcounter{mycount1}{2} \foreach \x in {-0.25,0.25}{ \node [fill=orange!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$}; \addtocounter{mycount1}{-1}; } } } \end{scope} \begin{scope}[yshift=6em,xshift=0.5em+3in] \visible<4->{ \draw[step=0.5cm,color=blue,thick] (-0.5,-1) grid (0.5,1.0); \foreach \y in {+0.75,+0.25,-0.25,-0.75}{ \setcounter{mycount1}{2} \foreach \x in {-0.25,0.25}{ \node [fill=blue!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$}; \addtocounter{mycount1}{-1}; } } } \end{scope} \begin{scope}[yshift=5.5em,xshift=0em+3in] \visible<5->{ \draw[step=0.5cm,color=ugreen,thick] (-0.5,-1) grid (0.5,1.0); \foreach \y in {+0.75,+0.25,-0.25,-0.75}{ \setcounter{mycount1}{2} \foreach \x in {-0.25,0.25}{ \node [fill=green!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$}; \addtocounter{mycount1}{-1}; } } } \end{scope} \begin{scope}[yshift=5.0em,xshift=-0.5em+3in] \visible<6->{ \draw[step=0.5cm,color=red,thick] (-0.5,-1) grid (0.5,1.0); \foreach \y in {+0.75,+0.25,-0.25,-0.75}{ \setcounter{mycount1}{2} \foreach \x in {-0.25,0.25}{ \node [fill=red!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$}; \addtocounter{mycount1}{-1}; } } } \visible<3->{ \node [anchor=north] (xlabel) at (0,-1.2) {$\textbf{x} \cdot \textbf{w}$}; \node [anchor=center] (elabel) at (-0.7in,0) {\Huge{$\textbf{=}$}}; } \end{scope} \end{tikzpicture} \end{center} \end{frame} 
%%%------------------------------------------------------------------------------------------------------------
%%% Element-wise tensor operations: the s + b addition picture and the activation f
\begin{frame}{�����ĵ�Ԫ����}
\begin{itemize}
  \item ������$\textbf{y}=f(\textbf{x}\cdot \textbf{w} + \textbf{b})$Ҳ����һЩ�����ĵ�Ԫ������element-wise operation��
  \begin{itemize}
    \item �ӷ���$\textbf{s}+\textbf{b}$������$\textbf{s} = \textbf{x}\cdot \textbf{w}$
    \item �������$f(\cdot)$
  \end{itemize}
  \item<2-> \textbf{��Ԫ��}���Ƕ������е�ÿ��λ�ö����мӷ�
  \begin{itemize}
    \item<3-> ��չ���ӷ���\textbf{�㲥}���ظ�����һ���������мӷ�������Ҫ������������״��ͬ
  \end{itemize}
\end{itemize}
\vspace{-1.5em}
\begin{center}
\begin{tikzpicture}
\visible<3->{
  % s: 2x4 grid, cells numbered 1..8 through the mycount1 counter.
  \begin{scope}
    \setcounter{mycount1}{1}
    \draw[step=0.5cm,color=orange,thick] (-1,-0.5) grid (1,0.5);
    \foreach \y in {+0.25,-0.25}
      \foreach \x in {-0.75,-0.25,0.25,0.75}{
        \node [fill=orange!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$};
        \addtocounter{mycount1}{1};
      }
    \node [anchor=south] (varlabel) at (0,0.6) {$\textbf{s}$};
  \end{scope}
  % b: a single 1x4 row whose cells all show the literal 1. No counter is
  % needed here; the next scope resets mycount1 itself before using it.
  \begin{scope}[xshift=1.5in]
    \draw[step=0.5cm,color=ugreen,thick] (-1,0) grid (1,0.5);
    \foreach \x in {-0.75,-0.25,0.25,0.75}{
      \node [fill=green!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,0.25) {$1$};
    }
    \node [anchor=center] (plabel) at (-4.5em,0) {\huge{$\textbf{+}$}};
    \node [anchor=south] (varlabel) at (0,0.6) {$\textbf{b}$};
  \end{scope}
  % s+b: 2x4 grid numbered 2..9, i.e. every cell of s increased by 1.
  \begin{scope}[xshift=3in]
    \setcounter{mycount1}{2}
    \draw[step=0.5cm,color=orange,thick] (-1,-0.5) grid (1,0.5);
    \foreach \y in {+0.25,-0.25}
      \foreach \x in {-0.75,-0.25,0.25,0.75}{
        \node [fill=orange!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$};
        \addtocounter{mycount1}{1};
      }
    \node [anchor=center] (plabel) at (-4.5em,0) {\huge{$\textbf{=}$}};
    \node [anchor=south] (varlabel) at (0,0.6) {$\textbf{s+b}$};
  \end{scope}
}
\end{tikzpicture}
\end{center}
\vspace{-0.3em} \begin{itemize} \item<4-> ���Ƶģ����ǿ������������˷���Ҳ�������������Ҳ��������������������vectorization�� \end{itemize} \vspace{-0.5em} \visible<4->{ \begin{displaymath} \textrm{Relu} \Big( \begin{pmatrix} 2 \\ -.3 \end{pmatrix} \Big) = \begin{pmatrix} 2 \\ 0 \end{pmatrix} \end{displaymath} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ���ѧϰ���߰� \begin{frame}{���ʵ�֣�- ��Դ����������} \begin{itemize} \item ʵ��������Ŀ�Դϵͳ�ܶ࣬һ�����õĹ��߰�NumPy \url{https://numpy.org/} \begin{itemize} \item Python�ӿڣ���ά����Ķ���ʹ�÷��� \item �ṩ��������ʾ��ʹ�õķ�ʽ \end{itemize} \item<2-> ������ܻ��������ܣ�TensorFlow��PyTorch \begin{itemize} \item Google��Facebook��Ʒ�������б�֤ \item ����ǿ�ӿڷḻ \item ���Խ��д��ģ�����Ӧ�� \item �����ɲο���ʵ�� \end{itemize} \includegraphics[scale=0.13]{./Figures/tensorflowpytorch.jpg} \item<3-> �����������ڸ��µ������ܣ� CNTK��MXNet��PaddlePaddle��Keras��Chainer�� dl4j��NiuTensor�� \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% NiuTrans.Tensor���߰� \begin{frame}{NiuTensor} \begin{itemize} \item ����ʹ���������е�NiuTensor���߰����н�ѧ \url{http://www.niutrans.com/opensource/niutensor/index.html} \begin{itemize} \item ��С�ɣ������� \item C++���Ա�д������߶��Ż� \item ͬʱ֧��CPU��GPU�豸 \item �ḻ����������ӿ� \end{itemize} \end{itemize} \includegraphics[scale=0.35]{./Figures/niutensor.jpg} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ʹ��NiuTensor \begin{frame}{ʹ��NiuTensor} \begin{itemize} \item NiuTensor��ʹ�úܼ�������һ��C++���� \end{itemize} \begin{tcolorbox}[enhanced,frame engine=empty,boxrule=0.1mm,size=title,colback=blue!10!white] \begin{flushleft} {\scriptsize \begin{tabbing} \texttt{\#include "source/tensor/XTensor.h"} \hspace{4em} \= // ����XTensor�����ͷ�ļ� \\ \texttt{using namespace nts;} \> // 
����nts�����ռ� \\ \ \\ \texttt{int main(int argc, const char ** argv)\{} \\ \ \ \ \ \texttt{XTensor tensor;} \> // ��������tensor \\ \ \ \ \ \texttt{InitTensor2D(\&tensor, 2, 2, X\_FLOAT);} \> // ��������Ϊ2*2�ľ��� \\ \ \ \ \ \texttt{tensor.SetDataRand();} \> // [0,1]���ȷֲ���ʼ������ \\ \ \ \ \ \texttt{tensor.Dump(stdout);} \> // ����������� \\ \ \ \ \ \texttt{return 0;}\\ \texttt{\}} \end{tabbing} } \end{flushleft} \end{tcolorbox} \begin{itemize} \item<2-> ��������������ʾ����ÿ��Ԫ�ص�ֵ \begin{itemize} \item<2-> ��������(order=2)����״��$2 \times 2$ (dimsize=2,2)�����������ǵ����ȸ���(dtype=X\_FLOAT)����ϡ��(dense=1.00) \end{itemize} \end{itemize} \vspace{-0em} \visible<2->{ \begin{tcolorbox}[enhanced,frame engine=empty,boxrule=0.1mm,size=title,colback=black!10!white] \begin{flushleft} {\scriptsize \begin{tabbing} \texttt{order=2 dimsize=2,2 dtype=X\_FLOAT dense=1.000000} \\ \texttt{3.605762e-001 2.992340e-001 1.393780e-001 7.301248e-001} \end{tabbing} } \end{flushleft} \end{tcolorbox} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ����XTensor \begin{frame}{����XTensor} \begin{itemize} \item ��������XTensor��ʾ������InitTensor���壬������ \begin{itemize} \item ָ��XTensor���ͱ�����ָ�� \item �����Ľ� \item ��������ά�ȵĴ�С���봫ͳ��ά����Լ��һ���� \item �������������͵ȣ���ȱʡֵ�� \end{itemize} \end{itemize} \vspace{-0.3em} \begin{tcolorbox}[enhanced,frame engine=empty,boxrule=0.1mm,size=title,colback=blue!10!white] \begin{flushleft} {\scriptsize \begin{tabbing} \texttt{XTensor tensor;} \hspace{12em} \= // ��������tensor \\ \texttt{int sizes[6] = \{2,3,4,2,3,4\};} \> // ��������״Ϊ2*3*4*2*3*4 \\ \texttt{InitTensor(\&tensor, 6, sizes, X\_FLOAT);} \> // ������״Ϊsizes��6������ \end{tabbing} } \end{flushleft} \end{tcolorbox} \visible<2->{ \begin{itemize} \item �����Ķ��巽ʽ \end{itemize} \vspace{-0.2em} \begin{tcolorbox}[enhanced,frame engine=empty,boxrule=0.1mm,size=title,colback=blue!10!white] \begin{flushleft} 
{\scriptsize
\begin{tabbing}
  \texttt{XTensor a, b, c;} \hspace{11.5em} \= // ��������tensor \\
  \texttt{InitTensor1D(\&a, 10, X\_INT);} \> // 10ά������������\\
  \texttt{InitTensor1D(\&b, 10);} \> // 10ά��������ȱʡ����(����)\\
  \texttt{InitTensor4D(\&c, 10, 20, 30, 40);} \> // 10*20*30*40��4������(����)
\end{tabbing}
}
\end{flushleft}
\end{tcolorbox}
}
\visible<3->{
\begin{itemize}
  \item ֱ����GPU�϶�������
\end{itemize}
\vspace{-0.2em}
\begin{tcolorbox}[enhanced,frame engine=empty,boxrule=0.1mm,size=title,colback=blue!10!white]
\begin{flushleft}
{\scriptsize
\begin{tabbing}
  \texttt{XTensor tensorGPU;} \hspace{10.5em} \= // ��������tensor \\
  \texttt{InitTensor2D(\&tensorGPU, 10, 20,} $\backslash$ \> // �ڱ��Ϊ0��GPU�϶������� \\
  \hspace{6.7em} \texttt{X\_FLOAT, 0);}
\end{tabbing}
}
\end{flushleft}
\end{tcolorbox}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% XTensor arithmetic: element-wise operators/functions and MMul
\begin{frame}{��������}
\begin{itemize}
  \item ���ֵ�Ԫ���ӣ�1�����㣩+��-��*��/��Log��Exp�� Power��Absolute�ȣ�����Sigmoid��Softmax�ȼ����
\end{itemize}
\vspace{-0.2em}
\begin{tcolorbox}[enhanced,frame engine=empty,boxrule=0.1mm,size=title,colback=blue!10!white]
\begin{flushleft}
{\scriptsize
\begin{tabbing}
  \texttt{XTensor a, b, c, d, e;} \hspace{7em} \= // ��������tensor \\
  \texttt{InitTensor3D(\&a, 2, 3, 4);} \> // aΪ2*3*4��3������ \\
  \texttt{InitTensor3D(\&b, 2, 3, 4);} \> // bΪ2*3*4��3������ \\
  \texttt{InitTensor3D(\&c, 2, 3, 4);} \> // cΪ2*3*4��3������ \\
  \texttt{a.SetDataRand();} \> // �����ʼ��a \\
  \texttt{b.SetDataRand();} \> // �����ʼ��b \\
  \texttt{c.SetDataRand();} \> // �����ʼ��c \\
  \texttt{d = a + b * c;} \> // d����ֵΪ a + b * c \\
  \texttt{d = ((a + b) * d - b / c ) * d;} \> // d���Ա�Ƕ��ʹ�� \\
  \texttt{e = Sigmoid(d);} \> // d���������Sigmoid��ֵ��e
\end{tabbing}
}
\end{flushleft}
\end{tcolorbox}
\visible<2->{
\begin{itemize}
  \item �߽����㣬��õ��Ǿ���˷�(MMul)
\end{itemize}
\vspace{-0.2em}
\begin{tcolorbox}[enhanced,frame engine=empty,boxrule=0.1mm,size=title,colback=blue!10!white] \begin{flushleft} {\scriptsize \begin{tabbing} \texttt{XTensor a, b, c;} \hspace{10.0em} \= // ��������tensor \\ \texttt{InitTensor4D(\&a, 2, 2, 3, 4);} \> // aΪ2*2*3*4��4������ \\ \texttt{InitTensor2D(\&b, 4, 5);} \> // bΪ4*5�ľ��� \\ \texttt{a.SetDataRand();} \> // �����ʼ��a \\ \texttt{b.SetDataRand();} \> // �����ʼ��b \\ \texttt{c = MMul(a, b);} \> // ����˵Ľ��Ϊ2*2*3*5��4������ \end{tabbing} } \end{flushleft} \end{tcolorbox} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% XTensor���������� \begin{frame}{�������ú���} \begin{itemize} \item �����������в�ȫ�����Բο���վ�ϵ���ϸ˵�� \end{itemize} \footnotesize{ \begin{center} \begin{tabular}{l|l} ���� & ���� \\ \hline \texttt{a.Reshape(o, s)} & ��a�任Ϊ��Ϊo����״Ϊs������\\ \texttt{a.Get(pos)} & ȡ������λ��Ϊpos��Ԫ�� \\ \texttt{a.Set(v, pos)} & ��������λ��Ϊpos��Ԫ�ص�ֵ��Ϊv \\ \texttt{a.Dump(file)} & �������浽file�У�fileΪ�ļ���� \\ \texttt{a.Read(file)} & ��file�ж�ȡ������fileΪ�ļ���� \\ \hline \texttt{Power(a, p)} & ����ָ��$\textrm{a}^{\textrm{p}}$ \\ \texttt{Linear(a, s, b)} & ���� a * s + b��s��b����һ���� \\ \texttt{CopyValues(a)} & ����a��һ������ \\ \texttt{ReduceMax(a, d)} & ��a���ŷ���d���й�Լ���õ����ֵ \\ \texttt{ReduceSum(a, d)} & ��a���ŷ���d���й�Լ���õ��� \\ \texttt{Concatenate(a, b, d)} & ����������a��b��d������\\ \texttt{Merge(a, d)} & ������a��d����ϲ�\\ \texttt{Split(a, d, n)} & ������a��d������ѳ�n��\\ \hline \texttt{Sigmoid(a)} & ��a����Sigmoid�任 \\ \texttt{Softmax(a)} & ��a����Softmax�任�������һ������ \\ \texttt{HardTanH(a)} & ��a����hard tanh�任(˫�����еĽ���)\\ \texttt{Relu(a)} & ��a����Relu�任\\ \end{tabular} \end{center} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ����XTensor���������� \begin{frame}{����������} \begin{itemize} \item ���Ժܷ���Ĺ���һ���������� \end{itemize} 
\begin{tcolorbox}[bicolor,sidebyside,righthand width=4cm,size=title,frame engine=empty,
                  colback=blue!10!white,colbacklower=black!5!white]
{\scriptsize
\begin{tabbing}
  \texttt{XTensor x, y, w, b;} \\
  \texttt{InitTensor3D(\&x, 3, 4, 5);} \\
  \texttt{InitTensor2D(\&w, 5, 3);} \\
  \texttt{InitTensor1D(\&b, 3);} \\
  \texttt{...} \\
  \texttt{y = Sigmoid(MMul(x, w) + b);}
\end{tabbing}
}
\tcblower
\begin{center}
% Single-layer network: x -> layer -> y, with the shapes printed beside x and y.
\begin{tikzpicture}[
    tensor/.style={draw,circle,inner sep=2pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}},
    layerbox/.style={draw,rounded corners,inner sep=2pt,minimum width=4em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}}]
  \node [tensor] (x) at (0,0) {\footnotesize{$\textrm{x}$}};
  \node [anchor=south,layerbox] (layer) at ([yshift=0.7em]x.north) {\scriptsize{layer}};
  \node [anchor=south,tensor] (y) at ([yshift=0.7em]layer.north) {\scriptsize{$\textrm{y}$}};
  \draw [thick,->] (x.north) -- (layer.south);
  \draw [thick,->] (layer.north) -- (y.south);
  \node [anchor=west,align=left] (xshape) at (x.east) {\tiny{shape: 3*4*5}};
  \node [anchor=west,align=left] (yshape) at (y.east) {\tiny{shape: 3*4*3}};
\end{tikzpicture}
\end{center}
\end{tcolorbox}
\visible<2->{
\begin{itemize}
  \item һ���������
\end{itemize}
\begin{tcolorbox}[bicolor,sidebyside,righthand width=4cm,size=title,frame engine=empty,
                  colback=blue!10!white,colbacklower=black!5!white]
{\scriptsize
\begin{tabbing}
  \texttt{XTensor x, y, h1, h2;} \\
  \texttt{XTensor w1, b1, w2, w3;} \\
  \texttt{InitTensor3D(\&x, 3, 4, 5);} \\
  \texttt{InitTensor2D(\&w1, 5, 3);} \\
  \texttt{InitTensor1D(\&b1, 3);} \\
  \texttt{InitTensor2D(\&w2, 3, 6);} \\
  \texttt{InitTensor2D(\&w3, 6, 4);} \\
  \texttt{...} \\
  \texttt{h1 = Sigmoid(MMul(x, w1) + b1);} \\
  \texttt{h2 = HardTanH(MMul(h1, w2));} \\
  \texttt{y = Relu(MMul(h2, w3));}
\end{tabbing}
}
\tcblower
\begin{center}
% Three-layer network: x -> layer1 -> layer2 -> layer3 -> y.
\begin{tikzpicture}[
    tensor/.style={draw,circle,inner sep=2pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}},
    layerbox/.style={draw,rounded corners,inner sep=2pt,minimum width=4em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}}]
  \node [tensor] (x) at (0,0) {\footnotesize{$\textrm{x}$}};
  \node [anchor=south,layerbox] (layer1) at ([yshift=0.7em]x.north) {\scriptsize{layer1}};
  % layer2 sits on layer1 of THIS picture; it used to reference `layer`, a
  % node that is only defined in the previous tikzpicture.
  \node [anchor=south,layerbox] (layer2) at ([yshift=1.0em]layer1.north) {\scriptsize{layer2}};
  \node [anchor=south,layerbox] (layer3) at ([yshift=1.0em]layer2.north) {\scriptsize{layer3}};
  \node [anchor=south,tensor] (y) at ([yshift=0.7em]layer3.north) {\scriptsize{$\textrm{y}$}};
  \draw [thick,->] (x.north) -- (layer1.south);
  \draw [thick,->] (layer1.north) -- (layer2.south);
  \draw [thick,->] (layer2.north) -- (layer3.south);
  \draw [thick,->] (layer3.north) -- (y.south);
  \node [anchor=west,align=left] (xshape) at (x.east) {\tiny{shape: 3*4*5}};
  \node [anchor=west,align=left] (yshape) at (y.east) {\tiny{shape: 3*4*4}};
  \node [anchor=south west,align=left,inner sep=2pt] (l1shape) at (layer1.north) {\tiny{shape: 3*4*3}};
  \node [anchor=south west,align=left,inner sep=2pt] (l2shape) at (layer2.north) {\tiny{shape: 3*4*6}};
\end{tikzpicture}
\end{center}
\end{tcolorbox}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Building a more complex network with XTensor (recurrent loop, Merge, Split)
\begin{frame}{������һ�������}
\begin{itemize}
  \item �κ����綼���Թ���������RNN��Transformer ��
\end{itemize}
\begin{tcolorbox}[bicolor,sidebyside,righthand width=4cm,size=title,frame engine=empty,
                  colback=blue!10!white,colbacklower=black!5!white]
{\scriptsize
\begin{tabbing}
  \texttt{XTensor x[3], y[3], r, wh;} \\
  \texttt{XTensor h1, h2, w1, b1, h3, h4;} \\
\texttt{XList splits;} \\ \texttt{...} \\ \texttt{for(unsigned i = 0; i < 3; i++)\{} \\ \texttt{\hspace{2em}r = Concatenate(x[i] + r) * wh;}\\ \texttt{\hspace{2em}splits.Add(\&r);}\\ \texttt{\}}\\ \visible<2->{ \texttt{} \\ \texttt{h1 = Merge(splits, 0);}\\ \texttt{h2 = Relu(h1 * w1 + b1);}\\ \texttt{h3 = h1 + h2;} \\ \texttt{h4 = Softmax(h3);} \\ } \visible<3->{ \texttt{} \\ \texttt{Split(h4, splits, 0);} \\ \texttt{} \\ \texttt{for(unsigned i = 0; i < 3; i++)\{} \\ \texttt{\hspace{2em}y[i] = *(XTensor*)splits.Get(i);}\\ \texttt{\hspace{2em}y[i].Dump(stdout);}\\ \texttt{\}} } \end{tabbing} } \tcblower \begin{center} \begin{tikzpicture} \node [draw,circle,inner sep=1pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (x1) at (0,0) {\footnotesize{$\textrm{x}_1$}}; \node [anchor=west,draw,circle,inner sep=1pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (x2) at ([xshift=2em]x1.east) {\footnotesize{$\textrm{x}_2$}}; \node [anchor=west,draw,circle,inner sep=1pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (x3) at ([xshift=2em]x2.east) {\footnotesize{$\textrm{x}_3$}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=2.5em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (rlayer1) at ([yshift=1em]x1.north) {\tiny{rlayer}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=2.5em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (rlayer2) at ([yshift=1em]x2.north) {\tiny{rlayer}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=2.5em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (rlayer3) at ([yshift=1em]x3.north) {\tiny{rlayer}}; \draw [->,thick] (x1.north) -- (rlayer1.south); \draw [->,thick] (x2.north) -- (rlayer2.south); \draw [->,thick] (x3.north) -- (rlayer3.south); \draw [->,thick] (rlayer1.east) -- (rlayer2.west); \draw [->,thick] (rlayer2.east) -- (rlayer3.west); 
\draw [->,thick] (rlayer1.north) -- ([yshift=1em]rlayer1.north); \draw [->,thick] (rlayer2.north) -- ([yshift=1em]rlayer2.north); \draw [->,thick] (rlayer3.north) -- ([yshift=1em]rlayer3.north); \visible<2->{ \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h1) at ([yshift=1em]rlayer2.north) {\tiny{h1 = Merge($\cdot$)}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h2) at ([yshift=1em]h1.north) {\tiny{h2 = Relu($\cdot$)}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h3) at ([yshift=1em]h2.north) {\tiny{h3 = Sum($\cdot$)}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h4) at ([yshift=1em]h3.north) {\tiny{h4 = Softmax($\cdot$)}}; \draw [->,thick] (h1.north) -- (h2.south); \draw [->,thick] (h2.north) -- (h3.south); \draw [->,thick] (h3.north) -- (h4.south); \draw [->,thick,rounded corners] (h1.east) -- ([xshift=0.5em]h1.east) -- ([xshift=0.5em,yshift=0.5em]h2.north east) -- ([xshift=-2em,yshift=0.5em]h2.north east) -- ([xshift=-2em,yshift=1em]h2.north east); } \visible<3->{ \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=9.4em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (slayer) at ([yshift=1em]h4.north) {\tiny{Split($\cdot$)}}; \node [anchor=south,draw,circle,inner sep=1pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (y2) at ([yshift=1em]slayer.north) {\footnotesize{$\textrm{y}_2$}}; \node [anchor=east,draw,circle,inner sep=1pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow 
yshift=-1pt}] (y1) at ([xshift=-2em]y2.west) {\footnotesize{$\textrm{y}_1$}}; \node [anchor=west,draw,circle,inner sep=1pt,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (y3) at ([xshift=2em]y2.east) {\footnotesize{$\textrm{y}_3$}}; \draw [<-,thick] (y1.south) -- ([yshift=-1em]y1.south); \draw [<-,thick] (y2.south) -- ([yshift=-1em]y2.south); \draw [<-,thick] (y3.south) -- ([yshift=-1em]y3.south); } \visible<2->{ \draw [->,thick] (h4.north) -- (slayer.south); } \end{tikzpicture} \end{center} \end{tcolorbox} \end{frame} %%%------------------------------------------------------------------------------------------------------------ \subsection{����ѧϰ - ����} %%%------------------------------------------------------------------------------------------------------------ %%% outline: problem 3 \begin{frame}{����һ������} \vspace{6em} \begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow] {\Large \textbf{��ζ�ģ���еIJ�������ѧϰ,} \vspace{0.4em} \textbf{֮��ʹ��ѧϰ����ģ�ͽ����ƶϣ�} } \end{tcolorbox} \vspace{2em} \begin{center} \begin{tikzpicture} \node [fill=blue!10] (label) at (0,0) {\LARGE{$\frac{\partial L(\textbf{w})}{\partial \textbf{w}} = $ ? 
}}; \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ������ = ����ʽ \begin{frame}{������ = ��������ʽ} \begin{itemize} \item ���е������綼���Կ����ɱ����ͺ�����ɵı���ʽ\\ \end{itemize} \begin{center} \begin{tikzpicture} \node [anchor=north west] (eq1) at (0,0) {$\textbf{y} = \textbf{x} + \textbf{b}$}; \node [anchor=north west] (eq2) at (eq1.south west) {$\textbf{y} = \textrm{Relu}(\textbf{x} \cdot \textbf{w} + \textbf{b})$}; \node [anchor=north west] (eq3) at (eq2.south west) {$\textbf{y} = (\textrm{Relu}(\textbf{x} \cdot \textbf{w}_1 + \textbf{b}) + \textbf{x}) \cdot \textbf{w}_2$}; \node [anchor=north west] (eq4) at (eq3.south west) {$\textbf{y} = \textrm{Sigmoid}(\textrm{Relu}(\textbf{x} \cdot \textbf{w}_1 + \textbf{b}_1) + \textbf{x}) \cdot \textbf{w}_2 + \textbf{b}_2$}; \visible<2->{ \node [anchor=north west,minimum height=1.2em,minimum width=1.2em,fill=green!30!white] (xlabel) at ([yshift=-0.5em,xshift=0.3em]eq4.south west) {}; \node [anchor=west] (xlabel2) at (xlabel.east) {������� - ���û�ָ��}; } \begin{pgfonlayer}{background} \visible<2->{ \node [anchor=south, minimum height=1.6em,minimum width=0.8em,fill=green!30!white] (x1) at ([xshift=-1.3em]eq4.south) {}; \node [anchor=south, minimum height=1.6em,minimum width=0.8em,fill=green!30!white] (x2) at ([xshift=4.9em]eq4.south) {}; } \end{pgfonlayer} \visible<3->{ \node [anchor=north west,minimum height=1.2em,minimum width=1.2em,fill=red!30!white] (wlabel) at ([yshift=-0.3em]xlabel.south west) {}; \node [anchor=west] (wlabel2) at (wlabel.east) {ģ�Ͳ��� - ��ô���ã�����}; } \begin{pgfonlayer}{background} \visible<3->{ \node [anchor=south, minimum height=1.6em,minimum width=1.2em,fill=red!30!white] (w1) at ([xshift=0.2em]eq4.south) {}; \node [anchor=south, minimum height=1.6em,minimum width=1.2em,fill=red!30!white] (b1) at ([xshift=2.5em]eq4.south) {}; \node [anchor=south, minimum height=1.6em,minimum 
width=1.2em,fill=red!30!white] (w2) at ([xshift=6.85em]eq4.south) {}; \node [anchor=south, minimum height=1.6em,minimum width=1.2em,fill=red!30!white] (b2) at ([xshift=9.2em]eq4.south) {}; } \end{pgfonlayer} \end{tikzpicture} \end{center} \visible<4->{ \begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=blue!5!white,colframe=blue!75!black,drop fuzzy shadow] {\Large \textbf{�������ˣ�} \vspace{0.4em} \textbf{���ȷ��w��b��ʹx��y��Ӧ�ø��ã�} } \end{tcolorbox} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ѧϰ��Ŀ����ʲô \begin{frame}{Ŀ�꺯������ʧ����} \begin{itemize} \item ����һ�����͵��Ż����⣬��������������\\ \begin{enumerate} \item �Ż���Ŀ����ʲô�� \item ��ε�������$\textbf{w}$��$\textbf{b}$���Ŀ�ꣿ \end{enumerate} \item<2-> \textbf{����Ŀ��}�����ڸ���$\textbf{x}$��ʲô����$\textbf{y}$�Ǻõ� \begin{itemize} \item ���裺�����������$\{\textbf{x}_1,...,\textbf{x}_n\}$��ÿ��$\textbf{x}_i$����Ӧ\alert{��ȷ��}$\hat{\textbf{y}}_i$ \item ����һ��������$\textbf{y}=f(\textbf{x})$��ÿ��$\textbf{x}_i$Ҳ����һ�����$\textbf{y}_i$ \item ������Զ�����$\hat{\textbf{y}}_i$���������$\textbf{y}_i$֮���ƫ������������������С����ƫ��Ϳ��Եõ����õ�ģ�� \end{itemize} \end{itemize} \visible<3->{ \vspace{-0.7em} \begin{center} \begin{tikzpicture} \begin{scope}[yscale=0.2,xscale=0.8] \draw[-,very thick,ublue,domain=-4.2:3.5,samples=100] plot (\x,{ - 1/14 * (\x + 4) * (\x + 1) * (\x - 1) * (\x - 3)}); \visible<4->{ \draw[-,very thick,ugreen,domain=-3.8:3.0,samples=100] plot (\x,{ - 1/14 * (4*\x*\x*\x + 3*\x*\x - 26*\x - 1)}); } \draw[->,thick] (-6,0) -- (5,0); \draw[->,thick] (-5,-4) -- (-5,5); \draw [<-] (-2.5,4) -- (-2,5) node [pos=1,right,inner sep=2pt] {\footnotesize{��$\hat{\textbf{y}}_i$}}; \visible<4->{ \draw [<-] (-3,-3) -- (-2.5,-2) node [pos=0,left,inner sep=2pt] {\footnotesize{Ԥ��$\textbf{y}_i$}};} \visible<5->{ \draw [<-] (2.3,1) -- (3.3,2) node [pos=1,right,inner sep=2pt] {\footnotesize{ƫ��$|\hat{\textbf{y}}_i - 
\textbf{y}_i|$}};
% For every sampled x, join the two curve values (\p and \q) with a vertical segment.
\foreach \x in {-3.8,-3.7,...,3.0}{
  \pgfmathsetmacro{\p}{- 1/14 * (\x + 4) * (\x + 1) * (\x - 1) * (\x - 3)};
  \pgfmathsetmacro{\q}{- 1/14 * (4*\x*\x*\x + 3*\x*\x - 26*\x - 1)};
  \draw [-] (\x,\p) -- (\x, \q);
}
}
\end{scope}
\end{tikzpicture}
\end{center}
}
\vspace{-0.3em}
\begin{itemize}
\item<6-> ������̾���\alert{�����Ż�/ѵ��}����$\hat{\textbf{y}}_i$��$\textbf{y}_i$֮��ƫ��Ķ�������һ��\alert{��ʧ����}��Ҳ����ѵ����\alert{Ŀ�꺯��}�����Ż���Ŀ�����\textbf{��С����ʧ����}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Table of common loss functions: name, formula, NiuTensor code, typical use
\begin{frame}{��������ʧ����}
\begin{itemize}
\item ��ʧ������Ϊ$Loss(\hat{\textbf{y}}_i,\textbf{y}_i)$�����Ϊ$L$�������dz��õĶ���
\end{itemize}
\vspace{0.5em}
\footnotesize{
\renewcommand{\arraystretch}{1.2}
\begin{tabular}{l | l | l | l}
���� & ���� & NiuTensorʵ��(\texttt{yh}��ʾ$\hat{\textbf{y}}_i$) & Ӧ�� \\ \hline
0-1 & $L = \left\{ \begin{array}{ll} 0 & \hat{\textbf{y}}_i = \textbf{y}_i \\ 1 & \hat{\textbf{y}}_i \ne \textbf{y}_i \end{array} \right.$ & \scriptsize{\texttt{L = Sign(Absolute(yh - y))}} & ��֪�� \\
% Hinge: parentheses in the code cell are balanced.
Hinge & $L=\max(0,1-\hat{\textbf{y}}_i \cdot \textbf{y}_i)$ & \scriptsize{\texttt{L = Max(0, 1 - yh * y)}} & SVM \\
����ֵ & $L=|\hat{\textbf{y}}_i - \textbf{y}_i|$ & \scriptsize{\texttt{L = Absolute(yh - y)}} & �ع� \\
% Logistic loss is log(1 + exp(-yh*y)); the code cell mirrors the formula.
Logistic & $L=\log(1 + \exp(- \hat{\textbf{y}}_i \cdot \textbf{y}_i))$ & \scriptsize{\texttt{L = Log(1 + Exp(Negate(yh * y)))}} & �ع� \\
ƽ�� & $L=(\hat{\textbf{y}}_i - \textbf{y}_i)^2$ & \scriptsize{\texttt{L = Power(yh - y, 2)}} & �ع� \\
ָ�� & $L=\exp(- \hat{\textbf{y}}_i \cdot \textbf{y}_i) $ & \scriptsize{\texttt{L = Exp(Negate(yh * y))}} & \scriptsize{AdaBoost} \\
������ & $L=-\sum_k \textbf{y}_i^{[k]} \log \hat{\textbf{y}}_i^{[k]} $ & \scriptsize{\texttt{L = CrossEntropy(y, yh)}} & ����� \\
 & \scriptsize{$\textbf{y}_i^{[k]}$: $\textbf{y}_i$�ĵ�$k$ά} & & \\
\end{tabular}
\renewcommand{\arraystretch}{1.0}
}
\vspace{-0.5em}
\begin{itemize}
\item ע�⣺ \begin{itemize} \item ��ʧ�������Ը������ⲻͬ����ѡ��û�й̶�Ҫ�� \item ��Щ��ʧ���������������Լ�������罻����Ҫ��$\hat{\textbf{y}}_i$��$\textbf{y}_i$���Ǹ��ʷֲ� \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% �Ż�Ŀ�꺯�� \begin{frame}{�����Ż�} \begin{itemize} \item ���ڵ�$i$����������($\textbf{x}_i$,$\hat{\textbf{y}}_i$)���������ʧ������������$\textbf{w}$�ĺ�������$\textbf{b}$Ҳ��Ϊһ��$\textbf{w}$������Ϊ$L(\textbf{x}_i,\hat{\textbf{y}}_i;\textbf{w})$�������ѧϰ���Ա�����Ϊ��\\ \begin{displaymath} \textbf{w}^* = \argmin_{\textbf{w}} \frac{1}{n} \sum_{i=1}^{n} L(\textbf{x}_i,\hat{\textbf{y}}_i;\textbf{w}) \end{displaymath} $\textbf{w}^*$��ʾ��ѵ������ʹ����ʧ��ƽ��ֵ�ﵽ��С�IJ�����$\frac{1}{n} \sum_{i=1}^{n} L(\textbf{x}_i,\hat{\textbf{y}}_i;\textbf{w})$���������ۺ���(cost function)��������ʧ������ֵ�����Ĺ��ơ� \vspace{0.5em} \item<2-> �������⣺\textbf{���$\argmin$�����ҵ����ۺ�����Сֵ��} \begin{itemize} \item ���Ƿdz����������⣬����һ�µ����µ�IBMģ�ͣ���ʱʹ�õ���EM�㷨 \item �������ﲢ����һ������ģ�� \item ��Ҫһ�ָ���ͨ�õ���ⷽ�� \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% �ݶ��½� \begin{frame}{�ݶ��½���Gradient Descent��} \begin{itemize} \item �����Ŀ�꺯�������Dz���$\textbf{w}$�ĺ�������Ϊ$J(\textbf{w})$���Ż�Ŀ���ǣ��ҵ�ʹ$J(\textbf{w})$�ﵽ��С��$\textbf{w}$ \item ע�⣬$\textbf{w}$���ܰ������ڸ�ʵ����������SMT��MERT֮��ĵ��η�����������Կ���һ�ָ����ʺϴ���ʵ���������Ż������������˼����\alert{�ݶ��½�}�� \begin{itemize} \item<2-> ���$J(\textbf{w})$����$\textbf{w}$���֣�$\frac{\partial J(\textbf{w})}{\partial \textbf{w}}$��ʾ$J$��$\textbf{w}$���仯���ķ��� \item<2-> $\textbf{w}$�����ݶȷ�����£��µ�$\textbf{w}$����ʹ�������ӽ���ֵ \end{itemize} \end{itemize} \pgfplotsset{% colormap={whitered}{color(-1cm)=(orange!75!red);color(1cm)=(white)} } \begin{center} \begin{tikzpicture}[ declare function = {mu1=1;}, declare function = {mu2=2;}, 
declare function = {sigma1=0.5;}, declare function = {sigma2=1;}, declare function = {normal(\m,\s)=1/(2*\s*sqrt(pi))*exp(-(x-\m)^2/(2*\s^2));}, declare function = {bivar(\ma,\sa,\mb,\sb)=1/(2*pi*\sa*\sb) * exp(-((x-\ma)^2/\sa^2 + (y-\mb)^2/\sb^2))/2;}] \footnotesize{ \visible<2->{ \begin{scope} \begin{axis}[ colormap name = whitered, width = 8cm, height = 5cm, view = {20}{45}, enlargelimits = false, grid = major, domain = -1:3, y domain = 0:4, samples = 30, xlabel = $\textbf{w}^{[1]}$, ylabel = $\textbf{w}^{[2]}$, xlabel style = {xshift=0em,yshift=0.8em}, ylabel style = {xshift=0.2em,yshift=0.8em}, zlabel = {$J(\textbf{w})$}, ztick = {-0.1}, colorbar, colorbar style = { at = {(1.2,0.5)}, anchor = north west, ytick = {0,-0.1}, height = 0.25*\pgfkeysvalueof{/pgfplots/parent axis height}, title = {} } ] \addplot3 [surf] {-bivar(mu1,sigma1,mu2,sigma2)}; \node [circle,fill=red,minimum size=3pt,inner sep=1.5pt] () at (axis cs:0.5,2,-0.01) {}; \draw [->,very thick,ublue] (axis cs:0.5,2,-0.01) -- (axis cs:0.8,1.6,-0.03) node [pos=1,right,inner sep=2pt] {\tiny{-$\frac{\partial J(\textbf{w})}{\partial \textbf{w}}$}}; \draw [->,very thick,dotted] (axis cs:0.5,2,-0.01) -- (axis cs:0.2,1.5,-0.03); \draw [->,very thick,dotted] (axis cs:0.5,2,-0.01) -- (axis cs:0.2,3.5,-0.03); %\draw [black!50] (axis cs:0,-1,0) -- (axis cs:0,4,0); \end{axis} \end{scope} } } \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% �ݶ��½��ı��� \begin{frame}{�ݶ��½��IJ�ͬʵ�ַ�ʽ} \begin{itemize} \item \textbf{�ݶ��½�}�����ǿ��������ݶȷ������$\textbf{w}$һС����֮��õ����õ�$\textbf{w}$��֮�����¼����ݶȣ������ظ��������� \begin{displaymath} \textbf{w}_{t+1} = \textbf{w}_t - \alpha \cdot \frac{\partial J(\textbf{w}_t)}{\partial \textbf{w}_t} \end{displaymath} ����$t$��ʾ���µIJ�����$\alpha$��һ����������ʾ���²����Ĵ�С��$\alpha$��������Ҫ����������е�������$J(\textbf{w}_t)$����ʽ�����˾�����㷨�����ʵ�֡� \item<2-> 
\textbf{�����ݶ��½�(Batch Gradient Descent)}��
\begin{displaymath}
J(\textbf{w}_t) = \frac{1}{n} \sum_{i=1}^{n} L(\textbf{x}_i,\hat{\textbf{y}}_i;\textbf{w}_t)
\end{displaymath}
���ַ���ѵ���ȶ�����������ÿ�θ�����Ҫ������ѵ���������б�����Ч�ʵͣ�����$n$�ܴ����ģ�����Ϻ���ʹ��
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Gradient descent variants (continued): stochastic and mini-batch
\begin{frame}{�ݶ��½��IJ�ͬʵ�ַ�ʽ(��)}
\begin{itemize}
\item \textbf{����ݶ��½�(Stochastic Gradient Descent)}��
\begin{displaymath}
J(\textbf{w}_t) = L(\textbf{x}_i,\hat{\textbf{y}}_i;\textbf{w}_t)
\end{displaymath}
����������SGD�����л���ѧϰ�Ŀγ��X�����н��ܡ�ÿ�����ѡȡһ�����������ݶȼ���Ͳ������£����µļ�����۵ͣ�������������������������������ѧϰ(online learning)����������������
\vspace{0.3em}
\item<2-> \textbf{С�����ݶ��½�(Mini-batch Gradient Descent)}��
% A mini-batch of m samples starting at index j covers i = j, ..., j+m-1,
% so that the 1/m factor averages exactly m terms.
\begin{displaymath}
J(\textbf{w}_t) = \frac{1}{m} \sum_{i=j}^{j+m-1} L(\textbf{x}_i,\hat{\textbf{y}}_i;\textbf{w}_t)
\end{displaymath}
ÿ�����ʹ�������������в�������(���������ر��)������һ�����з�����������õķ���֮һ
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Improvements on gradient descent: optimizer variants, parallelization, other tricks
\begin{frame}{һЩ�Ľ�}
\begin{itemize}
\item \textbf{���ֺĽ�}��������ݶȵķ����������ٶȡ�ѵ���ȶ��Եȣ�����googleһ��
  \begin{itemize}
  \item Momentum, Adagrad, Adadelta, RMSprop, Adam, AdaMax, Nadam, AMSGrad�ȵ�
  \item \footnotesize{\url{http://ruder.io/optimizing-gradient-descent}}
  \end{itemize}
\item<2-> \textbf{���л�}�����ģ���ݴ�����Ҫ�ֲ�ʽ���㣬�ݶȸ��µIJ�����Ҫ���
  \begin{itemize}
  \item \textbf{ͬ������}�����м���ڵ���ɼ����ͳһ���ܲ����²�����Ч���ȶ������Dz��жȵ�
  \item \textbf{�첽����}��ÿ���ڵ������ʱ���¡����жȸߣ��������ڽڵ��������ܲ�ͬ����������ʮ���ȶ�
  \end{itemize}
\item<3-> \textbf{����}
  \begin{itemize}
  \item ��������ݶ���ʧ�ͱ�ը�����⣬ʹ���ݶȲü����в����ӵ�
  \item �����������ӣ����Զ��ⲿ֪ʶ��ģ����������������ѵ�����ȶ�
  \end{itemize}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------ %%% ��μ����ݶ� \begin{frame}{��μ����ݶ�? - ��ֵ��} \begin{itemize} \item \textbf{����һ����������}����μ����ݶ� \begin{displaymath} \frac{\partial L(\textbf{w})}{\partial \textbf{w}} = ? \end{displaymath} \vspace{0.5em} \item<2-> \textbf{��ֵ��} - �ֱ��ķ��� \begin{displaymath} \frac{\partial L(\textbf{w})}{\partial \textbf{w}} = \lim_{\Delta \textbf{w} \to 0} \frac{L(\textbf{w} + \Delta \textbf{w}) - L(\textbf{w} - \Delta \textbf{w}) }{2\Delta \textbf{w}} \end{displaymath} ��������ֹ�ʽ�����ǿ��Խ�$\textbf{w}$�仯һ�������$\Delta \textbf{w}$��ʾ����֮��$L(\cdot)$�ı仯 \begin{itemize} \item<3-> \textbf{�ŵ������}��������ķdz�������ʵ�� \item<3-> \textbf{ȱ��Ҳ������}��Ч��̫�ͣ����ڸ������硢����������һЩ��ģ�ͻ�������ʹ�� \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ��μ����ݶ� - ������ \begin{frame}{��μ����ݶ�? 
- ������} \begin{itemize} \item \textbf{������}����������д���ֱ���ʽ�������������ֵ���õ��ֽ�������磬�������±���ʽ \begin{displaymath} L(\textbf{w}) = \textbf{x} \cdot \textbf{w} + 2 \textbf{w}^2 \end{displaymath} \visible<2->{ \vspace{0.5em} �����ֶ��Ƶ����ֱ���ʽ \begin{displaymath} \frac{\partial L(\textbf{w})}{\partial \textbf{w}} = \textbf{x} + 4 \textbf{w} \end{displaymath} } \visible<3->{ \vspace{0.5em} �����$\textbf{x} = \begin{pmatrix} 2 \\ -3 \end{pmatrix}$��$\textbf{w} = \begin{pmatrix} -1 \\ 1 \end{pmatrix}$���õ��ֽ��\\ \vspace{1em} \begin{displaymath} \frac{\partial L(\textbf{w})}{\partial \textbf{w}} = \begin{pmatrix} 2 \\ -3 \end{pmatrix} + 4 \begin{pmatrix} -1 \\ 1 \end{pmatrix} = \begin{pmatrix} -2 \\ 1 \end{pmatrix} \end{displaymath} } \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% �����ֵ��������� \begin{frame}{�����ֵ���������} \begin{itemize} \item \textbf{Expression Swell}����㺯�����ֱ���ʽ��dz����� \begin{itemize} \item ����ʽ�߳����״洢���� \item ������Ҫ�����ֵ�\alert{���ֵ}���������ֱ���ʽ \end{itemize} \end{itemize} \vspace{0.5em} {\small \begin{tabular} {l | l | l} ���� & �ֱ���ʽ & ������ֱ���ʽ \\ \hline $x$ & $1$ & $1$ \\ \hline $x(x+1)$ & $(x+1)+x$ & $2x + 1$ \\ \hline $x(x+1)$ & $(x+1)(x^2+x+1)$ & $4x^3+6x^2$ \\ $(x^2+x+1)$ & $+x(x^2+x+1)$ & $+4x+1$ \\ & $+x(x+1)(2x+1)$ & \\ \hline $(x^2+x)$ & $(2x+1)(x^2+x+1)$ & $8x^7+28x^6$ \\ $(x^2+x+1)$ & $(x^4+2x^3+2x^2+x+1)$ & $+48x^5+50x^4$ \\ $(x^4+2x^3$ & $+(2x+1)(x^2+x)$ & $+36x^3+18x^2$ \\ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\ & $+(x^2+x)(x^2+x+1)$ & \\ & \ \ $(4x^3+6x^2+4x+1)$ & \\ \end{tabular} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% �Զ��� \begin{frame}{��μ����ݶ�? 
- �Զ���} \begin{itemize} \item \textbf{�Զ���}�����ӵ��ֱ�ɼIJ��裬��Щ������ȫ�Զ��������������д洢�����㡣�������һ�ַ���ģʽ����������Ҳ����\alert{����}˼�룩���������� \begin{enumerate} \item \textbf{ǰ�����}��������������룬������ÿ����������ֵ����Ҳ��������ı�ʹ�÷�ʽ \item \textbf{�������}��������������������������ÿ���������루���������Ӧ���� \end{enumerate} \end{itemize} \visible<2->{ \vspace{-1em} \begin{center} \begin{tikzpicture} \begin{scope} \tikzstyle{layernode} = [draw,thick,fill=ugreen!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}]; \node [anchor=center,layernode,minimum height=4em,minimum width=1em] (layer01) at (0,0) {}; \node [anchor=north west,layernode,minimum height=3em,minimum width=1em] (layer02) at ([xshift=3em]layer01.north east) {}; \node [anchor=south west,layernode,minimum height=3em,minimum width=1em] (layer03) at ([xshift=7em]layer01.south east) {}; \node [anchor=south west,layernode,minimum height=4em,minimum width=1em] (layer04) at ([xshift=11em]layer01.south east) {}; \node [anchor=south west,layernode,minimum height=4em,minimum width=1em] (layer05) at ([xshift=3em]layer04.south east) {}; \node [anchor=east] (input) at ([xshift=-1em]layer01.west){\scriptsize{����}}; \node [anchor=west] (output) at ([xshift=1em]layer05.east){\scriptsize{���}}; \draw [->] ([xshift=-1em]layer01.west) -- ([xshift=-0.1em]layer01.west); \draw [->] ([xshift=0.1em,yshift=-0.5em]layer01.north east) -- ([xshift=-0.1em,yshift=-0.5em]layer02.north west); \draw [->] ([xshift=0.1em,yshift=0.5em]layer01.south east) -- ([xshift=-0.1em,yshift=0.5em]layer03.south west); \draw [->] ([xshift=0.1em,yshift=-0.5em]layer02.north east) -- ([xshift=-0.1em,yshift=-0.5em]layer04.north west); \draw [->] ([xshift=0.1em,yshift=0.5em]layer03.south east) -- ([xshift=-0.1em,yshift=0.5em]layer04.south west); \draw [->] ([xshift=0.1em]layer04.east) -- ([xshift=-0.1em]layer05.west); \draw [->] ([xshift=0.1em]layer05.east) -- ([xshift=1.0em]layer05.east); \visible<3->{ \draw [->,very thick,ublue] 
([xshift=-1em]layer01.west) -- ([xshift=-0.1em]layer01.west); } \visible<4->{ \draw [->,very thick,ublue] ([xshift=0.1em,yshift=-0.5em]layer01.north east) -- ([xshift=-0.1em,yshift=-0.5em]layer02.north west); } \visible<5->{ \draw [->,very thick,ublue] ([xshift=0.1em,yshift=0.5em]layer01.south east) -- ([xshift=-0.1em,yshift=0.5em]layer03.south west); } \visible<6->{ \draw [->,very thick,ublue] ([xshift=0.1em,yshift=-0.5em]layer02.north east) -- ([xshift=-0.1em,yshift=-0.5em]layer04.north west); \draw [->,very thick,ublue] ([xshift=0.1em,yshift=0.5em]layer03.south east) -- ([xshift=-0.1em,yshift=0.5em]layer04.south west); \draw [->,very thick,ublue] ([xshift=0.1em]layer04.east) -- ([xshift=-0.1em]layer05.west); \draw [->,very thick,ublue] ([xshift=0.1em]layer05.east) -- ([xshift=1.0em]layer05.east); } \visible<8->{ \draw [<-,very thick,red] ([xshift=-1em,yshift=-0.3em]layer01.west) -- ([xshift=-0.1em,yshift=-0.3em]layer01.west); \draw [<-,very thick,red] ([xshift=0.1em,yshift=-0.8em]layer01.north east) -- ([xshift=-0.1em,yshift=-0.8em]layer02.north west); \draw [<-,very thick,red] ([xshift=0.1em,yshift=0.2em]layer01.south east) -- ([xshift=-0.1em,yshift=0.2em]layer03.south west); \draw [<-,very thick,red] ([xshift=0.1em,yshift=-0.8em]layer02.north east) -- ([xshift=-0.1em,yshift=-0.8em]layer04.north west); \draw [<-,very thick,red] ([xshift=0.1em,yshift=0.2em]layer03.south east) -- ([xshift=-0.1em,yshift=0.2em]layer04.south west); \draw [<-,very thick,red] ([xshift=0.1em,yshift=-0.3em]layer04.east) -- ([xshift=-0.1em,yshift=-0.3em]layer05.west); \draw [<-,very thick,red] ([xshift=0.1em,yshift=-0.3em]layer05.east) -- ([xshift=1.0em,yshift=-0.3em]layer05.east); } \visible<7->{ \draw [<-,thin] ([xshift=0.3em,yshift=0.3em]layer04.east) .. controls +(35:1) and +(215:1) .. ([xshift=-2em,yshift=0.3em]layer05.north west) node [pos=1,above] {\scriptsize{ǰ��$i$ �����$h_{i}$}}; } \visible<9->{ \draw [<-,thin] ([xshift=0.3em,yshift=-0.7em]layer04.east) .. 
controls +(-35:1) and +(145:1) .. ([xshift=-2em,yshift=-0.3em]layer05.south west) node [pos=1,below] {\scriptsize{����$h_{i}$ �����ݶ�$\frac{\partial L}{\partial h_i}$}}; } \end{scope} \end{tikzpicture} \end{center} } \vspace{-1em} \begin{itemize} \item<10-> �Զ��ֿ�����\alert{����ͼ}ʵ��(TensorFlow�� NiuTensor ��)����������ͼ�����˿γ̵ķ�Χ������������ѧϰ \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ���� - ����˵�� \begin{frame}{����˵��} \begin{itemize} \item ��һ��$K$��������Ϊ��������ȷһ�·��� \begin{itemize} \item �������ÿ���������ж�����ƫ�������$\textbf{b}$�� \end{itemize} \end{itemize} \vspace{-1em} \begin{center} \begin{tikzpicture} \begin{scope} \def\neuronsep{1} \tikzstyle{neuronnode} = [minimum size=1.2em,circle,draw,ublue,very thick,inner sep=1pt, fill=white,align=center,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}]; %%% layer 1 \foreach \n in {1,...,4}{ \node [neuronnode] (neuron0\n) at (\n * \neuronsep,0) {}; \draw [->] ([yshift=-0.8em]neuron0\n.south) -- ([yshift=-0.1em]neuron0\n.south) node [pos=0,below] {\tiny{...}}; } \begin{pgfonlayer}{background} \node [rectangle,inner sep=0.2em,fill=red!20] [fit = (neuron01) (neuron04)] (layer01) {}; \node [anchor=east] (layer01label) at (layer01.west) {\scriptsize{��$l-1$}}; \end{pgfonlayer} %%% layer 2 \foreach \n in {1,...,4}{ \node [neuronnode] (neuron1\n) at (\n * \neuronsep,3em) {}; } \foreach \n in {1,...,4}{ \foreach \m in {1,...,4}{ \draw [<-] (neuron1\n.south) -- (neuron0\m.north); } } \begin{pgfonlayer}{background} \node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron11) (neuron14)] (layer02) {}; \node [anchor=east] (layer02label) at (layer02.west) {\scriptsize{��$l$}}; \end{pgfonlayer} %%% layer 3 \foreach \n in {1,...,4}{ \node [neuronnode] (neuron2\n) at (\n * \neuronsep,6em) {}; \draw [<-] ([yshift=0.8em]neuron2\n.north) -- ([yshift=0.0em]neuron2\n.north) node [pos=0,above] {\tiny{...}}; } \foreach 
\n in {1,...,4}{ \foreach \m in {1,...,4}{ \draw [<-] (neuron2\n.south) -- (neuron1\m.north); } } \begin{pgfonlayer}{background} \node [rectangle,inner sep=0.2em,fill=blue!20] [fit = (neuron21) (neuron24)] (layer03) {}; \node [anchor=east] (layer03label) at (layer03.west) {\scriptsize{��$l+1$}}; \end{pgfonlayer} %%% output layer \foreach \n in {1,...,4}{ \node [neuronnode] (neuron3\n) at (\n * \neuronsep,9.4em) {}; \visible<1-3,5->{ \draw [<-] ([yshift=0.6em]neuron3\n.north) -- ([yshift=0.0em]neuron3\n.north) node [pos=0,above] {\tiny{output}}; } \visible<4>{ \draw [<-,red,very thick] ([yshift=0.6em]neuron3\n.north) -- ([yshift=0.0em]neuron3\n.north) node [pos=0,above] {\tiny{output}}; } \draw [->] ([yshift=-0.6em]neuron3\n.south) -- ([yshift=0.0em]neuron3\n.south); } \begin{pgfonlayer}{background} \node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron31) (neuron34)] (layer04) {}; \node [anchor=east] (layer04label) at (layer04.west) {\scriptsize{��$L$(���)}}; \end{pgfonlayer} \visible<2->{ \node [neuronnode,draw=red,fill=red!20!white,inner sep=1pt] (neuron12new) at (2 * \neuronsep,3em) {}; \node [anchor=east] (neuronsamplelabel) at ([yshift=-1em]layer02label.south east) {\alert{\textbf{\tiny{��$l$��, ��$i$����Ԫ}}}}; \draw [->,dashed,very thick,red] ([xshift=-0.2em,yshift=0.2em]neuronsamplelabel.east) .. controls +(30:1) and +(220:1) .. 
([xshift=-0em,yshift=-0em]neuron12new.210);
}
% Slide 3 only: highlight the outgoing connections of neuron12.
\visible<3>{
  \foreach \n in {1,...,4}{
    \draw [<-,thick,red] (neuron2\n.south) -- (neuron12.north);
  }
}
% From slide 5: highlight one connection and label its weight.
\visible<5->{
  \draw [<-,thick,red] (neuron14.south) -- (neuron04.north);
  \node [anchor=north] (wlabel) at (layer02.south east) {\alert{\scriptsize{$w_{4,4}^{l}$}}};
}
% Notation legend; every entry uses the layer index k, matching its own symbol.
\visible<3->{
  \node [anchor=west,align=left] (line01) at ([xshift=1em,yshift=1em]layer04.east) {\footnotesize{$h_{i}^{k}$����$k$��, ��$i$����Ԫ�����}};
  \node [anchor=north west,align=left] (line02) at (line01.south west) {\footnotesize{$\textbf{h}^{k}$����$k$������}};
  \node [anchor=north west,align=left] (line03) at (line02.south west) {\footnotesize{$\textbf{s}^{k}$����$k$������Ա任$\textbf{s}^k=\textbf{h}^{k-1}\textbf{w}^k$}};
  \node [anchor=north west,align=left] (line04) at (line03.south west) {\footnotesize{$f^{k}$����$k$��ļ����$\textbf{h}^k=f^k(\textbf{s}^k)$}};
}
\visible<4->{
  \node [anchor=north west,align=left] (line05) at (line04.south west) {\footnotesize{$\textbf{h}^{K}$�������������}};
}
\visible<5->{
  \node [anchor=north west,align=left] (line06) at (line05.south west) {\footnotesize{$w_{j,i}^{k}$����$k-1$����Ԫ$j$��}\\\footnotesize{��$k$����Ԫ$i$������Ȩ��}};
  \node [anchor=north west,align=left] (line07) at (line06.south west) {\footnotesize{$\textbf{w}^{k}$����$k-1$�����$k$���}\\\footnotesize{����Ȩ��}};
}
\end{scope}
\end{tikzpicture}
\end{center}
\vspace{-1.5em}
\visible<6->{
\begin{displaymath}
\textrm{���ڵ�}k\textrm{��}: \textbf{h}^k = f^k(\textbf{s}^k) = f^k(\sum_j h_{j}^{k-1}w_{j,i}^k) = f^k(\textbf{h}^{k-1} \textbf{w}^k)
\end{displaymath}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Back-propagation, output layer: chain h^{K-1} -> s^K -> h^K -> L
\begin{frame}{���� - �����}
\begin{itemize}
\item �����(������)
\end{itemize}
\vspace{-1.0em}
\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=center,minimum height=1.7em,fill=yellow!20,draw] (h) at (0,0) {$\textbf{h}^{K-1}$};
\node [anchor=west,minimum
height=1.7em,fill=blue!20,draw] (s) at ([xshift=5.5em]h.east) {$\textbf{s}^{K}$}; \node [anchor=west,minimum height=1.7em,fill=green!20,draw] (h2) at ([xshift=5.5em]s.east) {$\textbf{h}^{K}$}; \node [anchor=west,minimum height=1.7em,fill=orange!20,draw] (l) at ([xshift=5.5em]h2.east) {$L$}; \draw [->] (h.east) -- (s.west); \draw [->] (s.east) -- (h2.west); \draw [->] (h2.east) -- (l.west) node [pos=0.5,above] {\tiny{��ʧ}}; \node [anchor=south west,inner sep=2pt] (step100) at ([xshift=0.5em,yshift=-0.8em]h.north east) {\tiny{$\textbf{s}^K = \textbf{h}^{K-1} \textbf{w}^K$}}; \node [anchor=south west,inner sep=2pt] (step101) at (step100.north west) {\tiny{���Ա任}}; \node [anchor=south west,inner sep=2pt] (step200) at ([xshift=0.5em,yshift=-0.8em]s.north east) {\tiny{$\textbf{h}^K = f^K(\textbf{s}^K)$}}; \node [anchor=south west,inner sep=2pt] (step201) at (step200.north west) {\tiny{�����}}; \node [anchor=south,inner sep=1pt] (outputlabel) at ([yshift=0.0em]h2.north) {\tiny{\textbf{�����}}}; \visible<2->{ \draw[decorate,thick,decoration={brace,mirror,raise=0.4em,amplitude=2mm}] (h.south west) -- (s.south west) node [pos=0.5,below,yshift=-1em] {\scriptsize{\textbf{��һ�Σ����Ա任}}}; } \visible<3->{ \draw[decorate,thick,decoration={brace,mirror,raise=0.4em,amplitude=2mm}] ([xshift=0.2em]s.south west) -- (l.south east) node [pos=0.5,below,yshift=-1em] (step2) {\scriptsize{\textbf{�ڶ��Σ������+��ʧ����}}}; } \visible<4->{ \draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]l.north) -- ([yshift=1em,xshift=0.1em]s.north) node [pos=0.5,above] {\tiny{�������ݶ�\alert{$\frac{\partial L}{\partial \textbf{s}^K} = ?$}}}; \draw [-,very thick,red] ([yshift=0.5em]l.north) -- ([yshift=1.5em]l.north); \draw [-,very thick,red] ([yshift=0.5em]s.north) -- ([yshift=1.5em]s.north); } \end{scope} \end{tikzpicture} \end{center} \begin{itemize} \item<4-> ��������������봫���ݶȣ���������ȿ��ǽζ�\visible<5->{����$\pi^k = \frac{\partial L}{\partial \textbf{s}^k}$��ʾ��ʧ$L$�ڵ�$k$�㼤������봦���ݶȣ�������ʽ����} 
\vspace{-1.5em} \visible<5->{ \begin{eqnarray} \pi^K & = & \frac{\partial L}{\partial \textbf{s}^K} \nonumber \\ & = & \frac{\partial L}{\partial \textbf{h}^K} \cdot \frac{\partial \textbf{h}^K}{\partial \textbf{s}^K} \nonumber \\ & = & \frac{\partial L}{\partial \textbf{h}^K} \cdot \frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K} \nonumber %\frac{\partial L}{\partial \textbf{w}^K} & = & \frac{\partial L}{\partial \textbf{h}^K} \cdot \frac{\partial \textbf{h}^K}{\partial \textbf{w}^K} \nonumber \\ % & \visible<4->{=} & \visible<4->{\frac{\partial L}{\partial \textbf{h}^K} \cdot \frac{\partial f^K(\textbf{h}^{K-1} \textbf{w}^K)}{\partial \textbf{w}^K} \ \ \ \ (\textrm{��Ϊ}\textbf{h}^K=f^K(\textbf{h}^{K-1} \textbf{w}^K))} \nonumber \\ % & \visible<5->{=} & \visible<5->{\frac{\partial L}{\partial \textbf{h}^K} \cdot \frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K} \cdot \frac{\partial \textbf{s}^K}{\partial \textbf{w}^K} \ \ \ (\textrm{��Ϊ}\textbf{s}^K=\textbf{h}^{K-1} \textbf{w}^K)} \nonumber \end{eqnarray} } \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% �����ķ��� - �������ӵ����� \begin{frame}{���� - �����($\textbf{s}^K$�����ݶ�)} \begin{center} \begin{tikzpicture} \begin{scope} \node [anchor=center] (factor00) at (0,0) {${\displaystyle \pi^K \ = }$}; \node [anchor=west] (factor01) at (factor00.east) {${\displaystyle \frac{\partial L}{\partial \textbf{h}^K}}$}; \node [anchor=west,inner sep=1pt] (factor02) at (factor01.east) {${\displaystyle \cdot}$}; \node [anchor=west] (factor03) at (factor02.east) {${\displaystyle \frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K}}$}; \begin{pgfonlayer}{background} \visible<2-4>{ \node [rectangle,inner sep=0em,fill=red!20] [fit = (factor01)] (p1) {}; } \visible<3-4>{ \node [rectangle,inner sep=0em,fill=blue!20] [fit = (factor03)] (p2) {}; } \visible<5->{ \node [circle,inner sep=0em,fill=green!20] [fit = (factor02)] 
(p3) {};
}
\end{pgfonlayer}
\end{scope}
\end{tikzpicture}
\end{center}
\begin{itemize}
% Factor 1: gradient of the loss w.r.t. the network output.
% For L = 1/2 ||yhat - h||^2 the derivative w.r.t. h is h - yhat.
\item<2-> \raisebox{-0.7em}{\tikz{\node [anchor=west,fill=red!20] (factor01) at (factor00.east) {$\frac{\partial L}{\partial \textbf{h}^K}$};}} ��ʾ��ʧ$L$�����������ı仯�ʣ����磬����$L = \frac{1}{2} ||\hat{\textbf{y}} - \textbf{h}^K||^2$����$\frac{\partial L}{\partial \textbf{h}^K} = \textbf{h}^K - \hat{\textbf{y}}$
% Factor 2: derivative of the activation w.r.t. its own input (sigmoid example).
\item<3-> \raisebox{-0.7em}{\tikz{\node [anchor=west,fill=blue!20] (factor01) at (factor00.east) {$\frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K}$};}} ��ʾ�������������Լ�������ı仯�ʣ����磬����$f(\textbf{s}) = \frac{1}{1+\exp(-\textbf{s})}$����$\frac{\partial f(\textbf{s})}{\partial \textbf{s}} = f(\textbf{s})(1-f(\textbf{s}))$
% The gradient at s^K is the product of the two factors; s^K is typeset bold like elsewhere on the slide.
\item<4-> ����������ֱ������$\textbf{s}^K$�����ݶ��൱������ʧ������($\frac{\partial L}{\partial \textbf{h}^K}$)�ͼ������($\frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K}$) �ij˻�\visible<5->{��ע���������в������ǵ�Ԫ����������������Ԫ�˷�}
\end{itemize}
\visible<4->{
\vspace{-0.5em}
\begin{center}
\begin{tikzpicture}
\begin{scope}
% Chain s^K -> h^K -> L with the two backward gradient steps drawn above it.
\node [anchor=west,minimum height=1.7em,fill=blue!20,draw] (s) at (0,0) {$\textbf{s}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=green!20,draw] (h2) at ([xshift=5.5em]s.east) {$\textbf{h}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=orange!20,draw] (l) at ([xshift=5.5em]h2.east) {$L$};
\draw [->] (s.east) -- (h2.west);
\draw [->] (h2.east) -- (l.west);
\draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]l.north) -- ([yshift=1em,xshift=0.1em]h2.north) node [pos=0.5,above] {\tiny{���ݶ�\alert{$\frac{\partial L}{\partial \textbf{h}^K} = ?$}}};
\draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]h2.north) -- ([yshift=1em,xshift=0.1em]s.north) node [pos=0.5,above] {\tiny{���ݶ�\alert{$\frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K} = ?$}}};
\draw [-,very thick,red] ([yshift=0.5em]l.north) -- ([yshift=1.5em]l.north);
\draw [-,very thick,red] ([yshift=0.5em]h2.north) -- ([yshift=1.5em]h2.north);
\draw [-,very
thick,red] ([yshift=0.5em]s.north) -- ([yshift=1.5em]s.north); \end{scope} \end{tikzpicture} \end{center} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% �����ķ��� - �� dL/dw \begin{frame}{���� - �����($\textbf{h}^{K-1}$�����ݶ�)} \begin{itemize} \item �Ѿ��õ�$\textbf{s}^K$�����ݶ�\visible<2->{�����������������} \begin{enumerate} \item<2-> ������ʧ$L$���ڵ�$K$���������$\textbf{w}^K$���ݶȣ�$\frac{\partial L}{\partial \textbf{w}^K}$ \item<2-> ������ʧ$L$���ڵ�$K$������$\textbf{h}^{K-1}$���ݶȣ�$\frac{\partial L}{\partial \textbf{h}^{K-1}}$ \end{enumerate} \end{itemize} \vspace{-0.8em} \begin{center} \begin{tikzpicture} \begin{scope} \node [anchor=center,minimum height=1.7em,fill=yellow!20,draw] (h) at (0,0) {$\textbf{h}^{K-1}$}; \node [anchor=west,minimum height=1.7em,fill=blue!20,draw] (s) at ([xshift=5.5em]h.east) {$\textbf{s}^{K}$}; \node [anchor=west,minimum height=1.7em,fill=green!20,draw] (h2) at ([xshift=5.5em]s.east) {$\textbf{h}^{K}$}; \node [anchor=west,minimum height=1.7em,fill=orange!20,draw] (l) at ([xshift=5.5em]h2.east) {$L$}; \draw [->] (h.east) -- (s.west); \draw [->] (s.east) -- (h2.west); \draw [->] (h2.east) -- (l.west) node [pos=0.5,above] {\tiny{��ʧ}}; \node [anchor=south west,inner sep=2pt] (step100) at ([xshift=0.5em,yshift=-0.8em]h.north east) {\tiny{$\textbf{s}^K = \textbf{h}^{K-1} \textbf{w}^K$}}; \node [anchor=south west,inner sep=2pt] (step200) at ([xshift=0.5em,yshift=-0.8em]s.north east) {\tiny{$\textbf{h}^K = f^K(\textbf{s}^K)$}}; \node [anchor=south,inner sep=1pt] (outputlabel) at ([yshift=0.0em]h2.north) {\tiny{\textbf{�����}}}; \node [anchor=south west] (slabel) at ([yshift=1em,xshift=0.3em]s.north) {\scriptsize{\textbf{\alert{�Ѿ��õ���$\pi^K = \frac{\partial L}{\partial \textbf{s}^K}$}}}}; \draw [->,red] ([yshift=0.3em]slabel.south) .. controls +(south:0.5) and +(north:0.5) .. 
([xshift=0.5em]s.north);
\visible<2->{
\draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]s.north) -- ([yshift=1em,xshift=0.1em]h.north) node [pos=0.5,above] {\tiny{\alert{$\frac{\partial L}{\partial \textbf{w}^K} = ?$, $\frac{\partial L}{\partial \textbf{h}^{K-1}} = ?$}}};
\draw [-,very thick,red] ([yshift=0.5em]h.north) -- ([yshift=1.5em]h.north);
\draw [-,very thick,red] ([yshift=0.5em]s.north) -- ([yshift=1.5em]s.north);
}
\end{scope}
\end{tikzpicture}
\end{center}
\begin{itemize}
\item<3-> ����$\textbf{s}^K = \textbf{h}^{K-1} \textbf{w}^K$������$\pi^K = \frac{\partial L}{\partial \textbf{s}^K}$�Ѿ���⣬���Եõ�(��ҪһЩ��ѧ���������Դ�����֪ʶ���Ƶ�һ��!)��
\vspace{-1.2em}
\begin{eqnarray}
\frac{\partial L}{\partial \textbf{w}^K} & = & [\textbf{h}^{K-1}]^T \pi^K \nonumber \\
\frac{\partial L}{\partial \textbf{h}^{K-1}} & = & \pi^K [\textbf{w}^K]^T\nonumber
\end{eqnarray}
���$[\textbf{A}]^T$��ʾ$\textbf{A}$��ת�ã�$\pi^K [\textbf{w}^K]^T$��ʾ����$\pi^K$\alert{�����}$\textbf{w}^K$��ת��
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% ����ķ���
\begin{frame}{���� - ����}
\begin{itemize}
\item ������������$k$��$\textbf{h}^k = f^k(\textbf{s}^k) = f^k(\textbf{h}^{k-1}\textbf{w}^k)$��������������������ݶ�$\frac{\partial L}{\partial \textbf{h}^{k}}$����Ҫ
\begin{enumerate}
\item ������ʧ$L$���ڵ�$k$���������$\textbf{w}^k$���ݶȣ�$\frac{\partial L}{\partial \textbf{w}^k}$
\item ������ʧ$L$���ڵ�$k$������$\textbf{h}^{k-1}$���ݶȣ�$\frac{\partial L}{\partial \textbf{h}^{k-1}}$
\end{enumerate}
\item<2-> ֱ��������һҳ�ķ��������Խ�$\pi^k=\frac{\partial L}{\partial \textbf{h}^{k}} \frac{\partial f^k(\textbf{s}^k)}{\partial \textbf{s}^{k}}$����
\vspace{-0.0em}
\begin{eqnarray}
\frac{\partial L}{\partial \textbf{w}^k} & = & [\textbf{h}^{k-1}]^T \pi^k \nonumber \\
\frac{\partial L}{\partial \textbf{h}^{k-1}} & = & \pi^k [\textbf{w}^k]^T\nonumber
\end{eqnarray}
\end{itemize}
\visible<3->{
\begin{center}
\begin{tikzpicture} \begin{scope} \node [anchor=center,draw,fill=red!20,minimum height=1.8em,minimum width=2.5em] (h) at (0,0) {$\textbf{h}^{k-1}$}; \node [anchor=west,draw,fill=blue!20,minimum height=1.8em,minimum width=2.5em] (s) at ([xshift=6em]h.east) {$\textbf{s}^{k}$}; \node [anchor=west,draw,fill=green!20,minimum height=1.8em,minimum width=2.5em] (h2) at ([xshift=6em]s.east) {$\textbf{h}^{k}$}; \node [anchor=east] (prev) at ([xshift=-2em]h.west) {...}; \node [anchor=west] (next) at ([xshift=2em]h2.east) {...}; \draw [->,thick] ([xshift=0.1em]prev.east) -- ([xshift=-0.1em]h.west); \draw [->,thick] ([xshift=0.1em]h.east) -- ([xshift=-0.1em]s.west) node [pos=0.5,below] {\tiny{$\textbf{s}^k = \textbf{h}^{k-1}\textbf{w}^k$}}; \draw [->,thick] ([xshift=0.1em]s.east) -- ([xshift=-0.1em]h2.west) node [pos=0.5,below] {\tiny{$\textbf{h}^k = f(\textbf{s}^{k})$}}; \draw [->,thick] ([xshift=0.1em]h2.east) -- ([xshift=-0.1em]next.west); \visible<4->{ \draw [<-,thick,red] ([xshift=0.1em,yshift=0.4em]h2.east) -- ([xshift=-0.1em,yshift=0.4em]next.west) node [pos=0.8,above] {\tiny{����}}; } \visible<5->{ \draw [<-,thick,red] ([xshift=0.1em,yshift=0.4em]s.east) -- ([xshift=-0.1em,yshift=0.4em]h2.west) node [pos=0.5,above] {\tiny{����}}; } \visible<6->{ \draw [<-,thick,red] ([xshift=0.1em,yshift=0.4em]h.east) -- ([xshift=-0.1em,yshift=0.4em]s.west) node [pos=0.5,above] {\tiny{����}}; } \visible<7->{ \draw [->,thick,red,dashed] ([yshift=-0.1em]h.south) -- ([yshift=-1em]h.south) -- ([yshift=-1em]h2.south) -- ([yshift=-0.1em]h2.south); \node [anchor=north,red] (recur) at ([yshift=-1em]s.south) {\scriptsize{$k=k-1$�ظ���������}}; } \visible<4->{ \node [anchor=south] (h2label) at (h2.north) {$\frac{\partial L}{\partial \textbf{h}^{k}}$}; } \visible<5->{ \node [anchor=south] (slabel) at (s.north) {$\pi^k = \frac{\partial L}{\partial \textbf{s}^{k}}$}; } \visible<6->{ \node [anchor=south] (hlabel) at (h.north) {$\frac{\partial L}{\partial \textbf{h}^{k-1}}$, $\frac{\partial L}{\partial 
\textbf{w}^{k}}$}; }
\end{scope}
\end{tikzpicture}
\end{center}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% ����ʵ��
\begin{frame}{������ʵ��}
\begin{itemize}
\item ����һ����������������ʵ�ַ���
\end{itemize}
\begin{tcolorbox} [bicolor,sidebyside,righthand width=3.5cm,size=title,frame engine=empty, colback=blue!10!white,colbacklower=black!5!white]
{\scriptsize
\begin{tabbing}
\texttt{XTensor x, y, gold, h[5], w[5], s[5];} \\
\texttt{XTensor dh[5], dw[5], ds[5];} \\
\texttt{...} // ǰ����� \\
\texttt{h[0] = x;} \\
\texttt{y = h[4];} \\
\visible<2->{
\texttt{} \\
\texttt{CrossEntropyBackward(dh[4], y, gold);} \\
\texttt{SoftmaxBackward(y, s[4], dh[4], ds[4]);}\\
\texttt{MMul(h[3], {\tiny X\_TRANS}, ds[4], {\tiny X\_NOTRANS}, dw[4]);}\\
}
\visible<3->{
\texttt{} \\
\texttt{dh[2] = dh[3];}\\
\texttt{dh[1] = dh[3];}\\
}
\visible<4->{
\texttt{} \\
\texttt{ReluBackward(h[2], s[2], dh[2], ds[2]);}\\
\texttt{MMul(h[1], {\tiny X\_TRANS}, ds[2], {\tiny X\_NOTRANS}, dw[2]);}\\
\texttt{} \\
\texttt{ReluBackward(h[1], s[1], dh[1], ds[1]);}\\
\texttt{MMul(h[0], {\tiny X\_TRANS}, ds[1], {\tiny X\_NOTRANS}, dw[1]);}\\
}
\visible<5->{
\texttt{} \\
\texttt{for(unsigned i = 0; i < 5; i++)\{} \\
\texttt{} \ \ \ \ ...
// ͨ��\alert{\texttt{dw[i]}}���ʲ������ݶ�\\ \texttt{\}} } \end{tabbing} } \tcblower \begin{center} \begin{tikzpicture} \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h1) at (0,0) {\tiny{x (input)}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h2) at ([yshift=1.5em]h1.north) {\tiny{h1 = Relu(x * w1)}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h3) at ([yshift=1.5em]h2.north) {\tiny{h2 = Relu(h1 * w2)}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h4) at ([yshift=1.5em]h3.north) {\tiny{h3 = h2 + h1}}; \visible<1-3>{\draw [->,thick] (h1.north) -- (h2.south);} \visible<1-3>{\draw [->,thick] (h2.north) -- (h3.south);} \visible<1-2>{\draw [->,thick] (h3.north) -- (h4.south);} \visible<1-2>{\draw [->,thick,rounded corners] (h2.east) -- ([xshift=0.5em]h2.east) -- ([xshift=0.5em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=1.5em]h3.north east);} \visible<4->{\draw [<-,very thick,red] (h1.north) -- (h2.south);} \visible<4->{\draw [<-,very thick,red] (h2.north) -- (h3.south);} \visible<3->{\draw [<-,very thick,red] (h3.north) -- (h4.south);} \visible<3->{\draw [<-,very thick,red,rounded corners] (h2.east) -- ([xshift=0.5em]h2.east) -- ([xshift=0.5em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=1.5em]h3.north east);} \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8.0em,minimum height=1.2em,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (slayer) at 
([yshift=1.5em]h4.north) {\tiny{h4 = Softmax(h3 * w4) (output)}};
\node [anchor=south] (losslabel) at (slayer.north) {\scriptsize{\textbf{Cross Entropy Loss}}};
\visible<1>{\draw [->,thick] (h4.north) -- (slayer.south);}
\visible<2->{\draw [<-,very thick,red] (h4.north) -- (slayer.south);}
\end{tikzpicture}
\end{center}
\end{tcolorbox}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% �Զ��ֵ�ʵ��
\begin{frame}{����ʵ��}
\begin{itemize}
\item ���˵��ǣ����ڼ������е��������ѧϰ��ܶ�ʵ�����Զ��֣�һ���������Ը㶨
\end{itemize}
\begin{tcolorbox} [bicolor,sidebyside,righthand width=3.5cm,size=title,frame engine=empty, colback=blue!10!white,colbacklower=black!5!white]
{\scriptsize
\begin{tabbing}
\texttt{XTensor x, loss, gold, h[5], w[5], b[5];} \\
\texttt{...} \\
\texttt{} \\
\texttt{h[1] = Relu(MMul(x, w[1]) + b[1]);} \\
\texttt{h[2] = Relu(MMul(h[1], w[2]) + b[2]);} \\
\texttt{h[3] = HardTanH(h[2]);} \\
\texttt{h[4] = Softmax(MMul(h[3], w[4]));} \\
\texttt{loss = CrossEntropy(h[4], gold);} \\
\texttt{} \\
\texttt{XNet net;}\\
\alert{\texttt{net.Backward(loss);} //һ�д���ʵ���Զ���}\\
\texttt{} \\
\texttt{for(unsigned i = 0; i < 5; i++)\{} \\
\texttt{} \ \ \ \ ...
// ͨ��\alert{\texttt{w[i].grad}}���ʲ������ݶ�\\ \texttt{\}} \end{tabbing} } \tcblower \begin{center} \begin{tikzpicture} \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.0em,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h1) at (0,0) {\tiny{x (input)}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h2) at ([yshift=1.0em]h1.north) {\tiny{h1 = Relu(x * w1 + b1)}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h3) at ([yshift=1.0em]h2.north) {\tiny{h2 = Relu(h1 * w2 + b2)}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h4) at ([yshift=1.0em]h3.north) {\tiny{h3 = HardTanh(h2)}}; \draw [->,thick] (h1.north) -- (h2.south); \draw [->,thick] (h2.north) -- (h3.south); \draw [->,thick] (h3.north) -- (h4.south); \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8.0em,minimum height=1.0em,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (slayer) at ([yshift=1.0em]h4.north) {\tiny{h4 = Softmax(h3 * w4) (output)}}; \node [anchor=south] (losslabel) at (slayer.north) {\scriptsize{\textbf{Cross Entropy Loss}}}; \draw [->,thick] (h4.north) -- (slayer.south); \end{tikzpicture} \end{center} \end{tcolorbox} \begin{itemize} \item ����������Զ���ʵ��Ҳ���Բο�TensorFlow�� PyTorch�ȹ��� \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ǰ�������̼�����ֵ�ù�ע������ \begin{frame}{ǰ����㼰��������} \begin{itemize} \item \alert{ǰ�����}ʵ���Ͼ������繹���Ĺ��̣������ֳ��÷�ʽ \begin{itemize} \item \textbf{��̬ͼ}(��PyTorch��NiuTensor)��д�꺯������ʽ��ǰ����㼴��ɣ����ڵ��� 
\item \textbf{��̬ͼ}(��TensorFlow)����������ʽ��ɺ����ܵõ�ǰ�����������Ҫ���Ե���һ��Forward���������Ǽ���ͼ���Խ�������Ż���ִ��Ч�ʽϸ� \end{itemize} \item<2-> ����һЩ���ѧϰϵͳʵ�ֵ����⣬ֵ�ù�ע��������Щ�������˱��γ̵ķ�Χ \begin{itemize} \item \textbf{�ֲ�ʽѵ��}�����ڸ���ģ�͵ĺ�������ѵ������Ҫ���ö���豸����������ͬʱѵ�� \item \textbf{�;��ȼ���}��Ϊ�����Ч�ʿ��Բ��ð뾫�Ȼ��߶��������м��� \item \textbf{ģ��ѹ��}���������࣬����ѹ��ģ�ͣ�ʹ��ģ�����ڴ洢ͬʱ���ϵͳ����Ч�� \item \textbf{ѵ�������ͳ���ѡ��}����ͬ����������Ҫ��ͬ��ѵ�����ԣ������������ã��Ӻܶ࣬��Ҫ���۾��� \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ \section{������ģ��} %%%------------------------------------------------------------------------------------------------------------ %%% outline: neural language modeling \begin{frame}{��������} \vspace{6em} \begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black,drop fuzzy shadow] {\Large \textbf{��ν���Ԫ����Ӧ�õ�NLP��} \vspace{0.4em} \textbf{- ����ģ�͵������罨ģ} } \end{tcolorbox} \vspace{2em} \begin{center} \begin{tikzpicture} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ \subsection{ǰ����ѭ������ע����������} %%%------------------------------------------------------------------------------------------------------------ %%% ��NLP���������ܸ�ʲô�� \begin{frame}{��Ȼ���Դ�������������} \begin{itemize} \item �����緽������Ȼ���Դ���(NLP)�������µ�˼· \end{itemize} \begin{tabular} {l | l} \textbf{��ͳ����ͳ�Ƶķ���} & \textbf{���ѧϰ����} \\ \hline ����\alert{��ɢ}�ռ�ı�ʾģ�� & ����\alert{����}�ռ�ı�ʾģ�� \\ NLP�����\alert{�����ṹ}���� & �������ṹ���裬\alert{�˵���}ѧϰ \\ \alert{��������}Ϊ�� & ����������������Ҫ\alert{�������} \\ �����������\alert{�洢����Դ} & ģ�ʹ洢���С����\alert{������} \end{tabular} \vspace{0em} \begin{itemize} \item<2-> ����ģ������Ҳ����ʹ�����ѧϰ����(Ч���dz���) \begin{itemize} \item 
����ģ��Ҫ�ش���������������һ���ʴ��ĺû� \item ���Ի���һ�µڶ����ᵽ��$n$Ԫ�ģ�� \end{itemize} \vspace{0.5em} \begin{displaymath} \textbf{P}(w_0 w_1 ... w_m) = ? \end{displaymath} \end{itemize} \visible<3->{ \begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=blue!5!white,colframe=blue!75!black,drop fuzzy shadow] {\Large \textbf{��ζԴʴ������ɸ��ʽ��н�ģ��} } \end{tcolorbox} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% n-gram����ģ�� \begin{frame}{$n$-gram����ģ��} \begin{itemize} \item \textbf{��ʽ����} \begin{eqnarray} \textrm{P}(w_1 w_2 ... w_m) & = & \textrm{P}(w_1) \textrm{P}(w_2|w_1) \textrm{P}(w_3 | w_1 w_2) ... \nonumber \\ & & \textrm{P}(w_m|w_1...w_{m-1}) \nonumber \end{eqnarray} \item<2-> \textbf{��ͳ$n$-gram����ģ��}����ǰ�ʽ�������ǰ��$n-1$���� \begin{eqnarray} \textrm{P}(w_1 w_2 ... w_m) & = & \textrm{P}(w_1) \textrm{P}(w_2|w_1) \textrm{P}(w_3 | w_1 w_2) ... \nonumber \\ & & \textrm{P}(w_m|\underbrace{w_{m-n+1}...w_{m-1}}_{\text{ǰ��$n-1$����}}) \nonumber \end{eqnarray} \vspace{-1.0em} \ \ \ \ \ \ ���� \begin{displaymath} \textrm{P}(w_m | w_{m-n+1} ... w_{m-1}) = \frac{\textrm{count}(w_{m-n+1}...w_{m})}{\textrm{count}(w_{m-n+1}...w_{m-1})} \end{displaymath} \ \ \ \ \ \ $\textrm{count}(\cdot)$��ʾ��ѵ��������ͳ�Ƶ�Ƶ�� \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% n-gram lm => neural lm \begin{frame}{$n$-gram���ɸ��ʵ������罨ģ} \begin{itemize} \item ��ͳ��$n$-gram����ģ��ʵ���Ͼ���һ����ѯ������$w_{m-n+1} ... w_{m}$��ѯ$n$-gram����$\textrm{P}(w_m | w_{m-n+1} ... w_{m-1})$ \begin{itemize} \item ���ű���������һ��$w_{m-n+1} ... w_{m}$��\alert{��ɢ��ʾ} \item ����$n$������\alert{����ϡ��}�����dz����أ���Ϊ�������$n$-gram��û������ \item ��ΪҪά��$n$-gram���������洢���Ĵ� \end{itemize} \item<2-> ��һ��˼·��ֱ�Ӷ�$\textrm{P}(w_m | w_{m-n+1} ... w_{m-1})$���������ռ佨ģ�������庯��$g$�����������$w_{m-n+1} ... 
w_{m}$�� \begin{displaymath} g(w_{m-n+1} ... w_{m}) \approx \textrm{P}(w_m | w_{m-n+1} ... w_{m-1}) \end{displaymath} \item<3-> ��ߴ����Եķ�����ǰ��������(FNN)����ģ�� \begin{itemize} \item �����еľ��䣬���ִ�������ģ�͵���Ʋ�����ԶӰ�� \end{itemize} \textbf{A Neural Probabilistic Language Model}\\ \textbf{Bengio et al., 2003, Journal of Machine Learning Research 3: 1137-1155} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% FNNLM architecture \begin{frame}{ǰ������������ģ��(Bengio et al., 2003)} \begin{itemize} \item ��4-gram����ģ��Ϊ�� \end{itemize} \vspace{-1em} \begin{center} \begin{tikzpicture} \begin{scope} \node [anchor=west] (w0) at (0,0) {\footnotesize{$w_{i-3}$}}; \node [anchor=west] (w1) at ([xshift=2em]w0.east) {\footnotesize{$w_{i-2}$}}; \node [anchor=west] (w2) at ([xshift=2em]w1.east) {\footnotesize{$w_{i-1}$}}; \node [anchor=north] (index0) at ([yshift=0.5em]w0.south) {\tiny(index)}; \node [anchor=north] (index1) at ([yshift=0.5em]w1.south) {\tiny(index)}; \node [anchor=north] (index2) at ([yshift=0.5em]w2.south) {\tiny(index)}; \node [anchor=south,draw,inner sep=3pt] (e0) at ([yshift=1em]w0.north) {\tiny{$e_0=w_{i-3} \textbf{C}$}}; \node [anchor=south,draw,inner sep=3pt] (e1) at ([yshift=1em]w1.north) {\tiny{$e_1=w_{i-2} \textbf{C}$}}; \node [anchor=south,draw,inner sep=3pt] (e2) at ([yshift=1em]w2.north) {\tiny{$e_2=w_{i-1} \textbf{C}$}}; \node [anchor=south,draw,minimum width=9em,inner sep=3pt] (h0) at ([yshift=1.5em]e1.north) {\tiny{$h_0=\textrm{Tanh}([e_0,e_1,e_2] \textbf{H} + \textbf{d})$}}; \node [anchor=south,draw,minimum width=9em,inner sep=3pt] (h1) at ([yshift=1.5em]h0.north) {\tiny{$y=\textrm{Softmax}(h_0 \textbf{U})$}}; \node [anchor=south] (ylabel) at ([yshift=1em]h1.north) {\footnotesize{$\textrm{P}(w_i|w_{i-3}w_{i-2}w_{i-1})$}}; \draw [->] ([yshift=0.1em]w0.north) -- ([yshift=-0.1em]e0.south); \draw [->] ([yshift=0.1em]w1.north) -- 
([yshift=-0.1em]e1.south); \draw [->] ([yshift=0.1em]w2.north) -- ([yshift=-0.1em]e2.south); \draw [->] ([yshift=0.1em]e0.north) -- ([xshift=-2em,yshift=-0.1em]h0.south); \draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]h0.south); \draw [->] ([yshift=0.1em]e2.north) -- ([xshift=2em,yshift=-0.1em]h0.south); \draw [->] ([yshift=0.1em]h0.north) -- ([yshift=-0.1em]h1.south); \draw [->] ([yshift=0.1em]h1.north) -- ([yshift=-0.1em]ylabel.south); \visible<6->{ \draw [->,dashed,red,thick] ([xshift=1em,yshift=0.1em]e1.north) -- ([xshift=1em,yshift=-0.1em]h1.south); \draw [->,dashed,red,thick] ([xshift=-1em,yshift=0.1em]e0.north) .. controls +(north:2) and +(south:1) .. ([xshift=-3em,yshift=-0.1em]h1.south); \draw [->,dashed,red,thick] ([xshift=1em,yshift=0.1em]e2.north) .. controls +(north:2) and +(south:1) .. ([xshift=3em,yshift=-0.1em]h1.south); } \begin{pgfonlayer}{background} \visible<2->{ \node [rectangle,inner sep=0.1em,fill=ugreen!20!white] [fit = (w0) (index0)] (wordbox0) {}; \node [rectangle,inner sep=0.1em,fill=ugreen!20!white] [fit = (w1) (index1)] (wordbox1) {}; \node [rectangle,inner sep=0.1em,fill=ugreen!20!white] [fit = (w2) (index2)] (wordbox2) {}; } \end{pgfonlayer} \visible<3->{ \node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e0) at ([yshift=1em]w0.north) {\tiny{$e_0=w_{i-3} \textbf{C}$}}; \node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e1) at ([yshift=1em]w1.north) {\tiny{$e_1=w_{i-2} \textbf{C}$}}; \node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e2) at ([yshift=1em]w2.north) {\tiny{$e_2=w_{i-1} \textbf{C}$}}; } \visible<5->{ \node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (h0) at ([yshift=1.5em]e1.north) {\tiny{$h_0=\textrm{Tanh}([e_0,e_1,e_2] \textbf{H} + \textbf{d})$}}; \node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (h1) at ([yshift=1.5em]h0.north) {\tiny{$y=\textrm{Softmax}(h_0 \textbf{U})$}}; } \visible<2->{ \node [anchor=north west] (indexlabel0) 
at ([yshift=-0.5em,xshift=-1.2em]index0.south west) {\scriptsize{{\color{ugreen} \textbf{One-hot��ʾ}}}}; \node [anchor=north west] (indexlabel1) at ([yshift=0.3em]indexlabel0.south west) {\scriptsize{ÿ������һ���ʻ����С��0-1������ʾ��}}; \node [anchor=north west] (indexlabel2) at ([yshift=0.3em]indexlabel1.south west) {\scriptsize{��һλΪ1������Ϊ0�����磺}}; \node [anchor=north west] (indexlabel3) at ([yshift=0.0em]indexlabel2.south west) {\scriptsize{$(0,0,{\red 1},0,0,0,0,0,0,0,0,0)$}}; \node [anchor=north west] (indexlabel4) at ([xshift=1em,yshift=0.0em]indexlabel3.south west) {\scriptsize{�ʱ��е�3����}}; \draw [->] ([xshift=1.2em,yshift=-0.2em]indexlabel4.north west) -- ([xshift=1.2em,yshift=0.3em]indexlabel4.north west); } \visible<3->{ \node [anchor=west] (embedinglabel0) at ([xshift=1em,yshift=-1em]e2.east) {\scriptsize{{\blue \textbf{�ʵķֲ�ʽ��ʾ}}}}; \node [anchor=north west] (embedinglabel1) at ([yshift=0.3em]embedinglabel0.south west) {\scriptsize{�ʵ�0-1��ʾ��һ������$\textbf{C}$���������}}; \node [anchor=north west] (embedinglabel2) at ([yshift=0.3em]embedinglabel1.south west) {\scriptsize{��$\textbf{C}$����һ����ѯ��}}; } \visible<4->{ \node [anchor=north west] (wordvector) at ([yshift=-1em]embedinglabel2.south west) {\tiny{$(0,0,{\red 1},...)$}}; \node [anchor=west] (timeslabel) at ([xshift=-0.3em]wordvector.east) {\footnotesize{$\times$}}; \node [anchor=north west,inner sep=2pt] (embeddingmatrix) at ([xshift=1em]wordvector.north east) {\tiny{$\begin{pmatrix} 0 & 1 & 3 \\ .2 & -1 & .3 \\ 1 & 7 & .3 \\ ... 
\end{pmatrix}$}}; \node [anchor=south,inner sep=1pt] (wordvectorlabel) at (wordvector.north) {\scriptsize{$w_{i-1}$}}; \node [anchor=south,inner sep=1pt] (embeddingmatrixlabel) at (embeddingmatrix.north) {\scriptsize{$\textbf{C}$}}; \node [anchor=north west] (selectedlabel) at ([yshift=-2em]wordvector.south west) {\scriptsize{�ڰ�$\textbf{C}$���������������(i.e., $e_{i-1}$)}}; \begin{pgfonlayer}{background} \visible<4->{ \node [anchor=north west,fill=blue!20!white,minimum height=0.6em,minimum width=5.0em] (selected) at ([yshift=-1.3em]embeddingmatrix.north west) {}; } \end{pgfonlayer} \draw [->] ([xshift=0.15em,yshift=0.3em]wordvector.south) .. controls +(south:0.3) and +(west:0.5) .. (selected.west); } \visible<5->{ \node [anchor=south west] (hiddenlabel0) at ([yshift=5em]embedinglabel0.north west) {\scriptsize{{\color{orange} \textbf{���������}}}}; \node [anchor=north west] (hiddenlabel1) at ([yshift=0.3em]hiddenlabel0.south west) {\scriptsize{$[e_0,e_1,e_2]$��ʾ����������������һ��}}; \node [anchor=north west] (hiddenlabel2) at ([yshift=0.3em]hiddenlabel1.south west) {\scriptsize{֮���������磬���ͨ��Softmax���}}; \node [anchor=north west] (hiddenlabel3) at ([yshift=0.3em]hiddenlabel2.south west) {\scriptsize{ע�⣬$h_0\textbf{U}$�õ����дʵı�ʾ(����)��}}; \node [anchor=north west] (hiddenlabel4) at ([yshift=0.3em]hiddenlabel3.south west) {\scriptsize{Softmaxȷ������ʻ���ϵ�һ���ֲ�}}; } \visible<6->{ \node [anchor=south west] (directlabel0) at ([yshift=1em]hiddenlabel0.north west) {\scriptsize{\alert{\textbf{�ײ����ϲ��ֱ������(��ѡ)}}}}; } \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% FNNLM implementation \begin{frame}{ǰ������������ģ��(FNN LM)��ʵ��} \begin{itemize} \item ʵ�ַdz������д��� \begin{itemize} \item ϸ��1����batchingʱ����$w[i]$������չ������������� \item ϸ��2��TanHһ�����HardTanHʵ�֣���ΪTanH������� \end{itemize} \end{itemize} \begin{tcolorbox} 
[bicolor,sidebyside,righthand width=3.8cm,size=title,frame engine=empty, colback=blue!10!white,colbacklower=black!5!white] {\scriptsize \begin{tabbing} \texttt{XTensor w[3], e[3], h0, y;} \\ \texttt{XTensor C, H, d, U;} \\ \texttt{...}\\ \texttt{} \\ \texttt{for(unsigned i = 0; i < 3; i++)} \\ \texttt{\ \ \ \ e[i] = MMul(w[i], C);}\\ \texttt{e01 = Concatenate(e[0], e[1], -1);}\\ \texttt{e = Concatenate(e01, e[2], -1);}\\ \texttt{} \\ \texttt{h0 = TanH(MMul(e, H) + d);}\\ \texttt{y = Softmax(MMul(h0, U));}\\ \texttt{} \\ \texttt{for(unsigned k = 0; k < size; k++)\{} \\ \texttt{} \ \ \ \ ... // \alert{\texttt{y}}�ĵ�$k$Ԫ�ر�ʾ $\textrm{P}(w|...)$\\ \texttt{} \ \ \ \ ... // $w$Ϊ�ʻ�����$k$����\\ \texttt{\}} \end{tabbing} } \tcblower \begin{center} \begin{tikzpicture} \begin{scope} \node [anchor=west] (w0) at (0,0) {\scriptsize{$w_{i-3}$}}; \node [anchor=west] (w1) at ([xshift=0.5em]w0.east) {\scriptsize{$w_{i-2}$}}; \node [anchor=west] (w2) at ([xshift=0.5em]w1.east) {\scriptsize{$w_{i-1}$}}; \node [anchor=north] (index0) at ([yshift=0.5em]w0.south) {\tiny(index)}; \node [anchor=north] (index1) at ([yshift=0.5em]w1.south) {\tiny(index)}; \node [anchor=north] (index2) at ([yshift=0.5em]w2.south) {\tiny(index)}; \node [anchor=south,draw,inner sep=3pt,align=left] (e0) at ([yshift=1.0em]w0.north) {\tiny{$e_0:$}\\\tiny{$w_{i-3} \textbf{C}$}}; \node [anchor=south,draw,inner sep=3pt,align=left] (e1) at ([yshift=1.0em]w1.north) {\tiny{$e_1:$}\\\tiny{$w_{i-2} \textbf{C}$}}; \node [anchor=south,draw,inner sep=3pt,align=left] (e2) at ([yshift=1.0em]w2.north) {\tiny{$e_2:$}\\\tiny{$w_{i-1} \textbf{C}$}}; \node [anchor=south,draw,minimum width=9em,inner sep=3pt] (h0) at ([yshift=1.5em]e1.north) {\tiny{$h_0=\textrm{Tanh}([e_0,e_1,e_2] \textbf{H} + \textbf{d})$}}; \node [anchor=south,draw,minimum width=9em,inner sep=3pt] (h1) at ([yshift=1.5em]h0.north) {\tiny{$y=\textrm{Softmax}(h_0 \textbf{U})$}}; \node [anchor=south] (ylabel) at ([yshift=1em]h1.north) 
{\scriptsize{$\textrm{P}(w_i|w_{i-3}w_{i-2}w_{i-1})$}};
\draw [->] ([yshift=0.1em]w0.north) -- ([yshift=-0.1em]e0.south);
\draw [->] ([yshift=0.1em]w1.north) -- ([yshift=-0.1em]e1.south);
\draw [->] ([yshift=0.1em]w2.north) -- ([yshift=-0.1em]e2.south);
\draw [->] ([yshift=0.1em]e0.north) -- ([xshift=-2em,yshift=-0.1em]h0.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]h0.south);
\draw [->] ([yshift=0.1em]e2.north) -- ([xshift=2em,yshift=-0.1em]h0.south);
\draw [->] ([yshift=0.1em]h0.north) -- ([yshift=-0.1em]h1.south);
\draw [->] ([yshift=0.1em]h1.north) -- ([yshift=-0.1em]ylabel.south);
\end{scope}
\end{tikzpicture}
\end{center}
\end{tcolorbox}
\vspace{-0.5em}
\footnotesize{ע: size��ʾ�ʻ����С}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% ������ģ�����Ǵ�����ʲô
\begin{frame}{�����Խ�ģ������}
\begin{itemize}
\item Bengio et al.\ (2003)���д����۵�����
\begin{enumerate}
\item ������ÿһ�㾿��ѧ����ʲô \\ �ʻ㡢�䷨����������һЩ֪ʶ����ν��ͣ�
\item ����IJ����������� - 10�㡢20�㡢100������� \\ \# of layers: 10 $\to$ 20 $\to$ 100 $\to$ 1000
\item ����(�������ز��С)���ѡ�� - ��ͬ�������������\\ ���ʵķֲ�ʽ��ʾά�ȶ��ã�\\ ������ã�\\ ��������ѡ��\\ ...
\end{enumerate} \item<2-> ��FNN LM�õ������� \begin{itemize} \item ���¶������ʲô - �Ǵʵ����һ�����һ��ʵ������ \item �����������Ժܺõı�ʾ����֮���(�̾���)���� \item $n$-gram�����ɸ��ʿ���ʹ�������ռ亯����������������ϡ�����⣬ģ�Ͳ�����Ҫ��¼������$n$-gram \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ѭ�������� \begin{frame}{ѭ��������(Recurrent Neural Networks)} \begin{itemize} \item FNN LM��Ȼ��Ч�����Ǻʹ�ͳ��$n$-gram LMһ������Ҫ����\alert{����������}���� \begin{center} \begin{tikzpicture} \begin{scope} \node [anchor=west] (w0) at (0,0) {$w_1$}; \node [anchor=west] (w1) at ([xshift=0.5em]w0.east) {$w_2$}; \node [anchor=west] (w2) at ([xshift=0.5em]w1.east) {$...$}; \node [anchor=west] (w3) at ([xshift=0.5em]w2.east) {$w_{m-n+1}$}; \node [anchor=west] (w4) at ([xshift=0.5em]w3.east) {$...$}; \node [anchor=west,fill=green!20!white] (w5) at ([xshift=0.5em]w4.east) {$w_{m}$}; \draw [->,thick,ublue] (w5.south).. controls +(210:0.5) and +(-30:0.5) .. (w3.south); \draw [->,thick,red] (w5.north).. controls +(150:1) and +(30:1) .. (w1.north); \draw [->,very thick,ublue] ([xshift=-5em,yshift=1em]w0.west) -- ([xshift=-6.5em,yshift=1em]w0.west) node [pos=0,right] {\scriptsize{����}}; \draw [->,very thick,red] ([xshift=-5em,yshift=-0.5em]w0.west) -- ([xshift=-6.5em,yshift=-0.5em]w0.west) node [pos=0,right] {\scriptsize{������}}; \end{scope} \end{tikzpicture} \end{center} \item<2-> �ܷ�ֱ�Ӷ�ԭʼ���⽨ģ�������庯��$g$�����������$w_{1} ... w_{m}$�� \vspace{-0.5em} \begin{displaymath} g(w_{1} ... w_{m}) \approx \textrm{P}(w_m | w_{1} ... w_{m-1}) \end{displaymath} \item<3-> \textbf{ѭ��������(RNNs)}���ԺܺõĽ���������⣬���Ҳ���ɹ���Ӧ�������Խ�ģ���� \begin{itemize} \item ������ÿ���ʵ����ɶ������Ѿ����ɵ����д� \item ���ڲ�ͬλ�õĴʵ����ɸ��ʶ�������ͬһ���������� \end{itemize} \textbf{Recurrent Neural Network Based Language Model}\\ \textbf{Mikolov et al., 2010, In Proc. 
of Interspeech, 1045-1048} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ѭ��������Ľṹ \begin{frame}{ѭ����Ԫ} \begin{itemize} \item ����������$(\textbf{x}_0,\textbf{x}_1,...,\textbf{x}_t,...)$������$\textbf{x}_t$��ʾ�����е�$t$��Ԫ�أ�Ҳ������\alert{ʱ��$t$}���롣������Ӧ�����������$(\textbf{y}_0,\textbf{y}_1,...,\textbf{y}_t,...)$�� ��ѭ���������У�ÿ��ʱ�̵������������ͬһ��\alert{ѭ����Ԫ}��������\visible<2->{��������ģ�ͣ�һ�ּĽṹ��} \visible<2->{ {\small \begin{tcolorbox} [bicolor,sidebyside,righthand width=4.3cm,size=title,frame engine=empty, colback=blue!10!white,colbacklower=black!5!white] \begin{eqnarray} \textbf{y}_t & = & \textrm{Softmax}(\textbf{h}_t \textbf{V}) \nonumber \\ \textbf{h}_t & = & \textrm{TanH}(\textbf{x}_t \textbf{U} + \textbf{h}_{t-1} \textbf{W}) \nonumber \end{eqnarray} \footnotesize{$\textbf{h}_t$: $t$ʱ�̵�����״̬\\ $\textbf{h}_{t-1}$: $t-1$ʱ�̵�����״̬\\ $\textbf{V}, \textbf{U}, \textbf{W}$: ���� } \tcblower \begin{center} \begin{tikzpicture} \begin{scope} \node [anchor=west,inner sep=3pt,minimum width=8em] (h) at (0,0) {\tiny{$\textbf{h}_t = \textrm{TanH}(\textbf{x}_t \textbf{U} + \textbf{h}_{t-1} \textbf{W})$}}; \node [anchor=south west,inner sep=3pt] (r) at ([yshift=-0.2em]h.north west) {\tiny{ѭ����Ԫ:}}; \begin{pgfonlayer}{background} \node [rectangle,draw,inner sep=0em,fill=green!20!white] [fit = (r) (h)] (rbox) {}; \end{pgfonlayer} \node [anchor=south,draw,minimum width=8em,fill=green!20!white] (y) at ([yshift=1.5em]rbox.north) {\tiny{$\textbf{y}_t = \textrm{Softmax}(\textbf{h}_t \textbf{V})$}}; \node [anchor=south,inner sep=2pt] (output) at ([yshift=1em]y.north) {\scriptsize{$\textbf{y}_t$}}; \node [anchor=north,inner sep=2pt] (input) at ([yshift=-1em]h.south) {\scriptsize{$\textbf{x}_t$}}; \draw [->,thick] (input.north) -- ([yshift=-0.1em]rbox.south); \draw [->,thick] ([yshift=0.1em]rbox.north) -- ([yshift=-0.1em]y.south) node [pos=0.5,left] 
{\tiny{$\textbf{h}_t$}}; \draw [->,thick] ([yshift=0.1em]y.north) -- (output.south); \draw [->,thick] ([xshift=0.1em]rbox.east) -- ([xshift=1em]rbox.east) node [pos=1,above] {\tiny{$\textbf{h}_t$}}; \draw [->,thick] ([xshift=-1em]rbox.west) -- ([xshift=-0.1em]rbox.west) node [pos=0,above] {\tiny{$\textbf{h}_{t-1}$}}; \end{scope} \end{tikzpicture} \end{center} \end{tcolorbox} } } \item<3-> \textbf{�������ѭ����}$t$ʱ�̵�״̬��$t-1$ʱ��״̬�ĺ�����������̿��Բ��ϱ�ִ�� \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ѭ��������ġ����䡱 \begin{frame}{ѭ���������``����''} \begin{itemize} \item ѭ����������Լ������ⳤ�ȵ���ʷ����˿��Էdz��ʺϴ��������������У�������Ȼ���Ծ��� \begin{itemize} \item ע�⣺$\textbf{h}_{t-1}$���Ա����ݵ�����״̬ \end{itemize} \end{itemize} \vspace{-1em} \begin{eqnarray} \textbf{h}_t & = & \textrm{TanH}(\textbf{x}_t \textbf{U} + \alert{\textbf{h}_{t-1}} \textbf{W}) \nonumber \\ \visible<2->{ \textbf{h}_{t+1} & = & \textrm{TanH}(\textbf{x}_{t+1} \textbf{U} + \textbf{h}_{t} \textbf{W}) \nonumber \\ & = & \textrm{TanH}(\textbf{x}_{t+1} \textbf{U} + \textrm{TanH}(\textbf{x}_t \textbf{U} + \alert{\textbf{h}_{t-1}} \textbf{W}) \textbf{W}) \nonumber \\ } \visible<3->{ \textbf{h}_{t+2} & = & \textrm{TanH}(\textbf{x}_{t+2} \textbf{U} + \textbf{h}_{t+1} \textbf{W}) \nonumber \\ & = & \textrm{TanH}(\textbf{x}_{t+2} \textbf{U} + \nonumber \\ & & \textrm{TanH}(\textbf{x}_{t+1} \textbf{U} + \textrm{TanH}(\textbf{x}_t \textbf{U} + \alert{\textbf{h}_{t-1}} \textbf{W}) \textbf{W}) \textbf{W}) \nonumber } \end{eqnarray} \vspace{-1em} \begin{center} \begin{tikzpicture} \begin{scope} \tikzstyle{rnnnode} = [draw,inner sep=5pt,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] \node [anchor=west,rnnnode] (node1) at (0,0) {\scriptsize{RNN Cell}}; \visible<2->{ \node [anchor=west,rnnnode] (node2) at ([xshift=4.5em]node1.east) {\scriptsize{RNN Cell}}; } \visible<3->{ \node 
[anchor=west,rnnnode] (node3) at ([xshift=4.5em]node2.east) {\scriptsize{RNN Cell}}; } \node [anchor=north] (x1) at ([yshift=-1em]node1.south) {\footnotesize{$\textbf{x}_{t}$}}; \visible<2->{ \node [anchor=north] (x2) at ([yshift=-1em]node2.south) {\footnotesize{$\textbf{x}_{t+1}$}}; } \visible<3->{ \node [anchor=north] (x3) at ([yshift=-1em]node3.south) {\footnotesize{$\textbf{x}_{t+2}$}}; } \node [anchor=south] (h1) at ([yshift=1em]node1.north) {\footnotesize{$\textbf{h}_{t}$}}; \visible<2->{ \node [anchor=south] (h2) at ([yshift=1em]node2.north) {\footnotesize{$\textbf{h}_{t+1}$}}; } \visible<3->{ \node [anchor=south] (h3) at ([yshift=1em]node3.north) {\footnotesize{$\textbf{h}_{t+2}$}}; } \draw [->,thick] ([xshift=-1.0em]node1.west)--([xshift=-0.1em]node1.west) node [pos=0,left] {\scriptsize{$\alert{\textbf{h}_{t-1}}$}}; \visible<3->{ \draw [->,thick] ([xshift=0.1em]node3.east)--([xshift=1.0em]node3.east) node [pos=1,right] {\scriptsize{$\textbf{h}_{t+2}$}}; } \draw [->,thick] ([xshift=0.1em]node1.east)--([xshift=-0.1em]node2.west) node [pos=0.5,above] {\tiny{$\textbf{h}_{t}(\alert{\textbf{h}_{t-1}})$}}; \visible<2->{ \draw [->,thick] ([xshift=0.1em]node2.east)--([xshift=-0.1em]node3.west) node [pos=0.5,above] {\tiny{$\textbf{h}_{t+1}(\textbf{h}_{t}(\alert{\textbf{h}_{t-1}}))$}}; } \draw [->,thick] (x1.north)--([yshift=-0.1em]node1.south); \visible<2->{ \draw [->,thick] (x2.north)--([yshift=-0.1em]node2.south); } \visible<3->{ \draw [->,thick] (x3.north)--([yshift=-0.1em]node3.south); } \draw [->,thick] ([yshift=0.1em]node1.north)--(h1.south); \visible<2->{ \draw [->,thick] ([yshift=0.1em]node2.north)--(h2.south); } \visible<3->{ \draw [->,thick] ([yshift=0.1em]node3.north)--(h3.south); } \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ����ѭ�������������ģ�� \begin{frame}{����ѭ�������������ģ��(RNN LM)} \begin{itemize} \item 
ѭ����������Ա�ֱ����������ģ�� \begin{itemize} \item<2-> ��FNN LM���ƣ����ȰѴʴ�one-hot��ʾת���ɷֲ�ʽ��ʾ \item<3-> $t$ʱ��Ԥ��$\textrm{P}(x_{t+1}|x_1...x_{t})$ \item<4-> ���Ե��Ӹ���IJ� \end{itemize} \end{itemize} \visible<2->{ \begin{center} \begin{tikzpicture} \begin{scope} \tikzstyle{rnnnode} = [draw,inner sep=5pt,minimum width=4em,minimum height=1.5em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] \visible<3->{ \node [anchor=west,rnnnode] (node11) at (0,0) {\scriptsize{RNN Cell}}; \node [anchor=west,rnnnode] (node12) at ([xshift=2em]node11.east) {\scriptsize{RNN Cell}}; \node [anchor=west,rnnnode] (node13) at ([xshift=2em]node12.east) {\scriptsize{RNN Cell}}; \node [anchor=west,rnnnode] (node14) at ([xshift=2em]node13.east) {\scriptsize{RNN Cell}}; } \node [anchor=north,rnnnode,fill=red!30!white] (e1) at ([yshift=-1.2em]node11.south) {\tiny{$e_1=w_1\textbf{C}$}}; \node [anchor=north,rnnnode,fill=red!30!white] (e2) at ([yshift=-1.2em]node12.south) {\tiny{$e_2=w_2\textbf{C}$}}; \node [anchor=north,rnnnode,fill=red!30!white] (e3) at ([yshift=-1.2em]node13.south) {\tiny{$e_3=w_3\textbf{C}$}}; \node [anchor=north,rnnnode,fill=red!30!white] (e4) at ([yshift=-1.2em]node14.south) {\tiny{$e_4=w_4\textbf{C}$}}; \node [anchor=north] (w1) at ([yshift=-1em]e1.south) {\footnotesize{$w_1$}}; \node [anchor=north] (w2) at ([yshift=-1em]e2.south) {\footnotesize{$w_2$}}; \node [anchor=north] (w3) at ([yshift=-1em]e3.south) {\footnotesize{$w_3$}}; \node [anchor=north] (w4) at ([yshift=-1em]e4.south) {\footnotesize{$w_4$}}; \draw [->,thick] ([yshift=0.1em]w1.north)--([yshift=-0.1em]e1.south); \draw [->,thick] ([yshift=0.1em]w2.north)--([yshift=-0.1em]e2.south); \draw [->,thick] ([yshift=0.1em]w3.north)--([yshift=-0.1em]e3.south); \draw [->,thick] ([yshift=0.1em]w4.north)--([yshift=-0.1em]e4.south); \draw [->,thick] ([yshift=0.1em]e1.north)--([yshift=-0.1em]node11.south); \draw [->,thick] ([yshift=0.1em]e2.north)--([yshift=-0.1em]node12.south); \draw [->,thick] 
([yshift=0.1em]e3.north)--([yshift=-0.1em]node13.south);
\draw [->,thick] ([yshift=0.1em]e4.north)--([yshift=-0.1em]node14.south);

% --- Slides 4+: a second row of RNN cells (node2x) with a Softmax row (node3x) on top.
\visible<4->{
  \foreach \col in {1,2,3,4} {
    \node [anchor=south,rnnnode] (node2\col) at ([yshift=1.5em]node1\col.north) {\scriptsize{RNN Cell}};
  }
  \foreach \col in {1,2,3,4} {
    \node [anchor=south,rnnnode,fill=blue!30!white] (node3\col) at ([yshift=1.5em]node2\col.north) {\scriptsize{Softmax($\cdot$)}};
  }
}

% --- Slide 3 only: single recurrent layer. node2x is declared again, this time as a
% Softmax box at the same position, and each prediction label is attached to the end
% of the arrow that points towards node3x.
% The labels condition on the full history starting at w_1, matching the inputs
% w_1..w_4 and the P(x_{t+1}|x_1...x_t) formula in the item list of this frame.
\visible<3>{
  \foreach \col in {1,2,3,4} {
    \node [anchor=south,rnnnode,fill=blue!30!white] (node2\col) at ([yshift=1.5em]node1\col.north) {\scriptsize{Softmax($\cdot$)}};
  }
  \foreach \col/\plabel in {1/{w_2|w_1},2/{w_3|w_1 w_2},3/{w_4|w_1 w_2 w_3},4/{w_5|w_1 w_2 w_3 w_4}} {
    \draw [->,thick] ([yshift=0.1em]node2\col.north)--([yshift=-0.1em]node3\col.south) node[pos=1,above] {\scriptsize{$\textrm{P}(\plabel)$}};
  }
}

% --- Slides 4+: output arrows with the same prediction labels, the links from the
% second row up to the Softmax row, and the left-to-right chain through the second row.
\visible<4->{
  \foreach \col/\plabel in {1/{w_2|w_1},2/{w_3|w_1 w_2},3/{w_4|w_1 w_2 w_3},4/{w_5|w_1 w_2 w_3 w_4}} {
    \draw [->,thick] ([yshift=0.1em]node3\col.north)--([yshift=1em]node3\col.north) node[pos=1,above] {\scriptsize{$\textrm{P}(\plabel)$}};
  }
  \foreach \col in {1,2,3,4} {
    \draw [->,thick] ([yshift=0.1em]node2\col.north)--([yshift=-0.1em]node3\col.south);
  }
  \draw [->,thick] ([xshift=-1em]node21.west)--([xshift=-0.1em]node21.west);
  \foreach \src/\dst in {1/2,2/3,3/4} {
    \draw [->,thick] ([xshift=0.1em]node2\src.east)--([xshift=-0.1em]node2\dst.west);
  }
  \draw [->,thick] ([xshift=0.1em]node24.east)--([xshift=1em]node24.east);
}

% --- Slides 3+: links from the first row (node1x) up to node2x and the left-to-right
% chain through the first row. The closing brace of this \visible follows this span.
\visible<3->{
  \foreach \col in {1,2,3,4} {
    \draw [->,thick] ([yshift=0.1em]node1\col.north)--([yshift=-0.1em]node2\col.south);
  }
  \draw [->,thick] ([xshift=-1em]node11.west)--([xshift=-0.1em]node11.west);
  \foreach \src/\dst in {1/2,2/3,3/4} {
    \draw [->,thick] ([xshift=0.1em]node1\src.east)--([xshift=-0.1em]node1\dst.west);
  }
  \draw [->,thick] ([xshift=0.1em]node14.east)--([xshift=1em]node14.east);
} \end{scope} \end{tikzpicture} \end{center} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ѭ����Ԫ����ơ��ݶ���ʧ��ѵ�������� \begin{frame}{��һ��������} \begin{itemize} \item \textbf{ѭ����Ԫ���}��ѭ����Ԫ����һ�������������ǰʱ�̵��������һʱ�̵�״̬�����ɵ�ǰʱ�̵�״̬ \begin{displaymath} \textbf{h}_t = g(\textbf{x}_t, \textbf{h}_{t-1}; \theta) \end{displaymath} �ܶ��ַ�ʽ���$g(\cdot)$����������LSTM��GRU�� \item<2-> \textbf{�ݶ���ʧ/��ը}���������б䳤���ڷ���ʱѭ����������������ľֲ��ݶ���˼��㣬��ᵼ��\alert{�ݶ���ʧ/��ը����} \begin{displaymath} \underbrace{0.2 \times 0.3 \times ... \times 0.2 \times 0.1}_{\text{100��}} \approx 0 \end{displaymath} \vspace{-0.8em} \begin{itemize} \item ���Կ����ݶȲü��������ݶȵĴ�С \item Ҳ��������short-cut connection����в����� \end{itemize} \item<2-> \textbf{ѵ��}�������Զ��֣��ⲻ�Ǹ������� :) \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ��ע�������� \begin{frame}{��ע��������(Self-Attention)} \begin{itemize} \item RNN LMЧ���ܺã����ǵ����й���,�ʻ�֮����Ϣ����·�����������׳����ݶ���ʧ���ݶȱ�ը�����⡣ \vspace{0.5em} \begin{center} \begin{tikzpicture} \begin{scope} \node [anchor=west] (w0) at (0,0) {$w_1$}; \node [anchor=west] (w1) at ([xshift=0.5em]w0.east) {$w_2$}; \node [anchor=west] (w2) at ([xshift=0.5em]w1.east) {$w_3$}; \node [anchor=west] (w3) at ([xshift=0.5em]w2.east) {$...$}; \node [anchor=west] (w4) at ([xshift=0.5em]w3.east) {$w_{m-1}$}; \node [anchor=west,fill=green!20!white] (w5) at ([xshift=0.5em]w4.east) {$w_{m}$}; \draw [->,thick,red] (w1.north).. controls +(130:0.5) and +(50:0.5) .. (w0.north); \draw [->,thick,red] (w2.north).. controls +(130:0.5) and +(50:0.5) .. (w1.north); \draw [->,thick,red] ([yshift=0.2em]w3.north).. controls +(130:0.5) and +(50:0.5) .. (w2.north); \draw [->,thick,red] (w4.north).. controls +(130:0.5) and +(50:0.5) .. ([yshift=0.2em]w3.north); \draw [->,thick,red] (w5.north).. 
controls +(130:0.5) and +(50:0.5) .. (w4.north); \draw [->,very thick,red] ([xshift=-5em]w0.west) -- ([xshift=-6.5em]w0.west) node [pos=0,right] {\scriptsize{��Ϣ����}}; \end{scope} \end{tikzpicture} \end{center} \item<2-> �ܷ�ͬλ��֮��Ĵʻ����Ϣ���ݵľ�������Ϊ1�� \begin{center} \begin{tikzpicture} \begin{scope} \node [anchor=west] (w0) at (0,-2) {$w_1$}; \node [anchor=west] (w1) at ([xshift=0.5em]w0.east) {$w_2$}; \node [anchor=west] (w2) at ([xshift=0.5em]w1.east) {$w_3$}; \node [anchor=west] (w3) at ([xshift=0.5em]w2.east) {$...$}; \node [anchor=west] (w4) at ([xshift=0.5em]w3.east) {$w_{m-1}$}; \node [anchor=west,fill=green!20!white] (w5) at ([xshift=0.5em]w4.east) {$w_{m}$}; \draw [->,thick,red] (w5.north).. controls +(100:0.8) and +(50:0.8) .. (w0.north); \draw [->,thick,red] (w5.north).. controls +(110:0.7) and +(50:0.7) .. (w1.north); \draw [->,thick,red] (w5.north).. controls +(120:0.6) and +(50:0.6) .. ([yshift=0.2em]w3.north); \draw [->,thick,red] (w5.north).. controls +(130:0.5) and +(50:0.5) .. (w4.north); \draw [->,very thick,red] ([xshift=-5em]w0.west) -- ([xshift=-6.5em]w0.west) node [pos=0,right] {\scriptsize{��Ϣ����}}; \end{scope} \end{tikzpicture} \end{center} \item<3-> \textbf{��ע��������(Self-Attention)}���ԺܺõĽ���������������⣬�ڳ��������Խ�ģ����ȡ���˺ܺõ�Ч�� \begin{itemize} \item ����ֵı�ʾ���в�ͬλ��֮��ĸ��ӹ�ϵ \item ����ѵ�������Ч�� \end{itemize} \textbf{Attention Is All You Need}\\ \textbf{Vaswani et al., 2017, In Proc. 
of Neural Information Processing Systems, 6000-6010} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% Transformer architecture \begin{frame}{Transformer����ģ��(Vaswani et al., 2017)} \begin{itemize} \item һ�������� \end{itemize} \vspace{-2em} \begin{center} \begin{tikzpicture} \begin{scope} \node [anchor=west] (w0) at (0,0) {\footnotesize{$w_{0}$}}; \node [anchor=west] (w1) at ([xshift=4em]w0.east) {\footnotesize{$w_{1}$}}; \node [anchor=west] (w2) at ([xshift=4em]w1.east) {\footnotesize{$w_{2}$}}; \node [anchor=west] (w3) at ([xshift=4em]w2.east) {\footnotesize{$w_{3}$}}; \node [anchor=north] (index0) at ([yshift=0.5em]w0.south) {\tiny(index)}; \node [anchor=north] (index1) at ([yshift=0.5em]w1.south) {\tiny(index)}; \node [anchor=north] (index2) at ([yshift=0.5em]w2.south) {\tiny(index)}; \node [anchor=north] (index3) at ([yshift=0.5em]w3.south) {\tiny(index)}; \node [anchor=south,draw,inner sep=3pt] (e0) at ([yshift=1em]w0.north) {\tiny{$e_0=w_{0} \textbf{C}$}}; \node [anchor=south,draw,inner sep=3pt] (e1) at ([yshift=1em]w1.north) {\tiny{$e_1=w_{1} \textbf{C}$}}; \node [anchor=south,draw,inner sep=3pt] (e2) at ([yshift=1em]w2.north) {\tiny{$e_2=w_{2} \textbf{C}$}}; \node [anchor=south,draw,inner sep=3pt] (e3) at ([yshift=1em]w3.north) {\tiny{$e_3=w_{3} \textbf{C}$}}; \node [anchor=south,draw,inner sep=3pt] (h0) at ([xshift=-0.5em, yshift=1.5em]e0.north) {\tiny{$h_{0}=\textrm{SelfAtt}(e_0,e_3)$}}; \node [anchor=south,draw,inner sep=3pt] (h1) at ([xshift=0.5em, yshift=1.5em]e1.north) {\tiny{$h_{1}=\textrm{SelfAtt}(e_1,e_3)$}}; \node [anchor=south,draw,inner sep=3pt] (h2) at ([xshift=1.5em, yshift=1.5em]e2.north) {\tiny{$h_{2}=\textrm{SelfAtt}(e_2,e_3)$}}; \node [anchor=south,draw,minimum width=9em,inner sep=3pt] (f1) at ([xshift=0.5em, yshift=1.5em]h2.north) {\tiny{$f_3=\textrm{FNN}([h_0,h_1,h_2,e_3])$}}; \node [anchor=south,draw,minimum width=9em,inner sep=3pt] (o1) at 
([yshift=1em]f1.north) {\tiny{$y=\textrm{Softmax}(f_3 \textbf{U})$}}; \node [anchor=south] (ylabel) at ([yshift=1em]o1.north) {\footnotesize{$\textrm{P}(w_4|w_{0}w_{1}w_{2}w_{3})$}}; \draw [->] ([yshift=0.1em]w0.north) -- ([yshift=-0.1em]e0.south); \draw [->] ([yshift=0.1em]w1.north) -- ([yshift=-0.1em]e1.south); \draw [->] ([yshift=0.1em]w2.north) -- ([yshift=-0.1em]e2.south); \draw [->] ([yshift=0.1em]w3.north) -- ([yshift=-0.1em]e3.south); \draw [->] ([yshift=0.1em]e0.north) -- ([xshift=0em,yshift=-0.1em]h0.south); \draw [->] ([yshift=0.1em]e1.north) -- ([xshift=-0.5em,yshift=-0.1em]h1.south); \draw [->] ([yshift=0.1em]e2.north) -- ([xshift=-1em,yshift=-0.1em]h2.south); \draw [->] ([yshift=0.1em]e3.north) -- ([xshift=1em,yshift=-0.1em]h0.south); \draw [->] ([yshift=0.1em]e3.north) -- ([xshift=1em,yshift=-0.1em]h1.south); \draw [->] ([yshift=0.1em]e3.north) -- ([xshift=1em,yshift=-0.1em]h2.south); \draw [->] ([yshift=0.1em]h0.north) -- ([xshift=-2em,yshift=-0.1em]f1.south); \draw [->] ([yshift=0.1em]e3.north) -- ([xshift=2em,yshift=-0.1em]f1.south); \draw [->] ([yshift=0.1em]h1.north) -- ([xshift=-1em,yshift=-0.1em]f1.south); \draw [->] ([yshift=0.1em]h2.north) -- ([xshift=0em,yshift=-0.1em]f1.south); \draw [->] ([yshift=0.1em]f1.north) -- ([yshift=-0.1em]o1.south); \draw [->] ([yshift=0.1em]o1.north) -- ([yshift=-0.1em]ylabel.south); \visible<2->{ \node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e0) at ([yshift=1em]w0.north) {\tiny{$e_0=w_{0} \textbf{C}$}}; \node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e1) at ([yshift=1em]w1.north) {\tiny{$e_1=w_{1} \textbf{C}$}}; \node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e2) at ([yshift=1em]w2.north) {\tiny{$e_2=w_{2} \textbf{C}$}}; \node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e3) at ([yshift=1em]w3.north) {\tiny{$e_3=w_{3} \textbf{C}$}}; } \visible<2->{ \node [anchor=west] (embedinglabel0) at ([xshift=-5em,yshift=-2em]w0.south) {\scriptsize{{\blue \textbf{�ʵķֲ�ʽ��ʾ}}}}; 
\node [anchor=north west] (embedinglabel1) at ([yshift=0.3em]embedinglabel0.south west) {\scriptsize{ǰ���Ѿ����ܹ���}}; \node [anchor=north west] (embedinglabel2) at ([yshift=0.3em]embedinglabel1.south west) {\scriptsize{����One-hot��ʾ���}}; \node [anchor=north west] (embedinglabel3) at ([yshift=0.3em]embedinglabel2.south west) {\scriptsize{�¼���λ������}}; } \visible<3->{ \node [anchor=south,draw,inner sep=3pt,fill=ugreen!20!white] (h0) at ([xshift=-0.5em, yshift=1.5em]e0.north) {\tiny{$h_{0}=\textrm{SelfAtt}(e_0,e_3)$}}; \node [anchor=south,draw,inner sep=3pt,fill=ugreen!20!white] (h1) at ([xshift=0.5em, yshift=1.5em]e1.north) {\tiny{$h_{1}=\textrm{SelfAtt}(e_1,e_3)$}}; \node [anchor=south,draw,inner sep=3pt,fill=ugreen!20!white] (h2) at ([xshift=1.5em, yshift=1.5em]e2.north) {\tiny{$h_{2}=\textrm{SelfAtt}(e_2,e_3)$}}; } \visible<3->{ \node [anchor=west] (selfattlabel0) at ([xshift=3em]embedinglabel0.east) {\scriptsize{{\color{ugreen} \textbf{��ע��������}}}}; \node [anchor=west] (selfattlabel1) at ([yshift=-0.3em]selfattlabel0.south west) {\scriptsize{����ʻ�֮�����ض�}}; \node [anchor=west] (selfattlabel2) at ([yshift=-0.3em]selfattlabel1.south west) {\scriptsize{��ͷ��ע��������}}; \node [anchor=west] (directlabel0) at ([yshift=-0.3em]selfattlabel2.south west) {\scriptsize{\alert{\textbf{���潫�����}}}}; } \visible<4->{ \node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (f1) at ([xshift=0.5em, yshift=1.5em]h2.north) {\tiny{$f_3=\textrm{FNN}([h_0,h_1,h_2,e_3])$}}; \node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (o1) at ([yshift=1em]f1.north) {\tiny{$y=\textrm{Softmax}(f_3 \textbf{U})$}}; } \visible<4->{ \node [anchor=west] (ffnlabel0) at ([xshift=3em]selfattlabel0.east) {\scriptsize{{\color{orange} \textbf{ǰ��������������}}}}; \node [anchor=west] (ffnlabel1) at ([yshift=-0.3em]ffnlabel0.south west) {\scriptsize{˫��ȫ��������}}; \node [anchor=west] (ffnlabel2) at ([yshift=-0.3em]ffnlabel1.south west) 
{\scriptsize{�����ΪRelu}}; \node [anchor=west] (ffnlabel3) at ([yshift=-0.3em]ffnlabel2.south west) {\scriptsize{���ͨ��Softmax���}}; } \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% Transformer architecture \begin{frame}{Transformer����ģ��(Vaswani et al., 2017)} \begin{itemize} \item ��ͷע�������� \end{itemize} \vspace{-1.5em} \begin{center} \begin{tikzpicture} \begin{scope} \node [anchor=west,draw=black!30,inner sep=4pt,fill=ugreen!20!white] (Linear0) at (0,0) {\tiny{Linear}}; \node [anchor=south west,draw=black!50,fill=ugreen!20!white,draw,inner sep=4pt] (Linear01) at ([shift={(-0.2em,-0.2em)}]Linear0.south west) {\tiny{Linear}}; \node [anchor=south west,fill=ugreen!20!white,draw,inner sep=4pt] (Linear02) at ([shift={(-0.2em,-0.2em)}]Linear01.south west) {\tiny{Linear}}; \node [anchor=north] (Q) at ([xshift=0em,yshift=-1em]Linear02.south) {\footnotesize{$Q$}}; \node [anchor=west,draw=black!30,inner sep=4pt,fill=ugreen!20!white] (Linear1) at ([xshift=1.5em]Linear0.east) {\tiny{Linear}}; \node [anchor=south west,draw=black!50,fill=ugreen!20!white,draw,inner sep=4pt] (Linear11) at ([shift={(-0.2em,-0.2em)}]Linear1.south west) {\tiny{Linear}}; \node [anchor=south west,fill=ugreen!20!white,draw,inner sep=4pt] (Linear12) at ([shift={(-0.2em,-0.2em)}]Linear11.south west) {\tiny{Linear}}; \node [anchor=north] (K) at ([xshift=0em,yshift=-1em]Linear12.south) {\footnotesize{$K$}}; \node [anchor=west,draw=black!30,inner sep=4pt,fill=ugreen!20!white] (Linear2) at ([xshift=1.5em]Linear1.east) {\tiny{Linear}}; \node [anchor=south west,draw=black!50,fill=ugreen!20!white,draw,inner sep=4pt] (Linear21) at ([shift={(-0.2em,-0.2em)}]Linear2.south west) {\tiny{Linear}}; \node [anchor=south west,fill=ugreen!20!white,draw,inner sep=4pt] (Linear22) at ([shift={(-0.2em,-0.2em)}]Linear21.south west) {\tiny{Linear}}; \node [anchor=north] (V) at 
([xshift=0em,yshift=-1em]Linear22.south) {\footnotesize{$V$}};

% Three "Scaled Dot-Product Attention" boxes. Scale1 and Scale2 are each offset by
% (-0.2em,-0.2em) from the previous box and drawn later, so Scale2 ends up in front.
\node [anchor=south,draw=black!30,minimum width=9em,inner sep=4pt,fill=blue!20!white] (Scale) at ([yshift=1em]Linear1.north) {\tiny{Scaled Dot-Product Attention}};
\node [anchor=south west,draw=black!50,minimum width=9em,fill=blue!20!white,draw,inner sep=4pt] (Scale1) at ([shift={(-0.2em,-0.2em)}]Scale.south west) {\tiny{Scaled Dot-Product Attention}};
\node [anchor=south west,fill=blue!20!white,draw,minimum width=9em,inner sep=4pt] (Scale2) at ([shift={(-0.2em,-0.2em)}]Scale1.south west) {\tiny{Scaled Dot-Product Attention}};

% Concat sits above the front attention box, and the final Linear box above Concat.
\node [anchor=south,draw,minimum width=4em,inner sep=4pt,fill=yellow!30] (Concat) at ([yshift=1em]Scale2.north) {\tiny{Concat}};
\node [anchor=south,draw,minimum width=4em,inner sep=4pt,fill=ugreen!20!white] (Linear) at ([yshift=1em]Concat.north) {\tiny{Linear}};

% From each of Q, K and V: one arrow into the front Linear box plus two lighter,
% slightly shifted plain lines.
\foreach \src/\dst in {Q/Linear02,K/Linear12,V/Linear22} {
  \draw [->] ([yshift=0.1em]\src.north) -- ([yshift=-0.1em]\dst.south);
  \draw [-,draw=black!50] ([yshift=0.1em]\src.north) -- ([xshift=0.2em,yshift=-0.1em]\dst.south);
  \draw [-,draw=black!30] ([yshift=0.1em]\src.north) -- ([xshift=0.4em,yshift=-0.1em]\dst.south);
}

% Short upward stubs leaving the Linear boxes: an arrow from the front box and
% shorter, lighter lines from the two boxes behind it.
\draw [->] (Linear02.north) -- ([yshift=1em]Linear02.north);
\draw [-,draw=black!50] (Linear01.north) -- ([yshift=0.8em]Linear01.north);
\draw [-,draw=black!30] (Linear0.north) -- ([yshift=0.6em]Linear0.north);
\draw [->] (Linear12.north) -- ([yshift=1em]Linear12.north);
% The coordinates of the next stub follow this span.
\draw [-,draw=black!50]
([yshift=0em]Linear11.north) -- ([yshift=0.8em]Linear11.north);
\draw [-,draw=black!30] ([yshift=0em]Linear1.north) -- ([yshift=0.6em]Linear1.north);

% Upward stubs leaving the third group of Linear boxes.
\draw [->] ([yshift=0em]Linear22.north) -- ([yshift=1em]Linear22.north);
\draw [-,draw=black!50] ([yshift=0em]Linear21.north) -- ([yshift=0.8em]Linear21.north);
\draw [-,draw=black!30] ([yshift=0em]Linear2.north) -- ([yshift=0.6em]Linear2.north);

% Attention stack -> Concat -> Linear -> output arrow.
\draw [->] ([yshift=0em]Scale2.north) -- ([yshift=0em]Concat.south);
\draw [-,draw=black!50] ([yshift=0em]Scale1.north) -- ([yshift=0.8em]Scale1.north);
\draw [-,draw=black!30] ([yshift=0em]Scale.north) -- ([yshift=0.6em]Scale.north);
\draw [->] ([yshift=0em]Concat.north) -- ([yshift=0em]Linear.south);
\draw [->] ([yshift=0em]Linear.north) -- ([yshift=1em]Linear.north);

% Caption block placed below the Q label: a title, the multi-head formula and two
% lines of text.
\node [anchor=west] (Multiheadlabel0) at ([xshift=-5em,yshift=-1.2em]Q.south) {\scriptsize{{\blue \textbf{��ͷע����}}}};
% Operator names are set upright with \textrm, as elsewhere in this file, and the
% output projection is W^O (letter O), not W^0.
\node [anchor=north west] (Multiheadlabel1) at ([yshift=0em]Multiheadlabel0.south west) {\scriptsize{$\textrm{MultiHead}(Q,K,V)=\textrm{Concat}(\textrm{head}_1,\ldots,\textrm{head}_n)W^O$}};
\node [anchor=north west] (Multiheadlabel2) at ([yshift=0.2em]Multiheadlabel1.south west) {\scriptsize{������ѹ���ɶ��ά�Ƚ�С��������ֱ�����ע����}};
\node [anchor=north west] (Multiheadlabel3) at ([yshift=0.2em]Multiheadlabel2.south west) {\scriptsize{�ٰѽ���������������Ա任�õ��������}};

% --- Slides 2+: detail panel to the right of the main diagram.
% This \visible group stays open; it is closed further down in the file.
\visible<2->{
  \node [anchor=south west,draw,inner sep=4pt,minimum width=3.5em,fill=blue!20!white] (MatMul) at ([xshift=8em]Linear22.south west) {\tiny{MatMul}};
  \node [anchor=north] (Q1) at ([xshift=-1em,yshift=-1em]MatMul.south) {\footnotesize{$Q$}};
  \node [anchor=north] (K1) at ([xshift=1em,yshift=-1em]MatMul.south) {\footnotesize{$K$}};
  \node [anchor=south,draw,inner sep=4pt,fill=yellow!30] (Scale3) at ([yshift=1em]MatMul.north) {\tiny{Scale}};
  \node [anchor=south,draw,inner sep=4pt,fill=purple!20,minimum width=3.5em] (Mask) at ([yshift=0.8em]Scale3.north) {\tiny{Mask(opt.)}};
  % The position and label of the SoftMax node follow this span.
  \node [anchor=south,draw,inner sep=4pt,fill=ugreen!20!white] (SoftMax) at
([yshift=1em]Mask.north) {\tiny{SoftMax}};

% Second MatMul box, placed above and to the right of SoftMax, and the V input label.
\node [anchor=south,draw,minimum width=3.5em,inner sep=4pt,fill=blue!20!white] (MatMul1) at ([xshift=1.5em,yshift=1em]SoftMax.north) {\tiny{MatMul}};
\node [anchor=north] (V1) at ([xshift=2em]K1.north) {\footnotesize{$V$}};

% Empty helper node above MatMul1; it is listed in the fit below, so the dashed
% frame also encloses the space above MatMul1.
\node [anchor=north] (null) at ([yshift=0.8em]MatMul1.north) {};
\node [rectangle,draw, densely dashed,inner sep=0.4em] [fit = (MatMul) (MatMul1) (Q1) (K1) (V1) (null)] (inputshadow) {};

% Q and K enter the first MatMul at horizontally offset points on its bottom edge.
\foreach \src/\dx in {Q1/-1em,K1/1em} {
  \draw [->] ([yshift=0.1em]\src.north) -- ([xshift=\dx,yshift=-0.1em]MatMul.south);
}

% Vertical chain MatMul -> Scale -> Mask -> SoftMax.
\foreach \lower/\upper in {MatMul/Scale3,Scale3/Mask,Mask/SoftMax} {
  \draw [->] ([yshift=0.1em]\lower.north) -- ([yshift=-0.1em]\upper.south);
}
\draw [->] ([yshift=0.1em]SoftMax.north) -- ([yshift=0.9em]SoftMax.north);

% NOTE(review): 9.1em is a fixed length; presumably hand-tuned so that the arrow from
% V ends just below MatMul1. Confirm it still lines up if node sizes or gaps change.
\draw [->] ([yshift=0.1em]V1.north) -- ([yshift=9.1em]V1.north);
\draw [->] ([yshift=0.1em]MatMul1.north) -- ([yshift=0.8em]MatMul1.north);

% Dashed red curve starting at the right edge of the back attention box; its end
% coordinate follows this span.
\draw [->,dashed,red,thick] ([xshift=0.1em]Scale.east) .. controls +(east:1) and +(west:1) ..
([xshift=-0.1em,yshift=1em]inputshadow.west); \node [anchor=west] (Attentionlabel0) at ([xshift=-2em,yshift=-1.2em]Q1.south) {\scriptsize{{\color{ugreen} \textbf{���ڵ�˵���ע����}}}}; \node [anchor=north west] (Attentionlabel1) at ([yshift=0.3em]Attentionlabel0.south west) {\scriptsize{$head_i=softmax(\frac{QK^{T}}{\sqrt{d_k}})V$}}; \node [anchor=north west] (Attentionlabel2) at ([yshift=0.6em]Attentionlabel1.south west) {\scriptsize{����õ�λ�������ļ�Ȩ��}}; \node [anchor=north west] (Attentionlabel3) at ([yshift=0.2em]Attentionlabel2.south west) {\scriptsize{Q,K,V������ͬ��}}; } \end{scope} \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% evaluation \begin{frame}{����ģ������} \begin{itemize} \item ����ģ�͵�����ָ�� - �����(Perplexity, PPL) \begin{itemize} \item ����ģ��Ԥ��һ���������������� \item �����Խ�ͣ���ģ��Ч��Խ�� \end{itemize} \vspace{0.5em} \begin{displaymath} \textrm{PPL}(w_1 ... w_m)=\textrm{P}(w_1 ... w_m)^{-1/m} \end{displaymath} \vspace{-0.5em} \item<2-> Penn Treebank(PTB)�ϵ����۽�� \end{itemize} \vspace{0.0em} \visible<2->{ \begin{tabular}{l | l | l | r} ģ�� & ���� & ��� & PPL \\ \hline FNN LM & Bengio et al. & 2003 & 162.2 \\ RNN LM & Mikolov et al. & 2010 & 124.7 \\ RNN-LDA LM & Mikolov et al. & 2012 & 92.0 \\ RNN(LSTM) LM & Zaremba et al. & 2014 & 78.4 \\ RHN & Zilly et al. & 2016 & 65.4 \\ RNN(AWD-LSTM) LM & Merity et al. & 2018 & 58.8 \\ GPT-2 (Transformer) & Radford et al. 
& 2019 & 35.7 \end{tabular} } \end{frame} %%%------------------------------------------------------------------------------------------------------------ \subsection{��Ƕ��} %%%------------------------------------------------------------------------------------------------------------ %%% �ʵ�one-hot��distributed��ʾ \begin{frame}{���ʵı�ʾ} \begin{itemize} \item ��α�ʾһ�����ʣ� \begin{itemize} \item \textbf{One-hot}: ������һ���ʵ�$V$���������10k�����ʣ������б�š�ÿ�����ʶ����Ա�ʾΪ10kά��one-hot���������ڱ���Ǹ�ά��Ϊ1������Ϊ0 \item<2-> \textbf{Distributed}: ������������ģ�ͣ�ÿ�����ʿ��Ա���ʾΪһ��ʵ��������ÿһά����Ӧһ��``����'' - \alert{��Ƕ��} \end{itemize} \end{itemize} \begin{center} \begin{tikzpicture} \begin{scope} \node [anchor=north west] (o1) at (0,0) {\footnotesize{$\begin{bmatrix} 0 \\ 1 \\ 0 \\ 0 \\ 0 \\ ... \\ 0 \end{bmatrix}$}}; \node [anchor=north west] (o2) at ([xshift=1em]o1.north east) {\footnotesize{$\begin{bmatrix} 0 \\ 0 \\ 0 \\ 1 \\ 0 \\ ... \\ 0 \end{bmatrix}$}}; \node [anchor=north east] (v) at ([xshift=-0em]o1.north west) {\footnotesize{$\begin{matrix} \textrm{\ \ \ \ \ ��}_1 \\ \textrm{\ \ ����}_2 \\ \textrm{\ \ \ \ \ ��}_3 \\ \textrm{\ \ ����}_4 \\ \textrm{\ \ ����}_5 \\ ... \\ \textrm{���}_{10k} \end{matrix}$}}; \node [anchor=south] (w1) at (o1.north) {\footnotesize{����}}; \node [anchor=south] (w2) at (o2.north) {\footnotesize{����}}; \node [anchor=north] (label) at (o1.south) {\footnotesize{���ʵ�one-hot��ʾ}}; \visible<3->{ \node [anchor=south,fill=red!20!white] (cosine) at (w1.north) {\footnotesize{$cosine(\textrm{`����'},\textrm{`����'})=0$}}; } \end{scope} \visible<2->{ \begin{scope}[xshift=2in] \node [anchor=north west] (o1) at (0,0) {\footnotesize{$\begin{bmatrix} .1 \\ -1 \\ 2 \\ ... \\ 0 \end{bmatrix}$}}; \node [anchor=north west] (o2) at ([xshift=1em]o1.north east) {\footnotesize{$\begin{bmatrix} 1 \\ 2 \\ .2 \\ ... 
\\ -1 \end{bmatrix}$}}; \node [anchor=north east] (v) at ([xshift=-0em]o1.north west) {\footnotesize{$\begin{matrix} \textrm{\ \ \ ����}_1 \\ \textrm{\ \ \ ����}_2 \\ \textrm{\ \ \ ����}_3 \\ ... \\ \textrm{����}_{512} \end{matrix}$}}; \node [anchor=south] (w1) at (o1.north) {\footnotesize{����}}; \node [anchor=south] (w2) at (o2.north) {\footnotesize{����}}; \node [anchor=north] (label) at ([yshift=-2em]o1.south) {\footnotesize{���ʵķֲ�ʽ��ʾ(��Ƕ��)}}; \visible<3->{ \node [anchor=south,fill=red!20!white] (cosine) at (w1.north) {\footnotesize{$cosine(\textrm{`����'},\textrm{`����'})=0.5$}}; } \end{scope} } \end{tikzpicture} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% �ֲ�ʽ��ʾ���ŵ� \begin{frame}{Ϊʲô��Ҫ�ֲ�ʽ��ʾ��} \begin{itemize} \item \textbf{һ����Ȼ������}���ֲ�ʽ��ʾ��ÿһά����ʲô��˼ \begin{itemize} \item ����ÿһά������Ϊһ�����ԣ����磺�Ա����ߵ� \item ���ǣ�ģ������ǰ�һ��ά�ȿ����������һ��``�̻�''����һ��ͳ�������ϵ�``����''�������˹����ɵ����� \end{itemize} \item<2-> �����ַ�����ʲô�ô��� \begin{itemize} \item �����̻�����֮���\alert{������} \item �����ռ��ʾģ�Ϳ��Ը�ȷ�Ŀ̻�����������Ƿ��㼴һ���ж� \end{itemize} \item<2-> Ԥ����һ�������� \begin{itemize} \item �ֲ�ʽ��ʾ������ָ��``����''��``����''�����Ƶ� \item ��ʹ``����''û����������г��ֹ���ϵͳ��Ȼ����ͨ������``����''�������Խ���Ԥ�� \end{itemize} \begin{tabular}{l | l} ���� Ҫ �ڷ� һ�� \_\_\_\_\_ & Ԥ���¸��� \\ \hline ���� Ҫ �ڷ� һ�� \alert{����} & ���� \\ ���� Ҫ �ڷ� һ�� \blue{����} & û������������Ȼ�Ǻ���Ԥ�� \end{tabular} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ��ʵ������ʵķֲ�ʽ��ʾ \begin{frame}{�ֲ�ʽ��ʾ�Ŀ��ӻ�} \begin{itemize} \item \textbf{һ������������}������ $\to$ ����\\ \begin{displaymath} \vv{\textrm{����}} - \vv{\textrm{����}} + \vv{\textrm{Ů��}} = \vv{\textrm{����}} \end{displaymath} ���$\vv{\textrm{word}}$��ʾ���ʵķֲ�ʽ������ʾ \item ����ĴʵĿ��ӻ������ƵĴʾ���һ�� \end{itemize} 
\begin{center} \includegraphics[scale=0.4]{./Figures/word-graph.png} \end{center} \end{frame} %%%------------------------------------------------------------------------------------------------------------ %%% ������ģ���еĴ�Ƕ�� \begin{frame}{������ģ���еĴ�Ƕ��} \begin{itemize} \item ��������ģ���У���Ҫ�Ѵʱ�ʾ�����ķֲ�ʽ��ʾ \begin{itemize} \item<2-> ����$\textbf{C}$�Ǵ�Ƕ�����ÿһ�ж�Ӧһ���ʵķֲ�ʽ��ʾ \item<3-> $\textbf{C}$����������ģ��ѵ����Ҳ������������ģ��ѵ�����̶���Ƕ�룬������ģ��רע��Ƭ�ε�ѧϰ \end{itemize} \end{itemize} \vspace{-0.5em} \begin{center} \begin{tikzpicture} \begin{scope} \node [anchor=center,inner sep=2pt] (e) at (0,0) {\small{$e=w$}}; \node [anchor=west,inner sep=2pt] (c) at (e.east) {\small{$\textbf{C}$}}; \begin{pgfonlayer}{background} \node [rectangle,inner sep=0.4em,draw,fill=blue!20!white] [fit = (e) (c)] (box) {}; \end{pgfonlayer} \draw [->,thick] ([yshift=-1em]box.south)--([yshift=-0.1em]box.south) node [pos=0,below] (bottom1) {\small{����$w$}}; \draw [->,thick] ([yshift=0.1em]box.north)--([yshift=1em]box.north) node [pos=1,above] (top1) {\scriptsize{$e$=(8,.2,-1,.9,...,1)}}; \node [anchor=north] (bottom2) at ([yshift=0.3em]bottom1.south) {\scriptsize{$w$=(0,0,1,0,...,0)}}; \node [anchor=south] (top2) at ([yshift=-0.3em]top1.north) {\small{$w$�ķֲ�ʽ��ʾ}}; \visible<2->{ \node [anchor=north west,fill=red!20!white] (cmatrix) at ([xshift=3em,yshift=1.0em]c.north east) {\scriptsize{$\begin{pmatrix} 1 & .2 & -.2 & 8 & ... & 0 \\ .6 & .8 & -2 & 1 & ... & -.2 \\ 8 & .2 & -1 & .9 & ... & 2.3 \\ 1 & 1.2 & -.9 & 3 & ... & .2 \\ ... & ... & ... & ... & ... & ... \\ 1 & .3 & 3 & .9 & ... 
& 5.1 \end{pmatrix}$}}; \node [anchor=west,inner sep=2pt,fill=red!30!white] (c) at (e.east) {\small{$\textbf{C}$}}; \draw [<-,thick] (c.east) -- ([xshift=3em]c.east); } \visible<3->{ \node [anchor=south,draw,fill=green!20!white] (e2) at ([yshift=1.5em]cmatrix.north) {\scriptsize{�ⲿ��Ƕ��ϵͳ�õ���$\textbf{C}$}}; \draw [->,very thick,dashed] (e2.south) -- (cmatrix.north); } \end{scope} \end{tikzpicture} \end{center} \vspace{-1.0em} \begin{itemize} \item<4-> ��Ƕ�����ѧϰ�õ��� \begin{itemize} \item ���Ժ�����ģ�͵���������һ��ѵ���������ٶȽ��� \item Ҳ���Կ���ʹ��Ч�ʸ��ߵ��ⲿģ�ͣ���word2vec�� Glove�ȣ���������ʹ�ø����ģ������ \end{itemize} \end{itemize} \end{frame} %%%------------------------------------------------------------------------------------------------------------ \subsection{���ӱ�ʾģ�ͼ�Ԥѵ��} %%%------------------------------------------------------------------------------------------------------------ %%% ��Ƕ������� \begin{frame}{������``��''} \begin{itemize} \item ��Ƕ���Ѿ���Ϊ���NLPϵͳ�ı��䣬��ȻҲ���������ֻ�ʽ�淨��������``embed everything''�Ŀںţ����Ǵ�Ƕ��Ҳ������ \begin{itemize} \item ÿ���ʶ���ӦΨһ��������ʾ�����Ƕ���һ�ʶ�����������Ҫͨ�������Ľ������֡�һ�����������ӣ� \end{itemize} \vspace{0.3em} \hspace{6em} Jobs was the CEO of \alert{\underline{apple}}.\\ \hspace{6em} He finally ate the \alert{\underline{apple}}. 
\item<2-> ������������Ϣ
  \begin{itemize}
    \item ���������������µ�˼�������ܼؿ��Ǵʵı�ʾ��Ӧͬʱ��������������Ϣ
    \item ���ھ����е�һ����(����λ��)��ͬʱ��ʾ�ʺ�������
  \end{itemize}
\end{itemize}
%%% Overlay 2: two side-by-side sketches over the sentence "Jobs was the CEO of apple."
%%% Left: the word "apple" maps to a single box. Right: the same box with a second box stacked
%%% on top that also receives arrows from the preceding words.
\visible<2->{
\begin{center}
\begin{tikzpicture}[tagbox/.style={anchor=south,inner sep=2pt,minimum width=2.4em}]
  % (left) word-only representation
  \begin{scope}
    \node [anchor=west] (node1) at (0,0) {\footnotesize{Jobs was the CEO of}};
    \node [anchor=west] (node2) at ([xshift=-0.2em,yshift=-0.05em]node1.east) {\footnotesize{\alert{\underline{apple}}}};
    \node [anchor=west] (node3) at ([xshift=-0.2em,yshift=-0.1em]node2.east) {\footnotesize{.}};
    \node [tagbox,fill=red!20!white] (node4) at ([yshift=1.5em]node2.north) {\scriptsize{��}};
    \node [anchor=north] (label) at ([xshift=1em]node1.south) {\scriptsize{\textbf{�ʱ�ʾģ��}}};
    \draw [->,thick] (node2.north) -- (node4.south);
  \end{scope}
  % (right) word + context representation, shifted 2in to the right
  \begin{scope}[xshift=2in]
    \node [anchor=west] (node1) at (0,0) {\footnotesize{Jobs was the CEO of}};
    \node [anchor=west] (node2) at ([xshift=-0.2em,yshift=-0.05em]node1.east) {\footnotesize{\alert{\underline{apple}}}};
    \node [anchor=west] (node3) at ([xshift=-0.2em,yshift=-0.1em]node2.east) {\footnotesize{.}};
    \node [tagbox,fill=red!20!white] (node4) at ([yshift=1.5em]node2.north) {\scriptsize{��}};
    \node [tagbox,fill=blue!20!white] (node5) at (node4.north) {\scriptsize{������}};
    \node [anchor=north] (label) at ([xshift=1em]node1.south) {\scriptsize{\textbf{��+�����ı�ʾģ��}}};
    \draw [->,thick] (node2.north) -- (node4.south);
    % Curved arrows from two positions in the left context into the upper box.
    \draw [->] ([xshift=1em]node1.north west) .. controls +(north:1) and +(west:2) .. ([yshift=0.2em]node5.west);
    \draw [->] ([xshift=3em]node1.north west) .. controls +(north:0.8) and +(west:1.5) .. ([yshift=-0.2em]node5.west);
    \node [anchor=east] (morelines) at ([xshift=-1.5em]node4.west) {...};
  \end{scope}
\end{tikzpicture}
\end{center}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Frame: context representations from an RNN language model.
%%% Four input words feed embedding boxes and a two-layer chain of RNN cells; each column is
%%% labelled on top with the prefix it has consumed. Overlays 2 and 3 highlight the column-3
%%% label and the 4th embedding box and link them to two tags at the top.
\begin{frame}{��ʾ������Ƭ�� - �����ı�ʾģ��}
\begin{itemize}
  \item ������ģ�����Ѿ�������ÿ��λ�õ������ı�ʾ��Ϣ
  \begin{itemize}
    \item ��RNN LMΪ����λ��$i$�������������һ��$w_1 \dots w_i$�ı�ʾ
  \end{itemize}
\end{itemize}
\vspace{-0.5em}
\begin{center}
\begin{tikzpicture}
  \begin{scope}
    % \tikzset{<name>/.style=...} replaces the deprecated \tikzstyle; still local to this scope.
    \tikzset{rnnnode/.style={draw,inner sep=5pt,minimum width=4em,minimum height=1.5em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}}}
    % Layer 1 of RNN cells, chained left to right.
    \node [anchor=west,rnnnode] (node11) at (0,0) {\scriptsize{RNN Cell}};
    \foreach \prev/\cur in {1/2,2/3,3/4} {
      \node [anchor=west,rnnnode] (node1\cur) at ([xshift=2em]node1\prev.east) {\scriptsize{RNN Cell}};
    }
    % Per column: embedding box below, layer-2 RNN cell above.
    \foreach \idx in {1,...,4} {
      \node [anchor=north,rnnnode,fill=red!30!white] (e\idx) at ([yshift=-1.2em]node1\idx.south) {\scriptsize{embedding}};
      \node [anchor=south,rnnnode] (node2\idx) at ([yshift=1.5em]node1\idx.north) {\scriptsize{RNN Cell}};
    }
    % Input words (kept as explicit nodes because they carry CJK text).
    \node [anchor=north] (w1) at ([yshift=-1em]e1.south) {\footnotesize{�Dz�˹}};
    \node [anchor=north] (w2) at ([yshift=-1em]e2.south) {\footnotesize{��ְ}};
    \node [anchor=north] (w3) at ([yshift=-1em]e3.south) {\footnotesize{��}};
    \node [anchor=north] (w4) at ([yshift=-1em]e4.south) {\footnotesize{ƻ��}};
    % Top labels: a two-line caption per column (nodeNNnew sits on top of nodeNN).
    % The "\ " padding widens the lower line so the upper line's west anchor lines up.
    \node [anchor=south] (node31) at ([yshift=1.0em]node21.north) {\scriptsize{�ı�ʾ}};
    \node [anchor=south west] (node31new) at ([yshift=-0.3em]node31.north west) {\scriptsize{``�Dz�˹''}};
    \node [anchor=south] (node32) at ([yshift=1.0em]node22.north) {\scriptsize{�ı�ʾ\ \ \ }};
    \node [anchor=south west] (node32new) at ([yshift=-0.3em]node32.north west) {\scriptsize{``�Dz�˹ ��ְ''}};
    \node [anchor=south] (node33) at ([yshift=1.0em]node23.north) {\scriptsize{�ı�ʾ\ \ \ \ \ \ \ \ }};
    \node [anchor=south west] (node33new) at ([yshift=-0.3em]node33.north west) {\scriptsize{``�Dz�˹ ��ְ ��''}};
    \node [anchor=south] (node34) at ([yshift=1.0em]node24.north) {\scriptsize{�ı�ʾ\ \ \ \ \ \ \ \ }};
    \node [anchor=south west] (node34new) at ([yshift=-0.3em]node34.north west) {\scriptsize{``�Dz�˹ ��ְ �� ƻ��''}};
    % Vertical data flow in every column: word -> embedding -> RNN layer 1 -> RNN layer 2 -> label.
    \foreach \idx in {1,...,4} {
      \draw [->,thick] ([yshift=0.1em]w\idx.north)--([yshift=-0.1em]e\idx.south);
      \draw [->,thick] ([yshift=0.1em]e\idx.north)--([yshift=-0.1em]node1\idx.south);
      \draw [->,thick] ([yshift=0.1em]node1\idx.north)--([yshift=-0.1em]node2\idx.south);
      \draw [->,thick] ([yshift=0.1em]node2\idx.north)--([yshift=-0.1em]node3\idx.south);
    }
    % Horizontal recurrence in both layers, with a stub arrow entering and leaving each row.
    \foreach \row in {1,2} {
      \draw [->,thick] ([xshift=-1em]node\row1.west)--([xshift=-0.1em]node\row1.west);
      \foreach \prev/\cur in {1/2,2/3,3/4} {
        \draw [->,thick] ([xshift=0.1em]node\row\prev.east)--([xshift=-0.1em]node\row\cur.west);
      }
      \draw [->,thick] ([xshift=0.1em]node\row4.east)--([xshift=1em]node\row4.east);
    }
    % Overlay 2: caption plus the blue tag; overlay 3: the red tag.
    \visible<2->{
      \node [anchor=south] (toplabel1) at ([yshift=2em,xshift=-2em]node32new.north) {\footnotesize{``ƻ��''�ı�ʾ��}};
      \node [anchor=west,fill=blue!20!white,minimum width=3em] (toplabel2) at (toplabel1.east) {\footnotesize{������}};
    }
    \visible<3->{
      \node [anchor=west,fill=red!20!white,minimum width=3em] (toplabel3) at (toplabel2.east) {\footnotesize{��}};
    }
    % Highlights go on the background layer so they sit behind the boxes they frame.
    \begin{pgfonlayer}{background}
      \visible<3->{
        \node [rectangle,inner sep=2pt,draw,thick,dashed,red] [fit = (e4)] (r2) {};
        \draw [->,thick,red] (r2.west) .. controls +(west:0.8) and +(south:2) .. ([xshift=1.3em]toplabel3.south);
      }
      \visible<2->{
        \node [rectangle,inner sep=2pt,draw,thick,dashed,ublue,fill=white] [fit = (node33) (node33new)] (r1) {};
        \draw [->,thick,ublue] ([xshift=-2em]r1.north) .. controls +(north:0.7) and +(south:0.7) ..
([xshift=-0.5em]toplabel2.south); }
      \end{pgfonlayer}
    \end{scope}
  \end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Frame: ELMO (Embedding from Language Models).
%%% Two dashed boxes each hold a two-layer LSTM stack. Every input embedding E_k feeds the
%%% bottom layer of both stacks, and every output T_k collects the top layer of both stacks.
%%% The left stack is drawn left-to-right and the right stack right-to-left.
\begin{frame}{��ǿ�ı�ʾģ�� - ELMO}
\begin{itemize}
  \item \textbf{ELMO}(Embedding from Language Models)����˵�������˻�������ģ�͵�Ԥѵ�����ȳ�
  \begin{itemize}
    \item ��Ȼʹ��RNN�ṹ������ѭ����Ԫ������LSTM
    \item ͬʱ�����������Һ���������Ľ�ģ��ʽ��ͬʱ��ʾһ������˺��Ҷ˵�������
    \item �ں����в�������������Ӧ�ã��ṩ�˸��ḻ����Ϣ
  \end{itemize}
\end{itemize}
\vspace{0.5em}
\begin{center}
\begin{tikzpicture}
  \begin{scope}[scale=1.2,
      lstmcell/.style={draw,inner sep=4pt,fill=blue!20!white},
      dotsnode/.style={inner sep=4pt},
      embcell/.style={draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em},
      outcell/.style={draw,inner sep=4pt,fill=yellow!30,minimum width=2em},
      stackbox/.style={rectangle,rounded corners,draw=black!50,densely dashed,inner sep=0.4em}]
    % --- left (forward) stack: Lstm0..2 bottom row, Lstm3..5 top row ---
    \node [anchor=west,lstmcell] (Lstm0) at (0,0) {\scriptsize{Lstm}};
    \node [anchor=south,lstmcell] (Lstm3) at ([yshift=1em]Lstm0.north) {\scriptsize{Lstm}};
    \foreach \first/\second/\gap/\final in {Lstm0/Lstm1/sep/Lstm2,Lstm3/Lstm4/sep1/Lstm5} {
      \node [anchor=west,lstmcell] (\second) at ([xshift=1em]\first.east) {\scriptsize{Lstm}};
      \node [anchor=west,dotsnode] (\gap) at ([xshift=1em]\second.east) {\scriptsize{...}};
      \node [anchor=west,lstmcell] (\final) at ([xshift=1em]\gap.east) {\scriptsize{Lstm}};
    }
    \node [stackbox] [fit = (Lstm0) (Lstm2) (Lstm3) (Lstm5)] (fwdstack) {};
    % --- inputs (below) and outputs (above), centred between the two stacks ---
    % NOTE(review): the last column is labelled E_3 / T_3 although it follows an ellipsis; kept as in the original.
    \node [anchor=north,embcell] (e1) at ([xshift=-2em,yshift=-1em]Lstm2.south) {\scriptsize{$E_1$}};
    \node [anchor=west,embcell] (e2) at ([xshift=1em]e1.east) {\scriptsize{$E_2$}};
    \node [anchor=west,dotsnode] (sep5) at ([xshift=1em]e2.east) {\scriptsize{...}};
    \node [anchor=west,embcell] (e3) at ([xshift=1em]sep5.east) {\scriptsize{$E_3$}};
    \node [anchor=south,outcell] (t1) at ([xshift=-2em,yshift=1em]Lstm5.north) {\scriptsize{$T_1$}};
    \node [anchor=west,outcell] (t2) at ([xshift=1em]t1.east) {\scriptsize{$T_2$}};
    \node [anchor=west,dotsnode] (sep6) at ([xshift=1em]t2.east) {\scriptsize{...}};
    \node [anchor=west,outcell] (t3) at ([xshift=1em]sep6.east) {\scriptsize{$T_3$}};
    % --- right (backward) stack: Lstm6..8 bottom row, Lstm9..11 top row ---
    \node [anchor=west,lstmcell] (Lstm6) at ([xshift=1.5em]Lstm2.east) {\scriptsize{Lstm}};
    \node [anchor=south,lstmcell] (Lstm9) at ([yshift=1em]Lstm6.north) {\scriptsize{Lstm}};
    \foreach \first/\second/\gap/\final in {Lstm6/Lstm7/sep3/Lstm8,Lstm9/Lstm10/sep4/Lstm11} {
      \node [anchor=west,lstmcell] (\second) at ([xshift=1em]\first.east) {\scriptsize{Lstm}};
      \node [anchor=west,dotsnode] (\gap) at ([xshift=1em]\second.east) {\scriptsize{...}};
      \node [anchor=west,lstmcell] (\final) at ([xshift=1em]\gap.east) {\scriptsize{Lstm}};
    }
    % Distinct name: the original reused (inputshadow) for both boxes.
    \node [stackbox] [fit = (Lstm6) (Lstm8) (Lstm9) (Lstm11)] (bwdstack) {};
    % Recurrence inside the forward stack: left to right.
    \foreach \src/\dst in {Lstm0/Lstm1,Lstm1/sep,sep/Lstm2,Lstm3/Lstm4,Lstm4/sep1,sep1/Lstm5} {
      \draw [->] ([xshift=0.1em]\src.east) -- ([xshift=-0.1em]\dst.west);
    }
    % Recurrence inside the backward stack: right to left. The original drew these
    % left-to-right as well, which made the two stacks indistinguishable.
    \foreach \src/\dst in {Lstm6/Lstm7,Lstm7/sep3,sep3/Lstm8,Lstm9/Lstm10,Lstm10/sep4,sep4/Lstm11} {
      \draw [<-] ([xshift=0.1em]\src.east) -- ([xshift=-0.1em]\dst.west);
    }
    % Straight bottom-to-top edges: layer 1 -> layer 2, embeddings -> both stacks,
    % backward-stack top layer -> outputs.
    \foreach \src/\dst in {Lstm0/Lstm3,Lstm1/Lstm4,Lstm2/Lstm5,%
                           Lstm6/Lstm9,Lstm7/Lstm10,Lstm8/Lstm11,%
                           e1/Lstm0,e1/Lstm6,e2/Lstm1,e2/Lstm7,e3/Lstm2,e3/Lstm8,%
                           Lstm9/t1,Lstm10/t2,Lstm11/t3} {
      \draw [->] ([yshift=0.1em]\src.north) -- ([yshift=-0.1em]\dst.south);
    }
    % Forward-stack top layer -> outputs; the target is nudged 0.05em left as in the original.
    \foreach \src/\dst in {Lstm3/t1,Lstm4/t2,Lstm5/t3} {
      \draw [->] ([yshift=0.1em]\src.north) -- ([xshift=-0.05em,yshift=-0.1em]\dst.south);
    }
  \end{scope}
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Frame: GPT (Generative Pre-Training).
\begin{frame}{��ǿ�ı�ʾģ�� - GPT}
\begin{itemize}
  \item
\textbf{GPT}(Generative Pre-Training)Ҳ��һ�ֻ�������ģ�͵ı�ʾģ��
  \begin{itemize}
    \item �ܹ�������Transformer��������ȡ������ǿ
    \item ����Pre-training + Fine-tuning�Ŀ�ܣ�Ԥѵ����Ϊ����ϵͳ�����IJ�����ʼֵ����˿��Ը��õ���ӦĿ������
  \end{itemize}
\end{itemize}
\vspace{0.5em}
\begin{center}
\begin{tikzpicture}
  % GPT diagram: two rows of Transformer blocks (Trm0..4 below, Trm5..9 above) inside a dashed
  % box, embeddings E_1..E_5 underneath, outputs T_1..T_5 on top. Fan-out edges are drawn only
  % from columns 1 and 3, and only to the same column or columns to its right.
  \begin{scope}[scale=1.2,
      trmcell/.style={draw,inner sep=4pt,fill=blue!20!white,minimum width=3em},
      dotsnode/.style={inner sep=4pt},
      embcell/.style={draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em},
      outcell/.style={draw,inner sep=4pt,fill=yellow!30,minimum width=2em},
      stackbox/.style={rectangle,rounded corners,draw=black!50,densely dashed,inner sep=0.4em}]
    % Row anchors, then the first four blocks of each row chained to the right.
    \node [anchor=west,trmcell] (Trm0) at (0,0) {\scriptsize{Trm}};
    \node [anchor=south,trmcell] (Trm5) at ([yshift=1em]Trm0.north) {\scriptsize{Trm}};
    \foreach \prev/\cur in {Trm0/Trm1,Trm1/Trm2,Trm2/Trm3,Trm5/Trm6,Trm6/Trm7,Trm7/Trm8} {
      \node [anchor=west,trmcell] (\cur) at ([xshift=1em]\prev.east) {\scriptsize{Trm}};
    }
    % Ellipsis and last block of each row.
    \node [anchor=west,dotsnode] (sep) at ([xshift=1em]Trm3.east) {\scriptsize{...}};
    \node [anchor=west,trmcell] (Trm4) at ([xshift=1em]sep.east) {\scriptsize{Trm}};
    \node [anchor=west,dotsnode] (sep1) at ([xshift=1em]Trm8.east) {\scriptsize{...}};
    \node [anchor=west,trmcell] (Trm9) at ([xshift=1em]sep1.east) {\scriptsize{Trm}};
    \node [stackbox] [fit = (Trm0) (Trm4) (Trm5) (Trm9)] (inputshadow) {};
    % Embeddings below the bottom row, outputs above the top row, one pair per column.
    \foreach \idx/\lo/\hi in {1/Trm0/Trm5,2/Trm1/Trm6,3/Trm2/Trm7,4/Trm3/Trm8,5/Trm4/Trm9} {
      \node [anchor=north,embcell] (e\idx) at ([yshift=-1em]\lo.south) {\scriptsize{$E_\idx$}};
      \node [anchor=south,outcell] (t\idx) at ([yshift=1em]\hi.north) {\scriptsize{$T_\idx$}};
    }
    \node [anchor=north,dotsnode] (sep5) at ([yshift=-1em]sep.south) {\scriptsize{...}};
    \node [anchor=south,dotsnode] (sep6) at ([yshift=1em]sep1.north) {\scriptsize{...}};
    % All edges run from the north of the source to the south of the target.
    \foreach \src/\dst in {e1/Trm0,e1/Trm1,e1/Trm2,e1/Trm3,e1/Trm4,%
                           e2/Trm1,%
                           e3/Trm2,e3/Trm3,e3/Trm4,%
                           e4/Trm3,%
                           e5/Trm4,%
                           Trm0/Trm5,Trm0/Trm6,Trm0/Trm7,Trm0/Trm8,Trm0/Trm9,%
                           Trm1/Trm6,%
                           Trm2/Trm7,Trm2/Trm8,Trm2/Trm9,%
                           Trm3/Trm8,%
                           Trm4/Trm9,%
                           Trm5/t1,Trm6/t2,Trm7/t3,Trm8/t4,Trm9/t5} {
      \draw [->] ([yshift=0.1em]\src.north) -- ([yshift=-0.1em]\dst.south);
    }
  \end{scope}
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Frame: BERT (Bidirectional Encoder Representations from Transformers).
%%% Same layout as the GPT diagram; the difference is that column 3 fans out to every column,
%%% including those on its left.
\begin{frame}{��ǿ�ı�ʾģ�� - BERT}
\begin{itemize}
  \item \textbf{BERT}( Bidirectional Encoder Representations from Transformers)������dz��𱬵ı�ʾģ��
  \begin{itemize}
    \item ��Ȼ����Transformer���ǿ������������˵�������(���ԶԱ�GPT)
    \item ʹ����Mask����������ѵ���õ�ģ�͵Ľ�׳�ԣ��������������Ϊ��Ԥѵ����ʾģ�͵��·�ʽ
  \end{itemize}
\end{itemize}
\vspace{0.5em}
\begin{center}
\begin{tikzpicture}
  \begin{scope}[scale=1.2,
      trmcell/.style={draw,inner sep=4pt,fill=blue!20!white,minimum width=3em},
      dotsnode/.style={inner sep=4pt},
      embcell/.style={draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em},
      outcell/.style={draw,inner sep=4pt,fill=yellow!30,minimum width=2em},
      stackbox/.style={rectangle,rounded corners,draw=black!50,densely dashed,inner sep=0.4em}]
    % Row anchors, then the first four blocks of each row chained to the right.
    \node [anchor=west,trmcell] (Trm0) at (0,0) {\scriptsize{Trm}};
    \node [anchor=south,trmcell] (Trm5) at ([yshift=1em]Trm0.north) {\scriptsize{Trm}};
    \foreach \prev/\cur in {Trm0/Trm1,Trm1/Trm2,Trm2/Trm3,Trm5/Trm6,Trm6/Trm7,Trm7/Trm8} {
      \node [anchor=west,trmcell] (\cur) at ([xshift=1em]\prev.east) {\scriptsize{Trm}};
    }
    % Ellipsis and last block of each row.
    \node [anchor=west,dotsnode] (sep) at ([xshift=1em]Trm3.east) {\scriptsize{...}};
    \node [anchor=west,trmcell] (Trm4) at ([xshift=1em]sep.east) {\scriptsize{Trm}};
    \node [anchor=west,dotsnode] (sep1) at ([xshift=1em]Trm8.east) {\scriptsize{...}};
    \node [anchor=west,trmcell] (Trm9) at ([xshift=1em]sep1.east) {\scriptsize{Trm}};
    \node [stackbox] [fit = (Trm0) (Trm4) (Trm5) (Trm9)] (inputshadow) {};
    % Embeddings below the bottom row, outputs above the top row, one pair per column.
    \foreach \idx/\lo/\hi in {1/Trm0/Trm5,2/Trm1/Trm6,3/Trm2/Trm7,4/Trm3/Trm8,5/Trm4/Trm9} {
      \node [anchor=north,embcell] (e\idx) at ([yshift=-1em]\lo.south) {\scriptsize{$E_\idx$}};
      \node [anchor=south,outcell] (t\idx) at ([yshift=1em]\hi.north) {\scriptsize{$T_\idx$}};
    }
    \node [anchor=north,dotsnode] (sep5) at ([yshift=-1em]sep.south) {\scriptsize{...}};
    \node [anchor=south,dotsnode] (sep6) at ([yshift=1em]sep1.north) {\scriptsize{...}};
    % All edges run from the north of the source to the south of the target.
    \foreach \src/\dst in {e1/Trm0,e1/Trm1,e1/Trm2,e1/Trm3,e1/Trm4,%
                           e2/Trm1,%
                           e3/Trm0,e3/Trm1,e3/Trm2,e3/Trm3,e3/Trm4,%
                           e4/Trm3,%
                           e5/Trm4,%
                           Trm0/Trm5,Trm0/Trm6,Trm0/Trm7,Trm0/Trm8,Trm0/Trm9,%
                           Trm1/Trm6,%
                           Trm2/Trm5,Trm2/Trm6,Trm2/Trm7,Trm2/Trm8,Trm2/Trm9,%
                           Trm3/Trm8,%
                           Trm4/Trm9,%
                           Trm5/t1,Trm6/t2,Trm7/t3,Trm8/t4,Trm9/t5} {
      \draw [->] ([yshift=0.1em]\src.north) -- ([yshift=-0.1em]\dst.south);
    }
  \end{scope}
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Frame: pre-training.
%%% Top picture: an "Encoder" bar with a "Decoder" bar stacked on it.
%%% Bottom picture (overlay 2): (a) a system trained directly on its own data versus
%%% (b) a system whose inner model box is fed by a separately trained box.
\begin{frame}{Ԥѵ��}
\begin{itemize}
  \item ����ģ�Ϳ���ʹ�ô����ޱ�ע���ݽ���ѵ�����õ���ģ�Ϳ��Ա�ֱ����������ϵͳ�������е���������Ϊ��
  \begin{center}
  \begin{tikzpicture}[bar/.style={anchor=south,minimum width=17em}]
    \node [bar,fill=red!20!white] (encoder) at (0,0) {Encoder (����ģ��Ԥ��ѵ��)};
    \node [bar,fill=blue!20!white] (decoder) at (encoder.north) {Decoder (Ŀ����������ѵ��)};
  \end{tikzpicture}
  \end{center}
  \item<2-> �������˷dz��𱬵�\alert{��ʽ}�����ģ����ģ��pre-training + Ŀ������fine-tuning
  \begin{itemize}
    \item ����NLP�����Ա�����Ϊ���Խ�ģ�����ⲿѵ���õ�������ģ����Ϊģ�����Ŀ��ϵͳ��(������ʼ��)
  \end{itemize}
\end{itemize}
\visible<2->{
\begin{center}
\begin{tikzpicture}[modelbox/.style={minimum width=4em,minimum height=1.7em}]
  % (a) standard method: one system box containing a model box, trained on its own data.
  \begin{scope}
    \node [anchor=west,draw,thick,modelbox,fill=blue!20] (encoder) at (0,0) {ģ��};
    \node [anchor=south,modelbox] (space) at ([yshift=0.3em]encoder.north) {\footnotesize{Ŀ��ϵͳ}};
    \begin{pgfonlayer}{background}
      \node [rectangle,draw,thick,fill=red!20] [fit = (encoder) (space)] (system) {};
    \end{pgfonlayer}
    \node [anchor=north] (data) at ([yshift=-1em]system.south) {\scriptsize{\textbf{Ŀ�������б�ע����}}};
    \draw [->,thick] (data.north) -- ([yshift=-0.1em]system.south);
    \node [anchor=north] (label) at ([yshift=-0em]data.south) {\scriptsize{(a) standard method}};
  \end{scope}
  % (b) pre-training + fine-tuning: the inner model box is dashed and receives an arrow from
  % a separate box on its left, which has its own data source.
  \begin{scope}[xshift=2.8in]
    \node [anchor=west,draw,dashed,thick,modelbox,fill=blue!20] (encoder) at (0,0) {ģ��};
    \node [anchor=south,modelbox] (space) at ([yshift=0.3em]encoder.north) {\footnotesize{Ŀ��ϵͳ}};
    \node [anchor=center,draw,thick,modelbox,fill=green!20] (encoderpre) at ([xshift=-7em]encoder.center) {\footnotesize{����ģ��}};
    \draw [->,thick] (encoderpre.east) -- (encoder.west);
    \begin{pgfonlayer}{background}
      \node [rectangle,draw,thick,fill=red!20] [fit = (encoder) (space)] (system) {};
    \end{pgfonlayer}
    \node [anchor=north] (data) at ([yshift=-1em]system.south) {\scriptsize{\textbf{Ŀ�������б�ע����}}};
    \draw [->,thick] (data.north) -- ([yshift=-0.1em]system.south);
    \node [anchor=north] (data2) at ([yshift=-1em,xshift=-7em]system.south) {\scriptsize{\textbf{���ģ�ޱ�ע����}}};
    \draw [->,thick] (data2.north) -- ([yshift=-0.1em]encoderpre.south);
    \node [anchor=north] (label) at ([yshift=-0em,xshift=-4em]data.south) {\scriptsize{(b) pre-training + fine-tuning}};
  \end{scope}
\end{tikzpicture}
\end{center}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Frame: the rationale behind pre-training.
\begin{frame}{Ԥѵ����������˼·}
\begin{itemize}
  \item Ԥѵ��ģ��ˢ����������ͬʱ��������һЩ˼����\\ Ԥѵ�����������Ǵ�����ʲô��
\begin{itemize}
    \item �б�ע���������ޣ�Ԥѵ���ṩʹ�ó����ģ���ݵķ���
    \item �Ӵ��ģ�ޱ�ע������ѧϰͨ��֪ʶ��������������
    \item �����縴���Ҳ�����ѵ����Ԥѵ������ʹģ��ע���ʽ�ĸ��ܶ�����
  \end{itemize}
\end{itemize}
%%% Overlay 2: an ellipse with ten red sample points scattered inside it.
%%% Overlay 3 points at one outlying sample (p8); overlay 4 frames a cluster of samples.
\visible<2->{
\begin{center}
\begin{tikzpicture}
  \draw[name path=ellipse,thick] (0,0) circle[x radius = 2, y radius = 1];
  % Sample points as name/x/y triples.
  \foreach \pname/\px/\py in {p1/0.2/0.5,p2/0.3/0.6,p3/0.1/-0.1,p4/0.4/0,p5/0.5/0.3,%
                              p6/0.6/0.1,p7/0.7/-0.1,p8/-1.2/0.4,p9/-1.0/-0.3,p10/-0.1/-0.8} {
    \node[rectangle,minimum size=0.1em,inner sep=2pt,fill=red] (\pname) at (\px,\py) {};
  }
  % The frame is fitted around p1..p6 only.
  % NOTE(review): p7 lies close to this cluster but is not in the fit list; confirm that is intended.
  \begin{pgfonlayer}{background}
    \visible<4->{
      \node [rectangle,inner sep=0.4em,draw,blue] [fit = (p1) (p2) (p3) (p4) (p5) (p6)] (area) {};
    }
  \end{pgfonlayer}
  % Call-outs: each label sits at the start (pos=0) of its arrow.
  \draw [->] (2.5,-0.7) -- (1.8,-0.5) node [pos=0,right] {\scriptsize{ģ�Ͳ�����ռ�}};
  \visible<4->{
    \draw [->] (2.0,0.7) -- (area.20) node [pos=0,right] {\scriptsize{���ʽ���ܶ�����(Ԥѵ��)}};
  }
  \visible<3->{
    \draw [->] (-2.0,0.7) -- (p8.west) node [pos=0,left] {\scriptsize{����Ľ�}};
  }
\end{tikzpicture}
\end{center}
}
\begin{itemize}
  \item<5-> ���������е�Ԥѵ��
  \begin{itemize}
    \item ����������Ԥѵ����û������һ�������ںܶ������������ѵ������������С����һ����Ҳ��Ӧ�������˫�ィģ��Ԥѵ��Ҳ������µ�Ҫ��
  \end{itemize}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Frame: summary.
\begin{frame}{�ܽ� - ����һ����}
\begin{itemize}
  \item ���˺ܶ࣬��ѽ�ۣ�������һ����Ҫ�۵�
\begin{itemize}
    \item ������û����ô���ӣ����Ų�����
    \item ������ṹ������ϳ�ǿ���ģ��
    \item ����ģ�Ϳ�����������ʵ�֣�Ч���ܺã�������ֵ�Ԥѵ���ȷ�ʽ֤����������ģ�͵�DZ��
  \end{itemize}
  \item<2-> ��Ȼ�кܶ�������Ҫ����
  \begin{itemize}
    \item ������������ṹ(����NLP)\\ googleһ��LSTM��GRU��CNN
    \item ���ģ�ͺ�ѵ�����������ѧϰ�������``��''?\\ ���������Դ���ʲô��\\ �����Ч��ѵ�����ģ�ͣ�
    \item ��ΰ����������ڰ��������������ڵ�����NLP����\\ ����encoder-decoder���
    \item ���ѧϰ��ʵ������\\ ``������''�˽��£���Ϊ��ͬ������κ�ģ����ƶ��м���\\ ...
  \end{itemize}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Last slide: two bold closing lines above a small RNN language model picture.
%%% In the picture the inputs are <s> followed by three words, and the outputs are the same
%%% three words shifted one position left, followed by </s>.
\begin{frame}{�ֽ���һ������~}
\vspace{2em}
\begin{center}
\textbf{���ݺܶ࣬�����˸�ͷ}\\
\textbf{ѧϰ���ѧϰ������Ҫʵ���;���Ļ��ۣ�}
\vspace{2em}
\begin{tikzpicture}
  % \tikzset{<name>/.style=...} replaces the deprecated \tikzstyle; still local to this picture.
  \tikzset{rnnnode/.style={draw,inner sep=5pt,minimum width=4em,minimum height=1.5em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}}}
  % RNN cells, chained left to right.
  \node [anchor=west,rnnnode] (node11) at (0,0) {\tiny{RNN Cell}};
  \foreach \prev/\cur in {1/2,2/3,3/4} {
    \node [anchor=west,rnnnode] (node1\cur) at ([xshift=2em]node1\prev.east) {\tiny{RNN Cell}};
  }
  % Per column: embedding box below the cell, softmax box above it.
  \foreach \idx in {1,...,4} {
    \node [anchor=north,rnnnode,fill=red!30!white] (e\idx) at ([yshift=-1.2em]node1\idx.south) {\tiny{embedding}};
    \node [anchor=south,rnnnode,fill=red!30!white] (node2\idx) at ([yshift=1.0em]node1\idx.north) {\tiny{Softmax($\cdot$)}};
  }
  % Input tokens (explicit nodes because they carry CJK text).
  \node [anchor=north] (w1) at ([yshift=-1em]e1.south) {\footnotesize{$<$s$>$}};
  \node [anchor=north] (w2) at ([yshift=-1em]e2.south) {\footnotesize{лл}};
  \node [anchor=north] (w3) at ([yshift=-1em]e3.south) {\footnotesize{���}};
  \node [anchor=north] (w4) at ([yshift=-1em]e4.south) {\footnotesize{����}};
  % Predicted tokens.
  \node [anchor=south] (output1) at ([yshift=1em]node21.north) {\Large{\textbf{лл}}};
  \node [anchor=south] (output2) at ([yshift=1em]node22.north) {\Large{\textbf{���}}};
  \node [anchor=south] (output3) at ([yshift=1em]node23.north) {\Large{\textbf{����}}};
  \node [anchor=south] (output4) at ([yshift=1em]node24.north) {\Large{\textbf{$<$/s$>$}}};
  % Vertical data flow in every column: token -> embedding -> RNN cell -> softmax -> prediction.
  \foreach \idx in {1,...,4} {
    \draw [->,thick] ([yshift=0.1em]w\idx.north)--([yshift=-0.1em]e\idx.south);
    \draw [->,thick] ([yshift=0.1em]e\idx.north)--([yshift=-0.1em]node1\idx.south);
    \draw [->,thick] ([yshift=0.1em]node1\idx.north)--([yshift=-0.1em]node2\idx.south);
    \draw [->,thick] ([yshift=0.1em]node2\idx.north)--([yshift=-0.1em]output\idx.south);
  }
  % Horizontal recurrence with a stub arrow entering and leaving the row.
  \draw [->,thick] ([xshift=-1em]node11.west)--([xshift=-0.1em]node11.west);
  \foreach \prev/\cur in {1/2,2/3,3/4} {
    \draw [->,thick] ([xshift=0.1em]node1\prev.east)--([xshift=-0.1em]node1\cur.west);
  }
  \draw [->,thick] ([xshift=0.1em]node14.east)--([xshift=1em]node14.east);
\end{tikzpicture}
\end{center}
\end{frame}
\end{CJK}
\end{document}