Commit caac3b7a by xiaotong

tensor of neural networks

parent 1037ae94
......@@ -102,588 +102,56 @@
\section{为什么要谈神经网络}
%%%------------------------------------------------------------------------------------------------------------
\subsection{数学基础:张量计算}
\subsection{神经网络的简单实现:张量计算}
%%%------------------------------------------------------------------------------------------------------------
%%% 神经网络的作用
\begin{frame}{神经网络:线性变换 + 激活函数}
%%% 何为张量
\begin{frame}{如何描述神经网络 - 张量计算}
\begin{itemize}
\item 对于向量$\textbf{x} \in \mathbb{R}^m$,一层神经网络首先把它经过\textbf{\alert{线性变换}}映射到$\mathbb{R}^n$,之后经过\textbf{{\color{blue}激活函数}}变换成$\textbf{y} \in \mathbb{R}^n$
\item 对于神经网络,输入$\textbf{x}$和输出$\textbf{y}$的形式并不仅仅是向量
\end{itemize}
\vspace{1em}
\begin{center}
\begin{tikzpicture}
\node [anchor=center] (y) at (0,0) {\Large{$\textbf{y}$}};
\node [anchor=west] (eq) at (y.east) {\Large{$=$}};
\node [anchor=west] (func) at (eq.east) {\Large{$f$}};
\node [anchor=west] (brace01) at (func.east) {\Large{$($}};
\node [anchor=west] (x) at (brace01.east) {\Large{$\textbf{x}$}};
\node [anchor=west] (dot) at (x.east) {\Large{$\cdot$}};
\node [anchor=west] (w) at (dot.east) {\Large{$\textbf{w}$}};
\node [anchor=west] (plus) at (w.east) {\Large{$+$}};
\node [anchor=west] (b) at (plus.east) {\Large{$\textbf{b}$}};
\node [anchor=west] (brace02) at (b.east) {\Large{$)$}};
\node [anchor=center,fill=blue!20] (func2) at (func) {\LARGE{$f$}};
\node [anchor=north] (funclabel) at ([yshift=-1.1em]func.south) {\blue{激活函数}};
\draw [<-] ([yshift=-0.2em]func2.south) -- (funclabel.north);
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,fill=red!20] [fit = (x) (w) (b)] (linear) {};
\node [anchor=north] (linearlabel) at ([yshift=-1.1em]linear.south) {\alert{线性变换}};
\draw [<-] ([yshift=-0.2em]linear.south) -- (linearlabel.north);
\end{pgfonlayer}
\end{tikzpicture}
\begin{figure}[htp!]
\includegraphics[scale=0.24]{./Figures/wf.png}
% \begin{tikzpicture}
% \node [rectangle,inner sep=0.2em,fill=red!20] [fit = (x) (w) (b)] (linear) {};
% \node [anchor=north] (linearlabel) at ([yshift=-1.1em]linear.south) {\alert{线性变换}}
\end{figure}
\tikz {\node () at (0,0) {}; \node () at (0,10) {};}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 线性变换
\begin{frame}{线性变换}
\begin{itemize}
\item 对于线性空间$V$,任意$\textbf{a}, \textbf{b} \in V$和数域中的任意$\alpha$,线性变换$T(\cdot)$需满足
% Defining properties of a linear map T: additivity and homogeneity.
% align* (amsmath) replaces the obsolete eqnarray: correct spacing around
% the relation sign and no per-line \nonumber needed.
\begin{align*}
T(\textbf{a} + \textbf{b}) & = T(\textbf{a}) + T(\textbf{b}) \\
T(\alpha \textbf{a}) & = \alpha T(\textbf{a})
\end{align*}
\item<2-> 线性变换的一种几何解释:
\end{itemize}
\vspace{-0.5em}
\visible<2->{
\begin{center}
\begin{tikzpicture}
\node [anchor=west] (x) at (0,0) {\Large{$\textbf{x}$}};
\node [anchor=west] (dot) at (x.east) {\Large{$\cdot$}};
\node [anchor=west] (w) at (dot.east) {\Large{$\textbf{w}$}};
\node [anchor=west] (plus) at (w.east) {\Large{$+$}};
\node [anchor=west] (b) at (plus.east) {\Large{$\textbf{b}$}};
\tikzstyle{neuron} = [rectangle,draw,thick,fill=red!30,red!35,minimum height=2em,minimum width=2em,font=\small]
\node[neuron,anchor=north] (a1) at ([xshift=-6em,yshift=-4em]x.south) {};
\draw[->,thick] ([xshift=-2em,yshift=0em]a1.south) to ([xshift=3em,yshift=0em]a1.south);
\draw[->,thick] ([xshift=0em,yshift=-4em]a1.west) to ([xshift=0em,yshift=2em]a1.west);
\node[below] at ([xshift=0.5em,yshift=-1em]a1.west){0};
\node[below] at ([xshift=2em,yshift=-1em]a1.west){1};
\node[below] at ([xshift=-0.5em,yshift=2em]a1.west){1};
\node [anchor=west] (x) at ([xshift=-0.7em,yshift=1em]a1.south) {\Large{$\textbf{F}$}};
\visible<3->{
\node [anchor=center,fill=green!20] (w2) at (w) {\Large{$\textbf{w}$}};
\node [anchor=north,inner sep=1pt] (wlabel) at ([yshift=-0.7em]w.south) {\small{旋转(rotation)}};
\draw [<-] ([yshift=-0.2em]w2.south) -- (wlabel.north);
\tikzstyle{neuron} = [rectangle,draw,thick,fill=red!30,red!35,minimum height=2em,minimum width=2em,font=\small]
\node[neuron,anchor=north] (a2) at ([xshift=10em,yshift=0em]a1.south) {};
\draw[->,thick] ([xshift=-2em,yshift=0em]a2.north) to ([xshift=3em,yshift=0em]a2.north);
\draw[->,thick] ([xshift=0em,yshift=-2em]a2.west) to ([xshift=0em,yshift=4em]a2.west);
\node[above] at ([xshift=0.5em,yshift=1em]a2.west){0};
\node[above] at ([xshift=2em,yshift=1em]a2.west){1};
\node[below] at ([xshift=-0.5em,yshift=0em]a2.west){-1};
\node [anchor=west] (x) at ([xshift=-3.5cm,yshift=2em]a2.north) {\scriptsize{
$w=\begin{bmatrix}
1&0&0\\
0&-1&0\\
0&0&1
\end{bmatrix}$}
};
\node [anchor=west,rotate = 180] (x) at ([xshift=0.7em,yshift=1em]a2.south) {\Large{$\textbf{F}$}};
\draw[-stealth, line width=2pt,dashed] ([xshift=4em,yshift=0em]a1.south) to ([xshift=-3em,yshift=0em]a2.north);
}
\visible<4->{
\node [anchor=center,fill=purple!20] (b2) at (b) {\Large{$\textbf{b}$}};
\node [anchor=west] (blabel) at ([xshift=1.5em]b2.east) {平移(shift)};
\draw [<-] ([xshift=0.2em]b2.east) -- (blabel.west);
\tikzstyle{neuron} = [rectangle,draw,thick,fill=red!30,red!35,minimum height=2em,minimum width=2em,font=\small]
\node[neuron,anchor=north] (a3) at ([xshift=11em,yshift=2.05em]a2.south) {};
\draw[->,thick] ([xshift=-3em,yshift=0em]a3.north) to ([xshift=2em,yshift=0em]a3.north);
\draw[->,thick] ([xshift=-1em,yshift=-2em]a3.west) to ([xshift=-1em,yshift=4em]a3.west);
\node[above] at ([xshift=-0.5em,yshift=1em]a3.west){0};
\node[above] at ([xshift=1em,yshift=1em]a3.west){1};
\node[left] at ([xshift=-0.75em,yshift=-0.5em]a3.west){-1};
\node [anchor=west,rotate = 180] (x) at ([xshift=0.7em,yshift=1em]a3.south) {\Large{$\textbf{F}$}};
\node [anchor=west] (x) at ([xshift=-4cm,yshift=2em]a3.north) {\scriptsize{
$b=\begin{bmatrix}
0.5&0&0\\
0&0&0\\
0&0&0
\end{bmatrix}$}
};
\draw[-stealth, line width=2pt,dashed] ([xshift=3em,yshift=1em]a2.east) to ([xshift=-3em,yshift=1em]a3.west);
}
\end{tikzpicture}
\end{center}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 线性变换:更复杂的实例
\begin{frame}[fragile]{线性变换(续)}
\begin{itemize}
\item 线性变换也适用于更加复杂的情况,这也给神经网络提供了拟合不同数据分布的能力
\begin{itemize}
\item 比如,我们可以把三维图形投影到二维平面上
\item 再比如,我们也可以把二维平面上的图形映射到三维空间中
\end{itemize}
\end{itemize}
\vspace{1em}
% \plane{<y>}: expands to a closed four-sided TikZ path in 3-D coordinates.
% The path starts at (-1.95, <y>, 1.35), follows three relative (++) moves
% and is closed with `cycle`. It is meant to be placed after \draw[...].
\newcommand{\plane}[1]{%
(-1.95, #1, 1.35)
-- ++(3.6, 0.6, 0)
-- ++(0.3, -1.8, -2.7)
-- ++(-3.6, -0.6, 0)
-- cycle}
% \nullspacepicture: draws the left-hand (3-D) part of the illustration.
% It draws one line segment, then five parallelograms produced by \plane at
% heights -0.2, 0.2, 0.6, 1 and 1.4, then a second line segment. Only the
% plane at 0.6 is filled with blue!70!gray; the others use gray!20.
% The statements are order-sensitive: later shapes are painted over earlier ones.
\newcommand{\nullspacepicture}{
% bottom part of the row space line (drawn first, so the planes paint over it)
\draw (0,0,0) -- (0.3,-1.8,1.233);
% five planes; the third one (0.6) is the highlighted one
\draw[fill=gray!20]\plane{-0.2};
\draw[fill=gray!20]\plane{0.2};
\draw[fill=blue!70!gray]\plane{0.6};
\draw[fill=gray!20]\plane{1};
\draw[fill=gray!20]\plane{1.4};
% top part of the row space line (drawn last, so it stays on top of the planes)
\draw (-.094,.562,-.385) -- (-0.3,1.8,-1.233);
}
% \rangepicture{<colour>}: draws the right-hand (2-D) part of the illustration.
% It draws a pair of axes in the `help lines` style, a straight line from
% (1,-2) to (-1,2), one dot at the origin filled with <colour>, and four
% gray dots at fixed positions on that line.
\newcommand{\rangepicture}[1]{
% coordinate axes
\draw[help lines,->] (-2,0) -- (2,0);
\draw[help lines,->] (0,-2) -- (0,2);
% the line carrying the dots
\draw (1,-2) -- (-1,2);
% highlighted dot at the origin, coloured by the argument
\draw[fill=#1] (0,0) circle (2.5pt);
% remaining dots, all gray, same radius
\foreach \dotx/\doty in {0.2/-0.4, 0.4/-0.8, -0.2/0.4, -0.4/0.8}{
\draw[fill=gray!50] (\dotx,\doty) circle (2.5pt);
}
}
\begin{tikzpicture}[scale=0.95]
\centering
\nullspacepicture
% the label
\node at (-2,1.8) {$\mathbb{R}^3$};
% arrow between diagrams
\path[->] (3,0) edge[bend left] node[above] {线性变换} (4.5,0);
\begin{scope}[xshift=7cm]
\rangepicture{blue!70!gray}
\node at (1.8,1.8) {$\mathbb{R}^2$};
\end{scope}
\end{tikzpicture}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 激活函数
\begin{frame}{激活函数}
\begin{itemize}
\item 激活函数更多地是为了解决实际问题中的\alert{非线性}变换
\begin{itemize}
\item 非线性部分提供了拟合任意函数的能力(稍后介绍)
\end{itemize}
\end{itemize}
\vspace{-1em}
\begin{center}
\begin{tikzpicture}
\begin{scope}
\draw [line width=3pt,ublue,-](0,0) -- (-2.0,1);
\node [anchor=north] (linelabel) at (-1.0,-0.5) {\footnotesize{我是一根筷子}};
\end{scope}
\begin{scope}[xshift=10em]
\draw [line width=3pt,ublue,-,line cap=round](0,0) .. controls (-0.5,-0.25) and (-0.5,1).. (-1.3,0.3) .. controls (-2.3,-0.3) and (-1.1,1.8).. (-2.0,1);
\node [] at (-2,1) {\white{$\cdot$}};
\node [anchor=north] (linelabel) at (-1.0,-0.5) {\footnotesize{我是一只蚯蚓}};
\end{scope}
\end{tikzpicture}
\end{center}
\begin{itemize}
\item<2-> 简单的非线性函数
\end{itemize}
\vspace{-1em}
\node [anchor=center] (y) at (0,0) {\LARGE{$\textbf{y}$}};
\node [anchor=west] (eq) at (y.east) {\LARGE{$=$}};
\node [anchor=west] (func) at (eq.east) {\LARGE{$f$}};
\node [anchor=west] (brace01) at (func.east) {\LARGE{$($}};
\node [anchor=west] (x) at (brace01.east) {\LARGE{$\textbf{x}$}};
\node [anchor=west] (dot) at (x.east) {\LARGE{$\cdot$}};
\node [anchor=west] (w) at (dot.east) {\LARGE{$\textbf{w}$}};
\node [anchor=west] (plus) at (w.east) {\LARGE{$+$}};
\node [anchor=west] (b) at (plus.east) {\LARGE{$\textbf{b}$}};
\node [anchor=west] (brace02) at (b.east) {\LARGE{$)$}};
\visible<2->{
\begin{center}
\begin{tikzpicture}
\begin{scope}[]
\draw [->,thick] (-1.5,0) -- (1.5,0);
\draw [->,thick] (0,-0.1) -- (0,1.5);
\draw [-,very thick,ublue,domain=-1.2:1.2,samples=100] plot (\x,{0.5 * (\x -0.3)^2 + 0.2});
\node [anchor=west] (ylabel) at (0,1.3) {$y$};
\node [anchor=north] (xlabel) at (1.3,0) {$x$};
\node [anchor=north] (func) at (0,-0.8) {\footnotesize{$y = \frac{1}{2} (x - 0.3)^2 + 0.2$}};
\node [anchor=south west] (flabel) at (func.north west) {\footnotesize{Quadratic:}};
\end{scope}
\begin{scope}[xshift=9.5em]
\draw [->,thick] (-1.5,0) -- (1.5,0);
\draw [->,thick] (0,-0.1) -- (0,1.5);
\draw [-,very thick,ublue,domain=-1.2:1.2,samples=100] plot (\x, {0.5 * exp(\x)});
\node [anchor=west] (ylabel) at (0,1.3) {$y$};
\node [anchor=north] (xlabel) at (1.3,0) {$x$};
\node [anchor=north] (func) at (0,-0.8) {\footnotesize{$y = 0.5 \cdot \exp(x)$}};
\node [anchor=south west] (flabel) at ([xshift=-1.8em]func.north west) {\footnotesize{Exponential:}};
\end{scope}
\begin{scope}[xshift=19em]
\draw [->,thick] (-1.5,0) -- (1.5,0);
\draw [->,thick] (0,-0.1) -- (0,1.5);
\draw [-,very thick,ublue,domain=-1.1:1.2,samples=100] plot (\x,{abs(\x -0.2) + 0.1});
\node [anchor=west] (ylabel) at (0,1.3) {$y$};
\node [anchor=north] (xlabel) at (1.3,0) {$x$};
\node [anchor=north] (func) at (0,-0.8) {\footnotesize{$y = |x - 0.2| + 0.1$}};
\node [anchor=south west] (flabel) at ([xshift=-0.4em]func.north west) {\footnotesize{Absolute:}};
\end{scope}
\end{tikzpicture}
\end{center}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 常用的激活函数
\begin{frame}{常用的激活函数}
\begin{itemize}
\item 好多好多,列举不全 ...
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{多层神经网络}
%%%------------------------------------------------------------------------------------------------------------
%%% 一层 -> 多层
\begin{frame}{更多的层}
\begin{itemize}
\item \textbf{单层神经网络}:线性变换 + 激活函数(非线性)
\item 我们可以重复上面的过程,构建\textbf{多层神经网络}
\end{itemize}
\vspace{-1.0em}
\begin{center}
\begin{tikzpicture}
\begin{scope}[]
\node [anchor=center,fill=yellow!30] (x2) at (x) {\LARGE{$\textbf{x}$}};
\node [anchor=south] (xlabel) at ([xshift=-3em,yshift=1.5em]x.north) {\alert{向量?矩阵?...}};
\draw [<-] ([yshift=0.2em,xshift=-0.5em]x2.north) -- ([xshift=1em]xlabel.south);
\def\neuronsep{1.6}
\tikzstyle{neuronnode} = [minimum size=1.7em,circle,draw,ublue,very thick,inner sep=1pt, fill=white,align=center,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}]
%%% layer 1
\foreach \n in {1,...,5}{
\node [neuronnode] (neuron0\n) at (\n * \neuronsep,0) {\tiny{$f_1$}\\[-1ex] \tiny{$\sum$}};
\draw [-,ublue] (neuron0\n.east) -- (neuron0\n.west);
}
\foreach \n in {1,...,5}{
\foreach \m in {1,...,5}{
\draw [<-] (neuron0\m.south) -- ([yshift=-2em]neuron0\n.south);
}
\node [anchor=north] (x\n) at ([yshift=-2em]neuron0\n.south) {$x_\n$};
\visible<1>{
\draw [<-,thick] ([yshift=1.5em]neuron0\n.north) -- (neuron0\n.north);
\node [anchor=south] (y\n) at ([yshift=1.5em]neuron0\n.north) {$y_\n$};
}
}
\node [anchor=center,fill=red!20] (y2) at (y) {\LARGE{$\textbf{y}$}};
\draw [<-] ([yshift=0.2em,xshift=0.5em]y2.north) -- ([xshift=-1em]xlabel.south);
\node [anchor=west] (w1label) at ([xshift=-0.5em,yshift=0.5em]x5.north east) {$\textbf{w}_1$};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,fill=red!20] [fit = (neuron01) (neuron05)] (layer01) {};
\end{pgfonlayer}
\node [anchor=west] (layer00label) at ([xshift=1.25em]x5.east) {\alert{输入层}};
\visible<2->{
\node [anchor=west] (layer01label) at ([xshift=1em]layer01.east) {第二层};
}
\visible<4->{
\node [anchor=west] (layer01label2) at (layer01label.east) {(\alert{隐层})};
}
%%% layer 2
\visible<2->{
\foreach \n in {2,...,4}{
\node [neuronnode] (neuron1\n) at (\n * \neuronsep,4em) {\tiny{$f_2$}\\[-1ex] \tiny{$\sum$}};
\draw [-,ublue] (neuron1\n.east) -- (neuron1\n.west);
}
\foreach \n in {2,...,4}{
\foreach \m in {1,...,5}{
\draw [<-] (neuron1\n.south) -- (neuron0\m.north);
}
\visible<2>{
\draw [<-,thick] ([yshift=1.5em]neuron1\n.north) -- (neuron1\n.north);
\node [anchor=south] (y\n) at ([yshift=1.5em]neuron1\n.north) {$y_\n$};
}
}
\node [anchor=west] (w2label) at ([xshift=-2.5em,yshift=5.0em]x5.north east) {$\textbf{w}_2$};
\begin{pgfonlayer}{background}
\visible<2->{
\node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron12) (neuron14)] (layer02) {};
}
\end{pgfonlayer}
\node [anchor=west] (layer02label) at ([xshift=4.9em]layer02.east) {第三层};
\visible<4->{
\node [anchor=west] (layer02label2) at (layer02label.east) {(\alert{隐层})};
}
}
%%% layer 3
\visible<3->{
\foreach \n in {1,...,5}{
\node [neuronnode] (neuron2\n) at (\n * \neuronsep,8em) {\tiny{$f_3$}\\[-1ex] \tiny{$\sum$}};
\draw [-,ublue] (neuron2\n.east) -- (neuron2\n.west);
}
\foreach \n in {1,...,5}{
\foreach \m in {2,...,4}{
\draw [<-] (neuron2\n.south) -- (neuron1\m.north);
}
\node [anchor=center,fill=green!20] (w2) at (w) {\LARGE{$\textbf{w}$}};
\node [anchor=north] (wlabel) at ([yshift=-1.0em]w.south) {矩阵 e.g.,};
\draw [<-] ([yshift=-0.2em]w2.south) -- (wlabel.north);
\node [anchor=west] (wsample) at ([xshift=-0.5em]wlabel.east) {\footnotesize{$\left(\begin{array}{c c} 1 & 2 \\ 3 & 4 \end{array}\right)$}};
\node [anchor=south] (y\n) at ([yshift=1.5em]neuron2\n.north) {$y_\n$};
\draw [<-,thick] ([yshift=1.5em]neuron2\n.north) -- (neuron2\n.north);
\node [anchor=center,fill=purple!20] (b2) at (b) {\LARGE{$\textbf{b}$}};
\node [anchor=south] (blabel) at ([yshift=1.3em]b.north) {向量 e.g.,};
\draw [<-] ([yshift=0.2em]b2.north) -- (blabel.south);
\node [anchor=west] (bsample) at ([xshift=-0.5em]blabel.east) {\footnotesize{$(1 \ \ 3)$}};
}
\node [anchor=west] (w3label) at ([xshift=-2.5em,yshift=8.5em]x5.north east) {$\textbf{w}_3$};
\begin{pgfonlayer}{background}
\visible<3->{
\node [rectangle,inner sep=0.2em,fill=blue!20] [fit = (neuron21) (neuron25)] (layer03) {};
}
\end{pgfonlayer}
\node [anchor=west] (layer03label) at ([xshift=1em]layer03.east) {第四层};
\visible<4->{
\node [anchor=west] (layer03label2) at (layer03label.east) {(\alert{输出层})};
}
}
\end{scope}
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 两层神经网络可以逼近任何函数
\begin{frame}{多层神经网络可以逼近任意函数}
\begin{itemize}
\item 以一个简单的三层网络为例(隐层激活函数:sigmoid)
\end{itemize}
\begin{center}
\begin{tikzpicture}
%% a two-layer neural network
\begin{scope}
\tikzstyle{neuronnode} = [minimum size=1.7em,circle,draw,ublue,very thick,inner sep=1pt, fill=white,align=center,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}]
%% input and hidden layers
\node [neuronnode] (n10) at (0,0) {\tiny{$f$}\\[-1ex] \tiny{$\sum$}};
\node [neuronnode] (n11) at (1.5,0) {\tiny{$f$}\\[-1ex] \tiny{$\sum$}};
\draw [-,ublue] (n10.west) -- (n10.east);
\draw [-,ublue] (n11.west) -- (n11.east);
\node [anchor=north] (x1) at ([yshift=-6em]n11.south) {$x_1$};
\node [anchor=north] (b) at ([yshift=-6em]n10.south) {$b$};
\visible<1-10>{
\draw [->,thick] (b.north) -- ([yshift=-0.1em]n10.south);
\draw [->,thick] (x1.north) -- ([yshift=-0.1em]n10.290);
}
\visible<1>{
\draw [->,thick] (b.north) -- ([yshift=-0.1em]n11.250);
\draw [->,thick] (x1.north) -- ([yshift=-0.1em]n11.south);
}
\visible<11->{
\draw [->,thick,red] (b.north) -- ([yshift=-0.1em]n10.south);
\draw [->,thick,ugreen] (x1.north) -- ([yshift=-0.1em]n10.290);
}
\visible<2->{
\draw [->,thick,blue] (b.north) -- ([yshift=-0.1em]n11.250);
\draw [->,thick,purple] (x1.north) -- ([yshift=-0.1em]n11.south);
}
\visible<15->{
\node [neuronnode] (n12) at (2.7,0) {\tiny{$f$}\\[-1ex] \tiny{$\sum$}};
\node [neuronnode] (n13) at (3.8,0) {\tiny{$f$}\\[-1ex] \tiny{$\sum$}};
\draw [-,ublue] (n12.west) -- (n12.east);
\draw [-,ublue] (n13.west) -- (n13.east);
\draw [->,thick] (b.north) -- ([yshift=-0.1em]n12.250);
\draw [->,thick] (x1.north) -- ([yshift=-0.1em]n12.270);
\draw [->,thick] (b.north) -- ([yshift=-0.1em]n13.230);
\draw [->,thick] (x1.north) -- ([yshift=-0.1em]n13.250);
}
\visible<16->{
\node [anchor=west] (morenodes) at (n13.east) {...};
}
%% output layers
\node [neuronnode] (n20) at (0.75,5em) {\scriptsize{$\sum$}};
\visible<1-10>{\draw [->,thick] ([yshift=0.1em]n10.north) -- ([yshift=-0.1em]n20.250);}
\visible<1-8>{\draw [->,thick] ([yshift=0.1em]n11.north) -- ([yshift=-0.1em]n20.290);}
\visible<11->{\draw [->,thick,brown] ([yshift=0.1em]n10.north) -- ([yshift=-0.1em]n20.250);}
\visible<9->{\draw [->,thick,orange] ([yshift=0.1em]n11.north) -- ([yshift=-0.1em]n20.290);}
\node [] (y) at ([yshift=3em]n20.north) {$y$};
\draw [->,thick] ([yshift=0.1em]n20.north) -- (y.south);
\visible<15->{
\draw [->,thick] ([yshift=0.1em]n12.north) -- ([yshift=-0.1em]n20.310);
\draw [->,thick] ([yshift=0.1em]n13.north) -- ([yshift=-0.1em]n20.330);
}
%% weight and bias
\visible<11->{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (b0) at ([yshift=3em,xshift=-0.5em]b.north) {\tiny{$b=-6$}};}
\visible<11->{\node [anchor=center,rotate=-59,fill=white,inner sep=1pt] (w2) at ([yshift=1.2em,xshift=-1.2em]x1.north) {\tiny{$w=100$}};}
\visible<2-6>{\node [anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north) {\tiny{$b=0$}};}
\visible<7>{\node [anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north) {\tiny{$b=-2$}};}
\visible<8->{\node [anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north) {\tiny{$b=-4$}};}
\visible<2-4>{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north) {\tiny{$w=1$}};}
\visible<5>{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north) {\tiny{$w=10$}};}
\visible<6->{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north) {\tiny{$w=100$}};}
\visible<11>{\node [anchor=center,rotate=62,fill=white,inner sep=1pt] (w21) at ([yshift=2em,xshift=0.5em]n10.north) {\tiny{$w'=0.7$}};}
\visible<12->{\node [anchor=center,rotate=62,fill=white,inner sep=1pt] (w21) at ([yshift=2em,xshift=0.5em]n10.north) {\tiny{$w'=-0.7$}};}
\visible<2-8>{\node [anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north) {\tiny{$w'=1$}};}
\visible<9>{\node [anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north) {\tiny{$w'=0.9$}};}
\visible<10->{\node [anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north) {\tiny{$w'=0.7$}};}
%% sigmoid box
\begin{scope}
\visible<3->{
\node [anchor=west] (flabel) at ([xshift=1.2in]y.east) {\footnotesize{sigmoid:}};
\node [anchor=north east] (slabel) at ([xshift=0]flabel.south east) {\footnotesize{sum:}};
\node [anchor=west,inner sep=2pt] (flabel2) at (flabel.east) {\footnotesize{$f(s)=1/(1+e^{-s})$}};
\node [anchor=west,inner sep=2pt] (flabel3) at (slabel.east) {\footnotesize{$s=x_1 \cdot w + b$}};
\draw [->,thick,dotted] ([yshift=-0.3em,xshift=-0.1em]n11.60) .. controls +(east:1) and +(west:2) .. ([xshift=-0.2em]flabel.west) ;
\begin{pgfonlayer}{background}
\visible<3->{
\node [rectangle,inner sep=0.2em,fill=blue!20,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] [fit = (flabel) (flabel2) (flabel3)] (funcbox) {};
}
\end{pgfonlayer}
}
\end{scope}
%% output illustration
\begin{scope}[xshift=2.8in,yshift=0.1in]
\visible<4->{
\draw [->,thick] (-2.2,0) -- (2.2,0);
\draw [->,thick] (0,0) -- (0,2);
\draw [-] (-0.05,1) -- (0.05,1);
\node [anchor=east,inner sep=1pt] (label1) at (0,1) {\tiny{1}};
\node [anchor=south east,inner sep=1pt] (label2) at (0,0) {\tiny{0}};
}
\visible<4>{\draw [-,very thick,ublue,domain=-2:2,samples=100] plot (\x,{1/(1+exp(-2*\x))});}
\visible<5>{\draw [-,very thick,ublue,domain=-2:2,samples=100] plot (\x,{1/(1+exp(-4*\x))});}
\visible<6>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0,0) -- (0,1) -- (2,1);}
\visible<7>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.25,0) -- (0.25,1) -- (2,1);}
\visible<8>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,1) -- (2,1);}
\visible<9>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.9) -- (2,0.9);}
\visible<10>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.7) -- (2,0.7);}
\visible<11>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.7) -- (0.7,0.7) -- (0.7,1.4) -- (2,1.4);}
\visible<12->{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.7) -- (0.7,0.7) -- (0.7,0) -- (2,0);}
\visible<15->{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.7,0) -- (0.7,0.6) -- (0.9,0.6) -- (0.9,0) -- (2,0);}
\visible<14>{\draw [->,dashed] (0.6,-0.05) -- (0.6,-0.96in);}
\visible<15->{\draw [->,dashed] (0.8,-0.05) -- (0.8,-0.98in);}
\visible<4>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\scriptsize{$w_1=1$}\\[-0ex] \scriptsize{\ $b_1=0$}};}
\visible<5>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\alert{\scriptsize{$w_1=10$}}\\[-0ex] \scriptsize{\ $b_1=0$}};}
\visible<6>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\alert{\scriptsize{$w_1=100$}}\\[-0ex] \scriptsize{\ $b_1=0$}};}
\visible<7>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\scriptsize{$w_1=100$}\\[-0ex] \alert{\scriptsize{\ $b_1=-2$}}};}
\visible<8>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\scriptsize{$w_1=100$}\\[-0ex] \alert{\scriptsize{\ $b_1=-4$}}};}
\visible<9>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\alert{\scriptsize{$w'_1=0.9$}}};}
\visible<10>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\alert{\scriptsize{$w'_1=0.7$}}};}
\visible<11>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\alert{\scriptsize{$w_2=100$}}\\[-0ex] \alert{\scriptsize{\ $b_2=-6$}}\\[-0ex] \alert{\scriptsize{\ $w'_2=0.7$}}};}
\visible<12>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\scriptsize{$w_2=100$}\\[-0ex] \scriptsize{\ $b_2=-6$}\\[-0ex] \alert{\scriptsize{\ $w'_2=-0.7$}}};}
\visible<13->{\node [anchor=north west,align=left] (wblabel) at (-2.5,2) {\scriptsize{这是一个}\\[-1ex] \scriptsize{step function}};}
\end{scope}
\begin{scope}[xshift=2.8in,yshift=-1.2in]
\visible<13->{
\draw [->,thick] (-2.2,0) -- (2.2,0);
\draw [->,thick] (0,0) -- (0,2);
\draw [-,very thick,red,domain=-1.98:2,samples=100] plot (\x,{0.2 * (\x +0.4)^3 + 1.2 - 0.3 *(\x + 0.8)^2});
}
\visible<14->{
\foreach \n in {0.5}{
\pgfmathsetmacro{\result}{0.2 * (\n + 0.1 + 0.4)^3 + 1.2 - 0.3 *(\n + 0.1 + 0.8)^2}%
\draw [-,ublue,thick] (\n,0) -- (\n, \result) -- (\n + 0.2, \result) -- (\n + 0.2, 0);
}
}
\visible<15->{
\foreach \n in {0.7}{
\pgfmathsetmacro{\result}{0.2 * (\n + 0.1 + 0.4)^3 + 1.2 - 0.3 *(\n + 0.1 + 0.8)^2}%
\draw [-,ublue,thick] (\n,0) -- (\n, \result) -- (\n + 0.2, \result) -- (\n + 0.2, 0);
}
}
\visible<16->{
\foreach \n in {-1.9,-1.7,...,1.9}{
\pgfmathsetmacro{\result}{0.2 * (\n + 0.1 + 0.4)^3 + 1.2 - 0.3 *(\n + 0.1 + 0.8)^2}%
\draw [-,ublue,thick] (\n,0) -- (\n, \result) -- (\n + 0.2, \result) -- (\n + 0.2, 0);
}
}
\visible<14>{\node [anchor=north west,align=left] (wblabel) at (-2.5,2.5) {\scriptsize{函数的每一段都可}\\[-1ex] \scriptsize{由step function}\\[-1ex] \scriptsize{近似}};}
\visible<15>{\node [anchor=north west,align=left] (wblabel) at (-2.5,2.5) {\scriptsize{增加隐层神经元}\\[-1ex] \scriptsize{可以拟合更多的}\\[-1ex] \scriptsize{部分}};}
\visible<16>{\node [anchor=north west,align=left] (wblabel) at (-2.5,2.5) {\scriptsize{理论上足够多的}\\[-1ex] \scriptsize{隐层神经元可以}\\[-1ex] \scriptsize{拟合\alert{任意函数}}};}
\end{scope}
\end{scope}
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{参数学习 - 反向传播}
\end{CJK}
......
\frametitle{线性变换(续)}
\begin{itemize}
\item 线性变换也适用于更加复杂的情况,这也给神经网络提供了拟合不同数据分布的能力
\begin{itemize}
\item 比如,我们可以把三维图形投影到二维平面上
\item 再比如,我们也可以把二维平面上的图形映射到三维空间中
\end{itemize}
\end{itemize}
\vspace{1em}
% \plane{<y>}: expands to a closed four-sided TikZ path in 3-D coordinates.
% The path starts at (-1.95, <y>, 1.35), follows three relative (++) moves
% and is closed with `cycle`. It is meant to be placed after \draw[...].
\newcommand{\plane}[1]{%
(-1.95, #1, 1.35)
-- ++(3.6, 0.6, 0)
-- ++(0.3, -1.8, -2.7)
-- ++(-3.6, -0.6, 0)
-- cycle}
% \nullspacepicture: draws the left-hand (3-D) part of the illustration.
% It draws one line segment, then five parallelograms produced by \plane at
% heights -0.2, 0.2, 0.6, 1 and 1.4, then a second line segment. Only the
% plane at 0.6 is filled with blue!70!gray; the others use gray!20.
% The statements are order-sensitive: later shapes are painted over earlier ones.
\newcommand{\nullspacepicture}{
% bottom part of the row space line (drawn first, so the planes paint over it)
\draw (0,0,0) -- (0.3,-1.8,1.233);
% five planes; the third one (0.6) is the highlighted one
\draw[fill=gray!20]\plane{-0.2};
\draw[fill=gray!20]\plane{0.2};
\draw[fill=blue!70!gray]\plane{0.6};
\draw[fill=gray!20]\plane{1};
\draw[fill=gray!20]\plane{1.4};
% top part of the row space line (drawn last, so it stays on top of the planes)
\draw (-.094,.562,-.385) -- (-0.3,1.8,-1.233);
}
% \rangepicture{<colour>}: draws the right-hand (2-D) part of the illustration.
% It draws a pair of axes in the `help lines` style, a straight line from
% (1,-2) to (-1,2), one dot at the origin filled with <colour>, and four
% gray dots at fixed positions on that line.
\newcommand{\rangepicture}[1]{
% coordinate axes
\draw[help lines,->] (-2,0) -- (2,0);
\draw[help lines,->] (0,-2) -- (0,2);
% the line carrying the dots
\draw (1,-2) -- (-1,2);
% highlighted dot at the origin, coloured by the argument
\draw[fill=#1] (0,0) circle (2.5pt);
% remaining dots, all gray, same radius
\foreach \dotx/\doty in {0.2/-0.4, 0.4/-0.8, -0.2/0.4, -0.4/0.8}{
\draw[fill=gray!50] (\dotx,\doty) circle (2.5pt);
}
}
\begin{tikzpicture}[scale=0.95]
\centering
\nullspacepicture
% the label
\node at (-2,1.8) {$\mathbb{R}^3$};
% arrow between diagrams
\path[->] (3,0) edge[bend left] node[above] {线性变换} (4.5,0);
\begin{scope}[xshift=7cm]
\rangepicture{blue!70!gray}
\node at (1.8,1.8) {$\mathbb{R}^2$};
\end{scope}
\end{tikzpicture}
......@@ -699,7 +699,7 @@ GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{数学基础:张量计算}
\subsection{人工神经元}
%%%------------------------------------------------------------------------------------------------------------
%%% 人工神经元的函数形式
......@@ -865,6 +865,8 @@ GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7}
\item 对于向量$\textbf{x} \in \mathbb{R}^m$,一层神经网络首先把它经过\textbf{\alert{线性变换}}映射到$\mathbb{R}^n$,之后经过\textbf{{\color{blue}激活函数}}变换成$\textbf{y} \in \mathbb{R}^n$
\end{itemize}
\vspace{1em}
\begin{center}
\begin{tikzpicture}
......@@ -890,10 +892,19 @@ GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7}
\end{pgfonlayer}
\end{tikzpicture}
\end{center}
\begin{figure}[htp!]
\includegraphics[scale=0.24]{./Figures/wf.png}
% \begin{tikzpicture}
% \node [rectangle,inner sep=0.2em,fill=red!20] [fit = (x) (w) (b)] (linear) {};
% \node [anchor=north] (linearlabel) at ([yshift=-1.1em]linear.south) {\alert{线性变换}}
\end{figure}
\tikz {\node () at (0,0) {}; \node () at (0,10) {};}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 线性变换
\begin{frame}{线性变换}
......@@ -906,7 +917,7 @@ T(\alpha \textbf{a}) & = & \alpha T(\textbf{a}) \nonumber
\item<2-> 线性变换的一种几何解释:
\end{itemize}
\vspace{-1em}
\vspace{-0.5em}
\visible<2->{
\begin{center}
\begin{tikzpicture}
......@@ -917,32 +928,191 @@ T(\alpha \textbf{a}) & = & \alpha T(\textbf{a}) \nonumber
\node [anchor=west] (plus) at (w.east) {\Large{$+$}};
\node [anchor=west] (b) at (plus.east) {\Large{$\textbf{b}$}};
\tikzstyle{neuron} = [rectangle,draw,thick,fill=red!30,red!35,minimum height=2em,minimum width=2em,font=\small]
\node[neuron,anchor=north] (a1) at ([xshift=-6em,yshift=-4em]x.south) {};
\draw[->,thick] ([xshift=-2em,yshift=0em]a1.south) to ([xshift=3em,yshift=0em]a1.south);
\draw[->,thick] ([xshift=0em,yshift=-4em]a1.west) to ([xshift=0em,yshift=2em]a1.west);
\node[below] at ([xshift=0.5em,yshift=-1em]a1.west){0};
\node[below] at ([xshift=2em,yshift=-1em]a1.west){1};
\node[below] at ([xshift=-0.5em,yshift=2em]a1.west){1};
\node [anchor=west] (x) at ([xshift=-0.7em,yshift=1em]a1.south) {\Large{$\textbf{F}$}};
\visible<3->{
\node [anchor=center,fill=green!20] (w2) at (w) {\Large{$\textbf{w}$}};
\node [anchor=north,inner sep=1pt] (wlabel) at ([yshift=-0.7em]w.south) {\small{旋转(rotation)}};
\draw [<-] ([yshift=-0.2em]w2.south) -- (wlabel.north);
\tikzstyle{neuron} = [rectangle,draw,thick,fill=red!30,red!35,minimum height=2em,minimum width=2em,font=\small]
\node[neuron,anchor=north] (a2) at ([xshift=10em,yshift=0em]a1.south) {};
\draw[->,thick] ([xshift=-2em,yshift=0em]a2.north) to ([xshift=3em,yshift=0em]a2.north);
\draw[->,thick] ([xshift=0em,yshift=-2em]a2.west) to ([xshift=0em,yshift=4em]a2.west);
\node[above] at ([xshift=0.5em,yshift=1em]a2.west){0};
\node[above] at ([xshift=2em,yshift=1em]a2.west){1};
\node[below] at ([xshift=-0.5em,yshift=0em]a2.west){-1};
\node [anchor=west] (x) at ([xshift=-3.5cm,yshift=2em]a2.north) {\scriptsize{
$w=\begin{bmatrix}
1&0&0\\
0&-1&0\\
0&0&1
\end{bmatrix}$}
};
\node [anchor=west,rotate = 180] (x) at ([xshift=0.7em,yshift=1em]a2.south) {\Large{$\textbf{F}$}};
\draw[-stealth, line width=2pt,dashed] ([xshift=4em,yshift=0em]a1.south) to ([xshift=-3em,yshift=0em]a2.north);
}
\visible<4->{
\node [anchor=center,fill=purple!20] (b2) at (b) {\Large{$\textbf{b}$}};
\node [anchor=west] (blabel) at ([xshift=1.5em]b2.east) {平移(shift)};
\draw [<-] ([xshift=0.2em]b2.east) -- (blabel.west);
\tikzstyle{neuron} = [rectangle,draw,thick,fill=red!30,red!35,minimum height=2em,minimum width=2em,font=\small]
\node[neuron,anchor=north] (a3) at ([xshift=11em,yshift=2.05em]a2.south) {};
\draw[->,thick] ([xshift=-3em,yshift=0em]a3.north) to ([xshift=2em,yshift=0em]a3.north);
\draw[->,thick] ([xshift=-1em,yshift=-2em]a3.west) to ([xshift=-1em,yshift=4em]a3.west);
\node[above] at ([xshift=-0.5em,yshift=1em]a3.west){0};
\node[above] at ([xshift=1em,yshift=1em]a3.west){1};
\node[left] at ([xshift=-0.75em,yshift=-0.5em]a3.west){-1};
\node [anchor=west,rotate = 180] (x) at ([xshift=0.7em,yshift=1em]a3.south) {\Large{$\textbf{F}$}};
\node [anchor=west] (x) at ([xshift=-4cm,yshift=2em]a3.north) {\scriptsize{
$b=\begin{bmatrix}
0.5&0&0\\
0&0&0\\
0&0&0
\end{bmatrix}$}
};
\draw[-stealth, line width=2pt,dashed] ([xshift=3em,yshift=1em]a2.east) to ([xshift=-3em,yshift=1em]a3.west);
}
\end{tikzpicture}
\end{center}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 线性变换:更复杂的实例
\begin{frame}{线性变换(续)}
\begin{frame}[fragile]{线性变换(续)}
\begin{itemize}
\item 线性变换也适用于更加复杂的情况,这也给神经网络提供了拟合不同数据分布的能力
\begin{itemize}
\item 比如,我们可以把三维图形投影到二维平面上
\item 再比如,我们也可以把二维平面上的图形映射到三维空间
\end{itemize}
\end{itemize}
% Worked example shown in tiny type: a group of 3x3 identity matrices
% (underbraced, count 5) times an all-ones column equals a group of
% all-ones columns (underbraced, count 5).
% The outer smallmatrix has two rows: the underbraced group, then the
% count "5", which is how the number ends up beneath the brace.
% \[ ... \] replaces plain-TeX $$ ... $$, and each array's column spec now
% matches the number of columns actually used.
\begin{tiny}
\[
\begin{smallmatrix} \underbrace{
\left\{
\begin{smallmatrix}
\left[
\begin{array}{ccc}
1& 0 &0 \\
0& 1 &0 \\
0& 0 &1
\end{array}
\right]
\cdots
\left[
\begin{array}{ccc}
1& 0 &0 \\
0& 1 &0 \\
0& 0 &1
\end{array}
\right]
\end{smallmatrix}
\right\}
}\\5
\end{smallmatrix}
\times
\begin{smallmatrix}
\left[
\begin{array}{c}
1\\
1\\
1
\end{array}
\right]
\end{smallmatrix}
=
\begin{smallmatrix} \underbrace{
\left\{
\begin{smallmatrix}
\left[
\begin{array}{c}
1 \\
1 \\
1
\end{array}
\right]
\cdots
\left[
\begin{array}{c}
1 \\
1 \\
1
\end{array}
\right]
\end{smallmatrix}
\right\}
}\\5
\end{smallmatrix}
\]
\end{tiny}
%\vspace{1em}
% \plane{<y>}: expands to a closed four-sided TikZ path in 3-D coordinates.
% The path starts at (-1.95, <y>, 1.35), follows three relative (++) moves
% and is closed with `cycle`. It is meant to be placed after \draw[...].
\newcommand{\plane}[1]{%
(-1.95, #1, 1.35)
-- ++(3.6, 0.6, 0)
-- ++(0.3, -1.8, -2.7)
-- ++(-3.6, -0.6, 0)
-- cycle}
% \nullspacepicture: draws the left-hand (3-D) part of the illustration.
% It draws one line segment, then five parallelograms produced by \plane at
% heights -0.2, 0.2, 0.6, 1 and 1.4, then a second line segment. Only the
% plane at 0.6 is filled with blue!70!gray; the others use gray!20.
% The statements are order-sensitive: later shapes are painted over earlier ones.
\newcommand{\nullspacepicture}{
% bottom part of the row space line (drawn first, so the planes paint over it)
\draw (0,0,0) -- (0.3,-1.8,1.233);
% five planes; the third one (0.6) is the highlighted one
\draw[fill=gray!20]\plane{-0.2};
\draw[fill=gray!20]\plane{0.2};
\draw[fill=blue!70!gray]\plane{0.6};
\draw[fill=gray!20]\plane{1};
\draw[fill=gray!20]\plane{1.4};
% top part of the row space line (drawn last, so it stays on top of the planes)
\draw (-.094,.562,-.385) -- (-0.3,1.8,-1.233);
}
\newcommand{\rangepicture}[1]{
% axes
\draw[help lines,->] (-2,0) -- (2,0);
\draw[help lines,->] (0,-2) -- (0,2);
% the line and circles
\draw (1,-2) -- (-1,2);
\draw[fill=#1] (0,0) circle (2.5pt);
\draw[fill=gray!50] (0.2,-0.4) circle (2.5pt);
\draw[fill=gray!50] (0.4,-0.8) circle (2.5pt);
\draw[fill=gray!50] (-0.2,0.4) circle (2.5pt);
\draw[fill=gray!50] (-0.4,0.8) circle (2.5pt);
}
\begin{tikzpicture}[scale=0.95]
\centering
\nullspacepicture
% the label
\node at (-2,1.8) {$\mathbb{R}^3$};
% arrow between diagrams
\path[->] (3,0) edge[bend left] node[above] {线性变换} (4.5,0);
\begin{scope}[xshift=7cm]
\rangepicture{blue!70!gray}
\node at (1.8,1.8) {$\mathbb{R}^2$};
\end{scope}
\end{tikzpicture}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 激活函数
\begin{frame}{激活函数}
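% NOTE: the line below is a diff hunk marker left over from extraction; the body of this frame is elided in this copy.
% ......@@ -1013,19 +1183,419 @@ T(\alpha \textbf{a}) & = & \alpha T(\textbf{a}) \nonumber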
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Common activation functions
% Two rows of three sub-figures; each sub-figure plots one activation function
% in red on a pair of axes with ticks at -1, -0.5, 0, 0.5, 1 (x) and 0.5, 1 (y).
\begin{frame}{常用的激活函数}
\begin{itemize}
\item 好多好多,列举不全 ...
\end{itemize}
% first row: softplus, sigmoid, tanh
\begin{figure}
\centering
\subfigure[softplus]{
\centering
\begin{minipage}{.2\textwidth}
\begin{tikzpicture}
\draw[->](-1.2,0)--(1.2,0)node[left,below,font=\tiny]{$x$};
\draw[->](0,-1.2)--(0,1.2)node[right,font=\tiny]{$y$};
\foreach \x in {-1,-0.5,0,0.5,1}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\tiny]at(\x,0){\x};}
\foreach \y in {1,0.5}{\draw(0,\y)--(0.05,\y)node[left,outer sep=2pt,font=\tiny]at(0,\y){\y};}
% the pgfmath expression must have balanced parentheses: ln(1+exp(x))
\draw[color=red ,domain=-1.2:1]plot(\x,{ln(1+exp(\x))})node[right,black]{\tiny $y = \ln(1+e^x)$};
\end{tikzpicture}
\end{minipage}%
}
\hfill
\subfigure[sigmoid]{
\centering
\begin{minipage}{.2\textwidth}
\begin{tikzpicture}
\draw[->](-1.2,0)--(1.2,0)node[left,below,font=\tiny]{$x$};
\draw[->](0,-1.2)--(0,1.2)node[right,font=\tiny]{$y$};
\draw[dashed](-1.2,1)--(1.2,1);
\foreach \x in {-1,-0.5,0,0.5,1}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\tiny]at(\x,0){\x};}
\foreach \y in {0.5,1}{\draw(0,\y)--(0.05,\y)node[left,outer sep=2pt,font=\tiny]at(0,\y){\y};}
% the formula label is placed once, by the separate node above the y axis
\draw[color=red ,domain=-1.2:1.2]plot(\x,{1/(1+(exp(-1*(\x))))});
\node[black,anchor=south] at (0,1.2) {\tiny $y = \frac{1}{1+e^{-x}}$};
\end{tikzpicture}
\end{minipage}%
}
\hfill
\subfigure[tanh]{
\centering
\begin{minipage}{.2\textwidth}
\begin{tikzpicture}
\draw[->](-1.2,0)--(1.2,0)node[left,below,font=\tiny]{$x$};
\draw[->](0,-1.2)--(0,1.2)node[right,font=\tiny]{$y$};
\draw[dashed](-1.2,1)--(1.2,1);
\draw[dashed](-1.2,-1)--(1.2,-1);
\foreach \x in {-1,-0.5,0,0.5,1}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\tiny]at(\x,0){\x};}
\foreach \y in {0.5,1}{\draw(0,\y)--(0.05,\y)node[left,outer sep=2pt,font=\tiny]at(0,\y){\y};}
\draw[color=red ,domain=-1.2:1.2]plot(\x,{tanh(\x)})node[below,black]{\tiny $y = \frac{e^{x}-e^{-x}}{e^{x}+e^{-x}}$};
\end{tikzpicture}
\end{minipage}
}
\end{figure}
% second row: relu, gaussian, identity
\begin{figure}
\centering
\subfigure[relu]{
\centering
\begin{minipage}{.2\textwidth}
\begin{tikzpicture}
\draw[->](-1.2,0)--(1.2,0)node[left,below,font=\tiny]{$x$};
\draw[->](0,-1.2)--(0,1.2)node[right,font=\tiny]{$y$};
\draw[dashed](-1.2,1)--(1.2,1);
\draw[dashed](-1.2,-1)--(1.2,-1);
\foreach \x in {-1,-0.5,0,0.5,1}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\tiny]at(\x,0){\x};}
\foreach \y in {0.5,1}{\draw(0,\y)--(0.05,\y)node[left,outer sep=2pt,font=\tiny]at(0,\y){\y};}
\draw[color=red ,domain=-1.2:1.2]plot(\x,{max(\x,0)})node[right,black]{\tiny $y =\max (0, x)$};
\end{tikzpicture}
\end{minipage}%
}
\hfill
\subfigure[gaussian]{
\centering
\begin{minipage}{.2\textwidth}
\begin{tikzpicture}
\draw[->](-1.2,0)--(1.2,0)node[left,below,font=\tiny]{$x$};
\draw[->](0,-1.2)--(0,1.2)node[right,font=\tiny]{$y$};
\draw[dashed](-1.2,1)--(1.2,1);
\foreach \x in {-1,-0.5,0,0.5,1}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\tiny]at(\x,0){\x};}
\foreach \y in {0.5,1}{\draw(0,\y)--(0.05,\y)node[left,outer sep=2pt,font=\tiny]at(0,\y){\y};}
\draw[color=red ,domain=-1.2:1.2]plot(\x,{exp(-1*((\x)^2))})node[right,black]{\tiny $y =e^{-x^2}$};
\end{tikzpicture}
\end{minipage}%
}
\hfill
\subfigure[identity]{
\centering
\begin{minipage}{.2\textwidth}
\begin{tikzpicture}
\draw[->](-1.2,0)--(1.2,0)node[left,below,font=\tiny]{$x$};
\draw[->](0,-1.2)--(0,1.2)node[right,font=\tiny]{$y$};
\foreach \x in {-1,-0.5,0,0.5,1}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\tiny]at(\x,0){\x};}
\foreach \y in {0.5,1}{\draw(0,\y)--(0.05,\y)node[left,outer sep=2pt,font=\tiny]at(0,\y){\y};}
\draw[color=red ,domain=-1:1]plot(\x,\x)node[right,black]{\tiny $y =x$};
\end{tikzpicture}
\end{minipage}
}
\end{figure}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{多层神经网络}
%%%------------------------------------------------------------------------------------------------------------
%%% One layer -> multiple layers
% Builds a network step by step over four overlays:
%   slide 1: inputs x_1..x_5 and a first row of five neurons (f_1) with outputs y_n
%   slide 2: a second row of three neurons (f_2) is added and carries the outputs
%   slide 3: a third row of five neurons (f_3) is added and carries the outputs
%   slide 4: the rows are additionally labeled as hidden / output layers
\begin{frame}{更多的层}
\begin{itemize}
\item \textbf{单层神经网络}:线性变换 + 激活函数(非线性)
\item 我们可以重复上面的过程,构建\textbf{多层神经网络}
\end{itemize}
\vspace{-1.0em}
\begin{center}
\begin{tikzpicture}
\begin{scope}[]
% horizontal distance between neighbouring neurons
\def\neuronsep{1.6}
\tikzstyle{neuronnode} = [minimum size=1.7em,circle,draw,ublue,very thick,inner sep=1pt, fill=white,align=center,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}]
%%% layer 1
% five neurons neuron01..neuron05, each split by a horizontal line
\foreach \n in {1,...,5}{
\node [neuronnode] (neuron0\n) at (\n * \neuronsep,0) {\tiny{$f_1$}\\[-1ex] \tiny{$\sum$}};
\draw [-,ublue] (neuron0\n.east) -- (neuron0\n.west);
}
% every input x_n feeds every neuron of this row
\foreach \n in {1,...,5}{
\foreach \m in {1,...,5}{
\draw [<-] (neuron0\m.south) -- ([yshift=-2em]neuron0\n.south);
}
\node [anchor=north] (x\n) at ([yshift=-2em]neuron0\n.south) {$x_\n$};
% output arrows and y_n labels of this row are shown on slide 1 only
\visible<1>{
\draw [<-,thick] ([yshift=1.5em]neuron0\n.north) -- (neuron0\n.north);
\node [anchor=south] (y\n) at ([yshift=1.5em]neuron0\n.north) {$y_\n$};
}
}
\node [anchor=west] (w1label) at ([xshift=-0.5em,yshift=0.5em]x5.north east) {$\textbf{w}_1$};
% red background box behind the first row of neurons
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,fill=red!20] [fit = (neuron01) (neuron05)] (layer01) {};
\end{pgfonlayer}
\node [anchor=west] (layer00label) at ([xshift=1.25em]x5.east) {\alert{输入层}};
\visible<2->{
\node [anchor=west] (layer01label) at ([xshift=1em]layer01.east) {第二层};
}
\visible<4->{
\node [anchor=west] (layer01label2) at (layer01label.east) {(\alert{隐层})};
}
%%% layer 2
% three neurons neuron12..neuron14, visible from slide 2 on
\visible<2->{
\foreach \n in {2,...,4}{
\node [neuronnode] (neuron1\n) at (\n * \neuronsep,4em) {\tiny{$f_2$}\\[-1ex] \tiny{$\sum$}};
\draw [-,ublue] (neuron1\n.east) -- (neuron1\n.west);
}
% every neuron of the first row feeds every neuron of this row
\foreach \n in {2,...,4}{
\foreach \m in {1,...,5}{
\draw [<-] (neuron1\n.south) -- (neuron0\m.north);
}
% output arrows and y_n labels of this row are shown on slide 2 only
\visible<2>{
\draw [<-,thick] ([yshift=1.5em]neuron1\n.north) -- (neuron1\n.north);
\node [anchor=south] (y\n) at ([yshift=1.5em]neuron1\n.north) {$y_\n$};
}
}
\node [anchor=west] (w2label) at ([xshift=-2.5em,yshift=5.0em]x5.north east) {$\textbf{w}_2$};
% green background box behind the second row of neurons
\begin{pgfonlayer}{background}
\visible<2->{
\node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron12) (neuron14)] (layer02) {};
}
\end{pgfonlayer}
\node [anchor=west] (layer02label) at ([xshift=4.9em]layer02.east) {第三层};
\visible<4->{
\node [anchor=west] (layer02label2) at (layer02label.east) {(\alert{隐层})};
}
}
%%% layer 3
% five neurons neuron21..neuron25, visible from slide 3 on
\visible<3->{
\foreach \n in {1,...,5}{
\node [neuronnode] (neuron2\n) at (\n * \neuronsep,8em) {\tiny{$f_3$}\\[-1ex] \tiny{$\sum$}};
\draw [-,ublue] (neuron2\n.east) -- (neuron2\n.west);
}
% every neuron of the second row feeds every neuron of this row;
% this row keeps its output arrows and y_n labels on all slides where it is visible
\foreach \n in {1,...,5}{
\foreach \m in {2,...,4}{
\draw [<-] (neuron2\n.south) -- (neuron1\m.north);
}
\node [anchor=south] (y\n) at ([yshift=1.5em]neuron2\n.north) {$y_\n$};
\draw [<-,thick] ([yshift=1.5em]neuron2\n.north) -- (neuron2\n.north);
}
\node [anchor=west] (w3label) at ([xshift=-2.5em,yshift=8.5em]x5.north east) {$\textbf{w}_3$};
% blue background box behind the third row of neurons
\begin{pgfonlayer}{background}
\visible<3->{
\node [rectangle,inner sep=0.2em,fill=blue!20] [fit = (neuron21) (neuron25)] (layer03) {};
}
\end{pgfonlayer}
\node [anchor=west] (layer03label) at ([xshift=1em]layer03.east) {第四层};
\visible<4->{
\node [anchor=west] (layer03label2) at (layer03label.east) {(\alert{输出层})};
}
}
\end{scope}
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% A two-layer neural network can approximate any function
% Animated over 16 overlays. Left: a small network with inputs b and x_1,
% hidden neurons n10..n13 and one summing output neuron n20. Upper right:
% the curve produced for the currently shown weights. Lower right
% (from slide 13): a target curve that is covered by rectangular steps.
\begin{frame}{多层神经网络可以逼近任意函数}
\begin{itemize}
\item 以一个简单的三层网络为例(隐层激活函数:sigmoid)
\end{itemize}
\begin{center}
\begin{tikzpicture}
%% a two-layer neural network
\begin{scope}
\tikzstyle{neuronnode} = [minimum size=1.7em,circle,draw,ublue,very thick,inner sep=1pt, fill=white,align=center,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}]
%% input and hidden layers
\node [neuronnode] (n10) at (0,0) {\tiny{$f$}\\[-1ex] \tiny{$\sum$}};
\node [neuronnode] (n11) at (1.5,0) {\tiny{$f$}\\[-1ex] \tiny{$\sum$}};
\draw [-,ublue] (n10.west) -- (n10.east);
\draw [-,ublue] (n11.west) -- (n11.east);
\node [anchor=north] (x1) at ([yshift=-6em]n11.south) {$x_1$};
\node [anchor=north] (b) at ([yshift=-6em]n10.south) {$b$};
% arrows into n10: black on slides 1-10, colored from slide 11
\visible<1-10>{
\draw [->,thick] (b.north) -- ([yshift=-0.1em]n10.south);
\draw [->,thick] (x1.north) -- ([yshift=-0.1em]n10.290);
}
% arrows into n11: black on slide 1, colored from slide 2
\visible<1>{
\draw [->,thick] (b.north) -- ([yshift=-0.1em]n11.250);
\draw [->,thick] (x1.north) -- ([yshift=-0.1em]n11.south);
}
\visible<11->{
\draw [->,thick,red] (b.north) -- ([yshift=-0.1em]n10.south);
\draw [->,thick,ugreen] (x1.north) -- ([yshift=-0.1em]n10.290);
}
\visible<2->{
\draw [->,thick,blue] (b.north) -- ([yshift=-0.1em]n11.250);
\draw [->,thick,purple] (x1.north) -- ([yshift=-0.1em]n11.south);
}
% two more hidden neurons appear from slide 15
\visible<15->{
\node [neuronnode] (n12) at (2.7,0) {\tiny{$f$}\\[-1ex] \tiny{$\sum$}};
\node [neuronnode] (n13) at (3.8,0) {\tiny{$f$}\\[-1ex] \tiny{$\sum$}};
\draw [-,ublue] (n12.west) -- (n12.east);
\draw [-,ublue] (n13.west) -- (n13.east);
\draw [->,thick] (b.north) -- ([yshift=-0.1em]n12.250);
\draw [->,thick] (x1.north) -- ([yshift=-0.1em]n12.270);
\draw [->,thick] (b.north) -- ([yshift=-0.1em]n13.230);
\draw [->,thick] (x1.north) -- ([yshift=-0.1em]n13.250);
}
\visible<16->{
\node [anchor=west] (morenodes) at (n13.east) {...};
}
%% output layers
\node [neuronnode] (n20) at (0.75,5em) {\scriptsize{$\sum$}};
\visible<1-10>{\draw [->,thick] ([yshift=0.1em]n10.north) -- ([yshift=-0.1em]n20.250);}
\visible<1-8>{\draw [->,thick] ([yshift=0.1em]n11.north) -- ([yshift=-0.1em]n20.290);}
\visible<11->{\draw [->,thick,brown] ([yshift=0.1em]n10.north) -- ([yshift=-0.1em]n20.250);}
\visible<9->{\draw [->,thick,orange] ([yshift=0.1em]n11.north) -- ([yshift=-0.1em]n20.290);}
\node [] (y) at ([yshift=3em]n20.north) {$y$};
\draw [->,thick] ([yshift=0.1em]n20.north) -- (y.south);
\visible<15->{
\draw [->,thick] ([yshift=0.1em]n12.north) -- ([yshift=-0.1em]n20.310);
\draw [->,thick] ([yshift=0.1em]n13.north) -- ([yshift=-0.1em]n20.330);
}
%% weight and bias
% value labels drawn on top of the arrows; each label is swapped per slide
\visible<11->{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (b0) at ([yshift=3em,xshift=-0.5em]b.north) {\tiny{$b=-6$}};}
\visible<11->{\node [anchor=center,rotate=-59,fill=white,inner sep=1pt] (w2) at ([yshift=1.2em,xshift=-1.2em]x1.north) {\tiny{$w=100$}};}
\visible<2-6>{\node [anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north) {\tiny{$b=0$}};}
\visible<7>{\node [anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north) {\tiny{$b=-2$}};}
\visible<8->{\node [anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north) {\tiny{$b=-4$}};}
\visible<2-4>{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north) {\tiny{$w=1$}};}
\visible<5>{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north) {\tiny{$w=10$}};}
\visible<6->{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north) {\tiny{$w=100$}};}
\visible<11>{\node [anchor=center,rotate=62,fill=white,inner sep=1pt] (w21) at ([yshift=2em,xshift=0.5em]n10.north) {\tiny{$w'=0.7$}};}
\visible<12->{\node [anchor=center,rotate=62,fill=white,inner sep=1pt] (w21) at ([yshift=2em,xshift=0.5em]n10.north) {\tiny{$w'=-0.7$}};}
\visible<2-8>{\node [anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north) {\tiny{$w'=1$}};}
\visible<9>{\node [anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north) {\tiny{$w'=0.9$}};}
\visible<10->{\node [anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north) {\tiny{$w'=0.7$}};}
%% sigmoid box
% from slide 3: box with the sigmoid and sum formulas, linked to n11 by a dotted arrow
\begin{scope}
\visible<3->{
\node [anchor=west] (flabel) at ([xshift=1.2in]y.east) {\footnotesize{sigmoid:}};
\node [anchor=north east] (slabel) at ([xshift=0]flabel.south east) {\footnotesize{sum:}};
\node [anchor=west,inner sep=2pt] (flabel2) at (flabel.east) {\footnotesize{$f(s)=1/(1+e^{-s})$}};
\node [anchor=west,inner sep=2pt] (flabel3) at (slabel.east) {\footnotesize{$s=x_1 \cdot w + b$}};
\draw [->,thick,dotted] ([yshift=-0.3em,xshift=-0.1em]n11.60) .. controls +(east:1) and +(west:2) .. ([xshift=-0.2em]flabel.west) ;
\begin{pgfonlayer}{background}
\visible<3->{
\node [rectangle,inner sep=0.2em,fill=blue!20,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] [fit = (flabel) (flabel2) (flabel3)] (funcbox) {};
}
\end{pgfonlayer}
}
\end{scope}
%% output illustration
% upper-right plot: axes from slide 4, then one curve per slide
\begin{scope}[xshift=2.8in,yshift=0.1in]
\visible<4->{
\draw [->,thick] (-2.2,0) -- (2.2,0);
\draw [->,thick] (0,0) -- (0,2);
\draw [-] (-0.05,1) -- (0.05,1);
\node [anchor=east,inner sep=1pt] (label1) at (0,1) {\tiny{1}};
\node [anchor=south east,inner sep=1pt] (label2) at (0,0) {\tiny{0}};
}
% slides 4-5 plot smooth sigmoids; from slide 6 the curves are hand-drawn step shapes
\visible<4>{\draw [-,very thick,ublue,domain=-2:2,samples=100] plot (\x,{1/(1+exp(-2*\x))});}
\visible<5>{\draw [-,very thick,ublue,domain=-2:2,samples=100] plot (\x,{1/(1+exp(-4*\x))});}
\visible<6>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0,0) -- (0,1) -- (2,1);}
\visible<7>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.25,0) -- (0.25,1) -- (2,1);}
\visible<8>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,1) -- (2,1);}
\visible<9>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.9) -- (2,0.9);}
\visible<10>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.7) -- (2,0.7);}
\visible<11>{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.7) -- (0.7,0.7) -- (0.7,1.4) -- (2,1.4);}
\visible<12->{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.7) -- (0.7,0.7) -- (0.7,0) -- (2,0);}
\visible<15->{\draw [-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.7,0) -- (0.7,0.6) -- (0.9,0.6) -- (0.9,0) -- (2,0);}
% dashed arrows pointing from a step down towards the lower plot
\visible<14>{\draw [->,dashed] (0.6,-0.05) -- (0.6,-0.96in);}
\visible<15->{\draw [->,dashed] (0.8,-0.05) -- (0.8,-0.98in);}
% caption in the upper-left corner of the plot; the value changed on a slide is highlighted
\visible<4>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\scriptsize{$w_1=1$}\\[-0ex] \scriptsize{\ $b_1=0$}};}
\visible<5>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\alert{\scriptsize{$w_1=10$}}\\[-0ex] \scriptsize{\ $b_1=0$}};}
\visible<6>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\alert{\scriptsize{$w_1=100$}}\\[-0ex] \scriptsize{\ $b_1=0$}};}
\visible<7>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\scriptsize{$w_1=100$}\\[-0ex] \alert{\scriptsize{\ $b_1=-2$}}};}
\visible<8>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\scriptsize{$w_1=100$}\\[-0ex] \alert{\scriptsize{\ $b_1=-4$}}};}
\visible<9>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\alert{\scriptsize{$w'_1=0.9$}}};}
\visible<10>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\alert{\scriptsize{$w'_1=0.7$}}};}
\visible<11>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\alert{\scriptsize{$w_2=100$}}\\[-0ex] \alert{\scriptsize{\ $b_2=-6$}}\\[-0ex] \alert{\scriptsize{\ $w'_2=0.7$}}};}
\visible<12>{\node [anchor=north west,align=left] (wblabel) at (-2,2) {\scriptsize{$w_2=100$}\\[-0ex] \scriptsize{\ $b_2=-6$}\\[-0ex] \alert{\scriptsize{\ $w'_2=-0.7$}}};}
\visible<13->{\node [anchor=north west,align=left] (wblabel) at (-2.5,2) {\scriptsize{这是一个}\\[-1ex] \scriptsize{step function}};}
\end{scope}
% lower-right plot: red target curve (from slide 13) covered by blue rectangles
% of width 0.2 whose height is the curve value at the rectangle's midpoint
\begin{scope}[xshift=2.8in,yshift=-1.2in]
\visible<13->{
\draw [->,thick] (-2.2,0) -- (2.2,0);
\draw [->,thick] (0,0) -- (0,2);
\draw [-,very thick,red,domain=-1.98:2,samples=100] plot (\x,{0.2 * (\x +0.4)^3 + 1.2 - 0.3 *(\x + 0.8)^2});
}
\visible<14->{
\foreach \n in {0.5}{
\pgfmathsetmacro{\result}{0.2 * (\n + 0.1 + 0.4)^3 + 1.2 - 0.3 *(\n + 0.1 + 0.8)^2}%
\draw [-,ublue,thick] (\n,0) -- (\n, \result) -- (\n + 0.2, \result) -- (\n + 0.2, 0);
}
}
\visible<15->{
\foreach \n in {0.7}{
\pgfmathsetmacro{\result}{0.2 * (\n + 0.1 + 0.4)^3 + 1.2 - 0.3 *(\n + 0.1 + 0.8)^2}%
\draw [-,ublue,thick] (\n,0) -- (\n, \result) -- (\n + 0.2, \result) -- (\n + 0.2, 0);
}
}
\visible<16->{
\foreach \n in {-1.9,-1.7,...,1.9}{
\pgfmathsetmacro{\result}{0.2 * (\n + 0.1 + 0.4)^3 + 1.2 - 0.3 *(\n + 0.1 + 0.8)^2}%
\draw [-,ublue,thick] (\n,0) -- (\n, \result) -- (\n + 0.2, \result) -- (\n + 0.2, 0);
}
}
\visible<14>{\node [anchor=north west,align=left] (wblabel) at (-2.5,2.5) {\scriptsize{函数的每一段都可}\\[-1ex] \scriptsize{由step function}\\[-1ex] \scriptsize{近似}};}
\visible<15>{\node [anchor=north west,align=left] (wblabel) at (-2.5,2.5) {\scriptsize{增加隐层神经元}\\[-1ex] \scriptsize{可以拟合更多的}\\[-1ex] \scriptsize{部分}};}
\visible<16>{\node [anchor=north west,align=left] (wblabel) at (-2.5,2.5) {\scriptsize{理论上足够多的}\\[-1ex] \scriptsize{隐层神经元可以}\\[-1ex] \scriptsize{拟合\alert{任意函数}}};}
\end{scope}
\end{scope}
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{神经网络的简单实现:张量计算}
%%%------------------------------------------------------------------------------------------------------------
\subsection{参数学习 - 反向传播}
\end{CJK}
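% NOTE: the lines below are web-page residue from extraction (diff truncation marker and comment-form text), not part of the document.
% ......
% Markdown 格式
% 0%
% 您添加了 0 到此讨论。请谨慎行事。
% 请先完成此评论的编辑!
% 注册 或者 后发表评论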