linear transformation and activation function

dbe871f1 · xiaotong · bcb189ce · dbe871f1 · dbe871f1
Commit dbe871f1 authored Sep 05, 2019 by xiaotong
--- a/Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
+++ b/Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
@@ -105,90 +105,108 @@
 \subsection{数学基础：张量计算}
 %%%------------------------------------------------------------------------------------------------------------
-%%% 层的概念
+%%% 神经网络的作用
-\begin{frame}{``层"的概念}
+\begin{frame}{神经网络：线性变换 + 激活函数}
 \begin{itemize}
-\item 对于一个问题（相同输入），可能会有多个输出，这时可以把\alert{多个相同的神经元并列起来}，构成一\alert{``层"}
+\item 对于向量$\textbf{x} \in \mathbb{R}^m$，一层神经网络首先把它经过\textbf{\alert{线性变换}}映射到$\mathbb{R}^n$，之后经过\textbf{{\color{blue}激活函数}}变换成$\textbf{y} \in \mathbb{R}^n$
-    \begin{itemize}
-    \item 比如，天气预报需要同时预测湿度和温度
-    \end{itemize}
 \end{itemize}
-\vspace{-2em}
 \begin{center}
 \begin{tikzpicture}
-\begin{scope}
-\tikzstyle{neuronnode} = [minimum size=1.5em,circle,draw,ublue,very thick,fill=white,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}]
+\node [anchor=center] (y) at (0,0) {\Large{$\textbf{y}$}};
+\node [anchor=west] (eq) at (y.east) {\Large{$=$}};
+\node [anchor=west] (func) at (eq.east) {\Large{$f$}};
+\node [anchor=west] (brace01) at (func.east) {\Large{$($}};
+\node [anchor=west] (x) at (brace01.east) {\Large{$\textbf{x}$}};
+\node [anchor=west] (dot) at (x.east) {\Large{$\cdot$}};
+\node [anchor=west] (w) at (dot.east) {\Large{$\textbf{w}$}};
+\node [anchor=west] (plus) at (w.east) {\Large{$+$}};
+\node [anchor=west] (b) at (plus.east) {\Large{$\textbf{b}$}};
+\node [anchor=west] (brace02) at (b.east) {\Large{$)$}};
+\node [anchor=center,fill=blue!20] (func2) at (func) {\LARGE{$f$}};
+\node [anchor=north] (funclabel) at ([yshift=-1.1em]func.south) {{\color{blue}激活函数}};
+\draw [<-] ([yshift=-0.2em]func2.south) -- (funclabel.north);
-\node [anchor=center,neuronnode] (neuron00) at (0,0) {};
+\begin{pgfonlayer}{background}
-\visible<2->{
+\node [rectangle,inner sep=0.2em,fill=red!20] [fit = (x) (w) (b)] (linear) {};
-\node [anchor=center,neuronnode] (neuron01) at ([yshift=-3em]neuron00) {};
+\node [anchor=north] (linearlabel) at ([yshift=-1.1em]linear.south) {\alert{线性变换}};
-}
+\draw [<-] ([yshift=-0.2em]linear.south) -- (linearlabel.north);
-\visible<3->{
+\end{pgfonlayer}
-\node [anchor=center,neuronnode] (neuron02) at ([yshift=-3em]neuron01) {};
-}
-\node [anchor=east] (x0) at ([xshift=-6em]neuron00.west) {$x_0$};
+\end{tikzpicture}
-\node [anchor=east] (x1) at ([xshift=-6em]neuron01.west) {$x_1$};
+\end{center}
-\node [anchor=east] (x2) at ([xshift=-6em]neuron02.west) {$b$};
-\node [anchor=west] (y0) at ([xshift=4em]neuron00.east) {$y_0$};
+\end{frame}
-\draw [->] (x0.east) -- (neuron00.180) node [pos=0.1,above] {\tiny{$w_{00}$}};
+%%%------------------------------------------------------------------------------------------------------------
-\draw [->] (x1.east) -- (neuron00.200) node [pos=0.1,above] {\tiny{$w_{10}$}};
+%%% 线性变换
-\draw [->] (x2.east) -- (neuron00.220) node [pos=0.05,above,yshift=0.3em] {\tiny{$b_{0}$}};
+\begin{frame}{线性变换}
-\draw [->] (neuron00.east) -- (y0.west);
+\begin{itemize}
+\item 对于线性空间$V$，任意$\textbf{a}$，$\textbf{b} \in V$和数域中的任意$\alpha$，线性变换$T(\cdot)$需满足
+\begin{eqnarray}
+T(\textbf{a} + \textbf{b}) & = & T(\textbf{a}) + T(\textbf{b}) \nonumber \\
+T(\alpha \textbf{a}) & = & \alpha T(\textbf{a}) \nonumber
+\end{eqnarray}
+\item<2-> 线性变换的一种几何解释：
+\end{itemize}
+\vspace{-1em}
 \visible<2->{
-\node [anchor=west] (y1) at ([xshift=4em]neuron01.east) {$y_1$};
+\begin{center}
-\draw [->] (x0.east) -- (neuron01.160) node [pos=0.4,above] {\tiny{$w_{01}$}};
+\begin{tikzpicture}
-\draw [->] (x1.east) -- (neuron01.180) node [pos=0.35,above,yshift=-0.2em] {\tiny{$w_{11}$}};
-\draw [->] (x2.east) -- (neuron01.200) node [pos=0.4,below] {\tiny{$b_{1}$}};
+\node [anchor=west] (x) at (0,0) {\Large{$\textbf{x}$}};
-\draw [->] (neuron01.east) -- (y1.west);
+\node [anchor=west] (dot) at (x.east) {\Large{$\cdot$}};
-}
+\node [anchor=west] (w) at (dot.east) {\Large{$\textbf{w}$}};
+\node [anchor=west] (plus) at (w.east) {\Large{$+$}};
+\node [anchor=west] (b) at (plus.east) {\Large{$\textbf{b}$}};
 \visible<3->{
-\node [anchor=west] (y2) at ([xshift=4em]neuron02.east) {$y_2$};
+\node [anchor=center,fill=green!20] (w2) at (w) {\Large{$\textbf{w}$}};
-\draw [->] (x0.east) -- (neuron02.140) node [pos=0.1,below,yshift=-0.2em] {\tiny{$w_{02}$}};
+\node [anchor=north,inner sep=1pt] (wlabel) at ([yshift=-0.7em]w.south) {\small{旋转(rotation)}};
-\draw [->] (x1.east) -- (neuron02.160) node [pos=0.1,below] {\tiny{$w_{12}$}};
+\draw [<-] ([yshift=-0.2em]w2.south) -- (wlabel.north);
-\draw [->] (x2.east) -- (neuron02.180) node [pos=0.3,below] {\tiny{$b_{2}$}};
-\draw [->] (neuron02.east) -- (y2.west);
 }
 \visible<4->{
-\node [anchor=east,align=left] (inputlabel) at ([xshift=-0.1em]x1.west) {输入向量:\\\small{$\textbf{x}=(x_0,x_1)$}};
+\node [anchor=center,fill=purple!20] (b2) at (b) {\Large{$\textbf{b}$}};
-}
+\node [anchor=west] (blabel) at ([xshift=1.5em]b2.east) {平移(shift)};
-\visible<5->{
+\draw [<-] ([xshift=0.2em]b2.east) -- (blabel.west);
-\node [anchor=west,align=left] (outputlabel) at ([xshift=0.1em]y1.east) {输出向量:\\\small{$\textbf{y}=(y_0,y_1,y_2)$}};
 }
-\begin{pgfonlayer}{background}
+\end{tikzpicture}
-\visible<6->{
+\end{center}
-\node [rectangle,inner sep=0.4em,fill=red!20] [fit = (neuron00) (neuron01) (neuron02)] (layer) {};
-\node [anchor=south] (layerlabel) at ([yshift=0.2em]layer.north) {一层神经元};
 }
-\visible<4->{
+\end{frame}
-\node [rectangle,inner sep=0.1em,fill=ugreen!20] [fit = (x0) (x1)] (inputshadow) {};
-}
-\visible<5->{
-\node [rectangle,inner sep=0.1em,fill=blue!20] [fit = (y0) (y1) (y2)] (outputshadow) {};
-}
-\end{pgfonlayer}
-\visible<7->{
+%%%------------------------------------------------------------------------------------------------------------
-\node [anchor=north west] (wlabel) at ([yshift=-1em,xshift=-7em]x2.south) {参数(矩阵):$\textbf{w} = \Big( \begin{array}{lll} w_{01} & w_{01} & w_{02} \\ w_{11} & w_{11} & w_{12} \end{array} \Big)$};
+%%% 线性变换：更复杂的实例
-}
+\begin{frame}{线性变换（续）}
-\visible<8->{
+\begin{itemize}
-\node [anchor=west] (blabel) at (wlabel.east) {参数(向量):$\textbf{b} = (b_0, b_1, b_2)$};
+\item 线性变换也适用于更加复杂的情况，这也给神经网络提供了拟合不同数据分布的能力
-}
+\end{itemize}
+\end{frame}
-\end{scope}
+%%%------------------------------------------------------------------------------------------------------------
-\end{tikzpicture}
+%%% 激活函数
-\end{center}
+\begin{frame}{激活函数}
+\begin{itemize}
+\item 激活函数的设计更多的是为了进行\alert{非线性}变换
+    \begin{itemize}
+    \item 很多实际问题都是非线性的
+    \item 非线性部分提供了拟合任意函数的能力（稍后介绍）
+    \end{itemize}
+\end{itemize}
+\end{frame}
+%%%------------------------------------------------------------------------------------------------------------
+%%% 常用的激活函数
+\begin{frame}{常用的激活函数}
+\begin{itemize}
+\item 常用的激活函数有很多，这里无法一一列举 \dots
+\end{itemize}
 \end{frame}
 %%%------------------------------------------------------------------------------------------------------------

--- a/Section05-Neural-Networks-and-Language-Modeling/section05.tex
+++ b/Section05-Neural-Networks-and-Language-Modeling/section05.tex
@@ -717,27 +717,27 @@ GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7}
 \node [anchor=west] (eq) at (y.east) {\LARGE{$=$}};
 \node [anchor=west] (func) at (eq.east) {\LARGE{$f$}};
 \node [anchor=west] (brace01) at (func.east) {\LARGE{$($}};
-\node [anchor=west] (x) at (brace01.east) {\LARGE{$\textbf{x}$}}; 
+\node [anchor=west] (x) at (brace01.east) {\LARGE{$\textbf{x}$}};
-\node [anchor=west] (dot) at (x.east) {\LARGE{$\cdot$}}; 
+\node [anchor=west] (dot) at (x.east) {\LARGE{$\cdot$}};
-\node [anchor=west] (w) at (dot.east) {\LARGE{$\textbf{w}$}}; 
+\node [anchor=west] (w) at (dot.east) {\LARGE{$\textbf{w}$}};
-\node [anchor=west] (plus) at (w.east) {\LARGE{$+$}}; 
+\node [anchor=west] (plus) at (w.east) {\LARGE{$+$}};
-\node [anchor=west] (b) at (plus.east) {\LARGE{$\textbf{b}$}}; 
+\node [anchor=west] (b) at (plus.east) {\LARGE{$\textbf{b}$}};
-\node [anchor=west] (brace02) at (b.east) {\LARGE{$)$}}; 
+\node [anchor=west] (brace02) at (b.east) {\LARGE{$)$}};
 \visible<2->{
-\node [anchor=center,fill=yellow!30] (x2) at (x) {\LARGE{$\textbf{x}$}}; 
+\node [anchor=center,fill=yellow!30] (x2) at (x) {\LARGE{$\textbf{x}$}};
 \node [anchor=south] (xlabel) at ([yshift=1.5em]x.north) {输入};
 \draw [<-] ([yshift=0.2em]x2.north) -- (xlabel.south);
 }
 \visible<3->{
-\node [anchor=center,fill=green!20] (w2) at (w) {\LARGE{$\textbf{w}$}}; 
+\node [anchor=center,fill=green!20] (w2) at (w) {\LARGE{$\textbf{w}$}};
 \node [anchor=north] (wlabel) at ([yshift=-1.5em]w.south) {参数(权重)};
 \draw [<-] ([yshift=-0.2em]w2.south) -- (wlabel.north);
 }
 \visible<4->{
-\node [anchor=center,fill=purple!20] (b2) at (b) {\LARGE{$\textbf{b}$}}; 
+\node [anchor=center,fill=purple!20] (b2) at (b) {\LARGE{$\textbf{b}$}};
 \node [anchor=south] (blabel) at ([yshift=1.3em]b.north) {偏移};
 \draw [<-] ([yshift=0.2em]b2.north) -- (blabel.south);
 }
@@ -859,6 +859,14 @@ GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7}
 \end{frame}
 %%%------------------------------------------------------------------------------------------------------------
+%%% 神经网络的作用
+\begin{frame}{神经网络：线性变换 + 激活函数}
+\begin{itemize}
+\item 对于向量$\textbf{x} \in \mathbb{R}^m$，一层神经网络实际上就是把它先经过线性变换映射到$\mathbb{R}^n$，再经过激活函数变换成$\textbf{y} \in \mathbb{R}^n$
+\end{itemize}
+\end{frame}
+%%%------------------------------------------------------------------------------------------------------------
 \subsection{多层神经网络}
 %%%------------------------------------------------------------------------------------------------------------