auto grad pages

b5e1108d · xiaotong · 63f6e3b1 · b5e1108d · b5e1108d
Commit b5e1108d authored Oct 11, 2019 by xiaotong
--- a/Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
+++ b/Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
@@ -116,13 +116,17 @@
 \subsection{参数学习 - 反向传播}
 %%%------------------------------------------------------------------------------------------------------------
-%%% 反向传播
+%%% 反向传播 - 符号说明
-\begin{frame}{反向传播}
+\begin{frame}{符号说明}
 \begin{itemize}
-\item 反向传播
+\item 以一个$L$层神经网络为例重新明确一下符号
+    \begin{itemize}
+    \item 这里假设每层神经网络中都不含偏置项（不含$\textbf{b}$）
+    \end{itemize}
 \end{itemize}
+\vspace{-1em}
 \begin{center}
 \begin{tikzpicture}
 \begin{scope}
@@ -139,7 +143,7 @@
 \begin{pgfonlayer}{background}
 \node [rectangle,inner sep=0.2em,fill=red!20] [fit = (neuron01) (neuron04)] (layer01) {};
-\node [anchor=east] (layer01label) at (layer01.west) {\scriptsize{层$i-1$}};
+\node [anchor=east] (layer01label) at (layer01.west) {\scriptsize{层$l-1$}};
 \end{pgfonlayer}
 %%% layer 2
@@ -155,7 +159,7 @@
 \begin{pgfonlayer}{background}
 \node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron11) (neuron14)] (layer02) {};
-\node [anchor=east] (layer02label) at (layer02.west) {\scriptsize{层$i$}};
+\node [anchor=east] (layer02label) at (layer02.west) {\scriptsize{层$l$}};
 \end{pgfonlayer}
 %%% layer 3
@@ -172,28 +176,66 @@
 \begin{pgfonlayer}{background}
 \node [rectangle,inner sep=0.2em,fill=blue!20] [fit = (neuron21) (neuron24)] (layer03) {};
-\node [anchor=east] (layer03label) at (layer03.west) {\scriptsize{层$i+1$}};
+\node [anchor=east] (layer03label) at (layer03.west) {\scriptsize{层$l+1$}};
 \end{pgfonlayer}
+%%% output layer
+\foreach \n in {1,...,4}{
+    \node [neuronnode] (neuron3\n) at (\n * \neuronsep,9.4em) {};
+    \visible<1-3,5->{
+    \draw [<-] ([yshift=0.6em]neuron3\n.north) -- ([yshift=0.0em]neuron3\n.north) node [pos=0,above] {\tiny{output}};
+    }
+    \visible<4>{
+    \draw [<-,red,very thick] ([yshift=0.6em]neuron3\n.north) -- ([yshift=0.0em]neuron3\n.north) node [pos=0,above] {\tiny{output}};
+    }
+    \draw [->] ([yshift=-0.6em]neuron3\n.south) -- ([yshift=0.0em]neuron3\n.south);
+}
+\begin{pgfonlayer}{background}
+\node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron31) (neuron34)] (layer04) {};
+\node [anchor=east] (layer04label) at (layer04.west) {\scriptsize{层$L$(输出)}};
+\end{pgfonlayer}
+\visible<2->{
 \node [neuronnode,draw=red,fill=red!20!white,inner sep=1pt] (neuron12new) at (2 * \neuronsep,3em) {};
 \node [anchor=east] (neuronsamplelabel) at ([yshift=-1em]layer02label.south east) {\alert{\textbf{\tiny{第$l$层, 第$i$个神经元}}}};
-\draw [->,dashed,very thick,red] ([xshift=-0.2em,yshift=0.2em]neuronsamplelabel.east) .. controls +(40:1) and +(220:1) .. ([xshift=-0em,yshift=-0em]neuron12new.210);
+\draw [->,dashed,very thick,red] ([xshift=-0.2em,yshift=0.2em]neuronsamplelabel.east) .. controls +(30:1) and +(220:1) .. ([xshift=-0em,yshift=-0em]neuron12new.210);
+}
+\visible<3>{
 \foreach \n in {1,...,4}{
 \draw [<-,thick,red] (neuron2\n.south) -- (neuron12.north);
 }
+}
-\draw [<-,thick,red] (neuron24.south) -- (neuron14.north);
+\visible<5->{
-\node [anchor=north] (wlabel) at (layer03.south east) {\alert{\scriptsize{$w_{4,4}^{2}$}}};
+\draw [<-,thick,red] (neuron14.south) -- (neuron04.north);
+\node [anchor=north] (wlabel) at (layer02.south east) {\alert{\scriptsize{$w_{4,4}^{l}$}}};
+}
-\node [anchor=west,align=left] (line01) at ([xshift=2em]layer03.east) {\footnotesize{$h_{i}^{l}$：第$l$层, 第$i$个神经元的输出}};
+\visible<3->{
+\node [anchor=west,align=left] (line01) at ([xshift=2em]layer04.east) {\footnotesize{$h_{i}^{l}$：第$l$层, 第$i$个神经元的输出}};
 \node [anchor=north west,align=left] (line02) at (line01.south west) {\footnotesize{$\textbf{h}^{l}$：第$l$层的输出}};
-\node [anchor=north west,align=left] (line03) at (line02.south west) {\footnotesize{$w_{j,i}^{l}$：第$l-1$层神经元$j$与}\\\footnotesize{第$l+1$层神经元$i$的连接权重}};
+\node [anchor=north west,align=left] (line03) at (line02.south west) {\footnotesize{$f^{l}$：第$l$层的激活函数}};
+}
+\visible<4->{
+\node [anchor=north west,align=left] (line04) at (line03.south west) {\footnotesize{$\textbf{h}^{L}$：网络最后的输出}};
+}
+\visible<5->{
+\node [anchor=north west,align=left] (line05) at (line04.south west) {\footnotesize{$w_{j,i}^{l}$：第$l-1$层神经元$j$与}\\\footnotesize{第$l$层神经元$i$的连接权重}};
+\node [anchor=north west,align=left] (line06) at (line05.south west) {\footnotesize{$\textbf{w}^{l}$：第$l-1$层与第$l$层的}\\\footnotesize{连接权重}};
+}
 \end{scope}
 \end{tikzpicture}
 \end{center}
+\vspace{-1.5em}
+\visible<6->{
+\begin{displaymath} \textrm{对于第}l\textrm{层}: h_{i}^{l} = f^l(\sum_j h_{j}^{l-1}w_{j,i}^l), \quad \textbf{h}^l = f^l(\textbf{h}^{l-1} \textbf{w}^l) \end{displaymath}
+}
 \end{frame}
 \end{CJK}

--- a/Section05-Neural-Networks-and-Language-Modeling/section05.tex
+++ b/Section05-Neural-Networks-and-Language-Modeling/section05.tex
@@ -3061,7 +3061,7 @@ J(\textbf{w}_t) = \frac{1}{m} \sum_{i=j}^{j+m} L(\textbf{x}_i,\hat{\textbf{y}}_i
 %%%------------------------------------------------------------------------------------------------------------
 %%% 如何计算梯度 - 符号微分
-\begin{frame}{符号微分}
+\begin{frame}{如何计算梯度? - 符号微分}
 \begin{itemize}
 \item \textbf{符号微分}：类似于手写出微分表达式，最后代入变量的值，得到微分结果。比如，对于如下表达式
@@ -3130,7 +3130,7 @@ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\
 %%%------------------------------------------------------------------------------------------------------------
 %%% 自动微分
-\begin{frame}{自动微分}
+\begin{frame}{如何计算梯度? - 自动微分}
 \begin{itemize}
 \item \textbf{自动微分}：复杂的微分变成简单的步骤，这些步骤完全自动化，而且容易进行存储、计算。这可以用一种反向模式进行描述（也就是\alert{反向传播}思想），包括两步
@@ -3204,10 +3204,133 @@ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\
 \vspace{-1em}
 \begin{itemize}
-\item<10-> 自动微分可以用\alert{计算图}实现(TensorFlow、 NiuTensor等)，不过计算图超出了课程的范围，建议大家自行学习
+\item<10-> 自动微分可以用\alert{计算图}实现(TensorFlow、 NiuTensor 等)，不过计算图超出了课程的范围，建议大家自行学习
 \end{itemize}
 \end{frame}
+%%%------------------------------------------------------------------------------------------------------------
+%%% 反向传播 - 符号说明
+\begin{frame}{符号说明}
+\begin{itemize}
+\item 以一个$L$层神经网络为例重新明确一下符号
+    \begin{itemize}
+    \item 这里假设每层神经网络中都不含偏置项（不含$\textbf{b}$）
+    \end{itemize}
+\end{itemize}
+\vspace{-1em}
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}
+\def\neuronsep{1}
+\tikzstyle{neuronnode} = [minimum size=1.2em,circle,draw,ublue,very thick,inner sep=1pt, fill=white,align=center,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}];
+%%% layer 1
+\foreach \n in {1,...,4}{
+    \node [neuronnode] (neuron0\n) at (\n * \neuronsep,0) {};
+    \draw [->] ([yshift=-0.8em]neuron0\n.south) -- ([yshift=-0.1em]neuron0\n.south) node [pos=0,below] {\tiny{...}};
+}
+\begin{pgfonlayer}{background}
+\node [rectangle,inner sep=0.2em,fill=red!20] [fit = (neuron01) (neuron04)] (layer01) {};
+\node [anchor=east] (layer01label) at (layer01.west) {\scriptsize{层$l-1$}};
+\end{pgfonlayer}
+%%% layer 2
+\foreach \n in {1,...,4}{
+    \node [neuronnode] (neuron1\n) at (\n * \neuronsep,3em) {};
+}
+\foreach \n in {1,...,4}{
+    \foreach \m in {1,...,4}{
+        \draw [<-] (neuron1\n.south) -- (neuron0\m.north);
+    }
+}
+\begin{pgfonlayer}{background}
+\node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron11) (neuron14)] (layer02) {};
+\node [anchor=east] (layer02label) at (layer02.west) {\scriptsize{层$l$}};
+\end{pgfonlayer}
+%%% layer 3
+\foreach \n in {1,...,4}{
+    \node [neuronnode] (neuron2\n) at (\n * \neuronsep,6em) {};
+    \draw [<-] ([yshift=0.8em]neuron2\n.north) -- ([yshift=0.0em]neuron2\n.north) node [pos=0,above] {\tiny{...}};
+}
+\foreach \n in {1,...,4}{
+    \foreach \m in {1,...,4}{
+        \draw [<-] (neuron2\n.south) -- (neuron1\m.north);
+    }
+}
+\begin{pgfonlayer}{background}
+\node [rectangle,inner sep=0.2em,fill=blue!20] [fit = (neuron21) (neuron24)] (layer03) {};
+\node [anchor=east] (layer03label) at (layer03.west) {\scriptsize{层$l+1$}};
+\end{pgfonlayer}
+%%% output layer
+\foreach \n in {1,...,4}{
+    \node [neuronnode] (neuron3\n) at (\n * \neuronsep,9.4em) {};
+    \visible<1-3,5->{
+    \draw [<-] ([yshift=0.6em]neuron3\n.north) -- ([yshift=0.0em]neuron3\n.north) node [pos=0,above] {\tiny{output}};
+    }
+    \visible<4>{
+    \draw [<-,red,very thick] ([yshift=0.6em]neuron3\n.north) -- ([yshift=0.0em]neuron3\n.north) node [pos=0,above] {\tiny{output}};
+    }
+    \draw [->] ([yshift=-0.6em]neuron3\n.south) -- ([yshift=0.0em]neuron3\n.south);
+}
+\begin{pgfonlayer}{background}
+\node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron31) (neuron34)] (layer04) {};
+\node [anchor=east] (layer04label) at (layer04.west) {\scriptsize{层$L$(输出)}};
+\end{pgfonlayer}
+\visible<2->{
+\node [neuronnode,draw=red,fill=red!20!white,inner sep=1pt] (neuron12new) at (2 * \neuronsep,3em) {};
+\node [anchor=east] (neuronsamplelabel) at ([yshift=-1em]layer02label.south east) {\alert{\textbf{\tiny{第$l$层, 第$i$个神经元}}}};
+\draw [->,dashed,very thick,red] ([xshift=-0.2em,yshift=0.2em]neuronsamplelabel.east) .. controls +(30:1) and +(220:1) .. ([xshift=-0em,yshift=-0em]neuron12new.210);
+}
+\visible<3>{
+\foreach \n in {1,...,4}{
+\draw [<-,thick,red] (neuron2\n.south) -- (neuron12.north);
+}
+}
+\visible<5->{
+\draw [<-,thick,red] (neuron14.south) -- (neuron04.north);
+\node [anchor=north] (wlabel) at (layer02.south east) {\alert{\scriptsize{$w_{4,4}^{l}$}}};
+}
+\visible<3->{
+\node [anchor=west,align=left] (line01) at ([xshift=2em]layer04.east) {\footnotesize{$h_{i}^{l}$：第$l$层, 第$i$个神经元的输出}};
+\node [anchor=north west,align=left] (line02) at (line01.south west) {\footnotesize{$\textbf{h}^{l}$：第$l$层的输出}};
+\node [anchor=north west,align=left] (line03) at (line02.south west) {\footnotesize{$f^{l}$：第$l$层的激活函数}};
+}
+\visible<4->{
+\node [anchor=north west,align=left] (line04) at (line03.south west) {\footnotesize{$\textbf{h}^{L}$：网络最后的输出}};
+}
+\visible<5->{
+\node [anchor=north west,align=left] (line05) at (line04.south west) {\footnotesize{$w_{j,i}^{l}$：第$l-1$层神经元$j$与}\\\footnotesize{第$l$层神经元$i$的连接权重}};
+\node [anchor=north west,align=left] (line06) at (line05.south west) {\footnotesize{$\textbf{w}^{l}$：第$l-1$层与第$l$层的}\\\footnotesize{连接权重}};
+}
+\end{scope}
+\end{tikzpicture}
+\end{center}
+\vspace{-1.5em}
+\visible<6->{
+\begin{displaymath} \textrm{对于第}l\textrm{层}: h_{i}^{l} = f^l(\sum_j h_{j}^{l-1}w_{j,i}^l), \quad \textbf{h}^l = f^l(\textbf{h}^{l-1} \textbf{w}^l) \end{displaymath}
+}
+\end{frame}
 \end{CJK}
 \end{document}