new update

0292bcaf · xiaotong · 9933e1fc · 0292bcaf · 0292bcaf
Commit 0292bcaf authored Oct 17, 2019 by xiaotong
--- a/Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
+++ b/Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
@@ -116,126 +116,58 @@
 \subsection{参数学习 - 反向传播}

 %%%------------------------------------------------------------------------------------------------------------
-%%% 反向传播 - 符号说明
-\begin{frame}{符号说明}
+%%% 输出层的反向传播
+\begin{frame}{反向传播 - 输出层}

 \begin{itemize}
-\item 以一个$L$层神经网络为例重新明确一下符号
-    \begin{itemize}
-    \item 这里假设每层神经网络中都不含偏置项（不含$\textbf{b}$）
-    \end{itemize}
+\item 对于输入$\textbf{x}$和参数$\textbf{w}$，$L(\textbf{x},\textbf{h}^K;\textbf{w})$表示网络输出结果的损失，简记为$L$，其中$\textbf{h}^K$是网络的输出
+\item 令$\pi^k = \frac{\partial L}{\partial \textbf{h}^k}$表示损失$L$在第$k$层输出处的梯度
 \end{itemize}

-\vspace{-1em}
+\vspace*{\fill}
 \begin{center}
 \begin{tikzpicture}
 \begin{scope}
+\tikzstyle{layernode} = [draw,thick,fill=ugreen!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}];

-\def\neuronsep{1}
-\tikzstyle{neuronnode} = [minimum size=1.2em,circle,draw,ublue,very thick,inner sep=1pt, fill=white,align=center,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}];
+\node [anchor=center,layernode,minimum height=4em,minimum width=1em] (layer01) at (0,0) {};
+\node [anchor=north west,layernode,minimum height=3em,minimum width=1em] (layer02) at ([xshift=3em]layer01.north east) {};
+\node [anchor=south west,layernode,minimum height=3em,minimum width=1em] (layer03) at ([xshift=7em]layer01.south east) {};
+\node [anchor=south west,layernode,minimum height=4em,minimum width=1em] (layer04) at ([xshift=11em]layer01.south east) {};
+\node [anchor=south west,layernode,minimum height=4em,minimum width=1em] (layer05) at ([xshift=3em]layer04.south east) {};

-%%% layer 1
-\foreach \n in {1,...,4}{
-    \node [neuronnode] (neuron0\n) at (\n * \neuronsep,0) {};
-    \draw [->] ([yshift=-0.8em]neuron0\n.south) -- ([yshift=-0.1em]neuron0\n.south) node [pos=0,below] {\tiny{...}};
-}
-
-
-\begin{pgfonlayer}{background}
-\node [rectangle,inner sep=0.2em,fill=red!20] [fit = (neuron01) (neuron04)] (layer01) {};
-\node [anchor=east] (layer01label) at (layer01.west) {\scriptsize{层$l-1$}};
-\end{pgfonlayer}
-
-%%% layer 2
-\foreach \n in {1,...,4}{
-    \node [neuronnode] (neuron1\n) at (\n * \neuronsep,3em) {};
-}
-
-\foreach \n in {1,...,4}{
-    \foreach \m in {1,...,4}{
-        \draw [<-] (neuron1\n.south) -- (neuron0\m.north);
-    }
-}
+\node [anchor=east] (input) at ([xshift=-1em]layer01.west){\scriptsize{输入}};
+\node [anchor=west] (output) at ([xshift=1em]layer05.east){\scriptsize{输出\&损失}};

-\begin{pgfonlayer}{background}
-\node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron11) (neuron14)] (layer02) {};
-\node [anchor=east] (layer02label) at (layer02.west) {\scriptsize{层$l$}};
-\end{pgfonlayer}
+\draw [->] ([xshift=-1em]layer01.west) -- ([xshift=-0.1em]layer01.west);
+\draw [->] ([xshift=0.1em,yshift=-0.5em]layer01.north east) -- ([xshift=-0.1em,yshift=-0.5em]layer02.north west);
+\draw [->] ([xshift=0.1em,yshift=0.5em]layer01.south east) -- ([xshift=-0.1em,yshift=0.5em]layer03.south west);
+\draw [->] ([xshift=0.1em,yshift=-0.5em]layer02.north east) -- ([xshift=-0.1em,yshift=-0.5em]layer04.north west);
+\draw [->] ([xshift=0.1em,yshift=0.5em]layer03.south east) -- ([xshift=-0.1em,yshift=0.5em]layer04.south west);
+\draw [->] ([xshift=0.1em]layer04.east) -- ([xshift=-0.1em]layer05.west);
+\draw [->] ([xshift=0.1em]layer05.east) -- ([xshift=1.0em]layer05.east);

-%%% layer 3
-\foreach \n in {1,...,4}{
-    \node [neuronnode] (neuron2\n) at (\n * \neuronsep,6em) {};
-    \draw [<-] ([yshift=0.8em]neuron2\n.north) -- ([yshift=0.0em]neuron2\n.north) node [pos=0,above] {\tiny{...}};
-}
+\draw [->,very thick,ublue] ([xshift=-1em]layer01.west) -- ([xshift=-0.1em]layer01.west);
+\draw [->,very thick,ublue] ([xshift=0.1em,yshift=-0.5em]layer01.north east) -- ([xshift=-0.1em,yshift=-0.5em]layer02.north west);
+\draw [->,very thick,ublue] ([xshift=0.1em,yshift=0.5em]layer01.south east) -- ([xshift=-0.1em,yshift=0.5em]layer03.south west);
+\draw [->,very thick,ublue] ([xshift=0.1em,yshift=-0.5em]layer02.north east) -- ([xshift=-0.1em,yshift=-0.5em]layer04.north west);
+\draw [->,very thick,ublue] ([xshift=0.1em,yshift=0.5em]layer03.south east) -- ([xshift=-0.1em,yshift=0.5em]layer04.south west);
+\draw [->,very thick,ublue] ([xshift=0.1em]layer04.east) -- ([xshift=-0.1em]layer05.west);
+\draw [->,very thick,ublue] ([xshift=0.1em]layer05.east) -- ([xshift=1.0em]layer05.east);

-\foreach \n in {1,...,4}{
-    \foreach \m in {1,...,4}{
-        \draw [<-] (neuron2\n.south) -- (neuron1\m.north);
-    }
-}
+\draw [<-,very thick,red] ([xshift=0.1em,yshift=-0.3em]layer05.east) -- ([xshift=1.0em,yshift=-0.3em]layer05.east);

-\begin{pgfonlayer}{background}
-\node [rectangle,inner sep=0.2em,fill=blue!20] [fit = (neuron21) (neuron24)] (layer03) {};
-\node [anchor=east] (layer03label) at (layer03.west) {\scriptsize{层$l+1$}};
-\end{pgfonlayer}
-
-%%% output layer
-\foreach \n in {1,...,4}{
-    \node [neuronnode] (neuron3\n) at (\n * \neuronsep,9.4em) {};
-    \visible<1-3,5->{
-    \draw [<-] ([yshift=0.6em]neuron3\n.north) -- ([yshift=0.0em]neuron3\n.north) node [pos=0,above] {\tiny{output}};
-    }
-    \visible<4>{
-    \draw [<-,red,very thick] ([yshift=0.6em]neuron3\n.north) -- ([yshift=0.0em]neuron3\n.north) node [pos=0,above] {\tiny{output}};
-    }
-    \draw [->] ([yshift=-0.6em]neuron3\n.south) -- ([yshift=0.0em]neuron3\n.south);
-}
+%\draw [<-,thin] ([xshift=0.3em,yshift=0.3em]layer04.east) .. controls +(35:1) and +(215:1) .. ([xshift=-2em,yshift=0.3em]layer05.north west) node [pos=1,above] {\scriptsize{前向：层$i$ 的输出$h_{i}$}};
+%\draw [<-,thin] ([xshift=0.3em,yshift=-0.7em]layer04.east) .. controls +(-35:1) and +(145:1) .. ([xshift=-2em,yshift=-0.3em]layer05.south west) node [pos=1,below] {\scriptsize{反向：$h_{i}$ 处的梯度$\frac{\partial L}{\partial h_i}$}};

-\begin{pgfonlayer}{background}
-\node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron31) (neuron34)] (layer04) {};
-\node [anchor=east] (layer04label) at (layer04.west) {\scriptsize{层$L$(输出)}};
-\end{pgfonlayer}
+\node [anchor=center] (leftend) at (layer05.east) {};

-\visible<2->{
-\node [neuronnode,draw=red,fill=red!20!white,inner sep=1pt] (neuron12new) at (2 * \neuronsep,3em) {};
-\node [anchor=east] (neuronsamplelabel) at ([yshift=-1em]layer02label.south east) {\alert{\textbf{\tiny{第$l$层, 第$i$个神经元}}}};
-\draw [->,dashed,very thick,red] ([xshift=-0.2em,yshift=0.2em]neuronsamplelabel.east) .. controls +(30:1) and +(220:1) .. ([xshift=-0em,yshift=-0em]neuron12new.210);
-}
-
-\visible<3>{
-\foreach \n in {1,...,4}{
-\draw [<-,thick,red] (neuron2\n.south) -- (neuron12.north);
-}
-}
-
-\visible<5->{
-\draw [<-,thick,red] (neuron14.south) -- (neuron04.north);
-\node [anchor=north] (wlabel) at (layer02.south east) {\alert{\scriptsize{$w_{4,4}^{l}$}}};
-}
-
-\visible<3->{
-\node [anchor=west,align=left] (line01) at ([xshift=2em]layer04.east) {\footnotesize{$h_{i}^{l}$：第$l$层, 第$i$个神经元的输出}};
-\node [anchor=north west,align=left] (line02) at (line01.south west) {\footnotesize{$\textbf{h}^{l}$：第$l$层的输出}};
-\node [anchor=north west,align=left] (line03) at (line02.south west) {\footnotesize{$f^{l}$：第$l$层的激活函数}};
-}
-\visible<4->{
-\node [anchor=north west,align=left] (line04) at (line03.south west) {\footnotesize{$\textbf{h}^{L}$：网络最后的输出}};
-}
-\visible<5->{
-\node [anchor=north west,align=left] (line05) at (line04.south west) {\footnotesize{$w_{j,i}^{l}$：第$l-1$层神经元$j$与}\\\footnotesize{第$l$层神经元$i$的连接权重}};
-\node [anchor=north west,align=left] (line06) at (line05.south west) {\footnotesize{$\textbf{w}^{l}$：第$l-1$层与第$l-1$层的}\\\footnotesize{连接权重}};
-}
+\node [rectangle,inner sep=0.2em,draw,purple,very thick,dashed] [fit = (leftend) (output)] (problembox) {};

 \end{scope}
 \end{tikzpicture}
 \end{center}

-\vspace{-1.5em}
-
-\visible<6->{
-\begin{displaymath} \textrm{对于第}l\textrm{层}: \textbf{h}^l = f^l(\sum_j h_{j}^{l-1}w_{j,i}^l) = f^l(\textbf{h}^{l-1} \textbf{w}^l) \end{displaymath}
-}
-
 \end{frame}

 \end{CJK}

--- a/Section05-Neural-Networks-and-Language-Modeling/section05.tex
+++ b/Section05-Neural-Networks-and-Language-Modeling/section05.tex
@@ -3214,7 +3214,7 @@ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\
 \begin{frame}{符号说明}

 \begin{itemize}
-\item 以一个$L$层神经网络为例重新明确一下符号
+\item 以一个$K$层神经网络为例重新明确一下符号
    \begin{itemize}
    \item 这里假设每层神经网络中都不含偏置项（不含$\textbf{b}$）
    \end{itemize}
@@ -3308,16 +3308,17 @@ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\
 }

 \visible<3->{
-\node [anchor=west,align=left] (line01) at ([xshift=2em]layer04.east) {\footnotesize{$h_{i}^{l}$：第$l$层, 第$i$个神经元的输出}};
-\node [anchor=north west,align=left] (line02) at (line01.south west) {\footnotesize{$\textbf{h}^{l}$：第$l$层的输出}};
-\node [anchor=north west,align=left] (line03) at (line02.south west) {\footnotesize{$f^{l}$：第$l$层的激活函数}};
+\node [anchor=west,align=left] (line01) at ([xshift=1em,yshift=1em]layer04.east) {\footnotesize{$h_{i}^{k}$：第$k$层, 第$i$个神经元的输出}};
+\node [anchor=north west,align=left] (line02) at (line01.south west) {\footnotesize{$\textbf{h}^{k}$：第$k$层的输出}};
+\node [anchor=north west,align=left] (line03) at (line02.south west) {\footnotesize{$\textbf{s}^{k}$：第$k$层的线性变换$\textbf{s}^k=\textbf{h}^{k-1}\textbf{w}^k$}};
+\node [anchor=north west,align=left] (line04) at (line03.south west) {\footnotesize{$f^{k}$：第$k$层的激活函数$\textbf{h}^k=f^k(\textbf{s}^k)$}};
 }
 \visible<4->{
-\node [anchor=north west,align=left] (line04) at (line03.south west) {\footnotesize{$\textbf{h}^{L}$：网络最后的输出}};
+\node [anchor=north west,align=left] (line05) at (line04.south west) {\footnotesize{$\textbf{h}^{K}$：网络最后的输出}};
 }
 \visible<5->{
-\node [anchor=north west,align=left] (line05) at (line04.south west) {\footnotesize{$w_{j,i}^{l}$：第$l-1$层神经元$j$与}\\\footnotesize{第$l$层神经元$i$的连接权重}};
-\node [anchor=north west,align=left] (line06) at (line05.south west) {\footnotesize{$\textbf{w}^{l}$：第$l-1$层与第$l-1$层的}\\\footnotesize{连接权重}};
+\node [anchor=north west,align=left] (line06) at (line05.south west) {\footnotesize{$w_{j,i}^{k}$：第$k-1$层神经元$j$与}\\\footnotesize{第$k$层神经元$i$的连接权重}};
+\node [anchor=north west,align=left] (line07) at (line06.south west) {\footnotesize{$\textbf{w}^{k}$：第$k-1$层与第$k$层的}\\\footnotesize{连接权重}};
 }

 \end{scope}
@@ -3327,7 +3328,7 @@ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\
 \vspace{-1.5em}

 \visible<6->{
-\begin{displaymath} \textrm{对于第}l\textrm{层}: \textbf{h}^l = f^l(\sum_j h_{j}^{l-1}w_{j,i}^l) = f^l(\textbf{h}^{l-1} \textbf{w}^l) \end{displaymath}
+\begin{displaymath} \textrm{对于第}k\textrm{层}: \textbf{h}^k = f^k(\textbf{s}^k) = f^k(\sum_j h_{j}^{k-1}w_{j,i}^k) = f^k(\textbf{h}^{k-1} \textbf{w}^k) \end{displaymath}
 }

 \end{frame}