Commit a204dca2 by xiaotong

new update

parent 0292bcaf
......@@ -120,54 +120,92 @@
\begin{frame}{反向传播 - 输出层}
\begin{itemize}
\item 对于输入$\textbf{x}$和参数$\textbf{w}$,$L(\textbf{x},\textbf{h}^K;\textbf{w})$表示网络输出结果的损失,简记为$L$,其中$\textbf{h}^K$是网络的输出
\item$\pi^k = \frac{\partial L}{\partial \textbf{h}^k}$表示损失$L$在第$k$层输出处的梯度
\item 输出层(两个阶段)
\end{itemize}
\vspace*{\fill}
\vspace{-0.5em}
\begin{center}
% Diagram for the output-layer slide. It contains two groups of nodes:
%   (1) the chain h^{K-1} -> s^K -> h^K -> L (nodes h, s, h2, l), and
%   (2) a row of five layer boxes (layer01 .. layer05) with input/output labels.
% NOTE(review): h and layer01 are both placed with anchor=center at (0,0), so the two
% groups overlap as written -- confirm that both are really meant to be in this picture.
\begin{tikzpicture}
\begin{scope}
% Style for the layer boxes: thick outline, ugreen-tinted fill, small blur shadow.
% NOTE(review): this style is set inside the scope, but layernode is used by the
% layer01..layer05 nodes after \end{scope}; TikZ styles are normally local to the
% enclosing group -- confirm the style is still defined where it is used.
\tikzstyle{layernode} = [draw,thick,fill=ugreen!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}];
% Chain nodes, each anchored west and shifted right from the previous node's east side.
\node [anchor=center,minimum height=1.7em,fill=yellow!20,draw] (h) at (0,0) {$\textbf{h}^{K-1}$};
\node [anchor=west,minimum height=1.7em,fill=blue!20,draw] (s) at ([xshift=6.5em]h.east) {$\textbf{s}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=green!20,draw] (h2) at ([xshift=6.5em]s.east) {$\textbf{h}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=orange!20,draw] (l) at ([xshift=4em]h2.east) {$L$};
% Edge h -> s is labelled with the linear transform s^K = h^{K-1} w^K;
% edge s -> h2 is labelled with the activation h^K = f^K(s^K).
\draw [->] (h.east) -- (s.west) node [pos=0.5,above] {\tiny{线性变换$\textbf{s}^K = \textbf{h}^{K-1} \textbf{w}^K$}};
\draw [->] (s.east) -- (h2.west) node [pos=0.5,above] {\tiny{激活函数$\textbf{h}^K = f^K(\textbf{s}^K)$}};
\end{scope}
% Edge from the network output h2 to the loss node l.
\draw [->] (h2.east) -- (l.west) node [pos=0.5,above] {\tiny{损失}};
% Five layer boxes; layer02..layer04 are positioned relative to layer01, layer05 relative to layer04.
\node [anchor=center,layernode,minimum height=4em,minimum width=1em] (layer01) at (0,0) {};
\node [anchor=north west,layernode,minimum height=3em,minimum width=1em] (layer02) at ([xshift=3em]layer01.north east) {};
\node [anchor=south west,layernode,minimum height=3em,minimum width=1em] (layer03) at ([xshift=7em]layer01.south east) {};
\node [anchor=south west,layernode,minimum height=4em,minimum width=1em] (layer04) at ([xshift=11em]layer01.south east) {};
\node [anchor=south west,layernode,minimum height=4em,minimum width=1em] (layer05) at ([xshift=3em]layer04.south east) {};
% Text labels: a caption above h2, an input label left of layer01, an output/loss label right of layer05.
\node [anchor=south] (outputlabel) at ([yshift=0.3em]h2.north) {\scriptsize{\textbf{网络输出层}}};
\node [anchor=east] (input) at ([xshift=-1em]layer01.west){\scriptsize{输入}};
\node [anchor=west] (output) at ([xshift=1em]layer05.east){\scriptsize{输出\&损失}};
% From overlay 2 on: mirrored brace under h .. s with the "stage one" caption.
\visible<2->{
\draw[decorate,thick,decoration={brace,mirror,raise=0.4em,amplitude=2mm}] (h.south west) -- (s.south west) node [pos=0.5,below,yshift=-1em] {\scriptsize{\textbf{第一阶段:线性变换}}};
}
% From overlay 3 on: mirrored brace under s .. l; its caption node is named step2
% so that the background highlight below can be fitted around it.
\visible<3->{
\draw[decorate,thick,decoration={brace,mirror,raise=0.4em,amplitude=2mm}] ([xshift=0.2em]s.south west) -- (l.south east) node [pos=0.5,below,yshift=-1em] (step2) {\scriptsize{\textbf{第二阶段:激活函数+损失函数}}};
}
% Thin arrows: into layer01, layer01 -> layer02/layer03, layer02/layer03 -> layer04,
% layer04 -> layer05, and out of layer05.
\draw [->] ([xshift=-1em]layer01.west) -- ([xshift=-0.1em]layer01.west);
\draw [->] ([xshift=0.1em,yshift=-0.5em]layer01.north east) -- ([xshift=-0.1em,yshift=-0.5em]layer02.north west);
\draw [->] ([xshift=0.1em,yshift=0.5em]layer01.south east) -- ([xshift=-0.1em,yshift=0.5em]layer03.south west);
\draw [->] ([xshift=0.1em,yshift=-0.5em]layer02.north east) -- ([xshift=-0.1em,yshift=-0.5em]layer04.north west);
\draw [->] ([xshift=0.1em,yshift=0.5em]layer03.south east) -- ([xshift=-0.1em,yshift=0.5em]layer04.south west);
\draw [->] ([xshift=0.1em]layer04.east) -- ([xshift=-0.1em]layer05.west);
\draw [->] ([xshift=0.1em]layer05.east) -- ([xshift=1.0em]layer05.east);
% From overlay 4 on: red-tinted rectangle on the background layer, fitted around the step2 caption.
\begin{pgfonlayer}{background}
\visible<4->{
\node [rectangle,inner sep=0em,fill=red!20] [fit = (step2)] (step2label) {};
}
\end{pgfonlayer}
% Very thick ublue arrows on exactly the same coordinates as the thin arrows above;
% being drawn later, they are painted on top of them.
% NOTE(review): the two arrow sets look like alternatives -- confirm whether both should remain.
\draw [->,very thick,ublue] ([xshift=-1em]layer01.west) -- ([xshift=-0.1em]layer01.west);
\draw [->,very thick,ublue] ([xshift=0.1em,yshift=-0.5em]layer01.north east) -- ([xshift=-0.1em,yshift=-0.5em]layer02.north west);
\draw [->,very thick,ublue] ([xshift=0.1em,yshift=0.5em]layer01.south east) -- ([xshift=-0.1em,yshift=0.5em]layer03.south west);
\draw [->,very thick,ublue] ([xshift=0.1em,yshift=-0.5em]layer02.north east) -- ([xshift=-0.1em,yshift=-0.5em]layer04.north west);
\draw [->,very thick,ublue] ([xshift=0.1em,yshift=0.5em]layer03.south east) -- ([xshift=-0.1em,yshift=0.5em]layer04.south west);
\draw [->,very thick,ublue] ([xshift=0.1em]layer04.east) -- ([xshift=-0.1em]layer05.west);
\draw [->,very thick,ublue] ([xshift=0.1em]layer05.east) -- ([xshift=1.0em]layer05.east);
\end{tikzpicture}
\end{center}
% NOTE(review): this path command appears after \end{tikzpicture} and \end{center},
% where TikZ drawing commands are not available, so it is disabled here instead of
% being left to break compilation. If the red backward arrow at the right of layer05
% is still wanted, move the line back inside the tikzpicture that defines layer05.
%\draw [<-,very thick,red] ([xshift=0.1em,yshift=-0.3em]layer05.east) -- ([xshift=1.0em,yshift=-0.3em]layer05.east);
\begin{itemize}
\item<4-> 反向传播从输出向输入传播梯度,因此我们先考虑阶段二。令$\pi^k = \frac{\partial L}{\partial \textbf{s}^k}$表示损失$L$在第$k$层激活函数输入处的梯度\visible<5->{,利用链式法则有}
\vspace{-1em}
\visible<5->{
\begin{eqnarray}
\pi^K & = & \frac{\partial L}{\partial \textbf{s}^K} \nonumber \\
& = & \frac{\partial L}{\partial \textbf{h}^K} \cdot \frac{\partial \textbf{h}^K}{\partial \textbf{s}^K} \nonumber \\
& = & \frac{\partial L}{\partial \textbf{h}^K} \cdot \frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K} \nonumber
%\frac{\partial L}{\partial \textbf{w}^K} & = & \frac{\partial L}{\partial \textbf{h}^K} \cdot \frac{\partial \textbf{h}^K}{\partial \textbf{w}^K} \nonumber \\
% & \visible<4->{=} & \visible<4->{\frac{\partial L}{\partial \textbf{h}^K} \cdot \frac{\partial f^K(\textbf{h}^{K-1} \textbf{w}^K)}{\partial \textbf{w}^K} \ \ \ \ (\textrm{因为}\textbf{h}^K=f^K(\textbf{h}^{K-1} \textbf{w}^K))} \nonumber \\
% & \visible<5->{=} & \visible<5->{\frac{\partial L}{\partial \textbf{h}^K} \cdot \frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K} \cdot \frac{\partial \textbf{s}^K}{\partial \textbf{w}^K} \ \ \ (\textrm{因为}\textbf{s}^K=\textbf{h}^{K-1} \textbf{w}^K)} \nonumber
\end{eqnarray}
}
%\draw [<-,thin] ([xshift=0.3em,yshift=0.3em]layer04.east) .. controls +(35:1) and +(215:1) .. ([xshift=-2em,yshift=0.3em]layer05.north west) node [pos=1,above] {\scriptsize{前向:层$i$ 的输出$h_{i}$}};
%\draw [<-,thin] ([xshift=0.3em,yshift=-0.7em]layer04.east) .. controls +(-35:1) and +(145:1) .. ([xshift=-2em,yshift=-0.3em]layer05.south west) node [pos=1,below] {\scriptsize{反向:$h_{i}$ 处的梯度$\frac{\partial L}{\partial h_i}$}};
\end{itemize}
% NOTE(review): the two \node commands below sit outside any tikzpicture (the second
% one is even outside the frame), where TikZ node commands are not available. They are
% disabled together because problembox is fitted around leftend. If the dashed purple
% box around the output label is still wanted, move both lines into the tikzpicture
% that defines layer05 and output.
%\node [anchor=center] (leftend) at (layer05.east) {};
\end{frame}
%\node [rectangle,inner sep=0.2em,draw,purple,very thick,dashed] [fit = (leftend) (output)] (problembox) {};
%%%------------------------------------------------------------------------------------------------------------
%%% 输出层的反向传播 - 各个因子的意义
\begin{frame}{反向传播 - 输出层(续)}
\begin{center}
% Colour-coded product  dL/dw^K = dL/dh^K . df^K(s^K)/ds^K . ds^K/dw^K,
% typeset as a left-to-right chain of nodes with tinted backgrounds behind each factor.
\begin{tikzpicture}
  \begin{scope}
    % Left-hand side, then factor / dot / factor / dot / factor.
    % Every node after the first is anchored west on the east side of its predecessor.
    \node (factor00) [anchor=center] at (0,0)
      {${\displaystyle \frac{\partial L}{\partial \textbf{w}^K} \ = }$};
    \node (factor01) [anchor=west] at (factor00.east)
      {${\displaystyle \frac{\partial L}{\partial \textbf{h}^K}}$};
    \node (factor02) [anchor=west,inner sep=1pt] at (factor01.east)
      {${\displaystyle \cdot}$};
    \node (factor03) [anchor=west] at (factor02.east)
      {${\displaystyle \frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K}}$};
    \node (factor04) [anchor=west,inner sep=1pt] at (factor03.east)
      {${\displaystyle \cdot}$};
    \node (factor05) [anchor=west] at (factor04.east)
      {${\displaystyle \frac{\partial \textbf{s}^K}{\partial \textbf{w}^K}}$};

    % Highlights go on the background layer so they are painted behind the formulas:
    % red / blue / green rectangles for the three factors, purple circles for the dots.
    \begin{pgfonlayer}{background}
      \node (p1) [rectangle,inner sep=0em,fill=red!20,fit=(factor01)] {};
      \node (p2) [rectangle,inner sep=0em,fill=blue!20,fit=(factor03)] {};
      \node (p3) [rectangle,inner sep=0em,fill=green!20,fit=(factor05)] {};
      \node (p4) [circle,inner sep=0em,fill=purple!20,fit=(factor02)] {};
      \node (p5) [circle,inner sep=0em,fill=purple!20,fit=(factor04)] {};
    \end{pgfonlayer}
  \end{scope}
\end{tikzpicture}
\end{center}
\begin{itemize}
\item \raisebox{-0.7em}{\tikz{\node [anchor=west,fill=red!20] (factor01) at (factor00.east) {$\frac{\partial L}{\partial \textbf{h}^K}$};}} 表示损失$L$相对网络输出的变化率,比如,对于$L = \frac{1}{2} (\hat{\textbf{y}} - \textbf{h}^K)^2$,有$\frac{\partial L}{\partial \textbf{h}^K} = \textbf{h}^K - \hat{\textbf{y}}$
\item \raisebox{-0.7em}{\tikz{\node [anchor=west,fill=blue!20] (factor01) at (factor00.east) {$\frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K}$};}} 表示激活函数相对于它自己的输入的变化率,比如,对于$f(\textbf{s}) = \frac{1}{1+\exp(-\textbf{s})}$,有$\frac{\partial f(\textbf{s})}{\partial \textbf{s}} = f(\textbf{s})(1-f(\textbf{s}))$
\item \raisebox{-0.7em}{\tikz{\node [anchor=west,fill=green!20] (factor01) at (factor00.east) {$\frac{\partial \textbf{s}^K}{\partial \textbf{w}^K}$};}} 表示激活函数的输入相对于参数矩阵$\textbf{w}^K$的变化率,根据简单的数学,可以得到$\frac{\partial \textbf{s}^K}{\partial \textbf{w}^K} = \frac{\partial \textbf{h}^{K-1}\textbf{w}^K}{\partial \textbf{w}^K}$
\end{itemize}
\end{frame}
\end{CJK}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论