Commit 44d2f25a by xiaotong

new update

parent a204dca2
...@@ -128,15 +128,20 @@ ...@@ -128,15 +128,20 @@
\begin{tikzpicture} \begin{tikzpicture}
\begin{scope} \begin{scope}
\node [anchor=center,minimum height=1.7em,fill=yellow!20,draw] (h) at (0,0) {$\textbf{h}^{K-1}$}; \node [anchor=center,minimum height=1.7em,fill=yellow!20,draw] (h) at (0,0) {$\textbf{h}^{K-1}$};
\node [anchor=west,minimum height=1.7em,fill=blue!20,draw] (s) at ([xshift=6.5em]h.east) {$\textbf{s}^{K}$}; \node [anchor=west,minimum height=1.7em,fill=blue!20,draw] (s) at ([xshift=5.5em]h.east) {$\textbf{s}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=green!20,draw] (h2) at ([xshift=6.5em]s.east) {$\textbf{h}^{K}$}; \node [anchor=west,minimum height=1.7em,fill=green!20,draw] (h2) at ([xshift=5.5em]s.east) {$\textbf{h}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=orange!20,draw] (l) at ([xshift=4em]h2.east) {$L$}; \node [anchor=west,minimum height=1.7em,fill=orange!20,draw] (l) at ([xshift=5.5em]h2.east) {$L$};
\draw [->] (h.east) -- (s.west) node [pos=0.5,above] {\tiny{线性变换$\textbf{s}^K = \textbf{h}^{K-1} \textbf{w}^K$}}; \draw [->] (h.east) -- (s.west);
\draw [->] (s.east) -- (h2.west) node [pos=0.5,above] {\tiny{激活函数$\textbf{h}^K = f^K(\textbf{s}^K)$}}; \draw [->] (s.east) -- (h2.west);
\end{scope}
\draw [->] (h2.east) -- (l.west) node [pos=0.5,above] {\tiny{损失}}; \draw [->] (h2.east) -- (l.west) node [pos=0.5,above] {\tiny{损失}};
\node [anchor=south] (outputlabel) at ([yshift=0.3em]h2.north) {\scriptsize{\textbf{网络输出层}}}; \node [anchor=south west,inner sep=2pt] (step100) at ([xshift=0.5em,yshift=-0.8em]h.north east) {\tiny{$\textbf{s}^K = \textbf{h}^{K-1} \textbf{w}^K$}};
\node [anchor=south west,inner sep=2pt] (step101) at (step100.north west) {\tiny{线性变换}};
\node [anchor=south west,inner sep=2pt] (step200) at ([xshift=0.5em,yshift=-0.8em]s.north east) {\tiny{$\textbf{h}^K = f^K(\textbf{s}^K)$}};
\node [anchor=south west,inner sep=2pt] (step201) at (step200.north west) {\tiny{激活函数}};
\node [anchor=south,inner sep=1pt] (outputlabel) at ([yshift=0.0em]h2.north) {\tiny{\textbf{输出层}}};
\visible<2->{ \visible<2->{
\draw[decorate,thick,decoration={brace,mirror,raise=0.4em,amplitude=2mm}] (h.south west) -- (s.south west) node [pos=0.5,below,yshift=-1em] {\scriptsize{\textbf{第一阶段:线性变换}}}; \draw[decorate,thick,decoration={brace,mirror,raise=0.4em,amplitude=2mm}] (h.south west) -- (s.south west) node [pos=0.5,below,yshift=-1em] {\scriptsize{\textbf{第一阶段:线性变换}}};
...@@ -145,19 +150,21 @@ ...@@ -145,19 +150,21 @@
\draw[decorate,thick,decoration={brace,mirror,raise=0.4em,amplitude=2mm}] ([xshift=0.2em]s.south west) -- (l.south east) node [pos=0.5,below,yshift=-1em] (step2) {\scriptsize{\textbf{第二阶段:激活函数+损失函数}}}; \draw[decorate,thick,decoration={brace,mirror,raise=0.4em,amplitude=2mm}] ([xshift=0.2em]s.south west) -- (l.south east) node [pos=0.5,below,yshift=-1em] (step2) {\scriptsize{\textbf{第二阶段:激活函数+损失函数}}};
} }
\begin{pgfonlayer}{background}
\visible<4->{ \visible<4->{
\node [rectangle,inner sep=0em,fill=red!20] [fit = (step2)] (step2label) {}; \draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]l.north) -- ([yshift=1em,xshift=0.1em]s.north) node [pos=0.5,above] {\tiny{反向求梯度\alert{$\frac{\partial L}{\partial \textbf{s}^K} = ?$}}};
\draw [-,very thick,red] ([yshift=0.5em]l.north) -- ([yshift=1.5em]l.north);
\draw [-,very thick,red] ([yshift=0.5em]s.north) -- ([yshift=1.5em]s.north);
} }
\end{pgfonlayer}
\end{scope}
\end{tikzpicture} \end{tikzpicture}
\end{center} \end{center}
\begin{itemize} \begin{itemize}
\item<4-> 反向传播从输出向输入传播梯度,因此我们先考虑阶段二。令$\pi^k = \frac{\partial L}{\partial \textbf{s}^k}$表示损失$L$在第$k$层激活函数输入处的梯度\visible<5->{,利用链式法则有} \item<4-> 反向传播从输出向输入传播梯度,因此我们先考虑阶段二\visible<5->{。令$\pi^k = \frac{\partial L}{\partial \textbf{s}^k}$表示损失$L$在第$k$层激活函数输入处的梯度,利用链式法则有}
\vspace{-1em} \vspace{-1.5em}
\visible<5->{ \visible<5->{
\begin{eqnarray} \begin{eqnarray}
\pi^K & = & \frac{\partial L}{\partial \textbf{s}^K} \nonumber \\ \pi^K & = & \frac{\partial L}{\partial \textbf{s}^K} \nonumber \\
...@@ -180,19 +187,21 @@ ...@@ -180,19 +187,21 @@
\begin{tikzpicture} \begin{tikzpicture}
\begin{scope} \begin{scope}
\node [anchor=center] (factor00) at (0,0) {${\displaystyle \frac{\partial L}{\partial \textbf{w}^K} \ = }$}; \node [anchor=center] (factor00) at (0,0) {${\displaystyle \pi^K \ = }$};
\node [anchor=west] (factor01) at (factor00.east) {${\displaystyle \frac{\partial L}{\partial \textbf{h}^K}}$}; \node [anchor=west] (factor01) at (factor00.east) {${\displaystyle \frac{\partial L}{\partial \textbf{h}^K}}$};
\node [anchor=west,inner sep=1pt] (factor02) at (factor01.east) {${\displaystyle \cdot}$}; \node [anchor=west,inner sep=1pt] (factor02) at (factor01.east) {${\displaystyle \cdot}$};
\node [anchor=west] (factor03) at (factor02.east) {${\displaystyle \frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K}}$}; \node [anchor=west] (factor03) at (factor02.east) {${\displaystyle \frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K}}$};
\node [anchor=west,inner sep=1pt] (factor04) at (factor03.east) {${\displaystyle \cdot}$};
\node [anchor=west] (factor05) at (factor04.east) {${\displaystyle \frac{\partial \textbf{s}^K}{\partial \textbf{w}^K}}$};
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
\visible<2-4>{
\node [rectangle,inner sep=0em,fill=red!20] [fit = (factor01)] (p1) {}; \node [rectangle,inner sep=0em,fill=red!20] [fit = (factor01)] (p1) {};
}
\visible<3-4>{
\node [rectangle,inner sep=0em,fill=blue!20] [fit = (factor03)] (p2) {}; \node [rectangle,inner sep=0em,fill=blue!20] [fit = (factor03)] (p2) {};
\node [rectangle,inner sep=0em,fill=green!20] [fit = (factor05)] (p3) {}; }
\node [circle,inner sep=0em,fill=purple!20] [fit = (factor02)] (p4) {}; \visible<5->{
\node [circle,inner sep=0em,fill=purple!20] [fit = (factor04)] (p5) {}; \node [circle,inner sep=0em,fill=green!20] [fit = (factor02)] (p3) {};
}
\end{pgfonlayer} \end{pgfonlayer}
\end{scope} \end{scope}
...@@ -200,12 +209,73 @@ ...@@ -200,12 +209,73 @@
\end{center} \end{center}
\begin{itemize} \begin{itemize}
\item \raisebox{-0.7em}{\tikz{\node [anchor=west,fill=red!20] (factor01) at (factor00.east) {$\frac{\partial L}{\partial \textbf{h}^K}$};}} 表示损失$L$相对网络输出的变化率,比如,对于$L = \frac{1}{2} (\hat{\textbf{y}} - \textbf{h}^K)^2$,有$\frac{\partial L}{\partial \textbf{h}^K} = \textbf{h}^K - \hat{\textbf{y}}$ \item<2-> \raisebox{-0.7em}{\tikz{\node [anchor=west,fill=red!20] (factor01) at (factor00.east) {$\frac{\partial L}{\partial \textbf{h}^K}$};}} 表示损失$L$相对网络输出的变化率,比如,对于$L = \frac{1}{2} ||\hat{\textbf{y}} - \textbf{h}^K||^2$,有$\frac{\partial L}{\partial \textbf{h}^K} = \textbf{h}^K - \hat{\textbf{y}}$
\item \raisebox{-0.7em}{\tikz{\node [anchor=west,fill=blue!20] (factor01) at (factor00.east) {$\frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K}$};}} 表示激活函数相对于它自己的输入的变化率,比如,对于$f(\textbf{s}) = \frac{1}{1+\exp(-\textbf{s})}$,有$\frac{\partial f(\textbf{s})}{\partial \textbf{s}} = f(\textbf{s})(1-f(\textbf{s}))$ \item<3-> \raisebox{-0.7em}{\tikz{\node [anchor=west,fill=blue!20] (factor01) at (factor00.east) {$\frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K}$};}} 表示激活函数相对于它自己的输入的变化率,比如,对于$f(\textbf{s}) = \frac{1}{1+\exp(-\textbf{s})}$,有$\frac{\partial f(\textbf{s})}{\partial \textbf{s}} = f(\textbf{s})(1-f(\textbf{s}))$
\item \raisebox{-0.7em}{\tikz{\node [anchor=west,fill=green!20] (factor01) at (factor00.east) {$\frac{\partial \textbf{s}^K}{\partial \textbf{w}^K}$};}} 表示激活函数的输入相对于参数矩阵$\textbf{w}^K$的变化率,根据简单的数学,可以得到$\frac{\partial \textbf{s}^K}{\partial \textbf{w}^K} = \frac{\partial \textbf{h}^{K-1}\textbf{w}^K}{\partial \textbf{w}^K}$ \item<4-> 这个结果符合直觉,在$\textbf{s}^K$处的梯度相当于损失函数微分($\frac{\partial L}{\partial \textbf{h}^K}$)和激活函数微分($\frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K}$)的乘积\visible<5->{,注意这里所有操作都是单元级,比如张量按单元乘法}
\end{itemize}
\visible<4->{
\vspace{-0.5em}
\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=west,minimum height=1.7em,fill=blue!20,draw] (s) at (0,0) {$\textbf{s}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=green!20,draw] (h2) at ([xshift=5.5em]s.east) {$\textbf{h}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=orange!20,draw] (l) at ([xshift=5.5em]h2.east) {$L$};
\draw [->] (s.east) -- (h2.west);
\draw [->] (h2.east) -- (l.west);
\draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]l.north) -- ([yshift=1em,xshift=0.1em]h2.north) node [pos=0.5,above] {\tiny{求梯度\alert{$\frac{\partial L}{\partial \textbf{h}^K} = ?$}}};
\draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]h2.north) -- ([yshift=1em,xshift=0.1em]s.north) node [pos=0.5,above] {\tiny{求梯度\alert{$\frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K} = ?$}}};
\draw [-,very thick,red] ([yshift=0.5em]l.north) -- ([yshift=1.5em]l.north);
\draw [-,very thick,red] ([yshift=0.5em]h2.north) -- ([yshift=1.5em]h2.north);
\draw [-,very thick,red] ([yshift=0.5em]s.north) -- ([yshift=1.5em]s.north);
\end{scope}
\end{tikzpicture}
\end{center}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 输出层的反向传播 - 求 dL/dw
\begin{frame}{反向传播 - 输出层}
\begin{itemize}
\item 输出层(两个阶段)
\end{itemize} \end{itemize}
\vspace{-0.5em}
\begin{center}
\begin{tikzpicture}
\begin{scope}
\node [anchor=center,minimum height=1.7em,fill=yellow!20,draw] (h) at (0,0) {$\textbf{h}^{K-1}$};
\node [anchor=west,minimum height=1.7em,fill=blue!20,draw] (s) at ([xshift=5.5em]h.east) {$\textbf{s}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=green!20,draw] (h2) at ([xshift=5.5em]s.east) {$\textbf{h}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=orange!20,draw] (l) at ([xshift=5.5em]h2.east) {$L$};
\draw [->] (h.east) -- (s.west);
\draw [->] (s.east) -- (h2.west);
\draw [->] (h2.east) -- (l.west) node [pos=0.5,above] {\tiny{损失}};
\node [anchor=south west,inner sep=2pt] (step100) at ([xshift=0.5em,yshift=-0.8em]h.north east) {\tiny{$\textbf{s}^K = \textbf{h}^{K-1} \textbf{w}^K$}};
\node [anchor=south west,inner sep=2pt] (step200) at ([xshift=0.5em,yshift=-0.8em]s.north east) {\tiny{$\textbf{h}^K = f^K(\textbf{s}^K)$}};
\node [anchor=south,inner sep=1pt] (outputlabel) at ([yshift=0.0em]h2.north) {\tiny{\textbf{输出层}}};
\node [anchor=south west] (slabel) at ([yshift=1em,xshift=0.3em]s.north) {\scriptsize{\textbf{\alert{已经得到:$\pi^K$}}}};
\draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]s.north) -- ([yshift=1em,xshift=0.1em]h.north) node [pos=0.5,above] {\tiny{\alert{$\frac{\partial L}{\partial \textbf{w}^K} = ?$, $\frac{\partial L}{\partial \textbf{h}^{K-1}} = ?$}}};
\draw [-,very thick,red] ([yshift=0.5em]h.north) -- ([yshift=1.5em]h.north);
\draw [-,very thick,red] ([yshift=0.5em]s.north) -- ([yshift=1.5em]s.north);
\end{scope}
\end{tikzpicture}
\end{center}
\end{frame} \end{frame}
\end{CJK} \end{CJK}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论