Commit 6d86f2c0 by liuhui

update slides

parent be087cd2
...@@ -3040,7 +3040,7 @@ J(\textbf{w}_t) = L(\textbf{x}_i,\tilde{\textbf{y}}_i;\textbf{w}_t) ...@@ -3040,7 +3040,7 @@ J(\textbf{w}_t) = L(\textbf{x}_i,\tilde{\textbf{y}}_i;\textbf{w}_t)
\item<2-> \textbf{小批量梯度下降(Mini-batch Gradient Descent)} \item<2-> \textbf{小批量梯度下降(Mini-batch Gradient Descent)}
\begin{displaymath} \begin{displaymath}
J(\textbf{w}_t) = \frac{1}{m} \sum_{i=j}^{j+m} L(\textbf{x}_i,\tilde{\textbf{y}}_i;\textbf{w}_t) J(\textbf{w}_t) = \frac{1}{m} \sum_{i=j}^{j+m-1} L(\textbf{x}_i,\tilde{\textbf{y}}_i;\textbf{w}_t)
\end{displaymath} \end{displaymath}
每次随机使用若干样本进行参数更新(数量不会特别大),算是一种折中方案,当今最常用的方法之一 每次随机使用若干样本进行参数更新(数量不会特别大),算是一种折中方案,当今最常用的方法之一
...@@ -3279,7 +3279,7 @@ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\ ...@@ -3279,7 +3279,7 @@ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,fill=red!20] [fit = (neuron01) (neuron04)] (layer01) {}; \node [rectangle,inner sep=0.2em,fill=red!20] [fit = (neuron01) (neuron04)] (layer01) {};
\node [anchor=east] (layer01label) at (layer01.west) {\scriptsize{$l-1$}}; \node [anchor=east] (layer01label) at (layer01.west) {\scriptsize{$k-1$}};
\end{pgfonlayer} \end{pgfonlayer}
%%% layer 2 %%% layer 2
...@@ -3295,7 +3295,7 @@ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\ ...@@ -3295,7 +3295,7 @@ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron11) (neuron14)] (layer02) {}; \node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron11) (neuron14)] (layer02) {};
\node [anchor=east] (layer02label) at (layer02.west) {\scriptsize{$l$}}; \node [anchor=east] (layer02label) at (layer02.west) {\scriptsize{$k$}};
\end{pgfonlayer} \end{pgfonlayer}
%%% layer 3 %%% layer 3
...@@ -3312,7 +3312,7 @@ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\ ...@@ -3312,7 +3312,7 @@ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,fill=blue!20] [fit = (neuron21) (neuron24)] (layer03) {}; \node [rectangle,inner sep=0.2em,fill=blue!20] [fit = (neuron21) (neuron24)] (layer03) {};
\node [anchor=east] (layer03label) at (layer03.west) {\scriptsize{$l+1$}}; \node [anchor=east] (layer03label) at (layer03.west) {\scriptsize{$k+1$}};
\end{pgfonlayer} \end{pgfonlayer}
%%% output layer %%% output layer
...@@ -3329,12 +3329,12 @@ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\ ...@@ -3329,12 +3329,12 @@ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron31) (neuron34)] (layer04) {}; \node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron31) (neuron34)] (layer04) {};
\node [anchor=east] (layer04label) at (layer04.west) {\scriptsize{$L$(输出)}}; \node [anchor=east] (layer04label) at (layer04.west) {\scriptsize{$K$(输出)}};
\end{pgfonlayer} \end{pgfonlayer}
\visible<2->{ \visible<2->{
\node [neuronnode,draw=red,fill=red!20!white,inner sep=1pt] (neuron12new) at (2 * \neuronsep,3em) {}; \node [neuronnode,draw=red,fill=red!20!white,inner sep=1pt] (neuron12new) at (2 * \neuronsep,3em) {};
\node [anchor=east] (neuronsamplelabel) at ([yshift=-1em]layer02label.south east) {\alert{\textbf{\tiny{$l$层, 第$i$个神经元}}}}; \node [anchor=east] (neuronsamplelabel) at ([yshift=-1em]layer02label.south east) {\alert{\textbf{\tiny{$k$层, 第$i$个神经元}}}};
\draw [->,dashed,very thick,red] ([xshift=-0.2em,yshift=0.2em]neuronsamplelabel.east) .. controls +(30:1) and +(220:1) .. ([xshift=-0em,yshift=-0em]neuron12new.210); \draw [->,dashed,very thick,red] ([xshift=-0.2em,yshift=0.2em]neuronsamplelabel.east) .. controls +(30:1) and +(220:1) .. ([xshift=-0em,yshift=-0em]neuron12new.210);
} }
...@@ -3350,17 +3350,17 @@ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\ ...@@ -3350,17 +3350,17 @@ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\
} }
\visible<3->{ \visible<3->{
\node [anchor=west,align=left] (line01) at ([xshift=1em,yshift=1em]layer04.east) {\footnotesize{$h_{i}^{k}$:第$l$层, 第$i$个神经元的输出}}; \node [anchor=west,align=left] (line01) at ([xshift=1em,yshift=1em]layer04.east) {\footnotesize{$h_{i}^{k}$:第$k$层, 第$i$个神经元的输出}};
\node [anchor=north west,align=left] (line02) at (line01.south west) {\footnotesize{$\textbf{h}^{k}$:第$k$层的输出}}; \node [anchor=north west,align=left] (line02) at (line01.south west) {\footnotesize{$\textbf{h}^{k}$:第$k$层的输出}};
\node [anchor=north west,align=left] (line03) at (line02.south west) {\footnotesize{$\textbf{s}^{k}$:第$k$层的线性变换$\textbf{s}^k=\textbf{h}^{k-1}\textbf{w}^k$}}; \node [anchor=north west,align=left] (line03) at (line02.south west) {\footnotesize{$\textbf{s}^{k}$:第$k$层的线性变换$\textbf{s}^k=\textbf{h}^{k-1}\textbf{w}^k$}};
\node [anchor=north west,align=left] (line04) at (line03.south west) {\footnotesize{$f^{k}$:第$k$层的激活函数$\textbf{h}^k=f^l(\textbf{s}^k)$}}; \node [anchor=north west,align=left] (line04) at (line03.south west) {\footnotesize{$f^{k}$:第$k$层的激活函数$\textbf{h}^k=f^k(\textbf{s}^k)$}};
} }
\visible<4->{ \visible<4->{
\node [anchor=north west,align=left] (line05) at (line04.south west) {\footnotesize{$\textbf{h}^{K}$:网络最后的输出}}; \node [anchor=north west,align=left] (line05) at (line04.south west) {\footnotesize{$\textbf{h}^{K}$:网络最后的输出}};
} }
\visible<5->{ \visible<5->{
\node [anchor=north west,align=left] (line06) at (line05.south west) {\footnotesize{$w_{j,i}^{k}$:第$k-1$层神经元$j$}\\\footnotesize{$k$层神经元$i$的连接权重}}; \node [anchor=north west,align=left] (line06) at (line05.south west) {\footnotesize{$w_{j,i}^{k}$:第$k-1$层神经元$j$}\\\footnotesize{$k$层神经元$i$的连接权重}};
\node [anchor=north west,align=left] (line07) at (line06.south west) {\footnotesize{$\textbf{w}^{k}$:第$k-1$层与第$k1$层的}\\\footnotesize{连接权重}}; \node [anchor=north west,align=left] (line07) at (line06.south west) {\footnotesize{$\textbf{w}^{k}$:第$k-1$层与第$k$层的}\\\footnotesize{连接权重}};
} }
\end{scope} \end{scope}
...@@ -3646,27 +3646,27 @@ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\ ...@@ -3646,27 +3646,27 @@ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\
\visible<2->{ \visible<2->{
\texttt{} \\ \texttt{} \\
\texttt{CrossEntropyBackward(dldh[4], y, gold);} \\ \texttt{CrossEntropyBackward(dh[4], y, gold);} \\
\texttt{SoftmaxBackward(y, s[4], dh[4], ds[4]);}\\ \texttt{SoftmaxBackward(y, s[4], dh[4], ds[4]);}\\
\texttt{MMul(h[3], {\tiny X\_TRANS}, ds[4], {\tiny X\_NOTRANS}, dw[4]);}\\ \texttt{MMul(h[3], {\tiny X\_TRANS}, ds[4], {\tiny X\_NOTRANS}, dw[4]);}\\
\texttt{MMul(ds[4], {\tiny X\_NOTRANS}, w[4], {\tiny X\_RANS}, dh[3]);}\\
} }
\visible<3->{ \visible<3->{
\texttt{} \\ \texttt{} \\
\texttt{dh[2] = dh[3];}\\ \texttt{dh[2] = dh[3];}\\
\texttt{dh[1] = dh[3];}\\ \texttt{ReluBackward(h[2], s[2], dh[2], ds[2]);}\\
\texttt{MMul(h[1], {\tiny X\_TRANS}, ds[2], {\tiny X\_NOTRANS}, dw[2]);}\\
\texttt{MMul(ds[2], {\tiny X\_NOTRANS}, w[2], {\tiny X\_TRANS}, dh[2]);}\\
} }
\visible<4->{ \visible<4->{
\texttt{} \\ \texttt{} \\
\texttt{ReluBackward(h[2], s[2], dh[2], ds[2]);}\\ \texttt{dh[1] = dh[1] + dh[3];}\\
\texttt{MMul(h[1], {\tiny X\_TRANS}, ds[2], {\tiny X\_NOTRANS}, dw[2]);}\\
\texttt{} \\
\texttt{ReluBackward(h[1], s[1], dh[1], ds[1]);}\\
\texttt{MMul(h[0], {\tiny X\_TRANS}, ds[1], {\tiny X\_NOTRANS}, dw[1]);}\\
} }
\visible<5->{ \visible<5->{
\texttt{...} // 继续反向传播 \\
\texttt{} \\ \texttt{} \\
\texttt{for(unsigned i = 0; i < 5; i++)\{} \\ \texttt{for(unsigned i = 0; i < 5; i++)\{} \\
\texttt{} \ \ \ \ ... // 通过\alert{\texttt{dw[i]}}访问参数的梯度\\ \texttt{} \ \ \ \ ... // 通过\alert{\texttt{dw[i]}}访问参数的梯度\\
...@@ -3685,15 +3685,15 @@ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\ ...@@ -3685,15 +3685,15 @@ $+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h3) at ([yshift=1.5em]h2.north) {\tiny{h2 = Relu(h1 * w2)}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h3) at ([yshift=1.5em]h2.north) {\tiny{h2 = Relu(h1 * w2)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h4) at ([yshift=1.5em]h3.north) {\tiny{h3 = h2 + h1}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h4) at ([yshift=1.5em]h3.north) {\tiny{h3 = h2 + h1}};
\visible<1-3>{\draw [->,thick] (h1.north) -- (h2.south);} \visible<1-4>{\draw [->,thick] (h1.north) -- (h2.south);}
\visible<1-3>{\draw [->,thick] (h2.north) -- (h3.south);} \visible<1-2>{\draw [->,thick] (h2.north) -- (h3.south);}
\visible<1-2>{\draw [->,thick] (h3.north) -- (h4.south);} \visible<1-2>{\draw [->,thick] (h3.north) -- (h4.south);}
\visible<1-2>{\draw [->,thick,rounded corners] (h2.east) -- ([xshift=0.5em]h2.east) -- ([xshift=0.5em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=1.5em]h3.north east);} \visible<1-3>{\draw [->,thick,rounded corners] (h2.east) -- ([xshift=0.5em]h2.east) -- ([xshift=0.5em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=1.5em]h3.north east);}
\visible<4->{\draw [<-,very thick,red] (h1.north) -- (h2.south);} \visible<5>{\draw [<-,very thick,red] (h1.north) -- (h2.south);}
\visible<4->{\draw [<-,very thick,red] (h2.north) -- (h3.south);} \visible<3->{\draw [<-,very thick,red] (h2.north) -- (h3.south);}
\visible<3->{\draw [<-,very thick,red] (h3.north) -- (h4.south);} \visible<3->{\draw [<-,very thick,red] (h3.north) -- (h4.south);}
\visible<3->{\draw [<-,very thick,red,rounded corners] (h2.east) -- ([xshift=0.5em]h2.east) -- ([xshift=0.5em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=1.5em]h3.north east);} \visible<4->{\draw [<-,very thick,red,rounded corners] (h2.east) -- ([xshift=0.5em]h2.east) -- ([xshift=0.5em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=1.5em]h3.north east);}
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8.0em,minimum height=1.2em,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (slayer) at ([yshift=1.5em]h4.north) {\tiny{h4 = Softmax(h3 * w4) (output)}}; \node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8.0em,minimum height=1.2em,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (slayer) at ([yshift=1.5em]h4.north) {\tiny{h4 = Softmax(h3 * w4) (output)}};
\node [anchor=south] (losslabel) at (slayer.north) {\scriptsize{\textbf{Cross Entropy Loss}}}; \node [anchor=south] (losslabel) at (slayer.north) {\scriptsize{\textbf{Cross Entropy Loss}}};
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论