new pages

d1b1d4b2 · xiaotong · 178205a4 · d1b1d4b2 · d1b1d4b2
Commit d1b1d4b2 authored Oct 09, 2019 by xiaotong
--- a/Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
+++ b/Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
@@ -116,32 +116,73 @@
 \subsection{参数学习 - 反向传播}

 %%%------------------------------------------------------------------------------------------------------------
-%%% 如何计算梯度
-\begin{frame}{如何计算梯度?}
+%%% 如何计算梯度 - 符号微分
+\begin{frame}{符号微分}

 \begin{itemize}
-\item \textbf{还有一个核心问题}：如何计算梯度
+\item \textbf{符号微分}：类似于手写出微分表达式，最后代入变量的值，得到微分结果。比如，对于如下表达式
 \begin{displaymath}
-\frac{\partial L(\textbf{w})}{\partial \textbf{w}} = ?
+L(\textbf{w}) = \textbf{x} \cdot \textbf{w} + 2 \textbf{w}^2
 \end{displaymath}

+\visible<2->{
 \vspace{0.5em}
+可以手动推导出微分表达式

-\item<2-> \textbf{数值微分} - 简单粗暴的方法
 \begin{displaymath}
-\frac{\partial L(\textbf{w})}{\partial \textbf{w}} = \lim_{\Delta \textbf{w} \to 0} \frac{L(\textbf{w} + \Delta \textbf{w}) - L(\textbf{w} - \Delta \textbf{w}) }{2\Delta \textbf{w}}
+\frac{\partial L(\textbf{w})}{\partial \textbf{w}} = \textbf{x} + 4 \textbf{w}
 \end{displaymath}
+}

-最基本的微分公式，我们可以将$\textbf{w}$变化一点儿（用$\Delta \textbf{w}$表示），之后看$L(\cdot)$的变化。
+\visible<3->{
+\vspace{0.5em}
+最后，代入$\textbf{x} = \begin{pmatrix} 2 \\ -3 \end{pmatrix}$和$\textbf{w} = \begin{pmatrix} -1 \\ 1 \end{pmatrix}$，得到微分结果\\

-    \begin{itemize}
-    \item<3-> \textbf{优点很明显}：方法真的非常简单，易于实现
-    \item<3-> \textbf{缺点也和明显}：效率太低，对于复杂网络、参数量稍微大一些的模型基本上无法使用
-    \end{itemize}
+\vspace{1em}
+
+\begin{displaymath}
+\frac{\partial L(\textbf{w})}{\partial \textbf{w}} =  \begin{pmatrix} 2 \\ -3 \end{pmatrix} + 4 \begin{pmatrix} -1 \\ 1 \end{pmatrix} = \begin{pmatrix} -2 \\ 1 \end{pmatrix}
+\end{displaymath}
+}

 \end{itemize}

 \end{frame}

+%%%------------------------------------------------------------------------------------------------------------
+%%% 符号微分的膨胀问题
+\begin{frame}{符号微分的膨胀问题}
+
+\begin{itemize}
+\item \textbf{Expression Swell}：深层函数的微分表达式会非常复杂
+	\begin{itemize}
+	\item 表达式冗长不易存储和管理
+	\item 真正需要的是微分结果，而不是微分表达式
+	\end{itemize}
+\end{itemize}
+
+\vspace{0.5em}
+
+{\small
+\begin{tabular} {l | l | l}
+函数 & 微分表达式 & 化简的微分表达式 \\ \hline
+$x$ & $1$ & $1$ \\ \hline
+$x(x+1)$ & $(x+1)+x$ & $2x + 1$ \\ \hline
+$x(x+1)$ & $(x+1)(x^2+x+1)$ & $4x^3+6x^2$ \\ 
+$(x^2+x+1)$ & $+x(x^2+x+1)$ & $+4x+1$ \\
+                     & $+x(x+1)(2x+1)$ & \\ \hline
+$(x^2+x)$ & $(2x+1)(x^2+x+1)$ & $8x^7+28x^6$ \\
+$(x^2+x+1)$ & $(x^4+2x^3+2x^2+x+1)$ & $+48x^5+50x^4$ \\
+$(x^4+2x^3$ & $+(2x+1)(x^2+x)$ & $+36x^3+18x^2$ \\
+$+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\
+ & $+(x^2+x)(x^2+x+1)$ & \\
+ & \ \ $(4x^3+6x^2+4x+1)$ & \\
+
+
+\end{tabular}
+}
+
+\end{frame}
+
 \end{CJK}
 \end{document}
--- a/Section05-Neural-Networks-and-Language-Modeling/section05.tex
+++ b/Section05-Neural-Networks-and-Language-Modeling/section05.tex
@@ -3033,7 +3033,7 @@ J(\textbf{w}_t) = \frac{1}{m} \sum_{i=j}^{j+m} L(\textbf{x}_i,\hat{\textbf{y}}_i

 %%%------------------------------------------------------------------------------------------------------------
 %%% 如何计算梯度
-\begin{frame}{如何计算梯度?}
+\begin{frame}{如何计算梯度? - 数值微分}

 \begin{itemize}
 \item \textbf{还有一个核心问题}：如何计算梯度
@@ -3048,16 +3048,85 @@ J(\textbf{w}_t) = \frac{1}{m} \sum_{i=j}^{j+m} L(\textbf{x}_i,\hat{\textbf{y}}_i
 \frac{\partial L(\textbf{w})}{\partial \textbf{w}} = \lim_{\Delta \textbf{w} \to 0} \frac{L(\textbf{w} + \Delta \textbf{w}) - L(\textbf{w} - \Delta \textbf{w}) }{2\Delta \textbf{w}}
 \end{displaymath}

-最基本的微分公式，我们可以将$\textbf{w}$变化一点儿（用$\Delta \textbf{w}$表示），之后看$L(\cdot)$的变化。
+最基本的微分公式，我们可以将$\textbf{w}$变化一点儿（用$\Delta \textbf{w}$表示），之后看$L(\cdot)$的变化

    \begin{itemize}
    \item<3-> \textbf{优点很明显}：方法真的非常简单，易于实现
-    \item<3-> \textbf{缺点也和明显}：效率太低，对于复杂网络、参数量稍微大一些的模型基本上无法使用
+    \item<3-> \textbf{缺点也很明显}：效率太低，对于复杂网络、参数量稍微大一些的模型基本上无法使用
    \end{itemize}

 \end{itemize}

 \end{frame}

+%%%------------------------------------------------------------------------------------------------------------
+%%% 如何计算梯度 - 符号微分
+\begin{frame}{符号微分}
+
+\begin{itemize}
+\item \textbf{符号微分}：类似于手写出微分表达式，最后代入变量的值，得到微分结果。比如，对于如下表达式
+\begin{displaymath}
+L(\textbf{w}) = \textbf{x} \cdot \textbf{w} + 2 \textbf{w}^2
+\end{displaymath}
+
+\visible<2->{
+\vspace{0.5em}
+可以手动推导出微分表达式
+
+\begin{displaymath}
+\frac{\partial L(\textbf{w})}{\partial \textbf{w}} = \textbf{x} + 4 \textbf{w}
+\end{displaymath}
+}
+
+\visible<3->{
+\vspace{0.5em}
+最后，代入$\textbf{x} = \begin{pmatrix} 2 \\ -3 \end{pmatrix}$和$\textbf{w} = \begin{pmatrix} -1 \\ 1 \end{pmatrix}$，得到微分结果\\
+
+\vspace{1em}
+
+\begin{displaymath}
+\frac{\partial L(\textbf{w})}{\partial \textbf{w}} =  \begin{pmatrix} 2 \\ -3 \end{pmatrix} + 4 \begin{pmatrix} -1 \\ 1 \end{pmatrix} = \begin{pmatrix} -2 \\ 1 \end{pmatrix}
+\end{displaymath}
+}
+
+\end{itemize}
+
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% 符号微分的膨胀问题
+\begin{frame}{符号微分的膨胀问题}
+
+\begin{itemize}
+\item \textbf{Expression Swell}：深层函数的微分表达式会非常复杂
+	\begin{itemize}
+	\item 表达式冗长不易存储和管理
+	\item 真正需要的是微分\alert{结果}，而不是微分表达式
+	\end{itemize}
+\end{itemize}
+
+\vspace{0.5em}
+
+{\small
+\begin{tabular} {l | l | l}
+函数 & 微分表达式 & 化简的微分表达式 \\ \hline
+$x$ & $1$ & $1$ \\ \hline
+$x(x+1)$ & $(x+1)+x$ & $2x + 1$ \\ \hline
+$x(x+1)$ & $(x+1)(x^2+x+1)$ & $4x^3+6x^2$ \\ 
+$(x^2+x+1)$ & $+x(x^2+x+1)$ & $+4x+1$ \\
+                     & $+x(x+1)(2x+1)$ & \\ \hline
+$(x^2+x)$ & $(2x+1)(x^2+x+1)$ & $8x^7+28x^6$ \\
+$(x^2+x+1)$ & $(x^4+2x^3+2x^2+x+1)$ & $+48x^5+50x^4$ \\
+$(x^4+2x^3$ & $+(2x+1)(x^2+x)$ & $+36x^3+18x^2$ \\
+$+2x^2+x+1)$ & \ \ $(x^4+2x^3+2x^2+x+1)$ & $+6x+1$ \\
+ & $+(x^2+x)(x^2+x+1)$ & \\
+ & \ \ $(4x^3+6x^2+4x+1)$ & \\
+
+
+\end{tabular}
+}
+
+\end{frame}
+
 \end{CJK}
 \end{document}