new pages

bea44fa2 · xiaotong · fcd45fd6 · bea44fa2 · bea44fa2 · bea44fa2
Commit bea44fa2 authored Nov 01, 2019 by xiaotong
--- a/Section05-Neural-Networks-and-Language-Modeling/section05-gbk.tex
+++ b/Section05-Neural-Networks-and-Language-Modeling/section05-gbk.tex
@@ -291,7 +291,7 @@ GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7}
    \end{itemize}
 {\Large
 \begin{displaymath}
-\textbf{y} = f(\textbf{w} \cdot \textbf{x} + \textbf{b})
+\textbf{y} = f(\textbf{x} \cdot \textbf{w} + \textbf{b})
 \end{displaymath}
 }
 \\
@@ -754,6 +754,46 @@ GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7}
 \end{frame}

 %%%------------------------------------------------------------------------------------------------------------
+%%% 线性代数基础
+\begin{frame}{预热 - 线性代数知识}
+\begin{itemize}
+\item \textbf{矩阵}：我们用$a$表示一个标量(一个数)，用粗体$\textbf{a}$表示一个矩阵(或向量)，其中$a_{ij}$表示$\textbf{a}$第$i$行、第$j$列的元素\\
+    \begin{displaymath}
+    a = 5 \hspace{3em} \textbf{a} = \begin{pmatrix} a_{11} & a_{12} \\ a_{21} & a_{22} \end{pmatrix} = \begin{pmatrix} 1 & 2 \\ 3 & 4 \end{pmatrix}
+    \end{displaymath}
+\item \textbf{向量}：一种特殊的矩阵，只有一行或者一列，这里默认使用行向量，比如$\textbf{a} = (a_1,a_2,a_3) = (10, 20, 30)$，$\textbf{a}$对应的列向量记为$\textbf{a}^T$
+\item<2-> \textbf{代数运算}：矩阵可以按位进行$+$、$-$等代数运算，对于$\textbf{a} = \begin{pmatrix} 1 & 2 \\ 3 & 4 \end{pmatrix}$，$\textbf{b} = \begin{pmatrix} 1 & 1 \\ 1 & 1 \end{pmatrix}$，有$\textbf{a} + \textbf{b} = \begin{pmatrix} 2 & 3 \\ 4 & 5 \end{pmatrix}$
+\item<3-> \textbf{矩阵的微分}：按位进行，对于矩阵$\textbf{c}$和标量$x$有
+    \begin{displaymath}
+    \frac{\partial \textbf{c}}{\partial x} = \begin{pmatrix} \frac{\partial c_{11}}{\partial x} & \frac{\partial c_{12}}{\partial x} \\ \frac{\partial c_{21}}{\partial x} & \frac{\partial c_{22}}{\partial x} \end{pmatrix} \hspace{2em} \frac{\partial x}{\partial \textbf{c}} = \begin{pmatrix} \frac{\partial x}{\partial c_{11}} & \frac{\partial x}{\partial c_{12}} \\ \frac{\partial x}{\partial c_{21}} & \frac{\partial x}{\partial c_{22}} \end{pmatrix}
+    \end{displaymath}
+\end{itemize}
+
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% 线性代数基础
+\begin{frame}{预热 - 线性代数知识(续)}
+\begin{itemize}
+
+
+\item \textbf{矩阵的乘法}：对于$\textbf{a} \in \mathbb{R}^{n \times k}$和$\textbf{b} \in \mathbb{R}^{k \times m}$，用$\textbf{c} = \textbf{a} \textbf{b} \in \mathbb{R}^{n \times m}$表示$\textbf{a}$和$\textbf{b}$的矩阵乘法，其中
+    \begin{displaymath}
+    c_{pq} = \sum_{i = 1}^k a_{pi} b_{iq}
+    \end{displaymath}
+    对于方程$\left\{ \begin{array}{l} 5x_{1} + 2x_{2} = y_{1} \\ 3x_{1} + x_{2} = y_{2}\end{array} \right.$，可以表示为$\textbf{a} \textbf{x}^T = \textbf{y}^T$，其中$\textbf{a}=\begin{pmatrix} 5 & 2 \\ 3 & 1 \end{pmatrix}$，$\textbf{x}^T =\begin{pmatrix} x_1 \\ x_2 \end{pmatrix}$，$\textbf{y}^T =\begin{pmatrix} y_1 \\ y_2 \end{pmatrix}$
+\item<2-> \textbf{其它}
+    \begin{itemize}
+    \item \textbf{单位矩阵}：方阵$\textbf{I}$，$I_{ij} = 1$当且仅当$i=j$，否则$I_{ij} = 0$
+    \item \textbf{转置}：$\textbf{a}$的转置记为$\textbf{a}^T$，有$a^T_{ji}=a_{ij}$
+    \item \textbf{逆矩阵}：方阵$\textbf{a}$的逆矩阵记为$\textbf{a}^{-1}$，有$\textbf{a} \textbf{a}^{-1} = \textbf{a}^{-1} \textbf{a} = \textbf{I}$
+    \item \textbf{向量(矩阵)的范数}：$||\textbf{a}||_p = \big( \sum_i |a_i|^p \big)^{\frac{1}{p}}$
+    \end{itemize}
+
+\end{itemize}
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
 %%% 人工神经元的函数形式
 \begin{frame}{人工神经元即一个函数}

@@ -817,7 +857,7 @@ GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7}
 	\item 权重：$\textbf{w}=(w_0,...,w_n)$
 	\item 偏移：$\textbf{b} = (-\sigma)$
 	\item 激活函数：$f(z)=1$ 当$z \ge 0$, 其它情况$f(z)=0$
-	\item 输出：$\textbf{y}=f(\textbf{x} \cdot \textbf{z} - \sigma)$
+	\item 输出：$\textbf{y}=f(\textbf{x} \cdot \textbf{w} - \sigma)$
 	\end{itemize}
 \end{itemize}

@@ -2057,7 +2097,7 @@ cycle}
    \item 注意，这里不是张量乘法，因为张量乘法还有其它定义
    \item $\textbf{w}$是$n \times m$的矩阵，$\textbf{x}$的形状是$... \times n$，即$\textbf{x}$的最后一个维度需要和$\textbf{w}$的行数大小相等\\
    \vspace{0.5em}
-    $\textbf{x}(1:2,1:2,\alert{1:3}) \times \textbf{w}(\alert{1:3},1:2) = \textbf{s}(1:2,1:2,1:2)$
+    $\textbf{x}(1:4,1:4,\alert{1:4}) \times \textbf{w}(\alert{1:4},1:2) = \textbf{s}(1:4,1:4,1:2)$
    \end{itemize}
 \end{itemize}


--- a/Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
+++ b/Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
@@ -121,19 +121,43 @@
 \subsection{词嵌入}

 %%%------------------------------------------------------------------------------------------------------------
-%%% 用实例理解词的分布式表示
-\begin{frame}{分布式表示的可视化}
+%%% 线性代数基础
+\begin{frame}{预热 - 线性代数知识}
 \begin{itemize}
-\item \textbf{一个著名的例子}：国王 $\to$ 王后\\
+\item \textbf{矩阵}：我们用$a$表示一个标量(一个数)，用粗体$\textbf{a}$表示一个矩阵(或向量)，其中$a_{ij}$表示$\textbf{a}$第$i$行、第$j$列的元素\\
    \begin{displaymath}
-    \vv{\textrm{国王}} - \vv{\textrm{男人}} + \vv{\textrm{女人}} = \vv{\textrm{王后}}
+    a = 5 \hspace{3em} \textbf{a} = \begin{pmatrix} a_{11} & a_{12} \\ a_{21} & a_{22} \end{pmatrix} = \begin{pmatrix} 1 & 2 \\ 3 & 4 \end{pmatrix}
    \end{displaymath}
-    这里，$\vv{\textrm{word}}$表示单词的分布式向量表示
-\item 更多的词的可视化：相似的词聚在一起
+\item \textbf{向量}：一种特殊的矩阵，只有一行或者一列，这里默认使用行向量，比如$\textbf{a} = (a_1,a_2,a_3) = (10, 20, 30)$，$\textbf{a}$对应的列向量记为$\textbf{a}^T$
+\item<2-> \textbf{代数运算}：矩阵可以按位进行$+$、$-$等代数运算，对于$\textbf{a} = \begin{pmatrix} 1 & 2 \\ 3 & 4 \end{pmatrix}$，$\textbf{b} = \begin{pmatrix} 1 & 1 \\ 1 & 1 \end{pmatrix}$，有$\textbf{a} + \textbf{b} = \begin{pmatrix} 2 & 3 \\ 4 & 5 \end{pmatrix}$
+\item<3-> \textbf{矩阵的微分}：按位进行，对于矩阵$\textbf{c}$和标量$x$有
+    \begin{displaymath}
+    \frac{\partial \textbf{c}}{\partial x} = \begin{pmatrix} \frac{\partial c_{11}}{\partial x} & \frac{\partial c_{12}}{\partial x} \\ \frac{\partial c_{21}}{\partial x} & \frac{\partial c_{22}}{\partial x} \end{pmatrix} \hspace{2em} \frac{\partial x}{\partial \textbf{c}} = \begin{pmatrix} \frac{\partial x}{\partial c_{11}} & \frac{\partial x}{\partial c_{12}} \\ \frac{\partial x}{\partial c_{21}} & \frac{\partial x}{\partial c_{22}} \end{pmatrix}
+    \end{displaymath}
+\end{itemize}
+
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% 线性代数基础
+\begin{frame}{预热 - 线性代数知识(续)}
+\begin{itemize}
+
+
+\item \textbf{矩阵的乘法}：对于$\textbf{a} \in \mathbb{R}^{n \times k}$和$\textbf{b} \in \mathbb{R}^{k \times m}$，用$\textbf{c} = \textbf{a} \textbf{b} \in \mathbb{R}^{n \times m}$表示$\textbf{a}$和$\textbf{b}$的矩阵乘法，其中
+    \begin{displaymath}
+    c_{pq} = \sum_{i = 1}^k a_{pi} b_{iq} 
+    \end{displaymath}
+    对于方程$\left\{ \begin{array}{l} 5x_{1} + 2x_{2} = y_{1} \\ 3x_{1} + x_{2} = y_{2}\end{array} \right.$，可以表示为$\textbf{a} \textbf{x}^T = \textbf{y}^T$，其中$\textbf{a}=\begin{pmatrix} 5 & 2 \\ 3 & 1 \end{pmatrix}$，$\textbf{x}^T =\begin{pmatrix} x_1 \\ x_2 \end{pmatrix}$，$\textbf{y}^T =\begin{pmatrix} y_1 \\ y_2 \end{pmatrix}$
+\item<2-> \textbf{其它}
+    \begin{itemize}
+    \item \textbf{单位矩阵}：方阵$\textbf{I}$，$I_{ij} = 1$当且仅当$i=j$，否则$I_{ij} = 0$
+    \item \textbf{转置}：$\textbf{a}$的转置记为$\textbf{a}^T$，有$a^T_{ji}=a_{ij}$
+    \item \textbf{逆矩阵}：方阵$\textbf{a}$的逆矩阵记为$\textbf{a}^{-1}$，有$\textbf{a} \textbf{a}^{-1} = \textbf{a}^{-1} \textbf{a} = \textbf{I}$
+    \item \textbf{向量(矩阵)的范数}：$||\textbf{a}||_p = \big( \sum_i |a_i|^p \big)^{\frac{1}{p}}$ 
+    \end{itemize}
+
 \end{itemize}
-\begin{center}
-\includegraphics[scale=0.4]{./Figures/word-graph.png}
-\end{center}
 \end{frame}

 %%%------------------------------------------------------------------------------------------------------------

--- a/Section05-Neural-Networks-and-Language-Modeling/section05.tex
+++ b/Section05-Neural-Networks-and-Language-Modeling/section05.tex
@@ -294,7 +294,7 @@ GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7}
    \end{itemize}
 {\Large
 \begin{displaymath}
-\textbf{y} = f(\textbf{w} \cdot \textbf{x} + \textbf{b})
+\textbf{y} = f(\textbf{x} \cdot \textbf{w} + \textbf{b})
 \end{displaymath}
 }
 \\
@@ -757,6 +757,46 @@ GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7}
 \end{frame}

 %%%------------------------------------------------------------------------------------------------------------
+%%% 线性代数基础
+\begin{frame}{预热 - 线性代数知识}
+\begin{itemize}
+\item \textbf{矩阵}：我们用$a$表示一个标量(一个数)，用粗体$\textbf{a}$表示一个矩阵(或向量)，其中$a_{ij}$表示$\textbf{a}$第$i$行、第$j$列的元素\\
+    \begin{displaymath}
+    a = 5 \hspace{3em} \textbf{a} = \begin{pmatrix} a_{11} & a_{12} \\ a_{21} & a_{22} \end{pmatrix} = \begin{pmatrix} 1 & 2 \\ 3 & 4 \end{pmatrix}
+    \end{displaymath}
+\item \textbf{向量}：一种特殊的矩阵，只有一行或者一列，这里默认使用行向量，比如$\textbf{a} = (a_1,a_2,a_3) = (10, 20, 30)$，$\textbf{a}$对应的列向量记为$\textbf{a}^T$
+\item<2-> \textbf{代数运算}：矩阵可以按位进行$+$、$-$等代数运算，对于$\textbf{a} = \begin{pmatrix} 1 & 2 \\ 3 & 4 \end{pmatrix}$，$\textbf{b} = \begin{pmatrix} 1 & 1 \\ 1 & 1 \end{pmatrix}$，有$\textbf{a} + \textbf{b} = \begin{pmatrix} 2 & 3 \\ 4 & 5 \end{pmatrix}$
+\item<3-> \textbf{矩阵的微分}：按位进行，对于矩阵$\textbf{c}$和标量$x$有
+    \begin{displaymath}
+    \frac{\partial \textbf{c}}{\partial x} = \begin{pmatrix} \frac{\partial c_{11}}{\partial x} & \frac{\partial c_{12}}{\partial x} \\ \frac{\partial c_{21}}{\partial x} & \frac{\partial c_{22}}{\partial x} \end{pmatrix} \hspace{2em} \frac{\partial x}{\partial \textbf{c}} = \begin{pmatrix} \frac{\partial x}{\partial c_{11}} & \frac{\partial x}{\partial c_{12}} \\ \frac{\partial x}{\partial c_{21}} & \frac{\partial x}{\partial c_{22}} \end{pmatrix}
+    \end{displaymath}
+\end{itemize}
+
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% 线性代数基础
+\begin{frame}{预热 - 线性代数知识(续)}
+\begin{itemize}
+
+
+\item \textbf{矩阵的乘法}：对于$\textbf{a} \in \mathbb{R}^{n \times k}$和$\textbf{b} \in \mathbb{R}^{k \times m}$，用$\textbf{c} = \textbf{a} \textbf{b} \in \mathbb{R}^{n \times m}$表示$\textbf{a}$和$\textbf{b}$的矩阵乘法，其中
+    \begin{displaymath}
+    c_{pq} = \sum_{i = 1}^k a_{pi} b_{iq}
+    \end{displaymath}
+    对于方程$\left\{ \begin{array}{l} 5x_{1} + 2x_{2} = y_{1} \\ 3x_{1} + x_{2} = y_{2}\end{array} \right.$，可以表示为$\textbf{a} \textbf{x}^T = \textbf{y}^T$，其中$\textbf{a}=\begin{pmatrix} 5 & 2 \\ 3 & 1 \end{pmatrix}$，$\textbf{x}^T =\begin{pmatrix} x_1 \\ x_2 \end{pmatrix}$，$\textbf{y}^T =\begin{pmatrix} y_1 \\ y_2 \end{pmatrix}$
+\item<2-> \textbf{其它}
+    \begin{itemize}
+    \item \textbf{单位矩阵}：方阵$\textbf{I}$，$I_{ij} = 1$当且仅当$i=j$，否则$I_{ij} = 0$
+    \item \textbf{转置}：$\textbf{a}$的转置记为$\textbf{a}^T$，有$a^T_{ji}=a_{ij}$
+    \item \textbf{逆矩阵}：方阵$\textbf{a}$的逆矩阵记为$\textbf{a}^{-1}$，有$\textbf{a} \textbf{a}^{-1} = \textbf{a}^{-1} \textbf{a} = \textbf{I}$
+    \item \textbf{向量(矩阵)的范数}：$||\textbf{a}||_p = \big( \sum_i |a_i|^p \big)^{\frac{1}{p}}$
+    \end{itemize}
+
+\end{itemize}
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
 %%% 人工神经元的函数形式
 \begin{frame}{人工神经元即一个函数}

@@ -820,7 +860,7 @@ GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7}
 	\item 权重：$\textbf{w}=(w_0,...,w_n)$
 	\item 偏移：$\textbf{b} = (-\sigma)$
 	\item 激活函数：$f(z)=1$ 当$z \ge 0$, 其它情况$f(z)=0$
-	\item 输出：$\textbf{y}=f(\textbf{x} \cdot \textbf{z} - \sigma)$
+	\item 输出：$\textbf{y}=f(\textbf{x} \cdot \textbf{w} - \sigma)$
 	\end{itemize}
 \end{itemize}

@@ -2060,7 +2100,7 @@ cycle}
    \item 注意，这里不是张量乘法，因为张量乘法还有其它定义
    \item $\textbf{w}$是$n \times m$的矩阵，$\textbf{x}$的形状是$... \times n$，即$\textbf{x}$的最后一个维度需要和$\textbf{w}$的行数大小相等\\
    \vspace{0.5em}
-    $\textbf{x}(1:2,1:2,\alert{1:3}) \times \textbf{w}(\alert{1:3},1:2) = \textbf{s}(1:2,1:2,1:2)$
+    $\textbf{x}(1:4,1:4,\alert{1:4}) \times \textbf{w}(\alert{1:4},1:2) = \textbf{s}(1:4,1:4,1:2)$
    \end{itemize}
 \end{itemize}


--- a/Section06-Neural-Machine-Translation/section06.tex
+++ b/Section06-Neural-Machine-Translation/section06.tex
@@ -114,7 +114,7 @@
 \section{编码器-解码器框架}

 %%%------------------------------------------------------------------------------------------------------------
-\section{基于循环单元的翻译模型及注意力机制}
+\section{循环神经网络翻译模型及注意力机制}

 %%%------------------------------------------------------------------------------------------------------------
 \subsection{模型结构}
@@ -126,7 +126,7 @@
 \section{Transformer}

 %%%------------------------------------------------------------------------------------------------------------
-\subsection{模型结构}
+\subsection{多头自注意力模型}

 %%%------------------------------------------------------------------------------------------------------------
 \subsection{训练}