new pages

9752394b · xiaotong · 415f6e39 · 9752394b · 9752394b
Commit 9752394b authored Oct 06, 2019 by xiaotong
--- a/Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
+++ b/Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
@@ -116,60 +116,42 @@
 \subsection{参数学习 - 反向传播}

 %%%------------------------------------------------------------------------------------------------------------
-%%% 神经网络 = 表达式
-\begin{frame}{神经网络 = 表达式}
+%%% 常见的目标函数
+\begin{frame}{常见的损失函数}
+
 \begin{itemize}
-\item 所有的神经网络都可以看做由变量和函数组成的表达式\\
+\item 损失函数记为$Loss(\hat{\textbf{y}}_i,\textbf{y}_i)$，简记为$L$，以下是常用的定义
 \end{itemize}

-\begin{center}
-\begin{tikzpicture}
-\node [anchor=north west] (eq1) at (0,0) {$\textbf{y} = \textbf{x} + \textbf{b}$};
-\node [anchor=north west] (eq2) at (eq1.south west) {$\textbf{y} = \textrm{Relu}(\textbf{x} \cdot \textbf{w} + \textbf{b})$};
-\node [anchor=north west] (eq3) at (eq2.south west) {$\textbf{y} = (\textrm{Relu}(\textbf{x} \cdot \textbf{w}_1 + \textbf{b}) + \textbf{x}) \cdot \textbf{w}_2$};
-\node [anchor=north west] (eq4) at (eq3.south west) {$\textbf{y} = \textrm{Sigmoid}(\textrm{Relu}(\textbf{x} \cdot \textbf{w}_1 + \textbf{b}_1) + \textbf{x}) \cdot \textbf{w}_2 + \textbf{b}_2$};
-
-\visible<2->{
-\node [anchor=north west,minimum height=1.2em,minimum width=1.2em,fill=green!30!white] (xlabel) at ([yshift=-0.5em,xshift=0.3em]eq4.south west) {};
-\node [anchor=west] (xlabel2) at (xlabel.east) {输入变量 - 由用户指定};
-}
-
-\begin{pgfonlayer}{background}
-\visible<2->{
-\node [anchor=south, minimum height=1.6em,minimum width=0.8em,fill=green!30!white] (x1) at ([xshift=-1.3em]eq4.south) {};
-\node [anchor=south, minimum height=1.6em,minimum width=0.8em,fill=green!30!white] (x2) at ([xshift=4.9em]eq4.south) {};
-}
-\end{pgfonlayer}
-
-\visible<3->{
-\node [anchor=north west,minimum height=1.2em,minimum width=1.2em,fill=red!30!white] (wlabel) at ([yshift=-0.3em]xlabel.south west) {};
-\node [anchor=west] (wlabel2) at (wlabel.east) {模型参数 - 怎么设置？？？};
-}
-
-\begin{pgfonlayer}{background}
-\visible<3->{
-\node [anchor=south, minimum height=1.6em,minimum width=1.2em,fill=red!30!white] (w1) at ([xshift=0.2em]eq4.south) {};
-\node [anchor=south, minimum height=1.6em,minimum width=1.2em,fill=red!30!white] (b1) at ([xshift=2.5em]eq4.south) {};
-\node [anchor=south, minimum height=1.6em,minimum width=1.2em,fill=red!30!white] (w2) at ([xshift=6.85em]eq4.south) {};
-\node [anchor=south, minimum height=1.6em,minimum width=1.2em,fill=red!30!white] (b2) at ([xshift=9.2em]eq4.south) {};
+\vspace{0.5em}
+
+\footnotesize{
+\renewcommand{\arraystretch}{1.2}
+\begin{tabular}{l | l | l | l}
+名称 & 定义 & NiuTensor实现(\texttt{yh}表示$\hat{\textbf{y}}_i$) & 应用 \\ \hline
+0-1 & $L = \left\{ \begin{array}{ll} 0 & \hat{\textbf{y}}_i = \textbf{y}_i \\ 1 & \hat{\textbf{y}}_i \ne \textbf{y}_i \end{array} \right.$ & \scriptsize{\texttt{L = Sign(Absolute(yh - y))}} & 感知机 \\ 
+Hinge & $L=\max(0,1-\hat{\textbf{y}}_i \cdot \textbf{y}_i)$ & \scriptsize{\texttt{L = Max(0, 1 - yh * y)}} & SVM \\
+绝对值 & $L=|\hat{\textbf{y}}_i - \textbf{y}_i|$ & \scriptsize{\texttt{L = Absolute(yh - y)}} & 回归 \\ 
+Logistic & $L=\log(1 + \exp(- \hat{\textbf{y}}_i \cdot \textbf{y}_i))$ & \scriptsize{\texttt{L = Log(1 + Exp(Negate(yh * y)))}} & 回归 \\
+平方 & $L=(\hat{\textbf{y}}_i - \textbf{y}_i)^2$ & \scriptsize{\texttt{L = Power(yh - y, 2)}} & 回归 \\
+指数 & $L=\exp(- \hat{\textbf{y}}_i \cdot \textbf{y}_i) $ & \scriptsize{\texttt{L = Exp(Negate(yh * y))}} & \scriptsize{AdaBoost} \\
+交叉熵 & $L=-\sum_k \hat{\textbf{y}}_i^{[k]} \log \textbf{y}_i^{[k]} $ & \scriptsize{\texttt{L = CrossEntropy(y, yh)}} & 多分类 \\
+       & \scriptsize{$\textbf{y}_i^{[k]}$: $\textbf{y}_i$的第$k$维} & & \\
+\end{tabular}
+\renewcommand{\arraystretch}{1.0}
 }
-\end{pgfonlayer}

-\end{tikzpicture}
-\end{center}
-
-\visible<4->{
-\begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=blue!5!white,colframe=blue!75!black,drop fuzzy shadow]
-{\Large
-\textbf{问题来了,}
-
-\vspace{0.4em}
-\textbf{如何确定w和b，使x与y对应得更好？}
-}
-\end{tcolorbox}
-}
+\vspace{-0.5em}
+\begin{itemize}
+\item 注意：
+    \begin{itemize}
+    \item 损失函数可以根据问题不同进行选择，没有固定要求
+    \item 有些损失函数对网络输出有要求，比如交叉熵要求$\hat{\textbf{y}}_i$和$\textbf{y}_i$都是概率分布
+    \end{itemize}
+\end{itemize}

 \end{frame}

+
 \end{CJK}
 \end{document}
--- a/Section05-Neural-Networks-and-Language-Modeling/section05.tex
+++ b/Section05-Neural-Networks-and-Language-Modeling/section05.tex
@@ -1616,21 +1616,21 @@ cycle}

 \visible<14->{
 \foreach \n in {0.5}{
-    \pgfmathsetmacro{\result}{0.2 * (\n + 0.1 + 0.4)^3 + 1.2 - 0.3 *(\n + 0.1 + 0.8)^2}%
+    \pgfmathsetmacro{\result}{0.2 * (\n + 0.1 + 0.4)^3 + 1.2 - 0.3 *(\n + 0.1 + 0.8)^2};
    \draw [-,ublue,thick] (\n,0) -- (\n, \result) -- (\n + 0.2, \result) -- (\n + 0.2, 0);
 }
 }

 \visible<15->{
 \foreach \n in {0.7}{
-    \pgfmathsetmacro{\result}{0.2 * (\n + 0.1 + 0.4)^3 + 1.2 - 0.3 *(\n + 0.1 + 0.8)^2}%
+    \pgfmathsetmacro{\result}{0.2 * (\n + 0.1 + 0.4)^3 + 1.2 - 0.3 *(\n + 0.1 + 0.8)^2};
    \draw [-,ublue,thick] (\n,0) -- (\n, \result) -- (\n + 0.2, \result) -- (\n + 0.2, 0);
 }
 }

 \visible<16->{
 \foreach \n in {-1.9,-1.7,...,1.9}{
-    \pgfmathsetmacro{\result}{0.2 * (\n + 0.1 + 0.4)^3 + 1.2 - 0.3 *(\n + 0.1 + 0.8)^2}%
+    \pgfmathsetmacro{\result}{0.2 * (\n + 0.1 + 0.4)^3 + 1.2 - 0.3 *(\n + 0.1 + 0.8)^2};
    \draw [-,ublue,thick] (\n,0) -- (\n, \result) -- (\n + 0.2, \result) -- (\n + 0.2, 0);
 }
 }
@@ -1881,7 +1881,7 @@ cycle}
    \item<3-> \textbf{还是看不懂的定义}：若干向量和协向量通过张量乘法定义的量（代数定义）
    \item<4-> \textbf{还可以解释的定义}：\alert{张量是多重线性函数}，是定义在一些向量空间和对偶空间的笛卡儿积上的多重线性映射
        \begin{itemize}
-        \item 这里把张量表示为$T(v_0,...,v_r)$，其中输入的是$r$个向量$\{v_0,...,v_r\}$
+        \item 张量记为$T(v_0,...,v_r)$，其中输入是$r+1$个向量$\{v_0,...,v_r\}$
        \item 多重线性是指，对于每个输入，函数都是线性的，比如，对于一个$v_i$，我们有
        \vspace{-0.3em}
        \begin{displaymath}
@@ -2754,7 +2754,7 @@ cycle}
 \visible<4->{
 \begin{tcolorbox}[enhanced,size=normal,left=2mm,right=1mm,colback=blue!5!white,colframe=blue!75!black,drop fuzzy shadow]
 {\Large
-\textbf{问题来了,}
+\textbf{问题来了：}

 \vspace{0.4em}
 \textbf{如何确定w和b，使x与y对应得更好？}
@@ -2764,5 +2764,96 @@ cycle}

 \end{frame}

+%%%------------------------------------------------------------------------------------------------------------
+%%% 学习的目标是什么
+\begin{frame}{目标函数和损失函数}
+\begin{itemize}
+\item 这是一个典型的优化问题，有两个基本问题\\
+    \begin{enumerate}
+    \item 优化的目标是什么？
+    \item 如何调整参数$\textbf{w}$和$\textbf{b}$达成目标？
+    \end{enumerate}
+\item<2-> \textbf{定义目标}：对于给定$\textbf{x}$，什么样的$\textbf{y}$是好的
+    \begin{itemize}
+    \item 假设：多个输入样本$\{\textbf{x}_1,...,\textbf{x}_n\}$，每个$\textbf{x}_i$都对应\alert{正确答案}$\hat{\textbf{y}}_i$
+    \item 对于一个神经网络$\textbf{y}=f(\textbf{x})$，每个$\textbf{x}_i$也会有一个输出$\textbf{y}_i$
+    \item 如果可以度量答案$\hat{\textbf{y}}_i$和网络输出$\textbf{y}_i$之间的偏差，进而调整网络参数减小这种偏差，就可以得到更好的模型
+    \end{itemize}
+\end{itemize}
+
+\visible<3->{
+\vspace{-0.7em}
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}[yscale=0.2,xscale=0.8]
+\draw[-,very thick,ublue,domain=-4.2:3.5,samples=100] plot (\x,{ - 1/14 * (\x + 4) * (\x + 1) * (\x - 1) * (\x - 3)});
+\visible<4->{
+\draw[-,very thick,ugreen,domain=-3.8:3.0,samples=100] plot (\x,{ - 1/14 * (4*\x*\x*\x + 3*\x*\x - 26*\x - 1)});
+}
+\draw[->,thick] (-6,0) -- (5,0);
+\draw[->,thick] (-5,-4) -- (-5,5);
+
+\draw [<-] (-2.5,4) -- (-2,5) node [pos=1,right,inner sep=2pt] {\footnotesize{答案$\hat{\textbf{y}}_i$}};
+\visible<4->{
+\draw [<-] (-3,-3) -- (-2.5,-2) node [pos=0,left,inner sep=2pt] {\footnotesize{预测$\textbf{y}_i$}};}
+
+\visible<5->{
+\draw [<-] (2.3,1) -- (3.3,2) node [pos=1,right,inner sep=2pt] {\footnotesize{偏差$|\hat{\textbf{y}}_i - \textbf{y}_i|$}};
+\foreach \x in {-3.8,-3.7,...,3.0}{
+    \pgfmathsetmacro{\p}{- 1/14 * (\x + 4) * (\x + 1) * (\x - 1) * (\x - 3)};
+    \pgfmathsetmacro{\q}{- 1/14 * (4*\x*\x*\x + 3*\x*\x - 26*\x - 1)};
+    \draw [-] (\x,\p) -- (\x, \q);
+}
+}
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
+}
+
+\vspace{-0.3em}
+\begin{itemize}
+\item<6-> 这个过程就是\alert{参数优化/训练}，而$\hat{\textbf{y}}_i$和$\textbf{y}_i$之间偏差的度量就是一种\alert{损失函数}，也称作训练的\alert{目标函数}，而优化的目标就是\textbf{最小化损失函数}
+\end{itemize}
+
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% 常见的目标函数
+\begin{frame}{常见的损失函数}
+
+\begin{itemize}
+\item 损失函数记为$Loss(\hat{\textbf{y}}_i,\textbf{y}_i)$，简记为$L$，以下是常用的定义
+\end{itemize}
+
+\vspace{0.5em}
+
+\footnotesize{
+\renewcommand{\arraystretch}{1.2}
+\begin{tabular}{l | l | l | l}
+名称 & 定义 & NiuTensor实现(\texttt{yh}表示$\hat{\textbf{y}}_i$) & 应用 \\ \hline
+0-1 & $L = \left\{ \begin{array}{ll} 0 & \hat{\textbf{y}}_i = \textbf{y}_i \\ 1 & \hat{\textbf{y}}_i \ne \textbf{y}_i \end{array} \right.$ & \scriptsize{\texttt{L = Sign(Absolute(yh - y))}} & 感知机 \\
+Hinge & $L=\max(0,1-\hat{\textbf{y}}_i \cdot \textbf{y}_i)$ & \scriptsize{\texttt{L = Max(0, 1 - yh * y)}} & SVM \\
+绝对值 & $L=|\hat{\textbf{y}}_i - \textbf{y}_i|$ & \scriptsize{\texttt{L = Absolute(yh - y)}} & 回归 \\
+Logistic & $L=\log(1 + \exp(- \hat{\textbf{y}}_i \cdot \textbf{y}_i))$ & \scriptsize{\texttt{L = Log(1 + Exp(Negate(yh * y)))}} & 回归 \\
+平方 & $L=(\hat{\textbf{y}}_i - \textbf{y}_i)^2$ & \scriptsize{\texttt{L = Power(yh - y, 2)}} & 回归 \\
+指数 & $L=\exp(- \hat{\textbf{y}}_i \cdot \textbf{y}_i) $ & \scriptsize{\texttt{L = Exp(Negate(yh * y))}} & \scriptsize{AdaBoost} \\
+交叉熵 & $L=-\sum_k \hat{\textbf{y}}_i^{[k]} \log \textbf{y}_i^{[k]} $ & \scriptsize{\texttt{L = CrossEntropy(y, yh)}} & 多分类 \\
+       & \scriptsize{$\textbf{y}_i^{[k]}$: $\textbf{y}_i$的第$k$维} & & \\
+\end{tabular}
+\renewcommand{\arraystretch}{1.0}
+}
+
+\vspace{-0.5em}
+\begin{itemize}
+\item 注意：
+    \begin{itemize}
+    \item 损失函数可以根据问题不同进行选择，没有固定要求
+    \item 有些损失函数对网络输出有约束，比如交叉熵要求$\hat{\textbf{y}}_i$和$\textbf{y}_i$都是概率分布
+    \end{itemize}
+\end{itemize}
+
+\end{frame}
+
 \end{CJK}
 \end{document}