Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
T
Toy-MT-Introduction
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
0
Issues
0
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
单韦乔
Toy-MT-Introduction
Commits
caac3b7a
Commit
caac3b7a
authored
Sep 19, 2019
by
xiaotong
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
tensor of neural networks
parent
1037ae94
显示空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
602 行增加
和
618 行删除
+602
-618
Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
+27
-559
Section05-Neural-Networks-and-Language-Modeling/section05-test.vrb
+0
-54
Section05-Neural-Networks-and-Language-Modeling/section05.tex
+575
-5
没有找到文件。
Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
查看文件 @
caac3b7a
...
...
@@ -102,588 +102,56 @@
\section
{
为什么要谈神经网络
}
%%%------------------------------------------------------------------------------------------------------------
\subsection
{
数学基础
:张量计算
}
\subsection
{
神经网络的简单实现
:张量计算
}
%%%------------------------------------------------------------------------------------------------------------
%%%
神经网络的作用
\begin{frame}
{
神经网络:线性变换 + 激活函数
}
%%%
何为张量
\begin{frame}
{
如何描述神经网络 - 张量计算
}
\begin{itemize}
\item
对于
向量
$
\textbf
{
x
}
\in
\mathbb
{
R
}^
m
$
,一层神经网络首先把它经过
\textbf
{
\alert
{
线性变换
}}
映射到
$
\mathbb
{
R
}^
n
$
,之后经过
\textbf
{{
\color
{
blue
}
激活函数
}}
变换成
$
\textbf
{
y
}
\in
\mathbb
{
R
}^
n
$
\item
对于
神经网络,输入
$
\textbf
{
x
}$
和输出
$
\textbf
{
y
}$
的形式并不仅仅是向量
\end{itemize}
\vspace
{
1em
}
\begin{center}
\begin{tikzpicture}
\node
[anchor=center] (y) at (0,0)
{
\Large
{$
\textbf
{
y
}$}}
;
\node
[anchor=west] (eq) at (y.east)
{
\Large
{$
=
$}}
;
\node
[anchor=west] (func) at (eq.east)
{
\Large
{$
f
$}}
;
\node
[anchor=west] (brace01) at (func.east)
{
\Large
{$
(
$}}
;
\node
[anchor=west] (x) at (brace01.east)
{
\Large
{$
\textbf
{
x
}$}}
;
\node
[anchor=west] (dot) at (x.east)
{
\Large
{$
\cdot
$}}
;
\node
[anchor=west] (w) at (dot.east)
{
\Large
{$
\textbf
{
w
}$}}
;
\node
[anchor=west] (plus) at (w.east)
{
\Large
{$
+
$}}
;
\node
[anchor=west] (b) at (plus.east)
{
\Large
{$
\textbf
{
b
}$}}
;
\node
[anchor=west] (brace02) at (b.east)
{
\Large
{$
)
$}}
;
\node
[anchor=center,fill=blue!20] (func2) at (func)
{
\LARGE
{$
f
$}}
;
\node
[anchor=north] (funclabel) at ([yshift=-1.1em]func.south)
{
\blue
{
激活函数
}}
;
\draw
[<-] ([yshift=-0.2em]func2.south) -- (funclabel.north);
\begin{pgfonlayer}
{
background
}
\node
[rectangle,inner sep=0.2em,fill=red!20] [fit = (x) (w) (b)] (linear)
{}
;
\node
[anchor=north] (linearlabel) at ([yshift=-1.1em]linear.south)
{
\alert
{
线性变换
}}
;
\draw
[<-] ([yshift=-0.2em]linear.south) -- (linearlabel.north);
\end{pgfonlayer}
\end{tikzpicture}
\begin{figure}
[htp!]
\includegraphics
[scale=0.24]
{
./Figures/wf.png
}
% \begin{tikzpicture}
% \node [rectangle,inner sep=0.2em,fill=red!20] [fit = (x) (w) (b)] (linear) {};
% \node [anchor=north] (linearlabel) at ([yshift=-1.1em]linear.south) {\alert{线性变换}}
\end{figure}
\tikz
{
\node
() at (0,0)
{}
;
\node
() at (0,10)
{}
;
}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 线性变换
\begin{frame}
{
线性变换
}
\begin{itemize}
\item
对于线性空间
$
V
$
,任意
$
\textbf
{
a
}$
,
$
\textbf
{
b
}
\in
V
$
和数域中的任意
$
\alpha
$
,线性变换
$
T
(
\cdot
)
$
需满足
\begin{eqnarray}
T(
\textbf
{
a
}
+
\textbf
{
b
}
)
&
=
&
T(
\textbf
{
a
}
) + T(
\textbf
{
b
}
)
\nonumber
\\
T(
\alpha
\textbf
{
a
}
)
&
=
&
\alpha
T(
\textbf
{
a
}
)
\nonumber
\end{eqnarray}
\item
<2-> 线性变换的一种几何解释:
\end{itemize}
\vspace
{
-0.5em
}
\visible
<2->
{
\begin{center}
\begin{tikzpicture}
\node
[anchor=west] (x) at (0,0)
{
\Large
{$
\textbf
{
x
}$}}
;
\node
[anchor=west] (dot) at (x.east)
{
\Large
{$
\cdot
$}}
;
\node
[anchor=west] (w) at (dot.east)
{
\Large
{$
\textbf
{
w
}$}}
;
\node
[anchor=west] (plus) at (w.east)
{
\Large
{$
+
$}}
;
\node
[anchor=west] (b) at (plus.east)
{
\Large
{$
\textbf
{
b
}$}}
;
\tikzstyle
{
neuron
}
= [rectangle,draw,thick,fill=red!30,red!35,minimum height=2em,minimum width=2em,font=
\small
]
\node
[neuron,anchor=north]
(a1) at ([xshift=-6em,yshift=-4em]x.south)
{}
;
\draw
[->,thick]
([xshift=-2em,yshift=0em]a1.south) to ([xshift=3em,yshift=0em]a1.south);
\draw
[->,thick]
([xshift=0em,yshift=-4em]a1.west) to ([xshift=0em,yshift=2em]a1.west);
\node
[below]
at ([xshift=0.5em,yshift=-1em]a1.west)
{
0
}
;
\node
[below]
at ([xshift=2em,yshift=-1em]a1.west)
{
1
}
;
\node
[below]
at ([xshift=-0.5em,yshift=2em]a1.west)
{
1
}
;
\node
[anchor=west] (x) at ([xshift=-0.7em,yshift=1em]a1.south)
{
\Large
{$
\textbf
{
F
}$}}
;
\visible
<3->
{
\node
[anchor=center,fill=green!20] (w2) at (w)
{
\Large
{$
\textbf
{
w
}$}}
;
\node
[anchor=north,inner sep=1pt] (wlabel) at ([yshift=-0.7em]w.south)
{
\small
{
旋转(rotation)
}}
;
\draw
[<-] ([yshift=-0.2em]w2.south) -- (wlabel.north);
\tikzstyle
{
neuron
}
= [rectangle,draw,thick,fill=red!30,red!35,minimum height=2em,minimum width=2em,font=
\small
]
\node
[neuron,anchor=north]
(a2) at ([xshift=10em,yshift=0em]a1.south)
{}
;
\draw
[->,thick]
([xshift=-2em,yshift=0em]a2.north) to ([xshift=3em,yshift=0em]a2.north);
\draw
[->,thick]
([xshift=0em,yshift=-2em]a2.west) to ([xshift=0em,yshift=4em]a2.west);
\node
[above]
at ([xshift=0.5em,yshift=1em]a2.west)
{
0
}
;
\node
[above]
at ([xshift=2em,yshift=1em]a2.west)
{
1
}
;
\node
[below]
at ([xshift=-0.5em,yshift=0em]a2.west)
{
-1
}
;
\node
[anchor=west] (x) at ([xshift=-3.5cm,yshift=2em]a2.north)
{
\scriptsize
{
$
w
=
\begin
{
bmatrix
}
1
&
0
&
0
\\
0
&
-
1
&
0
\\
0
&
0
&
1
\end
{
bmatrix
}$}
}
;
\node
[anchor=west,rotate = 180] (x) at ([xshift=0.7em,yshift=1em]a2.south)
{
\Large
{$
\textbf
{
F
}$}}
;
\draw
[-stealth, line width=2pt,dashed]
([xshift=4em,yshift=0em]a1.south) to ([xshift=-3em,yshift=0em]a2.north);
}
\visible
<4->
{
\node
[anchor=center,fill=purple!20] (b2) at (b)
{
\Large
{$
\textbf
{
b
}$}}
;
\node
[anchor=west] (blabel) at ([xshift=1.5em]b2.east)
{
平移(shift)
}
;
\draw
[<-] ([xshift=0.2em]b2.east) -- (blabel.west);
\tikzstyle
{
neuron
}
= [rectangle,draw,thick,fill=red!30,red!35,minimum height=2em,minimum width=2em,font=
\small
]
\node
[neuron,anchor=north]
(a3) at ([xshift=11em,yshift=2.05em]a2.south)
{}
;
\draw
[->,thick]
([xshift=-3em,yshift=0em]a3.north) to ([xshift=2em,yshift=0em]a3.north);
\draw
[->,thick]
([xshift=-1em,yshift=-2em]a3.west) to ([xshift=-1em,yshift=4em]a3.west);
\node
[above]
at ([xshift=-0.5em,yshift=1em]a3.west)
{
0
}
;
\node
[above]
at ([xshift=1em,yshift=1em]a3.west)
{
1
}
;
\node
[left]
at ([xshift=-0.75em,yshift=-0.5em]a3.west)
{
-1
}
;
\node
[anchor=west,rotate = 180] (x) at ([xshift=0.7em,yshift=1em]a3.south)
{
\Large
{$
\textbf
{
F
}$}}
;
\node
[anchor=west] (x) at ([xshift=-4cm,yshift=2em]a3.north)
{
\scriptsize
{
$
b
=
\begin
{
bmatrix
}
0
.
5
&
0
&
0
\\
0
&
0
&
0
\\
0
&
0
&
0
\end
{
bmatrix
}$}
}
;
\draw
[-stealth, line width=2pt,dashed]
([xshift=3em,yshift=1em]a2.east) to ([xshift=-3em,yshift=1em]a3.west);
}
\end{tikzpicture}
\end{center}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 线性变换:更复杂的实例
\begin{frame}
[fragile]
{
线性变换(续)
}
\begin{itemize}
\item
线性变换也适用于更加复杂的情况,这也给神经网络提供了拟合不同数据分布的能力
\begin{itemize}
\item
比如,我们可以把三维图形投影到二维平面上
\item
再比如,我们也可以把二维平面上的图形映射到三维空间中
\end{itemize}
\end{itemize}
\vspace
{
1em
}
\newcommand
{
\plane
}
[1]
{
(-1.95, #1, 1.35) --
++(3.6, 0.6, 0.0) --
++(0.3, -1.8, -2.7) --
++(-3.6, -0.6, -0.0) --
cycle
}
\newcommand
{
\nullspacepicture
}{
% bottom part of the row space line
\draw
(0,0,0) -- (0.3,-1.8,1.233);
% five planes
\draw
[fill=gray!20]
\plane
{
-0.2
}
;
\draw
[fill=gray!20]
\plane
{
0.2
}
;
\draw
[fill=blue!70!gray]
\plane
{
0.6
}
;
\draw
[fill=gray!20]
\plane
{
1
}
;
\draw
[fill=gray!20]
\plane
{
1.4
}
;
% top part of the row space line
\draw
(-.094,.562,-.385) -- (-0.3,1.8,-1.233);
}
\newcommand
{
\rangepicture
}
[1]
{
% axes
\draw
[help lines,->]
(-2,0) -- (2,0);
\draw
[help lines,->]
(0,-2) -- (0,2);
% the line and circles
\draw
(1,-2) -- (-1,2);
\draw
[fill=#1]
(0,0) circle (2.5pt);
\draw
[fill=gray!50]
(0.2,-0.4) circle (2.5pt);
\draw
[fill=gray!50]
(0.4,-0.8) circle (2.5pt);
\draw
[fill=gray!50]
(-0.2,0.4) circle (2.5pt);
\draw
[fill=gray!50]
(-0.4,0.8) circle (2.5pt);
}
\begin{tikzpicture}
[scale=0.95]
\centering
\nullspacepicture
% the label
\node
at (-2,1.8)
{$
\mathbb
{
R
}^
3
$}
;
% arrow between diagrams
\path
[->]
(3,0) edge[bend left] node[above]
{
线性变换
}
(4.5,0);
\begin{scope}
[xshift=7cm]
\rangepicture
{
blue!70!gray
}
\node
at (1.8,1.8)
{$
\mathbb
{
R
}^
2
$}
;
\end{scope}
\end{tikzpicture}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 激活函数
\begin{frame}
{
激活函数
}
\begin{itemize}
\item
激活函数更多地是为了解决实际问题中的
\alert
{
非线性
}
变换
\begin{itemize}
\item
非线性部分提供了拟合任意函数的能力(稍后介绍)
\end{itemize}
\end{itemize}
\vspace
{
-1em
}
\begin{center}
\begin{tikzpicture}
\begin{scope}
\draw
[line width=3pt,ublue,-](0,0) -- (-2.0,1);
\node
[anchor=north] (linelabel) at (-1.0,-0.5)
{
\footnotesize
{
我是一根筷子
}}
;
\end{scope}
\begin{scope}
[xshift=10em]
\draw
[line width=3pt,ublue,-,line cap=round](0,0) .. controls (-0.5,-0.25) and (-0.5,1).. (-1.3,0.3) .. controls (-2.3,-0.3) and (-1.1,1.8).. (-2.0,1);
\node
[] at (-2,1)
{
\white
{$
\cdot
$}}
;
\node
[anchor=north] (linelabel) at (-1.0,-0.5)
{
\footnotesize
{
我是一只蚯蚓
}}
;
\end{scope}
\end{tikzpicture}
\end{center}
\begin{itemize}
\item
<2-> 简单的非线性函数
\end{itemize}
\vspace
{
-1em
}
\node
[anchor=center] (y) at (0,0)
{
\LARGE
{$
\textbf
{
y
}$}}
;
\node
[anchor=west] (eq) at (y.east)
{
\LARGE
{$
=
$}}
;
\node
[anchor=west] (func) at (eq.east)
{
\LARGE
{$
f
$}}
;
\node
[anchor=west] (brace01) at (func.east)
{
\LARGE
{$
(
$}}
;
\node
[anchor=west] (x) at (brace01.east)
{
\LARGE
{$
\textbf
{
x
}$}}
;
\node
[anchor=west] (dot) at (x.east)
{
\LARGE
{$
\cdot
$}}
;
\node
[anchor=west] (w) at (dot.east)
{
\LARGE
{$
\textbf
{
w
}$}}
;
\node
[anchor=west] (plus) at (w.east)
{
\LARGE
{$
+
$}}
;
\node
[anchor=west] (b) at (plus.east)
{
\LARGE
{$
\textbf
{
b
}$}}
;
\node
[anchor=west] (brace02) at (b.east)
{
\LARGE
{$
)
$}}
;
\visible
<2->
{
\begin{center}
\begin{tikzpicture}
\begin{scope}
[]
\draw
[->,thick] (-1.5,0) -- (1.5,0);
\draw
[->,thick] (0,-0.1) -- (0,1.5);
\draw
[-,very thick,ublue,domain=-1.2:1.2,samples=100] plot (
\x
,
{
0.5 * (
\x
-0.3)
^
2 + 0.2
}
);
\node
[anchor=west] (ylabel) at (0,1.3)
{$
y
$}
;
\node
[anchor=north] (xlabel) at (1.3,0)
{$
x
$}
;
\node
[anchor=north] (func) at (0,-0.8)
{
\footnotesize
{$
y
=
\frac
{
1
}{
2
}
(
x
-
0
.
3
)
^
2
+
0
.
2
$}}
;
\node
[anchor=south west] (flabel) at (func.north west)
{
\footnotesize
{
Quadratic:
}}
;
\end{scope}
\begin{scope}
[xshift=9.5em]
\draw
[->,thick] (-1.5,0) -- (1.5,0);
\draw
[->,thick] (0,-0.1) -- (0,1.5);
\draw
[-,very thick,ublue,domain=-1.2:1.2,samples=100] plot (
\x
,
{
0.5 * exp(
\x
)
}
);
\node
[anchor=west] (ylabel) at (0,1.3)
{$
y
$}
;
\node
[anchor=north] (xlabel) at (1.3,0)
{$
x
$}
;
\node
[anchor=north] (func) at (0,-0.8)
{
\footnotesize
{$
y
=
0
.
5
\cdot
\exp
(
x
)
$}}
;
\node
[anchor=south west] (flabel) at ([xshift=-1.8em]func.north west)
{
\footnotesize
{
Exponential:
}}
;
\end{scope}
\begin{scope}
[xshift=19em]
\draw
[->,thick] (-1.5,0) -- (1.5,0);
\draw
[->,thick] (0,-0.1) -- (0,1.5);
\draw
[-,very thick,ublue,domain=-1.1:1.2,samples=100] plot (
\x
,
{
abs(
\x
-0.2) + 0.1
}
);
\node
[anchor=west] (ylabel) at (0,1.3)
{$
y
$}
;
\node
[anchor=north] (xlabel) at (1.3,0)
{$
x
$}
;
\node
[anchor=north] (func) at (0,-0.8)
{
\footnotesize
{$
y
=
|x
-
0
.
2
|
+
0
.
1
$}}
;
\node
[anchor=south west] (flabel) at ([xshift=-0.4em]func.north west)
{
\footnotesize
{
Absolute:
}}
;
\end{scope}
\end{tikzpicture}
\end{center}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 常用的激活函数
\begin{frame}
{
常用的激活函数
}
\begin{itemize}
\item
好多好多,列举不全 ...
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection
{
多层神经网络
}
%%%------------------------------------------------------------------------------------------------------------
%%% 一层 -> 多层
\begin{frame}
{
更多的层
}
\begin{itemize}
\item
\textbf
{
单层神经网络
}
:线性变换 + 激活函数(非线性)
\item
我们可以重复上面的过程,构建
\textbf
{
多层神经网络
}
\end{itemize}
\vspace
{
-1.0em
}
\begin{center}
\begin{tikzpicture}
\begin{scope}
[]
\node
[anchor=center,fill=yellow!30] (x2) at (x)
{
\LARGE
{$
\textbf
{
x
}$}}
;
\node
[anchor=south] (xlabel) at ([xshift=-3em,yshift=1.5em]x.north)
{
\alert
{
向量?矩阵?...
}}
;
\draw
[<-] ([yshift=0.2em,xshift=-0.5em]x2.north) -- ([xshift=1em]xlabel.south);
\def\neuronsep
{
1.6
}
\tikzstyle
{
neuronnode
}
= [minimum size=1.7em,circle,draw,ublue,very thick,inner sep=1pt, fill=white,align=center,drop shadow=
{
shadow xshift=0.1em,shadow yshift=-0.1em
}
]
%%% layer 1
\foreach
\n
in
{
1,...,5
}{
\node
[neuronnode] (neuron0
\n
) at (
\n
*
\neuronsep
,0)
{
\tiny
{$
f
_
1
$}
\\
[-1ex]
\tiny
{$
\sum
$}}
;
\draw
[-,ublue] (neuron0
\n
.east) -- (neuron0
\n
.west);
}
\foreach
\n
in
{
1,...,5
}{
\foreach
\m
in
{
1,...,5
}{
\draw
[<-] (neuron0
\m
.south) -- ([yshift=-2em]neuron0
\n
.south);
}
\node
[anchor=north] (x
\n
) at ([yshift=-2em]neuron0
\n
.south)
{$
x
_
\n
$}
;
\visible
<1>
{
\draw
[<-,thick] ([yshift=1.5em]neuron0
\n
.north) -- (neuron0
\n
.north);
\node
[anchor=south] (y
\n
) at ([yshift=1.5em]neuron0
\n
.north)
{$
y
_
\n
$}
;
}
}
\node
[anchor=center,fill=red!20] (y2) at (y)
{
\LARGE
{$
\textbf
{
y
}$}}
;
\draw
[<-] ([yshift=0.2em,xshift=0.5em]y2.north) -- ([xshift=-1em]xlabel.south);
\node
[anchor=west] (w1label) at ([xshift=-0.5em,yshift=0.5em]x5.north east)
{$
\textbf
{
w
}_
1
$}
;
\begin{pgfonlayer}
{
background
}
\node
[rectangle,inner sep=0.2em,fill=red!20] [fit = (neuron01) (neuron05)] (layer01)
{}
;
\end{pgfonlayer}
\node
[anchor=west] (layer00label) at ([xshift=1.25em]x5.east)
{
\alert
{
输入层
}}
;
\visible
<2->
{
\node
[anchor=west] (layer01label) at ([xshift=1em]layer01.east)
{
第二层
}
;
}
\visible
<4->
{
\node
[anchor=west] (layer01label2) at (layer01label.east)
{
(
\alert
{
隐层
}
)
}
;
}
%%% layer 2
\visible
<2->
{
\foreach
\n
in
{
2,...,4
}{
\node
[neuronnode] (neuron1
\n
) at (
\n
*
\neuronsep
,4em)
{
\tiny
{$
f
_
2
$}
\\
[-1ex]
\tiny
{$
\sum
$}}
;
\draw
[-,ublue] (neuron1
\n
.east) -- (neuron1
\n
.west);
}
\foreach
\n
in
{
2,...,4
}{
\foreach
\m
in
{
1,...,5
}{
\draw
[<-] (neuron1
\n
.south) -- (neuron0
\m
.north);
}
\visible
<2>
{
\draw
[<-,thick] ([yshift=1.5em]neuron1
\n
.north) -- (neuron1
\n
.north);
\node
[anchor=south] (y
\n
) at ([yshift=1.5em]neuron1
\n
.north)
{$
y
_
\n
$}
;
}
}
\node
[anchor=west] (w2label) at ([xshift=-2.5em,yshift=5.0em]x5.north east)
{$
\textbf
{
w
}_
2
$}
;
\begin{pgfonlayer}
{
background
}
\visible
<2->
{
\node
[rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron12) (neuron14)] (layer02)
{}
;
}
\end{pgfonlayer}
\node
[anchor=west] (layer02label) at ([xshift=4.9em]layer02.east)
{
第三层
}
;
\visible
<4->
{
\node
[anchor=west] (layer02label2) at (layer02label.east)
{
(
\alert
{
隐层
}
)
}
;
}
}
%%% layer 3
\visible
<3->
{
\foreach
\n
in
{
1,...,5
}{
\node
[neuronnode] (neuron2
\n
) at (
\n
*
\neuronsep
,8em)
{
\tiny
{$
f
_
3
$}
\\
[-1ex]
\tiny
{$
\sum
$}}
;
\draw
[-,ublue] (neuron2
\n
.east) -- (neuron2
\n
.west);
}
\foreach
\n
in
{
1,...,5
}{
\foreach
\m
in
{
2,...,4
}{
\draw
[<-] (neuron2
\n
.south) -- (neuron1
\m
.north);
}
\node
[anchor=center,fill=green!20] (w2) at (w)
{
\LARGE
{$
\textbf
{
w
}$}}
;
\node
[anchor=north] (wlabel) at ([yshift=-1.0em]w.south)
{
矩阵 e.g.,
}
;
\draw
[<-] ([yshift=-0.2em]w2.south) -- (wlabel.north);
\node
[anchor=west] (wsample) at ([xshift=-0.5em]wlabel.east)
{
\footnotesize
{$
\left
(
\begin
{
array
}{
c c
}
1
&
2
\\
3
&
4
\end
{
array
}
\right
)
$}}
;
\node
[anchor=south] (y
\n
) at ([yshift=1.5em]neuron2
\n
.north)
{$
y
_
\n
$}
;
\draw
[<-,thick] ([yshift=1.5em]neuron2
\n
.north) -- (neuron2
\n
.north);
\node
[anchor=center,fill=purple!20] (b2) at (b)
{
\LARGE
{$
\textbf
{
b
}$}}
;
\node
[anchor=south] (blabel) at ([yshift=1.3em]b.north)
{
向量 e.g.,
}
;
\draw
[<-] ([yshift=0.2em]b2.north) -- (blabel.south);
\node
[anchor=west] (bsample) at ([xshift=-0.5em]blabel.east)
{
\footnotesize
{$
(
1
\ \
3
)
$}}
;
}
\node
[anchor=west] (w3label) at ([xshift=-2.5em,yshift=8.5em]x5.north east)
{$
\textbf
{
w
}_
3
$}
;
\begin{pgfonlayer}
{
background
}
\visible
<3->
{
\node
[rectangle,inner sep=0.2em,fill=blue!20] [fit = (neuron21) (neuron25)] (layer03)
{}
;
}
\end{pgfonlayer}
\node
[anchor=west] (layer03label) at ([xshift=1em]layer03.east)
{
第四层
}
;
\visible
<4->
{
\node
[anchor=west] (layer03label2) at (layer03label.east)
{
(
\alert
{
输出层
}
)
}
;
}
}
\end{scope}
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 两层神经网络可以逼近任何函数
\begin{frame}
{
多层神经网络可以逼近任意函数
}
\begin{itemize}
\item
以一个简单的三层网络为例(隐层激活函数:sigmoid)
\end{itemize}
\begin{center}
\begin{tikzpicture}
%% a two-layer neural network
\begin{scope}
\tikzstyle
{
neuronnode
}
= [minimum size=1.7em,circle,draw,ublue,very thick,inner sep=1pt, fill=white,align=center,drop shadow=
{
shadow xshift=0.1em,shadow yshift=-0.1em
}
]
%% input and hidden layers
\node
[neuronnode] (n10) at (0,0)
{
\tiny
{$
f
$}
\\
[-1ex]
\tiny
{$
\sum
$}}
;
\node
[neuronnode] (n11) at (1.5,0)
{
\tiny
{$
f
$}
\\
[-1ex]
\tiny
{$
\sum
$}}
;
\draw
[-,ublue] (n10.west) -- (n10.east);
\draw
[-,ublue] (n11.west) -- (n11.east);
\node
[anchor=north] (x1) at ([yshift=-6em]n11.south)
{$
x
_
1
$}
;
\node
[anchor=north] (b) at ([yshift=-6em]n10.south)
{$
b
$}
;
\visible
<1-10>
{
\draw
[->,thick] (b.north) -- ([yshift=-0.1em]n10.south);
\draw
[->,thick] (x1.north) -- ([yshift=-0.1em]n10.290);
}
\visible
<1>
{
\draw
[->,thick] (b.north) -- ([yshift=-0.1em]n11.250);
\draw
[->,thick] (x1.north) -- ([yshift=-0.1em]n11.south);
}
\visible
<11->
{
\draw
[->,thick,red] (b.north) -- ([yshift=-0.1em]n10.south);
\draw
[->,thick,ugreen] (x1.north) -- ([yshift=-0.1em]n10.290);
}
\visible
<2->
{
\draw
[->,thick,blue] (b.north) -- ([yshift=-0.1em]n11.250);
\draw
[->,thick,purple] (x1.north) -- ([yshift=-0.1em]n11.south);
}
\visible
<15->
{
\node
[neuronnode] (n12) at (2.7,0)
{
\tiny
{$
f
$}
\\
[-1ex]
\tiny
{$
\sum
$}}
;
\node
[neuronnode] (n13) at (3.8,0)
{
\tiny
{$
f
$}
\\
[-1ex]
\tiny
{$
\sum
$}}
;
\draw
[-,ublue] (n12.west) -- (n12.east);
\draw
[-,ublue] (n13.west) -- (n13.east);
\draw
[->,thick] (b.north) -- ([yshift=-0.1em]n12.250);
\draw
[->,thick] (x1.north) -- ([yshift=-0.1em]n12.270);
\draw
[->,thick] (b.north) -- ([yshift=-0.1em]n13.230);
\draw
[->,thick] (x1.north) -- ([yshift=-0.1em]n13.250);
}
\visible
<16->
{
\node
[anchor=west] (morenodes) at (n13.east)
{
...
}
;
}
%% output layers
\node
[neuronnode] (n20) at (0.75,5em)
{
\scriptsize
{$
\sum
$}}
;
\visible
<1-10>
{
\draw
[->,thick] ([yshift=0.1em]n10.north) -- ([yshift=-0.1em]n20.250);
}
\visible
<1-8>
{
\draw
[->,thick] ([yshift=0.1em]n11.north) -- ([yshift=-0.1em]n20.290);
}
\visible
<11->
{
\draw
[->,thick,brown] ([yshift=0.1em]n10.north) -- ([yshift=-0.1em]n20.250);
}
\visible
<9->
{
\draw
[->,thick,orange] ([yshift=0.1em]n11.north) -- ([yshift=-0.1em]n20.290);
}
\node
[] (y) at ([yshift=3em]n20.north)
{$
y
$}
;
\draw
[->,thick] ([yshift=0.1em]n20.north) -- (y.south);
\visible
<15->
{
\draw
[->,thick] ([yshift=0.1em]n12.north) -- ([yshift=-0.1em]n20.310);
\draw
[->,thick] ([yshift=0.1em]n13.north) -- ([yshift=-0.1em]n20.330);
}
%% weight and bias
\visible
<11->
{
\node
[anchor=center,rotate=90,fill=white,inner sep=1pt] (b0) at ([yshift=3em,xshift=-0.5em]b.north)
{
\tiny
{$
b
=-
6
$}}
;
}
\visible
<11->
{
\node
[anchor=center,rotate=-59,fill=white,inner sep=1pt] (w2) at ([yshift=1.2em,xshift=-1.2em]x1.north)
{
\tiny
{$
w
=
100
$}}
;
}
\visible
<2-6>
{
\node
[anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north)
{
\tiny
{$
b
=
0
$}}
;
}
\visible
<7>
{
\node
[anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north)
{
\tiny
{$
b
=-
2
$}}
;
}
\visible
<8->
{
\node
[anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north)
{
\tiny
{$
b
=-
4
$}}
;
}
\visible
<2-4>
{
\node
[anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north)
{
\tiny
{$
w
=
1
$}}
;
}
\visible
<5>
{
\node
[anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north)
{
\tiny
{$
w
=
10
$}}
;
}
\visible
<6->
{
\node
[anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north)
{
\tiny
{$
w
=
100
$}}
;
}
\visible
<11>
{
\node
[anchor=center,rotate=62,fill=white,inner sep=1pt] (w21) at ([yshift=2em,xshift=0.5em]n10.north)
{
\tiny
{$
w'
=
0
.
7
$}}
;
}
\visible
<12->
{
\node
[anchor=center,rotate=62,fill=white,inner sep=1pt] (w21) at ([yshift=2em,xshift=0.5em]n10.north)
{
\tiny
{$
w'
=-
0
.
7
$}}
;
}
\visible
<2-8>
{
\node
[anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north)
{
\tiny
{$
w'
=
1
$}}
;
}
\visible
<9>
{
\node
[anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north)
{
\tiny
{$
w'
=
0
.
9
$}}
;
}
\visible
<10->
{
\node
[anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north)
{
\tiny
{$
w'
=
0
.
7
$}}
;
}
%% sigmoid box
\begin{scope}
\visible
<3->
{
\node
[anchor=west] (flabel) at ([xshift=1.2in]y.east)
{
\footnotesize
{
sigmoid:
}}
;
\node
[anchor=north east] (slabel) at ([xshift=0]flabel.south east)
{
\footnotesize
{
sum:
}}
;
\node
[anchor=west,inner sep=2pt] (flabel2) at (flabel.east)
{
\footnotesize
{$
f
(
s
)=
1
/(
1
+
e
^{
-
s
}
)
$}}
;
\node
[anchor=west,inner sep=2pt] (flabel3) at (slabel.east)
{
\footnotesize
{$
s
=
x
_
1
\cdot
w
+
b
$}}
;
\draw
[->,thick,dotted] ([yshift=-0.3em,xshift=-0.1em]n11.60) .. controls +(east:1) and +(west:2) .. ([xshift=-0.2em]flabel.west) ;
\begin{pgfonlayer}
{
background
}
\visible
<3->
{
\node
[rectangle,inner sep=0.2em,fill=blue!20,drop shadow=
{
shadow xshift=0.1em,shadow yshift=-0.1em
}
] [fit = (flabel) (flabel2) (flabel3)] (funcbox)
{}
;
}
\end{pgfonlayer}
}
\end{scope}
%% output illustration
\begin{scope}
[xshift=2.8in,yshift=0.1in]
\visible
<4->
{
\draw
[->,thick] (-2.2,0) -- (2.2,0);
\draw
[->,thick] (0,0) -- (0,2);
\draw
[-] (-0.05,1) -- (0.05,1);
\node
[anchor=east,inner sep=1pt] (label1) at (0,1)
{
\tiny
{
1
}}
;
\node
[anchor=south east,inner sep=1pt] (label2) at (0,0)
{
\tiny
{
0
}}
;
}
\visible
<4>
{
\draw
[-,very thick,ublue,domain=-2:2,samples=100] plot (
\x
,
{
1/(1+exp(-2*
\x
))
}
);
}
\visible
<5>
{
\draw
[-,very thick,ublue,domain=-2:2,samples=100] plot (
\x
,
{
1/(1+exp(-4*
\x
))
}
);
}
\visible
<6>
{
\draw
[-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0,0) -- (0,1) -- (2,1);
}
\visible
<7>
{
\draw
[-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.25,0) -- (0.25,1) -- (2,1);
}
\visible
<8>
{
\draw
[-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,1) -- (2,1);
}
\visible
<9>
{
\draw
[-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.9) -- (2,0.9);
}
\visible
<10>
{
\draw
[-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.7) -- (2,0.7);
}
\visible
<11>
{
\draw
[-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.7) -- (0.7,0.7) -- (0.7,1.4) -- (2,1.4);
}
\visible
<12->
{
\draw
[-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.7) -- (0.7,0.7) -- (0.7,0) -- (2,0);
}
\visible
<15->
{
\draw
[-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.7,0) -- (0.7,0.6) -- (0.9,0.6) -- (0.9,0) -- (2,0);
}
\visible
<14>
{
\draw
[->,dashed] (0.6,-0.05) -- (0.6,-0.96in);
}
\visible
<15->
{
\draw
[->,dashed] (0.8,-0.05) -- (0.8,-0.98in);
}
\visible
<4>
{
\node
[anchor=north west,align=left] (wblabel) at (-2,2)
{
\scriptsize
{$
w
_
1
=
1
$}
\\
[-0ex]
\scriptsize
{
\
$
b
_
1
=
0
$}}
;
}
\visible
<5>
{
\node
[anchor=north west,align=left] (wblabel) at (-2,2)
{
\alert
{
\scriptsize
{$
w
_
1
=
10
$}}
\\
[-0ex]
\scriptsize
{
\
$
b
_
1
=
0
$}}
;
}
\visible
<6>
{
\node
[anchor=north west,align=left] (wblabel) at (-2,2)
{
\alert
{
\scriptsize
{$
w
_
1
=
100
$}}
\\
[-0ex]
\scriptsize
{
\
$
b
_
1
=
0
$}}
;
}
\visible
<7>
{
\node
[anchor=north west,align=left] (wblabel) at (-2,2)
{
\scriptsize
{$
w
_
1
=
100
$}
\\
[-0ex]
\alert
{
\scriptsize
{
\
$
b
_
1
=-
2
$}}}
;
}
\visible
<8>
{
\node
[anchor=north west,align=left] (wblabel) at (-2,2)
{
\scriptsize
{$
w
_
1
=
100
$}
\\
[-0ex]
\alert
{
\scriptsize
{
\
$
b
_
1
=-
4
$}}}
;
}
\visible
<9>
{
\node
[anchor=north west,align=left] (wblabel) at (-2,2)
{
\alert
{
\scriptsize
{$
w'
_
1
=
0
.
9
$}}}
;
}
\visible
<10>
{
\node
[anchor=north west,align=left] (wblabel) at (-2,2)
{
\alert
{
\scriptsize
{$
w'
_
1
=
0
.
7
$}}}
;
}
\visible
<11>
{
\node
[anchor=north west,align=left] (wblabel) at (-2,2)
{
\alert
{
\scriptsize
{$
w
_
2
=
100
$}}
\\
[-0ex]
\alert
{
\scriptsize
{
\
$
b
_
2
=-
6
$}}
\\
[-0ex]
\alert
{
\scriptsize
{
\
$
w'
_
2
=
0
.
7
$}}}
;
}
\visible
<12>
{
\node
[anchor=north west,align=left] (wblabel) at (-2,2)
{
\scriptsize
{$
w
_
2
=
100
$}
\\
[-0ex]
\scriptsize
{
\
$
b
_
2
=-
6
$}
\\
[-0ex]
\alert
{
\scriptsize
{
\
$
w'
_
2
=-
0
.
7
$}}}
;
}
\visible
<13->
{
\node
[anchor=north west,align=left] (wblabel) at (-2.5,2)
{
\scriptsize
{
这是一个
}
\\
[-1ex]
\scriptsize
{
step function
}}
;
}
\end{scope}
\begin{scope}
[xshift=2.8in,yshift=-1.2in]
\visible
<13->
{
\draw
[->,thick] (-2.2,0) -- (2.2,0);
\draw
[->,thick] (0,0) -- (0,2);
\draw
[-,very thick,red,domain=-1.98:2,samples=100] plot (
\x
,
{
0.2 * (
\x
+0.4)
^
3 + 1.2 - 0.3 *(
\x
+ 0.8)
^
2
}
);
}
\visible
<14->
{
\foreach
\n
in
{
0.5
}{
\pgfmathsetmacro
{
\result
}{
0.2 * (
\n
+ 0.1 + 0.4)
^
3 + 1.2 - 0.3 *(
\n
+ 0.1 + 0.8)
^
2
}
%
\draw
[-,ublue,thick] (
\n
,0) -- (
\n
,
\result
) -- (
\n
+ 0.2,
\result
) -- (
\n
+ 0.2, 0);
}
}
\visible
<15->
{
\foreach
\n
in
{
0.7
}{
\pgfmathsetmacro
{
\result
}{
0.2 * (
\n
+ 0.1 + 0.4)
^
3 + 1.2 - 0.3 *(
\n
+ 0.1 + 0.8)
^
2
}
%
\draw
[-,ublue,thick] (
\n
,0) -- (
\n
,
\result
) -- (
\n
+ 0.2,
\result
) -- (
\n
+ 0.2, 0);
}
}
\visible
<16->
{
\foreach
\n
in
{
-1.9,-1.7,...,1.9
}{
\pgfmathsetmacro
{
\result
}{
0.2 * (
\n
+ 0.1 + 0.4)
^
3 + 1.2 - 0.3 *(
\n
+ 0.1 + 0.8)
^
2
}
%
\draw
[-,ublue,thick] (
\n
,0) -- (
\n
,
\result
) -- (
\n
+ 0.2,
\result
) -- (
\n
+ 0.2, 0);
}
}
\visible
<14>
{
\node
[anchor=north west,align=left] (wblabel) at (-2.5,2.5)
{
\scriptsize
{
函数的每一段都可
}
\\
[-1ex]
\scriptsize
{
由step function
}
\\
[-1ex]
\scriptsize
{
近似
}}
;
}
\visible
<15>
{
\node
[anchor=north west,align=left] (wblabel) at (-2.5,2.5)
{
\scriptsize
{
增加隐层神经元
}
\\
[-1ex]
\scriptsize
{
可以拟合更多的
}
\\
[-1ex]
\scriptsize
{
部分
}}
;
}
\visible
<16>
{
\node
[anchor=north west,align=left] (wblabel) at (-2.5,2.5)
{
\scriptsize
{
理论上足够多的
}
\\
[-1ex]
\scriptsize
{
隐层神经元可以
}
\\
[-1ex]
\scriptsize
{
拟合
\alert
{
任意函数
}}}
;
}
\end{scope}
\end{scope}
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection
{
参数学习 - 反向传播
}
\end{CJK}
...
...
Section05-Neural-Networks-and-Language-Modeling/section05-test.vrb
deleted
100644 → 0
查看文件 @
1037ae94
\frametitle{线性变换(续)}
\begin{itemize}
\item 线性变换也适用于更加复杂的情况,这也给神经网络提供了拟合不同数据分布的能力
\begin{itemize}
\item 比如,我们可以把三维图形投影到二维平面上
\item 再比如,我们也可以把二维平面上的图形映射到三维空间中
\end{itemize}
\end{itemize}
\vspace{1em}
% \plane{<y>}: closed parallelogram path in 3-D coordinates.
% The first corner is (-1.95, <y>, 1.35); the other corners are reached by
% the relative offsets below, and `cycle` closes the outline.
% Expands to a bare path, so it must follow a path command,
% e.g. \draw[fill=gray!20]\plane{0.2};
\newcommand{\plane}[1]{
(-1.95, #1, 1.35)
-- ++(3.6, 0.6, 0.0)
-- ++(0.3, -1.8, -2.7)
-- ++(-3.6, -0.6, -0.0)
-- cycle}
% \nullspacepicture: 3-D picture made of one line segment, a stack of five
% \plane parallelograms (the third one filled with blue!70!gray, the others
% with gray!20) and a second line segment.
% Statements are issued in painting order: lower segment, planes, upper segment.
\newcommand{\nullspacepicture}{
% bottom part of the row space line
\draw (0,0,0) -- (0.3,-1.8,1.233);
% five planes, listed as <y offset>/<fill colour>
\foreach \planeoffset/\planefill in {-0.2/gray!20, 0.2/gray!20, 0.6/blue!70!gray, 1/gray!20, 1.4/gray!20}
{ \draw[fill=\planefill]\plane{\planeoffset}; }
% top part of the row space line
\draw (-.094,.562,-.385) -- (-0.3,1.8,-1.233);
}
% \rangepicture{<colour>}: 2-D picture with two help-line axes, a line from
% (1,-2) to (-1,2), and five dots of radius 2.5pt on that line.
% The dot at the origin is filled with <colour>; the other four use gray!50.
\newcommand{\rangepicture}[1]{
% axes
\draw[help lines,->] (-2,0) -- (2,0);
\draw[help lines,->] (0,-2) -- (0,2);
% the line
\draw (1,-2) -- (-1,2);
% highlighted dot at the origin
\draw[fill=#1] (0,0) circle (2.5pt);
% the four remaining dots, in the original painting order
\foreach \dotpos in {(0.2,-0.4), (0.4,-0.8), (-0.2,0.4), (-0.4,0.8)}
{ \draw[fill=gray!50] \dotpos circle (2.5pt); }
}
% Assemble the slide figure: the 3-D picture on the left, a labelled bent
% arrow in the middle, and the 2-D picture inside a scope shifted 7cm in x.
\begin{tikzpicture}[scale=0.95]
\centering
% left-hand picture (line segments and stacked planes)
\nullspacepicture
% label for the left-hand picture
\node at (-2,1.8) {$\mathbb{R}^3$};
% arrow between diagrams
\path[->] (3,0) edge[bend left] node[above] {线性变换} (4.5,0);
% right-hand picture; its highlighted dot uses the same colour as the
% highlighted plane on the left
\begin{scope}[xshift=7cm]
\rangepicture{blue!70!gray}
\node at (1.8,1.8) {$\mathbb{R}^2$};
\end{scope}
\end{tikzpicture}
Section05-Neural-Networks-and-Language-Modeling/section05.tex
查看文件 @
caac3b7a
...
...
@@ -699,7 +699,7 @@ GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection
{
数学基础:张量计算
}
\subsection
{
人工神经元
}
%%%------------------------------------------------------------------------------------------------------------
%%% 人工神经元的函数形式
...
...
@@ -865,6 +865,8 @@ GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7}
\item
对于向量
$
\textbf
{
x
}
\in
\mathbb
{
R
}^
m
$
,一层神经网络首先把它经过
\textbf
{
\alert
{
线性变换
}}
映射到
$
\mathbb
{
R
}^
n
$
,之后经过
\textbf
{{
\color
{
blue
}
激活函数
}}
变换成
$
\textbf
{
y
}
\in
\mathbb
{
R
}^
n
$
\end{itemize}
\vspace
{
1em
}
\begin{center}
\begin{tikzpicture}
...
...
@@ -890,10 +892,19 @@ GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7}
\end{pgfonlayer}
\end{tikzpicture}
\end{center}
\begin{figure}
[htp!]
\includegraphics
[scale=0.24]
{
./Figures/wf.png
}
% \begin{tikzpicture}
% \node [rectangle,inner sep=0.2em,fill=red!20] [fit = (x) (w) (b)] (linear) {};
% \node [anchor=north] (linearlabel) at ([yshift=-1.1em]linear.south) {\alert{线性变换}}
\end{figure}
\tikz
{
\node
() at (0,0)
{}
;
\node
() at (0,10)
{}
;
}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 线性变换
\begin{frame}
{
线性变换
}
...
...
@@ -906,7 +917,7 @@ T(\alpha \textbf{a}) & = & \alpha T(\textbf{a}) \nonumber
\item
<2-> 线性变换的一种几何解释:
\end{itemize}
\vspace
{
-
1
em
}
\vspace
{
-
0.5
em
}
\visible
<2->
{
\begin{center}
\begin{tikzpicture}
...
...
@@ -917,32 +928,191 @@ T(\alpha \textbf{a}) & = & \alpha T(\textbf{a}) \nonumber
\node
[anchor=west] (plus) at (w.east)
{
\Large
{$
+
$}}
;
\node
[anchor=west] (b) at (plus.east)
{
\Large
{$
\textbf
{
b
}$}}
;
\tikzstyle
{
neuron
}
= [rectangle,draw,thick,fill=red!30,red!35,minimum height=2em,minimum width=2em,font=
\small
]
\node
[neuron,anchor=north]
(a1) at ([xshift=-6em,yshift=-4em]x.south)
{}
;
\draw
[->,thick]
([xshift=-2em,yshift=0em]a1.south) to ([xshift=3em,yshift=0em]a1.south);
\draw
[->,thick]
([xshift=0em,yshift=-4em]a1.west) to ([xshift=0em,yshift=2em]a1.west);
\node
[below]
at ([xshift=0.5em,yshift=-1em]a1.west)
{
0
}
;
\node
[below]
at ([xshift=2em,yshift=-1em]a1.west)
{
1
}
;
\node
[below]
at ([xshift=-0.5em,yshift=2em]a1.west)
{
1
}
;
\node
[anchor=west] (x) at ([xshift=-0.7em,yshift=1em]a1.south)
{
\Large
{$
\textbf
{
F
}$}}
;
\visible
<3->
{
\node
[anchor=center,fill=green!20] (w2) at (w)
{
\Large
{$
\textbf
{
w
}$}}
;
\node
[anchor=north,inner sep=1pt] (wlabel) at ([yshift=-0.7em]w.south)
{
\small
{
旋转(rotation)
}}
;
\draw
[<-] ([yshift=-0.2em]w2.south) -- (wlabel.north);
\tikzstyle
{
neuron
}
= [rectangle,draw,thick,fill=red!30,red!35,minimum height=2em,minimum width=2em,font=
\small
]
\node
[neuron,anchor=north]
(a2) at ([xshift=10em,yshift=0em]a1.south)
{}
;
\draw
[->,thick]
([xshift=-2em,yshift=0em]a2.north) to ([xshift=3em,yshift=0em]a2.north);
\draw
[->,thick]
([xshift=0em,yshift=-2em]a2.west) to ([xshift=0em,yshift=4em]a2.west);
\node
[above]
at ([xshift=0.5em,yshift=1em]a2.west)
{
0
}
;
\node
[above]
at ([xshift=2em,yshift=1em]a2.west)
{
1
}
;
\node
[below]
at ([xshift=-0.5em,yshift=0em]a2.west)
{
-1
}
;
\node
[anchor=west] (x) at ([xshift=-3.5cm,yshift=2em]a2.north)
{
\scriptsize
{
$
w
=
\begin
{
bmatrix
}
1
&
0
&
0
\\
0
&
-
1
&
0
\\
0
&
0
&
1
\end
{
bmatrix
}$}
}
;
\node
[anchor=west,rotate = 180] (x) at ([xshift=0.7em,yshift=1em]a2.south)
{
\Large
{$
\textbf
{
F
}$}}
;
\draw
[-stealth, line width=2pt,dashed]
([xshift=4em,yshift=0em]a1.south) to ([xshift=-3em,yshift=0em]a2.north);
}
\visible
<4->
{
\node
[anchor=center,fill=purple!20] (b2) at (b)
{
\Large
{$
\textbf
{
b
}$}}
;
\node
[anchor=west] (blabel) at ([xshift=1.5em]b2.east)
{
平移(shift)
}
;
\draw
[<-] ([xshift=0.2em]b2.east) -- (blabel.west);
\tikzstyle
{
neuron
}
= [rectangle,draw,thick,fill=red!30,red!35,minimum height=2em,minimum width=2em,font=
\small
]
\node
[neuron,anchor=north]
(a3) at ([xshift=11em,yshift=2.05em]a2.south)
{}
;
\draw
[->,thick]
([xshift=-3em,yshift=0em]a3.north) to ([xshift=2em,yshift=0em]a3.north);
\draw
[->,thick]
([xshift=-1em,yshift=-2em]a3.west) to ([xshift=-1em,yshift=4em]a3.west);
\node
[above]
at ([xshift=-0.5em,yshift=1em]a3.west)
{
0
}
;
\node
[above]
at ([xshift=1em,yshift=1em]a3.west)
{
1
}
;
\node
[left]
at ([xshift=-0.75em,yshift=-0.5em]a3.west)
{
-1
}
;
\node
[anchor=west,rotate = 180] (x) at ([xshift=0.7em,yshift=1em]a3.south)
{
\Large
{$
\textbf
{
F
}$}}
;
\node
[anchor=west] (x) at ([xshift=-4cm,yshift=2em]a3.north)
{
\scriptsize
{
$
b
=
\begin
{
bmatrix
}
0
.
5
&
0
&
0
\\
0
&
0
&
0
\\
0
&
0
&
0
\end
{
bmatrix
}$}
}
;
\draw
[-stealth, line width=2pt,dashed]
([xshift=3em,yshift=1em]a2.east) to ([xshift=-3em,yshift=1em]a3.west);
}
\end{tikzpicture}
\end{center}
}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 线性变换:更复杂的实例
\begin{frame}
{
线性变换(续)
}
\begin{frame}
[fragile]
{
线性变换(续)
}
\begin{itemize}
\item
线性变换也适用于更加复杂的情况,这也给神经网络提供了拟合不同数据分布的能力
\begin{itemize}
\item
比如,我们可以把三维图形投影到二维平面上
\item
再比如,我们也可以把二维平面上的图形映射到三维空间
\end{itemize}
\end{itemize}
\begin{tiny}
% A stack of 3x3 identity matrices times the all-ones column vector equals a
% stack of all-ones column vectors.
% Layout notes:
%  * \[ ... \] replaces the plain-TeX $$ ... $$ display delimiters.
%  * Each outer smallmatrix is only a two-row container: the braced group on
%    the first row and the count "5" on the second row, under the \underbrace.
%  * The array column specs now match the actual number of columns
%    (3 for the identity matrices, 1 for the column vectors).
\[
\begin{smallmatrix}
  \underbrace{
    \left\{
      \begin{smallmatrix}
        \left[\begin{array}{ccc} 1 & 0 & 0 \\ 0 & 1 & 0 \\ 0 & 0 & 1 \end{array}\right]
        \cdots
        \left[\begin{array}{ccc} 1 & 0 & 0 \\ 0 & 1 & 0 \\ 0 & 0 & 1 \end{array}\right]
      \end{smallmatrix}
    \right\}
  } \\
  5
\end{smallmatrix}
\times
\begin{smallmatrix}
  \left[\begin{array}{c} 1 \\ 1 \\ 1 \end{array}\right]
\end{smallmatrix}
=
\begin{smallmatrix}
  \underbrace{
    \left\{
      \begin{smallmatrix}
        \left[\begin{array}{c} 1 \\ 1 \\ 1 \end{array}\right]
        \cdots
        \left[\begin{array}{c} 1 \\ 1 \\ 1 \end{array}\right]
      \end{smallmatrix}
    \right\}
  } \\
  5
\end{smallmatrix}
\]
\end{tiny}
%\vspace{1em}
\newcommand{\plane}[1]{%
  % Closed parallelogram path in 3D coordinates, for use after \draw.
  % #1 is the y-coordinate of the starting corner; the three relative moves
  % trace the remaining corners and "cycle" closes the outline.
  (-1.95, #1, 1.35)
    -- ++(3.6, 0.6, 0.0)
    -- ++(0.3, -1.8, -2.7)
    -- ++(-3.6, -0.6, -0.0)
    -- cycle%
}
\newcommand{\nullspacepicture}{%
  % Bottom part of the row-space line (drawn before the planes).
  \draw (0,0,0) -- (0.3,-1.8,1.233);
  % Five parallel planes, drawn in this order so that later ones paint over
  % earlier ones; the plane at offset 0.6 is the highlighted one.
  \foreach \planeoffset/\planecolor in {-0.2/gray!20, 0.2/gray!20, 0.6/blue!70!gray, 1/gray!20, 1.4/gray!20} {
    \draw[fill=\planecolor] \plane{\planeoffset};
  }
  % Top part of the row-space line (drawn after the planes).
  \draw (-.094,.562,-.385) -- (-0.3,1.8,-1.233);
}
\newcommand{\rangepicture}[1]{%
  % 2D axes, each spanning [-2,2].
  \draw[help lines,->] (-2,0) -- (2,0);
  \draw[help lines,->] (0,-2) -- (0,2);
  % The line y = -2x; all five circles below lie on it.
  \draw (1,-2) -- (-1,2);
  % Circle at the origin, filled with the colour passed as #1
  % (presumably the image of the highlighted plane -- confirm against caller).
  \draw[fill=#1] (0,0) circle (2.5pt);
  % The other four circles, filled in neutral gray.
  \foreach \px/\py in {0.2/-0.4, 0.4/-0.8, -0.2/0.4, -0.4/0.8} {
    \draw[fill=gray!50] (\px,\py) circle (2.5pt);
  }
}
% Side-by-side illustration: the stack of planes in R^3 (left) and the
% points on a line in R^2 (right), joined by an arrow labelled "线性变换".
% \centering has no effect inside a tikzpicture, so it is applied to the
% enclosing paragraph instead; the braces and \par keep it local to the picture.
{\centering
\begin{tikzpicture}[scale=0.95]
  \nullspacepicture
  % Label for the source space.
  \node at (-2,1.8) {$\mathbb{R}^3$};
  % Arrow between the two diagrams.
  \path[->] (3,0) edge[bend left] node[above] {线性变换} (4.5,0);
  % Target-space diagram, shifted to the right of the arrow.
  \begin{scope}[xshift=7cm]
    \rangepicture{blue!70!gray}
    \node at (1.8,1.8) {$\mathbb{R}^2$};
  \end{scope}
\end{tikzpicture}\par}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 激活函数
\begin{frame}
{
激活函数
}
...
...
@@ -1013,19 +1183,419 @@ T(\alpha \textbf{a}) & = & \alpha T(\textbf{a}) \nonumber
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 常用的激活函数
\begin{frame}
{
常用的激活函数
}
\begin{itemize}
\item
好多好多,列举不全 ...
\end{itemize}
\begin{figure}
\centering
\subfigure
[softplus]
{
\centering
\begin{minipage}
{
.2
\textwidth
}
\begin{tikzpicture}
\draw
[->]
(-1.2,0)--(1.2,0)node[left,below,font=
\tiny
]
{$
x
$}
;
\draw
[->]
(0,-1.2)--(0,1.2)node[right,font=
\tiny
]
{$
y
$}
;
\foreach
\x
in
{
-1,-0.5,0,0.5,1
}{
\draw
(
\x
,0)--(
\x
,0.05)node[below,outer sep=2pt,font=
\tiny
]at(
\x
,0)
{
\x
}
;
}
\foreach
\y
in
{
1,0.5
}{
\draw
(0,
\y
)--(0.05,
\y
)node[left,outer sep=2pt,font=
\tiny
]at(0,
\y
)
{
\y
}
;
}
% Softplus curve on x in [-1.2, 1], with its formula as a label to the right
% of the last plotted point. The pgfmath expression now has balanced
% parentheses, and the label uses the upright operator \ln.
\draw[color=red,domain=-1.2:1] plot (\x,{ln(1+exp(\x))}) node[right,black] {\tiny $y=\ln(1+e^x)$};
\end{tikzpicture}
\end{minipage}
%
}
\hfill
\subfigure
[sigmoid]
{
\centering
\begin{minipage}
{
.2
\textwidth
}
\begin{tikzpicture}
\draw
[->]
(-1.2,0)--(1.2,0)node[left,below,font=
\tiny
]
{$
x
$}
;
\draw
[->]
(0,-1.2)--(0,1.2)node[right,font=
\tiny
]
{$
y
$}
;
\draw
[dashed]
(-1.2,1)--(1.2,1);
\foreach
\x
in
{
-1,-0.5,0,0.5,1
}{
\draw
(
\x
,0)--(
\x
,0.05)node[below,outer sep=2pt,font=
\tiny
]at(
\x
,0)
{
\x
}
;
}
\foreach
\y
in
{
0.5,1
}{
\draw
(0,
\y
)--(0.05,
\y
)node[left,outer sep=2pt,font=
\tiny
]at(0,
\y
)
{
\y
}
;
}
\draw
[color=red ,domain=-1.2:1.2]
plot(
\x
,
{
1/(1+(exp(-1*(
\x
))))
}
)node[right,black]
{
\tiny
$
y
=
\frac
{
1
}{
1
+
e
^{
-
x
}}$}
;
\node
[black,anchor=south]
at (0,1.2)
{
\tiny
$
y
=
\frac
{
1
}{
1
+
e
^{
-
x
}}$}
;
\end{tikzpicture}
\end{minipage}
%
}
\hfill
\subfigure
[tanh]
{
\centering
\begin{minipage}
{
.2
\textwidth
}
\begin{tikzpicture}
\draw
[->]
(-1.2,0)--(1.2,0)node[left,below,font=
\tiny
]
{$
x
$}
;
\draw
[->]
(0,-1.2)--(0,1.2)node[right,font=
\tiny
]
{$
y
$}
;
\draw
[dashed]
(-1.2,1)--(1.2,1);
\draw
[dashed]
(-1.2,-1)--(1.2,-1);
\foreach
\x
in
{
-1,-0.5,0,0.5,1
}{
\draw
(
\x
,0)--(
\x
,0.05)node[below,outer sep=2pt,font=
\tiny
]at(
\x
,0)
{
\x
}
;
}
\foreach
\y
in
{
0.5,1
}{
\draw
(0,
\y
)--(0.05,
\y
)node[left,outer sep=2pt,font=
\tiny
]at(0,
\y
)
{
\y
}
;
}
\draw
[color=red ,domain=-1.2:1.2]
plot(
\x
,
{
tanh(
\x
)
}
)node[below,black]
{
\tiny
$
y
=
\frac
{
e
^{
x
}
-
e
^{
-
x
}}{
e
^{
x
}
+
e
^{
-
x
}}$}
;
\end{tikzpicture}
\end{minipage}
}
\end{figure}
\begin{figure}
\centering
\subfigure
[relu]
{
\centering
\begin{minipage}
{
.2
\textwidth
}
\begin{tikzpicture}
\draw
[->]
(-1.2,0)--(1.2,0)node[left,below,font=
\tiny
]
{$
x
$}
;
\draw
[->]
(0,-1.2)--(0,1.2)node[right,font=
\tiny
]
{$
y
$}
;
\draw
[dashed]
(-1.2,1)--(1.2,1);
\draw
[dashed]
(-1.2,-1)--(1.2,-1);
\foreach
\x
in
{
-1,-0.5,0,0.5,1
}{
\draw
(
\x
,0)--(
\x
,0.05)node[below,outer sep=2pt,font=
\tiny
]at(
\x
,0)
{
\x
}
;
}
\foreach
\y
in
{
0.5,1
}{
\draw
(0,
\y
)--(0.05,
\y
)node[left,outer sep=2pt,font=
\tiny
]at(0,
\y
)
{
\y
}
;
}
\draw
[color=red ,domain=-1.2:1.2]
plot(
\x
,
{
max(
\x
,0)
}
)node[right,black]
{
\tiny
$
y
=
\max
(
0
, x
)
$}
;
\end{tikzpicture}
\end{minipage}
%
}
\hfill
\subfigure
[gaussian]
{
\centering
\begin{minipage}
{
.2
\textwidth
}
\begin{tikzpicture}
\draw
[->]
(-1.2,0)--(1.2,0)node[left,below,font=
\tiny
]
{$
x
$}
;
\draw
[->]
(0,-1.2)--(0,1.2)node[right,font=
\tiny
]
{$
y
$}
;
\draw
[dashed]
(-1.2,1)--(1.2,1);
\foreach
\x
in
{
-1,-0.5,0,0.5,1
}{
\draw
(
\x
,0)--(
\x
,0.05)node[below,outer sep=2pt,font=
\tiny
]at(
\x
,0)
{
\x
}
;
}
\foreach
\y
in
{
0.5,1
}{
\draw
(0,
\y
)--(0.05,
\y
)node[left,outer sep=2pt,font=
\tiny
]at(0,
\y
)
{
\y
}
;
}
\draw
[color=red ,domain=-1.2:1.2]
plot(
\x
,
{
exp(-1*((
\x
)
^
2))
}
)node[right,black]
{
\tiny
$
y
=
e
^{
-
x
^
2
}$}
;
\end{tikzpicture}
\end{minipage}
%
}
\hfill
\subfigure
[identity]
{
\centering
\begin{minipage}
{
.2
\textwidth
}
\begin{tikzpicture}
\draw
[->]
(-1.2,0)--(1.2,0)node[left,below,font=
\tiny
]
{$
x
$}
;
\draw
[->]
(0,-1.2)--(0,1.2)node[right,font=
\tiny
]
{$
y
$}
;
\foreach
\x
in
{
-1,-0.5,0,0.5,1
}{
\draw
(
\x
,0)--(
\x
,0.05)node[below,outer sep=2pt,font=
\tiny
]at(
\x
,0)
{
\x
}
;
}
\foreach
\y
in
{
0.5,1
}{
\draw
(0,
\y
)--(0.05,
\y
)node[left,outer sep=2pt,font=
\tiny
]at(0,
\y
)
{
\y
}
;
}
\draw
[color=red ,domain=-1:1]
plot(
\x
,
\x
)node[right,black]
{
\tiny
$
y
=
x
$}
;
\end{tikzpicture}
\end{minipage}
}
\end{figure}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection
{
多层神经网络
}
%%%------------------------------------------------------------------------------------------------------------
%%% 一层 -> 多层
\begin{frame}
{
更多的层
}
\begin{itemize}
\item
\textbf
{
单层神经网络
}
:线性变换 + 激活函数(非线性)
\item
我们可以重复上面的过程,构建
\textbf
{
多层神经网络
}
\end{itemize}
\vspace
{
-1.0em
}
\begin{center}
\begin{tikzpicture}
\begin{scope}
[]
\def\neuronsep
{
1.6
}
\tikzstyle
{
neuronnode
}
= [minimum size=1.7em,circle,draw,ublue,very thick,inner sep=1pt, fill=white,align=center,drop shadow=
{
shadow xshift=0.1em,shadow yshift=-0.1em
}
]
%%% layer 1
\foreach
\n
in
{
1,...,5
}{
\node
[neuronnode] (neuron0
\n
) at (
\n
*
\neuronsep
,0)
{
\tiny
{$
f
_
1
$}
\\
[-1ex]
\tiny
{$
\sum
$}}
;
\draw
[-,ublue] (neuron0
\n
.east) -- (neuron0
\n
.west);
}
\foreach
\n
in
{
1,...,5
}{
\foreach
\m
in
{
1,...,5
}{
\draw
[<-] (neuron0
\m
.south) -- ([yshift=-2em]neuron0
\n
.south);
}
\node
[anchor=north] (x
\n
) at ([yshift=-2em]neuron0
\n
.south)
{$
x
_
\n
$}
;
\visible
<1>
{
\draw
[<-,thick] ([yshift=1.5em]neuron0
\n
.north) -- (neuron0
\n
.north);
\node
[anchor=south] (y
\n
) at ([yshift=1.5em]neuron0
\n
.north)
{$
y
_
\n
$}
;
}
}
\node
[anchor=west] (w1label) at ([xshift=-0.5em,yshift=0.5em]x5.north east)
{$
\textbf
{
w
}_
1
$}
;
\begin{pgfonlayer}
{
background
}
\node
[rectangle,inner sep=0.2em,fill=red!20] [fit = (neuron01) (neuron05)] (layer01)
{}
;
\end{pgfonlayer}
\node
[anchor=west] (layer00label) at ([xshift=1.25em]x5.east)
{
\alert
{
输入层
}}
;
\visible
<2->
{
\node
[anchor=west] (layer01label) at ([xshift=1em]layer01.east)
{
第二层
}
;
}
\visible
<4->
{
\node
[anchor=west] (layer01label2) at (layer01label.east)
{
(
\alert
{
隐层
}
)
}
;
}
%%% layer 2
\visible
<2->
{
\foreach
\n
in
{
2,...,4
}{
\node
[neuronnode] (neuron1
\n
) at (
\n
*
\neuronsep
,4em)
{
\tiny
{$
f
_
2
$}
\\
[-1ex]
\tiny
{$
\sum
$}}
;
\draw
[-,ublue] (neuron1
\n
.east) -- (neuron1
\n
.west);
}
\foreach
\n
in
{
2,...,4
}{
\foreach
\m
in
{
1,...,5
}{
\draw
[<-] (neuron1
\n
.south) -- (neuron0
\m
.north);
}
\visible
<2>
{
\draw
[<-,thick] ([yshift=1.5em]neuron1
\n
.north) -- (neuron1
\n
.north);
\node
[anchor=south] (y
\n
) at ([yshift=1.5em]neuron1
\n
.north)
{$
y
_
\n
$}
;
}
}
\node
[anchor=west] (w2label) at ([xshift=-2.5em,yshift=5.0em]x5.north east)
{$
\textbf
{
w
}_
2
$}
;
\begin{pgfonlayer}
{
background
}
\visible
<2->
{
\node
[rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron12) (neuron14)] (layer02)
{}
;
}
\end{pgfonlayer}
\node
[anchor=west] (layer02label) at ([xshift=4.9em]layer02.east)
{
第三层
}
;
\visible
<4->
{
\node
[anchor=west] (layer02label2) at (layer02label.east)
{
(
\alert
{
隐层
}
)
}
;
}
}
%%% layer 3
\visible
<3->
{
\foreach
\n
in
{
1,...,5
}{
\node
[neuronnode] (neuron2
\n
) at (
\n
*
\neuronsep
,8em)
{
\tiny
{$
f
_
3
$}
\\
[-1ex]
\tiny
{$
\sum
$}}
;
\draw
[-,ublue] (neuron2
\n
.east) -- (neuron2
\n
.west);
}
\foreach
\n
in
{
1,...,5
}{
\foreach
\m
in
{
2,...,4
}{
\draw
[<-] (neuron2
\n
.south) -- (neuron1
\m
.north);
}
\node
[anchor=south] (y
\n
) at ([yshift=1.5em]neuron2
\n
.north)
{$
y
_
\n
$}
;
\draw
[<-,thick] ([yshift=1.5em]neuron2
\n
.north) -- (neuron2
\n
.north);
}
\node
[anchor=west] (w3label) at ([xshift=-2.5em,yshift=8.5em]x5.north east)
{$
\textbf
{
w
}_
3
$}
;
\begin{pgfonlayer}
{
background
}
\visible
<3->
{
\node
[rectangle,inner sep=0.2em,fill=blue!20] [fit = (neuron21) (neuron25)] (layer03)
{}
;
}
\end{pgfonlayer}
\node
[anchor=west] (layer03label) at ([xshift=1em]layer03.east)
{
第四层
}
;
\visible
<4->
{
\node
[anchor=west] (layer03label2) at (layer03label.east)
{
(
\alert
{
输出层
}
)
}
;
}
}
\end{scope}
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 两层神经网络可以逼近任何函数
\begin{frame}
{
多层神经网络可以逼近任意函数
}
\begin{itemize}
\item
以一个简单的三层网络为例(隐层激活函数:sigmoid)
\end{itemize}
\begin{center}
\begin{tikzpicture}
%% a two-layer neural network
\begin{scope}
\tikzstyle
{
neuronnode
}
= [minimum size=1.7em,circle,draw,ublue,very thick,inner sep=1pt, fill=white,align=center,drop shadow=
{
shadow xshift=0.1em,shadow yshift=-0.1em
}
]
%% input and hidden layers
\node
[neuronnode] (n10) at (0,0)
{
\tiny
{$
f
$}
\\
[-1ex]
\tiny
{$
\sum
$}}
;
\node
[neuronnode] (n11) at (1.5,0)
{
\tiny
{$
f
$}
\\
[-1ex]
\tiny
{$
\sum
$}}
;
\draw
[-,ublue] (n10.west) -- (n10.east);
\draw
[-,ublue] (n11.west) -- (n11.east);
\node
[anchor=north] (x1) at ([yshift=-6em]n11.south)
{$
x
_
1
$}
;
\node
[anchor=north] (b) at ([yshift=-6em]n10.south)
{$
b
$}
;
\visible
<1-10>
{
\draw
[->,thick] (b.north) -- ([yshift=-0.1em]n10.south);
\draw
[->,thick] (x1.north) -- ([yshift=-0.1em]n10.290);
}
\visible
<1>
{
\draw
[->,thick] (b.north) -- ([yshift=-0.1em]n11.250);
\draw
[->,thick] (x1.north) -- ([yshift=-0.1em]n11.south);
}
\visible
<11->
{
\draw
[->,thick,red] (b.north) -- ([yshift=-0.1em]n10.south);
\draw
[->,thick,ugreen] (x1.north) -- ([yshift=-0.1em]n10.290);
}
\visible
<2->
{
\draw
[->,thick,blue] (b.north) -- ([yshift=-0.1em]n11.250);
\draw
[->,thick,purple] (x1.north) -- ([yshift=-0.1em]n11.south);
}
\visible
<15->
{
\node
[neuronnode] (n12) at (2.7,0)
{
\tiny
{$
f
$}
\\
[-1ex]
\tiny
{$
\sum
$}}
;
\node
[neuronnode] (n13) at (3.8,0)
{
\tiny
{$
f
$}
\\
[-1ex]
\tiny
{$
\sum
$}}
;
\draw
[-,ublue] (n12.west) -- (n12.east);
\draw
[-,ublue] (n13.west) -- (n13.east);
\draw
[->,thick] (b.north) -- ([yshift=-0.1em]n12.250);
\draw
[->,thick] (x1.north) -- ([yshift=-0.1em]n12.270);
\draw
[->,thick] (b.north) -- ([yshift=-0.1em]n13.230);
\draw
[->,thick] (x1.north) -- ([yshift=-0.1em]n13.250);
}
\visible
<16->
{
\node
[anchor=west] (morenodes) at (n13.east)
{
...
}
;
}
%% output layers
\node
[neuronnode] (n20) at (0.75,5em)
{
\scriptsize
{$
\sum
$}}
;
\visible
<1-10>
{
\draw
[->,thick] ([yshift=0.1em]n10.north) -- ([yshift=-0.1em]n20.250);
}
\visible
<1-8>
{
\draw
[->,thick] ([yshift=0.1em]n11.north) -- ([yshift=-0.1em]n20.290);
}
\visible
<11->
{
\draw
[->,thick,brown] ([yshift=0.1em]n10.north) -- ([yshift=-0.1em]n20.250);
}
\visible
<9->
{
\draw
[->,thick,orange] ([yshift=0.1em]n11.north) -- ([yshift=-0.1em]n20.290);
}
\node
[] (y) at ([yshift=3em]n20.north)
{$
y
$}
;
\draw
[->,thick] ([yshift=0.1em]n20.north) -- (y.south);
\visible
<15->
{
\draw
[->,thick] ([yshift=0.1em]n12.north) -- ([yshift=-0.1em]n20.310);
\draw
[->,thick] ([yshift=0.1em]n13.north) -- ([yshift=-0.1em]n20.330);
}
%% weight and bias
\visible
<11->
{
\node
[anchor=center,rotate=90,fill=white,inner sep=1pt] (b0) at ([yshift=3em,xshift=-0.5em]b.north)
{
\tiny
{$
b
=-
6
$}}
;
}
\visible
<11->
{
\node
[anchor=center,rotate=-59,fill=white,inner sep=1pt] (w2) at ([yshift=1.2em,xshift=-1.2em]x1.north)
{
\tiny
{$
w
=
100
$}}
;
}
\visible
<2-6>
{
\node
[anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north)
{
\tiny
{$
b
=
0
$}}
;
}
\visible
<7>
{
\node
[anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north)
{
\tiny
{$
b
=-
2
$}}
;
}
\visible
<8->
{
\node
[anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north)
{
\tiny
{$
b
=-
4
$}}
;
}
\visible
<2-4>
{
\node
[anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north)
{
\tiny
{$
w
=
1
$}}
;
}
\visible
<5>
{
\node
[anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north)
{
\tiny
{$
w
=
10
$}}
;
}
\visible
<6->
{
\node
[anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north)
{
\tiny
{$
w
=
100
$}}
;
}
\visible
<11>
{
\node
[anchor=center,rotate=62,fill=white,inner sep=1pt] (w21) at ([yshift=2em,xshift=0.5em]n10.north)
{
\tiny
{$
w'
=
0
.
7
$}}
;
}
\visible
<12->
{
\node
[anchor=center,rotate=62,fill=white,inner sep=1pt] (w21) at ([yshift=2em,xshift=0.5em]n10.north)
{
\tiny
{$
w'
=-
0
.
7
$}}
;
}
\visible
<2-8>
{
\node
[anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north)
{
\tiny
{$
w'
=
1
$}}
;
}
\visible
<9>
{
\node
[anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north)
{
\tiny
{$
w'
=
0
.
9
$}}
;
}
\visible
<10->
{
\node
[anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north)
{
\tiny
{$
w'
=
0
.
7
$}}
;
}
%% sigmoid box
\begin{scope}
\visible
<3->
{
\node
[anchor=west] (flabel) at ([xshift=1.2in]y.east)
{
\footnotesize
{
sigmoid:
}}
;
\node
[anchor=north east] (slabel) at ([xshift=0]flabel.south east)
{
\footnotesize
{
sum:
}}
;
\node
[anchor=west,inner sep=2pt] (flabel2) at (flabel.east)
{
\footnotesize
{$
f
(
s
)=
1
/(
1
+
e
^{
-
s
}
)
$}}
;
\node
[anchor=west,inner sep=2pt] (flabel3) at (slabel.east)
{
\footnotesize
{$
s
=
x
_
1
\cdot
w
+
b
$}}
;
\draw
[->,thick,dotted] ([yshift=-0.3em,xshift=-0.1em]n11.60) .. controls +(east:1) and +(west:2) .. ([xshift=-0.2em]flabel.west) ;
\begin{pgfonlayer}
{
background
}
\visible
<3->
{
\node
[rectangle,inner sep=0.2em,fill=blue!20,drop shadow=
{
shadow xshift=0.1em,shadow yshift=-0.1em
}
] [fit = (flabel) (flabel2) (flabel3)] (funcbox)
{}
;
}
\end{pgfonlayer}
}
\end{scope}
%% output illustration
\begin{scope}
[xshift=2.8in,yshift=0.1in]
\visible
<4->
{
\draw
[->,thick] (-2.2,0) -- (2.2,0);
\draw
[->,thick] (0,0) -- (0,2);
\draw
[-] (-0.05,1) -- (0.05,1);
\node
[anchor=east,inner sep=1pt] (label1) at (0,1)
{
\tiny
{
1
}}
;
\node
[anchor=south east,inner sep=1pt] (label2) at (0,0)
{
\tiny
{
0
}}
;
}
\visible
<4>
{
\draw
[-,very thick,ublue,domain=-2:2,samples=100] plot (
\x
,
{
1/(1+exp(-2*
\x
))
}
);
}
\visible
<5>
{
\draw
[-,very thick,ublue,domain=-2:2,samples=100] plot (
\x
,
{
1/(1+exp(-4*
\x
))
}
);
}
\visible
<6>
{
\draw
[-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0,0) -- (0,1) -- (2,1);
}
\visible
<7>
{
\draw
[-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.25,0) -- (0.25,1) -- (2,1);
}
\visible
<8>
{
\draw
[-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,1) -- (2,1);
}
\visible
<9>
{
\draw
[-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.9) -- (2,0.9);
}
\visible
<10>
{
\draw
[-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.7) -- (2,0.7);
}
\visible
<11>
{
\draw
[-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.7) -- (0.7,0.7) -- (0.7,1.4) -- (2,1.4);
}
\visible
<12->
{
\draw
[-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.5,0) -- (0.5,0.7) -- (0.7,0.7) -- (0.7,0) -- (2,0);
}
\visible
<15->
{
\draw
[-,very thick,ublue,rounded corners=0.1em] (-2,0) -- (0.7,0) -- (0.7,0.6) -- (0.9,0.6) -- (0.9,0) -- (2,0);
}
\visible
<14>
{
\draw
[->,dashed] (0.6,-0.05) -- (0.6,-0.96in);
}
\visible
<15->
{
\draw
[->,dashed] (0.8,-0.05) -- (0.8,-0.98in);
}
\visible
<4>
{
\node
[anchor=north west,align=left] (wblabel) at (-2,2)
{
\scriptsize
{$
w
_
1
=
1
$}
\\
[-0ex]
\scriptsize
{
\
$
b
_
1
=
0
$}}
;
}
\visible
<5>
{
\node
[anchor=north west,align=left] (wblabel) at (-2,2)
{
\alert
{
\scriptsize
{$
w
_
1
=
10
$}}
\\
[-0ex]
\scriptsize
{
\
$
b
_
1
=
0
$}}
;
}
\visible
<6>
{
\node
[anchor=north west,align=left] (wblabel) at (-2,2)
{
\alert
{
\scriptsize
{$
w
_
1
=
100
$}}
\\
[-0ex]
\scriptsize
{
\
$
b
_
1
=
0
$}}
;
}
\visible
<7>
{
\node
[anchor=north west,align=left] (wblabel) at (-2,2)
{
\scriptsize
{$
w
_
1
=
100
$}
\\
[-0ex]
\alert
{
\scriptsize
{
\
$
b
_
1
=-
2
$}}}
;
}
\visible
<8>
{
\node
[anchor=north west,align=left] (wblabel) at (-2,2)
{
\scriptsize
{$
w
_
1
=
100
$}
\\
[-0ex]
\alert
{
\scriptsize
{
\
$
b
_
1
=-
4
$}}}
;
}
\visible
<9>
{
\node
[anchor=north west,align=left] (wblabel) at (-2,2)
{
\alert
{
\scriptsize
{$
w'
_
1
=
0
.
9
$}}}
;
}
\visible
<10>
{
\node
[anchor=north west,align=left] (wblabel) at (-2,2)
{
\alert
{
\scriptsize
{$
w'
_
1
=
0
.
7
$}}}
;
}
\visible
<11>
{
\node
[anchor=north west,align=left] (wblabel) at (-2,2)
{
\alert
{
\scriptsize
{$
w
_
2
=
100
$}}
\\
[-0ex]
\alert
{
\scriptsize
{
\
$
b
_
2
=-
6
$}}
\\
[-0ex]
\alert
{
\scriptsize
{
\
$
w'
_
2
=
0
.
7
$}}}
;
}
\visible
<12>
{
\node
[anchor=north west,align=left] (wblabel) at (-2,2)
{
\scriptsize
{$
w
_
2
=
100
$}
\\
[-0ex]
\scriptsize
{
\
$
b
_
2
=-
6
$}
\\
[-0ex]
\alert
{
\scriptsize
{
\
$
w'
_
2
=-
0
.
7
$}}}
;
}
\visible
<13->
{
\node
[anchor=north west,align=left] (wblabel) at (-2.5,2)
{
\scriptsize
{
这是一个
}
\\
[-1ex]
\scriptsize
{
step function
}}
;
}
\end{scope}
\begin{scope}
[xshift=2.8in,yshift=-1.2in]
\visible
<13->
{
\draw
[->,thick] (-2.2,0) -- (2.2,0);
\draw
[->,thick] (0,0) -- (0,2);
\draw
[-,very thick,red,domain=-1.98:2,samples=100] plot (
\x
,
{
0.2 * (
\x
+0.4)
^
3 + 1.2 - 0.3 *(
\x
+ 0.8)
^
2
}
);
}
\visible
<14->
{
\foreach
\n
in
{
0.5
}{
\pgfmathsetmacro
{
\result
}{
0.2 * (
\n
+ 0.1 + 0.4)
^
3 + 1.2 - 0.3 *(
\n
+ 0.1 + 0.8)
^
2
}
%
\draw
[-,ublue,thick] (
\n
,0) -- (
\n
,
\result
) -- (
\n
+ 0.2,
\result
) -- (
\n
+ 0.2, 0);
}
}
\visible
<15->
{
\foreach
\n
in
{
0.7
}{
\pgfmathsetmacro
{
\result
}{
0.2 * (
\n
+ 0.1 + 0.4)
^
3 + 1.2 - 0.3 *(
\n
+ 0.1 + 0.8)
^
2
}
%
\draw
[-,ublue,thick] (
\n
,0) -- (
\n
,
\result
) -- (
\n
+ 0.2,
\result
) -- (
\n
+ 0.2, 0);
}
}
\visible
<16->
{
\foreach
\n
in
{
-1.9,-1.7,...,1.9
}{
\pgfmathsetmacro
{
\result
}{
0.2 * (
\n
+ 0.1 + 0.4)
^
3 + 1.2 - 0.3 *(
\n
+ 0.1 + 0.8)
^
2
}
%
\draw
[-,ublue,thick] (
\n
,0) -- (
\n
,
\result
) -- (
\n
+ 0.2,
\result
) -- (
\n
+ 0.2, 0);
}
}
\visible
<14>
{
\node
[anchor=north west,align=left] (wblabel) at (-2.5,2.5)
{
\scriptsize
{
函数的每一段都可
}
\\
[-1ex]
\scriptsize
{
由step function
}
\\
[-1ex]
\scriptsize
{
近似
}}
;
}
\visible
<15>
{
\node
[anchor=north west,align=left] (wblabel) at (-2.5,2.5)
{
\scriptsize
{
增加隐层神经元
}
\\
[-1ex]
\scriptsize
{
可以拟合更多的
}
\\
[-1ex]
\scriptsize
{
部分
}}
;
}
\visible
<16>
{
\node
[anchor=north west,align=left] (wblabel) at (-2.5,2.5)
{
\scriptsize
{
理论上足够多的
}
\\
[-1ex]
\scriptsize
{
隐层神经元可以
}
\\
[-1ex]
\scriptsize
{
拟合
\alert
{
任意函数
}}}
;
}
\end{scope}
\end{scope}
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection
{
神经网络的简单实现:张量计算
}
%%%------------------------------------------------------------------------------------------------------------
\subsection
{
参数学习 - 反向传播
}
\end{CJK}
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论