Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
T
Toy-MT-Introduction
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
0
Issues
0
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
单韦乔
Toy-MT-Introduction
Commits
dbe871f1
Commit
dbe871f1
authored
Sep 05, 2019
by
xiaotong
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
linear transformation and activation function
parent
bcb189ce
隐藏空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
95 行增加
和
69 行删除
+95
-69
Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
+78
-60
Section05-Neural-Networks-and-Language-Modeling/section05.tex
+17
-9
没有找到文件。
Section05-Neural-Networks-and-Language-Modeling/section05-test.tex
查看文件 @
dbe871f1
...
...
@@ -105,90 +105,108 @@
\subsection
{
数学基础:张量计算
}
%%%------------------------------------------------------------------------------------------------------------
%%%
层的概念
\begin{frame}
{
``层''的概念
}
%%%
神经网络的作用
\begin{frame}
{
神经网络:线性变换 + 激活函数
}
\begin{itemize}
\item
对于一个问题(相同输入),可能会有多个输出,这时可以把
\alert
{
多个相同的神经元并列起来
}
,构成一
\alert
{
``层''
}
\begin{itemize}
\item
比如,天气预报需要同时预测湿度和温度
\end{itemize}
\item
对于向量
$
\textbf
{
x
}
\in
\mathbb
{
R
}^
m
$
,一层神经网络首先把它经过
\textbf
{
\alert
{
线性变换
}}
映射到
$
\mathbb
{
R
}^
n
$
,之后经过
\textbf
{{
\color
{
blue
}
激活函数
}}
变换成
$
\textbf
{
y
}
\in
\mathbb
{
R
}^
n
$
\end{itemize}
\vspace
{
-2em
}
\begin{center}
\begin{tikzpicture}
\begin{scope}
\tikzstyle
{
neuronnode
}
= [minimum size=1.5em,circle,draw,ublue,very thick,fill=white,drop shadow=
{
shadow xshift=0.1em,shadow yshift=-0.1em
}
]
\node
[anchor=center] (y) at (0,0)
{
\Large
{$
\textbf
{
y
}$}}
;
\node
[anchor=west] (eq) at (y.east)
{
\Large
{$
=
$}}
;
\node
[anchor=west] (func) at (eq.east)
{
\Large
{$
f
$}}
;
\node
[anchor=west] (brace01) at (func.east)
{
\Large
{$
(
$}}
;
\node
[anchor=west] (x) at (brace01.east)
{
\Large
{$
\textbf
{
x
}$}}
;
\node
[anchor=west] (dot) at (x.east)
{
\Large
{$
\cdot
$}}
;
\node
[anchor=west] (w) at (dot.east)
{
\Large
{$
\textbf
{
w
}$}}
;
\node
[anchor=west] (plus) at (w.east)
{
\Large
{$
+
$}}
;
\node
[anchor=west] (b) at (plus.east)
{
\Large
{$
\textbf
{
b
}$}}
;
\node
[anchor=west] (brace02) at (b.east)
{
\Large
{$
)
$}}
;
\node
[anchor=center,fill=blue!20] (func2) at (func)
{
\LARGE
{$
f
$}}
;
\node
[anchor=north] (funclabel) at ([yshift=-1.1em]func.south)
{
\blue
{
激活函数
}}
;
\draw
[<-] ([yshift=-0.2em]func2.south) -- (funclabel.north);
\node
[anchor=center,neuronnode] (neuron00) at (0,0)
{}
;
\visible
<2->
{
\node
[anchor=center,neuronnode] (neuron01) at ([yshift=-3em]neuron00)
{}
;
}
\visible
<3->
{
\node
[anchor=center,neuronnode] (neuron02) at ([yshift=-3em]neuron01)
{}
;
}
\begin{pgfonlayer}
{
background
}
\node
[rectangle,inner sep=0.2em,fill=red!20] [fit = (x) (w) (b)] (linear)
{}
;
\node
[anchor=north] (linearlabel) at ([yshift=-1.1em]linear.south)
{
\alert
{
线性变换
}}
;
\draw
[<-] ([yshift=-0.2em]linear.south) -- (linearlabel.north);
\end{pgfonlayer}
\node
[anchor=east] (x0) at ([xshift=-6em]neuron00.west)
{$
x
_
0
$}
;
\node
[anchor=east] (x1) at ([xshift=-6em]neuron01.west)
{$
x
_
1
$}
;
\node
[anchor=east] (x2) at ([xshift=-6em]neuron02.west)
{$
b
$}
;
\end{tikzpicture}
\end{center}
\node
[anchor=west] (y0) at ([xshift=4em]neuron00.east)
{$
y
_
0
$}
;
\end{frame}
\draw
[->] (x0.east) -- (neuron00.180) node [pos=0.1,above]
{
\tiny
{$
w
_{
00
}$}}
;
\draw
[->] (x1.east) -- (neuron00.200) node [pos=0.1,above]
{
\tiny
{$
w
_{
10
}$}}
;
\draw
[->] (x2.east) -- (neuron00.220) node [pos=0.05,above,yshift=0.3em]
{
\tiny
{$
b
_{
0
}$}}
;
\draw
[->] (neuron00.east) -- (y0.west);
%%%------------------------------------------------------------------------------------------------------------
%%% 线性变换
\begin{frame}
{
线性变换
}
\begin{itemize}
\item
对于线性空间
$
V
$
,任意
$
\textbf
{
a
}$
,
$
\textbf
{
b
}
\in
V
$
和数域中的任意
$
\alpha
$
,线性变换
$
T
(
\cdot
)
$
需满足
\begin{eqnarray}
T(
\textbf
{
a
}
+
\textbf
{
b
}
)
&
=
&
T(
\textbf
{
a
}
) + T(
\textbf
{
b
}
)
\nonumber
\\
T(
\alpha
\textbf
{
a
}
)
&
=
&
\alpha
T(
\textbf
{
a
}
)
\nonumber
\end{eqnarray}
\item
<2-> 线性变换的一种几何解释:
\end{itemize}
\vspace
{
-1em
}
\visible
<2->
{
\node
[anchor=west] (y1) at ([xshift=4em]neuron01.east)
{$
y
_
1
$}
;
\draw
[->] (x0.east) -- (neuron01.160) node [pos=0.4,above]
{
\tiny
{$
w
_{
01
}$}}
;
\draw
[->] (x1.east) -- (neuron01.180) node [pos=0.35,above,yshift=-0.2em]
{
\tiny
{$
w
_{
11
}$}}
;
\draw
[->] (x2.east) -- (neuron01.200) node [pos=0.4,below]
{
\tiny
{$
b
_{
1
}$}}
;
\draw
[->] (neuron01.east) -- (y1.west);
}
\begin{center}
\begin{tikzpicture}
\node
[anchor=west] (x) at (0,0)
{
\Large
{$
\textbf
{
x
}$}}
;
\node
[anchor=west] (dot) at (x.east)
{
\Large
{$
\cdot
$}}
;
\node
[anchor=west] (w) at (dot.east)
{
\Large
{$
\textbf
{
w
}$}}
;
\node
[anchor=west] (plus) at (w.east)
{
\Large
{$
+
$}}
;
\node
[anchor=west] (b) at (plus.east)
{
\Large
{$
\textbf
{
b
}$}}
;
\visible
<3->
{
\node
[anchor=west] (y2) at ([xshift=4em]neuron02.east)
{$
y
_
2
$}
;
\draw
[->] (x0.east) -- (neuron02.140) node [pos=0.1,below,yshift=-0.2em]
{
\tiny
{$
w
_{
02
}$}}
;
\draw
[->] (x1.east) -- (neuron02.160) node [pos=0.1,below]
{
\tiny
{$
w
_{
12
}$}}
;
\draw
[->] (x2.east) -- (neuron02.180) node [pos=0.3,below]
{
\tiny
{$
b
_{
2
}$}}
;
\draw
[->] (neuron02.east) -- (y2.west);
\node
[anchor=center,fill=green!20] (w2) at (w)
{
\Large
{$
\textbf
{
w
}$}}
;
\node
[anchor=north,inner sep=1pt] (wlabel) at ([yshift=-0.7em]w.south)
{
\small
{
旋转(rotation)
}}
;
\draw
[<-] ([yshift=-0.2em]w2.south) -- (wlabel.north);
}
\visible
<4->
{
\node
[anchor=east,align=left] (inputlabel) at ([xshift=-0.1em]x1.west)
{
输入向量:
\\\small
{$
\textbf
{
x
}
=(
x
_
0
,x
_
1
)
$}}
;
}
\visible
<5->
{
\node
[anchor=west,align=left] (outputlabel) at ([xshift=0.1em]y1.east)
{
输出向量:
\\\small
{$
\textbf
{
y
}
=(
y
_
0
,y
_
1
,y
_
2
)
$}}
;
\node
[anchor=center,fill=purple!20] (b2) at (b)
{
\Large
{$
\textbf
{
b
}$}}
;
\node
[anchor=west] (blabel) at ([xshift=1.5em]b2.east)
{
平移(shift)
}
;
\draw
[<-] ([xshift=0.2em]b2.east) -- (blabel.west);
}
\begin{pgfonlayer}
{
background
}
\visible
<6->
{
\node
[rectangle,inner sep=0.4em,fill=red!20] [fit = (neuron00) (neuron01) (neuron02)] (layer)
{}
;
\node
[anchor=south] (layerlabel) at ([yshift=0.2em]layer.north)
{
一层神经元
}
;
\end{tikzpicture}
\end{center}
}
\visible
<4->
{
\node
[rectangle,inner sep=0.1em,fill=ugreen!20] [fit = (x0) (x1)] (inputshadow)
{}
;
}
\visible
<5->
{
\node
[rectangle,inner sep=0.1em,fill=blue!20] [fit = (y0) (y1) (y2)] (outputshadow)
{}
;
}
\end{pgfonlayer}
\end{frame}
\visible
<7->
{
\node
[anchor=north west] (wlabel) at ([yshift=-1em,xshift=-7em]x2.south)
{
参数(矩阵):
$
\textbf
{
w
}
=
\Big
(
\begin
{
array
}{
lll
}
w
_{
00
}
&
w
_{
01
}
&
w
_{
02
}
\\
w
_{
10
}
&
w
_{
11
}
&
w
_{
12
}
&
w
_{
11
}
&
w
_{
12
}
\end
{
array
}
\Big
)
$}
;
}
\visible
<8->
{
\node
[anchor=west] (blabel) at (wlabel.east)
{
参数(向量):
$
\textbf
{
b
}
=
(
b
_
0
, b
_
1
, b
_
2
)
$}
;
}
%%%------------------------------------------------------------------------------------------------------------
%%% 线性变换:更复杂的实例
\begin{frame}
{
线性变换(续)
}
\begin{itemize}
\item
线性变换也适用于更加复杂的情况,这也给神经网络提供了拟合不同数据分布的能力
\end{itemize}
\end{frame}
\end{scope}
\end{tikzpicture}
\end{center}
%%%------------------------------------------------------------------------------------------------------------
%%% 激活函数
\begin{frame}
{
激活函数
}
\begin{itemize}
\item
激活函数的设计更多的是为了进行
\alert
{
非线性
}
变换
\begin{itemize}
\item
很多实际问题都是非线性的
\item
非线性部分提供了拟合任意函数的能力(稍后介绍)
\end{itemize}
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 常用的激活函数
\begin{frame}
{
常用的激活函数
}
\begin{itemize}
\item
好多好多,列举不全 ...
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
...
...
Section05-Neural-Networks-and-Language-Modeling/section05.tex
查看文件 @
dbe871f1
...
...
@@ -717,27 +717,27 @@ GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7}
\node
[anchor=west] (eq) at (y.east)
{
\LARGE
{$
=
$}}
;
\node
[anchor=west] (func) at (eq.east)
{
\LARGE
{$
f
$}}
;
\node
[anchor=west] (brace01) at (func.east)
{
\LARGE
{$
(
$}}
;
\node
[anchor=west] (x) at (brace01.east)
{
\LARGE
{$
\textbf
{
x
}$}}
;
\node
[anchor=west] (dot) at (x.east)
{
\LARGE
{$
\cdot
$}}
;
\node
[anchor=west] (w) at (dot.east)
{
\LARGE
{$
\textbf
{
w
}$}}
;
\node
[anchor=west] (plus) at (w.east)
{
\LARGE
{$
+
$}}
;
\node
[anchor=west] (b) at (plus.east)
{
\LARGE
{$
\textbf
{
b
}$}}
;
\node
[anchor=west] (brace02) at (b.east)
{
\LARGE
{$
)
$}}
;
\node
[anchor=west] (x) at (brace01.east)
{
\LARGE
{$
\textbf
{
x
}$}}
;
\node
[anchor=west] (dot) at (x.east)
{
\LARGE
{$
\cdot
$}}
;
\node
[anchor=west] (w) at (dot.east)
{
\LARGE
{$
\textbf
{
w
}$}}
;
\node
[anchor=west] (plus) at (w.east)
{
\LARGE
{$
+
$}}
;
\node
[anchor=west] (b) at (plus.east)
{
\LARGE
{$
\textbf
{
b
}$}}
;
\node
[anchor=west] (brace02) at (b.east)
{
\LARGE
{$
)
$}}
;
\visible
<2->
{
\node
[anchor=center,fill=yellow!30] (x2) at (x)
{
\LARGE
{$
\textbf
{
x
}$}}
;
\node
[anchor=center,fill=yellow!30] (x2) at (x)
{
\LARGE
{$
\textbf
{
x
}$}}
;
\node
[anchor=south] (xlabel) at ([yshift=1.5em]x.north)
{
输入
}
;
\draw
[<-] ([yshift=0.2em]x2.north) -- (xlabel.south);
}
\visible
<3->
{
\node
[anchor=center,fill=green!20] (w2) at (w)
{
\LARGE
{$
\textbf
{
w
}$}}
;
\node
[anchor=center,fill=green!20] (w2) at (w)
{
\LARGE
{$
\textbf
{
w
}$}}
;
\node
[anchor=north] (wlabel) at ([yshift=-1.5em]w.south)
{
参数(权重)
}
;
\draw
[<-] ([yshift=-0.2em]w2.south) -- (wlabel.north);
}
\visible
<4->
{
\node
[anchor=center,fill=purple!20] (b2) at (b)
{
\LARGE
{$
\textbf
{
b
}$}}
;
\node
[anchor=center,fill=purple!20] (b2) at (b)
{
\LARGE
{$
\textbf
{
b
}$}}
;
\node
[anchor=south] (blabel) at ([yshift=1.3em]b.north)
{
偏移
}
;
\draw
[<-] ([yshift=0.2em]b2.north) -- (blabel.south);
}
...
...
@@ -859,6 +859,14 @@ GPT-2 (Transformer) & Radford et al. & 2019 & \alert{35.7}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 神经网络的作用
\begin{frame}
{
神经网络:线性变换 + 激活函数
}
\begin{itemize}
\item
对于向量
$
\textbf
{
x
}
\in
\mathbb
{
R
}^
m
$
,一层神经网络实际上就是把
\end{itemize}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection
{
多层神经网络
}
%%%------------------------------------------------------------------------------------------------------------
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论