Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
T
Toy-MT-Introduction
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
0
Issues
0
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
单韦乔
Toy-MT-Introduction
Commits
08a23d17
Commit
08a23d17
authored
Nov 12, 2019
by
xiaotong
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
new pages
parent
95f50640
隐藏空白字符变更
内嵌
并排
正在显示
2 个修改的文件
包含
143 行增加
和
6 行删除
+143
-6
Section06-Neural-Machine-Translation/section06-test.tex
+58
-6
Section06-Neural-Machine-Translation/section06.tex
+85
-0
没有找到文件。
Section06-Neural-Machine-Translation/section06-test.tex
查看文件 @
08a23d17
...
...
@@ -142,6 +142,13 @@
\begin{frame}
{
上下文向量
$
C
_
i
$}
\begin{itemize}
\item
对于目标语位置
$
i
$
,
$
C
_
i
$
是目标语
$
i
$
使用的上下文向量
\begin{itemize}
\item
$
h
_
j
$
表示编码器第
$
j
$
个位置的隐层状态
\item
$
s
_
i
$
表示解码器第
$
i
$
个位置的隐层状态
\item
<2->
$
\alpha
_{
i,j
}$
表示注意力权重,表示目标语第
$
i
$
个位置与源语第
$
j
$
个位置之间的相关性大小
\item
<2->
$
a
(
\cdot
)
$
表示注意力函数,计算
$
s
_{
i
-
1
}$
和
$
h
_
j
$
之间的相关性
\item
<3->
$
C
_
i
$
是所有源语编码表示
$
\{
h
_
j
\}
$
的加权求和,权重为
$
\{\alpha
_{
i,j
}
\}
$
\end{itemize}
\end{itemize}
\begin{center}
...
...
@@ -149,14 +156,59 @@
\begin{scope}
\node
[anchor=west,fill=red!20!white,inner sep=2pt,minimum width=2em] (h1) at (0,0)
{
\scriptsize
{$
h
_
1
$}}
;
\node
[anchor=west,fill=red!20!white,inner sep=2pt,minimum width=2em] (h2) at ([xshift=1em]h1.east)
{
\scriptsize
{$
h
_
2
$}}
;
\node
[anchor=west,inner sep=0pt,minimum width=2em] (h3) at ([xshift=0.7em]h2.east)
{
\scriptsize
{
...
}}
;
\node
[anchor=west,fill=red!20!white,inner sep=2pt,minimum width=2em] (h4) at ([xshift=0.7em]h3.east)
{
\scriptsize
{$
h
_
n
$}}
;
\node
[anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h1) at (0,0)
{
\scriptsize
{$
h
_
1
$}}
;
\node
[anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h2) at ([xshift=1em]h1.east)
{
\scriptsize
{$
h
_
2
$}}
;
\node
[anchor=west,inner sep=0pt,minimum width=3em] (h3) at ([xshift=0.5em]h2.east)
{
\scriptsize
{
...
}}
;
\node
[anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h4) at ([xshift=0.5em]h3.east)
{
\scriptsize
{$
h
_
n
$}}
;
\node
[anchor=south,circle,minimum size=1.0em,draw,ublue,thick] (sum) at ([yshift=2em]h2.north east)
{}
;
\draw
[thick,-,ublue] (sum.north) -- (sum.south);
\draw
[thick,-,ublue] (sum.west) -- (sum.east);
\node
[anchor=south,draw,fill=green!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (th1) at ([yshift=2em,xshift=-1em]sum.north west)
{
\scriptsize
{$
s
_{
i
-
1
}$}}
;
\node
[anchor=west,draw,fill=green!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (th2) at ([xshift=2em]th1.east)
{
\scriptsize
{$
s
_{
i
}$}}
;
\draw
[->] (h1.north) .. controls +(north:0.8) and +(west:1) .. (sum.190) node [pos=0.3,left]
{
\tiny
{$
\alpha
_{
i,
1
}$}}
;
\draw
[->] (h2.north) .. controls +(north:0.6) and +(220:0.2) .. (sum.220) node [pos=0.2,right]
{
\tiny
{$
\alpha
_{
i,
2
}$}}
;
\draw
[->] (h4.north) .. controls +(north:0.8) and +(east:1) .. (sum.-10) node [pos=0.1,left] (alphan)
{
\tiny
{$
\alpha
_{
i,n
}$}}
;
\draw
[->] ([xshift=-1.5em]th1.west) -- ([xshift=-0.1em]th1.west);
\draw
[->] ([xshift=0.1em]th1.east) -- ([xshift=-0.1em]th2.west);
\draw
[->] ([xshift=0.1em]th2.east) -- ([xshift=1.5em]th2.east);
\draw
[->] (sum.north) .. controls +(north:0.8) and +(west:0.2) .. ([yshift=-0.4em,xshift=-0.1em]th2.west) node [pos=0.2,right] (ci)
{
\scriptsize
{$
C
_{
i
}$}}
;
\node
[anchor=south,inner sep=1pt] (output) at ([yshift=0.8em]th2.north)
{
\tiny
{
输出层
}}
;
\draw
[->] ([yshift=0.1em]th2.north) -- ([yshift=-0.1em]output.south);
\node
[anchor=north] (enc1) at (h1.south)
{
\tiny
{
编码器输出
}}
;
\node
[anchor=north] (enc12) at ([yshift=0.5em]enc1.south)
{
\tiny
{
(位置
$
1
$
)
}}
;
\node
[anchor=north] (enc2) at (h2.south)
{
\tiny
{
编码器输出
}}
;
\node
[anchor=north] (enc22) at ([yshift=0.5em]enc2.south)
{
\tiny
{
(位置
$
2
$
)
}}
;
\node
[anchor=north] (enc4) at (h4.south)
{
\tiny
{
编码器输出
}}
;
\node
[anchor=north] (enc42) at ([yshift=0.5em]enc4.south)
{
\tiny
{
(位置
$
n
$
)
}}
;
\visible
<2->
{
\node
[anchor=west] (math1) at ([xshift=5em,yshift=1em]th2.east)
{$
C
_
i
=
\sum
_{
j
}
\alpha
_{
i,j
}
h
_
j
\ \
$}
;
}
\visible
<3->
{
\node
[anchor=north west] (math2) at ([yshift=-2em]math1.south west)
{$\alpha_{i,j} = \frac{\exp(\beta_{i,j})}{\sum_{j'} \exp(\beta_{i,j'})}$}
;
\node
[anchor=north west] (math3) at ([yshift=-0em]math2.south west)
{$
\beta
_{
i,j
}
=
a
(
s
_{
i
-
1
}
, h
_
j
)
$}
;
}
\node
[anchor=south,circle,minimum size=1.5em,draw,ublue,thick] (sum) at ([yshift=2em]h2.north east)
{}
;
\begin{pgfonlayer}
{
background
}
\visible
<2->
{
\node
[rectangle,inner sep=0.4em,rounded corners=1pt,fill=blue!10,drop shadow] [fit = (math1)] (box1)
{}
;
}
\visible
<3->
{
\node
[rectangle,inner sep=0.4em,rounded corners=1pt,fill=orange!10,drop shadow] [fit = (math2) (math3)] (box2)
{}
;
}
\end{pgfonlayer}
\node
[anchor=south,fill=green!20!white,inner sep=3pt,minimum width=2em] (th1) at ([yshift=2em]sum.north)
{
\scriptsize
{$
s
_{
i
_
1
}$}}
;
\visible
<2->
{
\draw
[->,dotted,thick,blue] (box1.west) .. controls +(west:1.2) and +(east:2.0) .. ([xshift=-0.3em]ci.east);
}
\visible
<3->
{
\draw
[->,dotted,thick,orange] ([yshift=1em]box2.west) .. controls +(west:1.2) and +(east:1.0) .. ([xshift=-0.35em]alphan.east);
}
\end{scope}
...
...
Section06-Neural-Machine-Translation/section06.tex
查看文件 @
08a23d17
...
...
@@ -849,6 +849,11 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% NMT的数学描述
\begin{frame}{数学建模}
% Placeholder slide for the mathematical description of NMT; it has no body yet.
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% 词嵌入
\begin{frame}
{
模块1:词嵌入层
}
\begin{itemize}
...
...
@@ -1395,6 +1400,86 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% C_i的定义
\begin{frame}{上下文向量$C_i$}
% Slide defining the attention context vector C_i.
%   overlay 1: bullet definitions of h_j / s_i and the encoder-decoder diagram
%   overlay 2: attention weights, attention function, and the weighted-sum formula
%   overlay 3: C_i as a weighted sum, plus the softmax normalisation of the weights
% Relies on names defined outside this frame: the colour `ublue`, the
% `background` pgf layer, and the TikZ `fit` / `drop shadow` keys.

\begin{itemize}
    \item 对于目标语位置$i$,$C_i$是目标语$i$使用的上下文向量
    \begin{itemize}
        \item $h_j$表示编码器第$j$个位置的隐层状态
        \item $s_i$表示解码器第$i$个位置的隐层状态
        \item<2-> $\alpha_{i,j}$表示注意力权重,表示目标语第$i$个位置与源语第$j$个位置之间的相关性大小
        \item<2-> $a(\cdot)$表示注意力函数,计算$s_{i-1}$和$h_j$之间的相关性
        \item<3-> $C_i$是所有源语编码表示$\{h_j\}$的加权求和,权重为$\{\alpha_{i,j}\}$
    \end{itemize}
\end{itemize}

\begin{center}
\begin{tikzpicture}
\begin{scope}

    % --- Encoder hidden states h_1, h_2, ..., h_n (bottom row) ---
    \node [anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h1) at (0,0) {\scriptsize{$h_1$}};
    \node [anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h2) at ([xshift=1em]h1.east) {\scriptsize{$h_2$}};
    % Ellipsis placeholder between h_2 and h_n; \dots replaces three literal periods.
    \node [anchor=west,inner sep=0pt,minimum width=3em] (h3) at ([xshift=0.5em]h2.east) {\scriptsize{\dots}};
    \node [anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h4) at ([xshift=0.5em]h3.east) {\scriptsize{$h_n$}};

    % --- Weighted-sum symbol: a circle with a cross drawn through it ---
    \node [anchor=south,circle,minimum size=1.0em,draw,ublue,thick] (sum) at ([yshift=2em]h2.north east) {};
    \draw [thick,-,ublue] (sum.north) -- (sum.south);
    \draw [thick,-,ublue] (sum.west) -- (sum.east);

    % --- Decoder hidden states s_{i-1} and s_i (top row) ---
    \node [anchor=south,draw,fill=green!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (th1) at ([yshift=2em,xshift=-1em]sum.north west) {\scriptsize{$s_{i-1}$}};
    \node [anchor=west,draw,fill=green!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (th2) at ([xshift=2em]th1.east) {\scriptsize{$s_{i}$}};

    % --- Attention-weighted edges from each encoder state into the sum ---
    \draw [->] (h1.north) .. controls +(north:0.8) and +(west:1) .. (sum.190) node [pos=0.3,left] {\tiny{$\alpha_{i,1}$}};
    \draw [->] (h2.north) .. controls +(north:0.6) and +(220:0.2) .. (sum.220) node [pos=0.2,right] {\tiny{$\alpha_{i,2}$}};
    % The label node is named (alphan) so the overlay-3 callout arrow can point at it.
    \draw [->] (h4.north) .. controls +(north:0.8) and +(east:1) .. (sum.-10) node [pos=0.1,left] (alphan) {\tiny{$\alpha_{i,n}$}};

    % --- Decoder recurrence arrows ---
    \draw [->] ([xshift=-1.5em]th1.west) -- ([xshift=-0.1em]th1.west);
    \draw [->] ([xshift=0.1em]th1.east) -- ([xshift=-0.1em]th2.west);
    \draw [->] ([xshift=0.1em]th2.east) -- ([xshift=1.5em]th2.east);

    % --- Context vector C_i flowing from the sum into s_i; label named (ci) for the overlay-2 callout ---
    \draw [->] (sum.north) .. controls +(north:0.8) and +(west:0.2) .. ([yshift=-0.4em,xshift=-0.1em]th2.west) node [pos=0.2,right] (ci) {\scriptsize{$C_{i}$}};

    % --- Output layer above s_i ---
    \node [anchor=south,inner sep=1pt] (output) at ([yshift=0.8em]th2.north) {\tiny{输出层}};
    \draw [->] ([yshift=0.1em]th2.north) -- ([yshift=-0.1em]output.south);

    % --- Two-line captions under each encoder state ---
    \node [anchor=north] (enc1) at (h1.south) {\tiny{编码器输出}};
    \node [anchor=north] (enc12) at ([yshift=0.5em]enc1.south) {\tiny{(位置$1$)}};
    \node [anchor=north] (enc2) at (h2.south) {\tiny{编码器输出}};
    \node [anchor=north] (enc22) at ([yshift=0.5em]enc2.south) {\tiny{(位置$2$)}};
    \node [anchor=north] (enc4) at (h4.south) {\tiny{编码器输出}};
    % Node h4 displays h_n, so its caption is position n (it previously read 4).
    \node [anchor=north] (enc42) at ([yshift=0.5em]enc4.south) {\tiny{(位置$n$)}};

    % --- Overlay 2: weighted-sum definition of C_i ---
    \visible<2->{
        \node [anchor=west] (math1) at ([xshift=5em,yshift=1em]th2.east) {$C_i = \sum_{j} \alpha_{i,j} h_j \ \ $};
    }

    % --- Overlay 3: softmax normalisation and the score function ---
    \visible<3->{
        % The denominator sums over j' so the bound index does not clash with the free index j.
        \node [anchor=north west] (math2) at ([yshift=-2em]math1.south west) {$\alpha_{i,j} = \frac{\exp(\beta_{i,j})}{\sum_{j'} \exp(\beta_{i,j'})}$};
        \node [anchor=north west] (math3) at ([yshift=-0em]math2.south west) {$\beta_{i,j} = a(s_{i-1}, h_j)$};
    }

    % --- Coloured boxes behind the formulas, drawn on the background layer ---
    \begin{pgfonlayer}{background}
        \visible<2->{
            \node [rectangle,inner sep=0.4em,rounded corners=1pt,fill=blue!10,drop shadow] [fit = (math1)] (box1) {};
        }
        \visible<3->{
            \node [rectangle,inner sep=0.4em,rounded corners=1pt,fill=orange!10,drop shadow] [fit = (math2) (math3)] (box2) {};
        }
    \end{pgfonlayer}

    % --- Dotted callout arrows linking each formula box to its symbol in the diagram ---
    \visible<2->{
        \draw [->,dotted,thick,blue] (box1.west) .. controls +(west:1.2) and +(east:2.0) .. ([xshift=-0.3em]ci.east);
    }
    \visible<3->{
        \draw [->,dotted,thick,orange] ([yshift=1em]box2.west) .. controls +(west:1.2) and +(east:1.0) .. ([xshift=-0.35em]alphan.east);
    }

\end{scope}
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
% Opens the Transformer section of the slides.
\section{Transformer}
%%%------------------------------------------------------------------------------------------------------------
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论