Commit a1e891a5 by xiaotong

reorganization

parent e7420cbc
......@@ -1588,6 +1588,9 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\begin{frame}{Another Recurrent Unit: the Gated Recurrent Unit (GRU)}
\begin{itemize}
\item The GRU is a variant of the LSTM: it merges the hidden state $h$ and the memory cell $c$ into a single hidden state $h$ and uses fewer ``gate'' units, which greatly improves computational efficiency (one standard formulation is shown below)
\begin{itemize}
\item In NMT, a GRU typically brings a 20--25\% speed-up
\end{itemize}
\end{itemize}
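For reference, one common textbook formulation of the GRU update, with update gate $z_t$ and reset gate $r_t$:
\begin{displaymath}
\begin{array}{rcl}
z_t & = & \sigma(W_z x_t + U_z h_{t-1})\\
r_t & = & \sigma(W_r x_t + U_r h_{t-1})\\
\tilde{h}_t & = & \tanh(W_h x_t + U_h (r_t \odot h_{t-1}))\\
h_t & = & (1-z_t) \odot h_{t-1} + z_t \odot \tilde{h}_t
\end{array}
\end{displaymath}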
%%% Figure
\begin{center}
......@@ -1736,7 +1739,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\end{tikzpicture}
\end{center}
{\scriptsize\begin{tabular}{l}
{\footnotesize\begin{tabular}{l}
*$x_t$: output of the previous layer\\
*$h_t$: hidden state of the same layer at the previous time step
\end{tabular}}
......@@ -2843,7 +2846,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
}{
\draw[decorate,decoration={brace}] ([yshift=-\base]rnn30.west) to node[wordnode,align=right,left] {idle\\device 3} ([yshift=\base]rnn30.west);
}
\foreach \i in {1,2,3}
\node[wordnode,font=\scriptsize,anchor=south west] () at (rnn\i0.north west) {\i};
......@@ -3491,7 +3494,10 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Part 1
\section{Transformer}
%%%------------------------------------------------------------------------------------------------------------
%%% Transformer model section
\begin{frame}{Transformer}
\vspace{5.0em}
......@@ -3567,6 +3573,9 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{Self-Attention}
%%%------------------------------------------------------------------------------------------------------------
%%% Self-attention mechanism
\begin{frame}{Self-Attention}
\begin{itemize}
......@@ -3701,7 +3710,9 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{Transformer}
\subsection{The Transformer Model}
%%%------------------------------------------------------------------------------------------------------------
\begin{frame}{Introduction to the Transformer}
\begin{itemize}
......@@ -3742,7 +3753,6 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\section{Transformer}
\begin{frame}{Transformer}
%\begin{tcolorbox}
%[bicolor,sidebyside,righthand width=4.5cm,size=title,frame engine=empty,
......@@ -3824,7 +3834,6 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\section{Transformer}
\begin{frame}{Transformer}
%\begin{tcolorbox}
%[bicolor,sidebyside,righthand width=4.5cm,size=title,frame engine=empty,
......@@ -3910,10 +3919,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
%\end{tcolorbox}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{Input}
\begin{frame}{Positional Encoding}
\begin{itemize}
\item Compared with the recurrent networks discussed earlier, self-attention ignores the order of the words: for example, the two sentences below have different meanings, yet self-attention yields identical representations for them
......@@ -3975,7 +3981,6 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{Input}
\begin{frame}{Positional Encoding (cont.)}
\begin{itemize}
\item Positional encodings can be computed in many ways; here sine and cosine functions are used. In the formula, \textit{pos} denotes the position of the word and \textit{i} the dimension index within the word embedding (a small numerical sketch follows below)
......@@ -4060,7 +4065,6 @@ PE_{(pos,2i+1)} = cos(pos/10000^{2i/d_{model}})
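A minimal NumPy sketch of the sinusoidal encoding described above (illustrative only; \texttt{max\_len} and \texttt{d\_model} are example values):
\begin{verbatim}
import numpy as np

def sinusoidal_positional_encoding(max_len, d_model):
    """Return a (max_len, d_model) matrix of sinusoidal position encodings."""
    pe = np.zeros((max_len, d_model))
    pos = np.arange(max_len)[:, None]           # word position: 0 .. max_len-1
    i = np.arange(0, d_model, 2)[None, :]       # even embedding dimensions 2i
    angle = pos / np.power(10000, i / d_model)  # pos / 10000^(2i / d_model)
    pe[:, 0::2] = np.sin(angle)                 # even dimensions use sin
    pe[:, 1::2] = np.cos(angle)                 # odd dimensions use cos
    return pe

pe = sinusoidal_positional_encoding(max_len=50, d_model=512)
\end{verbatim}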
%%%------------------------------------------------------------------------------------------------------------
\section{Transformer}
\begin{frame}{Transformer}
%\begin{tcolorbox}
%[bicolor,sidebyside,righthand width=4.5cm,size=title,frame engine=empty,
......@@ -4148,7 +4152,6 @@ PE_{(pos,2i+1)} = cos(pos/10000^{2i/d_{model}})
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{Self-Attention Model}
\begin{frame}{Dot-Product Attention}
\begin{itemize}
\item The Transformer uses dot-product self-attention to capture the similarity between all positions within a sentence (an illustrative sketch also follows this frame):
......@@ -4246,9 +4249,9 @@ PE_{(pos,2i+1)} = cos(pos/10000^{2i/d_{model}})
\end{itemize}
\end{frame}
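A minimal NumPy sketch, assuming the standard scaled dot-product formulation $\mathrm{softmax}(QK^{T}/\sqrt{d_k})\,V$; the matrix shapes are illustrative:
\begin{verbatim}
import numpy as np

def scaled_dot_product_attention(Q, K, V):
    """Q, K: (n, d_k); V: (n, d_v). Returns the attended values, shape (n, d_v)."""
    d_k = Q.shape[-1]
    scores = Q @ K.T / np.sqrt(d_k)                 # similarity between all position pairs
    scores -= scores.max(axis=-1, keepdims=True)    # numerical stability
    weights = np.exp(scores)
    weights /= weights.sum(axis=-1, keepdims=True)  # row-wise softmax
    return weights @ V
\end{verbatim}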
%
%%%------------------------------------------------------------------------------------------------------------
\subsection{Multi-Head Self-Attention}
\begin{frame}{Multi-Head Self-Attention}
\begin{itemize}
\item The Transformer introduced multi-head attention: the input queries, keys, and values are split into $h$ subsets along the hidden dimension and attention is applied to each subset separately, which works very well (the standard combination formula is recalled below)
......@@ -4352,7 +4355,6 @@ PE_{(pos,2i+1)} = cos(pos/10000^{2i/d_{model}})
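For completeness, the standard way the $h$ heads are combined, where the projection matrices $W_i^{Q}$, $W_i^{K}$, $W_i^{V}$ and $W^{O}$ are learned parameters:
\begin{displaymath}
\mathrm{MultiHead}(Q,K,V)=\mathrm{Concat}(\mathrm{head}_1,\dots,\mathrm{head}_h)\,W^{O},
\qquad
\mathrm{head}_i=\mathrm{Attention}(QW_i^{Q},\,KW_i^{K},\,VW_i^{V})
\end{displaymath}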
%%%------------------------------------------------------------------------------------------------------------
\section{Transformer}
\begin{frame}{Transformer}
%\begin{tcolorbox}
%[bicolor,sidebyside,righthand width=4.5cm,size=title,frame engine=empty,
......@@ -4439,8 +4441,8 @@ PE_{(pos,2i+1)} = cos(pos/10000^{2i/d_{model}})
%\end{tcolorbox}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{Residual Connections and Layer Normalization}
\begin{frame}{Residuals \& Layer Normalization}
\begin{itemize}
\item In the Transformer, the encoder and the decoder each consist of 6 layers, and every layer in turn contains several sub-layers (self-attention and a feed-forward network). The Transformer is therefore a rather deep network, and vanishing gradients can easily occur during training
......@@ -4523,7 +4525,6 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{Residual Connections and Layer Normalization}
\begin{frame}{Residuals \& Layer Normalization (cont.)}
\begin{itemize}
\item During Transformer training, the residual connections add up the outputs of all preceding layers, so the output distributions of the higher layers keep growing; this makes training unstable and slow, and layer normalization is used to counteract it (the formula is recalled below).
......@@ -4579,7 +4580,6 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
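For reference, a standard form of the layer normalization applied to each sub-layer output, where $\mu$ and $\sigma^{2}$ are the mean and variance over the hidden dimension and $g$, $b$ are learned gain and bias:
\begin{displaymath}
\mathrm{LN}(x) = g \odot \frac{x-\mu}{\sqrt{\sigma^{2}+\epsilon}} + b
\end{displaymath}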
%%%------------------------------------------------------------------------------------------------------------
\section{Transformer}
\begin{frame}{Transformer}
%\begin{tcolorbox}
%[bicolor,sidebyside,righthand width=4.5cm,size=title,frame engine=empty,
......@@ -4666,7 +4666,6 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\end{frame}
%%------------------------------------------------------------------------------------------------------------
\subsection{Self-Attention Model}
\begin{frame}{Feed-Forward Network}
\begin{itemize}
\item Besides the attention operation, each layer also contains a fully connected feed-forward network with two linear transformations and one non-linearity (a ReLU activation) in between; the feed-forward parameters are not shared across layers (the standard form is recalled after this frame)
......@@ -4721,22 +4720,16 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
}
}
\visible<2->{
\node [anchor=east] (line1) at ([xshift=-3.5em,yshift=2.5em]neuron14.west) {\scriptsize{The feed-forward network mainly serves to}};
\node [anchor=north west] (line2) at ([yshift=0.3em]line1.south west) {\scriptsize{map the representations produced by}};
\node [anchor=north west] (line3) at ([yshift=0.3em]line2.south west) {\scriptsize{the attention operation into a larger space,}};
\node [anchor=north west] (line4) at ([yshift=0.3em]line3.south west) {\scriptsize{improving the model's representational power.}};
\node [anchor=north west] (line5) at ([yshift=0.3em]line4.south west) {\scriptsize{Experiments show that removing this network}};
\node [anchor=north west] (line6) at ([yshift=0.3em]line5.south west) {\scriptsize{degrades model performance.}};
}
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,fill=blue!20] [fit = (neuron01) (neuron14) (neuron13) (neuron22)] (ffn) {};
\visible<2->{
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=green!10,drop shadow,draw=ugreen] [fit = (line1) (line2) (line3) (line6)] (box1) {};
}
\end{pgfonlayer}
\end{scope}
......@@ -4866,6 +4859,97 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\end{frame}
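To make the feed-forward sub-layer described above concrete, its standard form is two linear transformations with a ReLU in between:
\begin{displaymath}
\mathrm{FFN}(x) = \max(0,\, xW_1 + b_1)\,W_2 + b_2
\end{displaymath}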
%%%------------------------------------------------------------------------------------------------------------
%%% Optimizer used by the Transformer
\begin{frame}{Optimizer}
% Adam
% Learning rate scheduling
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\begin{frame}{Training Setup}
\begin{itemize}
\item \textbf{Optimizer}: Adam is used with $\beta_1=0.9$, $\beta_2=0.98$ and $\epsilon=10^{-9}$. For the learning rate, a warm-up strategy is adopted: during the early stage of training the learning rate grows from a small initial value, and after a certain number of steps it gradually decreases again (a small sketch follows this frame)
\begin{displaymath}
lrate=d_{\mathrm{model}}^{-0.5}\cdot \min(step^{-0.5},step\cdot \mathrm{warmup\_steps}^{-1.5})
\end{displaymath}
This alleviates the instability at the beginning of training and keeps the parameter distributions stable; warmup\_steps is typically set to 4000
\vspace{0.5em}
\only<1>{
\begin{figure}
\centering
\begin{tikzpicture}
\footnotesize{
\begin{axis}[
width=.60\textwidth,
height=.40\textwidth,
legend style={at={(0.60,0.08)}, anchor=south west},
xlabel={\footnotesize{num updates ($10^3$)}},
ylabel={\footnotesize{Learning rate ($10^{-3}$)}},
ylabel style={yshift=-1em},xlabel style={yshift=0.0em},
yticklabel style={/pgf/number format/precision=2,/pgf/number format/fixed zerofill},
ymin=0,ymax=0.9, ytick={0.2, 0.4, 0.6, 0.8},
xmin=0,xmax=12,xtick={2,4,6,8,10},
legend style={yshift=-6pt, legend plot pos=right,font=\scriptsize,cells={anchor=west}}
]
\addplot[orange,line width=1.25pt] coordinates {(0,0) (4,0.7) (5,0.63) (6,0.57) (7,0.525) (8,0.49) (9,0.465) (10,0.44) (11,0.42) (12,0.4)};
\end{axis}
}
\end{tikzpicture}
\caption{}\label{}
\end{figure}
}
\item<2-> \textbf{Dropout}: to prevent overfitting during training, dropout is applied in four places: the word embeddings and positional encodings, the residual connections, the attention operation, and the feed-forward network. The dropout rate is typically set to 0.1
\item<3-> \textbf{Label smoothing}: learning a smoother target distribution improves generalization and prevents overfitting (the smoothed target is written out below)
\end{itemize}
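As an illustration, with smoothing weight $\epsilon_{ls}$ and target vocabulary size $V$, a standard smoothed target distribution for the gold label $y$ is:
\begin{displaymath}
q'(k) = (1-\epsilon_{ls})\,\delta_{k,y} + \frac{\epsilon_{ls}}{V}
\end{displaymath}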
\vspace{-0.8em}
\end{frame}
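A minimal sketch of the warm-up schedule above, assuming $d_{\mathrm{model}}=512$ and warmup\_steps $=4000$; the schedule peaks at roughly $7\times10^{-4}$ at step 4000, matching the curve in the figure:
\begin{verbatim}
def transformer_lrate(step, d_model=512, warmup_steps=4000):
    """Warm-up learning-rate schedule: linear growth, then step^-0.5 decay."""
    return d_model ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)

peak = transformer_lrate(4000)    # ~7.0e-4, maximum of the schedule
later = transformer_lrate(40000)  # ~2.2e-4, decays as step^-0.5 afterwards
\end{verbatim}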
%%%------------------------------------------------------------------------------------------------------------
\begin{frame}{Training Setup (cont.)}
\begin{itemize}
\item \textbf{Transformer Base}: the standard Transformer configuration; encoder and decoder each contain 6 layers, the hidden size is 512, the feed-forward size is 2048, attention uses 8 heads, and dropout is set to 0.1
\item \textbf{Transformer Big}: to increase model capacity, the hidden size is raised to 1024, the feed-forward size to 4096, attention uses 16 heads, and dropout is set to 0.3
\item \textbf{Transformer Deep}: adding more encoder layers further improves performance, but naively stacking layers leads to vanishing gradients and training fails to converge; methods such as DLCL and pre-norm (placing layer normalization before each sub-layer) are needed to train deeper networks (the Base and Big settings are recapped in code form after the table).
\end{itemize}
\vspace{-0.8em}
{
\footnotesize
\begin{center}
\setlength{\tabcolsep}{3pt}
\renewcommand\arraystretch{1}
\begin{tabular}{l}
\begin{tabular}{lcccl}
\specialrule{1pt}{1pt}{1pt}
\multirow{2}{*}{Model} & \multicolumn{2}{c}{BLEU} & \multicolumn{2}{c}{ \multirow{2}{*}{params}}\\
\cline{2-3}
& EN-DE & EN-FR & \\
\specialrule{0.6pt}{1pt}{1pt}
Transformer Base & 27.3 & 38.1 & \multicolumn{2}{c}{ 65$\times10^{6}$} \\
Transformer Big & 28.4 & 41.8 & \multicolumn{2}{c}{ 213$\times10^{6}$} \\
Transformer Deep (48 layers) & 30.2 & 43.1 & \multicolumn{2}{c}{ 194$\times10^{6}$} \\
\specialrule{1pt}{1pt}{1pt}
\end{tabular}\\
\addlinespace[-0.3ex]
%\footnote \\
\end{tabular}
\end{center}
}
\end{frame}
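Purely as a recap of the Base and Big settings above (the field names are illustrative, not tied to any toolkit):
\begin{verbatim}
# Recap of the Base and Big setups from the slide (illustrative field names).
TRANSFORMER_CONFIGS = {
    "base": dict(enc_layers=6, dec_layers=6, hidden=512,  ffn=2048, heads=8,  dropout=0.1),
    "big":  dict(enc_layers=6, dec_layers=6, hidden=1024, ffn=4096, heads=16, dropout=0.3),
    # "deep": 48 encoder layers; the remaining sizes are not given on the slide,
    # and DLCL / pre-norm style training is needed for it to converge.
}
\end{verbatim}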
%%%------------------------------------------------------------------------------------------------------------
\subsection{Inference}
\begin{frame}{Inference}
\begin{itemize}
......@@ -5051,90 +5135,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\section{Applications}
%%%------------------------------------------------------------------------------------------------------------
%%% Applications
......@@ -5146,17 +5147,17 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\textbf{Some interesting applications of neural machine translation}
}
\end{tcolorbox}
\begin{center}
\begin{tikzpicture}
\begin{scope}[scale=0.7]
\tikzstyle{rnnnode} = [draw,inner sep=5pt,minimum width=3em,minimum height=0.8em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}]
\node [anchor=west,rnnnode] (node11) at (0,0) {\tiny{Self-Attention}};
\node [anchor=west,rnnnode] (node12) at ([xshift=1em]node11.east) {\tiny{Self-Attention}};
\node [anchor=west,rnnnode] (node13) at ([xshift=1em]node12.east) {\tiny{Self-Attention}};
\node [anchor=west,rnnnode] (node14) at ([xshift=1em]node13.east) {\tiny{Self-Attention}};
\node [anchor=north,rnnnode,fill=blue!30!white] (e1) at ([yshift=-2em]node11.south) {\tiny{}};
\node [anchor=north,rnnnode,fill=blue!30!white] (e2) at ([yshift=-2em]node12.south) {\tiny{}};
\node [anchor=north,rnnnode,fill=blue!30!white] (e3) at ([yshift=-2em]node13.south) {\tiny{}};
......@@ -5165,51 +5166,50 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\node [anchor=north,inner sep=2pt] (w2) at ([yshift=-1em]e2.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w3) at ([yshift=-1em]e3.south) {\tiny{我们}};
\node [anchor=north,inner sep=2pt] (w4) at ([yshift=-1em]e4.south) {\tiny{开始}};
\draw [->,thick] ([yshift=0.1em]w1.north)--([yshift=-0.1em]e1.south);
\draw [->,thick] ([yshift=0.1em]w2.north)--([yshift=-0.1em]e2.south);
\draw [->,thick] ([yshift=0.1em]w3.north)--([yshift=-0.1em]e3.south);
\draw [->,thick] ([yshift=0.1em]w4.north)--([yshift=-0.1em]e4.south);
\draw [->,thick] ([yshift=0.1em]e1.north)--([yshift=-0.1em]node11.south);
\draw [->,thick] ([yshift=0.1em]e2.north)--([yshift=-0.1em]node12.south);
\draw [->,thick] ([yshift=0.1em]e3.north)--([yshift=-0.1em]node13.south);
\draw [->,thick] ([yshift=0.1em]e4.north)--([yshift=-0.1em]node14.south);
\draw [->,thick] ([yshift=0.1em]e1.north)--([xshift=-0.5em,yshift=-0.2em]node12.south);
\draw [->,thick] ([yshift=0.1em]e3.north)--([xshift=0.5em,yshift=-0.2em]node12.south);
\draw [->,thick] ([yshift=0.1em]e4.north)--([xshift=1.5em,yshift=-0.2em]node12.south);
\node [anchor=south,rnnnode,fill=blue!30!white] (node21) at ([yshift=2.0em]node11.north) {\tiny{}};
\node [anchor=south,rnnnode,fill=blue!30!white] (node22) at ([yshift=2.0em]node12.north) {\tiny{}};
\node [anchor=south,rnnnode,fill=blue!30!white] (node23) at ([yshift=2.0em]node13.north) {\tiny{}};
\node [anchor=south,rnnnode,fill=blue!30!white] (node24) at ([yshift=2.0em]node14.north) {\tiny{}};
\node [anchor=south] (output1) at ([yshift=1em]node21.north) {\Large{\textbf{}}};
\node [anchor=south] (output2) at ([yshift=1em]node22.north) {\Large{\textbf{}}};
\node [anchor=south] (output3) at ([yshift=1em]node23.north) {\Large{\textbf{}}};
\node [anchor=south] (output4) at ([yshift=1em]node24.north) {\Large{\textbf{}}};
\draw [->,thick] ([yshift=0.1em]node21.north)--([yshift=-0.1em]output1.south);
\draw [->,thick] ([yshift=0.1em]node22.north)--([yshift=-0.1em]output2.south);
\draw [->,thick] ([yshift=0.1em]node23.north)--([yshift=-0.1em]output3.south);
\draw [->,thick] ([yshift=0.1em]node24.north)--([yshift=-0.1em]output4.south);
\draw [->,thick] ([yshift=0.1em]node11.north)--([yshift=-0.1em]node21.south);
\draw [->,thick] ([yshift=0.1em]node12.north)--([yshift=-0.1em]node22.south);
\draw [->,thick] ([yshift=0.1em]node13.north)--([yshift=-0.1em]node23.south);
\draw [->,thick] ([yshift=0.1em]node14.north)--([yshift=-0.1em]node24.south);
\end{scope}
\end{tikzpicture}
\end{center}
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{Applications}
\begin{frame}{NMT Applications}
\begin{itemize}
\item Besides meeting everyday translation needs, neural machine translation systems have many other interesting applications!
......@@ -5252,7 +5252,6 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
%%%------------------------------------------------------------------------------------------------------------
\subsection{Applications}
\begin{frame}{NMT Applications}
\begin{itemize}
\item Example: translating classical Chinese texts
......@@ -5292,7 +5291,6 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{Applications}
\begin{frame}{NMT Applications}
\begin{itemize}
\item Besides meeting everyday translation needs, neural machine translation systems have many other interesting applications!
......@@ -5335,7 +5333,6 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
%%%------------------------------------------------------------------------------------------------------------
\subsection{Applications}
\begin{frame}{NMT Applications}
\begin{itemize}
\item Example: generating antithetical couplets
......@@ -5388,7 +5385,6 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
\subsection{Applications}
\begin{frame}{NMT Applications}
\begin{itemize}
\item Besides meeting everyday translation needs, neural machine translation systems have many other interesting applications!
......@@ -5456,6 +5452,10 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\end{frame}
%%%------------------------------------------------------------------------------------------------------------
%%% Summary
\section{Summary}
%%%------------------------------------------------------------------------------------------------------------
%%% open source NMT
\begin{frame}{Some Open-Source NMT Systems}
\end{frame}
......