Commit c6facf66 by Lee

Merge branch 'master' into jiangyufan

parents 8c083c20 7e80636a
...@@ -1133,7 +1133,9 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -1133,7 +1133,9 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
%%% 词嵌入 %%% 词嵌入
\begin{frame}{模块1:词嵌入层} \begin{frame}{模块1:词嵌入层}
\begin{itemize} \begin{itemize}
\item 词嵌入 \item 把输入的词转换成唯一对应的词表大小的0-1向量
\item 根据0-1向量,从词嵌入矩阵中取出对应的词嵌入$e_y$
\item 取出的词嵌入$e_y$作为循环神经网络的输入
\end{itemize} \end{itemize}
%%% 图 %%% 图
\begin{center} \begin{center}
...@@ -1216,14 +1218,14 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -1216,14 +1218,14 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\node [anchor=north] (w) at ([yshift=3pt]one.south) {\scriptsize{\color{ugreen} you}}; \node [anchor=north] (w) at ([yshift=3pt]one.south) {\scriptsize{\color{ugreen} you}};
\node [anchor=north west] (words) at ([xshift=10pt]one.north east) {\scriptsize{$\begin{matrix} \langle\textrm{eos}\rangle \\ \langle\textrm{sos}\rangle \\ \textrm{Do} \\ \vdots \\ \textrm{know} \\ \textrm{you} \\ \textrm{?} \\ \textrm{have} \end{matrix}$}}; \node [anchor=north west] (words) at ([xshift=10pt]one.north east) {\scriptsize{$\begin{matrix} \langle\textrm{eos}\rangle \\ \langle\textrm{sos}\rangle \\ \textrm{Do} \\ \vdots \\ \textrm{know} \\ \textrm{you} \\ \textrm{?} \\ \textrm{have} \end{matrix}$}};
\node [anchor=north west] (mat) at ([xshift=-6pt]words.north east) {\scriptsize{$ \node [anchor=north west] (mat) at ([xshift=-6pt]words.north east) {\scriptsize{$
\begin{bmatrix} \begin{bmatrix}
.1 & -4 & \cdots & 2 \\ .1 & -4 & \cdots & 2 \\
5 & 2 & \cdots & .2 \\ 5 & 2 & \cdots & .2 \\
2 & .1 & \cdots & .3 \\ 2 & .1 & \cdots & .3 \\
\vdots & \vdots & \ddots & \vdots \\ \vdots & \vdots & \ddots & \vdots \\
0 & .8 & \cdots & 4 \\ 0 & .8 & \cdots & 4 \\
-1 & -2 & \cdots & -3 \\ -1 & -2 & \cdots & -3 \\
.7 & .5 & \cdots & 3 \\ .7 & .5 & \cdots & 3 \\
-2 & .3 & \cdots & .1 -2 & .3 & \cdots & .1
\end{bmatrix}$ \end{bmatrix}$
}}; }};
...@@ -1250,7 +1252,9 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -1250,7 +1252,9 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
%%% 输出 %%% 输出
\begin{frame}{模块2:输出层} \begin{frame}{模块2:输出层}
\begin{itemize} \begin{itemize}
\item Softmax \item 循环网络输出$s$经过权重矩阵$W$变换成词表大小的向量
\item 获得的向量经过Softmax变换得到不同词作为输出的概率
\item 一般选取概率最高的词作为模型最终的输出
\end{itemize} \end{itemize}
%%% 图 %%% 图
\begin{center} \begin{center}
...@@ -1329,13 +1333,13 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -1329,13 +1333,13 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\begin{scope} \begin{scope}
\coordinate (start) at (8.5\base,0.1\base); \coordinate (start) at (8.5\base,0.1\base);
\node [anchor=center,minimum width=5.7em,minimum height=1.3em,draw,rounded corners=0.3em] (hidden) at (start) {}; \node [anchor=center,minimum width=5.7em,minimum height=1.3em,draw,rounded corners=0.3em] (hidden) at (start) {};
\node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!20] (cell01) at ([xshift=0.2em]hidden.west) {\scriptsize{.2}}; \node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!20] (cell01) at ([xshift=0.2em]hidden.west) {\scriptsize{.2}};
\node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!10] (cell02) at (cell01.east) {\scriptsize{-1}}; \node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!10] (cell02) at (cell01.east) {\scriptsize{-1}};
\node [anchor=west,minimum width=1em,minimum size=1em,fill=white] (cell03) at (cell02.east) {\scriptsize{$\cdots$}}; \node [anchor=west,minimum width=1em,minimum size=1em,fill=white] (cell03) at (cell02.east) {\scriptsize{$\cdots$}};
\node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!50] (cell04) at (cell03.east) {\scriptsize{5}}; \node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!50] (cell04) at (cell03.east) {\scriptsize{5}};
\node [anchor=south,minimum width=10.9em,minimum height=1.3em,draw,rounded corners=0.3em] (target) at ([yshift=1.5em]hidden.north) {}; \node [anchor=south,minimum width=10.9em,minimum height=1.3em,draw,rounded corners=0.3em] (target) at ([yshift=1.5em]hidden.north) {};
\node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!10] (cell11) at ([xshift=0.2em]target.west) {\scriptsize{-2}}; \node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!10] (cell11) at ([xshift=0.2em]target.west) {\scriptsize{-2}};
\node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!10] (cell12) at (cell11.east) {\scriptsize{-1}}; \node [anchor=west,minimum width=1em,minimum size=1em,fill=ugreen!10] (cell12) at (cell11.east) {\scriptsize{-1}};
...@@ -1365,7 +1369,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -1365,7 +1369,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
\filldraw [fill=red!20,draw=white] (target.south west) -- (target.south east) -- ([xshift=-0.2em,yshift=0.1em]hidden.north east) -- ([xshift=0.2em,yshift=0.1em]hidden.north west); \filldraw [fill=red!20,draw=white] (target.south west) -- (target.south east) -- ([xshift=-0.2em,yshift=0.1em]hidden.north east) -- ([xshift=0.2em,yshift=0.1em]hidden.north west);
\draw [->,thick] ([xshift=0.2em,yshift=0.1em]hidden.north west) -- (target.south west); \draw [->,thick] ([xshift=0.2em,yshift=0.1em]hidden.north west) -- (target.south west);
\draw [->,thick] ([xshift=-0.2em,yshift=0.1em]hidden.north east) -- (target.south east); \draw [->,thick] ([xshift=-0.2em,yshift=0.1em]hidden.north east) -- (target.south east);
\node [rounded corners=0.3em] (softmax) at ([yshift=1.25em]target.north) {\scriptsize{$p(\hat{s}_i)=\frac{e^{\hat{s}_i}}{\sum_j e^{\hat{s}_j}}$}}; \node [rounded corners=0.3em] (softmax) at ([yshift=1.25em]target.north) {\scriptsize{$p(\hat{s}_i)=\frac{e^{\hat{s}_i}}{\sum_j e^{\hat{s}_j}}$}};
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
\filldraw [fill=blue!20,draw=white] ([yshift=0.1em]cell11.north west) {[rounded corners=0.3em] -- (softmax.west)} -- (label1.south west) -- (label8.south east) {[rounded corners=0.3em] -- (softmax.east)} -- ([yshift=0.1em]cell18.north east) -- ([yshift=0.1em]cell11.north west); \filldraw [fill=blue!20,draw=white] ([yshift=0.1em]cell11.north west) {[rounded corners=0.3em] -- (softmax.west)} -- (label1.south west) -- (label8.south east) {[rounded corners=0.3em] -- (softmax.east)} -- ([yshift=0.1em]cell18.north east) -- ([yshift=0.1em]cell11.north west);
...@@ -1387,9 +1391,16 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\ ...@@ -1387,9 +1391,16 @@ NLP问题的隐含结构假设 & 无隐含结构假设,端到端学习 \\
%%%------------------------------------------------------------------------------------------------------------ %%%------------------------------------------------------------------------------------------------------------
%%% LSTM %%% LSTM
\begin{frame}{模块3:循环单元 - 长短时记忆模型(LSTM)} \begin{frame}{模块3:循环单元 - 长短时记忆模型(LSTM)}
\begin{itemize} \only<1>{遗忘门根据输入的$x_t$、$h_t$决定保留多大比例的$c_t$\\[5pt]}
\item LSTM \only<2>{输入门根据$x_t$、$h_t$计算需要保存的记忆$\hat{c}_t$和其比例\\[5pt]}
\end{itemize} \only<3>{$c_t$和$\hat{c}_t$组合得到新的记忆$c_{t+1}$\\[5pt]}
\only<4>{输出门根据$x_t$、$h_t$、$c_{t+1}$得到新的隐藏状态$h_{t+1}$\\[5pt]}
\only<5>{如此反复,不断更新$c$和$h$直到不再有新的$x$输入\\[5pt]}
{\scriptsize\begin{tabular}{rl}
*$x_t$:&上一层的输出\\
*$h_t$:&同一层上一时刻的隐藏状态\\
*$c_t$:&同一层上一时刻的记忆
\end{tabular}}
%%% 图 %%% 图
\begin{center} \begin{center}
\begin{tikzpicture} \begin{tikzpicture}
...@@ -3002,9 +3013,9 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$ ...@@ -3002,9 +3013,9 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\item 对$\textrm{P}(y_j|\textbf{y}_{<j},\textbf{x})$进行乘积会导致长句的概率很低 \item 对$\textrm{P}(y_j|\textbf{y}_{<j},\textbf{x})$进行乘积会导致长句的概率很低
\item 模型本身并没有考虑每个源语言单词被使用的程度,比如一个单词可能会被翻译了很多``次'' \item 模型本身并没有考虑每个源语言单词被使用的程度,比如一个单词可能会被翻译了很多``次''
\end{itemize} \end{itemize}
\item<2-> 因此,解码时会使用其它特征与$\textrm{P}(\textbf{y}|\textbf{x})$一起组成模型得分$score(\textbf{y},\textbf{x})$,$score(\textbf{y},\textbf{x})$也作为beam search的排序依据 \item<2-> 因此,解码时会使用其它特征与$\textrm{P}(\textbf{y}|\textbf{x})$一起组成模型得分$\textrm{score}(\textbf{y},\textbf{x})$,$\textrm{score}(\textbf{y},\textbf{x})$也作为beam search 的排序依据
\begin{eqnarray} \begin{eqnarray}
score(\textbf{y},\textbf{x}) & = & \log \textrm{P}(\textbf{y}|\textbf{x})/\textrm{lp}(\textbf{y}) + \textrm{cp}(\textbf{y},\textbf{x}) \nonumber \\ \textrm{score}(\textbf{y},\textbf{x}) & = & \log \textrm{P}(\textbf{y}|\textbf{x})/\textrm{lp}(\textbf{y}) + \textrm{cp}(\textbf{y},\textbf{x}) \nonumber \\
\textrm{lp}(\textbf{y}) & = & \frac{(5 + |\textbf{y}|)^\alpha}{(5 + 1)^\alpha} \nonumber \\ \textrm{lp}(\textbf{y}) & = & \frac{(5 + |\textbf{y}|)^\alpha}{(5 + 1)^\alpha} \nonumber \\
\textrm{cp}(\textbf{y},\textbf{x}) & = & \beta \cdot \sum\nolimits_{i=1}^{|\textbf{x}|} \log (\min(\sum\nolimits_{j}^{|\textbf{y}|} a_{ij}, 1)) \nonumber \textrm{cp}(\textbf{y},\textbf{x}) & = & \beta \cdot \sum\nolimits_{i=1}^{|\textbf{x}|} \log (\min(\sum\nolimits_{j}^{|\textbf{y}|} a_{ij}, 1)) \nonumber
\end{eqnarray} \end{eqnarray}
...@@ -3077,7 +3088,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$ ...@@ -3077,7 +3088,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\draw[-latex'] (enc11) to (enc12); \draw[-latex'] (enc11) to (enc12);
\draw[-latex'] (enc12) to (enc13); \draw[-latex'] (enc12) to (enc13);
\draw[-latex'] (enc13) to (enc14); \draw[-latex'] (enc13) to (enc14);
\draw[-latex'] (enc24) to (enc23); \draw[-latex'] (enc24) to (enc23);
\draw[-latex'] (enc23) to (enc22); \draw[-latex'] (enc23) to (enc22);
\draw[-latex'] (enc22) to (enc21); \draw[-latex'] (enc22) to (enc21);
...@@ -3105,7 +3116,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$ ...@@ -3105,7 +3116,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\draw[-latex'] ([xshift=-2pt]enc11.north) to [out=150,in=-150] ([xshift=-2pt]enc31.south); \draw[-latex'] ([xshift=-2pt]enc11.north) to [out=150,in=-150] ([xshift=-2pt]enc31.south);
\draw[-latex'] ([xshift=-2pt]enc12.north) to [out=150,in=-150] ([xshift=-2pt]enc32.south); \draw[-latex'] ([xshift=-2pt]enc12.north) to [out=150,in=-150] ([xshift=-2pt]enc32.south);
\draw[-latex'] ([xshift=-2pt]enc14.north) to [out=150,in=-150] ([xshift=-2pt]enc34.south); \draw[-latex'] ([xshift=-2pt]enc14.north) to [out=150,in=-150] ([xshift=-2pt]enc34.south);
\draw[-latex'] (enc22) to (enc32); \draw[-latex'] (enc22) to (enc32);
\draw[-latex'] (enc21) to (enc31); \draw[-latex'] (enc21) to (enc31);
\draw[-latex'] (enc24) to (enc34); \draw[-latex'] (enc24) to (enc34);
...@@ -3113,19 +3124,19 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$ ...@@ -3113,19 +3124,19 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\draw[-latex'] ([xshift=-2pt]enc31.north) to [out=150,in=-150] ([xshift=-2pt]enc51.south); \draw[-latex'] ([xshift=-2pt]enc31.north) to [out=150,in=-150] ([xshift=-2pt]enc51.south);
\draw[-latex'] ([xshift=-2pt]enc32.north) to [out=150,in=-150] ([xshift=-2pt]enc52.south); \draw[-latex'] ([xshift=-2pt]enc32.north) to [out=150,in=-150] ([xshift=-2pt]enc52.south);
\draw[-latex'] ([xshift=-2pt]enc34.north) to [out=150,in=-150] ([xshift=-2pt]enc54.south); \draw[-latex'] ([xshift=-2pt]enc34.north) to [out=150,in=-150] ([xshift=-2pt]enc54.south);
\draw[-latex'] (enc31) to (enc41); \draw[-latex'] (enc31) to (enc41);
\draw[-latex'] (enc32) to (enc42); \draw[-latex'] (enc32) to (enc42);
\draw[-latex'] (enc34) to (enc44); \draw[-latex'] (enc34) to (enc44);
\draw[-latex'] (enc41) to (enc51); \draw[-latex'] (enc41) to (enc51);
\draw[-latex'] (enc42) to (enc52); \draw[-latex'] (enc42) to (enc52);
\draw[-latex'] (enc44) to (enc54); \draw[-latex'] (enc44) to (enc54);
\draw[-latex'] (enc51) to (enc61); \draw[-latex'] (enc51) to (enc61);
\draw[-latex'] (enc52) to (enc62); \draw[-latex'] (enc52) to (enc62);
\draw[-latex'] (enc54) to (enc64); \draw[-latex'] (enc54) to (enc64);
\draw[-latex'] (enc61) to ([yshift=\base]enc61.north); \draw[-latex'] (enc61) to ([yshift=\base]enc61.north);
\draw[-latex'] (enc62) to ([yshift=\base]enc62.north); \draw[-latex'] (enc62) to ([yshift=\base]enc62.north);
\draw[-latex'] (enc64) to ([yshift=\base]enc64.north); \draw[-latex'] (enc64) to ([yshift=\base]enc64.north);
...@@ -3138,32 +3149,32 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$ ...@@ -3138,32 +3149,32 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\node[rnnnode,fill=green!20,right=\base of decemb1] (decemb2) {}; \node[rnnnode,fill=green!20,right=\base of decemb1] (decemb2) {};
\node[rnnnode,draw=white,fill=white,right=\base of decemb2] (decemb3) {$\cdots$}; \node[rnnnode,draw=white,fill=white,right=\base of decemb2] (decemb3) {$\cdots$};
\node[rnnnode,fill=green!20,right=\base of decemb3] (decemb4) {}; \node[rnnnode,fill=green!20,right=\base of decemb3] (decemb4) {};
\node[rnnnode,above=\base of decemb1] (dec11) {}; \node[rnnnode,above=\base of decemb1] (dec11) {};
\node[rnnnode,above=\base of decemb2] (dec12) {}; \node[rnnnode,above=\base of decemb2] (dec12) {};
\node[rnnnode,draw=white,fill=white,above=\base of decemb3] (dec13) {$\cdots$}; \node[rnnnode,draw=white,fill=white,above=\base of decemb3] (dec13) {$\cdots$};
\node[rnnnode,above=\base of decemb4] (dec14) {}; \node[rnnnode,above=\base of decemb4] (dec14) {};
\node[rnnnode,above=\base of dec11] (dec21) {}; \node[rnnnode,above=\base of dec11] (dec21) {};
\node[rnnnode,above=\base of dec12] (dec22) {}; \node[rnnnode,above=\base of dec12] (dec22) {};
\node[rnnnode,draw=white,fill=white,above=\base of dec13] (dec23) {$\cdots$}; \node[rnnnode,draw=white,fill=white,above=\base of dec13] (dec23) {$\cdots$};
\node[rnnnode,above=\base of dec14] (dec24) {}; \node[rnnnode,above=\base of dec14] (dec24) {};
\node[rnnnode,above=\base of dec21] (dec31) {}; \node[rnnnode,above=\base of dec21] (dec31) {};
\node[rnnnode,above=\base of dec22] (dec32) {}; \node[rnnnode,above=\base of dec22] (dec32) {};
\node[rnnnode,draw=white,fill=white,above=\base of dec23] (dec33) {$\cdots$}; \node[rnnnode,draw=white,fill=white,above=\base of dec23] (dec33) {$\cdots$};
\node[rnnnode,above=\base of dec24] (dec34) {}; \node[rnnnode,above=\base of dec24] (dec34) {};
\node[rnnnode,draw=white,fill=white,above=\base of dec31] (dec41) {$\cdots$}; \node[rnnnode,draw=white,fill=white,above=\base of dec31] (dec41) {$\cdots$};
\node[rnnnode,draw=white,fill=white,above=\base of dec32] (dec42) {$\cdots$}; \node[rnnnode,draw=white,fill=white,above=\base of dec32] (dec42) {$\cdots$};
\node[rnnnode,draw=white,fill=white,above=\base of dec33] (dec43) {}; \node[rnnnode,draw=white,fill=white,above=\base of dec33] (dec43) {};
\node[rnnnode,draw=white,fill=white,above=\base of dec34] (dec44) {$\cdots$}; \node[rnnnode,draw=white,fill=white,above=\base of dec34] (dec44) {$\cdots$};
\node[rnnnode,above=\base of dec41] (dec51) {}; \node[rnnnode,above=\base of dec41] (dec51) {};
\node[rnnnode,above=\base of dec42] (dec52) {}; \node[rnnnode,above=\base of dec42] (dec52) {};
\node[rnnnode,draw=white,fill=white,above=\base of dec43] (dec53) {$\cdots$}; \node[rnnnode,draw=white,fill=white,above=\base of dec43] (dec53) {$\cdots$};
\node[rnnnode,above=\base of dec44] (dec54) {}; \node[rnnnode,above=\base of dec44] (dec54) {};
\node[rnnnode,fill=blue!20,above=\base of dec51] (softmax1) {}; \node[rnnnode,fill=blue!20,above=\base of dec51] (softmax1) {};
\node[rnnnode,fill=blue!20,above=\base of dec52] (softmax2) {}; \node[rnnnode,fill=blue!20,above=\base of dec52] (softmax2) {};
\node[rnnnode,draw=white,fill=white,above=\base of dec53] (softmax3) {$\cdots$}; \node[rnnnode,draw=white,fill=white,above=\base of dec53] (softmax3) {$\cdots$};
...@@ -3173,7 +3184,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$ ...@@ -3173,7 +3184,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\node[wnode,below=0pt of decemb1] (decinword1) {SOS}; \node[wnode,below=0pt of decemb1] (decinword1) {SOS};
\node[wnode,below=0pt of decemb2] (decinword2) {Have}; \node[wnode,below=0pt of decemb2] (decinword2) {Have};
\node[wnode,below=0pt of decemb4] (decinword4) {?}; \node[wnode,below=0pt of decemb4] (decinword4) {?};
\node[wnode,above=0pt of softmax1] (decoutword1) {Have}; \node[wnode,above=0pt of softmax1] (decoutword1) {Have};
\ExtractX{$(softmax2.north)$} \ExtractX{$(softmax2.north)$}
\ExtractY{$(decoutword1.base)$} \ExtractY{$(decoutword1.base)$}
...@@ -3186,15 +3197,15 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$ ...@@ -3186,15 +3197,15 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\draw[-latex'] (dec11) to (dec12); \draw[-latex'] (dec11) to (dec12);
\draw[-latex'] (dec12) to (dec13); \draw[-latex'] (dec12) to (dec13);
\draw[-latex'] (dec13) to (dec14); \draw[-latex'] (dec13) to (dec14);
\draw[-latex'] (dec21) to (dec22); \draw[-latex'] (dec21) to (dec22);
\draw[-latex'] (dec22) to (dec23); \draw[-latex'] (dec22) to (dec23);
\draw[-latex'] (dec23) to (dec24); \draw[-latex'] (dec23) to (dec24);
\draw[-latex'] (dec31) to (dec32); \draw[-latex'] (dec31) to (dec32);
\draw[-latex'] (dec32) to (dec33); \draw[-latex'] (dec32) to (dec33);
\draw[-latex'] (dec33) to (dec34); \draw[-latex'] (dec33) to (dec34);
\draw[-latex'] (dec51) to (dec52); \draw[-latex'] (dec51) to (dec52);
\draw[-latex'] (dec52) to (dec53); \draw[-latex'] (dec52) to (dec53);
\draw[-latex'] (dec53) to (dec54); \draw[-latex'] (dec53) to (dec54);
...@@ -3202,7 +3213,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$ ...@@ -3202,7 +3213,7 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$
\draw[-latex'] (decemb1) to (dec11); \draw[-latex'] (decemb1) to (dec11);
\draw[-latex'] (decemb2) to (dec12); \draw[-latex'] (decemb2) to (dec12);
\draw[-latex'] (decemb4) to (dec14); \draw[-latex'] (decemb4) to (dec14);
\foreach \cur [count=\prev from 1] in {2,...,5} \foreach \cur [count=\prev from 1] in {2,...,5}
{ {
\draw[-latex'] (dec\prev1) to (dec\cur1); \draw[-latex'] (dec\prev1) to (dec\cur1);
...@@ -4649,7 +4660,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l) ...@@ -4649,7 +4660,7 @@ x_{l+1} = x_l+\mathcal{F}(x_l)
\item 由于自回归性,Transformer在推断阶段无法进行并行化操作,导致推断速度非常慢! \item 由于自回归性,Transformer在推断阶段无法进行并行化操作,导致推断速度非常慢!
\item<2-> 加速手段:Cache(缓存需要重复计算的变量) 、Average Attention Network、Share Attention Network \item<2-> 加速手段:低精度、Cache(缓存需要重复计算的变量) 、Average Attention Network、Share Attention Network
\end{itemize} \end{itemize}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论