Commit 94b2cdf8 by 曹润柘

合并分支 'caorunzhe' 到 'master'

Caorunzhe

查看合并请求 !981
parents 6e807eec 05c69cad
......@@ -15,7 +15,7 @@
\node [neuronnode] (neuron_y') at (2.4 * \nodespace,-1.5 * \neuronsep) {\scriptsize{$x_{i}^{l+1}$}};
\node [anchor=north] (standard) at ([yshift=-4em]neuron_z.south) {\scriptsize{标准网络}};
\node [] (standard) at ([xshift=-1em]neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}};
\node [] (standard) at ([xshift=-1em]neuron_z.west) {\scriptsize{$\mathbi{w}_{i}^{l}$}};
\node [] (standard) at ([xshift=0.6em,yshift=0.3em]neuron_z.east) {\scriptsize{$f$}};
\draw [->,line width=0.3mm] (neuron_b.east) -- (neuron_z.130);
......@@ -41,7 +41,7 @@
\node [neuronnode] (drop_neuron_r1) at (4.4*\nodespace,-2.5*\neuronsep) {\scriptsize{$r_{1}^{l}$}};
\node [anchor=north] (standard) at ([xshift=2em,yshift=-4em]drop_neuron_z.south) {\scriptsize{应用Dropout后的网络}};
\node [] (standard) at ([xshift=-1em]drop_neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}};
\node [] (standard) at ([xshift=-1em]drop_neuron_z.west) {\scriptsize{$\mathbi{w}_{i}^{l}$}};
\node [] (standard) at ([xshift=0.6em,yshift=0.3em]drop_neuron_z.east) {\scriptsize{$f$}};
%structure
\draw [->,line width=0.3mm] (drop_neuron_b.east) -- (drop_neuron_z.130);
......@@ -60,12 +60,12 @@
%equ
\node [anchor=west,inner sep = 2pt] (line1) at (9*\nodespace,0) {未应用Dropout:};
\node [anchor=north west,inner sep = 2pt] (line2) at (line1.south west) {$z_{i}^{l+1}=\mathbf{w}^{l} \mathbf{x}^{l} + b^{l}$};
\node [anchor=north west,inner sep = 2pt] (line2) at (line1.south west) {$z_{i}^{l+1}=\mathbi{w}^{l} \mathbi{x}^{l} + b^{l}$};
\node [anchor=north west,inner sep = 2pt] (line3) at (line2.south west) {$x_{i}^{l+1}=f\left(z_{i}^{l+1}\right)$};
\node [anchor=north west,inner sep = 2pt] (line4) at (line3.south west) {应用Dropout:};
\node [anchor=north west,inner sep = 2pt] (line5) at (line4.south west) {$r_{j}^{l} \sim$ Bernoulli $(1-p)$};
\node [anchor=north west,inner sep = 2pt] (line6) at (line5.south west) {$\tilde{\mathbf{x}}=\mathbf{r} * \mathbf{x}$};
\node [anchor=north west,inner sep = 2pt] (line7) at (line6.south west) {$z_{i}^{l+1}=\mathbf{w}^{l} \widetilde{\mathbf{x}}^{l} + b^{l}$};
\node [anchor=north west,inner sep = 2pt] (line6) at (line5.south west) {$\tilde{\mathbi{x}}=\mathbi{r} * \mathbi{x}$};
\node [anchor=north west,inner sep = 2pt] (line7) at (line6.south west) {$z_{i}^{l+1}=\mathbi{w}^{l} \widetilde{\mathbi{x}}^{l} + b^{l}$};
\node [anchor=north west,inner sep = 2pt] (line8) at (line7.south west) {$x_{i}^{l+1}=f\left(z_{i}^{l+1}\right)$};
\end{tikzpicture}
\ No newline at end of file
......@@ -4,10 +4,10 @@
\begin{scope}[]
% Column 1
\node [prob,minimum size=0.1cm] (prob11) at (0,0) {};
\node [prob,minimum size=0.5cm,anchor=center] (prob21) at ([yshift=-0.5cm]prob11.center) {$.7$};
\node [prob,minimum size=0.5cm,anchor=center] (prob21) at ([yshift=-0.5cm]prob11.center) {$0.7$};
\node [prob,minimum size=0.1cm,anchor=center] (prob31) at ([yshift=-0.5cm]prob21.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob41) at ([yshift=-0.5cm]prob31.center) {};
\node [prob,minimum size=0.3cm,anchor=center] (prob51) at ([yshift=-0.5cm]prob41.center) {$.2$};
\node [prob,minimum size=0.3cm,anchor=center,font=\tiny] (prob51) at ([yshift=-0.5cm]prob41.center) {$0.2$};
\begin{pgfonlayer}{background}
\coordinate (bottomleft) at ([shift={(-0.25cm,-0.25cm)}]prob51.center);
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob11.center);
......@@ -18,8 +18,8 @@
% Column 2
\node [prob,minimum size=0.1cm,anchor=center] (prob12) at ([xshift=1cm]prob11.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob22) at ([yshift=-0.5cm]prob12.center) {};
\node [prob,minimum size=0.4cm,anchor=center] (prob32) at ([yshift=-0.5cm]prob22.center) {$.4$};
\node [prob,minimum size=0.3cm,anchor=center] (prob42) at ([yshift=-0.5cm]prob32.center) {$.3$};
\node [prob,minimum size=0.4cm,anchor=center,font=\tiny] (prob32) at ([yshift=-0.5cm]prob22.center) {0$.4$};
\node [prob,minimum size=0.3cm,anchor=center,font=\tiny] (prob42) at ([yshift=-0.5cm]prob32.center) {$0.3$};
\node [prob,minimum size=0.1cm,anchor=center] (prob52) at ([yshift=-0.5cm]prob42.center) {};
\begin{pgfonlayer}{background}
\coordinate (bottomleft) at ([shift={(-0.25cm,-0.25cm)}]prob52.center);
......@@ -33,7 +33,7 @@
\node [prob,minimum size=0.1cm,anchor=center] (prob23) at ([yshift=-0.5cm]prob13.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob33) at ([yshift=-0.5cm]prob23.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob43) at ([yshift=-0.5cm]prob33.center) {};
\node [prob,minimum size=0.4cm,anchor=center] (prob53) at ([yshift=-0.5cm]prob43.center) {$.6$};
\node [prob,minimum size=0.4cm,anchor=center,font=\tiny] (prob53) at ([yshift=-0.5cm]prob43.center) {$0.6$};
\begin{pgfonlayer}{background}
\coordinate (bottomleft) at ([shift={(-0.25cm,-0.25cm)}]prob53.center);
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob13.center);
......@@ -42,7 +42,7 @@
% \node [anchor=center] (word13) at ([yshift=0.7cm]prob13.center) {fine};
% Column 4
\node [prob,minimum size=0.5cm,anchor=center] (prob14) at ([xshift=1cm]prob13.center) {$.8$};
\node [prob,minimum size=0.5cm,anchor=center] (prob14) at ([xshift=1cm]prob13.center) {$0.8$};
\node [prob,minimum size=0.1cm,anchor=center] (prob24) at ([yshift=-0.5cm]prob14.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob34) at ([yshift=-0.5cm]prob24.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob44) at ([yshift=-0.5cm]prob34.center) {};
......@@ -92,7 +92,7 @@
\begin{scope}[xshift=2.7in]
% Column 1
\node [prob,minimum size=0.1cm] (prob11) at (0,0) {};
\node [prob,minimum size=0.5cm,anchor=center] (prob21) at ([yshift=-0.5cm]prob11.center) {$1.$};
\node [prob,minimum size=0.5cm,anchor=center] (prob21) at ([yshift=-0.5cm]prob11.center) {$1$};
\node [prob,minimum size=0.1cm,anchor=center] (prob31) at ([yshift=-0.5cm]prob21.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob41) at ([yshift=-0.5cm]prob31.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob51) at ([yshift=-0.5cm]prob41.center) {};
......@@ -106,7 +106,7 @@
% Column 2
\node [prob,minimum size=0.1cm,anchor=center] (prob12) at ([xshift=1cm]prob11.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob22) at ([yshift=-0.5cm]prob12.center) {};
\node [prob,minimum size=0.5cm,anchor=center] (prob32) at ([yshift=-0.5cm]prob22.center) {$1.$};
\node [prob,minimum size=0.5cm,anchor=center] (prob32) at ([yshift=-0.5cm]prob22.center) {$1$};
\node [prob,minimum size=0.1cm,anchor=center] (prob42) at ([yshift=-0.5cm]prob32.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob52) at ([yshift=-0.5cm]prob42.center) {};
\begin{pgfonlayer}{background}
......@@ -121,7 +121,7 @@
\node [prob,minimum size=0.1cm,anchor=center] (prob23) at ([yshift=-0.5cm]prob13.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob33) at ([yshift=-0.5cm]prob23.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob43) at ([yshift=-0.5cm]prob33.center) {};
\node [prob,minimum size=0.5cm,anchor=center] (prob53) at ([yshift=-0.5cm]prob43.center) {$1.$};
\node [prob,minimum size=0.5cm,anchor=center] (prob53) at ([yshift=-0.5cm]prob43.center) {$1$};
\begin{pgfonlayer}{background}
\coordinate (bottomleft) at ([shift={(-0.25cm,-0.25cm)}]prob53.center);
\coordinate (topright) at ([shift={(0.25cm,0.25cm)}]prob13.center);
......@@ -130,7 +130,7 @@
\node [anchor=center] (word13) at ([yshift=0.68cm]prob13.center) {good};
% Column 4
\node [prob,minimum size=0.5cm,anchor=center] (prob14) at ([xshift=1cm]prob13.center) {$1.$};
\node [prob,minimum size=0.5cm,anchor=center] (prob14) at ([xshift=1cm]prob13.center) {$1$};
\node [prob,minimum size=0.1cm,anchor=center] (prob24) at ([yshift=-0.5cm]prob14.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob34) at ([yshift=-0.5cm]prob24.center) {};
\node [prob,minimum size=0.1cm,anchor=center] (prob44) at ([yshift=-0.5cm]prob34.center) {};
......
......@@ -20,21 +20,21 @@
\foreach \curr / \prev in {1/0,2/1,3/2}
{
% models
\node[modelnode,fill=yellow!20] (stu\curr1) at ([yshift=-3em]stu\prev1.south) {\rotatebox{90}{学生模型 $1$}};
\node[modelnode,fill=yellow!20] (stu\curr2) at ([yshift=-3em]stu\prev2.south) {\rotatebox{90}{学生模型 $2$}};
\node[modelnode,fill=yellow!20] (stu\curr3) at ([yshift=-3em]stu\prev3.south) {\rotatebox{90}{学生模型 $3$}};
\node[modelnode,fill=yellow!20] (stu\curr4) at ([yshift=-3em]stu\prev4.south) {\rotatebox{90}{学生模型 $4$}};
\node[modelnode,fill=yellow!20] (stu\curr5) at ([yshift=-3em]stu\prev5.south) {\rotatebox{90}{学生模型 $5$}};
\node[modelnode] (tea\curr1) at ([yshift=-3em]tea\prev1.south) {\rotatebox{90}{\color{red!60} 教师模型 $1$}};
\node[modelnode] (tea\curr2) at ([yshift=-3em]tea\prev2.south) {\rotatebox{90}{\color{blue!60} 教师模型 $2$}};
\node[modelnode,fill=yellow!20,align=center] (stu\curr1) at ([yshift=-3em]stu\prev1.south) {\\[-0.5ex]\\[-0.5ex]\\[-0.5ex]\\[-0.5ex]$1$};
\node[modelnode,fill=yellow!20,align=center] (stu\curr2) at ([yshift=-3em]stu\prev2.south) {\\[-0.5ex]\\[-0.5ex]\\[-0.5ex]\\[-0.5ex]$2$};
\node[modelnode,fill=yellow!20,align=center] (stu\curr3) at ([yshift=-3em]stu\prev3.south) {\\[-0.5ex]\\[-0.5ex]\\[-0.5ex]\\[-0.5ex]$3$};
\node[modelnode,fill=yellow!20,align=center] (stu\curr4) at ([yshift=-3em]stu\prev4.south) {\\[-0.5ex]\\[-0.5ex]\\[-0.5ex]\\[-0.5ex]$4$};
\node[modelnode,fill=yellow!20,align=center] (stu\curr5) at ([yshift=-3em]stu\prev5.south) {\\[-0.5ex]\\[-0.5ex]\\[-0.5ex]\\[-0.5ex]$5$};
\node[modelnode,align=center,text=red!60] (tea\curr1) at ([yshift=-3em]tea\prev1.south) {\\[-0.5ex]\\[-0.5ex]\\[-0.5ex]\\[-0.5ex]$1$};
\node[modelnode,align=center,text=blue!60] (tea\curr2) at ([yshift=-3em]tea\prev2.south) {\\[-0.5ex]\\[-0.5ex]\\[-0.5ex]\\[-0.5ex]$2$};
% 集成 labels
\draw[->,very thick] ([xshift=2pt]stu\curr5.east) to node [auto] {\small 集成} ([xshift=-2pt]tea\curr1.west);
}
% iteration labels
\node[font=\small,anchor=east,purple!80] (iterate1) at ([xshift=-1em]stu21.west) {\rotatebox{90}{轮数 $1$}};
\node[font=\small,anchor=east,purple!80] (iterate2) at ([xshift=-1em]stu31.west) {\rotatebox{90}{轮数 $2$}};
\node[font=\small,anchor=east,purple!80,align=center] (iterate1) at ([xshift=-1em]stu21.west) {\\\\$1$};
\node[font=\small,anchor=east,purple!80,align=center] (iterate2) at ([xshift=-1em]stu31.west) {\\\\$2$};
% distillation labels
\node[font=\small,anchor=south west] (distill1) at ([yshift=1.2em]iterate1.north west) {知识蒸馏};
......
......@@ -55,8 +55,8 @@
\node [anchor=south,font=\scriptsize] (w8) at (label7.north) {$0.1$};
\node[font=\scriptsize] (line1) at ([xshift=13em,yshift=-1.5em]model_label7.east) {$Loss =-0.3 \log p_{3}-\sum_{i=1}^{7} 0.1 \log p_{i}$};
\node[font=\scriptsize] (line2) at ([xshift=9.5em,yshift=3em]model_label7.east) {$Loss =-\log p_{3}$};
\node[font=\scriptsize] (line1) at ([xshift=13em,yshift=-1.5em]model_label7.east) {$\textrm{Loss} =-0.3 \log p_{3}-\sum_{i=1}^{7} 0.1 \log p_{i}$};
\node[font=\scriptsize] (line2) at ([xshift=9.5em,yshift=3em]model_label7.east) {$\textrm{Loss} =-\log p_{3}$};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.5em,rounded corners=1pt,very thick,dotted,draw=red] [fit =(model_w3) (model_label1) (model_label7) (one_hot_w3)] (box1) {};
......
......@@ -94,7 +94,7 @@
\node [anchor=south,colnode,minimum height=4.2em,minimum width=1em] (b2) at ([xshift=1.67em,yshift=0em]b1.south) {};
\node [anchor=south,colnode,minimum height=3.7em,minimum width=1em] (b3) at ([xshift=1.67em,yshift=0em]b2.south) {};
\node [anchor=south,colnode,minimum height=4.2em,minimum width=1em] (b4) at ([xshift=1.67em,yshift=0em]b3.south) {};
\node [anchor=south,colnode,minimum height=0.8em,minimum width=1em] (b5) at ([xshift=1.67em,yshift=0em]b4.south) {};
\node [anchor=south,colnode,minimum height=1.4em,minimum width=1em] (b5) at ([xshift=1.67em,yshift=0em]b4.south) {};
\node [anchor=south,colnode,minimum height=0.15em,minimum width=1em] (b6) at ([xshift=1.67em,yshift=0em]b5.south) {};
{\scriptsize
......
......@@ -253,7 +253,7 @@ C(\mathbi{x}_j \mathbi{W}_K,\omega) &=& (\mathbi{x}_{j-\omega},\ldots,\mathbi{x}
\vspace{0.5em}
\item {\small\bfnew{使用轻量卷积和动态卷积神经网络}}\upcite{Wu2019PayLA,DBLP:conf/interspeech/GulatiQCPZYHWZW20}。比如,分别在编码器和解码器利用轻量卷积或动态卷积神经网络(见{\chapternine})替换Transformer的自注意力机制,同时保留解码器的编码-解码注意力机制,一定程度上加强了模型对局部信息的建模能力,同时提高了计算效率。
\vspace{0.5em}
\item {\small\bfnew{使用1维卷积注意力网络}}(图\ref{fig:15-5}(b))。可以使用一维的卷积自注意力网络(1D-CSAN)将关注的范围限制在相近的元素窗口中。其形式上十分简单,只需预先设定好局部建模的窗口大小$D$,并在进行注意力权重计算和对Value值进行加权求和时,将其限制在设定好的窗口范围内。
\item {\small\bfnew{使用1维卷积注意力网络}}(图\ref{fig:15-5}(b)){\red (没引用?)}。可以使用一维的卷积自注意力网络(1D-CSAN)将关注的范围限制在相近的元素窗口中。其形式上十分简单,只需预先设定好局部建模的窗口大小$D$,并在进行注意力权重计算和对Value值进行加权求和时,将其限制在设定好的窗口范围内。
\vspace{0.5em}
\item {\small\bfnew{使用2维卷积注意力网络}}(图\ref{fig:15-5}(c))。在一维卷积注意力网络的基础上,对多个注意力头之间的信息进行交互建模,打破了注意力头之间的界限。 1D-CDAN{\red (S?)}的关注区域为$1\times D$,当将其扩展为二维矩形$D \times N$,长和宽分别为局部窗口的大小和参与建模的自注意力头的个数。这样,模型可以计算某个头中的第$i$个元素和另一个头中的第$j$个元素之间的相关性系数,实现了对不同子空间之间关系的建模,所得到的注意力分布{\red (哪里得到了?)}表示了头之间的依赖关系。
......
......@@ -1732,7 +1732,7 @@ z_t&=&\gamma z_{t-1}+(1-\gamma) \frac{\partial J}{\partial {\theta}_t} \cdot \f
\parinterval 这个过程可以得到$ {\mathbi{s}}^K $节点处的梯度$ {\bm \pi}^K= \frac{\partial L}{\partial {\mathbi{s}}^K} $,在后续的过程中可以直接使用其作为前一层提供的梯度计算结果,而不需要从$ {\mathbi{h}}^K $节点处重新计算。这也体现了自动微分与符号微分的差别,对于计算图的每一个阶段,并不需要得到完成的微分表达式,而是通过前一层提供的梯度,直接计算当前的梯度即可,这样避免了大量的重复计算。
\parinterval 在得到$ {\bm \pi}^K= \frac{\partial L}{\partial {\mathbi{s}}^K} $之后,下一步的目标是:1)计算损失函数$ L $相对于第$ K-1 $层与输出层之间连接权重$ {\mathbi{W}}^K $的梯度;2)计算损失函数$ L $相对于神经网络网络$ K-1 $层输出结果$ {\mathbi{h}}^{K-1} $的梯度。这部分内容如图\ref{fig:9-55}所示。
\parinterval 在得到$ {\bm \pi}^K= \frac{\partial L}{\partial {\mathbi{s}}^K} $之后,下一步的目标是:1)计算损失函数$ L $相对于第$ K-1 $层与输出层之间连接权重$ {\mathbi{W}}^K $的梯度;2)计算损失函数$ L $相对于神经网络第$ K-1 $层输出结果$ {\mathbi{h}}^{K-1} $的梯度。这部分内容如图\ref{fig:9-55}所示。
%----------------------------------------------
\begin{figure}[htp]
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论