Commit afc9b12a by 单韦乔

合并分支 'shanweiqiao' 到 'caorunzhe'

13章

查看合并请求 !722
parents 8c14e5a8 87981c60
...@@ -2,11 +2,11 @@ ...@@ -2,11 +2,11 @@
\tikzstyle{node} =[font=\scriptsize] \tikzstyle{node} =[font=\scriptsize]
\tikzstyle{sentence} =[font=\scriptsize,fill=blue!5!white] \tikzstyle{sentence} =[font=\scriptsize,fill=blue!5!white]
\node[sentence] (node1) at (0,0) {[`low', `lower', `newest', `widest']}; \node[sentence] (node1) at (0,0) {['low', 'lower', 'newest', 'widest']};
\node[sentence,anchor = north] (node2) at ([yshift = -1em]node1.south) {[`l o w $<$e$>$':5, `l o w e r $<$e$>$':2, `n e w e s t $<$e$>$':6, `w i d e s t $<$e$>$':3]}; \node[sentence,anchor = north] (node2) at ([yshift = -1em]node1.south) {['l o w $<$e$>$':5, 'l o w e r $<$e$>$':2, 'n e w e s t $<$e$>$':6, 'w i d e s t $<$e$>$':3]};
\node[sentence,anchor = north] (node3) at ([yshift = -1.5em]node2.south) {[`l o w $<$e$>$':5, `l o w e r $<$e$>$':2, `n e w {\red es} t $<$e$>$':6, `w i d {\red es} t $<$e$>$':3]}; \node[sentence,anchor = north] (node3) at ([yshift = -1.5em]node2.south) {['l o w $<$e$>$':5, 'l o w e r $<$e$>$':2, 'n e w {\red es} t $<$e$>$':6, 'w i d {\red es} t $<$e$>$':3]};
\node[sentence,anchor = north] (node4) at ([yshift = -1em]node3.south) {[`l o w $<$e$>$':5, `l o w e r $<$e$>$':2, `n e w {\red est} $<$e$>$':6, `w i d {\red est} $<$e$>$':3]}; \node[sentence,anchor = north] (node4) at ([yshift = -1em]node3.south) {['l o w $<$e$>$':5, 'l o w e r $<$e$>$':2, 'n e w {\red est} $<$e$>$':6, 'w i d {\red est} $<$e$>$':3]};
\node[sentence,anchor = north] (node5) at ([yshift = -1em]node4.south) {[`l o w $<$e$>$':5, `l o w e r $<$e$>$':2, `n e w {\red est$<$e$>$}':6, `w i d {\red est$<$e$>$}':3]}; \node[sentence,anchor = north] (node5) at ([yshift = -1em]node4.south) {['l o w $<$e$>$':5, 'l o w e r $<$e$>$':2, 'n e w {\red est$<$e$>$}':6, 'w i d {\red est$<$e$>$}':3]};
\node[sentence,anchor = north] (node6) at ([yshift = -1em]node5.south) {$\cdots$}; \node[sentence,anchor = north] (node6) at ([yshift = -1em]node5.south) {$\cdots$};
\node[node,anchor = north] (node7) at ([yshift = -1.6em]node6.south) {直到达到预设的子词词表大小或下一个最高频的字节对出现频率为1。}; \node[node,anchor = north] (node7) at ([yshift = -1.6em]node6.south) {直到达到预设的子词词表大小或下一个最高频的字节对出现频率为1。};
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
\node [neuronnode] (neuron_z) at (1.2 * \nodespace,-1.5 * \neuronsep) {\scriptsize{$z_{i}^{l+1}$}}; \node [neuronnode] (neuron_z) at (1.2 * \nodespace,-1.5 * \neuronsep) {\scriptsize{$z_{i}^{l+1}$}};
\node [neuronnode] (neuron_y') at (2.4 * \nodespace,-1.5 * \neuronsep) {\scriptsize{$x_{i}^{l+1}$}}; \node [neuronnode] (neuron_y') at (2.4 * \nodespace,-1.5 * \neuronsep) {\scriptsize{$x_{i}^{l+1}$}};
\node [anchor=north,ublue] (standard) at ([yshift=-4em]neuron_z.south) {\scriptsize{standard}}; \node [anchor=north,ublue] (standard) at ([yshift=-4em]neuron_z.south) {\scriptsize{标准网络}};
\node [ublue] (standard) at ([xshift=-1em]neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}}; \node [ublue] (standard) at ([xshift=-1em]neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}};
\node [ublue] (standard) at ([xshift=0.6em,yshift=0.3em]neuron_z.east) {\scriptsize{$f$}}; \node [ublue] (standard) at ([xshift=0.6em,yshift=0.3em]neuron_z.east) {\scriptsize{$f$}};
...@@ -40,7 +40,7 @@ ...@@ -40,7 +40,7 @@
\node [neuronnode] (drop_neuron_r2) at (4.4*\nodespace,-1.5*\neuronsep) {\scriptsize{$r_{2}^{l}$}}; \node [neuronnode] (drop_neuron_r2) at (4.4*\nodespace,-1.5*\neuronsep) {\scriptsize{$r_{2}^{l}$}};
\node [neuronnode] (drop_neuron_r1) at (4.4*\nodespace,-2.5*\neuronsep) {\scriptsize{$r_{1}^{l}$}}; \node [neuronnode] (drop_neuron_r1) at (4.4*\nodespace,-2.5*\neuronsep) {\scriptsize{$r_{1}^{l}$}};
\node [anchor=north,ublue] (standard) at ([yshift=-4em]drop_neuron_z.south) {\scriptsize{dropout}}; \node [anchor=north,ublue] (standard) at ([xshift=2em,yshift=-4em]drop_neuron_z.south) {\scriptsize{应用Dropout后的网络}};
\node [ublue] (standard) at ([xshift=-1em]drop_neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}}; \node [ublue] (standard) at ([xshift=-1em]drop_neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}};
\node [ublue] (standard) at ([xshift=0.6em,yshift=0.3em]drop_neuron_z.east) {\scriptsize{$f$}}; \node [ublue] (standard) at ([xshift=0.6em,yshift=0.3em]drop_neuron_z.east) {\scriptsize{$f$}};
%structure %structure
...@@ -59,10 +59,10 @@ ...@@ -59,10 +59,10 @@
\draw [-,line width=0.3mm] (drop_neuron_r1.south) -- ([yshift=-1em]drop_neuron_r1.south); \draw [-,line width=0.3mm] (drop_neuron_r1.south) -- ([yshift=-1em]drop_neuron_r1.south);
%equ %equ
\node [anchor=west,inner sep = 2pt] (line1) at (9*\nodespace,0) {未应用dropout:}; \node [anchor=west,inner sep = 2pt] (line1) at (9*\nodespace,0) {未应用Dropout:};
\node [anchor=north west,inner sep = 2pt] (line2) at (line1.south west) {$z_{i}^{l+1}=\mathbf{w}_{i}^{l} \mathbf{x}+b_{i}^{l}$}; \node [anchor=north west,inner sep = 2pt] (line2) at (line1.south west) {$z_{i}^{l+1}=\mathbf{w}_{i}^{l} \mathbf{x}+b_{i}^{l}$};
\node [anchor=north west,inner sep = 2pt] (line3) at (line2.south west) {$x_{i}^{l+1}=f\left(x_{i}^{l}\right)$}; \node [anchor=north west,inner sep = 2pt] (line3) at (line2.south west) {$x_{i}^{l+1}=f\left(x_{i}^{l}\right)$};
\node [anchor=north west,inner sep = 2pt] (line4) at (line3.south west) {应用dropout:}; \node [anchor=north west,inner sep = 2pt] (line4) at (line3.south west) {应用Dropout:};
\node [anchor=north west,inner sep = 2pt] (line5) at (line4.south west) {$r_{j}^{l} \sim$ Bernoulli $(1-p)$}; \node [anchor=north west,inner sep = 2pt] (line5) at (line4.south west) {$r_{j}^{l} \sim$ Bernoulli $(1-p)$};
\node [anchor=north west,inner sep = 2pt] (line6) at (line5.south west) {$\tilde{\mathbf{x}}=\mathbf{r} * \mathbf{x}$}; \node [anchor=north west,inner sep = 2pt] (line6) at (line5.south west) {$\tilde{\mathbf{x}}=\mathbf{r} * \mathbf{x}$};
\node [anchor=north west,inner sep = 2pt] (line7) at (line6.south west) {$z_{i}^{l+1}=\mathbf{w}_{i}^{l} \widetilde{\mathbf{x}}+b_{i}^{l}$}; \node [anchor=north west,inner sep = 2pt] (line7) at (line6.south west) {$z_{i}^{l+1}=\mathbf{w}_{i}^{l} \widetilde{\mathbf{x}}+b_{i}^{l}$};
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
\setlength{\YShift}{0.8\base} \setlength{\YShift}{0.8\base}
\setlength{\XShift}{0.8\base} \setlength{\XShift}{0.8\base}
\tikzstyle{modelnode} = [rectangle,draw,rounded corners=2pt,inner sep=0pt,minimum height=4.2em,minimum width=2em,font=\small,anchor=north] \tikzstyle{modelnode} = [rectangle,draw,rounded corners=2pt,inner sep=0pt,minimum height=4.5em,minimum width=2em,font=\small,anchor=north]
\coordinate (stu01) at (0,0); \coordinate (stu01) at (0,0);
\coordinate (stu02) at ([xshift=3em]stu01); \coordinate (stu02) at ([xshift=3em]stu01);
...@@ -20,27 +20,27 @@ ...@@ -20,27 +20,27 @@
\foreach \curr / \prev in {1/0,2/1,3/2} \foreach \curr / \prev in {1/0,2/1,3/2}
{ {
% models % models
\node[modelnode,fill=yellow!20] (stu\curr1) at ([yshift=-2em]stu\prev1.south) {\rotatebox{90}{Student $1$}}; \node[modelnode,fill=yellow!20] (stu\curr1) at ([yshift=-2em]stu\prev1.south) {\rotatebox{90}{学生模型 $1$}};
\node[modelnode,fill=yellow!20] (stu\curr2) at ([yshift=-2em]stu\prev2.south) {\rotatebox{90}{Student $2$}}; \node[modelnode,fill=yellow!20] (stu\curr2) at ([yshift=-2em]stu\prev2.south) {\rotatebox{90}{学生模型 $2$}};
\node[modelnode,fill=yellow!20] (stu\curr3) at ([yshift=-2em]stu\prev3.south) {\rotatebox{90}{Student $3$}}; \node[modelnode,fill=yellow!20] (stu\curr3) at ([yshift=-2em]stu\prev3.south) {\rotatebox{90}{学生模型 $3$}};
\node[modelnode,fill=yellow!20] (stu\curr4) at ([yshift=-2em]stu\prev4.south) {\rotatebox{90}{Student $4$}}; \node[modelnode,fill=yellow!20] (stu\curr4) at ([yshift=-2em]stu\prev4.south) {\rotatebox{90}{学生模型 $4$}};
\node[modelnode,fill=yellow!20] (stu\curr5) at ([yshift=-2em]stu\prev5.south) {\rotatebox{90}{Student $5$}}; \node[modelnode,fill=yellow!20] (stu\curr5) at ([yshift=-2em]stu\prev5.south) {\rotatebox{90}{学生模型 $5$}};
\node[modelnode] (tea\curr1) at ([yshift=-2em]tea\prev1.south) {\rotatebox{90}{\color{red!60} Teacher $1$}}; \node[modelnode] (tea\curr1) at ([yshift=-2em]tea\prev1.south) {\rotatebox{90}{\color{red!60} 教师模型 $1$}};
\node[modelnode] (tea\curr2) at ([yshift=-2em]tea\prev2.south) {\rotatebox{90}{\color{blue!60} Teacher $2$}}; \node[modelnode] (tea\curr2) at ([yshift=-2em]tea\prev2.south) {\rotatebox{90}{\color{blue!60} 教师模型 $2$}};
% ensemble labels % 集成 labels
\draw[-latex'] ([xshift=2pt]stu\curr5.east) to node [auto] {\small Ensemble} ([xshift=-2pt]tea\curr1.west); \draw[-latex'] ([xshift=2pt]stu\curr5.east) to node [auto] {\small 集成} ([xshift=-2pt]tea\curr1.west);
} }
% iteration labels % iteration labels
\node[font=\small,anchor=east,purple!80] (iterate1) at ([xshift=-1em]stu21.west) {\rotatebox{90}{Iteration $1$}}; \node[font=\small,anchor=east,purple!80] (iterate1) at ([xshift=-1em]stu21.west) {\rotatebox{90}{轮数 $1$}};
\node[font=\small,anchor=east,purple!80] (iterate2) at ([xshift=-1em]stu31.west) {\rotatebox{90}{Iteration $2$}}; \node[font=\small,anchor=east,purple!80] (iterate2) at ([xshift=-1em]stu31.west) {\rotatebox{90}{轮数 $2$}};
% distillation labels % distillation labels
\node[font=\small,anchor=south west] (distill1) at ([yshift=0.2em]iterate1.north west) {Distillation}; \node[font=\small,anchor=south west] (distill1) at ([yshift=0.8em]iterate1.north west) {知识蒸馏};
\node[font=\small,anchor=south west] (distill2) at ([yshift=0.2em]iterate2.north west) {Distillation}; \node[font=\small,anchor=south west] (distill2) at ([yshift=0.8em]iterate2.north west) {知识蒸馏};
% student groups % 学生模型 groups
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
\node[rectangle,draw,very thick,red!60,densely dotted,inner sep=2pt,rounded corners=2pt,fill=red!20] [fit = (stu21) (stu22) (stu23) ] (group21) {}; \node[rectangle,draw,very thick,red!60,densely dotted,inner sep=2pt,rounded corners=2pt,fill=red!20] [fit = (stu21) (stu22) (stu23) ] (group21) {};
\node[rectangle,draw,very thick,blue!60,densely dotted,inner sep=2pt,rounded corners=2pt,fill=blue!20] [fit = (stu24) (stu25) ] (group22) {}; \node[rectangle,draw,very thick,blue!60,densely dotted,inner sep=2pt,rounded corners=2pt,fill=blue!20] [fit = (stu24) (stu25) ] (group22) {};
......
\begin{tikzpicture} \begin{tikzpicture}
\node[font=\scriptsize] (model) at (0,0) {Model Output:}; \node[font=\scriptsize,align=left] (model) at (0,0) {模型输出:\\(未使用\\标签平滑)};
\node[anchor=north west,font=\scriptsize] (label_smooth) at ([yshift=-1.8em]model.south west) {Label Smoothing:}; \node[anchor=north west,font=\scriptsize,align=left] (label_smooth) at ([yshift=-0.3em]model.south west) {模型输出:\\(使用标\\签平滑)};
\node[anchor=south west,font=\scriptsize] (one-hot) at ([yshift=2em]model.north west) {One-hot:}; \node[anchor=south west,font=\scriptsize] (one-hot) at ([yshift=1em]model.north west) {One-hot分布:};
%model out %model out
\node [anchor=west,minimum width=1.2em,minimum height=0.2em,fill=ublue!80,inner sep=0pt] (model_label1) at ([xshift=1.5em,yshift=-0.5em]model.east) {}; \node [anchor=west,minimum width=1.2em,minimum height=0.2em,fill=ublue!80,inner sep=0pt] (model_label1) at ([xshift=1.5em,yshift=-0.8em]model.east) {};
\node [anchor=south,font=\scriptsize] (model_w1) at (model_label1.north) {$p_{1}$}; \node [anchor=south,font=\scriptsize] (model_w1) at (model_label1.north) {$p_{1}$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.1em,fill=ublue!80,inner sep=0pt] (model_label2) at (model_label1.south east) {}; \node [anchor=south west,minimum width=1.2em,minimum height=0.1em,fill=ublue!80,inner sep=0pt] (model_label2) at (model_label1.south east) {};
\node [anchor=south,font=\scriptsize] (model_w2) at (model_label2.north) {$p_{2}$}; \node [anchor=south,font=\scriptsize] (model_w2) at (model_label2.north) {$p_{2}$};
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
\node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=ublue!80,inner sep=0pt] (model_label7) at (model_label6.south east) {}; \node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=ublue!80,inner sep=0pt] (model_label7) at (model_label6.south east) {};
\node [anchor=south,font=\scriptsize] (model_w8) at (model_label7.north) {$p_{7}$}; \node [anchor=south,font=\scriptsize] (model_w8) at (model_label7.north) {$p_{7}$};
%no label smooth %no label smooth
\node [anchor=west,minimum width=1.2em,minimum height=0.05em,fill=orange!50,inner sep=0pt,font=\tiny] (one_hot_label1) at ([xshift=1.5em,yshift=3em]model.east) {}; \node [anchor=west,minimum width=1.2em,minimum height=0.05em,fill=orange!50,inner sep=0pt,font=\tiny] (one_hot_label1) at ([xshift=1.5em,yshift=2.5em]model.east) {};
\node [anchor=south,font=\scriptsize] (one_hot_w1) at (one_hot_label1.north) {$0$}; \node [anchor=south,font=\scriptsize] (one_hot_w1) at (one_hot_label1.north) {$0$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.05em,fill=orange!50,inner sep=0pt,font=\tiny] (one_hot_label2) at (one_hot_label1.south east) {}; \node [anchor=south west,minimum width=1.2em,minimum height=0.05em,fill=orange!50,inner sep=0pt,font=\tiny] (one_hot_label2) at (one_hot_label1.south east) {};
\node [anchor=south,font=\scriptsize] (one_hot_w2) at (one_hot_label2.north) {$0$}; \node [anchor=south,font=\scriptsize] (one_hot_w2) at (one_hot_label2.north) {$0$};
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
\node [anchor=south,font=\scriptsize] (one_hot_w7) at (one_hot_label7.north) {$0$}; \node [anchor=south,font=\scriptsize] (one_hot_w7) at (one_hot_label7.north) {$0$};
%label smoothing %label smoothing
\node [anchor=west,minimum width=1.2em,minimum height=0.2em,fill=red!50,inner sep=0pt] (label1) at ([xshift=1.5em,yshift=-3.2em]model.east) {}; \node [anchor=west,minimum width=1.2em,minimum height=0.2em,fill=red!50,inner sep=0pt] (label1) at ([xshift=1.5em,yshift=-4.4em]model.east) {};
\node [anchor=south,font=\scriptsize] (w1) at (label1.north) {$0.1$}; \node [anchor=south,font=\scriptsize] (w1) at (label1.north) {$0.1$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=red!50,inner sep=0pt] (label2) at (label1.south east) {}; \node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=red!50,inner sep=0pt] (label2) at (label1.south east) {};
\node [anchor=south,font=\scriptsize] (w2) at (label2.north) {$0.1$}; \node [anchor=south,font=\scriptsize] (w2) at (label2.north) {$0.1$};
...@@ -55,18 +55,18 @@ ...@@ -55,18 +55,18 @@
\node [anchor=south,font=\scriptsize] (w8) at (label7.north) {$0.1$}; \node [anchor=south,font=\scriptsize] (w8) at (label7.north) {$0.1$};
\node[font=\scriptsize] (line1) at ([xshift=9em,yshift=-1.5em]model_label7.east) {$loss =-0.3 \log p_{3}-\sum_{i=1}^{7} 0.1 \log p_{i}$}; \node[font=\scriptsize] (line1) at ([xshift=9em,yshift=-1.5em]model_label7.east) {$Loss =-0.3 \log p_{3}-\sum_{i=1}^{7} 0.1 \log p_{i}$};
\node[font=\scriptsize] (line2) at ([xshift=5.9em,yshift=3.5em]model_label7.east) {$loss =-\log p_{3}$}; \node[font=\scriptsize] (line2) at ([xshift=5.9em,yshift=3em]model_label7.east) {$Loss =-\log p_{3}$};
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.1em,rounded corners=1pt,very thick,dotted,draw=red] [fit = (one_hot_label1) (one_hot_w3) (one_hot_label7) (model_label1) (model_label7)] (box1) {}; \node [rectangle,inner sep=0.5em,rounded corners=1pt,very thick,dotted,draw=red] [fit =(model_w3) (model_label1) (model_label7)] (box1) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=purple!10,drop shadow,draw=purple] [fit = (line2)] (box3) {}; \node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=purple!10,drop shadow,draw=purple] [fit = (line2)] (box3) {};
\draw [->,dotted,very thick,red] ([yshift=-1em]box1.east) .. controls +(east:1) and +(west:1) .. (box3.west); \draw [->,dotted,very thick,red] ([yshift=-0.5em]box1.east) .. controls +(east:1) and +(west:1) .. (box3.west);
\node [rectangle,inner sep=0.1em,rounded corners=1pt,very thick,dotted,draw=ugreen] [fit = (label1) (label7) (model_label1) (model_label7) (model_w3)] (box2) {}; \node [rectangle,inner sep=0.5em,rounded corners=1pt,very thick,dotted,draw=ugreen] [fit =(w3) (label1) (label7) ] (box2) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=green!10,drop shadow,draw=ugreen] [fit = (line1)] (box4) {}; \node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=green!10,drop shadow,draw=ugreen] [fit = (line1)] (box4) {};
\draw [->,dotted,very thick,ugreen] ([yshift=1em]box2.east) .. controls +(east:1) and +(west:1) .. (box4.west); \draw [->,dotted,very thick,ugreen] ([yshift=-0.5em]box2.east) .. controls +(east:1) and +(west:1) .. (box4.west);
\end{pgfonlayer} \end{pgfonlayer}
......
...@@ -43,8 +43,6 @@ ...@@ -43,8 +43,6 @@
\foreach \x/\d in {1/2em, 2/8em, 3/14em} \foreach \x/\d in {1/2em, 2/8em, 3/14em}
\node[unit,fill=yellow!20] at (0,\d) (ln_\x) {层正则化}; \node[unit,fill=yellow!20] at (0,\d) (ln_\x) {层正则化};
\node[unit,fill=green!20] at (0,24em) (sa_1) {8头自注意力:512};
\foreach \x/\d in {1/6em, 2/12em, 3/22em} \foreach \x/\d in {1/6em, 2/12em, 3/22em}
\node[draw,circle,minimum size=1em,inner sep=1pt] at (0,\d) (add_\x) {\scriptsize\bfnew{+}}; \node[draw,circle,minimum size=1em,inner sep=1pt] at (0,\d) (add_\x) {\scriptsize\bfnew{+}};
...@@ -76,7 +74,7 @@ ...@@ -76,7 +74,7 @@
\node[font=\scriptsize,align=center] at (0em, -1.5em){(b) 使用结构搜索方法优化后的 \\ Transformer编码器中若干块的结构}; \node[font=\scriptsize,align=center] at (0em, -1.5em){(b) 使用结构搜索方法优化后的 \\ Transformer编码器中若干块的结构};
\node[minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=blue!20] (act) at (5.5em, 24em){}; \node[minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=blue!20] (act) at (5.5em, 20em){};
\node[anchor=west,font=\footnotesize] at ([xshift=0.1em]act.east){激活函数}; \node[anchor=west,font=\footnotesize] at ([xshift=0.1em]act.east){激活函数};
\node[anchor=north,minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=yellow!20] (nor) at ([yshift=-0.6em]act.south){}; \node[anchor=north,minimum size=0.8em,inner sep=0pt,rounded corners=1pt,draw,fill=yellow!20] (nor) at ([yshift=-0.6em]act.south){};
\node[anchor=west,font=\footnotesize] at ([xshift=0.1em]nor.east){层正则化}; \node[anchor=west,font=\footnotesize] at ([xshift=0.1em]nor.east){层正则化};
......
...@@ -6829,6 +6829,106 @@ year={2012} ...@@ -6829,6 +6829,106 @@ year={2012}
publisher = {International Conference on Learning Representations}, publisher = {International Conference on Learning Representations},
year = {2017} year = {2017}
} }
@inproceedings{DBLP:journals/nature/LeCunBH15,
author = {Yann LeCun and
Yoshua Bengio and
Geoffrey E. Hinton},
title = {Deep learning},
publisher = {Nature},
volume = {521},
number = {7553},
pages = {436--444},
year = {2015}
}
@inproceedings{DBLP:journals/corr/abs-1207-0580,
author = {Geoffrey E. Hinton and
Nitish Srivastava and
Alex Krizhevsky and
Ilya Sutskever and
Ruslan Salakhutdinov},
title = {Improving neural networks by preventing co-adaptation of feature detectors},
publisher = {CoRR},
volume = {abs/1207.0580},
year = {2012}
}
@inproceedings{DBLP:journals/tslp/ZhuM12,
author = {Jingbo Zhu and
Matthew Y. Ma},
title = {Uncertainty-based active learning with instability estimation for
text classification},
publisher = {ACM Transactions on Speech and Language Processing},
volume = {8},
number = {4},
pages = {5:1--5:21},
year = {2012}
}
@inproceedings{DBLP:conf/coling/ZhuWYT08,
author = {Jingbo Zhu and
Huizhen Wang and
Tianshun Yao and
Benjamin K. Tsou},
title = {Active Learning with Sampling by Uncertainty and Density for Word
Sense Disambiguation and Text Classification},
publisher = {International Conference on Computational Linguistics},
pages = {1137--1144},
year = {2008}
}
@inproceedings{DBLP:conf/medprai/SurendranathJ18,
author = {Ajay Surendranath and
Dinesh Babu Jayagopi},
title = {Curriculum Learning for Depth Estimation with Deep Convolutional Neural
Networks},
publisher = {Mediterranean Conference on Pattern Recognition and Artificial Intelligence},
pages = {95--100},
year = {2018}
}
@inproceedings{DBLP:conf/icml/BengioLCW09,
author = {Yoshua Bengio and
J{\'{e}}r{\^{o}}me Louradour and
Ronan Collobert and
Jason Weston},
title = {Curriculum learning},
series = {{ACM} International Conference Proceeding Series},
volume = {382},
pages = {41--48},
publisher = {International Conference on Machine Learning}
}
@inproceedings{DBLP:journals/corr/abs-2002-11794,
author = {Zhuohan Li and
Eric Wallace and
Sheng Shen and
Kevin Lin and
Kurt Keutzer and
Dan Klein and
Joseph E. Gonzalez},
title = {Train Large, Then Compress: Rethinking Model Size for Efficient Training
and Inference of Transformers},
publisher = {CoRR},
volume = {abs/2002.11794},
year = {2020}
}
@inproceedings{kim-rush-2016-sequence,
author = {Yoon Kim and
Alexander M. Rush},
title = {Sequence-Level Knowledge Distillation},
pages = {1317--1327},
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2016}
}
@inproceedings{Jiao2020TinyBERTDB,
author = {Xiaoqi Jiao and
Yichun Yin and
Lifeng Shang and
Xin Jiang and
Xiao Chen and
Linlin Li and
Fang Wang and
Qun Liu},
title = {TinyBERT: Distilling {BERT} for Natural Language Understanding},
pages = {4163--4174},
publisher={Conference on Empirical Methods in Natural Language Processing},
year={2020}
}
%%%%% chapter 13------------------------------------------------------ %%%%% chapter 13------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论