合并分支 'shanweiqiao' 到 'caorunzhe'

13章文字和图片

查看合并请求 !800

合并分支 'shanweiqiao' 到 'caorunzhe'
13章文字和图片
查看合并请求 !800
61cd2dd7 · 单韦乔 · 633f9701 · d7774f37 · 61cd2dd7 · 61cd2dd7
Commit 61cd2dd7 authored Jan 06, 2021 by 单韦乔
--- a/Chapter13/Figures/figure-a-predefined-course-planning.tex
+++ b/Chapter13/Figures/figure-a-predefined-course-planning.tex

 \begin{tikzpicture}

-\tikzstyle{node1}=[inner sep=0mm,minimum height=1em,minimum width=3em,fill=ugreen!10!blue!10]
-\tikzstyle{node2}=[inner sep=0mm,minimum height=1em,minimum width=3em,fill=ugreen!20!blue!20]
-\tikzstyle{node3}=[inner sep=0mm,minimum height=1em,minimum width=3em,fill=ugreen!30!blue!30]
-\tikzstyle{node4}=[inner sep=0mm,minimum height=1em,minimum width=3em,fill=ugreen!40!blue!40]
-\tikzstyle{node5}=[inner sep=0mm,minimum height=1em,minimum width=3em,fill=ugreen!50!blue!50]
+\tikzstyle{node1}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=ugreen!10!blue!10]
+\tikzstyle{node2}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=ugreen!20!blue!20]
+\tikzstyle{node3}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=ugreen!30!blue!30]
+\tikzstyle{node4}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=ugreen!40!blue!40]
+\tikzstyle{node5}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=ugreen!50!blue!50]

 \begin{scope}
-
-\node[anchor=north west] (l1) at (0,0) {易};
-\node[anchor=west] (l2) at ([xshift=10.3em,yshift=0em]l1.east) {难};
-
-\draw [->,thick] ([xshift=-2em,yshift=-1em]l1.south) --  ([xshift=2em,yshift=-1em]l2.south);
-
-\node[anchor=north,node1] (c1) at ([xshift=0em,yshift=-2em]l1.south) {};
-\node[anchor=west,node2] (c2) at ([xshift=0em,yshift=0em]c1.east) {};
-\node[anchor=west,node3] (c3) at ([xshift=0em,yshift=0em]c2.east) {};
-\node[anchor=west,node4] (c4) at ([xshift=0em,yshift=0em]c3.east) {};
-\node[anchor=west,node5] (c5) at ([xshift=0em,yshift=0em]c4.east) {};
-
-\node[anchor=north] (lb1) at ([xshift=0em,yshift=-1.5em]c3.south) {(a)样本难易程度图示};
-
-\end{scope}
-
-\begin{scope}[yshift = -1.7in]
 \foreach \i / \j / \z in 
-		{0/0/node1, 1/0/node1, 2/0/node1, 3/0/node1, 4/0/node1, 5/0/node1, 6/0/node1,
-		 1/1/node2, 2/1/node2, 3/1/node2, 4/1/node2, 5/1/node2, 6/1/node2,
-		 2/2/node3, 3/2/node3, 4/2/node3, 5/2/node3, 6/2/node3,
-		 3/3/node4, 4/3/node4, 5/3/node4, 6/3/node4,
-		 4/4/node5, 5/4/node5, 6/4/node5,
+		{0/0/node1, 
+		 1/0/node1, 1/1/node2,
+		 2/0/node1, 2/1/node2, 2/2/node3,
+		 3/0/node1, 3/1/node2, 3/2/node3, 3/3/node4,
+		 4/0/node1, 4/1/node2, 4/2/node3, 4/3/node4, 4/4/node5,
+		 5/0/node1, 5/1/node2, 5/2/node3, 5/3/node4, 5/4/node5,
 		}
-	\node[anchor=north west,\z,draw=white](n\i\j) at (1.0*3em*\j + 0*0em,-1.0*1em*\i + 0*0em){};
+	\node[anchor=south west,\z,draw=white](n\i\j) at (1.0*3.2em*\i + 0*0em,1.5*1em*\j + 0*0em){};
+
+	\node[anchor=south west,node1,draw=white](nc) at (0,0){};

-	\node[anchor=north west,node1,draw=white](nc) at (0,0){};
+\draw [->,very thick] ([xshift=-1em,yshift=-1em]n00.south west) --  ([xshift=19.7em,yshift=-1em]n00.south west);
+\draw [->,very thick] ([xshift=-1em,yshift=-1em]n00.south west) --  ([xshift=-1em,yshift=9em]n00.south west);

-\draw [->,very thick] ([xshift=-1em,yshift=1em]n00.north west) --  ([xshift=16em,yshift=1em]n00.north west);
-\draw [->,very thick] ([xshift=-1em,yshift=1em]n00.north west) --  ([xshift=-1em,yshift=-9em]n00.north west);
+\node[anchor=east] (x1) at ([xshift=-1em,yshift=8em]n00.south west) {数据块};
+\node[anchor=north west] (y1) at ([xshift=14.5em,yshift=-1.3em]n00.south west) {训练时长};
+\node[anchor=west,font=\small] (t1) at ([xshift=0.5em,yshift=0em]n52.east) {$\cdots$};

-\node[anchor=west] (x1) at ([xshift=12em,yshift=2em]n00.north west) {数据块};
-\node[anchor=east] (y1) at ([xshift=-1em,yshift=-7.5em]n00.north west) {轮次};
-\node[anchor=west,font=\small] (t1) at ([xshift=0em,yshift=-1em]n60.south west) {$\ldots$直到模型收敛};
+\node[anchor=west,node1,minimum width=0.5em] (c1) at ([xshift=3em,yshift=-2.5em]t1.east) {};
+\node[anchor=south,node2,minimum width=0.5em] (c2) at ([xshift=0em,yshift=0em]c1.north) {};
+\node[anchor=south,node3,minimum width=0.5em] (c3) at ([xshift=0em,yshift=0em]c2.north) {};
+\node[anchor=south,node4,minimum width=0.5em] (c4) at ([xshift=0em,yshift=0em]c3.north) {};
+\node[anchor=south,node5,minimum width=0.5em] (c5) at ([xshift=0em,yshift=0em]c4.north) {};

-\node[anchor=north] (lb2) at ([xshift=0em,yshift=-3em]n62.south) {(b)不同训练阶段使用的数据};
+\node[anchor=north] (l1) at ([xshift=0em,yshift=-0.3em]c1.south) {易};
+\node[anchor=south] (l2) at ([xshift=0em,yshift=0.3em]c5.north) {难};

 \end{scope}


--- a/Chapter13/Figures/figure-bpe.tex
+++ b/Chapter13/Figures/figure-bpe.tex
--- a/Chapter13/Figures/figure-computation-of-dropout.tex
+++ b/Chapter13/Figures/figure-computation-of-dropout.tex
@@ -4,7 +4,7 @@
 \def\nodespace{1}
 \def\picturespace{0.8}

-\tikzstyle{neuronnode} = [minimum size=1.8em,circle,draw,very thick,ublue,inner sep=0pt, fill=white,align=center]
+\tikzstyle{neuronnode} = [minimum size=1.8em,circle,draw=ublue,very thick,inner sep=0pt, fill=white,align=center]

 %standard
 \node [neuronnode] (neuron_b) at (0,0) {\scriptsize{$b^{l}$}};
@@ -14,9 +14,9 @@
 \node [neuronnode] (neuron_z) at (1.2 * \nodespace,-1.5 * \neuronsep) {\scriptsize{$z_{i}^{l+1}$}};
 \node [neuronnode] (neuron_y') at (2.4 * \nodespace,-1.5 * \neuronsep) {\scriptsize{$x_{i}^{l+1}$}};

-\node [anchor=north,ublue] (standard) at ([yshift=-4em]neuron_z.south) {\scriptsize{标准网络}};
-\node [ublue] (standard) at ([xshift=-1em]neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}};
-\node [ublue] (standard) at ([xshift=0.6em,yshift=0.3em]neuron_z.east) {\scriptsize{$f$}};
+\node [anchor=north] (standard) at ([yshift=-4em]neuron_z.south) {\scriptsize{标准网络}};
+\node [] (standard) at ([xshift=-1em]neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}};
+\node [] (standard) at ([xshift=0.6em,yshift=0.3em]neuron_z.east) {\scriptsize{$f$}};

 \draw [->,line width=0.3mm] (neuron_b.east) -- (neuron_z.west);
 \draw [->,line width=0.3mm] (neuron_y3.east) -- (neuron_z.west);
@@ -40,9 +40,9 @@
 \node [neuronnode] (drop_neuron_r2) at (4.4*\nodespace,-1.5*\neuronsep) {\scriptsize{$r_{2}^{l}$}};
 \node [neuronnode] (drop_neuron_r1) at (4.4*\nodespace,-2.5*\neuronsep) {\scriptsize{$r_{1}^{l}$}};

-\node [anchor=north,ublue] (standard) at ([xshift=2em,yshift=-4em]drop_neuron_z.south) {\scriptsize{应用Dropout后的网络}};
-\node [ublue] (standard) at ([xshift=-1em]drop_neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}};
-\node [ublue] (standard) at ([xshift=0.6em,yshift=0.3em]drop_neuron_z.east) {\scriptsize{$f$}};
+\node [anchor=north] (standard) at ([xshift=2em,yshift=-4em]drop_neuron_z.south) {\scriptsize{应用Dropout后的网络}};
+\node [] (standard) at ([xshift=-1em]drop_neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}};
+\node [] (standard) at ([xshift=0.6em,yshift=0.3em]drop_neuron_z.east) {\scriptsize{$f$}};
 %structure
 \draw [->,line width=0.3mm] (drop_neuron_b.east) -- (drop_neuron_z.west);
 \draw [->,line width=0.3mm] (drop_neuron_y3'.east) -- (drop_neuron_z.west);

--- a/Chapter13/Figures/figure-difference-between-word-level-and-sequence-level-in-knowledge-distillation.tex
+++ b/Chapter13/Figures/figure-difference-between-word-level-and-sequence-level-in-knowledge-distillation.tex
@@ -54,7 +54,7 @@
                    \draw [decorate,decoration={brace}] ([yshift=0.1cm]prob1.north west) to node [midway,above,font=\small] {学习目标(Teacher输出)} ([yshift=0.1cm]prob4.north east);
                
                    % Vocab
-                    \node [word,anchor=center] () at ([xshift=-0.9cm]prob11.center) {EOS};
+                    \node [word,anchor=center] () at ([xshift=-0.9cm]prob11.center) {$\langle$eos$\rangle$};
                    \node [word,anchor=center] () at ([xshift=-0.9cm]prob21.center) {I};
                    \node [word,anchor=center] () at ([xshift=-0.9cm]prob31.center) {am};
                    \node [word,anchor=center] () at ([xshift=-0.9cm]prob41.center) {fine};
@@ -72,7 +72,7 @@
                        \draw [-latex,thick] (prob\i.south) to ([yshift=-0.5cm]prob\i.south);
                
                    % Input
-                    \node [word,anchor=south] (input1) at ([yshift=-1.8cm]prob1.south) {EOS};
+                    \node [word,anchor=south] (input1) at ([yshift=-1.8cm]prob1.south) {$\langle$eos$\rangle$};
                    \node [word,anchor=south] (input2) at ([yshift=-1.8cm]prob2.south) {I};
                    \node [word,anchor=south] (input3) at ([yshift=-1.8cm]prob3.south) {am};
                    \node [word,anchor=south] (input4) at ([yshift=-1.8cm]prob4.south) {fine};
@@ -137,7 +137,7 @@
                    \draw [decorate,decoration={brace}] ([yshift=0.1cm]prob1.north west) to node [midway,above,font=\small] {学习目标(Teacher输出)} ([yshift=0.1cm]prob4.north east);
                
                    % Vocab
-                    \node [word,anchor=center] () at ([xshift=-0.9cm]prob11.center) {EOS};
+                    \node [word,anchor=center] () at ([xshift=-0.9cm]prob11.center) {$\langle$eos$\rangle$};
                    \node [word,anchor=center] () at ([xshift=-0.9cm]prob21.center) {I};
                    \node [word,anchor=center] () at ([xshift=-0.9cm]prob31.center) {am};
                    \node [word,anchor=center] () at ([xshift=-0.9cm]prob41.center) {fine};
@@ -155,7 +155,7 @@
                        \draw [-latex,thick] (prob\i.south) to ([yshift=-0.5cm]prob\i.south);
                
                    % Input
-                    \node [word,anchor=south] (input1) at ([yshift=-1.8cm]prob1.south) {EOS};
+                    \node [word,anchor=south] (input1) at ([yshift=-1.8cm]prob1.south) {$\langle$eos$\rangle$};
                    \node [word,anchor=south] (input2) at ([yshift=-1.8cm]prob2.south) {I};
                    \node [word,anchor=south] (input3) at ([yshift=-1.8cm]prob3.south) {am};
                    \node [word,anchor=center] (input4) at ([xshift=1cm]input3.center) {good};

--- a/Chapter13/Figures/figure-exposure-bias-t.tex
+++ b/Chapter13/Figures/figure-exposure-bias-t.tex
--- a/Chapter13/Figures/figure-exposure-bias.tex
+++ b/Chapter13/Figures/figure-exposure-bias.tex
--- a/Chapter13/Figures/figure-label-smoothing.tex
+++ b/Chapter13/Figures/figure-label-smoothing.tex

 \begin{tikzpicture}
 	
-	\node[font=\scriptsize,align=left] (model) at (0,0) {模型输出:\\（未使用\\标签平滑）};
-	\node[anchor=north west,font=\scriptsize,align=left] (label_smooth) at ([yshift=-0.3em]model.south west) {模型输出:\\（使用标\\签平滑）};
-	\node[anchor=south west,font=\scriptsize] (one-hot) at ([yshift=1em]model.north west) {One-hot分布:};
+	\node[font=\scriptsize,align=left] (model) at (0,0) {模型输出:};
+	\node[anchor=north west,font=\scriptsize,align=left] (label_smooth) at ([yshift=-1em]model.south west) {学习目标:\\（使用标\\签平滑）};
+	\node[anchor=south west,font=\scriptsize,align=left] (one-hot) at ([yshift=1em]model.north west) {标准答案:\\（未使用\\标签平滑）};
 	%model out
-	\node [anchor=west,minimum width=1.2em,minimum height=0.2em,fill=ublue!80,inner sep=0pt] (model_label1) at ([xshift=1.5em,yshift=-0.8em]model.east) {};
+	\node [anchor=west,minimum width=1.2em,minimum height=0.2em,fill=ublue!80,inner sep=0pt] (model_label1) at ([xshift=1.5em,yshift=-0.4em]model.east) {};
    \node [anchor=south,font=\scriptsize] (model_w1) at (model_label1.north) {$p_{1}$};
    \node [anchor=south west,minimum width=1.2em,minimum height=0.1em,fill=ublue!80,inner sep=0pt] (model_label2) at (model_label1.south east) {};
    \node [anchor=south,font=\scriptsize] (model_w2) at (model_label2.north) {$p_{2}$};
@@ -59,14 +59,14 @@
    \node[font=\scriptsize] (line2) at ([xshift=5.9em,yshift=3em]model_label7.east) {$Loss =-\log p_{3}$};

    \begin{pgfonlayer}{background}
-        \node [rectangle,inner sep=0.5em,rounded corners=1pt,very thick,dotted,draw=red] [fit =(model_w3) (model_label1) (model_label7)] (box1) {};
+        \node [rectangle,inner sep=0.5em,rounded corners=1pt,very thick,dotted,draw=red] [fit =(model_w3) (model_label1) (model_label7) (one_hot_w3)] (box1) {};

        \node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=purple!10,drop shadow,draw=purple] [fit = (line2)] (box3) {}; 
-        \draw [->,dotted,very thick,red] ([yshift=-0.5em]box1.east) .. controls +(east:1) and +(west:1) .. (box3.west);
+        \draw [->,dotted,very thick,red] ([yshift=1em]box1.east) .. controls +(east:1) and +(west:1) .. (box3.west);
        
-        \node [rectangle,inner sep=0.5em,rounded corners=1pt,very thick,dotted,draw=ugreen] [fit =(w3) (label1) (label7) ] (box2) {};
+        \node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=ugreen] [fit =(w3) (label1) (label7) (model_w3)] (box2) {};
        \node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=green!10,drop shadow,draw=ugreen] [fit = (line1)] (box4) {};
-        \draw [->,dotted,very thick,ugreen] ([yshift=-0.5em]box2.east) .. controls +(east:1) and +(west:1) .. (box4.west);
+        \draw [->,dotted,very thick,ugreen] ([yshift=-1.5em]box2.east) .. controls +(east:1) and +(west:1) .. (box4.west);
        
    \end{pgfonlayer}


--- a/Chapter13/chapter13.tex
+++ b/Chapter13/chapter13.tex
--- a/bibliography.bib
+++ b/bibliography.bib
@@ -6619,13 +6619,13 @@ author    = {Yoshua Bengio and
  year      = {2017}
 }
 @inproceedings{DBLP:journals/corr/SuGMRUVWY16a,
-  author    = {Pei{-}Hao Su and
+  author    = {Pei-Hao Su and
               Milica Gasic and
               Nikola Mrksic and
-               Lina Maria Rojas{-}Barahona and
+               Lina Maria Rojas-Barahona and
               Stefan Ultes and
               David Vandyke and
-               Tsung{-}Hsien Wen and
+               Tsung-Hsien Wen and
               Steve J. Young},
  title     = {Continuously Learning Neural Dialogue Management},
  publisher   = {CoRR},
@@ -6661,7 +6661,7 @@ author    = {Yoshua Bengio and
               Fei Tian and
               Tao Qin and
               Jianhuang Lai and
-               Tie{-}Yan Liu},
+               Tie-Yan Liu},
  title     = {A Study of Reinforcement Learning for Neural Machine Translation},
  pages     = {3612--3621},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
@@ -9213,6 +9213,36 @@ author    = {Zhuang Liu and
  publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
  year      = {2017}
 }
+@inproceedings{DBLP:conf/coling/XuHJFWHJXZ20,
+  author    = {Chen Xu and
+               Bojie Hu and
+               Yufan Jiang and
+               Kai Feng and
+               Zeyang Wang and
+               Shen Huang and
+               Qi Ju and
+               Tong Xiao and
+               Jingbo Zhu},
+  title     = {Dynamic Curriculum Learning for Low-Resource Neural Machine Translation},
+  pages     = {3977--3989},
+  publisher = {International Conference on Computational Linguistics},
+  year      = {2020}
+}
+@inproceedings{DBLP:conf/acml/WuXTZQLL18,
+  author    = {Lijun Wu and
+               Yingce Xia and
+               Fei Tian and
+               Li Zhao and
+               Tao Qin and
+               Jianhuang Lai and
+               Tie-Yan Liu},
+  title     = {Adversarial Neural Machine Translation},
+  series    = {Proceedings of Machine Learning Research},
+  volume    = {95},
+  pages     = {534--549},
+  publisher = {Asian Conference on Machine Learning},
+  year      = {2018}
+}
 %%%%% chapter 15------------------------------------------------------
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%