Commit e329e9d6 by 曹润柘

Merge branch 'caorunzhe' into 'master'

Caorunzhe

See merge request !1064
parents c90d70e8 6e13b60f
@@ -8,7 +8,7 @@
};
\node[font=\footnotesize,anchor=north] (l1) at ([xshift=0em,yshift=-1em]top.south) {(a) 符号合并表};
-\node[font=\scriptsize,anchor=west] (n1) at ([xshift=-4.5em,yshift=-6em]top.west) {l\ o\ w\ e\ r\ $<$e$>$};
+\node[font=\scriptsize,anchor=west] (n1) at ([xshift=-3em,yshift=-6em]top.west) {l\ o\ w\ e\ r\ $<$e$>$};
\node[font=\scriptsize,anchor=west] (n2) at ([xshift=2.6em]n1.east) {l\ o\ w\ e\ {\red r$<$e$>$}};
\node[font=\scriptsize,anchor=west] (n3) at ([xshift=2.6em]n2.east) {{\red lo}\ w\ e\ r$<$e$>$};
\node[font=\scriptsize,anchor=west] (n4) at ([xshift=2.6em]n3.east) {{\red low}\ e\ r$<$e$>$};
@@ -24,7 +24,7 @@
\node[font=\scriptsize,anchor=north east] (s1) at ([yshift=0.1em]n1.north west) {样例1:};
\node[font=\scriptsize,anchor=north east] (s1) at ([yshift=0.1em]t1.north west) {样例2:};
-\node[font=\footnotesize,anchor=north] (l2) at ([xshift=2em,yshift=-1em]t3.south) {(b) 合并样例};
+\node[font=\footnotesize,anchor=north] (l2) at ([xshift=0.5em,yshift=-1em]t3.south) {(b) 合并样例};
\draw[->,thick](n1.east) -- (n2.west);
\draw[->,thick](n2.east) -- (n3.west);
...
@@ -38,7 +38,7 @@
\end{scope}
-\begin{scope}[xshift=1.85in]
+\begin{scope}[xshift=1.75in]
\node [anchor=west,stnode] (r1) at (0, 0) {第1层};
\node [anchor=south,tnode] (r2) at ([xshift=0em,yshift=1em]r1.north){第2层};
@@ -60,11 +60,11 @@
\draw[->,thick] ([xshift=0em,yshift=0em]r3.north)--([xshift=0em,yshift=0em]r4.south);
\draw[->,thick] ([xshift=0em,yshift=0em]r4.north)--([xshift=0em,yshift=0em]output.south);
-\node [anchor=north,font=\small] (label) at ([xshift=-1.5em,yshift=-0.7em]input.south) {(b)原始Transformer模型};
+\node [anchor=north,font=\small] (label) at ([xshift=-1.7em,yshift=-0.7em]input.south) {(b)原始Transformer模型};
\end{scope}
-\begin{scope}[xshift=3.9in]
+\begin{scope}[xshift=3.85in]
\node [anchor=west,stnode] (r1) at (0, 0) {第1层};
\node [anchor=south,stnode] (r2) at ([xshift=0em,yshift=1em]r1.north){第2层};
@@ -89,7 +89,7 @@
\draw[->,thick] ([xshift=0em,yshift=0em]wr2.east)--([xshift=0em,yshift=0em]r2.west);
\draw[->,thick] ([xshift=0em,yshift=0em]wr3.east)--([xshift=0em,yshift=0em]r4.west);
-\node [anchor=north,font=\small,align=left] (label) at ([xshift=-3em,yshift=-0.7em]input.south) {(c)共享权重的\\ Transformer模型};
+\node [anchor=north,font=\small] (label) at ([xshift=-3em,yshift=-0.7em]input.south) {(c)共享权重的Transformer模型};
\end{scope}
...
@@ -14,7 +14,7 @@
\node [anchor=south west,manode] (a1) at ([xshift=0em,yshift=1em]e1.north west){Attention};
\node [anchor=south east,manode] (c1) at ([xshift=0em,yshift=1em]e1.north east){Conv};
\node [anchor=south west,ebnode] (e2) at ([xshift=0em,yshift=1em]a1.north west){Embedding};
-\node [anchor=south,draw,circle,inner sep=4pt] (add1) at ([xshift=0em,yshift=0.5em]e2.north){};
+\node [anchor=south,draw,circle,inner sep=4pt,thick] (add1) at ([xshift=0em,yshift=0.5em]e2.north){};
\node [anchor=south,ffnnode] (f2) at ([xshift=0em,yshift=0.5em]add1.north){FFN};
\node [anchor=south,inner sep=0mm,minimum height=1.8em] (op) at ([xshift=0em,yshift=0.5em]f2.north){output};
@@ -29,8 +29,8 @@
\draw[->,thick] ([xshift=0em,yshift=0em]f2.north)--([xshift=0em,yshift=0.3em]op.south);
-\draw[-] ([xshift=0em,yshift=0em]add1.west)--([xshift=-0em,yshift=0em]add1.east);
-\draw[-] ([xshift=0em,yshift=0em]add1.south)--([xshift=-0em,yshift=-0em]add1.north);
+\draw[-,thick] ([xshift=0em,yshift=0em]add1.west)--([xshift=-0em,yshift=0em]add1.east);
+\draw[-,thick] ([xshift=0em,yshift=0em]add1.south)--([xshift=-0em,yshift=-0em]add1.north);
\draw[->,thick,rectangle,rounded corners=5pt] ([xshift=0em,yshift=0.5em]f1.north)--([xshift=-6em,yshift=0.5em]f1.north)--([xshift=-5.45em,yshift=0em]add1.west)--([xshift=0em,yshift=0em]add1.west);
...
@@ -10,10 +10,10 @@
\begin{scope}[]
\node [anchor=east,circle,fill=black,inner sep = 2pt] (n1) at (-0, 0) {};
-\node [anchor=west,draw,circle,inner sep=5pt] (n2) at ([xshift=13em,yshift=0em]n1.east){};
+\node [anchor=west,draw,circle,inner sep=5pt,thick] (n2) at ([xshift=13em,yshift=0em]n1.east){};
\node [anchor=west,lnnode] (n3) at ([xshift=1.5em,yshift=0em]n2.east){LN};
\node [anchor=west,circle,fill=black,inner sep=2pt] (n4) at ([xshift=1.5em,yshift=0em]n3.east){};
-\node [anchor=west,draw,circle,inner sep=5pt] (n5) at ([xshift=5em,yshift=0em]n4.east){};
+\node [anchor=west,draw,circle,inner sep=5pt,thick] (n5) at ([xshift=5em,yshift=0em]n4.east){};
\node [anchor=west,lnnode] (n6) at ([xshift=1.5em,yshift=0em]n5.east){LN};
\node [anchor=west,manode] (a1) at ([xshift=1.5em,yshift=2em]n1.east){Multi-Head Attention};
...
@@ -7,14 +7,14 @@
\node [anchor=east] (x1) at (-0.5em, 0) {$\mathbi{x}_l$};
\node [anchor=west,draw,fill=red!20,inner xsep=5pt,rounded corners=2pt,thick] (F1) at ([xshift=4em]x1.east){\small{$F$}};
-\node [anchor=west,circle,draw,minimum size=1em] (n1) at ([xshift=4em]F1.east) {};
+\node [anchor=west,circle,draw,minimum size=1em,thick] (n1) at ([xshift=4em]F1.east) {};
\node [anchor=west,draw,fill=green!20,inner xsep=5pt,rounded corners=2pt,thick] (ln1) at ([xshift=4em]n1.east){\small{\textrm{LN}}};
\node [anchor=west] (x2) at ([xshift=4em]ln1.east) {$\mathbi{x}_{l+1}$};
\node [anchor=north] (x3) at ([yshift=-5em]x1.south) {$\mathbi{x}_l$};
\node [anchor=west,draw,fill=green!20,inner xsep=5pt,rounded corners=2pt,thick] (F2) at ([xshift=4em]x3.east){\small{\textrm{LN}}};
\node [anchor=west,draw,fill=red!20,inner xsep=5pt,rounded corners=2pt,thick] (ln2) at ([xshift=4em]F2.east){\small{$F$}};
-\node [anchor=west,circle,draw,,minimum size=1em] (n2) at ([xshift=4em]ln2.east){};
+\node [anchor=west,circle,draw,,minimum size=1em,thick] (n2) at ([xshift=4em]ln2.east){};
\node [anchor=west] (x4) at ([xshift=4em]n2.east) {$\mathbi{x}_{l+1}$};
\draw[->, line width=1pt] ([xshift=-0.1em]x1.east)--(F1.west);
@@ -27,10 +27,10 @@
\draw[->, line width=1pt] (n2.east)--(x4.west);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x1.north) -- ([yshift=1em]x1.north) -- ([yshift=1.4em]n1.north) -- (n1.north);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x3.north) -- ([yshift=1em]x3.north) -- ([yshift=1.4em]n2.north) -- (n2.north);
-\draw[-] (n1.west)--(n1.east);
-\draw[-] (n1.north)--(n1.south);
-\draw[-] (n2.west)--(n2.east);
-\draw[-] (n2.north)--(n2.south);
+\draw[-,thick] (n1.west)--(n1.east);
+\draw[-,thick] (n1.north)--(n1.south);
+\draw[-,thick] (n2.west)--(n2.east);
+\draw[-,thick] (n2.north)--(n2.south);
\node [anchor=south] (k1) at ([yshift=-0.1em]x1.north){};
\node [anchor=south] (k2) at ([yshift=-0.1em]x3.north){};
...
@@ -8,11 +8,11 @@
\node [anchor=east] (x1) at (-0.5em, 0) {$\mathbi{x}_l$};
\node [anchor=west,draw,fill=red!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (ln1) at ([xshift=1em]x1.east){\small{\textrm{LN}}};
\node [anchor=west,draw,fill=green!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (f1) at ([xshift=0.6em]ln1.east){\small{$F$}};
-\node [anchor=west,circle,draw,,minimum size=1em] (n1) at ([xshift=3em]f1.east){};
+\node [anchor=west,circle,draw,,minimum size=1em,thick] (n1) at ([xshift=3em]f1.east){};
\node [anchor=west] (x2) at ([xshift=1em]n1.east) {$\mathbi{x}_{l+1}$};
\node [anchor=west,draw,fill=red!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (ln12) at ([xshift=1em]x2.east){\small{\textrm{LN}}};
\node [anchor=west,draw,fill=green!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (f12) at ([xshift=0.6em]ln12.east){\small{$F$}};
-\node [anchor=west,circle,draw,,minimum size=1em] (n12) at ([xshift=3em]f12.east){};
+\node [anchor=west,circle,draw,,minimum size=1em,thick] (n12) at ([xshift=3em]f12.east){};
\node [anchor=west] (x22) at ([xshift=1em]n12.east) {$\mathbi{x}_{l+2}$};
\node [anchor=north] (x3) at ([yshift=-5em]x1.south) {$\mathbi{x}_l$};
@@ -20,13 +20,13 @@
\node [anchor=west,draw,fill=green!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (f2) at ([xshift=0.6em]ln2.east){\small{$F$}};
\node [anchor=west,minimum size=1em] (p1) at ([xshift=1em]f2.east){};
\node [anchor=north] (m1) at ([yshift=0.6em]p1.south){\footnotesize{\red{Mask=1}}};
-\node [anchor=west,circle,draw,,minimum size=1em] (n2) at ([xshift=3em]f2.east){};
+\node [anchor=west,circle,draw,,minimum size=1em,thick] (n2) at ([xshift=3em]f2.east){};
\node [anchor=west] (x4) at ([xshift=1em]n2.east) {$\mathbi{x}_{l+1}$};
\node [anchor=west,draw,fill=red!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (ln22) at ([xshift=1em]x4.east){\small{\textrm{LN}}};
\node [anchor=west,draw,fill=green!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (f22) at ([xshift=0.6em]ln22.east){\small{$F$}};
\node [anchor=west,minimum size=1em] (p2) at ([xshift=1em]f22.east){};
\node [anchor=north] (m2) at ([yshift=0.6em]p2.south){\footnotesize{\red{Mask=0}}};
-\node [anchor=west,circle,draw,,minimum size=1em] (n22) at ([xshift=3em]f22.east){};
+\node [anchor=west,circle,draw,,minimum size=1em,thick] (n22) at ([xshift=3em]f22.east){};
\node [anchor=west] (x42) at ([xshift=1em]n22.east) {$\mathbi{x}_{l+2}$};
\draw[->, line width=1pt] ([xshift=-0.1em]x1.east)--(ln1.west);
@@ -41,10 +41,10 @@
\draw[->, line width=1pt] (n2.east)--(x4.west);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x1.north) -- ([yshift=1em]x1.north) -- ([yshift=1.4em]n1.north) -- (n1.north);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x3.north) -- ([yshift=1em]x3.north) -- ([yshift=1.4em]n2.north) -- (n2.north);
-\draw[-] (n1.west)--(n1.east);
-\draw[-] (n1.north)--(n1.south);
-\draw[-] (n2.west)--(n2.east);
-\draw[-] (n2.north)--(n2.south);
+\draw[-,thick] (n1.west)--(n1.east);
+\draw[-,thick] (n1.north)--(n1.south);
+\draw[-,thick] (n2.west)--(n2.east);
+\draw[-,thick] (n2.north)--(n2.south);
\draw[->, line width=1pt] ([xshift=-0.1em]x2.east)--(ln12.west);
\draw[->, line width=1pt] ([xshift=-0.1em]ln12.east)--(f12.west);
@@ -58,10 +58,10 @@
\draw[->, line width=1pt] (n22.east)--(x42.west);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x2.north) -- ([yshift=1em]x2.north) -- ([yshift=1.4em]n12.north) -- (n12.north);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x4.north) -- ([yshift=1em]x4.north) -- ([yshift=1.4em]n22.north) -- (n22.north);
-\draw[-] (n12.west)--(n12.east);
-\draw[-] (n12.north)--(n12.south);
-\draw[-] (n22.west)--(n22.east);
-\draw[-] (n22.north)--(n22.south);
+\draw[-,thick] (n12.west)--(n12.east);
+\draw[-,thick] (n12.north)--(n12.south);
+\draw[-,thick] (n22.west)--(n22.east);
+\draw[-,thick] (n22.north)--(n22.south);
\node [anchor=south] (k1) at ([yshift=-0.1em]x1.north){};
\node [anchor=south] (k2) at ([yshift=-0.1em]x3.north){};
...
@@ -44,7 +44,7 @@
\node [anchor=east,font=\small] (r1) at ([xshift=-2em,yshift=0em]box1.west) {混合RNN};
{\small
-\node [anchor=south west,wnode] (l1) at ([xshift=1em,yshift=5em]r1.north west) {先序遍历句法树,得到序列:};
+\node [anchor=south west,wnode] (l1) at ([xshift=0em,yshift=5em]r1.north west) {先序遍历句法树,得到序列:};
\node [anchor=north west,wnode,align=center] (l2) at ([xshift=0.5em,yshift=-0.6em]l1.north east) {S\\[0.5em]$l_1$};
\node [anchor=north west,wnode,align=center] (l3) at ([xshift=0.5em,yshift=0em]l2.north east) {NP\\[0.5em]$l_2$};
\node [anchor=north west,wnode,align=center] (l4) at ([xshift=0.5em,yshift=0em]l3.north east) {PRN\\[0.5em]$l_3$};
...
@@ -41,7 +41,7 @@
\node[scale=0.9,anchor=west,minimum size=18pt] (tw13) at ([yshift=0.1em,xshift=0.5em]tw12.east){worried};
\node[scale=0.9,anchor=west,minimum size=18pt] (tw14) at ([xshift=0.5em]tw13.east){about};
\node[scale=0.9,anchor=west,minimum size=18pt] (tw15) at ([xshift=0.5em]tw14.east){the};
-\node[scale=0.9,anchor=west,minimum size=18pt] (tw16) at ([yshift=-0.1em,xshift=0.5em]tw15.east){situation};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw16) at ([xshift=0.5em]tw15.east){situation};
\draw[dashed] ([xshift=-0.3em]cfrag1.south) -- ([yshift=-0.3em]tw11.north);
\draw[dashed] (cfrag2.south) -- ([yshift=-0.4em]tw14.north);
...
@@ -5402,8 +5402,7 @@ author = {Yoshua Bengio and
@inproceedings{garcia-martinez2016factored,
title={Factored Neural Machine Translation Architectures},
author={Mercedes {Garcia-Martinez} and Loïc {Barrault} and Fethi {Bougares}},
-publisher={International Workshop on Spoken Language Translation (IWSLT'16)},
-notes={Sourced from Microsoft Academic - https://academic.microsoft.com/paper/2949810612},
+publisher={International Workshop on Spoken Language Translation},
year={2016}
}
@inproceedings{DBLP:conf/acl/Kudo18,
@@ -5695,7 +5694,7 @@ author = {Yoshua Bengio and
@inproceedings{britz2017effective,
title={Effective domain mixing for neural machine translation},
author={Britz, Denny and Le, Quoc and Pryzant, Reid},
-publisher={Proceedings of the Second Conference on Machine Translation},
+publisher={Annual Meeting of the Association for Computational Linguistics},
pages={118--126},
year={2017}
}
@@ -5770,7 +5769,7 @@ author = {Yoshua Bengio and
Wolfgang Menzel},
title = {Automatic Threshold Detection for Data Selection in Machine Translation},
pages = {483--488},
-publisher = {Proceedings of the Second Conference on Machine Translation},
+publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2017}
}
@inproceedings{DBLP:conf/wmt/BiciciY11,
@@ -5778,7 +5777,7 @@ author = {Yoshua Bengio and
Deniz Yuret},
title = {Instance Selection for Machine Translation using Feature Decay Algorithms},
pages = {272--283},
-publisher = {Proceedings of the Sixth Workshop on Statistical Machine Translation},
+publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2011}
}
@inproceedings{poncelas2018feature,
@@ -6047,7 +6046,7 @@ author = {Yoshua Bengio and
@inproceedings{hoang2018iterative,
title={Iterative back-translation for neural machine translation},
author={Hoang, Vu Cong Duy and Koehn, Philipp and Haffari, Gholamreza and Cohn, Trevor},
-publisher={Proceedings of the 2nd Workshop on Neural Machine Translation and Generation},
+publisher={Annual Meeting of the Association for Computational Linguistics},
pages={18--24},
year={2018}
}
@@ -6257,7 +6256,7 @@ author = {Yoshua Bengio and
Jingbo Zhu},
title = {Dynamic Curriculum Learning for Low-Resource Neural Machine Translation},
pages = {3977--3989},
-publisher = {International Committee on Computational Linguistics},
+publisher = {International Conference on Computational Linguistics},
year = {2020}
}
@inproceedings{DBLP:conf/acl/ZhouYWWC20,
@@ -6307,7 +6306,7 @@ author = {Yoshua Bengio and
Andrew McCallum},
title = {Active Bias: Training More Accurate Neural Networks by Emphasizing
High Variance Samples},
-publisher = {Annual Conference on Neural Information Processing Systems},
+publisher = {Conference on Neural Information Processing Systems},
pages = {1002--1012},
year = {2017}
}
@@ -6360,7 +6359,7 @@ author = {Yoshua Bengio and
title = {Investigating Catastrophic Forgetting During Continual Training for
Neural Machine Translation},
pages = {4315--4326},
-publisher = {International Committee on Computational Linguistics},
+publisher = {International Conference on Computational Linguistics},
year = {2020}
}
@inproceedings{DBLP:conf/cvpr/RebuffiKSL17,
@@ -6392,7 +6391,7 @@ author = {Yoshua Bengio and
Oriol Vinyals and
Navdeep Jaitly and
Noam Shazeer},
-publisher = {Annual Conference on Neural Information Processing Systems},
+publisher = {Conference on Neural Information Processing Systems},
pages = {1171--1179},
year = {2015}
}
@@ -6835,7 +6834,7 @@ author = {Yoshua Bengio and
@inproceedings{Gu2019LevenshteinT,
title={Levenshtein Transformer},
author={Jiatao Gu and Changhan Wang and Jake Zhao},
-publisher = {Annual Conference on Neural Information Processing Systems},
+publisher = {Conference on Neural Information Processing Systems},
pages = {11179--11189},
year = {2019},
}
@@ -6963,7 +6962,7 @@ author = {Yoshua Bengio and
@inproceedings{Jiang2012LearnedPF,
title={Learned Prioritization for Trading Off Accuracy and Speed},
author={Jiarong Jiang and Adam R. Teichert and Hal Daum{\'e} and Jason Eisner},
-publisher={Annual Conference on Neural Information Processing Systems},
+publisher={Conference on Neural Information Processing Systems},
pages={1340--1348},
year= {2012}
}
@@ -7123,7 +7122,7 @@ author = {Yoshua Bengio and
author = {Paul Michel and
Omer Levy and
Graham Neubig},
-publisher = {Annual Conference on Neural Information Processing Systems},
+publisher = {Conference on Neural Information Processing Systems},
pages = {14014--14024},
year = {2019}
}
@@ -7157,7 +7156,7 @@ author = {Yoshua Bengio and
title={Generative Neural Machine Translation},
author={Harshil Shah and
David Barber},
-publisher={Annual Conference on Neural Information Processing Systems},
+publisher={Conference on Neural Information Processing Systems},
pages={1353--1362},
year={2018}
}
@@ -7246,7 +7245,7 @@ author = {Yoshua Bengio and
Jeff Pool and
John Tran and
William J. Dally},
-publisher={Annual Conference on Neural Information Processing Systems},
+publisher={Conference on Neural Information Processing Systems},
pages={1135--1143},
year={2015}
}
@@ -7281,9 +7280,8 @@ author = {Yoshua Bengio and
Tinghui Zhou and
Gao Huang and
Trevor Darrell},
-publisher={ArXiv},
-year={2019},
-volume={abs/1810.05270}
+publisher={International Conference on Learning Representations},
+year={2019}
}
@inproceedings{Liu2017LearningEC,
author = {Zhuang Liu and
@@ -7341,8 +7339,8 @@ author = {Zhuang Liu and
Luke Zettlemoyer and
Omer Levy},
title = {Aligned Cross Entropy for Non-Autoregressive Machine Translation},
-publisher = {CoRR},
-volume = {abs/2004.01655},
+publisher = { International Conference on Machine Learning},
+volume = {119},
year = {2020},
}
@inproceedings{Shao2020MinimizingTB,
@@ -7394,9 +7392,8 @@ author = {Zhuang Liu and
@inproceedings{Zhou2020UnderstandingKD,
title={Understanding Knowledge Distillation in Non-autoregressive Machine Translation},
author={Chunting Zhou and Graham Neubig and Jiatao Gu},
-publisher={ArXiv},
-year={2020},
-volume={abs/1911.02727}
+publisher={International Conference on Learning Representations},
+year={2020}
}
@inproceedings{Wang2019NonAutoregressiveMT,
title={Non-Autoregressive Machine Translation with Auxiliary Regularization},
@@ -7456,16 +7453,16 @@ author = {Zhuang Liu and
@inproceedings{Ho2016GenerativeAI,
title={Generative Adversarial Imitation Learning},
author={Jonathan Ho and Stefano Ermon},
-publisher={Annual Conference on Neural Information Processing Systems},
+publisher={Conference on Neural Information Processing Systems},
pages={4565--4573},
year={2016}
}
@inproceedings{Duan2017OneShotIL,
title={One-Shot Imitation Learning},
author={Yan Duan and Marcin Andrychowicz and Bradly C. Stadie and Jonathan Ho and Jonas Schneider and Ilya Sutskever and Pieter Abbeel and Wojciech Zaremba},
-publisher={CoRR},
+publisher={Conference on Neural Information Processing Systems},
year={2017},
-volume={abs/1703.07326}
+pages= {1087--1098}
}
@inproceedings{Wang2018SemiAutoregressiveNM,
title={Semi-Autoregressive Neural Machine Translation},
@@ -7486,7 +7483,7 @@ author = {Zhuang Liu and
@inproceedings{Kasai2020NonAutoregressiveMT,
title={Non-Autoregressive Machine Translation with Disentangled Context Transformer},
author={Jungo Kasai and J. Cross and Marjan Ghazvininejad and Jiatao Gu},
-publisher={arXiv: Computation and Language},
+publisher={International Conference on Machine Learning},
year={2020}
}
@inproceedings{Zhou2019SynchronousBN,
@@ -7517,7 +7514,7 @@ author = {Zhuang Liu and
@inproceedings{Xiao2016ALA,
title={A Loss-Augmented Approach to Training Syntactic Machine Translation Systems},
author={Tong Xiao and Derek F. Wong and Jingbo Zhu},
-publisher={IEEE/ACM Transactions on Audio, Speech, and Language Processing},
+publisher={IEEE Transactions on Audio, Speech, and Language Processing},
year={2016},
volume={24},
pages={2069-2083}
@@ -7571,7 +7568,7 @@ author = {Zhuang Liu and
Ran El-Yaniv and
Yoshua Bengio},
title = {Binarized Neural Networks},
-publisher = {Annual Conference on Neural Information Processing Systems},
+publisher = {Conference on Neural Information Processing Systems},
pages = {4107--4115},
year = {2016}
}
@@ -7702,7 +7699,7 @@ author = {Zhuang Liu and
author = {Alexei Baevski and
Michael Auli},
title = {Adaptive Input Representations for Neural Language Modeling},
-publisher = {arXiv preprint arXiv:1809.10853},
+publisher = {International Conference on Learning Representations},
year = {2019}
}
@inproceedings{DBLP:journals/corr/abs-2006-04768,
@@ -7736,8 +7733,7 @@ author = {Zhuang Liu and
Dawei Song and
Ming Zhou},
title = {A Tensorized Transformer for Language Modeling},
-publisher = {CoRR},
-volume = {abs/1906.09777},
+publisher = {Conference on Neural Information Processing Systems},
year = {2019}
}
@inproceedings{DBLP:conf/nips/YangLSL19,
@@ -7816,7 +7812,7 @@ author = {Zhuang Liu and
Zhongjun He and
Hua Wu and
Haifeng Wang},
-publisher={arXiv preprint arXiv:1909.01101},
+publisher={Conference on Empirical Methods in Natural Language Processing},
year={2019}
}
@inproceedings{DBLP:conf/aclnmt/KoehnK17,
@@ -7876,7 +7872,7 @@ author = {Zhuang Liu and
@inproceedings{Eisner2011LearningST,
title={Learning Speed-Accuracy Tradeoffs in Nondeterministic Inference Algorithms},
author={J. Eisner and Hal Daum{\'e}},
-publisher={Annual Conference on Neural Information Processing Systems},
+publisher={Conference on Neural Information Processing Systems},
year={2011}
}
@inproceedings{Kazimi2017CoverageFC,
@@ -8929,7 +8925,7 @@ author = {Zhuang Liu and
Jesse Bettencourt and
David Duvenaud},
title = {Neural Ordinary Differential Equations},
-publisher = {Annual Conference on Neural Information Processing Systems},
+publisher = {Conference on Neural Information Processing Systems},
pages = {6572--6583},
year = {2018}
}
@@ -9189,7 +9185,7 @@ author = {Zhuang Liu and
Michael Wilber and
Serge Belongie},
title = {Residual Networks Behave Like Ensembles of Relatively Shallow Networks},
-publisher = {Annual Conference on Neural Information Processing Systems},
+publisher = {Conference on Neural Information Processing Systems},
pages = {550--558},
year = {2016}
}
...