Commit e329e9d6 by 曹润柘

Merge branch 'caorunzhe' into 'master'

Caorunzhe

See merge request !1064
parents c90d70e8 6e13b60f
@@ -8,7 +8,7 @@
};
\node[font=\footnotesize,anchor=north] (l1) at ([xshift=0em,yshift=-1em]top.south) {(a) 符号合并表};
-\node[font=\scriptsize,anchor=west] (n1) at ([xshift=-4.5em,yshift=-6em]top.west) {l\ o\ w\ e\ r\ $<$e$>$};
+\node[font=\scriptsize,anchor=west] (n1) at ([xshift=-3em,yshift=-6em]top.west) {l\ o\ w\ e\ r\ $<$e$>$};
\node[font=\scriptsize,anchor=west] (n2) at ([xshift=2.6em]n1.east) {l\ o\ w\ e\ {\red r$<$e$>$}};
\node[font=\scriptsize,anchor=west] (n3) at ([xshift=2.6em]n2.east) {{\red lo}\ w\ e\ r$<$e$>$};
\node[font=\scriptsize,anchor=west] (n4) at ([xshift=2.6em]n3.east) {{\red low}\ e\ r$<$e$>$};
@@ -24,7 +24,7 @@
\node[font=\scriptsize,anchor=north east] (s1) at ([yshift=0.1em]n1.north west) {样例1:};
\node[font=\scriptsize,anchor=north east] (s1) at ([yshift=0.1em]t1.north west) {样例2:};
-\node[font=\footnotesize,anchor=north] (l2) at ([xshift=2em,yshift=-1em]t3.south) {(b) 合并样例};
+\node[font=\footnotesize,anchor=north] (l2) at ([xshift=0.5em,yshift=-1em]t3.south) {(b) 合并样例};
\draw[->,thick](n1.east) -- (n2.west);
\draw[->,thick](n2.east) -- (n3.west);
...
@@ -38,7 +38,7 @@
\end{scope}
-\begin{scope}[xshift=1.85in]
+\begin{scope}[xshift=1.75in]
\node [anchor=west,stnode] (r1) at (0, 0) {第1层};
\node [anchor=south,tnode] (r2) at ([xshift=0em,yshift=1em]r1.north){第2层};
@@ -60,11 +60,11 @@
\draw[->,thick] ([xshift=0em,yshift=0em]r3.north)--([xshift=0em,yshift=0em]r4.south);
\draw[->,thick] ([xshift=0em,yshift=0em]r4.north)--([xshift=0em,yshift=0em]output.south);
-\node [anchor=north,font=\small] (label) at ([xshift=-1.5em,yshift=-0.7em]input.south) {(b)原始Transformer模型};
+\node [anchor=north,font=\small] (label) at ([xshift=-1.7em,yshift=-0.7em]input.south) {(b)原始Transformer模型};
\end{scope}
-\begin{scope}[xshift=3.9in]
+\begin{scope}[xshift=3.85in]
\node [anchor=west,stnode] (r1) at (0, 0) {第1层};
\node [anchor=south,stnode] (r2) at ([xshift=0em,yshift=1em]r1.north){第2层};
@@ -89,7 +89,7 @@
\draw[->,thick] ([xshift=0em,yshift=0em]wr2.east)--([xshift=0em,yshift=0em]r2.west);
\draw[->,thick] ([xshift=0em,yshift=0em]wr3.east)--([xshift=0em,yshift=0em]r4.west);
-\node [anchor=north,font=\small,align=left] (label) at ([xshift=-3em,yshift=-0.7em]input.south) {(c)共享权重的\\ Transformer模型};
+\node [anchor=north,font=\small] (label) at ([xshift=-3em,yshift=-0.7em]input.south) {(c)共享权重的Transformer模型};
\end{scope}
...
@@ -14,7 +14,7 @@
\node [anchor=south west,manode] (a1) at ([xshift=0em,yshift=1em]e1.north west){Attention};
\node [anchor=south east,manode] (c1) at ([xshift=0em,yshift=1em]e1.north east){Conv};
\node [anchor=south west,ebnode] (e2) at ([xshift=0em,yshift=1em]a1.north west){Embedding};
-\node [anchor=south,draw,circle,inner sep=4pt] (add1) at ([xshift=0em,yshift=0.5em]e2.north){};
+\node [anchor=south,draw,circle,inner sep=4pt,thick] (add1) at ([xshift=0em,yshift=0.5em]e2.north){};
\node [anchor=south,ffnnode] (f2) at ([xshift=0em,yshift=0.5em]add1.north){FFN};
\node [anchor=south,inner sep=0mm,minimum height=1.8em] (op) at ([xshift=0em,yshift=0.5em]f2.north){output};
@@ -29,8 +29,8 @@
\draw[->,thick] ([xshift=0em,yshift=0em]f2.north)--([xshift=0em,yshift=0.3em]op.south);
-\draw[-] ([xshift=0em,yshift=0em]add1.west)--([xshift=-0em,yshift=0em]add1.east);
-\draw[-] ([xshift=0em,yshift=0em]add1.south)--([xshift=-0em,yshift=-0em]add1.north);
+\draw[-,thick] ([xshift=0em,yshift=0em]add1.west)--([xshift=-0em,yshift=0em]add1.east);
+\draw[-,thick] ([xshift=0em,yshift=0em]add1.south)--([xshift=-0em,yshift=-0em]add1.north);
\draw[->,thick,rectangle,rounded corners=5pt] ([xshift=0em,yshift=0.5em]f1.north)--([xshift=-6em,yshift=0.5em]f1.north)--([xshift=-5.45em,yshift=0em]add1.west)--([xshift=0em,yshift=0em]add1.west);
...
@@ -10,10 +10,10 @@
\begin{scope}[]
\node [anchor=east,circle,fill=black,inner sep = 2pt] (n1) at (-0, 0) {};
-\node [anchor=west,draw,circle,inner sep=5pt] (n2) at ([xshift=13em,yshift=0em]n1.east){};
+\node [anchor=west,draw,circle,inner sep=5pt,thick] (n2) at ([xshift=13em,yshift=0em]n1.east){};
\node [anchor=west,lnnode] (n3) at ([xshift=1.5em,yshift=0em]n2.east){LN};
\node [anchor=west,circle,fill=black,inner sep=2pt] (n4) at ([xshift=1.5em,yshift=0em]n3.east){};
-\node [anchor=west,draw,circle,inner sep=5pt] (n5) at ([xshift=5em,yshift=0em]n4.east){};
+\node [anchor=west,draw,circle,inner sep=5pt,thick] (n5) at ([xshift=5em,yshift=0em]n4.east){};
\node [anchor=west,lnnode] (n6) at ([xshift=1.5em,yshift=0em]n5.east){LN};
\node [anchor=west,manode] (a1) at ([xshift=1.5em,yshift=2em]n1.east){Multi-Head Attention};
...
@@ -7,14 +7,14 @@
\node [anchor=east] (x1) at (-0.5em, 0) {$\mathbi{x}_l$};
\node [anchor=west,draw,fill=red!20,inner xsep=5pt,rounded corners=2pt,thick] (F1) at ([xshift=4em]x1.east){\small{$F$}};
-\node [anchor=west,circle,draw,minimum size=1em] (n1) at ([xshift=4em]F1.east) {};
+\node [anchor=west,circle,draw,minimum size=1em,thick] (n1) at ([xshift=4em]F1.east) {};
\node [anchor=west,draw,fill=green!20,inner xsep=5pt,rounded corners=2pt,thick] (ln1) at ([xshift=4em]n1.east){\small{\textrm{LN}}};
\node [anchor=west] (x2) at ([xshift=4em]ln1.east) {$\mathbi{x}_{l+1}$};
\node [anchor=north] (x3) at ([yshift=-5em]x1.south) {$\mathbi{x}_l$};
\node [anchor=west,draw,fill=green!20,inner xsep=5pt,rounded corners=2pt,thick] (F2) at ([xshift=4em]x3.east){\small{\textrm{LN}}};
\node [anchor=west,draw,fill=red!20,inner xsep=5pt,rounded corners=2pt,thick] (ln2) at ([xshift=4em]F2.east){\small{$F$}};
-\node [anchor=west,circle,draw,,minimum size=1em] (n2) at ([xshift=4em]ln2.east){};
+\node [anchor=west,circle,draw,,minimum size=1em,thick] (n2) at ([xshift=4em]ln2.east){};
\node [anchor=west] (x4) at ([xshift=4em]n2.east) {$\mathbi{x}_{l+1}$};
\draw[->, line width=1pt] ([xshift=-0.1em]x1.east)--(F1.west);
@@ -27,10 +27,10 @@
\draw[->, line width=1pt] (n2.east)--(x4.west);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x1.north) -- ([yshift=1em]x1.north) -- ([yshift=1.4em]n1.north) -- (n1.north);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x3.north) -- ([yshift=1em]x3.north) -- ([yshift=1.4em]n2.north) -- (n2.north);
-\draw[-] (n1.west)--(n1.east);
-\draw[-] (n1.north)--(n1.south);
-\draw[-] (n2.west)--(n2.east);
-\draw[-] (n2.north)--(n2.south);
+\draw[-,thick] (n1.west)--(n1.east);
+\draw[-,thick] (n1.north)--(n1.south);
+\draw[-,thick] (n2.west)--(n2.east);
+\draw[-,thick] (n2.north)--(n2.south);
\node [anchor=south] (k1) at ([yshift=-0.1em]x1.north){};
\node [anchor=south] (k2) at ([yshift=-0.1em]x3.north){};
...
@@ -8,11 +8,11 @@
\node [anchor=east] (x1) at (-0.5em, 0) {$\mathbi{x}_l$};
\node [anchor=west,draw,fill=red!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (ln1) at ([xshift=1em]x1.east){\small{\textrm{LN}}};
\node [anchor=west,draw,fill=green!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (f1) at ([xshift=0.6em]ln1.east){\small{$F$}};
-\node [anchor=west,circle,draw,,minimum size=1em] (n1) at ([xshift=3em]f1.east){};
+\node [anchor=west,circle,draw,,minimum size=1em,thick] (n1) at ([xshift=3em]f1.east){};
\node [anchor=west] (x2) at ([xshift=1em]n1.east) {$\mathbi{x}_{l+1}$};
\node [anchor=west,draw,fill=red!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (ln12) at ([xshift=1em]x2.east){\small{\textrm{LN}}};
\node [anchor=west,draw,fill=green!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (f12) at ([xshift=0.6em]ln12.east){\small{$F$}};
-\node [anchor=west,circle,draw,,minimum size=1em] (n12) at ([xshift=3em]f12.east){};
+\node [anchor=west,circle,draw,,minimum size=1em,thick] (n12) at ([xshift=3em]f12.east){};
\node [anchor=west] (x22) at ([xshift=1em]n12.east) {$\mathbi{x}_{l+2}$};
\node [anchor=north] (x3) at ([yshift=-5em]x1.south) {$\mathbi{x}_l$};
@@ -20,13 +20,13 @@
\node [anchor=west,draw,fill=green!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (f2) at ([xshift=0.6em]ln2.east){\small{$F$}};
\node [anchor=west,minimum size=1em] (p1) at ([xshift=1em]f2.east){};
\node [anchor=north] (m1) at ([yshift=0.6em]p1.south){\footnotesize{\red{Mask=1}}};
-\node [anchor=west,circle,draw,,minimum size=1em] (n2) at ([xshift=3em]f2.east){};
+\node [anchor=west,circle,draw,,minimum size=1em,thick] (n2) at ([xshift=3em]f2.east){};
\node [anchor=west] (x4) at ([xshift=1em]n2.east) {$\mathbi{x}_{l+1}$};
\node [anchor=west,draw,fill=red!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (ln22) at ([xshift=1em]x4.east){\small{\textrm{LN}}};
\node [anchor=west,draw,fill=green!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (f22) at ([xshift=0.6em]ln22.east){\small{$F$}};
\node [anchor=west,minimum size=1em] (p2) at ([xshift=1em]f22.east){};
\node [anchor=north] (m2) at ([yshift=0.6em]p2.south){\footnotesize{\red{Mask=0}}};
-\node [anchor=west,circle,draw,,minimum size=1em] (n22) at ([xshift=3em]f22.east){};
+\node [anchor=west,circle,draw,,minimum size=1em,thick] (n22) at ([xshift=3em]f22.east){};
\node [anchor=west] (x42) at ([xshift=1em]n22.east) {$\mathbi{x}_{l+2}$};
\draw[->, line width=1pt] ([xshift=-0.1em]x1.east)--(ln1.west);
@@ -41,10 +41,10 @@
\draw[->, line width=1pt] (n2.east)--(x4.west);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x1.north) -- ([yshift=1em]x1.north) -- ([yshift=1.4em]n1.north) -- (n1.north);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x3.north) -- ([yshift=1em]x3.north) -- ([yshift=1.4em]n2.north) -- (n2.north);
-\draw[-] (n1.west)--(n1.east);
-\draw[-] (n1.north)--(n1.south);
-\draw[-] (n2.west)--(n2.east);
-\draw[-] (n2.north)--(n2.south);
+\draw[-,thick] (n1.west)--(n1.east);
+\draw[-,thick] (n1.north)--(n1.south);
+\draw[-,thick] (n2.west)--(n2.east);
+\draw[-,thick] (n2.north)--(n2.south);
\draw[->, line width=1pt] ([xshift=-0.1em]x2.east)--(ln12.west);
\draw[->, line width=1pt] ([xshift=-0.1em]ln12.east)--(f12.west);
@@ -58,10 +58,10 @@
\draw[->, line width=1pt] (n22.east)--(x42.west);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x2.north) -- ([yshift=1em]x2.north) -- ([yshift=1.4em]n12.north) -- (n12.north);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x4.north) -- ([yshift=1em]x4.north) -- ([yshift=1.4em]n22.north) -- (n22.north);
-\draw[-] (n12.west)--(n12.east);
-\draw[-] (n12.north)--(n12.south);
-\draw[-] (n22.west)--(n22.east);
-\draw[-] (n22.north)--(n22.south);
+\draw[-,thick] (n12.west)--(n12.east);
+\draw[-,thick] (n12.north)--(n12.south);
+\draw[-,thick] (n22.west)--(n22.east);
+\draw[-,thick] (n22.north)--(n22.south);
\node [anchor=south] (k1) at ([yshift=-0.1em]x1.north){};
\node [anchor=south] (k2) at ([yshift=-0.1em]x3.north){};
...
@@ -44,7 +44,7 @@
\node [anchor=east,font=\small] (r1) at ([xshift=-2em,yshift=0em]box1.west) {混合RNN};
{\small
-\node [anchor=south west,wnode] (l1) at ([xshift=1em,yshift=5em]r1.north west) {先序遍历句法树,得到序列:};
+\node [anchor=south west,wnode] (l1) at ([xshift=0em,yshift=5em]r1.north west) {先序遍历句法树,得到序列:};
\node [anchor=north west,wnode,align=center] (l2) at ([xshift=0.5em,yshift=-0.6em]l1.north east) {S\\[0.5em]$l_1$};
\node [anchor=north west,wnode,align=center] (l3) at ([xshift=0.5em,yshift=0em]l2.north east) {NP\\[0.5em]$l_2$};
\node [anchor=north west,wnode,align=center] (l4) at ([xshift=0.5em,yshift=0em]l3.north east) {PRN\\[0.5em]$l_3$};
...
@@ -41,7 +41,7 @@
\node[scale=0.9,anchor=west,minimum size=18pt] (tw13) at ([yshift=0.1em,xshift=0.5em]tw12.east){worried};
\node[scale=0.9,anchor=west,minimum size=18pt] (tw14) at ([xshift=0.5em]tw13.east){about};
\node[scale=0.9,anchor=west,minimum size=18pt] (tw15) at ([xshift=0.5em]tw14.east){the};
-\node[scale=0.9,anchor=west,minimum size=18pt] (tw16) at ([yshift=-0.1em,xshift=0.5em]tw15.east){situation};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw16) at ([xshift=0.5em]tw15.east){situation};
\draw[dashed] ([xshift=-0.3em]cfrag1.south) -- ([yshift=-0.3em]tw11.north);
\draw[dashed] (cfrag2.south) -- ([yshift=-0.4em]tw14.north);
...
@@ -5402,8 +5402,7 @@ author = {Yoshua Bengio and
@inproceedings{garcia-martinez2016factored,
title={Factored Neural Machine Translation Architectures},
author={Mercedes {Garcia-Martinez} and Loïc {Barrault} and Fethi {Bougares}},
-publisher={International Workshop on Spoken Language Translation (IWSLT'16)},
-notes={Sourced from Microsoft Academic - https://academic.microsoft.com/paper/2949810612},
+publisher={International Workshop on Spoken Language Translation},
year={2016}
}
@inproceedings{DBLP:conf/acl/Kudo18,
@@ -5695,7 +5694,7 @@ author = {Yoshua Bengio and
@inproceedings{britz2017effective,
title={Effective domain mixing for neural machine translation},
author={Britz, Denny and Le, Quoc and Pryzant, Reid},
-publisher={Proceedings of the Second Conference on Machine Translation},
+publisher={Annual Meeting of the Association for Computational Linguistics},
pages={118--126},
year={2017}
}
@@ -5770,7 +5769,7 @@ author = {Yoshua Bengio and
Wolfgang Menzel},
title = {Automatic Threshold Detection for Data Selection in Machine Translation},
pages = {483--488},
-publisher = {Proceedings of the Second Conference on Machine Translation},
+publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2017}
}
@inproceedings{DBLP:conf/wmt/BiciciY11,
@@ -5778,7 +5777,7 @@ author = {Yoshua Bengio and
Deniz Yuret},
title = {Instance Selection for Machine Translation using Feature Decay Algorithms},
pages = {272--283},
-publisher = {Proceedings of the Sixth Workshop on Statistical Machine Translation},
+publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2011}
}
@inproceedings{poncelas2018feature,
@@ -6047,7 +6046,7 @@ author = {Yoshua Bengio and
@inproceedings{hoang2018iterative,
title={Iterative back-translation for neural machine translation},
author={Hoang, Vu Cong Duy and Koehn, Philipp and Haffari, Gholamreza and Cohn, Trevor},
-publisher={Proceedings of the 2nd Workshop on Neural Machine Translation and Generation},
+publisher={Annual Meeting of the Association for Computational Linguistics},
pages={18--24},
year={2018}
}
@@ -6257,7 +6256,7 @@ author = {Yoshua Bengio and
Jingbo Zhu},
title = {Dynamic Curriculum Learning for Low-Resource Neural Machine Translation},
pages = {3977--3989},
-publisher = {International Committee on Computational Linguistics},
+publisher = {International Conference on Computational Linguistics},
year = {2020}
}
@inproceedings{DBLP:conf/acl/ZhouYWWC20,
@@ -6307,7 +6306,7 @@ author = {Yoshua Bengio and
Andrew McCallum},
title = {Active Bias: Training More Accurate Neural Networks by Emphasizing
High Variance Samples},
-publisher = {Annual Conference on Neural Information Processing Systems},
+publisher = {Conference on Neural Information Processing Systems},
pages = {1002--1012},
year = {2017}
}
@@ -6360,7 +6359,7 @@ author = {Yoshua Bengio and
title = {Investigating Catastrophic Forgetting During Continual Training for
Neural Machine Translation},
pages = {4315--4326},
-publisher = {International Committee on Computational Linguistics},
+publisher = {International Conference on Computational Linguistics},
year = {2020}
}
@inproceedings{DBLP:conf/cvpr/RebuffiKSL17,
@@ -6392,7 +6391,7 @@ author = {Yoshua Bengio and
Oriol Vinyals and
Navdeep Jaitly and
Noam Shazeer},
-publisher = {Annual Conference on Neural Information Processing Systems},
+publisher = {Conference on Neural Information Processing Systems},
pages = {1171--1179},
year = {2015}
}
@@ -6835,7 +6834,7 @@ author = {Yoshua Bengio and
@inproceedings{Gu2019LevenshteinT,
title={Levenshtein Transformer},
author={Jiatao Gu and Changhan Wang and Jake Zhao},
-publisher = {Annual Conference on Neural Information Processing Systems},
+publisher = {Conference on Neural Information Processing Systems},
pages = {11179--11189},
year = {2019},
}
@@ -6963,7 +6962,7 @@ author = {Yoshua Bengio and
@inproceedings{Jiang2012LearnedPF,
title={Learned Prioritization for Trading Off Accuracy and Speed},
author={Jiarong Jiang and Adam R. Teichert and Hal Daum{\'e} and Jason Eisner},
-publisher={Annual Conference on Neural Information Processing Systems},
+publisher={Conference on Neural Information Processing Systems},
pages={1340--1348},
year= {2012}
}
@@ -7123,7 +7122,7 @@ author = {Yoshua Bengio and
author = {Paul Michel and
Omer Levy and
Graham Neubig},
-publisher = {Annual Conference on Neural Information Processing Systems},
+publisher = {Conference on Neural Information Processing Systems},
pages = {14014--14024},
year = {2019}
}
@@ -7157,7 +7156,7 @@ author = {Yoshua Bengio and
title={Generative Neural Machine Translation},
author={Harshil Shah and
David Barber},
-publisher={Annual Conference on Neural Information Processing Systems},
+publisher={Conference on Neural Information Processing Systems},
pages={1353--1362},
year={2018}
}
@@ -7246,7 +7245,7 @@ author = {Yoshua Bengio and
Jeff Pool and
John Tran and
William J. Dally},
-publisher={Annual Conference on Neural Information Processing Systems},
+publisher={Conference on Neural Information Processing Systems},
pages={1135--1143},
year={2015}
}
@@ -7281,9 +7280,8 @@ author = {Yoshua Bengio and
Tinghui Zhou and
Gao Huang and
Trevor Darrell},
-publisher={ArXiv},
-year={2019},
-volume={abs/1810.05270}
+publisher={International Conference on Learning Representations},
+year={2019}
}
@inproceedings{Liu2017LearningEC,
author = {Zhuang Liu and
@@ -7341,8 +7339,8 @@ author = {Zhuang Liu and
Luke Zettlemoyer and
Omer Levy},
title = {Aligned Cross Entropy for Non-Autoregressive Machine Translation},
-publisher = {CoRR},
-volume = {abs/2004.01655},
+publisher = { International Conference on Machine Learning},
+volume = {119},
year = {2020},
}
@inproceedings{Shao2020MinimizingTB,
@@ -7394,9 +7392,8 @@ author = {Zhuang Liu and
@inproceedings{Zhou2020UnderstandingKD,
title={Understanding Knowledge Distillation in Non-autoregressive Machine Translation},
author={Chunting Zhou and Graham Neubig and Jiatao Gu},
-publisher={ArXiv},
-year={2020},
-volume={abs/1911.02727}
+publisher={International Conference on Learning Representations},
+year={2020}
}
@inproceedings{Wang2019NonAutoregressiveMT,
title={Non-Autoregressive Machine Translation with Auxiliary Regularization},
@@ -7456,16 +7453,16 @@ author = {Zhuang Liu and
@inproceedings{Ho2016GenerativeAI,
title={Generative Adversarial Imitation Learning},
author={Jonathan Ho and Stefano Ermon},
-publisher={Annual Conference on Neural Information Processing Systems},
+publisher={Conference on Neural Information Processing Systems},
pages={4565--4573},
year={2016}
}
@inproceedings{Duan2017OneShotIL,
title={One-Shot Imitation Learning},
author={Yan Duan and Marcin Andrychowicz and Bradly C. Stadie and Jonathan Ho and Jonas Schneider and Ilya Sutskever and Pieter Abbeel and Wojciech Zaremba},
-publisher={CoRR},
+publisher={Conference on Neural Information Processing Systems},
year={2017},
-volume={abs/1703.07326}
+pages= {1087--1098}
}
@inproceedings{Wang2018SemiAutoregressiveNM,
title={Semi-Autoregressive Neural Machine Translation},
@@ -7486,7 +7483,7 @@ author = {Zhuang Liu and
@inproceedings{Kasai2020NonAutoregressiveMT,
title={Non-Autoregressive Machine Translation with Disentangled Context Transformer},
author={Jungo Kasai and J. Cross and Marjan Ghazvininejad and Jiatao Gu},
-publisher={arXiv: Computation and Language},
+publisher={International Conference on Machine Learning},
year={2020}
}
@inproceedings{Zhou2019SynchronousBN,
@@ -7517,7 +7514,7 @@ author = {Zhuang Liu and
@inproceedings{Xiao2016ALA,
title={A Loss-Augmented Approach to Training Syntactic Machine Translation Systems},
author={Tong Xiao and Derek F. Wong and Jingbo Zhu},
-publisher={IEEE/ACM Transactions on Audio, Speech, and Language Processing},
+publisher={IEEE Transactions on Audio, Speech, and Language Processing},
year={2016},
volume={24},
pages={2069-2083}
@@ -7571,7 +7568,7 @@ author = {Zhuang Liu and
Ran El-Yaniv and
Yoshua Bengio},
title = {Binarized Neural Networks},
-publisher = {Annual Conference on Neural Information Processing Systems},
+publisher = {Conference on Neural Information Processing Systems},
pages = {4107--4115},
year = {2016}
}
@@ -7702,7 +7699,7 @@ author = {Zhuang Liu and
author = {Alexei Baevski and
Michael Auli},
title = {Adaptive Input Representations for Neural Language Modeling},
-publisher = {arXiv preprint arXiv:1809.10853},
+publisher = {International Conference on Learning Representations},
year = {2019}
}
@inproceedings{DBLP:journals/corr/abs-2006-04768,
@@ -7736,8 +7733,7 @@ author = {Zhuang Liu and
Dawei Song and
Ming Zhou},
title = {A Tensorized Transformer for Language Modeling},
-publisher = {CoRR},
-volume = {abs/1906.09777},
+publisher = {Conference on Neural Information Processing Systems},
year = {2019}
}
@inproceedings{DBLP:conf/nips/YangLSL19,
@@ -7816,7 +7812,7 @@ author = {Zhuang Liu and
Zhongjun He and
Hua Wu and
Haifeng Wang},
-publisher={arXiv preprint arXiv:1909.01101},
+publisher={Conference on Empirical Methods in Natural Language Processing},
year={2019}
}
@inproceedings{DBLP:conf/aclnmt/KoehnK17,
@@ -7876,7 +7872,7 @@ author = {Zhuang Liu and
@inproceedings{Eisner2011LearningST,
title={Learning Speed-Accuracy Tradeoffs in Nondeterministic Inference Algorithms},
author={J. Eisner and Hal Daum{\'e}},
-publisher={Annual Conference on Neural Information Processing Systems},
+publisher={Conference on Neural Information Processing Systems},
year={2011}
}
@inproceedings{Kazimi2017CoverageFC,
@@ -8929,7 +8925,7 @@ author = {Zhuang Liu and
Jesse Bettencourt and
David Duvenaud},
title = {Neural Ordinary Differential Equations},
-publisher = {Annual Conference on Neural Information Processing Systems},
+publisher = {Conference on Neural Information Processing Systems},
pages = {6572--6583},
year = {2018}
}
@@ -9189,7 +9185,7 @@ author = {Zhuang Liu and
Michael Wilber and
Serge Belongie},
title = {Residual Networks Behave Like Ensembles of Relatively Shallow Networks},
-publisher = {Annual Conference on Neural Information Processing Systems},
+publisher = {Conference on Neural Information Processing Systems},
pages = {550--558},
year = {2016}
}
...