author={Ashish {Vaswani} and Noam {Shazeer} and Niki {Parmar} and Jakob {Uszkoreit} and Llion {Jones} and Aidan N. {Gomez} and Lukasz {Kaiser} and Illia {Polosukhin}},
author={Ashish {Vaswani} and Noam {Shazeer} and Niki {Parmar} and Jakob {Uszkoreit} and Llion {Jones} and Aidan {Gomez} and Lukasz {Kaiser} and Illia {Polosukhin}},
publisher={International Conference on Neural Information Processing},
publisher={International Conference on Neural Information Processing},
pages={5998--6008},
pages={5998--6008},
year={2017}
year={2017}
...
@@ -4237,8 +4237,8 @@ author = {Yoshua Bengio and
...
@@ -4237,8 +4237,8 @@ author = {Yoshua Bengio and
Tong Xiao and
Tong Xiao and
Jingbo Zhu and
Jingbo Zhu and
Changliang Li and
Changliang Li and
Derek F. Wong and
Derek Wong and
Lidia S. Chao},
Lidia Chao},
title = {Learning Deep Transformer Models for Machine Translation},
title = {Learning Deep Transformer Models for Machine Translation},
pages = {1810--1822},
pages = {1810--1822},
publisher = {Annual Meeting of the Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
...
@@ -4429,9 +4429,9 @@ author = {Yoshua Bengio and
...
@@ -4429,9 +4429,9 @@ author = {Yoshua Bengio and
}
}
@inproceedings{Yang2017TowardsBH,
@inproceedings{Yang2017TowardsBH,
author = {Baosong Yang and
author = {Baosong Yang and
Derek F. Wong and
Derek Wong and
Tong Xiao and
Tong Xiao and
Lidia S. Chao and
Lidia Chao and
Jingbo Zhu},
Jingbo Zhu},
title = {Towards Bidirectional Hierarchical Representations for Attention-based
title = {Towards Bidirectional Hierarchical Representations for Attention-based
Neural Machine Translation},
Neural Machine Translation},
...
@@ -4479,8 +4479,8 @@ author = {Yoshua Bengio and
...
@@ -4479,8 +4479,8 @@ author = {Yoshua Bengio and
author = {Zihang Dai and
author = {Zihang Dai and
Zhilin Yang and
Zhilin Yang and
Yiming Yang and
Yiming Yang and
Jaime G. Carbonell and
Jaime Carbonell and
Quoc V. Le and
Quoc Le and
Ruslan Salakhutdinov},
Ruslan Salakhutdinov},
title = {Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context},
title = {Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context},
publisher = {Annual Meeting of the Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
...
@@ -4592,7 +4592,7 @@ author = {Yoshua Bengio and
...
@@ -4592,7 +4592,7 @@ author = {Yoshua Bengio and
}
}
@inproceedings{kim-rush-2016-sequence,
@inproceedings{kim-rush-2016-sequence,
author = {Yoon Kim and
author = {Yoon Kim and
Alexander M. Rush},
Alexander Rush},
title = {Sequence-Level Knowledge Distillation},
title = {Sequence-Level Knowledge Distillation},
pages = {1317--1327},
pages = {1317--1327},
publisher = {Conference on Empirical Methods in Natural Language Processing},
publisher = {Conference on Empirical Methods in Natural Language Processing},
...
@@ -4612,7 +4612,7 @@ author = {Yoshua Bengio and
...
@@ -4612,7 +4612,7 @@ author = {Yoshua Bengio and
Ankur Bapna and
Ankur Bapna and
Melvin Johnson and
Melvin Johnson and
Wolfgang Macherey and
Wolfgang Macherey and
George F. Foster and
George Foster and
Llion Jones and
Llion Jones and
Mike Schuster and
Mike Schuster and
Noam Shazeer and
Noam Shazeer and
...
@@ -4987,7 +4987,7 @@ author = {Yoshua Bengio and
...
@@ -4987,7 +4987,7 @@ author = {Yoshua Bengio and
author = {Felix Wu and
author = {Felix Wu and
Angela Fan and
Angela Fan and
Alexei Baevski and
Alexei Baevski and
Yann N. Dauphin and
Yann Dauphin and
Michael Auli},
Michael Auli},
title = {Pay Less Attention with Lightweight and Dynamic Convolutions},
title = {Pay Less Attention with Lightweight and Dynamic Convolutions},
publisher = {International Conference on Learning Representations},
publisher = {International Conference on Learning Representations},
...
@@ -5281,9 +5281,9 @@ author = {Yoshua Bengio and
...
@@ -5281,9 +5281,9 @@ author = {Yoshua Bengio and
@inproceedings{Yang2018ModelingLF,
@inproceedings{Yang2018ModelingLF,
author = {Baosong Yang and
author = {Baosong Yang and
Zhaopeng Tu and
Zhaopeng Tu and
Derek F. Wong and
Derek Wong and
Fandong Meng and
Fandong Meng and
Lidia S. Chao and
Lidia Chao and
Tong Zhang},
Tong Zhang},
title = {Modeling Localness for Self-Attention Networks},
title = {Modeling Localness for Self-Attention Networks},
pages = {4449--4458},
pages = {4449--4458},
...
@@ -5360,7 +5360,7 @@ author = {Yoshua Bengio and
...
@@ -5360,7 +5360,7 @@ author = {Yoshua Bengio and
Samy Bengio and
Samy Bengio and
Eugene Brevdo and
Eugene Brevdo and
Fran{\c{c}}ois Chollet and
Fran{\c{c}}ois Chollet and
Aidan N. Gomez and
Aidan Gomez and
Stephan Gouws and
Stephan Gouws and
Llion Jones and
Llion Jones and
Lukasz Kaiser and
Lukasz Kaiser and
...
@@ -5450,7 +5450,7 @@ author = {Yoshua Bengio and
...
@@ -5450,7 +5450,7 @@ author = {Yoshua Bengio and
}
}
@inproceedings{DBLP:conf/sp/Carlini017,
@inproceedings{DBLP:conf/sp/Carlini017,
author = {Nicholas Carlini and
author = {Nicholas Carlini and
David A. Wagner},
David Wagner},
title = {Towards Evaluating the Robustness of Neural Networks},
title = {Towards Evaluating the Robustness of Neural Networks},
pages = {39--57},
pages = {39--57},
publisher = {IEEE Symposium on Security and Privacy},
publisher = {IEEE Symposium on Security and Privacy},
...
@@ -5497,7 +5497,7 @@ author = {Yoshua Bengio and
...
@@ -5497,7 +5497,7 @@ author = {Yoshua Bengio and
year = {2014}
year = {2014}
}
}
@inproceedings{DBLP:journals/corr/GoodfellowSS14,
@inproceedings{DBLP:journals/corr/GoodfellowSS14,
author = {Ian J. Goodfellow and
author = {Ian Goodfellow and
Jonathon Shlens and
Jonathon Shlens and
Christian Szegedy},
Christian Szegedy},
title = {Explaining and Harnessing Adversarial Examples},
title = {Explaining and Harnessing Adversarial Examples},
...
@@ -5525,7 +5525,7 @@ author = {Yoshua Bengio and
...
@@ -5525,7 +5525,7 @@ author = {Yoshua Bengio and
@inproceedings{DBLP:conf/naacl/YasunagaKR18,
@inproceedings{DBLP:conf/naacl/YasunagaKR18,
author = {Michihiro Yasunaga and
author = {Michihiro Yasunaga and
Jungo Kasai and
Jungo Kasai and
Dragomir R. Radev},
Dragomir Radev},
title = {Robust Multilingual Part-of-Speech Tagging via Adversarial Training},
title = {Robust Multilingual Part-of-Speech Tagging via Adversarial Training},
pages = {976--986},
pages = {976--986},
publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
...
@@ -5569,7 +5569,7 @@ author = {Yoshua Bengio and
...
@@ -5569,7 +5569,7 @@ author = {Yoshua Bengio and
@inproceedings{DBLP:conf/naacl/AnastasopoulosL19,
@inproceedings{DBLP:conf/naacl/AnastasopoulosL19,
author = {Antonios Anastasopoulos and
author = {Antonios Anastasopoulos and
Alison Lui and
Alison Lui and
Toan Q. Nguyen and
Toan Nguyen and
David Chiang},
David Chiang},
title = {Neural Machine Translation of Text from Non-Native Speakers},
title = {Neural Machine Translation of Text from Non-Native Speakers},
pages = {3070--3080},
pages = {3070--3080},
...
@@ -5645,11 +5645,11 @@ author = {Yoshua Bengio and
...
@@ -5645,11 +5645,11 @@ author = {Yoshua Bengio and
publisher = {Annual Meeting of the Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019}
year = {2019}
}
}
@techreport{chen1999gaussian,
@inproceedings{chen1999gaussian,
title={A Gaussian prior for smoothing maximum entropy models},
title={A Gaussian prior for smoothing maximum entropy models},
author={Chen, Stanley F and Rosenfeld, Ronald},
author={Chen, Stanley and Rosenfeld, Ronald},
year={1999},
year={1999},
institution={CARNEGIE-MELLON UNIV PITTSBURGH PA SCHOOL OF COMPUTER SCIENCE}
publisher={CARNEGIE-MELLON UNIV PITTSBURGH PA SCHOOL OF COMPUTER SCIENCE}
}
}
@inproceedings{DBLP:conf/emnlp/MichelN18,
@inproceedings{DBLP:conf/emnlp/MichelN18,
author = {Paul Michel and
author = {Paul Michel and
...
@@ -5758,7 +5758,7 @@ author = {Yoshua Bengio and
...
@@ -5758,7 +5758,7 @@ author = {Yoshua Bengio and
@inproceedings{DBLP:conf/aclnmt/ChenCFL17,
@inproceedings{DBLP:conf/aclnmt/ChenCFL17,
author = {Boxing Chen and
author = {Boxing Chen and
Colin Cherry and
Colin Cherry and
George F. Foster and
George Foster and
Samuel Larkin},
Samuel Larkin},
title = {Cost Weighting for Neural Machine Translation Domain Adaptation},
title = {Cost Weighting for Neural Machine Translation Domain Adaptation},
pages = {40--46},
pages = {40--46},
...
@@ -6263,7 +6263,7 @@ author = {Yoshua Bengio and
...
@@ -6263,7 +6263,7 @@ author = {Yoshua Bengio and
@inproceedings{DBLP:conf/acl/ZhouYWWC20,
@inproceedings{DBLP:conf/acl/ZhouYWWC20,
author = {Yikai Zhou and
author = {Yikai Zhou and
Baosong Yang and
Baosong Yang and
Derek F. Wong and
Derek Wong and
Yu Wan and
Yu Wan and
Lidia S. Chao},
Lidia S. Chao},
title = {Uncertainty-Aware Curriculum Learning for Neural Machine Translation},
title = {Uncertainty-Aware Curriculum Learning for Neural Machine Translation},
...
@@ -6324,7 +6324,7 @@ author = {Yoshua Bengio and
...
@@ -6324,7 +6324,7 @@ author = {Yoshua Bengio and
}
}
@inproceedings{rusu2016progressive,
@inproceedings{rusu2016progressive,
title={Progressive neural networks},
title={Progressive neural networks},
author={Rusu, Andrei A and Rabinowitz, Neil C and Desjardins, Guillaume and Soyer, Hubert and Kirkpatrick, James and Kavukcuoglu, Koray and Pascanu, Razvan and Hadsell, Raia},
author={Rusu, Andrei and Rabinowitz, Neil and Desjardins, Guillaume and Soyer, Hubert and Kirkpatrick, James and Kavukcuoglu, Koray and Pascanu, Razvan and Hadsell, Raia},
publisher={arXiv preprint arXiv:1606.04671},
publisher={arXiv preprint arXiv:1606.04671},
year={2016}
year={2016}
}
}
...
@@ -6334,7 +6334,7 @@ author = {Yoshua Bengio and
...
@@ -6334,7 +6334,7 @@ author = {Yoshua Bengio and
Charles Blundell and
Charles Blundell and
Yori Zwols and
Yori Zwols and
David Ha and
David Ha and
Andrei A. Rusu and
Andrei Rusu and
Alexander Pritzel and
Alexander Pritzel and
Daan Wierstra},
Daan Wierstra},
title = {PathNet: Evolution Channels Gradient Descent in Super Neural Networks},
title = {PathNet: Evolution Channels Gradient Descent in Super Neural Networks},
...
@@ -6374,8 +6374,8 @@ author = {Yoshua Bengio and
...
@@ -6374,8 +6374,8 @@ author = {Yoshua Bengio and
year = {2017}
year = {2017}
}
}
@inproceedings{DBLP:conf/eccv/CastroMGSA18,
@inproceedings{DBLP:conf/eccv/CastroMGSA18,
author = {Francisco M. Castro and
author = {Francisco Castro and
Manuel J. Mar{\'{\i}}n-Jim{\'{e}}nez and
Manuel Mar{\'{\i}}n-Jim{\'{e}}nez and
Nicol{\'{a}}s Guil and
Nicol{\'{a}}s Guil and
Cordelia Schmid and
Cordelia Schmid and
Karteek Alahari},
Karteek Alahari},
...
@@ -6440,14 +6440,14 @@ author = {Yoshua Bengio and
...
@@ -6440,14 +6440,14 @@ author = {Yoshua Bengio and