title = {Longformer: The Long-Document Transformer},
...
...
@@ -5721,10 +5722,16 @@ author = {Yoshua Bengio and
year={2019}
}
% Yang et al., EMNLP 2018. One set of fields only: full author names, the
% conference in booktitle, and ACL as publisher.
@inproceedings{Yang2018ModelingLF,
  author    = {Baosong Yang and
               Zhaopeng Tu and
               Derek F. Wong and
               Fandong Meng and
               Lidia S. Chao and
               Tong Zhang},
  title     = {Modeling Localness for Self-Attention Networks},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  pages     = {4449--4458},
  publisher = {Association for Computational Linguistics},
  year      = {2018}
}
@inproceedings{DBLP:journals/corr/abs-1904-03107,
author = {Baosong Yang and
...
...
@@ -5745,10 +5752,16 @@ author = {Yoshua Bengio and
volume={abs/2002.06714}
}
% Bapna et al., EMNLP 2018. One set of fields only: full author names, the
% conference in booktitle, and ACL as publisher.
@inproceedings{Bapna2018TrainingDN,
  author    = {Ankur Bapna and
               Mia Xu Chen and
               Orhan Firat and
               Yuan Cao and
               Yonghui Wu},
  title     = {Training Deeper Neural Machine Translation Models with Transparent
               Attention},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  pages     = {3028--3033},
  publisher = {Association for Computational Linguistics},
  year      = {2018}
}
@inproceedings{Dou2018ExploitingDR,
author = {Zi-Yi Dou and
...
...
@@ -5788,18 +5801,32 @@ author = {Yoshua Bengio and
}
% Vaswani et al., AMTA 2018. The venue is the Association for Machine
% Translation in the Americas; the earlier booktitle mis-expanded the acronym.
% NOTE(review): confirm the exact proceedings title (ordinal/volume) against
% the ACL Anthology record.
@inproceedings{Vaswani2018Tensor2TensorFN,
  author    = {Ashish Vaswani and
               Samy Bengio and
               Eugene Brevdo and
               Fran{\c{c}}ois Chollet and
               Aidan N. Gomez and
               Stephan Gouws and
               Llion Jones and
               Lukasz Kaiser and
               Nal Kalchbrenner and
               Niki Parmar and
               Ryan Sepassi and
               Noam Shazeer and
               Jakob Uszkoreit},
  title     = {Tensor2Tensor for Neural Machine Translation},
  booktitle = {Proceedings of the 13th Conference of the Association for Machine Translation in the Americas},
  pages     = {193--199},
  publisher = {Association for Machine Translation in the Americas},
  year      = {2018}
}
@article{Kitaev2020ReformerTE,
title={Reformer: The Efficient Transformer},
author={Nikita Kitaev and L. Kaiser and Anselm Levskaya},