title = {Longformer: The Long-Document Transformer},
title = {Longformer: The Long-Document Transformer},
...
@@ -5721,10 +5722,16 @@ author = {Yoshua Bengio and
...
@@ -5721,10 +5722,16 @@ author = {Yoshua Bengio and
year={2019}
year={2019}
}
}
% Conference paper, 2018. Venue name is stored in booktitle; publisher holds the publishing body.
@inproceedings{Yang2018ModelingLF,
  author    = {Baosong Yang and
               Zhaopeng Tu and
               Derek F. Wong and
               Fandong Meng and
               Lidia S. Chao and
               Tong Zhang},
  title     = {Modeling Localness for Self-Attention Networks},
  booktitle = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {4449--4458},
  publisher = {Association for Computational Linguistics},
  year      = {2018}
}
@inproceedings{DBLP:journals/corr/abs-1904-03107,
@inproceedings{DBLP:journals/corr/abs-1904-03107,
author = {Baosong Yang and
author = {Baosong Yang and
...
@@ -5745,10 +5752,16 @@ author = {Yoshua Bengio and
...
@@ -5745,10 +5752,16 @@ author = {Yoshua Bengio and
volume={abs/2002.06714}
volume={abs/2002.06714}
}
}
% Conference paper, 2018. Venue name is stored in booktitle; publisher holds the publishing body.
@inproceedings{Bapna2018TrainingDN,
  author    = {Ankur Bapna and
               Mia Xu Chen and
               Orhan Firat and
               Yuan Cao and
               Yonghui Wu},
  title     = {Training Deeper Neural Machine Translation Models with Transparent Attention},
  booktitle = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {3028--3033},
  publisher = {Association for Computational Linguistics},
  year      = {2018}
}
@inproceedings{Dou2018ExploitingDR,
@inproceedings{Dou2018ExploitingDR,
author = {Zi-Yi Dou and
author = {Zi-Yi Dou and
...
@@ -5788,18 +5801,32 @@ author = {Yoshua Bengio and
...
@@ -5788,18 +5801,32 @@ author = {Yoshua Bengio and
}
}
% NOTE(review): booktitle is intentionally absent. The only value on record,
% "American Mobile Telecommunications Association", does not match the
% publisher below and looks like a wrong expansion of the venue acronym.
% TODO: confirm the proceedings title and add it as booktitle.
@inproceedings{Vaswani2018Tensor2TensorFN,
  author    = {Ashish Vaswani and
               Samy Bengio and
               Eugene Brevdo and
               Fran{\c{c}}ois Chollet and
               Aidan N. Gomez and
               Stephan Gouws and
               Llion Jones and
               Lukasz Kaiser and
               Nal Kalchbrenner and
               Niki Parmar and
               Ryan Sepassi and
               Noam Shazeer and
               Jakob Uszkoreit},
  title     = {{Tensor2Tensor} for Neural Machine Translation},
  pages     = {193--199},
  publisher = {Association for Machine Translation in the Americas},
  year      = {2018}
}
@article{Kitaev2020ReformerTE,
@article{Kitaev2020ReformerTE,
title={Reformer: The Efficient Transformer},
author = {Nikita Kitaev and
author={Nikita Kitaev and L. Kaiser and Anselm Levskaya},