全部章节bib更新

fe657d1d · 单韦乔 · bb2d2e1e · fe657d1d
Commit fe657d1d authored Jan 03, 2021 by 单韦乔
--- a/bibliography.bib
+++ b/bibliography.bib
@@ -2400,15 +2400,6 @@ year = {2012}
  pages     = {260--269},
  year      = {1967}
 }
-@inproceedings{DBLP:conf/acl/OchN02,
-  author    = {Franz Josef Och and
-               Hermann Ney},
-  title     = {Discriminative Training and Maximum Entropy Models for Statistical
-               Machine Translation},
-  pages     = {295--302},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2002}
-}
 @inproceedings{koehn2000estimating,
  author    = {Philipp Koehn and
               Kevin Knight},
@@ -3853,15 +3844,6 @@ year = {2012}
  pages     = {701--710},
  year      = {2014}
 }
-@inproceedings{2011Natural,
-  title={Natural Language Processing (almost) from Scratch},
-  author={ Collobert, Ronan  and  Weston, Jason  and Bottou, Léon and  Karlen, Michael  and  Kavukcuoglu, Koray  and  Kuksa, Pavel },
-  publisher={Journal of Machine Learning Research},
-  volume={12},
-  number={1},
-  pages={2493-2537},
-  year={2011}
-}
 @inproceedings{mccann2017learned,
  author    = {Bryan Mccann and
               James Bradbury and
@@ -3874,16 +3856,17 @@ year = {2012}
 }
 %%%%%%%%%%%%%%%%%%%%%%%神经语言模型，已检查修改%%%%%%%%%%%%%%%%%%%%%%%%%
 @inproceedings{Peters2018DeepCW,
-  title={Deep contextualized word representations},
+  author    = {Matthew Peters and
-  author={Matthew Peters and 
+               Mark Neumann and
-          Mark Neumann and 
+               Mohit Iyyer and
-		  Mohit Iyyer and 
+               Matt Gardner and
-		  Matt Gardner and 
+               Christopher Clark and
-		  Christopher Clark and 
+               Kenton Lee and
-		  Kenton Lee and 
+               Luke Zettlemoyer},
-		  Luke Zettlemoyer},
+  title     = {Deep Contextualized Word Representations},
-  publisher={Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics},
+  pages     = {2227--2237},
-  year={2018}
+  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
+  year      = {2018}
 }
 @inproceedings{Graves2013HybridSR,
  title={Hybrid speech recognition with Deep Bidirectional LSTM},
@@ -4116,13 +4099,6 @@ year = {2012}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%% chapter 10------------------------------------------------------
-@inproceedings{vaswani2017attention,
-	title={Attention is All You Need},
-	author={Ashish {Vaswani} and Noam {Shazeer} and Niki {Parmar} and Jakob {Uszkoreit} and Llion {Jones} and Aidan N. {Gomez} and Lukasz {Kaiser} and Illia {Polosukhin}},
-	publisher={International Conference on Neural Information Processing},
-	pages={5998--6008},
-	year={2017}
-}
 @inproceedings{DBLP:conf/acl/LiLWJXZLL20,
  author    = {Bei Li and
               Hui Liu and
@@ -4679,15 +4655,6 @@ author    = {Yoshua Bengio and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2015}
 }
-@inproceedings{DBLP:journals/corr/LuongPM15,
-  author    = {Thang Luong and
-               Hieu Pham and
-               Christopher D. Manning},
-  title     = {Effective Approaches to Attention-based Neural Machine Translation},
-  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  pages     = {1412--1421},
-  year      = {2015}
-}
 @inproceedings{He2016ImprovedNM,
  author    = {Wei He and
               Zhongjun He and
@@ -4775,19 +4742,6 @@ author    = {Yoshua Bengio and
  pages     = {21--37},
  year      = {2016}
 }
-@inproceedings{devlin-etal-2014-fast,
-  author    = {Jacob Devlin and
-               Rabih Zbib and
-               Zhongqiang Huang and
-               Thomas Lamar and
-               Richard M. Schwartz and
-               John Makhoul},
-  title     = {Fast and Robust Neural Network Joint Models for Statistical Machine
-               Translation},
-  pages     = {1370--1380},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2014}
-}
 @inproceedings{DBLP:conf/acl/WangLLJL15,
  author    = {Mingxuan Wang and
               Zhengdong Lu and
@@ -4818,15 +4772,6 @@ author    = {Yoshua Bengio and
  publisher = {International Conference on Acoustics, Speech and Signal Processing},
  year      = {2013}
 }
-@inproceedings{DBLP:journals/corr/LuongPM15,
-  author    = {Thang Luong and
-               Hieu Pham and
-               Christopher D. Manning},
-  title     = {Effective Approaches to Attention-based Neural Machine Translation},
-  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  pages     = {1412--1421},
-  year      = {2015}
-}
 @inproceedings{DBLP:conf/acl-codeswitch/WangCK18,
  author    = {Changhan Wang and
               Kyunghyun Cho and
@@ -4879,15 +4824,6 @@ author    = {Yoshua Bengio and
  publisher = {Springer},
  year      = {2017}
 }
-@inproceedings{2011Natural,
-  title={Natural Language Processing (almost) from Scratch},
-  author={ Collobert, Ronan  and  Weston, Jason  and Bottou, Léon and  Karlen, Michael  and  Kavukcuoglu, Koray  and  Kuksa, Pavel },
-  publisher={Journal of Machine Learning Research},
-  volume={12},
-  number={1},
-  pages={2493-2537},
-  year={2011},
-}
 @inproceedings{DBLP:conf/acl/NguyenG15,
  author    = {Thien Huu Nguyen and
               Ralph Grishman},
@@ -4943,14 +4879,6 @@ author    = {Yoshua Bengio and
  publisher = {Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2015}
 }
-@inproceedings{StahlbergNeural,
-  title={Neural Machine Translation: A Review},
-  author={Felix Stahlberg},
-  publisher={Journal of Artificial Intelligence Research},
-  year={2020},
-  volume={69},
-  pages={343-418}
-}
 @inproceedings{Sennrich2016ImprovingNM,
  author    = {Rico Sennrich and
               Barry Haddow and
@@ -4959,14 +4887,6 @@ author    = {Yoshua Bengio and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2016}
 }
-@inproceedings{bahdanau2014neural,
-  author    = {Dzmitry Bahdanau and
-               Kyunghyun Cho and
-               Yoshua Bengio},
-  title     = {Neural Machine Translation by Jointly Learning to Align and Translate},
-  publisher = {International Conference on Learning Representations},
-  year      = {2015}
-}
 @inproceedings{Waibel1989PhonemeRU,
  title={Phoneme recognition using time-delay neural networks},
  author={Alexander Waibel and Toshiyuki Hanazawa and Geoffrey Hinton and Kiyohiro Shikano and Kevin J. Lang},
@@ -5002,30 +4922,6 @@ author    = {Yoshua Bengio and
  pages     = {770--778},
  year      = {2016}
 }
-@inproceedings{DBLP:conf/cvpr/HuangLMW17,
-  author    = {Gao Huang and
-               Zhuang Liu and
-               Laurens van der Maaten and
-               Kilian Q. Weinberger},
-  title     = {Densely Connected Convolutional Networks},
-  pages     = {2261--2269},
-  publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
-  year      = {2017}
-}
-@inproceedings{Girshick2015FastR,
-  title={Fast R-CNN},
-  author={Ross Girshick},
-  publisher={International Conference on Computer Vision},
-  year={2015},
-  pages={1440-1448}
-}
-@inproceedings{He2020MaskR,
-  title={Mask R-CNN},
-  author={Kaiming He and Georgia Gkioxari and Piotr Doll{\'a}r and Ross B. Girshick},
-  publisher={International Conference on Computer Vision},
-  pages={2961--2969},
-  year={2017}
-}
 @inproceedings{Kalchbrenner2014ACN,
  title={A Convolutional Neural Network for Modelling Sentences},
  author={Nal Kalchbrenner and Edward Grefenstette and Phil Blunsom},
@@ -5079,18 +4975,6 @@ author    = {Yoshua Bengio and
  pages     = {123--135},
  year={2017}
 }
-@inproceedings{DBLP:journals/corr/GehringAGYD17,
-  author    = {Jonas Gehring and
-               Michael Auli and
-               David Grangier and
-               Denis Yarats and
-               Yann N. Dauphin},
-  title     = {Convolutional Sequence to Sequence Learning},
-  publisher = {International Conference on Machine Learning},
-  volume    = {70},
-  pages     = {1243--1252},
-  year      = {2017}
-}
 @inproceedings{Kaiser2018DepthwiseSC,
  title={Depthwise Separable Convolutions for Neural Machine Translation},
  author    = {Lukasz Kaiser and
@@ -5109,14 +4993,6 @@ author    = {Yoshua Bengio and
 publisher = {International Conference on Learning Representations},
 year = {2019}
 }
-@inproceedings{kalchbrenner-blunsom-2013-recurrent,
-  author    = {Nal Kalchbrenner and
-               Phil Blunsom},
-  title     = {Recurrent Continuous Translation Models},
-  pages     = {1700--1709},
-  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  year      = {2013}
-}
 @inproceedings{Wu2016GooglesNM,
  title={Google's Neural Machine Translation System: Bridging the Gap between Human and Machine Translation},
  author    = {Yonghui Wu and
@@ -5154,16 +5030,6 @@ author    = {Yoshua Bengio and
  year={2016},
  volume={abs/1609.08144}
 }
-@inproceedings{DBLP:journals/corr/HeZRS15,
-  author    = {Kaiming He and
-               Xiangyu Zhang and
-               Shaoqing Ren and
-               Jian Sun},
-  title     = {Deep Residual Learning for Image Recognition},
-  publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
-  pages     = {770--778},
-  year      = {2016},
-}
 @inproceedings{Sukhbaatar2015EndToEndMN,
  title={End-To-End Memory Networks},
  author    = {Sainbayar Sukhbaatar and
@@ -5326,13 +5192,6 @@ author    = {Yoshua Bengio and
  volume    = {abs/1802.05751},
  year      = {2018}
 }
-@inproceedings{vaswani2017attention,
-	title={Attention is All You Need},
-	author={Ashish {Vaswani} and Noam {Shazeer} and Niki {Parmar} and Jakob {Uszkoreit} and Llion {Jones} and Aidan N. {Gomez} and Lukasz {Kaiser} and Illia {Polosukhin}},
-	publisher={International Conference on Neural Information Processing},
-	pages={5998--6008},
-	year={2017}
-}
 @inproceedings{DBLP:conf/iclr/WuLLLH20,
  author    = {Zhanghao Wu and
               Zhijian Liu and
@@ -5377,24 +5236,6 @@ author    = {Yoshua Bengio and
  pages     = {464--468},
  year      = {2018},
 }
-@inproceedings{DBLP:journals/corr/HeZRS15,
-  author    = {Kaiming He and
-               Xiangyu Zhang and
-               Shaoqing Ren and
-               Jian Sun},
-  title     = {Deep Residual Learning for Image Recognition},
-  publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
-  pages     = {770--778},
-  year      = {2016},
-}
-@inproceedings{JMLR:v15:srivastava14a,
-  author  = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov},
-  title   = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting},
-  publisher = {Journal of Machine Learning Research},
-  year    = {2014},
-  volume  = {15},
-  pages   = {1929-1958},
-}
 @inproceedings{Szegedy_2016_CVPR,
  author    = {Christian Szegedy and
               Vincent Vanhoucke and
@@ -5424,28 +5265,6 @@ author    = {Yoshua Bengio and
  volume    = {abs/1602.02830},
  year      = {2016},
 }
-@inproceedings{Wu2019PayLA,
- author = {Felix Wu and
-		 Angela Fan and
-		 Alexei Baevski and
-		 Yann N. Dauphin and
-		 Michael Auli},
- title = {Pay Less Attention with Lightweight and Dynamic Convolutions},
- publisher = {International Conference on Learning Representations},
- year = {2019},
-}
-@inproceedings{dai-etal-2019-transformer,
- author    = {Zihang Dai and
-               Zhilin Yang and
-               Yiming Yang and
-               Jaime G. Carbonell and
-               Quoc Viet Le and
-               Ruslan Salakhutdinov},
-  title     = {Transformer-XL: Attentive Language Models beyond a Fixed-Length Context},
-  pages     = {2978--2988},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2019}
-}
 @inproceedings{Liu2020LearningTE,
 	title={Learning to Encode Position for Transformer with Continuous Dynamical Model},
 	author={Xuanqing Liu and Hsiang-Fu Yu and Inderjit Dhillon and Cho-Jui Hsieh},
@@ -5620,14 +5439,6 @@ author    = {Yoshua Bengio and
  publisher = {IEEE International Conference on Acoustics, Speech and Signal Processing},
  year      = {2012}
 }
-@inproceedings{JMLR:v15:srivastava14a,
-  author  = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov},
-  title   = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting},
-  publisher = {Journal of Machine Learning Research},
-  year    = {2014},
-  volume  = {15},
-  pages   = {1929-1958},
-}
 @inproceedings{DBLP:conf/amta/MullerRS20,
  author    = {Mathias M{\"{u}}ller and
               Annette Rios and
@@ -5834,21 +5645,6 @@ author    = {Yoshua Bengio and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
 }
-@inproceedings{DBLP:conf/acl/LiLWJXZLL20,
-  author    = {Bei Li and
-               Hui Liu and
-               Ziyang Wang and
-               Yufan Jiang and
-               Tong Xiao and
-               Jingbo Zhu and
-               Tongran Liu and
-               Changliang Li},
-  title     = {Does Multi-Encoder Help? {A} Case Study on Context-Aware Neural Machine
-               Translation},
-  pages     = {3512--3518},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2020}
-}
 @techreport{chen1999gaussian,
  title={A Gaussian prior for smoothing maximum entropy models},
  author={Chen, Stanley F and Rosenfeld, Ronald},
@@ -5863,14 +5659,6 @@ author    = {Yoshua Bengio and
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
 }
-@inproceedings{DBLP:conf/icassp/SchusterN12,
-  author    = {Mike Schuster and
-               Kaisuke Nakajima},
-  title     = {Japanese and Korean voice search},
-  pages     = {5149--5152},
-  publisher = {IEEE International Conference on Acoustics, Speech and Signal Processing},
-  year      = {2012}
-}
 @inproceedings{kudo2018sentencepiece,
 	title={SentencePiece: A simple and language independent subword tokenizer and detokenizer for Neural Text Processing},
 	author={Taku {Kudo} and John {Richardson}},
@@ -6166,12 +5954,6 @@ author    = {Yoshua Bengio and
  publisher = {Conference on Computational Learning Theory},
  year      = {1992}
 }
-@book{mitchell1996m,
-  title={Machine Learning},
-  author={Mitchell, Tom},
-  journal={McCraw Hill},
-  year={1996}
-}
 @inproceedings{DBLP:conf/icml/AbeM98,
  author    = {Naoki Abe and
               Hiroshi Mamitsuka},
@@ -6195,15 +5977,6 @@ author    = {Yoshua Bengio and
  publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
  year      = {2005}
 }
-@inproceedings{726791,
-  author={Yann {Lecun} and Leon {Bottou} and Yoshua {Bengio} and Patrick {Haffner}},
-  publisher={Proceedings of the IEEE}, 
-  title={Gradient-based learning applied to document recognition}, 
-  year={1998},
-  volume={86},
-  number={11},
-  pages={2278-2324}
-}
 @book{atkinson2007optimum,
  title={Optimum experimental designs, with SAS},
  author={Atkinson, Anthony and Donev, Alexander and Tobias, Randall and others},
@@ -6245,16 +6018,6 @@ author    = {Yoshua Bengio and
  publisher = {{IEEE} Winter Conference on Applications of Computer Vision},
  year      = {2020}
 }
-@inproceedings{DBLP:conf/acl/JeanCMB15,
-  author    = {S{\'{e}}bastien Jean and
-               KyungHyun Cho and
-               Roland Memisevic and
-               Yoshua Bengio},
-  title     = {On Using Very Large Target Vocabulary for Neural Machine Translation},
-  pages     = {1--10},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2015}
-}
 @inproceedings{2015OnGulcehre,
  title = {On Using Monolingual Corpora in Neural Machine Translation},
  author = {Gulcehre Caglar  and  
@@ -6269,14 +6032,6 @@ author    = {Yoshua Bengio and
  publisher = {Computer Science},
  year = {2015},
 }
-@inproceedings{Sennrich2016ImprovingNM,
-  author    = {Rico Sennrich and
-               Barry Haddow and
-               Alexandra Birch},
-  title     = {Improving Neural Machine Translation Models with Monolingual Data},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2016}
-}
 @inproceedings{DBLP:conf/aaai/Zhang0LZC18,
  author    = {Zhirui Zhang and
               Shujie Liu and
@@ -6641,16 +6396,6 @@ author    = {Yoshua Bengio and
  pages     = {1171--1179},
  year      = {2015}
 }
-@inproceedings{Bengio2015ScheduledSF,
-  title={Scheduled Sampling for Sequence Prediction with Recurrent Neural Networks},
-  author={Samy Bengio and
-               Oriol Vinyals and
-               Navdeep Jaitly and
-               Noam Shazeer},
-  publisher = {Annual Conference on Neural Information Processing Systems},
-  pages     = {1171--1179},
-  year      = {2015}
-}
 @inproceedings{Ranzato2016SequenceLT,
  title={Sequence Level Training with Recurrent Neural Networks},
  author={Marc'Aurelio Ranzato and
@@ -6674,43 +6419,6 @@ author    = {Yoshua Bengio and
  pages     = {2672--2680},
  year      = {2014}
 }
-@inproceedings{DBLP:conf/acl/ShenCHHWSL16,
-  author    = {Shiqi Shen and
-               Yong Cheng and
-               Zhongjun He and
-               Wei He and
-               Hua Wu and
-               Maosong Sun and
-               Yang Liu},
-  title     = {Minimum Risk Training for Neural Machine Translation},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2016},
-}
-@inproceedings{DBLP:conf/acl/PapineniRWZ02,
-  author    = {Kishore Papineni and
-               Salim Roukos and
-               Todd Ward and
-               Wei-jing Zhu},
-  title     = {Bleu: a Method for Automatic Evaluation of Machine Translation},
-  pages     = {311--318},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2002}
-}
-@inproceedings{doddington2002automatic,
-  title={Automatic evaluation of machine translation quality using n-gram co-occurrence statistics},
-  publisher={Proceedings of the second international conference on Human Language Technology Research},
-  author={Doddington, George},
-  pages={138--145},
-  year={2002}
-}
-@inproceedings{snover2006study,
-  title={A study of translation edit rate with targeted human annotation},
-  author={Snover, Matthew and Dorr, Bonnie and Schwartz, Richard and Micciulla, Linnea and Makhoul, John},
-  publisher={Proceedings of association for machine translation in the Americas},
-  volume={200},
-  number={6},
-  year={2006}
-}
 @inproceedings{lavie2009meteor,
  title={The METEOR metric for automatic evaluation of machine translation},
  author={Lavie, Alon and Denkowski, Michael J},
@@ -6720,36 +6428,6 @@ author    = {Yoshua Bengio and
  pages={105--115},
  year={2009}
 }
-@inproceedings{bahdanau2014neural,
-  author    = {Dzmitry Bahdanau and
-               Kyunghyun Cho and
-               Yoshua Bengio},
-  title     = {Neural Machine Translation by Jointly Learning to Align and Translate},
-  publisher = {International Conference on Learning Representations},
-  year      = {2015}
-}
-@inproceedings{koehn2003statistical,
-  author    = {Philipp Koehn and
-               Franz Josef Och and
-               Daniel Marcu},
-  title     = {Statistical Phrase-Based Translation},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2003}
-}
-@inproceedings{smith2006minimum,
-  author    = {David A. Smith and
-               Jason Eisner},
-  title     = {Minimum Risk Annealing for Training Log-Linear Models},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2006}
-}
-@inproceedings{he2012maximum,
-title={Maximum expected bleu training of phrase and lexicon translation models},
-author={He, Xiaodong and Deng, Li},
-publisher={Annual Meeting of the Association for Computational Linguistics},
-pages={292--301},
-year={2012}
-}
 @inproceedings{DBLP:conf/acl/GaoHYD14,
  author    = {Jianfeng Gao and
               Xiaodong He and
@@ -6907,14 +6585,6 @@ year={2012}
  volume    = {abs/2002.11794},
  year      = {2020}
 }
-@inproceedings{kim-rush-2016-sequence,
-    author    = {Yoon Kim and
-               Alexander M. Rush},
-  title     = {Sequence-Level Knowledge Distillation},
-  pages     = {1317--1327},
-  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  year      = {2016}
-}
 @inproceedings{Jiao2020TinyBERTDB,
  author    = {Xiaoqi Jiao and
               Yichun Yin and
@@ -6952,34 +6622,6 @@ year={2012}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%% chapter 14------------------------------------------------------
-@inproceedings{Koehn2007Moses,
-  author    = {Philipp Koehn and
-               Hieu Hoang and
-			    Alexandra Birch and
-               Chris Callison-Burch and
-               Marcello Federico and
-               Nicola Bertoldi and
-               Brooke Cowan and
-               Wade Shen and
-               Christine Moran and
-               Richard Zens and
-               Chris Dyer and
-               Ondrej Bojar and
-               Alexandra Constantin and
-               Evan Herbst},
-  title     = {Moses: Open Source Toolkit for Statistical Machine Translation},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2007}
-}
-@inproceedings{DBLP:conf/amta/Koehn04,
-  author    = {Philipp Koehn},
-  title     = {Pharaoh: {A} Beam Search Decoder for Phrase-Based Statistical Machine
-               Translation Models},
-  volume    = {3265},
-  pages     = {115--124},
-  publisher = {	Association for Machine Translation in the Americas},
-  year      = {2004}
-}
 @inproceedings{DBLP:conf/emnlp/StahlbergHSB17,
  author    = {Felix Stahlberg and
               Eva Hasler and
@@ -7189,16 +6831,6 @@ year={2012}
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2016}
 }
-@inproceedings{DBLP:conf/emnlp/HuangZM17,
-  author    = {Liang Huang and
-               Kai Zhao and
-               Mingbo Ma},
-  title     = {When to Finish? Optimal Beam Search for Neural Text Generation (modulo
-               beam size)},
-  pages     = {2134--2139},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2017}
-}
 @inproceedings{Wiseman2016SequencetoSequenceLA,
  title={Sequence-to-Sequence Learning as Beam-Search Optimization},
  author={Sam Wiseman and Alexander M. Rush},
@@ -7206,16 +6838,6 @@ year={2012}
  pages={1296--1306},
  year={2016}
 }
-@inproceedings{DBLP:conf/emnlp/Yang0M18,
-  author    = {Yilin Yang and
-               Liang Huang and
-               Mingbo Ma},
-  title     = {Breaking the Beam Search Curse: {A} Study of (Re-)Scoring Methods
-               and Stopping Criteria for Neural Machine Translation},
-  pages     = {3054--3059},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2018}
-}
 @inproceedings{Ma2019LearningTS,
  title={Learning to Stop in Structured Prediction for Neural Machine Translation},
  author={Mingbo Ma and
@@ -7236,14 +6858,6 @@ year={2012}
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2017}
 }
-@inproceedings{bahdanau2014neural,
-  author    = {Dzmitry Bahdanau and
-               Kyunghyun Cho and
-               Yoshua Bengio},
-  title     = {Neural Machine Translation by Jointly Learning to Align and Translate},
-  publisher = {International Conference on Learning Representations},
-  year      = {2015}
-}
 @inproceedings{Jiang2012LearnedPF,
  title={Learned Prioritization for Trading Off Accuracy and Speed},
  author={Jiarong Jiang and Adam R. Teichert and Hal Daum{\'e} and Jason Eisner},
@@ -7379,33 +6993,6 @@ year={2012}
  publisher={Annual Meeting of the Association for Computational Linguistics},
  year={2017}
 }
-@inproceedings{StahlbergNeural,
-  title={Neural Machine Translation: A Review},
-  author={Felix Stahlberg},
-  publisher={Journal of Artificial Intelligence Research},
-  year={2020},
-  volume={69},
-  pages={343-418}
-}
-@inproceedings{Ranzato2016SequenceLT,
-  title={Sequence Level Training with Recurrent Neural Networks},
-  author={Marc'Aurelio Ranzato and
-               Sumit Chopra and
-               Michael Auli and
-               Wojciech Zaremba},
-  publisher={International Conference on Learning Representations},
-  year={2016}
-}
-@inproceedings{Bengio2015ScheduledSF,
-  title={Scheduled Sampling for Sequence Prediction with Recurrent Neural Networks},
-  author={Samy Bengio and
-               Oriol Vinyals and
-               Navdeep Jaitly and
-               Noam Shazeer},
-  publisher = {Annual Conference on Neural Information Processing Systems},
-  pages     = {1171--1179},
-  year      = {2015}
-}
 @inproceedings{Zhang2019BridgingTG,
  title={Bridging the Gap between Training and Inference for Neural Machine Translation},
  author={Wen Zhang and Yang Feng and Fandong Meng and Di You and Qun Liu},
@@ -7413,55 +7000,6 @@ year={2012}
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
 }
-@inproceedings{DBLP:conf/acl/ShenCHHWSL16,
-  author    = {Shiqi Shen and
-               Yong Cheng and
-               Zhongjun He and
-               Wei He and
-               Hua Wu and
-               Maosong Sun and
-               Yang Liu},
-  title     = {Minimum Risk Training for Neural Machine Translation},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2016},
-}
-@inproceedings{DBLP:conf/acl/SennrichHB16a,
-  author    = {Rico Sennrich and
-               Barry Haddow and
-               Alexandra Birch},
-  title     = {Neural Machine Translation of Rare Words with Subword Units},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2016},
-}
-@inproceedings{DBLP:conf/emnlp/ZensSX12,
-  author    = {Richard Zens and
-               Daisy Stanton and
-               Peng Xu},
-  title     = {A Systematic Comparison of Phrase Table Pruning Techniques},
-  pages     = {972--983},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2012}
-}
-@inproceedings{DBLP:conf/emnlp/JohnsonMFK07,
-  author    = {Howard Johnson and
-               Joel D. Martin and
-               George F. Foster and
-               Roland Kuhn},
-  title     = {Improving Translation Quality by Discarding Most of the Phrasetable},
-  pages     = {967--975},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2007}
-}
-@inproceedings{DBLP:conf/emnlp/LingGTB12,
-  author    = {Wang Ling and
-               Jo{\~{a}}o Gra{\c{c}}a and
-               Isabel Trancoso and
-               Alan W. Black},
-  title     = {Entropy-based Pruning for Phrase-based Machine Translation},
-  pages     = {962--971},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2012}
-}
 @inproceedings{Narang2017BlockSparseRN,
  title={Block-Sparse Recurrent Neural Networks},
  author={Sharan Narang and Eric Undersander and Gregory Diamos},
@@ -7483,31 +7021,10 @@ year={2012}
  author    = {Paul Michel and
               Omer Levy and
               Graham Neubig},
-  title     = {Are Sixteen Heads Really Better than One?},
  publisher = {Annual Conference on Neural Information Processing Systems},
  pages     = {14014--14024},
  year      = {2019}
 }
-@inproceedings{DBLP:journals/corr/abs-1905-09418,
-  author    = {Elena Voita and
-               David Talbot and
-               Fedor Moiseev and
-               Rico Sennrich and
-               Ivan Titov},
-  title     = {Analyzing Multi-Head Self-Attention: Specialized Heads Do the Heavy
-               Lifting, the Rest Can Be Pruned},
-  pages     = {5797--5808},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2019},
-}
-@inproceedings{Kitaev2020ReformerTE,
-  author    = {Nikita Kitaev and
-               Lukasz Kaiser and
-               Anselm Levskaya},
-  title     = {Reformer: The Efficient Transformer},
-  publisher = {International Conference on Learning Representations},
-  year      = {2020}
-}
 @inproceedings{Katharopoulos2020TransformersAR,
  title={Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention},
  author={Angelos Katharopoulos and Apoorv Vyas and Nikolaos Pappas and Franccois Fleuret},
@@ -7515,15 +7032,6 @@ year={2012}
  year={2020},
  volume={abs/2006.16236}
 }
-@inproceedings{xiao2011language,
-  title ={Language Modeling for Syntax-Based Machine Translation Using Tree Substitution Grammars: A Case Study on Chinese-English Translation},
-  author ={Xiao, Tong and Zhu, Jingbo and Zhu, Muhua},
-  volume ={10},
-  number ={4},
-  pages ={1--29},
-  year ={2011},
-  publisher ={ACM Transactions on Asian Language Information Processing (TALIP)}
-}
 @inproceedings{Li2009VariationalDF,
  title={Variational Decoding for Statistical Machine Translation},
  author={Zhifei Li and
@@ -7558,18 +7066,6 @@ year={2012}
  pages={5488--5495},
  year={2018}
 }
-@inproceedings{DBLP:journals/corr/GehringAGYD17,
-  author    = {Jonas Gehring and
-               Michael Auli and
-               David Grangier and
-               Denis Yarats and
-               Yann N. Dauphin},
-  title     = {Convolutional Sequence to Sequence Learning},
-  publisher = {International Conference on Machine Learning},
-  volume    = {70},
-  pages     = {1243--1252},
-  year      = {2017}
-}
 @inproceedings{Wei2019ImitationLF,
  title={Imitation Learning for Non-Autoregressive Neural Machine Translation},
  author={Bingzhen Wei and Mingxuan Wang and Hao Zhou and Junyang Lin and Xu Sun},
@@ -7717,15 +7213,6 @@ author    = {Zhuang Liu and
  volume={18},
  pages={187:1-187:30}
 }
-@inproceedings{DBLP:journals/corr/HintonVD15,
-  author    = {Geoffrey E. Hinton and
-               Oriol Vinyals and
-               Jeffrey Dean},
-  title     = {Distilling the Knowledge in a Neural Network},
-  publisher   = {CoRR},
-  volume    = {abs/1503.02531},
-  year      = {2015}
-}
 @inproceedings{Munim2019SequencelevelKD,
  title={Sequence-level Knowledge Distillation for Model Compression of Attention-based Sequence-to-sequence Speech Recognition},
  author={Raden Mu'az Mun'im and Nakamasa Inoue and Koichi Shinoda},
@@ -7746,20 +7233,6 @@ author    = {Zhuang Liu and
  volume    = {abs/1903.12136},
  year      = {2019}
 }
-@inproceedings{Jiao2020TinyBERTDB,
-  author    = {Xiaoqi Jiao and
-               Yichun Yin and
-               Lifeng Shang and
-               Xin Jiang and
-               Xiao Chen and
-               Linlin Li and
-               Fang Wang and
-               Qun Liu},
-  title     = {TinyBERT: Distilling {BERT} for Natural Language Understanding},
-  pages     = {4163--4174},
-  publisher={Conference on Empirical Methods in Natural Language Processing},
-  year={2020}
-}
 @inproceedings{Ghazvininejad2020AlignedCE,
  author    = {Marjan Ghazvininejad and
               Vladimir Karpukhin and
@@ -7816,23 +7289,6 @@ author    = {Zhuang Liu and
  volume    = {abs/1911.02215},
  year      = {2019}
 }
-@inproceedings{vaswani2017attention,
-	title={Attention is All You Need},
-	author={Ashish {Vaswani} and Noam {Shazeer} and Niki {Parmar} and Jakob {Uszkoreit} and Llion {Jones} and Aidan N. {Gomez} and Lukasz {Kaiser} and Illia {Polosukhin}},
-	publisher={International Conference on Neural Information Processing},
-	pages={5998--6008},
-	year={2017}
-}
-@inproceedings{Gu2017NonAutoregressiveNM,
-  author    = {Jiatao Gu and
-               James Bradbury and
-               Caiming Xiong and
-               Victor O. K. Li and
-               Richard Socher},
-  title     = {Non-Autoregressive Neural Machine Translation},
-  publisher = {International Conference on Learning Representations},
-  year      = {2018}
-}
 @inproceedings{Zhou2020UnderstandingKD,
  title={Understanding Knowledge Distillation in Non-autoregressive Machine Translation},
  author={Chunting Zhou and Graham Neubig and Jiatao Gu},
@@ -7941,13 +7397,6 @@ author    = {Zhuang Liu and
  volume={7},
  pages={91-105}
 }
-@inproceedings{devlin2019bert,
-  title={Bert: Pre-training of deep bidirectional transformers for language understanding},
-  author={Devlin Jacob and Chang Ming-Wei and Lee Kenton and Toutanova Kristina},
-  year={2019},
-  pages = {4171--4186},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-}
 @inproceedings{Feng2016ImprovingAM,
  title={Improving Attention Modeling with Implicit Distortion and Fertility for Machine Translation},
  author={Shi Feng and Shujie Liu and Nan Yang and Mu Li and Ming Zhou and Kenny Q. Zhu},
@@ -7955,65 +7404,6 @@ author    = {Zhuang Liu and
  pages={3082--3092},
  year={2016}
 }
-@inproceedings{TuModeling,
-  author    = {Zhaopeng Tu and
-               Zhengdong Lu and
-               Yang Liu and
-               Xiaohua Liu and
-               Hang Li},
-  title     = {Modeling Coverage for Neural Machine Translation},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2016}
-}
-@inproceedings{Wu2016GooglesNM,
-  title={Google's Neural Machine Translation System: Bridging the Gap between Human and Machine Translation},
-  author    = {Yonghui Wu and
-               Mike Schuster and
-               Zhifeng Chen and
-               Quoc V. Le and
-               Mohammad Norouzi and
-               Wolfgang Macherey and
-               Maxim Krikun and
-               Yuan Cao and
-               Qin Gao and
-               Klaus Macherey and
-               Jeff Klingner and
-               Apurva Shah and
-               Melvin Johnson and
-               Xiaobing Liu and
-               Lukasz Kaiser and
-               Stephan Gouws and
-               Yoshikiyo Kato and
-               Taku Kudo and
-               Hideto Kazawa and
-               Keith Stevens and
-               George Kurian and
-               Nishant Patil and
-               Wei Wang and
-               Cliff Young and
-               Jason Smith and
-               Jason Riesa and
-               Alex Rudnick and
-               Oriol Vinyals and
-               Greg Corrado and
-               Macduff Hughes and
-               Jeffrey Dean},
-  publisher   = {CoRR},
-  year={2016},
-  volume={abs/1609.08144}
-}
-@inproceedings{li-etal-2018-simple,
-  author    = {Yanyang Li and
-               Tong Xiao and
-               Yinqiao Li and
-               Qiang Wang and
-               Changming Xu and
-               Jingbo Zhu},
-  title     = {A Simple and Effective Approach to Coverage-Aware Neural Machine Translation},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  pages     = {292--297},
-  year      = {2018}
-}
 @inproceedings{Peris2017InteractiveNM,
  title={Interactive neural machine translation},
  author={{\'A}lvaro Peris and Miguel Domingo and F. Casacuberta},
@@ -8022,13 +7412,6 @@ author    = {Zhuang Liu and
  volume={45},
  pages={201-220}
 }
-@inproceedings{Peris2018ActiveLF,
-  title={Active Learning for Interactive Neural Machine Translation of Data Streams},
-  author={{\'A}lvaro Peris and Francisco Casacuberta},
-  publisher={The SIGNLL Conference on Computational Natural Language Learning},
-  pages={151--160},
-  year={2018}
-}
 @inproceedings{Xiao2016ALA,
  title={A Loss-Augmented Approach to Training Syntactic Machine Translation Systems},
  author={Tong Xiao and Derek F. Wong and Jingbo Zhu},
@@ -8037,16 +7420,6 @@ author    = {Zhuang Liu and
  volume={24},
  pages={2069-2083}
 }
-@inproceedings{DBLP:conf/acl/JeanCMB15,
-  author    = {S{\'{e}}bastien Jean and
-               KyungHyun Cho and
-               Roland Memisevic and
-               Yoshua Bengio},
-  title     = {On Using Very Large Target Vocabulary for Neural Machine Translation},
-  pages     = {1--10},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2015}
-}
 @inproceedings{61115,
  author={Jianhua Lin},
  publisher={IEEE Transactions on Information Theory}, 
@@ -8065,73 +7438,6 @@ author    = {Zhuang Liu and
  publisher = {	AAAI Conference on Artificial Intelligence},
  year      = {2019}
 }
-@inproceedings{DBLP:journals/corr/abs-1805-00631,
-  author    = {Biao Zhang and
-               Deyi Xiong and
-               Jinsong Su},
-  title     = {Accelerating Neural Transformer via an Average Attention Network},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  pages     = {1789--1798},
-  year      = {2018},
-}
-@inproceedings{Wu2019PayLA,
- author = {Felix Wu and
-		 Angela Fan and
-		 Alexei Baevski and
-		 Yann N. Dauphin and
-		 Michael Auli},
- title = {Pay Less Attention with Lightweight and Dynamic Convolutions},
- publisher = {International Conference on Learning Representations},
- year = {2019},
-}
-@inproceedings{Xiao2019SharingAW,
-  author    = {Tong Xiao and
-               Yinqiao Li and
-               Jingbo Zhu and
-               Zhengtao Yu and
-               Tongran Liu},
-  title     = {Sharing Attention Weights for Fast Transformer},
-  publisher = {International Joint Conference on Artificial Intelligence},
-  pages     = {5292--5298},
-  year      = {2019}
-}
-@inproceedings{Chen2018TheBO,
-  author    = {Mia Xu Chen and
-               Orhan Firat and
-               Ankur Bapna and
-               Melvin Johnson and
-               Wolfgang Macherey and
-               George F. Foster and
-               Llion Jones and
-               Mike Schuster and
-               Noam Shazeer and
-               Niki Parmar and
-               Ashish Vaswani and
-               Jakob Uszkoreit and
-               Lukasz Kaiser and
-               Zhifeng Chen and
-               Yonghui Wu and
-               Macduff Hughes},
-  title     = {The Best of Both Worlds: Combining Recent Advances in Neural Machine
-               Translation},
-  pages     = {76--86},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2018}
-}
-@inproceedings{DBLP:journals/corr/abs-1906-00532,
-  author    = {Aishwarya Bhandare and
-               Vamsi Sripathi and
-               Deepthi Karkada and
-               Vivek Menon and
-               Sun Choi and
-               Kushal Datta and
-               Vikram Saletore},
-  title     = {Efficient 8-Bit Quantization of Transformer Neural Machine Language
-               Translation Model},
-  publisher   = {CoRR},
-  volume    = {abs/1906.00532},
-  year      = {2019}
-}
 @inproceedings{DBLP:conf/cvpr/JacobKCZTHAK18,
  author    = {Benoit Jacob and
               Skirmantas Kligys and
@@ -8239,14 +7545,6 @@ author    = {Zhuang Liu and
  volume    = {abs/1611.08562},
  year      = {2016}
 }
-@inproceedings{xiao2013bagging,
-  title ={Bagging and boosting statistical machine translation systems},
-  author ={Tong Xiao and Jingbo Zhu and Tongran Liu },
-  publisher ={Artificial Intelligence},
-  volume ={195},
-  pages ={496--527},
-  year ={2013}
-}
 @inproceedings{DBLP:conf/emnlp/TrombleKOM08,
  author    = {Roy Tromble and
               Shankar Kumar and
@@ -8270,29 +7568,6 @@ author    = {Zhuang Liu and
  pages     = {3302--3308},
  year      = {2017}
 }
-@inproceedings{Shaw2018SelfAttentionWR,
-  author    = {Peter Shaw and
-               Jakob Uszkoreit and
-               Ashish Vaswani},
-  title     = {Self-Attention with Relative Position Representations},
-  publisher = {Proceedings of the Human Language Technology Conference of 
-               the North American Chapter of the Association for Computational Linguistics},
-  pages     = {464--468},
-  year      = {2018}
-}
-@inproceedings{WangLearning,
-  author    = {Qiang Wang and
-               Bei Li and
-               Tong Xiao and
-               Jingbo Zhu and
-               Changliang Li and
-               Derek F. Wong and
-               Lidia S. Chao},
-  title     = {Learning Deep Transformer Models for Machine Translation},
-  pages     = {1810--1822},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2019}
-}
 @inproceedings{DBLP:conf/iclr/FanGJ20,
  author    = {Angela Fan and
               Edouard Grave and
@@ -8409,25 +7684,6 @@ author    = {Zhuang Liu and
  volume    = {abs/2010.02416},
  year      = {2020}
 }
-@inproceedings{Vaswani2018Tensor2TensorFN,
-   author    = {Ashish Vaswani and
-               Samy Bengio and
-               Eugene Brevdo and
-               Fran{\c{c}}ois Chollet and
-               Aidan N. Gomez and
-               Stephan Gouws and
-               Llion Jones and
-               Lukasz Kaiser and
-               Nal Kalchbrenner and
-               Niki Parmar and
-               Ryan Sepassi and
-               Noam Shazeer and
-               Jakob Uszkoreit},
-  title     = {Tensor2Tensor for Neural Machine Translation},
-  pages     = {193--199},
-  publisher = {Association for Machine Translation in the Americas},
-  year      = {2018}
-}
 @inproceedings{Sun2019BaiduNM,
  title={Baidu Neural Machine Translation Systems for WMT19},
  author    = {Meng Sun and
@@ -8610,17 +7866,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
 }
-@inproceedings{KleinOpenNMT,
-  author    = {Guillaume Klein and
-               Yoon Kim and
-               Yuntian Deng and
-               Jean Senellart and
-               Alexander M. Rush},
-  title     = {OpenNMT: Open-Source Toolkit for Neural Machine Translation},
-  pages     = {67--72},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2017}
-}
 @inproceedings{DBLP:conf/acl/WuWXTGQLL19,
  author    = {Lijun Wu and
               Yiren Wang and
@@ -8635,16 +7880,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
 }
-@inproceedings{DBLP:conf/cvpr/HuangLMW17,
-  author    = {Gao Huang and
-               Zhuang Liu and
-               Laurens van der Maaten and
-               Kilian Q. Weinberger},
-  title     = {Densely Connected Convolutional Networks},
-  pages     = {2261--2269},
-  publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
-  year      = {2017}
-}
 @inproceedings{DBLP:journals/corr/GreffSS16,
  author    = {Klaus Greff and
               Rupesh Kumar Srivastava and
@@ -8653,31 +7888,6 @@ author    = {Zhuang Liu and
  publisher = {International Conference on Learning Representations},
  year      = {2017}
 }
-@inproceedings{Bapna2018TrainingDN,
-  author    = {Ankur Bapna and
-               Mia Xu Chen and
-               Orhan Firat and
-               Yuan Cao and
-               Yonghui Wu},
-  title     = {Training Deeper Neural Machine Translation Models with Transparent
-               Attention},
-  pages     = {3028--3033},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2018}
-}
-@inproceedings{WangLearning,
-  author    = {Qiang Wang and
-               Bei Li and
-               Tong Xiao and
-               Jingbo Zhu and
-               Changliang Li and
-               Derek F. Wong and
-               Lidia S. Chao},
-  title     = {Learning Deep Transformer Models for Machine Translation},
-  pages     = {1810--1822},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2019}
-}
 @inproceedings{DBLP:journals/corr/abs-2002-04745,
  author    = {Ruibin Xiong and
               Yunchang Yang and
@@ -8705,86 +7915,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2020}
 }
-@inproceedings{DBLP:journals/corr/HeZRS15,
-  author    = {Kaiming He and
-               Xiangyu Zhang and
-               Shaoqing Ren and
-               Jian Sun},
-  title     = {Deep Residual Learning for Image Recognition},
-  publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
-  pages     = {770--778},
-  year      = {2016},
-}
-@inproceedings{Ba2016LayerN,
-  author    = {Lei Jimmy Ba and
-               Jamie Ryan Kiros and
-               Geoffrey E. Hinton},
-  title     = {Layer Normalization},
-  publisher   = {CoRR},
-  volume    = {abs/1607.06450},
-  year      = {2016}
-}
-@inproceedings{Vaswani2018Tensor2TensorFN,
-   author    = {Ashish Vaswani and
-               Samy Bengio and
-               Eugene Brevdo and
-               Fran{\c{c}}ois Chollet and
-               Aidan N. Gomez and
-               Stephan Gouws and
-               Llion Jones and
-               Lukasz Kaiser and
-               Nal Kalchbrenner and
-               Niki Parmar and
-               Ryan Sepassi and
-               Noam Shazeer and
-               Jakob Uszkoreit},
-  title     = {Tensor2Tensor for Neural Machine Translation},
-  pages     = {193--199},
-  publisher = {Association for Machine Translation in the Americas},
-  year      = {2018}
-}
-@inproceedings{Dou2019DynamicLA,
-  author    = {Zi-Yi Dou and
-               Zhaopeng Tu and
-               Xing Wang and
-               Longyue Wang and
-               Shuming Shi and
-               Tong Zhang},
-  title     = {Dynamic Layer Aggregation for Neural Machine Translation with Routing-by-Agreement},
-  pages     = {86--93},
-  publisher = {AAAI Conference on Artificial Intelligence},
-  year      = {2019}
-}
-@inproceedings{Wang2018MultilayerRF,
-  title={Multi-layer Representation Fusion for Neural Machine Translation},
-  author={Qiang Wang and Fuxue Li and Tong Xiao and Yanyang Li and Yinqiao Li and Jingbo Zhu},
-  publisher={International Conference on Computational Linguistics},
-  year={2018},
-  volume={abs/2002.06714}
-}
-@inproceedings{Dou2018ExploitingDR,
-   author    = {Zi-Yi Dou and
-               Zhaopeng Tu and
-               Xing Wang and
-               Shuming Shi and
-               Tong Zhang},
-  title     = {Exploiting Deep Representations for Neural Machine Translation},
-  pages     = {4253--4262},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2018}
-}
-@inproceedings{DBLP:journals/corr/LinFSYXZB17,
-  author    = {Zhouhan Lin and
-               Minwei Feng and
-               C{\'{\i}}cero Nogueira dos Santos and
-               Mo Yu and
-               Bing Xiang and
-               Bowen Zhou and
-               Yoshua Bengio},
-  title     = {A Structured Self-Attentive Sentence Embedding},
-  publisher = {International Conference on Learning Representations},
-  year      = {2017},
-}
 @inproceedings{DBLP:conf/nips/SrivastavaGS15,
  author    = {Rupesh Kumar Srivastava and
               Klaus Greff and
@@ -8830,15 +7960,6 @@ author    = {Zhuang Liu and
  pages     = {1675--1685},
  year      = {2019}
 }
-@inproceedings{pmlr-v9-glorot10a,
-  author    = {Xavier Glorot and
-               Yoshua Bengio},
-  title     = {Understanding the difficulty of training deep feedforward neural networks},
-  publisher = {International Conference on Artificial Intelligence and Statistics},
-  volume    = {9},
-  pages     = {249--256},
-  year      = {2010}
-}
 @inproceedings{DBLP:conf/iccv/HeZRS15,
  author    = {Kaiming He and
               Xiangyu Zhang and
@@ -9269,13 +8390,6 @@ author    = {Zhuang Liu and
  volume    = {abs/2003.03384},
  year      = {2020}
 }
-@inproceedings{Chollet2017XceptionDL,
-  title={Xception: Deep Learning with Depthwise Separable Convolutions},
-  author    = {Fran{\c{c}}ois Chollet},
-  publisher={IEEE Conference on Computer Vision and Pattern Recognition},
-  year={2017},
-  pages={1800-1807}
-}
 @inproceedings{DBLP:journals/tnn/AngelineSP94,
  author    = {Peter J. Angeline and
               Gregory M. Saunders and
@@ -9523,20 +8637,6 @@ author    = {Zhuang Liu and
  volume    = {abs/2009.02070},
  year      = {2020}
 }
-@inproceedings{DBLP:conf/acl/WangWLCZGH20,
-  author    = {Hanrui Wang and
-               Zhanghao Wu and
-               Zhijian Liu and
-               Han Cai and
-               Ligeng Zhu and
-               Chuang Gan and
-               Song Han},
-  title     = {{HAT:} Hardware-Aware Transformers for Efficient Natural Language
-               Processing},
-  pages     = {7675--7688},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2020}
-}
 @inproceedings{DBLP:journals/corr/abs-2008-06808,
  author    = {Henry Tsai and
               Jayden Ooi and
@@ -9549,24 +8649,6 @@ author    = {Zhuang Liu and
  volume    = {abs/2008.06808},
  year      = {2020}
 }
-@inproceedings{Wang2019ExploitingSC,
-  title={Exploiting Sentential Context for Neural Machine Translation},
-  author={Xing Wang and Zhaopeng Tu and Longyue Wang and Shuming Shi},
-  publisher={Annual Meeting of the Association for Computational Linguistics},
-  year={2019}
-}
-@inproceedings{Wei2020MultiscaleCD,
-  title={Multiscale Collaborative Deep Models for Neural Machine Translation},
-  author={Xiangpeng Wei and Heng Yu and Yue Hu and Yue Zhang and Rongxiang Weng and Weihua Luo},
-  publisher={Annual Meeting of the Association for Computational Linguistics},
-  year={2020}
-}
-@inproceedings{li2020shallow,
-  title={Shallow-to-Deep Training for Neural Machine Translation},
-  author={Li, Bei and Wang, Ziyang and Liu, Hui and Jiang, Yufan and Du, Quan and Xiao, Tong and Wang, Huizhen and Zhu, Jingbo},
-  publisher={Conference on Empirical Methods in Natural Language Processing},
-  year={2020}
-}
 @inproceedings{DBLP:journals/corr/abs-2007-06257,
  author    = {Hongfei Xu and
               Qiuhui Liu and
@@ -9588,18 +8670,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2020}
 }
-@inproceedings{DBLP:journals/corr/abs-2006-10369,
-  author    = {Jungo Kasai and
-               Nikolaos Pappas and
-               Hao Peng and
-               James Cross and
-               Noah A. Smith},
-  title     = {Deep Encoder, Shallow Decoder: Reevaluating the Speed-Quality Tradeoff
-               in Machine Translation},
-  publisher   = {CoRR},
-  volume    = {abs/2006.10369},
-  year      = {2020}
-}
 @inproceedings{DBLP:journals/corr/abs-1806-01261,
  author    = {Peter W. Battaglia and
               Jessica B. Hamrick and
@@ -9633,34 +8703,6 @@ author    = {Zhuang Liu and
  volume    = {abs/1806.01261},
  year      = {2018}
 }
-@inproceedings{Shaw2018SelfAttentionWR,
-  author    = {Peter Shaw and
-               Jakob Uszkoreit and
-               Ashish Vaswani},
-  title     = {Self-Attention with Relative Position Representations},
-  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
-  pages     = {464--468},
-  year      = {2018},
-}
-@inproceedings{Dai2019TransformerXLAL,
-  author    = {Zihang Dai and
-               Zhilin Yang and
-               Yiming Yang and
-               Jaime G. Carbonell and
-               Quoc V. Le and
-               Ruslan Salakhutdinov},
-  title     = {Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context},
-  publisher   = {Annual Meeting of the Association for Computational Linguistics},
-  pages     = {2978--2988},
-  year      = {2019}
-}
-@inproceedings{vaswani2017attention,
-	title={Attention is All You Need},
-	author={Ashish {Vaswani} and Noam {Shazeer} and Niki {Parmar} and Jakob {Uszkoreit} and Llion {Jones} and Aidan N. {Gomez} and Lukasz {Kaiser} and Illia {Polosukhin}},
-	publisher={International Conference on Neural Information Processing},
-	pages={5998--6008},
-	year={2017}
-}
 @inproceedings{DBLP:conf/acl/LiXTZZZ17,
  author    = {Junhui Li and
               Deyi Xiong and
@@ -9681,18 +8723,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2016}
 }
-@inproceedings{Yang2017TowardsBH,
-  author    = {Baosong Yang and
-               Derek F. Wong and
-               Tong Xiao and
-               Lidia S. Chao and
-               Jingbo Zhu},
-  title     = {Towards Bidirectional Hierarchical Representations for Attention-based
-               Neural Machine Translation},
-  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  pages     = {1432--1441},
-  year      = {2017}
-}
 @inproceedings{DBLP:conf/acl/ChenHCC17,
  author    = {Huadong Chen and
               Shujian Huang and
@@ -9704,16 +8734,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2017}
 }
-@inproceedings{TuModeling,
-  author    = {Zhaopeng Tu and
-               Zhengdong Lu and
-               Yang Liu and
-               Xiaohua Liu and
-               Hang Li},
-  title     = {Modeling Coverage for Neural Machine Translation},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2016}
-}
 @inproceedings{DBLP:conf/wmt/SennrichH16,
  author    = {Rico Sennrich and
               Barry Haddow},
@@ -9739,13 +8759,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2020}
 }
-@inproceedings{Aharoni2017TowardsSN,
-  title={Towards String-To-Tree Neural Machine Translation},
-  author={Roee Aharoni and 
-          Yoav Goldberg},
-  publisher={Annual Meeting of the Association for Computational Linguistics},
-  year={2017}
-}
 @inproceedings{DBLP:conf/iclr/Alvarez-MelisJ17,
  author    = {David Alvarez-Melis and
               Tommi S. Jaakkola},
@@ -9763,13 +8776,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2016}
 }
-@book{aho1972theory,
-  author    = {Aho, Alfred V and
-               Ullman, Jeffrey D},
-  title     = {The theory of parsing, translation, and compiling},
-  publisher = {Prentice-Hall Englewood Cliffs, NJ},
-  year      = {1973},
-}
 @inproceedings{DBLP:journals/corr/LuongLSVK15,
  author    = {Minh-Thang Luong and
               Quoc V. Le and
@@ -9805,26 +8811,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2017}
 }
-@inproceedings{DBLP:journals/corr/abs-1808-09374,
-  author    = {Xinyi Wang and
-               Hieu Pham and
-               Pengcheng Yin and
-               Graham Neubig},
-  title     = {A Tree-based Decoder for Neural Machine Translation},
-  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  pages     = {4772--4777},
-  year      = {2018}
-}
-@inproceedings{Tong2016Syntactic,
-  author    = {Tong Xiao and
-               Jingbo Zhu and
-               Chunliang Zhang and
-               Tongran Liu},
-  title     = {Syntactic Skeleton-Based Translation},
-  pages     = {2856--2862},
-  publisher = {AAAI Conference on Artificial Intelligence},
-  year      = {2016},
-}
 @inproceedings{DBLP:conf/emnlp/WangTWS19a,
  author    = {Xing Wang and
               Zhaopeng Tu and
@@ -9835,13 +8821,6 @@ author    = {Zhuang Liu and
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2019}
 }
-@inproceedings{Liu2020LearningTE,
-	title={Learning to Encode Position for Transformer with Continuous Dynamical Model},
-	author={Xuanqing Liu and Hsiang-Fu Yu and Inderjit Dhillon and Cho-Jui Hsieh},
-	publisher={ArXiv},
-	year={2020},
-	volume={abs/2003.09229}
-}
 @inproceedings{DBLP:conf/nips/ChenRBD18,
  author    = {Tian Qi Chen and
               Yulia Rubanova and
@@ -9852,27 +8831,6 @@ author    = {Zhuang Liu and
  pages     = {6572--6583},
  year      = {2018}
 }
-@inproceedings{DBLP:journals/corr/LuongPM15,
-  author    = {Thang Luong and
-               Hieu Pham and
-               Christopher D. Manning},
-  title     = {Effective Approaches to Attention-based Neural Machine Translation},
-  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  pages     = {1412--1421},
-  year      = {2015}
-}
-@inproceedings{Yang2018ModelingLF,
-	author    = {Baosong Yang and
-               Zhaopeng Tu and
-               Derek F. Wong and
-               Fandong Meng and
-               Lidia S. Chao and
-               Tong Zhang},
-  title     = {Modeling Localness for Self-Attention Networks},
-  pages     = {4449--4458},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2018}
-}
 @inproceedings{DBLP:conf/aaai/GuoQLXZ20,
  author    = {Qipeng Guo and
               Xipeng Qiu and
@@ -9884,33 +8842,6 @@ author    = {Zhuang Liu and
  publisher = {AAAI Conference on Artificial Intelligence},
  year      = {2020}
 }
-@inproceedings{Wu2019PayLA,
- author = {Felix Wu and
-		 Angela Fan and
-		 Alexei Baevski and
-		 Yann N. Dauphin and
-		 Michael Auli},
- title = {Pay Less Attention with Lightweight and Dynamic Convolutions},
- publisher = {International Conference on Learning Representations},
- year = {2019},
-}
-@inproceedings{DBLP:conf/interspeech/GulatiQCPZYHWZW20,
-  author    = {Anmol Gulati and
-               James Qin and
-               Chung-Cheng Chiu and
-               Niki Parmar and
-               Yu Zhang and
-               Jiahui Yu and
-               Wei Han and
-               Shibo Wang and
-               Zhengdong Zhang and
-               Yonghui Wu and
-               Ruoming Pang},
-  title     = {Conformer: Convolution-augmented Transformer for Speech Recognition},
-  pages     = {5036--5040},
-  publisher = {International Speech Communication Association},
-  year      = {2020}
-}
 @inproceedings{DBLP:conf/cvpr/XieGDTH17,
  author    = {Saining Xie and
               Ross B. Girshick and
@@ -9961,16 +8892,6 @@ author    = {Zhuang Liu and
  number={3},
  year={2019},
 }
-@inproceedings{DBLP:conf/iclr/WuLLLH20,
-  author    = {Zhanghao Wu and
-               Zhijian Liu and
-               Ji Lin and
-               Yujun Lin and
-               Song Han},
-  title     = {Lite Transformer with Long-Short Range Attention},
-  publisher = {International Conference on Learning Representations},
-  year      = {2020}
-}
 @inproceedings{DBLP:conf/iclr/DehghaniGVUK19,
  author    = {Mostafa Dehghani and
               Stephan Gouws and
@@ -9981,12 +8902,6 @@ author    = {Zhuang Liu and
  publisher = {International Conference on Learning Representations},
  year      = {2019}
 }
-@inproceedings{Lan2020ALBERTAL,
-  title={ALBERT: A Lite BERT for Self-supervised Learning of Language Representations},
-  author={Zhenzhong Lan and Mingda Chen and Sebastian Goodman and Kevin Gimpel and Piyush Sharma and Radu Soricut},
-  publisher={International Conference on Learning Representations},
-  year={2020}
-}
 @inproceedings{DBLP:conf/naacl/HaoWYWZT19,
  author    = {Jie Hao and
               Xing Wang and
@@ -10032,14 +8947,6 @@ author    = {Zhuang Liu and
  volume    = {abs/2004.05150},
  year      = {2020}
 }
-@inproceedings{Kitaev2020ReformerTE,
-  author    = {Nikita Kitaev and
-               Lukasz Kaiser and
-               Anselm Levskaya},
-  title     = {Reformer: The Efficient Transformer},
-  publisher = {International Conference on Learning Representations},
-  year      = {2020}
-}
 @inproceedings{DBLP:journals/corr/abs-2003-05997,
  author    = {Aurko Roy and
               Mohammad Saffar and
@@ -10050,13 +8957,6 @@ author    = {Zhuang Liu and
  volume    = {abs/2003.05997},
  year      = {2020}
 }
-@inproceedings{Katharopoulos2020TransformersAR,
-  title={Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention},
-  author={Angelos Katharopoulos and Apoorv Vyas and Nikolaos Pappas and Franccois Fleuret},
-  publisher={CoRR},
-  year={2020},
-  volume={abs/2006.16236}
-}
 @inproceedings{DBLP:journals/corr/abs-2009-14794,
  author    = {Krzysztof Choromanski and
               Valerii Likhosherstov and
@@ -10099,17 +8999,6 @@ author    = {Zhuang Liu and
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
 }
-@inproceedings{DBLP:journals/corr/abs-2006-04768,
-  author    = {Sinong Wang and
-               Belinda Z. Li and
-               Madian Khabsa and
-               Han Fang and
-               Hao Ma},
-  title     = {Linformer: Self-Attention with Linear Complexity},
-  publisher   = {CoRR},
-  volume    = {abs/2006.04768},
-  year      = {2020}
-}
 @inproceedings{DBLP:conf/nips/BergstraBBK11,
  author    = {James Bergstra and
               R{\'{e}}mi Bardenet and
@@ -10131,18 +9020,6 @@ author    = {Zhuang Liu and
  publisher = {Learning and Intelligent Optimization},
  year      = {2011}
 }
-@inproceedings{DBLP:conf/icml/BergstraYC13,
-  author    = {James Bergstra and
-               Daniel Yamins and
-               David D. Cox},
-  title     = {Making a Science of Model Search: Hyperparameter Optimization in Hundreds
-               of Dimensions for Vision Architectures},
-  series    = {{JMLR} Workshop and Conference Proceedings},
-  volume    = {28},
-  pages     = {115--123},
-  publisher = {International Conference on Machine Learning},
-  year      = {2013}
-}
 @inproceedings{DBLP:conf/iccv/ChenXW019,
  author    = {Xin Chen and
               Lingxi Xie and
@@ -10165,122 +9042,34 @@ author    = {Zhuang Liu and
  publisher = {International Conference on Machine Learning},
  year      = {2020}
 }
-@inproceedings{Jawahar2019WhatDB,
-	title={What Does BERT Learn about the Structure of Language?},
-	author={Ganesh Jawahar and Beno{\^{\i}}t Sagot and Djam{\'e} Seddah},
-	publisher={Annual Meeting of the Association for Computational Linguistics},
-	year={2019}
-}
 @inproceedings{DBLP:conf/emnlp/Ethayarajh19,
  author    = {Kawin Ethayarajh},
  title     = {How Contextual are Contextualized Word Representations? Comparing
               the Geometry of BERT, ELMo, and {GPT-2} Embeddings},
  pages     = {55--65},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2019}
 }
-@inproceedings{DBLP:journals/corr/abs-1905-09418,
+@inproceedings{DBLP:conf/emnlp/LiTYLZ18,
-  author    = {Elena Voita and
+  author    = {Jian Li and
-               David Talbot and
+               Zhaopeng Tu and
-               Fedor Moiseev and
+               Baosong Yang and
-               Rico Sennrich and
+               Michael R. Lyu and
-               Ivan Titov},
+               Tong Zhang},
-  title     = {Analyzing Multi-Head Self-Attention: Specialized Heads Do the Heavy
+  title     = {Multi-Head Attention with Disagreement Regularization},
-               Lifting, the Rest Can Be Pruned},
+  pages     = {2897--2903},
-  pages     = {5797--5808},
+  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2019},
-}
-@inproceedings{Michel2019AreSH,
-  title={Are Sixteen Heads Really Better than One?},
-  author    = {Paul Michel and
-               Omer Levy and
-               Graham Neubig},
-  title     = {Are Sixteen Heads Really Better than One?},
-  publisher = {Annual Conference on Neural Information Processing Systems},
-  pages     = {14014--14024},
-  year      = {2019}
-}
-@inproceedings{DBLP:conf/emnlp/LiTYLZ18,
-  author    = {Jian Li and
-               Zhaopeng Tu and
-               Baosong Yang and
-               Michael R. Lyu and
-               Tong Zhang},
-  title     = {Multi-Head Attention with Disagreement Regularization},
-  pages     = {2897--2903},
-  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  year      = {2018}
-}
-@inproceedings{Su2018VariationalRN,
-  title={Variational Recurrent Neural Machine Translation},
-  author={Jinsong Su and Shan Wu and Deyi Xiong and Yaojie Lu and Xianpei Han and Biao Zhang},
-  publisher={AAAI Conference on Artificial Intelligence},
-  pages={5488--5495},
-  year={2018}
-}
-@inproceedings{DBLP:conf/acl/SetiawanSNP20,
-  author    = {Hendra Setiawan and
-               Matthias Sperber and
-               Udhyakumar Nallasamy and
-               Matthias Paulik},
-  title     = {Variational Neural Machine Translation with Normalizing Flows},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2020}
-}
-@inproceedings{Li2020NeuralMT,
-  author    = {Yanyang Li and
-               Qiang Wang and
-               Tong Xiao and
-               Tongran Liu and
-               Jingbo Zhu},
-  title     = {Neural Machine Translation with Joint Representation},
-  pages     = {8285--8292},
-  publisher = {AAAI Conference on Artificial Intelligence},
-  year      = {2020}
-}
-@inproceedings{JMLR:v15:srivastava14a,
-  author  = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov},
-  title   = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting},
-  publisher = {Journal of Machine Learning Research},
-  year    = {2014},
-  volume  = {15},
-  pages   = {1929-1958},
-}
-@inproceedings{Szegedy_2016_CVPR,
-  author    = {Christian Szegedy and
-               Vincent Vanhoucke and
-               Sergey Ioffe and
-               Jonathon Shlens and
-               Zbigniew Wojna},
-  title     = {Rethinking the Inception Architecture for Computer Vision},
-  publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
-  pages     = {2818--2826},
-  year      = {2016},
-}
-@inproceedings{Chen2018TheBO,
-  author    = {Mia Xu Chen and
-               Orhan Firat and
-               Ankur Bapna and
-               Melvin Johnson and
-               Wolfgang Macherey and
-               George F. Foster and
-               Llion Jones and
-               Mike Schuster and
-               Noam Shazeer and
-               Niki Parmar and
-               Ashish Vaswani and
-               Jakob Uszkoreit and
-               Lukasz Kaiser and
-               Zhifeng Chen and
-               Yonghui Wu and
-               Macduff Hughes},
-  title     = {The Best of Both Worlds: Combining Recent Advances in Neural Machine
-               Translation},
-  pages     = {76--86},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2018}
 }
+@inproceedings{DBLP:conf/acl/SetiawanSNP20,
+  title     = {Variational Neural Machine Translation with Normalizing Flows},
+  author    = {Hendra Setiawan and
+               Matthias Sperber and
+               Udhyakumar Nallasamy and
+               Matthias Paulik},
+  year      = {2020},
+  publisher = {Annual Meeting of the Association for Computational Linguistics},
+}
 @inproceedings{DBLP:conf/naacl/GuoQLSXZ19,
  author    = {Qipeng Guo and
               Xipeng Qiu and
@@ -10312,6 +9101,16 @@ author    = {Zhuang Liu and
  publisher = {International Conference on Learning Representations},
  year      = {2018}
 }
+@inproceedings{DBLP:conf/cvpr/HuangLMW17,
+  title     = {Densely Connected Convolutional Networks},
+  author    = {Gao Huang and
+               Zhuang Liu and
+               Laurens van der Maaten and
+               Kilian Q. Weinberger},
+  year      = {2017},
+  publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
+  pages     = {2261--2269},
+}
 %%%%% chapter 15------------------------------------------------------
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -10575,20 +9374,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
 }
-@inproceedings{2015OnGulcehre,
-  title = {On Using Monolingual Corpora in Neural Machine Translation},
-  author = {Gulcehre Caglar  and  
-           Firat Orhan  and  
-           Xu Kelvin  and  
-           Cho Kyunghyun  and  
-           Barrault Loic  and  
-           Lin Huei Chi  and  
-           Bougares Fethi  and  
-           Schwenk Holger  and  
-           Bengio  Yoshua},
-  publisher = {Computer Science},
-  year = {2015},
-}
 @inproceedings{黄书剑0统计机器翻译中的词对齐研究,
  title={统计机器翻译中的词对齐研究},
  author={黄书剑},
@@ -11087,18 +9872,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2016}
 }
-@inproceedings{DBLP:conf/emnlp/KimPPKN19,
-  author    = {Yunsu Kim and
-               Petre Petrov and
-               Pavel Petrushkov and
-               Shahram Khadivi and
-               Hermann Ney},
-  title     = {Pivot-based Transfer Learning for Neural Machine Translation between
-               Non-English Languages},
-  pages     = {866--876},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2019}
-}
 @inproceedings{DBLP:journals/mt/WuW07,
  author    = {Hua Wu and
               Haifeng Wang},
@@ -11211,15 +9984,6 @@ author    = {Zhuang Liu and
  publisher = {International Joint Conference on Natural Language Processing},
  year      = {2011}
 }
-@inproceedings{DBLP:journals/corr/HintonVD15,
-  author    = {Geoffrey E. Hinton and
-               Oriol Vinyals and
-               Jeffrey Dean},
-  title     = {Distilling the Knowledge in a Neural Network},
-  publisher   = {CoRR},
-  volume    = {abs/1503.02531},
-  year      = {2015}
-}
 @inproceedings{gu2018meta,
  author    = {Jiatao Gu and
               Yong Wang and
@@ -11541,25 +10305,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
 }
-@inproceedings{DBLP:conf/emnlp/FiratSAYC16,
-  author    = {Orhan Firat and
-               Baskaran Sankaran and
-               Yaser Al-Onaizan and
-               Fatos T. Yarman-Vural and
-               Kyunghyun Cho},
-  title     = {Zero-Resource Translation with Multi-Lingual Neural Machine Translation},
-  pages     = {268--277},
-  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  year      = {2016}
-}
-@inproceedings{DBLP:conf/emnlp/CurreyH19,
-  author    = {Anna Currey and
-               Kenneth Heafield},
-  title     = {Zero-Resource Neural Machine Translation with Monolingual Pivot Data},
-  pages     = {99--107},
-  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  year      = {2019}
-}
 @inproceedings{DBLP:conf/acl/FadaeeBM17a,
  author    = {Marzieh Fadaee and
               Arianna Bisazza and
@@ -11609,15 +10354,6 @@ author    = {Zhuang Liu and
  year      = {2008},
  publisher = {International Conference on Machine Learning}
 }
-@inproceedings{DBLP:conf/iclr/LampleCDR18,
-  author    = {Guillaume Lample and
-               Alexis Conneau and
-               Ludovic Denoyer and
-               Marc'Aurelio Ranzato},
-  title     = {Unsupervised Machine Translation Using Monolingual Corpora Only},
-  publisher = {International Conference on Learning Representations},
-  year      = {2018}
-}
 @inproceedings{DBLP:journals/coling/BhagatH13,
  author    = {Rahul Bhagat and
               Eduard Hovy},
@@ -11684,16 +10420,6 @@ author    = {Zhuang Liu and
  pages     = {569--631},
  year      = {2019}
 }
-@inproceedings{DBLP:conf/acl/TuLLLL16,
-  author    = {Zhaopeng Tu and
-               Zhengdong Lu and
-               Yang Liu and
-               Xiaohua Liu and
-               Hang Li},
-  title     = {Modeling Coverage for Neural Machine Translation},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2016}
-}
 @inproceedings{DBLP:journals/tacl/TuLLLL17,
  author    = {Zhaopeng Tu and
               Yang Liu and
@@ -11748,29 +10474,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2018}
 }
-@inproceedings{DBLP:conf/wmt/LiLXLLLWZXWFCLL19,
-  author    = {Bei Li and
-               Yinqiao Li and
-               Chen Xu and
-               Ye Lin and
-               Jiqiang Liu and
-               Hui Liu and
-               Ziyang Wang and
-               Yuhao Zhang and
-               Nuo Xu and
-               Zeyang Wang and
-               Kai Feng and
-               Hexuan Chen and
-               Tengbo Liu and
-               Yanyang Li and
-               Qiang Wang and
-               Tong Xiao and
-               Jingbo Zhu},
-  title     = {The NiuTrans Machine Translation Systems for {WMT19}},
-  pages     = {257--266},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2019}
-}
 @inproceedings{DBLP:conf/nips/DaiL15,
  author    = {Andrew Dai and
               Quoc Le},
@@ -11779,19 +10482,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Conference on Neural Information Processing Systems},
  year      = {2015}
 }
-@inproceedings{DBLP:journals/corr/abs-1802-05365,
-  author    = {Matthew Peters and
-               Mark Neumann and
-               Mohit Iyyer and
-               Matt Gardner and
-               Christopher Clark and
-               Kenton Lee and
-               Luke Zettlemoyer},
-  title     = {Deep Contextualized Word Representations},
-  pages     = {2227--2237},
-  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
-  year      = {2018}
-}
 @inproceedings{DBLP:conf/icml/CollobertW08,
  author    = {Ronan Collobert and
               Jason Weston},
@@ -11889,16 +10579,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
 }
-@inproceedings{DBLP:journals/corr/LuongLSVK15,
-  author    = {Minh-Thang Luong and
-               Quoc V. Le and
-               Ilya Sutskever and
-               Oriol Vinyals and
-               Lukasz Kaiser},
-  title     = {Multi-task Sequence to Sequence Learning},
-  publisher = {International Conference on Learning Representations},
-  year      = {2016}
-}
 @inproceedings{DBLP:conf/emnlp/ZhangZ16,
  author    = {Jiajun Zhang and
               Chengqing Zong},
@@ -12094,13 +10774,6 @@ author    = {Zhuang Liu and
  pages={117},
  year={2015}
 }
-@inproceedings{chen2016bilingual,
-  title={Bilingual methods for adaptive training data selection for machine translation},
-  author={Chen, Boxing and Kuhn, Roland and Foster, George and Cherry, Colin and Huang, Fei},
-  publisher={Association for Machine Translation in the Americas},
-  pages={93--103},
-  year={2016}
-}
 @inproceedings{DBLP:conf/iwslt/Ueffing06,
  author    = {Nicola Ueffing},
  title     = {Using monolingual source-language data to improve {MT} performance},
@@ -12272,15 +10945,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2020}
 }
-@inproceedings{DBLP:conf/emnlp/AxelrodHG11,
-  author    = {Amittai Axelrod and
-               Xiaodong He and
-               Jianfeng Gao},
-  title     = {Domain Adaptation via Pseudo In-Domain Data Selection},
-  pages     = {355--362},
-  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  year      = {2011}
-}
 @inproceedings{DBLP:conf/icdm/Remus12,
  author    = {Robert Remus},
  title     = {Domain Adaptation Using Domain Similarity- and Domain Complexity-Based
@@ -12309,13 +10973,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
 }
-@inproceedings{britz2017effective,
-  title={Effective domain mixing for neural machine translation},
-  author={Britz, Denny and Le, Quoc and Pryzant, Reid},
-  publisher={Proceedings of the Second Conference on Machine Translation},
-  pages={118--126},
-  year={2017}
-}
 @inproceedings{DBLP:conf/ranlp/KobusCS17,
  author    = {Catherine Kobus and
               Josep Maria Crego and
@@ -12326,27 +10983,6 @@ author    = {Zhuang Liu and
               Language Processing},
  year      = {2017}
 }
-@inproceedings{DBLP:conf/emnlp/WangULCS17,
-  author    = {Rui Wang and
-               Masao Utiyama and
-               Lemao Liu and
-               Kehai Chen and
-               Eiichiro Sumita},
-  title     = {Instance Weighting for Neural Machine Translation Domain Adaptation},
-  pages     = {1482--1488},
-  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  year      = {2017}
-}
-@inproceedings{DBLP:conf/aclnmt/ChenCFL17,
-  author    = {Boxing Chen and
-               Colin Cherry and
-               George F. Foster and
-               Samuel Larkin},
-  title     = {Cost Weighting for Neural Machine Translation Domain Adaptation},
-  pages     = {40--46},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2017}
-}
 @inproceedings{DBLP:journals/corr/abs-1906-03129,
  author    = {Shen Yan and
               Leonard Dahlmann and
@@ -12432,15 +11068,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
 }
-@inproceedings{DBLP:conf/wmt/BritzLP17,
-  author    = {Denny Britz and
-               Quoc V. Le and
-               Reid Pryzant},
-  title     = {Effective Domain Mixing for Neural Machine Translation},
-  pages     = {118--126},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2017}
-}
 @inproceedings{DBLP:journals/ibmrd/Luhn58,
  author    = {Hans Peter Luhn},
  title     = {The Automatic Creation of Literature Abstracts},
@@ -12468,27 +11095,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2019}
 }
-@inproceedings{DBLP:conf/emnlp/WeesBM17,
-  author    = {Marlies van der Wees and
-               Arianna Bisazza and
-               Christof Monz},
-  title     = {Dynamic Data Selection for Neural Machine Translation},
-  pages     = {1400--1410},
-  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  year      = {2017}
-}
-@inproceedings{DBLP:conf/naacl/ZhangSKMCD19,
-  author    = {Xuan Zhang and
-               Pamela Shapiro and
-               Gaurav Kumar and
-               Paul McNamee and
-               Marine Carpuat and
-               Kevin Duh},
-  title     = {Curriculum Learning for Domain Adaptation in Neural Machine Translation},
-  pages     = {1903--1915},
-  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
-  year      = {2019}
-}
 @inproceedings{DBLP:conf/acl/ChuDK17,
  author    = {Chenhui Chu and
               Raj Dabre and
@@ -12564,18 +11170,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Conference of the European Association for Machine Translation},
  year      = {2017}
 }
-@inproceedings{DBLP:conf/aaai/Zhang0LZC18,
-  author    = {Zhirui Zhang and
-               Shujie Liu and
-               Mu Li and
-               Ming Zhou and
-               Enhong Chen},
-  title     = {Joint Training for Neural Machine Translation Models with Monolingual
-               Data},
-  pages     = {555--562},
-  publisher = {AAAI Conference on Artificial Intelligence},
-  year      = {2018}
-}
 @inproceedings{DBLP:conf/wmt/SunJXHWW19,
  author    = {Meng Sun and
               Bojian Jiang and
@@ -12794,19 +11388,6 @@ author    = {Zhuang Liu and
  publisher = {International Conference on Machine Learning},
  year      = {2018}
 }
-@inproceedings{DBLP:conf/nips/HeXQWYLM16,
-  author    = {Di He and
-               Yingce Xia and
-               Tao Qin and
-               Liwei Wang and
-               Nenghai Yu and
-               Tie-Yan Liu and
-               Wei-Ying Ma},
-  title     = {Dual Learning for Machine Translation},
-  publisher = {Annual Conference on Neural Information Processing Systems},
-  pages     = {820--828},
-  year      = {2016}
-}
 @article{zhao2020dual,
  title={Dual Learning: Theoretical Study and an Algorithmic Extension},
  author={Zhao, Zhibing and Xia, Yingce and Qin, Tao and Xia, Lirong and Liu, Tie-Yan},
@@ -12832,12 +11413,6 @@ author    = {Zhuang Liu and
  volume    = {abs/1901.09115},
  year      = {2019}
 }
-@book{jurafsky2000speech,
-  title={Speech \& language processing},
-  author={Jurafsky, Dan},
-  year={2000},
-  publisher={Pearson Education India}
-}
 @inproceedings{DBLP:conf/anlp/MarcuCW00,
  author    = {Daniel Marcu and
               Lynn Carlson and
@@ -12936,19 +11511,10 @@ author    = {Zhuang Liu and
  author    = {Thomas Meyer and
               Andrei Popescu-Belis},
  title     = {Using Sense-labeled Discourse Connectives for Statistical Machine
               Translation},
  pages     = {129--138},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2012}
-}
-@inproceedings{DBLP:conf/nips/SutskeverVL14,
-  author    = {Ilya Sutskever and
-               Oriol Vinyals and
-               Quoc V. Le},
-  title     = {Sequence to Sequence Learning with Neural Networks},
-  pages     = {3104--3112},
-  year      = {2014},
-  publisher = {Annual Conference on Neural Information Processing Systems}
 }
 @inproceedings{DBLP:conf/emnlp/LaubliS018,
  author    = {Samuel L{\"{a}}ubli and
@@ -12995,16 +11561,6 @@ author    = {Zhuang Liu and
  volume    = {abs/1704.05135},
  year      = {2017}
 }
-@inproceedings{DBLP:conf/acl/TitovSSV18,
-  author    = {Elena Voita and
-               Pavel Serdyukov and
-               Rico Sennrich and
-               Ivan Titov},
-  title     = {Context-Aware Neural Machine Translation Learns Anaphora Resolution},
-  pages     = {1264--1274},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2018}
-}
 @inproceedings{DBLP:conf/acl/HaffariM18,
  author    = {Sameen Maruf and
               Gholamreza Haffari},
@@ -13129,14 +11685,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2018}
 }
-@inproceedings{DBLP:conf/iclr/KitaevKL20,
-  author    = {Nikita Kitaev and
-               Lukasz Kaiser and
-               Anselm Levskaya},
-  title     = {Reformer: The Efficient Transformer},
-  publisher = {International Conference on Learning Representations},
-  year      = {2020}
-}
 @inproceedings{agrawal2018contextual,
  title={Contextual handling in neural machine translation: Look behind, ahead and on both sides},
  author={Agrawal, Ruchit Rajeshkumar and Turchi, Marco and Negri, Matteo},
@@ -13144,17 +11692,6 @@ author    = {Zhuang Liu and
  pages={11--20},
  year={2018}
 }
-@inproceedings{DBLP:conf/emnlp/WerlenRPH18,
-  author    = {Lesly Miculicich Werlen and
-               Dhananjay Ram and
-               Nikolaos Pappas and
-               James Henderson},
-  title     = {Document-Level Neural Machine Translation with Hierarchical Attention
-               Networks},
-  pages     = {2947--2954},
-  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  year      = {2018}
-}
 @inproceedings{DBLP:conf/naacl/MarufMH19,
  author    = {Sameen Maruf and
               Andr{\'{e}} F. T. Martins and
@@ -13230,21 +11767,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2017}
 }
-@inproceedings{DBLP:conf/acl/LiLWJXZLL20,
-  author    = {Bei Li and
-               Hui Liu and
-               Ziyang Wang and
-               Yufan Jiang and
-               Tong Xiao and
-               Jingbo Zhu and
-               Tongran Liu and
-               Changliang Li},
-  title     = {Does Multi-Encoder Help? {A} Case Study on Context-Aware Neural Machine
-               Translation},
-  pages     = {3512--3518},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2020}
-}
 @inproceedings{DBLP:conf/discomt/KimTN19,
  author    = {Yunsu Kim and
               Duc Thanh Tran and
@@ -13364,21 +11886,6 @@ author    = {Zhuang Liu and
  volume    = {abs/1911.03110},
  year      = {2019}
 }
-@article{DBLP:journals/tacl/LiuGGLEGLZ20,
-  author    = {Yinhan Liu and
-               Jiatao Gu and
-               Naman Goyal and
-               Xian Li and
-               Sergey Edunov and
-               Marjan Ghazvininejad and
-               Mike Lewis and
-               Luke Zettlemoyer},
-  title     = {Multilingual Denoising Pre-training for Neural Machine Translation},
-  journal   = {Transactions of the Association for Computational Linguistics},
-  volume    = {8},
-  pages     = {726--742},
-  year      = {2020}
-}
 @inproceedings{DBLP:conf/wmt/MarufMH18,
  author    = {Sameen Maruf and
               Andr{\'{e}} F. T. Martins and
@@ -13480,17 +11987,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
 }
-@inproceedings{DBLP:conf/acl/LiuTMCZ18,
-  author    = {Yong Cheng and
-               Zhaopeng Tu and
-               Fandong Meng and
-               Junjie Zhai and
-               Yang Liu},
-  title     = {Towards Robust Neural Machine Translation},
-  pages     = {1756--1766},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2018}
-}
 @inproceedings{DBLP:conf/naacl/DuongACBC16,
  author    = {Long Duong and
               Antonios Anastasopoulos and
@@ -14262,20 +12758,6 @@ author    = {Zhuang Liu and
  publisher = {International Conference on Learning Representations},
  year      = {2020}
 }
-@inproceedings{DBLP:conf/nips/GoodfellowPMXWOCB14,
-  author    = {Ian J. Goodfellow and
-               Jean Pouget-Abadie and
-               Mehdi Mirza and
-               Bing Xu and
-               David Warde-Farley and
-               Sherjil Ozair and
-               Aaron C. Courville and
-               Yoshua Bengio},
-  title     = {Generative Adversarial Nets},
-  publisher = {Conference on Neural Information Processing Systems},
-  pages     = {2672--2680},
-  year      = {2014}
-}
 @inproceedings{DBLP:conf/nips/ZhuZPDEWS17,
  author    = {Jun-Yan Zhu and
               Richard Zhang and
@@ -14320,16 +12802,6 @@ author    = {Zhuang Liu and
  publisher = {International Conference on Computer Vision},
  year      = {2017}
 }
-@inproceedings{DBLP:conf/iccv/YiZTG17,
-  author    = {Zili Yi and
-               Hao (Richard) Zhang and
-               Ping Tan and
-               Minglun Gong},
-  title     = {DualGAN: Unsupervised Dual Learning for Image-to-Image Translation},
-  pages     = {2868--2876},
-  publisher = {International Conference on Computer Vision},
-  year      = {2017}
-}
 @inproceedings{DBLP:conf/nips/LiuBK17,
  author    = {Ming-Yu Liu and
               Thomas Breuel and
@@ -14584,24 +13056,6 @@ author    = {Zhuang Liu and
  pages     = {163--185},
  year      = {2017}
 }
-@inproceedings{Peris2017InteractiveNM,
-  title={Interactive neural machine translation},
-  author={{\'A}lvaro Peris and Miguel Domingo and F. Casacuberta},
-  publisher={Computer Speech and Language},
-  year={2017},
-  volume={45},
-  pages={201-220}
-}
-@inproceedings{DBLP:journals/csl/PerisC19,
-  author    = {{\'{A}}lvaro Peris and
-               Francisco Casacuberta},
-  title     = {Online learning for effort reduction in interactive neural machine
-               translation},
-  publisher   = {Computer Speech Language},
-  volume    = {58},
-  pages     = {98--126},
-  year      = {2019}
-}
 @inproceedings{DBLP:journals/coling/BarrachinaBCCCKLNTVV09,
  author    = {Sergio Barrachina and
               Oliver Bender and
@@ -14670,16 +13124,6 @@ author    = {Zhuang Liu and
  volume    = {abs/1702.07811},
  year      = {2017}
 }
-@inproceedings{DBLP:conf/emnlp/WangXZ20,
-  author    = {Qiang Wang and
-               Tong Xiao and
-               Jingbo Zhu},
-  title     = {Training Flexible Depth Model by Multi-Task Learning for Neural Machine
-               Translation},
-  pages     = {4307--4312},
-  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  year      = {2020}
-}
 @inproceedings{DBLP:conf/ijcai/ChenCWL20,
  author    = {Guanhua Chen and
               Yun Chen and
@@ -14762,18 +13206,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2017}
 }
-@inproceedings{DBLP:conf/naacl/ThompsonGKDK19,
-  author    = {Brian Thompson and
-               Jeremy Gwinnup and
-               Huda Khayrallah and
-               Kevin Duh and
-               Philipp Koehn},
-  title     = {Overcoming Catastrophic Forgetting During Domain Adaptation of Neural
-               Machine Translation},
-  pages     = {2062--2068},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2019}
-}
 @inproceedings{DBLP:conf/aclnmt/KhayrallahTDK18,
  author    = {Huda Khayrallah and
               Brian Thompson and
@@ -14785,12 +13217,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2018}
 }
-@inproceedings{barone2017regularization,
-  title={Regularization techniques for fine-tuning in neural machine translation},
-  author={Barone, Antonio Valerio Miceli and Haddow, Barry and Germann, Ulrich and Sennrich, Rico},
-  publisher={arXiv preprint arXiv:1707.09920},
-  year={2017}
-}
 @inproceedings{DBLP:journals/corr/ChuDK17,
  author    = {Chenhui Chu and
               Raj Dabre and
@@ -14801,15 +13227,6 @@ author    = {Zhuang Liu and
  volume    = {abs/1701.03214},
  year      = {2017}
 }
-@inproceedings{DBLP:conf/coling/GuF20,
-  author    = {Shuhao Gu and
-               Yang Feng},
-  title     = {Investigating Catastrophic Forgetting During Continual Training for
-               Neural Machine Translation},
-  pages     = {4315--4326},
-  publisher = {International Committee on Computational Linguistics},
-  year      = {2020}
-}
 %%%%% chapter 18------------------------------------------------------
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -14885,15 +13302,6 @@ author    = {Zhuang Liu and
  pages={197--216},
  year={2012}
 }
-@inproceedings{DBLP:conf/naacl/DyerCS13,
-  author    = {Chris Dyer and
-               Victor Chahuneau and
-               Noah A. Smith},
-  title     = {A Simple, Fast, and Effective Reparameterization of {IBM} Model 2},
-  pages     = {644--648},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2013}
-}
 @inproceedings{al2016theano,
  author    = {Rami Al-Rfou and
               Guillaume Alain and
@@ -15013,63 +13421,6 @@ author    = {Zhuang Liu and
  volume    = {abs/1605.02688},
  year      = {2016}
 }
-@inproceedings{DBLP:journals/corr/SennrichFCBHHJL17,
-  author    = {Rico Sennrich and
-               Orhan Firat and
-               Kyunghyun Cho and
-               Barry Haddow and
-			   Alexandra Birch and
-               Julian Hitschler and
-               Marcin Junczys-Dowmunt and
-               Samuel L{\"{a}}ubli and
-               Antonio Valerio Miceli Barone and
-               Jozef Mokry and
-               Maria Nadejde},
-  title     = {Nematus: a Toolkit for Neural Machine Translation},
-  publisher = {Annual Conference of the European Association for Machine Translation},
-  pages     = {65--68},
-  year      = {2017}
-}
-@inproceedings{Koehn2007Moses,
-  author    = {Philipp Koehn and
-               Hieu Hoang and
-			    Alexandra Birch and
-               Chris Callison-Burch and
-               Marcello Federico and
-               Nicola Bertoldi and
-               Brooke Cowan and
-               Wade Shen and
-               Christine Moran and
-               Richard Zens and
-               Chris Dyer and
-               Ondrej Bojar and
-               Alexandra Constantin and
-               Evan Herbst},
-  title     = {Moses: Open Source Toolkit for Statistical Machine Translation},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2007}
-}
-@inproceedings{zollmann2007the,
-  author    = {Andreas Zollmann and
-               Ashish Venugopal and
-               Matthias Paulik and
-               Stephan Vogel},
-  title     = {The Syntax Augmented {MT} {(SAMT)} System at the Shared Task for the
-               2007 {ACL} Workshop on Statistical Machine Translation},
-  pages     = {216--219},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2007}
-}
-@inproceedings{och2003systematic,
-  author    = {Franz Josef Och and
-               Hermann Ney},
-  title     = {A Systematic Comparison of Various Statistical Alignment Models},
-  publisher   = {Computational Linguistics},
-  volume    = {29},
-  number    = {1},
-  pages     = {19--51},
-  year      = {2003}
-}
 @inproceedings{zoph2016simple,
  author    = {Barret Zoph and
               Ashish Vaswani and
@@ -15080,50 +13431,6 @@ author    = {Zhuang Liu and
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2016}
 }
-@inproceedings{Ottfairseq,
-  author    = {Myle Ott and
-               Sergey Edunov and
-               Alexei Baevski and
-               Angela Fan and
-               Sam Gross and
-               Nathan Ng and
-               David Grangier and
-               Michael Auli},
-  title     = {fairseq: {A} Fast, Extensible Toolkit for Sequence Modeling},
-  pages     = {48--53},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2019}
-}
-@inproceedings{Vaswani2018Tensor2TensorFN,
-   author    = {Ashish Vaswani and
-               Samy Bengio and
-               Eugene Brevdo and
-               Fran{\c{c}}ois Chollet and
-               Aidan N. Gomez and
-               Stephan Gouws and
-               Llion Jones and
-               Lukasz Kaiser and
-               Nal Kalchbrenner and
-               Niki Parmar and
-               Ryan Sepassi and
-               Noam Shazeer and
-               Jakob Uszkoreit},
-  title     = {Tensor2Tensor for Neural Machine Translation},
-  pages     = {193--199},
-  publisher = {Association for Machine Translation in the Americas},
-  year      = {2018}
-}
-@inproceedings{KleinOpenNMT,
-  author    = {Guillaume Klein and
-               Yoon Kim and
-               Yuntian Deng and
-               Jean Senellart and
-               Alexander M. Rush},
-  title     = {OpenNMT: Open-Source Toolkit for Neural Machine Translation},
-  pages     = {67--72},
-  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  year      = {2017}
-}
 @inproceedings{luong2016acl_hybrid,
  author    = {Minh-Thang Luong and
               Christopher D. Manning},