14参考文献

d39483b7 · 孟霞 · 5e72e252 · d39483b7
Commit d39483b7 authored Nov 27, 2020 by 孟霞
--- a/bibliography.bib
+++ b/bibliography.bib
@@ -5944,7 +5944,7 @@ author    = {Yoshua Bengio and
               Translation Models},
  volume    = {3265},
  pages     = {115--124},
-  publisher = {Springer},
+  publisher = {Association for Machine Translation in the Americas},
  year      = {2004}
 }
@@ -5955,19 +5955,20 @@ author    = {Yoshua Bengio and
               Bill Byrne},
  title     = {SGNMT - A Flexible NMT Decoding Platform for Quick Prototyping
               of New Models and Search Strategies},
-  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural
-               Language Processing, {EMNLP} 2017, Copenhagen, Denmark, September
-               9-11, 2017 - System Demonstrations},
  pages     = {25--30},
-  publisher = {Association for Computational Linguistics},
+  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2017}
 }
 @inproceedings{Liu2016AgreementOT,
  title={Agreement on Target-bidirectional Neural Machine Translation},
-  author={L. Liu and M. Utiyama and A. Finch and Eiichiro Sumita},
+  author={Lemao Liu and
-  booktitle={HLT-NAACL},
+               Masao Utiyama and
-  year={2016}
+               Andrew M. Finch and
+               Eiichiro Sumita},
+  pages     = {411--416},
+  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
+  year      = {2016}
 }
 @inproceedings{DBLP:conf/wmt/LiLXLLLWZXWFCLL19,
@@ -5989,11 +5990,8 @@ author    = {Yoshua Bengio and
               Tong Xiao and
               Jingbo Zhu},
  title     = {The NiuTrans Machine Translation Systems for {WMT19}},
-  booktitle = {Proceedings of the Fourth Conference on Machine Translation, {WMT}
-               2019, Florence, Italy, August 1-2, 2019 - Volume 2: Shared Task Papers,
-               Day 1},
  pages     = {257--266},
-  publisher = {Association for Computational Linguistics},
+  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
 }
@@ -6002,19 +6000,19 @@ author    = {Yoshua Bengio and
               Barry Haddow and
               Alexandra Birch},
  title     = {Edinburgh Neural Machine Translation Systems for {WMT} 16},
-  booktitle = {Proceedings of the First Conference on Machine Translation, {WMT}
-               2016, colocated with {ACL} 2016, August 11-12, Berlin, Germany},
  pages     = {371--376},
-  publisher = {The Association for Computer Linguistics},
+  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2016}
 }
 @article{Stahlberg2018TheUO,
  title={The University of Cambridge's Machine Translation Systems for WMT18},
-  author={Felix Stahlberg and A. Gispert and B. Byrne},
+  author={Felix Stahlberg and
-  journal={ArXiv},
+               Adri{\`{a}} de Gispert and
-  year={2018},
+               Bill Byrne},
-  volume={abs/1808.09465}
+  pages     = {504--512},
+  publisher = {Annual Meeting of the Association for Computational Linguistics},
+  year      = {2018}
 }
 @inproceedings{DBLP:conf/aaai/ZhangSQLJW18,
@@ -6025,20 +6023,18 @@ author    = {Yoshua Bengio and
               Rongrong Ji and
               Hongji Wang},
  title     = {Asynchronous Bidirectional Decoding for Neural Machine Translation},
-  booktitle = {Proceedings of the Thirty-Second {AAAI} Conference on Artificial Intelligence,
-               (AAAI-18), the 30th innovative Applications of Artificial Intelligence
-               (IAAI-18), and the 8th {AAAI} Symposium on Educational Advances in
-               Artificial Intelligence (EAAI-18), New Orleans, Louisiana, USA, February
-               2-7, 2018},
  pages     = {5698--5705},
-  publisher = {{AAAI} Press},
+  publisher = {AAAI Conference on Artificial Intelligence},
  year      = {2018}
 }
 @article{Li2017EnhancedNM,
  title={Enhanced neural machine translation by learning from draft},
-  author={A. Li and Shiyue Zhang and D. Wang and T. Zheng},
+  author={Aodong Li and
-  journal={2017 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)},
+               Shiyue Zhang and
+               Dong Wang and
+               Thomas Fang Zheng},
+  publisher={Asia-Pacific Signal and Information Processing Association Annual Summit and Conference},
  year={2017},
  pages={1583-1587}
 }
@@ -6046,120 +6042,141 @@ author    = {Yoshua Bengio and
 @inproceedings{ElMaghraby2018EnhancingTF,
  title={Enhancing Translation from English to Arabic Using Two-Phase Decoder Translation},
  author={Ayah ElMaghraby and Ahmed Rafea},
-  booktitle={IntelliSys},
+  pages     = {539--549},
-  year={2018}
+  publisher = {Intelligent Systems and Applications},
+  year      = {2018}
 }
 @inproceedings{Geng2018AdaptiveMD,
  title={Adaptive Multi-pass Decoder for Neural Machine Translation},
-  author={X. Geng and X. Feng and B. Qin and T. Liu},
+  author={Xinwei Geng and
-  booktitle={EMNLP},
+               Xiaocheng Feng and
+               Bing Qin and
+               Ting Liu},
+  publisher ={Conference on Empirical Methods in Natural Language Processing},
+  pages={523--532},
  year={2018}
 }
 @article{Lee2018DeterministicNN,
  title={Deterministic Non-Autoregressive Neural Sequence Modeling by Iterative Refinement},
  author={Jason Lee and Elman Mansimov and Kyunghyun Cho},
-  journal={ArXiv},
+  pages     = {1173--1182},
-  year={2018},
+  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  volume={abs/1802.06901}
+  year      = {2018}
 }
 @inproceedings{Gu2019LevenshteinT,
  title={Levenshtein Transformer},
  author={Jiatao Gu and Changhan Wang and Jake Zhao},
-  booktitle={NeurIPS},
+  publisher = {Conference and Workshop on Neural Information Processing Systems},
-  year={2019}
+  pages     = {11179--11189},
+  year      = {2019},
 }
 @inproceedings{Guo2020JointlyMS,
  title={Jointly Masked Sequence-to-Sequence Model for Non-Autoregressive Neural Machine Translation},
-  author={Junliang Guo and Linli Xu and E. Chen},
+  author={Junliang Guo and Linli Xu and Enhong Chen},
-  booktitle={ACL},
+  pages     = {376--385},
-  year={2020}
+  publisher = {Annual Meeting of the Association for Computational Linguistics},
+  year      = {2020}
 }
 @article{Stahlberg2018AnOS,
  title={An Operation Sequence Model for Explainable Neural Machine Translation},
-  author={Felix Stahlberg and Danielle Saunders and B. Byrne},
+  author={Felix Stahlberg and Danielle Saunders and Bill Byrne},
-  journal={ArXiv},
+  pages     = {175--186},
-  year={2018},
+  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  volume={abs/1808.09688}
+  year      = {2018}
 }
 @inproceedings{Stern2019InsertionTF,
  title={Insertion Transformer: Flexible Sequence Generation via Insertion Operations},
-  author={Mitchell Stern and William Chan and J. Kiros and Jakob Uszkoreit},
+  author={Mitchell Stern and William Chan and Jamie Kiros and Jakob Uszkoreit},
-  booktitle={ICML},
+  publisher={International Conference on Machine Learning},
+  pages={5976--5985},
  year={2019}
 }
 @article{stling2017NeuralMT,
  title={Neural machine translation for low-resource languages},
-  author={Robert {\"O}stling and J. Tiedemann},
+  author={Robert {\"O}stling and J{\"{o}}rg Tiedemann},
-  journal={ArXiv},
+  journal={CoRR},
  year={2017},
  volume={abs/1708.05729}
 }
 @article{Kikuchi2016ControllingOL,
  title={Controlling Output Length in Neural Encoder-Decoders},
-  author={Yuta Kikuchi and Graham Neubig and Ryohei Sasano and H. Takamura and M. Okumura},
+  author={Yuta Kikuchi and
-  journal={ArXiv},
+               Graham Neubig and
-  year={2016},
+               Ryohei Sasano and
-  volume={abs/1609.09552}
+               Hiroya Takamura and
+               Manabu Okumura},
+  pages     = {1328--1338},
+  publisher = {Conference on Empirical Methods in Natural Language Processing},
+  year      = {2016}
 }
 @inproceedings{Takase2019PositionalET,
  title={Positional Encoding to Control Output Sequence Length},
-  author={S. Takase and N. Okazaki},
+  author={Sho Takase and
-  booktitle={NAACL-HLT},
+               Naoaki Okazaki},
+  publisher={Annual Conference of the North American Chapter of the Association for Computational Linguistics},
+  pages={3999--4004},
  year={2019}
 }
 @inproceedings{Murray2018CorrectingLB,
  title={Correcting Length Bias in Neural Machine Translation},
  author={Kenton Murray and David Chiang},
-  booktitle={WMT},
+  pages     = {212--223},
-  year={2018}
+  publisher = {Annual Meeting of the Association for Computational Linguistics},
+  year      = {2018}
 }
 @article{Sountsov2016LengthBI,
  title={Length bias in Encoder Decoder Models and a Case for Global Conditioning},
  author={Pavel Sountsov and Sunita Sarawagi},
-  journal={ArXiv},
+  pages     = {1516--1525},
-  year={2016},
+  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  volume={abs/1606.03402}
+  year      = {2016}
 }
 @inproceedings{Jean2015MontrealNM,
  title={Montreal Neural Machine Translation Systems for WMT'15},
-  author={S. Jean and Orhan Firat and Kyunghyun Cho and R. Memisevic and Yoshua Bengio},
+  author={S{\'{e}}bastien Jean and
-  booktitle={WMT@EMNLP},
+               Orhan Firat and
+               Kyunghyun Cho and
+               Roland Memisevic and
+               Yoshua Bengio},
+  publisher={Conference on Empirical Methods in Natural Language Processing},
+  pages={134--140},
  year={2015}
 }
 @inproceedings{Yang2018OtemUtemOA,
  title={Otem{\&}Utem: Over- and Under-Translation Evaluation Metric for NMT},
-  author={J. Yang and Biao Zhang and Yue Qin and Xiangwen Zhang and Q. Lin and Jinsong Su},
+  author={Jing Yang and
-  booktitle={NLPCC},
+               Biao Zhang and
+               Yue Qin and
+               Xiangwen Zhang and
+               Qian Lin and
+               Jinsong Su},
+  publisher={CCF International Conference on Natural Language Processing and Chinese Computing},
+  pages={291--302},
  year={2018}
 }
 @inproceedings{Mi2016CoverageEM,
  title={Coverage Embedding Models for Neural Machine Translation},
-  author={Haitao Mi and B. Sankaran and Z. Wang and Abe Ittycheriah},
+  author={Haitao Mi and
-  booktitle={EMNLP},
+               Baskaran Sankaran and
-  year={2016}
+               Zhiguo Wang and
-}
+               Abe Ittycheriah},
+  pages     = {955--960},
-@article{Kazimi2017CoverageFC,
+  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  title={Coverage for Character Based Neural Machine Translation},
+  year      = {2016}
-  author={M. Kazimi and Marta R. Costa-juss{\`a}},
-  journal={Proces. del Leng. Natural},
-  year={2017},
-  volume={59},
-  pages={99-106}
 }
 @inproceedings{DBLP:conf/emnlp/HuangZM17,
@@ -6176,7 +6193,8 @@ author    = {Yoshua Bengio and
 @inproceedings{Wiseman2016SequencetoSequenceLA,
  title={Sequence-to-Sequence Learning as Beam-Search Optimization},
  author={Sam Wiseman and Alexander M. Rush},
-  booktitle={EMNLP},
+  publisher={Conference on Empirical Methods in Natural Language Processing},
+  pages={1296--1306},
  year={2016}
 }
@@ -6193,10 +6211,12 @@ author    = {Yoshua Bengio and
 @article{Ma2019LearningTS,
  title={Learning to Stop in Structured Prediction for Neural Machine Translation},
-  author={M. Ma and Renjie Zheng and Liang Huang},
+  author={Mingbo Ma and
-  journal={ArXiv},
+               Renjie Zheng and
-  year={2019},
+               Liang Huang},
-  volume={abs/1904.01032}
+  pages     = {1884--1889},
+  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
+  year      = {2019}
 }
 @inproceedings{KleinOpenNMT,
@@ -6220,119 +6240,153 @@ author    = {Yoshua Bengio and
  year      = {2015}
 }
-@inproceedings{Eisner2011LearningST,
-  title={Learning Speed-Accuracy Tradeoffs in Nondeterministic Inference Algorithms},
-  author={J. Eisner and Hal Daum{\'e}},
-  year={2011}
-}
 @inproceedings{Jiang2012LearnedPF,
  title={Learned Prioritization for Trading Off Accuracy and Speed},
-  author={J. Jiang and Adam R. Teichert and Hal Daum{\'e} and J. Eisner},
+  author={Jiarong Jiang and Adam R. Teichert and Hal Daum{\'e} and Jason Eisner},
-  booktitle={NIPS},
+  publisher={Conference and Workshop on Neural Information Processing Systems},
-  year={2012}
+  pages={1340--1348},
+  year= {2012}
 }
 @inproceedings{Zheng2020OpportunisticDW,
  title={Opportunistic Decoding with Timely Correction for Simultaneous Translation},
-  author={Renjie Zheng and M. Ma and Baigong Zheng and Kaibo Liu and Liang Huang},
+  author={Renjie Zheng and
-  booktitle={ACL},
+               Mingbo Ma and
+               Baigong Zheng and
+               Kaibo Liu and
+               Liang Huang},
+  publisher={Annual Meeting of the Association for Computational Linguistics},
+  pages={437--442},
  year={2020}
 }
 @inproceedings{Ma2019STACLST,
  title={STACL: Simultaneous Translation with Implicit Anticipation and Controllable Latency using Prefix-to-Prefix Framework},
-  author={M. Ma and L. Huang and Hao Xiong and Renjie Zheng and Kaibo Liu and Baigong Zheng and Chuanqiang Zhang and Zhongjun He and Hairong Liu and X. Li and H. Wu and Haifeng Wang},
+  author={Mingbo Ma and
-  booktitle={ACL},
+               Liang Huang and
+               Hao Xiong and
+               Renjie Zheng and
+               Kaibo Liu and
+               Baigong Zheng and
+               Chuanqiang Zhang and
+               Zhongjun He and
+               Hairong Liu and
+               Xing Li and
+               Hua Wu and
+               Haifeng Wang},
+  publisher={Annual Meeting of the Association for Computational Linguistics},
+  pages={3025--3036},
  year={2019}
 }
 @inproceedings{Gimpel2013ASE,
  title={A Systematic Exploration of Diversity in Machine Translation},
  author={Kevin Gimpel and Dhruv Batra and Chris Dyer and Gregory Shakhnarovich},
-  booktitle={EMNLP},
+  publisher={Conference on Empirical Methods in Natural Language Processing},
+  pages={1100--1111},
  year={2013}
 }
 @article{Li2016MutualIA,
  title={Mutual Information and Diverse Decoding Improve Neural Machine Translation},
-  author={J. Li and Dan Jurafsky},
+  author={Jiwei Li and Dan Jurafsky},
-  journal={ArXiv},
+  journal={CoRR},
  year={2016},
  volume={abs/1601.00372}
 }
 @inproceedings{Li2016ADO,
  title={A Diversity-Promoting Objective Function for Neural Conversation Models},
-  author={J. Li and Michel Galley and Chris Brockett and Jianfeng Gao and W. Dolan},
+  author={Jiwei Li and
-  booktitle={HLT-NAACL},
+               Michel Galley and
+               Chris Brockett and
+               Jianfeng Gao and
+               Bill Dolan},
+  publisher={Annual Conference of the North American Chapter of the Association for Computational Linguistics},
+  pages={110--119},
  year={2016}
 }
 @inproceedings{He2018SequenceTS,
  title={Sequence to Sequence Mixture Model for Diverse Machine Translation},
  author={Xuanli He and Gholamreza Haffari and Mohammad Norouzi},
-  booktitle={CoNLL},
+  pages     = {583--592},
-  year={2018}
+  publisher = {Conference on Computational Natural Language Learning},
+  year      = {2018}
 }
 @article{Shen2019MixtureMF,
  title={Mixture Models for Diverse Machine Translation: Tricks of the Trade},
-  author={Tianxiao Shen and Myle Ott and M. Auli and Marc'Aurelio Ranzato},
+  author={Tianxiao Shen and Myle Ott and Michael Auli and Marc'Aurelio Ranzato},
-  journal={ArXiv},
+  pages     = {5719--5728},
-  year={2019},
+  publisher = {International Conference on Machine Learning},
-  volume={abs/1902.07816}
+  year      = {2019},
 }
 @article{Wu2020GeneratingDT,
  title={Generating Diverse Translation from Model Distribution with Dropout},
  author={Xuanfu Wu and Yang Feng and Chenze Shao},
-  journal={ArXiv},
+  pages={1088--1097},
-  year={2020},
+  publisher={Annual Meeting of the Association for Computational Linguistics},
-  volume={abs/2010.08178}
+  year={2020}
 }
 @inproceedings{Sun2020GeneratingDT,
  title={Generating Diverse Translation by Manipulating Multi-Head Attention},
-  author={Zewei Sun and Shujian Huang and Hao-Ran Wei and Xin-Yu Dai and Jiajun Chen},
+  author={Zewei Sun and Shujian Huang and Hao Ran Wei and Xin Yu Dai and Jiajun Chen},
-  booktitle={AAAI},
+  publisher={AAAI Conference on Artificial Intelligence},
+  pages={8976--8983},
  year={2020}
 }
 @article{Vijayakumar2016DiverseBS,
  title={Diverse Beam Search: Decoding Diverse Solutions from Neural Sequence Models},
-  author={Ashwin K. Vijayakumar and Michael Cogswell and R. R. Selvaraju and Q. Sun and Stefan Lee and David J. Crandall and Dhruv Batra},
+  author={Ashwin K. Vijayakumar and
-  journal={ArXiv},
+               Michael Cogswell and
+               Ramprasaath R. Selvaraju and
+               Qing Sun and
+               Stefan Lee and
+               David J. Crandall and
+               Dhruv Batra},
+  journal={CoRR},
  year={2016},
  volume={abs/1610.02424}
 }
 @inproceedings{Liu2014SearchAwareTF,
  title={Search-Aware Tuning for Machine Translation},
-  author={L. Liu and Liang Huang},
+  author={Lemao Liu and
-  booktitle={EMNLP},
+               Liang Huang},
+  publisher={Conference on Empirical Methods in Natural Language Processing},
+  pages={1942--1952},
  year={2014}
 }
 @inproceedings{Yu2013MaxViolationPA,
  title={Max-Violation Perceptron and Forced Decoding for Scalable MT Training},
  author={Heng Yu and Liang Huang and Haitao Mi and Kai Zhao},
-  booktitle={EMNLP},
+  publisher={Conference on Empirical Methods in Natural Language Processing},
+  pages={1112--1123},
  year={2013}
 }
 @inproceedings{Stahlberg2019OnNS,
  title={On NMT Search Errors and Model Errors: Cat Got Your Tongue?},
  author={Felix Stahlberg and 
-          B. Byrne},
+          Bill Byrne},
-  booktitle={EMNLP/IJCNLP},
+  publisher={Conference on Empirical Methods in Natural Language Processing},
+  pages={3354--3360},
  year={2019}
 }
 @inproceedings{Niehues2017AnalyzingNM,
  title={Analyzing Neural MT Search and Model Performance},
-  author={J. Niehues and Eunah Cho and Thanh-Le Ha and Alexander H. Waibel},
+  author={Jan Niehues and
-  booktitle={NMT@ACL},
+               Eunah Cho and
+               Thanh-Le Ha and
+               Alex Waibel},
+  pages={11--17},
+  publisher={Annual Meeting of the Association for Computational Linguistics},
  year={2017}
 }
@@ -6347,26 +6401,31 @@ author    = {Yoshua Bengio and
 @article{Ranzato2016SequenceLT,
  title={Sequence Level Training with Recurrent Neural Networks},
-  author={Marc'Aurelio Ranzato and S. Chopra and M. Auli and W. Zaremba},
+  author={Marc'Aurelio Ranzato and
-  journal={CoRR},
+               Sumit Chopra and
-  year={2016},
+               Michael Auli and
-  volume={abs/1511.06732}
+               Wojciech Zaremba},
+  publisher={International Conference on Learning Representations},
+  year={2016}
 }
 @article{Bengio2015ScheduledSF,
  title={Scheduled Sampling for Sequence Prediction with Recurrent Neural Networks},
-  author={S. Bengio and Oriol Vinyals and Navdeep Jaitly and Noam Shazeer},
+  author={Samy Bengio and
-  journal={ArXiv},
+               Oriol Vinyals and
-  year={2015},
+               Navdeep Jaitly and
-  volume={abs/1506.03099}
+               Noam Shazeer},
+  booktitle = {Conference and Workshop on Neural Information Processing Systems},
+  pages     = {1171--1179},
+  year      = {2015}
 }
 @article{Zhang2019BridgingTG,
  title={Bridging the Gap between Training and Inference for Neural Machine Translation},
-  author={Wen Zhang and Y. Feng and Fandong Meng and Di You and Qun Liu},
+  author={Wen Zhang and Yang Feng and Fandong Meng and Di You and Qun Liu},
-  journal={ArXiv},
+  pages     = {4334--4343},
-  year={2019},
+  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  volume={abs/1906.02448}
+  year      = {2019}
 }
 @inproceedings{DBLP:conf/acl/ShenCHHWSL16,
@@ -6382,15 +6441,6 @@ author    = {Yoshua Bengio and
  year      = {2016},
 }
-@article{Gage1994ANA,
-  title={A new algorithm for data compression},
-  author={P. Gage},
-  journal={The C Users Journal archive},
-  year={1994},
-  volume={12},
-  pages={23-38}
-}
 @inproceedings{DBLP:conf/acl/SennrichHB16a,
  author    = {Rico Sennrich and
               Barry Haddow and
@@ -6434,26 +6484,31 @@ author    = {Yoshua Bengio and
 @article{Narang2017BlockSparseRN,
  title={Block-Sparse Recurrent Neural Networks},
-  author={Sharan Narang and Eric Undersander and G. Diamos},
+  author={Sharan Narang and Eric Undersander and Gregory Diamos},
-  journal={ArXiv},
+  journal={CoRR},
  year={2017},
  volume={abs/1711.02782}
 }
 @article{Gale2019TheSO,
  title={The State of Sparsity in Deep Neural Networks},
-  author={T. Gale and E. Elsen and Sara Hooker},
+  author={Trevor Gale and
-  journal={ArXiv},
+               Erich Elsen and
+               Sara Hooker},
+  journal={CoRR},
  year={2019},
  volume={abs/1902.09574}
 }
 @article{Michel2019AreSH,
  title={Are Sixteen Heads Really Better than One?},
-  author={Paul Michel and Omer Levy and Graham Neubig},
+  author    = {Paul Michel and
-  journal={ArXiv},
+               Omer Levy and
-  year={2019},
+               Graham Neubig},
-  volume={abs/1905.10650}
+  title     = {Are Sixteen Heads Really Better than One?},
+  publisher = {Conference and Workshop on Neural Information Processing Systems},
+  pages     = {14014--14024},
+  year      = {2019}
 }
 @inproceedings{DBLP:journals/corr/abs-1905-09418,
@@ -6481,17 +6536,11 @@ author    = {Yoshua Bengio and
 @article{Katharopoulos2020TransformersAR,
  title={Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention},
  author={Angelos Katharopoulos and Apoorv Vyas and Nikolaos Pappas and Franccois Fleuret},
-  journal={ArXiv},
+  journal={CoRR},
  year={2020},
  volume={abs/2006.16236}
 }
-@inproceedings{Beal2003VariationalAF,
-  title={Variational algorithms for approximate Bayesian inference},
-  author={M. Beal},
-  year={2003}
-}
 @article{xiao2011language,
  title ={Language Modeling for Syntax-Based Machine Translation Using Tree Substitution Grammars: A Case Study on Chinese-English Translation},
  author ={Xiao, Tong and Zhu, Jingbo and Zhu, Muhua},
@@ -6504,33 +6553,40 @@ author    = {Yoshua Bengio and
 @inproceedings{Li2009VariationalDF,
  title={Variational Decoding for Statistical Machine Translation},
-  author={Zhifei Li and J. Eisner and S. Khudanpur},
+  author={Zhifei Li and
-  booktitle={ACL/IJCNLP},
+               Jason Eisner and
+               Sanjeev Khudanpur},
+  publisher={Annual Meeting of the Association for Computational Linguistics},
+  pages={593--601},
  year={2009}
 }
 @article{Bastings2019ModelingLS,
  title={Modeling Latent Sentence Structure in Neural Machine Translation},
-  author={Jasmijn Bastings and W. Aziz and Ivan Titov and K. Sima'an},
+  author={Jasmijn Bastings and
-  journal={ArXiv},
+               Wilker Aziz and
-  year={2019},
+               Ivan Titov and
-  volume={abs/1901.06436}
+               Khalil Sima'an},
+  journal   = {CoRR},
+  volume    = {abs/1901.06436},
+  year      = {2019}
 }
 @article{Shah2018GenerativeNM,
  title={Generative Neural Machine Translation},
-  author={Harshil Shah and D. Barber},
+  author={Harshil Shah and
-  journal={ArXiv},
+               David Barber},
-  year={2018},
+  publisher={Conference and Workshop on Neural Information Processing Systems},
-  volume={abs/1806.05138}
+  pages={1353--1362},
+  year={2018}
 }
 @article{Su2018VariationalRN,
  title={Variational Recurrent Neural Machine Translation},
  author={Jinsong Su and Shan Wu and Deyi Xiong and Yaojie Lu and Xianpei Han and Biao Zhang},
-  journal={ArXiv},
+  publisher={AAAI Conference on Artificial Intelligence},
-  year={2018},
+  pages={5488--5495},
-  volume={abs/1801.05119}
+  year={2018}
 }
 @inproceedings{DBLP:journals/corr/GehringAGYD17,
@@ -6549,127 +6605,161 @@ author    = {Yoshua Bengio and
 @inproceedings{Wei2019ImitationLF,
  title={Imitation Learning for Non-Autoregressive Neural Machine Translation},
  author={Bingzhen Wei and Mingxuan Wang and Hao Zhou and Junyang Lin and Xu Sun},
-  booktitle={ACL},
+  publisher={Annual Meeting of the Association for Computational Linguistics},
+  pages     = {1304--1312},
  year={2019}
 }
 @inproceedings{Shao2019RetrievingSI,
  title={Retrieving Sequential Information for Non-Autoregressive Neural Machine Translation},
-  author={Chenze Shao and Y. Feng and J. Zhang and Fandong Meng and X. Chen and Jie Zhou},
+  author={Chenze Shao and
-  booktitle={ACL},
+               Yang Feng and
+               Jinchao Zhang and
+               Fandong Meng and
+               Xilin Chen and
+               Jie Zhou},
+  publisher={Annual Meeting of the Association for Computational Linguistics},
+  pages={3013--3024},
  year={2019}
 }
 @article{Akoury2019SyntacticallyST,
  title={Syntactically Supervised Transformers for Faster Neural Machine Translation},
  author={Nader Akoury and Kalpesh Krishna and Mohit Iyyer},
-  journal={ArXiv},
+  pages     = {1269--1281},
-  year={2019},
+  publisher = {Annual Meeting of the Association for Computational Linguistics},
-  volume={abs/1906.02780}
+  year      = {2019},
 }
 @article{Guo2020FineTuningBC,
  title={Fine-Tuning by Curriculum Learning for Non-Autoregressive Neural Machine Translation},
-  author={Junliang Guo and Xu Tan and Linli Xu and Tao Qin and E. Chen and T. Liu},
+  author={Junliang Guo and
-  journal={ArXiv},
+               Xu Tan and
-  year={2020},
+               Linli Xu and
-  volume={abs/1911.08717}
+               Tao Qin and
+               Enhong Chen and
+               Tie-Yan Liu},
+  pages     = {7839--7846},
+  publisher = {AAAI Conference on Artificial Intelligence},
+  year      = {2020}
 }
 @inproceedings{Ran2020LearningTR,
  title={Learning to Recover from Multi-Modality Errors for Non-Autoregressive Neural Machine Translation},
-  author={Qiu Ran and Yankai Lin and Peng Li and J. Zhou},
+  author={Qiu Ran and Yankai Lin and Peng Li and Jie Zhou},
-  booktitle={ACL},
+  publisher={Annual Meeting of the Association for Computational Linguistics},
+  pages={3059--3069},
  year={2020}
 }
 @article{Liu2020FastBERTAS,
  title={FastBERT: a Self-distilling BERT with Adaptive Inference Time},
-  author={Weijie Liu and P. Zhou and Zhe Zhao and Zhiruo Wang and Haotang Deng and Q. Ju},
+  author={Weijie Liu and
-  journal={ArXiv},
+               Peng Zhou and
-  year={2020},
+               Zhiruo Wang and
-  volume={abs/2004.02178}
+               Zhe Zhao and
+               Haotang Deng and
+               Qi Ju},
+  pages     = {6035--6044},
+  publisher = {Annual Meeting of the Association for Computational Linguistics},
+  year      = {2020}
 }
 @article{Elbayad2020DepthAdaptiveT,
  title={Depth-Adaptive Transformer},
-  author={Maha Elbayad and Jiatao Gu and E. Grave and M. Auli},
+  author={Maha Elbayad and
-  journal={ArXiv},
+               Jiatao Gu and
-  year={2020},
+               Edouard Grave and
-  volume={abs/1910.10073}
+               Michael Auli},
+  publisher={International Conference on Learning Representations},
+  year={2020}
 }
 @article{Lan2020ALBERTAL,
  title={ALBERT: A Lite BERT for Self-supervised Learning of Language Representations},
  author={Zhenzhong Lan and Mingda Chen and Sebastian Goodman and Kevin Gimpel and Piyush Sharma and Radu Soricut},
-  journal={ArXiv},
+  publisher={International Conference on Learning Representations}
-  year={2020},
-  volume={abs/1909.11942}
 }
 @inproceedings{Han2015LearningBW,
  title={Learning both Weights and Connections for Efficient Neural Network},
-  author={Song Han and J. Pool and John Tran and W. Dally},
+  author={Song Han and
-  booktitle={NIPS},
+               Jeff Pool and
+               John Tran and
+               William J. Dally},
+  publisher={Conference and Workshop on Neural Information Processing Systems},
+  pages={1135--1143},
  year={2015}
 }
 @article{Lee2019SNIPSN,
-  title={SNIP: Single-shot Network Pruning based on Connection Sensitivity},
+  author    = {Namhoon Lee and
-  author={N. Lee and Thalaiyasingam Ajanthan and P. Torr},
+               Thalaiyasingam Ajanthan and
-  journal={ArXiv},
+               Philip H. S. Torr},
-  year={2019},
+  title     = {{SNIP}: Single-shot Network Pruning based on Connection Sensitivity},
-  volume={abs/1810.02340}
+  publisher = {International Conference on Learning Representations},
+  year      = {2019},
 }
 @article{Frankle2019TheLT,
  title={The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks},
  author={Jonathan Frankle and Michael Carbin},
-  journal={arXiv: Learning},
+  publisher={International Conference on Learning Representations},
  year={2019}
 }
 @article{Brix2020SuccessfullyAT,
-  title={Successfully Applying the Stabilized Lottery Ticket Hypothesis to the Transformer Architecture},
+  author    = {Christopher Brix and
-  author={Christopher Brix and P. Bahar and H. Ney},
+               Parnia Bahar and
-  journal={ArXiv},
+               Hermann Ney},
-  year={2020},
+  title     = {Successfully Applying the Stabilized Lottery Ticket Hypothesis to
-  volume={abs/2005.03454}
+               the Transformer Architecture},
+  pages     = {3909--3915},
+  publisher = {Annual Meeting of the Association for Computational Linguistics},
+  year      = {2020},
 }
 @article{Liu2019RethinkingTV,
  title={Rethinking the Value of Network Pruning},
-  author={Zhuang Liu and M. Sun and Tinghui Zhou and Gao Huang and Trevor Darrell},
+  author={Zhuang Liu and
+               Mingjie Sun and
+               Tinghui Zhou and
+               Gao Huang and
+               Trevor Darrell},
  journal={ArXiv},
  year={2019},
  volume={abs/1810.05270}
 }
 @article{Liu2017LearningEC,
-  title={Learning Efficient Convolutional Networks through Network Slimming},
+author    = {Zhuang Liu and
-  author={Zhuang Liu and J. Li and Zhiqiang Shen and Gao Huang and S. Yan and C. Zhang},
+               Jianguo Li and
-  journal={2017 IEEE International Conference on Computer Vision (ICCV)},
+               Zhiqiang Shen and
-  year={2017},
+               Gao Huang and
-  pages={2755-2763}
+               Shoumeng Yan and
-}
+               Changshui Zhang},
+  title     = {Learning Efficient Convolutional Networks through Network Slimming},
-@inproceedings{Cheong2019transformersZ,
+  pages     = {2755--2763},
-  title={transformers.zip : Compressing Transformers with Pruning and Quantization},
+  publisher = {{IEEE} International Conference on Computer Vision},
-  author={Robin Cheong},
+  year      = {2017}
-  year={2019}
 }
 @inproceedings{Banner2018ScalableMF,
  title={Scalable Methods for 8-bit Training of Neural Networks},
-  author={R. Banner and Itay Hubara and E. Hoffer and Daniel Soudry},
+  author={Ron Banner and
-  booktitle={NeurIPS},
+               Itay Hubara and
+               Elad Hoffer and
+               Daniel Soudry},
+  publisher={Conference and Workshop on Neural Information Processing Systems},
+  pages={5151--5159},
  year={2018}
 }
 @article{Hubara2017QuantizedNN,
  title={Quantized Neural Networks: Training Neural Networks with Low Precision Weights and Activations},
  author={Itay Hubara and Matthieu Courbariaux and Daniel Soudry and Ran El-Yaniv and Yoshua Bengio},
-  journal={J. Mach. Learn. Res.},
+  journal={Journal of Machine Learning Research},
  year={2017},
  volume={18},
  pages={187:1-187:30}
@@ -6687,62 +6777,100 @@ author    = {Yoshua Bengio and
 @article{Munim2019SequencelevelKD,
  title={Sequence-level Knowledge Distillation for Model Compression of Attention-based Sequence-to-sequence Speech Recognition},
-  author={Raden Mu'az Mun'im and N. Inoue and Koichi Shinoda},
+  author={Raden Mu'az Mun'im and Nakamasa Inoue and Koichi Shinoda},
-  journal={ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
+  publisher={{IEEE} International Conference on Acoustics, Speech and Signal Processing},
  year={2019},
  pages={6151-6155}
 }
 @article{Tang2019DistillingTK,
-  title={Distilling Task-Specific Knowledge from BERT into Simple Neural Networks},
+  author    = {Raphael Tang and
-  author={Raphael Tang and Yao Lu and L. Liu and Lili Mou and Olga Vechtomova and Jimmy Lin},
+               Yao Lu and
-  journal={ArXiv},
+               Linqing Liu and
-  year={2019},
+               Lili Mou and
-  volume={abs/1903.12136}
+               Olga Vechtomova and
+               Jimmy Lin},
+  title     = {Distilling Task-Specific Knowledge from {BERT} into Simple Neural
+               Networks},
+  journal   = {CoRR},
+  volume    = {abs/1903.12136},
+  year      = {2019}
 }
 @inproceedings{Jiao2020TinyBERTDB,
-  title={TinyBERT: Distilling BERT for Natural Language Understanding},
+  author    = {Xiaoqi Jiao and
-  author={Xiaoqi Jiao and Y. Yin and L. Shang and Xin Jiang and X. Chen and Linlin Li and F. Wang and Qun Liu},
+               Yichun Yin and
-  booktitle={EMNLP},
+               Lifeng Shang and
+               Xin Jiang and
+               Xiao Chen and
+               Linlin Li and
+               Fang Wang and
+               Qun Liu},
+  title     = {TinyBERT: Distilling {BERT} for Natural Language Understanding},
+  pages     = {4163--4174},
+  publisher={Conference on Empirical Methods in Natural Language Processing},
  year={2020}
 }
 @article{Ghazvininejad2020AlignedCE,
-  title={Aligned Cross Entropy for Non-Autoregressive Machine Translation},
+  author    = {Marjan Ghazvininejad and
-  author={Marjan Ghazvininejad and V. Karpukhin and Luke Zettlemoyer and Omer Levy},
+               Vladimir Karpukhin and
-  journal={ArXiv},
+               Luke Zettlemoyer and
-  year={2020},
+               Omer Levy},
-  volume={abs/2004.01655}
+  title     = {Aligned Cross Entropy for Non-Autoregressive Machine Translation},
+  journal   = {CoRR},
+  volume    = {abs/2004.01655},
+  year      = {2020}
 }
 @inproceedings{Shao2020MinimizingTB,
-  title={Minimizing the Bag-of-Ngrams Difference for Non-Autoregressive Neural Machine Translation},
+  author    = {Chenze Shao and
-  author={Chenze Shao and Jinchao Zhang and Yun-jie Feng and Fandong Meng and Jie Zhou},
+               Jinchao Zhang and
-  booktitle={AAAI},
+               Yang Feng and
-  year={2020}
+               Fandong Meng and
+               Jie Zhou},
+  title     = {Minimizing the Bag-of-Ngrams Difference for Non-Autoregressive Neural
+               Machine Translation},
+  pages     = {198--205},
+  publisher = {AAAI Conference on Artificial Intelligence},
+  year      = {2020}
 }
 @inproceedings{Ma2019FlowSeqNC,
  title={FlowSeq: Non-Autoregressive Conditional Sequence Generation with Generative Flow},
-  author={Xuezhe Ma and Chunting Zhou and X. Li and Graham Neubig and E. Hovy},
+  author={Xuezhe Ma and
-  booktitle={EMNLP/IJCNLP},
+               Chunting Zhou and
+               Xian Li and
+               Graham Neubig and
+               Eduard H. Hovy},
+  publisher={Conference on Empirical Methods in Natural Language Processing},
+  pages={4281--4291},
  year={2019}
 }
 @inproceedings{Guo2019NonAutoregressiveNM,
  title={Non-Autoregressive Neural Machine Translation with Enhanced Decoder Input},
-  author={Junliang Guo and X. Tan and D. He and T. Qin and Linli Xu and T. Liu},
+  author={Junliang Guo and
-  booktitle={AAAI},
+               Xu Tan and
+               Di He and
+               Tao Qin and
+               Linli Xu and
+               Tie-Yan Liu},
+  pages={3723--3730},
+  publisher={AAAI Conference on Artificial Intelligence},
  year={2019}
 }
 @article{Ran2019GuidingNN,
-  title={Guiding Non-Autoregressive Neural Machine Translation Decoding with Reordering Information},
+  author    = {Qiu Ran and
-  author={Qiu Ran and Yankai Lin and Peng Li and J. Zhou},
+               Yankai Lin and
-  journal={ArXiv},
+               Peng Li and
-  year={2019},
+               Jie Zhou},
-  volume={abs/1911.02215}
+  title     = {Guiding Non-Autoregressive Neural Machine Translation Decoding with
+               Reordering Information},
+  journal   = {CoRR},
+  volume    = {abs/1911.02215},
+  year      = {2019}
 }
 @inproceedings{vaswani2017attention,
@@ -6774,73 +6902,96 @@ author    = {Yoshua Bengio and
 @inproceedings{Wang2019NonAutoregressiveMT,
  title={Non-Autoregressive Machine Translation with Auxiliary Regularization},
-  author={Yiren Wang and Fei Tian and D. He and T. Qin and ChengXiang Zhai and T. Liu},
+  author={Yiren Wang and
-  booktitle={AAAI},
+               Fei Tian and
+               Di He and
+               Tao Qin and
+               ChengXiang Zhai and
+               Tie-Yan Liu},
+  publisher={AAAI Conference on Artificial Intelligence},
+  pages={5377--5384},
  year={2019}
 }
 @inproceedings{Kaiser2018FastDI,
  title={Fast Decoding in Sequence Models using Discrete Latent Variables},
-  author={Łukasz Kaiser and Aurko Roy and Ashish Vaswani and Niki Parmar and S. Bengio and Jakob Uszkoreit and Noam Shazeer},
+  author={Łukasz Kaiser and Aurko Roy and Ashish Vaswani and Niki Parmar and Samy Bengio and Jakob Uszkoreit and Noam Shazeer},
-  booktitle={ICML},
+  publisher={International Conference on Machine Learning},
+  pages={2395--2404},
  year={2018}
 }
 @article{Tu2020ENGINEEI,
  title={ENGINE: Energy-Based Inference Networks for Non-Autoregressive Machine Translation},
  author={Lifu Tu and Richard Yuanzhe Pang and Sam Wiseman and Kevin Gimpel},
-  journal={ArXiv},
+  pages={2819--2826},
-  year={2020},
+  publisher={Annual Meeting of the Association for Computational Linguistics},
-  volume={abs/2005.00850}
+  year={2020}
 }
 @inproceedings{Shu2020LatentVariableNN,
  title={Latent-Variable Non-Autoregressive Neural Machine Translation with Deterministic Inference using a Delta Posterior},
  author={Raphael Shu and Jason Lee and Hideki Nakayama and Kyunghyun Cho},
-  booktitle={AAAI},
+  publisher={AAAI Conference on Artificial Intelligence},
+  pages={8846--8853},
  year={2020}
 }
 @inproceedings{Li2019HintBasedTF,
  title={Hint-Based Training for Non-Autoregressive Machine Translation},
-  author={Zhuohan Li and Zi Lin and Di He and Fei Tian and Tao Qin and Liwei Wang and T. Liu},
+  author={Zhuohan Li and
-  booktitle={EMNLP/IJCNLP},
+               Zi Lin and
+               Di He and
+               Fei Tian and
+               Tao Qin and
+               Liwei Wang and
+               Tie-Yan Liu},
+  publisher={Conference on Empirical Methods in Natural Language Processing},
+  pages={5707--5712},
  year={2019}
 }
 @inproceedings{Ho2016ModelFreeIL,
  title={Model-Free Imitation Learning with Policy Optimization},
-  author={Jonathan Ho and J. Gupta and S. Ermon},
+  author={Jonathan Ho and
-  booktitle={ICML},
+               Jayesh K. Gupta and
+               Stefano Ermon},
+  publisher={International Conference on Machine Learning},
+  pages={2760--2769},
  year={2016}
 }
 @inproceedings{Ho2016GenerativeAI,
  title={Generative Adversarial Imitation Learning},
-  author={Jonathan Ho and S. Ermon},
+  author={Jonathan Ho and Stefano Ermon},
-  booktitle={NIPS},
+  publisher={Conference and Workshop on Neural Information Processing Systems},
+  pages={4565--4573},
  year={2016}
 }
 @article{Duan2017OneShotIL,
  title={One-Shot Imitation Learning},
-  author={Yan Duan and Marcin Andrychowicz and Bradly C. Stadie and Jonathan Ho and J. Schneider and Ilya Sutskever and P. Abbeel and W. Zaremba},
+  author={Yan Duan and Marcin Andrychowicz and Bradly C. Stadie and Jonathan Ho and Jonas Schneider and Ilya Sutskever and Pieter Abbeel and Wojciech Zaremba},
-  journal={ArXiv},
+  journal={CoRR},
  year={2017},
  volume={abs/1703.07326}
 }
 @inproceedings{Wang2018SemiAutoregressiveNM,
  title={Semi-Autoregressive Neural Machine Translation},
-  author={C. Wang and Ji Zhang and Haiqing Chen},
+  author={Chunqi Wang and
-  booktitle={EMNLP},
+               Ji Zhang and
+               Haiqing Chen},
+  booktitle={Conference on Empirical Methods in Natural Language Processing},
+  pages={479--488},
  year={2018}
 }
 @inproceedings{Ghazvininejad2019MaskPredictPD,
  title={Mask-Predict: Parallel Decoding of Conditional Masked Language Models},
  author={Marjan Ghazvininejad and Omer Levy and Yinhan Liu and Luke Zettlemoyer},
-  booktitle={EMNLP/IJCNLP},
+  publisher={Conference on Empirical Methods in Natural Language Processing},
+  pages={6111--6120},
  year={2019}
 }
@@ -6853,7 +7004,9 @@ author    = {Yoshua Bengio and
 @article{Zhou2019SynchronousBN,
  title={Synchronous Bidirectional Neural Machine Translation},
-  author={L. Zhou and Jiajun Zhang and C. Zong},
+  author={Long Zhou and
+               Jiajun Zhang and
+               Chengqing Zong},
  journal={Transactions of the Association for Computational Linguistics},
  year={2019},
  volume={7},
@@ -6870,8 +7023,9 @@ author    = {Yoshua Bengio and
 @inproceedings{Feng2016ImprovingAM,
  title={Improving Attention Modeling with Implicit Distortion and Fertility for Machine Translation},
-  author={Shi Feng and Shujie Liu and Nan Yang and Mu Li and M. Zhou and K. Q. Zhu},
+  author={Shi Feng and Shujie Liu and Nan Yang and Mu Li and Ming Zhou and Kenny Q. Zhu},
-  booktitle={COLING},
+  booktitle={International Conference on Computational Linguistics},
+  pages={3082--3092},
  year={2016}
 }
@@ -6940,7 +7094,7 @@ author    = {Yoshua Bengio and
 @article{Peris2017InteractiveNM,
  title={Interactive neural machine translation},
  author={{\'A}lvaro Peris and Miguel Domingo and F. Casacuberta},
-  journal={Comput. Speech Lang.},
+  journal={Computer Speech and Language},
  year={2017},
  volume={45},
  pages={201-220}
@@ -6948,8 +7102,9 @@ author    = {Yoshua Bengio and
 @inproceedings{Peris2018ActiveLF,
  title={Active Learning for Interactive Neural Machine Translation of Data Streams},
-  author={{\'A}lvaro Peris and F. Casacuberta},
+  author={{\'A}lvaro Peris and Francisco Casacuberta},
-  booktitle={CoNLL},
+  publisher={The SIGNLL Conference on Computational Natural Language Learning},
+  pages={151--160},
  year={2018}
 }
@@ -6974,7 +7129,7 @@ author    = {Yoshua Bengio and
 }
 @article{61115,
-  author={J. {Lin}},
+  author={Jianhua Lin},
  journal={IEEE Transactions on Information Theory}, 
  title={Divergence measures based on the Shannon entropy}, 
  year={1991},
@@ -6988,13 +7143,8 @@ author    = {Yoshua Bengio and
               Atsushi Fujita},
  title     = {Recurrent Stacking of Layers for Compact Neural Machine Translation
               Models},
-  booktitle = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI}
-               2019, The Thirty-First Innovative Applications of Artificial Intelligence
-               Conference, {IAAI} 2019, The Ninth {AAAI} Symposium on Educational
-               Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii,
-               USA, January 27 - February 1, 2019},
  pages     = {6292--6299},
-  publisher = {{AAAI} Press},
+  publisher = {AAAI Conference on Artificial Intelligence},
  year      = {2019}
 }
@@ -7082,10 +7232,8 @@ author    = {Yoshua Bengio and
               Dmitry Kalenichenko},
  title     = {Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only
               Inference},
-  booktitle = {2018 {IEEE} Conference on Computer Vision and Pattern Recognition,
+  publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
-               {CVPR} 2018, Salt Lake City, UT, USA, June 18-22, 2018},
  pages     = {2704--2713},
-  publisher = {{IEEE} Computer Society},
  year      = {2018}
 }
@@ -7106,9 +7254,7 @@ author    = {Yoshua Bengio and
               Ran El-Yaniv and
               Yoshua Bengio},
  title     = {Binarized Neural Networks},
-  booktitle = {Advances in Neural Information Processing Systems 29: Annual Conference
+  publisher = {Conference and Workshop on Neural Information Processing Systems},
-               on Neural Information Processing Systems 2016, December 5-10, 2016,
-               Barcelona, Spain},
  pages     = {4107--4115},
  year      = {2016}
 }
@@ -7131,10 +7277,8 @@ author    = {Yoshua Bengio and
               Muhua Zhu and
               Huizhen Wang},
  title     = {Boosting-Based System Combination for Machine Translation},
-  booktitle = {{ACL} 2010, Proceedings of the 48th Annual Meeting of the Association
-               for Computational Linguistics, July 11-16, 2010, Uppsala, Sweden},
  pages     = {739--748},
-  publisher = {The Association for Computer Linguistics},
+  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2010}
 }
@@ -7146,11 +7290,9 @@ author    = {Yoshua Bengio and
               Philip C. Woodland},
  title     = {Consensus Network Decoding for Statistical Machine Translation System
               Combination},
-  booktitle = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
+  publisher = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
-               and Signal Processing, {ICASSP} 2007, Honolulu, Hawaii, USA, April
+               and Signal Processing},
-               15-20, 2007},
  pages     = {105--108},
-  publisher = {{IEEE}},
  year      = {2007}
 }
@@ -7159,9 +7301,7 @@ author    = {Yoshua Bengio and
               Spyridon Matsoukas and
               Richard M. Schwartz},
  title     = {Improved Word-Level System Combination for Machine Translation},
-  booktitle = {{ACL} 2007, Proceedings of the 45th Annual Meeting of the Association
+  publisher = {Annual Meeting of the Association for Computational Linguistics},
-               for Computational Linguistics, June 23-30, 2007, Prague, Czech Republic},
-  publisher = {The Association for Computational Linguistics},
  year      = {2007}
 }
@@ -7172,10 +7312,8 @@ author    = {Yoshua Bengio and
               Richard M. Schwartz},
  title     = {Incremental Hypothesis Alignment for Building Confusion Networks with
               Application to Machine Translation System Combination},
-  booktitle = {Proceedings of the Third Workshop on Statistical Machine Translation,
+  publisher = {Proceedings of the Third Workshop on Statistical Machine Translation},
-               WMT@ACL 2008, Columbus, Ohio, USA, June 19, 2008},
  pages     = {183--186},
-  publisher = {Association for Computational Linguistics},
  year      = {2008}
 }
@@ -7185,11 +7323,8 @@ author    = {Yoshua Bengio and
               Tong Xiao and
               Ming Zhou},
  title     = {The Feature Subspace Method for SMT System Combination},
-  booktitle = {Proceedings of the 2009 Conference on Empirical Methods in Natural
+  publisher = {Conference on Empirical Methods in Natural Language Processing},
-               Language Processing, {EMNLP} 2009, 6-7 August 2009, Singapore, {A}
-               meeting of SIGDAT, a Special Interest Group of the {ACL}},
  pages     = {1096--1104},
-  publisher = {{ACL}},
  year      = {2009}
 }
@@ -7218,12 +7353,8 @@ author    = {Yoshua Bengio and
               Franz Josef Och and
               Wolfgang Macherey},
  title     = {Lattice Minimum Bayes-Risk Decoding for Statistical Machine Translation},
-  booktitle = {2008 Conference on Empirical Methods in Natural Language Processing,
+  publisher = {Conference on Empirical Methods in Natural Language Processing},
-               {EMNLP} 2008, Proceedings of the Conference, 25-27 October 2008, Honolulu,
-               Hawaii, USA, {A} meeting of SIGDAT, a Special Interest Group of the
-               {ACL}},
  pages     = {620--629},
-  publisher = {{ACL}},
  year      = {2008}
 }
@@ -7236,10 +7367,8 @@ author    = {Yoshua Bengio and
               Yang Liu},
  title     = {Lattice-Based Recurrent Neural Network Encoders for Neural Machine
               Translation},
-  booktitle = {Proceedings of the Thirty-First {AAAI} Conference on Artificial Intelligence,
+  publisher = {AAAI Conference on Artificial Intelligence},
-               February 4-9, 2017, San Francisco, California, {USA}},
  pages     = {3302--3308},
-  publisher = {{AAAI} Press},
  year      = {2017}
 }
@@ -7251,7 +7380,7 @@ author    = {Yoshua Bengio and
  publisher = {Proceedings of the Human Language Technology Conference of 
               the North American Chapter of the Association for Computational Linguistics},
  pages     = {464--468},
-  year      = {2018},
+  year      = {2018}
 }
 @inproceedings{WangLearning,
@@ -7273,9 +7402,7 @@ author    = {Yoshua Bengio and
               Edouard Grave and
               Armand Joulin},
  title     = {Reducing Transformer Depth on Demand with Structured Dropout},
-  booktitle = {8th International Conference on Learning Representations, {ICLR} 2020,
+  publisher = {International Conference on Learning Representations},
-               Addis Ababa, Ethiopia, April 26-30, 2020},
-  publisher = {OpenReview.net},
  year      = {2020}
 }
@@ -7283,16 +7410,10 @@ author    = {Yoshua Bengio and
  author    = {Qiang Wang and
               Tong Xiao and
               Jingbo Zhu},
-  editor    = {Trevor Cohn and
-               Yulan He and
-               Yang Liu},
  title     = {Training Flexible Depth Model by Multi-Task Learning for Neural Machine
               Translation},
-  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural
-               Language Processing: Findings, {EMNLP} 2020, Online Event, 16-20 November
-               2020},
  pages     = {4307--4312},
-  publisher = {Association for Computational Linguistics},
+  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2020}
 }
@@ -7303,8 +7424,7 @@ author    = {Yoshua Bengio and
               Furu Wei and
               Ming Zhou},
  title     = {BERT-of-Theseus: Compressing {BERT} by Progressive Module Replacing},
-  journal   = {CoRR},
+  publisher = {Conference on Empirical Methods in Natural Language Processing},
-  volume    = {abs/2002.02925},
  year      = {2020}
 }
@@ -7312,9 +7432,7 @@ author    = {Yoshua Bengio and
  author    = {Alexei Baevski and
               Michael Auli},
  title     = {Adaptive Input Representations for Neural Language Modeling},
-  booktitle = {7th International Conference on Learning Representations, {ICLR} 2019,
+  journal   = {arXiv preprint arXiv:1809.10853},
-               New Orleans, LA, USA, May 6-9, 2019},
-  publisher = {OpenReview.net},
  year      = {2019}
 }
@@ -7362,9 +7480,7 @@ author    = {Yoshua Bengio and
               Ruslan Salakhutdinov and
               Quoc V. Le},
  title     = {Mixtape: Breaking the Softmax Bottleneck Efficiently},
-  booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference
+  booktitle = {Conference on Neural Information Processing Systems},
-               on Neural Information Processing Systems 2019, NeurIPS 2019, 8-14
-               December 2019, Vancouver, BC, Canada},
  pages     = {15922--15930},
  year      = {2019}
 }
@@ -7391,11 +7507,9 @@ author    = {Yoshua Bengio and
               Chenglong Wang and
               Tong Xiao and
               Jingbo Zhu},
-  title     = {The NiuTrans System for {WNGT} 2020 Efficiency Task},
+  title     = {The NiuTrans System for {WNGT} 2020 Efficiency Task},
-  booktitle = {Proceedings of the Fourth Workshop on Neural Generation and Translation,
-               NGT@ACL 2020, Online, July 5-10, 2020},
  pages     = {204--210},
-  publisher = {Association for Computational Linguistics},
+  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2020}
 }
@@ -7432,37 +7546,38 @@ author    = {Yoshua Bengio and
 @inproceedings{Sun2019BaiduNM,
  title={Baidu Neural Machine Translation Systems for WMT19},
-  author={M. Sun and 
+  author    = {Meng Sun and
-          B. Jiang and 
+               Bojian Jiang and
-		  H. Xiong and 
+               Hao Xiong and
-		  Zhongjun He and 
+               Zhongjun He and
-		  H. Wu and 
+               Hua Wu and
-		  Haifeng Wang},
+               Haifeng Wang},
-  booktitle={WMT},
+  publisher={Annual Meeting of the Association for Computational Linguistics},
+  pages     = {374--381},
  year={2019}
 }
 @inproceedings{Wang2018TencentNM,
  title={Tencent Neural Machine Translation Systems for WMT18},
-  author={Mingxuan Wang and 
+  author={Mingxuan Wang and
-          Li Gong and 
+          Li Gong and
-		  Wenhuan Zhu and 
+          Wenhuan Zhu and
-		  J. Xie and 
+          Jun Xie and
-		  C. Bian},
+          Chao Bian},
-  booktitle={WMT},
+  publisher={Annual Meeting of the Association for Computational Linguistics},
+  pages={522--527},
  year={2018}
 }
 @article{Bi2019MultiagentLF,
  title={Multi-agent Learning for Neural Machine Translation},
  author={Tianchi Bi and 
-          H. Xiong and 
+          Hao Xiong and 
 		  Zhongjun He and 
-		  H. Wu and 
+		  Hua Wu and 
 		  Haifeng Wang},
-  journal={ArXiv},
+  journal={arXiv preprint arXiv:1909.01101},
-  year={2019},
+  year={2019}
-  volume={abs/1909.01101}
 }
 @inproceedings{DBLP:conf/aclnmt/KoehnK17,
@@ -7476,23 +7591,73 @@ author    = {Yoshua Bengio and
 @inproceedings{Held2013AppliedSI,
-  title={Applied Statistical Inference: Likelihood and Bayes},
+  title={Applied statistical inference},
-  author={L. Held and Daniel Sabans Bov},
+  author={Leonhard Held and Saban{\'e}s Bov{\'e}, Daniel},
-  year={2013}
+  volume={10},
+  number={978-3},
+  pages={16},
+  year={2014},
+  publisher={Springer}
+}
+@inproceedings{Zhang2016VariationalNM,
+  title={Variational Neural Machine Translation},
+  author    = {Biao Zhang and
+               Deyi Xiong and
+               Jinsong Su and
+               Hong Duan and
+               Min Zhang},
+  pages     = {521--530},
+  publisher = {Annual Meeting of the Association for Computational Linguistics},
+  year      = {2016}
 }
 @inproceedings{Silvey2018StatisticalI,
  title={Statistical Inference},
  author={S. D. Silvey},
-  booktitle={Encyclopedia of Social Network Analysis and Mining. 2nd Ed.},
+  publisher={Encyclopedia of Social Network Analysis and Mining},
  year={2018}
 }
-@inproceedings{Zhang2016VariationalNM,
+@inproceedings{Cheong2019transformersZ,
-  title={Variational Neural Machine Translation},
+  title={transformers.zip : Compressing Transformers with Pruning and Quantization},
-  author={Biao Zhang and Deyi Xiong and Jinsong Su and H. Duan and Min Zhang},
+  author={Robin Cheong and Robel Daniel},
-  booktitle={EMNLP},
+  publisher={Stanford University},
-  year={2016}
+  year={2019}
+}
+@inproceedings{Beal2003VariationalAF,
+  title={Variational algorithms for approximate Bayesian inference},
+  author={Matthew J. Beal},
+  publisher={University College London},
+  year={2003}
+}
+@article{Gage1994ANA,
+  title={A new algorithm for data compression},
+  author={P. Gage},
+  journal={The C Users Journal archive},
+  year={1994},
+  volume={12},
+  pages={23--38}
+}
+@inproceedings{Eisner2011LearningST,
+  title={Learning Speed-Accuracy Tradeoffs in Nondeterministic Inference Algorithms},
+  author={J. Eisner and Hal Daum{\'e}},
+  publisher={Conference and Workshop on Neural Information Processing Systems},
+  year={2011}
+}
+@article{Kazimi2017CoverageFC,
+  title={Coverage for Character Based Neural Machine Translation},
+  author={M. Kazimi and Marta R. Costa-juss{\`a}},
+  journal={arXiv preprint arXiv:1810.02340},
+  year={2017},
+  volume={59},
+  pages={99--106}
 }
 %%%%% chapter 14------------------------------------------------------
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%