Commit d39483b7 by 孟霞

14参考文献

parent 5e72e252
......@@ -5944,7 +5944,7 @@ author = {Yoshua Bengio and
Translation Models},
volume = {3265},
pages = {115--124},
publisher = {Springer},
publisher = { Association for Machine Translation in the Americas},
year = {2004}
}
......@@ -5955,19 +5955,20 @@ author = {Yoshua Bengio and
Bill Byrne},
title = {SGNMT - A Flexible NMT Decoding Platform for Quick Prototyping
of New Models and Search Strategies},
booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural
Language Processing, {EMNLP} 2017, Copenhagen, Denmark, September
9-11, 2017 - System Demonstrations},
pages = {25--30},
publisher = {Association for Computational Linguistics},
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2017}
}
@inproceedings{Liu2016AgreementOT,
  title     = {Agreement on Target-bidirectional Neural Machine Translation},
  author    = {Lemao Liu and
               Masao Utiyama and
               Andrew M. Finch and
               Eiichiro Sumita},
  pages     = {411--416},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2016}
}
@inproceedings{DBLP:conf/wmt/LiLXLLLWZXWFCLL19,
......@@ -5989,11 +5990,8 @@ author = {Yoshua Bengio and
Tong Xiao and
Jingbo Zhu},
title = {The NiuTrans Machine Translation Systems for {WMT19}},
booktitle = {Proceedings of the Fourth Conference on Machine Translation, {WMT}
2019, Florence, Italy, August 1-2, 2019 - Volume 2: Shared Task Papers,
Day 1},
pages = {257--266},
publisher = {Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019}
}
......@@ -6002,19 +6000,19 @@ author = {Yoshua Bengio and
Barry Haddow and
Alexandra Birch},
title = {Edinburgh Neural Machine Translation Systems for {WMT} 16},
booktitle = {Proceedings of the First Conference on Machine Translation, {WMT}
2016, colocated with {ACL} 2016, August 11-12, Berlin, Germany},
pages = {371--376},
publisher = {The Association for Computer Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016}
}
% WMT18 system description: a proceedings paper, attributed to WMT itself.
@inproceedings{Stahlberg2018TheUO,
  title     = {The University of Cambridge's Machine Translation Systems for {WMT18}},
  author    = {Felix Stahlberg and
               Adri{\`{a}} de Gispert and
               Bill Byrne},
  pages     = {504--512},
  publisher = {Conference on Machine Translation},
  year      = {2018}
}
@inproceedings{DBLP:conf/aaai/ZhangSQLJW18,
......@@ -6025,20 +6023,18 @@ author = {Yoshua Bengio and
Rongrong Ji and
Hongji Wang},
title = {Asynchronous Bidirectional Decoding for Neural Machine Translation},
booktitle = {Proceedings of the Thirty-Second {AAAI} Conference on Artificial Intelligence,
(AAAI-18), the 30th innovative Applications of Artificial Intelligence
(IAAI-18), and the 8th {AAAI} Symposium on Educational Advances in
Artificial Intelligence (EAAI-18), New Orleans, Louisiana, USA, February
2-7, 2018},
pages = {5698--5705},
publisher = {{AAAI} Press},
publisher = { AAAI Conference on Artificial Intelligence},
year = {2018}
}
% Published at APSIPA ASC 2017, not at the Asia-Pacific Services Computing Conference.
@inproceedings{Li2017EnhancedNM,
  title     = {Enhanced neural machine translation by learning from draft},
  author    = {Aodong Li and
               Shiyue Zhang and
               Dong Wang and
               Thomas Fang Zheng},
  publisher = {Asia-Pacific Signal and Information Processing Association Annual Summit and Conference},
  year      = {2017},
  pages     = {1583--1587}
}
......@@ -6046,120 +6042,141 @@ author = {Yoshua Bengio and
@inproceedings{ElMaghraby2018EnhancingTF,
  title     = {Enhancing Translation from English to Arabic Using Two-Phase Decoder Translation},
  author    = {Ayah ElMaghraby and Ahmed Rafea},
  pages     = {539--549},
  publisher = {Intelligent Systems and Applications},
  year      = {2018}
}
@inproceedings{Geng2018AdaptiveMD,
  title     = {Adaptive Multi-pass Decoder for Neural Machine Translation},
  author    = {Xinwei Geng and
               Xiaocheng Feng and
               Bing Qin and
               Ting Liu},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {523--532},
  year      = {2018}
}
@inproceedings{Lee2018DeterministicNN,
  title     = {Deterministic Non-Autoregressive Neural Sequence Modeling by Iterative Refinement},
  author    = {Jason Lee and Elman Mansimov and Kyunghyun Cho},
  pages     = {1173--1182},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
}
@inproceedings{Gu2019LevenshteinT,
  title     = {Levenshtein Transformer},
  author    = {Jiatao Gu and Changhan Wang and Jake Zhao},
  publisher = {Conference and Workshop on Neural Information Processing Systems},
  pages     = {11179--11189},
  year      = {2019}
}
@inproceedings{Guo2020JointlyMS,
  title     = {Jointly Masked Sequence-to-Sequence Model for Non-Autoregressive Neural Machine Translation},
  author    = {Junliang Guo and Linli Xu and Enhong Chen},
  pages     = {376--385},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2020}
}
@inproceedings{Stahlberg2018AnOS,
  title     = {An Operation Sequence Model for Explainable Neural Machine Translation},
  author    = {Felix Stahlberg and Danielle Saunders and Bill Byrne},
  pages     = {175--186},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
}
@inproceedings{Stern2019InsertionTF,
  title     = {Insertion Transformer: Flexible Sequence Generation via Insertion Operations},
  author    = {Mitchell Stern and William Chan and Jamie Kiros and Jakob Uszkoreit},
  publisher = {International Conference on Machine Learning},
  pages     = {5976--5985},
  year      = {2019}
}
@article{stling2017NeuralMT,
  title   = {Neural machine translation for low-resource languages},
  author  = {Robert {\"O}stling and J{\"{o}}rg Tiedemann},
  journal = {CoRR},
  year    = {2017},
  volume  = {abs/1708.05729}
}
@inproceedings{Kikuchi2016ControllingOL,
  title     = {Controlling Output Length in Neural Encoder-Decoders},
  author    = {Yuta Kikuchi and
               Graham Neubig and
               Ryohei Sasano and
               Hiroya Takamura and
               Manabu Okumura},
  pages     = {1328--1338},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2016}
}
@inproceedings{Takase2019PositionalET,
  title     = {Positional Encoding to Control Output Sequence Length},
  author    = {Sho Takase and
               Naoaki Okazaki},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {3999--4004},
  year      = {2019}
}
% Appeared in the WMT 2018 proceedings, not at the ACL annual meeting.
@inproceedings{Murray2018CorrectingLB,
  title     = {Correcting Length Bias in Neural Machine Translation},
  author    = {Kenton Murray and David Chiang},
  pages     = {212--223},
  publisher = {Conference on Machine Translation},
  year      = {2018}
}
@inproceedings{Sountsov2016LengthBI,
  title     = {Length bias in Encoder Decoder Models and a Case for Global Conditioning},
  author    = {Pavel Sountsov and Sunita Sarawagi},
  pages     = {1516--1525},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2016}
}
@inproceedings{Jean2015MontrealNM,
  title     = {Montreal Neural Machine Translation Systems for WMT'15},
  author    = {S{\'{e}}bastien Jean and
               Orhan Firat and
               Kyunghyun Cho and
               Roland Memisevic and
               Yoshua Bengio},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {134--140},
  year      = {2015}
}
@inproceedings{Yang2018OtemUtemOA,
  title     = {Otem{\&}Utem: Over- and Under-Translation Evaluation Metric for NMT},
  author    = {Jing Yang and
               Biao Zhang and
               Yue Qin and
               Xiangwen Zhang and
               Qian Lin and
               Jinsong Su},
  publisher = {CCF International Conference on Natural Language Processing and Chinese Computing},
  pages     = {291--302},
  year      = {2018}
}
% The expanded author list, page range and venue belong to the Mi et al. paper;
% they had ended up inside the following Kazimi entry.
@inproceedings{Mi2016CoverageEM,
  title     = {Coverage Embedding Models for Neural Machine Translation},
  author    = {Haitao Mi and
               Baskaran Sankaran and
               Zhiguo Wang and
               Abe Ittycheriah},
  pages     = {955--960},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2016}
}

@article{Kazimi2017CoverageFC,
  title   = {Coverage for Character Based Neural Machine Translation},
  author  = {M. Kazimi and Marta R. Costa-juss{\`a}},
  journal = {Procesamiento del Lenguaje Natural},
  year    = {2017},
  volume  = {59},
  pages   = {99--106}
}
@inproceedings{DBLP:conf/emnlp/HuangZM17,
......@@ -6176,7 +6193,8 @@ author = {Yoshua Bengio and
@inproceedings{Wiseman2016SequencetoSequenceLA,
  title     = {Sequence-to-Sequence Learning as Beam-Search Optimization},
  author    = {Sam Wiseman and Alexander M. Rush},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1296--1306},
  year      = {2016}
}
......@@ -6193,10 +6211,12 @@ author = {Yoshua Bengio and
@inproceedings{Ma2019LearningTS,
  title     = {Learning to Stop in Structured Prediction for Neural Machine Translation},
  author    = {Mingbo Ma and
               Renjie Zheng and
               Liang Huang},
  pages     = {1884--1889},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2019}
}
@inproceedings{KleinOpenNMT,
......@@ -6220,119 +6240,153 @@ author = {Yoshua Bengio and
year = {2015}
}
% NOTE(review): workshop venue added from memory of the authors' publication list; confirm.
@inproceedings{Eisner2011LearningST,
  title     = {Learning Speed-Accuracy Tradeoffs in Nondeterministic Inference Algorithms},
  author    = {Jason Eisner and Daum{\'e} III, Hal},
  publisher = {NIPS Workshop on Computational Trade-offs in Statistical Learning},
  year      = {2011}
}
@inproceedings{Jiang2012LearnedPF,
  title     = {Learned Prioritization for Trading Off Accuracy and Speed},
  author    = {Jiarong Jiang and Adam R. Teichert and Daum{\'e} III, Hal and Jason Eisner},
  publisher = {Conference and Workshop on Neural Information Processing Systems},
  pages     = {1340--1348},
  year      = {2012}
}
@inproceedings{Zheng2020OpportunisticDW,
  title     = {Opportunistic Decoding with Timely Correction for Simultaneous Translation},
  author    = {Renjie Zheng and
               Mingbo Ma and
               Baigong Zheng and
               Kaibo Liu and
               Liang Huang},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {437--442},
  year      = {2020}
}
@inproceedings{Ma2019STACLST,
  title     = {STACL: Simultaneous Translation with Implicit Anticipation and Controllable Latency using Prefix-to-Prefix Framework},
  author    = {Mingbo Ma and
               Liang Huang and
               Hao Xiong and
               Renjie Zheng and
               Kaibo Liu and
               Baigong Zheng and
               Chuanqiang Zhang and
               Zhongjun He and
               Hairong Liu and
               Xing Li and
               Hua Wu and
               Haifeng Wang},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {3025--3036},
  year      = {2019}
}
@inproceedings{Gimpel2013ASE,
  title     = {A Systematic Exploration of Diversity in Machine Translation},
  author    = {Kevin Gimpel and Dhruv Batra and Chris Dyer and Gregory Shakhnarovich},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1100--1111},
  year      = {2013}
}
@article{Li2016MutualIA,
  title   = {Mutual Information and Diverse Decoding Improve Neural Machine Translation},
  author  = {Jiwei Li and Dan Jurafsky},
  journal = {CoRR},
  year    = {2016},
  volume  = {abs/1601.00372}
}
@inproceedings{Li2016ADO,
  title     = {A Diversity-Promoting Objective Function for Neural Conversation Models},
  author    = {Jiwei Li and
               Michel Galley and
               Chris Brockett and
               Jianfeng Gao and
               Bill Dolan},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {110--119},
  year      = {2016}
}
% Published at CoNLL 2018, not at COLING.
@inproceedings{He2018SequenceTS,
  title     = {Sequence to Sequence Mixture Model for Diverse Machine Translation},
  author    = {Xuanli He and Gholamreza Haffari and Mohammad Norouzi},
  pages     = {583--592},
  publisher = {Conference on Computational Natural Language Learning},
  year      = {2018}
}
@inproceedings{Shen2019MixtureMF,
  title     = {Mixture Models for Diverse Machine Translation: Tricks of the Trade},
  author    = {Tianxiao Shen and Myle Ott and Michael Auli and Marc'Aurelio Ranzato},
  pages     = {5719--5728},
  publisher = {International Conference on Machine Learning},
  year      = {2019}
}
% Published at EMNLP 2020, not at the ACL annual meeting.
@inproceedings{Wu2020GeneratingDT,
  title     = {Generating Diverse Translation from Model Distribution with Dropout},
  author    = {Xuanfu Wu and Yang Feng and Chenze Shao},
  pages     = {1088--1097},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2020}
}
@inproceedings{Sun2020GeneratingDT,
  title     = {Generating Diverse Translation by Manipulating Multi-Head Attention},
  author    = {Zewei Sun and Shujian Huang and Hao-Ran Wei and Xin-Yu Dai and Jiajun Chen},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {8976--8983},
  year      = {2020}
}
@article{Vijayakumar2016DiverseBS,
  title   = {Diverse Beam Search: Decoding Diverse Solutions from Neural Sequence Models},
  author  = {Ashwin K. Vijayakumar and
             Michael Cogswell and
             Ramprasaath R. Selvaraju and
             Qing Sun and
             Stefan Lee and
             David J. Crandall and
             Dhruv Batra},
  journal = {CoRR},
  year    = {2016},
  volume  = {abs/1610.02424}
}
@inproceedings{Liu2014SearchAwareTF,
  title     = {Search-Aware Tuning for Machine Translation},
  author    = {Lemao Liu and
               Liang Huang},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1942--1952},
  year      = {2014}
}
@inproceedings{Yu2013MaxViolationPA,
  title     = {Max-Violation Perceptron and Forced Decoding for Scalable MT Training},
  author    = {Heng Yu and Liang Huang and Haitao Mi and Kai Zhao},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1112--1123},
  year      = {2013}
}
@inproceedings{Stahlberg2019OnNS,
  title     = {On NMT Search Errors and Model Errors: Cat Got Your Tongue?},
  author    = {Felix Stahlberg and
               Bill Byrne},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {3354--3360},
  year      = {2019}
}
@inproceedings{Niehues2017AnalyzingNM,
  title     = {Analyzing Neural MT Search and Model Performance},
  author    = {Jan Niehues and
               Eunah Cho and
               Thanh-Le Ha and
               Alex Waibel},
  pages     = {11--17},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2017}
}
......@@ -6347,26 +6401,31 @@ author = {Yoshua Bengio and
@inproceedings{Ranzato2016SequenceLT,
  title     = {Sequence Level Training with Recurrent Neural Networks},
  author    = {Marc'Aurelio Ranzato and
               Sumit Chopra and
               Michael Auli and
               Wojciech Zaremba},
  publisher = {International Conference on Learning Representations},
  year      = {2016}
}
% booktitle is only honoured for proceedings-type entries, hence the entry type.
@inproceedings{Bengio2015ScheduledSF,
  title     = {Scheduled Sampling for Sequence Prediction with Recurrent Neural Networks},
  author    = {Samy Bengio and
               Oriol Vinyals and
               Navdeep Jaitly and
               Noam Shazeer},
  booktitle = {Conference and Workshop on Neural Information Processing Systems},
  pages     = {1171--1179},
  year      = {2015}
}
@inproceedings{Zhang2019BridgingTG,
  title     = {Bridging the Gap between Training and Inference for Neural Machine Translation},
  author    = {Wen Zhang and Yang Feng and Fandong Meng and Di You and Qun Liu},
  pages     = {4334--4343},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}
@inproceedings{DBLP:conf/acl/ShenCHHWSL16,
......@@ -6382,15 +6441,6 @@ author = {Yoshua Bengio and
year = {2016},
}
@article{Gage1994ANA,
  title   = {A new algorithm for data compression},
  author  = {Philip Gage},
  journal = {The C Users Journal},
  year    = {1994},
  volume  = {12},
  number  = {2},
  pages   = {23--38}
}
@inproceedings{DBLP:conf/acl/SennrichHB16a,
author = {Rico Sennrich and
Barry Haddow and
......@@ -6434,26 +6484,31 @@ author = {Yoshua Bengio and
@article{Narang2017BlockSparseRN,
  title   = {Block-Sparse Recurrent Neural Networks},
  author  = {Sharan Narang and Eric Undersander and Gregory Diamos},
  journal = {CoRR},
  year    = {2017},
  volume  = {abs/1711.02782}
}
@article{Gale2019TheSO,
  title   = {The State of Sparsity in Deep Neural Networks},
  author  = {Trevor Gale and
             Erich Elsen and
             Sara Hooker},
  journal = {CoRR},
  year    = {2019},
  volume  = {abs/1902.09574}
}
@inproceedings{Michel2019AreSH,
  author    = {Paul Michel and
               Omer Levy and
               Graham Neubig},
  title     = {Are Sixteen Heads Really Better than One?},
  publisher = {Conference and Workshop on Neural Information Processing Systems},
  pages     = {14014--14024},
  year      = {2019}
}
@inproceedings{DBLP:journals/corr/abs-1905-09418,
......@@ -6481,17 +6536,11 @@ author = {Yoshua Bengio and
@article{Katharopoulos2020TransformersAR,
  title   = {Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention},
  author  = {Angelos Katharopoulos and Apoorv Vyas and Nikolaos Pappas and Fran{\c{c}}ois Fleuret},
  journal = {CoRR},
  year    = {2020},
  volume  = {abs/2006.16236}
}
% This work is a doctoral thesis, not a conference paper.
@phdthesis{Beal2003VariationalAF,
  title  = {Variational algorithms for approximate Bayesian inference},
  author = {Matthew J. Beal},
  school = {University College London},
  year   = {2003}
}
@article{xiao2011language,
title ={Language Modeling for Syntax-Based Machine Translation Using Tree Substitution Grammars: A Case Study on Chinese-English Translation},
author ={Xiao, Tong and Zhu, Jingbo and Zhu, Muhua},
......@@ -6504,33 +6553,40 @@ author = {Yoshua Bengio and
@inproceedings{Li2009VariationalDF,
  title     = {Variational Decoding for Statistical Machine Translation},
  author    = {Zhifei Li and
               Jason Eisner and
               Sanjeev Khudanpur},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {593--601},
  year      = {2009}
}
@article{Bastings2019ModelingLS,
  title   = {Modeling Latent Sentence Structure in Neural Machine Translation},
  author  = {Jasmijn Bastings and
             Wilker Aziz and
             Ivan Titov and
             Khalil Sima'an},
  journal = {CoRR},
  volume  = {abs/1901.06436},
  year    = {2019}
}
@inproceedings{Shah2018GenerativeNM,
  title     = {Generative Neural Machine Translation},
  author    = {Harshil Shah and
               David Barber},
  publisher = {Conference and Workshop on Neural Information Processing Systems},
  pages     = {1353--1362},
  year      = {2018}
}
@inproceedings{Su2018VariationalRN,
  title     = {Variational Recurrent Neural Machine Translation},
  author    = {Jinsong Su and Shan Wu and Deyi Xiong and Yaojie Lu and Xianpei Han and Biao Zhang},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {5488--5495},
  year      = {2018}
}
@inproceedings{DBLP:journals/corr/GehringAGYD17,
......@@ -6549,127 +6605,161 @@ author = {Yoshua Bengio and
@inproceedings{Wei2019ImitationLF,
  title     = {Imitation Learning for Non-Autoregressive Neural Machine Translation},
  author    = {Bingzhen Wei and Mingxuan Wang and Hao Zhou and Junyang Lin and Xu Sun},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1304--1312},
  year      = {2019}
}
@inproceedings{Shao2019RetrievingSI,
  title     = {Retrieving Sequential Information for Non-Autoregressive Neural Machine Translation},
  author    = {Chenze Shao and
               Yang Feng and
               Jinchao Zhang and
               Fandong Meng and
               Xilin Chen and
               Jie Zhou},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {3013--3024},
  year      = {2019}
}
@inproceedings{Akoury2019SyntacticallyST,
  title     = {Syntactically Supervised Transformers for Faster Neural Machine Translation},
  author    = {Nader Akoury and Kalpesh Krishna and Mohit Iyyer},
  pages     = {1269--1281},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}
@inproceedings{Guo2020FineTuningBC,
  title     = {Fine-Tuning by Curriculum Learning for Non-Autoregressive Neural Machine Translation},
  author    = {Junliang Guo and
               Xu Tan and
               Linli Xu and
               Tao Qin and
               Enhong Chen and
               Tie-Yan Liu},
  pages     = {7839--7846},
  publisher = {AAAI Conference on Artificial Intelligence},
  year      = {2020}
}
@inproceedings{Ran2020LearningTR,
  title     = {Learning to Recover from Multi-Modality Errors for Non-Autoregressive Neural Machine Translation},
  author    = {Qiu Ran and Yankai Lin and Peng Li and Jie Zhou},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {3059--3069},
  year      = {2020}
}
@inproceedings{Liu2020FastBERTAS,
  title     = {{FastBERT}: a Self-distilling {BERT} with Adaptive Inference Time},
  author    = {Weijie Liu and
               Peng Zhou and
               Zhiruo Wang and
               Zhe Zhao and
               Haotang Deng and
               Qi Ju},
  pages     = {6035--6044},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2020}
}
@inproceedings{Elbayad2020DepthAdaptiveT,
  title     = {Depth-Adaptive Transformer},
  author    = {Maha Elbayad and
               Jiatao Gu and
               Edouard Grave and
               Michael Auli},
  publisher = {International Conference on Learning Representations},
  year      = {2020}
}
% The year must be kept when the arXiv fields are replaced by the conference venue.
@inproceedings{Lan2020ALBERTAL,
  title     = {{ALBERT}: A Lite {BERT} for Self-supervised Learning of Language Representations},
  author    = {Zhenzhong Lan and Mingda Chen and Sebastian Goodman and Kevin Gimpel and Piyush Sharma and Radu Soricut},
  publisher = {International Conference on Learning Representations},
  year      = {2020}
}
@inproceedings{Han2015LearningBW,
  title     = {Learning both Weights and Connections for Efficient Neural Network},
  author    = {Song Han and
               Jeff Pool and
               John Tran and
               William J. Dally},
  publisher = {Conference and Workshop on Neural Information Processing Systems},
  pages     = {1135--1143},
  year      = {2015}
}
% Title casing follows the paper; the acronym is brace-protected against recasing.
@inproceedings{Lee2019SNIPSN,
  author    = {Namhoon Lee and
               Thalaiyasingam Ajanthan and
               Philip H. S. Torr},
  title     = {{SNIP}: Single-shot Network Pruning based on Connection Sensitivity},
  publisher = {International Conference on Learning Representations},
  year      = {2019}
}
@inproceedings{Frankle2019TheLT,
  title     = {The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks},
  author    = {Jonathan Frankle and Michael Carbin},
  publisher = {International Conference on Learning Representations},
  year      = {2019}
}
@inproceedings{Brix2020SuccessfullyAT,
  author    = {Christopher Brix and
               Parnia Bahar and
               Hermann Ney},
  title     = {Successfully Applying the Stabilized Lottery Ticket Hypothesis to
               the Transformer Architecture},
  pages     = {3909--3915},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2020}
}
@article{Liu2019RethinkingTV,
  title   = {Rethinking the Value of Network Pruning},
  author  = {Zhuang Liu and
             Mingjie Sun and
             Tinghui Zhou and
             Gao Huang and
             Trevor Darrell},
  journal = {CoRR},
  year    = {2019},
  volume  = {abs/1810.05270}
}
% The expanded author list, pages and venue belong to the network-slimming paper;
% they had ended up inside the following Cheong entry.
@inproceedings{Liu2017LearningEC,
  author    = {Zhuang Liu and
               Jianguo Li and
               Zhiqiang Shen and
               Gao Huang and
               Shoumeng Yan and
               Changshui Zhang},
  title     = {Learning Efficient Convolutional Networks through Network Slimming},
  pages     = {2755--2763},
  publisher = {{IEEE} International Conference on Computer Vision},
  year      = {2017}
}

@inproceedings{Cheong2019transformersZ,
  title  = {transformers.zip : Compressing Transformers with Pruning and Quantization},
  author = {Robin Cheong},
  year   = {2019}
}
@inproceedings{Banner2018ScalableMF,
  title     = {Scalable Methods for 8-bit Training of Neural Networks},
  author    = {Ron Banner and
               Itay Hubara and
               Elad Hoffer and
               Daniel Soudry},
  publisher = {Conference and Workshop on Neural Information Processing Systems},
  pages     = {5151--5159},
  year      = {2018}
}
@article{Hubara2017QuantizedNN,
  title   = {Quantized Neural Networks: Training Neural Networks with Low Precision Weights and Activations},
  author  = {Itay Hubara and Matthieu Courbariaux and Daniel Soudry and Ran El-Yaniv and Yoshua Bengio},
  journal = {Journal of Machine Learning Research},
  year    = {2017},
  volume  = {18},
  pages   = {187:1--187:30}
......@@ -6687,62 +6777,100 @@ author = {Yoshua Bengio and
@inproceedings{Munim2019SequencelevelKD,
  title     = {Sequence-level Knowledge Distillation for Model Compression of Attention-based Sequence-to-sequence Speech Recognition},
  author    = {Raden Mu'az Mun'im and Nakamasa Inoue and Koichi Shinoda},
  publisher = {{IEEE} International Conference on Acoustics, Speech and Signal Processing},
  year      = {2019},
  pages     = {6151--6155}
}
@article{Tang2019DistillingTK,
  author  = {Raphael Tang and
             Yao Lu and
             Linqing Liu and
             Lili Mou and
             Olga Vechtomova and
             Jimmy Lin},
  title   = {Distilling Task-Specific Knowledge from {BERT} into Simple Neural
             Networks},
  journal = {CoRR},
  volume  = {abs/1903.12136},
  year    = {2019}
}
@inproceedings{Jiao2020TinyBERTDB,
  author    = {Xiaoqi Jiao and
               Yichun Yin and
               Lifeng Shang and
               Xin Jiang and
               Xiao Chen and
               Linlin Li and
               Fang Wang and
               Qun Liu},
  title     = {{TinyBERT}: Distilling {BERT} for Natural Language Understanding},
  pages     = {4163--4174},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2020}
}
@article{Ghazvininejad2020AlignedCE,
  author  = {Marjan Ghazvininejad and
             Vladimir Karpukhin and
             Luke Zettlemoyer and
             Omer Levy},
  title   = {Aligned Cross Entropy for Non-Autoregressive Machine Translation},
  journal = {CoRR},
  volume  = {abs/2004.01655},
  year    = {2020}
}
@inproceedings{Shao2020MinimizingTB,
  author    = {Chenze Shao and
               Jinchao Zhang and
               Yang Feng and
               Fandong Meng and
               Jie Zhou},
  title     = {Minimizing the Bag-of-Ngrams Difference for Non-Autoregressive Neural
               Machine Translation},
  pages     = {198--205},
  publisher = {AAAI Conference on Artificial Intelligence},
  year      = {2020}
}
@inproceedings{Ma2019FlowSeqNC,
  title     = {FlowSeq: Non-Autoregressive Conditional Sequence Generation with Generative Flow},
  author    = {Xuezhe Ma and
               Chunting Zhou and
               Xian Li and
               Graham Neubig and
               Eduard H. Hovy},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {4281--4291},
  year      = {2019}
}
@inproceedings{Guo2019NonAutoregressiveNM,
  title     = {Non-Autoregressive Neural Machine Translation with Enhanced Decoder Input},
  author    = {Junliang Guo and
               Xu Tan and
               Di He and
               Tao Qin and
               Linli Xu and
               Tie-Yan Liu},
  pages     = {3723--3730},
  publisher = {AAAI Conference on Artificial Intelligence},
  year      = {2019}
}
@article{Ran2019GuidingNN,
  author  = {Qiu Ran and
             Yankai Lin and
             Peng Li and
             Jie Zhou},
  title   = {Guiding Non-Autoregressive Neural Machine Translation Decoding with
             Reordering Information},
  journal = {CoRR},
  volume  = {abs/1911.02215},
  year    = {2019}
}
@inproceedings{vaswani2017attention,
......@@ -6774,73 +6902,96 @@ author = {Yoshua Bengio and
@inproceedings{Wang2019NonAutoregressiveMT,
  title     = {Non-Autoregressive Machine Translation with Auxiliary Regularization},
  author    = {Yiren Wang and
               Fei Tian and
               Di He and
               Tao Qin and
               ChengXiang Zhai and
               Tie-Yan Liu},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {5377--5384},
  year      = {2019}
}
@inproceedings{Kaiser2018FastDI,
  title     = {Fast Decoding in Sequence Models using Discrete Latent Variables},
  author    = {{\L}ukasz Kaiser and
               Aurko Roy and
               Ashish Vaswani and
               Niki Parmar and
               Samy Bengio and
               Jakob Uszkoreit and
               Noam Shazeer},
  publisher = {International Conference on Machine Learning},
  pages     = {2395--2404},
  year      = {2018}
}
@inproceedings{Tu2020ENGINEEI,
  title     = {ENGINE: Energy-Based Inference Networks for Non-Autoregressive Machine Translation},
  author    = {Lifu Tu and
               Richard Yuanzhe Pang and
               Sam Wiseman and
               Kevin Gimpel},
  pages     = {2819--2826},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2020}
}
@inproceedings{Shu2020LatentVariableNN,
  title     = {Latent-Variable Non-Autoregressive Neural Machine Translation with Deterministic Inference using a Delta Posterior},
  author    = {Raphael Shu and
               Jason Lee and
               Hideki Nakayama and
               Kyunghyun Cho},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {8846--8853},
  year      = {2020}
}
@inproceedings{Li2019HintBasedTF,
  title     = {Hint-Based Training for Non-Autoregressive Machine Translation},
  author    = {Zhuohan Li and
               Zi Lin and
               Di He and
               Fei Tian and
               Tao Qin and
               Liwei Wang and
               Tie-Yan Liu},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {5707--5712},
  year      = {2019}
}
@inproceedings{Ho2016ModelFreeIL,
  title     = {Model-Free Imitation Learning with Policy Optimization},
  author    = {Jonathan Ho and
               Jayesh K. Gupta and
               Stefano Ermon},
  publisher = {International Conference on Machine Learning},
  pages     = {2760--2769},
  year      = {2016}
}
@inproceedings{Ho2016GenerativeAI,
  title     = {Generative Adversarial Imitation Learning},
  author    = {Jonathan Ho and
               Stefano Ermon},
  publisher = {Conference and Workshop on Neural Information Processing Systems},
  pages     = {4565--4573},
  year      = {2016}
}
@article{Duan2017OneShotIL,
  title   = {One-Shot Imitation Learning},
  author  = {Yan Duan and
             Marcin Andrychowicz and
             Bradly C. Stadie and
             Jonathan Ho and
             Jonas Schneider and
             Ilya Sutskever and
             Pieter Abbeel and
             Wojciech Zaremba},
  journal = {CoRR},
  year    = {2017},
  volume  = {abs/1703.07326}
}
@inproceedings{Wang2018SemiAutoregressiveNM,
  title     = {Semi-Autoregressive Neural Machine Translation},
  author    = {Chunqi Wang and
               Ji Zhang and
               Haiqing Chen},
  booktitle = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {479--488},
  year      = {2018}
}
@inproceedings{Ghazvininejad2019MaskPredictPD,
  title     = {Mask-Predict: Parallel Decoding of Conditional Masked Language Models},
  author    = {Marjan Ghazvininejad and
               Omer Levy and
               Yinhan Liu and
               Luke Zettlemoyer},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {6111--6120},
  year      = {2019}
}
......@@ -6853,7 +7004,9 @@ author = {Yoshua Bengio and
@article{Zhou2019SynchronousBN,
title={Synchronous Bidirectional Neural Machine Translation},
author={L. Zhou and Jiajun Zhang and C. Zong},
author={Long Zhou and
Jiajun Zhang and
Chengqing Zong},
journal={Transactions of the Association for Computational Linguistics},
year={2019},
volume={7},
......@@ -6870,8 +7023,9 @@ author = {Yoshua Bengio and
@inproceedings{Feng2016ImprovingAM,
  title     = {Improving Attention Modeling with Implicit Distortion and Fertility for Machine Translation},
  author    = {Shi Feng and
               Shujie Liu and
               Nan Yang and
               Mu Li and
               Ming Zhou and
               Kenny Q. Zhu},
  booktitle = {International Conference on Computational Linguistics},
  pages     = {3082--3092},
  year      = {2016}
}
......@@ -6940,7 +7094,7 @@ author = {Yoshua Bengio and
@article{Peris2017InteractiveNM,
title={Interactive neural machine translation},
author={{\'A}lvaro Peris and Miguel Domingo and F. Casacuberta},
journal={Comput. Speech Lang.},
journal={Computer Speech and Language},
year={2017},
volume={45},
pages={201-220}
......@@ -6948,8 +7102,9 @@ author = {Yoshua Bengio and
@inproceedings{Peris2018ActiveLF,
  title     = {Active Learning for Interactive Neural Machine Translation of Data Streams},
  author    = {{\'A}lvaro Peris and
               Francisco Casacuberta},
  publisher = {The SIGNLL Conference on Computational Natural Language Learning},
  pages     = {151--160},
  year      = {2018}
}
......@@ -6974,7 +7129,7 @@ author = {Yoshua Bengio and
}
@article{61115,
author={J. {Lin}},
author={Jianhua Lin},
journal={IEEE Transactions on Information Theory},
title={Divergence measures based on the Shannon entropy},
year={1991},
......@@ -6988,13 +7143,8 @@ author = {Yoshua Bengio and
Atsushi Fujita},
title = {Recurrent Stacking of Layers for Compact Neural Machine Translation
Models},
booktitle = {The Thirty-Third {AAAI} Conference on Artificial Intelligence, {AAAI}
2019, The Thirty-First Innovative Applications of Artificial Intelligence
Conference, {IAAI} 2019, The Ninth {AAAI} Symposium on Educational
Advances in Artificial Intelligence, {EAAI} 2019, Honolulu, Hawaii,
USA, January 27 - February 1, 2019},
pages = {6292--6299},
publisher = {{AAAI} Press},
publisher = { AAAI Conference on Artificial Intelligence},
year = {2019}
}
......@@ -7082,10 +7232,8 @@ author = {Yoshua Bengio and
Dmitry Kalenichenko},
title = {Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only
Inference},
booktitle = {2018 {IEEE} Conference on Computer Vision and Pattern Recognition,
{CVPR} 2018, Salt Lake City, UT, USA, June 18-22, 2018},
publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
pages = {2704--2713},
publisher = {{IEEE} Computer Society},
year = {2018}
}
......@@ -7106,9 +7254,7 @@ author = {Yoshua Bengio and
Ran El-Yaniv and
Yoshua Bengio},
title = {Binarized Neural Networks},
booktitle = {Advances in Neural Information Processing Systems 29: Annual Conference
on Neural Information Processing Systems 2016, December 5-10, 2016,
Barcelona, Spain},
publisher = {Conference and Workshop on Neural Information Processing Systems},
pages = {4107--4115},
year = {2016}
}
......@@ -7131,10 +7277,8 @@ author = {Yoshua Bengio and
Muhua Zhu and
Huizhen Wang},
title = {Boosting-Based System Combination for Machine Translation},
booktitle = {{ACL} 2010, Proceedings of the 48th Annual Meeting of the Association
for Computational Linguistics, July 11-16, 2010, Uppsala, Sweden},
pages = {739--748},
publisher = {The Association for Computer Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2010}
}
......@@ -7146,11 +7290,9 @@ author = {Yoshua Bengio and
Philip C. Woodland},
title = {Consensus Network Decoding for Statistical Machine Translation System
Combination},
booktitle = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
and Signal Processing, {ICASSP} 2007, Honolulu, Hawaii, USA, April
15-20, 2007},
publisher = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
and Signal Processing},
pages = {105--108},
publisher = {{IEEE}},
year = {2007}
}
......@@ -7159,9 +7301,7 @@ author = {Yoshua Bengio and
Spyridon Matsoukas and
Richard M. Schwartz},
title = {Improved Word-Level System Combination for Machine Translation},
booktitle = {{ACL} 2007, Proceedings of the 45th Annual Meeting of the Association
for Computational Linguistics, June 23-30, 2007, Prague, Czech Republic},
publisher = {The Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2007}
}
......@@ -7172,10 +7312,8 @@ author = {Yoshua Bengio and
Richard M. Schwartz},
title = {Incremental Hypothesis Alignment for Building Confusion Networks with
Application to Machine Translation System Combination},
booktitle = {Proceedings of the Third Workshop on Statistical Machine Translation,
WMT@ACL 2008, Columbus, Ohio, USA, June 19, 2008},
publisher = {Proceedings of the Third Workshop on Statistical Machine Translation},
pages = {183--186},
publisher = {Association for Computational Linguistics},
year = {2008}
}
......@@ -7185,11 +7323,8 @@ author = {Yoshua Bengio and
Tong Xiao and
Ming Zhou},
title = {The Feature Subspace Method for SMT System Combination},
booktitle = {Proceedings of the 2009 Conference on Empirical Methods in Natural
Language Processing, {EMNLP} 2009, 6-7 August 2009, Singapore, {A}
meeting of SIGDAT, a Special Interest Group of the {ACL}},
publisher = {Conference on Empirical Methods in Natural Language Processing},
pages = {1096--1104},
publisher = {{ACL}},
year = {2009}
}
......@@ -7218,12 +7353,8 @@ author = {Yoshua Bengio and
Franz Josef Och and
Wolfgang Macherey},
title = {Lattice Minimum Bayes-Risk Decoding for Statistical Machine Translation},
booktitle = {2008 Conference on Empirical Methods in Natural Language Processing,
{EMNLP} 2008, Proceedings of the Conference, 25-27 October 2008, Honolulu,
Hawaii, USA, {A} meeting of SIGDAT, a Special Interest Group of the
{ACL}},
publisher = {Conference on Empirical Methods in Natural Language Processing},
pages = {620--629},
publisher = {{ACL}},
year = {2008}
}
......@@ -7236,10 +7367,8 @@ author = {Yoshua Bengio and
Yang Liu},
title = {Lattice-Based Recurrent Neural Network Encoders for Neural Machine
Translation},
booktitle = {Proceedings of the Thirty-First {AAAI} Conference on Artificial Intelligence,
February 4-9, 2017, San Francisco, California, {USA}},
publisher = {AAAI Conference on Artificial Intelligence},
pages = {3302--3308},
publisher = {{AAAI} Press},
year = {2017}
}
......@@ -7251,7 +7380,7 @@ author = {Yoshua Bengio and
publisher = {Proceedings of the Human Language Technology Conference of
the North American Chapter of the Association for Computational Linguistics},
pages = {464--468},
year = {2018},
year = {2018}
}
@inproceedings{WangLearning,
......@@ -7273,9 +7402,7 @@ author = {Yoshua Bengio and
Edouard Grave and
Armand Joulin},
title = {Reducing Transformer Depth on Demand with Structured Dropout},
booktitle = {8th International Conference on Learning Representations, {ICLR} 2020,
Addis Ababa, Ethiopia, April 26-30, 2020},
publisher = {OpenReview.net},
publisher = {International Conference on Learning Representations},
year = {2020}
}
......@@ -7283,16 +7410,10 @@ author = {Yoshua Bengio and
author = {Qiang Wang and
Tong Xiao and
Jingbo Zhu},
editor = {Trevor Cohn and
Yulan He and
Yang Liu},
title = {Training Flexible Depth Model by Multi-Task Learning for Neural Machine
Translation},
booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural
Language Processing: Findings, {EMNLP} 2020, Online Event, 16-20 November
2020},
pages = {4307--4312},
publisher = {Association for Computational Linguistics},
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2020}
}
......@@ -7303,8 +7424,7 @@ author = {Yoshua Bengio and
Furu Wei and
Ming Zhou},
title = {BERT-of-Theseus: Compressing {BERT} by Progressive Module Replacing},
journal = {CoRR},
volume = {abs/2002.02925},
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2020}
}
......@@ -7312,9 +7432,7 @@ author = {Yoshua Bengio and
author = {Alexei Baevski and
Michael Auli},
title = {Adaptive Input Representations for Neural Language Modeling},
booktitle = {7th International Conference on Learning Representations, {ICLR} 2019,
New Orleans, LA, USA, May 6-9, 2019},
publisher = {OpenReview.net},
journal = {arXiv preprint arXiv:1809.10853},
year = {2019}
}
......@@ -7362,9 +7480,7 @@ author = {Yoshua Bengio and
Ruslan Salakhutdinov and
Quoc V. Le},
title = {Mixtape: Breaking the Softmax Bottleneck Efficiently},
booktitle = {Advances in Neural Information Processing Systems 32: Annual Conference
on Neural Information Processing Systems 2019, NeurIPS 2019, 8-14
December 2019, Vancouver, BC, Canada},
booktitle = {Conference on Neural Information Processing Systems},
pages = {15922--15930},
year = {2019}
}
......@@ -7391,11 +7507,9 @@ author = {Yoshua Bengio and
Chenglong Wang and
Tong Xiao and
Jingbo Zhu},
title = {The NiuTrans System for {WNGT} 2020 Efficiency Task},
booktitle = {Proceedings of the Fourth Workshop on Neural Generation and Translation,
NGT@ACL 2020, Online, July 5-10, 2020},
title = {The NiuTrans System for WNGT 2020 Efficiency Task},
pages = {204--210},
publisher = {Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2020}
}
......@@ -7432,37 +7546,38 @@ author = {Yoshua Bengio and
@inproceedings{Sun2019BaiduNM,
  title     = {Baidu Neural Machine Translation Systems for WMT19},
  author    = {Meng Sun and
               Bojian Jiang and
               Hao Xiong and
               Zhongjun He and
               Hua Wu and
               Haifeng Wang},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {374--381},
  year      = {2019}
}
@inproceedings{Wang2018TencentNM,
  title     = {Tencent Neural Machine Translation Systems for WMT18},
  author    = {Mingxuan Wang and
               Li Gong and
               Wenhuan Zhu and
               Jun Xie and
               Chao Bian},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {522--527},
  year      = {2018}
}
@article{Bi2019MultiagentLF,
  title   = {Multi-agent Learning for Neural Machine Translation},
  author  = {Tianchi Bi and
             Hao Xiong and
             Zhongjun He and
             Hua Wu and
             Haifeng Wang},
  journal = {CoRR},
  volume  = {abs/1909.01101},
  year    = {2019}
}
@inproceedings{DBLP:conf/aclnmt/KoehnK17,
......@@ -7476,23 +7591,73 @@ author = {Yoshua Bengio and
@book{Held2013AppliedSI,
  title     = {Applied Statistical Inference: Likelihood and Bayes},
  author    = {Leonhard Held and
               Saban{\'e}s Bov{\'e}, Daniel},
  publisher = {Springer},
  year      = {2014}
}
@inproceedings{Zhang2016VariationalNM,
  title     = {Variational Neural Machine Translation},
  author    = {Biao Zhang and
               Deyi Xiong and
               Jinsong Su and
               Hong Duan and
               Min Zhang},
  pages     = {521--530},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2016}
}
@inproceedings{Silvey2018StatisticalI,
  title     = {Statistical Inference},
  author    = {S. D. Silvey},
  publisher = {Encyclopedia of Social Network Analysis and Mining},
  year      = {2018}
}
@inproceedings{Cheong2019transformersZ,
  title     = {transformers.zip : Compressing Transformers with Pruning and Quantization},
  author    = {Robin Cheong and
               Robel Daniel},
  publisher = {Stanford University},
  year      = {2019}
}
@phdthesis{Beal2003VariationalAF,
  title  = {Variational algorithms for approximate Bayesian inference},
  author = {Matthew J. Beal},
  school = {University College London},
  year   = {2003}
}
@article{Gage1994ANA,
  title   = {A new algorithm for data compression},
  author  = {Philip Gage},
  journal = {The C Users Journal},
  year    = {1994},
  volume  = {12},
  pages   = {23--38}
}
@inproceedings{Eisner2011LearningST,
  title     = {Learning Speed-Accuracy Tradeoffs in Nondeterministic Inference Algorithms},
  author    = {Jason Eisner and
               Daum{\'e} III, Hal},
  publisher = {Conference and Workshop on Neural Information Processing Systems},
  year      = {2011}
}
@article{Kazimi2017CoverageFC,
  title         = {Coverage for Character Based Neural Machine Translation},
  author        = {M. Kazimi and
                   Marta R. Costa-juss{\`a}},
  journal       = {Procesamiento del Lenguaje Natural},
  year          = {2017},
  volume        = {59},
  pages         = {99--106},
  eprint        = {1810.02340},
  archiveprefix = {arXiv}
}
%%%%% chapter 14------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论