% !Mode:: "TeX:UTF-8"
% !TEX encoding = UTF-8 Unicode
% new

% Bibliography database, grouped by chapter (banner comments mark the
% chapter boundaries). Conventions used by the entries below:
%   * Books use the book entry type. Every other work uses the
%     inproceedings entry type and stores its venue (journal or
%     conference name) in the "publisher" field.
%   * Each field name occurs once per entry. Where an entry used to carry
%     a second "publisher" value (the actual imprint), that value is kept
%     in the non-standard "imprint" field, which standard styles ignore.
%   * Authors are separated by " and "; page ranges use "--".
% Every entry starts on its own line so that no entry shares a physical
% line with a percent-prefixed comment.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 1------------------------------------------------------

@book{慧立2000大慈恩寺三藏法師傳,
  title = {大慈恩寺三藏法師傳},
  author = {慧立 and 彦悰 and 道宣},
  volume = {2},
  year = {2000},
  publisher = {中华书局},
}

@book{2019cns,
  title = {2019中国语言服务行业发展报告},
  author = {中国翻译协会},
  publisher = {中国翻译协会},
  year = {2019},
}

@inproceedings{赵军峰2019深化改革,
  title = {深化改革 探讨创新 推进发展——全国翻译专业学位研究生教育2019年会综述},
  author = {赵军峰 and 姚恺璇},
  publisher = {中国翻译},
  year = {2019},
}

@book{knowlson1975universal,
  title = {Universal Language Schemes in England and France 1600-1800},
  author = {James {Knowlson}},
  year = {1975},
  publisher = {University of Toronto Press},
}

@inproceedings{DBLP:journals/bstj/Shannon48,
  author = {Claude E. Shannon},
  title = {A mathematical theory of communication},
  publisher = {Bell System Technical Journal},
  volume = {27},
  number = {3},
  pages = {379--423},
  year = {1948},
}

@inproceedings{shannon1949the,
  title = {The mathematical theory of communication},
  author = {Claude E. {Shannon} and Warren {Weaver}},
  publisher = {IEEE Transactions on Instrumentation and Measurement},
  volume = {13},
  year = {1949},
}

@inproceedings{weaver1955translation,
  title = {Translation},
  author = {Weaver, Warren},
  publisher = {Machine translation of languages},
  volume = {14},
  pages = {15--23},
  year = {1955},
  imprint = {Cambridge: Technology Press, MIT},
}

@inproceedings{chomsky1957syntactic,
  title = {Syntactic Structures},
  author = {Chomsky, Noam},
  publisher = {Language},
  volume = {33},
  number = {3},
  year = {1957},
}

@inproceedings{DBLP:conf/coling/SatoN90,
  author = {Satoshi Sato and Makoto Nagao},
  title = {Toward Memory-based Translation},
  publisher = {International Conference on Computational Linguistics},
  pages = {247--252},
  year = {1990},
}

@inproceedings{DBLP:journals/coling/BrownPPM94,
  author = {Peter F. Brown and Della Pietra, Stephen and Della Pietra, Vincent J. and Robert L. Mercer},
  title = {The Mathematics of Statistical Machine Translation: Parameter Estimation},
  publisher = {Computational Linguistics},
  volume = {19},
  number = {2},
  pages = {263--311},
  year = {1993},
}

@inproceedings{nirenburg1989knowledge,
  title = {Knowledge-based machine translation},
  author = {Nirenburg, Sergei},
  publisher = {Machine Translation},
  volume = {4},
  number = {1},
  pages = {5--24},
  year = {1989},
  imprint = {Springer},
}

@book{hutchins1986machine,
  title = {Machine translation: past, present, future},
  author = {Hutchins, William John},
  year = {1986},
  publisher = {Ellis Horwood Chichester},
}

@inproceedings{zarechnak1979history,
  title = {The history of machine translation},
  author = {Zarechnak, Michael},
  publisher = {Machine Translation},
  volume = {1979},
  pages = {1--87},
  year = {1979},
}

@book{冯志伟2004机器翻译研究,
  title = {机器翻译研究},
  author = {冯志伟},
  publisher = {中国对外翻译出版公司},
  year = {2004},
}

@inproceedings{王宝库1991机器翻译系统中一种规则描述语言,
  title = {机器翻译系统中一种规则描述语言(CTRDL)},
  author = {王宝库 and 张中义 and 姚天顺},
  publisher = {中文信息学报},
  volume = {5},
  number = {4},
  year = {1991},
}

@inproceedings{唐泓英1995基于搭配词典的词汇语义驱动算法,
  title = {基于搭配词典的词汇语义驱动算法},
  author = {唐泓英 and 姚天顺},
  publisher = {软件学报},
  volume = {6},
  number = {A01},
  pages = {78--85},
  year = {1995},
}

@inproceedings{nagao1984framework,
  title = {A framework of a mechanical translation between Japanese and English by analogy principle},
  author = {Nagao, Makoto},
  publisher = {Artificial and human intelligence},
  pages = {351--354},
  year = {1984},
}

@inproceedings{gale1993a,
  title = {A program for aligning sentences in bilingual corpora},
  author = {William A. {Gale} and Kenneth W. {Church}},
  publisher = {Computational Linguistics},
  volume = {19},
  number = {1},
  pages = {75--102},
  year = {1993},
}

@inproceedings{Wu2016GooglesNM,
  author = {Yonghui Wu and Mike Schuster and Zhifeng Chen and Quoc V. Le and Mohammad Norouzi and Wolfgang Macherey and Maxim Krikun and Yuan Cao and Qin Gao and Klaus Macherey and Jeff Klingner and Apurva Shah and Melvin Johnson and Xiaobing Liu and Lukasz Kaiser and Stephan Gouws and Yoshikiyo Kato and Taku Kudo and Hideto Kazawa and Keith Stevens and George Kurian and Nishant Patil and Wei Wang and Cliff Young and Jason Smith and Jason Riesa and Alex Rudnick and Oriol Vinyals and Greg Corrado and Macduff Hughes and Jeffrey Dean},
  title = {Google's Neural Machine Translation System: Bridging the Gap between Human and Machine Translation},
  publisher = {CoRR},
  volume = {abs/1609.08144},
  year = {2016},
}

@inproceedings{DBLP:journals/corr/LuongPM15,
  author = {Thang Luong and Hieu Pham and Christopher D. Manning},
  title = {Effective Approaches to Attention-based Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {1412--1421},
  year = {2015},
}

@inproceedings{DBLP:journals/corr/GehringAGYD17,
  author = {Jonas Gehring and Michael Auli and David Grangier and Denis Yarats and Yann N. Dauphin},
  title = {Convolutional Sequence to Sequence Learning},
  publisher = {International Conference on Machine Learning},
  volume = {70},
  pages = {1243--1252},
  year = {2017},
}

@inproceedings{bahdanau2014neural,
  author = {Dzmitry Bahdanau and Kyunghyun Cho and Yoshua Bengio},
  title = {Neural Machine Translation by Jointly Learning to Align and Translate},
  publisher = {International Conference on Learning Representations},
  year = {2015},
}

@inproceedings{NIPS2014_5346,
  author = {Ilya Sutskever and Oriol Vinyals and Quoc V. Le},
  title = {Sequence to Sequence Learning with Neural Networks},
  publisher = {Advances in Neural Information Processing Systems},
  pages = {3104--3112},
  year = {2014},
}

@book{koehn2009statistical,
  author = {Philipp Koehn},
  title = {Statistical Machine Translation},
  publisher = {Cambridge University Press},
  year = {2010},
}

@book{DBLP:journals/corr/abs-1709-07809,
  author = {Philipp Koehn},
  title = {Neural Machine Translation},
  publisher = {Cambridge University Press},
  year = {2020},
}

@book{宗成庆2013统计自然语言处理,
  title = {统计自然语言处理},
  author = {宗成庆},
  year = {2013},
  publisher = {清华大学出版社},
}

@book{Goodfellow-et-al-2016,
  author = {Ian J. Goodfellow and Yoshua Bengio and Aaron C. Courville},
  title = {Deep Learning},
  publisher = {{MIT} Press},
  year = {2016},
}

@inproceedings{goldberg2017neural,
  title = {Neural network methods for natural language processing},
  author = {Goldberg, Yoav},
  publisher = {Synthesis Lectures on Human Language Technologies},
  volume = {10},
  number = {1},
  pages = {1--309},
  year = {2017},
  imprint = {Morgan \& Claypool Publishers},
}

@book{周志华2016机器学习,
  title = {机器学习},
  author = {周志华},
  year = {2016},
  publisher = {清华大学出版社},
}

@book{李航2019统计学习方法,
  title = {统计学习方法},
  author = {李航},
  year = {2019},
  publisher = {清华大学出版社},
}

@book{邱锡鹏2020神经网络与深度学习,
  title = {神经网络与深度学习},
  author = {邱锡鹏},
  publisher = {机械工业出版社},
  year = {2020},
}

%%%%% chapter 1------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 2------------------------------------------------------

@book{kolmogorov2018foundations,
  title = {Foundations of the theory of probability: Second English Edition},
  author = {Kolmogorov, Andre Nikolaevich and Bharucha-Reid, Albert T},
  year = {2018},
  publisher = {Courier Dover Publications},
}

@book{mao-prob-book-2011,
  title = {概率论与数理统计教程: 第二版},
  author = {魏宗舒},
  year = {2011},
  publisher = {北京: 高等教育出版社},
}

% NOTE: the entries below keep their venue (journal or conference name) in
% the "publisher" field. Where an entry used to carry a second "publisher"
% value (the actual imprint), it is preserved in the non-standard "imprint"
% field, which standard styles ignore, so each field occurs once per entry.

@inproceedings{resnick1992adventures,
  author = {Barbour, A. and Resnick, Sidney},
  year = {1993},
  month = dec,
  pages = {1474},
  title = {Adventures in Stochastic Processes},
  volume = {88},
  publisher = {Journal of the American Statistical Association},
}

@book{liuke-markov-2004,
  title = {实用马尔可夫决策过程},
  author = {刘克},
  year = {2004},
  publisher = {清华大学出版社},
}

@inproceedings{gale1995good,
  author = {William A. Gale and Geoffrey Sampson},
  title = {Good-Turing Frequency Estimation Without Tears},
  publisher = {Journal of Quantitative Linguistics},
  volume = {2},
  number = {3},
  pages = {217--237},
  year = {1995},
}

@inproceedings{good1953population,
  title = {The population frequencies of species and the estimation of population parameters},
  author = {Good, Irving J},
  publisher = {Biometrika},
  volume = {40},
  number = {3-4},
  pages = {237--264},
  year = {1953},
  imprint = {Oxford University Press},
}

@inproceedings{kneser1995improved,
  author = {Reinhard Kneser and Hermann Ney},
  title = {Improved backing-off for M-gram language modeling},
  publisher = {International Conference on Acoustics, Speech, and Signal Processing},
  pages = {181--184},
  year = {1995},
}

@inproceedings{ney1991smoothing,
  title = {On smoothing techniques for bigram-based natural language modelling},
  author = {Ney, Hermann and Essen, Ute},
  publisher = {International Conference on Acoustics, Speech, and Signal Processing},
  pages = {825--828},
  year = {1991},
}

@inproceedings{stolcke2002srilm,
  author = {Andreas Stolcke},
  title = {{SRILM} - an extensible language modeling toolkit},
  publisher = {International Conference on Spoken Language Processing},
  year = {2002},
}

@inproceedings{chen1999empirical,
  author = {Stanley F. Chen and Joshua Goodman},
  title = {An empirical study of smoothing techniques for language modeling},
  publisher = {Computer Speech \& Language},
  volume = {13},
  number = {4},
  pages = {359--393},
  year = {1999},
}

@inproceedings{ney1994structuring,
  author = {Hermann Ney and Ute Essen and Reinhard Kneser},
  title = {On structuring probabilistic dependences in stochastic language modelling},
  publisher = {Computer Speech \& Language},
  volume = {8},
  number = {1},
  pages = {1--38},
  year = {1994},
}

@book{parsing2009speech,
  author = {Dan Jurafsky and James H. Martin},
  title = {Speech and language processing: an introduction to natural language processing, computational linguistics, and speech recognition, 2nd Edition},
  publisher = {Prentice Hall, Pearson Education International},
  year = {2009},
}

@book{DBLP:books/mg/CormenLR89,
  author = {Thomas H. Cormen and Charles E. Leiserson and Ronald L. Rivest},
  title = {Introduction to Algorithms},
  publisher = {The {MIT} Press and McGraw-Hill Book Company},
  year = {1989},
}

@book{even2011graph,
  title = {Graph algorithms},
  author = {Even, Shimon},
  year = {2011},
  publisher = {Cambridge University Press},
}

@inproceedings{tarjan1972depth,
  title = {Depth-First Search and Linear Graph Algorithms},
  author = {Robert Endre {Tarjan}},
  publisher = {SIAM Journal on Computing},
  volume = {1},
  number = {2},
  pages = {146--160},
  year = {1972},
}

@inproceedings{DBLP:journals/ai/SabharwalS11,
  author = {Ashish Sabharwal and Bart Selman},
  title = {S. Russell, P. Norvig, Artificial Intelligence: {A} Modern Approach, Third Edition},
  publisher = {Artificial Intelligence},
  volume = {175},
  number = {5-6},
  pages = {935--937},
  year = {2011},
}

@book{sahni1978fundamentals,
  title = {Fundamentals of Computer Algorithms},
  author = {Sartaj {Sahni} and Ellis {Horowitz}},
  year = {1978},
  publisher = {Computer Science Press},
}

@inproceedings{hart1968a,
  title = {A Formal Basis for the Heuristic Determination of Minimum Cost Paths},
  author = {Peter E. {Hart} and Nils J. {Nilsson} and Bertram {Raphael}},
  publisher = {IEEE Transactions on Systems Science and Cybernetics},
  volume = {4},
  number = {2},
  pages = {100--107},
  year = {1968},
}

@book{lowerre1976the,
  title = {The HARPY speech recognition system},
  author = {Bruce T. {Lowerre}},
  publisher = {Carnegie Mellon University},
  year = {1976},
}

@book{bishop1995neural,
  title = {Neural networks for pattern recognition},
  author = {Christopher M. {Bishop}},
  year = {1995},
  publisher = {Oxford University Press},
}

@inproceedings{åström1965optimal,
  title = {Optimal control of Markov processes with incomplete state information},
  author = {Karl Johan {Åström}},
  publisher = {Journal of Mathematical Analysis and Applications},
  volume = {10},
  number = {1},
  pages = {174--205},
  year = {1965},
}

@inproceedings{korf1990real,
  title = {Real-time heuristic search},
  author = {Richard E. {Korf}},
  publisher = {Artificial Intelligence},
  volume = {42},
  number = {2},
  pages = {189--211},
  year = {1990},
}

@inproceedings{DBLP:conf/emnlp/HuangZM17,
  author = {Liang Huang and Kai Zhao and Mingbo Ma},
  title = {When to Finish? Optimal Beam Search for Neural Text Generation (modulo beam size)},
  pages = {2134--2139},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2017},
}

@inproceedings{DBLP:conf/emnlp/Yang0M18,
  author = {Yilin Yang and Liang Huang and Mingbo Ma},
  title = {Breaking the Beam Search Curse: {A} Study of (Re-)Scoring Methods and Stopping Criteria for Neural Machine Translation},
  pages = {3054--3059},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2018},
}

@inproceedings{jelinek1980interpolated,
  title = {Interpolated estimation of Markov source parameters from sparse data},
  author = {F. {Jelinek}},
  publisher = {Pattern Recognition in Practice},
  pages = {381--397},
  year = {1980},
}

@inproceedings{katz1987estimation,
  title = {Estimation of probabilities from sparse data for the language model component of a speech recognizer},
  author = {S. {Katz}},
  publisher = {IEEE Transactions on Acoustics, Speech, and Signal Processing},
  volume = {35},
  number = {3},
  pages = {400--401},
  year = {1987},
}

@inproceedings{witten1991the,
  title = {The zero-frequency problem: estimating the probabilities of novel events in adaptive text compression},
  author = {I. H. {Witten} and T. C. {Bell}},
  publisher = {IEEE Transactions on Information Theory},
  volume = {37},
  number = {4},
  pages = {1085--1094},
  year = {1991},
}

@book{bell1990text,
  title = {Text compression},
  author = {Timothy C. {Bell} and John G. {Cleary} and Ian H. {Witten}},
  year = {1990},
  publisher = {Prentice Hall},
}

@inproceedings{goodman2001a,
  title = {A bit of progress in language modeling},
  author = {Joshua T. {Goodman}},
  publisher = {Computer Speech \& Language},
  volume = {15},
  number = {4},
  pages = {403--434},
  year = {2001},
}

@inproceedings{kirchhoff2005improved,
  title = {Improved Language Modeling for Statistical Machine Translation},
  author = {Katrin {Kirchhoff} and Mei {Yang}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {125--128},
  year = {2005},
}

@inproceedings{koehn2007factored,
  title = {Factored Translation Models},
  author = {Philipp {Koehn} and Hieu {Hoang}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {868--876},
  year = {2007},
}

@inproceedings{sarikaya2007joint,
  title = {Joint Morphological-Lexical Language Modeling for Machine Translation},
  author = {Ruhi {Sarikaya} and Yonggang {Deng}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {145--148},
  year = {2007},
}

@inproceedings{heafield2011kenlm,
  title = {KenLM: Faster and Smaller Language Model Queries},
  author = {Kenneth {Heafield}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {187--197},
  year = {2011},
}

@inproceedings{federico2006how,
  title = {How Many Bits Are Needed To Store Probabilities for Phrase-Based Translation?},
  author = {Marcello {Federico} and Nicola {Bertoldi}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {94--101},
  year = {2006},
}

@inproceedings{federico2007efficient,
  title = {Efficient Handling of N-gram Language Models for Statistical Machine Translation},
  author = {Marcello {Federico} and Mauro {Cettolo}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {88--95},
  year = {2007},
}

@inproceedings{talbot2007randomised,
  title = {Randomised Language Modelling for Statistical Machine Translation},
  author = {David {Talbot} and Miles {Osborne}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {512--519},
  year = {2007},
}

@inproceedings{talbot2007smoothed,
  title = {Smoothed Bloom Filter Language Models: Tera-Scale LMs on the Cheap},
  author = {David {Talbot} and Miles {Osborne}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {468--476},
  year = {2007},
}

@inproceedings{jing2019a,
  title = {A Survey on Neural Network Language Models},
  author = {Kun {Jing} and Jungang {Xu}},
  publisher = {arXiv preprint arXiv:1906.03591},
  year = {2019},
}

@inproceedings{bengio2003a,
  title = {A neural probabilistic language model},
  author = {Yoshua {Bengio} and Réjean {Ducharme} and Pascal {Vincent} and Christian {Janvin}},
  publisher = {Journal of Machine Learning Research},
  volume = {3},
  number = {6},
  pages = {1137--1155},
  year = {2003},
}

@inproceedings{mikolov2010recurrent,
  author = {Tomas Mikolov and Martin Karafi{\'{a}}t and Luk{\'{a}}s Burget and Jan Cernock{\'{y}} and Sanjeev Khudanpur},
  title = {Recurrent neural network based language model},
  pages = {1045--1048},
  publisher = {International Speech Communication Association},
  year = {2010},
}

@inproceedings{sundermeyer2012lstm,
  author = {Martin Sundermeyer and Ralf Schl{\"{u}}ter and Hermann Ney},
  title = {{LSTM} Neural Networks for Language Modeling},
  pages = {194--197},
  publisher = {International Speech Communication Association},
  year = {2012},
}

@inproceedings{vaswani2017attention,
  title = {Attention is All You Need},
  author = {Ashish {Vaswani} and Noam {Shazeer} and Niki {Parmar} and Jakob {Uszkoreit} and Llion {Jones} and Aidan N. {Gomez} and Lukasz {Kaiser} and Illia {Polosukhin}},
  publisher = {International Conference on Neural Information Processing},
  pages = {5998--6008},
  year = {2017},
}

@inproceedings{tillmann1997a,
  title = {A DP-based Search Using Monotone Alignments in Statistical Translation},
  author = {Christoph {Tillmann} and Stephan {Vogel} and Hermann {Ney} and Alex {Zubiaga}},
  publisher = {Morgan Kaufmann Publishers},
  pages = {289--296},
  year = {1997},
}

@inproceedings{DBLP:conf/acl/WangW97,
  author = {Ye-Yi Wang and Alex Waibel},
  title = {Decoding Algorithm in Statistical Machine Translation},
  pages = {366--372},
  publisher = {Morgan Kaufmann Publishers},
  year = {1997},
}

@inproceedings{DBLP:conf/acl/OchUN01,
  author = {Franz Josef Och and Nicola Ueffing and Hermann Ney},
  title = {An Efficient A* Search Algorithm for Statistical Machine Translation},
  publisher = {Proceedings of the {ACL} Workshop on Data-Driven Methods in Machine Translation},
  year = {2001},
}

@inproceedings{germann2001fast,
  title = {Fast Decoding and Optimal Decoding for Machine Translation},
  author = {Ulrich {Germann} and Michael {Jahr} and Kevin {Knight} and Daniel {Marcu} and Kenji {Yamada}},
  publisher = {Morgan Kaufmann Publishers},
  pages = {228--235},
  year = {2001},
}

@inproceedings{germann2003greedy,
  title = {Greedy decoding for statistical machine translation in almost linear time},
  author = {Ulrich {Germann}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {1--8},
  year = {2003},
}

@inproceedings{Koehn2007Moses,
  author = {Philipp Koehn and Hieu Hoang and Alexandra Birch and Chris Callison-Burch and Marcello Federico and Nicola Bertoldi and Brooke Cowan and Wade Shen and Christine Moran and Richard Zens and Chris Dyer and Ondrej Bojar and Alexandra Constantin and Evan Herbst},
  title = {Moses: Open Source Toolkit for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2007},
}

@inproceedings{DBLP:conf/amta/Koehn04,
  author = {Philipp Koehn},
  title = {Pharaoh: {A} Beam Search Decoder for Phrase-Based Statistical Machine Translation Models},
  volume = {3265},
  pages = {115--124},
  publisher = {Springer},
  year = {2004},
}

@inproceedings{bangalore2001a,
  title = {A finite-state approach to machine translation},
  author = {S. {Bangalore} and G. {Riccardi}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {381--388},
  year = {2001},
}

@inproceedings{DBLP:journals/mt/BangaloreR02,
  author = {Srinivas Bangalore and Giuseppe Riccardi},
  title = {Stochastic Finite-State Models for Spoken Language Machine Translation},
  publisher = {Machine Translation},
  volume = {17},
  number = {3},
  pages = {165--184},
  year = {2002},
}

@inproceedings{venugopal2007an,
  title = {An Efficient Two-Pass Approach to Synchronous-CFG Driven Statistical MT},
  author = {Ashish {Venugopal} and Andreas {Zollmann} and Stephan {Vogel}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {500--507},
  year = {2007},
}

@inproceedings{zollmann2007the,
  author = {Andreas Zollmann and Ashish Venugopal and Matthias Paulik and Stephan Vogel},
  title = {The Syntax Augmented {MT} {(SAMT)} System at the Shared Task for the 2007 {ACL} Workshop on Statistical Machine Translation},
  pages = {216--219},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2007},
}

@inproceedings{liu2006tree,
  author = {Yang Liu and Qun Liu and Shouxun Lin},
  title = {Tree-to-String Alignment Template for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2006},
}

@inproceedings{galley2006scalable,
  author = {Michel Galley and Jonathan Graehl and Kevin Knight and Daniel Marcu and Steve DeNeefe and Wei Wang and Ignacio Thayer},
  title = {Scalable Inference and Training of Context-Rich Syntactic Translation Models},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2006},
}

@inproceedings{chiang2005a,
  author = {David Chiang},
  title = {A Hierarchical Phrase-Based Model for Statistical Machine Translation},
  pages = {263--270},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2005},
}

%%%%% chapter 2------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 3------------------------------------------------------

@inproceedings{ng2002discriminative,
  author = {Ng, Andrew Y and Jordan, Michael I},
  title = {On Discriminative vs. Generative Classifiers: {A} comparison of logistic regression and naive Bayes},
  pages = {841--848},
  publisher = {{MIT} Press},
  year = {2001},
}

@inproceedings{huang2008coling,
  author = {Huang, Liang},
  title = {Coling 2008: Advanced Dynamic Programming in Computational Linguistics: Theory, Algorithms and Applications-Tutorial notes},
  year = {2008},
  publisher = {International Conference on Computational Linguistics},
}

@book{aho1972theory,
  author = {Aho, Alfred V and Ullman, Jeffrey D},
  title = {The theory of parsing, translation, and compiling},
  publisher = {Prentice-Hall Englewood Cliffs, NJ},
  year = {1973},
}

@inproceedings{rau1991extracting,
  title = {Extracting company names from text},
  author = {Rau, Lisa F},
  pages = {29--30},
  year = {1991},
  publisher = {IEEE Conference on Artificial Intelligence Application},
}

@inproceedings{张小衡1997中文机构名称的识别与分析,
  title = {中文机构名称的识别与分析},
  author = {张小衡 and 王玲玲},
  publisher = {中文信息学报},
  volume = {11},
  number = {4},
  pages = {22--33},
  year = {1997},
}

@inproceedings{lample2016neural,
  author = {Guillaume Lample and Miguel Ballesteros and Sandeep Subramanian and Kazuya Kawakami and Chris Dyer},
  title = {Neural Architectures for Named Entity Recognition},
  pages = {260--270},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2016},
}

@inproceedings{Baum1966Statistical,
  title = {Statistical Inference for Probabilistic Functions of Finite State Markov Chains},
  author = {Baum, Leonard E and Petrie, Ted},
  publisher = {Annals of Mathematical Statistics},
  volume = {37},
  number = {6},
  pages = {1554--1563},
  year = {1966},
}

@inproceedings{baum1970maximization,
  title = {A maximization technique occurring in the statistical analysis of probabilistic functions of Markov chains},
  author = {Baum, Leonard E and Petrie, Ted and Soules, George and Weiss, Norman},
  publisher = {Annals of Mathematical Statistics},
  volume = {41},
  number = {1},
  pages = {164--171},
  year = {1970},
}

@inproceedings{1977Maximum,
  title = {Maximum likelihood from incomplete data via the EM algorithm},
  author = {Dempster, Arthur P and Laird, Nan M and Rubin, Donald B},
  publisher = {Journal of the Royal Statistical Society: Series B (Methodological)},
  volume = {39},
  number = {1},
  pages = {1--22},
  year = {1977},
}

@inproceedings{1967Error,
  title = {Error bounds for convolutional codes and an asymptotically optimum decoding algorithm},
  author = {Viterbi, Andrew},
  publisher = {IEEE Transactions on Information Theory},
  volume = {13},
  number = {2},
  pages = {260--269},
  year = {1967},
}

@book{harrington2013机器学习实战,
  title = {机器学习实战},
  author = {Harrington, Peter},
  publisher = {人民邮电出版社, 北京},
  year = {2013},
}

@inproceedings{brants-2000-tnt,
  title = {TnT - {A} Statistical Part-of-Speech Tagger},
  author = {Brants, Thorsten},
  year = {2000},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {224--231},
}

@inproceedings{tsuruoka-tsujii-2005-chunk,
  title = {Chunk Parsing Revisited},
  author = {Yoshimasa Tsuruoka and Jun'ichi Tsujii},
  year = {2005},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {133--140},
}

@inproceedings{li-etal-2003-news-oriented,
  title = {News-Oriented Automatic Chinese Keyword Indexing},
  author = {Li, Sujian and Wang, Houfeng and Yu, Shiwen and Xin, Chengsheng},
  year = {2003},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {92--97},
}

@inproceedings{2015Bidirectional,
  title = {Bidirectional LSTM-CRF Models for Sequence Tagging},
  author = {Huang, Zhiheng and Xu, Wei and Yu, Kai},
  publisher = {CoRR},
  year = {2015},
}

@inproceedings{chiu2016named,
  title = {Named entity recognition with bidirectional LSTM-CNNs},
  author = {Chiu, Jason PC and Nichols, Eric},
  publisher = {Transactions of the Association for Computational Linguistics},
  volume = {4},
  pages = {357--370},
  year = {2016},
  imprint = {MIT Press},
}

@inproceedings{vzukov2018named,
  author = {Andrej Zukov Gregoric and Yoram Bachrach and Sam Coope},
  title = {Named Entity Recognition With Parallel Recurrent Neural Networks},
  pages = {69--74},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2018},
}

@inproceedings{Li2020A,
  title = {A Survey on Deep Learning for Named Entity Recognition},
  author = {Li, Jing and Sun, Aixin and Han, Jianglei and Li, Chenliang},
  publisher = {IEEE Transactions on Knowledge and Data Engineering},
  volume = {PP},
  number = {99},
  pages = {1--1},
  year = {2020},
}

@inproceedings{devlin2019bert,
  title = {Bert: Pre-training of deep bidirectional transformers for language understanding},
  author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  year = {2019},
  pages = {4171--4186},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
}

@inproceedings{conneau2019unsupervised,
  author = {Alexis Conneau and Kartikay Khandelwal and Naman Goyal and Vishrav Chaudhary and Guillaume Wenzek and Francisco Guzm{\'{a}}n and Edouard Grave and Myle Ott and Luke Zettlemoyer and Veselin Stoyanov},
  title = {Unsupervised Cross-lingual Representation Learning at Scale},
  pages = {8440--8451},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2020},
}

@book{chomsky1993lectures,
  title = {Lectures on government and binding: The Pisa lectures},
  author = {Chomsky, Noam},
  year = {1993},
  publisher = {Walter de Gruyter},
}

@inproceedings{DBLP:conf/acl/SennrichHB16a,
  author = {Rico Sennrich and Barry Haddow and Alexandra Birch},
  title = {Neural Machine Translation of Rare Words with Subword Units},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2016},
}

@inproceedings{刘挺1998最大概率分词问题及其解法,
  title = {最大概率分词问题及其解法},
  author = {刘挺 and 吴岩 and 王开铸},
  publisher = {哈尔滨工业大学学报},
  number = {06},
  pages = {37--41},
  year = {1998},
}

@inproceedings{丁洁2010基于最大概率分词算法的中文分词方法研究,
  title = {基于最大概率分词算法的中文分词方法研究},
  author = {丁洁},
  publisher = {科技信息},
  number = {21},
  pages = {I0075--I0075},
  year = {2010},
}

@inproceedings{1995University,
  title = {University of Sheffield: Description of the LaSIE-II system as used for MUC-7},
  author = {Kevin Humphreys and Robert J. Gaizauskas and Saliha Azzam and Charles Huyck and Brian Mitchell and Hamish Cunningham and Yorick Wilks},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {1995},
}

@inproceedings{krupka1998isoquest,
  title = {IsoQuest Inc.: Description of the NetOwl™ Extractor System as Used for MUC-7},
  author = {Krupka, George and Hausman, Kevin},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {1998},
}

@inproceedings{DBLP:conf/muc/BlackRM98,
  author = {Black, William J and Rinaldi, Fabio and Mowatt, David},
  title = {{FACILE:} Description of the {NE} System Used for {MUC-7}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {1998},
}

@inproceedings{1996Hidden,
  title = {Hidden Markov models},
  author = {Eddy, Sean R},
  publisher = {Current Opinion in Structural Biology},
  volume = {6},
  number = {3},
  pages = {361--365},
  year = {1996},
}

@inproceedings{lafferty2001conditional,
  author = {John D. Lafferty and Andrew McCallum and Fernando C. N. Pereira},
  title = {Conditional Random Fields: Probabilistic Models for Segmenting and Labeling Sequence Data},
  pages = {282--289},
  publisher = {Proceedings of the Eighteenth International Conference on Machine Learning},
  year = {2001},
}

@book{kapur1989maximum,
  title = {Maximum-entropy models in science and engineering},
  author = {Kapur, Jagat Narain},
  year = {1989},
  publisher = {John Wiley \& Sons},
}

@inproceedings{1998Support,
  title = {Support vector machines},
  author = {Hearst, Marti A. and Dumais, Susan T and Osuna, Edgar and Platt, John and Scholkopf, Bernhard},
  publisher = {IEEE Intelligent Systems \& Their Applications},
  volume = {13},
  number = {4},
  pages = {18--28},
  year = {1998},
}

@inproceedings{2011Natural,
  title = {Natural Language Processing (almost) from Scratch},
  author = {Collobert, Ronan and Weston, Jason and Bottou, Léon and Karlen, Michael and Kavukcuoglu, Koray and Kuksa, Pavel},
  publisher = {Journal of Machine Learning Research},
  volume = {12},
  number = {1},
  pages = {2493--2537},
  year = {2011},
}

@book{manning2008introduction,
  title = {Introduction to information retrieval},
  author = {Manning, Christopher D and Sch{\"u}tze, Hinrich and Raghavan, Prabhakar},
  year = {2008},
  publisher = {Cambridge University Press},
}

@inproceedings{berger1996maximum,
  title = {A maximum entropy approach to natural language processing},
  author = {Berger, Adam and Della Pietra, Stephen A and Della Pietra, Vincent J},
  publisher = {Computational Linguistics},
  volume = {22},
  number = {1},
  pages = {39--71},
  year = {1996},
}

@book{mitchell1996m,
  title = {Machine Learning},
  author = {Mitchell, Tom},
  publisher = {McGraw Hill},
  year = {1996},
}

@inproceedings{DBLP:conf/acl/OchN02,
  author = {Franz Josef Och and Hermann Ney},
  title = {Discriminative Training and Maximum Entropy Models for Statistical Machine Translation},
  pages = {295--302},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2002},
}

@incollection{mohri2008speech,
  title = {Speech recognition with weighted finite-state transducers},
  author = {Mohri, Mehryar and Pereira, Fernando and Riley, Michael},
  pages = {559--584},
  year = {2008},
  publisher = {Springer},
}

@inproceedings{bellman1966dynamic,
  title = {Dynamic programming},
  author = {Bellman, Richard},
  publisher = {Science},
  volume = {153},
  number = {3731},
  pages = {34--37},
  year = {1966},
}

%%%%% chapter 3------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 4------------------------------------------------------

@inproceedings{DBLP:conf/acl/PapineniRWZ02,
  author = {Kishore Papineni and Salim Roukos and Todd Ward and Wei-jing Zhu},
  title = {Bleu: a Method for Automatic Evaluation of Machine Translation},
  pages = {311--318},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2002},
}

@inproceedings{DBLP:journals/mt/ChurchH93,
  title = {Good applications for crummy machine translation},
  author = {Church, Kenneth W and Hovy, Eduard H},
  publisher = {Machine Translation},
  volume = {8},
  number = {4},
  pages = {239--258},
  year = {1993},
  imprint = {Springer},
}

@inproceedings{DBLP:conf/coling/SuWC92,
  author = {Keh-Yih Su and Ming-Wen Wu and Jing-Shin Chang},
  title = {A New Quantitative Quality Measure for Machine Translation Systems},
  publisher = {International Conference on Computational Linguistics},
  pages = {433--439},
  year = {1992},
}

@inproceedings{DBLP:conf/interspeech/TillmannVNZS97,
  title = {Accelerated DP based search for statistical translation},
  author = {Tillmann, Christoph and Vogel, Stephan and Ney, Hermann and Zubiaga, Arkaitz and Sawaf, Hassan},
  publisher = {European Conference on Speech Communication and Technology},
  year = {1997},
}

@inproceedings{snover2006study,
  title = {A study of translation edit rate with targeted human annotation},
  author = {Snover, Matthew and Dorr, Bonnie and Schwartz, Richard and Micciulla, Linnea and Makhoul, John},
  publisher = {Proceedings of association for machine translation in the Americas},
  volume = {200},
  number = {6},
  year = {2006},
}

@inproceedings{DBLP:conf/muc/Chinchor92,
  author = {Nancy Chinchor},
  title = {{MUC-4} evaluation metrics},
  pages = {22--29},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {1992},
}

@inproceedings{DBLP:conf/emnlp/ChiangDCN08,
  author = {David Chiang and Steve DeNeefe and Yee Seng Chan and Hwee Tou Ng},
  title = {Decomposability of Translation Metrics for Improved Evaluation and Efficient Algorithms},
  pages = {610--619},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2008},
}

@inproceedings{DBLP:conf/acl/BanerjeeL05,
  author = {Satanjeev Banerjee and Alon Lavie},
  title = {{METEOR:} An Automatic Metric for {MT} Evaluation with Improved Correlation with Human Judgments},
  pages = {65--72},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2005},
}

@inproceedings{DBLP:conf/wmt/DenkowskiL10,
  author = {Michael J. Denkowski and Alon Lavie},
  title = {{METEOR-NEXT} and the {METEOR} Paraphrase Tables: Improved Evaluation Support for Five Target Languages},
  pages = {339--342},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2010},
}

@inproceedings{DBLP:conf/wmt/DenkowskiL11,
  author = {Michael J. Denkowski and Alon Lavie},
  title = {Meteor 1.3: Automatic Metric for Reliable Optimization and Evaluation of Machine Translation Systems},
  pages = {85--91},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2011},
}

@inproceedings{DBLP:conf/wmt/DenkowskiL14,
  author = {Michael J. Denkowski and Alon Lavie},
  title = {Meteor Universal: Language Specific Translation Evaluation for Any Target Language},
  pages = {376--380},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2014},
}

@inproceedings{DBLP:journals/mt/Shiwen93,
  author = {Shiwen Yu},
  title = {Automatic evaluation of output quality for Machine Translation systems},
  publisher = {Machine Translation},
  volume = {8},
  number = {1-2},
  pages = {117--126},
  year = {1993},
}

@inproceedings{DBLP:conf/coling/ZhouWLLZZ08,
  author = {Ming Zhou and Bo Wang and Shujie Liu and Mu Li and Dongdong Zhang and Tiejun Zhao},
  title = {Diagnostic Evaluation of Machine Translation Systems Using Automatically Constructed Linguistic Check-Points},
  publisher = {International Conference on Computational Linguistics},
  pages = {1121--1128},
  year = {2008},
}

@inproceedings{DBLP:conf/acl/AlbrechtH07a,
  author = {Joshua Albrecht and Rebecca Hwa},
  title = {A Re-examination of Machine Learning Approaches for Sentence-Level {MT} Evaluation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2007},
}

@inproceedings{DBLP:conf/acl/AlbrechtH07,
  author = {Joshua Albrecht and Rebecca Hwa},
  title = {Regression for Sentence-Level {MT} Evaluation with Pseudo References},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2007},
}

@inproceedings{DBLP:conf/naacl/LiuG07,
  author = {Ding Liu and Daniel Gildea},
  title = {Source-Language Features and Maximum Correlation Training for Machine Translation Evaluation},
  pages = {41--48},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2007},
}

@inproceedings{DBLP:conf/ijcnlp/GimenezM08,
  author = {Jes{\'{u}}s Gim{\'{e}}nez and Llu{\'{\i}}s M{\`{a}}rquez},
  title = {Heterogeneous Automatic {MT} Evaluation Through Non-Parametric Metric Combinations},
  pages = {319--326},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2008},
}

% File convention used throughout: journal and conference papers are both
% typed as inproceedings, and the venue name is stored in the publisher field.
% Acronyms and mixed-case names in titles are brace-protected so that
% sentence-casing styles cannot lowercase them.

@inproceedings{DBLP:conf/naacl/DreyerM12,
  author    = {Markus Dreyer and Daniel Marcu},
  title     = {{HyTER}: Meaning-Equivalent Semantics for Translation Evaluation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {162--171},
  year      = {2012},
}

@inproceedings{DBLP:conf/tsd/BojarMTZ13,
  author    = {Ondrej Bojar and Matous Mach{\'{a}}cek and Ales Tamchyna and Daniel Zeman},
  title     = {Scratching the Surface of Possible Translations},
  publisher = {Springer},
  volume    = {8082},
  pages     = {465--474},
  year      = {2013},
}

@inproceedings{DBLP:conf/eamt/QinS15,
  author    = {Ying Qin and Lucia Specia},
  title     = {Truly Exploring Multiple References for Machine Translation Evaluation},
  publisher = {European Association for Machine Translation},
  year      = {2015},
}

@inproceedings{DBLP:conf/emnlp/SocherPHNM11,
  author    = {Richard Socher and Jeffrey Pennington and Eric H. Huang and Andrew Y. Ng and Christopher D. Manning},
  title     = {Semi-Supervised Recursive Autoencoders for Predicting Sentiment Distributions},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {151--161},
  year      = {2011},
}

@inproceedings{DBLP:conf/emnlp/SocherPWCMNP13,
  author    = {Richard Socher and Alex Perelygin and Jean Wu and Jason Chuang and Christopher D. Manning and Andrew Y. Ng and Christopher Potts},
  title     = {Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1631--1642},
  year      = {2013},
}

@inproceedings{DBLP:journals/corr/MatsuoKS17,
  author    = {Junki Matsuo and Mamoru Komachi and Katsuhito Sudoh},
  title     = {Word-Alignment-Based Segment-Level Machine Translation Evaluation using Word Embeddings},
  publisher = {CoRR},
  volume    = {abs/1704.00380},
  year      = {2017},
}

@inproceedings{DBLP:journals/csl/GuzmanJMN17,
  author    = {Guzm{\'a}n, Francisco and Joty, Shafiq and M{\`a}rquez, Llu{\'\i}s and Nakov, Preslav},
  title     = {Machine translation evaluation with neural networks},
  publisher = {Computer Speech \& Language},
  volume    = {45},
  pages     = {180--200},
  year      = {2017},
}

@inproceedings{gamon2005sentence,
  author    = {Gamon, Michael and Aue, Anthony and Smets, Martine},
  title     = {Sentence-level {MT} evaluation without reference translations: Beyond language modeling},
  publisher = {Proceedings of EAMT},
  pages     = {103--111},
  year      = {2005},
}

@inproceedings{DBLP:conf/lrec/Quirk04,
  author    = {Christopher Quirk},
  title     = {Training a Sentence-Level Machine Translation Confidence Measure},
  publisher = {European Language Resources Association},
  year      = {2004},
}

@inproceedings{DBLP:conf/icassp/JonesGSGHRW05,
  author    = {Douglas A. Jones and Edward Gibson and Wade Shen and Neil Granoien and Martha Herzog and Douglas A. Reynolds and Clifford J. Weinstein},
  title     = {Measuring human readability of machine generated text: three case studies in speech recognition and machine translation},
  publisher = {{IEEE}},
  pages     = {1009--1012},
  year      = {2005},
}

@inproceedings{DBLP:conf/eamt/ScartonZVGS15,
  author    = {Carolina Scarton and Marcos Zampieri and Mihaela Vela and Josef van Genabith and Lucia Specia},
  title     = {Searching for Context: a Study on Document-Level Labels for Translation Quality Estimation},
  publisher = {European Association for Machine Translation},
  year      = {2015},
}

@inproceedings{DBLP:conf/interspeech/FetterDR96,
  author    = {Fetter, Pablo and Dandurand, Fr{\'e}d{\'e}ric and Regel-Brietzmann, Peter},
  title     = {Word graph rescoring using confidence measures},
  publisher = {Proceeding of Fourth International Conference on Spoken Language Processing},
  volume    = {1},
  pages     = {10--13},
  year      = {1996},
}

@inproceedings{DBLP:conf/acl/ShenCHHWSL16,
  author    = {Shiqi Shen and Yong Cheng and Zhongjun He and Wei He and Hua Wu and Maosong Sun and Yang Liu},
  title     = {Minimum Risk Training for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2016},
}

@inproceedings{DBLP:conf/wmt/FreitagCR19,
  author    = {Markus Freitag and Isaac Caswell and Scott Roy},
  title     = {{APE} at Scale and Its Implications on {MT} Evaluation Biases},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {34--44},
  year      = {2019},
}

@inproceedings{DBLP:journals/mt/BiciciGG13,
  author    = {Bi{\c{c}}ici, Ergun and Groves, Declan and van Genabith, Josef},
  title     = {Predicting sentence translation quality using extrinsic and language independent features},
  publisher = {Machine Translation},
  volume    = {27},
  number    = {3-4},
  pages     = {171--192},
  year      = {2013},
}

@inproceedings{DBLP:conf/wmt/BiciciW14,
  author    = {Ergun Bi{\c{c}}ici and Andy Way},
  title     = {Referential Translation Machines for Predicting Translation Quality},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {313--321},
  year      = {2014},
}

@inproceedings{DBLP:conf/wmt/BiciciLW15a,
  author    = {Ergun Bi{\c{c}}ici and Qun Liu and Andy Way},
  title     = {Referential Translation Machines for Predicting Translation Quality and Related Statistics},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {304--308},
  year      = {2015},
}

@inproceedings{DBLP:conf/iclr/LogeswaranL18,
  author    = {Lajanugen Logeswaran and Honglak Lee},
  title     = {An efficient framework for learning sentence representations},
  publisher = {OpenReview.net},
  year      = {2018},
}

@inproceedings{iyyer-etal-2015-deep,
  author    = {Iyyer, Mohit and Manjunatha, Varun and Boyd-Graber, Jordan and Daum{\'e} III, Hal},
  title     = {Deep Unordered Composition Rivals Syntactic Methods for Text Classification},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1681--1691},
  month     = jul,
  year      = {2015},
}

@inproceedings{DBLP:journals/corr/abs-1301-3781,
  author    = {Tomas Mikolov and Kai Chen and Greg Corrado and Jeffrey Dean},
  title     = {Efficient Estimation of Word Representations in Vector Space},
  publisher = {arXiv preprint arXiv:1301.3781},
  year      = {2013},
}

@inproceedings{DBLP:conf/icml/LeM14,
  author    = {Le, Quoc and Mikolov, Tomas},
  title     = {Distributed representations of sentences and documents},
  publisher = {International conference on machine learning},
  pages     = {1188--1196},
  year      = {2014},
}

@inproceedings{DBLP:conf/acl/AthiwaratkunW17,
  author    = {Ben Athiwaratkun and Andrew Gordon Wilson},
  title     = {Multimodal Word Distributions},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1645--1656},
  year      = {2017},
}

% Title capitalisation corrected to the published form (GloVe).
@inproceedings{DBLP:conf/emnlp/PenningtonSM14,
  author    = {Jeffrey Pennington and Richard Socher and Christopher D. Manning},
  title     = {{GloVe}: Global Vectors for Word Representation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1532--1543},
  year      = {2014},
}

@inproceedings{DBLP:conf/nips/KirosZSZUTF15,
  author    = {Kiros, Ryan and Zhu, Yukun and Salakhutdinov, Russ R and Zemel, Richard and Urtasun, Raquel and Torralba, Antonio and Fidler, Sanja},
  title     = {Skip-thought vectors},
  publisher = {Advances in neural information processing systems},
  pages     = {3294--3302},
  year      = {2015},
}

% NOTE(review): no venue is recorded for this entry - add one if a citable
% source (for example a technical report) is intended.
@inproceedings{radford2018improving,
  author    = {Radford, Alec and Narasimhan, Karthik and Salimans, Tim and Sutskever, Ilya},
  title     = {Improving language understanding by generative pre-training},
  year      = {2018},
}

@inproceedings{DBLP:journals/mtcl/Carroll66,
  author    = {John B. Carroll},
  title     = {An experiment in evaluating the quality of translations},
  publisher = {Mech. Transl. Comput. Linguistics},
  volume    = {9},
  number    = {3-4},
  pages     = {55--66},
  year      = {1966},
}

% Typographic apostrophes in the surnames replaced by the ASCII apostrophe,
% which LaTeX typesets identically and every BibTeX toolchain can sort.
@inproceedings{DBLP:conf/amta/WhiteOO94,
  author    = {White, John S and O'Connell, Theresa A and O'Mara, Francis E},
  title     = {The {ARPA} {MT} evaluation methodologies: evolution, lessons, and future approaches},
  publisher = {Proceedings of the First Conference of the Association for Machine Translation in the Americas},
  year      = {1994},
}

@inproceedings{king2003femti,
  author    = {King, Margaret and Popescu-Belis, Andrei and Hovy, Eduard},
  title     = {{FEMTI}: creating and using a framework for {MT} evaluation},
  publisher = {Proceedings of MT Summit IX, New Orleans, LA},
  pages     = {224--231},
  year      = {2003},
}

@inproceedings{DBLP:journals/mt/PrzybockiPBS09,
  author    = {Mark A. Przybocki and Kay Peterson and Sebastien Bronsart and Gregory A. Sanders},
  title     = {The {NIST} 2008 Metrics for machine translation challenge - overview, methodology, metrics, and results},
  publisher = {Machine Translation},
  volume    = {23},
  number    = {2-3},
  pages     = {71--103},
  year      = {2009},
}

@inproceedings{reeder2006direct,
  author    = {Reeder, Florence},
  title     = {Direct application of a language learner test to {MT} evaluation},
  publisher = {Proceedings of AMTA},
  year      = {2006},
}

@inproceedings{DBLP:conf/wmt/Callison-BurchF07,
  author    = {Chris Callison-Burch and Cameron S. Fordyce and Philipp Koehn and Christof Monz and Josh Schroeder},
  title     = {(Meta-) Evaluation of Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {136--158},
  year      = {2007},
}

@inproceedings{DBLP:conf/wmt/Callison-BurchK12,
  author    = {Chris Callison-Burch and Philipp Koehn and Christof Monz and Matt Post and Radu Soricut and Lucia Specia},
  title     = {Findings of the 2012 Workshop on Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {10--51},
  year      = {2012},
}

@inproceedings{DBLP:conf/wmt/Lopez12,
  author    = {Adam Lopez},
  title     = {Putting Human Assessments of Machine Translation Systems in Order},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1--9},
  year      = {2012},
}

@inproceedings{DBLP:conf/wmt/BojarCFHHHKLMNP15,
  author    = {Ondrej Bojar and Rajen Chatterjee and Christian Federmann and Barry Haddow and Matthias Huck and Chris Hokamp and Philipp Koehn and Varvara Logacheva and Christof Monz and Matteo Negri and Matt Post and Carolina Scarton and Lucia Specia and Marco Turchi},
  title     = {Findings of the 2015 Workshop on Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1--46},
  year      = {2015},
}

@inproceedings{DBLP:conf/iwslt/Koehn12,
  author    = {Philipp Koehn},
  title     = {Simulating human judgment in machine translation evaluation campaigns},
  publisher = {International Workshop on Spoken Language Translation},
  pages     = {179--184},
  year      = {2012},
}

@inproceedings{DBLP:conf/acl/LiuG05,
  author    = {Ding Liu and Daniel Gildea},
  title     = {Syntactic Features for Evaluation of Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {25--32},
  year      = {2005},
}

@inproceedings{DBLP:conf/wmt/GimenezM07a,
  author    = {Jes{\'{u}}s Gim{\'{e}}nez and Llu{\'{\i}}s M{\`{a}}rquez},
  title     = {Linguistic Features for Automatic Evaluation of Heterogenous {MT} Systems},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {256--264},
  year      = {2007},
}

@inproceedings{DBLP:journals/mt/PadoCGJM09,
  author    = {Sebastian Pad{\'{o}} and Daniel M. Cer and Michel Galley and Dan Jurafsky and Christopher D. Manning},
  title     = {Measuring machine translation quality as semantic equivalence: {A} metric based on entailment features},
  publisher = {Machine Translation},
  volume    = {23},
  number    = {2-3},
  pages     = {181--193},
  year      = {2009},
}

@inproceedings{DBLP:conf/ssst/OwczarzakGW07,
  author    = {Karolina Owczarzak and Josef van Genabith and Andy Way},
  title     = {Dependency-Based Automatic Evaluation for Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {80--87},
  year      = {2007},
}

@inproceedings{DBLP:conf/wmt/OwczarzakGW07,
  author    = {Karolina Owczarzak and Josef van Genabith and Andy Way},
  title     = {Labelled Dependencies in Machine Translation Evaluation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {104--111},
  year      = {2007},
}

@inproceedings{DBLP:conf/coling/YuWXJLL14,
  author    = {Hui Yu and Xiaofeng Wu and Jun Xie and Wenbin Jiang and Qun Liu and Shouxun Lin},
  title     = {{RED:} {A} Reference Dependency Based {MT} Evaluation Metric},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {2042--2051},
  year      = {2014},
}

@inproceedings{DBLP:conf/wmt/PopovicN09,
  author    = {Maja Popovic and Hermann Ney},
  title     = {Syntax-Oriented Evaluation Measures for Machine Translation Output},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {29--32},
  year      = {2009},
}

@inproceedings{DBLP:conf/acl/BanchsL11,
  author    = {Rafael E. Banchs and Haizhou Li},
  title     = {{AM-FM:} {A} Semantic Framework for Translation Quality Assessment},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {153--158},
  year      = {2011},
}

% Venue name corrected: the association is "... in the Americas", as it is
% already spelled in the other AMTA entries of this file.
@inproceedings{reeder2006measuring,
  author    = {Reeder, Florence},
  title     = {Measuring {MT} adequacy using latent semantic analysis},
  publisher = {Proceedings of the 7th Conference of the Association for Machine Translation in the Americas. Cambridge, Massachusetts},
  pages     = {176--184},
  year      = {2006},
}

@inproceedings{DBLP:conf/acl/LoBSW14,
  author    = {Chi-kiu Lo and Meriem Beloucif and Markus Saers and Dekai Wu},
  title     = {{XMEANT:} Better semantic {MT} evaluation without reference translations},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {765--771},
  year      = {2014},
}

@inproceedings{DBLP:conf/lrec/VilarXDN06,
  author    = {David Vilar and Jia Xu and Luis Fernando D'Haro and Hermann Ney},
  title     = {Error Analysis of Statistical Machine Translation Output},
  publisher = {European Language Resources Association {(ELRA)}},
  pages     = {697--702},
  year      = {2006},
}

@inproceedings{popovic2011human,
  author    = {Popovic, Maja and Burchardt, Aljoscha and others},
  title     = {From human to automatic error classification for machine translation output},
  publisher = {European Association for Machine Translation},
  year      = {2011},
}

@inproceedings{DBLP:journals/mt/CostaLLCC15,
  author    = {{\^{A}}ngela Costa and Wang Ling and Tiago Lu{\'{\i}}s and Rui Correia and Lu{\'{\i}}sa Coheur},
  title     = {A linguistically motivated taxonomy for Machine Translation error analysis},
  publisher = {Machine Translation},
  volume    = {29},
  number    = {2},
  pages     = {127--161},
  year      = {2015},
}
% File convention used throughout: journal and conference papers are both
% typed as inproceedings, and the venue name is stored in the publisher field.
% Acronyms and mixed-case names in titles are brace-protected so that
% sentence-casing styles cannot lowercase them.

@inproceedings{lommel2014using,
  author    = {Lommel, Arle and Burchardt, Aljoscha and Popovic, Maja and Harris, Kim and Avramidis, Eleftherios and Uszkoreit, Hans},
  title     = {Using a new analytic measure for the annotation and analysis of {MT} errors on real data},
  publisher = {European Association for Machine Translation},
  pages     = {165--172},
  year      = {2014},
}

@inproceedings{DBLP:conf/wmt/PopovicGGLNMFB06,
  author    = {Maja Popovic and Adri{\`{a}} de Gispert and Deepa Gupta and Patrik Lambert and Hermann Ney and Jos{\'{e}} B. Mari{\~{n}}o and Marcello Federico and Rafael E. Banchs},
  title     = {Morpho-syntactic Information for Automatic Error Analysis of Statistical Machine Translation Output},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1--6},
  year      = {2006},
}

@inproceedings{DBLP:conf/wmt/PopovicN07,
  author    = {Maja Popovic and Hermann Ney},
  title     = {Word Error Rates: Decomposition over {POS} classes and Applications for Error Analysis},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {48--55},
  year      = {2007},
}

@inproceedings{DBLP:conf/acl/GonzalezMM13,
  author    = {Meritxell Gonz{\'{a}}lez and Laura Mascarell and Llu{\'{\i}}s M{\`{a}}rquez},
  title     = {{tSEARCH}: Flexible and Fast Search over Automatic Translations for Improved Quality/Error Analysis},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {181--186},
  year      = {2013},
}

% NOTE(review): no venue is recorded for this entry - add one if known.
@inproceedings{coughlin2003correlating,
  author    = {Coughlin, Deborah},
  title     = {Correlating automated and human assessments of machine translation quality},
  year      = {2003},
}

@inproceedings{popescu2003experiment,
  author    = {Popescu-Belis, Andrei},
  title     = {An experiment in comparative evaluation: humans vs. computers},
  publisher = {Proceedings of the Ninth Machine Translation Summit. New Orleans},
  year      = {2003},
}

% The entry carried two publisher fields (Biometrika and JSTOR). BibTeX keeps
% only the first and warns about the second, so the redundant one is removed.
@inproceedings{pearson1920notes,
  author    = {Pearson, Karl},
  title     = {Notes on the history of correlation},
  publisher = {Biometrika},
  volume    = {13},
  number    = {1},
  pages     = {25--45},
  year      = {1920},
}

@inproceedings{culy2003limits,
  author    = {Culy, Christopher and Riehemann, Susanne Z},
  title     = {The limits of {N-gram} translation evaluation metrics},
  publisher = {MT Summit IX},
  pages     = {71--78},
  year      = {2003},
}

@inproceedings{finch2004using,
  author    = {Finch, Andrew and Akiba, Yasuhiro and Sumita, Eiichiro},
  title     = {Using a paraphraser to improve machine translation evaluation},
  publisher = {International Joint Conference on Natural Language Processing},
  year      = {2004},
}

% Venue corrected: this is a COLING 2008 paper (see the citation key); it was
% labelled with the machine-learning conference name by mistake.
@inproceedings{DBLP:conf/coling/HamonM08,
  author    = {Olivier Hamon and Djamel Mostefa},
  title     = {The Impact of Reference Quality on Automatic {MT} Evaluation},
  publisher = {International Conference on Computational Linguistics},
  pages     = {39--42},
  year      = {2008},
}

@inproceedings{doddington2002automatic,
  author    = {Doddington, George},
  title     = {Automatic evaluation of machine translation quality using n-gram co-occurrence statistics},
  publisher = {Proceedings of the second international conference on Human Language Technology Research},
  pages     = {138--145},
  year      = {2002},
}

% Title typo fixed ("Re-evaluation the role" -> "Re-evaluating the role") and
% the metric name protected.
@inproceedings{callison2006re,
  author    = {Callison-Burch, Chris and Osborne, Miles and Koehn, Philipp},
  title     = {Re-evaluating the role of {BLEU} in machine translation research},
  publisher = {11th Conference of the European Chapter of the Association for Computational Linguistics},
  year      = {2006},
}

@inproceedings{Miller:2005:MTS,
  author    = {Keith J. Miller and Michelle Vanni},
  title     = {Inter-rater Agreement Measures, and the Refinement of Metrics in the {PLATO} {MT} Evaluation Paradigm},
  publisher = {The tenth Machine Translation Summit},
  pages     = {125--132},
  year      = {2005},
}

@inproceedings{DBLP:conf/acl/Och03,
  author    = {Franz Josef Och},
  title     = {Minimum Error Rate Training in Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {160--167},
  year      = {2003},
}

@inproceedings{DBLP:conf/wmt/Post18,
  author    = {Matt Post},
  title     = {A Call for Clarity in Reporting {BLEU} Scores},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {186--191},
  year      = {2018},
}

@inproceedings{he2012maximum,
  author    = {He, Xiaodong and Deng, Li},
  title     = {Maximum expected {BLEU} training of phrase and lexicon translation models},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {292--301},
  year      = {2012},
}

@inproceedings{DBLP:conf/acl/ChenG15,
  author    = {Boxing Chen and Hongyu Guo},
  title     = {Representation Based Translation Evaluation Metrics},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {150--155},
  year      = {2015},
}

@inproceedings{kulesza2004learning,
  author    = {Kulesza, Alex and Shieber, Stuart},
  title     = {A learning approach to improving sentence-level {MT} evaluation},
  publisher = {Proceedings of the 10th International Conference on Theoretical and Methodological Issues in Machine Translation},
  year      = {2004},
}

@inproceedings{corston2001machine,
  author    = {Corston-Oliver, Simon and Gamon, Michael and Brockett, Chris},
  title     = {A machine learning approach to the automatic evaluation of machine translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {148--155},
  year      = {2001},
}

% Venue was given as the publishing house (Springer); the file convention puts
% the journal name here, as the other Machine Translation journal entries do.
% NOTE(review): the scraped record had only the first page (1); the range
% 1--27 is taken from the journal issue - confirm.
@inproceedings{albrecht2008regression,
  author    = {Albrecht, Joshua S and Hwa, Rebecca},
  title     = {Regression for machine translation evaluation at the sentence level},
  publisher = {Machine Translation},
  volume    = {22},
  number    = {1-2},
  pages     = {1--27},
  year      = {2008},
}

@inproceedings{duh2008ranking,
  author    = {Duh, Kevin},
  title     = {Ranking vs. regression in machine translation evaluation},
  publisher = {Proceedings of the Third Workshop on Statistical Machine Translation},
  pages     = {191--194},
  year      = {2008},
}

@inproceedings{chen2015multi,
  author    = {Chen, Boxing and Guo, Hongyu and Kuhn, Roland},
  title     = {Multi-level evaluation for machine translation},
  publisher = {Proceedings of the Tenth Workshop on Statistical Machine Translation},
  pages     = {361--365},
  year      = {2015},
}

@book{DBLP:books/sp/EfronT93,
  author    = {Bradley Efron and Robert Tibshirani},
  title     = {An Introduction to the Bootstrap},
  publisher = {Springer},
  year      = {1993},
}

@inproceedings{DBLP:conf/emnlp/Koehn04,
  author    = {Philipp Koehn},
  title     = {Statistical Significance Tests for Machine Translation Evaluation},
  publisher = {{ACL}},
  pages     = {388--395},
  year      = {2004},
}

@book{noreen1989computer,
  author    = {Noreen, Eric W},
  title     = {Computer-intensive methods for testing hypotheses},
  publisher = {Wiley New York},
  year      = {1989},
}

% "John T. Maxwell III" in First-Last order makes BibTeX take "III" as the
% surname; the "Last, Jr, First" form keeps Maxwell as the surname.
@inproceedings{DBLP:conf/acl/RiezlerM05,
  author    = {Stefan Riezler and Maxwell, III, John T.},
  title     = {On Some Pitfalls in Automatic Evaluation and Significance Testing for {MT}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {57--64},
  year      = {2005},
}

@inproceedings{DBLP:conf/emnlp/Berg-KirkpatrickBK12,
  author    = {Taylor Berg-Kirkpatrick and David Burkett and Dan Klein},
  title     = {An Empirical Investigation of Statistical Significance in {NLP}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {995--1005},
  year      = {2012},
}

@inproceedings{DBLP:conf/wmt/Bicici13a,
  author    = {Ergun Bi{\c{c}}ici},
  title     = {Referential Translation Machines for Quality Estimation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {343--351},
  year      = {2013},
}

@inproceedings{DBLP:conf/wmt/SouzaBTN13,
  author    = {Jos{\'{e}} Guilherme Camargo de Souza and Christian Buck and Marco Turchi and Matteo Negri},
  title     = {{FBK-UEdin} Participation to the {WMT13} Quality Estimation Shared Task},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {352--358},
  year      = {2013},
}

@inproceedings{DBLP:conf/wmt/SouzaGBTN14,
  author    = {Jos{\'{e}} Guilherme Camargo de Souza and Jes{\'{u}}s Gonz{\'{a}}lez-Rubio and Christian Buck and Marco Turchi and Matteo Negri},
  title     = {{FBK-UPV-UEdin} participation in the {WMT14} Quality Estimation shared-task},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {322--328},
  year      = {2014},
}

@inproceedings{DBLP:conf/wmt/Espla-GomisSF15,
  author    = {Miquel Espl{\`{a}}-Gomis and Felipe S{\'{a}}nchez-Mart{\'{\i}}nez and Mikel L. Forcada},
  title     = {{UAlacant} word-level machine translation quality estimation system at {WMT} 2015},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {309--315},
  year      = {2015},
}

@inproceedings{DBLP:conf/wmt/KreutzerSR15,
  author    = {Julia Kreutzer and Shigehiko Schamoni and Stefan Riezler},
  title     = {{QUality} Estimation from {ScraTCH} {(QUETCH):} Deep Learning for Word-level Translation Quality Estimation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {316--322},
  year      = {2015},
}

@inproceedings{DBLP:conf/wmt/MartinsAHK16,
  author    = {Andr{\'{e}} F. T. Martins and Ram{\'{o}}n Fern{\'{a}}ndez Astudillo and Chris Hokamp and Fabio Kepler},
  title     = {Unbabel's Participation in the {WMT16} Word-Level Translation Quality Estimation Shared Task},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {806--811},
  year      = {2016},
}

@inproceedings{DBLP:conf/wmt/ChenTZXZLW17,
  author    = {Zhiming Chen and Yiming Tan and Chenlin Zhang and Qingyu Xiang and Lilin Zhang and Maoxi Li and Mingwen Wang},
  title     = {Improving Machine Translation Quality Estimation with Neural Network Features},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {551--555},
  year      = {2017},
}

% Same paper as DBLP:conf/wmt/KreutzerSR15 (identical authors, year, pages).
% Both keys are kept because either may be cited from the text.
@inproceedings{kreutzer2015quality,
  author    = {Kreutzer, Julia and Schamoni, Shigehiko and Riezler, Stefan},
  title     = {Quality estimation from scratch ({QUETCH}): Deep learning for word-level translation quality estimation},
  publisher = {Proceedings of the Tenth Workshop on Statistical Machine Translation},
  pages     = {316--322},
  year      = {2015},
}

@inproceedings{DBLP:conf/wmt/ScartonBSSS16,
  author    = {Carolina Scarton and Daniel Beck and Kashif Shah and Karin Sim Smith and Lucia Specia},
  title     = {Word embeddings and discourse information for Quality Estimation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {831--837},
  year      = {2016},
}
@inproceedings{DBLP:conf/wmt/AbdelsalamBE16,
  author = {Amal Abdelsalam and Ondrej Bojar and Samhaa El-Beltagy},
  title = {Bilingual Embeddings and Word Alignments for Translation Quality Estimation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {764--771},
  year = {2016},
}

@inproceedings{DBLP:conf/wmt/BasuPN18,
  author = {Prasenjit Basu and Santanu Pal and Sudip Kumar Naskar},
  title = {Keep It or Not: Word Level Quality Estimation for Post-Editing},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {759--764},
  year = {2018},
}

@inproceedings{DBLP:conf/wmt/Lo19,
  author = {Chi-kiu Lo},
  title = {YiSi - a Unified Semantic {MT} Quality Evaluation and Estimation Metric for Languages with Different Levels of Available Resources},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {507--513},
  year = {2019},
}

@inproceedings{DBLP:conf/wmt/YankovskayaTF19,
  author = {Elizaveta Yankovskaya and Andre T{\"{a}}ttar and Mark Fishel},
  title = {Quality Estimation and Translation Metrics via Pre-trained Word and Sentence Embeddings},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {101--105},
  year = {2019},
}

@inproceedings{DBLP:conf/wmt/Qi19,
  author = {Hou Qi},
  title = {{NJU} Submissions for the {WMT19} Quality Estimation Shared Task},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {95--100},
  year = {2019},
}

@inproceedings{DBLP:conf/wmt/ZhouZH19,
  author = {Junpei Zhou and Zhisong Zhang and Zecong Hu},
  title = {{SOURCE:} SOURce-Conditional Elmo-style Model for Machine Translation Quality Estimation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {106--111},
  year = {2019},
}

@inproceedings{DBLP:conf/wmt/Hokamp17,
  author = {Chris Hokamp},
  title = {Ensembling Factored Neural Machine Translation Models for Automatic Post-Editing and Quality Estimation},
  pages =
{647--654},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2017}
}

@inproceedings{DBLP:conf/wmt/KimLKN19,
  author = {Hyun Kim and Joon-Ho Lim and Hyun-Ki Kim and Seung-Hoon Na},
  title = {{QE} {BERT:} Bilingual {BERT} Using Multi-task Learning for Neural Quality Estimation},
  pages = {85--89},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:conf/wmt/HildebrandV13,
  author = {Silja Hildebrand and Stephan Vogel},
  title = {{MT} Quality Estimation: The {CMU} System for WMT'13},
  pages = {373--379},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2013}
}

% WMT19 shared-task paper. The venue is stored in the publisher field, as in the
% other entries of this file; acronym braced so styles cannot lower-case it.
@inproceedings{kepler2019unbabel,
  author = {Kepler, F{\'a}bio and Tr{\'e}nous, Jonay and Treviso, Marcos and Vera, Miguel and G{\'o}is, Ant{\'o}nio and Farajian, M. Amin and Lopes, Ant{\'o}nio V. and Martins, Andr{\'e} F. T.},
  title = {Unbabel's Participation in the {WMT19} Translation Quality Estimation Shared Task},
  publisher = {Proceedings of the Fourth Conference on Machine Translation},
  pages = {78--84},
  year = {2019}
}

% Same paper as the DBLP-keyed WMT16 Unbabel entry; both keys are kept because
% either may be cited. Title uses an ASCII apostrophe and a protected acronym.
@inproceedings{martins2016unbabel,
  author = {Martins, Andr{\'e} F. T. and Astudillo, Ram{\'o}n and Hokamp, Chris and Kepler, Fabio},
  title = {Unbabel's Participation in the {WMT16} Word-Level Translation Quality Estimation Shared Task},
  publisher = {Proceedings of the First Conference on Machine Translation},
  pages = {806--811},
  year = {2016}
}

@inproceedings{DBLP:conf/wmt/ShahLPBBBS15,
  author = {Kashif Shah and Varvara Logacheva and Gustavo Paetzold and Fr{\'{e}}d{\'{e}}ric Blain and Daniel Beck and Fethi Bougares and Lucia Specia},
  title = {{SHEF-NN:} Translation Quality Estimation with Neural Networks},
  pages = {342--347},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2015}
}

@book{huang2019machine,
  title={Machine Translation: 15th China Conference, CCMT 2019, Nanchang, China, September 27--29, 2019, Revised Selected Papers},
  author={Huang, Shujian and Knight, Kevin},
  volume={1104},
  year={2019},
  publisher={Springer
Nature}
}

% Journal article: the journal name goes in the publisher field, following the
% convention used for the other journal papers in this file.
@inproceedings{akaike1974new,
  author = {Akaike, Hirotugu},
  title = {A new look at the statistical model identification},
  publisher = {IEEE Transactions on Automatic Control},
  volume = {19},
  number = {6},
  pages = {716--723},
  year = {1974}
}

@inproceedings{wang2019niutrans,
  author = {Wang, Ziyang and Liu, Hui and Chen, Hexuan and Feng, Kai and Wang, Zeyang and Li, Bei and Xu, Chen and Xiao, Tong and Zhu, Jingbo},
  title = {NiuTrans Submission for CCMT19 Quality Estimation Task},
  pages = {82--92},
  year = {2019},
  publisher = {Springer}
}

% Co-authored textbook (Jurafsky and Martin).
@book{jurafsky2000speech,
  author = {Jurafsky, Dan and Martin, James H.},
  title = {Speech \& language processing},
  year = {2000},
  publisher = {Pearson Education India}
}
%%%%% chapter 4------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 5------------------------------------------------------
@inproceedings{brown1990statistical,
  author = {Peter F. Brown and John Cocke and Stephen Della Pietra and Vincent J. Della Pietra and Frederick Jelinek and John D. Lafferty and Robert L. Mercer and Paul S. Roossin},
  title = {A Statistical Approach to Machine Translation},
  publisher = {Computational Linguistics},
  volume = {16},
  number = {2},
  pages = {79--85},
  year = {1990}
}

@inproceedings{knight1999decoding,
  author = {Kevin Knight},
  title = {Decoding Complexity in Word-Replacement Translation Models},
  publisher = {Computational Linguistics},
  volume = {25},
  number = {4},
  pages = {607--615},
  year = {1999}
}

@inproceedings{shannon1949communication,
  author = {Claude Elwood Shannon},
  title = {Communication theory of secrecy systems},
  publisher = {Bell system technical journal},
  volume = {28},
  number = {4},
  pages = {656--715},
  year = {1949}
}

@inproceedings{DBLP:conf/acl/Moore04,
  author = {Robert C.
Moore},
  title = {Improving {IBM} Word Alignment Model 1},
  pages = {518--525},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2004}
}

% NOTE: the key says 1991 but the article is dated 2010; the key is kept so
% existing citations keep resolving. 110--116 is the page range, not an issue number.
@inproceedings{肖桐1991面向统计机器翻译的重对齐方法研究,
  author = {肖桐 and 李天宁 and 陈如山 and 朱靖波 and 王会珍},
  title = {面向统计机器翻译的重对齐方法研究},
  publisher = {中文信息学报},
  volume = {24},
  pages = {110--116},
  year = {2010},
}

@inproceedings{2005Improvin,
  author = {Hua Wu and Haifeng Wang},
  title = {Improving Statistical Word Alignment with Ensemble Methods},
  volume = {3651},
  pages = {462--473},
  publisher = {International Joint Conference on Natural Language Processing},
  year = {2005}
}

@inproceedings{1998Grammar,
  author = {Ye-Yi Wang and Wayne Ward},
  title = {Grammar Inference and Statistical Machine Translation},
  year = {1999},
  publisher = {Carnegie Mellon University}
}

% Third author spelled as in the Gale and Church entry below.
@inproceedings{DBLP:conf/acl-vlc/DaganCG93,
  author = {Ido Dagan and Kenneth Ward Church and William A. Gale},
  title = {Robust Bilingual Word Alignment for Machine Aided Translation},
  publisher = {Very Large Corpora},
  year = {1993}
}

@inproceedings{DBLP:conf/naacl/GaleC91,
  author = {William A. Gale and Kenneth Ward Church},
  title = {Identifying Word Correspondences in Parallel Texts},
  publisher = {Morgan Kaufmann},
  year = {1991}
}

@inproceedings{DBLP:conf/naacl/LiangTK06,
  author = {Percy Liang and Benjamin Taskar and Dan Klein},
  title = {Alignment by Agreement},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2006}
}

@inproceedings{DBLP:conf/naacl/DyerCS13,
  author = {Chris Dyer and Victor Chahuneau and Noah A.
Smith},
  title = {A Simple, Fast, and Effective Reparameterization of {IBM} Model 2},
  pages = {644--648},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2013}
}

@inproceedings{DBLP:conf/acl/DeNeroK07,
  author = {John DeNero and Dan Klein},
  title = {Tailoring Word Alignments to Syntactic Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2007}
}

% Three authors; names must be joined with the word and, never with a comma,
% otherwise the first two are parsed as a single person.
@inproceedings{paul2007all,
  author = {Paul C. Davis and Zhuli Xie and Kevin Small},
  title = {All Links are not the Same: Evaluating Word Alignments for Statistical Machine Translation},
  publisher = {Machine Translation Summit XI},
  year = {2007}
}

% 88--94 is the page range of the article, not an issue number.
@inproceedings{黄书剑2009一种错误敏感的词对齐评价方法,
  author = {黄书剑 and 奚宁 and 赵迎功 and 戴新宇 and 陈家骏},
  title = {一种错误敏感的词对齐评价方法},
  publisher = {中文信息学报},
  volume = {23},
  pages = {88--94},
  year = {2009}
}

@inproceedings{DBLP:journals/coling/FraserM07,
  author = {Alexander Fraser and Daniel Marcu},
  title = {Measuring Word Alignment Quality for Statistical Machine Translation},
  publisher = {Computational Linguistics},
  volume = {33},
  number = {3},
  pages = {293--303},
  year = {2007}
}

@inproceedings{DBLP:journals/corr/FengLLZ16,
  author = {Shi Feng and Shujie Liu and Mu Li and Ming Zhou},
  title = {Implicit Distortion and Fertility Models for Attention-based Encoder-Decoder {NMT} Model},
  publisher = {CoRR},
  volume = {abs/1601.03317},
  year = {2016}
}

@inproceedings{DBLP:conf/coling/UdupaFM04,
  author = {Raghavendra Udupa and Tanveer A.
Faruquie and Hemanta Kumar Maji},
  title = {An Algorithmic Framework for Solving the Decoding Problem in Statistical Machine Translation},
  publisher = {International Conference on Computational Linguistics},
  year = {2004},
}

@inproceedings{DBLP:conf/naacl/RiedelC09,
  author = {Sebastian Riedel and James Clarke},
  title = {Revisiting Optimal Decoding for Machine Translation {IBM} Model 4},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2009},
}

@inproceedings{DBLP:conf/eacl/UdupaM06,
  author = {Raghavendra Udupa and Hemanta Kumar Maji},
  title = {Computational Complexity of Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2006},
}

@inproceedings{DBLP:conf/emnlp/LeuschMN08,
  author = {Gregor Leusch and Evgeny Matusov and Hermann Ney},
  title = {Complexity of Finding the BLEU-optimal Hypothesis in a Confusion Network},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {839--847},
  year = {2008},
}

@inproceedings{DBLP:journals/mt/FlemingKN15,
  author = {Noah Fleming and Antonina Kolokolova and Renesa Nizamee},
  title = {Complexity of alignment and decoding problems: restrictions and approximations},
  publisher = {Machine Translation},
  volume = {29},
  number = {3-4},
  pages = {163--187},
  year = {2015},
}
%%%%% chapter 5------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 6------------------------------------------------------
@inproceedings{ittycheriah2005maximum,
  author = {Abraham Ittycheriah and Salim Roukos},
  title = {A Maximum Entropy Word Aligner for Arabic-English Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2005},
}

@inproceedings{koehn2003statistical,
  author =
{Philipp Koehn and Franz Josef Och and Daniel Marcu},
  title = {Statistical Phrase-Based Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2003}
}

% Two authors; each is listed exactly once.
@book{manning1999foundations,
  author = {Manning, Christopher D. and Sch{\"u}tze, Hinrich},
  title = {Foundations of statistical natural language processing},
  year = {1999},
  publisher = {Massachusetts Institute of Technology Press}
}

@inproceedings{och2003systematic,
  author = {Franz Josef Och and Hermann Ney},
  title = {A Systematic Comparison of Various Statistical Alignment Models},
  publisher = {Computational Linguistics},
  volume = {29},
  number = {1},
  pages = {19--51},
  year = {2003}
}

@inproceedings{och2004alignment,
  author = {Franz Josef Och and Hermann Ney},
  title = {The Alignment Template Approach to Statistical Machine Translation},
  publisher = {Computational Linguistics},
  volume = {30},
  number = {4},
  pages = {417--449},
  year = {2004}
}

@inproceedings{vogel1996hmm,
  author = {Stephan Vogel and Hermann Ney and Christoph Tillmann},
  title = {HMM-Based Word Alignment in Statistical Translation},
  publisher = {International Conference on Computational Linguistics},
  pages = {836--841},
  year = {1996}
}

@inproceedings{xiao2013unsupervised,
  author = {Tong Xiao and Jingbo Zhu},
  title = {Unsupervised sub-tree alignment for tree-to-tree translation},
  publisher = {Journal of Artificial Intelligence Research},
  volume = {48},
  pages = {733--782},
  year = {2013}
}

% Surname-first form with a comma, so Brown is parsed as the family name and
% the initials as given names.
@inproceedings{1966Decentering,
  author = {Brown, D. C.},
  title = {Decentering Distortion of Lenses},
  publisher = {Photogrammetric Engineering},
  volume = {32},
  pages = {444--462},
  year = {1966}
}

@inproceedings{ClausF05,
  author = {David Claus and Andrew W.
Fitzgibbon},
  title = {A Rational Function Lens Distortion Model for General Cameras},
  publisher = {{IEEE} Computer Society Conference on Computer Vision and Pattern Recognition},
  pages = {213--219},
  year = {2005},
}

@inproceedings{ChiangLMMRS05,
  author = {David Chiang and Adam Lopez and Nitin Madnani and Christof Monz and Philip Resnik and Michael Subotin},
  title = {The Hiero Machine Translation System: Extensions, Evaluation, and Analysis},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {779--786},
  year = {2005},
}
%%%%% chapter 6------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 7------------------------------------------------------
@inproceedings{DBLP:journals/tit/Viterbi67,
  author = {Andrew J. Viterbi},
  title = {Error bounds for convolutional codes and an asymptotically optimum decoding algorithm},
  publisher = {IEEE Transactions on Information Theory},
  volume = {13},
  number = {2},
  pages = {260--269},
  year = {1967},
}

@inproceedings{DBLP:conf/acl/OchN02,
  author = {Franz Josef Och and Hermann Ney},
  title = {Discriminative Training and Maximum Entropy Models for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {295--302},
  year = {2002},
}

@inproceedings{koehn2000estimating,
  author = {Philipp Koehn and Kevin Knight},
  title = {Estimating Word Translation Probabilities from Unrelated Monolingual Corpora Using the {EM} Algorithm},
  publisher = {AAAI Press},
  pages = {711--715},
  year = {2000},
}

@inproceedings{taskar2005a,
  author = {Benjamin Taskar and Simon Lacoste-Julien and Dan Klein},
  title = {A Discriminative Matching Approach to Word Alignment},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {73--80},
  year =
{2005}
}

@inproceedings{DBLP:conf/coling/OchN00,
  author = {Franz Josef Och and Hermann Ney},
  title = {A Comparison of Alignment Models for Statistical Machine Translation},
  pages = {1086--1090},
  publisher = {Morgan Kaufmann},
  year = {2000}
}

% Two-author paper (Koehn and Knight); the first author matches the citation key.
@inproceedings{koehn2002learning,
  author = {Philipp Koehn and Kevin Knight},
  title = {Learning a translation lexicon from monolingual corpora},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {9--16},
  year = {2002}
}

@inproceedings{Gros2008MSD,
  author = {Gros, Jerneja {\v{Z}}ganec},
  title = {MSD Recombination Method in Statistical Machine Translation},
  volume = {1060},
  pages = {186--189},
  publisher = {American Institute of Physics},
  year = {2008},
}

@inproceedings{xiong2006maximum,
  author = {Deyi Xiong and Qun Liu and Shouxun Lin},
  title = {Maximum Entropy Based Phrase Reordering Model for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2006}
}

@inproceedings{DBLP:conf/naacl/KumarB05,
  author = {Shankar Kumar and William J. Byrne},
  title = {Local Phrase Reordering Models for Statistical Machine Translation},
  pages = {161--168},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2005}
}

@inproceedings{li-etal-2014-neural,
  author = {Peng Li and Yang Liu and Maosong Sun and Tatsuya Izuha and Dakun Zhang},
  title = {A Neural Reordering Model for Phrase-based Translation},
  pages = {1897--1907},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2014}
}

@inproceedings{powell1964an,
  author = {M. J. D.
Powell},
  title = {An efficient method for finding the minimum of a function of several variables without calculating derivatives},
  publisher = {The Computer Journal},
  volume = {7},
  number = {2},
  pages = {155--162},
  year = {1964},
}

@inproceedings{DBLP:conf/emnlp/ChiangMR08,
  author = {David Chiang and Yuval Marton and Philip Resnik},
  title = {Online Large-Margin Training of Syntactic and Structural Translation Features},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {224--233},
  year = {2008},
}

@inproceedings{Hopkins2011Tuning,
  author = {Mark Hopkins and Jonathan May},
  title = {Tuning as Ranking},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {1352--1362},
  year = {2011},
}

@inproceedings{DBLP:conf/acl/KleinM03,
  author = {Dan Klein and Christopher D. Manning},
  title = {Accurate Unlexicalized Parsing},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {423--430},
  year = {2003},
}

@inproceedings{DBLP:conf/acl/OchW98,
  author = {Franz Josef Och and Hans Weber},
  title = {Improving Statistical Natural Language Translation with Categories and Rules},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {985--989},
  year = {1998},
}

@phdthesis{DBLP:phd/dnb/Och02,
  author = {Franz Josef Och},
  title = {Statistical machine translation: from single word models to alignment templates},
  publisher = {{RWTH} Aachen University, Germany},
  year = {2002},
}

@inproceedings{DBLP:conf/acl/WangW98,
  author = {Ye-Yi Wang and Alex Waibel},
  title = {Modeling with Structures in Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {1357--1363},
  year = {1998},
}

@inproceedings{DBLP:conf/acl/WatanabeSO03,
  author = {Taro Watanabe and Eiichiro Sumita and Hiroshi G.
Okuno},
  title = {Chunk-Based Statistical Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {303--310},
  year = {2003},
}

@inproceedings{DBLP:conf/acl/Marcu01,
  author = {Daniel Marcu},
  title = {Towards a Unified Approach to Memory- and Statistical-Based Machine Translation},
  publisher = {Morgan Kaufmann Publishers},
  pages = {378--385},
  year = {2001},
}

@inproceedings{DBLP:conf/ki/ZensON02,
  author = {Richard Zens and Franz Josef Och and Hermann Ney},
  title = {Phrase-Based Statistical Machine Translation},
  publisher = {Annual Conference on Artificial Intelligence},
  pages = {18--32},
  year = {2002},
}

@inproceedings{DBLP:conf/naacl/ZensN04,
  author = {Richard Zens and Hermann Ney},
  title = {Improvements in Phrase-Based Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {257--264},
  year = {2004},
}

@inproceedings{DBLP:conf/emnlp/MarcuW02,
  author = {Daniel Marcu and Daniel Wong},
  title = {A Phrase-Based, Joint Probability Model for Statistical Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {133--139},
  year = {2002},
}

@inproceedings{DBLP:conf/wmt/DeNeroGZK06,
  author = {John DeNero and Dan Gillick and James Zhang and Dan Klein},
  title = {Why Generative Phrase Models Underperform Surface Heuristics},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {31--38},
  year = {2006},
}

@inproceedings{german2011bilingual,
  author = {German Sanchis-Trilles and Daniel Ortiz-Martinez and Jesus Gonzalez-Rubio and Jorge Gonzalez and Francisco Casacuberta},
  title = {Bilingual segmentation for phrasetable pruning in Statistical Machine Translation},
  publisher = {Conference of the European Association for Machine Translation},
  pages = {257--264},
  year = {2011},
}

@inproceedings{DBLP:conf/coling/BlackwoodGB08,
  author = {Graeme W.
Blackwood and Adri{\`{a}} de Gispert and William Byrne},
  title = {Phrasal Segmentation Models for Statistical Machine Translation},
  publisher = {International Conference on Computational Linguistics},
  pages = {19--22},
  year = {2008},
}

@inproceedings{DBLP:conf/naacl/XiongZL10,
  author = {Deyi Xiong and Min Zhang and Haizhou Li},
  title = {Learning Translation Boundaries for Phrase-Based Decoding},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {136--144},
  year = {2010},
}

@inproceedings{DBLP:conf/naacl/Tillman04,
  author = {Christoph Tillman},
  title = {A Unigram Orientation Model for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2004},
}

@inproceedings{DBLP:conf/acl/NagataSYO06,
  author = {Masaaki Nagata and Kuniko Saito and Kazuhide Yamamoto and Kazuteru Ohashi},
  title = {A Clustered Global Phrase Reordering Model for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2006},
}

@inproceedings{DBLP:conf/wmt/ZensN06,
  author = {Richard Zens and Hermann Ney},
  title = {Discriminative Reordering Models for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {55--63},
  year = {2006},
}

@inproceedings{DBLP:conf/naacl/GreenGM10,
  author = {Spence Green and Michel Galley and Christopher D.
Manning},
  title = {Improved Models of Distortion Cost for Statistical Machine Translation},
  pages = {867--875},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2010}
}

@inproceedings{DBLP:conf/naacl/Cherry13,
  author = {Colin Cherry},
  title = {Improved Reordering for Phrase-Based Translation using Sparse Features},
  pages = {22--31},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2013}
}

@inproceedings{DBLP:conf/wmt/HuckWRN13,
  author = {Matthias Huck and Joern Wuebker and Felix Rietig and Hermann Ney},
  title = {A Phrase Orientation Model for Hierarchical Machine Translation},
  pages = {452--463},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2013}
}

% EAMT 2012 paper. EAMT stands for the European Association for Machine
% Translation; the venue string matches the other EAMT entry in this file.
@inproceedings{matthias2012discriminative,
  author = {Matthias Huck and Stephan Peitz and Markus Freitag and Hermann Ney},
  title = {Discriminative Reordering Extensions for Hierarchical Phrase-Based Machine Translation},
  publisher = {Conference of the European Association for Machine Translation},
  year = {2012}
}

@inproceedings{vinh2009improving,
  author = {Vinh Van Nguyen and Akira Shimazu and Minh Le Nguyen and Thai Phuong Nguyen},
  title = {Improving a Lexicalized Hierarchical Reordering Model Using Maximum Entropy},
  publisher = {Machine Translation Summit XII},
  year = {2009}
}

@inproceedings{DBLP:journals/coling/BisazzaF16,
  author = {Arianna Bisazza and Marcello Federico},
  title = {A Survey of Word Reordering in Statistical Machine Translation: Computational Models and Language Phenomena},
  publisher = {Computational Linguistics},
  volume = {42},
  number = {2},
  pages = {163--205},
  year = {2016}
}

@inproceedings{DBLP:conf/coling/XiaM04,
  author = {Fei Xia and Michael C.
McCord},
  title = {Improving a Statistical {MT} System with Automatically Learned Rewrite Patterns},
  publisher = {International Conference on Computational Linguistics},
  year = {2004}
}

@inproceedings{DBLP:conf/acl/CollinsKK05,
  author = {Michael Collins and Philipp Koehn and Ivona Kucerova},
  title = {Clause Restructuring for Statistical Machine Translation},
  pages = {531--540},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2005}
}

@inproceedings{DBLP:conf/emnlp/WangCK07,
  author = {Chao Wang and Michael Collins and Philipp Koehn},
  title = {Chinese Syntactic Reordering for Statistical Machine Translation},
  pages = {737--745},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2007}
}

@inproceedings{DBLP:conf/ijcnlp/WuSDTN11,
  author = {Xianchao Wu and Katsuhito Sudoh and Kevin Duh and Hajime Tsukada and Masaaki Nagata},
  title = {Extracting Pre-ordering Rules from Predicate-Argument Structures},
  pages = {29--37},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2011}
}

@inproceedings{DBLP:conf/coling/TillmannN00,
  author = {Christoph Tillmann and Hermann Ney},
  title = {Word Re-ordering and DP-based Search in Statistical Machine Translation},
  pages = {850--856},
  publisher = {Morgan Kaufmann},
  year = {2000}
}

% IWSLT paper (see the key): the venue is the International Workshop on Spoken
% Language Translation, not a computer-architecture symposium.
@inproceedings{DBLP:conf/iwslt/ShenDA06a,
  author = {Wade Shen and Brian Delaney and Timothy R. Anderson},
  title = {An efficient graph search decoder for phrase-based statistical machine translation},
  pages = {197--204},
  publisher = {International Workshop on Spoken Language Translation},
  year = {2006}
}

@inproceedings{robert2007faster,
  author = {Robert C. Moore and Chris Quirk},
  title = {Faster Beam-Search Decoding for Phrasal Statistical Machine Translation},
  publisher = {Machine Translation Summit XI},
  year = {2007}
}

@inproceedings{DBLP:conf/acl/HeafieldKM14,
  author = {Kenneth Heafield and Michael Kayser and Christopher D.
Manning},
  title = {Faster Phrase-Based Decoding by Refining Feature State},
  pages = {130--135},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2014}
}

@inproceedings{DBLP:conf/acl/WuebkerNZ12,
  author = {Joern Wuebker and Hermann Ney and Richard Zens},
  title = {Fast and Scalable Decoding with Language Model Look-Ahead for Phrase-based Statistical Machine Translation},
  pages = {28--32},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2012}
}

% IWSLT paper (see the key): the venue is the International Workshop on Spoken
% Language Translation, not a computer-architecture symposium.
@inproceedings{DBLP:conf/iwslt/ZensN08,
  author = {Richard Zens and Hermann Ney},
  title = {Improvements in dynamic programming beam search for phrase-based statistical machine translation},
  pages = {198--205},
  publisher = {International Workshop on Spoken Language Translation},
  year = {2008}
}

@inproceedings{och2004smorgasbord,
  author = {Franz Josef Och and Daniel Gildea and Sanjeev Khudanpur and Anoop Sarkar and Kenji Yamada and Alexander M. Fraser and Shankar Kumar and Libin Shen and David Smith and Katherine Eng and Viren Jain and Zhen Jin and Dragomir R.
Radev},
  title = {A Smorgasbord of Features for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {161--168},
  year = {2004},
}

@inproceedings{Chiang200911,
  author = {David Chiang and Kevin Knight and Wei Wang},
  title = {11,001 New Features for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {218--226},
  year = {2009},
}

@inproceedings{gildea2003loosely,
  author = {Daniel Gildea},
  title = {Loosely Tree-Based Alignment for Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {80--87},
  year = {2003},
}

@inproceedings{Blunsom2008A,
  author = {Phil Blunsom and Trevor Cohn and Miles Osborne},
  title = {A Discriminative Latent Variable Model for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {200--208},
  year = {2008},
}

@inproceedings{Blunsom2009A,
  author = {Phil Blunsom and Trevor Cohn and Chris Dyer and Miles Osborne},
  title = {A Gibbs Sampler for Phrasal Synchronous Grammar Induction},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {782--790},
  year = {2009},
}

@inproceedings{Cohn2009A,
  author = {Trevor Cohn and Phil Blunsom},
  title = {A Bayesian Model of Syntax-Directed Tree to String Grammar Induction},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {352--361},
  year = {2009},
}

@inproceedings{smith2006minimum,
  author = {David A.
Smith and Jason Eisner},
  title = {Minimum Risk Annealing for Training Log-Linear Models},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2006}
}

@inproceedings{li2009first,
  author = {Zhifei Li and Jason Eisner},
  title = {First- and Second-Order Expectation Semirings with Applications to Minimum-Risk Training on Translation Forests},
  pages = {40--51},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2009}
}

@inproceedings{watanabe2007online,
  author = {Taro Watanabe and Jun Suzuki and Hajime Tsukada and Hideki Isozaki},
  title = {Online Large-Margin Training for Statistical Machine Translation},
  pages = {764--773},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2007},
}

@inproceedings{dreyer2015apro,
  author = {Markus Dreyer and Yuanzhe Dong},
  title = {{APRO:} All-Pairs Ranking Optimization for {MT} Tuning},
  pages = {1018--1023},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2015}
}

@inproceedings{XiaoA,
  author = {Tong Xiao and Derek F. Wong and Jingbo Zhu},
  title = {A Loss-Augmented Approach to Training Syntactic Machine Translation Systems},
  publisher = {IEEE Transactions on Audio, Speech, and Language Processing},
  volume = {24},
  number = {11},
  pages = {2069--2083},
  year = {2016}
}

% NOTE: the key says marcu but the listed author is Daume; the key is kept so
% existing citations keep resolving. The author uses the Last, Jr, First form
% so that III is treated as a suffix rather than as part of the surname.
@book{marcu2006practical,
  author = {Daum{\'e}, III, Harold Charles},
  title = {Practical structured learning techniques for natural language processing},
  publisher = {University of Southern California},
  year = {2006},
}

@inproceedings{DBLP:conf/emnlp/SchwenkCF07,
  author = {Holger Schwenk and Marta R. Costa-juss{\`{a}} and Jos{\'{e}} A. R.
Fonollosa},
  title = {Smooth Bilingual N-Gram Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {430--438},
  year = {2007},
}

@inproceedings{boxing2011unpacking,
  author = {Boxing Chen and Roland Kuhn and George Foster and Howard Johnson},
  title = {Unpacking and Transforming Feature Functions: New Ways to Smooth Phrase Tables},
  publisher = {Machine Translation Summit},
  year = {2011},
}

@inproceedings{DBLP:conf/coling/DuanSZ10,
  author = {Nan Duan and Hong Sun and Ming Zhou},
  title = {Translation Model Generalization using Probability Averaging for Machine Translation},
  publisher = {International Conference on Computational Linguistics},
  year = {2010},
}

@inproceedings{DBLP:conf/naacl/QuirkM06,
  author = {Christopher Quirk and Arul Menezes},
  title = {Do we need phrases? Challenging the conventional wisdom in Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2006},
}

@inproceedings{DBLP:journals/coling/MarinoBCGLFC06,
  author = {Jos{\'{e}} B. Mari{\~{n}}o and Rafael E. Banchs and Josep Maria Crego and Adri{\`{a}} de Gispert and Patrik Lambert and Jos{\'{e}} A. R. Fonollosa and Marta R. Costa-juss{\`{a}}},
  title = {\emph{N}-gram-based Machine Translation},
  publisher = {Computational Linguistics},
  volume = {32},
  number = {4},
  pages = {527--549},
  year = {2006},
}

@inproceedings{DBLP:conf/emnlp/ZensSX12,
  author = {Richard Zens and Daisy Stanton and Peng Xu},
  title = {A Systematic Comparison of Phrase Table Pruning Techniques},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {972--983},
  year = {2012},
}

@inproceedings{DBLP:conf/emnlp/JohnsonMFK07,
  author = {Howard Johnson and Joel D. Martin and George F.
Foster and Roland Kuhn},
  title = {Improving Translation Quality by Discarding Most of the Phrasetable},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {967--975},
  year = {2007},
}

@inproceedings{DBLP:conf/emnlp/LingGTB12,
  author = {Wang Ling and Jo{\~{a}}o Gra{\c{c}}a and Isabel Trancoso and Alan W. Black},
  title = {Entropy-based Pruning for Phrase-based Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {962--971},
  year = {2012},
}

@inproceedings{DBLP:conf/naacl/ZettlemoyerM07,
  author = {Luke S. Zettlemoyer and Robert C. Moore},
  title = {Selective Phrase Pair Extraction for Improved Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {209--212},
  year = {2007},
}

@inproceedings{DBLP:conf/naacl/EckVW07,
  author = {Matthias Eck and Stephan Vogel and Alex Waibel},
  title = {Translation Model Pruning via Usage Statistics for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {21--24},
  year = {2007},
}

@inproceedings{DBLP:conf/acl/Callison-BurchBS05,
  author = {Chris Callison-Burch and Colin J.
Bannard and Josh Schroeder},
  title     = {Scaling Phrase-Based Statistical Machine Translation to Larger Corpora and Longer Phrases},
  pages     = {255--262},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2005},
}

% McNamee and Mayfield (2006). This entry needs its own citation key: it must
% not share the key of the Callison-Burch et al. (2005) entry directly above,
% otherwise BibTeX reports a repeated entry and this work cannot be cited.
@inproceedings{DBLP:conf/amta/McNameeM06,
  author    = {Paul McNamee and James Mayfield},
  title     = {Translation of Multiword Expressions Using Parallel Suffix Arrays},
  publisher = {Association for Machine Translation in the Americas},
  year      = {2006},
}

@inproceedings{DBLP:conf/naacl/ZensN07,
  author    = {Richard Zens and Hermann Ney},
  title     = {Efficient Phrase-Table Representation for Machine Translation with Applications to Online {MT} and Speech Translation},
  pages     = {492--499},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2007},
}

@inproceedings{2014Dynamic,
  author    = {Ulrich Germann},
  title     = {Dynamic Phrase Tables for Machine Translation in an Interactive Post-editing Scenario},
  publisher = {Association for Machine Translation in the Americas},
  year      = {2014},
}

%%%%% chapter 7------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 8------------------------------------------------------

@inproceedings{Chiang2012Hope,
  author    = {David Chiang},
  title     = {Hope and Fear for Discriminative Training of Statistical Translation Models},
  publisher = {Journal of Machine Learning Research},
  volume    = {13},
  pages     = {1159--1187},
  year      = {2012},
}

@inproceedings{chiang2007hierarchical,
  author    = {David Chiang},
  title     = {Hierarchical Phrase-Based Translation},
  publisher = {Computational Linguistics},
  volume    = {33},
  number    = {2},
  pages     = {201--228},
  year      = {2007},
}

@book{cocke1969programming,
  title     = {Programming Languages and Their Compilers: Preliminary Notes},
  author    = {Cocke, John and Schwartz, J. T.},
  year      = {1970},
publisher = {Courant Institute of Mathematical Sciences, New York University}
}

% The cubic bound is set in math mode and braced so styles neither recase it
% nor treat the exponent as an accent.
@inproceedings{younger1967recognition,
  author    = {Daniel H. Younger},
  title     = {Recognition and Parsing of Context-Free Languages in Time {$n^3$}},
  publisher = {Information and Control},
  volume    = {10},
  number    = {2},
  pages     = {189--208},
  year      = {1967},
}

@inproceedings{kasami1966efficient,
  author    = {Tadao Kasami},
  title     = {An efficient recognition and syntax-analysis algorithm for context-free languages},
  publisher = {Coordinated Science Laboratory Report no. R-257},
  year      = {1966},
}

@inproceedings{huang2005better,
  author    = {Liang Huang and David Chiang},
  title     = {Better k-best Parsing},
  pages     = {53--64},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2005},
}

@inproceedings{wu1997stochastic,
  author    = {Dekai Wu},
  title     = {Stochastic Inversion Transduction Grammars and Bilingual Parsing of Parallel Corpora},
  publisher = {Computational Linguistics},
  volume    = {23},
  number    = {3},
  pages     = {377--403},
  year      = {1997},
}

@inproceedings{huang2006statistical,
  author    = {Liang Huang and Kevin Knight and Aravind Joshi},
  title     = {Statistical syntax-directed translation with extended domain of locality},
  pages     = {66--73},
  year      = {2006},
  publisher = {Computationally Hard Problems \& Joint Inference in Speech \& Language Processing},
}

% Same work as DBLP:conf/naacl/GalleyHKM04 elsewhere in this file; both keys
% are kept so that existing citations of either one still resolve.
@inproceedings{galley2004s,
  author    = {Michel Galley and Mark Hopkins and Kevin Knight and Daniel Marcu},
  title     = {What’s in a translation rule?},
  publisher = {Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {273--280},
  year      = {2004},
}

@inproceedings{eisner2003learning,
  author    = {Jason Eisner},
  title     = {Learning Non-Isomorphic Tree Mappings for Machine Translation},
  pages     = {205--208},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2003},
}

@inproceedings{zhang2008tree,
  author    = {Min Zhang and Hongfei Jiang and AiTi Aw and
Haizhou Li and Chew Lim Tan and Sheng Li},
  title     = {A Tree Sequence Alignment-based Tree-to-Tree Translation Model},
  pages     = {559--567},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2008},
}

@inproceedings{liu2009improving,
  author    = {Yang Liu and Yajuan L{\"{u}} and Qun Liu},
  title     = {Improving Tree-to-Tree Translation with Packed Forests},
  pages     = {558--566},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2009},
}

@inproceedings{chiang2010learning,
  author    = {David Chiang},
  title     = {Learning to Translate with Source and Target Syntax},
  pages     = {1443--1452},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2010},
}

@inproceedings{marcu2006spmt,
  author    = {Daniel Marcu and Wei Wang and Abdessamad Echihabi and Kevin Knight},
  title     = {{SPMT:} Statistical Machine Translation with Syntactified Target Language Phrases},
  pages     = {44--52},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2006},
}

% Third author is Fu-Dong Chiou: written in "Last, First" form so that "dong"
% is not parsed as a lowercase von-particle of the surname.
@inproceedings{xue2005building,
  author    = {Xue, Nianwen and Xia, Fei and Chiou, Fu-Dong and Palmer, Martha},
  title     = {Building a large annotated {Chinese} corpus: the {Penn} {Chinese} treebank},
  publisher = {Journal of Natural Language Engineering},
  volume    = {11},
  number    = {2},
  pages     = {207--238},
  year      = {2005},
}

@inproceedings{DBLP:journals/coling/MarcusSM94,
  author    = {Mitchell P.
Marcus and Beatrice Santorini and Mary Ann Marcinkiewicz},
  title     = {Building a Large Annotated Corpus of English: The Penn Treebank},
  publisher = {Computational Linguistics},
  volume    = {19},
  number    = {2},
  pages     = {313--330},
  year      = {1993},
}

@inproceedings{DBLP:conf/naacl/ZhangHGK06,
  author    = {Hao Zhang and Liang Huang and Daniel Gildea and Kevin Knight},
  title     = {Synchronous Binarization for Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2006},
}

@inproceedings{Tong2009Better,
  author    = {Tong Xiao and Mu Li and Dongdong Zhang and Jingbo Zhu and Ming Zhou},
  title     = {Better Synchronous Binarization for Machine Translation},
  pages     = {362--370},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2009},
}

@inproceedings{charniak2006multilevel,
  title     = {Multilevel Coarse-to-Fine PCFG Parsing},
  author    = {Eugene {Charniak} and Mark {Johnson} and Micha {Elsner} and Joseph {Austerweil} and David {Ellis} and Isaac {Haxton} and Catherine {Hill} and R.
{Shrivaths} and Jeremy {Moore} and Michael {Pozar} and Theresa {Vu}},
  publisher = {Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {168--175},
  year      = {2006},
}

@inproceedings{DBLP:conf/coling/GrovesHW04,
  author    = {Declan Groves and Mary Hearne and Andy Way},
  title     = {Robust Sub-Sentential Alignment of Phrase-Structure Trees},
  publisher = {International Conference on Computational Linguistics},
  year      = {2004},
}

@inproceedings{DBLP:conf/coling/SunZT10,
  author    = {Jun Sun and Min Zhang and Chew Lim Tan},
  title     = {Discriminative Induction of Sub-Tree Alignment using Limited Labeled Data},
  pages     = {1047--1055},
  publisher = {International Conference on Computational Linguistics},
  year      = {2010},
}

@inproceedings{liu2009weighted,
  author    = {Yang Liu and Tian Xia and Xinyan Xiao and Qun Liu},
  title     = {Weighted Alignment Matrices for Statistical Machine Translation},
  pages     = {1017--1026},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2009},
}

@inproceedings{sun2010exploring,
  author    = {Jun Sun and Min Zhang and Chew Lim Tan},
  title     = {Exploring Syntactic Structural Features for Sub-Tree Alignment Using Bilingual Tree Kernels},
  pages     = {306--315},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2010},
}

@inproceedings{ilprints729,
  author    = {Klein, Dan and Manning, Christopher D.},
  title     = {Parsing and Hypergraphs},
  publisher = {New Developments in Parsing Technology},
  volume    = {65},
  number    = {3},
  pages     = {123--134},
  year      = {2001},
}

@inproceedings{goodman1999semiring,
  author    = {Joshua Goodman},
  title     = {Semiring Parsing},
  publisher = {Computational Linguistics},
  volume    = {25},
  number    = {4},
  pages     = {573--605},
  year      = {1999},
}

@inproceedings{eisner2002parameter,
  author    = {Jason Eisner},
  title     = {Parameter Estimation for Probabilistic Finite-State Transducers},
  pages     = {1--8},
  publisher = {Annual Meeting of the Association for
Computational Linguistics},
  year      = {2002},
}

@inproceedings{zhu2011improving,
  author    = {Jingbo Zhu and Tong Xiao},
  title     = {Improving Decoding Generalization for Tree-to-String Translation},
  pages     = {418--423},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2011},
}

@inproceedings{DBLP:conf/acl/AlshawiBX97,
  author    = {Hiyan Alshawi and Adam L. Buchsbaum and Fei Xia},
  title     = {A Comparison of Head Transducers and Transfer for a Limited Domain Translation Application},
  pages     = {360--365},
  publisher = {Morgan Kaufmann Publishers},
  year      = {1997},
}

@inproceedings{DBLP:conf/acl-vlc/Wu95,
  author    = {Dekai Wu},
  title     = {Trainable Coarse Bilingual Grammars for Parallel Text Bracketing},
  publisher = {Third Workshop on Very Large Corpora},
  year      = {1995},
}

@inproceedings{DBLP:conf/acl/WuW98,
  author    = {Dekai Wu and Hongsing Wong},
  title     = {Machine Translation with a Stochastic Grammatical Channel},
  pages     = {1408--1415},
  publisher = {Morgan Kaufmann Publishers},
  year      = {1998},
}

% Names are given as "Last, Initials" with spaced initials so BibTeX splits
% surname and given names correctly.
@inproceedings{ja2006obtaining,
  author    = {Sánchez, J. A. and Benedí, J. M.},
  title     = {Obtaining Word Phrases with Stochastic Inversion Transduction Grammars for Phrase-based Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2006},
}

@inproceedings{DBLP:conf/acl/ZhangQMG08,
  author    = {Hao Zhang and Chris Quirk and Robert C. Moore and Daniel Gildea},
  title     = {Bayesian Learning of Non-Compositional Phrases with Synchronous Parsing},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2008},
}

@inproceedings{DBLP:conf/coling/ZollmannVOP08,
  author    = {Andreas Zollmann and Ashish Venugopal and Franz Josef Och and Jay M.
Ponte},
  title     = {A Systematic Comparison of Phrase-Based, Hierarchical and Syntax-Augmented Statistical {MT}},
  publisher = {International Conference on Computational Linguistics},
  pages     = {1145--1152},
  year      = {2008},
}

@inproceedings{DBLP:conf/acl/WatanabeTI06,
  author    = {Taro Watanabe and Hajime Tsukada and Hideki Isozaki},
  title     = {Left-to-Right Target Generation for Hierarchical Phrase-Based Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2006},
}

% Same work as galley2004s elsewhere in this file; both keys are kept so that
% existing citations of either one still resolve.
@inproceedings{DBLP:conf/naacl/GalleyHKM04,
  author    = {Michel Galley and Mark Hopkins and Kevin Knight and Daniel Marcu},
  title     = {What's in a translation rule?},
  pages     = {273--280},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2004},
}

@inproceedings{DBLP:conf/naacl/HuangK06,
  author    = {Bryant Huang and Kevin Knight},
  title     = {Relabeling Syntax Trees to Improve Syntax-Based Machine Translation Quality},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2006},
}

% Both occurrences of the acronym are braced so sentence-casing styles keep
% them in capitals.
@inproceedings{DBLP:conf/emnlp/DeNeefeKWM07,
  author    = {Steve DeNeefe and Kevin Knight and Wei Wang and Daniel Marcu},
  title     = {What Can Syntax-Based {MT} Learn from Phrase-Based {MT}?},
  pages     = {755--763},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2007},
}

@inproceedings{DBLP:conf/wmt/LiuG08,
  author    = {Ding Liu and Daniel Gildea},
  title     = {Improved Tree-to-String Transducer for Machine Translation},
  pages     = {62--69},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2008},
}

@inproceedings{Nesson06inductionof,
  author    = {Rebecca Nesson and Stuart M.
Shieber and Alexander Rush},
  title     = {Induction of probabilistic synchronous tree-insertion grammars for machine translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2006},
}

@misc{Zhang07atree-to-tree,
  author    = {Min Zhang and Hongfei Jiang and Ai Ti Aw and Jun Sun and Sheng Li and Chew Lim Tan},
  title     = {A Tree-to-Tree Alignment-based Model for Statistical Machine Translation},
  year      = {2007},
  publisher = {Machine Translation Summit},
}

@inproceedings{DBLP:conf/emnlp/WangKM07,
  author    = {Wei Wang and Kevin Knight and Daniel Marcu},
  title     = {Binarizing Syntax Trees to Improve Syntax-Based Machine Translation Accuracy},
  pages     = {746--754},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2007},
}

@inproceedings{DBLP:conf/acl/MiHL08,
  author    = {Haitao Mi and Liang Huang and Qun Liu},
  title     = {Forest-Based Translation},
  pages     = {192--199},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2008},
}

@inproceedings{DBLP:conf/emnlp/MiH08,
  author    = {Haitao Mi and Liang Huang},
  title     = {Forest-based Translation Rule Extraction},
  pages     = {206--214},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2008},
}

@inproceedings{DBLP:conf/emnlp/ZhangZZ11,
  author    = {Jiajun Zhang and Feifei Zhai and Chengqing Zong},
  title     = {Augmenting String-to-Tree Translation Models with Fuzzy Use of Source-side Syntax},
  pages     = {204--215},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2011},
}

@inproceedings{DBLP:conf/wmt/ZollmannV06,
  author    = {Andreas Zollmann and Ashish Venugopal},
  title     = {Syntax Augmented Machine Translation via Chart Parsing},
  pages     = {138--141},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2006},
}

@inproceedings{DBLP:conf/acl/MartonR08,
  author    = {Yuval Marton and Philip Resnik},
  title     = {Soft Syntactic Constraints for Hierarchical Phrased-Based
Translation},
  pages     = {1003--1011},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2008},
}

% Czech diacritics are restored, matching the spelling of the same author in
% the marevcek2018extracting entry of this file.
@inproceedings{DBLP:conf/wmt/PopelMGZ11,
  author    = {Martin Popel and David Mare{\v{c}}ek and Nathan Green and Zden{\v{e}}k {\v{Z}}abokrtsk{\'{y}}},
  title     = {Influence of Parser Choice on Dependency-Based {MT}},
  pages     = {433--439},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2011},
}

@inproceedings{DBLP:conf/coling/XiaoZZZ10,
  author    = {Tong Xiao and Jingbo Zhu and Hao Zhang and Muhua Zhu},
  title     = {An Empirical Study of Translation Rule Extraction with Multiple Parsers},
  pages     = {1345--1353},
  publisher = {Chinese Information Processing Society of China},
  year      = {2010},
}

@inproceedings{Zhai2012Treebased,
  author    = {Feifei Zhai and Jiajun Zhang and Yu Zhou and Chengqing Zong},
  title     = {Tree-based translation without using parse trees},
  publisher = {International Conference on Computational Linguistics},
  year      = {2012},
}

@inproceedings{DBLP:journals/tacl/ZhaiZZZ13,
  author    = {Feifei Zhai and Jiajun Zhang and Yu Zhou and Chengqing Zong},
  title     = {Unsupervised Tree Induction for Tree-based Translation},
  publisher = {Transactions of the Association for Computational Linguistics},
  volume    = {1},
  pages     = {243--254},
  year      = {2013},
}

@inproceedings{DBLP:journals/mt/QuirkM06,
  author    = {Christopher Quirk and Arul Menezes},
  title     = {Dependency treelet translation: the convergence of statistical and example-based machine-translation?},
  publisher = {Machine Translation},
  volume    = {20},
  number    = {1},
  pages     = {43--65},
  year      = {2006},
}

@inproceedings{DBLP:conf/wmt/XiongLL07,
  author    = {Deyi Xiong and Qun Liu and Shouxun Lin},
  title     = {A Dependency Treelet String Correspondence Model for Statistical Machine Translation},
  pages     = {40--47},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2007},
}

@inproceedings{DBLP:conf/coling/Lin04,
  author    = {Dekang Lin},
  title     = {A Path-based Transfer Model for Machine
Translation},
  publisher = {International Conference on Computational Linguistics},
  year      = {2004},
}

@inproceedings{DBLP:conf/acl/DingP05,
  author    = {Yuan Ding and Martha Palmer},
  title     = {Machine Translation Using Probabilistic Synchronous Dependency Insertion Grammars},
  pages     = {541--548},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2005},
}

@inproceedings{DBLP:conf/coling/ChenXMJL14,
  author    = {Hongshen Chen and Jun Xie and Fandong Meng and Wenbin Jiang and Qun Liu},
  title     = {A Dependency Edge-based Transfer Model for Statistical Machine Translation},
  pages     = {1103--1113},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2014},
}

@inproceedings{DBLP:conf/coling/SuLMZLL10,
  author    = {Jinsong Su and Yang Liu and Haitao Mi and Hongmei Zhao and Yajuan Lv and Qun Liu},
  title     = {Dependency-Based Bracketing Transduction Grammar for Statistical Machine Translation},
  pages     = {1185--1193},
  publisher = {Chinese Information Processing Society of China},
  year      = {2010},
}

@inproceedings{DBLP:conf/coling/XieXL14,
  author    = {Jun Xie and Jinan Xu and Qun Liu},
  title     = {Augment Dependency-to-String Translation with Fixed and Floating Structures},
  pages     = {2217--2226},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2014},
}

@inproceedings{DBLP:conf/emnlp/LiWL15,
  author    = {Liangyou Li and Andy Way and Qun Liu},
  title     = {Dependency Graph-to-String Translation},
  pages     = {33--43},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2015},
}

@inproceedings{DBLP:conf/acl/MiL10,
  author    = {Haitao Mi and Qun Liu},
  title     = {Constituency to Dependency Translation with Forests},
  pages     = {1433--1442},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2010},
}

@inproceedings{DBLP:conf/coling/TuLHLL10,
  author    = {Zhaopeng Tu and Yang Liu and Young-Sook Hwang and Qun Liu and Shouxun Lin},
  title     = {Dependency Forest for
Statistical Machine Translation},
  pages     = {1092--1100},
  publisher = {International Conference on Computational Linguistics},
  year      = {2010},
}

% Three authors; every pair is separated by "and" because BibTeX treats a
% comma inside a name as the "Last, First" separator, not as a list separator.
@inproceedings{bangalore2001computing,
  author    = {Srinivas Bangalore and German Bordel and Giuseppe Riccardi},
  title     = {Computing consensus translation from multiple machine translation systems},
  publisher = {IEEE Workshop on Automatic Speech Recognition and Understanding},
  pages     = {351--354},
  year      = {2001},
}

@inproceedings{rosti2007combining,
  author    = {Antti-Veikko I. Rosti and Necip Fazil Ayan and Bing Xiang and Spyridon Matsoukas and Richard M. Schwartz and Bonnie J. Dorr},
  title     = {Combining Outputs from Multiple Machine Translation Systems},
  pages     = {228--235},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2007},
}

@inproceedings{xiao2013bagging,
  author    = {Tong Xiao and Jingbo Zhu and Tongran Liu},
  title     = {Bagging and boosting statistical machine translation systems},
  publisher = {Artificial Intelligence},
  volume    = {195},
  pages     = {496--527},
  year      = {2013},
}

@inproceedings{Yang2009Lattice,
  author    = {Yang Feng and Yang Liu and Haitao Mi and Qun Liu and Yajuan L{\"{u}}},
  title     = {Lattice-based System Combination for Statistical Machine Translation},
  pages     = {1105--1113},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2009},
}

@inproceedings{He2008Indirect,
  author    = {Xiaodong He and Mei Yang and Jianfeng Gao and Patrick Nguyen and Robert C.
Moore},
  title     = {Indirect-HMM-based Hypothesis Alignment for Combining Outputs from Machine Translation Systems},
  pages     = {98--107},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2008},
}

@inproceedings{Li2009Incremental,
  author    = {Chi-Ho Li and Xiaodong He and Yupeng Liu and Ning Xi},
  title     = {Incremental {HMM} Alignment for {MT} System Combination},
  pages     = {949--957},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2009},
}

@inproceedings{Yang2009Joint,
  author    = {Yang Liu and Haitao Mi and Yang Feng and Qun Liu},
  title     = {Joint Decoding with Multiple Translation Models},
  pages     = {576--584},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2009},
}

@inproceedings{Mu2009Collaborative,
  author    = {Mu Li and Nan Duan and Dongdong Zhang and Chi-Ho Li and Ming Zhou},
  title     = {Collaborative Decoding: Partial Hypothesis Re-ranking Using Translation Consensus between Decoders},
  pages     = {585--592},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2009},
}

@inproceedings{Tong2016Syntactic,
  author    = {Tong Xiao and Jingbo Zhu and Chunliang Zhang and Tongran Liu},
  title     = {Syntactic Skeleton-Based Translation},
  pages     = {2856--2862},
  publisher = {AAAI Conference on Artificial Intelligence},
  year      = {2016},
}

@inproceedings{charniak2001immediate,
  author    = {Eugene Charniak},
  title     = {Immediate-Head Parsing for Language Models},
  pages     = {116--123},
  publisher = {Morgan Kaufmann Publishers},
  year      = {2001},
}

@inproceedings{shen2008a,
  author    = {Libin Shen and Jinxi Xu and Ralph M.
Weischedel},
  title     = {A New String-to-Dependency Machine Translation Algorithm with a Target Dependency Language Model},
  pages     = {577--585},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2008},
}

@inproceedings{xiao2011language,
  author    = {Xiao, Tong and Zhu, Jingbo and Zhu, Muhua},
  title     = {Language Modeling for Syntax-Based Machine Translation Using Tree Substitution Grammars: A Case Study on Chinese-English Translation},
  volume    = {10},
  number    = {4},
  pages     = {1--29},
  year      = {2011},
  publisher = {ACM Transactions on Asian Language Information Processing (TALIP)},
}

%%%%% chapter 8------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 9------------------------------------------------------

% Journal name is spelled as in the other Computational Linguistics entries
% of this file.
@inproceedings{brown1992class,
  author    = {Peter F. Brown and Vincent J. Della Pietra and Peter V. De Souza and Jennifer C. Lai and Robert L. Mercer},
  title     = {Class-based n-gram models of natural language},
  publisher = {Computational Linguistics},
  volume    = {18},
  number    = {4},
  pages     = {467--479},
  year      = {1992},
}

@inproceedings{mikolov2012context,
  author    = {Tomas Mikolov and Geoffrey Zweig},
  title     = {Context dependent recurrent neural network language model},
  publisher = {IEEE Spoken Language Technology Workshop},
  pages     = {234--239},
  year      = {2012},
}

@inproceedings{zaremba2014recurrent,
  author    = {Wojciech Zaremba and Ilya Sutskever and Oriol Vinyals},
  title     = {Recurrent Neural Network Regularization},
  publisher = {arXiv: Neural and Evolutionary Computing},
  year      = {2014},
}

@inproceedings{zilly2016recurrent,
  title     = {Recurrent Highway Networks},
  author    = {Julian G.
Zilly and Rupesh Kumar Srivastava and Jan Koutn{\'{\i}}k and J{\"{u}}rgen Schmidhuber},
  publisher = {International Conference on Machine Learning},
  year      = {2016},
}

@inproceedings{merity2017regularizing,
  author    = {Stephen Merity and Nitish Shirish Keskar and Richard Socher},
  title     = {Regularizing and optimizing LSTM language models},
  publisher = {International Conference on Learning Representations},
  year      = {2017},
}

@inproceedings{radford2019language,
  author    = {Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya},
  title     = {Language models are unsupervised multitask learners},
  publisher = {OpenAI Blog},
  volume    = {1},
  number    = {8},
  pages     = {9},
  year      = {2019},
}

@inproceedings{baydin2017automatic,
  author    = {Baydin, At{\i}l{\i}m G{\"u}nes and Pearlmutter, Barak A and Radul, Alexey Andreyevich and Siskind, Jeffrey Mark},
  title     = {Automatic differentiation in machine learning: a survey},
  publisher = {Journal of Machine Learning Research},
  volume    = {18},
  number    = {1},
  pages     = {5595--5637},
  year      = {2017},
}

@inproceedings{qian1999momentum,
  author    = {Ning Qian},
  title     = {On the momentum term in gradient descent learning algorithms},
  publisher = {Neural Networks},
  volume    = {12},
  number    = {1},
  pages     = {145--151},
  year      = {1999},
}

@inproceedings{duchi2011adaptive,
  author    = {John C. Duchi and Elad Hazan and Yoram Singer},
  title     = {Adaptive Subgradient Methods for Online Learning and Stochastic Optimization},
  publisher = {Journal of Machine Learning Research},
  volume    = {12},
  pages     = {2121--2159},
  year      = {2011},
}

@inproceedings{tieleman2012rmsprop,
  author    = {Tieleman, Tijmen and Hinton, Geoffrey},
  title     = {Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude},
  publisher = {COURSERA: Neural networks for machine learning},
  volume    = {4},
  number    = {2},
  pages     = {26--31},
  year      = {2012},
}

@inproceedings{kingma2014adam,
  author    = {Diederik P.
Kingma and Jimmy Ba},
  title     = {Adam: {A} Method for Stochastic Optimization},
  publisher = {International Conference on Learning Representations},
  year      = {2015},
}

@inproceedings{ioffe2015batch,
  author    = {Sergey Ioffe and Christian Szegedy},
  title     = {Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift},
  publisher = {International Conference on Machine Learning},
  volume    = {37},
  pages     = {448--456},
  year      = {2015},
}

@inproceedings{Ba2016LayerN,
  author    = {Lei Jimmy Ba and Jamie Ryan Kiros and Geoffrey Hinton},
  title     = {Layer Normalization},
  publisher = {CoRR},
  volume    = {abs/1607.06450},
  year      = {2016},
}

@inproceedings{mikolov2013distributed,
  author    = {Tomas Mikolov and Ilya Sutskever and Kai Chen and Gregory S. Corrado and Jeffrey Dean},
  title     = {Distributed Representations of Words and Phrases and their Compositionality},
  publisher = {Conference on Neural Information Processing Systems},
  pages     = {3111--3119},
  year      = {2013},
}

@inproceedings{arthur2016incorporating,
  author    = {Philip Arthur and Graham Neubig and Satoshi Nakamura},
  title     = {Incorporating Discrete Translation Lexicons into Neural Machine Translation},
  pages     = {1557--1567},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2016},
}

@inproceedings{stahlberg2016syntactically,
  author    = {Felix Stahlberg and Eva Hasler and Aurelien Waite and Bill Byrne},
  title     = {Syntactically Guided Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2016},
}

@inproceedings{plank2013embedding,
  author    = {Barbara Plank and Alessandro Moschitti},
  title     = {Embedding Semantic Similarity in Tree Kernels for Domain Adaptation of Relation Extraction},
  pages     = {1498--1507},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2013},
}

@inproceedings{perozzi2014deepwalk,
  author    = {Bryan Perozzi and Rami Al-Rfou and Steven Skiena},
  title     = {DeepWalk: online learning of
social representations},
  publisher = {ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
  pages     = {701--710},
  year      = {2014},
}

@inproceedings{2011Natural,
  author    = {Collobert, Ronan and Weston, Jason and Bottou, Léon and Karlen, Michael and Kavukcuoglu, Koray and Kuksa, Pavel},
  title     = {Natural Language Processing (almost) from Scratch},
  publisher = {Journal of Machine Learning Research},
  volume    = {12},
  number    = {1},
  pages     = {2493--2537},
  year      = {2011},
}

@inproceedings{mccann2017learned,
  author    = {Bryan McCann and James Bradbury and Caiming Xiong and Richard Socher},
  title     = {Learned in Translation: Contextualized Word Vectors},
  publisher = {Conference on Neural Information Processing Systems},
  pages     = {6294--6305},
  year      = {2017},
}

%%%%%%%%%%%%%%%%%%%%%%% Neural language models: checked and revised %%%%%%%%%%%%%%%%%%%%%%%%%

@inproceedings{Peters2018DeepCW,
  author    = {Matthew Peters and Mark Neumann and Mohit Iyyer and Matt Gardner and Christopher Clark and Kenton Lee and Luke Zettlemoyer},
  title     = {Deep contextualized word representations},
  publisher = {Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2018},
}

@inproceedings{Graves2013HybridSR,
  author    = {Alex Graves and Navdeep Jaitly and Abdel-rahman Mohamed},
  title     = {Hybrid speech recognition with Deep Bidirectional LSTM},
  publisher = {IEEE Workshop on Automatic Speech Recognition and Understanding},
  year      = {2013},
  pages     = {273--278},
}

@inproceedings{Verwimp2017CharacterWordLL,
  author    = {Lyan Verwimp and Joris Pelemans and Hugo Van Hamme and Patrick Wambacq},
  title     = {Character-Word LSTM Language Models},
  publisher = {Annual Conference of the European Association for Machine Translation},
  year      = {2017},
}

@inproceedings{Onoe2016GatedWR,
  author    = {Yasumasa Miyamoto and Kyunghyun Cho},
  title     = {Gated Word-Character Recurrent Language Model},
  pages     = {1992--1997},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2016},
}
% In these entries the venue (conference or journal name) is stored in the
% publisher field; page ranges use a double hyphen.

@inproceedings{Hwang2017CharacterlevelLM,
  author    = {Kyuyeon Hwang and Wonyong Sung},
  title     = {Character-level language modeling with hierarchical recurrent neural networks},
  publisher = {International Conference on Acoustics, Speech and Signal Processing},
  pages     = {5720--5724},
  year      = {2017}
}

@inproceedings{Kim2016CharacterAwareNL,
  author    = {Yoon Kim and Yacine Jernite and David Sontag and Alexander M. Rush},
  title     = {Character-Aware Neural Language Models},
  publisher = {AAAI Conference on Artificial Intelligence},
  year      = {2016}
}

@inproceedings{Ahn2016ANK,
  author    = {Sungjin Ahn and Heeyoul Choi and Tanel P{\"a}rnamaa and Yoshua Bengio},
  title     = {A Neural Knowledge Language Model},
  publisher = {arXiv preprint arXiv:1608.00318},
  year      = {2016}
}

@inproceedings{Wang2015LargerContextLM,
  author    = {Tian Wang and Kyunghyun Cho},
  title     = {Larger-Context Language Modelling},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2015}
}

@inproceedings{Adel2015SyntacticAS,
  author    = {Heike Adel and Ngoc Vu and Katrin Kirchhoff and Dominic Telaar and Tanja Schultz},
  title     = {Syntactic and Semantic Features For Code-Switching Factored Language Models},
  publisher = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume    = {23},
  pages     = {431--440},
  year      = {2015}
}

@inproceedings{Wu2012FactoredLM,
  author    = {Youzheng Wu and Xugang Lu and Hitoshi Yamamoto and Shigeki Matsuda and Chiori Hori and Hideki Kashioka},
  title     = {Factored Language Model based on Recurrent Neural Network},
  publisher = {International Conference on Computational Linguistics},
  year      = {2012}
}

@inproceedings{Pham2016ConvolutionalNN,
  author    = {Ngoc-quan Pham and German Kruszewski and Gemma Boleda},
  title     = {Convolutional Neural Network Language Models},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2016}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%% Further reading: revisions and additions (pending review) %%%%%%%%%%%%%%%%%%%

@inproceedings{moraffah2020causal,
  author    = {Raha Moraffah and Mansooreh Karami and Ruocheng Guo and Adrienne Raglin and Huan Liu},
  title     = {Causal Interpretability for Machine Learning-Problems, Methods and Evaluation},
  publisher = {ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
  volume    = {22},
  number    = {1},
  pages     = {18--33},
  year      = {2020}
}

@incollection{nguyen2019understanding,
  author    = {Anh Nguyen and Jason Yosinski and Jeff Clune},
  title     = {Understanding neural networks via feature visualization: A survey},
  publisher = {Springer},
  pages     = {55--76},
  year      = {2019}
}

@inproceedings{yang2017improving,
  author    = {Yating Yang and Xiao Li and Tonghai Jiang and Jinying Kong and Bo Ma and Xi Zhou and Lei Wang},
  title     = {Improving adversarial neural machine translation with prior knowledge},
  publisher = {IEEE Global Conference on Signal and Information Processing},
  pages     = {1373--1377},
  year      = {2017}
}

@inproceedings{currey2019incorporating,
  author    = {Anna Currey and Kenneth Heafield},
  title     = {Incorporating source syntax into transformer-based neural machine translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {24--33},
  year      = {2019}
}

@inproceedings{currey2018multi,
  author    = {Anna Currey and Kenneth Heafield},
  title     = {Multi-source syntactic neural machine translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
}

@inproceedings{marevcek2018extracting,
  author    = {David Mare{\v{c}}ek and Rudolf Rosa},
  title     = {Extracting syntactic trees from transformer encoder self-attentions},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {347--349},
  year      = {2018}
}

@inproceedings{blevins2018deep,
  author    = {Blevins, Terra and Levy, Omer and Zettlemoyer, Luke},
  title     = {Deep rnns encode soft hierarchical syntax},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2018}
}

@inproceedings{Yin2018StructVAETL, title={StructVAE: Tree-structured Latent Variable Models for Semi-supervised Semantic Parsing}, author={Pengcheng
Yin and Chunting Zhou and Junxian He and Graham Neubig}, publisher={Annual Meeting of the Association for Computational Linguistics}, year={2018} }

% In these entries the venue (conference or journal name) is stored in the
% publisher field.

@inproceedings{Aharoni2017TowardsSN,
  author    = {Roee Aharoni and Yoav Goldberg},
  title     = {Towards String-To-Tree Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2017}
}

@inproceedings{Bastings2017GraphCE,
  author    = {Jasmijn Bastings and Ivan Titov and Wilker Aziz and Diego Marcheggiani and Khalil Sima'an},
  title     = {Graph Convolutional Encoders for Syntax-aware Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2017}
}

@inproceedings{KoncelKedziorski2019TextGF,
  author    = {Rik Koncel-Kedziorski and Dhanush Bekal and Yi Luan and Mirella Lapata and Hannaneh Hajishirzi},
  title     = {Text Generation from Knowledge Graphs with Graph Transformers},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2019}
}

@inproceedings{Kovalerchuk2020SurveyOE,
  author    = {Boris Kovalerchuk and Muhammad Ahmad and Ankur Teredesai},
  title     = {Survey of explainable machine learning with visual and granular methods beyond quasi-explanations},
  publisher = {ArXiv},
  volume    = {abs/2009.10221},
  year      = {2020}
}

@inproceedings{DoshiVelez2017TowardsAR,
  author    = {Finale Doshi-Velez and Been Kim},
  title     = {Towards A Rigorous Science of Interpretable Machine Learning},
  publisher = {arXiv preprint arXiv:1702.08608},
  year      = {2017}
}

@inproceedings{Dozat2016IncorporatingNM,
  author    = {Timothy Dozat},
  title     = {Incorporating Nesterov Momentum into Adam},
  publisher = {International Conference on Learning Representations},
  year      = {2016}
}

@inproceedings{Reddi2018OnTC,
  author    = {Sashank J. Reddi and Satyen Kale and Sanjiv Kumar},
  title     = {On the Convergence of Adam and Beyond},
  publisher = {International Conference on Learning Representations},
  year      = {2018}
}

@inproceedings{Zeiler2012ADADELTAAA,
  author    = {Matthew D. Zeiler},
  title     = {ADADELTA: An Adaptive Learning Rate Method},
  publisher = {arXiv preprint arXiv:1212.5701},
  year      = {2012}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 9------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 10------------------------------------------------------

@inproceedings{vaswani2017attention,
  author    = {Ashish {Vaswani} and Noam {Shazeer} and Niki {Parmar} and Jakob {Uszkoreit} and Llion {Jones} and Aidan N. {Gomez} and Lukasz {Kaiser} and Illia {Polosukhin}},
  title     = {Attention is All You Need},
  publisher = {Conference on Neural Information Processing Systems},
  pages     = {5998--6008},
  year      = {2017}
}

@inproceedings{DBLP:conf/acl/LiLWJXZLL20,
  author    = {Bei Li and Hui Liu and Ziyang Wang and Yufan Jiang and Tong Xiao and Jingbo Zhu and Tongran Liu and Changliang Li},
  title     = {Does Multi-Encoder Help? {A} Case Study on Context-Aware Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {3512--3518},
  year      = {2020}
}

@inproceedings{DBLP:conf/emnlp/MiWI16,
  author    = {Haitao Mi and Zhiguo Wang and Abe Ittycheriah},
  title     = {Supervised Attentions for Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {2283--2288},
  year      = {2016}
}

@inproceedings{DBLP:conf/coling/LiuUFS16,
  author    = {Lemao Liu and Masao Utiyama and Andrew M. Finch and Eiichiro Sumita},
  title     = {Neural Machine Translation with Supervised Attention},
  publisher = {International Conference on Computational Linguistics},
  pages     = {3093--3102},
  year      = {2016}
}

@inproceedings{devlin-etal-2014-fast, author = {Jacob Devlin and Rabih Zbib and Zhongqiang Huang and Thomas Lamar and Richard M.
Schwartz and John Makhoul}, title = {Fast and Robust Neural Network Joint Models for Statistical Machine Translation}, pages = {1370--1380}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2014} }

% In these entries the venue (conference or journal name) is stored in the
% publisher field; page ranges use a double hyphen.

@inproceedings{Schwenk_continuousspace,
  author    = {Holger Schwenk},
  title     = {Continuous Space Translation Models for Phrase-Based Statistical Machine Translation},
  publisher = {International Conference on Computational Linguistics},
  pages     = {1071--1080},
  year      = {2012}
}

@inproceedings{kalchbrenner-blunsom-2013-recurrent,
  author    = {Nal Kalchbrenner and Phil Blunsom},
  title     = {Recurrent Continuous Translation Models},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1700--1709},
  year      = {2013}
}

@inproceedings{HochreiterThe,
  author    = {Sepp Hochreiter},
  title     = {The Vanishing Gradient Problem During Learning Recurrent Neural Nets and Problem Solutions},
  publisher = {International Journal of Uncertainty, Fuzziness and Knowledge-Based Systems},
  volume    = {6},
  number    = {2},
  pages     = {107--116},
  year      = {1998}
}

@inproceedings{BENGIO1994Learning,
  author    = {Yoshua Bengio and Patrice Y. Simard and Paolo Frasconi},
  title     = {Learning long-term dependencies with gradient descent is difficult},
  publisher = {IEEE Transactions on Neural Networks},
  volume    = {5},
  number    = {2},
  pages     = {157--166},
  year      = {1994}
}

@inproceedings{StahlbergNeural,
  author    = {Felix Stahlberg},
  title     = {Neural Machine Translation: A Review},
  publisher = {Journal of Artificial Intelligence Research},
  volume    = {69},
  pages     = {343--418},
  year      = {2020}
}

@inproceedings{Bentivogli2016NeuralVP,
  author    = {Luisa Bentivogli and Arianna Bisazza and Mauro Cettolo and Marcello Federico},
  title     = {Neural versus Phrase-Based Machine Translation Quality: a Case Study},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {257--267},
  year      = {2016}
}

@inproceedings{Hassan2018AchievingHP,
  author    = {Hany Hassan and Anthony Aue and Chang Chen and Vishal Chowdhary and Jonathan Clark and Christian Federmann and Xuedong Huang and Marcin Junczys-Dowmunt and William Lewis and Mu Li and Shujie Liu and Tie-Yan Liu and Renqian Luo and Arul Menezes and Tao Qin and Frank Seide and Xu Tan and Fei Tian and Lijun Wu and Shuangzhi Wu and Yingce Xia and Dongdong Zhang and Zhirui Zhang and Ming Zhou},
  title     = {Achieving Human Parity on Automatic Chinese to English News Translation},
  publisher = {CoRR},
  volume    = {abs/1803.05567},
  year      = {2018}
}

@inproceedings{WangLearning, author = {Qiang Wang and Bei Li and Tong Xiao and Jingbo Zhu and Changliang Li and Derek F. Wong and Lidia S.
Chao}, title = {Learning Deep Transformer Models for Machine Translation}, pages = {1810--1822}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2019} }

% In these entries the venue (conference or journal name) is stored in the
% publisher field; page ranges use a double hyphen.

@inproceedings{Li2020NeuralMT,
  author    = {Yanyang Li and Qiang Wang and Tong Xiao and Tongran Liu and Jingbo Zhu},
  title     = {Neural Machine Translation with Joint Representation},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {8285--8292},
  year      = {2020}
}

@inproceedings{HochreiterLong,
  author    = {Hochreiter, Sepp and Schmidhuber, Jürgen},
  title     = {Long Short-term Memory},
  publisher = {Neural Computation},
  volume    = {9},
  pages     = {1735--1780},
  year      = {1997},
  month     = {12}
}

@inproceedings{Cho2014Learning,
  author    = {Kyunghyun Cho and Bart van Merrienboer and {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and Dzmitry Bahdanau and Fethi Bougares and Holger Schwenk and Yoshua Bengio},
  title     = {Learning Phrase Representations using {RNN} Encoder-Decoder for Statistical Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1724--1734},
  year      = {2014}
}

@inproceedings{pmlr-v9-glorot10a,
  author    = {Xavier Glorot and Yoshua Bengio},
  title     = {Understanding the difficulty of training deep feedforward neural networks},
  publisher = {International Conference on Artificial Intelligence and Statistics},
  volume    = {9},
  pages     = {249--256},
  year      = {2010}
}

@inproceedings{xiao2017fast,
  author    = {Tong Xiao and Jingbo Zhu and Tongran Liu and Chunliang Zhang},
  title     = {Fast Parallel Training of Neural Language Models},
  publisher = {International Joint Conference on Artificial Intelligence},
  pages     = {4193--4199},
  year      = {2017}
}

@inproceedings{Gu2017NonAutoregressiveNM,
  author    = {Jiatao Gu and James Bradbury and Caiming Xiong and Victor O. K. Li and Richard Socher},
  title     = {Non-Autoregressive Neural Machine Translation},
  publisher = {International Conference on Learning Representations},
  year      = {2018}
}

@inproceedings{li-etal-2018-simple,
  author    = {Yanyang Li and Tong Xiao and Yinqiao Li and Qiang Wang and Changming Xu and Jingbo Zhu},
  title     = {A Simple and Effective Approach to Coverage-Aware Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {292--297},
  year      = {2018}
}

@inproceedings{TuModeling,
  author    = {Zhaopeng Tu and Zhengdong Lu and Yang Liu and Xiaohua Liu and Hang Li},
  title     = {Modeling Coverage for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2016}
}

@inproceedings{DBLP:journals/corr/SennrichFCBHHJL17,
  author    = {Rico Sennrich and Orhan Firat and Kyunghyun Cho and Barry Haddow and Alexandra Birch and Julian Hitschler and Marcin Junczys-Dowmunt and Samuel L{\"{a}}ubli and Antonio Valerio Miceli Barone and Jozef Mokry and Maria Nadejde},
  title     = {Nematus: a Toolkit for Neural Machine Translation},
  publisher = {Conference of the European Chapter of the Association for Computational Linguistics},
  pages     = {65--68},
  year      = {2017}
}

@inproceedings{DBLP:journals/corr/abs-1905-13324,
  author    = {Biao Zhang and Rico Sennrich},
  title     = {A Lightweight Recurrent Network for Sequence Modeling},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1538--1548},
  year      = {2019}
}

@inproceedings{Lei2017TrainingRA,
  author    = {Tao Lei and Yu Zhang and Yoav Artzi},
  title     = {Training RNNs as Fast as CNNs},
  publisher = {CoRR},
  volume    = {abs/1709.02755},
  year      = {2017}
}

@inproceedings{Zhang2018SimplifyingNM, author = {Biao Zhang and Deyi Xiong and Jinsong Su and Qian Lin and Huiji Zhang}, title = {Simplifying Neural Machine Translation with Addition-Subtraction Twin-Gated Recurrent Networks}, publisher = {Conference on Empirical Methods in Natural Language
Processing}, pages = {4273--4283}, year = {2018} }

% In these entries the venue (conference or journal name) is stored in the
% publisher field.

@inproceedings{Liu_2019_CVPR,
  author    = {Shikun Liu and Edward Johns and Andrew J. Davison},
  title     = {End-To-End Multi-Task Learning With Attention},
  publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {1871--1880},
  year      = {2019}
}

@inproceedings{DBLP:journals/corr/abs-1811-00498,
  author    = {Ra{\'{u}}l V{\'{a}}zquez and Alessandro Raganato and J{\"{o}}rg Tiedemann and Mathias Creutz},
  title     = {Multilingual {NMT} with a Language-Independent Attention Bridge},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {33--39},
  year      = {2019}
}

@inproceedings{MoradiInterrogating,
  author    = {Pooya Moradi and Nishant Kambhatla and Anoop Sarkar},
  title     = {Interrogating the Explanatory Power of Attention in Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {221--230},
  year      = {2019}
}

@inproceedings{WangNeural,
  author    = {Xing Wang and Zhengdong Lu and Zhaopeng Tu and Hang Li and Deyi Xiong and Min Zhang},
  title     = {Neural Machine Translation Advised by Statistical Machine Translation},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {3330--3336},
  year      = {2017}
}

@inproceedings{Xiao2019SharingAW,
  author    = {Tong Xiao and Yinqiao Li and Jingbo Zhu and Zhengtao Yu and Tongran Liu},
  title     = {Sharing Attention Weights for Fast Transformer},
  publisher = {International Joint Conference on Artificial Intelligence},
  pages     = {5292--5298},
  year      = {2019}
}

@inproceedings{Yang2017TowardsBH,
  author    = {Baosong Yang and Derek F. Wong and Tong Xiao and Lidia S. Chao and Jingbo Zhu},
  title     = {Towards Bidirectional Hierarchical Representations for Attention-based Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1432--1441},
  year      = {2017}
}

@inproceedings{Wang2019TreeTI,
  author    = {Yau-Shian Wang and Hung-yi Lee and Yun-Nung Chen},
  title     = {Tree Transformer: Integrating Tree Structures into Self-Attention},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1061--1070},
  year      = {2019}
}

@inproceedings{DBLP:journals/corr/abs-1809-01854,
  author    = {Jetic Gu and Hassan S. Shavarani and Anoop Sarkar},
  title     = {Top-down Tree Structured Decoding with Syntactic Connections for Neural Machine Translation and Parsing},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {401--413},
  year      = {2018}
}

@inproceedings{DBLP:journals/corr/abs-1808-09374,
  author    = {Xinyi Wang and Hieu Pham and Pengcheng Yin and Graham Neubig},
  title     = {A Tree-based Decoder for Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {4772--4777},
  year      = {2018}
}

@inproceedings{DBLP:journals/corr/ZhangZ16c,
  author    = {Jiajun Zhang and Chengqing Zong},
  title     = {Bridging Neural Machine Translation and Bilingual Dictionaries},
  publisher = {CoRR},
  volume    = {abs/1610.07272},
  year      = {2016}
}

@inproceedings{Dai2019TransformerXLAL, author = {Zihang Dai and Zhilin Yang and Yiming Yang and Jaime G. Carbonell and Quoc V.
Le and Ruslan Salakhutdinov}, title = {Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context}, publisher = {Annual Meeting of the Association for Computational Linguistics}, pages = {2978--2988}, year = {2019} }

% In these entries the venue (conference or journal name) is stored in the
% publisher field; volume and issue number are kept in separate fields.

@inproceedings{li-etal-2019-word,
  author    = {Xintong Li and Guanlin Li and Lemao Liu and Max Meng and Shuming Shi},
  title     = {On the Word Alignment from Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1293--1303},
  year      = {2019}
}

@inproceedings{Werlen2018DocumentLevelNM,
  author    = {Lesly Miculicich Werlen and Dhananjay Ram and Nikolaos Pappas and James Henderson},
  title     = {Document-Level Neural Machine Translation with Hierarchical Attention Networks},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {2947--2954},
  year      = {2018}
}

@inproceedings{DBLP:journals/corr/abs-1805-10163,
  author    = {Elena Voita and Pavel Serdyukov and Rico Sennrich and Ivan Titov},
  title     = {Context-Aware Neural Machine Translation Learns Anaphora Resolution},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1264--1274},
  year      = {2018}
}

@inproceedings{DBLP:journals/corr/abs-1906-00532,
  author    = {Aishwarya Bhandare and Vamsi Sripathi and Deepthi Karkada and Vivek Menon and Sun Choi and Kushal Datta and Vikram Saletore},
  title     = {Efficient 8-Bit Quantization of Transformer Neural Machine Language Translation Model},
  publisher = {CoRR},
  volume    = {abs/1906.00532},
  year      = {2019}
}

@inproceedings{Zhang2018SpeedingUN,
  author    = {Wen Zhang and Liang Huang and Yang Feng and Lei Shen and Qun Liu},
  title     = {Speeding Up Neural Machine Translation Decoding by Cube Pruning},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {4284--4294},
  year      = {2018}
}

@inproceedings{DBLP:journals/corr/SeeLM16,
  author    = {Abigail See and Minh-Thang Luong and Christopher D. Manning},
  title     = {Compression of Neural Machine Translation Models via Pruning},
  publisher = {SIGNLL Conference on Computational Natural Language Learning},
  pages     = {291--301},
  year      = {2016}
}

@inproceedings{DBLP:journals/corr/ChenLCL17,
  author    = {Yun Chen and Yang Liu and Yong Cheng and Victor O. K. Li},
  title     = {A Teacher-Student Framework for Zero-Resource Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1925--1935},
  year      = {2017}
}

@inproceedings{Hinton2015Distilling,
  author    = {Geoffrey Hinton and Oriol Vinyals and Jeffrey Dean},
  title     = {Distilling the Knowledge in a Neural Network},
  publisher = {CoRR},
  volume    = {abs/1503.02531},
  year      = {2015}
}

@inproceedings{Ott2018ScalingNM,
  author    = {Myle Ott and Sergey Edunov and David Grangier and Michael Auli},
  title     = {Scaling Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2018}
}

@inproceedings{Lin2020TowardsF8,
  author    = {Ye Lin and Yanyang Li and Tengbo Liu and Tong Xiao and Tongran Liu and Jingbo Zhu},
  title     = {Towards Fully 8-bit Integer Inference for the Transformer Model},
  publisher = {International Joint Conference on Artificial Intelligence},
  pages     = {3759--3765},
  year      = {2020}
}

@inproceedings{kim-rush-2016-sequence,
  author    = {Yoon Kim and Alexander M. Rush},
  title     = {Sequence-Level Knowledge Distillation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1317--1327},
  year      = {2016}
}

@inproceedings{Akaike1969autoregressive,
  author    = {Hirotugu Akaike},
  title     = {Fitting autoregressive models for prediction},
  publisher = {Annals of the institute of Statistical Mathematics},
  volume    = {21},
  number    = {1},
  pages     = {243--247},
  year      = {1969}
}

@inproceedings{Chen2018TheBO, author = {Mia Xu Chen and Orhan Firat and Ankur Bapna and Melvin Johnson and Wolfgang Macherey and George F.
Foster and Llion Jones and Mike Schuster and Noam Shazeer and Niki Parmar and Ashish Vaswani and Jakob Uszkoreit and Lukasz Kaiser and Zhifeng Chen and Yonghui Wu and Macduff Hughes}, title = {The Best of Both Worlds: Combining Recent Advances in Neural Machine Translation}, pages = {76--86}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2018} }

% In these entries the venue (conference or journal name) is stored in the
% publisher field.

@inproceedings{He2018LayerWiseCB,
  author    = {Tianyu He and Xu Tan and Yingce Xia and Di He and Tao Qin and Zhibo Chen and Tie-Yan Liu},
  title     = {Layer-Wise Coordination between Encoder and Decoder for Neural Machine Translation},
  publisher = {Conference on Neural Information Processing Systems},
  year      = {2018}
}

@inproceedings{cho-etal-2014-properties,
  author    = {Kyunghyun Cho and Bart van Merrienboer and Dzmitry Bahdanau and Yoshua Bengio},
  title     = {On the Properties of Neural Machine Translation: Encoder-Decoder Approaches},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {103--111},
  year      = {2014}
}

@inproceedings{DBLP:conf/acl/JeanCMB15,
  author    = {S{\'{e}}bastien Jean and KyungHyun Cho and Roland Memisevic and Yoshua Bengio},
  title     = {On Using Very Large Target Vocabulary for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1--10},
  year      = {2015}
}

@inproceedings{DBLP:journals/corr/LuongPM15,
  author    = {Thang Luong and Hieu Pham and Christopher D. Manning},
  title     = {Effective Approaches to Attention-based Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1412--1421},
  year      = {2015}
}

@inproceedings{He2016ImprovedNM,
  author    = {Wei He and Zhongjun He and Hua Wu and Haifeng Wang},
  title     = {Improved Neural Machine Translation with {SMT} Features},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {151--157},
  year      = {2016}
}

@inproceedings{zhang-etal-2017-prior,
  author    = {Zhang, Jiacheng and Liu, Yang and Luan, Huanbo and Xu, Jingfang and Sun, Maosong},
  title     = {Prior Knowledge Integration for Neural Machine Translation using Posterior Regularization},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1514--1523},
  year      = {2017}
}

@inproceedings{duan-etal-2020-bilingual,
  author    = {Xiangyu Duan and Baijun Ji and Hao Jia and Min Tan and Min Zhang and Boxing Chen and Weihua Luo and Yue Zhang},
  title     = {Bilingual Dictionary Based Neural Machine Translation without Using Parallel Sentences},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1570--1579},
  year      = {2020}
}

@inproceedings{cao-xiong-2018-encoding,
  author    = {Qian Cao and Deyi Xiong},
  title     = {Encoding Gated Translation Memory into Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {3042--3047},
  year      = {2018}
}

@inproceedings{yang-etal-2016-hierarchical, author = {Zichao Yang and Diyi Yang and Chris Dyer and Xiaodong He and Alexander J. Smola and Eduard H.
Hovy}, title = {Hierarchical Attention Networks for Document Classification}, pages = {1480--1489}, publisher = {Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics}, year = {2016} }

%%%%% chapter 10------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 11------------------------------------------------------
% In these entries the venue (conference or journal name) is stored in the
% publisher field. A citation key may be defined only once per file, so keys
% already defined earlier in this file are not repeated in this section.

@inproceedings{DBLP:journals/pami/RenHG017,
  author    = {Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun},
  title     = {Faster {R-CNN:} Towards Real-Time Object Detection with Region Proposal Networks},
  publisher = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  volume    = {39},
  number    = {6},
  pages     = {1137--1149},
  year      = {2017}
}

@inproceedings{DBLP:conf/eccv/LiuAESRFB16,
  author    = {Wei Liu and Dragomir Anguelov and Dumitru Erhan and Christian Szegedy and Scott Reed and Cheng-Yang Fu and Alexander C. Berg},
  title     = {{SSD:} Single Shot MultiBox Detector},
  publisher = {European Conference on Computer Vision},
  volume    = {9905},
  pages     = {21--37},
  year      = {2016}
}

% Repeated definition of devlin-etal-2014-fast removed here; the key is
% already defined earlier in this file (chapter 10 section).

@inproceedings{DBLP:conf/acl/WangLLJL15,
  author    = {Mingxuan Wang and Zhengdong Lu and Hang Li and Wenbin Jiang and Qun Liu},
  title     = {genCNN: {A} Convolutional Architecture for Word Sequence Prediction},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1567--1576},
  year      = {2015}
}

@inproceedings{DBLP:conf/icassp/ZhangCJ17,
  author    = {Yu Zhang and William Chan and Navdeep Jaitly},
  title     = {Very deep convolutional networks for end-to-end speech recognition},
  publisher = {International Conference on Acoustics, Speech and Signal Processing},
  pages     = {4845--4849},
  year      = {2017}
}

@inproceedings{DBLP:conf/icassp/DengAY13,
  author    = {Li Deng and Ossama Abdel-Hamid and Dong Yu},
  title     = {A deep convolutional neural network using heterogeneous pooling for trading acoustic invariance with phonetic confusion},
  publisher = {International Conference on Acoustics, Speech and Signal Processing},
  pages     = {6669--6673},
  year      = {2013}
}

% Repeated definition of DBLP:journals/corr/LuongPM15 removed here; the key is
% already defined earlier in this file (chapter 10 section).

@inproceedings{DBLP:conf/acl-codeswitch/WangCK18,
  author    = {Changhan Wang and Kyunghyun Cho and Douwe Kiela},
  title     = {Code-Switched Named Entity Recognition with Embedding Attention},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {154--158},
  year      = {2018}
}

@inproceedings{DBLP:conf/emnlp/LiDWCM17,
  author    = {Peng-Hsuan Li and Ruo-Ping Dong and Yu-Siang Wang and Ju-Chieh Chou and Wei-Yun Ma},
  title     = {Leveraging Linguistic Structures for Named Entity Recognition with Bidirectional Recursive Neural Networks},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {2664--2669},
  year      = {2017}
}

@inproceedings{DBLP:conf/acl/MaH16,
  author    = {Xuezhe Ma and Eduard H. Hovy},
  title     = {End-to-end Sequence Labeling via Bi-directional LSTM-CNNs-CRF},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2016}
}

@inproceedings{DBLP:conf/emnlp/StrubellVBM17,
  author    = {Emma Strubell and Patrick Verga and David Belanger and Andrew Mccallum},
  title     = {Fast and Accurate Entity Recognition with Iterated Dilated Convolutions},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {2670--2680},
  year      = {2017}
}

@inproceedings{DBLP:conf/cncl/ZhouZXQBX17,
  author    = {Peng Zhou and Suncong Zheng and Jiaming Xu and Zhenyu Qi and Hongyun Bao and Bo Xu},
  title     = {Joint Extraction of Multiple Relations and Entities by Using a Hybrid Neural Network},
  publisher = {Springer},
  volume    = {10565},
  pages     = {135--146},
  year      = {2017}
}

% Repeated definition of 2011Natural removed here; the key is already defined
% earlier in this file.

@inproceedings{DBLP:conf/acl/NguyenG15,
  author    = {Thien Huu Nguyen and Ralph Grishman},
  title     = {Event Detection and Domain Adaptation with Convolutional Neural Networks},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {365--371},
  year      = {2015}
}

@inproceedings{DBLP:conf/aaai/LaiXLZ15,
  author    = {Siwei Lai and Liheng Xu and Kang Liu and Jun Zhao},
  title     = {Recurrent Convolutional Neural Networks for Text Classification},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {2267--2273},
  year      = {2015}
}

@inproceedings{DBLP:conf/acl/ChenXLZ015,
  author    = {Yubo Chen and Liheng Xu and Kang Liu and Daojian Zeng and Jun Zhao},
  title     = {Event Extraction via Dynamic Multi-Pooling Convolutional Neural Networks},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {167--176},
  year      = {2015}
}

@inproceedings{DBLP:conf/emnlp/LeiBJ15, author = {Tao Lei and Regina Barzilay and Tommi S.
Jaakkola}, title = {Molding CNNs for text: non-linear, non-consecutive convolutions}, pages = {1565--1575}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2015} }

% In these entries the venue (conference or journal name) is stored in the
% publisher field; page ranges use a double hyphen.

@inproceedings{DBLP:conf/naacl/Johnson015,
  author    = {Rie Johnson and Tong Zhang},
  title     = {Effective Use of Word Order for Text Categorization with Convolutional Neural Networks},
  publisher = {Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {103--112},
  year      = {2015}
}

@inproceedings{DBLP:conf/naacl/NguyenG15,
  author    = {Thien Huu Nguyen and Ralph Grishman},
  title     = {Relation Extraction: Perspective from Convolutional Neural Networks},
  publisher = {Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {39--48},
  year      = {2015}
}

% Repeated definition of StahlbergNeural removed here; the key is already
% defined earlier in this file (chapter 10 section).

@inproceedings{Sennrich2016ImprovingNM,
  author    = {Rico Sennrich and Barry Haddow and Alexandra Birch},
  title     = {Improving Neural Machine Translation Models with Monolingual Data},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2016}
}

@inproceedings{bahdanau2014neural,
  author    = {Dzmitry Bahdanau and Kyunghyun Cho and Yoshua Bengio},
  title     = {Neural Machine Translation by Jointly Learning to Align and Translate},
  publisher = {International Conference on Learning Representations},
  year      = {2015}
}

@inproceedings{Waibel1989PhonemeRU,
  author    = {Alexander Waibel and Toshiyuki Hanazawa and Geoffrey Hinton and Kiyohiro Shikano and Kevin J. Lang},
  title     = {Phoneme recognition using time-delay neural networks},
  publisher = {IEEE Transactions on Acoustics, Speech, and Signal Processing},
  volume    = {37},
  pages     = {328--339},
  year      = {1989}
}

@inproceedings{LeCun1989BackpropagationAT,
  author    = {Yann Lecun and Bernhard Boser and John Denker and Don Henderson and Richard E. Howard and Wayne E. Hubbard and Larry Jackel},
  title     = {Backpropagation Applied to Handwritten Zip Code Recognition},
  publisher = {Neural Computation},
  volume    = {1},
  pages     = {541--551},
  year      = {1989}
}

@inproceedings{726791,
  author    = {Yann {Lecun} and Leon {Bottou} and Yoshua {Bengio} and Patrick {Haffner}},
  title     = {Gradient-based learning applied to document recognition},
  publisher = {Proceedings of the IEEE},
  volume    = {86},
  number    = {11},
  pages     = {2278--2324},
  year      = {1998}
}

@inproceedings{DBLP:journals/corr/HeZRS15,
  author    = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun},
  title     = {Deep Residual Learning for Image Recognition},
  publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {770--778},
  year      = {2016}
}

@inproceedings{DBLP:conf/cvpr/HuangLMW17,
  author    = {Gao Huang and Zhuang Liu and Laurens van der Maaten and Kilian Q. Weinberger},
  title     = {Densely Connected Convolutional Networks},
  publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {2261--2269},
  year      = {2017}
}

@inproceedings{Girshick2015FastR,
  author    = {Ross Girshick},
  title     = {Fast R-CNN},
  publisher = {International Conference on Computer Vision},
  pages     = {1440--1448},
  year      = {2015}
}

@inproceedings{He2020MaskR,
  author    = {Kaiming He and Georgia Gkioxari and Piotr Doll{\'a}r and Ross B. Girshick},
  title     = {Mask R-CNN},
  publisher = {International Conference on Computer Vision},
  pages     = {2961--2969},
  year      = {2017}
}

@inproceedings{Kalchbrenner2014ACN,
  author    = {Nal Kalchbrenner and Edward Grefenstette and Phil Blunsom},
  title     = {A Convolutional Neural Network for Modelling Sentences},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {655--665},
  year      = {2014}
}

@inproceedings{Kim2014ConvolutionalNN,
  author    = {Yoon Kim},
  title     = {Convolutional Neural Networks for Sentence Classification},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1746--1751},
  year      = {2014}
}

@inproceedings{Ma2015DependencybasedCN,
  author    = {Mingbo Ma and Liang Huang and Bowen Zhou and Bing Xiang},
  title     = {Dependency-based Convolutional Neural Networks for Sentence Embedding},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {174--179},
  year      = {2015}
}

@inproceedings{Santos2014DeepCN,
  author    = {C{\'{\i}}cero Nogueira dos Santos and Maira Gatti},
  title     = {Deep Convolutional Neural Networks for Sentiment Analysis of Short Texts},
  publisher = {International Conference on Computational Linguistics},
  pages     = {69--78},
  year      = {2014}
}

@inproceedings{Dauphin2017LanguageMW,
  author    = {Yann N. Dauphin and Angela Fan and Michael Auli and David Grangier},
  title     = {Language Modeling with Gated Convolutional Networks},
  publisher = {International Conference on Machine Learning},
  volume    = {70},
  pages     = {933--941},
  year      = {2017}
}

@inproceedings{Gehring2017ACE,
  author    = {Jonas Gehring and Michael Auli and David Grangier and Yann N. Dauphin},
  title     = {A Convolutional Encoder Model for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {123--135},
  year      = {2017}
}

@inproceedings{DBLP:journals/corr/GehringAGYD17, author = {Jonas Gehring and Michael Auli and David Grangier and Denis Yarats and Yann N.
Dauphin}, title = {Convolutional Sequence to Sequence Learning}, publisher = {International Conference on Machine Learning}, volume = {70}, pages = {1243--1252}, year = {2017} } @inproceedings{Kaiser2018DepthwiseSC, title={Depthwise Separable Convolutions for Neural Machine Translation}, author = {Lukasz Kaiser and Aidan N. Gomez and Fran{\c{c}}ois Chollet}, publisher = {International Conference on Learning Representations}, year={2018}, } @inproceedings{Wu2019PayLA, author = {Felix Wu and Angela Fan and Alexei Baevski and Yann N. Dauphin and Michael Auli}, title = {Pay Less Attention with Lightweight and Dynamic Convolutions}, publisher = {International Conference on Learning Representations}, year = {2019} } @inproceedings{kalchbrenner-blunsom-2013-recurrent, author = {Nal Kalchbrenner and Phil Blunsom}, title = {Recurrent Continuous Translation Models}, pages = {1700--1709}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2013} } @inproceedings{Wu2016GooglesNM, title={Google's Neural Machine Translation System: Bridging the Gap between Human and Machine Translation}, author = {Yonghui Wu and Mike Schuster and Zhifeng Chen and Quoc V. 
Le and Mohammad Norouzi and Wolfgang Macherey and Maxim Krikun and Yuan Cao and Qin Gao and Klaus Macherey and Jeff Klingner and Apurva Shah and Melvin Johnson and Xiaobing Liu and Lukasz Kaiser and Stephan Gouws and Yoshikiyo Kato and Taku Kudo and Hideto Kazawa and Keith Stevens and George Kurian and Nishant Patil and Wei Wang and Cliff Young and Jason Smith and Jason Riesa and Alex Rudnick and Oriol Vinyals and Greg Corrado and Macduff Hughes and Jeffrey Dean}, publisher = {CoRR}, year={2016}, volume={abs/1609.08144} } @inproceedings{DBLP:journals/corr/HeZRS15, author = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun}, title = {Deep Residual Learning for Image Recognition}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition}, pages = {770--778}, year = {2016}, } @inproceedings{Sukhbaatar2015EndToEndMN, title={End-To-End Memory Networks}, author = {Sainbayar Sukhbaatar and Arthur Szlam and Jason Weston and Rob Fergus}, publisher={Conference on Neural Information Processing Systems}, pages = {2440--2448}, year={2015} } @inproceedings{Islam2020HowMP, author = {Md. Amirul Islam and Sen Jia and Neil Bruce}, title = {How much Position Information Do Convolutional Neural Networks Encode?}, publisher = {International Conference on Learning Representations}, year = {2020}, } @inproceedings{Sutskever2013OnTI, title={On the importance of initialization and momentum in deep learning}, author = {Ilya Sutskever and James Martens and George E. 
Dahl and Geoffrey Hinton}, publisher = {International Conference on Machine Learning}, pages = {1139--1147}, year={2013} } @inproceedings{Bengio2013AdvancesIO, title={Advances in optimizing recurrent networks}, author={Yoshua Bengio and Nicolas Boulanger-Lewandowski and Razvan Pascanu}, publisher={IEEE Transactions on Acoustics, Speech, and Signal Processing}, year={2013}, pages={8624-8628} } @inproceedings{JMLR:v15:srivastava14a, author = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov}, title = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting}, publisher = {Journal of Machine Learning Research}, year = {2014}, volume = {15}, pages = {1929-1958}, } @inproceedings{Chollet2017XceptionDL, title={Xception: Deep Learning with Depthwise Separable Convolutions}, author = {Fran{\c{c}}ois Chollet}, publisher={IEEE Conference on Computer Vision and Pattern Recognition}, year={2017}, pages={1800-1807} } @inproceedings{Howard2017MobileNetsEC, title={MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications}, author = {Andrew Howard and Menglong Zhu and Bo Chen and Dmitry Kalenichenko and Weijun Wang and Tobias Weyand and Marco Andreetto and Hartwig Adam}, journal={CoRR}, year={2017}, } @inproceedings{sifre2014rigid, title={Rigid-motion scattering for image classification}, author={Sifre, Laurent and Mallat, St{\'e}phane}, year={2014}, publisher={Citeseer} } @inproceedings{Taigman2014DeepFaceCT, title={DeepFace: Closing the Gap to Human-Level Performance in Face Verification}, author={Yaniv Taigman and Ming Yang and Marc'Aurelio Ranzato and Lior Wolf}, publisher={IEEE Conference on Computer Vision and Pattern Recognition}, year={2014}, pages={1701-1708} } @inproceedings{Chen2015LocallyconnectedAC, title={Locally-connected and convolutional neural networks for small footprint speaker recognition}, author = {Yu-hsin Chen and Ignacio Lopez-Moreno and Tara Sainath and Mirk{\'{o}} 
Visontai and Raziel Alvarez and Carolina Parada}, publisher={Conference of the International Speech Communication Association}, pages = {1136--1140}, year={2015} } @inproceedings{Chen2020DynamicCA, title={Dynamic Convolution: Attention Over Convolution Kernels}, author = {Yinpeng Chen and Xiyang Dai and Mengchen Liu and Dongdong Chen and Lu Yuan and Zicheng Liu}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition}, year={2020}, pages={11027-11036} } %%%%% chapter 11------------------------------------------------------ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%% chapter 12------------------------------------------------------ @inproceedings{DBLP:conf/coling/ZengLLZZ14, author = {Daojian Zeng and Kang Liu and Siwei Lai and Guangyou Zhou and Jun Zhao}, title = {Relation Classification via Convolutional Deep Neural Network}, pages = {2335--2344}, publisher = {International Conference on Computational Linguistics}, year = {2014} } @inproceedings{DBLP:conf/acl/JohnsonZ17, author = {Rie Johnson and Tong Zhang}, title = {Deep Pyramid Convolutional Neural Networks for Text Categorization}, pages = {562--570}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2017} } @inproceedings{DBLP:conf/interspeech/GulatiQCPZYHWZW20, author = {Anmol Gulati and James Qin and Chung-Cheng Chiu and Niki Parmar and Yu Zhang and Jiahui Yu and Wei Han and Shibo Wang and Zhengdong Zhang and Yonghui Wu and Ruoming Pang}, title = {Conformer: Convolution-augmented Transformer for Speech Recognition}, pages = {5036--5040}, publisher = {International Speech Communication Association}, year = {2020} } @inproceedings{DBLP:conf/icassp/DongXX18, author = {Linhao Dong and Shuang Xu and Bo Xu}, title = {Speech-Transformer: {A} No-Recurrence Sequence-to-Sequence Model for Speech Recognition}, 
pages = {5884--5888}, publisher = {International Conference on Acoustics, Speech and Signal Processing}, year = {2018} } @inproceedings{DBLP:journals/corr/abs-1802-05751, author = {Niki Parmar and Ashish Vaswani and Jakob Uszkoreit and Lukasz Kaiser and Noam Shazeer and Alexander Ku}, title = {Image Transformer}, publisher = {CoRR}, volume = {abs/1802.05751}, year = {2018} } @inproceedings{vaswani2017attention, title={Attention is All You Need}, author={Ashish {Vaswani} and Noam {Shazeer} and Niki {Parmar} and Jakob {Uszkoreit} and Llion {Jones} and Aidan N. {Gomez} and Lukasz {Kaiser} and Illia {Polosukhin}}, publisher={International Conference on Neural Information Processing}, pages={5998--6008}, year={2017} } @inproceedings{DBLP:conf/iclr/WuLLLH20, author = {Zhanghao Wu and Zhijian Liu and Ji Lin and Yujun Lin and Song Han}, title = {Lite Transformer with Long-Short Range Attention}, publisher = {International Conference on Learning Representations}, year = {2020} } @inproceedings{DBLP:journals/corr/abs-1905-09418, author = {Elena Voita and David Talbot and Fedor Moiseev and Rico Sennrich and Ivan Titov}, title = {Analyzing Multi-Head Self-Attention: Specialized Heads Do the Heavy Lifting, the Rest Can Be Pruned}, pages = {5797--5808}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2019}, } @inproceedings{DBLP:journals/corr/LinFSYXZB17, author = {Zhouhan Lin and Minwei Feng and C{\'{\i}}cero Nogueira dos Santos and Mo Yu and Bing Xiang and Bowen Zhou and Yoshua Bengio}, title = {A Structured Self-Attentive Sentence Embedding}, publisher = {International Conference on Learning Representations}, year = {2017}, } @inproceedings{Shaw2018SelfAttentionWR, author = {Peter Shaw and Jakob Uszkoreit and Ashish Vaswani}, title = {Self-Attention with Relative Position Representations}, publisher = {Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics}, 
pages = {464--468}, year = {2018}, } @inproceedings{DBLP:journals/corr/HeZRS15, author = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun}, title = {Deep Residual Learning for Image Recognition}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition}, pages = {770--778}, year = {2016}, } @inproceedings{JMLR:v15:srivastava14a, author = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov}, title = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting}, publisher = {Journal of Machine Learning Research}, year = {2014}, volume = {15}, pages = {1929-1958}, } @inproceedings{Szegedy_2016_CVPR, author = {Christian Szegedy and Vincent Vanhoucke and Sergey Ioffe and Jonathon Shlens and Zbigniew Wojna}, title = {Rethinking the Inception Architecture for Computer Vision}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition}, pages = {2818--2826}, year = {2016}, } @inproceedings{DBLP:journals/corr/abs-1805-00631, author = {Biao Zhang and Deyi Xiong and Jinsong Su}, title = {Accelerating Neural Transformer via an Average Attention Network}, publisher = {Annual Meeting of the Association for Computational Linguistics}, pages = {1789--1798}, year = {2018}, } @inproceedings{DBLP:journals/corr/CourbariauxB16, author = {Matthieu Courbariaux and Yoshua Bengio}, title = {BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1}, publisher = {CoRR}, volume = {abs/1602.02830}, year = {2016}, } @inproceedings{Wu2019PayLA, author = {Felix Wu and Angela Fan and Alexei Baevski and Yann N. Dauphin and Michael Auli}, title = {Pay Less Attention with Lightweight and Dynamic Convolutions}, publisher = {International Conference on Learning Representations}, year = {2019}, } @inproceedings{dai-etal-2019-transformer, author = {Zihang Dai and Zhilin Yang and Yiming Yang and Jaime G. 
Carbonell and Quoc Viet Le and Ruslan Salakhutdinov}, title = {Transformer-XL: Attentive Language Models beyond a Fixed-Length Context}, pages = {2978--2988}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2019} } @inproceedings{Liu2020LearningTE, title={Learning to Encode Position for Transformer with Continuous Dynamical Model}, author={Xuanqing Liu and Hsiang-Fu Yu and Inderjit Dhillon and Cho-Jui Hsieh}, publisher={ArXiv}, year={2020}, volume={abs/2003.09229} } @inproceedings{Jawahar2019WhatDB, title={What Does BERT Learn about the Structure of Language?}, author={Ganesh Jawahar and Beno{\^{\i}}t Sagot and Djam{\'e} Seddah}, publisher={Annual Meeting of the Association for Computational Linguistics}, year={2019} } @inproceedings{Yang2018ModelingLF, author = {Baosong Yang and Zhaopeng Tu and Derek F. Wong and Fandong Meng and Lidia S. Chao and Tong Zhang}, title = {Modeling Localness for Self-Attention Networks}, pages = {4449--4458}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2018} } @inproceedings{DBLP:journals/corr/abs-1904-03107, author = {Baosong Yang and Longyue Wang and Derek F. Wong and Lidia S. 
Chao and Zhaopeng Tu}, title = {Convolutional Self-Attention Networks}, pages = {4040--4045}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2019}, } @inproceedings{Wang2018MultilayerRF, title={Multi-layer Representation Fusion for Neural Machine Translation}, author={Qiang Wang and Fuxue Li and Tong Xiao and Yanyang Li and Yinqiao Li and Jingbo Zhu}, publisher={International Conference on Computational Linguistics}, year={2018}, volume={abs/2002.06714} } @inproceedings{Bapna2018TrainingDN, author = {Ankur Bapna and Mia Xu Chen and Orhan Firat and Yuan Cao and Yonghui Wu}, title = {Training Deeper Neural Machine Translation Models with Transparent Attention}, pages = {3028--3033}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2018} } @inproceedings{Dou2018ExploitingDR, author = {Zi-Yi Dou and Zhaopeng Tu and Xing Wang and Shuming Shi and Tong Zhang}, title = {Exploiting Deep Representations for Neural Machine Translation}, pages = {4253--4262}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2018} } @inproceedings{Wang2019ExploitingSC, title={Exploiting Sentential Context for Neural Machine Translation}, author={Xing Wang and Zhaopeng Tu and Longyue Wang and Shuming Shi}, publisher={Annual Meeting of the Association for Computational Linguistics}, year={2019} } @inproceedings{Dou2019DynamicLA, author = {Zi-Yi Dou and Zhaopeng Tu and Xing Wang and Longyue Wang and Shuming Shi and Tong Zhang}, title = {Dynamic Layer Aggregation for Neural Machine Translation with Routing-by-Agreement}, pages = {86--93}, publisher = {AAAI Conference on Artificial Intelligence}, year = {2019} } @inproceedings{Wei2020MultiscaleCD, title={Multiscale Collaborative Deep Models for Neural Machine Translation}, author={Xiangpeng Wei and Heng Yu and Yue Hu and Yue Zhang and Rongxiang Weng and Weihua Luo}, publisher={Annual Meeting of the Association for Computational 
Linguistics}, year={2020} } @inproceedings{Vaswani2018Tensor2TensorFN, author = {Ashish Vaswani and Samy Bengio and Eugene Brevdo and Fran{\c{c}}ois Chollet and Aidan N. Gomez and Stephan Gouws and Llion Jones and Lukasz Kaiser and Nal Kalchbrenner and Niki Parmar and Ryan Sepassi and Noam Shazeer and Jakob Uszkoreit}, title = {Tensor2Tensor for Neural Machine Translation}, pages = {193--199}, publisher = {Association for Machine Translation in the Americas}, year = {2018} } @inproceedings{Kitaev2020ReformerTE, author = {Nikita Kitaev and Lukasz Kaiser and Anselm Levskaya}, title = {Reformer: The Efficient Transformer}, publisher = {International Conference on Learning Representations}, year = {2020} } @inproceedings{Lin2020WeightDT, title={Weight Distillation: Transferring the Knowledge in Neural Network Parameters}, author={Ye Lin and Yanyang Li and Ziyang Wang and Bei Li and Quan Du and Tong Xiao and Jingbo Zhu}, publisher={ArXiv}, year={2020}, volume={abs/2009.09152} } @inproceedings{li2020shallow, title={Shallow-to-Deep Training for Neural Machine Translation}, author={Li, Bei and Wang, Ziyang and Liu, Hui and Jiang, Yufan and Du, Quan and Xiao, Tong and Wang, Huizhen and Zhu, Jingbo}, publisher={Conference on Empirical Methods in Natural Language Processing}, year={2020} } %%%%% chapter 12------------------------------------------------------ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%% chapter 13------------------------------------------------------ @inproceedings{garcia-martinez2016factored, title={Factored Neural Machine Translation Architectures}, author={Mercedes {Garcia-Martinez} and Loïc {Barrault} and Fethi {Bougares}}, publisher={International Workshop on Spoken Language Translation (IWSLT'16)}, notes={Sourced from Microsoft Academic - https://academic.microsoft.com/paper/2949810612}, year={2016} } 
@inproceedings{DBLP:conf/acl/Kudo18, author = {Taku Kudo}, title = {Subword Regularization: Improving Neural Network Translation Models with Multiple Subword Candidates}, pages = {66--75}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2018} } @inproceedings{DBLP:journals/jmlr/RaffelSRLNMZLL20, author = {Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. Liu}, title = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer}, publisher = {Journal of Machine Learning Reseach}, volume = {21}, pages = {140:1--140:67}, year = {2020} } @inproceedings{DBLP:conf/icassp/SchusterN12, author = {Mike Schuster and Kaisuke Nakajima}, title = {Japanese and Korean voice search}, pages = {5149--5152}, publisher = {IEEE International Conference on Acoustics, Speech and Signal Processing}, year = {2012} } @inproceedings{JMLR:v15:srivastava14a, author = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov}, title = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting}, publisher = {Journal of Machine Learning Research}, year = {2014}, volume = {15}, pages = {1929-1958}, } @inproceedings{DBLP:conf/amta/MullerRS20, author = {Mathias M{\"{u}}ller and Annette Rios and Rico Sennrich}, title = {Domain Robustness in Neural Machine Translation}, pages = {151--164}, publisher = {Association for Machine Translation in the Americas}, year = {2020} } @inproceedings{DBLP:conf/sp/Carlini017, author = {Nicholas Carlini and David A. 
Wagner}, title = {Towards Evaluating the Robustness of Neural Networks}, pages = {39--57}, publisher = {IEEE Symposium on Security and Privacy}, year = {2017} } @inproceedings{DBLP:conf/cvpr/Moosavi-Dezfooli16, author = {Seyed-Mohsen Moosavi-Dezfooli and Alhussein Fawzi and Pascal Frossard}, title = {DeepFool: {A} Simple and Accurate Method to Fool Deep Neural Networks}, pages = {2574--2582}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition}, year = {2016} } @inproceedings{DBLP:conf/acl/ChengJM19, author = {Yong Cheng and Lu Jiang and Wolfgang Macherey}, title = {Robust Neural Machine Translation with Doubly Adversarial Inputs}, pages = {4324--4333}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2019} } @inproceedings{DBLP:conf/cvpr/NguyenYC15, author = {Anh Mai Nguyen and Jason Yosinski and Jeff Clune}, title = {Deep neural networks are easily fooled: High confidence predictions for unrecognizable images}, pages = {427--436}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition}, year = {2015} } @inproceedings{DBLP:journals/corr/SzegedyZSBEGF13, author = {Christian Szegedy and Wojciech Zaremba and Ilya Sutskever and Joan Bruna and Dumitru Erhan and Ian J. Goodfellow and Rob Fergus}, title = {Intriguing properties of neural networks}, publisher = {International Conference on Learning Representations}, year = {2014} } @inproceedings{DBLP:journals/corr/GoodfellowSS14, author = {Ian J. 
Goodfellow and Jonathon Shlens and Christian Szegedy}, title = {Explaining and Harnessing Adversarial Examples}, publisher = {International Conference on Learning Representations}, year = {2015} } @inproceedings{DBLP:conf/emnlp/JiaL17, author = {Robin Jia and Percy Liang}, title = {Adversarial Examples for Evaluating Reading Comprehension Systems}, pages = {2021--2031}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2017} } @inproceedings{DBLP:conf/emnlp/BekoulisDDD18, author = {Giannis Bekoulis and Johannes Deleu and Thomas Demeester and Chris Develder}, title = {Adversarial training for multi-context joint entity and relation extraction}, pages = {2830--2836}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2018} } @inproceedings{DBLP:conf/naacl/YasunagaKR18, author = {Michihiro Yasunaga and Jungo Kasai and Dragomir R. Radev}, title = {Robust Multilingual Part-of-Speech Tagging via Adversarial Training}, pages = {976--986}, publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics}, year = {2018} } @inproceedings{DBLP:conf/iclr/BelinkovB18, author = {Yonatan Belinkov and Yonatan Bisk}, title = {Synthetic and Natural Noise Both Break Neural Machine Translation}, publisher = {International Conference on Learning Representations}, year = {2018} } @inproceedings{DBLP:conf/naacl/MichelLNP19, author = {Paul Michel and Xian Li and Graham Neubig and Juan Miguel Pino}, title = {On Evaluation of Adversarial Perturbations for Sequence-to-Sequence Models}, pages = {3103--3114}, publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics}, year = {2019} } @inproceedings{Gong2018AdversarialTW, title={Adversarial Texts with Gradient Methods}, author={Zhitao Gong and Wenlu Wang and B. Li and D. Song and W. 
Ku}, publisher={ArXiv}, year={2018}, volume={abs/1801.07175} } @inproceedings{DBLP:conf/naacl/VaibhavSSN19, author = {Vaibhav and Sumeet Singh and Craig Stewart and Graham Neubig}, title = {Improving Robustness of Machine Translation with Synthetic Noise}, pages = {1916--1920}, publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics}, year = {2019} } @inproceedings{DBLP:conf/naacl/AnastasopoulosL19, author = {Antonios Anastasopoulos and Alison Lui and Toan Q. Nguyen and David Chiang}, title = {Neural Machine Translation of Text from Non-Native Speakers}, pages = {3070--3080}, publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics}, year = {2019} } @inproceedings{DBLP:conf/acl/SinghGR18, author = {Marco T{\'{u}}lio Ribeiro and Sameer Singh and Carlos Guestrin}, title = {Semantically Equivalent Adversarial Rules for Debugging {NLP} models}, pages = {856--865}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2018} } @inproceedings{DBLP:journals/corr/SamantaM17, author = {Suranjana Samanta and Sameep Mehta}, title = {Towards Crafting Text Adversarial Samples}, publisher = {CoRR}, volume = {abs/1707.02812}, year = {2017} } @inproceedings{DBLP:conf/ijcai/0002LSBLS18, author = {Bin Liang and Hongcheng Li and Miaoqiang Su and Pan Bian and Xirong Li and Wenchang Shi}, title = {Deep Text Classification Can be Fooled}, pages = {4208--4215}, publisher = {International Joint Conference on Artificial Intelligence}, year = {2018} } @inproceedings{DBLP:conf/coling/EbrahimiLD18, author = {Javid Ebrahimi and Daniel Lowd and Dejing Dou}, title = {On Adversarial Examples for Character-Level Neural Machine Translation}, pages = {653--663}, publisher = {International Conference on Computational Linguistics}, year = {2018} } @inproceedings{DBLP:conf/iclr/ZhaoDS18, author = {Zhengli Zhao and Dheeru Dua and Sameer Singh}, title = {Generating 
Natural Adversarial Examples}, publisher = {International Conference on Learning Representations}, year = {2018} } @inproceedings{DBLP:conf/acl/LiuTMCZ18, author = {Yong Cheng and Zhaopeng Tu and Fandong Meng and Junjie Zhai and Yang Liu}, title = {Towards Robust Neural Machine Translation}, pages = {1756--1766}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2018} } @inproceedings{DBLP:conf/acl/LiuMHXH19, author = {Hairong Liu and Mingbo Ma and Liang Huang and Hao Xiong and Zhongjun He}, title = {Robust Neural Machine Translation with Joint Textual and Phonetic Embedding}, pages = {3044--3049}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2019} } @inproceedings{DBLP:conf/acl/LiLWJXZLL20, author = {Bei Li and Hui Liu and Ziyang Wang and Yufan Jiang and Tong Xiao and Jingbo Zhu and Tongran Liu and Changliang Li}, title = {Does Multi-Encoder Help? {A} Case Study on Context-Aware Neural Machine Translation}, pages = {3512--3518}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2020} } @techreport{chen1999gaussian, title={A Gaussian prior for smoothing maximum entropy models}, author={Chen, Stanley F and Rosenfeld, Ronald}, year={1999}, institution={CARNEGIE-MELLON UNIV PITTSBURGH PA SCHOOL OF COMPUTER SCIENCE} } @inproceedings{DBLP:conf/emnlp/MichelN18, author = {Paul Michel and Graham Neubig}, title = {{MTNT:} {A} Testbed for Machine Translation of Noisy Text}, pages = {543--553}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2018} } @inproceedings{DBLP:conf/icassp/SchusterN12, author = {Mike Schuster and Kaisuke Nakajima}, title = {Japanese and Korean voice search}, pages = {5149--5152}, publisher = {IEEE International Conference on Acoustics, Speech and Signal Processing}, year = {2012} } @inproceedings{kudo2018sentencepiece, title={SentencePiece: A simple and language independent subword tokenizer and 
detokenizer for Neural Text Processing}, author={Taku {Kudo} and John {Richardson}}, publisher={Conference on Empirical Methods in Natural Language Processing}, pages={66--71}, year={2018} } @inproceedings{provilkov2020bpe, title={BPE-Dropout: Simple and Effective Subword Regularization}, author={Ivan {Provilkov} and Dmitrii {Emelianenko} and Elena {Voita}}, publisher={Annual Meeting of the Association for Computational Linguistics}, pages={1882--1892}, year={2020} } @inproceedings{he2020dynamic, title={Dynamic Programming Encoding for Subword Segmentation in Neural Machine Translation}, author={Xuanli {He} and Gholamreza {Haffari} and Mohammad {Norouzi}}, publisher={Annual Meeting of the Association for Computational Linguistics}, pages={3042--3051}, year={2020} } @inproceedings{DBLP:journals/mt/EetemadiLTR15, author = {Sauleh Eetemadi and William Lewis and Kristina Toutanova and Hayder Radha}, title = {Survey of data-selection methods in statistical machine translation}, publisher = {Machine Translation}, volume = {29}, number = {3-4}, pages = {189--223}, year = {2015} } @inproceedings{britz2017effective, title={Effective domain mixing for neural machine translation}, author={Britz, Denny and Le, Quoc and Pryzant, Reid}, publisher={Proceedings of the Second Conference on Machine Translation}, pages={118--126}, year={2017} } @inproceedings{DBLP:conf/emnlp/AxelrodHG11, author = {Amittai Axelrod and Xiaodong He and Jianfeng Gao}, title = {Domain Adaptation via Pseudo In-Domain Data Selection}, pages = {355--362}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2011} } @inproceedings{DBLP:conf/wmt/AxelrodRHO15, author = {Amittai Axelrod and Philip Resnik and Xiaodong He and Mari Ostendorf}, title = {Data Selection With Fewer Words}, pages = {58--65}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2015} } @inproceedings{DBLP:conf/emnlp/WangULCS17, author = {Rui Wang and Masao Utiyama and 
Lemao Liu and Kehai Chen and Eiichiro Sumita}, title = {Instance Weighting for Neural Machine Translation Domain Adaptation}, pages = {1482--1488}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2017} } @inproceedings{DBLP:conf/iwslt/MansourWN11, author = {Saab Mansour and Joern Wuebker and Hermann Ney}, title = {Combining translation and language model scoring for domain-specific data filtering}, pages = {222--229}, publisher = {International Workshop on Spoken Language Translation}, year = {2011} } @inproceedings{DBLP:conf/conll/ChenH16, author = {Boxing Chen and Fei Huang}, title = {Semi-supervised Convolutional Networks for Translation Adaptation with Tiny Amount of In-domain Data}, pages = {314--323}, publisher = {The SIGNLL Conference on Computational Natural Language Learning}, year = {2016} } @inproceedings{chen2016bilingual, title={Bilingual methods for adaptive training data selection for machine translation}, author={Chen, Boxing and Kuhn, Roland and Foster, George and Cherry, Colin and Huang, Fei}, publisher={Association for Machine Translation in the Americas}, pages={93--103}, year={2016} } @inproceedings{DBLP:conf/aclnmt/ChenCFL17, author = {Boxing Chen and Colin Cherry and George F. 
Foster and Samuel Larkin}, title = {Cost Weighting for Neural Machine Translation Domain Adaptation}, pages = {40--46}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2017} } @inproceedings{DBLP:conf/wmt/DumaM17, author = {Mirela-Stefania Duma and Wolfgang Menzel}, title = {Automatic Threshold Detection for Data Selection in Machine Translation}, pages = {483--488}, publisher = {Proceedings of the Second Conference on Machine Translation}, year = {2017} } @inproceedings{DBLP:conf/wmt/BiciciY11, author = {Ergun Bi{\c{c}}ici and Deniz Yuret}, title = {Instance Selection for Machine Translation using Feature Decay Algorithms}, pages = {272--283}, publisher = {Proceedings of the Sixth Workshop on Statistical Machine Translation}, year = {2011} } @inproceedings{poncelas2018feature, title={Feature decay algorithms for neural machine translation}, author={Poncelas, Alberto and Maillette de Buy Wenniger, Gideon and Way, Andy}, year={2018}, publisher={European Association for Machine Translation} } @inproceedings{DBLP:conf/acl/SotoSPW20, author = {Xabier Soto and Dimitar Sht. 
Shterionov and Alberto Poncelas and Andy Way}, title = {Selecting Backtranslated Data from Multiple Sources for Improved Neural Machine Translation}, pages = {3898--3908}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2020} }

% arXiv items follow the DBLP form: publisher = CoRR, volume = abs/<id>.
@inproceedings{DBLP:journals/corr/abs-1811-03039,
  author    = {Alberto Poncelas and Gideon Maillette de Buy Wenniger and Andy Way},
  title     = {Data Selection with Feature Decay Algorithms Using an Approximated Target Side},
  publisher = {CoRR},
  volume    = {abs/1811.03039},
  year      = {2018}
}

% Dynamic data selection / sampling and the effect of noisy training data.
@inproceedings{DBLP:conf/emnlp/WeesBM17,
  author    = {Marlies van der Wees and Arianna Bisazza and Christof Monz},
  title     = {Dynamic Data Selection for Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1400--1410},
  year      = {2017}
}

@inproceedings{DBLP:conf/wmt/WangWHNC18,
  author    = {Wei Wang and Taro Watanabe and Macduff Hughes and Tetsuji Nakagawa and Ciprian Chelba},
  title     = {Denoising Neural Machine Translation Training with Trusted Data and Online Data Selection},
  publisher = {Proceedings of the Third Conference on Machine Translation},
  pages     = {133--143},
  year      = {2018}
}

@inproceedings{DBLP:conf/acl/WangUS18,
  author    = {Rui Wang and Masao Utiyama and Eiichiro Sumita},
  title     = {Dynamic Sentence Sampling for Efficient Training of Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {298--304},
  year      = {2018}
}

@inproceedings{DBLP:conf/aclnmt/KhayrallahK18,
  author    = {Huda Khayrallah and Philipp Koehn},
  title     = {On the Impact of Various Types of Noise on Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {74--83},
  year      = {2018}
}

@inproceedings{DBLP:conf/coling/FormigaF12, author = {Llu{\'{\i}}s Formiga and Jos{\'{e}} A. R.
Fonollosa}, title = {Dealing with Input Noise in Statistical Machine Translation}, pages = {319--328}, publisher = {International Conference on Computational Linguistics}, year = {2012} }

% Cleaning noisy parallel data and detecting semantic divergence.
@inproceedings{DBLP:conf/acl/CuiZLLZ13,
  author    = {Lei Cui and Dongdong Zhang and Shujie Liu and Mu Li and Ming Zhou},
  title     = {Bilingual Data Cleaning for {SMT} using Graph-based Random Walk},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {340--345},
  year      = {2013}
}

@phdthesis{DBLP:phd/dnb/Mediani17,
  author = {Mohammed Mediani},
  title  = {Learning from Noisy Data in Statistical Machine Translation},
  school = {Karlsruhe Institute of Technology, Germany},
  year   = {2017}
}

@inproceedings{rarrick2011mt,
  author    = {Rarrick, Spencer and Quirk, Chris and Lewis, Will},
  title     = {MT detection in web-scraped parallel corpora},
  publisher = {Machine Translation},
  pages     = {422--430},
  year      = {2011}
}

@inproceedings{taghipour2011parallel,
  author    = {Taghipour, Kaveh and Khadivi, Shahram and Xu, Jia},
  title     = {Parallel corpus refinement as an outlier detection algorithm},
  publisher = {Machine Translation},
  pages     = {414--421},
  year      = {2011}
}

% NOTE(review): this entry keeps its venue in booktitle, whereas the
% surrounding entries keep it in publisher -- confirm which one the
% bibliography style in use actually prints.
@inproceedings{Xu2017ZipporahAF,
  author    = {Hainan Xu and Philipp Koehn},
  title     = {Zipporah: a Fast and Scalable Data Cleaning System for Noisy Web-Crawled Parallel Corpora},
  booktitle = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2017}
}

@inproceedings{DBLP:conf/aclnmt/CarpuatVN17,
  author    = {Marine Carpuat and Yogarshi Vyas and Xing Niu},
  title     = {Detecting Cross-Lingual Semantic Divergence for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {69--79},
  year      = {2017}
}

@inproceedings{DBLP:conf/naacl/VyasNC18, author = {Yogarshi Vyas and Xing Niu and Marine Carpuat}, title = {Identifying Semantic Divergences in Parallel Text without Annotations}, pages = {1503--1515}, publisher = {Annual Conference of the North American Chapter of the Association for
Computational Linguistics}, year = {2018} }

% Data selection by curriculum, followed by active-learning query strategies.
% The quoted phrase in the title below uses LaTeX quote marks; a straight
% double quote would typeset as two closing quotes.
@inproceedings{DBLP:conf/acl/WangCC19,
  author    = {Wei Wang and Isaac Caswell and Ciprian Chelba},
  title     = {Dynamically Composing Domain-Data Selection with Clean-Data Selection by ``Co-Curricular Learning'' for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1282--1292},
  year      = {2019}
}

@inproceedings{DBLP:conf/emnlp/SettlesC08,
  author    = {Burr Settles and Mark Craven},
  title     = {An Analysis of Active Learning Strategies for Sequence Labeling Tasks},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1070--1079},
  year      = {2008}
}

% Placeholder values left by the citation exporter (volume 20, number 0,
% pages 0) and the spurious trailing author token were replaced by the
% page range of the ICML 2000 proceedings record.
% NOTE(review): page range taken from the public proceedings index -- confirm.
@inproceedings{campbell2000query,
  author    = {Campbell, Colin and Cristianini, Nello and Smola, Alex},
  title     = {Query learning with large margin classifiers},
  publisher = {International Conference on Machine Learning},
  pages     = {111--118},
  year      = {2000}
}

@inproceedings{DBLP:conf/icml/SchohnC00,
  author    = {Greg Schohn and David Cohn},
  title     = {Less is More: Active Learning with Support Vector Machines},
  publisher = {International Conference on Machine Learning},
  pages     = {839--846},
  year      = {2000}
}

@inproceedings{DBLP:conf/colt/SeungOS92, author = {H.
Sebastian Seung and Manfred Opper and Haim Sompolinsky}, title = {Query by Committee}, pages = {287--294}, publisher = {Conference on Computational Learning Theory}, year = {1992} }

% Book entries need a publisher field; the publisher name used to sit in a
% journal field, which book styles ignore.
% NOTE(review): this textbook is commonly cited with year 1997 -- confirm
% before changing; the citation key is kept as is.
@book{mitchell1996m,
  author    = {Mitchell, Tom},
  title     = {Machine Learning},
  publisher = {McGraw Hill},
  year      = {1996}
}

@inproceedings{DBLP:conf/icml/AbeM98,
  author    = {Naoki Abe and Hiroshi Mamitsuka},
  title     = {Query Learning Strategies Using Boosting and Bagging},
  publisher = {International Conference on Machine Learning},
  pages     = {1--9},
  year      = {1998}
}

% The citation key keeps its historical spelling so existing citations still
% resolve; only the author surnames were corrected.
@inproceedings{mccallumzy1998employing,
  author    = {McCallum, Andrew Kachites and Nigam, Kamal},
  title     = {Employing {EM} and pool-based active learning for text classification},
  publisher = {International Conference on Machine Learning},
  pages     = {359--367},
  year      = {1998}
}

@inproceedings{DBLP:conf/cvpr/DalalT05,
  author    = {Navneet Dalal and Bill Triggs},
  title     = {Histograms of Oriented Gradients for Human Detection},
  publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {886--893},
  year      = {2005}
}

% Journal article kept under the file-wide inproceedings + publisher form.
@inproceedings{726791,
  author    = {Yann LeCun and Leon Bottou and Yoshua Bengio and Patrick Haffner},
  title     = {Gradient-based learning applied to document recognition},
  publisher = {Proceedings of the {IEEE}},
  volume    = {86},
  number    = {11},
  pages     = {2278--2324},
  year      = {1998}
}

@book{atkinson2007optimum,
  author    = {Atkinson, Anthony and Donev, Alexander and Tobias, Randall},
  title     = {Optimum experimental designs, with {SAS}},
  volume    = {34},
  publisher = {Oxford University Press},
  year      = {2007}
}

@inproceedings{DBLP:journals/jmlr/JiH12,
  author    = {Ming Ji and Jiawei Han},
  title     = {A Variance Minimization Criterion to Active Learning on Graphs},
  series    = {{JMLR} Proceedings},
  volume    = {22},
  publisher = {International Conference on Artificial Intelligence and Statistics},
  pages     = {556--564},
  year      = {2012}
}

@inproceedings{DBLP:journals/corr/ZhuB17, author = {Jia-Jie Zhu and Jos{\'{e}} Bento}, title = {Generative Adversarial Active Learning}, publisher =
{CoRR}, volume = {abs/1702.07956}, year = {2017} }

% Generative / adversarial active learning.
@inproceedings{DBLP:conf/iccv/HuijserG17,
  author    = {Miriam W. Huijser and Jan C. van Gemert},
  title     = {Active Decision Boundary Annotation with Deep Generative Models},
  publisher = {{IEEE} International Conference on Computer Vision},
  pages     = {5296--5305},
  year      = {2017}
}

@inproceedings{DBLP:conf/wacv/0007T20,
  author    = {Christoph Mayer and Radu Timofte},
  title     = {Adversarial Sampling for Active Learning},
  publisher = {{IEEE} Winter Conference on Applications of Computer Vision},
  pages     = {3060--3068},
  year      = {2020}
}

% Use of monolingual data in neural machine translation.
@inproceedings{DBLP:conf/acl/JeanCMB15,
  author    = {S{\'{e}}bastien Jean and KyungHyun Cho and Roland Memisevic and Yoshua Bengio},
  title     = {On Using Very Large Target Vocabulary for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1--10},
  year      = {2015}
}

% Author names are written as Surname, Given: without the comma BibTeX takes
% the last word of each name (the given name here) as the surname.
% NOTE(review): venue rewritten to the CoRR form used for other arXiv items
% in this file; confirm the arXiv identifier.
@inproceedings{2015OnGulcehre,
  author    = {Gulcehre, Caglar and Firat, Orhan and Xu, Kelvin and Cho, Kyunghyun and Barrault, Loic and Lin, Huei Chi and Bougares, Fethi and Schwenk, Holger and Bengio, Yoshua},
  title     = {On Using Monolingual Corpora in Neural Machine Translation},
  publisher = {CoRR},
  volume    = {abs/1503.03535},
  year      = {2015}
}

@inproceedings{Sennrich2016ImprovingNM,
  author    = {Rico Sennrich and Barry Haddow and Alexandra Birch},
  title     = {Improving Neural Machine Translation Models with Monolingual Data},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2016}
}

@inproceedings{DBLP:conf/aaai/Zhang0LZC18,
  author    = {Zhirui Zhang and Shujie Liu and Mu Li and Ming Zhou and Enhong Chen},
  title     = {Joint Training for Neural Machine Translation Models with Monolingual Data},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {555--562},
  year      = {2018}
}

@inproceedings{hoang2018iterative, title={Iterative back-translation for neural machine translation}, author={Hoang, Vu Cong Duy and Koehn, Philipp and Haffari, Gholamreza and Cohn, Trevor},
publisher={Proceedings of the 2nd Workshop on Neural Machine Translation and Generation}, pages={18--24}, year={2018} }

% Low-resource, pivot-based and unsupervised neural machine translation.
@inproceedings{DBLP:conf/mtsummit/ImankulovaDFI19,
  author    = {Aizhan Imankulova and Raj Dabre and Atsushi Fujita and Kenji Imamura},
  title     = {Exploiting Out-of-Domain Parallel Data through Multilingual Transfer Learning for Low-Resource Neural Machine Translation},
  publisher = {Machine Translation},
  pages     = {128--139},
  year      = {2019}
}

@inproceedings{DBLP:conf/emnlp/CurreyH19,
  author    = {Anna Currey and Kenneth Heafield},
  title     = {Zero-Resource Neural Machine Translation with Monolingual Pivot Data},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {99--107},
  year      = {2019}
}

% Venue follows the conf/emnlp key of this record.
@inproceedings{DBLP:conf/emnlp/KimPPKN19,
  author    = {Yunsu Kim and Petre Petrov and Pavel Petrushkov and Shahram Khadivi and Hermann Ney},
  title     = {Pivot-based Transfer Learning for Neural Machine Translation between Non-English Languages},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {866--876},
  year      = {2019}
}

@inproceedings{DBLP:conf/iclr/LampleCDR18,
  author    = {Guillaume Lample and Alexis Conneau and Ludovic Denoyer and Marc'Aurelio Ranzato},
  title     = {Unsupervised Machine Translation Using Monolingual Corpora Only},
  publisher = {International Conference on Learning Representations},
  year      = {2018}
}

@inproceedings{DBLP:conf/iclr/ArtetxeLAC18,
  author    = {Mikel Artetxe and Gorka Labaka and Eneko Agirre and Kyunghyun Cho},
  title     = {Unsupervised Neural Machine Translation},
  publisher = {International Conference on Learning Representations},
  year      = {2018}
}

% TODO (author's note): re-check the publisher of the entry below.
@inproceedings{DBLP:conf/conll/LiuBH18, author = {Ming Liu and Wray L.
Buntine and Gholamreza Haffari}, title = {Learning to Actively Learn Neural Machine Translation}, pages = {334--344}, publisher = {The SIGNLL Conference on Computational Natural Language Learning}, year = {2018} }

% Active and interactive / online learning for neural machine translation.
@inproceedings{DBLP:conf/emnlp/ZhaoZZZ20,
  author    = {Yuekai Zhao and Haoran Zhang and Shuchang Zhou and Zhihua Zhang},
  title     = {Active Learning Approaches to Enhancing Neural Machine Translation: An Empirical Study},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1796--1806},
  year      = {2020}
}

@inproceedings{Peris2018ActiveLF,
  author    = {{\'{A}}lvaro Peris and Francisco Casacuberta},
  title     = {Active Learning for Interactive Neural Machine Translation of Data Streams},
  publisher = {The SIGNLL Conference on Computational Natural Language Learning},
  pages     = {151--160},
  year      = {2018}
}

@inproceedings{DBLP:journals/pbml/TurchiNFF17,
  author    = {Marco Turchi and Matteo Negri and M. Amin Farajian and Marcello Federico},
  title     = {Continuous Learning from Human Post-Edits for Neural Machine Translation},
  publisher = {The Prague Bulletin of Mathematical Linguistics},
  volume    = {108},
  pages     = {233--244},
  year      = {2017}
}

% The journal name contains an ampersand, which must be escaped in LaTeX.
@inproceedings{DBLP:journals/csl/PerisC19,
  author    = {{\'{A}}lvaro Peris and Francisco Casacuberta},
  title     = {Online learning for effort reduction in interactive neural machine translation},
  publisher = {Computer Speech {\&} Language},
  volume    = {58},
  pages     = {98--126},
  year      = {2019}
}

% Curriculum and self-paced learning.
@inproceedings{DBLP:conf/eccv/GuoHZZDSH18,
  author    = {Sheng Guo and Weilin Huang and Haozhi Zhang and Chenfan Zhuang and Dengke Dong and Matthew R. Scott and Dinglong Huang},
  title     = {CurriculumNet: Weakly Supervised Learning from Large-Scale Web Images},
  series    = {Lecture Notes in Computer Science},
  volume    = {11214},
  publisher = {European Conference on Computer Vision},
  pages     = {139--154},
  year      = {2018}
}

@inproceedings{DBLP:conf/mm/JiangMMH14, author = {Lu Jiang and Deyu Meng and Teruko Mitamura and Alexander G.
Hauptmann}, title = {Easy Samples First: Self-paced Reranking for Zero-Example Multimedia Search}, pages = {547--556}, publisher = {ACM International Conference on Multimedia}, year = {2014} }

% Author's note: check the publisher of the next entry. It now uses the
% same NAACL venue wording as the other NAACL entries in this file.
@inproceedings{DBLP:conf/naacl/PlataniosSNPM19,
  author    = {Emmanouil Antonios Platanios and Otilia Stretcu and Graham Neubig and Barnab{\'{a}}s P{\'{o}}czos and Tom M. Mitchell},
  title     = {Competence-based Curriculum Learning for Neural Machine Translation},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {1162--1172},
  year      = {2019}
}

@inproceedings{DBLP:conf/acl/TayWLFPYRHZ19,
  author    = {Yi Tay and Shuohang Wang and Anh Tuan Luu and Jie Fu and Minh C. Phan and Xingdi Yuan and Jinfeng Rao and Siu Cheung Hui and Aston Zhang},
  title     = {Simple and Effective Curriculum Pointer-Generator Networks for Reading Comprehension over Long Narratives},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {4922--4931},
  year      = {2019}
}

@inproceedings{DBLP:conf/icml/GuoCZZ0HT20,
  author    = {Yong Guo and Yaofo Chen and Yin Zheng and Peilin Zhao and Jian Chen and Junzhou Huang and Mingkui Tan},
  title     = {Breaking the Curse of Space Explosion: Towards Efficient {NAS} with Curriculum Search},
  series    = {Proceedings of Machine Learning Research},
  volume    = {119},
  publisher = {International Conference on Machine Learning},
  pages     = {3822--3831},
  year      = {2020}
}

@inproceedings{DBLP:conf/ranlp/KocmiB17,
  author    = {Tom Kocmi and Ondrej Bojar},
  title     = {Curriculum Learning and Minibatch Bucketing in Neural Machine Translation},
  publisher = {International Conference Recent Advances in Natural Language Processing},
  pages     = {379--386},
  year      = {2017}
}

@inproceedings{DBLP:conf/naacl/ZhangSKMCD19, author = {Xuan Zhang and Pamela Shapiro and Gaurav Kumar and Paul McNamee and Marine Carpuat and Kevin Duh}, title = {Curriculum Learning for Domain Adaptation in Neural Machine
Translation}, pages = {1903--1915}, publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics}, year = {2019} }

% Curriculum learning for neural machine translation and for vision tasks.
% The arXiv preprint below uses the CoRR + volume form that the DBLP-derived
% arXiv entries in this file use.
@inproceedings{zhang2018empirical,
  author    = {Zhang, Xuan and Kumar, Gaurav and Khayrallah, Huda and Murray, Kenton and Gwinnup, Jeremy and Martindale, Marianna J. and McNamee, Paul and Duh, Kevin and Carpuat, Marine},
  title     = {An empirical exploration of curriculum learning for neural machine translation},
  publisher = {CoRR},
  volume    = {abs/1811.00739},
  year      = {2018}
}

% Venue wording matches the other COLING entries in this file.
@inproceedings{DBLP:conf/coling/XuHJFWHJXZ20,
  author    = {Chen Xu and Bojie Hu and Yufan Jiang and Kai Feng and Zeyang Wang and Shen Huang and Qi Ju and Tong Xiao and Jingbo Zhu},
  title     = {Dynamic Curriculum Learning for Low-Resource Neural Machine Translation},
  publisher = {International Conference on Computational Linguistics},
  pages     = {3977--3989},
  year      = {2020}
}

@inproceedings{DBLP:conf/acl/ZhouYWWC20,
  author    = {Yikai Zhou and Baosong Yang and Derek F. Wong and Yu Wan and Lidia S. Chao},
  title     = {Uncertainty-Aware Curriculum Learning for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {6934--6944},
  year      = {2020}
}

@inproceedings{DBLP:conf/aaai/ZhaoWNW20,
  author    = {Mingjun Zhao and Haijiang Wu and Di Niu and Xiaoli Wang},
  title     = {Reinforced Curriculum Learning on Pre-Trained Neural Machine Translation Models},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {9652--9659},
  year      = {2020}
}

% IEEE is brace-protected so that styles cannot change its capitalisation.
@inproceedings{DBLP:conf/cvpr/PentinaSL15,
  author    = {Anastasia Pentina and Viktoriia Sharmanska and Christoph H. Lampert},
  title     = {Curriculum learning of multiple tasks},
  publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {5492--5500},
  year      = {2015}
}

@inproceedings{DBLP:conf/iccvw/SarafianosGNK17, author = {Nikolaos Sarafianos and Theodore Giannakopoulos and Christophoros Nikou and Ioannis A.
Kakadiaris}, title = {Curriculum Learning for Multi-task Classification of Visual Attributes}, pages = {2608--2615}, publisher = {{IEEE} International Conference on Computer Vision}, year = {2017} }

@inproceedings{DBLP:conf/nips/ChangLM17,
  author    = {Haw-Shiuan Chang and Erik G. Learned-Miller and Andrew McCallum},
  title     = {Active Bias: Training More Accurate Neural Networks by Emphasizing High Variance Samples},
  publisher = {Annual Conference on Neural Information Processing Systems},
  pages     = {1002--1012},
  year      = {2017}
}

% Author's note: wrap IEEE in braces (done for the entries on this line).
% Continual learning and catastrophic forgetting.
@inproceedings{DBLP:journals/pami/LiH18a,
  author    = {Zhizhong Li and Derek Hoiem},
  title     = {Learning without Forgetting},
  publisher = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  volume    = {40},
  number    = {12},
  pages     = {2935--2947},
  year      = {2018}
}

% arXiv preprint, written in the CoRR + volume form used by the DBLP-derived
% arXiv entries in this file.
@inproceedings{rusu2016progressive,
  author    = {Rusu, Andrei A. and Rabinowitz, Neil C. and Desjardins, Guillaume and Soyer, Hubert and Kirkpatrick, James and Kavukcuoglu, Koray and Pascanu, Razvan and Hadsell, Raia},
  title     = {Progressive neural networks},
  publisher = {CoRR},
  volume    = {abs/1606.04671},
  year      = {2016}
}

@inproceedings{DBLP:journals/corr/FernandoBBZHRPW17, author = {Chrisantha Fernando and Dylan Banarse and Charles Blundell and Yori Zwols and David Ha and Andrei A.
Rusu and Alexander Pritzel and Daan Wierstra}, title = {PathNet: Evolution Channels Gradient Descent in Super Neural Networks}, publisher = {CoRR}, volume = {abs/1701.08734}, year = {2017} }

% Catastrophic forgetting and incremental learning.
% Venue follows the conf/naacl key of this record and uses the NAACL wording
% of the other NAACL entries in this file.
@inproceedings{DBLP:conf/naacl/ThompsonGKDK19,
  author    = {Brian Thompson and Jeremy Gwinnup and Huda Khayrallah and Kevin Duh and Philipp Koehn},
  title     = {Overcoming Catastrophic Forgetting During Domain Adaptation of Neural Machine Translation},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {2062--2068},
  year      = {2019}
}

% Venue wording matches the other COLING entries in this file.
@inproceedings{DBLP:conf/coling/GuF20,
  author    = {Shuhao Gu and Yang Feng},
  title     = {Investigating Catastrophic Forgetting During Continual Training for Neural Machine Translation},
  publisher = {International Conference on Computational Linguistics},
  pages     = {4315--4326},
  year      = {2020}
}

@inproceedings{DBLP:conf/cvpr/RebuffiKSL17,
  author    = {Sylvestre-Alvise Rebuffi and Alexander Kolesnikov and Georg Sperl and Christoph H. Lampert},
  title     = {iCaRL: Incremental Classifier and Representation Learning},
  publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {5533--5542},
  year      = {2017}
}

@inproceedings{DBLP:conf/eccv/CastroMGSA18, author = {Francisco M. Castro and Manuel J.
Mar{\'{\i}}n-Jim{\'{e}}nez and Nicol{\'{a}}s Guil and Cordelia Schmid and Karteek Alahari}, title = {End-to-End Incremental Learning}, series = {Lecture Notes in Computer Science}, volume = {11216}, pages = {241--257}, publisher = {European Conference on Computer Vision}, year = {2018} }

% Sequence-level training objectives.
% This key must be defined only once: BibTeX reports a repeated entry when
% the same key appears twice.
@inproceedings{Bengio2015ScheduledSF,
  author    = {Samy Bengio and Oriol Vinyals and Navdeep Jaitly and Noam Shazeer},
  title     = {Scheduled Sampling for Sequence Prediction with Recurrent Neural Networks},
  publisher = {Annual Conference on Neural Information Processing Systems},
  pages     = {1171--1179},
  year      = {2015}
}

@inproceedings{Ranzato2016SequenceLT,
  author    = {Marc'Aurelio Ranzato and Sumit Chopra and Michael Auli and Wojciech Zaremba},
  title     = {Sequence Level Training with Recurrent Neural Networks},
  publisher = {International Conference on Learning Representations},
  year      = {2016}
}

@inproceedings{DBLP:conf/nips/GoodfellowPMXWOCB14, author = {Ian J. Goodfellow and Jean Pouget-Abadie and Mehdi Mirza and Bing Xu and David Warde-Farley and Sherjil Ozair and Aaron C.
Courville and Yoshua Bengio}, title = {Generative Adversarial Nets}, publisher = {Annual Conference on Neural Information Processing Systems}, pages = {2672--2680}, year = {2014} }

@inproceedings{DBLP:conf/acl/ShenCHHWSL16,
  author    = {Shiqi Shen and Yong Cheng and Zhongjun He and Wei He and Hua Wu and Maosong Sun and Yang Liu},
  title     = {Minimum Risk Training for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2016}
}

% Automatic evaluation metrics for machine translation.
@inproceedings{DBLP:conf/acl/PapineniRWZ02,
  author    = {Kishore Papineni and Salim Roukos and Todd Ward and Wei-jing Zhu},
  title     = {Bleu: a Method for Automatic Evaluation of Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {311--318},
  year      = {2002}
}

@inproceedings{doddington2002automatic,
  author    = {Doddington, George},
  title     = {Automatic evaluation of machine translation quality using n-gram co-occurrence statistics},
  publisher = {Proceedings of the second international conference on Human Language Technology Research},
  pages     = {138--145},
  year      = {2002}
}

% The exporter had split the proceedings year into volume 200 / number 6;
% those placeholders were dropped.
% NOTE(review): page range taken from the public proceedings index -- confirm.
@inproceedings{snover2006study,
  author    = {Snover, Matthew and Dorr, Bonnie and Schwartz, Richard and Micciulla, Linnea and Makhoul, John},
  title     = {A study of translation edit rate with targeted human annotation},
  publisher = {Association for Machine Translation in the Americas},
  pages     = {223--231},
  year      = {2006}
}

% Journal article kept under the file-wide inproceedings + publisher form.
@inproceedings{lavie2009meteor,
  author    = {Lavie, Alon and Denkowski, Michael J.},
  title     = {The {METEOR} metric for automatic evaluation of machine translation},
  publisher = {Machine Translation},
  volume    = {23},
  number    = {2--3},
  pages     = {105--115},
  year      = {2009}
}

@inproceedings{bahdanau2014neural,
  author    = {Dzmitry Bahdanau and Kyunghyun Cho and Yoshua Bengio},
  title     = {Neural Machine Translation by Jointly Learning to Align and Translate},
  publisher = {International Conference on Learning Representations},
  year      = {2015}
}

@inproceedings{koehn2003statistical, author = {Philipp Koehn and
Franz Josef Och and Daniel Marcu}, title = {Statistical Phrase-Based Translation}, publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics}, year = {2003} }

% Risk-based and expected-BLEU training objectives.
@inproceedings{smith2006minimum,
  author    = {David A. Smith and Jason Eisner},
  title     = {Minimum Risk Annealing for Training Log-Linear Models},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2006}
}

% The acronym in the title is brace-protected so that styles keep it in
% capitals.
@inproceedings{he2012maximum,
  author    = {He, Xiaodong and Deng, Li},
  title     = {Maximum expected {BLEU} training of phrase and lexicon translation models},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {292--301},
  year      = {2012}
}

@inproceedings{DBLP:conf/acl/GaoHYD14,
  author    = {Jianfeng Gao and Xiaodong He and Wen-tau Yih and Li Deng},
  title     = {Learning Continuous Phrase Representations for Translation Modeling},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {699--709},
  year      = {2014}
}

% Reinforcement learning.
@book{sutton2018reinforcement,
  author    = {Richard S. Sutton and Andrew G. Barto},
  title     = {Reinforcement learning: An introduction},
  publisher = {MIT Press},
  year      = {2018}
}

@inproceedings{DBLP:journals/nature/SilverHMGSDSAPL16, author = {David Silver and Aja Huang and Chris J. Maddison and Arthur Guez and Laurent Sifre and George van den Driessche and Julian Schrittwieser and Ioannis Antonoglou and Vedavyas Panneershelvam and Marc Lanctot and Sander Dieleman and Dominik Grewe and John Nham and Nal Kalchbrenner and Ilya Sutskever and Timothy P.
Lillicrap and Madeleine Leach and Koray Kavukcuoglu and Thore Graepel and Demis Hassabis}, title = {Mastering the game of {Go} with deep neural networks and tree search}, publisher = {Nature}, volume = {529}, number = {7587}, pages = {484--489}, year = {2016} }

% Reinforcement learning, reward shaping and actor-critic sequence training.
@inproceedings{DBLP:conf/icml/ZarembaMJF16,
  author    = {Wojciech Zaremba and Tomas Mikolov and Armand Joulin and Rob Fergus},
  title     = {Learning Simple Algorithms from Examples},
  series    = {{JMLR} Workshop and Conference Proceedings},
  volume    = {48},
  publisher = {International Conference on Machine Learning},
  pages     = {421--429},
  year      = {2016}
}

@inproceedings{DBLP:conf/icml/NgHR99,
  author    = {Andrew Y. Ng and Daishi Harada and Stuart J. Russell},
  title     = {Policy Invariance Under Reward Transformations: Theory and Application to Reward Shaping},
  publisher = {International Conference on Machine Learning},
  pages     = {278--287},
  year      = {1999}
}

@inproceedings{DBLP:conf/iclr/BahdanauBXGLPCB17,
  author    = {Dzmitry Bahdanau and Philemon Brakel and Kelvin Xu and Anirudh Goyal and Ryan Lowe and Joelle Pineau and Aaron C. Courville and Yoshua Bengio},
  title     = {An Actor-Critic Algorithm for Sequence Prediction},
  publisher = {International Conference on Learning Representations},
  year      = {2017}
}

% Journal and arXiv items kept under the file-wide inproceedings + publisher
% form.
@inproceedings{DBLP:journals/nature/LeCunBH15,
  author    = {Yann LeCun and Yoshua Bengio and Geoffrey E. Hinton},
  title     = {Deep learning},
  publisher = {Nature},
  volume    = {521},
  number    = {7553},
  pages     = {436--444},
  year      = {2015}
}

@inproceedings{DBLP:journals/corr/abs-1207-0580,
  author    = {Geoffrey E. Hinton and Nitish Srivastava and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov},
  title     = {Improving neural networks by preventing co-adaptation of feature detectors},
  publisher = {CoRR},
  volume    = {abs/1207.0580},
  year      = {2012}
}

@inproceedings{DBLP:journals/tslp/ZhuM12, author = {Jingbo Zhu and Matthew Y.
Ma}, title = {Uncertainty-based active learning with instability estimation for text classification}, publisher = {ACM Transactions on Speech and Language Processing}, volume = {8}, number = {4}, pages = {5:1--5:21}, year = {2012} }

% Uncertainty-based active learning and curriculum learning.
@inproceedings{DBLP:conf/coling/ZhuWYT08,
  author    = {Jingbo Zhu and Huizhen Wang and Tianshun Yao and Benjamin K. Tsou},
  title     = {Active Learning with Sampling by Uncertainty and Density for Word Sense Disambiguation and Text Classification},
  publisher = {International Conference on Computational Linguistics},
  pages     = {1137--1144},
  year      = {2008}
}

@inproceedings{DBLP:conf/medprai/SurendranathJ18,
  author    = {Ajay Surendranath and Dinesh Babu Jayagopi},
  title     = {Curriculum Learning for Depth Estimation with Deep Convolutional Neural Networks},
  publisher = {Mediterranean Conference on Pattern Recognition and Artificial Intelligence},
  pages     = {95--100},
  year      = {2018}
}

% The year is required for inproceedings entries; this record's key ends
% in 09.
@inproceedings{DBLP:conf/icml/BengioLCW09,
  author    = {Yoshua Bengio and J{\'{e}}r{\^{o}}me Louradour and Ronan Collobert and Jason Weston},
  title     = {Curriculum learning},
  series    = {{ACM} International Conference Proceeding Series},
  volume    = {382},
  publisher = {International Conference on Machine Learning},
  pages     = {41--48},
  year      = {2009}
}

@inproceedings{DBLP:journals/corr/abs-2002-11794,
  author    = {Zhuohan Li and Eric Wallace and Sheng Shen and Kevin Lin and Kurt Keutzer and Dan Klein and Joseph E. Gonzalez},
  title     = {Train Large, Then Compress: Rethinking Model Size for Efficient Training and Inference of Transformers},
  publisher = {CoRR},
  volume    = {abs/2002.11794},
  year      = {2020}
}

@inproceedings{kim-rush-2016-sequence, author = {Yoon Kim and Alexander M.
Rush}, title = {Sequence-Level Knowledge Distillation}, pages = {1317--1327}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2016} }

@inproceedings{Jiao2020TinyBERTDB,
  author    = {Xiaoqi Jiao and Yichun Yin and Lifeng Shang and Xin Jiang and Xiao Chen and Linlin Li and Fang Wang and Qun Liu},
  title     = {{TinyBERT}: Distilling {BERT} for Natural Language Understanding},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {4163--4174},
  year      = {2020}
}

%%%%% chapter 13------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 14------------------------------------------------------

% Decoders and decoding toolkits.
@inproceedings{Koehn2007Moses,
  author    = {Philipp Koehn and Hieu Hoang and Alexandra Birch and Chris Callison-Burch and Marcello Federico and Nicola Bertoldi and Brooke Cowan and Wade Shen and Christine Moran and Richard Zens and Chris Dyer and Ondrej Bojar and Alexandra Constantin and Evan Herbst},
  title     = {Moses: Open Source Toolkit for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2007}
}

% NOTE(review): volume 3265 is assumed to be the Lecture Notes in Computer
% Science volume number, as in the other entries here that pair series and
% volume -- confirm.
@inproceedings{DBLP:conf/amta/Koehn04,
  author    = {Philipp Koehn},
  title     = {Pharaoh: {A} Beam Search Decoder for Phrase-Based Statistical Machine Translation Models},
  series    = {Lecture Notes in Computer Science},
  volume    = {3265},
  publisher = {Association for Machine Translation in the Americas},
  pages     = {115--124},
  year      = {2004}
}

@inproceedings{DBLP:conf/emnlp/StahlbergHSB17,
  author    = {Felix Stahlberg and Eva Hasler and Danielle Saunders and Bill Byrne},
  title     = {{SGNMT} - A Flexible {NMT} Decoding Platform for Quick Prototyping of New Models and Search Strategies},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {25--30},
  year      = {2017}
}

@inproceedings{Liu2016AgreementOT,
title={Agreement on Target-bidirectional Neural Machine Translation}, author={Lemao Liu and Masao Utiyama and Andrew M. Finch and Eiichiro Sumita}, pages = {411--416}, publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics}, year = {2016} }

% Shared-task system descriptions.
@inproceedings{DBLP:conf/wmt/LiLXLLLWZXWFCLL19,
  author    = {Bei Li and Yinqiao Li and Chen Xu and Ye Lin and Jiqiang Liu and Hui Liu and Ziyang Wang and Yuhao Zhang and Nuo Xu and Zeyang Wang and Kai Feng and Hexuan Chen and Tengbo Liu and Yanyang Li and Qiang Wang and Tong Xiao and Jingbo Zhu},
  title     = {The NiuTrans Machine Translation Systems for {WMT19}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {257--266},
  year      = {2019}
}

@inproceedings{DBLP:conf/wmt/SennrichHB16,
  author    = {Rico Sennrich and Barry Haddow and Alexandra Birch},
  title     = {Edinburgh Neural Machine Translation Systems for {WMT} 16},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {371--376},
  year      = {2016}
}

@inproceedings{Stahlberg2018TheUO,
  author    = {Felix Stahlberg and Adri{\`{a}} de Gispert and Bill Byrne},
  title     = {The University of Cambridge's Machine Translation Systems for {WMT18}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {504--512},
  year      = {2018}
}

% Bidirectional and multi-pass decoding.
@inproceedings{DBLP:conf/aaai/ZhangSQLJW18,
  author    = {Xiangwen Zhang and Jinsong Su and Yue Qin and Yang Liu and Rongrong Ji and Hongji Wang},
  title     = {Asynchronous Bidirectional Decoding for Neural Machine Translation},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {5698--5705},
  year      = {2018}
}

% NOTE(review): the venue name of this entry could not be checked from this
% file -- confirm it against the published record.
@inproceedings{Li2017EnhancedNM,
  author    = {Aodong Li and Shiyue Zhang and Dong Wang and Thomas Fang Zheng},
  title     = {Enhanced neural machine translation by learning from draft},
  publisher = {{IEEE} Asia-Pacific Services Computing Conference},
  pages     = {1583--1587},
  year      = {2017}
}

@inproceedings{ElMaghraby2018EnhancingTF,
title={Enhancing Translation from English to Arabic Using Two-Phase Decoder Translation}, author={Ayah ElMaghraby and Ahmed Rafea}, pages = {539--549}, publisher = {Intelligent Systems and Applications}, year = {2018} }

% Multi-pass, non-autoregressive and insertion-based generation.
@inproceedings{Geng2018AdaptiveMD,
  author    = {Xinwei Geng and Xiaocheng Feng and Bing Qin and Ting Liu},
  title     = {Adaptive Multi-pass Decoder for Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {523--532},
  year      = {2018}
}

@inproceedings{Lee2018DeterministicNN,
  author    = {Jason Lee and Elman Mansimov and Kyunghyun Cho},
  title     = {Deterministic Non-Autoregressive Neural Sequence Modeling by Iterative Refinement},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1173--1182},
  year      = {2018}
}

@inproceedings{Gu2019LevenshteinT,
  author    = {Jiatao Gu and Changhan Wang and Jake Zhao},
  title     = {Levenshtein Transformer},
  publisher = {Annual Conference on Neural Information Processing Systems},
  pages     = {11179--11189},
  year      = {2019}
}

@inproceedings{Guo2020JointlyMS,
  author    = {Junliang Guo and Linli Xu and Enhong Chen},
  title     = {Jointly Masked Sequence-to-Sequence Model for Non-Autoregressive Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {376--385},
  year      = {2020}
}

@inproceedings{Stahlberg2018AnOS,
  author    = {Felix Stahlberg and Danielle Saunders and Bill Byrne},
  title     = {An Operation Sequence Model for Explainable Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {175--186},
  year      = {2018}
}

@inproceedings{Stern2019InsertionTF,
  author    = {Mitchell Stern and William Chan and Jamie Kiros and Jakob Uszkoreit},
  title     = {Insertion Transformer: Flexible Sequence Generation via Insertion Operations},
  publisher = {International Conference on Machine Learning},
  pages     = {5976--5985},
  year      = {2019}
}

@inproceedings{stling2017NeuralMT, title={Neural machine translation for
low-resource languages},
  author = {Robert {\"O}stling and J{\"{o}}rg Tiedemann},
  publisher = {CoRR},
  year = {2017},
  volume = {abs/1708.05729}
}

@inproceedings{Kikuchi2016ControllingOL,
  author = {Yuta Kikuchi and Graham Neubig and Ryohei Sasano and Hiroya Takamura and Manabu Okumura},
  title = {Controlling Output Length in Neural Encoder-Decoders},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {1328--1338},
  year = {2016}
}

@inproceedings{Takase2019PositionalET,
  author = {Sho Takase and Naoaki Okazaki},
  title = {Positional Encoding to Control Output Sequence Length},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages = {3999--4004},
  year = {2019}
}

@inproceedings{Murray2018CorrectingLB,
  author = {Kenton Murray and David Chiang},
  title = {Correcting Length Bias in Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {212--223},
  year = {2018}
}

@inproceedings{Sountsov2016LengthBI,
  author = {Pavel Sountsov and Sunita Sarawagi},
  title = {Length bias in Encoder Decoder Models and a Case for Global Conditioning},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {1516--1525},
  year = {2016}
}

% Acronyms in the next two titles are braced so sentence-casing styles keep them upper-case.
@inproceedings{Jean2015MontrealNM,
  author = {S{\'{e}}bastien Jean and Orhan Firat and Kyunghyun Cho and Roland Memisevic and Yoshua Bengio},
  title = {Montreal Neural Machine Translation Systems for {WMT}'15},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {134--140},
  year = {2015}
}

@inproceedings{Yang2018OtemUtemOA,
  author = {Jing Yang and Biao Zhang and Yue Qin and Xiangwen Zhang and Qian Lin and Jinsong Su},
  title = {Otem{\&}Utem: Over- and Under-Translation Evaluation Metric for {NMT}},
  publisher = {CCF International Conference on Natural Language Processing and Chinese Computing},
  pages = {291--302},
  year = {2018}
}

@inproceedings{Mi2016CoverageEM,
  title = {Coverage Embedding Models for Neural
Machine Translation},
  author = {Haitao Mi and Baskaran Sankaran and Zhiguo Wang and Abe Ittycheriah},
  pages = {955--960},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2016}
}

% Venue corrected: this is an EMNLP 2017 paper, as its DBLP key indicates.
@inproceedings{DBLP:conf/emnlp/HuangZM17,
  author = {Liang Huang and Kai Zhao and Mingbo Ma},
  title = {When to Finish? Optimal Beam Search for Neural Text Generation (modulo beam size)},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {2134--2139},
  year = {2017}
}

@inproceedings{Wiseman2016SequencetoSequenceLA,
  author = {Sam Wiseman and Alexander M. Rush},
  title = {Sequence-to-Sequence Learning as Beam-Search Optimization},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {1296--1306},
  year = {2016}
}

% Venue corrected: this is an EMNLP 2018 paper, as its DBLP key indicates.
@inproceedings{DBLP:conf/emnlp/Yang0M18,
  author = {Yilin Yang and Liang Huang and Mingbo Ma},
  title = {Breaking the Beam Search Curse: {A} Study of (Re-)Scoring Methods and Stopping Criteria for Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {3054--3059},
  year = {2018}
}

@inproceedings{Ma2019LearningTS,
  author = {Mingbo Ma and Renjie Zheng and Liang Huang},
  title = {Learning to Stop in Structured Prediction for Neural Machine Translation},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages = {1884--1889},
  year = {2019}
}

@inproceedings{KleinOpenNMT,
  author = {Guillaume Klein and Yoon Kim and Yuntian Deng and Jean Senellart and Alexander M.
Rush},
  title = {{OpenNMT}: Open-Source Toolkit for Neural Machine Translation},
  pages = {67--72},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2017}
}

@inproceedings{bahdanau2014neural,
  author = {Dzmitry Bahdanau and Kyunghyun Cho and Yoshua Bengio},
  title = {Neural Machine Translation by Jointly Learning to Align and Translate},
  publisher = {International Conference on Learning Representations},
  year = {2015}
}

% "Daum{\'e}, III, Hal" uses the "Last, Jr, First" form so the suffix is parsed correctly.
@inproceedings{Jiang2012LearnedPF,
  author = {Jiarong Jiang and Adam R. Teichert and Daum{\'e}, III, Hal and Jason Eisner},
  title = {Learned Prioritization for Trading Off Accuracy and Speed},
  publisher = {Annual Conference on Neural Information Processing Systems},
  pages = {1340--1348},
  year = {2012}
}

@inproceedings{Zheng2020OpportunisticDW,
  author = {Renjie Zheng and Mingbo Ma and Baigong Zheng and Kaibo Liu and Liang Huang},
  title = {Opportunistic Decoding with Timely Correction for Simultaneous Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {437--442},
  year = {2020}
}

@inproceedings{Ma2019STACLST,
  author = {Mingbo Ma and Liang Huang and Hao Xiong and Renjie Zheng and Kaibo Liu and Baigong Zheng and Chuanqiang Zhang and Zhongjun He and Hairong Liu and Xing Li and Hua Wu and Haifeng Wang},
  title = {{STACL}: Simultaneous Translation with Implicit Anticipation and Controllable Latency using Prefix-to-Prefix Framework},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {3025--3036},
  year = {2019}
}

@inproceedings{Gimpel2013ASE,
  author = {Kevin Gimpel and Dhruv Batra and Chris Dyer and Gregory Shakhnarovich},
  title = {A Systematic Exploration of Diversity in Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {1100--1111},
  year = {2013}
}

@inproceedings{Li2016MutualIA,
  title = {Mutual Information and Diverse Decoding Improve Neural Machine Translation},
  author = {Jiwei Li and Dan Jurafsky},
  publisher = {CoRR},
  year = {2016},
volume = {abs/1601.00372}
}

@inproceedings{Li2016ADO,
  author = {Jiwei Li and Michel Galley and Chris Brockett and Jianfeng Gao and Bill Dolan},
  title = {A Diversity-Promoting Objective Function for Neural Conversation Models},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages = {110--119},
  year = {2016}
}

% Venue corrected: published at CoNLL 2018, not COLING.
@inproceedings{He2018SequenceTS,
  author = {Xuanli He and Gholamreza Haffari and Mohammad Norouzi},
  title = {Sequence to Sequence Mixture Model for Diverse Machine Translation},
  publisher = {The SIGNLL Conference on Computational Natural Language Learning},
  pages = {583--592},
  year = {2018}
}

@inproceedings{Shen2019MixtureMF,
  author = {Tianxiao Shen and Myle Ott and Michael Auli and Marc'Aurelio Ranzato},
  title = {Mixture Models for Diverse Machine Translation: Tricks of the Trade},
  publisher = {International Conference on Machine Learning},
  pages = {5719--5728},
  year = {2019}
}

% Venue corrected: published at EMNLP 2020, not ACL.
@inproceedings{Wu2020GeneratingDT,
  author = {Xuanfu Wu and Yang Feng and Chenze Shao},
  title = {Generating Diverse Translation from Model Distribution with Dropout},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {1088--1097},
  year = {2020}
}

@inproceedings{Sun2020GeneratingDT,
  author = {Zewei Sun and Shujian Huang and Hao Ran Wei and Xin Yu Dai and Jiajun Chen},
  title = {Generating Diverse Translation by Manipulating Multi-Head Attention},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages = {8976--8983},
  year = {2020}
}

@inproceedings{Vijayakumar2016DiverseBS,
  title = {Diverse Beam Search: Decoding Diverse Solutions from Neural Sequence Models},
  author = {Ashwin K. Vijayakumar and Michael Cogswell and Ramprasaath R. Selvaraju and Qing Sun and Stefan Lee and David J.
Crandall and Dhruv Batra},
  publisher = {CoRR},
  year = {2016},
  volume = {abs/1610.02424}
}

@inproceedings{Liu2014SearchAwareTF,
  author = {Lemao Liu and Liang Huang},
  title = {Search-Aware Tuning for Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {1942--1952},
  year = {2014}
}

@inproceedings{Yu2013MaxViolationPA,
  author = {Heng Yu and Liang Huang and Haitao Mi and Kai Zhao},
  title = {Max-Violation Perceptron and Forced Decoding for Scalable {MT} Training},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {1112--1123},
  year = {2013}
}

@inproceedings{Stahlberg2019OnNS,
  author = {Felix Stahlberg and Bill Byrne},
  title = {On {NMT} Search Errors and Model Errors: Cat Got Your Tongue?},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {3354--3360},
  year = {2019}
}

@inproceedings{Niehues2017AnalyzingNM,
  author = {Jan Niehues and Eunah Cho and Thanh-Le Ha and Alex Waibel},
  title = {Analyzing Neural {MT} Search and Model Performance},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {11--17},
  year = {2017}
}

@inproceedings{StahlbergNeural,
  author = {Felix Stahlberg},
  title = {Neural Machine Translation: A Review},
  publisher = {Journal of Artificial Intelligence Research},
  volume = {69},
  pages = {343--418},
  year = {2020}
}

@inproceedings{Ranzato2016SequenceLT,
  author = {Marc'Aurelio Ranzato and Sumit Chopra and Michael Auli and Wojciech Zaremba},
  title = {Sequence Level Training with Recurrent Neural Networks},
  publisher = {International Conference on Learning Representations},
  year = {2016}
}

@inproceedings{Bengio2015ScheduledSF,
  author = {Samy Bengio and Oriol Vinyals and Navdeep Jaitly and Noam Shazeer},
  title = {Scheduled Sampling for Sequence Prediction with Recurrent Neural Networks},
  publisher = {Annual Conference on Neural Information Processing Systems},
  pages = {1171--1179},
  year = {2015}
}

@inproceedings{Zhang2019BridgingTG,
  title = {Bridging the Gap between Training and
Inference for Neural Machine Translation},
  author = {Wen Zhang and Yang Feng and Fandong Meng and Di You and Qun Liu},
  pages = {4334--4343},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:conf/acl/ShenCHHWSL16,
  author = {Shiqi Shen and Yong Cheng and Zhongjun He and Wei He and Hua Wu and Maosong Sun and Yang Liu},
  title = {Minimum Risk Training for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2016}
}

@inproceedings{DBLP:conf/acl/SennrichHB16a,
  author = {Rico Sennrich and Barry Haddow and Alexandra Birch},
  title = {Neural Machine Translation of Rare Words with Subword Units},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2016}
}

% Venue corrected for the next two entries: both are EMNLP papers, as their DBLP keys indicate.
@inproceedings{DBLP:conf/emnlp/ZensSX12,
  author = {Richard Zens and Daisy Stanton and Peng Xu},
  title = {A Systematic Comparison of Phrase Table Pruning Techniques},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {972--983},
  year = {2012}
}

@inproceedings{DBLP:conf/emnlp/JohnsonMFK07,
  author = {Howard Johnson and Joel D. Martin and George F. Foster and Roland Kuhn},
  title = {Improving Translation Quality by Discarding Most of the Phrasetable},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {967--975},
  year = {2007}
}

@inproceedings{DBLP:conf/emnlp/LingGTB12,
  author = {Wang Ling and Jo{\~{a}}o Gra{\c{c}}a and Isabel Trancoso and Alan W.
Black},
  title = {Entropy-based Pruning for Phrase-based Machine Translation},
  pages = {962--971},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2012}
}

@inproceedings{Narang2017BlockSparseRN,
  author = {Sharan Narang and Eric Undersander and Gregory Diamos},
  title = {Block-Sparse Recurrent Neural Networks},
  publisher = {CoRR},
  volume = {abs/1711.02782},
  year = {2017}
}

@inproceedings{Gale2019TheSO,
  author = {Trevor Gale and Erich Elsen and Sara Hooker},
  title = {The State of Sparsity in Deep Neural Networks},
  publisher = {CoRR},
  volume = {abs/1902.09574},
  year = {2019}
}

% The duplicated title field was dropped; BibTeX warns about and ignores the extra field.
@inproceedings{Michel2019AreSH,
  author = {Paul Michel and Omer Levy and Graham Neubig},
  title = {Are Sixteen Heads Really Better than One?},
  publisher = {Annual Conference on Neural Information Processing Systems},
  pages = {14014--14024},
  year = {2019}
}

@inproceedings{DBLP:journals/corr/abs-1905-09418,
  author = {Elena Voita and David Talbot and Fedor Moiseev and Rico Sennrich and Ivan Titov},
  title = {Analyzing Multi-Head Self-Attention: Specialized Heads Do the Heavy Lifting, the Rest Can Be Pruned},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {5797--5808},
  year = {2019}
}

@inproceedings{Kitaev2020ReformerTE,
  author = {Nikita Kitaev and Lukasz Kaiser and Anselm Levskaya},
  title = {Reformer: The Efficient Transformer},
  publisher = {International Conference on Learning Representations},
  year = {2020}
}

@inproceedings{Katharopoulos2020TransformersAR,
  author = {Angelos Katharopoulos and Apoorv Vyas and Nikolaos Pappas and Fran{\c{c}}ois Fleuret},
  title = {Transformers are {RNNs}: Fast Autoregressive Transformers with Linear Attention},
  publisher = {CoRR},
  volume = {abs/2006.16236},
  year = {2020}
}

@inproceedings{xiao2011language,
  title = {Language Modeling for Syntax-Based Machine Translation Using Tree Substitution Grammars: A Case Study on {Chinese-English} Translation},
  author = {Xiao, Tong and Zhu,
Jingbo and Zhu, Muhua},
  volume = {10},
  number = {4},
  pages = {1--29},
  year = {2011},
  publisher = {ACM Transactions on Asian Language Information Processing (TALIP)}
}

@inproceedings{Li2009VariationalDF,
  author = {Zhifei Li and Jason Eisner and Sanjeev Khudanpur},
  title = {Variational Decoding for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {593--601},
  year = {2009}
}

@inproceedings{Bastings2019ModelingLS,
  author = {Jasmijn Bastings and Wilker Aziz and Ivan Titov and Khalil Sima'an},
  title = {Modeling Latent Sentence Structure in Neural Machine Translation},
  publisher = {CoRR},
  volume = {abs/1901.06436},
  year = {2019}
}

@inproceedings{Shah2018GenerativeNM,
  author = {Harshil Shah and David Barber},
  title = {Generative Neural Machine Translation},
  publisher = {Annual Conference on Neural Information Processing Systems},
  pages = {1353--1362},
  year = {2018}
}

@inproceedings{Su2018VariationalRN,
  author = {Jinsong Su and Shan Wu and Deyi Xiong and Yaojie Lu and Xianpei Han and Biao Zhang},
  title = {Variational Recurrent Neural Machine Translation},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages = {5488--5495},
  year = {2018}
}

@inproceedings{DBLP:journals/corr/GehringAGYD17,
  author = {Jonas Gehring and Michael Auli and David Grangier and Denis Yarats and Yann N.
Dauphin},
  title = {Convolutional Sequence to Sequence Learning},
  publisher = {International Conference on Machine Learning},
  volume = {70},
  pages = {1243--1252},
  year = {2017}
}

@inproceedings{Wei2019ImitationLF,
  author = {Bingzhen Wei and Mingxuan Wang and Hao Zhou and Junyang Lin and Xu Sun},
  title = {Imitation Learning for Non-Autoregressive Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {1304--1312},
  year = {2019}
}

@inproceedings{Shao2019RetrievingSI,
  author = {Chenze Shao and Yang Feng and Jinchao Zhang and Fandong Meng and Xilin Chen and Jie Zhou},
  title = {Retrieving Sequential Information for Non-Autoregressive Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {3013--3024},
  year = {2019}
}

@inproceedings{Akoury2019SyntacticallyST,
  author = {Nader Akoury and Kalpesh Krishna and Mohit Iyyer},
  title = {Syntactically Supervised Transformers for Faster Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {1269--1281},
  year = {2019}
}

@inproceedings{Guo2020FineTuningBC,
  author = {Junliang Guo and Xu Tan and Linli Xu and Tao Qin and Enhong Chen and Tie-Yan Liu},
  title = {Fine-Tuning by Curriculum Learning for Non-Autoregressive Neural Machine Translation},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages = {7839--7846},
  year = {2020}
}

@inproceedings{Ran2020LearningTR,
  author = {Qiu Ran and Yankai Lin and Peng Li and Jie Zhou},
  title = {Learning to Recover from Multi-Modality Errors for Non-Autoregressive Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {3059--3069},
  year = {2020}
}

@inproceedings{Liu2020FastBERTAS,
  title = {FastBERT: a Self-distilling BERT with Adaptive Inference Time},
  author = {Weijie Liu and Peng Zhou and Zhiruo Wang and Zhe Zhao and Haotang Deng and Qi Ju},
  pages = {6035--6044},
  publisher = {Annual
Meeting of the Association for Computational Linguistics},
  year = {2020}
}

@inproceedings{Elbayad2020DepthAdaptiveT,
  author = {Maha Elbayad and Jiatao Gu and Edouard Grave and Michael Auli},
  title = {Depth-Adaptive Transformer},
  publisher = {International Conference on Learning Representations},
  year = {2020}
}

@inproceedings{Lan2020ALBERTAL,
  author = {Zhenzhong Lan and Mingda Chen and Sebastian Goodman and Kevin Gimpel and Piyush Sharma and Radu Soricut},
  title = {{ALBERT}: A Lite {BERT} for Self-supervised Learning of Language Representations},
  publisher = {International Conference on Learning Representations},
  year = {2020}
}

@inproceedings{Han2015LearningBW,
  author = {Song Han and Jeff Pool and John Tran and William J. Dally},
  title = {Learning both Weights and Connections for Efficient Neural Network},
  publisher = {Annual Conference on Neural Information Processing Systems},
  pages = {1135--1143},
  year = {2015}
}

% Title capitalisation restored to the published form; the acronym is braced.
@inproceedings{Lee2019SNIPSN,
  author = {Namhoon Lee and Thalaiyasingam Ajanthan and Philip H. S. Torr},
  title = {{SNIP}: Single-shot Network Pruning based on Connection Sensitivity},
  publisher = {International Conference on Learning Representations},
  year = {2019}
}

@inproceedings{Frankle2019TheLT,
  author = {Jonathan Frankle and Michael Carbin},
  title = {The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks},
  publisher = {International Conference on Learning Representations},
  year = {2019}
}

@inproceedings{Brix2020SuccessfullyAT,
  author = {Christopher Brix and Parnia Bahar and Hermann Ney},
  title = {Successfully Applying the Stabilized Lottery Ticket Hypothesis to the Transformer Architecture},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {3909--3915},
  year = {2020}
}

% Preprint venue spelled "CoRR" like the other arXiv-only entries in this file.
@inproceedings{Liu2019RethinkingTV,
  author = {Zhuang Liu and Mingjie Sun and Tinghui Zhou and Gao Huang and Trevor Darrell},
  title = {Rethinking the Value of Network Pruning},
  publisher = {CoRR},
  volume = {abs/1810.05270},
  year = {2019}
}

@inproceedings{Liu2017LearningEC,
author = {Zhuang Liu and Jianguo Li and Zhiqiang Shen and Gao Huang and Shoumeng Yan and Changshui Zhang},
  title = {Learning Efficient Convolutional Networks through Network Slimming},
  pages = {2755--2763},
  publisher = {{IEEE} International Conference on Computer Vision},
  year = {2017}
}

% Venue name aligned with the other NeurIPS entries in this file.
@inproceedings{Banner2018ScalableMF,
  author = {Ron Banner and Itay Hubara and Elad Hoffer and Daniel Soudry},
  title = {Scalable Methods for 8-bit Training of Neural Networks},
  publisher = {Annual Conference on Neural Information Processing Systems},
  pages = {5151--5159},
  year = {2018}
}

@inproceedings{Hubara2017QuantizedNN,
  author = {Itay Hubara and Matthieu Courbariaux and Daniel Soudry and Ran El-Yaniv and Yoshua Bengio},
  title = {Quantized Neural Networks: Training Neural Networks with Low Precision Weights and Activations},
  publisher = {Journal of Machine Learning Research},
  volume = {18},
  pages = {187:1--187:30},
  year = {2017}
}

@inproceedings{DBLP:journals/corr/HintonVD15,
  author = {Geoffrey E. Hinton and Oriol Vinyals and Jeffrey Dean},
  title = {Distilling the Knowledge in a Neural Network},
  publisher = {CoRR},
  volume = {abs/1503.02531},
  year = {2015}
}

@inproceedings{Munim2019SequencelevelKD,
  author = {Raden Mu'az Mun'im and Nakamasa Inoue and Koichi Shinoda},
  title = {Sequence-level Knowledge Distillation for Model Compression of Attention-based Sequence-to-sequence Speech Recognition},
  publisher = {{IEEE} International Conference on Acoustics, Speech and Signal Processing},
  pages = {6151--6155},
  year = {2019}
}

@inproceedings{Tang2019DistillingTK,
  author = {Raphael Tang and Yao Lu and Linqing Liu and Lili Mou and Olga Vechtomova and Jimmy Lin},
  title = {Distilling Task-Specific Knowledge from {BERT} into Simple Neural Networks},
  publisher = {CoRR},
  volume = {abs/1903.12136},
  year = {2019}
}

@inproceedings{Jiao2020TinyBERTDB,
  author = {Xiaoqi Jiao and Yichun Yin and Lifeng Shang and Xin Jiang and Xiao Chen and Linlin Li and Fang Wang and Qun Liu},
  title = {TinyBERT: Distilling {BERT} for Natural Language
Understanding},
  pages = {4163--4174},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2020}
}

@inproceedings{Ghazvininejad2020AlignedCE,
  author = {Marjan Ghazvininejad and Vladimir Karpukhin and Luke Zettlemoyer and Omer Levy},
  title = {Aligned Cross Entropy for Non-Autoregressive Machine Translation},
  publisher = {CoRR},
  volume = {abs/2004.01655},
  year = {2020}
}

@inproceedings{Shao2020MinimizingTB,
  author = {Chenze Shao and Jinchao Zhang and Yang Feng and Fandong Meng and Jie Zhou},
  title = {Minimizing the Bag-of-Ngrams Difference for Non-Autoregressive Neural Machine Translation},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages = {198--205},
  year = {2020}
}

@inproceedings{Ma2019FlowSeqNC,
  author = {Xuezhe Ma and Chunting Zhou and Xian Li and Graham Neubig and Eduard H. Hovy},
  title = {FlowSeq: Non-Autoregressive Conditional Sequence Generation with Generative Flow},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {4281--4291},
  year = {2019}
}

@inproceedings{Guo2019NonAutoregressiveNM,
  author = {Junliang Guo and Xu Tan and Di He and Tao Qin and Linli Xu and Tie-Yan Liu},
  title = {Non-Autoregressive Neural Machine Translation with Enhanced Decoder Input},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages = {3723--3730},
  year = {2019}
}

@inproceedings{Ran2019GuidingNN,
  author = {Qiu Ran and Yankai Lin and Peng Li and Jie Zhou},
  title = {Guiding Non-Autoregressive Neural Machine Translation Decoding with Reordering Information},
  publisher = {CoRR},
  volume = {abs/1911.02215},
  year = {2019}
}

@inproceedings{vaswani2017attention,
  title = {Attention is All You Need},
  author = {Ashish {Vaswani} and Noam {Shazeer} and Niki {Parmar} and Jakob {Uszkoreit} and Llion {Jones} and Aidan N.
{Gomez} and Lukasz {Kaiser} and Illia {Polosukhin}},
  publisher = {Annual Conference on Neural Information Processing Systems},
  pages = {5998--6008},
  year = {2017}
}

@inproceedings{Gu2017NonAutoregressiveNM,
  author = {Jiatao Gu and James Bradbury and Caiming Xiong and Victor O. K. Li and Richard Socher},
  title = {Non-Autoregressive Neural Machine Translation},
  publisher = {International Conference on Learning Representations},
  year = {2018}
}

% Preprint venue spelled "CoRR" like the other arXiv-only entries in this file.
@inproceedings{Zhou2020UnderstandingKD,
  author = {Chunting Zhou and Graham Neubig and Jiatao Gu},
  title = {Understanding Knowledge Distillation in Non-autoregressive Machine Translation},
  publisher = {CoRR},
  volume = {abs/1911.02727},
  year = {2020}
}

@inproceedings{Wang2019NonAutoregressiveMT,
  author = {Yiren Wang and Fei Tian and Di He and Tao Qin and ChengXiang Zhai and Tie-Yan Liu},
  title = {Non-Autoregressive Machine Translation with Auxiliary Regularization},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages = {5377--5384},
  year = {2019}
}

@inproceedings{Kaiser2018FastDI,
  author = {Łukasz Kaiser and Aurko Roy and Ashish Vaswani and Niki Parmar and Samy Bengio and Jakob Uszkoreit and Noam Shazeer},
  title = {Fast Decoding in Sequence Models using Discrete Latent Variables},
  publisher = {International Conference on Machine Learning},
  pages = {2395--2404},
  year = {2018}
}

@inproceedings{Tu2020ENGINEEI,
  author = {Lifu Tu and Richard Yuanzhe Pang and Sam Wiseman and Kevin Gimpel},
  title = {{ENGINE}: Energy-Based Inference Networks for Non-Autoregressive Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {2819--2826},
  year = {2020}
}

@inproceedings{Shu2020LatentVariableNN,
  author = {Raphael Shu and Jason Lee and Hideki Nakayama and Kyunghyun Cho},
  title = {Latent-Variable Non-Autoregressive Neural Machine Translation with Deterministic Inference using a Delta Posterior},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages = {8846--8853},
  year = {2020}
}

@inproceedings{Li2019HintBasedTF,
  title = {Hint-Based
Training for Non-Autoregressive Machine Translation},
  author = {Zhuohan Li and Zi Lin and Di He and Fei Tian and Tao Qin and Liwei Wang and Tie-Yan Liu},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {5707--5712},
  year = {2019}
}

@inproceedings{Ho2016ModelFreeIL,
  author = {Jonathan Ho and Jayesh K. Gupta and Stefano Ermon},
  title = {Model-Free Imitation Learning with Policy Optimization},
  publisher = {International Conference on Machine Learning},
  pages = {2760--2769},
  year = {2016}
}

@inproceedings{Ho2016GenerativeAI,
  author = {Jonathan Ho and Stefano Ermon},
  title = {Generative Adversarial Imitation Learning},
  publisher = {Annual Conference on Neural Information Processing Systems},
  pages = {4565--4573},
  year = {2016}
}

@inproceedings{Duan2017OneShotIL,
  author = {Yan Duan and Marcin Andrychowicz and Bradly C. Stadie and Jonathan Ho and Jonas Schneider and Ilya Sutskever and Pieter Abbeel and Wojciech Zaremba},
  title = {One-Shot Imitation Learning},
  publisher = {CoRR},
  volume = {abs/1703.07326},
  year = {2017}
}

@inproceedings{Wang2018SemiAutoregressiveNM,
  author = {Chunqi Wang and Ji Zhang and Haiqing Chen},
  title = {Semi-Autoregressive Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {479--488},
  year = {2018}
}

@inproceedings{Ghazvininejad2019MaskPredictPD,
  author = {Marjan Ghazvininejad and Omer Levy and Yinhan Liu and Luke Zettlemoyer},
  title = {Mask-Predict: Parallel Decoding of Conditional Masked Language Models},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {6111--6120},
  year = {2019}
}

@inproceedings{Kasai2020NonAutoregressiveMT,
  title = {Non-Autoregressive Machine Translation with Disentangled Context Transformer},
  author = {Jungo Kasai and J.
Cross and Marjan Ghazvininejad and Jiatao Gu},
  publisher = {arXiv: Computation and Language},
  year = {2020}
}

@inproceedings{Zhou2019SynchronousBN,
  author = {Long Zhou and Jiajun Zhang and Chengqing Zong},
  title = {Synchronous Bidirectional Neural Machine Translation},
  publisher = {Transactions of the Association for Computational Linguistics},
  volume = {7},
  pages = {91--105},
  year = {2019}
}

% Author names rewritten in "Last, First" form (they were being parsed with given and
% family names swapped); venue corrected to NAACL 2019; acronym braced in the title.
@inproceedings{devlin2019bert,
  author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  title = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages = {4171--4186},
  year = {2019}
}

@inproceedings{Feng2016ImprovingAM,
  author = {Shi Feng and Shujie Liu and Nan Yang and Mu Li and Ming Zhou and Kenny Q. Zhu},
  title = {Improving Attention Modeling with Implicit Distortion and Fertility for Machine Translation},
  publisher = {International Conference on Computational Linguistics},
  pages = {3082--3092},
  year = {2016}
}

@inproceedings{TuModeling,
  author = {Zhaopeng Tu and Zhengdong Lu and Yang Liu and Xiaohua Liu and Hang Li},
  title = {Modeling Coverage for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2016}
}

% A second definition of the key Wu2016GooglesNM (Google's Neural Machine Translation
% System, CoRR abs/1609.08144) used to sit here. The key is already defined earlier in
% this file, so the repeated entry only produced a "Repeated entry" error and was ignored.

@inproceedings{li-etal-2018-simple,
  author = {Yanyang Li and Tong Xiao and Yinqiao Li and Qiang Wang and Changming Xu and Jingbo Zhu},
  title = {A Simple and Effective Approach to Coverage-Aware Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {292--297},
  year = {2018}
}

@inproceedings{Peris2017InteractiveNM,
  author = {{\'A}lvaro Peris and Miguel Domingo and Francisco Casacuberta},
  title = {Interactive neural machine translation},
  publisher = {Computer Speech and Language},
  volume = {45},
  pages = {201--220},
  year = {2017}
}

@inproceedings{Peris2018ActiveLF,
  author = {{\'A}lvaro Peris and Francisco Casacuberta},
  title = {Active Learning for Interactive Neural Machine Translation of Data Streams},
  publisher = {The SIGNLL Conference on Computational Natural Language Learning},
  pages = {151--160},
  year = {2018}
}

@inproceedings{Xiao2016ALA,
  title = {A Loss-Augmented Approach to Training Syntactic Machine Translation Systems},
  author = {Tong Xiao and Derek F.
Wong and Jingbo Zhu},
  publisher = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  year = {2016},
  volume = {24},
  pages = {2069--2083}
}

@inproceedings{DBLP:conf/acl/JeanCMB15,
  author = {S{\'{e}}bastien Jean and KyungHyun Cho and Roland Memisevic and Yoshua Bengio},
  title = {On Using Very Large Target Vocabulary for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {1--10},
  year = {2015}
}

@inproceedings{61115,
  author = {Jianhua Lin},
  title = {Divergence measures based on the {Shannon} entropy},
  publisher = {IEEE Transactions on Information Theory},
  volume = {37},
  number = {1},
  pages = {145--151},
  year = {1991}
}

@inproceedings{DBLP:conf/aaai/DabreF19,
  author = {Raj Dabre and Atsushi Fujita},
  title = {Recurrent Stacking of Layers for Compact Neural Machine Translation Models},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages = {6292--6299},
  year = {2019}
}

@inproceedings{DBLP:journals/corr/abs-1805-00631,
  author = {Biao Zhang and Deyi Xiong and Jinsong Su},
  title = {Accelerating Neural Transformer via an Average Attention Network},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {1789--1798},
  year = {2018}
}

@inproceedings{Wu2019PayLA,
  author = {Felix Wu and Angela Fan and Alexei Baevski and Yann N. Dauphin and Michael Auli},
  title = {Pay Less Attention with Lightweight and Dynamic Convolutions},
  publisher = {International Conference on Learning Representations},
  year = {2019}
}

@inproceedings{Xiao2019SharingAW,
  author = {Tong Xiao and Yinqiao Li and Jingbo Zhu and Zhengtao Yu and Tongran Liu},
  title = {Sharing Attention Weights for Fast Transformer},
  publisher = {International Joint Conference on Artificial Intelligence},
  pages = {5292--5298},
  year = {2019}
}

@inproceedings{Chen2018TheBO,
  author = {Mia Xu Chen and Orhan Firat and Ankur Bapna and Melvin Johnson and Wolfgang Macherey and George F.
Foster and Llion Jones and Mike Schuster and Noam Shazeer and Niki Parmar and Ashish Vaswani and Jakob Uszkoreit and Lukasz Kaiser and Zhifeng Chen and Yonghui Wu and Macduff Hughes},
  title = {The Best of Both Worlds: Combining Recent Advances in Neural Machine Translation},
  pages = {76--86},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2018}
}

@inproceedings{DBLP:journals/corr/abs-1906-00532,
  author = {Aishwarya Bhandare and Vamsi Sripathi and Deepthi Karkada and Vivek Menon and Sun Choi and Kushal Datta and Vikram Saletore},
  title = {Efficient 8-Bit Quantization of Transformer Neural Machine Language Translation Model},
  publisher = {CoRR},
  volume = {abs/1906.00532},
  year = {2019}
}

@inproceedings{DBLP:conf/cvpr/JacobKCZTHAK18,
  author = {Benoit Jacob and Skirmantas Kligys and Bo Chen and Menglong Zhu and Matthew Tang and Andrew G. Howard and Hartwig Adam and Dmitry Kalenichenko},
  title = {Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference},
  publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
  pages = {2704--2713},
  year = {2018}
}

@inproceedings{DBLP:journals/corr/abs-1910-10485,
  author = {Gabriele Prato and Ella Charlaix and Mehdi Rezagholizadeh},
  title = {Fully Quantized Transformer for Improved Translation},
  publisher = {CoRR},
  volume = {abs/1910.10485},
  year = {2019}
}

@inproceedings{DBLP:conf/nips/HubaraCSEB16,
  author = {Itay Hubara and Matthieu Courbariaux and Daniel Soudry and Ran El-Yaniv and Yoshua Bengio},
  title = {Binarized Neural Networks},
  publisher = {Annual Conference on Neural Information Processing Systems},
  pages = {4107--4115},
  year = {2016}
}

@inproceedings{DBLP:journals/jcss/FreundS97,
  author = {Yoav Freund and Robert E.
Schapire}, title = {A Decision-Theoretic Generalization of On-Line Learning and an Application to Boosting}, publisher = {Journal of Computer and System Sciences}, volume = {55}, number = {1}, pages = {119--139}, year = {1997} }

@inproceedings{DBLP:conf/acl/XiaoZZW10,
  author    = {Tong Xiao and Jingbo Zhu and Muhua Zhu and Huizhen Wang},
  title     = {Boosting-Based System Combination for Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {739--748},
  year      = {2010}
}

@inproceedings{DBLP:conf/icassp/SimBGSW07,
  author    = {Khe Chai Sim and William J. Byrne and Mark J. F. Gales and Hichem Sahbi and Philip C. Woodland},
  title     = {Consensus Network Decoding for Statistical Machine Translation System Combination},
  publisher = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing},
  pages     = {105--108},
  year      = {2007}
}

@inproceedings{DBLP:conf/acl/RostiMS07,
  author    = {Antti-Veikko I. Rosti and Spyridon Matsoukas and Richard M. Schwartz},
  title     = {Improved Word-Level System Combination for Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2007}
}

@inproceedings{DBLP:conf/wmt/RostiZMS08, author = {Antti-Veikko I. Rosti and Bing Zhang and Spyros Matsoukas and Richard M.
Schwartz}, title = {Incremental Hypothesis Alignment for Building Confusion Networks with Application to Machine Translation System Combination}, publisher = {Proceedings of the Third Workshop on Statistical Machine Translation}, pages = {183--186}, year = {2008} }

@inproceedings{DBLP:conf/emnlp/DuanLXZ09,
  author    = {Nan Duan and Mu Li and Tong Xiao and Ming Zhou},
  title     = {The Feature Subspace Method for SMT System Combination},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {1096--1104},
  year      = {2009}
}

@inproceedings{DBLP:journals/corr/LiMJ16,
  author    = {Jiwei Li and Will Monroe and Dan Jurafsky},
  title     = {A Simple, Fast Diverse Decoding Algorithm for Neural Generation},
  publisher = {CoRR},
  volume    = {abs/1611.08562},
  year      = {2016}
}

@inproceedings{xiao2013bagging,
  author    = {Tong Xiao and Jingbo Zhu and Tongran Liu },
  title     = {Bagging and boosting statistical machine translation systems},
  publisher = {Artificial Intelligence},
  volume    = {195},
  pages     = {496--527},
  year      = {2013}
}

@inproceedings{DBLP:conf/emnlp/TrombleKOM08,
  author    = {Roy Tromble and Shankar Kumar and Franz Josef Och and Wolfgang Macherey},
  title     = {Lattice Minimum Bayes-Risk Decoding for Statistical Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {620--629},
  year      = {2008}
}

@inproceedings{DBLP:conf/aaai/SuTXJSL17,
  author    = {Jinsong Su and Zhixing Tan and Deyi Xiong and Rongrong Ji and Xiaodong Shi and Yang Liu},
  title     = {Lattice-Based Recurrent Neural Network Encoders for Neural Machine Translation},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {3302--3308},
  year      = {2017}
}

@inproceedings{Shaw2018SelfAttentionWR, author = {Peter Shaw and Jakob Uszkoreit and Ashish Vaswani}, title = {Self-Attention with Relative Position Representations}, publisher = {Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics}, 
pages = {464--468}, year = {2018} }

@inproceedings{WangLearning,
  author    = {Qiang Wang and Bei Li and Tong Xiao and Jingbo Zhu and Changliang Li and Derek F. Wong and Lidia S. Chao},
  title     = {Learning Deep Transformer Models for Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1810--1822},
  year      = {2019}
}

@inproceedings{DBLP:conf/iclr/FanGJ20,
  author    = {Angela Fan and Edouard Grave and Armand Joulin},
  title     = {Reducing Transformer Depth on Demand with Structured Dropout},
  publisher = {International Conference on Learning Representations},
  year      = {2020}
}

@inproceedings{DBLP:conf/emnlp/WangXZ20,
  author    = {Qiang Wang and Tong Xiao and Jingbo Zhu},
  title     = {Training Flexible Depth Model by Multi-Task Learning for Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {4307--4312},
  year      = {2020}
}

@inproceedings{DBLP:journals/corr/abs-2002-02925,
  author    = {Canwen Xu and Wangchunshu Zhou and Tao Ge and Furu Wei and Ming Zhou},
  title     = {BERT-of-Theseus: Compressing {BERT} by Progressive Module Replacing},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2020}
}

% The key and year of the next entry denote the ICLR 2019 paper, so the venue names the conference.
@inproceedings{DBLP:conf/iclr/BaevskiA19,
  author    = {Alexei Baevski and Michael Auli},
  title     = {Adaptive Input Representations for Neural Language Modeling},
  publisher = {International Conference on Learning Representations},
  year      = {2019}
}

@inproceedings{DBLP:journals/corr/abs-2006-04768, author = {Sinong Wang and Belinda Z.
Li and Madian Khabsa and Han Fang and Hao Ma}, title = {Linformer: Self-Attention with Linear Complexity}, publisher = {CoRR}, volume = {abs/2006.04768}, year = {2020} }

@inproceedings{DBLP:journals/corr/abs-1911-12385,
  author    = {Sachin Mehta and Rik Koncel-Kedziorski and Mohammad Rastegari and Hannaneh Hajishirzi},
  title     = {DeFINE: DEep Factorized INput Word Embeddings for Neural Sequence Modeling},
  publisher = {CoRR},
  volume    = {abs/1911.12385},
  year      = {2019}
}

@inproceedings{DBLP:journals/corr/abs-1906-09777,
  author    = {Xindian Ma and Peng Zhang and Shuai Zhang and Nan Duan and Yuexian Hou and Dawei Song and Ming Zhou},
  title     = {A Tensorized Transformer for Language Modeling},
  publisher = {CoRR},
  volume    = {abs/1906.09777},
  year      = {2019}
}

@inproceedings{DBLP:conf/nips/YangLSL19,
  author    = {Zhilin Yang and Thang Luong and Ruslan Salakhutdinov and Quoc V. Le},
  title     = {Mixtape: Breaking the Softmax Bottleneck Efficiently},
  publisher = {Conference on Neural Information Processing Systems},
  pages     = {15922--15930},
  year      = {2019}
}

@inproceedings{DBLP:journals/corr/abs-2006-10369, author = {Jungo Kasai and Nikolaos Pappas and Hao Peng and James Cross and Noah A.
Smith}, title = {Deep Encoder, Shallow Decoder: Reevaluating the Speed-Quality Tradeoff in Machine Translation}, publisher = {CoRR}, volume = {abs/2006.10369}, year = {2020} }

@inproceedings{DBLP:conf/aclnmt/HuLLLLWXZ20,
  author    = {Chi Hu and Bei Li and Yinqiao Li and Ye Lin and Yanyang Li and Chenglong Wang and Tong Xiao and Jingbo Zhu},
  title     = {The NiuTrans System for WNGT 2020 Efficiency Task},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {204--210},
  year      = {2020}
}

@inproceedings{DBLP:journals/corr/abs-2010-02416,
  author    = {Yi-Te Hsu and Sarthak Garg and Yi-Hsiu Liao and Ilya Chatsviorkin},
  title     = {Efficient Inference For Neural Machine Translation},
  publisher = {CoRR},
  volume    = {abs/2010.02416},
  year      = {2020}
}

@inproceedings{Vaswani2018Tensor2TensorFN,
  author    = {Ashish Vaswani and Samy Bengio and Eugene Brevdo and Fran{\c{c}}ois Chollet and Aidan N. Gomez and Stephan Gouws and Llion Jones and Lukasz Kaiser and Nal Kalchbrenner and Niki Parmar and Ryan Sepassi and Noam Shazeer and Jakob Uszkoreit},
  title     = {Tensor2Tensor for Neural Machine Translation},
  publisher = {Association for Machine Translation in the Americas},
  pages     = {193--199},
  year      = {2018}
}

@inproceedings{Sun2019BaiduNM,
  author    = {Meng Sun and Bojian Jiang and Hao Xiong and Zhongjun He and Hua Wu and Haifeng Wang},
  title     = {Baidu Neural Machine Translation Systems for WMT19},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {374--381},
  year      = {2019}
}

@inproceedings{Wang2018TencentNM,
  author    = {Mingxuan Wang and Li Gong and Wenhuan Zhu and Jun Xie and Chao Bian},
  title     = {Tencent Neural Machine Translation Systems for WMT18},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {522--527},
  year      = {2018}
}

@inproceedings{Bi2019MultiagentLF, title={Multi-agent Learning for Neural Machine Translation}, author={Tianchi Bi and Hao Xiong and Zhongjun He and Hua Wu and Haifeng Wang}, publisher={arXiv
preprint arXiv:1909.01101}, year={2019} }

@inproceedings{DBLP:conf/aclnmt/KoehnK17,
  author    = {Philipp Koehn and Rebecca Knowles},
  title     = {Six Challenges for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {28--39},
  year      = {2017}
}

% Book entry: it has no volume, issue or page data of its own.
@inproceedings{Held2013AppliedSI,
  author    = {Leonhard Held and Saban{\'e}s Bov{\'e}, Daniel},
  title     = {Applied statistical inference},
  publisher = {Springer},
  year      = {2014}
}

@inproceedings{Zhang2016VariationalNM,
  author    = {Biao Zhang and Deyi Xiong and Jinsong Su and Hong Duan and Min Zhang},
  title     = {Variational Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {521--530},
  year      = {2016}
}

@inproceedings{Silvey2018StatisticalI,
  author    = {S. D. Silvey},
  title     = {Statistical Inference},
  publisher = {Encyclopedia of Social Network Analysis and Mining},
  year      = {2018}
}

@inproceedings{Cheong2019transformersZ,
  author    = {Robin Cheong and Robel Daniel},
  title     = {transformers.zip : Compressing Transformers with Pruning and Quantization},
  publisher = {Stanford University},
  year      = {2019}
}

@inproceedings{Beal2003VariationalAF,
  author    = {Matthew J. Beal},
  title     = {Variational algorithms for approximate Bayesian inference},
  publisher = {University College London},
  year      = {2003}
}

@inproceedings{Gage1994ANA,
  author    = {P. Gage},
  title     = {A new algorithm for data compression},
  publisher = {The C Users Journal},
  volume    = {12},
  pages     = {23--38},
  year      = {1994}
}

@inproceedings{Eisner2011LearningST,
  author    = {J. Eisner and Hal Daum{\'e}},
  title     = {Learning Speed-Accuracy Tradeoffs in Nondeterministic Inference Algorithms},
  publisher = {Annual Conference on Neural Information Processing Systems},
  year      = {2011}
}

@inproceedings{Kazimi2017CoverageFC, title={Coverage for Character Based Neural Machine Translation}, author={M. Kazimi and Marta R.
Costa-juss{\`a}}, publisher={arXiv preprint arXiv:1810.02340}, year={2017}, volume={59}, pages={99--106} }

@inproceedings{zhang-EtAl:2020:WMT,
  author    = {Zhang, Yuhao and Wang, Ziyang and Cao, Runzhe and Wei, Binghao and Shan, Weiqiao and Zhou, Shuhan and Reheman, Abudurexiti and Zhou, Tao and Zeng, Xin and Wang, Laohu and Mu, Yongyu and Zhang, Jingnan and Liu, Xiaoqian and Zhou, Xuanjun and Li, Yinqiao and Li, Bei and Xiao, Tong and Zhu, Jingbo},
  title     = {The NiuTrans Machine Translation Systems for WMT20},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {336--343},
  month     = nov,
  year      = {2020}
}

@inproceedings{DBLP:conf/nips/MehriS18,
  author    = {Shikib Mehri and Leonid Sigal},
  title     = {Middle-Out Decoding},
  publisher = {Conference on Neural Information Processing Systems},
  pages     = {5523--5534},
  year      = {2018}
}
%%%%% chapter 14------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 15------------------------------------------------------

@inproceedings{DBLP:conf/cvpr/YuYR18,
  author    = {Xin Yu and Zhiding Yu and Srikumar Ramalingam},
  title     = {Learning Strict Identity Mappings in Deep Residual Networks},
  publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition},
  pages     = {4432--4440},
  year      = {2018}
}

@inproceedings{DBLP:conf/emnlp/ZhangTS19,
  author    = {Biao Zhang and Ivan Titov and Rico Sennrich},
  title     = {Improving Deep Transformer with Depth-Scaled Initialization and Merged Attention},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {898--909},
  year      = {2019}
}

@inproceedings{DBLP:conf/eccv/HeZRS16, author = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun}, title = {Identity Mappings in Deep Residual Networks}, volume = {9908}, pages = {630--645}, publisher = 
{European Conference on Computer Vision}, year = {2016} }

@inproceedings{Ottfairseq,
  author    = {Myle Ott and Sergey Edunov and Alexei Baevski and Angela Fan and Sam Gross and Nathan Ng and David Grangier and Michael Auli},
  title     = {fairseq: {A} Fast, Extensible Toolkit for Sequence Modeling},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {48--53},
  year      = {2019}
}

@inproceedings{KleinOpenNMT,
  author    = {Guillaume Klein and Yoon Kim and Yuntian Deng and Jean Senellart and Alexander M. Rush},
  title     = {OpenNMT: Open-Source Toolkit for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {67--72},
  year      = {2017}
}

@inproceedings{DBLP:conf/acl/WuWXTGQLL19,
  author    = {Lijun Wu and Yiren Wang and Yingce Xia and Fei Tian and Fei Gao and Tao Qin and Jianhuang Lai and Tie-Yan Liu},
  title     = {Depth Growing for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {5558--5563},
  year      = {2019}
}

@inproceedings{DBLP:conf/cvpr/HuangLMW17, author = {Gao Huang and Zhuang Liu and Laurens van der Maaten and Kilian Q.
Weinberger}, title = {Densely Connected Convolutional Networks}, pages = {2261--2269}, publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition}, year = {2017} }

@inproceedings{DBLP:journals/corr/GreffSS16,
  author    = {Klaus Greff and Rupesh Kumar Srivastava and J{\"{u}}rgen Schmidhuber},
  title     = {Highway and Residual Networks learn Unrolled Iterative Estimation},
  publisher = {International Conference on Learning Representations},
  year      = {2017}
}

@inproceedings{Bapna2018TrainingDN,
  author    = {Ankur Bapna and Mia Xu Chen and Orhan Firat and Yuan Cao and Yonghui Wu},
  title     = {Training Deeper Neural Machine Translation Models with Transparent Attention},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {3028--3033},
  year      = {2018}
}

% WangLearning (Learning Deep Transformer Models for Machine Translation) is
% defined earlier in this file with identical fields; it is not repeated here
% because BibTeX reports a second definition of the same key as an error.

@inproceedings{DBLP:journals/corr/abs-2002-04745,
  author    = {Ruibin Xiong and Yunchang Yang and Di He and Kai Zheng and Shuxin Zheng and Chen Xing and Huishuai Zhang and Yanyan Lan and Liwei Wang and Tie-Yan Liu},
  title     = {On Layer Normalization in the Transformer Architecture},
  publisher = {International Conference on Machine Learning},
  volume    = {119},
  year      = {2020}
}

@inproceedings{DBLP:conf/emnlp/LiuLGCH20,
  author    = {Liyuan Liu and Xiaodong Liu and Jianfeng Gao and Weizhu Chen and Jiawei Han},
  title     = {Understanding the Difficulty of Training Transformers},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {5747--5763},
  year      = {2020}
}

@inproceedings{DBLP:journals/corr/HeZRS15, author = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun}, title = {Deep Residual Learning for Image Recognition}, publisher = 
{IEEE Conference on Computer Vision and Pattern Recognition}, pages = {770--778}, year = {2016}, }

@inproceedings{Ba2016LayerN,
  author    = {Lei Jimmy Ba and Jamie Ryan Kiros and Geoffrey E. Hinton},
  title     = {Layer Normalization},
  publisher = {CoRR},
  volume    = {abs/1607.06450},
  year      = {2016}
}

% Vaswani2018Tensor2TensorFN (Tensor2Tensor for Neural Machine Translation) is
% defined earlier in this file with identical fields; it is not repeated here
% because BibTeX reports a second definition of the same key as an error.

@inproceedings{Dou2019DynamicLA,
  author    = {Zi-Yi Dou and Zhaopeng Tu and Xing Wang and Longyue Wang and Shuming Shi and Tong Zhang},
  title     = {Dynamic Layer Aggregation for Neural Machine Translation with Routing-by-Agreement},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {86--93},
  year      = {2019}
}

@inproceedings{Wang2018MultilayerRF,
  author    = {Qiang Wang and Fuxue Li and Tong Xiao and Yanyang Li and Yinqiao Li and Jingbo Zhu},
  title     = {Multi-layer Representation Fusion for Neural Machine Translation},
  publisher = {International Conference on Computational Linguistics},
  year      = {2018}
}

@inproceedings{Dou2018ExploitingDR,
  author    = {Zi-Yi Dou and Zhaopeng Tu and Xing Wang and Shuming Shi and Tong Zhang},
  title     = {Exploiting Deep Representations for Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {4253--4262},
  year      = {2018}
}

@inproceedings{DBLP:journals/corr/LinFSYXZB17, author = {Zhouhan Lin and Minwei Feng and C{\'{\i}}cero Nogueira dos Santos and Mo Yu and Bing Xiang and Bowen Zhou and Yoshua Bengio}, title = {A Structured Self-Attentive Sentence Embedding}, publisher = {International Conference on Learning Representations}, year = 
{2017}, }

@inproceedings{DBLP:conf/nips/SrivastavaGS15,
  author    = {Rupesh Kumar Srivastava and Klaus Greff and J{\"{u}}rgen Schmidhuber},
  title     = {Training Very Deep Networks},
  publisher = {Conference on Neural Information Processing Systems},
  pages     = {2377--2385},
  year      = {2015}
}

@inproceedings{DBLP:conf/icml/BalduzziFLLMM17,
  author    = {David Balduzzi and Marcus Frean and Lennox Leary and J. P. Lewis and Kurt Wan-Duo Ma and Brian McWilliams},
  title     = {The Shattered Gradients Problem: If resnets are the answer, then what is the question?},
  publisher = {International Conference on Machine Learning},
  volume    = {70},
  pages     = {342--350},
  year      = {2017}
}

@inproceedings{DBLP:conf/icml/Allen-ZhuLS19,
  author    = {Zeyuan Allen-Zhu and Yuanzhi Li and Zhao Song},
  title     = {A Convergence Theory for Deep Learning via Over-Parameterization},
  publisher = {International Conference on Machine Learning},
  volume    = {97},
  pages     = {242--252},
  year      = {2019}
}

@inproceedings{DBLP:conf/icml/DuLL0Z19, author = {Simon S. Du and Jason D.
Lee and Haochuan Li and Liwei Wang and Xiyu Zhai}, title = {Gradient Descent Finds Global Minima of Deep Neural Networks}, publisher = {International Conference on Machine Learning}, volume = {97}, pages = {1675--1685}, year = {2019} }

@inproceedings{pmlr-v9-glorot10a,
  author    = {Xavier Glorot and Yoshua Bengio},
  title     = {Understanding the difficulty of training deep feedforward neural networks},
  publisher = {International Conference on Artificial Intelligence and Statistics},
  volume    = {9},
  pages     = {249--256},
  year      = {2010}
}

@inproceedings{DBLP:conf/iccv/HeZRS15,
  author    = {Kaiming He and Xiangyu Zhang and Shaoqing Ren and Jian Sun},
  title     = {Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification},
  publisher = {IEEE International Conference on Computer Vision},
  pages     = {1026--1034},
  year      = {2015}
}

@inproceedings{huang2020improving,
  author    = {Xiao Shi {Huang} and Juan {Perez} and Jimmy {Ba} and Maksims {Volkovs}},
  title     = {Improving Transformer Optimization Through Better Initialization},
  publisher = {International Conference on Machine Learning},
  year      = {2020}
}

@inproceedings{DBLP:conf/iclr/ZophL17,
  author    = {Barret Zoph and Quoc V. Le},
  title     = {Neural Architecture Search with Reinforcement Learning},
  publisher = {International Conference on Learning Representations},
  year      = {2017}
}

@inproceedings{DBLP:conf/cvpr/ZophVSL18,
  author    = {Barret Zoph and Vijay Vasudevan and Jonathon Shlens and Quoc V. Le},
  title     = {Learning Transferable Architectures for Scalable Image Recognition},
  publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {8697--8710},
  year      = {2018}
}

@inproceedings{Real2019AgingEF,
  author    = {Esteban Real and Alok Aggarwal and Yanping Huang and Quoc V. Le },
  title     = {Aging Evolution for Image Classifier Architecture Search},
  publisher = {AAAI Conference on Artificial Intelligence},
  year      = {2019}
}

@inproceedings{DBLP:conf/icml/SoLL19, author = {David R. So and Quoc V.
Le and Chen Liang}, title = {The Evolved Transformer}, volume = {97}, pages = {5877--5886}, publisher = {International Conference on Machine Learning}, year = {2019} }

@inproceedings{DBLP:conf/icga/MillerTH89,
  author    = {Geoffrey F. Miller and Peter M. Todd and Shailesh U. Hegde},
  title     = {Designing Neural Networks using Genetic Algorithms},
  publisher = {International Conference on Genetic Algorithms},
  pages     = {379--384},
  year      = {1989}
}

@inproceedings{mandischer1993representation,
  author    = {Mandischer, Martin},
  title     = {Representation and evolution of neural networks},
  publisher = {Artificial Neural Nets and Genetic Algorithms},
  pages     = {643--649},
  year      = {1993}
}

@inproceedings{koza1991genetic,
  author    = {Koza, John R and Rice, James P},
  title     = {Genetic generation of both the weights and architecture for a neural network},
  publisher = {International Joint Conference on Neural Networks},
  volume    = {2},
  pages     = {397--404},
  year      = {1991}
}

% Author is given as "First Last" so that BibTeX takes Dodd as the surname.
@inproceedings{DBLP:conf/ijcnn/Dodd90,
  author    = {Nigel Dodd},
  title     = {Optimisation of network structure using genetic techniques},
  publisher = {International Joint Conference on Neural Networks},
  pages     = {965--970},
  year      = {1990}
}

@inproceedings{DBLP:conf/nips/HarpSG89,
  author    = {Steven A. Harp and Tariq Samad and Aloke Guha},
  title     = {Designing Application-Specific Neural Networks Using the Genetic Algorithm},
  publisher = {Advances in Neural Information Processing Systems},
  pages     = {447--454},
  year      = {1989}
}

@inproceedings{DBLP:journals/compsys/Kitano90,
  author    = {Hiroaki Kitano},
  title     = {Designing Neural Networks Using Genetic Algorithms with Graph Generation System},
  publisher = {Complex Systems},
  volume    = {4},
  number    = {4},
  year      = {1990}
}

@inproceedings{DBLP:conf/icec/SantosD94, author = {Jos{\'{e}} Santos Reyes and Richard J.
Duro}, title = {Evolutionary Generation and Training of Recurrent Artificial Neural Networks}, pages = {759--763}, publisher = {IEEE Conference on Evolutionary Computation}, year = {1994} }

@inproceedings{DBLP:conf/nips/LuoTQCL18,
  author    = {Renqian Luo and Fei Tian and Tao Qin and Enhong Chen and Tie-Yan Liu},
  title     = {Neural Architecture Optimization},
  publisher = {Advances in Neural Information Processing Systems},
  pages     = {7827--7838},
  year      = {2018}
}

@inproceedings{DBLP:conf/icml/PhamGZLD18,
  author    = {Hieu Pham and Melody Y. Guan and Barret Zoph and Quoc V. Le and Jeff Dean},
  title     = {Efficient Neural Architecture Search via Parameter Sharing},
  publisher = {International Conference on Machine Learning},
  volume    = {80},
  pages     = {4092--4101},
  year      = {2018}
}

@inproceedings{DBLP:conf/iclr/LiuSY19,
  author    = {Hanxiao Liu and Karen Simonyan and Yiming Yang},
  title     = {{DARTS:} Differentiable Architecture Search},
  publisher = {International Conference on Learning Representations},
  year      = {2019}
}

@inproceedings{DBLP:conf/acl/LiHZXJXZLL20,
  author    = {Yinqiao Li and Chi Hu and Yuhao Zhang and Nuo Xu and Yufan Jiang and Tong Xiao and Jingbo Zhu and Tongran Liu and Changliang Li},
  title     = {Learning Architectures from an Extended Search Space for Language Modeling},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {6629--6639},
  year      = {2020}
}

% The key of the next entry denotes an EMNLP paper, so the venue names that conference.
@inproceedings{DBLP:conf/emnlp/JiangHXZZ19,
  author    = {Yufan Jiang and Chi Hu and Tong Xiao and Chunliang Zhang and Jingbo Zhu},
  title     = {Improved Differentiable Architecture Search for Language Modeling and Named Entity Recognition},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {3583--3588},
  year      = {2019}
}

@inproceedings{DBLP:conf/aaai/RealAHL19, author = {Esteban Real and Alok Aggarwal and Yanping Huang and Quoc V.
Le}, title = {Regularized Evolution for Image Classifier Architecture Search}, pages = {4780--4789}, publisher = {AAAI Conference on Artificial Intelligence}, year = {2019} }

@inproceedings{DBLP:conf/icml/RealMSSSTLK17,
  author    = {Esteban Real and Sherry Moore and Andrew Selle and Saurabh Saxena and Yutaka Leon Suematsu and Jie Tan and Quoc V. Le and Alexey Kurakin},
  title     = {Large-Scale Evolution of Image Classifiers},
  publisher = {International Conference on Machine Learning},
  volume    = {70},
  pages     = {2902--2911},
  year      = {2017}
}

@inproceedings{DBLP:conf/iclr/ElskenMH19,
  author    = {Thomas Elsken and Jan Hendrik Metzen and Frank Hutter},
  title     = {Efficient Multi-Objective Neural Architecture Search via Lamarckian Evolution},
  publisher = {International Conference on Learning Representations},
  year      = {2019}
}

@inproceedings{DBLP:conf/iclr/BakerGNR17,
  author    = {Bowen Baker and Otkrist Gupta and Nikhil Naik and Ramesh Raskar},
  title     = {Designing Neural Network Architectures using Reinforcement Learning},
  publisher = {International Conference on Learning Representations},
  year      = {2017}
}

@inproceedings{DBLP:conf/cvpr/TanCPVSHL19, author = {Mingxing Tan and Bo Chen and Ruoming Pang and Vijay Vasudevan and Mark Sandler and Andrew Howard and Quoc V.
Le}, title = {MnasNet: Platform-Aware Neural Architecture Search for Mobile}, pages = {2820--2828}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition}, year = {2019} }

@inproceedings{DBLP:conf/iclr/LiuSVFK18,
  author    = {Hanxiao Liu and Karen Simonyan and Oriol Vinyals and Chrisantha Fernando and Koray Kavukcuoglu},
  title     = {Hierarchical Representations for Efficient Architecture Search},
  publisher = {International Conference on Learning Representations},
  year      = {2018}
}

@inproceedings{DBLP:conf/iclr/CaiZH19,
  author    = {Han Cai and Ligeng Zhu and Song Han},
  title     = {ProxylessNAS: Direct Neural Architecture Search on Target Task and Hardware},
  publisher = {International Conference on Learning Representations},
  year      = {2019}
}

@inproceedings{DBLP:conf/cvpr/LiuCSAHY019,
  author    = {Chenxi Liu and Liang-Chieh Chen and Florian Schroff and Hartwig Adam and Wei Hua and Alan L. Yuille and Fei-Fei Li},
  title     = {Auto-DeepLab: Hierarchical Neural Architecture Search for Semantic Image Segmentation},
  publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {82--92},
  year      = {2019}
}

@inproceedings{DBLP:conf/cvpr/WuDZWSWTVJK19,
  author    = {Bichen Wu and Xiaoliang Dai and Peizhao Zhang and Yanghan Wang and Fei Sun and Yiming Wu and Yuandong Tian and Peter Vajda and Yangqing Jia and Kurt Keutzer},
  title     = {FBNet: Hardware-Aware Efficient ConvNet Design via Differentiable Neural Architecture Search},
  publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {10734--10742},
  year      = {2019}
}

@inproceedings{DBLP:conf/iclr/XieZLL19,
  author    = {Sirui Xie and Hehui Zheng and Chunxiao Liu and Liang Lin},
  title     = {{SNAS:} stochastic neural architecture search},
  publisher = {International Conference on Learning Representations},
  year      = {2019}
}

@inproceedings{DBLP:conf/uai/LiT19, author = {Liam Li and Ameet Talwalkar}, title = {Random Search and Reproducibility for Neural Architecture Search}, pages = {129}, publisher = 
{Conference on Uncertainty in Artificial Intelligence}, year = {2019} }

@inproceedings{DBLP:conf/cvpr/DongY19,
  author    = {Xuanyi Dong and Yi Yang},
  title     = {Searching for a Robust Neural Architecture in Four {GPU} Hours},
  publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {1761--1770},
  year      = {2019}
}

@inproceedings{DBLP:conf/iclr/XuX0CQ0X20,
  author    = {Yuhui Xu and Lingxi Xie and Xiaopeng Zhang and Xin Chen and Guo-Jun Qi and Qi Tian and Hongkai Xiong},
  title     = {{PC-DARTS:} Partial Channel Connections for Memory-Efficient Architecture Search},
  publisher = {International Conference on Learning Representations},
  year      = {2020}
}

@inproceedings{DBLP:conf/iclr/ZelaESMBH20,
  author    = {Arber Zela and Thomas Elsken and Tonmoy Saikia and Yassine Marrakchi and Thomas Brox and Frank Hutter},
  title     = {Understanding and Robustifying Differentiable Architecture Search},
  publisher = {International Conference on Learning Representations},
  year      = {2020}
}

@inproceedings{DBLP:conf/iclr/MeiLLJYYY20,
  author    = {Jieru Mei and Yingwei Li and Xiaochen Lian and Xiaojie Jin and Linjie Yang and Alan L. Yuille and Jianchao Yang},
  title     = {AtomNAS: Fine-Grained End-to-End Neural Architecture Search},
  publisher = {International Conference on Learning Representations},
  year      = {2020}
}

@inproceedings{DBLP:journals/jmlr/LiJDRT17,
  author    = {Lisha Li and Kevin G. Jamieson and Giulia DeSalvo and Afshin Rostamizadeh and Ameet Talwalkar},
  title     = {Hyperband: {A} Novel Bandit-Based Approach to Hyperparameter Optimization},
  publisher = {Journal of Machine Learning Research},
  volume    = {18},
  pages     = {185:1--185:52},
  year      = {2017}
}

@inproceedings{DBLP:conf/eccv/LiuZNSHLFYHM18, author = {Chenxi Liu and Barret Zoph and Maxim Neumann and Jonathon Shlens and Wei Hua and Li-Jia Li and Li Fei-Fei and Alan L.
Yuille and Jonathan Huang and Kevin Murphy}, title = {Progressive Neural Architecture Search}, volume = {11205}, pages = {19--35}, publisher = {European Conference on Computer Vision}, year = {2018} }

% Journal name below uses the same IEEE/ACM form as the other TASLP entry in this file.
@inproceedings{DBLP:journals/taslp/FanTXQLL20,
  author    = {Yang Fan and Fei Tian and Yingce Xia and Tao Qin and Xiang-Yang Li and Tie-Yan Liu},
  title     = {Searching Better Architectures for Neural Machine Translation},
  publisher = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume    = {28},
  pages     = {1574--1585},
  year      = {2020}
}

@inproceedings{DBLP:conf/ijcai/ChenLQWLDDHLZ20,
  author    = {Daoyuan Chen and Yaliang Li and Minghui Qiu and Zhen Wang and Bofang Li and Bolin Ding and Hongbo Deng and Jun Huang and Wei Lin and Jingren Zhou},
  title     = {AdaBERT: Task-Adaptive {BERT} Compression with Differentiable Neural Architecture Search},
  publisher = {International Joint Conference on Artificial Intelligence},
  pages     = {2463--2469},
  year      = {2020}
}

@inproceedings{DBLP:conf/acl/WangWLCZGH20,
  author    = {Hanrui Wang and Zhanghao Wu and Zhijian Liu and Han Cai and Ligeng Zhu and Chuang Gan and Song Han},
  title     = {{HAT:} Hardware-Aware Transformers for Efficient Natural Language Processing},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {7675--7688},
  year      = {2020}
}

@inproceedings{DBLP:conf/icml/CaiYZHY18,
  author    = {Han Cai and Jiacheng Yang and Weinan Zhang and Song Han and Yong Yu},
  title     = {Path-Level Network Transformation for Efficient Architecture Search},
  publisher = {International Conference on Machine Learning},
  volume    = {80},
  pages     = {677--686},
  year      = {2018}
}

@inproceedings{DBLP:journals/corr/abs-2003-03384, author = {Esteban Real and Chen Liang and David R. So and Quoc V.
Le}, title = {AutoML-Zero: Evolving Machine Learning Algorithms From Scratch}, publisher = {CoRR}, volume = {abs/2003.03384}, year = {2020} }

@inproceedings{Chollet2017XceptionDL,
  author    = {Fran{\c{c}}ois Chollet},
  title     = {Xception: Deep Learning with Depthwise Separable Convolutions},
  publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {1800--1807},
  year      = {2017}
}

@inproceedings{DBLP:journals/tnn/AngelineSP94,
  author    = {Peter J. Angeline and Gregory M. Saunders and Jordan B. Pollack},
  title     = {An evolutionary algorithm that constructs recurrent neural networks},
  publisher = {IEEE Transactions on Neural Networks},
  volume    = {5},
  number    = {1},
  pages     = {54--65},
  year      = {1994}
}

@inproceedings{stanley2002evolving,
  author    = {Stanley, Kenneth O and Miikkulainen, Risto},
  title     = {Evolving neural networks through augmenting topologies},
  publisher = {Evolutionary Computation},
  volume    = {10},
  number    = {2},
  pages     = {99--127},
  year      = {2002}
}

@inproceedings{DBLP:journals/alife/StanleyDG09,
  author    = {Kenneth O. Stanley and David B. D'Ambrosio and Jason Gauci},
  title     = {A Hypercube-Based Encoding for Evolving Large-Scale Neural Networks},
  publisher = {Artificial Life},
  volume    = {15},
  number    = {2},
  pages     = {185--212},
  year      = {2009}
}

@inproceedings{DBLP:conf/ijcai/SuganumaSN18,
  author    = {Masanori Suganuma and Shinichi Shirakawa and Tomoharu Nagao},
  title     = {A Genetic Programming Approach to Designing Convolutional Neural Network Architectures},
  publisher = {International Joint Conference on Artificial Intelligence},
  pages     = {5369--5373},
  year      = {2018}
}

@inproceedings{DBLP:conf/iccv/XieY17, author = {Lingxi Xie and Alan L.
Yuille}, title = {Genetic {CNN}}, pages = {1388--1397}, publisher = {IEEE International Conference on Computer Vision}, year = {2017} } @inproceedings{DBLP:conf/cvpr/ZhongYWSL18, author = {Zhao Zhong and Junjie Yan and Wei Wu and Jing Shao and Cheng-Lin Liu}, title = {Practical Block-Wise Neural Network Architecture Generation}, pages = {2423--2432}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition}, year = {2018} } @inproceedings{DBLP:conf/icml/BergstraYC13, author = {James Bergstra and Daniel Yamins and David D. Cox}, title = {Making a Science of Model Search: Hyperparameter Optimization in Hundreds of Dimensions for Vision Architectures}, volume = {28}, pages = {115--123}, publisher = {International Conference on Machine Learning}, year = {2013} } @inproceedings{DBLP:conf/ijcai/DomhanSH15, author = {Tobias Domhan and Jost Tobias Springenberg and Frank Hutter}, title = {Speeding Up Automatic Hyperparameter Optimization of Deep Neural Networks by Extrapolation of Learning Curves}, pages = {3460--3468}, publisher = {International Joint Conference on Artificial Intelligence}, year = {2015} } @inproceedings{DBLP:conf/icml/MendozaKFSH16, author = {Hector Mendoza and Aaron Klein and Matthias Feurer and Jost Tobias Springenberg and Frank Hutter}, title = {Towards Automatically-Tuned Neural Networks}, volume = {64}, pages = {58--65}, publisher = {International Conference on Machine Learning}, year = {2016} } @inproceedings{DBLP:journals/corr/abs-1807-06906, author = {Arber Zela and Aaron Klein and Stefan Falkner and Frank Hutter}, title = {Towards Automated Deep Learning: Efficient Joint Neural Architecture and Hyperparameter Search}, publisher = {International Conference on Machine Learning}, year = {2018} } @inproceedings{li2020automated, title={Automated and Lightweight Network Design via Random Search for Remote Sensing Image Scene Classification}, author={Li, Jihao and Diao, Wenhui and Sun, Xian and Feng, Yingchao and Zhang, Wenkai and Chang, 
Zhonghan and Fu, Kun}, publisher={The International Archives of Photogrammetry, Remote Sensing and Spatial Information Sciences}, volume={43}, pages={1217--1224}, year={2020} } @inproceedings{DBLP:conf/cvpr/BenderLCCCKL20, author = {Gabriel Bender and Hanxiao Liu and Bo Chen and Grace Chu and Shuyang Cheng and Pieter-Jan Kindermans and Quoc V. Le}, title = {Can Weight Sharing Outperform Random Architecture Search? An Investigation With TuNAS}, pages = {14311--14320}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition}, year = {2020} } @inproceedings{DBLP:conf/aistats/KleinFBHH17, author = {Aaron Klein and Stefan Falkner and Simon Bartels and Philipp Hennig and Frank Hutter}, title = {Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets}, volume = {54}, pages = {528--536}, publisher = {International Conference on Artificial Intelligence and Statistics}, year = {2017} } @inproceedings{DBLP:journals/corr/ChrabaszczLH17, author = {Patryk Chrabaszcz and Ilya Loshchilov and Frank Hutter}, title = {A Downsampled Variant of ImageNet as an Alternative to the {CIFAR} datasets}, publisher = {CoRR}, volume = {abs/1707.08819}, year = {2017} } @inproceedings{DBLP:conf/aaai/CaiCZYW18, author = {Han Cai and Tianyao Chen and Weinan Zhang and Yong Yu and Jun Wang}, title = {Efficient Architecture Search by Network Transformation}, pages = {2787--2794}, publisher = {AAAI Conference on Artificial Intelligence}, year = {2018} } @inproceedings{DBLP:conf/iclr/ElskenMH18, author = {Thomas Elsken and Jan Hendrik Metzen and Frank Hutter}, title = {Simple and efficient architecture search for Convolutional Neural Networks}, publisher = {International Conference on Learning Representations}, year = {2018} } @inproceedings{DBLP:conf/icml/BenderKZVL18, author = {Gabriel Bender and Pieter-Jan Kindermans and Barret Zoph and Vijay Vasudevan and Quoc V. 
Le}, title = {Understanding and Simplifying One-Shot Architecture Search}, volume = {80}, pages = {549--558}, publisher = {International Conference on Machine Learning}, year = {2018} } @inproceedings{DBLP:conf/nips/SaxenaV16, author = {Shreyas Saxena and Jakob Verbeek}, title = {Convolutional Neural Fabrics}, publisher = {Advances in Neural Information Processing Systems}, pages = {4053--4061}, year = {2016} } @inproceedings{DBLP:conf/iclr/KleinFSH17, author = {Aaron Klein and Stefan Falkner and Jost Tobias Springenberg and Frank Hutter}, title = {Learning Curve Prediction with Bayesian Neural Networks}, publisher = {International Conference on Learning Representations}, year = {2017} } @inproceedings{DBLP:conf/iclr/BakerGRN18, author = {Bowen Baker and Otkrist Gupta and Ramesh Raskar and Nikhil Naik}, title = {Accelerating Neural Architecture Search using Performance Prediction}, publisher = {International Conference on Learning Representations}, year = {2018} } @inproceedings{DBLP:conf/wmt/XiaTTGHCFGLLWWZ19, author = {Yingce Xia and Xu Tan and Fei Tian and Fei Gao and Di He and Weicong Chen and Yang Fan and Linyuan Gong and Yichong Leng and Renqian Luo and Yiren Wang and Lijun Wu and Jinhua Zhu and Tao Qin and Tie-Yan Liu}, title = {Microsoft Research Asia's Systems for {WMT19}}, pages = {424--433}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2019} } @inproceedings{DBLP:conf/iclr/RamachandranZL18, author = {Prajit Ramachandran and Barret Zoph and Quoc V. 
Le}, title = {Searching for Activation Functions}, publisher = {International Conference on Learning Representations}, year = {2018} } @inproceedings{DBLP:journals/corr/abs-2009-02070, author = {Wei Zhu and Xiaoling Wang and Xipeng Qiu and Yuan Ni and Guotong Xie}, title = {AutoTrans: Automating Transformer Design via Reinforced Architecture Search}, publisher = {CoRR}, volume = {abs/2009.02070}, year = {2020} } @inproceedings{DBLP:conf/acl/WangWLCZGH20, author = {Hanrui Wang and Zhanghao Wu and Zhijian Liu and Han Cai and Ligeng Zhu and Chuang Gan and Song Han}, title = {{HAT:} Hardware-Aware Transformers for Efficient Natural Language Processing}, pages = {7675--7688}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2020} } @inproceedings{DBLP:journals/corr/abs-2008-06808, author = {Henry Tsai and Jayden Ooi and Chun-Sung Ferng and Hyung Won Chung and Jason Riesa}, title = {Finding Fast Transformers: One-Shot Neural Architecture Search by Component Composition}, publisher = {CoRR}, volume = {abs/2008.06808}, year = {2020} } @inproceedings{Wang2019ExploitingSC, title={Exploiting Sentential Context for Neural Machine Translation}, author={Xing Wang and Zhaopeng Tu and Longyue Wang and Shuming Shi}, publisher={Annual Meeting of the Association for Computational Linguistics}, year={2019} } @inproceedings{Wei2020MultiscaleCD, title={Multiscale Collaborative Deep Models for Neural Machine Translation}, author={Xiangpeng Wei and Heng Yu and Yue Hu and Yue Zhang and Rongxiang Weng and Weihua Luo}, publisher={Annual Meeting of the Association for Computational Linguistics}, year={2020} } @inproceedings{li2020shallow, title={Shallow-to-Deep Training for Neural Machine Translation}, author={Li, Bei and Wang, Ziyang and Liu, Hui and Jiang, Yufan and Du, Quan and Xiao, Tong and Wang, Huizhen and Zhu, Jingbo}, publisher={Conference on Empirical Methods in Natural Language Processing}, year={2020} } 
% Convention used by the entries below: references are typed as inproceedings
% (books excepted) and the venue (conference, journal or preprint server) is
% stored in the `publisher` field.

@inproceedings{DBLP:journals/corr/abs-2007-06257,
  author = {Hongfei Xu and Qiuhui Liu and Deyi Xiong and Josef van Genabith},
  title = {Transformer with Depth-Wise {LSTM}},
  publisher = {CoRR},
  volume = {abs/2007.06257},
  year = {2020},
}

@inproceedings{DBLP:conf/acl/XuLGXZ20,
  author = {Hongfei Xu and Qiuhui Liu and Josef van Genabith and Deyi Xiong and Jingyi Zhang},
  title = {Lipschitz Constrained Parameter Initialization for Deep Transformers},
  pages = {397--402},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2020},
}

@inproceedings{DBLP:journals/corr/abs-2006-10369,
  author = {Jungo Kasai and Nikolaos Pappas and Hao Peng and James Cross and Noah A. Smith},
  title = {Deep Encoder, Shallow Decoder: Reevaluating the Speed-Quality Tradeoff in Machine Translation},
  publisher = {CoRR},
  volume = {abs/2006.10369},
  year = {2020},
}

@inproceedings{DBLP:journals/corr/abs-1806-01261,
  author = {Peter W. Battaglia and Jessica B. Hamrick and Victor Bapst and Alvaro Sanchez-Gonzalez and Vin{\'{\i}}cius Flores Zambaldi and Mateusz Malinowski and Andrea Tacchetti and David Raposo and Adam Santoro and Ryan Faulkner and {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and H. Francis Song and Andrew J. Ballard and Justin Gilmer and George E. Dahl and Ashish Vaswani and Kelsey R. Allen and Charles Nash and Victoria Langston and Chris Dyer and Nicolas Heess and Daan Wierstra and Pushmeet Kohli and Matthew Botvinick and Oriol Vinyals and Yujia Li and Razvan Pascanu},
  title = {Relational inductive biases, deep learning, and graph networks},
  publisher = {CoRR},
  volume = {abs/1806.01261},
  year = {2018},
}

@inproceedings{Shaw2018SelfAttentionWR,
  author = {Peter Shaw and Jakob Uszkoreit and Ashish Vaswani},
  title = {Self-Attention with Relative Position Representations},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  pages = {464--468},
  year = {2018},
}

@inproceedings{Dai2019TransformerXLAL,
  author = {Zihang Dai and Zhilin Yang and Yiming Yang and Jaime G. Carbonell and Quoc V. Le and Ruslan Salakhutdinov},
  title = {Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages = {2978--2988},
  year = {2019},
}

% Venue is the Neural Information Processing Systems conference (NeurIPS),
% not the similarly named ICONIP.
@inproceedings{vaswani2017attention,
  title = {Attention is All You Need},
  author = {Ashish {Vaswani} and Noam {Shazeer} and Niki {Parmar} and Jakob {Uszkoreit} and Llion {Jones} and Aidan N. {Gomez} and Lukasz {Kaiser} and Illia {Polosukhin}},
  publisher = {Annual Conference on Neural Information Processing Systems},
  pages = {5998--6008},
  year = {2017},
}

@inproceedings{DBLP:conf/acl/LiXTZZZ17,
  author = {Junhui Li and Deyi Xiong and Zhaopeng Tu and Muhua Zhu and Min Zhang and Guodong Zhou},
  title = {Modeling Source Syntax for Neural Machine Translation},
  pages = {688--697},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2017},
}

@inproceedings{DBLP:conf/acl/EriguchiHT16,
  author = {Akiko Eriguchi and Kazuma Hashimoto and Yoshimasa Tsuruoka},
  title = {Tree-to-Sequence Attentional Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2016},
}

@inproceedings{Yang2017TowardsBH,
  author = {Baosong Yang and Derek F. Wong and Tong Xiao and Lidia S. Chao and Jingbo Zhu},
  title = {Towards Bidirectional Hierarchical Representations for Attention-based Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {1432--1441},
  year = {2017},
}

@inproceedings{DBLP:conf/acl/ChenHCC17,
  author = {Huadong Chen and Shujian Huang and David Chiang and Jiajun Chen},
  title = {Improved Neural Machine Translation with a Syntax-Aware Encoder and Decoder},
  pages = {1936--1945},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2017},
}

@inproceedings{TuModeling,
  author = {Zhaopeng Tu and Zhengdong Lu and Yang Liu and Xiaohua Liu and Hang Li},
  title = {Modeling Coverage for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2016},
}

@inproceedings{DBLP:conf/wmt/SennrichH16,
  author = {Rico Sennrich and Barry Haddow},
  title = {Linguistic Input Features Improve Neural Machine Translation},
  pages = {83--91},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2016},
}

@inproceedings{DBLP:conf/emnlp/ShiPK16,
  author = {Xing Shi and Inkit Padhi and Kevin Knight},
  title = {Does String-Based Neural {MT} Learn Source Syntax?},
  pages = {1526--1534},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2016},
}

@inproceedings{DBLP:conf/acl/BugliarelloO20,
  author = {Emanuele Bugliarello and Naoaki Okazaki},
  title = {Enhancing Machine Translation with Dependency-Aware Self-Attention},
  pages = {1618--1627},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2020},
}

@inproceedings{Aharoni2017TowardsSN,
  title = {Towards String-To-Tree Neural Machine Translation},
  author = {Roee Aharoni and Yoav Goldberg},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2017},
}

@inproceedings{DBLP:conf/iclr/Alvarez-MelisJ17,
  author = {David Alvarez-Melis and Tommi S. Jaakkola},
  title = {Tree-structured decoding with doubly-recurrent neural networks},
  publisher = {International Conference on Learning Representations},
  year = {2017},
}

@inproceedings{DBLP:conf/naacl/DyerKBS16,
  author = {Chris Dyer and Adhiguna Kuncoro and Miguel Ballesteros and Noah A. Smith},
  title = {Recurrent Neural Network Grammars},
  pages = {199--209},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year = {2016},
}

% NOTE(review): the key says 1972 but the year field says 1973; confirm which
% volume/printing is being cited.
@book{aho1972theory,
  author = {Aho, Alfred V and Ullman, Jeffrey D},
  title = {The theory of parsing, translation, and compiling},
  publisher = {Prentice-Hall Englewood Cliffs, NJ},
  year = {1973},
}

@inproceedings{DBLP:journals/corr/LuongLSVK15,
  author = {Minh-Thang Luong and Quoc V. Le and Ilya Sutskever and Oriol Vinyals and Lukasz Kaiser},
  title = {Multi-task Sequence to Sequence Learning},
  publisher = {International Conference on Learning Representations},
  year = {2016},
}

@inproceedings{DBLP:conf/wmt/NadejdeRSDJKB17,
  author = {Maria Nadejde and Siva Reddy and Rico Sennrich and Tomasz Dwojak and Marcin Junczys-Dowmunt and Philipp Koehn and Alexandra Birch},
  title = {Predicting Target Language {CCG} Supertags Improves Neural Machine Translation},
  pages = {68--79},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2017},
}

@inproceedings{DBLP:conf/acl/WuZYLZ17,
  author = {Shuangzhi Wu and Dongdong Zhang and Nan Yang and Mu Li and Ming Zhou},
  title = {Sequence-to-Dependency Neural Machine Translation},
  pages = {698--707},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2017},
}

@inproceedings{DBLP:journals/corr/abs-1808-09374,
  author = {Xinyi Wang and Hieu Pham and Pengcheng Yin and Graham Neubig},
  title = {A Tree-based Decoder for Neural Machine Translation},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages = {4772--4777},
  year = {2018},
}

@inproceedings{Tong2016Syntactic,
  author = {Tong Xiao and Jingbo Zhu and Chunliang Zhang and Tongran Liu},
  title = {Syntactic Skeleton-Based Translation},
  pages = {2856--2862},
  publisher = {AAAI Conference on Artificial Intelligence},
  year = {2016},
}

@inproceedings{DBLP:conf/emnlp/WangTWS19a,
  author = {Xing Wang and Zhaopeng Tu and Longyue Wang and Shuming Shi},
  title = {Self-Attention with Structural Position Representations},
  pages = {1403--1409},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2019},
}

@inproceedings{Liu2020LearningTE,
  title = {Learning to Encode Position for Transformer with Continuous Dynamical Model},
  author = {Xuanqing Liu and Hsiang-Fu Yu and Inderjit Dhillon and Cho-Jui Hsieh},
  publisher = {ArXiv},
  year = {2020},
  volume = {abs/2003.09229},
}
@inproceedings{DBLP:conf/nips/ChenRBD18, author = {Tian Qi Chen and Yulia Rubanova and Jesse Bettencourt and David Duvenaud}, title = {Neural Ordinary Differential Equations}, publisher = {Annual Conference on Neural Information Processing Systems}, pages = {6572--6583}, year = {2018} } @inproceedings{DBLP:journals/corr/LuongPM15, author = {Thang Luong and Hieu Pham and Christopher D. Manning}, title = {Effective Approaches to Attention-based Neural Machine Translation}, publisher = {Conference on Empirical Methods in Natural Language Processing}, pages = {1412--1421}, year = {2015} } @inproceedings{Yang2018ModelingLF, author = {Baosong Yang and Zhaopeng Tu and Derek F. Wong and Fandong Meng and Lidia S. Chao and Tong Zhang}, title = {Modeling Localness for Self-Attention Networks}, pages = {4449--4458}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2018} } @inproceedings{DBLP:conf/aaai/GuoQLXZ20, author = {Qipeng Guo and Xipeng Qiu and Pengfei Liu and Xiangyang Xue and Zheng Zhang}, title = {Multi-Scale Self-Attention for Text Classification}, pages = {7847--7854}, publisher = {AAAI Conference on Artificial Intelligence}, year = {2020} } @inproceedings{Wu2019PayLA, author = {Felix Wu and Angela Fan and Alexei Baevski and Yann N. Dauphin and Michael Auli}, title = {Pay Less Attention with Lightweight and Dynamic Convolutions}, publisher = {International Conference on Learning Representations}, year = {2019}, } @inproceedings{DBLP:conf/interspeech/GulatiQCPZYHWZW20, author = {Anmol Gulati and James Qin and Chung-Cheng Chiu and Niki Parmar and Yu Zhang and Jiahui Yu and Wei Han and Shibo Wang and Zhengdong Zhang and Yonghui Wu and Ruoming Pang}, title = {Conformer: Convolution-augmented Transformer for Speech Recognition}, pages = {5036--5040}, publisher = {International Speech Communication Association}, year = {2020} } @inproceedings{DBLP:conf/cvpr/XieGDTH17, author = {Saining Xie and Ross B. 
Girshick and Piotr Doll{\'{a}}r and Zhuowen Tu and Kaiming He}, title = {Aggregated Residual Transformations for Deep Neural Networks}, pages = {5987--5995}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition}, year = {2017} } @inproceedings{DBLP:journals/corr/abs-1711-02132, author = {Karim Ahmed and Nitish Shirish Keskar and Richard Socher}, title = {Weighted Transformer Network for Machine Translation}, publisher = {CoRR}, volume = {abs/1711.02132}, year = {2017} } @inproceedings{DBLP:journals/corr/abs-2006-10270, author = {Yang Fan and Shufang Xie and Yingce Xia and Lijun Wu and Tao Qin and Xiang-Yang Li and Tie-Yan Liu}, title = {Multi-branch Attentive Transformer}, publisher = {CoRR}, volume = {abs/2006.10270}, year = {2020} } @inproceedings{DBLP:conf/emnlp/YanMZ20, author = {Jianhao Yan and Fandong Meng and Jie Zhou}, title = {Multi-Unit Transformers for Neural Machine Translation}, pages = {1047--1059}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2020} } @inproceedings{李北2019面向神经机器翻译的集成学习方法分析, title={面向神经机器翻译的集成学习方法分析}, author={李北 and 王强 and 肖桐 and 姜雨帆 and 张哲旸 and 刘继强 and 张俐 and 于清}, publisher={中文信息学报}, volume={33}, number={3}, year={2019}, } @inproceedings{DBLP:conf/iclr/WuLLLH20, author = {Zhanghao Wu and Zhijian Liu and Ji Lin and Yujun Lin and Song Han}, title = {Lite Transformer with Long-Short Range Attention}, publisher = {International Conference on Learning Representations}, year = {2020} } @inproceedings{DBLP:conf/iclr/DehghaniGVUK19, author = {Mostafa Dehghani and Stephan Gouws and Oriol Vinyals and Jakob Uszkoreit and Lukasz Kaiser}, title = {Universal Transformers}, publisher = {International Conference on Learning Representations}, year = {2019} } @inproceedings{Lan2020ALBERTAL, title={ALBERT: A Lite BERT for Self-supervised Learning of Language Representations}, author={Zhenzhong Lan and Mingda Chen and Sebastian Goodman and Kevin Gimpel and Piyush Sharma and Radu Soricut}, 
publisher={International Conference on Learning Representations}, year={2020} } @inproceedings{DBLP:conf/naacl/HaoWYWZT19, author = {Jie Hao and Xing Wang and Baosong Yang and Longyue Wang and Jinfeng Zhang and Zhaopeng Tu}, title = {Modeling Recurrence for Transformer}, pages = {1198--1207}, publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics}, year = {2019} } @inproceedings{DBLP:conf/emnlp/QiuMLYW020, author = {Jiezhong Qiu and Hao Ma and Omer Levy and Wen-tau Yih and Sinong Wang and Jie Tang}, title = {Blockwise Self-Attention for Long Document Understanding}, pages = {2555--2565}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2020} } @inproceedings{DBLP:conf/iclr/LiuSPGSKS18, author = {Peter J. Liu and Mohammad Saleh and Etienne Pot and Ben Goodrich and Ryan Sepassi and Lukasz Kaiser and Noam Shazeer}, title = {Generating Wikipedia by Summarizing Long Sequences}, publisher = {International Conference on Learning Representations}, year = {2018} } @inproceedings{DBLP:journals/corr/abs-2004-05150, author = {Iz Beltagy and Matthew Peters and Arman Cohan}, title = {Longformer: The Long-Document Transformer}, publisher = {CoRR}, volume = {abs/2004.05150}, year = {2020} } @inproceedings{Kitaev2020ReformerTE, author = {Nikita Kitaev and Lukasz Kaiser and Anselm Levskaya}, title = {Reformer: The Efficient Transformer}, publisher = {International Conference on Learning Representations}, year = {2020} } @inproceedings{DBLP:journals/corr/abs-2003-05997, author = {Aurko Roy and Mohammad Saffar and Ashish Vaswani and David Grangier}, title = {Efficient Content-Based Sparse Attention with Routing Transformers}, publisher = {CoRR}, volume = {abs/2003.05997}, year = {2020} } @inproceedings{Katharopoulos2020TransformersAR, title={Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention}, author={Angelos Katharopoulos and Apoorv Vyas and Nikolaos Pappas and 
Franccois Fleuret}, publisher={CoRR}, year={2020}, volume={abs/2006.16236} } @inproceedings{DBLP:journals/corr/abs-2009-14794, author = {Krzysztof Choromanski and Valerii Likhosherstov and David Dohan and Xingyou Song and Andreea Gane and Tam{\'{a}}s Sarl{\'{o}}s and Peter Hawkins and Jared Davis and Afroz Mohiuddin and Lukasz Kaiser and David Belanger and Lucy Colwell and Adrian Weller}, title = {Rethinking Attention with Performers}, publisher = {CoRR}, volume = {abs/2009.14794}, year = {2020} } @inproceedings{DBLP:conf/emnlp/HaoWSZT19, author = {Jie Hao and Xing Wang and Shuming Shi and Jinfeng Zhang and Zhaopeng Tu}, title = {Multi-Granularity Self-Attention for Neural Machine Translation}, pages = {887--897}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2019} } @inproceedings{DBLP:conf/emnlp/Lin0RLS18, author = {Junyang Lin and Xu Sun and Xuancheng Ren and Muyu Li and Qi Su}, title = {Learning When to Concentrate or Divert Attention: Self-Adaptive Attention Temperature for Neural Machine Translation}, pages = {2985--2990}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2018} } @inproceedings{DBLP:journals/corr/abs-2006-04768, author = {Sinong Wang and Belinda Z. Li and Madian Khabsa and Han Fang and Hao Ma}, title = {Linformer: Self-Attention with Linear Complexity}, publisher = {CoRR}, volume = {abs/2006.04768}, year = {2020} } @inproceedings{DBLP:conf/nips/BergstraBBK11, author = {James Bergstra and R{\'{e}}mi Bardenet and Yoshua Bengio and Bal{\'{a}}zs K{\'{e}}gl}, title = {Algorithms for Hyper-Parameter Optimization}, publisher = {Advances in Neural Information Processing Systems}, pages = {2546--2554}, year = {2011} } @inproceedings{DBLP:conf/lion/HutterHL11, author = {Frank Hutter and Holger H. 
Hoos and Kevin Leyton-Brown}, title = {Sequential Model-Based Optimization for General Algorithm Configuration}, series = {Lecture Notes in Computer Science}, volume = {6683}, pages = {507--523}, publisher = {Learning and Intelligent Optimization}, year = {2011} } @inproceedings{DBLP:conf/icml/BergstraYC13, author = {James Bergstra and Daniel Yamins and David D. Cox}, title = {Making a Science of Model Search: Hyperparameter Optimization in Hundreds of Dimensions for Vision Architectures}, series = {{JMLR} Workshop and Conference Proceedings}, volume = {28}, pages = {115--123}, publisher = {International Conference on Machine Learning}, year = {2013} } @inproceedings{DBLP:conf/iccv/ChenXW019, author = {Xin Chen and Lingxi Xie and Jun Wu and Qi Tian}, title = {Progressive Differentiable Architecture Search: Bridging the Depth Gap Between Search and Evaluation}, pages = {1294--1303}, publisher = {IEEE International Conference on Computer Vision}, year = {2019} } @inproceedings{DBLP:conf/icml/ChenH20, author = {Xiangning Chen and Cho-Jui Hsieh}, title = {Stabilizing Differentiable Architecture Search via Perturbation-based Regularization}, series = {Proceedings of Machine Learning Research}, volume = {119}, pages = {1554--1565}, publisher = {International Conference on Machine Learning}, year = {2020} } @inproceedings{Jawahar2019WhatDB, title={What Does BERT Learn about the Structure of Language?}, author={Ganesh Jawahar and Beno{\^{\i}}t Sagot and Djam{\'e} Seddah}, publisher={Annual Meeting of the Association for Computational Linguistics}, year={2019} } @inproceedings{DBLP:conf/emnlp/Ethayarajh19, author = {Kawin Ethayarajh}, title = {How Contextual are Contextualized Word Representations? 
Comparing the Geometry of BERT, ELMo, and {GPT-2} Embeddings}, pages = {55--65}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2019} } @inproceedings{DBLP:journals/corr/abs-1905-09418, author = {Elena Voita and David Talbot and Fedor Moiseev and Rico Sennrich and Ivan Titov}, title = {Analyzing Multi-Head Self-Attention: Specialized Heads Do the Heavy Lifting, the Rest Can Be Pruned}, pages = {5797--5808}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2019}, } @inproceedings{Michel2019AreSH, title={Are Sixteen Heads Really Better than One?}, author = {Paul Michel and Omer Levy and Graham Neubig}, title = {Are Sixteen Heads Really Better than One?}, publisher = {Annual Conference on Neural Information Processing Systems}, pages = {14014--14024}, year = {2019} } @inproceedings{DBLP:conf/emnlp/LiTYLZ18, author = {Jian Li and Zhaopeng Tu and Baosong Yang and Michael R. Lyu and Tong Zhang}, title = {Multi-Head Attention with Disagreement Regularization}, pages = {2897--2903}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2018} } @inproceedings{Su2018VariationalRN, title={Variational Recurrent Neural Machine Translation}, author={Jinsong Su and Shan Wu and Deyi Xiong and Yaojie Lu and Xianpei Han and Biao Zhang}, publisher={AAAI Conference on Artificial Intelligence}, pages={5488--5495}, year={2018} } @inproceedings{DBLP:conf/acl/SetiawanSNP20, author = {Hendra Setiawan and Matthias Sperber and Udhyakumar Nallasamy and Matthias Paulik}, title = {Variational Neural Machine Translation with Normalizing Flows}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2020} } @inproceedings{Li2020NeuralMT, author = {Yanyang Li and Qiang Wang and Tong Xiao and Tongran Liu and Jingbo Zhu}, title = {Neural Machine Translation with Joint Representation}, pages = {8285--8292}, publisher = {AAAI Conference on Artificial 
Intelligence}, year = {2020} } @inproceedings{JMLR:v15:srivastava14a, author = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov}, title = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting}, publisher = {Journal of Machine Learning Research}, year = {2014}, volume = {15}, pages = {1929-1958}, } @inproceedings{Szegedy_2016_CVPR, author = {Christian Szegedy and Vincent Vanhoucke and Sergey Ioffe and Jonathon Shlens and Zbigniew Wojna}, title = {Rethinking the Inception Architecture for Computer Vision}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition}, pages = {2818--2826}, year = {2016}, } @inproceedings{Chen2018TheBO, author = {Mia Xu Chen and Orhan Firat and Ankur Bapna and Melvin Johnson and Wolfgang Macherey and George F. Foster and Llion Jones and Mike Schuster and Noam Shazeer and Niki Parmar and Ashish Vaswani and Jakob Uszkoreit and Lukasz Kaiser and Zhifeng Chen and Yonghui Wu and Macduff Hughes}, title = {The Best of Both Worlds: Combining Recent Advances in Neural Machine Translation}, pages = {76--86}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2018} } %%%%% chapter 15------------------------------------------------------ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%% chapter 16------------------------------------------------------ @inproceedings{DBLP:conf/wmt/CurreyBH17, author = {Anna Currey and Antonio Valerio Miceli Barone and Kenneth Heafield}, title = {Copied Monolingual Data Improves Low-Resource Neural Machine Translation}, pages = {148--156}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2017} } @inproceedings{DBLP:conf/emnlp/EdunovOAG18, author = {Sergey Edunov and Myle Ott and Michael Auli and David Grangier}, title = 
{Understanding Back-Translation at Scale}, pages = {489--500}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2018} } @inproceedings{DBLP:conf/emnlp/FadaeeM18, author = {Marzieh Fadaee and Christof Monz}, title = {Back-Translation Sampling by Targeting Difficult Words in Neural Machine Translation}, pages = {436--446}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2018} } @inproceedings{DBLP:conf/nlpcc/XuLXLLXZ19, author = {Nuo Xu and Yinqiao Li and Chen Xu and Yanyang Li and Bei Li and Tong Xiao and Jingbo Zhu}, title = {Analysis of Back-Translation Methods for Low-Resource Neural Machine Translation}, volume = {11839}, pages = {466--475}, publisher = {Natural Language Processing and Chinese Computing}, year = {2019} } @inproceedings{DBLP:conf/wmt/CaswellCG19, author = {Isaac Caswell and Ciprian Chelba and David Grangier}, title = {Tagged Back-Translation}, pages = {53--63}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2019} } @inproceedings{DBLP:conf/emnlp/WangLWLS19, author = {Shuo Wang and Yang Liu and Chao Wang and Huanbo Luan and Maosong Sun}, title = {Improving Back-Translation with Uncertainty-based Confidence Estimation}, pages = {791--802}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2019} } @inproceedings{DBLP:journals/corr/abs200111327, author = {Idris Abdulmumin and Bashir Shehu Galadanci and Abubakar Isa}, title = {Iterative Batch Back-Translation for Neural Machine Translation: {A} Conceptual Model}, publisher = {CoRR}, year = {2020} } @inproceedings{DBLP:journals/corr/abs200403672, author = {Zi-Yi Dou and Antonios Anastasopoulos and Graham Neubig}, title = {Dynamic Data Selection and Weighting for Iterative Back-Translation}, pages = {5894--5904}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2020} } @inproceedings{DBLP:conf/emnlp/WuZHGQLL19, 
author = {Lijun Wu and Jinhua Zhu and Di He and Fei Gao and Tao Qin and Jianhuang Lai and Tie-Yan Liu}, title = {Machine Translation With Weakly Paired Documents}, pages = {4374--4383}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2019} } @inproceedings{DBLP:journals/corr/abs-1901-09069, author = {Felipe Almeida and Geraldo Xex{\'{e}}o}, title = {Word Embeddings: {A} Survey}, publisher = {CoRR}, year = {2019} } @inproceedings{DBLP:journals/corr/abs-2002-06823, author = {Jinhua Zhu and Yingce Xia and Lijun Wu and Di He and Tao Qin and Wengang Zhou and Houqiang Li and Tie-Yan Liu}, title = {Incorporating {BERT} into Neural Machine Translation}, publisher = {International Conference on Learning Representations}, year = {2020} } @inproceedings{song2019mass, author = {Kaitao Song and Xu Tan and Tao Qin and Jianfeng Lu and Tie-Yan Liu}, title = {{MASS:} Masked Sequence to Sequence Pre-training for Language Generation}, volume = {97}, pages = {5926--5936}, publisher = {International Conference on Machine Learning}, year = {2019} } @inproceedings{DBLP:journals/corr/Ruder17a, author = {Sebastian Ruder}, title = {An Overview of Multi-Task Learning in Deep Neural Networks}, publisher = {CoRR}, volume = {abs/1706.05098}, year = {2017} } @inproceedings{DBLP:conf/icml/XiaQCBYL17, author = {Yingce Xia and Tao Qin and Wei Chen and Jiang Bian and Nenghai Yu and Tie-Yan Liu}, title = {Dual Supervised Learning}, volume = {70}, pages = {3789--3798}, publisher = {International Conference on Machine Learning}, year = {2017} } @inproceedings{DBLP:conf/nips/HeXQWYLM16, author = {Di He and Yingce Xia and Tao Qin and Liwei Wang and Nenghai Yu and Tie-Yan Liu and Wei-Ying Ma}, title = {Dual Learning for Machine Translation}, pages = {820--828}, year = {2016} } @inproceedings{DBLP:conf/nips/SuttonMSM99, author = {Richard Sutton and David Allen McAllester and Satinder Singh and Yishay Mansour}, title = {Policy Gradient Methods for Reinforcement 
Learning with Function Approximation},
  pages = {1057--1063},
  publisher = {The {MIT} Press},
  year = {1999}
}

@inproceedings{DBLP:conf/aclnmt/HoangKHC18,
  author = {Cong Duy Vu Hoang and Philipp Koehn and Gholamreza Haffari and Trevor Cohn},
  title = {Iterative Back-Translation for Neural Machine Translation},
  pages = {18--24},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2018}
}

@inproceedings{DBLP:conf/icml/OttAGR18,
  author = {Myle Ott and Michael Auli and David Grangier and Marc'Aurelio Ranzato},
  title = {Analyzing Uncertainty in Neural Machine Translation},
  volume = {80},
  pages = {3953--3962},
  publisher = {International Conference on Machine Learning},
  year = {2018}
}

@inproceedings{finding2006adafre,
  author = {Sisay Fissaha Adafre and Maarten de Rijke},
  title = {Finding Similar Sentences across Multiple Languages in Wikipedia},
  publisher = {Annual Conference of the European Association for Machine Translation},
  year = {2006}
}

@inproceedings{method2008keiji,
  author = {Keiji Yasuda and Eiichiro Sumita},
  title = {Method for building sentence-aligned corpus from wikipedia},
  publisher = {AAAI Conference on Artificial Intelligence},
  year = {2008}
}

@inproceedings{DBLP:journals/coling/MunteanuM05,
  author = {Dragos Stefan Munteanu and Daniel Marcu},
  title = {Improving Machine Translation Performance by Exploiting Non-Parallel Corpora},
  publisher = {Computational Linguistics},
  volume = {31},
  number = {4},
  pages = {477--504},
  year = {2005}
}

@inproceedings{DBLP:conf/acl/XiaKAN19,
  author = {Mengzhou Xia and Xiang Kong and Antonios Anastasopoulos and Graham Neubig},
  title = {Generalized Data Augmentation for Low-Resource Translation},
  pages = {5786--5796},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:conf/acl/GaoZWXQCZL19,
  author = {Fei Gao and Jinhua Zhu and Lijun Wu and Yingce Xia and Tao Qin and Xueqi Cheng and Wengang Zhou and Tie-Yan Liu},
  title = {Soft
Contextual Data Augmentation for Neural Machine Translation},
  pages = {5539--5544},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

% Venue corrected: EMNLP 2019 paper (see the DBLP key), not ACL.
@inproceedings{DBLP:conf/emnlp/WuWXQLL19,
  author = {Lijun Wu and Yiren Wang and Yingce Xia and Tao Qin and Jianhuang Lai and Tie-Yan Liu},
  title = {Exploiting Monolingual Data at Scale for Neural Machine Translation},
  pages = {4205--4215},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2019}
}

% Venue corrected: EMNLP 2019 paper (see the DBLP key), not ACL.
@inproceedings{DBLP:conf/emnlp/LiLHZZ19,
  author = {Guanlin Li and Lemao Liu and Guoping Huang and Conghui Zhu and Tiejun Zhao},
  title = {Understanding Data Augmentation in Neural Machine Translation: Two Perspectives towards Generalization},
  pages = {5688--5694},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2019}
}

% Author names were written "Last First" with no comma, so BibTeX took the
% given name as the surname; rewritten in the unambiguous "Last, First" form.
% The placeholder venue "Computer Science" is replaced by the arXiv (CoRR)
% reference, in the same form the other CoRR entries of this file use.
@inproceedings{2015OnGulcehre,
  title = {On Using Monolingual Corpora in Neural Machine Translation},
  author = {Gulcehre, Caglar and Firat, Orhan and Xu, Kelvin and Cho, Kyunghyun and Barrault, Loic and Lin, Huei Chi and Bougares, Fethi and Schwenk, Holger and Bengio, Yoshua},
  publisher = {CoRR},
  volume = {abs/1503.03535},
  year = {2015},
}

@inproceedings{黄书剑0统计机器翻译中的词对齐研究,
  title = {统计机器翻译中的词对齐研究},
  author = {黄书剑},
  publisher = {南京大学},
  year = {2012}
}

@inproceedings{DBLP:journals/corr/MikolovLS13,
  author = {Tomas Mikolov and Quoc V.
Le and Ilya Sutskever},
  title = {Exploiting Similarities among Languages for Machine Translation},
  publisher = {CoRR},
  volume = {abs/1309.4168},
  year = {2013}
}

@inproceedings{DBLP:conf/acl/VulicK16,
  author = {Ivan Vulic and Anna Korhonen},
  title = {On the Role of Seed Lexicons in Learning Bilingual Word Embeddings},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2016}
}

@inproceedings{DBLP:conf/acl/ArtetxeLA17,
  author = {Mikel Artetxe and Gorka Labaka and Eneko Agirre},
  title = {Learning bilingual word embeddings with (almost) no bilingual data},
  pages = {451--462},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2017}
}

% Single author: the old value "Schnemann and Peter" was parsed as two people
% and had lost the o-umlaut of the surname. Page range now uses an en dash.
@inproceedings{1966ASchnemann,
  title = {A generalized solution of the orthogonal procrustes problem},
  author = {Sch{\"o}nemann, Peter H.},
  publisher = {Psychometrika},
  volume = {31},
  number = {1},
  pages = {1--10},
  year = {1966},
}

@inproceedings{DBLP:conf/iclr/LampleCRDJ18,
  author = {Guillaume Lample and Alexis Conneau and Marc'Aurelio Ranzato and Ludovic Denoyer and Herv{\'{e}} J{\'{e}}gou},
  title = {Word translation without parallel data},
  publisher = {International Conference on Learning Representations},
  year = {2018}
}

@inproceedings{DBLP:conf/acl/ZhangLLS17,
  author = {Meng Zhang and Yang Liu and Huanbo Luan and Maosong Sun},
  title = {Adversarial Training for Unsupervised Bilingual Lexicon Induction},
  pages = {1959--1970},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2017}
}

@inproceedings{DBLP:conf/emnlp/Alvarez-MelisJ18,
  author = {David Alvarez-Melis and Tommi S.
Jaakkola},
  title = {Gromov-Wasserstein Alignment of Word Embedding Spaces},
  pages = {1881--1890},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2018}
}

@inproceedings{DBLP:conf/lrec/GarneauGBDL20,
  author = {Nicolas Garneau and Mathieu Godbout and David Beauchemin and Audrey Durand and Luc Lamontagne},
  title = {A Robust Self-Learning Method for Fully Unsupervised Cross-Lingual Mappings of Word Embeddings: Making the Method Robustly Reproducible as Well},
  pages = {5546--5554},
  publisher = {Language Resources and Evaluation Conference},
  year = {2020}
}

@inproceedings{DBLP:conf/naacl/XingWLL15,
  author = {Chao Xing and Dong Wang and Chao Liu and Yiye Lin},
  title = {Normalized Word Embedding and Orthogonal Transform for Bilingual Word Translation},
  pages = {1006--1011},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year = {2015}
}

@inproceedings{DBLP:conf/iclr/SmithTHH17,
  author = {Samuel L. Smith and David H. P. Turban and Steven Hamblin and Nils Y.
Hammerla},
  title = {Offline bilingual word vectors, orthogonal transformations and the inverted softmax},
  publisher = {International Conference on Learning Representations},
  year = {2017}
}

@inproceedings{DBLP:conf/emnlp/VulicGRK19,
  author = {Ivan Vulic and Goran Glavas and Roi Reichart and Anna Korhonen},
  title = {Do We Really Need Fully Unsupervised Cross-Lingual Embeddings?},
  pages = {4406--4417},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2019}
}

@inproceedings{DBLP:journals/talip/MarieF20,
  author = {Benjamin Marie and Atsushi Fujita},
  title = {Iterative Training of Unsupervised Neural and Statistical Machine Translation Systems},
  publisher = {ACM Transactions on Asian and Low-Resource Language Information Processing},
  volume = {19},
  number = {5},
  pages = {68:1--68:21},
  year = {2020}
}

@inproceedings{DBLP:conf/acl/ArtetxeLA19,
  author = {Mikel Artetxe and Gorka Labaka and Eneko Agirre},
  title = {An Effective Approach to Unsupervised Machine Translation},
  pages = {194--203},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:conf/acl/PourdamghaniAGK19,
  author = {Nima Pourdamghani and Nada Aldarrab and Marjan Ghazvininejad and Kevin Knight and Jonathan May},
  title = {Translating Translationese: {A} Two-Step Approach to Unsupervised Machine Translation},
  pages = {3057--3062},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:conf/nips/ConneauL19,
  author = {Alexis Conneau and Guillaume Lample},
  title = {Cross-lingual Language Model Pretraining},
  pages = {7057--7067},
  publisher = {Annual Conference on Neural Information Processing Systems},
  year = {2019}
}

@inproceedings{DBLP:journals/ipm/FarhanTAJATT20,
  author = {Wael Farhan and Bashar Talafha and Analle Abuammar and Ruba Jaikat and Mahmoud Al-Ayyoub and Ahmad Bisher Tarakji and Anas Toma},
  title = {Unsupervised dialectal neural machine
translation},
  publisher = {Information Processing \& Management},
  volume = {57},
  number = {3},
  pages = {102181},
  year = {2020}
}

@inproceedings{A2020Li,
  title = {A Simple and Effective Approach to Robust Unsupervised Bilingual Dictionary Induction},
  author = {Yanyang Li and Yingfeng Luo and Ye Lin and Quan Du and Huizhen Wang and Shujian Huang and Tong Xiao and Jingbo Zhu},
  publisher = {International Conference on Computational Linguistics},
  year = {2020}
}

@inproceedings{2018When,
  title = {When and Why are Pre-trained Word Embeddings Useful for Neural Machine Translation?},
  author = { Qi, Ye and Sachan, Devendra Singh and Felix, Matthieu and Padmanabhan, Sarguna Janani and Neubig, Graham },
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year = {2018},
}

@inproceedings{DBLP:conf/emnlp/ClinchantJN19,
  author = {St{\'{e}}phane Clinchant and Kweon Woo Jung and Vassilina Nikoulina},
  title = {On the use of {BERT} for Neural Machine Translation},
  pages = {108--117},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:conf/aaai/YangW0Z00020,
  author = {Jiacheng Yang and Mingxuan Wang and Hao Zhou and Chengqi Zhao and Weinan Zhang and Yong Yu and Lei Li},
  title = {Towards Making the Most of {BERT} in Neural Machine Translation},
  pages = {9378--9385},
  publisher = {AAAI Conference on Artificial Intelligence},
  year = {2020}
}

@inproceedings{DBLP:conf/aaai/WengYHCL20,
  author = {Rongxiang Weng and Heng Yu and Shujian Huang and Shanbo Cheng and Weihua Luo},
  title = {Acquiring Knowledge from Pre-Trained Model to Neural Machine Translation},
  pages = {9266--9273},
  publisher = {AAAI Conference on Artificial Intelligence},
  year = {2020}
}

@inproceedings{DBLP:journals/corr/abs-2001-08210,
  author = {Yinhan Liu and Jiatao Gu and Naman Goyal and Xian Li and Sergey Edunov and Marjan Ghazvininejad and Mike Lewis and Luke Zettlemoyer},
  title = {Multilingual Denoising Pre-training
for Neural Machine Translation},
  publisher = {Transactions of the Association for Computational Linguistics},
  volume = {8},
  pages = {726--742},
  year = {2020}
}

@inproceedings{DBLP:conf/aaai/JiZDZCL20,
  author = {Baijun Ji and Zhirui Zhang and Xiangyu Duan and Min Zhang and Boxing Chen and Weihua Luo},
  title = {Cross-Lingual Pre-Training Based Transfer for Zero-Shot Neural Machine Translation},
  pages = {115--122},
  publisher = {AAAI Conference on Artificial Intelligence},
  year = {2020}
}

@inproceedings{DBLP:conf/acl/LewisLGGMLSZ20,
  author = {Mike Lewis and Yinhan Liu and Naman Goyal and Marjan Ghazvininejad and Abdelrahman Mohamed and Omer Levy and Veselin Stoyanov and Luke Zettlemoyer},
  title = {{BART:} Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension},
  pages = {7871--7880},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2020}
}

@inproceedings{DBLP:journals/corr/abs-2009-08088,
  author = {Zhen Yang and Bojie Hu and Ambyera Han and Shen Huang and Qi Ju},
  title = {{CSP:} Code-Switching Pre-training for Neural Machine Translation},
  pages = {2624--2636},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2020}
}

@inproceedings{DBLP:journals/corr/abs-2010-09403,
  author = {Dusan Varis and Ondrej Bojar},
  title = {Unsupervised Pretraining for Neural Machine Translation Using Elastic Weight Consolidation},
  pages = {130--135},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

% Venue corrected: EMNLP 2018 paper (see the DBLP key), not ACL.
@inproceedings{DBLP:conf/emnlp/LampleOCDR18,
  author = {Guillaume Lample and Myle Ott and Alexis Conneau and Ludovic Denoyer and Marc'Aurelio Ranzato},
  title = {Phrase-Based {\&} Neural Unsupervised Machine Translation},
  pages = {5039--5049},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2018}
}

@inproceedings{DBLP:journals/jbd/ShortenK19,
  author = {Connor Shorten and Taghi M.
Khoshgoftaar},
  title = {A survey on Image Data Augmentation for Deep Learning},
  publisher = {Journal of Big Data},
  volume = {6},
  pages = {60},
  year = {2019}
}

% Venue corrected: NAACL 2019 paper (see the DBLP key), not ACL.
@inproceedings{DBLP:conf/naacl/MohiuddinJ19,
  author = {Tasnim Mohiuddin and Shafiq Rayhan Joty},
  title = {Revisiting Adversarial Autoencoder for Unsupervised Word Translation with Cycle Consistency and Improved Training},
  pages = {3857--3867},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:conf/acl/HuangQC19,
  author = {Jiaji Huang and Qiang Qiu and Kenneth Church},
  title = {Hubless Nearest Neighbor Search for Bilingual Lexicon Induction},
  pages = {4072--4080},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:journals/corr/abs-1811-01124,
  author = {Jean Alaux and Edouard Grave and Marco Cuturi and Armand Joulin},
  title = {Unsupervised Hyperalignment for Multilingual Word Embeddings},
  publisher = {International Conference on Learning Representations},
  year = {2018}
}

@inproceedings{DBLP:conf/emnlp/DouZH18,
  author = {Zi-Yi Dou and Zhi-Hao Zhou and Shujian Huang},
  title = {Unsupervised Bilingual Lexicon Induction via Latent Variable Models},
  pages = {621--626},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2018}
}

% Venue corrected: EMNLP 2018 paper (see the DBLP key), not ACL.
@inproceedings{DBLP:conf/emnlp/HoshenW18,
  author = {Yedid Hoshen and Lior Wolf},
  title = {Non-Adversarial Unsupervised Word Translation},
  pages = {469--478},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2018}
}

% Venue corrected: EMNLP 2018 paper (see the DBLP key), not ACL.
@inproceedings{DBLP:conf/emnlp/KimGN18,
  author = {Yunsu Kim and Jiahui Geng and Hermann Ney},
  title = {Improving Unsupervised Word-by-Word Translation with Language Model and Denoising Autoencoder},
  pages = {862--868},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2018}
}

@inproceedings{DBLP:conf/emnlp/MukherjeeYH18,
  author = {Tanmoy Mukherjee
and Makoto Yamada and Timothy Hospedales},
  title = {Learning Unsupervised Word Translations Without Adversaries},
  pages = {627--632},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2018}
}

@inproceedings{DBLP:conf/emnlp/ChenC18,
  author = {Xilun Chen and Claire Cardie},
  title = {Unsupervised Multilingual Word Embeddings},
  pages = {261--270},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2018}
}

@inproceedings{DBLP:conf/emnlp/TaitelbaumCG19,
  author = {Hagai Taitelbaum and Gal Chechik and Jacob Goldberger},
  title = {Multilingual word translation using auxiliary languages},
  pages = {1330--1335},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2019}
}

@inproceedings{DBLP:conf/acl/YangLCLS19,
  author = {Pengcheng Yang and Fuli Luo and Peng Chen and Tianyu Liu and Xu Sun},
  title = {{MAAM:} {A} Morphology-Aware Alignment Model for Unsupervised Bilingual Lexicon Induction},
  pages = {3190--3196},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:conf/acl/OrmazabalALSA19,
  author = {Aitor Ormazabal and Mikel Artetxe and Gorka Labaka and Aitor Soroa and Eneko Agirre},
  title = {Analyzing the Limitations of Cross-lingual Word Embedding Mappings},
  pages = {4990--4995},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:conf/acl/ArtetxeLA19a,
  author = {Mikel Artetxe and Gorka Labaka and Eneko Agirre},
  title = {Bilingual Lexicon Induction through Unsupervised Machine Translation},
  pages = {5002--5007},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:conf/rep4nlp/VulicKG20,
  author = {Ivan Vulic and Anna Korhonen and Goran Glavas},
  title = {Improving Bilingual Lexicon Induction with Unsupervised Post-Processing of Monolingual Word Vector Spaces},
  pages = {45--54},
  publisher =
{Annual Meeting of the Association for Computational Linguistics},
  year = {2020}
}

@inproceedings{hartmann2018empirical,
  title = {Empirical observations on the instability of aligning word vector spaces with GANs},
  author = {Hartmann, Mareike and Kementchedjhieva, Yova and S{\o}gaard, Anders},
  publisher = {openreview.net},
  year = {2018}
}

@inproceedings{DBLP:conf/emnlp/Kementchedjhieva19,
  author = {Yova Kementchedjhieva and Mareike Hartmann and Anders S{\o}gaard},
  title = {Lost in Evaluation: Misleading Benchmarks for Bilingual Dictionary Induction},
  pages = {3334--3339},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2019}
}

@inproceedings{DBLP:conf/nips/HartmannKS19,
  author = {Mareike Hartmann and Yova Kementchedjhieva and Anders S{\o}gaard},
  title = {Comparing Unsupervised Word Translation Methods Step by Step},
  pages = {6031--6041},
  publisher = {Annual Conference on Neural Information Processing Systems},
  year = {2019}
}

@inproceedings{DBLP:conf/emnlp/HartmannKS18,
  author = {Mareike Hartmann and Yova Kementchedjhieva and Anders S{\o}gaard},
  title = {Why is unsupervised alignment of English embeddings from different algorithms so hard?},
  pages = {582--586},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2018}
}

@inproceedings{DBLP:conf/emnlp/JoulinBMJG18,
  author = {Armand Joulin and Piotr Bojanowski and Tomas Mikolov and Herv{\'{e}} J{\'{e}}gou and Edouard Grave},
  title = {Loss in Translation: Learning Bilingual Word Mapping with a Retrieval Criterion},
  pages = {2979--2984},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2018}
}

@inproceedings{DBLP:conf/acl/SogaardVR18,
  author = {Anders S{\o}gaard and Sebastian Ruder and Ivan Vulic},
  title = {On the Limitations of Unsupervised Bilingual Dictionary Induction},
  pages = {778--788},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2018}
}
@inproceedings{DBLP:conf/naacl/HeymanVVM19,
  author = {Geert Heyman and Bregt Verreet and Ivan Vulic and Marie-Francine Moens},
  title = {Learning Unsupervised Multilingual Word Embeddings with Incremental Multilingual Hubs},
  pages = {1890--1902},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year = {2019}
}

% NOTE(review): same survey as the entry keyed dabre2020survey elsewhere in
% this file; both keys are kept because either may be cited.
@inproceedings{2019ADabre,
  title = {A Survey of Multilingual Neural Machine Translation},
  author = {Dabre, Raj and Chu, Chenhui and Kunchukuttan, Anoop},
  publisher = {ACM Computing Surveys},
  year = {2019},
}

@inproceedings{DBLP:conf/naacl/ZophK16,
  author = {Barret Zoph and Kevin Knight},
  title = {Multi-Source Neural Translation},
  pages = {30--34},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year = {2016}
}

% Venue corrected: EMNLP 2019 paper (see the DBLP key), not ACL.
@inproceedings{DBLP:conf/emnlp/KimPPKN19,
  author = {Yunsu Kim and Petre Petrov and Pavel Petrushkov and Shahram Khadivi and Hermann Ney},
  title = {Pivot-based Transfer Learning for Neural Machine Translation between Non-English Languages},
  pages = {866--876},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2019}
}

@inproceedings{DBLP:journals/mt/WuW07,
  author = {Hua Wu and Haifeng Wang},
  title = {Pivot language approach for phrase-based statistical machine translation},
  publisher = {Machine Translation},
  volume = {21},
  number = {3},
  pages = {165--181},
  year = {2007}
}

% The page range here was a copy of the preceding journal article (165--181)
% and the venue name was garbled. The paper appeared at the 5th International
% Symposium on Telecommunications (IST 2010).
% NOTE(review): confirm the page range against the IEEE Xplore record.
@inproceedings{Farsi2010somayeh,
  author = {Somayeh Bakhshaei and Shahram Khadivi and Noushin Riahi},
  title = {Farsi-german statistical machine translation through bridge language},
  publisher = {International Symposium on Telecommunications},
  pages = {557--561},
  year = {2010}
}

@inproceedings{DBLP:conf/acl/ZahabiBK13,
  author = {Samira Tofighi Zahabi and Somayeh Bakhshaei and Shahram Khadivi},
  title = {Using Context Vectors in Improving a Machine Translation System with Bridge Language},
  pages = {318--322},
  publisher = {Annual Meeting of
the Association for Computational Linguistics},
  year = {2013}
}

@inproceedings{DBLP:conf/emnlp/ZhuHWZWZ14,
  author = {Xiaoning Zhu and Zhongjun He and Hua Wu and Conghui Zhu and Haifeng Wang and Tiejun Zhao},
  title = {Improving Pivot-Based Statistical Machine Translation by Pivoting the Co-occurrence Count of Phrase Pairs},
  pages = {1665--1675},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2014}
}

@inproceedings{DBLP:conf/acl/MiuraNSTN15,
  author = {Akiva Miura and Graham Neubig and Sakriani Sakti and Tomoki Toda and Satoshi Nakamura},
  title = {Improving Pivot Translation by Remembering the Pivot},
  pages = {573--577},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2015}
}

@inproceedings{DBLP:conf/acl/CohnL07,
  author = {Trevor Cohn and Mirella Lapata},
  title = {Machine Translation by Triangulation: Making Effective Use of Multi-Parallel Corpora},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2007}
}

@inproceedings{DBLP:conf/acl/WuW09,
  author = {Hua Wu and Haifeng Wang},
  title = {Revisiting Pivot Language Approach for Machine Translation},
  pages = {154--162},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2009}
}

@inproceedings{DBLP:journals/corr/ChengLYSX16,
  author = {Yong Cheng and Yang Liu and Qian Yang and Maosong Sun and Wei Xu},
  title = {Neural Machine Translation with Pivot Languages},
  publisher = {CoRR},
  volume = {abs/1611.04928},
  year = {2016}
}

% Venue corrected: this speech-translation paper appeared at ICSLP 2002
% (indexed by DBLP under Interspeech), not at the computer-architecture
% symposium ISCA.
@inproceedings{DBLP:conf/interspeech/KauersVFW02,
  author = {Manuel Kauers and Stephan Vogel and Christian F{\"{u}}gen and Alex Waibel},
  title = {Interlingua based statistical machine translation},
  publisher = {International Conference on Spoken Language Processing},
  year = {2002}
}

% Second author's name restored with its diacritics and the period after the initial.
@inproceedings{de2006catalan,
  title = {Catalan-English statistical machine translation without parallel corpus: bridging through Spanish},
  author = {De Gispert, Adri{\`a} and Mari{\~n}o, Jos{\'e} B.},
publisher={International Conference on Language Resources and Evaluation},
  pages = {65--68},
  year = {2006}
}

% Venue corrected: NAACL-HLT 2007 paper (see the DBLP key), not ACL.
@inproceedings{DBLP:conf/naacl/UtiyamaI07,
  author = {Masao Utiyama and Hitoshi Isahara},
  title = {A Comparison of Pivot Methods for Phrase-Based Statistical Machine Translation},
  pages = {484--491},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year = {2007}
}

@inproceedings{DBLP:conf/ijcnlp/Costa-JussaHB11,
  author = {Marta R. Costa-juss{\`{a}} and Carlos A. Henr{\'{\i}}quez Q. and Rafael E. Banchs},
  title = {Enhancing scarce-resource language translation through pivot combinations},
  pages = {1361--1365},
  publisher = {International Joint Conference on Natural Language Processing},
  year = {2011}
}

@inproceedings{DBLP:journals/corr/HintonVD15,
  author = {Geoffrey E. Hinton and Oriol Vinyals and Jeffrey Dean},
  title = {Distilling the Knowledge in a Neural Network},
  publisher = {CoRR},
  volume = {abs/1503.02531},
  year = {2015}
}

@inproceedings{gu2018meta,
  author = {Jiatao Gu and Yong Wang and Yun Chen and Victor O. K. Li and Kyunghyun Cho},
  title = {Meta-Learning for Low-Resource Neural Machine Translation},
  pages = {3622--3631},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2018}
}

@inproceedings{DBLP:conf/naacl/GuHDL18,
  author = {Jiatao Gu and Hany Hassan and Jacob Devlin and Victor O. K.
Li},
  title = {Universal Neural Machine Translation for Extremely Low Resource Languages},
  pages = {344--354},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year = {2018}
}

@inproceedings{DBLP:conf/icml/FinnAL17,
  author = {Chelsea Finn and Pieter Abbeel and Sergey Levine},
  title = {Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks},
  series = {Proceedings of Machine Learning Research},
  volume = {70},
  pages = {1126--1135},
  publisher = {International Conference on Machine Learning},
  year = {2017}
}

@inproceedings{DBLP:conf/acl/DongWHYW15,
  author = {Daxiang Dong and Hua Wu and Wei He and Dianhai Yu and Haifeng Wang},
  title = {Multi-Task Learning for Multiple Language Translation},
  pages = {1723--1732},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2015}
}

@inproceedings{DBLP:conf/lrec/RiktersPK18,
  author = {Matiss Rikters and Marcis Pinnis and Rihards Krislauks},
  title = {Training and Adapting Multilingual {NMT} for Less-resourced and Morphologically Rich Languages},
  publisher = {European Language Resources Association},
  year = {2018}
}

@inproceedings{DBLP:journals/tkde/PanY10,
  author = {Sinno Jialin Pan and Qiang Yang},
  title = {A Survey on Transfer Learning},
  publisher = {IEEE Transactions on knowledge and data engineering},
  volume = {22},
  number = {10},
  pages = {1345--1359},
  year = {2010}
}

@book{2009Handbook,
  title = {Handbook Of Research On Machine Learning Applications and Trends: Algorithms, Methods and Techniques - 2 Volumes},
  author = {Olivas, Emilio Soria and Guerrero, Jose David Martin and Sober, Marcelino Martinez and Benedito, Jose Rafael Magdalena and Lopez, Antonio Jose Serrano},
  publisher = {Information Science Reference - Imprint of: IGI Publishing},
  year = {2009},
}

% This entry used to define the publisher field twice (book title, then the
% real publisher); BibTeX keeps only the first and warns about the second.
% The book title now sits in booktitle, the field an incollection entry
% requires, so the remaining publisher field is the single one.
@incollection{DBLP:books/crc/aggarwal14/Pan14,
  author = {Sinno Jialin Pan},
  title = {Transfer Learning},
  booktitle = {Data Classification: Algorithms and Applications},
  pages
= {537--570},
  publisher = {{CRC} Press},
  year = {2014}
}

@inproceedings{DBLP:conf/iclr/TanRHQZL19,
  author = {Xu Tan and Yi Ren and Di He and Tao Qin and Zhou Zhao and Tie-Yan Liu},
  title = {Multilingual Neural Machine Translation with Knowledge Distillation},
  publisher = {International Conference on Learning Representations},
  year = {2019}
}

@inproceedings{platanios2018contextual,
  author = {Emmanouil Antonios Platanios and Mrinmaya Sachan and Graham Neubig and Tom M. Mitchell},
  title = {Contextual Parameter Generation for Universal Neural Machine Translation},
  pages = {425--435},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2018}
}

% NOTE(review): same paper as the entry keyed DBLP:conf/aaai/JiZDZCL20
% elsewhere in this file; both keys are kept because either may be cited.
@inproceedings{ji2020cross,
  title = {Cross-Lingual Pre-Training Based Transfer for Zero-Shot Neural Machine Translation},
  author = {Ji, Baijun and Zhang, Zhirui and Duan, Xiangyu and Zhang, Min and Chen, Boxing and Luo, Weihua},
  publisher = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume = {34},
  number = {01},
  pages = {115--122},
  year = {2020}
}

@inproceedings{DBLP:conf/wmt/KocmiB18,
  author = {Tom Kocmi and Ondrej Bojar},
  title = {Trivial Transfer Learning for Low-Resource Neural Machine Translation},
  pages = {244--252},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2018}
}

@inproceedings{DBLP:conf/acl/ZhangWTS20,
  author = {Biao Zhang and Philip Williams and Ivan Titov and Rico Sennrich},
  title = {Improving Massively Multilingual Neural Machine Translation and Zero-Shot Translation},
  pages = {1628--1639},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2020}
}

@inproceedings{DBLP:conf/naacl/PaulYSN09,
  author = {Michael Paul and Hirofumi Yamamoto and Eiichiro Sumita and Satoshi Nakamura},
  title = {On the Importance of Pivot Language Selection for Statistical Machine Translation},
  pages = {221--224},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
year = {2009}
}

@inproceedings{dabre2020survey,
  title = {A survey of multilingual neural machine translation},
  author = {Dabre, Raj and Chu, Chenhui and Kunchukuttan, Anoop},
  publisher = {ACM Computing Surveys},
  volume = {53},
  number = {5},
  pages = {1--38},
  year = {2020}
}

@inproceedings{DBLP:conf/emnlp/XuYOW18,
  author = {Ruochen Xu and Yiming Yang and Naoki Otani and Yuexin Wu},
  title = {Unsupervised Cross-lingual Transfer of Word Embedding Spaces},
  pages = {2465--2474},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2018}
}

@inproceedings{DBLP:conf/emnlp/ZhangLLS17,
  author = {Meng Zhang and Yang Liu and Huanbo Luan and Maosong Sun},
  title = {Earth Mover's Distance Minimization for Unsupervised Bilingual Lexicon Induction},
  pages = {1934--1945},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2017}
}

@inproceedings{DBLP:conf/emnlp/ArtetxeLA18,
  author = {Mikel Artetxe and Gorka Labaka and Eneko Agirre},
  title = {Unsupervised Statistical Machine Translation},
  pages = {3632--3642},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2018}
}

@inproceedings{DBLP:journals/tacl/LeeCH17,
  author = {Jason Lee and Kyunghyun Cho and Thomas Hofmann},
  title = {Fully Character-Level Neural Machine Translation without Explicit Segmentation},
  publisher = {Transactions of the Association for Computational Linguistics},
  volume = {5},
  pages = {365--378},
  year = {2017}
}

@inproceedings{DBLP:conf/naacl/FiratCB16,
  author = {Orhan Firat and Kyunghyun Cho and Yoshua Bengio},
  title = {Multi-Way, Multilingual Neural Machine Translation with a Shared Attention Mechanism},
  pages = {866--875},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year = {2016}
}

@inproceedings{DBLP:journals/corr/HaNW16,
  author = {Thanh-Le Ha and Jan Niehues and Alexander H.
Waibel},
  title = {Toward Multilingual Neural Machine Translation with Universal Encoder and Decoder},
  publisher = {CoRR},
  volume = {abs/1611.04798},
  year = {2016}
}

@inproceedings{DBLP:conf/coling/BlackwoodBW18,
  author = {Graeme W. Blackwood and Miguel Ballesteros and Todd Ward},
  title = {Multilingual Neural Machine Translation with Task-Specific Attention},
  pages = {3112--3122},
  publisher = {International Conference on Computational Linguistics},
  year = {2018}
}

@inproceedings{DBLP:conf/wmt/SachanN18,
  author = {Devendra Singh Sachan and Graham Neubig},
  title = {Parameter Sharing Methods for Multilingual Self-Attentional Translation Models},
  pages = {261--271},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2018}
}

@inproceedings{DBLP:conf/wmt/LuKLBZS18,
  author = {Yichao Lu and Phillip Keung and Faisal Ladhak and Vikas Bhardwaj and Shaonan Zhang and Jason Sun},
  title = {A neural interlingua for multilingual machine translation},
  pages = {84--92},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2018}
}

@inproceedings{DBLP:conf/acl/WangZZZXZ19,
  author = {Yining Wang and Long Zhou and Jiajun Zhang and Feifei Zhai and Jingfang Xu and Chengqing Zong},
  title = {A Compact and Language-Sensitive Multilingual Translation Method},
  pages = {1213--1223},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:conf/iclr/WangPAN19,
  author = {Xinyi Wang and Hieu Pham and Philip Arthur and Graham Neubig},
  title = {Multilingual Neural Machine Translation With Soft Decoupled Encoding},
  publisher = {International Conference on Learning Representations},
  year = {2019}
}

@inproceedings{DBLP:conf/emnlp/TanCHXQL19,
  author = {Xu Tan and Jiale Chen and Di He and Yingce Xia and Tao Qin and Tie-Yan Liu},
  title = {Multilingual Neural Machine Translation with Language Clustering},
  pages = {963--973},
  publisher = {Conference on Empirical Methods in Natural
Language Processing},
  year = {2019}
}

@inproceedings{DBLP:journals/corr/abs-1903-07091,
  author = {Naveen Arivazhagan and Ankur Bapna and Orhan Firat and Roee Aharoni and Melvin Johnson and Wolfgang Macherey},
  title = {The Missing Ingredient in Zero-Shot Neural Machine Translation},
  publisher = {CoRR},
  volume = {abs/1903.07091},
  year = {2019}
}

@inproceedings{DBLP:conf/naacl/Al-ShedivatP19,
  author = {Maruan Al-Shedivat and Ankur P. Parikh},
  title = {Consistency by Agreement in Zero-Shot Neural Machine Translation},
  pages = {1184--1197},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year = {2019}
}

% NOTE(review): same paper as the entry keyed DBLP:conf/emnlp/FiratSAYC16
% further down; both keys are kept because either may be cited.
@inproceedings{firat2016zero,
  author = {Orhan Firat and Baskaran Sankaran and Yaser Al-Onaizan and Fatos T. Yarman-Vural and Kyunghyun Cho},
  title = {Zero-Resource Translation with Multi-Lingual Neural Machine Translation},
  pages = {268--277},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2016}
}

@inproceedings{DBLP:journals/corr/abs-1805-10338,
  author = {Lierni Sestorain and Massimiliano Ciaramita and Christian Buck and Thomas Hofmann},
  title = {Zero-Shot Dual Machine Translation},
  publisher = {CoRR},
  volume = {abs/1805.10338},
  year = {2018}
}

@inproceedings{DBLP:conf/acl/GuWCL19,
  author = {Jiatao Gu and Yong Wang and Kyunghyun Cho and Victor O. K. Li},
  title = {Improved Zero-shot Neural Machine Translation via Ignoring Spurious Correlations},
  pages = {1258--1268},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:conf/emnlp/FiratSAYC16,
  author = {Orhan Firat and Baskaran Sankaran and Yaser Al-Onaizan and Fatos T.
Yarman-Vural and Kyunghyun Cho},
  title = {Zero-Resource Translation with Multi-Lingual Neural Machine Translation},
  pages = {268--277},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2016}
}

@inproceedings{DBLP:conf/emnlp/CurreyH19,
  author = {Anna Currey and Kenneth Heafield},
  title = {Zero-Resource Neural Machine Translation with Monolingual Pivot Data},
  pages = {99--107},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2019}
}

@inproceedings{DBLP:conf/acl/FadaeeBM17a,
  author = {Marzieh Fadaee and Arianna Bisazza and Christof Monz},
  title = {Data Augmentation for Low-Resource Neural Machine Translation},
  pages = {567--573},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2017}
}

@inproceedings{DBLP:conf/emnlp/WangPDN18,
  author = {Xinyi Wang and Hieu Pham and Zihang Dai and Graham Neubig},
  title = {SwitchOut: an Efficient Data Augmentation Algorithm for Neural Machine Translation},
  pages = {856--861},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2018}
}

% The key prefix conf/emnlp marks this as an EMNLP paper; the venue uses the
% same EMNLP string as the sibling conf/emnlp entries.
@inproceedings{DBLP:conf/emnlp/MartonCR09,
  author = {Yuval Marton and Chris Callison-Burch and Philip Resnik},
  title = {Improved Statistical Machine Translation Using Monolingually-Derived Paraphrases},
  pages = {381--390},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2009}
}

@inproceedings{DBLP:conf/aclnmt/ImamuraFS18,
  author = {Kenji Imamura and Atsushi Fujita and Eiichiro Sumita},
  title = {Enhancement of Encoder and Attention Using Target Monolingual Corpora in Neural Machine Translation},
  pages = {55--63},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2018}
}

@inproceedings{DBLP:conf/icml/VincentLBM08,
  author = {Pascal Vincent and Hugo Larochelle and Yoshua Bengio and Pierre-Antoine Manzagol},
  title = {Extracting and composing robust features with denoising autoencoders},
  year
= {2008},
  publisher = {International Conference on Machine Learning}
}

@inproceedings{DBLP:conf/iclr/LampleCDR18,
  author = {Guillaume Lample and Alexis Conneau and Ludovic Denoyer and Marc'Aurelio Ranzato},
  title = {Unsupervised Machine Translation Using Monolingual Corpora Only},
  publisher = {International Conference on Learning Representations},
  year = {2018}
}

@inproceedings{DBLP:journals/coling/BhagatH13,
  author = {Rahul Bhagat and Eduard Hovy},
  title = {What Is a Paraphrase?},
  publisher = {Computational Linguistics},
  volume = {39},
  number = {3},
  pages = {463--472},
  year = {2013}
}

% Page range written with an en-dash (double hyphen), as in the other entries.
@inproceedings{2010Generating,
  title = {Generating Phrasal and Sentential Paraphrases: A Survey of Data-Driven Methods},
  author = {Madnani, Nitin and Dorr, Bonnie},
  publisher = {Computational Linguistics},
  volume = {36},
  number = {3},
  pages = {341--387},
  year = {2010}
}

@inproceedings{DBLP:conf/wmt/GuoH19,
  author = {Yinuo Guo and Junfeng Hu},
  title = {Meteor++ 2.0: Adopt Syntactic Level Paraphrase Knowledge into Machine Translation Evaluation},
  pages = {501--506},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:conf/acl/ZhouSW19,
  author = {Zhong Zhou and Matthias Sperber and Alexander Waibel},
  title = {Paraphrases as Foreign Languages in Multilingual Neural Machine Translation},
  pages = {113--122},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

% EACL is the European Chapter of the ACL, not the European Association for
% Machine Translation (EAMT).
@inproceedings{DBLP:conf/eacl/LapataSM17,
  author = {Jonathan Mallinson and Rico Sennrich and Mirella Lapata},
  title = {Paraphrasing Revisited with Neural Machine Translation},
  pages = {881--893},
  publisher = {Conference of the European Chapter of the Association for Computational Linguistics},
  year = {2017}
}

@inproceedings{DBLP:conf/naacl/SmithQT10,
  author = {Jason Smith and Chris Quirk and Kristina Toutanova},
  title = {Extracting Parallel Sentences from Comparable Corpora using Document Level Alignment},
  pages = {403--411},
  publisher = {Annual Meeting of
the Association for Computational Linguistics},
  year = {2010}
}

@inproceedings{DBLP:journals/jair/RuderVS19,
  author = {Sebastian Ruder and Ivan Vuli{\'{c}} and Anders S{\o}gaard},
  title = {A Survey of Cross-lingual Word Embedding Models},
  publisher = {Journal of Artificial Intelligence Research},
  volume = {65},
  pages = {569--631},
  year = {2019}
}

@inproceedings{DBLP:conf/acl/TuLLLL16,
  author = {Zhaopeng Tu and Zhengdong Lu and Yang Liu and Xiaohua Liu and Hang Li},
  title = {Modeling Coverage for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2016}
}

% Journal article (key prefix journals/tacl, has a volume): the venue is the
% TACL journal, spelled as in DBLP:journals/tacl/JohnsonSLKWCTVW17.
@inproceedings{DBLP:journals/tacl/TuLLLL17,
  author = {Zhaopeng Tu and Yang Liu and Zhengdong Lu and Xiaohua Liu and Hang Li},
  title = {Context Gates for Neural Machine Translation},
  publisher = {Transactions of the Association for Computational Linguistics},
  volume = {5},
  pages = {87--99},
  year = {2017}
}

@inproceedings{DBLP:conf/wmt/WangCJYCLSWY17,
  author = {Yuguang Wang and Shanbo Cheng and Liyang Jiang and Jiajun Yang and Wei Chen and Muze Li and Lin Shi and Yanfeng Wang and Hongtao Yang},
  title = {Sogou Neural Machine Translation Systems for {WMT17}},
  pages = {410--415},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2017}
}

% Venue string aligned with the other WMT system papers in this file.
@inproceedings{ng2019facebook,
  author = {Nathan Ng and Kyra Yee and Alexei Baevski and Myle Ott and Michael Auli and Sergey Edunov},
  title = {Facebook FAIR's {WMT19} News Translation Task Submission},
  pages = {314--319},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:conf/wmt/WangLLJZLLXZ18,
  author = {Qiang Wang and Bei Li and Jiqiang Liu and Bojian Jiang and Zheyang Zhang and Yinqiao Li and Ye Lin and Tong Xiao and Jingbo Zhu},
  title = {The NiuTrans Machine Translation System for {WMT18}},
  pages = {528--534},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2018}
}
@inproceedings{DBLP:conf/wmt/LiLXLLLWZXWFCLL19,
  author = {Bei Li and Yinqiao Li and Chen Xu and Ye Lin and Jiqiang Liu and Hui Liu and Ziyang Wang and Yuhao Zhang and Nuo Xu and Zeyang Wang and Kai Feng and Hexuan Chen and Tengbo Liu and Yanyang Li and Qiang Wang and Tong Xiao and Jingbo Zhu},
  title = {The NiuTrans Machine Translation Systems for {WMT19}},
  pages = {257--266},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:conf/nips/DaiL15,
  author = {Andrew Dai and Quoc Le},
  title = {Semi-supervised Sequence Learning},
  pages = {3079--3087},
  publisher = {Annual Conference on Neural Information Processing Systems},
  year = {2015}
}

@inproceedings{DBLP:journals/corr/abs-1802-05365,
  author = {Matthew Peters and Mark Neumann and Mohit Iyyer and Matt Gardner and Christopher Clark and Kenton Lee and Luke Zettlemoyer},
  title = {Deep Contextualized Word Representations},
  pages = {2227--2237},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year = {2018}
}

@inproceedings{DBLP:conf/icml/CollobertW08,
  author = {Ronan Collobert and Jason Weston},
  title = {A unified architecture for natural language processing: deep neural networks with multitask learning},
  volume = {307},
  pages = {160--167},
  publisher = {International Conference on Machine Learning},
  year = {2008}
}

@inproceedings{DBLP:conf/aclwat/NeishiSTIYT17,
  author = {Masato Neishi and Jin Sakuma and Satoshi Tohda and Shonosuke Ishiwatari and Naoki Yoshinaga and Masashi Toyoda},
  title = {A Bag of Useful Tricks for Practical Neural Machine Translation: Embedding Layer Initialization and Large Batch Size},
  pages = {99--109},
  publisher = {Asian Federation of Natural Language Processing},
  year = {2017}
}

@inproceedings{2018When,
  title = {When and Why are Pre-trained Word Embeddings Useful for Neural Machine Translation?},
  author = {Qi, Ye and Sachan, Devendra Singh and Felix, Matthieu and Padmanabhan,
Sarguna Janani and Neubig, Graham },
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year = {2018},
}

@inproceedings{DBLP:conf/acl/PetersABP17,
  author = {Matthew Peters and Waleed Ammar and Chandra Bhagavatula and Russell Power},
  title = {Semi-supervised sequence tagging with bidirectional language models},
  pages = {1756--1765},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2017}
}

% Key prefix conf/emnlp: venue uses the EMNLP string shared by the other
% conf/emnlp entries in this file.
@inproceedings{DBLP:conf/emnlp/ImamuraS19,
  author = {Kenji Imamura and Eiichiro Sumita},
  title = {Recycling a Pre-trained {BERT} Encoder for Neural Machine Translation},
  pages = {23--31},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2019}
}

@inproceedings{DBLP:conf/naacl/EdunovBA19,
  author = {Sergey Edunov and Alexei Baevski and Michael Auli},
  title = {Pre-trained language model representations for language generation},
  pages = {4052--4059},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:journals/corr/abs-1908-06259,
  author = {Tianyu He and Xu Tan and Tao Qin},
  title = {Hard but Robust, Easy but Sensitive: How Encoder and Decoder Perform in Neural Machine Translation},
  publisher = {CoRR},
  volume = {abs/1908.06259},
  year = {2019}
}

% Key prefix conf/emnlp: venue uses the EMNLP string shared by the other
% conf/emnlp entries in this file.
@inproceedings{DBLP:conf/emnlp/QiYGLDCZ020,
  author = {Weizhen Qi and Yu Yan and Yeyun Gong and Dayiheng Liu and Nan Duan and Jiusheng Chen and Ruofei Zhang and Ming Zhou},
  title = {ProphetNet: Predicting Future N-gram for Sequence-to-Sequence Pre-training},
  pages = {2401--2410},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2020}
}

% incollection requires a booktitle; the chapter appears in the edited
% volume "Learning to Learn".
@incollection{DBLP:books/sp/98/Caruana98,
  author = {Rich Caruana},
  title = {Multitask Learning},
  booktitle = {Learning to Learn},
  pages = {95--133},
  publisher = {Springer},
  year = {1998}
}

@inproceedings{liu2019multi,
  author = {Xiaodong Liu and Pengcheng He and Weizhu Chen and
Jianfeng Gao},
  title = {Multi-Task Deep Neural Networks for Natural Language Understanding},
  pages = {4487--4496},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:journals/corr/LuongLSVK15,
  author = {Minh-Thang Luong and Quoc V. Le and Ilya Sutskever and Oriol Vinyals and Lukasz Kaiser},
  title = {Multi-task Sequence to Sequence Learning},
  publisher = {International Conference on Learning Representations},
  year = {2016}
}

@inproceedings{DBLP:conf/emnlp/ZhangZ16,
  author = {Jiajun Zhang and Chengqing Zong},
  title = {Exploiting Source-side Monolingual Data in Neural Machine Translation},
  pages = {1535--1545},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2016}
}

@inproceedings{DBLP:journals/tacl/JohnsonSLKWCTVW17,
  author = {Melvin Johnson and Mike Schuster and Quoc V. Le and Maxim Krikun and Yonghui Wu and Zhifeng Chen and Nikhil Thorat and Fernanda B. Vi{\'{e}}gas and Martin Wattenberg and Greg Corrado and Macduff Hughes and Jeffrey Dean},
  title = {Google's Multilingual Neural Machine Translation System: Enabling Zero-Shot Translation},
  publisher = {Transactions of the Association for Computational Linguistics},
  volume = {5},
  pages = {339--351},
  year = {2017}
}

% Key prefix journals/csl: the journal is Computer Speech and Language
% (volume 45), not Computational Linguistics.
@inproceedings{DBLP:journals/csl/GulcehreFXCB17,
  author = {{\c{C}}aglar G{\"{u}}l{\c{c}}ehre and Orhan Firat and Kelvin Xu and Kyunghyun Cho and Yoshua Bengio},
  title = {On integrating a language model into neural machine translation},
  publisher = {Computer Speech \& Language},
  volume = {45},
  pages = {137--148},
  year = {2017}
}

@inproceedings{DBLP:conf/wmt/StahlbergCS18,
  author = {Felix Stahlberg and James Cross and Veselin Stoyanov},
  title = {Simple Fusion: Return of the Language Model},
  pages = {204--211},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2018}
}

@inproceedings{DBLP:conf/iccv/SunSSG17,
  author = {Chen Sun and Abhinav Shrivastava and Saurabh Singh and
Abhinav Gupta},
  title = {Revisiting Unreasonable Effectiveness of Data in Deep Learning Era},
  pages = {843--852},
  publisher = {IEEE International Conference on Computer Vision},
  year = {2017}
}

@inproceedings{DBLP:conf/acl/DuhNST13,
  author = {Kevin Duh and Graham Neubig and Katsuhito Sudoh and Hajime Tsukada},
  title = {Adaptation Data Selection using Neural Language Models: Experiments in Machine Translation},
  pages = {678--683},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2013}
}

@inproceedings{DBLP:conf/wmt/FosterK07,
  author = {George F. Foster and Roland Kuhn},
  title = {Mixture-Model Adaptation for {SMT}},
  pages = {128--135},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2007}
}

@inproceedings{DBLP:conf/iwslt/BisazzaRF11,
  author = {Arianna Bisazza and Nick Ruiz and Marcello Federico},
  title = {Fill-up versus interpolation methods for phrase-based {SMT} adaptation},
  pages = {136--143},
  publisher = {International Workshop on Spoken Language Translation},
  year = {2011}
}

% The acronym is braced so sentence-casing styles do not lowercase it,
% matching the {SMT} protection used by the neighbouring entries.
@inproceedings{niehues2012detailed,
  title = {Detailed analysis of different strategies for phrase table adaptation in {SMT}},
  author = {Niehues, Jan and Waibel, Alex},
  publisher = {Association for Machine Translation in the Americas},
  year = {2012}
}

@inproceedings{DBLP:conf/acl/SennrichSA13,
  author = {Rico Sennrich and Holger Schwenk and Walid Aransa},
  title = {A Multi-Domain Translation Model Framework for Statistical Machine Translation},
  pages = {832--840},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2013}
}

@inproceedings{imamura2016multi,
  title = {Multi-domain adaptation for statistical machine translation based on feature augmentation},
  author = {Imamura, Kenji and Sumita, Eiichiro},
  publisher = {Association for Machine Translation in the Americas},
  pages = {79},
  year = {2016}
}

@inproceedings{DBLP:conf/emnlp/MatsoukasRZ09,
  author = {Spyros Matsoukas and Antti-Veikko I.
Rosti and Bing Zhang},
  title = {Discriminative Corpus Weight Estimation for Machine Translation},
  pages = {708--717},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2009}
}

@inproceedings{DBLP:conf/emnlp/FosterGK10,
  author = {George F. Foster and Cyril Goutte and Roland Kuhn},
  title = {Discriminative Instance Weighting for Domain Adaptation in Statistical Machine Translation},
  pages = {451--459},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2010}
}

% Author list holds the three authors only ("Le Mans, France" was an
% affiliation exported as a name); the accent on Loic uses the same
% {\"{\i}} form as DBLP:conf/wmt/ShahBS10.
@inproceedings{shah2012general,
  title = {A general framework to weight heterogeneous parallel data for model adaptation in statistical machine translation},
  author = {Shah, Kashif and Barrault, Lo{\"{\i}}c and Schwenk, Holger},
  publisher = {Machine Translation Summit},
  year = {2012}
}

% Key prefix conf/iwslt: venue spelled as in DBLP:conf/iwslt/BisazzaRF11.
@inproceedings{DBLP:conf/iwslt/MansourN12,
  author = {Saab Mansour and Hermann Ney},
  title = {A simple and effective weighted phrase extraction for machine translation adaptation},
  pages = {193--200},
  publisher = {International Workshop on Spoken Language Translation},
  year = {2012}
}

@inproceedings{DBLP:conf/cncl/ZhouCZ15,
  author = {Xinpeng Zhou and Hailong Cao and Tiejun Zhao},
  title = {Domain Adaptation for {SMT} Using Sentence Weight},
  volume = {9427},
  pages = {153--163},
  publisher = {Springer},
  year = {2015}
}

@inproceedings{DBLP:conf/lrec/EckVW04,
  author = {Matthias Eck and Stephan Vogel and Alex Waibel},
  title = {Language Model Adaptation for Statistical Machine Translation Based on Information Retrieval},
  publisher = {International Conference on Language Resources and Evaluation},
  year = {2004}
}

@inproceedings{DBLP:conf/coling/ZhaoEV04,
  author = {Bing Zhao and Matthias Eck and Stephan Vogel},
  title = {Language Model Adaptation for Statistical Machine Translation via Structured Query Models},
  publisher = {International Conference on Computational Linguistics},
  year = {2004}
}

@inproceedings{DBLP:conf/acl/MooreL10,
  author = {Robert C.
Moore and William D. Lewis},
  title = {Intelligent Selection of Language Model Training Data},
  pages = {220--224},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2010}
}

@inproceedings{DBLP:conf/coling/HoangS14,
  author = {Cuong Hoang and Khalil Sima'an},
  title = {Latent Domain Translation Models in Mix-of-Domains Haystack},
  pages = {1928--1939},
  publisher = {International Conference on Computational Linguistics},
  year = {2014}
}

% Five authors, one name per "and"-separated item in "Last, First" form
% (the exported field had run several names together).
@inproceedings{joty2015using,
  title = {Using joint models for domain adaptation in statistical machine translation},
  author = {Durrani, Nadir and Sajjad, Hassan and Joty, Shafiq and Abdelali, Ahmed and Vogel, Stephan},
  publisher = {Proceedings of MT Summit XV},
  pages = {117},
  year = {2015}
}

@inproceedings{chen2016bilingual,
  title = {Bilingual methods for adaptive training data selection for machine translation},
  author = {Chen, Boxing and Kuhn, Roland and Foster, George and Cherry, Colin and Huang, Fei},
  publisher = {Association for Machine Translation in the Americas},
  pages = {93--103},
  year = {2016}
}

% Key prefix conf/iwslt: venue spelled as in DBLP:conf/iwslt/BisazzaRF11.
@inproceedings{DBLP:conf/iwslt/Ueffing06,
  author = {Nicola Ueffing},
  title = {Using monolingual source-language data to improve {MT} performance},
  pages = {174--181},
  publisher = {International Workshop on Spoken Language Translation},
  year = {2006}
}

@inproceedings{DBLP:conf/coling/WuWZ08,
  author = {Hua Wu and Haifeng Wang and Chengqing Zong},
  title = {Domain Adaptation for Statistical Machine Translation with Domain Dictionary and Monolingual Corpora},
  publisher = {International Conference on Computational Linguistics},
  pages = {993--1000},
  year = {2008}
}

% Key prefix conf/iwslt: venue spelled as in DBLP:conf/iwslt/BisazzaRF11.
@inproceedings{DBLP:conf/iwslt/Schwenk08,
  author = {Holger Schwenk},
  title = {Investigations on large-scale lightly-supervised training for statistical machine translation},
  pages = {182--189},
  publisher = {International Workshop on Spoken Language Translation},
  year = {2008}
}

@inproceedings{DBLP:conf/wmt/BertoldiF09,
  author = {Nicola Bertoldi and Marcello Federico},
  title = {Domain
Adaptation for Statistical Machine Translation with Monolingual Resources},
  pages = {182--189},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2009}
}

@inproceedings{DBLP:conf/wmt/LambertSSA11,
  author = {Patrik Lambert and Holger Schwenk and Christophe Servan and Sadaf Abdul-Rauf},
  title = {Investigations on Translation Model Adaptation Using Monolingual Data},
  pages = {284--293},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2011}
}

% Key prefix conf/eacl: the venue is the European Chapter conference.
@inproceedings{DBLP:conf/eacl/Sennrich12,
  author = {Rico Sennrich},
  title = {Perplexity Minimization for Translation Model Domain Adaptation in Statistical Machine Translation},
  pages = {539--549},
  publisher = {Conference of the European Chapter of the Association for Computational Linguistics},
  year = {2012}
}

@inproceedings{DBLP:conf/wmt/ShahBS10,
  author = {Kashif Shah and Lo{\"{\i}}c Barrault and Holger Schwenk},
  title = {Translation Model Adaptation by Resampling},
  pages = {392--399},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2010}
}

% Acronyms are braced so sentence-casing styles keep their capitals.
@inproceedings{rousseau2011lium,
  title = {{LIUM}'s systems for the {IWSLT} 2011 Speech Translation Tasks},
  author = {Rousseau, Anthony and Bougares, Fethi and Del{\'e}glise, Paul and Schwenk, Holger and Est{\`e}ve, Yannick},
  publisher = {International Workshop on Spoken Language Translation},
  year = {2011}
}

% Same work as DBLP:conf/acl/MooreL10; author names and page range are kept
% in agreement with that entry. Both keys remain for existing citations.
@inproceedings{moore2010intelligent,
  title = {Intelligent selection of language model training data},
  author = {Moore, Robert C. and Lewis, William D.},
  pages = {220--224},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2010}
}

@inproceedings{DBLP:conf/acl/UtiyamaI03,
  author = {Masao Utiyama and Hitoshi Isahara},
  title = {Reliable Measures for Aligning Japanese-English News Articles and Sentences},
  pages = {72--79},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2003}
}

@inproceedings{DBLP:conf/acl/MarieF17,
  author = {Benjamin Marie and Atsushi Fujita},
title = {Efficient Extraction of Pseudo-Parallel Sentences from Raw Monolingual Data Using Word Embeddings},
  pages = {392--398},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2017}
}

% Key prefix conf/emnlp: venue uses the EMNLP string shared by the other
% conf/emnlp entries in this file.
@inproceedings{DBLP:conf/emnlp/WangZLUS14,
  author = {Rui Wang and Hai Zhao and Bao-Liang Lu and Masao Utiyama and Eiichiro Sumita},
  title = {Neural Network Based Bilingual Language Model Growing for Statistical Machine Translation},
  pages = {189--195},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2014}
}

@inproceedings{DBLP:conf/coling/WangZLUS16,
  author = {Rui Wang and Hai Zhao and Bao-Liang Lu and Masao Utiyama and Eiichiro Sumita},
  title = {Connecting Phrase based Statistical Machine Translation Adaptation},
  pages = {3135--3145},
  publisher = {International Conference on Computational Linguistics},
  year = {2016}
}

@inproceedings{chu2015integrated,
  title = {Integrated parallel data extraction from comparable corpora for statistical machine translation},
  author = {Chu, Chenhui},
  year = {2015},
  publisher = {Kyoto University}
}

@inproceedings{DBLP:journals/tit/Scudder65a,
  author = {H. J.
{Scudder III}},
  title = {Probability of error of some adaptive pattern-recognition machines},
  publisher = {{IEEE} Transactions on Information Theory},
  volume = {11},
  number = {3},
  pages = {363--371},
  year = {1965}
}
% In the entry above the surname is braced as {Scudder III} so that BibTeX
% does not parse the generational suffix "III" as the last name.

@inproceedings{DBLP:conf/coling/ChuW18,
  author = {Chenhui Chu and Rui Wang},
  title = {A Survey of Domain Adaptation for Neural Machine Translation},
  pages = {1304--1319},
  publisher = {International Conference on Computational Linguistics},
  year = {2018}
}

@inproceedings{DBLP:journals/corr/abs-1708-08712,
  author = {Hassan Sajjad and Nadir Durrani and Fahim Dalvi and Yonatan Belinkov and Stephan Vogel},
  title = {Neural Machine Translation Training in a Multi-Domain Scenario},
  publisher = {CoRR},
  volume = {abs/1708.08712},
  year = {2017}
}

@inproceedings{DBLP:conf/acl/WangTNYCP20,
  author = {Wei Wang and Ye Tian and Jiquan Ngiam and Yinfei Yang and Isaac Caswell and Zarana Parekh},
  title = {Learning a Multi-Domain Curriculum for Neural Machine Translation},
  pages = {7711--7723},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2020}
}

@inproceedings{DBLP:conf/acl/JiangLWZ20,
  author = {Haoming Jiang and Chen Liang and Chong Wang and Tuo Zhao},
  title = {Multi-Domain Neural Machine Translation with Word-Level Adaptive Layer-wise Domain Mixing},
  pages = {1823--1834},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2020}
}

@inproceedings{DBLP:conf/emnlp/AxelrodHG11,
  author = {Amittai Axelrod and Xiaodong He and Jianfeng Gao},
  title = {Domain Adaptation via Pseudo In-Domain Data Selection},
  pages = {355--362},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2011}
}

@inproceedings{DBLP:conf/icdm/Remus12,
  author = {Robert Remus},
  title = {Domain Adaptation Using Domain Similarity- and Domain Complexity-Based Instance Selection for Cross-Domain Sentiment Analysis},
  pages = {717--723},
  publisher = {International Conference on Data Mining Workshops},
  year =
{2012}
}

@inproceedings{DBLP:conf/acl/WangFUS17,
  author = {Rui Wang and Andrew M. Finch and Masao Utiyama and Eiichiro Sumita},
  title = {Sentence Embedding for Neural Machine Translation Domain Adaptation},
  pages = {560--566},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2017}
}

@inproceedings{DBLP:conf/acl/HuXNC19,
  author = {Junjie Hu and Mengzhou Xia and Graham Neubig and Jaime G. Carbonell},
  title = {Domain Adaptation of Neural Machine Translation by Lexicon Induction},
  pages = {2989--3001},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

% NOTE: britz2017effective has the same title, year and pages as
% DBLP:conf/wmt/BritzLP17; both keys are kept for existing citations.
@inproceedings{britz2017effective,
  title = {Effective domain mixing for neural machine translation},
  author = {Britz, Denny and Le, Quoc and Pryzant, Reid},
  publisher = {Proceedings of the Second Conference on Machine Translation},
  pages = {118--126},
  year = {2017}
}

@inproceedings{DBLP:conf/ranlp/KobusCS17,
  author = {Catherine Kobus and Josep Maria Crego and Jean Senellart},
  title = {Domain Control for Neural Machine Translation},
  pages = {372--378},
  publisher = {International Conference Recent Advances in Natural Language Processing},
  year = {2017}
}

@inproceedings{DBLP:conf/emnlp/WangULCS17,
  author = {Rui Wang and Masao Utiyama and Lemao Liu and Kehai Chen and Eiichiro Sumita},
  title = {Instance Weighting for Neural Machine Translation Domain Adaptation},
  pages = {1482--1488},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2017}
}

@inproceedings{DBLP:conf/aclnmt/ChenCFL17,
  author = {Boxing Chen and Colin Cherry and George F.
Foster and Samuel Larkin},
  title = {Cost Weighting for Neural Machine Translation Domain Adaptation},
  pages = {40--46},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2017}
}

@inproceedings{DBLP:journals/corr/abs-1906-03129,
  author = {Shen Yan and Leonard Dahlmann and Pavel Petrushkov and Sanjika Hewavitharana and Shahram Khadivi},
  title = {Word-based Domain Adaptation for Neural Machine Translation},
  publisher = {CoRR},
  volume = {abs/1906.03129},
  year = {2019}
}

@inproceedings{dakwale2017finetuning,
  title = {Finetuning for neural machine translation with limited degradation across in- and out-of-domain data},
  author = {Dakwale, Praveen and Monz, Christof},
  publisher = {Proceedings of the XVI Machine Translation Summit},
  volume = {117},
  year = {2017}
}

@inproceedings{DBLP:conf/emnlp/ZengLSGLYL19,
  author = {Jiali Zeng and Yang Liu and Jinsong Su and Yubin Ge and Yaojie Lu and Yongjing Yin and Jiebo Luo},
  title = {Iterative Dual Domain Adaptation for Neural Machine Translation},
  pages = {845--855},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2019}
}

% First author given in "Last, First" form so the compound surname
% "Miceli Barone" is not split by BibTeX's name parser.
@inproceedings{barone2017regularization,
  author = {Miceli Barone, Antonio Valerio and Barry Haddow and Ulrich Germann and Rico Sennrich},
  title = {Regularization techniques for fine-tuning in neural machine translation},
  pages = {1489--1494},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2017}
}

@inproceedings{DBLP:conf/acl/SaundersB20,
  author = {Danielle Saunders and Bill Byrne},
  title = {Reducing Gender Bias in Neural Machine Translation as a Domain Adaptation Problem},
  pages = {7724--7736},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2020}
}

@inproceedings{khayrallah2017neural,
  title = {Neural lattice search for domain adaptation in machine translation},
  author = {Khayrallah, Huda and Kumar, Gaurav and Duh, Kevin and Post, Matt and Koehn, Philipp},
  publisher = {International
Joint Conference on Natural Language Processing},
  pages = {20--25},
  year = {2017}
}

% Key prefix conf/emnlp: the venue slot names the conference, as in the
% other conf/emnlp entries, rather than the publishing society.
@inproceedings{DBLP:conf/emnlp/DouWHN19,
  author = {Zi-Yi Dou and Xinyi Wang and Junjie Hu and Graham Neubig},
  title = {Domain Differential Adaptation for Neural Machine Translation},
  pages = {59--69},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2019}
}

@inproceedings{DBLP:journals/corr/FreitagA16,
  author = {Markus Freitag and Yaser Al-Onaizan},
  title = {Fast Domain Adaptation for Neural Machine Translation},
  publisher = {CoRR},
  volume = {abs/1612.06897},
  year = {2016}
}

@inproceedings{DBLP:conf/acl/SaundersSGB19,
  author = {Danielle Saunders and Felix Stahlberg and Adri{\`{a}} de Gispert and Bill Byrne},
  title = {Domain Adaptive Inference for Neural Machine Translation},
  pages = {222--228},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:conf/wmt/BritzLP17,
  author = {Denny Britz and Quoc V. Le and Reid Pryzant},
  title = {Effective Domain Mixing for Neural Machine Translation},
  pages = {118--126},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2017}
}

@inproceedings{DBLP:journals/ibmrd/Luhn58,
  author = {Hans Peter Luhn},
  title = {The Automatic Creation of Literature Abstracts},
  publisher = {IBM Journal of Research and Development},
  volume = {2},
  number = {2},
  pages = {159--165},
  year = {1958}
}

@inproceedings{DBLP:conf/emnlp/DomhanH17,
  author = {Tobias Domhan and Felix Hieber},
  title = {Using Target-side Monolingual Data for Neural Machine Translation through Multi-task Learning},
  pages = {1500--1505},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2017}
}

@inproceedings{DBLP:conf/naacl/SimianerWD19,
  author = {Patrick Simianer and Joern Wuebker and John DeNero},
  title = {Measuring Immediate Adaptation Performance for Neural Machine Translation},
  pages = {2038--2046},
  publisher = {Annual Conference of the North American
Chapter of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:conf/emnlp/WeesBM17,
  author = {Marlies van der Wees and Arianna Bisazza and Christof Monz},
  title = {Dynamic Data Selection for Neural Machine Translation},
  pages = {1400--1410},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2017}
}

@inproceedings{DBLP:conf/naacl/ZhangSKMCD19,
  author = {Xuan Zhang and Pamela Shapiro and Gaurav Kumar and Paul McNamee and Marine Carpuat and Kevin Duh},
  title = {Curriculum Learning for Domain Adaptation in Neural Machine Translation},
  pages = {1903--1915},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:conf/acl/ChuDK17,
  author = {Chenhui Chu and Raj Dabre and Sadao Kurohashi},
  title = {An Empirical Comparison of Domain Adaptation Methods for Neural Machine Translation},
  pages = {385--391},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2017}
}

@inproceedings{DBLP:conf/naacl/BapnaF19,
  author = {Ankur Bapna and Orhan Firat},
  title = {Non-Parametric Adaptation for Neural Machine Translation},
  pages = {1921--1931},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:journals/corr/abs-2010-11125,
  author = {Angela Fan and Shruti Bhosale and Holger Schwenk and Zhiyi Ma and Ahmed El-Kishky and Siddharth Goyal and Mandeep Baines and Onur Celebi and Guillaume Wenzek and Vishrav Chaudhary and Naman Goyal and Tom Birch and Vitaliy Liptchinsky and Sergey Edunov and Edouard Grave and Michael Auli and Armand Joulin},
  title = {Beyond English-Centric Multilingual Machine Translation},
  publisher = {CoRR},
  volume = {abs/2010.11125},
  year = {2020}
}

@inproceedings{DBLP:conf/emnlp/LinPWQFZL20,
  author = {Zehui Lin and Xiao Pan and Mingxuan Wang and Xipeng Qiu and Jiangtao Feng and Hao Zhou
and Lei Li},
  title = {Pre-training Multilingual Neural Machine Translation by Leveraging Alignment Information},
  pages = {2649--2663},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2020}
}

@inproceedings{DBLP:conf/emnlp/ZhuH07,
  author = {Jingbo Zhu and Eduard H. Hovy},
  title = {Active Learning for Word Sense Disambiguation with Methods for Addressing the Class Imbalance Problem},
  pages = {783--790},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year = {2007}
}

% EACL is the European Chapter of the ACL, not the European Association for
% Machine Translation (EAMT).
@inproceedings{DBLP:conf/eacl/NegriTFBF17,
  author = {Mohammad Amin Farajian and Marco Turchi and Matteo Negri and Nicola Bertoldi and Marcello Federico},
  title = {Neural vs. Phrase-Based Machine Translation in a Multi-Domain Scenario},
  pages = {280--284},
  publisher = {Conference of the European Chapter of the Association for Computational Linguistics},
  year = {2017}
}

@inproceedings{DBLP:conf/aaai/Zhang0LZC18,
  author = {Zhirui Zhang and Shujie Liu and Mu Li and Ming Zhou and Enhong Chen},
  title = {Joint Training for Neural Machine Translation Models with Monolingual Data},
  pages = {555--562},
  publisher = {AAAI Conference on Artificial Intelligence},
  year = {2018}
}

@inproceedings{DBLP:conf/wmt/SunJXHWW19,
  author = {Meng Sun and Bojian Jiang and Hao Xiong and Zhongjun He and Hua Wu and Haifeng Wang},
  title = {Baidu Neural Machine Translation Systems for {WMT19}},
  pages = {374--381},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:conf/acl/SuHC19,
  author = {Shang-Yu Su and Chao-Wei Huang and Yun-Nung Chen},
  title = {Dual Supervised Learning for Natural Language Understanding and Generation},
  pages = {5472--5477},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year = {2019}
}

@inproceedings{DBLP:journals/ejasmp/RadzikowskiNWY19,
  author = {Kacper Radzikowski and Robert Nowak and Le Wang and Osamu Yoshie},
  title = {Dual supervised learning for non-native
speech recognition},
  publisher = {EURASIP Journal on Audio, Speech, and Music Processing},
  volume    = {2019},
  pages     = {3},
  year      = {2019}
}

% Chapter of Tao Qin's book "Dual Learning" (Springer); incollection entries require booktitle.
@incollection{qin2020dual,
  title     = {Dual Learning for Machine Translation and Beyond},
  author    = {Qin, Tao},
  booktitle = {Dual Learning},
  pages     = {49--72},
  year      = {2020},
  publisher = {Springer}
}

% ICCV 2017 paper: the venue goes in publisher, as in the other conference entries of this file.
@inproceedings{DBLP:conf/iccv/YiZTG17,
  author    = {Zili Yi and Hao (Richard) Zhang and Ping Tan and Minglun Gong},
  title     = {{DualGAN}: Unsupervised Dual Learning for Image-to-Image Translation},
  pages     = {2868--2876},
  publisher = {International Conference on Computer Vision},
  year      = {2017}
}

@inproceedings{DBLP:journals/access/DuRZH20,
  author    = {Liang Du and Xin Ren and Peng Zhou and Zhiguo Hu},
  title     = {Unsupervised Dual Learning for Feature and Instance Selection},
  publisher = {{IEEE} Access},
  volume    = {8},
  pages     = {170248--170260},
  year      = {2020}
}

@inproceedings{DBLP:conf/acl/MarieRF20,
  author    = {Benjamin Marie and Raphael Rubino and Atsushi Fujita},
  title     = {Tagged Back-translation Revisited: Why Does It Really Work?},
  pages     = {5990--5997},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2020}
}

@inproceedings{DBLP:conf/nips/YangDYCSL19,
  author    = {Zhilin Yang and Zihang Dai and Yiming Yang and Jaime G. Carbonell and Ruslan Salakhutdinov and Quoc V.
Le},
  title     = {XLNet: Generalized Autoregressive Pretraining for Language Understanding},
  publisher = {Annual Conference on Neural Information Processing Systems},
  pages     = {5754--5764},
  year      = {2019}
}

@inproceedings{DBLP:conf/iclr/LanCGGSS20,
  author    = {Zhenzhong Lan and Mingda Chen and Sebastian Goodman and Kevin Gimpel and Piyush Sharma and Radu Soricut},
  title     = {{ALBERT:} {A} Lite {BERT} for Self-supervised Learning of Language Representations},
  publisher = {International Conference on Learning Representations},
  year      = {2020}
}

@inproceedings{DBLP:conf/acl/ZhangHLJSL19,
  author    = {Zhengyan Zhang and Xu Han and Zhiyuan Liu and Xin Jiang and Maosong Sun and Qun Liu},
  title     = {{ERNIE:} Enhanced Language Representation with Informative Entities},
  pages     = {1441--1451},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}

@inproceedings{DBLP:conf/emnlp/HuangLDGSJZ19,
  author    = {Haoyang Huang and Yaobo Liang and Nan Duan and Ming Gong and Linjun Shou and Daxin Jiang and Ming Zhou},
  title     = {Unicoder: {A} Universal Language Encoder by Pre-training with Multiple Cross-lingual Tasks},
  pages     = {2485--2494},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2019}
}

@inproceedings{DBLP:conf/iccv/SunMV0S19,
  author    = {Chen Sun and Austin Myers and Carl Vondrick and Kevin Murphy and Cordelia Schmid},
  title     = {VideoBERT: {A} Joint Model for Video and Language Representation Learning},
  pages     = {7463--7472},
  publisher = {International Conference on Computer Vision},
  year      = {2019}
}

@inproceedings{DBLP:journals/corr/abs-2010-12831,
  author    = {Liunian Harold Li and Haoxuan You and Zhecan Wang and Alireza Zareian and Shih-Fu Chang and Kai-Wei Chang},
  title     = {Weakly-supervised VisualBERT: Pre-training without Parallel Images and Captions},
  publisher = {CoRR},
  volume    = {abs/2010.12831},
  year      = {2020}
}

@inproceedings{DBLP:conf/nips/LuBPL19,
  author    = {Jiasen Lu and Dhruv Batra and Devi Parikh and Stefan Lee},
title     = {ViLBERT: Pretraining Task-Agnostic Visiolinguistic Representations for Vision-and-Language Tasks},
  publisher = {Annual Conference on Neural Information Processing Systems},
  pages     = {13--23},
  year      = {2019}
}

@inproceedings{DBLP:conf/interspeech/ChuangLLL20,
  author    = {Yung-Sung Chuang and Chi-Liang Liu and Hung-yi Lee and Lin-Shan Lee},
  title     = {SpeechBERT: An Audio-and-Text Jointly Learned Language Model for End-to-End Spoken Question Answering},
  pages     = {4168--4172},
  publisher = {Annual Conference of the International Speech Communication Association},
  year      = {2020}
}

@inproceedings{DBLP:conf/rep4nlp/PetersRS19,
  author    = {Matthew Peters and Sebastian Ruder and Noah A. Smith},
  title     = {To Tune or Not to Tune? Adapting Pretrained Representations to Diverse Tasks},
  pages     = {7--14},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}

@inproceedings{DBLP:conf/cncl/SunQXH19,
  author    = {Chi Sun and Xipeng Qiu and Yige Xu and Xuanjing Huang},
  title     = {How to Fine-Tune {BERT} for Text Classification?},
  volume    = {11856},
  pages     = {194--206},
  publisher = {Chinese Computational Linguistics},
  year      = {2019}
}

% Title stored without a trailing period (the style adds punctuation); acronyms are brace-protected.
@inproceedings{shen2020q,
  title     = {{Q-BERT}: Hessian Based Ultra Low Precision Quantization of {BERT}},
  author    = {Shen, Sheng and Dong, Zhen and Ye, Jiayu and Ma, Linjian and Yao, Zhewei and Gholami, Amir and Mahoney, Michael W. and Keutzer, Kurt},
  publisher = {AAAI Conference on Artificial Intelligence},
  pages     = {8815--8821},
  year      = {2020}
}

@inproceedings{DBLP:journals/corr/abs-1910-01108,
  author    = {Victor Sanh and Lysandre Debut and Julien Chaumond and Thomas Wolf},
  title     = {DistilBERT, a distilled version of {BERT:} smaller, faster, cheaper and lighter},
  publisher = {CoRR},
  volume    = {abs/1910.01108},
  year      = {2019}
}

@inproceedings{DBLP:conf/icml/XiaTTQYL18,
  author    = {Yingce Xia and Xu Tan and Fei Tian and Tao Qin and Nenghai Yu and Tie-Yan Liu},
  title     = {Model-Level Dual Learning},
  series    = {Proceedings of Machine Learning
Research},
  volume    = {80},
  pages     = {5379--5388},
  publisher = {International Conference on Machine Learning},
  year      = {2018}
}

@inproceedings{DBLP:conf/nips/HeXQWYLM16,
  author    = {Di He and Yingce Xia and Tao Qin and Liwei Wang and Nenghai Yu and Tie-Yan Liu and Wei-Ying Ma},
  title     = {Dual Learning for Machine Translation},
  publisher = {Annual Conference on Neural Information Processing Systems},
  pages     = {820--828},
  year      = {2016}
}

% arXiv preprint recorded like the other CoRR entries of this file (journal = CoRR, volume = abs/<id>).
@article{zhao2020dual,
  title     = {Dual Learning: Theoretical Study and an Algorithmic Extension},
  author    = {Zhao, Zhibing and Xia, Yingce and Qin, Tao and Xia, Lirong and Liu, Tie-Yan},
  journal   = {CoRR},
  volume    = {abs/2005.08238},
  year      = {2020}
}

%%%%% chapter 16------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 17------------------------------------------------------

@article{DBLP:journals/ac/Bar-Hillel60,
  author    = {Yehoshua Bar-Hillel},
  title     = {The Present Status of Automatic Translation of Languages},
  journal   = {Advances in computers},
  volume    = {1},
  pages     = {91--163},
  year      = {1960}
}

@article{DBLP:journals/corr/abs-1901-09115,
  author    = {Andrei Popescu-Belis},
  title     = {Context in Neural Machine Translation: {A} Review of Models and Evaluations},
  journal   = {CoRR},
  volume    = {abs/1901.09115},
  year      = {2019}
}

@book{jurafsky2000speech,
  title     = {Speech \& language processing},
  author    = {Jurafsky, Dan},
  year      = {2000},
  publisher = {Pearson Education India}
}

@inproceedings{DBLP:conf/anlp/MarcuCW00,
  author    = {Daniel Marcu and Lynn Carlson and Maki Watanabe},
  title     = {The Automatic Translation of Discourse Structures},
  pages     = {9--17},
  publisher = {Applied Natural Language Processing Conference},
  year      = {2000}
}

@inproceedings{foster2010translating,
  title     = {Translating structured documents},
  author    = {Foster, George and Isabelle, Pierre and Kuhn, Roland},
booktitle = {Proceedings of AMTA},
  year      = {2010}
}

% EACL 2014 paper (key is conf/eacl): the venue is the European Chapter of the ACL, not EAMT.
@inproceedings{DBLP:conf/eacl/LouisW14,
  author    = {Annie Louis and Bonnie L. Webber},
  title     = {Structured and Unstructured Cache Models for {SMT} Domain Adaptation},
  pages     = {155--163},
  publisher = {Conference of the European Chapter of the Association for Computational Linguistics},
  year      = {2014}
}

@inproceedings{DBLP:conf/iwslt/HardmeierF10,
  author    = {Christian Hardmeier and Marcello Federico},
  title     = {Modelling pronominal anaphora in statistical machine translation},
  pages     = {283--289},
  publisher = {International Workshop on Spoken Language Translation},
  year      = {2010}
}

% The surname is "Le Nagard"; the comma form stops BibTeX from reading "Le" as a given name.
@inproceedings{DBLP:conf/wmt/NagardK10,
  author    = {Le Nagard, Ronan and Philipp Koehn},
  title     = {Aiding Pronoun Translation with Co-Reference Resolution},
  pages     = {252--261},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2010}
}

@inproceedings{DBLP:conf/eamt/LuongP16,
  author    = {Ngoc-Quang Luong and Andrei Popescu-Belis},
  title     = {A Contextual Language Model to Improve Machine Translation of Pronouns by Re-ranking Translation Hypotheses},
  pages     = {292--304},
  publisher = {European Association for Machine Translation},
  year      = {2016}
}

@inproceedings{tiedemann2010context,
  title     = {Context adaptation in statistical machine translation using models with exponentially decaying cache},
  author    = {Tiedemann, J{\"o}rg},
  publisher = {Domain Adaptation for Natural Language Processing},
  pages     = {8--15},
  year      = {2010}
}

@inproceedings{DBLP:conf/emnlp/GongZZ11,
  author    = {Zhengxian Gong and Min Zhang and Guodong Zhou},
  title     = {Cache-based Document-level Statistical Machine Translation},
  pages     = {909--919},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2011}
}

@inproceedings{DBLP:conf/ijcai/XiongBZLL13,
  author    = {Deyi Xiong and Guosheng Ben and Min Zhang and Yajuan Lv and Qun Liu},
  title     = {Modeling Lexical Cohesion for Document-Level Machine Translation},
  pages     = {2183--2189},
  publisher = {International Joint
Conference on Artificial Intelligence},
  year      = {2013}
}

@inproceedings{xiao2011document,
  title     = {Document-level consistency verification in machine translation},
  author    = {Xiao, Tong and Zhu, Jingbo and Yao, Shujie and Zhang, Hao},
  booktitle = {Machine Translation Summit},
  volume    = {13},
  pages     = {131--138},
  year      = {2011}
}

@inproceedings{DBLP:conf/sigdial/MeyerPZC11,
  author    = {Thomas Meyer and Andrei Popescu-Belis and Sandrine Zufferey and Bruno Cartoni},
  title     = {Multilingual Annotation and Disambiguation of Discourse Connectives for Machine Translation},
  pages     = {194--203},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2011}
}

@inproceedings{DBLP:conf/hytra/MeyerP12,
  author    = {Thomas Meyer and Andrei Popescu-Belis},
  title     = {Using Sense-labeled Discourse Connectives for Statistical Machine Translation},
  pages     = {129--138},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2012}
}

@inproceedings{DBLP:conf/nips/SutskeverVL14,
  author    = {Ilya Sutskever and Oriol Vinyals and Quoc V. Le},
  title     = {Sequence to Sequence Learning with Neural Networks},
  pages     = {3104--3112},
  publisher = {Annual Conference on Neural Information Processing Systems},
  year      = {2014}
}

@inproceedings{DBLP:conf/emnlp/LaubliS018,
  author    = {Samuel L{\"{a}}ubli and Rico Sennrich and Martin Volk},
  title     = {Has Machine Translation Achieved Human Parity?
{A} Case for Document-level Evaluation},
  pages     = {4791--4796},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
}

@article{DBLP:journals/corr/abs-1912-08494,
  author    = {Sameen Maruf and Fahimeh Saleh and Gholamreza Haffari},
  title     = {A Survey on Document-level Machine Translation: Methods and Evaluation},
  journal   = {CoRR},
  volume    = {abs/1912.08494},
  year      = {2019}
}

@inproceedings{DBLP:conf/discomt/TiedemannS17,
  author    = {J{\"{o}}rg Tiedemann and Yves Scherrer},
  title     = {Neural Machine Translation with Extended Context},
  pages     = {82--92},
  publisher = {Association for Computational Linguistics},
  year      = {2017}
}

@article{DBLP:journals/corr/abs-1910-07481,
  author    = {Valentin Mac{\'{e}} and Christophe Servan},
  title     = {Using Whole Document Context in Neural Machine Translation},
  journal   = {CoRR},
  volume    = {abs/1910.07481},
  year      = {2019}
}

@article{DBLP:journals/corr/JeanLFC17,
  author    = {S{\'{e}}bastien Jean and Stanislas Lauly and Orhan Firat and Kyunghyun Cho},
  title     = {Does Neural Machine Translation Benefit from Larger Context?},
  journal   = {CoRR},
  volume    = {abs/1704.05135},
  year      = {2017}
}

@inproceedings{DBLP:conf/acl/TitovSSV18,
  author    = {Elena Voita and Pavel Serdyukov and Rico Sennrich and Ivan Titov},
  title     = {Context-Aware Neural Machine Translation Learns Anaphora Resolution},
  pages     = {1264--1274},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2018}
}

@inproceedings{DBLP:conf/acl/HaffariM18,
  author    = {Sameen Maruf and Gholamreza Haffari},
  title     = {Document Context Neural Machine Translation with Memory Networks},
  pages     = {1275--1284},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2018}
}

@inproceedings{DBLP:conf/coling/KuangXLZ18,
  author    = {Shaohui Kuang and Deyi Xiong and Weihua Luo and Guodong Zhou},
  title     = {Modeling Coherence for Neural Machine Translation with Dynamic and Topic Caches},
  pages     = {596--606},
  publisher = {International
Conference on Computational Linguistics},
  year      = {2018}
}

@inproceedings{DBLP:conf/discomt/GarciaCE19,
  author    = {Eva Mart{\'{\i}}nez Garcia and Carles Creus and Cristina Espa{\~{n}}a-Bonet},
  title     = {Context-Aware Neural Machine Translation Decoding},
  pages     = {13--23},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}

@article{DBLP:journals/corr/abs-2010-12827,
  author    = {Amane Sugiyama and Naoki Yoshinaga},
  title     = {Context-aware Decoder for Neural Machine Translation using a Target-side Document-Level Language Model},
  journal   = {CoRR},
  volume    = {abs/2010.12827},
  year      = {2020}
}

@inproceedings{DBLP:conf/acl/VoitaST19,
  author    = {Elena Voita and Rico Sennrich and Ivan Titov},
  title     = {When a Good Translation is Wrong in Context: Context-Aware Machine Translation Improves on Deixis, Ellipsis, and Lexical Cohesion},
  pages     = {1198--1212},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}

@inproceedings{DBLP:conf/emnlp/VoitaST19,
  author    = {Elena Voita and Rico Sennrich and Ivan Titov},
  title     = {Context-Aware Monolingual Repair for Neural Machine Translation},
  pages     = {877--886},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2019}
}

@inproceedings{DBLP:conf/discomt/WerlenP17,
  author    = {Lesly Miculicich Werlen and Andrei Popescu-Belis},
  title     = {Validation of an Automatic Metric for the Accuracy of Pronoun Translation {(APT)}},
  pages     = {17--25},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2017}
}

@inproceedings{DBLP:conf/emnlp/WongK12,
  author    = {Billy Tak-Ming Wong and Chunyu Kit},
  title     = {Extending Machine Translation Evaluation Metrics with Lexical Cohesion to Document Level},
  pages     = {1060--1068},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2012}
}

@inproceedings{DBLP:conf/discomt/GongZZ15,
  author    = {Zhengxian Gong and Min Zhang and Guodong Zhou},
  title     =
{Document-Level Machine Translation Evaluation with Gist Consistency and Text Cohesion},
  pages     = {33--40},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2015}
}

@inproceedings{DBLP:conf/cicling/HajlaouiP13,
  author    = {Najeh Hajlaoui and Andrei Popescu-Belis},
  title     = {Assessing the Accuracy of Discourse Connective Translations: Validation of an Automatic Metric},
  volume    = {7817},
  pages     = {236--247},
  publisher = {Springer},
  year      = {2013}
}

@inproceedings{DBLP:conf/wmt/RiosMS18,
  author    = {Annette Rios and Mathias M{\"{u}}ller and Rico Sennrich},
  title     = {The Word Sense Disambiguation Test Suite at {WMT18}},
  pages     = {588--596},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2018}
}

@inproceedings{DBLP:conf/naacl/BawdenSBH18,
  author    = {Rachel Bawden and Rico Sennrich and Alexandra Birch and Barry Haddow},
  title     = {Evaluating Discourse Phenomena in Neural Machine Translation},
  pages     = {1304--1313},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2018}
}

@inproceedings{DBLP:conf/wmt/MullerRVS18,
  author    = {Mathias M{\"{u}}ller and Annette Rios and Elena Voita and Rico Sennrich},
  title     = {A Large-Scale Test Set for the Evaluation of Context-Aware Pronoun Translation in Neural Machine Translation},
  pages     = {61--72},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2018}
}

@inproceedings{DBLP:conf/iclr/KitaevKL20,
  author    = {Nikita Kitaev and Lukasz Kaiser and Anselm Levskaya},
  title     = {Reformer: The Efficient Transformer},
  publisher = {International Conference on Learning Representations},
  year      = {2020}
}

@inproceedings{agrawal2018contextual,
  title     = {Contextual handling in neural machine translation: Look behind, ahead and on both sides},
  author    = {Agrawal, Ruchit Rajeshkumar and Turchi, Marco and Negri, Matteo},
  booktitle = {Annual Conference of the European Association for Machine
Translation},
  pages     = {11--20},
  year      = {2018}
}

@inproceedings{DBLP:conf/emnlp/WerlenRPH18,
  author    = {Lesly Miculicich Werlen and Dhananjay Ram and Nikolaos Pappas and James Henderson},
  title     = {Document-Level Neural Machine Translation with Hierarchical Attention Networks},
  pages     = {2947--2954},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
}

@inproceedings{DBLP:conf/naacl/MarufMH19,
  author    = {Sameen Maruf and Andr{\'{e}} F. T. Martins and Gholamreza Haffari},
  title     = {Selective Attention for Context-aware Neural Machine Translation},
  pages     = {3092--3102},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2019}
}

@inproceedings{DBLP:conf/emnlp/TanZXZ19,
  author    = {Xin Tan and Longyin Zhang and Deyi Xiong and Guodong Zhou},
  title     = {Hierarchical Modeling of Global Context for Document-Level Neural Machine Translation},
  pages     = {1576--1585},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2019}
}

@inproceedings{DBLP:conf/emnlp/YangZMGFZ19,
  author    = {Zhengxin Yang and Jinchao Zhang and Fandong Meng and Shuhao Gu and Yang Feng and Jie Zhou},
  title     = {Enhancing Context Modeling with a Query-Guided Capsule Network for Document-level Translation},
  pages     = {1527--1537},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2019}
}

@inproceedings{DBLP:conf/ijcai/ZhengYHCB20,
  author    = {Zaixiang Zheng and Xiang Yue and Shujian Huang and Jiajun Chen and Alexandra Birch},
  title     = {Towards Making the Most of Context in Neural Machine Translation},
  pages     = {3983--3989},
  publisher = {International Joint Conference on Artificial Intelligence},
  year      = {2020}
}

% TACL is a journal: article entries need the journal field (standard styles ignore publisher for articles).
@article{DBLP:journals/tacl/TuLSZ18,
  author    = {Zhaopeng Tu and Yang Liu and Shuming Shi and Tong Zhang},
  title     = {Learning to Remember Translation History with a Continuous Cache},
  journal   = {Transactions of the Association for Computational
Linguistics},
  volume    = {6},
  pages     = {407--420},
  year      = {2018}
}

@inproceedings{DBLP:conf/discomt/ScherrerTL19,
  author    = {Yves Scherrer and J{\"{o}}rg Tiedemann and Sharid Lo{\'{a}}iciga},
  title     = {Analysing concatenation approaches to document-level {NMT} in two different domains},
  pages     = {51--61},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}

@inproceedings{DBLP:conf/wmt/GonzalesMS17,
  author    = {Annette Rios Gonzales and Laura Mascarell and Rico Sennrich},
  title     = {Improving Word Sense Disambiguation in Neural Machine Translation with Sense Embeddings},
  pages     = {11--19},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2017}
}

@inproceedings{DBLP:conf/acl/LiLWJXZLL20,
  author    = {Bei Li and Hui Liu and Ziyang Wang and Yufan Jiang and Tong Xiao and Jingbo Zhu and Tongran Liu and Changliang Li},
  title     = {Does Multi-Encoder Help? {A} Case Study on Context-Aware Neural Machine Translation},
  pages     = {3512--3518},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2020}
}

@inproceedings{DBLP:conf/discomt/KimTN19,
  author    = {Yunsu Kim and Duc Thanh Tran and Hermann Ney},
  title     = {When and Why is Document-level Context Useful in Neural Machine Translation?},
  pages     = {24--34},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}

@inproceedings{DBLP:conf/discomt/SugiyamaY19,
  author    = {Amane Sugiyama and Naoki Yoshinaga},
  title     = {Data augmentation using back-translation for context-aware neural machine translation},
  pages     = {35--44},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}

@inproceedings{DBLP:conf/pacling/YamagishiK19,
  author    = {Hayahide Yamagishi and Mamoru Komachi},
  title     = {Improving Context-Aware Neural Machine Translation with Target-Side Context},
  volume    = {1215},
  pages     = {112--122},
  publisher = {Springer},
  year      = {2019}
}
@inproceedings{DBLP:conf/emnlp/ZhangLSZXZL18,
  author    = {Jiacheng Zhang and Huanbo Luan and Maosong Sun and Feifei Zhai and Jingfang Xu and Min Zhang and Yang Liu},
  title     = {Improving the Transformer Translation Model with Document-Level Context},
  pages     = {533--542},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2018}
}

@inproceedings{DBLP:conf/coling/KuangX18,
  author    = {Shaohui Kuang and Deyi Xiong},
  title     = {Fusing Recency into Neural Machine Translation with an Inter-Sentence Gate Model},
  pages     = {607--617},
  publisher = {International Conference on Computational Linguistics},
  year      = {2018}
}

@inproceedings{DBLP:conf/emnlp/WangTWL17,
  author    = {Longyue Wang and Zhaopeng Tu and Andy Way and Qun Liu},
  title     = {Exploiting Cross-Sentence Context for Neural Machine Translation},
  pages     = {2826--2831},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2017}
}

% AAAI 2019 paper: the venue goes in publisher, as in the other AAAI entries of this file.
@inproceedings{DBLP:conf/aaai/XiongH0W19,
  author    = {Hao Xiong and Zhongjun He and Hua Wu and Haifeng Wang},
  title     = {Modeling Coherence for Discourse Neural Machine Translation},
  pages     = {7338--7345},
  publisher = {AAAI Conference on Artificial Intelligence},
  year      = {2019}
}

@article{DBLP:journals/tacl/YuSSLKBD20,
  author    = {Lei Yu and Laurent Sartran and Wojciech Stokowiec and Wang Ling and Lingpeng Kong and Phil Blunsom and Chris Dyer},
  title     = {Better Document-Level Machine Translation with Bayes' Rule},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {8},
  pages     = {346--360},
  year      = {2020}
}

@article{DBLP:journals/corr/abs-1903-04715,
  author    = {S{\'{e}}bastien Jean and Kyunghyun Cho},
  title     = {Context-Aware Learning for Neural Machine Translation},
  journal   = {CoRR},
  volume    = {abs/1903.04715},
  year      = {2019}
}

@inproceedings{DBLP:conf/acl/SaundersSB20,
  author    = {Danielle Saunders and Felix Stahlberg and Bill Byrne},
  title     = {Using Context in Neural Machine Translation Training Objectives},
  pages     = {7764--7770},
  publisher = {Annual
Meeting of the Association for Computational Linguistics},
  year      = {2020}
}

@inproceedings{DBLP:conf/mtsummit/StojanovskiF19,
  author    = {Dario Stojanovski and Alexander M. Fraser},
  title     = {Improving Anaphora Resolution in Neural Machine Translation Using Curriculum Learning},
  pages     = {140--150},
  publisher = {Annual Conference of the European Association for Machine Translation},
  year      = {2019}
}

% Article entries need the journal field; the venue was stored in publisher, which article styles ignore.
@article{DBLP:journals/corr/abs-1911-03110,
  author    = {Liangyou Li and Xin Jiang and Qun Liu},
  title     = {Pretrained Language Models for Document-Level Neural Machine Translation},
  journal   = {CoRR},
  volume    = {abs/1911.03110},
  year      = {2019}
}

@article{DBLP:journals/tacl/LiuGGLEGLZ20,
  author    = {Yinhan Liu and Jiatao Gu and Naman Goyal and Xian Li and Sergey Edunov and Marjan Ghazvininejad and Mike Lewis and Luke Zettlemoyer},
  title     = {Multilingual Denoising Pre-training for Neural Machine Translation},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {8},
  pages     = {726--742},
  year      = {2020}
}

@inproceedings{DBLP:conf/wmt/MarufMH18,
  author    = {Sameen Maruf and Andr{\'{e}} F. T. Martins and Gholamreza Haffari},
  title     = {Contextual Neural Model for Translating Bilingual Multi-Speaker Conversations},
  pages     = {101--112},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2018}
}

% INTERSPEECH is run by the International Speech Communication Association (ISCA),
% not the International Symposium on Computer Architecture.
@inproceedings{DBLP:conf/interspeech/SainathWSWV15,
  author    = {Tara N. Sainath and Ron J. Weiss and Andrew W. Senior and Kevin W. Wilson and Oriol Vinyals},
  title     = {Learning the speech front-end with raw waveform {CLDNNs}},
  pages     = {1--5},
  publisher = {Annual Conference of the International Speech Communication Association},
  year      = {2015}
}

@inproceedings{DBLP:conf/icassp/MohamedHP12,
  author    = {Abdel-rahman Mohamed and Geoffrey E.
Hinton and Gerald Penn},
  title     = {Understanding how Deep Belief Networks perform acoustic modelling},
  pages     = {4273--4276},
  publisher = {IEEE International Conference on Acoustics, Speech and Signal Processing},
  year      = {2012}
}
% The entry above is an ICASSP conference paper, so it names the conference rather than an IEEE Transactions journal.

% Journal name written out in full instead of the abbreviated form.
@article{DBLP:journals/ftsig/GalesY07,
  author    = {Mark J. F. Gales and Steve J. Young},
  title     = {The Application of Hidden Markov Models in Speech Recognition},
  journal   = {Foundations and Trends in Signal Processing},
  volume    = {1},
  number    = {3},
  pages     = {195--304},
  year      = {2007}
}

% Volume 20 (2012) appeared under the title IEEE Transactions on Audio, Speech, and Language Processing.
@article{DBLP:journals/taslp/MohamedDH12,
  author    = {Abdel-rahman Mohamed and George E. Dahl and Geoffrey E. Hinton},
  title     = {Acoustic Modeling Using Deep Belief Networks},
  journal   = {IEEE Transactions on Audio, Speech, and Language Processing},
  volume    = {20},
  number    = {1},
  pages     = {14--22},
  year      = {2012}
}

% Author list added: author is a required field for article entries.
@article{DBLP:journals/spm/X12a,
  author    = {Geoffrey E. Hinton and Li Deng and Dong Yu and George E. Dahl and Abdel-rahman Mohamed and Navdeep Jaitly and Andrew Senior and Vincent Vanhoucke and Patrick Nguyen and Tara N. Sainath and Brian Kingsbury},
  title     = {Deep Neural Networks for Acoustic Modeling in Speech Recognition: The Shared Views of Four Research Groups},
  journal   = {IEEE Signal Processing Magazine},
  volume    = {29},
  number    = {6},
  pages     = {82--97},
  year      = {2012}
}

@inproceedings{DBLP:conf/nips/ChorowskiBSCB15,
  author    = {Jan Chorowski and Dzmitry Bahdanau and Dmitriy Serdyuk and Kyunghyun Cho and Yoshua Bengio},
  title     = {Attention-Based Models for Speech Recognition},
  publisher = {Annual Conference on Neural Information Processing Systems},
  pages     = {577--585},
  year      = {2015}
}

@inproceedings{DBLP:conf/icassp/ChanJLV16,
  author    = {William Chan and Navdeep Jaitly and Quoc V.
Le and Oriol Vinyals},
  title     = {Listen, attend and spell: A neural network for large vocabulary conversational speech recognition},
  publisher = {IEEE International Conference on Acoustics, Speech and Signal Processing},
  pages     = {4960--4964},
  year      = {2016}
}
% The entry above is an ICASSP conference paper, so it names the conference rather than an IEEE Transactions journal.

@inproceedings{DBLP:conf/acl/ZhangGCF19,
  author    = {Pei Zhang and Niyu Ge and Boxing Chen and Kai Fan},
  title     = {Lattice Transformer for Speech Translation},
  pages     = {6475--6484},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}

@inproceedings{DBLP:conf/acl/SperberNPW19,
  author    = {Matthias Sperber and Graham Neubig and Ngoc-Quan Pham and Alex Waibel},
  title     = {Self-Attentional Models for Lattice Inputs},
  pages     = {1185--1197},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}

@inproceedings{DBLP:conf/acl/LiuTMCZ18,
  author    = {Yong Cheng and Zhaopeng Tu and Fandong Meng and Junjie Zhai and Yang Liu},
  title     = {Towards Robust Neural Machine Translation},
  pages     = {1756--1766},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2018}
}

@inproceedings{DBLP:conf/naacl/DuongACBC16,
  author    = {Long Duong and Antonios Anastasopoulos and David Chiang and Steven Bird and Trevor Cohn},
  title     = {An Attentional Model for Speech Translation Without Transcription},
  pages     = {949--959},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2016}
}

@article{DBLP:journals/corr/BerardPSB16,
  author    = {Alexandre Berard and Olivier Pietquin and Christophe Servan and Laurent Besacier},
  title     = {Listen and Translate: A Proof of Concept for End-to-End Speech-to-Text Translation},
  journal   = {CoRR},
  volume    = {abs/1612.01744},
  year      = {2016}
}

@inproceedings{DBLP:conf/interspeech/WeissCJWC17,
  author    = {Ron J.
Weiss and Jan Chorowski and Navdeep Jaitly and Yonghui Wu and Zhifeng Chen},
  title     = {Sequence-to-Sequence Models Can Directly Translate Foreign Speech},
  pages     = {2625--2629},
  publisher = {Annual Conference of the International Speech Communication Association},
  year      = {2017}
}
% The entry above is an INTERSPEECH paper: ISCA is the International Speech Communication Association,
% not the International Symposium on Computer Architecture.

% The surname is "Di Gangi"; the comma form stops BibTeX from reading "Di" as a given name.
@inproceedings{DBLP:conf/mtsummit/GangiNCDT19,
  author    = {Di Gangi, Mattia Antonino and Matteo Negri and Roldano Cattoni and Roberto Dess{\`{\i}} and Marco Turchi},
  title     = {Enhancing Transformer for End-to-end Speech-to-Text Translation},
  pages     = {21--31},
  publisher = {European Association for Machine Translation},
  year      = {2019}
}

@inproceedings{DBLP:conf/icml/GravesFGS06,
  author    = {Alex Graves and Santiago Fern{\'{a}}ndez and Faustino J. Gomez and J{\"{u}}rgen Schmidhuber},
  title     = {Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks},
  volume    = {148},
  pages     = {369--376},
  publisher = {International Conference on Machine Learning},
  year      = {2006}
}

@article{DBLP:journals/jstsp/WatanabeHKHH17,
  author    = {Shinji Watanabe and Takaaki Hori and Suyoun Kim and John R.
Hershey and Tomoki Hayashi},
  title     = {Hybrid CTC/Attention Architecture for End-to-End Speech Recognition},
  journal   = {IEEE Journal of Selected Topics in Signal Processing},
  volume    = {11},
  number    = {8},
  pages     = {1240--1253},
  year      = {2017}
}

% ICASSP conference paper: names the conference rather than an IEEE Transactions journal.
@inproceedings{DBLP:conf/icassp/KimHW17,
  author    = {Suyoun Kim and Takaaki Hori and Shinji Watanabe},
  title     = {Joint CTC-attention based end-to-end speech recognition using multi-task learning},
  pages     = {4835--4839},
  publisher = {IEEE International Conference on Acoustics, Speech and Signal Processing},
  year      = {2017}
}

@article{DBLP:journals/pami/ShiBY17,
  author    = {Baoguang Shi and Xiang Bai and Cong Yao},
  title     = {An End-to-End Trainable Neural Network for Image-Based Sequence Recognition and Its Application to Scene Text Recognition},
  journal   = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  volume    = {39},
  number    = {11},
  pages     = {2298--2304},
  year      = {2017}
}

@inproceedings{DBLP:conf/naacl/AnastasopoulosC18,
  author    = {Antonios Anastasopoulos and David Chiang},
  title     = {Tied Multitask Learning for Neural Speech Translation},
  pages     = {82--91},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2018}
}

@inproceedings{DBLP:conf/asru/BaharBN19,
  author    = {Parnia Bahar and Tobias Bieschke and Hermann Ney},
  title     = {A Comparative Study on End-to-End Speech to Text Translation},
  pages     = {792--799},
  publisher = {IEEE Automatic Speech Recognition and Understanding Workshop},
  year      = {2019}
}

@inproceedings{DBLP:conf/naacl/BansalKLLG19,
  author    = {Sameer Bansal and Herman Kamper and Karen Livescu and Adam Lopez and Sharon Goldwater},
  title     = {Pre-training on high-resource speech recognition improves low-resource speech-to-text translation},
  pages     = {58--68},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2019}
}

@inproceedings{DBLP:conf/icassp/BerardBKP18,
  author    = {Alexandre Berard and
Laurent Besacier and Ali Can Kocabiyikoglu and Olivier Pietquin},
  title     = {End-to-End Automatic Speech Translation of Audiobooks},
  pages     = {6224--6228},
  publisher = {IEEE International Conference on Acoustics, Speech and Signal Processing},
  year      = {2018}
}

% ICASSP entries name the conference rather than an IEEE Transactions journal.
@inproceedings{DBLP:conf/icassp/JiaJMWCCALW19,
  author    = {Ye Jia and Melvin Johnson and Wolfgang Macherey and Ron J. Weiss and Yuan Cao and Chung-Cheng Chiu and Naveen Ari and Stella Laurenzo and Yonghui Wu},
  title     = {Leveraging Weakly Supervised Data to Improve End-to-end Speech-to-text Translation},
  pages     = {7180--7184},
  publisher = {IEEE International Conference on Acoustics, Speech and Signal Processing},
  year      = {2019}
}

% INTERSPEECH is run by the International Speech Communication Association (ISCA),
% not the International Symposium on Computer Architecture.
@inproceedings{DBLP:conf/interspeech/WuWPG20,
  author    = {Anne Wu and Changhan Wang and Juan Pino and Jiatao Gu},
  title     = {Self-Supervised Representations Improve End-to-End Speech Translation},
  pages     = {1491--1495},
  publisher = {Annual Conference of the International Speech Communication Association},
  year      = {2020}
}

@inproceedings{DBLP:conf/interspeech/LiuXZHWWZ19,
  author    = {Yuchen Liu and Hao Xiong and Jiajun Zhang and Zhongjun He and Hua Wu and Haifeng Wang and Chengqing Zong},
  title     = {End-to-End Speech Translation with Knowledge Distillation},
  pages     = {1128--1132},
  publisher = {Annual Conference of the International Speech Communication Association},
  year      = {2019}
}

% EMNLP 2020 paper (key is conf/emnlp): venue aligned with the other EMNLP entries of this file.
@inproceedings{DBLP:conf/emnlp/AlinejadS20,
  author    = {Ashkan Alinejad and Anoop Sarkar},
  title     = {Effectively pretraining a speech translation decoder with Machine Translation data},
  pages     = {8014--8020},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2020}
}

@article{DBLP:journals/corr/abs-1802-06003,
  author    = {Takatomo Kano and Sakriani Sakti and Satoshi Nakamura},
  title     = {Structured-based Curriculum Learning for End-to-end English-Japanese Speech Translation},
  journal   = {CoRR},
  volume    = {abs/1802.06003},
  year      = {2018}
}

@inproceedings{DBLP:conf/acl/WangWLZY20,
  author    = {Chengyi Wang and Yu Wu and Shujie Liu and Ming Zhou and Zhenglu Yang},
  title     =
{Curriculum Pre-training for End-to-End Speech Translation}, pages = {3728--3738}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2020} } @book{DBLP:books/daglib/0071550, author = {Lawrence R. Rabiner and Biing-Hwang Juang}, title = {Fundamentals of speech recognition}, series = {Prentice Hall signal processing series}, publisher = {Prentice Hall}, year = {1993} } @book{benesty2008automatic, title={Automatic Speech Recognition: a Deep Learning Approach}, author={Dong Yu,Li Deng}, year={2008}, publisher={Springer} } @book{Huang2001SpokenLP, title={Spoken Language Processing: A Guide to Theory, Algorithm and System Development}, author={Xuedong Huang and Alex Acero and Hsiao-Wuen Hon}, year={2001}, publisher={Prentice Hall PTR} } @book{Quatieri2001DiscreteTimeSS, title={Discrete-Time Speech Signal Processing: Principles and Practice}, author={Thomas F. Quatieri}, year={2001}, publisher={Prentice Hall PTR} } @inproceedings{Oppenheim2001DiscretetimeSP, title={Discrete-time Signal Processing}, author={Alan V. Oppenheim and Ronald W. 
Schafer}, year={2009}, publisher={Pearson} } @book{洪青阳2020语音识别原理与应用, title={语音识别:原理与应用}, author={洪青阳,李琳}, publisher={电子工业出版社}, year={2020} } @book{陈果果2020语音识别实战, title={Kaldi语音识别实战}, author={陈果果 and 都家宇 and 那兴宇 and 张俊博}, publisher={电子工业出版社}, year={2020} } %%%%%%%%%%%%%%%%%王屹超部分,孟霞加%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @inproceedings{DBLP:conf/mm/LinMSYYGZL20, author = {Huan Lin and Fandong Meng and Jinsong Su and Yongjing Yin and Zhengyuan Yang and Yubin Ge and Jie Zhou and Jiebo Luo}, title = {Dynamic Context-guided Capsule Network for Multimodal Machine Translation}, pages = {1320--1329}, publisher = { ACM Multimedia}, year = {2020} } @inproceedings{DBLP:conf/wmt/SpeciaFSE16, author = {Lucia Specia and Stella Frank and Khalil Sima'an and Desmond Elliott}, title = {A Shared Task on Multimodal Machine Translation and Crosslingual Image Description}, pages = {543--553}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2016} } @inproceedings{DBLP:conf/wmt/ElliottFBBS17, author = {Desmond Elliott and Stella Frank and Lo{\"{\i}}c Barrault and Fethi Bougares and Lucia Specia}, title = {Findings of the Second Shared Task on Multimodal Machine Translation and Multilingual Image Description}, pages = {215--233}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2017} } @inproceedings{DBLP:conf/wmt/BarraultBSLEF18, author = {Lo{\"{\i}}c Barrault and Fethi Bougares and Lucia Specia and Chiraag Lala and Desmond Elliott and Stella Frank}, title = {Findings of the Third Shared Task on Multimodal Machine Translation}, pages = {304--323}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2018} } @inproceedings{DBLP:conf/wmt/CaglayanABGBBMH17, author = {Ozan Caglayan and Walid Aransa and Adrien Bardet and Mercedes Garc{\'{\i}}a-Mart{\'{\i}}nez and Fethi Bougares and Lo{\"{\i}}c Barrault and Marc Masana and Luis Herranz and Joost van de Weijer}, title = {{LIUM-CVC} Submissions 
for {WMT17} Multimodal Translation Task}, pages = {432--439}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2017} } @inproceedings{DBLP:conf/wmt/LibovickyHTBP16, author = {Jindrich Libovick{\'{y}} and Jindrich Helcl and Marek Tlust{\'{y}} and Ondrej Bojar and Pavel Pecina}, title = {{CUNI} System for {WMT16} Automatic Post-Editing and Multimodal Translation Tasks}, pages = {646--654}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2016} } @inproceedings{DBLP:conf/emnlp/CalixtoL17, author = {Iacer Calixto and Qun Liu}, title = {Incorporating Global Visual Features into Attention-based Neural Machine Translation}, pages = {992--1003}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2017} } @inproceedings{DBLP:conf/wmt/HuangLSOD16, author = {Po-Yao Huang and Frederick Liu and Sz-Rung Shiang and Jean Oh and Chris Dyer}, title = {Attention-based Multimodal Neural Machine Translation}, pages = {639--645}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2016} } @article{Elliott2015MultilingualID, title={Multilingual Image Description with Neural Sequence Models}, author={Desmond Elliott and Stella Frank and Eva Hasler}, journal={arXiv: Computation and Language}, year={2015} } @inproceedings{DBLP:conf/wmt/MadhyasthaWS17, author = {Pranava Swaroop Madhyastha and Josiah Wang and Lucia Specia}, title = {Sheffield MultiMT: Using Object Posterior Predictions for Multimodal Machine Translation}, pages = {470--476}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2017} } @article{DBLP:journals/corr/CaglayanBB16, author = {Ozan Caglayan and Lo{\"{\i}}c Barrault and Fethi Bougares}, title = {Multimodal Attention for Neural Machine Translation}, journal = {CoRR}, volume = {abs/1609.03976}, year = {2016} } @inproceedings{DBLP:conf/acl/CalixtoLC17, author = {Iacer Calixto and Qun Liu and 
Nick Campbell}, title = {Doubly-Attentive Decoder for Multi-modal Neural Machine Translation}, pages = {1913--1924}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2017} } @article{DBLP:journals/corr/DelbrouckD17, author = {Jean-Benoit Delbrouck and St{\'{e}}phane Dupont}, title = {Multimodal Compact Bilinear Pooling for Multimodal Neural Machine Translation}, journal = {CoRR}, volume = {abs/1703.08084}, year = {2017} } @inproceedings{DBLP:conf/acl/LibovickyH17, author = {Jindrich Libovick{\'{y}} and Jindrich Helcl}, title = {Attention Strategies for Multi-Source Sequence-to-Sequence Learning}, pages = {196--202}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2017} } @article{DBLP:journals/corr/abs-1712-03449, author = {Jean-Benoit Delbrouck and St{\'{e}}phane Dupont}, title = {Modulating and attending the source image during encoding improves Multimodal Translation}, journal = {CoRR}, volume = {abs/1712.03449}, year = {2017} } @article{DBLP:journals/corr/abs-1807-11605, author = {Hasan Sait Arslan and Mark Fishel and Gholamreza Anbarjafari}, title = {Doubly Attentive Transformer Machine Translation}, journal = {CoRR}, volume = {abs/1807.11605}, year = {2018} } @inproceedings{DBLP:conf/wmt/HelclLV18, author = {Jindrich Helcl and Jindrich Libovick{\'{y}} and Dusan Varis}, title = {{CUNI} System for the {WMT18} Multimodal Translation Task}, pages = {616--623}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2018} } @inproceedings{DBLP:conf/ijcnlp/ElliottK17, author = {Desmond Elliott and {\'{A}}kos K{\'{a}}d{\'{a}}r}, title = {Imagination Improves Multimodal Translation}, pages = {130--141}, publisher = {International Joint Conference on Natural Language Processing}, year = {2017} } @inproceedings{DBLP:conf/emnlp/ZhouCLY18, author = {Mingyang Zhou and Runxiang Cheng and Yong Jae Lee and Zhou Yu}, title = {A Visual Attention Grounding Neural Model 
for Multimodal Machine Translation}, pages = {3643--3653}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2018} } @inproceedings{DBLP:conf/acl/CalixtoRA19, author = {Iacer Calixto and Miguel Rios and Wilker Aziz}, title = {Latent Variable Model for Multi-modal Translation}, pages = {6392--6405}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2019} } @inproceedings{DBLP:conf/acl/YinMSZYZL20, author = {Yongjing Yin and Fandong Meng and Jinsong Su and Chulun Zhou and Zhengyuan Yang and Jie Zhou and Jiebo Luo}, title = {A Novel Graph-based Multi-modal Fusion Encoder for Neural Machine Translation}, pages = {3025--3035}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2020} } @inproceedings{DBLP:conf/acl/YaoW20, author = {Shaowei Yao and Xiaojun Wan}, title = {Multimodal Transformer for Multimodal Machine Translation}, pages = {4346--4350}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2020} } @inproceedings{DBLP:conf/nips/LuYBP16, author = {Jiasen Lu and Jianwei Yang and Dhruv Batra and Devi Parikh}, title = {Hierarchical Question-Image Co-Attention for Visual Question Answering}, booktitle = {Conference on Neural Information Processing Systems}, pages = {289--297}, year = {2016} } @inproceedings{DBLP:conf/cvpr/VinyalsTBE15, author = {Oriol Vinyals and Alexander Toshev and Samy Bengio and Dumitru Erhan}, title = {Show and tell: {A} neural image caption generator}, pages = {3156--3164}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition}, year = {2015} } @inproceedings{DBLP:conf/icml/XuBKCCSZB15, author = {Kelvin Xu and Jimmy Ba and Ryan Kiros and Kyunghyun Cho and Aaron C. Courville and Ruslan Salakhutdinov and Richard S. 
Zemel and Yoshua Bengio}, title = {Show, Attend and Tell: Neural Image Caption Generation with Visual Attention}, volume = {37}, pages = {2048--2057}, publisher = {International Conference on Machine Learning}, year = {2015} } @inproceedings{DBLP:conf/cvpr/YouJWFL16, author = {Quanzeng You and Hailin Jin and Zhaowen Wang and Chen Fang and Jiebo Luo}, title = {Image Captioning with Semantic Attention}, pages = {4651--4659}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition}, year = {2016} } @inproceedings{DBLP:conf/cvpr/ChenZXNSLC17, author = {Long Chen and Hanwang Zhang and Jun Xiao and Liqiang Nie and Jian Shao and Wei Liu and Tat-Seng Chua}, title = {{SCA-CNN:} Spatial and Channel-Wise Attention in Convolutional Networks for Image Captioning}, pages = {6298--6306}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition}, year = {2017} } @article{DBLP:journals/pami/FuJCSZ17, author = {Kun Fu and Junqi Jin and Runpeng Cui and Fei Sha and Changshui Zhang}, title = {Aligning Where to See and What to Tell: Image Captioning with Region-Based Attention and Scene-Specific Contexts}, journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, volume = {39}, number = {12}, pages = {2321--2334}, year = {2017} } @inproceedings{DBLP:conf/eccv/YaoPLM18, author = {Ting Yao and Yingwei Pan and Yehao Li and Tao Mei}, title = {Exploring Visual Relationship for Image Captioning}, series = {Lecture Notes in Computer Science}, volume = {11218}, pages = {711--727}, publisher = {European Conference on Computer Vision}, year = {2018} } @inproceedings{DBLP:conf/ijcai/LiuSWWY17, author = {Chang Liu and Fuchun Sun and Changhu Wang and Feng Wang and Alan L. 
Yuille}, title = {{MAT:} {A} Multimodal Attentive Translator for Image Captioning}, pages = {4033--4039}, publisher = {International Joint Conference on Artificial Intelligence}, year = {2017} } @article{DBLP:journals/corr/abs-1804-02767, author = {Joseph Redmon and Ali Farhadi}, title = {YOLOv3: An Incremental Improvement}, journal = {CoRR}, volume = {abs/1804.02767}, year = {2018} } @article{DBLP:journals/corr/abs-2004-10934, author = {Alexey Bochkovskiy and Chien-Yao Wang and Hong-Yuan Mark Liao}, title = {YOLOv4: Optimal Speed and Accuracy of Object Detection}, journal = {CoRR}, volume = {abs/2004.10934}, year = {2020} } @inproceedings{DBLP:conf/cvpr/LuXPS17, author = {Jiasen Lu and Caiming Xiong and Devi Parikh and Richard Socher}, title = {Knowing When to Look: Adaptive Attention via a Visual Sentinel for Image Captioning}, pages = {3242--3250}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition}, year = {2017} } @inproceedings{DBLP:conf/cvpr/00010BT0GZ18, author = {Peter Anderson and Xiaodong He and Chris Buehler and Damien Teney and Mark Johnson and Stephen Gould and Lei Zhang}, title = {Bottom-Up and Top-Down Attention for Image Captioning and Visual Question Answering}, pages = {6077--6086}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition}, year = {2018} } @inproceedings{DBLP:conf/mm/ZhouXKC17, author = {Luowei Zhou and Chenliang Xu and Parker A. Koch and Jason J. Corso}, title = {Watch What You Just Said: Image Captioning with Text-Conditional Attention}, pages = {305--313}, publisher = {ACM Multimedia}, year = {2017} } @article{DBLP:journals/mta/FangWCT18, author = {Fang Fang and Hanli Wang and Yihao Chen and Pengjie Tang}, title = {Looking deeper and transferring attention for image captioning}, journal = {Multimedia Tools Applications}, volume = {77}, number = {23}, pages = {31159--31175}, year = {2018} } @inproceedings{DBLP:conf/cvpr/AnejaDS18, author = {Jyoti Aneja and Aditya Deshpande and Alexander G. 
Schwing}, title = {Convolutional Image Captioning}, pages = {5561--5570}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition}, year = {2018} } @article{DBLP:journals/corr/abs-1805-09019, author = {Qingzhong Wang and Antoni B. Chan}, title = {{CNN+CNN:} Convolutional Decoders for Image Captioning}, journal = {CoRR}, volume = {abs/1805.09019}, year = {2018} } @inproceedings{DBLP:conf/eccv/DaiYL18, author = {Bo Dai and Deming Ye and Dahua Lin}, title = {Rethinking the Form of Latent States in Image Captioning}, volume = {11209}, pages = {294--310}, publisher = {European Conference on Computer Vision}, year = {2018} } @inproceedings{DBLP:conf/iccv/AntolALMBZP15, author = {Stanislaw Antol and Aishwarya Agrawal and Jiasen Lu and Margaret Mitchell and Dhruv Batra and C. Lawrence Zitnick and Devi Parikh}, title = {{VQA:} Visual Question Answering}, pages = {2425--2433}, publisher = {International Conference on Computer Vision}, year = {2015} } @inproceedings{DBLP:conf/eccv/CarionMSUKZ20, author = {Nicolas Carion and Francisco Massa and Gabriel Synnaeve and Nicolas Usunier and Alexander Kirillov and Sergey Zagoruyko}, title = {End-to-End Object Detection with Transformers}, volume = {12346}, pages = {213--229}, publisher = {European Conference on Computer Vision}, year = {2020} } @article{DBLP:journals/tcsv/YuLYH20, author = {Jun Yu and Jing Li and Zhou Yu and Qingming Huang}, title = {Multimodal Transformer With Multi-View Visual Representation for Image Captioning}, journal = {IEEE Transactions on Circuits and Systems for Video Technology}, volume = {30}, number = {12}, pages = {4467--4480}, year = {2020} } @article{Huasong2020SelfAdaptiveNM, title={Self-Adaptive Neural Module Transformer for Visual Question Answering}, author={Zhong Huasong and Jingyuan Chen and Chen Shen and Hanwang Zhang and Jianqiang Huang and Xian-Sheng Hua}, journal={IEEE Transactions on Multimedia}, year={2020}, pages={1-1} } @inproceedings{DBLP:conf/emnlp/GokhaleBBY20, author 
= {Tejas Gokhale and Pratyay Banerjee and Chitta Baral and Yezhou Yang}, title = {{MUTANT:} {A} Training Paradigm for Out-of-Distribution Generalization in Visual Question Answering}, pages = {878--892}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2020} } @inproceedings{DBLP:conf/eccv/Tang0ZWY20, author = {Ruixue Tang and Chao Ma and Wei Emma Zhang and Qi Wu and Xiaokang Yang}, title = {Semantic Equivalent Adversarial Data Augmentation for Visual Question Answering}, volume = {12364}, pages = {437--453}, publisher = { European Conference on Computer Vision}, year = {2020} } @inproceedings{DBLP:conf/eccv/Li0LZHZWH0WCG20, author = {Xiujun Li and Xi Yin and Chunyuan Li and Pengchuan Zhang and Xiaowei Hu and Lei Zhang and Lijuan Wang and Houdong Hu and Li Dong and Furu Wei and Yejin Choi and Jianfeng Gao}, title = {Oscar: Object-Semantics Aligned Pre-training for Vision-Language Tasks}, volume = {12375}, pages = {121--137}, publisher = { European Conference on Computer Vision}, year = {2020} } @inproceedings{DBLP:conf/aaai/ZhouPZHCG20, author = {Luowei Zhou and Hamid Palangi and Lei Zhang and Houdong Hu and Jason J. Corso and Jianfeng Gao}, title = {Unified Vision-Language Pre-Training for Image Captioning and {VQA}}, pages = {13041--13049}, publisher = {AAAI Conference on Artificial Intelligence}, year = {2020} } @inproceedings{DBLP:conf/iclr/SuZCLLWD20, author = {Weijie Su and Xizhou Zhu and Yue Cao and Bin Li and Lewei Lu and Furu Wei and Jifeng Dai}, title = {{VL-BERT:} Pre-training of Generic Visual-Linguistic Representations}, publisher = {International Conference on Learning Representations}, year = {2020} } @inproceedings{DBLP:conf/nips/GoodfellowPMXWOCB14, author = {Ian J. Goodfellow and Jean Pouget-Abadie and Mehdi Mirza and Bing Xu and David Warde-Farley and Sherjil Ozair and Aaron C. 
Courville and Yoshua Bengio}, title = {Generative Adversarial Nets}, publisher = {Conference on Neural Information Processing Systems}, pages = {2672--2680}, year = {2014} } @inproceedings{DBLP:conf/nips/ZhuZPDEWS17, author = {Jun-Yan Zhu and Richard Zhang and Deepak Pathak and Trevor Darrell and Alexei A. Efros and Oliver Wang and Eli Shechtman}, title = {Toward Multimodal Image-to-Image Translation}, publisher = {Conference on Neural Information Processing Systems}, pages = {465--476}, year = {2017} } @article{DBLP:journals/corr/abs-1908-06616, author = {Hajar Emami and Majid Moradi Aliabadi and Ming Dong and Ratna Babu Chinnam}, title = {{SPA-GAN:} Spatial Attention {GAN} for Image-to-Image Translation}, journal = {CoRR}, volume = {abs/1908.06616}, year = {2019} } @article{DBLP:journals/access/XiongWG19, author = {Feng Xiong and Qianqian Wang and Quanxue Gao}, title = {Consistent Embedded {GAN} for Image-to-Image Translation}, journal = {International Conference on Access Networks}, volume = {7}, pages = {126651--126661}, year = {2019} } @inproceedings{DBLP:conf/iccv/ZhuPIE17, author = {Jun-Yan Zhu and Taesung Park and Phillip Isola and Alexei A. 
Efros}, title = {Unpaired Image-to-Image Translation Using Cycle-Consistent Adversarial Networks}, pages = {2242--2251}, publisher = {International Conference on Computer Vision}, year = {2017} } @inproceedings{DBLP:conf/iccv/YiZTG17, author = {Zili Yi and Hao (Richard) Zhang and Ping Tan and Minglun Gong}, title = {DualGAN: Unsupervised Dual Learning for Image-to-Image Translation}, pages = {2868--2876}, publisher = {International Conference on Computer Vision}, year = {2017} } @inproceedings{DBLP:conf/nips/LiuBK17, author = {Ming-Yu Liu and Thomas Breuel and Jan Kautz}, title = {Unsupervised Image-to-Image Translation Networks}, publisher = {Conference on Neural Information Processing Systems}, pages = {700--708}, year = {2017} } @inproceedings{DBLP:conf/cvpr/IsolaZZE17, author = {Phillip Isola and Jun-Yan Zhu and Tinghui Zhou and Alexei A. Efros}, title = {Image-to-Image Translation with Conditional Adversarial Networks}, pages = {5967--5976}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition}, year = {2017} } @inproceedings{DBLP:conf/icml/ReedAYLSL16, author = {Scott E. Reed and Zeynep Akata and Xinchen Yan and Lajanugen Logeswaran and Bernt Schiele and Honglak Lee}, title = {Generative Adversarial Text to Image Synthesis}, volume = {48}, pages = {1060--1069}, publisher = {International Conference on Machine Learning}, year = {2016} } @article{DBLP:journals/corr/DashGALA17, author = {Ayushman Dash and John Cristian Borges Gamboa and Sheraz Ahmed and Marcus Liwicki and Muhammad Zeshan Afzal}, title = {{TAC-GAN} - Text Conditioned Auxiliary Classifier Generative Adversarial Network}, journal = {CoRR}, volume = {abs/1703.06412}, year = {2017} } @inproceedings{DBLP:conf/nips/ReedAMTSL16, author = {Scott E. 
Reed and Zeynep Akata and Santosh Mohan and Samuel Tenka and Bernt Schiele and Honglak Lee}, title = {Learning What and Where to Draw}, publisher = {Conference on Neural Information Processing Systems}, pages = {217--225}, year = {2016} } @inproceedings{DBLP:conf/cvpr/ZhangXY18, author = {Zizhao Zhang and Yuanpu Xie and Lin Yang}, title = {Photographic Text-to-Image Synthesis With a Hierarchically-Nested Adversarial Network}, pages = {6199--6208}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition}, year = {2018} } @inproceedings{DBLP:conf/acl/MaHXZLZZHLLWW19, author = {Mingbo Ma and Liang Huang and Hao Xiong and Renjie Zheng and Kaibo Liu and Baigong Zheng and Chuanqiang Zhang and Zhongjun He and Hairong Liu and Xing Li and Hua Wu and Haifeng Wang}, title = {{STACL:} Simultaneous Translation with Implicit Anticipation and Controllable Latency using Prefix-to-Prefix Framework}, pages = {3025--3036}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2019} } @inproceedings{DBLP:conf/emnlp/ZhengMZH19, author = {Renjie Zheng and Mingbo Ma and Baigong Zheng and Liang Huang}, title = {Speculative Beam Search for Simultaneous Translation}, pages = {1395--1402}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2019} } @inproceedings{DBLP:conf/naacl/DalviDSV18, author = {Fahim Dalvi and Nadir Durrani and Hassan Sajjad and Stephan Vogel}, title = {Incremental Decoding and Training Methods for Simultaneous Translation in Neural Machine Translation}, pages = {493--499}, publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics}, year = {2018} } @article{DBLP:journals/corr/ChoE16, author = {Kyunghyun Cho and Masha Esipova}, title = {Can neural machine translation do simultaneous translation?}, journal = {CoRR}, volume = {abs/1606.02012}, year = {2016} } @inproceedings{DBLP:conf/eacl/NeubigCGL17, author = {Jiatao Gu and Graham Neubig 
and Kyunghyun Cho and Victor O. K. Li}, title = {Learning to Translate in Real-time with Neural Machine Translation}, pages = {1053--1062}, publisher = {Annual Conference of the European Association for Machine Translation}, year = {2017} } @inproceedings{DBLP:conf/emnlp/GrissomHBMD14, author = {Alvin Grissom II and He He and Jordan L. Boyd-Graber and John Morgan and Hal Daum{\'{e}} III}, title = {Don't Until the Final Verb Wait: Reinforcement Learning for Simultaneous Machine Translation}, pages = {1342--1352}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2014} } @inproceedings{DBLP:conf/acl/ZhengLZMLH20, author = {Baigong Zheng and Kaibo Liu and Renjie Zheng and Mingbo Ma and Hairong Liu and Liang Huang}, title = {Simultaneous Translation Policies: From Fixed to Adaptive}, pages = {2847--2853}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2020} } @inproceedings{DBLP:conf/emnlp/ZhengZMH19, author = {Baigong Zheng and Renjie Zheng and Mingbo Ma and Liang Huang}, title = {Simpler and Faster Learning of Adaptive Policies for Simultaneous Translation}, pages = {1349--1354}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2019} } @inproceedings{DBLP:conf/acl/ZhengZMH19, author = {Baigong Zheng and Renjie Zheng and Mingbo Ma and Liang Huang}, title = {Simultaneous Translation with Flexible Policy via Restricted Imitation Learning}, pages = {5816--5822}, publisher = {Conference on Empirical Methods in Natural Language Processing}, year = {2019} } @inproceedings{DBLP:conf/acl/ArivazhaganCMCY19, author = {Naveen Arivazhagan and Colin Cherry and Wolfgang Macherey and Chung-Cheng Chiu and Semih Yavuz and Ruoming Pang and Wei Li and Colin Raffel}, title = {Monotonic Infinite Lookback Attention for Simultaneous Machine Translation}, pages = {1313--1323}, publisher = {Annual Meeting of the Association for Computational Linguistics}, year = {2019} } %%%%% 
%%%%% chapter 17------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 18------------------------------------------------------

% ---------------------------------------------------------------------------
% Translation memory and interactive machine translation.
% Convention used by these entries: the venue (conference or journal) is
% stored in the publisher field; page ranges use a double hyphen.
% ---------------------------------------------------------------------------

@inproceedings{DBLP:conf/aaai/XiaHLS19,
  author    = {Mengzhou Xia and Guoping Huang and Lemao Liu and Shuming Shi},
  title     = {Graph Based Translation Memory for Neural Machine Translation},
  pages     = {7297--7304},
  publisher = {AAAI Conference on Artificial Intelligence},
  year      = {2019}
}

@inproceedings{DBLP:conf/acl/WangZS13,
  author    = {Kun Wang and Chengqing Zong and Keh-Yih Su},
  title     = {Integrating Translation Memory into Phrase-Based Machine Translation during Decoding},
  pages     = {11--21},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2013}
}

@inproceedings{DBLP:conf/acl/DomingoGEBHPCH19,
  author    = {Miguel Domingo and Mercedes Garc{\'{\i}}a-Mart{\'{\i}}nez and Amando Estela and Laurent Bi{\'{e}} and Alexandre Helle and {\'{A}}lvaro Peris and Francisco Casacuberta and Manuel Herranz},
  title     = {Demonstration of a Neural Machine Translation System with Online Learning for Translators},
  pages     = {70--74},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}

@inproceedings{DBLP:conf/emnlp/NguyenDB17,
  author    = {Khanh Nguyen and Hal Daum{\'{e}} III and Jordan L. Boyd-Graber},
  title     = {Reinforcement Learning for Bandit Neural Machine Translation with Simulated Human Feedback},
  pages     = {1464--1474},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2017}
}

@inproceedings{DBLP:journals/corr/abs-1805-01553,
  author    = {Tsz Kin Lam and Julia Kreutzer and Stefan Riezler},
  title     = {A Reinforcement Learning Approach to Interactive-Predictive Neural Machine Translation},
  publisher = {CoRR},
  volume    = {abs/1805.01553},
  year      = {2018}
}

@inproceedings{DBLP:journals/mt/DomingoPC17,
  author    = {Miguel Domingo and {\'{A}}lvaro Peris and Francisco Casacuberta},
  title     = {Segment-based interactive-predictive machine translation},
  publisher = {Machine Translation},
  volume    = {31},
  number    = {4},
  pages     = {163--185},
  year      = {2017}
}

@inproceedings{Peris2017InteractiveNM,
  title     = {Interactive neural machine translation},
  author    = {{\'{A}}lvaro Peris and Miguel Domingo and Francisco Casacuberta},
  publisher = {Computer Speech and Language},
  year      = {2017},
  volume    = {45},
  pages     = {201--220}
}

@inproceedings{DBLP:journals/csl/PerisC19,
  author    = {{\'{A}}lvaro Peris and Francisco Casacuberta},
  title     = {Online learning for effort reduction in interactive neural machine translation},
  publisher = {Computer Speech and Language},
  volume    = {58},
  pages     = {98--126},
  year      = {2019}
}

@inproceedings{DBLP:journals/coling/BarrachinaBCCCKLNTVV09,
  author    = {Sergio Barrachina and Oliver Bender and Francisco Casacuberta and Jorge Civera and Elsa Cubel and Shahram Khadivi and Antonio L. Lagarda and Hermann Ney and Jes{\'{u}}s Tom{\'{a}}s and Enrique Vidal and Juan Miguel Vilar},
  title     = {Statistical Approaches to Computer-Assisted Translation},
  publisher = {Computational Linguistics},
  volume    = {35},
  number    = {1},
  pages     = {3--28},
  year      = {2009}
}

@inproceedings{Zens2003EfficientSF,
  title     = {Efficient Search for Interactive Statistical Machine Translation},
  author    = {Franz Josef Och and Richard Zens and Hermann Ney},
  booktitle = {the European Chapter of the Association for Computational Linguistics},
  year      = {2003},
  pages     = {387--393}
}

@inproceedings{DBLP:conf/acl/WuebkerGDHL16,
  author    = {Joern Wuebker and Spence Green and John DeNero and Sasa Hasan and Minh-Thang Luong},
  title     = {Models and Inference for Prefix-Constrained Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2016}
}

% ---------------------------------------------------------------------------
% Efficient and adaptive inference.
% ---------------------------------------------------------------------------

@inproceedings{DBLP:journals/corr/abs-2010-05680,
  author    = {Jiarui Fang and Yang Yu and Chengduo Zhao and Jie Zhou},
  title     = {{TurboTransformers}: An Efficient {GPU} Serving System For Transformer Models},
  publisher = {CoRR},
  volume    = {abs/2010.05680},
  year      = {2020}
}

@inproceedings{DBLP:conf/iclr/HuangCLWMW18,
  author    = {Gao Huang and Danlu Chen and Tianhong Li and Felix Wu and Laurens van der Maaten and Kilian Q. Weinberger},
  title     = {Multi-Scale Dense Networks for Resource Efficient Image Classification},
  publisher = {International Conference on Learning Representations},
  year      = {2018}
}

@inproceedings{DBLP:journals/corr/BolukbasiWDS17,
  author    = {Tolga Bolukbasi and Joseph Wang and Ofer Dekel and Venkatesh Saligrama},
  title     = {Adaptive Neural Networks for Fast Test-Time Prediction},
  publisher = {CoRR},
  volume    = {abs/1702.07811},
  year      = {2017}
}

@inproceedings{DBLP:conf/emnlp/WangXZ20,
  author    = {Qiang Wang and Tong Xiao and Jingbo Zhu},
  title     = {Training Flexible Depth Model by Multi-Task Learning for Neural Machine Translation},
  pages     = {4307--4312},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2020}
}

% ---------------------------------------------------------------------------
% Terminology and lexical constraints.
% ---------------------------------------------------------------------------

@inproceedings{DBLP:conf/ijcai/ChenCWL20,
  author    = {Guanhua Chen and Yun Chen and Yong Wang and Victor O. K. Li},
  title     = {Lexical-Constraint-Aware Neural Machine Translation via Data Augmentation},
  pages     = {3587--3593},
  publisher = {International Joint Conference on Artificial Intelligence},
  year      = {2020}
}

@inproceedings{DBLP:journals/corr/abs-1912-00567,
  author    = {Tao Wang and Shaohui Kuang and Deyi Xiong and Ant{\'{o}}nio Branco},
  title     = {Merging External Bilingual Pairs into Neural Machine Translation},
  publisher = {CoRR},
  volume    = {abs/1912.00567},
  year      = {2019}
}

@inproceedings{DBLP:conf/acl/DinuMFA19,
  author    = {Georgiana Dinu and Prashant Mathur and Marcello Federico and Yaser Al-Onaizan},
  title     = {Training Neural Machine Translation to Apply Terminology Constraints},
  pages     = {3063--3068},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2019}
}

@inproceedings{DBLP:conf/naacl/SongZYLWZ19,
  author    = {Kai Song and Yue Zhang and Heng Yu and Weihua Luo and Kun Wang and Min Zhang},
  title     = {Code-Switching for Enhancing {NMT} with Pre-Specified Translation},
  pages     = {449--459},
  publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  year      = {2019}
}
% Convention used throughout this file: entries are typed "inproceedings" and
% the venue (conference, journal, or CoRR) is stored in the `publisher` field.
% Citation keys are left untouched so that existing \cite commands keep working.

@inproceedings{DBLP:conf/naacl/HaslerGIB18,
  author    = {Eva Hasler and Adri{\`{a}} de Gispert and Gonzalo Iglesias and Bill Byrne},
  title     = {Neural Machine Translation Decoding with Terminology Constraints},
  publisher = {Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {506--512},
  year      = {2018}
}

@inproceedings{DBLP:conf/wmt/ChatterjeeNTFSB17,
  author    = {Rajen Chatterjee and Matteo Negri and Marco Turchi and Marcello Federico and Lucia Specia and Fr{\'{e}}d{\'{e}}ric Blain},
  title     = {Guiding Neural Machine Translation Decoding with External Knowledge},
  publisher = {Conference on Machine Translation},
  pages     = {157--168},
  year      = {2017}
}

@inproceedings{DBLP:conf/naacl/PostV18,
  author    = {Matt Post and David Vilar},
  title     = {Fast Lexically Constrained Decoding with Dynamic Beam Allocation for Neural Machine Translation},
  publisher = {Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {1314--1324},
  year      = {2018}
}

@inproceedings{DBLP:conf/acl/HokampL17,
  author    = {Chris Hokamp and Qun Liu},
  title     = {Lexically Constrained Decoding for Sequence Generation Using Grid Beam Search},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {1535--1546},
  year      = {2017}
}

@inproceedings{DBLP:conf/naacl/ThompsonGKDK19,
  author    = {Brian Thompson and Jeremy Gwinnup and Huda Khayrallah and Kevin Duh and Philipp Koehn},
  title     = {Overcoming Catastrophic Forgetting During Domain Adaptation of Neural Machine Translation},
  publisher = {Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {2062--2068},
  year      = {2019}
}

@inproceedings{DBLP:conf/aclnmt/KhayrallahTDK18,
  author    = {Huda Khayrallah and Brian Thompson and Kevin Duh and Philipp Koehn},
  title     = {Regularized Training Objective for Continued Training for Domain Adaptation in Neural Machine Translation},
  publisher = {Workshop on Neural Machine Translation and Generation},
  pages     = {36--44},
  year      = {2018}
}

% Preprint: stored as CoRR + abs/<id>, like the other arXiv entries in this file.
@inproceedings{barone2017regularization,
  author    = {Barone, Antonio Valerio Miceli and Haddow, Barry and Germann, Ulrich and Sennrich, Rico},
  title     = {Regularization techniques for fine-tuning in neural machine translation},
  publisher = {CoRR},
  volume    = {abs/1707.09920},
  year      = {2017}
}

@inproceedings{DBLP:journals/corr/ChuDK17,
  author    = {Chenhui Chu and Raj Dabre and Sadao Kurohashi},
  title     = {An Empirical Comparison of Simple Domain Adaptation Methods for Neural Machine Translation},
  publisher = {CoRR},
  volume    = {abs/1701.03214},
  year      = {2017}
}

@inproceedings{DBLP:conf/coling/GuF20,
  author    = {Shuhao Gu and Yang Feng},
  title     = {Investigating Catastrophic Forgetting During Continual Training for Neural Machine Translation},
  publisher = {International Conference on Computational Linguistics},
  pages     = {4315--4326},
  year      = {2020}
}
%%%%% chapter 18------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter appendix-A------------------------------------------------------

@inproceedings{Tong2012NiuTrans,
  author    = {Tong Xiao and Jingbo Zhu and Hao Zhang and Qiang Li},
  title     = {{NiuTrans}: An Open Source Toolkit for Phrase-based and Syntax-based Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {19--24},
  year      = {2012}
}

% The key says 2010 but the paper appeared in 2009; the key is kept for existing citations.
@inproceedings{Li2010Joshua,
  author    = {Zhifei Li and Chris Callison-Burch and Chris Dyer and Sanjeev Khudanpur and Lane Schwartz and Wren N. G. Thornton and Jonathan Weese and Omar Zaidan},
  title     = {Joshua: An Open Source Toolkit for Parsing-Based Machine Translation},
  publisher = {Workshop on Statistical Machine Translation},
  pages     = {135--139},
  year      = {2009}
}

@inproceedings{iglesias2009hierarchical,
  author    = {Gonzalo Iglesias and Adri{\`{a}} de Gispert and Eduardo Rodr{\'{\i}}guez Banga and William J. Byrne},
  title     = {Hierarchical Phrase-Based Translation with Weighted Finite State Transducers},
  publisher = {Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {433--441},
  year      = {2009}
}

@inproceedings{dyer2010cdec,
  author    = {Chris Dyer and Adam Lopez and Juri Ganitkevitch and Jonathan Weese and Ferhan T{\"{u}}re and Phil Blunsom and Hendra Setiawan and Vladimir Eidelman and Philip Resnik},
  title     = {cdec: {A} Decoder, Alignment, and Learning Framework for Finite-State and Context-Free Translation Models},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {7--12},
  year      = {2010}
}

@inproceedings{Cer2010Phrasal,
  author    = {Daniel M. Cer and Michel Galley and Daniel Jurafsky and Christopher D. Manning},
  title     = {Phrasal: {A} Statistical Machine Translation Toolkit for Exploring New Model Features},
  publisher = {Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {9--12},
  year      = {2010}
}

@inproceedings{vilar2012jane,
  author    = {Vilar, David and Stein, Daniel and Huck, Matthias and Ney, Hermann},
  title     = {Jane: an advanced freely available hierarchical machine translation toolkit},
  publisher = {Machine Translation},
  volume    = {26},
  number    = {3},
  pages     = {197--216},
  year      = {2012}
}

@inproceedings{DBLP:conf/naacl/DyerCS13,
  author    = {Chris Dyer and Victor Chahuneau and Noah A. Smith},
  title     = {A Simple, Fast, and Effective Reparameterization of {IBM} Model 2},
  publisher = {Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {644--648},
  year      = {2013}
}

@inproceedings{al2016theano,
  author    = {Rami Al-Rfou and Guillaume Alain and Amjad Almahairi and Christof Angerm{\"{u}}ller
               and Dzmitry Bahdanau and Nicolas Ballas and Fr{\'{e}}d{\'{e}}ric Bastien and Justin Bayer
               and Anatoly Belikov and Alexander Belopolsky and Yoshua Bengio and Arnaud Bergeron
               and James Bergstra and Valentin Bisson and Josh Bleecher Snyder and Nicolas Bouchard
               and Nicolas Boulanger-Lewandowski and Xavier Bouthillier and Alexandre de Br{\'{e}}bisson
               and Olivier Breuleux and Pierre Luc Carrier and Kyunghyun Cho and Jan Chorowski
               and Paul F. Christiano and Tim Cooijmans and Marc-Alexandre C{\^{o}}t{\'{e}}
               and Myriam C{\^{o}}t{\'{e}} and Aaron C. Courville and Yann N. Dauphin and Olivier Delalleau
               and Julien Demouth and Guillaume Desjardins and Sander Dieleman and Laurent Dinh
               and Melanie Ducoffe and Vincent Dumoulin and Samira Ebrahimi Kahou and Dumitru Erhan
               and Ziye Fan and Orhan Firat and Mathieu Germain and Xavier Glorot and Ian J. Goodfellow
               and Matthew Graham and {\c{C}}aglar G{\"{u}}l{\c{c}}ehre and Philippe Hamel
               and Iban Harlouchet and Jean-Philippe Heng and Bal{\'{a}}zs Hidasi and Sina Honari
               and Arjun Jain and S{\'{e}}bastien Jean and Kai Jia and Mikhail Korobov and Vivek Kulkarni
               and Alex Lamb and Pascal Lamblin and Eric Larsen and C{\'{e}}sar Laurent and Sean Lee
               and Simon Lefran{\c{c}}ois and Simon Lemieux and Nicholas L{\'{e}}onard and Zhouhan Lin
               and Jesse A. Livezey and Cory Lorenz and Jeremiah Lowin and Qianli Ma
               and Pierre-Antoine Manzagol and Olivier Mastropietro and Robert McGibbon
               and Roland Memisevic and Bart van Merri{\"{e}}nboer and Vincent Michalski and Mehdi Mirza
               and Alberto Orlandi and Christopher Joseph Pal and Razvan Pascanu and Mohammad Pezeshki
               and Colin Raffel and Daniel Renshaw and Matthew Rocklin and Adriana Romero and Markus Roth
               and Peter Sadowski and John Salvatier and Fran{\c{c}}ois Savard and Jan Schl{\"{u}}ter
               and John Schulman and Gabriel Schwartz and Iulian Vlad Serban and Dmitriy Serdyuk
               and Samira Shabanian and {\'{E}}tienne Simon and Sigurd Spieckermann
               and S. Ramana Subramanyam and Jakub Sygnowski and J{\'{e}}r{\'{e}}mie Tanguay
               and Gijs van Tulder and Joseph P. Turian and Sebastian Urban and Pascal Vincent
               and Francesco Visin and Harm de Vries and David Warde-Farley and Dustin J. Webb
               and Matthew Willson and Kelvin Xu and Lijun Xue and Li Yao and Saizheng Zhang
               and Ying Zhang},
  title     = {Theano: {A} {Python} framework for fast computation of mathematical expressions},
  publisher = {CoRR},
  volume    = {abs/1605.02688},
  year      = {2016}
}

% Published at EACL 2017 despite the "journals/corr" key.
@inproceedings{DBLP:journals/corr/SennrichFCBHHJL17,
  author    = {Rico Sennrich and Orhan Firat and Kyunghyun Cho and Barry Haddow and Alexandra Birch and Julian Hitschler and Marcin Junczys-Dowmunt and Samuel L{\"{a}}ubli and Antonio Valerio Miceli Barone and Jozef Mokry and Maria Nadejde},
  title     = {Nematus: a Toolkit for Neural Machine Translation},
  publisher = {Conference of the European Chapter of the Association for Computational Linguistics},
  pages     = {65--68},
  year      = {2017}
}

@inproceedings{Koehn2007Moses,
  author    = {Philipp Koehn and Hieu Hoang and Alexandra Birch and Chris Callison-Burch and Marcello Federico and Nicola Bertoldi and Brooke Cowan and Wade Shen and Christine Moran and Richard Zens and Chris Dyer and Ondrej Bojar and Alexandra Constantin and Evan Herbst},
  title     = {Moses: Open Source Toolkit for Statistical Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2007}
}

@inproceedings{zollmann2007the,
  author    = {Andreas Zollmann and Ashish Venugopal and Matthias Paulik and Stephan Vogel},
  title     = {The Syntax Augmented {MT} {(SAMT)} System at the Shared Task for the 2007 {ACL} Workshop on Statistical Machine Translation},
  publisher = {Workshop on Statistical Machine Translation},
  pages     = {216--219},
  year      = {2007}
}

@inproceedings{och2003systematic,
  author    = {Franz Josef Och and Hermann Ney},
  title     = {A Systematic Comparison of Various Statistical Alignment Models},
  publisher = {Computational Linguistics},
  volume    = {29},
  number    = {1},
  pages     = {19--51},
  year      = {2003}
}

@inproceedings{zoph2016simple,
  author    = {Barret Zoph and Ashish Vaswani and Jonathan May and Kevin Knight},
  title     = {Simple, Fast Noise-Contrastive Estimation for Large {RNN} Vocabularies},
  publisher = {Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {1217--1222},
  year      = {2016}
}

@inproceedings{Ottfairseq,
  author    = {Myle Ott and Sergey Edunov and Alexei Baevski and Angela Fan and Sam Gross and Nathan Ng and David Grangier and Michael Auli},
  title     = {fairseq: {A} Fast, Extensible Toolkit for Sequence Modeling},
  publisher = {Conference of the North American Chapter of the Association for Computational Linguistics},
  pages     = {48--53},
  year      = {2019}
}

@inproceedings{Vaswani2018Tensor2TensorFN,
  author    = {Ashish Vaswani and Samy Bengio and Eugene Brevdo and Fran{\c{c}}ois Chollet and Aidan N. Gomez and Stephan Gouws and Llion Jones and Lukasz Kaiser and Nal Kalchbrenner and Niki Parmar and Ryan Sepassi and Noam Shazeer and Jakob Uszkoreit},
  title     = {{Tensor2Tensor} for Neural Machine Translation},
  publisher = {Association for Machine Translation in the Americas},
  pages     = {193--199},
  year      = {2018}
}

@inproceedings{KleinOpenNMT,
  author    = {Guillaume Klein and Yoon Kim and Yuntian Deng and Jean Senellart and Alexander M. Rush},
  title     = {{OpenNMT}: Open-Source Toolkit for Neural Machine Translation},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {67--72},
  year      = {2017}
}

@inproceedings{luong2016acl_hybrid,
  author    = {Minh-Thang Luong and Christopher D. Manning},
  title     = {Achieving Open Vocabulary Neural Machine Translation with Hybrid Word-Character Models},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  year      = {2016}
}

@inproceedings{ZhangTHUMT,
  author    = {Jiacheng Zhang and Yanzhuo Ding and Shiqi Shen and Yong Cheng and Maosong Sun and Huan-Bo Luan and Yang Liu},
  title     = {{THUMT:} An Open Source Toolkit for Neural Machine Translation},
  publisher = {CoRR},
  volume    = {abs/1706.06415},
  year      = {2017}
}

@inproceedings{JunczysMarian,
  author    = {Marcin Junczys-Dowmunt and Roman Grundkiewicz and Tomasz Dwojak and Hieu Hoang and Kenneth Heafield and Tom Neckermann and Frank Seide and Ulrich Germann and Alham Fikri Aji and Nikolay Bogoychev and Andr{\'{e}} F. T. Martins and Alexandra Birch},
  title     = {Marian: Fast Neural Machine Translation in {C++}},
  publisher = {Annual Meeting of the Association for Computational Linguistics},
  pages     = {116--121},
  year      = {2018}
}

@inproceedings{hieber2017sockeye,
  author    = {Felix Hieber and Tobias Domhan and Michael Denkowski and David Vilar and Artem Sokolov and Ann Clifton and Matt Post},
  title     = {Sockeye: {A} Toolkit for Neural Machine Translation},
  publisher = {CoRR},
  volume    = {abs/1712.05690},
  year      = {2017}
}

@inproceedings{WangCytonMT,
  author    = {Xiaolin Wang and Masao Utiyama and Eiichiro Sumita},
  title     = {{CytonMT}: an Efficient Neural Machine Translation Open-source Toolkit Implemented in {C++}},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
  pages     = {133--138},
  year      = {2018}
}

@inproceedings{DBLP:journals/corr/abs-1805-10387,
  author    = {Oleksii Kuchaiev and Boris Ginsburg and Igor Gitman and Vitaly Lavrukhin and Carl Case and Paulius Micikevicius},
  title     = {{OpenSeq2Seq}: extensible toolkit for distributed and mixed precision training of sequence-to-sequence models},
  publisher = {CoRR},
  volume    = {abs/1805.10387},
  year      = {2018}
}

@inproceedings{nmtpy2017,
  author    = {Ozan Caglayan and Mercedes Garc{\'{\i}}a-Mart{\'{\i}}nez and Adrien Bardet and Walid Aransa and Fethi Bougares and Lo{\"{\i}}c Barrault},
  title     = {{NMTPY:} {A} Flexible Toolkit for Advanced Neural Machine Translation Systems},
  publisher = {The Prague Bulletin of Mathematical Linguistics},
  volume    = {109},
  pages     = {15--28},
  year      = {2017}
}
%%%%% chapter appendix-A------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%