Commit 473bbd70 by 曹润柘

add chapter 10

parent aedfae50
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -3064,6 +3064,900 @@
%%%%% chapter 10------------------------------------------------------
author = {Jacob Devlin and
Rabih Zbib and
Zhongqiang Huang and
Thomas Lamar and
Richard M. Schwartz and
John Makhoul},
title = {Fast and Robust Neural Network Joint Models for Statistical Machine
Translation},
publisher = {Proceedings of the 52nd Annual Meeting of the Association for Computational
Linguistics, {ACL} 2014, June 22-27, 2014, Baltimore, MD, USA, Volume
1: Long Papers},
pages = {1370--1380},
//publisher = {The Association for Computer Linguistics},
year = {2014},
//url = {},
//doi = {10.3115/v1/p14-1129},
//timestamp = {Tue, 28 Jan 2020 10:27:56 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Holger Schwenk},
//editor = {Martin Kay and
Christian Boitet},
title = {Continuous Space Translation Models for Phrase-Based Statistical Machine
Translation},
publisher = {{COLING} 2012, 24th International Conference on Computational Linguistics,
Proceedings of the Conference: Posters, 8-15 December 2012, Mumbai,
India},
pages = {1071--1080},
//publisher = {Indian Institute of Technology Bombay},
year = {2012},
//url = {},
//timestamp = {Wed, 18 Sep 2019 12:15:53 +0200},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Nal Kalchbrenner and
Phil Blunsom},
title = {Recurrent Continuous Translation Models},
publisher = {Proceedings of the 2013 Conference on Empirical Methods in Natural
Language Processing, {EMNLP} 2013, 18-21 October 2013, Grand Hyatt
Seattle, Seattle, Washington, USA, {A} meeting of SIGDAT, a Special
Interest Group of the {ACL}},
pages = {1700--1709},
//publisher = {{ACL}},
year = {2013},
//url = {},
//timestamp = {Fri, 13 Sep 2019 13:08:45 +0200},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Sepp Hochreiter},
title = {The Vanishing Gradient Problem During Learning Recurrent Neural Nets
and Problem Solutions},
journal = {International Journal of Uncertainty, Fuzziness and Knowledge-Based
Systems},
volume = {6},
number = {2},
pages = {107--116},
year = {1998},
//url = {},
//doi = {10.1142/S0218488598000094},
//timestamp = {Wed, 14 Nov 2018 10:41:42 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author ={Y. {Bengio} and P. {Simard} and P. {Frasconi}},
journal ={IEEE Transactions on Neural Networks},
title ={Learning long-term dependencies with gradient descent is difficult},
year ={1994},
volume ={5},
number ={2},
pages ={157--166},
author = {Ashish Vaswani and
Noam Shazeer and
Niki Parmar and
Jakob Uszkoreit and
Llion Jones and
Aidan N. Gomez and
Lukasz Kaiser and
Illia Polosukhin},
//editor = {Isabelle Guyon and
Ulrike von Luxburg and
Samy Bengio and
Hanna M. Wallach and
Rob Fergus and
S. V. N. Vishwanathan and
Roman Garnett},
title = {Attention is All you Need},
publisher = {Advances in Neural Information Processing Systems 30: Annual Conference
on Neural Information Processing Systems 2017, 4-9 December 2017,
Long Beach, CA, {USA}},
pages = {5998--6008},
year = {2017},
//url = {},
//timestamp = {Fri, 06 Mar 2020 17:00:11 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Felix Stahlberg},
title = {Neural Machine Translation: {A} Review},
journal = {CoRR},
volume = {abs/1912.02047},
year = {2019},
//url = {},
//archivePrefix = {arXiv},
//eprint = {1912.02047},
//timestamp = {Thu, 02 Jan 2020 18:08:18 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Luisa Bentivogli and
Arianna Bisazza and
Mauro Cettolo and
Marcello Federico},
//editor = {Jian Su and
Xavier Carreras and
Kevin Duh},
title = {Neural versus Phrase-Based Machine Translation Quality: a Case Study},
publisher = {Proceedings of the 2016 Conference on Empirical Methods in Natural
Language Processing, {EMNLP} 2016, Austin, Texas, USA, November 1-4,
2016},
pages = {257--267},
//publisher = {The Association for Computational Linguistics},
year = {2016},
//url = {},
//doi = {10.18653/v1/d16-1025},
//timestamp = {Tue, 28 Jan 2020 10:28:39 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Hany Hassan and
Anthony Aue and
Chang Chen and
Vishal Chowdhary and
Jonathan Clark and
Christian Federmann and
Xuedong Huang and
Marcin Junczys{-}Dowmunt and
William Lewis and
Mu Li and
Shujie Liu and
Tie{-}Yan Liu and
Renqian Luo and
Arul Menezes and
Tao Qin and
Frank Seide and
Xu Tan and
Fei Tian and
Lijun Wu and
Shuangzhi Wu and
Yingce Xia and
Dongdong Zhang and
Zhirui Zhang and
Ming Zhou},
title = {Achieving Human Parity on Automatic Chinese to English News Translation},
journal = {CoRR},
volume = {abs/1803.05567},
year = {2018},
//url = {},
//archivePrefix = {arXiv},
//eprint = {1803.05567},
//timestamp = {Mon, 13 Aug 2018 16:47:23 +0200},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Qiang Wang and
Bei Li and
Tong Xiao and
Jingbo Zhu and
Changliang Li and
Derek F. Wong and
Lidia S. Chao},
//editor = {Anna Korhonen and
David R. Traum and
Llu{\'{\i}}s M{\`{a}}rquez},
title = {Learning Deep Transformer Models for Machine Translation},
publisher = {Proceedings of the 57th Conference of the Association for Computational
Linguistics, {ACL} 2019, Florence, Italy, July 28- August 2, 2019,
Volume 1: Long Papers},
pages = {1810--1822},
//publisher = {Association for Computational Linguistics},
year = {2019},
//url = {},
//doi = {10.18653/v1/p19-1176},
//timestamp = {Tue, 28 Jan 2020 10:27:53 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Yanyang Li and
Qiang Wang and
Tong Xiao and
Tongran Liu and
Jingbo Zhu},
title = {Neural Machine Translation with Joint Representation},
journal = {CoRR},
volume = {abs/2002.06546},
year = {2020},
//url = {},
//archivePrefix = {arXiv},
//eprint = {2002.06546},
//timestamp = {Mon, 02 Mar 2020 16:46:06 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
year = {1997},
month = {12},
pages = {1735--1780},
title = {Long Short-term Memory},
volume = {9},
journal = {Neural computation},
//doi = {10.1162/neco.1997.9.8.1735}
author = {Kyunghyun Cho and
Bart van Merrienboer and
{\c{C}}aglar G{\"{u}}l{\c{c}}ehre and
Dzmitry Bahdanau and
Fethi Bougares and
Holger Schwenk and
Yoshua Bengio},
//editor = {Alessandro Moschitti and
Bo Pang and
Walter Daelemans},
title = {Learning Phrase Representations using {RNN} Encoder-Decoder for Statistical
Machine Translation},
publisher = {Proceedings of the 2014 Conference on Empirical Methods in Natural
Language Processing, {EMNLP} 2014, October 25-29, 2014, Doha, Qatar,
{A} meeting of SIGDAT, a Special Interest Group of the {ACL}},
pages = {1724--1734},
//publisher = {{ACL}},
year = {2014},
//url = {},
//doi = {10.3115/v1/d14-1179},
//timestamp = {Tue, 28 Jan 2020 10:28:17 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Xavier Glorot and
Yoshua Bengio},
//editor = {Yee Whye Teh and
D. Mike Titterington},
title = {Understanding the difficulty of training deep feedforward neural networks},
publisher = {Proceedings of the Thirteenth International Conference on Artificial
Intelligence and Statistics, {AISTATS} 2010, Chia Laguna Resort, Sardinia,
Italy, May 13-15, 2010},
//series = {{JMLR} Proceedings},
volume = {9},
pages = {249--256},
//publisher = {},
year = {2010},
//url = {},
//timestamp = {Wed, 29 May 2019 08:41:47 +0200},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Tong Xiao and
Jingbo Zhu and
Tongran Liu and
Chunliang Zhang},
//editor = {Carles Sierra},
title = {Fast Parallel Training of Neural Language Models},
publisher = {Proceedings of the Twenty-Sixth International Joint Conference on
Artificial Intelligence, {IJCAI} 2017, Melbourne, Australia, August
19-25, 2017},
pages = {4193--4199},
//publisher = {},
year = {2017},
//url = {},
//doi = {10.24963/ijcai.2017/586},
//timestamp = {Tue, 20 Aug 2019 16:17:12 +0200},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Jiatao Gu and
James Bradbury and
Caiming Xiong and
Victor O. K. Li and
Richard Socher},
title = {Non-Autoregressive Neural Machine Translation},
publisher = {6th International Conference on Learning Representations, {ICLR} 2018,
Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings},
//publisher = {},
year = {2018},
//url = {},
//timestamp = {Thu, 25 Jul 2019 14:25:57 +0200},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Yanyang Li and
Tong Xiao and
Yinqiao Li and
Qiang Wang and
Changming Xu and
Jingbo Zhu},
//editor = {Iryna Gurevych and
Yusuke Miyao},
title = {A Simple and Effective Approach to Coverage-Aware Neural Machine Translation},
publisher = {Proceedings of the 56th Annual Meeting of the Association for Computational
Linguistics, {ACL} 2018, Melbourne, Australia, July 15-20, 2018, Volume
2: Short Papers},
pages = {292--297},
//publisher = {Association for Computational Linguistics},
year = {2018},
//url = {},
//doi = {10.18653/v1/P18-2047},
//timestamp = {Mon, 16 Sep 2019 13:46:41 +0200},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Zhaopeng Tu and
Zhengdong Lu and
Yang Liu and
Xiaohua Liu and
Hang Li},
title = {Modeling Coverage for Neural Machine Translation},
publisher = {Proceedings of the 54th Annual Meeting of the Association for Computational
Linguistics, {ACL} 2016, August 7-12, 2016, Berlin, Germany, Volume
1: Long Papers},
//publisher = {The Association for Computer Linguistics},
year = {2016},
//url = {},
//doi = {10.18653/v1/p16-1008},
//timestamp = {Tue, 28 Jan 2020 10:27:13 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Rico Sennrich and
Orhan Firat and
Kyunghyun Cho and
Alexandra Birch and
Barry Haddow and
Julian Hitschler and
Marcin Junczys{-}Dowmunt and
Samuel L{\"{a}}ubli and
Antonio Valerio Miceli Barone and
Jozef Mokry and
Maria Nadejde},
//editor = {Andre Martins and
Anselmo Pe{\~{n}}as},
title = {Nematus: a Toolkit for Neural Machine Translation},
publisher = {Proceedings of the 15th Conference of the European Chapter of the
Association for Computational Linguistics, {EACL} 2017, Valencia,
Spain, April 3-7, 2017, Software Demonstrations},
pages = {65--68},
//publisher = {Association for Computational Linguistics},
year = {2017},
//url = {},
//doi = {10.18653/v1/e17-3017},
//timestamp = {Tue, 28 Jan 2020 10:31:12 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Biao Zhang and
Rico Sennrich},
//editor = {Anna Korhonen and
David R. Traum and
Llu{\'{\i}}s M{\`{a}}rquez},
title = {A Lightweight Recurrent Network for Sequence Modeling},
publisher = {Proceedings of the 57th Conference of the Association for Computational
Linguistics, {ACL} 2019, Florence, Italy, July 28- August 2, 2019,
Volume 1: Long Papers},
pages = {1538--1548},
//publisher = {Association for Computational Linguistics},
year = {2019},
//url = {},
//doi = {10.18653/v1/p19-1149},
//timestamp = {Tue, 28 Jan 2020 10:28:03 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Tao Lei and
Yu Zhang and
Yoav Artzi},
title = {Training RNNs as Fast as CNNs},
journal = {CoRR},
volume = {abs/1709.02755},
year = {2017},
//url = {},
//archivePrefix = {arXiv},
//eprint = {1709.02755},
//timestamp = {Mon, 13 Aug 2018 16:46:29 +0200},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Biao Zhang and
Deyi Xiong and
Jinsong Su and
Qian Lin and
Huiji Zhang},
//editor = {Ellen Riloff and
David Chiang and
Julia Hockenmaier and
Jun'ichi Tsujii},
title = {Simplifying Neural Machine Translation with Addition-Subtraction Twin-Gated
Recurrent Networks},
publisher = {Proceedings of the 2018 Conference on Empirical Methods in Natural
Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
pages = {4273--4283},
//publisher = {Association for Computational Linguistics},
year = {2018},
//url = {},
//timestamp = {Fri, 13 Sep 2019 13:08:45 +0200},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Shikun Liu and
Edward Johns and
Andrew J. Davison},
title = {End-To-End Multi-Task Learning With Attention},
publisher = {{IEEE} Conference on Computer Vision and Pattern Recognition, {CVPR}
2019, Long Beach, CA, USA, June 16-20, 2019},
pages = {1871--1880},
//publisher = {Computer Vision Foundation / {IEEE}},
year = {2019},
//url = {\_CVPR\_2019/html/Liu\_End-To-End\_Multi-Task\_Learning\_With\_Attention\_CVPR\_2019\_paper.html},
//doi = {10.1109/CVPR.2019.00197},
//timestamp = {Mon, 20 Jan 2020 15:36:04 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Ra{\'{u}}l V{\'{a}}zquez and
Alessandro Raganato and
J{\"{o}}rg Tiedemann and
Mathias Creutz},
//editor = {Isabelle Augenstein and
Spandana Gella and
Sebastian Ruder and
Katharina Kann and
Burcu Can and
Johannes Welbl and
Alexis Conneau and
Xiang Ren and
Marek Rei},
title = {Multilingual {NMT} with a Language-Independent Attention Bridge},
publisher = {Proceedings of the 4th Workshop on Representation Learning for NLP,
RepL4NLP@ACL 2019, Florence, Italy, August 2, 2019},
pages = {33--39},
//publisher = {Association for Computational Linguistics},
year = {2019},
//url = {},
//doi = {10.18653/v1/w19-4305},
//timestamp = {Fri, 27 Mar 2020 08:52:29 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Pooya Moradi and
Nishant Kambhatla and
Anoop Sarkar},
//editor = {Alexandra Birch and
Andrew M. Finch and
Hiroaki Hayashi and
Ioannis Konstas and
Thang Luong and
Graham Neubig and
Yusuke Oda and
Katsuhito Sudoh},
title = {Interrogating the Explanatory Power of Attention in Neural Machine
Translation},
publisher = {Proceedings of the 3rd Workshop on Neural Generation and Translation@EMNLP-IJCNLP
2019, Hong Kong, November 4, 2019},
pages = {221--230},
//publisher = {Association for Computational Linguistics},
year = {2019},
//url = {},
//doi = {10.18653/v1/D19-5624},
//timestamp = {Tue, 24 Mar 2020 15:04:09 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Xing Wang and
Zhengdong Lu and
Zhaopeng Tu and
Hang Li and
Deyi Xiong and
Min Zhang},
//editor = {Satinder P. Singh and
Shaul Markovitch},
title = {Neural Machine Translation Advised by Statistical Machine Translation},
publisher = {Proceedings of the Thirty-First {AAAI} Conference on Artificial Intelligence,
February 4-9, 2017, San Francisco, California, {USA}},
pages = {3330--3336},
//publisher = {{AAAI} Press},
year = {2017},
//url = {},
//timestamp = {Tue, 15 Jan 2019 11:48:13 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Elena Voita and
David Talbot and
Fedor Moiseev and
Rico Sennrich and
Ivan Titov},
//editor = {Anna Korhonen and
David R. Traum and
Llu{\'{\i}}s M{\`{a}}rquez},
title = {Analyzing Multi-Head Self-Attention: Specialized Heads Do the Heavy
Lifting, the Rest Can Be Pruned},
publisher = {Proceedings of the 57th Conference of the Association for Computational
Linguistics, {ACL} 2019, Florence, Italy, July 28- August 2, 2019,
Volume 1: Long Papers},
pages = {5797--5808},
//publisher = {Association for Computational Linguistics},
year = {2019},
//url = {},
//doi = {10.18653/v1/p19-1580},
//timestamp = {Tue, 28 Jan 2020 10:27:29 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Tong Xiao and
Yinqiao Li and
Jingbo Zhu and
Zhengtao Yu and
Tongran Liu},
//editor = {Sarit Kraus},
title = {Sharing Attention Weights for Fast Transformer},
publisher = {Proceedings of the Twenty-Eighth International Joint Conference on
Artificial Intelligence, {IJCAI} 2019, Macao, China, August 10-16,
2019},
pages = {5292--5298},
//publisher = {},
year = {2019},
//url = {},
//doi = {10.24963/ijcai.2019/735},
//timestamp = {Tue, 20 Aug 2019 16:18:18 +0200},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Baosong Yang and
Derek F. Wong and
Tong Xiao and
Lidia S. Chao and
Jingbo Zhu},
//editor = {Martha Palmer and
Rebecca Hwa and
Sebastian Riedel},
title = {Towards Bidirectional Hierarchical Representations for Attention-based
Neural Machine Translation},
publisher = {Proceedings of the 2017 Conference on Empirical Methods in Natural
Language Processing, {EMNLP} 2017, Copenhagen, Denmark, September
9-11, 2017},
pages = {1432--1441},
//publisher = {Association for Computational Linguistics},
year = {2017},
//url = {},
//doi = {10.18653/v1/d17-1150},
//timestamp = {Tue, 28 Jan 2020 10:28:08 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Yau{-}Shian Wang and
Hung{-}yi Lee and
Yun{-}Nung Chen},
//editor = {Kentaro Inui and
Jing Jiang and
Vincent Ng and
Xiaojun Wan},
title = {Tree Transformer: Integrating Tree Structures into Self-Attention},
publisher = {Proceedings of the 2019 Conference on Empirical Methods in Natural
Language Processing and the 9th International Joint Conference on
Natural Language Processing, {EMNLP-IJCNLP} 2019, Hong Kong, China,
November 3-7, 2019},
//publisher = {Association for Computational Linguistics},
pages = {1061--1070},
year = {2019},
//url = {},
//doi = {10.18653/v1/D19-1098},
//timestamp = {Thu, 12 Dec 2019 13:23:46 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Jetic Gu and
Hassan S. Shavarani and
Anoop Sarkar},
//editor = {Ellen Riloff and
David Chiang and
Julia Hockenmaier and
Jun'ichi Tsujii},
title = {Top-down Tree Structured Decoding with Syntactic Connections for Neural Machine Translation and Parsing},
publisher = {Proceedings of the 2018 Conference on Empirical Methods in Natural
Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
pages = {401--413},
//publisher = {Association for Computational Linguistics},
year = {2018},
//url = {},
//doi = {10.18653/v1/d18-1037},
//timestamp = {Tue, 28 Jan 2020 10:28:48 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Xinyi Wang and
Hieu Pham and
Pengcheng Yin and
Graham Neubig},
//editor = {Ellen Riloff and
David Chiang and
Julia Hockenmaier and
Jun'ichi Tsujii},
title = {A Tree-based Decoder for Neural Machine Translation},
publisher = {Proceedings of the 2018 Conference on Empirical Methods in Natural
Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
pages = {4772--4777},
//publisher = {Association for Computational Linguistics},
year = {2018},
//url = {},
//timestamp = {Fri, 13 Sep 2019 13:08:45 +0200},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Jiajun Zhang and
Chengqing Zong},
title = {Bridging Neural Machine Translation and Bilingual Dictionaries},
journal = {CoRR},
volume = {abs/1610.07272},
year = {2016},
//url = {},
//archivePrefix = {arXiv},
//eprint = {1610.07272},
//timestamp = {Mon, 13 Aug 2018 16:47:14 +0200},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Zihang Dai and
Zhilin Yang and
Yiming Yang and
Jaime G. Carbonell and
Quoc V. Le and
Ruslan Salakhutdinov},
title = {Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context},
journal = {CoRR},
volume = {abs/1901.02860},
year = {2019},
//url = {},
//archivePrefix = {arXiv},
//eprint = {1901.02860},
//timestamp = {Fri, 01 Feb 2019 13:39:59 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Xintong Li and
Guanlin Li and
Lemao Liu and
Max Meng and
Shuming Shi},
//editor = {Anna Korhonen and
David R. Traum and
Llu{\'{\i}}s M{\`{a}}rquez},
title = {On the Word Alignment from Neural Machine Translation},
publisher = {Proceedings of the 57th Conference of the Association for Computational
Linguistics, {ACL} 2019, Florence, Italy, July 28- August 2, 2019,
Volume 1: Long Papers},
pages = {1293--1303},
//publisher = {Association for Computational Linguistics},
year = {2019},
//url = {},
//doi = {10.18653/v1/p19-1124},
//timestamp = {Tue, 28 Jan 2020 10:27:51 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Jiacheng Zhang and
Yang Liu and
Huanbo Luan and
Jingfang Xu and
Maosong Sun},
//editor = {Regina Barzilay and
Min{-}Yen Kan},
title = {Prior Knowledge Integration for Neural Machine Translation using Posterior
Regularization},
publisher = {Proceedings of the 55th Annual Meeting of the Association for Computational
Linguistics, {ACL} 2017, Vancouver, Canada, July 30 - August 4, Volume
1: Long Papers},
pages = {1514--1523},
//publisher = {Association for Computational Linguistics},
year = {2017},
//url = {},
//doi = {10.18653/v1/P17-1139},
//timestamp = {Tue, 20 Aug 2019 11:59:06 +0200},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Lesly Miculicich Werlen and
Dhananjay Ram and
Nikolaos Pappas and
James Henderson},
//editor = {Ellen Riloff and
David Chiang and
Julia Hockenmaier and
Jun'ichi Tsujii},
title = {Document-Level Neural Machine Translation with Hierarchical Attention
Networks},
publisher = {Proceedings of the 2018 Conference on Empirical Methods in Natural
Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
pages = {2947--2954},
//publisher = {Association for Computational Linguistics},
year = {2018},
//url = {},
//doi = {10.18653/v1/d18-1325},
//timestamp = {Fri, 27 Mar 2020 08:46:30 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Elena Voita and
Pavel Serdyukov and
Rico Sennrich and
Ivan Titov},
//editor = {Iryna Gurevych and
Yusuke Miyao},
title = {Context-Aware Neural Machine Translation Learns Anaphora Resolution},
publisher = {Proceedings of the 56th Annual Meeting of the Association for Computational
Linguistics, {ACL} 2018, Melbourne, Australia, July 15-20, 2018, Volume
1: Long Papers},
pages = {1264--1274},
//publisher = {Association for Computational Linguistics},
year = {2018},
//url = {},
//doi = {10.18653/v1/P18-1117},
//timestamp = {Mon, 16 Sep 2019 13:46:41 +0200},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Aishwarya Bhandare and
Vamsi Sripathi and
Deepthi Karkada and
Vivek Menon and
Sun Choi and
Kushal Datta and
Vikram Saletore},
title = {Efficient 8-Bit Quantization of Transformer Neural Machine Language
Translation Model},
journal = {CoRR},
volume = {abs/1906.00532},
year = {2019},
//url = {},
//archivePrefix = {arXiv},
//eprint = {1906.00532},
//timestamp = {Thu, 13 Jun 2019 13:36:00 +0200},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Matthieu Courbariaux and
Yoshua Bengio},
title = {BinaryNet: Training Deep Neural Networks with Weights and Activations
Constrained to +1 or -1},
journal = {CoRR},
volume = {abs/1602.02830},
year = {2016},
//url = {},
//archivePrefix = {arXiv},
//eprint = {1602.02830},
//timestamp = {Mon, 13 Aug 2018 16:46:57 +0200},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Wen Zhang and
Liang Huang and
Yang Feng and
Lei Shen and
Qun Liu},
//editor = {Ellen Riloff and
David Chiang and
Julia Hockenmaier and
Jun'ichi Tsujii},
title = {Speeding Up Neural Machine Translation Decoding by Cube Pruning},
publisher = {Proceedings of the 2018 Conference on Empirical Methods in Natural
Language Processing, Brussels, Belgium, October 31 - November 4, 2018},
pages = {4284--4294},
//publisher = {Association for Computational Linguistics},
year = {2018},
//url = {},
//timestamp = {Fri, 29 Nov 2019 14:00:46 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Abigail See and
Minh{-}Thang Luong and
Christopher D. Manning},
//editor = {Yoav Goldberg and
Stefan Riezler},
title = {Compression of Neural Machine Translation Models via Pruning},
publisher = {Proceedings of the 20th {SIGNLL} Conference on Computational Natural
Language Learning, CoNLL 2016, Berlin, Germany, August 11-12, 2016},
pages = {291--301},
//publisher = {{ACL}},
year = {2016},
//url = {},
//doi = {10.18653/v1/k16-1029},
//timestamp = {Tue, 28 Jan 2020 10:29:27 +0100},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Yun Chen and
Yang Liu and
Yong Cheng and
Victor O. K. Li},
//editor = {Regina Barzilay and
Min{-}Yen Kan},
title = {A Teacher-Student Framework for Zero-Resource Neural Machine Translation},
publisher = {Proceedings of the 55th Annual Meeting of the Association for Computational
Linguistics, {ACL} 2017, Vancouver, Canada, July 30 - August 4, Volume
1: Long Papers},
pages = {1925--1935},
//publisher = {Association for Computational Linguistics},
year = {2017},
//url = {},
//doi = {10.18653/v1/P17-1176},
//timestamp = {Tue, 20 Aug 2019 11:59:05 +0200},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Geoffrey E. Hinton and
Oriol Vinyals and
Jeffrey Dean},
title = {Distilling the Knowledge in a Neural Network},
journal = {CoRR},
volume = {abs/1503.02531},
year = {2015},
//url = {},
//archivePrefix = {arXiv},
//eprint = {1503.02531},
//timestamp = {Mon, 13 Aug 2018 16:48:36 +0200},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
author = {Siqi Sun and
Yu Cheng and
Zhe Gan and
Jingjing Liu},
//editor = {Kentaro Inui and
Jing Jiang and
Vincent Ng and
Xiaojun Wan},
title = {Patient Knowledge Distillation for {BERT} Model Compression},
publisher = {Proceedings of the 2019 Conference on Empirical Methods in Natural
Language Processing and the 9th International Joint Conference on
Natural Language Processing, {EMNLP-IJCNLP} 2019, Hong Kong, China,
November 3-7, 2019},
pages = {4322--4331},
//publisher = {Association for Computational Linguistics},
year = {2019},
//url = {},
//doi = {10.18653/v1/D19-1441},
//timestamp = {Mon, 06 Apr 2020 14:36:31 +0200},
//biburl = {},
//bibsource = {dblp computer science bibliography,}
%%%%% chapter 10------------------------------------------------------
Markdown 格式
您添加了 0 到此讨论。请谨慎行事。
注册 或者 后发表评论