Commit 65f8fd03 by zengxin

合并分支 'zengxin' 到 'caorunzhe'

Zengxin

查看合并请求 !655
parents 29da486c 017c3ecb
...@@ -4387,32 +4387,32 @@ year = {2012} ...@@ -4387,32 +4387,32 @@ year = {2012}
publisher = {Annual Meeting of the Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2013} year = {2013}
} }
@article{HochreiterThe, @inproceedings{HochreiterThe,
author = {Sepp Hochreiter}, author = {Sepp Hochreiter},
title = {The Vanishing Gradient Problem During Learning Recurrent Neural Nets title = {The Vanishing Gradient Problem During Learning Recurrent Neural Nets
and Problem Solutions}, and Problem Solutions},
journal = {International Journal of Uncertainty, Fuzziness and Knowledge-Based Systems}, publisher = {International Journal of Uncertainty, Fuzziness and Knowledge-Based Systems},
volume = {6}, volume = {6},
number = {2}, number = {2},
pages = {107--116}, pages = {107--116},
year = {1998} year = {1998}
} }
@article{BENGIO1994Learning, @inproceedings{BENGIO1994Learning,
author = {Yoshua Bengio and author = {Yoshua Bengio and
Patrice Y. Simard and Patrice Y. Simard and
Paolo Frasconi}, Paolo Frasconi},
title = {Learning long-term dependencies with gradient descent is difficult}, title = {Learning long-term dependencies with gradient descent is difficult},
journal = {IEEE Transportation Neural Networks}, publisher = {IEEE Transportation Neural Networks},
volume = {5}, volume = {5},
number = {2}, number = {2},
pages = {157--166}, pages = {157--166},
year = {1994} year = {1994}
} }
@article{StahlbergNeural, @inproceedings{StahlbergNeural,
title={Neural Machine Translation: A Review}, title={Neural Machine Translation: A Review},
author={Felix Stahlberg}, author={Felix Stahlberg},
journal={Journal of Artificial Intelligence Research}, publisher={Journal of Artificial Intelligence Research},
year={2020}, year={2020},
volume={69}, volume={69},
pages={343-418} pages={343-418}
...@@ -4427,7 +4427,7 @@ author = {Yoshua Bengio and ...@@ -4427,7 +4427,7 @@ author = {Yoshua Bengio and
publisher = {Annual Meeting of the Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016} year = {2016}
} }
@article{Hassan2018AchievingHP, @inproceedings{Hassan2018AchievingHP,
author = {Hany Hassan and author = {Hany Hassan and
Anthony Aue and Anthony Aue and
Chang Chen and Chang Chen and
...@@ -4453,7 +4453,7 @@ author = {Yoshua Bengio and ...@@ -4453,7 +4453,7 @@ author = {Yoshua Bengio and
Zhirui Zhang and Zhirui Zhang and
Ming Zhou}, Ming Zhou},
title = {Achieving Human Parity on Automatic Chinese to English News Translation}, title = {Achieving Human Parity on Automatic Chinese to English News Translation},
journal = {CoRR}, publisher = {CoRR},
volume = {abs/1803.05567}, volume = {abs/1803.05567},
year = {2018}, year = {2018},
} }
...@@ -4481,14 +4481,14 @@ author = {Yoshua Bengio and ...@@ -4481,14 +4481,14 @@ author = {Yoshua Bengio and
publisher = {AAAI Conference on Artificial Intelligence}, publisher = {AAAI Conference on Artificial Intelligence},
year = {2020} year = {2020}
} }
@article{HochreiterLong, @inproceedings{HochreiterLong,
author = {Hochreiter, Sepp and Schmidhuber, Jürgen}, author = {Hochreiter, Sepp and Schmidhuber, Jürgen},
year = {1997}, year = {1997},
month = {12}, month = {12},
pages = {1735--1780}, pages = {1735--1780},
title = {Long Short-term Memory}, title = {Long Short-term Memory},
volume = {9}, volume = {9},
journal = {Neural Computation} publisher = {Neural Computation}
} }
@inproceedings{Cho2014Learning, @inproceedings{Cho2014Learning,
author = {Kyunghyun Cho and author = {Kyunghyun Cho and
...@@ -4580,12 +4580,12 @@ author = {Yoshua Bengio and ...@@ -4580,12 +4580,12 @@ author = {Yoshua Bengio and
pages = {1538--1548}, pages = {1538--1548},
year = {2019} year = {2019}
} }
@article{Lei2017TrainingRA, @inproceedings{Lei2017TrainingRA,
author = {Tao Lei and author = {Tao Lei and
Yu Zhang and Yu Zhang and
Yoav Artzi}, Yoav Artzi},
title = {Training RNNs as Fast as CNNs}, title = {Training RNNs as Fast as CNNs},
journal = {CoRR}, publisher = {CoRR},
volume = {abs/1709.02755}, volume = {abs/1709.02755},
year = {2017} year = {2017}
} }
...@@ -4693,15 +4693,15 @@ author = {Yoshua Bengio and ...@@ -4693,15 +4693,15 @@ author = {Yoshua Bengio and
pages = {4772--4777}, pages = {4772--4777},
year = {2018} year = {2018}
} }
@article{DBLP:journals/corr/ZhangZ16c, @inproceedings{DBLP:journals/corr/ZhangZ16c,
author = {Jiajun Zhang and author = {Jiajun Zhang and
Chengqing Zong}, Chengqing Zong},
title = {Bridging Neural Machine Translation and Bilingual Dictionaries}, title = {Bridging Neural Machine Translation and Bilingual Dictionaries},
journal = {CoRR}, publisher = {CoRR},
volume = {abs/1610.07272}, volume = {abs/1610.07272},
year = {2016} year = {2016}
} }
@article{Dai2019TransformerXLAL, @inproceedings{Dai2019TransformerXLAL,
author = {Zihang Dai and author = {Zihang Dai and
Zhilin Yang and Zhilin Yang and
Yiming Yang and Yiming Yang and
...@@ -4709,7 +4709,7 @@ author = {Yoshua Bengio and ...@@ -4709,7 +4709,7 @@ author = {Yoshua Bengio and
Quoc V. Le and Quoc V. Le and
Ruslan Salakhutdinov}, Ruslan Salakhutdinov},
title = {Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context}, title = {Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context},
journal = {Annual Meeting of the Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
pages = {2978--2988}, pages = {2978--2988},
year = {2019} year = {2019}
} }
...@@ -4746,7 +4746,7 @@ author = {Yoshua Bengio and ...@@ -4746,7 +4746,7 @@ author = {Yoshua Bengio and
pages = {1264--1274}, pages = {1264--1274},
year = {2018} year = {2018}
} }
@article{DBLP:journals/corr/abs-1906-00532, @inproceedings{DBLP:journals/corr/abs-1906-00532,
author = {Aishwarya Bhandare and author = {Aishwarya Bhandare and
Vamsi Sripathi and Vamsi Sripathi and
Deepthi Karkada and Deepthi Karkada and
...@@ -4756,7 +4756,7 @@ author = {Yoshua Bengio and ...@@ -4756,7 +4756,7 @@ author = {Yoshua Bengio and
Vikram Saletore}, Vikram Saletore},
title = {Efficient 8-Bit Quantization of Transformer Neural Machine Language title = {Efficient 8-Bit Quantization of Transformer Neural Machine Language
Translation Model}, Translation Model},
journal = {CoRR}, publisher = {CoRR},
volume = {abs/1906.00532}, volume = {abs/1906.00532},
year = {2019} year = {2019}
} }
...@@ -4791,12 +4791,12 @@ author = {Yoshua Bengio and ...@@ -4791,12 +4791,12 @@ author = {Yoshua Bengio and
publisher = {Annual Meeting of the Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2017} year = {2017}
} }
@article{Hinton2015Distilling, @inproceedings{Hinton2015Distilling,
author = {Geoffrey Hinton and author = {Geoffrey Hinton and
Oriol Vinyals and Oriol Vinyals and
Jeffrey Dean}, Jeffrey Dean},
title = {Distilling the Knowledge in a Neural Network}, title = {Distilling the Knowledge in a Neural Network},
journal = {CoRR}, publisher = {CoRR},
volume = {abs/1503.02531}, volume = {abs/1503.02531},
year = {2015} year = {2015}
} }
...@@ -4827,10 +4827,10 @@ author = {Yoshua Bengio and ...@@ -4827,10 +4827,10 @@ author = {Yoshua Bengio and
publisher = {Conference on Empirical Methods in Natural Language Processing}, publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2016} year = {2016}
} }
@article{Akaike1969autoregressive, @inproceedings{Akaike1969autoregressive,
author = {Hirotugu Akaike}, author = {Hirotugu Akaike},
title = {Fitting autoregressive models for prediction}, title = {Fitting autoregressive models for prediction},
journal = {Annals of the Institute of Statistical Mathematics}, publisher = {Annals of the Institute of Statistical Mathematics},
volume = {21}, volume = {21},
number = {1}, number = {1},
year = {1969}, year = {1969},
pages = {243--247}, pages = {243--247},
...@@ -4958,14 +4958,14 @@ author = {Yoshua Bengio and ...@@ -4958,14 +4958,14 @@ author = {Yoshua Bengio and
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 11------------------------------------------------------ %%%%% chapter 11------------------------------------------------------
@article{DBLP:journals/pami/RenHG017, @inproceedings{DBLP:journals/pami/RenHG017,
author = {Shaoqing Ren and author = {Shaoqing Ren and
Kaiming He and Kaiming He and
Ross Girshick and Ross Girshick and
Jian Sun}, Jian Sun},
title = {Faster {R-CNN:} Towards Real-Time Object Detection with Region Proposal title = {Faster {R-CNN:} Towards Real-Time Object Detection with Region Proposal
Networks}, Networks},
journal = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence}, publisher = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
volume = {39}, volume = {39},
number = {6}, number = {6},
pages = {1137--1149}, pages = {1137--1149},
...@@ -5101,10 +5101,10 @@ author = {Yoshua Bengio and ...@@ -5101,10 +5101,10 @@ author = {Yoshua Bengio and
year = {2017} year = {2017}
} }
@article{2011Natural, @inproceedings{2011Natural,
title={Natural Language Processing (almost) from Scratch}, title={Natural Language Processing (almost) from Scratch},
author={ Collobert, Ronan and Weston, Jason and Bottou, Léon and Karlen, Michael and Kavukcuoglu, Koray and Kuksa, Pavel }, author={ Collobert, Ronan and Weston, Jason and Bottou, Léon and Karlen, Michael and Kavukcuoglu, Koray and Kuksa, Pavel },
journal={Journal of Machine Learning Research}, publisher={Journal of Machine Learning Research},
volume={12}, volume={12},
number={1}, number={1},
pages={2493-2537}, pages={2493-2537},
...@@ -5171,16 +5171,16 @@ author = {Yoshua Bengio and ...@@ -5171,16 +5171,16 @@ author = {Yoshua Bengio and
year = {2015} year = {2015}
} }
@article{StahlbergNeural, @inproceedings{StahlbergNeural,
title={Neural Machine Translation: A Review}, title={Neural Machine Translation: A Review},
author={Felix Stahlberg}, author={Felix Stahlberg},
journal={Journal of Artificial Intelligence Research}, publisher={Journal of Artificial Intelligence Research},
year={2020}, year={2020},
volume={69}, volume={69},
pages={343-418} pages={343-418}
} }
@article{Sennrich2016ImprovingNM, @inproceedings{Sennrich2016ImprovingNM,
author = {Rico Sennrich and author = {Rico Sennrich and
Barry Haddow and Barry Haddow and
Alexandra Birch}, Alexandra Birch},
...@@ -5198,27 +5198,27 @@ author = {Yoshua Bengio and ...@@ -5198,27 +5198,27 @@ author = {Yoshua Bengio and
year = {2015} year = {2015}
} }
@article{Waibel1989PhonemeRU, @inproceedings{Waibel1989PhonemeRU,
title={Phoneme recognition using time-delay neural networks}, title={Phoneme recognition using time-delay neural networks},
author={Alexander Waibel and Toshiyuki Hanazawa and Geoffrey Hinton and Kiyohiro Shikano and Kevin J. Lang}, author={Alexander Waibel and Toshiyuki Hanazawa and Geoffrey Hinton and Kiyohiro Shikano and Kevin J. Lang},
journal={IEEE Transactions on Acoustics, Speech, and Signal Processing}, publisher={IEEE Transactions on Acoustics, Speech, and Signal Processing},
year={1989}, year={1989},
volume={37}, volume={37},
pages={328-339} pages={328-339}
} }
@article{LeCun1989BackpropagationAT, @inproceedings{LeCun1989BackpropagationAT,
title={Backpropagation Applied to Handwritten Zip Code Recognition}, title={Backpropagation Applied to Handwritten Zip Code Recognition},
author={Yann Lecun and Bernhard Boser and John Denker and Don Henderson and Richard E. Howard and Wayne E. Hubbard and Larry Jackel}, author={Yann Lecun and Bernhard Boser and John Denker and Don Henderson and Richard E. Howard and Wayne E. Hubbard and Larry Jackel},
journal={Neural Computation}, publisher={Neural Computation},
year={1989}, year={1989},
volume={1}, volume={1},
pages={541-551} pages={541-551}
} }
@article{726791, @inproceedings{726791,
author={Yann {Lecun} and Leon {Bottou} and Yoshua {Bengio} and Patrick {Haffner}}, author={Yann {Lecun} and Leon {Bottou} and Yoshua {Bengio} and Patrick {Haffner}},
journal={Proceedings of the IEEE}, publisher={Proceedings of the IEEE},
title={Gradient-based learning applied to document recognition}, title={Gradient-based learning applied to document recognition},
year={1998}, year={1998},
volume={86}, volume={86},
...@@ -5248,18 +5248,18 @@ author = {Yoshua Bengio and ...@@ -5248,18 +5248,18 @@ author = {Yoshua Bengio and
year = {2017} year = {2017}
} }
@article{Girshick2015FastR, @inproceedings{Girshick2015FastR,
title={Fast R-CNN}, title={Fast R-CNN},
author={Ross Girshick}, author={Ross Girshick},
journal={International Conference on Computer Vision}, publisher={International Conference on Computer Vision},
year={2015}, year={2015},
pages={1440-1448} pages={1440-1448}
} }
@article{He2020MaskR, @inproceedings{He2020MaskR,
title={Mask R-CNN}, title={Mask R-CNN},
author={Kaiming He and Georgia Gkioxari and Piotr Doll{\'a}r and Ross B. Girshick}, author={Kaiming He and Georgia Gkioxari and Piotr Doll{\'a}r and Ross B. Girshick},
journal={International Conference on Computer Vision}, publisher={International Conference on Computer Vision},
pages={2961--2969}, pages={2961--2969},
year={2017} year={2017}
} }
...@@ -5336,12 +5336,12 @@ author = {Yoshua Bengio and ...@@ -5336,12 +5336,12 @@ author = {Yoshua Bengio and
year = {2017} year = {2017}
} }
@article{Kaiser2018DepthwiseSC, @inproceedings{Kaiser2018DepthwiseSC,
title={Depthwise Separable Convolutions for Neural Machine Translation}, title={Depthwise Separable Convolutions for Neural Machine Translation},
author = {Lukasz Kaiser and author = {Lukasz Kaiser and
Aidan N. Gomez and Aidan N. Gomez and
Fran{\c{c}}ois Chollet}, Fran{\c{c}}ois Chollet},
journal = {International Conference on Learning Representations}, publisher = {International Conference on Learning Representations},
year={2018}, year={2018},
} }
...@@ -5365,7 +5365,7 @@ author = {Yoshua Bengio and ...@@ -5365,7 +5365,7 @@ author = {Yoshua Bengio and
year = {2013} year = {2013}
} }
@article{Wu2016GooglesNM, @inproceedings{Wu2016GooglesNM,
title={Google's Neural Machine Translation System: Bridging the Gap between Human and Machine Translation}, title={Google's Neural Machine Translation System: Bridging the Gap between Human and Machine Translation},
author = {Yonghui Wu and author = {Yonghui Wu and
Mike Schuster and Mike Schuster and
...@@ -5398,7 +5398,7 @@ author = {Yoshua Bengio and ...@@ -5398,7 +5398,7 @@ author = {Yoshua Bengio and
Greg Corrado and Greg Corrado and
Macduff Hughes and Macduff Hughes and
Jeffrey Dean}, Jeffrey Dean},
journal = {CoRR}, publisher = {CoRR},
year={2016}, year={2016},
volume={abs/1609.08144} volume={abs/1609.08144}
} }
...@@ -5444,32 +5444,32 @@ author = {Yoshua Bengio and ...@@ -5444,32 +5444,32 @@ author = {Yoshua Bengio and
year={2013} year={2013}
} }
@article{Bengio2013AdvancesIO, @inproceedings{Bengio2013AdvancesIO,
title={Advances in optimizing recurrent networks}, title={Advances in optimizing recurrent networks},
author={Yoshua Bengio and Nicolas Boulanger-Lewandowski and Razvan Pascanu}, author={Yoshua Bengio and Nicolas Boulanger-Lewandowski and Razvan Pascanu},
journal={IEEE International Conference on Acoustics, Speech and Signal Processing}, publisher={IEEE International Conference on Acoustics, Speech and Signal Processing},
year={2013}, year={2013},
pages={8624--8628} pages={8624--8628}
} }
@article{JMLR:v15:srivastava14a, @inproceedings{JMLR:v15:srivastava14a,
author = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov}, author = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov},
title = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting}, title = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting},
journal = {Journal of Machine Learning Research}, publisher = {Journal of Machine Learning Research},
year = {2014}, year = {2014},
volume = {15}, volume = {15},
pages = {1929-1958}, pages = {1929-1958},
} }
@article{Chollet2017XceptionDL, @inproceedings{Chollet2017XceptionDL,
title={Xception: Deep Learning with Depthwise Separable Convolutions}, title={Xception: Deep Learning with Depthwise Separable Convolutions},
author = {Fran{\c{c}}ois Chollet}, author = {Fran{\c{c}}ois Chollet},
journal={IEEE Conference on Computer Vision and Pattern Recognition}, publisher={IEEE Conference on Computer Vision and Pattern Recognition},
year={2017}, year={2017},
pages={1800-1807} pages={1800-1807}
} }
@article{Howard2017MobileNetsEC, @inproceedings{Howard2017MobileNetsEC,
title={MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications}, title={MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications},
author = {Andrew Howard and author = {Andrew Howard and
Menglong Zhu and Menglong Zhu and
...@@ -5483,17 +5483,17 @@ author = {Yoshua Bengio and ...@@ -5483,17 +5483,17 @@ author = {Yoshua Bengio and
year={2017}, year={2017},
} }
@article{sifre2014rigid, @inproceedings{sifre2014rigid,
title={Rigid-motion scattering for image classification}, title={Rigid-motion scattering for image classification},
author={Sifre, Laurent and Mallat, St{\'e}phane}, author={Sifre, Laurent and Mallat, St{\'e}phane},
year={2014}, year={2014},
journal={Citeseer} publisher={Citeseer}
} }
@article{Taigman2014DeepFaceCT, @inproceedings{Taigman2014DeepFaceCT,
title={DeepFace: Closing the Gap to Human-Level Performance in Face Verification}, title={DeepFace: Closing the Gap to Human-Level Performance in Face Verification},
author={Yaniv Taigman and Ming Yang and Marc'Aurelio Ranzato and Lior Wolf}, author={Yaniv Taigman and Ming Yang and Marc'Aurelio Ranzato and Lior Wolf},
journal={IEEE Conference on Computer Vision and Pattern Recognition}, publisher={IEEE Conference on Computer Vision and Pattern Recognition},
year={2014}, year={2014},
pages={1701-1708} pages={1701-1708}
} }
...@@ -5511,7 +5511,7 @@ author = {Yoshua Bengio and ...@@ -5511,7 +5511,7 @@ author = {Yoshua Bengio and
year={2015} year={2015}
} }
@article{Chen2020DynamicCA, @inproceedings{Chen2020DynamicCA,
title={Dynamic Convolution: Attention Over Convolution Kernels}, title={Dynamic Convolution: Attention Over Convolution Kernels},
author = {Yinpeng Chen and author = {Yinpeng Chen and
Xiyang Dai and Xiyang Dai and
...@@ -5519,7 +5519,7 @@ author = {Yoshua Bengio and ...@@ -5519,7 +5519,7 @@ author = {Yoshua Bengio and
Dongdong Chen and Dongdong Chen and
Lu Yuan and Lu Yuan and
Zicheng Liu}, Zicheng Liu},
journal = {IEEE Conference on Computer Vision and Pattern Recognition}, publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
year={2020}, year={2020},
pages={11027-11036} pages={11027-11036}
} }
...@@ -5579,7 +5579,7 @@ author = {Yoshua Bengio and ...@@ -5579,7 +5579,7 @@ author = {Yoshua Bengio and
year = {2018} year = {2018}
} }
@article{DBLP:journals/corr/abs-1802-05751, @inproceedings{DBLP:journals/corr/abs-1802-05751,
author = {Niki Parmar and author = {Niki Parmar and
Ashish Vaswani and Ashish Vaswani and
Jakob Uszkoreit and Jakob Uszkoreit and
...@@ -5587,7 +5587,7 @@ author = {Yoshua Bengio and ...@@ -5587,7 +5587,7 @@ author = {Yoshua Bengio and
Noam Shazeer and Noam Shazeer and
Alexander Ku}, Alexander Ku},
title = {Image Transformer}, title = {Image Transformer},
journal = {CoRR}, publisher = {CoRR},
volume = {abs/1802.05751}, volume = {abs/1802.05751},
year = {2018} year = {2018}
} }
...@@ -5612,17 +5612,17 @@ author = {Yoshua Bengio and ...@@ -5612,17 +5612,17 @@ author = {Yoshua Bengio and
year = {2020} year = {2020}
} }
@article{DBLP:journals/corr/abs-2004-05150, @inproceedings{DBLP:journals/corr/abs-2004-05150,
author = {Iz Beltagy and author = {Iz Beltagy and
Matthew E. Peters and Matthew E. Peters and
Arman Cohan}, Arman Cohan},
title = {Longformer: The Long-Document Transformer}, title = {Longformer: The Long-Document Transformer},
journal = {CoRR}, publisher = {CoRR},
volume = {abs/2004.05150}, volume = {abs/2004.05150},
year = {2020} year = {2020}
} }
@article{DBLP:journals/corr/abs-2005-00743, @inproceedings{DBLP:journals/corr/abs-2005-00743,
author = {Yi Tay and author = {Yi Tay and
Dara Bahri and Dara Bahri and
Donald Metzler and Donald Metzler and
...@@ -5630,7 +5630,7 @@ author = {Yoshua Bengio and ...@@ -5630,7 +5630,7 @@ author = {Yoshua Bengio and
Zhe Zhao and Zhe Zhao and
Che Zheng}, Che Zheng},
title = {Synthesizer: Rethinking Self-Attention in Transformer Models}, title = {Synthesizer: Rethinking Self-Attention in Transformer Models},
journal = {CoRR}, publisher = {CoRR},
volume = {abs/2005.00743}, volume = {abs/2005.00743},
year = {2020} year = {2020}
} }
...@@ -5691,10 +5691,10 @@ author = {Yoshua Bengio and ...@@ -5691,10 +5691,10 @@ author = {Yoshua Bengio and
pages = {770--778}, pages = {770--778},
year = {2016}, year = {2016},
} }
@article{JMLR:v15:srivastava14a, @inproceedings{JMLR:v15:srivastava14a,
author = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov}, author = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov},
title = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting}, title = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting},
journal = {Journal of Machine Learning Research}, publisher = {Journal of Machine Learning Research},
year = {2014}, year = {2014},
volume = {15}, volume = {15},
pages = {1929-1958}, pages = {1929-1958},
...@@ -5719,12 +5719,12 @@ author = {Yoshua Bengio and ...@@ -5719,12 +5719,12 @@ author = {Yoshua Bengio and
pages = {1789--1798}, pages = {1789--1798},
year = {2018}, year = {2018},
} }
@article{DBLP:journals/corr/CourbariauxB16, @inproceedings{DBLP:journals/corr/CourbariauxB16,
author = {Matthieu Courbariaux and author = {Matthieu Courbariaux and
Yoshua Bengio}, Yoshua Bengio},
title = {BinaryNet: Training Deep Neural Networks with Weights and Activations title = {BinaryNet: Training Deep Neural Networks with Weights and Activations
Constrained to +1 or -1}, Constrained to +1 or -1},
journal = {CoRR}, publisher = {CoRR},
volume = {abs/1602.02830}, volume = {abs/1602.02830},
year = {2016}, year = {2016},
} }
...@@ -5751,10 +5751,10 @@ author = {Yoshua Bengio and ...@@ -5751,10 +5751,10 @@ author = {Yoshua Bengio and
publisher = {Annual Meeting of the Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019} year = {2019}
} }
@article{Liu2020LearningTE, @inproceedings{Liu2020LearningTE,
title={Learning to Encode Position for Transformer with Continuous Dynamical Model}, title={Learning to Encode Position for Transformer with Continuous Dynamical Model},
author={Xuanqing Liu and Hsiang-Fu Yu and Inderjit Dhillon and Cho-Jui Hsieh}, author={Xuanqing Liu and Hsiang-Fu Yu and Inderjit Dhillon and Cho-Jui Hsieh},
journal={ArXiv}, publisher={ArXiv},
year={2020}, year={2020},
volume={abs/2003.09229} volume={abs/2003.09229}
} }
...@@ -5787,10 +5787,10 @@ author = {Yoshua Bengio and ...@@ -5787,10 +5787,10 @@ author = {Yoshua Bengio and
publisher = {Annual Meeting of the Association for Computational Linguistics}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019}, year = {2019},
} }
@article{Wang2018MultilayerRF, @inproceedings{Wang2018MultilayerRF,
title={Multi-layer Representation Fusion for Neural Machine Translation}, title={Multi-layer Representation Fusion for Neural Machine Translation},
author={Qiang Wang and Fuxue Li and Tong Xiao and Yanyang Li and Yinqiao Li and Jingbo Zhu}, author={Qiang Wang and Fuxue Li and Tong Xiao and Yanyang Li and Yinqiao Li and Jingbo Zhu},
journal={International Conference on Computational Linguistics}, publisher={International Conference on Computational Linguistics},
year={2018}, year={2018},
volume={abs/2002.06714} volume={abs/2002.06714}
} }
...@@ -5863,27 +5863,27 @@ author = {Yoshua Bengio and ...@@ -5863,27 +5863,27 @@ author = {Yoshua Bengio and
year = {2018} year = {2018}
} }
@article{Kitaev2020ReformerTE, @inproceedings{Kitaev2020ReformerTE,
author = {Nikita Kitaev and author = {Nikita Kitaev and
Lukasz Kaiser and Lukasz Kaiser and
Anselm Levskaya}, Anselm Levskaya},
title = {Reformer: The Efficient Transformer}, title = {Reformer: The Efficient Transformer},
journal = {International Conference on Learning Representations}, publisher = {International Conference on Learning Representations},
year = {2020} year = {2020}
} }
@article{Lin2020WeightDT, @inproceedings{Lin2020WeightDT,
title={Weight Distillation: Transferring the Knowledge in Neural Network Parameters}, title={Weight Distillation: Transferring the Knowledge in Neural Network Parameters},
author={Ye Lin and Yanyang Li and Ziyang Wang and Bei Li and Quan Du and Tong Xiao and Jingbo Zhu}, author={Ye Lin and Yanyang Li and Ziyang Wang and Bei Li and Quan Du and Tong Xiao and Jingbo Zhu},
journal={ArXiv}, publisher={ArXiv},
year={2020}, year={2020},
volume={abs/2009.09152} volume={abs/2009.09152}
} }
@article{li2020shallow, @inproceedings{li2020shallow,
title={Shallow-to-Deep Training for Neural Machine Translation}, title={Shallow-to-Deep Training for Neural Machine Translation},
author={Li, Bei and Wang, Ziyang and Liu, Hui and Jiang, Yufan and Du, Quan and Xiao, Tong and Wang, Huizhen and Zhu, Jingbo}, author={Li, Bei and Wang, Ziyang and Liu, Hui and Jiang, Yufan and Du, Quan and Xiao, Tong and Wang, Huizhen and Zhu, Jingbo},
journal={Conference on Empirical Methods in Natural Language Processing}, publisher={Conference on Empirical Methods in Natural Language Processing},
year={2020} year={2020}
} }
%%%%% chapter 12------------------------------------------------------ %%%%% chapter 12------------------------------------------------------
...@@ -14049,50 +14049,50 @@ author = {Zhuang Liu and ...@@ -14049,50 +14049,50 @@ author = {Zhuang Liu and
year = {2017} year = {2017}
} }
@article{DBLP:journals/corr/abs-1805-01553, @inproceedings{DBLP:journals/corr/abs-1805-01553,
author = {Tsz Kin Lam and author = {Tsz Kin Lam and
Julia Kreutzer and Julia Kreutzer and
Stefan Riezler}, Stefan Riezler},
title = {A Reinforcement Learning Approach to Interactive-Predictive Neural title = {A Reinforcement Learning Approach to Interactive-Predictive Neural
Machine Translation}, Machine Translation},
journal = {CoRR}, publisher = {CoRR},
volume = {abs/1805.01553}, volume = {abs/1805.01553},
year = {2018} year = {2018}
} }
@article{DBLP:journals/mt/DomingoPC17, @inproceedings{DBLP:journals/mt/DomingoPC17,
author = {Miguel Domingo and author = {Miguel Domingo and
{\'{A}}lvaro Peris and {\'{A}}lvaro Peris and
Francisco Casacuberta}, Francisco Casacuberta},
title = {Segment-based interactive-predictive machine translation}, title = {Segment-based interactive-predictive machine translation},
journal = {Machine Translation}, publisher = {Machine Translation},
volume = {31}, volume = {31},
number = {4}, number = {4},
pages = {163--185}, pages = {163--185},
year = {2017} year = {2017}
} }
@article{Peris2017InteractiveNM, @inproceedings{Peris2017InteractiveNM,
title={Interactive neural machine translation}, title={Interactive neural machine translation},
author={{\'A}lvaro Peris and Miguel Domingo and F. Casacuberta}, author={{\'A}lvaro Peris and Miguel Domingo and F. Casacuberta},
journal={Computer Speech and Language}, publisher={Computer Speech and Language},
year={2017}, year={2017},
volume={45}, volume={45},
pages={201-220} pages={201-220}
} }
@article{DBLP:journals/csl/PerisC19, @inproceedings{DBLP:journals/csl/PerisC19,
author = {{\'{A}}lvaro Peris and author = {{\'{A}}lvaro Peris and
Francisco Casacuberta}, Francisco Casacuberta},
title = {Online learning for effort reduction in interactive neural machine title = {Online learning for effort reduction in interactive neural machine
translation}, translation},
journal = {Computer Speech and Language}, publisher = {Computer Speech and Language},
volume = {58}, volume = {58},
pages = {98--126}, pages = {98--126},
year = {2019} year = {2019}
} }
@article{DBLP:journals/coling/BarrachinaBCCCKLNTVV09, @inproceedings{DBLP:journals/coling/BarrachinaBCCCKLNTVV09,
author = {Sergio Barrachina and author = {Sergio Barrachina and
Oliver Bender and Oliver Bender and
Francisco Casacuberta and Francisco Casacuberta and
...@@ -14105,7 +14105,7 @@ author = {Zhuang Liu and ...@@ -14105,7 +14105,7 @@ author = {Zhuang Liu and
Enrique Vidal and Enrique Vidal and
Juan Miguel Vilar}, Juan Miguel Vilar},
title = {Statistical Approaches to Computer-Assisted Translation}, title = {Statistical Approaches to Computer-Assisted Translation},
journal = {Computational Linguistics}, publisher = {Computational Linguistics},
volume = {35}, volume = {35},
number = {1}, number = {1},
pages = {3--28}, pages = {3--28},
...@@ -14131,14 +14131,14 @@ author = {Zhuang Liu and ...@@ -14131,14 +14131,14 @@ author = {Zhuang Liu and
year = {2016} year = {2016}
} }
@article{DBLP:journals/corr/abs-2010-05680, @inproceedings{DBLP:journals/corr/abs-2010-05680,
author = {Jiarui Fang and author = {Jiarui Fang and
Yang Yu and Yang Yu and
Chengduo Zhao and Chengduo Zhao and
Jie Zhou}, Jie Zhou},
title = {TurboTransformers: An Efficient {GPU} Serving System For Transformer title = {TurboTransformers: An Efficient {GPU} Serving System For Transformer
Models}, Models},
journal = {CoRR}, publisher = {CoRR},
volume = {abs/2010.05680}, volume = {abs/2010.05680},
year = {2020} year = {2020}
} }
...@@ -14155,13 +14155,13 @@ author = {Zhuang Liu and ...@@ -14155,13 +14155,13 @@ author = {Zhuang Liu and
year = {2018} year = {2018}
} }
@article{DBLP:journals/corr/BolukbasiWDS17, @inproceedings{DBLP:journals/corr/BolukbasiWDS17,
author = {Tolga Bolukbasi and author = {Tolga Bolukbasi and
Joseph Wang and Joseph Wang and
Ofer Dekel and Ofer Dekel and
Venkatesh Saligrama}, Venkatesh Saligrama},
title = {Adaptive Neural Networks for Fast Test-Time Prediction}, title = {Adaptive Neural Networks for Fast Test-Time Prediction},
journal = {CoRR}, publisher = {CoRR},
volume = {abs/1702.07811}, volume = {abs/1702.07811},
year = {2017} year = {2017}
} }
...@@ -14188,13 +14188,13 @@ author = {Zhuang Liu and ...@@ -14188,13 +14188,13 @@ author = {Zhuang Liu and
year = {2020} year = {2020}
} }
@article{DBLP:journals/corr/abs-1912-00567, @inproceedings{DBLP:journals/corr/abs-1912-00567,
author = {Tao Wang and author = {Tao Wang and
Shaohui Kuang and Shaohui Kuang and
Deyi Xiong and Deyi Xiong and
Ant{\'{o}}nio Branco}, Ant{\'{o}}nio Branco},
title = {Merging External Bilingual Pairs into Neural Machine Translation}, title = {Merging External Bilingual Pairs into Neural Machine Translation},
journal = {CoRR}, publisher = {CoRR},
volume = {abs/1912.00567}, volume = {abs/1912.00567},
year = {2019} year = {2019}
} }
...@@ -14292,20 +14292,20 @@ author = {Zhuang Liu and ...@@ -14292,20 +14292,20 @@ author = {Zhuang Liu and
year = {2018} year = {2018}
} }
@article{barone2017regularization, @inproceedings{barone2017regularization,
title={Regularization techniques for fine-tuning in neural machine translation}, title={Regularization techniques for fine-tuning in neural machine translation},
author={Barone, Antonio Valerio Miceli and Haddow, Barry and Germann, Ulrich and Sennrich, Rico}, author={Barone, Antonio Valerio Miceli and Haddow, Barry and Germann, Ulrich and Sennrich, Rico},
journal={arXiv preprint arXiv:1707.09920}, publisher={arXiv preprint arXiv:1707.09920},
year={2017} year={2017}
} }
@article{DBLP:journals/corr/ChuDK17, @inproceedings{DBLP:journals/corr/ChuDK17,
author = {Chenhui Chu and author = {Chenhui Chu and
Raj Dabre and Raj Dabre and
Sadao Kurohashi}, Sadao Kurohashi},
title = {An Empirical Comparison of Simple Domain Adaptation Methods for Neural title = {An Empirical Comparison of Simple Domain Adaptation Methods for Neural
Machine Translation}, Machine Translation},
journal = {CoRR}, publisher = {CoRR},
volume = {abs/1701.03214}, volume = {abs/1701.03214},
year = {2017} year = {2017}
} }
...@@ -14393,7 +14393,7 @@ author = {Zhuang Liu and ...@@ -14393,7 +14393,7 @@ author = {Zhuang Liu and
year = {2010} year = {2010}
} }
@article{vilar2012jane, @inproceedings{vilar2012jane,
title={Jane: an advanced freely available hierarchical machine translation toolkit}, title={Jane: an advanced freely available hierarchical machine translation toolkit},
author={Vilar, David and Stein, Daniel and Huck, Matthias and Ney, Hermann}, author={Vilar, David and Stein, Daniel and Huck, Matthias and Ney, Hermann},
publisher={Machine Translation}, publisher={Machine Translation},
...@@ -14413,7 +14413,7 @@ author = {Zhuang Liu and ...@@ -14413,7 +14413,7 @@ author = {Zhuang Liu and
year = {2013} year = {2013}
} }
@article{al2016theano, @inproceedings{al2016theano,
author = {Rami Al-Rfou and author = {Rami Al-Rfou and
Guillaume Alain and Guillaume Alain and
Amjad Almahairi and Amjad Almahairi and
...@@ -14528,7 +14528,7 @@ author = {Zhuang Liu and ...@@ -14528,7 +14528,7 @@ author = {Zhuang Liu and
Ying Zhang}, Ying Zhang},
title = {Theano: {A} Python framework for fast computation of mathematical title = {Theano: {A} Python framework for fast computation of mathematical
expressions}, expressions},
journal = {CoRR}, publisher = {CoRR},
volume = {abs/1605.02688}, volume = {abs/1605.02688},
year = {2016} year = {2016}
} }
...@@ -14583,11 +14583,11 @@ author = {Zhuang Liu and ...@@ -14583,11 +14583,11 @@ author = {Zhuang Liu and
year = {2007} year = {2007}
} }
@article{och2003systematic, @inproceedings{och2003systematic,
author = {Franz Josef Och and author = {Franz Josef Och and
Hermann Ney}, Hermann Ney},
title = {A Systematic Comparison of Various Statistical Alignment Models}, title = {A Systematic Comparison of Various Statistical Alignment Models},
journal = {Computational Linguistics}, publisher = {Computational Linguistics},
volume = {29}, volume = {29},
number = {1}, number = {1},
pages = {19--51}, pages = {19--51},
...@@ -14661,7 +14661,7 @@ author = {Zhuang Liu and ...@@ -14661,7 +14661,7 @@ author = {Zhuang Liu and
year = {2016} year = {2016}
} }
@article{ZhangTHUMT, @inproceedings{ZhangTHUMT,
author = {Jiacheng Zhang and author = {Jiacheng Zhang and
Yanzhuo Ding and Yanzhuo Ding and
Shiqi Shen and Shiqi Shen and
...@@ -14670,7 +14670,7 @@ author = {Zhuang Liu and ...@@ -14670,7 +14670,7 @@ author = {Zhuang Liu and
Huan-Bo Luan and Huan-Bo Luan and
Yang Liu}, Yang Liu},
title = {{THUMT:} An Open Source Toolkit for Neural Machine Translation}, title = {{THUMT:} An Open Source Toolkit for Neural Machine Translation},
journal = {CoRR}, publisher = {CoRR},
volume = {abs/1706.06415}, volume = {abs/1706.06415},
year = {2017} year = {2017}
} }
...@@ -14694,7 +14694,7 @@ author = {Zhuang Liu and ...@@ -14694,7 +14694,7 @@ author = {Zhuang Liu and
year = {2018} year = {2018}
} }
@article{hieber2017sockeye, @inproceedings{hieber2017sockeye,
author = {Felix Hieber and author = {Felix Hieber and
Tobias Domhan and Tobias Domhan and
Michael Denkowski and Michael Denkowski and
...@@ -14703,7 +14703,7 @@ author = {Zhuang Liu and ...@@ -14703,7 +14703,7 @@ author = {Zhuang Liu and
Ann Clifton and Ann Clifton and
Matt Post}, Matt Post},
title = {Sockeye: {A} Toolkit for Neural Machine Translation}, title = {Sockeye: {A} Toolkit for Neural Machine Translation},
journal = {CoRR}, publisher = {CoRR},
volume = {abs/1712.05690}, volume = {abs/1712.05690},
year = {2017} year = {2017}
} }
...@@ -14719,7 +14719,7 @@ author = {Zhuang Liu and ...@@ -14719,7 +14719,7 @@ author = {Zhuang Liu and
year = {2018} year = {2018}
} }
@article{DBLP:journals/corr/abs-1805-10387, @inproceedings{DBLP:journals/corr/abs-1805-10387,
author = {Oleksii Kuchaiev and author = {Oleksii Kuchaiev and
Boris Ginsburg and Boris Ginsburg and
Igor Gitman and Igor Gitman and
...@@ -14728,12 +14728,12 @@ author = {Zhuang Liu and ...@@ -14728,12 +14728,12 @@ author = {Zhuang Liu and
Paulius Micikevicius}, Paulius Micikevicius},
title = {OpenSeq2Seq: extensible toolkit for distributed and mixed precision title = {OpenSeq2Seq: extensible toolkit for distributed and mixed precision
training of sequence-to-sequence models}, training of sequence-to-sequence models},
journal = {CoRR}, publisher = {CoRR},
volume = {abs/1805.10387}, volume = {abs/1805.10387},
year = {2018} year = {2018}
} }
@article{nmtpy2017, @inproceedings{nmtpy2017,
author = {Ozan Caglayan and author = {Ozan Caglayan and
Mercedes Garc{\'{\i}}a-Mart{\'{\i}}nez and Mercedes Garc{\'{\i}}a-Mart{\'{\i}}nez and
Adrien Bardet and Adrien Bardet and
...@@ -14742,7 +14742,7 @@ author = {Zhuang Liu and ...@@ -14742,7 +14742,7 @@ author = {Zhuang Liu and
Lo{\"{\i}}c Barrault}, Lo{\"{\i}}c Barrault},
title = {{NMTPY:} {A} Flexible Toolkit for Advanced Neural Machine Translation title = {{NMTPY:} {A} Flexible Toolkit for Advanced Neural Machine Translation
Systems}, Systems},
journal = {The Prague Bulletin of Mathematical Linguistics}, publisher = {The Prague Bulletin of Mathematical Linguistics},
volume = {109}, volume = {109},
pages = {15--28}, pages = {15--28},
year = {2017} year = {2017}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论