ref of chapter 9

3f391056 · 孟霞 · 7082772e · 3f391056
Commit 3f391056 authored Sep 09, 2020 by 孟霞
--- a/bibliography.bib
+++ b/bibliography.bib
@@ -3508,6 +3508,411 @@

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%% chapter 9------------------------------------------------------
+% Brown et al. (1992), class-based n-gram language models.
+% Each author is a single "Last, First" item and items are joined by " and ",
+% which is the separator BibTeX uses to split the list into five people.
+@article{brown1992class,
+  author    = {Brown, Peter F. and
+               deSouza, Peter V. and
+               Mercer, Robert L. and
+               Della Pietra, Vincent J. and
+               Lai, Jenifer C.},
+  title     = {Class-based n-gram models of natural language},
+  journal   = {Computational Linguistics},
+  volume    = {18},
+  number    = {4},
+  pages     = {467--479},
+  year      = {1992},
+  publisher = {MIT Press}
+}
+
+% Mikolov and Zweig (2012), context-dependent RNN language model (IEEE SLT workshop).
+% Two authors, each written "Last, First"; " and " only separates people.
+@inproceedings{mikolov2012context,
+  author       = {Mikolov, Tomas and
+                  Zweig, Geoffrey},
+  title        = {Context dependent recurrent neural network language model},
+  booktitle    = {2012 IEEE Spoken Language Technology Workshop (SLT)},
+  pages        = {234--239},
+  year         = {2012},
+  organization = {IEEE}
+}
+
+% Zaremba, Sutskever and Vinyals (2014), regularization for recurrent networks (arXiv preprint).
+% Three authors, each written "Last, First"; " and " only separates people.
+@article{zaremba2014recurrent,
+  author    = {Zaremba, Wojciech and
+               Sutskever, Ilya and
+               Vinyals, Oriol},
+  title     = {Recurrent Neural Network Regularization},
+  journal   = {arXiv: Neural and Evolutionary Computing},
+  year      = {2014}
+}
+
+% Zilly et al. (2016), Recurrent Highway Networks (arXiv preprint).
+% Four authors, each written "Last, First"; accented letters use the
+% brace-wrapped LaTeX form so classic BibTeX treats each as one character.
+@article{zilly2016recurrent,
+  author    = {Zilly, Julian and
+               Srivastava, Rupesh Kumar and
+               Koutn{\'{\i}}k, Jan and
+               Schmidhuber, J{\"u}rgen},
+  title     = {Recurrent Highway Networks},
+  journal   = {arXiv: Learning},
+  year      = {2016}
+}
+
+% Merity, Keskar and Socher (2017), regularizing/optimizing LSTM language models (arXiv preprint).
+% Three authors in "Last, First" form. {LSTM} is braced so styles that
+% sentence-case titles do not lowercase the acronym.
+@article{merity2017regularizing,
+  author    = {Merity, Stephen and
+               Keskar, Nitish Shirish and
+               Socher, Richard},
+  title     = {Regularizing and optimizing {LSTM} language models},
+  journal   = {arXiv: Computation and Language},
+  year      = {2017}
+}
+
+% Radford et al. (2019), "Language models are unsupervised multitask learners" (OpenAI Blog).
+@article{radford2019language,
+    author  = {Radford, Alec and
+               Wu, Jeffrey and
+               Child, Rewon and
+               Luan, David and
+               Amodei, Dario and
+               Sutskever, Ilya},
+    title   = {Language models are unsupervised multitask learners},
+    journal = {OpenAI Blog},
+    year    = {2019},
+    volume  = {1},
+    number  = {8},
+    pages   = {9}
+}
+
+% Baydin et al. (2017), survey of automatic differentiation in machine learning (JMLR 18).
+% Publisher is spelled "JMLR.org" (no inner space), matching the other JMLR
+% entry in this file; the first author's given name carries its cedilla.
+@article{baydin2017automatic,
+  author    = {Baydin, At{\i}l{\i}m G{\"u}ne{\c{s}} and
+               Pearlmutter, Barak A and
+               Radul, Alexey Andreyevich and
+               Siskind, Jeffrey Mark},
+  title     = {Automatic differentiation in machine learning: a survey},
+  journal   = {The Journal of Machine Learning Research},
+  volume    = {18},
+  number    = {1},
+  pages     = {5595--5637},
+  year      = {2017},
+  publisher = {JMLR.org}
+}
+
+% Qian (1999), momentum term in gradient descent learning (Neural Networks 12(1)).
+% The //-prefixed fields are unrecognised field names, so styles skip them.
+@article{qian1999momentum,
+    author  = {Ning Qian},
+    title   = {On the momentum term in gradient descent learning algorithms},
+    journal = {Neural Networks},
+    year    = {1999},
+    volume  = {12},
+    number  = {1},
+    pages   = {145--151},
+    //url       = {https://doi.org/10.1016/S0893-6080(98)00116-6},
+    //doi       = {10.1016/S0893-6080(98)00116-6},
+    //timestamp = {Wed, 14 Nov 2018 10:30:22 +0100},
+    //biburl    = {https://dblp.org/rec/journals/nn/Qian99.bib},
+    //bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+% Duchi, Hazan and Singer (2011), adaptive subgradient methods (JMLR 12).
+% The //-prefixed fields are unrecognised field names, so styles skip them.
+@article{duchi2011adaptive,
+    author  = {John C. Duchi and
+               Elad Hazan and
+               Yoram Singer},
+    title   = {Adaptive Subgradient Methods for Online Learning and Stochastic Optimization},
+    journal = {Journal of Machine Learning Research},
+    year    = {2011},
+    volume  = {12},
+    pages   = {2121--2159},
+    //url       = {http://dl.acm.org/citation.cfm?id=2021068},
+    //timestamp = {Wed, 10 Jul 2019 15:28:02 +0200},
+    //biburl    = {https://dblp.org/rec/journals/jmlr/DuchiHS11.bib},
+    //bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+% Tieleman and Hinton (2012), the rmsprop lecture (Lecture 6.5) from the Coursera neural-networks course.
+@article{tieleman2012rmsprop,
+    author  = {Tieleman, Tijmen and
+               Hinton, Geoffrey},
+    title   = {Lecture 6.5-rmsprop: Divide the gradient by a running average of its recent magnitude},
+    journal = {COURSERA: Neural networks for machine learning},
+    year    = {2012},
+    volume  = {4},
+    number  = {2},
+    pages   = {26--31}
+}
+
+% Kingma and Ba, "Adam: A Method for Stochastic Optimization" (ICLR 2015).
+% The //-prefixed fields are unrecognised field names, so styles skip them.
+@inproceedings{kingma2014adam,
+    author    = {Diederik P. Kingma and
+                 Jimmy Ba},
+    title     = {Adam: {A} Method for Stochastic Optimization},
+    booktitle = {3rd International Conference on Learning Representations, {ICLR} 2015,
+                 San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings},
+    year      = {2015},
+    //editor    = {Yoshua Bengio and
+                   Yann LeCun},
+    //url       = {http://arxiv.org/abs/1412.6980},
+    //timestamp = {Thu, 25 Jul 2019 14:25:37 +0200},
+    //biburl    = {https://dblp.org/rec/journals/corr/KingmaB14.bib},
+    //bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+% Ioffe and Szegedy, Batch Normalization (ICML 2015, JMLR W&CP vol. 37).
+% The //-prefixed fields are unrecognised field names, so styles skip them.
+@inproceedings{ioffe2015batch,
+    author    = {Sergey Ioffe and
+                 Christian Szegedy},
+    title     = {Batch Normalization: Accelerating Deep Network Training by Reducing
+                 Internal Covariate Shift},
+    booktitle = {Proceedings of the 32nd International Conference on Machine Learning,
+                 {ICML} 2015, Lille, France, 6-11 July 2015},
+    series    = {{JMLR} Workshop and Conference Proceedings},
+    year      = {2015},
+    volume    = {37},
+    pages     = {448--456},
+    publisher = {JMLR.org},
+    //editor    = {Francis R. Bach and
+                   David M. Blei},
+    //url       = {http://proceedings.mlr.press/v37/ioffe15.html},
+    //timestamp = {Wed, 29 May 2019 08:41:45 +0200},
+    //biburl    = {https://dblp.org/rec/conf/icml/IoffeS15.bib},
+    //bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+% Ba, Kiros and Hinton (2016), Layer Normalization (CoRR abs/1607.06450).
+% The //-prefixed fields are unrecognised field names, so styles skip them.
+@article{Ba2016LayerN,
+    author  = {Lei Jimmy Ba and
+               Jamie Ryan Kiros and
+               Geoffrey E. Hinton},
+    title   = {Layer Normalization},
+    journal = {CoRR},
+    year    = {2016},
+    volume  = {abs/1607.06450},
+    //url           = {http://arxiv.org/abs/1607.06450},
+    //archivePrefix = {arXiv},
+    //eprint        = {1607.06450},
+    //timestamp     = {Tue, 23 Jul 2019 17:33:23 +0200},
+    //biburl        = {https://dblp.org/rec/journals/corr/BaKH16.bib},
+    //bibsource     = {dblp computer science bibliography, https://dblp.org}
+}
+
+% He et al., Deep Residual Learning for Image Recognition (CVPR 2016).
+% NOTE(review): the citation key names the CoRR 2015 record while the fields
+% describe the CVPR 2016 paper; the key is left unchanged so that any existing
+% citations keep resolving.
+% The //-prefixed fields are unrecognised field names, so styles skip them.
+@inproceedings{DBLP:journals/corr/HeZRS15,
+    author    = {Kaiming He and
+                 Xiangyu Zhang and
+                 Shaoqing Ren and
+                 Jian Sun},
+    title     = {Deep Residual Learning for Image Recognition},
+    booktitle = {2016 {IEEE} Conference on Computer Vision and Pattern Recognition,
+                 {CVPR} 2016, Las Vegas, NV, USA, June 27-30, 2016},
+    year      = {2016},
+    pages     = {770--778},
+    publisher = {{IEEE} Computer Society},
+    //url       = {https://doi.org/10.1109/CVPR.2016.90},
+    //doi       = {10.1109/CVPR.2016.90},
+    //timestamp = {Wed, 16 Oct 2019 14:14:50 +0200},
+    //biburl    = {https://dblp.org/rec/conf/cvpr/HeZRS16.bib},
+    //bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+% Mikolov et al., distributed representations of words and phrases (NIPS 2013).
+% The //-prefixed fields are unrecognised field names, so styles skip them.
+@inproceedings{mikolov2013distributed,
+    author    = {Tomas Mikolov and
+                 Ilya Sutskever and
+                 Kai Chen and
+                 Gregory S. Corrado and
+                 Jeffrey Dean},
+    title     = {Distributed Representations of Words and Phrases and their Compositionality},
+    booktitle = {Advances in Neural Information Processing Systems 26: 27th Annual
+                 Conference on Neural Information Processing Systems 2013. Proceedings
+                 of a meeting held December 5-8, 2013, Lake Tahoe, Nevada, United States},
+    year      = {2013},
+    pages     = {3111--3119},
+    //editor    = {Christopher J. C. Burges and
+                   L{\'{e}}on Bottou and
+                   Zoubin Ghahramani and
+                   Kilian Q. Weinberger},
+    //url       = {http://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality},
+    //timestamp = {Fri, 06 Mar 2020 17:00:12 +0100},
+    //biburl    = {https://dblp.org/rec/conf/nips/MikolovSCCD13.bib},
+    //bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+% Guidotti et al., survey of methods for explaining black-box models (ACM Computing Surveys 51(5)).
+% The //-prefixed fields are unrecognised field names, so styles skip them.
+@article{guidotti2018survey,
+    author  = {Riccardo Guidotti and
+               Anna Monreale and
+               Salvatore Ruggieri and
+               Franco Turini and
+               Fosca Giannotti and
+               Dino Pedreschi},
+    title   = {A Survey of Methods for Explaining Black Box Models},
+    journal = {ACM Computing Surveys},
+    year    = {2019},
+    volume  = {51},
+    number  = {5},
+    pages   = {93:1--93:42},
+    //url       = {https://doi.org/10.1145/3236009},
+    //doi       = {10.1145/3236009},
+    //timestamp = {Thu, 09 May 2019 16:06:21 +0200},
+    //biburl    = {https://dblp.org/rec/journals/csur/GuidottiMRTGP19.bib},
+    //bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+% Koh and Liang, influence functions for black-box predictions (ICML 2017, PMLR vol. 70).
+% The //-prefixed fields are unrecognised field names, so styles skip them.
+@inproceedings{koh2017understanding,
+    author    = {Pang Wei Koh and
+                 Percy Liang},
+    title     = {Understanding Black-box Predictions via Influence Functions},
+    booktitle = {Proceedings of the 34th International Conference on Machine Learning,
+                 {ICML} 2017, Sydney, NSW, Australia, 6-11 August 2017},
+    series    = {Proceedings of Machine Learning Research},
+    year      = {2017},
+    volume    = {70},
+    pages     = {1885--1894},
+    publisher = {{PMLR}},
+    //editor    = {Doina Precup and
+                   Yee Whye Teh},
+    //url       = {http://proceedings.mlr.press/v70/koh17a.html},
+    //timestamp = {Wed, 29 May 2019 08:41:45 +0200},
+    //biburl    = {https://dblp.org/rec/conf/icml/KohL17.bib},
+    //bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+% Arthur, Neubig and Nakamura, discrete translation lexicons in NMT (EMNLP 2016).
+% The //-prefixed fields are unrecognised field names, so styles skip them.
+@inproceedings{arthur2016incorporating,
+    author    = {Philip Arthur and
+                 Graham Neubig and
+                 Satoshi Nakamura},
+    title     = {Incorporating Discrete Translation Lexicons into Neural Machine Translation},
+    booktitle = {Proceedings of the 2016 Conference on Empirical Methods in Natural
+                 Language Processing, {EMNLP} 2016, Austin, Texas, USA, November 1-4,
+                 2016},
+    year      = {2016},
+    pages     = {1557--1567},
+    publisher = {The Association for Computational Linguistics},
+    //editor    = {Jian Su and
+                   Xavier Carreras and
+                   Kevin Duh},
+    //url       = {https://doi.org/10.18653/v1/d16-1162},
+    //doi       = {10.18653/v1/d16-1162},
+    //timestamp = {Tue, 28 Jan 2020 10:28:31 +0100},
+    //biburl    = {https://dblp.org/rec/conf/emnlp/ArthurNN16.bib},
+    //bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+% Zollmann and Venugopal, syntax-augmented MT via chart parsing (WMT workshop at HLT-NAACL 2006).
+% The //-prefixed fields are unrecognised field names, so styles skip them.
+@inproceedings{zollmann2006syntax,
+    author    = {Andreas Zollmann and
+                 Ashish Venugopal},
+    title     = {Syntax Augmented Machine Translation via Chart Parsing},
+    booktitle = {Proceedings on the Workshop on Statistical Machine Translation, WMT@HLT-NAACL
+                 2006, New York City, NY, USA, June 8-9, 2006},
+    year      = {2006},
+    pages     = {138--141},
+    publisher = {Association for Computational Linguistics},
+    //editor    = {Philipp Koehn and
+                   Christof Monz},
+    //url       = {https://www.aclweb.org/anthology/W06-3119/},
+    //timestamp = {Fri, 13 Sep 2019 13:08:46 +0200},
+    //biburl    = {https://dblp.org/rec/conf/wmt/ZollmannV06.bib},
+    //bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+% Charniak, Knight and Yamada (2003), syntax-based language models for SMT (MT Summit IX).
+% booktitle holds only the proceedings name: bibliography styles supply the
+% leading "In" themselves, and the sponsoring body lives in organization.
+@inproceedings{charniak2003syntax,
+  author       = {Eugene Charniak and
+                  Kevin Knight and
+                  Kenji Yamada},
+  title        = {Syntax-based Language Models for Statistical Machine Translation},
+  booktitle    = {{MT} Summit {IX}},
+  organization = {International Association for Machine Translation},
+  year         = {2003}
+}
+
+% Stahlberg et al., Syntactically Guided Neural Machine Translation (ACL 2016, short papers).
+% Publisher uses the association's name as it appears in booktitle
+% ("Computational", not "Computer", Linguistics).
+% The //-prefixed fields are unrecognised field names, so styles skip them.
+@inproceedings{stahlberg2016syntactically,
+  author    = {Felix Stahlberg and
+               Eva Hasler and
+               Aurelien Waite and
+               Bill Byrne},
+  title     = {Syntactically Guided Neural Machine Translation},
+  booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational
+               Linguistics, {ACL} 2016, August 7-12, 2016, Berlin, Germany, Volume
+               2: Short Papers},
+  publisher = {The Association for Computational Linguistics},
+  year      = {2016},
+  //url       = {https://doi.org/10.18653/v1/p16-2049},
+  //doi       = {10.18653/v1/p16-2049},
+  //timestamp = {Tue, 28 Jan 2020 10:27:31 +0100},
+  //biburl    = {https://dblp.org/rec/conf/acl/StahlbergHWB16.bib},
+  //bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+% Plank and Moschitti, tree kernels with semantic similarity for relation extraction (ACL 2013, long papers).
+% Publisher uses the association's name as it appears in booktitle
+% ("Computational", not "Computer", Linguistics).
+% The //-prefixed fields are unrecognised field names, so styles skip them.
+@inproceedings{plank2013embedding,
+  author    = {Barbara Plank and
+               Alessandro Moschitti},
+  title     = {Embedding Semantic Similarity in Tree Kernels for Domain Adaptation
+               of Relation Extraction},
+  booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational
+               Linguistics, {ACL} 2013, 4-9 August 2013, Sofia, Bulgaria, Volume
+               1: Long Papers},
+  pages     = {1498--1507},
+  publisher = {The Association for Computational Linguistics},
+  year      = {2013},
+  //url       = {https://www.aclweb.org/anthology/P13-1147/},
+  //timestamp = {Mon, 19 Aug 2019 18:10:05 +0200},
+  //biburl    = {https://dblp.org/rec/conf/acl/PlankM13.bib},
+  //bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+% Perozzi, Al-Rfou and Skiena, DeepWalk (KDD 2014).
+% {DeepWalk} is braced so styles that sentence-case titles keep the inner
+% capital W instead of rendering "Deepwalk".
+% The //-prefixed fields are unrecognised field names, so styles skip them.
+@inproceedings{perozzi2014deepwalk,
+  author    = {Bryan Perozzi and
+               Rami Al{-}Rfou and
+               Steven Skiena},
+  //editor    = {Sofus A. Macskassy and
+               Claudia Perlich and
+               Jure Leskovec and
+               Wei Wang and
+               Rayid Ghani},
+  title     = {{DeepWalk}: online learning of social representations},
+  booktitle = {The 20th {ACM} {SIGKDD} International Conference on Knowledge Discovery
+               and Data Mining, {KDD} '14, New York, NY, {USA} - August 24 - 27,
+               2014},
+  pages     = {701--710},
+  publisher = {{ACM}},
+  year      = {2014},
+  //url       = {https://doi.org/10.1145/2623330.2623732},
+  //doi       = {10.1145/2623330.2623732},
+  //timestamp = {Sun, 02 Jun 2019 21:11:52 +0200},
+  //biburl    = {https://dblp.org/rec/conf/kdd/PerozziAS14.bib},
+  //bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+% Collobert et al. (2011), "Natural Language Processing (Almost) from Scratch" (JMLR 12).
+% The //-prefixed fields are unrecognised field names, so styles skip them.
+@article{collobert2011natural,
+    author  = {Ronan Collobert and
+               Jason Weston and
+               L{\'{e}}on Bottou and
+               Michael Karlen and
+               Koray Kavukcuoglu and
+               Pavel P. Kuksa},
+    title   = {Natural Language Processing (Almost) from Scratch},
+    journal = {Journal of Machine Learning Research},
+    year    = {2011},
+    volume  = {12},
+    pages   = {2493--2537},
+    //url       = {http://dl.acm.org/citation.cfm?id=2078186},
+    //timestamp = {Wed, 10 Jul 2019 15:28:44 +0200},
+    //biburl    = {https://dblp.org/rec/journals/jmlr/CollobertWBKKK11.bib},
+    //bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+% McCann et al., contextualized word vectors learned in translation (NIPS 2017).
+% The //-prefixed fields are unrecognised field names, so styles skip them.
+@inproceedings{mccann2017learned,
+    author    = {Bryan McCann and
+                 James Bradbury and
+                 Caiming Xiong and
+                 Richard Socher},
+    title     = {Learned in Translation: Contextualized Word Vectors},
+    booktitle = {Advances in Neural Information Processing Systems 30: Annual Conference
+                 on Neural Information Processing Systems 2017, 4-9 December 2017,
+                 Long Beach, CA, {USA}},
+    year      = {2017},
+    pages     = {6294--6305},
+    //editor    = {Isabelle Guyon and
+                   Ulrike von Luxburg and
+                   Samy Bengio and
+                   Hanna M. Wallach and
+                   Rob Fergus and
+                   S. V. N. Vishwanathan and
+                   Roman Garnett},
+    //url       = {http://papers.nips.cc/paper/7209-learned-in-translation-contextualized-word-vectors},
+    //timestamp = {Fri, 06 Mar 2020 16:57:53 +0100},
+    //biburl    = {https://dblp.org/rec/conf/nips/McCannBXS17.bib},
+    //bibsource = {dblp computer science bibliography, https://dblp.org}
+}

 %%%%% chapter 9------------------------------------------------------
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%