Commit a727e58c by 曹润柘

合并分支 'master' 到 'caorunzhe'

Master

查看合并请求 !40
parents ad5bed97 9f6e49e0
......@@ -6,7 +6,7 @@
\begin{tikzpicture}
\begin{scope}[scale=1.0,xshift=0.9in,yshift=-0.87in,level distance=20pt,sibling distance=-1pt,grow'=up]
\begin{scope}[scale=1.0,level distance=30pt,sibling distance=15pt,grow'=up]
{
\Tree[.\node(sn0){IP};
[.\node(sn1){NP};
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -6,13 +6,13 @@
\begin{scope}[minimum height = 20pt]
\node [anchor=east] (x1) at (-0.5em, 0) {$x_l$};
\node [anchor=west,draw=green,fill=green!20,inner xsep=5pt] (F1) at ([xshift=2em]x1.east){$\mathcal{F}$};
\node [anchor=west,draw=green,fill=green!20,inner xsep=5pt] (F1) at ([xshift=2em]x1.east){$\textrm{F}$};
\node [anchor=west,circle,draw,minimum size=1em] (n1) at ([xshift=2em]F1.east) {};
\node [anchor=west,draw=green,fill=green!20,inner xsep=5pt] (ln1) at ([xshift=2em]n1.east){\textrm{LN}};
\node [anchor=west] (x2) at ([xshift=2em]ln1.east) {$x_{l+1}$};
\node [anchor=north] (x3) at ([yshift=-5em]x1.south) {$x_l$};
\node [anchor=west,draw=green,fill=green!20,inner xsep=5pt] (F2) at ([xshift=2em]x3.east){$\mathcal{F}$};
\node [anchor=west,draw=green,fill=green!20,inner xsep=5pt] (F2) at ([xshift=2em]x3.east){$\textrm{F}$};
\node [anchor=west,draw=green,fill=green!20,inner xsep=5pt] (ln2) at ([xshift=2em]F2.east){\textrm{LN}};
\node [anchor=west,circle,draw,minimum size=1em] (n2) at ([xshift=2em]ln2.east){};
\node [anchor=west] (x4) at ([xshift=2em]n2.east) {$x_{l+1}$};
......
......@@ -4,21 +4,21 @@
\node [anchor=north,rectangle, inner sep=0mm,minimum height=1.2em,minimum width=2em,rounded corners=5pt,thick] (n1) at (0, 0) {编码端};
\node [anchor=west,rectangle, inner sep=0mm,minimum height=1.2em,minimum width=0em,rounded corners=5pt,thick] (n2) at ([xshift=3.5em,yshift=-0.5em]n1.east) {$z_0$};
\node [anchor=west,rectangle, inner sep=0mm,minimum height=1.2em,minimum width=0em,rounded corners=5pt,thick] (n2) at ([xshift=3.5em,yshift=-0.5em]n1.east) {$x_1$};
\node [anchor=west,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=3em,fill=orange!20,rounded corners=5pt,thick] (n3) at ([xshift=3.5em,yshift=0em]n2.east) {$z_1$};
\node [anchor=west,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=3em,fill=orange!20,rounded corners=5pt,thick] (n3) at ([xshift=3.5em,yshift=0em]n2.east) {$x_2$};
\node [anchor=west,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=3em,fill=orange!20,rounded corners=5pt,thick] (n4) at ([xshift=3.5em,yshift=0em]n3.east) {$z_2$};
\node [anchor=west,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=3em,fill=orange!20,rounded corners=5pt,thick] (n4) at ([xshift=3.5em,yshift=0em]n3.east) {$x_3$};
\node [anchor=west,rectangle, inner sep=0mm,minimum height=1.2em,minimum width=1em,rounded corners=5pt,thick] (n6) at ([xshift=1.5em,yshift=0em]n4.east) {$\ldots$};
\node [anchor=west,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=3em,fill=orange!20,rounded corners=5pt,thick] (n5) at ([xshift=3.5em,yshift=0em]n6.east) {$z_{l}$};
\node [anchor=west,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=3em,fill=orange!20,rounded corners=5pt,thick] (n5) at ([xshift=3.5em,yshift=0em]n6.east) {$x_{l+1}$};
\node [anchor=west,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=3em,fill=orange!20,rounded corners=5pt,thick] (n7) at ([xshift=1.5em,yshift=0em]n5.east) {$z_{l+1}$};
\node [anchor=west,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=3em,fill=orange!20,rounded corners=5pt,thick] (n7) at ([xshift=1.5em,yshift=0em]n5.east) {$x_{l+2}$};
\node [anchor=north,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=15em,fill=teal!17,rounded corners=5pt,thick] (n8) at ([xshift=0em,yshift=-3em]n4.south) {层正则化};
\node [anchor=north,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=15em,fill=purple!17,rounded corners=5pt,thick] (n9) at ([xshift=0em,yshift=-1em]n8.south) {$L_0\ \quad L_1\ \quad L_2\quad \ldots \quad\ L_l$};
\node [anchor=north,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=15em,fill=purple!17,rounded corners=5pt,thick] (n9) at ([xshift=0em,yshift=-1em]n8.south) {$z_0\ \quad z_1\ \quad z_2\quad \ldots \quad\ z_l$};
\node [anchor=north,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=15em,fill=teal!17,rounded corners=5pt,thick] (n10) at ([xshift=0em,yshift=-2em]n9.south) {权重累加};
......
......@@ -22,10 +22,10 @@
\node [anchor=west,fill=orange!20,draw=red,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em,dashed] (s44) at ([xshift=1.5em]s43.east) {$\times h$};
\node [anchor=west,fill=blue!20,draw=blue,rounded corners=3pt,minimum height=1.4em,minimum width=1.5em] (s45) at ([xshift=1.5em]s44.east) {};
\node [anchor=east] (p1) at ([xshift=-2em]s11.west) {step1};
\node [anchor=east] (p2) at ([xshift=-2em]s21.west) {step2};
\node [anchor=east] (p3) at ([xshift=-2em]s31.west) {step3};
\node [anchor=east] (p4) at ([xshift=-2em]s41.west) {step4};
\node [anchor=east] (p1) at ([xshift=-2em]s11.west) {step 1};
\node [anchor=east] (p2) at ([xshift=-2em]s21.west) {step 2};
\node [anchor=east] (p3) at ([xshift=-2em]s31.west) {step 3};
\node [anchor=east] (p4) at ([xshift=-2em]s41.west) {step 4};
\node [anchor=south,fill=orange!20,draw=orange,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (b1) at ([xshift=-0.2em,yshift=2em]p1.north) {};
\node [anchor=west] (b2) at (b1.east) {:编码器};
......
% !Mode:: "TeX:UTF-8"
% !TEX encoding = UTF-8 Unicode
\newpage
%% Saved with string encoding Western (ASCII)
@article{Aho-Ullman:1969:JCSS,
author = {Aho, Alfred V. and Ullman, Jeffrey D.},
title = {Syntax directed translations and the pushdown assembler},
journal = {Journal of Computer and System Sciences},
pages = {37--57},
volume = {3},
year = {1969}
}
@article{alshawi-EtAl:2000:CL,
author = {Alshawi, Hiyan and Bangalore, Srinivas and Douglas, Shona},
title = {Learning dependency translation models as collections of finite state head transducers},
journal = {Computational Linguistics},
number = {1},
pages = {45--60},
volume = {26},
year = {2000}
}
@article{brown-EtAl:1993:CL,
author = {Brown, Peter E. and Pietra, Stephen A. Della and Pietra, Vincent J. Della and Mercer, Robert L.},
title = {The Mathematics of Statistical Machine Translation: Parameter Estimation },
journal = {Computational Linguistics},
number = {2},
pages = {263--311},
volume = {19},
year = {1993}
}
@article{bergert-EtAl:1996:CL,
author = {Berger, Adam L. and Pietra, Vincent J. Della and Pietra, Stephen A. Della},
title = {A maximum entropy approach to natural language processing},
journal = {Computational Linguistics},
number = {1},
pages = {39--71},
volume = {22},
year = {1996}
}
@InProceedings{cer-EtAl:2010:DEMO,
author = {Cer, Daniel and Galley, Michel and Jurafsky, Daniel and Manning, Christopher D.},
title = {Phrasal: A Statistical Machine Translation Toolkit for Exploring New Model Features},
booktitle = {Proceedings of the NAACL HLT 2010 Demonstration Session},
month = {June},
year = {2010},
address = {Los Angeles, California},
publisher = {Association for Computational Linguistics},
pages = {9--12},
url = {http://www.aclweb.org/anthology/N10-2003}
}
@InProceedings{chiang:2005:ACL,
author = {Chiang, David},
title = {A Hierarchical Phrase-Based Model for Statistical Machine Translation},
booktitle = {Proceedings of the 43rd Annual Meeting of the Association for Computational Linguistics (ACL'05)},
month = {June},
year = {2005},
address = {Ann Arbor, Michigan},
publisher = {Association for Computational Linguistics},
pages = {263--270},
url = {http://www.aclweb.org/anthology/P05-1033},
doi = {10.3115/1219840.1219873}
}
@InProceedings{chiang-kevin:2006:ACL,
author = {Chiang, David and Knight, Kevin},
title = {An introduction to synchronous grammars},
booktitle = {Proceedings of the 44th Annual Meeting of the Association for Computational Linguistics (ACL'05)},
year = {2006},
publisher = {Association for Computational Linguistics}
}
@article{chiang:2007:CL,
author = {David Chiang},
title = {Hierarchical Phrase-Based Translation},
journal = {Computational Linguistics},
number = {2},
pages = {45--60},
volume = {33},
year = {2007}
}
@InProceedings{chiang-EtAl:2008:EMNLP,
author = {Chiang, David and DeNeefe, Steve and Chan, Yee Seng and Ng, Hwee Tou},
title = {Decomposability of Translation Metrics for Improved Evaluation and Efficient Algorithms},
booktitle = {Proceedings of the 2008 Conference on Empirical Methods in Natural Language Processing},
month = {October},
year = {2008},
address = {Honolulu, Hawaii},
publisher = {Association for Computational Linguistics},
pages = {610--619},
url = {http://www.aclweb.org/anthology/D08-1064}
}
@InProceedings{denero-EtAl:2010:NAACLHLT,
author = {DeNero, John and Kumar, Shankar and Chelba, Ciprian and Och, Franz},
title = {Model Combination for Machine Translation},
booktitle = {Human Language Technologies: The 2010 Annual Conference of the North American Chapter of the Association for Computational Linguistics},
month = {June},
year = {2010},
address = {Los Angeles, California},
publisher = {Association for Computational Linguistics},
pages = {975--983},
url = {http://www.aclweb.org/anthology/N10-1141}
}
@InProceedings{dyer-EtAl:2010:Demos,
author = {Dyer, Chris and Lopez, Adam and Ganitkevitch, Juri and Weese, Jonathan and Ture, Ferhan and Blunsom, Phil and Setiawan, Hendra and Eidelman, Vladimir and Resnik, Philip},
title = {cdec: A Decoder, Alignment, and Learning Framework for Finite-State and Context-Free Translation Models},
booktitle = {Proceedings of the ACL 2010 System Demonstrations},
month = {July},
year = {2010},
address = {Uppsala, Sweden},
publisher = {Association for Computational Linguistics},
pages = {7--12},
url = {http://www.aclweb.org/anthology/P10-4002}
}
@InProceedings{eisner:2003:ACL-companion,
author = {Jason Eisner},
title = {Learning Non-Isomorphic Tree Mappings for Machine Translation},
booktitle = {The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics},
month = {July},
year = {2003},
address = {Sapporo, Japan},
publisher = {Association for Computational Linguistics},
pages = {205--208},
url = {http://www.aclweb.org/anthology/P03-2039},
doi = {10.3115/1075178.1075217}
}
@InProceedings{galley-EtAl:2006:COLACL,
author = {Galley, Michel and Graehl, Jonathan and Knight, Kevin and Marcu, Daniel and DeNeefe, Steve and Wang, Wei and Thayer, Ignacio},
title = {Scalable Inference and Training of Context-Rich Syntactic Translation Models},
booktitle = {Proceedings of the 21st International Conference on Computational Linguistics and 44th Annual Meeting of the Association for Computational Linguistics},
month = {July},
year = {2006},
address = {Sydney, Australia},
publisher = {Association for Computational Linguistics},
pages = {961--968},
url = {http://www.aclweb.org/anthology/P06-1121},
doi = {10.3115/1220175.1220296}
}
@InProceedings{galley-manning:2008:EMNLP,
author = {Galley, Michel and Manning, Christopher D.},
title = {A Simple and Effective Hierarchical Phrase Reordering Model},
booktitle = {Proceedings of the 2008 Conference on Empirical Methods in Natural Language Processing},
month = {October},
year = {2008},
address = {Honolulu, Hawaii},
publisher = {Association for Computational Linguistics},
pages = {848--856},
url = {http://www.aclweb.org/anthology/D08-1089}
}
@InProceedings{huang-chiang:2005:IWPT,
author = {Huang, Liang and Chiang, David},
title = {Better k-best Parsing},
booktitle = {Proceedings of the Ninth International Workshop on Parsing Technology},
month = {October},
year = {2005},
address = {Vancouver, British Columbia},
publisher = {Association for Computational Linguistics},
pages = {53--64},
url = {http://www.aclweb.org/anthology/W/W05/W05-1506}
}
@inproceedings{koehn-EtAl:2003:NAACL,
author = {Koehn, Philipp and Och, Franz and Marcu, Daniel},
title = {Statistical Phrase-Based Translation},
booktitle = {Proceedings of the 2003 Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics},
month = {June},
year = {2003},
address = {Edmonton},
publisher = {The North American Chapter of the Association for Computational Linguistics},
pages = {48--54},
url = {http://aclweb.org/anthology-new/N/N03/N03-1017.pdf}
}
@inproceedings{koehn-EtAl:2007:PosterDemo,
author = {Koehn, Philipp and Hoang, Hieu and Birch, Alexandra and Callison-Burch, Chris and Federico, Marcello and Bertoldi, Nicola and Cowan, Brooke and Shen, Wade and Moran, Christine and Zens, Richard and Dyer, Chris and Bojar, Ondrej and Constantin, Alexandra and Herbst, Evan},
title = {Moses: Open Source Toolkit for Statistical Machine Translation},
booktitle = {Proceedings of the 45th Annual Meeting of the Association for Computational Linguistics Companion Volume Proceedings of the Demo and Poster Sessions},
month = {June},
year = {2007},
address = {Prague, Czech Republic},
publisher = {Association for Computational Linguistics},
pages = {177--180},
url = {http://www.aclweb.org/anthology/P07-2045}
}
@book{koehn:2010:smt,
author = {Koehn, Philipp},
title = {Statistical Machine Translation},
booktitle = {Statistical Machine Translation},
year = {2010},
publisher = {Cambridge University Press},
}
@InProceedings{li-EtAl:2009:WMT1,
author = {Li, Zhifei and Callison-Burch, Chris and Dyer, Chris and Khudanpur, Sanjeev and Schwartz, Lane and Thornton, Wren and Weese, Jonathan and Zaidan, Omar},
title = {{Joshua}: An Open Source Toolkit for Parsing-Based Machine Translation},
booktitle = {Proceedings of the Fourth Workshop on Statistical Machine Translation},
month = {March},
year = {2009},
address = {Athens, Greece},
publisher = {Association for Computational Linguistics},
pages = {135--139},
url = {http://www.aclweb.org/anthology/W09-0424}
}
@article{lopez:2008:ACMComputing,
author = {Lopez, Adam},
title = {Statistical Machine Translation},
journal = {ACM Computing Surveys},
number = {3},
pages = {1--49},
volume = {40},
year = {2008}
}
@InProceedings{mi-huang-liu:2008:ACLMain,
author = {Mi, Haitao and Huang, Liang and Liu, Qun},
title = {Forest-Based Translation},
booktitle = {Proceedings of ACL-08: HLT},
month = {June},
year = {2008},
address = {Columbus, Ohio},
publisher = {Association for Computational Linguistics},
pages = {192--199},
url = {http://www.aclweb.org/anthology/P/P08/P08-1023}
}
@InProceedings{marcu-wong:2002:EMNLP02,
author = {Marcu, Daniel and Wong, William},
title = {A Phrase-Based, Joint Probability Model for Statistical Machine Translation},
booktitle = {Proceedings of the 2002 Conference on Empirical Methods in Natural Language Processing},
month = {July},
year = {2002},
publisher = {Association for Computational Linguistics},
pages = {133--139},
url = {http://www.aclweb.org/anthology/W02-1018},
doi = {10.3115/1118693.1118711}
}
@InProceedings{och-ney:2002:ACL,
author = {Och, Franz and Ney, Hermann},
title = {Discriminative Training and Maximum Entropy Models for Statistical Machine Translation},
booktitle = {Proceedings of 40th Annual Meeting of the Association for Computational Linguistics},
month = {July},
year = {2002},
address = {Philadelphia, Pennsylvania, USA},
publisher = {Association for Computational Linguistics},
pages = {295--302},
url = {http://www.aclweb.org/anthology/P02-1038},
doi = {10.3115/1073083.1073133}
}
@InProceedings{och:2003:ACL,
author = {Och, Franz},
title = {Minimum Error Rate Training in Statistical Machine Translation},
booktitle = {Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics},
month = {July},
year = {2003},
address = {Sapporo, Japan},
publisher = {Association for Computational Linguistics},
pages = {160--167},
url = {http://www.aclweb.org/anthology/P03-1021},
doi = {10.3115/1075096.1075117}
}
@InProceedings{papineni-EtAl:2002:ACL,
author = {Papineni, Kishore and Roukos, Salim and Ward, Todd and Zhu, Wei-Jing},
title = {Bleu: a Method for Automatic Evaluation of Machine Translation},
booktitle = {Proceedings of 40th Annual Meeting of the Association for Computational Linguistics},
month = {July},
year = {2002},
address = {Philadelphia, Pennsylvania, USA},
publisher = {Association for Computational Linguistics},
pages = {311--318},
url = {http://www.aclweb.org/anthology/P02-1040},
doi = {10.3115/1073083.1073135}
}
@inproceedings{tillman:2004:HLTNAACL,
author = {Tillmann, Christoph},
title = {A Unigram Orientation Model for Statistical Machine Translation},
booktitle = {HLT-NAACL 2004: Short Papers},
editor = {Susan Dumais and Daniel Marcu and Salim Roukos},
year = 2004,
month = {May 2 - May 7},
address = {Boston, Massachusetts, USA},
publisher = {Association for Computational Linguistics},
pages = {101--104}
}
@InProceedings{vilar-EtAl:2010:WMT,
author = {Vilar, David and Stein, Daniel and Huck, Matthias and Ney, Hermann},
title = {Jane: Open Source Hierarchical Translation, Extended with Reordering and Lexicon Models},
booktitle = {Proceedings of the Joint Fifth Workshop on Statistical Machine Translation and MetricsMATR},
month = {July},
year = {2010},
address = {Uppsala, Sweden},
publisher = {Association for Computational Linguistics},
pages = {262--270},
url = {http://www.aclweb.org/anthology/W10-1738}
}
@article{wu:1997:CL,
author = {Wu, Dekai},
title = {Stochastic inversion transduction grammars and bilingual parsing of parallel corpora},
journal = {Computational Linguistics},
number = {3},
pages = {377--404},
volume = {23},
year = {1997}
}
@InProceedings{xiao-EtAl:2009:CWMT,
author = {Xiao, Tong and Chen, Rushan and Li, Tianning and Zhu, Muhua and Zhu, Jingbo and Wang, Huizhen and Ren, Feiliang},
title = {NEUTrans: a Phrase-Based SMT System for CWMT2009},
booktitle = {Proceedings of the 5th China Workshop on Machine Translation},
month = {Sep},
year = {2009},
address = {Nanjing, China},
publisher = {CWMT},
url = {http://www.icip.org.cn/cwmt2009/downloads/papers/6.pdf}
}
@InProceedings{xiao-EtAl:2011:CWMT,
author = {Xiao, Tong and Zhang, Hao and Li, Qiang and Lu, Qi and Zhu, Jingbo and Ren, Feiliang and Wang, Huizhen},
title = {The NiuTrans Machine Translation System for CWMT2011},
booktitle = {Proceedings of the 6th China Workshop on Machine Translation},
month = {August},
year = {2011},
address = {Xiamen, China},
publisher = {CWMT},
}
@InProceedings{xiao-EtAl:2011:NTCIR,
author = {Xiao, Tong and Li, Qiang and Lu, Qi and Zhang, Hao and Ding, Haibo and Yao, Shujie and Xu, Xiaoming and Fei, Xiaoxu and Zhu, Jingbo and Ren, Feiliang and Wang, Huizhen},
title = {The NiuTrans Machine Translation System for NTCIR-9 PatentMT},
booktitle = {Proceedings of the NTCIR-9 Workshop Meeting},
month = {Dec},
year = {2011},
address = {Tokyo, Japan},
publisher = {NTCIR},
pages = {593--599},
url = {http://research.nii.ac.jp/ntcir/workshop/OnlineProceedings9/NTCIR/04-NTCIR9-PATENTMT-XiaoT.pdf}
}
@InProceedings{xiong-liu-lin:2006:COLACL,
author = {Xiong, Deyi and Liu, Qun and Lin, Shouxun},
title = {Maximum Entropy Based Phrase Reordering Model for Statistical Machine Translation},
booktitle = {Proceedings of the 21st International Conference on Computational Linguistics and 44th Annual Meeting of the Association for Computational Linguistics},
month = {July},
year = {2006},
address = {Sydney, Australia},
publisher = {Association for Computational Linguistics},
pages = {521--528},
url = {http://www.aclweb.org/anthology/P06-1066},
doi = {10.3115/1220175.1220241}
}
@InProceedings{zhang-EtAl:2006:HLT-NAACL06-Main,
author = {Zhang, Hao and Huang, Liang and Gildea, Daniel and Knight, Kevin},
title = {Synchronous Binarization for Machine Translation},
booktitle = {Proceedings of the Human Language Technology Conference of the NAACL, Main Conference},
month = {June},
year = {2006},
address = {New York City, USA},
publisher = {Association for Computational Linguistics},
pages = {256--263},
url = {http://www.aclweb.org/anthology/N/N06/N06-1033}
}
@InProceedings{zollmann-venugopal:2006:WMT,
author = {Zollmann, Andreas and Venugopal, Ashish},
title = {Syntax Augmented Machine Translation via Chart Parsing},
booktitle = {Proceedings on the Workshop on Statistical Machine Translation},
month = {June},
year = {2006},
address = {New York City},
publisher = {Association for Computational Linguistics},
pages = {138--141},
url = {http://www.aclweb.org/anthology/W/W06/W06-3119}
}
%\bibliographystyle{plainnat}
%\bibliographystyle{FG-bibstyle}
%\addcontentsline{toc}{chapter}{Bibliography}
%----------------------------------------------------------------------------------------
% PART
%----------------------------------------------------------------------------------------
\part{哈哈哈哈哈~~爽朗的笑声}
%----------------------------------------------------------------------------------------
% CHAPTER 1
%----------------------------------------------------------------------------------------
\chapterimage{chapter_head_2.pdf} % Chapter heading image
\chapter{我是姜英俊}
\section{我姓姜,我很英俊}\index{Paragraphs of Text}
\lipsum[1-7] % Dummy text
%------------------------------------------------
\section{再说一遍?}\index{Citation}
This statement requires citation \cite{article_key}; this one is more specific \cite[162]{book_key}.
%------------------------------------------------
\section{咋的?}\index{Lists}
Lists are useful to present information in a concise and/or ordered way\footnote{Footnote example...}.
\subsection{我问你,你敢再说一遍不?}\index{Lists!Numbered List}
\begin{enumerate}
\item The first item
\item The second item
\item The third item
\end{enumerate}
\subsection{咋的?}\index{Lists!Bullet Points}
\begin{itemize}
\item The first item
\item The second item
\item The third item
\end{itemize}
\subsection{我靠,你还问我,我问你呢}\index{Lists!Descriptions and Definitions}
\begin{description}
\item[Name] Description
\item[Word] Definition
\item[Comment] Elaboration
\end{description}
\ No newline at end of file
%----------------------------------------------------------------------------------------
% CHAPTER 2
%----------------------------------------------------------------------------------------
\chapterimage{chapter_head_2.pdf} % Chapter heading image
\chapter{In-text Elements}
\section{Theorems}\index{Theorems}
This is an example of theorems.
\subsection{Several equations}\index{Theorems!Several Equations}
This is a theorem consisting of several equations.
\begin{theorem}[Name of the theorem]
In $E=\mathbb{R}^n$ all norms are equivalent. It has the properties:
\begin{align}
& \big| ||\mathbf{x}|| - ||\mathbf{y}|| \big|\leq || \mathbf{x}- \mathbf{y}||\\
& ||\sum_{i=1}^n\mathbf{x}_i||\leq \sum_{i=1}^n||\mathbf{x}_i||\quad\text{where $n$ is a finite integer}
\end{align}
\end{theorem}
\subsection{Single Line}\index{Theorems!Single Line}
This is a theorem consisting of just one line.
\begin{theorem}
A set $\mathcal{D}(G)$ in dense in $L^2(G)$, $|\cdot|_0$.
\end{theorem}
%------------------------------------------------
\section{Definitions}\index{Definitions}
This is an example of a definition. A definition could be mathematical or it could define a concept.
\begin{definition}[Definition name]
Given a vector space $E$, a norm on $E$ is an application, denoted $||\cdot||$, $E$ in $\mathbb{R}^+=[0,+\infty[$ such that:
\begin{align}
& ||\mathbf{x}||=0\ \Rightarrow\ \mathbf{x}=\mathbf{0}\\
& ||\lambda \mathbf{x}||=|\lambda|\cdot ||\mathbf{x}||\\
& ||\mathbf{x}+\mathbf{y}||\leq ||\mathbf{x}||+||\mathbf{y}||
\end{align}
\end{definition}
%------------------------------------------------
\section{Notations}\index{Notations}
\begin{notation}
Given an open subset $G$ of $\mathbb{R}^n$, the set of functions $\varphi$ are:
\begin{enumerate}
\item Bounded support $G$;
\item Infinitely differentiable;
\end{enumerate}
a vector space is denoted by $\mathcal{D}(G)$.
\end{notation}
%------------------------------------------------
\section{Remarks}\index{Remarks}
This is an example of a remark.
\begin{remark}
The concepts presented here are now in conventional employment in mathematics. Vector spaces are taken over the field $\mathbb{K}=\mathbb{R}$, however, established properties are easily extended to $\mathbb{K}=\mathbb{C}$.
\end{remark}
%------------------------------------------------
\section{Corollaries}\index{Corollaries}
This is an example of a corollary.
\begin{corollary}[Corollary name]
The concepts presented here are now in conventional employment in mathematics. Vector spaces are taken over the field $\mathbb{K}=\mathbb{R}$, however, established properties are easily extended to $\mathbb{K}=\mathbb{C}$.
\end{corollary}
%------------------------------------------------
\section{Propositions}\index{Propositions}
This is an example of propositions.
\subsection{Several equations}\index{Propositions!Several Equations}
\begin{proposition}[Proposition name]
It has the properties:
\begin{align}
& \big| ||\mathbf{x}|| - ||\mathbf{y}|| \big|\leq || \mathbf{x}- \mathbf{y}||\\
& ||\sum_{i=1}^n\mathbf{x}_i||\leq \sum_{i=1}^n||\mathbf{x}_i||\quad\text{where $n$ is a finite integer}
\end{align}
\end{proposition}
\subsection{Single Line}\index{Propositions!Single Line}
\begin{proposition}
Let $f,g\in L^2(G)$; if $\forall \varphi\in\mathcal{D}(G)$, $(f,\varphi)_0=(g,\varphi)_0$ then $f = g$.
\end{proposition}
%------------------------------------------------
\section{Examples}\index{Examples}
This is an example of examples.
\subsection{Equation and Text}\index{Examples!Equation and Text}
\begin{example}
Let $G=\{x\in\mathbb{R}^2:|x|<3\}$ and denoted by: $x^0=(1,1)$; consider the function:
\begin{equation}
f(x)=\left\{\begin{aligned} & \mathrm{e}^{|x|} & & \text{si $|x-x^0|\leq 1/2$}\\
& 0 & & \text{si $|x-x^0|> 1/2$}\end{aligned}\right.
\end{equation}
The function $f$ has bounded support, we can take $A=\{x\in\mathbb{R}^2:|x-x^0|\leq 1/2+\epsilon\}$ for all $\epsilon\in\intoo{0}{5/2-\sqrt{2}}$.
\end{example}
\subsection{Paragraph of Text}\index{Examples!Paragraph of Text}
\begin{example}[Example name]
\lipsum[2]
\end{example}
%------------------------------------------------
\section{Exercises}\index{Exercises}
This is an example of an exercise.
\begin{exercise}
This is a good place to ask a question to test learning progress or further cement ideas into students' minds.
\end{exercise}
%------------------------------------------------
\section{Problems}\index{Problems}
\begin{problem}
What is the average airspeed velocity of an unladen swallow?
\end{problem}
%------------------------------------------------
\section{Vocabulary}\index{Vocabulary}
Define a word to improve a students' vocabulary.
\begin{vocabulary}[Word]
Definition of word.
\end{vocabulary}
%----------------------------------------------------------------------------------------
% PART
%----------------------------------------------------------------------------------------
\part{Part Two}
\ No newline at end of file
%----------------------------------------------------------------------------------------
% CHAPTER 3
%----------------------------------------------------------------------------------------
\renewcommand\figurename{}%将figure改为图
\renewcommand\tablename{}%将table改为表
\chapterimage{chapter_head_1.pdf} % Chapter heading image
\chapter{基于词的翻译模型}
\hspace{2em}统计机器翻译和神经机器翻译在当前具有统治性意义。这两种方法各有优缺点,并没有哪种方法具有绝对的优势。但从研究的角度来看,神经机器翻译整体上更具有前沿性。本章主要介绍了统计机器翻译的开山之作—IBM模型,它主要讲了怎么使用词汇对机器翻译进行建模。IBM模型由Peter E. Brown等人在1993年提出,并详细阐述于论文—《The Mathematics of Statistical Machine Translation: Parameter Estimation》。这篇文章的视野和对问题的定义远超当时人所能看到的东西,其衍生出来的一系列方法和新的问题还被后人花费将近10年的时间来进行研究与讨论。
\section{什么是基于词的翻译模型}\index{Chapter3.1}%Index的作用,目前不清晰
\hspace{2em}在机器翻译中,我们希望得到一个源语句到目标语译文的翻译。但机器并不知道如何翻译。因此在做机器翻译的过程中,最初面临的一个问题是:如何对翻译进行建模?从计算机的角度看,建模的目的就是把抽象的问题转换为可计算的问题。所以机器翻译的一个核心问题是:如何将翻译转换为一个可计算的模型或过程。
\noindent\hspace{2em}基于单词的统计机器翻译模型又是如何描述翻译的呢?IBM模型提出了一个观点:在翻译源语句时,通常是把每个源语句的单词翻译成对应的目标语单词,然后调整这些单词的顺序,最后得到翻译结果。尽管在人看来基于单词的对应进行翻译是很自然的事,但是机器并不一定。
\noindent\hspace{2em}举个例子说明如何基于单词的对应进行翻译。如图 \ref{figureC3.1}所示,表示的是汉译英的例子。其中源语句是“我 对 你 感到 满意”。首先我们把源语句的单词“我”、“对”、“\\你”、“感到”和“满意”分别翻译为“I”、“with”、“you”、“am”和“satisfied”,然后调整单词的顺序,比如“am”放在译文的第2个位置,“you”应该放在最后的位置等,最后得到译文“I am satisfied with you”。
%空一行用来段落换行,noindent取消首行缩进,hspace{}指定缩进距离,1em等于两个英文字符|一个汉字
%----------------------------------------------
% 图3.1
\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\begin{figure}[htp]
\centering
\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\begin{tikzpicture}
\begin{scope}
\node [anchor=west] (s1) at (0,0) {};
\node [anchor=west] (s2) at ([xshift=0.5em]s1.east) {};
\node [anchor=west] (s3) at ([xshift=0.5em]s2.east) {};
\node [anchor=west] (s4) at ([xshift=0.5em]s3.east) {感到};
\node [anchor=west] (s5) at ([xshift=0.5em]s4.east) {满意};
\end{scope}
\begin{scope}[yshift=-3em]
\node [anchor=west] (t1) at (0.35em,0) {I};
\node [anchor=west] (t2) at ([xshift=0.3em,yshift=-0.1em]t1.east) {am};
\node [anchor=west] (t3) at ([xshift=0.3em,yshift=0.1em]t2.east) {satisfied};
\node [anchor=west] (t4) at ([xshift=0.3em]t3.east) {with};
\node [anchor=west] (t5) at ([xshift=0.3em,yshift=-0.2em]t4.east) {you};
\end{scope}
{
\draw [-,thick,ublue,dashed] (s1.south) -- (t1.north);
\draw [-,thick,ublue,dashed] (s4.south) -- ([yshift=0.3em]t2.north);
\draw [-,thick,ublue,dashed] (s2.south) ..controls +(south:1em) and +(north:1em).. (t4.north);
\draw [-,thick,ublue,dashed] (s3.south) ..controls +(south:0.5em) and +(north:1.5em).. (t5.north);
\draw [-,thick,ublue,dashed] (s5.south) -- (t3.north);
}
\end{tikzpicture}
\caption{此处为图片的描述...例:基于词对应进行翻译.}
\label{figureC3.1}
\end{figure}
%-------------------------------------------
\noindent\hspace{2em}传统观点认为基于词的翻译过程包含如下三个步骤。如图 \ref{figureC3.2}所示。
\noindent\hspace{2em}第一、分析。将源语句或目标语句切分或者表示为能够处理的最小单元的过程。在基于词的翻译模型的最小处理单元就是单词。在这里也可以简单地将分析理解为分词。
\noindent\hspace{2em}第二、转换。把源语句中的每个单词都翻译成目标语单词。此处引用Figure \ref{figureC3.1}作为示例。
\noindent\hspace{2em}第三、生成。基于转换的结果,将目标语译文变成通顺且合乎语法的句子。
%----------------------------------------------
% 图3.2
\begin{figure}[htp]
\centering
\begin{tikzpicture}
\definecolor{ugreen}{rgb}{0,0.5,0}
\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\node [anchor=west,draw,thick,minimum width=6.7em,minimum height=0.8em] (sent) at (0,0) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,red] (s1) at ([yshift=-1.5em]sent.south west) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,ugreen] (s2) at ([xshift=0.4em]s1.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,orange] (s3) at ([xshift=0.4em]s2.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,ublue] (s4) at ([xshift=0.4em]s3.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,purple] (s5) at ([xshift=0.4em]s4.east) {};
{
\node [anchor=west,draw,thick,circle,minimum size=0.3em,red,fill=red] (t1) at ([yshift=-2.0em]s1.west) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,ugreen,fill=ugreen] (t2) at ([xshift=0.4em]t1.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,orange,fill=orange] (t3) at ([xshift=0.4em]t2.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,ublue,fill=ublue] (t4) at ([xshift=0.4em]t3.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,purple,fill=purple] (t5) at ([xshift=0.4em]t4.east) {};
}
{
\node [anchor=west,draw,thick,circle,minimum size=0.3em,red,fill=red] (ft1) at ([yshift=-2.0em]t1.west) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,ublue,fill=ublue] (ft2) at ([xshift=0.4em]ft1.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,purple,fill=purple] (ft3) at ([xshift=0.4em]ft2.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,ugreen,fill=ugreen] (ft4) at ([xshift=0.4em]ft3.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,orange,fill=orange] (ft5) at ([xshift=0.4em]ft4.east) {};
}
\draw [->,thick,double] ([yshift=-0.1em]sent.south) -- ([yshift=-0.8em]sent.south);
{
\draw [->,thick] ([yshift=-0.1em]s1.south) -- ([yshift=0.1em]t1.north);
\draw [->,thick] ([yshift=-0.1em]s2.south) -- ([yshift=0.1em]t2.north);
\draw [->,thick] ([yshift=-0.1em]s3.south) -- ([yshift=0.1em]t3.north);
\draw [->,thick] ([yshift=-0.1em]s4.south) -- ([yshift=0.1em]t4.north);
\draw [->,thick] ([yshift=-0.1em]s5.south) -- ([yshift=0.1em]t5.north);
}
{
\draw [->,thick] ([yshift=-0.1em]t1.south) -- ([yshift=0.1em]ft1.north);
\draw [->,thick] ([yshift=-0.1em]t2.south) -- ([yshift=0.1em]ft4.north);
\draw [->,thick] ([yshift=-0.1em]t3.south) -- ([yshift=0.1em]ft5.north);
\draw [->,thick] ([yshift=-0.1em]t4.south) -- ([yshift=0.1em]ft2.north);
\draw [->,thick] ([yshift=-0.1em]t5.south) -- ([yshift=0.1em]ft3.north);
}
{
\node [anchor=north west] (label1) at ([yshift=0.3em]sent.south east) {{\scriptsize \textbf{分析}}};
\node [anchor=north west] (label2) at ([yshift=-0.5em]label1.south west) {{\scriptsize \textbf{转换}}};
\node [anchor=north west] (label3) at ([yshift=-0.5em]label2.south west) {{\scriptsize \textbf{生成}}};
}
\end{tikzpicture}
\caption{基于词的翻译过程}
\label{figureC3.2}
\end{figure}
%---------------------------
\noindent\hspace{2em}从现在的角度看,分析、转换和生成依然是非常深刻的一个观点。该过程蕴含于很多的任务,比如句法分析。即使对于神经机器翻译,从大的框架来说,依然在做分析、转换和生成,只不过有些过程隐含在神经网络的设计中。
\section{构建一个简易机器翻译系统}\index{Chapter3.2}%Index的作用,目前不清晰
\noindent\hspace{2em}本节首先对比人工翻译流程和机器翻译流程的异同点,从中我们可以归纳出构建机器翻译系统的两个主要流程:训练和解码。其次从单词翻译概率、句子级翻译模型和解码三个方面描述如何构建一个简易机器翻译系统。其中单词翻译概率和句子级翻译模型属于训练流程。
\subsection{机器翻译的思路}\index{Chapter3.2.1}
\subsubsection{人工翻译流程}\index{Chapter3.2.1.1}
\noindent\hspace{2em}当我们翻译一个句子时,首先会快速地分析出句子中词的构成,然后基于以往的知识,得到每个词的候选翻译,最后利用对单词的理解拼出来一个译文。这描述了人在翻译时的基本过程。尽管并不是严格的从心理学或者脑科学得出来的观点,但至少可以帮助我们理解人的翻译。
%----------------------------------------------
% 图3.3
\begin{figure}[htp]
\centering
\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\begin{tikzpicture}
% Top row: the segmented source sentence. Nodes s1..s5 hold the five words
% 我 / 对 / 你 / 表示 / 满意 (s1-s3 had empty \textbf{} labels — restored from
% the sentence quoted in the surrounding text).
\begin{scope}
\node [anchor=west] (s1) at (0,0) {\textbf{我}};
\node [anchor=west] (s2) at ([xshift=2em]s1.east) {\textbf{对}};
\node [anchor=west] (s3) at ([xshift=2em]s2.east) {\textbf{你}};
\node [anchor=west] (s4) at ([xshift=2em]s3.east) {\textbf{表示}};
\node [anchor=west] (s5) at ([xshift=2em]s4.east) {\textbf{满意}};
\node [anchor=south west] (sentlabel) at ([yshift=-0.5em]s1.north west) {\scriptsize{\textbf{\color{red}{待翻译句子(已经分词):}}}};
{
% Arrows from each source word down to its column of translation candidates.
\draw [->,very thick,ublue] (s1.south) -- ([yshift=-0.7em]s1.south);
\draw [->,very thick,ublue] (s2.south) -- ([yshift=-0.7em]s2.south);
\draw [->,very thick,ublue] (s3.south) -- ([yshift=-0.7em]s3.south);
\draw [->,very thick,ublue] (s4.south) -- ([yshift=-0.7em]s4.south);
\draw [->,very thick,ublue] (s5.south) -- ([yshift=-0.7em]s5.south);
{\small
% Candidate translations per word, color-coded by source position; the small
% black corner tag on each candidate shows the source-word index (1..5).
\node [anchor=north,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.5em] (t11) at ([yshift=-1em]s1.south) {I};
\node [anchor=north,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.5em] (t12) at ([yshift=-0.2em]t11.south) {me};
\node [anchor=north,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.5em] (t13) at ([yshift=-0.2em]t12.south) {I'm};
\node [anchor=north west,inner sep=1pt,fill=black] (tl11) at (t11.north west) {\tiny{{\color{white} \textbf{1}}}};
\node [anchor=north west,inner sep=1pt,fill=black] (tl12) at (t12.north west) {\tiny{{\color{white} \textbf{1}}}};
\node [anchor=north west,inner sep=1pt,fill=black] (tl13) at (t13.north west) {\tiny{{\color{white} \textbf{1}}}};
\node [anchor=north,inner sep=2pt,fill=green!20,minimum height=1.5em,minimum width=2.5em] (t21) at ([yshift=-1em]s2.south) {to};
\node [anchor=north,inner sep=2pt,fill=green!20,minimum height=1.5em,minimum width=2.5em] (t22) at ([yshift=-0.2em]t21.south) {with};
\node [anchor=north,inner sep=2pt,fill=green!20,minimum height=1.5em,minimum width=2.5em] (t23) at ([yshift=-0.2em]t22.south) {for};
\node [anchor=north west,inner sep=1pt,fill=black] (tl21) at (t21.north west) {\tiny{{\color{white} \textbf{2}}}};
\node [anchor=north west,inner sep=1pt,fill=black] (tl22) at (t22.north west) {\tiny{{\color{white} \textbf{2}}}};
\node [anchor=north west,inner sep=1pt,fill=black] (tl23) at (t23.north west) {\tiny{{\color{white} \textbf{2}}}};
\node [anchor=north,inner sep=2pt,fill=blue!20,minimum height=1.5em,minimum width=2.5em] (t31) at ([yshift=-1em]s3.south) {you};
\node [anchor=north west,inner sep=1pt,fill=black] (tl31) at (t31.north west) {\tiny{{\color{white} \textbf{3}}}};
\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3em] (t41) at ([yshift=-1em]s4.south) {$\phi$};
\node [anchor=north,inner sep=2pt,fill=orange!20,minimum height=1.5em,minimum width=3em] (t42) at ([yshift=-0.2em]t41.south) {show};
\node [anchor=north west,inner sep=1pt,fill=black] (tl41) at (t41.north west) {\tiny{{\color{white} \textbf{4}}}};
\node [anchor=north west,inner sep=1pt,fill=black] (tl42) at (t42.north west) {\tiny{{\color{white} \textbf{4}}}};
\node [anchor=north,inner sep=2pt,fill=purple!20,minimum height=1.5em,minimum width=4.5em] (t51) at ([yshift=-1em]s5.south) {satisfy};
\node [anchor=north,inner sep=2pt,fill=purple!20,minimum height=1.5em,minimum width=4.5em] (t52) at ([yshift=-0.2em]t51.south) {satisfied};
\node [anchor=north,inner sep=2pt,fill=purple!20,minimum height=1.5em,minimum width=4.5em] (t53) at ([yshift=-0.2em]t52.south) {satisfies};
\node [anchor=north west,inner sep=1pt,fill=black] (tl51) at (t51.north west) {\tiny{{\color{white} \textbf{5}}}};
\node [anchor=north west,inner sep=1pt,fill=black] (tl52) at (t52.north west) {\tiny{{\color{white} \textbf{5}}}};
\node [anchor=north west,inner sep=1pt,fill=black] (tl53) at (t53.north west) {\tiny{{\color{white} \textbf{5}}}};
}
}
\end{scope}
% Bottom row: the selected target sentence "I'm satisfied with you", with
% curved arrows from the chosen candidate of each source word (note the
% reordering: source position 5 maps to target position 2, etc.).
\begin{scope}
{\small
{
\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.5em] (ft11) at ([yshift=-1.2in]t11.west) {I'm};
\node [anchor=center,inner sep=2pt,fill=purple!20,minimum height=1.5em,minimum width=5em] (ft12) at ([xshift=5.0em]ft11.center) {satisfied};
\node [anchor=center,inner sep=2pt,fill=green!20,minimum height=1.5em,minimum width=2.5em] (ft13) at ([xshift=5.0em]ft12.center) {with};
\node [anchor=center,inner sep=2pt,fill=blue!20,minimum height=1.5em,minimum width=2.5em] (ft14) at ([xshift=4.0em]ft13.center) {you};
}
{
\node [anchor=north west,inner sep=1pt,fill=black] (ftl11) at (ft11.north west) {\tiny{{\color{white} \textbf{1}}}};
\node [anchor=north west,inner sep=1pt,fill=black] (ftl12) at (ft12.north west) {\tiny{{\color{white} \textbf{5}}}};
\node [anchor=north west,inner sep=1pt,fill=black] (ftl13) at (ft13.north west) {\tiny{{\color{white} \textbf{2}}}};
\node [anchor=north west,inner sep=1pt,fill=black] (ftl14) at (ft14.north west) {\tiny{{\color{white} \textbf{3}}}};
}
{
% "表示" has no overt translation here: t41 ($\phi$) is marked as translated
% to the empty word (翻空).
\draw [->,thick] ([yshift=-0.1em]t13.south) -- ([yshift=0.1em]ft11.north);
\draw [->,thick] ([yshift=0.1em]t22.south east) ..controls +(280:3em) and +(north:3em).. ([yshift=0.1em]ft13.north);
\draw [->,thick] ([yshift=-0.1em,xshift=0.2em]t31.south west) ..controls +(south:3em) and +(north:3em).. ([yshift=0.1em,xshift=0.2em]ft14.north west);
\draw [->,thick] ([yshift=0.1em]t52.south west) ..controls +(250:4em) and +(north:4em).. ([yshift=0.1em]ft12.north);
\node [anchor=east,inner sep=1pt] (nulltranslabel) at (t42.south west) {\scriptsize{\textbf{翻空}}};
\draw [->,thick] ([yshift=0.1em]t41.south west) ..controls +(250:1em) and +(north:1em).. (nulltranslabel.north);
}
}
\end{scope}
% Side annotations: braces labeling the "learned word translations" (upper
% part) and the "apply knowledge to generate the translation" (lower part).
\begin{scope}
{
\node [anchor=north west] (label1) at (ft11.south west) {\small{选择最佳单词翻译,调整词序,得到完美的结果}};
}
{
\draw[decorate,thick,decoration={brace,amplitude=5pt,mirror}] ([yshift=8em,xshift=-0.5em]t13.south west) -- ([xshift=-0.5em]t13.south west) node [pos=0.5,left,xshift=-0.5em,yshift=0.5em] (label2) {\footnotesize{\textbf{学习到的}}};
\node [anchor=north west] (label2part2) at ([yshift=0.3em]label2.south west) {\footnotesize{\textbf{单词翻译}}};
}
{
\draw[decorate,thick,decoration={brace,amplitude=5pt,mirror}] ([yshift=-0.2em,xshift=-0.5em]t13.south west) -- ([yshift=-5em,xshift=-0.5em]t13.south west) node [pos=0.5,left,xshift=-0.5em,yshift=0.5em] (label3) {\footnotesize{\textbf{运用知识}}};
\node [anchor=north west] (label3part2) at ([yshift=0.3em]label3.south west) {\footnotesize{\textbf{生成译文}}};
}
\end{scope}
\end{tikzpicture}
\caption{人工翻译的过程}
\label{figureC3.3}
\end{figure}
%---------------------------
\noindent\hspace{2em}图\ref{figureC3.3}展示了人工翻译的过程。其中待翻译的句子是“我 对 你 表示 满意”,它的一个可能译文是“I'm satisfied with you”。我们可以把上述过程总结为如下两个过程。
\noindent\hspace{2em}第一、学习单词的翻译。即获取待翻译句子的每个单词的候选翻译。比如“对”的可能译文有“to”、“with”和“for”等。对于人来说,可以通过阅读、背诵、做题或者老师教等途径获得,并在大脑中形成知识。这些知识就包含了源语词与目标语词对应关系。我们也把这个过程称之为学习过程。
\noindent\hspace{2em}第二、运用知识生成译文。当翻译一个课本上没有见过的句子时,我们可能会想到词之间的翻译关系、一些常见的单词搭配、主谓宾等语法知识等,比如“satisfied”后面常使用介词“with”来表示对人的满意等,并基于这些知识快速地对译文进行生成。
\subsubsection{机器翻译流程}\index{Chapter3.2.1.2}
\noindent\hspace{2em}那么机器是如何完成翻译的呢?机器翻译可能没有人那么智能。因为一方面没有老师教给它一些非常好用的规则,使它能够快速得到译文。另一方面机器很笨,尽管它能知道每个单词的译文,但是不知道这些译文怎么拼装成句,甚至不知道哪些译文是对的。为了更加直观地理解机器在翻译时的困境,我们将其中需要解决的问题归纳如下。
\noindent\hspace{2em}问题一、如何将单词的译文拼装成句?
\noindent\hspace{2em}问题二、如果可以将译文成句,那如何判断结果的好坏?
\noindent\hspace{2em}对于问题一。机器最擅长的就是计算,它可以类似于走一条路径,尝试把译文拼装成句。如图3.4中蓝色和红色的线,表示的就是两条译文选择路径,区别在于“满意”和“对”的翻译候选是不一样的,蓝色线选择的是“satisfy”和“to”,而红色线是“satisfied”和“with”。换句话说,翻译就是一条译文选择路径,不同的译文对应不同的路径,并且词序也可以不同。机器可以利用它的计算能力找到很多这样的路径。
%-----------公式样例
\subsubsection{如何从一个双语平行数据中学习?}\index{Chapter3.2.3.2}
\noindent\hspace{2em}在上一节的骰子游戏中,我们重复地掷骰子很多次,然后计算“1”到“6”各出现的次数,再除以掷的总次数,最后得到它们出现的近似概率。我们同样用类似的方式估计单词翻译概率。假设$x$表示任意源语单词,所有的目标语单词$y \in Y$都可能是它的译文。当给定一个互译的句对$(s,t)$,我们定义$\textrm{P}(x \leftrightarrow y; s, t)$表示在$(s,t)$中$x$与$y$互译的可能性或概率。其中$x$属于句子$s$中的词,而$y$属于$t$中的词,具体计算如公式\ref{eqC3.1}所示。
\begin{equation}
\textrm{P}(x \leftrightarrow y; s,t) \equiv \textrm{P}(x,y;s,t) = \frac{c(x,y;s,t)}{\sum_{x',y'} c(x',y';s,t)}
\label{eqC3.1}
\end{equation}
\noindent\hspace{2em}上式中分子$c(x,y;s,t)$表示$x$与$y$在句对$(s,t)$中共现的总次数\cite{Aho-Ullman:1969:JCSS},分母$\sum_{x',y'} c(x',y';$ $s,t)$表示任意的$x'$和任意的$y'$在$(s,t)$中共现的总次数\cite{book_key}。为了更加清晰地理解如何计算$c(x,y;s,t)$,我们用图3.7描述的算法进行了说明。
\noindent\hspace{2em}这是一个table的样例,word里目前还没有使用表格,因此借用学长论文里的table,如表\ref{table-ner}所示。
%----------------------
% F1 scores on the CoNLL-2003 English NER test set (sample table borrowed
% from a thesis; referenced in the text as \ref{table-ner}).
\begin{table}[htp]
\small
\centering % use \centering inside a float, not a center environment (avoids extra vertical space)
\begin{tabular}{l|l}
\hline \textbf{Model} & \textbf{F1} \\
\hline
% Section-header row: italic text, not math mode ($best \ published$ would
% typeset the words as a product of math symbols).
\textit{best published} \\
BiLSTM-CRF \cite{lample-etal-2016-neural} & 90.94 \\
BiLSTM-CRF+ELMo \cite{peters-etal-2018-deep} & 92.22 \\
BERT Base \cite{devlin2018bert} & 92.40 \\
BERT Large \cite{devlin2018bert} & 92.80 \\
BiLSTM-CRF+PCE \cite{Akbik2019PooledCE} & 93.18 \\
\hline
Random RNNs w/o pre-trained LM & 90.64 \\
DARTS w/o pre-trained LM & 91.05 \\
I-DARTS ($n=2$) w/o pre-trained LM & 90.96 \\
I-DARTS ($n=1$) w/o pre-trained LM & 91.23 \\
\hline
Random RNNs & 92.89 \\
DARTS & 93.13 \\
I-DARTS ($n=2$) & 93.14 \\
I-DARTS ($n=1$) & 93.47 \\
\hline
\end{tabular}
% Both labels kept: font-table (inside the caption, from the original source)
% and table-ner (used by the running text).
\caption{\label{font-table} F1 scores on the CoNLL-2003 English NER test set.}
\label{table-ner}
\end{table}
%----------------------------------------------------------------------------------------
% INDEX
%----------------------------------------------------------------------------------------
\cleardoublepage % Make sure the index starts on an odd (right side) page
\phantomsection
\setlength{\columnsep}{0.75cm} % Space between the 2 columns of the index
\addcontentsline{toc}{chapter}{\textcolor{ocre}{Index}} % Add an Index heading to the table of contents
\printindex % Output the index
%----------------------------------------------------------------------------------------
\ No newline at end of file
%----------------------------------------------------------------------------------------
% TITLE PAGE
%----------------------------------------------------------------------------------------
% Title page: full-page background image overlaid with a semi-transparent
% panel holding the title, subtitle, and author (template boilerplate).
\begingroup
\thispagestyle{empty} % Suppress headers and footers on the title page
\begin{tikzpicture}[remember picture,overlay]
\node[inner sep=0pt] (background) at (current page.center) {\includegraphics[width=\paperwidth]{background.pdf}};
\draw (current page.center) node [fill=ocre!30!white,fill opacity=0.6,text opacity=1,inner sep=1cm]{\Huge\centering\bfseries\sffamily\parbox[c][][t]{\paperwidth}{\centering The Search for a Title\\[15pt] % Book title
{\Large A Profound Subtitle}\\[20pt] % Subtitle
{\huge Dr. John Smith}}}; % Author name
\end{tikzpicture}
\vfill
\endgroup
%----------------------------------------------------------------------------------------
% COPYRIGHT PAGE
%----------------------------------------------------------------------------------------
\newpage
~\vfill
\thispagestyle{empty}
\noindent Copyright \copyright\ 2019 John Smith\\ % Copyright notice
\noindent \textsc{Published by Publisher}\\ % Publisher
\noindent \textsc{book-website.com}\\ % URL
\noindent Licensed under the Creative Commons Attribution-NonCommercial 3.0 Unported License (the ``License''). You may not use this file except in compliance with the License. You may obtain a copy of the License at \url{http://creativecommons.org/licenses/by-nc/3.0}. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \textsc{``as is'' basis, without warranties or conditions of any kind}, either express or implied. See the License for the specific language governing permissions and limitations under the License.\\ % License information, replace this with your own license (if any)
\noindent \textit{First printing, March 2019} % Printing/edition date
%----------------------------------------------------------------------------------------
% TABLE OF CONTENTS
%----------------------------------------------------------------------------------------
%\usechapterimagefalse % If you don't want to include a chapter image, use this to toggle images off - it can be enabled later with \usechapterimagetrue
\chapterimage{chapter_head_1.pdf} % Table of contents heading image
\pagestyle{empty} % Disable headers and footers for the following pages
\tableofcontents % Print the table of contents itself
\cleardoublepage % Forces the first chapter to start on an odd page so it's on the right side of the book
\pagestyle{fancy} % Enable headers and footers again
\ No newline at end of file
......@@ -113,14 +113,18 @@
\indexentry{Back Translation|hyperpage}{57}
\indexentry{迭代式回译|hyperpage}{58}
\indexentry{Iterative Back Translation|hyperpage}{58}
\indexentry{前向翻译|hyperpage}{58}
\indexentry{Forward Translation|hyperpage}{58}
\indexentry{前向翻译|hyperpage}{59}
\indexentry{Forward Translation|hyperpage}{59}
\indexentry{分布式表示|hyperpage}{59}
\indexentry{Distributed Representation|hyperpage}{59}
\indexentry{预训练|hyperpage}{59}
\indexentry{Pre-training|hyperpage}{59}
\indexentry{微调|hyperpage}{59}
\indexentry{Fine-tuning|hyperpage}{59}
\indexentry{多任务学习|hyperpage}{61}
\indexentry{Multitask Learning|hyperpage}{61}
\indexentry{知识精炼|hyperpage}{62}
\indexentry{Knowledge Distillation|hyperpage}{62}
\indexentry{模型压缩|hyperpage}{62}
\indexentry{Model Compression|hyperpage}{62}
\indexentry{学习难度|hyperpage}{62}
......@@ -136,10 +140,10 @@
\indexentry{中间层输出|hyperpage}{64}
\indexentry{Hint-based Knowledge Transfer|hyperpage}{64}
\indexentry{注意力分布|hyperpage}{64}
\indexentry{Attention To Attention Transfer|hyperpage}{64}
\indexentry{Attention To Attention Transfer|hyperpage}{65}
\indexentry{循环一致性|hyperpage}{67}
\indexentry{Circle Consistency|hyperpage}{67}
\indexentry{翻译中回译|hyperpage}{68}
\indexentry{On-the-fly Back-translation|hyperpage}{68}
\indexentry{网络结构搜索技术|hyperpage}{71}
\indexentry{Neural Architecture Search;NAS|hyperpage}{71}
\indexentry{神经架构搜索|hyperpage}{71}
\indexentry{Neural Architecture Search|hyperpage}{71}
......@@ -2,690 +2,714 @@
\defcounter {refsection}{0}\relax
\select@language {english}
\defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {I}{机器翻译基础}}{11}{part.1}
\contentsline {part}{\@mypartnumtocformat {I}{机器翻译基础}}{13}{part.1}
\ttl@starttoc {default@1}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {1}机器翻译简介}{13}{chapter.1}
\contentsline {chapter}{\numberline {1}机器翻译简介}{15}{chapter.1}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.1}机器翻译的概念}{13}{section.1.1}
\contentsline {section}{\numberline {1.1}机器翻译的概念}{15}{section.1.1}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.2}机器翻译简史}{16}{section.1.2}
\contentsline {section}{\numberline {1.2}机器翻译简史}{18}{section.1.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.1}人工翻译}{16}{subsection.1.2.1}
\contentsline {subsection}{\numberline {1.2.1}人工翻译}{18}{subsection.1.2.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.2}机器翻译的萌芽}{17}{subsection.1.2.2}
\contentsline {subsection}{\numberline {1.2.2}机器翻译的萌芽}{19}{subsection.1.2.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.3}机器翻译的受挫}{18}{subsection.1.2.3}
\contentsline {subsection}{\numberline {1.2.3}机器翻译的受挫}{20}{subsection.1.2.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.4}机器翻译的快速成长}{19}{subsection.1.2.4}
\contentsline {subsection}{\numberline {1.2.4}机器翻译的快速成长}{21}{subsection.1.2.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.5}机器翻译的爆发}{20}{subsection.1.2.5}
\contentsline {subsection}{\numberline {1.2.5}机器翻译的爆发}{22}{subsection.1.2.5}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.3}机器翻译现状}{21}{section.1.3}
\contentsline {section}{\numberline {1.3}机器翻译现状}{23}{section.1.3}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.4}机器翻译方法}{22}{section.1.4}
\contentsline {section}{\numberline {1.4}机器翻译方法}{24}{section.1.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.1}基于规则的机器翻译}{24}{subsection.1.4.1}
\contentsline {subsection}{\numberline {1.4.1}基于规则的机器翻译}{26}{subsection.1.4.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.2}基于实例的机器翻译}{24}{subsection.1.4.2}
\contentsline {subsection}{\numberline {1.4.2}基于实例的机器翻译}{26}{subsection.1.4.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.3}统计机器翻译}{25}{subsection.1.4.3}
\contentsline {subsection}{\numberline {1.4.3}统计机器翻译}{27}{subsection.1.4.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.4}神经机器翻译}{26}{subsection.1.4.4}
\contentsline {subsection}{\numberline {1.4.4}神经机器翻译}{28}{subsection.1.4.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.5}对比分析}{27}{subsection.1.4.5}
\contentsline {subsection}{\numberline {1.4.5}对比分析}{29}{subsection.1.4.5}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.5}翻译质量评价}{28}{section.1.5}
\contentsline {section}{\numberline {1.5}翻译质量评价}{30}{section.1.5}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.5.1}人工评价}{28}{subsection.1.5.1}
\contentsline {subsection}{\numberline {1.5.1}人工评价}{30}{subsection.1.5.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.5.2}自动评价}{29}{subsection.1.5.2}
\contentsline {subsection}{\numberline {1.5.2}自动评价}{31}{subsection.1.5.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{BLEU}{29}{section*.15}
\contentsline {subsubsection}{BLEU}{31}{section*.15}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{TER}{31}{section*.16}
\contentsline {subsubsection}{TER}{33}{section*.16}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于检测点的评价}{31}{section*.17}
\contentsline {subsubsection}{基于检测点的评价}{33}{section*.17}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.6}机器翻译应用}{32}{section.1.6}
\contentsline {section}{\numberline {1.6}机器翻译应用}{34}{section.1.6}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.7}开源项目与评测}{34}{section.1.7}
\contentsline {section}{\numberline {1.7}开源项目与评测}{36}{section.1.7}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.7.1}开源机器翻译系统}{34}{subsection.1.7.1}
\contentsline {subsection}{\numberline {1.7.1}开源机器翻译系统}{36}{subsection.1.7.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{统计机器翻译开源系统}{35}{section*.19}
\contentsline {subsubsection}{统计机器翻译开源系统}{37}{section*.19}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经机器翻译开源系统}{36}{section*.20}
\contentsline {subsubsection}{神经机器翻译开源系统}{38}{section*.20}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.7.2}常用数据集及公开评测任务}{38}{subsection.1.7.2}
\contentsline {subsection}{\numberline {1.7.2}常用数据集及公开评测任务}{40}{subsection.1.7.2}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.8}推荐学习资源}{40}{section.1.8}
\contentsline {section}{\numberline {1.8}推荐学习资源}{42}{section.1.8}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {2}词法、语法及统计建模基础}{45}{chapter.2}
\contentsline {chapter}{\numberline {2}词法、语法及统计建模基础}{47}{chapter.2}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.1}问题概述 }{46}{section.2.1}
\contentsline {section}{\numberline {2.1}问题概述 }{48}{section.2.1}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.2}概率论基础}{47}{section.2.2}
\contentsline {section}{\numberline {2.2}概率论基础}{49}{section.2.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.1}随机变量和概率}{47}{subsection.2.2.1}
\contentsline {subsection}{\numberline {2.2.1}随机变量和概率}{50}{subsection.2.2.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.2}联合概率、条件概率和边缘概率}{49}{subsection.2.2.2}
\contentsline {subsection}{\numberline {2.2.2}联合概率、条件概率和边缘概率}{51}{subsection.2.2.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.3}链式法则}{50}{subsection.2.2.3}
\contentsline {subsection}{\numberline {2.2.3}链式法则}{52}{subsection.2.2.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.4}贝叶斯法则}{51}{subsection.2.2.4}
\contentsline {subsection}{\numberline {2.2.4}贝叶斯法则}{53}{subsection.2.2.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.5}KL距离和熵}{53}{subsection.2.2.5}
\contentsline {subsection}{\numberline {2.2.5}KL距离和熵}{55}{subsection.2.2.5}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{信息熵}{53}{section*.27}
\contentsline {subsubsection}{信息熵}{55}{section*.27}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{KL距离}{54}{section*.29}
\contentsline {subsubsection}{KL距离}{56}{section*.29}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{交叉熵}{54}{section*.30}
\contentsline {subsubsection}{交叉熵}{56}{section*.30}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.3}中文分词}{55}{section.2.3}
\contentsline {section}{\numberline {2.3}中文分词}{57}{section.2.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.3.1}基于词典的分词方法}{56}{subsection.2.3.1}
\contentsline {subsection}{\numberline {2.3.1}基于词典的分词方法}{58}{subsection.2.3.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.3.2}基于统计的分词方法}{57}{subsection.2.3.2}
\contentsline {subsection}{\numberline {2.3.2}基于统计的分词方法}{59}{subsection.2.3.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{统计模型的学习与推断}{57}{section*.34}
\contentsline {subsubsection}{统计模型的学习与推断}{59}{section*.34}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{掷骰子游戏}{58}{section*.36}
\contentsline {subsubsection}{掷骰子游戏}{60}{section*.36}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{全概率分词方法}{60}{section*.40}
\contentsline {subsubsection}{全概率分词方法}{62}{section*.40}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.4}$n$-gram语言模型 }{62}{section.2.4}
\contentsline {section}{\numberline {2.4}$n$-gram语言模型 }{64}{section.2.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.4.1}建模}{63}{subsection.2.4.1}
\contentsline {subsection}{\numberline {2.4.1}建模}{65}{subsection.2.4.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.4.2}未登录词和平滑算法}{65}{subsection.2.4.2}
\contentsline {subsection}{\numberline {2.4.2}未登录词和平滑算法}{67}{subsection.2.4.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{加法平滑方法}{66}{section*.46}
\contentsline {subsubsection}{加法平滑方法}{68}{section*.46}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{古德-图灵估计法}{67}{section*.48}
\contentsline {subsubsection}{古德-图灵估计法}{69}{section*.48}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{Kneser-Ney平滑方法}{68}{section*.50}
\contentsline {subsubsection}{Kneser-Ney平滑方法}{70}{section*.50}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.5}句法分析(短语结构分析)}{70}{section.2.5}
\contentsline {section}{\numberline {2.5}句法分析(短语结构分析)}{72}{section.2.5}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.5.1}句子的句法树表示}{70}{subsection.2.5.1}
\contentsline {subsection}{\numberline {2.5.1}句子的句法树表示}{72}{subsection.2.5.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.5.2}上下文无关文法}{72}{subsection.2.5.2}
\contentsline {subsection}{\numberline {2.5.2}上下文无关文法}{74}{subsection.2.5.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.5.3}规则和推导的概率}{76}{subsection.2.5.3}
\contentsline {subsection}{\numberline {2.5.3}规则和推导的概率}{78}{subsection.2.5.3}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.6}小结及深入阅读}{78}{section.2.6}
\contentsline {section}{\numberline {2.6}小结及深入阅读}{80}{section.2.6}
\defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {II}{统计机器翻译}}{81}{part.2}
\contentsline {part}{\@mypartnumtocformat {II}{统计机器翻译}}{83}{part.2}
\ttl@stoptoc {default@1}
\ttl@starttoc {default@2}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {3}基于词的机器翻译模型}{83}{chapter.3}
\contentsline {chapter}{\numberline {3}基于词的机器翻译模型}{85}{chapter.3}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.1}什么是基于词的翻译模型}{83}{section.3.1}
\contentsline {section}{\numberline {3.1}什么是基于词的翻译模型}{85}{section.3.1}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.2}构建一个简单的机器翻译系统}{85}{section.3.2}
\contentsline {section}{\numberline {3.2}构建一个简单的机器翻译系统}{87}{section.3.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.1}如何进行翻译?}{85}{subsection.3.2.1}
\contentsline {subsection}{\numberline {3.2.1}如何进行翻译?}{87}{subsection.3.2.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{机器翻译流程}{86}{section*.63}
\contentsline {subsubsection}{机器翻译流程}{88}{section*.63}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{人工翻译 vs. 机器翻译}{87}{section*.65}
\contentsline {subsubsection}{人工翻译 vs. 机器翻译}{89}{section*.65}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.2}基本框架}{87}{subsection.3.2.2}
\contentsline {subsection}{\numberline {3.2.2}基本框架}{89}{subsection.3.2.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.3}单词翻译概率}{88}{subsection.3.2.3}
\contentsline {subsection}{\numberline {3.2.3}单词翻译概率}{90}{subsection.3.2.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{什么是单词翻译概率?}{88}{section*.67}
\contentsline {subsubsection}{什么是单词翻译概率?}{90}{section*.67}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{如何从一个双语平行数据中学习?}{88}{section*.69}
\contentsline {subsubsection}{如何从一个双语平行数据中学习?}{90}{section*.69}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{如何从大量的双语平行数据中学习?}{90}{section*.70}
\contentsline {subsubsection}{如何从大量的双语平行数据中学习?}{92}{section*.70}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.4}句子级翻译模型}{91}{subsection.3.2.4}
\contentsline {subsection}{\numberline {3.2.4}句子级翻译模型}{93}{subsection.3.2.4}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基础模型}{91}{section*.72}
\contentsline {subsubsection}{基础模型}{93}{section*.72}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{生成流畅的译文}{93}{section*.74}
\contentsline {subsubsection}{生成流畅的译文}{95}{section*.74}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.5}解码}{95}{subsection.3.2.5}
\contentsline {subsection}{\numberline {3.2.5}解码}{97}{subsection.3.2.5}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.3}基于词的翻译建模}{98}{section.3.3}
\contentsline {section}{\numberline {3.3}基于词的翻译建模}{100}{section.3.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.3.1}噪声信道模型}{98}{subsection.3.3.1}
\contentsline {subsection}{\numberline {3.3.1}噪声信道模型}{100}{subsection.3.3.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.3.2}统计机器翻译的三个基本问题}{100}{subsection.3.3.2}
\contentsline {subsection}{\numberline {3.3.2}统计机器翻译的三个基本问题}{102}{subsection.3.3.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{词对齐}{101}{section*.83}
\contentsline {subsubsection}{词对齐}{103}{section*.83}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于词对齐的翻译模型}{101}{section*.86}
\contentsline {subsubsection}{基于词对齐的翻译模型}{103}{section*.86}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于词对齐的翻译实例}{103}{section*.88}
\contentsline {subsubsection}{基于词对齐的翻译实例}{105}{section*.88}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.4}IBM模型1-2}{104}{section.3.4}
\contentsline {section}{\numberline {3.4}IBM模型1-2}{106}{section.3.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.1}IBM模型1}{104}{subsection.3.4.1}
\contentsline {subsection}{\numberline {3.4.1}IBM模型1}{106}{subsection.3.4.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.2}IBM模型2}{106}{subsection.3.4.2}
\contentsline {subsection}{\numberline {3.4.2}IBM模型2}{108}{subsection.3.4.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.3}解码及计算优化}{107}{subsection.3.4.3}
\contentsline {subsection}{\numberline {3.4.3}解码及计算优化}{109}{subsection.3.4.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.4}训练}{108}{subsection.3.4.4}
\contentsline {subsection}{\numberline {3.4.4}训练}{110}{subsection.3.4.4}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{目标函数}{108}{section*.93}
\contentsline {subsubsection}{目标函数}{110}{section*.93}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{优化}{109}{section*.95}
\contentsline {subsubsection}{优化}{111}{section*.95}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.5}IBM模型3-5及隐马尔可夫模型}{115}{section.3.5}
\contentsline {section}{\numberline {3.5}IBM模型3-5及隐马尔可夫模型}{117}{section.3.5}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.1}基于产出率的翻译模型}{115}{subsection.3.5.1}
\contentsline {subsection}{\numberline {3.5.1}基于产出率的翻译模型}{117}{subsection.3.5.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.2}IBM 模型3}{118}{subsection.3.5.2}
\contentsline {subsection}{\numberline {3.5.2}IBM 模型3}{120}{subsection.3.5.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.3}IBM 模型4}{119}{subsection.3.5.3}
\contentsline {subsection}{\numberline {3.5.3}IBM 模型4}{121}{subsection.3.5.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.4} IBM 模型5}{121}{subsection.3.5.4}
\contentsline {subsection}{\numberline {3.5.4} IBM 模型5}{123}{subsection.3.5.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.5}隐马尔可夫模型}{122}{subsection.3.5.5}
\contentsline {subsection}{\numberline {3.5.5}隐马尔可夫模型}{124}{subsection.3.5.5}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{隐马尔可夫模型}{123}{section*.107}
\contentsline {subsubsection}{隐马尔可夫模型}{125}{section*.107}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{词对齐模型}{124}{section*.109}
\contentsline {subsubsection}{词对齐模型}{126}{section*.109}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.6}解码和训练}{125}{subsection.3.5.6}
\contentsline {subsection}{\numberline {3.5.6}解码和训练}{127}{subsection.3.5.6}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.6}问题分析}{125}{section.3.6}
\contentsline {section}{\numberline {3.6}问题分析}{127}{section.3.6}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.1}词对齐及对称化}{125}{subsection.3.6.1}
\contentsline {subsection}{\numberline {3.6.1}词对齐及对称化}{127}{subsection.3.6.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.2}Deficiency}{126}{subsection.3.6.2}
\contentsline {subsection}{\numberline {3.6.2}Deficiency}{128}{subsection.3.6.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.3}句子长度}{127}{subsection.3.6.3}
\contentsline {subsection}{\numberline {3.6.3}句子长度}{129}{subsection.3.6.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.4}其他问题}{128}{subsection.3.6.4}
\contentsline {subsection}{\numberline {3.6.4}其他问题}{130}{subsection.3.6.4}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.7}小结及深入阅读}{128}{section.3.7}
\contentsline {section}{\numberline {3.7}小结及深入阅读}{130}{section.3.7}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {4}基于短语和句法的机器翻译模型}{131}{chapter.4}
\contentsline {chapter}{\numberline {4}基于短语和句法的机器翻译模型}{133}{chapter.4}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.1}翻译中的结构信息}{131}{section.4.1}
\contentsline {section}{\numberline {4.1}翻译中的结构信息}{133}{section.4.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.1.1}更大粒度的翻译单元}{132}{subsection.4.1.1}
\contentsline {subsection}{\numberline {4.1.1}更大粒度的翻译单元}{134}{subsection.4.1.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.1.2}句子的结构信息}{134}{subsection.4.1.2}
\contentsline {subsection}{\numberline {4.1.2}句子的结构信息}{136}{subsection.4.1.2}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.2}基于短语的翻译模型}{136}{section.4.2}
\contentsline {section}{\numberline {4.2}基于短语的翻译模型}{138}{section.4.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.1}机器翻译中的短语}{136}{subsection.4.2.1}
\contentsline {subsection}{\numberline {4.2.1}机器翻译中的短语}{138}{subsection.4.2.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.2}数学建模及判别式模型}{139}{subsection.4.2.2}
\contentsline {subsection}{\numberline {4.2.2}数学建模及判别式模型}{141}{subsection.4.2.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于翻译推导的建模}{139}{section*.121}
\contentsline {subsubsection}{基于翻译推导的建模}{141}{section*.121}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{对数线性模型}{140}{section*.122}
\contentsline {subsubsection}{对数线性模型}{142}{section*.122}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{搭建模型的基本流程}{141}{section*.123}
\contentsline {subsubsection}{搭建模型的基本流程}{143}{section*.123}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.3}短语抽取}{142}{subsection.4.2.3}
\contentsline {subsection}{\numberline {4.2.3}短语抽取}{144}{subsection.4.2.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{与词对齐一致的短语}{143}{section*.126}
\contentsline {subsubsection}{与词对齐一致的短语}{145}{section*.126}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{获取词对齐}{144}{section*.130}
\contentsline {subsubsection}{获取词对齐}{146}{section*.130}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{度量双语短语质量}{145}{section*.132}
\contentsline {subsubsection}{度量双语短语质量}{147}{section*.132}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.4}调序}{146}{subsection.4.2.4}
\contentsline {subsection}{\numberline {4.2.4}调序}{148}{subsection.4.2.4}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于距离的调序}{146}{section*.136}
\contentsline {subsubsection}{基于距离的调序}{148}{section*.136}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于方向的调序}{147}{section*.138}
\contentsline {subsubsection}{基于方向的调序}{149}{section*.138}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于分类的调序}{149}{section*.141}
\contentsline {subsubsection}{基于分类的调序}{151}{section*.141}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.5}特征}{149}{subsection.4.2.5}
\contentsline {subsection}{\numberline {4.2.5}特征}{151}{subsection.4.2.5}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.6}最小错误率训练}{150}{subsection.4.2.6}
\contentsline {subsection}{\numberline {4.2.6}最小错误率训练}{152}{subsection.4.2.6}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.7}栈解码}{153}{subsection.4.2.7}
\contentsline {subsection}{\numberline {4.2.7}栈解码}{155}{subsection.4.2.7}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译候选匹配}{154}{section*.146}
\contentsline {subsubsection}{翻译候选匹配}{156}{section*.146}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译假设扩展}{154}{section*.148}
\contentsline {subsubsection}{翻译假设扩展}{156}{section*.148}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{剪枝}{155}{section*.150}
\contentsline {subsubsection}{剪枝}{157}{section*.150}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{解码中的栈结构}{157}{section*.152}
\contentsline {subsubsection}{解码中的栈结构}{159}{section*.152}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.3}基于层次短语的模型}{158}{section.4.3}
\contentsline {section}{\numberline {4.3}基于层次短语的模型}{160}{section.4.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.1}同步上下文无关文法}{161}{subsection.4.3.1}
\contentsline {subsection}{\numberline {4.3.1}同步上下文无关文法}{163}{subsection.4.3.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{文法定义}{161}{section*.157}
\contentsline {subsubsection}{文法定义}{163}{section*.157}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{推导}{162}{section*.158}
\contentsline {subsubsection}{推导}{164}{section*.158}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{胶水规则}{163}{section*.159}
\contentsline {subsubsection}{胶水规则}{165}{section*.159}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{处理流程}{164}{section*.160}
\contentsline {subsubsection}{处理流程}{166}{section*.160}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.2}层次短语规则抽取}{164}{subsection.4.3.2}
\contentsline {subsection}{\numberline {4.3.2}层次短语规则抽取}{166}{subsection.4.3.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.3}翻译模型及特征}{166}{subsection.4.3.3}
\contentsline {subsection}{\numberline {4.3.3}翻译模型及特征}{168}{subsection.4.3.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.4}CYK解码}{167}{subsection.4.3.4}
\contentsline {subsection}{\numberline {4.3.4}CYK解码}{169}{subsection.4.3.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.5}立方剪枝}{170}{subsection.4.3.5}
\contentsline {subsection}{\numberline {4.3.5}立方剪枝}{172}{subsection.4.3.5}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.4}基于语言学句法的模型}{173}{section.4.4}
\contentsline {section}{\numberline {4.4}基于语言学句法的模型}{175}{section.4.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.1}基于句法的翻译模型分类}{175}{subsection.4.4.1}
\contentsline {subsection}{\numberline {4.4.1}基于句法的翻译模型分类}{177}{subsection.4.4.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.2}基于树结构的文法}{175}{subsection.4.4.2}
\contentsline {subsection}{\numberline {4.4.2}基于树结构的文法}{177}{subsection.4.4.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{树到树翻译规则}{177}{section*.176}
\contentsline {subsubsection}{树到树翻译规则}{179}{section*.176}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于树结构的翻译推导}{179}{section*.178}
\contentsline {subsubsection}{基于树结构的翻译推导}{181}{section*.178}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{树到串翻译规则}{181}{section*.181}
\contentsline {subsubsection}{树到串翻译规则}{183}{section*.181}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.3}树到串翻译规则抽取}{182}{subsection.4.4.3}
\contentsline {subsection}{\numberline {4.4.3}树到串翻译规则抽取}{184}{subsection.4.4.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{树的切割与最小规则}{183}{section*.183}
\contentsline {subsubsection}{树的切割与最小规则}{185}{section*.183}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{空对齐处理}{186}{section*.189}
\contentsline {subsubsection}{空对齐处理}{188}{section*.189}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{组合规则}{187}{section*.191}
\contentsline {subsubsection}{组合规则}{189}{section*.191}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{SPMT规则}{188}{section*.193}
\contentsline {subsubsection}{SPMT规则}{190}{section*.193}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{句法树二叉化}{189}{section*.195}
\contentsline {subsubsection}{句法树二叉化}{191}{section*.195}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.4}树到树翻译规则抽取}{190}{subsection.4.4.4}
\contentsline {subsection}{\numberline {4.4.4}树到树翻译规则抽取}{192}{subsection.4.4.4}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于节点对齐的规则抽取}{191}{section*.199}
\contentsline {subsubsection}{基于节点对齐的规则抽取}{193}{section*.199}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于对齐矩阵的规则抽取}{192}{section*.202}
\contentsline {subsubsection}{基于对齐矩阵的规则抽取}{194}{section*.202}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.5}句法翻译模型的特征}{194}{subsection.4.4.5}
\contentsline {subsection}{\numberline {4.4.5}句法翻译模型的特征}{196}{subsection.4.4.5}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.6}基于超图的推导空间表示}{195}{subsection.4.4.6}
\contentsline {subsection}{\numberline {4.4.6}基于超图的推导空间表示}{197}{subsection.4.4.6}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.7}基于树的解码 vs 基于串的解码}{197}{subsection.4.4.7}
\contentsline {subsection}{\numberline {4.4.7}基于树的解码 vs 基于串的解码}{199}{subsection.4.4.7}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于树的解码}{199}{section*.209}
\contentsline {subsubsection}{基于树的解码}{201}{section*.209}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于串的解码}{200}{section*.212}
\contentsline {subsubsection}{基于串的解码}{202}{section*.212}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.5}小结及深入阅读}{202}{section.4.5}
\contentsline {section}{\numberline {4.5}小结及深入阅读}{204}{section.4.5}
\defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {III}{神经机器翻译}}{205}{part.3}
\contentsline {part}{\@mypartnumtocformat {III}{神经机器翻译}}{207}{part.3}
\ttl@stoptoc {default@2}
\ttl@starttoc {default@3}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {5}人工神经网络和神经语言建模}{207}{chapter.5}
\contentsline {chapter}{\numberline {5}人工神经网络和神经语言建模}{209}{chapter.5}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.1}深度学习与人工神经网络}{208}{section.5.1}
\contentsline {section}{\numberline {5.1}深度学习与人工神经网络}{210}{section.5.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.1.1}发展简史}{208}{subsection.5.1.1}
\contentsline {subsection}{\numberline {5.1.1}发展简史}{210}{subsection.5.1.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{早期的人工神经网络和第一次寒冬}{208}{section*.214}
\contentsline {subsubsection}{早期的人工神经网络和第一次寒冬}{210}{section*.214}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经网络的第二次高潮和第二次寒冬}{209}{section*.215}
\contentsline {subsubsection}{神经网络的第二次高潮和第二次寒冬}{211}{section*.215}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深度学习和神经网络方法的崛起}{210}{section*.216}
\contentsline {subsubsection}{深度学习和神经网络方法的崛起}{212}{section*.216}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.1.2}为什么需要深度学习}{211}{subsection.5.1.2}
\contentsline {subsection}{\numberline {5.1.2}为什么需要深度学习}{213}{subsection.5.1.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{端到端学习和表示学习}{211}{section*.218}
\contentsline {subsubsection}{端到端学习和表示学习}{213}{section*.218}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深度学习的效果}{212}{section*.220}
\contentsline {subsubsection}{深度学习的效果}{214}{section*.220}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.2}神经网络基础}{212}{section.5.2}
\contentsline {section}{\numberline {5.2}神经网络基础}{214}{section.5.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.1}线性代数基础}{212}{subsection.5.2.1}
\contentsline {subsection}{\numberline {5.2.1}线性代数基础}{214}{subsection.5.2.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{标量、向量和矩阵}{213}{section*.222}
\contentsline {subsubsection}{标量、向量和矩阵}{215}{section*.222}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{矩阵的转置}{214}{section*.223}
\contentsline {subsubsection}{矩阵的转置}{216}{section*.223}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{矩阵加法和数乘}{214}{section*.224}
\contentsline {subsubsection}{矩阵加法和数乘}{216}{section*.224}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{矩阵乘法和矩阵点乘}{215}{section*.225}
\contentsline {subsubsection}{矩阵乘法和矩阵点乘}{217}{section*.225}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{线性映射}{216}{section*.226}
\contentsline {subsubsection}{线性映射}{218}{section*.226}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{范数}{217}{section*.227}
\contentsline {subsubsection}{范数}{219}{section*.227}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.2}人工神经元和感知机}{218}{subsection.5.2.2}
\contentsline {subsection}{\numberline {5.2.2}人工神经元和感知机}{220}{subsection.5.2.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{感知机\ \raisebox {0.5mm}{------}\ 最简单的人工神经元模型}{219}{section*.230}
\contentsline {subsubsection}{感知机\ \raisebox {0.5mm}{------}\ 最简单的人工神经元模型}{221}{section*.230}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经元内部权重}{220}{section*.233}
\contentsline {subsubsection}{神经元内部权重}{222}{section*.233}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经元的输入\ \raisebox {0.5mm}{------}\ 离散 vs 连续}{221}{section*.235}
\contentsline {subsubsection}{神经元的输入\ \raisebox {0.5mm}{------}\ 离散 vs 连续}{223}{section*.235}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经元内部的参数学习}{221}{section*.237}
\contentsline {subsubsection}{神经元内部的参数学习}{223}{section*.237}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.3}多层神经网络}{222}{subsection.5.2.3}
\contentsline {subsection}{\numberline {5.2.3}多层神经网络}{224}{subsection.5.2.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{线性变换和激活函数}{222}{section*.239}
\contentsline {subsubsection}{线性变换和激活函数}{224}{section*.239}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{单层神经网络$\rightarrow $多层神经网络}{224}{section*.246}
\contentsline {subsubsection}{单层神经网络$\rightarrow $多层神经网络}{226}{section*.246}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.4}函数拟合能力}{225}{subsection.5.2.4}
\contentsline {subsection}{\numberline {5.2.4}函数拟合能力}{227}{subsection.5.2.4}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.3}神经网络的张量实现}{229}{section.5.3}
\contentsline {section}{\numberline {5.3}神经网络的张量实现}{231}{section.5.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.1} 张量及其计算}{230}{subsection.5.3.1}
\contentsline {subsection}{\numberline {5.3.1} 张量及其计算}{232}{subsection.5.3.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{张量}{230}{section*.256}
\contentsline {subsubsection}{张量}{232}{section*.256}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{张量的矩阵乘法}{232}{section*.259}
\contentsline {subsubsection}{张量的矩阵乘法}{234}{section*.259}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{张量的单元操作}{233}{section*.261}
\contentsline {subsubsection}{张量的单元操作}{235}{section*.261}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.2}张量的物理存储形式}{234}{subsection.5.3.2}
\contentsline {subsection}{\numberline {5.3.2}张量的物理存储形式}{236}{subsection.5.3.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.3}使用开源框架实现张量计算}{234}{subsection.5.3.3}
\contentsline {subsection}{\numberline {5.3.3}使用开源框架实现张量计算}{236}{subsection.5.3.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.4}前向传播与计算图}{236}{subsection.5.3.4}
\contentsline {subsection}{\numberline {5.3.4}前向传播与计算图}{238}{subsection.5.3.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.5}神经网络实例}{239}{subsection.5.3.5}
\contentsline {subsection}{\numberline {5.3.5}神经网络实例}{241}{subsection.5.3.5}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.4}神经网络的参数训练}{240}{section.5.4}
\contentsline {section}{\numberline {5.4}神经网络的参数训练}{242}{section.5.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.1}损失函数}{241}{subsection.5.4.1}
\contentsline {subsection}{\numberline {5.4.1}损失函数}{243}{subsection.5.4.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.2}基于梯度的参数优化}{241}{subsection.5.4.2}
\contentsline {subsection}{\numberline {5.4.2}基于梯度的参数优化}{243}{subsection.5.4.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度下降}{242}{section*.279}
\contentsline {subsubsection}{梯度下降}{244}{section*.279}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度获取}{244}{section*.281}
\contentsline {subsubsection}{梯度获取}{246}{section*.281}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于梯度的方法的变种和改进}{247}{section*.285}
\contentsline {subsubsection}{基于梯度的方法的变种和改进}{249}{section*.285}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.3}参数更新的并行化策略}{250}{subsection.5.4.3}
\contentsline {subsection}{\numberline {5.4.3}参数更新的并行化策略}{252}{subsection.5.4.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.4}梯度消失、梯度爆炸和稳定性训练}{252}{subsection.5.4.4}
\contentsline {subsection}{\numberline {5.4.4}梯度消失、梯度爆炸和稳定性训练}{254}{subsection.5.4.4}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{易于优化的激活函数}{252}{section*.288}
\contentsline {subsubsection}{易于优化的激活函数}{254}{section*.288}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度裁剪}{253}{section*.292}
\contentsline {subsubsection}{梯度裁剪}{255}{section*.292}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{稳定性训练}{254}{section*.293}
\contentsline {subsubsection}{稳定性训练}{256}{section*.293}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.5}过拟合}{255}{subsection.5.4.5}
\contentsline {subsection}{\numberline {5.4.5}过拟合}{257}{subsection.5.4.5}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.6}反向传播}{256}{subsection.5.4.6}
\contentsline {subsection}{\numberline {5.4.6}反向传播}{258}{subsection.5.4.6}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{输出层的反向传播}{257}{section*.296}
\contentsline {subsubsection}{输出层的反向传播}{259}{section*.296}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{隐藏层的反向传播}{259}{section*.300}
\contentsline {subsubsection}{隐藏层的反向传播}{261}{section*.300}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{程序实现}{260}{section*.303}
\contentsline {subsubsection}{程序实现}{262}{section*.303}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.5}神经语言模型}{262}{section.5.5}
\contentsline {section}{\numberline {5.5}神经语言模型}{264}{section.5.5}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.5.1}基于神经网络的语言建模}{262}{subsection.5.5.1}
\contentsline {subsection}{\numberline {5.5.1}基于神经网络的语言建模}{264}{subsection.5.5.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于前馈神经网络的语言模型}{263}{section*.306}
\contentsline {subsubsection}{基于前馈神经网络的语言模型}{265}{section*.306}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于循环神经网络的语言模型}{265}{section*.309}
\contentsline {subsubsection}{基于循环神经网络的语言模型}{267}{section*.309}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于自注意力机制的语言模型}{266}{section*.311}
\contentsline {subsubsection}{基于自注意力机制的语言模型}{268}{section*.311}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{语言模型的评价}{267}{section*.313}
\contentsline {subsubsection}{语言模型的评价}{269}{section*.313}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.5.2}单词表示模型}{268}{subsection.5.5.2}
\contentsline {subsection}{\numberline {5.5.2}单词表示模型}{270}{subsection.5.5.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{One-hot编码}{268}{section*.314}
\contentsline {subsubsection}{One-hot编码}{270}{section*.314}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{分布式表示}{268}{section*.316}
\contentsline {subsubsection}{分布式表示}{270}{section*.316}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.5.3}句子表示模型及预训练}{270}{subsection.5.5.3}
\contentsline {subsection}{\numberline {5.5.3}句子表示模型及预训练}{272}{subsection.5.5.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{简单的上下文表示模型}{270}{section*.320}
\contentsline {subsubsection}{简单的上下文表示模型}{272}{section*.320}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{ELMO模型}{272}{section*.323}
\contentsline {subsubsection}{ELMO模型}{274}{section*.323}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{GPT模型}{272}{section*.325}
\contentsline {subsubsection}{GPT模型}{274}{section*.325}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{BERT模型}{273}{section*.327}
\contentsline {subsubsection}{BERT模型}{275}{section*.327}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{为什么要预训练?}{274}{section*.329}
\contentsline {subsubsection}{为什么要预训练?}{276}{section*.329}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.6}小结及深入阅读}{275}{section.5.6}
\contentsline {section}{\numberline {5.6}小结及深入阅读}{277}{section.5.6}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {6}神经机器翻译模型}{277}{chapter.6}
\contentsline {chapter}{\numberline {6}神经机器翻译模型}{279}{chapter.6}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.1}神经机器翻译的发展简史}{277}{section.6.1}
\contentsline {section}{\numberline {6.1}神经机器翻译的发展简史}{279}{section.6.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.1.1}神经机器翻译的起源}{279}{subsection.6.1.1}
\contentsline {subsection}{\numberline {6.1.1}神经机器翻译的起源}{281}{subsection.6.1.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.1.2}神经机器翻译的品质 }{281}{subsection.6.1.2}
\contentsline {subsection}{\numberline {6.1.2}神经机器翻译的品质 }{283}{subsection.6.1.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.1.3}神经机器翻译的优势 }{284}{subsection.6.1.3}
\contentsline {subsection}{\numberline {6.1.3}神经机器翻译的优势 }{286}{subsection.6.1.3}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.2}编码器-解码器框架}{286}{section.6.2}
\contentsline {section}{\numberline {6.2}编码器-解码器框架}{288}{section.6.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.1}框架结构}{286}{subsection.6.2.1}
\contentsline {subsection}{\numberline {6.2.1}框架结构}{288}{subsection.6.2.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.2}表示学习}{287}{subsection.6.2.2}
\contentsline {subsection}{\numberline {6.2.2}表示学习}{289}{subsection.6.2.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.3}简单的运行实例}{288}{subsection.6.2.3}
\contentsline {subsection}{\numberline {6.2.3}简单的运行实例}{290}{subsection.6.2.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.4}机器翻译范式的对比}{289}{subsection.6.2.4}
\contentsline {subsection}{\numberline {6.2.4}机器翻译范式的对比}{291}{subsection.6.2.4}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.3}基于循环神经网络的翻译模型及注意力机制}{290}{section.6.3}
\contentsline {section}{\numberline {6.3}基于循环神经网络的翻译模型及注意力机制}{292}{section.6.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.1}建模}{290}{subsection.6.3.1}
\contentsline {subsection}{\numberline {6.3.1}建模}{292}{subsection.6.3.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.2}输入(词嵌入)及输出(Softmax)}{294}{subsection.6.3.2}
\contentsline {subsection}{\numberline {6.3.2}输入(词嵌入)及输出(Softmax)}{296}{subsection.6.3.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.3}循环神经网络结构}{298}{subsection.6.3.3}
\contentsline {subsection}{\numberline {6.3.3}循环神经网络结构}{300}{subsection.6.3.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{循环神经单元(RNN)}{298}{section*.351}
\contentsline {subsubsection}{循环神经单元(RNN)}{300}{section*.351}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长短时记忆网络(LSTM)}{298}{section*.352}
\contentsline {subsubsection}{长短时记忆网络(LSTM)}{300}{section*.352}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{门控循环单元(GRU)}{300}{section*.355}
\contentsline {subsubsection}{门控循环单元(GRU)}{302}{section*.355}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{双向模型}{302}{section*.357}
\contentsline {subsubsection}{双向模型}{304}{section*.357}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{多层循环神经网络}{302}{section*.359}
\contentsline {subsubsection}{多层循环神经网络}{304}{section*.359}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.4}注意力机制}{303}{subsection.6.3.4}
\contentsline {subsection}{\numberline {6.3.4}注意力机制}{305}{subsection.6.3.4}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译中的注意力机制}{304}{section*.362}
\contentsline {subsubsection}{翻译中的注意力机制}{306}{section*.362}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{上下文向量的计算}{305}{section*.365}
\contentsline {subsubsection}{上下文向量的计算}{307}{section*.365}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{注意力机制的解读}{308}{section*.370}
\contentsline {subsubsection}{注意力机制的解读}{310}{section*.370}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.5}训练}{310}{subsection.6.3.5}
\contentsline {subsection}{\numberline {6.3.5}训练}{312}{subsection.6.3.5}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{损失函数}{310}{section*.373}
\contentsline {subsubsection}{损失函数}{312}{section*.373}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长参数初始化}{311}{section*.374}
\contentsline {subsubsection}{长参数初始化}{313}{section*.374}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{优化策略}{312}{section*.375}
\contentsline {subsubsection}{优化策略}{314}{section*.375}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度裁剪}{312}{section*.377}
\contentsline {subsubsection}{梯度裁剪}{314}{section*.377}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{学习率策略}{312}{section*.378}
\contentsline {subsubsection}{学习率策略}{314}{section*.378}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{并行训练}{314}{section*.381}
\contentsline {subsubsection}{并行训练}{316}{section*.381}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.6}推断}{315}{subsection.6.3.6}
\contentsline {subsection}{\numberline {6.3.6}推断}{317}{subsection.6.3.6}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{贪婪搜索}{317}{section*.385}
\contentsline {subsubsection}{贪婪搜索}{319}{section*.385}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{束搜索}{318}{section*.388}
\contentsline {subsubsection}{束搜索}{320}{section*.388}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长度惩罚}{319}{section*.390}
\contentsline {subsubsection}{长度惩罚}{321}{section*.390}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.7}实例-GNMT}{320}{subsection.6.3.7}
\contentsline {subsection}{\numberline {6.3.7}实例-GNMT}{322}{subsection.6.3.7}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.4}Transformer}{321}{section.6.4}
\contentsline {section}{\numberline {6.4}Transformer}{323}{section.6.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.1}自注意力模型}{323}{subsection.6.4.1}
\contentsline {subsection}{\numberline {6.4.1}自注意力模型}{325}{subsection.6.4.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.2}Transformer架构}{324}{subsection.6.4.2}
\contentsline {subsection}{\numberline {6.4.2}Transformer架构}{326}{subsection.6.4.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.3}位置编码}{326}{subsection.6.4.3}
\contentsline {subsection}{\numberline {6.4.3}位置编码}{328}{subsection.6.4.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.4}基于点乘的注意力机制}{329}{subsection.6.4.4}
\contentsline {subsection}{\numberline {6.4.4}基于点乘的注意力机制}{331}{subsection.6.4.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.5}掩码操作}{331}{subsection.6.4.5}
\contentsline {subsection}{\numberline {6.4.5}掩码操作}{333}{subsection.6.4.5}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.6}多头注意力}{332}{subsection.6.4.6}
\contentsline {subsection}{\numberline {6.4.6}多头注意力}{334}{subsection.6.4.6}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.7}残差网络和层正则化}{333}{subsection.6.4.7}
\contentsline {subsection}{\numberline {6.4.7}残差网络和层正则化}{335}{subsection.6.4.7}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.8}前馈全连接网络子层}{334}{subsection.6.4.8}
\contentsline {subsection}{\numberline {6.4.8}前馈全连接网络子层}{336}{subsection.6.4.8}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.9}训练}{335}{subsection.6.4.9}
\contentsline {subsection}{\numberline {6.4.9}训练}{337}{subsection.6.4.9}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.10}推断}{338}{subsection.6.4.10}
\contentsline {subsection}{\numberline {6.4.10}推断}{340}{subsection.6.4.10}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.5}序列到序列问题及应用}{338}{section.6.5}
\contentsline {section}{\numberline {6.5}序列到序列问题及应用}{340}{section.6.5}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.1}自动问答}{339}{subsection.6.5.1}
\contentsline {subsection}{\numberline {6.5.1}自动问答}{341}{subsection.6.5.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.2}自动文摘}{339}{subsection.6.5.2}
\contentsline {subsection}{\numberline {6.5.2}自动文摘}{341}{subsection.6.5.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.3}文言文翻译}{340}{subsection.6.5.3}
\contentsline {subsection}{\numberline {6.5.3}文言文翻译}{342}{subsection.6.5.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.4}对联生成}{340}{subsection.6.5.4}
\contentsline {subsection}{\numberline {6.5.4}对联生成}{342}{subsection.6.5.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.5}古诗生成}{341}{subsection.6.5.5}
\contentsline {subsection}{\numberline {6.5.5}古诗生成}{343}{subsection.6.5.5}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.6}小结及深入阅读}{342}{section.6.6}
\contentsline {section}{\numberline {6.6}小结及深入阅读}{344}{section.6.6}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {7}神经机器翻译实战 \ \raisebox {0.5mm}{------}\ 参加一次比赛}{345}{chapter.7}
\contentsline {chapter}{\numberline {7}神经机器翻译实战 \ \raisebox {0.5mm}{------}\ 参加一次比赛}{347}{chapter.7}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.1}神经机器翻译并不简单}{345}{section.7.1}
\contentsline {section}{\numberline {7.1}神经机器翻译并不简单}{347}{section.7.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.1.1}影响神经机器翻译性能的因素}{346}{subsection.7.1.1}
\contentsline {subsection}{\numberline {7.1.1}影响神经机器翻译性能的因素}{348}{subsection.7.1.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.1.2}搭建神经机器翻译系统的步骤 }{347}{subsection.7.1.2}
\contentsline {subsection}{\numberline {7.1.2}搭建神经机器翻译系统的步骤 }{349}{subsection.7.1.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.1.3}架构选择 }{348}{subsection.7.1.3}
\contentsline {subsection}{\numberline {7.1.3}架构选择 }{350}{subsection.7.1.3}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.2}数据处理}{348}{section.7.2}
\contentsline {section}{\numberline {7.2}数据处理}{350}{section.7.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.2.1}分词}{349}{subsection.7.2.1}
\contentsline {subsection}{\numberline {7.2.1}分词}{351}{subsection.7.2.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.2.2}标准化}{350}{subsection.7.2.2}
\contentsline {subsection}{\numberline {7.2.2}标准化}{352}{subsection.7.2.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.2.3}数据清洗}{351}{subsection.7.2.3}
\contentsline {subsection}{\numberline {7.2.3}数据清洗}{353}{subsection.7.2.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.2.4}子词切分}{353}{subsection.7.2.4}
\contentsline {subsection}{\numberline {7.2.4}子词切分}{355}{subsection.7.2.4}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{大词表和OOV问题}{354}{section*.428}
\contentsline {subsubsection}{大词表和OOV问题}{356}{section*.428}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{子词}{354}{section*.430}
\contentsline {subsubsection}{子词}{356}{section*.430}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{双字节编码(BPE)}{355}{section*.432}
\contentsline {subsubsection}{双字节编码(BPE)}{357}{section*.432}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{其他方法}{358}{section*.435}
\contentsline {subsubsection}{其他方法}{360}{section*.435}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.3}建模与训练}{358}{section.7.3}
\contentsline {section}{\numberline {7.3}建模与训练}{360}{section.7.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.3.1}正则化}{358}{subsection.7.3.1}
\contentsline {subsection}{\numberline {7.3.1}正则化}{360}{subsection.7.3.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{L1/L2正则化}{360}{section*.437}
\contentsline {subsubsection}{L1/L2正则化}{362}{section*.437}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{标签平滑}{361}{section*.438}
\contentsline {subsubsection}{标签平滑}{363}{section*.438}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{Dropout}{361}{section*.440}
\contentsline {subsubsection}{Dropout}{364}{section*.440}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{Layer Dropout}{363}{section*.443}
\contentsline {subsubsection}{Layer Dropout}{365}{section*.443}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.3.2}增大模型容量}{364}{subsection.7.3.2}
\contentsline {subsection}{\numberline {7.3.2}增大模型容量}{366}{subsection.7.3.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{宽网络}{364}{section*.445}
\contentsline {subsubsection}{宽网络}{366}{section*.445}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深网络}{365}{section*.447}
\contentsline {subsubsection}{深网络}{367}{section*.447}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{增大输入层和输出层表示能力}{366}{section*.449}
\contentsline {subsubsection}{增大输入层和输出层表示能力}{369}{section*.449}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{大模型的分布式计算}{367}{section*.450}
\contentsline {subsubsection}{大模型的分布式计算}{369}{section*.450}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.3.3}大批量训练}{367}{subsection.7.3.3}
\contentsline {subsection}{\numberline {7.3.3}大批量训练}{369}{subsection.7.3.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{为什么需要大批量训练}{367}{section*.451}
\contentsline {subsubsection}{为什么需要大批量训练}{370}{section*.451}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{如何构建批次}{369}{section*.454}
\contentsline {subsubsection}{如何构建批次}{371}{section*.454}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.4}推断}{370}{section.7.4}
\contentsline {section}{\numberline {7.4}推断}{372}{section.7.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.4.1}推断优化}{370}{subsection.7.4.1}
\contentsline {subsection}{\numberline {7.4.1}推断优化}{372}{subsection.7.4.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{推断系统的架构}{370}{section*.456}
\contentsline {subsubsection}{推断系统的架构}{372}{section*.456}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{自左向右推断 vs 自右向左推断}{371}{section*.458}
\contentsline {subsubsection}{自左向右推断 vs 自右向左推断}{373}{section*.458}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{推断加速}{372}{section*.459}
\contentsline {subsubsection}{推断加速}{374}{section*.459}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.4.2}译文长度控制}{379}{subsection.7.4.2}
\contentsline {subsection}{\numberline {7.4.2}译文长度控制}{381}{subsection.7.4.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长度惩罚因子}{379}{section*.465}
\contentsline {subsubsection}{长度惩罚因子}{382}{section*.465}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{译文长度范围约束}{380}{section*.467}
\contentsline {subsubsection}{译文长度范围约束}{383}{section*.467}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{覆盖度模型}{381}{section*.468}
\contentsline {subsubsection}{覆盖度模型}{383}{section*.468}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.4.3}多模型集成}{382}{subsection.7.4.3}
\contentsline {subsection}{\numberline {7.4.3}多模型集成}{384}{subsection.7.4.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{假设选择}{382}{section*.469}
\contentsline {subsubsection}{假设选择}{385}{section*.469}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{局部预测融合}{383}{section*.471}
\contentsline {subsubsection}{局部预测融合}{386}{section*.471}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{译文重组}{384}{section*.473}
\contentsline {subsubsection}{译文重组}{387}{section*.473}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.5}进阶技术}{385}{section.7.5}
\contentsline {section}{\numberline {7.5}进阶技术}{388}{section.7.5}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.5.1}深层模型}{385}{subsection.7.5.1}
\contentsline {subsection}{\numberline {7.5.1}深层模型}{388}{subsection.7.5.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{Post-Norm vs Pre-Norm}{386}{section*.476}
\contentsline {subsubsection}{Post-Norm vs Pre-Norm}{388}{section*.476}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{层聚合}{388}{section*.479}
\contentsline {subsubsection}{层聚合}{391}{section*.479}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深层模型的训练加速}{389}{section*.481}
\contentsline {subsubsection}{深层模型的训练加速}{392}{section*.481}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{渐进式训练}{390}{section*.482}
\contentsline {subsubsection}{渐进式训练}{392}{section*.482}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{分组稠密连接}{390}{section*.484}
\contentsline {subsubsection}{分组稠密连接}{392}{section*.484}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{学习率重置策略}{391}{section*.486}
\contentsline {subsubsection}{学习率重置策略}{393}{section*.486}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深层模型的鲁棒性训练}{392}{section*.488}
\contentsline {subsubsection}{深层模型的鲁棒性训练}{395}{section*.488}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.5.2}单语数据的使用}{394}{subsection.7.5.2}
\contentsline {subsection}{\numberline {7.5.2}单语数据的使用}{396}{subsection.7.5.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{伪数据}{395}{section*.491}
\contentsline {subsubsection}{伪数据}{397}{section*.491}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{预训练}{396}{section*.494}
\contentsline {subsubsection}{预训练}{399}{section*.494}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{联合训练}{398}{section*.497}
\contentsline {subsubsection}{联合训练}{401}{section*.497}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.5.3}知识精炼}{399}{subsection.7.5.3}
\contentsline {subsection}{\numberline {7.5.3}知识精炼}{401}{subsection.7.5.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{什么是知识精炼}{399}{section*.499}
\contentsline {subsubsection}{什么是知识精炼}{402}{section*.499}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{知识精炼的基本方法}{401}{section*.500}
\contentsline {subsubsection}{知识精炼的基本方法}{403}{section*.500}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{机器翻译中的知识精炼}{402}{section*.502}
\contentsline {subsubsection}{机器翻译中的知识精炼}{404}{section*.502}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.5.4}双向训练}{403}{subsection.7.5.4}
\contentsline {subsection}{\numberline {7.5.4}双向训练}{406}{subsection.7.5.4}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{有监督对偶学习}{404}{section*.504}
\contentsline {subsubsection}{有监督对偶学习}{406}{section*.504}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{无监督对偶学习}{405}{section*.505}
\contentsline {subsubsection}{无监督对偶学习}{407}{section*.505}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译中回译}{406}{section*.507}
\contentsline {subsubsection}{翻译中回译}{408}{section*.507}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.6}小结及深入阅读}{406}{section.7.6}
\contentsline {section}{\numberline {7.6}小结及深入阅读}{408}{section.7.6}
\defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {IV}{附录}}{413}{part.4}
\ttl@stoptoc {default@3}
\ttl@starttoc {default@4}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {A}附录A}{415}{Appendix.1.A}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {A.1}基准数据集}{415}{section.1.A.1}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {A.2}平行语料}{416}{section.1.A.2}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {A.3}相关工具}{417}{section.1.A.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {A.3.1}数据预处理工具}{417}{subsection.1.A.3.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {A.3.2}评价工具}{418}{subsection.1.A.3.2}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {B}附录B}{419}{Appendix.2.B}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {B.1}IBM模型3训练方法}{419}{section.2.B.1}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {B.2}IBM模型4训练方法}{421}{section.2.B.2}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {B.3}IBM模型5训练方法}{423}{section.2.B.3}
\contentsfinish
......@@ -48,6 +48,7 @@
\geometry{
paper=b5paper, % Paper size, change to letterpaper for US letter size
%papersize={185mm,260mm}, % specify paper size by (width,height)
top=2cm, % Top margin
bottom=1.5cm, % Bottom margin
left=1.8cm, % Left margin
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论