Commit 623389ab by xiaotong

updates of the paper size and figures

parent 4aeb846b
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
\begin{tikzpicture} \begin{tikzpicture}
\begin{scope}[scale=1.0,xshift=0.9in,yshift=-0.87in,level distance=20pt,sibling distance=-1pt,grow'=up] \begin{scope}[scale=1.0,level distance=30pt,sibling distance=15pt,grow'=up]
{ {
\Tree[.\node(sn0){IP}; \Tree[.\node(sn0){IP};
[.\node(sn1){NP}; [.\node(sn1){NP};
......
...@@ -22,10 +22,10 @@ ...@@ -22,10 +22,10 @@
\node [anchor=west,fill=orange!20,draw=red,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em,dashed] (s44) at ([xshift=1.5em]s43.east) {$\times h$}; \node [anchor=west,fill=orange!20,draw=red,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em,dashed] (s44) at ([xshift=1.5em]s43.east) {$\times h$};
\node [anchor=west,fill=blue!20,draw=blue,rounded corners=3pt,minimum height=1.4em,minimum width=1.5em] (s45) at ([xshift=1.5em]s44.east) {}; \node [anchor=west,fill=blue!20,draw=blue,rounded corners=3pt,minimum height=1.4em,minimum width=1.5em] (s45) at ([xshift=1.5em]s44.east) {};
\node [anchor=east] (p1) at ([xshift=-2em]s11.west) {step1}; \node [anchor=east] (p1) at ([xshift=-2em]s11.west) {step 1};
\node [anchor=east] (p2) at ([xshift=-2em]s21.west) {step2}; \node [anchor=east] (p2) at ([xshift=-2em]s21.west) {step 2};
\node [anchor=east] (p3) at ([xshift=-2em]s31.west) {step3}; \node [anchor=east] (p3) at ([xshift=-2em]s31.west) {step 3};
\node [anchor=east] (p4) at ([xshift=-2em]s41.west) {step4}; \node [anchor=east] (p4) at ([xshift=-2em]s41.west) {step 4};
\node [anchor=south,fill=orange!20,draw=orange,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (b1) at ([xshift=-0.2em,yshift=2em]p1.north) {}; \node [anchor=south,fill=orange!20,draw=orange,rounded corners=3pt,minimum height=1.4em,minimum width=1.4em] (b1) at ([xshift=-0.2em,yshift=2em]p1.north) {};
\node [anchor=west] (b2) at (b1.east) {:编码器}; \node [anchor=west] (b2) at (b1.east) {:编码器};
......
\indexentry{未登录词|hyperpage}{11} \indexentry{未登录词|hyperpage}{17}
\indexentry{Out of Vocabulary Word,OOV Word|hyperpage}{11} \indexentry{Out of Vocabulary Word,OOV Word|hyperpage}{17}
\indexentry{子词切分|hyperpage}{11} \indexentry{子词切分|hyperpage}{17}
\indexentry{Sub-word Segmentation|hyperpage}{11} \indexentry{Sub-word Segmentation|hyperpage}{17}
\indexentry{标准化|hyperpage}{11} \indexentry{标准化|hyperpage}{17}
\indexentry{Normalization|hyperpage}{11} \indexentry{Normalization|hyperpage}{17}
\indexentry{数据清洗|hyperpage}{11} \indexentry{数据清洗|hyperpage}{17}
\indexentry{Data Cleaning|hyperpage}{11} \indexentry{Data Cleaning|hyperpage}{17}
\indexentry{数据选择|hyperpage}{13} \indexentry{数据选择|hyperpage}{19}
\indexentry{Data Selection|hyperpage}{13} \indexentry{Data Selection|hyperpage}{19}
\indexentry{数据过滤|hyperpage}{13} \indexentry{数据过滤|hyperpage}{19}
\indexentry{Data Filtering|hyperpage}{13} \indexentry{Data Filtering|hyperpage}{19}
\indexentry{开放词表|hyperpage}{16} \indexentry{开放词表|hyperpage}{22}
\indexentry{Open-Vocabulary|hyperpage}{16} \indexentry{Open-Vocabulary|hyperpage}{22}
\indexentry{子词|hyperpage}{17} \indexentry{子词|hyperpage}{23}
\indexentry{Sub-word|hyperpage}{17} \indexentry{Sub-word|hyperpage}{23}
\indexentry{字节对编码|hyperpage}{17} \indexentry{字节对编码|hyperpage}{23}
\indexentry{双字节编码|hyperpage}{17} \indexentry{双字节编码|hyperpage}{23}
\indexentry{Byte Pair Encoding,BPE|hyperpage}{17} \indexentry{Byte Pair Encoding,BPE|hyperpage}{23}
\indexentry{正则化|hyperpage}{20} \indexentry{正则化|hyperpage}{26}
\indexentry{Regularization|hyperpage}{20} \indexentry{Regularization|hyperpage}{26}
\indexentry{过拟合问题|hyperpage}{20} \indexentry{过拟合问题|hyperpage}{26}
\indexentry{Overfitting Problem|hyperpage}{20} \indexentry{Overfitting Problem|hyperpage}{26}
\indexentry{反问题|hyperpage}{20} \indexentry{反问题|hyperpage}{26}
\indexentry{Inverse Problem|hyperpage}{20} \indexentry{Inverse Problem|hyperpage}{26}
\indexentry{适定的|hyperpage}{20} \indexentry{适定的|hyperpage}{26}
\indexentry{Well-posed|hyperpage}{20} \indexentry{Well-posed|hyperpage}{26}
\indexentry{不适定问题|hyperpage}{20} \indexentry{不适定问题|hyperpage}{26}
\indexentry{Ill-posed Problem|hyperpage}{20} \indexentry{Ill-posed Problem|hyperpage}{26}
\indexentry{降噪|hyperpage}{21} \indexentry{降噪|hyperpage}{27}
\indexentry{Denoising|hyperpage}{21} \indexentry{Denoising|hyperpage}{27}
\indexentry{泛化|hyperpage}{21} \indexentry{泛化|hyperpage}{27}
\indexentry{Generalization|hyperpage}{21} \indexentry{Generalization|hyperpage}{27}
\indexentry{标签平滑|hyperpage}{23} \indexentry{标签平滑|hyperpage}{29}
\indexentry{Label Smoothing|hyperpage}{23} \indexentry{Label Smoothing|hyperpage}{29}
\indexentry{相互适应|hyperpage}{24} \indexentry{相互适应|hyperpage}{30}
\indexentry{Co-adaptation|hyperpage}{24} \indexentry{Co-adaptation|hyperpage}{30}
\indexentry{集成学习|hyperpage}{25} \indexentry{集成学习|hyperpage}{31}
\indexentry{Ensemble Learning|hyperpage}{25} \indexentry{Ensemble Learning|hyperpage}{31}
\indexentry{容量|hyperpage}{26} \indexentry{容量|hyperpage}{32}
\indexentry{Capacity|hyperpage}{26} \indexentry{Capacity|hyperpage}{32}
\indexentry{宽残差网络|hyperpage}{27} \indexentry{宽残差网络|hyperpage}{33}
\indexentry{Wide Residual Network|hyperpage}{27} \indexentry{Wide Residual Network|hyperpage}{33}
\indexentry{探测任务|hyperpage}{28} \indexentry{探测任务|hyperpage}{34}
\indexentry{Probing Task|hyperpage}{28} \indexentry{Probing Task|hyperpage}{34}
\indexentry{表面信息|hyperpage}{28} \indexentry{表面信息|hyperpage}{34}
\indexentry{Surface Information|hyperpage}{28} \indexentry{Surface Information|hyperpage}{34}
\indexentry{语法信息|hyperpage}{28} \indexentry{语法信息|hyperpage}{34}
\indexentry{Syntactic Information|hyperpage}{28} \indexentry{Syntactic Information|hyperpage}{34}
\indexentry{语义信息|hyperpage}{28} \indexentry{语义信息|hyperpage}{34}
\indexentry{Semantic Information|hyperpage}{28} \indexentry{Semantic Information|hyperpage}{34}
\indexentry{词嵌入|hyperpage}{29} \indexentry{词嵌入|hyperpage}{35}
\indexentry{Embedding|hyperpage}{29} \indexentry{Embedding|hyperpage}{35}
\indexentry{数据并行|hyperpage}{29} \indexentry{数据并行|hyperpage}{35}
\indexentry{Data Parallelism|hyperpage}{29} \indexentry{Data Parallelism|hyperpage}{35}
\indexentry{模型并行|hyperpage}{29} \indexentry{模型并行|hyperpage}{35}
\indexentry{Model Parallelism|hyperpage}{29} \indexentry{Model Parallelism|hyperpage}{35}
\indexentry{小批量训练|hyperpage}{29} \indexentry{小批量训练|hyperpage}{35}
\indexentry{Mini-batch Training|hyperpage}{29} \indexentry{Mini-batch Training|hyperpage}{35}
\indexentry{课程学习|hyperpage}{31} \indexentry{课程学习|hyperpage}{37}
\indexentry{Curriculum Learning|hyperpage}{31} \indexentry{Curriculum Learning|hyperpage}{37}
\indexentry{推断|hyperpage}{32} \indexentry{推断|hyperpage}{38}
\indexentry{Inference|hyperpage}{32} \indexentry{Inference|hyperpage}{38}
\indexentry{解码|hyperpage}{32} \indexentry{解码|hyperpage}{38}
\indexentry{Decoding|hyperpage}{32} \indexentry{Decoding|hyperpage}{38}
\indexentry{搜索错误|hyperpage}{32} \indexentry{搜索错误|hyperpage}{38}
\indexentry{Search Error|hyperpage}{32} \indexentry{Search Error|hyperpage}{38}
\indexentry{模型错误|hyperpage}{32} \indexentry{模型错误|hyperpage}{38}
\indexentry{Modeling Error|hyperpage}{32} \indexentry{Modeling Error|hyperpage}{38}
\indexentry{重排序|hyperpage}{34} \indexentry{重排序|hyperpage}{40}
\indexentry{Re-ranking|hyperpage}{34} \indexentry{Re-ranking|hyperpage}{40}
\indexentry{双向推断|hyperpage}{34} \indexentry{双向推断|hyperpage}{40}
\indexentry{Bidirectional Inference|hyperpage}{34} \indexentry{Bidirectional Inference|hyperpage}{40}
\indexentry{批量推断|hyperpage}{38} \indexentry{批量推断|hyperpage}{44}
\indexentry{Batch Inference|hyperpage}{38} \indexentry{Batch Inference|hyperpage}{44}
\indexentry{批量处理|hyperpage}{38} \indexentry{批量处理|hyperpage}{44}
\indexentry{Batching|hyperpage}{38} \indexentry{Batching|hyperpage}{44}
\indexentry{二值网络|hyperpage}{39} \indexentry{二值网络|hyperpage}{45}
\indexentry{Binarized Neural Networks|hyperpage}{39} \indexentry{Binarized Neural Networks|hyperpage}{45}
\indexentry{自回归翻译|hyperpage}{40} \indexentry{自回归翻译|hyperpage}{46}
\indexentry{Autoregressive Translation|hyperpage}{40} \indexentry{Autoregressive Translation|hyperpage}{46}
\indexentry{非自回归翻译|hyperpage}{40} \indexentry{非自回归翻译|hyperpage}{46}
\indexentry{Non-autoregressive Translation|hyperpage}{40} \indexentry{Non-autoregressive Translation|hyperpage}{46}
\indexentry{繁衍率|hyperpage}{40} \indexentry{繁衍率|hyperpage}{46}
\indexentry{Fertility|hyperpage}{40} \indexentry{Fertility|hyperpage}{46}
\indexentry{偏置|hyperpage}{41} \indexentry{偏置|hyperpage}{47}
\indexentry{Bias|hyperpage}{41} \indexentry{Bias|hyperpage}{47}
\indexentry{退化|hyperpage}{42} \indexentry{退化|hyperpage}{48}
\indexentry{Degenerate|hyperpage}{42} \indexentry{Degenerate|hyperpage}{48}
\indexentry{过翻译|hyperpage}{43} \indexentry{过翻译|hyperpage}{49}
\indexentry{Over Translation|hyperpage}{43} \indexentry{Over Translation|hyperpage}{49}
\indexentry{欠翻译|hyperpage}{43} \indexentry{欠翻译|hyperpage}{49}
\indexentry{Under Translation|hyperpage}{43} \indexentry{Under Translation|hyperpage}{49}
\indexentry{充分性|hyperpage}{44} \indexentry{充分性|hyperpage}{50}
\indexentry{Adequacy|hyperpage}{44} \indexentry{Adequacy|hyperpage}{50}
\indexentry{系统融合|hyperpage}{44} \indexentry{系统融合|hyperpage}{50}
\indexentry{System Combination|hyperpage}{44} \indexentry{System Combination|hyperpage}{50}
\indexentry{假设选择|hyperpage}{45} \indexentry{假设选择|hyperpage}{51}
\indexentry{Hypothesis Selection|hyperpage}{45} \indexentry{Hypothesis Selection|hyperpage}{51}
\indexentry{多样性|hyperpage}{45} \indexentry{多样性|hyperpage}{51}
\indexentry{Diversity|hyperpage}{45} \indexentry{Diversity|hyperpage}{51}
\indexentry{重排序|hyperpage}{46} \indexentry{重排序|hyperpage}{52}
\indexentry{Re-ranking|hyperpage}{46} \indexentry{Re-ranking|hyperpage}{52}
\indexentry{混淆网络|hyperpage}{47} \indexentry{混淆网络|hyperpage}{53}
\indexentry{Confusion Network|hyperpage}{47} \indexentry{Confusion Network|hyperpage}{53}
\indexentry{动态线性层聚合方法|hyperpage}{51} \indexentry{动态线性层聚合方法|hyperpage}{57}
\indexentry{Dynamic Linear Combination of Layers,DLCL|hyperpage}{51} \indexentry{Dynamic Linear Combination of Layers,DLCL|hyperpage}{57}
\indexentry{相互适应|hyperpage}{55} \indexentry{相互适应|hyperpage}{61}
\indexentry{Co-adaptation|hyperpage}{55} \indexentry{Co-adaptation|hyperpage}{61}
\indexentry{数据增强|hyperpage}{57} \indexentry{数据增强|hyperpage}{63}
\indexentry{Data Augmentation|hyperpage}{57} \indexentry{Data Augmentation|hyperpage}{63}
\indexentry{回译|hyperpage}{57} \indexentry{回译|hyperpage}{63}
\indexentry{Back Translation|hyperpage}{57} \indexentry{Back Translation|hyperpage}{63}
\indexentry{迭代式回译|hyperpage}{58} \indexentry{迭代式回译|hyperpage}{64}
\indexentry{Iterative Back Translation|hyperpage}{58} \indexentry{Iterative Back Translation|hyperpage}{64}
\indexentry{前向翻译|hyperpage}{58} \indexentry{前向翻译|hyperpage}{64}
\indexentry{Forward Translation|hyperpage}{58} \indexentry{Forward Translation|hyperpage}{64}
\indexentry{预训练|hyperpage}{59} \indexentry{预训练|hyperpage}{65}
\indexentry{Pre-training|hyperpage}{59} \indexentry{Pre-training|hyperpage}{65}
\indexentry{微调|hyperpage}{59} \indexentry{微调|hyperpage}{65}
\indexentry{Fine-tuning|hyperpage}{59} \indexentry{Fine-tuning|hyperpage}{65}
\indexentry{多任务学习|hyperpage}{61} \indexentry{多任务学习|hyperpage}{67}
\indexentry{Multitask Learning|hyperpage}{61} \indexentry{Multitask Learning|hyperpage}{67}
\indexentry{模型压缩|hyperpage}{62} \indexentry{模型压缩|hyperpage}{68}
\indexentry{Model Compression|hyperpage}{62} \indexentry{Model Compression|hyperpage}{68}
\indexentry{学习难度|hyperpage}{62} \indexentry{学习难度|hyperpage}{68}
\indexentry{Learning Difficulty|hyperpage}{62} \indexentry{Learning Difficulty|hyperpage}{68}
\indexentry{教师模型|hyperpage}{63} \indexentry{教师模型|hyperpage}{69}
\indexentry{Teacher Model|hyperpage}{63} \indexentry{Teacher Model|hyperpage}{69}
\indexentry{学生模型|hyperpage}{63} \indexentry{学生模型|hyperpage}{69}
\indexentry{Student Model|hyperpage}{63} \indexentry{Student Model|hyperpage}{69}
\indexentry{基于单词的知识精炼|hyperpage}{63} \indexentry{基于单词的知识精炼|hyperpage}{69}
\indexentry{Word-level Knowledge Distillation|hyperpage}{63} \indexentry{Word-level Knowledge Distillation|hyperpage}{69}
\indexentry{基于序列的知识精炼|hyperpage}{63} \indexentry{基于序列的知识精炼|hyperpage}{69}
\indexentry{Sequence-level Knowledge Distillation|hyperpage}{63} \indexentry{Sequence-level Knowledge Distillation|hyperpage}{69}
\indexentry{中间层输出|hyperpage}{64} \indexentry{中间层输出|hyperpage}{70}
\indexentry{Hint-based Knowledge Transfer|hyperpage}{64} \indexentry{Hint-based Knowledge Transfer|hyperpage}{70}
\indexentry{注意力分布|hyperpage}{64} \indexentry{注意力分布|hyperpage}{70}
\indexentry{Attention To Attention Transfer|hyperpage}{64} \indexentry{Attention To Attention Transfer|hyperpage}{70}
\indexentry{循环一致性|hyperpage}{67} \indexentry{循环一致性|hyperpage}{73}
\indexentry{Cycle Consistency|hyperpage}{67} \indexentry{Cycle Consistency|hyperpage}{73}
\indexentry{翻译中回译|hyperpage}{68} \indexentry{翻译中回译|hyperpage}{74}
\indexentry{On-the-fly Back-translation|hyperpage}{68} \indexentry{On-the-fly Back-translation|hyperpage}{74}
\indexentry{网络结构搜索技术|hyperpage}{71} \indexentry{网络结构搜索技术|hyperpage}{77}
\indexentry{Neural Architecture Search,NAS|hyperpage}{71} \indexentry{Neural Architecture Search,NAS|hyperpage}{77}
...@@ -2,690 +2,714 @@ ...@@ -2,690 +2,714 @@
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\select@language {english} \select@language {english}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {I}{机器翻译基础}}{11}{part.1} \contentsline {part}{\@mypartnumtocformat {I}{机器翻译基础}}{13}{part.1}
\ttl@starttoc {default@1} \ttl@starttoc {default@1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {1}机器翻译简介}{13}{chapter.1} \contentsline {chapter}{\numberline {1}机器翻译简介}{15}{chapter.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.1}机器翻译的概念}{13}{section.1.1} \contentsline {section}{\numberline {1.1}机器翻译的概念}{15}{section.1.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.2}机器翻译简史}{16}{section.1.2} \contentsline {section}{\numberline {1.2}机器翻译简史}{18}{section.1.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.1}人工翻译}{16}{subsection.1.2.1} \contentsline {subsection}{\numberline {1.2.1}人工翻译}{18}{subsection.1.2.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.2}机器翻译的萌芽}{17}{subsection.1.2.2} \contentsline {subsection}{\numberline {1.2.2}机器翻译的萌芽}{19}{subsection.1.2.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.3}机器翻译的受挫}{18}{subsection.1.2.3} \contentsline {subsection}{\numberline {1.2.3}机器翻译的受挫}{20}{subsection.1.2.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.4}机器翻译的快速成长}{19}{subsection.1.2.4} \contentsline {subsection}{\numberline {1.2.4}机器翻译的快速成长}{21}{subsection.1.2.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.2.5}机器翻译的爆发}{20}{subsection.1.2.5} \contentsline {subsection}{\numberline {1.2.5}机器翻译的爆发}{22}{subsection.1.2.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.3}机器翻译现状}{21}{section.1.3} \contentsline {section}{\numberline {1.3}机器翻译现状}{23}{section.1.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.4}机器翻译方法}{22}{section.1.4} \contentsline {section}{\numberline {1.4}机器翻译方法}{24}{section.1.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.1}基于规则的机器翻译}{24}{subsection.1.4.1} \contentsline {subsection}{\numberline {1.4.1}基于规则的机器翻译}{26}{subsection.1.4.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.2}基于实例的机器翻译}{24}{subsection.1.4.2} \contentsline {subsection}{\numberline {1.4.2}基于实例的机器翻译}{26}{subsection.1.4.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.3}统计机器翻译}{25}{subsection.1.4.3} \contentsline {subsection}{\numberline {1.4.3}统计机器翻译}{27}{subsection.1.4.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.4}神经机器翻译}{26}{subsection.1.4.4} \contentsline {subsection}{\numberline {1.4.4}神经机器翻译}{28}{subsection.1.4.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.5}对比分析}{27}{subsection.1.4.5} \contentsline {subsection}{\numberline {1.4.5}对比分析}{29}{subsection.1.4.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.5}翻译质量评价}{28}{section.1.5} \contentsline {section}{\numberline {1.5}翻译质量评价}{30}{section.1.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.5.1}人工评价}{28}{subsection.1.5.1} \contentsline {subsection}{\numberline {1.5.1}人工评价}{30}{subsection.1.5.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.5.2}自动评价}{29}{subsection.1.5.2} \contentsline {subsection}{\numberline {1.5.2}自动评价}{31}{subsection.1.5.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{BLEU}{29}{section*.15} \contentsline {subsubsection}{BLEU}{31}{section*.15}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{TER}{31}{section*.16} \contentsline {subsubsection}{TER}{33}{section*.16}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于检测点的评价}{31}{section*.17} \contentsline {subsubsection}{基于检测点的评价}{33}{section*.17}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.6}机器翻译应用}{32}{section.1.6} \contentsline {section}{\numberline {1.6}机器翻译应用}{34}{section.1.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.7}开源项目与评测}{34}{section.1.7} \contentsline {section}{\numberline {1.7}开源项目与评测}{36}{section.1.7}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.7.1}开源机器翻译系统}{34}{subsection.1.7.1} \contentsline {subsection}{\numberline {1.7.1}开源机器翻译系统}{36}{subsection.1.7.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{统计机器翻译开源系统}{35}{section*.19} \contentsline {subsubsection}{统计机器翻译开源系统}{37}{section*.19}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经机器翻译开源系统}{36}{section*.20} \contentsline {subsubsection}{神经机器翻译开源系统}{38}{section*.20}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.7.2}常用数据集及公开评测任务}{38}{subsection.1.7.2} \contentsline {subsection}{\numberline {1.7.2}常用数据集及公开评测任务}{40}{subsection.1.7.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.8}推荐学习资源}{40}{section.1.8} \contentsline {section}{\numberline {1.8}推荐学习资源}{42}{section.1.8}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {2}词法、语法及统计建模基础}{45}{chapter.2} \contentsline {chapter}{\numberline {2}词法、语法及统计建模基础}{47}{chapter.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.1}问题概述 }{46}{section.2.1} \contentsline {section}{\numberline {2.1}问题概述 }{48}{section.2.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.2}概率论基础}{47}{section.2.2} \contentsline {section}{\numberline {2.2}概率论基础}{49}{section.2.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.1}随机变量和概率}{47}{subsection.2.2.1} \contentsline {subsection}{\numberline {2.2.1}随机变量和概率}{50}{subsection.2.2.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.2}联合概率、条件概率和边缘概率}{49}{subsection.2.2.2} \contentsline {subsection}{\numberline {2.2.2}联合概率、条件概率和边缘概率}{51}{subsection.2.2.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.3}链式法则}{50}{subsection.2.2.3} \contentsline {subsection}{\numberline {2.2.3}链式法则}{52}{subsection.2.2.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.4}贝叶斯法则}{51}{subsection.2.2.4} \contentsline {subsection}{\numberline {2.2.4}贝叶斯法则}{53}{subsection.2.2.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.2.5}KL距离和熵}{53}{subsection.2.2.5} \contentsline {subsection}{\numberline {2.2.5}KL距离和熵}{55}{subsection.2.2.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{信息熵}{53}{section*.27} \contentsline {subsubsection}{信息熵}{55}{section*.27}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{KL距离}{54}{section*.29} \contentsline {subsubsection}{KL距离}{56}{section*.29}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{交叉熵}{54}{section*.30} \contentsline {subsubsection}{交叉熵}{56}{section*.30}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.3}中文分词}{55}{section.2.3} \contentsline {section}{\numberline {2.3}中文分词}{57}{section.2.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.3.1}基于词典的分词方法}{56}{subsection.2.3.1} \contentsline {subsection}{\numberline {2.3.1}基于词典的分词方法}{58}{subsection.2.3.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.3.2}基于统计的分词方法}{57}{subsection.2.3.2} \contentsline {subsection}{\numberline {2.3.2}基于统计的分词方法}{59}{subsection.2.3.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{统计模型的学习与推断}{57}{section*.34} \contentsline {subsubsection}{统计模型的学习与推断}{59}{section*.34}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{掷骰子游戏}{58}{section*.36} \contentsline {subsubsection}{掷骰子游戏}{60}{section*.36}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{全概率分词方法}{60}{section*.40} \contentsline {subsubsection}{全概率分词方法}{62}{section*.40}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.4}$n$-gram语言模型 }{62}{section.2.4} \contentsline {section}{\numberline {2.4}$n$-gram语言模型 }{64}{section.2.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.4.1}建模}{63}{subsection.2.4.1} \contentsline {subsection}{\numberline {2.4.1}建模}{65}{subsection.2.4.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.4.2}未登录词和平滑算法}{65}{subsection.2.4.2} \contentsline {subsection}{\numberline {2.4.2}未登录词和平滑算法}{67}{subsection.2.4.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{加法平滑方法}{66}{section*.46} \contentsline {subsubsection}{加法平滑方法}{68}{section*.46}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{古德-图灵估计法}{67}{section*.48} \contentsline {subsubsection}{古德-图灵估计法}{69}{section*.48}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{Kneser-Ney平滑方法}{68}{section*.50} \contentsline {subsubsection}{Kneser-Ney平滑方法}{70}{section*.50}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.5}句法分析(短语结构分析)}{70}{section.2.5} \contentsline {section}{\numberline {2.5}句法分析(短语结构分析)}{72}{section.2.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.5.1}句子的句法树表示}{70}{subsection.2.5.1} \contentsline {subsection}{\numberline {2.5.1}句子的句法树表示}{72}{subsection.2.5.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.5.2}上下文无关文法}{72}{subsection.2.5.2} \contentsline {subsection}{\numberline {2.5.2}上下文无关文法}{74}{subsection.2.5.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {2.5.3}规则和推导的概率}{76}{subsection.2.5.3} \contentsline {subsection}{\numberline {2.5.3}规则和推导的概率}{78}{subsection.2.5.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {2.6}小结及深入阅读}{78}{section.2.6} \contentsline {section}{\numberline {2.6}小结及深入阅读}{80}{section.2.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {II}{统计机器翻译}}{81}{part.2} \contentsline {part}{\@mypartnumtocformat {II}{统计机器翻译}}{83}{part.2}
\ttl@stoptoc {default@1} \ttl@stoptoc {default@1}
\ttl@starttoc {default@2} \ttl@starttoc {default@2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {3}基于词的机器翻译模型}{83}{chapter.3} \contentsline {chapter}{\numberline {3}基于词的机器翻译模型}{85}{chapter.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.1}什么是基于词的翻译模型}{83}{section.3.1} \contentsline {section}{\numberline {3.1}什么是基于词的翻译模型}{85}{section.3.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.2}构建一个简单的机器翻译系统}{85}{section.3.2} \contentsline {section}{\numberline {3.2}构建一个简单的机器翻译系统}{87}{section.3.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.1}如何进行翻译?}{85}{subsection.3.2.1} \contentsline {subsection}{\numberline {3.2.1}如何进行翻译?}{87}{subsection.3.2.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{机器翻译流程}{86}{section*.63} \contentsline {subsubsection}{机器翻译流程}{88}{section*.63}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{人工翻译 vs. 机器翻译}{87}{section*.65} \contentsline {subsubsection}{人工翻译 vs. 机器翻译}{89}{section*.65}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.2}基本框架}{87}{subsection.3.2.2} \contentsline {subsection}{\numberline {3.2.2}基本框架}{89}{subsection.3.2.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.3}单词翻译概率}{88}{subsection.3.2.3} \contentsline {subsection}{\numberline {3.2.3}单词翻译概率}{90}{subsection.3.2.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{什么是单词翻译概率?}{88}{section*.67} \contentsline {subsubsection}{什么是单词翻译概率?}{90}{section*.67}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{如何从一个双语平行数据中学习?}{88}{section*.69} \contentsline {subsubsection}{如何从一个双语平行数据中学习?}{90}{section*.69}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{如何从大量的双语平行数据中学习?}{90}{section*.70} \contentsline {subsubsection}{如何从大量的双语平行数据中学习?}{92}{section*.70}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.4}句子级翻译模型}{91}{subsection.3.2.4} \contentsline {subsection}{\numberline {3.2.4}句子级翻译模型}{93}{subsection.3.2.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基础模型}{91}{section*.72} \contentsline {subsubsection}{基础模型}{93}{section*.72}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{生成流畅的译文}{93}{section*.74} \contentsline {subsubsection}{生成流畅的译文}{95}{section*.74}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.2.5}解码}{95}{subsection.3.2.5} \contentsline {subsection}{\numberline {3.2.5}解码}{97}{subsection.3.2.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.3}基于词的翻译建模}{98}{section.3.3} \contentsline {section}{\numberline {3.3}基于词的翻译建模}{100}{section.3.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.3.1}噪声信道模型}{98}{subsection.3.3.1} \contentsline {subsection}{\numberline {3.3.1}噪声信道模型}{100}{subsection.3.3.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.3.2}统计机器翻译的三个基本问题}{100}{subsection.3.3.2} \contentsline {subsection}{\numberline {3.3.2}统计机器翻译的三个基本问题}{102}{subsection.3.3.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{词对齐}{101}{section*.83} \contentsline {subsubsection}{词对齐}{103}{section*.83}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于词对齐的翻译模型}{101}{section*.86} \contentsline {subsubsection}{基于词对齐的翻译模型}{103}{section*.86}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于词对齐的翻译实例}{103}{section*.88} \contentsline {subsubsection}{基于词对齐的翻译实例}{105}{section*.88}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.4}IBM模型1-2}{104}{section.3.4} \contentsline {section}{\numberline {3.4}IBM模型1-2}{106}{section.3.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.1}IBM模型1}{104}{subsection.3.4.1} \contentsline {subsection}{\numberline {3.4.1}IBM模型1}{106}{subsection.3.4.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.2}IBM模型2}{106}{subsection.3.4.2} \contentsline {subsection}{\numberline {3.4.2}IBM模型2}{108}{subsection.3.4.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.3}解码及计算优化}{107}{subsection.3.4.3} \contentsline {subsection}{\numberline {3.4.3}解码及计算优化}{109}{subsection.3.4.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.4.4}训练}{108}{subsection.3.4.4} \contentsline {subsection}{\numberline {3.4.4}训练}{110}{subsection.3.4.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{目标函数}{108}{section*.93} \contentsline {subsubsection}{目标函数}{110}{section*.93}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{优化}{109}{section*.95} \contentsline {subsubsection}{优化}{111}{section*.95}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.5}IBM模型3-5及隐马尔可夫模型}{115}{section.3.5} \contentsline {section}{\numberline {3.5}IBM模型3-5及隐马尔可夫模型}{117}{section.3.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.1}基于产出率的翻译模型}{115}{subsection.3.5.1} \contentsline {subsection}{\numberline {3.5.1}基于产出率的翻译模型}{117}{subsection.3.5.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.2}IBM 模型3}{118}{subsection.3.5.2} \contentsline {subsection}{\numberline {3.5.2}IBM 模型3}{120}{subsection.3.5.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.3}IBM 模型4}{119}{subsection.3.5.3} \contentsline {subsection}{\numberline {3.5.3}IBM 模型4}{121}{subsection.3.5.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.4} IBM 模型5}{121}{subsection.3.5.4} \contentsline {subsection}{\numberline {3.5.4} IBM 模型5}{123}{subsection.3.5.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.5}隐马尔可夫模型}{122}{subsection.3.5.5} \contentsline {subsection}{\numberline {3.5.5}隐马尔可夫模型}{124}{subsection.3.5.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{隐马尔可夫模型}{123}{section*.107} \contentsline {subsubsection}{隐马尔可夫模型}{125}{section*.107}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{词对齐模型}{124}{section*.109} \contentsline {subsubsection}{词对齐模型}{126}{section*.109}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.5.6}解码和训练}{125}{subsection.3.5.6} \contentsline {subsection}{\numberline {3.5.6}解码和训练}{127}{subsection.3.5.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.6}问题分析}{125}{section.3.6} \contentsline {section}{\numberline {3.6}问题分析}{127}{section.3.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.1}词对齐及对称化}{125}{subsection.3.6.1} \contentsline {subsection}{\numberline {3.6.1}词对齐及对称化}{127}{subsection.3.6.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.2}Deficiency}{126}{subsection.3.6.2} \contentsline {subsection}{\numberline {3.6.2}Deficiency}{128}{subsection.3.6.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.3}句子长度}{127}{subsection.3.6.3} \contentsline {subsection}{\numberline {3.6.3}句子长度}{129}{subsection.3.6.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {3.6.4}其他问题}{128}{subsection.3.6.4} \contentsline {subsection}{\numberline {3.6.4}其他问题}{130}{subsection.3.6.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {3.7}小结及深入阅读}{128}{section.3.7} \contentsline {section}{\numberline {3.7}小结及深入阅读}{130}{section.3.7}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {4}基于短语和句法的机器翻译模型}{131}{chapter.4} \contentsline {chapter}{\numberline {4}基于短语和句法的机器翻译模型}{133}{chapter.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.1}翻译中的结构信息}{131}{section.4.1} \contentsline {section}{\numberline {4.1}翻译中的结构信息}{133}{section.4.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.1.1}更大粒度的翻译单元}{132}{subsection.4.1.1} \contentsline {subsection}{\numberline {4.1.1}更大粒度的翻译单元}{134}{subsection.4.1.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.1.2}句子的结构信息}{134}{subsection.4.1.2} \contentsline {subsection}{\numberline {4.1.2}句子的结构信息}{136}{subsection.4.1.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.2}基于短语的翻译模型}{136}{section.4.2} \contentsline {section}{\numberline {4.2}基于短语的翻译模型}{138}{section.4.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.1}机器翻译中的短语}{136}{subsection.4.2.1} \contentsline {subsection}{\numberline {4.2.1}机器翻译中的短语}{138}{subsection.4.2.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.2}数学建模及判别式模型}{139}{subsection.4.2.2} \contentsline {subsection}{\numberline {4.2.2}数学建模及判别式模型}{141}{subsection.4.2.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于翻译推导的建模}{139}{section*.121} \contentsline {subsubsection}{基于翻译推导的建模}{141}{section*.121}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{对数线性模型}{140}{section*.122} \contentsline {subsubsection}{对数线性模型}{142}{section*.122}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{搭建模型的基本流程}{141}{section*.123} \contentsline {subsubsection}{搭建模型的基本流程}{143}{section*.123}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.3}短语抽取}{142}{subsection.4.2.3} \contentsline {subsection}{\numberline {4.2.3}短语抽取}{144}{subsection.4.2.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{与词对齐一致的短语}{143}{section*.126} \contentsline {subsubsection}{与词对齐一致的短语}{145}{section*.126}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{获取词对齐}{144}{section*.130} \contentsline {subsubsection}{获取词对齐}{146}{section*.130}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{度量双语短语质量}{145}{section*.132} \contentsline {subsubsection}{度量双语短语质量}{147}{section*.132}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.4}调序}{146}{subsection.4.2.4} \contentsline {subsection}{\numberline {4.2.4}调序}{148}{subsection.4.2.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于距离的调序}{146}{section*.136} \contentsline {subsubsection}{基于距离的调序}{148}{section*.136}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于方向的调序}{147}{section*.138} \contentsline {subsubsection}{基于方向的调序}{149}{section*.138}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于分类的调序}{149}{section*.141} \contentsline {subsubsection}{基于分类的调序}{151}{section*.141}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.5}特征}{149}{subsection.4.2.5} \contentsline {subsection}{\numberline {4.2.5}特征}{151}{subsection.4.2.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.6}最小错误率训练}{150}{subsection.4.2.6} \contentsline {subsection}{\numberline {4.2.6}最小错误率训练}{152}{subsection.4.2.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.2.7}栈解码}{153}{subsection.4.2.7} \contentsline {subsection}{\numberline {4.2.7}栈解码}{155}{subsection.4.2.7}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译候选匹配}{154}{section*.146} \contentsline {subsubsection}{翻译候选匹配}{156}{section*.146}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译假设扩展}{154}{section*.148} \contentsline {subsubsection}{翻译假设扩展}{156}{section*.148}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{剪枝}{155}{section*.150} \contentsline {subsubsection}{剪枝}{157}{section*.150}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{解码中的栈结构}{157}{section*.152} \contentsline {subsubsection}{解码中的栈结构}{159}{section*.152}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.3}基于层次短语的模型}{158}{section.4.3} \contentsline {section}{\numberline {4.3}基于层次短语的模型}{160}{section.4.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.1}同步上下文无关文法}{161}{subsection.4.3.1} \contentsline {subsection}{\numberline {4.3.1}同步上下文无关文法}{163}{subsection.4.3.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{文法定义}{161}{section*.157} \contentsline {subsubsection}{文法定义}{163}{section*.157}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{推导}{162}{section*.158} \contentsline {subsubsection}{推导}{164}{section*.158}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{胶水规则}{163}{section*.159} \contentsline {subsubsection}{胶水规则}{165}{section*.159}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{处理流程}{164}{section*.160} \contentsline {subsubsection}{处理流程}{166}{section*.160}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.2}层次短语规则抽取}{164}{subsection.4.3.2} \contentsline {subsection}{\numberline {4.3.2}层次短语规则抽取}{166}{subsection.4.3.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.3}翻译模型及特征}{166}{subsection.4.3.3} \contentsline {subsection}{\numberline {4.3.3}翻译模型及特征}{168}{subsection.4.3.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.4}CYK解码}{167}{subsection.4.3.4} \contentsline {subsection}{\numberline {4.3.4}CYK解码}{169}{subsection.4.3.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.5}立方剪枝}{170}{subsection.4.3.5} \contentsline {subsection}{\numberline {4.3.5}立方剪枝}{172}{subsection.4.3.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.4}基于语言学句法的模型}{173}{section.4.4} \contentsline {section}{\numberline {4.4}基于语言学句法的模型}{175}{section.4.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.1}基于句法的翻译模型分类}{175}{subsection.4.4.1} \contentsline {subsection}{\numberline {4.4.1}基于句法的翻译模型分类}{177}{subsection.4.4.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.2}基于树结构的文法}{175}{subsection.4.4.2} \contentsline {subsection}{\numberline {4.4.2}基于树结构的文法}{177}{subsection.4.4.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{树到树翻译规则}{177}{section*.176} \contentsline {subsubsection}{树到树翻译规则}{179}{section*.176}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于树结构的翻译推导}{179}{section*.178} \contentsline {subsubsection}{基于树结构的翻译推导}{181}{section*.178}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{树到串翻译规则}{181}{section*.181} \contentsline {subsubsection}{树到串翻译规则}{183}{section*.181}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.3}树到串翻译规则抽取}{182}{subsection.4.4.3} \contentsline {subsection}{\numberline {4.4.3}树到串翻译规则抽取}{184}{subsection.4.4.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{树的切割与最小规则}{183}{section*.183} \contentsline {subsubsection}{树的切割与最小规则}{185}{section*.183}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{空对齐处理}{186}{section*.189} \contentsline {subsubsection}{空对齐处理}{188}{section*.189}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{组合规则}{187}{section*.191} \contentsline {subsubsection}{组合规则}{189}{section*.191}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{SPMT规则}{188}{section*.193} \contentsline {subsubsection}{SPMT规则}{190}{section*.193}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{句法树二叉化}{189}{section*.195} \contentsline {subsubsection}{句法树二叉化}{191}{section*.195}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.4}树到树翻译规则抽取}{190}{subsection.4.4.4} \contentsline {subsection}{\numberline {4.4.4}树到树翻译规则抽取}{192}{subsection.4.4.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于节点对齐的规则抽取}{191}{section*.199} \contentsline {subsubsection}{基于节点对齐的规则抽取}{193}{section*.199}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于对齐矩阵的规则抽取}{192}{section*.202} \contentsline {subsubsection}{基于对齐矩阵的规则抽取}{194}{section*.202}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.5}句法翻译模型的特征}{194}{subsection.4.4.5} \contentsline {subsection}{\numberline {4.4.5}句法翻译模型的特征}{196}{subsection.4.4.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.6}基于超图的推导空间表示}{195}{subsection.4.4.6} \contentsline {subsection}{\numberline {4.4.6}基于超图的推导空间表示}{197}{subsection.4.4.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.7}基于树的解码 vs 基于串的解码}{197}{subsection.4.4.7} \contentsline {subsection}{\numberline {4.4.7}基于树的解码 vs 基于串的解码}{199}{subsection.4.4.7}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于树的解码}{199}{section*.209} \contentsline {subsubsection}{基于树的解码}{201}{section*.209}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于串的解码}{200}{section*.212} \contentsline {subsubsection}{基于串的解码}{202}{section*.212}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.5}小结及深入阅读}{202}{section.4.5} \contentsline {section}{\numberline {4.5}小结及深入阅读}{204}{section.4.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {III}{神经机器翻译}}{205}{part.3} \contentsline {part}{\@mypartnumtocformat {III}{神经机器翻译}}{207}{part.3}
\ttl@stoptoc {default@2} \ttl@stoptoc {default@2}
\ttl@starttoc {default@3} \ttl@starttoc {default@3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {5}人工神经网络和神经语言建模}{207}{chapter.5} \contentsline {chapter}{\numberline {5}人工神经网络和神经语言建模}{209}{chapter.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.1}深度学习与人工神经网络}{208}{section.5.1} \contentsline {section}{\numberline {5.1}深度学习与人工神经网络}{210}{section.5.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.1.1}发展简史}{208}{subsection.5.1.1} \contentsline {subsection}{\numberline {5.1.1}发展简史}{210}{subsection.5.1.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{早期的人工神经网络和第一次寒冬}{208}{section*.214} \contentsline {subsubsection}{早期的人工神经网络和第一次寒冬}{210}{section*.214}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经网络的第二次高潮和第二次寒冬}{209}{section*.215} \contentsline {subsubsection}{神经网络的第二次高潮和第二次寒冬}{211}{section*.215}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深度学习和神经网络方法的崛起}{210}{section*.216} \contentsline {subsubsection}{深度学习和神经网络方法的崛起}{212}{section*.216}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.1.2}为什么需要深度学习}{211}{subsection.5.1.2} \contentsline {subsection}{\numberline {5.1.2}为什么需要深度学习}{213}{subsection.5.1.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{端到端学习和表示学习}{211}{section*.218} \contentsline {subsubsection}{端到端学习和表示学习}{213}{section*.218}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深度学习的效果}{212}{section*.220} \contentsline {subsubsection}{深度学习的效果}{214}{section*.220}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.2}神经网络基础}{212}{section.5.2} \contentsline {section}{\numberline {5.2}神经网络基础}{214}{section.5.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.1}线性代数基础}{212}{subsection.5.2.1} \contentsline {subsection}{\numberline {5.2.1}线性代数基础}{214}{subsection.5.2.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{标量、向量和矩阵}{213}{section*.222} \contentsline {subsubsection}{标量、向量和矩阵}{215}{section*.222}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{矩阵的转置}{214}{section*.223} \contentsline {subsubsection}{矩阵的转置}{216}{section*.223}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{矩阵加法和数乘}{214}{section*.224} \contentsline {subsubsection}{矩阵加法和数乘}{216}{section*.224}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{矩阵乘法和矩阵点乘}{215}{section*.225} \contentsline {subsubsection}{矩阵乘法和矩阵点乘}{217}{section*.225}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{线性映射}{216}{section*.226} \contentsline {subsubsection}{线性映射}{218}{section*.226}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{范数}{217}{section*.227} \contentsline {subsubsection}{范数}{219}{section*.227}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.2}人工神经元和感知机}{218}{subsection.5.2.2} \contentsline {subsection}{\numberline {5.2.2}人工神经元和感知机}{220}{subsection.5.2.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{感知机\ \raisebox {0.5mm}{------}\ 最简单的人工神经元模型}{219}{section*.230} \contentsline {subsubsection}{感知机\ \raisebox {0.5mm}{------}\ 最简单的人工神经元模型}{221}{section*.230}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经元内部权重}{220}{section*.233} \contentsline {subsubsection}{神经元内部权重}{222}{section*.233}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经元的输入\ \raisebox {0.5mm}{------}\ 离散 vs 连续}{221}{section*.235} \contentsline {subsubsection}{神经元的输入\ \raisebox {0.5mm}{------}\ 离散 vs 连续}{223}{section*.235}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经元内部的参数学习}{221}{section*.237} \contentsline {subsubsection}{神经元内部的参数学习}{223}{section*.237}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.3}多层神经网络}{222}{subsection.5.2.3} \contentsline {subsection}{\numberline {5.2.3}多层神经网络}{224}{subsection.5.2.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{线性变换和激活函数}{222}{section*.239} \contentsline {subsubsection}{线性变换和激活函数}{224}{section*.239}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{单层神经网络$\rightarrow $多层神经网络}{224}{section*.246} \contentsline {subsubsection}{单层神经网络$\rightarrow $多层神经网络}{226}{section*.246}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.2.4}函数拟合能力}{225}{subsection.5.2.4} \contentsline {subsection}{\numberline {5.2.4}函数拟合能力}{227}{subsection.5.2.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.3}神经网络的张量实现}{229}{section.5.3} \contentsline {section}{\numberline {5.3}神经网络的张量实现}{231}{section.5.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.1} 张量及其计算}{230}{subsection.5.3.1} \contentsline {subsection}{\numberline {5.3.1} 张量及其计算}{232}{subsection.5.3.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{张量}{230}{section*.256} \contentsline {subsubsection}{张量}{232}{section*.256}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{张量的矩阵乘法}{232}{section*.259} \contentsline {subsubsection}{张量的矩阵乘法}{234}{section*.259}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{张量的单元操作}{233}{section*.261} \contentsline {subsubsection}{张量的单元操作}{235}{section*.261}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.2}张量的物理存储形式}{234}{subsection.5.3.2} \contentsline {subsection}{\numberline {5.3.2}张量的物理存储形式}{236}{subsection.5.3.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.3}使用开源框架实现张量计算}{234}{subsection.5.3.3} \contentsline {subsection}{\numberline {5.3.3}使用开源框架实现张量计算}{236}{subsection.5.3.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.4}前向传播与计算图}{236}{subsection.5.3.4} \contentsline {subsection}{\numberline {5.3.4}前向传播与计算图}{238}{subsection.5.3.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.3.5}神经网络实例}{239}{subsection.5.3.5} \contentsline {subsection}{\numberline {5.3.5}神经网络实例}{241}{subsection.5.3.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.4}神经网络的参数训练}{240}{section.5.4} \contentsline {section}{\numberline {5.4}神经网络的参数训练}{242}{section.5.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.1}损失函数}{241}{subsection.5.4.1} \contentsline {subsection}{\numberline {5.4.1}损失函数}{243}{subsection.5.4.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.2}基于梯度的参数优化}{241}{subsection.5.4.2} \contentsline {subsection}{\numberline {5.4.2}基于梯度的参数优化}{243}{subsection.5.4.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度下降}{242}{section*.279} \contentsline {subsubsection}{梯度下降}{244}{section*.279}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度获取}{244}{section*.281} \contentsline {subsubsection}{梯度获取}{246}{section*.281}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于梯度的方法的变种和改进}{247}{section*.285} \contentsline {subsubsection}{基于梯度的方法的变种和改进}{249}{section*.285}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.3}参数更新的并行化策略}{250}{subsection.5.4.3} \contentsline {subsection}{\numberline {5.4.3}参数更新的并行化策略}{252}{subsection.5.4.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.4}梯度消失、梯度爆炸和稳定性训练}{252}{subsection.5.4.4} \contentsline {subsection}{\numberline {5.4.4}梯度消失、梯度爆炸和稳定性训练}{254}{subsection.5.4.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{易于优化的激活函数}{252}{section*.288} \contentsline {subsubsection}{易于优化的激活函数}{254}{section*.288}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度裁剪}{253}{section*.292} \contentsline {subsubsection}{梯度裁剪}{255}{section*.292}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{稳定性训练}{254}{section*.293} \contentsline {subsubsection}{稳定性训练}{256}{section*.293}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.5}过拟合}{255}{subsection.5.4.5} \contentsline {subsection}{\numberline {5.4.5}过拟合}{257}{subsection.5.4.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.4.6}反向传播}{256}{subsection.5.4.6} \contentsline {subsection}{\numberline {5.4.6}反向传播}{258}{subsection.5.4.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{输出层的反向传播}{257}{section*.296} \contentsline {subsubsection}{输出层的反向传播}{259}{section*.296}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{隐藏层的反向传播}{259}{section*.300} \contentsline {subsubsection}{隐藏层的反向传播}{261}{section*.300}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{程序实现}{260}{section*.303} \contentsline {subsubsection}{程序实现}{262}{section*.303}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.5}神经语言模型}{262}{section.5.5} \contentsline {section}{\numberline {5.5}神经语言模型}{264}{section.5.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.5.1}基于神经网络的语言建模}{262}{subsection.5.5.1} \contentsline {subsection}{\numberline {5.5.1}基于神经网络的语言建模}{264}{subsection.5.5.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于前馈神经网络的语言模型}{263}{section*.306} \contentsline {subsubsection}{基于前馈神经网络的语言模型}{265}{section*.306}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于循环神经网络的语言模型}{265}{section*.309} \contentsline {subsubsection}{基于循环神经网络的语言模型}{267}{section*.309}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于自注意力机制的语言模型}{266}{section*.311} \contentsline {subsubsection}{基于自注意力机制的语言模型}{268}{section*.311}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{语言模型的评价}{267}{section*.313} \contentsline {subsubsection}{语言模型的评价}{269}{section*.313}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.5.2}单词表示模型}{268}{subsection.5.5.2} \contentsline {subsection}{\numberline {5.5.2}单词表示模型}{270}{subsection.5.5.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{One-hot编码}{268}{section*.314} \contentsline {subsubsection}{One-hot编码}{270}{section*.314}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{分布式表示}{268}{section*.316} \contentsline {subsubsection}{分布式表示}{270}{section*.316}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.5.3}句子表示模型及预训练}{270}{subsection.5.5.3} \contentsline {subsection}{\numberline {5.5.3}句子表示模型及预训练}{272}{subsection.5.5.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{简单的上下文表示模型}{270}{section*.320} \contentsline {subsubsection}{简单的上下文表示模型}{272}{section*.320}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{ELMO模型}{272}{section*.323} \contentsline {subsubsection}{ELMO模型}{274}{section*.323}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{GPT模型}{272}{section*.325} \contentsline {subsubsection}{GPT模型}{274}{section*.325}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{BERT模型}{273}{section*.327} \contentsline {subsubsection}{BERT模型}{275}{section*.327}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{为什么要预训练?}{274}{section*.329} \contentsline {subsubsection}{为什么要预训练?}{276}{section*.329}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.6}小结及深入阅读}{275}{section.5.6} \contentsline {section}{\numberline {5.6}小结及深入阅读}{277}{section.5.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {6}神经机器翻译模型}{277}{chapter.6} \contentsline {chapter}{\numberline {6}神经机器翻译模型}{279}{chapter.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.1}神经机器翻译的发展简史}{277}{section.6.1} \contentsline {section}{\numberline {6.1}神经机器翻译的发展简史}{279}{section.6.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.1.1}神经机器翻译的起源}{279}{subsection.6.1.1} \contentsline {subsection}{\numberline {6.1.1}神经机器翻译的起源}{281}{subsection.6.1.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.1.2}神经机器翻译的品质 }{281}{subsection.6.1.2} \contentsline {subsection}{\numberline {6.1.2}神经机器翻译的品质 }{283}{subsection.6.1.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.1.3}神经机器翻译的优势 }{284}{subsection.6.1.3} \contentsline {subsection}{\numberline {6.1.3}神经机器翻译的优势 }{286}{subsection.6.1.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.2}编码器-解码器框架}{286}{section.6.2} \contentsline {section}{\numberline {6.2}编码器-解码器框架}{288}{section.6.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.1}框架结构}{286}{subsection.6.2.1} \contentsline {subsection}{\numberline {6.2.1}框架结构}{288}{subsection.6.2.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.2}表示学习}{287}{subsection.6.2.2} \contentsline {subsection}{\numberline {6.2.2}表示学习}{289}{subsection.6.2.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.3}简单的运行实例}{288}{subsection.6.2.3} \contentsline {subsection}{\numberline {6.2.3}简单的运行实例}{290}{subsection.6.2.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.2.4}机器翻译范式的对比}{289}{subsection.6.2.4} \contentsline {subsection}{\numberline {6.2.4}机器翻译范式的对比}{291}{subsection.6.2.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.3}基于循环神经网络的翻译模型及注意力机制}{290}{section.6.3} \contentsline {section}{\numberline {6.3}基于循环神经网络的翻译模型及注意力机制}{292}{section.6.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.1}建模}{290}{subsection.6.3.1} \contentsline {subsection}{\numberline {6.3.1}建模}{292}{subsection.6.3.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.2}输入(词嵌入)及输出(Softmax)}{294}{subsection.6.3.2} \contentsline {subsection}{\numberline {6.3.2}输入(词嵌入)及输出(Softmax)}{296}{subsection.6.3.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.3}循环神经网络结构}{298}{subsection.6.3.3} \contentsline {subsection}{\numberline {6.3.3}循环神经网络结构}{300}{subsection.6.3.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{循环神经单元(RNN)}{298}{section*.351} \contentsline {subsubsection}{循环神经单元(RNN)}{300}{section*.351}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长短时记忆网络(LSTM)}{298}{section*.352} \contentsline {subsubsection}{长短时记忆网络(LSTM)}{300}{section*.352}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{门控循环单元(GRU)}{300}{section*.355} \contentsline {subsubsection}{门控循环单元(GRU)}{302}{section*.355}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{双向模型}{302}{section*.357} \contentsline {subsubsection}{双向模型}{304}{section*.357}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{多层循环神经网络}{302}{section*.359} \contentsline {subsubsection}{多层循环神经网络}{304}{section*.359}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.4}注意力机制}{303}{subsection.6.3.4} \contentsline {subsection}{\numberline {6.3.4}注意力机制}{305}{subsection.6.3.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译中的注意力机制}{304}{section*.362} \contentsline {subsubsection}{翻译中的注意力机制}{306}{section*.362}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{上下文向量的计算}{305}{section*.365} \contentsline {subsubsection}{上下文向量的计算}{307}{section*.365}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{注意力机制的解读}{308}{section*.370} \contentsline {subsubsection}{注意力机制的解读}{310}{section*.370}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.5}训练}{310}{subsection.6.3.5} \contentsline {subsection}{\numberline {6.3.5}训练}{312}{subsection.6.3.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{损失函数}{310}{section*.373} \contentsline {subsubsection}{损失函数}{312}{section*.373}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长参数初始化}{311}{section*.374} \contentsline {subsubsection}{长参数初始化}{313}{section*.374}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{优化策略}{312}{section*.375} \contentsline {subsubsection}{优化策略}{314}{section*.375}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度裁剪}{312}{section*.377} \contentsline {subsubsection}{梯度裁剪}{314}{section*.377}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{学习率策略}{312}{section*.378} \contentsline {subsubsection}{学习率策略}{314}{section*.378}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{并行训练}{314}{section*.381} \contentsline {subsubsection}{并行训练}{316}{section*.381}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.6}推断}{315}{subsection.6.3.6} \contentsline {subsection}{\numberline {6.3.6}推断}{317}{subsection.6.3.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{贪婪搜索}{317}{section*.385} \contentsline {subsubsection}{贪婪搜索}{319}{section*.385}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{束搜索}{318}{section*.388} \contentsline {subsubsection}{束搜索}{320}{section*.388}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长度惩罚}{319}{section*.390} \contentsline {subsubsection}{长度惩罚}{321}{section*.390}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.7}实例-GNMT}{320}{subsection.6.3.7} \contentsline {subsection}{\numberline {6.3.7}实例-GNMT}{322}{subsection.6.3.7}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.4}Transformer}{321}{section.6.4} \contentsline {section}{\numberline {6.4}Transformer}{323}{section.6.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.1}自注意力模型}{323}{subsection.6.4.1} \contentsline {subsection}{\numberline {6.4.1}自注意力模型}{325}{subsection.6.4.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.2}Transformer架构}{324}{subsection.6.4.2} \contentsline {subsection}{\numberline {6.4.2}Transformer架构}{326}{subsection.6.4.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.3}位置编码}{326}{subsection.6.4.3} \contentsline {subsection}{\numberline {6.4.3}位置编码}{328}{subsection.6.4.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.4}基于点乘的注意力机制}{329}{subsection.6.4.4} \contentsline {subsection}{\numberline {6.4.4}基于点乘的注意力机制}{331}{subsection.6.4.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.5}掩码操作}{331}{subsection.6.4.5} \contentsline {subsection}{\numberline {6.4.5}掩码操作}{333}{subsection.6.4.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.6}多头注意力}{332}{subsection.6.4.6} \contentsline {subsection}{\numberline {6.4.6}多头注意力}{334}{subsection.6.4.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.7}残差网络和层正则化}{333}{subsection.6.4.7} \contentsline {subsection}{\numberline {6.4.7}残差网络和层正则化}{335}{subsection.6.4.7}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.8}前馈全连接网络子层}{334}{subsection.6.4.8} \contentsline {subsection}{\numberline {6.4.8}前馈全连接网络子层}{336}{subsection.6.4.8}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.9}训练}{335}{subsection.6.4.9} \contentsline {subsection}{\numberline {6.4.9}训练}{337}{subsection.6.4.9}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.10}推断}{338}{subsection.6.4.10} \contentsline {subsection}{\numberline {6.4.10}推断}{340}{subsection.6.4.10}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.5}序列到序列问题及应用}{338}{section.6.5} \contentsline {section}{\numberline {6.5}序列到序列问题及应用}{340}{section.6.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.1}自动问答}{339}{subsection.6.5.1} \contentsline {subsection}{\numberline {6.5.1}自动问答}{341}{subsection.6.5.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.2}自动文摘}{339}{subsection.6.5.2} \contentsline {subsection}{\numberline {6.5.2}自动文摘}{341}{subsection.6.5.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.3}文言文翻译}{340}{subsection.6.5.3} \contentsline {subsection}{\numberline {6.5.3}文言文翻译}{342}{subsection.6.5.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.4}对联生成}{340}{subsection.6.5.4} \contentsline {subsection}{\numberline {6.5.4}对联生成}{342}{subsection.6.5.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.5}古诗生成}{341}{subsection.6.5.5} \contentsline {subsection}{\numberline {6.5.5}古诗生成}{343}{subsection.6.5.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.6}小结及深入阅读}{342}{section.6.6} \contentsline {section}{\numberline {6.6}小结及深入阅读}{344}{section.6.6}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {7}神经机器翻译实战 \ \raisebox {0.5mm}{------}\ 参加一次比赛}{345}{chapter.7} \contentsline {chapter}{\numberline {7}神经机器翻译实战 \ \raisebox {0.5mm}{------}\ 参加一次比赛}{347}{chapter.7}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.1}神经机器翻译并不简单}{345}{section.7.1} \contentsline {section}{\numberline {7.1}神经机器翻译并不简单}{347}{section.7.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.1.1}影响神经机器翻译性能的因素}{346}{subsection.7.1.1} \contentsline {subsection}{\numberline {7.1.1}影响神经机器翻译性能的因素}{348}{subsection.7.1.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.1.2}搭建神经机器翻译系统的步骤 }{347}{subsection.7.1.2} \contentsline {subsection}{\numberline {7.1.2}搭建神经机器翻译系统的步骤 }{349}{subsection.7.1.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.1.3}架构选择 }{348}{subsection.7.1.3} \contentsline {subsection}{\numberline {7.1.3}架构选择 }{350}{subsection.7.1.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.2}数据处理}{348}{section.7.2} \contentsline {section}{\numberline {7.2}数据处理}{350}{section.7.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.2.1}分词}{349}{subsection.7.2.1} \contentsline {subsection}{\numberline {7.2.1}分词}{351}{subsection.7.2.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.2.2}标准化}{350}{subsection.7.2.2} \contentsline {subsection}{\numberline {7.2.2}标准化}{352}{subsection.7.2.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.2.3}数据清洗}{351}{subsection.7.2.3} \contentsline {subsection}{\numberline {7.2.3}数据清洗}{353}{subsection.7.2.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.2.4}子词切分}{353}{subsection.7.2.4} \contentsline {subsection}{\numberline {7.2.4}子词切分}{355}{subsection.7.2.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{大词表和OOV问题}{354}{section*.428} \contentsline {subsubsection}{大词表和OOV问题}{356}{section*.428}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{子词}{354}{section*.430} \contentsline {subsubsection}{子词}{356}{section*.430}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{双字节编码(BPE)}{355}{section*.432} \contentsline {subsubsection}{双字节编码(BPE)}{357}{section*.432}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{其他方法}{358}{section*.435} \contentsline {subsubsection}{其他方法}{360}{section*.435}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.3}建模与训练}{358}{section.7.3} \contentsline {section}{\numberline {7.3}建模与训练}{360}{section.7.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.3.1}正则化}{358}{subsection.7.3.1} \contentsline {subsection}{\numberline {7.3.1}正则化}{360}{subsection.7.3.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{L1/L2正则化}{360}{section*.437} \contentsline {subsubsection}{L1/L2正则化}{362}{section*.437}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{标签平滑}{361}{section*.438} \contentsline {subsubsection}{标签平滑}{363}{section*.438}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{Dropout}{361}{section*.440} \contentsline {subsubsection}{Dropout}{364}{section*.440}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{Layer Dropout}{363}{section*.443} \contentsline {subsubsection}{Layer Dropout}{365}{section*.443}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.3.2}增大模型容量}{364}{subsection.7.3.2} \contentsline {subsection}{\numberline {7.3.2}增大模型容量}{366}{subsection.7.3.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{宽网络}{364}{section*.445} \contentsline {subsubsection}{宽网络}{366}{section*.445}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深网络}{365}{section*.447} \contentsline {subsubsection}{深网络}{367}{section*.447}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{增大输入层和输出层表示能力}{366}{section*.449} \contentsline {subsubsection}{增大输入层和输出层表示能力}{369}{section*.449}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{大模型的分布式计算}{367}{section*.450} \contentsline {subsubsection}{大模型的分布式计算}{369}{section*.450}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.3.3}大批量训练}{367}{subsection.7.3.3} \contentsline {subsection}{\numberline {7.3.3}大批量训练}{369}{subsection.7.3.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{为什么需要大批量训练}{367}{section*.451} \contentsline {subsubsection}{为什么需要大批量训练}{370}{section*.451}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{如何构建批次}{369}{section*.454} \contentsline {subsubsection}{如何构建批次}{371}{section*.454}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.4}推断}{370}{section.7.4} \contentsline {section}{\numberline {7.4}推断}{372}{section.7.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.4.1}推断优化}{370}{subsection.7.4.1} \contentsline {subsection}{\numberline {7.4.1}推断优化}{372}{subsection.7.4.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{推断系统的架构}{370}{section*.456} \contentsline {subsubsection}{推断系统的架构}{372}{section*.456}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{自左向右推断 vs 自右向左推断}{371}{section*.458} \contentsline {subsubsection}{自左向右推断 vs 自右向左推断}{373}{section*.458}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{推断加速}{372}{section*.459} \contentsline {subsubsection}{推断加速}{374}{section*.459}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.4.2}译文长度控制}{379}{subsection.7.4.2} \contentsline {subsection}{\numberline {7.4.2}译文长度控制}{381}{subsection.7.4.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长度惩罚因子}{379}{section*.465} \contentsline {subsubsection}{长度惩罚因子}{382}{section*.465}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{译文长度范围约束}{380}{section*.467} \contentsline {subsubsection}{译文长度范围约束}{383}{section*.467}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{覆盖度模型}{381}{section*.468} \contentsline {subsubsection}{覆盖度模型}{383}{section*.468}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.4.3}多模型集成}{382}{subsection.7.4.3} \contentsline {subsection}{\numberline {7.4.3}多模型集成}{384}{subsection.7.4.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{假设选择}{382}{section*.469} \contentsline {subsubsection}{假设选择}{385}{section*.469}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{局部预测融合}{383}{section*.471} \contentsline {subsubsection}{局部预测融合}{386}{section*.471}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{译文重组}{384}{section*.473} \contentsline {subsubsection}{译文重组}{387}{section*.473}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.5}进阶技术}{385}{section.7.5} \contentsline {section}{\numberline {7.5}进阶技术}{388}{section.7.5}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.5.1}深层模型}{385}{subsection.7.5.1} \contentsline {subsection}{\numberline {7.5.1}深层模型}{388}{subsection.7.5.1}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{Post-Norm vs Pre-Norm}{386}{section*.476} \contentsline {subsubsection}{Post-Norm vs Pre-Norm}{388}{section*.476}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{层聚合}{388}{section*.479} \contentsline {subsubsection}{层聚合}{391}{section*.479}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深层模型的训练加速}{389}{section*.481} \contentsline {subsubsection}{深层模型的训练加速}{392}{section*.481}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{渐进式训练}{390}{section*.482} \contentsline {subsubsection}{渐进式训练}{392}{section*.482}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{分组稠密连接}{390}{section*.484} \contentsline {subsubsection}{分组稠密连接}{392}{section*.484}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{学习率重置策略}{391}{section*.486} \contentsline {subsubsection}{学习率重置策略}{393}{section*.486}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深层模型的鲁棒性训练}{392}{section*.488} \contentsline {subsubsection}{深层模型的鲁棒性训练}{395}{section*.488}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.5.2}单语数据的使用}{394}{subsection.7.5.2} \contentsline {subsection}{\numberline {7.5.2}单语数据的使用}{396}{subsection.7.5.2}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{伪数据}{395}{section*.491} \contentsline {subsubsection}{伪数据}{397}{section*.491}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{预训练}{396}{section*.494} \contentsline {subsubsection}{预训练}{399}{section*.494}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{联合训练}{398}{section*.497} \contentsline {subsubsection}{联合训练}{401}{section*.497}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.5.3}知识精炼}{399}{subsection.7.5.3} \contentsline {subsection}{\numberline {7.5.3}知识精炼}{401}{subsection.7.5.3}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{什么是知识精炼}{399}{section*.499} \contentsline {subsubsection}{什么是知识精炼}{402}{section*.499}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{知识精炼的基本方法}{401}{section*.500} \contentsline {subsubsection}{知识精炼的基本方法}{403}{section*.500}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{机器翻译中的知识精炼}{402}{section*.502} \contentsline {subsubsection}{机器翻译中的知识精炼}{404}{section*.502}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.5.4}双向训练}{403}{subsection.7.5.4} \contentsline {subsection}{\numberline {7.5.4}双向训练}{406}{subsection.7.5.4}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{有监督对偶学习}{404}{section*.504} \contentsline {subsubsection}{有监督对偶学习}{406}{section*.504}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{无监督对偶学习}{405}{section*.505} \contentsline {subsubsection}{无监督对偶学习}{407}{section*.505}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译中回译}{406}{section*.507} \contentsline {subsubsection}{翻译中回译}{408}{section*.507}
\defcounter {refsection}{0}\relax \defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.6}小结及深入阅读}{406}{section.7.6} \contentsline {section}{\numberline {7.6}小结及深入阅读}{408}{section.7.6}
\defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {IV}{附录}}{413}{part.4}
\ttl@stoptoc {default@3}
\ttl@starttoc {default@4}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {A}附录A}{415}{Appendix.1.A}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {A.1}基准数据集}{415}{section.1.A.1}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {A.2}平行语料}{416}{section.1.A.2}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {A.3}相关工具}{417}{section.1.A.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {A.3.1}数据预处理工具}{417}{subsection.1.A.3.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {A.3.2}评价工具}{418}{subsection.1.A.3.2}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {B}附录B}{419}{Appendix.2.B}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {B.1}IBM模型3训练方法}{419}{section.2.B.1}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {B.2}IBM模型4训练方法}{421}{section.2.B.2}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {B.3}IBM模型5训练方法}{423}{section.2.B.3}
\contentsfinish \contentsfinish
...@@ -48,6 +48,7 @@ ...@@ -48,6 +48,7 @@
\geometry{ \geometry{
paper=b5paper, % Paper size, change to letterpaper for US letter size paper=b5paper, % Paper size, change to letterpaper for US letter size
%papersize={185mm,260mm}, % specify paper size by (width,height)
top=2cm, % Top margin top=2cm, % Top margin
bottom=1.5cm, % Bottom margin bottom=1.5cm, % Bottom margin
left=1.8cm, % Left margin left=1.8cm, % Left margin
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论