Commit 2a4a6360 by 曹润柘

合并分支 'master' 到 'caorunzhe'

Master

查看合并请求 !35
parents c308dbad 4aeb846b
\indexentry{Chapter4.1|hyperpage}{7}
\indexentry{Chapter4.1.1|hyperpage}{8}
\indexentry{Chapter4.1.2|hyperpage}{10}
\indexentry{Chapter4.2|hyperpage}{12}
\indexentry{Chapter4.2.1|hyperpage}{12}
\indexentry{Chapter4.2.2|hyperpage}{15}
\indexentry{Chapter4.2.2.1|hyperpage}{15}
\indexentry{Chapter4.2.2.2|hyperpage}{16}
\indexentry{Chapter4.2.2.3|hyperpage}{17}
\indexentry{Chapter4.2.3|hyperpage}{18}
\indexentry{Chapter4.2.3.1|hyperpage}{19}
\indexentry{Chapter4.2.3.2|hyperpage}{20}
\indexentry{Chapter4.2.3.3|hyperpage}{21}
\indexentry{Chapter4.2.4|hyperpage}{22}
\indexentry{Chapter4.2.4.1|hyperpage}{22}
\indexentry{Chapter4.2.4.2|hyperpage}{23}
\indexentry{Chapter4.2.4.3|hyperpage}{25}
\indexentry{Chapter4.2.5|hyperpage}{25}
\indexentry{Chapter4.2.6|hyperpage}{26}
\indexentry{Chapter4.2.7|hyperpage}{29}
\indexentry{Chapter4.2.7.1|hyperpage}{30}
\indexentry{Chapter4.2.7.2|hyperpage}{30}
\indexentry{Chapter4.2.7.3|hyperpage}{31}
\indexentry{Chapter4.2.7.4|hyperpage}{32}
\indexentry{Chapter4.3|hyperpage}{33}
\indexentry{Chapter4.3.1|hyperpage}{36}
\indexentry{Chapter4.3.1.1|hyperpage}{37}
\indexentry{Chapter4.3.1.2|hyperpage}{38}
\indexentry{Chapter4.3.1.3|hyperpage}{39}
\indexentry{Chapter4.3.1.4|hyperpage}{40}
\indexentry{Chapter4.3.2|hyperpage}{40}
\indexentry{Chapter4.3.3|hyperpage}{41}
\indexentry{Chapter4.3.4|hyperpage}{42}
\indexentry{Chapter4.3.5|hyperpage}{46}
\indexentry{Chapter4.4|hyperpage}{49}
\indexentry{Chapter4.4.1|hyperpage}{51}
\indexentry{Chapter4.4.2|hyperpage}{51}
\indexentry{Chapter4.4.2.1|hyperpage}{53}
\indexentry{Chapter4.4.2.2|hyperpage}{55}
\indexentry{Chapter4.4.2.3|hyperpage}{57}
\indexentry{Chapter4.4.3|hyperpage}{58}
\indexentry{Chapter4.4.3.1|hyperpage}{59}
\indexentry{Chapter4.4.3.2|hyperpage}{62}
\indexentry{Chapter4.4.3.3|hyperpage}{63}
\indexentry{Chapter4.4.3.4|hyperpage}{64}
\indexentry{Chapter4.4.3.5|hyperpage}{65}
\indexentry{Chapter4.4.4|hyperpage}{66}
\indexentry{Chapter4.4.4.1|hyperpage}{67}
\indexentry{Chapter4.4.4.2|hyperpage}{67}
\indexentry{Chapter4.4.5|hyperpage}{68}
\indexentry{Chapter4.4.6|hyperpage}{71}
\indexentry{Chapter4.4.7|hyperpage}{73}
\indexentry{Chapter4.4.7.1|hyperpage}{74}
\indexentry{Chapter4.4.7.2|hyperpage}{76}
\indexentry{Chapter4.5|hyperpage}{77}
\indexentry{未登录词|hyperpage}{11}
\indexentry{Out of Vocabulary Word,OOV Word|hyperpage}{11}
\indexentry{子词切分|hyperpage}{11}
\indexentry{Sub-word Segmentation|hyperpage}{11}
\indexentry{标准化|hyperpage}{11}
\indexentry{Normalization|hyperpage}{11}
\indexentry{数据清洗|hyperpage}{11}
\indexentry{Data Cleaning|hyperpage}{11}
\indexentry{数据选择|hyperpage}{13}
\indexentry{Data Selection|hyperpage}{13}
\indexentry{数据过滤|hyperpage}{13}
\indexentry{Data Filtering|hyperpage}{13}
\indexentry{开放词表|hyperpage}{16}
\indexentry{Open-Vocabulary|hyperpage}{16}
\indexentry{子词|hyperpage}{17}
\indexentry{Sub-word|hyperpage}{17}
\indexentry{字节对编码|hyperpage}{17}
\indexentry{双字节编码|hyperpage}{17}
\indexentry{Byte Pair Encoding,BPE|hyperpage}{17}
\indexentry{正则化|hyperpage}{20}
\indexentry{Regularization|hyperpage}{20}
\indexentry{过拟合问题|hyperpage}{20}
\indexentry{Overfitting Problem|hyperpage}{20}
\indexentry{反问题|hyperpage}{20}
\indexentry{Inverse Problem|hyperpage}{20}
\indexentry{适定的|hyperpage}{20}
\indexentry{Well-posed|hyperpage}{20}
\indexentry{不适定问题|hyperpage}{20}
\indexentry{Ill-posed Problem|hyperpage}{20}
\indexentry{降噪|hyperpage}{21}
\indexentry{Denoising|hyperpage}{21}
\indexentry{泛化|hyperpage}{21}
\indexentry{Generalization|hyperpage}{21}
\indexentry{标签平滑|hyperpage}{23}
\indexentry{Label Smoothing|hyperpage}{23}
\indexentry{相互适应|hyperpage}{24}
\indexentry{Co-adaptation|hyperpage}{24}
\indexentry{集成学习|hyperpage}{25}
\indexentry{Ensemble Learning|hyperpage}{25}
\indexentry{容量|hyperpage}{26}
\indexentry{Capacity|hyperpage}{26}
\indexentry{宽残差网络|hyperpage}{27}
\indexentry{Wide Residual Network|hyperpage}{27}
\indexentry{探测任务|hyperpage}{28}
\indexentry{Probing Task|hyperpage}{28}
\indexentry{表面信息|hyperpage}{28}
\indexentry{Surface Information|hyperpage}{28}
\indexentry{语法信息|hyperpage}{28}
\indexentry{Syntactic Information|hyperpage}{28}
\indexentry{语义信息|hyperpage}{28}
\indexentry{Semantic Information|hyperpage}{28}
\indexentry{词嵌入|hyperpage}{29}
\indexentry{Embedding|hyperpage}{29}
\indexentry{数据并行|hyperpage}{29}
\indexentry{Data Parallelism|hyperpage}{29}
\indexentry{模型并行|hyperpage}{29}
\indexentry{Model Parallelism|hyperpage}{29}
\indexentry{小批量训练|hyperpage}{29}
\indexentry{Mini-batch Training|hyperpage}{29}
\indexentry{课程学习|hyperpage}{31}
\indexentry{Curriculum Learning|hyperpage}{31}
\indexentry{推断|hyperpage}{32}
\indexentry{Inference|hyperpage}{32}
\indexentry{解码|hyperpage}{32}
\indexentry{Decoding|hyperpage}{32}
\indexentry{搜索错误|hyperpage}{32}
\indexentry{Search Error|hyperpage}{32}
\indexentry{模型错误|hyperpage}{32}
\indexentry{Modeling Error|hyperpage}{32}
\indexentry{重排序|hyperpage}{34}
\indexentry{Re-ranking|hyperpage}{34}
\indexentry{双向推断|hyperpage}{34}
\indexentry{Bidirectional Inference|hyperpage}{34}
\indexentry{批量推断|hyperpage}{38}
\indexentry{Batch Inference|hyperpage}{38}
\indexentry{批量处理|hyperpage}{38}
\indexentry{Batching|hyperpage}{38}
\indexentry{二值网络|hyperpage}{39}
\indexentry{Binarized Neural Networks|hyperpage}{39}
\indexentry{自回归翻译|hyperpage}{40}
\indexentry{Autoregressive Translation|hyperpage}{40}
\indexentry{非自回归翻译|hyperpage}{40}
\indexentry{Non-autoregressive Translation|hyperpage}{40}
\indexentry{繁衍率|hyperpage}{40}
\indexentry{Fertility|hyperpage}{40}
\indexentry{偏置|hyperpage}{41}
\indexentry{Bias|hyperpage}{41}
\indexentry{退化|hyperpage}{42}
\indexentry{Degenerate|hyperpage}{42}
\indexentry{过翻译|hyperpage}{43}
\indexentry{Over Translation|hyperpage}{43}
\indexentry{欠翻译|hyperpage}{43}
\indexentry{Under Translation|hyperpage}{43}
\indexentry{充分性|hyperpage}{44}
\indexentry{Adequacy|hyperpage}{44}
\indexentry{系统融合|hyperpage}{44}
\indexentry{System Combination|hyperpage}{44}
\indexentry{假设选择|hyperpage}{45}
\indexentry{Hypothesis Selection|hyperpage}{45}
\indexentry{多样性|hyperpage}{45}
\indexentry{Diversity|hyperpage}{45}
\indexentry{重排序|hyperpage}{46}
\indexentry{Re-ranking|hyperpage}{46}
\indexentry{混淆网络|hyperpage}{47}
\indexentry{Confusion Network|hyperpage}{47}
\indexentry{动态线性层聚合方法|hyperpage}{51}
\indexentry{Dynamic Linear Combination of Layers,DLCL|hyperpage}{51}
\indexentry{相互适应|hyperpage}{55}
\indexentry{Co-adaptation|hyperpage}{55}
\indexentry{数据增强|hyperpage}{57}
\indexentry{Data Augmentation|hyperpage}{57}
\indexentry{回译|hyperpage}{57}
\indexentry{Back Translation|hyperpage}{57}
\indexentry{迭代式回译|hyperpage}{58}
\indexentry{Iterative Back Translation|hyperpage}{58}
\indexentry{前向翻译|hyperpage}{58}
\indexentry{Forward Translation|hyperpage}{58}
\indexentry{预训练|hyperpage}{59}
\indexentry{Pre-training|hyperpage}{59}
\indexentry{微调|hyperpage}{59}
\indexentry{Fine-tuning|hyperpage}{59}
\indexentry{多任务学习|hyperpage}{61}
\indexentry{Multitask Learning|hyperpage}{61}
\indexentry{模型压缩|hyperpage}{62}
\indexentry{Model Compression|hyperpage}{62}
\indexentry{学习难度|hyperpage}{62}
\indexentry{Learning Difficulty|hyperpage}{62}
\indexentry{教师模型|hyperpage}{63}
\indexentry{Teacher Model|hyperpage}{63}
\indexentry{学生模型|hyperpage}{63}
\indexentry{Student Model|hyperpage}{63}
\indexentry{基于单词的知识精炼|hyperpage}{63}
\indexentry{Word-level Knowledge Distillation|hyperpage}{63}
\indexentry{基于序列的知识精炼|hyperpage}{63}
\indexentry{Sequence-level Knowledge Distillation|hyperpage}{63}
\indexentry{中间层输出|hyperpage}{64}
\indexentry{Hint-based Knowledge Transfer|hyperpage}{64}
\indexentry{注意力分布|hyperpage}{64}
\indexentry{Attention To Attention Transfer|hyperpage}{64}
\indexentry{循环一致性|hyperpage}{67}
\indexentry{Cycle Consistency|hyperpage}{67}
\indexentry{翻译中回译|hyperpage}{68}
\indexentry{On-the-fly Back-translation|hyperpage}{68}
\indexentry{网络结构搜索技术|hyperpage}{71}
\indexentry{Neural Architecture Search,NAS|hyperpage}{71}
......@@ -25,7 +25,7 @@
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {1.4}机器翻译方法}{22}{section.1.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.1}基于规则的机器翻译}{22}{subsection.1.4.1}
\contentsline {subsection}{\numberline {1.4.1}基于规则的机器翻译}{24}{subsection.1.4.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.4.2}基于实例的机器翻译}{24}{subsection.1.4.2}
\defcounter {refsection}{0}\relax
......@@ -53,7 +53,7 @@
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {1.7.1}开源机器翻译系统}{34}{subsection.1.7.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{统计机器翻译开源系统}{34}{section*.19}
\contentsline {subsubsection}{统计机器翻译开源系统}{35}{section*.19}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{神经机器翻译开源系统}{36}{section*.20}
\defcounter {refsection}{0}\relax
......@@ -255,11 +255,11 @@
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{剪枝}{155}{section*.150}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{解码中的栈结构}{156}{section*.152}
\contentsline {subsubsection}{解码中的栈结构}{157}{section*.152}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.3}基于层次短语的模型}{157}{section.4.3}
\contentsline {section}{\numberline {4.3}基于层次短语的模型}{158}{section.4.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.1}同步上下文无关文法}{160}{subsection.4.3.1}
\contentsline {subsection}{\numberline {4.3.1}同步上下文无关文法}{161}{subsection.4.3.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{文法定义}{161}{section*.157}
\defcounter {refsection}{0}\relax
......@@ -271,9 +271,9 @@
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.2}层次短语规则抽取}{164}{subsection.4.3.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.3}翻译模型及特征}{165}{subsection.4.3.3}
\contentsline {subsection}{\numberline {4.3.3}翻译模型及特征}{166}{subsection.4.3.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.4}CYK解码}{166}{subsection.4.3.4}
\contentsline {subsection}{\numberline {4.3.4}CYK解码}{167}{subsection.4.3.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.3.5}立方剪枝}{170}{subsection.4.3.5}
\defcounter {refsection}{0}\relax
......@@ -305,19 +305,19 @@
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于节点对齐的规则抽取}{191}{section*.199}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于对齐矩阵的规则抽取}{191}{section*.202}
\contentsline {subsubsection}{基于对齐矩阵的规则抽取}{192}{section*.202}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.5}句法翻译模型的特征}{192}{subsection.4.4.5}
\contentsline {subsection}{\numberline {4.4.5}句法翻译模型的特征}{194}{subsection.4.4.5}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.6}基于超图的推导空间表示}{195}{subsection.4.4.6}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {4.4.7}基于树的解码 vs 基于串的解码}{197}{subsection.4.4.7}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于树的解码}{198}{section*.209}
\contentsline {subsubsection}{基于树的解码}{199}{section*.209}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{基于串的解码}{200}{section*.212}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {4.5}小结及深入阅读}{201}{section.4.5}
\contentsline {section}{\numberline {4.5}小结及深入阅读}{202}{section.4.5}
\defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {III}{神经机器翻译}}{205}{part.3}
\ttl@stoptoc {default@2}
......@@ -425,7 +425,7 @@
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{程序实现}{260}{section*.303}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {5.5}神经语言模型}{261}{section.5.5}
\contentsline {section}{\numberline {5.5}神经语言模型}{262}{section.5.5}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {5.5.1}基于神经网络的语言建模}{262}{subsection.5.5.1}
\defcounter {refsection}{0}\relax
......@@ -479,23 +479,23 @@
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.3}基于循环神经网络的翻译模型及注意力机制}{290}{section.6.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.1}建模}{291}{subsection.6.3.1}
\contentsline {subsection}{\numberline {6.3.1}建模}{290}{subsection.6.3.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.2}输入(词嵌入)及输出(Softmax)}{294}{subsection.6.3.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.3}循环神经网络结构}{297}{subsection.6.3.3}
\contentsline {subsection}{\numberline {6.3.3}循环神经网络结构}{298}{subsection.6.3.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{循环神经单元(RNN)}{297}{section*.351}
\contentsline {subsubsection}{循环神经单元(RNN)}{298}{section*.351}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长短时记忆网络(LSTM)}{298}{section*.352}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{门控循环单元(GRU)}{299}{section*.355}
\contentsline {subsubsection}{门控循环单元(GRU)}{300}{section*.355}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{双向模型}{301}{section*.357}
\contentsline {subsubsection}{双向模型}{302}{section*.357}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{多层循环神经网络}{302}{section*.359}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.4}注意力机制}{302}{subsection.6.3.4}
\contentsline {subsection}{\numberline {6.3.4}注意力机制}{303}{subsection.6.3.4}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译中的注意力机制}{304}{section*.362}
\defcounter {refsection}{0}\relax
......@@ -509,13 +509,13 @@
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长参数初始化}{311}{section*.374}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{优化策略}{311}{section*.375}
\contentsline {subsubsection}{优化策略}{312}{section*.375}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{梯度裁剪}{312}{section*.377}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{学习率策略}{312}{section*.378}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{并行训练}{313}{section*.381}
\contentsline {subsubsection}{并行训练}{314}{section*.381}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.6}推断}{315}{subsection.6.3.6}
\defcounter {refsection}{0}\relax
......@@ -523,57 +523,169 @@
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{束搜索}{318}{section*.388}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长度惩罚}{318}{section*.390}
\contentsline {subsubsection}{长度惩罚}{319}{section*.390}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.3.7}实例-GNMT}{320}{subsection.6.3.7}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.4}Transformer}{320}{section.6.4}
\contentsline {section}{\numberline {6.4}Transformer}{321}{section.6.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.1}自注意力模型}{322}{subsection.6.4.1}
\contentsline {subsection}{\numberline {6.4.1}自注意力模型}{323}{subsection.6.4.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.2}Transformer架构}{324}{subsection.6.4.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.3}位置编码}{326}{subsection.6.4.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.4}基于点乘的注意力机制}{328}{subsection.6.4.4}
\contentsline {subsection}{\numberline {6.4.4}基于点乘的注意力机制}{329}{subsection.6.4.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.5}掩码操作}{330}{subsection.6.4.5}
\contentsline {subsection}{\numberline {6.4.5}掩码操作}{331}{subsection.6.4.5}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.6}多头注意力}{331}{subsection.6.4.6}
\contentsline {subsection}{\numberline {6.4.6}多头注意力}{332}{subsection.6.4.6}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.7}残差网络和层正则化}{332}{subsection.6.4.7}
\contentsline {subsection}{\numberline {6.4.7}残差网络和层正则化}{333}{subsection.6.4.7}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.8}前馈全连接网络子层}{333}{subsection.6.4.8}
\contentsline {subsection}{\numberline {6.4.8}前馈全连接网络子层}{334}{subsection.6.4.8}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.9}训练}{334}{subsection.6.4.9}
\contentsline {subsection}{\numberline {6.4.9}训练}{335}{subsection.6.4.9}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.4.10}推断}{337}{subsection.6.4.10}
\contentsline {subsection}{\numberline {6.4.10}推断}{338}{subsection.6.4.10}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.5}序列到序列问题及应用}{337}{section.6.5}
\contentsline {section}{\numberline {6.5}序列到序列问题及应用}{338}{section.6.5}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.1}自动问答}{338}{subsection.6.5.1}
\contentsline {subsection}{\numberline {6.5.1}自动问答}{339}{subsection.6.5.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.2}自动文摘}{338}{subsection.6.5.2}
\contentsline {subsection}{\numberline {6.5.2}自动文摘}{339}{subsection.6.5.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.3}文言文翻译}{339}{subsection.6.5.3}
\contentsline {subsection}{\numberline {6.5.3}文言文翻译}{340}{subsection.6.5.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.4}对联生成}{339}{subsection.6.5.4}
\contentsline {subsection}{\numberline {6.5.4}对联生成}{340}{subsection.6.5.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {6.5.5}古诗生成}{340}{subsection.6.5.5}
\contentsline {subsection}{\numberline {6.5.5}古诗生成}{341}{subsection.6.5.5}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {6.6}小结及深入阅读}{341}{section.6.6}
\contentsline {section}{\numberline {6.6}小结及深入阅读}{342}{section.6.6}
\defcounter {refsection}{0}\relax
\contentsline {part}{\@mypartnumtocformat {IV}{附录}}{343}{part.4}
\ttl@stoptoc {default@3}
\ttl@starttoc {default@4}
\contentsline {chapter}{\numberline {7}神经机器翻译实战 \ \raisebox {0.5mm}{------}\ 参加一次比赛}{345}{chapter.7}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {A}附录A}{345}{Appendix.1.A}
\contentsline {section}{\numberline {7.1}神经机器翻译并不简单}{345}{section.7.1}
\defcounter {refsection}{0}\relax
\contentsline {chapter}{\numberline {B}附录B}{347}{Appendix.2.B}
\contentsline {subsection}{\numberline {7.1.1}影响神经机器翻译性能的因素}{346}{subsection.7.1.1}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {B.1}IBM模型3训练方法}{347}{section.2.B.1}
\contentsline {subsection}{\numberline {7.1.2}搭建神经机器翻译系统的步骤 }{347}{subsection.7.1.2}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {B.2}IBM模型4训练方法}{349}{section.2.B.2}
\contentsline {subsection}{\numberline {7.1.3}架构选择 }{348}{subsection.7.1.3}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {B.3}IBM模型5训练方法}{351}{section.2.B.3}
\contentsline {section}{\numberline {7.2}数据处理}{348}{section.7.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.2.1}分词}{349}{subsection.7.2.1}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.2.2}标准化}{350}{subsection.7.2.2}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.2.3}数据清洗}{351}{subsection.7.2.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.2.4}子词切分}{353}{subsection.7.2.4}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{大词表和OOV问题}{354}{section*.428}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{子词}{354}{section*.430}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{双字节编码(BPE)}{355}{section*.432}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{其他方法}{358}{section*.435}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.3}建模与训练}{358}{section.7.3}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.3.1}正则化}{358}{subsection.7.3.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{L1/L2正则化}{360}{section*.437}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{标签平滑}{361}{section*.438}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{Dropout}{361}{section*.440}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{Layer Dropout}{363}{section*.443}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.3.2}增大模型容量}{364}{subsection.7.3.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{宽网络}{364}{section*.445}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深网络}{365}{section*.447}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{增大输入层和输出层表示能力}{366}{section*.449}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{大模型的分布式计算}{367}{section*.450}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.3.3}大批量训练}{367}{subsection.7.3.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{为什么需要大批量训练}{367}{section*.451}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{如何构建批次}{369}{section*.454}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.4}推断}{370}{section.7.4}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.4.1}推断优化}{370}{subsection.7.4.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{推断系统的架构}{370}{section*.456}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{自左向右推断 vs 自右向左推断}{371}{section*.458}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{推断加速}{372}{section*.459}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.4.2}译文长度控制}{379}{subsection.7.4.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{长度惩罚因子}{379}{section*.465}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{译文长度范围约束}{380}{section*.467}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{覆盖度模型}{381}{section*.468}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.4.3}多模型集成}{382}{subsection.7.4.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{假设选择}{382}{section*.469}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{局部预测融合}{383}{section*.471}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{译文重组}{384}{section*.473}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.5}进阶技术}{385}{section.7.5}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.5.1}深层模型}{385}{subsection.7.5.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{Post-Norm vs Pre-Norm}{386}{section*.476}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{层聚合}{388}{section*.479}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深层模型的训练加速}{389}{section*.481}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{渐进式训练}{390}{section*.482}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{分组稠密连接}{390}{section*.484}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{学习率重置策略}{391}{section*.486}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深层模型的鲁棒性训练}{392}{section*.488}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.5.2}单语数据的使用}{394}{subsection.7.5.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{伪数据}{395}{section*.491}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{预训练}{396}{section*.494}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{联合训练}{398}{section*.497}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.5.3}知识精炼}{399}{subsection.7.5.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{什么是知识精炼}{399}{section*.499}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{知识精炼的基本方法}{401}{section*.500}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{机器翻译中的知识精炼}{402}{section*.502}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.5.4}双向训练}{403}{subsection.7.5.4}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{有监督对偶学习}{404}{section*.504}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{无监督对偶学习}{405}{section*.505}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{翻译中回译}{406}{section*.507}
\defcounter {refsection}{0}\relax
\contentsline {section}{\numberline {7.6}小结及深入阅读}{406}{section.7.6}
\contentsfinish
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论