Commit 23b2990b by 曹润柘

合并分支 'caorunzhe' 到 'master'

Caorunzhe

查看合并请求 !705
parents 55e33562 d7f57e4f
...@@ -991,7 +991,7 @@ P(\mathbi{y}|\mathbi{x}) & = & \frac{\mathrm{cos}(\mathbi{x},\mathbi{y})/\tau}{\ ...@@ -991,7 +991,7 @@ P(\mathbi{y}|\mathbi{x}) & = & \frac{\mathrm{cos}(\mathbi{x},\mathbi{y})/\tau}{\
\item 如何更高效地利用已有双语数据或单语数据进行数据增强始终是一个热点问题。研究人员分别探索了源语言单语和目标语言单语的使用方法\upcite{DBLP:conf/emnlp/ZhangZ16,DBLP:conf/emnlp/WuWXQLL19,DBLP:conf/acl/XiaKAN19},以及如何对已有双语数据进行修改\upcite{DBLP:conf/emnlp/WangPDN18,DBLP:conf/acl/GaoZWXQCZL19}。经过数据增强得到的伪数据的质量时好时坏,如何提高伪数据的质量以及更好地利用伪数据进行训练也是十分重要的问题\upcite{DBLP:conf/emnlp/FadaeeM18,DBLP:conf/nlpcc/XuLXLLXZ19,DBLP:conf/wmt/CaswellCG19,DBLP:journals/corr/abs200403672,DBLP:conf/emnlp/WangLWLS19}。此外,还有一些工作对数据增强技术进行了理论分析\upcite{DBLP:conf/emnlp/LiLHZZ19,DBLP:conf/acl/MarieRF20} \item 如何更高效地利用已有双语数据或单语数据进行数据增强始终是一个热点问题。研究人员分别探索了源语言单语和目标语言单语的使用方法\upcite{DBLP:conf/emnlp/ZhangZ16,DBLP:conf/emnlp/WuWXQLL19,DBLP:conf/acl/XiaKAN19},以及如何对已有双语数据进行修改\upcite{DBLP:conf/emnlp/WangPDN18,DBLP:conf/acl/GaoZWXQCZL19}。经过数据增强得到的伪数据的质量时好时坏,如何提高伪数据的质量以及更好地利用伪数据进行训练也是十分重要的问题\upcite{DBLP:conf/emnlp/FadaeeM18,DBLP:conf/nlpcc/XuLXLLXZ19,DBLP:conf/wmt/CaswellCG19,DBLP:journals/corr/abs200403672,DBLP:conf/emnlp/WangLWLS19}。此外,还有一些工作对数据增强技术进行了理论分析\upcite{DBLP:conf/emnlp/LiLHZZ19,DBLP:conf/acl/MarieRF20}
\vspace{0.5em} \vspace{0.5em}
\item 预训练模型也是自然语言处理的重要突破之一。除了基于语言模型或掩码语言模型的方法,也有很多新的架构和模型被提出,如排列语言模型、降噪自编码器等\upcite{DBLP:conf/nips/YangDYCSL19,lewis2019bart,DBLP:conf/iclr/LanCGGSS20,DBLP:conf/acl/ZhangHLJSL19}。预训练技术也逐渐向多语言领域扩展\upcite{DBLP:conf/nips/ConneauL19,DBLP:conf/emnlp/HuangLDGSJZ19,song2019mass},甚至不再只局限于文本任务\upcite{DBLP:conf/iccv/SunMV0S19,DBLP:journals/corr/abs-2010-12831,DBLP:conf/nips/LuBPL19,DBLP:conf/interspeech/ChuangLLL20}。对于如何将预训练模型高效地应用到下游任务中,也进行了很多的经验性对比与分析\upcite{DBLP:journals/corr/abs-1802-05365,DBLP:conf/rep4nlp/PetersRS19,DBLP:conf/cncl/SunQXH19}。但将预训练模型应用于下游任务存在的一个问题是,模型巨大的参数量会带来较大的延时及显存消耗。因此,很多工作对如何压缩预训练模型进行了研究\upcite{shen2020q,Lan2020ALBERTAL,DBLP:journals/corr/abs-1910-01108,Jiao2020TinyBERTDB} \item 预训练模型也是自然语言处理的重要突破之一。除了基于语言模型或掩码语言模型的方法,也有很多新的架构和模型被提出,如排列语言模型、降噪自编码器等\upcite{DBLP:conf/nips/YangDYCSL19,DBLP:conf/acl/LewisLGGMLSZ20,DBLP:conf/iclr/LanCGGSS20,DBLP:conf/acl/ZhangHLJSL19}。预训练技术也逐渐向多语言领域扩展\upcite{DBLP:conf/nips/ConneauL19,DBLP:conf/emnlp/HuangLDGSJZ19,song2019mass},甚至不再只局限于文本任务\upcite{DBLP:conf/iccv/SunMV0S19,DBLP:journals/corr/abs-2010-12831,DBLP:conf/nips/LuBPL19,DBLP:conf/interspeech/ChuangLLL20}。对于如何将预训练模型高效地应用到下游任务中,也进行了很多的经验性对比与分析\upcite{DBLP:journals/corr/abs-1802-05365,DBLP:conf/rep4nlp/PetersRS19,DBLP:conf/cncl/SunQXH19}。但将预训练模型应用于下游任务存在的一个问题是,模型巨大的参数量会带来较大的延时及显存消耗。因此,很多工作对如何压缩预训练模型进行了研究\upcite{shen2020q,Lan2020ALBERTAL,DBLP:journals/corr/abs-1910-01108,Jiao2020TinyBERTDB}
\vspace{0.5em} \vspace{0.5em}
\item 多任务学习是多语言翻译的一种典型方法。通过共享编码器模块或是注意力模块来进行一对多\upcite{DBLP:conf/acl/DongWHYW15}或多对一\upcite{DBLP:journals/tacl/LeeCH17}或多对多\upcite{DBLP:conf/naacl/FiratCB16} 的学习,然而这些方法需要为每个翻译语言对设计单独的编码器和解码器,限制了其可扩展性。为了解决以上问题,研究人员进一步探索了用于多语言翻译的单个机器翻译模型的方法,也就是本章提到的多语言单模型系统\upcite{DBLP:journals/corr/HaNW16,DBLP:journals/tacl/JohnsonSLKWCTVW17}。为了弥补多语言单模型系统中缺乏语言表示多样性的问题,可以重新组织分享模块,设计特定任务相关模块\upcite{DBLP:conf/coling/BlackwoodBW18,DBLP:conf/wmt/SachanN18,DBLP:conf/wmt/LuKLBZS18,DBLP:conf/acl/WangZZZXZ19};也可以将多语言单词编码和语言聚类分离,用一种多语言词典编码框架智能地共享词汇级别的信息,有助于语言间的泛化\upcite{DBLP:conf/iclr/WangPAN19};还可以将语言聚类为不同的组,并为每个聚类单独训练一个多语言模型\upcite{DBLP:conf/emnlp/TanCHXQL19} \item 多任务学习是多语言翻译的一种典型方法。通过共享编码器模块或是注意力模块来进行一对多\upcite{DBLP:conf/acl/DongWHYW15}或多对一\upcite{DBLP:journals/tacl/LeeCH17}或多对多\upcite{DBLP:conf/naacl/FiratCB16} 的学习,然而这些方法需要为每个翻译语言对设计单独的编码器和解码器,限制了其可扩展性。为了解决以上问题,研究人员进一步探索了用于多语言翻译的单个机器翻译模型的方法,也就是本章提到的多语言单模型系统\upcite{DBLP:journals/corr/HaNW16,DBLP:journals/tacl/JohnsonSLKWCTVW17}。为了弥补多语言单模型系统中缺乏语言表示多样性的问题,可以重新组织分享模块,设计特定任务相关模块\upcite{DBLP:conf/coling/BlackwoodBW18,DBLP:conf/wmt/SachanN18,DBLP:conf/wmt/LuKLBZS18,DBLP:conf/acl/WangZZZXZ19};也可以将多语言单词编码和语言聚类分离,用一种多语言词典编码框架智能地共享词汇级别的信息,有助于语言间的泛化\upcite{DBLP:conf/iclr/WangPAN19};还可以将语言聚类为不同的组,并为每个聚类单独训练一个多语言模型\upcite{DBLP:conf/emnlp/TanCHXQL19}
......
...@@ -7023,7 +7023,7 @@ author = {Yoshua Bengio and ...@@ -7023,7 +7023,7 @@ author = {Yoshua Bengio and
} }
@inproceedings{DBLP:conf/cvpr/RebuffiKSL17, @inproceedings{DBLP:conf/cvpr/RebuffiKSL17,
author = {Sylvestre{-}Alvise Rebuffi and author = {Sylvestre-Alvise Rebuffi and
Alexander Kolesnikov and Alexander Kolesnikov and
Georg Sperl and Georg Sperl and
Christoph H. Lampert}, Christoph H. Lampert},
...@@ -7035,7 +7035,7 @@ author = {Yoshua Bengio and ...@@ -7035,7 +7035,7 @@ author = {Yoshua Bengio and
@inproceedings{DBLP:conf/eccv/CastroMGSA18, @inproceedings{DBLP:conf/eccv/CastroMGSA18,
author = {Francisco M. Castro and author = {Francisco M. Castro and
Manuel J. Mar{\'{\i}}n{-}Jim{\'{e}}nez and Manuel J. Mar{\'{\i}}n-Jim{\'{e}}nez and
Nicol{\'{a}}s Guil and Nicol{\'{a}}s Guil and
Cordelia Schmid and Cordelia Schmid and
Karteek Alahari}, Karteek Alahari},
...@@ -7182,7 +7182,7 @@ year={2012} ...@@ -7182,7 +7182,7 @@ year={2012}
@inproceedings{DBLP:conf/acl/GaoHYD14, @inproceedings{DBLP:conf/acl/GaoHYD14,
author = {Jianfeng Gao and author = {Jianfeng Gao and
Xiaodong He and Xiaodong He and
Wen{-}tau Yih and Wen-tau Yih and
Li Deng}, Li Deng},
title = {Learning Continuous Phrase Representations for Translation Modeling}, title = {Learning Continuous Phrase Representations for Translation Modeling},
pages = {699--709}, pages = {699--709},
...@@ -7250,6 +7250,20 @@ year={2012} ...@@ -7250,6 +7250,20 @@ year={2012}
year = {1999} year = {1999}
} }
% Bahdanau et al. (2017): "An Actor-Critic Algorithm for Sequence Prediction".
% The venue name is stored in `publisher`, as in the neighbouring entries.
@inproceedings{DBLP:conf/iclr/BahdanauBXGLPCB17,
  author    = {Bahdanau, Dzmitry and
               Brakel, Philemon and
               Xu, Kelvin and
               Goyal, Anirudh and
               Lowe, Ryan and
               Pineau, Joelle and
               Courville, Aaron C. and
               Bengio, Yoshua},
  title     = {An Actor-Critic Algorithm for Sequence Prediction},
  publisher = {International Conference on Learning Representations},
  year      = {2017}
}
%%%%% chapter 13------------------------------------------------------ %%%%% chapter 13------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
...@@ -10955,7 +10969,7 @@ author = {Zhuang Liu and ...@@ -10955,7 +10969,7 @@ author = {Zhuang Liu and
Translation}, Translation},
volume = {11839}, volume = {11839},
pages = {466--475}, pages = {466--475},
publisher = {Springer}, publisher = {Natural Language Processing and Chinese Computing},
year = {2019} year = {2019}
} }
@inproceedings{DBLP:conf/wmt/CaswellCG19, @inproceedings{DBLP:conf/wmt/CaswellCG19,
...@@ -10987,14 +11001,18 @@ author = {Zhuang Liu and ...@@ -10987,14 +11001,18 @@ author = {Zhuang Liu and
publisher = {CoRR}, publisher = {CoRR},
year = {2020} year = {2020}
} }
@inproceedings{DBLP:journals/corr/abs200403672, @inproceedings{DBLP:journals/corr/abs200403672,
author = {Zi-Yi Dou and author = {Zi-Yi Dou and
Antonios Anastasopoulos and Antonios Anastasopoulos and
Graham Neubig}, Graham Neubig},
title = {Dynamic Data Selection and Weighting for Iterative Back-Translation}, title = {Dynamic Data Selection and Weighting for Iterative Back-Translation},
publisher = {CoRR}, pages = {5894--5904},
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2020} year = {2020}
} }
@inproceedings{DBLP:conf/emnlp/WuZHGQLL19, @inproceedings{DBLP:conf/emnlp/WuZHGQLL19,
author = {Lijun Wu and author = {Lijun Wu and
Jinhua Zhu and Jinhua Zhu and
...@@ -12925,7 +12943,7 @@ author = {Zhuang Liu and ...@@ -12925,7 +12943,7 @@ author = {Zhuang Liu and
title = {Domain Adaptation Using Domain Similarity- and Domain Complexity-Based title = {Domain Adaptation Using Domain Similarity- and Domain Complexity-Based
Instance Selection for Cross-Domain Sentiment Analysis}, Instance Selection for Cross-Domain Sentiment Analysis},
pages = {717--723}, pages = {717--723},
publisher = {{IEEE} Computer Society}, publisher = {International Conference on Data Mining Workshops},
year = {2012} year = {2012}
} }
@inproceedings{DBLP:conf/acl/WangFUS17, @inproceedings{DBLP:conf/acl/WangFUS17,
...@@ -13042,7 +13060,7 @@ author = {Zhuang Liu and ...@@ -13042,7 +13060,7 @@ author = {Zhuang Liu and
@inproceedings{khayrallah2017neural, @inproceedings{khayrallah2017neural,
title={Neural lattice search for domain adaptation in machine translation}, title={Neural lattice search for domain adaptation in machine translation},
author={Khayrallah, Huda and Kumar, Gaurav and Duh, Kevin and Post, Matt and Koehn, Philipp}, author={Khayrallah, Huda and Kumar, Gaurav and Duh, Kevin and Post, Matt and Koehn, Philipp},
publisher={Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 2: Short Papers)}, publisher={International Joint Conference on Natural Language Processing},
pages={20--25}, pages={20--25},
year={2017} year={2017}
} }
...@@ -13189,7 +13207,6 @@ author = {Zhuang Liu and ...@@ -13189,7 +13207,6 @@ author = {Zhuang Liu and
@inproceedings{DBLP:conf/emnlp/ZhuH07, @inproceedings{DBLP:conf/emnlp/ZhuH07,
author = {Jingbo Zhu and author = {Jingbo Zhu and
Eduard H. Hovy}, Eduard H. Hovy},
editor = {Jason Eisner},
title = {Active Learning for Word Sense Disambiguation with Methods for Addressing title = {Active Learning for Word Sense Disambiguation with Methods for Addressing
the Class Imbalance Problem}, the Class Imbalance Problem},
pages = {783--790}, pages = {783--790},
...@@ -13197,7 +13214,7 @@ author = {Zhuang Liu and ...@@ -13197,7 +13214,7 @@ author = {Zhuang Liu and
year = {2007} year = {2007}
} }
@inproceedings{DBLP:conf/eacl/NegriTFBF17, @inproceedings{DBLP:conf/eacl/NegriTFBF17,
author = {M. Amin Farajian and author = {Mohammad Amin Farajian and
Marco Turchi and Marco Turchi and
Matteo Negri and Matteo Negri and
Nicola Bertoldi and Nicola Bertoldi and
...@@ -13301,12 +13318,6 @@ author = {Zhuang Liu and ...@@ -13301,12 +13318,6 @@ author = {Zhuang Liu and
pages = {5754--5764}, pages = {5754--5764},
year = {2019} year = {2019}
} }
% BART (Lewis et al., 2019), cited via its arXiv preprint.
% The acronym is brace-protected so that recasing styles cannot turn it into "Bart",
% and the title is in Title Case like the other entries in this file.
@inproceedings{lewis2019bart,
  author    = {Lewis, Mike and Liu, Yinhan and Goyal, Naman and Ghazvininejad, Marjan and Mohamed, Abdelrahman and Levy, Omer and Stoyanov, Ves and Zettlemoyer, Luke},
  title     = {{BART}: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension},
  publisher = {arXiv preprint arXiv:1910.13461},
  year      = {2019}
}
@inproceedings{DBLP:conf/iclr/LanCGGSS20, @inproceedings{DBLP:conf/iclr/LanCGGSS20,
author = {Zhenzhong Lan and author = {Zhenzhong Lan and
Mingda Chen and Mingda Chen and
...@@ -13409,7 +13420,7 @@ author = {Zhuang Liu and ...@@ -13409,7 +13420,7 @@ author = {Zhuang Liu and
title = {How to Fine-Tune {BERT} for Text Classification?}, title = {How to Fine-Tune {BERT} for Text Classification?},
volume = {11856}, volume = {11856},
pages = {194--206}, pages = {194--206},
publisher = {Springer}, publisher = {Chinese Computational Linguistics},
year = {2019} year = {2019}
} }
@inproceedings{shen2020q, @inproceedings{shen2020q,
...@@ -13452,8 +13463,8 @@ author = {Zhuang Liu and ...@@ -13452,8 +13463,8 @@ author = {Zhuang Liu and
Tao Qin and Tao Qin and
Liwei Wang and Liwei Wang and
Nenghai Yu and Nenghai Yu and
Tie{-}Yan Liu and Tie-Yan Liu and
Wei{-}Ying Ma}, Wei-Ying Ma},
title = {Dual Learning for Machine Translation}, title = {Dual Learning for Machine Translation},
publisher = {Conference and Workshop on Neural Information Processing Systems}, publisher = {Conference and Workshop on Neural Information Processing Systems},
pages = {820--828}, pages = {820--828},
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论