Commit a2cae2ac by 单韦乔

Chapter 13 bib

parent add2ca79
......@@ -942,10 +942,10 @@ L_{\textrm{seq}} = - \textrm{logP}_{\textrm{s}}(\tilde{\seq{y}} | \seq{x})
\begin{itemize}
\vspace{0.5em}
\item Beyond improving model robustness, adversarial examples have many other uses, such as model evaluation. By building data sets made up of adversarial examples, one can verify a model's robustness against different types of noise\upcite{DBLP:conf/emnlp/MichelN18}. However, generating adversarial examples raises many issues that need attention: whether the perturbation is subtle enough to fool the model while remaining imperceptible to humans, whether the adversarial examples generalize well across different model architectures or data sets, whether the generation method is efficient enough, and so on. ({\color{red}Are the references a bit sparse? Add 2-3 more?})
\item Beyond improving model robustness, adversarial examples have many other uses, such as model evaluation. By building data sets made up of adversarial examples, one can verify a model's robustness against different types of noise\upcite{DBLP:conf/emnlp/MichelN18}. However, generating adversarial examples raises many issues that need attention: whether the perturbation is subtle enough\upcite{DBLP:conf/cvpr/Moosavi-Dezfooli16,DBLP:conf/cvpr/NguyenYC15} to fool the model while remaining imperceptible to humans, whether the adversarial examples generalize well across different model architectures or data sets\upcite{DBLP:conf/iclr/LiuCLS17,DBLP:journals/tnn/YuanHZL19}, and whether the generation method is efficient enough\upcite{DBLP:conf/emnlp/JiaL17,DBLP:conf/infocom/YuanHL020}. A rough formalization of these requirements is sketched after this excerpt.
\vspace{0.5em}
\item Reinforcement learning has many further applications in machine translation. For example, the MIXER algorithm updates the model with an objective that mixes policy gradient and maximum likelihood estimation{\red Sequence Level Training with Recurrent Neural Networks}, while algorithms such as DAgger{\red A Reduction of Imitation Learning and Structured Prediction to No-Regret Online Learning} and DAD{\red Improving Multi-step Prediction of Learned Time Series Models} gradually expose the model to the inference-time regime during training. Moreover, reinforcement learning is still rather unstable, and many methods have been proposed to improve it, such as variance reduction{\red An Actor-Critic Algorithm for Sequence Prediction;Reinforcement Learning for Bandit Neural Machine Translation with Simulated Human Feedback} and the use of monolingual data{\red Improving Neural Machine Translation Models with Monolingual Data;A Study of Reinforcement Learning for Neural Machine Translation}. Because reinforcement learning can learn from reward feedback, many studies have explored using it in interactive settings to improve system performance. A typical example is dialogue systems, where human feedback can be used to train the system, e.g., small-talk{\red A Deep Reinforcement Learning Chatbot} and task-oriented dialogue{\red Continuously Learning Neural Dialogue Management}.
\item Reinforcement learning has many further applications in machine translation. For example, the MIXER algorithm updates the model with an objective that mixes policy gradient and maximum likelihood estimation\upcite{Ranzato2016SequenceLT}, while algorithms such as DAgger\upcite{DBLP:journals/jmlr/RossGB11} and DAD\upcite{DBLP:conf/aaai/VenkatramanHB15} gradually expose the model to the inference-time regime during training. Moreover, reinforcement learning is still rather unstable, and many methods have been proposed to improve it, such as variance reduction\upcite{DBLP:conf/iclr/BahdanauBXGLPCB17,DBLP:conf/emnlp/NguyenDB17} and the use of monolingual data\upcite{Sennrich2016ImprovingNM,DBLP:conf/emnlp/WuTQLL18}; a sketch of the mixed objective and of a variance-reducing baseline follows this excerpt. Because reinforcement learning can learn from reward feedback, many studies have explored using it in interactive settings to improve system performance. A typical example is dialogue systems, where human feedback can be used to train the system, e.g., small-talk\upcite{DBLP:journals/corr/abs-1709-02349} and task-oriented dialogue\upcite{DBLP:journals/corr/SuGMRUVWY16a}.
\vspace{0.5em}
\item Broadly speaking, most curriculum learning methods follow an easy-to-hard principle, but in practice the notion has been given richer meanings and has long outgrown its original definition. On the one hand, curriculum learning can be combined with many tasks, in which case the ordering criterion is not necessarily sample difficulty but depends on the specific task. On the other hand, on some tasks or data sets the easy-to-hard order is not always effective, and a hard-first strategy can sometimes work better\upcite{DBLP:conf/medprai/SurendranathJ18,zhang2018empirical}. Although this runs against intuition, a reasonable explanation is that curriculum learning is better suited to settings with label noise, many outliers, or a difficult target task, where it improves robustness and convergence speed, whereas hard-first strategies suit clean data sets\upcite{DBLP:conf/nips/ChangLM17}. A simple difficulty-plus-pacing formulation is sketched after this excerpt.
......
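As a rough formalization of the constraints mentioned in the adversarial-example item above, a perturbed input $\seq{x}'$ must stay close to the original $\seq{x}$ while changing the model's prediction. This is only a sketch: the distance $d(\cdot,\cdot)$ (e.g., an edit distance for text), the perceptibility budget $\varepsilon$, and $\seq{x}'$ itself are assumed notation, not taken from the cited papers.
\begin{eqnarray}
d(\seq{x},\seq{x}') & \le & \varepsilon \nonumber \\
\arg\max\nolimits_{\seq{y}} \textrm{P}(\seq{y}|\seq{x}') & \neq & \arg\max\nolimits_{\seq{y}} \textrm{P}(\seq{y}|\seq{x}) \nonumber
\end{eqnarray}
Generalization then asks whether the same $\seq{x}'$ also changes the predictions of other architectures or models trained on other data, and efficiency asks how many model queries are needed to find such a perturbation.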
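For the reinforcement-learning item, the ideas of a mixed objective and of variance reduction can be sketched as follows. This is a simplified view: MIXER actually anneals from maximum likelihood to the sequence-level reward along the training schedule, but a single mixing weight $\alpha$ conveys the idea. Here $r(\cdot)$ is a sentence-level reward (e.g., BLEU against the reference), $\hat{\seq{y}}$ a sampled translation, and $b$ a baseline; all of these symbols are assumed for illustration.
\begin{eqnarray}
L & = & \alpha \, L_{\textrm{MLE}} + (1-\alpha) \, L_{\textrm{RL}} \nonumber \\
L_{\textrm{RL}} & = & - \mathbb{E}_{\hat{\seq{y}} \sim \textrm{P}(\cdot|\seq{x})} \left[ r(\hat{\seq{y}}) \right] \nonumber \\
\nabla L_{\textrm{RL}} & \approx & - \left( r(\hat{\seq{y}}) - b \right) \nabla \log \textrm{P}(\hat{\seq{y}}|\seq{x}) \nonumber
\end{eqnarray}
Subtracting the baseline $b$ leaves the expected gradient unchanged but lowers its variance, which is the motivation behind actor-critic style estimators.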
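For the curriculum-learning item, the easy-to-hard principle is often made concrete with a difficulty score and a pacing function. The notation below is assumed for illustration: $d(\cdot)$ scores the difficulty of a training pair, $c(t) \in (0,1]$ is a nondecreasing pacing function, $\theta_{c(t)}$ is the $c(t)$-quantile of the difficulty scores, and $D_t$ is the pool of examples available at step $t$ (the easiest $c(t)$ fraction of the data).
\begin{eqnarray}
D_t & = & \{ (\seq{x},\seq{y}) \mid d(\seq{x},\seq{y}) \le \theta_{c(t)} \} \nonumber \\
c(t) & \le & c(t+1), \qquad c(T) = 1 \nonumber
\end{eqnarray}
A hard-first strategy simply reverses the ranking induced by $d(\cdot)$, which matches the observation that it can work better on clean data sets.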
......@@ -6618,6 +6618,108 @@ author = {Yoshua Bengio and
publisher = {{IEEE} International Conference on Computer Vision},
year = {2017}
}
@inproceedings{DBLP:journals/corr/SuGMRUVWY16a,
author = {Pei{-}Hao Su and
Milica Gasic and
Nikola Mrksic and
Lina Maria Rojas{-}Barahona and
Stefan Ultes and
David Vandyke and
Tsung{-}Hsien Wen and
Steve J. Young},
title = {Continuously Learning Neural Dialogue Management},
publisher = {CoRR},
volume = {abs/1606.02689},
year = {2016}
}
@inproceedings{DBLP:journals/corr/abs-1709-02349,
author = {Iulian Vlad Serban and
Chinnadhurai Sankar and
Mathieu Germain and
Saizheng Zhang and
Zhouhan Lin and
Sandeep Subramanian and
Taesup Kim and
Michael Pieper and
Sarath Chandar and
Nan Rosemary Ke and
Sai Mudumba and
Alexandre de Br{\'{e}}bisson and
Jose Sotelo and
Dendi Suhubdy and
Vincent Michalski and
Alexandre Nguyen and
Joelle Pineau and
Yoshua Bengio},
title = {A Deep Reinforcement Learning Chatbot},
publisher = {CoRR},
volume = {abs/1709.02349},
year = {2017}
}
@inproceedings{DBLP:conf/emnlp/WuTQLL18,
author = {Lijun Wu and
Fei Tian and
Tao Qin and
Jianhuang Lai and
Tie{-}Yan Liu},
title = {A Study of Reinforcement Learning for Neural Machine Translation},
pages = {3612--3621},
  publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2018}
}
@inproceedings{DBLP:journals/jmlr/RossGB11,
author = {St{\'{e}}phane Ross and
Geoffrey J. Gordon and
Drew Bagnell},
title = {A Reduction of Imitation Learning and Structured Prediction to No-Regret
Online Learning},
publisher = {International Conference on Artificial Intelligence and Statistics},
series = {{JMLR} Proceedings},
volume = {15},
pages = {627--635},
year = {2011}
}
@inproceedings{DBLP:conf/aaai/VenkatramanHB15,
author = {Arun Venkatraman and
Martial Hebert and
J. Andrew Bagnell},
title = {Improving Multi-Step Prediction of Learned Time Series Models},
publisher = {AAAI Conference on Artificial Intelligence},
pages = {3024--3030},
year = {2015}
}
@inproceedings{DBLP:conf/iclr/LiuCLS17,
author = {Yanpei Liu and
Xinyun Chen and
Chang Liu and
Dawn Song},
title = {Delving into Transferable Adversarial Examples and Black-box Attacks},
publisher = {International Conference on Learning Representations},
year = {2017}
}
@inproceedings{DBLP:journals/tnn/YuanHZL19,
author = {Xiaoyong Yuan and
Pan He and
Qile Zhu and
Xiaolin Li},
title = {Adversarial Examples: Attacks and Defenses for Deep Learning},
publisher = {IEEE Transactions on Neural Networks and Learning Systems},
volume = {30},
number = {9},
pages = {2805--2824},
year = {2019}
}
@inproceedings{DBLP:conf/infocom/YuanHL020,
author = {Xiaoyong Yuan and
Pan He and
Xiaolin Li and
Dapeng Wu},
title = {Adaptive Adversarial Attack on Scene Text Recognition},
pages = {358--363},
publisher = {IEEE Conference on Computer Communications},
year = {2020}
}
%%%%% chapter 13------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......