\parinterval 在文本翻译中引入图像信息是最典型的多模态机器翻译任务。虽然多模态机器翻译仍然是一种从源语言文字到目标语言文字的转换,但是在转换过程中融入了其他模态的信息,从而减少了歧义的产生。例如,前文提到,在翻译“A medium sized child jumps off of a dusty bank”时,如果给定一张与源语言句子相关的图片,机器翻译模型就可以利用视觉信息更好地理解歧义词,将“bank”译为“河岸”而不是“银行”。换句话说,对于同一图像或者视觉场景的描述,源语言和目标语言描述的本质意义是一致的,只不过体现在语言上会有表达方法上的差异。那么,图像中就会存在一些源语言和目标语言的隐含对齐“约束”,将这种“约束”融入到机器翻译系统,会让模型加深对某些歧义词语上下文的理解,从而进一步提高机器翻译质量。
\parinterval 本章仅对音频处理和语音识别进行了简单的介绍,具体内容可以参考一些经典书籍,比如关于信号处理的基础知识\upcite{[Discrete-Time Signal Processing (3rd version)][ Discrete-Time Speech Signal Processing: Principles and Practice]},以及语音识别的传统方法\upcite{[Fundamentals of Speech Recognition][ Spoken Language Processing: A Guide to Theory, Algorithm, and System Development]}和基于深度学习的最新方法\upcite{[ Automatic Speech Recognition: A Deep Learning Approach, 俞栋、邓力]}。此外,语音翻译的一个重要应用是机器同声传译。
title = {Improving Pivot-Based Statistical Machine Translation by Pivoting
the Co-occurrence Count of Phrase Pairs},
pages = {1665--1675},
publisher = {Annual Meeting of the Association for Computational Linguistics},
publisher = {{ACL}},
year = {2014}
}
@inproceedings{DBLP:conf/acl/MiuraNSTN15,
...
...
@@ -10635,14 +10617,14 @@ author = {Zhuang Liu and
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2009}
}
% CoRR (arXiv) preprint, typed as an article so the venue lives in `journal`.
@article{DBLP:journals/corr/ChengLYSX16,
  author    = {Yong Cheng and
               Yang Liu and
               Qian Yang and
               Maosong Sun and
               Wei Xu},
  title     = {Neural Machine Translation with Pivot Languages},
  journal   = {CoRR},
  volume    = {abs/1611.04928},
  year      = {2016}
}
...
...
@@ -10658,7 +10640,7 @@ author = {Zhuang Liu and
% LREC 2006 paper; the conference name belongs in `booktitle`, not `publisher`.
@inproceedings{de2006catalan,
  title     = {{Catalan-English} Statistical Machine Translation without Parallel Corpus: Bridging through {Spanish}},
  author    = {De Gispert, Adri{\`a} and Mari{\~n}o, Jos{\'e} B.},
  booktitle = {Proc. of 5th International Conference on Language Resources and Evaluation (LREC)},
  pages     = {65--68},
  year      = {2006}
}
...
...
@@ -10680,28 +10662,21 @@ author = {Zhuang Liu and
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2011}
}
% CoRR (arXiv) preprint, typed as an article so the venue lives in `journal`.
@article{DBLP:journals/corr/HintonVD15,
  author    = {Geoffrey E. Hinton and
               Oriol Vinyals and
               Jeffrey Dean},
  title     = {Distilling the Knowledge in a Neural Network},
  journal   = {CoRR},
  volume    = {abs/1503.02531},
  year      = {2015}
}
% Single definition for this key: the conference version (with pages), keeping
% the arXiv identifier in the eprint fields instead of a second entry.
% NOTE(review): the two original definitions disagreed on the order of the last
% two authors; the order of the conference-style record is kept here -- confirm.
@inproceedings{gu2018meta,
  author        = {Jiatao Gu and
                   Yong Wang and
                   Yun Chen and
                   Victor O. K. Li and
                   Kyunghyun Cho},
  title         = {Meta-Learning for Low-Resource Neural Machine Translation},
  booktitle     = {Conference on Empirical Methods in Natural Language Processing},
  pages         = {3622--3631},
  year          = {2018},
  eprint        = {1808.08437},
  archiveprefix = {arXiv}
}
@inproceedings{DBLP:conf/naacl/GuHDL18,
author = {Jiatao Gu and
Hany Hassan and
...
...
@@ -10743,11 +10718,11 @@ author = {Zhuang Liu and
publisher = {European Language Resources Association},
year = {2018}
}
@inproceedings{DBLP:journals/tkde/PanY10,
@article{DBLP:journals/tkde/PanY10,
author = {Sinno Jialin Pan and
Qiang Yang},
title = {A Survey on Transfer Learning},
publisher = {IEEE Transactions on knowledge and data engineering},
journal = {IEEE Transactions on knowledge and data engineering},
volume = {22},
number = {10},
pages = {1345--1359},
...
...
@@ -10755,14 +10730,14 @@ author = {Zhuang Liu and
}
@book{2009Handbook,
title={Handbook Of Research On Machine Learning Applications and Trends: Algorithms, Methods and Techniques - 2 Volumes},
author={Olivas, Emilio Soria and Guerrero, Jose David Martin and Sober, Marcelino Martinez and Benedito, Jose Rafael Magdalena and Lopez, Antonio Jose Serrano },
author={Olivas, Emilio Soria and Guerrero, Jose David Martin and Sober, Marcelino Martinez and Benedito, Jose Rafael Magdalena and Lopez, Antonio Jose Serrano },
title = {Dual supervised learning for non-native speech recognition},
publisher = {EURASIP Journal on Audio, Speech, and Music Processing},
journal = {{EURASIP} J. Audio Speech Music. Process.},
volume = {2019},
pages = {3},
year = {2019}
...
...
@@ -12130,13 +12066,13 @@ author = {Zhuang Liu and
publisher = {{IEEE} Computer Society},
year = {2017}
}
@inproceedings{DBLP:journals/access/DuRZH20,
@article{DBLP:journals/access/DuRZH20,
author = {Liang Du and
Xin Ren and
Peng Zhou and
Zhiguo Hu},
title = {Unsupervised Dual Learning for Feature and Instance Selection},
publisher = {{IEEE} Access},
journal = {{IEEE} Access},
volume = {8},
pages = {170248--170260},
year = {2020}
...
...
@@ -12150,7 +12086,6 @@ author = {Zhuang Liu and
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2020}
}
@inproceedings{DBLP:conf/nips/YangDYCSL19,
author = {Zhilin Yang and
Zihang Dai and
...
...
@@ -12159,14 +12094,13 @@ author = {Zhuang Liu and
Ruslan Salakhutdinov and
Quoc V. Le},
title = {XLNet: Generalized Autoregressive Pretraining for Language Understanding},
publisher = {Annual Conference on Neural Information Processing Systems},
pages = {5754--5764},
year = {2019}
}
% arXiv preprint, typed as an article; the acronym in the title is brace-protected
% so sentence-casing styles do not lowercase it.
@article{lewis2019bart,
  title   = {{BART}: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension},
  author  = {Lewis, Mike and Liu, Yinhan and Goyal, Naman and Ghazvininejad, Marjan and Mohamed, Abdelrahman and Levy, Omer and Stoyanov, Ves and Zettlemoyer, Luke},
  journal = {arXiv preprint arXiv:1910.13461},
  year    = {2019}
}
@inproceedings{DBLP:conf/iclr/LanCGGSS20,
...
...
@@ -12218,7 +12152,7 @@ author = {Zhuang Liu and
publisher = {International Conference on Computer Vision},
year = {2019}
}
@inproceedings{DBLP:journals/corr/abs-2010-12831,
@article{DBLP:journals/corr/abs-2010-12831,
author = {Liunian Harold Li and
Haoxuan You and
Zhecan Wang and
...
...
@@ -12227,7 +12161,7 @@ author = {Zhuang Liu and
Kai-Wei Chang},
title = {Weakly-supervised VisualBERT: Pre-training without Parallel Images
and Captions},
publisher = {CoRR},
journal = {CoRR},
volume = {abs/2010.12831},
year = {2020}
}
...
...
@@ -12277,18 +12211,18 @@ author = {Zhuang Liu and
% AAAI 2020 paper; venue is given once, in `booktitle`. No trailing period in the
% title: the bibliography style supplies its own punctuation.
@inproceedings{shen2020q,
  title     = {{Q-BERT}: {Hessian} Based Ultra Low Precision Quantization of {BERT}},
  author    = {Shen, Sheng and Dong, Zhen and Ye, Jiayu and Ma, Linjian and Yao, Zhewei and Gholami, Amir and Mahoney, Michael W and Keutzer, Kurt},
  booktitle = {AAAI Conference on Artificial Intelligence},
  pages     = {8815--8821},
  year      = {2020}
}
% CoRR (arXiv) preprint, typed as an article so the venue lives in `journal`.
@article{DBLP:journals/corr/abs-1910-01108,
  author    = {Victor Sanh and
               Lysandre Debut and
               Julien Chaumond and
               Thomas Wolf},
  title     = {DistilBERT, a distilled version of {BERT:} smaller, faster, cheaper
               and lighter},
  journal   = {CoRR},
  volume    = {abs/1910.01108},
  year      = {2019}
}
...
...
@@ -13248,6 +13182,728 @@ author = {Zhuang Liu and