\node[anchor=north,font=\scriptsize,align=center] (w1) at ([yshift=-2em]encoder.south){\scriptsize\bfnew{There exist different}\\\scriptsize\bfnew{opinions on this question}};
\node[anchor=north,font=\scriptsize,align=center] (w2) at ([yshift=-2em]decoder.south){\scriptsize\bfnew{There exist different}\\\scriptsize\bfnew{opinions on this question}};
\node[anchor=north,font=\scriptsize,text=gray] (w3) at ([yshift=0.6em]w2.south){\scriptsize\bfnew{(copy source sentence)}};
\node[anchor=north,font=\scriptsize,text=gray] (w3) at ([yshift=0.6em]w2.south){\scriptsize\bfnew{(复制源语言句子)}};
\node[anchor=south,font=\scriptsize,align=center] (w4) at ([yshift=1.6em]box2.north){\scriptsize\bfnew{on this question}\\\scriptsize\bfnew{There exist different opinions}};
\parinterval 完全独立地对每个词建模,会出现什么问题呢?来看一个例子,将中文“谢谢你”翻译成英文,可以翻译成“Thanks to you”或者“Thanks a lot”。假设生成这两种翻译的概率是相等的,即一半的概率是“Thanks to you”,另一半的概率是“Thanks a lot”。由于非自回归模型的条件独立性假设,解码时第二个词“to”和“a”的概率是差不多大的,第三个词“you”和“lot”的概率也是差不多大的,这会使得模型生成出“Thanks to lot”或者“Thanks a you”这样错误的翻译,如图\ref{fig:14-13}所示。这便是影响句子质量的关键问题,称之为{\small\sffamily\bfseries{多峰问题}}\index{多峰问题}(Multi-modality Problem)\index{Multi-modality Problem}\upcite{Gu2017NonAutoregressiveNM}。针对非自回归模型难以处理多峰问题这一缺陷进行改进,是提升非自回归模型质量的关键。
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2002}
}
@article{DBLP:journals/mt/ChurchH93,
@inproceedings{DBLP:journals/mt/ChurchH93,
title={Good applications for crummy machine translation},
author={Church, Kenneth W and Hovy, Eduard H},
volume={8},
...
...
@@ -1294,10 +1292,10 @@
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2014}
}
@article{DBLP:journals/mt/Shiwen93,
@inproceedings{DBLP:journals/mt/Shiwen93,
author = {Shiwen Yu},
title = {Automatic evaluation of output quality for Machine Translation systems},
journal = {Mach. Transl.},
publisher = {Mach. Transl.},
volume = {8},
number = {1-2},
pages = {117--126},
...
...
@@ -1400,20 +1398,20 @@
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2013},
}
@article{DBLP:journals/corr/MatsuoKS17,
@inproceedings{DBLP:journals/corr/MatsuoKS17,
author = {Junki Matsuo and
Mamoru Komachi and
Katsuhito Sudoh},
title = {Word-Alignment-Based Segment-Level Machine Translation Evaluation
using Word Embeddings},
journal = {CoRR},
publisher = {CoRR},
volume = {abs/1704.00380},
year = {2017}
}
@article{DBLP:journals/csl/GuzmanJMN17,
@inproceedings{DBLP:journals/csl/GuzmanJMN17,
title={Machine translation evaluation with neural networks},
author={Guzm{\'a}n, Francisco and Joty, Shafiq and M{\`a}rquez, Llu{\'\i}s and Nakov, Preslav},
journal={Computer Speech \& Language},
publisher={Computer Speech \& Language},
volume={45},
pages={180--200},
year={2017}
...
...
@@ -1485,10 +1483,10 @@
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019}
}
@article{DBLP:journals/mt/BiciciGG13,
@inproceedings{DBLP:journals/mt/BiciciGG13,
title={Predicting sentence translation quality using extrinsic and language independent features},
author={Bi{\c{c}}ici, Ergun and Groves, Declan and van Genabith, Josef},
journal={Machine Translation},
publisher={Machine Translation},
volume={27},
number={3-4},
pages={171--192},
...
...
@@ -1536,7 +1534,7 @@
Greg Corrado and
Jeffrey Dean},
title = {Efficient Estimation of Word Representations in Vector Space},
journal = {arXiv preprint arXiv:1301.3781},
publisher = {arXiv preprint arXiv:1301.3781},
year = {2013}
}
@inproceedings{DBLP:conf/icml/LeM14,
...
...
@@ -1588,10 +1586,10 @@
author={Radford, Alec and Narasimhan, Karthik and Salimans, Tim and Sutskever, Ilya},
year={2018}
}
@article{DBLP:journals/mtcl/Carroll66,
@inproceedings{DBLP:journals/mtcl/Carroll66,
author = {John B. Carroll},
title = {An experiment in evaluating the quality of translations},
journal = {Mech. Transl. Comput. Linguistics},
publisher = {Mech. Transl. Comput. Linguistics},
volume = {9},
number = {3-4},
pages = {55--66},
...
...
@@ -1610,14 +1608,14 @@
pages={224--231},
year={2003}
}
@article{DBLP:journals/mt/PrzybockiPBS09,
@inproceedings{DBLP:journals/mt/PrzybockiPBS09,
author = {Mark A. Przybocki and
Kay Peterson and
Sebastien Bronsart and
Gregory A. Sanders},
title = {The {NIST} 2008 Metrics for machine translation challenge - overview,
methodology, metrics, and results},
journal = {Machine Translation},
publisher = {Machine Translation},
volume = {23},
number = {2-3},
pages = {71--103},
...
...
@@ -1703,7 +1701,7 @@
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2007}
}
@article{DBLP:journals/mt/PadoCGJM09,
@inproceedings{DBLP:journals/mt/PadoCGJM09,
author = {Sebastian Pad{\'{o}} and
Daniel M. Cer and
Michel Galley and
...
...
@@ -1711,7 +1709,7 @@
Christopher D. Manning},
title = {Measuring machine translation quality as semantic equivalence: {A}
metric based on entailment features},
journal = {Machine Translation},
publisher = {Machine Translation},
volume = {23},
number = {2-3},
pages = {181--193},
...
...
@@ -1796,7 +1794,7 @@
publisher={European Association for Machine Translation},
year={2011}
}
@article{DBLP:journals/mt/CostaLLCC15,
@inproceedings{DBLP:journals/mt/CostaLLCC15,
author = {{\^{A}}ngela Costa and
Wang Ling and
Tiago Lu{\'{\i}}s and
...
...
@@ -1804,7 +1802,7 @@
Lu{\'{\i}}sa Coheur},
title = {A linguistically motivated taxonomy for Machine Translation error
analysis},
journal = {Machine Translation},
publisher = {Machine Translation},
volume = {29},
number = {2},
pages = {127--161},
...
...
@@ -1862,10 +1860,10 @@
publisher={Proceedings of the Ninth Machine Translation Summit. New Orleans},
year={2003}
}
@article{pearson1920notes,
@inproceedings{pearson1920notes,
title={Notes on the history of correlation},
author={Pearson, Karl},
journal={Biometrika},
publisher={Biometrika},
volume={13},
number={1},
pages={25--45},
...
...
@@ -1879,10 +1877,10 @@
pages={71--78},
year={2003}
}
@article{finch2004using,
@inproceedings{finch2004using,
title={Using a paraphraser to improve machine translation evaluation},
author={Finch, Andrew and Akiba, Yasuhiro and Sumita, Eiichiro},
journal={International Joint Conference on Natural Language Processing},
publisher={International Joint Conference on Natural Language Processing},
year={2004}
}
@inproceedings{DBLP:conf/coling/HamonM08,
...
...
@@ -1955,7 +1953,7 @@ publisher={Annual Meeting of the Association for Computational Linguistics},
pages={148--155},
year={2001}
}
@article{albrecht2008regression,
@inproceedings{albrecht2008regression,
title={Regression for machine translation evaluation at the sentence level},
author={Albrecht, Joshua S and Hwa, Rebecca},
volume={22},
...
...
@@ -2187,7 +2185,7 @@ year = {2012}
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2013}
}
@article{kepler2019unbabel,
@inproceedings{kepler2019unbabel,
title={Unbabel's Participation in the WMT19 Translation Quality Estimation Shared Task},
pages={78--84},
author={Kepler, F{\'a}bio and Tr{\'e}nous, Jonay and Treviso, Marcos and Vera, Miguel and G{\'o}is, Ant{\'o}nio and Farajian, M Amin and Lopes, Ant{\'o}nio V and Martins, Andr{\'e} FT},
...
...
@@ -2220,7 +2218,7 @@ year = {2012}
year={2019},
publisher={Springer Nature}
}
@article{akaike1974new,
@inproceedings{akaike1974new,
title={A new look at the statistical model identification},
journal={International Conference on Learning Representations},
publisher={International Conference on Learning Representations},
year={2019}
}
@article{Brix2020SuccessfullyAT,
@inproceedings{Brix2020SuccessfullyAT,
author = {Christopher Brix and
Parnia Bahar and
Hermann Ney},
title = {Successfully Applying the Stabilized Lottery Ticket Hypothesis to
the Transformer Architecture},
pages = {3909--3915},
journal = {Annual Meeting of the Association for Computational Linguistics},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2020},
}
@article{Liu2019RethinkingTV,
@inproceedings{Liu2019RethinkingTV,
title={Rethinking the Value of Network Pruning},
author={Zhuang Liu and
Mingjie Sun and
Tinghui Zhou and
Gao Huang and
Trevor Darrell},
journal={ArXiv},
publisher={ArXiv},
year={2019},
volume={abs/1810.05270}
}
@article{Liu2017LearningEC,
@inproceedings{Liu2017LearningEC,
author = {Zhuang Liu and
Jianguo Li and
Zhiqiang Shen and
...
...
@@ -7067,7 +7062,7 @@ author = {Zhuang Liu and
Changshui Zhang},
title = {Learning Efficient Convolutional Networks through Network Slimming},
pages = {2755--2763},
journal = {{IEEE} International Conference on Computer Vision},
publisher = {{IEEE} International Conference on Computer Vision},
year = {2017}
}
...
...
@@ -7082,34 +7077,34 @@ author = {Zhuang Liu and
year={2018}
}
@article{Hubara2017QuantizedNN,
@inproceedings{Hubara2017QuantizedNN,
title={Quantized Neural Networks: Training Neural Networks with Low Precision Weights and Activations},
author={Itay Hubara and Matthieu Courbariaux and Daniel Soudry and Ran El-Yaniv and Yoshua Bengio},
journal={Journal of Machine Learning Research},
publisher={Journal of Machine Learning Research},
year={2017},
volume={18},
pages={187:1--187:30}
}
@article{DBLP:journals/corr/HintonVD15,
@inproceedings{DBLP:journals/corr/HintonVD15,
author = {Geoffrey E. Hinton and
Oriol Vinyals and
Jeffrey Dean},
title = {Distilling the Knowledge in a Neural Network},
journal = {CoRR},
publisher = {CoRR},
volume = {abs/1503.02531},
year = {2015}
}
@article{Munim2019SequencelevelKD,
@inproceedings{Munim2019SequencelevelKD,
title={Sequence-level Knowledge Distillation for Model Compression of Attention-based Sequence-to-sequence Speech Recognition},
author={Raden Mu'az Mun'im and Nakamasa Inoue and Koichi Shinoda},
journal={{IEEE} International Conference on Acoustics, Speech and Signal Processing},
publisher={{IEEE} International Conference on Acoustics, Speech and Signal Processing},
year={2019},
pages={6151--6155}
}
@article{Tang2019DistillingTK,
@inproceedings{Tang2019DistillingTK,
author = {Raphael Tang and
Yao Lu and
Linqing Liu and
...
...
@@ -7118,7 +7113,7 @@ author = {Zhuang Liu and
Jimmy Lin},
title = {Distilling Task-Specific Knowledge from {BERT} into Simple Neural
Networks},
journal = {CoRR},
publisher = {CoRR},
volume = {abs/1903.12136},
year = {2019}
}
...
...
@@ -7138,13 +7133,13 @@ author = {Zhuang Liu and
year={2020}
}
@article{Ghazvininejad2020AlignedCE,
@inproceedings{Ghazvininejad2020AlignedCE,
author = {Marjan Ghazvininejad and
Vladimir Karpukhin and
Luke Zettlemoyer and
Omer Levy},
title = {Aligned Cross Entropy for Non-Autoregressive Machine Translation},
journal = {CoRR},
publisher = {CoRR},
volume = {abs/2004.01655},
year = {2020},
}
...
...
@@ -7187,14 +7182,14 @@ author = {Zhuang Liu and
year={2019}
}
@article{Ran2019GuidingNN,
@inproceedings{Ran2019GuidingNN,
author = {Qiu Ran and
Yankai Lin and
Peng Li and
Jie Zhou},
title = {Guiding Non-Autoregressive Neural Machine Translation Decoding with
Reordering Information},
journal = {CoRR},
publisher = {CoRR},
volume = {abs/1911.02215},
year = {2019}
}
...
...
@@ -7218,10 +7213,10 @@ author = {Zhuang Liu and
year = {2018}
}
@article{Zhou2020UnderstandingKD,
@inproceedings{Zhou2020UnderstandingKD,
title={Understanding Knowledge Distillation in Non-autoregressive Machine Translation},
author={Chunting Zhou and Graham Neubig and Jiatao Gu},
journal={ArXiv},
publisher={ArXiv},
year={2020},
volume={abs/1911.02727}
}
...
...
@@ -7247,11 +7242,11 @@ author = {Zhuang Liu and
year={2018}
}
@article{Tu2020ENGINEEI,
@inproceedings{Tu2020ENGINEEI,
title={ENGINE: Energy-Based Inference Networks for Non-Autoregressive Machine Translation},
author={Lifu Tu and Richard Yuanzhe Pang and Sam Wiseman and Kevin Gimpel},
pages={2819--2826},
journal={Annual Meeting of the Association for Computational Linguistics},
publisher={Annual Meeting of the Association for Computational Linguistics},
year={2020}
}
...
...
@@ -7295,10 +7290,10 @@ author = {Zhuang Liu and
year={2016}
}
@article{Duan2017OneShotIL,
@inproceedings{Duan2017OneShotIL,
title={One-Shot Imitation Learning},
author={Yan Duan and Marcin Andrychowicz and Bradly C. Stadie and Jonathan Ho and Jonas Schneider and Ilya Sutskever and Pieter Abbeel and Wojciech Zaremba},
journal={CoRR},
publisher={CoRR},
year={2017},
volume={abs/1703.07326}
}
...
...
@@ -7308,7 +7303,7 @@ author = {Zhuang Liu and
author={Chunqi Wang and
Ji Zhang and
Haiqing Chen},
booktitle={Conference on Empirical Methods in Natural Language Processing},
publisher={Conference on Empirical Methods in Natural Language Processing},
pages={479--488},
year={2018}
}
...
...
@@ -7321,36 +7316,36 @@ author = {Zhuang Liu and
year={2019}
}
@article{Kasai2020NonAutoregressiveMT,
@inproceedings{Kasai2020NonAutoregressiveMT,
title={Non-Autoregressive Machine Translation with Disentangled Context Transformer},
author={Jungo Kasai and J. Cross and Marjan Ghazvininejad and Jiatao Gu},