title = {Conformer: Convolution-augmented Transformer for Speech Recognition},
booktitle = {Interspeech 2020},
pages = {5036--5040},
publisher = {International Speech Communication Association},
year = {2020}
}
@inproceedings{DBLP:conf/icassp/DongXX18,
author = {Linhao Dong and
Shuang Xu and
Bo Xu},
title = {Speech-Transformer: {A} No-Recurrence Sequence-to-Sequence Model for
Speech Recognition},
booktitle = {2018 {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
pages = {5884--5888},
publisher = {Institute of Electrical and Electronics Engineers},
year = {2018}
}
@article{DBLP:journals/corr/abs-1802-05751,
author = {Niki Parmar and
Ashish Vaswani and
Jakob Uszkoreit and
Lukasz Kaiser and
Noam Shazeer and
Alexander Ku},
title = {Image Transformer},
journal = {CoRR},
volume = {abs/1802.05751},
year = {2018}
}
@inproceedings{vaswani2017attention,
title = {Attention is All You Need},
author = {Ashish Vaswani and
Noam Shazeer and
Niki Parmar and
Jakob Uszkoreit and
Llion Jones and
Aidan N. Gomez and
Lukasz Kaiser and
Illia Polosukhin},
booktitle = {Advances in Neural Information Processing Systems},