# Model/training hyperparameters for the `pdss2t_transformer_m_8` architecture.
# One key per line: a block mapping cannot hold several `key: value` pairs on
# a single line, so the entries are laid out individually below.
# Keys and values are kept exactly as originally written.

# --- Architecture ---
arch: pdss2t_transformer_m_8
encoder-embed-dim: 512

# --- PDS settings ---
# Every underscore-separated list below has four entries, matching
# `pds-stages: 4` (presumably one entry per stage -- TODO confirm).
# NOTE(review): YAML 1.1 loaders (e.g. PyYAML) resolve plain scalars such as
# 3_3_3_3 to integers (underscores act as digit separators). If the consumer
# expects the literal string, these values need quoting -- confirm against the
# loader before changing them.
pds-stages: 4
pds-layers: 3_3_3_3
pds-ratios: 2_2_1_2
# NOTE(review): `True` is a YAML 1.1 boolean spelling; canonical form is `true`.
pds-fusion: True
pds-fusion-method: all_conv
pds-embed-dims: 512_512_512_512
pds-ds-method: conv
pds-embed-norm: True
pds-position-embed: 1_1_1_1
pds-kernel-sizes: 5_5_5_5
pds-ffn-ratios: 4_4_4_4
pds-attn-heads: 8_8_8_8

share-decoder-input-output-embed: True

# --- Optimisation ---
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
# NOTE(review): `1e-7` has no decimal point; some YAML 1.1 loaders return it
# as a string rather than a float -- confirm the consumer converts it.
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 0.0014
# Loaded as a plain string; presumably parsed by the consumer -- TODO confirm.
# NOTE(review): this key and `label_smoothing` use snake_case while the rest
# of the file uses kebab-case; left unchanged because the consumer's key
# handling is not visible here.
adam_betas: (0.9,0.98)

# --- Criterion and regularisation ---
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1

dropout: 0.15
activation-fn: relu

# --- Encoder / decoder dimensions ---
# `encoder-layers: 12` equals the sum of the `pds-layers` entries (3+3+3+3).
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 8

decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 8