# Training configuration: a flat mapping of option name -> value.
# One option per line; every key spelling and value is kept exactly as in
# the original single-line form.
# NOTE(review): keys mix kebab-case (clip-norm) and snake_case (adam_betas,
# label_smoothing). Left untouched because the consumer's expected spelling
# is not visible from this file -- confirm before normalizing.

# Model selection
arch: pdss2t_transformer_s_8
pds-fusion: True
ctc-layer: 12
share-decoder-input-output-embed: True

# Optimizer and learning-rate schedule
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
# NOTE(review): exponent literals without a decimal point (1e-7, 2e-3) may
# load as a string or as a float depending on the YAML parser version --
# confirm the consumer accepts both.
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
# Plain scalar; loads as the literal string "(0.9,0.98)".
adam_betas: (0.9,0.98)

# Training criterion
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1

# Encoder / decoder hyperparameters
dropout: 0.1
activation-fn: relu
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4

# Disabled options (no value was given in the original); uncomment and set
# a value to enable.
#load-pretrained-encoder-from:
#load-pretrained-decoder-from: