---
# Training configuration for the `s2t_ctc` architecture.
# Flat mapping: one option per line. Key spellings (including the mix of
# kebab-case and snake_case) are kept exactly as the consumer receives them.

arch: s2t_ctc

# Optimizer and learning-rate schedule.
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
# NOTE(review): exponent literals without a decimal point (1e-7, 2e-3) load as
# strings under YAML 1.1 parsers such as PyYAML and as floats under YAML 1.2.
# Left as written; confirm the consumer converts them before changing.
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
# Plain scalar: loads as the string "(0.9,0.98)", not as a sequence.
adam_betas: (0.9,0.98)

# Training criterion.
criterion: ctc
zero_infinity: true
post-process: sentencepiece

# Input subsampling.
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu

# Regularization and activation.
dropout: 0.1
attention-dropout: 0.1
activation-dropout: 0.1
activation-fn: relu

# Encoder dimensions.
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 12
encoder-attention-heads: 4

# Disabled option, kept for reference (presumably takes a checkpoint path;
# confirm against the consumer before enabling).
# load-pretrained-encoder-from: