# pyramid4_base.yaml (1.12 KB)
xuchen committed
1
arch: pys2t_transformer_s
xuchen committed
2
encoder-embed-dim: 256
xuchen committed
3
pyramid-stages: 4
xuchen committed
4 5
#pyramid-dropout: 0
pyramid-layers: 2_2_6_2
xuchen committed
6
pyramid-sr-ratios: 2_2_2_2
xuchen committed
7 8 9
pyramid-fuse: True
pyramid-fuse-way: all_conv
pyramid-embed-dims: 256_256_256_256
xuchen committed
10 11
pyramid-reduced-embed: conv
pyramid-embed-norm: True
xuchen committed
12
pyramid-position-embed: 1_1_1_1
xuchen committed
13
pyramid-kernel-sizes: 5_5_5_5
xuchen committed
14 15
pyramid-ffn-ratios: 8_8_8_8
pyramid-heads: 4_4_4_4
xuchen committed
16 17 18

train-subset: train_asr
valid-subset: dev_asr
xuchen committed
19

xuchen committed
20
max-epoch: 100
xuchen committed
21 22 23
max-update: 100000

num-workers: 8
xuchen committed
24
patience: 20
xuchen committed
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True

#load-pretrained-encoder-from:
#load-pretrained-decoder-from:

share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
#adam-betas: (0.9,0.98)

xuchen committed
42
criterion: label_smoothed_cross_entropy_with_ctc
xuchen committed
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
label-smoothing: 0.1

conv-channels: 1024
dropout: 0.1
activation-fn: relu
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4

decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1