Commit 533dac9c by xuchen

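accu update: rename the adam_betas key to adam-betas, remove the label_smoothing: 0.1 entries that followed criterion, add label-smoothing: 0.1 after seed in the configs that set num-workers, and switch speed_perturb from 0 to 1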

parent f1dc8723
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -25,5 +25,7 @@ num-workers: 8 ...@@ -25,5 +25,7 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0014 lr: 0.0014
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv2d subsampling-type: conv2d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt ...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0014 lr: 0.0014
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -5,7 +5,7 @@ lr-scheduler: inverse_sqrt ...@@ -5,7 +5,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -40,7 +40,7 @@ task=speech_to_text ...@@ -40,7 +40,7 @@ task=speech_to_text
vocab_type=unigram vocab_type=unigram
vocab_type=char vocab_type=char
vocab_size=10000 vocab_size=10000
speed_perturb=0 speed_perturb=1
lcrm=0 lcrm=0
tokenizer=0 tokenizer=0
use_raw_audio=0 use_raw_audio=0
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -23,5 +23,6 @@ num-workers: 8 ...@@ -23,5 +23,6 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt ...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -31,10 +31,9 @@ lr-scheduler: inverse_sqrt ...@@ -31,10 +31,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -32,10 +32,9 @@ lr-scheduler: inverse_sqrt ...@@ -32,10 +32,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -32,10 +32,9 @@ lr-scheduler: inverse_sqrt ...@@ -32,10 +32,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -5,7 +5,7 @@ lr-scheduler: inverse_sqrt ...@@ -5,7 +5,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -32,7 +32,7 @@ lr-scheduler: inverse_sqrt ...@@ -32,7 +32,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -30,7 +30,7 @@ lr-scheduler: inverse_sqrt ...@@ -30,7 +30,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0015 lr: 0.0015
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -34,7 +34,7 @@ lr-scheduler: inverse_sqrt ...@@ -34,7 +34,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.002 lr: 0.002
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -34,7 +34,7 @@ lr-scheduler: inverse_sqrt ...@@ -34,7 +34,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.002 lr: 0.002
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -30,7 +30,7 @@ lr-scheduler: inverse_sqrt ...@@ -30,7 +30,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0015 lr: 0.0015
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt ...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
......
...@@ -25,5 +25,6 @@ num-workers: 8 ...@@ -25,5 +25,6 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
......
...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt ...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -25,5 +25,6 @@ num-workers: 8 ...@@ -25,5 +25,6 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -26,10 +26,9 @@ lr-scheduler: inverse_sqrt ...@@ -26,10 +26,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: join_speech_and_text_loss criterion: join_speech_and_text_loss
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -45,10 +45,9 @@ lr-scheduler: inverse_sqrt ...@@ -45,10 +45,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt ...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt ...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt ...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt ...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt ...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt ...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -13,7 +13,7 @@ lr-scheduler: inverse_sqrt ...@@ -13,7 +13,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -13,7 +13,7 @@ lr-scheduler: inverse_sqrt ...@@ -13,7 +13,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -13,7 +13,7 @@ lr-scheduler: inverse_sqrt ...@@ -13,7 +13,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,7 +6,7 @@ lr-scheduler: inverse_sqrt ...@@ -6,7 +6,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -12,7 +12,7 @@ lr-scheduler: inverse_sqrt ...@@ -12,7 +12,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -23,5 +23,6 @@ num-workers: 8 ...@@ -23,5 +23,6 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt ...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
arch: pdss2t_transformer_s_8 arch: pdss2t_transformer_s_8
#pds-ctc: 0_1_1_0
#intermedia-adapter: league
#intermedia-ctc-weight: 0.1
#encoder-attention-type: reduced
#pds-attn-ds-ratios: 4_2_2_1
#attention-reduced-method: pool
#attention-reduced-q: True
encoder-embed-dim: 256 encoder-embed-dim: 256
pds-stages: 4 pds-stages: 4
#ctc-layer: 12
pds-layers: 4_2_2_4 pds-layers: 4_2_2_4
pds-ratios: 2_2_1_2 pds-ratios: 2_2_1_2
pds-fusion: True pds-fusion: True
...@@ -31,10 +21,9 @@ lr-scheduler: inverse_sqrt ...@@ -31,10 +21,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -32,10 +32,9 @@ lr-scheduler: inverse_sqrt ...@@ -32,10 +32,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -32,10 +32,9 @@ lr-scheduler: inverse_sqrt ...@@ -32,10 +32,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -5,7 +5,7 @@ lr-scheduler: inverse_sqrt ...@@ -5,7 +5,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -32,7 +32,7 @@ lr-scheduler: inverse_sqrt ...@@ -32,7 +32,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -30,7 +30,7 @@ lr-scheduler: inverse_sqrt ...@@ -30,7 +30,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0015 lr: 0.0015
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -34,7 +34,7 @@ lr-scheduler: inverse_sqrt ...@@ -34,7 +34,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.002 lr: 0.002
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -34,7 +34,7 @@ lr-scheduler: inverse_sqrt ...@@ -34,7 +34,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.002 lr: 0.002
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -30,7 +30,7 @@ lr-scheduler: inverse_sqrt ...@@ -30,7 +30,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0015 lr: 0.0015
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt ...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
......
...@@ -25,5 +25,6 @@ num-workers: 8 ...@@ -25,5 +25,6 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
......
...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt ...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论