Commit 533dac9c by xuchen

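accu update: rename `adam_betas` to `adam-betas` in the training configs, drop the per-config `label_smoothing: 0.1` entries and add `label-smoothing: 0.1` next to `report-accuracy` in the configs that set seed/log-interval, and switch `speed_perturb` from 0 to 1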

parent f1dc8723
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -25,5 +25,7 @@ num-workers: 8 ...@@ -25,5 +25,7 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0014 lr: 0.0014
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv2d subsampling-type: conv2d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt ...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0014 lr: 0.0014
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -5,7 +5,7 @@ lr-scheduler: inverse_sqrt ...@@ -5,7 +5,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -40,7 +40,7 @@ task=speech_to_text ...@@ -40,7 +40,7 @@ task=speech_to_text
vocab_type=unigram vocab_type=unigram
vocab_type=char vocab_type=char
vocab_size=10000 vocab_size=10000
speed_perturb=0 speed_perturb=1
lcrm=0 lcrm=0
tokenizer=0 tokenizer=0
use_raw_audio=0 use_raw_audio=0
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -23,5 +23,6 @@ num-workers: 8 ...@@ -23,5 +23,6 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt ...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -31,10 +31,9 @@ lr-scheduler: inverse_sqrt ...@@ -31,10 +31,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -32,10 +32,9 @@ lr-scheduler: inverse_sqrt ...@@ -32,10 +32,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -32,10 +32,9 @@ lr-scheduler: inverse_sqrt ...@@ -32,10 +32,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -5,7 +5,7 @@ lr-scheduler: inverse_sqrt ...@@ -5,7 +5,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -32,7 +32,7 @@ lr-scheduler: inverse_sqrt ...@@ -32,7 +32,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -30,7 +30,7 @@ lr-scheduler: inverse_sqrt ...@@ -30,7 +30,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0015 lr: 0.0015
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -34,7 +34,7 @@ lr-scheduler: inverse_sqrt ...@@ -34,7 +34,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.002 lr: 0.002
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -34,7 +34,7 @@ lr-scheduler: inverse_sqrt ...@@ -34,7 +34,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.002 lr: 0.002
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -30,7 +30,7 @@ lr-scheduler: inverse_sqrt ...@@ -30,7 +30,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0015 lr: 0.0015
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt ...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
......
...@@ -25,5 +25,6 @@ num-workers: 8 ...@@ -25,5 +25,6 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
......
...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt ...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -25,5 +25,6 @@ num-workers: 8 ...@@ -25,5 +25,6 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -26,10 +26,9 @@ lr-scheduler: inverse_sqrt ...@@ -26,10 +26,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: join_speech_and_text_loss criterion: join_speech_and_text_loss
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -45,10 +45,9 @@ lr-scheduler: inverse_sqrt ...@@ -45,10 +45,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt ...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt ...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt ...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt ...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt ...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt ...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -13,7 +13,7 @@ lr-scheduler: inverse_sqrt ...@@ -13,7 +13,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -13,7 +13,7 @@ lr-scheduler: inverse_sqrt ...@@ -13,7 +13,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -13,7 +13,7 @@ lr-scheduler: inverse_sqrt ...@@ -13,7 +13,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,7 +6,7 @@ lr-scheduler: inverse_sqrt ...@@ -6,7 +6,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -12,7 +12,7 @@ lr-scheduler: inverse_sqrt ...@@ -12,7 +12,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -23,5 +23,6 @@ num-workers: 8 ...@@ -23,5 +23,6 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt ...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
arch: pdss2t_transformer_s_8 arch: pdss2t_transformer_s_8
#pds-ctc: 0_1_1_0
#intermedia-adapter: league
#intermedia-ctc-weight: 0.1
#encoder-attention-type: reduced
#pds-attn-ds-ratios: 4_2_2_1
#attention-reduced-method: pool
#attention-reduced-q: True
encoder-embed-dim: 256 encoder-embed-dim: 256
pds-stages: 4 pds-stages: 4
#ctc-layer: 12
pds-layers: 4_2_2_4 pds-layers: 4_2_2_4
pds-ratios: 2_2_1_2 pds-ratios: 2_2_1_2
pds-fusion: True pds-fusion: True
...@@ -31,10 +21,9 @@ lr-scheduler: inverse_sqrt ...@@ -31,10 +21,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -32,10 +32,9 @@ lr-scheduler: inverse_sqrt ...@@ -32,10 +32,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -32,10 +32,9 @@ lr-scheduler: inverse_sqrt ...@@ -32,10 +32,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -5,7 +5,7 @@ lr-scheduler: inverse_sqrt ...@@ -5,7 +5,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -32,7 +32,7 @@ lr-scheduler: inverse_sqrt ...@@ -32,7 +32,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -30,7 +30,7 @@ lr-scheduler: inverse_sqrt ...@@ -30,7 +30,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0015 lr: 0.0015
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -34,7 +34,7 @@ lr-scheduler: inverse_sqrt ...@@ -34,7 +34,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.002 lr: 0.002
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -34,7 +34,7 @@ lr-scheduler: inverse_sqrt ...@@ -34,7 +34,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.002 lr: 0.002
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -30,7 +30,7 @@ lr-scheduler: inverse_sqrt ...@@ -30,7 +30,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0015 lr: 0.0015
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt ...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
......
...@@ -25,5 +25,6 @@ num-workers: 8 ...@@ -25,5 +25,6 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
......
...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt ...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -25,5 +25,6 @@ num-workers: 8 ...@@ -25,5 +25,6 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
...@@ -26,10 +26,9 @@ lr-scheduler: inverse_sqrt ...@@ -26,10 +26,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: join_speech_and_text_loss criterion: join_speech_and_text_loss
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -45,10 +45,9 @@ lr-scheduler: inverse_sqrt ...@@ -45,10 +45,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt ...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt ...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt ...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt ...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt ...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt ...@@ -28,10 +28,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -13,7 +13,7 @@ lr-scheduler: inverse_sqrt ...@@ -13,7 +13,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -13,7 +13,7 @@ lr-scheduler: inverse_sqrt ...@@ -13,7 +13,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,7 +6,7 @@ lr-scheduler: inverse_sqrt ...@@ -6,7 +6,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -12,7 +12,7 @@ lr-scheduler: inverse_sqrt ...@@ -12,7 +12,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.3 dropout: 0.3
attention-dropout: 0.0 attention-dropout: 0.0
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.3 dropout: 0.3
attention-dropout: 0.0 attention-dropout: 0.0
......
...@@ -7,10 +7,9 @@ weight-decay: 0.0001 ...@@ -7,10 +7,9 @@ weight-decay: 0.0001
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 4000 warmup-updates: 4000
lr: 5e-4 lr: 5e-4
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.3 dropout: 0.3
......
...@@ -16,5 +16,6 @@ num-workers: 8 ...@@ -16,5 +16,6 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -16,5 +16,6 @@ num-workers: 8 ...@@ -16,5 +16,6 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt ...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -33,10 +33,9 @@ lr-scheduler: inverse_sqrt ...@@ -33,10 +33,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -5,7 +5,7 @@ lr-scheduler: inverse_sqrt ...@@ -5,7 +5,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
......
...@@ -15,5 +15,6 @@ num-workers: 8 ...@@ -15,5 +15,6 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 1000 warmup-updates: 1000
lr: 2e-4 lr: 2e-4
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -17,5 +17,6 @@ num-workers: 8 ...@@ -17,5 +17,6 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
...@@ -13,10 +13,9 @@ lr-scheduler: inverse_sqrt ...@@ -13,10 +13,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 1000 warmup-updates: 1000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: join_speech_and_text_loss criterion: join_speech_and_text_loss
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt ...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -26,10 +26,9 @@ lr-scheduler: inverse_sqrt ...@@ -26,10 +26,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 1000 warmup-updates: 1000
lr: 5e-4 lr: 5e-4
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -7,10 +7,9 @@ warmup-init-lr: 1e-7 ...@@ -7,10 +7,9 @@ warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
weight-decay: 1e-6 weight-decay: 1e-6
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -15,6 +15,7 @@ num-workers: 8 ...@@ -15,6 +15,7 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
post-process: sentencepiece post-process: sentencepiece
\ No newline at end of file
...@@ -7,10 +7,9 @@ warmup-init-lr: 1e-7 ...@@ -7,10 +7,9 @@ warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
weight-decay: 1e-6 weight-decay: 1e-6
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt ...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -6,7 +6,7 @@ warmup-init-lr: 1e-7 ...@@ -6,7 +6,7 @@ warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
weight-decay: 1e-6 weight-decay: 1e-6
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -7,10 +7,9 @@ warmup-init-lr: 1e-7 ...@@ -7,10 +7,9 @@ warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
weight-decay: 1e-6 weight-decay: 1e-6
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
......
...@@ -15,5 +15,6 @@ num-workers: 8 ...@@ -15,5 +15,6 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
......
...@@ -7,10 +7,9 @@ warmup-init-lr: 1e-7 ...@@ -7,10 +7,9 @@ warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
weight-decay: 1e-6 weight-decay: 1e-6
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -15,6 +15,7 @@ num-workers: 8 ...@@ -15,6 +15,7 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
post-process: sentencepiece post-process: sentencepiece
\ No newline at end of file
...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt ...@@ -9,10 +9,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt ...@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
......
...@@ -8,7 +8,7 @@ warmup-init-lr: 1e-7 ...@@ -8,7 +8,7 @@ warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
weight-decay: 1e-6 weight-decay: 1e-6
lr: 0.0015 lr: 0.0015
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -35,7 +35,7 @@ warmup-init-lr: 1e-7 ...@@ -35,7 +35,7 @@ warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
weight-decay: 1e-6 weight-decay: 1e-6
lr: 0.0015 lr: 0.0015
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -30,7 +30,7 @@ warmup-init-lr: 1e-7 ...@@ -30,7 +30,7 @@ warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
weight-decay: 1e-6 weight-decay: 1e-6
lr: 0.002 lr: 0.002
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
ctc-weight: 1.0 ctc-weight: 1.0
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -27,5 +27,6 @@ num-workers: 8 ...@@ -27,5 +27,6 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0014 lr: 0.0014
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -28,7 +28,7 @@ lr-scheduler: inverse_sqrt ...@@ -28,7 +28,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.002 lr: 0.002
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
ctc-weight: 1.0 ctc-weight: 1.0
......
...@@ -7,7 +7,7 @@ lr-scheduler: inverse_sqrt ...@@ -7,7 +7,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.002 lr: 0.002
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
ctc-weight: 1.0 ctc-weight: 1.0
......
...@@ -29,7 +29,7 @@ warmup-init-lr: 1e-7 ...@@ -29,7 +29,7 @@ warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
weight-decay: 1e-6 weight-decay: 1e-6
lr: 0.002 lr: 0.002
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
ctc-weight: 1.0 ctc-weight: 1.0
......
...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt ...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -11,10 +11,9 @@ lr-scheduler: inverse_sqrt ...@@ -11,10 +11,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0014 lr: 0.0014
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0014 lr: 0.0014
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0014 lr: 0.0014
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -11,10 +11,9 @@ lr-scheduler: inverse_sqrt ...@@ -11,10 +11,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0014 lr: 0.0014
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -7,7 +7,7 @@ lr-scheduler: inverse_sqrt ...@@ -7,7 +7,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -7,7 +7,7 @@ lr-scheduler: inverse_sqrt ...@@ -7,7 +7,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -29,7 +29,7 @@ lr-scheduler: inverse_sqrt ...@@ -29,7 +29,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt ...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
ctc-weight: 1.0 ctc-weight: 1.0
......
...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt ...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
ctc-weight: 1.0 ctc-weight: 1.0
......
...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt ...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
ctc-weight: 1.0 ctc-weight: 1.0
......
...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt ...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
ctc-weight: 1.0 ctc-weight: 1.0
......
...@@ -29,7 +29,7 @@ lr-scheduler: inverse_sqrt ...@@ -29,7 +29,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -29,7 +29,7 @@ lr-scheduler: inverse_sqrt ...@@ -29,7 +29,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -29,7 +29,7 @@ lr-scheduler: inverse_sqrt ...@@ -29,7 +29,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt ...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
ctc-weight: 1.0 ctc-weight: 1.0
......
...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt ...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
ctc-weight: 1.0 ctc-weight: 1.0
......
...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt ...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
ctc-weight: 1.0 ctc-weight: 1.0
......
...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt ...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0014 lr: 0.0014
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt ...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0014 lr: 0.0014
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt ...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.001 lr: 0.001
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt ...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.001 lr: 0.001
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -6,11 +6,10 @@ lr-scheduler: inverse_sqrt ...@@ -6,11 +6,10 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
weight-decay: 1e-4 weight-decay: 1e-4
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
...@@ -19,6 +18,9 @@ subsampling-stride: 2 ...@@ -19,6 +18,9 @@ subsampling-stride: 2
subsampling-norm: none subsampling-norm: none
subsampling-activation: glu subsampling-activation: glu
encoder-embed-norm: True
encoder-no-scale-embedding: True
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
encoder-embed-dim: 256 encoder-embed-dim: 256
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
...@@ -19,6 +18,9 @@ subsampling-stride: 2 ...@@ -19,6 +18,9 @@ subsampling-stride: 2
subsampling-norm: none subsampling-norm: none
subsampling-activation: glu subsampling-activation: glu
encoder-embed-norm: True
encoder-no-scale-embedding: True
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
encoder-embed-dim: 256 encoder-embed-dim: 256
......
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
...@@ -19,6 +18,9 @@ subsampling-stride: 2 ...@@ -19,6 +18,9 @@ subsampling-stride: 2
subsampling-norm: none subsampling-norm: none
subsampling-activation: glu subsampling-activation: glu
encoder-embed-norm: True
encoder-no-scale-embedding: True
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
encoder-embed-dim: 256 encoder-embed-dim: 256
......
...@@ -7,7 +7,7 @@ lr-scheduler: inverse_sqrt ...@@ -7,7 +7,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
...@@ -23,6 +23,9 @@ subsampling-stride: 2 ...@@ -23,6 +23,9 @@ subsampling-stride: 2
subsampling-norm: none subsampling-norm: none
subsampling-activation: glu subsampling-activation: glu
encoder-embed-norm: True
encoder-no-scale-embedding: True
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
encoder-embed-dim: 256 encoder-embed-dim: 256
...@@ -52,11 +55,11 @@ inter-mixup-ratio-decay-params: 20000,40000,0 ...@@ -52,11 +55,11 @@ inter-mixup-ratio-decay-params: 20000,40000,0
# Bilingual CTC # Bilingual CTC
share-ctc-and-embed: True share-ctc-and-embed: True
share-xctc-and-embed: True share-xctc-and-embed: True
ctc-weight: 0.3 ctc-weight: 0.2
xctc-weight: 1 xctc-weight: 1
# InterCTC # InterCTC
inter-ctc-weight: 0.2 inter-ctc-weight: 0.1
inter-ctc-layers: 6,9,12,15 inter-ctc-layers: 6,9,12,15
share-inter-ctc: True share-inter-ctc: True
inter-xctc-weight: 1.0 inter-xctc-weight: 1.0
...@@ -67,9 +70,10 @@ ctc-pae: inter_league ...@@ -67,9 +70,10 @@ ctc-pae: inter_league
xctc-pae: inter_league xctc-pae: inter_league
pae-unnorm-input: True pae-unnorm-input: True
ctc-mixup-consistent-weight: 0.15 ctc-mixup-consistent-weight: 0.1
inter-ctc-mixup-consistent-weight: 0.1 inter-ctc-mixup-consistent-weight: 0.05
mixup-consistent-weight: 0.5 xctc-mixup-consistent-weight: 0.5
xinter-ctc-mixup-consistent-weight: 0.5
# Conformer # Conformer
macaron-style: True macaron-style: True
......
...@@ -7,7 +7,7 @@ lr-scheduler: inverse_sqrt ...@@ -7,7 +7,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
...@@ -24,6 +24,9 @@ subsampling-stride: 2 ...@@ -24,6 +24,9 @@ subsampling-stride: 2
subsampling-norm: none subsampling-norm: none
subsampling-activation: glu subsampling-activation: glu
encoder-embed-norm: True
encoder-no-scale-embedding: True
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
encoder-embed-dim: 256 encoder-embed-dim: 256
......
...@@ -7,7 +7,7 @@ lr-scheduler: inverse_sqrt ...@@ -7,7 +7,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
...@@ -24,6 +24,9 @@ subsampling-stride: 2 ...@@ -24,6 +24,9 @@ subsampling-stride: 2
subsampling-norm: none subsampling-norm: none
subsampling-activation: glu subsampling-activation: glu
encoder-embed-norm: True
encoder-no-scale-embedding: True
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
encoder-embed-dim: 256 encoder-embed-dim: 256
......
...@@ -6,19 +6,23 @@ data_tag=asr ...@@ -6,19 +6,23 @@ data_tag=asr
#data_tag=asr_joint_lcrm_niu #data_tag=asr_joint_lcrm_niu
test_subset=(dev-clean dev-other test-clean test-other all) test_subset=(dev-clean dev-other test-clean test-other all)
test_subset=(dev-clean dev-other test-clean test-other) test_subset=(dev-clean dev-other test-clean test-other)
#test_subset=(train-clean-50)
#test_subset=(dev-clean)
#test_subset=(dev-other)
#test_subset=(train-clean-5k-sort)
exp_name= exp_name=
if [ "$#" -eq 1 ]; then if [ "$#" -eq 1 ]; then
exp_name=$1 exp_name=$1
fi fi
ctc_infer=1 ctc_infer=0
n_average=10 n_average=10
beam_size=5 beam_size=1
infer_ctc_weight=0 infer_ctc_weight=0
len_penalty=1.0 len_penalty=1.0
max_tokens=50000 max_tokens=50000
batch_size=0 batch_size=1
infer_debug=0 infer_debug=0
dec_model=checkpoint_best.pt dec_model=checkpoint_best.pt
......
...@@ -23,7 +23,7 @@ if [[ ${tokenizer} -eq 1 ]]; then ...@@ -23,7 +23,7 @@ if [[ ${tokenizer} -eq 1 ]]; then
fi fi
echo "SacreBLEU" >> ${record} echo "SacreBLEU" >> ${record}
cmd="cat ${gen} | sacrebleu ${ref} -m bleu -w 4 -l ${lang_pair}" cmd="cat ${gen} | sacrebleu ${ref} -m bleu -w 4 -l ${lang_pair} | jq -r .score"
eval $cmd >> ${record} eval $cmd >> ${record}
cat ${record} cat ${record}
rm ${record} rm ${record}
./run.sh --stage 2 --stop_stage 2 --gpu_num 1 --exp_name 1123_purectc_conformer_norm_100h_layer36_interw1_x1 --n_average 10 --beam_size 1 --len_penalty 1.0 --batch_size 1 --max_tokens 50000 --dec_model checkpoint_best.pt --ctc_infer 0 --infer_ctc_weight 0 --infer_debug 0 --data_tag asr --test_subset dev-clean,dev-other,test-clean,test-other
Start Stage: 2
Stop Stage: 2
Stage 2: Decoding
Run command:
python3 /xuchen/st/S2T/fairseq_cli/generate.py
/xuchen/st/data/librispeech/asr
--config-yaml config.yaml
--gen-subset dev-clean
--task speech_to_text
--path /xuchen/st/checkpoints/librispeech/asr//1123_purectc_conformer_norm_100h_layer36_interw1_x1/avg_best10_checkpoint.pt
--results-path /xuchen/st/checkpoints/librispeech/asr//1123_purectc_conformer_norm_100h_layer36_interw1_x1
--batch-size 1
--max-tokens 50000
--beam 1
--lenpen 1.0
--infer-ctc-weight 0
--scoring wer
--early-exit-count 6 
Run command:
python3 /xuchen/st/S2T/fairseq_cli/generate.py
/xuchen/st/data/librispeech/asr
--config-yaml config.yaml
--gen-subset dev-other
--task speech_to_text
--path /xuchen/st/checkpoints/librispeech/asr//1123_purectc_conformer_norm_100h_layer36_interw1_x1/avg_best10_checkpoint.pt
--results-path /xuchen/st/checkpoints/librispeech/asr//1123_purectc_conformer_norm_100h_layer36_interw1_x1
--batch-size 1
--max-tokens 50000
--beam 1
--lenpen 1.0
--infer-ctc-weight 0
--scoring wer
--early-exit-count 6 
Run command:
python3 /xuchen/st/S2T/fairseq_cli/generate.py
/xuchen/st/data/librispeech/asr
--config-yaml config.yaml
--gen-subset test-clean
--task speech_to_text
--path /xuchen/st/checkpoints/librispeech/asr//1123_purectc_conformer_norm_100h_layer36_interw1_x1/avg_best10_checkpoint.pt
--results-path /xuchen/st/checkpoints/librispeech/asr//1123_purectc_conformer_norm_100h_layer36_interw1_x1
--batch-size 1
--max-tokens 50000
--beam 1
--lenpen 1.0
--infer-ctc-weight 0
--scoring wer
--early-exit-count 6 
Run command:
python3 /xuchen/st/S2T/fairseq_cli/generate.py
/xuchen/st/data/librispeech/asr
--config-yaml config.yaml
--gen-subset test-other
--task speech_to_text
--path /xuchen/st/checkpoints/librispeech/asr//1123_purectc_conformer_norm_100h_layer36_interw1_x1/avg_best10_checkpoint.pt
--results-path /xuchen/st/checkpoints/librispeech/asr//1123_purectc_conformer_norm_100h_layer36_interw1_x1
--batch-size 1
--max-tokens 50000
--beam 1
--lenpen 1.0
--infer-ctc-weight 0
--scoring wer
--early-exit-count 6 
2023-11-26 00:02:34 | INFO | fairseq_cli.generate | Translated 2,703 sentences (59,825 tokens) in 169.3s (15.96 sentences/s, 353.34 tokens/s)
Generate dev-clean with beam=1: WER: 8.96
2023-11-26 00:07:24 | INFO | fairseq_cli.generate | Translated 2,864 sentences (55,375 tokens) in 219.3s (13.06 sentences/s, 252.51 tokens/s)
Generate dev-other with beam=1: WER: 21.21
2023-11-26 00:10:57 | INFO | fairseq_cli.generate | Translated 2,620 sentences (57,617 tokens) in 158.0s (16.59 sentences/s, 364.77 tokens/s)
Generate test-clean with beam=1: WER: 9.34
2023-11-26 00:15:40 | INFO | fairseq_cli.generate | Translated 2,939 sentences (56,865 tokens) in 226.7s (12.96 sentences/s, 250.80 tokens/s)
Generate test-other with beam=1: WER: 21.57
...@@ -55,8 +55,7 @@ org_data_dir=${data_root_dir}/data/${dataset} ...@@ -55,8 +55,7 @@ org_data_dir=${data_root_dir}/data/${dataset}
data_dir=${data_root_dir}/data/${data_model_subfix} data_dir=${data_root_dir}/data/${data_model_subfix}
test_subset=dev-clean,dev-other,test-clean,test-other,all test_subset=dev-clean,dev-other,test-clean,test-other,all
# exp # exp sub_tag=
sub_tag=
exp_prefix=$(date "+%m%d") exp_prefix=$(date "+%m%d")
extra_tag= extra_tag=
extra_parameter= extra_parameter=
...@@ -71,19 +70,22 @@ step_valid=0 ...@@ -71,19 +70,22 @@ step_valid=0
# Decoding Settings # Decoding Settings
dec_model=checkpoint_best.pt dec_model=checkpoint_best.pt
cer=0 cer=0
ctc_infer=1 ctc_infer=0
infer_ctc_weight=0 infer_ctc_weight=0
ctc_self_ensemble=0 ctc_self_ensemble=0
ctc_inter_logit=0 ctc_inter_logit=0
n_average=10 n_average=10
batch_size=0 batch_size=1
beam_size=5 beam_size=5
len_penalty=1.0 len_penalty=1.0
single=0 single=0
epoch_ensemble=1 epoch_ensemble=0
best_ensemble=1 best_ensemble=1
infer_debug=0 infer_debug=0
infer_score=0 infer_score=0
infer_tag=ee6
infer_parameters="--early-exit-count 6"
#infer_parameters="--early-exit-layer 12"
#infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy" #infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"
data_config=config.yaml data_config=config.yaml
...@@ -364,6 +366,9 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then ...@@ -364,6 +366,9 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
if [[ ${infer_score} -eq 1 ]]; then if [[ ${infer_score} -eq 1 ]]; then
suffix=${suffix}_score suffix=${suffix}_score
fi fi
if [[ -n ${infer_tag} ]]; then
suffix=${suffix}_${infer_tag}
fi
suffix=`echo $suffix | sed -e "s#__#_#"` suffix=`echo $suffix | sed -e "s#__#_#"`
result_file=${model_dir}/decode_result_${suffix} result_file=${model_dir}/decode_result_${suffix}
...@@ -476,6 +481,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then ...@@ -476,6 +481,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
fi fi
done done
echo echo
echo "" >> ${result_file}
cat ${result_file} cat ${result_file}
done done
fi fi
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -25,5 +25,6 @@ num-workers: 8 ...@@ -25,5 +25,6 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt ...@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0014 lr: 0.0014
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True encoder-embed-norm: True
encoder-no-scale-embedding: True encoder-no-scale-embedding: True
......
...@@ -9,7 +9,7 @@ inter-mixup-decoder-emb: False ...@@ -9,7 +9,7 @@ inter-mixup-decoder-emb: False
ctc-mixup-consistent-weight: 0 ctc-mixup-consistent-weight: 0
inter-ctc-mixup-consistent-weight: 0 inter-ctc-mixup-consistent-weight: 0
mixup-consistent-weight: 0 mixup-consistent-weight: 0
cal-mixup-loss: True mixup-no-hard-loss: False
no-specaugment: False no-specaugment: False
layer-out-norm: False layer-out-norm: False
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt ...@@ -24,10 +24,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
......
...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt ...@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0014 lr: 0.0014
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15 dropout: 0.15
activation-fn: relu activation-fn: relu
......
...@@ -5,7 +5,7 @@ lr-scheduler: inverse_sqrt ...@@ -5,7 +5,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
......
...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt ...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 2e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -23,7 +23,7 @@ lr-scheduler: inverse_sqrt ...@@ -23,7 +23,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0015 lr: 0.0015
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt ...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.002 lr: 0.002
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt ...@@ -24,7 +24,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.002 lr: 0.002
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -23,7 +23,7 @@ lr-scheduler: inverse_sqrt ...@@ -23,7 +23,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.0015 lr: 0.0015
adam_betas: (0.9,0.98) adam-betas: (0.9,0.98)
criterion: ctc criterion: ctc
post-process: sentencepiece post-process: sentencepiece
......
...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt ...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
......
...@@ -25,5 +25,6 @@ num-workers: 8 ...@@ -25,5 +25,6 @@ num-workers: 8
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
label-smoothing: 0.1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt ...@@ -7,10 +7,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 8000 warmup-updates: 8000
lr: 1e-3 lr: 1e-3
adam_betas: (0.9,0.997) adam-betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
......
...@@ -5,6 +5,8 @@ cd ${THIS_DIR} ...@@ -5,6 +5,8 @@ cd ${THIS_DIR}
export ST_ROOT=/xuchen/st export ST_ROOT=/xuchen/st
export NCCL_DEBUG=INFO export NCCL_DEBUG=INFO
source /xuchen/tools/miniconda3/etc/profile.d/conda.sh
conda activate fair
echo "nameserver 114.114.114.114" >> /etc/resolv.conf echo "nameserver 114.114.114.114" >> /etc/resolv.conf
if [[ `pip list | grep fairseq | wc -l` -eq 0 ]]; then if [[ `pip list | grep fairseq | wc -l` -eq 0 ]]; then
......
...@@ -5,6 +5,8 @@ cd ${THIS_DIR} ...@@ -5,6 +5,8 @@ cd ${THIS_DIR}
export ST_ROOT=/xuchen/st export ST_ROOT=/xuchen/st
export NCCL_DEBUG=INFO export NCCL_DEBUG=INFO
source /xuchen/tools/miniconda3/etc/profile.d/conda.sh
conda activate fair
echo "nameserver 114.114.114.114" >> /etc/resolv.conf echo "nameserver 114.114.114.114" >> /etc/resolv.conf
if [[ `pip list | grep fairseq | wc -l` -eq 0 ]]; then if [[ `pip list | grep fairseq | wc -l` -eq 0 ]]; then
...@@ -57,6 +59,7 @@ do ...@@ -57,6 +59,7 @@ do
echo_flag=1 echo_flag=1
while : while :
do do
gpustat
record=$(mktemp -t temp.record.XXXXXX) record=$(mktemp -t temp.record.XXXXXX)
gpustat > $record gpustat > $record
all_devices=$(seq 0 "$(sed '1,2d' ${record} | wc -l)"); all_devices=$(seq 0 "$(sed '1,2d' ${record} | wc -l)");
......
...@@ -205,6 +205,10 @@ def do_setup(package_data): ...@@ -205,6 +205,10 @@ def do_setup(package_data):
"espnet", "espnet",
"torchaudio", "torchaudio",
"pandas", "pandas",
"sacremoses",
"tensorboard",
"jiwer",
"gpustat",
], ],
dependency_links=dependency_links, dependency_links=dependency_links,
packages=find_packages( packages=find_packages(
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论