Commit e7625a34 by xuchen

accu update

parent 45bb3195
......@@ -14,7 +14,7 @@ ctc-mixup-consistent-weight: 0
inter-ctc-mixup-consistent-weight: 0
mixup-consistent-weight: 0
cal-mixup-loss: True
mixup-no-hard-loss: False
no-specaugment: False
layer-out-norm: False
......
......@@ -14,7 +14,7 @@ ctc-mixup-consistent-weight: 0.15
inter-ctc-mixup-consistent-weight: 0.1
mixup-consistent-weight: 0.5
cal-mixup-loss: True
mixup-no-hard-loss: False
no-specaugment: False
layer-out-norm: False
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
......
......@@ -25,5 +25,6 @@ num-workers: 8
no-progress-bar: True
log-interval: 100
seed: 1
label-smoothing: 0.1
report-accuracy: True
skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True
decoder-normalize-before: True
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True
decoder-normalize-before: True
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True
decoder-normalize-before: True
......
......@@ -26,10 +26,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: join_speech_and_text_loss
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
......
......@@ -13,7 +13,7 @@ ctc-mixup-consistent-weight: 0
inter-ctc-mixup-consistent-weight: 0
mixup-consistent-weight: 0
cal-mixup-loss: True
mixup-no-hard-loss: False
no-specaugment: False
layer-out-norm: False
......
......@@ -45,10 +45,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d
subsampling-layers: 2
......
......@@ -13,7 +13,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
encoder-normalize-before: True
decoder-normalize-before: True
......
......@@ -13,7 +13,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
encoder-normalize-before: True
decoder-normalize-before: True
......
......@@ -13,7 +13,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
encoder-normalize-before: True
decoder-normalize-before: True
......
......@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
......
......@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
......
......@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
......
......@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
......
......@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
......
......@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15
activation-fn: relu
......
......@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15
activation-fn: relu
......
......@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15
activation-fn: relu
......
......@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15
activation-fn: relu
......
......@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15
activation-fn: relu
......
......@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.15
activation-fn: relu
......
......@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
......
......@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
......
......@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
......
# Append-based Interpolation Augmentation
inter-mixup: True
inter-mixup-layer: -1
inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0
inter-mixup-beta: 0.2
inter-mixup-keep-org: True
inter-mixup-decoder-emb: True
mixup-no-hard-loss: False
no-specaugment: False
layer-out-norm: False
inter-mixup-ratio-decay: False
inter-mixup-ratio-decay-params: 20000,40000,0
# Bilingual CTC
share-ctc-and-embed: True
share-xctc-and-embed: True
ctc-weight: 0.2
xctc-weight: 0.1
# InterCTC
inter-ctc-weight: 0.1
inter-ctc-layers: 6,9
share-inter-ctc: True
inter-xctc-weight: 0.05
inter-xctc-layers: 6,9
# Prediction-aware encoding
ctc-pae: inter_league
xctc-pae: inter_league
pae-unnorm-input: True
ctc-mixup-consistent-weight: 0.1
inter-ctc-mixup-consistent-weight: 0.05
xctc-mixup-consistent-weight: 0.05
inter-xctc-mixup-consistent-weight: 0.25
mixup-consistent-weight: 0.5
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
layer-padding-mask: True
\ No newline at end of file
# Append-based Interpolation Augmentation
inter-mixup: True
inter-mixup-layer: -1
inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0
inter-mixup-beta: 0.2
inter-mixup-keep-org: True
inter-mixup-decoder-emb: True
mixup-no-hard-loss: True
no-specaugment: False
layer-out-norm: False
inter-mixup-ratio-decay: False
inter-mixup-ratio-decay-params: 20000,40000,0
# Bilingual CTC
share-ctc-and-embed: True
share-xctc-and-embed: True
ctc-weight: 0.2
xctc-weight: 0.1
# InterCTC
inter-ctc-weight: 0.1
inter-ctc-layers: 6,9
share-inter-ctc: True
inter-xctc-weight: 0.05
inter-xctc-layers: 6,9
# Prediction-aware encoding
ctc-pae: inter_league
xctc-pae: inter_league
pae-unnorm-input: True
ctc-mixup-consistent-weight: 0.1
inter-ctc-mixup-consistent-weight: 0.05
xctc-mixup-consistent-weight: 0.05
inter-xctc-mixup-consistent-weight: 0.25
mixup-consistent-weight: 0.5
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
layer-padding-mask: True
\ No newline at end of file
......@@ -10,7 +10,7 @@ inter-mixup-beta: 0.2
inter-mixup-keep-org: True
inter-mixup-decoder-emb: True
cal-mixup-loss: True
mixup-no-hard-loss: False
no-specaugment: False
layer-out-norm: False
......
......@@ -10,7 +10,7 @@ inter-mixup-beta: 0.2
inter-mixup-keep-org: True
inter-mixup-decoder-emb: True
cal-mixup-loss: False
mixup-no-hard-loss: True
no-specaugment: False
layer-out-norm: False
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True
decoder-normalize-before: True
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True
decoder-normalize-before: True
......
......@@ -15,7 +15,7 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
encoder-normalize-before: True
decoder-normalize-before: True
......
arch: s2t_ctc
encoder-type: transformer
criterion: ctc
zero_infinity: True
xctc-weight: 1.0
ctc-weight: 1.0
share-ctc-and-embed: True
share-xctc-and-embed: True
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam-betas: (0.9,0.98)
encoder-normalize-before: True
decoder-normalize-before: True
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 2048
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
dropout: 0.15
activation-fn: relu
encoder-embed-dim: 512
encoder-ffn-embed-dim: 2048
encoder-layers: 18
encoder-attention-heads: 8
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
# InterCTC
share-inter-ctc: True
inter-ctc-weight: 1.0
inter-ctc-layers: 6,9,12,15
inter-xctc-weight: 1.0
inter-xctc-layers: 6,9,12,15
# Prediction-aware encoding
ctc-pae: inter_league
xctc-pae: inter_league
pae-unnorm-input: True
# Cross-layer attn
# xctc-cross-attn: True
# cross-attn-start-layer: 4
# cross-attn-layer: 3
# cross-attn-collaboration-mode: serial
# cross-attn-league-drop-net: True
# cross-attn-league-drop-net-prob: 0.1
# Curriculum learning mixing
xctc-pae-ground-truth-ratio: 0.8
xctc-pae-ground-truth-only-mistake: True
pae-oracle-smooth: True
......@@ -6,12 +6,11 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
ctc-weight: 0.3
share-ctc-and-embed: True
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True
decoder-normalize-before: True
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True
decoder-normalize-before: True
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True
decoder-normalize-before: True
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True
decoder-normalize-before: True
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
......
......@@ -11,7 +11,7 @@ if [ "$#" -eq 1 ]; then
fi
sacrebleu=1
ctc_infer=0
ctc_infer=1
n_average=10
beam_size=5
infer_ctc_weight=0
......
......@@ -354,8 +354,8 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
mv tmp.log $log
log=${model_dir}/train.log
# cmd="${cmd} 2>&1 | tee -a ${log}"
cmd="${cmd} >> ${log} 2>&1 "
cmd="${cmd} 2>&1 | tee -a ${log}"
#cmd="${cmd} >> ${log} 2>&1 "
if [[ $eval -eq 1 ]]; then
# tensorboard
port=6666
......
set -e
eval=1
lcrm=0
tokenizer=0
root_dir=~/st/Fairseq-S2T
data_dir=~/st/data/test
vocab_dir=~/st/data/mustc/st/en-de
asr_vocab_prefix=spm_unigram10000_st_share
src_lang=en
tgt_lang=de
subsets=(2019)
cp -r ${vocab_dir}/${asr_vocab_prefix}.* ${data_dir}/${src_lang}-${tgt_lang}
rm -rf ${data_dir}/${src_lang}-${tgt_lang}/fbank80.zip
splits=$(echo ${subsets[*]} | sed 's/ /,/g')
cmd="python ${root_dir}/examples/speech_to_text/prep_st_data.py
--data-root ${data_dir}
--output-root ${data_dir}
--splits ${splits}
--task asr
--src-lang ${src_lang}
--tgt-lang ${tgt_lang}
--add-src
--share
--asr-prefix ${asr_vocab_prefix}
--cmvn-type utterance"
if [[ ${lcrm} -eq 1 ]]; then
cmd="$cmd
--lowercase-src
--rm-punc-src"
fi
if [[ ${tokenizer} -eq 1 ]]; then
cmd="$cmd
--tokenizer"
fi
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval ${cmd}
arch: s2t_transformer_s
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
\ No newline at end of file
arch: s2t_transformer_s
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
\ No newline at end of file
train-subset: train
valid-subset: dev
max-epoch: 100
max-update: 100000
patience: 20
post-process: sentencepiece
# best-checkpoint-metric: loss
# maximize-best-checkpoint-metric: False
eval-wer: True
eval-wer-args: {"beam": 1, "lenpen": 1.0}
eval-wer-tok-args: {"wer_remove_punct": true, "wer_lowercase": true, "wer_char_level": true}
eval-wer-remove-bpe: sentencepiece
eval-wer-print-samples: True
best_checkpoint_metric: dec_wer
maximize_best_checkpoint_metric: False
no-epoch-checkpoints: True
# keep-last-epochs: 10
keep-best-checkpoints: 10
num-workers: 8
no-progress-bar: True
log-interval: 100
seed: 1
label-smoothing: 0.1
report-accuracy: True
skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
arch: s2t_transformer_m
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 0.0014
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 2048
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
dropout: 0.15
activation-fn: relu
encoder-embed-dim: 512
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 8
decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 8
\ No newline at end of file
arch: s2t_transformer_m
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
subsampling-type: conv2d
subsampling-layers: 2
subsampling-filter: 512
subsampling-kernel: 3
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: relu
dropout: 0.15
activation-fn: relu
encoder-embed-dim: 512
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 8
decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 8
cnn-module-norm: layer_norm
load-pretrained-encoder-from: /home/xuchen/after.pt
load-pretrained-decoder-from: /home/xuchen/after.pt
#load-pretrained-decoder-from:
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
layer-padding-mask: True
\ No newline at end of file
ctc-weight: 0.3
\ No newline at end of file
use-enc-dlcl: True
use-dec-dlcl: True
compression-metric: threshold
compression-mode: create
compression-layers: 6,9
compression-threshold: 0.95
compression-norm: True
compression-pos: True
\ No newline at end of file
inter-ctc-weight: 0.2
inter-ctc-layers: 6,9
inter-ctc-drop-prob: 0
share-inter-ctc: True
ctc-pae: none
# ctc-pae: inter_league
# ctc-pae-ground-truth-ratio: 0.1
# pae-gumbel: True
# pae-distribution-hard: True
# pae-drop-prob: 0.0
# pae-distribution-cutoff: 10
# share-pae-and-ctc: True
# pae-embed-norm: True
# pae-out-norm: True
# ctc-self-distill-weight: 1
# target-ctc-self-distill-weight: 1
# ctc-self-distill-prob: 0.1
# cal-all-ctc: True
\ No newline at end of file
inter-mixup: True
inter-mixup-layer: -1
inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0
inter-mixup-beta: 0.2
inter-mixup-keep-org: False
inter-mixup-decoder-emb: False
ctc-mixup-consistent-weight: 0
inter-ctc-mixup-consistent-weight: 0
mixup-consistent-weight: 0
mixup-no-hard-loss: False
no-specaugment: False
layer-out-norm: False
inter-mixup-ratio-decay: False
inter-mixup-ratio-decay-params: 20000,40000,0
\ No newline at end of file
inter-ctc-mlo: 1:2:3
\ No newline at end of file
encoder-embed-norm: True
encoder-no-scale-embedding: True
\ No newline at end of file
arch: pdss2t_transformer_s_8
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
dropout: 0.1
activation-fn: relu
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
\ No newline at end of file
arch: pdss2t_transformer_s_16
encoder-embed-dim: 256
pds-stages: 4
pds-layers: 2_2_6_2
pds-ratios: 2_2_2_2
pds-fusion: False
pds-fusion-method: all_conv2
pds-fusion-layers: 0_1_1_1
pds-fusion-weight: 0.2_0.3_0.5
pds-embed-dims: 256_256_256_256
pds-ds-method: conv
pds-embed-norm: True
pds-position-embed: 1_1_1_1
pds-kernel-sizes: 5_5_5_5
pds-ffn-ratios: 8_8_8_8
pds-attn-heads: 4_4_4_4
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
dropout: 0.1
activation-fn: relu
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
\ No newline at end of file
arch: pdss2t_transformer_s_32
encoder-embed-dim: 256
pds-stages: 5
pds-layers: 2_2_3_3_2
pds-ratios: 2_2_2_2_2
pds-fusion: False
pds-fusion-method: all_conv2
pds-fusion-layers: 0_0_1_1_1
pds-fusion-weight: 0.2_0.3_0.5
pds-embed-dims: 256_256_256_256_256
pds-ds-method: conv
pds-embed-norm: True
pds-position-embed: 1_1_1_1_1
pds-kernel-sizes: 5_5_5_5_5
pds-ffn-ratios: 8_8_8_8_8
pds-attn-heads: 4_4_4_4_4
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
dropout: 0.1
activation-fn: relu
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
\ No newline at end of file
arch: pdss2t_transformer_s_8
encoder-embed-dim: 256
pds-stages: 4
pds-layers: 3_3_3_3
pds-ratios: 2_2_1_2
pds-fusion: False
pds-fusion-method: all_conv2
pds-fusion-layers: 0_1_1_1
pds-fusion-weight: 0.2_0.3_0.5
pds-embed-dims: 256_256_256_256
pds-ds-method: conv
pds-embed-norm: True
pds-position-embed: 1_1_1_1
pds-kernel-sizes: 5_5_5_5
pds-ffn-ratios: 8_8_8_8
pds-attn-heads: 4_4_4_4
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
dropout: 0.1
activation-fn: relu
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
\ No newline at end of file
arch: pdss2t_transformer_m_8
encoder-embed-dim: 512
pds-stages: 4
pds-layers: 3_3_3_3
pds-ratios: 2_2_1_2
pds-fusion: False
pds-fusion-method: all_conv2
pds-fusion-layers: 0_1_1_1
pds-fusion-weight: 0.2_0.3_0.5
pds-embed-dims: 512_512_512_512
pds-ds-method: conv
pds-embed-norm: True
pds-position-embed: 1_1_1_1
pds-kernel-sizes: 5_5_5_5
pds-ffn-ratios: 4_4_4_4
pds-attn-heads: 8_8_8_8
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 0.0014
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
dropout: 0.15
activation-fn: relu
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 8
decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 8
arch: s2t_ctc
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam-betas: (0.9,0.98)
criterion: ctc
zero_infinity: True
ctc-weight: 1.0
encoder-normalize-before: True
decoder-normalize-before: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 18
encoder-attention-heads: 4
\ No newline at end of file
encoder-attention-type: rel_pos
#encoder-attention-type: rel_pos_legacy
#encoder-attention-type: rel_selfattn
#encoder-attention-type: relative
#decoder-attention-type: relative
#max-encoder-relative-length: 100
#max-decoder-relative-length: 20
xctc-weight: 0.3
share-xctc-and-embed: True
\ No newline at end of file
inter-xctc-weight: 0.2
inter-xctc-layers: 6,9
xctc-pae: none
# xctc-pae: inter_league
xctc-cross-attn: False
cross-attn-start-layer: 7
cross-attn-layer: 6
cross-attn-collaboration-mode: parallel
cross-attn-league-s1-ratio: 0.5
cross-attn-league-s2-ratio: 0.5
cross-attn-league-out-norm: False
cross-attn-league-gated: False
cross-attn-league-drop-net: False
cross-attn-league-drop-net-prob: 0.2
cross-attn-league-drop-net-mix: False
# xctc-pae-ground-truth-ratio: 0.1
# xctc-pae-ground-truth-ratio-adaptive: True
# xctc-pae-ground-truth-only-mistake: True
# pae-oracle-smooth: True
# pae-gumbel: True
# pae-distribution-hard: True
# pae-drop-prob: 0.0
# pae-distribution-cutoff: 10
# share-pae-and-xctc: True
# pae-embed-norm: True
# pae-out-norm: True
# ctc-self-distill-weight: 1
# target-ctc-self-distill-weight: 1
# ctc-self-distill-prob: 0.1
# cal-all-ctc: True
\ No newline at end of file
#!/usr/bin/env bash
gpu_num=1
data_tag=asr
test_subset=(dev test)
exp_name=
if [ "$#" -eq 1 ]; then
exp_name=$1
fi
cer=1
ctc_infer=1
n_average=10
beam_size=5
infer_ctc_weight=0.1
len_penalty=1.0
max_tokens=50000
batch_size=1
infer_debug=0
dec_model=checkpoint_best.pt
cmd="./run.sh
--stage 2
--stop_stage 2
--gpu_num ${gpu_num}
--exp_name ${exp_name}
--n_average ${n_average}
--cer ${cer}
--ctc_infer ${ctc_infer}
--beam_size ${beam_size}
--len_penalty ${len_penalty}
--batch_size ${batch_size}
--max_tokens ${max_tokens}
--dec_model ${dec_model}
--ctc_infer ${ctc_infer}
--infer_ctc_weight ${infer_ctc_weight}
--infer_debug ${infer_debug}
"
if [[ -n ${data_tag} ]]; then
cmd="$cmd --data_tag ${data_tag}"
fi
if [[ ${#test_subset[@]} -ne 0 ]]; then
subsets=$(echo ${test_subset[*]} | sed 's/ /,/g')
cmd="$cmd --test_subset ${subsets}"
fi
echo $cmd
eval $cmd
#!/usr/bin/env bash
set -e
ref=$1
gen=$2
tokenizer=$3
lang=$4
lang_pair=en-${lang}
record=$(mktemp -t temp.record.XXXXXX)
if [[ ${tokenizer} -eq 1 ]]; then
echo "MultiBLEU" > ${record}
cmd="multi-bleu.perl ${ref} < ${gen}"
eval $cmd | head -n 1 >> ${record}
cmd="detokenizer.perl -q -l ${lang} --threads 32 < ${ref} > ${ref}.detok"
eval $cmd
cmd="detokenizer.perl -q -l ${lang} --threads 32 < ${gen} > ${gen}.detok"
eval $cmd
ref=${ref}.detok
gen=${gen}.detok
fi
echo "SacreBLEU" >> ${record}
cmd="cat ${gen} | sacrebleu ${ref} -m bleu -w 4 -l ${lang_pair}"
eval $cmd >> ${record}
cat ${record}
rm ${record}
\ No newline at end of file
#!/usr/bin/env bash
set -e
infer_dir=$1
tag=$2
s2s_infer_file=${infer_dir}/$3
org_ctc_infer_file=${infer_dir}/$4
ref=$5
tokenizer=$6
lang=$7
idx=${infer_dir}/${tag}_idx
ctc_infer=${infer_dir}/${tag}_ctc_infer
ctc_infer_sort=${infer_dir}/${tag}_ctc_infer_sort
if [[ ! -f ${ctc_infer_sort} ]]; then
cut -f1 ${s2s_infer_file} > ${idx}
paste ${idx} ${org_ctc_infer_file} > ${ctc_infer}
sort -n -t $'\t' ${ctc_infer} | cut -f2 > ${ctc_infer_sort}
fi
gen=${ctc_infer_sort}
./cal_bleu.sh ${ref} ${gen} ${tokenizer} ${lang}
\ No newline at end of file
import unicodedata
import re
import jiwer
import jiwer.transforms as tr
import sys
ref_file = sys.argv[1]
hyp_file = sys.argv[2]
wer_standardize = tr.Compose(
[
tr.SubstituteRegexes({r"<<unk>>": r"@"}),
tr.ToLowerCase(),
tr.RemovePunctuation(),
tr.ExpandCommonEnglishContractions(),
tr.RemoveKaldiNonWords(),
tr.RemoveWhiteSpace(replace_by_space=True),
tr.ReduceToListOfListOfWords(),
]
)
cer_standardize = tr.Compose(
[
tr.SubstituteRegexes({r"<<unk>>": r"@"}),
tr.ToLowerCase(),
tr.RemovePunctuation(),
tr.Strip(),
tr.ReduceToListOfListOfChars(),
]
)
def process_text(text):
# 将中文字符和英文字符间加空格
text = re.sub(r'([\u4e00-\u9fa5])([a-zA-Z0-9])', r'\1 \2', text)
text = re.sub(r'([a-zA-Z0-9])([\u4e00-\u9fa5])', r'\1 \2', text)
# 将中文字符间加空格
text = re.sub(r'([\u4e00-\u9fa5])', r'\1 ', text)
# 去掉多余的空格
text = re.sub(r'\s+', ' ', text).strip()
return text
ref_lines = open(ref_file, "r").readlines()
hyp_lines = open(hyp_file, "r").readlines()
ref_lines = [process_text(line) for line in ref_lines]
hyp_lines = [process_text(line) for line in hyp_lines]
print(hyp_lines[:10])
wer = jiwer.wer(ref_lines, hyp_lines,
truth_transform=wer_standardize,
hypothesis_transform=wer_standardize,
)
cer = jiwer.cer(ref_lines, hyp_lines,
truth_transform=cer_standardize,
hypothesis_transform=cer_standardize,
)
print("WER: %.4f" % wer)
print("CER: %.4f" % cer)
#!/usr/bin/env bash
set -e
infer_dir=$1
tag=$2
s2s_infer_file=${infer_dir}/$3
org_ctc_infer_file=${infer_dir}/$4
ref=$5
idx=${infer_dir}/${tag}_idx
ctc_infer=${infer_dir}/${tag}_ctc_infer
ctc_infer_sort=${infer_dir}/${tag}_ctc_infer_sort
cut -f1 ${s2s_infer_file} > ${idx}
paste ${idx} ${org_ctc_infer_file} > ${ctc_infer}
sort -n -t $'\t' ${ctc_infer} | cut -f2 > ${ctc_infer_sort}
python3 ./cal_wer.py ${ref} ${ctc_infer_sort}
\ No newline at end of file
import sys
import csv
tsv_file = sys.argv[1]
out_file = sys.argv[2]
extract_item = sys.argv[3]
with open(tsv_file) as f:
reader = csv.DictReader(
f,
delimiter="\t",
quotechar=None,
doublequote=False,
lineterminator="\n",
quoting=csv.QUOTE_NONE,
)
samples = [dict(e) for e in reader]
fw = open(out_file, "w", encoding="utf-8")
for s in samples:
if extract_item in s:
fw.write("%s\n" % s[extract_item])
else:
print("Error in sample: ")
print(s)
exit()
#!/usr/bin/env bash
gpu_num=4
cmd="sh train.sh"
while :
do
record=$(mktemp -t temp.record.XXXXXX)
gpustat > $record
all_devices=$(seq 0 "$(sed '1,2d' ${record} | wc -l)");
count=0
for dev in ${all_devices[@]}
do
line=$((dev + 2))
use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)
if [[ $use -lt 100 ]]; then
device[$count]=$dev
count=$((count + 1))
if [[ $count -eq $gpu_num ]]; then
break
fi
fi
done
if [[ ${#device[@]} -lt $gpu_num ]]; then
sleep 60s
else
echo "Run $cmd"
eval $cmd
sleep 10s
exit
fi
done
#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey);
# Arnab Ghoshal, Karel Vesely
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Parse command-line options.
# To be sourced by another script (as in ". parse_options.sh").
# Option format is: --option-name arg
# and shell variable "option_name" gets set to value "arg."
# The exception is --help, which takes no arguments, but prints the
# $help_message variable (if defined).
###
### The --config file options have lower priority to command line
### options, so we need to import them first...
###
# Now import all the configs specified by command-line, in left-to-right order
for ((argpos=1; argpos<$#; argpos++)); do
if [ "${!argpos}" == "--config" ]; then
argpos_plus1=$((argpos+1))
config=${!argpos_plus1}
[ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
. $config # source the config file.
fi
done
###
### Now we process the command line options
###
i=1
argv="$@"
while true; do
key=${!i}
j=$(($i + 1))
value=${!j}
[ -z "${!i:-}" ] && break; # break if there are no arguments
case "${key}" in
# If the enclosing script is called with --help option, print the help
# message and exit. Scripts should put help messages in $help_message
--help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
else printf "$help_message\n" 1>&2 ; fi;
exit 0 ;;
--*=*) echo "$0: options to scripts must be of the form --name value, got '${key}}'"
exit 1 ;;
# If the first command-line argument begins with "--" (e.g. --foo-bar),
# then work out the variable name as $name, which will equal "foo_bar".
--*) name=`echo "${key}" | sed s/^--// | sed s/-/_/g`;
# Next we test whether the variable in question is undefned-- if so it's
# an invalid option and we die. Note: $0 evaluates to the name of the
# enclosing script.
# The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
# is undefined. We then have to wrap this test inside "eval" because
# foo_bar is itself inside a variable ($name).
#eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
oldval="`eval echo \\$$name`";
# Work out whether we seem to be expecting a Boolean argument.
if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
was_bool=true;
else
was_bool=false;
fi
# Set the variable to the right value-- the escaped quotes make it work if
# the option had spaces, like --cmd "queue.pl -sync y"
# echo $name
eval $name=\"${value}\";
# Check that Boolean-valued arguments are really Boolean.
if $was_bool && [[ "${value}" != "true" && "${value}" != "false" ]]; then
echo "$0: expected \"true\" or \"false\": ${key} ${value}" 1>&2
exit 1;
fi
# shift 2;
i=$(($i + 2))
;;
*) break;
esac
done
# Check for an empty argument to the --cmd option, which can easily occur as a
# result of scripting errors.
[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
true; # so this script returns exit code 0.
get_devices(){
gpu_num=$1
use_cpu=$2
device=()
while :
do
record=$(mktemp -t temp.record.XXXXXX)
gpustat > $record
all_devices=$(seq 0 "$(sed '1,2d' ${record} | wc -l)");
count=0
for dev in ${all_devices[@]}
do
line=$((dev + 2))
use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)
if [[ $use -lt 1000 ]]; then
device[$count]=$dev
count=$((count + 1))
if [[ $count -eq $gpu_num ]]; then
break
fi
fi
done
if [[ ${#device[@]} -lt $gpu_num ]]; then
if [[ $use_cpu -eq 1 ]]; then
device=(-1)
else
sleep 60s
fi
else
break
fi
done
echo ${device[*]} | sed 's/ /,/g'
return $?
}
#!/usr/bin/env bash
# training the model
gpu_num=1
update_freq=1
max_tokens=100000
extra_tag=
extra_parameter=
#extra_tag="${extra_tag}"
#extra_parameter="${extra_parameter} "
exp_tag=
# CTC
config_list=(purectc)
# Transformer
config_list=(base ctc)
# Conformer
#config_list=(base conformer ctc)
# PDS
config_list=(purectc_pds_base_8)
config_list=(pds_base_8)
# exp full name
exp_name=
train_config=$(echo ${config_list[*]} | sed 's/ /,/g')
cmd="./run.sh
--stage 1
--stop_stage 2
--gpu_num ${gpu_num}
--update_freq ${update_freq}
--train_config ${train_config}
--max_tokens ${max_tokens}
"
if [[ -n ${exp_name} ]]; then
cmd="$cmd --exp_name ${exp_name}"
fi
if [[ -n ${exp_tag} ]]; then
cmd="$cmd --exp_tag ${exp_tag}"
fi
if [[ -n ${extra_tag} ]]; then
cmd="$cmd --extra_tag ${extra_tag}"
fi
if [[ -n ${extra_parameter} ]]; then
cmd="$cmd --extra_parameter \"${extra_parameter}\""
fi
echo ${cmd}
eval ${cmd}
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d
subsampling-layers: 2
......
......@@ -25,5 +25,6 @@ num-workers: 8
no-progress-bar: True
log-interval: 100
seed: 1
label-smoothing: 0.1
report-accuracy: True
skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
......
......@@ -6,10 +6,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv2d
subsampling-layers: 2
......
......@@ -8,6 +8,6 @@ inter-mixup-keep-org: False
inter-mixup-decoder-emb: False
ctc-mixup-consistent-weight: 0
mixup-consistent-weight: 0
cal-mixup-loss: True
mixup-no-hard-loss: False
no-specaugment: False
layer-out-norm: False
\ No newline at end of file
......@@ -10,10 +10,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
......
......@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
......
......@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
......
......@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
......
......@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
......
......@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
......
......@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
......
......@@ -23,10 +23,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
......
......@@ -22,10 +22,9 @@ lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
adam-betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论