Commit 99763132 by xuchen

optimize the shell scripts

parent 5160a9f5
...@@ -24,7 +24,7 @@ warmup-updates: 10000 ...@@ -24,7 +24,7 @@ warmup-updates: 10000
lr: 2e-3 lr: 2e-3
#adam_betas: (0.9,0.98) #adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1 label_smoothing: 0.1
conv-kernel-sizes: 5,5 conv-kernel-sizes: 5,5
......
...@@ -2,4 +2,4 @@ arch: s2t_conformer_s ...@@ -2,4 +2,4 @@ arch: s2t_conformer_s
macaron-style: True macaron-style: True
use-cnn-module: True use-cnn-module: True
cnn-module-kernel: 15 cnn-module-kernel: 31
train-subset: train-clean-100,train-clean-360,train-other-500
valid-subset: dev-clean
max-epoch: 100
max-update: 300000
num-workers: 8
patience: 10
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
arch: s2t_transformer_s
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
#adam_betas: (0.9,0.98)
ctc-weight: 0.3 ctc-weight: 0.3
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
conv-kernel-sizes: 5,5
conv-channels: 1024
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
#train-subset: train-clean-100,train-clean-360,train-other-500
train-subset: train-clean-100 train-subset: train-clean-100
valid-subset: dev-clean valid-subset: dev-clean
max-epoch: 100 max-epoch: 100
max-update: 300000 max-update: 300000
num-workers: 0 num-workers: 8
patience: 10 patience: 10
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
report-accuracy: True report-accuracy: True
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
arch: s2t_transformer_s arch: s2t_transformer_s
share-decoder-input-output-embed: True share-decoder-input-output-embed: True
optimizer: adam optimizer: adam
...@@ -22,26 +24,20 @@ warmup-updates: 10000 ...@@ -22,26 +24,20 @@ warmup-updates: 10000
lr: 2e-3 lr: 2e-3
#adam_betas: (0.9,0.98) #adam_betas: (0.9,0.98)
ctc-weight: 0.3
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
ctc-weight: 0.3
label_smoothing: 0.1 label_smoothing: 0.1
conv-kernel-sizes: 5,5 conv-kernel-sizes: 5,5
conv-channels: 1024 conv-channels: 1024
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
#encoder-embed-dim: 256 encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048 encoder-ffn-embed-dim: 2048
encoder-layers: 12 encoder-layers: 12
decoder-layers: 3 decoder-layers: 6
encoder-attention-heads: 4 encoder-attention-heads: 4
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 31
adpater: subsample
decoder-embed-dim: 256 decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048 decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4 decoder-attention-heads: 4
......
arch: pys2t_transformer_s train-subset: train-clean-100,train-clean-360,train-other-500
#conv-kernel-sizes: 5 valid-subset: dev-clean
encoder-embed-dim: 512
pyramid-layers: 3_6_9
ctc-layer: 7
train-subset: train_st
valid-subset: dev_st
max-epoch: 50 max-epoch: 100
max-update: 100000 max-update: 300000
num-workers: 8 num-workers: 8
patience: 10 patience: 20
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
...@@ -20,6 +14,7 @@ report-accuracy: True ...@@ -20,6 +14,7 @@ report-accuracy: True
#load-pretrained-encoder-from: #load-pretrained-encoder-from:
#load-pretrained-decoder-from: #load-pretrained-decoder-from:
arch: s2t_transformer_m
share-decoder-input-output-embed: True share-decoder-input-output-embed: True
optimizer: adam optimizer: adam
clip-norm: 10.0 clip-norm: 10.0
...@@ -29,20 +24,21 @@ warmup-updates: 10000 ...@@ -29,20 +24,21 @@ warmup-updates: 10000
lr: 2e-3 lr: 2e-3
#adam_betas: (0.9,0.98) #adam_betas: (0.9,0.98)
ctc-weight: 0.3
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1 label_smoothing: 0.1
conv-kernel-sizes: 5,5
conv-channels: 1024 conv-channels: 1024
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
encoder-embed-dim: 512
encoder-ffn-embed-dim: 2048 encoder-ffn-embed-dim: 2048
encoder-layers: 12 encoder-layers: 12
decoder-layers: 6 decoder-layers: 6
encoder-attention-heads: 4 encoder-attention-heads: 8
decoder-embed-dim: 256 decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048 decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4 decoder-attention-heads: 8
attention-dropout: 0.1 attention-dropout: 0.1
activation-dropout: 0.1 activation-dropout: 0.1
arch: pys2t_transformer_s arch: pys2t_transformer_s
encoder-embed-dim: 512 encoder-embed-dim: 256
pyramid-stages: 3 pyramid-stages: 3
pyramid-layers: 3_6_3 pyramid-layers: 3_6_3
#encoder-attention-type: reduced pyramid-fuse-way: all_conv
#pyramid-attn-sample-ratios: 4_2_1 pyramid-fuse: True
#pyramid-block-attn: True
#pyramid-fuse-way: gated
pyramid-use-ppm: True
pyramid-sr-ratios: 2_2_2 pyramid-sr-ratios: 2_2_2
pyramid-embed-dims: 128_256_512 pyramid-embed-dims: 256_256_256
pyramid-reduced-embed: conv pyramid-reduced-embed: conv
pyramid-embed-norm: True pyramid-embed-norm: True
pyramid-position-embed: 1_1_1 pyramid-position-embed: 1_1_1
pyramid-kernel-sizes: 5_5_5 pyramid-kernel-sizes: 5_5_5
pyramid-ffn-ratios: 8_8_4 pyramid-ffn-ratios: 8_8_8
pyramid-heads: 2_4_8 pyramid-heads: 4_4_4
train-subset: train-clean-100,train-clean-360,train-other-500 train-subset: train-clean-100,train-clean-360,train-other-500
valid-subset: dev-clean valid-subset: dev-clean
...@@ -23,7 +20,7 @@ max-epoch: 100 ...@@ -23,7 +20,7 @@ max-epoch: 100
max-update: 300000 max-update: 300000
num-workers: 8 num-workers: 8
patience: 10 patience: 20
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
...@@ -42,7 +39,6 @@ lr: 2e-3 ...@@ -42,7 +39,6 @@ lr: 2e-3
#adam_betas: (0.9,0.98) #adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
ctc-weight: 0
label_smoothing: 0.1 label_smoothing: 0.1
conv-channels: 1024 conv-channels: 1024
......
arch: pys2t_transformer_s arch: pys2t_transformer_s
encoder-embed-dim: 512 encoder-embed-dim: 256
pyramid-stages: 3 pyramid-stages: 4
pyramid-layers: 3_6_3 #pyramid-dropout: 0
pyramid-sr-ratios: 2_2_2 pyramid-layers: 2_2_6_2
#pyramid-block-attn: True pyramid-sr-ratios: 2_2_2_2
#pyramid-fuse-way: add pyramid-fuse: True
pyramid-use-ppm: True pyramid-fuse-way: all_conv
pyramid-embed-dims: 128_256_512 pyramid-embed-dims: 256_256_256_256
pyramid-reduced-embed: conv pyramid-reduced-embed: conv
pyramid-embed-norm: True pyramid-embed-norm: True
pyramid-position-embed: 1_1_1 pyramid-position-embed: 1_1_1_1
pyramid-kernel-sizes: 5_5_5 pyramid-kernel-sizes: 5_5_5_5
pyramid-ffn-ratios: 8_8_4 pyramid-ffn-ratios: 8_8_8_8
pyramid-heads: 2_4_8 pyramid-heads: 4_4_4_4
train-subset: train_asr train-subset: train-clean-100,train-clean-360,train-other-500
valid-subset: dev_asr valid-subset: dev-clean
max-epoch: 50 max-epoch: 100
max-update: 100000 max-update: 300000
num-workers: 8 num-workers: 8
patience: 10 patience: 20
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
...@@ -39,7 +39,7 @@ warmup-updates: 10000 ...@@ -39,7 +39,7 @@ warmup-updates: 10000
lr: 2e-3 lr: 2e-3
#adam_betas: (0.9,0.98) #adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1 label_smoothing: 0.1
conv-channels: 1024 conv-channels: 1024
......
arch: pys2t_transformer_s
encoder-embed-dim: 256
pyramid-stages: 4
pyramid-layers: 3_3_3_3
pyramid-sr-ratios: 2_2_1_2
pyramid-fuse: True
pyramid-fuse-way: all_conv
pyramid-embed-dims: 256_256_256_256
pyramid-reduced-embed: conv
pyramid-embed-norm: True
pyramid-position-embed: 1_1_1_1
pyramid-kernel-sizes: 5_5_5_5
pyramid-ffn-ratios: 8_8_8_8
pyramid-heads: 4_4_4_4
train-subset: train-clean-100,train-clean-360,train-other-500
valid-subset: dev-clean
max-epoch: 100
max-update: 300000
num-workers: 8
patience: 20
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
#adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
conv-channels: 1024
dropout: 0.1
activation-fn: relu
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
arch: pys2t_transformer_s
encoder-embed-dim: 256
pyramid-stages: 4
pyramid-layers: 3_3_8_4
pyramid-sr-ratios: 2_2_2_2
pyramid-fuse: True
pyramid-fuse-way: all_conv
pyramid-embed-dims: 256_256_256_256
pyramid-reduced-embed: conv
pyramid-embed-norm: True
pyramid-position-embed: 1_1_1_1
pyramid-kernel-sizes: 5_5_5_5
pyramid-ffn-ratios: 8_8_8_8
pyramid-heads: 4_4_4_4
train-subset: train-clean-100,train-clean-360,train-other-500
valid-subset: dev-clean
max-epoch: 100
max-update: 300000
num-workers: 8
patience: 20
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
#adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
conv-channels: 1024
dropout: 0.1
activation-fn: relu
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
arch: pys2t_transformer_s arch: pys2t_transformer_s
encoder-embed-dim: 512 encoder-embed-dim: 256
pyramid-stages: 3 pyramid-stages: 4
pyramid-layers: 3_6_3 pyramid-layers: 5_5_15_5
encoder-attention-type: reduced pyramid-sr-ratios: 2_2_2_2
pyramid-attn-sample-ratios: 4_2_1 pyramid-fuse: True
pyramid-sr-ratios: 2_2_2 pyramid-fuse-way: all_conv
pyramid-embed-dims: 128_256_512 pyramid-embed-dims: 256_256_256_256
pyramid-reduced-embed: conv pyramid-reduced-embed: conv
pyramid-embed-norm: True pyramid-embed-norm: True
pyramid-position-embed: 1_1_1 pyramid-position-embed: 1_1_1_1
pyramid-kernel-sizes: 5_5_5 pyramid-kernel-sizes: 5_5_5_5
pyramid-ffn-ratios: 8_8_4 pyramid-ffn-ratios: 8_8_8_8
pyramid-heads: 2_4_8 pyramid-heads: 4_4_4_4
train-subset: train-clean-100,train-clean-360,train-other-500 train-subset: train-clean-100,train-clean-360,train-other-500
valid-subset: dev-clean valid-subset: dev-clean
...@@ -20,7 +20,7 @@ max-epoch: 100 ...@@ -20,7 +20,7 @@ max-epoch: 100
max-update: 300000 max-update: 300000
num-workers: 8 num-workers: 8
patience: 10 patience: 20
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
...@@ -38,14 +38,14 @@ warmup-updates: 10000 ...@@ -38,14 +38,14 @@ warmup-updates: 10000
lr: 2e-3 lr: 2e-3
#adam_betas: (0.9,0.98) #adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1 label_smoothing: 0.1
conv-channels: 1024 conv-channels: 1024
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
encoder-ffn-embed-dim: 2048 encoder-ffn-embed-dim: 2048
encoder-layers: 6 encoder-layers: 12
decoder-layers: 6 decoder-layers: 6
encoder-attention-heads: 4 encoder-attention-heads: 4
......
...@@ -2,30 +2,26 @@ arch: pys2t_transformer_s ...@@ -2,30 +2,26 @@ arch: pys2t_transformer_s
encoder-embed-dim: 512 encoder-embed-dim: 512
pyramid-stages: 4 pyramid-stages: 4
pyramid-layers: 2_2_6_2 pyramid-layers: 2_2_6_2
#encoder-attention-type: reduced #pyramid-layers: 3_3_3_3
#pyramid-attn-sample-ratios: 8_4_2_1
#pyramid-block-attn: True
#pyramid-fuse-way: add
pyramid-sr-ratios: 2_2_2_2 pyramid-sr-ratios: 2_2_2_2
pyramid-use-ppm: True pyramid-fuse: True
pyramid-embed-dims: 128_128_256_512 pyramid-fuse-way: all_conv
pyramid-reduced-embed: fuse pyramid-embed-dims: 512_512_512_512
pyramid-reduced-embed: conv
pyramid-embed-norm: True pyramid-embed-norm: True
pyramid-position-embed: 1_1_1_1 pyramid-position-embed: 1_1_1_1
pyramid-kernel-sizes: 5_5_5_5 pyramid-kernel-sizes: 5_5_5_5
pyramid-ffn-ratios: 8_8_8_4 pyramid-ffn-ratios: 8_8_8_8
pyramid-heads: 2_2_4_8 pyramid-heads: 8_8_8_8
#ctc-layer: 8
#train-subset: train-clean-100,train-clean-360,train-other-500 train-subset: train-clean-100,train-clean-360,train-other-500
train-subset: train-clean-100
valid-subset: dev-clean valid-subset: dev-clean
max-epoch: 100 max-epoch: 100
max-update: 300000 max-update: 300000
num-workers: 8 num-workers: 8
patience: 10 patience: 20
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
...@@ -44,7 +40,6 @@ lr: 2e-3 ...@@ -44,7 +40,6 @@ lr: 2e-3
#adam_betas: (0.9,0.98) #adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
ctc-weight: 0.0
label_smoothing: 0.1 label_smoothing: 0.1
conv-channels: 1024 conv-channels: 1024
......
#encoder-attention-type: rel_selfattn encoder-attention-type: rel_selfattn
encoder-attention-type: relative #encoder-attention-type: relative
max-encoder-relative-length: 100 #max-encoder-relative-length: 100
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
gpu_num=1 gpu_num=1
data_dir= data_dir=
test_subset=(test-clean test-other) test_subset=(dev-clean dev-other test-clean test-other)
exp_name= exp_name=
if [ "$#" -eq 1 ]; then if [ "$#" -eq 1 ]; then
......
...@@ -20,7 +20,7 @@ stop_stage=0 ...@@ -20,7 +20,7 @@ stop_stage=0
######## hardware ######## ######## hardware ########
# devices # devices
#device=() device=()
gpu_num=8 gpu_num=8
update_freq=1 update_freq=1
...@@ -251,12 +251,14 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then ...@@ -251,12 +251,14 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
# Average models # Average models
dec_model=avg_${n_average}_checkpoint.pt dec_model=avg_${n_average}_checkpoint.pt
cmd="python ${root_dir}/scripts/average_checkpoints.py if [[ ! -f ${model_dir}/${dec_model} ]]; then
--inputs ${model_dir} cmd="python ${root_dir}/scripts/average_checkpoints.py
--num-epoch-checkpoints ${n_average} --inputs ${model_dir}
--output ${model_dir}/${dec_model}" --num-epoch-checkpoints ${n_average}
echo -e "\033[34mRun command: \n${cmd} \033[0m" --output ${model_dir}/${dec_model}"
[[ $eval -eq 1 ]] && eval $cmd echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval $cmd
fi
else else
dec_model=${dec_model} dec_model=${dec_model}
fi fi
......
...@@ -6,11 +6,14 @@ gpu_num=8 ...@@ -6,11 +6,14 @@ gpu_num=8
update_freq=1 update_freq=1
max_tokens=100000 max_tokens=100000
exp_tag= #exp_tag=
config_list=(base) #config_list=(base)
config_list=(base conformer) #config_list=(ctc)
#config_list=(pyramid) #config_list=(ctc conformer rpr)
#config_list=(pyramid_stage3) config_list=(base conformer rpr)
#config_list=(pyramid4_all256)
#config_list=(pyramid5_all256)
# exp full name # exp full name
exp_name= exp_name=
...@@ -41,8 +44,7 @@ if [[ -n ${extra_tag} ]]; then ...@@ -41,8 +44,7 @@ if [[ -n ${extra_tag} ]]; then
cmd="$cmd --extra_tag ${extra_tag}" cmd="$cmd --extra_tag ${extra_tag}"
fi fi
if [[ -n ${extra_parameter} ]]; then if [[ -n ${extra_parameter} ]]; then
# cmd="$cmd --extra_parameter \"${extra_parameter}\"" cmd="$cmd --extra_parameter \"${extra_parameter}\""
cmd="$cmd --extra_parameter ${extra_parameter}"
fi fi
echo ${cmd} echo ${cmd}
......
train-subset: train_asr train-subset: train_asr
valid-subset: dev_asr valid-subset: dev_asr
max-epoch: 50 max-epoch: 100
max-update: 100000 max-update: 100000
num-workers: 8 num-workers: 8
...@@ -24,7 +24,7 @@ warmup-updates: 10000 ...@@ -24,7 +24,7 @@ warmup-updates: 10000
lr: 2e-3 lr: 2e-3
#adam_betas: (0.9,0.98) #adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1 label_smoothing: 0.1
conv-kernel-sizes: 5,5 conv-kernel-sizes: 5,5
......
train-subset: train_asr
valid-subset: dev_asr
max-epoch: 50
max-update: 100000
num-workers: 8
patience: 10
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
arch: s2t_transformer_s
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
#adam_betas: (0.9,0.98)
ctc-weight: 0.3 ctc-weight: 0.3
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
conv-kernel-sizes: 5,5
conv-channels: 1024
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
arch: pys2t_transformer_s arch: pys2t_transformer_s
encoder-embed-dim: 512 encoder-embed-dim: 256
pyramid-stages: 4 pyramid-stages: 4
pyramid-layers: 2_3_5_2 #pyramid-dropout: 0
pyramid-layers: 2_2_6_2
pyramid-sr-ratios: 2_2_2_2 pyramid-sr-ratios: 2_2_2_2
pyramid-embed-dims: 128_128_256_512 pyramid-fuse: True
pyramid-use-ppm: True pyramid-fuse-way: all_conv
pyramid-embed-dims: 256_256_256_256
pyramid-reduced-embed: conv pyramid-reduced-embed: conv
pyramid-embed-norm: True pyramid-embed-norm: True
pyramid-position-embed: 1_1_1_1 pyramid-position-embed: 1_1_1_1
pyramid-kernel-sizes: 5_5_5_5 pyramid-kernel-sizes: 5_5_5_5
pyramid-ffn-ratios: 8_8_8_4 pyramid-ffn-ratios: 8_8_8_8
pyramid-heads: 2_2_4_8 pyramid-heads: 4_4_4_4
train-subset: train_asr train-subset: train_asr
valid-subset: dev_asr valid-subset: dev_asr
max-epoch: 50 max-epoch: 100
max-update: 100000 max-update: 100000
num-workers: 8 num-workers: 8
patience: 10 patience: 20
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
...@@ -37,7 +39,7 @@ warmup-updates: 10000 ...@@ -37,7 +39,7 @@ warmup-updates: 10000
lr: 2e-3 lr: 2e-3
#adam_betas: (0.9,0.98) #adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1 label_smoothing: 0.1
conv-channels: 1024 conv-channels: 1024
......
arch: pys2t_transformer_s arch: pys2t_transformer_s
encoder-embed-dim: 512 encoder-embed-dim: 256
pyramid-stages: 3 pyramid-stages: 4
pyramid-layers: 2_2_2 #pyramid-dropout: 0
pyramid-sr-ratios: 2_2_2 pyramid-layers: 3_3_3_3
pyramid-embed-dims: 128_256_512 pyramid-sr-ratios: 2_2_1_2
pyramid-fuse: True
pyramid-fuse-way: all_conv
pyramid-embed-dims: 256_256_256_256
pyramid-reduced-embed: conv pyramid-reduced-embed: conv
pyramid-embed-norm: True pyramid-embed-norm: True
pyramid-position-embed: 1_0_0 pyramid-position-embed: 1_1_1_1
pyramid-kernel-sizes: 5_5_5 pyramid-kernel-sizes: 5_5_5_5
pyramid-ffn-ratios: 8_8_4 pyramid-ffn-ratios: 8_8_8_8
pyramid-heads: 2_4_8 pyramid-heads: 4_4_4_4
train-subset: train_asr train-subset: train_asr
valid-subset: dev_asr valid-subset: dev_asr
max-epoch: 50 max-epoch: 100
max-update: 100000 max-update: 100000
num-workers: 8 num-workers: 8
patience: 10 patience: 20
no-progress-bar: True no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
...@@ -36,14 +39,14 @@ warmup-updates: 10000 ...@@ -36,14 +39,14 @@ warmup-updates: 10000
lr: 2e-3 lr: 2e-3
#adam_betas: (0.9,0.98) #adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1 label_smoothing: 0.1
conv-channels: 1024 conv-channels: 1024
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
encoder-ffn-embed-dim: 2048 encoder-ffn-embed-dim: 2048
encoder-layers: 6 encoder-layers: 12
decoder-layers: 6 decoder-layers: 6
encoder-attention-heads: 4 encoder-attention-heads: 4
......
#encoder-attention-type: rel_selfattn encoder-attention-type: rel_selfattn
encoder-attention-type: relative #encoder-attention-type: relative
max-encoder-relative-length: 100 #max-encoder-relative-length: 100
gpu_num=1 gpu_num=4
cmd="sh train.sh" cmd="sh train.sh"
while : while :
......
...@@ -41,8 +41,8 @@ lcrm=0 ...@@ -41,8 +41,8 @@ lcrm=0
tokenizer=0 tokenizer=0
use_specific_dict=0 use_specific_dict=0
specific_prefix=valid specific_prefix=st
specific_dir=/home/xuchen/st/data/mustc/st_lcrm/en-de specific_dir=/home/xuchen/st/data/mustc/st/en-de
asr_vocab_prefix=spm_unigram10000_st_share asr_vocab_prefix=spm_unigram10000_st_share
org_data_dir=~/st/data/${dataset} org_data_dir=~/st/data/${dataset}
......
...@@ -6,8 +6,14 @@ gpu_num=8 ...@@ -6,8 +6,14 @@ gpu_num=8
update_freq=1 update_freq=1
max_tokens=40000 max_tokens=40000
exp_tag=baseline exp_tag=
config_list=(base)
#config_list=(base)
#config_list=(ctc)
#config_list=(base conformer)
#config_list=(pyramid4_base)
config_list=(pyramid4_base conformer rpr)
# exp full name # exp full name
exp_name= exp_name=
......
...@@ -13,7 +13,7 @@ fi ...@@ -13,7 +13,7 @@ fi
n_average=10 n_average=10
beam_size=5 beam_size=5
len_penalty=1.0 len_penalty=1.0
max_tokens=10000 max_tokens=80000
dec_model=checkpoint_best.pt dec_model=checkpoint_best.pt
cmd="./run.sh cmd="./run.sh
...@@ -31,9 +31,9 @@ cmd="./run.sh ...@@ -31,9 +31,9 @@ cmd="./run.sh
if [[ -n ${data_dir} ]]; then if [[ -n ${data_dir} ]]; then
cmd="$cmd --data_dir ${data_dir}" cmd="$cmd --data_dir ${data_dir}"
fi fi
if [[ ${#test_subset[@]} -ne 0 ]]; then if [[ -n ${test_subset} ]]; then
subsets=$(echo ${test_subset[*]} | sed 's/ /,/g') test_subset=`echo ${test_subset[*]} | sed 's/ /,/g'`
cmd="$cmd --test_subset ${subsets}" cmd="$cmd --test_subset ${test_subset}"
fi fi
echo $cmd echo $cmd
......
...@@ -20,7 +20,7 @@ stop_stage=0 ...@@ -20,7 +20,7 @@ stop_stage=0
######## hardware ######## ######## hardware ########
# devices # devices
#device=() device=()
gpu_num=8 gpu_num=8
update_freq=1 update_freq=1
...@@ -41,10 +41,10 @@ lcrm=0 ...@@ -41,10 +41,10 @@ lcrm=0
tokenizer=0 tokenizer=0
use_specific_dict=0 use_specific_dict=0
specific_prefix=wmt_share32k specific_prefix=st
specific_dir=/home/xuchen/st/data/wmt/mt_lcrm/en-de/unigram32000_share specific_dir=/home/xuchen/st/data/mustc/st/en-de/
src_vocab_prefix=spm_unigram32000_share src_vocab_prefix=spm_unigram10000_st_share
tgt_vocab_prefix=spm_unigram32000_share tgt_vocab_prefix=spm_unigram10000_st_share
org_data_dir=~/st/data/${dataset} org_data_dir=~/st/data/${dataset}
data_dir=~/st/data/${dataset}/mt/${lang} data_dir=~/st/data/${dataset}/mt/${lang}
...@@ -54,14 +54,14 @@ trans_subset=tst-COMMON ...@@ -54,14 +54,14 @@ trans_subset=tst-COMMON
test_subset=test test_subset=test
# exp # exp
exp_prefix=$(date "+%m%d") exp_prefix=${time}
extra_tag= extra_tag=
extra_parameter= extra_parameter=
exp_tag=baseline exp_tag=baseline
exp_name= exp_name=
# config # config
train_config=base train_config=base_s
# training setting # training setting
fp16=1 fp16=1
...@@ -103,6 +103,7 @@ fi ...@@ -103,6 +103,7 @@ fi
. ./local/parse_options.sh || exit 1; . ./local/parse_options.sh || exit 1;
# full path
if [[ -z ${exp_name} ]]; then if [[ -z ${exp_name} ]]; then
config_string=${train_config//,/_} config_string=${train_config//,/_}
# exp_name=${exp_prefix}_$(basename ${train_config%.*})_${exp_tag} # exp_name=${exp_prefix}_$(basename ${train_config%.*})_${exp_tag}
...@@ -150,7 +151,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then ...@@ -150,7 +151,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
mkdir -p ${data_dir}/data mkdir -p ${data_dir}/data
for split in ${train_subset} ${valid_subset} ${trans_subset}; do for split in ${train_subset} ${valid_subset} ${trans_subset}; do
{ {
cmd="cat ${org_data_dir}/${lang}/data/${split}.${src_lang}" cmd="cat ${org_data_dir}/${lang}/data/${split}/txt/${split}.${src_lang}"
if [[ ${lcrm} -eq 1 ]]; then if [[ ${lcrm} -eq 1 ]]; then
cmd="python local/lower_rm.py ${org_data_dir}/${lang}/data/${split}.${src_lang}" cmd="python local/lower_rm.py ${org_data_dir}/${lang}/data/${split}.${src_lang}"
fi fi
...@@ -165,7 +166,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then ...@@ -165,7 +166,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
cmd="spm_encode cmd="spm_encode
--model ${data_dir}/${tgt_vocab_prefix}.model --model ${data_dir}/${tgt_vocab_prefix}.model
--output_format=piece --output_format=piece
< ${org_data_dir}/${lang}/data/${split}.${tgt_lang} < ${org_data_dir}/${lang}/data/${split}/txt/${split}.${tgt_lang}
> ${data_dir}/data/${split}.${tgt_lang}" > ${data_dir}/data/${split}.${tgt_lang}"
echo -e "\033[34mRun command: \n${cmd} \033[0m" echo -e "\033[34mRun command: \n${cmd} \033[0m"
...@@ -178,7 +179,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then ...@@ -178,7 +179,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
--source-lang ${src_lang} --target-lang ${tgt_lang} --source-lang ${src_lang} --target-lang ${tgt_lang}
--trainpref ${data_dir}/data/${train_subset} --trainpref ${data_dir}/data/${train_subset}
--validpref ${data_dir}/data/${valid_subset} --validpref ${data_dir}/data/${valid_subset}
--testpref ${data_dir}/data/${test_subset} --testpref ${data_dir}/data/${trans_subset}
--destdir ${data_dir}/data-bin --destdir ${data_dir}/data-bin
--srcdict ${data_dir}/${src_vocab_prefix}.txt --srcdict ${data_dir}/${src_vocab_prefix}.txt
--tgtdict ${data_dir}/${tgt_vocab_prefix}.txt --tgtdict ${data_dir}/${tgt_vocab_prefix}.txt
...@@ -265,7 +266,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then ...@@ -265,7 +266,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
save_interval=1 save_interval=1
keep_last_epochs=10 keep_last_epochs=10
no_epoch_checkpoints=0 no_epoch_checkpoints=0
save_interval_updates=10000 save_interval_updates=500
keep_interval_updates=10 keep_interval_updates=10
else else
validate_interval=1 validate_interval=1
...@@ -352,7 +353,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then ...@@ -352,7 +353,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
result_file=${model_dir}/decode_result result_file=${model_dir}/decode_result
[[ -f ${result_file} ]] && rm ${result_file} [[ -f ${result_file} ]] && rm ${result_file}
test_subset=${test_subset//,/ } test_subset=(${test_subset//,/ })
for subset in ${test_subset[@]}; do for subset in ${test_subset[@]}; do
cmd="python ${root_dir}/fairseq_cli/generate.py cmd="python ${root_dir}/fairseq_cli/generate.py
${data_dir} ${data_dir}
......
...@@ -2,9 +2,9 @@ ...@@ -2,9 +2,9 @@
# training the model # training the model
gpu_num=8 gpu_num=1
update_freq=1 update_freq=1
max_tokens=4096 max_tokens=8192
exp_tag=baseline exp_tag=baseline
config_list=(base) config_list=(base)
......
train-subset: train_st train-subset: train_st
valid-subset: dev_st valid-subset: dev_st
max-epoch: 50 max-epoch: 100
max-update: 100000 max-update: 100000
num-workers: 8 num-workers: 8
...@@ -24,7 +24,7 @@ warmup-updates: 10000 ...@@ -24,7 +24,7 @@ warmup-updates: 10000
lr: 2e-3 lr: 2e-3
#adam_betas: (0.9,0.98) #adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1 label_smoothing: 0.1
conv-kernel-sizes: 5,5 conv-kernel-sizes: 5,5
......
train-subset: train_st ctc-weight: 0.3
valid-subset: dev_st \ No newline at end of file
max-epoch: 50
max-update: 100000
num-workers: 8
patience: 10
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
arch: s2t_transformer_s
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
#adam_betas: (0.9,0.98)
ctc-weight: 0.3
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
conv-kernel-sizes: 5,5
conv-channels: 1024
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
arch: pys2t_transformer_s
encoder-embed-dim: 256
#pyramid-dropout: 0
pyramid-stages: 4
pyramid-layers: 3_3_3_3
pyramid-sr-ratios: 2_2_1_2
pyramid-embed-dims: 256_256_256_256
pyramid-fuse: True
pyramid-reduced-embed: conv
pyramid-embed-norm: True
pyramid-position-embed: 1_1_1_1
pyramid-kernel-sizes: 5_5_5_5
pyramid-ffn-ratios: 8_8_8_8
pyramid-heads: 4_4_4_4
train-subset: train_st
valid-subset: dev_st
max-epoch: 100
max-update: 100000
num-workers: 8
patience: 10
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
#load-pretrained-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/1002_pyramid4_all256_3333_sr8/avg_10_checkpoint.pt
#load-pretrained-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/1002_pyramid4_all256_3333_sr8/checkpoint_best.pt
load-pretrained-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/1007_st_pyramid4_all256_3333_sr8_ctc/avg_10_checkpoint.pt
load-pretrained-decoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_1003_2349_train_s_baseline/avg_10_checkpoint.pt
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
#adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
conv-channels: 1024
dropout: 0.1
activation-fn: relu
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
train-subset: train_st train-subset: train_st
valid-subset: dev_st valid-subset: dev_st
max-epoch: 50 max-epoch: 100
max-update: 100000 max-update: 100000
num-workers: 8 num-workers: 8
...@@ -50,8 +50,22 @@ cnn-module-kernel: 31 ...@@ -50,8 +50,22 @@ cnn-module-kernel: 31
acoustic-encoder: transformer acoustic-encoder: transformer
adapter: league adapter: league
#decoder-embed-dim: 256 encoder-embed-dim: 256
#decoder-ffn-embed-dim: 2048 pyramid-stages: 4
#decoder-attention-heads: 4 #pyramid-dropout: 0
#attention-dropout: 0.1 pyramid-layers: 3_3_3_3
#activation-dropout: 0.1 pyramid-sr-ratios: 2_2_1_2
pyramid-embed-dims: 256_256_256_256
pyramid-fuse: True
pyramid-reduced-embed: conv
pyramid-embed-norm: True
pyramid-position-embed: 1_1_1_1
pyramid-kernel-sizes: 5_5_5_5
pyramid-ffn-ratios: 8_8_8_8
pyramid-heads: 4_4_4_4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
...@@ -13,7 +13,7 @@ fi ...@@ -13,7 +13,7 @@ fi
n_average=10 n_average=10
beam_size=5 beam_size=5
len_penalty=1.0 len_penalty=1.0
max_tokens=10000 max_tokens=80000
dec_model=checkpoint_best.pt dec_model=checkpoint_best.pt
cmd="./run.sh cmd="./run.sh
......
...@@ -6,8 +6,16 @@ gpu_num=8 ...@@ -6,8 +6,16 @@ gpu_num=8
update_freq=1 update_freq=1
max_tokens=40000 max_tokens=40000
exp_tag=baseline exp_tag=
#config_list=(base)
config_list=(ctc) config_list=(ctc)
#config_list=(sate_ctc)
#config_list=(ctc conformer rpr)
#config_list=(base sate)
#config_list=(pyramid4_base_sr8)
#config_list=(pyramid4_base_sr8 conformer)
# exp full name # exp full name
exp_name= exp_name=
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论