Commit 6a2f4065 by xuchen

modify the shell scripts

parent de171aee
-#train-subset: train-clean-100,train-clean-360,train-other-500
+train-subset: train-clean-100,train-clean-360,train-other-500
-train-subset: train-clean-100
+#train-subset: train-clean-100
 valid-subset: dev-clean
 max-epoch: 100
 max-update: 300000
-num-workers: 0
+num-workers: 8
 patience: 10
 no-progress-bar: True
 log-interval: 100
 seed: 1
 report-accuracy: True
-arch: s2t_transformer_s
+arch: s2t_conformer_s
 share-decoder-input-output-embed: True
 optimizer: adam
 clip-norm: 10.0
......
train-subset: train-clean-100,train-clean-360,train-other-500
valid-subset: dev-clean
max-epoch: 100
max-update: 300000
num-workers: 8
patience: 10
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
arch: s2t_conformer_s
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
#adam_betas: (0.9,0.98)
ctc-weight: 0.3
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
conv-kernel-sizes: 5,5
conv-channels: 1024
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 31
#decoder-embed-dim: 256
#decoder-ffn-embed-dim: 2048
#decoder-attention-heads: 4
#attention-dropout: 0.1
#activation-dropout: 0.1
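The keys above are ordinary fairseq training options, fed to fairseq_cli/train.py through the fork's --train-config option (see the run.sh hunk further down). A minimal sketch of the equivalent explicit command line, assuming the Fairseq-S2T fork is installed and ${data_dir} holds the prepared LibriSpeech features; the s2t_conformer_s architecture, --ctc-weight and the CTC-augmented criterion come from this fork rather than upstream fairseq:

python3 -u ${root_dir}/fairseq_cli/train.py ${data_dir} \
    --config-yaml config.yaml --task speech_to_text \
    --train-subset train-clean-100,train-clean-360,train-other-500 \
    --valid-subset dev-clean \
    --arch s2t_conformer_s --share-decoder-input-output-embed \
    --criterion label_smoothed_cross_entropy_with_ctc \
    --label-smoothing 0.1 --ctc-weight 0.3 \
    --optimizer adam --clip-norm 10.0 --lr 2e-3 \
    --lr-scheduler inverse_sqrt --warmup-init-lr 1e-7 --warmup-updates 10000 \
    --max-tokens 40000 --max-update 300000 --num-workers 8 \
    --save-dir ${model_dir}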
#train-subset: train-clean-100,train-clean-360,train-other-500
train-subset: train-clean-100
valid-subset: dev-clean
max-epoch: 100
max-update: 300000
num-workers: 0
patience: 10
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
arch: s2t_conformer_s
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
#adam_betas: (0.9,0.98)
ctc-weight: 0.3
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
conv-kernel-sizes: 5,5
conv-channels: 1024
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 3
decoder-layers: 3
encoder-attention-heads: 4
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 31
#decoder-embed-dim: 256
#decoder-ffn-embed-dim: 2048
#decoder-attention-heads: 4
#attention-dropout: 0.1
#activation-dropout: 0.1
@@ -13,7 +13,7 @@ get_devices(){
         do
             line=`expr $dev + 2`
             use=`cat $record | head -n $line | tail -1 | cut -d '|' -f3 | cut -d '/' -f1`
-            if [[ $use -lt 10 ]]; then
+            if [[ $use -lt 100 ]]; then
                 device[$count]=$dev
                 count=`expr $count + 1`
                 if [[ $count -eq $gpu_num ]]; then
......
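For context, get_devices treats a card as free when the usage figure parsed from field 3 of a pre-generated record file (most likely the memory column of nvidia-smi output) is below the threshold, and this commit relaxes that threshold from 10 to 100. A standalone sketch of the same idea that queries nvidia-smi directly instead of a record file; the variable names and the 100 MiB cutoff are illustrative assumptions:

gpu_num=2        # number of free cards to pick
threshold=100    # MiB of used memory below which a card counts as free
device=()
idx=0
# nvidia-smi prints one line per GPU: used memory in MiB
while read -r used; do
    if [[ ${used} -lt ${threshold} ]]; then
        device+=(${idx})
        [[ ${#device[@]} -eq ${gpu_num} ]] && break
    fi
    idx=$((idx + 1))
done < <(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits)
echo "selected GPUs: ${device[@]}"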
@@ -24,7 +24,7 @@ device=()
 gpu_num=8
 update_freq=1
-root_dir=~/Code/st/fairseq
+root_dir=~/st/Fairseq-S2T
 pwd_dir=$PWD
 # dataset
@@ -36,7 +36,8 @@ task=speech_to_text
 vocab_type=unigram
 vocab_size=10000
-data_dir=~/Code/st/data/${dataset}
+org_data_dir=/meida/data/${dataset}
+data_dir=~/st/data/${dataset}
 test_subset=(dev-clean dev-other test-clean test-other)
 # exp
@@ -46,11 +47,11 @@ exp_tag=baseline
 exp_name=
 # config
-train_config=asr_train_ctc.yaml
+train_config=train_ctc.yaml
 data_config=config.yaml
 # training setting
-fp16=0
+fp16=1
 max_tokens=40000
 step_valid=0
@@ -80,7 +81,7 @@ if [[ -z ${exp_name} ]]; then
     fi
 fi
-model_dir=$root_dir/../checkpoints/$dataset/$task/asr/${exp_name}
+model_dir=$root_dir/../checkpoints/$dataset/asr/${exp_name}
 if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
     echo "stage -1: Data Download"
@@ -92,6 +93,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     ### But you can utilize Kaldi recipes in most cases
     echo "stage 0: Data Preparation"
     cmd="python ${root_dir}/examples/speech_to_text/prep_librispeech_data.py
+        --data-root ${org_data_dir}
         --output-root ${data_dir}
         --vocab-type ${vocab_type}
         --vocab-size ${vocab_size}"
@@ -101,7 +103,7 @@ fi
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: ASR Network Training"
-    [[ ! -d $data_dir ]] && echo "The data dir $data_dir is not existing!" && exit 1;
+    [[ ! -d ${data_dir} ]] && echo "The data dir $data_dir is not existing!" && exit 1;
     if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
         if [[ ${gpu_num} -eq 0 ]]; then
@@ -112,7 +114,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
         fi
     fi
-    echo -e "dev=${device} data=$data_dir model=${model_dir}"
+    echo -e "dev=${device} data=${data_dir} model=${model_dir}"
     if [[ ! -d ${model_dir} ]]; then
         mkdir -p ${model_dir}
@@ -125,10 +127,10 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     cp ${train_config} ${model_dir}
     cmd="python3 -u ${root_dir}/fairseq_cli/train.py
-        $data_dir
+        ${data_dir}
         --config-yaml ${data_config}
         --train-config ${train_config}
-        --task speech_to_text
+        --task ${task}
         --max-tokens ${max_tokens}
         --update-freq ${update_freq}
         --log-interval 100
@@ -177,7 +179,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # save info
     log=./history.log
-    echo "${time} | ${device} | $data_dir | ${model_dir} " >> $log
+    echo "${time} | ${device} | ${data_dir} | ${model_dir} " >> $log
     cat $log | tail -n 50 > tmp.log
     mv tmp.log $log
     export CUDA_VISIBLE_DEVICES=${device}
@@ -225,7 +227,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     for subset in ${test_subset[@]}; do
         subset=${subset}
         cmd="python ${root_dir}/fairseq_cli/generate.py
-        ${data_dir}/$lang
+        ${data_dir}
         --config-yaml ${data_config}
         --gen-subset ${subset}
         --task speech_to_text
......
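As the echo statements in the hunks above show, run.sh is staged: stage -1 downloads the data, stage 0 runs prep_librispeech_data.py, stage 1 launches fairseq_cli/train.py, and stage 2 decodes each subset in test_subset with fairseq_cli/generate.py. Only --stage 1 is visibly passed in this diff, so the resume-style invocations below are a sketch that assumes run.sh also accepts a matching --stop_stage option:

# feature extraction and vocabulary only
./run.sh --stage 0 --stop_stage 0
# training followed by decoding of the dev/test subsets
./run.sh --stage 1 --stop_stage 2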
@@ -2,8 +2,9 @@
 # training the model
-gpu_num=0
-update_freq=1
+gpu_num=8
+update_freq=2
+max_tokens=20000
 extra_tag=
 extra_parameter=
@@ -12,9 +13,8 @@ extra_parameter=
 #extra_parameter="${extra_parameter} "
 exp_tag=test
-train_config=asr_train_ctc.yaml
-max_tokens=4000
+train_config=train_ctc.yaml
 cmd="./run.sh
     --stage 1
......
@@ -34,7 +34,7 @@ dropout: 0.1
 activation-fn: relu
 encoder-embed-dim: 256
 encoder-ffn-embed-dim: 2048
-encoder-layers: 6
+encoder-layers: 12
 decoder-layers: 6
 encoder-attention-heads: 4
......
@@ -13,7 +13,7 @@ get_devices(){
         do
             line=`expr $dev + 2`
             use=`cat $record | head -n $line | tail -1 | cut -d '|' -f3 | cut -d '/' -f1`
-            if [[ $use -lt 10 ]]; then
+            if [[ $use -lt 100 ]]; then
                 device[$count]=$dev
                 count=`expr $count + 1`
                 if [[ $count -eq $gpu_num ]]; then
......
@@ -38,8 +38,8 @@ encoder-layers: 6
 decoder-layers: 6
 encoder-attention-heads: 4
-macaron-style: true
-use-cnn-module: true
+macaron-style: True
+use-cnn-module: True
 cnn-module-kernel: 31
 #decoder-embed-dim: 256
......
train-subset: train_st
valid-subset: dev_st
max-epoch: 100
max-update: 100000
num-workers: 8
patience: 10
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
#load-params:
#load-pretrained-encoder-from:
arch: s2t_transformer_s
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
#adam_betas: (0.9,0.98)
ctc-weight: 0.3
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
conv-kernel-sizes: 5,5
conv-channels: 1024
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 6
decoder-layers: 6
encoder-attention-heads: 4
encoder-attention-type: rel_selfattn
#decoder-embed-dim: 256
#decoder-ffn-embed-dim: 2048
#decoder-attention-heads: 4
#attention-dropout: 0.1
#activation-dropout: 0.1
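The commented load-params and load-pretrained-encoder-from keys above are the hooks for initializing this ST model from a pretrained ASR encoder; --load-pretrained-encoder-from is the stock fairseq speech_to_text option, while load-params appears to be fork-specific. A sketch of supplying it on the command line instead of in the yaml (the checkpoint path and ${asr_model_dir} are hypothetical):

python3 -u ${root_dir}/fairseq_cli/train.py ${data_dir} \
    --config-yaml config.yaml --task speech_to_text \
    --train-config ${train_config} \
    --load-pretrained-encoder-from ${asr_model_dir}/checkpoint_best.pt \
    --max-tokens ${max_tokens} --update-freq ${update_freq} \
    --save-dir ${model_dir}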
@@ -13,7 +13,7 @@ get_devices(){
         do
             line=`expr $dev + 2`
             use=`cat $record | head -n $line | tail -1 | cut -d '|' -f3 | cut -d '/' -f1`
-            if [[ $use -lt 10 ]]; then
+            if [[ $use -lt 100 ]]; then
                 device[$count]=$dev
                 count=`expr $count + 1`
                 if [[ $count -eq $gpu_num ]]; then
......
@@ -3,8 +3,8 @@
 # training the model
 gpu_num=8
-update_freq=1
-max_tokens=40000
+update_freq=2
+max_tokens=20000
 extra_tag=
 extra_parameter=
......
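Note that this last change keeps the effective batch size constant while halving the per-step memory footprint: gradients are now accumulated over two smaller batches instead of one large one. A quick check with the usual gpu_num * max_tokens * update_freq approximation of tokens per optimizer update:

echo $(( 8 * 40000 * 1 ))   # old settings: 320000 tokens per optimizer update
echo $(( 8 * 20000 * 2 ))   # new settings: 320000 tokens per optimizer update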