shell and yaml

12201609 · xuchen · d88a22ef · 12201609 · 12201609 · 12201609
Commit 12201609 authored Aug 22, 2023 by xuchen
--- a/egs/aishell/asr/conf/big.yaml
+++ b/egs/aishell/asr/conf/big.yaml
@@ -30,6 +30,3 @@ encoder-attention-heads: 8
 decoder-embed-dim: 512
 decoder-ffn-embed-dim: 2048
 decoder-attention-heads: 8
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
--- a/egs/aishell/asr/conf/ctc.yaml
+++ b/egs/aishell/asr/conf/ctc.yaml
 ctc-weight: 0.3
-# share-ctc-and-embed: True
\ No newline at end of file
--- a/egs/aishell/asr/conf/inter.yaml
+++ b/egs/aishell/asr/conf/inter.yaml
 inter-ctc-weight: 0.2
 inter-ctc-layers: 6,9
+inter-ctc-drop-prob: 0
 share-inter-ctc: True
 ctc-pae: none
 # ctc-pae: inter_league

--- a/egs/aishell/asr/conf/mixup.yaml
+++ b/egs/aishell/asr/conf/mixup.yaml
@@ -3,10 +3,11 @@ inter-mixup-layer: -1
 inter-mixup-decoder-layer: 0
 inter-mixup-prob: 1.0
 inter-mixup-ratio: 1.0
-inter-mixup-beta: 0.5
+inter-mixup-beta: 0.2
 inter-mixup-keep-org: False
 inter-mixup-decoder-emb: False
 ctc-mixup-consistent-weight: 0
+inter-ctc-mixup-consistent-weight: 0
 mixup-consistent-weight: 0
 cal-mixup-loss: True
 no-specaugment: False

--- a/egs/aishell/asr/conf/pds_base.yaml
+++ b/egs/aishell/asr/conf/pds_base.yaml
 arch: pdss2t_transformer_s_8
-pds-fusion: True
 share-decoder-input-output-embed: True
 optimizer: adam
@@ -23,6 +22,3 @@ encoder-attention-heads: 4
 decoder-embed-dim: 256
 decoder-ffn-embed-dim: 2048
 decoder-attention-heads: 4
\ No newline at end of file
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
\ No newline at end of file
--- a/egs/aishell/asr/conf/pds_base_16.yaml
+++ b/egs/aishell/asr/conf/pds_base_16.yaml
@@ -38,6 +38,3 @@ encoder-attention-heads: 4
 decoder-embed-dim: 256
 decoder-ffn-embed-dim: 2048
 decoder-attention-heads: 4
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
\ No newline at end of file
--- a/egs/aishell/asr/conf/pds_base_32.yaml
+++ b/egs/aishell/asr/conf/pds_base_32.yaml
@@ -38,6 +38,3 @@ encoder-attention-heads: 4
 decoder-embed-dim: 256
 decoder-ffn-embed-dim: 2048
 decoder-attention-heads: 4
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
\ No newline at end of file
--- a/egs/aishell/asr/conf/pds_base_8.yaml
+++ b/egs/aishell/asr/conf/pds_base_8.yaml
@@ -38,6 +38,3 @@ encoder-attention-heads: 4
 decoder-embed-dim: 256
 decoder-ffn-embed-dim: 2048
 decoder-attention-heads: 4
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
--- a/egs/aishell/asr/conf/pds_big_8.yaml
+++ b/egs/aishell/asr/conf/pds_big_8.yaml
@@ -38,6 +38,3 @@ encoder-attention-heads: 8
 decoder-embed-dim: 512
 decoder-ffn-embed-dim: 2048
 decoder-attention-heads: 8
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
--- a/egs/aishell/asr/conf/purectc.yaml
+++ b/egs/aishell/asr/conf/purectc.yaml
@@ -23,13 +23,8 @@ subsampling-norm: none
 subsampling-activation: glu
 dropout: 0.1
-attention-dropout: 0.1
-activation-dropout: 0.1
 activation-fn: relu
 encoder-embed-dim: 256
 encoder-ffn-embed-dim: 2048
 encoder-layers: 18
 encoder-attention-heads: 4
\ No newline at end of file
-#load-pretrained-encoder-from:
\ No newline at end of file
--- a/egs/aishell/asr/conf/rpr.yaml
+++ b/egs/aishell/asr/conf/rpr.yaml
-encoder-attention-type: rel_selfattn
+encoder-attention-type: rel_pos
-# encoder-attention-type: relative
-# max-encoder-relative-length: 100
+#encoder-attention-type: rel_pos_legacy
\ No newline at end of file
+#encoder-attention-type: rel_selfattn
+#encoder-attention-type: relative
+#decoder-attention-type: relative
+#max-encoder-relative-length: 100
+#max-decoder-relative-length: 20
--- a/egs/aishell/asr/decode.sh
+++ b/egs/aishell/asr/decode.sh
@@ -19,8 +19,8 @@ max_tokens=80000
 dec_model=checkpoint_best.pt
 cmd="./run.sh
-    --stage 3
+    --stage 2
-    --stop_stage 3
+    --stop_stage 2
    --gpu_num ${gpu_num}
    --exp_name ${exp_name}
    --n_average ${n_average}

--- a/egs/aishell/asr/local/monitor.sh
+++ b/egs/aishell/asr/local/monitor.sh
+#!/usr/bin/env bash
 gpu_num=4
 cmd="sh train.sh"

--- a/egs/aishell/asr/run.sh
+++ b/egs/aishell/asr/run.sh
--- a/egs/aishell/asr/train.sh
+++ b/egs/aishell/asr/train.sh
@@ -4,7 +4,7 @@
 gpu_num=2
 update_freq=1
-max_tokens=160000
+max_tokens=100000
 extra_tag=
 extra_parameter=
@@ -33,7 +33,7 @@ train_config=$(echo ${config_list[*]} | sed 's/ /,/g')
 cmd="./run.sh
    --stage 1
-    --stop_stage 1
+    --stop_stage 2
    --gpu_num ${gpu_num}
    --update_freq ${update_freq}
    --train_config ${train_config}

--- a/egs/librispeech/asr/conf/base.yaml
+++ b/egs/librispeech/asr/conf/base.yaml
@@ -30,3 +30,8 @@ encoder-attention-heads: 4
 decoder-embed-dim: 256
 decoder-ffn-embed-dim: 2048
 decoder-attention-heads: 4
+attention-dropout: 0.1
+activation-dropout: 0.1
+#load-pretrained-encoder-from:
+#load-pretrained-decoder-from:
\ No newline at end of file
--- a/egs/librispeech/asr/conf/basis.yaml
+++ b/egs/librispeech/asr/conf/basis.yaml
@@ -3,7 +3,7 @@ valid-subset: dev-other,dev-clean
 max-epoch: 300
 max-update: 300000
-patience: 50
+patience: 20
 post-process: sentencepiece
 # best-checkpoint-metric: loss
@@ -18,8 +18,8 @@ best_checkpoint_metric: dec_wer
 maximize_best_checkpoint_metric: False
 validate-interval: 5
-# no-epoch-checkpoints: True
+no-epoch-checkpoints: True
-keep-last-epochs: 10
+# keep-last-epochs: 10
 keep-best-checkpoints: 10
 num-workers: 8

--- a/egs/librispeech/asr/conf/conformer.yaml
+++ b/egs/librispeech/asr/conf/conformer.yaml
 macaron-style: True
 use-cnn-module: True
-cnn-module-kernel: 31
+cnn-module-kernel: 15
 encoder-attention-type: rel_pos
 encoder-activation-fn: swish
 layer-padding-mask: True
\ No newline at end of file
--- a/egs/librispeech/asr/conf/inter.yaml
+++ b/egs/librispeech/asr/conf/inter.yaml
@@ -5,10 +5,13 @@ share-inter-ctc: True
 ctc-pae: none
 # ctc-pae: inter_league
+# ctc-pae-ground-truth-ratio: 0.1
 # pae-gumbel: True
 # pae-distribution-hard: True
 # pae-drop-prob: 0.0
 # pae-distribution-cutoff: 10
+# share-pae-and-ctc: True
 # pae-embed-norm: True
 # pae-out-norm: True

--- a/egs/librispeech/asr/conf/mixup.yaml
+++ b/egs/librispeech/asr/conf/mixup.yaml
@@ -3,11 +3,15 @@ inter-mixup-layer: -1
 inter-mixup-decoder-layer: 0
 inter-mixup-prob: 1.0
 inter-mixup-ratio: 1.0
-inter-mixup-beta: 0.5
+inter-mixup-beta: 0.2
 inter-mixup-keep-org: False
 inter-mixup-decoder-emb: False
 ctc-mixup-consistent-weight: 0
+inter-ctc-mixup-consistent-weight: 0
 mixup-consistent-weight: 0
 cal-mixup-loss: True
 no-specaugment: False
 layer-out-norm: False
+inter-mixup-ratio-decay: False
+inter-mixup-ratio-decay-params: 20000,40000,0
\ No newline at end of file
--- a/egs/librispeech/asr/conf/pds_base.yaml
+++ b/egs/librispeech/asr/conf/pds_base.yaml
 arch: pdss2t_transformer_s_8
-pds-fusion: True
 share-decoder-input-output-embed: True
 optimizer: adam
 clip-norm: 10.0

--- a/egs/librispeech/asr/conf/pds_base_8_444.yaml
+++ b/egs/librispeech/asr/conf/pds_base_8_444.yaml
 arch: pdss2t_transformer_s_8
-# pds-ctc: 1_1_0
 encoder-embed-dim: 256
 pds-stages: 3
 pds-layers: 4_4_4

--- a/egs/librispeech/asr/conf/pds_big_32.yaml
+++ b/egs/librispeech/asr/conf/pds_big_32.yaml
--- a/egs/librispeech/asr/conf/pds_deep_32.yaml
+++ b/egs/librispeech/asr/conf/pds_deep_32.yaml
@@ -2,7 +2,6 @@ arch: pdss2t_transformer_sd_32
 encoder-embed-dim: 256
 pds-stages: 5
-# ctc-layer: 12
 pds-layers: 3_3_4_4_4
 pds-ratios: 2_2_2_2_2
 pds-fusion: False

--- a/egs/librispeech/asr/conf/rpr.yaml
+++ b/egs/librispeech/asr/conf/rpr.yaml
-encoder-attention-type: rel_selfattn
+encoder-attention-type: rel_pos
+#encoder-attention-type: rel_pos_legacy
+#encoder-attention-type: rel_selfattn
 #encoder-attention-type: relative
+#decoder-attention-type: relative
 #max-encoder-relative-length: 100
+#max-decoder-relative-length: 20
--- a/egs/librispeech/asr/decode.sh
+++ b/egs/librispeech/asr/decode.sh
@@ -17,8 +17,8 @@ max_tokens=100000
 dec_model=checkpoint_best.pt
 cmd="./run.sh
-    --stage 3
+    --stage 2
-    --stop_stage 3
+    --stop_stage 2
    --gpu_num ${gpu_num}
    --exp_name ${exp_name}
    --n_average ${n_average}

--- a/egs/librispeech/asr/local/utils.sh
+++ b/egs/librispeech/asr/local/utils.sh
@@ -14,7 +14,7 @@ get_devices(){
        do
            line=$((dev + 2))
            use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)
-            if [[ $use -lt 100 ]]; then
+            if [[ $use -lt 1000 ]]; then
                device[$count]=$dev
                count=$((count + 1))
                if [[ $count -eq $gpu_num ]]; then

--- a/egs/librispeech/asr/run.sh
+++ b/egs/librispeech/asr/run.sh
@@ -2,8 +2,7 @@
 # Processing LibriSpeech Datasets
-# Copyright 2021 Natural Language Processing Laboratory 
+# Copyright 2021 Chen Xu (xuchennlp@outlook.com)
-# Xu Chen (xuchenneu@163.com)
 # Set bash to 'debug' mode; it will exit on:
 # -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
@@ -16,27 +15,26 @@ eval=1
 time=$(date "+%m%d_%H%M")
 stage=1
-stop_stage=4
+stop_stage=2
-######## hardware ########
+######## Hardware ########
-# devices
+# Devices
 device=(0)
 gpu_num=8
 update_freq=1
+max_tokens=100000
-root_dir=/opt/tiger
-#  data_root_dir=/mnt/bd/data-model
-data_root_dir=/mnt/bn/nas-xc-1
-code_dir=${root_dir}/s2t
 pwd_dir=$PWD
+root_dir=${ST_ROOT}
+data_root_dir=${root_dir}
-# dataset
+code_dir=${root_dir}/S2T
+# Dataset
 src_lang=en
 lang=${src_lang}
 dataset=librispeech
-data_tag=asr_mlo
+data_tag=asr
 task=speech_to_text
 vocab_type=unigram
@@ -60,22 +58,17 @@ test_subset=dev-clean,dev-other,test-clean,test-other,all
 # exp
 sub_tag=
 exp_prefix=$(date "+%m%d")
-# exp_subfix=${ARNOLD_JOB_ID}_${ARNOLD_TASK_ID}_${ARNOLD_TRIAL_ID}
 extra_tag=
 extra_parameter=
 exp_tag=baseline
 exp_name=
-# config
+# Training Settings
 train_config=base
-data_config=config.yaml
-# training setting
 fp16=1
-max_tokens=100000
 step_valid=0
-# decoding setting
+# Decoding Settings
 dec_model=checkpoint_best.pt
 cer=0
 ctc_infer=0
@@ -87,8 +80,12 @@ len_penalty=1.0
 single=0
 epoch_ensemble=0
 best_ensemble=1
-infer_parameters=
+infer_score=0
+# infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"
+data_config=config.yaml
+# Parsing Options
 if [[ ${speed_perturb} -eq 1 ]]; then
    data_dir=${data_dir}_sp
    exp_prefix=${exp_prefix}_sp
@@ -116,13 +113,6 @@ export NCCL_IB_HCA=$ARNOLD_RDMA_DEVICE:1
 export NCCL_IB_GID_INDEX=3
 export NCCL_SOCKET_IFNAME=eth0
-HOSTS=$ARNOLD_WORKER_HOSTS
-HOST=(${HOSTS//,/ })
-HOST_SPLIT=(${HOST//:/ })
-PORT=${HOST_SPLIT[1]}
-INIT_METHOD="tcp://${ARNOLD_WORKER_0_HOST}:${ARNOLD_WORKER_0_PORT}"
-DIST_RANK=$((ARNOLD_ID * ARNOLD_WORKER_GPU))
 export PATH=$PATH:${code_dir}/scripts
 . ./local/parse_options.sh || exit 1;
@@ -136,22 +126,27 @@ if [[ -z ${exp_name} ]]; then
        exp_name=${exp_name}_${exp_subfix}
    fi
 fi
-ckpt_dir=${code_dir}/checkpoints/
-model_dir=${code_dir}/checkpoints/${data_model_subfix}/${sub_tag}/${exp_name}
-echo "stage: $stage"
+ckpt_dir=${root_dir}/checkpoints/
-echo "stop_stage: $stop_stage"
+model_dir=${root_dir}/checkpoints/${data_model_subfix}/${sub_tag}/${exp_name}
+# Start
 cd ${code_dir}
+echo "Start Stage: $stage"
+echo "Stop  Stage: $stop_stage"
+if [[ `pip list | grep fairseq | wc -l` -eq 0 ]]; then 
+    echo "Default Stage: env configure"
+    pip3 install -e ${code_dir}
+fi
 if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
-    echo "stage -1: Data Download"
+    echo "Stage -1: Data Download"
-    # pass
 fi
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    ### Task dependent. You have to do the following data preparation yourself.
-    ### But you can utilize Kaldi recipes in most cases
+    echo "Stage 0: Data Preparation"
-    echo "stage 0: Data Preparation"
    if [[ ! -e ${data_dir} ]]; then
        mkdir -p ${data_dir}
@@ -160,8 +155,6 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    cmd="python3 ${code_dir}/examples/speech_to_text/prep_librispeech_data.py
        --data-root ${org_data_dir}
        --output-root ${data_dir}
-		--cmvn-type global
-		--gcmvn-max-num 300000
        --vocab-type ${vocab_type}
        --vocab-size ${vocab_size}"
@@ -178,17 +171,8 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    [[ $eval -eq 1 ]] && eval ${cmd}
 fi
-echo "stage 1: env configure"
+if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
-if [[ `pip list | grep fairseq | wc -l` -eq 0 ]]; then 
+    echo "Stage 1: Network Training"
-    pip3 install -e ${code_dir} -i https://bytedpypi.byted.org/simple  --no-build-isolation --default-timeout=10000
-fi
-if [[ -d /mnt/bn/nas-xc-1/checkpoints && ! -d ${code_dir}/checkpoints ]]; then
-    ln -s /mnt/bn/nas-xc-1/checkpoints ${code_dir}
-fi
-if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    echo "stage 2: ASR Network Training"
    [[ ! -d ${data_dir} ]] && echo "The data dir ${data_dir} is not existing!" && exit 1;
    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
@@ -198,6 +182,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
            source ./local/utils.sh
            device=$(get_devices $gpu_num 0)
        fi
+        export CUDA_VISIBLE_DEVICES=${device}
    fi
    echo -e "data=${data_dir} model=${model_dir}"
@@ -249,8 +234,6 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
        cmd="${cmd}
        --distributed-world-size $gpu_num
        --ddp-backend no_c10d"
-        # --distributed-init-method ${INIT_METHOD}
-        # --distributed-rank ${DIST_RANK}"
    fi
    if [[ $fp16 -eq 1 ]]; then
        cmd="${cmd}
@@ -291,18 +274,13 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    echo "${time} | ${data_dir} | ${exp_name} | ${model_dir} " >> $log
    tail -n 50 ${log} > tmp.log
    mv tmp.log $log
-    # export CUDA_VISIBLE_DEVICES=${device}
    log=${model_dir}/train.log
    cmd="${cmd} 2>&1 | tee -a ${log}"
    #cmd="nohup ${cmd} >> ${log} 2>&1 &"
    if [[ $eval -eq 1 ]]; then
        # tensorboard
-        if [[ -z ${ARNOLD_TENSORBOARD_CURRENT_PORT} ]]; then
        port=6666
-        else
-            port=${ARNOLD_TENSORBOARD_CURRENT_PORT}
-        fi
        tensorboard --logdir ${model_dir} --port ${port} --bind_all &
        echo "${cmd}" > ${model_dir}/cmd
@@ -312,8 +290,8 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    fi
 fi
-if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
+if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    echo "stage 3: ASR Decoding"
+    echo "Stage 2: Decoding"
    dec_models=
    if [[ ${single} -eq 1 ]]; then
        dec_models=${dec_model}
@@ -355,8 +333,8 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
            source ./local/utils.sh
            device=$(get_devices $gpu_num 0)
        fi
+        export CUDA_VISIBLE_DEVICES=${device}
    fi
-    # export CUDA_VISIBLE_DEVICES=${device}
    for dec_model in ${dec_models[@]}; do
        suffix=beam${beam_size}_alpha${len_penalty}_tokens${max_tokens}
@@ -419,13 +397,13 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
            echo -e "\033[34mRun command: \n${cmd} \033[0m"
+            cd ${code_dir}
            if [[ $eval -eq 1 ]]; then
-                src_ctc_file=translation-${subset}.txt.ctc
+                ctc_file=translation-${subset}.ctc
-                if [[ -f ${model_dir}/${src_ctc_file} ]]; then
+                if [[ -f ${model_dir}/${ctc_file} ]]; then
-                    rm ${model_dir}/${src_ctc_file}
+                    rm ${model_dir}/${ctc_file}
                fi
-                cd ${code_dir}
                eval $cmd
                echo "" >> ${result_file}
                tail -n 2 ${model_dir}/generate-${subset}.txt >> ${result_file}
@@ -441,27 +419,27 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
                fi
                trans_file=translation-${subset}-${suffix}.txt
-                if [[ ${ctc_infer} -eq 1 && -f ${model_dir}/${src_ctc_file} ]]; then
+                if [[ ${ctc_infer} -eq 1 && -f ${model_dir}/${ctc_file} ]]; then
                    ref_file=${model_dir}/${subset}.${src_lang}
                    if [[ ! -f ${ref_file} ]]; then
                        python3 ./local/extract_txt_from_tsv.py ${data_dir}/${subset}.tsv ${ref_file} "src_text"
                    fi
                    if [[ -f ${ref_file} ]]; then
-                        src_ctc=$(mktemp -t temp.record.XXXXXX)
+                        ctc=$(mktemp -t temp.record.XXXXXX)
                        cd ./local
-                        ./cal_wer.sh ${model_dir} ${subset} ${trans_file} ${src_ctc_file} ${ref_file} > ${src_ctc}
+                        ./cal_wer.sh ${model_dir} ${subset} ${trans_file} ${ctc_file} ${ref_file} > ${ctc}
                        cd ..
                        echo "CTC WER" >> ${result_file}
-                        tail -n 2 ${src_ctc} >> ${result_file}
+                        tail -n 2 ${ctc} >> ${result_file}
                        src_bleu=$(mktemp -t temp.record.XXXXXX)
                        cd local
-                        ./cal_ctc_bleu.sh ${model_dir} ${subset} ${trans_file} ${src_ctc_file} ${ref_file} ${tokenizer} ${src_lang} > ${src_bleu}
+                        ./cal_ctc_bleu.sh ${model_dir} ${subset} ${trans_file} ${ctc_file} ${ref_file} ${tokenizer} ${src_lang} > ${src_bleu}
                        cd ..
                        cat ${src_bleu} >> ${result_file}
-                        rm ${src_ctc} ${src_bleu}
+                        rm ${ctc} ${src_bleu}
                    else
                        echo "No reference for source language."
                    fi

--- a/egs/librispeech/asr/train.sh
+++ b/egs/librispeech/asr/train.sh
@@ -59,7 +59,7 @@ train_config=$(echo ${config_list[*]} | sed 's/ /,/g')
 cmd="./run.sh
    --stage 1
-    --stop_stage 4
+    --stop_stage 2
    --gpu_num ${gpu_num}
    --update_freq ${update_freq}
    --train_config ${train_config}

--- a/egs/mustc/asr/run.sh
+++ b/egs/mustc/asr/run.sh
@@ -85,8 +85,8 @@ dec_model=checkpoint_best.pt
 n_average=10
 beam_size=5
 len_penalty=1.0
-infer_score=1
+infer_score=0
-infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"
+# infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"
 # Parsing Options
 if [[ ${speed_perturb} -eq 1 ]]; then

--- a/egs/mustc/asr/train.sh
+++ b/egs/mustc/asr/train.sh
@@ -33,7 +33,7 @@ train_config=$(echo ${config_list[*]} | sed 's/ /,/g')
 cmd="./run.sh
    --stage 1
-    --stop_stage 4
+    --stop_stage 2
    --gpu_num ${gpu_num}
    --update_freq ${update_freq}
    --train_config ${train_config}

--- a/egs/mustc/mt/run.sh
+++ b/egs/mustc/mt/run.sh
@@ -84,8 +84,8 @@ dec_model=checkpoint_best.pt
 n_average=10
 beam_size=5
 len_penalty=1.0
-infer_score=1
+infer_score=0
-infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"
+# infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"
 # Parsing Options
 . ./local/parse_options.sh || exit 1;

--- a/egs/mustc/st/conf/big.yaml
+++ b/egs/mustc/st/conf/big.yaml
-arch: s2t_transformer_s
+arch: s2t_transformer_m
 share-decoder-input-output-embed: True
 optimizer: adam
 clip-norm: 10.0

--- a/egs/mustc/st/conf/ctc.yaml
+++ b/egs/mustc/st/conf/ctc.yaml
 ctc-weight: 0.3
 share-ctc-and-embed: True
-share-inter-ctc: True
-# inter-ctc-weight: 0.2
-# inter-ctc-layers: 6,9
-ctc-pae: none
\ No newline at end of file
--- a/egs/mustc/st/conf/mixup.yaml
+++ b/egs/mustc/st/conf/mixup.yaml
 inter-mixup: True
 inter-mixup-layer: -1
 inter-mixup-decoder-layer: 0
 inter-mixup-prob: 1.0
 inter-mixup-ratio: 1.0
 inter-mixup-beta: 0.2
 inter-mixup-keep-org: False
 inter-mixup-decoder-emb: False
 ctc-mixup-consistent-weight: 0
 inter-ctc-mixup-consistent-weight: 0
 mixup-consistent-weight: 0
 cal-mixup-loss: True
 no-specaugment: False
 layer-out-norm: False

--- a/egs/mustc/st/conf/pds_base_32.yaml
+++ b/egs/mustc/st/conf/pds_base_32.yaml
 arch: pdss2t_transformer_s_32
 encoder-embed-dim: 256
 pds-stages: 5
 pds-layers: 2_2_3_3_2

--- a/egs/mustc/st/conf/pds_base_8_444.yaml
+++ b/egs/mustc/st/conf/pds_base_8_444.yaml
@@ -8,7 +8,7 @@ pds-fusion: False
 pds-fusion-method: all_conv2
 pds-fusion-layers: 0_1_1_1 
 pds-fusion-weight: 0.2_0.3_0.5
-pds-embed-dims: 256_256_256_256
+pds-embed-dims: 256_256_256
 pds-ds-method: conv
 pds-embed-norm: True
 pds-position-embed: 1_1_1

--- a/egs/mustc/st/conf/pds_big_16.yaml
+++ b/egs/mustc/st/conf/pds_big_16.yaml
-arch: pdss2t_transformer_s_16
+arch: pdss2t_transformer_m_16
 encoder-embed-dim: 512
 pds-stages: 4
@@ -13,7 +13,7 @@ pds-ds-method: conv
 pds-embed-norm: True
 pds-position-embed: 1_1_1_1
 pds-kernel-sizes: 5_5_5_5
-pds-ffn-ratios: 8_8_8_8
+pds-ffn-ratios: 4_4_4_4
 pds-attn-heads: 8_8_8_8
 share-decoder-input-output-embed: True
@@ -33,11 +33,11 @@ activation-fn: relu
 encoder-ffn-embed-dim: 2048
 encoder-layers: 12
 decoder-layers: 6
-encoder-attention-heads: 4
+encoder-attention-heads: 8
 decoder-embed-dim: 512
 decoder-ffn-embed-dim: 2048
-decoder-attention-heads: 4
+decoder-attention-heads: 8
 #load-pretrained-encoder-from:
 #load-pretrained-decoder-from:
\ No newline at end of file
--- a/egs/mustc/st/conf/pds_big_32.yaml
+++ b/egs/mustc/st/conf/pds_big_32.yaml
-arch: pdss2t_transformer_s_16
+arch: pdss2t_transformer_m_32
 encoder-embed-dim: 512
 pds-stages: 5
@@ -13,7 +13,7 @@ pds-ds-method: conv
 pds-embed-norm: True
 pds-position-embed: 1_1_1_1_1
 pds-kernel-sizes: 5_5_5_5_5
-pds-ffn-ratios: 8_8_8_8_8
+pds-ffn-ratios: 4_4_4_4_4
 pds-attn-heads: 8_8_8_8_8
 share-decoder-input-output-embed: True
@@ -33,11 +33,8 @@ activation-fn: relu
 encoder-ffn-embed-dim: 2048
 encoder-layers: 12
 decoder-layers: 6
-encoder-attention-heads: 4
+encoder-attention-heads: 8
 decoder-embed-dim: 512
 decoder-ffn-embed-dim: 2048
-decoder-attention-heads: 4
+decoder-attention-heads: 8
\ No newline at end of file
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
\ No newline at end of file
--- a/egs/mustc/st/conf/pds_big_8.yaml
+++ b/egs/mustc/st/conf/pds_big_8.yaml
-arch: pdss2t_transformer_s_8
+arch: pdss2t_transformer_m_8
 encoder-embed-dim: 512
 pds-stages: 4
@@ -13,7 +13,7 @@ pds-ds-method: conv
 pds-embed-norm: True
 pds-position-embed: 1_1_1_1
 pds-kernel-sizes: 5_5_5_5
-pds-ffn-ratios: 8_8_8_8
+pds-ffn-ratios: 4_4_4_4
 pds-attn-heads: 8_8_8_8
 share-decoder-input-output-embed: True
@@ -33,11 +33,11 @@ activation-fn: relu
 encoder-ffn-embed-dim: 2048
 encoder-layers: 12
 decoder-layers: 6
-encoder-attention-heads: 4
+encoder-attention-heads: 8
 decoder-embed-dim: 512
 decoder-ffn-embed-dim: 2048
-decoder-attention-heads: 4
+decoder-attention-heads: 8
 #load-pretrained-encoder-from:
 #load-pretrained-decoder-from:
\ No newline at end of file
--- a/egs/mustc/st/conf/pds_deep_16.yaml
+++ b/egs/mustc/st/conf/pds_deep_16.yaml
-arch: pdss2t_transformer_s_16
+arch: pdss2t_transformer_sd_16
 encoder-embed-dim: 256
 pds-stages: 4
@@ -31,7 +31,7 @@ label_smoothing: 0.1
 dropout: 0.1
 activation-fn: relu
 encoder-ffn-embed-dim: 2048
-encoder-layers: 12
+encoder-layers: 18
 decoder-layers: 6
 encoder-attention-heads: 4

--- a/egs/mustc/st/conf/pds_deep_32.yaml
+++ b/egs/mustc/st/conf/pds_deep_32.yaml
-arch: pdss2t_transformer_s_32
+arch: pdss2t_transformer_sd_32
 encoder-embed-dim: 256
 pds-stages: 5

--- a/egs/mustc/st/conf/pds_deep_8.yaml
+++ b/egs/mustc/st/conf/pds_deep_8.yaml
-arch: pdss2t_transformer_s_8
+arch: pdss2t_transformer_sd_8
 encoder-embed-dim: 256
 pds-stages: 4
@@ -38,6 +38,3 @@ encoder-attention-heads: 4
 decoder-embed-dim: 256
 decoder-ffn-embed-dim: 2048
 decoder-attention-heads: 4
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
\ No newline at end of file
--- a/egs/mustc/st/conf/rpr.yaml
+++ b/egs/mustc/st/conf/rpr.yaml
 encoder-attention-type: rel_pos
 #encoder-attention-type: rel_pos_legacy
 #encoder-attention-type: rel_selfattn
 #encoder-attention-type: relative

--- a/egs/mustc/st/decode.sh
+++ b/egs/mustc/st/decode.sh
@@ -3,7 +3,7 @@
 gpu_num=1
 data_dir=
-test_subset=(tst-COMMON)
+test_subset=(dev tst-COMMON)
 exp_name=
 if [ "$#" -eq 1 ]; then

--- a/egs/mustc/st/run.sh
+++ b/egs/mustc/st/run.sh
@@ -85,9 +85,8 @@ ctc_infer=0
 n_average=10
 beam_size=5
 len_penalty=1.0
-infer_score=1
+infer_score=0
-infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"
+# infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"
 # Parsing Options
 if [[ ${share_dict} -eq 1 ]]; then
@@ -428,7 +427,6 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
        --path ${model_dir}/${dec_model}
        --results-path ${model_dir}
        --max-tokens ${max_tokens}
-        --batch-size 1
        --beam ${beam_size}
        --skip-invalid-size-inputs-valid-test
        --lenpen ${len_penalty}"

--- a/egs/mustc/st/train.sh
+++ b/egs/mustc/st/train.sh
 #!/usr/bin/env bash
+# training the model
 gpu_num=8
 update_freq=1
 max_tokens=40000
@@ -37,7 +39,7 @@ train_config=$(echo ${config_list[*]} | sed 's/ /,/g')
 cmd="./run.sh
    --stage 1
-    --stop_stage 4
+    --stop_stage 2
    --gpu_num ${gpu_num}
    --update_freq ${update_freq}
    --train_config ${train_config}