accu update

f1dc8723 · xuchen · f76b8cb3 · f1dc8723 · f1dc8723 · f1dc8723
Commit f1dc8723 authored Nov 26, 2023 by xuchen
--- a/egs/aishell/asr/conf/ipa.yaml
+++ b/egs/aishell/asr/conf/ipa.yaml
@@ -9,7 +9,7 @@ inter-mixup-decoder-emb: False
 ctc-mixup-consistent-weight: 0
 inter-ctc-mixup-consistent-weight: 0
 mixup-consistent-weight: 0
-cal-mixup-loss: True
+mixup-no-hard-loss: False
 no-specaugment: False
 layer-out-norm: False


--- a/egs/aishell/asr/decode.sh
+++ b/egs/aishell/asr/decode.sh
@@ -14,10 +14,10 @@ cer=1
 ctc_infer=1
 n_average=10
 beam_size=5
-infer_ctc_weight=0.1
+infer_ctc_weight=0
 len_penalty=1.0
 max_tokens=50000
-batch_size=1
+batch_size=0
 infer_debug=0
 dec_model=checkpoint_best.pt


--- a/egs/aishell/asr/run.sh
+++ b/egs/aishell/asr/run.sh
@@ -40,7 +40,7 @@ task=speech_to_text
 vocab_type=unigram
 vocab_type=char
 vocab_size=10000
-speed_perturb=1
+speed_perturb=0
 lcrm=0
 tokenizer=0
 use_raw_audio=0
@@ -114,16 +114,6 @@ if [[ ${use_raw_audio} -eq 1 ]]; then
    data_dir=${data_dir}_raw
    exp_prefix=${exp_prefix}_raw
 fi
-if [[ "${vocab_type}" == "char" ]]; then
-    data_dir=${data_dir}_char
-    exp_prefix=${exp_prefix}_char
-fi
-
-# setup nccl envs
-export NCCL_IB_DISABLE=0
-export NCCL_IB_HCA=$ARNOLD_RDMA_DEVICE:1
-export NCCL_IB_GID_INDEX=3
-export NCCL_SOCKET_IFNAME=eth0

 export PATH=$PATH:${code_dir}/scripts
 . ./local/parse_options.sh || exit 1;
@@ -227,11 +217,9 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    cp -f ${pwd_dir}/`basename ${BASH_SOURCE[0]}` ${model_dir}
    cp -f ${pwd_dir}/train.sh ${model_dir}

-    extra_parameter="${extra_parameter}
-        --train-config ${pwd_dir}/conf/basis.yaml"
-    cp -f ${pwd_dir}/conf/basis.yaml ${model_dir}
+    train_config=basis,${train_config}
    config_list="${train_config//,/ }"
-    idx=1
+    idx=0
    for config in ${config_list[@]}
    do
        config_path=${pwd_dir}/conf/${config}.yaml
@@ -241,8 +229,13 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
        fi
        cp -f ${config_path} ${model_dir}

+        if [[ $idx -eq 0 ]]; then
+        extra_parameter="${extra_parameter}
+        --train-config ${config_path}"
+        else
        extra_parameter="${extra_parameter}
        --train-config${idx} ${config_path}"
+        fi
        idx=$((idx + 1))
    done

@@ -307,8 +300,8 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    mv tmp.log $log

    log=${model_dir}/train.log
-    # cmd="${cmd} 2>&1 | tee -a ${log}"
-    cmd="${cmd} >> ${log} 2>&1 "
+    cmd="${cmd} 2>&1 | tee -a ${log}"
+    #cmd="${cmd} >> ${log} 2>&1 "
    if [[ $eval -eq 1 ]]; then
        # tensorboard
        port=6666
@@ -316,8 +309,6 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    
        echo "${cmd}" > ${model_dir}/cmd
        eval $cmd
-        #sleep 2s
-        #tail -n "$(wc -l ${log} | awk '{print $1+1}')" -f ${log}
    fi
 fi

@@ -455,9 +446,13 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
            cd ${code_dir}
            if [[ $eval -eq 1 ]]; then
                ctc_file=translation-${subset}.ctc
+                xctc_file=translation-${subset}.xctc
                if [[ -f ${model_dir}/${ctc_file} ]]; then
                    rm ${model_dir}/${ctc_file}
                fi
+                if [[ -f ${model_dir}/${xctc_file} ]]; then
+                    rm ${model_dir}/${xctc_file}
+                fi

                eval $cmd
                echo "" >> ${result_file}
@@ -474,6 +469,9 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
                fi

                trans_file=translation-${subset}-${suffix}.txt
+                if [[ ! -f ${model_dir}/${ctc_file} && -f ${model_dir}/${xctc_file} ]]; then  # no CTC output: fall back to the XCTC file
+                    ctc_file=${xctc_file}
+                fi
                if [[ ${ctc_infer} -eq 1 && -f ${model_dir}/${ctc_file} ]]; then
                    ref_file=${model_dir}/${subset}.${src_lang}
                    if [[ ! -f ${ref_file} ]]; then
@@ -483,7 +481,6 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
                        ctc=$(mktemp -t temp.record.XXXXXX)
                        cd ./local
                        cmd="./cal_wer.sh ${model_dir} ${subset} ${trans_file} ${ctc_file} ${ref_file} > ${ctc}"
-                        #echo $cmd
                        eval $cmd
                        cd ..


--- a/egs/covost/asr/conf/mixup.yaml
+++ b/egs/covost/asr/conf/mixup.yaml
@@ -8,7 +8,7 @@ inter-mixup-keep-org: False
 inter-mixup-decoder-emb: False
 ctc-mixup-consistent-weight: 0
 mixup-consistent-weight: 0
-cal-mixup-loss: True
+mixup-no-hard-loss: False
 no-specaugment: False
 layer-out-norm: False


--- a/egs/covost/st/conf/mixup.yaml
+++ b/egs/covost/st/conf/mixup.yaml
@@ -8,7 +8,7 @@ inter-mixup-keep-org: False
 inter-mixup-decoder-emb: False
 ctc-mixup-consistent-weight: 0
 mixup-consistent-weight: 0
-cal-mixup-loss: True
+mixup-no-hard-loss: False
 no-specaugment: False
 layer-out-norm: False


--- a/egs/fisher_callhome/asr/conf/mixup.yaml
+++ b/egs/fisher_callhome/asr/conf/mixup.yaml
@@ -8,7 +8,7 @@ inter-mixup-keep-org: False
 inter-mixup-decoder-emb: False
 ctc-mixup-consistent-weight: 0
 mixup-consistent-weight: 0
-cal-mixup-loss: True
+mixup-no-hard-loss: False
 no-specaugment: False
 layer-out-norm: False


--- a/egs/fisher_callhome/st/conf/mixup.yaml
+++ b/egs/fisher_callhome/st/conf/mixup.yaml
@@ -8,7 +8,7 @@ inter-mixup-keep-org: False
 inter-mixup-decoder-emb: False
 ctc-mixup-consistent-weight: 0
 mixup-consistent-weight: 0
-cal-mixup-loss: True
+mixup-no-hard-loss: False
 no-specaugment: False
 layer-out-norm: False


--- a/egs/librispeech/asr/conf/ipa.yaml
+++ b/egs/librispeech/asr/conf/ipa.yaml
@@ -9,7 +9,7 @@ inter-mixup-decoder-emb: False
 ctc-mixup-consistent-weight: 0
 inter-ctc-mixup-consistent-weight: 0
 mixup-consistent-weight: 0
-cal-mixup-loss: True
+mixup-no-hard-loss: False
 no-specaugment: False
 layer-out-norm: False


--- a/egs/librispeech/asr/conf/reproduction_encdec_aipa_bilctc_kd.yaml
+++ b/egs/librispeech/asr/conf/reproduction_encdec_aipa_bilctc_kd.yaml
+arch: s2t_transformer_s
+share-decoder-input-output-embed: True
+optimizer: adam
+clip-norm: 10.0
+lr-scheduler: inverse_sqrt
+warmup-init-lr: 1e-7
+warmup-updates: 10000
+lr: 2e-3
+adam_betas: (0.9,0.98)
+
+criterion: label_smoothed_cross_entropy_with_ctc
+label_smoothing: 0.1
+
+subsampling-type: conv1d
+subsampling-layers: 2
+subsampling-filter: 1024
+subsampling-kernel: 5
+subsampling-stride: 2
+subsampling-norm: none
+subsampling-activation: glu
+
+dropout: 0.1
+activation-fn: relu
+encoder-embed-dim: 256
+encoder-ffn-embed-dim: 2048
+encoder-layers: 12
+decoder-layers: 6
+encoder-attention-heads: 4
+
+decoder-embed-dim: 256
+decoder-ffn-embed-dim: 2048
+decoder-attention-heads: 4
+attention-dropout: 0.1
+activation-dropout: 0.1
+
+#load-pretrained-encoder-from:
+#load-pretrained-decoder-from:
+
+# Append-based Interpolation Augmentation
+inter-mixup: True
+
+inter-mixup-layer: -1
+inter-mixup-decoder-layer: 0
+inter-mixup-prob: 1.0
+inter-mixup-ratio: 1.0
+inter-mixup-beta: 0.2
+
+inter-mixup-keep-org: True
+inter-mixup-decoder-emb: True
+
+mixup-no-hard-loss: False
+no-specaugment: False
+layer-out-norm: False
+
+inter-mixup-ratio-decay: False
+inter-mixup-ratio-decay-params: 20000,40000,0
+
+# Bilingual CTC
+share-ctc-and-embed: True
+share-xctc-and-embed: True
+ctc-weight: 0.05
+xctc-weight: 0.2
+
+# InterCTC
+inter-ctc-weight: 0.025
+inter-ctc-layers: 6,9
+share-inter-ctc: True
+inter-xctc-weight: 0.1
+inter-xctc-layers: 6,9
+
+# Prediction-aware encoding
+ctc-pae: inter_league
+xctc-pae: inter_league
+pae-unnorm-input: True
+
+ctc-mixup-consistent-weight: 0.1
+xctc-mixup-consistent-weight: 0.025
+inter-ctc-mixup-consistent-weight: 0.05
+inter-xctc-mixup-consistent-weight: 0.0125
+# mixup-consistent-weight: 0.5
+
+# Conformer
+macaron-style: True
+use-cnn-module: True
+cnn-module-kernel: 15
+encoder-attention-type: rel_pos
+encoder-activation-fn: swish
+layer-padding-mask: True
\ No newline at end of file
--- a/egs/librispeech/asr/conf/reproduction_encdec_aipa_kd.yaml
+++ b/egs/librispeech/asr/conf/reproduction_encdec_aipa_kd.yaml
@@ -48,7 +48,7 @@ inter-mixup-beta: 0.2
 inter-mixup-keep-org: True
 inter-mixup-decoder-emb: True

-cal-mixup-loss: True
+mixup-no-hard-loss: False
 no-specaugment: False
 layer-out-norm: False

@@ -67,7 +67,7 @@ pae-unnorm-input: True

 ctc-mixup-consistent-weight: 0.15
 inter-ctc-mixup-consistent-weight: 0.1
-mixup-consistent-weight: 0.5
+# mixup-consistent-weight: 0.5

 # Conformer
 macaron-style: True

--- a/egs/librispeech/asr/conf/reproduction_encdec_aipa_kd_woiploss.yaml
+++ b/egs/librispeech/asr/conf/reproduction_encdec_aipa_kd_woiploss.yaml
@@ -48,7 +48,7 @@ inter-mixup-beta: 0.2
 inter-mixup-keep-org: True
 inter-mixup-decoder-emb: True

-cal-mixup-loss: False
+mixup-no-hard-loss: True
 no-specaugment: False
 layer-out-norm: False


--- a/egs/librispeech/asr/conf/reproduction_purectc_aipa_bilctc_kd.yaml
+++ b/egs/librispeech/asr/conf/reproduction_purectc_aipa_bilctc_kd.yaml
+arch: s2t_ctc
+encoder-type: transformer
+
+optimizer: adam
+clip-norm: 10.0
+lr-scheduler: inverse_sqrt
+warmup-init-lr: 1e-7
+warmup-updates: 10000
+lr: 2e-3
+adam_betas: (0.9,0.98)
+
+criterion: ctc
+zero_infinity: True
+
+encoder-normalize-before: True
+decoder-normalize-before: True
+
+subsampling-type: conv1d
+subsampling-layers: 2
+subsampling-filter: 1024
+subsampling-kernel: 5
+subsampling-stride: 2
+subsampling-norm: none
+subsampling-activation: glu
+
+dropout: 0.1
+activation-fn: relu
+encoder-embed-dim: 256
+encoder-ffn-embed-dim: 2048
+encoder-layers: 18
+encoder-attention-heads: 4
+
+# Append-based Interpolation Augmentation
+inter-mixup: True
+
+inter-mixup-layer: -1
+inter-mixup-decoder-layer: 0
+inter-mixup-prob: 1.0
+inter-mixup-ratio: 1.0
+inter-mixup-beta: 0.2
+
+inter-mixup-keep-org: True
+inter-mixup-decoder-emb: True
+
+mixup-no-hard-loss: False
+no-specaugment: False
+layer-out-norm: False
+
+inter-mixup-ratio-decay: False
+inter-mixup-ratio-decay-params: 20000,40000,0
+
+# Bilingual CTC
+share-ctc-and-embed: True
+share-xctc-and-embed: True
+ctc-weight: 0.3
+xctc-weight: 1
+
+# InterCTC
+inter-ctc-weight: 0.2
+inter-ctc-layers: 6,9,12,15
+share-inter-ctc: True
+inter-xctc-weight: 1.0
+inter-xctc-layers: 6,9,12,15
+
+# Prediction-aware encoding
+ctc-pae: inter_league
+xctc-pae: inter_league
+pae-unnorm-input: True
+
+ctc-mixup-consistent-weight: 0.15
+inter-ctc-mixup-consistent-weight: 0.1
+mixup-consistent-weight: 0.5
+
+# Conformer
+macaron-style: True
+use-cnn-module: True
+cnn-module-kernel: 15
+encoder-attention-type: rel_pos
+encoder-activation-fn: swish
+layer-padding-mask: True
\ No newline at end of file
--- a/egs/librispeech/asr/conf/reproduction_purectc_aipa_kd.yaml
+++ b/egs/librispeech/asr/conf/reproduction_purectc_aipa_kd.yaml
@@ -43,7 +43,7 @@ inter-mixup-beta: 0.2
 inter-mixup-keep-org: True
 inter-mixup-decoder-emb: True

-cal-mixup-loss: True
+mixup-no-hard-loss: False
 no-specaugment: False
 layer-out-norm: False

@@ -59,9 +59,8 @@ share-ctc-and-embed: True
 ctc-pae: inter_league
 pae-unnorm-input: True

-ctc-mixup-consistent-weight: 0.15
-inter-ctc-mixup-consistent-weight: 0.1
-mixup-consistent-weight: 0.5
+ctc-mixup-consistent-weight: 0.5
+inter-ctc-mixup-consistent-weight: 0.5

 # Conformer
 macaron-style: True

--- a/egs/librispeech/asr/conf/reproduction_purectc_aipa_kd_woiploss.yaml
+++ b/egs/librispeech/asr/conf/reproduction_purectc_aipa_kd_woiploss.yaml
@@ -43,7 +43,7 @@ inter-mixup-beta: 0.2
 inter-mixup-keep-org: True
 inter-mixup-decoder-emb: True

-cal-mixup-loss: False
+mixup-no-hard-loss: True
 no-specaugment: False
 layer-out-norm: False

@@ -59,9 +59,8 @@ share-ctc-and-embed: True
 ctc-pae: inter_league
 pae-unnorm-input: True

-ctc-mixup-consistent-weight: 0.15
-inter-ctc-mixup-consistent-weight: 0.1
-mixup-consistent-weight: 0.5
+ctc-mixup-consistent-weight: 0.5
+inter-ctc-mixup-consistent-weight: 0.5

 # Conformer
 macaron-style: True

--- a/egs/librispeech/asr/decode.sh
+++ b/egs/librispeech/asr/decode.sh
@@ -3,6 +3,7 @@
 gpu_num=1

 data_tag=asr
+#data_tag=asr_joint_lcrm_niu
 test_subset=(dev-clean dev-other test-clean test-other all)
 test_subset=(dev-clean dev-other test-clean test-other)

@@ -11,13 +12,13 @@ if [ "$#" -eq 1 ]; then
    exp_name=$1
 fi

-ctc_infer=0
+ctc_infer=1
 n_average=10
 beam_size=5
-infer_ctc_weight=0.1
+infer_ctc_weight=0
 len_penalty=1.0
 max_tokens=50000
-batch_size=1
+batch_size=0
 infer_debug=0
 dec_model=checkpoint_best.pt


--- a/egs/librispeech/asr/run.sh
+++ b/egs/librispeech/asr/run.sh
@@ -71,7 +71,7 @@ step_valid=0
 # Decoding Settings
 dec_model=checkpoint_best.pt
 cer=0
-ctc_infer=0
+ctc_infer=1
 infer_ctc_weight=0
 ctc_self_ensemble=0
 ctc_inter_logit=0
@@ -81,7 +81,7 @@ beam_size=5
 len_penalty=1.0
 single=0
 epoch_ensemble=1
-best_ensemble=0
+best_ensemble=1
 infer_debug=0
 infer_score=0
 #infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"
@@ -213,9 +213,6 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
        fi
        idx=$((idx + 1))
    done
-    #extra_parameter="${extra_parameter}
-    #    --train-config${idx} ${pwd_dir}/conf/basis.yaml"
-    #cp -f ${pwd_dir}/conf/basis.yaml ${model_dir}

    cmd="python3 -u ${code_dir}/fairseq_cli/train.py
        ${data_dir}
@@ -278,8 +275,8 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    mv tmp.log $log

    log=${model_dir}/train.log
-    # cmd="${cmd} 2>&1 | tee -a ${log}"
-    cmd="${cmd} >> ${log} 2>&1 "
+    cmd="${cmd} 2>&1 | tee -a ${log}"
+    #cmd="${cmd} >> ${log} 2>&1 "
    if [[ $eval -eq 1 ]]; then
        # tensorboard
        port=6666
@@ -287,8 +284,6 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    
        echo "${cmd}" > ${model_dir}/cmd
        eval $cmd
-        #sleep 2s
-        #tail -n "$(wc -l ${log} | awk '{print $1+1}')" -f ${log}
    fi
 fi

@@ -426,9 +421,13 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
            cd ${code_dir}
            if [[ $eval -eq 1 ]]; then
                ctc_file=translation-${subset}.ctc
+                xctc_file=translation-${subset}.xctc
                if [[ -f ${model_dir}/${ctc_file} ]]; then
                    rm ${model_dir}/${ctc_file}
                fi
+                if [[ -f ${model_dir}/${xctc_file} ]]; then
+                    rm ${model_dir}/${xctc_file}
+                fi

                eval $cmd
                echo "" >> ${result_file}
@@ -445,6 +444,9 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
                fi

                trans_file=translation-${subset}-${suffix}.txt
+                if [[ ! -f ${model_dir}/${ctc_file} && -f ${model_dir}/${xctc_file} ]]; then  # no CTC output: fall back to the XCTC file
+                    ctc_file=${xctc_file}
+                fi
                if [[ ${ctc_infer} -eq 1 && -f ${model_dir}/${ctc_file} ]]; then
                    ref_file=${model_dir}/${subset}.${src_lang}
                    if [[ ! -f ${ref_file} ]]; then
@@ -454,7 +456,6 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
                        ctc=$(mktemp -t temp.record.XXXXXX)
                        cd ./local
                        cmd="./cal_wer.sh ${model_dir} ${subset} ${trans_file} ${ctc_file} ${ref_file} > ${ctc}"
-                        #echo $cmd
                        eval $cmd
                        cd ..

@@ -477,4 +478,4 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
        echo
        cat ${result_file}
    done
-fi
+fi
\ No newline at end of file