Commit e7422a42 by xuchen

add yaml file for reproduction

parent cf64b587
inter-ctc-weight: 0.2
inter-ctc-layers: 6,9
share-inter-ctc: True
ctc-pae: none
# ctc-pae: inter_league
......
encoder-attention-type: local
hard-mask-window: 0
gauss-mask-sigma: 3
init-mask-weight: 0
\ No newline at end of file
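A hedged reading of the local-attention options above (not this repo's code): gauss-mask-sigma is taken as the width of a Gaussian locality bias added to the attention logits, and hard-mask-window: 0 as disabling any hard cutoff.

import torch

def gaussian_attn_bias(seq_len: int, sigma: float = 3.0) -> torch.Tensor:
    # bias[i, j] = -(i - j)^2 / (2 * sigma^2): favors nearby positions
    pos = torch.arange(seq_len, dtype=torch.float32)
    dist = pos.unsqueeze(0) - pos.unsqueeze(1)
    return -(dist ** 2) / (2.0 * sigma ** 2)

# usage sketch: logits = q @ k.transpose(-1, -2) / d ** 0.5 + gaussian_attn_bias(T)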
......@@ -3,10 +3,11 @@ inter-mixup-layer: -1
inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0
inter-mixup-beta: 0.5
inter-mixup-beta: 0.2
inter-mixup-keep-org: False
inter-mixup-decoder-emb: False
ctc-mixup-consistent-weight: 0
inter-ctc-mixup-consistent-weight: 0
mixup-consistent-weight: 0
cal-mixup-loss: True
no-specaugment: False
......
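The inter-mixup-* options configure the interpolation augmentation; the hunk above lowers inter-mixup-beta from 0.5 to 0.2, which makes Beta(beta, beta) U-shaped so sampled coefficients concentrate near 0 and 1. A minimal sketch of the interpolation step, with illustrative names rather than the repo's actual code:

import torch

def mixup(x: torch.Tensor, beta: float = 0.2):
    # sample the interpolation coefficient from Beta(beta, beta)
    lam = torch.distributions.Beta(beta, beta).sample().item()
    perm = torch.randperm(x.size(0))         # pair each sample with another
    mixed = lam * x + (1.0 - lam) * x[perm]  # convex combination of features
    return mixed, perm, lam                  # lam also weights the paired losses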
arch: pdss2t_transformer_s_8
pds-fusion: True
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
\ No newline at end of file
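For reference, the optimizer block above (warmup-init-lr: 1e-7, lr: 2e-3, warmup-updates: 10000) selects fairseq's inverse_sqrt schedule: linear warmup to the peak rate, then decay with the inverse square root of the update number. A sketch:

def inverse_sqrt_lr(step: int, lr: float = 2e-3,
                    warmup_updates: int = 10000,
                    warmup_init_lr: float = 1e-7) -> float:
    if step < warmup_updates:
        # linear warmup from warmup_init_lr to the peak lr
        return warmup_init_lr + (lr - warmup_init_lr) * step / warmup_updates
    # afterwards, decay proportionally to 1 / sqrt(step)
    return lr * (warmup_updates ** 0.5) / (step ** 0.5)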
encoder-attention-type: rel_selfattn
encoder-attention-type: rel_pos
# encoder-attention-type: relative
# decoder-attention-type: relative
# max-encoder-relative-length: 100
# max-decoder-relative-length: 20
\ No newline at end of file
#!/usr/bin/env bash
gpu_num=0
gpu_num=1
data_dir=
data_tag=
test_subset=(dev tst-COMMON)
exp_name=
......@@ -14,8 +14,11 @@ cer=0
ctc_infer=0
n_average=10
beam_size=5
infer_ctc_weight=0.1
len_penalty=1.0
max_tokens=50000
batch_size=1
infer_debug=0
dec_model=checkpoint_best.pt
cmd="./run.sh
......@@ -28,12 +31,16 @@ cmd="./run.sh
--ctc_infer ${ctc_infer}
--beam_size ${beam_size}
--len_penalty ${len_penalty}
--batch_size ${batch_size}
--max_tokens ${max_tokens}
--dec_model ${dec_model}
--ctc_infer ${ctc_infer}
--infer_ctc_weight ${infer_ctc_weight}
--infer_debug ${infer_debug}
"
if [[ -n ${data_dir} ]]; then
cmd="$cmd --data_dir ${data_dir}"
fi
if [[ -n ${data_tag} ]]; then
cmd="$cmd --data_tag ${data_tag}"
fi

if [[ ${#test_subset[@]} -ne 0 ]]; then
subsets=$(echo ${test_subset[*]} | sed 's/ /,/g')
......
......@@ -12,6 +12,7 @@ wer_standardize = tr.Compose(
tr.SubstituteRegexes({r"<<unk>>": r"@"}),
tr.ToLowerCase(),
tr.RemovePunctuation(),
tr.Strip(),
tr.ExpandCommonEnglishContractions(),
tr.RemoveKaldiNonWords(),
tr.RemoveWhiteSpace(replace_by_space=True),
......
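The hunk above adds tr.Strip() to the WER standardization pipeline. A hedged usage sketch, assuming tr is jiwer.transforms and a jiwer 2.x API with truth_transform/hypothesis_transform keywords; the ReduceToListOfListOfWords terminal step is likewise an assumption:

import jiwer
import jiwer.transforms as tr

wer_standardize = tr.Compose([
    tr.SubstituteRegexes({r"<<unk>>": r"@"}),
    tr.ToLowerCase(),
    tr.RemovePunctuation(),
    tr.Strip(),                      # the transform added by this commit
    tr.ExpandCommonEnglishContractions(),
    tr.RemoveKaldiNonWords(),
    tr.RemoveWhiteSpace(replace_by_space=True),
    tr.ReduceToListOfListOfWords(),  # assumed terminal step
])

score = jiwer.wer("the cat sat", "the cat sat down",
                  truth_transform=wer_standardize,
                  hypothesis_transform=wer_standardize)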
......@@ -78,10 +78,12 @@ data_config=config.yaml
# Decoding Settings
cer=0
ctc_infer=0
ctc_self_ensemble=0
ctc_inter_logit=0
batch_size=0
dec_model=checkpoint_best.pt
ctc_infer=0
infer_ctc_weight=0
n_average=10
beam_size=5
len_penalty=1.0
......@@ -113,13 +115,6 @@ if [[ "${vocab_type}" == "char" ]]; then
data_dir=${data_dir}_char
exp_prefix=${exp_prefix}_char
fi
if [[ ! -d /mnt/bd/data-model && -d /mnt/bd/data-model2 ]]; then
sudo ln -s /mnt/bd/data-model2/ /mnt/bd/data-model
fi
if [[ ! -d ${data_dir} ]]; then
echo "No feature dir ${data_dir}"
exit
fi
export PATH=$PATH:${code_dir}/scripts
. ./local/parse_options.sh || exit 1;
......@@ -300,8 +295,8 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
mv tmp.log $log
log=${model_dir}/train.log
cmd="${cmd} 2>&1 | tee -a ${log}"
#cmd="nohup ${cmd} >> ${log} 2>&1 &"
# cmd="${cmd} 2>&1 | tee -a ${log}"
cmd="${cmd} >> ${log} 2>&1 "
if [[ $eval -eq 1 ]]; then
# tensorboard
port=6666
......@@ -343,34 +338,63 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
fi
suffix=beam${beam_size}_alpha${len_penalty}_tokens${max_tokens}
if [[ ${n_average} -ne 1 ]]; then
suffix=${suffix}_${n_average}
fi
suffix=alpha${len_penalty}
model_str=`echo $dec_model | sed -e "s#checkpoint##" | sed "s#.pt##"`
suffix=${suffix}_${model_str}
if [[ -n ${cer} && ${cer} -eq 1 ]]; then
suffix=${suffix}_cer
else
suffix=${suffix}_wer
fi
suffix=${suffix}_beam${beam_size}
if [[ ${batch_size} -ne 0 ]]; then
suffix=${suffix}_batch${batch_size}
else
suffix=${suffix}_tokens${max_tokens}
fi
if [[ ${ctc_infer} -eq 1 ]]; then
suffix=${suffix}_ctc
fi
if [[ ${ctc_self_ensemble} -eq 1 ]]; then
suffix=${suffix}_ensemble
fi
if [[ ${ctc_inter_logit} -ne 0 ]]; then
suffix=${suffix}_logit${ctc_inter_logit}
fi
if (( $(echo "${infer_ctc_weight} > 0" | bc -l) )); then
suffix=${suffix}_ctc${infer_ctc_weight}
fi
if [[ ${infer_score} -eq 1 ]]; then
suffix=${suffix}_score
fi
suffix=`echo $suffix | sed -e "s#__#_#"`
result_file=${model_dir}/decode_result_${suffix}
[[ -f ${result_file} ]] && rm ${result_file}
test_subset=${test_subset//,/ }
for subset in ${test_subset[@]}; do
subset=${subset}
cmd="python3 ${code_dir}/fairseq_cli/generate.py
if [[ ${infer_debug} -ne 0 ]]; then
cmd="python3 -m debugpy --listen 0.0.0.0:5678 --wait-for-client"
else
cmd="python3 "
fi
cmd="$cmd ${code_dir}/fairseq_cli/generate.py
${data_dir}
--config-yaml ${data_config}
--gen-subset ${subset}
--task speech_to_text
--path ${model_dir}/${dec_model}
--results-path ${model_dir}
--batch-size ${batch_size}
--max-tokens ${max_tokens}
--beam ${beam_size}
--skip-invalid-size-inputs-valid-test
--infer-ctc-weight ${infer_ctc_weight}
--lenpen ${len_penalty}
--lenpen ${len_penalty}
--batch-size 1
--scoring wer
--wer-tokenizer 13a
--wer-lowercase
......@@ -385,6 +409,14 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
cmd="${cmd}
--ctc-infer"
fi
if [[ ${ctc_self_ensemble} -eq 1 ]]; then
cmd="${cmd}
--ctc-self-ensemble"
fi
if [[ ${ctc_inter_logit} -ne 0 ]]; then
cmd="${cmd}
--ctc-inter-logit ${ctc_inter_logit}"
fi
if [[ ${infer_score} -eq 1 ]]; then
cmd="${cmd}
--score-reference"
......
......@@ -17,8 +17,8 @@ eval-bleu-print-samples: True
best_checkpoint_metric: bleu
maximize_best_checkpoint_metric: True
no-epoch-checkpoints: True
#keep-last-epochs: 10
# no-epoch-checkpoints: True
keep-last-epochs: 1
keep-best-checkpoints: 10
num-workers: 8
......
......@@ -3,20 +3,6 @@ criterion: label_smoothed_cross_entropy_with_ctc
# ctc-layer: 6
ctc-weight: 0.3
interleaved-ctc-weight: 0.2
interleaved-ctc-layers: 4
sae-ctc-temperature: 1
interleaved-ctc-drop-prob: 0
ctc-upsampling-ratio: 3
ctc-out-downsampling: False
ctc-out-downsampling-method: maxpooling
\ No newline at end of file
share-interleaved-ctc: True
sae-adapter: inter_league
sae-drop-prob: 0.0
# sae-distribution-cutoff: 10
#share-ctc-and-sae: True
#sae-ground-truth-ratio: 0.3
#ctc-self-distill-weight: 1
arch: transformer_ctc
share-all-embeddings: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 8000
lr: 1e-3
adam_betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
attention-dropout: 0.1
activation-dropout: 0.1
activation-fn: relu
encoder-normalize-before: True
decoder-normalize-before: True
encoder-embed-dim: 64
encoder-ffn-embed-dim: 64
encoder-layers: 6
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 64
decoder-ffn-embed-dim: 64
decoder-attention-heads: 4
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
#ctc-layer:
#ctc-weight: 0.2
interleaved-ctc-weight: 0.3
interleaved-ctc-layers: 6,9
sae-ctc-temperature: 1.0
interleaved-ctc-drop-prob: 0
interleaved-ctc-upsampling-ratio: 3
sae-adapter: league
sae-drop-prob: 0.0
#sae-distribution-cutoff: 10
share-ctc-and-sae: True
sae-ground-truth-ratio: 0.3
ctc-self-distill-weight: 0
\ No newline at end of file
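The weights above feed the label_smoothed_cross_entropy_with_ctc criterion. A hedged sketch of how such weights are commonly combined into one training objective; the exact normalization in this repo's criterion may differ:

def total_loss(ce, ctc, inter_ctc_losses,
               ctc_weight=0.3, inter_ctc_weight=0.2):
    # average the interleaved/intermediate CTC losses across their layers
    inter = sum(inter_ctc_losses) / max(len(inter_ctc_losses), 1)
    return ce + ctc_weight * ctc + inter_ctc_weight * inter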
......@@ -9,7 +9,7 @@ share_dict=1
lcrm=0
tokenizer=0
data_dir=
data_tag=
test_subset=(valid test)
exp_name=
......@@ -22,6 +22,8 @@ n_average=10
beam_size=5
len_penalty=1.0
max_tokens=50000
batch_size=1
infer_debug=0
dec_model=checkpoint_best.pt
cmd="./run.sh
......@@ -38,16 +40,18 @@ cmd="./run.sh
--n_average ${n_average}
--beam_size ${beam_size}
--len_penalty ${len_penalty}
--batch_size ${batch_size}
--max_tokens ${max_tokens}
--dec_model ${dec_model}
--infer_debug ${infer_debug}
"
if [[ -n ${data_dir} ]]; then
cmd="$cmd --data_dir ${data_dir}"
fi
if [[ -n ${data_tag} ]]; then
cmd="$cmd --data_tag ${data_tag}"
fi
if [[ -n ${test_subset} ]]; then
test_subset=`echo ${test_subset[*]} | sed 's/ /,/g'`
cmd="$cmd --test_subset ${test_subset}"
if [[ ${#test_subset[@]} -ne 0 ]]; then
subsets=$(echo ${test_subset[*]} | sed 's/ /,/g')
cmd="$cmd --test_subset ${subsets}"
fi
echo $cmd
......
# lower_rm.py: lowercase each line and strip punctuation (lcrm preprocessing)
import sys
import string

in_file = sys.argv[1]
with open(in_file, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip().lower()
        # delete every ASCII punctuation character
        for w in string.punctuation:
            line = line.replace(w, "")
        # remove remaining spaces
        line = line.replace(" ", "")
        print(line)
#!/usr/bin/env bash
gpu_num=4
cmd="sh train.sh"
......
......@@ -14,7 +14,7 @@ get_devices(){
do
line=$((dev + 2))
use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)
if [[ $use -lt 100 ]]; then
if [[ $use -lt 1000 ]]; then
device[$count]=$dev
count=$((count + 1))
if [[ $count -eq $gpu_num ]]; then
......
#./run.sh --stage 0 --stop_stage 0 --tgt_lang fr
./run.sh --stage 0 --stop_stage 0 --tgt_lang es
./run.sh --stage 0 --stop_stage 0 --tgt_lang de-v2
#./run.sh --stage 0 --stop_stage 0 --tgt_lang ja
......@@ -84,6 +84,7 @@ dec_model=checkpoint_best.pt
n_average=10
beam_size=5
len_penalty=1.0
infer_debug=0
infer_score=0
# infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"
......@@ -336,14 +337,14 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
echo -e "\033[34mRun command: \n${cmd} \033[0m"
# save info
log=./history.log
log=${ckpt_dir}/history.log
echo "${time} | ${data_dir} | ${exp_name} | ${model_dir} " >> $log
tail -n 50 ${log} > tmp.log
mv tmp.log $log
log=${model_dir}/train.log
cmd="${cmd} 2>&1 | tee -a ${log}"
#cmd="nohup ${cmd} >> ${log} 2>&1 &"
# cmd="${cmd} 2>&1 | tee -a ${log}"
cmd="${cmd} >> ${log} 2>&1 "
if [[ $eval -eq 1 ]]; then
# tensorboard
port=6666
......@@ -384,24 +385,37 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
export CUDA_VISIBLE_DEVICES=${device}
fi
suffix=beam${beam_size}_alpha${len_penalty}_tokens${max_tokens}
if [[ ${n_average} -ne 1 ]]; then
suffix=${suffix}_${n_average}
fi
suffix=alpha${len_penalty}
model_str=`echo $dec_model | sed -e "s#checkpoint##" | sed "s#.pt##"`
suffix=${suffix}_${model_str}
if [[ ${sacrebleu} -eq 1 ]]; then
suffix=${suffix}_sacrebleu
else
suffix=${suffix}_multibleu
fi
suffix=${suffix}_beam${beam_size}
if [[ ${batch_size} -ne 0 ]]; then
suffix=${suffix}_batch${batch_size}
else
suffix=${suffix}_tokens${max_tokens}
fi
if [[ ${infer_score} -eq 1 ]]; then
suffix=${suffix}_score
fi
suffix=`echo $suffix | sed -e "s#__#_#"`
result_file=${model_dir}/decode_result_${suffix}
[[ -f ${result_file} ]] && rm ${result_file}
test_subset=${test_subset//,/ }
for subset in ${test_subset[@]}; do
cmd="python3 ${code_dir}/fairseq_cli/generate.py
subset=${subset}
if [[ ${infer_debug} -ne 0 ]]; then
cmd="python3 -m debugpy --listen 0.0.0.0:5678 --wait-for-client"
else
cmd="python3 "
fi
cmd="$cmd ${code_dir}/fairseq_cli/generate.py
${data_dir}
--source-lang ${src_lang}
--target-lang ${tgt_lang}
......@@ -409,9 +423,10 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
--task ${task}
--path ${model_dir}/${dec_model}
--results-path ${model_dir}
--batch-size ${batch_size}
--max-tokens ${max_tokens}
--beam ${beam_size}
--batch-size 1
--skip-invalid-size-inputs-valid-test
--lenpen ${len_penalty}
--post-process sentencepiece"
......
......@@ -14,7 +14,6 @@ extra_parameter=
exp_tag=baseline
config_list=(small)
config_list=(small inter)
# exp full name
exp_name=
......@@ -23,7 +22,7 @@ train_config=$(echo ${config_list[*]} | sed 's/ /,/g')
cmd="./run.sh
--stage 1
--stop_stage 4
--stop_stage 2
--gpu_num ${gpu_num}
--update_freq ${update_freq}
--train_config ${train_config}
......
# Append-based Interpolation Augmentation
inter-mixup: True
inter-mixup-layer: -1
inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0
inter-mixup-beta: 0.2
inter-mixup-keep-org: True
inter-mixup-decoder-emb: True
ctc-mixup-consistent-weight: 0
inter-ctc-mixup-consistent-weight: 0
mixup-consistent-weight: 0
cal-mixup-loss: True
no-specaugment: False
layer-out-norm: False
inter-mixup-ratio-decay: False
inter-mixup-ratio-decay-params: 20000,40000,0
\ No newline at end of file
inter-mixup: True
inter-mixup-layer: -1
inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0
inter-mixup-beta: 0.2
inter-mixup-keep-org: False
inter-mixup-decoder-emb: False
ctc-mixup-consistent-weight: 0
inter-ctc-mixup-consistent-weight: 0
mixup-consistent-weight: 0
cal-mixup-loss: True
no-specaugment: False
layer-out-norm: False
inter-mixup-ratio-decay: False
inter-mixup-ratio-decay-params: 20000,40000,0
\ No newline at end of file
......@@ -48,7 +48,6 @@ pds-layers: 3_3_3_3
pds-ratios: 2_2_1_2
pds-fusion: False
pds-fusion-method: all_conv2
pds-dropout: 0
pds-fusion-layers: 0_1_1_1
pds-fusion-weight: 0.2_0.3_0.5
pds-embed-dims: 512_512_512_512
......
inter-ctc-weight: 0.2
inter-ctc-layers: 6,9
share-inter-ctc: True
ctc-pae: none
# ctc-pae: inter_league
......
encoder-attention-type: local
hard-mask-window: 0
gauss-mask-sigma: 3
init-mask-weight: 0
\ No newline at end of file
inter-mixup: True
inter-mixup-layer: -1
inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0
inter-mixup-beta: 0.2
inter-mixup-keep-org: False
inter-mixup-decoder-emb: False
ctc-mixup-consistent-weight: 0
inter-ctc-mixup-consistent-weight: 0
mixup-consistent-weight: 0
cal-mixup-loss: True
no-specaugment: False
layer-out-norm: False
......
arch: s2t_ctc
encoder-type: sate
criterion: ctc
zero_infinity: True
xctc-weight: 1.0
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
encoder-normalize-before: True
decoder-normalize-before: True
textual-encoder-embed-norm: True
textual-encoder-no-scale-embedding: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 0
text-encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
acoustic-encoder: transformer
adapter: none
\ No newline at end of file
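The subsampling-* block above (conv1d, 2 layers, kernel 5, stride 2, GLU) describes a 4x temporal downsampler in front of the encoder. A minimal sketch under assumed shapes (the 80-dim fbank input is an assumption; filters and output dim follow the config):

import torch.nn as nn

class Conv1dSubsampler(nn.Module):
    def __init__(self, in_dim: int = 80, filters: int = 1024, out_dim: int = 256):
        super().__init__()
        # each conv emits 2x channels because GLU halves the channel dim
        self.conv1 = nn.Conv1d(in_dim, filters * 2, kernel_size=5, stride=2, padding=2)
        self.conv2 = nn.Conv1d(filters, out_dim * 2, kernel_size=5, stride=2, padding=2)
        self.glu = nn.GLU(dim=1)

    def forward(self, x):             # x: (batch, time, in_dim)
        x = x.transpose(1, 2)         # -> (batch, in_dim, time)
        x = self.glu(self.conv1(x))   # stride 2 halves the time axis
        x = self.glu(self.conv2(x))   # stride 2 again: 4x overall
        return x.transpose(1, 2)      # -> (batch, time/4, out_dim)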
# Append-based Interpolation Augmentation
inter-mixup: True
inter-mixup-layer: -1
inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0
inter-mixup-beta: 0.2
inter-mixup-keep-org: True
inter-mixup-decoder-emb: True
cal-mixup-loss: True
no-specaugment: False
layer-out-norm: False
inter-mixup-ratio-decay: False
inter-mixup-ratio-decay-params: 20000,40000,0
# MTL
ctc-weight: 0.3
inter-ctc-weight: 0.2
inter-ctc-layers: 6,9
share-inter-ctc: True
share-ctc-and-embed: True
ctc-pae: inter_league
pae-unnorm-input: True
ctc-mixup-consistent-weight: 0.15
inter-ctc-mixup-consistent-weight: 0.1
mixup-consistent-weight: 0.5
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
layer-padding-mask: True
\ No newline at end of file
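The Conformer options above (macaron-style, cnn-module-kernel: 15, rel_pos, swish) select the block structure of Gulati et al. (2020): half-step feed-forwards around self-attention and a depthwise convolution module. A structural sketch with the submodules passed in as callables:

def conformer_block(x, ffn1, mhsa, conv, ffn2, norm):
    x = x + 0.5 * ffn1(x)  # macaron-style first half-step feed-forward
    x = x + mhsa(x)        # self-attention with relative positions (rel_pos)
    x = x + conv(x)        # depthwise conv module, kernel 15, Swish activation
    x = x + 0.5 * ffn2(x)  # second half-step feed-forward
    return norm(x)         # closing layer norm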
# Append-based Interpolation Augmentation
inter-mixup: True
inter-mixup-layer: -1
inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0
inter-mixup-beta: 0.2
inter-mixup-keep-org: True
inter-mixup-decoder-emb: True
cal-mixup-loss: False
no-specaugment: False
layer-out-norm: False
inter-mixup-ratio-decay: False
inter-mixup-ratio-decay-params: 20000,40000,0
# MTL
ctc-weight: 0.3
share-ctc-and-embed: True
inter-ctc-weight: 0.2
inter-ctc-layers: 6,9
share-inter-ctc: True
ctc-pae: inter_league
pae-unnorm-input: True
ctc-mixup-consistent-weight: 0.15
inter-ctc-mixup-consistent-weight: 0.1
mixup-consistent-weight: 0.5
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
layer-padding-mask: True
\ No newline at end of file
arch: s2t_sate
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True
decoder-normalize-before: True
encoder-embed-norm: True
encoder-no-scale-embedding: True
textual-encoder-embed-norm: True
textual-encoder-no-scale-embedding: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 2048
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
dropout: 0.15
activation-fn: relu
encoder-embed-dim: 512
encoder-ffn-embed-dim: 2048
encoder-layers: 12
text-encoder-layers: 6
decoder-layers: 6
encoder-attention-heads: 8
decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 8
acoustic-encoder: transformer
adapter: inter_league
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
# Bilingual CTC
share-ctc-and-embed: True
share-xctc-and-embed: True
ctc-weight: 0.2
xctc-weight: 0.1
# InterCTC
inter-ctc-weight: 0.1
inter-ctc-layers: 6,9
share-inter-ctc: True
inter-xctc-weight: 0.05
inter-xctc-layers: 4
# Prediction-aware encoding
ctc-pae: inter_league
xctc-pae: inter_league
pae-unnorm-input: True
# Curriculum learning mixing
xctc-pae-ground-truth-ratio: 0.1
xctc-pae-ground-truth-only-mistake: True
pae-oracle-smooth: True
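A hedged reading of the curriculum options above: xctc-pae-ground-truth-ratio mixes oracle targets into the distribution that prediction-aware encoding feeds back to the encoder, pae-oracle-smooth smooths the oracle one-hots, and the only-mistake flag restricts mixing to frames the model currently gets wrong. A sketch under those assumptions:

import torch
import torch.nn.functional as F

def mix_with_oracle(pred_logits, oracle_ids, vocab_size,
                    ratio=0.1, smooth=0.1):
    pred = F.softmax(pred_logits, dim=-1)                    # (T, V) model distribution
    oracle = F.one_hot(oracle_ids, vocab_size).float()       # (T, V) ground truth
    oracle = oracle * (1.0 - smooth) + smooth / vocab_size   # smoothed oracle
    return (1.0 - ratio) * pred + ratio * oracle             # per-frame convex mixture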
arch: s2t_transformer_m
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 2048
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
dropout: 0.15
activation-fn: relu
encoder-embed-dim: 512
encoder-ffn-embed-dim: 2048
encoder-layers: 18
decoder-layers: 6
encoder-attention-heads: 8
decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 8
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
# Bilingual CTC
share-ctc-and-embed: True
share-xctc-and-embed: True
ctc-weight: 0.2
ctc-layer: 12
xctc-weight: 0.1
# InterCTC
inter-ctc-weight: 0.1
inter-ctc-layers: 6,9
share-inter-ctc: True
inter-xctc-weight: 0.05
inter-xctc-layers: 16
# Prediction-aware encoding
ctc-pae: inter_league
xctc-pae: inter_league
pae-unnorm-input: True
# Curriculum learning mixing
xctc-pae-ground-truth-ratio: 0.1
xctc-pae-ground-truth-only-mistake: True
pae-oracle-smooth: True
arch: s2t_transformer_m
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 2048
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
dropout: 0.15
activation-fn: relu
encoder-embed-dim: 512
encoder-ffn-embed-dim: 2048
encoder-layers: 18
decoder-layers: 6
encoder-attention-heads: 8
decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 8
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
# Bilingual CTC
share-ctc-and-embed: True
share-xctc-and-embed: True
ctc-weight: 0.2
xctc-weight: 0.1
# InterCTC
inter-ctc-weight: 0.1
inter-ctc-layers: 6,9,12,15
share-inter-ctc: True
inter-xctc-weight: 0.05
inter-xctc-layers: 6,9,12,15
# Prediction-aware encoding
ctc-pae: inter_league
xctc-pae: inter_league
pae-unnorm-input: True
# Curriculum learning mixing
xctc-pae-ground-truth-ratio: 0.1
xctc-pae-ground-truth-only-mistake: True
pae-oracle-smooth: True
arch: s2t_sate
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True
decoder-normalize-before: True
encoder-embed-norm: True
encoder-no-scale-embedding: True
text-no-pos-emb: True
textual-encoder-embed-norm: False
textual-encoder-no-scale-embedding: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 2048
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
dropout: 0.15
activation-fn: relu
encoder-embed-dim: 512
encoder-ffn-embed-dim: 2048
encoder-layers: 12
text-encoder-layers: 6
decoder-layers: 6
encoder-attention-heads: 8
decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 8
acoustic-encoder: transformer
adapter: inter_league
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
# CTC & XCTC
share-ctc-and-embed: True
share-xctc-and-embed: True
ctc-weight: 0.2
xctc-weight: 0.2
# InterCTC
share-inter-ctc: True
inter-ctc-weight: 0.1
inter-ctc-layers: 6,9
inter-xctc-weight: 0.1
inter-xctc-layers: 4
# Prediction-aware encoding
ctc-pae: inter_league
xctc-pae: inter_league
# Cross-layer attn
xctc-cross-attn: True
cross-attn-start-layer: 3
cross-attn-layer: 2
cross-attn-collaboration-mode: serial
cross-attn-league-drop-net: True
cross-attn-league-drop-net-prob: 0.1
# Curriculum learning mixing
xctc-pae-ground-truth-ratio: 0.5
xctc-pae-ground-truth-only-mistake: True
pae-oracle-smooth: True
arch: s2t_ctc
encoder-type: sate
criterion: ctc
zero_infinity: True
xctc-weight: 1.0
ctc-weight: 1.0
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
lr: 1e-3
adam_betas: (0.9,0.98)
criterion: ctc
zero_infinity: True
ctc-weight: 1.0
encoder-normalize-before: True
decoder-normalize-before: True
encoder-embed-norm: True
encoder-no-scale-embedding: True
textual-encoder-embed-norm: True
text-no-pos-emb: True
textual-encoder-embed-norm: False
textual-encoder-no-scale-embedding: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
subsampling-filter: 2048
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
dropout: 0.1
dropout: 0.15
activation-fn: relu
encoder-embed-dim: 256
encoder-embed-dim: 512
encoder-ffn-embed-dim: 2048
encoder-layers: 12
text-encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
encoder-attention-heads: 8
acoustic-encoder: transformer
adapter: inter_league
#adapter: none
#adapter-embed-norm: True
#adapter-out-norm: True
#share-adapter-and-ctc: True
#share-adapter-and-embed: True
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
# InterCTC
share-inter-ctc: True
inter-ctc-weight: 1.0
inter-ctc-layers: 6,9
inter-xctc-weight: 1.0
inter-xctc-layers: 6,9
# Prediction-aware encoding
ctc-pae: inter_league
xctc-pae: inter_league
# Cross-layer attn
xctc-cross-attn: True
cross-attn-start-layer: 4
cross-attn-layer: 3
cross-attn-collaboration-mode: serial
cross-attn-league-drop-net: True
cross-attn-league-drop-net-prob: 0.1
# Curriculum learning mixing
xctc-pae-ground-truth-ratio: 0.8
xctc-pae-ground-truth-only-mistake: True
pae-oracle-smooth: True
encoder-attention-type: rel_pos
#encoder-attention-type: rel_pos_legacy
#encoder-attention-type: rel_selfattn
#encoder-attention-type: relative
#decoder-attention-type: relative
#max-encoder-relative-length: 100
#max-decoder-relative-length: 20
# encoder-attention-type: relative
# decoder-attention-type: relative
# max-encoder-relative-length: 100
# max-decoder-relative-length: 20
\ No newline at end of file
arch: s2t_sate
encoder-embed-norm: True
encoder-no-scale-embedding: True
textual-encoder-embed-norm: False
textual-encoder-no-scale-embedding: True
text-no-pos-emb: True
encoder-normalize-before: True
decoder-normalize-before: True
text-encoder-layers: 0
acoustic-encoder: pds
adapter: none
#load-pretrained-encoder-from:
#load-pretrained-acoustic-encoder-from:
#load-pretrained-text-encoder-from:
#load-pretrained-decoder-from:
\ No newline at end of file
# ctc-weight: 0.3
share-ctc-and-embed: True
share-xctc-and-embed: True
share-inter-ctc: True
xctc-weight: 0.3
# xctc-layer: 12
# inter-ctc-weight: 0.2
# inter-ctc-layers: 6,9
# inter-xctc-weight: 0.2
# inter-xctc-layers: 6,9
ctc-pae: none
# xctc-pae: none
xctc-cross-attn: False
cross-attn-start-layer: 7
cross-attn-layer: 6
cross-attn-collaboration-mode: parallel
cross-attn-league-s1-ratio: 0.5
cross-attn-league-s2-ratio: 0.5
cross-attn-league-out-norm: False
cross-attn-league-gated: False
cross-attn-league-drop-net: False
cross-attn-league-drop-net-prob: 0.2
cross-attn-league-drop-net-mix: False
# ctc-pae-ground-truth-ratio: 0.3
# xctc-pae-ground-truth-ratio: 0.3
# adapter-pae-ground-truth-ratio: 0.3
# pae-ctc-temperature: 1
# adapter-temperature: 1
#pae-gumbel: True
#pae-distribution-hard: True
#pae-drop-prob: 0.0
#pae-distribution-cutoff: 10
#share-pae-and-ctc: True
#share-pae-and-xctc: True
#pae-embed-norm: True
#pae-out-norm: True
#ctc-self-distill-weight: 1
#target-ctc-self-distill-weight: 1
#ctc-self-distill-prob: 0.1
#cal-all-ctc: True
use-additional-ctc-text: True
\ No newline at end of file
# ctc-weight: 0.3
share-ctc-and-embed: True
share-xctc-and-embed: True
share-inter-ctc: True
xctc-weight: 0.3
xctc-layer: 12
axctc-weight: 0.3
axctc-layer: 6
inter-ctc-weight: 0.2
# inter-ctc-layers: 6,9
inter-xctc-weight: 0.2
# inter-xctc-layers: 10
inter-axctc-weight: 0.2
# inter-axctc-layers: 4
ctc-pae: none
# xctc-pae: none
# axctc-pae: none
xctc-cross-attn: False
cross-attn-start-layer: 7
cross-attn-layer: 6
cross-attn-collaboration-mode: parallel
cross-attn-league-s1-ratio: 0.5
cross-attn-league-s2-ratio: 0.5
cross-attn-league-out-norm: False
cross-attn-league-gated: False
cross-attn-league-drop-net: False
cross-attn-league-drop-net-prob: 0.2
cross-attn-league-drop-net-mix: False
# ctc-pae-ground-truth-ratio: 0.3
# axctc-pae-ground-truth-ratio: 0.3
# xctc-pae-ground-truth-ratio: 0.3
# adapter-pae-ground-truth-ratio: 0.3
# pae-ctc-temperature: 1
# adapter-temperature: 1
#pae-gumbel: True
#pae-distribution-hard: True
#pae-drop-prob: 0.0
#pae-distribution-cutoff: 10
#share-pae-and-ctc: True
#share-pae-and-xctc: True
#pae-embed-norm: True
#pae-out-norm: True
#ctc-self-distill-weight: 1
#target-ctc-self-distill-weight: 1
#ctc-self-distill-prob: 0.1
#cal-all-ctc: True
use-additional-ctc-text: True
pds-fusion-method: none
\ No newline at end of file
xctc-weight: 0.3
share-xctc-and-embed: True
\ No newline at end of file
# inter-xctc-weight: 0.2
# inter-xctc-layers: 6,9
xctc-pae: none
\ No newline at end of file
arch: s2t_sate
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True
decoder-normalize-before: True
textual-encoder-embed-norm: True
textual-encoder-no-scale-embedding: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 0
text-encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
text-use-s2t-layer: False
acoustic-encoder: transformer
# adapter: inter_league
adapter: none
#adapter-embed-norm: True
#adapter-out-norm: True
#share-adapter-and-ctc: True
#share-adapter-and-embed: True
#load-pretrained-encoder-from:
#load-pretrained-acoustic-encoder-from:
#load-pretrained-text-encoder-from:
#load-pretrained-decoder-from:
inter-xctc-weight: 0.2
#inter-xctc-layers: 4
inter-xctc-layers: 6,9
xctc-pae: none
xctc-pae: none
# xctc-pae: inter_league
xctc-cross-attn: False
......
......@@ -11,13 +11,13 @@ if [ "$#" -eq 1 ]; then
fi
sacrebleu=1
ctc_infer=1
ctc_infer=0
n_average=10
beam_size=5
infer_ctc_weight=0.1
infer_ctc_weight=0
len_penalty=1.0
max_tokens=50000
batch_size=1
batch_size=0
infer_debug=0
dec_model=checkpoint_best.pt
......
dir=/xuchen/st/checkpoints/must_c/en-de/st/JointCTC/big
tag=JointCTC/big
for d in `ls $dir`; do
echo $d
./run.sh --stage 2 --max_tokens 10000 --batch_size 1 --ctc_infer 1 --infer_ctc_weight 0.1 --exp_name $tag/$d
./run.sh --stage 2 --max_tokens 10000 --batch_size 1 --ctc_infer 1 --infer_ctc_weight 0.2 --exp_name $tag/$d
./run.sh --stage 2 --max_tokens 10000 --batch_size 1 --ctc_infer 1 --infer_ctc_weight 0.3 --exp_name $tag/$d
./run.sh --stage 2 --max_tokens 10000 --batch_size 1 --ctc_infer 1 --infer_ctc_weight 0.4 --exp_name $tag/$d
./run.sh --stage 2 --max_tokens 10000 --batch_size 1 --ctc_infer 1 --infer_ctc_weight 0.5 --exp_name $tag/$d
done
\ No newline at end of file
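The loop above sweeps infer_ctc_weight from 0.1 to 0.5 per checkpoint. In standard joint CTC/attention decoding the weight log-linearly interpolates the two model scores; whether this repo normalizes identically is an assumption:

def joint_score(log_p_attn: float, log_p_ctc: float, w: float) -> float:
    # candidate score under joint CTC/attention decoding with weight w
    return (1.0 - w) * log_p_attn + w * log_p_ctc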
./run.sh --stage 2 --tgt_lang fr --infer_ctc_weight 0.1 --batch_size 1 --exp_name big_conformer_ctc_inter_xctc_xinter_enc18_wsum0.45_unnormpae_oracle0.1
./run.sh --stage 2 --tgt_lang es --infer_ctc_weight 0.1 --batch_size 1 --exp_name big_conformer_ctc_inter_xctc_xinter_enc18_wsum0.45_unnormpae_oracle0.1
./run.sh --stage 2 --tgt_lang it --infer_ctc_weight 0.1 --batch_size 1 --exp_name big_conformer_ctc_inter_xctc_xinter_enc18_wsum0.45_unnormpae_oracle0.1
./run.sh --stage 2 --tgt_lang nl --infer_ctc_weight 0.1 --batch_size 1 --exp_name big_conformer_ctc_inter_xctc_xinter_enc18_wsum0.45_unnormpae_oracle0.1
./run.sh --stage 2 --tgt_lang pt --infer_ctc_weight 0.1 --batch_size 1 --exp_name big_conformer_ctc_inter_xctc_xinter_enc18_wsum0.45_unnormpae_oracle0.1
./run.sh --stage 2 --tgt_lang ro --infer_ctc_weight 0.1 --batch_size 1 --exp_name big_conformer_ctc_inter_xctc_xinter_enc18_wsum0.45_unnormpae_oracle0.1
./run.sh --stage 2 --tgt_lang ru --infer_ctc_weight 0.1 --batch_size 1 --exp_name big_conformer_ctc_inter_xctc_xinter_enc18_wsum0.45_unnormpae_oracle0.1
./run.sh --stage 0 --stop_stage 0 --tgt_lang es
./run.sh --stage 0 --stop_stage 0 --tgt_lang fr
./run.sh --stage 0 --stop_stage 0 --tgt_lang it
./run.sh --stage 0 --stop_stage 0 --tgt_lang nl
./run.sh --stage 0 --stop_stage 0 --tgt_lang pt
./run.sh --stage 0 --stop_stage 0 --tgt_lang ro
./run.sh --stage 0 --stop_stage 0 --tgt_lang ru
......@@ -14,23 +14,13 @@ extra_parameter=
exp_tag=
# Base
# config_list=(base dynamic ctc)
# config_list=(multibranch ctc)
#config_list=(base mixup ctc)
#config_list=(base conformer ctc)
# SATE
config_list=(sate ctc)
# config_list=(sate inter ctc)
#config_list=(sate conformer ctc)
# SAE
#config_list=(sate inter)
config_list=(sate conformer ctc)
# PDS
#config_list=(pds_base_8 ctc)
#config_list=(pds_base_8 conformer ctc)
#config_list=(sate_pds ctc)
# exp full name
exp_name=
......
......@@ -17,9 +17,9 @@ eval-bleu-print-samples: True
best_checkpoint_metric: bleu
maximize_best_checkpoint_metric: True
no-epoch-checkpoints: True
#keep-last-epochs: 10
keep-best-checkpoints: 5
# no-epoch-checkpoints: True
keep-last-epochs: 1
keep-best-checkpoints: 10
num-workers: 8
no-progress-bar: True
......
......@@ -3,20 +3,6 @@ criterion: label_smoothed_cross_entropy_with_ctc
# ctc-layer: 6
ctc-weight: 0.3
interleaved-ctc-weight: 0.2
interleaved-ctc-layers: 4
sae-ctc-temperature: 1.0
interleaved-ctc-drop-prob: 0
ctc-upsampling-ratio: 3
ctc-out-downsampling: False
ctc-out-downsampling-method: maxpooling
share-interleaved-ctc: True
sae-adapter: inter_league
sae-drop-prob: 0.0
#sae-distribution-cutoff: 10
# share-ctc-and-sae: True
# share-ctc-and-embed: True
ctc-self-distill-weight: 0
......@@ -15,6 +15,8 @@ n_average=5
beam_size=4
len_penalty=0.6
max_tokens=4000
batch_size=1
infer_debug=0
dec_model=checkpoint_best.pt
cmd="./run.sh
......@@ -26,16 +28,18 @@ cmd="./run.sh
--n_average ${n_average}
--beam_size ${beam_size}
--len_penalty ${len_penalty}
--batch_size ${batch_size}
--max_tokens ${max_tokens}
--dec_model ${dec_model}
--infer_debug ${infer_debug}
"
if [[ -n ${data_dir} ]]; then
cmd="$cmd --data_dir ${data_dir}"
fi
if [[ -n ${test_subset} ]]; then
test_subset=`echo ${test_subset[*]} | sed 's/ /,/g'`
cmd="$cmd --test_subset ${test_subset}"
if [[ ${#test_subset[@]} -ne 0 ]]; then
subsets=$(echo ${test_subset[*]} | sed 's/ /,/g')
cmd="$cmd --test_subset ${subsets}"
fi
echo $cmd
......
......@@ -2,8 +2,7 @@
# Processing WMT16 En-De Datasets
# Copyright 2021 Natural Language Processing Laboratory
# Xu Chen (xuchenneu@163.com)
# Copyright 2021 Chen Xu (xuchennlp@outlook.com)
# Set bash to 'debug' mode; it will exit on:
# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
......@@ -16,24 +15,24 @@ eval=1
time=$(date "+%m%d_%H%M")
stage=1
stop_stage=4
stop_stage=2
######## hardware ########
# devices
######## Hardware ########
# Devices
device=(0)
gpu_num=8
update_freq=1
hdfs_get=0
root_dir=/opt/tiger
data_root_dir=/mnt/bn/nas-xc-1
code_dir=${root_dir}/s2t
pwd_dir=$PWD
root_dir=${ST_ROOT}
data_root_dir=${root_dir}
code_dir=${root_dir}/S2T
# dataset
# Dataset
src_lang=en
tgt_lang=de
dataset=wmt
data_tag=mt
task=translation
......@@ -47,7 +46,6 @@ tokenizer=0
. ./local/parse_options.sh || exit 1;
lang=${src_lang}-${tgt_lang}
dataset=wmt
use_specific_dict=1
subword=0
......@@ -64,18 +62,16 @@ valid_subset=dev
trans_subset=test
test_subset=valid,test
# exp
# Exp
sub_tag=
exp_prefix=$(date "+%m%d")
# exp_subfix=${ARNOLD_JOB_ID}_${ARNOLD_TASK_ID}_${ARNOLD_TRIAL_ID}
extra_tag=
extra_parameter=
exp_tag=baseline
exp_name=
# config
# Training Settings
train_config=base
# training setting
fp16=1
max_tokens=8192
step_valid=0
......@@ -87,7 +83,11 @@ dec_model=checkpoint_best.pt
n_average=5
beam_size=4
len_penalty=0.6
infer_debug=0
infer_score=0
# infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"
# Parsing Options
. ./local/parse_options.sh || exit 1;
if [[ ${use_specific_dict} -eq 1 ]]; then
......@@ -129,22 +129,9 @@ if [[ ${tokenizer} -eq 1 ]]; then
exp_prefix=${exp_prefix}_tok
fi
# setup nccl envs
export NCCL_IB_DISABLE=0
export NCCL_IB_HCA=$ARNOLD_RDMA_DEVICE:1
export NCCL_IB_GID_INDEX=3
export NCCL_SOCKET_IFNAME=eth0
HOSTS=$ARNOLD_WORKER_HOSTS
HOST=(${HOSTS//,/ })
HOST_SPLIT=(${HOST//:/ })
PORT=${HOST_SPLIT[1]}
INIT_METHOD="tcp://${ARNOLD_WORKER_0_HOST}:${ARNOLD_WORKER_0_PORT}"
DIST_RANK=$((ARNOLD_ID * ARNOLD_WORKER_GPU))
. ./local/parse_options.sh || exit 1
export PATH=$PATH:${code_dir}/scripts
. ./local/parse_options.sh || exit 1;
# full path
if [[ -z ${exp_name} ]]; then
config_string=${train_config//,/_}
exp_name=${exp_prefix}_${config_string}_${exp_tag}
......@@ -155,20 +142,27 @@ if [[ -z ${exp_name} ]]; then
exp_name=${exp_name}_${exp_subfix}
fi
fi
model_dir=${code_dir}/checkpoints/${data_model_subfix}/${exp_name}
echo "stage: $stage"
echo "stop_stage: $stop_stage"
ckpt_dir=${root_dir}/checkpoints/
model_dir=${root_dir}/checkpoints/${data_model_subfix}/${sub_tag}/${exp_name}
# Start
cd ${code_dir}
echo "Start Stage: $stage"
echo "Stop Stage: $stop_stage"
if [[ `pip list | grep fairseq | wc -l` -eq 0 ]]; then
echo "Default Stage: env configure"
pip3 install -e ${code_dir}
fi
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
echo "stage -1: Data Download"
# pass
echo "Stage -1: Data Download"
fi
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
### Task dependent. You have to make data the following preparation part by yourself.
echo "stage 0: MT Data Preparation"
echo "Stage 0: Data Preparation"
if [[ ! -e ${data_dir} ]]; then
mkdir -p ${data_dir}
fi
......@@ -228,32 +222,9 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
[[ $eval -eq 1 ]] && eval ${cmd}
fi
echo "stage 1: env configure"
if [[ `pip list | grep fairseq | wc -l` -eq 0 ]]; then
pip3 install -e ${code_dir} -i https://bytedpypi.byted.org/simple --no-build-isolation --default-timeout=10000
fi
if [[ -d /mnt/bn/nas-xc-1/checkpoints && ! -d ${code_dir}/checkpoints ]]; then
ln -s /mnt/bn/nas-xc-1/checkpoints ${code_dir}
fi
# if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
if [ ${hdfs_get} -eq 1 ]; then
ln_data_dir=`echo ${data_dir} | sed -e "s#${data_root_dir}#${code_dir}#"`
echo ${ln_data_dir}
mkdir -p ${ln_data_dir}
ln -s ${data_dir}/../* ${ln_data_dir}
rm -r ${ln_data_dir}
hdfs_path=`echo ${data_dir} | sed -e "s#${data_root_dir}#hdfs://haruna/home/byte_arnold_lq_mlnlc/user/xuchen/#"`
hdfs dfs -get ${hdfs_path} ${ln_data_dir}
data_dir=${ln_data_dir}
fi
# fi
data_dir=${data_dir}/data-bin
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
echo "stage 2: MT Network Training"
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
echo "Stage 1: Network Training"
[[ ! -d ${data_dir} ]] && echo "The data dir ${data_dir} does not exist!" && exit 1;
if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
......@@ -263,6 +234,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
source ./local/utils.sh
device=$(get_devices $gpu_num 0)
fi
export CUDA_VISIBLE_DEVICES=${device}
fi
echo -e "data=${data_dir} model=${model_dir}"
......@@ -363,22 +335,17 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
echo -e "\033[34mRun command: \n${cmd} \033[0m"
# save info
log=./history.log
log=${ckpt_dir}/history.log
echo "${time} | ${data_dir} | ${exp_name} | ${model_dir} " >> $log
tail -n 50 ${log} > tmp.log
mv tmp.log $log
# export CUDA_VISIBLE_DEVICES=${device}
log=${model_dir}/train.log
cmd="${cmd} 2>&1 | tee -a ${log}"
#cmd="nohup ${cmd} >> ${log} 2>&1 &"
# cmd="${cmd} 2>&1 | tee -a ${log}"
cmd="${cmd} >> ${log} 2>&1 "
if [[ $eval -eq 1 ]]; then
# tensorboard
if [[ -z ${ARNOLD_TENSORBOARD_CURRENT_PORT} ]]; then
port=6666
else
port=${ARNOLD_TENSORBOARD_CURRENT_PORT}
fi
tensorboard --logdir ${model_dir} --port ${port} --bind_all &
echo "${cmd}" > ${model_dir}/cmd
......@@ -388,8 +355,8 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
fi
fi
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
echo "stage 3: MT Decoding"
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
echo "Stage 2: Decoding"
if [[ ${n_average} -ne 1 ]]; then
# Average models
dec_model=avg_${n_average}_checkpoint.pt
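avg_${n_average}_checkpoint.pt denotes element-wise parameter averaging of the last/best N checkpoints (cf. fairseq's scripts/average_checkpoints.py). A minimal sketch, assuming fairseq-style checkpoints that store the weights under a "model" key:

import torch

def average_checkpoints(paths):
    avg = None
    for p in paths:
        state = torch.load(p, map_location="cpu")["model"]
        if avg is None:
            avg = {k: v.clone().float() for k, v in state.items()}
        else:
            for k in avg:
                avg[k] += state[k].float()
    return {k: v / len(paths) for k, v in avg.items()}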
......@@ -413,24 +380,40 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
source ./local/utils.sh
device=$(get_devices $gpu_num 0)
fi
export CUDA_VISIBLE_DEVICES=${device}
fi
# export CUDA_VISIBLE_DEVICES=${device}
suffix=beam${beam_size}_alpha${len_penalty}_tokens${max_tokens}
if [[ ${n_average} -ne 1 ]]; then
suffix=${suffix}_${n_average}
fi
suffix=alpha${len_penalty}
model_str=`echo $dec_model | sed -e "s#checkpoint##" | sed "s#.pt##"`
suffix=${suffix}_${model_str}
if [[ ${sacrebleu} -eq 1 ]]; then
suffix=${suffix}_sacrebleu
else
suffix=${suffix}_multibleu
fi
suffix=${suffix}_beam${beam_size}
if [[ ${batch_size} -ne 0 ]]; then
suffix=${suffix}_batch${batch_size}
else
suffix=${suffix}_tokens${max_tokens}
fi
if [[ ${infer_score} -eq 1 ]]; then
suffix=${suffix}_score
fi
suffix=`echo $suffix | sed -e "s#__#_#"`
result_file=${model_dir}/decode_result_${suffix}
[[ -f ${result_file} ]] && rm ${result_file}
test_subset=${test_subset//,/ }
for subset in ${test_subset[@]}; do
cmd="python3 ${code_dir}/fairseq_cli/generate.py
subset=${subset}
if [[ ${infer_debug} -ne 0 ]]; then
cmd="python3 -m debugpy --listen 0.0.0.0:5678 --wait-for-client"
else
cmd="python3 "
fi
cmd="$cmd ${code_dir}/fairseq_cli/generate.py
${data_dir}
--source-lang ${src_lang}
--target-lang ${tgt_lang}
......@@ -438,8 +421,10 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
--task ${task}
--path ${model_dir}/${dec_model}
--results-path ${model_dir}
--batch-size ${batch_size}
--max-tokens ${max_tokens}
--beam ${beam_size}
--skip-invalid-size-inputs-valid-test
--lenpen ${len_penalty}"
if [[ ${subword} -eq 1 ]]; then
......@@ -453,6 +438,13 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
if [[ ${sacrebleu} -eq 1 ]]; then
cmd="${cmd}
--scoring sacrebleu"
if [[ "${tgt_lang}" = "ja" ]]; then
cmd="${cmd}
--sacrebleu-tokenizer ja-mecab"
elif [[ "${tgt_lang}" == "zh" ]]; then
cmd="${cmd}
--sacrebleu-tokenizer zh"
fi
if [[ ${tokenizer} -eq 1 ]]; then
cmd="${cmd}
--tokenizer moses
......@@ -460,29 +452,34 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
--target-lang ${tgt_lang}"
fi
fi
if [[ ${infer_score} -eq 1 ]]; then
cmd="${cmd}
--score-reference"
fi
if [[ -n ${infer_parameters} ]]; then
cmd="${cmd}
${infer_parameters}"
fi
echo -e "\033[34mRun command: \n${cmd} \033[0m"
cd ${code_dir}
if [[ $eval -eq 1 ]]; then
eval $cmd
echo "" >> ${result_file}
tail -n 2 ${model_dir}/generate-${subset}.txt >> ${result_file}
mv ${model_dir}/generate-${subset}.txt ${model_dir}/generate-${subset}-${suffix}.txt
mv ${model_dir}/translation-${subset}.txt ${model_dir}/translation-${subset}-${suffix}.txt
#cd ${pwd_dir}
#export PATH=$PATH:${pwd_dir}/local
#sh local/wmt_en2de_multi_bleu.sh ${model_dir}/translation-${subset}.txt
cd ${pwd_dir}
if [[ -f ${model_dir}/enc_dump ]]; then
mv ${model_dir}/enc_dump ${model_dir}/dump-${subset}-enc-${suffix}
fi
if [[ -f ${model_dir}/dec_dump ]]; then
mv ${model_dir}/dec_dump ${model_dir}/dump-${subset}-dec-${suffix}
fi
fi
done
echo
cat ${result_file}
fi
# if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
# cd ${fairseq_dir}
# echo "Stage 4: Upload model and log"
# echo "Path: hdfs://haruna/home/byte_arnold_lq_mlnlc/user/xuchen/s2t/checkpoints/${data_model_subfix}/${exp_name}"
# hdfs dfs -mkdir -p hdfs://haruna/home/byte_arnold_lq_mlnlc/user/xuchen/s2t/checkpoints/${data_model_subfix}
# hdfs dfs -put -f ${model_dir} hdfs://haruna/home/byte_arnold_lq_mlnlc/user/xuchen/s2t/checkpoints/${data_model_subfix}
# fi
......@@ -22,7 +22,7 @@ train_config=$(echo ${config_list[*]} | sed 's/ /,/g')
cmd="./run.sh
--stage 1
--stop_stage 1
--stop_stage 2
--gpu_num ${gpu_num}
--update_freq ${update_freq}
--train_config ${train_config}
......
set -e
eval=1
lcrm=0
src_lang=en
tgt_lang=zh
tokenize=1
splits=(tst-COMMON test11)
dataset=wmt20
root_dir=~/st/Fairseq-S2T
data_dir=/home/xuchen/st/data/$dataset/data
vocab_dir=/home/xuchen/st/data/$dataset/mt/unigram32000_tok
dest_dir=$vocab_dir
src_vocab_prefix=spm_unigram32000_en
tgt_vocab_prefix=spm_unigram32000_zh
for split in ${splits[@]}; do
src_file=${data_dir}/${split}/${split}.${src_lang}
tgt_file=${data_dir}/${split}/${split}.${tgt_lang}
if [[ ${tokenize} -eq 1 ]]; then
src_tok_file=${data_dir}/${split}.tok/${split}.tok.${src_lang}
tgt_tok_file=${data_dir}/${split}.tok/${split}.tok.${tgt_lang}
if [[ ! -f ${src_tok_file} ]]; then
cmd="tokenizer.perl -l ${src_lang} --threads 8 -no-escape < ${src_file} > ${src_tok_file}"
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval ${cmd}
fi
if [[ ! -f ${tgt_tok_file} ]]; then
cmd="tokenizer.perl -l ${tgt_lang} --threads 8 -no-escape < ${tgt_file} > ${tgt_tok_file}"
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval ${cmd}
fi
src_file=${src_tok_file}
tgt_file=${tgt_tok_file}
fi
cmd="cat ${src_file}"
if [[ ${lcrm} -eq 1 ]]; then
cmd="python local/lower_rm.py ${src_file}"
fi
cmd="${cmd}
| spm_encode --model ${vocab_dir}/${src_vocab_prefix}.model
--output_format=piece
> ${src_file}.spm"
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval ${cmd}
cmd="spm_encode
--model ${vocab_dir}/${tgt_vocab_prefix}.model
--output_format=piece
< ${tgt_file}
> ${tgt_file}.spm"
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval ${cmd}
src_file=${src_file}.spm
tgt_file=${tgt_file}.spm
mkdir -p ${dest_dir}/final
cmd="cp ${src_file} ${dest_dir}/final/${split}.${src_lang}"
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval ${cmd}
cmd="cp ${tgt_file} ${dest_dir}/final/${split}.${tgt_lang}"
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval ${cmd}
done
n_set=${#splits[*]}
for ((i=0;i<$n_set;i++)); do
dataset[$i]=${dest_dir}/final/${splits[$i]}
done
pref=`echo ${dataset[*]} | sed 's/ /,/g'`
cmd="python ${root_dir}/fairseq_cli/preprocess.py
--source-lang ${src_lang}
--target-lang ${tgt_lang}
--testpref ${pref}
--destdir ${dest_dir}/data-bin
--srcdict ${vocab_dir}/${src_vocab_prefix}.txt
--tgtdict ${vocab_dir}/${tgt_vocab_prefix}.txt
--workers 64"
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval ${cmd}
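For reference, a Python equivalent of the spm_encode calls above, assuming the sentencepiece package and the unigram model named earlier in this script:

import sentencepiece as spm

sp = spm.SentencePieceProcessor(model_file="spm_unigram32000_en.model")
pieces = sp.encode("This is a test.", out_type=str)  # like --output_format=piece
print(" ".join(pieces))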
arch: transformer
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 8000
lr: 2e-3
adam_betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.1
attention-dropout: 0.1
activation-dropout: 0.1
activation-fn: relu
encoder-normalize-before: True
decoder-normalize-before: True
encoder-embed-dim: 512
encoder-ffn-embed-dim: 2048
encoder-layers: 6
decoder-layers: 6
encoder-attention-heads: 8
decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 8
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
\ No newline at end of file
arch: transformer
share-all-embeddings: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 4000
lr: 7e-4
adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.1
attention-dropout: 0.1
activation-dropout: 0.1
activation-fn: relu
encoder-normalize-before: False
decoder-normalize-before: False
encoder-embed-dim: 512
encoder-ffn-embed-dim: 2048
encoder-layers: 6
decoder-layers: 6
encoder-attention-heads: 8
decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 8
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
\ No newline at end of file
train-subset: train
valid-subset: valid
max-epoch: 20
max-update: 100000
patience: 5
best_checkpoint_metric: loss
maximize_best_checkpoint_metric: False
no-epoch-checkpoints: True
#keep-last-epochs: 10
keep-best-checkpoints: 5
num-workers: 8
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
skip-invalid-size-inputs-valid-test: True
max-source-positions: 512
arch: transformer_wmt_en_de_big_t2t
share-all-embeddings: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 8000
lr: 7e-4
adam_betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.3
attention-dropout: 0.1
activation-dropout: 0.1
activation-fn: relu
encoder-normalize-before: True
decoder-normalize-before: True
encoder-embed-dim: 1024
encoder-ffn-embed-dim: 4096
encoder-layers: 6
decoder-layers: 6
encoder-attention-heads: 16
decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 8
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
\ No newline at end of file
arch: transformer_wmt_en_de_big
share-all-embeddings: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 4000
lr: 5e-4
adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.3
attention-dropout: 0.1
activation-dropout: 0.1
activation-fn: relu
encoder-normalize-before: False
decoder-normalize-before: False
encoder-embed-dim: 1024
encoder-ffn-embed-dim: 4096
encoder-layers: 6
decoder-layers: 6
encoder-attention-heads: 16
decoder-embed-dim: 1024
decoder-ffn-embed-dim: 4096
decoder-attention-heads: 16
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
\ No newline at end of file
arch: transformer
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 16000
lr: 2e-3
adam_betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.1
attention-dropout: 0.1
activation-dropout: 0.1
activation-fn: relu
encoder-normalize-before: True
decoder-normalize-before: True
encoder-embed-dim: 512
encoder-ffn-embed-dim: 2048
encoder-layers: 30
decoder-layers: 6
encoder-attention-heads: 8
decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 8
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
\ No newline at end of file
use-enc-dlcl: True
use-dec-dlcl: True
#ctc-weight: 0.2
intermedia-ctc-weight: 0.3
intermedia-ctc-layers: 10,20
#target-ctc-weight: 0.3
#target-ctc-layer: 6
#target-intermedia-ctc-weight: 0.1
#target-intermedia-ctc-layers: 2,4
intermedia-adapter: league
#intermedia-drop-prob: 0.2
#intermedia-temperature: 5
post-process: sentencepiece
\ No newline at end of file
encoder-attention-type: relative
decoder-attention-type: relative
max-encoder-relative-length: 8
max-decoder-relative-length: 8
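max-encoder-relative-length / max-decoder-relative-length clip relative distances before they index a learned embedding table, in the style of Shaw et al. (2018). A sketch of the clipping:

import torch

def relative_position_index(seq_len: int, max_rel_len: int = 8) -> torch.Tensor:
    pos = torch.arange(seq_len)
    rel = pos.unsqueeze(0) - pos.unsqueeze(1)   # signed pairwise distances
    rel = rel.clamp(-max_rel_len, max_rel_len)  # clip beyond +/- max_rel_len
    return rel + max_rel_len                    # shift to [0, 2 * max_rel_len]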
#!/usr/bin/env bash
gpu_num=1
data_dir=
test_subset=(test)
exp_name=
if [ "$#" -eq 1 ]; then
exp_name=$1
fi
sacrebleu=0
n_average=5
beam_size=4
len_penalty=0.6
max_tokens=80000
dec_model=checkpoint_best.pt
cmd="./run.sh
--stage 2
--stop_stage 2
--gpu_num ${gpu_num}
--exp_name ${exp_name}
--sacrebleu ${sacrebleu}
--n_average ${n_average}
--beam_size ${beam_size}
--len_penalty ${len_penalty}
--max_tokens ${max_tokens}
--dec_model ${dec_model}
"
if [[ -n ${data_dir} ]]; then
cmd="$cmd --data_dir ${data_dir}"
fi
if [[ -n ${test_subset} ]]; then
test_subset=`echo ${test_subset[*]} | sed 's/ /,/g'`
cmd="$cmd --test_subset ${test_subset}"
fi
echo $cmd
eval $cmd
#!/usr/bin/env perl
# $Id: detokenizer.perl 4134 2011-08-08 15:30:54Z bgottesman $
# Sample De-Tokenizer
# written by Josh Schroeder, based on code by Philipp Koehn
# further modifications by Ondrej Bojar
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
use warnings;
use strict;
use utf8; # tell perl this script file is in UTF-8 (see all funny punct below)
my $language = "en";
my $QUIET = 0;
my $HELP = 0;
my $UPPERCASE_SENT = 0;
my $PENN = 0;
while (@ARGV) {
$_ = shift;
/^-b$/ && ($| = 1, next);
/^-l$/ && ($language = shift, next);
/^-q$/ && ($QUIET = 1, next);
/^-h$/ && ($HELP = 1, next);
/^-u$/ && ($UPPERCASE_SENT = 1, next);
/^-penn$/ && ($PENN = 1, next);
}
if ($HELP) {
print "Usage ./detokenizer.perl (-l [en|fr|it|cs|...]) < tokenizedfile > detokenizedfile\n";
print "Options:\n";
print " -u ... uppercase the first char in the final sentence.\n";
print " -q ... don't report detokenizer revision.\n";
print " -b ... disable Perl buffering.\n";
print " -penn ... assume input is tokenized as per tokenizer.perl's -penn option.\n";
exit;
}
if ($language !~ /^(cs|en|fr|it|fi)$/) {
print STDERR "Warning: No built-in rules for language $language.\n"
}
if ($PENN && $language ne "en") {
print STDERR "Error: -penn option only supported for English text.\n";
exit;
}
if (!$QUIET) {
print STDERR "Detokenizer Version ".'$Revision: 4134 $'."\n";
print STDERR "Language: $language\n";
}
while(<STDIN>) {
if (/^<.+>$/ || /^\s*$/) {
#don't try to detokenize XML/HTML tag lines
print $_;
} elsif ($PENN) {
print &detokenize_penn($_);
} else {
print &detokenize($_);
}
}
sub ucsecondarg {
# uppercase the second argument
my $arg1 = shift;
my $arg2 = shift;
return $arg1.uc($arg2);
}
sub deescape {
# de-escape special chars
my ($text) = @_;
$text =~ s/\&bar;/\|/g; # factor separator (legacy)
$text =~ s/\&#124;/\|/g; # factor separator
$text =~ s/\&lt;/\</g; # xml
$text =~ s/\&gt;/\>/g; # xml
$text =~ s/\&bra;/\[/g; # syntax non-terminal (legacy)
$text =~ s/\&ket;/\]/g; # syntax non-terminal (legacy)
$text =~ s/\&quot;/\"/g; # xml
$text =~ s/\&apos;/\'/g; # xml
$text =~ s/\&#91;/\[/g; # syntax non-terminal
$text =~ s/\&#93;/\]/g; # syntax non-terminal
$text =~ s/\&amp;/\&/g; # escape escape
return $text;
}
sub detokenize {
my($text) = @_;
chomp($text);
$text = " $text ";
$text =~ s/ \@\-\@ /-/g;
$text = &deescape($text);
my $word;
my $i;
my @words = split(/ /,$text);
$text = "";
my %quoteCount = ("\'"=>0,"\""=>0);
my $prependSpace = " ";
for ($i=0;$i<(scalar(@words));$i++) {
if (&startsWithCJKChar($words[$i])) {
if (($i > 0 && &endsWithCJKChar($words[$i-1])) && ($language ne "ko")) {
# perform left shift if this is a second consecutive CJK (Chinese/Japanese/Korean) word
$text=$text.$words[$i];
} else {
# ... but do nothing special if this is a CJK word that doesn't follow a CJK word
$text=$text.$prependSpace.$words[$i];
}
$prependSpace = " ";
} elsif ($words[$i] =~ /^[\p{IsSc}\(\[\{\¿\¡]+$/) {
#perform right shift on currency and other random punctuation items
$text = $text.$prependSpace.$words[$i];
$prependSpace = "";
} elsif ($words[$i] =~ /^[\,\.\?\!\:\;\\\%\}\]\)]+$/){
if (($language eq "fr") && ($words[$i] =~ /^[\?\!\:\;\\\%]$/)) {
#these punctuation marks are prefixed with a non-breaking space in French
$text .= " "; }
#perform left shift on punctuation items
$text=$text.$words[$i];
$prependSpace = " ";
} elsif (($language eq "en") && ($i>0) && ($words[$i] =~ /^[\'][\p{IsAlpha}]/) && ($words[$i-1] =~ /[\p{IsAlnum}]$/)) {
#left-shift the contraction for English
$text=$text.$words[$i];
$prependSpace = " ";
} elsif (($language eq "cs") && ($i>1) && ($words[$i-2] =~ /^[0-9]+$/) && ($words[$i-1] =~ /^[.,]$/) && ($words[$i] =~ /^[0-9]+$/)) {
#left-shift floats in Czech
$text=$text.$words[$i];
$prependSpace = " ";
} elsif ((($language eq "fr") ||($language eq "it")) && ($i<=(scalar(@words)-2)) && ($words[$i] =~ /[\p{IsAlpha}][\']$/) && ($words[$i+1] =~ /^[\p{IsAlpha}]/)) {
#right-shift the contraction for French and Italian
$text = $text.$prependSpace.$words[$i];
$prependSpace = "";
} elsif (($language eq "cs") && ($i<(scalar(@words)-3))
&& ($words[$i] =~ /[\p{IsAlpha}]$/)
&& ($words[$i+1] =~ /^[-–]$/)
&& ($words[$i+2] =~ /^li$|^mail.*/i)
) {
#right-shift "-li" in Czech and a few Czech dashed words (e-mail)
$text = $text.$prependSpace.$words[$i].$words[$i+1];
$i++; # advance over the dash
$prependSpace = "";
} elsif ($words[$i] =~ /^[\'\"„“`]+$/) {
#combine punctuation smartly
my $normalized_quo = $words[$i];
$normalized_quo = '"' if $words[$i] =~ /^[„“”]+$/;
$quoteCount{$normalized_quo} = 0
if !defined $quoteCount{$normalized_quo};
if ($language eq "cs" && $words[$i] eq "„") {
# this is always the starting quote in Czech
$quoteCount{$normalized_quo} = 0;
}
if ($language eq "cs" && $words[$i] eq "“") {
# this is usually the ending quote in Czech
$quoteCount{$normalized_quo} = 1;
}
if (($quoteCount{$normalized_quo} % 2) eq 0) {
if(($language eq "en") && ($words[$i] eq "'") && ($i > 0) && ($words[$i-1] =~ /[s]$/)) {
#single quote for possessives ending in s... "The Jones' house"
#left shift
$text=$text.$words[$i];
$prependSpace = " ";
} else {
#right shift
$text = $text.$prependSpace.$words[$i];
$prependSpace = "";
$quoteCount{$normalized_quo} ++;
}
} else {
#left shift
$text=$text.$words[$i];
$prependSpace = " ";
$quoteCount{$normalized_quo} ++;
}
} elsif (($language eq "fi") && ($words[$i-1] =~ /:$/) && ($words[$i] =~ /^(N|n|A|a|Ä|ä|ssa|Ssa|ssä|Ssä|sta|stä|Sta|Stä|hun|Hun|hyn|Hyn|han|Han|hän|Hän|hön|Hön|un|Un|yn|Yn|an|An|än|Än|ön|Ön|seen|Seen|lla|Lla|llä|Llä|lta|Lta|ltä|Ltä|lle|Lle|ksi|Ksi|kse|Kse|tta|Tta|ine|Ine)(ni|si|mme|nne|nsa)?(ko|kö|han|hän|pa|pä|kaan|kään|kin)?$/)) {
# Finnish : without intervening space if followed by case suffix
# EU:N EU:n EU:ssa EU:sta EU:hun EU:iin ...
$text=$text. lc $words[$i];
$prependSpace = " ";
} else {
$text=$text.$prependSpace.$words[$i];
$prependSpace = " ";
}
}
# clean up spaces at head and tail of each line as well as any double-spacing
$text =~ s/ +/ /g;
$text =~ s/\n /\n/g;
$text =~ s/ \n/\n/g;
$text =~ s/^ //g;
$text =~ s/ $//g;
#add trailing break
$text .= "\n" unless $text =~ /\n$/;
$text =~ s/^([[:punct:]\s]*)([[:alpha:]])/ucsecondarg($1, $2)/e if $UPPERCASE_SENT;
return $text;
}
sub detokenize_penn {
my($text) = @_;
chomp($text);
$text = " $text ";
$text =~ s/ \@\-\@ /-/g;
$text =~ s/ \@\/\@ /\//g;
$text = &deescape($text);
# merge de-contracted forms except where the second word begins with an
# apostrophe (those are handled later)
$text =~ s/ n't /n't /g;
$text =~ s/ N'T /N'T /g;
$text =~ s/ ([Cc])an not / $1annot /g;
$text =~ s/ ([Dd])' ye / $1'ye /g;
$text =~ s/ ([Gg])im me / $1imme /g;
$text =~ s/ ([Gg])on na / $1onna /g;
$text =~ s/ ([Gg])ot ta / $1otta /g;
$text =~ s/ ([Ll])em me / $1emme /g;
$text =~ s/ '([Tt]) is / '$1is /g;
$text =~ s/ '([Tt]) was / '$1was /g;
$text =~ s/ ([Ww])an na / $1anna /g;
# restore brackets
$text =~ s/-LRB-/\(/g;
$text =~ s/-RRB-/\)/g;
$text =~ s/-LSB-/\[/g;
$text =~ s/-RSB-/\]/g;
$text =~ s/-LCB-/{/g;
$text =~ s/-RCB-/}/g;
my $i;
my @words = split(/ /,$text);
$text = "";
my $prependSpace = " ";
for ($i=0;$i<(scalar(@words));$i++) {
if ($words[$i] =~ /^[\p{IsSc}\(\[\{\¿\¡]+$/) {
# perform right shift on currency and other random punctuation items
$text = $text.$prependSpace.$words[$i];
$prependSpace = "";
} elsif ($words[$i] =~ /^[\,\.\?\!\:\;\\\%\}\]\)]+$/){
# perform left shift on punctuation items
$text=$text.$words[$i];
$prependSpace = " ";
} elsif (($i>0) && ($words[$i] =~ /^[\'][\p{IsAlpha}]/) && ($words[$i-1] =~ /[\p{IsAlnum}]$/)) {
# left-shift the contraction
$text=$text.$words[$i];
$prependSpace = " ";
} elsif ($words[$i] eq "`") { # Assume that punctuation has been normalized and is one of `, ``, ', '' only
# opening single quote: convert to straight quote and right-shift
$text = $text.$prependSpace."\'";
$prependSpace = "";
} elsif ($words[$i] eq "``") {
# opening double quote: convert to straight quote and right-shift
$text = $text.$prependSpace."\"";
$prependSpace = "";
} elsif ($words[$i] eq "\'") {
# closing single quote: convert to straight quote and left shift
$text = $text."\'";
$prependSpace = " ";
} elsif ($words[$i] eq "\'\'") {
# closing double quote: convert to straight quote and left shift
$text = $text."\"";
$prependSpace = " ";
} else {
$text = $text.$prependSpace.$words[$i];
$prependSpace = " ";
}
}
# clean up spaces at head and tail of each line as well as any double-spacing
$text =~ s/ +/ /g;
$text =~ s/\n /\n/g;
$text =~ s/ \n/\n/g;
$text =~ s/^ //g;
$text =~ s/ $//g;
# add trailing break
$text .= "\n" unless $text =~ /\n$/;
$text =~ s/^([[:punct:]\s]*)([[:alpha:]])/ucsecondarg($1, $2)/e if $UPPERCASE_SENT;
return $text;
}
sub startsWithCJKChar {
my ($str) = @_;
return 0 if length($str) == 0;
my $firstChar = substr($str, 0, 1);
return &charIsCJK($firstChar);
}
sub endsWithCJKChar {
my ($str) = @_;
return 0 if length($str) == 0;
my $lastChar = substr($str, length($str)-1, 1);
return &charIsCJK($lastChar);
}
# Given a string consisting of one character, returns true iff the character
# is a CJK (Chinese/Japanese/Korean) character
sub charIsCJK {
my ($char) = @_;
# $char should be a string of length 1
my $codepoint = &codepoint_dec($char);
# The following is based on http://en.wikipedia.org/wiki/Basic_Multilingual_Plane#Basic_Multilingual_Plane
# Hangul Jamo (1100–11FF)
return 1 if (&between_hexes($codepoint, '1100', '11FF'));
# CJK Radicals Supplement (2E80–2EFF)
# Kangxi Radicals (2F00–2FDF)
# Ideographic Description Characters (2FF0–2FFF)
# CJK Symbols and Punctuation (3000–303F)
# Hiragana (3040–309F)
# Katakana (30A0–30FF)
# Bopomofo (3100–312F)
# Hangul Compatibility Jamo (3130–318F)
# Kanbun (3190–319F)
# Bopomofo Extended (31A0–31BF)
# CJK Strokes (31C0–31EF)
# Katakana Phonetic Extensions (31F0–31FF)
# Enclosed CJK Letters and Months (3200–32FF)
# CJK Compatibility (3300–33FF)
# CJK Unified Ideographs Extension A (3400–4DBF)
# Yijing Hexagram Symbols (4DC0–4DFF)
# CJK Unified Ideographs (4E00–9FFF)
# Yi Syllables (A000–A48F)
# Yi Radicals (A490–A4CF)
return 1 if (&between_hexes($codepoint, '2E80', 'A4CF'));
# Phags-pa (A840–A87F)
return 1 if (&between_hexes($codepoint, 'A840', 'A87F'));
# Hangul Syllables (AC00–D7AF)
return 1 if (&between_hexes($codepoint, 'AC00', 'D7AF'));
# CJK Compatibility Ideographs (F900–FAFF)
return 1 if (&between_hexes($codepoint, 'F900', 'FAFF'));
# CJK Compatibility Forms (FE30–FE4F)
return 1 if (&between_hexes($codepoint, 'FE30', 'FE4F'));
# Range U+FF65–FFDC encodes halfwidth forms of Katakana and Hangul characters
return 1 if (&between_hexes($codepoint, 'FF65', 'FFDC'));
# Supplementary Ideographic Plane 20000–2FFFF
return 1 if (&between_hexes($codepoint, '20000', '2FFFF'));
return 0;
}
# Returns the code point of a Unicode char, represented as a decimal number
sub codepoint_dec {
if (my $char = shift) {
return unpack('U0U*', $char);
}
}
sub between_hexes {
my ($num, $left, $right) = @_;
return $num >= hex($left) && $num <= hex($right);
}
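# Example: charIsCJK("中") returns 1, since U+4E2D falls in the CJK
# Unified Ideographs range 4E00-9FFF; charIsCJK("a") returns 0.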
import sys
import string

# Lowercase each line of the given file and remove all punctuation
# except the apostrophe (kept for English contractions).
in_file = sys.argv[1]

with open(in_file, "r", encoding="utf-8") as f:
    for line in f.readlines():
        line = line.strip().lower()
        for w in string.punctuation:
            if w != "'":
                line = line.replace(w, "")
        # collapse the double spaces left behind by punctuation removal
        line = line.replace("  ", " ")
        print(line)
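# Usage sketch (the script and file names here are hypothetical):
#   python lower_rm.py input.txt > output.txt
# Each line comes out lowercased with all punctuation except "'" removed.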
#!/usr/bin/env bash
# Wait until ${gpu_num} idle GPUs are available, then launch training.
gpu_num=4
cmd="sh train.sh"
device=()
while :
do
record=$(mktemp -t temp.record.XXXXXX)
gpustat > $record
all_devices=$(seq 0 "$(sed '1,2d' ${record} | wc -l)");
count=0
for dev in ${all_devices[@]}
do
line=$((dev + 2))
use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)
if [[ $use -lt 100 ]]; then
device[$count]=$dev
count=$((count + 1))
if [[ $count -eq $gpu_num ]]; then
break
fi
fi
done
if [[ ${#device[@]} -lt $gpu_num ]]; then
sleep 60s
else
echo "Run $cmd"
eval $cmd
sleep 10s
exit
fi
done
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# $Id$
use warnings;
use strict;
my $lowercase = 0;
if ($ARGV[0] eq "-lc") {
$lowercase = 1;
shift;
}
my $stem = $ARGV[0];
if (!defined $stem) {
print STDERR "usage: multi-bleu.pl [-lc] reference < hypothesis\n";
print STDERR "Reads the references from reference or reference0, reference1, ...\n";
exit(1);
}
$stem .= ".ref" if !-e $stem && !-e $stem."0" && -e $stem.".ref0";
my @REF;
my $ref=0;
while(-e "$stem$ref") {
&add_to_ref("$stem$ref",\@REF);
$ref++;
}
&add_to_ref($stem,\@REF) if -e $stem;
die("ERROR: could not find reference file $stem") unless scalar @REF;
# add additional references explicitly specified on the command line
shift;
foreach my $stem (@ARGV) {
&add_to_ref($stem,\@REF) if -e $stem;
}
sub add_to_ref {
my ($file,$REF) = @_;
my $s=0;
if ($file =~ /\.gz$/) {
open(REF,"gzip -dc $file|") or die "Can't read $file";
} else {
open(REF,$file) or die "Can't read $file";
}
while(<REF>) {
chop;
push @{$$REF[$s++]}, $_;
}
close(REF);
}
my(@CORRECT,@TOTAL,$length_translation,$length_reference);
my $s=0;
while(<STDIN>) {
chop;
$_ = lc if $lowercase;
my @WORD = split;
my %REF_NGRAM = ();
my $length_translation_this_sentence = scalar(@WORD);
my ($closest_diff,$closest_length) = (9999,9999);
foreach my $reference (@{$REF[$s]}) {
# print "$s $_ <=> $reference\n";
$reference = lc($reference) if $lowercase;
my @WORD = split(' ',$reference);
my $length = scalar(@WORD);
my $diff = abs($length_translation_this_sentence-$length);
if ($diff < $closest_diff) {
$closest_diff = $diff;
$closest_length = $length;
# print STDERR "$s: closest diff ".abs($length_translation_this_sentence-$length)." = abs($length_translation_this_sentence-$length), setting len: $closest_length\n";
} elsif ($diff == $closest_diff) {
$closest_length = $length if $length < $closest_length;
# from two references with the same closeness to me
# take the *shorter* into account, not the "first" one.
}
for(my $n=1;$n<=4;$n++) {
my %REF_NGRAM_N = ();
for(my $start=0;$start<=$#WORD-($n-1);$start++) {
my $ngram = "$n";
for(my $w=0;$w<$n;$w++) {
$ngram .= " ".$WORD[$start+$w];
}
$REF_NGRAM_N{$ngram}++;
}
foreach my $ngram (keys %REF_NGRAM_N) {
if (!defined($REF_NGRAM{$ngram}) ||
$REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) {
$REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram};
# print "$i: REF_NGRAM{$ngram} = $REF_NGRAM{$ngram}<BR>\n";
}
}
}
}
$length_translation += $length_translation_this_sentence;
$length_reference += $closest_length;
for(my $n=1;$n<=4;$n++) {
my %T_NGRAM = ();
for(my $start=0;$start<=$#WORD-($n-1);$start++) {
my $ngram = "$n";
for(my $w=0;$w<$n;$w++) {
$ngram .= " ".$WORD[$start+$w];
}
$T_NGRAM{$ngram}++;
}
foreach my $ngram (keys %T_NGRAM) {
$ngram =~ /^(\d+) /;
my $n = $1;
# my $corr = 0;
# print "$i e $ngram $T_NGRAM{$ngram}<BR>\n";
$TOTAL[$n] += $T_NGRAM{$ngram};
if (defined($REF_NGRAM{$ngram})) {
if ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) {
$CORRECT[$n] += $T_NGRAM{$ngram};
# $corr = $T_NGRAM{$ngram};
# print "$i e correct1 $T_NGRAM{$ngram}<BR>\n";
}
else {
$CORRECT[$n] += $REF_NGRAM{$ngram};
# $corr = $REF_NGRAM{$ngram};
# print "$i e correct2 $REF_NGRAM{$ngram}<BR>\n";
}
}
# $REF_NGRAM{$ngram} = 0 if !defined $REF_NGRAM{$ngram};
# print STDERR "$ngram: {$s, $REF_NGRAM{$ngram}, $T_NGRAM{$ngram}, $corr}\n"
}
}
$s++;
}
my $brevity_penalty = 1;
my $bleu = 0;
my @bleu=();
for(my $n=1;$n<=4;$n++) {
if (defined ($TOTAL[$n])){
$bleu[$n]=($TOTAL[$n])?$CORRECT[$n]/$TOTAL[$n]:0;
# print STDERR "CORRECT[$n]:$CORRECT[$n] TOTAL[$n]:$TOTAL[$n]\n";
}else{
$bleu[$n]=0;
}
}
if ($length_reference==0){
printf "BLEU = 0, 0/0/0/0 (BP=0, ratio=0, hyp_len=0, ref_len=0)\n";
exit(1);
}
if ($length_translation<$length_reference) {
$brevity_penalty = exp(1-$length_reference/$length_translation);
}
$bleu = $brevity_penalty * exp((my_log( $bleu[1] ) +
my_log( $bleu[2] ) +
my_log( $bleu[3] ) +
my_log( $bleu[4] ) ) / 4) ;
printf "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ratio=%.3f, hyp_len=%d, ref_len=%d)\n",
100*$bleu,
100*$bleu[1],
100*$bleu[2],
100*$bleu[3],
100*$bleu[4],
$brevity_penalty,
$length_translation / $length_reference,
$length_translation,
$length_reference;
sub my_log {
return -9999999999 unless $_[0];
return log($_[0]);
}
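# For reference, the score printed above is standard BLEU-4:
#   BLEU = BP * exp((1/4) * sum_{n=1..4} log p_n)
# with modified n-gram precisions p_n = CORRECT[n]/TOTAL[n] and brevity
# penalty BP = 1 if hyp_len > ref_len, else exp(1 - ref_len/hyp_len),
# where ref_len sums, per sentence, the closest (on ties, shortest)
# reference length.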
#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey);
# Arnab Ghoshal, Karel Vesely
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABILITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Parse command-line options.
# To be sourced by another script (as in ". parse_options.sh").
# Option format is: --option-name arg
# and shell variable "option_name" gets set to value "arg."
# The exception is --help, which takes no arguments, but prints the
# $help_message variable (if defined).
###
### The --config file options have lower priority to command line
### options, so we need to import them first...
###
# Now import all the configs specified by command-line, in left-to-right order
for ((argpos=1; argpos<$#; argpos++)); do
if [ "${!argpos}" == "--config" ]; then
argpos_plus1=$((argpos+1))
config=${!argpos_plus1}
[ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
. $config # source the config file.
fi
done
###
### Now we process the command line options
###
while true; do
[ -z "${1:-}" ] && break; # break if there are no arguments
case "$1" in
# If the enclosing script is called with --help option, print the help
# message and exit. Scripts should put help messages in $help_message
--help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
else printf "$help_message\n" 1>&2 ; fi;
exit 0 ;;
--*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
exit 1 ;;
# If the first command-line argument begins with "--" (e.g. --foo-bar),
# then work out the variable name as $name, which will equal "foo_bar".
--*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
# Next we test whether the variable in question is undefined-- if so it's
# an invalid option and we die. Note: $0 evaluates to the name of the
# enclosing script.
# The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
# is undefined. We then have to wrap this test inside "eval" because
# foo_bar is itself inside a variable ($name).
eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
oldval="`eval echo \\$$name`";
# Work out whether we seem to be expecting a Boolean argument.
if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
was_bool=true;
else
was_bool=false;
fi
# Set the variable to the right value-- the escaped quotes make it work if
# the option had spaces, like --cmd "queue.pl -sync y"
eval $name=\"$2\";
# Check that Boolean-valued arguments are really Boolean.
if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
exit 1;
fi
shift 2;
;;
*) break;
esac
done
# Check for an empty argument to the --cmd option, which can easily occur as a
# result of scripting errors.
[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
true; # so this script returns exit code 0.
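# Usage sketch (variable and option names hypothetical): a script that sets
#   gpu_num=1
# and then runs ". ./local/parse_options.sh" can be invoked as
#   ./run.sh --gpu-num 2
# which overrides gpu_num to 2; an option without a matching pre-defined
# variable makes the script die with "invalid option".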
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;
#binmode(STDIN, ":utf8");
#binmode(STDOUT, ":utf8");
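# Map fullwidth/CJK punctuation (and fullwidth digits) to their ASCII
# equivalents so that downstream tokenizers see normalized text.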
while(<STDIN>) {
s/,/,/g;
s/。 */. /g;
s/、/,/g;
s/”/"/g;
s/“/"/g;
s/∶/:/g;
s/:/:/g;
s/?/\?/g;
s/《/"/g;
s/》/"/g;
s/)/\)/g;
s/!/\!/g;
s/(/\(/g;
s/;/;/g;
s/1/"/g;
s/」/"/g;
s/「/"/g;
s/0/0/g;
s/3/3/g;
s/2/2/g;
s/5/5/g;
s/6/6/g;
s/9/9/g;
s/7/7/g;
s/8/8/g;
s/4/4/g;
s/. */. /g;
s/~/\~/g;
s/’/\'/g;
s/…/\.\.\./g;
s/━/\-/g;
s/〈/\</g;
s/〉/\>/g;
s/【/\[/g;
s/】/\]/g;
s/%/\%/g;
print $_;
}
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
# Sample Tokenizer
### Version 1.1
# written by Pidong Wang, based on the code written by Josh Schroeder and Philipp Koehn
# Version 1.1 updates:
# (1) add multithreading option "-threads NUM_THREADS" (default is 1);
# (2) add a timing option "-time" to calculate the average speed of this tokenizer;
# (3) add an option "-lines NUM_SENTENCES_PER_THREAD" to set the number of lines for each thread (default is 2000), and this option controls the memory amount needed: the larger this number is, the larger memory is required (the higher tokenization speed);
### Version 1.0
# $Id: tokenizer.perl 915 2009-08-10 08:15:49Z philipp $
# written by Josh Schroeder, based on code by Philipp Koehn
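# Usage sketch (file names hypothetical):
#   perl tokenizer.perl -l en -threads 4 < raw.txt > tok.txt
# See the -h help text below for the full option list.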
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
use warnings;
use FindBin qw($RealBin);
use strict;
use Time::HiRes;
if (eval {require Thread;1;}) {
#module loaded
Thread->import();
}
my $mydir = "$RealBin/nonbreaking_prefixes";
my %NONBREAKING_PREFIX = ();
my @protected_patterns = ();
my $protected_patterns_file = "";
my $language = "en";
my $QUIET = 0;
my $HELP = 0;
my $AGGRESSIVE = 0;
my $SKIP_XML = 0;
my $TIMING = 0;
my $NUM_THREADS = 1;
my $NUM_SENTENCES_PER_THREAD = 2000;
my $PENN = 0;
my $NO_ESCAPING = 0;
while (@ARGV)
{
$_ = shift;
/^-b$/ && ($| = 1, next);
/^-l$/ && ($language = shift, next);
/^-q$/ && ($QUIET = 1, next);
/^-h$/ && ($HELP = 1, next);
/^-x$/ && ($SKIP_XML = 1, next);
/^-a$/ && ($AGGRESSIVE = 1, next);
/^-time$/ && ($TIMING = 1, next);
# Option to add list of regexps to be protected
/^-protected/ && ($protected_patterns_file = shift, next);
/^-threads$/ && ($NUM_THREADS = int(shift), next);
/^-lines$/ && ($NUM_SENTENCES_PER_THREAD = int(shift), next);
/^-penn$/ && ($PENN = 1, next);
/^-no-escape/ && ($NO_ESCAPING = 1, next);
}
# for time calculation
my $start_time;
if ($TIMING)
{
$start_time = [ Time::HiRes::gettimeofday( ) ];
}
# print help message
if ($HELP)
{
print "Usage ./tokenizer.perl (-l [en|de|...]) (-threads 4) < textfile > tokenizedfile\n";
print "Options:\n";
print " -q ... quiet.\n";
print " -a ... aggressive hyphen splitting.\n";
print " -b ... disable Perl buffering.\n";
print " -time ... enable processing time calculation.\n";
print " -penn ... use Penn treebank-like tokenization.\n";
print " -protected FILE ... specify file with patters to be protected in tokenisation.\n";
print " -no-escape ... don't perform HTML escaping on apostrophy, quotes, etc.\n";
exit;
}
if (!$QUIET)
{
print STDERR "Tokenizer Version 1.1\n";
print STDERR "Language: $language\n";
print STDERR "Number of threads: $NUM_THREADS\n";
}
# load the language-specific non-breaking prefix info from files in the directory nonbreaking_prefixes
load_prefixes($language,\%NONBREAKING_PREFIX);
if (scalar(%NONBREAKING_PREFIX) eq 0)
{
print STDERR "Warning: No known abbreviations for language '$language'\n";
}
# Load protected patterns
if ($protected_patterns_file)
{
open(PP,$protected_patterns_file) || die "Unable to open $protected_patterns_file";
while(<PP>) {
chomp;
push @protected_patterns, $_;
}
}
my @batch_sentences = ();
my @thread_list = ();
my $count_sentences = 0;
if ($NUM_THREADS > 1)
{# multi-threading tokenization
while(<STDIN>)
{
$count_sentences = $count_sentences + 1;
push(@batch_sentences, $_);
if (scalar(@batch_sentences)>=($NUM_SENTENCES_PER_THREAD*$NUM_THREADS))
{
# assign each thread work
for (my $i=0; $i<$NUM_THREADS; $i++)
{
my $start_index = $i*$NUM_SENTENCES_PER_THREAD;
my $end_index = $start_index+$NUM_SENTENCES_PER_THREAD-1;
my @subbatch_sentences = @batch_sentences[$start_index..$end_index];
my $new_thread = new Thread \&tokenize_batch, @subbatch_sentences;
push(@thread_list, $new_thread);
}
foreach (@thread_list)
{
my $tokenized_list = $_->join;
foreach (@$tokenized_list)
{
print $_;
}
}
# reset for the new run
@thread_list = ();
@batch_sentences = ();
}
}
# the last batch
if (scalar(@batch_sentences)>0)
{
# assign each thread work
for (my $i=0; $i<$NUM_THREADS; $i++)
{
my $start_index = $i*$NUM_SENTENCES_PER_THREAD;
if ($start_index >= scalar(@batch_sentences))
{
last;
}
my $end_index = $start_index+$NUM_SENTENCES_PER_THREAD-1;
if ($end_index >= scalar(@batch_sentences))
{
$end_index = scalar(@batch_sentences)-1;
}
my @subbatch_sentences = @batch_sentences[$start_index..$end_index];
my $new_thread = new Thread \&tokenize_batch, @subbatch_sentences;
push(@thread_list, $new_thread);
}
foreach (@thread_list)
{
my $tokenized_list = $_->join;
foreach (@$tokenized_list)
{
print $_;
}
}
}
}
else
{# single thread only
while(<STDIN>)
{
$count_sentences = $count_sentences + 1;
if (($SKIP_XML && /^<.+>$/) || /^\s*$/)
{
#don't try to tokenize XML/HTML tag lines
print $_;
}
else
{
print &tokenize($_);
}
}
}
if ($TIMING)
{
my $duration = Time::HiRes::tv_interval( $start_time );
print STDERR ("TOTAL EXECUTION TIME: ".$duration."\n");
print STDERR ("TOKENIZATION SPEED: ".($duration/$count_sentences*1000)." milliseconds/line\n");
}
#####################################################################################
# subroutines afterward
# tokenize a batch of texts saved in an array
# input: an array containing a batch of texts
# return: another array containing a batch of tokenized texts for the input array
sub tokenize_batch
{
my(@text_list) = @_;
my(@tokenized_list) = ();
foreach (@text_list)
{
if (($SKIP_XML && /^<.+>$/) || /^\s*$/)
{
#don't try to tokenize XML/HTML tag lines
push(@tokenized_list, $_);
}
else
{
push(@tokenized_list, &tokenize($_));
}
}
return \@tokenized_list;
}
# the actual tokenize function which tokenizes one input string
# input: one string
# return: the tokenized string for the input string
sub tokenize
{
my($text) = @_;
if ($PENN) {
return tokenize_penn($text);
}
chomp($text);
$text = " $text ";
# remove ASCII junk
$text =~ s/\s+/ /g;
$text =~ s/[\000-\037]//g;
# Find protected patterns
my @protected = ();
foreach my $protected_pattern (@protected_patterns) {
my $t = $text;
while ($t =~ /(?<PATTERN>$protected_pattern)(?<TAIL>.*)$/) {
push @protected, $+{PATTERN};
$t = $+{TAIL};
}
}
for (my $i = 0; $i < scalar(@protected); ++$i) {
my $subst = sprintf("THISISPROTECTED%.3d", $i);
$text =~ s,\Q$protected[$i], $subst ,g;
}
$text =~ s/ +/ /g;
$text =~ s/^ //g;
$text =~ s/ $//g;
# separate out all "other" special characters
if (($language eq "fi") or ($language eq "sv")) {
# in Finnish and Swedish, the colon can be used inside words as an apostrophe-like character:
# USA:n, 20:een, EU:ssa, USA:s, S:t
$text =~ s/([^\p{IsAlnum}\s\.\:\'\`\,\-])/ $1 /g;
# if a colon is not immediately followed by lower-case characters, separate it out anyway
$text =~ s/(:)(?=$|[^\p{Ll}])/ $1 /g;
}
else {
$text =~ s/([^\p{IsAlnum}\s\.\'\`\,\-])/ $1 /g;
}
# aggressive hyphen splitting
if ($AGGRESSIVE)
{
$text =~ s/([\p{IsAlnum}])\-(?=[\p{IsAlnum}])/$1 \@-\@ /g;
}
#multi-dots stay together
$text =~ s/\.([\.]+)/ DOTMULTI$1/g;
while($text =~ /DOTMULTI\./)
{
$text =~ s/DOTMULTI\.([^\.])/DOTDOTMULTI $1/g;
$text =~ s/DOTMULTI\./DOTDOTMULTI/g;
}
# separate out "," except if within numbers (5,300)
#$text =~ s/([^\p{IsN}])[,]([^\p{IsN}])/$1 , $2/g;
# separate out "," except if within numbers (5,300)
# previous "global" application skips some: A,B,C,D,E > A , B,C , D,E
# first application uses up B so rule can't see B,C
# two-step version here may create extra spaces but these are removed later
# will also space digit,letter or letter,digit forms (redundant with next section)
$text =~ s/([^\p{IsN}])[,]/$1 , /g;
$text =~ s/[,]([^\p{IsN}])/ , $1/g;
# separate "," after a number if it's the end of a sentence
$text =~ s/([\p{IsN}])[,]$/$1 ,/g;
# separate , pre and post number
#$text =~ s/([\p{IsN}])[,]([^\p{IsN}])/$1 , $2/g;
#$text =~ s/([^\p{IsN}])[,]([\p{IsN}])/$1 , $2/g;
# turn `into '
#$text =~ s/\`/\'/g;
#turn '' into "
#$text =~ s/\'\'/ \" /g;
if ($language eq "en")
{
#split contractions right
$text =~ s/([^\p{IsAlpha}])[']([^\p{IsAlpha}])/$1 ' $2/g;
$text =~ s/([^\p{IsAlpha}\p{IsN}])[']([\p{IsAlpha}])/$1 ' $2/g;
$text =~ s/([\p{IsAlpha}])[']([^\p{IsAlpha}])/$1 ' $2/g;
$text =~ s/([\p{IsAlpha}])[']([\p{IsAlpha}])/$1 '$2/g;
#special case for "1990's"
$text =~ s/([\p{IsN}])[']([s])/$1 '$2/g;
}
elsif (($language eq "fr") or ($language eq "it") or ($language eq "ga"))
{
#split contractions left
$text =~ s/([^\p{IsAlpha}])[']([^\p{IsAlpha}])/$1 ' $2/g;
$text =~ s/([^\p{IsAlpha}])[']([\p{IsAlpha}])/$1 ' $2/g;
$text =~ s/([\p{IsAlpha}])[']([^\p{IsAlpha}])/$1 ' $2/g;
$text =~ s/([\p{IsAlpha}])[']([\p{IsAlpha}])/$1' $2/g;
}
else
{
$text =~ s/\'/ \' /g;
}
#word token method
my @words = split(/\s/,$text);
$text = "";
for (my $i=0;$i<(scalar(@words));$i++)
{
my $word = $words[$i];
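# A trailing period stays attached when the token contains an internal
# dot (e.g. "U.S."), is a type-1 nonbreaking prefix (e.g. "Mr."), or is
# followed by a lower-cased word; a type-2 (#NUMERIC_ONLY#) prefix keeps
# its period only when a number follows. Otherwise the period is split off.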
if ( $word =~ /^(\S+)\.$/)
{
my $pre = $1;
if (($pre =~ /\./ && $pre =~ /\p{IsAlpha}/) || ($NONBREAKING_PREFIX{$pre} && $NONBREAKING_PREFIX{$pre}==1) || ($i<scalar(@words)-1 && ($words[$i+1] =~ /^[\p{IsLower}]/)))
{
#no change
}
elsif (($NONBREAKING_PREFIX{$pre} && $NONBREAKING_PREFIX{$pre}==2) && ($i<scalar(@words)-1 && ($words[$i+1] =~ /^[0-9]+/)))
{
#no change
}
else
{
$word = $pre." .";
}
}
$text .= $word." ";
}
# clean up extraneous spaces
$text =~ s/ +/ /g;
$text =~ s/^ //g;
$text =~ s/ $//g;
# .' at end of sentence is missed
$text =~ s/\.\' ?$/ . ' /;
# restore protected
for (my $i = 0; $i < scalar(@protected); ++$i) {
my $subst = sprintf("THISISPROTECTED%.3d", $i);
$text =~ s/$subst/$protected[$i]/g;
}
#restore multi-dots
while($text =~ /DOTDOTMULTI/)
{
$text =~ s/DOTDOTMULTI/DOTMULTI./g;
}
$text =~ s/DOTMULTI/./g;
#escape special chars
if (!$NO_ESCAPING)
{
$text =~ s/\&/\&amp;/g; # escape escape
$text =~ s/\|/\&#124;/g; # factor separator
$text =~ s/\</\&lt;/g; # xml
$text =~ s/\>/\&gt;/g; # xml
$text =~ s/\'/\&apos;/g; # xml
$text =~ s/\"/\&quot;/g; # xml
$text =~ s/\[/\&#91;/g; # syntax non-terminal
$text =~ s/\]/\&#93;/g; # syntax non-terminal
}
#ensure final line break
$text .= "\n" unless $text =~ /\n$/;
return $text;
}
sub tokenize_penn
{
# Improved compatibility with Penn Treebank tokenization. Useful if
# the text is to later be parsed with a PTB-trained parser.
#
# Adapted from Robert MacIntyre's sed script:
# http://www.cis.upenn.edu/~treebank/tokenizer.sed
my($text) = @_;
chomp($text);
# remove ASCII junk
$text =~ s/\s+/ /g;
$text =~ s/[\000-\037]//g;
# attempt to get correct directional quotes
$text =~ s/^``/`` /g;
$text =~ s/^"/`` /g;
$text =~ s/^`([^`])/` $1/g;
$text =~ s/^'/` /g;
$text =~ s/([ ([{<])"/$1 `` /g;
$text =~ s/([ ([{<])``/$1 `` /g;
$text =~ s/([ ([{<])`([^`])/$1 ` $2/g;
$text =~ s/([ ([{<])'/$1 ` /g;
# close quotes handled at end
$text =~ s=\.\.\.= _ELLIPSIS_ =g;
# separate out "," except if within numbers (5,300)
$text =~ s/([^\p{IsN}])[,]([^\p{IsN}])/$1 , $2/g;
# separate , pre and post number
$text =~ s/([\p{IsN}])[,]([^\p{IsN}])/$1 , $2/g;
$text =~ s/([^\p{IsN}])[,]([\p{IsN}])/$1 , $2/g;
#$text =~ s=([;:@#\$%&\p{IsSc}])= $1 =g;
$text =~ s=([;:@#\$%&\p{IsSc}\p{IsSo}])= $1 =g;
# Separate out intra-token slashes. PTB tokenization doesn't do this, so
# the tokens should be merged prior to parsing with a PTB-trained parser
# (see syntax-hyphen-splitting.perl).
$text =~ s/([\p{IsAlnum}])\/([\p{IsAlnum}])/$1 \@\/\@ $2/g;
# Assume sentence tokenization has been done first, so split FINAL periods
# only.
$text =~ s=([^.])([.])([\]\)}>"']*) ?$=$1 $2$3 =g;
# however, we may as well split ALL question marks and exclamation points,
# since they shouldn't have the abbrev.-marker ambiguity problem
$text =~ s=([?!])= $1 =g;
# parentheses, brackets, etc.
$text =~ s=([\]\[\(\){}<>])= $1 =g;
$text =~ s/\(/-LRB-/g;
$text =~ s/\)/-RRB-/g;
$text =~ s/\[/-LSB-/g;
$text =~ s/\]/-RSB-/g;
$text =~ s/{/-LCB-/g;
$text =~ s/}/-RCB-/g;
$text =~ s=--= -- =g;
# First off, add a space to the beginning and end of each line, to reduce
# necessary number of regexps.
$text =~ s=$= =;
$text =~ s=^= =;
$text =~ s="= '' =g;
# possessive or close-single-quote
$text =~ s=([^'])' =$1 ' =g;
# as in it's, I'm, we'd
$text =~ s='([sSmMdD]) = '$1 =g;
$text =~ s='ll = 'll =g;
$text =~ s='re = 're =g;
$text =~ s='ve = 've =g;
$text =~ s=n't = n't =g;
$text =~ s='LL = 'LL =g;
$text =~ s='RE = 'RE =g;
$text =~ s='VE = 'VE =g;
$text =~ s=N'T = N'T =g;
$text =~ s= ([Cc])annot = $1an not =g;
$text =~ s= ([Dd])'ye = $1' ye =g;
$text =~ s= ([Gg])imme = $1im me =g;
$text =~ s= ([Gg])onna = $1on na =g;
$text =~ s= ([Gg])otta = $1ot ta =g;
$text =~ s= ([Ll])emme = $1em me =g;
$text =~ s= ([Mm])ore'n = $1ore 'n =g;
$text =~ s= '([Tt])is = '$1 is =g;
$text =~ s= '([Tt])was = '$1 was =g;
$text =~ s= ([Ww])anna = $1an na =g;
#word token method
my @words = split(/\s/,$text);
$text = "";
for (my $i=0;$i<(scalar(@words));$i++)
{
my $word = $words[$i];
if ( $word =~ /^(\S+)\.$/)
{
my $pre = $1;
if (($pre =~ /\./ && $pre =~ /\p{IsAlpha}/) || ($NONBREAKING_PREFIX{$pre} && $NONBREAKING_PREFIX{$pre}==1) || ($i<scalar(@words)-1 && ($words[$i+1] =~ /^[\p{IsLower}]/)))
{
#no change
}
elsif (($NONBREAKING_PREFIX{$pre} && $NONBREAKING_PREFIX{$pre}==2) && ($i<scalar(@words)-1 && ($words[$i+1] =~ /^[0-9]+/)))
{
#no change
}
else
{
$word = $pre." .";
}
}
$text .= $word." ";
}
# restore ellipses
$text =~ s=_ELLIPSIS_=\.\.\.=g;
# clean out extra spaces
$text =~ s=  *= =g;
$text =~ s=^ *==g;
$text =~ s= *$==g;
#escape special chars
$text =~ s/\&/\&amp;/g; # escape escape
$text =~ s/\|/\&#124;/g; # factor separator
$text =~ s/\</\&lt;/g; # xml
$text =~ s/\>/\&gt;/g; # xml
$text =~ s/\'/\&apos;/g; # xml
$text =~ s/\"/\&quot;/g; # xml
$text =~ s/\[/\&#91;/g; # syntax non-terminal
$text =~ s/\]/\&#93;/g; # syntax non-terminal
#ensure final line break
$text .= "\n" unless $text =~ /\n$/;
return $text;
}
sub load_prefixes
{
my ($language, $PREFIX_REF) = @_;
my $prefixfile = "$mydir/nonbreaking_prefix.$language";
#default back to English if we don't have a language-specific prefix file
if (!(-e $prefixfile))
{
$prefixfile = "$mydir/nonbreaking_prefix.en";
print STDERR "WARNING: No known abbreviations for language '$language', attempting fall-back to English version...\n";
die ("ERROR: No abbreviations files found in $mydir\n") unless (-e $prefixfile);
}
if (-e "$prefixfile")
{
open(PREFIX, "<:utf8", "$prefixfile");
while (<PREFIX>)
{
my $item = $_;
chomp($item);
if (($item) && (substr($item,0,1) ne "#"))
{
if ($item =~ /(.*)[\s]+(\#NUMERIC_ONLY\#)/)
{
$PREFIX_REF->{$1} = 2;
}
else
{
$PREFIX_REF->{$item} = 1;
}
}
}
close(PREFIX);
}
}
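# get_devices: wait until $1 GPUs report less than 100 MiB of memory in
# use in gpustat; if $2 is 1, fall back to CPU (-1) instead of waiting.
# Echoes a comma-separated device list, as consumed elsewhere in run.sh:
#   device=$(get_devices $gpu_num 0)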
get_devices(){
gpu_num=$1
use_cpu=$2
device=()
while :
do
record=$(mktemp -t temp.record.XXXXXX)
gpustat > $record
all_devices=$(seq 0 "$(sed '1,2d' ${record} | wc -l)");
count=0
for dev in ${all_devices[@]}
do
line=$((dev + 2))
use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)
if [[ $use -lt 100 ]]; then
device[$count]=$dev
count=$((count + 1))
if [[ $count -eq $gpu_num ]]; then
break
fi
fi
done
if [[ ${#device[@]} -lt $gpu_num ]]; then
if [[ $use_cpu -eq 1 ]]; then
device=(-1)
else
sleep 60s
fi
else
break
fi
done
echo ${device[*]} | sed 's/ /,/g'
return $?
}
#!/usr/bin/env bash
# calculate wmt14 en-de multi-bleu score
if [ $# -ne 1 ]; then
echo "usage: $0 GENERATE_PY_OUTPUT"
exit 1
fi
echo -e "\n RUN >> "$0
requirement_scripts=(detokenizer.perl replace-unicode-punctuation.perl tokenizer.perl multi-bleu.perl)
for script in ${requirement_scripts[@]}; do
if ! which ${script} > /dev/null; then
echo "Error: it seems that moses is not installed or exported int the environment variables." >&2
return 1
fi
done
detokenizer=detokenizer.perl
replace_unicode_punctuation=replace-unicode-punctuation.perl
tokenizer=tokenizer.perl
multi_bleu=multi-bleu.perl
GEN=$1
SYS=$GEN.sys
REF=$GEN.ref
cat $GEN | cut -f 3 > $REF
cat $GEN | cut -f 4 > $SYS
#detokenize the decoded file so that it can be re-tokenized consistently
$detokenizer -l de < $SYS > $SYS.dtk
$detokenizer -l de < $REF > $REF.dtk
#replace unicode
$replace_unicode_punctuation -l de < $SYS.dtk > $SYS.dtk.punc
$replace_unicode_punctuation -l de < $REF.dtk > $REF.dtk.punc
#tokenize the decoded file with the moses tokenizer.perl
$tokenizer -l de < $SYS.dtk.punc > $SYS.dtk.punc.tok
$tokenizer -l de < $REF.dtk.punc > $REF.dtk.punc.tok
#"rich-text format" --> rich ##AT##-##AT## text format.
perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' < $SYS.dtk.punc.tok > $SYS.dtk.punc.tok.atat
perl -ple 's{(\S)-(\S)}{$1 ##AT##-##AT## $2}g' < $REF.dtk.punc.tok > $REF.dtk.punc.tok.atat
$multi_bleu $REF.dtk.punc.tok.atat < $SYS.dtk.punc.tok.atat
rm -f $SYS.dtk $SYS.dtk.punc $SYS.dtk.punc.tok $REF.dtk $REF.dtk.punc $REF.dtk.punc.tok
\ No newline at end of file
#!/usr/bin/env bash
# Processing WMT20 En-Zh Datasets
# Copyright 2021 Natural Language Processing Laboratory
# Xu Chen (xuchenneu@163.com)
# Set bash to 'debug' mode: it will exit on
# -e 'error', -u 'undefined variable', -o pipefail 'error in pipeline', -x 'print commands'
set -e
#set -u
set -o pipefail
export PYTHONIOENCODING=UTF-8
eval=1
time=$(date "+%m%d")
stage=0
stop_stage=0
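# Only the stages whose index falls within [stage, stop_stage] are run.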
######## hardware ########
# devices
device=()
gpu_num=8
update_freq=1
root_dir=~/st
code_dir=${root_dir}/Fairseq-S2T
pwd_dir=$PWD
# dataset
src_lang=en
tgt_lang=zh
lang=${src_lang}-${tgt_lang}
dataset=wmt20
task=translation
src_vocab_type=unigram
tgt_vocab_type=unigram
src_vocab_size=32000
tgt_vocab_size=32000
share_dict=0
lcrm=1
tokenizer=1
use_specific_dict=1
specific_prefix=unified
specific_dir=${root_dir}/data/${dataset}/vocab
src_vocab_prefix=spm_en
tgt_vocab_prefix=spm_zh
org_data_dir=${root_dir}/data/${dataset}
data_dir=${root_dir}/data/${dataset}/mt
train_subset=train
valid_subset=dev
trans_subset=tst-COMMON
test_subset=test
# exp
exp_prefix=${time}
extra_tag=
extra_parameter=
exp_tag=baseline
exp_name=
# config
train_config=base_s
# training setting
fp16=1
max_tokens=4096
step_valid=0
bleu_valid=0
# decoding setting
sacrebleu=0
dec_model=checkpoint_best.pt
n_average=10
beam_size=5
len_penalty=1.0
if [[ ${use_specific_dict} -eq 1 ]]; then
exp_prefix=${exp_prefix}_${specific_prefix}
data_dir=${data_dir}/${specific_prefix}
else
if [[ "${tgt_vocab_type}" == "char" ]]; then
vocab_name=char
exp_prefix=${exp_prefix}_char
else
if [[ ${src_vocab_size} -ne ${tgt_vocab_size} || "${src_vocab_type}" != "${tgt_vocab_type}" ]]; then
src_vocab_name=${src_vocab_type}${src_vocab_size}
tgt_vocab_name=${tgt_vocab_type}${tgt_vocab_size}
vocab_name=${src_vocab_name}_${tgt_vocab_name}
else
vocab_name=${tgt_vocab_type}${tgt_vocab_size}
src_vocab_name=${vocab_name}
tgt_vocab_name=${vocab_name}
fi
fi
data_dir=${data_dir}/${vocab_name}
src_vocab_prefix=spm_${src_vocab_name}_${src_lang}
tgt_vocab_prefix=spm_${tgt_vocab_name}_${tgt_lang}
if [[ $share_dict -eq 1 ]]; then
data_dir=${data_dir}_share
src_vocab_prefix=spm_${vocab_name}_share
tgt_vocab_prefix=spm_${vocab_name}_share
fi
fi
if [[ ${lcrm} -eq 1 ]]; then
data_dir=${data_dir}_lcrm
exp_prefix=${exp_prefix}_lcrm
fi
if [[ ${tokenizer} -eq 1 ]]; then
train_subset=${train_subset}.tok
valid_subset=${valid_subset}.tok
trans_subset=${trans_subset}.tok
data_dir=${data_dir}_tok
exp_prefix=${exp_prefix}_tok
fi
. ./local/parse_options.sh || exit 1;
# full path
if [[ -z ${exp_name} ]]; then
config_string=${train_config//,/_}
exp_name=${exp_prefix}_${config_string}_${exp_tag}
if [[ -n ${extra_tag} ]]; then
exp_name=${exp_name}_${extra_tag}
fi
fi
model_dir=${root_dir}/checkpoints/${dataset}/mt/${exp_name}
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
echo "stage -1: Data Download"
# pass
fi
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
### Task dependent. You have to do the following data preparation by yourself.
echo "stage 0: MT Data Preparation"
if [[ ! -e ${data_dir} ]]; then
mkdir -p ${data_dir}
fi
if [[ ! -e ${data_dir}/data ]]; then
mkdir -p ${data_dir}/data
fi
if [[ ! -f ${data_dir}/${src_vocab_prefix}.txt || ! -f ${data_dir}/${tgt_vocab_prefix}.txt ]]; then
if [[ ${use_specific_dict} -eq 0 ]]; then
cmd="python ${code_dir}/examples/speech_to_text/prep_mt_data.py
--data-root ${org_data_dir}
--output-root ${data_dir}
--splits ${train_subset},${valid_subset},${trans_subset}
--src-lang ${src_lang}
--tgt-lang ${tgt_lang}
--src-vocab-type ${src_vocab_type}
--tgt-vocab-type ${tgt_vocab_type}
--src-vocab-size ${src_vocab_size}
--tgt-vocab-size ${tgt_vocab_size}"
else
cp -r ${specific_dir}/${src_vocab_prefix}.* ${data_dir}
cp ${specific_dir}/${tgt_vocab_prefix}.* ${data_dir}
cmd="python ${code_dir}/examples/speech_to_text/prep_mt_data.py
--data-root ${org_data_dir}
--output-root ${data_dir}
--splits ${train_subset},${valid_subset},${trans_subset}
--src-lang ${src_lang}
--tgt-lang ${tgt_lang}
--src-vocab-prefix ${src_vocab_prefix}
--tgt-vocab-prefix ${tgt_vocab_prefix}"
fi
if [[ $share_dict -eq 1 ]]; then
cmd="$cmd
--share"
fi
if [[ ${lcrm} -eq 1 ]]; then
cmd="$cmd
--lowercase-src
--rm-punc-src"
fi
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval ${cmd}
fi
cmd="python ${code_dir}/fairseq_cli/preprocess.py
--source-lang ${src_lang} --target-lang ${tgt_lang}
--trainpref ${data_dir}/data/${train_subset}
--validpref ${data_dir}/data/${valid_subset}
--testpref ${data_dir}/data/${trans_subset}
--destdir ${data_dir}/data-bin
--srcdict ${data_dir}/${src_vocab_prefix}.txt
--tgtdict ${data_dir}/${tgt_vocab_prefix}.txt
--workers 64"
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval ${cmd}
fi
data_dir=${data_dir}/data-bin
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
echo "stage 1: MT Network Training"
[[ ! -d ${data_dir} ]] && echo "The data dir ${data_dir} does not exist!" && exit 1;
if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
if [[ ${gpu_num} -eq 0 ]]; then
device=""
else
source ./local/utils.sh
device=$(get_devices $gpu_num 0)
fi
fi
echo -e "dev=${device} data=${data_dir} model=${model_dir}"
if [[ ! -d ${model_dir} ]]; then
mkdir -p ${model_dir}
else
echo "${model_dir} exists."
fi
cp ${BASH_SOURCE[0]} ${model_dir}
cp ${PWD}/train.sh ${model_dir}
extra_parameter="${extra_parameter}
--train-config ${pwd_dir}/conf/basis.yaml"
cp ${pwd_dir}/conf/basis.yaml ${model_dir}
config_list="${train_config//,/ }"
idx=1
for config in ${config_list[@]}
do
config_path=${pwd_dir}/conf/${config}.yaml
if [[ ! -f ${config_path} ]]; then
echo "No config file ${config_path}"
exit
fi
cp ${config_path} ${model_dir}
extra_parameter="${extra_parameter}
--train-config${idx} ${config_path}"
idx=$((idx + 1))
done
cmd="python3 -u ${code_dir}/fairseq_cli/train.py
${data_dir}
--source-lang ${src_lang}
--target-lang ${tgt_lang}
--task ${task}
--max-tokens ${max_tokens}
--skip-invalid-size-inputs-valid-test
--update-freq ${update_freq}
--log-interval 100
--save-dir ${model_dir}
--tensorboard-logdir ${model_dir}"
if [[ -n ${extra_parameter} ]]; then
cmd="${cmd}
${extra_parameter}"
fi
if [[ ${gpu_num} -gt 0 ]]; then
cmd="${cmd}
--distributed-world-size $gpu_num
--ddp-backend no_c10d"
fi
if [[ $fp16 -eq 1 ]]; then
cmd="${cmd}
--fp16"
fi
if [[ $step_valid -eq 1 ]]; then
validate_interval=1
save_interval=1
no_epoch_checkpoints=0
save_interval_updates=500
keep_interval_updates=10
fi
if [[ $bleu_valid -eq 1 ]]; then
cmd="$cmd
--eval-bleu
--eval-bleu-args '{\"beam\": 1}'
--eval-tokenized-bleu
--eval-bleu-remove-bpe
--best-checkpoint-metric bleu
--maximize-best-checkpoint-metric"
fi
if [[ -n $no_epoch_checkpoints && $no_epoch_checkpoints -eq 1 ]]; then
cmd="$cmd
--no-epoch-checkpoints"
fi
if [[ -n $validate_interval ]]; then
cmd="${cmd}
--validate-interval $validate_interval "
fi
if [[ -n $save_interval ]]; then
cmd="${cmd}
--save-interval $save_interval "
fi
if [[ -n $save_interval_updates ]]; then
cmd="${cmd}
--save-interval-updates $save_interval_updates"
if [[ -n $keep_interval_updates ]]; then
cmd="${cmd}
--keep-interval-updates $keep_interval_updates"
fi
fi
echo -e "\033[34mRun command: \n${cmd} \033[0m"
# save info
log=./history.log
echo "${time} | ${device} | ${data_dir} | ${exp_name} | ${model_dir} " >> $log
tail -n 50 ${log} > tmp.log
mv tmp.log $log
export CUDA_VISIBLE_DEVICES=${device}
log=${model_dir}/train.log
cmd="nohup ${cmd} >> ${log} 2>&1 &"
if [[ $eval -eq 1 ]]; then
eval $cmd
sleep 2s
tail -n "$(wc -l ${log} | awk '{print $1+1}')" -f ${log}
fi
fi
wait
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
echo "stage 2: MT Decoding"
if [[ ${n_average} -ne 1 ]]; then
# Average models
dec_model=avg_${n_average}_checkpoint.pt
if [[ ! -f ${model_dir}/${dec_model} ]]; then
cmd="python ${code_dir}/scripts/average_checkpoints.py
--inputs ${model_dir}
--num-best-checkpoints ${n_average}
--output ${model_dir}/${dec_model}"
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval $cmd
fi
fi
if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
if [[ ${gpu_num} -eq 0 ]]; then
device=""
else
source ./local/utils.sh
device=$(get_devices $gpu_num 0)
fi
fi
export CUDA_VISIBLE_DEVICES=${device}
result_file=${model_dir}/decode_result
[[ -f ${result_file} ]] && rm ${result_file}
test_subset=(${test_subset//,/ })
for subset in ${test_subset[@]}; do
cmd="python ${code_dir}/fairseq_cli/generate.py
${data_dir}
--source-lang ${src_lang}
--target-lang ${tgt_lang}
--gen-subset ${subset}
--task ${task}
--path ${model_dir}/${dec_model}
--results-path ${model_dir}
--max-tokens ${max_tokens}
--beam ${beam_size}
--lenpen ${len_penalty}
--post-process sentencepiece"
if [[ ${sacrebleu} -eq 1 ]]; then
cmd="${cmd}
--scoring sacrebleu"
if [[ ${tokenizer} -eq 1 ]]; then
cmd="${cmd}
--tokenizer moses
--moses-source-lang ${src_lang}
--moses-target-lang ${tgt_lang}"
fi
fi
echo -e "\033[34mRun command: \n${cmd} \033[0m"
if [[ $eval -eq 1 ]]; then
eval $cmd
tail -n 2 ${model_dir}/generate-${subset}.txt >> ${result_file}
fi
done
cat ${result_file}
fi
#!/usr/bin/env bash
# training the model
gpu_num=4
update_freq=4
max_tokens=8192
exp_tag=baseline
#config_list=(base)
config_list=(deep)
# exp full name
exp_name=
extra_tag=
extra_parameter=
#extra_tag="${extra_tag}"
#extra_parameter="${extra_parameter} "
train_config=$(echo ${config_list[*]} | sed 's/ /,/g')
cmd="./run.sh
--stage 1
--stop_stage 1
--gpu_num ${gpu_num}
--update_freq ${update_freq}
--train_config ${train_config}
--max_tokens ${max_tokens}
"
if [[ -n ${exp_name} ]]; then
cmd="$cmd --exp_name ${exp_name}"
fi
if [[ -n ${exp_tag} ]]; then
cmd="$cmd --exp_tag ${exp_tag}"
fi
if [[ -n ${extra_tag} ]]; then
cmd="$cmd --extra_tag ${extra_tag}"
fi
if [[ -n ${extra_parameter} ]]; then
cmd="$cmd --extra_parameter \"${extra_parameter}\""
fi
echo ${cmd}
eval ${cmd}