optimize the shell scripts after IWSLT 2021

29faf16f · xuchen · f190005c · f190005c · f190005c · f190005c
Commit 29faf16f authored Apr 26, 2021 by xuchen
--- a/egs/covost/asr/conf/train.yaml
+++ b/egs/covost/asr/conf/train.yaml
-train-subset: train_asr
-valid-subset: dev_asr
-
-max-epoch: 50
-max-update: 100000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
-
-arch: s2t_transformer_s
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 10000
-lr: 2e-3
-#adam_betas: (0.9,0.98)
-
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-conv-kernel-sizes: 5,5
-conv-channels: 1024
-dropout: 0.1
-activation-fn: relu
-encoder-embed-dim: 256
-encoder-ffn-embed-dim: 2048
-encoder-layers: 12
-decoder-layers: 6
-encoder-attention-heads: 4
--- a/egs/covost/asr/conf/train_ctc_conformer_m.yaml
+++ b/egs/covost/asr/conf/train_ctc_conformer_m.yaml
-train-subset: train_asr
-valid-subset: dev_asr
-
-max-epoch: 50
-max-update: 100000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
-
-arch: s2t_conformer_m
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 10000
-lr: 1e-3
-#adam_betas: (0.9,0.98)
-
-ctc-weight: 0.3
-criterion: label_smoothed_cross_entropy_with_ctc
-label_smoothing: 0.1
-
-conv-kernel-sizes: 5,5
-conv-channels: 1024
-#dropout: 0.1
-#activation-fn: relu
-#encoder-embed-dim: 256
-#encoder-ffn-embed-dim: 2048
-#encoder-layers: 12
-#decoder-layers: 6
-#encoder-attention-heads: 4
-
-macaron-style: True
-use-cnn-module: True
-cnn-module-kernel: 31
-
-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
--- a/egs/covost/asr/conf/train_ctc_rpr.yaml
+++ b/egs/covost/asr/conf/train_ctc_rpr.yaml
-train-subset: train_asr
-valid-subset: dev_asr
-
-max-epoch: 50
-max-update: 100000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
-
-arch: s2t_transformer_s
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 10000
-lr: 2e-3
-#adam_betas: (0.9,0.98)
-
-ctc-weight: 0.3
-criterion: label_smoothed_cross_entropy_with_ctc
-label_smoothing: 0.1
-
-conv-kernel-sizes: 5,5
-conv-channels: 1024
-dropout: 0.1
-activation-fn: relu
-encoder-embed-dim: 256
-encoder-ffn-embed-dim: 2048
-encoder-layers: 12
-decoder-layers: 6
-encoder-attention-heads: 4
-
-encoder-attention-type: relative
-#decoder-attention-type: relative
-max-relative-length: 100
-
-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
--- a/egs/covost/asr/decode.sh
+++ b/egs/covost/asr/decode.sh
-#! /bin/bash
-
-# Decode with a trained model by running stage 2 of ./run.sh.
-# Usage: ./decode.sh [exp_name]
-#   exp_name - optional experiment name forwarded to run.sh as --exp_name
-
-gpu_num=1
-
-data_dir=
-test_subset=tst-COMMON
-
-exp_name=
-if [ "$#" -eq 1 ]; then
-    exp_name=$1
-fi
-
-n_average=10
-beam_size=5
-max_tokens=40000
-
-cmd="./run.sh
-    --stage 2
-    --stop_stage 2
-    --gpu_num ${gpu_num}
-    --n_average ${n_average}
-    --beam_size ${beam_size}
-    --max_tokens ${max_tokens}
-    "
-
-# Optional flags are appended only when they carry a value.  An empty
-# "--exp_name" would be followed directly by the next flag, which the
-# "--name value" option parsing would then take as the experiment name.
-if [[ -n ${exp_name} ]]; then
-    cmd="$cmd --exp_name ${exp_name}"
-fi
-if [[ -n ${data_dir} ]]; then
-    cmd="$cmd --data_dir ${data_dir}"
-fi
-if [[ -n ${test_subset} ]]; then
-    cmd="$cmd --test_subset ${test_subset}"
-fi
-
-# Word splitting on purpose: it flattens the multi-line command for display.
-echo $cmd
-eval $cmd
--- a/egs/covost/asr/local/monitor.sh
+++ b/egs/covost/asr/local/monitor.sh
-gpu_num=1
-
-while :
-do
-    all_devices=$(seq 0 `gpustat | sed '1,2d' | wc -l`);
-    count=0
-    for dev in ${all_devices[@]}
-    do
-        line=`expr $dev + 2`
-        use=`gpustat -p | head -n $line | tail -1 | cut -d '|' -f4 | wc -w`
-        if [[ $use -eq 0 ]]; then
-            device[$count]=$dev
-            count=`expr $count + 1`
-            if [[ $count -eq $gpu_num ]]; then
-                break
-            fi
-        fi
-    done
-    if [[ ${#device[@]} -lt $gpu_num ]]; then
-        sleep 60s
-    else
-        echo "Run $cmd"
-        eval $cmd
-        sleep 10s
-        exit
-    fi
-done
--- a/egs/covost/asr/local/parse_options.sh
+++ b/egs/covost/asr/local/parse_options.sh
-#!/usr/bin/env bash
-
-# Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
-#                 Arnab Ghoshal, Karel Vesely
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Parse command-line options.
-# To be sourced by another script (as in ". parse_options.sh").
-# Option format is: --option-name arg
-# and shell variable "option_name" gets set to value "arg."
-# The exception is --help, which takes no arguments, but prints the
-# $help_message variable (if defined).
-
-
-###
-### The --config file options have lower priority to command line
-### options, so we need to import them first...
-###
-
-# Now import all the configs specified by command-line, in left-to-right order
-for ((argpos=1; argpos<$#; argpos++)); do
-  if [ "${!argpos}" == "--config" ]; then
-    argpos_plus1=$((argpos+1))
-    config=${!argpos_plus1}
-    [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
-    . $config  # source the config file.
-  fi
-done
-
-
-###
-### Now we process the command line options
-###
-while true; do
-  [ -z "${1:-}" ] && break;  # break if there are no arguments
-  case "$1" in
-    # If the enclosing script is called with --help option, print the help
-    # message and exit.  Scripts should put help messages in $help_message
-    --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
-      else printf "$help_message\n" 1>&2 ; fi;
-      exit 0 ;;
-    --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
-      exit 1 ;;
-    # If the first command-line argument begins with "--" (e.g. --foo-bar),
-    # then work out the variable name as $name, which will equal "foo_bar".
-    --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
-      # Next we test whether the variable in question is undefined-- if so it's
-      # an invalid option and we die.  Note: $0 evaluates to the name of the
-      # enclosing script.
-      # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
-      # is undefined.  We then have to wrap this test inside "eval" because
-      # foo_bar is itself inside a variable ($name).
-      eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
-
-      oldval="`eval echo \\$$name`";
-      # Work out whether we seem to be expecting a Boolean argument.
-      if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
-        was_bool=true;
-      else
-        was_bool=false;
-      fi
-
-      # Set the variable to the right value-- the escaped quotes make it work if
-      # the option had spaces, like --cmd "queue.pl -sync y"
-      eval $name=\"$2\";
-
-      # Check that Boolean-valued arguments are really Boolean.
-      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
-        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
-        exit 1;
-      fi
-      shift 2;
-      ;;
-  *) break;
-  esac
-done
-
-
-# Check for an empty argument to the --cmd option, which can easily occur as a
-# result of scripting errors.
-[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
-
-
-true; # so this script returns exit code 0.
--- a/egs/covost/asr/local/path.sh
+++ b/egs/covost/asr/local/path.sh
-MAIN_ROOT=$PWD/../../..
-KALDI_ROOT=$MAIN_ROOT/tools/kaldi
-
-export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PATH
-[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
-. $KALDI_ROOT/tools/config/common_path.sh
-export LC_ALL=C
-
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$MAIN_ROOT/src/lib
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$MAIN_ROOT/tools/chainer_ctc/ext/warp-ctc/build
-. "${MAIN_ROOT}"/tools/activate_python.sh && . "${MAIN_ROOT}"/tools/extra_path.sh
-export PATH=$MAIN_ROOT/utils:$MAIN_ROOT/espnet/bin:$PATH
-
-export OMP_NUM_THREADS=1
-
-# check extra module installation
-if ! which tokenizer.perl > /dev/null; then
-    echo "Error: it seems that moses is not installed." >&2
-    echo "Error: please install moses as follows." >&2
-    echo "Error: cd ${MAIN_ROOT}/tools && make moses.done" >&2
-    return 1
-fi
-
-# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
-export PYTHONIOENCODING=UTF-8
--- a/egs/covost/asr/local/utils.sh
+++ b/egs/covost/asr/local/utils.sh
-
-# Select idle GPUs from a `gpustat` snapshot.
-# Arguments:
-#   $1 - number of GPUs required
-#   $2 - 1: give up and report the CPU (-1) when too few GPUs are idle;
-#        otherwise: poll again every 60 seconds until enough are idle
-# Globals:  gpu_num, use_cpu and device are overwritten.
-# Outputs:  the chosen device indices on stdout, comma separated.
-get_devices(){
-    gpu_num=$1
-    use_cpu=$2
-    device=()
-    local record n_gpu dev use
-    while :
-    do
-        # Hold the snapshot in a variable; the temp file used before was
-        # created on every poll and never deleted.
-        record=$(gpustat)
-        # Line 1 is treated as a header, every following line as one GPU.
-        n_gpu=$(( $(wc -l <<< "${record}") - 1 ))
-        # Start each poll from scratch so stale picks never linger.
-        device=()
-        for ((dev = 0; dev < n_gpu; dev++))
-        do
-            # Third '|' column, text before '/': compared against 100 below
-            # (presumably used memory in MB in gpustat's default layout).
-            use=$(sed -n "$((dev + 2))p" <<< "${record}" | cut -d '|' -f3 | cut -d '/' -f1)
-            if [[ $use -lt 100 ]]; then
-                device+=("${dev}")
-                if [[ ${#device[@]} -eq $gpu_num ]]; then
-                    break
-                fi
-            fi
-        done
-        if [[ ${#device[@]} -lt $gpu_num ]]; then
-            if [[ $use_cpu -eq 1 ]]; then
-                # Leave the loop here: without this break a request for more
-                # than one GPU spun forever, because (-1) has length 1.
-                device=(-1)
-                break
-            else
-                sleep 60s
-            fi
-        else
-            break
-        fi
-    done
-
-    echo ${device[*]} | sed 's/ /,/g'
-    return $?
-}
-
-
--- a/egs/covost/asr/run.sh
+++ b/egs/covost/asr/run.sh
-#! /bin/bash
-
-# Processing CoVoST Datasets
-
-# Copyright 2021 Natural Language Processing Laboratory 
-# Xu Chen (xuchenneu@163.com)
-
-# Set bash to 'debug' mode, it will exit on :
-# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
-set -e
-#set -u
-set -o pipefail
-export PYTHONIOENCODING=UTF-8
-
-eval=1
-time=$(date "+%m%d_%H%M")
-
-stage=0
-stop_stage=0
-
-######## hardware ########
-# devices
-#device=()
-gpu_num=8
-update_freq=1
-
-root_dir=~/st/Fairseq-S2T
-pwd_dir=$PWD
-
-# dataset
-src_lang=en
-tgt_lang=de
-lang=${src_lang}-${tgt_lang}
-
-dataset=covost
-task=speech_to_text
-vocab_type=unigram
-vocab_size=5000
-speed_perturb=0
-lcrm=1
-
-use_specific_dict=1
-specific_prefix=fair
-specific_dir=/home/xuchen/st/data/librispeech/fair
-asr_vocab_prefix=spm_unigram_10000
-
-org_data_dir=/media/data/asr_data/${dataset}
-data_dir=~/st/data/${dataset}/asr
-test_subset=tst-COMMON
-
-# exp
-exp_prefix=${time}
-extra_tag=
-extra_parameter=
-exp_tag=baseline
-exp_name=
-
-# config
-train_config=train_ctc.yaml
-data_config=config_asr.yaml
-data_config=config_st_share.yaml
-
-# training setting
-fp16=1
-max_tokens=40000
-step_valid=0
-
-# decoding setting
-n_average=10
-beam_size=5
-
-if [[ ${speed_perturb} -eq 1 ]]; then
-    data_dir=${data_dir}_sp
-    exp_prefix=${exp_prefix}_sp
-fi
-if [[ ${lcrm} -eq 1 ]]; then
-    data_dir=${data_dir}_lcrm
-    exp_prefix=${exp_prefix}_lcrm
-fi
-if [[ ${use_specific_dict} -eq 1 ]]; then
-    data_dir=${data_dir}_${specific_prefix}
-    exp_prefix=${exp_prefix}_${specific_prefix}
-fi
-
-. ./local/parse_options.sh || exit 1;
-
-# full path
-train_config=$pwd_dir/conf/${train_config}
-if [[ -z ${exp_name} ]]; then
-    exp_name=${exp_prefix}_$(basename ${train_config%.*})_${exp_tag}
-    if [[ -n ${extra_tag} ]]; then
-        exp_name=${exp_name}_${extra_tag}
-    fi
-fi
-model_dir=$root_dir/../checkpoints/$dataset/asr/${exp_name}
-
-if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
-    echo "stage -1: Data Download"
-    # pass
-fi
-
-if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
-    ### Task dependent. You have to write the following data preparation part yourself.
-    ### But you can utilize Kaldi recipes in most cases
-    echo "stage 0: ASR Data Preparation"
-    if [[ ! -e ${data_dir}/${src_lang} ]]; then
-        mkdir -p ${data_dir}/${src_lang}
-    fi
-    source ~/tools/audio/bin/activate
-
-    cmd="python ${root_dir}/examples/speech_to_text/prep_covost_data.py
-        --data-root ${org_data_dir}
-        --output-root ${data_dir}
-        --src-lang ${src_lang}
-        --task asr
-        --vocab-type ${vocab_type}
-        --vocab-size ${vocab_size}"
-
-    if [[ ${use_specific_dict} -eq 1 ]]; then
-        cp -r ${specific_dir}/${asr_vocab_prefix}.* ${data_dir}/${src_lang}
-        cmd="$cmd
-        --asr-prefix ${asr_vocab_prefix}"
-    fi
-    if [[ ${speed_perturb} -eq 1 ]]; then
-        cmd="$cmd
-        --speed-perturb"
-    fi
-    if [[ ${lcrm} -eq 1 ]]; then
-        cmd="$cmd
-        --lowercase-src
-        --rm-punc-src"
-    fi
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 ]] && eval ${cmd}
-    deactivate
-fi
-
-data_dir=${data_dir}/${lang}
-
-if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
-    echo "stage 1: ASR Network Training"
-    [[ ! -d ${data_dir} ]] && echo "The data dir ${data_dir} is not existing!" && exit 1;
-
-    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
-		if [[ ${gpu_num} -eq 0 ]]; then
-			device=()
-		else
-        	source ./local/utils.sh
-        	device=$(get_devices $gpu_num 0)
-		fi
-    fi
-
-    echo -e "dev=${device} data=${data_dir} model=${model_dir}"
-
-    if [[ ! -d ${model_dir} ]]; then
-        mkdir -p ${model_dir}
-    else
-        echo "${model_dir} exists."
-    fi
-
-    cp ${BASH_SOURCE[0]} ${model_dir}
-    cp ${PWD}/train.sh ${model_dir}
-    cp ${train_config} ${model_dir}
-
-    cmd="python3 -u ${root_dir}/fairseq_cli/train.py
-        ${data_dir}
-        --config-yaml ${data_config}
-        --train-config ${train_config}
-        --task ${task}
-        --max-tokens ${max_tokens}
-        --update-freq ${update_freq}
-        --log-interval 100
-        --save-dir ${model_dir}
-        --tensorboard-logdir ${model_dir}"
-
-    if [[ -n ${extra_parameter} ]]; then
-        cmd="${cmd}
-        ${extra_parameter}"
-    fi
-	if [[ ${gpu_num} -gt 0 ]]; then
-		cmd="${cmd}
-        --distributed-world-size $gpu_num
-        --ddp-backend no_c10d"
-	fi
-    if [[ $fp16 -eq 1 ]]; then
-        cmd="${cmd}
-        --fp16"
-    fi
-    if [[ $step_valid -eq 1 ]]; then
-        validate_interval=10000
-        save_interval=10000
-        no_epoch_checkpoints=1
-        save_interval_updates=5000
-        keep_interval_updates=3
-    else
-        validate_interval=1
-        keep_last_epochs=10
-    fi
-    if [[ -n $no_epoch_checkpoints && $no_epoch_checkpoints -eq 1 ]]; then
-        cmd="$cmd
-        --no-epoch-checkpoints"
-    fi
-    if [[ -n $validate_interval ]]; then
-        cmd="${cmd}
-        --validate-interval $validate_interval "
-    fi
-    if [[ -n $save_interval ]]; then
-        cmd="${cmd}
-        --save-interval $save_interval "
-    fi
-    if [[ -n $keep_last_epochs ]]; then
-        cmd="${cmd}
-        --keep-last-epochs $keep_last_epochs "
-    fi
-    if [[ -n $save_interval_updates ]]; then
-        cmd="${cmd}
-        --save-interval-updates $save_interval_updates"
-        if [[ -n $keep_interval_updates ]]; then
-        cmd="${cmd}
-        --keep-interval-updates $keep_interval_updates"
-        fi
-    fi
-
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-
-    # save info
-    log=./history.log
-    echo "${time} | ${device} | ${data_dir} | ${model_dir} " >> $log
-    cat $log | tail -n 50 > tmp.log
-    mv tmp.log $log
-    export CUDA_VISIBLE_DEVICES=${device}
-
-    cmd="nohup ${cmd} >> ${model_dir}/train.log 2>&1 &"
-    if [[ $eval -eq 1 ]]; then
-		eval $cmd
-		sleep 2s
-		tail -n `wc -l ${model_dir}/train.log | awk '{print $1+1}'` -f ${model_dir}/train.log
-	fi
-fi
-wait
-
-if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    echo "stage 2: ASR Decoding"
-    if [[ ${n_average} -ne 1 ]]; then
-        # Average models
-		dec_model=avg_${n_average}_checkpoint.pt
-
-		cmd="python ${root_dir}/scripts/average_checkpoints.py
-        --inputs ${model_dir}
-        --num-epoch-checkpoints ${n_average}
-        --output ${model_dir}/${dec_model}"
-    	echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    	[[ $eval -eq 1 ]] && eval $cmd
-	else
-		dec_model=checkpoint_best.pt
-	fi
-
-    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
-		if [[ ${gpu_num} -eq 0 ]]; then
-			device=()
-		else
-        	source ./local/utils.sh
-        	device=$(get_devices $gpu_num 0)
-		fi
-    fi
-    export CUDA_VISIBLE_DEVICES=${device}
-
-	#tmp_file=$(mktemp ${model_dir}/tmp-XXXXX)
-	#trap 'rm -rf ${tmp_file}' EXIT
-	result_file=${model_dir}/decode_result
-	[[ -f ${result_file} ]] && rm ${result_file}
-
-    test_subset=(${test_subset//,/ })
-	for subset in ${test_subset[@]}; do
-        subset=${subset}_asr
-  		cmd="python ${root_dir}/fairseq_cli/generate.py
-        ${data_dir}
-        --config-yaml ${data_config}
-        --gen-subset ${subset}
-        --task speech_to_text
-        --path ${model_dir}/${dec_model}
-        --results-path ${model_dir}
-        --max-tokens ${max_tokens}
-        --beam ${beam_size}
-        --scoring wer
-        --wer-tokenizer 13a
-        --wer-lowercase
-        --wer-remove-punct
-        "
-    	echo -e "\033[34mRun command: \n${cmd} \033[0m"
-
-        if [[ $eval -eq 1 ]]; then
-    	    eval $cmd
-    	    tail -n 1 ${model_dir}/generate-${subset}.txt >> ${result_file}
-        fi
-	done
-    cat ${result_file}
-fi
--- a/egs/covost/asr/train.sh
+++ b/egs/covost/asr/train.sh
-#! /bin/bash
-
-# training the model
-
-gpu_num=8
-update_freq=2
-max_tokens=20000
-
-extra_tag=lcrm
-extra_parameter=
-
-#extra_tag="${extra_tag}"
-#extra_parameter="${extra_parameter} "
-
-exp_tag=
-train_config=train_ctc.yaml
-
-cmd="./run.sh
-    --stage 1
-    --stop_stage 1
-    --gpu_num ${gpu_num}
-    --update_freq ${update_freq}
-    --train_config ${train_config}
-    --max_tokens ${max_tokens}
-    "
-
-if [[ -n ${exp_tag} ]]; then
-    cmd="$cmd --exp_tag ${exp_tag}"
-fi
-if [[ -n ${extra_tag} ]]; then
-    cmd="$cmd --extra_tag ${extra_tag}"
-fi
-if [[ -n ${extra_parameter} ]]; then
-    cmd="$cmd --extra_parameter \"${extra_parameter}\""
-fi
-
-echo $cmd
-eval $cmd
--- a/egs/covost/mt/binary.sh
+++ b/egs/covost/mt/binary.sh
-set -e
-
-eval=1
-
-root_dir=~/st/Fairseq-S2T
-data_dir=/home/xuchen/st/data/wmt/test
-vocab_dir=/home/xuchen/st/data/wmt/mt/en-de/unigram32000_share
-src_vocab_prefix=spm_unigram32000_share
-tgt_vocab_prefix=spm_unigram32000_share
-
-src_lang=en
-tgt_lang=de
-tokenize=1
-splits=(newstest2014 newstest2016)
-
-for split in ${splits[@]}; do
-    src_file=${data_dir}/${split}.${src_lang}
-    tgt_file=${data_dir}/${split}.${tgt_lang}
-
-    if [[ ${tokenize} -eq 1 ]]; then
-        cmd="tokenizer.perl -l ${src_lang} --threads 8 -no-escape < ${src_file} > ${src_file}.tok"
-        echo -e "\033[34mRun command: \n${cmd} \033[0m"
-        [[ $eval -eq 1 ]] && eval ${cmd}
-
-        cmd="tokenizer.perl -l ${tgt_lang} --threads 8 -no-escape < ${tgt_file} > ${tgt_file}.tok"
-        echo -e "\033[34mRun command: \n${cmd} \033[0m"
-        [[ $eval -eq 1 ]] && eval ${cmd}
-        src_file=${src_file}.tok
-        tgt_file=${tgt_file}.tok
-    fi
-
-    cmd="spm_encode
-    --model ${vocab_dir}/${src_vocab_prefix}.model
-    --output_format=piece
-    < ${src_file}
-    > ${src_file}.spm"
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 ]] && eval ${cmd}
-
-    cmd="spm_encode
-    --model ${vocab_dir}/${tgt_vocab_prefix}.model
-    --output_format=piece
-    < ${tgt_file}
-    > ${tgt_file}.spm"
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 ]] && eval ${cmd}
-
-    src_file=${src_file}.spm
-    tgt_file=${tgt_file}.spm
-
-    mkdir -p ${data_dir}/final
-    cmd="cp ${src_file} ${data_dir}/final/${split}.${src_lang}"
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 ]] && eval ${cmd}
-
-    cmd="cp ${tgt_file} ${data_dir}/final/${split}.${tgt_lang}"
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 ]] && eval ${cmd}
-done
-
-n_set=${#splits[*]}
-for ((i=0;i<$n_set;i++)); do
-    dataset[$i]=${data_dir}/final/${splits[$i]}
-done
-pref=`echo ${dataset[*]} | sed 's/ /,/g'`
-
-cmd="python ${root_dir}/fairseq_cli/preprocess.py
-    --source-lang ${src_lang}
-    --target-lang ${tgt_lang}
-    --testpref ${pref}
-    --destdir ${data_dir}/data-bin
-    --srcdict ${vocab_dir}/${src_vocab_prefix}.txt
-    --tgtdict ${vocab_dir}/${tgt_vocab_prefix}.txt
-    --workers 64"
-
-echo -e "\033[34mRun command: \n${cmd} \033[0m"
-[[ $eval -eq 1 ]] && eval ${cmd}
\ No newline at end of file
--- a/egs/covost/mt/conf/train.yaml
+++ b/egs/covost/mt/conf/train.yaml
-train-subset: train
-valid-subset: valid
-
-max-epoch: 50
-max-update: 100000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-skip-invalid-size-inputs-valid-test: True
-
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
-
-arch: transformer
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 8000
-lr: 1e-3
-adam_betas: (0.9,0.997)
-
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-dropout: 0.1
-attention-dropout: 0.1
-activation-dropout: 0.1
-
-activation-fn: relu
-encoder-normalize-before: True
-decoder-normalize-before: True
-encoder-embed-dim: 512
-encoder-ffn-embed-dim: 2048
-encoder-layers: 6
-decoder-layers: 6
-encoder-attention-heads: 8
-
-decoder-embed-dim: 512
-decoder-ffn-embed-dim: 2048
-decoder-attention-heads: 8
--- a/egs/covost/mt/conf/train_dlcl.yaml
+++ b/egs/covost/mt/conf/train_dlcl.yaml
-train-subset: train
-valid-subset: valid
-
-max-epoch: 50
-max-update: 100000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-skip-invalid-size-inputs-valid-test: True
-
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
-
-arch: dlcl_transformer
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 8000
-lr: 1e-3
-adam_betas: (0.9,0.997)
-
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-dropout: 0.1
-attention-dropout: 0.1
-activation-dropout: 0.1
-
-activation-fn: relu
-encoder-normalize-before: True
-decoder-normalize-before: True
-encoder-embed-dim: 512
-encoder-ffn-embed-dim: 2048
-encoder-layers: 6
-decoder-layers: 6
-encoder-attention-heads: 8
-
-decoder-embed-dim: 512
-decoder-ffn-embed-dim: 2048
-decoder-attention-heads: 8
--- a/egs/covost/mt/conf/train_dlcl_rpr.yaml
+++ b/egs/covost/mt/conf/train_dlcl_rpr.yaml
-train-subset: train
-valid-subset: valid
-
-max-epoch: 50
-max-update: 100000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-skip-invalid-size-inputs-valid-test: True
-
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
-
-arch: dlcl_transformer
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 8000
-lr: 1e-3
-adam_betas: (0.9,0.997)
-
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-dropout: 0.1
-attention-dropout: 0.1
-activation-dropout: 0.1
-
-activation-fn: relu
-encoder-normalize-before: True
-decoder-normalize-before: True
-encoder-embed-dim: 512
-encoder-ffn-embed-dim: 2048
-encoder-layers: 6
-decoder-layers: 6
-encoder-attention-heads: 8
-
-decoder-embed-dim: 512
-decoder-ffn-embed-dim: 2048
-decoder-attention-heads: 8
-
-encoder-attention-type: relative
-decoder-attention-type: relative
-max-relative-length: 20
\ No newline at end of file
--- a/egs/covost/mt/conf/train_rpr.yaml
+++ b/egs/covost/mt/conf/train_rpr.yaml
-train-subset: train
-valid-subset: valid
-
-max-epoch: 50
-max-update: 100000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-skip-invalid-size-inputs-valid-test: True
-
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
-
-arch: transformer
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 8000
-lr: 1e-3
-adam_betas: (0.9,0.997)
-
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-dropout: 0.1
-attention-dropout: 0.1
-activation-dropout: 0.1
-
-activation-fn: relu
-encoder-normalize-before: True
-decoder-normalize-before: True
-encoder-embed-dim: 512
-encoder-ffn-embed-dim: 2048
-encoder-layers: 6
-decoder-layers: 6
-encoder-attention-heads: 8
-
-decoder-embed-dim: 512
-decoder-ffn-embed-dim: 2048
-decoder-attention-heads: 8
-
-encoder-attention-type: relative
-decoder-attention-type: relative
-max-relative-length: 20
--- a/egs/covost/mt/conf/train_s.yaml
+++ b/egs/covost/mt/conf/train_s.yaml
-train-subset: train
-valid-subset: valid
-
-max-epoch: 50
-max-update: 100000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-skip-invalid-size-inputs-valid-test: True
-
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
-
-arch: transformer
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 8000
-lr: 1e-3
-adam_betas: (0.9,0.997)
-
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-dropout: 0.1
-attention-dropout: 0.1
-activation-dropout: 0.1
-
-activation-fn: relu
-encoder-normalize-before: True
-decoder-normalize-before: True
-encoder-embed-dim: 256
-encoder-ffn-embed-dim: 2048
-encoder-layers: 6
-decoder-layers: 6
-encoder-attention-heads: 4
-
-decoder-embed-dim: 256
-decoder-ffn-embed-dim: 2048
-decoder-attention-heads: 4
--- a/egs/covost/mt/decode.sh
+++ b/egs/covost/mt/decode.sh
-#! /bin/bash
-
-# Decode with a trained model by running stage 2 of ./run.sh.
-# Usage: ./decode.sh [exp_name]
-#   exp_name - optional experiment name forwarded to run.sh as --exp_name
-
-gpu_num=1
-
-data_dir=
-test_subset=test
-
-exp_name=
-if [ "$#" -eq 1 ]; then
-    exp_name=$1
-fi
-
-n_average=10
-beam_size=5
-max_tokens=20000
-
-cmd="./run.sh
-    --stage 2
-    --stop_stage 2
-    --gpu_num ${gpu_num}
-    --n_average ${n_average}
-    --beam_size ${beam_size}
-    --max_tokens ${max_tokens}
-    "
-
-# Optional flags are appended only when they carry a value.  An empty
-# "--exp_name" would be followed directly by the next flag, which the
-# "--name value" option parsing would then take as the experiment name.
-if [[ -n ${exp_name} ]]; then
-    cmd="$cmd --exp_name ${exp_name}"
-fi
-if [[ -n ${data_dir} ]]; then
-    cmd="$cmd --data_dir ${data_dir}"
-fi
-if [[ -n ${test_subset} ]]; then
-    cmd="$cmd --test_subset ${test_subset}"
-fi
-
-# Word splitting on purpose: it flattens the multi-line command for display.
-echo $cmd
-eval $cmd
--- a/egs/covost/mt/local/lower_rm.py
+++ b/egs/covost/mt/local/lower_rm.py
-import sys
-import string
-
-
-in_file = sys.argv[1]
-
-with open(in_file, "r", encoding="utf-8") as f:
-    for line in f.readlines():
-        line = line.strip().lower()
-        for w in string.punctuation:
-            line = line.replace(w, "")
-        line = line.replace("  ", "")
-        print(line)
-
--- a/egs/covost/mt/local/monitor.sh
+++ b/egs/covost/mt/local/monitor.sh
-gpu_num=1
-
-while :
-do
-    all_devices=$(seq 0 `gpustat | sed '1,2d' | wc -l`);
-    count=0
-    for dev in ${all_devices[@]}
-    do
-        line=`expr $dev + 2`
-        use=`gpustat -p | head -n $line | tail -1 | cut -d '|' -f4 | wc -w`
-        if [[ $use -eq 0 ]]; then
-            device[$count]=$dev
-            count=`expr $count + 1`
-            if [[ $count -eq $gpu_num ]]; then
-                break
-            fi
-        fi
-    done
-    if [[ ${#device[@]} -lt $gpu_num ]]; then
-        sleep 60s
-    else
-        echo "Run $cmd"
-        eval $cmd
-        sleep 10s
-        exit
-    fi
-done
--- a/egs/covost/mt/local/parse_options.sh
+++ b/egs/covost/mt/local/parse_options.sh
-#!/usr/bin/env bash
-
-# Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
-#                 Arnab Ghoshal, Karel Vesely
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Parse command-line options.
-# To be sourced by another script (as in ". parse_options.sh").
-# Option format is: --option-name arg
-# and shell variable "option_name" gets set to value "arg."
-# The exception is --help, which takes no arguments, but prints the
-# $help_message variable (if defined).
-
-
-###
-### The --config file options have lower priority to command line
-### options, so we need to import them first...
-###
-
-# Now import all the configs specified by command-line, in left-to-right order
-for ((argpos=1; argpos<$#; argpos++)); do
-  if [ "${!argpos}" == "--config" ]; then
-    argpos_plus1=$((argpos+1))
-    config=${!argpos_plus1}
-    [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
-    . $config  # source the config file.
-  fi
-done
-
-
-###
-### Now we process the command line options
-###
-while true; do
-  [ -z "${1:-}" ] && break;  # break if there are no arguments
-  case "$1" in
-    # If the enclosing script is called with --help option, print the help
-    # message and exit.  Scripts should put help messages in $help_message
-    --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
-      else printf "$help_message\n" 1>&2 ; fi;
-      exit 0 ;;
-    --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
-      exit 1 ;;
-    # If the first command-line argument begins with "--" (e.g. --foo-bar),
-    # then work out the variable name as $name, which will equal "foo_bar".
-    --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
-      # Next we test whether the variable in question is undefined-- if so it's
-      # an invalid option and we die.  Note: $0 evaluates to the name of the
-      # enclosing script.
-      # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
-      # is undefined.  We then have to wrap this test inside "eval" because
-      # foo_bar is itself inside a variable ($name).
-      eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
-
-      oldval="`eval echo \\$$name`";
-      # Work out whether we seem to be expecting a Boolean argument.
-      if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
-        was_bool=true;
-      else
-        was_bool=false;
-      fi
-
-      # Set the variable to the right value-- the escaped quotes make it work if
-      # the option had spaces, like --cmd "queue.pl -sync y"
-      eval $name=\"$2\";
-
-      # Check that Boolean-valued arguments are really Boolean.
-      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
-        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
-        exit 1;
-      fi
-      shift 2;
-      ;;
-  *) break;
-  esac
-done
-
-
-# Check for an empty argument to the --cmd option, which can easily occur as a
-# result of scripting errors.
-[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
-
-
-true; # so this script returns exit code 0.
--- a/egs/covost/mt/local/path.sh
+++ b/egs/covost/mt/local/path.sh
-MAIN_ROOT=$PWD/../../..
-KALDI_ROOT=$MAIN_ROOT/tools/kaldi
-
-export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PATH
-[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
-. $KALDI_ROOT/tools/config/common_path.sh
-export LC_ALL=C
-
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$MAIN_ROOT/src/lib
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$MAIN_ROOT/tools/chainer_ctc/ext/warp-ctc/build
-. "${MAIN_ROOT}"/tools/activate_python.sh && . "${MAIN_ROOT}"/tools/extra_path.sh
-export PATH=$MAIN_ROOT/utils:$MAIN_ROOT/espnet/bin:$PATH
-
-export OMP_NUM_THREADS=1
-
-# check extra module installation
-if ! which tokenizer.perl > /dev/null; then
-    echo "Error: it seems that moses is not installed." >&2
-    echo "Error: please install moses as follows." >&2
-    echo "Error: cd ${MAIN_ROOT}/tools && make moses.done" >&2
-    return 1
-fi
-
-# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
-export PYTHONIOENCODING=UTF-8
--- a/egs/covost/mt/local/utils.sh
+++ b/egs/covost/mt/local/utils.sh
-
-get_devices(){
-    gpu_num=$1
-    use_cpu=$2
-    device=()
-    while :
-    do
-        record=`mktemp -t temp.record.XXXXXX`
-        gpustat > $record
-        all_devices=$(seq 0 `cat $record | sed '1,2d' | wc -l`);
-        count=0
-        for dev in ${all_devices[@]}
-        do
-            line=`expr $dev + 2`
-            use=`cat $record | head -n $line | tail -1 | cut -d '|' -f3 | cut -d '/' -f1`
-            if [[ $use -lt 100 ]]; then
-                device[$count]=$dev
-                count=`expr $count + 1`
-                if [[ $count -eq $gpu_num ]]; then
-                    break
-                fi
-            fi
-        done
-        if [[ ${#device[@]} -lt $gpu_num ]]; then
-            if [[ $use_cpu -eq 1 ]]; then
-                device=(-1)
-            else
-                sleep 60s
-            fi
-        else
-            break
-        fi
-    done
-
-    echo ${device[*]} | sed 's/ /,/g'
-    return $?
-}
-
-
--- a/egs/covost/mt/run.sh
+++ b/egs/covost/mt/run.sh
-#! /bin/bash
-
-# Processing MuST-C Datasets
-
-# Copyright 2021 Natural Language Processing Laboratory 
-# Xu Chen (xuchenneu@163.com)
-
-# Set bash to 'debug' mode, it will exit on :
-# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
-set -e
-#set -u
-set -o pipefail
-export PYTHONIOENCODING=UTF-8
-
-eval=1
-time=$(date "+%m%d_%H%M")
-
-stage=0
-stop_stage=0
-
-######## hardware ########
-# devices
-#device=()
-gpu_num=8
-update_freq=1
-
-root_dir=~/st/Fairseq-S2T
-pwd_dir=$PWD
-
-# dataset
-src_lang=en
-tgt_lang=de
-lang=${src_lang}-${tgt_lang}
-
-dataset=mustc
-task=translation
-vocab_type=unigram
-vocab_size=10000
-share_dict=1
-lc_rm=1
-
-use_specific_dict=1
-specific_prefix=st_share10k_lcrm
-specific_dir=/home/xuchen/st/data/mustc/st_lcrm/en-de
-src_vocab_prefix=spm_unigram10000_st_share
-tgt_vocab_prefix=spm_unigram10000_st_share
-
-org_data_dir=/media/data/${dataset}
-data_dir=~/st/data/${dataset}/mt/${lang}
-train_subset=train
-valid_subset=dev
-test_subset=tst-COMMON
-trans_set=test
-
-# exp
-extra_tag=
-extra_parameter=
-exp_tag=baseline
-exp_name=
-
-# config
-train_config=train.yaml
-
-# training setting
-fp16=1
-max_tokens=4096
-step_valid=0
-bleu_valid=0
-
-# decoding setting
-n_average=10
-beam_size=5
-
-if [[ ${use_specific_dict} -eq 1 ]]; then
-    exp_tag=${specific_prefix}_${exp_tag}
-    data_dir=${data_dir}/${specific_prefix}
-    mkdir -p ${data_dir}
-else
-    data_dir=${data_dir}/${vocab_type}${vocab_size}
-    src_vocab_prefix=spm_${vocab_type}${vocab_size}_${src_lang}
-    tgt_vocab_prefix=spm_${vocab_type}${vocab_size}_${tgt_lang}
-    if [[ $share_dict -eq 1 ]]; then
-        data_dir=${data_dir}_share
-        src_vocab_prefix=spm_${vocab_type}${vocab_size}_share
-        tgt_vocab_prefix=spm_${vocab_type}${vocab_size}_share
-    fi
-fi
-
-. ./local/parse_options.sh || exit 1;
-
-# full path
-train_config=$pwd_dir/conf/${train_config}
-if [[ -z ${exp_name} ]]; then
-    exp_name=$(basename ${train_config%.*})_${exp_tag}
-    if [[ -n ${extra_tag} ]]; then
-        exp_name=${exp_name}_${extra_tag}
-    fi
-fi
-model_dir=$root_dir/../checkpoints/$dataset/mt/${exp_name}
-
-if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
-    echo "stage -1: Data Download"
-    # pass
-fi
-
-if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
-    ### Task dependent. You have to make data the following preparation part by yourself.
-    echo "stage 0: MT Data Preparation"
-    if [[ ! -e ${data_dir} ]]; then
-        mkdir -p ${data_dir}
-    fi
-
-    if [[ ! -f ${data_dir}/${src_vocab_prefix}.txt || ! -f ${data_dir}/${tgt_vocab_prefix}.txt ]]; then
-        if [[ ${use_specific_dict} -eq 0 ]]; then
-            cmd="python ${root_dir}/examples/speech_to_text/prep_mt_data.py
-                --data-root ${org_data_dir}
-                --output-root ${data_dir}
-                --splits ${train_subset},${valid_subset},${test_subset}
-                --src-lang ${src_lang}
-                --tgt-lang ${tgt_lang}
-                --vocab-type ${vocab_type}
-                --vocab-size ${vocab_size}"
-            if [[ $share_dict -eq 1 ]]; then
-                cmd="$cmd
-                --share"
-            fi
-            echo -e "\033[34mRun command: \n${cmd} \033[0m"
-            [[ $eval -eq 1 ]] && eval ${cmd}
-        else
-            cp -r ${specific_dir}/${src_vocab_prefix}.* ${data_dir}
-            cp ${specific_dir}/${tgt_vocab_prefix}.* ${data_dir}
-        fi
-    fi
-
-    mkdir -p ${data_dir}/data
-    for split in ${train_subset} ${valid_subset} ${test_subset}; do
-    {
-        cmd="cat ${org_data_dir}/${lang}/data/${split}.${src_lang}"
-        if [[ ${lc_rm} -eq 1 ]]; then
-            cmd="python local/lower_rm.py ${org_data_dir}/${lang}/data/${split}.${src_lang}"
-        fi
-        cmd="${cmd}
-        | spm_encode --model ${data_dir}/${src_vocab_prefix}.model
-        --output_format=piece
-        > ${data_dir}/data/${split}.${src_lang}"
-
-        echo -e "\033[34mRun command: \n${cmd} \033[0m"
-        [[ $eval -eq 1 ]] && eval ${cmd}
-
-        cmd="spm_encode
-        --model ${data_dir}/${tgt_vocab_prefix}.model
-        --output_format=piece
-        < ${org_data_dir}/${lang}/data/${split}.${tgt_lang}
-        > ${data_dir}/data/${split}.${tgt_lang}"
-
-        echo -e "\033[34mRun command: \n${cmd} \033[0m"
-        [[ $eval -eq 1 ]] && eval ${cmd}
-    }&
-    done
-    wait
-
-    cmd="python ${root_dir}/fairseq_cli/preprocess.py
-        --source-lang ${src_lang} --target-lang ${tgt_lang}
-        --trainpref ${data_dir}/data/${train_subset}
-        --validpref ${data_dir}/data/${valid_subset}
-        --testpref ${data_dir}/data/${test_subset}
-        --destdir ${data_dir}/data-bin
-        --srcdict ${data_dir}/${src_vocab_prefix}.txt
-        --tgtdict ${data_dir}/${tgt_vocab_prefix}.txt
-        --workers 64"
-
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 ]] && eval ${cmd}
-fi
-
-data_dir=${data_dir}/data-bin
-
-if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
-    echo "stage 1: MT Network Training"
-    [[ ! -d ${data_dir} ]] && echo "The data dir ${data_dir} is not existing!" && exit 1;
-
-    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
-		if [[ ${gpu_num} -eq 0 ]]; then
-			device=()
-		else
-        	source ./local/utils.sh
-        	device=$(get_devices $gpu_num 0)
-		fi
-    fi
-
-    echo -e "dev=${device} data=${data_dir} model=${model_dir}"
-
-    if [[ ! -d ${model_dir} ]]; then
-        mkdir -p ${model_dir}
-    else
-        echo "${model_dir} exists."
-    fi
-
-    cp ${BASH_SOURCE[0]} ${model_dir}
-    cp ${PWD}/train.sh ${model_dir}
-    cp ${train_config} ${model_dir}
-
-    cmd="python3 -u ${root_dir}/fairseq_cli/train.py
-        ${data_dir}
-        --source-lang ${src_lang}
-        --target-lang ${tgt_lang}
-        --train-config ${train_config}
-        --task ${task}
-        --max-tokens ${max_tokens}
-        --update-freq ${update_freq}
-        --log-interval 100
-        --save-dir ${model_dir}
-        --tensorboard-logdir ${model_dir}"
-
-    if [[ -n ${extra_parameter} ]]; then
-        cmd="${cmd}
-        ${extra_parameter}"
-    fi
-	if [[ ${gpu_num} -gt 0 ]]; then
-		cmd="${cmd}
-        --distributed-world-size $gpu_num
-        --ddp-backend no_c10d"
-	fi
-    if [[ $fp16 -eq 1 ]]; then
-        cmd="${cmd}
-        --fp16"
-    fi
-    if [[ $step_valid -eq 1 ]]; then
-        validate_interval=1
-        save_interval=1
-        keep_last_epochs=10
-        no_epoch_checkpoints=0
-        save_interval_updates=10000
-        keep_interval_updates=10
-    else
-        validate_interval=1
-        keep_last_epochs=10
-    fi
-    if [[ $bleu_valid -eq 1 ]]; then
-        cmd="$cmd
-        --eval-bleu
-        --eval-bleu-args '{\"beam\": 1}'
-        --eval-tokenized-bleu
-        --eval-bleu-remove-bpe
-        --best-checkpoint-metric bleu
-        --maximize-best-checkpoint-metric"
-    fi
-    if [[ -n $no_epoch_checkpoints && $no_epoch_checkpoints -eq 1 ]]; then
-        cmd="$cmd
-        --no-epoch-checkpoints"
-    fi
-    if [[ -n $validate_interval ]]; then
-        cmd="${cmd}
-        --validate-interval $validate_interval "
-    fi
-    if [[ -n $save_interval ]]; then
-        cmd="${cmd}
-        --save-interval $save_interval "
-    fi
-    if [[ -n $keep_last_epochs ]]; then
-        cmd="${cmd}
-        --keep-last-epochs $keep_last_epochs "
-    fi
-    if [[ -n $save_interval_updates ]]; then
-        cmd="${cmd}
-        --save-interval-updates $save_interval_updates"
-        if [[ -n $keep_interval_updates ]]; then
-        cmd="${cmd}
-        --keep-interval-updates $keep_interval_updates"
-        fi
-    fi
-
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-
-    # save info
-    log=./history.log
-    echo "${time} | ${device} | ${data_dir} | ${model_dir} " >> $log
-    cat $log | tail -n 50 > tmp.log
-    mv tmp.log $log
-    export CUDA_VISIBLE_DEVICES=${device}
-
-    cmd="nohup ${cmd} >> ${model_dir}/train.log 2>&1 &"
-    if [[ $eval -eq 1 ]]; then
-		eval $cmd
-		sleep 2s
-		tail -n `wc -l ${model_dir}/train.log | awk '{print $1+1}'` -f ${model_dir}/train.log
-	fi
-fi
-wait
-
-if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    echo "stage 2: MT Decoding"
-    if [[ ${n_average} -ne 1 ]]; then
-        # Average models
-		dec_model=avg_${n_average}_checkpoint.pt
-
-		cmd="python ${root_dir}/scripts/average_checkpoints.py
-        --inputs ${model_dir}
-        --num-epoch-checkpoints ${n_average}
-        --output ${model_dir}/${dec_model}"
-    	echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    	[[ $eval -eq 1 ]] && eval $cmd
-	else
-		dec_model=checkpoint_best.pt
-	fi
-
-    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
-		if [[ ${gpu_num} -eq 0 ]]; then
-			device=()
-		else
-        	source ./local/utils.sh
-        	device=$(get_devices $gpu_num 0)
-		fi
-    fi
-    export CUDA_VISIBLE_DEVICES=${device}
-
-	#tmp_file=$(mktemp ${model_dir}/tmp-XXXXX)
-	#trap 'rm -rf ${tmp_file}' EXIT
-	result_file=${model_dir}/decode_result
-	[[ -f ${result_file} ]] && rm ${result_file}
-
-    trans_set=(${trans_set//,/ })
-	for subset in ${trans_set[@]}; do
-  		cmd="python ${root_dir}/fairseq_cli/generate.py
-        ${data_dir}
-        --source-lang ${src_lang}
-        --target-lang ${tgt_lang}
-        --gen-subset ${subset}
-        --task ${task}
-        --path ${model_dir}/${dec_model}
-        --results-path ${model_dir}
-        --max-tokens ${max_tokens}
-        --beam ${beam_size}
-        --post-process sentencepiece
-        --tokenizer moses
-        --moses-source-lang ${src_lang}
-        --moses-target-lang ${tgt_lang}
-        --scoring sacrebleu"
-    	echo -e "\033[34mRun command: \n${cmd} \033[0m"
-
-        if [[ $eval -eq 1 ]]; then
-    	    eval $cmd
-    	    tail -n 1 ${model_dir}/generate-${subset}.txt >> ${result_file}
-        fi
-	done
-    cat ${result_file}
-fi
--- a/egs/covost/mt/train.sh
+++ b/egs/covost/mt/train.sh
-#! /bin/bash
-
-# training the model
-
-gpu_num=1
-update_freq=1
-max_tokens=4096
-
-extra_tag=
-extra_parameter=
-
-#extra_tag="${extra_tag}"
-#extra_parameter="${extra_parameter} "
-
-exp_tag=baseline
-train_config=train.yaml
-
-cmd="./run.sh
-    --stage 1
-    --stop_stage 1
-    --gpu_num ${gpu_num}
-    --update_freq ${update_freq}
-    --train_config ${train_config}
-    --max_tokens ${max_tokens}
-    "
-
-if [[ -n ${exp_tag} ]]; then
-    cmd="$cmd --exp_tag ${exp_tag}"
-fi
-if [[ -n ${extra_tag} ]]; then
-    cmd="$cmd --extra_tag ${extra_tag}"
-fi
-if [[ -n ${extra_parameter} ]]; then
-    cmd="$cmd --extra_parameter \"${extra_parameter}\""
-fi
-
-echo $cmd
-eval $cmd
--- a/egs/covost/st/conf/train_ctc_debug.yaml
+++ b/egs/covost/st/conf/train_ctc_debug.yaml
-train-subset: train_st,train_v2
-valid-subset: dev_st
-
-max-epoch: 50
-max-update: 100000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-
-#load-pretrained-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/train_ctc_baseline/avg_10_checkpoint.pt
-#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/mustc/mt/train_baseline/avg_10_checkpoint.pt
-
-arch: s2t_transformer_s
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 10000
-lr: 2e-3
-#adam_betas: (0.9,0.98)
-
-ctc-weight: 0.3
-criterion: label_smoothed_cross_entropy_with_ctc
-label_smoothing: 0.1
-
-conv-kernel-sizes: 5,5
-conv-channels: 1024
-dropout: 0.1
-activation-fn: relu
-encoder-embed-dim: 256
-encoder-ffn-embed-dim: 2048
-encoder-layers: 12
-decoder-layers: 6
-encoder-attention-heads: 4
-
-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
--- a/egs/covost/st/decode.sh
+++ b/egs/covost/st/decode.sh
-#! /bin/bash
-
-gpu_num=1
-
-data_dir=
-test_subset=tst-COMMON
-
-exp_name=
-if [ "$#" -eq 1 ]; then
-    exp_name=$1
-fi
-
-n_average=10
-beam_size=5
-max_tokens=40000
-
-cmd="./run.sh
-    --stage 2
-    --stop_stage 2
-    --gpu_num ${gpu_num}
-    --exp_name ${exp_name}
-    --n_average ${n_average}
-    --beam_size ${beam_size}
-    --max_tokens ${max_tokens}
-    "
-
-if [[ -n ${data_dir} ]]; then
-    cmd="$cmd --data_dir ${data_dir}"
-fi
-if [[ -n ${test_subset} ]]; then
-    cmd="$cmd --test_subset ${test_subset}"
-fi
-
-echo $cmd
-eval $cmd
--- a/egs/covost/st/local/monitor.sh
+++ b/egs/covost/st/local/monitor.sh
-gpu_num=1
-
-while :
-do
-    all_devices=$(seq 0 `gpustat | sed '1,2d' | wc -l`);
-    count=0
-    for dev in ${all_devices[@]}
-    do
-        line=`expr $dev + 2`
-        use=`gpustat -p | head -n $line | tail -1 | cut -d '|' -f4 | wc -w`
-        if [[ $use -eq 0 ]]; then
-            device[$count]=$dev
-            count=`expr $count + 1`
-            if [[ $count -eq $gpu_num ]]; then
-                break
-            fi
-        fi
-    done
-    if [[ ${#device[@]} -lt $gpu_num ]]; then
-        sleep 60s
-    else
-        echo "Run $cmd"
-        eval $cmd
-        sleep 10s
-        exit
-    fi
-done
--- a/egs/covost/st/local/parse_options.sh
+++ b/egs/covost/st/local/parse_options.sh
-#!/usr/bin/env bash
-
-# Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
-#                 Arnab Ghoshal, Karel Vesely
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Parse command-line options.
-# To be sourced by another script (as in ". parse_options.sh").
-# Option format is: --option-name arg
-# and shell variable "option_name" gets set to value "arg."
-# The exception is --help, which takes no arguments, but prints the
-# $help_message variable (if defined).
-
-
-###
-### The --config file options have lower priority to command line
-### options, so we need to import them first...
-###
-
-# Now import all the configs specified by command-line, in left-to-right order
-for ((argpos=1; argpos<$#; argpos++)); do
-  if [ "${!argpos}" == "--config" ]; then
-    argpos_plus1=$((argpos+1))
-    config=${!argpos_plus1}
-    [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
-    . $config  # source the config file.
-  fi
-done
-
-
-###
-### Now we process the command line options
-###
-while true; do
-  [ -z "${1:-}" ] && break;  # break if there are no arguments
-  case "$1" in
-    # If the enclosing script is called with --help option, print the help
-    # message and exit.  Scripts should put help messages in $help_message
-    --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
-      else printf "$help_message\n" 1>&2 ; fi;
-      exit 0 ;;
-    --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
-      exit 1 ;;
-    # If the first command-line argument begins with "--" (e.g. --foo-bar),
-    # then work out the variable name as $name, which will equal "foo_bar".
-    --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
-      # Next we test whether the variable in question is undefned-- if so it's
-      # an invalid option and we die.  Note: $0 evaluates to the name of the
-      # enclosing script.
-      # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
-      # is undefined.  We then have to wrap this test inside "eval" because
-      # foo_bar is itself inside a variable ($name).
-      eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
-
-      oldval="`eval echo \\$$name`";
-      # Work out whether we seem to be expecting a Boolean argument.
-      if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
-        was_bool=true;
-      else
-        was_bool=false;
-      fi
-
-      # Set the variable to the right value-- the escaped quotes make it work if
-      # the option had spaces, like --cmd "queue.pl -sync y"
-      eval $name=\"$2\";
-
-      # Check that Boolean-valued arguments are really Boolean.
-      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
-        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
-        exit 1;
-      fi
-      shift 2;
-      ;;
-  *) break;
-  esac
-done
-
-
-# Check for an empty argument to the --cmd option, which can easily occur as a
-# result of scripting errors.
-[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
-
-
-true; # so this script returns exit code 0.
--- a/egs/covost/st/local/path.sh
+++ b/egs/covost/st/local/path.sh
-MAIN_ROOT=$PWD/../../..
-KALDI_ROOT=$MAIN_ROOT/tools/kaldi
-
-export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PATH
-[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
-. $KALDI_ROOT/tools/config/common_path.sh
-export LC_ALL=C
-
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$MAIN_ROOT/src/lib
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$MAIN_ROOT/tools/chainer_ctc/ext/warp-ctc/build
-. "${MAIN_ROOT}"/tools/activate_python.sh && . "${MAIN_ROOT}"/tools/extra_path.sh
-export PATH=$MAIN_ROOT/utils:$MAIN_ROOT/espnet/bin:$PATH
-
-export OMP_NUM_THREADS=1
-
-# check extra module installation
-if ! which tokenizer.perl > /dev/null; then
-    echo "Error: it seems that moses is not installed." >&2
-    echo "Error: please install moses as follows." >&2
-    echo "Error: cd ${MAIN_ROOT}/tools && make moses.done" >&2
-    return 1
-fi
-
-# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
-export PYTHONIOENCODING=UTF-8
--- a/egs/covost/st/local/utils.sh
+++ b/egs/covost/st/local/utils.sh
-
-get_devices(){
-    gpu_num=$1
-    use_cpu=$2
-    device=()
-    while :
-    do
-        record=`mktemp -t temp.record.XXXXXX`
-        gpustat > $record
-        all_devices=$(seq 0 `cat $record | sed '1,2d' | wc -l`);
-        count=0
-        for dev in ${all_devices[@]}
-        do
-            line=`expr $dev + 2`
-            use=`cat $record | head -n $line | tail -1 | cut -d '|' -f3 | cut -d '/' -f1`
-            if [[ $use -lt 100 ]]; then
-                device[$count]=$dev
-                count=`expr $count + 1`
-                if [[ $count -eq $gpu_num ]]; then
-                    break
-                fi
-            fi
-        done
-        if [[ ${#device[@]} -lt $gpu_num ]]; then
-            if [[ $use_cpu -eq 1 ]]; then
-                device=(-1)
-            else
-                sleep 60s
-            fi
-        else
-            break
-        fi
-    done
-
-    echo ${device[*]} | sed 's/ /,/g'
-    return $?
-}
-
-
--- a/egs/covost/st/run.sh
+++ b/egs/covost/st/run.sh
-#! /bin/bash
-
-# Processing MuST-C Datasets
-
-# Copyright 2021 Natural Language Processing Laboratory 
-# Xu Chen (xuchenneu@163.com)
-
-# Set bash to 'debug' mode, it will exit on :
-# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
-set -e
-#set -u
-set -o pipefail
-export PYTHONIOENCODING=UTF-8
-
-eval=1
-time=$(date "+%m%d_%H%M")
-
-stage=0
-stop_stage=0
-
-######## hardware ########
-# devices
-#device=()
-gpu_num=8
-update_freq=1
-
-root_dir=~/st/Fairseq-S2T
-pwd_dir=$PWD
-
-# dataset
-src_lang=en
-tgt_lang=de
-lang=${src_lang}-${tgt_lang}
-
-dataset=mustc
-task=speech_to_text
-vocab_type=unigram
-asr_vocab_size=5000
-vocab_size=10000
-share_dict=1
-speed_perturb=0
-lcrm=1
-tokenizer=1
-
-use_specific_dict=0
-specific_prefix=valid
-specific_dir=/home/xuchen/st/data/mustc/st_lcrm/en-de
-asr_vocab_prefix=spm_unigram10000_st_share
-st_vocab_prefix=spm_unigram10000_st_share
-
-org_data_dir=/media/data/${dataset}
-data_dir=~/st/data/${dataset}/st
-test_subset=tst-COMMON
-
-# exp
-exp_prefix=${time}
-extra_tag=
-extra_parameter=
-exp_tag=baseline
-exp_name=
-
-# config
-train_config=train_ctc.yaml
-
-# training setting
-fp16=1
-max_tokens=40000
-step_valid=0
-bleu_valid=0
-
-# decoding setting
-n_average=10
-beam_size=5
-
-if [[ ${share_dict} -eq 1 ]]; then
-	data_config=config_st_share.yaml
-else
-	data_config=config_st.yaml
-fi
-if [[ ${speed_perturb} -eq 1 ]]; then
-    data_dir=${data_dir}_sp
-    exp_prefix=${exp_prefix}_sp
-fi
-if [[ ${lcrm} -eq 1 ]]; then
-    data_dir=${data_dir}_lcrm
-    exp_prefix=${exp_prefix}_lcrm
-fi
-if [[ ${use_specific_dict} -eq 1 ]]; then
-    data_dir=${data_dir}_${specific_prefix}
-    exp_prefix=${exp_prefix}_${specific_prefix}
-fi
-if [[ ${tokenizer} -eq 1 ]]; then
-    data_dir=${data_dir}_tok
-    exp_prefix=${exp_prefix}_tok
-fi
-
-. ./local/parse_options.sh || exit 1;
-
-# full path
-train_config=$pwd_dir/conf/${train_config}
-if [[ -z ${exp_name} ]]; then
-    exp_name=${exp_prefix}_$(basename ${train_config%.*})_${exp_tag}
-    if [[ -n ${extra_tag} ]]; then
-        exp_name=${exp_name}_${extra_tag}
-    fi
-fi
-model_dir=$root_dir/../checkpoints/$dataset/st/${exp_name}
-
-if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
-    echo "stage -1: Data Download"
-    # pass
-fi
-
-if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
-    ### Task dependent. You have to make data the following preparation part by yourself.
-    ### But you can utilize Kaldi recipes in most cases
-    echo "stage 0: ASR Data Preparation"
-    if [[ ! -e ${data_dir}/${lang} ]]; then
-        mkdir -p ${data_dir}/${lang}
-    fi
-    source ~/tools/audio/bin/activate
-
-    cmd="python ${root_dir}/examples/speech_to_text/prep_mustc_data.py
-        --data-root ${org_data_dir}
-        --output-root ${data_dir}
-        --task asr
-        --vocab-type ${vocab_type}
-        --vocab-size ${asr_vocab_size}"
-    if [[ ${speed_perturb} -eq 1 ]]; then
-        cmd="$cmd
-        --speed-perturb"
-    fi
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 && ${share_dict} -ne 1 && ${use_specific_dict} -ne 1 ]] && eval $cmd
-    asr_prefix=spm_${vocab_type}${asr_vocab_size}_asr
-
-    echo "stage 0: ST Data Preparation"
-    cmd="python ${root_dir}/examples/speech_to_text/prep_mustc_data.py
-        --data-root ${org_data_dir}
-        --output-root ${data_dir}
-        --task st
-        --add-src
-        --cmvn-type utterance
-        --vocab-type ${vocab_type}
-        --vocab-size ${vocab_size}"
-
-    if [[ ${use_specific_dict} -eq 1 ]]; then
-        cp -r ${specific_dir}/${asr_vocab_prefix}.* ./
-        cp -r ${specific_dir}/${st_vocab_prefix}.* ./
-        if [[ $share_dict -eq 1 ]]; then
-            cmd="$cmd
-        --share
-        --st-spm-prefix ${st_vocab_prefix}"
-        else
-            cmd="$cmd
-        --st-spm-prefix ${st_vocab_prefix}
-        --asr-prefix ${asr_vocab_prefix}"
-        fi
-    else
-        if [[ $share_dict -eq 1 ]]; then
-            cmd="$cmd
-        --share"
-        else
-            cmd="$cmd
-        --asr-prefix ${asr_prefix}"
-        fi
-    fi
-    if [[ ${speed_perturb} -eq 1 ]]; then
-        cmd="$cmd
-        --speed-perturb"
-    fi
-    if [[ ${lcrm} -eq 1 ]]; then
-        cmd="$cmd
-        --lowercase-src
-        --rm-punc-src"
-    fi
-    if [[ ${tokenizer} -eq 1 ]]; then
-        cmd="$cmd
-        --tokenizer"
-    fi
-
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 ]] && eval ${cmd}
-    deactivate
-fi
-
-data_dir=${data_dir}/${lang}
-
-if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
-    echo "stage 1: ST Network Training"
-    [[ ! -d ${data_dir} ]] && echo "The data dir ${data_dir} is not existing!" && exit 1;
-
-    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
-		if [[ ${gpu_num} -eq 0 ]]; then
-			device=()
-		else
-        	source ./local/utils.sh
-        	device=$(get_devices $gpu_num 0)
-		fi
-    fi
-
-    echo -e "dev=${device} data=${data_dir} model=${model_dir}"
-
-    if [[ ! -d ${model_dir} ]]; then
-        mkdir -p ${model_dir}
-    else
-        echo "${model_dir} exists."
-    fi
-
-    cp ${BASH_SOURCE[0]} ${model_dir}
-    cp ${PWD}/train.sh ${model_dir}
-    cp ${train_config} ${model_dir}
-
-    cmd="python3 -u ${root_dir}/fairseq_cli/train.py
-        ${data_dir}
-        --config-yaml ${data_config}
-        --train-config ${train_config}
-        --task ${task}
-        --max-tokens ${max_tokens}
-        --update-freq ${update_freq}
-        --log-interval 100
-        --save-dir ${model_dir}
-        --tensorboard-logdir ${model_dir}"
-
-    if [[ -n ${extra_parameter} ]]; then
-        cmd="${cmd}
-        ${extra_parameter}"
-    fi
-	if [[ ${gpu_num} -gt 0 ]]; then
-		cmd="${cmd}
-        --distributed-world-size $gpu_num
-        --ddp-backend no_c10d"
-	fi
-    if [[ $fp16 -eq 1 ]]; then
-        cmd="${cmd}
-        --fp16"
-    fi
-    if [[ $step_valid -eq 1 ]]; then
-        validate_interval=10000
-        save_interval=10000
-        no_epoch_checkpoints=1
-        save_interval_updates=5000
-        keep_interval_updates=3
-    else
-        validate_interval=1
-        keep_last_epochs=10
-    fi
-    if [[ $bleu_valid -eq 1 ]]; then
-        cmd="$cmd
-        --eval-bleu
-        --eval-bleu-args '{\"beam\": 1}'
-        --eval-tokenized-bleu
-        --eval-bleu-remove-bpe
-        --best-checkpoint-metric bleu
-        --maximize-best-checkpoint-metric"
-    fi
-    if [[ -n $no_epoch_checkpoints && $no_epoch_checkpoints -eq 1 ]]; then
-        cmd="$cmd
-        --no-epoch-checkpoints"
-    fi
-    if [[ -n $validate_interval ]]; then
-        cmd="${cmd}
-        --validate-interval $validate_interval "
-    fi
-    if [[ -n $save_interval ]]; then
-        cmd="${cmd}
-        --save-interval $save_interval "
-    fi
-    if [[ -n $keep_last_epochs ]]; then
-        cmd="${cmd}
-        --keep-last-epochs $keep_last_epochs "
-    fi
-    if [[ -n $save_interval_updates ]]; then
-        cmd="${cmd}
-        --save-interval-updates $save_interval_updates"
-        if [[ -n $keep_interval_updates ]]; then
-        cmd="${cmd}
-        --keep-interval-updates $keep_interval_updates"
-        fi
-    fi
-
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-
-    # save info
-    log=./history.log
-    echo "${time} | ${device} | ${data_dir} | ${model_dir} " >> $log
-    cat $log | tail -n 50 > tmp.log
-    mv tmp.log $log
-    export CUDA_VISIBLE_DEVICES=${device}
-
-    cmd="nohup ${cmd} >> ${model_dir}/train.log 2>&1 &"
-    if [[ $eval -eq 1 ]]; then
-		eval $cmd
-		sleep 2s
-		tail -n `wc -l ${model_dir}/train.log | awk '{print $1+1}'` -f ${model_dir}/train.log
-	fi
-fi
-wait
-
-if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    echo "stage 2: ST Decoding"
-    if [[ ${n_average} -ne 1 ]]; then
-        # Average models
-		dec_model=avg_${n_average}_checkpoint.pt
-
-		cmd="python ${root_dir}/scripts/average_checkpoints.py
-        --inputs ${model_dir}
-        --num-epoch-checkpoints ${n_average}
-        --output ${model_dir}/${dec_model}"
-    	echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    	[[ $eval -eq 1 ]] && eval $cmd
-	else
-		dec_model=checkpoint_best.pt
-	fi
-
-    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
-		if [[ ${gpu_num} -eq 0 ]]; then
-			device=()
-		else
-        	source ./local/utils.sh
-        	device=$(get_devices $gpu_num 0)
-		fi
-    fi
-    export CUDA_VISIBLE_DEVICES=${device}
-
-	#tmp_file=$(mktemp ${model_dir}/tmp-XXXXX)
-	#trap 'rm -rf ${tmp_file}' EXIT
-	result_file=${model_dir}/decode_result
-	[[ -f ${result_file} ]] && rm ${result_file}
-
-    test_subset=(${test_subset//,/ })
-	for subset in ${test_subset[@]}; do
-        subset=${subset}_st
-  		cmd="python ${root_dir}/fairseq_cli/generate.py
-        ${data_dir}
-        --config-yaml ${data_config}
-        --gen-subset ${subset}
-        --task speech_to_text
-        --path ${model_dir}/${dec_model}
-        --results-path ${model_dir}
-        --max-tokens ${max_tokens}
-        --beam ${beam_size}
-        --scoring sacrebleu"
-    	echo -e "\033[34mRun command: \n${cmd} \033[0m"
-
-        if [[ $eval -eq 1 ]]; then
-    	    eval $cmd
-    	    tail -n 1 ${model_dir}/generate-${subset}.txt >> ${result_file}
-        fi
-	done
-    cat ${result_file}
-fi
--- a/egs/covost/st/train.sh
+++ b/egs/covost/st/train.sh
-#! /bin/bash
-
-# training the model
-
-gpu_num=8
-update_freq=2
-max_tokens=20000
-
-extra_tag=lcrm
-extra_parameter=
-
-#extra_tag="${extra_tag}"
-#extra_parameter="${extra_parameter} "
-
-exp_tag=baseline
-train_config=train_ctc_sate.yaml
-#train_config=train_ctc.yaml
-
-cmd="./run.sh
-    --stage 1
-    --stop_stage 1
-    --gpu_num ${gpu_num}
-    --update_freq ${update_freq}
-    --train_config ${train_config}
-    --max_tokens ${max_tokens}
-    "
-
-if [[ -n ${exp_tag} ]]; then
-    cmd="$cmd --exp_tag ${exp_tag}"
-fi
-if [[ -n ${extra_tag} ]]; then
-    cmd="$cmd --extra_tag ${extra_tag}"
-fi
-if [[ -n ${extra_parameter} ]]; then
-    cmd="$cmd --extra_parameter \"${extra_parameter}\""
-fi
-
-echo $cmd
-eval $cmd
--- a/egs/librispeech/asr/conf/train.yaml
+++ b/egs/librispeech/asr/conf/train.yaml
-train-subset: train-clean-100,train-clean-360,train-other-500
-#train-subset: train-clean-100
-valid-subset: dev-clean
+train-subset: train_st
+valid-subset: dev_st

-max-epoch: 100
-max-update: 300000
+max-epoch: 50
+max-update: 100000

 num-workers: 8
 patience: 10
@@ -12,6 +11,9 @@ log-interval: 100
 seed: 1
 report-accuracy: True

+#load-pretrained-encoder-from:
+#load-pretrained-decoder-from:
+
 arch: s2t_transformer_s
 share-decoder-input-output-embed: True
 optimizer: adam
@@ -33,4 +35,10 @@ encoder-embed-dim: 256
 encoder-ffn-embed-dim: 2048
 encoder-layers: 12
 decoder-layers: 6
-encoder-attention-heads: 4
\ No newline at end of file
+encoder-attention-heads: 4
+
+decoder-embed-dim: 256
+decoder-ffn-embed-dim: 2048
+decoder-attention-heads: 4
+attention-dropout: 0.1
+activation-dropout: 0.1
--- a/egs/librispeech/asr/conf/train_conformer_l.yaml
+++ b/egs/librispeech/asr/conf/train_conformer_l.yaml
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
-
-max-epoch: 100
-max-update: 300000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-
-arch: s2t_conformer_l
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 10000
-lr: 2e-3
-#adam_betas: (0.9,0.98)
-
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-conv-kernel-sizes: 5,5
-conv-channels: 1024
-
-macaron-style: True
-use-cnn-module: True
-cnn-module-kernel: 31
-
-#dropout: 0.1
-#activation-fn: relu
-#encoder-embed-dim: 256
-#encoder-ffn-embed-dim: 2048
-#encoder-layers: 12
-#decoder-layers: 6
-#encoder-attention-heads: 4
-
-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
--- a/egs/librispeech/asr/conf/train_conformer_m.yaml
+++ b/egs/librispeech/asr/conf/train_conformer_m.yaml
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
-
-max-epoch: 100
-max-update: 300000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-
-arch: s2t_conformer_m
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 10000
-lr: 1e-3
-#adam_betas: (0.9,0.98)
-
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-conv-kernel-sizes: 5,5
-conv-channels: 1024
-
-macaron-style: True
-use-cnn-module: True
-cnn-module-kernel: 31
-
-#dropout: 0.1
-#activation-fn: relu
-#encoder-embed-dim: 256
-#encoder-ffn-embed-dim: 2048
-#encoder-layers: 12
-#decoder-layers: 6
-#encoder-attention-heads: 4
-
-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
--- a/egs/librispeech/asr/conf/train_ctc.yaml
+++ b/egs/librispeech/asr/conf/train_ctc.yaml
-train-subset: train-clean-100,train-clean-360,train-other-500
-#train-subset: train-clean-100
-valid-subset: dev-clean
+train-subset: train_st
+valid-subset: dev_st

-max-epoch: 100
-max-update: 300000
+max-epoch: 50
+max-update: 100000

 num-workers: 8
 patience: 10
@@ -12,6 +11,9 @@ log-interval: 100
 seed: 1
 report-accuracy: True

+#load-pretrained-encoder-from:
+#load-pretrained-decoder-from:
+
 arch: s2t_transformer_s
 share-decoder-input-output-embed: True
 optimizer: adam
@@ -36,8 +38,8 @@ encoder-layers: 12
 decoder-layers: 6
 encoder-attention-heads: 4

-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
+decoder-embed-dim: 256
+decoder-ffn-embed-dim: 2048
+decoder-attention-heads: 4
+attention-dropout: 0.1
+activation-dropout: 0.1
--- a/egs/librispeech/asr/conf/train_ctc_conformer.yaml
+++ b/egs/librispeech/asr/conf/train_ctc_conformer.yaml
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
+train-subset: train_st
+valid-subset: dev_st

-max-epoch: 100
-max-update: 300000
+max-epoch: 50
+max-update: 100000

 num-workers: 8
 patience: 10
@@ -11,6 +11,9 @@ log-interval: 100
 seed: 1
 report-accuracy: True

+#load-pretrained-encoder-from:
+#load-pretrained-decoder-from:
+
 arch: s2t_conformer_s
 share-decoder-input-output-embed: True
 optimizer: adam
@@ -27,20 +30,20 @@ label_smoothing: 0.1

 conv-kernel-sizes: 5,5
 conv-channels: 1024
+dropout: 0.1
+activation-fn: relu
+encoder-embed-dim: 256
+encoder-ffn-embed-dim: 2048
+encoder-layers: 12
+decoder-layers: 6
+encoder-attention-heads: 4
+
 macaron-style: True
 use-cnn-module: True
 cnn-module-kernel: 31

-#dropout: 0.1
-#activation-fn: relu
-#encoder-embed-dim: 256
-#encoder-ffn-embed-dim: 2048
-#encoder-layers: 12
-#decoder-layers: 6
-#encoder-attention-heads: 4
-
-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
+decoder-embed-dim: 256
+decoder-ffn-embed-dim: 2048
+decoder-attention-heads: 4
+attention-dropout: 0.1
+activation-dropout: 0.1
--- a/egs/librispeech/asr/conf/train_ctc_conformer_m.yaml
+++ b/egs/librispeech/asr/conf/train_ctc_conformer_m.yaml
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
-
-max-epoch: 100
-max-update: 300000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-
-arch: s2t_conformer_m
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 10000
-lr: 1e-3
-#adam_betas: (0.9,0.98)
-
-ctc-weight: 0.3
-criterion: label_smoothed_cross_entropy_with_ctc
-label_smoothing: 0.1
-
-conv-kernel-sizes: 5,5
-conv-channels: 1024
-macaron-style: True
-use-cnn-module: True
-cnn-module-kernel: 31
-
-#dropout: 0.1
-#activation-fn: relu
-#encoder-embed-dim: 256
-#encoder-ffn-embed-dim: 2048
-#encoder-layers: 12
-#decoder-layers: 6
-#encoder-attention-heads: 4
-
-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
\ No newline at end of file
--- a/egs/covost/asr/conf/train_ctc_conformer.yaml
+++ b/egs/covost/asr/conf/train_ctc_conformer.yaml
-train-subset: train_asr
-valid-subset: dev_asr
+train-subset: train_st
+valid-subset: dev_st

 max-epoch: 50
 max-update: 100000
@@ -42,6 +42,11 @@ macaron-style: True
 use-cnn-module: True
 cnn-module-kernel: 31

+encoder-attention-type: relative
+decoder-attention-type: relative
+max-encoder-relative-length: 100
+max-decoder-relative-length: 20
+
 #decoder-embed-dim: 256
 #decoder-ffn-embed-dim: 2048
 #decoder-attention-heads: 4

--- a/egs/covost/asr/conf/train_ctc.yaml
+++ b/egs/covost/asr/conf/train_ctc.yaml
-train-subset: train_asr
-valid-subset: dev_asr
+train-subset: train_st
+valid-subset: dev_st

 max-epoch: 50
 max-update: 100000
@@ -36,4 +36,15 @@ encoder-embed-dim: 256
 encoder-ffn-embed-dim: 2048
 encoder-layers: 12
 decoder-layers: 6
-encoder-attention-heads: 4
\ No newline at end of file
+encoder-attention-heads: 4
+
+encoder-attention-type: relative
+decoder-attention-type: relative
+max-encoder-relative-length: 100
+max-decoder-relative-length: 20
+
+decoder-embed-dim: 256
+decoder-ffn-embed-dim: 2048
+decoder-attention-heads: 4
+attention-dropout: 0.1
+activation-dropout: 0.1
--- a/egs/lower/st/conf/train_ctc_sate.yaml
+++ b/egs/lower/st/conf/train_ctc_sate.yaml
@@ -11,9 +11,10 @@ log-interval: 100
 seed: 1
 report-accuracy: True

-#load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/train_ctc_baseline/avg_10_checkpoint.pt
-#load-pretrained-text-encoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
-#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
+#load-pretrained-encoder-from:
+#load-pretrained-acoustic-encoder-from:
+#load-pretrained-text-encoder-from:
+#load-pretrained-decoder-from:

 arch: s2t_sate
 share-decoder-input-output-embed: True

--- a/egs/covost/st/conf/train_ctc_sate_conformer.yaml
+++ b/egs/covost/st/conf/train_ctc_sate_conformer.yaml
@@ -11,9 +11,10 @@ log-interval: 100
 seed: 1
 report-accuracy: True

-#load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/train_ctc_baseline/avg_10_checkpoint.pt
-#load-pretrained-text-encoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
-#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
+#load-pretrained-encoder-from:
+#load-pretrained-acoustic-encoder-from:
+#load-pretrained-text-encoder-from:
+#load-pretrained-decoder-from:

 arch: s2t_sate
 share-decoder-input-output-embed: True

--- a/egs/covost/st/conf/train_ctc_sate.yaml
+++ b/egs/covost/st/conf/train_ctc_sate.yaml
@@ -11,13 +11,10 @@ log-interval: 100
 seed: 1
 report-accuracy: True

-#load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/train_ctc_st_vocab/avg_10_checkpoint.pt
-#load-pretrained-text-encoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
-#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
-
-#load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/train_ctc_baseline_lcrm/avg_10_checkpoint.pt
-#load-pretrained-text-encoder-from: /home/xuchen/st/Fairseq-S2T/../checkpoints/mustc/mt/train_st_share10k_lcrm_baseline/avg_10_checkpoint.pt
-#load-pretrained-decoder-from: /home/xuchen/st/Fairseq-S2T/../checkpoints/mustc/mt/train_st_share10k_lcrm_baseline/avg_10_checkpoint.pt
+#load-pretrained-encoder-from:
+#load-pretrained-acoustic-encoder-from:
+#load-pretrained-text-encoder-from:
+#load-pretrained-decoder-from:

 arch: s2t_sate
 share-decoder-input-output-embed: True
@@ -50,11 +47,16 @@ macaron-style: True
 use-cnn-module: True
 cnn-module-kernel: 31

-acoustic-encoder: transformer
+acoustic-encoder: conformer
 adapter: league

-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
+encoder-attention-type: relative
+decoder-attention-type: relative
+max-encoder-relative-length: 100
+max-decoder-relative-length: 20
+
+decoder-embed-dim: 256
+decoder-ffn-embed-dim: 2048
+decoder-attention-heads: 4
+attention-dropout: 0.1
+activation-dropout: 0.1
--- a/egs/covost/st/conf/train_ctc.yaml
+++ b/egs/covost/st/conf/train_ctc.yaml
@@ -12,9 +12,11 @@ seed: 1
 report-accuracy: True

 #load-pretrained-encoder-from:
+#load-pretrained-acoustic-encoder-from:
+#load-pretrained-text-encoder-from:
 #load-pretrained-decoder-from:

-arch: s2t_transformer_s
+arch: s2t_sate
 share-decoder-input-output-embed: True
 optimizer: adam
 clip-norm: 10.0
@@ -28,6 +30,8 @@ ctc-weight: 0.3
 criterion: label_smoothed_cross_entropy_with_ctc
 label_smoothing: 0.1

+encoder-normalize-before: True
+decoder-normalize-before: True
 conv-kernel-sizes: 5,5
 conv-channels: 1024
 dropout: 0.1
@@ -35,9 +39,22 @@ activation-fn: relu
 encoder-embed-dim: 256
 encoder-ffn-embed-dim: 2048
 encoder-layers: 12
+text-encoder-layers: 6
 decoder-layers: 6
 encoder-attention-heads: 4

+macaron-style: True
+use-cnn-module: True
+cnn-module-kernel: 31
+
+acoustic-encoder: transformer
+adapter: league
+
+encoder-attention-type: relative
+decoder-attention-type: relative
+max-encoder-relative-length: 100
+max-decoder-relative-length: 20
+
 #decoder-embed-dim: 256
 #decoder-ffn-embed-dim: 2048
 #decoder-attention-heads: 4

--- a/egs/covost/st/conf/train_ctc_conformer_m.yaml
+++ b/egs/covost/st/conf/train_ctc_conformer_m.yaml
@@ -38,12 +38,21 @@ conv-channels: 1024
 #decoder-layers: 6
 #encoder-attention-heads: 4

-macaron-style: True
-use-cnn-module: True
-cnn-module-kernel: 31
-
 #decoder-embed-dim: 256
 #decoder-ffn-embed-dim: 2048
 #decoder-attention-heads: 4
 #attention-dropout: 0.1
 #activation-dropout: 0.1
+
+# conformer
+#macaron-style: True
+#use-cnn-module: True
+#cnn-module-kernel: 31
+
+# relative position encoding
+#encoder-attention-type: relative
+#decoder-attention-type: relative
+#max-encoder-relative-length: 100
+#max-decoder-relative-length: 20
+
+
--- a/egs/covost/st/conf/train.yaml
+++ b/egs/covost/st/conf/train.yaml
@@ -12,9 +12,11 @@ seed: 1
 report-accuracy: True

 #load-pretrained-encoder-from:
+#load-pretrained-acoustic-encoder-from:
+#load-pretrained-text-encoder-from:
 #load-pretrained-decoder-from:

-arch: s2t_transformer_s
+arch: s2t_sate
 share-decoder-input-output-embed: True
 optimizer: adam
 clip-norm: 10.0
@@ -27,6 +29,8 @@ lr: 2e-3
 criterion: label_smoothed_cross_entropy
 label_smoothing: 0.1

+encoder-normalize-before: True
+decoder-normalize-before: True
 conv-kernel-sizes: 5,5
 conv-channels: 1024
 dropout: 0.1
@@ -34,9 +38,17 @@ activation-fn: relu
 encoder-embed-dim: 256
 encoder-ffn-embed-dim: 2048
 encoder-layers: 12
+text-encoder-layers: 6
 decoder-layers: 6
 encoder-attention-heads: 4

+macaron-style: True
+use-cnn-module: True
+cnn-module-kernel: 31
+
+acoustic-encoder: transformer
+adapter: league
+
 #decoder-embed-dim: 256
 #decoder-ffn-embed-dim: 2048
 #decoder-attention-heads: 4

--- a/egs/covost/st/conf/train_sate_rpr.yaml
+++ b/egs/covost/st/conf/train_sate_rpr.yaml
@@ -11,9 +11,10 @@ log-interval: 100
 seed: 1
 report-accuracy: True

-load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/train_ctc_baseline_lcrm/avg_10_checkpoint.pt
-load-pretrained-text-encoder-from: /home/xuchen/st/Fairseq-S2T/../checkpoints/mustc/mt/train_st_share10k_lcrm_baseline/avg_10_checkpoint.pt
-load-pretrained-decoder-from: /home/xuchen/st/Fairseq-S2T/../checkpoints/mustc/mt/train_st_share10k_lcrm_baseline/avg_10_checkpoint.pt
+#load-pretrained-encoder-from:
+#load-pretrained-acoustic-encoder-from:
+#load-pretrained-text-encoder-from:
+#load-pretrained-decoder-from:

 arch: s2t_sate
 share-decoder-input-output-embed: True
@@ -49,8 +50,9 @@ acoustic-encoder: transformer
 adapter: league

 encoder-attention-type: relative
-#decoder-attention-type: relative
-max-relative-length: 100
+decoder-attention-type: relative
+max-encoder-relative-length: 100
+max-decoder-relative-length: 20

 #decoder-embed-dim: 256
 #decoder-ffn-embed-dim: 2048

--- a/egs/librispeech/asr/decode.sh
+++ b/egs/librispeech/asr/decode.sh
@@ -3,7 +3,7 @@
 gpu_num=1

 data_dir=
-test_subset=test-cleam,test-other
+test_subset=(test-cleam test-other)

 exp_name=
 if [ "$#" -eq 1 ]; then
@@ -12,7 +12,9 @@ fi

 n_average=10
 beam_size=5
-max_tokens=40000
+len_penalty=1.0
+max_tokens=10000
+dec_model=checkpoint_best.pt

 cmd="./run.sh
    --stage 2
@@ -21,13 +23,16 @@ cmd="./run.sh
    --exp_name ${exp_name}
    --n_average ${n_average}
    --beam_size ${beam_size}
+    --len_penalty ${len_penalty}
    --max_tokens ${max_tokens}
+    --dec_model ${dec_model}
    "

 if [[ -n ${data_dir} ]]; then
    cmd="$cmd --data_dir ${data_dir}"
 fi
 if [[ -n ${test_subset} ]]; then
+    test_subset=`echo ${test_subset[*]} | sed 's/ /,/g'`
    cmd="$cmd --test_subset ${test_subset}"
 fi


--- a/egs/librispeech/asr/run.sh
+++ b/egs/librispeech/asr/run.sh
@@ -37,11 +37,17 @@ vocab_type=unigram
 vocab_size=10000
 speed_perturb=0

+use_specific_dict=0
+specific_prefix=valid
+specific_dir=/home/xuchen/st/data/mustc/st_lcrm/en-de
+asr_vocab_prefix=spm_unigram10000_st_share
+
 org_data_dir=/media/data/${dataset}
 data_dir=~/st/data/${dataset}
 test_subset=dev-clean,dev-other,test-clean,test-other

 # exp
+exp_prefix=${time}
 extra_tag=
 extra_parameter=
 exp_tag=baseline
@@ -57,11 +63,18 @@ max_tokens=40000
 step_valid=0

 # decoding setting
+dec_model=checkpoint_best.pt
 n_average=10
 beam_size=5
+len_penalty=1.0

 if [[ ${speed_perturb} -eq 1 ]]; then
    data_dir=${data_dir}_sp
+    exp_prefix=${exp_prefix}_sp
+fi
+if [[ ${use_specific_dict} -eq 1 ]]; then
+    data_dir=${data_dir}_${specific_prefix}
+    exp_prefix=${exp_prefix}_${specific_prefix}
 fi

 . ./local/parse_options.sh || exit 1;
@@ -69,13 +82,10 @@ fi
 # full path
 train_config=$pwd_dir/conf/${train_config}
 if [[ -z ${exp_name} ]]; then
-    exp_name=$(basename ${train_config%.*})_${exp_tag}
+    exp_name=${exp_prefix}_$(basename ${train_config%.*})_${exp_tag}
    if [[ -n ${extra_tag} ]]; then
        exp_name=${exp_name}_${extra_tag}
    fi
-    if [[ ${speed_perturb} -eq 1 ]]; then
-        exp_name=sp_${exp_name}
-    fi
 fi
 model_dir=$root_dir/../checkpoints/$dataset/asr/${exp_name}

@@ -99,6 +109,12 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
        --output-root ${data_dir}
        --vocab-type ${vocab_type}
        --vocab-size ${vocab_size}"
+
+    if [[ ${use_specific_dict} -eq 1 ]]; then
+        cp -r ${specific_dir}/${asr_vocab_prefix}.* ${data_dir}/${lang}
+        cmd="$cmd
+        --asr-prefix ${asr_vocab_prefix}"
+    fi
    if [[ ${speed_perturb} -eq 1 ]]; then
        cmd="$cmd
        --speed-perturb"
@@ -138,6 +154,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
        --train-config ${train_config}
        --task ${task}
        --max-tokens ${max_tokens}
+        --skip-invalid-size-inputs-valid-test
        --update-freq ${update_freq}
        --log-interval 100
        --save-dir ${model_dir}
@@ -157,11 +174,12 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
        --fp16"
    fi
    if [[ $step_valid -eq 1 ]]; then
-        validate_interval=10000
-        save_interval=10000
-        no_epoch_checkpoints=1
-        save_interval_updates=5000
-        keep_interval_updates=3
+        validate_interval=1
+        save_interval=1
+        keep_last_epochs=10
+        no_epoch_checkpoints=0
+        save_interval_updates=500
+        keep_interval_updates=10
    else
        validate_interval=1
        keep_last_epochs=10
@@ -222,7 +240,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    	echo -e "\033[34mRun command: \n${cmd} \033[0m"
    	[[ $eval -eq 1 ]] && eval $cmd
 	else
-		dec_model=checkpoint_best.pt
+		dec_model=${dec_model}
 	fi

    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
@@ -252,6 +270,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
        --results-path ${model_dir}
        --max-tokens ${max_tokens}
        --beam ${beam_size}
+        --lenpen ${len_penalty}
        --scoring wer"
    	echo -e "\033[34mRun command: \n${cmd} \033[0m"


--- a/egs/lower/asr/conf/train.yaml
+++ b/egs/lower/asr/conf/train.yaml
-train-subset: train-clean-100,train-clean-360,train-other-500
-#train-subset: train-clean-100
-valid-subset: dev-clean
-
-max-epoch: 100
-max-update: 300000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-
-arch: s2t_transformer_s
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 10000
-lr: 2e-3
-#adam_betas: (0.9,0.98)
-
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-conv-kernel-sizes: 5,5
-conv-channels: 1024
-dropout: 0.1
-activation-fn: relu
-encoder-embed-dim: 256
-encoder-ffn-embed-dim: 2048
-encoder-layers: 12
-decoder-layers: 6
-encoder-attention-heads: 4
\ No newline at end of file
--- a/egs/lower/asr/conf/train_conformer_l.yaml
+++ b/egs/lower/asr/conf/train_conformer_l.yaml
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
-
-max-epoch: 100
-max-update: 300000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-
-arch: s2t_conformer_l
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 10000
-lr: 2e-3
-#adam_betas: (0.9,0.98)
-
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-conv-kernel-sizes: 5,5
-conv-channels: 1024
-
-macaron-style: True
-use-cnn-module: True
-cnn-module-kernel: 31
-
-#dropout: 0.1
-#activation-fn: relu
-#encoder-embed-dim: 256
-#encoder-ffn-embed-dim: 2048
-#encoder-layers: 12
-#decoder-layers: 6
-#encoder-attention-heads: 4
-
-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
--- a/egs/lower/asr/conf/train_conformer_m.yaml
+++ b/egs/lower/asr/conf/train_conformer_m.yaml
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
-
-max-epoch: 100
-max-update: 300000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-
-arch: s2t_conformer_m
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 10000
-lr: 1e-3
-#adam_betas: (0.9,0.98)
-
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-conv-kernel-sizes: 5,5
-conv-channels: 1024
-
-macaron-style: True
-use-cnn-module: True
-cnn-module-kernel: 31
-
-#dropout: 0.1
-#activation-fn: relu
-#encoder-embed-dim: 256
-#encoder-ffn-embed-dim: 2048
-#encoder-layers: 12
-#decoder-layers: 6
-#encoder-attention-heads: 4
-
-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
--- a/egs/lower/asr/conf/train_ctc.yaml
+++ b/egs/lower/asr/conf/train_ctc.yaml
-train-subset: train-clean-100,train-clean-360,train-other-500
-#train-subset: train-clean-100
-valid-subset: dev-clean
-
-max-epoch: 100
-max-update: 300000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-
-arch: s2t_transformer_s
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 10000
-lr: 2e-3
-#adam_betas: (0.9,0.98)
-
-ctc-weight: 0.3
-criterion: label_smoothed_cross_entropy_with_ctc
-label_smoothing: 0.1
-
-conv-kernel-sizes: 5,5
-conv-channels: 1024
-dropout: 0.1
-activation-fn: relu
-encoder-embed-dim: 256
-encoder-ffn-embed-dim: 2048
-encoder-layers: 12
-decoder-layers: 6
-encoder-attention-heads: 4
-
-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
--- a/egs/lower/asr/conf/train_ctc_conformer.yaml
+++ b/egs/lower/asr/conf/train_ctc_conformer.yaml
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
-
-max-epoch: 100
-max-update: 300000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-
-arch: s2t_conformer_s
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 10000
-lr: 2e-3
-#adam_betas: (0.9,0.98)
-
-ctc-weight: 0.3
-criterion: label_smoothed_cross_entropy_with_ctc
-label_smoothing: 0.1
-
-conv-kernel-sizes: 5,5
-conv-channels: 1024
-macaron-style: True
-use-cnn-module: True
-cnn-module-kernel: 31
-
-#dropout: 0.1
-#activation-fn: relu
-#encoder-embed-dim: 256
-#encoder-ffn-embed-dim: 2048
-#encoder-layers: 12
-#decoder-layers: 6
-#encoder-attention-heads: 4
-
-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
--- a/egs/lower/asr/conf/train_ctc_conformer_m.yaml
+++ b/egs/lower/asr/conf/train_ctc_conformer_m.yaml
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
-
-max-epoch: 100
-max-update: 300000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-
-arch: s2t_conformer_m
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 10000
-lr: 1e-3
-#adam_betas: (0.9,0.98)
-
-ctc-weight: 0.3
-criterion: label_smoothed_cross_entropy_with_ctc
-label_smoothing: 0.1
-
-conv-kernel-sizes: 5,5
-conv-channels: 1024
-macaron-style: True
-use-cnn-module: True
-cnn-module-kernel: 31
-
-#dropout: 0.1
-#activation-fn: relu
-#encoder-embed-dim: 256
-#encoder-ffn-embed-dim: 2048
-#encoder-layers: 12
-#decoder-layers: 6
-#encoder-attention-heads: 4
-
-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
\ No newline at end of file
--- a/egs/lower/asr/conf/train_ctc_debug.yaml
+++ b/egs/lower/asr/conf/train_ctc_debug.yaml
-#train-subset: train-clean-100,train-clean-360,train-other-500
-train-subset: train-clean-100
-valid-subset: dev-clean
-
-max-epoch: 100
-max-update: 300000
-
-num-workers: 0
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-
-arch: s2t_transformer_s
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 10000
-lr: 2e-3
-#adam_betas: (0.9,0.98)
-
-ctc-weight: 0.3
-criterion: label_smoothed_cross_entropy_with_ctc
-label_smoothing: 0.1
-
-conv-kernel-sizes: 5,5
-conv-channels: 1024
-dropout: 0.1
-activation-fn: relu
-encoder-embed-dim: 256
-encoder-ffn-embed-dim: 2048
-encoder-layers: 3
-decoder-layers: 3
-encoder-attention-heads: 4
-
-macaron-style: True
-use-cnn-module: True
-cnn-module-kernel: 31
-
-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
--- a/egs/lower/asr/decode.sh
+++ b/egs/lower/asr/decode.sh
-#! /bin/bash
-
-gpu_num=1
-
-data_dir=
-test_subset=test-cleam,test-other
-
-exp_name=
-if [ "$#" -eq 1 ]; then
-    exp_name=$1
-fi
-
-n_average=10
-beam_size=5
-max_tokens=40000
-
-cmd="./run.sh
-    --stage 2
-    --stop_stage 2
-    --gpu_num ${gpu_num}
-    --exp_name ${exp_name}
-    --n_average ${n_average}
-    --beam_size ${beam_size}
-    --max_tokens ${max_tokens}
-    "
-
-if [[ -n ${data_dir} ]]; then
-    cmd="$cmd --data_dir ${data_dir}"
-fi
-if [[ -n ${test_subset} ]]; then
-    cmd="$cmd --test_subset ${test_subset}"
-fi
-
-echo $cmd
-eval $cmd
--- a/egs/lower/asr/local/monitor.sh
+++ b/egs/lower/asr/local/monitor.sh
-gpu_num=1
-
-while :
-do
-    all_devices=$(seq 0 `gpustat | sed '1,2d' | wc -l`);
-    count=0
-    for dev in ${all_devices[@]}
-    do
-        line=`expr $dev + 2`
-        use=`gpustat -p | head -n $line | tail -1 | cut -d '|' -f4 | wc -w`
-        if [[ $use -eq 0 ]]; then
-            device[$count]=$dev
-            count=`expr $count + 1`
-            if [[ $count -eq $gpu_num ]]; then
-                break
-            fi
-        fi
-    done
-    if [[ ${#device[@]} -lt $gpu_num ]]; then
-        sleep 60s
-    else
-        echo "Run $cmd"
-        eval $cmd
-        sleep 10s
-        exit
-    fi
-done
--- a/egs/lower/asr/local/parse_options.sh
+++ b/egs/lower/asr/local/parse_options.sh
-#!/usr/bin/env bash
-
-# Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
-#                 Arnab Ghoshal, Karel Vesely
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Parse command-line options.
-# To be sourced by another script (as in ". parse_options.sh").
-# Option format is: --option-name arg
-# and shell variable "option_name" gets set to value "arg."
-# The exception is --help, which takes no arguments, but prints the
-# $help_message variable (if defined).
-
-
-###
-### The --config file options have lower priority to command line
-### options, so we need to import them first...
-###
-
-# Now import all the configs specified by command-line, in left-to-right order
-for ((argpos=1; argpos<$#; argpos++)); do
-  if [ "${!argpos}" == "--config" ]; then
-    argpos_plus1=$((argpos+1))
-    config=${!argpos_plus1}
-    [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
-    . $config  # source the config file.
-  fi
-done
-
-
-###
-### Now we process the command line options
-###
-while true; do
-  [ -z "${1:-}" ] && break;  # break if there are no arguments
-  case "$1" in
-    # If the enclosing script is called with --help option, print the help
-    # message and exit.  Scripts should put help messages in $help_message
-    --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
-      else printf "$help_message\n" 1>&2 ; fi;
-      exit 0 ;;
-    --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
-      exit 1 ;;
-    # If the first command-line argument begins with "--" (e.g. --foo-bar),
-    # then work out the variable name as $name, which will equal "foo_bar".
-    --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
-      # Next we test whether the variable in question is undefned-- if so it's
-      # an invalid option and we die.  Note: $0 evaluates to the name of the
-      # enclosing script.
-      # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
-      # is undefined.  We then have to wrap this test inside "eval" because
-      # foo_bar is itself inside a variable ($name).
-      eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
-
-      oldval="`eval echo \\$$name`";
-      # Work out whether we seem to be expecting a Boolean argument.
-      if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
-        was_bool=true;
-      else
-        was_bool=false;
-      fi
-
-      # Set the variable to the right value-- the escaped quotes make it work if
-      # the option had spaces, like --cmd "queue.pl -sync y"
-      eval $name=\"$2\";
-
-      # Check that Boolean-valued arguments are really Boolean.
-      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
-        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
-        exit 1;
-      fi
-      shift 2;
-      ;;
-  *) break;
-  esac
-done
-
-
-# Check for an empty argument to the --cmd option, which can easily occur as a
-# result of scripting errors.
-[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
-
-
-true; # so this script returns exit code 0.
--- a/egs/lower/asr/local/path.sh
+++ b/egs/lower/asr/local/path.sh
-MAIN_ROOT=$PWD/../../..
-KALDI_ROOT=$MAIN_ROOT/tools/kaldi
-
-export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PATH
-[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
-. $KALDI_ROOT/tools/config/common_path.sh
-export LC_ALL=C
-
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$MAIN_ROOT/src/lib
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$MAIN_ROOT/tools/chainer_ctc/ext/warp-ctc/build
-. "${MAIN_ROOT}"/tools/activate_python.sh && . "${MAIN_ROOT}"/tools/extra_path.sh
-export PATH=$MAIN_ROOT/utils:$MAIN_ROOT/espnet/bin:$PATH
-
-export OMP_NUM_THREADS=1
-
-# check extra module installation
-if ! which tokenizer.perl > /dev/null; then
-    echo "Error: it seems that moses is not installed." >&2
-    echo "Error: please install moses as follows." >&2
-    echo "Error: cd ${MAIN_ROOT}/tools && make moses.done" >&2
-    return 1
-fi
-
-# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
-export PYTHONIOENCODING=UTF-8
--- a/egs/lower/asr/local/utils.sh
+++ b/egs/lower/asr/local/utils.sh
-
-# Print a comma-separated list of GPU indices that look free.
-#
-# For device N the function reads line N+2 of the `gpustat` output, takes the
-# third `|`-separated field and keeps the text before the `/`. The device is
-# accepted when that number is below 100.
-# NOTE(review): that number is presumably the used memory reported by gpustat;
-# verify against the output format of the installed gpustat version.
-#
-# Arguments:
-#   $1 - number of GPUs requested
-#   $2 - 1: print -1 as soon as a poll finds too few free GPUs;
-#        anything else: poll again every 60 seconds until enough are free
-# Outputs:
-#   the selected indices joined by commas, or -1 for the fallback case
-# Returns:
-#   the status of the final echo/sed pipeline, or 1 if mktemp fails
-get_devices(){
-    gpu_num=$1
-    use_cpu=$2
-    device=()
-    local record all_devices count dev line use
-    # One scratch file for all polls; it is removed before returning so that
-    # long waits do not litter the temp directory.
-    record=$(mktemp -t temp.record.XXXXXX) || return 1
-    while :
-    do
-        # Forget the previous poll so stale indices never survive into this one.
-        device=()
-        gpustat > "$record"
-        all_devices=$(seq 0 $(sed '1,2d' "$record" | wc -l))
-        count=0
-        for dev in $all_devices
-        do
-            line=$((dev + 2))
-            use=$(head -n "$line" "$record" | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)
-            if [[ $use -lt 100 ]]; then
-                device[$count]=$dev
-                count=$((count + 1))
-                if [[ $count -eq $gpu_num ]]; then
-                    break
-                fi
-            fi
-        done
-        if [[ ${#device[@]} -lt $gpu_num ]]; then
-            if [[ $use_cpu -eq 1 ]]; then
-                # Fall back right away; without this break the loop would spin
-                # forever whenever more than one GPU was requested.
-                device=(-1)
-                break
-            else
-                sleep 60s
-            fi
-        else
-            break
-        fi
-    done
-    rm -f -- "$record"
-
-    echo ${device[*]} | sed 's/ /,/g'
-    return $?
-}
-
-
--- a/egs/lower/asr/run.sh
+++ b/egs/lower/asr/run.sh
-#! /bin/bash
-
-# Processing LibriSpeech Datasets
-
-# Copyright 2021 Natural Language Processing Laboratory 
-# Xu Chen (xuchenneu@163.com)
-
-# Set bash to 'debug' mode, it will exit on :
-# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
-set -e
-#set -u
-set -o pipefail
-export PYTHONIOENCODING=UTF-8
-
-eval=1
-time=$(date "+%m%d_%H%M")
-
-stage=0
-stop_stage=0
-
-######## hardware ########
-# devices
-device=()
-gpu_num=8
-update_freq=1
-
-root_dir=~/st/Fairseq-S2T
-pwd_dir=$PWD
-
-# dataset
-src_lang=swa
-tgt_lang=en
-lang=${src_lang}-${tgt_lang}
-
-dataset=lower
-task=speech_to_text
-vocab_type=unigram
-vocab_size=1000
-speed_perturb=1
-lcrm=1
-
-use_specific_dict=0
-specific_prefix=valid
-specific_dir=/home/xuchen/st/data/mustc/st_lcrm/en-de
-asr_vocab_prefix=spm_unigram10000_st_share
-
-org_data_dir=~/st/data/${dataset}/asr
-data_dir=~/st/data/${dataset}/asr
-test_subset=test
-
-# exp
-exp_prefix=${time}
-extra_tag=
-extra_parameter=
-exp_tag=baseline
-exp_name=
-
-# config
-train_config=train_ctc.yaml
-data_config=config_asr.yaml
-
-# training setting
-fp16=1
-max_tokens=40000
-step_valid=0
-
-# decoding setting
-n_average=10
-beam_size=5
-
-# Derive the default data directory and experiment prefix from the switches
-# defined above by appending one suffix per enabled switch.
-# NOTE(review): this happens before parse_options.sh is sourced, so passing
-# --speed_perturb, --lcrm or --use_specific_dict on the command line does not
-# change these suffixes, while --data_dir / --exp_prefix replace the derived
-# values entirely.
-if [[ ${speed_perturb} -eq 1 ]]; then
-    data_dir=${data_dir}_sp
-    exp_prefix=${exp_prefix}_sp
-fi
-if [[ ${lcrm} -eq 1 ]]; then
-    data_dir=${data_dir}_lcrm
-    exp_prefix=${exp_prefix}_lcrm
-fi
-if [[ ${use_specific_dict} -eq 1 ]]; then
-    data_dir=${data_dir}_${specific_prefix}
-    exp_prefix=${exp_prefix}_${specific_prefix}
-fi
-
-# Apply command-line overrides to the variables defined so far.
-. ./local/parse_options.sh || exit 1;
-
-# full path
-train_config=$pwd_dir/conf/${train_config}
-# Unless an experiment name was given, build one from the prefix, the config
-# file name without its extension, the tag and the optional extra tag.
-if [[ -z ${exp_name} ]]; then
-    exp_name=${exp_prefix}_$(basename ${train_config%.*})_${exp_tag}
-    if [[ -n ${extra_tag} ]]; then
-        exp_name=${exp_name}_${extra_tag}
-    fi
-fi
-model_dir=$root_dir/../checkpoints/$dataset/asr/${exp_name}
-
-if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
-    echo "stage -1: Data Download"
-    # pass
-fi
-
-if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
-    ### Task dependent. You have to make data the following preparation part by yourself.
-    ### But you can utilize Kaldi recipes in most cases
-    echo "stage 0: ASR Data Preparation"
-    if [[ ! -e ${data_dir}/${lang} ]]; then
-        mkdir -p ${data_dir}/${lang}
-    fi
-    source ~/tools/audio/bin/activate
-
-    cmd="python ${root_dir}/examples/speech_to_text/prep_st_data.py
-        --data-root ${org_data_dir}
-        --output-root ${data_dir}
-        --src-lang ${src_lang}
-        --tgt-lang ${tgt_lang}
-        --task asr
-        --vocab-type ${vocab_type}
-        --vocab-size ${vocab_size}"
-
-    if [[ ${use_specific_dict} -eq 1 ]]; then
-        cp -r ${specific_dir}/${asr_vocab_prefix}.* ${data_dir}/${lang}
-        cmd="$cmd
-        --asr-prefix ${asr_vocab_prefix}"
-    fi
-    if [[ ${speed_perturb} -eq 1 ]]; then
-        cmd="$cmd
-        --speed-perturb"
-    fi
-    if [[ ${lcrm} -eq 1 ]]; then
-        cmd="$cmd
-        --lowercase-src
-        --rm-punc-src"
-    fi
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 ]] && eval ${cmd}
-    deactivate
-fi
-
-data_dir=${data_dir}/${lang}
-
-if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
-    echo "stage 1: ASR Network Training"
-    [[ ! -d ${data_dir} ]] && echo "The data dir ${data_dir} is not existing!" && exit 1;
-
-    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
-		if [[ ${gpu_num} -eq 0 ]]; then
-			device=()
-		else
-        	source ./local/utils.sh
-        	device=$(get_devices $gpu_num 0)
-		fi
-    fi
-
-    echo -e "dev=${device} data=${data_dir} model=${model_dir}"
-
-    if [[ ! -d ${model_dir} ]]; then
-        mkdir -p ${model_dir}
-    else
-        echo "${model_dir} exists."
-    fi
-
-    cp ${BASH_SOURCE[0]} ${model_dir}
-    cp ${PWD}/train.sh ${model_dir}
-    cp ${train_config} ${model_dir}
-
-    cmd="python3 -u ${root_dir}/fairseq_cli/train.py
-        ${data_dir}
-        --config-yaml ${data_config}
-        --train-config ${train_config}
-        --task ${task}
-        --max-tokens ${max_tokens}
-        --update-freq ${update_freq}
-        --log-interval 100
-        --save-dir ${model_dir}
-        --tensorboard-logdir ${model_dir}"
-
-    if [[ -n ${extra_parameter} ]]; then
-        cmd="${cmd}
-        ${extra_parameter}"
-    fi
-	if [[ ${gpu_num} -gt 0 ]]; then
-		cmd="${cmd}
-        --distributed-world-size $gpu_num
-        --ddp-backend no_c10d"
-	fi
-    if [[ $fp16 -eq 1 ]]; then
-        cmd="${cmd}
-        --fp16"
-    fi
-    if [[ $step_valid -eq 1 ]]; then
-        validate_interval=10000
-        save_interval=10000
-        no_epoch_checkpoints=1
-        save_interval_updates=5000
-        keep_interval_updates=3
-    else
-        validate_interval=1
-        keep_last_epochs=10
-    fi
-    if [[ -n $no_epoch_checkpoints && $no_epoch_checkpoints -eq 1 ]]; then
-        cmd="$cmd
-        --no-epoch-checkpoints"
-    fi
-    if [[ -n $validate_interval ]]; then
-        cmd="${cmd}
-        --validate-interval $validate_interval "
-    fi
-    if [[ -n $save_interval ]]; then
-        cmd="${cmd}
-        --save-interval $save_interval "
-    fi
-    if [[ -n $keep_last_epochs ]]; then
-        cmd="${cmd}
-        --keep-last-epochs $keep_last_epochs "
-    fi
-    if [[ -n $save_interval_updates ]]; then
-        cmd="${cmd}
-        --save-interval-updates $save_interval_updates"
-        if [[ -n $keep_interval_updates ]]; then
-        cmd="${cmd}
-        --keep-interval-updates $keep_interval_updates"
-        fi
-    fi
-
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-
-    # save info
-    log=./history.log
-    echo "${time} | ${device} | ${data_dir} | ${model_dir} " >> $log
-    cat $log | tail -n 50 > tmp.log
-    mv tmp.log $log
-    export CUDA_VISIBLE_DEVICES=${device}
-
-    cmd="nohup ${cmd} >> ${model_dir}/train.log 2>&1 &"
-    if [[ $eval -eq 1 ]]; then
-		eval $cmd
-		sleep 2s
-		tail -n `wc -l ${model_dir}/train.log | awk '{print $1+1}'` -f ${model_dir}/train.log
-	fi
-fi
-wait
-
-if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    echo "stage 2: ASR Decoding"
-    if [[ ${n_average} -ne 1 ]]; then
-        # Average models
-		dec_model=avg_${n_average}_checkpoint.pt
-
-		cmd="python ${root_dir}/scripts/average_checkpoints.py
-        --inputs ${model_dir}
-        --num-epoch-checkpoints ${n_average}
-        --output ${model_dir}/${dec_model}"
-    	echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    	[[ $eval -eq 1 ]] && eval $cmd
-	else
-		dec_model=checkpoint_best.pt
-	fi
-
-    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
-		if [[ ${gpu_num} -eq 0 ]]; then
-			device=()
-		else
-        	source ./local/utils.sh
-        	device=$(get_devices $gpu_num 0)
-		fi
-    fi
-    export CUDA_VISIBLE_DEVICES=${device}
-
-	#tmp_file=$(mktemp ${model_dir}/tmp-XXXXX)
-	#trap 'rm -rf ${tmp_file}' EXIT
-	result_file=${model_dir}/decode_result
-	[[ -f ${result_file} ]] && rm ${result_file}
-
-    test_subset=(${test_subset//,/ })
-	for subset in ${test_subset[@]}; do
-        subset=${subset}_asr
-  		cmd="python ${root_dir}/fairseq_cli/generate.py
-        ${data_dir}
-        --config-yaml ${data_config}
-        --gen-subset ${subset}
-        --task speech_to_text
-        --path ${model_dir}/${dec_model}
-        --results-path ${model_dir}
-        --max-tokens ${max_tokens}
-        --beam ${beam_size}
-        --scoring wer
-        --wer-tokenizer 13a
-        --wer-lowercase
-        --wer-remove-punct
-        "
-    	echo -e "\033[34mRun command: \n${cmd} \033[0m"
-
-        if [[ $eval -eq 1 ]]; then
-    	    eval $cmd
-    	    tail -n 1 ${model_dir}/generate-${subset}.txt >> ${result_file}
-        fi
-	done
-    cat ${result_file}
-fi
--- a/egs/lower/asr/train.sh
+++ b/egs/lower/asr/train.sh
-#! /bin/bash
-
-# Launch training only: runs stage 1 of ./run.sh with the settings below.
-
-gpu_num=8
-update_freq=2
-max_tokens=20000
-
-# Optional extras; empty values are not forwarded to run.sh.
-extra_tag=
-extra_parameter=
-
-#extra_tag="${extra_tag}"
-#extra_parameter="${extra_parameter} "
-
-exp_tag=
-train_config=train_ctc.yaml
-
-# Assemble the command one option at a time.
-cmd="./run.sh"
-cmd+=" --stage 1"
-cmd+=" --stop_stage 1"
-cmd+=" --gpu_num ${gpu_num}"
-cmd+=" --update_freq ${update_freq}"
-cmd+=" --train_config ${train_config}"
-cmd+=" --max_tokens ${max_tokens}"
-
-# Forward the optional settings only when they are non-empty.
-[[ -z ${exp_tag} ]] || cmd+=" --exp_tag ${exp_tag}"
-[[ -z ${extra_tag} ]] || cmd+=" --extra_tag ${extra_tag}"
-[[ -z ${extra_parameter} ]] || cmd+=" --extra_parameter \"${extra_parameter}\""
-
-echo $cmd
-eval $cmd
--- a/egs/lower/mt/binary.sh
+++ b/egs/lower/mt/binary.sh
-# Prepare extra test sets for fairseq: optionally tokenize each split with
-# tokenizer.perl, encode it with spm_encode, copy the result to
-# ${data_dir}/final and binarize all splits with fairseq_cli/preprocess.py.
-# Every step is built as a string, printed, and run through eval when eval=1.
-set -e
-
-# 1: execute the commands; any other value only prints them
-eval=1
-
-root_dir=~/st/Fairseq-S2T
-data_dir=/home/xuchen/st/data/wmt/test
-vocab_dir=/home/xuchen/st/data/wmt/mt/en-de/unigram32000_share
-src_vocab_prefix=spm_unigram32000_share
-tgt_vocab_prefix=spm_unigram32000_share
-
-src_lang=en
-tgt_lang=de
-tokenize=1
-splits=(newstest2014 newstest2016)
-
-for split in ${splits[@]}; do
-    src_file=${data_dir}/${split}.${src_lang}
-    tgt_file=${data_dir}/${split}.${tgt_lang}
-
-    # Optional tokenization; later steps then read the .tok files.
-    if [[ ${tokenize} -eq 1 ]]; then
-        cmd="tokenizer.perl -l ${src_lang} --threads 8 -no-escape < ${src_file} > ${src_file}.tok"
-        echo -e "\033[34mRun command: \n${cmd} \033[0m"
-        [[ $eval -eq 1 ]] && eval ${cmd}
-
-        cmd="tokenizer.perl -l ${tgt_lang} --threads 8 -no-escape < ${tgt_file} > ${tgt_file}.tok"
-        echo -e "\033[34mRun command: \n${cmd} \033[0m"
-        [[ $eval -eq 1 ]] && eval ${cmd}
-        src_file=${src_file}.tok
-        tgt_file=${tgt_file}.tok
-    fi
-
-    # Subword-encode both sides with the models found in vocab_dir.
-    cmd="spm_encode
-    --model ${vocab_dir}/${src_vocab_prefix}.model
-    --output_format=piece
-    < ${src_file}
-    > ${src_file}.spm"
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 ]] && eval ${cmd}
-
-    cmd="spm_encode
-    --model ${vocab_dir}/${tgt_vocab_prefix}.model
-    --output_format=piece
-    < ${tgt_file}
-    > ${tgt_file}.spm"
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 ]] && eval ${cmd}
-
-    src_file=${src_file}.spm
-    tgt_file=${tgt_file}.spm
-
-    # Store the encoded files as final/<split>.<lang>, the prefix layout that
-    # is handed to --testpref below.
-    mkdir -p ${data_dir}/final
-    cmd="cp ${src_file} ${data_dir}/final/${split}.${src_lang}"
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 ]] && eval ${cmd}
-
-    cmd="cp ${tgt_file} ${data_dir}/final/${split}.${tgt_lang}"
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 ]] && eval ${cmd}
-done
-
-# Join the per-split prefixes with commas into a single --testpref value.
-n_set=${#splits[*]}
-for ((i=0;i<$n_set;i++)); do
-    dataset[$i]=${data_dir}/final/${splits[$i]}
-done
-pref=`echo ${dataset[*]} | sed 's/ /,/g'`
-
-cmd="python ${root_dir}/fairseq_cli/preprocess.py
-    --source-lang ${src_lang}
-    --target-lang ${tgt_lang}
-    --testpref ${pref}
-    --destdir ${data_dir}/data-bin
-    --srcdict ${vocab_dir}/${src_vocab_prefix}.txt
-    --tgtdict ${vocab_dir}/${tgt_vocab_prefix}.txt
-    --workers 64"
-
-echo -e "\033[34mRun command: \n${cmd} \033[0m"
-# NOTE(review): when eval is not 1 this test is the last command, so the
-# script ends with a non-zero status in print-only mode.
-[[ $eval -eq 1 ]] && eval ${cmd}
\ No newline at end of file
--- a/egs/lower/mt/conf/train.yaml
+++ b/egs/lower/mt/conf/train.yaml
-# MT training options: `transformer` architecture with 256-dim embeddings,
-# 4 attention heads and 6 encoder / 6 decoder layers.
-
-# data subsets
-train-subset: train
-valid-subset: valid
-
-# stopping criteria
-max-epoch: 50
-max-update: 100000
-
-# runtime and logging
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-skip-invalid-size-inputs-valid-test: True
-
-#load-pretrained-encoder-from:
-
-# model family and optimization
-arch: transformer
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 8000
-lr: 5e-4
-# NOTE(review): this key and label_smoothing below use underscores while every
-# other key uses hyphens - confirm the config loader accepts both spellings.
-adam_betas: (0.9,0.98)
-
-# loss
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-# encoder
-dropout: 0.1
-activation-fn: relu
-encoder-normalize-before: True
-decoder-normalize-before: True
-encoder-embed-dim: 256
-encoder-ffn-embed-dim: 2048
-encoder-layers: 6
-decoder-layers: 6
-encoder-attention-heads: 4
-
-# decoder
-decoder-embed-dim: 256
-decoder-ffn-embed-dim: 2048
-decoder-attention-heads: 4
--- a/egs/lower/mt/conf/train_m.yaml
+++ b/egs/lower/mt/conf/train_m.yaml
-# MT training options: `transformer` architecture with 512-dim embeddings,
-# 8 attention heads and 6 encoder / 6 decoder layers.
-
-# data subsets
-train-subset: train
-valid-subset: valid
-
-# stopping criteria
-max-epoch: 50
-max-update: 100000
-
-# runtime and logging
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-skip-invalid-size-inputs-valid-test: True
-
-#load-pretrained-encoder-from:
-
-# model family and optimization
-arch: transformer
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 8000
-lr: 5e-4
-# NOTE(review): this key and label_smoothing below use underscores while every
-# other key uses hyphens - confirm the config loader accepts both spellings.
-adam_betas: (0.9,0.98)
-
-# loss
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-# encoder
-dropout: 0.1
-activation-fn: relu
-encoder-normalize-before: True
-decoder-normalize-before: True
-encoder-embed-dim: 512
-encoder-ffn-embed-dim: 2048
-encoder-layers: 6
-decoder-layers: 6
-encoder-attention-heads: 8
-
-# decoder
-decoder-embed-dim: 512
-decoder-ffn-embed-dim: 2048
-decoder-attention-heads: 8
--- a/egs/lower/mt/decode.sh
+++ b/egs/lower/mt/decode.sh
-#! /bin/bash
-
-# Decode with a trained model by running stage 2 of ./run.sh.
-# Usage: ./decode.sh [exp_name]
-#   exp_name - optional experiment name; when it is omitted, --exp_name is not
-#              passed and run.sh works out the name itself.
-
-gpu_num=1
-
-data_dir=
-test_subset=test
-
-exp_name=
-if [ "$#" -eq 1 ]; then
-    exp_name=$1
-fi
-
-n_average=5
-beam_size=5
-max_tokens=20000
-
-cmd="./run.sh
-    --stage 2
-    --stop_stage 2
-    --gpu_num ${gpu_num}
-    --n_average ${n_average}
-    --beam_size ${beam_size}
-    --max_tokens ${max_tokens}
-    "
-
-# Pass --exp_name only when it has a value. A bare "--exp_name" makes a
-# "--name value" option parser take the next flag as the experiment name and
-# stop at the first leftover word, silently dropping the remaining options.
-if [[ -n ${exp_name} ]]; then
-    cmd="$cmd --exp_name ${exp_name}"
-fi
-if [[ -n ${data_dir} ]]; then
-    cmd="$cmd --data_dir ${data_dir}"
-fi
-if [[ -n ${test_subset} ]]; then
-    cmd="$cmd --test_subset ${test_subset}"
-fi
-
-echo $cmd
-eval $cmd
--- a/egs/lower/mt/local/monitor.sh
+++ b/egs/lower/mt/local/monitor.sh
-gpu_num=1
-
-while :
-do
-    all_devices=$(seq 0 `gpustat | sed '1,2d' | wc -l`);
-    count=0
-    for dev in ${all_devices[@]}
-    do
-        line=`expr $dev + 2`
-        use=`gpustat -p | head -n $line | tail -1 | cut -d '|' -f4 | wc -w`
-        if [[ $use -eq 0 ]]; then
-            device[$count]=$dev
-            count=`expr $count + 1`
-            if [[ $count -eq $gpu_num ]]; then
-                break
-            fi
-        fi
-    done
-    if [[ ${#device[@]} -lt $gpu_num ]]; then
-        sleep 60s
-    else
-        echo "Run $cmd"
-        eval $cmd
-        sleep 10s
-        exit
-    fi
-done
--- a/egs/lower/mt/local/parse_options.sh
+++ b/egs/lower/mt/local/parse_options.sh
-#!/usr/bin/env bash
-
-# Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
-#                 Arnab Ghoshal, Karel Vesely
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Parse command-line options.
-# To be sourced by another script (as in ". parse_options.sh").
-# Option format is: --option-name arg
-# and shell variable "option_name" gets set to value "arg."
-# The exception is --help, which takes no arguments, but prints the
-# $help_message variable (if defined).
-
-
-###
-### The --config file options have lower priority to command line
-### options, so we need to import them first...
-###
-
-# Now import all the configs specified by command-line, in left-to-right order
-for ((argpos=1; argpos<$#; argpos++)); do
-  if [ "${!argpos}" == "--config" ]; then
-    argpos_plus1=$((argpos+1))
-    config=${!argpos_plus1}
-    [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
-    . $config  # source the config file.
-  fi
-done
-
-
-###
-### Now we process the command line options
-###
-# Consume leading "--name value" pairs from the positional parameters and
-# assign each value to the shell variable "name" (dashes become underscores).
-# The loop stops at the first argument that is empty or does not start with
-# "--", leaving the remaining arguments in place.
-while true; do
-  [ -z "${1:-}" ] && break;  # break if there are no arguments
-  case "$1" in
-    # If the enclosing script is called with --help option, print the help
-    # message and exit.  Scripts should put help messages in $help_message
-    --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
-      else printf "$help_message\n" 1>&2 ; fi;
-      exit 0 ;;
-    # The "--name=value" spelling is rejected explicitly.
-    --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
-      exit 1 ;;
-    # If the first command-line argument begins with "--" (e.g. --foo-bar),
-    # then work out the variable name as $name, which will equal "foo_bar".
-    --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
-      # Next we test whether the variable in question is undefined-- if so it's
-      # an invalid option and we die.  Note: $0 evaluates to the name of the
-      # enclosing script.
-      # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
-      # is undefined.  We then have to wrap this test inside "eval" because
-      # foo_bar is itself inside a variable ($name).
-      eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
-
-      # Remember the current value so a Boolean default can be detected.
-      oldval="`eval echo \\$$name`";
-      # Work out whether we seem to be expecting a Boolean argument.
-      if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
-        was_bool=true;
-      else
-        was_bool=false;
-      fi
-
-      # Set the variable to the right value-- the escaped quotes make it work if
-      # the option had spaces, like --cmd "queue.pl -sync y"
-      # Note that $2 is taken as the value even when it is itself an option
-      # name, so an option given without a value swallows the next flag.
-      eval $name=\"$2\";
-
-      # Check that Boolean-valued arguments are really Boolean.
-      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
-        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
-        exit 1;
-      fi
-      shift 2;
-      ;;
-  *) break;
-  esac
-done
-
-
-# Check for an empty argument to the --cmd option, which can easily occur as a
-# result of scripting errors.
-[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
-
-
-true; # so this script returns exit code 0.
--- a/egs/lower/mt/local/path.sh
+++ b/egs/lower/mt/local/path.sh
-MAIN_ROOT=$PWD/../../..
-KALDI_ROOT=$MAIN_ROOT/tools/kaldi
-
-export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PATH
-[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
-. $KALDI_ROOT/tools/config/common_path.sh
-export LC_ALL=C
-
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$MAIN_ROOT/src/lib
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$MAIN_ROOT/tools/chainer_ctc/ext/warp-ctc/build
-. "${MAIN_ROOT}"/tools/activate_python.sh && . "${MAIN_ROOT}"/tools/extra_path.sh
-export PATH=$MAIN_ROOT/utils:$MAIN_ROOT/espnet/bin:$PATH
-
-export OMP_NUM_THREADS=1
-
-# check extra module installation
-if ! which tokenizer.perl > /dev/null; then
-    echo "Error: it seems that moses is not installed." >&2
-    echo "Error: please install moses as follows." >&2
-    echo "Error: cd ${MAIN_ROOT}/tools && make moses.done" >&2
-    return 1
-fi
-
-# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
-export PYTHONIOENCODING=UTF-8
--- a/egs/lower/mt/local/utils.sh
+++ b/egs/lower/mt/local/utils.sh
-
-# Print a comma-separated list of GPU indices that look free.
-#
-# For device N the function reads line N+2 of the `gpustat` output, takes the
-# third `|`-separated field and keeps the text before the `/`. The device is
-# accepted when that number is below 100.
-# NOTE(review): that number is presumably the used memory reported by gpustat;
-# verify against the output format of the installed gpustat version.
-#
-# Arguments:
-#   $1 - number of GPUs requested
-#   $2 - 1: print -1 as soon as a poll finds too few free GPUs;
-#        anything else: poll again every 60 seconds until enough are free
-# Outputs:
-#   the selected indices joined by commas, or -1 for the fallback case
-# Returns:
-#   the status of the final echo/sed pipeline, or 1 if mktemp fails
-get_devices(){
-    gpu_num=$1
-    use_cpu=$2
-    device=()
-    local record all_devices count dev line use
-    # One scratch file for all polls; it is removed before returning so that
-    # long waits do not litter the temp directory.
-    record=$(mktemp -t temp.record.XXXXXX) || return 1
-    while :
-    do
-        # Forget the previous poll so stale indices never survive into this one.
-        device=()
-        gpustat > "$record"
-        all_devices=$(seq 0 $(sed '1,2d' "$record" | wc -l))
-        count=0
-        for dev in $all_devices
-        do
-            line=$((dev + 2))
-            use=$(head -n "$line" "$record" | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)
-            if [[ $use -lt 100 ]]; then
-                device[$count]=$dev
-                count=$((count + 1))
-                if [[ $count -eq $gpu_num ]]; then
-                    break
-                fi
-            fi
-        done
-        if [[ ${#device[@]} -lt $gpu_num ]]; then
-            if [[ $use_cpu -eq 1 ]]; then
-                # Fall back right away; without this break the loop would spin
-                # forever whenever more than one GPU was requested.
-                device=(-1)
-                break
-            else
-                sleep 60s
-            fi
-        else
-            break
-        fi
-    done
-    rm -f -- "$record"
-
-    echo ${device[*]} | sed 's/ /,/g'
-    return $?
-}
-
-
--- a/egs/lower/mt/run.sh
+++ b/egs/lower/mt/run.sh
-#! /bin/bash
-
-# Processing MuST-C Datasets
-
-# Copyright 2021 Natural Language Processing Laboratory 
-# Xu Chen (xuchenneu@163.com)
-
-# Set bash to 'debug' mode, it will exit on :
-# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
-set -e
-#set -u
-set -o pipefail
-export PYTHONIOENCODING=UTF-8
-
-eval=1
-time=$(date "+%m%d_%H%M")
-
-stage=0
-stop_stage=0
-
-######## hardware ########
-# devices
-#device=()
-gpu_num=8
-update_freq=1
-
-root_dir=~/st/Fairseq-S2T
-pwd_dir=$PWD
-
-# dataset
-src_lang=swa
-tgt_lang=en
-lang=${src_lang}-${tgt_lang}
-
-dataset=lower
-task=translation
-vocab_type=unigram
-vocab_size=10000
-share_dict=1
-
-use_specific_dict=1
-specific_prefix=st_share10k
-specific_dir=/home/xuchen/st/data/mustc/st/en-de
-src_vocab_prefix=spm_unigram10000_st_share
-tgt_vocab_prefix=spm_unigram10000_st_share
-
-org_data_dir=/media/data/${dataset}
-data_dir=~/st/data/${dataset}/mt/${lang}
-train_subset=train
-valid_subset=dev
-test_subset=test
-
-# exp
-extra_tag=
-extra_parameter=
-exp_tag=baseline
-exp_name=
-
-# config
-train_config=train.yaml
-
-# training setting
-fp16=1
-max_tokens=4096
-step_valid=0
-bleu_valid=0
-
-# decoding setting
-n_average=10
-beam_size=5
-
-# Derive the data directory, experiment tag and vocabulary prefixes from the
-# dictionary switches defined above.
-# NOTE(review): this happens before parse_options.sh is sourced, so passing
-# --use_specific_dict, --vocab_size, --vocab_type or --share_dict on the
-# command line does not change the values derived here; the mkdir below also
-# runs on every invocation, before any override is seen.
-if [[ ${use_specific_dict} -eq 1 ]]; then
-    exp_tag=${specific_prefix}_${exp_tag}
-    data_dir=${data_dir}/${specific_prefix}
-    mkdir -p ${data_dir}
-else
-    data_dir=${data_dir}/${vocab_type}${vocab_size}
-    src_vocab_prefix=spm_${vocab_type}${vocab_size}_${src_lang}
-    tgt_vocab_prefix=spm_${vocab_type}${vocab_size}_${tgt_lang}
-    if [[ $share_dict -eq 1 ]]; then
-        data_dir=${data_dir}_share
-        src_vocab_prefix=spm_${vocab_type}${vocab_size}_share
-        tgt_vocab_prefix=spm_${vocab_type}${vocab_size}_share
-    fi
-fi
-
-# Apply command-line overrides to the variables defined so far.
-. ./local/parse_options.sh || exit 1;
-
-# full path
-train_config=$pwd_dir/conf/${train_config}
-# Unless an experiment name was given, build one from the config file name
-# without its extension, the tag and the optional extra tag.
-if [[ -z ${exp_name} ]]; then
-    exp_name=$(basename ${train_config%.*})_${exp_tag}
-    if [[ -n ${extra_tag} ]]; then
-        exp_name=${exp_name}_${extra_tag}
-    fi
-fi
-model_dir=$root_dir/../checkpoints/$dataset/mt/${exp_name}
-
-if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
-    echo "stage -1: Data Download"
-    # pass
-fi
-
-if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
-    ### Task dependent. You have to make data the following preparation part by yourself.
-    echo "stage 0: MT Data Preparation"
-    if [[ ! -e ${data_dir} ]]; then
-        mkdir -p ${data_dir}
-    fi
-
-    if [[ ! -f ${data_dir}/${src_vocab_prefix}.txt || ! -f ${data_dir}/${tgt_vocab_prefix}.txt ]]; then
-        if [[ ${use_specific_dict} -eq 0 ]]; then
-            cmd="python ${root_dir}/examples/speech_to_text/prep_mt_data.py
-                --data-root ${org_data_dir}
-                --output-root ${data_dir}
-                --splits ${train_subset},${valid_subset},${test_subset}
-                --src-lang ${src_lang}
-                --tgt-lang ${tgt_lang}
-                --vocab-type ${vocab_type}
-                --vocab-size ${vocab_size}"
-            if [[ $share_dict -eq 1 ]]; then
-                cmd="$cmd
-                --share"
-            fi
-            echo -e "\033[34mRun command: \n${cmd} \033[0m"
-            [[ $eval -eq 1 ]] && eval ${cmd}
-        else
-            cp -r ${specific_dir}/${src_vocab_prefix}.* ${data_dir}
-            cp ${specific_dir}/${tgt_vocab_prefix}.* ${data_dir}
-        fi
-    fi
-
-    mkdir -p ${data_dir}/data
-    for split in ${train_subset} ${valid_subset} ${test_subset}; do
-        cmd="spm_encode
-        --model ${data_dir}/${src_vocab_prefix}.model
-        --output_format=piece
-        < ${org_data_dir}/${lang}/data/${split}.${src_lang}
-        > ${data_dir}/data/${split}.${src_lang}"
-
-        echo -e "\033[34mRun command: \n${cmd} \033[0m"
-        [[ $eval -eq 1 ]] && eval ${cmd}
-
-        cmd="spm_encode
-        --model ${data_dir}/${tgt_vocab_prefix}.model
-        --output_format=piece
-        < ${org_data_dir}/${lang}/data/${split}.${tgt_lang}
-        > ${data_dir}/data/${split}.${tgt_lang}"
-
-        echo -e "\033[34mRun command: \n${cmd} \033[0m"
-        [[ $eval -eq 1 ]] && eval ${cmd}
-    done
-
-    cmd="python ${root_dir}/fairseq_cli/preprocess.py
-        --source-lang ${src_lang} --target-lang ${tgt_lang}
-        --trainpref ${data_dir}/data/${train_subset}
-        --validpref ${data_dir}/data/${valid_subset}
-        --testpref ${data_dir}/data/${test_subset}
-        --destdir ${data_dir}/data-bin
-        --srcdict ${data_dir}/${src_vocab_prefix}.txt
-        --tgtdict ${data_dir}/${tgt_vocab_prefix}.txt
-        --workers 64"
-
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 ]] && eval ${cmd}
-fi
-
-data_dir=${data_dir}/data-bin
-
-if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
-    echo "stage 1: MT Network Training"
-    [[ ! -d ${data_dir} ]] && echo "The data dir ${data_dir} is not existing!" && exit 1;
-
-    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
-		if [[ ${gpu_num} -eq 0 ]]; then
-			device=()
-		else
-        	source ./local/utils.sh
-        	device=$(get_devices $gpu_num 0)
-		fi
-    fi
-
-    echo -e "dev=${device} data=${data_dir} model=${model_dir}"
-
-    if [[ ! -d ${model_dir} ]]; then
-        mkdir -p ${model_dir}
-    else
-        echo "${model_dir} exists."
-    fi
-
-    cp ${BASH_SOURCE[0]} ${model_dir}
-    cp ${PWD}/train.sh ${model_dir}
-    cp ${train_config} ${model_dir}
-
-    cmd="python3 -u ${root_dir}/fairseq_cli/train.py
-        ${data_dir}
-        --source-lang ${src_lang}
-        --target-lang ${tgt_lang}
-        --train-config ${train_config}
-        --task ${task}
-        --max-tokens ${max_tokens}
-        --update-freq ${update_freq}
-        --log-interval 100
-        --save-dir ${model_dir}
-        --tensorboard-logdir ${model_dir}"
-
-    if [[ -n ${extra_parameter} ]]; then
-        cmd="${cmd}
-        ${extra_parameter}"
-    fi
-	if [[ ${gpu_num} -gt 0 ]]; then
-		cmd="${cmd}
-        --distributed-world-size $gpu_num
-        --ddp-backend no_c10d"
-	fi
-    if [[ $fp16 -eq 1 ]]; then
-        cmd="${cmd}
-        --fp16"
-    fi
-    if [[ $step_valid -eq 1 ]]; then
-        validate_interval=10000
-        save_interval=10000
-        no_epoch_checkpoints=1
-        save_interval_updates=5000
-        keep_interval_updates=3
-    else
-        validate_interval=1
-        keep_last_epochs=10
-    fi
-    if [[ $bleu_valid -eq 1 ]]; then
-        cmd="$cmd
-        --eval-bleu
-        --eval-bleu-args '{\"beam\": 1}'
-        --eval-tokenized-bleu
-        --eval-bleu-remove-bpe
-        --best-checkpoint-metric bleu
-        --maximize-best-checkpoint-metric"
-    fi
-    if [[ -n $no_epoch_checkpoints && $no_epoch_checkpoints -eq 1 ]]; then
-        cmd="$cmd
-        --no-epoch-checkpoints"
-    fi
-    if [[ -n $validate_interval ]]; then
-        cmd="${cmd}
-        --validate-interval $validate_interval "
-    fi
-    if [[ -n $save_interval ]]; then
-        cmd="${cmd}
-        --save-interval $save_interval "
-    fi
-    if [[ -n $keep_last_epochs ]]; then
-        cmd="${cmd}
-        --keep-last-epochs $keep_last_epochs "
-    fi
-    if [[ -n $save_interval_updates ]]; then
-        cmd="${cmd}
-        --save-interval-updates $save_interval_updates"
-        if [[ -n $keep_interval_updates ]]; then
-        cmd="${cmd}
-        --keep-interval-updates $keep_interval_updates"
-        fi
-    fi
-
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-
-    # save info
-    log=./history.log
-    echo "${time} | ${device} | ${data_dir} | ${model_dir} " >> $log
-    cat $log | tail -n 50 > tmp.log
-    mv tmp.log $log
-    export CUDA_VISIBLE_DEVICES=${device}
-
-    cmd="nohup ${cmd} >> ${model_dir}/train.log 2>&1 &"
-    if [[ $eval -eq 1 ]]; then
-		eval $cmd
-		sleep 2s
-		tail -n `wc -l ${model_dir}/train.log | awk '{print $1+1}'` -f ${model_dir}/train.log
-	fi
-fi
-wait
-
-if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    echo "stage 2: MT Decoding"
-    if [[ ${n_average} -ne 1 ]]; then
-        # Average models
-		dec_model=avg_${n_average}_checkpoint.pt
-
-		cmd="python ${root_dir}/scripts/average_checkpoints.py
-        --inputs ${model_dir}
-        --num-epoch-checkpoints ${n_average}
-        --output ${model_dir}/${dec_model}"
-    	echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    	[[ $eval -eq 1 ]] && eval $cmd
-	else
-		dec_model=checkpoint_best.pt
-	fi
-
-    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
-		if [[ ${gpu_num} -eq 0 ]]; then
-			device=()
-		else
-        	source ./local/utils.sh
-        	device=$(get_devices $gpu_num 0)
-		fi
-    fi
-    export CUDA_VISIBLE_DEVICES=${device}
-
-	#tmp_file=$(mktemp ${model_dir}/tmp-XXXXX)
-	#trap 'rm -rf ${tmp_file}' EXIT
-	result_file=${model_dir}/decode_result
-	[[ -f ${result_file} ]] && rm ${result_file}
-
-    test_subset=(${test_subset//,/ })
-	for subset in ${test_subset[@]}; do
-  		cmd="python ${root_dir}/fairseq_cli/generate.py
-        ${data_dir}
-        --source-lang ${src_lang}
-        --target-lang ${tgt_lang}
-        --gen-subset ${subset}
-        --task ${task}
-        --path ${model_dir}/${dec_model}
-        --results-path ${model_dir}
-        --max-tokens ${max_tokens}
-        --beam ${beam_size}
-        --post-process sentencepiece
-        --tokenizer moses
-        --moses-source-lang ${src_lang}
-        --moses-target-lang ${tgt_lang}
-        --scoring sacrebleu"
-    	echo -e "\033[34mRun command: \n${cmd} \033[0m"
-
-        if [[ $eval -eq 1 ]]; then
-    	    eval $cmd
-    	    tail -n 1 ${model_dir}/generate-${subset}.txt >> ${result_file}
-        fi
-	done
-    cat ${result_file}
-fi
--- a/egs/lower/mt/train.sh
+++ b/egs/lower/mt/train.sh
-#! /bin/bash
-
-# Launch training only: runs stage 1 of ./run.sh with the settings below.
-
-gpu_num=1
-update_freq=1
-max_tokens=4096
-
-# Optional extras; empty values are not forwarded to run.sh.
-extra_tag=
-extra_parameter=
-
-#extra_tag="${extra_tag}"
-#extra_parameter="${extra_parameter} "
-
-exp_tag=baseline
-train_config=train.yaml
-
-# Assemble the command one option at a time.
-cmd="./run.sh"
-cmd+=" --stage 1"
-cmd+=" --stop_stage 1"
-cmd+=" --gpu_num ${gpu_num}"
-cmd+=" --update_freq ${update_freq}"
-cmd+=" --train_config ${train_config}"
-cmd+=" --max_tokens ${max_tokens}"
-
-# Forward the optional settings only when they are non-empty.
-[[ -z ${exp_tag} ]] || cmd+=" --exp_tag ${exp_tag}"
-[[ -z ${extra_tag} ]] || cmd+=" --extra_tag ${extra_tag}"
-[[ -z ${extra_parameter} ]] || cmd+=" --extra_parameter \"${extra_parameter}\""
-
-echo $cmd
-eval $cmd
--- a/egs/lower/st/conf/train_ctc_debug.yaml
+++ b/egs/lower/st/conf/train_ctc_debug.yaml
-# ST training options: `s2t_transformer_s` architecture trained with the
-# label_smoothed_cross_entropy_with_ctc criterion (ctc-weight 0.3).
-
-# data subsets (two training subsets, comma-separated)
-train-subset: train_st,train_v2
-valid-subset: dev_st
-
-# stopping criteria
-max-epoch: 50
-max-update: 100000
-
-# runtime and logging
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-
-# optional pretrained initialisation (disabled)
-#load-pretrained-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/train_ctc_baseline/avg_10_checkpoint.pt
-#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/mustc/mt/train_baseline/avg_10_checkpoint.pt
-
-# model family and optimization
-arch: s2t_transformer_s
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 10000
-lr: 2e-3
-#adam_betas: (0.9,0.98)
-
-# loss
-ctc-weight: 0.3
-criterion: label_smoothed_cross_entropy_with_ctc
-# NOTE(review): underscore spelling, unlike the hyphenated keys around it -
-# confirm the config loader accepts both.
-label_smoothing: 0.1
-
-# convolutional front end and encoder
-conv-kernel-sizes: 5,5
-conv-channels: 1024
-dropout: 0.1
-activation-fn: relu
-encoder-embed-dim: 256
-encoder-ffn-embed-dim: 2048
-encoder-layers: 12
-decoder-layers: 6
-encoder-attention-heads: 4
-
-# decoder overrides (disabled)
-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
--- a/egs/lower/st/decode.sh
+++ b/egs/lower/st/decode.sh
-#! /bin/bash
-
-gpu_num=1
-
-data_dir=
-test_subset=tst-COMMON
-
-exp_name=
-if [ "$#" -eq 1 ]; then
-    exp_name=$1
-fi
-
-n_average=10
-beam_size=5
-max_tokens=40000
-
-cmd="./run.sh
-    --stage 2
-    --stop_stage 2
-    --gpu_num ${gpu_num}
-    --exp_name ${exp_name}
-    --n_average ${n_average}
-    --beam_size ${beam_size}
-    --max_tokens ${max_tokens}
-    "
-
-if [[ -n ${data_dir} ]]; then
-    cmd="$cmd --data_dir ${data_dir}"
-fi
-if [[ -n ${test_subset} ]]; then
-    cmd="$cmd --test_subset ${test_subset}"
-fi
-
-echo $cmd
-eval $cmd
--- a/egs/lower/st/local/monitor.sh
+++ b/egs/lower/st/local/monitor.sh
-gpu_num=1
-
-while :
-do
-    all_devices=$(seq 0 `gpustat | sed '1,2d' | wc -l`);
-    count=0
-    for dev in ${all_devices[@]}
-    do
-        line=`expr $dev + 2`
-        use=`gpustat -p | head -n $line | tail -1 | cut -d '|' -f4 | wc -w`
-        if [[ $use -eq 0 ]]; then
-            device[$count]=$dev
-            count=`expr $count + 1`
-            if [[ $count -eq $gpu_num ]]; then
-                break
-            fi
-        fi
-    done
-    if [[ ${#device[@]} -lt $gpu_num ]]; then
-        sleep 60s
-    else
-        echo "Run $cmd"
-        eval $cmd
-        sleep 10s
-        exit
-    fi
-done
--- a/egs/lower/st/local/parse_options.sh
+++ b/egs/lower/st/local/parse_options.sh
-#!/usr/bin/env bash
-
-# Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
-#                 Arnab Ghoshal, Karel Vesely
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Parse command-line options.
-# To be sourced by another script (as in ". parse_options.sh").
-# Option format is: --option-name arg
-# and shell variable "option_name" gets set to value "arg."
-# The exception is --help, which takes no arguments, but prints the
-# $help_message variable (if defined).
-
-
-###
-### The --config file options have lower priority to command line
-### options, so we need to import them first...
-###
-
-# Now import all the configs specified by command-line, in left-to-right order
-for ((argpos=1; argpos<$#; argpos++)); do
-  if [ "${!argpos}" == "--config" ]; then
-    argpos_plus1=$((argpos+1))
-    config=${!argpos_plus1}
-    [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
-    . $config  # source the config file.
-  fi
-done
-
-
-###
-### Now we process the command line options
-###
-while true; do
-  [ -z "${1:-}" ] && break;  # break if there are no arguments
-  case "$1" in
-    # If the enclosing script is called with --help option, print the help
-    # message and exit.  Scripts should put help messages in $help_message
-    --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
-      else printf "$help_message\n" 1>&2 ; fi;
-      exit 0 ;;
-    --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
-      exit 1 ;;
-    # If the first command-line argument begins with "--" (e.g. --foo-bar),
-    # then work out the variable name as $name, which will equal "foo_bar".
-    --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
-      # Next we test whether the variable in question is undefned-- if so it's
-      # an invalid option and we die.  Note: $0 evaluates to the name of the
-      # enclosing script.
-      # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
-      # is undefined.  We then have to wrap this test inside "eval" because
-      # foo_bar is itself inside a variable ($name).
-      eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
-
-      oldval="`eval echo \\$$name`";
-      # Work out whether we seem to be expecting a Boolean argument.
-      if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
-        was_bool=true;
-      else
-        was_bool=false;
-      fi
-
-      # Set the variable to the right value-- the escaped quotes make it work if
-      # the option had spaces, like --cmd "queue.pl -sync y"
-      eval $name=\"$2\";
-
-      # Check that Boolean-valued arguments are really Boolean.
-      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
-        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
-        exit 1;
-      fi
-      shift 2;
-      ;;
-  *) break;
-  esac
-done
-
-
-# Check for an empty argument to the --cmd option, which can easily occur as a
-# result of scripting errors.
-[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
-
-
-true; # so this script returns exit code 0.
--- a/egs/lower/st/local/path.sh
+++ b/egs/lower/st/local/path.sh
-MAIN_ROOT=$PWD/../../..
-KALDI_ROOT=$MAIN_ROOT/tools/kaldi
-
-export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PATH
-[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
-. $KALDI_ROOT/tools/config/common_path.sh
-export LC_ALL=C
-
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$MAIN_ROOT/src/lib
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$MAIN_ROOT/tools/chainer_ctc/ext/warp-ctc/build
-. "${MAIN_ROOT}"/tools/activate_python.sh && . "${MAIN_ROOT}"/tools/extra_path.sh
-export PATH=$MAIN_ROOT/utils:$MAIN_ROOT/espnet/bin:$PATH
-
-export OMP_NUM_THREADS=1
-
-# check extra module installation
-if ! which tokenizer.perl > /dev/null; then
-    echo "Error: it seems that moses is not installed." >&2
-    echo "Error: please install moses as follows." >&2
-    echo "Error: cd ${MAIN_ROOT}/tools && make moses.done" >&2
-    return 1
-fi
-
-# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
-export PYTHONIOENCODING=UTF-8
--- a/egs/lower/st/local/utils.sh
+++ b/egs/lower/st/local/utils.sh
-
-get_devices(){
-    gpu_num=$1
-    use_cpu=$2
-    device=()
-    while :
-    do
-        record=`mktemp -t temp.record.XXXXXX`
-        gpustat > $record
-        all_devices=$(seq 0 `cat $record | sed '1,2d' | wc -l`);
-        count=0
-        for dev in ${all_devices[@]}
-        do
-            line=`expr $dev + 2`
-            use=`cat $record | head -n $line | tail -1 | cut -d '|' -f3 | cut -d '/' -f1`
-            if [[ $use -lt 100 ]]; then
-                device[$count]=$dev
-                count=`expr $count + 1`
-                if [[ $count -eq $gpu_num ]]; then
-                    break
-                fi
-            fi
-        done
-        if [[ ${#device[@]} -lt $gpu_num ]]; then
-            if [[ $use_cpu -eq 1 ]]; then
-                device=(-1)
-            else
-                sleep 60s
-            fi
-        else
-            break
-        fi
-    done
-
-    echo ${device[*]} | sed 's/ /,/g'
-    return $?
-}
-
-
--- a/egs/lower/st/run.sh
+++ b/egs/lower/st/run.sh
-#! /bin/bash
-
-# Processing MuST-C Datasets
-
-# Copyright 2021 Natural Language Processing Laboratory 
-# Xu Chen (xuchenneu@163.com)
-
-# Set bash to 'debug' mode, it will exit on :
-# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
-set -e
-#set -u
-set -o pipefail
-export PYTHONIOENCODING=UTF-8
-
-eval=1
-time=$(date "+%m%d_%H%M")
-
-stage=0
-stop_stage=0
-
-######## hardware ########
-# devices
-device=()
-gpu_num=8
-update_freq=1
-
-root_dir=~/st/Fairseq-S2T
-pwd_dir=$PWD
-
-# dataset
-src_lang=swa
-tgt_lang=en
-lang=${src_lang}-${tgt_lang}
-
-dataset=lower
-task=speech_to_text
-vocab_type=unigram
-asr_vocab_size=5000
-vocab_size=10000
-share_dict=1
-speed_perturb=1
-
-org_data_dir=/media/data/${dataset}
-data_dir=~/st/data/${dataset}/st
-test_subset=tst-COMMON
-
-# exp
-extra_tag=
-extra_parameter=
-exp_tag=baseline
-exp_name=
-
-# config
-train_config=train_ctc.yaml
-
-# training setting
-fp16=1
-max_tokens=40000
-step_valid=0
-bleu_valid=0
-
-# decoding setting
-n_average=10
-beam_size=5
-
-if [[ ${share_dict} -eq 1 ]]; then
-	data_config=config_st_share.yaml
-else
-	data_config=config_st.yaml
-fi
-
-if [[ ${speed_perturb} -eq 1 ]]; then
-    data_dir=${data_dir}_sp
-fi
-
-. ./local/parse_options.sh || exit 1;
-
-# full path
-train_config=$pwd_dir/conf/${train_config}
-if [[ -z ${exp_name} ]]; then
-    exp_name=$(basename ${train_config%.*})_${exp_tag}
-    if [[ -n ${extra_tag} ]]; then
-        exp_name=${exp_name}_${extra_tag}
-    fi
-    if [[ ${speed_perturb} -eq 1 ]]; then
-        exp_name=sp_${exp_name}
-    fi
-fi
-model_dir=$root_dir/../checkpoints/$dataset/st/${exp_name}
-
-if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
-    echo "stage -1: Data Download"
-    # pass
-fi
-
-if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
-    ### Task dependent. You have to make data the following preparation part by yourself.
-    ### But you can utilize Kaldi recipes in most cases
-    echo "stage 0: ASR Data Preparation"
-    if [[ ! -e ${data_dir}/${lang} ]]; then
-        mkdir -p ${data_dir}/${lang}
-    fi
-    source ~/tools/audio/bin/activate
-
-    cmd="python ${root_dir}/examples/speech_to_text/prep_mustc_data.py
-        --data-root ${org_data_dir}
-        --output-root ${data_dir}
-        --task asr
-        --vocab-type ${vocab_type}
-        --vocab-size ${asr_vocab_size}"
-    if [[ ${speed_perturb} -eq 1 ]]; then
-        cmd="$cmd
-        --speed-perturb"
-    fi
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 && ${share_dict} -ne 1 ]] && eval $cmd
-
-    echo "stage 0: ST Data Preparation"
-    cmd="python ${root_dir}/examples/speech_to_text/prep_mustc_data.py
-        --data-root ${org_data_dir}
-        --output-root ${data_dir}
-        --task st
-        --add-src
-        --cmvn-type utterance
-        --vocab-type ${vocab_type}
-        --vocab-size ${vocab_size}"
-    if [[ $share_dict -eq 1 ]]; then
-        cmd="$cmd
-        --share"
-	else
-        cmd="$cmd
-        --asr-prefix spm_${vocab_type}${asr_vocab_size}_asr"
-    fi
-    if [[ ${speed_perturb} -eq 1 ]]; then
-        cmd="$cmd
-        --speed-perturb"
-    fi
-
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 ]] && eval ${cmd}
-    deactivate
-fi
-
-data_dir=${data_dir}/${lang}
-
-if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
-    echo "stage 1: ST Network Training"
-    [[ ! -d ${data_dir} ]] && echo "The data dir ${data_dir} is not existing!" && exit 1;
-
-    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
-		if [[ ${gpu_num} -eq 0 ]]; then
-			device=()
-		else
-        	source ./local/utils.sh
-        	device=$(get_devices $gpu_num 0)
-		fi
-    fi
-
-    echo -e "dev=${device} data=${data_dir} model=${model_dir}"
-
-    if [[ ! -d ${model_dir} ]]; then
-        mkdir -p ${model_dir}
-    else
-        echo "${model_dir} exists."
-    fi
-
-    cp ${BASH_SOURCE[0]} ${model_dir}
-    cp ${PWD}/train.sh ${model_dir}
-    cp ${train_config} ${model_dir}
-
-    cmd="python3 -u ${root_dir}/fairseq_cli/train.py
-        ${data_dir}
-        --config-yaml ${data_config}
-        --train-config ${train_config}
-        --task ${task}
-        --max-tokens ${max_tokens}
-        --update-freq ${update_freq}
-        --log-interval 100
-        --save-dir ${model_dir}
-        --tensorboard-logdir ${model_dir}"
-
-    if [[ -n ${extra_parameter} ]]; then
-        cmd="${cmd}
-        ${extra_parameter}"
-    fi
-	if [[ ${gpu_num} -gt 0 ]]; then
-		cmd="${cmd}
-        --distributed-world-size $gpu_num
-        --ddp-backend no_c10d"
-	fi
-    if [[ $fp16 -eq 1 ]]; then
-        cmd="${cmd}
-        --fp16"
-    fi
-    if [[ $step_valid -eq 1 ]]; then
-        validate_interval=10000
-        save_interval=10000
-        no_epoch_checkpoints=1
-        save_interval_updates=5000
-        keep_interval_updates=3
-    else
-        validate_interval=1
-        keep_last_epochs=10
-    fi
-    if [[ $bleu_valid -eq 1 ]]; then
-        cmd="$cmd
-        --eval-bleu
-        --eval-bleu-args '{\"beam\": 1}'
-        --eval-tokenized-bleu
-        --eval-bleu-remove-bpe
-        --best-checkpoint-metric bleu
-        --maximize-best-checkpoint-metric"
-    fi
-    if [[ -n $no_epoch_checkpoints && $no_epoch_checkpoints -eq 1 ]]; then
-        cmd="$cmd
-        --no-epoch-checkpoints"
-    fi
-    if [[ -n $validate_interval ]]; then
-        cmd="${cmd}
-        --validate-interval $validate_interval "
-    fi
-    if [[ -n $save_interval ]]; then
-        cmd="${cmd}
-        --save-interval $save_interval "
-    fi
-    if [[ -n $keep_last_epochs ]]; then
-        cmd="${cmd}
-        --keep-last-epochs $keep_last_epochs "
-    fi
-    if [[ -n $save_interval_updates ]]; then
-        cmd="${cmd}
-        --save-interval-updates $save_interval_updates"
-        if [[ -n $keep_interval_updates ]]; then
-        cmd="${cmd}
-        --keep-interval-updates $keep_interval_updates"
-        fi
-    fi
-
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-
-    # save info
-    log=./history.log
-    echo "${time} | ${device} | ${data_dir} | ${model_dir} " >> $log
-    cat $log | tail -n 50 > tmp.log
-    mv tmp.log $log
-    export CUDA_VISIBLE_DEVICES=${device}
-
-    cmd="nohup ${cmd} >> ${model_dir}/train.log 2>&1 &"
-    if [[ $eval -eq 1 ]]; then
-		eval $cmd
-		sleep 2s
-		tail -n `wc -l ${model_dir}/train.log | awk '{print $1+1}'` -f ${model_dir}/train.log
-	fi
-fi
-wait
-
-if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    echo "stage 2: ST Decoding"
-    if [[ ${n_average} -ne 1 ]]; then
-        # Average models
-		dec_model=avg_${n_average}_checkpoint.pt
-
-		cmd="python ${root_dir}/scripts/average_checkpoints.py
-        --inputs ${model_dir}
-        --num-epoch-checkpoints ${n_average}
-        --output ${model_dir}/${dec_model}"
-    	echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    	[[ $eval -eq 1 ]] && eval $cmd
-	else
-		dec_model=checkpoint_best.pt
-	fi
-
-    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
-		if [[ ${gpu_num} -eq 0 ]]; then
-			device=()
-		else
-        	source ./local/utils.sh
-        	device=$(get_devices $gpu_num 0)
-		fi
-    fi
-    export CUDA_VISIBLE_DEVICES=${device}
-
-	#tmp_file=$(mktemp ${model_dir}/tmp-XXXXX)
-	#trap 'rm -rf ${tmp_file}' EXIT
-	result_file=${model_dir}/decode_result
-	[[ -f ${result_file} ]] && rm ${result_file}
-
-    test_subset=(${test_subset//,/ })
-	for subset in ${test_subset[@]}; do
-        subset=${subset}_st
-  		cmd="python ${root_dir}/fairseq_cli/generate.py
-        ${data_dir}
-        --config-yaml ${data_config}
-        --gen-subset ${subset}
-        --task speech_to_text
-        --path ${model_dir}/${dec_model}
-        --results-path ${model_dir}
-        --max-tokens ${max_tokens}
-        --beam ${beam_size}
-        --scoring sacrebleu"
-    	echo -e "\033[34mRun command: \n${cmd} \033[0m"
-
-        if [[ $eval -eq 1 ]]; then
-    	    eval $cmd
-    	    tail -n 1 ${model_dir}/generate-${subset}.txt >> ${result_file}
-        fi
-	done
-    cat ${result_file}
-fi
--- a/egs/lower/st/train.sh
+++ b/egs/lower/st/train.sh
-#! /bin/bash
-
-# training the model
-
-gpu_num=8
-update_freq=2
-max_tokens=20000
-
-extra_tag=
-extra_parameter=
-
-#extra_tag="${extra_tag}"
-#extra_parameter="${extra_parameter} "
-
-exp_tag=
-train_config=train_ctc.yaml
-
-cmd="./run.sh
-    --stage 1
-    --stop_stage 1
-    --gpu_num ${gpu_num}
-    --update_freq ${update_freq}
-    --train_config ${train_config}
-    --max_tokens ${max_tokens}
-    "
-
-if [[ -n ${exp_tag} ]]; then
-    cmd="$cmd --exp_tag ${exp_tag}"
-fi
-if [[ -n ${extra_tag} ]]; then
-    cmd="$cmd --extra_tag ${extra_tag}"
-fi
-if [[ -n ${extra_parameter} ]]; then
-    cmd="$cmd --extra_parameter \"${extra_parameter}\""
-fi
-
-echo $cmd
-eval $cmd
--- a/egs/covost/st/binary.sh
+++ b/egs/covost/st/binary.sh
@@ -6,7 +6,6 @@ root_dir=~/st/Fairseq-S2T
 data_dir=/home/xuchen/st/data/test
 vocab_dir=/home/xuchen/st/data/mustc/st_lcrm/en-de
 asr_vocab_prefix=spm_unigram10000_st_share
-st_vocab_prefix=spm_unigram10000_st_share

 src_lang=en
 tgt_lang=de
@@ -16,23 +15,31 @@ source ~/tools/audio/bin/activate

 splits=`echo ${splits[*]} | sed 's/ /,/g'`

-cp -r ${vocab_dir}/${asr_vocab_prefix}.* ${data_dir}
-cp -r ${vocab_dir}/${st_vocab_prefix}.* ${data_dir}
+cp -r ${vocab_dir}/${asr_vocab_prefix}.* ${data_dir}/${src_lang}-${tgt_lang}
+rm -rf ${data_dir}/${src_lang}-${tgt_lang}/fbank80.zip

 cmd="python ${root_dir}/examples/speech_to_text/prep_st_data.py
    --data-root ${data_dir}
    --output-root ${data_dir}
    --splits ${splits}
-    --task st
+    --task asr
    --src-lang ${src_lang}
    --tgt-lang ${tgt_lang}
    --add-src
-    --lowercase-src
-    --rm-punc-src
+    --share
    --asr-prefix ${asr_vocab_prefix}
-    --tgt-prefix ${st_vocab_prefix}
    --cmvn-type utterance"

+    if [[ ${lcrm} -eq 1 ]]; then
+        cmd="$cmd
+    --lowercase-src
+    --rm-punc-src"
+    fi
+    if [[ ${tokenizer} -eq 1 ]]; then
+        cmd="$cmd
+    --tokenizer"
+    fi
+
 echo -e "\033[34mRun command: \n${cmd} \033[0m"
 [[ $eval -eq 1 ]] && eval ${cmd}
 deactivate
--- a/egs/mustc/asr/conf/train.yaml
+++ b/egs/mustc/asr/conf/train.yaml
-train-subset: train_asr
-valid-subset: dev_asr
+train-subset: train_st
+valid-subset: dev_st

 max-epoch: 50
 max-update: 100000
@@ -36,3 +36,9 @@ encoder-ffn-embed-dim: 2048
 encoder-layers: 12
 decoder-layers: 6
 encoder-attention-heads: 4
+
+decoder-embed-dim: 256
+decoder-ffn-embed-dim: 2048
+decoder-attention-heads: 4
+attention-dropout: 0.1
+activation-dropout: 0.1
--- a/egs/mustc/asr/conf/train_ctc.yaml
+++ b/egs/mustc/asr/conf/train_ctc.yaml
-train-subset: train_asr
-valid-subset: dev_asr
+train-subset: train_st
+valid-subset: dev_st

 max-epoch: 50
 max-update: 100000
@@ -36,4 +36,10 @@ encoder-embed-dim: 256
 encoder-ffn-embed-dim: 2048
 encoder-layers: 12
 decoder-layers: 6
-encoder-attention-heads: 4
\ No newline at end of file
+encoder-attention-heads: 4
+
+decoder-embed-dim: 256
+decoder-ffn-embed-dim: 2048
+decoder-attention-heads: 4
+attention-dropout: 0.1
+activation-dropout: 0.1
--- a/egs/mustc/asr/conf/train_ctc_conformer.yaml
+++ b/egs/mustc/asr/conf/train_ctc_conformer.yaml
-train-subset: train_asr
-valid-subset: dev_asr
+train-subset: train_st
+valid-subset: dev_st

 max-epoch: 50
 max-update: 100000
@@ -42,8 +42,8 @@ macaron-style: True
 use-cnn-module: True
 cnn-module-kernel: 31

-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
+decoder-embed-dim: 256
+decoder-ffn-embed-dim: 2048
+decoder-attention-heads: 4
+attention-dropout: 0.1
+activation-dropout: 0.1
--- a/egs/mustc/asr/conf/train_ctc_conformer_m.yaml
+++ b/egs/mustc/asr/conf/train_ctc_conformer_m.yaml
-train-subset: train_asr
-valid-subset: dev_asr
-
-max-epoch: 50
-max-update: 100000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
-
-arch: s2t_conformer_m
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 10000
-lr: 1e-3
-#adam_betas: (0.9,0.98)
-
-ctc-weight: 0.3
-criterion: label_smoothed_cross_entropy_with_ctc
-label_smoothing: 0.1
-
-conv-kernel-sizes: 5,5
-conv-channels: 1024
-#dropout: 0.1
-#activation-fn: relu
-#encoder-embed-dim: 256
-#encoder-ffn-embed-dim: 2048
-#encoder-layers: 12
-#decoder-layers: 6
-#encoder-attention-heads: 4
-
-macaron-style: True
-use-cnn-module: True
-cnn-module-kernel: 31
-
-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
--- a/egs/covost/st/conf/train_ctc_conformer.yaml
+++ b/egs/covost/st/conf/train_ctc_conformer.yaml
@@ -11,8 +11,8 @@ log-interval: 100
 seed: 1
 report-accuracy: True

-#load-params: 
-#load-pretrained-encoder-from: 
+#load-pretrained-encoder-from:
+#load-pretrained-decoder-from:

 arch: s2t_conformer_s
 share-decoder-input-output-embed: True
@@ -42,6 +42,11 @@ macaron-style: True
 use-cnn-module: True
 cnn-module-kernel: 31

+encoder-attention-type: relative
+decoder-attention-type: relative
+max-encoder-relative-length: 100
+max-decoder-relative-length: 20
+
 #decoder-embed-dim: 256
 #decoder-ffn-embed-dim: 2048
 #decoder-attention-heads: 4

--- a/egs/mustc/asr/conf/train_ctc_rpr.yaml
+++ b/egs/mustc/asr/conf/train_ctc_rpr.yaml
-train-subset: train_asr
-valid-subset: dev_asr
+train-subset: train_st
+valid-subset: dev_st

 max-epoch: 50
 max-update: 100000
@@ -39,11 +39,12 @@ decoder-layers: 6
 encoder-attention-heads: 4

 encoder-attention-type: relative
-#decoder-attention-type: relative
-max-relative-length: 100
-
-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
+decoder-attention-type: relative
+max-encoder-relative-length: 100
+max-decoder-relative-length: 20
+
+decoder-embed-dim: 256
+decoder-ffn-embed-dim: 2048
+decoder-attention-heads: 4
+attention-dropout: 0.1
+activation-dropout: 0.1
--- a/egs/covost/st/conf/train_ctc_sate_rpr.yaml
+++ b/egs/covost/st/conf/train_ctc_sate_rpr.yaml
@@ -11,9 +11,10 @@ log-interval: 100
 seed: 1
 report-accuracy: True

-#load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/train_ctc_st_vocab/avg_10_checkpoint.pt
-#load-pretrained-text-encoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
-#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
+#load-pretrained-encoder-from:
+#load-pretrained-acoustic-encoder-from:
+#load-pretrained-text-encoder-from:
+#load-pretrained-decoder-from:

 arch: s2t_sate
 share-decoder-input-output-embed: True
@@ -49,10 +50,6 @@ cnn-module-kernel: 31
 acoustic-encoder: transformer
 adapter: league

-encoder-attention-type: relative
-#decoder-attention-type: relative
-max-relative-length: 100
-
 #decoder-embed-dim: 256
 #decoder-ffn-embed-dim: 2048
 #decoder-attention-heads: 4

--- a/egs/lower/st/conf/train_ctc_sate_conformer.yaml
+++ b/egs/lower/st/conf/train_ctc_sate_conformer.yaml
@@ -11,9 +11,10 @@ log-interval: 100
 seed: 1
 report-accuracy: True

-#load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/train_ctc_baseline/avg_10_checkpoint.pt
-#load-pretrained-text-encoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
-#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
+#load-pretrained-encoder-from:
+#load-pretrained-acoustic-encoder-from:
+#load-pretrained-text-encoder-from:
+#load-pretrained-decoder-from:

 arch: s2t_sate
 share-decoder-input-output-embed: True

--- a/egs/lower/st/conf/train_ctc_enc_rpr.yaml
+++ b/egs/lower/st/conf/train_ctc_enc_rpr.yaml
 train-subset: train_st
 valid-subset: dev_st

-max-epoch: 100
+max-epoch: 50
 max-update: 100000

 num-workers: 8
@@ -12,9 +12,11 @@ seed: 1
 report-accuracy: True

 #load-pretrained-encoder-from:
+#load-pretrained-acoustic-encoder-from:
+#load-pretrained-text-encoder-from:
 #load-pretrained-decoder-from:

-arch: s2t_transformer_s
+arch: s2t_sate
 share-decoder-input-output-embed: True
 optimizer: adam
 clip-norm: 10.0
@@ -28,6 +30,8 @@ ctc-weight: 0.3
 criterion: label_smoothed_cross_entropy_with_ctc
 label_smoothing: 0.1

+encoder-normalize-before: True
+decoder-normalize-before: True
 conv-kernel-sizes: 5,5
 conv-channels: 1024
 dropout: 0.1
@@ -35,12 +39,24 @@ activation-fn: relu
 encoder-embed-dim: 256
 encoder-ffn-embed-dim: 2048
 encoder-layers: 12
+text-encoder-layers: 6
 decoder-layers: 6
 encoder-attention-heads: 4
-encoder-attention-type: rel_selfattn

-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
+macaron-style: True
+use-cnn-module: True
+cnn-module-kernel: 31
+
+acoustic-encoder: conformer
+adapter: league
+
+encoder-attention-type: relative
+decoder-attention-type: relative
+max-encoder-relative-length: 100
+max-decoder-relative-length: 20
+
+decoder-embed-dim: 256
+decoder-ffn-embed-dim: 2048
+decoder-attention-heads: 4
+attention-dropout: 0.1
+activation-dropout: 0.1
--- a/egs/lower/st/conf/train_ctc.yaml
+++ b/egs/lower/st/conf/train_ctc.yaml
@@ -12,9 +12,11 @@ seed: 1
 report-accuracy: True

 #load-pretrained-encoder-from:
+#load-pretrained-acoustic-encoder-from:
+#load-pretrained-text-encoder-from:
 #load-pretrained-decoder-from:

-arch: s2t_transformer_s
+arch: s2t_sate
 share-decoder-input-output-embed: True
 optimizer: adam
 clip-norm: 10.0
@@ -28,6 +30,8 @@ ctc-weight: 0.3
 criterion: label_smoothed_cross_entropy_with_ctc
 label_smoothing: 0.1

+encoder-normalize-before: True
+decoder-normalize-before: True
 conv-kernel-sizes: 5,5
 conv-channels: 1024
 dropout: 0.1
@@ -35,9 +39,22 @@ activation-fn: relu
 encoder-embed-dim: 256
 encoder-ffn-embed-dim: 2048
 encoder-layers: 12
+text-encoder-layers: 6
 decoder-layers: 6
 encoder-attention-heads: 4

+macaron-style: True
+use-cnn-module: True
+cnn-module-kernel: 31
+
+acoustic-encoder: transformer
+adapter: league
+
+encoder-attention-type: relative
+decoder-attention-type: relative
+max-encoder-relative-length: 100
+max-decoder-relative-length: 20
+
 #decoder-embed-dim: 256
 #decoder-ffn-embed-dim: 2048
 #decoder-attention-heads: 4

--- a/egs/lower/st/conf/train_ctc_conformer_m.yaml
+++ b/egs/lower/st/conf/train_ctc_conformer_m.yaml
@@ -38,12 +38,21 @@ conv-channels: 1024
 #decoder-layers: 6
 #encoder-attention-heads: 4

-macaron-style: True
-use-cnn-module: True
-cnn-module-kernel: 31
-
 #decoder-embed-dim: 256
 #decoder-ffn-embed-dim: 2048
 #decoder-attention-heads: 4
 #attention-dropout: 0.1
 #activation-dropout: 0.1
+
+# conformer
+#macaron-style: True
+#use-cnn-module: True
+#cnn-module-kernel: 31
+
+# relative position encoding
+#encoder-attention-type: relative
+#decoder-attention-type: relative
+#max-encoder-relative-length: 100
+#max-decoder-relative-length: 20
+
+
--- a/egs/lower/st/conf/train.yaml
+++ b/egs/lower/st/conf/train.yaml
@@ -12,9 +12,11 @@ seed: 1
 report-accuracy: True

 #load-pretrained-encoder-from:
+#load-pretrained-acoustic-encoder-from:
+#load-pretrained-text-encoder-from:
 #load-pretrained-decoder-from:

-arch: s2t_transformer_s
+arch: s2t_sate
 share-decoder-input-output-embed: True
 optimizer: adam
 clip-norm: 10.0
@@ -27,6 +29,8 @@ lr: 2e-3
 criterion: label_smoothed_cross_entropy
 label_smoothing: 0.1

+encoder-normalize-before: True
+decoder-normalize-before: True
 conv-kernel-sizes: 5,5
 conv-channels: 1024
 dropout: 0.1
@@ -34,9 +38,17 @@ activation-fn: relu
 encoder-embed-dim: 256
 encoder-ffn-embed-dim: 2048
 encoder-layers: 12
+text-encoder-layers: 6
 decoder-layers: 6
 encoder-attention-heads: 4

+macaron-style: True
+use-cnn-module: True
+cnn-module-kernel: 31
+
+acoustic-encoder: transformer
+adapter: league
+
 #decoder-embed-dim: 256
 #decoder-ffn-embed-dim: 2048
 #decoder-attention-heads: 4

--- a/egs/covost/st/conf/train_sate.yaml
+++ b/egs/covost/st/conf/train_sate.yaml
-train-subset: train_st
+train-subset: train_st,train_covost
 valid-subset: dev_st

 max-epoch: 50
@@ -11,13 +11,10 @@ log-interval: 100
 seed: 1
 report-accuracy: True

-#load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/train_ctc_st_vocab/avg_10_checkpoint.pt
-#load-pretrained-text-encoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
-#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
-
-load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/train_ctc_baseline_lcrm/avg_10_checkpoint.pt
-load-pretrained-text-encoder-from: /home/xuchen/st/Fairseq-S2T/../checkpoints/mustc/mt/train_st_share10k_lcrm_baseline/avg_10_checkpoint.pt
-load-pretrained-decoder-from: /home/xuchen/st/Fairseq-S2T/../checkpoints/mustc/mt/train_st_share10k_lcrm_baseline/avg_10_checkpoint.pt
+#load-pretrained-encoder-from:
+#load-pretrained-acoustic-encoder-from:
+#load-pretrained-text-encoder-from:
+#load-pretrained-decoder-from:

 arch: s2t_sate
 share-decoder-input-output-embed: True
@@ -52,6 +49,11 @@ cnn-module-kernel: 31
 acoustic-encoder: transformer
 adapter: league

+encoder-attention-type: relative
+decoder-attention-type: relative
+max-encoder-relative-length: 100
+max-decoder-relative-length: 20
+
 #decoder-embed-dim: 256
 #decoder-ffn-embed-dim: 2048
 #decoder-attention-heads: 4

--- a/egs/mustc/asr/decode.sh
+++ b/egs/mustc/asr/decode.sh
@@ -3,7 +3,7 @@
 gpu_num=1

 data_dir=
-test_subset=tst-COMMON
+test_subset=(tst-COMMON)

 exp_name=
 if [ "$#" -eq 1 ]; then
@@ -12,7 +12,9 @@ fi

 n_average=10
 beam_size=5
-max_tokens=40000
+len_penalty=1.0
+max_tokens=10000
+dec_model=checkpoint_best.pt

 cmd="./run.sh
    --stage 2
@@ -21,13 +23,16 @@ cmd="./run.sh
    --exp_name ${exp_name}
    --n_average ${n_average}
    --beam_size ${beam_size}
+    --len_penalty ${len_penalty}
    --max_tokens ${max_tokens}
+    --dec_model ${dec_model}
    "

 if [[ -n ${data_dir} ]]; then
    cmd="$cmd --data_dir ${data_dir}"
 fi
 if [[ -n ${test_subset} ]]; then
+    test_subset=`echo ${test_subset[*]} | sed 's/ /,/g'`
    cmd="$cmd --test_subset ${test_subset}"
 fi


--- a/egs/mustc/asr/run.sh
+++ b/egs/mustc/asr/run.sh
@@ -32,12 +32,13 @@ src_lang=en
 tgt_lang=de
 lang=${src_lang}-${tgt_lang}

-dataset=mustc
+dataset=mustc-v2
 task=speech_to_text
 vocab_type=unigram
 vocab_size=5000
 speed_perturb=0
 lcrm=1
+tokenizer=0

 use_specific_dict=0
 specific_prefix=valid
@@ -65,8 +66,10 @@ max_tokens=40000
 step_valid=0

 # decoding setting
+dec_model=checkpoint_best.pt
 n_average=10
 beam_size=5
+len_penalty=1.0

 if [[ ${speed_perturb} -eq 1 ]]; then
    data_dir=${data_dir}_sp
@@ -80,6 +83,10 @@ if [[ ${use_specific_dict} -eq 1 ]]; then
    data_dir=${data_dir}_${specific_prefix}
    exp_prefix=${exp_prefix}_${specific_prefix}
 fi
+if [[ ${tokenizer} -eq 1 ]]; then
+    data_dir=${data_dir}_tok
+    exp_prefix=${exp_prefix}_tok
+fi

 . ./local/parse_options.sh || exit 1;

@@ -128,6 +135,11 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
        --lowercase-src
        --rm-punc-src"
    fi
+    if [[ ${tokenizer} -eq 1 ]]; then
+        cmd="$cmd
+        --tokenizer"
+    fi
+
    echo -e "\033[34mRun command: \n${cmd} \033[0m"
    [[ $eval -eq 1 ]] && eval ${cmd}
    deactivate
@@ -166,6 +178,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
        --train-config ${train_config}
        --task ${task}
        --max-tokens ${max_tokens}
+        --skip-invalid-size-inputs-valid-test
        --update-freq ${update_freq}
        --log-interval 100
        --save-dir ${model_dir}
@@ -185,11 +198,12 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
        --fp16"
    fi
    if [[ $step_valid -eq 1 ]]; then
-        validate_interval=10000
-        save_interval=10000
-        no_epoch_checkpoints=1
-        save_interval_updates=5000
-        keep_interval_updates=3
+        validate_interval=1
+        save_interval=1
+        keep_last_epochs=10
+        no_epoch_checkpoints=0
+        save_interval_updates=500
+        keep_interval_updates=10
    else
        validate_interval=1
        keep_last_epochs=10
@@ -250,7 +264,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    	echo -e "\033[34mRun command: \n${cmd} \033[0m"
    	[[ $eval -eq 1 ]] && eval $cmd
 	else
-		dec_model=checkpoint_best.pt
+		dec_model=${dec_model}
 	fi

    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
@@ -280,6 +294,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
        --results-path ${model_dir}
        --max-tokens ${max_tokens}
        --beam ${beam_size}
+        --lenpen ${len_penalty}
        --scoring wer
        --wer-tokenizer 13a
        --wer-lowercase

--- a/egs/mustc/asr/train.sh
+++ b/egs/mustc/asr/train.sh
@@ -6,14 +6,27 @@ gpu_num=8
 update_freq=2
 max_tokens=20000

-extra_tag=lcrm
+exp_name=
+extra_tag=
 extra_parameter=

 #extra_tag="${extra_tag}"
 #extra_parameter="${extra_parameter} "

-exp_tag=
+#extra_tag="${extra_tag}_encdlcl"
+#extra_parameter="${extra_parameter} --use-enc-dlcl"
+
+#extra_tag="${extra_tag}_decdlcl"
+#extra_parameter="${extra_parameter} --use-dec-dlcl"
+
+exp_tag=baseline
 train_config=train_ctc.yaml
+#train_config=train_ctc_conformer.yaml
+#train_config=train_ctc_conformer_rpr.yaml
+#train_config=train_ctc_sate.yaml
+#train_config=train_ctc_sate_rpr.yaml
+#train_config=train_ctc_sate_conformer.yaml
+#train_config=train_ctc_sate_conformer_rpr.yaml

 cmd="./run.sh
    --stage 1
@@ -24,6 +37,9 @@ cmd="./run.sh
    --max_tokens ${max_tokens}
    "

+if [[ -n ${exp_name} ]]; then
+    cmd="$cmd --exp_name ${exp_name}"
+fi
 if [[ -n ${exp_tag} ]]; then
    cmd="$cmd --exp_tag ${exp_tag}"
 fi

--- a/egs/mustc/mt/binary.sh
+++ b/egs/mustc/mt/binary.sh
@@ -29,11 +29,15 @@ for split in ${splits[@]}; do
        tgt_file=${tgt_file}.tok
    fi

-    cmd="spm_encode
-    --model ${vocab_dir}/${src_vocab_prefix}.model
+    cmd="cat ${src_file}"
+    if [[ ${lcrm} -eq 1 ]]; then
+        cmd="python local/lower_rm.py ${src_file}"
+    fi
+    cmd="${cmd}
+    | spm_encode --model ${vocab_dir}/${src_vocab_prefix}.model
    --output_format=piece
-    < ${src_file}
    > ${src_file}.spm"
+
    echo -e "\033[34mRun command: \n${cmd} \033[0m"
    [[ $eval -eq 1 ]] && eval ${cmd}


--- a/egs/mustc/mt/conf/train_dlcl.yaml
+++ b/egs/mustc/mt/conf/train_dlcl.yaml
@@ -44,3 +44,6 @@ encoder-attention-heads: 8
 decoder-embed-dim: 512
 decoder-ffn-embed-dim: 2048
 decoder-attention-heads: 8
+
+use-enc-dlcl: True
+use-dec-dlcl: True
\ No newline at end of file
--- a/egs/mustc/mt/conf/train_dlcl_rpr.yaml
+++ b/egs/mustc/mt/conf/train_dlcl_rpr.yaml
@@ -47,4 +47,8 @@ decoder-attention-heads: 8

 encoder-attention-type: relative
 decoder-attention-type: relative
-max-relative-length: 20
\ No newline at end of file
+max-encoder-relative-length: 20
+max-decoder-relative-length: 20
+
+use-enc-dlcl: True
+use-dec-dlcl: True
--- a/egs/mustc/mt/conf/train_rpr.yaml
+++ b/egs/mustc/mt/conf/train_rpr.yaml
@@ -47,4 +47,5 @@ decoder-attention-heads: 8

 encoder-attention-type: relative
 decoder-attention-type: relative
-max-relative-length: 20
+max-encoder-relative-length: 20
+max-decoder-relative-length: 20
--- a/egs/mustc/mt/decode.sh
+++ b/egs/mustc/mt/decode.sh
@@ -3,7 +3,7 @@
 gpu_num=1

 data_dir=
-test_subset=test
+test_subset=(test)

 exp_name=
 if [ "$#" -eq 1 ]; then
@@ -12,7 +12,9 @@ fi

 n_average=10
 beam_size=5
-max_tokens=20000
+len_penalty=1.0
+max_tokens=10000
+dec_model=checkpoint_best.pt

 cmd="./run.sh
    --stage 2
@@ -21,13 +23,16 @@ cmd="./run.sh
    --exp_name ${exp_name}
    --n_average ${n_average}
    --beam_size ${beam_size}
+    --len_penalty ${len_penalty}
    --max_tokens ${max_tokens}
+    --dec_model ${dec_model}
    "

 if [[ -n ${data_dir} ]]; then
    cmd="$cmd --data_dir ${data_dir}"
 fi
 if [[ -n ${test_subset} ]]; then
+    test_subset=`echo ${test_subset[*]} | sed 's/ /,/g'`
    cmd="$cmd --test_subset ${test_subset}"
 fi


--- a/egs/mustc/mt/run.sh
+++ b/egs/mustc/mt/run.sh
@@ -40,7 +40,7 @@ share_dict=1
 lcrm=1
 tokenizer=1

-use_specific_dict=1
+use_specific_dict=0
 specific_prefix=wmt_share32k
 specific_dir=/home/xuchen/st/data/wmt/mt_lcrm/en-de/unigram32000_share
 src_vocab_prefix=spm_unigram32000_share
@@ -50,8 +50,8 @@ org_data_dir=/media/data/${dataset}
 data_dir=~/st/data/${dataset}/mt/${lang}
 train_subset=train
 valid_subset=dev
-test_subset=tst-COMMON
-trans_set=test
+trans_subset=tst-COMMON
+test_subset=test

 # exp
 exp_prefix=${time}
@@ -70,8 +70,10 @@ step_valid=0
 bleu_valid=0

 # decoding setting
+dec_model=checkpoint_best.pt
 n_average=10
 beam_size=5
+len_penalty=1.0

 if [[ ${use_specific_dict} -eq 1 ]]; then
    exp_prefix=${specific_prefix}_${exp_prefix}
@@ -94,7 +96,7 @@ fi
 if [[ ${tokenizer} -eq 1 ]]; then
    train_subset=${train_subset}.tok
    valid_subset=${valid_subset}.tok
-    test_subset=${test_subset}.tok
+    trans_subset=${trans_subset}.tok
    data_dir=${data_dir}_tok
    exp_prefix=${exp_prefix}_tok
 fi
@@ -128,7 +130,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
            cmd="python ${root_dir}/examples/speech_to_text/prep_mt_data.py
                --data-root ${org_data_dir}
                --output-root ${data_dir}
-                --splits ${train_subset},${valid_subset},${test_subset}
+                --splits ${train_subset},${valid_subset},${trans_subset}
                --src-lang ${src_lang}
                --tgt-lang ${tgt_lang}
                --vocab-type ${vocab_type}
@@ -146,7 +148,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    fi

    mkdir -p ${data_dir}/data
-    for split in ${train_subset} ${valid_subset} ${test_subset}; do
+    for split in ${train_subset} ${valid_subset} ${trans_subset}; do
    {
        cmd="cat ${org_data_dir}/${lang}/data/${split}.${src_lang}"
        if [[ ${lcrm} -eq 1 ]]; then
@@ -220,6 +222,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
        --train-config ${train_config}
        --task ${task}
        --max-tokens ${max_tokens}
+        --skip-invalid-size-inputs-valid-test
        --update-freq ${update_freq}
        --log-interval 100
        --save-dir ${model_dir}
@@ -314,7 +317,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    	echo -e "\033[34mRun command: \n${cmd} \033[0m"
    	[[ $eval -eq 1 ]] && eval $cmd
 	else
-		dec_model=checkpoint_best.pt
+		dec_model=${dec_model}
 	fi

    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
@@ -327,13 +330,12 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    fi
    export CUDA_VISIBLE_DEVICES=${device}

-	#tmp_file=$(mktemp ${model_dir}/tmp-XXXXX)
-	#trap 'rm -rf ${tmp_file}' EXIT
 	result_file=${model_dir}/decode_result
 	[[ -f ${result_file} ]] && rm ${result_file}

-    trans_set=(${trans_set//,/ })
-	for subset in ${trans_set[@]}; do
+    test_subset=(${test_subset//,/ })
+	for subset in ${test_subset[@]}; do
+        subset=${subset}_st
  		cmd="python ${root_dir}/fairseq_cli/generate.py
        ${data_dir}
        --source-lang ${src_lang}
@@ -344,11 +346,17 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
        --results-path ${model_dir}
        --max-tokens ${max_tokens}
        --beam ${beam_size}
+        --lenpen ${len_penalty}
        --post-process sentencepiece
+        --scoring sacrebleu"
+
+        if [[ ${tokenizer} -eq 1 ]]; then
+            cmd="${cmd}
        --tokenizer moses
        --moses-source-lang ${src_lang}
-        --moses-target-lang ${tgt_lang}
-        --scoring sacrebleu"
+        --moses-target-lang ${tgt_lang}"
+        fi
+
    	echo -e "\033[34mRun command: \n${cmd} \033[0m"

        if [[ $eval -eq 1 ]]; then

--- a/egs/mustc/st/binary.sh
+++ b/egs/mustc/st/binary.sh
@@ -2,6 +2,9 @@ set -e

 eval=1

+lcrm=1
+tokenizer=0
+
 root_dir=~/st/Fairseq-S2T
 data_dir=/home/xuchen/st/data/test
 vocab_dir=/home/xuchen/st/data/mustc/st_lcrm/en-de
@@ -16,8 +19,9 @@ source ~/tools/audio/bin/activate

 splits=`echo ${splits[*]} | sed 's/ /,/g'`

-cp -r ${vocab_dir}/${asr_vocab_prefix}.* ${data_dir}
-cp -r ${vocab_dir}/${st_vocab_prefix}.* ${data_dir}
+cp -r ${vocab_dir}/${asr_vocab_prefix}.* ${data_dir}/${src_lang}-${tgt_lang}
+cp -r ${vocab_dir}/${st_vocab_prefix}.* ${data_dir}/${src_lang}-${tgt_lang}
+rm -rf ${data_dir}/${src_lang}-${tgt_lang}/fbank80.zip

 cmd="python ${root_dir}/examples/speech_to_text/prep_st_data.py
    --data-root ${data_dir}
@@ -27,12 +31,21 @@ cmd="python ${root_dir}/examples/speech_to_text/prep_st_data.py
    --src-lang ${src_lang}
    --tgt-lang ${tgt_lang}
    --add-src
-    --lowercase-src
-    --rm-punc-src
+    --share
    --asr-prefix ${asr_vocab_prefix}
-    --tgt-prefix ${st_vocab_prefix}
+    --st-spm-prefix ${st_vocab_prefix}
    --cmvn-type utterance"

+    if [[ ${lcrm} -eq 1 ]]; then
+        cmd="$cmd
+    --lowercase-src
+    --rm-punc-src"
+    fi
+    if [[ ${tokenizer} -eq 1 ]]; then
+        cmd="$cmd
+    --tokenizer"
+    fi
+
 echo -e "\033[34mRun command: \n${cmd} \033[0m"
 [[ $eval -eq 1 ]] && eval ${cmd}
 deactivate
--- a/egs/mustc/st/conf/train.yaml
+++ b/egs/mustc/st/conf/train.yaml
@@ -37,8 +37,8 @@ encoder-layers: 12
 decoder-layers: 6
 encoder-attention-heads: 4

-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
+decoder-embed-dim: 256
+decoder-ffn-embed-dim: 2048
+decoder-attention-heads: 4
+attention-dropout: 0.1
+activation-dropout: 0.1
--- a/egs/mustc/st/conf/train_ctc.yaml
+++ b/egs/mustc/st/conf/train_ctc.yaml
@@ -38,8 +38,8 @@ encoder-layers: 12
 decoder-layers: 6
 encoder-attention-heads: 4

-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
+decoder-embed-dim: 256
+decoder-ffn-embed-dim: 2048
+decoder-attention-heads: 4
+attention-dropout: 0.1
+activation-dropout: 0.1
--- a/egs/mustc/st/conf/train_ctc_conformer.yaml
+++ b/egs/mustc/st/conf/train_ctc_conformer.yaml
@@ -11,8 +11,8 @@ log-interval: 100
 seed: 1
 report-accuracy: True

-#load-params: 
-#load-pretrained-encoder-from: 
+#load-pretrained-encoder-from:
+#load-pretrained-decoder-from:

 arch: s2t_conformer_s
 share-decoder-input-output-embed: True
@@ -42,8 +42,8 @@ macaron-style: True
 use-cnn-module: True
 cnn-module-kernel: 31

-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
+decoder-embed-dim: 256
+decoder-ffn-embed-dim: 2048
+decoder-attention-heads: 4
+attention-dropout: 0.1
+activation-dropout: 0.1
--- a/egs/lower/st/conf/train_ctc_conformer.yaml
+++ b/egs/lower/st/conf/train_ctc_conformer.yaml
@@ -11,8 +11,8 @@ log-interval: 100
 seed: 1
 report-accuracy: True

-#load-params: 
-#load-pretrained-encoder-from: 
+#load-pretrained-encoder-from:
+#load-pretrained-decoder-from:

 arch: s2t_conformer_s
 share-decoder-input-output-embed: True
@@ -42,6 +42,11 @@ macaron-style: True
 use-cnn-module: True
 cnn-module-kernel: 31

+encoder-attention-type: relative
+decoder-attention-type: relative
+max-encoder-relative-length: 100
+max-decoder-relative-length: 20
+
 #decoder-embed-dim: 256
 #decoder-ffn-embed-dim: 2048
 #decoder-attention-heads: 4

--- a/egs/mustc/st/conf/train_ctc_debug.yaml
+++ b/egs/mustc/st/conf/train_ctc_debug.yaml
-train-subset: train_st,train_v2
-valid-subset: dev_st
-
-max-epoch: 50
-max-update: 100000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-
-#load-pretrained-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/train_ctc_baseline/avg_10_checkpoint.pt
-#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/mustc/mt/train_baseline/avg_10_checkpoint.pt
-
-arch: s2t_transformer_s
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 10000
-lr: 2e-3
-#adam_betas: (0.9,0.98)
-
-ctc-weight: 0.3
-criterion: label_smoothed_cross_entropy_with_ctc
-label_smoothing: 0.1
-
-conv-kernel-sizes: 5,5
-conv-channels: 1024
-dropout: 0.1
-activation-fn: relu
-encoder-embed-dim: 256
-encoder-ffn-embed-dim: 2048
-encoder-layers: 12
-decoder-layers: 6
-encoder-attention-heads: 4
-
-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
--- a/egs/mustc/st/conf/train_ctc_rpr.yaml
+++ b/egs/mustc/st/conf/train_ctc_rpr.yaml
@@ -39,11 +39,12 @@ decoder-layers: 6
 encoder-attention-heads: 4

 encoder-attention-type: relative
-#decoder-attention-type: relative
-max-relative-length: 100
-
-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
+decoder-attention-type: relative
+max-encoder-relative-length: 100
+max-decoder-relative-length: 20
+
+decoder-embed-dim: 256
+decoder-ffn-embed-dim: 2048
+decoder-attention-heads: 4
+attention-dropout: 0.1
+activation-dropout: 0.1
--- a/egs/mustc/st/conf/train_ctc_sate.yaml
+++ b/egs/mustc/st/conf/train_ctc_sate.yaml
@@ -11,13 +11,10 @@ log-interval: 100
 seed: 1
 report-accuracy: True

-#load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/train_ctc_st_vocab/avg_10_checkpoint.pt
-#load-pretrained-text-encoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
-#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
-
-#load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/train_ctc_baseline_lcrm/avg_10_checkpoint.pt
-#load-pretrained-text-encoder-from: /home/xuchen/st/Fairseq-S2T/../checkpoints/mustc/mt/train_st_share10k_lcrm_baseline/avg_10_checkpoint.pt
-#load-pretrained-decoder-from: /home/xuchen/st/Fairseq-S2T/../checkpoints/mustc/mt/train_st_share10k_lcrm_baseline/avg_10_checkpoint.pt
+#load-pretrained-encoder-from:
+#load-pretrained-acoustic-encoder-from:
+#load-pretrained-text-encoder-from:
+#load-pretrained-decoder-from:

 arch: s2t_sate
 share-decoder-input-output-embed: True

--- a/egs/mustc/st/conf/train_ctc_sate_conformer.yaml
+++ b/egs/mustc/st/conf/train_ctc_sate_conformer.yaml
@@ -11,9 +11,10 @@ log-interval: 100
 seed: 1
 report-accuracy: True

-#load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/train_ctc_baseline/avg_10_checkpoint.pt
-#load-pretrained-text-encoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
-#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
+#load-pretrained-encoder-from:
+#load-pretrained-acoustic-encoder-from:
+#load-pretrained-text-encoder-from:
+#load-pretrained-decoder-from:

 arch: s2t_sate
 share-decoder-input-output-embed: True

--- a/egs/covost/st/conf/train_ctc_rpr.yaml
+++ b/egs/covost/st/conf/train_ctc_rpr.yaml
@@ -12,9 +12,11 @@ seed: 1
 report-accuracy: True

 #load-pretrained-encoder-from:
+#load-pretrained-acoustic-encoder-from:
+#load-pretrained-text-encoder-from:
 #load-pretrained-decoder-from:

-arch: s2t_transformer_s
+arch: s2t_sate
 share-decoder-input-output-embed: True
 optimizer: adam
 clip-norm: 10.0
@@ -28,6 +30,8 @@ ctc-weight: 0.3
 criterion: label_smoothed_cross_entropy_with_ctc
 label_smoothing: 0.1

+encoder-normalize-before: True
+decoder-normalize-before: True
 conv-kernel-sizes: 5,5
 conv-channels: 1024
 dropout: 0.1
@@ -35,15 +39,24 @@ activation-fn: relu
 encoder-embed-dim: 256
 encoder-ffn-embed-dim: 2048
 encoder-layers: 12
+text-encoder-layers: 6
 decoder-layers: 6
 encoder-attention-heads: 4

+macaron-style: True
+use-cnn-module: True
+cnn-module-kernel: 31
+
+acoustic-encoder: conformer
+adapter: league
+
 encoder-attention-type: relative
-#decoder-attention-type: relative
-max-relative-length: 100
-
-#decoder-embed-dim: 256
-#decoder-ffn-embed-dim: 2048
-#decoder-attention-heads: 4
-#attention-dropout: 0.1
-#activation-dropout: 0.1
+decoder-attention-type: relative
+max-encoder-relative-length: 100
+max-decoder-relative-length: 20
+
+decoder-embed-dim: 256
+decoder-ffn-embed-dim: 2048
+decoder-attention-heads: 4
+attention-dropout: 0.1
+activation-dropout: 0.1
--- a/egs/mustc/st/conf/train_ctc_sate_rpr.yaml
+++ b/egs/mustc/st/conf/train_ctc_sate_rpr.yaml
@@ -11,9 +11,10 @@ log-interval: 100
 seed: 1
 report-accuracy: True

-#load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/train_ctc_st_vocab/avg_10_checkpoint.pt
-#load-pretrained-text-encoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
-#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
+#load-pretrained-encoder-from:
+#load-pretrained-acoustic-encoder-from:
+#load-pretrained-text-encoder-from:
+#load-pretrained-decoder-from:

 arch: s2t_sate
 share-decoder-input-output-embed: True
@@ -50,8 +51,9 @@ acoustic-encoder: transformer
 adapter: league

 encoder-attention-type: relative
-#decoder-attention-type: relative
-max-relative-length: 100
+decoder-attention-type: relative
+max-encoder-relative-length: 100
+max-decoder-relative-length: 20

 #decoder-embed-dim: 256
 #decoder-ffn-embed-dim: 2048

--- a/egs/mustc/st/conf/train_ctc_conformer_m.yaml
+++ b/egs/mustc/st/conf/train_ctc_conformer_m.yaml
@@ -38,12 +38,21 @@ conv-channels: 1024
 #decoder-layers: 6
 #encoder-attention-heads: 4

-macaron-style: True
-use-cnn-module: True
-cnn-module-kernel: 31
-
 #decoder-embed-dim: 256
 #decoder-ffn-embed-dim: 2048
 #decoder-attention-heads: 4
 #attention-dropout: 0.1
 #activation-dropout: 0.1
+
+# conformer
+#macaron-style: True
+#use-cnn-module: True
+#cnn-module-kernel: 31
+
+# relative position encoding
+#encoder-attention-type: relative
+#decoder-attention-type: relative
+#max-encoder-relative-length: 100
+#max-decoder-relative-length: 20
+
+
--- a/egs/mustc/st/conf/train_sate.yaml
+++ b/egs/mustc/st/conf/train_sate.yaml
@@ -11,13 +11,10 @@ log-interval: 100
 seed: 1
 report-accuracy: True

-#load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/train_ctc_st_vocab/avg_10_checkpoint.pt
-#load-pretrained-text-encoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
-#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/mustc/mt/st_share10k_train_baseline/avg_10_checkpoint.pt
-
-load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/train_ctc_baseline_lcrm/avg_10_checkpoint.pt
-load-pretrained-text-encoder-from: /home/xuchen/st/Fairseq-S2T/../checkpoints/mustc/mt/train_st_share10k_lcrm_baseline/avg_10_checkpoint.pt
-load-pretrained-decoder-from: /home/xuchen/st/Fairseq-S2T/../checkpoints/mustc/mt/train_st_share10k_lcrm_baseline/avg_10_checkpoint.pt
+#load-pretrained-encoder-from:
+#load-pretrained-acoustic-encoder-from:
+#load-pretrained-text-encoder-from:
+#load-pretrained-decoder-from:

 arch: s2t_sate
 share-decoder-input-output-embed: True

--- a/egs/mustc/st/conf/train_sate_rpr.yaml
+++ b/egs/mustc/st/conf/train_sate_rpr.yaml
@@ -11,9 +11,10 @@ log-interval: 100
 seed: 1
 report-accuracy: True

-load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/train_ctc_baseline_lcrm/avg_10_checkpoint.pt
-load-pretrained-text-encoder-from: /home/xuchen/st/Fairseq-S2T/../checkpoints/mustc/mt/train_st_share10k_lcrm_baseline/avg_10_checkpoint.pt
-load-pretrained-decoder-from: /home/xuchen/st/Fairseq-S2T/../checkpoints/mustc/mt/train_st_share10k_lcrm_baseline/avg_10_checkpoint.pt
+#load-pretrained-encoder-from:
+#load-pretrained-acoustic-encoder-from:
+#load-pretrained-text-encoder-from:
+#load-pretrained-decoder-from:

 arch: s2t_sate
 share-decoder-input-output-embed: True
@@ -49,8 +50,9 @@ acoustic-encoder: transformer
 adapter: league

 encoder-attention-type: relative
-#decoder-attention-type: relative
-max-relative-length: 100
+decoder-attention-type: relative
+max-encoder-relative-length: 100
+max-decoder-relative-length: 20

 #decoder-embed-dim: 256
 #decoder-ffn-embed-dim: 2048

--- a/egs/mustc/st/decode.sh
+++ b/egs/mustc/st/decode.sh
@@ -3,7 +3,7 @@
 gpu_num=1

 data_dir=
-test_subset=tst-COMMON
+test_subset=(tst-COMMON)

 exp_name=
 if [ "$#" -eq 1 ]; then
@@ -12,7 +12,9 @@ fi

 n_average=10
 beam_size=5
-max_tokens=40000
+len_penalty=1.0
+max_tokens=10000
+dec_model=checkpoint_best.pt

 cmd="./run.sh
    --stage 2
@@ -21,13 +23,16 @@ cmd="./run.sh
    --exp_name ${exp_name}
    --n_average ${n_average}
    --beam_size ${beam_size}
+    --len_penalty ${len_penalty}
    --max_tokens ${max_tokens}
+    --dec_model ${dec_model}
    "

 if [[ -n ${data_dir} ]]; then
    cmd="$cmd --data_dir ${data_dir}"
 fi
 if [[ -n ${test_subset} ]]; then
+    test_subset=`echo ${test_subset[*]} | sed 's/ /,/g'`
    cmd="$cmd --test_subset ${test_subset}"
 fi


--- a/egs/mustc/st/ensemble.sh
+++ b/egs/mustc/st/ensemble.sh
+#!/usr/bin/env bash
+#
+# Decode test subsets with an ensemble of ST checkpoints and score the output.
+#
+# Usage: ensemble.sh MODEL_LIST SET TEST_SUBSET[,TEST_SUBSET...]
+#   MODEL_LIST   text file of experiment directory names (relative to ${ckpt}),
+#                separated by whitespace or newlines
+#   SET          test collection name; selects /media/data/tst/SET/en-de and is
+#                forwarded to evaluate.sh
+#   TEST_SUBSET  subset name(s) without the "_st" suffix, separated by commas
+#                or spaces
+#
+# Environment: if "device" is already set it is used for CUDA_VISIBLE_DEVICES;
+# otherwise get_devices from ./local/utils.sh is asked for ${gpu_num} devices.
+set -e
+
+if [[ $# -lt 3 ]]; then
+    echo "Usage: $0 MODEL_LIST SET TEST_SUBSET[,TEST_SUBSET...]" >&2
+    exit 1
+fi
+
+gpu_num=1
+root_dir=/home/xuchen/st/Fairseq-S2T
+ckpt=/home/xuchen/st/checkpoints/mustc-v2/st
+
+model_txt=$1
+set=$2
+# Split on commas (and whitespace) so the decoding loop below receives one
+# array element per subset instead of a single scalar string.
+test_subset=(${3//,/ })
+
+#data_dir=/home/xuchen/st/data/mustc-v2/st_lcrm/en-de
+#test_subset=(tst-COMMON)
+
+data_dir=/media/data/tst/$set/en-de
+#test_subset=(office)
+#test_subset=(webrtc1)
+#test_subset=(adap2)
+
+data_config=config_st_share.yaml
+result_file=./result
+
+beam_size=5
+lenpen=0.6
+max_tokens=10000
+
+# Fail early: without this check a missing list would silently yield an empty
+# ensemble and generate.py would be launched with an empty --path.
+if [[ ! -r ${model_txt} ]]; then
+    echo "Model list not readable: ${model_txt}" >&2
+    exit 1
+fi
+
+# Resolve every listed experiment to a checkpoint file, preferring the averaged
+# checkpoint and falling back to checkpoint_best.pt.
+models=()
+# Word splitting of the file content is intentional: one model name per word.
+for line in $(< "${model_txt}"); do
+    model_dir=${ckpt}/${line}
+    if [[ ! -d ${model_dir} ]]; then
+        echo "Model directory not found: ${model_dir}" >&2
+        exit 1
+    fi
+
+    if [[ -f ${model_dir}/avg_10_checkpoint.pt ]]; then
+        model=${model_dir}/avg_10_checkpoint.pt
+    else
+        model=${model_dir}/checkpoint_best.pt
+    fi
+    if [[ ! -f ${model} ]]; then
+        echo "Checkpoint not found: ${model}" >&2
+        exit 1
+    fi
+
+    models+=("${model}")
+done
+
+if [[ ${#models[@]} -eq 0 ]]; then
+    echo "No models listed in ${model_txt}" >&2
+    exit 1
+fi
+
+# Join the checkpoint paths with ':' to form the single --path argument.
+model_path=$(IFS=:; echo "${models[*]}")
+
+# Pick the first unused numbered directory under ensemble/<set> so earlier
+# results are never overwritten.
+res_root=${ckpt}/ensemble/${set}
+i=0
+while [[ -d ${res_root}/${i} ]]; do
+    i=$((i + 1))
+done
+res_dir=${res_root}/${i}
+
+mkdir -p "${res_dir}"
+# Keep a copy of the model list next to the results it produced.
+cp "${model_txt}" "${res_dir}"
+
+
+if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
+    if [[ ${gpu_num} -eq 0 ]]; then
+        device=()
+    else
+        source ./local/utils.sh
+        device=$(get_devices $gpu_num 0)
+    fi
+fi
+export CUDA_VISIBLE_DEVICES=${device}
+
+for subset in "${test_subset[@]}"; do
+    subset=${subset}_st
+    cmd="python ${root_dir}/fairseq_cli/generate.py
+    ${data_dir}
+    --config-yaml ${data_config}
+    --gen-subset ${subset}
+    --task speech_to_text
+    --path ${model_path}
+    --results-path ${res_dir}
+    --skip-invalid-size-inputs-valid-test
+    --max-tokens ${max_tokens}
+    --beam ${beam_size}
+    --lenpen ${lenpen}
+    --scoring sacrebleu"
+    echo -e "\033[34mRun command: \n${cmd} \033[0m"
+
+    # ${cmd} must stay unquoted: word splitting folds the embedded newlines
+    # into spaces so eval sees one command line rather than one per line.
+    eval ${cmd}
+    tail -n 1 "${res_dir}/generate-${subset}.txt"
+
+    # Score from inside the result directory; the subshell keeps the working
+    # directory of this script unchanged for the next iteration.
+    ( cd "${res_dir}" && evaluate.sh "translation-${subset}.txt" "${set}" )
+done
+
--- a/egs/mustc/st/run.sh
+++ b/egs/mustc/st/run.sh
@@ -69,8 +69,10 @@ step_valid=0
 bleu_valid=0

 # decoding setting
+dec_model=checkpoint_best.pt
 n_average=10
 beam_size=5
+len_penalty=1.0

 if [[ ${share_dict} -eq 1 ]]; then
 	data_config=config_st_share.yaml
@@ -217,6 +219,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
        --train-config ${train_config}
        --task ${task}
        --max-tokens ${max_tokens}
+        --skip-invalid-size-inputs-valid-test
        --update-freq ${update_freq}
        --log-interval 100
        --save-dir ${model_dir}
@@ -236,11 +239,12 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
        --fp16"
    fi
    if [[ $step_valid -eq 1 ]]; then
-        validate_interval=10000
-        save_interval=10000
-        no_epoch_checkpoints=1
-        save_interval_updates=5000
-        keep_interval_updates=3
+        validate_interval=1
+        save_interval=1
+        keep_last_epochs=10
+        no_epoch_checkpoints=0
+        save_interval_updates=500
+        keep_interval_updates=10
    else
        validate_interval=1
        keep_last_epochs=10
@@ -310,7 +314,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    	echo -e "\033[34mRun command: \n${cmd} \033[0m"
    	[[ $eval -eq 1 ]] && eval $cmd
 	else
-		dec_model=checkpoint_best.pt
+		dec_model=${dec_model}
 	fi

    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
@@ -323,8 +327,6 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    fi
    export CUDA_VISIBLE_DEVICES=${device}

-	#tmp_file=$(mktemp ${model_dir}/tmp-XXXXX)
-	#trap 'rm -rf ${tmp_file}' EXIT
 	result_file=${model_dir}/decode_result
 	[[ -f ${result_file} ]] && rm ${result_file}

@@ -340,6 +342,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
        --results-path ${model_dir}
        --max-tokens ${max_tokens}
        --beam ${beam_size}
+        --lenpen ${len_penalty}
        --scoring sacrebleu"
    	echo -e "\033[34mRun command: \n${cmd} \033[0m"


--- a/egs/mustc/st/train.sh
+++ b/egs/mustc/st/train.sh
@@ -6,15 +6,27 @@ gpu_num=8
 update_freq=2
 max_tokens=20000

-extra_tag=lcrm
+exp_name=
+extra_tag=
 extra_parameter=

 #extra_tag="${extra_tag}"
 #extra_parameter="${extra_parameter} "

+#extra_tag="${extra_tag}_encdlcl"
+#extra_parameter="${extra_parameter} --use-enc-dlcl"
+
+#extra_tag="${extra_tag}_decdlcl"
+#extra_parameter="${extra_parameter} --use-dec-dlcl"
+
 exp_tag=baseline
-train_config=train_ctc_sate.yaml
-#train_config=train_ctc.yaml
+train_config=train_ctc.yaml
+#train_config=train_ctc_conformer.yaml
+#train_config=train_ctc_conformer_rpr.yaml
+#train_config=train_ctc_sate.yaml
+#train_config=train_ctc_sate_rpr.yaml
+#train_config=train_ctc_sate_conformer.yaml
+#train_config=train_ctc_sate_conformer_rpr.yaml

 cmd="./run.sh
    --stage 1
@@ -25,6 +37,9 @@ cmd="./run.sh
    --max_tokens ${max_tokens}
    "

+if [[ -n ${exp_name} ]]; then
+    cmd="$cmd --exp_name ${exp_name}"
+fi
 if [[ -n ${exp_tag} ]]; then
    cmd="$cmd --exp_tag ${exp_tag}"
 fi

--- a/egs/wmt/mt/binary.sh
+++ b/egs/wmt/mt/binary.sh
-set -e
-
-eval=1
-
-root_dir=~/st/Fairseq-S2T
-data_dir=/home/xuchen/st/data/wmt/test
-vocab_dir=/home/xuchen/st/data/wmt/mt_lcrm/en-de/unigram32000_share
-src_vocab_prefix=spm_unigram32000_share
-tgt_vocab_prefix=spm_unigram32000_share
-
-src_lang=en
-tgt_lang=de
-tokenize=1
-lcrm=1
-splits=(tst-COMMON newstest2014 newstest2016)
-
-for split in ${splits[@]}; do
-    src_file=${data_dir}/${split}.${src_lang}
-    tgt_file=${data_dir}/${split}.${tgt_lang}
-
-    if [[ ${tokenize} -eq 1 ]]; then
-        cmd="tokenizer.perl -l ${src_lang} --threads 8 -no-escape < ${src_file} > ${src_file}.tok"
-        echo -e "\033[34mRun command: \n${cmd} \033[0m"
-        [[ $eval -eq 1 ]] && eval ${cmd}
-
-        cmd="tokenizer.perl -l ${tgt_lang} --threads 8 -no-escape < ${tgt_file} > ${tgt_file}.tok"
-        echo -e "\033[34mRun command: \n${cmd} \033[0m"
-        [[ $eval -eq 1 ]] && eval ${cmd}
-        src_file=${src_file}.tok
-        tgt_file=${tgt_file}.tok
-    fi
-
-    cmd="cat ${src_file}"
-    if [[ ${lcrm} -eq 1 ]]; then
-        cmd="python local/lower_rm.py ${src_file}"
-    fi
-    cmd="${cmd}
-    | spm_encode --model ${vocab_dir}/${src_vocab_prefix}.model
-    --output_format=piece
-    > ${src_file}.spm"
-
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 ]] && eval ${cmd}
-
-    cmd="spm_encode
-    --model ${vocab_dir}/${tgt_vocab_prefix}.model
-    --output_format=piece
-    < ${tgt_file}
-    > ${tgt_file}.spm"
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 ]] && eval ${cmd}
-
-    src_file=${src_file}.spm
-    tgt_file=${tgt_file}.spm
-
-    mkdir -p ${data_dir}/final
-    cmd="cp ${src_file} ${data_dir}/final/${split}.${src_lang}"
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 ]] && eval ${cmd}
-
-    cmd="cp ${tgt_file} ${data_dir}/final/${split}.${tgt_lang}"
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 ]] && eval ${cmd}
-done
-
-n_set=${#splits[*]}
-for ((i=0;i<$n_set;i++)); do
-    dataset[$i]=${data_dir}/final/${splits[$i]}
-done
-pref=`echo ${dataset[*]} | sed 's/ /,/g'`
-
-cmd="python ${root_dir}/fairseq_cli/preprocess.py
-    --source-lang ${src_lang}
-    --target-lang ${tgt_lang}
-    --testpref ${pref}
-    --destdir ${data_dir}/data-bin
-    --srcdict ${vocab_dir}/${src_vocab_prefix}.txt
-    --tgtdict ${vocab_dir}/${tgt_vocab_prefix}.txt
-    --workers 64"
-
-echo -e "\033[34mRun command: \n${cmd} \033[0m"
-[[ $eval -eq 1 ]] && eval ${cmd}
\ No newline at end of file
--- a/egs/wmt/mt/conf/train.yaml
+++ b/egs/wmt/mt/conf/train.yaml
-train-subset: train
-valid-subset: valid
-
-max-epoch: 50
-max-update: 1000000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-skip-invalid-size-inputs-valid-test: True
-
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
-
-arch: transformer
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 8000
-lr: 1e-3
-adam_betas: (0.9,0.997)
-
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-dropout: 0.1
-attention-dropout: 0.1
-activation-dropout: 0.1
-
-activation-fn: relu
-encoder-normalize-before: True
-decoder-normalize-before: True
-encoder-embed-dim: 512
-encoder-ffn-embed-dim: 2048
-encoder-layers: 6
-decoder-layers: 6
-encoder-attention-heads: 8
-
-decoder-embed-dim: 512
-decoder-ffn-embed-dim: 2048
-decoder-attention-heads: 8
--- a/egs/wmt/mt/conf/train_dlcl.yaml
+++ b/egs/wmt/mt/conf/train_dlcl.yaml
-train-subset: train
-valid-subset: valid
-
-max-epoch: 50
-max-update: 1000000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-skip-invalid-size-inputs-valid-test: True
-
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
-
-arch: dlcl_transformer
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 16000
-lr: 2e-3
-adam_betas: (0.9,0.997)
-
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-dropout: 0.1
-attention-dropout: 0.1
-activation-dropout: 0.1
-
-activation-fn: relu
-encoder-normalize-before: True
-decoder-normalize-before: True
-encoder-embed-dim: 512
-encoder-ffn-embed-dim: 2048
-encoder-layers: 6
-decoder-layers: 6
-encoder-attention-heads: 8
-
-decoder-embed-dim: 512
-decoder-ffn-embed-dim: 2048
-decoder-attention-heads: 8
--- a/egs/wmt/mt/conf/train_dlcl_rpr.yaml
+++ b/egs/wmt/mt/conf/train_dlcl_rpr.yaml
-train-subset: train
-valid-subset: valid
-
-max-epoch: 50
-max-update: 1000000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-skip-invalid-size-inputs-valid-test: True
-
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
-
-arch: dlcl_transformer
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 16000
-lr: 2e-3
-adam_betas: (0.9,0.997)
-
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-dropout: 0.1
-attention-dropout: 0.1
-activation-dropout: 0.1
-
-activation-fn: relu
-encoder-normalize-before: True
-decoder-normalize-before: True
-encoder-embed-dim: 512
-encoder-ffn-embed-dim: 2048
-encoder-layers: 6
-decoder-layers: 6
-encoder-attention-heads: 8
-
-decoder-embed-dim: 512
-decoder-ffn-embed-dim: 2048
-decoder-attention-heads: 8
-
-encoder-attention-type: relative
-decoder-attention-type: relative
-max-relative-length: 20
\ No newline at end of file
--- a/egs/wmt/mt/conf/train_rpr.yaml
+++ b/egs/wmt/mt/conf/train_rpr.yaml
-train-subset: train
-valid-subset: valid
-
-max-epoch: 50
-max-update: 1000000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-skip-invalid-size-inputs-valid-test: True
-
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
-
-arch: transformer
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 8000
-lr: 1e-3
-adam_betas: (0.9,0.997)
-
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-dropout: 0.1
-attention-dropout: 0.1
-activation-dropout: 0.1
-
-activation-fn: relu
-encoder-normalize-before: True
-decoder-normalize-before: True
-encoder-embed-dim: 512
-encoder-ffn-embed-dim: 2048
-encoder-layers: 6
-decoder-layers: 6
-encoder-attention-heads: 8
-
-decoder-embed-dim: 512
-decoder-ffn-embed-dim: 2048
-decoder-attention-heads: 8
-
-encoder-attention-type: relative
-decoder-attention-type: relative
-max-relative-length: 20
--- a/egs/wmt/mt/conf/train_s.yaml
+++ b/egs/wmt/mt/conf/train_s.yaml
-train-subset: train
-valid-subset: valid
-
-max-epoch: 50
-max-update: 1000000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-skip-invalid-size-inputs-valid-test: True
-
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
-
-arch: transformer
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 8000
-lr: 1e-3
-adam_betas: (0.9,0.997)
-
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-dropout: 0.1
-attention-dropout: 0.1
-activation-dropout: 0.1
-
-activation-fn: relu
-encoder-normalize-before: True
-decoder-normalize-before: True
-encoder-embed-dim: 256
-encoder-ffn-embed-dim: 2048
-encoder-layers: 6
-decoder-layers: 6
-encoder-attention-heads: 4
-
-decoder-embed-dim: 256
-decoder-ffn-embed-dim: 2048
-decoder-attention-heads: 4
--- a/egs/wmt/mt/decode.sh
+++ b/egs/wmt/mt/decode.sh
-#! /bin/bash
-
-gpu_num=1
-
-data_dir=
-test_subset=test,test1
-
-exp_name=
-if [ "$#" -eq 1 ]; then
-    exp_name=$1
-fi
-
-n_average=10
-beam_size=5
-max_tokens=20000
-
-cmd="./run.sh
-    --stage 2
-    --stop_stage 2
-    --gpu_num ${gpu_num}
-    --exp_name ${exp_name}
-    --n_average ${n_average}
-    --beam_size ${beam_size}
-    --max_tokens ${max_tokens}
-    "
-
-if [[ -n ${data_dir} ]]; then
-    cmd="$cmd --data_dir ${data_dir}"
-fi
-if [[ -n ${test_subset} ]]; then
-    cmd="$cmd --test_subset ${test_subset}"
-fi
-
-echo $cmd
-eval $cmd
--- a/egs/wmt/mt/local/lower_rm.py
+++ b/egs/wmt/mt/local/lower_rm.py
-import sys
-import string
-
-
-in_file = sys.argv[1]
-
-with open(in_file, "r", encoding="utf-8") as f:
-    for line in f.readlines():
-        line = line.strip().lower()
-        for w in string.punctuation:
-            line = line.replace(w, "")
-        line = line.replace("  ", "")
-        print(line)
-
--- a/egs/wmt/mt/local/monitor.sh
+++ b/egs/wmt/mt/local/monitor.sh
-gpu_num=1
-
-while :
-do
-    all_devices=$(seq 0 `gpustat | sed '1,2d' | wc -l`);
-    count=0
-    for dev in ${all_devices[@]}
-    do
-        line=`expr $dev + 2`
-        use=`gpustat -p | head -n $line | tail -1 | cut -d '|' -f4 | wc -w`
-        if [[ $use -eq 0 ]]; then
-            device[$count]=$dev
-            count=`expr $count + 1`
-            if [[ $count -eq $gpu_num ]]; then
-                break
-            fi
-        fi
-    done
-    if [[ ${#device[@]} -lt $gpu_num ]]; then
-        sleep 60s
-    else
-        echo "Run $cmd"
-        eval $cmd
-        sleep 10s
-        exit
-    fi
-done
--- a/egs/wmt/mt/local/parse_options.sh
+++ b/egs/wmt/mt/local/parse_options.sh
-#!/usr/bin/env bash
-
-# Copyright 2012  Johns Hopkins University (Author: Daniel Povey);
-#                 Arnab Ghoshal, Karel Vesely
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#  http://www.apache.org/licenses/LICENSE-2.0
-#
-# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-# MERCHANTABLITY OR NON-INFRINGEMENT.
-# See the Apache 2 License for the specific language governing permissions and
-# limitations under the License.
-
-
-# Parse command-line options.
-# To be sourced by another script (as in ". parse_options.sh").
-# Option format is: --option-name arg
-# and shell variable "option_name" gets set to value "arg."
-# The exception is --help, which takes no arguments, but prints the
-# $help_message variable (if defined).
-
-
-###
-### The --config file options have lower priority to command line
-### options, so we need to import them first...
-###
-
-# Now import all the configs specified by command-line, in left-to-right order
-for ((argpos=1; argpos<$#; argpos++)); do
-  if [ "${!argpos}" == "--config" ]; then
-    argpos_plus1=$((argpos+1))
-    config=${!argpos_plus1}
-    [ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
-    . $config  # source the config file.
-  fi
-done
-
-
-###
-### Now we process the command line options
-###
-while true; do
-  [ -z "${1:-}" ] && break;  # break if there are no arguments
-  case "$1" in
-    # If the enclosing script is called with --help option, print the help
-    # message and exit.  Scripts should put help messages in $help_message
-    --help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
-      else printf "$help_message\n" 1>&2 ; fi;
-      exit 0 ;;
-    --*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
-      exit 1 ;;
-    # If the first command-line argument begins with "--" (e.g. --foo-bar),
-    # then work out the variable name as $name, which will equal "foo_bar".
-    --*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
-      # Next we test whether the variable in question is undefned-- if so it's
-      # an invalid option and we die.  Note: $0 evaluates to the name of the
-      # enclosing script.
-      # The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
-      # is undefined.  We then have to wrap this test inside "eval" because
-      # foo_bar is itself inside a variable ($name).
-      eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
-
-      oldval="`eval echo \\$$name`";
-      # Work out whether we seem to be expecting a Boolean argument.
-      if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
-        was_bool=true;
-      else
-        was_bool=false;
-      fi
-
-      # Set the variable to the right value-- the escaped quotes make it work if
-      # the option had spaces, like --cmd "queue.pl -sync y"
-      eval $name=\"$2\";
-
-      # Check that Boolean-valued arguments are really Boolean.
-      if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
-        echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
-        exit 1;
-      fi
-      shift 2;
-      ;;
-  *) break;
-  esac
-done
-
-
-# Check for an empty argument to the --cmd option, which can easily occur as a
-# result of scripting errors.
-[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
-
-
-true; # so this script returns exit code 0.
--- a/egs/wmt/mt/local/path.sh
+++ b/egs/wmt/mt/local/path.sh
-MAIN_ROOT=$PWD/../../..
-KALDI_ROOT=$MAIN_ROOT/tools/kaldi
-
-export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PATH
-[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
-. $KALDI_ROOT/tools/config/common_path.sh
-export LC_ALL=C
-
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$MAIN_ROOT/src/lib
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$MAIN_ROOT/tools/chainer_ctc/ext/warp-ctc/build
-. "${MAIN_ROOT}"/tools/activate_python.sh && . "${MAIN_ROOT}"/tools/extra_path.sh
-export PATH=$MAIN_ROOT/utils:$MAIN_ROOT/espnet/bin:$PATH
-
-export OMP_NUM_THREADS=1
-
-# check extra module installation
-if ! which tokenizer.perl > /dev/null; then
-    echo "Error: it seems that moses is not installed." >&2
-    echo "Error: please install moses as follows." >&2
-    echo "Error: cd ${MAIN_ROOT}/tools && make moses.done" >&2
-    return 1
-fi
-
-# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
-export PYTHONIOENCODING=UTF-8
--- a/egs/wmt/mt/local/utils.sh
+++ b/egs/wmt/mt/local/utils.sh
-
-get_devices(){
-    gpu_num=$1
-    use_cpu=$2
-    device=()
-    while :
-    do
-        record=`mktemp -t temp.record.XXXXXX`
-        gpustat > $record
-        all_devices=$(seq 0 `cat $record | sed '1,2d' | wc -l`);
-        count=0
-        for dev in ${all_devices[@]}
-        do
-            line=`expr $dev + 2`
-            use=`cat $record | head -n $line | tail -1 | cut -d '|' -f3 | cut -d '/' -f1`
-            if [[ $use -lt 100 ]]; then
-                device[$count]=$dev
-                count=`expr $count + 1`
-                if [[ $count -eq $gpu_num ]]; then
-                    break
-                fi
-            fi
-        done
-        if [[ ${#device[@]} -lt $gpu_num ]]; then
-            if [[ $use_cpu -eq 1 ]]; then
-                device=(-1)
-            else
-                sleep 60s
-            fi
-        else
-            break
-        fi
-    done
-
-    echo ${device[*]} | sed 's/ /,/g'
-    return $?
-}
-
-
--- a/egs/wmt/mt/run.sh
+++ b/egs/wmt/mt/run.sh
-#! /bin/bash
-
-# Processing MuST-C Datasets
-
-# Copyright 2021 Natural Language Processing Laboratory 
-# Xu Chen (xuchenneu@163.com)
-
-# Set bash to 'debug' mode, it will exit on :
-# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
-set -e
-#set -u
-set -o pipefail
-export PYTHONIOENCODING=UTF-8
-
-eval=1
-time=$(date "+%m%d_%H%M")
-
-stage=0
-stop_stage=0
-
-######## hardware ########
-# devices
-#device=()
-gpu_num=8
-update_freq=1
-
-root_dir=~/st/Fairseq-S2T
-pwd_dir=$PWD
-
-# dataset
-src_lang=en
-tgt_lang=de
-lang=${src_lang}-${tgt_lang}
-
-dataset=wmt
-task=translation
-vocab_type=unigram
-vocab_size=32000
-share_dict=1
-lcrm=1
-
-use_specific_dict=1
-specific_prefix=st_tok_share10k
-specific_dir=/home/xuchen/st/data/mustc/st_lcrm_tok/en-de
-src_vocab_prefix=spm_unigram10000_st_share
-tgt_vocab_prefix=spm_unigram10000_st_share
-
-org_data_dir=~/st/data/${dataset}
-data_dir=~/st/data/${dataset}/mt/${lang}
-train_subset=train
-valid_subset=dev
-test_subset=test
-
-# exp
-exp_prefix=${time}
-extra_tag=
-extra_parameter=
-exp_tag=baseline
-exp_name=
-
-# config
-train_config=train.yaml
-
-# training setting
-fp16=1
-max_tokens=4096
-step_valid=1
-bleu_valid=0
-
-# decoding setting
-n_average=10
-beam_size=5
-
-if [[ ${use_specific_dict} -eq 1 ]]; then
-    exp_prefix=${exp_prefix}_${specific_prefix}
-    data_dir=${data_dir}/${specific_prefix}
-    mkdir -p ${data_dir}
-else
-    data_dir=${data_dir}/${vocab_type}${vocab_size}
-    src_vocab_prefix=spm_${vocab_type}${vocab_size}_${src_lang}
-    tgt_vocab_prefix=spm_${vocab_type}${vocab_size}_${tgt_lang}
-    if [[ $share_dict -eq 1 ]]; then
-        data_dir=${data_dir}_share
-        src_vocab_prefix=spm_${vocab_type}${vocab_size}_share
-        tgt_vocab_prefix=spm_${vocab_type}${vocab_size}_share
-    fi
-fi
-
-if [[ ${lcrm} -eq 1 ]]; then
-    data_dir=${data_dir}_lcrm
-    exp_prefix=${exp_prefix}_lcrm
-fi
-
-. ./local/parse_options.sh || exit 1;
-
-# full path
-train_config=$pwd_dir/conf/${train_config}
-if [[ -z ${exp_name} ]]; then
-    exp_name=${exp_prefix}_$(basename ${train_config%.*})_${exp_tag}
-    if [[ -n ${extra_tag} ]]; then
-        exp_name=${exp_name}_${extra_tag}
-    fi
-fi
-model_dir=$root_dir/../checkpoints/$dataset/mt/${exp_name}
-
-if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
-    echo "stage -1: Data Download"
-    # pass
-fi
-
-if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
-    ### Task dependent. You have to make data the following preparation part by yourself.
-    echo "stage 0: MT Data Preparation"
-    if [[ ! -e ${data_dir} ]]; then
-        mkdir -p ${data_dir}
-    fi
-
-    if [[ ! -f ${data_dir}/${src_vocab_prefix}.txt || ! -f ${data_dir}/${tgt_vocab_prefix}.txt ]]; then
-        if [[ ${use_specific_dict} -eq 0 ]]; then
-            cmd="python ${root_dir}/examples/speech_to_text/prep_mt_data.py
-                --data-root ${org_data_dir}
-                --output-root ${data_dir}
-                --splits ${train_subset},${valid_subset},${test_subset}
-                --src-lang ${src_lang}
-                --tgt-lang ${tgt_lang}
-                --lowercase-src
-                --rm-punc-src
-                --vocab-type ${vocab_type}
-                --vocab-size ${vocab_size}"
-            if [[ $share_dict -eq 1 ]]; then
-                cmd="$cmd
-                --share"
-            fi
-            echo -e "\033[34mRun command: \n${cmd} \033[0m"
-            [[ $eval -eq 1 ]] && eval ${cmd}
-        else
-            cp -r ${specific_dir}/${src_vocab_prefix}.* ${data_dir}
-            cp ${specific_dir}/${tgt_vocab_prefix}.* ${data_dir}
-        fi
-    fi
-
-    mkdir -p ${data_dir}/data
-    for split in ${train_subset} ${valid_subset} ${test_subset}; do
-    {
-        cmd="cat ${org_data_dir}/${lang}/data/${split}.${src_lang}"
-        if [[ ${lcrm} -eq 1 ]]; then
-            cmd="python local/lower_rm.py ${org_data_dir}/${lang}/data/${split}.${src_lang}"
-        fi
-        cmd="${cmd}
-        | spm_encode --model ${data_dir}/${src_vocab_prefix}.model
-        --output_format=piece
-        > ${data_dir}/data/${split}.${src_lang}"
-
-        echo -e "\033[34mRun command: \n${cmd} \033[0m"
-        [[ $eval -eq 1 ]] && eval ${cmd}
-
-        cmd="spm_encode
-        --model ${data_dir}/${tgt_vocab_prefix}.model
-        --output_format=piece
-        < ${org_data_dir}/${lang}/data/${split}.${tgt_lang}
-        > ${data_dir}/data/${split}.${tgt_lang}"
-
-        echo -e "\033[34mRun command: \n${cmd} \033[0m"
-        [[ $eval -eq 1 ]] && eval ${cmd}
-    }&
-    done
-    wait
-
-    cmd="python ${root_dir}/fairseq_cli/preprocess.py
-        --source-lang ${src_lang} --target-lang ${tgt_lang}
-        --trainpref ${data_dir}/data/${train_subset}
-        --validpref ${data_dir}/data/${valid_subset}
-        --testpref ${data_dir}/data/${test_subset}
-        --destdir ${data_dir}/data-bin
-        --srcdict ${data_dir}/${src_vocab_prefix}.txt
-        --tgtdict ${data_dir}/${tgt_vocab_prefix}.txt
-        --workers 64"
-
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    [[ $eval -eq 1 ]] && eval ${cmd}
-fi
-
-data_dir=${data_dir}/data-bin
-
-if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
-    echo "stage 1: MT Network Training"
-    [[ ! -d ${data_dir} ]] && echo "The data dir ${data_dir} is not existing!" && exit 1;
-
-    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
-		if [[ ${gpu_num} -eq 0 ]]; then
-			device=()
-		else
-        	source ./local/utils.sh
-        	device=$(get_devices $gpu_num 0)
-		fi
-    fi
-
-    echo -e "dev=${device} data=${data_dir} model=${model_dir}"
-
-    if [[ ! -d ${model_dir} ]]; then
-        mkdir -p ${model_dir}
-    else
-        echo "${model_dir} exists."
-    fi
-
-    cp ${BASH_SOURCE[0]} ${model_dir}
-    cp ${PWD}/train.sh ${model_dir}
-    cp ${train_config} ${model_dir}
-
-    cmd="python3 -u ${root_dir}/fairseq_cli/train.py
-        ${data_dir}
-        --source-lang ${src_lang}
-        --target-lang ${tgt_lang}
-        --train-config ${train_config}
-        --task ${task}
-        --max-tokens ${max_tokens}
-        --update-freq ${update_freq}
-        --log-interval 100
-        --save-dir ${model_dir}
-        --tensorboard-logdir ${model_dir}"
-
-    if [[ -n ${extra_parameter} ]]; then
-        cmd="${cmd}
-        ${extra_parameter}"
-    fi
-	if [[ ${gpu_num} -gt 0 ]]; then
-		cmd="${cmd}
-        --distributed-world-size $gpu_num
-        --ddp-backend no_c10d"
-	fi
-    if [[ $fp16 -eq 1 ]]; then
-        cmd="${cmd}
-        --fp16"
-    fi
-    if [[ $step_valid -eq 1 ]]; then
-        validate_interval=1
-        save_interval=1
-        keep_last_epochs=10
-        no_epoch_checkpoints=0
-        save_interval_updates=10000
-        keep_interval_updates=10
-    else
-        validate_interval=1
-        keep_last_epochs=10
-    fi
-    if [[ $bleu_valid -eq 1 ]]; then
-        cmd="$cmd
-        --eval-bleu
-        --eval-bleu-args '{\"beam\": 1}'
-        --eval-tokenized-bleu
-        --eval-bleu-remove-bpe
-        --best-checkpoint-metric bleu
-        --maximize-best-checkpoint-metric"
-    fi
-    if [[ -n $no_epoch_checkpoints && $no_epoch_checkpoints -eq 1 ]]; then
-        cmd="$cmd
-        --no-epoch-checkpoints"
-    fi
-    if [[ -n $validate_interval ]]; then
-        cmd="${cmd}
-        --validate-interval $validate_interval "
-    fi
-    if [[ -n $save_interval ]]; then
-        cmd="${cmd}
-        --save-interval $save_interval "
-    fi
-    if [[ -n $keep_last_epochs ]]; then
-        cmd="${cmd}
-        --keep-last-epochs $keep_last_epochs "
-    fi
-    if [[ -n $save_interval_updates ]]; then
-        cmd="${cmd}
-        --save-interval-updates $save_interval_updates"
-        if [[ -n $keep_interval_updates ]]; then
-        cmd="${cmd}
-        --keep-interval-updates $keep_interval_updates"
-        fi
-    fi
-
-    echo -e "\033[34mRun command: \n${cmd} \033[0m"
-
-    # save info
-    log=./history.log
-    echo "${time} | ${device} | ${data_dir} | ${model_dir} " >> $log
-    cat $log | tail -n 50 > tmp.log
-    mv tmp.log $log
-    export CUDA_VISIBLE_DEVICES=${device}
-
-    cmd="nohup ${cmd} >> ${model_dir}/train.log 2>&1 &"
-    if [[ $eval -eq 1 ]]; then
-		eval $cmd
-		sleep 2s
-		tail -n `wc -l ${model_dir}/train.log | awk '{print $1+1}'` -f ${model_dir}/train.log
-	fi
-fi
-wait
-
-if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
-    echo "stage 2: MT Decoding"
-    if [[ ${n_average} -ne 1 ]]; then
-        # Average models
-		dec_model=avg_${n_average}_checkpoint.pt
-
-		cmd="python ${root_dir}/scripts/average_checkpoints.py
-        --inputs ${model_dir}
-        --num-epoch-checkpoints ${n_average}
-        --output ${model_dir}/${dec_model}"
-    	echo -e "\033[34mRun command: \n${cmd} \033[0m"
-    	[[ $eval -eq 1 ]] && eval $cmd
-	else
-		dec_model=checkpoint_best.pt
-	fi
-
-    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
-		if [[ ${gpu_num} -eq 0 ]]; then
-			device=()
-		else
-        	source ./local/utils.sh
-        	device=$(get_devices $gpu_num 0)
-		fi
-    fi
-    export CUDA_VISIBLE_DEVICES=${device}
-
-	#tmp_file=$(mktemp ${model_dir}/tmp-XXXXX)
-	#trap 'rm -rf ${tmp_file}' EXIT
-	result_file=${model_dir}/decode_result
-	[[ -f ${result_file} ]] && rm ${result_file}
-
-    test_subset=(${test_subset//,/ })
-	for subset in ${test_subset[@]}; do
-  		cmd="python ${root_dir}/fairseq_cli/generate.py
-        ${data_dir}
-        --source-lang ${src_lang}
-        --target-lang ${tgt_lang}
-        --gen-subset ${subset}
-        --task ${task}
-        --path ${model_dir}/${dec_model}
-        --results-path ${model_dir}
-        --max-tokens ${max_tokens}
-        --beam ${beam_size}
-        --post-process sentencepiece
-        --tokenizer moses
-        --moses-source-lang ${src_lang}
-        --moses-target-lang ${tgt_lang}
-        --scoring sacrebleu"
-    	echo -e "\033[34mRun command: \n${cmd} \033[0m"
-
-        if [[ $eval -eq 1 ]]; then
-    	    eval $cmd
-    	    tail -n 1 ${model_dir}/generate-${subset}.txt >> ${result_file}
-        fi
-	done
-    cat ${result_file}
-fi
--- a/egs/wmt/mt/train.sh
+++ b/egs/wmt/mt/train.sh
-#! /bin/bash
-
-# training the model
-
-gpu_num=7
-update_freq=1
-max_tokens=4096
-
-extra_tag=
-extra_parameter=
-
-#extra_tag="${extra_tag}"
-#extra_parameter="${extra_parameter} "
-
-exp_tag=baseline
-train_config=train.yaml
-
-cmd="./run.sh
-    --stage 1
-    --stop_stage 1
-    --gpu_num ${gpu_num}
-    --update_freq ${update_freq}
-    --train_config ${train_config}
-    --max_tokens ${max_tokens}
-    "
-
-if [[ -n ${exp_name} ]]; then
-    cmd="$cmd --exp_name ${exp_name}"
-fi
-if [[ -n ${exp_tag} ]]; then
-    cmd="$cmd --exp_tag ${exp_tag}"
-fi
-if [[ -n ${extra_tag} ]]; then
-    cmd="$cmd --extra_tag ${extra_tag}"
-fi
-if [[ -n ${extra_parameter} ]]; then
-    cmd="$cmd --extra_parameter \"${extra_parameter}\""
-fi
-
-echo $cmd
-eval $cmd
--- a/examples/speech_to_text/prep_librispeech_data.py
+++ b/examples/speech_to_text/prep_librispeech_data.py
@@ -92,22 +92,30 @@ def process(args):
    # Generate vocab
    vocab_size = "" if args.vocab_type == "char" else str(args.vocab_size)
    spm_filename_prefix = f"spm_{args.vocab_type}{vocab_size}"
-    with NamedTemporaryFile(mode="w") as f:
-        if len(train_text) == 0:
-            print("Loading the training text...")
-            for split in SPLITS:
-                if split.startswith("train"):
-                    dataset = LIBRISPEECH(data_root.as_posix(), url=split)
-                    for wav, sample_rate, utt, spk_id, chapter_no, utt_no in dataset:
-                        train_text.append(utt.lower())
-        for t in train_text:
-            f.write(t + "\n")
-        gen_vocab(
-            Path(f.name),
-            out_root / spm_filename_prefix,
-            args.vocab_type,
-            args.vocab_size,
-        )
+
+    gen_vocab_flag = True
+    if args.asr_prefix is not None:
+        gen_vocab_flag = False
+        spm_filename_prefix = args.asr_prefix
+
+    if gen_vocab_flag:
+        with NamedTemporaryFile(mode="w") as f:
+            if len(train_text) == 0:
+                print("Loading the training text...")
+                for split in SPLITS:
+                    if split.startswith("train"):
+                        dataset = LIBRISPEECH(data_root.as_posix(), url=split)
+                        for wav, sample_rate, utt, spk_id, chapter_no, utt_no in dataset:
+                            train_text.append(utt.lower())
+            for t in train_text:
+                f.write(t + "\n")
+            gen_vocab(
+                Path(f.name),
+                out_root / spm_filename_prefix,
+                args.vocab_type,
+                args.vocab_size,
+            )
+
    # Generate config YAML
    gen_config_yaml(
        out_root, spm_filename_prefix + ".model", specaugment_policy="ld",
@@ -130,6 +138,7 @@ def main():
        choices=["bpe", "unigram", "char"],
    ),
    parser.add_argument("--vocab-size", default=10000, type=int)
+    parser.add_argument("--asr-prefix", type=str, default=None, help="prefix of the asr dict")
    parser.add_argument("--overwrite", action="store_true", help="overwrite the existing files")
    args = parser.parse_args()


--- a/examples/speech_to_text/prep_st_data.py
+++ b/examples/speech_to_text/prep_st_data.py
@@ -115,6 +115,20 @@ class ST_Dataset(Dataset):
                items.append([waveform, sr, sp_n_frames, src_utt, tgt_utt, spk_id, sp_utt_id])
        return items

+    def get_wav(self, n: int, speed_perturb=1.0):
+        """Load the audio segment of item ``n``; speed-perturb it unless the factor is 1.0."""
+        wav_path, offset, n_frames, sr, src_utt, tgt_utt, spk_id, utt_id = self.data[n]
+
+        # The raw segment is needed in both cases, so it is loaded once up front.
+        waveform, _ = torchaudio.load(wav_path, frame_offset=offset, num_frames=n_frames)
+        if self.speed_perturb is None or speed_perturb == 1.0:
+            return waveform
+
+        # Apply the sox "speed" effect, then "rate" with the item's sample rate sr.
+        effects = [["speed", f"{speed_perturb}"], ["rate", f"{sr}"]]
+        waveform, _ = torchaudio.sox_effects.apply_effects_tensor(waveform, sr, effects)
+        return waveform
+
    def get_fast(self, n: int):
        wav_path, offset, n_frames, sr, src_utt, tgt_utt, spk_id, utt_id = self.data[n]

@@ -185,13 +199,20 @@ def process(args):
                print("And estimating cepstral mean and variance stats...")
                gcmvn_feature_list = []

-            for items in tqdm(dataset):
+            for idx in tqdm(range(len(dataset))):
+                items = dataset.get_fast(idx)
                for item in items:
                    index += 1
-                    waveform, sr, _, _, _, _, utt_id = item
-
-                    if gen_feature_flag:
-                        features_path = (feature_root / f"{utt_id}.npy").as_posix()
+                    wav_path, sr, _, _, _, _, utt_id = item
+
+                    features_path = (feature_root / f"{utt_id}.npy").as_posix()
+                    if not os.path.exists(features_path):
+                        sp = 1.0
+                        if dataset.speed_perturb is not None:
+                            sp = float(utt_id.split("_")[0].replace("sp", ""))
+                        waveform = dataset.get_wav(idx, sp)
+                        if waveform.shape[1] == 0:
+                            continue
                        features = extract_fbank_features(waveform, sr, Path(features_path))

                        if split == 'train' and args.cmvn_type == "global" and not utt_id.startswith("sp"):

--- a/scripts/postprocessing.py
+++ b/scripts/postprocessing.py
+import argparse
+import re
+
+
+def read_file(input_path):
+    """Read a UTF-8 text file and return its lines as a list of strings.
+
+    Every line is passed through ``str.strip``, so surrounding whitespace
+    and the trailing newline are dropped; blank lines are kept as ``""``.
+    """
+    with open(input_path, 'r', encoding='utf8') as handle:
+        return [raw.strip() for raw in handle]
+
+
+def write_file(output_path, sentences):
+    """Write ``sentences`` to ``output_path`` as UTF-8, one sentence per line.
+
+    The file is opened in ``'w'`` mode, so existing content is replaced.
+    A ``'\\n'`` is appended after every sentence.
+    """
+    with open(output_path, 'w', encoding='utf8') as handle:
+        handle.writelines(sentence + '\n' for sentence in sentences)
+
+
+def remove_tag(sentences):
+    """Delete bracketed tags such as ``(Applaus)`` from every sentence.
+
+    The shortest span enclosed in ``(...)``, ``{...}`` or ``[...]`` is
+    removed together with its brackets; surrounding whitespace is kept.
+    """
+    return [re.sub(u"\\(.*?\\)|\\{.*?}|\\[.*?]", "", text) for text in sentences]
+
+
+def remove_beginning_punctuation(sentences):
+    """Strip leading punctuation and spaces from every sentence.
+
+    Any run of the characters ``, . ? : -`` and space at the start of a
+    sentence is removed, whatever their order.  Sentences that do not start
+    with one of these characters are returned unchanged.
+    """
+    # One lstrip with the whole character set handles mixed prefixes such as
+    # ", .text"; stripping one character at a time left the "." behind.
+    remove_punctuation = ',.?:- '
+    return [line.lstrip(remove_punctuation) for line in sentences]
+
+
+def remove_ending_punctuation(sentences):
+    """Strip trailing commas, colons and spaces from every sentence.
+
+    Any run of the characters ``,``, ``:`` and space at the end of a
+    sentence is removed, whatever their order.  Sentences that do not end
+    with one of these characters are returned unchanged.
+    """
+    # One rstrip with the whole character set handles mixed suffixes such as
+    # "text, :"; stripping one character at a time left the "," behind.
+    remove_punctuation = ',: '
+    return [line.rstrip(remove_punctuation) for line in sentences]
+
+
+def remove_space(sentences):
+    """Trim each sentence and collapse inner whitespace runs to one space."""
+    # str.split() without arguments already discards leading and trailing
+    # whitespace, so joining its pieces gives the normalised sentence.
+    return [' '.join(text.split()) for text in sentences]
+
+
+def remove_special_tag(sentences):
+    """Replace every double hyphen ``--`` with an em dash (optional step)."""
+    return [text.replace('--', '—') for text in sentences]
+
+
+def first_letter_upper(sentences):
+    """Upper-case the first character of each sentence when it is a lower-case letter.
+
+    Empty sentences and sentences starting with a non-letter or an
+    already upper-case letter are returned unchanged.
+    """
+    result = []
+    for text in sentences:
+        # Slicing yields '' for an empty sentence, which fails isalpha().
+        head = text[:1]
+        if head.isalpha() and head.islower():
+            text = head.upper() + text[1:]
+        result.append(text)
+    return result
+
+
+def add_last_punctuation(sentences):
+    """Append a full stop to every sentence whose last character is a letter.
+
+    Empty sentences and sentences ending in a digit, space or punctuation
+    mark are returned unchanged.
+    """
+    # text[-1:] is '' for an empty sentence, and ''.isalpha() is False.
+    return [text + '.' if text[-1:].isalpha() else text for text in sentences]
+
+
+def process(args):
+    """Run the enabled clean-up steps on the input file and write the result.
+
+    Reads the sentences from ``args.input_absolute_path``, applies the
+    active steps in the order listed below and writes the outcome to
+    ``args.output_absolute_path``.  Commented-out entries are disabled.
+    """
+    input_path = args.input_absolute_path
+    output_path = args.output_absolute_path
+    steps = (
+        remove_tag,
+        # remove_beginning_punctuation,
+        # remove_ending_punctuation,
+        remove_special_tag,
+        # remove_space,
+        # first_letter_upper,
+        # add_last_punctuation,
+    )
+    sentences = read_file(input_path)
+    for step in steps:
+        sentences = step(sentences)
+    write_file(output_path, sentences)
+
+
+def main():
+    """Parse the command-line arguments and run the post-processing."""
+    parser = argparse.ArgumentParser()
+    # Absolute path of the input file, then absolute path of the output file;
+    # both options are mandatory strings.
+    for flag in ("--input_absolute_path", "--output_absolute_path"):
+        parser.add_argument(flag, required=True, type=str)
+    process(parser.parse_args())
+
+
+if __name__ == '__main__':
+    main()