Update the shell scripts and training configs (mustc MT/ST, librispeech ASR)

876daed6 · xuchen · 4b4603c0 · 876daed6 · 876daed6 · 876daed6
Commit 876daed6 authored Sep 24, 2021 by xuchen
--- a/egs/librispeech/asr/conf/pyramid.yaml
+++ b/egs/librispeech/asr/conf/pyramid.yaml
@@ -9,7 +9,7 @@ pyramid-layers: 2_2_6_2
 pyramid-sr-ratios: 2_2_2_2
 pyramid-use-ppm: True
 pyramid-embed-dims: 128_128_256_512
-pyramid-reduced-embed: conv
+pyramid-reduced-embed: fuse
 pyramid-embed-norm: True
 pyramid-position-embed: 1_1_1_1
 pyramid-kernel-sizes: 5_5_5_5

--- a/egs/mustc/mt/binary.sh
+++ b/egs/mustc/mt/binary.sh
--- a/egs/mustc/mt/conf/train.yaml
+++ b/egs/mustc/mt/conf/train.yaml
--- a/egs/mustc/mt/conf/train_s.yaml
+++ b/egs/mustc/mt/conf/train_s.yaml
--- a/egs/mustc/mt/conf/dlcl.yaml
+++ b/egs/mustc/mt/conf/dlcl.yaml
+use-enc-dlcl: True
+use-dec-dlcl: True
--- a/egs/mustc/mt/conf/rpr.yaml
+++ b/egs/mustc/mt/conf/rpr.yaml
+#encoder-attention-type: rel_selfattn
+encoder-attention-type: relative
+decoder-attention-type: relative
+max-encoder-relative-length: 20
+max-decoder-relative-length: 20
--- a/egs/mustc/mt/conf/train_dlcl.yaml
+++ b/egs/mustc/mt/conf/train_dlcl.yaml
-train-subset: train
-valid-subset: valid
-
-max-epoch: 50
-max-update: 100000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-skip-invalid-size-inputs-valid-test: True
-
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
-
-arch: dlcl_transformer
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 8000
-lr: 1e-3
-adam_betas: (0.9,0.997)
-
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-dropout: 0.1
-attention-dropout: 0.1
-activation-dropout: 0.1
-
-activation-fn: relu
-encoder-normalize-before: True
-decoder-normalize-before: True
-encoder-embed-dim: 512
-encoder-ffn-embed-dim: 2048
-encoder-layers: 6
-decoder-layers: 6
-encoder-attention-heads: 8
-
-decoder-embed-dim: 512
-decoder-ffn-embed-dim: 2048
-decoder-attention-heads: 8
-
-use-enc-dlcl: True
-use-dec-dlcl: True
\ No newline at end of file
--- a/egs/mustc/mt/conf/train_dlcl_rpr.yaml
+++ b/egs/mustc/mt/conf/train_dlcl_rpr.yaml
-train-subset: train
-valid-subset: valid
-
-max-epoch: 50
-max-update: 100000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-skip-invalid-size-inputs-valid-test: True
-
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
-
-arch: dlcl_transformer
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 8000
-lr: 1e-3
-adam_betas: (0.9,0.997)
-
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-dropout: 0.1
-attention-dropout: 0.1
-activation-dropout: 0.1
-
-activation-fn: relu
-encoder-normalize-before: True
-decoder-normalize-before: True
-encoder-embed-dim: 512
-encoder-ffn-embed-dim: 2048
-encoder-layers: 6
-decoder-layers: 6
-encoder-attention-heads: 8
-
-decoder-embed-dim: 512
-decoder-ffn-embed-dim: 2048
-decoder-attention-heads: 8
-
-encoder-attention-type: relative
-decoder-attention-type: relative
-max-encoder-relative-length: 20
-max-decoder-relative-length: 20
-
-use-enc-dlcl: True
-use-dec-dlcl: True
--- a/egs/mustc/mt/conf/train_rpr.yaml
+++ b/egs/mustc/mt/conf/train_rpr.yaml
-train-subset: train
-valid-subset: valid
-
-max-epoch: 50
-max-update: 100000
-
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-skip-invalid-size-inputs-valid-test: True
-
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
-
-arch: transformer
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 8000
-lr: 1e-3
-adam_betas: (0.9,0.997)
-
-criterion: label_smoothed_cross_entropy
-label_smoothing: 0.1
-
-dropout: 0.1
-attention-dropout: 0.1
-activation-dropout: 0.1
-
-activation-fn: relu
-encoder-normalize-before: True
-decoder-normalize-before: True
-encoder-embed-dim: 512
-encoder-ffn-embed-dim: 2048
-encoder-layers: 6
-decoder-layers: 6
-encoder-attention-heads: 8
-
-decoder-embed-dim: 512
-decoder-ffn-embed-dim: 2048
-decoder-attention-heads: 8
-
-encoder-attention-type: relative
-decoder-attention-type: relative
-max-encoder-relative-length: 20
-max-decoder-relative-length: 20
--- a/egs/mustc/mt/decode.sh
+++ b/egs/mustc/mt/decode.sh
@@ -31,9 +31,9 @@ cmd="./run.sh
 if [[ -n ${data_dir} ]]; then
    cmd="$cmd --data_dir ${data_dir}"
 fi
-if [[ -n ${test_subset} ]]; then
-    test_subset=`echo ${test_subset[*]} | sed 's/ /,/g'`
-    cmd="$cmd --test_subset ${test_subset}"
+if [[ ${#test_subset[@]} -ne 0 ]]; then
+    subsets=$(echo ${test_subset[*]} | sed 's/ /,/g')
+    cmd="$cmd --test_subset ${subsets}"
 fi

 echo $cmd

--- a/egs/mustc/mt/local/monitor.sh
+++ b/egs/mustc/mt/local/monitor.sh
-gpu_num=1
+gpu_num=4
+cmd="sh train.sh"

 while :
 do
-    all_devices=$(seq 0 `gpustat | sed '1,2d' | wc -l`);
+    record=$(mktemp -t temp.record.XXXXXX)
+    gpustat > $record
+    all_devices=$(seq 0 "$(sed '1,2d' ${record} | wc -l)");
+
    count=0
    for dev in ${all_devices[@]}
    do
-        line=`expr $dev + 2`
-        use=`gpustat -p | head -n $line | tail -1 | cut -d '|' -f4 | wc -w`
-        if [[ $use -eq 0 ]]; then
+        line=$((dev + 2))
+        use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)
+
+        if [[ $use -lt 100 ]]; then
            device[$count]=$dev
-            count=`expr $count + 1`
+            count=$((count + 1))
            if [[ $count -eq $gpu_num ]]; then
                break
            fi

--- a/egs/mustc/mt/local/path.sh
+++ b/egs/mustc/mt/local/path.sh
-MAIN_ROOT=$PWD/../../..
-KALDI_ROOT=$MAIN_ROOT/tools/kaldi
-
-export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PATH
-[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
-. $KALDI_ROOT/tools/config/common_path.sh
-export LC_ALL=C
-
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$MAIN_ROOT/src/lib
-export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$MAIN_ROOT/tools/chainer_ctc/ext/warp-ctc/build
-. "${MAIN_ROOT}"/tools/activate_python.sh && . "${MAIN_ROOT}"/tools/extra_path.sh
-export PATH=$MAIN_ROOT/utils:$MAIN_ROOT/espnet/bin:$PATH
-
-export OMP_NUM_THREADS=1
-
-# check extra module installation
-if ! which tokenizer.perl > /dev/null; then
-    echo "Error: it seems that moses is not installed." >&2
-    echo "Error: please install moses as follows." >&2
-    echo "Error: cd ${MAIN_ROOT}/tools && make moses.done" >&2
-    return 1
-fi
-
-# NOTE(kan-bayashi): Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
-export PYTHONIOENCODING=UTF-8
--- a/egs/mustc/mt/local/utils.sh
+++ b/egs/mustc/mt/local/utils.sh
@@ -5,17 +5,18 @@ get_devices(){
    device=()
    while :
    do
-        record=`mktemp -t temp.record.XXXXXX`
+        record=$(mktemp -t temp.record.XXXXXX)
        gpustat > $record
-        all_devices=$(seq 0 `cat $record | sed '1,2d' | wc -l`);
+        all_devices=$(seq 0 "$(sed '1,2d' ${record} | wc -l)");
+
        count=0
        for dev in ${all_devices[@]}
        do
-            line=`expr $dev + 2`
-            use=`cat $record | head -n $line | tail -1 | cut -d '|' -f3 | cut -d '/' -f1`
+            line=$((dev + 2))
+            use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)
            if [[ $use -lt 100 ]]; then
                device[$count]=$dev
-                count=`expr $count + 1`
+                count=$((count + 1))
                if [[ $count -eq $gpu_num ]]; then
                    break
                fi

--- a/egs/mustc/mt/run.sh
+++ b/egs/mustc/mt/run.sh
@@ -32,13 +32,13 @@ src_lang=en
 tgt_lang=de
 lang=${src_lang}-${tgt_lang}

-dataset=mustc-v2
+dataset=mustc
 task=translation
 vocab_type=unigram
 vocab_size=10000
 share_dict=1
-lcrm=1
-tokenizer=1
+lcrm=0
+tokenizer=0

 use_specific_dict=0
 specific_prefix=wmt_share32k
@@ -46,7 +46,7 @@ specific_dir=/home/xuchen/st/data/wmt/mt_lcrm/en-de/unigram32000_share
 src_vocab_prefix=spm_unigram32000_share
 tgt_vocab_prefix=spm_unigram32000_share

-org_data_dir=/media/data/${dataset}
+org_data_dir=~/st/data/${dataset}
 data_dir=~/st/data/${dataset}/mt/${lang}
 train_subset=train
 valid_subset=dev
@@ -54,14 +54,14 @@ trans_subset=tst-COMMON
 test_subset=test

 # exp
-exp_prefix=${time}
+exp_prefix=$(date "+%m%d")
 extra_tag=
 extra_parameter=
 exp_tag=baseline
 exp_name=

 # config
-train_config=train.yaml
+train_config=base

 # training setting
 fp16=1
@@ -103,10 +103,10 @@ fi

 . ./local/parse_options.sh || exit 1;

-# full path
-train_config=$pwd_dir/conf/${train_config}
 if [[ -z ${exp_name} ]]; then
-    exp_name=${exp_prefix}_$(basename ${train_config%.*})_${exp_tag}
+    config_string=${train_config//,/_}
+#    exp_name=${exp_prefix}_$(basename ${train_config%.*})_${exp_tag}
+    exp_name=${exp_prefix}_${config_string}_${exp_tag}
    if [[ -n ${extra_tag} ]]; then
        exp_name=${exp_name}_${extra_tag}
    fi
@@ -196,7 +196,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then

    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
 		if [[ ${gpu_num} -eq 0 ]]; then
-			device=()
+			device=""
 		else
        	source ./local/utils.sh
        	device=$(get_devices $gpu_num 0)
@@ -213,13 +213,32 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then

    cp ${BASH_SOURCE[0]} ${model_dir}
    cp ${PWD}/train.sh ${model_dir}
-    cp ${train_config} ${model_dir}
+
+    config_list="${train_config//,/ }"
+    idx=0
+    for config in ${config_list[@]}
+    do
+        config_path=$pwd_dir/conf/${config}.yaml
+        if [[ ! -f ${config_path} ]]; then
+            echo "No config file ${config_path}"
+            exit
+        fi
+        cp ${config_path} ${model_dir}
+
+        if [[ idx -eq 0 ]]; then
+            extra_parameter="${extra_parameter}
+        --train-config ${config_path}"
+        else
+            extra_parameter="${extra_parameter}
+        --train-config${idx} ${config_path}"
+        fi
+        idx=$((idx + 1))
+    done

    cmd="python3 -u ${root_dir}/fairseq_cli/train.py
        ${data_dir}
        --source-lang ${src_lang}
        --target-lang ${tgt_lang}
-        --train-config ${train_config}
        --task ${task}
        --max-tokens ${max_tokens}
        --skip-invalid-size-inputs-valid-test
@@ -228,7 +247,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
        --save-dir ${model_dir}
        --tensorboard-logdir ${model_dir}"

-    if [[ -n ${extra_parameter} ]]; then
+	if [[ -n ${extra_parameter} ]]; then
        cmd="${cmd}
        ${extra_parameter}"
    fi
@@ -290,8 +309,8 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then

    # save info
    log=./history.log
-    echo "${time} | ${device} | ${data_dir} | ${model_dir} " >> $log
-    cat $log | tail -n 50 > tmp.log
+    echo "${time} | ${device} | ${data_dir} | ${exp_name} | ${model_dir} " >> $log
+    tail -n 50 ${log} > tmp.log
    mv tmp.log $log
    export CUDA_VISIBLE_DEVICES=${device}

@@ -299,7 +318,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    if [[ $eval -eq 1 ]]; then
 		eval $cmd
 		sleep 2s
-		tail -n `wc -l ${model_dir}/train.log | awk '{print $1+1}'` -f ${model_dir}/train.log
+		tail -n "$(wc -l ${model_dir}/train.log | awk '{print $1+1}')" -f ${model_dir}/train.log
 	fi
 fi
 wait
@@ -322,7 +341,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then

    if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
 		if [[ ${gpu_num} -eq 0 ]]; then
-			device=()
+			device=""
 		else
        	source ./local/utils.sh
        	device=$(get_devices $gpu_num 0)
@@ -333,9 +352,8 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
 	result_file=${model_dir}/decode_result
 	[[ -f ${result_file} ]] && rm ${result_file}

-    test_subset=(${test_subset//,/ })
+    test_subset=${test_subset//,/ }
 	for subset in ${test_subset[@]}; do
-        subset=${subset}_st
  		cmd="python ${root_dir}/fairseq_cli/generate.py
        ${data_dir}
        --source-lang ${src_lang}

--- a/egs/mustc/mt/train.sh
+++ b/egs/mustc/mt/train.sh
@@ -2,18 +2,22 @@

 # training the model

-gpu_num=1
+gpu_num=8
 update_freq=1
 max_tokens=4096

+exp_tag=baseline
+config_list=(base)
+
+# exp full name
+exp_name=
+
 extra_tag=
 extra_parameter=
-
 #extra_tag="${extra_tag}"
 #extra_parameter="${extra_parameter} "

-exp_tag=baseline
-train_config=train.yaml
+train_config=$(echo ${config_list[*]} | sed 's/ /,/g')

 cmd="./run.sh
    --stage 1
@@ -24,6 +28,9 @@ cmd="./run.sh
    --max_tokens ${max_tokens}
    "

+if [[ -n ${exp_name} ]]; then
+    cmd="$cmd --exp_name ${exp_name}"
+fi
 if [[ -n ${exp_tag} ]]; then
    cmd="$cmd --exp_tag ${exp_tag}"
 fi
@@ -34,5 +41,5 @@ if [[ -n ${extra_parameter} ]]; then
    cmd="$cmd --extra_parameter \"${extra_parameter}\""
 fi

-echo $cmd
-eval $cmd
+echo ${cmd}
+eval ${cmd}
--- a/egs/mustc/st/local/monitor.sh
+++ b/egs/mustc/st/local/monitor.sh
-gpu_num=1
+gpu_num=4
 cmd="sh train.sh"

 while :