shell and yaml

6ca43d9e · xuchen · 9d4fe566 · 6ca43d9e · 6ca43d9e · 6ca43d9e
Commit 6ca43d9e authored Sep 20, 2023 by xuchen
--- a/egs/aishell/asr/conf/xctc.yaml
+++ b/egs/aishell/asr/conf/xctc.yaml
+xctc-weight: 0.3
+share-xctc-and-embed: True
\ No newline at end of file
--- a/egs/aishell/asr/conf/xinter.yaml
+++ b/egs/aishell/asr/conf/xinter.yaml
+inter-xctc-weight: 0.2
+inter-xctc-layers: 6,9
+
+xctc-pae: none
+# xctc-pae: inter_league
+
+xctc-cross-attn: False
+cross-attn-start-layer: 7
+cross-attn-layer: 6
+cross-attn-collaboration-mode: parallel
+cross-attn-league-s1-ratio: 0.5
+cross-attn-league-s2-ratio: 0.5
+cross-attn-league-out-norm: False
+cross-attn-league-gated: False
+cross-attn-league-drop-net: False
+cross-attn-league-drop-net-prob: 0.2
+cross-attn-league-drop-net-mix: False
+
+# xctc-pae-ground-truth-ratio: 0.1
+# xctc-pae-ground-truth-ratio-adaptive: True 
+# xctc-pae-ground-truth-only-mistake: True 
+# pae-oracle-smooth: True
+# pae-gumbel: True
+# pae-distribution-hard: True
+# pae-drop-prob: 0.0
+# pae-distribution-cutoff: 10
+# share-pae-and-xctc: True
+# pae-embed-norm: True
+# pae-out-norm: True
+
+# ctc-self-distill-weight: 1
+# target-ctc-self-distill-weight: 1
+# ctc-self-distill-prob: 0.1
+# cal-all-ctc: True
\ No newline at end of file
--- a/egs/aishell/asr/decode.sh
+++ b/egs/aishell/asr/decode.sh
@@ -2,7 +2,7 @@

 gpu_num=1

-data_dir=
+data_tag=asr
 test_subset=(dev test)

 exp_name=
@@ -11,11 +11,14 @@ if [ "$#" -eq 1 ]; then
 fi

 cer=1
-ctc_infer=0
+ctc_infer=1
 n_average=10
 beam_size=5
+infer_ctc_weight=0.1
 len_penalty=1.0
-max_tokens=80000
+max_tokens=50000
+batch_size=1
+infer_debug=0
 dec_model=checkpoint_best.pt

 cmd="./run.sh
@@ -28,12 +31,15 @@ cmd="./run.sh
    --ctc_infer ${ctc_infer}
    --beam_size ${beam_size}
    --len_penalty ${len_penalty}
+    --batch_size ${batch_size}
    --max_tokens ${max_tokens}
    --dec_model ${dec_model}
+    --infer_ctc_weight ${infer_ctc_weight}
+    --infer_debug ${infer_debug}
    "

-if [[ -n ${data_dir} ]]; then
-    cmd="$cmd --data_dir ${data_dir}"
+if [[ -n ${data_tag} ]]; then
+    cmd="$cmd --data_tag ${data_tag}"
 fi
 if [[ ${#test_subset[@]} -ne 0 ]]; then
    subsets=$(echo ${test_subset[*]} | sed 's/ /,/g')

--- a/egs/aishell/asr/run.sh
+++ b/egs/aishell/asr/run.sh
@@ -39,7 +39,7 @@ data_tag=asr
 task=speech_to_text
 vocab_type=unigram
 vocab_type=char
-vocab_size=5000
+vocab_size=10000
 speed_perturb=1
 lcrm=0
 tokenizer=0
@@ -77,14 +77,17 @@ step_valid=0
 dec_model=checkpoint_best.pt
 cer=1
 ctc_infer=0
+infer_ctc_weight=0
 ctc_self_ensemble=0
 ctc_inter_logit=0
 n_average=10
+batch_size=0
 beam_size=5
 len_penalty=1.0
 single=0
 epoch_ensemble=0
 best_ensemble=1
+infer_debug=0
 infer_score=0
 # infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"

@@ -115,13 +118,6 @@ if [[ "${vocab_type}" == "char" ]]; then
    data_dir=${data_dir}_char
    exp_prefix=${exp_prefix}_char
 fi
-if [[ ! -d /mnt/bd/data-model && -d /mnt/bd/data-model2 ]]; then
-    sudo ln -s /mnt/bd/data-model2/ /mnt/bd/data-model
-fi
-if [[ ! -d ${data_dir} ]]; then
-    echo "No feature dir ${data_dir}"
-    exit
-fi

 # setup nccl envs
 export NCCL_IB_DISABLE=0
@@ -174,6 +170,8 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
        --task asr
        --src-lang ${src_lang}
        --splits ${valid_split},${test_split},${train_split}
+	    --add-src
+	    --share
        --vocab-type ${vocab_type}
        --vocab-size ${vocab_size}"

@@ -309,8 +307,8 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    mv tmp.log $log

    log=${model_dir}/train.log
-    cmd="${cmd} 2>&1 | tee -a ${log}"
-    #cmd="nohup ${cmd} >> ${log} 2>&1 &"
+    # cmd="${cmd} 2>&1 | tee -a ${log}"
+    cmd="${cmd} >> ${log} 2>&1 "
    if [[ $eval -eq 1 ]]; then
        # tensorboard
        port=6666
@@ -326,7 +324,7 @@ fi
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    echo "Stage 2: Decoding"
    dec_models=
-    if [[ ${single} -eq 1 ]]; then
+    if [[ ${n_average} -eq 1 ]]; then
        dec_models=${dec_model}
    fi
    if [[ ${n_average} -ne 1 ]]; then
@@ -370,7 +368,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    fi

    for dec_model in ${dec_models[@]}; do
-        suffix=beam${beam_size}_alpha${len_penalty}_tokens${max_tokens}
+        suffix=alpha${len_penalty}
        model_str=`echo $dec_model | sed -e "s#checkpoint##" | sed "s#.pt##"`
        suffix=${suffix}_${model_str}
        if [[ -n ${cer} && ${cer} -eq 1 ]]; then
@@ -378,6 +376,13 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
        else
            suffix=${suffix}_wer
        fi
+
+        suffix=${suffix}_beam${beam_size}
+        if [[ ${batch_size} -ne 0 ]]; then
+            suffix=${suffix}_batch${batch_size}
+        else
+            suffix=${suffix}_tokens${max_tokens}
+        fi
        if [[ ${ctc_infer} -eq 1 ]]; then
            suffix=${suffix}_ctc
        fi
@@ -387,6 +392,12 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
        if [[ ${ctc_inter_logit} -ne 0 ]]; then
            suffix=${suffix}_logit${ctc_inter_logit}
        fi
+        if (( $(echo "${infer_ctc_weight} > 0" | bc -l) )); then
+            suffix=${suffix}_ctc${infer_ctc_weight}
+        fi
+        if [[ ${infer_score} -eq 1 ]]; then
+            suffix=${suffix}_score
+        fi

        suffix=`echo $suffix | sed -e "s#__#_#"`
        result_file=${model_dir}/decode_result_${suffix}
@@ -395,16 +406,23 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
        test_subset=${test_subset//,/ }
        for subset in ${test_subset[@]}; do
            subset=${subset}
-            cmd="python3 ${code_dir}/fairseq_cli/generate.py
+            if [[ ${infer_debug} -ne 0 ]]; then
+                cmd="python3 -m debugpy --listen 0.0.0.0:5678 --wait-for-client"
+            else
+                cmd="python3 "
+            fi
+            cmd="$cmd ${code_dir}/fairseq_cli/generate.py
            ${data_dir}
            --config-yaml ${data_config}
            --gen-subset ${subset}
            --task speech_to_text
            --path ${model_dir}/${dec_model}
            --results-path ${model_dir}
+            --batch-size ${batch_size}
            --max-tokens ${max_tokens}
            --beam ${beam_size}
            --lenpen ${len_penalty}
+            --infer-ctc-weight ${infer_ctc_weight}
            --scoring wer"

            if [[ ${cer} -eq 1 ]]; then
@@ -423,10 +441,14 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
                cmd="${cmd}
            --ctc-inter-logit ${ctc_inter_logit}"
            fi
+            if [[ ${infer_score} -eq 1 ]]; then
+                cmd="${cmd}
+            --score-reference"
+            fi
            if [[ -n ${infer_parameters} ]]; then
                cmd="${cmd}
            ${infer_parameters}"
-            fi
+            fi                        

            echo -e "\033[34mRun command: \n${cmd} \033[0m"

@@ -455,12 +477,14 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
                if [[ ${ctc_infer} -eq 1 && -f ${model_dir}/${ctc_file} ]]; then
                    ref_file=${model_dir}/${subset}.${src_lang}
                    if [[ ! -f ${ref_file} ]]; then
-                        python3 ./local/extract_txt_from_tsv.py ${data_dir}/${subset}.tsv ${ref_file} "src_text"
+                        python3 ./local/extract_txt_from_tsv.py ${data_dir}/${subset}.tsv ${ref_file} "tgt_text"
                    fi
                    if [[ -f ${ref_file} ]]; then
                        ctc=$(mktemp -t temp.record.XXXXXX)
                        cd ./local
-                        ./cal_wer.sh ${model_dir} ${subset} ${trans_file} ${ctc_file} ${ref_file} > ${ctc}
+                        cmd="./cal_wer.sh ${model_dir} ${subset} ${trans_file} ${ctc_file} ${ref_file} > ${ctc}"
+                        #echo $cmd
+                        eval $cmd
                        cd ..

                        echo "CTC WER" >> ${result_file}

--- a/egs/librispeech/asr/conf/basis.yaml
+++ b/egs/librispeech/asr/conf/basis.yaml
@@ -18,8 +18,8 @@ best_checkpoint_metric: dec_wer
 maximize_best_checkpoint_metric: False

 validate-interval: 5
-no-epoch-checkpoints: True
-# keep-last-epochs: 10
+# no-epoch-checkpoints: True
+keep-last-epochs: 10
 keep-best-checkpoints: 10

 num-workers: 8

--- a/egs/librispeech/asr/conf/xctc.yaml
+++ b/egs/librispeech/asr/conf/xctc.yaml
+xctc-weight: 0.3
+share-xctc-and-embed: True
\ No newline at end of file
--- a/egs/librispeech/asr/conf/xinter.yaml
+++ b/egs/librispeech/asr/conf/xinter.yaml
+inter-xctc-weight: 0.2
+inter-xctc-layers: 6,9
+
+xctc-pae: none
+# xctc-pae: inter_league
+
+xctc-cross-attn: False
+cross-attn-start-layer: 7
+cross-attn-layer: 6
+cross-attn-collaboration-mode: parallel
+cross-attn-league-s1-ratio: 0.5
+cross-attn-league-s2-ratio: 0.5
+cross-attn-league-out-norm: False
+cross-attn-league-gated: False
+cross-attn-league-drop-net: False
+cross-attn-league-drop-net-prob: 0.2
+cross-attn-league-drop-net-mix: False
+
+# xctc-pae-ground-truth-ratio: 0.1
+# xctc-pae-ground-truth-ratio-adaptive: True 
+# xctc-pae-ground-truth-only-mistake: True 
+# pae-oracle-smooth: True
+# pae-gumbel: True
+# pae-distribution-hard: True
+# pae-drop-prob: 0.0
+# pae-distribution-cutoff: 10
+# share-pae-and-xctc: True
+# pae-embed-norm: True
+# pae-out-norm: True
+
+# ctc-self-distill-weight: 1
+# target-ctc-self-distill-weight: 1
+# ctc-self-distill-prob: 0.1
+# cal-all-ctc: True
\ No newline at end of file
--- a/egs/librispeech/asr/decode.sh
+++ b/egs/librispeech/asr/decode.sh
@@ -2,8 +2,9 @@

 gpu_num=1

-data_dir=
+data_tag=asr
 test_subset=(dev-clean dev-other test-clean test-other all)
+test_subset=(dev-clean dev-other test-clean test-other)

 exp_name=
 if [ "$#" -eq 1 ]; then
@@ -13,7 +14,7 @@ fi
 ctc_infer=0
 n_average=10
 beam_size=5
-infer_ctc_weight=0.3
+infer_ctc_weight=0.1
 len_penalty=1.0
 max_tokens=50000
 batch_size=1
@@ -36,8 +37,8 @@ cmd="./run.sh
    --infer_debug ${infer_debug}
    "

-if [[ -n ${data_dir} ]]; then
-    cmd="$cmd --data_dir ${data_dir}"
+if [[ -n ${data_tag} ]]; then
+    cmd="$cmd --data_tag ${data_tag}"
 fi
 if [[ ${#test_subset[@]} -ne 0 ]]; then
    subsets=$(echo ${test_subset[*]} | sed 's/ /,/g')

--- a/egs/librispeech/asr/run.sh
+++ b/egs/librispeech/asr/run.sh
@@ -76,14 +76,15 @@ infer_ctc_weight=0
 ctc_self_ensemble=0
 ctc_inter_logit=0
 n_average=10
+batch_size=0
 beam_size=5
 len_penalty=1.0
 single=0
-epoch_ensemble=0
-best_ensemble=1
+epoch_ensemble=1
+best_ensemble=0
 infer_debug=0
 infer_score=0
-# infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"
+#infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"

 data_config=config.yaml

@@ -101,13 +102,6 @@ if [[ "${vocab_type}" == "char" ]]; then
    data_dir=${data_dir}_char
    exp_prefix=${exp_prefix}_char
 fi
-if [[ ! -d /mnt/bd/data-model && -d /mnt/bd/data-model2 ]]; then
-    sudo ln -s /mnt/bd/data-model2/ /mnt/bd/data-model
-fi
-if [[ ! -d ${data_dir} ]]; then
-    echo "No feature dir ${data_dir}"
-    exit
-fi

 # setup nccl envs
 export NCCL_IB_DISABLE=0
@@ -278,8 +272,8 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    mv tmp.log $log

    log=${model_dir}/train.log
-    cmd="${cmd} 2>&1 | tee -a ${log}"
-    #cmd="nohup ${cmd} >> ${log} 2>&1 &"
+    # cmd="${cmd} 2>&1 | tee -a ${log}"
+    cmd="${cmd} >> ${log} 2>&1 "
    if [[ $eval -eq 1 ]]; then
        # tensorboard
        port=6666
@@ -295,7 +289,7 @@ fi
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    echo "Stage 2: Decoding"
    dec_models=
-    if [[ ${single} -eq 1 ]]; then
+    if [[ ${n_average} -eq 1 ]]; then
        dec_models=${dec_model}
    fi
    if [[ ${n_average} -ne 1 ]]; then

--- a/egs/mustc/asr/conf/xctc.yaml
+++ b/egs/mustc/asr/conf/xctc.yaml
+xctc-weight: 0.3
+share-xctc-and-embed: True
\ No newline at end of file
--- a/egs/mustc/asr/conf/xinter.yaml
+++ b/egs/mustc/asr/conf/xinter.yaml
+inter-xctc-weight: 0.2
+inter-xctc-layers: 6,9
+
+xctc-pae: none
+# xctc-pae: inter_league
+
+xctc-cross-attn: False
+cross-attn-start-layer: 7
+cross-attn-layer: 6
+cross-attn-collaboration-mode: parallel
+cross-attn-league-s1-ratio: 0.5
+cross-attn-league-s2-ratio: 0.5
+cross-attn-league-out-norm: False
+cross-attn-league-gated: False
+cross-attn-league-drop-net: False
+cross-attn-league-drop-net-prob: 0.2
+cross-attn-league-drop-net-mix: False
+
+# xctc-pae-ground-truth-ratio: 0.1
+# xctc-pae-ground-truth-ratio-adaptive: True 
+# xctc-pae-ground-truth-only-mistake: True 
+# pae-oracle-smooth: True
+# pae-gumbel: True
+# pae-distribution-hard: True
+# pae-drop-prob: 0.0
+# pae-distribution-cutoff: 10
+# share-pae-and-xctc: True
+# pae-embed-norm: True
+# pae-out-norm: True
+
+# ctc-self-distill-weight: 1
+# target-ctc-self-distill-weight: 1
+# ctc-self-distill-prob: 0.1
+# cal-all-ctc: True
\ No newline at end of file
--- a/egs/mustc/st/decode.sh
+++ b/egs/mustc/st/decode.sh
@@ -2,7 +2,7 @@

 gpu_num=1

-data_dir=
+data_tag=
 test_subset=(dev tst-COMMON)

 exp_name=
@@ -38,8 +38,8 @@ cmd="./run.sh
    --infer_debug ${infer_debug}
    "

-if [[ -n ${data_dir} ]]; then
-    cmd="$cmd --data_dir ${data_dir}"
+if [[ -n ${data_tag} ]]; then
+    cmd="$cmd --data_tag ${data_tag}"
 fi
 if [[ ${#test_subset[@]} -ne 0 ]]; then
    subsets=$(echo ${test_subset[*]} | sed 's/ /,/g')

--- a/egs/mustc/st/pipe2.sh
+++ b/egs/mustc/st/pipe2.sh
+# Run stage 2 of run.sh for one experiment, once per target language, sequentially.
+exp_name=big_conformer_ctc_inter_xctc_xinter_enc18_wsum0.45_unnormpae_oracle0.1
+for lang in fr es it nl pt ro ru; do
+    ./run.sh --stage 2 --tgt_lang ${lang} --infer_ctc_weight 0.1 --batch_size 1 --exp_name ${exp_name}
+done
--- a/egs/mustc/st/prep.sh
+++ b/egs/mustc/st/prep.sh
+# Run only stage 0 of run.sh, once per target language, sequentially.
+for lang in es fr it nl pt ro ru; do
+    ./run.sh --stage 0 --stop_stage 0 --tgt_lang ${lang}
+done
--- a/egs/mustc/st/run.sh
+++ b/egs/mustc/st/run.sh
@@ -79,6 +79,7 @@ step_valid=0
 bleu_valid=0

 # Decoding Settings
+batch_size=0
 sacrebleu=1
 dec_model=checkpoint_best.pt
 ctc_infer=0
@@ -88,7 +89,7 @@ beam_size=5
 len_penalty=1.0
 infer_debug=0
 infer_score=0
-# infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"
+#infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"

 # Parsing Options
 if [[ ${share_dict} -eq 1 ]]; then
@@ -116,14 +117,6 @@ if [[ ${use_raw_audio} -eq 1 ]]; then
    data_dir=${data_dir}_raw
    exp_prefix=${exp_prefix}_raw
 fi
-if [[ ! -d /mnt/bd/data-model && -d /mnt/bd/data-model2 ]]; then
-    sudo ln -s /mnt/bd/data-model2/ /mnt/bd/data-model
-fi
-if [[ ! -d ${data_dir} ]]; then
-    echo "No feature dir ${data_dir}"
-    exit
-fi
-
 export PATH=$PATH:${code_dir}/scripts
 . ./local/parse_options.sh || exit 1;

@@ -361,8 +354,8 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    mv tmp.log $log

    log=${model_dir}/train.log
-    cmd="${cmd} 2>&1 | tee -a ${log}"
-    #cmd="nohup ${cmd} >> ${log} 2>&1 &"
+     # cmd="${cmd} 2>&1 | tee -a ${log}"
+    cmd="${cmd} >> ${log} 2>&1 "
    if [[ $eval -eq 1 ]]; then
        # tensorboard
        port=6666

--- a/entry.sh
+++ b/entry.sh
@@ -6,7 +6,6 @@ export ST_ROOT=/xuchen/st
 export NCCL_DEBUG=INFO

 echo "nameserver 114.114.114.114" >> /etc/resolv.conf
-pip3 install espnet -i https://pypi.tuna.tsinghua.edu.cn/simple

 if [[ `pip list | grep fairseq | wc -l` -eq 0 ]]; then 
    echo "default stage: env configure"

--- a/entry_loop.sh
+++ b/entry_loop.sh
@@ -6,7 +6,6 @@ export ST_ROOT=/xuchen/st
 export NCCL_DEBUG=INFO

 echo "nameserver 114.114.114.114" >> /etc/resolv.conf
-pip3 install espnet -i https://pypi.tuna.tsinghua.edu.cn/simple

 if [[ `pip list | grep fairseq | wc -l` -eq 0 ]]; then 
    echo "default stage: env configure"
@@ -16,21 +15,46 @@ fi
 all_cmd=$1
 all_cmd_dir=`dirname ${all_cmd}`
 echo "cmd dir: $all_cmd_dir"
+echo_flag=1
+pre_num=-1

 cp $all_cmd ${all_cmd}.bak
 while :
 do 
    line=`head -n1 $all_cmd`
    if [[ -z $line ]]; then
-        echo "No cmd. Waiting."
+        # Idle: no queued command, so only report how many GPUs look free.
+        record=$(mktemp -t temp.record.XXXXXX)
+        gpustat > "$record"
+        all_devices=$(seq 0 "$(sed '1,2d' "${record}" | wc -l)");
+
+        device=()
+        count=0
+        for dev in ${all_devices[@]}
+        do
+            item=$((dev + 2))
+            use=$(head -n $item "${record}" | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)
+
+            # Count every free GPU; gpu_num is unset or stale in this branch.
+            if [[ $use -lt 100 ]]; then
+                device[$count]=$dev
+                count=$((count + 1))
+            fi
+        done
+        rm -f "$record"  # avoid leaking one temp file per polling round
+
+        if [[ $echo_flag -eq 1 ]]; then
+            echo "No cmd. Current free GPU: ${count}. Waiting."
+            echo_flag=0
+        fi
        sleep 300s
        continue
    fi
-    echo $line
    gpu_num=$(echo $line | awk '{print $1}')
    shell_script=$(echo $line | awk '{print $2}')
    cmd=$(echo $line | awk '{$1=""; print $0}')

+    echo_flag=1
    while :
    do
        record=$(mktemp -t temp.record.XXXXXX)
@@ -53,6 +77,10 @@ do
            fi
        done
        if [[ ${#device[@]} -lt $gpu_num ]]; then
+            if [[ ${pre_num} -ne ${gpu_num} ]]; then
+            	echo "Current free GPU: ${count}, need GPU: ${gpu_num}. Waiting."
+	        fi
+            pre_num=$gpu_num
            sleep 300s
        else
            echo "Run $cmd"
@@ -67,7 +95,7 @@ do
            export CUDA_VISIBLE_DEVICES=$avail_devices
            eval $cmd &
            cd ${THIS_DIR}
-            sleep 100s
+            sleep 300s
        fi
        break
    done