Commit 3b38b0dd by libei

Support ensemble decoding

parent 2309d8da
#!/usr/bin/bash
set -e
# device: you can set multiple devices, e.g. device=(0 1 2).
# The program will then translate in parallel over several evalsets (e.g. evalset=(cwmt18-dev mt06 mt08))
# or over several alphas (e.g. alphas=(1.0 1.1 1.2)).
# Note, however, that multiple evalsets and multiple alphas cannot be set at the same time.
# Extra devices are simply left unused: with device=(0 1 2 3) but only three evalsets, GPU 3 is not used.
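# Example (illustrative values): decode three evalsets on three GPUs with
#   device=(0 1 2); evalset=(cwmt18-dev mt06 mt08)
# or sweep three alphas on three GPUs with
#   device=(0 1 2); alphas=(1.0 1.1 1.2)
# but do not give both arrays multiple values at once.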
device=(0 1 2 3 4 5 6 7)
# your model
model=dense_transformer
# your hparams
params=dense_transformer_base
# your tag, must set!
tag=dense16
model_dir=t2tmodel/$tag/ensemble15
output_dir=checkpoints/$tag
mkdir -p $output_dir
# configs whose name contains "base" use 8 attention heads; the big configs use 16
if [[ $params == *base* ]]; then
  n_head=8
else
  n_head=16
fi
echo "n_head=$n_head"
CUDA_VISIBLE_DEVICES=0 python3 scripts/convert_dense_to_fairseq.py -model $model_dir/ensemble_15-0 -src_vocab t2tmodel/source_dic -tgt_vocab t2tmodel/target_dic -head_num $n_head -vocab_output $model_dir/fairseq.vocab -model_output $output_dir/fairseq.pt
rm online.vocab
#!/usr/bin/bash
set -e
# device: you can set multiple devices, e.g. device=(0 1 2).
# The program will then translate in parallel over several evalsets (e.g. evalset=(cwmt18-dev mt06 mt08))
# or over several alphas (e.g. alphas=(1.0 1.1 1.2)).
# Note, however, that multiple evalsets and multiple alphas cannot be set at the same time.
# Extra devices are simply left unused: with device=(0 1 2 3) but only three evalsets, GPU 3 is not used.
device=(0 1 2 3 4 5 6 7)
# your model
model=transformer
# your hparams
params=transformer_big
# your tag, must set!
tag=big_v3_multistep4
model_dir=t2tmodel/$tag/ensemble15
output_dir=checkpoints/$tag
mkdir -p $output_dir
# configs whose name contains "base" use 8 attention heads; the big configs use 16
if [[ $params == *base* ]]; then
  n_head=8
else
  n_head=16
fi
echo "n_head=$n_head"
CUDA_VISIBLE_DEVICES=0 python3 scripts/convert_t2t_to_fairseq.py -model $model_dir/ensemble_15-0 -src_vocab t2tmodel/source_dic -tgt_vocab t2tmodel/target_dic -head_num $n_head -vocab_output $model_dir/fairseq.vocab -model_output $output_dir/fairseq.pt
rm online.vocab
#!/usr/bin/bash
set -e
# device: you can set multiple devices, e.g. device=(0 1 2).
# The program will then translate in parallel over several evalsets (e.g. evalset=(cwmt18-dev mt06 mt08))
# or over several alphas (e.g. alphas=(1.0 1.1 1.2)).
# Note, however, that multiple evalsets and multiple alphas cannot be set at the same time.
# Extra devices are simply left unused: with device=(0 1 2 3) but only three evalsets, GPU 3 is not used.
device=(0 1 2 3 4 5 6 7)
# your model
#model=transformer_dla
model=transformer
# your hparams
#params=transformer_dla_base
params=transformer_base
# your tag, must set!
tag=base25
model_dir=t2tmodel/$tag/ensemble15
output_dir=checkpoints/$tag
mkdir -p $output_dir
# configs whose name contains "base" use 8 attention heads; the big configs use 16
if [[ $params == *base* ]]; then
  n_head=8
else
  n_head=16
fi
echo "n_head=$n_head"
CUDA_VISIBLE_DEVICES=7 python3 scripts/convert_t2t_new_to_fairseq.py -name $model -model $model_dir/ensemble_15-0 -src_vocab t2tmodel/source_dic -tgt_vocab t2tmodel/target_dic -head_num $n_head -vocab_output $model_dir/fairseq.vocab -model_output $output_dir/fairseq.pt
rm online.vocab
#!/usr/bin/bash
set -e
device=(0)
model_root_dir=checkpoints
# set tag
model_dir_tag=(baseline_v2 baseline_v3)
# only the first tag is used: a bare $model_dir_tag expands to its first element
model_dir=$model_root_dir/${model_dir_tag[0]}
ensemble_dir=$model_root_dir/ensemble
# make sure the ensemble output directory exists before decoding into it
mkdir -p $ensemble_dir
checkpoint=fairseq.pt
data_dir=wmt19_zh2en
beam=12
lenpen=1.2
who=test
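# if $ensemble is set in the environment (e.g. ensemble=5), average the last
# $ensemble epoch checkpoints with fairseq's average_checkpoints.py and decode
# with the averaged model instead of $checkpoint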
if [ -n "$ensemble" ]; then
  if [ ! -e "$model_dir/last$ensemble.ensemble.pt" ]; then
    PYTHONPATH=`pwd` python3 scripts/average_checkpoints.py --inputs $model_dir --output $model_dir/last$ensemble.ensemble.pt --num-epoch-checkpoints $ensemble
  fi
  checkpoint=last$ensemble.ensemble.pt
fi
output=$model_dir/translation.log
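# fairseq treats colon-separated entries in --path as a decode-time ensemble,
# so the command below averages the predictions of all eight listed models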
CUDA_VISIBLE_DEVICES=$device python3 generate_test.py \
  data-bin/$data_dir \
  --path $model_root_dir/big_v3_multistep4/$checkpoint:$model_root_dir/baseline_epoch10/$checkpoint:$model_root_dir/baseline_epoch20/$checkpoint:$model_root_dir/filter_base/$checkpoint:$model_root_dir/baseline_v3/$checkpoint:$model_root_dir/baseline_v3_seed2/$checkpoint:$model_root_dir/baseline_v2_seed2/$checkpoint:$model_root_dir/baseline/$checkpoint \
  --gen-subset $who \
  --output $ensemble_dir/hypo.$who.beam$beam.lenpen$lenpen \
  --batch-size 8 \
  --beam $beam \
  --lenpen $lenpen \
  --remove-bpe | tee $output
#python3 parse_translation_log.py -i $output --tgt_lang de
python3 rerank.py $ensemble_dir/hypo.$who.beam$beam.lenpen$lenpen $ensemble_dir/hypo.$who.beam$beam.lenpen$lenpen.decodes
# remove the intermediate output
rm $ensemble_dir/hypo.$who.beam$beam.lenpen$lenpen
# $multi_bleu is expected to point to a multi-bleu.perl scoring script (e.g. from Moses); set it before running
if [ $data_dir == "wmt19_zh2en" ] && [ $who == "valid" ]; then
  perl $multi_bleu reference/wmt17-dev-ref < $ensemble_dir/hypo.$who.beam$beam.lenpen$lenpen.decodes
elif [ $data_dir == "wmt19_zh2en" ] && [ $who == "test" ]; then
  perl $multi_bleu reference/wmt17-test-ref < $ensemble_dir/hypo.$who.beam$beam.lenpen$lenpen.decodes
fi
#!/usr/bin/bash
set -e
device=(0 1 3 4 5 6 7)
model_root_dir=checkpoints
# set tag
model_dir_tag=dense16
model_dir=$model_root_dir/$model_dir_tag
ensemble=
checkpoint=fairseq.pt
data_dir=wmt19_zh2en
beam=12
lenpens=(1.0 1.1 1.2 1.3 1.4 1.5 1.6)
who=test
n_device=${#device[@]}
n_lenpens=${#lenpens[@]}
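# scheduling: with at least as many devices as lenpens, each lenpen value gets its
# own GPU in parallel; otherwise the lenpens are processed in groups of $n_device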
if [ -n "$ensemble" ]; then
  if [ ! -e "$model_dir/last$ensemble.ensemble.pt" ]; then
    PYTHONPATH=`pwd` python3 scripts/average_checkpoints.py --inputs $model_dir --output $model_dir/last$ensemble.ensemble.pt --num-epoch-checkpoints $ensemble
  fi
  checkpoint=last$ensemble.ensemble.pt
fi
echo "n_device=$n_device"
echo "n_lenpens=$n_lenpens"
# enough devices: decode each lenpen on its own GPU in parallel
if [ $n_device -ge $n_lenpens ]; then
  for ((i=0;i<${#lenpens[@]};i++)); do
  {
    lenpen=${lenpens[$i]}
    dev=${device[$i]}
    output=$model_dir/translation.$lenpen.log
    cmd="python3 generate.py \
      data-bin/$data_dir \
      --path $model_dir/$checkpoint \
      --gen-subset $who \
      --output $model_dir/hypo.$who.beam$beam.lenpen$lenpen \
      --batch-size 32 \
      --beam $beam \
      --lenpen $lenpen \
      --remove-bpe"
    echo $cmd
    echo "run data=${who} beam=$beam lenpen=$lenpen dev=$dev"
    CUDA_VISIBLE_DEVICES=$dev $cmd | tee $output
  } &
  done
  # wait for all parallel decodes to finish
  wait
# not enough devices: process the lenpens in groups of $n_device
else
  # number of groups is ceil(n_lenpens / n_device), e.g. 7 lenpens on 3 devices gives 3 groups
  if [ $(($n_lenpens%$n_device)) -eq 0 ]; then
    n_group=$(($n_lenpens/$n_device))
  else
    n_group=$(($n_lenpens/$n_device+1))
  fi
  for ((i=0;i<$n_group;i++)); do
    for ((j=0;j<$n_device;j++)); do
    {
      lenpen=${lenpens[$(($i*$n_device+$j))]}
      dev=${device[$j]}
      output=$model_dir/translation.$lenpen.log
      # the last group may have fewer lenpens than devices, so skip empty slots
      if [ -n "$lenpen" ]; then
        # build the decode command for this lenpen; it must be set on this branch too
        cmd="python3 generate.py \
          data-bin/$data_dir \
          --path $model_dir/$checkpoint \
          --gen-subset $who \
          --output $model_dir/hypo.$who.beam$beam.lenpen$lenpen \
          --batch-size 32 \
          --beam $beam \
          --lenpen $lenpen \
          --remove-bpe"
        echo "run data=${who} beam=$beam lenpen=$lenpen dev=$dev"
        CUDA_VISIBLE_DEVICES=$dev $cmd | tee $output
      fi
    } &
    done
    # wait for the current group to finish before launching the next
    wait
  done
fi
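# post-process each hypothesis file with rerank.py into its final .decodes file,
# then remove the intermediate output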
for ((i=0;i<${#lenpens[@]};i++)); do
  lenpen=${lenpens[$i]}
  python3 rerank.py $model_dir/hypo.$who.beam$beam.lenpen$lenpen $model_dir/hypo.$who.beam$beam.lenpen$lenpen.decodes
  rm $model_dir/hypo.$who.beam$beam.lenpen$lenpen
done
echo 'multi bleu:'
# $multi_bleu is expected to point to a multi-bleu.perl scoring script (e.g. from Moses); set it before running
for ((i=0;i<${#lenpens[@]};i++)); do
  lenpen=${lenpens[$i]}
  echo "beam: $beam, lenpen: $lenpen"
  if [ $data_dir == "wmt19_zh2en" ] && [ $who == "valid" ]; then
    perl $multi_bleu reference/wmt17-dev-ref < $model_dir/hypo.$who.beam$beam.lenpen$lenpen.decodes
  elif [ $data_dir == "wmt19_zh2en" ] && [ $who == "test" ]; then
    perl $multi_bleu reference/wmt17-test-ref < $model_dir/hypo.$who.beam$beam.lenpen$lenpen.decodes
  fi
done