Commit 12201609 by xuchen

shell and yaml

parent d88a22ef
...@@ -29,7 +29,4 @@ encoder-attention-heads: 8 ...@@ -29,7 +29,4 @@ encoder-attention-heads: 8
decoder-embed-dim: 512 decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048 decoder-ffn-embed-dim: 2048
decoder-attention-heads: 8 decoder-attention-heads: 8
\ No newline at end of file
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
ctc-weight: 0.3 ctc-weight: 0.3
# share-ctc-and-embed: True \ No newline at end of file
\ No newline at end of file
inter-ctc-weight: 0.2 inter-ctc-weight: 0.2
inter-ctc-layers: 6,9 inter-ctc-layers: 6,9
inter-ctc-drop-prob: 0
share-inter-ctc: True share-inter-ctc: True
ctc-pae: none ctc-pae: none
# ctc-pae: inter_league # ctc-pae: inter_league
......
...@@ -3,10 +3,11 @@ inter-mixup-layer: -1 ...@@ -3,10 +3,11 @@ inter-mixup-layer: -1
inter-mixup-decoder-layer: 0 inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0 inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0 inter-mixup-ratio: 1.0
inter-mixup-beta: 0.5 inter-mixup-beta: 0.2
inter-mixup-keep-org: False inter-mixup-keep-org: False
inter-mixup-decoder-emb: False inter-mixup-decoder-emb: False
ctc-mixup-consistent-weight: 0 ctc-mixup-consistent-weight: 0
inter-ctc-mixup-consistent-weight: 0
mixup-consistent-weight: 0 mixup-consistent-weight: 0
cal-mixup-loss: True cal-mixup-loss: True
no-specaugment: False no-specaugment: False
......
arch: pdss2t_transformer_s_8 arch: pdss2t_transformer_s_8
pds-fusion: True
share-decoder-input-output-embed: True share-decoder-input-output-embed: True
optimizer: adam optimizer: adam
...@@ -22,7 +21,4 @@ encoder-attention-heads: 4 ...@@ -22,7 +21,4 @@ encoder-attention-heads: 4
decoder-embed-dim: 256 decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048 decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4 decoder-attention-heads: 4
\ No newline at end of file
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
\ No newline at end of file
...@@ -37,7 +37,4 @@ encoder-attention-heads: 4 ...@@ -37,7 +37,4 @@ encoder-attention-heads: 4
decoder-embed-dim: 256 decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048 decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4 decoder-attention-heads: 4
\ No newline at end of file
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
\ No newline at end of file
...@@ -37,7 +37,4 @@ encoder-attention-heads: 4 ...@@ -37,7 +37,4 @@ encoder-attention-heads: 4
decoder-embed-dim: 256 decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048 decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4 decoder-attention-heads: 4
\ No newline at end of file
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
\ No newline at end of file
...@@ -37,7 +37,4 @@ encoder-attention-heads: 4 ...@@ -37,7 +37,4 @@ encoder-attention-heads: 4
decoder-embed-dim: 256 decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048 decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4 decoder-attention-heads: 4
\ No newline at end of file
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
...@@ -38,6 +38,3 @@ encoder-attention-heads: 8 ...@@ -38,6 +38,3 @@ encoder-attention-heads: 8
decoder-embed-dim: 512 decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048 decoder-ffn-embed-dim: 2048
decoder-attention-heads: 8 decoder-attention-heads: 8
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
...@@ -23,13 +23,8 @@ subsampling-norm: none ...@@ -23,13 +23,8 @@ subsampling-norm: none
subsampling-activation: glu subsampling-activation: glu
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1
activation-dropout: 0.1
activation-fn: relu activation-fn: relu
encoder-embed-dim: 256 encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048 encoder-ffn-embed-dim: 2048
encoder-layers: 18 encoder-layers: 18
encoder-attention-heads: 4 encoder-attention-heads: 4
\ No newline at end of file
#load-pretrained-encoder-from:
\ No newline at end of file
encoder-attention-type: rel_selfattn encoder-attention-type: rel_pos
# encoder-attention-type: relative
# max-encoder-relative-length: 100 #encoder-attention-type: rel_pos_legacy
\ No newline at end of file #encoder-attention-type: rel_selfattn
#encoder-attention-type: relative
#decoder-attention-type: relative
#max-encoder-relative-length: 100
#max-decoder-relative-length: 20
...@@ -19,8 +19,8 @@ max_tokens=80000 ...@@ -19,8 +19,8 @@ max_tokens=80000
dec_model=checkpoint_best.pt dec_model=checkpoint_best.pt
cmd="./run.sh cmd="./run.sh
--stage 3 --stage 2
--stop_stage 3 --stop_stage 2
--gpu_num ${gpu_num} --gpu_num ${gpu_num}
--exp_name ${exp_name} --exp_name ${exp_name}
--n_average ${n_average} --n_average ${n_average}
......
#!/usr/bin/env bash
gpu_num=4 gpu_num=4
cmd="sh train.sh" cmd="sh train.sh"
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
gpu_num=2 gpu_num=2
update_freq=1 update_freq=1
max_tokens=160000 max_tokens=100000
extra_tag= extra_tag=
extra_parameter= extra_parameter=
...@@ -33,7 +33,7 @@ train_config=$(echo ${config_list[*]} | sed 's/ /,/g') ...@@ -33,7 +33,7 @@ train_config=$(echo ${config_list[*]} | sed 's/ /,/g')
cmd="./run.sh cmd="./run.sh
--stage 1 --stage 1
--stop_stage 1 --stop_stage 2
--gpu_num ${gpu_num} --gpu_num ${gpu_num}
--update_freq ${update_freq} --update_freq ${update_freq}
--train_config ${train_config} --train_config ${train_config}
......
...@@ -29,4 +29,9 @@ encoder-attention-heads: 4 ...@@ -29,4 +29,9 @@ encoder-attention-heads: 4
decoder-embed-dim: 256 decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048 decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4 decoder-attention-heads: 4
\ No newline at end of file attention-dropout: 0.1
activation-dropout: 0.1
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
\ No newline at end of file
...@@ -3,7 +3,7 @@ valid-subset: dev-other,dev-clean ...@@ -3,7 +3,7 @@ valid-subset: dev-other,dev-clean
max-epoch: 300 max-epoch: 300
max-update: 300000 max-update: 300000
patience: 50 patience: 20
post-process: sentencepiece post-process: sentencepiece
# best-checkpoint-metric: loss # best-checkpoint-metric: loss
...@@ -18,8 +18,8 @@ best_checkpoint_metric: dec_wer ...@@ -18,8 +18,8 @@ best_checkpoint_metric: dec_wer
maximize_best_checkpoint_metric: False maximize_best_checkpoint_metric: False
validate-interval: 5 validate-interval: 5
# no-epoch-checkpoints: True no-epoch-checkpoints: True
keep-last-epochs: 10 # keep-last-epochs: 10
keep-best-checkpoints: 10 keep-best-checkpoints: 10
num-workers: 8 num-workers: 8
......
macaron-style: True macaron-style: True
use-cnn-module: True use-cnn-module: True
cnn-module-kernel: 31 cnn-module-kernel: 15
encoder-attention-type: rel_pos encoder-attention-type: rel_pos
encoder-activation-fn: swish encoder-activation-fn: swish
layer-padding-mask: True layer-padding-mask: True
\ No newline at end of file
...@@ -5,14 +5,17 @@ share-inter-ctc: True ...@@ -5,14 +5,17 @@ share-inter-ctc: True
ctc-pae: none ctc-pae: none
# ctc-pae: inter_league # ctc-pae: inter_league
# ctc-pae-ground-truth-ratio: 0.1
# pae-gumbel: True # pae-gumbel: True
# pae-distribution-hard: True # pae-distribution-hard: True
# pae-drop-prob: 0.0 # pae-drop-prob: 0.0
# pae-distribution-cutoff: 10 # pae-distribution-cutoff: 10
# share-pae-and-ctc: True
# pae-embed-norm: True # pae-embed-norm: True
# pae-out-norm: True # pae-out-norm: True
# ctc-self-distill-weight: 1 # ctc-self-distill-weight: 1
# target-ctc-self-distill-weight: 1 # target-ctc-self-distill-weight: 1
# ctc-self-distill-prob: 0.1 # ctc-self-distill-prob: 0.1
# cal-all-ctc: True # cal-all-ctc: True
\ No newline at end of file
...@@ -3,11 +3,15 @@ inter-mixup-layer: -1 ...@@ -3,11 +3,15 @@ inter-mixup-layer: -1
inter-mixup-decoder-layer: 0 inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0 inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0 inter-mixup-ratio: 1.0
inter-mixup-beta: 0.5 inter-mixup-beta: 0.2
inter-mixup-keep-org: False inter-mixup-keep-org: False
inter-mixup-decoder-emb: False inter-mixup-decoder-emb: False
ctc-mixup-consistent-weight: 0 ctc-mixup-consistent-weight: 0
inter-ctc-mixup-consistent-weight: 0
mixup-consistent-weight: 0 mixup-consistent-weight: 0
cal-mixup-loss: True cal-mixup-loss: True
no-specaugment: False no-specaugment: False
layer-out-norm: False layer-out-norm: False
\ No newline at end of file
inter-mixup-ratio-decay: False
inter-mixup-ratio-decay-params: 20000,40000,0
\ No newline at end of file
arch: pdss2t_transformer_s_8 arch: pdss2t_transformer_s_8
pds-fusion: True
share-decoder-input-output-embed: True share-decoder-input-output-embed: True
optimizer: adam optimizer: adam
clip-norm: 10.0 clip-norm: 10.0
......
arch: pdss2t_transformer_s_8 arch: pdss2t_transformer_s_8
# pds-ctc: 1_1_0
encoder-embed-dim: 256 encoder-embed-dim: 256
pds-stages: 3 pds-stages: 3
pds-layers: 4_4_4 pds-layers: 4_4_4
......
...@@ -37,4 +37,4 @@ encoder-attention-heads: 8 ...@@ -37,4 +37,4 @@ encoder-attention-heads: 8
decoder-embed-dim: 512 decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048 decoder-ffn-embed-dim: 2048
decoder-attention-heads: 8 decoder-attention-heads: 8
\ No newline at end of file
...@@ -2,7 +2,6 @@ arch: pdss2t_transformer_sd_32 ...@@ -2,7 +2,6 @@ arch: pdss2t_transformer_sd_32
encoder-embed-dim: 256 encoder-embed-dim: 256
pds-stages: 5 pds-stages: 5
# ctc-layer: 12
pds-layers: 3_3_4_4_4 pds-layers: 3_3_4_4_4
pds-ratios: 2_2_2_2_2 pds-ratios: 2_2_2_2_2
pds-fusion: False pds-fusion: False
......
encoder-attention-type: rel_selfattn encoder-attention-type: rel_pos
#encoder-attention-type: rel_pos_legacy
#encoder-attention-type: rel_selfattn
#encoder-attention-type: relative #encoder-attention-type: relative
#decoder-attention-type: relative
#max-encoder-relative-length: 100 #max-encoder-relative-length: 100
#max-decoder-relative-length: 20
...@@ -17,8 +17,8 @@ max_tokens=100000 ...@@ -17,8 +17,8 @@ max_tokens=100000
dec_model=checkpoint_best.pt dec_model=checkpoint_best.pt
cmd="./run.sh cmd="./run.sh
--stage 3 --stage 2
--stop_stage 3 --stop_stage 2
--gpu_num ${gpu_num} --gpu_num ${gpu_num}
--exp_name ${exp_name} --exp_name ${exp_name}
--n_average ${n_average} --n_average ${n_average}
......
...@@ -14,7 +14,7 @@ get_devices(){ ...@@ -14,7 +14,7 @@ get_devices(){
do do
line=$((dev + 2)) line=$((dev + 2))
use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1) use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)
if [[ $use -lt 100 ]]; then if [[ $use -lt 1000 ]]; then
device[$count]=$dev device[$count]=$dev
count=$((count + 1)) count=$((count + 1))
if [[ $count -eq $gpu_num ]]; then if [[ $count -eq $gpu_num ]]; then
......
...@@ -2,8 +2,7 @@ ...@@ -2,8 +2,7 @@
# Processing LibriSpeech Datasets # Processing LibriSpeech Datasets
# Copyright 2021 Natural Language Processing Laboratory # Copyright 2021 Chen Xu (xuchennlp@outlook.com)
# Xu Chen (xuchenneu@163.com)
# Set bash to 'debug' mode, it will exit on : # Set bash to 'debug' mode, it will exit on :
# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands', # -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
...@@ -16,27 +15,26 @@ eval=1 ...@@ -16,27 +15,26 @@ eval=1
time=$(date "+%m%d_%H%M") time=$(date "+%m%d_%H%M")
stage=1 stage=1
stop_stage=4 stop_stage=2
######## hardware ######## ######## Hardware ########
# devices # Devices
device=(0) device=(0)
gpu_num=8 gpu_num=8
update_freq=1 update_freq=1
max_tokens=100000
root_dir=/opt/tiger
# data_root_dir=/mnt/bd/data-model
data_root_dir=/mnt/bn/nas-xc-1
code_dir=${root_dir}/s2t
pwd_dir=$PWD pwd_dir=$PWD
root_dir=${ST_ROOT}
data_root_dir=${root_dir}
# dataset code_dir=${root_dir}/S2T
# Dataset
src_lang=en src_lang=en
lang=${src_lang} lang=${src_lang}
dataset=librispeech dataset=librispeech
data_tag=asr_mlo data_tag=asr
task=speech_to_text task=speech_to_text
vocab_type=unigram vocab_type=unigram
...@@ -60,22 +58,17 @@ test_subset=dev-clean,dev-other,test-clean,test-other,all ...@@ -60,22 +58,17 @@ test_subset=dev-clean,dev-other,test-clean,test-other,all
# exp # exp
sub_tag= sub_tag=
exp_prefix=$(date "+%m%d") exp_prefix=$(date "+%m%d")
# exp_subfix=${ARNOLD_JOB_ID}_${ARNOLD_TASK_ID}_${ARNOLD_TRIAL_ID}
extra_tag= extra_tag=
extra_parameter= extra_parameter=
exp_tag=baseline exp_tag=baseline
exp_name= exp_name=
# config # Training Settings
train_config=base train_config=base
data_config=config.yaml
# training setting
fp16=1 fp16=1
max_tokens=100000
step_valid=0 step_valid=0
# decoding setting # Decoding Settings
dec_model=checkpoint_best.pt dec_model=checkpoint_best.pt
cer=0 cer=0
ctc_infer=0 ctc_infer=0
...@@ -87,8 +80,12 @@ len_penalty=1.0 ...@@ -87,8 +80,12 @@ len_penalty=1.0
single=0 single=0
epoch_ensemble=0 epoch_ensemble=0
best_ensemble=1 best_ensemble=1
infer_parameters= infer_score=0
# infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"
data_config=config.yaml
# Parsing Options
if [[ ${speed_perturb} -eq 1 ]]; then if [[ ${speed_perturb} -eq 1 ]]; then
data_dir=${data_dir}_sp data_dir=${data_dir}_sp
exp_prefix=${exp_prefix}_sp exp_prefix=${exp_prefix}_sp
...@@ -116,13 +113,6 @@ export NCCL_IB_HCA=$ARNOLD_RDMA_DEVICE:1 ...@@ -116,13 +113,6 @@ export NCCL_IB_HCA=$ARNOLD_RDMA_DEVICE:1
export NCCL_IB_GID_INDEX=3 export NCCL_IB_GID_INDEX=3
export NCCL_SOCKET_IFNAME=eth0 export NCCL_SOCKET_IFNAME=eth0
HOSTS=$ARNOLD_WORKER_HOSTS
HOST=(${HOSTS//,/ })
HOST_SPLIT=(${HOST//:/ })
PORT=${HOST_SPLIT[1]}
INIT_METHOD="tcp://${ARNOLD_WORKER_0_HOST}:${ARNOLD_WORKER_0_PORT}"
DIST_RANK=$((ARNOLD_ID * ARNOLD_WORKER_GPU))
export PATH=$PATH:${code_dir}/scripts export PATH=$PATH:${code_dir}/scripts
. ./local/parse_options.sh || exit 1; . ./local/parse_options.sh || exit 1;
...@@ -136,22 +126,27 @@ if [[ -z ${exp_name} ]]; then ...@@ -136,22 +126,27 @@ if [[ -z ${exp_name} ]]; then
exp_name=${exp_name}_${exp_subfix} exp_name=${exp_name}_${exp_subfix}
fi fi
fi fi
ckpt_dir=${code_dir}/checkpoints/
model_dir=${code_dir}/checkpoints/${data_model_subfix}/${sub_tag}/${exp_name}
echo "stage: $stage" ckpt_dir=${root_dir}/checkpoints/
echo "stop_stage: $stop_stage" model_dir=${root_dir}/checkpoints/${data_model_subfix}/${sub_tag}/${exp_name}
# Start
cd ${code_dir} cd ${code_dir}
echo "Start Stage: $stage"
echo "Stop Stage: $stop_stage"
if [[ `pip list | grep fairseq | wc -l` -eq 0 ]]; then
echo "Default Stage: env configure"
pip3 install -e ${code_dir}
fi
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
echo "stage -1: Data Download" echo "Stage -1: Data Download"
# pass
fi fi
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
### Task dependent. You have to make data the following preparation part by yourself. ### Task dependent. You have to make data the following preparation part by yourself.
### But you can utilize Kaldi recipes in most cases echo "Stage 0: Data Preparation"
echo "stage 0: Data Preparation"
if [[ ! -e ${data_dir} ]]; then if [[ ! -e ${data_dir} ]]; then
mkdir -p ${data_dir} mkdir -p ${data_dir}
...@@ -160,8 +155,6 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then ...@@ -160,8 +155,6 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
cmd="python3 ${code_dir}/examples/speech_to_text/prep_librispeech_data.py cmd="python3 ${code_dir}/examples/speech_to_text/prep_librispeech_data.py
--data-root ${org_data_dir} --data-root ${org_data_dir}
--output-root ${data_dir} --output-root ${data_dir}
--cmvn-type global
--gcmvn-max-num 300000
--vocab-type ${vocab_type} --vocab-type ${vocab_type}
--vocab-size ${vocab_size}" --vocab-size ${vocab_size}"
...@@ -178,26 +171,18 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then ...@@ -178,26 +171,18 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
[[ $eval -eq 1 ]] && eval ${cmd} [[ $eval -eq 1 ]] && eval ${cmd}
fi fi
echo "stage 1: env configure" if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
if [[ `pip list | grep fairseq | wc -l` -eq 0 ]]; then echo "Stage 1: Network Training"
pip3 install -e ${code_dir} -i https://bytedpypi.byted.org/simple --no-build-isolation --default-timeout=10000
fi
if [[ -d /mnt/bn/nas-xc-1/checkpoints && ! -d ${code_dir}/checkpoints ]]; then
ln -s /mnt/bn/nas-xc-1/checkpoints ${code_dir}
fi
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
echo "stage 2: ASR Network Training"
[[ ! -d ${data_dir} ]] && echo "The data dir ${data_dir} is not existing!" && exit 1; [[ ! -d ${data_dir} ]] && echo "The data dir ${data_dir} is not existing!" && exit 1;
if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
if [[ ${gpu_num} -eq 0 ]]; then if [[ ${gpu_num} -eq 0 ]]; then
device="" device=""
else else
source ./local/utils.sh source ./local/utils.sh
device=$(get_devices $gpu_num 0) device=$(get_devices $gpu_num 0)
fi fi
export CUDA_VISIBLE_DEVICES=${device}
fi fi
echo -e "data=${data_dir} model=${model_dir}" echo -e "data=${data_dir} model=${model_dir}"
...@@ -241,17 +226,15 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then ...@@ -241,17 +226,15 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
--save-dir ${model_dir} --save-dir ${model_dir}
--tensorboard-logdir ${model_dir}" --tensorboard-logdir ${model_dir}"
if [[ -n ${extra_parameter} ]]; then if [[ -n ${extra_parameter} ]]; then
cmd="${cmd} cmd="${cmd}
${extra_parameter}" ${extra_parameter}"
fi fi
if [[ ${gpu_num} -gt 0 ]]; then if [[ ${gpu_num} -gt 0 ]]; then
cmd="${cmd} cmd="${cmd}
--distributed-world-size $gpu_num --distributed-world-size $gpu_num
--ddp-backend no_c10d" --ddp-backend no_c10d"
# --distributed-init-method ${INIT_METHOD} fi
# --distributed-rank ${DIST_RANK}"
fi
if [[ $fp16 -eq 1 ]]; then if [[ $fp16 -eq 1 ]]; then
cmd="${cmd} cmd="${cmd}
--fp16" --fp16"
...@@ -291,29 +274,24 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then ...@@ -291,29 +274,24 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
echo "${time} | ${data_dir} | ${exp_name} | ${model_dir} " >> $log echo "${time} | ${data_dir} | ${exp_name} | ${model_dir} " >> $log
tail -n 50 ${log} > tmp.log tail -n 50 ${log} > tmp.log
mv tmp.log $log mv tmp.log $log
# export CUDA_VISIBLE_DEVICES=${device}
log=${model_dir}/train.log log=${model_dir}/train.log
cmd="${cmd} 2>&1 | tee -a ${log}" cmd="${cmd} 2>&1 | tee -a ${log}"
#cmd="nohup ${cmd} >> ${log} 2>&1 &" #cmd="nohup ${cmd} >> ${log} 2>&1 &"
if [[ $eval -eq 1 ]]; then if [[ $eval -eq 1 ]]; then
# tensorboard # tensorboard
if [[ -z ${ARNOLD_TENSORBOARD_CURRENT_PORT} ]]; then port=6666
port=6666
else
port=${ARNOLD_TENSORBOARD_CURRENT_PORT}
fi
tensorboard --logdir ${model_dir} --port ${port} --bind_all & tensorboard --logdir ${model_dir} --port ${port} --bind_all &
echo "${cmd}" > ${model_dir}/cmd echo "${cmd}" > ${model_dir}/cmd
eval $cmd eval $cmd
#sleep 2s #sleep 2s
#tail -n "$(wc -l ${log} | awk '{print $1+1}')" -f ${log} #tail -n "$(wc -l ${log} | awk '{print $1+1}')" -f ${log}
fi fi
fi fi
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
echo "stage 3: ASR Decoding" echo "Stage 2: Decoding"
dec_models= dec_models=
if [[ ${single} -eq 1 ]]; then if [[ ${single} -eq 1 ]]; then
dec_models=${dec_model} dec_models=${dec_model}
...@@ -346,17 +324,17 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then ...@@ -346,17 +324,17 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
fi fi
dec_models+=(${avg_model}) dec_models+=(${avg_model})
fi fi
fi fi
if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
if [[ ${gpu_num} -eq 0 ]]; then if [[ ${gpu_num} -eq 0 ]]; then
device="" device=""
else else
source ./local/utils.sh source ./local/utils.sh
device=$(get_devices $gpu_num 0) device=$(get_devices $gpu_num 0)
fi fi
export CUDA_VISIBLE_DEVICES=${device}
fi fi
# export CUDA_VISIBLE_DEVICES=${device}
for dec_model in ${dec_models[@]}; do for dec_model in ${dec_models[@]}; do
suffix=beam${beam_size}_alpha${len_penalty}_tokens${max_tokens} suffix=beam${beam_size}_alpha${len_penalty}_tokens${max_tokens}
...@@ -419,13 +397,13 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then ...@@ -419,13 +397,13 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
echo -e "\033[34mRun command: \n${cmd} \033[0m" echo -e "\033[34mRun command: \n${cmd} \033[0m"
cd ${code_dir}
if [[ $eval -eq 1 ]]; then if [[ $eval -eq 1 ]]; then
src_ctc_file=translation-${subset}.txt.ctc ctc_file=translation-${subset}.ctc
if [[ -f ${model_dir}/${src_ctc_file} ]]; then if [[ -f ${model_dir}/${ctc_file} ]]; then
rm ${model_dir}/${src_ctc_file} rm ${model_dir}/${ctc_file}
fi fi
cd ${code_dir}
eval $cmd eval $cmd
echo "" >> ${result_file} echo "" >> ${result_file}
tail -n 2 ${model_dir}/generate-${subset}.txt >> ${result_file} tail -n 2 ${model_dir}/generate-${subset}.txt >> ${result_file}
...@@ -441,27 +419,27 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then ...@@ -441,27 +419,27 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
fi fi
trans_file=translation-${subset}-${suffix}.txt trans_file=translation-${subset}-${suffix}.txt
if [[ ${ctc_infer} -eq 1 && -f ${model_dir}/${src_ctc_file} ]]; then if [[ ${ctc_infer} -eq 1 && -f ${model_dir}/${ctc_file} ]]; then
ref_file=${model_dir}/${subset}.${src_lang} ref_file=${model_dir}/${subset}.${src_lang}
if [[ ! -f ${ref_file} ]]; then if [[ ! -f ${ref_file} ]]; then
python3 ./local/extract_txt_from_tsv.py ${data_dir}/${subset}.tsv ${ref_file} "src_text" python3 ./local/extract_txt_from_tsv.py ${data_dir}/${subset}.tsv ${ref_file} "src_text"
fi fi
if [[ -f ${ref_file} ]]; then if [[ -f ${ref_file} ]]; then
src_ctc=$(mktemp -t temp.record.XXXXXX) ctc=$(mktemp -t temp.record.XXXXXX)
cd ./local cd ./local
./cal_wer.sh ${model_dir} ${subset} ${trans_file} ${src_ctc_file} ${ref_file} > ${src_ctc} ./cal_wer.sh ${model_dir} ${subset} ${trans_file} ${ctc_file} ${ref_file} > ${ctc}
cd .. cd ..
echo "CTC WER" >> ${result_file} echo "CTC WER" >> ${result_file}
tail -n 2 ${src_ctc} >> ${result_file} tail -n 2 ${ctc} >> ${result_file}
src_bleu=$(mktemp -t temp.record.XXXXXX) src_bleu=$(mktemp -t temp.record.XXXXXX)
cd local cd local
./cal_ctc_bleu.sh ${model_dir} ${subset} ${trans_file} ${src_ctc_file} ${ref_file} ${tokenizer} ${src_lang} > ${src_bleu} ./cal_ctc_bleu.sh ${model_dir} ${subset} ${trans_file} ${ctc_file} ${ref_file} ${tokenizer} ${src_lang} > ${src_bleu}
cd .. cd ..
cat ${src_bleu} >> ${result_file} cat ${src_bleu} >> ${result_file}
rm ${src_ctc} ${src_bleu} rm ${ctc} ${src_bleu}
else else
echo "No reference for source language." echo "No reference for source language."
fi fi
......
...@@ -59,7 +59,7 @@ train_config=$(echo ${config_list[*]} | sed 's/ /,/g') ...@@ -59,7 +59,7 @@ train_config=$(echo ${config_list[*]} | sed 's/ /,/g')
cmd="./run.sh cmd="./run.sh
--stage 1 --stage 1
--stop_stage 4 --stop_stage 2
--gpu_num ${gpu_num} --gpu_num ${gpu_num}
--update_freq ${update_freq} --update_freq ${update_freq}
--train_config ${train_config} --train_config ${train_config}
......
...@@ -85,8 +85,8 @@ dec_model=checkpoint_best.pt ...@@ -85,8 +85,8 @@ dec_model=checkpoint_best.pt
n_average=10 n_average=10
beam_size=5 beam_size=5
len_penalty=1.0 len_penalty=1.0
infer_score=1 infer_score=0
infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy" # infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"
# Parsing Options # Parsing Options
if [[ ${speed_perturb} -eq 1 ]]; then if [[ ${speed_perturb} -eq 1 ]]; then
......
...@@ -33,7 +33,7 @@ train_config=$(echo ${config_list[*]} | sed 's/ /,/g') ...@@ -33,7 +33,7 @@ train_config=$(echo ${config_list[*]} | sed 's/ /,/g')
cmd="./run.sh cmd="./run.sh
--stage 1 --stage 1
--stop_stage 4 --stop_stage 2
--gpu_num ${gpu_num} --gpu_num ${gpu_num}
--update_freq ${update_freq} --update_freq ${update_freq}
--train_config ${train_config} --train_config ${train_config}
......
...@@ -84,8 +84,8 @@ dec_model=checkpoint_best.pt ...@@ -84,8 +84,8 @@ dec_model=checkpoint_best.pt
n_average=10 n_average=10
beam_size=5 beam_size=5
len_penalty=1.0 len_penalty=1.0
infer_score=1 infer_score=0
infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy" # infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"
# Parsing Options # Parsing Options
. ./local/parse_options.sh || exit 1; . ./local/parse_options.sh || exit 1;
......
arch: s2t_transformer_s arch: s2t_transformer_m
share-decoder-input-output-embed: True share-decoder-input-output-embed: True
optimizer: adam optimizer: adam
clip-norm: 10.0 clip-norm: 10.0
......
ctc-weight: 0.3 ctc-weight: 0.3
share-ctc-and-embed: True share-ctc-and-embed: True
share-inter-ctc: True \ No newline at end of file
# inter-ctc-weight: 0.2
# inter-ctc-layers: 6,9
ctc-pae: none
\ No newline at end of file
inter-mixup: True inter-mixup: True
inter-mixup-layer: -1 inter-mixup-layer: -1
inter-mixup-decoder-layer: 0 inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0 inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0 inter-mixup-ratio: 1.0
inter-mixup-beta: 0.2 inter-mixup-beta: 0.2
inter-mixup-keep-org: False inter-mixup-keep-org: False
inter-mixup-decoder-emb: False inter-mixup-decoder-emb: False
ctc-mixup-consistent-weight: 0 ctc-mixup-consistent-weight: 0
inter-ctc-mixup-consistent-weight: 0 inter-ctc-mixup-consistent-weight: 0
mixup-consistent-weight: 0 mixup-consistent-weight: 0
cal-mixup-loss: True cal-mixup-loss: True
no-specaugment: False no-specaugment: False
layer-out-norm: False layer-out-norm: False
......
arch: pdss2t_transformer_s_32 arch: pdss2t_transformer_s_32
encoder-embed-dim: 256 encoder-embed-dim: 256
pds-stages: 5 pds-stages: 5
pds-layers: 2_2_3_3_2 pds-layers: 2_2_3_3_2
......
...@@ -8,7 +8,7 @@ pds-fusion: False ...@@ -8,7 +8,7 @@ pds-fusion: False
pds-fusion-method: all_conv2 pds-fusion-method: all_conv2
pds-fusion-layers: 0_1_1_1 pds-fusion-layers: 0_1_1_1
pds-fusion-weight: 0.2_0.3_0.5 pds-fusion-weight: 0.2_0.3_0.5
pds-embed-dims: 256_256_256_256 pds-embed-dims: 256_256_256
pds-ds-method: conv pds-ds-method: conv
pds-embed-norm: True pds-embed-norm: True
pds-position-embed: 1_1_1 pds-position-embed: 1_1_1
......
arch: pdss2t_transformer_s_16 arch: pdss2t_transformer_m_16
encoder-embed-dim: 512 encoder-embed-dim: 512
pds-stages: 4 pds-stages: 4
...@@ -13,7 +13,7 @@ pds-ds-method: conv ...@@ -13,7 +13,7 @@ pds-ds-method: conv
pds-embed-norm: True pds-embed-norm: True
pds-position-embed: 1_1_1_1 pds-position-embed: 1_1_1_1
pds-kernel-sizes: 5_5_5_5 pds-kernel-sizes: 5_5_5_5
pds-ffn-ratios: 8_8_8_8 pds-ffn-ratios: 4_4_4_4
pds-attn-heads: 8_8_8_8 pds-attn-heads: 8_8_8_8
share-decoder-input-output-embed: True share-decoder-input-output-embed: True
...@@ -33,11 +33,11 @@ activation-fn: relu ...@@ -33,11 +33,11 @@ activation-fn: relu
encoder-ffn-embed-dim: 2048 encoder-ffn-embed-dim: 2048
encoder-layers: 12 encoder-layers: 12
decoder-layers: 6 decoder-layers: 6
encoder-attention-heads: 4 encoder-attention-heads: 8
decoder-embed-dim: 512 decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048 decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4 decoder-attention-heads: 8
#load-pretrained-encoder-from: #load-pretrained-encoder-from:
#load-pretrained-decoder-from: #load-pretrained-decoder-from:
\ No newline at end of file
arch: pdss2t_transformer_s_16 arch: pdss2t_transformer_m_32
encoder-embed-dim: 512 encoder-embed-dim: 512
pds-stages: 5 pds-stages: 5
...@@ -13,7 +13,7 @@ pds-ds-method: conv ...@@ -13,7 +13,7 @@ pds-ds-method: conv
pds-embed-norm: True pds-embed-norm: True
pds-position-embed: 1_1_1_1_1 pds-position-embed: 1_1_1_1_1
pds-kernel-sizes: 5_5_5_5_5 pds-kernel-sizes: 5_5_5_5_5
pds-ffn-ratios: 8_8_8_8_8 pds-ffn-ratios: 4_4_4_4_4
pds-attn-heads: 8_8_8_8_8 pds-attn-heads: 8_8_8_8_8
share-decoder-input-output-embed: True share-decoder-input-output-embed: True
...@@ -33,11 +33,8 @@ activation-fn: relu ...@@ -33,11 +33,8 @@ activation-fn: relu
encoder-ffn-embed-dim: 2048 encoder-ffn-embed-dim: 2048
encoder-layers: 12 encoder-layers: 12
decoder-layers: 6 decoder-layers: 6
encoder-attention-heads: 4 encoder-attention-heads: 8
decoder-embed-dim: 512 decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048 decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4 decoder-attention-heads: 8
\ No newline at end of file
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
\ No newline at end of file
arch: pdss2t_transformer_s_8 arch: pdss2t_transformer_m_8
encoder-embed-dim: 512 encoder-embed-dim: 512
pds-stages: 4 pds-stages: 4
...@@ -13,7 +13,7 @@ pds-ds-method: conv ...@@ -13,7 +13,7 @@ pds-ds-method: conv
pds-embed-norm: True pds-embed-norm: True
pds-position-embed: 1_1_1_1 pds-position-embed: 1_1_1_1
pds-kernel-sizes: 5_5_5_5 pds-kernel-sizes: 5_5_5_5
pds-ffn-ratios: 8_8_8_8 pds-ffn-ratios: 4_4_4_4
pds-attn-heads: 8_8_8_8 pds-attn-heads: 8_8_8_8
share-decoder-input-output-embed: True share-decoder-input-output-embed: True
...@@ -33,11 +33,11 @@ activation-fn: relu ...@@ -33,11 +33,11 @@ activation-fn: relu
encoder-ffn-embed-dim: 2048 encoder-ffn-embed-dim: 2048
encoder-layers: 12 encoder-layers: 12
decoder-layers: 6 decoder-layers: 6
encoder-attention-heads: 4 encoder-attention-heads: 8
decoder-embed-dim: 512 decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048 decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4 decoder-attention-heads: 8
#load-pretrained-encoder-from: #load-pretrained-encoder-from:
#load-pretrained-decoder-from: #load-pretrained-decoder-from:
\ No newline at end of file
arch: pdss2t_transformer_s_16 arch: pdss2t_transformer_sd_16
encoder-embed-dim: 256 encoder-embed-dim: 256
pds-stages: 4 pds-stages: 4
...@@ -31,7 +31,7 @@ label_smoothing: 0.1 ...@@ -31,7 +31,7 @@ label_smoothing: 0.1
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
encoder-ffn-embed-dim: 2048 encoder-ffn-embed-dim: 2048
encoder-layers: 12 encoder-layers: 18
decoder-layers: 6 decoder-layers: 6
encoder-attention-heads: 4 encoder-attention-heads: 4
......
arch: pdss2t_transformer_s_32 arch: pdss2t_transformer_sd_32
encoder-embed-dim: 256 encoder-embed-dim: 256
pds-stages: 5 pds-stages: 5
......
arch: pdss2t_transformer_s_8 arch: pdss2t_transformer_sd_8
encoder-embed-dim: 256 encoder-embed-dim: 256
pds-stages: 4 pds-stages: 4
...@@ -38,6 +38,3 @@ encoder-attention-heads: 4 ...@@ -38,6 +38,3 @@ encoder-attention-heads: 4
decoder-embed-dim: 256 decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048 decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4 decoder-attention-heads: 4
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
\ No newline at end of file
encoder-attention-type: rel_pos encoder-attention-type: rel_pos
#encoder-attention-type: rel_pos_legacy #encoder-attention-type: rel_pos_legacy
#encoder-attention-type: rel_selfattn #encoder-attention-type: rel_selfattn
#encoder-attention-type: relative #encoder-attention-type: relative
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
gpu_num=1 gpu_num=1
data_dir= data_dir=
test_subset=(tst-COMMON) test_subset=(dev tst-COMMON)
exp_name= exp_name=
if [ "$#" -eq 1 ]; then if [ "$#" -eq 1 ]; then
......
...@@ -85,9 +85,8 @@ ctc_infer=0 ...@@ -85,9 +85,8 @@ ctc_infer=0
n_average=10 n_average=10
beam_size=5 beam_size=5
len_penalty=1.0 len_penalty=1.0
infer_score=1 infer_score=0
infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy" # infer_parameters="--cal-monotonic-cross-attn-weights --cal-localness --localness-window 0.1 --cal-topk-cross-attn-weights --topk-cross-attn-weights 15 --cal-entropy"
# Parsing Options # Parsing Options
if [[ ${share_dict} -eq 1 ]]; then if [[ ${share_dict} -eq 1 ]]; then
...@@ -428,7 +427,6 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then ...@@ -428,7 +427,6 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
--path ${model_dir}/${dec_model} --path ${model_dir}/${dec_model}
--results-path ${model_dir} --results-path ${model_dir}
--max-tokens ${max_tokens} --max-tokens ${max_tokens}
--batch-size 1
--beam ${beam_size} --beam ${beam_size}
--skip-invalid-size-inputs-valid-test --skip-invalid-size-inputs-valid-test
--lenpen ${len_penalty}" --lenpen ${len_penalty}"
......
#!/usr/bin/env bash #!/usr/bin/env bash
# training the model
gpu_num=8 gpu_num=8
update_freq=1 update_freq=1
max_tokens=40000 max_tokens=40000
...@@ -37,7 +39,7 @@ train_config=$(echo ${config_list[*]} | sed 's/ /,/g') ...@@ -37,7 +39,7 @@ train_config=$(echo ${config_list[*]} | sed 's/ /,/g')
cmd="./run.sh cmd="./run.sh
--stage 1 --stage 1
--stop_stage 4 --stop_stage 2
--gpu_num ${gpu_num} --gpu_num ${gpu_num}
--update_freq ${update_freq} --update_freq ${update_freq}
--train_config ${train_config} --train_config ${train_config}
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论