Commit da4e7dc3 by xuchen

fix the bugs

parent 093098e4
arch: s2t_transformer_s
share-decoder-input-output-embed: True
optimizer: adam
......
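# Note: each key in these conf/*.yaml files mirrors the fairseq command-line flag
# of the same name; the run scripts below hand them to fairseq_cli/train.py via
# --train-config, so "train-subset: train_st" behaves like "--train-subset train_st".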
train-subset: train
valid-subset: dev
max-epoch: 100
max-update: 100000
patience: 20
best_checkpoint_metric: loss
maximize_best_checkpoint_metric: False
no-epoch-checkpoints: True
#keep-last-epochs: 10
keep-best-checkpoints: 10
num-workers: 8
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
skip-invalid-size-inputs-valid-test: True
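# keep-best-checkpoints: 10 retains the ten best checkpoints by validation loss;
# the decoding stage pairs it with best-checkpoint averaging, invoked later in
# this commit as:
#   python ${code_dir}/scripts/average_checkpoints.py
#     --inputs ${model_dir}
#     --num-best-checkpoints 10
#     --output ${model_dir}/avg_10_checkpoint.pt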
ctc-weight: 0.3
post-process: sentencepiece
@@ -15,6 +15,7 @@ report-accuracy: True
#load-pretrained-decoder-from:
arch: s2t_transformer_s
#arch: pdss2t_transformer_s
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
......
arch: pdss2t_transformer_s_8
pds-fusion: True
ctc-layer: 12
share-decoder-input-output-embed: True
optimizer: adam
......
arch: pdss2t_transformer_s_16
encoder-embed-dim: 256
pds-stages: 4
ctc-layer: 12
pds-layers: 2_2_6_2
pds-ratios: 2_2_2_2
pds-fusion: True
pds-fusion-method: all_conv
pds-embed-dims: 256_256_256_256
pds-ds-method: conv
pds-embed-norm: True
pds-position-embed: 1_1_1_1
pds-kernel-sizes: 5_5_5_5
pds-ffn-ratios: 8_8_8_8
pds-attn-heads: 4_4_4_4
share-decoder-input-output-embed: True
optimizer: adam
......
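# The pds-* options take one underscore-separated field per stage (pds-stages of
# them); the per-stage layers sum to the total encoder depth (2+2+6+2 = 12, the
# encoder-layers value used in these configs). An illustrative shell parse of the
# convention (not from the repo):
pds_layers=2_2_6_2
IFS='_' read -r -a layers <<< "${pds_layers}"
echo "stage 1: ${layers[0]} layers, stage 4: ${layers[3]} layers"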
arch: pdss2t_transformer_s_32
encoder-embed-dim: 256
pds-stages: 5
ctc-layer: 12
pds-layers: 2_2_3_3_2
pds-ratios: 2_2_2_2_2
pds-fusion: True
pds-fusion-method: all_conv
pds-embed-dims: 256_256_256_256_256
pds-ds-method: conv
pds-embed-norm: True
pds-position-embed: 1_1_1_1_1
pds-kernel-sizes: 5_5_5_5_5
pds-ffn-ratios: 8_8_8_8_8
pds-attn-heads: 4_4_4_4_4
share-decoder-input-output-embed: True
optimizer: adam
......
arch: pdss2t_transformer_s_8
encoder-embed-dim: 256
pds-stages: 4
ctc-layer: 12
pds-layers: 3_3_3_3
pds-ratios: 2_2_1_2
pds-fusion: True
pds-fusion-method: all_conv
pds-embed-dims: 256_256_256_256
pds-ds-method: conv
pds-embed-norm: True
pds-position-embed: 1_1_1_1
pds-kernel-sizes: 5_5_5_5
pds-ffn-ratios: 8_8_8_8
pds-attn-heads: 4_4_4_4
share-decoder-input-output-embed: True
optimizer: adam
......
@@ -10,6 +10,7 @@ if [ "$#" -eq 1 ]; then
exp_name=$1
fi
cer=1
n_average=10
beam_size=5
len_penalty=1.0
@@ -21,6 +22,7 @@ cmd="./run.sh
--stop_stage 2
--gpu_num ${gpu_num}
--exp_name ${exp_name}
--cer ${cer}
--n_average ${n_average}
--beam_size ${beam_size}
--len_penalty ${len_penalty}
......
@@ -24,31 +24,31 @@ stop_stage=0
gpu_num=0
update_freq=1
root_dir=~/st
code_dir=${root_dir}/Fairseq-S2T
pwd_dir=$PWD
# dataset
src_lang=zh
lang=${src_lang}
dataset=aishell
task=speech_to_text
vocab_type=unigram
vocab_type=char
vocab_size=5000
speed_perturb=1
lcrm=0
tokenizer=0
use_raw_audio=0
use_specific_dict=0
specific_prefix=st
specific_dir=${root_dir}/data/mustc/st/en-de
asr_vocab_prefix=spm_unigram10000_st_share
org_data_dir=${root_dir}/data/${dataset}
data_dir=${root_dir}/data/${dataset}/asr
train_split=train
valid_split=dev
test_split=test
@@ -71,6 +71,7 @@ max_tokens=40000
step_valid=0
# decoding setting
cer=1
dec_model=checkpoint_best.pt
n_average=10
beam_size=5
@@ -96,6 +97,10 @@ if [[ ${use_raw_audio} -eq 1 ]]; then
data_dir=${data_dir}_raw
exp_prefix=${exp_prefix}_raw
fi
if [[ "${vocab_type}" == "char" ]]; then
data_dir=${data_dir}_char
exp_prefix=${exp_prefix}_char
fi
. ./local/parse_options.sh || exit 1;
@@ -106,7 +111,7 @@ if [[ -z ${exp_name} ]]; then
exp_name=${exp_name}_${extra_tag}
fi
fi
model_dir=$code_dir/../checkpoints/$dataset/asr/${exp_name}
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
echo "stage -1: Data Download"
@@ -120,13 +125,19 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
if [[ ! -e ${data_dir} ]]; then
mkdir -p ${data_dir}
fi
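# The two checks below reuse fbank features already extracted beside this
# directory (e.g. by a sibling recipe) via symlinks instead of recomputing them.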
if [[ ! -f ${data_dir}/fbank80.zip && -f ${data_dir}/../fbank80.zip ]]; then
ln -s ${data_dir}/../fbank80.zip ${data_dir}
fi
if [[ ! -f ${data_dir}/fbank80_sp.zip && -f ${data_dir}/../fbank80_sp.zip ]]; then
ln -s ${data_dir}/../fbank80_sp.zip ${data_dir}
fi
cmd="python ${code_dir}/examples/speech_to_text/prep_audio_data.py
--data-root ${org_data_dir}
--output-root ${data_dir}
--task asr
--src-lang ${src_lang}
--splits ${valid_split},${test_split},${train_split}
--vocab-type ${vocab_type}
--vocab-size ${vocab_size}"
@@ -135,7 +146,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
--raw"
fi
if [[ ${use_specific_dict} -eq 1 ]]; then
cp -r ${specific_dir}/${asr_vocab_prefix}.* ${data_dir}
cmd="$cmd
--asr-prefix ${asr_vocab_prefix}"
fi
@@ -155,6 +166,15 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval ${cmd}
if [[ ! -f ${data_dir}/../fbank80.zip ]]; then
mv ${data_dir}/fbank80.zip ${data_dir}/..
ln -s ${data_dir}/../fbank80.zip ${data_dir}
fi
if [[ ! -f ${data_dir}/../fbank80_sp.zip ]]; then
mv ${data_dir}/fbank80_sp.zip ${data_dir}/..
ln -s ${data_dir}/../fbank80_sp.zip ${data_dir}
fi
fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
@@ -181,28 +201,26 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
cp ${BASH_SOURCE[0]} ${model_dir}
cp ${PWD}/train.sh ${model_dir}
extra_parameter="${extra_parameter}
--train-config ${pwd_dir}/conf/basis.yaml"
cp ${pwd_dir}/conf/basis.yaml ${model_dir}
config_list="${train_config//,/ }"
idx=1
for config in ${config_list[@]}
do
config_path=${pwd_dir}/conf/${config}.yaml
if [[ ! -f ${config_path} ]]; then
echo "No config file ${config_path}"
exit
fi
cp ${config_path} ${model_dir}
extra_parameter="${extra_parameter}
--train-config${idx} ${config_path}"
idx=$((idx + 1))
done
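# basis.yaml is now always passed as --train-config, so the loop starts idx at 1
# and appends the remaining configs as numbered options; a hypothetical
# train_config=ctc,conformer expands to:
#   --train-config  ${pwd_dir}/conf/basis.yaml
#   --train-config1 ${pwd_dir}/conf/ctc.yaml
#   --train-config2 ${pwd_dir}/conf/conformer.yaml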
cmd="python3 -u ${code_dir}/fairseq_cli/train.py
${data_dir}
--config-yaml ${data_config}
--task ${task}
@@ -286,12 +304,14 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
# Average models
dec_model=avg_${n_average}_checkpoint.pt
if [[ ! -f ${model_dir}/${dec_model} ]]; then
cmd="python ${code_dir}/scripts/average_checkpoints.py
--inputs ${model_dir}
--num-best-checkpoints ${n_average}
--output ${model_dir}/${dec_model}"
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval $cmd
fi
else
dec_model=${dec_model}
fi
@@ -311,8 +331,8 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
test_subset=${test_subset//,/ }
for subset in ${test_subset[@]}; do
subset=${subset}
cmd="python ${code_dir}/fairseq_cli/generate.py
${data_dir}
--config-yaml ${data_config}
--gen-subset ${subset}
@@ -323,10 +343,13 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
--beam ${beam_size}
--lenpen ${len_penalty}
--scoring wer
--wer-tokenizer 13a
--wer-lowercase
--wer-remove-punct
"
if [[ ${cer} -eq 1 ]]; then
cmd="${cmd}
--wer-char-level"
fi
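# --wer-char-level scores WER over characters, i.e. CER, the standard metric for
# character-based languages such as the Chinese AISHELL data targeted here.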
echo -e "\033[34mRun command: \n${cmd} \033[0m"
if [[ $eval -eq 1 ]]; then
......
set -e
eval=1
lcrm=0
root_dir=~/st/Fairseq-S2T
data_dir=/home/xuchen/st/data/wmt/test
......
arch: transformer
share-decoder-input-output-embed: True
optimizer: adam
@@ -28,19 +11,22 @@ adam_betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.3
attention-dropout: 0.0
activation-dropout: 0.0
activation-fn: relu
encoder-normalize-before: True
decoder-normalize-before: True
encoder-embed-dim: 512
encoder-ffn-embed-dim: 1024
encoder-layers: 6
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 512
decoder-ffn-embed-dim: 1024
decoder-attention-heads: 4
load-pretrained-encoder-from:
load-pretrained-decoder-from:
arch: transformer_iwslt_de_en
share-decoder-input-output-embed: True
optimizer: adam
#clip-norm: 10.0
lr-scheduler: inverse_sqrt
weight-decay: 0.0001
warmup-init-lr: 1e-7
warmup-updates: 4000
lr: 5e-4
adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.3
activation-fn: relu
encoder-normalize-before: False
decoder-normalize-before: False
encoder-embed-dim: 512
encoder-ffn-embed-dim: 1024
encoder-layers: 6
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 512
decoder-ffn-embed-dim: 1024
decoder-attention-heads: 4
load-pretrained-encoder-from:
load-pretrained-decoder-from:
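# transformer_iwslt_de_en is fairseq's stock small-Transformer preset for IWSLT14
# De-En; the explicit dimensions above (512/1024, 4 heads, 6 layers) restate its defaults.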
arch: transformer
share-decoder-input-output-embed: True
optimizer: adam
@@ -28,19 +11,22 @@ adam_betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy
label_smoothing: 0.1
dropout: 0.3
attention-dropout: 0.0
activation-dropout: 0.0
activation-fn: relu
encoder-normalize-before: True
decoder-normalize-before: True
encoder-embed-dim: 512
encoder-ffn-embed-dim: 1024
encoder-layers: 6
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 512
decoder-ffn-embed-dim: 1024
decoder-attention-heads: 4
load-pretrained-encoder-from:
load-pretrained-decoder-from:
train-subset: train
valid-subset: valid
max-epoch: 50
max-update: 50000
patience: 20
best_checkpoint_metric: loss
maximize_best_checkpoint_metric: False
no-epoch-checkpoints: True
#keep-last-epochs: 10
keep-best-checkpoints: 10
num-workers: 8
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
skip-invalid-size-inputs-valid-test: True
@@ -2,4 +2,4 @@
encoder-attention-type: relative
decoder-attention-type: relative
max-encoder-relative-length: 20
max-decoder-relative-length: 20
@@ -10,6 +10,7 @@ if [ "$#" -eq 1 ]; then
exp_name=$1
fi
sacrebleu=0
n_average=10
beam_size=5
len_penalty=1.0
@@ -21,6 +22,7 @@ cmd="./run.sh
--stop_stage 2
--gpu_num ${gpu_num}
--exp_name ${exp_name}
--sacrebleu ${sacrebleu}
--n_average ${n_average}
--beam_size ${beam_size}
--len_penalty ${len_penalty}
......
@@ -13,7 +13,7 @@ set -o pipefail
export PYTHONIOENCODING=UTF-8
eval=1
time=$(date "+%m%d")
stage=0
stop_stage=0
@@ -24,33 +24,34 @@ device=()
gpu_num=8
update_freq=1
root_dir=~/st
code_dir=${root_dir}/Fairseq-S2T
pwd_dir=$PWD
# dataset
src_lang=de
tgt_lang=en
lang=${src_lang}-${tgt_lang}
dataset=iwslt14
task=translation
vocab_type=unigram
vocab_size=10000
share_dict=1
lcrm=0
tokenizer=1
use_specific_dict=0
specific_prefix=st
specific_dir=${root_dir}/data/mustc/st/en-de/
src_vocab_prefix=spm_unigram10000_st_share
tgt_vocab_prefix=spm_unigram10000_st_share
org_data_dir=${root_dir}/data/${dataset}
data_dir=${root_dir}/data/${dataset}/mt
train_subset=train
valid_subset=dev
trans_subset=test
test_subset=test
# exp
@@ -70,6 +71,7 @@ step_valid=0
bleu_valid=0
# decoding setting
sacrebleu=0
dec_model=checkpoint_best.pt
n_average=10
beam_size=5
@@ -80,13 +82,19 @@ if [[ ${use_specific_dict} -eq 1 ]]; then
data_dir=${data_dir}/${specific_prefix}
mkdir -p ${data_dir}
else
if [[ "${vocab_type}" == "char" ]]; then
vocab_name=${vocab_type}
exp_prefix=${exp_prefix}_${vocab_type}
else
vocab_name=${vocab_type}${vocab_size}
fi
data_dir=${data_dir}/${vocab_name}
src_vocab_prefix=spm_${vocab_name}_${src_lang}
tgt_vocab_prefix=spm_${vocab_name}_${tgt_lang}
if [[ $share_dict -eq 1 ]]; then
data_dir=${data_dir}_share
src_vocab_prefix=spm_${vocab_name}_share
tgt_vocab_prefix=spm_${vocab_name}_share
fi
fi
if [[ ${lcrm} -eq 1 ]]; then
@@ -111,7 +119,7 @@ if [[ -z ${exp_name} ]]; then
exp_name=${exp_name}_${extra_tag}
fi
fi
model_dir=$code_dir/../checkpoints/$dataset/mt/${exp_name}
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
echo "stage -1: Data Download"
@@ -127,7 +135,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
if [[ ! -f ${data_dir}/${src_vocab_prefix}.txt || ! -f ${data_dir}/${tgt_vocab_prefix}.txt ]]; then
if [[ ${use_specific_dict} -eq 0 ]]; then
cmd="python ${code_dir}/examples/speech_to_text/prep_mt_data.py
--data-root ${org_data_dir}
--output-root ${data_dir}
--splits ${train_subset},${valid_subset},${trans_subset}
@@ -150,9 +158,14 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
mkdir -p ${data_dir}/data
for split in ${train_subset} ${valid_subset} ${trans_subset}; do
{
if [[ -d ${org_data_dir}/data/${split}/txt ]]; then
txt_dir=${org_data_dir}/data/${split}/txt
else
txt_dir=${org_data_dir}/data/${split}
fi
cmd="cat ${txt_dir}/${split}.${src_lang}"
if [[ ${lcrm} -eq 1 ]]; then
cmd="python local/lower_rm.py ${org_data_dir}/data/${split}.${src_lang}"
fi
cmd="${cmd}
| spm_encode --model ${data_dir}/${src_vocab_prefix}.model
@@ -165,7 +178,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
cmd="spm_encode
--model ${data_dir}/${tgt_vocab_prefix}.model
--output_format=piece
< ${txt_dir}/${split}.${tgt_lang}
> ${data_dir}/data/${split}.${tgt_lang}"
echo -e "\033[34mRun command: \n${cmd} \033[0m"
@@ -174,7 +187,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
done
wait
cmd="python ${code_dir}/fairseq_cli/preprocess.py
--source-lang ${src_lang} --target-lang ${tgt_lang}
--trainpref ${data_dir}/data/${train_subset}
--validpref ${data_dir}/data/${valid_subset}
@@ -214,28 +227,26 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
cp ${BASH_SOURCE[0]} ${model_dir}
cp ${PWD}/train.sh ${model_dir}
extra_parameter="${extra_parameter}
--train-config ${pwd_dir}/conf/basis.yaml"
cp ${pwd_dir}/conf/basis.yaml ${model_dir}
config_list="${train_config//,/ }"
idx=1
for config in ${config_list[@]}
do
config_path=${pwd_dir}/conf/${config}.yaml
if [[ ! -f ${config_path} ]]; then
echo "No config file ${config_path}"
exit
fi
cp ${config_path} ${model_dir}
extra_parameter="${extra_parameter}
--train-config${idx} ${config_path}"
idx=$((idx + 1))
done
cmd="python3 -u ${code_dir}/fairseq_cli/train.py
${data_dir}
--source-lang ${src_lang}
--target-lang ${tgt_lang}
@@ -263,13 +274,9 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
if [[ $step_valid -eq 1 ]]; then
validate_interval=1
save_interval=1
no_epoch_checkpoints=0
save_interval_updates=500
keep_interval_updates=10
fi
if [[ $bleu_valid -eq 1 ]]; then
cmd="$cmd
@@ -292,10 +299,6 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
cmd="${cmd}
--save-interval $save_interval "
fi
if [[ -n $save_interval_updates ]]; then
cmd="${cmd}
--save-interval-updates $save_interval_updates"
@@ -329,9 +332,9 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
# Average models
dec_model=avg_${n_average}_checkpoint.pt
cmd="python ${code_dir}/scripts/average_checkpoints.py
--inputs ${model_dir}
--num-best-checkpoints ${n_average}
--output ${model_dir}/${dec_model}"
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval $cmd
@@ -354,7 +357,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
test_subset=(${test_subset//,/ })
for subset in ${test_subset[@]}; do
cmd="python ${code_dir}/fairseq_cli/generate.py
${data_dir}
--source-lang ${src_lang}
--target-lang ${tgt_lang}
@@ -365,14 +368,17 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
--max-tokens ${max_tokens}
--beam ${beam_size}
--lenpen ${len_penalty}
--post-process sentencepiece"
if [[ ${sacrebleu} -eq 1 ]]; then
cmd="${cmd}
--scoring sacrebleu"
if [[ ${tokenizer} -eq 1 ]]; then
cmd="${cmd}
--tokenizer moses
--moses-source-lang ${src_lang}
--moses-target-lang ${tgt_lang}"
fi
fi
echo -e "\033[34mRun command: \n${cmd} \033[0m"
......
arch: pdss2t_transformer_s_16
encoder-embed-dim: 256
pyramid-stages: 4
#pyramid-dropout: 0
pyramid-layers: 2_2_6_2
pyramid-ratios: 2_2_2_2
pyramid-fusion: True
pyramid-fusion-method: all_conv
pyramid-embed-dims: 256_256_256_256
pyramid-ds-method: conv
pyramid-embed-norm: True
pyramid-position-embed: 1_1_1_1
pyramid-kernel-sizes: 5_5_5_5
pyramid-ffn-ratios: 8_8_8_8
pyramid-attn-heads: 4_4_4_4
train-subset: train_asr
valid-subset: dev_asr
max-epoch: 100
max-update: 100000
num-workers: 8
patience: 10
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
#adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
arch: pdss2t_transformer_s_32
encoder-embed-dim: 256
pyramid-stages: 5
#pyramid-dropout: 0
pyramid-layers: 2_2_3_3_2
pyramid-ratios: 2_2_2_2_2
pyramid-fusion: True
pyramid-fusion-method: all_conv
pyramid-embed-dims: 256_256_256_256_256
pyramid-ds-method: conv
pyramid-embed-norm: True
pyramid-position-embed: 1_1_1_1_1
pyramid-kernel-sizes: 5_5_5_5_5
pyramid-ffn-ratios: 8_8_8_8_8
pyramid-attn-heads: 4_4_4_4_4
train-subset: train_asr
valid-subset: dev_asr
max-epoch: 100
max-update: 100000
num-workers: 8
patience: 10
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
#adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
arch: pdss2t_transformer_s_8
encoder-embed-dim: 256
pyramid-stages: 4
#pyramid-dropout: 0
pyramid-layers: 3_3_3_3
pyramid-ratios: 2_2_1_2
pyramid-fusion: True
pyramid-fusion-method: all_conv
pyramid-embed-dims: 256_256_256_256
pyramid-ds-method: conv
pyramid-embed-norm: True
pyramid-position-embed: 1_1_1_1
pyramid-kernel-sizes: 5_5_5_5
pyramid-ffn-ratios: 8_8_8_8
pyramid-attn-heads: 4_4_4_4
train-subset: train_asr
valid-subset: dev_asr
max-epoch: 100
max-update: 100000
num-workers: 8
patience: 10
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
#adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
set -e
eval=1
lcrm=1
tokenizer=0
root_dir=~/st/Fairseq-S2T
data_dir=/home/xuchen/st/data/test
vocab_dir=/home/xuchen/st/data/mustc/st_lcrm/en-de
asr_vocab_prefix=spm_unigram10000_st_share
st_vocab_prefix=spm_unigram10000_st_share
src_lang=en
tgt_lang=de
splits=(2019)
splits=$(echo ${splits[*]} | sed 's/ /_/g')
cp -r ${vocab_dir}/${asr_vocab_prefix}.* ${data_dir}/${src_lang}-${tgt_lang}
cp -r ${vocab_dir}/${st_vocab_prefix}.* ${data_dir}/${src_lang}-${tgt_lang}
rm -rf ${data_dir}/${src_lang}-${tgt_lang}/fbank80.zip
cmd="python ${root_dir}/examples/speech_to_text/prep_st_data.py
--data-root ${data_dir}
--output-root ${data_dir}
--splits ${splits}
--task st
--src-lang ${src_lang}
--tgt-lang ${tgt_lang}
--add-src
--share
--asr-prefix ${asr_vocab_prefix}
--st-spm-prefix ${st_vocab_prefix}
--cmvn-type utterance"
if [[ ${lcrm} -eq 1 ]]; then
cmd="$cmd
--lowercase-src
--rm-punc-src"
fi
if [[ ${tokenizer} -eq 1 ]]; then
cmd="$cmd
--tokenizer"
fi
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval ${cmd}
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 31
ctc-weight: 0.3
use-enc-dlcl: True
use-dec-dlcl: True
encoder-attention-type: local
hard-mask-window: 0
gauss-mask-sigma: 3
init-mask-weight: 0
arch: pdss2t_transformer_s_16
encoder-embed-dim: 256
pyramid-stages: 4
#pyramid-dropout: 0
pyramid-layers: 2_2_6_2
pyramid-ratios: 2_2_2_2
pyramid-fusion: True
pyramid-fusion-method: all_conv
pyramid-embed-dims: 256_256_256_256
pyramid-ds-method: conv
pyramid-embed-norm: True
pyramid-position-embed: 1_1_1_1
pyramid-kernel-sizes: 5_5_5_5
pyramid-ffn-ratios: 8_8_8_8
pyramid-attn-heads: 4_4_4_4
train-subset: train_st
valid-subset: dev_st
max-epoch: 100
max-update: 100000
num-workers: 8
patience: 10
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
#adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
arch: pdss2t_transformer_s_32
encoder-embed-dim: 256
pyramid-stages: 5
#pyramid-dropout: 0
pyramid-layers: 2_2_3_3_2
pyramid-ratios: 2_2_2_2_2
pyramid-fusion: True
pyramid-fusion-method: all_conv
pyramid-embed-dims: 256_256_256_256_256
pyramid-ds-method: conv
pyramid-embed-norm: True
pyramid-position-embed: 1_1_1_1_1
pyramid-kernel-sizes: 5_5_5_5_5
pyramid-ffn-ratios: 8_8_8_8_8
pyramid-attn-heads: 4_4_4_4_4
train-subset: train_st
valid-subset: dev_st
max-epoch: 100
max-update: 100000
num-workers: 8
patience: 10
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
#adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
arch: pdss2t_transformer_s_8
encoder-embed-dim: 256
pyramid-stages: 4
#pyramid-dropout: 0
pyramid-layers: 3_3_3_3
pyramid-ratios: 2_2_1_2
pyramid-fusion: True
pyramid-fusion-method: all_conv
pyramid-embed-dims: 256_256_256_256
pyramid-ds-method: conv
pyramid-embed-norm: True
pyramid-position-embed: 1_1_1_1
pyramid-kernel-sizes: 5_5_5_5
pyramid-ffn-ratios: 8_8_8_8
pyramid-attn-heads: 4_4_4_4
train-subset: train_st
valid-subset: dev_st
max-epoch: 100
max-update: 100000
num-workers: 8
patience: 10
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
#adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
activation-fn: relu
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
encoder-attention-type: rel_selfattn
#encoder-attention-type: relative
#decoder-attention-type: relative
#max-encoder-relative-length: 100
#max-decoder-relative-length: 20
train-subset: train_st
valid-subset: dev_st
max-epoch: 100
max-update: 100000
num-workers: 8
patience: 10
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
#load-pretrained-encoder-from:
#load-pretrained-acoustic-encoder-from:
#load-pretrained-text-encoder-from:
#load-pretrained-decoder-from:
arch: s2t_sate
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
#adam_betas: (0.9,0.98)
ctc-weight: 0.3
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True
decoder-normalize-before: True
conv-kernel-sizes: 5,5
conv-channels: 1024
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 12
text-encoder-layers: 6
decoder-layers: 6
encoder-attention-heads: 4
#macaron-style: True
#use-cnn-module: True
#cnn-module-kernel: 31
#acoustic-encoder: pds
acoustic-encoder: transformer
adapter: league
encoder-embed-dim: 256
pyramid-stages: 4
#pyramid-dropout: 0
pyramid-layers: 3_3_3_3
pyramid-ratios: 2_2_1_2
pyramid-fusion: True
pyramid-fusion-method: all_conv
pyramid-embed-dims: 256_256_256_256
pyramid-ds-method: conv
pyramid-embed-norm: True
pyramid-position-embed: 1_1_1_1
pyramid-kernel-sizes: 5_5_5_5
pyramid-ffn-ratios: 8_8_8_8
pyramid-attn-heads: 4_4_4_4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
#! /bin/bash
gpu_num=1
data_dir=
test_subset=(tst-COMMON)
exp_name=
if [ "$#" -eq 1 ]; then
exp_name=$1
fi
n_average=10
beam_size=5
len_penalty=1.0
max_tokens=80000
dec_model=checkpoint_best.pt
cmd="./run.sh
--stage 2
--stop_stage 2
--gpu_num ${gpu_num}
--exp_name ${exp_name}
--n_average ${n_average}
--beam_size ${beam_size}
--len_penalty ${len_penalty}
--max_tokens ${max_tokens}
--dec_model ${dec_model}
"
if [[ -n ${data_dir} ]]; then
cmd="$cmd --data_dir ${data_dir}"
fi
if [[ ${#test_subset[@]} -ne 0 ]]; then
subsets=$(echo ${test_subset[*]} | sed 's/ /,/g')
cmd="$cmd --test_subset ${subsets}"
fi
echo $cmd
eval $cmd
set -e
gpu_num=1
root_dir=/home/xuchen/st/Fairseq-S2T
ckpt=/home/xuchen/st/checkpoints/mustc-v2/st
model_txt=$1
set=$2
test_subset=$3
#data_dir=/home/xuchen/st/data/mustc-v2/st_lcrm/en-de
#test_subset=(tst-COMMON)
data_dir=/media/data/tst/$set/en-de
#test_subset=(office)
#test_subset=(webrtc1)
#test_subset=(adap2)
data_config=config_st_share.yaml
result_file=./result
beam_size=5
lenpen=0.6
max_tokens=10000
models=()
i=0
for line in `cat $model_txt`; do
i=`expr $i + 1`
model_dir=$ckpt/$line
[[ ! -d $model_dir ]] && echo $model_dir && exit 1;
if [[ -f $model_dir/avg_10_checkpoint.pt ]]; then
model=$model_dir/avg_10_checkpoint.pt
else
model=$model_dir/checkpoint_best.pt
fi
[[ ! -f $model ]] && echo $model && exit 1;
models[$i]=$model
done
models=`echo ${models[*]} | sed 's/ /:/g'`
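# fairseq's generate.py treats a colon-separated --path value as a model ensemble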
res_dir=$ckpt/ensemble/$set
i=0
while :
do
if [[ -d $res_dir/$i ]]; then
i=`expr $i + 1`
else
res_dir=$res_dir/$i
break
fi
done
mkdir -p $res_dir
cp $model_txt $res_dir
if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
if [[ ${gpu_num} -eq 0 ]]; then
device=()
else
source ./local/utils.sh
device=$(get_devices $gpu_num 0)
fi
fi
export CUDA_VISIBLE_DEVICES=${device}
for subset in ${test_subset[@]}; do
subset=${subset}_st
cmd="python ${root_dir}/fairseq_cli/generate.py
${data_dir}
--config-yaml ${data_config}
--gen-subset ${subset}
--task speech_to_text
--path ${models}
--results-path ${res_dir}
--skip-invalid-size-inputs-valid-test
--max-tokens ${max_tokens}
--beam ${beam_size}
--lenpen ${lenpen}
--scoring sacrebleu"
echo -e "\033[34mRun command: \n${cmd} \033[0m"
eval $cmd
tail -n 1 ${res_dir}/generate-${subset}.txt
cd $res_dir
evaluate.sh translation-${subset}.txt $set
cd -
done
gpu_num=4
cmd="sh train.sh"
while :
do
record=$(mktemp -t temp.record.XXXXXX)
gpustat > $record
all_devices=$(seq 0 "$(sed '1,2d' ${record} | wc -l)");
count=0
for dev in ${all_devices[@]}
do
line=$((dev + 2))
use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)
if [[ $use -lt 100 ]]; then
device[$count]=$dev
count=$((count + 1))
if [[ $count -eq $gpu_num ]]; then
break
fi
fi
done
if [[ ${#device[@]} -lt $gpu_num ]]; then
sleep 60s
else
echo "Run $cmd"
eval $cmd
sleep 10s
exit
fi
done
#!/usr/bin/env bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey);
# Arnab Ghoshal, Karel Vesely
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# Parse command-line options.
# To be sourced by another script (as in ". parse_options.sh").
# Option format is: --option-name arg
# and shell variable "option_name" gets set to value "arg."
# The exception is --help, which takes no arguments, but prints the
# $help_message variable (if defined).
###
### The --config file options have lower priority to command line
### options, so we need to import them first...
###
# Now import all the configs specified by command-line, in left-to-right order
for ((argpos=1; argpos<$#; argpos++)); do
if [ "${!argpos}" == "--config" ]; then
argpos_plus1=$((argpos+1))
config=${!argpos_plus1}
[ ! -r $config ] && echo "$0: missing config '$config'" && exit 1
. $config # source the config file.
fi
done
###
### Now we process the command line options
###
while true; do
[ -z "${1:-}" ] && break; # break if there are no arguments
case "$1" in
# If the enclosing script is called with --help option, print the help
# message and exit. Scripts should put help messages in $help_message
--help|-h) if [ -z "$help_message" ]; then echo "No help found." 1>&2;
else printf "$help_message\n" 1>&2 ; fi;
exit 0 ;;
--*=*) echo "$0: options to scripts must be of the form --name value, got '$1'"
exit 1 ;;
# If the first command-line argument begins with "--" (e.g. --foo-bar),
# then work out the variable name as $name, which will equal "foo_bar".
--*) name=`echo "$1" | sed s/^--// | sed s/-/_/g`;
# Next we test whether the variable in question is undefined -- if so it's
# an invalid option and we die. Note: $0 evaluates to the name of the
# enclosing script.
# The test [ -z ${foo_bar+xxx} ] will return true if the variable foo_bar
# is undefined. We then have to wrap this test inside "eval" because
# foo_bar is itself inside a variable ($name).
eval '[ -z "${'$name'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
oldval="`eval echo \\$$name`";
# Work out whether we seem to be expecting a Boolean argument.
if [ "$oldval" == "true" ] || [ "$oldval" == "false" ]; then
was_bool=true;
else
was_bool=false;
fi
# Set the variable to the right value-- the escaped quotes make it work if
# the option had spaces, like --cmd "queue.pl -sync y"
eval $name=\"$2\";
# Check that Boolean-valued arguments are really Boolean.
if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
exit 1;
fi
shift 2;
;;
*) break;
esac
done
# Check for an empty argument to the --cmd option, which can easily occur as a
# result of scripting errors.
[ ! -z "${cmd+xxx}" ] && [ -z "$cmd" ] && echo "$0: empty argument to --cmd option" 1>&2 && exit 1;
true; # so this script returns exit code 0.
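# A minimal usage sketch (hypothetical script): assign defaults, then source this
# file so command-line options of the form --name value can override them.
#   gpu_num=1                            # default
#   . ./local/parse_options.sh || exit 1
#   echo ${gpu_num}                      # ./demo.sh --gpu_num 4  prints 4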
get_devices(){
gpu_num=$1
use_cpu=$2
device=()
while :
do
record=$(mktemp -t temp.record.XXXXXX)
gpustat > $record                # snapshot the current GPU status
all_devices=$(seq 0 "$(sed '1,2d' ${record} | wc -l)");
count=0
for dev in ${all_devices[@]}
do
line=$((dev + 2))                # device $dev is reported on line $dev+2 of the snapshot
use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)   # used memory (MB)
if [[ $use -lt 100 ]]; then      # treat a card using less than 100 MB as idle
device[$count]=$dev
count=$((count + 1))
if [[ $count -eq $gpu_num ]]; then
break
fi
fi
done
if [[ ${#device[@]} -lt $gpu_num ]]; then
if [[ $use_cpu -eq 1 ]]; then
device=(-1)
else
sleep 60s
fi
else
break
fi
done
echo ${device[*]} | sed 's/ /,/g'
return $?
}
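# The run scripts above consume this helper as follows:
#   source ./local/utils.sh
#   device=$(get_devices $gpu_num 0)     # blocks until $gpu_num idle GPUs are free
#   export CUDA_VISIBLE_DEVICES=${device}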
#! /bin/bash
# Processing MuST-C Datasets
# Copyright 2021 Natural Language Processing Laboratory
# Xu Chen (xuchenneu@163.com)
# Set bash to 'debug' mode, it will exit on :
# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
set -e
#set -u
set -o pipefail
export PYTHONIOENCODING=UTF-8
eval=1
time=$(date "+%m%d_%H%M")
stage=0
stop_stage=0
######## hardware ########
# devices
#device=()
gpu_num=8
update_freq=1
root_dir=~/st/Fairseq-S2T
pwd_dir=$PWD
# dataset
src_lang=en
tgt_lang=de
lang=${src_lang}-${tgt_lang}
dataset=st
task=speech_to_text
vocab_type=unigram
asr_vocab_size=5000
vocab_size=10000
share_dict=1
speed_perturb=0
lcrm=0
tokenizer=0
use_specific_dict=0
specific_prefix=valid
specific_dir=/home/xuchen/st/data/mustc/st_lcrm/en-de
asr_vocab_prefix=spm_unigram10000_st_share
st_vocab_prefix=spm_unigram10000_st_share
org_data_dir=~/st/data/${dataset}
data_dir=~/st/data/${dataset}/st
test_subset=tst-COMMON
# exp
exp_prefix=$(date "+%m%d")
extra_tag=
extra_parameter=
exp_tag=baseline
exp_name=
# config
train_config=ctc
# training setting
fp16=1
max_tokens=40000
step_valid=0
bleu_valid=0
# decoding setting
dec_model=checkpoint_best.pt
n_average=10
beam_size=5
len_penalty=1.0
if [[ ${share_dict} -eq 1 ]]; then
data_config=config_st_share.yaml
else
data_config=config_st.yaml
fi
if [[ ${speed_perturb} -eq 1 ]]; then
data_dir=${data_dir}_sp
exp_prefix=${exp_prefix}_sp
fi
if [[ ${lcrm} -eq 1 ]]; then
data_dir=${data_dir}_lcrm
exp_prefix=${exp_prefix}_lcrm
fi
if [[ ${use_specific_dict} -eq 1 ]]; then
data_dir=${data_dir}_${specific_prefix}
exp_prefix=${exp_prefix}_${specific_prefix}
fi
if [[ ${tokenizer} -eq 1 ]]; then
data_dir=${data_dir}_tok
exp_prefix=${exp_prefix}_tok
fi
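# Each enabled preprocessing option appends a suffix, so with speed_perturb=1 and
# tokenizer=1 the working directory resolves to ~/st/data/st/st_sp_tok and the
# experiment prefix gains the matching _sp and _tok tags.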
. ./local/parse_options.sh || exit 1;
if [[ -z ${exp_name} ]]; then
config_string=${train_config//,/_}
exp_name=${exp_prefix}_${config_string}_${exp_tag}
if [[ -n ${extra_tag} ]]; then
exp_name=${exp_name}_${extra_tag}
fi
fi
model_dir=$root_dir/../checkpoints/$dataset/st/${exp_name}
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
echo "stage -1: Data Download"
# pass
fi
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
### Task dependent. You have to make data the following preparation part by yourself.
### But you can utilize Kaldi recipes in most cases
echo "stage 0: ASR Data Preparation"
if [[ ! -e ${data_dir}/${lang} ]]; then
mkdir -p ${data_dir}/${lang}
fi
cmd="python ${root_dir}/examples/speech_to_text/prep_asr_data.py
--data-root ${org_data_dir}
--output-root ${data_dir}
--task asr
--vocab-type ${vocab_type}
--vocab-size ${asr_vocab_size}"
if [[ ${speed_perturb} -eq 1 ]]; then
cmd="$cmd
--speed-perturb"
fi
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 && ${share_dict} -ne 1 && ${use_specific_dict} -ne 1 ]] && eval $cmd
asr_prefix=spm_${vocab_type}${asr_vocab_size}_asr
echo "stage 0: ST Data Preparation"
cmd="python ${root_dir}/examples/speech_to_text/prep_st_data.py
--data-root ${org_data_dir}
--output-root ${data_dir}
--task st
--add-src
--cmvn-type utterance
--vocab-type ${vocab_type}
--vocab-size ${vocab_size}"
if [[ ${use_specific_dict} -eq 1 ]]; then
cp -r ${specific_dir}/${asr_vocab_prefix}.* ${data_dir}/${lang}
cp -r ${specific_dir}/${st_vocab_prefix}.* ${data_dir}/${lang}
if [[ $share_dict -eq 1 ]]; then
cmd="$cmd
--share
--st-spm-prefix ${st_vocab_prefix}"
else
cmd="$cmd
--st-spm-prefix ${st_vocab_prefix}
--asr-prefix ${asr_vocab_prefix}"
fi
else
if [[ $share_dict -eq 1 ]]; then
cmd="$cmd
--share"
else
cmd="$cmd
--asr-prefix ${asr_prefix}"
fi
fi
if [[ ${speed_perturb} -eq 1 ]]; then
cmd="$cmd
--speed-perturb"
fi
if [[ ${lcrm} -eq 1 ]]; then
cmd="$cmd
--lowercase-src
--rm-punc-src"
fi
if [[ ${tokenizer} -eq 1 ]]; then
cmd="$cmd
--tokenizer"
fi
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval ${cmd}
fi
data_dir=${data_dir}/${lang}
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
echo "stage 1: ST Network Training"
[[ ! -d ${data_dir} ]] && echo "The data dir ${data_dir} is not existing!" && exit 1;
if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
if [[ ${gpu_num} -eq 0 ]]; then
device=""
else
source ./local/utils.sh
device=$(get_devices $gpu_num 0)
fi
fi
echo -e "dev=${device} data=${data_dir} model=${model_dir}"
if [[ ! -d ${model_dir} ]]; then
mkdir -p ${model_dir}
else
echo "${model_dir} exists."
fi
cp ${BASH_SOURCE[0]} ${model_dir}
cp ${PWD}/train.sh ${model_dir}
config_list="${train_config//,/ }"
idx=0
for config in ${config_list[@]}
do
config_path=$pwd_dir/conf/${config}.yaml
if [[ ! -f ${config_path} ]]; then
echo "No config file ${config_path}"
exit
fi
cp ${config_path} ${model_dir}
if [[ idx -eq 0 ]]; then
extra_parameter="${extra_parameter}
--train-config ${config_path}"
else
extra_parameter="${extra_parameter}
--train-config${idx} ${config_path}"
fi
idx=$((idx + 1))
done
cmd="python3 -u ${root_dir}/fairseq_cli/train.py
${data_dir}
--config-yaml ${data_config}
--task ${task}
--max-tokens ${max_tokens}
--skip-invalid-size-inputs-valid-test
--update-freq ${update_freq}
--log-interval 100
--save-dir ${model_dir}
--tensorboard-logdir ${model_dir}"
if [[ -n ${extra_parameter} ]]; then
cmd="${cmd}
${extra_parameter}"
fi
if [[ ${gpu_num} -gt 0 ]]; then
cmd="${cmd}
--distributed-world-size $gpu_num
--ddp-backend no_c10d"
fi
if [[ $fp16 -eq 1 ]]; then
cmd="${cmd}
--fp16"
fi
if [[ $step_valid -eq 1 ]]; then
validate_interval=1
save_interval=1
keep_last_epochs=10
no_epoch_checkpoints=0
save_interval_updates=500
keep_interval_updates=10
else
validate_interval=1
keep_last_epochs=10
fi
if [[ $bleu_valid -eq 1 ]]; then
cmd="$cmd
--eval-bleu
--eval-bleu-args '{\"beam\": 1}'
--eval-tokenized-bleu
--eval-bleu-remove-bpe
--best-checkpoint-metric bleu
--maximize-best-checkpoint-metric"
fi
if [[ -n $no_epoch_checkpoints && $no_epoch_checkpoints -eq 1 ]]; then
cmd="$cmd
--no-epoch-checkpoints"
fi
if [[ -n $validate_interval ]]; then
cmd="${cmd}
--validate-interval $validate_interval "
fi
if [[ -n $save_interval ]]; then
cmd="${cmd}
--save-interval $save_interval "
fi
if [[ -n $keep_last_epochs ]]; then
cmd="${cmd}
--keep-last-epochs $keep_last_epochs "
fi
if [[ -n $save_interval_updates ]]; then
cmd="${cmd}
--save-interval-updates $save_interval_updates"
if [[ -n $keep_interval_updates ]]; then
cmd="${cmd}
--keep-interval-updates $keep_interval_updates"
fi
fi
echo -e "\033[34mRun command: \n${cmd} \033[0m"
# save info
log=./history.log
echo "${time} | ${device} | ${data_dir} | ${exp_name} | ${model_dir} " >> $log
tail -n 50 ${log} > tmp.log
mv tmp.log $log
export CUDA_VISIBLE_DEVICES=${device}
cmd="nohup ${cmd} >> ${model_dir}/train.log 2>&1 &"
if [[ $eval -eq 1 ]]; then
eval $cmd
sleep 2s
tail -n "$(wc -l ${model_dir}/train.log | awk '{print $1+1}')" -f ${model_dir}/train.log
fi
fi
wait
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
echo "stage 2: ST Decoding"
if [[ ${n_average} -ne 1 ]]; then
# Average models
dec_model=avg_${n_average}_checkpoint.pt
cmd="python ${root_dir}/scripts/average_checkpoints.py
--inputs ${model_dir}
--num-epoch-checkpoints ${n_average}
--output ${model_dir}/${dec_model}"
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval $cmd
else
dec_model=${dec_model}
fi
if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
if [[ ${gpu_num} -eq 0 ]]; then
device=""
else
source ./local/utils.sh
device=$(get_devices $gpu_num 0)
fi
fi
export CUDA_VISIBLE_DEVICES=${device}
result_file=${model_dir}/decode_result
[[ -f ${result_file} ]] && rm ${result_file}
test_subset=${test_subset//,/ }
for subset in ${test_subset[@]}; do
subset=${subset}_st
cmd="python ${root_dir}/fairseq_cli/generate.py
${data_dir}
--config-yaml ${data_config}
--gen-subset ${subset}
--task speech_to_text
--path ${model_dir}/${dec_model}
--results-path ${model_dir}
--max-tokens ${max_tokens}
--beam ${beam_size}
--lenpen ${len_penalty}
--scoring sacrebleu"
echo -e "\033[34mRun command: \n${cmd} \033[0m"
if [[ $eval -eq 1 ]]; then
eval $cmd
tail -n 1 ${model_dir}/generate-${subset}.txt >> ${result_file}
fi
done
cat ${result_file}
fi
#! /bin/bash
# training the model
gpu_num=8
update_freq=1
max_tokens=40000
extra_tag=
extra_parameter=
#extra_tag="${extra_tag}"
#extra_parameter="${extra_parameter} "
exp_tag=
#config_list=(base)
config_list=(ctc)
#config_list=(sate_ctc)
#config_list=(ctc conformer rpr)
#config_list=(base sate)
#config_list=(pds_base)
#config_list=(pds_base conformer)
# exp full name
exp_name=
train_config=$(echo ${config_list[*]} | sed 's/ /,/g')
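# e.g. config_list=(ctc conformer) yields train_config=ctc,conformer, which
# run.sh splits back into the individual conf/*.yaml files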
cmd="./run.sh
--stage 1
--stop_stage 1
--gpu_num ${gpu_num}
--update_freq ${update_freq}
--train_config ${train_config}
--max_tokens ${max_tokens}
"
if [[ -n ${exp_name} ]]; then
cmd="$cmd --exp_name ${exp_name}"
fi
if [[ -n ${exp_tag} ]]; then
cmd="$cmd --exp_tag ${exp_tag}"
fi
if [[ -n ${extra_tag} ]]; then
cmd="$cmd --extra_tag ${extra_tag}"
fi
if [[ -n ${extra_parameter} ]]; then
cmd="$cmd --extra_parameter \"${extra_parameter}\""
fi
echo ${cmd}
eval ${cmd}
arch: s2t_transformer_s
share-decoder-input-output-embed: True
optimizer: adam
@@ -39,4 +23,4 @@ encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
train-subset: train-clean-100,train-clean-360,train-other-500
valid-subset: dev-clean
max-epoch: 100
max-update: 300000
patience: 20
best-checkpoint-metric: loss
maximize-best-checkpoint-metric: False
no-epoch-checkpoints: True
#keep-last-epochs: 10
keep-best-checkpoints: 10
num-workers: 8
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
skip-invalid-size-inputs-valid-test: True
arch: s2t_transformer_m
share-decoder-input-output-embed: True
optimizer: adam
......
@@ -2,19 +2,19 @@
#arch: s2t_transformer_s
arch: s2t_sate
encoder-embed-dim: 256
pds-stages: 4
#pds-dropout: 0
pds-layers: 2_2_6_2
pds-ratios: 2_2_2_2
pds-fusion: True
pds-fusion-method: all_conv
pds-embed-dims: 256_256_256_256
pds-ds-method: conv
pds-embed-norm: True
pds-position-embed: 1_1_1_1
pds-kernel-sizes: 5_5_5_5
pds-ffn-ratios: 8_8_8_8
pds-attn-heads: 4_4_4_4
cl-dropout: True
cl-dropout-epoch: 50
......
-train-subset: train-clean-100
-valid-subset: dev-clean
-max-epoch: 100
-max-update: 100000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-acoustic-encoder-from:
-#load-pretrained-text-encoder-from:
-#load-pretrained-decoder-from:
-#load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/1007_st_ctc_baseline/avg_10_checkpoint.pt
-#load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/1111_st_ctc_conformer_lr0.001/avg_10_checkpoint.pt
-#load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/1007_st_pyramid4_all256_3333_sr8_ctc/avg_10_checkpoint.pt
-#load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/1114_st_pyramid4_all256_ctc_fix/avg_10_checkpoint.pt
-#load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/1015_st_pyramid4_all256_conformer_baseline/avg_10_checkpoint.pt
-#load-pretrained-acoustic-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/1111_st_pyramid4_all256_conformer_ctc/avg_10_checkpoint.pt
-arch: s2t_sate
-share-decoder-input-output-embed: True
-optimizer: adam
-clip-norm: 10.0
-lr-scheduler: inverse_sqrt
-warmup-init-lr: 1e-7
-warmup-updates: 10000
-lr: 2e-3
-dropout: 0.1
-activation-fn: relu
-encoder-ffn-embed-dim: 2048
-encoder-layers: 12
-text-encoder-layers: 6
-decoder-layers: 6
-encoder-attention-heads: 4
-#macaron-style: True
-#use-cnn-module: True
-#cnn-module-kernel: 31
-#acoustic-encoder: transformer
-#acoustic-encoder: conformer
-acoustic-encoder: pyramid
-adapter: league
-#adapter: none
-#adapter: context
-encoder-embed-dim: 256
-pyramid-stages: 4
-#pyramid-dropout: 0
-pyramid-layers: 3_3_3_3
-pyramid-sr-ratios: 2_2_1_2
-pyramid-embed-dims: 256_256_256_256
-pyramid-fuse: True
-pyramid-reduced-embed: conv
-pyramid-embed-norm: True
-pyramid-position-embed: 1_1_1_1
-pyramid-kernel-sizes: 5_5_5_5
-pyramid-ffn-ratios: 8_8_8_8
-pyramid-heads: 4_4_4_4
\ No newline at end of file
arch: pdss2t_transformer_s_8
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
-max-epoch: 100
-max-update: 300000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
@@ -37,4 +20,4 @@ encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
\ No newline at end of file
arch: pdss2t_transformer_s_16
encoder-embed-dim: 256
-pyramid-stages: 4
+pds-stages: 4
-#pyramid-dropout: 0
+#pds-dropout: 0
-pyramid-layers: 2_2_6_2
+pds-layers: 2_2_6_2
-pyramid-ratios: 2_2_2_2
+pds-ratios: 2_2_2_2
-pyramid-fusion: True
+pds-fusion: True
-pyramid-fusion-method: all_conv
+pds-fusion-method: all_conv
-pyramid-embed-dims: 256_256_256_256
+pds-embed-dims: 256_256_256_256
-pyramid-ds-method: conv
+pds-ds-method: conv
-pyramid-embed-norm: True
+pds-embed-norm: True
-pyramid-position-embed: 1_1_1_1
+pds-position-embed: 1_1_1_1
-pyramid-kernel-sizes: 5_5_5_5
+pds-kernel-sizes: 5_5_5_5
-pyramid-ffn-ratios: 8_8_8_8
+pds-ffn-ratios: 8_8_8_8
-pyramid-attn-heads: 4_4_4_4
+pds-attn-heads: 4_4_4_4
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
-max-epoch: 100
-max-update: 300000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
@@ -52,4 +36,4 @@ encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
\ No newline at end of file
arch: pdss2t_transformer_s_32
encoder-embed-dim: 256
-pyramid-stages: 5
+pds-stages: 5
-#pyramid-dropout: 0
+#pds-dropout: 0
-pyramid-layers: 2_2_3_3_2
+pds-layers: 2_2_3_3_2
-pyramid-ratios: 2_2_2_2_2
+pds-ratios: 2_2_2_2_2
-pyramid-fusion: True
+pds-fusion: True
-pyramid-fusion-method: all_conv
+pds-fusion-method: all_conv
-pyramid-embed-dims: 256_256_256_256_256
+pds-embed-dims: 256_256_256_256_256
-pyramid-ds-method: conv
+pds-ds-method: conv
-pyramid-embed-norm: True
+pds-embed-norm: True
-pyramid-position-embed: 1_1_1_1_1
+pds-position-embed: 1_1_1_1_1
-pyramid-kernel-sizes: 5_5_5_5_5
+pds-kernel-sizes: 5_5_5_5_5
-pyramid-ffn-ratios: 8_8_8_8_8
+pds-ffn-ratios: 8_8_8_8_8
-pyramid-attn-heads: 4_4_4_4_4
+pds-attn-heads: 4_4_4_4_4
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
-max-epoch: 100
-max-update: 300000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
@@ -52,4 +36,4 @@ encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
\ No newline at end of file
arch: pdss2t_transformer_s_8
encoder-embed-dim: 256
-pyramid-stages: 4
+pds-stages: 4
-#pyramid-dropout: 0
+#pds-dropout: 0
-pyramid-layers: 3_3_3_3
+pds-layers: 3_3_3_3
-pyramid-ratios: 2_2_1_2
+pds-ratios: 2_2_1_2
-pyramid-fusion: True
+pds-fusion: True
-pyramid-fusion-method: all_conv
+pds-fusion-method: all_conv
-pyramid-embed-dims: 256_256_256_256
+pds-embed-dims: 256_256_256_256
-pyramid-ds-method: conv
+pds-ds-method: conv
-pyramid-embed-norm: True
+pds-embed-norm: True
-pyramid-position-embed: 1_1_1_1
+pds-position-embed: 1_1_1_1
-pyramid-kernel-sizes: 5_5_5_5
+pds-kernel-sizes: 5_5_5_5
-pyramid-ffn-ratios: 8_8_8_8
+pds-ffn-ratios: 8_8_8_8
-pyramid-attn-heads: 4_4_4_4
+pds-attn-heads: 4_4_4_4
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
-max-epoch: 100
-max-update: 300000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
@@ -52,4 +36,4 @@ encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
\ No newline at end of file
@@ -2,22 +2,6 @@ arch: pdss2t_transformer_m_8
#arch: pdss2t_transformer_m_16
#arch: pdss2t_transformer_m_32
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
-max-epoch: 100
-max-update: 300000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
...
arch: pdss2t_transformer_m_16
encoder-embed-dim: 512
-pyramid-stages: 4
+pds-stages: 4
-#pyramid-dropout: 0
+#pds-dropout: 0
-pyramid-layers: 2_2_6_2
+pds-layers: 2_2_6_2
-pyramid-ratios: 2_2_2_2
+pds-ratios: 2_2_2_2
-pyramid-fusion: True
+pds-fusion: True
-pyramid-fusion-method: all_conv
+pds-fusion-method: all_conv
-pyramid-embed-dims: 512_512_512_512
+pds-embed-dims: 512_512_512_512
-pyramid-ds-method: conv
+pds-ds-method: conv
-pyramid-embed-norm: True
+pds-embed-norm: True
-pyramid-position-embed: 1_1_1_1
+pds-position-embed: 1_1_1_1
-pyramid-kernel-sizes: 5_5_5_5
+pds-kernel-sizes: 5_5_5_5
-pyramid-ffn-ratios: 4_4_4_4
+pds-ffn-ratios: 4_4_4_4
-pyramid-attn-heads: 8_8_8_8
+pds-attn-heads: 8_8_8_8
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
-max-epoch: 100
-max-update: 300000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
...
arch: pdss2t_transformer_m_32
encoder-embed-dim: 512
-pyramid-stages: 5
+pds-stages: 5
-#pyramid-dropout: 0
+#pds-dropout: 0
-pyramid-layers: 2_2_3_3_2
+pds-layers: 2_2_3_3_2
-pyramid-ratios: 2_2_2_2_2
+pds-ratios: 2_2_2_2_2
-pyramid-fusion: True
+pds-fusion: True
-pyramid-fusion-method: all_conv
+pds-fusion-method: all_conv
-pyramid-embed-dims: 512_512_512_512_512
+pds-embed-dims: 512_512_512_512_512
-pyramid-ds-method: conv
+pds-ds-method: conv
-pyramid-embed-norm: True
+pds-embed-norm: True
-pyramid-position-embed: 1_1_1_1_1
+pds-position-embed: 1_1_1_1_1
-pyramid-kernel-sizes: 5_5_5_5_5
+pds-kernel-sizes: 5_5_5_5_5
-pyramid-ffn-ratios: 4_4_4_4_4
+pds-ffn-ratios: 4_4_4_4_4
-pyramid-attn-heads: 8_8_8_8_8
+pds-attn-heads: 8_8_8_8_8
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
-max-epoch: 100
-max-update: 300000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
...
arch: pdss2t_transformer_m_8
encoder-embed-dim: 512
-pyramid-stages: 4
+pds-stages: 4
-#pyramid-dropout: 0
+#pds-dropout: 0
-pyramid-layers: 3_3_3_3
+pds-layers: 3_3_3_3
-pyramid-ratios: 2_2_1_2
+pds-ratios: 2_2_1_2
-pyramid-fusion: True
+pds-fusion: True
-pyramid-fusion-method: all_conv
+pds-fusion-method: all_conv
-pyramid-embed-dims: 512_512_512_512
+pds-embed-dims: 512_512_512_512
-pyramid-ds-method: conv
+pds-ds-method: conv
-pyramid-embed-norm: True
+pds-embed-norm: True
-pyramid-position-embed: 1_1_1_1
+pds-position-embed: 1_1_1_1
-pyramid-kernel-sizes: 5_5_5_5
+pds-kernel-sizes: 5_5_5_5
-pyramid-ffn-ratios: 4_4_4_4
+pds-ffn-ratios: 4_4_4_4
-pyramid-attn-heads: 8_8_8_8
+pds-attn-heads: 8_8_8_8
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
-max-epoch: 100
-max-update: 300000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
...
@@ -2,22 +2,6 @@ arch: pdss2t_transformer_sd_8
#arch: pdss2t_transformer_sd_16
#arch: pdss2t_transformer_sd_32
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
-max-epoch: 100
-max-update: 300000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
...
arch: pdss2t_transformer_sd_16
encoder-embed-dim: 256
-pyramid-stages: 4
+pds-stages: 4
-#pyramid-dropout: 0
+#pds-dropout: 0
-pyramid-layers: 5_5_12_8
+pds-layers: 5_5_12_8
-pyramid-ratios: 2_2_2_2
+pds-ratios: 2_2_2_2
-pyramid-fusion: True
+pds-fusion: True
-pyramid-fusion-method: all_conv
+pds-fusion-method: all_conv
-pyramid-embed-dims: 256_256_256_256
+pds-embed-dims: 256_256_256_256
-pyramid-ds-method: conv
+pds-ds-method: conv
-pyramid-embed-norm: True
+pds-embed-norm: True
-pyramid-position-embed: 1_1_1_1
+pds-position-embed: 1_1_1_1
-pyramid-kernel-sizes: 5_5_5_5
+pds-kernel-sizes: 5_5_5_5
-pyramid-ffn-ratios: 8_8_8_8
+pds-ffn-ratios: 8_8_8_8
-pyramid-attn-heads: 4_4_4_4
+pds-attn-heads: 4_4_4_4
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
-max-epoch: 100
-max-update: 300000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
...
arch: pdss2t_transformer_sd_32
encoder-embed-dim: 256
-pyramid-stages: 5
+pds-stages: 5
-#pyramid-dropout: 0
+#pds-dropout: 0
-pyramid-layers: 5_5_7_7_6
+pds-layers: 5_5_7_7_6
-pyramid-ratios: 2_2_2_2_2
+pds-ratios: 2_2_2_2_2
-pyramid-fusion: True
+pds-fusion: True
-pyramid-fusion-method: all_conv
+pds-fusion-method: all_conv
-pyramid-embed-dims: 256_256_256_256_256
+pds-embed-dims: 256_256_256_256_256
-pyramid-ds-method: conv
+pds-ds-method: conv
-pyramid-embed-norm: True
+pds-embed-norm: True
-pyramid-position-embed: 1_1_1_1_1
+pds-position-embed: 1_1_1_1_1
-pyramid-kernel-sizes: 5_5_5_5_5
+pds-kernel-sizes: 5_5_5_5_5
-pyramid-ffn-ratios: 8_8_8_8_8
+pds-ffn-ratios: 8_8_8_8_8
-pyramid-attn-heads: 4_4_4_4_4
+pds-attn-heads: 4_4_4_4_4
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
-max-epoch: 100
-max-update: 300000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
...
arch: pdss2t_transformer_sd_8
encoder-embed-dim: 256
-pyramid-stages: 4
+pds-stages: 4
-#pyramid-dropout: 0
+#pds-dropout: 0
-pyramid-layers: 7_7_7_9
+pds-layers: 7_7_7_9
-pyramid-ratios: 2_2_1_2
+pds-ratios: 2_2_1_2
-pyramid-fusion: True
+pds-fusion: True
-pyramid-fusion-method: all_conv
+pds-fusion-method: all_conv
-pyramid-embed-dims: 256_256_256_256
+pds-embed-dims: 256_256_256_256
-pyramid-ds-method: conv
+pds-ds-method: conv
-pyramid-embed-norm: True
+pds-embed-norm: True
-pyramid-position-embed: 1_1_1_1
+pds-position-embed: 1_1_1_1
-pyramid-kernel-sizes: 5_5_5_5
+pds-kernel-sizes: 5_5_5_5
-pyramid-ffn-ratios: 8_8_8_8
+pds-ffn-ratios: 8_8_8_8
-pyramid-attn-heads: 4_4_4_4
+pds-attn-heads: 4_4_4_4
-train-subset: train-clean-100,train-clean-360,train-other-500
-valid-subset: dev-clean
-max-epoch: 100
-max-update: 300000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
...
@@ -24,7 +24,8 @@ device=()
gpu_num=8
update_freq=1
-root_dir=~/st/Fairseq-S2T
+root_dir=~/st
+code_dir=${root_dir}/Fairseq-S2T
pwd_dir=$PWD
# dataset
@@ -42,8 +43,8 @@ specific_prefix=valid
specific_dir=/home/xuchen/st/data/mustc/st_lcrm/en-de
asr_vocab_prefix=spm_unigram10000_st_share
-org_data_dir=~/st/data/${dataset}
+org_data_dir=${root_dir}/data/${dataset}
-data_dir=~/st/data/${dataset}
+data_dir=${root_dir}/data/${dataset}
test_subset=dev-clean,dev-other,test-clean,test-other
# exp
@@ -81,13 +82,12 @@ fi
if [[ -z ${exp_name} ]]; then
    config_string=${train_config//,/_}
-    # exp_name=${exp_prefix}_$(basename ${train_config%.*})_${exp_tag}
    exp_name=${exp_prefix}_${config_string}_${exp_tag}
    if [[ -n ${extra_tag} ]]; then
        exp_name=${exp_name}_${extra_tag}
    fi
fi
-model_dir=$root_dir/../checkpoints/$dataset/asr/${exp_name}
+model_dir=$code_dir/../checkpoints/$dataset/asr/${exp_name}
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    echo "stage -1: Data Download"
@@ -103,7 +103,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
        mkdir -p ${data_dir}
    fi
-    cmd="python ${root_dir}/examples/speech_to_text/prep_librispeech_data.py
+    cmd="python ${code_dir}/examples/speech_to_text/prep_librispeech_data.py
        --data-root ${org_data_dir}
        --output-root ${data_dir}
        --vocab-type ${vocab_type}
@@ -146,28 +146,26 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    cp ${BASH_SOURCE[0]} ${model_dir}
    cp ${PWD}/train.sh ${model_dir}
+    extra_parameter="${extra_parameter}
+                    --train-config ${pwd_dir}/conf/basis.yaml"
+    cp ${pwd_dir}/conf/basis.yaml ${model_dir}
    config_list="${train_config//,/ }"
-    idx=0
+    idx=1
    for config in ${config_list[@]}
    do
-        config_path=$pwd_dir/conf/${config}.yaml
+        config_path=${pwd_dir}/conf/${config}.yaml
        if [[ ! -f ${config_path} ]]; then
            echo "No config file ${config_path}"
            exit
        fi
        cp ${config_path} ${model_dir}
-        if [[ idx -eq 0 ]]; then
-            extra_parameter="${extra_parameter}
-                            --train-config ${config_path}"
-        else
-            extra_parameter="${extra_parameter}
-                            --train-config${idx} ${config_path}"
-        fi
+        extra_parameter="${extra_parameter}
+                        --train-config${idx} ${config_path}"
        idx=$((idx + 1))
    done
-    cmd="python3 -u ${root_dir}/fairseq_cli/train.py
+    cmd="python3 -u ${code_dir}/fairseq_cli/train.py
        ${data_dir}
        --config-yaml ${data_config}
        --task ${task}
@@ -252,9 +250,9 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
        dec_model=avg_${n_average}_checkpoint.pt
        if [[ ! -f ${model_dir}/${dec_model} ]]; then
-            cmd="python ${root_dir}/scripts/average_checkpoints.py
+            cmd="python ${code_dir}/scripts/average_checkpoints.py
                --inputs ${model_dir}
-                --num-epoch-checkpoints ${n_average}
+                --num-best-checkpoints ${n_average}
                --output ${model_dir}/${dec_model}"
            echo -e "\033[34mRun command: \n${cmd} \033[0m"
            [[ $eval -eq 1 ]] && eval $cmd
@@ -279,7 +277,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    test_subset=(${test_subset//,/ })
    for subset in ${test_subset[@]}; do
        subset=${subset}
-        cmd="python ${root_dir}/fairseq_cli/generate.py
+        cmd="python ${code_dir}/fairseq_cli/generate.py
            ${data_dir}
            --config-yaml ${data_config}
            --gen-subset ${subset}
...
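With the shared conf/basis.yaml split out, the training stage now always passes basis.yaml as the primary --train-config and numbers the per-experiment configs from 1. For train_config=base,ctc the assembled call comes out roughly as follows (a sketch, assuming the patched fairseq_cli/train.py in this repo accepts the numbered --train-configN overrides, as the loop implies):

python3 -u ${code_dir}/fairseq_cli/train.py ${data_dir} \
    --config-yaml ${data_config} \
    --train-config ${pwd_dir}/conf/basis.yaml \
    --train-config1 ${pwd_dir}/conf/base.yaml \
    --train-config2 ${pwd_dir}/conf/ctc.yaml
# later configs override earlier ones, so basis.yaml holds the common defaults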
-train-subset: train_asr
-valid-subset: dev_asr
-max-epoch: 100
-max-update: 100000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
arch: s2t_transformer_s
share-decoder-input-output-embed: True
optimizer: adam
...
+train-subset: train
+valid-subset: dev
+max-epoch: 100
+max-update: 100000
+patience: 20
+best_checkpoint_metric: loss
+maximize_best_checkpoint_metric: False
+no-epoch-checkpoints: True
+#keep-last-epochs: 10
+keep-best-checkpoints: 10
+num-workers: 8
+no-progress-bar: True
+log-interval: 100
+seed: 1
+report-accuracy: True
+skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
ctc-weight: 0.3
+post-process: sentencepiece
arch: pdss2t_transformer_s_8
+pds-fusion: True
+ctc-layer: 12
-train-subset: train_asr
-valid-subset: dev_asr
-max-epoch: 100
-max-update: 100000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
...
arch: pdss2t_transformer_s_16
encoder-embed-dim: 256
-pyramid-stages: 4
+pds-stages: 4
-#pyramid-dropout: 0
+ctc-layer: 12
-pyramid-layers: 2_2_6_2
+pds-layers: 2_2_6_2
-pyramid-ratios: 2_2_2_2
+pds-ratios: 2_2_2_2
-pyramid-fusion: True
+pds-fusion: True
-pyramid-fusion-method: all_conv
+pds-fusion-method: all_conv
-pyramid-embed-dims: 256_256_256_256
+pds-embed-dims: 256_256_256_256
-pyramid-ds-method: conv
+pds-ds-method: conv
-pyramid-embed-norm: True
+pds-embed-norm: True
-pyramid-position-embed: 1_1_1_1
+pds-position-embed: 1_1_1_1
-pyramid-kernel-sizes: 5_5_5_5
+pds-kernel-sizes: 5_5_5_5
-pyramid-ffn-ratios: 8_8_8_8
+pds-ffn-ratios: 8_8_8_8
-pyramid-attn-heads: 4_4_4_4
+pds-attn-heads: 4_4_4_4
-train-subset: train_asr
-valid-subset: dev_asr
-max-epoch: 100
-max-update: 100000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
...
arch: pdss2t_transformer_s_32
encoder-embed-dim: 256
-pyramid-stages: 5
+pds-stages: 5
-#pyramid-dropout: 0
+ctc-layer: 12
-pyramid-layers: 2_2_3_3_2
+pds-layers: 2_2_3_3_2
-pyramid-ratios: 2_2_2_2_2
+pds-ratios: 2_2_2_2_2
-pyramid-fusion: True
+pds-fusion: True
-pyramid-fusion-method: all_conv
+pds-fusion-method: all_conv
-pyramid-embed-dims: 256_256_256_256_256
+pds-embed-dims: 256_256_256_256_256
-pyramid-ds-method: conv
+pds-ds-method: conv
-pyramid-embed-norm: True
+pds-embed-norm: True
-pyramid-position-embed: 1_1_1_1_1
+pds-position-embed: 1_1_1_1_1
-pyramid-kernel-sizes: 5_5_5_5_5
+pds-kernel-sizes: 5_5_5_5_5
-pyramid-ffn-ratios: 8_8_8_8_8
+pds-ffn-ratios: 8_8_8_8_8
-pyramid-attn-heads: 4_4_4_4_4
+pds-attn-heads: 4_4_4_4_4
-train-subset: train_asr
-valid-subset: dev_asr
-max-epoch: 100
-max-update: 100000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
...
arch: pdss2t_transformer_s_8
encoder-embed-dim: 256
-pyramid-stages: 4
+pds-stages: 4
-#pyramid-dropout: 0
+ctc-layer: 12
-pyramid-layers: 3_3_3_3
+pds-layers: 3_3_3_3
-pyramid-ratios: 2_2_1_2
+pds-ratios: 2_2_1_2
-pyramid-fusion: True
+pds-fusion: True
-pyramid-fusion-method: all_conv
+pds-fusion-method: all_conv
-pyramid-embed-dims: 256_256_256_256
+pds-embed-dims: 256_256_256_256
-pyramid-ds-method: conv
+pds-ds-method: conv
-pyramid-embed-norm: True
+pds-embed-norm: True
-pyramid-position-embed: 1_1_1_1
+pds-position-embed: 1_1_1_1
-pyramid-kernel-sizes: 5_5_5_5
+pds-kernel-sizes: 5_5_5_5
-pyramid-ffn-ratios: 8_8_8_8
+pds-ffn-ratios: 8_8_8_8
-pyramid-attn-heads: 4_4_4_4
+pds-attn-heads: 4_4_4_4
-train-subset: train_asr
-valid-subset: dev_asr
-max-epoch: 100
-max-update: 100000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
...
@@ -24,7 +24,8 @@ stop_stage=0
gpu_num=8
update_freq=1
-root_dir=~/st/Fairseq-S2T
+root_dir=~/st
+code_dir=${root_dir}/Fairseq-S2T
pwd_dir=$PWD
# dataset
@@ -41,13 +42,16 @@ lcrm=0
tokenizer=0
use_raw_audio=0
-use_specific_dict=0
+use_specific_dict=1
specific_prefix=st
-specific_dir=/home/xuchen/st/data/mustc/st/en-de
+specific_dir=${root_dir}/data/mustc/st
asr_vocab_prefix=spm_unigram10000_st_share
-org_data_dir=~/st/data/${dataset}
+org_data_dir=${root_dir}/data/${dataset}
-data_dir=~/st/data/${dataset}/asr
+data_dir=${root_dir}/data/${dataset}/asr
+train_split=train
+valid_split=dev
+test_split=tst-COMMON
test_subset=tst-COMMON
# exp
@@ -59,7 +63,7 @@ exp_name=
# config
train_config=ctc
-data_config=config_asr.yaml
+data_config=config.yaml
# training setting
fp16=1
@@ -97,13 +101,12 @@ fi
if [[ -z ${exp_name} ]]; then
    config_string=${train_config//,/_}
-    # exp_name=${exp_prefix}_$(basename ${train_config%.*})_${exp_tag}
    exp_name=${exp_prefix}_${config_string}_${exp_tag}
    if [[ -n ${extra_tag} ]]; then
        exp_name=${exp_name}_${extra_tag}
    fi
fi
-model_dir=$root_dir/../checkpoints/$dataset/asr/${exp_name}
+model_dir=${code_dir}/../checkpoints/${dataset}/asr/${exp_name}
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    echo "stage -1: Data Download"
@@ -114,11 +117,23 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    ### Task dependent. You have to make data the following preparation part by yourself.
    ### But you can utilize Kaldi recipes in most cases
    echo "stage 0: ASR Data Preparation"
+    if [[ ! -e ${data_dir} ]]; then
+        mkdir -p ${data_dir}
+    fi
+    feature_zip=fbank80.zip
+    if [[ ${speed_perturb} -eq 1 ]]; then
+        feature_zip=fbank80_sp.zip
+    fi
+    if [[ ! -f ${data_dir}/${feature_zip} && -f ${data_dir}/../feature_zip ]]; then
+        ln -s ${data_dir}/../feature_zip ${data_dir}
+    fi
-    cmd="python ${root_dir}/examples/speech_to_text/prep_mustc_data.py
+    cmd="python ${code_dir}/examples/speech_to_text/prep_audio_data.py
        --data-root ${org_data_dir}
        --output-root ${data_dir}
        --task asr
+        --src-lang ${src_lang}
+        --splits ${valid_split},${test_split},${train_split}
        --vocab-type ${vocab_type}
        --vocab-size ${vocab_size}"
@@ -127,7 +142,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
            --raw"
    fi
    if [[ ${use_specific_dict} -eq 1 ]]; then
-        cp -r ${specific_dir}/${asr_vocab_prefix}.* ${data_dir}/${lang}
+        cp -r ${specific_dir}/${asr_vocab_prefix}.* ${data_dir}
        cmd="$cmd
            --asr-prefix ${asr_vocab_prefix}"
    fi
@@ -147,6 +162,11 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    echo -e "\033[34mRun command: \n${cmd} \033[0m"
    [[ $eval -eq 1 ]] && eval ${cmd}
+    if [[ -f ${data_dir}/${feature_zip} && ! -f ${data_dir}/../${feature_zip} ]]; then
+        mv ${data_dir}/${feature_zip} ${data_dir}/..
+        ln -s ${data_dir}/../${feature_zip} ${data_dir}
+    fi
fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
@@ -173,28 +193,26 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    cp ${BASH_SOURCE[0]} ${model_dir}
    cp ${PWD}/train.sh ${model_dir}
+    extra_parameter="${extra_parameter}
+                    --train-config ${pwd_dir}/conf/basis.yaml"
+    cp ${pwd_dir}/conf/basis.yaml ${model_dir}
    config_list="${train_config//,/ }"
-    idx=0
+    idx=1
    for config in ${config_list[@]}
    do
-        config_path=$pwd_dir/conf/${config}.yaml
+        config_path=${pwd_dir}/conf/${config}.yaml
        if [[ ! -f ${config_path} ]]; then
            echo "No config file ${config_path}"
            exit
        fi
        cp ${config_path} ${model_dir}
-        if [[ idx -eq 0 ]]; then
-            extra_parameter="${extra_parameter}
-                            --train-config ${config_path}"
-        else
-            extra_parameter="${extra_parameter}
-                            --train-config${idx} ${config_path}"
-        fi
+        extra_parameter="${extra_parameter}
+                        --train-config${idx} ${config_path}"
        idx=$((idx + 1))
    done
-    cmd="python3 -u ${root_dir}/fairseq_cli/train.py
+    cmd="python3 -u ${code_dir}/fairseq_cli/train.py
        ${data_dir}
        --config-yaml ${data_config}
        --task ${task}
@@ -278,12 +296,14 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
        # Average models
        dec_model=avg_${n_average}_checkpoint.pt
-        cmd="python ${root_dir}/scripts/average_checkpoints.py
-            --inputs ${model_dir}
-            --num-epoch-checkpoints ${n_average}
-            --output ${model_dir}/${dec_model}"
-        echo -e "\033[34mRun command: \n${cmd} \033[0m"
-        [[ $eval -eq 1 ]] && eval $cmd
+        if [[ ! -f ${model_dir}/${dec_model} ]]; then
+            cmd="python ${code_dir}/scripts/average_checkpoints.py
+                --inputs ${model_dir}
+                --num-best-checkpoints ${n_average}
+                --output ${model_dir}/${dec_model}"
+            echo -e "\033[34mRun command: \n${cmd} \033[0m"
+            [[ $eval -eq 1 ]] && eval $cmd
+        fi
    else
        dec_model=${dec_model}
    fi
@@ -303,8 +323,8 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    test_subset=${test_subset//,/ }
    for subset in ${test_subset[@]}; do
-        subset=${subset}_asr
+        subset=${subset}
-        cmd="python ${root_dir}/fairseq_cli/generate.py
+        cmd="python ${code_dir}/fairseq_cli/generate.py
            ${data_dir}
            --config-yaml ${data_config}
            --gen-subset ${subset}
...
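The new feature_zip handling is meant to extract filterbank features once per dataset and share them across the task subdirectories: after preparation the zip is parked one level up and symlinked back, so a later st/ or mt/ run can reuse it. A minimal sketch of the intended round trip (names as configured above):

feature_zip=fbank80.zip
[[ ${speed_perturb} -eq 1 ]] && feature_zip=fbank80_sp.zip
if [[ -f ${data_dir}/${feature_zip} && ! -f ${data_dir}/../${feature_zip} ]]; then
    mv ${data_dir}/${feature_zip} ${data_dir}/..        # keep one shared copy per dataset
    ln -s ${data_dir}/../${feature_zip} ${data_dir}     # link it back into the task dir
fi

Note that the guard added before preparation tests ${data_dir}/../feature_zip literally, without expanding ${feature_zip}; as committed it can only ever match a file named feature_zip, which looks like an unintended leftover.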
-train-subset: train
-valid-subset: valid
-max-epoch: 50
-max-update: 100000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-skip-invalid-size-inputs-valid-test: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
arch: transformer
share-decoder-input-output-embed: True
optimizer: adam
...
-train-subset: train
-valid-subset: valid
-max-epoch: 50
-max-update: 100000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-skip-invalid-size-inputs-valid-test: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
arch: transformer
share-decoder-input-output-embed: True
optimizer: adam
...
+train-subset: train
+valid-subset: valid
+max-epoch: 50
+max-update: 100000
+patience: 20
+best_checkpoint_metric: loss
+maximize_best_checkpoint_metric: False
+no-epoch-checkpoints: True
+#keep-last-epochs: 10
+keep-best-checkpoints: 10
+num-workers: 8
+no-progress-bar: True
+log-interval: 100
+seed: 1
+report-accuracy: True
+skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
@@ -10,6 +10,7 @@ if [ "$#" -eq 1 ]; then
    exp_name=$1
fi
+sacrebleu=1
n_average=10
beam_size=5
len_penalty=1.0
@@ -21,6 +22,7 @@ cmd="./run.sh
    --stop_stage 2
    --gpu_num ${gpu_num}
    --exp_name ${exp_name}
+    --sacrebleu ${sacrebleu}
    --n_average ${n_average}
    --beam_size ${beam_size}
    --len_penalty ${len_penalty}
...
@@ -13,7 +13,7 @@ set -o pipefail
export PYTHONIOENCODING=UTF-8
eval=1
-time=$(date "+%m%d_%H%M")
+time=$(date "+%m%d")
stage=0
stop_stage=0
@@ -24,7 +24,8 @@ device=()
gpu_num=8
update_freq=1
-root_dir=~/st/Fairseq-S2T
+root_dir=~/st
+code_dir=${root_dir}/Fairseq-S2T
pwd_dir=$PWD
# dataset
@@ -42,12 +43,12 @@ tokenizer=0
use_specific_dict=0
specific_prefix=st
-specific_dir=/home/xuchen/st/data/mustc/st/en-de/
+specific_dir=${root_dir}/data/mustc/st
src_vocab_prefix=spm_unigram10000_st_share
tgt_vocab_prefix=spm_unigram10000_st_share
-org_data_dir=~/st/data/${dataset}
+org_data_dir=${root_dir}/data/${dataset}
-data_dir=~/st/data/${dataset}/mt/${lang}
+data_dir=${root_dir}/data/${dataset}/mt
train_subset=train
valid_subset=dev
trans_subset=tst-COMMON
@@ -70,6 +71,7 @@ step_valid=0
bleu_valid=0
# decoding setting
+sacrebleu=1
dec_model=checkpoint_best.pt
n_average=10
beam_size=5
@@ -106,7 +108,6 @@ fi
# full path
if [[ -z ${exp_name} ]]; then
    config_string=${train_config//,/_}
-    # exp_name=${exp_prefix}_$(basename ${train_config%.*})_${exp_tag}
    exp_name=${exp_prefix}_${config_string}_${exp_tag}
    if [[ -n ${extra_tag} ]]; then
        exp_name=${exp_name}_${extra_tag}
@@ -128,7 +129,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    if [[ ! -f ${data_dir}/${src_vocab_prefix}.txt || ! -f ${data_dir}/${tgt_vocab_prefix}.txt ]]; then
        if [[ ${use_specific_dict} -eq 0 ]]; then
-            cmd="python ${root_dir}/examples/speech_to_text/prep_mt_data.py
+            cmd="python ${code_dir}/examples/speech_to_text/prep_mt_data.py
                --data-root ${org_data_dir}
                --output-root ${data_dir}
                --splits ${train_subset},${valid_subset},${trans_subset}
@@ -151,9 +152,10 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    mkdir -p ${data_dir}/data
    for split in ${train_subset} ${valid_subset} ${trans_subset}; do
    {
-        cmd="cat ${org_data_dir}/${lang}/data/${split}/txt/${split}.${src_lang}"
+        txt_dir=${org_data_dir}/data/${split}/txt
+        cmd="cat ${txt_dir}/${split}.${src_lang}"
        if [[ ${lcrm} -eq 1 ]]; then
-            cmd="python local/lower_rm.py ${org_data_dir}/${lang}/data/${split}.${src_lang}"
+            cmd="python local/lower_rm.py ${org_data_dir}/data/${split}.${src_lang}"
        fi
        cmd="${cmd}
            | spm_encode --model ${data_dir}/${src_vocab_prefix}.model
@@ -166,7 +168,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
        cmd="spm_encode
            --model ${data_dir}/${tgt_vocab_prefix}.model
            --output_format=piece
-            < ${org_data_dir}/${lang}/data/${split}/txt/${split}.${tgt_lang}
+            < ${txt_dir}/${split}.${tgt_lang}
            > ${data_dir}/data/${split}.${tgt_lang}"
        echo -e "\033[34mRun command: \n${cmd} \033[0m"
@@ -175,7 +177,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    done
    wait
-    cmd="python ${root_dir}/fairseq_cli/preprocess.py
+    cmd="python ${code_dir}/fairseq_cli/preprocess.py
        --source-lang ${src_lang} --target-lang ${tgt_lang}
        --trainpref ${data_dir}/data/${train_subset}
        --validpref ${data_dir}/data/${valid_subset}
@@ -215,28 +217,26 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    cp ${BASH_SOURCE[0]} ${model_dir}
    cp ${PWD}/train.sh ${model_dir}
+    extra_parameter="${extra_parameter}
+                    --train-config ${pwd_dir}/conf/basis.yaml"
+    cp ${pwd_dir}/conf/basis.yaml ${model_dir}
    config_list="${train_config//,/ }"
-    idx=0
+    idx=1
    for config in ${config_list[@]}
    do
-        config_path=$pwd_dir/conf/${config}.yaml
+        config_path=${pwd_dir}/conf/${config}.yaml
        if [[ ! -f ${config_path} ]]; then
            echo "No config file ${config_path}"
            exit
        fi
        cp ${config_path} ${model_dir}
-        if [[ idx -eq 0 ]]; then
-            extra_parameter="${extra_parameter}
-                            --train-config ${config_path}"
-        else
-            extra_parameter="${extra_parameter}
-                            --train-config${idx} ${config_path}"
-        fi
+        extra_parameter="${extra_parameter}
+                        --train-config${idx} ${config_path}"
        idx=$((idx + 1))
    done
-    cmd="python3 -u ${root_dir}/fairseq_cli/train.py
+    cmd="python3 -u ${code_dir}/fairseq_cli/train.py
        ${data_dir}
        --source-lang ${src_lang}
        --target-lang ${tgt_lang}
@@ -330,12 +330,14 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
        # Average models
        dec_model=avg_${n_average}_checkpoint.pt
-        cmd="python ${root_dir}/scripts/average_checkpoints.py
-            --inputs ${model_dir}
-            --num-epoch-checkpoints ${n_average}
-            --output ${model_dir}/${dec_model}"
-        echo -e "\033[34mRun command: \n${cmd} \033[0m"
-        [[ $eval -eq 1 ]] && eval $cmd
+        if [[ ! -f ${model_dir}/${dec_model} ]]; then
+            cmd="python ${code_dir}/scripts/average_checkpoints.py
+                --inputs ${model_dir}
+                --num-best-checkpoints ${n_average}
+                --output ${model_dir}/${dec_model}"
+            echo -e "\033[34mRun command: \n${cmd} \033[0m"
+            [[ $eval -eq 1 ]] && eval $cmd
+        fi
    else
        dec_model=${dec_model}
    fi
@@ -355,7 +357,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    test_subset=(${test_subset//,/ })
    for subset in ${test_subset[@]}; do
-        cmd="python ${root_dir}/fairseq_cli/generate.py
+        cmd="python ${code_dir}/fairseq_cli/generate.py
            ${data_dir}
            --source-lang ${src_lang}
            --target-lang ${tgt_lang}
@@ -366,14 +368,17 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
            --max-tokens ${max_tokens}
            --beam ${beam_size}
            --lenpen ${len_penalty}
-            --post-process sentencepiece
-            --scoring sacrebleu"
+            --post-process sentencepiece"
+        if [[ ${sacrebleu} -eq 1 ]]; then
+            cmd="${cmd}
+                --scoring sacrebleu"
+            if [[ ${tokenizer} -eq 1 ]]; then
            cmd="${cmd}
                --tokenizer moses
                --moses-source-lang ${src_lang}
                --moses-target-lang ${tgt_lang}"
+            fi
        fi
        echo -e "\033[34mRun command: \n${cmd} \033[0m"
...
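Scoring is now opt-in: generate.py gets --scoring sacrebleu (plus the moses tokenizer flags when tokenizer=1) only when the new sacrebleu switch is 1, and decode.sh forwards that switch through run.sh. With everything enabled, the assembled decoding call amounts to roughly this sketch (variables as configured above):

python ${code_dir}/fairseq_cli/generate.py ${data_dir} \
    --source-lang ${src_lang} --target-lang ${tgt_lang} \
    --gen-subset ${subset} --results-path ${model_dir} \
    --max-tokens ${max_tokens} --beam ${beam_size} --lenpen ${len_penalty} \
    --post-process sentencepiece \
    --scoring sacrebleu \
    --tokenizer moses --moses-source-lang ${src_lang} --moses-target-lang ${tgt_lang}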
-train-subset: train_st
-valid-subset: dev_st
-max-epoch: 100
-max-update: 100000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
arch: s2t_transformer_s
share-decoder-input-output-embed: True
optimizer: adam
@@ -42,3 +26,6 @@ decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
+#load-pretrained-encoder-from:
+#load-pretrained-decoder-from:
\ No newline at end of file
+train-subset: train
+valid-subset: dev
+max-epoch: 100
+max-update: 100000
+patience: 20
+best_checkpoint_metric: loss
+maximize_best_checkpoint_metric: False
+no-epoch-checkpoints: True
+#keep-last-epochs: 10
+keep-best-checkpoints: 10
+num-workers: 8
+no-progress-bar: True
+log-interval: 100
+seed: 1
+report-accuracy: True
+skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
-ctc-weight: 0.3
\ No newline at end of file
+ctc-weight: 0.3
+post-process: sentencepiece
\ No newline at end of file
arch: pdss2t_transformer_s_8
+pds-fusion: True
+ctc-layer: 12
-train-subset: train_st
-valid-subset: dev_st
-max-epoch: 100
-max-update: 100000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
@@ -38,3 +24,6 @@ encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
+#load-pretrained-encoder-from:
+#load-pretrained-decoder-from:
\ No newline at end of file
arch: pdss2t_transformer_s_16
encoder-embed-dim: 256
-pyramid-stages: 4
+pds-stages: 4
-#pyramid-dropout: 0
+ctc-layer: 12
-pyramid-layers: 2_2_6_2
+pds-layers: 2_2_6_2
-pyramid-ratios: 2_2_2_2
+pds-ratios: 2_2_2_2
-pyramid-fusion: True
+pds-fusion: True
-pyramid-fusion-method: all_conv
+pds-fusion-method: all_conv
-pyramid-embed-dims: 256_256_256_256
+pds-embed-dims: 256_256_256_256
-pyramid-ds-method: conv
+pds-ds-method: conv
-pyramid-embed-norm: True
+pds-embed-norm: True
-pyramid-position-embed: 1_1_1_1
+pds-position-embed: 1_1_1_1
-pyramid-kernel-sizes: 5_5_5_5
+pds-kernel-sizes: 5_5_5_5
-pyramid-ffn-ratios: 8_8_8_8
+pds-ffn-ratios: 8_8_8_8
-pyramid-attn-heads: 4_4_4_4
+pds-attn-heads: 4_4_4_4
-train-subset: train_st
-valid-subset: dev_st
-max-epoch: 100
-max-update: 100000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
@@ -53,3 +37,6 @@ encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
+#load-pretrained-encoder-from:
+#load-pretrained-decoder-from:
\ No newline at end of file
arch: pdss2t_transformer_s_32
encoder-embed-dim: 256
-pyramid-stages: 5
+pds-stages: 5
-#pyramid-dropout: 0
+ctc-layer: 12
-pyramid-layers: 2_2_3_3_2
+pds-layers: 2_2_3_3_2
-pyramid-ratios: 2_2_2_2_2
+pds-ratios: 2_2_2_2_2
-pyramid-fusion: True
+pds-fusion: True
-pyramid-fusion-method: all_conv
+pds-fusion-method: all_conv
-pyramid-embed-dims: 256_256_256_256_256
+pds-embed-dims: 256_256_256_256_256
-pyramid-ds-method: conv
+pds-ds-method: conv
-pyramid-embed-norm: True
+pds-embed-norm: True
-pyramid-position-embed: 1_1_1_1_1
+pds-position-embed: 1_1_1_1_1
-pyramid-kernel-sizes: 5_5_5_5_5
+pds-kernel-sizes: 5_5_5_5_5
-pyramid-ffn-ratios: 8_8_8_8_8
+pds-ffn-ratios: 8_8_8_8_8
-pyramid-attn-heads: 4_4_4_4_4
+pds-attn-heads: 4_4_4_4_4
-train-subset: train_st
-valid-subset: dev_st
-max-epoch: 100
-max-update: 100000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
@@ -53,3 +37,6 @@ encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
+#load-pretrained-encoder-from:
+#load-pretrained-decoder-from:
\ No newline at end of file
arch: pdss2t_transformer_s_8
encoder-embed-dim: 256
-pyramid-stages: 4
+pds-stages: 4
-#pyramid-dropout: 0
+ctc-layer: 12
-pyramid-layers: 3_3_3_3
+pds-layers: 3_3_3_3
-pyramid-ratios: 2_2_1_2
+pds-ratios: 2_2_1_2
-pyramid-fusion: True
+pds-fusion: True
-pyramid-fusion-method: all_conv
+pds-fusion-method: all_conv
-pyramid-embed-dims: 256_256_256_256
+pds-embed-dims: 256_256_256_256
-pyramid-ds-method: conv
+pds-ds-method: conv
-pyramid-embed-norm: True
+pds-embed-norm: True
-pyramid-position-embed: 1_1_1_1
+pds-position-embed: 1_1_1_1
-pyramid-kernel-sizes: 5_5_5_5
+pds-kernel-sizes: 5_5_5_5
-pyramid-ffn-ratios: 8_8_8_8
+pds-ffn-ratios: 8_8_8_8
-pyramid-attn-heads: 4_4_4_4
+pds-attn-heads: 4_4_4_4
-train-subset: train_st
-valid-subset: dev_st
-max-epoch: 100
-max-update: 100000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-decoder-from:
share-decoder-input-output-embed: True
optimizer: adam
@@ -53,3 +37,6 @@ encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
+#load-pretrained-encoder-from:
+#load-pretrained-decoder-from:
\ No newline at end of file
-train-subset: train_st
-valid-subset: dev_st
-max-epoch: 100
-max-update: 100000
-num-workers: 8
-patience: 10
-no-progress-bar: True
-log-interval: 100
-seed: 1
-report-accuracy: True
-#load-pretrained-encoder-from:
-#load-pretrained-acoustic-encoder-from:
-#load-pretrained-text-encoder-from:
-#load-pretrained-decoder-from:
arch: s2t_sate
share-decoder-input-output-embed: True
optimizer: adam
@@ -43,6 +25,11 @@ text-encoder-layers: 6
decoder-layers: 6
encoder-attention-heads: 4
+#load-pretrained-encoder-from:
+#load-pretrained-acoustic-encoder-from:
+#load-pretrained-text-encoder-from:
+#load-pretrained-decoder-from:
#macaron-style: True
#use-cnn-module: True
#cnn-module-kernel: 31
@@ -52,20 +39,20 @@ acoustic-encoder: transformer
adapter: league
encoder-embed-dim: 256
-pyramid-stages: 4
+pds-stages: 4
-#pyramid-dropout: 0
+#pds-dropout: 0
-pyramid-layers: 3_3_3_3
+pds-layers: 3_3_3_3
-pyramid-ratios: 2_2_1_2
+pds-ratios: 2_2_1_2
-pyramid-fusion: True
+pds-fusion: True
-pyramid-fusion-method: all_conv
+pds-fusion-method: all_conv
-pyramid-embed-dims: 256_256_256_256
+pds-embed-dims: 256_256_256_256
-pyramid-ds-method: conv
+pds-ds-method: conv
-pyramid-embed-norm: True
+pds-embed-norm: True
-pyramid-position-embed: 1_1_1_1
+pds-position-embed: 1_1_1_1
-pyramid-kernel-sizes: 5_5_5_5
+pds-kernel-sizes: 5_5_5_5
-pyramid-ffn-ratios: 8_8_8_8
+pds-ffn-ratios: 8_8_8_8
-pyramid-attn-heads: 4_4_4_4
+pds-attn-heads: 4_4_4_4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
\ No newline at end of file
@@ -3,13 +3,14 @@
gpu_num=1
data_dir=
-test_subset=(tst-COMMON)
+test_subset=(dev tst-COMMON)
exp_name=
if [ "$#" -eq 1 ]; then
    exp_name=$1
fi
+sacrebleu=1
n_average=10
beam_size=5
len_penalty=1.0
@@ -21,6 +22,7 @@ cmd="./run.sh
    --stop_stage 2
    --gpu_num ${gpu_num}
    --exp_name ${exp_name}
+    --sacrebleu ${sacrebleu}
    --n_average ${n_average}
    --beam_size ${beam_size}
    --len_penalty ${len_penalty}
@@ -31,7 +33,7 @@ cmd="./run.sh
if [[ -n ${data_dir} ]]; then
    cmd="$cmd --data_dir ${data_dir}"
fi
-if [[ ${#test_subset[@]} -eq 0 ]]; then
+if [[ ${#test_subset[@]} -ne 0 ]]; then
    subsets=$(echo ${test_subset[*]} | sed 's/ /,/g')
    cmd="$cmd --test_subset ${subsets}"
fi
...
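The last hunk above is one of the actual bug fixes in this commit: with -eq 0, decode.sh forwarded --test_subset to run.sh only when the test_subset array was empty, so explicitly listed subsets were silently dropped. The corrected -ne 0 guard joins the non-empty list with commas:

test_subset=(dev tst-COMMON)
if [[ ${#test_subset[@]} -ne 0 ]]; then
    subsets=$(echo ${test_subset[*]} | sed 's/ /,/g')    # -> dev,tst-COMMON
    cmd="$cmd --test_subset ${subsets}"
fi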
...@@ -24,7 +24,8 @@ stop_stage=0 ...@@ -24,7 +24,8 @@ stop_stage=0
gpu_num=8 gpu_num=8
update_freq=1 update_freq=1
root_dir=~/st/Fairseq-S2T root_dir=~/st
code_dir=${root_dir}/Fairseq-S2T
pwd_dir=$PWD pwd_dir=$PWD
# dataset # dataset
...@@ -41,15 +42,19 @@ share_dict=1 ...@@ -41,15 +42,19 @@ share_dict=1
speed_perturb=0 speed_perturb=0
lcrm=0 lcrm=0
tokenizer=0 tokenizer=0
use_raw_audio=0
use_specific_dict=0 use_specific_dict=0
specific_prefix=valid specific_prefix=valid
specific_dir=/home/xuchen/st/data/mustc/st_lcrm/en-de specific_dir=${root_dir}/data/mustc/st
asr_vocab_prefix=spm_unigram10000_st_share asr_vocab_prefix=spm_unigram10000_st_share
st_vocab_prefix=spm_unigram10000_st_share st_vocab_prefix=spm_unigram10000_st_share
org_data_dir=~/st/data/${dataset} org_data_dir=${root_dir}/data/${dataset}
data_dir=~/st/data/${dataset}/st data_dir=${root_dir}/data/${dataset}/st
train_split=train
valid_split=dev
test_split=tst-COMMON
test_subset=tst-COMMON test_subset=tst-COMMON
# exp # exp
...@@ -60,7 +65,7 @@ exp_tag=baseline ...@@ -60,7 +65,7 @@ exp_tag=baseline
exp_name= exp_name=
# config # config
train_config=ctc train_config=base,ctc
# training setting # training setting
fp16=1 fp16=1
...@@ -69,15 +74,16 @@ step_valid=0 ...@@ -69,15 +74,16 @@ step_valid=0
bleu_valid=0 bleu_valid=0
# decoding setting # decoding setting
sacrebleu=1
dec_model=checkpoint_best.pt dec_model=checkpoint_best.pt
n_average=10 n_average=10
beam_size=5 beam_size=5
len_penalty=1.0 len_penalty=1.0
if [[ ${share_dict} -eq 1 ]]; then if [[ ${share_dict} -eq 1 ]]; then
data_config=config_st_share.yaml data_config=config_share.yaml
else else
data_config=config_st.yaml data_config=config.yaml
fi fi
if [[ ${speed_perturb} -eq 1 ]]; then if [[ ${speed_perturb} -eq 1 ]]; then
data_dir=${data_dir}_sp data_dir=${data_dir}_sp
...@@ -95,18 +101,21 @@ if [[ ${tokenizer} -eq 1 ]]; then ...@@ -95,18 +101,21 @@ if [[ ${tokenizer} -eq 1 ]]; then
data_dir=${data_dir}_tok data_dir=${data_dir}_tok
exp_prefix=${exp_prefix}_tok exp_prefix=${exp_prefix}_tok
fi fi
if [[ ${use_raw_audio} -eq 1 ]]; then
data_dir=${data_dir}_raw
exp_prefix=${exp_prefix}_raw
fi
. ./local/parse_options.sh || exit 1; . ./local/parse_options.sh || exit 1;
if [[ -z ${exp_name} ]]; then if [[ -z ${exp_name} ]]; then
config_string=${train_config//,/_} config_string=${train_config//,/_}
# exp_name=${exp_prefix}_$(basename ${train_config%.*})_${exp_tag}
exp_name=${exp_prefix}_${config_string}_${exp_tag} exp_name=${exp_prefix}_${config_string}_${exp_tag}
if [[ -n ${extra_tag} ]]; then if [[ -n ${extra_tag} ]]; then
exp_name=${exp_name}_${extra_tag} exp_name=${exp_name}_${extra_tag}
fi fi
fi fi
model_dir=$root_dir/../checkpoints/$dataset/st/${exp_name} model_dir=$code_dir/../checkpoints/$dataset/st/${exp_name}
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
echo "stage -1: Data Download" echo "stage -1: Data Download"
...@@ -117,37 +126,49 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then ...@@ -117,37 +126,49 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
### Task dependent. You have to make data the following preparation part by yourself. ### Task dependent. You have to make data the following preparation part by yourself.
### But you can utilize Kaldi recipes in most cases ### But you can utilize Kaldi recipes in most cases
echo "stage 0: ASR Data Preparation" echo "stage 0: ASR Data Preparation"
if [[ ! -e ${data_dir}/${lang} ]]; then if [[ ! -e ${data_dir} ]]; then
mkdir -p ${data_dir}/${lang} mkdir -p ${data_dir}
fi
if [[ ! -f ${data_dir}/fbank80.zip && -f ${data_dir}/../fbank80.zip ]]; then
ln -s ${data_dir}/../fbank80.zip ${data_dir}
fi
if [[ ! -f ${data_dir}/fbank80_sp.zip && -f ${data_dir}/../fbank80_sp.zip ]]; then
ln -s ${data_dir}/../fbank80_sp.zip ${data_dir}
fi fi
cmd="python ${root_dir}/examples/speech_to_text/prep_mustc_data.py # create ASR vocabulary if necessary
cmd="python ${code_dir}/examples/speech_to_text/prep_audio_data.py
--data-root ${org_data_dir} --data-root ${org_data_dir}
--output-root ${data_dir} --output-root ${data_dir}/asr4st
--task asr --task asr
--raw
--src-lang ${src_lang}
--splits ${valid_split},${test_split},${train_split}
--vocab-type ${vocab_type} --vocab-type ${vocab_type}
--vocab-size ${asr_vocab_size}" --vocab-size ${asr_vocab_size}"
if [[ ${speed_perturb} -eq 1 ]]; then [[ $eval -eq 1 && ${share_dict} -ne 1 && ${use_specific_dict} -ne 1 ]] && (echo -e "\033[34mRun command: \n${cmd} \033[0m" && eval $cmd)
cmd="$cmd
--speed-perturb"
fi
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 && ${share_dict} -ne 1 && ${use_specific_dict} -ne 1 ]] && eval $cmd
asr_prefix=spm_${vocab_type}${asr_vocab_size}_asr asr_prefix=spm_${vocab_type}${asr_vocab_size}_asr
echo "stage 0: ST Data Preparation" echo "stage 0: ST Data Preparation"
cmd="python ${root_dir}/examples/speech_to_text/prep_mustc_data.py cmd="python ${code_dir}/examples/speech_to_text/prep_audio_data.py
--data-root ${org_data_dir} --data-root ${org_data_dir}
--output-root ${data_dir} --output-root ${data_dir}
--task st --task st
--add-src --add-src
--src-lang ${src_lang}
--tgt-lang ${tgt_lang}
--splits ${valid_split},${test_split},${train_split}
--cmvn-type utterance --cmvn-type utterance
--vocab-type ${vocab_type} --vocab-type ${vocab_type}
--vocab-size ${vocab_size}" --vocab-size ${vocab_size}"
if [[ ${use_raw_audio} -eq 1 ]]; then
cmd="$cmd
--raw"
fi
if [[ ${use_specific_dict} -eq 1 ]]; then if [[ ${use_specific_dict} -eq 1 ]]; then
cp -r ${specific_dir}/${asr_vocab_prefix}.* ${data_dir}/${lang} cp -r ${specific_dir}/${asr_vocab_prefix}.* ${data_dir}
cp -r ${specific_dir}/${st_vocab_prefix}.* ${data_dir}/${lang} cp -r ${specific_dir}/${st_vocab_prefix}.* ${data_dir}
if [[ $share_dict -eq 1 ]]; then if [[ $share_dict -eq 1 ]]; then
cmd="$cmd cmd="$cmd
--share --share
...@@ -182,9 +203,16 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then ...@@ -182,9 +203,16 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
echo -e "\033[34mRun command: \n${cmd} \033[0m" echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval ${cmd} [[ $eval -eq 1 ]] && eval ${cmd}
fi
data_dir=${data_dir}/${lang} if [[ ! -f ${data_dir}/../fbank80.zip ]]; then
mv ${data_dir}/fbank80.zip ${data_dir}/..
ln -s ${data_dir}/../fbank80.zip ${data_dir}
fi
if [[ ! -f ${data_dir}/../fbank80_sp.zip ]]; then
mv ${data_dir}/fbank80_sp.zip ${data_dir}/..
ln -s ${data_dir}/../fbank80_sp.zip ${data_dir}
fi
fi
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     echo "stage 1: ST Network Training"
@@ -210,28 +238,26 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     cp ${BASH_SOURCE[0]} ${model_dir}
     cp ${PWD}/train.sh ${model_dir}

+    extra_parameter="${extra_parameter}
+        --train-config ${pwd_dir}/conf/basis.yaml"
+    cp ${pwd_dir}/conf/basis.yaml ${model_dir}
+
     config_list="${train_config//,/ }"
-    idx=0
+    idx=1
     for config in ${config_list[@]}
     do
-        config_path=$pwd_dir/conf/${config}.yaml
+        config_path=${pwd_dir}/conf/${config}.yaml
         if [[ ! -f ${config_path} ]]; then
             echo "No config file ${config_path}"
             exit
         fi
         cp ${config_path} ${model_dir}

-        if [[ idx -eq 0 ]]; then
-            extra_parameter="${extra_parameter}
-            --train-config ${config_path}"
-        else
-            extra_parameter="${extra_parameter}
+        extra_parameter="${extra_parameter}
             --train-config${idx} ${config_path}"
-        fi
         idx=$((idx + 1))
     done

-    cmd="python3 -u ${root_dir}/fairseq_cli/train.py
+    cmd="python3 -u ${code_dir}/fairseq_cli/train.py
     ${data_dir}
     --config-yaml ${data_config}
     --task ${task}
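The rewritten loop is easiest to see with a concrete value: conf/basis.yaml is now always the base --train-config, and each entry of ${train_config} gets a numbered --train-configN flag, with N starting at 1. A standalone sketch of the expansion; the config names ctc and local are made up:

#!/usr/bin/env bash
pwd_dir=${PWD}
train_config=ctc,local   # hypothetical comma-separated list

extra_parameter="--train-config ${pwd_dir}/conf/basis.yaml"
idx=1
for config in ${train_config//,/ }; do
    extra_parameter="${extra_parameter} --train-config${idx} ${pwd_dir}/conf/${config}.yaml"
    idx=$((idx + 1))
done

echo ${extra_parameter}
# --train-config .../conf/basis.yaml --train-config1 .../conf/ctc.yaml --train-config2 .../conf/local.yaml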
@@ -324,12 +350,14 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
         # Average models
         dec_model=avg_${n_average}_checkpoint.pt
-        cmd="python ${root_dir}/scripts/average_checkpoints.py
-        --inputs ${model_dir}
-        --num-epoch-checkpoints ${n_average}
-        --output ${model_dir}/${dec_model}"
-        echo -e "\033[34mRun command: \n${cmd} \033[0m"
-        [[ $eval -eq 1 ]] && eval $cmd
+        if [[ ! -f ${model_dir}/${dec_model} ]]; then
+            cmd="python ${code_dir}/scripts/average_checkpoints.py
+            --inputs ${model_dir}
+            --num-best-checkpoints ${n_average}
+            --output ${model_dir}/${dec_model}"
+            echo -e "\033[34mRun command: \n${cmd} \033[0m"
+            [[ $eval -eq 1 ]] && eval $cmd
+        fi
     else
         dec_model=${dec_model}
     fi
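Two changes here: averaging now selects the n_average best checkpoints rather than the last epochs (matching the keep-best-checkpoints setting in the training configs), and the new guard caches the averaged model so repeated decode runs skip the averaging step. Assuming fairseq's usual naming for best checkpoints, the model directory would then hold something like:

# hypothetical ${model_dir} contents with keep-best-checkpoints: 10
#   checkpoint.best_loss_3.21.pt
#   checkpoint.best_loss_3.24.pt
#   ...
#   checkpoint_last.pt
#   avg_10_checkpoint.pt   # written once, reused by every later decode run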
@@ -348,9 +376,9 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     [[ -f ${result_file} ]] && rm ${result_file}

     test_subset=${test_subset//,/ }
-    for subset in "${test_subset[@]}"; do
-        subset=${subset}_st
-        cmd="python ${root_dir}/fairseq_cli/generate.py
+    for subset in ${test_subset[@]}; do
+        subset=${subset}
+        cmd="python ${code_dir}/fairseq_cli/generate.py
         ${data_dir}
         --config-yaml ${data_config}
         --gen-subset ${subset}
@@ -359,8 +387,19 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
         --results-path ${model_dir}
         --max-tokens ${max_tokens}
         --beam ${beam_size}
-        --lenpen ${len_penalty}
-        --scoring sacrebleu"
+        --lenpen ${len_penalty}"
+        if [[ ${sacrebleu} -eq 1 ]]; then
+            cmd="${cmd}
+        --scoring sacrebleu"
+            if [[ ${tokenizer} -eq 1 ]]; then
+                cmd="${cmd}
+        --tokenizer moses
+        --moses-source-lang ${src_lang}
+        --moses-target-lang ${tgt_lang}"
+            fi
+        fi
         echo -e "\033[34mRun command: \n${cmd} \033[0m"
         if [[ $eval -eq 1 ]]; then
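Unrolled, the per-subset decode command with sacrebleu=1 and tokenizer=1 comes out roughly as below; the subset name tst-COMMON is a hypothetical example:

python ${code_dir}/fairseq_cli/generate.py ${data_dir} \
    --config-yaml ${data_config} \
    --gen-subset tst-COMMON \
    --results-path ${model_dir} \
    --max-tokens ${max_tokens} \
    --beam ${beam_size} \
    --lenpen ${len_penalty} \
    --scoring sacrebleu \
    --tokenizer moses \
    --moses-source-lang ${src_lang} \
    --moses-target-lang ${tgt_lang}

The moses flags are only meaningful together with --scoring sacrebleu, which is why the tokenizer check is nested inside the sacrebleu one.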
set -e

eval=1
lcrm=0
tokenizer=0

root_dir=~/st/Fairseq-S2T
data_dir=~/st/data/test
vocab_dir=~/st/data/mustc/st/en-de
asr_vocab_prefix=spm_unigram10000_st_share

src_lang=en
tgt_lang=de
subsets=(2019)

cp -r ${vocab_dir}/${asr_vocab_prefix}.* ${data_dir}/${src_lang}-${tgt_lang}
rm -rf ${data_dir}/${src_lang}-${tgt_lang}/fbank80.zip

splits=$(echo ${subsets[*]} | sed 's/ /,/g')
cmd="python ${root_dir}/examples/speech_to_text/prep_st_data.py
    --data-root ${data_dir}
    --output-root ${data_dir}
    --splits ${splits}
    --task asr
    --src-lang ${src_lang}
    --tgt-lang ${tgt_lang}
    --add-src
    --share
    --asr-prefix ${asr_vocab_prefix}
    --cmvn-type utterance"
if [[ ${lcrm} -eq 1 ]]; then
    cmd="$cmd
    --lowercase-src
    --rm-punc-src"
fi
if [[ ${tokenizer} -eq 1 ]]; then
    cmd="$cmd
    --tokenizer"
fi

echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval ${cmd}
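One small idiom in this helper worth calling out: the subsets array is joined into the comma-separated --splits value with sed. In isolation, with extra split names made up for illustration:

subsets=(2019 2020 offline)           # hypothetical split names
splits=$(echo ${subsets[*]} | sed 's/ /,/g')
echo ${splits}                        # prints: 2019,2020,offline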
train-subset: train_asr
valid-subset: dev_asr
max-epoch: 100
max-update: 100000
num-workers: 8
patience: 10
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
arch: s2t_transformer_s
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
#adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
conv-kernel-sizes: 5,5
conv-channels: 1024
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 31
use-enc-dlcl: True
use-dec-dlcl: True
encoder-attention-type: local
hard-mask-window: 0
gauss-mask-sigma: 3
init-mask-weight: 0
\ No newline at end of file
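Assuming each key in these train-config YAMLs maps one-to-one onto a fairseq-train flag of the same name, which is how the run scripts above appear to consume them via --train-config, the config is shorthand for a command line along these lines (illustrative, not exhaustive):

python3 -u ${code_dir}/fairseq_cli/train.py ${data_dir} \
    --train-subset train_asr --valid-subset dev_asr \
    --max-epoch 100 --max-update 100000 --patience 10 \
    --arch s2t_transformer_s --share-decoder-input-output-embed \
    --optimizer adam --clip-norm 10.0 \
    --lr-scheduler inverse_sqrt --warmup-init-lr 1e-7 --warmup-updates 10000 --lr 2e-3 \
    --criterion label_smoothed_cross_entropy_with_ctc --label-smoothing 0.1 \
    --encoder-layers 12 --decoder-layers 6 --encoder-attention-type local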