Commit a598692d by xuchen

update the shell scripts

parent 9fe8cd1e
...@@ -11,6 +11,9 @@ adam_betas: (0.9,0.98) ...@@ -11,6 +11,9 @@ adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1 label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
subsampling-filter: 1024 subsampling-filter: 1024
......
...@@ -17,4 +17,4 @@ no-progress-bar: True ...@@ -17,4 +17,4 @@ no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
...@@ -5,12 +5,15 @@ clip-norm: 10.0 ...@@ -5,12 +5,15 @@ clip-norm: 10.0
lr-scheduler: inverse_sqrt lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 1e-3 lr: 2e-3
adam_betas: (0.9,0.98) adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1 label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
subsampling-filter: 2048 subsampling-filter: 2048
......
...@@ -2,4 +2,4 @@ macaron-style: True ...@@ -2,4 +2,4 @@ macaron-style: True
use-cnn-module: True use-cnn-module: True
cnn-module-kernel: 15 cnn-module-kernel: 15
encoder-attention-type: rel_pos encoder-attention-type: rel_pos
encoder-activation-fn: swish encoder-activation-fn: swish
\ No newline at end of file
ctc-weight: 0.2 ctc-weight: 0.3
interleaved-ctc-weight: 0.1 share-ctc-and-embed: True
share-target-ctc-and-embed: True
interleaved-ctc-weight: 0.2
interleaved-ctc-layers: 6,9 interleaved-ctc-layers: 6,9
interleaved-ctc-temperature: 1.0 sae-ctc-temperature: 1.0
interleaved-ctc-drop-prob: 0 interleaved-ctc-drop-prob: 0
sae-adapter: inter_league sae-adapter: inter_league
......
...@@ -27,7 +27,7 @@ adam_betas: (0.9,0.98) ...@@ -27,7 +27,7 @@ adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1 label_smoothing: 0.1
dropout: 0.1 dropout: 0.15
activation-fn: relu activation-fn: relu
encoder-ffn-embed-dim: 2048 encoder-ffn-embed-dim: 2048
encoder-layers: 12 encoder-layers: 12
......
...@@ -10,6 +10,9 @@ adam_betas: (0.9,0.98) ...@@ -10,6 +10,9 @@ adam_betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
subsampling-filter: 1024 subsampling-filter: 1024
......
encoder-attention-type: rel_selfattn encoder-attention-type: rel_pos
#encoder-attention-type: relative #encoder-attention-type: relative
#max-encoder-relative-length: 100 #max-encoder-relative-length: 100
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
gpu_num=1 gpu_num=1
data_dir= data_dir=
test_subset=(test) test_subset=(dev test)
exp_name= exp_name=
if [ "$#" -eq 1 ]; then if [ "$#" -eq 1 ]; then
...@@ -11,6 +11,7 @@ if [ "$#" -eq 1 ]; then ...@@ -11,6 +11,7 @@ if [ "$#" -eq 1 ]; then
fi fi
cer=1 cer=1
ctc_infer=0
n_average=10 n_average=10
beam_size=5 beam_size=5
len_penalty=1.0 len_penalty=1.0
...@@ -22,8 +23,9 @@ cmd="./run.sh ...@@ -22,8 +23,9 @@ cmd="./run.sh
--stop_stage 2 --stop_stage 2
--gpu_num ${gpu_num} --gpu_num ${gpu_num}
--exp_name ${exp_name} --exp_name ${exp_name}
--cer ${cer}
--n_average ${n_average} --n_average ${n_average}
--cer ${cer}
--ctc_infer ${ctc_infer}
--beam_size ${beam_size} --beam_size ${beam_size}
--len_penalty ${len_penalty} --len_penalty ${len_penalty}
--max_tokens ${max_tokens} --max_tokens ${max_tokens}
......
# Compute BLEU between a reference and a generated translation.
# Usage: cal_bleu.sh <ref> <gen> <tokenizer> <lang>
#   ref        reference file (one segment per line)
#   gen        hypothesis file (one segment per line)
#   tokenizer  1 -> also report tokenized multi-bleu, then detokenize both
#              files before SacreBLEU scoring
#   lang       target language code (language pair is fixed as en-<lang>)
set -e

ref=$1
gen=$2
tokenizer=$3
lang=$4
lang_pair=en-${lang}

record=$(mktemp -t temp.record.XXXXXX)

if [[ ${tokenizer} -eq 1 ]]; then
    echo "MultiBLEU" > "${record}"
    cmd="multi-bleu.perl ${ref} < ${gen}"
    # echo $cmd
    eval $cmd | head -n 1 >> "${record}"

    # Detokenize both sides so SacreBLEU scores untokenized text.
    cmd="detokenizer.perl -l ${lang} --threads 32 < ${ref} > ${ref}.detok"
    # echo $cmd
    # echo
    eval $cmd
    cmd="detokenizer.perl -l ${lang} --threads 32 < ${gen} > ${gen}.detok"
    # echo $cmd
    # echo
    eval $cmd
    ref=${ref}.detok
    gen=${gen}.detok
fi

# BUGFIX: append (>>) instead of truncate (>) — with '>' the MultiBLEU
# result written above was silently discarded from the report.
echo "SacreBLEU" >> "${record}"
cmd="cat ${gen} | sacrebleu ${ref} -m bleu -w 4 -l ${lang_pair}"
#echo $cmd
eval $cmd >> "${record}"

cat "${record}"
rm "${record}"
\ No newline at end of file
...@@ -14,7 +14,7 @@ get_devices(){ ...@@ -14,7 +14,7 @@ get_devices(){
do do
line=$((dev + 2)) line=$((dev + 2))
use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1) use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)
if [[ $use -lt 100 ]]; then if [[ $use -lt 1000 ]]; then
device[$count]=$dev device[$count]=$dev
count=$((count + 1)) count=$((count + 1))
if [[ $count -eq $gpu_num ]]; then if [[ $count -eq $gpu_num ]]; then
......
#! /bin/bash #! /bin/bash
# Processing aishell ASR Datasets # Processing AIShell ASR Datasets
# Copyright 2021 Natural Language Processing Laboratory # Copyright 2021 Natural Language Processing Laboratory
# Xu Chen (xuchenneu@163.com) # Xu Chen (xuchenneu@163.com)
...@@ -72,6 +72,9 @@ step_valid=0 ...@@ -72,6 +72,9 @@ step_valid=0
# decoding setting # decoding setting
cer=1 cer=1
ctc_infer=0
ctc_self_ensemble=0
ctc_inter_logit=0
dec_model=checkpoint_best.pt dec_model=checkpoint_best.pt
n_average=10 n_average=10
beam_size=5 beam_size=5
...@@ -244,13 +247,9 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then ...@@ -244,13 +247,9 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
if [[ $step_valid -eq 1 ]]; then if [[ $step_valid -eq 1 ]]; then
validate_interval=1 validate_interval=1
save_interval=1 save_interval=1
keep_last_epochs=10
no_epoch_checkpoints=0 no_epoch_checkpoints=0
save_interval_updates=500 save_interval_updates=500
keep_interval_updates=10 keep_interval_updates=10
else
validate_interval=1
keep_last_epochs=10
fi fi
if [[ -n $no_epoch_checkpoints && $no_epoch_checkpoints -eq 1 ]]; then if [[ -n $no_epoch_checkpoints && $no_epoch_checkpoints -eq 1 ]]; then
cmd="$cmd cmd="$cmd
...@@ -264,10 +263,6 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then ...@@ -264,10 +263,6 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
cmd="${cmd} cmd="${cmd}
--save-interval $save_interval " --save-interval $save_interval "
fi fi
if [[ -n $keep_last_epochs ]]; then
cmd="${cmd}
--keep-last-epochs $keep_last_epochs "
fi
if [[ -n $save_interval_updates ]]; then if [[ -n $save_interval_updates ]]; then
cmd="${cmd} cmd="${cmd}
--save-interval-updates $save_interval_updates" --save-interval-updates $save_interval_updates"
...@@ -286,11 +281,12 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then ...@@ -286,11 +281,12 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
mv tmp.log $log mv tmp.log $log
export CUDA_VISIBLE_DEVICES=${device} export CUDA_VISIBLE_DEVICES=${device}
cmd="nohup ${cmd} >> ${model_dir}/train.log 2>&1 &" log=${model_dir}/train.log
cmd="nohup ${cmd} >> ${log} 2>&1 &"
if [[ $eval -eq 1 ]]; then if [[ $eval -eq 1 ]]; then
eval $cmd eval $cmd
sleep 2s sleep 2s
tail -n "$(wc -l ${model_dir}/train.log | awk '{print $1+1}')" -f ${model_dir}/train.log tail -n "$(wc -l ${log} | awk '{print $1+1}')" -f ${log}
fi fi
fi fi
wait wait
...@@ -355,14 +351,56 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then ...@@ -355,14 +351,56 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
cmd="${cmd} cmd="${cmd}
--wer-char-level" --wer-char-level"
fi fi
if [[ ${ctc_infer} -eq 1 ]]; then
cmd="${cmd}
--ctc-infer"
fi
if [[ ${ctc_self_ensemble} -eq 1 ]]; then
cmd="${cmd}
--ctc-self-ensemble"
fi
if [[ ${ctc_inter_logit} -eq 1 ]]; then
cmd="${cmd}
--ctc-inter-logit"
fi
echo -e "\033[34mRun command: \n${cmd} \033[0m" echo -e "\033[34mRun command: \n${cmd} \033[0m"
if [[ $eval -eq 1 ]]; then if [[ $eval -eq 1 ]]; then
ctc_file=translation-${subset}.txt.ctc
if [[ -f ${model_dir}/${ctc_file} ]]; then
rm ${model_dir}/${ctc_file}
fi
eval $cmd eval $cmd
echo "" >> ${result_file}
tail -n 1 ${model_dir}/generate-${subset}.txt >> ${result_file} tail -n 1 ${model_dir}/generate-${subset}.txt >> ${result_file}
mv ${model_dir}/generate-${subset}.txt ${model_dir}/generate-${subset}-${suffix}.txt mv ${model_dir}/generate-${subset}.txt ${model_dir}/generate-${subset}-${suffix}.txt
mv ${model_dir}/translation-${subset}.txt ${model_dir}/translation-${subset}-${suffix}.txt mv ${model_dir}/translation-${subset}.txt ${model_dir}/translation-${subset}-${suffix}.txt
trans_file=translation-${subset}-${suffix}.txt
if [[ ${ctc_infer} -eq 1 && -f ${model_dir}/${ctc_file} ]]; then
ref_file=${data_dir}/${subset}.${src_lang}
if [[ -f ${ref_file} ]]; then
src_ctc=$(mktemp -t temp.record.XXXXXX)
cd ./local
sh ./cal_wer.sh ${model_dir} ${subset} ${trans_file} ${ctc_file} ${data_dir}/${subset}.${src_lang} > ${src_ctc}
cd ..
echo "CTC WER" >> ${result_file}
tail -n 1 ${src_ctc} >> ${result_file}
src_bleu=$(mktemp -t temp.record.XXXXXX)
cd local
sh ./cal_ctc_bleu.sh ${model_dir} ${subset} ${trans_file} ${ctc_file} ${data_dir}/${subset}.${src_lang} ${tokenizer} ${src_lang} > ${src_bleu}
cd ..
cat ${src_bleu} >> ${result_file}
rm ${src_ctc} ${src_bleu}
else
echo "No reference for source language."
fi
fi
fi fi
done done
cat ${result_file} cat ${result_file}
......
...@@ -13,12 +13,18 @@ extra_parameter= ...@@ -13,12 +13,18 @@ extra_parameter=
exp_tag= exp_tag=
# CTC
config_list=(purectc)
# Transformer
config_list=(base ctc) config_list=(base ctc)
#config_list=(base ctc conformer)
#config_list=(big ctc conformer)
#config_list=(pds_base_16) # Conformer
#config_list=(pds_base_16 conformer) #config_list=(base conformer ctc)
# PDS
config_list=(purectc_pds_base_8)
config_list=(pds_base_8)
# exp full name # exp full name
exp_name= exp_name=
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#ctc-weight: 0.2 #ctc-weight: 0.2
interleaved-ctc-weight: 0.3 interleaved-ctc-weight: 0.3
interleaved-ctc-layers: 2,4 interleaved-ctc-layers: 2,4
interleaved-ctc-temperature: 1.0 sae-ctc-temperature: 1.0
interleaved-ctc-drop-prob: 0 interleaved-ctc-drop-prob: 0
interleaved_ctc_upsampling_ratio: 2 interleaved_ctc_upsampling_ratio: 2
......
arch: s2t_transformer_s arch: s2t_sate
share-decoder-input-output-embed: True share-decoder-input-output-embed: True
optimizer: adam optimizer: adam
clip-norm: 10.0 clip-norm: 10.0
...@@ -22,14 +22,14 @@ subsampling-activation: glu ...@@ -22,14 +22,14 @@ subsampling-activation: glu
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
encoder-embed-dim: 256 encoder-embed-dim: 64
encoder-ffn-embed-dim: 2048 encoder-ffn-embed-dim: 64
encoder-layers: 12 encoder-layers: 12
decoder-layers: 6 decoder-layers: 6
encoder-attention-heads: 4 encoder-attention-heads: 4
decoder-embed-dim: 256 decoder-embed-dim: 64
decoder-ffn-embed-dim: 2048 decoder-ffn-embed-dim: 64
decoder-attention-heads: 4 decoder-attention-heads: 4
attention-dropout: 0.1 attention-dropout: 0.1
activation-dropout: 0.1 activation-dropout: 0.1
...@@ -37,16 +37,23 @@ activation-dropout: 0.1 ...@@ -37,16 +37,23 @@ activation-dropout: 0.1
ctc-weight: 0.2 ctc-weight: 0.2
interleaved-ctc-weight: 0.1 interleaved-ctc-weight: 0.1
interleaved-ctc-layers: 6,9 interleaved-ctc-layers: 6,9
interleaved-ctc-temperature: 1.0 sae-ctc-temperature: 1.0
interleaved-ctc-drop-prob: 0 interleaved-ctc-drop-prob: 0
#target-ctc-weight: 0.2
#target-ctc-laer: 6
#target-interleaved-ctc-weight: 0.1
#target-interleaved-ctc-layers: 2,4
sae-adapter: league sae-adapter: league
sae-drop-prob: 0.0 sae-drop-prob: 0.0
sae-distribution-cutoff: 10 sae-distribution-cutoff: 10
share-ctc-and-sae: False share-ctc-and-sae: False
ctc-self-distill-weight: 0 ctc-self-distill-weight: 1
#target-ctc-self-distill-weight: 1
ctc-self-distill-prob: 1.0
ctc-self-distill-temperature: 1
sae-ground-truth-ratio: 0.3
#load-pretrained-encoder-from: #load-pretrained-encoder-from:
#load-pretrained-decoder-from: #load-pretrained-decoder-from:
\ No newline at end of file
...@@ -11,6 +11,9 @@ adam_betas: (0.9,0.98) ...@@ -11,6 +11,9 @@ adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1 label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
subsampling-filter: 1024 subsampling-filter: 1024
......
...@@ -11,6 +11,9 @@ adam_betas: (0.9,0.98) ...@@ -11,6 +11,9 @@ adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1 label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
subsampling-filter: 2048 subsampling-filter: 2048
......
macaron-style: True macaron-style: True
use-cnn-module: True use-cnn-module: True
cnn-module-kernel: 31 cnn-module-kernel: 15
encoder-attention-type: rel_pos encoder-attention-type: rel_pos
encoder-activation-fn: swish encoder-activation-fn: swish
ctc-weight: 0.2 ctc-weight: 0.3
interleaved-ctc-weight: 0.1 share-ctc-and-embed: True
share-target-ctc-and-embed: True
interleaved-ctc-weight: 0.2
interleaved-ctc-layers: 6,9 interleaved-ctc-layers: 6,9
interleaved-ctc-temperature: 1.0
interleaved-ctc-drop-prob: 0 interleaved-ctc-drop-prob: 0
sae-adapter: league
sae-drop-prob: 0.2
sae-distribution-cutoff: 10
share-ctc-and-sae: False
ctc-self-distill-weight: 0 sae-adapter: inter_league
#sae-adapter: none
#target-sae-adapter: none
sae-ctc-temperature: 1
#sae-gumbel: True
#sae-distribution-hard: True
#sae-drop-prob: 0.0
#sae-distribution-cutoff: 10
#share-sae-and-ctc: True
#share-target-sae-and-ctc: True
#sae-embed-norm: True
#sae-out-norm: True
#ctc-self-distill-weight: 1
#target-ctc-self-distill-weight: 1
#ctc-self-distill-prob: 0.1
#cal-all-ctc: True
inter_mixup: True inter_mixup: True
inter_mixup_layer: -1 inter_mixup_layer: -1
inter_mixup_prob: 1.0 inter_mixup_prob: 1.0
inter_mixup_ratio: 0.2 inter_mixup_ratio: 0.2
\ No newline at end of file inter_mixup_beta: 0.2
...@@ -10,6 +10,9 @@ adam_betas: (0.9,0.98) ...@@ -10,6 +10,9 @@ adam_betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True zero_infinity: True
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
subsampling-filter: 1024 subsampling-filter: 1024
......
...@@ -11,6 +11,7 @@ if [ "$#" -eq 1 ]; then ...@@ -11,6 +11,7 @@ if [ "$#" -eq 1 ]; then
fi fi
cer=0 cer=0
ctc_infer=0
n_average=10 n_average=10
beam_size=5 beam_size=5
len_penalty=1.0 len_penalty=1.0
...@@ -24,6 +25,7 @@ cmd="./run.sh ...@@ -24,6 +25,7 @@ cmd="./run.sh
--exp_name ${exp_name} --exp_name ${exp_name}
--n_average ${n_average} --n_average ${n_average}
--cer ${cer} --cer ${cer}
--ctc_infer ${ctc_infer}
--beam_size ${beam_size} --beam_size ${beam_size}
--len_penalty ${len_penalty} --len_penalty ${len_penalty}
--max_tokens ${max_tokens} --max_tokens ${max_tokens}
......
# Compute BLEU between a reference and a generated translation.
# Usage: cal_bleu.sh <ref> <gen> <tokenizer> <lang>
#   ref        reference file (one segment per line)
#   gen        hypothesis file (one segment per line)
#   tokenizer  1 -> also report tokenized multi-bleu, then detokenize both
#              files before SacreBLEU scoring
#   lang       target language code (language pair is fixed as en-<lang>)
set -e

ref=$1
gen=$2
tokenizer=$3
lang=$4
lang_pair=en-${lang}

record=$(mktemp -t temp.record.XXXXXX)

if [[ ${tokenizer} -eq 1 ]]; then
    echo "MultiBLEU" > "${record}"
    cmd="multi-bleu.perl ${ref} < ${gen}"
    # echo $cmd
    eval $cmd | head -n 1 >> "${record}"

    # Detokenize both sides so SacreBLEU scores untokenized text.
    cmd="detokenizer.perl -l ${lang} --threads 32 < ${ref} > ${ref}.detok"
    # echo $cmd
    # echo
    eval $cmd
    cmd="detokenizer.perl -l ${lang} --threads 32 < ${gen} > ${gen}.detok"
    # echo $cmd
    # echo
    eval $cmd
    ref=${ref}.detok
    gen=${gen}.detok
fi

# BUGFIX: append (>>) instead of truncate (>) — with '>' the MultiBLEU
# result written above was silently discarded from the report.
echo "SacreBLEU" >> "${record}"
cmd="cat ${gen} | sacrebleu ${ref} -m bleu -w 4 -l ${lang_pair}"
#echo $cmd
eval $cmd >> "${record}"

cat "${record}"
rm "${record}"
\ No newline at end of file
# Score CTC intermediate transcriptions with BLEU.
# Usage: cal_ctc_bleu.sh <infer_dir> <tag> <s2s_file> <ctc_file> <ref> <tokenizer> <lang>
# The raw CTC output is not in corpus order: re-attach the numeric sample
# indices taken from column 1 of the beam-search output, sort numerically,
# strip the index column again, and delegate scoring to cal_bleu.sh.
set -e

infer_dir=$1
tag=$2
s2s_infer_file=${infer_dir}/$3
org_ctc_infer_file=${infer_dir}/$4
ref=$5
tokenizer=$6
lang=$7

idx=${infer_dir}/${tag}_idx
ctc_infer=${infer_dir}/${tag}_ctc_infer
ctc_infer_sort=${infer_dir}/${tag}_ctc_infer_sort

# Build the sorted CTC hypotheses once; later invocations reuse the cache.
# All expansions are quoted so paths containing spaces or glob characters
# cannot break the pipeline (original left them unquoted).
if [[ ! -f "${ctc_infer_sort}" ]]; then
    cut -f1 "${s2s_infer_file}" > "${idx}"
    paste "${idx}" "${org_ctc_infer_file}" > "${ctc_infer}"
    sort -n -t $'\t' "${ctc_infer}" | cut -f2 > "${ctc_infer_sort}"
fi
gen=${ctc_infer_sort}

./cal_bleu.sh "${ref}" "${gen}" "${tokenizer}" "${lang}"
\ No newline at end of file
"""Compute corpus-level WER and CER between a reference and a hypothesis file.

Usage: python3 cal_wer.py <ref_file> <hyp_file>
Both files hold one segment per line; line i of each file must correspond
to the same utterance.  Prints "WER: x.xxxx" and "CER: x.xxxx" to stdout.
"""
import unicodedata  # noqa: F401 — kept (do not drop file-level imports); unused below
import jiwer
import jiwer.transforms as tr
import sys

ref_file = sys.argv[1]
hyp_file = sys.argv[2]

# Word-level normalization: fold "<<unk>>" into a single placeholder token,
# lowercase, strip punctuation, expand common English contractions, drop
# Kaldi non-words, collapse whitespace, then split into words.
wer_standardize = tr.Compose(
    [
        tr.SubstituteRegexes({r"<<unk>>": r"@"}),
        tr.ToLowerCase(),
        tr.RemovePunctuation(),
        tr.ExpandCommonEnglishContractions(),
        tr.RemoveKaldiNonWords(),
        tr.RemoveWhiteSpace(replace_by_space=True),
        tr.ReduceToListOfListOfWords(),
    ]
)
# Character-level normalization for CER (no contraction expansion; the
# string is only stripped before being split into characters).
cer_standardize = tr.Compose(
    [
        tr.SubstituteRegexes({r"<<unk>>": r"@"}),
        tr.ToLowerCase(),
        tr.RemovePunctuation(),
        tr.Strip(),
        tr.ReduceToListOfListOfChars(),
    ]
)

# Use context managers so the file handles are closed deterministically
# (original leaked both handles via open(...).readlines()).
with open(ref_file, "r") as f:
    ref_lines = f.readlines()
with open(hyp_file, "r") as f:
    hyp_lines = f.readlines()

wer = jiwer.wer(ref_lines, hyp_lines,
                truth_transform=wer_standardize,
                hypothesis_transform=wer_standardize,
                )
cer = jiwer.cer(ref_lines, hyp_lines,
                truth_transform=cer_standardize,
                hypothesis_transform=cer_standardize,
                )

print("WER: %.4f" % wer)
print("CER: %.4f" % cer)
# Score CTC intermediate transcriptions with WER/CER.
# Usage: cal_wer.sh <infer_dir> <tag> <s2s_file> <ctc_file> <ref>
# The raw CTC output is not in corpus order: re-attach the numeric sample
# indices taken from column 1 of the beam-search output, sort numerically,
# strip the index column again, then score with cal_wer.py.
set -e

infer_dir=$1
tag=$2
s2s_infer_file=${infer_dir}/$3
org_ctc_infer_file=${infer_dir}/$4
ref=$5

idx=${infer_dir}/${tag}_idx
ctc_infer=${infer_dir}/${tag}_ctc_infer
ctc_infer_sort=${infer_dir}/${tag}_ctc_infer_sort

# All expansions are quoted so paths containing spaces or glob characters
# cannot break the pipeline (original left them unquoted).
cut -f1 "${s2s_infer_file}" > "${idx}"
paste "${idx}" "${org_ctc_infer_file}" > "${ctc_infer}"
sort -n -t $'\t' "${ctc_infer}" | cut -f2 > "${ctc_infer_sort}"

python3 ./cal_wer.py "${ref}" "${ctc_infer_sort}"
\ No newline at end of file
...@@ -14,7 +14,7 @@ get_devices(){ ...@@ -14,7 +14,7 @@ get_devices(){
do do
line=$((dev + 2)) line=$((dev + 2))
use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1) use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)
if [[ $use -lt 100 ]]; then if [[ $use -lt 1000 ]]; then
device[$count]=$dev device[$count]=$dev
count=$((count + 1)) count=$((count + 1))
if [[ $count -eq $gpu_num ]]; then if [[ $count -eq $gpu_num ]]; then
......
...@@ -39,12 +39,12 @@ vocab_type=unigram ...@@ -39,12 +39,12 @@ vocab_type=unigram
vocab_size=5000 vocab_size=5000
speed_perturb=0 speed_perturb=0
lcrm=0 lcrm=0
tokenizer=0 tokenizer=1
use_raw_audio=0 use_raw_audio=0
use_specific_dict=1 use_specific_dict=1
specific_prefix=st specific_prefix=st
specific_dir=${root_dir}/data/mustc/st specific_dir=${root_dir}/data/mustc/st_tok
asr_vocab_prefix=spm_unigram10000_st_share asr_vocab_prefix=spm_unigram10000_st_share
org_data_dir=${root_dir}/data/${dataset} org_data_dir=${root_dir}/data/${dataset}
...@@ -72,6 +72,9 @@ step_valid=0 ...@@ -72,6 +72,9 @@ step_valid=0
# decoding setting # decoding setting
cer=0 cer=0
ctc_infer=0
ctc_self_ensemble=0
ctc_inter_logit=0
dec_model=checkpoint_best.pt dec_model=checkpoint_best.pt
n_average=10 n_average=10
beam_size=5 beam_size=5
...@@ -97,6 +100,10 @@ if [[ ${use_raw_audio} -eq 1 ]]; then ...@@ -97,6 +100,10 @@ if [[ ${use_raw_audio} -eq 1 ]]; then
data_dir=${data_dir}_raw data_dir=${data_dir}_raw
exp_prefix=${exp_prefix}_raw exp_prefix=${exp_prefix}_raw
fi fi
if [[ "${vocab_type}" == "char" ]]; then
data_dir=${data_dir}_char
exp_prefix=${exp_prefix}_char
fi
. ./local/parse_options.sh || exit 1; . ./local/parse_options.sh || exit 1;
...@@ -347,14 +354,48 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then ...@@ -347,14 +354,48 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
cmd="${cmd} cmd="${cmd}
--wer-char-level" --wer-char-level"
fi fi
if [[ ${ctc_infer} -eq 1 ]]; then
cmd="${cmd}
--ctc-infer"
fi
echo -e "\033[34mRun command: \n${cmd} \033[0m" echo -e "\033[34mRun command: \n${cmd} \033[0m"
if [[ $eval -eq 1 ]]; then if [[ $eval -eq 1 ]]; then
ctc_file=translation-${subset}.txt.ctc
if [[ -f ${model_dir}/${ctc_file} ]]; then
rm ${model_dir}/${ctc_file}
fi
eval $cmd eval $cmd
echo "" >> ${result_file}
tail -n 1 ${model_dir}/generate-${subset}.txt >> ${result_file} tail -n 1 ${model_dir}/generate-${subset}.txt >> ${result_file}
mv ${model_dir}/generate-${subset}.txt ${model_dir}/generate-${subset}-${suffix}.txt mv ${model_dir}/generate-${subset}.txt ${model_dir}/generate-${subset}-${suffix}.txt
mv ${model_dir}/translation-${subset}.txt ${model_dir}/translation-${subset}-${suffix}.txt mv ${model_dir}/translation-${subset}.txt ${model_dir}/translation-${subset}-${suffix}.txt
trans_file=translation-${subset}-${suffix}.txt
if [[ ${ctc_infer} -eq 1 && -f ${model_dir}/${ctc_file} ]]; then
ref_file=${data_dir}/${subset}.${src_lang}
if [[ -f ${ref_file} ]]; then
src_ctc=$(mktemp -t temp.record.XXXXXX)
cd ./local
sh ./cal_wer.sh ${model_dir} ${subset} ${trans_file} ${ctc_file} ${data_dir}/${subset}.${src_lang} > ${src_ctc}
cd ..
echo "CTC WER" >> ${result_file}
tail -n 1 ${src_ctc} >> ${result_file}
src_bleu=$(mktemp -t temp.record.XXXXXX)
cd local
sh ./cal_ctc_bleu.sh ${model_dir} ${subset} ${trans_file} ${ctc_file} ${data_dir}/${subset}.${src_lang} ${tokenizer} ${src_lang} > ${src_bleu}
cd ..
cat ${src_bleu} >> ${result_file}
rm ${src_ctc} ${src_bleu}
else
echo "No reference for source language."
fi
fi
fi fi
done done
cat ${result_file} cat ${result_file}
......
...@@ -13,12 +13,18 @@ extra_parameter= ...@@ -13,12 +13,18 @@ extra_parameter=
exp_tag= exp_tag=
config_list=(base ctc) # CTC
config_list=(purectc) config_list=(purectc)
#config_list=(base conformer)
config_list=(pds_base_8) # Transformer
config_list=(base ctc)
# Conformer
#config_list=(base conformer ctc)
# PDS
config_list=(purectc_pds_base_8) config_list=(purectc_pds_base_8)
config_list=(pds_base_8)
# exp full name # exp full name
exp_name= exp_name=
......
...@@ -11,10 +11,6 @@ adam_betas: (0.9,0.997) ...@@ -11,10 +11,6 @@ adam_betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1 label_smoothing: 0.1
ctc-weight: 0.2
intermedia-ctc-weight: 0.1
intermedia-ctc-layers: 2,4
dropout: 0.1 dropout: 0.1
attention-dropout: 0.1 attention-dropout: 0.1
activation-dropout: 0.1 activation-dropout: 0.1
...@@ -22,15 +18,15 @@ activation-dropout: 0.1 ...@@ -22,15 +18,15 @@ activation-dropout: 0.1
activation-fn: relu activation-fn: relu
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
encoder-embed-dim: 512 encoder-embed-dim: 64
encoder-ffn-embed-dim: 2048 encoder-ffn-embed-dim: 64
encoder-layers: 6 encoder-layers: 6
decoder-layers: 6 decoder-layers: 6
encoder-attention-heads: 8 encoder-attention-heads: 4
decoder-embed-dim: 512 decoder-embed-dim: 64
decoder-ffn-embed-dim: 2048 decoder-ffn-embed-dim: 64
decoder-attention-heads: 8 decoder-attention-heads: 4
#load-pretrained-encoder-from: #load-pretrained-encoder-from:
#load-pretrained-decoder-from: #load-pretrained-decoder-from:
...@@ -39,13 +35,14 @@ decoder-attention-heads: 8 ...@@ -39,13 +35,14 @@ decoder-attention-heads: 8
#ctc-weight: 0.2 #ctc-weight: 0.2
interleaved-ctc-weight: 0.3 interleaved-ctc-weight: 0.3
interleaved-ctc-layers: 6,9 interleaved-ctc-layers: 6,9
interleaved-ctc-temperature: 1.0 sae-ctc-temperature: 1.0
interleaved-ctc-drop-prob: 0 interleaved-ctc-drop-prob: 0
interleaved_ctc_upsampling_ratio: 3 interleaved-ctc-upsampling-ratio: 3
sae-adapter: league sae-adapter: league
sae-drop-prob: 0.0 sae-drop-prob: 0.0
#sae-distribution-cutoff: 10 #sae-distribution-cutoff: 10
share-ctc-and-sae: True share-ctc-and-sae: True
sae-ground-truth-ratio: 0.3
ctc-self-distill-weight: 0 ctc-self-distill-weight: 0
\ No newline at end of file
#ctc-layer: ctc-layer: 6
#ctc-weight: 0.2 ctc-weight: 0.3
interleaved-ctc-weight: 0.3 interleaved-ctc-weight: 0.3
interleaved-ctc-layers: 8 interleaved-ctc-layers: 4
interleaved-ctc-temperature: 1 interleaved-ctc-temperature: 1
interleaved-ctc-drop-prob: 0 interleaved-ctc-drop-prob: 0
interleaved_ctc_upsampling_ratio: 2 interleaved-ctc-upsampling-ratio: 2
sae-adapter: inter_league sae-adapter: inter_league
sae-drop-prob: 0.0 sae-drop-prob: 0.0
#sae-distribution-cutoff: 10 sae-distribution-cutoff: 10
#share-ctc-and-sae: True #share-ctc-and-sae: True
ctc-self-distill-weight: 0 #sae-ground-truth-ratio: 0.3
#ctc-self-distill-weight: 1
...@@ -41,11 +41,11 @@ src_vocab_size=10000 ...@@ -41,11 +41,11 @@ src_vocab_size=10000
tgt_vocab_size=10000 tgt_vocab_size=10000
share_dict=1 share_dict=1
lcrm=0 lcrm=0
tokenizer=0 tokenizer=1
use_specific_dict=1 use_specific_dict=1
specific_prefix=st specific_prefix=st
specific_dir=${root_dir}/data/${dataset}/st specific_dir=${root_dir}/data/${dataset}/st_tok
src_vocab_prefix=spm_unigram10000_st_share src_vocab_prefix=spm_unigram10000_st_share
tgt_vocab_prefix=spm_unigram10000_st_share tgt_vocab_prefix=spm_unigram10000_st_share
...@@ -111,9 +111,6 @@ if [[ ${lcrm} -eq 1 ]]; then ...@@ -111,9 +111,6 @@ if [[ ${lcrm} -eq 1 ]]; then
exp_prefix=${exp_prefix}_lcrm exp_prefix=${exp_prefix}_lcrm
fi fi
if [[ ${tokenizer} -eq 1 ]]; then if [[ ${tokenizer} -eq 1 ]]; then
train_subset=${train_subset}.tok
valid_subset=${valid_subset}.tok
trans_subset=${trans_subset}.tok
data_dir=${data_dir}_tok data_dir=${data_dir}_tok
exp_prefix=${exp_prefix}_tok exp_prefix=${exp_prefix}_tok
fi fi
...@@ -174,6 +171,10 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then ...@@ -174,6 +171,10 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
cmd="$cmd cmd="$cmd
--share" --share"
fi fi
if [[ $tokenizer -eq 1 ]]; then
cmd="$cmd
--tokenizer"
fi
if [[ ${lcrm} -eq 1 ]]; then if [[ ${lcrm} -eq 1 ]]; then
cmd="$cmd cmd="$cmd
--lowercase-src --lowercase-src
...@@ -379,8 +380,8 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then ...@@ -379,8 +380,8 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
if [[ ${tokenizer} -eq 1 ]]; then if [[ ${tokenizer} -eq 1 ]]; then
cmd="${cmd} cmd="${cmd}
--tokenizer moses --tokenizer moses
--moses-source-lang ${src_lang} --source-lang ${src_lang}
--moses-target-lang ${tgt_lang}" --target-lang ${tgt_lang}"
fi fi
fi fi
......
...@@ -6,17 +6,18 @@ gpu_num=1 ...@@ -6,17 +6,18 @@ gpu_num=1
update_freq=1 update_freq=1
max_tokens=8192 max_tokens=8192
extra_tag=
extra_parameter=
#extra_tag="${extra_tag}"
#extra_parameter="${extra_parameter} "
exp_tag=baseline exp_tag=baseline
config_list=(small) config_list=(small)
# exp full name # exp full name
exp_name= exp_name=
extra_tag=
extra_parameter=
#extra_tag="${extra_tag}"
#extra_parameter="${extra_parameter} "
train_config=$(echo ${config_list[*]} | sed 's/ /,/g') train_config=$(echo ${config_list[*]} | sed 's/ /,/g')
cmd="./run.sh cmd="./run.sh
......
...@@ -11,6 +11,9 @@ adam_betas: (0.9,0.98) ...@@ -11,6 +11,9 @@ adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1 label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
subsampling-filter: 1024 subsampling-filter: 1024
...@@ -19,9 +22,6 @@ subsampling-stride: 2 ...@@ -19,9 +22,6 @@ subsampling-stride: 2
subsampling-norm: none subsampling-norm: none
subsampling-activation: glu subsampling-activation: glu
encoder-embed-norm: True
encoder-no-scale-embedding: True
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
encoder-embed-dim: 256 encoder-embed-dim: 256
...@@ -37,4 +37,4 @@ attention-dropout: 0.1 ...@@ -37,4 +37,4 @@ attention-dropout: 0.1
activation-dropout: 0.1 activation-dropout: 0.1
#load-pretrained-encoder-from: #load-pretrained-encoder-from:
#load-pretrained-decoder-from: #load-pretrained-decoder-from:
\ No newline at end of file
...@@ -18,4 +18,4 @@ no-progress-bar: True ...@@ -18,4 +18,4 @@ no-progress-bar: True
log-interval: 100 log-interval: 100
seed: 1 seed: 1
report-accuracy: True report-accuracy: True
skip-invalid-size-inputs-valid-test: True skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
macaron-style: True macaron-style: True
use-cnn-module: True use-cnn-module: True
cnn-module-kernel: 31 cnn-module-kernel: 15
encoder-attention-type: rel_pos encoder-attention-type: rel_pos
encoder-activation-fn: swish encoder-activation-fn: swish
\ No newline at end of file
ctc-weight: 0.3 ctc-weight: 0.3
share-ctc-and-embed: True share-ctc-and-embed: True
share-target-ctc-and-embed: True
interleaved-ctc-weight: 0.2 interleaved-ctc-weight: 0.2
interleaved-ctc-layers: 6,9 interleaved-ctc-layers: 6,9
interleaved-ctc-temperature: 1.0
interleaved-ctc-drop-prob: 0 interleaved-ctc-drop-prob: 0
#target-ctc-weight: 0.3 target-ctc-weight: 0.3
#target-ctc-layer: 6 target-ctc-layer: 6
#target-interleaved-ctc-weight: 0.1 target-interleaved-ctc-weight: 0.2
#target-interleaved-ctc-layers: 2,4 target-interleaved-ctc-layers: 4
#sae-ground-truth-ratio: 0.1
sae-adapter: inter_league sae-adapter: inter_league
sae-drop-prob: 0.0 #sae-adapter: none
#sae-distribution-cutoff: 0 #target-sae-adapter: none
#share-ctc-and-sae: True sae-ctc-temperature: 1
#share-target-ctc-and-sae: True #sae-gumbel: True
#sae-distribution-hard: True
#sae-drop-prob: 0.0
#sae-distribution-cutoff: 10
#share-sae-and-ctc: True
#share-target-sae-and-ctc: True
#sae-embed-norm: True
#sae-out-norm: True
#ctc-self-distill-weight: 1
#target-ctc-self-distill-weight: 1
#ctc-self-distill-prob: 0.1
#cal-all-ctc: True
ctc-self-distill-weight: 0 use-aligned-text: True
\ No newline at end of file aligned-target-ctc: True
...@@ -14,6 +14,9 @@ label_smoothing: 0.1 ...@@ -14,6 +14,9 @@ label_smoothing: 0.1
encoder-normalize-before: True encoder-normalize-before: True
decoder-normalize-before: True decoder-normalize-before: True
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
subsampling-filter: 1024 subsampling-filter: 1024
...@@ -22,9 +25,6 @@ subsampling-stride: 2 ...@@ -22,9 +25,6 @@ subsampling-stride: 2
subsampling-norm: none subsampling-norm: none
subsampling-activation: glu subsampling-activation: glu
encoder-embed-norm: True
encoder-no-scale-embedding: True
dropout: 0.1 dropout: 0.1
activation-fn: relu activation-fn: relu
encoder-embed-dim: 256 encoder-embed-dim: 256
...@@ -39,14 +39,14 @@ decoder-ffn-embed-dim: 2048 ...@@ -39,14 +39,14 @@ decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4 decoder-attention-heads: 4
acoustic-encoder: transformer acoustic-encoder: transformer
adapter: league adapter: inter_league
#adapter: none
#adapter-embed-norm: True #adapter-embed-norm: True
#adapter-out-norm: True #adapter-out-norm: True
#share-adapter-and-ctc: True #share-adapter-and-ctc: True
#share-adapter-and-embed: True #share-adapter-and-embed: True
#load-pretrained-encoder-from: #load-pretrained-encoder-from:
#load-pretrained-acoustic-encoder-from: #load-pretrained-acoustic-encoder-from:
#load-pretrained-text-encoder-from: #load-pretrained-text-encoder-from:
#load-pretrained-decoder-from: #load-pretrained-decoder-from:
\ No newline at end of file
# fairseq-style training configuration for the s2t_sate architecture.
# NOTE(review): the grouping comments below are inferred from option names —
# confirm against the model's argument definitions.
arch: s2t_sate
share-decoder-input-output-embed: True
# optimizer and learning-rate schedule
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
# training objective
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
# normalization / embedding scaling
encoder-normalize-before: True
decoder-normalize-before: True
encoder-embed-norm: True
encoder-no-scale-embedding: True
# convolutional subsampling front-end
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
# model dimensions
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 18
text-encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
# SATE-specific components; commented options are disabled defaults
acoustic-encoder: transformer
adapter: inter_league
#adapter-embed-norm: True
#adapter-out-norm: True
#share-adapter-and-ctc: True
#share-adapter-and-embed: True
#load-pretrained-encoder-from:
#load-pretrained-acoustic-encoder-from:
#load-pretrained-text-encoder-from:
#load-pretrained-decoder-from:
...@@ -11,6 +11,7 @@ if [ "$#" -eq 1 ]; then ...@@ -11,6 +11,7 @@ if [ "$#" -eq 1 ]; then
fi fi
sacrebleu=1 sacrebleu=1
ctc_infer=0
n_average=10 n_average=10
beam_size=5 beam_size=5
len_penalty=1.0 len_penalty=1.0
...@@ -23,6 +24,7 @@ cmd="./run.sh ...@@ -23,6 +24,7 @@ cmd="./run.sh
--gpu_num ${gpu_num} --gpu_num ${gpu_num}
--exp_name ${exp_name} --exp_name ${exp_name}
--sacrebleu ${sacrebleu} --sacrebleu ${sacrebleu}
--ctc_infer ${ctc_infer}
--n_average ${n_average} --n_average ${n_average}
--beam_size ${beam_size} --beam_size ${beam_size}
--len_penalty ${len_penalty} --len_penalty ${len_penalty}
......
# Score a generated translation against a reference.
# Usage: cal_bleu.sh <ref> <gen> <tokenizer(0|1)> <tgt_lang>
# When tokenizer=1, tokenized multi-bleu is reported first, then both files
# are detokenized so that SacreBLEU is computed on detokenized text.
set -e

ref=$1
gen=$2
tokenizer=$3
lang=$4

# SacreBLEU language pair; source side is assumed to be English here.
lang_pair=en-${lang}

# Temp file accumulating all score lines; printed and removed at the end.
record=$(mktemp -t temp.record.XXXXXX)

if [[ ${tokenizer} -eq 1 ]]; then
    echo "MultiBLEU" > "${record}"
    cmd="multi-bleu.perl ${ref} < ${gen}"
    # echo $cmd
    eval $cmd | head -n 1 >> "${record}"

    # Detokenize both reference and hypothesis for detokenized SacreBLEU.
    cmd="detokenizer.perl -l ${lang} --threads 32 < ${ref} > ${ref}.detok"
    eval $cmd
    cmd="detokenizer.perl -l ${lang} --threads 32 < ${gen} > ${gen}.detok"
    eval $cmd
    ref=${ref}.detok
    gen=${gen}.detok
fi

# Append (>>) here: the original '>' truncated the MultiBLEU result written
# above whenever tokenizer=1.
echo "SacreBLEU" >> "${record}"
cmd="cat ${gen} | sacrebleu ${ref} -m bleu -w 4 -l ${lang_pair}"
#echo $cmd
eval $cmd >> "${record}"

cat "${record}"
rm "${record}"
# Re-order the CTC decoding output to match the reference order, then
# score it with cal_bleu.sh.
# Usage: cal_ctc_bleu.sh <infer_dir> <tag> <s2s_file> <ctc_file> <ref> <tokenizer> <lang>
set -e

infer_dir=$1
tag=$2
s2s_infer_file=${infer_dir}/$3
org_ctc_infer_file=${infer_dir}/$4
ref=$5
tokenizer=$6
lang=$7

idx=${infer_dir}/${tag}_idx
ctc_infer=${infer_dir}/${tag}_ctc_infer
ctc_infer_sort=${infer_dir}/${tag}_ctc_infer_sort

# Column 1 of the beam-search output carries the original sample index;
# attach it to the CTC output and sort numerically to restore corpus order.
# The sorted file is cached: remove ${ctc_infer_sort} to force regeneration.
if [[ ! -f ${ctc_infer_sort} ]]; then
    cut -f1 "${s2s_infer_file}" > "${idx}"
    paste "${idx}" "${org_ctc_infer_file}" > "${ctc_infer}"
    sort -n -t $'\t' "${ctc_infer}" | cut -f2 > "${ctc_infer_sort}"
fi

gen=${ctc_infer_sort}
./cal_bleu.sh "${ref}" "${gen}" "${tokenizer}" "${lang}"
import unicodedata
import jiwer
import jiwer.transforms as tr
import sys

# Compute WER and CER between a reference file and a hypothesis file
# using jiwer, with lowercasing and punctuation removal (lcrm) applied
# to both sides before scoring.
# Usage: python3 cal_wer_lcrm.py <ref_file> <hyp_file>

ref_file = sys.argv[1]
hyp_file = sys.argv[2]

# Word-level normalization: map the <<unk>> token to a placeholder so it
# still counts as one word, lowercase, strip punctuation, expand common
# English contractions, drop Kaldi non-words, collapse whitespace.
wer_standardize = tr.Compose(
    [
        tr.SubstituteRegexes({r"<<unk>>": r"@"}),
        tr.ToLowerCase(),
        tr.RemovePunctuation(),
        tr.ExpandCommonEnglishContractions(),
        tr.RemoveKaldiNonWords(),
        tr.RemoveWhiteSpace(replace_by_space=True),
        tr.ReduceToListOfListOfWords(),
    ]
)
# Character-level normalization for CER.
cer_standardize = tr.Compose(
    [
        tr.SubstituteRegexes({r"<<unk>>": r"@"}),
        tr.ToLowerCase(),
        tr.RemovePunctuation(),
        tr.Strip(),
        tr.ReduceToListOfListOfChars(),
    ]
)

# Use context managers so the file handles are closed deterministically
# (the originals were opened and never closed).
with open(ref_file, "r") as f:
    ref_lines = f.readlines()
with open(hyp_file, "r") as f:
    hyp_lines = f.readlines()

wer = jiwer.wer(ref_lines, hyp_lines,
                truth_transform=wer_standardize,
                hypothesis_transform=wer_standardize,
                )
cer = jiwer.cer(ref_lines, hyp_lines,
                truth_transform=cer_standardize,
                hypothesis_transform=cer_standardize,
                )
print("WER: %.4f" % wer)
print("CER: %.4f" % cer)
# Re-order the CTC decoding output to match the reference order, then
# score WER/CER with cal_wer_lcrm.py.
# Usage: cal_wer.sh <infer_dir> <tag> <s2s_file> <ctc_file> <ref>
set -e

infer_dir=$1
tag=$2
s2s_infer_file=${infer_dir}/$3
org_ctc_infer_file=${infer_dir}/$4
ref=$5

idx=${infer_dir}/${tag}_idx
ctc_infer=${infer_dir}/${tag}_ctc_infer
ctc_infer_sort=${infer_dir}/${tag}_ctc_infer_sort

# Column 1 of the beam-search output carries the original sample index;
# attach it to the CTC output and sort numerically to restore corpus order.
# Cached behind an existence check, matching cal_ctc_bleu.sh; remove
# ${ctc_infer_sort} to force regeneration.
if [[ ! -f ${ctc_infer_sort} ]]; then
    cut -f1 "${s2s_infer_file}" > "${idx}"
    paste "${idx}" "${org_ctc_infer_file}" > "${ctc_infer}"
    sort -n -t $'\t' "${ctc_infer}" | cut -f2 > "${ctc_infer_sort}"
fi

python3 ./cal_wer_lcrm.py "${ref}" "${ctc_infer_sort}"
...@@ -14,7 +14,7 @@ get_devices(){ ...@@ -14,7 +14,7 @@ get_devices(){
do do
line=$((dev + 2)) line=$((dev + 2))
use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1) use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)
if [[ $use -lt 100 ]]; then if [[ $use -lt 1000 ]]; then
device[$count]=$dev device[$count]=$dev
count=$((count + 1)) count=$((count + 1))
if [[ $count -eq $gpu_num ]]; then if [[ $count -eq $gpu_num ]]; then
......
...@@ -21,7 +21,6 @@ stop_stage=0 ...@@ -21,7 +21,6 @@ stop_stage=0
######## hardware ######## ######## hardware ########
# devices # devices
#device=() #device=()
use_auto=0
gpu_num=8 gpu_num=8
update_freq=1 update_freq=1
...@@ -42,7 +41,7 @@ vocab_size=10000 ...@@ -42,7 +41,7 @@ vocab_size=10000
share_dict=1 share_dict=1
speed_perturb=0 speed_perturb=0
lcrm=0 lcrm=0
tokenizer=0 tokenizer=1
use_raw_audio=0 use_raw_audio=0
use_specific_dict=0 use_specific_dict=0
...@@ -77,6 +76,7 @@ bleu_valid=0 ...@@ -77,6 +76,7 @@ bleu_valid=0
# decoding setting # decoding setting
sacrebleu=1 sacrebleu=1
dec_model=checkpoint_best.pt dec_model=checkpoint_best.pt
ctc_infer=0
n_average=10 n_average=10
beam_size=5 beam_size=5
len_penalty=1.0 len_penalty=1.0
...@@ -148,7 +148,19 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then ...@@ -148,7 +148,19 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
--splits ${valid_split},${test_split},${train_split} --splits ${valid_split},${test_split},${train_split}
--vocab-type ${vocab_type} --vocab-type ${vocab_type}
--vocab-size ${asr_vocab_size}" --vocab-size ${asr_vocab_size}"
[[ $eval -eq 1 && ${share_dict} -ne 1 && ${use_specific_dict} -ne 1 ]] && (echo -e "\033[34mRun command: \n${cmd} \033[0m" && eval $cmd) if [[ ${lcrm} -eq 1 ]]; then
cmd="$cmd
--lowercase-src
--rm-punc-src"
fi
if [[ ${tokenizer} -eq 1 ]]; then
cmd="$cmd
--tokenizer"
fi
if [[ $eval -eq 1 && ${share_dict} -ne 1 && ${use_specific_dict} -ne 1 ]]; then
echo -e "\033[34mRun command: \n${cmd} \033[0m"
eval $cmd
fi
asr_prefix=spm_${vocab_type}${asr_vocab_size}_asr asr_prefix=spm_${vocab_type}${asr_vocab_size}_asr
echo "stage 0: ST Data Preparation" echo "stage 0: ST Data Preparation"
...@@ -216,9 +228,6 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then ...@@ -216,9 +228,6 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
echo "stage 1: ST Network Training" echo "stage 1: ST Network Training"
[[ ! -d ${data_dir} ]] && echo "The data dir ${data_dir} is not existing!" && exit 1; [[ ! -d ${data_dir} ]] && echo "The data dir ${data_dir} is not existing!" && exit 1;
if [[ ${use_auto} -eq 1 ]]; then
device=(-1)
fi
if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
if [[ ${gpu_num} -eq 0 ]]; then if [[ ${gpu_num} -eq 0 ]]; then
device="" device=""
...@@ -285,13 +294,9 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then ...@@ -285,13 +294,9 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
if [[ $step_valid -eq 1 ]]; then if [[ $step_valid -eq 1 ]]; then
validate_interval=1 validate_interval=1
save_interval=1 save_interval=1
keep_last_epochs=10
no_epoch_checkpoints=0 no_epoch_checkpoints=0
save_interval_updates=500 save_interval_updates=500
keep_interval_updates=10 keep_interval_updates=10
else
validate_interval=1
keep_last_epochs=10
fi fi
if [[ $bleu_valid -eq 1 ]]; then if [[ $bleu_valid -eq 1 ]]; then
cmd="$cmd cmd="$cmd
...@@ -314,10 +319,6 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then ...@@ -314,10 +319,6 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
cmd="${cmd} cmd="${cmd}
--save-interval $save_interval " --save-interval $save_interval "
fi fi
if [[ -n $keep_last_epochs ]]; then
cmd="${cmd}
--keep-last-epochs $keep_last_epochs "
fi
if [[ -n $save_interval_updates ]]; then if [[ -n $save_interval_updates ]]; then
cmd="${cmd} cmd="${cmd}
--save-interval-updates $save_interval_updates" --save-interval-updates $save_interval_updates"
...@@ -334,20 +335,14 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then ...@@ -334,20 +335,14 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
echo "${time} | ${device} | ${data_dir} | ${exp_name} | ${model_dir} " >> $log echo "${time} | ${device} | ${data_dir} | ${exp_name} | ${model_dir} " >> $log
tail -n 50 ${log} > tmp.log tail -n 50 ${log} > tmp.log
mv tmp.log $log mv tmp.log $log
export CUDA_VISIBLE_DEVICES=${device}
cmd="nohup ${cmd} >> ${model_dir}/train.log 2>&1 &" log=${model_dir}/train.log
cmd="nohup ${cmd} >> ${log} 2>&1 &"
if [[ $eval -eq 1 ]]; then if [[ $eval -eq 1 ]]; then
if [[ ${use_auto} -eq 1 ]]; then eval $cmd
cmd=$(echo ${cmd} | tr -d "\n") sleep 2s
auto_run -c "${cmd}" -n ${gpu_num} tail -n "$(wc -l ${log} | awk '{print $1+1}')" -f ${log}
else
export CUDA_VISIBLE_DEVICES=${device}
eval $cmd
fi
sleep 5s
if [[ -f ${model_dir}/train.log ]]; then
tail -n "$(wc -l ${model_dir}/train.log | awk '{print $1+1}')" -f ${model_dir}/train.log
fi
fi fi
fi fi
wait wait
...@@ -370,9 +365,6 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then ...@@ -370,9 +365,6 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
dec_model=${dec_model} dec_model=${dec_model}
fi fi
if [[ ${use_auto} -eq 1 ]]; then
device=(-1)
fi
if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
if [[ ${gpu_num} -eq 0 ]]; then if [[ ${gpu_num} -eq 0 ]]; then
device="" device=""
...@@ -381,6 +373,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then ...@@ -381,6 +373,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
device=$(get_devices $gpu_num 0) device=$(get_devices $gpu_num 0)
fi fi
fi fi
export CUDA_VISIBLE_DEVICES=${device}
suffix=beam${beam_size}_alpha${len_penalty}_tokens${max_tokens} suffix=beam${beam_size}_alpha${len_penalty}_tokens${max_tokens}
if [[ ${n_average} -ne 1 ]]; then if [[ ${n_average} -ne 1 ]]; then
...@@ -408,31 +401,95 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then ...@@ -408,31 +401,95 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
--beam ${beam_size} --beam ${beam_size}
--lenpen ${len_penalty}" --lenpen ${len_penalty}"
if [[ ${ctc_infer} -eq 1 ]]; then
cmd="${cmd}
--ctc-infer"
fi
if [[ ${sacrebleu} -eq 1 ]]; then if [[ ${sacrebleu} -eq 1 ]]; then
cmd="${cmd} cmd="${cmd}
--scoring sacrebleu" --scoring sacrebleu"
if [[ ${tokenizer} -eq 1 ]]; then if [[ ${tokenizer} -eq 1 ]]; then
cmd="${cmd} cmd="${cmd}
--tokenizer moses --tokenizer moses
--moses-source-lang ${src_lang} --source-lang ${src_lang}
--moses-target-lang ${tgt_lang}" --target-lang ${tgt_lang}"
fi fi
fi fi
echo -e "\033[34mRun command: \n${cmd} \033[0m" echo -e "\033[34mRun command: \n${cmd} \033[0m"
if [[ $eval -eq 1 ]]; then if [[ $eval -eq 1 ]]; then
if [[ ${use_auto} -eq 1 ]]; then src_ctc_file=translation-${subset}.txt.src_ctc
cmd=$(echo ${cmd} | tr -d "\n") if [[ ${ctc_infer} -eq 1 && -f ${model_dir}/${src_ctc_file} ]]; then
auto_run -c ${cmd} -n ${gpu_num} rm ${model_dir}/${src_ctc_file}
else elif [[ ${ctc_infer} -eq 1 && -f ${model_dir}/translation-${subset}.txt.ctc ]]; then
export CUDA_VISIBLE_DEVICES=${device} src_ctc_file=translation-${subset}.txt.ctc
eval $cmd rm ${model_dir}/${src_ctc_file}
fi fi
tgt_ctc_file=translation-${subset}.txt.tgt_ctc
if [[ ${ctc_infer} -eq 1 && -f ${model_dir}/${tgt_ctc_file} ]]; then
rm ${model_dir}/${tgt_ctc_file}
fi
eval $cmd
echo "" >> ${result_file}
tail -n 1 ${model_dir}/generate-${subset}.txt >> ${result_file} tail -n 1 ${model_dir}/generate-${subset}.txt >> ${result_file}
mv ${model_dir}/generate-${subset}.txt ${model_dir}/generate-${subset}-${suffix}.txt mv ${model_dir}/generate-${subset}.txt ${model_dir}/generate-${subset}-${suffix}.txt
mv ${model_dir}/translation-${subset}.txt ${model_dir}/translation-${subset}-${suffix}.txt mv ${model_dir}/translation-${subset}.txt ${model_dir}/translation-${subset}-${suffix}.txt
trans_file=translation-${subset}-${suffix}.txt
# if [[ "$subset" =~ ^dev(.*) && -f ${model_dir}/${src_ctc_file} ]]; then
if [[ ${ctc_infer} -eq 1 && -f ${model_dir}/${src_ctc_file} ]]; then
ref_file=${data_dir}/${subset}.${src_lang}
if [[ -f ${ref_file} ]]; then
src_ctc=$(mktemp -t temp.record.XXXXXX)
cd ./local
sh ./cal_wer.sh ${model_dir} ${subset} ${trans_file} ${src_ctc_file} ${data_dir}/${subset}.${src_lang} > ${src_ctc}
cd ..
echo "Source language" >> ${result_file}
echo "CTC WER" >> ${result_file}
tail -n 1 ${src_ctc} >> ${result_file}
src_bleu=$(mktemp -t temp.record.XXXXXX)
cd local
sh ./cal_ctc_bleu.sh ${model_dir} ${subset} ${trans_file} ${src_ctc_file} ${data_dir}/${subset}.${src_lang} ${tokenizer} ${src_lang} > ${src_bleu}
cd ..
cat ${src_bleu} >> ${result_file}
rm ${src_ctc} ${src_bleu}
else
echo "No reference for source language."
fi
fi
tgt_ctc_file=translation-${subset}.txt.tgt_ctc
# if [[ "$subset" =~ ^dev(.*) && -f ${model_dir}/${tgt_ctc_file} ]]; then
if [[ ${ctc_infer} -eq 1 && -f ${model_dir}/${tgt_ctc_file} ]]; then
ref_file=${data_dir}/${subset}.${tgt_lang}
if [[ -f ${ref_file} ]]; then
tgt_ctc=$(mktemp -t temp.record.XXXXXX)
cd local
sh ./cal_wer.sh ${model_dir} ${subset} ${trans_file} ${tgt_ctc_file} ${ref_file} > ${tgt_ctc}
cd ..
echo "Target language" >> ${result_file}
echo "CTC WER" >> ${result_file}
tail -n 1 ${tgt_ctc} >> ${result_file}
tgt_bleu=$(mktemp -t temp.record.XXXXXX)
cd local
sh ./cal_ctc_bleu.sh ${model_dir} ${subset} ${trans_file} ${tgt_ctc_file} ${ref_file} ${tokenizer} ${tgt_lang} > ${tgt_bleu}
cd ..
cat ${tgt_bleu} >> ${result_file}
rm ${tgt_ctc} ${tgt_bleu}
else
echo "No reference for target language."
fi
fi
fi fi
done done
echo
cat ${result_file} cat ${result_file}
fi fi
...@@ -13,12 +13,21 @@ extra_parameter= ...@@ -13,12 +13,21 @@ extra_parameter=
exp_tag= exp_tag=
# Base
#config_list=(base) #config_list=(base)
#config_list=(base ctc conformer) #config_list=(base ctc)
#config_list=(base conformer ctc)
# SATE
#config_list=(sate ctc) #config_list=(sate ctc)
#config_list=(sate conformer ctc)
# SAE
#config_list=(sate inter)
#config_list=(pds_base_8) # PDS
#config_list=(pds_base conformer) #config_list=(pds_base_8 ctc)
#config_list=(pds_base_8 conformer ctc)
#config_list=(sate_pds ctc) #config_list=(sate_pds ctc)
# exp full name # exp full name
......
...@@ -12,6 +12,9 @@ adam_betas: (0.9,0.98) ...@@ -12,6 +12,9 @@ adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1 label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
#subsampling-filter: 2048 #subsampling-filter: 2048
...@@ -35,9 +38,5 @@ decoder-attention-heads: 4 ...@@ -35,9 +38,5 @@ decoder-attention-heads: 4
attention-dropout: 0.1 attention-dropout: 0.1
activation-dropout: 0.1 activation-dropout: 0.1
load-pretrained-encoder-from: /home/xuchen/st/checkpoints/aishell/asr/0506_sp_char_base_ctc_sample1024/avg_10_checkpoint.pt #load-pretrained-decoder-from:
load-pretrained-decoder-from: /home/xuchen/st/checkpoints/aishell/asr/0506_sp_char_base_ctc_sample1024/avg_10_checkpoint.pt #load-pretrained-encoder-from:
load-pretrained-encoder-from: /home/xuchen/st/checkpoints/librispeech/asr/base_baseline/avg_10_checkpoint.pt
load-pretrained-decoder-from: /home/xuchen/st/checkpoints/librispeech/asr/base_baseline/avg_10_checkpoint.pt
#load-pretrained-encoder-from: /home/xuchen/st/checkpoints/librispeech/asr/base_conformer_baseline_batch50k_16/avg_10_checkpoint.pt
#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/librispeech/asr/base_conformer_baseline_batch50k_16/avg_10_checkpoint.pt
ctc-weight: 0.2 ctc-weight: 0.3
intermedia-ctc-layers: 6,9 share-ctc-and-embed: True
intermedia-adapter: league share-target-ctc-and-embed: True
intermedia-ctc-weight: 0.1
ctc-self-distill-weight: 0 interleaved-ctc-weight: 0.2
post-process: sentencepiece interleaved-ctc-layers: 6,9
\ No newline at end of file interleaved-ctc-drop-prob: 0
#target-ctc-weight: 0.3
#target-ctc-layer: 6
#target-interleaved-ctc-weight: 0.2
#target-interleaved-ctc-layers: 2,4
#sae-ground-truth-ratio: 0.1
sae-adapter: inter_league
sae-ctc-temperature: 1
#sae-gumbel: True
#sae-distribution-hard: True
#sae-drop-prob: 0.0
#sae-distribution-cutoff: 10
#share-sae-and-ctc: True
#share-target-sae-and-ctc: True
#sae-embed-norm: True
#sae-out-norm: True
#ctc-self-distill-weight: 1
#target-ctc-self-distill-weight: 1
#ctc-self-distill-prob: 0.1
#cal-all-ctc: True
# Training configuration for the small speech-to-text transformer (fairseq-style flags).
arch: s2t_transformer_s
share-decoder-input-output-embed: True
# --- Optimization ---
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
#lr: 5e-4
adam_betas: (0.9,0.98)
# --- Loss: label-smoothed CE with an auxiliary CTC objective ---
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
# --- Front-end subsampling of the acoustic features ---
subsampling-type: conv1d
subsampling-layers: 2
#subsampling-filter: 2048
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
# --- Model dimensions ---
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 1024
encoder-layers: 6
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 1024
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
# --- Optional checkpoint initialization (disabled) ---
#load-pretrained-encoder-from: /home/xuchen/st/checkpoints/aishell/asr/0506_sp_char_base_ctc_sample1024/avg_10_checkpoint.pt
#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/aishell/asr/0506_sp_char_base_ctc_sample1024/avg_10_checkpoint.pt
#load-pretrained-encoder-from: /home/xuchen/st/checkpoints/librispeech/asr/base_baseline/avg_10_checkpoint.pt
#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/librispeech/asr/base_baseline/avg_10_checkpoint.pt
#load-pretrained-encoder-from: /home/xuchen/st/checkpoints/librispeech/asr/base_conformer_baseline_batch50k_16/avg_10_checkpoint.pt
#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/librispeech/asr/base_conformer_baseline_batch50k_16/avg_10_checkpoint.pt
...@@ -34,6 +34,7 @@ tgt_lang=de ...@@ -34,6 +34,7 @@ tgt_lang=de
lang=${src_lang}-${tgt_lang} lang=${src_lang}-${tgt_lang}
dataset=tibetan dataset=tibetan
subset=seda
task=speech_to_text task=speech_to_text
vocab_type=unigram vocab_type=unigram
vocab_type=char vocab_type=char
...@@ -49,6 +50,9 @@ specific_prefix=st ...@@ -49,6 +50,9 @@ specific_prefix=st
specific_dir=${root_dir}/data/mustc/st specific_dir=${root_dir}/data/mustc/st
asr_vocab_prefix=spm_unigram10000_st_share asr_vocab_prefix=spm_unigram10000_st_share
if [[ -z ${subset} ]]; then
dataset=${dataset}/${subset}
fi
org_data_dir=${root_dir}/data/${dataset} org_data_dir=${root_dir}/data/${dataset}
data_dir=${root_dir}/data/${dataset}/asr_char data_dir=${root_dir}/data/${dataset}/asr_char
data_dir=${root_dir}/data/${dataset}/asr_word data_dir=${root_dir}/data/${dataset}/asr_word
......
...@@ -4,25 +4,20 @@ ...@@ -4,25 +4,20 @@
gpu_num=1 gpu_num=1
update_freq=1 update_freq=1
max_tokens=20000 max_tokens=50000
#extra_tag=lr0.0005
#extra_tag=lr0.001
#extra_tag=char
extra_tag=word extra_tag=word
extra_parameter= extra_parameter=
#extra_tag="${extra_tag}" #extra_tag="${extra_tag}"
#extra_parameter="${extra_parameter} " #extra_parameter="${extra_parameter} "
exp_tag=batch5w exp_tag=batch5w
exp_tag=pretrain
#exp_tag=batch5w_pre_libri
config_list=(purectc) #config_list=(purectc)
config_list=(base) config_list=(base)
#config_list=(base ctc) #config_list=(base ctc)
#config_list=(base conformer) #config_list=(base conformer)
config_list=(big_wenet conformer ctc) #config_list=(big_wenet conformer ctc)
#config_list=(pds_base_4 ctc) #config_list=(pds_base_4 ctc)
#config_list=(pds_base_8 ctc) #config_list=(pds_base_8 ctc)
......
#ctc-weight: 0.2 #ctc-weight: 0.2
interleaved-ctc-weight: 0.3 interleaved-ctc-weight: 0.3
interleaved-ctc-layers: 10,15 interleaved-ctc-layers: 10,15
interleaved-ctc-temperature: 1.0 sae-ctc-temperature: 1.0
interleaved-ctc-drop-prob: 0 interleaved-ctc-drop-prob: 0
interleaved_ctc_upsampling_ratio: 2 interleaved_ctc_upsampling_ratio: 2
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论