Commit a598692d by xuchen

update the shell scripts

parent 9fe8cd1e
......@@ -11,6 +11,9 @@ adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
......
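These subsampling options describe the acoustic front-end: two conv1d layers with 1024 filters and (per the fuller hunks below) kernel 5 and stride 2, shrinking the frame rate by 4x. A minimal PyTorch sketch of such a subsampler, assuming GLU activation as configured — the module name and exact wiring are assumptions, not the repository's actual code:

import torch
import torch.nn as nn

class Conv1dSubsampler(nn.Module):
    # two strided conv1d layers; each stride-2 layer halves the sequence length
    def __init__(self, in_dim=80, filters=1024, out_dim=256, kernel=5, layers=2):
        super().__init__()
        convs, dim = [], in_dim
        for i in range(layers):
            # GLU halves the channel dimension, so each conv emits 2x channels
            out_ch = filters if i < layers - 1 else out_dim
            convs.append(nn.Conv1d(dim, out_ch * 2, kernel, stride=2, padding=kernel // 2))
            dim = out_ch
        self.convs = nn.ModuleList(convs)

    def forward(self, x):  # x: (batch, time, feat)
        x = x.transpose(1, 2)
        for conv in self.convs:
            x = nn.functional.glu(conv(x), dim=1)
        return x.transpose(1, 2)  # (batch, time/4, out_dim)

feats = torch.randn(2, 100, 80)  # 2 utterances, 100 frames of 80-dim fbank
print(Conv1dSubsampler()(feats).shape)  # torch.Size([2, 25, 256])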
......@@ -17,4 +17,4 @@ no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
skip-invalid-size-inputs-valid-test: True
......@@ -5,12 +5,15 @@ clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
lr: 2e-3
adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 2048
......
......@@ -2,4 +2,4 @@ macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
\ No newline at end of file
encoder-activation-fn: swish
ctc-weight: 0.2
interleaved-ctc-weight: 0.1
ctc-weight: 0.3
share-ctc-and-embed: True
share-target-ctc-and-embed: True
interleaved-ctc-weight: 0.2
interleaved-ctc-layers: 6,9
interleaved-ctc-temperature: 1.0
sae-ctc-temperature: 1.0
interleaved-ctc-drop-prob: 0
sae-adapter: inter_league
......
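This hunk raises the final-layer CTC weight to 0.3 and the interleaved weight to 0.2, with interleaved CTC computed after encoder layers 6 and 9. A sketch of how these weights plausibly combine with the decoder cross entropy — the true formula lives in the repository's label_smoothed_cross_entropy_with_ctc criterion, so treat this as an assumption:

import torch

def combined_loss(ce_loss, ctc_loss, interleaved_ctc_losses,
                  ctc_weight=0.3, interleaved_ctc_weight=0.2):
    # hypothetical: average the per-layer interleaved terms (layers 6 and 9),
    # then mix everything with the label-smoothed cross entropy
    inter = sum(interleaved_ctc_losses) / max(len(interleaved_ctc_losses), 1)
    return ce_loss + ctc_weight * ctc_loss + interleaved_ctc_weight * inter

print(combined_loss(torch.tensor(2.0), torch.tensor(5.0),
                    [torch.tensor(5.5), torch.tensor(5.2)]))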
......@@ -27,7 +27,7 @@ adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
dropout: 0.15
activation-fn: relu
encoder-ffn-embed-dim: 2048
encoder-layers: 12
......
......@@ -10,6 +10,9 @@ adam_betas: (0.9,0.98)
criterion: ctc
zero_infinity: True
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
......
encoder-attention-type: rel_selfattn
encoder-attention-type: rel_pos
#encoder-attention-type: relative
#max-encoder-relative-length: 100
......@@ -3,7 +3,7 @@
gpu_num=1
data_dir=
test_subset=(test)
test_subset=(dev test)
exp_name=
if [ "$#" -eq 1 ]; then
......@@ -11,6 +11,7 @@ if [ "$#" -eq 1 ]; then
fi
cer=1
ctc_infer=0
n_average=10
beam_size=5
len_penalty=1.0
......@@ -22,8 +23,9 @@ cmd="./run.sh
--stop_stage 2
--gpu_num ${gpu_num}
--exp_name ${exp_name}
--cer ${cer}
--n_average ${n_average}
--cer ${cer}
--ctc_infer ${ctc_infer}
--beam_size ${beam_size}
--len_penalty ${len_penalty}
--max_tokens ${max_tokens}
......
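n_average=10 asks the decode stage to average the last ten checkpoints before generation, which usually smooths out per-epoch noise. A minimal sketch of checkpoint averaging, assuming fairseq-style .pt files whose "model" entry is the parameter state dict (the paths are illustrative):

import torch

def average_checkpoints(paths):
    # element-wise mean over the 'model' state dicts (assumed fairseq layout)
    avg = None
    for p in paths:
        state = torch.load(p, map_location="cpu")["model"]
        if avg is None:
            avg = {k: v.clone().float() for k, v in state.items()}
        else:
            for k in avg:
                avg[k] += state[k].float()
    return {k: v / len(paths) for k, v in avg.items()}

# e.g. average_checkpoints([f"checkpoint{i}.pt" for i in range(41, 51)])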
set -e
ref=$1
gen=$2
tokenizer=$3
lang=$4
lang_pair=en-${lang}
record=$(mktemp -t temp.record.XXXXXX)
if [[ ${tokenizer} -eq 1 ]]; then
echo "MultiBLEU" > ${record}
cmd="multi-bleu.perl ${ref} < ${gen}"
# echo $cmd
eval $cmd | head -n 1 >> ${record}
cmd="detokenizer.perl -l ${lang} --threads 32 < ${ref} > ${ref}.detok"
# echo $cmd
# echo
eval $cmd
cmd="detokenizer.perl -l ${lang} --threads 32 < ${gen} > ${gen}.detok"
# echo $cmd
# echo
eval $cmd
ref=${ref}.detok
gen=${gen}.detok
fi
echo "SacreBLEU" > ${record}
cmd="cat ${gen} | sacrebleu ${ref} -m bleu -w 4 -l ${lang_pair}"
#echo $cmd
eval $cmd >> ${record}
cat ${record}
rm ${record}
\ No newline at end of file
......@@ -14,7 +14,7 @@ get_devices(){
do
line=$((dev + 2))
use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)
if [[ $use -lt 100 ]]; then
if [[ $use -lt 1000 ]]; then
device[$count]=$dev
count=$((count + 1))
if [[ $count -eq $gpu_num ]]; then
......
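This hunk relaxes the idle threshold in get_devices from 100 to 1000 (the value parsed from the memory column of nvidia-smi, i.e. MiB in use). The same selection can be sketched in Python with nvidia-smi's query flags, avoiding the fragile table parsing; the threshold and count are parameters, not values taken from this repository:

import subprocess

def get_devices(gpu_num, mem_threshold_mib=1000):
    # used memory per GPU, one integer (MiB) per line
    out = subprocess.check_output(
        ["nvidia-smi", "--query-gpu=memory.used", "--format=csv,noheader,nounits"],
        text=True)
    free = [i for i, line in enumerate(out.splitlines())
            if int(line.strip()) < mem_threshold_mib]
    return free[:gpu_num]

# e.g. get_devices(1) -> [0] on an idle single-GPU machine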
#! /bin/bash
# Processing aishell ASR Datasets
# Processing AIShell ASR Datasets
# Copyright 2021 Natural Language Processing Laboratory
# Xu Chen (xuchenneu@163.com)
......@@ -72,6 +72,9 @@ step_valid=0
# decoding setting
cer=1
ctc_infer=0
ctc_self_ensemble=0
ctc_inter_logit=0
dec_model=checkpoint_best.pt
n_average=10
beam_size=5
......@@ -244,13 +247,9 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
if [[ $step_valid -eq 1 ]]; then
validate_interval=1
save_interval=1
keep_last_epochs=10
no_epoch_checkpoints=0
save_interval_updates=500
keep_interval_updates=10
else
validate_interval=1
keep_last_epochs=10
fi
if [[ -n $no_epoch_checkpoints && $no_epoch_checkpoints -eq 1 ]]; then
cmd="$cmd
......@@ -264,10 +263,6 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
cmd="${cmd}
--save-interval $save_interval "
fi
if [[ -n $keep_last_epochs ]]; then
cmd="${cmd}
--keep-last-epochs $keep_last_epochs "
fi
if [[ -n $save_interval_updates ]]; then
cmd="${cmd}
--save-interval-updates $save_interval_updates"
......@@ -286,11 +281,12 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
mv tmp.log $log
export CUDA_VISIBLE_DEVICES=${device}
cmd="nohup ${cmd} >> ${model_dir}/train.log 2>&1 &"
log=${model_dir}/train.log
cmd="nohup ${cmd} >> ${log} 2>&1 &"
if [[ $eval -eq 1 ]]; then
eval $cmd
sleep 2s
tail -n "$(wc -l ${model_dir}/train.log | awk '{print $1+1}')" -f ${model_dir}/train.log
tail -n "$(wc -l ${log} | awk '{print $1+1}')" -f ${log}
fi
fi
wait
......@@ -355,14 +351,56 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
cmd="${cmd}
--wer-char-level"
fi
if [[ ${ctc_infer} -eq 1 ]]; then
cmd="${cmd}
--ctc-infer"
fi
if [[ ${ctc_self_ensemble} -eq 1 ]]; then
cmd="${cmd}
--ctc-self-ensemble"
fi
if [[ ${ctc_inter_logit} -eq 1 ]]; then
cmd="${cmd}
--ctc-inter-logit"
fi
echo -e "\033[34mRun command: \n${cmd} \033[0m"
if [[ $eval -eq 1 ]]; then
ctc_file=translation-${subset}.txt.ctc
if [[ -f ${model_dir}/${ctc_file} ]]; then
rm ${model_dir}/${ctc_file}
fi
eval $cmd
echo "" >> ${result_file}
tail -n 1 ${model_dir}/generate-${subset}.txt >> ${result_file}
mv ${model_dir}/generate-${subset}.txt ${model_dir}/generate-${subset}-${suffix}.txt
mv ${model_dir}/translation-${subset}.txt ${model_dir}/translation-${subset}-${suffix}.txt
trans_file=translation-${subset}-${suffix}.txt
if [[ ${ctc_infer} -eq 1 && -f ${model_dir}/${ctc_file} ]]; then
ref_file=${data_dir}/${subset}.${src_lang}
if [[ -f ${ref_file} ]]; then
src_ctc=$(mktemp -t temp.record.XXXXXX)
cd ./local
sh ./cal_wer.sh ${model_dir} ${subset} ${trans_file} ${ctc_file} ${data_dir}/${subset}.${src_lang} > ${src_ctc}
cd ..
echo "CTC WER" >> ${result_file}
tail -n 1 ${src_ctc} >> ${result_file}
src_bleu=$(mktemp -t temp.record.XXXXXX)
cd local
sh ./cal_ctc_bleu.sh ${model_dir} ${subset} ${trans_file} ${ctc_file} ${data_dir}/${subset}.${src_lang} ${tokenizer} ${src_lang} > ${src_bleu}
cd ..
cat ${src_bleu} >> ${result_file}
rm ${src_ctc} ${src_bleu}
else
echo "No reference for source language."
fi
fi
fi
done
cat ${result_file}
......
......@@ -13,12 +13,18 @@ extra_parameter=
exp_tag=
# CTC
config_list=(purectc)
# Transformer
config_list=(base ctc)
#config_list=(base ctc conformer)
#config_list=(big ctc conformer)
#config_list=(pds_base_16)
#config_list=(pds_base_16 conformer)
# Conformer
#config_list=(base conformer ctc)
# PDS
config_list=(purectc_pds_base_8)
config_list=(pds_base_8)
# exp full name
exp_name=
......
......@@ -2,7 +2,7 @@
#ctc-weight: 0.2
interleaved-ctc-weight: 0.3
interleaved-ctc-layers: 2,4
interleaved-ctc-temperature: 1.0
sae-ctc-temperature: 1.0
interleaved-ctc-drop-prob: 0
interleaved_ctc_upsampling_ratio: 2
......
arch: s2t_transformer_s
arch: s2t_sate
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
......@@ -22,14 +22,14 @@ subsampling-activation: glu
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-embed-dim: 64
encoder-ffn-embed-dim: 64
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-embed-dim: 64
decoder-ffn-embed-dim: 64
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
......@@ -37,16 +37,23 @@ activation-dropout: 0.1
ctc-weight: 0.2
interleaved-ctc-weight: 0.1
interleaved-ctc-layers: 6,9
interleaved-ctc-temperature: 1.0
sae-ctc-temperature: 1.0
interleaved-ctc-drop-prob: 0
#target-ctc-weight: 0.2
#target-ctc-layer: 6
#target-interleaved-ctc-weight: 0.1
#target-interleaved-ctc-layers: 2,4
sae-adapter: league
sae-drop-prob: 0.0
sae-distribution-cutoff: 10
share-ctc-and-sae: False
ctc-self-distill-weight: 0
ctc-self-distill-weight: 1
#target-ctc-self-distill-weight: 1
ctc-self-distill-prob: 1.0
ctc-self-distill-temperature: 1
sae-ground-truth-ratio: 0.3
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
\ No newline at end of file
......@@ -11,6 +11,9 @@ adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
......
......@@ -11,6 +11,9 @@ adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 2048
......
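encoder-embed-norm and encoder-no-scale-embedding keep recurring across these configs: the first applies LayerNorm to the (subsampled) embedding, the second disables the usual sqrt(d_model) scaling. A small sketch of the assumed semantics (in a real model the LayerNorm would be a module attribute, not constructed per call):

import math
import torch
import torch.nn as nn

def embed_speech(x, embed_norm=True, no_scale=True, d_model=256):
    # x: (batch, time, d_model), the subsampler output
    if not no_scale:
        x = x * math.sqrt(d_model)    # classic Transformer scaling
    if embed_norm:
        x = nn.LayerNorm(d_model)(x)  # LayerNorm in place of scaling
    return x

print(embed_speech(torch.randn(2, 25, 256)).shape)  # torch.Size([2, 25, 256])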
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 31
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
ctc-weight: 0.2
interleaved-ctc-weight: 0.1
ctc-weight: 0.3
share-ctc-and-embed: True
share-target-ctc-and-embed: True
interleaved-ctc-weight: 0.2
interleaved-ctc-layers: 6,9
interleaved-ctc-temperature: 1.0
interleaved-ctc-drop-prob: 0
sae-adapter: league
sae-drop-prob: 0.2
sae-distribution-cutoff: 10
share-ctc-and-sae: False
ctc-self-distill-weight: 0
sae-adapter: inter_league
#sae-adapter: none
#target-sae-adapter: none
sae-ctc-temperature: 1
#sae-gumbel: True
#sae-distribution-hard: True
#sae-drop-prob: 0.0
#sae-distribution-cutoff: 10
#share-sae-and-ctc: True
#share-target-sae-and-ctc: True
#sae-embed-norm: True
#sae-out-norm: True
#ctc-self-distill-weight: 1
#target-ctc-self-distill-weight: 1
#ctc-self-distill-prob: 0.1
#cal-all-ctc: True
inter_mixup: True
inter_mixup_layer: -1
inter_mixup_prob: 1.0
inter_mixup_ratio: 0.2
\ No newline at end of file
inter_mixup_ratio: 0.2
inter_mixup_beta: 0.2
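inter_mixup_beta: 0.2 completes the mixup block: with probability inter_mixup_prob a fraction inter_mixup_ratio of the batch is mixed at layer -1 (the input) with a coefficient drawn from Beta(0.2, 0.2). A sketch of the input-side mixing only — how targets and losses are mixed is not shown here, and the function below is an assumption:

import torch

def inter_mixup(x, ratio=0.2, beta=0.2):
    # x: (batch, time, dim); mix a subset of sequences with random partners
    bsz = x.size(0)
    n_mix = max(1, int(bsz * ratio))
    idx = torch.randperm(bsz)[:n_mix]
    partner = torch.randperm(bsz)[:n_mix]
    lam = torch.distributions.Beta(beta, beta).sample()
    x = x.clone()
    x[idx] = lam * x[idx] + (1 - lam) * x[partner]
    return x, idx, partner, lam

mixed, *_ = inter_mixup(torch.randn(8, 100, 80))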
......@@ -10,6 +10,9 @@ adam_betas: (0.9,0.98)
criterion: ctc
zero_infinity: True
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
......
......@@ -11,6 +11,7 @@ if [ "$#" -eq 1 ]; then
fi
cer=0
ctc_infer=0
n_average=10
beam_size=5
len_penalty=1.0
......@@ -24,6 +25,7 @@ cmd="./run.sh
--exp_name ${exp_name}
--n_average ${n_average}
--cer ${cer}
--ctc_infer ${ctc_infer}
--beam_size ${beam_size}
--len_penalty ${len_penalty}
--max_tokens ${max_tokens}
......
set -e
ref=$1
gen=$2
tokenizer=$3
lang=$4
lang_pair=en-${lang}
record=$(mktemp -t temp.record.XXXXXX)
if [[ ${tokenizer} -eq 1 ]]; then
echo "MultiBLEU" > ${record}
cmd="multi-bleu.perl ${ref} < ${gen}"
# echo $cmd
eval $cmd | head -n 1 >> ${record}
cmd="detokenizer.perl -l ${lang} --threads 32 < ${ref} > ${ref}.detok"
# echo $cmd
# echo
eval $cmd
cmd="detokenizer.perl -l ${lang} --threads 32 < ${gen} > ${gen}.detok"
# echo $cmd
# echo
eval $cmd
ref=${ref}.detok
gen=${gen}.detok
fi
echo "SacreBLEU" > ${record}
cmd="cat ${gen} | sacrebleu ${ref} -m bleu -w 4 -l ${lang_pair}"
#echo $cmd
eval $cmd >> ${record}
cat ${record}
rm ${record}
\ No newline at end of file
set -e
infer_dir=$1
tag=$2
s2s_infer_file=${infer_dir}/$3
org_ctc_infer_file=${infer_dir}/$4
ref=$5
tokenizer=$6
lang=$7
idx=${infer_dir}/${tag}_idx
ctc_infer=${infer_dir}/${tag}_ctc_infer
ctc_infer_sort=${infer_dir}/${tag}_ctc_infer_sort
if [[ ! -f ${ctc_infer_sort} ]]; then
cut -f1 ${s2s_infer_file} > ${idx}
paste ${idx} ${org_ctc_infer_file} > ${ctc_infer}
sort -n -t $'\t' ${ctc_infer} | cut -f2 > ${ctc_infer_sort}
fi
gen=${ctc_infer_sort}
./cal_bleu.sh ${ref} ${gen} ${tokenizer} ${lang}
\ No newline at end of file
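The script above restores corpus order: column 1 of the s2s translation file carries the original sentence index, and the raw CTC file is line-aligned with it, so pasting and numeric sorting recovers reference order before scoring. A Python equivalent of that pipeline, assuming the same tab-separated layout:

def sort_ctc_by_index(s2s_infer_file, ctc_infer_file, out_file):
    # column 1 of the s2s file is the original sentence index (tab-separated)
    with open(s2s_infer_file) as f:
        idx = [int(line.split("\t", 1)[0]) for line in f]
    with open(ctc_infer_file) as f:
        hyps = f.read().splitlines()
    with open(out_file, "w") as f:
        for _, hyp in sorted(zip(idx, hyps)):
            f.write(hyp + "\n")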
import sys
import jiwer
import jiwer.transforms as tr
ref_file = sys.argv[1]
hyp_file = sys.argv[2]
wer_standardize = tr.Compose(
[
tr.SubstituteRegexes({r"<<unk>>": r"@"}),
tr.ToLowerCase(),
tr.RemovePunctuation(),
tr.ExpandCommonEnglishContractions(),
tr.RemoveKaldiNonWords(),
tr.RemoveWhiteSpace(replace_by_space=True),
tr.ReduceToListOfListOfWords(),
]
)
cer_standardize = tr.Compose(
[
tr.SubstituteRegexes({r"<<unk>>": r"@"}),
tr.ToLowerCase(),
tr.RemovePunctuation(),
tr.Strip(),
tr.ReduceToListOfListOfChars(),
]
)
ref_lines = open(ref_file, "r").readlines()
hyp_lines = open(hyp_file, "r").readlines()
wer = jiwer.wer(ref_lines, hyp_lines,
truth_transform=wer_standardize,
hypothesis_transform=wer_standardize,
)
cer = jiwer.cer(ref_lines, hyp_lines,
truth_transform=cer_standardize,
hypothesis_transform=cer_standardize,
)
print("WER: %.4f" % wer)
print("CER: %.4f" % cer)
set -e
infer_dir=$1
tag=$2
s2s_infer_file=${infer_dir}/$3
org_ctc_infer_file=${infer_dir}/$4
ref=$5
idx=${infer_dir}/${tag}_idx
ctc_infer=${infer_dir}/${tag}_ctc_infer
ctc_infer_sort=${infer_dir}/${tag}_ctc_infer_sort
cut -f1 ${s2s_infer_file} > ${idx}
paste ${idx} ${org_ctc_infer_file} > ${ctc_infer}
sort -n -t $'\t' ${ctc_infer} | cut -f2 > ${ctc_infer_sort}
python3 ./cal_wer.py ${ref} ${ctc_infer_sort}
\ No newline at end of file
......@@ -14,7 +14,7 @@ get_devices(){
do
line=$((dev + 2))
use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)
if [[ $use -lt 100 ]]; then
if [[ $use -lt 1000 ]]; then
device[$count]=$dev
count=$((count + 1))
if [[ $count -eq $gpu_num ]]; then
......
......@@ -39,12 +39,12 @@ vocab_type=unigram
vocab_size=5000
speed_perturb=0
lcrm=0
tokenizer=0
tokenizer=1
use_raw_audio=0
use_specific_dict=1
specific_prefix=st
specific_dir=${root_dir}/data/mustc/st
specific_dir=${root_dir}/data/mustc/st_tok
asr_vocab_prefix=spm_unigram10000_st_share
org_data_dir=${root_dir}/data/${dataset}
......@@ -72,6 +72,9 @@ step_valid=0
# decoding setting
cer=0
ctc_infer=0
ctc_self_ensemble=0
ctc_inter_logit=0
dec_model=checkpoint_best.pt
n_average=10
beam_size=5
......@@ -97,6 +100,10 @@ if [[ ${use_raw_audio} -eq 1 ]]; then
data_dir=${data_dir}_raw
exp_prefix=${exp_prefix}_raw
fi
if [[ "${vocab_type}" == "char" ]]; then
data_dir=${data_dir}_char
exp_prefix=${exp_prefix}_char
fi
. ./local/parse_options.sh || exit 1;
......@@ -347,14 +354,48 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
cmd="${cmd}
--wer-char-level"
fi
if [[ ${ctc_infer} -eq 1 ]]; then
cmd="${cmd}
--ctc-infer"
fi
echo -e "\033[34mRun command: \n${cmd} \033[0m"
if [[ $eval -eq 1 ]]; then
ctc_file=translation-${subset}.txt.ctc
if [[ -f ${model_dir}/${ctc_file} ]]; then
rm ${model_dir}/${ctc_file}
fi
eval $cmd
echo "" >> ${result_file}
tail -n 1 ${model_dir}/generate-${subset}.txt >> ${result_file}
mv ${model_dir}/generate-${subset}.txt ${model_dir}/generate-${subset}-${suffix}.txt
mv ${model_dir}/translation-${subset}.txt ${model_dir}/translation-${subset}-${suffix}.txt
trans_file=translation-${subset}-${suffix}.txt
if [[ ${ctc_infer} -eq 1 && -f ${model_dir}/${ctc_file} ]]; then
ref_file=${data_dir}/${subset}.${src_lang}
if [[ -f ${ref_file} ]]; then
src_ctc=$(mktemp -t temp.record.XXXXXX)
cd ./local
sh ./cal_wer.sh ${model_dir} ${subset} ${trans_file} ${ctc_file} ${data_dir}/${subset}.${src_lang} > ${src_ctc}
cd ..
echo "CTC WER" >> ${result_file}
tail -n 1 ${src_ctc} >> ${result_file}
src_bleu=$(mktemp -t temp.record.XXXXXX)
cd local
sh ./cal_ctc_bleu.sh ${model_dir} ${subset} ${trans_file} ${ctc_file} ${data_dir}/${subset}.${src_lang} ${tokenizer} ${src_lang} > ${src_bleu}
cd ..
cat ${src_bleu} >> ${result_file}
rm ${src_ctc} ${src_bleu}
else
echo "No reference for source language."
fi
fi
fi
done
cat ${result_file}
......
......@@ -13,12 +13,18 @@ extra_parameter=
exp_tag=
config_list=(base ctc)
# CTC
config_list=(purectc)
#config_list=(base conformer)
config_list=(pds_base_8)
# Transformer
config_list=(base ctc)
# Conformer
#config_list=(base conformer ctc)
# PDS
config_list=(purectc_pds_base_8)
config_list=(pds_base_8)
# exp full name
exp_name=
......
......@@ -11,10 +11,6 @@ adam_betas: (0.9,0.997)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
ctc-weight: 0.2
intermedia-ctc-weight: 0.1
intermedia-ctc-layers: 2,4
dropout: 0.1
attention-dropout: 0.1
activation-dropout: 0.1
......@@ -22,15 +18,15 @@ activation-dropout: 0.1
activation-fn: relu
encoder-normalize-before: True
decoder-normalize-before: True
encoder-embed-dim: 512
encoder-ffn-embed-dim: 2048
encoder-embed-dim: 64
encoder-ffn-embed-dim: 64
encoder-layers: 6
decoder-layers: 6
encoder-attention-heads: 8
encoder-attention-heads: 4
decoder-embed-dim: 512
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 8
decoder-embed-dim: 64
decoder-ffn-embed-dim: 64
decoder-attention-heads: 4
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
......@@ -39,13 +35,14 @@ decoder-attention-heads: 8
#ctc-weight: 0.2
interleaved-ctc-weight: 0.3
interleaved-ctc-layers: 6,9
interleaved-ctc-temperature: 1.0
sae-ctc-temperature: 1.0
interleaved-ctc-drop-prob: 0
interleaved_ctc_upsampling_ratio: 3
interleaved-ctc-upsampling-ratio: 3
sae-adapter: league
sae-drop-prob: 0.0
#sae-distribution-cutoff: 10
share-ctc-and-sae: True
sae-ground-truth-ratio: 0.3
ctc-self-distill-weight: 0
\ No newline at end of file
#ctc-layer:
#ctc-weight: 0.2
ctc-layer: 6
ctc-weight: 0.3
interleaved-ctc-weight: 0.3
interleaved-ctc-layers: 8
interleaved-ctc-layers: 4
interleaved-ctc-temperature: 1
interleaved-ctc-drop-prob: 0
interleaved_ctc_upsampling_ratio: 2
interleaved-ctc-upsampling-ratio: 2
sae-adapter: inter_league
sae-drop-prob: 0.0
#sae-distribution-cutoff: 10
sae-distribution-cutoff: 10
#share-ctc-and-sae: True
ctc-self-distill-weight: 0
#sae-ground-truth-ratio: 0.3
#ctc-self-distill-weight: 1
......@@ -41,11 +41,11 @@ src_vocab_size=10000
tgt_vocab_size=10000
share_dict=1
lcrm=0
tokenizer=0
tokenizer=1
use_specific_dict=1
specific_prefix=st
specific_dir=${root_dir}/data/${dataset}/st
specific_dir=${root_dir}/data/${dataset}/st_tok
src_vocab_prefix=spm_unigram10000_st_share
tgt_vocab_prefix=spm_unigram10000_st_share
......@@ -111,9 +111,6 @@ if [[ ${lcrm} -eq 1 ]]; then
exp_prefix=${exp_prefix}_lcrm
fi
if [[ ${tokenizer} -eq 1 ]]; then
train_subset=${train_subset}.tok
valid_subset=${valid_subset}.tok
trans_subset=${trans_subset}.tok
data_dir=${data_dir}_tok
exp_prefix=${exp_prefix}_tok
fi
......@@ -174,6 +171,10 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
cmd="$cmd
--share"
fi
if [[ $tokenizer -eq 1 ]]; then
cmd="$cmd
--tokenizer"
fi
if [[ ${lcrm} -eq 1 ]]; then
cmd="$cmd
--lowercase-src
......@@ -379,8 +380,8 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
if [[ ${tokenizer} -eq 1 ]]; then
cmd="${cmd}
--tokenizer moses
--moses-source-lang ${src_lang}
--moses-target-lang ${tgt_lang}"
--source-lang ${src_lang}
--target-lang ${tgt_lang}"
fi
fi
......
......@@ -6,17 +6,18 @@ gpu_num=1
update_freq=1
max_tokens=8192
extra_tag=
extra_parameter=
#extra_tag="${extra_tag}"
#extra_parameter="${extra_parameter} "
exp_tag=baseline
config_list=(small)
# exp full name
exp_name=
extra_tag=
extra_parameter=
#extra_tag="${extra_tag}"
#extra_parameter="${extra_parameter} "
train_config=$(echo ${config_list[*]} | sed 's/ /,/g')
cmd="./run.sh
......
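config_list is collapsed into a comma-separated train_config by the sed line above; presumably the training entry point loads each YAML in order, with later files overriding earlier keys — which would explain stacks like (base ctc) and (base conformer ctc). A sketch of that merge under those assumptions, using PyYAML and hypothetical file paths:

import yaml  # PyYAML

def merge_configs(paths):
    # later files override earlier keys, mirroring the assumed load order
    merged = {}
    for p in paths:
        with open(p) as f:
            merged.update(yaml.safe_load(f) or {})
    return merged

# e.g. merge_configs(["conf/base.yaml", "conf/ctc.yaml"])  # hypothetical paths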
......@@ -11,6 +11,9 @@ adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
......@@ -19,9 +22,6 @@ subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
encoder-embed-norm: True
encoder-no-scale-embedding: True
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
......@@ -37,4 +37,4 @@ attention-dropout: 0.1
activation-dropout: 0.1
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
\ No newline at end of file
#load-pretrained-decoder-from:
......@@ -18,4 +18,4 @@ no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
skip-invalid-size-inputs-valid-test: True
\ No newline at end of file
skip-invalid-size-inputs-valid-test: True
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 31
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
\ No newline at end of file
encoder-activation-fn: swish
ctc-weight: 0.3
share-ctc-and-embed: True
share-target-ctc-and-embed: True
interleaved-ctc-weight: 0.2
interleaved-ctc-layers: 6,9
interleaved-ctc-temperature: 1.0
interleaved-ctc-drop-prob: 0
#target-ctc-weight: 0.3
#target-ctc-layer: 6
#target-interleaved-ctc-weight: 0.1
#target-interleaved-ctc-layers: 2,4
target-ctc-weight: 0.3
target-ctc-layer: 6
target-interleaved-ctc-weight: 0.2
target-interleaved-ctc-layers: 4
#sae-ground-truth-ratio: 0.1
sae-adapter: inter_league
sae-drop-prob: 0.0
#sae-distribution-cutoff: 0
#share-ctc-and-sae: True
#share-target-ctc-and-sae: True
#sae-adapter: none
#target-sae-adapter: none
sae-ctc-temperature: 1
#sae-gumbel: True
#sae-distribution-hard: True
#sae-drop-prob: 0.0
#sae-distribution-cutoff: 10
#share-sae-and-ctc: True
#share-target-sae-and-ctc: True
#sae-embed-norm: True
#sae-out-norm: True
#ctc-self-distill-weight: 1
#target-ctc-self-distill-weight: 1
#ctc-self-distill-prob: 0.1
#cal-all-ctc: True
ctc-self-distill-weight: 0
\ No newline at end of file
use-aligned-text: True
aligned-target-ctc: True
......@@ -14,6 +14,9 @@ label_smoothing: 0.1
encoder-normalize-before: True
decoder-normalize-before: True
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
......@@ -22,9 +25,6 @@ subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
encoder-embed-norm: True
encoder-no-scale-embedding: True
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
......@@ -39,14 +39,14 @@ decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
acoustic-encoder: transformer
adapter: league
adapter: inter_league
#adapter: none
#adapter-embed-norm: True
#adapter-out-norm: True
#share-adapter-and-ctc: True
#share-adapter-and-embed: True
#load-pretrained-encoder-from:
#load-pretrained-acoustic-encoder-from:
#load-pretrained-text-encoder-from:
#load-pretrained-decoder-from:
\ No newline at end of file
#load-pretrained-decoder-from:
arch: s2t_sate
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-normalize-before: True
decoder-normalize-before: True
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 18
text-encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
acoustic-encoder: transformer
adapter: inter_league
#adapter-embed-norm: True
#adapter-out-norm: True
#share-adapter-and-ctc: True
#share-adapter-and-embed: True
#load-pretrained-encoder-from:
#load-pretrained-acoustic-encoder-from:
#load-pretrained-text-encoder-from:
#load-pretrained-decoder-from:
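adapter: inter_league (and the SAE variants elsewhere in this commit) name how the text encoder consumes acoustic states. Reading the option names only — the real implementation is the repository's SATE/SAE code, so everything below is an assumption — a "league"-style adapter might add a soft embedding of the CTC posterior to the hidden states:

import torch
import torch.nn as nn

class LeagueAdapter(nn.Module):
    # assumed: hidden states plus a soft embedding of the CTC posterior
    def __init__(self, d_model=256, vocab=10000):
        super().__init__()
        self.embed = nn.Embedding(vocab, d_model)

    def forward(self, hidden, ctc_logits):
        soft = ctc_logits.softmax(dim=-1) @ self.embed.weight  # (B, T, d_model)
        return hidden + soft

print(LeagueAdapter()(torch.randn(2, 25, 256), torch.randn(2, 25, 10000)).shape)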
......@@ -11,6 +11,7 @@ if [ "$#" -eq 1 ]; then
fi
sacrebleu=1
ctc_infer=0
n_average=10
beam_size=5
len_penalty=1.0
......@@ -23,6 +24,7 @@ cmd="./run.sh
--gpu_num ${gpu_num}
--exp_name ${exp_name}
--sacrebleu ${sacrebleu}
--ctc_infer ${ctc_infer}
--n_average ${n_average}
--beam_size ${beam_size}
--len_penalty ${len_penalty}
......
set -e
ref=$1
gen=$2
tokenizer=$3
lang=$4
lang_pair=en-${lang}
record=$(mktemp -t temp.record.XXXXXX)
if [[ ${tokenizer} -eq 1 ]]; then
echo "MultiBLEU" > ${record}
cmd="multi-bleu.perl ${ref} < ${gen}"
# echo $cmd
eval $cmd | head -n 1 >> ${record}
cmd="detokenizer.perl -l ${lang} --threads 32 < ${ref} > ${ref}.detok"
# echo $cmd
# echo
eval $cmd
cmd="detokenizer.perl -l ${lang} --threads 32 < ${gen} > ${gen}.detok"
# echo $cmd
# echo
eval $cmd
ref=${ref}.detok
gen=${gen}.detok
fi
echo "SacreBLEU" > ${record}
cmd="cat ${gen} | sacrebleu ${ref} -m bleu -w 4 -l ${lang_pair}"
#echo $cmd
eval $cmd >> ${record}
cat ${record}
rm ${record}
\ No newline at end of file
set -e
infer_dir=$1
tag=$2
s2s_infer_file=${infer_dir}/$3
org_ctc_infer_file=${infer_dir}/$4
ref=$5
tokenizer=$6
lang=$7
idx=${infer_dir}/${tag}_idx
ctc_infer=${infer_dir}/${tag}_ctc_infer
ctc_infer_sort=${infer_dir}/${tag}_ctc_infer_sort
if [[ ! -f ${ctc_infer_sort} ]]; then
cut -f1 ${s2s_infer_file} > ${idx}
paste ${idx} ${org_ctc_infer_file} > ${ctc_infer}
sort -n -t $'\t' ${ctc_infer} | cut -f2 > ${ctc_infer_sort}
fi
gen=${ctc_infer_sort}
./cal_bleu.sh ${ref} ${gen} ${tokenizer} ${lang}
\ No newline at end of file
import sys
import jiwer
import jiwer.transforms as tr
ref_file = sys.argv[1]
hyp_file = sys.argv[2]
wer_standardize = tr.Compose(
[
tr.SubstituteRegexes({r"<<unk>>": r"@"}),
tr.ToLowerCase(),
tr.RemovePunctuation(),
tr.ExpandCommonEnglishContractions(),
tr.RemoveKaldiNonWords(),
tr.RemoveWhiteSpace(replace_by_space=True),
tr.ReduceToListOfListOfWords(),
]
)
cer_standardize = tr.Compose(
[
tr.SubstituteRegexes({r"<<unk>>": r"@"}),
tr.ToLowerCase(),
tr.RemovePunctuation(),
tr.Strip(),
tr.ReduceToListOfListOfChars(),
]
)
ref_lines = open(ref_file, "r").readlines()
hyp_lines = open(hyp_file, "r").readlines()
wer = jiwer.wer(ref_lines, hyp_lines,
truth_transform=wer_standardize,
hypothesis_transform=wer_standardize,
)
cer = jiwer.cer(ref_lines, hyp_lines,
truth_transform=cer_standardize,
hypothesis_transform=cer_standardize,
)
print("WER: %.4f" % wer)
print("CER: %.4f" % cer)
set -e
infer_dir=$1
tag=$2
s2s_infer_file=${infer_dir}/$3
org_ctc_infer_file=${infer_dir}/$4
ref=$5
idx=${infer_dir}/${tag}_idx
ctc_infer=${infer_dir}/${tag}_ctc_infer
ctc_infer_sort=${infer_dir}/${tag}_ctc_infer_sort
cut -f1 ${s2s_infer_file} > ${idx}
paste ${idx} ${org_ctc_infer_file} > ${ctc_infer}
sort -n -t $'\t' ${ctc_infer} | cut -f2 > ${ctc_infer_sort}
python3 ./cal_wer_lcrm.py ${ref} ${ctc_infer_sort}
\ No newline at end of file
......@@ -14,7 +14,7 @@ get_devices(){
do
line=$((dev + 2))
use=$(head -n $line ${record} | tail -1 | cut -d '|' -f3 | cut -d '/' -f1)
if [[ $use -lt 100 ]]; then
if [[ $use -lt 1000 ]]; then
device[$count]=$dev
count=$((count + 1))
if [[ $count -eq $gpu_num ]]; then
......
......@@ -21,7 +21,6 @@ stop_stage=0
######## hardware ########
# devices
#device=()
use_auto=0
gpu_num=8
update_freq=1
......@@ -42,7 +41,7 @@ vocab_size=10000
share_dict=1
speed_perturb=0
lcrm=0
tokenizer=0
tokenizer=1
use_raw_audio=0
use_specific_dict=0
......@@ -77,6 +76,7 @@ bleu_valid=0
# decoding setting
sacrebleu=1
dec_model=checkpoint_best.pt
ctc_infer=0
n_average=10
beam_size=5
len_penalty=1.0
......@@ -148,7 +148,19 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
--splits ${valid_split},${test_split},${train_split}
--vocab-type ${vocab_type}
--vocab-size ${asr_vocab_size}"
[[ $eval -eq 1 && ${share_dict} -ne 1 && ${use_specific_dict} -ne 1 ]] && (echo -e "\033[34mRun command: \n${cmd} \033[0m" && eval $cmd)
if [[ ${lcrm} -eq 1 ]]; then
cmd="$cmd
--lowercase-src
--rm-punc-src"
fi
if [[ ${tokenizer} -eq 1 ]]; then
cmd="$cmd
--tokenizer"
fi
if [[ $eval -eq 1 && ${share_dict} -ne 1 && ${use_specific_dict} -ne 1 ]]; then
echo -e "\033[34mRun command: \n${cmd} \033[0m"
eval $cmd
fi
asr_prefix=spm_${vocab_type}${asr_vocab_size}_asr
echo "stage 0: ST Data Preparation"
......@@ -216,9 +228,6 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
echo "stage 1: ST Network Training"
[[ ! -d ${data_dir} ]] && echo "The data dir ${data_dir} does not exist!" && exit 1;
if [[ ${use_auto} -eq 1 ]]; then
device=(-1)
fi
if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
if [[ ${gpu_num} -eq 0 ]]; then
device=""
......@@ -285,13 +294,9 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
if [[ $step_valid -eq 1 ]]; then
validate_interval=1
save_interval=1
keep_last_epochs=10
no_epoch_checkpoints=0
save_interval_updates=500
keep_interval_updates=10
else
validate_interval=1
keep_last_epochs=10
fi
if [[ $bleu_valid -eq 1 ]]; then
cmd="$cmd
......@@ -314,10 +319,6 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
cmd="${cmd}
--save-interval $save_interval "
fi
if [[ -n $keep_last_epochs ]]; then
cmd="${cmd}
--keep-last-epochs $keep_last_epochs "
fi
if [[ -n $save_interval_updates ]]; then
cmd="${cmd}
--save-interval-updates $save_interval_updates"
......@@ -334,20 +335,14 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
echo "${time} | ${device} | ${data_dir} | ${exp_name} | ${model_dir} " >> $log
tail -n 50 ${log} > tmp.log
mv tmp.log $log
export CUDA_VISIBLE_DEVICES=${device}
cmd="nohup ${cmd} >> ${model_dir}/train.log 2>&1 &"
log=${model_dir}/train.log
cmd="nohup ${cmd} >> ${log} 2>&1 &"
if [[ $eval -eq 1 ]]; then
if [[ ${use_auto} -eq 1 ]]; then
cmd=$(echo ${cmd} | tr -d "\n")
auto_run -c "${cmd}" -n ${gpu_num}
else
export CUDA_VISIBLE_DEVICES=${device}
eval $cmd
fi
sleep 5s
if [[ -f ${model_dir}/train.log ]]; then
tail -n "$(wc -l ${model_dir}/train.log | awk '{print $1+1}')" -f ${model_dir}/train.log
fi
eval $cmd
sleep 2s
tail -n "$(wc -l ${log} | awk '{print $1+1}')" -f ${log}
fi
fi
wait
......@@ -370,9 +365,6 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
dec_model=${dec_model}
fi
if [[ ${use_auto} -eq 1 ]]; then
device=(-1)
fi
if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
if [[ ${gpu_num} -eq 0 ]]; then
device=""
......@@ -381,6 +373,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
device=$(get_devices $gpu_num 0)
fi
fi
export CUDA_VISIBLE_DEVICES=${device}
suffix=beam${beam_size}_alpha${len_penalty}_tokens${max_tokens}
if [[ ${n_average} -ne 1 ]]; then
......@@ -408,31 +401,95 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
--beam ${beam_size}
--lenpen ${len_penalty}"
if [[ ${ctc_infer} -eq 1 ]]; then
cmd="${cmd}
--ctc-infer"
fi
if [[ ${sacrebleu} -eq 1 ]]; then
cmd="${cmd}
--scoring sacrebleu"
if [[ ${tokenizer} -eq 1 ]]; then
cmd="${cmd}
--tokenizer moses
--moses-source-lang ${src_lang}
--moses-target-lang ${tgt_lang}"
--source-lang ${src_lang}
--target-lang ${tgt_lang}"
fi
fi
echo -e "\033[34mRun command: \n${cmd} \033[0m"
if [[ $eval -eq 1 ]]; then
if [[ ${use_auto} -eq 1 ]]; then
cmd=$(echo ${cmd} | tr -d "\n")
auto_run -c ${cmd} -n ${gpu_num}
else
export CUDA_VISIBLE_DEVICES=${device}
eval $cmd
src_ctc_file=translation-${subset}.txt.src_ctc
if [[ ${ctc_infer} -eq 1 && -f ${model_dir}/${src_ctc_file} ]]; then
rm ${model_dir}/${src_ctc_file}
elif [[ ${ctc_infer} -eq 1 && -f ${model_dir}/translation-${subset}.txt.ctc ]]; then
src_ctc_file=translation-${subset}.txt.ctc
rm ${model_dir}/${src_ctc_file}
fi
tgt_ctc_file=translation-${subset}.txt.tgt_ctc
if [[ ${ctc_infer} -eq 1 && -f ${model_dir}/${tgt_ctc_file} ]]; then
rm ${model_dir}/${tgt_ctc_file}
fi
eval $cmd
echo "" >> ${result_file}
tail -n 1 ${model_dir}/generate-${subset}.txt >> ${result_file}
mv ${model_dir}/generate-${subset}.txt ${model_dir}/generate-${subset}-${suffix}.txt
mv ${model_dir}/translation-${subset}.txt ${model_dir}/translation-${subset}-${suffix}.txt
trans_file=translation-${subset}-${suffix}.txt
# if [[ "$subset" =~ ^dev(.*) && -f ${model_dir}/${src_ctc_file} ]]; then
if [[ ${ctc_infer} -eq 1 && -f ${model_dir}/${src_ctc_file} ]]; then
ref_file=${data_dir}/${subset}.${src_lang}
if [[ -f ${ref_file} ]]; then
src_ctc=$(mktemp -t temp.record.XXXXXX)
cd ./local
sh ./cal_wer.sh ${model_dir} ${subset} ${trans_file} ${src_ctc_file} ${data_dir}/${subset}.${src_lang} > ${src_ctc}
cd ..
echo "Source language" >> ${result_file}
echo "CTC WER" >> ${result_file}
tail -n 1 ${src_ctc} >> ${result_file}
src_bleu=$(mktemp -t temp.record.XXXXXX)
cd local
sh ./cal_ctc_bleu.sh ${model_dir} ${subset} ${trans_file} ${src_ctc_file} ${data_dir}/${subset}.${src_lang} ${tokenizer} ${src_lang} > ${src_bleu}
cd ..
cat ${src_bleu} >> ${result_file}
rm ${src_ctc} ${src_bleu}
else
echo "No reference for source language."
fi
fi
tgt_ctc_file=translation-${subset}.txt.tgt_ctc
# if [[ "$subset" =~ ^dev(.*) && -f ${model_dir}/${tgt_ctc_file} ]]; then
if [[ ${ctc_infer} -eq 1 && -f ${model_dir}/${tgt_ctc_file} ]]; then
ref_file=${data_dir}/${subset}.${tgt_lang}
if [[ -f ${ref_file} ]]; then
tgt_ctc=$(mktemp -t temp.record.XXXXXX)
cd local
sh ./cal_wer.sh ${model_dir} ${subset} ${trans_file} ${tgt_ctc_file} ${ref_file} > ${tgt_ctc}
cd ..
echo "Target language" >> ${result_file}
echo "CTC WER" >> ${result_file}
tail -n 1 ${tgt_ctc} >> ${result_file}
tgt_bleu=$(mktemp -t temp.record.XXXXXX)
cd local
sh ./cal_ctc_bleu.sh ${model_dir} ${subset} ${trans_file} ${tgt_ctc_file} ${ref_file} ${tokenizer} ${tgt_lang} > ${tgt_bleu}
cd ..
cat ${tgt_bleu} >> ${result_file}
rm ${tgt_ctc} ${tgt_bleu}
else
echo "No reference for target language."
fi
fi
fi
done
echo
cat ${result_file}
fi
......@@ -13,12 +13,21 @@ extra_parameter=
exp_tag=
# Base
#config_list=(base)
#config_list=(base ctc conformer)
#config_list=(base ctc)
#config_list=(base conformer ctc)
# SATE
#config_list=(sate ctc)
#config_list=(sate conformer ctc)
# SAE
#config_list=(sate inter)
#config_list=(pds_base_8)
#config_list=(pds_base conformer)
# PDS
#config_list=(pds_base_8 ctc)
#config_list=(pds_base_8 conformer ctc)
#config_list=(sate_pds ctc)
# exp full name
......
......@@ -12,6 +12,9 @@ adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d
subsampling-layers: 2
#subsampling-filter: 2048
......@@ -35,9 +38,5 @@ decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
load-pretrained-encoder-from: /home/xuchen/st/checkpoints/aishell/asr/0506_sp_char_base_ctc_sample1024/avg_10_checkpoint.pt
load-pretrained-decoder-from: /home/xuchen/st/checkpoints/aishell/asr/0506_sp_char_base_ctc_sample1024/avg_10_checkpoint.pt
load-pretrained-encoder-from: /home/xuchen/st/checkpoints/librispeech/asr/base_baseline/avg_10_checkpoint.pt
load-pretrained-decoder-from: /home/xuchen/st/checkpoints/librispeech/asr/base_baseline/avg_10_checkpoint.pt
#load-pretrained-encoder-from: /home/xuchen/st/checkpoints/librispeech/asr/base_conformer_baseline_batch50k_16/avg_10_checkpoint.pt
#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/librispeech/asr/base_conformer_baseline_batch50k_16/avg_10_checkpoint.pt
#load-pretrained-decoder-from:
#load-pretrained-encoder-from:
ctc-weight: 0.2
intermedia-ctc-layers: 6,9
intermedia-adapter: league
intermedia-ctc-weight: 0.1
ctc-self-distill-weight: 0
post-process: sentencepiece
\ No newline at end of file
ctc-weight: 0.3
share-ctc-and-embed: True
share-target-ctc-and-embed: True
interleaved-ctc-weight: 0.2
interleaved-ctc-layers: 6,9
interleaved-ctc-drop-prob: 0
#target-ctc-weight: 0.3
#target-ctc-layer: 6
#target-interleaved-ctc-weight: 0.2
#target-interleaved-ctc-layers: 2,4
#sae-ground-truth-ratio: 0.1
sae-adapter: inter_league
sae-ctc-temperature: 1
#sae-gumbel: True
#sae-distribution-hard: True
#sae-drop-prob: 0.0
#sae-distribution-cutoff: 10
#share-sae-and-ctc: True
#share-target-sae-and-ctc: True
#sae-embed-norm: True
#sae-out-norm: True
#ctc-self-distill-weight: 1
#target-ctc-self-distill-weight: 1
#ctc-self-distill-prob: 0.1
#cal-all-ctc: True
arch: s2t_transformer_s
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 1e-3
#lr: 5e-4
adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
encoder-embed-norm: True
encoder-no-scale-embedding: True
subsampling-type: conv1d
subsampling-layers: 2
#subsampling-filter: 2048
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 1024
encoder-layers: 6
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 1024
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
#load-pretrained-encoder-from: /home/xuchen/st/checkpoints/aishell/asr/0506_sp_char_base_ctc_sample1024/avg_10_checkpoint.pt
#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/aishell/asr/0506_sp_char_base_ctc_sample1024/avg_10_checkpoint.pt
#load-pretrained-encoder-from: /home/xuchen/st/checkpoints/librispeech/asr/base_baseline/avg_10_checkpoint.pt
#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/librispeech/asr/base_baseline/avg_10_checkpoint.pt
#load-pretrained-encoder-from: /home/xuchen/st/checkpoints/librispeech/asr/base_conformer_baseline_batch50k_16/avg_10_checkpoint.pt
#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/librispeech/asr/base_conformer_baseline_batch50k_16/avg_10_checkpoint.pt
......@@ -34,6 +34,7 @@ tgt_lang=de
lang=${src_lang}-${tgt_lang}
dataset=tibetan
subset=seda
task=speech_to_text
vocab_type=unigram
vocab_type=char
......@@ -49,6 +50,9 @@ specific_prefix=st
specific_dir=${root_dir}/data/mustc/st
asr_vocab_prefix=spm_unigram10000_st_share
if [[ -n ${subset} ]]; then
dataset=${dataset}/${subset}
fi
org_data_dir=${root_dir}/data/${dataset}
data_dir=${root_dir}/data/${dataset}/asr_char
data_dir=${root_dir}/data/${dataset}/asr_word
......
......@@ -4,25 +4,20 @@
gpu_num=1
update_freq=1
max_tokens=20000
max_tokens=50000
#extra_tag=lr0.0005
#extra_tag=lr0.001
#extra_tag=char
extra_tag=word
extra_parameter=
#extra_tag="${extra_tag}"
#extra_parameter="${extra_parameter} "
exp_tag=batch5w
exp_tag=pretrain
#exp_tag=batch5w_pre_libri
config_list=(purectc)
#config_list=(purectc)
config_list=(base)
#config_list=(base ctc)
#config_list=(base conformer)
config_list=(big_wenet conformer ctc)
#config_list=(big_wenet conformer ctc)
#config_list=(pds_base_4 ctc)
#config_list=(pds_base_8 ctc)
......
#ctc-weight: 0.2
interleaved-ctc-weight: 0.3
interleaved-ctc-layers: 10,15
interleaved-ctc-temperature: 1.0
sae-ctc-temperature: 1.0
interleaved-ctc-drop-prob: 0
interleaved_ctc_upsampling_ratio: 2
......