Commit eff10263 by xuchen

Modify the preprocessing of the LibriSpeech dataset

parent 02ec8720
# Training configuration expressed as flat key/value options.
# Lines that begin with '#' followed by a key are disabled settings kept for reference.
# NOTE(review): presumably each key is forwarded to the training entry point as an
# option of the same name; confirm against the script that loads this file.
# --- Data subsets ---
train-subset: train-clean-100,train-clean-360,train-other-500
# Disabled alternative: use only the train-clean-100 subset.
#train-subset: train-clean-100
valid-subset: dev-clean
# --- Run length, data loading, logging, reproducibility ---
max-epoch: 100
max-update: 300000
num-workers: 8
patience: 10
no-progress-bar: True
log-interval: 100
seed: 1
report-accuracy: True
# --- Model architecture and embedding sharing ---
arch: s2t_conformer_s
share-decoder-input-output-embed: True
# --- Optimizer and learning-rate schedule ---
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
# Disabled: explicit Adam betas (note the underscore spelling of this key).
#adam_betas: (0.9,0.98)
# --- Training criterion ---
criterion: label_smoothed_cross_entropy
# NOTE(review): this key is spelled with an underscore while the other active keys
# use hyphens; confirm the loader accepts this spelling.
label_smoothing: 0.1
# --- Convolutional front-end and encoder/decoder sizes ---
conv-kernel-sizes: 5,5
conv-channels: 1024
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
# Disabled decoder-size and extra-dropout overrides; while commented out, these
# values are not set by this file.
#decoder-embed-dim: 256
#decoder-ffn-embed-dim: 2048
#decoder-attention-heads: 4
#attention-dropout: 0.1
#activation-dropout: 0.1
......@@ -35,8 +35,9 @@ dataset=librispeech
task=speech_to_text
vocab_type=unigram
vocab_size=10000
speed_perturb=0
org_data_dir=/meida/data/${dataset}
org_data_dir=/media/data/${dataset}
data_dir=~/st/data/${dataset}
test_subset=(dev-clean dev-other test-clean test-other)
......@@ -79,8 +80,14 @@ if [[ -z ${exp_name} ]]; then
if [[ -n ${extra_tag} ]]; then
exp_name=${exp_name}_${extra_tag}
fi
if [[ ${speed_perturb} -eq 1 ]]; then
exp_name=sp_${exp_name}
fi
fi
if [[ ${speed_perturb} -eq 1 ]]; then
data_dir=${data_dir}_sp
fi
model_dir=$root_dir/../checkpoints/$dataset/asr/${exp_name}
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
......@@ -92,18 +99,28 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
### Task dependent. You have to make data the following preparation part by yourself.
### But you can utilize Kaldi recipes in most cases
echo "stage 0: Data Preparation"
if [[ ! -e ${data_dir} ]]; then
mkdir -p ${data_dir}
fi
source ~/tools/audio/bin/activate
cmd="python ${root_dir}/examples/speech_to_text/prep_librispeech_data.py
--data-root ${org_data_dir}
--output-root ${data_dir}
--vocab-type ${vocab_type}
--vocab-size ${vocab_size}"
if [[ ${speed_perturb} -eq 1 ]]; then
cmd="$cmd
--speed-perturb"
fi
echo -e "\033[34mRun command: \n${cmd} \033[0m"
[[ $eval -eq 1 ]] && eval $cmd
fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
echo "stage 1: ASR Network Training"
[[ ! -d ${data_dir} ]] && echo "The data dir $data_dir is not existing!" && exit 1;
[[ ! -d ${data_dir} ]] && echo "The data dir ${data_dir} is not existing!" && exit 1;
if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
if [[ ${gpu_num} -eq 0 ]]; then
......
......@@ -123,7 +123,7 @@ data_dir=${data_dir}/${lang}
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
echo "stage 1: ASR Network Training"
[[ ! -d $data_dir ]] && echo "The data dir $data_dir is not existing!" && exit 1;
[[ ! -d ${data_dir} ]] && echo "The data dir ${data_dir} is not existing!" && exit 1;
if [[ -z ${device} || ${#device[@]} -eq 0 ]]; then
if [[ ${gpu_num} -eq 0 ]]; then
......
......@@ -39,6 +39,7 @@ MANIFEST_COLUMNS = ["id", "audio", "n_frames", "tgt_text", "speaker"]
def process(args):
data_root = Path(args.data_root).absolute()
out_root = Path(args.output_root).absolute()
out_root.mkdir(exist_ok=True)
# Extract features
......@@ -48,7 +49,7 @@ def process(args):
if args.overwrite or not Path.exists(zip_path):
for split in SPLITS:
print(f"Fetching split {split}...")
dataset = LIBRISPEECH(out_root.as_posix(), url=split, download=True)
dataset = LIBRISPEECH(data_root.as_posix(), url=split, download=True)
print("Extracting log mel filter bank features...")
for wav, sample_rate, _, spk_id, chapter_no, utt_no in tqdm(dataset):
sample_id = f"{spk_id}-{chapter_no}-{utt_no}"
......@@ -96,7 +97,7 @@ def process(args):
print("Loading the training text...")
for split in SPLITS:
if split.startswith("train"):
dataset = LIBRISPEECH(out_root.as_posix(), url=split)
dataset = LIBRISPEECH(data_root.as_posix(), url=split)
for wav, sample_rate, utt, spk_id, chapter_no, utt_no in dataset:
train_text.append(utt.lower())
for t in train_text:
......@@ -119,6 +120,7 @@ def process(args):
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--data-root", "-d", required=True, type=str)
parser.add_argument("--output-root", "-o", required=True, type=str)
parser.add_argument(
"--vocab-type",
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论