Commit a1c60599 by xuchen

Fix the speed perturbation bug

parent eff10263
...@@ -74,7 +74,7 @@ def process(args): ...@@ -74,7 +74,7 @@ def process(args):
print("Generating manifest...") print("Generating manifest...")
for split in SPLITS: for split in SPLITS:
manifest = {c: [] for c in MANIFEST_COLUMNS} manifest = {c: [] for c in MANIFEST_COLUMNS}
dataset = LIBRISPEECH(out_root.as_posix(), url=split) dataset = LIBRISPEECH(data_root.as_posix(), url=split)
for wav, sample_rate, utt, spk_id, chapter_no, utt_no in tqdm(dataset): for wav, sample_rate, utt, spk_id, chapter_no, utt_no in tqdm(dataset):
sample_id = f"{spk_id}-{chapter_no}-{utt_no}" sample_id = f"{spk_id}-{chapter_no}-{utt_no}"
manifest["id"].append(sample_id) manifest["id"].append(sample_id)
......
...@@ -243,7 +243,7 @@ def process(args): ...@@ -243,7 +243,7 @@ def process(args):
manifest = {c: [] for c in MANIFEST_COLUMNS} manifest = {c: [] for c in MANIFEST_COLUMNS}
if args.task == "st" and args.add_src: if args.task == "st" and args.add_src:
manifest["src_text"] = [] manifest["src_text"] = []
dataset = MUSTC(args.data_root, lang, split) dataset = MUSTC(args.data_root, lang, split, args.speed_perturb)
for idx in range(len(dataset)): for idx in range(len(dataset)):
items = dataset.get_fast(idx) items = dataset.get_fast(idx)
for item in items: for item in items:
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论