Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
F
Fairseq-S2T
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
0
Issues
0
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
xuchen
Fairseq-S2T
Commits
eff10263
Commit
eff10263
authored
Apr 04, 2021
by
xuchen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
modify the preprocessing of the librispeech
parent
02ec8720
显示空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
66 行增加
和
5 行删除
+66
-5
egs/librispeech/asr/conf/train.yaml
+42
-0
egs/librispeech/asr/run.sh
+19
-2
egs/mustc/asr/run.sh
+1
-1
examples/speech_to_text/prep_librispeech_data.py
+4
-2
没有找到文件。
egs/librispeech/asr/conf/train.yaml
0 → 100644
查看文件 @
eff10263
train-subset
:
train-clean-100,train-clean-360,train-other-500
#train-subset: train-clean-100
valid-subset
:
dev-clean
max-epoch
:
100
max-update
:
300000
num-workers
:
8
patience
:
10
no-progress-bar
:
True
log-interval
:
100
seed
:
1
report-accuracy
:
True
arch
:
s2t_conformer_s
share-decoder-input-output-embed
:
True
optimizer
:
adam
clip-norm
:
10.0
lr-scheduler
:
inverse_sqrt
warmup-init-lr
:
1e-7
warmup-updates
:
10000
lr
:
2e-3
#adam_betas: (0.9,0.98)
criterion
:
label_smoothed_cross_entropy
label_smoothing
:
0.1
conv-kernel-sizes
:
5,5
conv-channels
:
1024
dropout
:
0.1
activation-fn
:
relu
encoder-embed-dim
:
256
encoder-ffn-embed-dim
:
2048
encoder-layers
:
12
decoder-layers
:
6
encoder-attention-heads
:
4
#decoder-embed-dim: 256
#decoder-ffn-embed-dim: 2048
#decoder-attention-heads: 4
#attention-dropout: 0.1
#activation-dropout: 0.1
egs/librispeech/asr/run.sh
查看文件 @
eff10263
...
@@ -35,8 +35,9 @@ dataset=librispeech
...
@@ -35,8 +35,9 @@ dataset=librispeech
task
=
speech_to_text
task
=
speech_to_text
vocab_type
=
unigram
vocab_type
=
unigram
vocab_size
=
10000
vocab_size
=
10000
speed_perturb
=
0
org_data_dir
=
/me
id
a/data/
${
dataset
}
org_data_dir
=
/me
di
a/data/
${
dataset
}
data_dir
=
~/st/data/
${
dataset
}
data_dir
=
~/st/data/
${
dataset
}
test_subset
=(
dev-clean dev-other test-clean test-other
)
test_subset
=(
dev-clean dev-other test-clean test-other
)
...
@@ -79,8 +80,14 @@ if [[ -z ${exp_name} ]]; then
...
@@ -79,8 +80,14 @@ if [[ -z ${exp_name} ]]; then
if
[[
-n
${
extra_tag
}
]]
;
then
if
[[
-n
${
extra_tag
}
]]
;
then
exp_name
=
${
exp_name
}
_
${
extra_tag
}
exp_name
=
${
exp_name
}
_
${
extra_tag
}
fi
fi
if
[[
${
speed_perturb
}
-eq
1
]]
;
then
exp_name
=
sp_
${
exp_name
}
fi
fi
fi
if
[[
${
speed_perturb
}
-eq
1
]]
;
then
data_dir
=
${
data_dir
}
_sp
fi
model_dir
=
$root_dir
/../checkpoints/
$dataset
/asr/
${
exp_name
}
model_dir
=
$root_dir
/../checkpoints/
$dataset
/asr/
${
exp_name
}
if
[
${
stage
}
-le
-1
]
&&
[
${
stop_stage
}
-ge
-1
]
;
then
if
[
${
stage
}
-le
-1
]
&&
[
${
stop_stage
}
-ge
-1
]
;
then
...
@@ -92,18 +99,28 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
...
@@ -92,18 +99,28 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
### Task dependent. You have to make data the following preparation part by yourself.
### Task dependent. You have to make data the following preparation part by yourself.
### But you can utilize Kaldi recipes in most cases
### But you can utilize Kaldi recipes in most cases
echo
"stage 0: Data Preparation"
echo
"stage 0: Data Preparation"
if
[[
!
-e
${
data_dir
}
]]
;
then
mkdir
-p
${
data_dir
}
fi
source
~/tools/audio/bin/activate
cmd
=
"python
${
root_dir
}
/examples/speech_to_text/prep_librispeech_data.py
cmd
=
"python
${
root_dir
}
/examples/speech_to_text/prep_librispeech_data.py
--data-root
${
org_data_dir
}
--data-root
${
org_data_dir
}
--output-root
${
data_dir
}
--output-root
${
data_dir
}
--vocab-type
${
vocab_type
}
--vocab-type
${
vocab_type
}
--vocab-size
${
vocab_size
}
"
--vocab-size
${
vocab_size
}
"
if
[[
${
speed_perturb
}
-eq
1
]]
;
then
cmd
=
"
$cmd
--speed-perturb"
fi
echo
-e
"
\0
33[34mRun command:
\n
${
cmd
}
\0
33[0m"
echo
-e
"
\0
33[34mRun command:
\n
${
cmd
}
\0
33[0m"
[[
$eval
-eq
1
]]
&&
eval
$cmd
[[
$eval
-eq
1
]]
&&
eval
$cmd
fi
fi
if
[
${
stage
}
-le
1
]
&&
[
${
stop_stage
}
-ge
1
]
;
then
if
[
${
stage
}
-le
1
]
&&
[
${
stop_stage
}
-ge
1
]
;
then
echo
"stage 1: ASR Network Training"
echo
"stage 1: ASR Network Training"
[[
!
-d
${
data_dir
}
]]
&&
echo
"The data dir
$
data_dir
is not existing!"
&&
exit
1
;
[[
!
-d
${
data_dir
}
]]
&&
echo
"The data dir
$
{
data_dir
}
is not existing!"
&&
exit
1
;
if
[[
-z
${
device
}
||
${#
device
[@]
}
-eq
0
]]
;
then
if
[[
-z
${
device
}
||
${#
device
[@]
}
-eq
0
]]
;
then
if
[[
${
gpu_num
}
-eq
0
]]
;
then
if
[[
${
gpu_num
}
-eq
0
]]
;
then
...
...
egs/mustc/asr/run.sh
查看文件 @
eff10263
...
@@ -123,7 +123,7 @@ data_dir=${data_dir}/${lang}
...
@@ -123,7 +123,7 @@ data_dir=${data_dir}/${lang}
if
[
${
stage
}
-le
1
]
&&
[
${
stop_stage
}
-ge
1
]
;
then
if
[
${
stage
}
-le
1
]
&&
[
${
stop_stage
}
-ge
1
]
;
then
echo
"stage 1: ASR Network Training"
echo
"stage 1: ASR Network Training"
[[
!
-d
$
data_dir
]]
&&
echo
"The data dir
$data_dir
is not existing!"
&&
exit
1
;
[[
!
-d
$
{
data_dir
}
]]
&&
echo
"The data dir
${
data_dir
}
is not existing!"
&&
exit
1
;
if
[[
-z
${
device
}
||
${#
device
[@]
}
-eq
0
]]
;
then
if
[[
-z
${
device
}
||
${#
device
[@]
}
-eq
0
]]
;
then
if
[[
${
gpu_num
}
-eq
0
]]
;
then
if
[[
${
gpu_num
}
-eq
0
]]
;
then
...
...
examples/speech_to_text/prep_librispeech_data.py
查看文件 @
eff10263
...
@@ -39,6 +39,7 @@ MANIFEST_COLUMNS = ["id", "audio", "n_frames", "tgt_text", "speaker"]
...
@@ -39,6 +39,7 @@ MANIFEST_COLUMNS = ["id", "audio", "n_frames", "tgt_text", "speaker"]
def
process
(
args
):
def
process
(
args
):
data_root
=
Path
(
args
.
data_root
)
.
absolute
()
out_root
=
Path
(
args
.
output_root
)
.
absolute
()
out_root
=
Path
(
args
.
output_root
)
.
absolute
()
out_root
.
mkdir
(
exist_ok
=
True
)
out_root
.
mkdir
(
exist_ok
=
True
)
# Extract features
# Extract features
...
@@ -48,7 +49,7 @@ def process(args):
...
@@ -48,7 +49,7 @@ def process(args):
if
args
.
overwrite
or
not
Path
.
exists
(
zip_path
):
if
args
.
overwrite
or
not
Path
.
exists
(
zip_path
):
for
split
in
SPLITS
:
for
split
in
SPLITS
:
print
(
f
"Fetching split {split}..."
)
print
(
f
"Fetching split {split}..."
)
dataset
=
LIBRISPEECH
(
out
_root
.
as_posix
(),
url
=
split
,
download
=
True
)
dataset
=
LIBRISPEECH
(
data
_root
.
as_posix
(),
url
=
split
,
download
=
True
)
print
(
"Extracting log mel filter bank features..."
)
print
(
"Extracting log mel filter bank features..."
)
for
wav
,
sample_rate
,
_
,
spk_id
,
chapter_no
,
utt_no
in
tqdm
(
dataset
):
for
wav
,
sample_rate
,
_
,
spk_id
,
chapter_no
,
utt_no
in
tqdm
(
dataset
):
sample_id
=
f
"{spk_id}-{chapter_no}-{utt_no}"
sample_id
=
f
"{spk_id}-{chapter_no}-{utt_no}"
...
@@ -96,7 +97,7 @@ def process(args):
...
@@ -96,7 +97,7 @@ def process(args):
print
(
"Loading the training text..."
)
print
(
"Loading the training text..."
)
for
split
in
SPLITS
:
for
split
in
SPLITS
:
if
split
.
startswith
(
"train"
):
if
split
.
startswith
(
"train"
):
dataset
=
LIBRISPEECH
(
out
_root
.
as_posix
(),
url
=
split
)
dataset
=
LIBRISPEECH
(
data
_root
.
as_posix
(),
url
=
split
)
for
wav
,
sample_rate
,
utt
,
spk_id
,
chapter_no
,
utt_no
in
dataset
:
for
wav
,
sample_rate
,
utt
,
spk_id
,
chapter_no
,
utt_no
in
dataset
:
train_text
.
append
(
utt
.
lower
())
train_text
.
append
(
utt
.
lower
())
for
t
in
train_text
:
for
t
in
train_text
:
...
@@ -119,6 +120,7 @@ def process(args):
...
@@ -119,6 +120,7 @@ def process(args):
def
main
():
def
main
():
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"--data-root"
,
"-d"
,
required
=
True
,
type
=
str
)
parser
.
add_argument
(
"--output-root"
,
"-o"
,
required
=
True
,
type
=
str
)
parser
.
add_argument
(
"--output-root"
,
"-o"
,
required
=
True
,
type
=
str
)
parser
.
add_argument
(
parser
.
add_argument
(
"--vocab-type"
,
"--vocab-type"
,
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论