Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
S
S2T
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
0
Issues
0
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
xuchen
S2T
Commits
478c694b
Commit
478c694b
authored
Aug 03, 2023
by
xuchen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
shell
parent
e248f2f0
显示空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
69 行增加
和
120 行删除
+69
-120
egs/mustc/asr/decode.sh
+4
-4
egs/mustc/asr/run.sh
+50
-95
egs/mustc/st/run.sh
+15
-21
没有找到文件。
egs/mustc/asr/decode.sh
查看文件 @
478c694b
#!/usr/bin/env bash
#!/usr/bin/env bash
gpu_num
=
1
gpu_num
=
0
data_dir
=
data_dir
=
test_subset
=(
dev tst-COMMON
)
test_subset
=(
dev tst-COMMON
)
...
@@ -15,12 +15,12 @@ ctc_infer=0
...
@@ -15,12 +15,12 @@ ctc_infer=0
n_average
=
10
n_average
=
10
beam_size
=
5
beam_size
=
5
len_penalty
=
1.0
len_penalty
=
1.0
max_tokens
=
8
0000
max_tokens
=
5
0000
dec_model
=
checkpoint_best.pt
dec_model
=
checkpoint_best.pt
cmd
=
"./run.sh
cmd
=
"./run.sh
--stage
3
--stage
2
--stop_stage
3
--stop_stage
2
--gpu_num
${
gpu_num
}
--gpu_num
${
gpu_num
}
--exp_name
${
exp_name
}
--exp_name
${
exp_name
}
--n_average
${
n_average
}
--n_average
${
n_average
}
...
...
egs/mustc/asr/run.sh
查看文件 @
478c694b
...
@@ -2,8 +2,7 @@
...
@@ -2,8 +2,7 @@
# Processing MuST-C Datasets
# Processing MuST-C Datasets
# Copyright 2021 Natural Language Processing Laboratory
# Copyright 2021 Chen Xu (xuchennlp@outlook.com)
# Xu Chen (xuchenneu@163.com)
# Set bash to 'debug' mode, it will exit on :
# Set bash to 'debug' mode, it will exit on :
# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
...
@@ -16,22 +15,21 @@ eval=1
...
@@ -16,22 +15,21 @@ eval=1
time
=
$(
date
"+%m%d_%H%M"
)
time
=
$(
date
"+%m%d_%H%M"
)
stage
=
1
stage
=
1
stop_stage
=
4
stop_stage
=
2
########
h
ardware ########
########
H
ardware ########
#
d
evices
#
D
evices
device
=(
0
)
device
=(
0
)
gpu_num
=
8
gpu_num
=
8
update_freq
=
1
update_freq
=
1
hdfs_get
=
0
root_dir
=
/opt/tiger
data_root_dir
=
/mnt/bn/nas-xc-1
code_dir
=
${
root_dir
}
/s2t
pwd_dir
=
$PWD
pwd_dir
=
$PWD
root_dir
=
${
ST_ROOT
}
data_root_dir
=
${
root_dir
}
code_dir
=
${
root_dir
}
/S2T
#
d
ataset
#
D
ataset
src_lang
=
en
src_lang
=
en
tgt_lang
=
de
tgt_lang
=
de
dataset
=
must_c
dataset
=
must_c
...
@@ -63,24 +61,22 @@ valid_split=dev
...
@@ -63,24 +61,22 @@ valid_split=dev
test_split
=
tst-COMMON
test_split
=
tst-COMMON
test_subset
=
dev,tst-COMMON
test_subset
=
dev,tst-COMMON
# exp
# Exp
sub_tag
=
exp_prefix
=
$(
date
"+%m%d"
)
exp_prefix
=
$(
date
"+%m%d"
)
# exp_subfix=${ARNOLD_JOB_ID}_${ARNOLD_TASK_ID}_${ARNOLD_TRIAL_ID}
extra_tag
=
extra_tag
=
extra_parameter
=
extra_parameter
=
exp_tag
=
baseline
exp_tag
=
baseline
exp_name
=
exp_name
=
#
config
#
Training Settings
train_config
=
base,ctc
train_config
=
base,ctc
data_config
=
config.yaml
# training setting
fp16
=
1
fp16
=
1
max_tokens
=
40000
max_tokens
=
40000
step_valid
=
0
step_valid
=
0
data_config
=
config.yaml
#
decoding setting
#
Decoding Settings
cer
=
0
cer
=
0
ctc_infer
=
0
ctc_infer
=
0
ctc_self_ensemble
=
0
ctc_self_ensemble
=
0
...
@@ -92,6 +88,7 @@ len_penalty=1.0
...
@@ -92,6 +88,7 @@ len_penalty=1.0
infer_score
=
0
infer_score
=
0
infer_parameters
=
infer_parameters
=
# Parsing Options
if
[[
${
speed_perturb
}
-eq
1
]]
;
then
if
[[
${
speed_perturb
}
-eq
1
]]
;
then
data_dir
=
${
data_dir
}
_sp
data_dir
=
${
data_dir
}
_sp
exp_prefix
=
${
exp_prefix
}
_sp
exp_prefix
=
${
exp_prefix
}
_sp
...
@@ -124,19 +121,6 @@ if [[ ! -d ${data_dir} ]]; then
...
@@ -124,19 +121,6 @@ if [[ ! -d ${data_dir} ]]; then
exit
exit
fi
fi
# setup nccl envs
export
NCCL_IB_DISABLE
=
0
export
NCCL_IB_HCA
=
$ARNOLD_RDMA_DEVICE
:1
export
NCCL_IB_GID_INDEX
=
3
export
NCCL_SOCKET_IFNAME
=
eth0
HOSTS
=
$ARNOLD_WORKER_HOSTS
HOST
=(
${
HOSTS
//,/
}
)
HOST_SPLIT
=(
${
HOST
//
:/
}
)
PORT
=
${
HOST_SPLIT
[1]
}
INIT_METHOD
=
"tcp://
${
ARNOLD_WORKER_0_HOST
}
:
${
ARNOLD_WORKER_0_PORT
}
"
DIST_RANK
=
$((
ARNOLD_ID
*
ARNOLD_WORKER_GPU
))
export
PATH
=
$PATH
:
${
code_dir
}
/scripts
export
PATH
=
$PATH
:
${
code_dir
}
/scripts
.
./local/parse_options.sh
||
exit
1
;
.
./local/parse_options.sh
||
exit
1
;
...
@@ -150,21 +134,27 @@ if [[ -z ${exp_name} ]]; then
...
@@ -150,21 +134,27 @@ if [[ -z ${exp_name} ]]; then
exp_name
=
${
exp_name
}
_
${
exp_subfix
}
exp_name
=
${
exp_name
}
_
${
exp_subfix
}
fi
fi
fi
fi
model_dir
=
${
code_dir
}
/checkpoints/
${
data_model_subfix
}
/
${
exp_name
}
echo
"stage:
$stage
"
ckpt_dir
=
${
root_dir
}
/checkpoints/
echo
"stop_stage:
$stop_stage
"
model_dir
=
${
root_dir
}
/checkpoints/
${
data_model_subfix
}
/
${
sub_tag
}
/
${
exp_name
}
# Start
cd
${
code_dir
}
cd
${
code_dir
}
echo
"Start Stage:
$stage
"
echo
"Stop Stage:
$stop_stage
"
if
[[
`
pip list |
grep
fairseq | wc
-l
`
-eq
0
]]
;
then
echo
"Default Stage: env configure"
pip3 install
-e
${
code_dir
}
fi
if
[
${
stage
}
-le
-1
]
&&
[
${
stop_stage
}
-ge
-1
]
;
then
if
[
${
stage
}
-le
-1
]
&&
[
${
stop_stage
}
-ge
-1
]
;
then
echo
"stage -1: Data Download"
echo
"Stage -1: Data Download"
# pass
fi
fi
if
[
${
stage
}
-le
0
]
&&
[
${
stop_stage
}
-ge
0
]
;
then
if
[
${
stage
}
-le
0
]
&&
[
${
stop_stage
}
-ge
0
]
;
then
### Task dependent. You have to make data the following preparation part by yourself.
### Task dependent. You have to make data the following preparation part by yourself.
### But you can utilize Kaldi recipes in most cases
echo
"Stage 0: ASR Data Preparation"
echo
"stage 0: ASR Data Preparation"
if
[[
!
-e
${
data_dir
}
]]
;
then
if
[[
!
-e
${
data_dir
}
]]
;
then
mkdir
-p
${
data_dir
}
mkdir
-p
${
data_dir
}
fi
fi
...
@@ -205,32 +195,8 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
...
@@ -205,32 +195,8 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
[[
$eval
-eq
1
]]
&&
eval
${
cmd
}
[[
$eval
-eq
1
]]
&&
eval
${
cmd
}
fi
fi
if
[[
`
pip list |
grep
fairseq | wc
-l
`
-eq
0
]]
;
then
if
[
${
stage
}
-le
1
]
&&
[
${
stop_stage
}
-ge
1
]
;
then
echo
"default stage: env configure"
echo
"Stage 1: Network Training"
pip3 install
-e
${
code_dir
}
-i
https://bytedpypi.byted.org/simple
--no-build-isolation
--default-timeout
=
10000
fi
if
[[
-d
/mnt/bn/nas-xc-1/checkpoints
&&
!
-d
${
code_dir
}
/checkpoints
]]
;
then
ln
-s
/mnt/bn/nas-xc-1/checkpoints
${
code_dir
}
fi
# if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
if
[
${
hdfs_get
}
-eq
1
]
;
then
ln_data_dir
=
`
echo
${
data_dir
}
| sed
-e
"s#
${
data_root_dir
}
#
${
code_dir
}
#"
`
echo
${
ln_data_dir
}
mkdir
-p
${
ln_data_dir
}
ln
-s
${
data_dir
}
/../
*
${
ln_data_dir
}
rm
-r
${
ln_data_dir
}
hdfs_path
=
`
echo
${
data_dir
}
| sed
-e
"s#
${
data_root_dir
}
#hdfs://haruna/home/byte_arnold_lq_mlnlc/user/xuchen/#"
`
hdfs dfs
-get
${
hdfs_path
}
${
ln_data_dir
}
sed
-i
-e
"s#
${
data_root_dir
}
#
${
code_dir
}
#"
${
ln_data_dir
}
/config
*
data_dir
=
${
ln_data_dir
}
fi
# fi
if
[
${
stage
}
-le
2
]
&&
[
${
stop_stage
}
-ge
2
]
;
then
echo
"stage 2: ASR Network Training"
[[
!
-d
${
data_dir
}
]]
&&
echo
"The data dir
${
data_dir
}
is not existing!"
&&
exit
1
;
[[
!
-d
${
data_dir
}
]]
&&
echo
"The data dir
${
data_dir
}
is not existing!"
&&
exit
1
;
if
[[
-z
${
device
}
||
${#
device
[@]
}
-eq
0
]]
;
then
if
[[
-z
${
device
}
||
${#
device
[@]
}
-eq
0
]]
;
then
...
@@ -240,6 +206,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
...
@@ -240,6 +206,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
source
./local/utils.sh
source
./local/utils.sh
device
=
$(
get_devices
$gpu_num
0
)
device
=
$(
get_devices
$gpu_num
0
)
fi
fi
export
CUDA_VISIBLE_DEVICES
=
${
device
}
fi
fi
echo
-e
"data=
${
data_dir
}
model=
${
model_dir
}
"
echo
-e
"data=
${
data_dir
}
model=
${
model_dir
}
"
...
@@ -327,22 +294,17 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
...
@@ -327,22 +294,17 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
echo
-e
"
\0
33[34mRun command:
\n
${
cmd
}
\0
33[0m"
echo
-e
"
\0
33[34mRun command:
\n
${
cmd
}
\0
33[0m"
# save info
# save info
log
=
.
/history.log
log
=
${
ckpt_dir
}
/history.log
echo
"
${
time
}
|
${
data_dir
}
|
${
exp_name
}
|
${
model_dir
}
"
>>
$log
echo
"
${
time
}
|
${
data_dir
}
|
${
exp_name
}
|
${
model_dir
}
"
>>
$log
tail
-n
50
${
log
}
>
tmp.log
tail
-n
50
${
log
}
>
tmp.log
mv tmp.log
$log
mv tmp.log
$log
# export CUDA_VISIBLE_DEVICES=${device}
log
=
${
model_dir
}
/train.log
log
=
${
model_dir
}
/train.log
cmd
=
"
${
cmd
}
2>&1 | tee -a
${
log
}
"
cmd
=
"
${
cmd
}
2>&1 | tee -a
${
log
}
"
#cmd="nohup ${cmd} >> ${log} 2>&1 &"
#cmd="nohup ${cmd} >> ${log} 2>&1 &"
if
[[
$eval
-eq
1
]]
;
then
if
[[
$eval
-eq
1
]]
;
then
# tensorboard
# tensorboard
if
[[
-z
${
ARNOLD_TENSORBOARD_CURRENT_PORT
}
]]
;
then
port
=
6666
port
=
6666
else
port
=
${
ARNOLD_TENSORBOARD_CURRENT_PORT
}
fi
tensorboard
--logdir
${
model_dir
}
--port
${
port
}
--bind_all
&
tensorboard
--logdir
${
model_dir
}
--port
${
port
}
--bind_all
&
echo
"
${
cmd
}
"
>
${
model_dir
}
/cmd
echo
"
${
cmd
}
"
>
${
model_dir
}
/cmd
...
@@ -352,8 +314,8 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
...
@@ -352,8 +314,8 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
fi
fi
fi
fi
if
[
${
stage
}
-le
3
]
&&
[
${
stop_stage
}
-ge
3
]
;
then
if
[
${
stage
}
-le
2
]
&&
[
${
stop_stage
}
-ge
2
]
;
then
echo
"
stage 3: ASR
Decoding"
echo
"
Stage 2:
Decoding"
if
[[
${
n_average
}
-ne
1
]]
;
then
if
[[
${
n_average
}
-ne
1
]]
;
then
# Average models
# Average models
dec_model
=
avg_
${
n_average
}
_checkpoint.pt
dec_model
=
avg_
${
n_average
}
_checkpoint.pt
...
@@ -377,18 +339,18 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
...
@@ -377,18 +339,18 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
source
./local/utils.sh
source
./local/utils.sh
device
=
$(
get_devices
$gpu_num
0
)
device
=
$(
get_devices
$gpu_num
0
)
fi
fi
export
CUDA_VISIBLE_DEVICES
=
${
device
}
fi
fi
# export CUDA_VISIBLE_DEVICES=${device}
suffix
=
beam
${
beam_size
}
_alpha
${
len_penalty
}
_tokens
${
max_tokens
}
suffix
=
beam
${
beam_size
}
_alpha
${
len_penalty
}
_tokens
${
max_tokens
}
if
[[
${
n_average
}
-ne
1
]]
;
then
suffix
=
${
suffix
}
_
${
n_average
}
fi
if
[[
-n
${
cer
}
&&
${
cer
}
-eq
1
]]
;
then
if
[[
-n
${
cer
}
&&
${
cer
}
-eq
1
]]
;
then
suffix
=
${
suffix
}
_cer
suffix
=
${
suffix
}
_cer
else
else
suffix
=
${
suffix
}
_wer
suffix
=
${
suffix
}
_wer
fi
fi
if
[[
${
n_average
}
-ne
1
]]
;
then
suffix
=
${
suffix
}
_
${
n_average
}
fi
if
[[
${
infer_score
}
-eq
1
]]
;
then
if
[[
${
infer_score
}
-eq
1
]]
;
then
suffix
=
${
suffix
}
_score
suffix
=
${
suffix
}
_score
fi
fi
...
@@ -435,9 +397,9 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
...
@@ -435,9 +397,9 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
cd
${
code_dir
}
cd
${
code_dir
}
if
[[
$eval
-eq
1
]]
;
then
if
[[
$eval
-eq
1
]]
;
then
src_ctc_file
=
translation-
${
subset
}
.txt
.ctc
ctc_file
=
translation-
${
subset
}
.ctc
if
[[
-f
${
model_dir
}
/
${
src_
ctc_file
}
]]
;
then
if
[[
${
ctc_infer
}
-eq
1
&&
-f
${
model_dir
}
/
${
ctc_file
}
]]
;
then
rm
${
model_dir
}
/
${
src_
ctc_file
}
rm
${
model_dir
}
/
${
ctc_file
}
fi
fi
eval
$cmd
eval
$cmd
...
@@ -448,33 +410,34 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
...
@@ -448,33 +410,34 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
cd
${
pwd_dir
}
cd
${
pwd_dir
}
if
[[
-f
${
model_dir
}
/enc_dump
]]
;
then
if
[[
-f
${
model_dir
}
/enc_dump
]]
;
then
mv
${
model_dir
}
/enc_dump
${
model_dir
}
/
${
subset
}
-
${
suffix
}
-enc-dump
mv
${
model_dir
}
/enc_dump
${
model_dir
}
/
dump-
${
subset
}
-enc-
${
suffix
}
fi
fi
if
[[
-f
${
model_dir
}
/dec_dump
]]
;
then
if
[[
-f
${
model_dir
}
/dec_dump
]]
;
then
mv
${
model_dir
}
/dec_dump
${
model_dir
}
/
${
subset
}
-
${
suffix
}
-dec-dump
mv
${
model_dir
}
/dec_dump
${
model_dir
}
/
dump-
${
subset
}
-dec-
${
suffix
}
fi
fi
trans_file
=
translation-
${
subset
}
-
${
suffix
}
.txt
trans_file
=
translation-
${
subset
}
-
${
suffix
}
.txt
if
[[
${
ctc_infer
}
-eq
1
&&
-f
${
model_dir
}
/
${
src_
ctc_file
}
]]
;
then
if
[[
${
ctc_infer
}
-eq
1
&&
-f
${
model_dir
}
/
${
ctc_file
}
]]
;
then
ref_file
=
${
model_dir
}
/
${
subset
}
.
${
src_lang
}
ref_file
=
${
model_dir
}
/
${
subset
}
.
${
src_lang
}
if
[[
!
-f
${
ref_file
}
]]
;
then
if
[[
!
-f
${
ref_file
}
]]
;
then
python3 ./local/extract_txt_from_tsv.py
${
data_dir
}
/
${
subset
}
.tsv
${
ref_file
}
"src_text"
python3 ./local/extract_txt_from_tsv.py
${
data_dir
}
/
${
subset
}
.tsv
${
ref_file
}
"src_text"
fi
fi
if
[[
-f
${
ref_file
}
]]
;
then
if
[[
-f
${
ref_file
}
]]
;
then
src_
ctc
=
$(
mktemp
-t
temp.record.XXXXXX
)
ctc
=
$(
mktemp
-t
temp.record.XXXXXX
)
cd
./local
cd
./local
./cal_wer.sh
${
model_dir
}
${
subset
}
${
trans_file
}
${
src_ctc_file
}
${
ref_file
}
>
${
src_
ctc
}
./cal_wer.sh
${
model_dir
}
${
subset
}
${
trans_file
}
${
ctc_file
}
${
ref_file
}
>
${
ctc
}
cd
..
cd
..
echo
"CTC WER"
>>
${
result_file
}
echo
"CTC WER"
>>
${
result_file
}
tail
-n
2
${
src_
ctc
}
>>
${
result_file
}
tail
-n
2
${
ctc
}
>>
${
result_file
}
src_bleu
=
$(
mktemp
-t
temp.record.XXXXXX
)
src_bleu
=
$(
mktemp
-t
temp.record.XXXXXX
)
cd local
cd local
./cal_ctc_bleu.sh
${
model_dir
}
${
subset
}
${
trans_file
}
${
src_
ctc_file
}
${
ref_file
}
${
tokenizer
}
${
src_lang
}
>
${
src_bleu
}
./cal_ctc_bleu.sh
${
model_dir
}
${
subset
}
${
trans_file
}
${
ctc_file
}
${
ref_file
}
${
tokenizer
}
${
src_lang
}
>
${
src_bleu
}
cd
..
cd
..
cat
${
src_bleu
}
>>
${
result_file
}
cat
${
src_bleu
}
>>
${
result_file
}
rm
${
src_
ctc
}
${
src_bleu
}
rm
${
ctc
}
${
src_bleu
}
else
else
echo
"No reference for source language."
echo
"No reference for source language."
fi
fi
...
@@ -484,11 +447,3 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
...
@@ -484,11 +447,3 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
echo
echo
cat
${
result_file
}
cat
${
result_file
}
fi
fi
# if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
# cd ${fairseq_dir}
# echo "Stage 4: Upload model and log"
# echo "Path: hdfs://haruna/home/byte_arnold_lq_mlnlc/user/xuchen/s2t/checkpoints/${data_model_subfix}/${exp_name}"
# hdfs dfs -mkdir -p hdfs://haruna/home/byte_arnold_lq_mlnlc/user/xuchen/s2t/checkpoints/${data_model_subfix}
# hdfs dfs -put -f ${model_dir} hdfs://haruna/home/byte_arnold_lq_mlnlc/user/xuchen/s2t/checkpoints/${data_model_subfix}
# fi
egs/mustc/st/run.sh
查看文件 @
478c694b
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
# Processing MuST-C Datasets
# Processing MuST-C Datasets
# Copyright 2021 Chen Xu (xuchenn
eu@163
.com)
# Copyright 2021 Chen Xu (xuchenn
lp@outlook
.com)
# Set bash to 'debug' mode, it will exit on :
# Set bash to 'debug' mode, it will exit on :
# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
...
@@ -17,19 +17,19 @@ time=$(date "+%m%d_%H%M")
...
@@ -17,19 +17,19 @@ time=$(date "+%m%d_%H%M")
stage
=
1
stage
=
1
stop_stage
=
2
stop_stage
=
2
########
h
ardware ########
########
H
ardware ########
#
d
evices
#
D
evices
device
=(
0
)
device
=(
0
)
gpu_num
=
8
gpu_num
=
8
update_freq
=
1
update_freq
=
1
pwd_dir
=
$PWD
pwd_dir
=
$PWD
root_dir
=
${
pwd_dir
}
/../../../..
root_dir
=
${
pwd_dir
}
/../../../..
/
data_root_dir
=
${
root_dir
}
/data
data_root_dir
=
${
root_dir
}
code_dir
=
${
root_dir
}
/S2T
code_dir
=
${
root_dir
}
/S2T
#
d
ataset
#
D
ataset
src_lang
=
en
src_lang
=
en
tgt_lang
=
de
tgt_lang
=
de
dataset
=
must_c
dataset
=
must_c
...
@@ -63,7 +63,7 @@ valid_split=dev
...
@@ -63,7 +63,7 @@ valid_split=dev
test_split
=
tst-COMMON
test_split
=
tst-COMMON
test_subset
=
dev,tst-COMMON
test_subset
=
dev,tst-COMMON
#
e
xp
#
E
xp
sub_tag
=
sub_tag
=
exp_prefix
=
$(
date
"+%m%d"
)
exp_prefix
=
$(
date
"+%m%d"
)
extra_tag
=
extra_tag
=
...
@@ -71,16 +71,14 @@ extra_parameter=
...
@@ -71,16 +71,14 @@ extra_parameter=
exp_tag
=
baseline
exp_tag
=
baseline
exp_name
=
exp_name
=
#
config
#
Training Settings
train_config
=
base,ctc
train_config
=
base,ctc
# training setting
fp16
=
1
fp16
=
1
max_tokens
=
40000
max_tokens
=
40000
step_valid
=
0
step_valid
=
0
bleu_valid
=
0
bleu_valid
=
0
#
decoding setting
#
Decoding Settings
sacrebleu
=
1
sacrebleu
=
1
dec_model
=
checkpoint_best.pt
dec_model
=
checkpoint_best.pt
ctc_infer
=
0
ctc_infer
=
0
...
@@ -90,6 +88,7 @@ len_penalty=1.0
...
@@ -90,6 +88,7 @@ len_penalty=1.0
infer_score
=
0
infer_score
=
0
infer_parameters
=
infer_parameters
=
# Parsing Options
if
[[
${
share_dict
}
-eq
1
]]
;
then
if
[[
${
share_dict
}
-eq
1
]]
;
then
data_config
=
config_share.yaml
data_config
=
config_share.yaml
else
else
...
@@ -136,12 +135,14 @@ if [[ -z ${exp_name} ]]; then
...
@@ -136,12 +135,14 @@ if [[ -z ${exp_name} ]]; then
exp_name
=
${
exp_name
}
_
${
exp_subfix
}
exp_name
=
${
exp_name
}
_
${
exp_subfix
}
fi
fi
fi
fi
ckpt_dir
=
${
code_dir
}
/checkpoints/
ckpt_dir
=
${
code_dir
}
/checkpoints/
model_dir
=
${
code_dir
}
/checkpoints/
${
data_model_subfix
}
/
${
sub_tag
}
/
${
exp_name
}
model_dir
=
${
code_dir
}
/checkpoints/
${
data_model_subfix
}
/
${
sub_tag
}
/
${
exp_name
}
# Start
cd
${
code_dir
}
echo
"Start Stage:
$stage
"
echo
"Start Stage:
$stage
"
echo
"Stop Stage:
$stop_stage
"
echo
"Stop Stage:
$stop_stage
"
cd
${
code_dir
}
if
[[
`
pip list |
grep
fairseq | wc
-l
`
-eq
0
]]
;
then
if
[[
`
pip list |
grep
fairseq | wc
-l
`
-eq
0
]]
;
then
echo
"Default Stage: env configure"
echo
"Default Stage: env configure"
...
@@ -150,12 +151,10 @@ fi
...
@@ -150,12 +151,10 @@ fi
if
[
${
stage
}
-le
-1
]
&&
[
${
stop_stage
}
-ge
-1
]
;
then
if
[
${
stage
}
-le
-1
]
&&
[
${
stop_stage
}
-ge
-1
]
;
then
echo
"Stage -1: Data Download"
echo
"Stage -1: Data Download"
# pass
fi
fi
if
[
${
stage
}
-le
0
]
&&
[
${
stop_stage
}
-ge
0
]
;
then
if
[
${
stage
}
-le
0
]
&&
[
${
stop_stage
}
-ge
0
]
;
then
### Task dependent. You have to make data the following preparation part by yourself.
### Task dependent. You have to make data the following preparation part by yourself.
### But you can utilize Kaldi recipes in most cases
echo
"Stage 0: ASR Data Preparation"
echo
"Stage 0: ASR Data Preparation"
if
[[
!
-e
${
data_dir
}
]]
;
then
if
[[
!
-e
${
data_dir
}
]]
;
then
mkdir
-p
${
data_dir
}
mkdir
-p
${
data_dir
}
...
@@ -255,6 +254,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
...
@@ -255,6 +254,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
source
./local/utils.sh
source
./local/utils.sh
device
=
$(
get_devices
$gpu_num
0
)
device
=
$(
get_devices
$gpu_num
0
)
fi
fi
export
CUDA_VISIBLE_DEVICES
=
${
device
}
fi
fi
echo
-e
"data=
${
data_dir
}
model=
${
model_dir
}
"
echo
-e
"data=
${
data_dir
}
model=
${
model_dir
}
"
...
@@ -308,11 +308,6 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
...
@@ -308,11 +308,6 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
cmd
=
"
${
cmd
}
cmd
=
"
${
cmd
}
--distributed-world-size
$gpu_num
--distributed-world-size
$gpu_num
--ddp-backend no_c10d"
--ddp-backend no_c10d"
if
[[
${
DIST_RANK
}
-ne
0
]]
;
then
cmd
=
"
${
cmd
}
--distributed-init-method
${
INIT_METHOD
}
--distributed-rank
${
DIST_RANK
}
"
fi
fi
fi
if
[[
$fp16
-eq
1
]]
;
then
if
[[
$fp16
-eq
1
]]
;
then
cmd
=
"
${
cmd
}
cmd
=
"
${
cmd
}
...
@@ -362,7 +357,6 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
...
@@ -362,7 +357,6 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
echo
"
${
time
}
|
${
data_dir
}
|
${
exp_name
}
|
${
model_dir
}
"
>>
$log
echo
"
${
time
}
|
${
data_dir
}
|
${
exp_name
}
|
${
model_dir
}
"
>>
$log
tail
-n
50
${
log
}
>
tmp.log
tail
-n
50
${
log
}
>
tmp.log
mv tmp.log
$log
mv tmp.log
$log
# export CUDA_VISIBLE_DEVICES=${device}
log
=
${
model_dir
}
/train.log
log
=
${
model_dir
}
/train.log
cmd
=
"
${
cmd
}
2>&1 | tee -a
${
log
}
"
cmd
=
"
${
cmd
}
2>&1 | tee -a
${
log
}
"
...
@@ -404,8 +398,8 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
...
@@ -404,8 +398,8 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
source
./local/utils.sh
source
./local/utils.sh
device
=
$(
get_devices
$gpu_num
0
)
device
=
$(
get_devices
$gpu_num
0
)
fi
fi
export
CUDA_VISIBLE_DEVICES
=
${
device
}
fi
fi
# export CUDA_VISIBLE_DEVICES=${device}
suffix
=
beam
${
beam_size
}
_alpha
${
len_penalty
}
_tokens
${
max_tokens
}
suffix
=
beam
${
beam_size
}
_alpha
${
len_penalty
}
_tokens
${
max_tokens
}
if
[[
${
n_average
}
-ne
1
]]
;
then
if
[[
${
n_average
}
-ne
1
]]
;
then
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论