Commit 3f269efe by xuchen

yaml

parent 504e81af
encoder-attention-type: local
hard-mask-window: 0
gauss-mask-sigma: 3
init-mask-weight: 0
\ No newline at end of file
train-subset: train-clean-100
lr: 0.001
\ No newline at end of file
......@@ -5,6 +5,7 @@ max-epoch: 300
max-update: 300000
patience: 20
post-process: sentencepiece
weight-decay: 1e-4
# best-checkpoint-metric: loss
# maximize-best-checkpoint-metric: False
......
arch: s2t_ctc
encoder-type: transformer
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
......
......@@ -6,12 +6,16 @@ clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 0.002
lr: 2e-3
adam_betas: (0.9,0.98)
criterion: ctc
zero_infinity: True
ctc-weight: 1.0
encoder-normalize-before: True
decoder-normalize-before: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
......@@ -26,3 +30,8 @@ encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 18
encoder-attention-heads: 4
# InterCTC
inter-ctc-weight: 1.0
inter-ctc-layers: 6,9,12,15
share-inter-ctc: True
\ No newline at end of file
arch: s2t_transformer_s
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
weight-decay: 1e-4
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 18
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
# Bilingual CTC
share-ctc-and-embed: True
share-xctc-and-embed: True
ctc-weight: 0.05
xctc-weight: 0.2
# InterCTC
inter-ctc-weight: 0.025
inter-ctc-layers: 6,9,12,15
share-inter-ctc: True
inter-xctc-weight: 0.1
inter-xctc-layers: 6,9,12,15
# Prediction-aware encoding
ctc-pae: inter_league
xctc-pae: inter_league
pae-unnorm-input: True
# Curriculum learning mixing
xctc-pae-ground-truth-ratio: 0.1
xctc-pae-ground-truth-only-mistake: True
pae-oracle-smooth: True
\ No newline at end of file
arch: s2t_transformer_s
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
# Append-based Interpolation Augmentation
inter-mixup: True
inter-mixup-layer: -1
inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0
inter-mixup-beta: 0.2
inter-mixup-keep-org: True
inter-mixup-decoder-emb: True
cal-mixup-loss: True
no-specaugment: False
layer-out-norm: False
inter-mixup-ratio-decay: False
inter-mixup-ratio-decay-params: 20000,40000,0
# MTL
ctc-weight: 0.3
inter-ctc-weight: 0.2
inter-ctc-layers: 6,9
share-inter-ctc: True
share-ctc-and-embed: True
ctc-pae: inter_league
pae-unnorm-input: True
ctc-mixup-consistent-weight: 0.15
inter-ctc-mixup-consistent-weight: 0.1
mixup-consistent-weight: 0.5
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
layer-padding-mask: True
\ No newline at end of file
arch: s2t_transformer_s
share-decoder-input-output-embed: True
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
# Append-based Interpolation Augmentation
inter-mixup: True
inter-mixup-layer: -1
inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0
inter-mixup-beta: 0.2
inter-mixup-keep-org: True
inter-mixup-decoder-emb: True
cal-mixup-loss: False
no-specaugment: False
layer-out-norm: False
inter-mixup-ratio-decay: False
inter-mixup-ratio-decay-params: 20000,40000,0
# MTL
ctc-weight: 0.3
inter-ctc-weight: 0.2
inter-ctc-layers: 6,9
share-inter-ctc: True
share-ctc-and-embed: True
ctc-pae: inter_league
pae-unnorm-input: True
ctc-mixup-consistent-weight: 0.15
inter-ctc-mixup-consistent-weight: 0.1
mixup-consistent-weight: 0.5
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
layer-padding-mask: True
\ No newline at end of file
arch: s2t_ctc
encoder-type: transformer
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
criterion: ctc
zero_infinity: True
ctc-weight: 1.0
encoder-normalize-before: True
decoder-normalize-before: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 18
encoder-attention-heads: 4
# Append-based Interpolation Augmentation
inter-mixup: True
inter-mixup-layer: -1
inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0
inter-mixup-beta: 0.2
inter-mixup-keep-org: True
inter-mixup-decoder-emb: True
cal-mixup-loss: True
no-specaugment: False
layer-out-norm: False
inter-mixup-ratio-decay: False
inter-mixup-ratio-decay-params: 20000,40000,0
# MTL
inter-ctc-weight: 1.0
inter-ctc-layers: 6,9,12,15
share-inter-ctc: True
share-ctc-and-embed: True
ctc-pae: inter_league
pae-unnorm-input: True
ctc-mixup-consistent-weight: 0.15
inter-ctc-mixup-consistent-weight: 0.1
mixup-consistent-weight: 0.5
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
layer-padding-mask: True
\ No newline at end of file
arch: s2t_ctc
encoder-type: transformer
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
criterion: ctc
zero_infinity: True
ctc-weight: 1.0
encoder-normalize-before: True
decoder-normalize-before: True
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 18
encoder-attention-heads: 4
# Append-based Interpolation Augmentation
inter-mixup: True
inter-mixup-layer: -1
inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0
inter-mixup-beta: 0.2
inter-mixup-keep-org: True
inter-mixup-decoder-emb: True
cal-mixup-loss: False
no-specaugment: False
layer-out-norm: False
inter-mixup-ratio-decay: False
inter-mixup-ratio-decay-params: 20000,40000,0
# MTL
inter-ctc-weight: 1.0
inter-ctc-layers: 6,9,12,15
share-inter-ctc: True
share-ctc-and-embed: True
ctc-pae: inter_league
pae-unnorm-input: True
ctc-mixup-consistent-weight: 0.15
inter-ctc-mixup-consistent-weight: 0.1
mixup-consistent-weight: 0.5
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
layer-padding-mask: True
\ No newline at end of file
......@@ -192,11 +192,9 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
cp -f ${pwd_dir}/`basename ${BASH_SOURCE[0]}` ${model_dir}
cp -f ${pwd_dir}/train.sh ${model_dir}
extra_parameter="${extra_parameter}
--train-config ${pwd_dir}/conf/basis.yaml"
cp -f ${pwd_dir}/conf/basis.yaml ${model_dir}
train_config=${train_config},basis
config_list="${train_config//,/ }"
idx=1
idx=0
for config in ${config_list[@]}
do
config_path=${pwd_dir}/conf/${config}.yaml
......@@ -206,10 +204,18 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
fi
cp -f ${config_path} ${model_dir}
extra_parameter="${extra_parameter}
--train-config${idx} ${config_path}"
if [[ $idx -eq 0 ]]; then
extra_parameter="${extra_parameter}
--train-config ${config_path}"
else
extra_parameter="${extra_parameter}
--train-config${idx} ${config_path}"
fi
idx=$((idx + 1))
done
#extra_parameter="${extra_parameter}
# --train-config${idx} ${pwd_dir}/conf/basis.yaml"
#cp -f ${pwd_dir}/conf/basis.yaml ${model_dir}
cmd="python3 -u ${code_dir}/fairseq_cli/train.py
${data_dir}
......
# Append-based Interpolation Augmentation
inter-mixup: True
inter-mixup-layer: -1
......@@ -6,12 +7,12 @@ inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0
inter-mixup-beta: 0.2
inter-mixup-keep-org: False
inter-mixup-decoder-emb: False
inter-mixup-keep-org: True
inter-mixup-decoder-emb: True
ctc-mixup-consistent-weight: 0
inter-ctc-mixup-consistent-weight: 0
mixup-consistent-weight: 0
ctc-mixup-consistent-weight: 0.15
inter-ctc-mixup-consistent-weight: 0.1
mixup-consistent-weight: 0.5
cal-mixup-loss: True
no-specaugment: False
......
......@@ -67,6 +67,7 @@ inter-xctc-layers: 4
# Prediction-aware encoding
ctc-pae: inter_league
xctc-pae: inter_league
pae-unnorm-input: True
# Cross-layer attn
xctc-cross-attn: True
......
......@@ -5,6 +5,8 @@ criterion: ctc
zero_infinity: True
xctc-weight: 1.0
ctc-weight: 1.0
share-ctc-and-embed: True
share-xctc-and-embed: True
share-decoder-input-output-embed: True
optimizer: adam
......
......@@ -469,14 +469,22 @@ class SpeechToTextTask(LegacyFairseqTask):
def compute_bleu(meters):
import inspect
import sacrebleu
try:
from sacrebleu.metrics import BLEU
fn_sig = inspect.getfullargspec(sacrebleu.compute_bleu)[0]
comp_bleu = BLEU.compute_bleu
except ImportError:
# compatibility API for sacrebleu 1.x
import sacrebleu
comp_bleu = sacrebleu.compute_bleu
fn_sig = inspect.getfullargspec(comp_bleu)[0]
if "smooth_method" in fn_sig:
smooth = {"smooth_method": "exp"}
else:
smooth = {"smooth": "exp"}
bleu = sacrebleu.compute_bleu(
bleu = comp_bleu(
correct=meters["_bleu_counts"].sum,
total=meters["_bleu_totals"].sum,
sys_len=meters["_bleu_sys_len"].sum,
......@@ -486,8 +494,8 @@ class SpeechToTextTask(LegacyFairseqTask):
return round(bleu.score, 2)
metrics.log_derived("bleu", compute_bleu)
else:
metrics.log_scalar("bleu", 0)
# else:
# metrics.log_scalar("bleu", 0)
def build_generator(
self,
......
......@@ -198,6 +198,13 @@ def do_setup(package_data):
"sacrebleu>=1.4.12",
"torch",
"tqdm",
"configargparse",
"matplotlib",
"scikit-learn",
"editdistance",
"espnet",
"torchaudio",
"pandas",
],
dependency_links=dependency_links,
packages=find_packages(
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论