Commit 3f269efe by xuchen

yaml

parent 504e81af
# Override config: switches the encoder attention type to `local` and sets
# the accompanying mask hyper-parameters.
# NOTE(review): the meaning of the three mask keys is not visible here —
# presumably a window of 0 disables the hard mask and sigma controls a
# Gaussian soft mask; confirm against the attention implementation.
encoder-attention-type: local
hard-mask-window: 0
gauss-mask-sigma: 3
init-mask-weight: 0
\ No newline at end of file
# Override config: trains on the `train-clean-100` subset with a learning
# rate of 0.001.
# NOTE(review): presumably meant to be combined with a base config that
# supplies the remaining options — confirm how configs are merged.
train-subset: train-clean-100
lr: 0.001
\ No newline at end of file
...@@ -5,6 +5,7 @@ max-epoch: 300 ...@@ -5,6 +5,7 @@ max-epoch: 300
max-update: 300000 max-update: 300000
patience: 20 patience: 20
post-process: sentencepiece post-process: sentencepiece
weight-decay: 1e-4
# best-checkpoint-metric: loss # best-checkpoint-metric: loss
# maximize-best-checkpoint-metric: False # maximize-best-checkpoint-metric: False
......
arch: s2t_ctc arch: s2t_ctc
encoder-type: transformer
optimizer: adam optimizer: adam
clip-norm: 10.0 clip-norm: 10.0
lr-scheduler: inverse_sqrt lr-scheduler: inverse_sqrt
......
...@@ -6,12 +6,16 @@ clip-norm: 10.0 ...@@ -6,12 +6,16 @@ clip-norm: 10.0
lr-scheduler: inverse_sqrt lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7 warmup-init-lr: 1e-7
warmup-updates: 10000 warmup-updates: 10000
lr: 0.002 lr: 2e-3
adam_betas: (0.9,0.98) adam_betas: (0.9,0.98)
criterion: ctc criterion: ctc
zero_infinity: True
ctc-weight: 1.0 ctc-weight: 1.0
encoder-normalize-before: True
decoder-normalize-before: True
subsampling-type: conv1d subsampling-type: conv1d
subsampling-layers: 2 subsampling-layers: 2
subsampling-filter: 1024 subsampling-filter: 1024
...@@ -26,3 +30,8 @@ encoder-embed-dim: 256 ...@@ -26,3 +30,8 @@ encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048 encoder-ffn-embed-dim: 2048
encoder-layers: 18 encoder-layers: 18
encoder-attention-heads: 4 encoder-attention-heads: 4
# InterCTC
inter-ctc-weight: 1.0
inter-ctc-layers: 6,9,12,15
share-inter-ctc: True
\ No newline at end of file
# Training config: s2t_transformer_s encoder-decoder (18 encoder layers,
# 6 decoder layers, 256-dim embeddings) with Conformer encoder options,
# CTC + XCTC auxiliary losses, intermediate CTC/XCTC at encoder layers
# 6,9,12,15, and prediction-aware encoding (PAE).
# --- Model ---
arch: s2t_transformer_s
share-decoder-input-output-embed: True
# --- Optimization: Adam with inverse-sqrt schedule and 10k warmup updates ---
# NOTE(review): YAML 1.1 loaders such as PyYAML resolve exponent literals
# without a decimal point (1e-7, 2e-3, 1e-4) as strings, not floats —
# confirm the config loader casts these values.
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
weight-decay: 1e-4
# --- Criterion: label-smoothed cross entropy combined with CTC ---
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
# --- Input subsampling: 2 conv1d layers, kernel 5, stride 2, GLU activation ---
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
# --- Encoder / decoder sizes and dropout ---
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 18
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
# Optional pretrained checkpoints (left disabled; no path is given).
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
# Bilingual CTC
# Two CTC objectives are weighted here: `ctc` (0.05) and `xctc` (0.2).
# NOTE(review): presumably `ctc` targets the source transcript and `xctc`
# the target-language text — confirm against the criterion.
share-ctc-and-embed: True
share-xctc-and-embed: True
ctc-weight: 0.05
xctc-weight: 0.2
# InterCTC
# Intermediate losses at layers 6,9,12,15; each intermediate weight is half
# of the corresponding final weight above (0.025 vs 0.05, 0.1 vs 0.2).
inter-ctc-weight: 0.025
inter-ctc-layers: 6,9,12,15
share-inter-ctc: True
inter-xctc-weight: 0.1
inter-xctc-layers: 6,9,12,15
# Prediction-aware encoding
ctc-pae: inter_league
xctc-pae: inter_league
pae-unnorm-input: True
# Curriculum learning mixing
# NOTE(review): presumably mixes ground-truth labels into the XCTC PAE input
# at ratio 0.1, only where the prediction is wrong — confirm in the model code.
xctc-pae-ground-truth-ratio: 0.1
xctc-pae-ground-truth-only-mistake: True
pae-oracle-smooth: True
\ No newline at end of file
# Training config: s2t_transformer_s encoder-decoder (12 encoder layers,
# 6 decoder layers, 256-dim embeddings) with inter-mixup augmentation
# (mixup loss enabled), CTC multi-task losses with intermediate CTC at
# encoder layers 6,9, and Conformer encoder options.
# --- Model ---
arch: s2t_transformer_s
share-decoder-input-output-embed: True
# --- Optimization: Adam with inverse-sqrt schedule and 10k warmup updates ---
# NOTE(review): YAML 1.1 loaders such as PyYAML resolve exponent literals
# without a decimal point (1e-7, 2e-3) as strings, not floats — confirm the
# config loader casts these values.
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
# --- Criterion: label-smoothed cross entropy combined with CTC ---
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
# --- Input subsampling: 2 conv1d layers, kernel 5, stride 2, GLU activation ---
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
# --- Encoder / decoder sizes and dropout ---
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
# Optional pretrained checkpoints (left disabled; no path is given).
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
# Append-based Interpolation Augmentation
# NOTE(review): presumably layer -1 means mixing is applied before the first
# encoder layer and beta 0.2 parameterizes the mixing distribution — confirm.
inter-mixup: True
inter-mixup-layer: -1
inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0
inter-mixup-beta: 0.2
inter-mixup-keep-org: True
inter-mixup-decoder-emb: True
cal-mixup-loss: True
no-specaugment: False
layer-out-norm: False
# Ratio decay is switched off; the params line is presumably unused while
# it stays False — confirm.
inter-mixup-ratio-decay: False
inter-mixup-ratio-decay-params: 20000,40000,0
# MTL
# Final CTC weight 0.3, intermediate CTC weight 0.2 at layers 6,9, plus three
# mixup-consistency loss weights.
ctc-weight: 0.3
inter-ctc-weight: 0.2
inter-ctc-layers: 6,9
share-inter-ctc: True
share-ctc-and-embed: True
ctc-pae: inter_league
pae-unnorm-input: True
ctc-mixup-consistent-weight: 0.15
inter-ctc-mixup-consistent-weight: 0.1
mixup-consistent-weight: 0.5
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
layer-padding-mask: True
\ No newline at end of file
# Training config: s2t_transformer_s encoder-decoder (12 encoder layers,
# 6 decoder layers, 256-dim embeddings) with inter-mixup augmentation
# (mixup loss disabled via cal-mixup-loss: False), CTC multi-task losses
# with intermediate CTC at encoder layers 6,9, and Conformer encoder options.
# --- Model ---
arch: s2t_transformer_s
share-decoder-input-output-embed: True
# --- Optimization: Adam with inverse-sqrt schedule and 10k warmup updates ---
# NOTE(review): YAML 1.1 loaders such as PyYAML resolve exponent literals
# without a decimal point (1e-7, 2e-3) as strings, not floats — confirm the
# config loader casts these values.
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
# --- Criterion: label-smoothed cross entropy combined with CTC ---
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
# --- Input subsampling: 2 conv1d layers, kernel 5, stride 2, GLU activation ---
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
# --- Encoder / decoder sizes and dropout ---
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 12
decoder-layers: 6
encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
attention-dropout: 0.1
activation-dropout: 0.1
# Optional pretrained checkpoints (left disabled; no path is given).
#load-pretrained-encoder-from:
#load-pretrained-decoder-from:
# Append-based Interpolation Augmentation
# NOTE(review): presumably layer -1 means mixing is applied before the first
# encoder layer and beta 0.2 parameterizes the mixing distribution — confirm.
inter-mixup: True
inter-mixup-layer: -1
inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0
inter-mixup-beta: 0.2
inter-mixup-keep-org: True
inter-mixup-decoder-emb: True
# The mixup loss is turned off in this config while the consistency weights
# further down remain non-zero.
cal-mixup-loss: False
no-specaugment: False
layer-out-norm: False
# Ratio decay is switched off; the params line is presumably unused while
# it stays False — confirm.
inter-mixup-ratio-decay: False
inter-mixup-ratio-decay-params: 20000,40000,0
# MTL
# Final CTC weight 0.3, intermediate CTC weight 0.2 at layers 6,9, plus three
# mixup-consistency loss weights.
ctc-weight: 0.3
inter-ctc-weight: 0.2
inter-ctc-layers: 6,9
share-inter-ctc: True
share-ctc-and-embed: True
ctc-pae: inter_league
pae-unnorm-input: True
ctc-mixup-consistent-weight: 0.15
inter-ctc-mixup-consistent-weight: 0.1
mixup-consistent-weight: 0.5
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
layer-padding-mask: True
\ No newline at end of file
# Training config: s2t_ctc model (18 encoder layers, 256-dim embeddings)
# trained with the ctc criterion, inter-mixup augmentation (mixup loss
# enabled), intermediate CTC at encoder layers 6,9,12,15, and Conformer
# encoder options.
# --- Model ---
arch: s2t_ctc
encoder-type: transformer
# --- Optimization: Adam with inverse-sqrt schedule and 10k warmup updates ---
# NOTE(review): YAML 1.1 loaders such as PyYAML resolve exponent literals
# without a decimal point (1e-7, 2e-3) as strings, not floats — confirm the
# config loader casts these values.
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
# --- Criterion: CTC only ---
criterion: ctc
zero_infinity: True
ctc-weight: 1.0
# NOTE(review): this config sets no decoder-* sizes, so
# decoder-normalize-before is presumably ignored by s2t_ctc — confirm.
encoder-normalize-before: True
decoder-normalize-before: True
# --- Input subsampling: 2 conv1d layers, kernel 5, stride 2, GLU activation ---
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
# --- Encoder sizes and dropout ---
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 18
encoder-attention-heads: 4
# Append-based Interpolation Augmentation
# NOTE(review): the inter-mixup-decoder-* keys are set although no decoder
# is configured here; presumably they have no effect for s2t_ctc — confirm.
inter-mixup: True
inter-mixup-layer: -1
inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0
inter-mixup-beta: 0.2
inter-mixup-keep-org: True
inter-mixup-decoder-emb: True
cal-mixup-loss: True
no-specaugment: False
layer-out-norm: False
# Ratio decay is switched off; the params line is presumably unused while
# it stays False — confirm.
inter-mixup-ratio-decay: False
inter-mixup-ratio-decay-params: 20000,40000,0
# MTL
# Intermediate CTC weight 1.0 at layers 6,9,12,15, plus three
# mixup-consistency loss weights.
inter-ctc-weight: 1.0
inter-ctc-layers: 6,9,12,15
share-inter-ctc: True
share-ctc-and-embed: True
ctc-pae: inter_league
pae-unnorm-input: True
ctc-mixup-consistent-weight: 0.15
inter-ctc-mixup-consistent-weight: 0.1
mixup-consistent-weight: 0.5
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
layer-padding-mask: True
\ No newline at end of file
# Training config: s2t_ctc model (18 encoder layers, 256-dim embeddings)
# trained with the ctc criterion, inter-mixup augmentation (mixup loss
# disabled via cal-mixup-loss: False), intermediate CTC at encoder layers
# 6,9,12,15, and Conformer encoder options.
# --- Model ---
arch: s2t_ctc
encoder-type: transformer
# --- Optimization: Adam with inverse-sqrt schedule and 10k warmup updates ---
# NOTE(review): YAML 1.1 loaders such as PyYAML resolve exponent literals
# without a decimal point (1e-7, 2e-3) as strings, not floats — confirm the
# config loader casts these values.
optimizer: adam
clip-norm: 10.0
lr-scheduler: inverse_sqrt
warmup-init-lr: 1e-7
warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
# --- Criterion: CTC only ---
criterion: ctc
zero_infinity: True
ctc-weight: 1.0
# NOTE(review): this config sets no decoder-* sizes, so
# decoder-normalize-before is presumably ignored by s2t_ctc — confirm.
encoder-normalize-before: True
decoder-normalize-before: True
# --- Input subsampling: 2 conv1d layers, kernel 5, stride 2, GLU activation ---
subsampling-type: conv1d
subsampling-layers: 2
subsampling-filter: 1024
subsampling-kernel: 5
subsampling-stride: 2
subsampling-norm: none
subsampling-activation: glu
# --- Encoder sizes and dropout ---
dropout: 0.1
activation-fn: relu
encoder-embed-dim: 256
encoder-ffn-embed-dim: 2048
encoder-layers: 18
encoder-attention-heads: 4
# Append-based Interpolation Augmentation
# NOTE(review): the inter-mixup-decoder-* keys are set although no decoder
# is configured here; presumably they have no effect for s2t_ctc — confirm.
inter-mixup: True
inter-mixup-layer: -1
inter-mixup-decoder-layer: 0
inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0
inter-mixup-beta: 0.2
inter-mixup-keep-org: True
inter-mixup-decoder-emb: True
# The mixup loss is turned off in this config while the consistency weights
# further down remain non-zero.
cal-mixup-loss: False
no-specaugment: False
layer-out-norm: False
# Ratio decay is switched off; the params line is presumably unused while
# it stays False — confirm.
inter-mixup-ratio-decay: False
inter-mixup-ratio-decay-params: 20000,40000,0
# MTL
# Intermediate CTC weight 1.0 at layers 6,9,12,15, plus three
# mixup-consistency loss weights.
inter-ctc-weight: 1.0
inter-ctc-layers: 6,9,12,15
share-inter-ctc: True
share-ctc-and-embed: True
ctc-pae: inter_league
pae-unnorm-input: True
ctc-mixup-consistent-weight: 0.15
inter-ctc-mixup-consistent-weight: 0.1
mixup-consistent-weight: 0.5
# Conformer
macaron-style: True
use-cnn-module: True
cnn-module-kernel: 15
encoder-attention-type: rel_pos
encoder-activation-fn: swish
layer-padding-mask: True
\ No newline at end of file
...@@ -192,11 +192,9 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then ...@@ -192,11 +192,9 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
cp -f ${pwd_dir}/`basename ${BASH_SOURCE[0]}` ${model_dir} cp -f ${pwd_dir}/`basename ${BASH_SOURCE[0]}` ${model_dir}
cp -f ${pwd_dir}/train.sh ${model_dir} cp -f ${pwd_dir}/train.sh ${model_dir}
extra_parameter="${extra_parameter} train_config=${train_config},basis
--train-config ${pwd_dir}/conf/basis.yaml"
cp -f ${pwd_dir}/conf/basis.yaml ${model_dir}
config_list="${train_config//,/ }" config_list="${train_config//,/ }"
idx=1 idx=0
for config in ${config_list[@]} for config in ${config_list[@]}
do do
config_path=${pwd_dir}/conf/${config}.yaml config_path=${pwd_dir}/conf/${config}.yaml
...@@ -206,10 +204,18 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then ...@@ -206,10 +204,18 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
fi fi
cp -f ${config_path} ${model_dir} cp -f ${config_path} ${model_dir}
if [[ $idx -eq 0 ]]; then
extra_parameter="${extra_parameter}
--train-config ${config_path}"
else
extra_parameter="${extra_parameter} extra_parameter="${extra_parameter}
--train-config${idx} ${config_path}" --train-config${idx} ${config_path}"
fi
idx=$((idx + 1)) idx=$((idx + 1))
done done
#extra_parameter="${extra_parameter}
# --train-config${idx} ${pwd_dir}/conf/basis.yaml"
#cp -f ${pwd_dir}/conf/basis.yaml ${model_dir}
cmd="python3 -u ${code_dir}/fairseq_cli/train.py cmd="python3 -u ${code_dir}/fairseq_cli/train.py
${data_dir} ${data_dir}
......
# Append-based Interpolation Augmentation
inter-mixup: True inter-mixup: True
inter-mixup-layer: -1 inter-mixup-layer: -1
...@@ -6,12 +7,12 @@ inter-mixup-prob: 1.0 ...@@ -6,12 +7,12 @@ inter-mixup-prob: 1.0
inter-mixup-ratio: 1.0 inter-mixup-ratio: 1.0
inter-mixup-beta: 0.2 inter-mixup-beta: 0.2
inter-mixup-keep-org: False inter-mixup-keep-org: True
inter-mixup-decoder-emb: False inter-mixup-decoder-emb: True
ctc-mixup-consistent-weight: 0 ctc-mixup-consistent-weight: 0.15
inter-ctc-mixup-consistent-weight: 0 inter-ctc-mixup-consistent-weight: 0.1
mixup-consistent-weight: 0 mixup-consistent-weight: 0.5
cal-mixup-loss: True cal-mixup-loss: True
no-specaugment: False no-specaugment: False
......
...@@ -67,6 +67,7 @@ inter-xctc-layers: 4 ...@@ -67,6 +67,7 @@ inter-xctc-layers: 4
# Prediction-aware encoding # Prediction-aware encoding
ctc-pae: inter_league ctc-pae: inter_league
xctc-pae: inter_league xctc-pae: inter_league
pae-unnorm-input: True
# Cross-layer attn # Cross-layer attn
xctc-cross-attn: True xctc-cross-attn: True
......
...@@ -5,6 +5,8 @@ criterion: ctc ...@@ -5,6 +5,8 @@ criterion: ctc
zero_infinity: True zero_infinity: True
xctc-weight: 1.0 xctc-weight: 1.0
ctc-weight: 1.0 ctc-weight: 1.0
share-ctc-and-embed: True
share-xctc-and-embed: True
share-decoder-input-output-embed: True share-decoder-input-output-embed: True
optimizer: adam optimizer: adam
......
...@@ -469,14 +469,22 @@ class SpeechToTextTask(LegacyFairseqTask): ...@@ -469,14 +469,22 @@ class SpeechToTextTask(LegacyFairseqTask):
def compute_bleu(meters): def compute_bleu(meters):
import inspect import inspect
try:
from sacrebleu.metrics import BLEU
comp_bleu = BLEU.compute_bleu
except ImportError:
# compatibility API for sacrebleu 1.x
import sacrebleu import sacrebleu
fn_sig = inspect.getfullargspec(sacrebleu.compute_bleu)[0] comp_bleu = sacrebleu.compute_bleu
fn_sig = inspect.getfullargspec(comp_bleu)[0]
if "smooth_method" in fn_sig: if "smooth_method" in fn_sig:
smooth = {"smooth_method": "exp"} smooth = {"smooth_method": "exp"}
else: else:
smooth = {"smooth": "exp"} smooth = {"smooth": "exp"}
bleu = sacrebleu.compute_bleu( bleu = comp_bleu(
correct=meters["_bleu_counts"].sum, correct=meters["_bleu_counts"].sum,
total=meters["_bleu_totals"].sum, total=meters["_bleu_totals"].sum,
sys_len=meters["_bleu_sys_len"].sum, sys_len=meters["_bleu_sys_len"].sum,
...@@ -486,8 +494,8 @@ class SpeechToTextTask(LegacyFairseqTask): ...@@ -486,8 +494,8 @@ class SpeechToTextTask(LegacyFairseqTask):
return round(bleu.score, 2) return round(bleu.score, 2)
metrics.log_derived("bleu", compute_bleu) metrics.log_derived("bleu", compute_bleu)
else: # else:
metrics.log_scalar("bleu", 0) # metrics.log_scalar("bleu", 0)
def build_generator( def build_generator(
self, self,
......
...@@ -198,6 +198,13 @@ def do_setup(package_data): ...@@ -198,6 +198,13 @@ def do_setup(package_data):
"sacrebleu>=1.4.12", "sacrebleu>=1.4.12",
"torch", "torch",
"tqdm", "tqdm",
"configargparse",
"matplotlib",
"scikit-learn",
"editdistance",
"espnet",
"torchaudio",
"pandas",
], ],
dependency_links=dependency_links, dependency_links=dependency_links,
packages=find_packages( packages=find_packages(
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论