Commit 4f679c86 by xuchen

fix the bugs

parent b970c7df
arch: s2t_dual
asr-encoder: sate
mt-encoder-layers: 6
mt-encoder: transformer
encoder-drop-net: True
encoder-drop-net-prob: 0.8
encoder-embed-dim: 256
pds-stages: 4
#ctc-layer: 12
pds-layers: 3_3_3_3
pds-ratios: 2_2_1_2
arch: pdss2t_transformer_s_8
pds-fusion: True
pds-fusion-method: all_conv
pds-embed-dims: 256_256_256_256
pds-ds-method: conv
pds-embed-norm: True
pds-position-embed: 1_1_1_1
pds-kernel-sizes: 5_5_5_5
pds-ffn-ratios: 8_8_8_8
pds-attn-heads: 4_4_4_4
ctc-layer: 12
inter_mixup: True
inter_mixup_layer: 0
inter_mixup_ratio: 0.2
share-decoder-input-output-embed: True
optimizer: adam
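For readers unfamiliar with the underscore notation in the pds-* options above, here is a minimal, hypothetical sketch (the split_stages helper is invented for illustration, not code from this repo) of how such per-stage strings map to per-stage lists.

# Hypothetical helper (not from the repo): expand an underscore-separated
# per-stage setting such as pds-layers or pds-ratios into a Python list.
def split_stages(value, cast=int):
    return [cast(v) for v in value.split("_")]

pds_layers = split_stages("3_3_3_3")             # encoder layers per stage
pds_ratios = split_stages("2_2_1_2")             # temporal downsampling per stage
pds_embed_dims = split_stages("256_256_256_256") # embedding width per stage

assert len(pds_layers) == len(pds_ratios) == 4   # must agree with pds-stages: 4

# cumulative downsampling 2 * 2 * 1 * 2 = 8, matching pdss2t_transformer_s_8
total = 1
for r in pds_ratios:
    total *= r
print(total)  # 8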
@@ -30,8 +15,7 @@ warmup-updates: 10000
lr: 2e-3
adam_betas: (0.9,0.98)
criterion: join_speech_and_text_loss
ctc-weight: 0.3
criterion: label_smoothed_cross_entropy_with_ctc
label_smoothing: 0.1
dropout: 0.1
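As a side note on ctc-weight: 0.3, a common way such a weight enters a joint criterion is as a convex combination of the label-smoothed cross-entropy and the CTC loss; the sketch below is an assumption about the weighting, not the repo's criterion code.

# Hedged sketch: one plausible reading of ctc-weight, not the repo's criterion.
def joint_loss(ce_loss, ctc_loss, ctc_weight=0.3):
    # interpolate the two objectives with the configured weight
    return (1.0 - ctc_weight) * ce_loss + ctc_weight * ctc_loss

print(joint_loss(2.5, 4.0))  # 0.7 * 2.5 + 0.3 * 4.0 = 2.95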
@@ -44,8 +28,3 @@ encoder-attention-heads: 4
decoder-embed-dim: 256
decoder-ffn-embed-dim: 2048
decoder-attention-heads: 4
#load-pretrained-encoder-from:
#load-pretrained-asr-encoder-from: /home/xuchen/st/checkpoints/mustc/asr/0225_st_purectc_pds_base_8_baseline_topctc/avg_10_checkpoint.pt
#load-pretrained-mt-encoder-from: /home/xuchen/st/checkpoints/mustc/mt/0223_st_small_baseline/avg_10_checkpoint.pt
#load-pretrained-decoder-from: /home/xuchen/st/checkpoints/mustc/mt/0223_st_small_baseline/avg_10_checkpoint.pt
\ No newline at end of file
inter_mixup: True
inter_mixup_layer: 0
inter_mixup_ratio: 0.2
\ No newline at end of file
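To make the inter_mixup settings above concrete: with inter_mixup_ratio: 0.2 only a small, even-sized slice of each batch is mixed, and inter_mixup_layer: 0 applies it before the first encoder layer. The sketch below reuses the mix_size formula from the code change in this commit; the function name is invented.

# Hypothetical helper reusing the mix_size formula from apply_mixup below:
# how many examples of a batch take part in mixup for a given ratio.
def mixup_count(batch, ratio):
    return int(max(2, batch * ratio // 2 * 2))

print(mixup_count(32, 0.2))  # 6  -> three mixed pairs out of 32 utterances
print(mixup_count(8, 0.2))   # 2  -> never fewer than one pair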
@@ -609,7 +609,13 @@ class PDSS2TTransformerModel(S2TTransformerModel):
         parser.add_argument(
             "--inter-mixup-prob",
             default=1,
             type=float,
-            help="the probability to apply mixup",
+            help="the probability for mixup",
         )
+        parser.add_argument(
+            "--inter-mixup-ratio",
+            default=1,
+            type=float,
+            help="the ratio for mixup",
+        )
         pass
@@ -905,12 +911,16 @@ class PDSS2TTransformerEncoder(FairseqEncoder):
         # mixup
         self.mixup = getattr(args, "inter_mixup", False)
         if self.mixup:
-            self.mixup_layer = args.inter_mixup_layer
-            self.mixup_prob = getattr(args, "inter_mixup_prob", 1.0)
-            beta = args.inter_mixup_beta
+            self.mixup_layer = int(args.inter_mixup_layer)
+            self.mixup_prob = float(getattr(args, "inter_mixup_prob", 1.0))
+            self.mixup_ratio = float(getattr(args, "inter_mixup_ratio", 1.0))
+            beta = float(args.inter_mixup_beta)
             from torch.distributions import Beta
             self.beta = Beta(torch.Tensor([beta]), torch.Tensor([beta]))
-            logger.info("Use mixup in layer %d with beta %f." % (self.mixup_layer, beta))
+            logger.info("Use mixup in layer %d with beta %.2f, prob %.2f, ratio %.2f." % (
+                self.mixup_layer, beta, self.mixup_prob, self.mixup_ratio)
+            )

         # gather cosine similarity
         self.gather_cos_sim = getattr(args, "gather_cos_sim", False)
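For context on the Beta distribution stored in __init__ above, the snippet below is a standalone, assumed illustration of how a coefficient sampled from Beta(beta, beta) is typically used to interpolate two paired examples; it is not the encoder's actual forward code.

# Assumed illustration (not the encoder's forward pass): mixing two paired
# feature tensors with a coefficient sampled from Beta(beta, beta).
import torch
from torch.distributions import Beta

beta = 0.5
dist = Beta(torch.Tensor([beta]), torch.Tensor([beta]))

x1 = torch.randn(10, 256)        # features of the first example (T x C)
x2 = torch.randn(10, 256)        # features of its mixup partner
coef = dist.sample()             # lambda in (0, 1)
mixed = coef * x1 + (1.0 - coef) * x2
print(mixed.shape)               # torch.Size([10, 256])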
@@ -938,10 +948,20 @@ class PDSS2TTransformerEncoder(FairseqEncoder):
     def apply_mixup(self, x, encoder_padding_mask):
         batch = x.size(1)
         indices = np.random.permutation(batch)
-        if len(indices) % 2 != 0:
-            indices = np.append(indices, (indices[-1]))
-        idx1 = torch.from_numpy(indices[0::2]).to(x.device)
-        idx2 = torch.from_numpy(indices[1::2]).to(x.device)
+        if self.mixup_ratio == 1:
+            if len(indices) % 2 != 0:
+                indices = np.append(indices, (indices[-1]))
+            idx1 = indices[0::2]
+            idx2 = indices[1::2]
+        else:
+            mix_size = int(max(2, batch * self.mixup_ratio // 2 * 2))
+            mix_indices = indices[: mix_size]
+            idx1 = np.append(mix_indices[0::2], (indices[mix_size:]))
+            idx2 = np.append(mix_indices[1::2], (indices[mix_size:]))
+
+        idx1 = torch.from_numpy(idx1).to(x.device)
+        idx2 = torch.from_numpy(idx2).to(x.device)

         x1 = x[:, idx1]
         x2 = x[:, idx2]
......
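The pairing logic changed by this hunk can be exercised on its own; the following is a self-contained NumPy rendering of it (function name invented, behaviour taken from the diff above) so the ratio == 1 and ratio < 1 branches can be checked outside the encoder.

# Self-contained rendering of the pairing logic from apply_mixup above.
import numpy as np

def make_mixup_indices(batch, mixup_ratio):
    indices = np.random.permutation(batch)
    if mixup_ratio == 1:
        # pad to an even length so every example has a partner
        if len(indices) % 2 != 0:
            indices = np.append(indices, indices[-1])
        idx1 = indices[0::2]
        idx2 = indices[1::2]
    else:
        # only an even-sized subset is mixed; the remaining indices are
        # appended to both lists, so those examples pass through unmixed
        mix_size = int(max(2, batch * mixup_ratio // 2 * 2))
        mix_indices = indices[:mix_size]
        idx1 = np.append(mix_indices[0::2], indices[mix_size:])
        idx2 = np.append(mix_indices[1::2], indices[mix_size:])
    return idx1, idx2

idx1, idx2 = make_mixup_indices(batch=10, mixup_ratio=0.2)
print(len(idx1), len(idx2))  # 9 9 -> one mixed example replaces a pair, 8 kept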
@@ -107,7 +107,7 @@ class S2TSATEModel(S2TTransformerModel):
             help="ctc layer for target sentence",
         )
         parser.add_argument(
-            "--target-intermedia-ctc-layer",
+            "--target-intermedia-ctc-layers",
             default=None,
             type=str,
             help="intermedia ctc layers for target sentence",
......
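The renamed option keeps type=str; its plural name suggests the value can list several layers. The parser below is an assumption about the expected format (comma-separated indices), not code from this commit.

# Assumed parsing of a plural, string-typed layers option such as
# --target-intermedia-ctc-layers "2,4" (the comma format is a guess).
def parse_ctc_layers(value):
    if not value:
        return []
    return [int(layer) for layer in value.split(",") if layer.strip()]

print(parse_ctc_layers("2,4"))  # [2, 4]
print(parse_ctc_layers(None))   # []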