Commit 33c67f90 by xuchen

fix the bugs

parent 1f8d6f6c
@@ -176,9 +176,9 @@ class CtcCriterion(FairseqCriterion):
     from torch.distributions import Categorical
     # ctc_logit = ctc_logit.sort(dim=-1, descending=True)[0][:, :, 0:100]
     # ctc_logit = ctc_logit / ctc_logit.sum(dim=-1, keepdim=True)
-    cut_ctc_logit = ctc_logit.sort(dim=-1, descending=True)[0][:, :, 0:100]
-    ctc_entropy = Categorical(logits=cut_ctc_logit).entropy().sum()
-    # ctc_entropy = Categorical(logits=ctc_logit).entropy().sum()
+    # cut_ctc_logit = ctc_logit.sort(dim=-1, descending=True)[0][:, :, 0:100]
+    # ctc_entropy = Categorical(logits=cut_ctc_logit).entropy().sum()
+    ctc_entropy = Categorical(logits=ctc_logit).entropy().sum()
     logging_output["ctc_entropy"] = utils.item(ctc_entropy.data)
     logging_output["ctc_loss"] = utils.item(ctc_loss.data)
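
This first hunk switches the entropy regularizer from a top-100 truncation of the CTC logits back to the full vocabulary distribution. A minimal self-contained sketch of the two variants (shapes and values are illustrative; only the Categorical usage comes from the diff):

    import torch
    from torch.distributions import Categorical

    # Dummy CTC logits with shape (time, batch, vocab); values are made up.
    ctc_logit = torch.randn(50, 8, 1000)

    # Variant kept by this commit: entropy over the full vocabulary.
    ctc_entropy = Categorical(logits=ctc_logit).entropy().sum()

    # Variant the commit comments out: keep only the 100 largest logits
    # per frame, then measure entropy of that truncated distribution.
    cut_ctc_logit = ctc_logit.sort(dim=-1, descending=True)[0][:, :, 0:100]
    cut_entropy = Categorical(logits=cut_ctc_logit).entropy().sum()
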
@@ -19,7 +19,6 @@ from fairseq.modules import (
     FairseqDropout,
     LayerNorm,
     PositionalEmbedding,
-    PositionalEncoding,
     LegacyRelPositionalEncoding,
     RelPositionalEncoding,
     S2TTransformerEncoderLayer,
@@ -464,7 +463,7 @@ class S2TCTCEncoder(FairseqEncoder):
         self.embed_positions = RelPositionalEncoding(
             args.max_source_positions, args.encoder_embed_dim
         )
-    elif self.attn_type == "rel_selfattn":
+    elif self.attn_type in ["rel_selfattn", "rel_pos_legacy"]:
         self.embed_positions = LegacyRelPositionalEncoding(
             args.encoder_embed_dim, args.dropout, args.max_source_positions
         )
@@ -560,7 +559,7 @@ class S2TCTCEncoder(FairseqEncoder):
     # padding and position embedding
     encoder_padding_mask = lengths_to_padding_mask(input_lengths)
-    if self.attn_type == "rel_pos" or self.attn_type == "rel_selfattn":
+    if self.attn_type in ["rel_selfattn", "rel_pos", "rel_pos_legacy"]:
         positions = self.embed_positions(x)
     elif self.attn_type == "rope":
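
Both hunks above widen the same dispatch: every relative-position variant now flows through self.embed_positions, while RoPE needs no separate position tensor because the rotation is applied inside attention. A hedged, self-contained rendering of the forward-time branch (the helper function and its signature are ours, not the repo's):

    from typing import Callable, Optional
    import torch

    def encoder_positions(
        attn_type: str,
        embed_positions: Optional[Callable[[torch.Tensor], torch.Tensor]],
        x: torch.Tensor,
    ) -> Optional[torch.Tensor]:
        # Mirrors the branch in S2TCTCEncoder.forward after this commit.
        if attn_type in ["rel_selfattn", "rel_pos", "rel_pos_legacy"]:
            return embed_positions(x)  # relative-position tensor for attention
        # "rope" and absolute embeddings produce no separate tensor here.
        return None
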
@@ -333,7 +333,6 @@ class S2TSATEEncoder(FairseqEncoder):
     if "ctc_logit" in acoustic_encoder_out and len(acoustic_encoder_out["ctc_logit"]) > 0:
         ctc_logit = acoustic_encoder_out["ctc_logit"][0]
         ctc_prob = F.softmax(ctc_logit / self.temperature, dim=-1)
-        # ctc_prob = self.acoustic_encoder.ctc.softmax(encoder_out, self.temperature)
     else:
         ctc_logit = None
         ctc_prob = None
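
The surviving line in this hunk derives the CTC distribution with temperature scaling. A tiny runnable example of what the temperature does (numbers are made up):

    import torch
    import torch.nn.functional as F

    ctc_logit = torch.tensor([[2.0, 1.0, 0.1]])
    for temperature in (0.5, 1.0, 2.0):
        ctc_prob = F.softmax(ctc_logit / temperature, dim=-1)
        # temperature < 1 sharpens the distribution; > 1 flattens it
        print(temperature, ctc_prob)
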
@@ -25,6 +25,8 @@ from fairseq.modules import (
     LayerNorm,
     PositionalEmbedding,
     SinusoidalPositionalEmbedding,
+    RelPositionalEncoding,
+    LegacyRelPositionalEncoding,
     TransformerDecoderLayer,
     TransformerEncoderLayer,
     DynamicLinearCombination
@@ -173,7 +175,8 @@ class TransformerModel(FairseqEncoderDecoderModel):
                         help='checkpoint activations at each layer, which saves GPU '
                              'memory usage at the cost of some additional compute')
     parser.add_argument('--offload-activations', action='store_true',
-                        help='checkpoint activations at each layer, then save to gpu. Sets --checkpoint-activations.')
+                        help='checkpoint activations at each layer, then save to gpu. '
+                             'Sets --checkpoint-activations.')
     # args for "Cross+Self-Attention for Transformer Models" (Peitz et al., 2019)
     parser.add_argument('--no-cross-attention', default=False, action='store_true',
                         help='do not perform cross-attention')
@@ -204,6 +207,8 @@ class TransformerModel(FairseqEncoderDecoderModel):
             "selfattn",
             "rel_selfattn",
             "relative",
+            "rel_pos",
+            "rel_pos_legacy"
         ],
         help="transformer encoder self-attention layer type"
     )
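
The two new choices expose the rel_pos variants on the command line. A condensed sketch of the pattern (the flag name is our assumption; the choices and help string come from the diff):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--encoder-attention-type",
        type=str,
        default="selfattn",
        choices=["selfattn", "rel_selfattn", "relative", "rel_pos", "rel_pos_legacy"],
        help="transformer encoder self-attention layer type",
    )
    args = parser.parse_args(["--encoder-attention-type", "rel_pos"])
    print(args.encoder_attention_type)  # rel_pos
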
@@ -473,6 +478,17 @@ class TransformerEncoder(FairseqEncoder):
     self.embed_scale = 1.0 if args.no_scale_embedding else math.sqrt(embed_dim)
+    if self.attn_type == "rel_pos":
+        self.embed_positions = RelPositionalEncoding(
+            args.max_source_positions, args.encoder_embed_dim
+        )
+    elif self.attn_type in ["rel_selfattn", "rel_pos_legacy"]:
+        self.embed_positions = LegacyRelPositionalEncoding(
+            args.encoder_embed_dim, args.dropout, args.max_source_positions
+        )
+    elif self.attn_type == "rope":
+        self.embed_positions = None
+    else:  # Use absolute positional embedding
     self.embed_positions = (
         PositionalEmbedding(
             args.max_source_positions,
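
This block duplicates, for the text TransformerEncoder, the positional-module construction that S2TCTCEncoder already uses, so the two encoders now agree. A condensed sketch of the dispatch, assuming fairseq is importable; the helper function and the final PositionalEmbedding call are our illustration, not the repo's code:

    from fairseq.modules import (
        LegacyRelPositionalEncoding,
        PositionalEmbedding,
        RelPositionalEncoding,
    )

    def build_embed_positions(args, attn_type, padding_idx):
        # Constructor-time dispatch mirroring the added block above.
        if attn_type == "rel_pos":
            return RelPositionalEncoding(
                args.max_source_positions, args.encoder_embed_dim
            )
        if attn_type in ["rel_selfattn", "rel_pos_legacy"]:
            return LegacyRelPositionalEncoding(
                args.encoder_embed_dim, args.dropout, args.max_source_positions
            )
        if attn_type == "rope":
            return None  # rotary embeddings are applied inside attention
        # Absolute positional embedding fallback (signature per fairseq).
        return PositionalEmbedding(
            args.max_source_positions, args.encoder_embed_dim, padding_idx
        )
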
@@ -3,6 +3,8 @@ import torch.nn as nn
 import torch.nn.functional as F
 import numpy as np
+from fairseq.modules.layer_norm import LayerNorm
+
 class DynamicLinearCombination(nn.Module):
     """Implementation of Dynamic Linear Combination of Layers (DLCL)
@@ -34,9 +36,9 @@ class DynamicLinearCombination(nn.Module):
     # init triangular layer norm
     if args.normalize_embed:
-        self.layer_norms = nn.ModuleList([nn.LayerNorm(self.dim) for _ in range(layer_num)])
+        self.layer_norms = nn.ModuleList([LayerNorm(self.dim) for _ in range(layer_num)])
     else:
-        self.layer_norms = nn.ModuleList([nn.Sequential()] + [nn.LayerNorm(self.dim) for _ in range(layer_num-1)])
+        self.layer_norms = nn.ModuleList([nn.Sequential()] + [LayerNorm(self.dim) for _ in range(layer_num-1)])
     # states
     self.count = 0
@@ -165,5 +167,3 @@ class DynamicLinearCombination(nn.Module):
-    def forward(self):
-        pass
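
The nn.LayerNorm → LayerNorm swap in this file is behavior-preserving in the default case: fairseq's wrapper returns apex's FusedLayerNorm when that package is installed and falls back to torch.nn.LayerNorm otherwise. A simplified sketch of that wrapper (illustrative; the upstream version has extra handling for TorchScript and export):

    import torch.nn as nn

    try:
        from apex.normalization import FusedLayerNorm
        _has_fused = True
    except ImportError:
        _has_fused = False

    def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True):
        # Prefer apex's fused CUDA kernel when available.
        if _has_fused:
            return FusedLayerNorm(normalized_shape, eps, elementwise_affine)
        return nn.LayerNorm(normalized_shape, eps, elementwise_affine)
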
@@ -142,6 +142,9 @@ class Conv1dSubsampling(nn.Module):
         get_activation_class(act, dim=1)
     ) for layer_id in range(num_layers)])
     out_dim = filters[-1]
+
+    self.linear = nn.Linear(out_dim, out_dim)
+
 def forward(self, x, x_len):
     # (B, T, D) -> (B, D, T)
@@ -154,6 +157,7 @@ class Conv1dSubsampling(nn.Module):
     if x_len is not None:
         x_len = torch.div(x_len - 1, 2, rounding_mode='floor') + 1
     x = x.transpose(1, 2)
+    x = self.linear(x)
     return x, x_len
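
In the last two hunks the commit adds a projection after the stride-2 convolutions and keeps the matching length update. A self-contained check of the length arithmetic, assuming the update runs once per stride-2 layer as in the subsampler's loop:

    import torch

    def subsampled_lengths(x_len: torch.Tensor, num_layers: int) -> torch.Tensor:
        # Each stride-2 conv maps length L to floor((L - 1) / 2) + 1.
        for _ in range(num_layers):
            x_len = torch.div(x_len - 1, 2, rounding_mode='floor') + 1
        return x_len

    print(subsampled_lengths(torch.tensor([100, 37]), num_layers=2))
    # tensor([25, 10])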