Commit 3492c676 by libei

add new hparams

parent 43b2a870
@@ -441,6 +441,7 @@ def transformer_dla_base():
  hparams.encoder_layers = 6
  hparams.decoder_layers = 6
  hparams.normalize_before = True
  hparams.optimizer_adam_beta2 = 0.997
  hparams.attention_dropout = 0.1
  hparams.relu_dropout = 0.1
  hparams.learning_rate = 0.4
@@ -450,20 +451,19 @@ def transformer_dla_base():
  hparams.optimizer_multistep_accumulate_steps = 4
  return hparams

@registry.register_hparams
def transformer_dla_big():
  """HParams for transformer big model on WMT."""
  hparams = transformer_dla()
  hparams.hidden_size = 1024
  hparams.filter_size = 4096
  hparams.num_heads = 16
  hparams.batching_mantissa_bits = 2
  hparams.residual_dropout = 0.3
  return hparams

@registry.register_hparams
def transformer_dla_base25_shared():
  hparams = transformer_dla_base()
  hparams.shared_decoder_input_and_softmax_weights = int(True)
  hparams.encoder_layers = 25
  return hparams

@registry.register_hparams
def transformer_dla_base_v2():
  hparams = transformer_dla_base()
  hparams.learning_rate = 0.4 * (2**0.5)
  hparams.learning_rate_warmup_steps = 16000
  return hparams
\ No newline at end of file
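
Assuming this is a Tensor2Tensor-style codebase, as the @registry.register_hparams decorator suggests, each decorated function above is registered under its own name and selected by string at training time. The exact registry API varies across T2T versions, so this lookup is only a sketch:

# Sketch of an hparams lookup, assuming a Tensor2Tensor-style registry.
# Early T2T versions return the registered function itself, later ones
# the constructed HParams object, so both cases are handled here.
from tensor2tensor.utils import registry

entry = registry.hparams("transformer_dla_big")
hparams = entry() if callable(entry) else entry
print(hparams.hidden_size)  # 1024, per the definition in this commit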
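
The optimizer_multistep_accumulate_steps = 4 setting points at multistep gradient accumulation: gradients from four micro-batches are combined before a single optimizer update, emulating a 4x larger batch at unchanged memory cost. A self-contained toy of the idea (hypothetical code, not this repository's optimizer):

# Toy multistep accumulation: average gradients over `accumulate_steps`
# micro-batches, then apply one SGD step, emulating a 4x larger batch.
accumulate_steps = 4
lr = 0.1
w = 0.0  # a single scalar "parameter"
micro_batches = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]

acc = 0.0
for step, x in enumerate(micro_batches):
  acc += 2 * (w - x)  # gradient of the per-batch loss (w - x)**2
  if (step + 1) % accumulate_steps == 0:
    w -= lr * acc / accumulate_steps  # one update per four micro-batches
    acc = 0.0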
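
shared_decoder_input_and_softmax_weights = int(True) reads as weight tying: the decoder's input embedding matrix doubles as the output softmax projection, saving a vocab-by-hidden parameter block (the int(...) cast presumably keeps the hparam integer-typed). A minimal sketch with assumed shapes:

# Tied input/softmax weights (illustrative; sizes are assumptions, not
# values from this commit): one (vocab, hidden) matrix both embeds token
# ids and produces output logits.
import numpy as np

vocab, hidden = 32000, 512
embedding = np.random.randn(vocab, hidden).astype(np.float32) * hidden ** -0.5

def embed(token_ids):            # (batch,) int ids -> (batch, hidden)
  return embedding[token_ids]

def output_logits(decoder_out):  # (batch, hidden) -> (batch, vocab)
  return decoder_out @ embedding.T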
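
In transformer_dla_base_v2, pairing learning_rate = 0.4 * (2**0.5) with learning_rate_warmup_steps = 16000 is consistent with the standard noam schedule, whose peak rate at the end of warmup scales as base_lr / sqrt(warmup_steps) up to a constant model-size factor. If the base config warms up over 8000 steps (a common default, not visible in this hunk), doubling the warmup shrinks the peak by sqrt(2), and the sqrt(2) factor on the rate restores it:

# Peak of the noam schedule (model-size factor omitted, since it cancels):
# lr(step) = base * min(step * warmup**-1.5, step**-0.5), maximal at
# step == warmup, where it equals base / sqrt(warmup).
def noam_peak(base_lr, warmup_steps):
  step = warmup_steps
  return base_lr * min(step * warmup_steps ** -1.5, step ** -0.5)

print(noam_peak(0.4, 8000))              # assumed baseline peak ~0.00447
print(noam_peak(0.4 * 2 ** 0.5, 16000))  # v2 peak: the same value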