Commit 81667eab by libei

remove unused file and import settings

parent 42676b59
.idea/workspace.xml
@@ -3,9 +3,8 @@
<component name="ChangeListManager">
<list default="true" id="7d6d9926-f879-4708-ad8e-442bac96b62a" name="Default" comment="">
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
<change beforePath="$PROJECT_DIR$/tensor2tensor/models/__init__.py" afterPath="$PROJECT_DIR$/tensor2tensor/models/__init__.py" />
<change beforePath="$PROJECT_DIR$/tensor2tensor/models/models.py" afterPath="$PROJECT_DIR$/tensor2tensor/models/models.py" />
<change beforePath="$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py" afterPath="$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py" />
<change beforePath="$PROJECT_DIR$/tensor2tensor/models/transformer_dropout.py" afterPath="" />
</list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="TRACKING_ENABLED" value="true" />
@@ -26,102 +25,26 @@
</provider>
</entry>
</file>
<file leaf-file-name="__init__.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/__init__.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="351">
<caret line="13" column="0" lean-forward="true" selection-start-line="13" selection-start-column="0" selection-end-line="13" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="models.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/models.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="537">
<caret line="41" column="40" lean-forward="false" selection-start-line="41" selection-start-column="40" selection-end-line="41" selection-end-column="40" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="__init__.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/utils/__init__.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="378">
<caret line="14" column="0" lean-forward="true" selection-start-line="14" selection-start-column="0" selection-end-line="14" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="transformer_dla.py" pinned="false" current-in-tab="true">
<file leaf-file-name="transformer_dla.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1323">
<caret line="49" column="50" lean-forward="true" selection-start-line="49" selection-start-column="50" selection-end-line="49" selection-end-column="50" />
<folding>
<element signature="e#738#776#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file leaf-file-name="transformer_alternative.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_alternative.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="302">
<caret line="167" column="4" lean-forward="false" selection-start-line="167" selection-start-column="4" selection-end-line="167" selection-end-column="4" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="transformer_mlrf.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_mlrf.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="493">
<caret line="39" column="21" lean-forward="true" selection-start-line="39" selection-start-column="21" selection-end-line="39" selection-end-column="21" />
<state relative-caret-position="216">
<caret line="35" column="46" lean-forward="true" selection-start-line="35" selection-start-column="46" selection-end-line="35" selection-end-column="46" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="transformer_dropout.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_dropout.py">
<file leaf-file-name="models.py" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/models.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-2834">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<state relative-caret-position="408">
<caret line="37" column="46" lean-forward="false" selection-start-line="37" selection-start-column="46" selection-end-line="37" selection-end-column="46" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="xception.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/xception.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-972">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding>
<element signature="e#608#646#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file leaf-file-name="attention_lm.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/attention_lm.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="567">
<caret line="45" column="27" lean-forward="true" selection-start-line="45" selection-start-column="27" selection-end-line="45" selection-end-column="27" />
<folding>
<element signature="e#719#757#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="FileTemplateManagerImpl">
@@ -171,7 +94,7 @@
<detection-done>true</detection-done>
<sorting>DEFINITION_ORDER</sorting>
</component>
<component name="ProjectFrameBounds" extendedState="7">
<component name="ProjectFrameBounds" extendedState="6">
<option name="x" value="22" />
<option name="y" value="5" />
<option name="width" value="1909" />
@@ -267,7 +190,7 @@
<servers />
</component>
<component name="ToolWindowManager">
<frame x="-8" y="-8" width="1936" height="1056" extended-state="7" />
<frame x="-8" y="-8" width="1936" height="1056" extended-state="6" />
<editor active="true" />
<layout>
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="11" side_tool="false" content_ui="tabs" />
@@ -324,9 +247,7 @@
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="2484">
<caret line="92" column="43" lean-forward="false" selection-start-line="92" selection-start-column="43" selection-end-line="92" selection-end-column="43" />
<folding>
<element signature="e#738#776#0" expanded="true" />
</folding>
<folding />
</state>
</provider>
</entry>
@@ -500,18 +421,18 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_alternative.py">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_mlrf.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="302">
<caret line="167" column="4" lean-forward="false" selection-start-line="167" selection-start-column="4" selection-end-line="167" selection-end-column="4" />
<state relative-caret-position="493">
<caret line="39" column="21" lean-forward="true" selection-start-line="39" selection-start-column="21" selection-end-line="39" selection-end-column="21" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer.py">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_libei.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="162">
<caret line="40" column="12" lean-forward="true" selection-start-line="40" selection-start-column="12" selection-end-line="40" selection-end-column="12" />
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding />
</state>
</provider>
@@ -524,36 +445,26 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/attention_lm.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="567">
<caret line="45" column="27" lean-forward="true" selection-start-line="45" selection-start-column="27" selection-end-line="45" selection-end-column="27" />
<folding>
<element signature="e#719#757#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_mlrf.py">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/__init__.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="493">
<caret line="39" column="21" lean-forward="true" selection-start-line="39" selection-start-column="21" selection-end-line="39" selection-end-column="21" />
<state relative-caret-position="351">
<caret line="13" column="0" lean-forward="true" selection-start-line="13" selection-start-column="0" selection-end-line="13" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_dropout.py">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-2834">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<state relative-caret-position="162">
<caret line="40" column="12" lean-forward="true" selection-start-line="40" selection-start-column="12" selection-end-line="40" selection-end-column="12" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_libei.py">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_alternative.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<state relative-caret-position="302">
<caret line="167" column="4" lean-forward="false" selection-start-line="167" selection-start-column="4" selection-end-line="167" selection-end-column="4" />
<folding />
</state>
</provider>
@@ -568,29 +479,37 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/__init__.py">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/attention_lm.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="351">
<caret line="13" column="0" lean-forward="true" selection-start-line="13" selection-start-column="0" selection-end-line="13" selection-end-column="0" />
<folding />
<state relative-caret-position="567">
<caret line="45" column="27" lean-forward="true" selection-start-line="45" selection-start-column="27" selection-end-line="45" selection-end-column="27" />
<folding>
<element signature="e#719#757#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/models.py">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_dropout.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="537">
<caret line="41" column="40" lean-forward="false" selection-start-line="41" selection-start-column="40" selection-end-line="41" selection-end-column="40" />
<state relative-caret-position="135">
<caret line="162" column="40" lean-forward="false" selection-start-line="162" selection-start-column="40" selection-end-line="162" selection-end-column="40" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1323">
<caret line="49" column="50" lean-forward="true" selection-start-line="49" selection-start-column="50" selection-end-line="49" selection-end-column="50" />
<folding>
<element signature="e#738#776#0" expanded="true" />
</folding>
<state relative-caret-position="216">
<caret line="35" column="46" lean-forward="true" selection-start-line="35" selection-start-column="46" selection-end-line="35" selection-end-column="46" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/models.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="408">
<caret line="37" column="46" lean-forward="false" selection-start-line="37" selection-start-column="46" selection-end-line="37" selection-end-column="46" />
<folding />
</state>
</provider>
</entry>
tensor2tensor/models/models.py
@@ -36,8 +36,6 @@ from tensor2tensor.models import transformer
from tensor2tensor.models import transformer_alternative
from tensor2tensor.models import xception
from tensor2tensor.models import transformer_mlrf
#from tensor2tensor.models import transformer_fix
from tensor2tensor.models import transformer_relative_pos
from tensor2tensor.models import transformer_dropout
from tensor2tensor.models import transformer_dla
# pylint: enable=unused-import
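These imports are kept only for their side effects: each model module registers its models and hparams sets with tensor2tensor's registry when it is imported, which is why the names are never referenced directly and sit under the pylint unused-import pragma. A minimal sketch of that registration pattern, using hypothetical names rather than the real tensor2tensor.utils.registry code:

# registry_sketch.py -- illustrative only
_MODELS = {}

def register_model(model_cls):
  # Store the class under a lowercased key so a trainer can look it up by name.
  _MODELS[model_cls.__name__.lower()] = model_cls
  return model_cls

@register_model
class ToyTransformer(object):  # hypothetical model class
  pass

# Importing the module that defines ToyTransformer is enough to populate
# _MODELS, so removing an import (as this commit does for transformer_dropout)
# also unregisters the corresponding model.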
tensor2tensor/models/transformer_dropout.py (deleted)
# Copyright 2017 The Tensor2Tensor Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""transformer (attention).
encoder: [Self-Attention, Feed-forward] x n
decoder: [Self-Attention, Source-Target-Attention, Feed-forward] x n
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
# Dependency imports
from six.moves import xrange # pylint: disable=redefined-builtin
from tensor2tensor.models import common_attention
from tensor2tensor.models import common_hparams
from tensor2tensor.models import common_layers
from tensor2tensor.utils import registry
from tensor2tensor.utils import t2t_model
import tensorflow as tf
@registry.register_model
class TransformerDropout(t2t_model.T2TModel):
"""Attention net. See file docstring."""
def model_fn_body(self, features):
# Remove dropout if not training
hparams = copy.copy(self._hparams)
targets = features["targets"]
inputs = features.get("inputs")
target_space = features.get("target_space_id")
inputs = common_layers.flatten4d3d(inputs)
targets = common_layers.flatten4d3d(targets)
(encoder_input, encoder_attention_bias, _) = (transformer_prepare_encoder(
inputs, target_space, hparams))
(decoder_input, decoder_self_attention_bias) = transformer_prepare_decoder(
targets, hparams)
def residual_fn(x, y, dropout_broadcast_dims=None):
return common_layers.layer_norm(x + common_layers.dropout_with_broadcast_dims(
y, 1.0 - hparams.residual_dropout, broadcast_dims=dropout_broadcast_dims))
# encoder_input = tf.squeeze(encoder_input, 2)
# decoder_input = tf.squeeze(decoder_input, 2)
encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout)
decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout)
encoder_output = transformer_encoder(encoder_input, residual_fn,
encoder_attention_bias, hparams)
decoder_output = transformer_decoder(
decoder_input, encoder_output, residual_fn, decoder_self_attention_bias,
encoder_attention_bias, hparams)
decoder_output = tf.expand_dims(decoder_output, 2)
return decoder_output
def transformer_prepare_encoder(inputs, target_space, hparams):
"""Prepare one shard of the model for the encoder.
Args:
inputs: a Tensor.
target_space: a Tensor.
hparams: run hyperparameters
Returns:
encoder_input: a Tensor, bottom of encoder stack
encoder_self_attention_bias: a Tensor, containing large negative values
to implement masked attention and possibly biases for diagonal
alignments
encoder_padding: a Tensor
"""
# Flatten inputs.
ishape_static = inputs.shape.as_list()
encoder_input = inputs
encoder_padding = common_attention.embedding_to_padding(encoder_input)
encoder_self_attention_bias = common_attention.attention_bias_ignore_padding(
encoder_padding)
# Append target_space_id embedding to inputs.
emb_target_space = common_layers.embedding(
target_space, 32, ishape_static[-1], name="target_space_embedding")
emb_target_space = tf.reshape(emb_target_space, [1, 1, -1])
encoder_input += emb_target_space
if hparams.pos == "timing":
encoder_input = common_attention.add_timing_signal_1d(encoder_input)
return (encoder_input, encoder_self_attention_bias, encoder_padding)
def transformer_prepare_decoder(targets, hparams):
"""Prepare one shard of the model for the decoder.
Args:
targets: a Tensor.
hparams: run hyperparameters
Returns:
decoder_input: a Tensor, bottom of decoder stack
decoder_self_attention_bias: a Tensor, containing large negative values
to implement masked attention and possibly biases for diagonal alignments
"""
decoder_self_attention_bias = (
common_attention.attention_bias_lower_triangle(tf.shape(targets)[1]))
decoder_input = common_layers.shift_left_3d(targets)
if hparams.pos == "timing":
decoder_input = common_attention.add_timing_signal_1d(decoder_input)
return (decoder_input, decoder_self_attention_bias)
def transformer_encoder(encoder_input,
residual_fn,
encoder_self_attention_bias,
hparams,
name="encoder"):
"""A stack of transformer layers.
Args:
encoder_input: a Tensor
residual_fn: a function from (layer_input, layer_output) -> combined_output
encoder_self_attention_bias: bias Tensor for self-attention
(see common_attention.attention_bias())
hparams: hyperparameters for model
name: a string
Returns:
y: a Tensor
"""
x = encoder_input
residual_dropout_broadcast_dims = (
common_layers.comma_separated_string_to_integer_list(
getattr(hparams, "residual_dropout_broadcast_dims", ""))
)
attention_dropout_broadcast_dims = (
common_layers.comma_separated_string_to_integer_list(
getattr(hparams, "attention_dropout_broadcast_dims", "")))
# Summaries don't work in multi-problem setting yet.
summaries = "problems" not in hparams.values() or len(hparams.problems) == 1
with tf.variable_scope(name):
for layer in xrange(hparams.num_hidden_layers):
with tf.variable_scope("layer_%d" % layer):
x = residual_fn(
x,
common_attention.multihead_attention_broadcast_dropout(
x,
None,
encoder_self_attention_bias,
hparams.attention_key_channels or hparams.hidden_size,
hparams.attention_value_channels or hparams.hidden_size,
hparams.hidden_size,
hparams.num_heads,
hparams.attention_dropout,
attention_type=hparams.attention_type,
max_relative_length=hparams.max_relative_length,
dropout_broadcast_dims=attention_dropout_broadcast_dims,
summaries=False,
name="encoder_self_attention"),
dropout_broadcast_dims=residual_dropout_broadcast_dims)
x = residual_fn(x, transformer_ffn_layer(x, hparams),
dropout_broadcast_dims=residual_dropout_broadcast_dims)
return x
def transformer_decoder(decoder_input,
encoder_output,
residual_fn,
decoder_self_attention_bias,
encoder_decoder_attention_bias,
hparams,
name="decoder"):
"""A stack of transformer layers.
Args:
decoder_input: a Tensor
encoder_output: a Tensor
residual_fn: a function from (layer_input, layer_output) -> combined_output
decoder_self_attention_bias: bias Tensor for self-attention
(see common_attention.attention_bias())
encoder_decoder_attention_bias: bias Tensor for encoder-decoder attention
(see common_attention.attention_bias())
hparams: hyperparameters for model
name: a string
Returns:
y: a Tensor
"""
x = decoder_input
residual_dropout_broadcast_dims = (
common_layers.comma_separated_string_to_integer_list(
getattr(hparams, "residual_dropout_broadcast_dims", ""))
)
attention_dropout_broadcast_dims = (
common_layers.comma_separated_string_to_integer_list(
getattr(hparams, "attention_dropout_broadcast_dims", "")))
# Summaries don't work in multi-problem setting yet.
summaries = "problems" not in hparams.values() or len(hparams.problems) == 1
with tf.variable_scope(name):
for layer in xrange(hparams.num_hidden_layers):
with tf.variable_scope("layer_%d" % layer):
x = residual_fn(
x,
common_attention.multihead_attention_broadcast_dropout(
x,
None,
decoder_self_attention_bias,
hparams.attention_key_channels or hparams.hidden_size,
hparams.attention_value_channels or hparams.hidden_size,
hparams.hidden_size,
hparams.num_heads,
hparams.attention_dropout,
attention_type=hparams.attention_type,
max_relative_length=hparams.max_relative_length,
dropout_broadcast_dims=attention_dropout_broadcast_dims,
summaries=False,
name="decoder_self_attention"),
dropout_broadcast_dims=residual_dropout_broadcast_dims)
x = residual_fn(
x,
common_attention.multihead_attention_broadcast_dropout(
x,
encoder_output,
encoder_decoder_attention_bias,
hparams.attention_key_channels or hparams.hidden_size,
hparams.attention_value_channels or hparams.hidden_size,
hparams.hidden_size,
hparams.num_heads,
hparams.attention_dropout,
dropout_broadcast_dims=attention_dropout_broadcast_dims,
summaries=False,
name="encdec_attention"),
dropout_broadcast_dims=residual_dropout_broadcast_dims)
x = residual_fn(x, transformer_ffn_layer(x, hparams),
dropout_broadcast_dims=residual_dropout_broadcast_dims)
return x
def transformer_ffn_layer(x, hparams):
"""Feed-forward layer in the transformer.
Args:
x: a Tensor of shape [batch_size, length, hparams.hidden_size]
hparams: hyperparameters for model
Returns:
a Tensor of shape [batch_size, length, hparams.hidden_size]
"""
# wq: although it is named 'relu_dropout_broadcast_dims', it is also used for swish.
# TODO: merge the different activation functions into one code path.
relu_dropout_broadcast_dims = (
common_layers.comma_separated_string_to_integer_list(
getattr(hparams, "relu_dropout_broadcast_dims", "")))
if hparams.ffn_layer == "conv_hidden_relu":
return common_layers.conv_hidden_relu(
x,
hparams.filter_size,
hparams.hidden_size,
dropout=hparams.relu_dropout,
dropout_broadcast_dims=relu_dropout_broadcast_dims,
summaries=False)
if hparams.ffn_layer == "conv_hidden_swish":
return common_layers.conv_hidden_swish(
x,
hparams.filter_size,
hparams.hidden_size,
dropout=hparams.swish_dropout,
beta_is_trainable=hparams.swish_beta_is_trainable,
beta=hparams.swish_beta,
dropout_broadcast_dims=relu_dropout_broadcast_dims,
summaries=False)
elif hparams.ffn_layer == "parameter_attention":
return common_attention.parameter_attention(
x,
hparams.parameter_attention_key_channels or hparams.hidden_size,
hparams.parameter_attention_value_channels or hparams.hidden_size,
hparams.hidden_size,
hparams.filter_size,
hparams.num_heads,
hparams.attention_dropout)
elif hparams.ffn_layer == "conv_hidden_relu_with_sepconv":
return common_layers.conv_hidden_relu(
x,
hparams.filter_size,
hparams.hidden_size,
kernel_size=(3, 1),
second_kernel_size=(31, 1),
padding="LEFT",
dropout=hparams.relu_dropout)
else:
assert hparams.ffn_layer == "none"
return x
@registry.register_hparams
def transformer_base_boradcast_dropout():
"""Set of hyperparameters."""
hparams = common_hparams.basic_params1()
hparams.hidden_size = 512
hparams.batch_size = 4096
hparams.max_length = 256
hparams.dropout = 0.0
hparams.clip_grad_norm = 0. # i.e. no gradient clipping
hparams.optimizer_adam_epsilon = 1e-9
hparams.learning_rate_decay_scheme = "noam"
hparams.learning_rate = 0.1
hparams.learning_rate_warmup_steps = 4000
hparams.initializer_gain = 1.0
hparams.num_hidden_layers = 6
hparams.initializer = "uniform_unit_scaling"
hparams.weight_decay = 0.0
hparams.optimizer_adam_beta1 = 0.9
hparams.optimizer_adam_beta2 = 0.98
hparams.num_sampled_classes = 0
hparams.label_smoothing = 0.1
hparams.shared_embedding_and_softmax_weights = int(True)
hparams.add_hparam("filter_size", 2048) # Add new ones like this.
# attention-related flags
hparams.add_hparam("num_heads", 8)
hparams.add_hparam("attention_key_channels", 0)
hparams.add_hparam("attention_value_channels", 0)
hparams.add_hparam("ffn_layer", "conv_hidden_relu")
hparams.add_hparam("parameter_attention_key_channels", 0)
hparams.add_hparam("parameter_attention_value_channels", 0)
# All hyperparameters ending in "dropout" are automatically set to 0.0
# when not in training mode.
hparams.add_hparam("attention_dropout", 0.0)
hparams.add_hparam("relu_dropout", 0.0)
hparams.add_hparam("residual_dropout", 0.1)
hparams.add_hparam("pos", "timing") # timing, none
hparams.add_hparam("nbr_decoder_problems", 1)
# Default is "dot_product" attention; "relative_dot_product" is also available.
hparams.add_hparam("attention_type", "dot_product")
hparams.add_hparam("max_relative_length", 16)
# swish activation function
hparams.add_hparam("swish_beta_is_trainable", False)
hparams.add_hparam("swish_beta", 1.0)
# like "relu_dropout"
hparams.add_hparam("swish_dropout", 0.0)
# Updated dropout implementation: broadcasting the mask saves memory and speeds up training.
hparams.add_hparam("attention_dropout_broadcast_dims", "0,1") # batch, heads
hparams.add_hparam("relu_dropout_broadcast_dims", "1") # length
hparams.add_hparam("residual_dropout_broadcast_dims", "1") # length
return hparams
@registry.register_hparams
def transformer_base_boradcast_dropout_rpr_dropout1():
hparams = transformer_base_boradcast_dropout()
hparams.max_relative_length = 16
hparams.attention_type = "relative_dot_product"
hparams.relu_dropout = 0.1
hparams.attention_dropout = 0.1
return hparams
@registry.register_hparams
def transformer_base_broadcast_dropout_dropout1():
hparams = transformer_base_boradcast_dropout()
hparams.relu_dropout = 0.1
hparams.attention_dropout = 0.1
return hparams
@registry.register_hparams
def transformer_base_broadcast_dropout_tiny():
hparams = transformer_base_boradcast_dropout()
hparams.relu_dropout = 0.1
hparams.attention_dropout = 0.1
hparams.num_hidden_layers = 3
hparams.fused_inner_hidden = 128
hparams.hidden_size = 64
hparams.filter_size = 128
hparams.batch_size = 128
return hparams
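
The "*_dropout_broadcast_dims" hparams above share a single dropout mask along the listed axes instead of sampling an independent mask per element, which shrinks the mask tensor and saves memory, as the comments note. A rough sketch of the idea, assuming TF1-style tf.nn.dropout with a noise_shape argument (the real helper, common_layers.dropout_with_broadcast_dims, is not shown in this commit):

import tensorflow as tf

def dropout_with_broadcast_dims_sketch(x, keep_prob, broadcast_dims=None):
  # Build a noise_shape with size 1 on the broadcast axes so one mask is
  # shared along them; the other axes keep their full (dynamic) size.
  noise_shape = None
  if broadcast_dims:
    shape = tf.shape(x)
    ndims = x.get_shape().ndims
    noise_shape = [1 if d in broadcast_dims else shape[d] for d in range(ndims)]
  return tf.nn.dropout(x, keep_prob, noise_shape=noise_shape)

# Example: with residual_dropout_broadcast_dims = "1" (the length axis of a
# [batch, length, hidden] activation), the mask has shape [batch, 1, hidden],
# so every position in a sequence is dropped with the same pattern.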