Commit fb9ee9e7 by libei

split num_hidden_layers into encoder_layers and decoder_layers

enable tf_random_seed so parameter initialization is reproducible
parent 6097530a
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="PublishConfigData" autoUpload="Always" serverName="39.104.93.174">
    <serverData>
      <paths name="39.104.93.174">
        <serverdata>
          <mappings>
            <mapping deploy="/WMT19" local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
    </serverData>
    <option name="myAutoUpload" value="ALWAYS" />
  </component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="WebServers">
    <option name="servers">
      <webServer id="660dec94-3859-4796-9d39-b7fec4547030" name="39.104.93.174" url="http://39.104.93.174">
        <fileTransfer host="39.104.93.174" port="22" rootFolder="/media/libei" accessType="SFTP">
          <advancedOptions>
            <advancedOptions dataProtectionLevel="Private" />
          </advancedOptions>
          <option name="port" value="22" />
        </fileTransfer>
      </webServer>
    </option>
  </component>
</project>
\ No newline at end of file
@@ -41,6 +41,8 @@ def basic_params1():
       # [8, 10, 12, 14, 16, 20, 24 ... (max_length or batch_size)]
       batching_mantissa_bits=1,
       num_hidden_layers=4,
+      encoder_layers=4,
+      decoder_layers=4,
       kernel_height=3,
       kernel_width=1,
       hidden_size=64,
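Note that num_hidden_layers=4 is kept alongside the two new keys, so call sites that have not been migrated still resolve. A minimal sketch, not part of this commit, of bridging the old and new hparams; the helper name stack_depth is hypothetical:

import tensorflow as tf

def stack_depth(hparams, key):
  # Fall back to the legacy depth when the new per-stack key is absent.
  return getattr(hparams, key, hparams.num_hidden_layers)

hp = tf.contrib.training.HParams(num_hidden_layers=4, encoder_layers=6)
assert stack_depth(hp, "encoder_layers") == 6
assert stack_depth(hp, "decoder_layers") == 4  # falls back to num_hidden_layers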
@@ -231,7 +231,7 @@ def transformer_decoder(decoder_input,
   # Summaries don't work in multi-problem setting yet.
   summaries = "problems" not in hparams.values() or len(hparams.problems) == 1
   with tf.variable_scope(name):
-    for layer in xrange(hparams.num_hidden_layers):
+    for layer in xrange(hparams.decoder_layers):
       with tf.variable_scope("layer_%d" % layer):
         # self-attention network
         residual = x
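The matching encoder hunk is not shown in this diff; going by the decoder change above, transformer_encoder presumably loops over hparams.encoder_layers instead of hparams.num_hidden_layers. A self-contained sketch under that assumption, with the attention and feed-forward body replaced by an identity stand-in:

import tensorflow as tf
from six.moves import xrange  # same Py2/Py3 idiom as the code above

def transformer_encoder_sketch(x, hparams, name="encoder"):
  # Presumed counterpart of the decoder change: the layer loop reads
  # hparams.encoder_layers rather than the shared num_hidden_layers.
  with tf.variable_scope(name):
    for layer in xrange(hparams.encoder_layers):
      with tf.variable_scope("layer_%d" % layer):
        x = tf.identity(x)  # stand-in for self-attention + feed-forward
  return x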
@@ -361,7 +361,8 @@ def transformer_base():
   hparams.learning_rate = 0.1
   hparams.learning_rate_warmup_steps = 4000
   hparams.initializer_gain = 1.0
-  hparams.num_hidden_layers = 6
+  hparams.encoder_layers = 6
+  hparams.decoder_layers = 6
   hparams.initializer = "uniform_unit_scaling"
   hparams.weight_decay = 0.0
   hparams.optimizer_adam_beta1 = 0.9
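The point of splitting one depth hparam in two is that derived configs can now make the stacks asymmetric. A hypothetical example building on transformer_base() from the hunk above; the name transformer_deep_encoder and the 12/3 depths are illustrative only:

def transformer_deep_encoder():
  # Deep encoder, shallow decoder: cheaper autoregressive decoding while
  # keeping most of the model capacity on the source side.
  hparams = transformer_base()
  hparams.encoder_layers = 12
  hparams.decoder_layers = 3
  return hparams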
@@ -196,7 +196,7 @@ def create_experiment_components(hparams, output_dir, data_dir, model_name):
           model_dir=output_dir,
           gpu_memory_fraction=FLAGS.worker_gpu_memory_fraction,
           session_config=session_config(),
-          #tf_random_seed=FLAGS.random_seed,
+          tf_random_seed=FLAGS.random_seed,
           keep_checkpoint_max=FLAGS.keep_checkpoint_max,
           save_checkpoints_secs=FLAGS.save_checkpoint_secs))
   # Store the hparams in the estimator as well
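Un-commenting tf_random_seed passes FLAGS.random_seed into the estimator's RunConfig, which seeds graph-level randomness so parameter initialization repeats across runs, matching the commit message. A minimal TF1 sketch of the effect, independent of this repo:

import tensorflow as tf

def init_values(seed):
  # Graph-level seeding (what RunConfig does with tf_random_seed) makes
  # initializer output deterministic for a fixed graph construction order.
  g = tf.Graph()
  with g.as_default():
    tf.set_random_seed(seed)
    w = tf.get_variable("w", shape=[2, 2],
                        initializer=tf.uniform_unit_scaling_initializer())
    with tf.Session(graph=g) as sess:
      sess.run(tf.global_variables_initializer())
      return sess.run(w)

# Two fresh graphs with the same seed initialize identically.
assert (init_values(1234) == init_values(1234)).all()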