Commit 42676b59 by libei

fix bugs in transformer_dla

parent a440c641
......@@ -3,6 +3,8 @@
<component name="ChangeListManager">
<list default="true" id="7d6d9926-f879-4708-ad8e-442bac96b62a" name="Default" comment="">
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
<change beforePath="$PROJECT_DIR$/tensor2tensor/models/__init__.py" afterPath="$PROJECT_DIR$/tensor2tensor/models/__init__.py" />
<change beforePath="$PROJECT_DIR$/tensor2tensor/models/models.py" afterPath="$PROJECT_DIR$/tensor2tensor/models/models.py" />
<change beforePath="$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py" afterPath="$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py" />
</list>
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
......@@ -17,46 +19,38 @@
<file leaf-file-name="transformer.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="352">
<caret line="294" column="0" lean-forward="false" selection-start-line="294" selection-start-column="0" selection-end-line="294" selection-end-column="0" />
<state relative-caret-position="162">
<caret line="40" column="12" lean-forward="true" selection-start-line="40" selection-start-column="12" selection-end-line="40" selection-end-column="12" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="common_hparams.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/common_hparams.py">
<file leaf-file-name="__init__.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/__init__.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="540">
<caret line="30" column="11" lean-forward="false" selection-start-line="30" selection-start-column="11" selection-end-line="30" selection-end-column="11" />
<state relative-caret-position="351">
<caret line="13" column="0" lean-forward="true" selection-start-line="13" selection-start-column="0" selection-end-line="13" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="trainer_utils.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/utils/trainer_utils.py">
<file leaf-file-name="models.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/models.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="243">
<caret line="197" column="42" lean-forward="false" selection-start-line="197" selection-start-column="42" selection-end-line="197" selection-end-column="42" />
<folding>
<element signature="e#18286#18629#1" expanded="false" />
<element signature="e#18684#18904#0" expanded="false" />
<element signature="e#18909#18935#0" expanded="false" />
<element signature="e#19415#19927#0" expanded="false" />
<element signature="e#20145#22476#0" expanded="false" />
<element signature="e#22668#23415#0" expanded="false" />
<element signature="e#23535#23889#0" expanded="false" />
</folding>
<state relative-caret-position="537">
<caret line="41" column="40" lean-forward="false" selection-start-line="41" selection-start-column="40" selection-end-line="41" selection-end-column="40" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="common_layers.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/common_layers.py">
<file leaf-file-name="__init__.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/utils/__init__.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="298">
<caret line="428" column="9" lean-forward="false" selection-start-line="428" selection-start-column="9" selection-end-line="428" selection-end-column="9" />
<state relative-caret-position="378">
<caret line="14" column="0" lean-forward="true" selection-start-line="14" selection-start-column="0" selection-end-line="14" selection-end-column="0" />
<folding />
</state>
</provider>
......@@ -65,8 +59,8 @@
<file leaf-file-name="transformer_dla.py" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="379">
<caret line="239" column="5" lean-forward="true" selection-start-line="239" selection-start-column="5" selection-end-line="239" selection-end-column="5" />
<state relative-caret-position="1323">
<caret line="49" column="50" lean-forward="true" selection-start-line="49" selection-start-column="50" selection-end-line="49" selection-end-column="50" />
<folding>
<element signature="e#738#776#0" expanded="true" />
</folding>
......@@ -74,13 +68,55 @@
</provider>
</entry>
</file>
<file leaf-file-name="layer_history.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/layer_history.py">
<file leaf-file-name="transformer_alternative.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_alternative.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="378">
<caret line="16" column="0" lean-forward="false" selection-start-line="16" selection-start-column="0" selection-end-line="16" selection-end-column="0" />
<state relative-caret-position="302">
<caret line="167" column="4" lean-forward="false" selection-start-line="167" selection-start-column="4" selection-end-line="167" selection-end-column="4" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="transformer_mlrf.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_mlrf.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="493">
<caret line="39" column="21" lean-forward="true" selection-start-line="39" selection-start-column="21" selection-end-line="39" selection-end-column="21" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="transformer_dropout.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_dropout.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-2834">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="xception.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/xception.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-972">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding>
<element signature="e#608#646#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file leaf-file-name="attention_lm.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/attention_lm.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="567">
<caret line="45" column="27" lean-forward="true" selection-start-line="45" selection-start-column="27" selection-end-line="45" selection-end-column="27" />
<folding>
<element signature="e#0#23#0" expanded="true" />
<element signature="e#719#757#0" expanded="true" />
</folding>
</state>
</provider>
......@@ -97,8 +133,18 @@
</component>
<component name="FindInProjectRecents">
<findStrings>
<find>modalitie</find>
<find>share</find>
<find>random_seed</find>
<find>AttentionLM</find>
<find>convert</find>
<find>assertEqual</find>
<find>transformer_alt</find>
<find>registry</find>
</findStrings>
<dirStrings>
<dir>C:\Users\LiBei\Desktop\WMT19</dir>
</dirStrings>
</component>
<component name="Git.Settings">
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
......@@ -113,6 +159,9 @@
<option value="$PROJECT_DIR$/tensor2tensor/models/common_hparams.py" />
<option value="$PROJECT_DIR$/tensor2tensor/models/transformer.py" />
<option value="$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py" />
<option value="$PROJECT_DIR$/tensor2tensor/models/transformer_libei.py" />
<option value="$PROJECT_DIR$/tensor2tensor/models/__init__.py" />
<option value="$PROJECT_DIR$/tensor2tensor/models/models.py" />
</list>
</option>
</component>
......@@ -231,6 +280,7 @@
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.16044776" sideWeight="0.5" order="1" side_tool="false" content_ui="combo" />
<window_info id="Docker" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="false" weight="0.33" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
<window_info id="Database" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
<window_info id="SciView" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="true" content_ui="tabs" />
......@@ -240,7 +290,6 @@
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="10" side_tool="false" content_ui="tabs" />
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="combo" />
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
</layout>
</component>
......@@ -286,7 +335,7 @@
<state relative-caret-position="432">
<caret line="16" column="0" lean-forward="true" selection-start-line="16" selection-start-column="0" selection-end-line="16" selection-end-column="0" />
<folding>
<element signature="e#0#23#0" expanded="true" />
<element signature="e#0#23#0" expanded="false" />
</folding>
</state>
</provider>
......@@ -329,101 +378,216 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/common_attention.py">
<entry file="file://$PROJECT_DIR$/tensor2tensor/utils/multistep_optimizer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="640">
<caret line="472" column="21" lean-forward="true" selection-start-line="472" selection-start-column="21" selection-end-line="472" selection-end-column="21" />
<state relative-caret-position="459">
<caret line="33" column="44" lean-forward="true" selection-start-line="33" selection-start-column="44" selection-end-line="33" selection-end-column="44" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_mlrf.py">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/libei.py" />
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/common_hparams.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-162">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<state relative-caret-position="540">
<caret line="30" column="11" lean-forward="false" selection-start-line="30" selection-start-column="11" selection-end-line="30" selection-end-column="11" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_dropout.py">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/layer_history.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<state relative-caret-position="378">
<caret line="16" column="0" lean-forward="false" selection-start-line="16" selection-start-column="0" selection-end-line="16" selection-end-column="0" />
<folding>
<element signature="e#0#23#0" expanded="false" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/common_layers.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="298">
<caret line="428" column="9" lean-forward="false" selection-start-line="428" selection-start-column="9" selection-end-line="428" selection-end-column="9" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_alternative.py">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/common_attention.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-594">
<caret line="40" column="39" lean-forward="false" selection-start-line="40" selection-start-column="31" selection-end-line="40" selection-end-column="39" />
<state relative-caret-position="-162">
<caret line="781" column="33" lean-forward="true" selection-start-line="781" selection-start-column="33" selection-end-line="782" selection-end-column="28" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/utils/multistep_optimizer.py">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_relative_pos.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="459">
<caret line="33" column="44" lean-forward="true" selection-start-line="33" selection-start-column="44" selection-end-line="33" selection-end-column="44" />
<state relative-caret-position="378">
<caret line="210" column="42" lean-forward="false" selection-start-line="210" selection-start-column="42" selection-end-line="210" selection-end-column="42" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/libei.py" />
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/common_hparams.py">
<entry file="file://$PROJECT_DIR$/tensor2tensor/utils/modality.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="540">
<caret line="30" column="11" lean-forward="false" selection-start-line="30" selection-start-column="11" selection-end-line="30" selection-end-column="11" />
<state relative-caret-position="207">
<caret line="47" column="38" lean-forward="false" selection-start-line="47" selection-start-column="38" selection-end-line="47" selection-end-column="38" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/layer_history.py">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_test.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="378">
<caret line="16" column="0" lean-forward="false" selection-start-line="16" selection-start-column="0" selection-end-line="16" selection-end-column="0" />
<folding>
<element signature="e#0#23#0" expanded="true" />
</folding>
<state relative-caret-position="356">
<caret line="58" column="38" lean-forward="false" selection-start-line="58" selection-start-column="38" selection-end-line="58" selection-end-column="38" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/utils/trainer_utils.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="243">
<caret line="197" column="42" lean-forward="false" selection-start-line="197" selection-start-column="42" selection-end-line="197" selection-end-column="42" />
<state relative-caret-position="384">
<caret line="482" column="32" lean-forward="false" selection-start-line="482" selection-start-column="32" selection-end-line="482" selection-end-column="32" />
<folding>
<element signature="e#18286#18629#1" expanded="false" />
<element signature="e#18684#18904#0" expanded="false" />
<element signature="e#18909#18935#0" expanded="false" />
<element signature="e#19415#19927#0" expanded="false" />
<element signature="e#20145#22476#0" expanded="false" />
<element signature="e#22668#23415#0" expanded="false" />
<element signature="e#23535#23889#0" expanded="false" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/common_layers.py">
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/lstm.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="298">
<caret line="428" column="9" lean-forward="false" selection-start-line="428" selection-start-column="9" selection-end-line="428" selection-end-column="9" />
<state relative-caret-position="-324">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/attention_lm_moe.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-386">
<caret line="21" column="0" lean-forward="true" selection-start-line="21" selection-start-column="0" selection-end-line="21" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/utils/registry_test.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-328">
<caret line="95" column="60" lean-forward="false" selection-start-line="95" selection-start-column="60" selection-end-line="95" selection-end-column="60" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/utils/t2t_model.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="207">
<caret line="125" column="31" lean-forward="false" selection-start-line="125" selection-start-column="31" selection-end-line="125" selection-end-column="31" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/utils/registry.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-2425">
<caret line="298" column="24" lean-forward="false" selection-start-line="298" selection-start-column="24" selection-end-line="298" selection-end-column="24" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_alternative.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="302">
<caret line="167" column="4" lean-forward="false" selection-start-line="167" selection-start-column="4" selection-end-line="167" selection-end-column="4" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="352">
<caret line="294" column="0" lean-forward="false" selection-start-line="294" selection-start-column="0" selection-end-line="294" selection-end-column="0" />
<state relative-caret-position="162">
<caret line="40" column="12" lean-forward="true" selection-start-line="40" selection-start-column="12" selection-end-line="40" selection-end-column="12" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/utils/__init__.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="378">
<caret line="14" column="0" lean-forward="true" selection-start-line="14" selection-start-column="0" selection-end-line="14" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/attention_lm.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="567">
<caret line="45" column="27" lean-forward="true" selection-start-line="45" selection-start-column="27" selection-end-line="45" selection-end-column="27" />
<folding>
<element signature="e#719#757#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_mlrf.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="493">
<caret line="39" column="21" lean-forward="true" selection-start-line="39" selection-start-column="21" selection-end-line="39" selection-end-column="21" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_dropout.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-2834">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_libei.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/xception.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-972">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding>
<element signature="e#608#646#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/__init__.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="351">
<caret line="13" column="0" lean-forward="true" selection-start-line="13" selection-start-column="0" selection-end-line="13" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/models.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="537">
<caret line="41" column="40" lean-forward="false" selection-start-line="41" selection-start-column="40" selection-end-line="41" selection-end-column="40" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="379">
<caret line="239" column="5" lean-forward="true" selection-start-line="239" selection-start-column="5" selection-end-line="239" selection-end-column="5" />
<state relative-caret-position="1323">
<caret line="49" column="50" lean-forward="true" selection-start-line="49" selection-start-column="50" selection-end-line="49" selection-end-column="50" />
<folding>
<element signature="e#738#776#0" expanded="true" />
</folding>
......
......@@ -11,4 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
......@@ -39,4 +39,5 @@ from tensor2tensor.models import transformer_mlrf
#from tensor2tensor.models import transformer_fix
from tensor2tensor.models import transformer_relative_pos
from tensor2tensor.models import transformer_dropout
from tensor2tensor.models import transformer_dla
# pylint: enable=unused-import
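
These imports exist purely for their side effects: importing each model module runs its registration decorators, which is why the unused-import pylint warnings are suppressed. A minimal sketch of that pattern, with a simplified stand-in registry (names here are illustrative, not the actual tensor2tensor internals):

# Simplified stand-in for tensor2tensor's registry: the decorator records
# the class in a module-level table at import time and returns it unchanged.
_MODELS = {}

def register_model(cls):
    _MODELS[cls.__name__.lower()] = cls
    return cls

@register_model
class TransformerDla(object):
    pass

# Importing the defining module is enough for the trainer to find the model:
assert _MODELS["transformerdla"] is TransformerDla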
......@@ -33,6 +33,7 @@ from tensor2tensor.models import common_hparams
from tensor2tensor.models import common_layers
from tensor2tensor.utils import registry
from tensor2tensor.utils import t2t_model
from tensor2tensor.models import layer_history
import tensorflow as tf
......@@ -374,7 +375,7 @@ def transformer_ffn_layer(x, hparams):
@registry.register_hparams
def transformer_base():
def transformer_dla():
"""Set of hyperparameters."""
hparams = common_hparams.basic_params1()
hparams.hidden_size = 512
......@@ -427,326 +428,32 @@ def transformer_base():
hparams.add_hparam("relu_dropout_broadcast_dims", "1") # length
hparams.add_hparam("residual_dropout_broadcast_dims", "1") # length
hparams.add_hparam("normalize_before", False)
hparams.add_hparam("use_emb", True)
hparams.add_hparam("encoder_history_type", "learnable_dense")
hparams.add_hparam("decoder_history_type", "learnable_dense")
return hparams
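
The new encoder_history_type/decoder_history_type hparams point at layer_history.py, imported above. A "learnable_dense" history presumably feeds each layer a learned combination of all earlier layer outputs (dense layer aggregation) rather than only the previous one. A NumPy sketch of that idea, under the assumption that the weights form a lower-triangular, normalized combination matrix:

import numpy as np

class LearnableDenseHistory(object):
    # Illustrative only: real weights would be trained variables, not ones.
    def __init__(self, num_layers):
        # weights[k, :k+1] combines the k+1 layer outputs seen so far.
        self.weights = np.tril(np.ones((num_layers + 1, num_layers + 1)))
        self.outputs = []

    def push(self, layer_output):
        self.outputs.append(layer_output)

    def pop(self):
        k = len(self.outputs)
        w = self.weights[k - 1, :k]
        w = w / w.sum()                    # normalized combination
        stacked = np.stack(self.outputs)   # [k, batch, length, hidden]
        return np.tensordot(w, stacked, axes=1)

history = LearnableDenseHistory(num_layers=6)
history.push(np.ones((2, 5, 512)))         # e.g. the embedding output
print(history.pop().shape)                 # (2, 5, 512)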
@registry.register_hparams
def transformer_big():
"""HParams for transfomer big model on WMT."""
hparams = transformer_base()
hparams.hidden_size = 1024
hparams.filter_size = 4096
hparams.num_heads = 16
hparams.batching_mantissa_bits = 2
hparams.residual_dropout = 0.3
return hparams
@registry.register_hparams
def transformer_before():
"""HParams for transfomer big model on WMT."""
hparams = transformer_base()
def transformer_dla_base():
hparams = transformer_dla()
hparams.normalize_before = True
hparams.relu_dropout = 0.1
hparams.attention_dropout = 0.1
hparams.learning_rate = 0.2
hparams.learning_rate_warmup_steps = 8000
hparams.optimizer_adam_beta1 = 0.9
hparams.optimizer_adam_beta2 = 0.997
return hparams
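
transformer_dla_base flips normalize_before, i.e. pre-norm residual blocks: layer normalization is applied to the sublayer input and the residual stream stays unnormalized, an ordering commonly credited with stabilizing deeper stacks under the milder warmup set above. A small sketch of the two orderings (assumed to match the usual pre-/post-norm definitions):

import numpy as np

def layer_norm(x, eps=1e-6):
    mean = x.mean(-1, keepdims=True)
    var = x.var(-1, keepdims=True)
    return (x - mean) / np.sqrt(var + eps)

def residual_block(x, sublayer, normalize_before):
    if normalize_before:                 # pre-norm: norm -> sublayer -> add
        return x + sublayer(layer_norm(x))
    return layer_norm(x + sublayer(x))   # post-norm: sublayer -> add -> norm

x = np.random.randn(2, 5, 8)
print(residual_block(x, lambda h: 2 * h, normalize_before=True).shape)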
@registry.register_hparams
def transformer_before_big():
"""HParams for transfomer big model on WMT."""
hparams = transformer_before()
hparams.hidden_size = 1024
hparams.filter_size = 4096
hparams.num_heads = 16
hparams.batching_mantissa_bits = 2
hparams.residual_dropout = 0.3
return hparams
@registry.register_hparams
def transformer_big_single_gpu():
"""HParams for transformer big model for single gpu."""
hparams = transformer_big()
hparams.residual_dropout = 0.1
hparams.learning_rate_warmup_steps = 16000
hparams.optimizer_adam_beta2 = 0.998
hparams.batching_mantissa_bits = 3
return hparams
@registry.register_hparams
def transformer_base_single_gpu():
"""HParams for transformer base model for single gpu."""
hparams = transformer_base()
hparams.batch_size = 8192
hparams.learning_rate_warmup_steps = 16000
hparams.batching_mantissa_bits = 2
return hparams
@registry.register_hparams
def transformer_big_dr1():
hparams = transformer_base()
hparams.hidden_size = 1024
hparams.filter_size = 4096
hparams.num_heads = 16
hparams.residual_dropout = 0.1
hparams.batching_mantissa_bits = 2
return hparams
@registry.register_hparams
def transformer_big_enfr():
hparams = transformer_big_dr1()
hparams.shared_embedding_and_softmax_weights = int(False)
hparams.filter_size = 8192
hparams.residual_dropout = 0.1
return hparams
@registry.register_hparams
def transformer_big_dr2():
hparams = transformer_big_dr1()
hparams.residual_dropout = 0.2
return hparams
@registry.register_hparams
def transformer_base_ldcd():
"""Set of hyperparameters."""
hparams = transformer_base()
hparams.learning_rate_decay_scheme = "ld&cd"
hparams.learning_rate_ldcd_epoch = 5
hparams.learning_rate_warmup_steps = 4000
return hparams
@registry.register_hparams
def transformer_base_ldcd_n10():
"""Set of hyperparameters."""
hparams = transformer_base()
hparams.learning_rate_decay_scheme = "ld&cd"
hparams.learning_rate_ldcd_epoch = 10
hparams.learning_rate_warmup_steps = 4000
return hparams
@registry.register_hparams
def transformer_base_ldcd_n2():
"""Set of hyperparameters."""
hparams = transformer_base()
hparams.learning_rate_decay_scheme = "ld&cd"
hparams.learning_rate_ldcd_epoch = 2
hparams.learning_rate_warmup_steps = 4000
return hparams
@registry.register_hparams
def transformer_base_ldcd_n1():
"""Set of hyperparameters."""
hparams = transformer_base()
hparams.learning_rate_decay_scheme = "ld&cd"
hparams.learning_rate_ldcd_epoch = 1
hparams.learning_rate_warmup_steps = 4000
return hparams
@registry.register_hparams
def transformer_base_amsgrad():
"""Set of hyperparameters."""
hparams = transformer_base()
hparams.optimizer = "AMSGrad"
return hparams
@registry.register_hparams
def transformer_base_amsgrad_v2():
"""Set of hyperparameters."""
hparams = transformer_base()
hparams.optimizer = "AMSGrad"
hparams.optimizer_adam_beta1 = 0.9
hparams.optimizer_adam_beta2 = 0.999
hparams.optimizer_adam_epsilon = 1e-8
return hparams
@registry.register_hparams
def transformer_base_amsgrad_v3():
"""Set of hyperparameters."""
hparams = transformer_base()
hparams.optimizer = "AMSGrad"
hparams.optimizer_adam_beta1 = 0.9
hparams.optimizer_adam_beta2 = 0.99
hparams.optimizer_adam_epsilon = 1e-8
return hparams
@registry.register_hparams
def transformer_base_amsgrad_v4():
"""Set of hyperparameters."""
hparams = transformer_base()
hparams.optimizer = "AMSGrad"
hparams.optimizer_adam_beta1 = 0.9
hparams.optimizer_adam_beta2 = 0.99
hparams.optimizer_adam_epsilon = 1e-9
return hparams
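
The four AMSGrad variants above differ only in beta2 and epsilon. AMSGrad itself is Adam with one change: the second-moment estimate is replaced by its running maximum, so the effective step size can only shrink. A NumPy sketch of one step (bias correction omitted for brevity):

import numpy as np

def amsgrad_step(param, grad, m, v, v_hat, lr=0.001,
                 beta1=0.9, beta2=0.999, epsilon=1e-8):
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad ** 2
    v_hat = np.maximum(v_hat, v)     # the AMSGrad modification
    param = param - lr * m / (np.sqrt(v_hat) + epsilon)
    return param, m, v, v_hat

p = np.array([1.0])
zeros = np.zeros_like(p)
p, m, v, v_hat = amsgrad_step(p, np.array([0.5]), zeros, zeros, zeros)
print(p)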
@registry.register_hparams
def transformer_base_ldrestart_n3():
"""Set of hyperparameters."""
hparams = transformer_base()
hparams.learning_rate_decay_scheme = "ld&restart"
hparams.learning_rate_ldrestart_epoch = 3
hparams.learning_rate_warmup_steps = 4000
return hparams
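
The "ld&cd" and "ld&restart" decay schemes (and their *_epoch knobs) are custom to this repo's trainer, so their exact form is not visible in this diff. A hypothetical sketch of the general shape such schedules take, assuming linear warmup followed by cosine decay that restarts every cycle, with the epoch hparam setting the cycle length:

import math

def schedule(step, base_lr=0.1, warmup_steps=4000, cycle_steps=50000):
    if step < warmup_steps:                  # linear warmup
        return base_lr * step / warmup_steps
    t = ((step - warmup_steps) % cycle_steps) / float(cycle_steps)
    return 0.5 * base_lr * (1.0 + math.cos(math.pi * t))  # cosine restart

print([round(schedule(s), 4) for s in (1000, 4000, 29000, 54001)])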
@registry.register_hparams
def transformer_base_powersign():
"""Set of hyperparameters."""
hparams = transformer_base()
hparams.optimizer = "PowerSign"
hparams.optimizer_powersign_beta = 0.9
hparams.optimizer_powersign_decay = ""
return hparams
@registry.register_hparams
def transformer_base_powersign_ld():
"""Set of hyperparameters."""
hparams = transformer_base()
hparams.optimizer = "PowerSign"
hparams.optimizer_powersign_beta = 0.9
hparams.optimizer_powersign_decay = "linear"
return hparams
@registry.register_hparams
def transformer_base_powersign_cd():
"""Set of hyperparameters."""
hparams = transformer_base()
hparams.optimizer = "PowerSign"
hparams.optimizer_powersign_beta = 0.9
hparams.optimizer_powersign_decay = "cosine"
hparams.optimizer_powersign_period = 1
return hparams
@registry.register_hparams
def transformer_base_powersign_rd():
"""Set of hyperparameters."""
hparams = transformer_base()
hparams.optimizer = "PowerSign"
hparams.optimizer_powersign_beta = 0.9
hparams.optimizer_powersign_decay = "restart"
hparams.optimizer_powersign_period = 1
return hparams
@registry.register_hparams
def transformer_base_powersign_rd_n10():
"""Set of hyperparameters."""
hparams = transformer_base()
hparams.optimizer = "PowerSign"
hparams.optimizer_powersign_beta = 0.9
hparams.optimizer_powersign_decay = "restart"
hparams.optimizer_powersign_period = 10
return hparams
@registry.register_hparams
def transformer_base_powersign_rd_n20():
"""Set of hyperparameters."""
hparams = transformer_base()
hparams.optimizer = "PowerSign"
hparams.optimizer_powersign_beta = 0.9
hparams.optimizer_powersign_decay = "restart"
hparams.optimizer_powersign_period = 20
return hparams
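
PowerSign (from Bello et al.'s neural optimizer search) scales each step by e raised to the agreement between the gradient's sign and the sign of its running average; the "linear"/"cosine"/"restart" decay options above anneal that exponent over training. A NumPy sketch of one step with no decay:

import numpy as np

def powersign_step(param, grad, m, lr=0.1, beta=0.9, decay=1.0):
    m = beta * m + (1 - beta) * grad            # running gradient average
    scale = np.exp(decay * np.sign(grad) * np.sign(m))
    return param - lr * scale * grad, m

p = np.array([1.0])
p, m = powersign_step(p, np.array([0.2]), np.zeros(1))
print(p)   # step scaled up by e because grad and its average agree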
@registry.register_hparams
def transformer_base_swish1():
"""Set of hyperparameters."""
hparams = transformer_base()
hparams.ffn_layer = "conv_hidden_swish"
hparams.swish_dropout = 0.0
hparams.swish_beta = 1.0
hparams.swish_beta_is_trainable = False
hparams.learning_rate = 0.4
hparams.learning_rate_warmup_steps = 8000
hparams.batch_size = 2048
hparams.optimizer = "MultistepAdam"
hparams.optimizer_multistep_accumulate_steps = 4
return hparams
@registry.register_hparams
def transformer_base_swish_trainable():
"""Set of hyperparameters."""
hparams = transformer_base()
hparams.ffn_layer = "conv_hidden_swish"
hparams.swish_dropout = 0.0
#hparams.swish_beta = 1.0
hparams.swish_beta_is_trainable = True
return hparams
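
conv_hidden_swish swaps the feed-forward ReLU for swish, x * sigmoid(beta * x); with swish_beta_is_trainable=True the beta below would be a learned scalar rather than a fixed hyperparameter. A one-line sketch:

import numpy as np

def swish(x, beta=1.0):
    return x / (1.0 + np.exp(-beta * x))   # equals x * sigmoid(beta * x)

print(swish(np.array([-1.0, 0.0, 1.0])))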
@registry.register_hparams
def transformer_big_adafactor():
def transformer_dla_big():
"""HParams for transfomer big model on WMT."""
hparams = transformer_base()
hparams = transformer_dla_base()
hparams.hidden_size = 1024
hparams.filter_size = 4096
hparams.num_heads = 16
hparams.batching_mantissa_bits = 2
hparams.residual_dropout = 0.3
hparams.optimizer = "Adafactor"
hparams.epsilon = 1e-8
hparams.learning_rate_warmup_steps = 16000
hparams.optimizer_adafactor_beta2 = 0.997
return hparams
@registry.register_hparams
def transformer_base_v2():
"""Set of hyperparameters.
set relu_dropout and attention_dropout as 0.1
"""
hparams = transformer_base()
hparams.attention_dropout = 0.1
hparams.relu_dropout = 0.1
return hparams
@registry.register_hparams
def transformer_base_rpr_dropout1():
hparams = transformer_base()
hparams.max_relative_length = 16
hparams.attention_type = "relative_dot_product"
hparams.relu_dropout = 0.1
hparams.attention_dropout = 0.1
return hparams
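
relative_dot_product with max_relative_length = 16 presumably selects relative position representations in the style of Shaw et al. (2018): pairwise distances are clipped to +/-16, each clipped distance gets a learned embedding, and that embedding contributes a content-position term to the attention logits. A single-head NumPy sketch with illustrative shapes:

import numpy as np

def relative_logits(q, k, rel_emb, max_rel=16):
    length = q.shape[0]
    logits = q @ k.T                                # content-content term
    pos = np.arange(length)
    dist = np.clip(pos[None, :] - pos[:, None], -max_rel, max_rel) + max_rel
    logits += np.einsum("id,ijd->ij", q, rel_emb[dist])  # content-position
    return logits

q, k = np.random.randn(5, 8), np.random.randn(5, 8)
rel_emb = np.random.randn(2 * 16 + 1, 8)            # one vector per distance
print(relative_logits(q, k, rel_emb).shape)         # (5, 5)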
@registry.register_hparams
def transformer_base_v3():
"""Set of hyperparameters.
set filter as 4096
"""
hparams = transformer_base_v2()
hparams.filter_size = 4096
return hparams
@registry.register_hparams
def transformer_big_multistep2():
# uses the MultistepAdam optimizer
hparams = transformer_big()
hparams.optimizer = "MultistepAdam"
hparams.optimizer_multistep_accumulate_steps = 2
hparams.batch_size = 2048
#hparams.attention_dropout = 0.1
#hparams.relu_dropout = 0.1
return hparams
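
MultistepAdam with accumulate_steps = 2 and batch_size = 2048 emulates the big-batch setting on limited memory: gradients from consecutive micro-batches are averaged and a single Adam update is applied, giving an effective batch of 4096. A sketch of the accumulation idea (with a plain SGD step standing in for Adam):

import numpy as np

def accumulate_and_step(param, micro_batch_grads, lr=0.001):
    g = np.mean(micro_batch_grads, axis=0)   # average over accumulate_steps
    return param - lr * g                    # stand-in for the Adam update

p = np.zeros(3)
grads = [np.ones(3), 3 * np.ones(3)]         # accumulate_steps = 2
print(accumulate_and_step(p, grads))         # one update from two batches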
@registry.register_hparams
def transformer_big_adafactor_test():
# uses the Adafactor optimizer
hparams = transformer_big()
hparams.optimizer = "Adafactor"
hparams.learning_rate_warmup_steps = 8000
hparams.batch_size = 4096
hparams.optimizer_adafactor_beta2 = 0.999
return hparams
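
Adafactor keeps optimizer memory sublinear in the weight-matrix size: instead of Adam's full second-moment matrix it stores only per-row and per-column statistics and reconstructs a rank-1 approximation at update time (Shazeer & Stern, 2018). A NumPy sketch of the factored second moment:

import numpy as np

def adafactor_vhat(row, col, grad, beta2=0.999, eps=1e-30):
    g2 = grad ** 2 + eps
    row = beta2 * row + (1 - beta2) * g2.sum(axis=1)   # per-row sums
    col = beta2 * col + (1 - beta2) * g2.sum(axis=0)   # per-column sums
    v_hat = np.outer(row, col) / row.sum()             # rank-1 reconstruction
    return row, col, v_hat

g = np.random.randn(4, 3)
row, col, v_hat = adafactor_vhat(np.zeros(4), np.zeros(3), g)
print(v_hat.shape)                                     # (4, 3)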