Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
W
WMT19-1.0.14
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
0
Issues
0
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
Emmay
WMT19-1.0.14
Commits
42676b59
Commit
42676b59
authored
Feb 18, 2019
by
libei
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix bugs in transformer_dla
parent
a440c641
显示空白字符变更
内嵌
并排
正在显示
4 个修改的文件
包含
246 行增加
和
375 行删除
+246
-375
.idea/workspace.xml
+231
-67
tensor2tensor/models/__init__.py
+0
-1
tensor2tensor/models/models.py
+1
-0
tensor2tensor/models/transformer_dla.py
+14
-307
没有找到文件。
.idea/workspace.xml
查看文件 @
42676b59
...
@@ -3,6 +3,8 @@
...
@@ -3,6 +3,8 @@
<component
name=
"ChangeListManager"
>
<component
name=
"ChangeListManager"
>
<list
default=
"true"
id=
"7d6d9926-f879-4708-ad8e-442bac96b62a"
name=
"Default"
comment=
""
>
<list
default=
"true"
id=
"7d6d9926-f879-4708-ad8e-442bac96b62a"
name=
"Default"
comment=
""
>
<change
beforePath=
"$PROJECT_DIR$/.idea/workspace.xml"
afterPath=
"$PROJECT_DIR$/.idea/workspace.xml"
/>
<change
beforePath=
"$PROJECT_DIR$/.idea/workspace.xml"
afterPath=
"$PROJECT_DIR$/.idea/workspace.xml"
/>
<change
beforePath=
"$PROJECT_DIR$/tensor2tensor/models/__init__.py"
afterPath=
"$PROJECT_DIR$/tensor2tensor/models/__init__.py"
/>
<change
beforePath=
"$PROJECT_DIR$/tensor2tensor/models/models.py"
afterPath=
"$PROJECT_DIR$/tensor2tensor/models/models.py"
/>
<change
beforePath=
"$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py"
afterPath=
"$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py"
/>
<change
beforePath=
"$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py"
afterPath=
"$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py"
/>
</list>
</list>
<option
name=
"EXCLUDED_CONVERTED_TO_IGNORED"
value=
"true"
/>
<option
name=
"EXCLUDED_CONVERTED_TO_IGNORED"
value=
"true"
/>
...
@@ -17,46 +19,38 @@
...
@@ -17,46 +19,38 @@
<file
leaf-file-name=
"transformer.py"
pinned=
"false"
current-in-tab=
"false"
>
<file
leaf-file-name=
"transformer.py"
pinned=
"false"
current-in-tab=
"false"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/transformer.py"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/transformer.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
35
2"
>
<state
relative-caret-position=
"
16
2"
>
<caret
line=
"
294"
column=
"0"
lean-forward=
"false"
selection-start-line=
"294"
selection-start-column=
"0"
selection-end-line=
"294"
selection-end-column=
"0
"
/>
<caret
line=
"
40"
column=
"12"
lean-forward=
"true"
selection-start-line=
"40"
selection-start-column=
"12"
selection-end-line=
"40"
selection-end-column=
"12
"
/>
<folding
/>
<folding
/>
</state>
</state>
</provider>
</provider>
</entry>
</entry>
</file>
</file>
<file
leaf-file-name=
"
common_hparams
.py"
pinned=
"false"
current-in-tab=
"false"
>
<file
leaf-file-name=
"
__init__
.py"
pinned=
"false"
current-in-tab=
"false"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/
common_hparams
.py"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/
__init__
.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
540
"
>
<state
relative-caret-position=
"
351
"
>
<caret
line=
"
30"
column=
"11"
lean-forward=
"false"
selection-start-line=
"30"
selection-start-column=
"11"
selection-end-line=
"30"
selection-end-column=
"11
"
/>
<caret
line=
"
13"
column=
"0"
lean-forward=
"true"
selection-start-line=
"13"
selection-start-column=
"0"
selection-end-line=
"13"
selection-end-column=
"0
"
/>
<folding
/>
<folding
/>
</state>
</state>
</provider>
</provider>
</entry>
</entry>
</file>
</file>
<file
leaf-file-name=
"
trainer_uti
ls.py"
pinned=
"false"
current-in-tab=
"false"
>
<file
leaf-file-name=
"
mode
ls.py"
pinned=
"false"
current-in-tab=
"false"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/
utils/trainer_uti
ls.py"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/
models/mode
ls.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"243"
>
<state
relative-caret-position=
"537"
>
<caret
line=
"197"
column=
"42"
lean-forward=
"false"
selection-start-line=
"197"
selection-start-column=
"42"
selection-end-line=
"197"
selection-end-column=
"42"
/>
<caret
line=
"41"
column=
"40"
lean-forward=
"false"
selection-start-line=
"41"
selection-start-column=
"40"
selection-end-line=
"41"
selection-end-column=
"40"
/>
<folding>
<folding
/>
<element
signature=
"e#18286#18629#1"
expanded=
"false"
/>
<element
signature=
"e#18684#18904#0"
expanded=
"false"
/>
<element
signature=
"e#18909#18935#0"
expanded=
"false"
/>
<element
signature=
"e#19415#19927#0"
expanded=
"false"
/>
<element
signature=
"e#20145#22476#0"
expanded=
"false"
/>
<element
signature=
"e#22668#23415#0"
expanded=
"false"
/>
<element
signature=
"e#23535#23889#0"
expanded=
"false"
/>
</folding>
</state>
</state>
</provider>
</provider>
</entry>
</entry>
</file>
</file>
<file
leaf-file-name=
"
common_layers
.py"
pinned=
"false"
current-in-tab=
"false"
>
<file
leaf-file-name=
"
__init__
.py"
pinned=
"false"
current-in-tab=
"false"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/
models/common_layers
.py"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/
utils/__init__
.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
29
8"
>
<state
relative-caret-position=
"
37
8"
>
<caret
line=
"
428"
column=
"9"
lean-forward=
"false"
selection-start-line=
"428"
selection-start-column=
"9"
selection-end-line=
"428"
selection-end-column=
"9
"
/>
<caret
line=
"
14"
column=
"0"
lean-forward=
"true"
selection-start-line=
"14"
selection-start-column=
"0"
selection-end-line=
"14"
selection-end-column=
"0
"
/>
<folding
/>
<folding
/>
</state>
</state>
</provider>
</provider>
...
@@ -65,8 +59,8 @@
...
@@ -65,8 +59,8 @@
<file
leaf-file-name=
"transformer_dla.py"
pinned=
"false"
current-in-tab=
"true"
>
<file
leaf-file-name=
"transformer_dla.py"
pinned=
"false"
current-in-tab=
"true"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
379
"
>
<state
relative-caret-position=
"
1323
"
>
<caret
line=
"
239"
column=
"5"
lean-forward=
"true"
selection-start-line=
"239"
selection-start-column=
"5"
selection-end-line=
"239"
selection-end-column=
"5
"
/>
<caret
line=
"
49"
column=
"50"
lean-forward=
"true"
selection-start-line=
"49"
selection-start-column=
"50"
selection-end-line=
"49"
selection-end-column=
"50
"
/>
<folding>
<folding>
<element
signature=
"e#738#776#0"
expanded=
"true"
/>
<element
signature=
"e#738#776#0"
expanded=
"true"
/>
</folding>
</folding>
...
@@ -74,13 +68,55 @@
...
@@ -74,13 +68,55 @@
</provider>
</provider>
</entry>
</entry>
</file>
</file>
<file
leaf-file-name=
"
layer_history
.py"
pinned=
"false"
current-in-tab=
"false"
>
<file
leaf-file-name=
"
transformer_alternative
.py"
pinned=
"false"
current-in-tab=
"false"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/
layer_history
.py"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/
transformer_alternative
.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"378"
>
<state
relative-caret-position=
"302"
>
<caret
line=
"16"
column=
"0"
lean-forward=
"false"
selection-start-line=
"16"
selection-start-column=
"0"
selection-end-line=
"16"
selection-end-column=
"0"
/>
<caret
line=
"167"
column=
"4"
lean-forward=
"false"
selection-start-line=
"167"
selection-start-column=
"4"
selection-end-line=
"167"
selection-end-column=
"4"
/>
<folding
/>
</state>
</provider>
</entry>
</file>
<file
leaf-file-name=
"transformer_mlrf.py"
pinned=
"false"
current-in-tab=
"false"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/transformer_mlrf.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"493"
>
<caret
line=
"39"
column=
"21"
lean-forward=
"true"
selection-start-line=
"39"
selection-start-column=
"21"
selection-end-line=
"39"
selection-end-column=
"21"
/>
<folding
/>
</state>
</provider>
</entry>
</file>
<file
leaf-file-name=
"transformer_dropout.py"
pinned=
"false"
current-in-tab=
"false"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/transformer_dropout.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"-2834"
>
<caret
line=
"0"
column=
"0"
lean-forward=
"false"
selection-start-line=
"0"
selection-start-column=
"0"
selection-end-line=
"0"
selection-end-column=
"0"
/>
<folding
/>
</state>
</provider>
</entry>
</file>
<file
leaf-file-name=
"xception.py"
pinned=
"false"
current-in-tab=
"false"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/xception.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"-972"
>
<caret
line=
"0"
column=
"0"
lean-forward=
"false"
selection-start-line=
"0"
selection-start-column=
"0"
selection-end-line=
"0"
selection-end-column=
"0"
/>
<folding>
<element
signature=
"e#608#646#0"
expanded=
"true"
/>
</folding>
</state>
</provider>
</entry>
</file>
<file
leaf-file-name=
"attention_lm.py"
pinned=
"false"
current-in-tab=
"false"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/attention_lm.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"567"
>
<caret
line=
"45"
column=
"27"
lean-forward=
"true"
selection-start-line=
"45"
selection-start-column=
"27"
selection-end-line=
"45"
selection-end-column=
"27"
/>
<folding>
<folding>
<element
signature=
"e#
0#23
#0"
expanded=
"true"
/>
<element
signature=
"e#
719#757
#0"
expanded=
"true"
/>
</folding>
</folding>
</state>
</state>
</provider>
</provider>
...
@@ -97,8 +133,18 @@
...
@@ -97,8 +133,18 @@
</component>
</component>
<component
name=
"FindInProjectRecents"
>
<component
name=
"FindInProjectRecents"
>
<findStrings>
<findStrings>
<find>
modalitie
</find>
<find>
share
</find>
<find>
random_seed
</find>
<find>
random_seed
</find>
<find>
AttentionLM
</find>
<find>
convert
</find>
<find>
assertEqual
</find>
<find>
transformer_alt
</find>
<find>
registry
</find>
</findStrings>
</findStrings>
<dirStrings>
<dir>
C:\Users\LiBei\Desktop\WMT19
</dir>
</dirStrings>
</component>
</component>
<component
name=
"Git.Settings"
>
<component
name=
"Git.Settings"
>
<option
name=
"RECENT_GIT_ROOT_PATH"
value=
"$PROJECT_DIR$"
/>
<option
name=
"RECENT_GIT_ROOT_PATH"
value=
"$PROJECT_DIR$"
/>
...
@@ -113,6 +159,9 @@
...
@@ -113,6 +159,9 @@
<option
value=
"$PROJECT_DIR$/tensor2tensor/models/common_hparams.py"
/>
<option
value=
"$PROJECT_DIR$/tensor2tensor/models/common_hparams.py"
/>
<option
value=
"$PROJECT_DIR$/tensor2tensor/models/transformer.py"
/>
<option
value=
"$PROJECT_DIR$/tensor2tensor/models/transformer.py"
/>
<option
value=
"$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py"
/>
<option
value=
"$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py"
/>
<option
value=
"$PROJECT_DIR$/tensor2tensor/models/transformer_libei.py"
/>
<option
value=
"$PROJECT_DIR$/tensor2tensor/models/__init__.py"
/>
<option
value=
"$PROJECT_DIR$/tensor2tensor/models/models.py"
/>
</list>
</list>
</option>
</option>
</component>
</component>
...
@@ -231,6 +280,7 @@
...
@@ -231,6 +280,7 @@
<window_info
id=
"Project"
active=
"false"
anchor=
"left"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"true"
show_stripe_button=
"true"
weight=
"0.16044776"
sideWeight=
"0.5"
order=
"1"
side_tool=
"false"
content_ui=
"combo"
/>
<window_info
id=
"Project"
active=
"false"
anchor=
"left"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"true"
show_stripe_button=
"true"
weight=
"0.16044776"
sideWeight=
"0.5"
order=
"1"
side_tool=
"false"
content_ui=
"combo"
/>
<window_info
id=
"Docker"
active=
"false"
anchor=
"bottom"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"false"
weight=
"0.33"
sideWeight=
"0.5"
order=
"4"
side_tool=
"false"
content_ui=
"tabs"
/>
<window_info
id=
"Docker"
active=
"false"
anchor=
"bottom"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"false"
weight=
"0.33"
sideWeight=
"0.5"
order=
"4"
side_tool=
"false"
content_ui=
"tabs"
/>
<window_info
id=
"Database"
active=
"false"
anchor=
"right"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"true"
weight=
"0.33"
sideWeight=
"0.5"
order=
"1"
side_tool=
"false"
content_ui=
"tabs"
/>
<window_info
id=
"Database"
active=
"false"
anchor=
"right"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"true"
weight=
"0.33"
sideWeight=
"0.5"
order=
"1"
side_tool=
"false"
content_ui=
"tabs"
/>
<window_info
id=
"Find"
active=
"false"
anchor=
"bottom"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"true"
weight=
"0.33"
sideWeight=
"0.5"
order=
"6"
side_tool=
"false"
content_ui=
"tabs"
/>
<window_info
id=
"SciView"
active=
"false"
anchor=
"right"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"true"
weight=
"0.33"
sideWeight=
"0.5"
order=
"0"
side_tool=
"false"
content_ui=
"tabs"
/>
<window_info
id=
"SciView"
active=
"false"
anchor=
"right"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"true"
weight=
"0.33"
sideWeight=
"0.5"
order=
"0"
side_tool=
"false"
content_ui=
"tabs"
/>
<window_info
id=
"Structure"
active=
"false"
anchor=
"left"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"true"
weight=
"0.25"
sideWeight=
"0.5"
order=
"2"
side_tool=
"false"
content_ui=
"tabs"
/>
<window_info
id=
"Structure"
active=
"false"
anchor=
"left"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"true"
weight=
"0.25"
sideWeight=
"0.5"
order=
"2"
side_tool=
"false"
content_ui=
"tabs"
/>
<window_info
id=
"Favorites"
active=
"false"
anchor=
"left"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"true"
weight=
"0.33"
sideWeight=
"0.5"
order=
"0"
side_tool=
"true"
content_ui=
"tabs"
/>
<window_info
id=
"Favorites"
active=
"false"
anchor=
"left"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"true"
weight=
"0.33"
sideWeight=
"0.5"
order=
"0"
side_tool=
"true"
content_ui=
"tabs"
/>
...
@@ -240,7 +290,6 @@
...
@@ -240,7 +290,6 @@
<window_info
id=
"Commander"
active=
"false"
anchor=
"right"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"true"
weight=
"0.4"
sideWeight=
"0.5"
order=
"2"
side_tool=
"false"
content_ui=
"tabs"
/>
<window_info
id=
"Commander"
active=
"false"
anchor=
"right"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"true"
weight=
"0.4"
sideWeight=
"0.5"
order=
"2"
side_tool=
"false"
content_ui=
"tabs"
/>
<window_info
id=
"Inspection"
active=
"false"
anchor=
"bottom"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"true"
weight=
"0.4"
sideWeight=
"0.5"
order=
"10"
side_tool=
"false"
content_ui=
"tabs"
/>
<window_info
id=
"Inspection"
active=
"false"
anchor=
"bottom"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"true"
weight=
"0.4"
sideWeight=
"0.5"
order=
"10"
side_tool=
"false"
content_ui=
"tabs"
/>
<window_info
id=
"Hierarchy"
active=
"false"
anchor=
"right"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"true"
weight=
"0.25"
sideWeight=
"0.5"
order=
"4"
side_tool=
"false"
content_ui=
"combo"
/>
<window_info
id=
"Hierarchy"
active=
"false"
anchor=
"right"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"true"
weight=
"0.25"
sideWeight=
"0.5"
order=
"4"
side_tool=
"false"
content_ui=
"combo"
/>
<window_info
id=
"Find"
active=
"false"
anchor=
"bottom"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"true"
weight=
"0.33"
sideWeight=
"0.5"
order=
"6"
side_tool=
"false"
content_ui=
"tabs"
/>
<window_info
id=
"Ant Build"
active=
"false"
anchor=
"right"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"true"
weight=
"0.25"
sideWeight=
"0.5"
order=
"3"
side_tool=
"false"
content_ui=
"tabs"
/>
<window_info
id=
"Ant Build"
active=
"false"
anchor=
"right"
auto_hide=
"false"
internal_type=
"DOCKED"
type=
"DOCKED"
visible=
"false"
show_stripe_button=
"true"
weight=
"0.25"
sideWeight=
"0.5"
order=
"3"
side_tool=
"false"
content_ui=
"tabs"
/>
</layout>
</layout>
</component>
</component>
...
@@ -286,7 +335,7 @@
...
@@ -286,7 +335,7 @@
<state
relative-caret-position=
"432"
>
<state
relative-caret-position=
"432"
>
<caret
line=
"16"
column=
"0"
lean-forward=
"true"
selection-start-line=
"16"
selection-start-column=
"0"
selection-end-line=
"16"
selection-end-column=
"0"
/>
<caret
line=
"16"
column=
"0"
lean-forward=
"true"
selection-start-line=
"16"
selection-start-column=
"0"
selection-end-line=
"16"
selection-end-column=
"0"
/>
<folding>
<folding>
<element
signature=
"e#0#23#0"
expanded=
"
tru
e"
/>
<element
signature=
"e#0#23#0"
expanded=
"
fals
e"
/>
</folding>
</folding>
</state>
</state>
</provider>
</provider>
...
@@ -329,101 +378,216 @@
...
@@ -329,101 +378,216 @@
</state>
</state>
</provider>
</provider>
</entry>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/
models/common_attention
.py"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/
utils/multistep_optimizer
.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
640
"
>
<state
relative-caret-position=
"
459
"
>
<caret
line=
"
472"
column=
"21"
lean-forward=
"true"
selection-start-line=
"472"
selection-start-column=
"21"
selection-end-line=
"472"
selection-end-column=
"21
"
/>
<caret
line=
"
33"
column=
"44"
lean-forward=
"true"
selection-start-line=
"33"
selection-start-column=
"44"
selection-end-line=
"33"
selection-end-column=
"44
"
/>
<folding
/>
<folding
/>
</state>
</state>
</provider>
</provider>
</entry>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/transformer_mlrf.py"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/libei.py"
/>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/common_hparams.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
-162
"
>
<state
relative-caret-position=
"
540
"
>
<caret
line=
"
0"
column=
"0"
lean-forward=
"false"
selection-start-line=
"0"
selection-start-column=
"0"
selection-end-line=
"0"
selection-end-column=
"0
"
/>
<caret
line=
"
30"
column=
"11"
lean-forward=
"false"
selection-start-line=
"30"
selection-start-column=
"11"
selection-end-line=
"30"
selection-end-column=
"11
"
/>
<folding
/>
<folding
/>
</state>
</state>
</provider>
</provider>
</entry>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/
transformer_dropout
.py"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/
layer_history
.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"0"
>
<state
relative-caret-position=
"378"
>
<caret
line=
"0"
column=
"0"
lean-forward=
"false"
selection-start-line=
"0"
selection-start-column=
"0"
selection-end-line=
"0"
selection-end-column=
"0"
/>
<caret
line=
"16"
column=
"0"
lean-forward=
"false"
selection-start-line=
"16"
selection-start-column=
"0"
selection-end-line=
"16"
selection-end-column=
"0"
/>
<folding>
<element
signature=
"e#0#23#0"
expanded=
"false"
/>
</folding>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/common_layers.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"298"
>
<caret
line=
"428"
column=
"9"
lean-forward=
"false"
selection-start-line=
"428"
selection-start-column=
"9"
selection-end-line=
"428"
selection-end-column=
"9"
/>
<folding
/>
<folding
/>
</state>
</state>
</provider>
</provider>
</entry>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/
transformer_alternative
.py"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/
common_attention
.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"-
594
"
>
<state
relative-caret-position=
"-
162
"
>
<caret
line=
"
40"
column=
"39"
lean-forward=
"false"
selection-start-line=
"40"
selection-start-column=
"31"
selection-end-line=
"40"
selection-end-column=
"39
"
/>
<caret
line=
"
781"
column=
"33"
lean-forward=
"true"
selection-start-line=
"781"
selection-start-column=
"33"
selection-end-line=
"782"
selection-end-column=
"28
"
/>
<folding
/>
<folding
/>
</state>
</state>
</provider>
</provider>
</entry>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/
utils/multistep_optimizer
.py"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/
models/transformer_relative_pos
.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
459
"
>
<state
relative-caret-position=
"
378
"
>
<caret
line=
"
33"
column=
"44"
lean-forward=
"true"
selection-start-line=
"33"
selection-start-column=
"44"
selection-end-line=
"33"
selection-end-column=
"44
"
/>
<caret
line=
"
210"
column=
"42"
lean-forward=
"false"
selection-start-line=
"210"
selection-start-column=
"42"
selection-end-line=
"210"
selection-end-column=
"42
"
/>
<folding
/>
<folding
/>
</state>
</state>
</provider>
</provider>
</entry>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/libei.py"
/>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/utils/modality.py"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/common_hparams.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
540
"
>
<state
relative-caret-position=
"
207
"
>
<caret
line=
"
30"
column=
"11"
lean-forward=
"false"
selection-start-line=
"30"
selection-start-column=
"11"
selection-end-line=
"30"
selection-end-column=
"11
"
/>
<caret
line=
"
47"
column=
"38"
lean-forward=
"false"
selection-start-line=
"47"
selection-start-column=
"38"
selection-end-line=
"47"
selection-end-column=
"38
"
/>
<folding
/>
<folding
/>
</state>
</state>
</provider>
</provider>
</entry>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/
layer_history
.py"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/
transformer_test
.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"378"
>
<state
relative-caret-position=
"356"
>
<caret
line=
"16"
column=
"0"
lean-forward=
"false"
selection-start-line=
"16"
selection-start-column=
"0"
selection-end-line=
"16"
selection-end-column=
"0"
/>
<caret
line=
"58"
column=
"38"
lean-forward=
"false"
selection-start-line=
"58"
selection-start-column=
"38"
selection-end-line=
"58"
selection-end-column=
"38"
/>
<folding>
<folding
/>
<element
signature=
"e#0#23#0"
expanded=
"true"
/>
</folding>
</state>
</state>
</provider>
</provider>
</entry>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/utils/trainer_utils.py"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/utils/trainer_utils.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
243
"
>
<state
relative-caret-position=
"
384
"
>
<caret
line=
"
197"
column=
"42"
lean-forward=
"false"
selection-start-line=
"197"
selection-start-column=
"42"
selection-end-line=
"197"
selection-end-column=
"4
2"
/>
<caret
line=
"
482"
column=
"32"
lean-forward=
"false"
selection-start-line=
"482"
selection-start-column=
"32"
selection-end-line=
"482"
selection-end-column=
"3
2"
/>
<folding>
<folding>
<element
signature=
"e#18286#18629#1"
expanded=
"false"
/>
<element
signature=
"e#18286#18629#1"
expanded=
"false"
/>
<element
signature=
"e#18684#18904#0"
expanded=
"false"
/>
<element
signature=
"e#18684#18904#0"
expanded=
"false"
/>
<element
signature=
"e#18909#18935#0"
expanded=
"false"
/>
<element
signature=
"e#18909#18935#0"
expanded=
"false"
/>
<element
signature=
"e#19415#19927#0"
expanded=
"false"
/>
<element
signature=
"e#19415#19927#0"
expanded=
"false"
/>
<element
signature=
"e#20145#22476#0"
expanded=
"false"
/>
<element
signature=
"e#22668#23415#0"
expanded=
"false"
/>
<element
signature=
"e#22668#23415#0"
expanded=
"false"
/>
<element
signature=
"e#23535#23889#0"
expanded=
"false"
/>
<element
signature=
"e#23535#23889#0"
expanded=
"false"
/>
</folding>
</folding>
</state>
</state>
</provider>
</provider>
</entry>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/
common_layers
.py"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/
lstm
.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"298"
>
<state
relative-caret-position=
"-324"
>
<caret
line=
"428"
column=
"9"
lean-forward=
"false"
selection-start-line=
"428"
selection-start-column=
"9"
selection-end-line=
"428"
selection-end-column=
"9"
/>
<caret
line=
"0"
column=
"0"
lean-forward=
"false"
selection-start-line=
"0"
selection-start-column=
"0"
selection-end-line=
"0"
selection-end-column=
"0"
/>
<folding
/>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/attention_lm_moe.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"-386"
>
<caret
line=
"21"
column=
"0"
lean-forward=
"true"
selection-start-line=
"21"
selection-start-column=
"0"
selection-end-line=
"21"
selection-end-column=
"0"
/>
<folding
/>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/utils/registry_test.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"-328"
>
<caret
line=
"95"
column=
"60"
lean-forward=
"false"
selection-start-line=
"95"
selection-start-column=
"60"
selection-end-line=
"95"
selection-end-column=
"60"
/>
<folding
/>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/utils/t2t_model.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"207"
>
<caret
line=
"125"
column=
"31"
lean-forward=
"false"
selection-start-line=
"125"
selection-start-column=
"31"
selection-end-line=
"125"
selection-end-column=
"31"
/>
<folding
/>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/utils/registry.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"-2425"
>
<caret
line=
"298"
column=
"24"
lean-forward=
"false"
selection-start-line=
"298"
selection-start-column=
"24"
selection-end-line=
"298"
selection-end-column=
"24"
/>
<folding
/>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/transformer_alternative.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"302"
>
<caret
line=
"167"
column=
"4"
lean-forward=
"false"
selection-start-line=
"167"
selection-start-column=
"4"
selection-end-line=
"167"
selection-end-column=
"4"
/>
<folding
/>
<folding
/>
</state>
</state>
</provider>
</provider>
</entry>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/transformer.py"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/transformer.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"352"
>
<state
relative-caret-position=
"162"
>
<caret
line=
"294"
column=
"0"
lean-forward=
"false"
selection-start-line=
"294"
selection-start-column=
"0"
selection-end-line=
"294"
selection-end-column=
"0"
/>
<caret
line=
"40"
column=
"12"
lean-forward=
"true"
selection-start-line=
"40"
selection-start-column=
"12"
selection-end-line=
"40"
selection-end-column=
"12"
/>
<folding
/>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/utils/__init__.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"378"
>
<caret
line=
"14"
column=
"0"
lean-forward=
"true"
selection-start-line=
"14"
selection-start-column=
"0"
selection-end-line=
"14"
selection-end-column=
"0"
/>
<folding
/>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/attention_lm.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"567"
>
<caret
line=
"45"
column=
"27"
lean-forward=
"true"
selection-start-line=
"45"
selection-start-column=
"27"
selection-end-line=
"45"
selection-end-column=
"27"
/>
<folding>
<element
signature=
"e#719#757#0"
expanded=
"true"
/>
</folding>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/transformer_mlrf.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"493"
>
<caret
line=
"39"
column=
"21"
lean-forward=
"true"
selection-start-line=
"39"
selection-start-column=
"21"
selection-end-line=
"39"
selection-end-column=
"21"
/>
<folding
/>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/transformer_dropout.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"-2834"
>
<caret
line=
"0"
column=
"0"
lean-forward=
"false"
selection-start-line=
"0"
selection-start-column=
"0"
selection-end-line=
"0"
selection-end-column=
"0"
/>
<folding
/>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/transformer_libei.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"0"
>
<caret
line=
"0"
column=
"0"
lean-forward=
"false"
selection-start-line=
"0"
selection-start-column=
"0"
selection-end-line=
"0"
selection-end-column=
"0"
/>
<folding
/>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/xception.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"-972"
>
<caret
line=
"0"
column=
"0"
lean-forward=
"false"
selection-start-line=
"0"
selection-start-column=
"0"
selection-end-line=
"0"
selection-end-column=
"0"
/>
<folding>
<element
signature=
"e#608#646#0"
expanded=
"true"
/>
</folding>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/__init__.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"351"
>
<caret
line=
"13"
column=
"0"
lean-forward=
"true"
selection-start-line=
"13"
selection-start-column=
"0"
selection-end-line=
"13"
selection-end-column=
"0"
/>
<folding
/>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/models.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"537"
>
<caret
line=
"41"
column=
"40"
lean-forward=
"false"
selection-start-line=
"41"
selection-start-column=
"40"
selection-end-line=
"41"
selection-end-column=
"40"
/>
<folding
/>
<folding
/>
</state>
</state>
</provider>
</provider>
</entry>
</entry>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py"
>
<entry
file=
"file://$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"
379
"
>
<state
relative-caret-position=
"
1323
"
>
<caret
line=
"
239"
column=
"5"
lean-forward=
"true"
selection-start-line=
"239"
selection-start-column=
"5"
selection-end-line=
"239"
selection-end-column=
"5
"
/>
<caret
line=
"
49"
column=
"50"
lean-forward=
"true"
selection-start-line=
"49"
selection-start-column=
"50"
selection-end-line=
"49"
selection-end-column=
"50
"
/>
<folding>
<folding>
<element
signature=
"e#738#776#0"
expanded=
"true"
/>
<element
signature=
"e#738#776#0"
expanded=
"true"
/>
</folding>
</folding>
...
...
tensor2tensor/models/__init__.py
查看文件 @
42676b59
...
@@ -11,4 +11,3 @@
...
@@ -11,4 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
tensor2tensor/models/models.py
查看文件 @
42676b59
...
@@ -39,4 +39,5 @@ from tensor2tensor.models import transformer_mlrf
...
@@ -39,4 +39,5 @@ from tensor2tensor.models import transformer_mlrf
#from tensor2tensor.models import transformer_fix
#from tensor2tensor.models import transformer_fix
from
tensor2tensor.models
import
transformer_relative_pos
from
tensor2tensor.models
import
transformer_relative_pos
from
tensor2tensor.models
import
transformer_dropout
from
tensor2tensor.models
import
transformer_dropout
from
tensor2tensor.models
import
transformer_dla
# pylint: enable=unused-import
# pylint: enable=unused-import
tensor2tensor/models/transformer_dla.py
查看文件 @
42676b59
...
@@ -33,6 +33,7 @@ from tensor2tensor.models import common_hparams
...
@@ -33,6 +33,7 @@ from tensor2tensor.models import common_hparams
from
tensor2tensor.models
import
common_layers
from
tensor2tensor.models
import
common_layers
from
tensor2tensor.utils
import
registry
from
tensor2tensor.utils
import
registry
from
tensor2tensor.utils
import
t2t_model
from
tensor2tensor.utils
import
t2t_model
from
tensor2tensor.models
import
layer_history
import
tensorflow
as
tf
import
tensorflow
as
tf
...
@@ -374,7 +375,7 @@ def transformer_ffn_layer(x, hparams):
...
@@ -374,7 +375,7 @@ def transformer_ffn_layer(x, hparams):
@registry.register_hparams
@registry.register_hparams
def
transformer_
base
():
def
transformer_
dla
():
"""Set of hyperparameters."""
"""Set of hyperparameters."""
hparams
=
common_hparams
.
basic_params1
()
hparams
=
common_hparams
.
basic_params1
()
hparams
.
hidden_size
=
512
hparams
.
hidden_size
=
512
...
@@ -427,326 +428,32 @@ def transformer_base():
...
@@ -427,326 +428,32 @@ def transformer_base():
hparams
.
add_hparam
(
"relu_dropout_broadcast_dims"
,
"1"
)
# length
hparams
.
add_hparam
(
"relu_dropout_broadcast_dims"
,
"1"
)
# length
hparams
.
add_hparam
(
"residual_dropout_broadcast_dims"
,
"1"
)
# length
hparams
.
add_hparam
(
"residual_dropout_broadcast_dims"
,
"1"
)
# length
hparams
.
add_hparam
(
"normalize_before"
,
False
)
hparams
.
add_hparam
(
"normalize_before"
,
False
)
hparams
.
add_hparam
(
"use_emb"
,
True
)
hparams
.
add_hparam
(
"encoder_history_type"
,
"learnable_dense"
)
hparams
.
add_hparam
(
"decoder_history_type"
,
"learnable_dense"
)
return
hparams
return
hparams
@registry.register_hparams
@registry.register_hparams
def
transformer_big
():
def
transformer_dla_base
():
"""HParams for transfomer big model on WMT."""
hparams
=
transformer_dla
()
hparams
=
transformer_base
()
hparams
.
hidden_size
=
1024
hparams
.
filter_size
=
4096
hparams
.
num_heads
=
16
hparams
.
batching_mantissa_bits
=
2
hparams
.
residual_dropout
=
0.3
return
hparams
@registry.register_hparams
def
transformer_before
():
"""HParams for transfomer big model on WMT."""
hparams
=
transformer_base
()
hparams
.
normalize_before
=
True
hparams
.
normalize_before
=
True
hparams
.
relu_dropout
=
0.1
hparams
.
attention_dropout
=
0.1
hparams
.
attention_dropout
=
0.1
hparams
.
learning_rate
=
0.2
hparams
.
learning_rate_warmup_steps
=
8000
hparams
.
optimizer_adam_beta1
=
0.9
hparams
.
optimizer_adam_beta2
=
0.997
return
hparams
@registry.register_hparams
def
transformer_before_big
():
"""HParams for transfomer big model on WMT."""
hparams
=
transformer_before
()
hparams
.
hidden_size
=
1024
hparams
.
filter_size
=
4096
hparams
.
num_heads
=
16
hparams
.
batching_mantissa_bits
=
2
hparams
.
residual_dropout
=
0.3
return
hparams
@registry.register_hparams
def
transformer_big_single_gpu
():
"""HParams for transformer big model for single gpu."""
hparams
=
transformer_big
()
hparams
.
residual_dropout
=
0.1
hparams
.
residual_dropout
=
0.1
hparams
.
learning_rate_warmup_steps
=
16000
hparams
.
learning_rate
=
0.4
hparams
.
optimizer_adam_beta2
=
0.998
hparams
.
learning_rate_warmup_steps
=
8000
hparams
.
batching_mantissa_bits
=
3
hparams
.
batch_size
=
2048
return
hparams
hparams
.
optimizer
=
"MultistepAdam"
hparams
.
optimizer_multistep_accumulate_steps
=
4
@registry.register_hparams
def
transformer_base_single_gpu
():
"""HParams for transformer base model for single gpu."""
hparams
=
transformer_base
()
hparams
.
batch_size
=
8192
hparams
.
learning_rate_warmup_steps
=
16000
hparams
.
batching_mantissa_bits
=
2
return
hparams
@registry.register_hparams
def
transformer_big_dr1
():
hparams
=
transformer_base
()
hparams
.
hidden_size
=
1024
hparams
.
filter_size
=
4096
hparams
.
num_heads
=
16
hparams
.
residual_dropout
=
0.1
hparams
.
batching_mantissa_bits
=
2
return
hparams
@registry.register_hparams
def
transformer_big_enfr
():
hparams
=
transformer_big_dr1
()
hparams
.
shared_embedding_and_softmax_weights
=
int
(
False
)
hparams
.
filter_size
=
8192
hparams
.
residual_dropout
=
0.1
return
hparams
@registry.register_hparams
def
transformer_big_dr2
():
hparams
=
transformer_big_dr1
()
hparams
.
residual_dropout
=
0.2
return
hparams
@registry.register_hparams
def
transformer_base_ldcd
():
"""Set of hyperparameters."""
hparams
=
transformer_base
()
hparams
.
learning_rate_decay_scheme
=
"ld&cd"
hparams
.
learning_rate_ldcd_epoch
=
5
hparams
.
learning_rate_warmup_steps
=
4000
return
hparams
@registry.register_hparams
def
transformer_base_ldcd_n10
():
"""Set of hyperparameters."""
hparams
=
transformer_base
()
hparams
.
learning_rate_decay_scheme
=
"ld&cd"
hparams
.
learning_rate_ldcd_epoch
=
10
hparams
.
learning_rate_warmup_steps
=
4000
return
hparams
@registry.register_hparams
def
transformer_base_ldcd_n2
():
"""Set of hyperparameters."""
hparams
=
transformer_base
()
hparams
.
learning_rate_decay_scheme
=
"ld&cd"
hparams
.
learning_rate_ldcd_epoch
=
2
hparams
.
learning_rate_warmup_steps
=
4000
return
hparams
@registry.register_hparams
def
transformer_base_ldcd_n1
():
"""Set of hyperparameters."""
hparams
=
transformer_base
()
hparams
.
learning_rate_decay_scheme
=
"ld&cd"
hparams
.
learning_rate_ldcd_epoch
=
1
hparams
.
learning_rate_warmup_steps
=
4000
return
hparams
@registry.register_hparams
def
transformer_base_amsgrad
():
"""Set of hyperparameters."""
hparams
=
transformer_base
()
hparams
.
optimizer
=
"AMSGrad"
return
hparams
@registry.register_hparams
def
transformer_base_amsgrad_v2
():
"""Set of hyperparameters."""
hparams
=
transformer_base
()
hparams
.
optimizer
=
"AMSGrad"
hparams
.
optimizer_adam_beta1
=
0.9
hparams
.
optimizer_adam_beta2
=
0.999
hparams
.
optimizer_adam_epsilon
=
1e-8
return
hparams
@registry.register_hparams
def
transformer_base_amsgrad_v3
():
"""Set of hyperparameters."""
hparams
=
transformer_base
()
hparams
.
optimizer
=
"AMSGrad"
hparams
.
optimizer_adam_beta1
=
0.9
hparams
.
optimizer_adam_beta2
=
0.99
hparams
.
optimizer_adam_epsilon
=
1e-8
return
hparams
@registry.register_hparams
def
transformer_base_amsgrad_v4
():
"""Set of hyperparameters."""
hparams
=
transformer_base
()
hparams
.
optimizer
=
"AMSGrad"
hparams
.
optimizer_adam_beta1
=
0.9
hparams
.
optimizer_adam_beta2
=
0.99
hparams
.
optimizer_adam_epsilon
=
1e-9
return
hparams
@registry.register_hparams
def
transformer_base_ldrestart_n3
():
"""Set of hyperparameters."""
hparams
=
transformer_base
()
hparams
.
learning_rate_decay_scheme
=
"ld&restart"
hparams
.
learning_rate_ldrestart_epoch
=
3
hparams
.
learning_rate_warmup_steps
=
4000
return
hparams
@registry.register_hparams
def
transformer_base_powersign
():
"""Set of hyperparameters."""
hparams
=
transformer_base
()
hparams
.
optimizer
=
"PowerSign"
hparams
.
optimizer_powersign_beta
=
0.9
hparams
.
optimizer_powersign_decay
=
""
return
hparams
@registry.register_hparams
def
transformer_base_powersign_ld
():
"""Set of hyperparameters."""
hparams
=
transformer_base
()
hparams
.
optimizer
=
"PowerSign"
hparams
.
optimizer_powersign_beta
=
0.9
hparams
.
optimizer_powersign_decay
=
"linear"
return
hparams
@registry.register_hparams
def
transformer_base_powersign_cd
():
"""Set of hyperparameters."""
hparams
=
transformer_base
()
hparams
.
optimizer
=
"PowerSign"
hparams
.
optimizer_powersign_beta
=
0.9
hparams
.
optimizer_powersign_decay
=
"cosine"
hparams
.
optimizer_powersign_period
=
1
return
hparams
@registry.register_hparams
def
transformer_base_powersign_rd
():
"""Set of hyperparameters."""
hparams
=
transformer_base
()
hparams
.
optimizer
=
"PowerSign"
hparams
.
optimizer_powersign_beta
=
0.9
hparams
.
optimizer_powersign_decay
=
"restart"
hparams
.
optimizer_powersign_period
=
1
return
hparams
@registry.register_hparams
def
transformer_base_powersign_rd_n10
():
"""Set of hyperparameters."""
hparams
=
transformer_base
()
hparams
.
optimizer
=
"PowerSign"
hparams
.
optimizer_powersign_beta
=
0.9
hparams
.
optimizer_powersign_decay
=
"restart"
hparams
.
optimizer_powersign_period
=
10
return
hparams
@registry.register_hparams
def
transformer_base_powersign_rd_n20
():
"""Set of hyperparameters."""
hparams
=
transformer_base
()
hparams
.
optimizer
=
"PowerSign"
hparams
.
optimizer_powersign_beta
=
0.9
hparams
.
optimizer_powersign_decay
=
"restart"
hparams
.
optimizer_powersign_period
=
20
return
hparams
@registry.register_hparams
def
transformer_base_swish1
():
"""Set of hyperparameters."""
hparams
=
transformer_base
()
hparams
.
ffn_layer
=
"conv_hidden_swish"
hparams
.
swish_dropout
=
0.0
hparams
.
swish_beta
=
1.0
hparams
.
swish_beta_is_trainable
=
False
return
hparams
return
hparams
@registry.register_hparams
@registry.register_hparams
def
transformer_base_swish_trainable
():
def
transformer_dla_big
():
"""Set of hyperparameters."""
hparams
=
transformer_base
()
hparams
.
ffn_layer
=
"conv_hidden_swish"
hparams
.
swish_dropout
=
0.0
#hparams.swish_beta = 1.0
hparams
.
swish_beta_is_trainable
=
True
return
hparams
@registry.register_hparams
def
transformer_big_adafactor
():
"""HParams for transfomer big model on WMT."""
"""HParams for transfomer big model on WMT."""
hparams
=
transformer_base
()
hparams
=
transformer_
dla_
base
()
hparams
.
hidden_size
=
1024
hparams
.
hidden_size
=
1024
hparams
.
filter_size
=
4096
hparams
.
filter_size
=
4096
hparams
.
num_heads
=
16
hparams
.
num_heads
=
16
hparams
.
batching_mantissa_bits
=
2
hparams
.
batching_mantissa_bits
=
2
hparams
.
residual_dropout
=
0.3
hparams
.
residual_dropout
=
0.3
hparams
.
optimizer
=
"Adafactor"
hparams
.
epsilon
=
1e-8
hparams
.
learning_rate_warmup_steps
=
16000
hparams
.
optimizer_adafactor_beta2
=
0.997
return
hparams
@registry.register_hparams
def
transformer_base_v2
():
"""Set of hyperparameters.
set relu_dropout and attention_dropout as 0.1
"""
hparams
=
transformer_base
()
hparams
.
attention_dropout
=
0.1
hparams
.
relu_dropout
=
0.1
return
hparams
@registry.register_hparams
def
transformer_base_rpr_dropout1
():
hparams
=
transformer_base
()
hparams
.
max_relative_length
=
16
hparams
.
attention_type
=
"relative_dot_product"
hparams
.
relu_dropout
=
0.1
hparams
.
attention_dropout
=
0.1
return
hparams
@registry.register_hparams
def
transformer_base_v3
():
"""Set of hyperparameters.
set filter as 4096
"""
hparams
=
transformer_base_v2
()
hparams
.
filter_size
=
4096
return
hparams
@registry.register_hparams
def
transformer_big_multistep2
():
# new model use optimizer MultistepAdam
hparams
=
transformer_big
()
hparams
.
optimizer
=
"MultistepAdam"
hparams
.
optimizer_multistep_accumulate_steps
=
2
hparams
.
batch_size
=
2048
#hparams.attention_dropout = 0.1
#hparams.relu_dropout = 0.1
return
hparams
@registry.register_hparams
def
transformer_big_adafactor_test
():
# new model use optimizer MultistepAdam
hparams
=
transformer_big
()
hparams
.
optimizer
=
"Adafactor"
hparams
.
learning_rate_warmup_steps
=
8000
hparams
.
batch_size
=
4096
hparams
.
optimizer_adafactor_beta2
=
0.999
return
hparams
return
hparams
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论