libei / WMT19-1.0.14 · Commits

Commit 90010cd3, authored Feb 16, 2019 by libei

    reviese bugs

Parent: fb9ee9e7

Showing 3 changed files with 153 additions and 345 deletions:

  .idea/workspace.xml                        +47   -56
  tensor2tensor/models/transformer.py         +3    -0
  tensor2tensor/models/transformer_dla.py   +103  -289
.idea/workspace.xml  +47  -56

  (IDE workspace state only; no source changes.) The diff updates
  ChangeListManager's default change list (workspace.xml, common_hparams.py,
  transformer.py, trainer_utils.py and transformer_dla.py listed as modified),
  the FileEditorManager tabs and caret/selection positions for transformer.py,
  common_hparams.py, trainer_utils.py, transformer_dla.py and layer_history.py
  (transformer_dla.py becomes the current tab), the recent-files list, the
  project-pane order, ProjectFrameBounds/ToolWindowManager frame state
  (extendedState 6 -> 7) and tool-window layout, and PropertiesComponent's
  last_opened_file_path (now "$PROJECT_DIR$/../DeepTransformer-v4").
tensor2tensor/models/transformer.py  +3  -0

@@ -194,6 +194,9 @@ def transformer_encoder(encoder_input,
            broadcast_dims=residual_dropout_broadcast_dims)
        x = residual + x
        x = may_be_layernorm(x, hparams, after=True)
  if hparams.normalize_before:
    x = may_be_layernorm(x, hparams, before=True, name="norm_top")
  return x
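The three added lines apply one final layer normalization ("norm_top") when hparams.normalize_before is set: in a pre-norm stack each sublayer normalizes its own input, so the residual stream leaving the last layer would otherwise go un-normalized. A framework-free sketch of the two orderings (names here are illustrative, not the repository's API):

# Illustrative sketch only: the two residual orderings normalize_before switches
# between. "sublayer" stands in for attention/FFN, "layer_norm" for
# common_layers.layer_norm.
def post_norm_block(x, sublayer, layer_norm):
  # normalize_before=False (classic Transformer): norm after the residual add
  return layer_norm(x + sublayer(x))

def pre_norm_block(x, sublayer, layer_norm):
  # normalize_before=True: norm the sublayer input, keep the residual stream raw
  return x + sublayer(layer_norm(x))

def encoder_stack_sketch(x, sublayers, layer_norm, normalize_before):
  block = pre_norm_block if normalize_before else post_norm_block
  for sublayer in sublayers:
    x = block(x, sublayer, layer_norm)
  if normalize_before:
    x = layer_norm(x)  # the added "norm_top" step
  return x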
tensor2tensor/models/transformer_dla.py  +103  -289
@@ -33,7 +33,6 @@ from tensor2tensor.models import common_hparams
from tensor2tensor.models import common_layers
from tensor2tensor.utils import registry
from tensor2tensor.utils import t2t_model
from tensor2tensor.models import layer_history
import tensorflow as tf
@@ -57,9 +56,6 @@ class TransformerDLA(t2t_model.T2TModel):
    (decoder_input, decoder_self_attention_bias) = transformer_prepare_decoder(
        targets, hparams)

    def residual_fn(x, y, dropout_broadcast_dims=None):
      return common_layers.layer_norm(x + common_layers.dropout_with_broadcast_dims(
          y, 1.0 - hparams.residual_dropout, broadcast_dims=dropout_broadcast_dims))

    # encoder_input = tf.squeeze(encoder_input, 2)
    # decoder_input = tf.squeeze(decoder_input, 2)
@@ -68,16 +64,19 @@ class TransformerDLA(t2t_model.T2TModel):
    encoder_layer = layer_history.CreateLayerHistory(self._hparams, True,
                                                     name="encoder")
    encoder_output = transformer_encoder(encoder_input, residual_fn,
    encoder_output = transformer_encoder(encoder_input,
                                         encoder_attention_bias, hparams,
                                         encoder_layer)
    decoder_layer = layer_history.CreateLayerHistory(self._hparams, False,
                                                     name="decoder")
    decoder_output = transformer_decoder(decoder_input, encoder_output,
                                         residual_fn,
                                         decoder_self_attention_bias,
                                         decoder_input, encoder_output,
                                         decoder_self_attention_bias,
                                         encoder_attention_bias, hparams,
                                         decoder_layer)
    decoder_output = tf.expand_dims(decoder_output, 2)
    return decoder_output
@@ -132,8 +131,14 @@ def transformer_prepare_decoder(targets, hparams):
  return (decoder_input, decoder_self_attention_bias)


def may_be_layernorm(input, hparams, before=False, after=False, name=None):
  assert before ^ after
  if after ^ hparams.normalize_before:
    return common_layers.layer_norm(input, name=name)
  else:
    return input


def transformer_encoder(encoder_input,
                        residual_fn,
                        encoder_self_attention_bias,
                        hparams,
                        encoder_layer,
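may_be_layernorm is called at both the "before" and the "after" position of every sublayer, and the XOR against hparams.normalize_before decides which of the two calls actually normalizes. A pure-Python restatement of that decision (the `normalize` argument stands in for common_layers.layer_norm; this is a sketch, not repository code):

# Sketch of the XOR logic above.
def may_be_layernorm_sketch(x, normalize_before, before=False, after=False,
                            normalize=lambda t: t):
  assert before ^ after           # each call site sets exactly one flag
  if after ^ normalize_before:    # "after" call in a post-norm model, or
    return normalize(x)           # "before" call in a pre-norm model
  return x                        # the other call site is a no-op

# Decision table it encodes:
#   normalize_before=False: before=True -> identity,   after=True -> layer_norm
#   normalize_before=True : before=True -> layer_norm, after=True -> identity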
@@ -163,11 +168,15 @@ def transformer_encoder(encoder_input,
  # Summaries don't work in multi-problem setting yet.
  summaries = "problems" not in hparams.values() or len(hparams.problems) == 1
  with tf.variable_scope(name):
    for layer in xrange(hparams.num_hidden_layers):
    if hparams.use_emb:
      encoder_layer.add(x)
    for layer in xrange(hparams.encoder_layers):
      with tf.variable_scope("layer_%d" % layer):
        x = residual_fn(x, common_attention.multihead_attention(
        #self-attention network
        residual = x
        x = may_be_layernorm(x, hparams, before=True)
        x = common_attention.multihead_attention(
            x, None, encoder_self_attention_bias,
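The encoder loop now iterates over hparams.encoder_layers instead of num_hidden_layers and, when hparams.use_emb is set, records the embedding output in the layer-history object created by layer_history.CreateLayerHistory in model_fn above. The layer_history module itself is not part of this diff, so the following is only a hypothetical sketch of the accumulator interface these calls imply (class name and the uniform average are assumptions, not the repository's implementation):

# Hypothetical sketch: what encoder_layer/decoder_layer appear to provide.
# Not the real tensor2tensor/models/layer_history.py.
class LayerHistorySketch(object):
  def __init__(self, name):
    self.name = name
    self._outputs = []

  def add(self, x):
    # called for the embedding (when hparams.use_emb) and, presumably,
    # for each layer output as the stack is built
    self._outputs.append(x)

  def aggregate(self):
    # simplest possible aggregation: a uniform average; a learned dense
    # layer aggregation would weight the recorded outputs instead
    return sum(self._outputs) / float(len(self._outputs))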
@@ -180,16 +189,30 @@ def transformer_encoder(encoder_input,
            max_relative_length=hparams.max_relative_length,
            dropout_broadcast_dims=attention_dropout_broadcast_dims,
            summaries=False,
            name="encoder_self_attention"),
            dropout_broadcast_dims=residual_dropout_broadcast_dims)
        x = residual_fn(x, transformer_ffn_layer(x, hparams),
                        dropout_broadcast_dims=residual_dropout_broadcast_dims)
            name="encoder_self_attention")
        x = common_layers.dropout_with_broadcast_dims(
            x, 1.0 - hparams.residual_dropout,
            broadcast_dims=residual_dropout_broadcast_dims)
        x = residual + x
        x = may_be_layernorm(x, hparams, after=True)

        # feed-forward network
        residual = x
        x = may_be_layernorm(x, hparams, before=True)
        x = transformer_ffn_layer(x, hparams)
        x = common_layers.dropout_with_broadcast_dims(
            x, 1.0 - hparams.residual_dropout,
            broadcast_dims=residual_dropout_broadcast_dims)
        x = residual + x
        x = may_be_layernorm(x, hparams, after=True)
  if hparams.normalize_before:
    x = may_be_layernorm(x, hparams, before=True, name="norm_top")
  return x


def transformer_decoder(decoder_input,
                        encoder_output,
                        residual_fn,
                        decoder_self_attention_bias,
                        encoder_decoder_attention_bias,
                        hparams,
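Instead of routing everything through residual_fn, each sublayer now spells out the same six steps inline: save the residual, optional pre-norm, run the sublayer, broadcast residual dropout, add the residual back, optional post-norm. As a compact reference, the repeated pattern is (a hypothetical helper, not something this commit adds):

# Sketch of the per-sublayer pattern repeated inline above.
def residual_sublayer_sketch(x, sublayer_fn, hparams, maybe_norm, dropout_fn):
  residual = x
  x = maybe_norm(x, hparams, before=True)             # layer_norm iff normalize_before
  x = sublayer_fn(x)                                  # self-attn, enc-dec attn or FFN
  x = dropout_fn(x, 1.0 - hparams.residual_dropout)   # dropout_with_broadcast_dims
  x = residual + x
  x = maybe_norm(x, hparams, after=True)              # layer_norm iff not normalize_before
  return x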
@@ -223,11 +246,12 @@ def transformer_decoder(decoder_input,
  # Summaries don't work in multi-problem setting yet.
  summaries = "problems" not in hparams.values() or len(hparams.problems) == 1
  with tf.variable_scope(name):
    for layer in xrange(hparams.num_hidden_layers):
    for layer in xrange(hparams.decoder_layers):
      with tf.variable_scope("layer_%d" % layer):
        x = residual_fn(x, common_attention.multihead_attention(
        # self-attention network
        residual = x
        x = may_be_layernorm(x, hparams, before=True)
        x = common_attention.multihead_attention(
            x, None, decoder_self_attention_bias,
@@ -240,11 +264,17 @@ def transformer_decoder(decoder_input,
            max_relative_length=hparams.max_relative_length,
            dropout_broadcast_dims=attention_dropout_broadcast_dims,
            summaries=False,
            name="decoder_self_attention"),
            dropout_broadcast_dims=residual_dropout_broadcast_dims)
        x = residual_fn(x, common_attention.multihead_attention(
            name="decoder_self_attention")
        x = common_layers.dropout_with_broadcast_dims(
            x, 1.0 - hparams.residual_dropout,
            broadcast_dims=residual_dropout_broadcast_dims)
        x = residual + x
        x = may_be_layernorm(x, hparams, after=True)

        # encoder-decoder-attention network
        residual = x
        x = may_be_layernorm(x, hparams, before=True)
        x = common_attention.multihead_attention(
            x, encoder_output, encoder_decoder_attention_bias,
@@ -255,10 +285,24 @@ def transformer_decoder(decoder_input,
            hparams.attention_dropout,
            dropout_broadcast_dims=attention_dropout_broadcast_dims,
            summaries=False,
            name="encdec_attention"),
            dropout_broadcast_dims=residual_dropout_broadcast_dims)
        x = residual_fn(x, transformer_ffn_layer(x, hparams),
                        dropout_broadcast_dims=residual_dropout_broadcast_dims)
            name="encdec_attention")
        x = common_layers.dropout_with_broadcast_dims(
            x, 1.0 - hparams.residual_dropout,
            broadcast_dims=residual_dropout_broadcast_dims)
        x = residual + x
        x = may_be_layernorm(x, hparams, after=True)

        # feed-forward network
        residual = x
        x = may_be_layernorm(x, hparams, before=True)
        x = transformer_ffn_layer(x, hparams)
        x = common_layers.dropout_with_broadcast_dims(
            x, 1.0 - hparams.residual_dropout,
            broadcast_dims=residual_dropout_broadcast_dims)
        x = residual + x
        x = may_be_layernorm(x, hparams, after=True)
  if hparams.normalize_before:
    x = may_be_layernorm(x, hparams, before=True, name="norm_top")
  return x
@@ -332,7 +376,8 @@ def transformer_base():
  hparams.learning_rate = 0.1
  hparams.learning_rate_warmup_steps = 4000
  hparams.initializer_gain = 1.0
  hparams.num_hidden_layers = 6
  hparams.encoder_layers = 6
  hparams.decoder_layers = 6
  hparams.initializer = "uniform_unit_scaling"
  hparams.weight_decay = 0.0
  hparams.optimizer_adam_beta1 = 0.9
@@ -370,6 +415,7 @@ def transformer_base():
  hparams.add_hparam("attention_dropout_broadcast_dims", "0,1")  # batch, heads
  hparams.add_hparam("relu_dropout_broadcast_dims", "1")  # length
  hparams.add_hparam("residual_dropout_broadcast_dims", "1")  # length
  hparams.add_hparam("normalize_before", False)
  return hparams
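transformer_base keeps num_hidden_layers but now also sets encoder_layers and decoder_layers separately, and registers a normalize_before switch (default False, i.e. the original post-norm behaviour). A hypothetical derived hparams set in the same style as the registered functions in this file, for an asymmetric pre-norm model (name and values below are illustrative only, not part of this commit):

# Illustrative example following the file's own @registry.register_hparams pattern.
@registry.register_hparams
def transformer_prenorm_deep_encoder_example():
  hparams = transformer_base()
  hparams.encoder_layers = 12      # deeper encoder ...
  hparams.decoder_layers = 6       # ... shallower decoder
  hparams.normalize_before = True  # pre-norm sublayers plus the final "norm_top"
  return hparams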
@@ -386,187 +432,49 @@ def transformer_big():

@registry.register_hparams
def transformer_big_single_gpu():
  """HParams for transformer big model for single gpu."""
  hparams = transformer_big()
  hparams.residual_dropout = 0.1
  hparams.learning_rate_warmup_steps = 16000
  hparams.optimizer_adam_beta2 = 0.998
  hparams.batching_mantissa_bits = 3
  return hparams

@registry.register_hparams
def transformer_base_single_gpu():
  """HParams for transformer base model for single gpu."""
def transformer_before():
  """HParams for transfomer big model on WMT."""
  hparams = transformer_base()
  hparams.batch_size = 8192
  hparams.learning_rate_warmup_steps = 16000
  hparams.batching_mantissa_bits = 2
  hparams.normalize_before = True
  hparams.relu_dropout = 0.1
  hparams.attention_dropout = 0.1
  hparams.learning_rate = 0.2
  hparams.learning_rate_warmup_steps = 8000
  hparams.optimizer_adam_beta1 = 0.9
  hparams.optimizer_adam_beta2 = 0.997
  return hparams

@registry.register_hparams
def transformer_parsing_base():
  """Hparams for parsing on wsj only."""
  hparams = transformer_base()
  hparams.attention_dropout = 0.2
  hparams.residual_dropout = 0.2
  hparams.max_length = 512
  hparams.learning_rate_warmup_steps = 16000
def transformer_before_big():
  """HParams for transfomer big model on WMT."""
  hparams = transformer_before()
  hparams.hidden_size = 1024
  hparams.learning_rate = 0.05
  hparams.shared_embedding_and_softmax_weights = int(False)
  hparams.filter_size = 4096
  hparams.num_heads = 16
  hparams.batching_mantissa_bits = 2
  hparams.residual_dropout = 0.3
  return hparams

@registry.register_hparams
def transformer_parsing_big():
  """HParams for parsing on wsj semi-supervised."""
def transformer_big_single_gpu():
  """HParams for transformer big model for single gpu."""
  hparams = transformer_big()
  hparams.max_length = 512
  hparams.shared_source_target_embedding = int(False)
  hparams.learning_rate_warmup_steps = 4000
  hparams.residual_dropout = 0.1
  hparams.batch_size = 2048
  hparams.learning_rate = 0.05
  return hparams

@registry.register_hparams
def transformer_parsing_ice():
  """Hparams for parsing Icelandic text."""
  hparams = transformer_base_single_gpu()
  hparams.batch_size = 4096
  hparams.shared_embedding_and_softmax_weights = int(False)
  return hparams

@registry.register_hparams
def transformer_tiny():
  hparams = transformer_base()
  hparams.hidden_size = 64
  hparams.filter_size = 128
  hparams.num_heads = 4
  return hparams

@registry.register_hparams
def transformer_l2():
  hparams = transformer_base()
  hparams.num_hidden_layers = 2
  return hparams

@registry.register_hparams
def transformer_l4():
  hparams = transformer_base()
  hparams.num_hidden_layers = 4
  return hparams

@registry.register_hparams
def transformer_l8():
  hparams = transformer_base()
  hparams.num_hidden_layers = 8
  return hparams

@registry.register_hparams
def transformer_h1():
  hparams = transformer_base()
  hparams.num_heads = 1
  return hparams

@registry.register_hparams
def transformer_h4():
  hparams = transformer_base()
  hparams.num_heads = 4
  return hparams

@registry.register_hparams
def transformer_h16():
  hparams = transformer_base()
  hparams.num_heads = 16
  return hparams

@registry.register_hparams
def transformer_h32():
  hparams = transformer_base()
  hparams.num_heads = 32
  return hparams

@registry.register_hparams
def transformer_k128():
  hparams = transformer_base()
  hparams.attention_key_channels = 128
  return hparams

@registry.register_hparams
def transformer_k256():
  hparams = transformer_base()
  hparams.attention_key_channels = 256
  return hparams

@registry.register_hparams
def transformer_ff1024():
  hparams = transformer_base()
  hparams.filter_size = 1024
  return hparams

@registry.register_hparams
def transformer_ff4096():
  hparams = transformer_base()
  hparams.filter_size = 4096
  return hparams

@registry.register_hparams
def transformer_dr0():
  hparams = transformer_base()
  hparams.residual_dropout = 0.0
  return hparams

@registry.register_hparams
def transformer_dr2():
  hparams = transformer_base()
  hparams.residual_dropout = 0.2
  return hparams

@registry.register_hparams
def transformer_ls0():
  hparams = transformer_base()
  hparams.label_smoothing = 0.0
  return hparams

@registry.register_hparams
def transformer_ls2():
  hparams = transformer_base()
  hparams.label_smoothing = 0.2
  return hparams

@registry.register_hparams
def transformer_hs256():
  hparams = transformer_base()
  hparams.hidden_size = 256
  hparams.learning_rate_warmup_steps = 16000
  hparams.optimizer_adam_beta2 = 0.998
  hparams.batching_mantissa_bits = 3
  return hparams

@registry.register_hparams
def transformer_hs1024():
def transformer_base_single_gpu():
  """HParams for transformer base model for single gpu."""
  hparams = transformer_base()
  hparams.hidden_size = 1024
  hparams.batch_size = 8192
  hparams.learning_rate_warmup_steps = 16000
  hparams.batching_mantissa_bits = 2
  return hparams
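The net effect of this hunk is to drop a long tail of unused hparams sets and to add two pre-norm WMT configurations, transformer_before (base size) and transformer_before_big (hidden 1024, filter 4096, 16 heads, residual_dropout 0.3). A small, hypothetical check of what transformer_before overrides relative to transformer_base, assuming this fork is installed and importable as tensor2tensor.models.transformer_dla:

# Hypothetical check script (assumes the package is importable).
from tensor2tensor.models import transformer_dla

base = transformer_dla.transformer_base()
before = transformer_dla.transformer_before()
for key in ("normalize_before", "relu_dropout", "attention_dropout",
            "learning_rate", "learning_rate_warmup_steps",
            "optimizer_adam_beta2"):
  print(key, getattr(base, key), "->", getattr(before, key))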
@@ -598,36 +506,6 @@ def transformer_big_dr2():

@registry.register_hparams
def transformer_parameter_attention_a():
  hparams = transformer_base()
  hparams.ffn_layer = "parameter_attention"
  hparams.filter_size = 1536
  return hparams

@registry.register_hparams
def transformer_parameter_attention_b():
  hparams = transformer_base()
  hparams.ffn_layer = "parameter_attention"
  hparams.filter_size = 512
  hparams.parameter_attention_key_channels = 1024
  hparams.parameter_attention_value_channels = 1024
  hparams.num_heads = 16
  return hparams

@registry.register_ranged_hparams("transformer_big_single_gpu")
def transformer_range1(rhp):
  """Small range of hyperparameters."""
  hparams = transformer_big_single_gpu()
  common_hparams.fill_ranged_hparams_from_hparams(hparams, rhp)
  rhp.set_float("learning_rate", 0.3, 3.0, scale=rhp.LOG_SCALE)
  rhp.set_float("initializer_gain", 0.5, 2.0)
  rhp.set_float("optimizer_adam_beta2", 0.97, 0.99)
  rhp.set_float("weight_decay", 0.0, 2.0)

@registry.register_hparams
def transformer_base_ldcd():
  """Set of hyperparameters."""
  hparams = transformer_base()
@@ -663,14 +541,6 @@ def transformer_base_ldcd_n1():
  hparams.learning_rate_warmup_steps = 4000
  return hparams

@registry.register_hparams
def transformer_base_nosmooth_dropout1():
  """Set of hyperparameters."""
  hparams = transformer_base()
  hparams.label_smoothing = 0.0
  hparams.relu_dropout = 0.1
  hparams.attention_dropout = 0.1
  return hparams

@registry.register_hparams
def transformer_base_amsgrad():
@@ -722,7 +592,6 @@ def transformer_base_ldrestart_n3():

@registry.register_hparams
def transformer_base_powersign():
  """Set of hyperparameters."""
@@ -817,15 +686,6 @@ def transformer_big_adafactor():
  hparams.optimizer_adafactor_beta2 = 0.997
  return hparams

@registry.register_hparams
def transformer_base_debug():
  hparams = transformer_big_adafactor()
  hparams.num_hidden_layers = 3
  hparams.fused_inner_hidden = 128
  hparams.hidden_size = 64
  hparams.filter_size = 128
  hparams.batch_size = 128
  return hparams

@registry.register_hparams
def transformer_base_v2():
@@ -848,7 +708,7 @@ def transformer_base_rpr_dropout1():

@registry.register_hparams
def transformer_base_v2_filter4096():
def transformer_base_v3():
  """Set of hyperparameters.
  set filter as 4096
  """
@@ -856,42 +716,7 @@ def transformer_base_v2_filter4096():
  hparams.filter_size = 4096
  return hparams

@registry.register_hparams
def transformer_base_filter4096():
  """Set of hyperparameters.
  set filter as 4096
  """
  hparams = transformer_base()
  hparams.filter_size = 4096
  return hparams

@registry.register_hparams
def transformer_base_v2_dropout2_filter4096():
  """Set of hyperparameters.
  set relu_dropout and attention_dropout as 0.2
  """
  hparams = transformer_base()
  hparams.attention_dropout = 0.2
  hparams.relu_dropout = 0.2
  hparams.filter_size = 4096
  return hparams

@registry.register_hparams
def transformer_base_lr2():
  """Set of hyperparameters.
  set relu_dropout and attention_dropout as 0.2
  """
  hparams = transformer_base()
  hparams.learning_rate = 0.2
  return hparams

@registry.register_hparams
def transformer_base_multistep2():
  # new model use optimizer MultistepAdam
  hparams = transformer_base()
  hparams.optimizer = "MultistepAdam"
  hparams.optimizer_multistep_accumulate_steps = 2
  return hparams

@registry.register_hparams
def transformer_big_multistep2():
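transformer_base_multistep2 (kept by this hunk) switches the optimizer to "MultistepAdam" with optimizer_multistep_accumulate_steps = 2, i.e. gradients from two consecutive mini-batches are accumulated before a single Adam update, which imitates training with roughly twice the batch size at the same memory cost. A framework-free sketch of that accumulation idea (the actual optimizer presumably lives in tensor2tensor/utils/multistep_optimizer.py, which appears in the workspace file list but is not part of this diff):

# Sketch of gradient accumulation only, not the repository's MultistepAdam.
def accumulate_then_step(grad_batches, apply_adam_update, accumulate_steps=2):
  buffered = None
  for step, grads in enumerate(grad_batches, start=1):
    buffered = (list(grads) if buffered is None
                else [b + g for b, g in zip(buffered, grads)])
    if step % accumulate_steps == 0:
      # one real optimizer update for every `accumulate_steps` mini-batches
      apply_adam_update([g / float(accumulate_steps) for g in buffered])
      buffered = None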
@@ -914,14 +739,3 @@ def transformer_big_adafactor_test():
  hparams.optimizer_adafactor_beta2 = 0.999
  return hparams

@registry.register_hparams
def transformer_mobile():
  # new model use optimizer MultistepAdam
  hparams = transformer_base()
  hparams.hidden_size = 256
  hparams.num_hidden_layers = 4
  hparams.residual_dropout = 0.1
  hparams.dropout = 0.1
  return hparams