Emmay / WMT19-1.0.14 / Commits

Commit 81667eab authored Feb 18, 2019 by libei
remove no use file and import settings

parent 42676b59
Showing 5 changed files with 46 additions and 526 deletions
.idea/workspace.xml  +46  -127
tensor2tensor/models/__pycache__/transformer_dropout.cpython-35.pyc  +0  -0
tensor2tensor/models/__pycache__/transformer_dropout.cpython-36.pyc  +0  -0
tensor2tensor/models/models.py  +0  -2
tensor2tensor/models/transformer_dropout.py  +0  -397
.idea/workspace.xml
@@ -3,9 +3,8 @@
  <component name="ChangeListManager">
    <list default="true" id="7d6d9926-f879-4708-ad8e-442bac96b62a" name="Default" comment="">
      <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
      <change beforePath="$PROJECT_DIR$/tensor2tensor/models/__init__.py" afterPath="$PROJECT_DIR$/tensor2tensor/models/__init__.py" />
      <change beforePath="$PROJECT_DIR$/tensor2tensor/models/models.py" afterPath="$PROJECT_DIR$/tensor2tensor/models/models.py" />
      <change beforePath="$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py" afterPath="$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py" />
      <change beforePath="$PROJECT_DIR$/tensor2tensor/models/transformer_dropout.py" afterPath="" />
    </list>
    <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
    <option name="TRACKING_ENABLED" value="true" />
@@ -26,102 +25,26 @@
          </provider>
        </entry>
      </file>
      <file leaf-file-name="__init__.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/tensor2tensor/models/__init__.py">
          <provider selected="true" editor-type-id="text-editor">
            <state relative-caret-position="351">
              <caret line="13" column="0" lean-forward="true" selection-start-line="13" selection-start-column="0" selection-end-line="13" selection-end-column="0" />
              <folding />
            </state>
          </provider>
        </entry>
      </file>
      <file leaf-file-name="models.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/tensor2tensor/models/models.py">
          <provider selected="true" editor-type-id="text-editor">
            <state relative-caret-position="537">
              <caret line="41" column="40" lean-forward="false" selection-start-line="41" selection-start-column="40" selection-end-line="41" selection-end-column="40" />
              <folding />
            </state>
          </provider>
        </entry>
      </file>
      <file leaf-file-name="__init__.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/tensor2tensor/utils/__init__.py">
          <provider selected="true" editor-type-id="text-editor">
            <state relative-caret-position="378">
              <caret line="14" column="0" lean-forward="true" selection-start-line="14" selection-start-column="0" selection-end-line="14" selection-end-column="0" />
              <folding />
            </state>
          </provider>
        </entry>
      </file>
      <file leaf-file-name="transformer_dla.py" pinned="false" current-in-tab="true">
      <file leaf-file-name="transformer_dla.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py">
          <provider selected="true" editor-type-id="text-editor">
            <state relative-caret-position="1323">
              <caret line="49" column="50" lean-forward="true" selection-start-line="49" selection-start-column="50" selection-end-line="49" selection-end-column="50" />
              <folding>
                <element signature="e#738#776#0" expanded="true" />
              </folding>
            </state>
          </provider>
        </entry>
      </file>
      <file leaf-file-name="transformer_alternative.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_alternative.py">
          <provider selected="true" editor-type-id="text-editor">
            <state relative-caret-position="302">
              <caret line="167" column="4" lean-forward="false" selection-start-line="167" selection-start-column="4" selection-end-line="167" selection-end-column="4" />
            <state relative-caret-position="216">
              <caret line="35" column="46" lean-forward="true" selection-start-line="35" selection-start-column="46" selection-end-line="35" selection-end-column="46" />
              <folding />
            </state>
          </provider>
        </entry>
      </file>
      <file leaf-file-name="transformer_mlrf.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_mlrf.py">
          <provider selected="true" editor-type-id="text-editor">
            <state relative-caret-position="493">
              <caret line="39" column="21" lean-forward="true" selection-start-line="39" selection-start-column="21" selection-end-line="39" selection-end-column="21" />
              <folding />
            </state>
          </provider>
        </entry>
      </file>
      <file leaf-file-name="transformer_dropout.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_dropout.py">
      <file leaf-file-name="models.py" pinned="false" current-in-tab="true">
        <entry file="file://$PROJECT_DIR$/tensor2tensor/models/models.py">
          <provider selected="true" editor-type-id="text-editor">
            <state relative-caret-position="-2834">
              <caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
            <state relative-caret-position="408">
              <caret line="37" column="46" lean-forward="false" selection-start-line="37" selection-start-column="46" selection-end-line="37" selection-end-column="46" />
              <folding />
            </state>
          </provider>
        </entry>
      </file>
      <file leaf-file-name="xception.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/tensor2tensor/models/xception.py">
          <provider selected="true" editor-type-id="text-editor">
            <state relative-caret-position="-972">
              <caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
              <folding>
                <element signature="e#608#646#0" expanded="true" />
              </folding>
            </state>
          </provider>
        </entry>
      </file>
      <file leaf-file-name="attention_lm.py" pinned="false" current-in-tab="false">
        <entry file="file://$PROJECT_DIR$/tensor2tensor/models/attention_lm.py">
          <provider selected="true" editor-type-id="text-editor">
            <state relative-caret-position="567">
              <caret line="45" column="27" lean-forward="true" selection-start-line="45" selection-start-column="27" selection-end-line="45" selection-end-column="27" />
              <folding>
                <element signature="e#719#757#0" expanded="true" />
              </folding>
            </state>
          </provider>
        </entry>
      </file>
    </leaf>
  </component>
  <component name="FileTemplateManagerImpl">
@@ -171,7 +94,7 @@
    <detection-done>true</detection-done>
    <sorting>DEFINITION_ORDER</sorting>
  </component>
  <component name="ProjectFrameBounds" extendedState="7">
  <component name="ProjectFrameBounds" extendedState="6">
    <option name="x" value="22" />
    <option name="y" value="5" />
    <option name="width" value="1909" />
@@ -267,7 +190,7 @@
    <servers />
  </component>
  <component name="ToolWindowManager">
    <frame x="-8" y="-8" width="1936" height="1056" extended-state="7" />
    <frame x="-8" y="-8" width="1936" height="1056" extended-state="6" />
    <editor active="true" />
    <layout>
      <window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="11" side_tool="false" content_ui="tabs" />
@@ -324,9 +247,7 @@
        <provider selected="true" editor-type-id="text-editor">
          <state relative-caret-position="2484">
            <caret line="92" column="43" lean-forward="false" selection-start-line="92" selection-start-column="43" selection-end-line="92" selection-end-column="43" />
            <folding>
              <element signature="e#738#776#0" expanded="true" />
            </folding>
            <folding />
          </state>
        </provider>
      </entry>
@@ -500,18 +421,18 @@
        </state>
      </provider>
    </entry>
    <entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_alternative.py">
    <entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_mlrf.py">
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="302">
          <caret line="167" column="4" lean-forward="false" selection-start-line="167" selection-start-column="4" selection-end-line="167" selection-end-column="4" />
        <state relative-caret-position="493">
          <caret line="39" column="21" lean-forward="true" selection-start-line="39" selection-start-column="21" selection-end-line="39" selection-end-column="21" />
          <folding />
        </state>
      </provider>
    </entry>
    <entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer.py">
    <entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_libei.py">
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="162">
          <caret line="40" column="12" lean-forward="true" selection-start-line="40" selection-start-column="12" selection-end-line="40" selection-end-column="12" />
        <state relative-caret-position="0">
          <caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
          <folding />
        </state>
      </provider>
@@ -524,36 +445,26 @@
        </state>
      </provider>
    </entry>
    <entry file="file://$PROJECT_DIR$/tensor2tensor/models/attention_lm.py">
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="567">
          <caret line="45" column="27" lean-forward="true" selection-start-line="45" selection-start-column="27" selection-end-line="45" selection-end-column="27" />
          <folding>
            <element signature="e#719#757#0" expanded="true" />
          </folding>
        </state>
      </provider>
    </entry>
    <entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_mlrf.py">
    <entry file="file://$PROJECT_DIR$/tensor2tensor/models/__init__.py">
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="493">
          <caret line="39" column="21" lean-forward="true" selection-start-line="39" selection-start-column="21" selection-end-line="39" selection-end-column="21" />
        <state relative-caret-position="351">
          <caret line="13" column="0" lean-forward="true" selection-start-line="13" selection-start-column="0" selection-end-line="13" selection-end-column="0" />
          <folding />
        </state>
      </provider>
    </entry>
    <entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_dropout.py">
    <entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer.py">
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="-2834">
          <caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
        <state relative-caret-position="162">
          <caret line="40" column="12" lean-forward="true" selection-start-line="40" selection-start-column="12" selection-end-line="40" selection-end-column="12" />
          <folding />
        </state>
      </provider>
    </entry>
    <entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_libei.py">
    <entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_alternative.py">
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="0">
          <caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
        <state relative-caret-position="302">
          <caret line="167" column="4" lean-forward="false" selection-start-line="167" selection-start-column="4" selection-end-line="167" selection-end-column="4" />
          <folding />
        </state>
      </provider>
@@ -568,29 +479,37 @@
        </state>
      </provider>
    </entry>
    <entry file="file://$PROJECT_DIR$/tensor2tensor/models/__init__.py">
    <entry file="file://$PROJECT_DIR$/tensor2tensor/models/attention_lm.py">
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="351">
          <caret line="13" column="0" lean-forward="true" selection-start-line="13" selection-start-column="0" selection-end-line="13" selection-end-column="0" />
          <folding />
        <state relative-caret-position="567">
          <caret line="45" column="27" lean-forward="true" selection-start-line="45" selection-start-column="27" selection-end-line="45" selection-end-column="27" />
          <folding>
            <element signature="e#719#757#0" expanded="true" />
          </folding>
        </state>
      </provider>
    </entry>
    <entry file="file://$PROJECT_DIR$/tensor2tensor/models/models.py">
    <entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_dropout.py">
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="537">
          <caret line="41" column="40" lean-forward="false" selection-start-line="41" selection-start-column="40" selection-end-line="41" selection-end-column="40" />
        <state relative-caret-position="135">
          <caret line="162" column="40" lean-forward="false" selection-start-line="162" selection-start-column="40" selection-end-line="162" selection-end-column="40" />
          <folding />
        </state>
      </provider>
    </entry>
    <entry file="file://$PROJECT_DIR$/tensor2tensor/models/transformer_dla.py">
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="1323">
          <caret line="49" column="50" lean-forward="true" selection-start-line="49" selection-start-column="50" selection-end-line="49" selection-end-column="50" />
          <folding>
            <element signature="e#738#776#0" expanded="true" />
          </folding>
        <state relative-caret-position="216">
          <caret line="35" column="46" lean-forward="true" selection-start-line="35" selection-start-column="46" selection-end-line="35" selection-end-column="46" />
          <folding />
        </state>
      </provider>
    </entry>
    <entry file="file://$PROJECT_DIR$/tensor2tensor/models/models.py">
      <provider selected="true" editor-type-id="text-editor">
        <state relative-caret-position="408">
          <caret line="37" column="46" lean-forward="false" selection-start-line="37" selection-start-column="46" selection-end-line="37" selection-end-column="46" />
          <folding />
        </state>
      </provider>
    </entry>
tensor2tensor/models/__pycache__/transformer_dropout.cpython-35.pyc
deleted 100644 → 0
File deleted

tensor2tensor/models/__pycache__/transformer_dropout.cpython-36.pyc
deleted 100644 → 0
File deleted
tensor2tensor/models/models.py
@@ -36,8 +36,6 @@ from tensor2tensor.models import transformer
from tensor2tensor.models import transformer_alternative
from tensor2tensor.models import xception
from tensor2tensor.models import transformer_mlrf
#from tensor2tensor.models import transformer_fix
from tensor2tensor.models import transformer_relative_pos
from tensor2tensor.models import transformer_dropout
from tensor2tensor.models import transformer_dla
# pylint: enable=unused-import
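
Removing the import is what actually unregisters the model: @registry.register_model runs at import time, and models.py is the module that pulls in every model file, so once the transformer_dropout import is gone the TransformerDropout class is never registered. A minimal sketch of that import-time registration pattern, with a hypothetical stand-in registry rather than the real tensor2tensor.utils.registry:

# Hypothetical mini-registry; illustrates why deleting an import from
# models.py is enough to remove a model from the lookup table.
_MODELS = {}

def register_model(cls):
  # The decorator runs when the module defining `cls` is imported.
  _MODELS[cls.__name__.lower()] = cls
  return cls

@register_model
class TransformerDropout(object):
  """Stand-in for the deleted model class."""

# Only modules that are imported get registered; dropping the import from
# models.py means this name can no longer be resolved by name.
print("transformerdropout" in _MODELS)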
tensor2tensor/models/transformer_dropout.py
deleted 100644 → 0
# Copyright 2017 The Tensor2Tensor Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""transformer (attention).
encoder: [Self-Attention, Feed-forward] x n
decoder: [Self-Attention, Source-Target-Attention, Feed-forward] x n
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy

# Dependency imports

from six.moves import xrange  # pylint: disable=redefined-builtin

from tensor2tensor.models import common_attention
from tensor2tensor.models import common_hparams
from tensor2tensor.models import common_layers
from tensor2tensor.utils import registry
from tensor2tensor.utils import t2t_model

import tensorflow as tf

@registry.register_model
class TransformerDropout(t2t_model.T2TModel):
  """Attention net. See file docstring."""

  def model_fn_body(self, features):
    # Remove dropout if not training
    hparams = copy.copy(self._hparams)
    targets = features["targets"]
    inputs = features.get("inputs")
    target_space = features.get("target_space_id")

    inputs = common_layers.flatten4d3d(inputs)
    targets = common_layers.flatten4d3d(targets)

    (encoder_input, encoder_attention_bias, _) = (transformer_prepare_encoder(
        inputs, target_space, hparams))
    (decoder_input, decoder_self_attention_bias) = transformer_prepare_decoder(
        targets, hparams)

    def residual_fn(x, y, dropout_broadcast_dims=None):
      return common_layers.layer_norm(
          x + common_layers.dropout_with_broadcast_dims(
              y, 1.0 - hparams.residual_dropout,
              broadcast_dims=dropout_broadcast_dims))

    # encoder_input = tf.squeeze(encoder_input, 2)
    # decoder_input = tf.squeeze(decoder_input, 2)
    encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout)
    decoder_input = tf.nn.dropout(decoder_input, 1.0 - hparams.residual_dropout)
    encoder_output = transformer_encoder(encoder_input, residual_fn,
                                         encoder_attention_bias, hparams)
    decoder_output = transformer_decoder(decoder_input, encoder_output,
                                         residual_fn,
                                         decoder_self_attention_bias,
                                         encoder_attention_bias, hparams)
    decoder_output = tf.expand_dims(decoder_output, 2)
    return decoder_output

def transformer_prepare_encoder(inputs, target_space, hparams):
  """Prepare one shard of the model for the encoder.

  Args:
    inputs: a Tensor.
    target_space: a Tensor.
    hparams: run hyperparameters

  Returns:
    encoder_input: a Tensor, bottom of encoder stack
    encoder_self_attention_bias: a Tensor, containing large negative values
      to implement masked attention and possibly biases for diagonal alignments
    encoder_padding: a Tensor
  """
  # Flatten inputs.
  ishape_static = inputs.shape.as_list()
  encoder_input = inputs
  encoder_padding = common_attention.embedding_to_padding(encoder_input)
  encoder_self_attention_bias = common_attention.attention_bias_ignore_padding(
      encoder_padding)
  # Append target_space_id embedding to inputs.
  emb_target_space = common_layers.embedding(
      target_space, 32, ishape_static[-1], name="target_space_embedding")
  emb_target_space = tf.reshape(emb_target_space, [1, 1, -1])
  encoder_input += emb_target_space
  if hparams.pos == "timing":
    encoder_input = common_attention.add_timing_signal_1d(encoder_input)
  return (encoder_input, encoder_self_attention_bias, encoder_padding)

def transformer_prepare_decoder(targets, hparams):
  """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a Tensor, containing large negative values
      to implement masked attention and possibly biases for diagonal alignments
  """
  decoder_self_attention_bias = (
      common_attention.attention_bias_lower_triangle(tf.shape(targets)[1]))
  decoder_input = common_layers.shift_left_3d(targets)
  if hparams.pos == "timing":
    decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  return (decoder_input, decoder_self_attention_bias)

def transformer_encoder(encoder_input,
                        residual_fn,
                        encoder_self_attention_bias,
                        hparams,
                        name="encoder"):
  """A stack of transformer layers.

  Args:
    encoder_input: a Tensor
    residual_fn: a function from (layer_input, layer_output) -> combined_output
    encoder_self_attention_bias: bias Tensor for self-attention
      (see common_attention.attention_bias())
    hparams: hyperparameters for model
    name: a string

  Returns:
    y: a Tensor
  """
  x = encoder_input
  residual_dropout_broadcast_dims = (
      common_layers.comma_separated_string_to_integer_list(
          getattr(hparams, "residual_dropout_broadcast_dims", "")))
  attention_dropout_broadcast_dims = (
      common_layers.comma_separated_string_to_integer_list(
          getattr(hparams, "attention_dropout_broadcast_dims", "")))
  # Summaries don't work in multi-problem setting yet.
  summaries = "problems" not in hparams.values() or len(hparams.problems) == 1
  with tf.variable_scope(name):
    for layer in xrange(hparams.num_hidden_layers):
      with tf.variable_scope("layer_%d" % layer):
        x = residual_fn(
            x,
            common_attention.multihead_attention_broadcast_dropout(
                x,
                None,
                encoder_self_attention_bias,
                hparams.attention_key_channels or hparams.hidden_size,
                hparams.attention_value_channels or hparams.hidden_size,
                hparams.hidden_size,
                hparams.num_heads,
                hparams.attention_dropout,
                attention_type=hparams.attention_type,
                max_relative_length=hparams.max_relative_length,
                dropout_broadcast_dims=attention_dropout_broadcast_dims,
                summaries=False,
                name="encoder_self_attention"),
            dropout_broadcast_dims=residual_dropout_broadcast_dims)
        x = residual_fn(
            x,
            transformer_ffn_layer(x, hparams),
            dropout_broadcast_dims=residual_dropout_broadcast_dims)
  return x

def transformer_decoder(decoder_input,
                        encoder_output,
                        residual_fn,
                        decoder_self_attention_bias,
                        encoder_decoder_attention_bias,
                        hparams,
                        name="decoder"):
  """A stack of transformer layers.

  Args:
    decoder_input: a Tensor
    encoder_output: a Tensor
    residual_fn: a function from (layer_input, layer_output) -> combined_output
    decoder_self_attention_bias: bias Tensor for self-attention
      (see common_attention.attention_bias())
    encoder_decoder_attention_bias: bias Tensor for encoder-decoder attention
      (see common_attention.attention_bias())
    hparams: hyperparameters for model
    name: a string

  Returns:
    y: a Tensor
  """
  x = decoder_input
  residual_dropout_broadcast_dims = (
      common_layers.comma_separated_string_to_integer_list(
          getattr(hparams, "residual_dropout_broadcast_dims", "")))
  attention_dropout_broadcast_dims = (
      common_layers.comma_separated_string_to_integer_list(
          getattr(hparams, "attention_dropout_broadcast_dims", "")))
  # Summaries don't work in multi-problem setting yet.
  summaries = "problems" not in hparams.values() or len(hparams.problems) == 1
  with tf.variable_scope(name):
    for layer in xrange(hparams.num_hidden_layers):
      with tf.variable_scope("layer_%d" % layer):
        x = residual_fn(
            x,
            common_attention.multihead_attention_broadcast_dropout(
                x,
                None,
                decoder_self_attention_bias,
                hparams.attention_key_channels or hparams.hidden_size,
                hparams.attention_value_channels or hparams.hidden_size,
                hparams.hidden_size,
                hparams.num_heads,
                hparams.attention_dropout,
                attention_type=hparams.attention_type,
                max_relative_length=hparams.max_relative_length,
                dropout_broadcast_dims=attention_dropout_broadcast_dims,
                summaries=False,
                name="decoder_self_attention"),
            dropout_broadcast_dims=residual_dropout_broadcast_dims)
        x = residual_fn(
            x,
            common_attention.multihead_attention_broadcast_dropout(
                x,
                encoder_output,
                encoder_decoder_attention_bias,
                hparams.attention_key_channels or hparams.hidden_size,
                hparams.attention_value_channels or hparams.hidden_size,
                hparams.hidden_size,
                hparams.num_heads,
                hparams.attention_dropout,
                dropout_broadcast_dims=attention_dropout_broadcast_dims,
                summaries=False,
                name="encdec_attention"),
            dropout_broadcast_dims=residual_dropout_broadcast_dims)
        x = residual_fn(
            x,
            transformer_ffn_layer(x, hparams),
            dropout_broadcast_dims=residual_dropout_broadcast_dims)
  return x

def transformer_ffn_layer(x, hparams):
  """Feed-forward layer in the transformer.

  Args:
    x: a Tensor of shape [batch_size, length, hparams.hidden_size]
    hparams: hyperparameters for model

  Returns:
    a Tensor of shape [batch_size, length, hparams.hidden_size]
  """
  # wq: although we name it 'relu_dropout_broadcast_dims', it is also used for swish
  # todo: merge different activation functions as one
  relu_dropout_broadcast_dims = (
      common_layers.comma_separated_string_to_integer_list(
          getattr(hparams, "relu_dropout_broadcast_dims", "")))
  if hparams.ffn_layer == "conv_hidden_relu":
    return common_layers.conv_hidden_relu(
        x,
        hparams.filter_size,
        hparams.hidden_size,
        dropout=hparams.relu_dropout,
        dropout_broadcast_dims=relu_dropout_broadcast_dims,
        summaries=False)
  if hparams.ffn_layer == "conv_hidden_swish":
    return common_layers.conv_hidden_swish(
        x,
        hparams.filter_size,
        hparams.hidden_size,
        dropout=hparams.swish_dropout,
        beta_is_trainable=hparams.swish_beta_is_trainable,
        beta=hparams.swish_beta,
        dropout_broadcast_dims=relu_dropout_broadcast_dims,
        summaries=False)
  elif hparams.ffn_layer == "parameter_attention":
    return common_attention.parameter_attention(
        x,
        hparams.parameter_attention_key_channels or hparams.hidden_size,
        hparams.parameter_attention_value_channels or hparams.hidden_size,
        hparams.hidden_size,
        hparams.filter_size,
        hparams.num_heads,
        hparams.attention_dropout)
  elif hparams.ffn_layer == "conv_hidden_relu_with_sepconv":
    return common_layers.conv_hidden_relu(
        x,
        hparams.filter_size,
        hparams.hidden_size,
        kernel_size=(3, 1),
        second_kernel_size=(31, 1),
        padding="LEFT",
        dropout=hparams.relu_dropout)
  else:
    assert hparams.ffn_layer == "none"
    return x

@registry.register_hparams
def transformer_base_boradcast_dropout():
  """Set of hyperparameters."""
  hparams = common_hparams.basic_params1()
  hparams.hidden_size = 512
  hparams.batch_size = 4096
  hparams.max_length = 256
  hparams.dropout = 0.0
  hparams.clip_grad_norm = 0.  # i.e. no gradient clipping
  hparams.optimizer_adam_epsilon = 1e-9
  hparams.learning_rate_decay_scheme = "noam"
  hparams.learning_rate = 0.1
  hparams.learning_rate_warmup_steps = 4000
  hparams.initializer_gain = 1.0
  hparams.num_hidden_layers = 6
  hparams.initializer = "uniform_unit_scaling"
  hparams.weight_decay = 0.0
  hparams.optimizer_adam_beta1 = 0.9
  hparams.optimizer_adam_beta2 = 0.98
  hparams.num_sampled_classes = 0
  hparams.label_smoothing = 0.1
  hparams.shared_embedding_and_softmax_weights = int(True)
  hparams.add_hparam("filter_size", 2048)  # Add new ones like this.
  # attention-related flags
  hparams.add_hparam("num_heads", 8)
  hparams.add_hparam("attention_key_channels", 0)
  hparams.add_hparam("attention_value_channels", 0)
  hparams.add_hparam("ffn_layer", "conv_hidden_relu")
  hparams.add_hparam("parameter_attention_key_channels", 0)
  hparams.add_hparam("parameter_attention_value_channels", 0)
  # All hyperparameters ending in "dropout" are automatically set to 0.0
  # when not in training mode.
  hparams.add_hparam("attention_dropout", 0.0)
  hparams.add_hparam("relu_dropout", 0.0)
  hparams.add_hparam("residual_dropout", 0.1)
  hparams.add_hparam("pos", "timing")  # timing, none
  hparams.add_hparam("nbr_decoder_problems", 1)
  # default is "dot_product" attention, you can choose "relative_dot_product"
  hparams.add_hparam("attention_type", "dot_product")
  hparams.add_hparam("max_relative_length", 16)
  # swish activation function
  hparams.add_hparam("swish_beta_is_trainable", False)
  hparams.add_hparam("swish_beta", 1.0)
  # like "relu_dropout"
  hparams.add_hparam("swish_dropout", 0.0)
  # update dropout implement, save memory & speed up
  hparams.add_hparam("attention_dropout_broadcast_dims", "0,1")  # batch, heads
  hparams.add_hparam("relu_dropout_broadcast_dims", "1")  # length
  hparams.add_hparam("residual_dropout_broadcast_dims", "1")  # length
  return hparams


@registry.register_hparams
def transformer_base_boradcast_dropout_rpr_dropout1():
  hparams = transformer_base_boradcast_dropout()
  hparams.max_relative_length = 16
  hparams.attention_type = "relative_dot_product"
  hparams.relu_dropout = 0.1
  hparams.attention_dropout = 0.1
  return hparams


@registry.register_hparams
def transformer_base_broadcast_dropout_dropout1():
  hparams = transformer_base_boradcast_dropout()
  hparams.relu_dropout = 0.1
  hparams.attention_dropout = 0.1
  return hparams


@registry.register_hparams
def transformer_base_broadcast_dropout_tiny():
  hparams = transformer_base_boradcast_dropout()
  hparams.relu_dropout = 0.1
  hparams.attention_dropout = 0.1
  hparams.num_hidden_layers = 3
  hparams.fused_inner_hidden = 128
  hparams.hidden_size = 64
  hparams.filter_size = 128
  hparams.batch_size = 128
  return hparams
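
The *_dropout_broadcast_dims hparams at the bottom of the deleted file are the point of this model variant: the dropout mask is shared (broadcast) along the listed axes, e.g. residual_dropout_broadcast_dims = "1" shares one mask across the length dimension, which is what the "save memory & speed up" comment refers to. A rough sketch of the idea in plain TensorFlow 1.x using the noise_shape argument of tf.nn.dropout; the helper name mirrors common_layers.dropout_with_broadcast_dims but is an illustration only, not the library code:

import tensorflow as tf

def dropout_with_broadcast_dims(x, keep_prob, broadcast_dims=None):
  # Share the dropout mask along `broadcast_dims` by fixing those entries of
  # noise_shape to 1; tf.nn.dropout then broadcasts the mask over those axes.
  broadcast_dims = broadcast_dims or []
  shape = tf.shape(x)
  ndims = len(x.get_shape())
  noise_shape = [1 if i in broadcast_dims else shape[i] for i in range(ndims)]
  return tf.nn.dropout(x, keep_prob, noise_shape=noise_shape)

# Example: [batch, length, hidden] activations with the mask broadcast over
# the length axis (dim 1), matching residual_dropout_broadcast_dims = "1".
x = tf.random_normal([2, 5, 8])
y = dropout_with_broadcast_dims(x, keep_prob=0.9, broadcast_dims=[1])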