Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
N
NiuTrans.Tensor
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
0
Issues
0
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
杨迪
NiuTrans.Tensor
Commits
3cd237ff
Commit
3cd237ff
authored
Aug 02, 2018
by
xiaotong
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
t2t embedding and output layers
parent
287e226c
隐藏空白字符变更
内嵌
并排
正在显示
10 个修改的文件
包含
159 行增加
和
4 行删除
+159
-4
source/sample/transformer/T2TEmbedding.cpp
+47
-2
source/sample/transformer/T2TEmbedding.h
+6
-0
source/sample/transformer/T2TModel.cpp
+27
-1
source/sample/transformer/T2TModel.h
+6
-0
source/sample/transformer/T2TOutput.cpp
+13
-0
source/sample/transformer/T2TOutput.h
+3
-0
source/tensor/core/movement/CopyValues.cpp
+30
-1
source/tensor/core/movement/CopyValues.h
+3
-0
source/tensor/function/LogSoftmax.cpp
+21
-0
source/tensor/function/LogSoftmax.h
+3
-0
没有找到文件。
source/sample/transformer/T2TEmbedding.cpp
查看文件 @
3cd237ff
...
@@ -33,6 +33,7 @@ T2TEmbedder::T2TEmbedder()
...
@@ -33,6 +33,7 @@ T2TEmbedder::T2TEmbedder()
devID
=
-
1
;
devID
=
-
1
;
mem
=
NULL
;
mem
=
NULL
;
vSize
=
-
1
;
vSize
=
-
1
;
maxLength
=
-
1
;
}
}
/* destructor */
/* destructor */
...
@@ -52,7 +53,6 @@ void T2TEmbedder::InitModel(int argc, const char ** argv, int myDevID, XMem * my
...
@@ -52,7 +53,6 @@ void T2TEmbedder::InitModel(int argc, const char ** argv, int myDevID, XMem * my
devID
=
myDevID
;
devID
=
myDevID
;
mem
=
myMem
;
mem
=
myMem
;
int
maxLength
=
0
;
int
d
=
0
;
int
d
=
0
;
LoadParamInt
(
argc
,
argv
,
"vsize"
,
&
vSize
,
-
1
);
LoadParamInt
(
argc
,
argv
,
"vsize"
,
&
vSize
,
-
1
);
...
@@ -102,7 +102,52 @@ make the network
...
@@ -102,7 +102,52 @@ make the network
*/
*/
XTensor * T2TEmbedder::Make(XTensor * input)
{
    /*
    Builds word embeddings (input x w) and adds positional embeddings to them.
    >> input - input tensor; the checks below require GetDim(-1) == vSize,
               order > 1 and dimSize[order - 2] < maxLength
    << return - a tensor allocated with new that holds the sum of the two
                embeddings; it is not released inside this function
    */
    CheckNTErrors(input->GetDim(-1) == vSize, "Wrong vocabulary size!");
    CheckNTErrors(input->order > 1, "Wrong input tensor size!");
    CheckNTErrors(input->dimSize[input->order - 2] < maxLength, "The sequence is too long!");

    /* copy the shape of the input. memcpy counts bytes, so the number of
       dimensions must be scaled by the element size; passing the bare
       dimension count would copy only part of the shape. */
    int dims[MAX_TENSOR_DIM_NUM];
    memcpy(dims, input->dimSize, input->order * sizeof(int));

    /* NOTE(review): MMul(*input, w) presumably replaces the last dimension
       (vSize) rather than the first one, so dims[input->order - 1] may be
       what is intended here - confirm before changing. */
    dims[0] = eSize;

    /* the cached positional embedding is reused only if its shape is
       exactly the shape described by dims */
    bool match = (posEmbedding.order == input->order);
    if(match){
        for(int i = 0; i < input->order; i++){
            if(dims[i] != posEmbedding.GetDim(i))
                match = false;
        }
    }

    /* we make positional embeddings first */
    if(!match){
        InitTensor(&posEmbedding, input->order, dims, X_FLOAT, 1.0F, devID, mem);

        /* a 2-dimensional buffer filled with the leading values of posEmbeddingBase */
        XTensor * posTMP = NewTensorBuf(2, dims, X_FLOAT, 1.0F, devID, mem);

        _CopyValues(&posEmbeddingBase, 0, posTMP->unitNum, posTMP, 0);

        /* view posEmbedding as a 3-dimensional tensor whose last dimension
           collects all remaining units, fill it from the 2-dimensional
           buffer, then restore the full shape */
        int dims2[MAX_TENSOR_DIM_NUM];
        dims2[0] = dims[0];
        dims2[1] = dims[1];
        dims2[2] = posEmbedding.unitNum / (dims[0] * dims[1]);

        posEmbedding.Reshape(3, dims2);

        _Unsqueeze(posTMP, &posEmbedding, 0, dims2[2]);

        posEmbedding.Reshape(input->order, dims);

        DelTensorBuf(posTMP);
    }

    XTensor wordEmbedding;

    /* then we make word embeddings */
    wordEmbedding = MMul(*input, w);

    XTensor * result = new XTensor();

    /* we sum over the two embeddings */
    *result = wordEmbedding + posEmbedding;

    return result;
}
}
}
source/sample/transformer/T2TEmbedding.h
查看文件 @
3cd237ff
...
@@ -48,11 +48,17 @@ public:
...
@@ -48,11 +48,17 @@ public:
/* embedding size */
/* embedding size */
int
eSize
;
int
eSize
;
/* maximum length of the sequence */
int
maxLength
;
/* word embedding matrix */
/* word embedding matrix */
XTensor
w
;
XTensor
w
;
/* predefined positional embeddings. It can speed up
/* predefined positional embeddings. It can speed up
the embedding processing by re-loading. */
the embedding processing by re-loading. */
XTensor
posEmbeddingBase
;
/* positional embeddings */
XTensor
posEmbedding
;
XTensor
posEmbedding
;
public
:
public
:
...
...
source/sample/transformer/T2TModel.cpp
查看文件 @
3cd237ff
...
@@ -26,7 +26,6 @@
...
@@ -26,7 +26,6 @@
namespace
transformer
namespace
transformer
{
{
/* constructor */
/* constructor */
T2TModel
::
T2TModel
()
T2TModel
::
T2TModel
()
{
{
...
@@ -61,4 +60,30 @@ void T2TModel::InitModel(int argc, const char ** argv)
...
@@ -61,4 +60,30 @@ void T2TModel::InitModel(int argc, const char ** argv)
outputLayer
.
InitModel
(
argc
,
argv
,
devID
,
mem
);
outputLayer
.
InitModel
(
argc
,
argv
,
devID
,
mem
);
}
}
/*
run the encoder on the input
>> input - the tensor handed to encoder.Make
<< return - the tensor returned by encoder.Make
*/
XTensor * T2TModel::MakeEncoding(XTensor * input)
{
    /* this is a plain forwarding wrapper around the encoder */
    XTensor * encoding = encoder.Make(input);

    return encoding;
}
/*
run the whole network and write the result into the output tensor
>> input - input tensor
>> output - output tensor, filled by the output layer
Only the isLM setting is implemented: the input is encoded and the
encoding is passed to the output layer. Any other setting reports
"TODO!" via ShowNTErrors.
*/
void T2TModel::Make(XTensor * input, XTensor * output)
{
    /* everything except the language-model setting is still to be done */
    if(!isLM){
        ShowNTErrors("TODO!");
        return;
    }

    XTensor * encoding = MakeEncoding(input);

    outputLayer.Make(encoding, output);
}
}
}
\ No newline at end of file
source/sample/transformer/T2TModel.h
查看文件 @
3cd237ff
...
@@ -64,6 +64,12 @@ public:
...
@@ -64,6 +64,12 @@ public:
/* initialize the model */
/* initialize the model */
void
InitModel
(
int
argc
,
const
char
**
argv
);
void
InitModel
(
int
argc
,
const
char
**
argv
);
/* make the encoding network */
XTensor
*
MakeEncoding
(
XTensor
*
input
);
/* make the entire network (with the output softmax layer) */
void
Make
(
XTensor
*
input
,
XTensor
*
output
);
};
};
}
}
...
...
source/sample/transformer/T2TOutput.cpp
查看文件 @
3cd237ff
...
@@ -74,4 +74,16 @@ XTensor * T2TOutput::Make(XTensor * input)
...
@@ -74,4 +74,16 @@ XTensor * T2TOutput::Make(XTensor * input)
return
result
;
return
result
;
}
}
/*
make the network and store the result in a tensor supplied by the caller
>> input - input tensor, multiplied by the weight w
>> output - receives LogSoftmax(input * w) taken with leading dimension -1
*/
void T2TOutput::Make(XTensor * input, XTensor * output)
{
    /* the leading dimension handed to LogSoftmax */
    const int leadDim = -1;

    *output = LogSoftmax(MMul(*input, w), leadDim);
}
}
}
\ No newline at end of file
source/sample/transformer/T2TOutput.h
查看文件 @
3cd237ff
...
@@ -63,6 +63,9 @@ public:
...
@@ -63,6 +63,9 @@ public:
/* make the network */
/* make the network */
XTensor
*
Make
(
XTensor
*
input
);
XTensor
*
Make
(
XTensor
*
input
);
/* make the network (redefined output tensor) */
void
Make
(
XTensor
*
input
,
XTensor
*
output
);
};
};
...
...
source/tensor/core/movement/CopyValues.cpp
查看文件 @
3cd237ff
...
@@ -20,6 +20,7 @@
...
@@ -20,6 +20,7 @@
*/
*/
#include "../../XName.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "CopyValues.h"
#include "CopyValues.h"
#include "CopyValues.cuh"
#include "CopyValues.cuh"
...
@@ -42,7 +43,7 @@ void _CopyValues(const XTensor * s, XTensor * t, XStream * stream)
...
@@ -42,7 +43,7 @@ void _CopyValues(const XTensor * s, XTensor * t, XStream * stream)
if
((
s
->
dataType
==
X_FLOAT16
&&
t
->
dataType
==
X_FLOAT
)
||
if
((
s
->
dataType
==
X_FLOAT16
&&
t
->
dataType
==
X_FLOAT
)
||
(
s
->
dataType
==
X_FLOAT
&&
t
->
dataType
==
X_FLOAT16
))
{
(
s
->
dataType
==
X_FLOAT
&&
t
->
dataType
==
X_FLOAT16
))
{
CheckNTErrors
(((
s
->
devID
<
0
&&
t
->
devID
<
0
)
||
s
->
devID
==
t
->
devID
),
CheckNTErrors
(((
s
->
devID
<
0
&&
t
->
devID
<
0
)
||
s
->
devID
==
t
->
devID
),
"The code must be run on the same device!"
);
"The code must be run on the same device!"
);
CheckNTErrors
((
s
->
isSparse
||
t
->
isSparse
),
"TODO!"
);
CheckNTErrors
((
s
->
isSparse
||
t
->
isSparse
),
"TODO!"
);
ConvertDataType
(
s
->
devID
,
s
->
data
,
s
->
dataType
,
t
->
data
,
t
->
dataType
,
s
->
unitNum
);
ConvertDataType
(
s
->
devID
,
s
->
data
,
s
->
dataType
,
t
->
data
,
t
->
dataType
,
s
->
unitNum
);
}
}
...
@@ -69,6 +70,34 @@ void _CopyValues(const XTensor * s, XTensor * t, XStream * stream)
...
@@ -69,6 +70,34 @@ void _CopyValues(const XTensor * s, XTensor * t, XStream * stream)
}
}
/*
/*
copy s to t
>> s - source
>> sBeg - begining of the segment
>> sLen - length of the segment
>> t - target
>> tBeg - beginning of the segment on the target side
>> stream - the stream for creating the job pipeline
*/
void
_CopyValues
(
const
XTensor
*
s
,
const
int
sBeg
,
const
int
sLen
,
XTensor
*
t
,
const
int
tBeg
,
XStream
*
stream
)
{
CheckNTErrors
(
s
!=
NULL
&&
t
!=
NULL
,
"The input tensor and output tensor must be nonempty!"
);
CheckNTErrors
(
s
->
data
!=
NULL
&&
t
->
data
!=
NULL
,
"Cannot copy from an empty data array!"
);
CheckNTErrors
(
s
->
unitSize
==
t
->
unitSize
,
"The input tensors must be of the same unit size!"
);
CheckNTErrors
(
s
->
order
>
sBeg
&&
sBeg
>=
0
&&
sLen
<=
s
->
unitNum
,
"Wrong segment on the source side"
);
CheckNTErrors
(
t
->
order
>
tBeg
&&
tBeg
>=
0
,
"Wrong segment on the target side"
);
if
(
!
s
->
isSparse
&&
!
t
->
isSparse
)
{
XMemCopy
((
char
*
)
t
->
data
+
tBeg
*
t
->
unitSize
,
t
->
devID
,
(
char
*
)
s
->
data
+
sBeg
*
s
->
unitSize
,
s
->
devID
,
s
->
unitSize
*
sLen
);
}
else
{
ShowNTErrors
(
"TODO!"
);
}
}
/*
copy s to t (return a XTensor structure)
copy s to t (return a XTensor structure)
make a new tensor to keep the result and return it
make a new tensor to keep the result and return it
...
...
source/tensor/core/movement/CopyValues.h
查看文件 @
3cd237ff
...
@@ -29,6 +29,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -29,6 +29,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* copy s to t */
/* copy s to t */
void
_CopyValues
(
const
XTensor
*
s
,
XTensor
*
t
,
XStream
*
stream
=
NULL
);
void
_CopyValues
(
const
XTensor
*
s
,
XTensor
*
t
,
XStream
*
stream
=
NULL
);
/* copy a segment of s to t */
void
_CopyValues
(
const
XTensor
*
s
,
const
int
sBeg
,
const
int
sLen
,
XTensor
*
t
,
const
int
tBeg
,
XStream
*
stream
=
NULL
);
/*
/*
copy s to t (return a XTensor structure)
copy s to t (return a XTensor structure)
make a new tensor to keep the result and return it
make a new tensor to keep the result and return it
...
...
source/tensor/function/LogSoftmax.cpp
查看文件 @
3cd237ff
...
@@ -190,6 +190,27 @@ XTensor LogSoftmax(const XTensor &x, int leadDim)
...
@@ -190,6 +190,27 @@ XTensor LogSoftmax(const XTensor &x, int leadDim)
return
y
;
return
y
;
}
}
/*
log scale softmax y = log(e^x / \sum_{i} e^{x_i})
the result is written into y, which is supplied by the caller
>> x - input vector
>> y - output vector; re-initialized from x when its shape differs from x
>> leadDim - leading dimension (along which we perform reduction)
*/
void LogSoftmax(const XTensor &x, XTensor &y, int leadDim)
{
    /* make sure the output has the shape of the input */
    const bool sameShape = XTensor::IsSameShaped(&x, &y);
    if(!sameShape){
        InitTensor(&y, &x);
    }

    /* the actual computation */
    _LogSoftmax(&x, &y, leadDim);

    /* record the operation (and its parameter) in the tensor network */
    XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX);
    XLink::AddParamToHeadInt(&y, leadDim);
}
/*
/*
backward computation for dense matrices with default data type
backward computation for dense matrices with default data type
...
...
source/tensor/function/LogSoftmax.h
查看文件 @
3cd237ff
...
@@ -33,6 +33,9 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim);
...
@@ -33,6 +33,9 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim);
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (return a XTensor structure) */
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (return a XTensor structure) */
XTensor
LogSoftmax
(
const
XTensor
&
x
,
int
leadDim
);
XTensor
LogSoftmax
(
const
XTensor
&
x
,
int
leadDim
);
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (with both argument of x and y) */
void
LogSoftmax
(
const
XTensor
&
x
,
XTensor
&
y
,
int
leadDim
);
/* de/dx */
/* de/dx */
void
_LogSoftmaxBackward
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
void
_LogSoftmaxBackward
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
XTensor
*
dedy
,
XTensor
*
dedx
,
XTensor
*
dedy
,
XTensor
*
dedx
,
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论