Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
N
NiuTrans.Tensor
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
0
Issues
0
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
Emmay
NiuTrans.Tensor
Commits
7809ed05
Commit
7809ed05
authored
Nov 13, 2018
by
xuchen
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'xuchen' into xiaotong-working
parents
f4be1882
03a9836e
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
51 个修改的文件
包含
805 行增加
和
148 行删除
+805
-148
source/network/XBackwardFunc.cpp
+2
-2
source/network/XBackwardLoss.cpp
+8
-5
source/network/XBackwardLoss.h
+2
-2
source/network/XBackwardShape.cpp
+0
-2
source/network/XNet.cpp
+66
-12
source/network/XNet.h
+8
-1
source/sample/fnnlm/FNNLM.cpp
+3
-1
source/sample/transformer/T2TEmbedding.cpp
+12
-8
source/sample/transformer/T2TModel.cpp
+29
-21
source/sample/transformer/T2TOutput.cpp
+2
-1
source/sample/transformer/T2TTrainer.cpp
+0
-0
source/sample/transformer/T2TTrainer.h
+28
-3
source/sample/transformer/Transformer.cpp
+67
-11
source/tensor/XDevice.cpp
+1
-1
source/tensor/XMem.cpp
+4
-3
source/tensor/XName.cpp
+2
-0
source/tensor/XName.h
+2
-1
source/tensor/XTensor.cpp
+80
-4
source/tensor/XTensor.h
+13
-1
source/tensor/XUtility.cpp
+15
-0
source/tensor/XUtility.h
+1
-0
source/tensor/core/getandset/SetData.cpp
+4
-4
source/tensor/core/math/Unary.cpp
+13
-0
source/tensor/core/math/Unary.cu
+8
-0
source/tensor/core/math/Unary.cuh
+9
-0
source/tensor/core/math/Unary.h
+9
-0
source/tensor/core/movement/Gather.cpp
+49
-0
source/tensor/core/movement/Gather.h
+4
-0
source/tensor/core/reduce/ReduceSum.cpp
+2
-2
source/tensor/core/reduce/ReduceSum.cu
+0
-0
source/tensor/core/reduce/ReduceSumAll.cpp
+10
-9
source/tensor/core/reduce/ReduceSumAll.h
+2
-2
source/tensor/function/CrossEntropy.cpp
+0
-0
source/tensor/function/CrossEntropy.cu
+0
-0
source/tensor/function/CrossEntropy.cuh
+9
-8
source/tensor/function/CrossEntropy.h
+3
-3
source/tensor/function/LogSoftmax.cpp
+3
-3
source/tensor/function/LogSoftmax.cu
+27
-3
source/tensor/function/LogSoftmax.cuh
+2
-2
source/tensor/function/LogSoftmax.h
+2
-2
source/tensor/function/Loss.cpp
+3
-2
source/tensor/function/Softmax.cpp
+7
-6
source/tensor/function/Softmax.cu
+22
-1
source/tensor/function/Softmax.cuh
+2
-2
source/tensor/function/Softmax.h
+2
-2
source/tensor/test/TDropout.cpp
+4
-4
source/tensor/test/TLogSoftmax.cpp
+4
-4
source/tensor/test/TPower.cpp
+19
-7
source/tensor/test/TReduceSum.cpp
+0
-0
source/tensor/test/TSoftmax.cpp
+2
-2
source/tensor/test/TSumDim.cpp
+239
-1
没有找到文件。
source/network/XBackwardFunc.cpp
查看文件 @
7809ed05
...
...
@@ -49,7 +49,7 @@ void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient)
else
if
(
operID
==
FUNC_LOGSOFTMAX
){
int
leadDim
=
income
.
GetParamInt
(
0
);
CheckNTErrors
(
leadDim
>=
0
&&
leadDim
<
input
->
order
,
"wrong leading dimension in logsoftmax!"
);
_LogSoftmaxBackward
(
NULL
,
output
,
input
,
output
->
grad
,
input
->
grad
,
leadDim
,
NOLOSS
);
_LogSoftmaxBackward
(
NULL
,
output
,
input
,
output
->
grad
,
input
->
grad
,
NULL
,
leadDim
,
NOLOSS
);
}
else
if
(
operID
==
FUNC_RECTIFY
)
_RectifyBackward
(
NULL
,
output
,
input
,
output
->
grad
,
input
->
grad
,
NOLOSS
);
...
...
@@ -58,7 +58,7 @@ void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient)
else
if
(
operID
==
FUNC_SOFTMAX
){
int
leadDim
=
income
.
GetParamInt
(
0
);
CheckNTErrors
(
leadDim
>=
0
&&
leadDim
<
input
->
order
,
"wrong leading dimension in softmax!"
);
_SoftmaxBackward
(
NULL
,
output
,
input
,
output
->
grad
,
input
->
grad
,
leadDim
,
NOLOSS
);
_SoftmaxBackward
(
NULL
,
output
,
input
,
output
->
grad
,
input
->
grad
,
NULL
,
leadDim
,
NOLOSS
);
}
else
{
ShowNTErrors
(
"Wrong activation function type!"
);
...
...
source/network/XBackwardLoss.cpp
查看文件 @
7809ed05
...
...
@@ -42,7 +42,7 @@ compute dE/dx for a given function y = f(x)
>> lossName - name of the loss, e.g., cross entropy
*/
void
XLossGrad
::
Compute
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
XTensor
*
dedy
,
XTensor
*
dedx
,
XTensor
*
dedy
,
XTensor
*
dedx
,
XTensor
*
padding
,
int
funcID
,
void
*
params
,
LOSS_FUNCTION_NAME
lossName
)
{
...
...
@@ -58,7 +58,7 @@ void XLossGrad::Compute(XTensor * gold, XTensor * y, XTensor * x,
}
else
if
(
funcID
==
FUNC_LOGSOFTMAX
){
int
leadDim
=
*
(
int
*
)
params
;
_LogSoftmaxBackward
(
gold
,
y
,
x
,
dedy
,
dedx
,
leadDim
,
lossName
);
_LogSoftmaxBackward
(
gold
,
y
,
x
,
dedy
,
dedx
,
padding
,
leadDim
,
lossName
);
}
else
if
(
funcID
==
FUNC_RECTIFY
){
_RectifyBackward
(
gold
,
y
,
x
,
dedy
,
dedx
,
lossName
);
...
...
@@ -67,7 +67,7 @@ void XLossGrad::Compute(XTensor * gold, XTensor * y, XTensor * x,
_SigmoidBackward
(
gold
,
y
,
x
,
dedy
,
dedx
,
lossName
);
}
else
if
(
funcID
==
FUNC_SOFTMAX
){
int
leadDim
=
*
(
int
*
)
params
;
_SoftmaxBackward
(
gold
,
y
,
x
,
dedy
,
dedx
,
leadDim
,
lossName
);
_SoftmaxBackward
(
gold
,
y
,
x
,
dedy
,
dedx
,
padding
,
leadDim
,
lossName
);
}
else
{
ShowNTErrors
(
"wrong function found when call the backward process!"
);
...
...
@@ -83,10 +83,12 @@ compute dE/dy for variable y and error(loss) function E
>> lossName - name of the loss, e.g., cross entropy
*/
void
XLossGrad
::
Compute
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
dedy
,
XTensor
*
dedy
,
XTensor
*
padding
,
LOSS_FUNCTION_NAME
lossName
)
{
_LossBackward
(
dedy
,
gold
,
y
,
lossName
);
//_LossBackward(dedy, gold, y, lossName);
if
(
lossName
==
CROSSENTROPY
)
_CrossEntropyBackward
(
dedy
,
y
,
gold
,
NULL
,
padding
);
}
}
\ No newline at end of file
source/network/XBackwardLoss.h
查看文件 @
7809ed05
...
...
@@ -36,13 +36,13 @@ class XLossGrad
public
:
/* compute dE/dx for a given function y = f(x) */
void
Compute
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
XTensor
*
dedy
,
XTensor
*
dedx
,
XTensor
*
dedy
,
XTensor
*
dedx
,
XTensor
*
padding
,
int
funcID
,
void
*
params
,
LOSS_FUNCTION_NAME
lossName
);
/* compute dE/dy for variable y and error(loss) function E */
void
Compute
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
dedy
,
XTensor
*
dedy
,
XTensor
*
padding
,
LOSS_FUNCTION_NAME
lossName
);
};
...
...
source/network/XBackwardShape.cpp
查看文件 @
7809ed05
...
...
@@ -469,8 +469,6 @@ void XShapeGrad::GradTranspose(XTensor * node, bool isEfficient)
DelTensorBuf
(
b
);
node
->
visitMark
=
NODE_FINISHED
;
delete
b
;
}
/*
...
...
source/network/XNet.cpp
查看文件 @
7809ed05
...
...
@@ -55,7 +55,7 @@ void XNetClearAll()
XNet
::
XNet
()
{
nodes
.
Clear
();
isGradEfficient
=
tru
e
;
isGradEfficient
=
fals
e
;
}
/* de-constructor */
...
...
@@ -86,7 +86,31 @@ void XNet::Backward(XTensor &root, XTensor &gold, LOSS_FUNCTION_NAME loss)
XList
golds
(
1
);
golds
.
Add
(
&
gold
);
Backward
(
roots
,
golds
,
loss
);
XList
paddings
(
1
);
paddings
.
Add
(
NULL
);
Backward
(
roots
,
golds
,
paddings
,
loss
);
}
/*
backward propagation to obtain gradient wrt. the loss/error function
(single-root entry point: every argument is wrapped into a one-item list
and the work is handed over to the list-based Backward)
>> root - root node (output) of the network
>> gold - gold standard for the output
>> padding - specify a target value that is ignored and does not contribute to the loss computation
>> loss - name of loss function
*/
void XNet::Backward(XTensor &root, XTensor &gold, XTensor &padding, LOSS_FUNCTION_NAME loss)
{
    /* one-item containers for the root, its gold standard and its padding */
    XList rootList(1);
    XList goldList(1);
    XList paddingList(1);

    rootList.Add(&root);
    goldList.Add(&gold);
    paddingList.Add(&padding);

    Backward(rootList, goldList, paddingList, loss);
}
/*
...
...
@@ -102,7 +126,10 @@ void XNet::Backward(XTensor &root, LOSS_FUNCTION_NAME loss)
XList
golds
(
1
);
golds
.
Add
(
NULL
);
Backward
(
roots
,
golds
,
loss
);
XList
paddings
(
1
);
paddings
.
Add
(
NULL
);
Backward
(
roots
,
golds
,
paddings
,
loss
);
}
/*
...
...
@@ -110,9 +137,10 @@ backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes
>> root - a list of root nodes (output) of the network
>> gold - a list of gold standard for the output
>> padding - specify a target value that is ignored
>> loss - name of loss function
*/
void
XNet
::
Backward
(
XList
&
roots
,
XList
&
golds
,
LOSS_FUNCTION_NAME
loss
)
void
XNet
::
Backward
(
XList
&
roots
,
XList
&
golds
,
XList
&
paddings
,
LOSS_FUNCTION_NAME
loss
)
{
Traverse
(
roots
);
...
...
@@ -131,6 +159,7 @@ void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
for
(
int
i
=
0
;
i
<
roots
.
count
;
i
++
){
XTensor
*
root
=
(
XTensor
*
)
roots
.
Get
(
i
);
XTensor
*
gold
=
(
XTensor
*
)
golds
.
Get
(
i
);
XTensor
*
padding
=
(
XTensor
*
)
paddings
.
Get
(
i
);
XLink
&
income
=
root
->
income
;
int
funcID
=
income
.
typeID
;
void
*
params
=
income
.
params
;
...
...
@@ -139,15 +168,21 @@ void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
Note that we do not need to obtain dE/dy here because it is no use in the
following process of back-propagation */
if
(
gold
!=
NULL
&&
income
.
tailNum
==
1
&&
(
funcID
&
FUNCTION_BASE
)){
XTensor
*
x
=
income
.
tails
[
0
];
XNoder
::
MakeGrad
(
x
);
lossGrad
.
Compute
(
gold
,
root
,
x
,
NULL
,
x
->
grad
,
funcID
,
params
,
loss
);
root
->
visitMark
=
NODE_FINISHED
;
if
(
funcID
==
FUNC_LOGSOFTMAX
||
funcID
==
FUNC_SOFTMAX
)
{
XTensor
*
x
=
income
.
tails
[
0
];
XNoder
::
MakeGrad
(
x
);
lossGrad
.
Compute
(
gold
,
root
,
x
,
NULL
,
x
->
grad
,
padding
,
funcID
,
params
,
loss
);
root
->
visitMark
=
NODE_FINISHED
;
}
else
{
XNoder
::
MakeGrad
(
root
);
lossGrad
.
Compute
(
gold
,
root
,
root
->
grad
,
padding
,
loss
);
}
}
/* we compute dE/dy (y is the output) if no predefined activation function is used */
else
{
XNoder
::
MakeGrad
(
root
);
lossGrad
.
Compute
(
gold
,
root
,
root
->
grad
,
loss
);
lossGrad
.
Compute
(
gold
,
root
,
root
->
grad
,
NULL
,
loss
);
}
}
...
...
@@ -178,16 +213,35 @@ void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
/*
backward propagation to obtain gradient
with a number of root nodes
>> root - a list of root nodes (output) of the network
>> root
s
- a list of root nodes (output) of the network
>> loss - name of loss function
*/
void
XNet
::
Backward
(
XList
&
roots
,
LOSS_FUNCTION_NAME
loss
)
{
XList
golds
(
roots
.
count
);
for
(
int
i
=
0
;
i
<
roots
.
count
;
i
++
)
XList
paddings
(
roots
.
count
);
for
(
int
i
=
0
;
i
<
roots
.
count
;
i
++
)
{
golds
.
Add
(
NULL
);
paddings
.
Add
(
NULL
);
}
Backward
(
roots
,
golds
,
paddings
,
loss
);
}
/*
backward propagation to obtain gradient
with a number of root nodes
>> roots - a list of root nodes (output) of the network
>> golds - a list of gold standard for the output
>> loss - name of loss function
*/
void
XNet
::
Backward
(
XList
&
roots
,
XList
&
golds
,
LOSS_FUNCTION_NAME
loss
)
{
XList
paddings
(
roots
.
count
);
for
(
int
i
=
0
;
i
<
roots
.
count
;
i
++
)
paddings
.
Add
(
NULL
);
Backward
(
roots
,
golds
,
loss
);
Backward
(
roots
,
golds
,
paddings
,
loss
);
}
/*
...
...
source/network/XNet.h
查看文件 @
7809ed05
...
...
@@ -62,17 +62,24 @@ struct XNet
/* backward propagation to obtain gradient wrt. the loss/error function */
void
Backward
(
XTensor
&
root
,
XTensor
&
gold
,
LOSS_FUNCTION_NAME
loss
=
NOLOSS
);
/* backward propagation to obtain gradient wrt. the loss/error function */
void
Backward
(
XTensor
&
root
,
XTensor
&
gold
,
XTensor
&
padding
,
LOSS_FUNCTION_NAME
loss
=
NOLOSS
);
/* backward propagation to obtain gradient */
void
Backward
(
XTensor
&
root
,
LOSS_FUNCTION_NAME
loss
=
NOLOSS
);
/* backward propagation to obtain gradient wrt. the loss/error function
with a number of root nodes */
void
Backward
(
XList
&
roots
,
XList
&
golds
,
LOSS_FUNCTION_NAME
loss
=
NOLOSS
);
void
Backward
(
XList
&
roots
,
XList
&
golds
,
XList
&
paddings
,
LOSS_FUNCTION_NAME
loss
=
NOLOSS
);
/* backward propagation to obtain gradient
with a number of root nodes */
void
Backward
(
XList
&
roots
,
LOSS_FUNCTION_NAME
loss
=
NOLOSS
);
/* backward propagation to obtain gradient
with a number of root nodes */
void
Backward
(
XList
&
roots
,
XList
&
golds
,
LOSS_FUNCTION_NAME
loss
=
NOLOSS
);
/* backward computation for a given node */
void
BackwardNode
(
XTensor
*
node
,
bool
isEfficent
=
false
);
...
...
source/sample/fnnlm/FNNLM.cpp
查看文件 @
7809ed05
...
...
@@ -514,6 +514,8 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
if
(
isEnd
)
break
;
Test
(
testFN
,
outputFN
,
model
);
}
double
elapsed
=
GetClockSec
()
-
startT
;
...
...
@@ -890,7 +892,7 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
/* for y = softmax(s), we get dE/ds
where E is the error function (define by loss) */
_LogSoftmaxBackward
(
&
gold
,
&
y
,
&
s
,
NULL
,
&
deds
,
1
,
loss
);
_LogSoftmaxBackward
(
&
gold
,
&
y
,
&
s
,
NULL
,
&
deds
,
NULL
,
1
,
loss
);
/* for s = x * w, we get
dE/w_{i,j} = dE/ds_j * ds/dw_{i,j}
...
...
source/sample/transformer/T2TEmbedding.cpp
查看文件 @
7809ed05
...
...
@@ -68,9 +68,10 @@ void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
}
/*
make positional embeddings (of size eSize * length
eSize - embedding size
length - length of the sequenc
make positional embeddings (of size eSize * length)
>> eSize - embedding size
>> d - dimension size of the hidden layers
>> length - length of the sequence
*/
void
T2TEmbedder
::
MakePosEmbedding
(
int
eSize
,
int
d
,
int
length
)
{
...
...
@@ -114,15 +115,15 @@ make the network
*/
XTensor
T2TEmbedder
::
Make
(
XTensor
&
input
)
{
CheckNTErrors
(
input
.
GetDim
(
-
1
)
==
vSize
,
"Wrong vocabulary size!"
);
//
CheckNTErrors(input.GetDim(-1) == vSize, "Wrong vocabulary size!");
CheckNTErrors
(
input
.
order
>
1
,
"Wrong input tensor size!"
);
CheckNTErrors
(
input
.
dimSize
[
input
.
order
-
2
]
<
maxLength
,
"The sequence is too long!"
);
CheckNTErrors
(
input
.
dimSize
[
input
.
order
-
1
]
<
maxLength
,
"The sequence is too long!"
);
CheckNTErrors
(
vSize
>
0
,
"set vocabulary size by
\"
-vsize
\"
"
);
CheckNTErrors
(
eSize
>
0
,
"set embedding size by
\"
-esize
\"
"
);
int
dims
[
MAX_TENSOR_DIM_NUM
];
memcpy
(
dims
,
input
.
dimSize
,
input
.
order
*
sizeof
(
int
));
dims
[
input
.
order
-
1
]
=
eSize
;
dims
[
input
.
order
]
=
eSize
;
XTensor
wordEmbedding
;
XTensor
posEmbedding
;
...
...
@@ -138,7 +139,8 @@ XTensor T2TEmbedder::Make(XTensor &input)
/* we make positional embeddings first */
//if(!match){
if
(
true
){
InitTensor
(
&
posEmbedding
,
input
.
order
,
dims
,
X_FLOAT
,
1.0
F
,
devID
,
mem
);
InitTensor
(
&
posEmbedding
,
input
.
order
+
1
,
dims
,
X_FLOAT
,
1.0
F
,
devID
,
mem
);
XTensor
*
posTMP
=
NewTensorBuf
(
2
,
dims
+
1
,
X_FLOAT
,
1.0
F
,
devID
,
mem
);
_CopyValues
(
&
posEmbeddingBase
,
0
,
posTMP
->
unitNum
,
posTMP
,
0
);
...
...
@@ -148,7 +150,9 @@ XTensor T2TEmbedder::Make(XTensor &input)
}
/* then we make word embeddings */
wordEmbedding
=
Linear
(
MMul
(
input
,
w
),
(
float
)
sqrt
((
float
)
eSize
));
//wordEmbedding = Linear(MMul(input, w), (float)sqrt((float)eSize));
wordEmbedding
=
Gather
(
w
,
input
);
wordEmbedding
=
Linear
(
wordEmbedding
,
(
float
)
sqrt
((
float
)
eSize
));
/* we sum over the two embeddings */
return
wordEmbedding
+
posEmbedding
;
...
...
source/sample/transformer/T2TModel.cpp
查看文件 @
7809ed05
...
...
@@ -121,14 +121,22 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool is
XTensor
encoding
;
/* generate mask to see "previous" words only */
int
len
=
input
.
GetDim
(
input
.
order
-
2
);
int
*
dims
=
new
int
[
input
.
order
+
1
];
//int len = input.GetDim(input.order - 2);
//int * dims = new int[input.order + 1];
//for(int i = 0; i < input.order; i++)
// dims[i + 1] = input.GetDim(i);
//dims[0] = nhead;
//dims[input.order] = len;
//XTensor mask(input.order + 1, dims, X_FLOAT, 1.0F, input.devID, input.mem);
int
len
=
input
.
GetDim
(
input
.
order
-
1
);
int
*
dims
=
new
int
[
input
.
order
+
2
];
for
(
int
i
=
0
;
i
<
input
.
order
;
i
++
)
dims
[
i
+
1
]
=
input
.
GetDim
(
i
);
dims
[
0
]
=
nhead
;
dims
[
input
.
order
]
=
len
;
XTensor
mask
(
input
.
order
+
1
,
dims
,
X_FLOAT
,
1.0
F
,
input
.
devID
,
input
.
mem
);
dims
[
input
.
order
+
1
]
=
len
;
XTensor
mask
(
input
.
order
+
2
,
dims
,
X_FLOAT
,
1.0
F
,
padding
.
devID
,
padding
.
mem
);
/* an upper triangular matrix where the cells of the upper triangular are set to -1e-9.
this matrix can be used to prevent the attention to current or following words in
a given sequence. */
...
...
@@ -140,24 +148,24 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool is
dimsPadding
[
i
]
=
padding
.
GetDim
(
i
);
dimsPadding
[
padding
.
order
-
1
]
=
padding
.
GetDim
(
-
1
);
dimsPadding
[
padding
.
order
]
=
padding
.
GetDim
(
-
1
);
XTensor
*
padding2
=
NewTensorBuf
(
padding
.
order
+
1
,
dimsPadding
,
padding
.
dataType
,
padding
.
denseRatio
,
padding
.
devID
,
padding
.
mem
);
padding
.
denseRatio
,
padding
.
devID
,
padding
.
mem
);
for
(
int
i
=
0
;
i
<
padding2
->
order
;
i
++
)
dimsPadding
[
i
+
1
]
=
padding2
->
GetDim
(
i
);
dimsPadding
[
0
]
=
nhead
;
XTensor
*
padding3
=
NewTensorBuf
(
padding
.
order
+
2
,
dimsPadding
,
padding
.
dataType
,
padding
.
denseRatio
,
padding
.
devID
,
padding
.
mem
);
/* mask of the padding */
_Unsqueeze
(
&
padding
,
padding2
,
padding
.
order
-
1
,
padding
.
GetDim
(
-
1
));
_Unsqueeze
(
padding2
,
padding3
,
0
,
nhead
);
_ScaleAndShiftMe
(
padding3
,
1e9
F
,
-
1e9
F
);
//_Sum(&mask, padding3, &mask);
//
XTensor * padding3 = NewTensorBuf(padding.order + 2, dimsPadding, padding.dataType,
//
padding.denseRatio, padding.devID, padding.mem);
//
/
//
* mask of the padding */
//
_Unsqueeze(&padding, padding2, padding.order - 1, padding.GetDim(-1));
//
_Unsqueeze(padding2, padding3, 0, nhead);
//
//
_ScaleAndShiftMe(padding3, 1e9F, -1e9F);
//
//
//
_Sum(&mask, padding3, &mask);
encoding
=
MakeEncoder
(
input
,
mask
,
isTraining
);
outputLayer
.
Make
(
encoding
,
output
);
...
...
@@ -165,8 +173,8 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool is
delete
[]
dims
;
delete
[]
dimsPadding
;
//DelTensorBuf(padding3);
DelTensorBuf
(
padding2
);
DelTensorBuf
(
padding3
);
}
/*
...
...
@@ -235,8 +243,8 @@ void T2TModel::MakeMT(XTensor &inputEnc, XTensor &inputDec, XTensor &output, XTe
delete
[]
dims
;
delete
[]
dimsPadding
;
DelTensorBuf
(
padding2
);
DelTensorBuf
(
padding3
);
DelTensorBuf
(
padding2
);
}
/*
...
...
source/sample/transformer/T2TOutput.cpp
查看文件 @
7809ed05
...
...
@@ -93,7 +93,8 @@ void T2TOutput::Make(XTensor &input, XTensor &output)
{
XTensor
&
x
=
input
;
output
=
LogSoftmax
(
MMul
(
x
,
w
),
-
1
);
//output = LogSoftmax(MMul(x, w), -1);
output
=
Softmax
(
MMul
(
x
,
w
),
-
1
);
}
}
source/sample/transformer/T2TTrainer.cpp
查看文件 @
7809ed05
差异被折叠。
点击展开。
source/sample/transformer/T2TTrainer.h
查看文件 @
7809ed05
...
...
@@ -103,6 +103,10 @@ public:
/* indicates whether we use adam */
bool
useAdam
;
int
validStep
;
int
curEpoch
;
/* hyper parameters of adam*/
float
adamBeta1
;
float
adamBeta2
;
...
...
@@ -131,7 +135,7 @@ public:
/* number of batches on which we do model update */
int
updateStep
;
/* indicates whether we double the </s> symb
le
for the output of lms */
/* indicates whether we double the </s> symb
ol
for the output of lms */
bool
isDoubledEnd
;
/* indicates whether we use batchsize = max * sc
...
...
@@ -150,7 +154,7 @@ public:
void
Init
(
int
argc
,
char
**
argv
);
/* train the model */
void
Train
(
const
char
*
fn
,
const
char
*
validFN
,
const
char
*
modelFN
,
T2TModel
*
model
);
bool
Train
(
const
char
*
fn
,
const
char
*
validFN
,
const
char
*
modelFN
,
T2TModel
*
model
);
/* test the model */
void
Test
(
const
char
*
fn
,
const
char
*
ofn
,
T2TModel
*
model
);
...
...
@@ -172,7 +176,28 @@ public:
int
*
seqs
,
int
vsEnc
,
int
vsDec
,
int
sBatch
,
int
wBatch
,
bool
isSorted
,
int
&
wCount
,
int
devID
,
XMem
*
mem
);
int
devID
,
XMem
*
mem
,
bool
isTraining
);
/* load a batch of sequences (for language modeling) */
int
LoadBatchLM
(
FILE
*
file
,
XTensor
*
batchEnc
,
XTensor
*
paddingEnc
,
XTensor
*
batchDec
,
XTensor
*
paddingDec
,
XTensor
*
gold
,
int
*
seqs
,
int
vs
,
int
sBatch
,
int
wBatch
,
bool
isSorted
,
int
&
wCount
,
int
devID
,
XMem
*
mem
,
bool
isTraining
);
/* load a batch of sequences (for machine translation) */
int
LoadBatchMT
(
FILE
*
file
,
XTensor
*
batchEnc
,
XTensor
*
paddingEnc
,
XTensor
*
batchDec
,
XTensor
*
paddingDec
,
XTensor
*
gold
,
int
*
seqs
,
int
vsEnc
,
int
vsDec
,
int
sBatch
,
int
wBatch
,
bool
isSorted
,
int
&
wCount
,
int
devID
,
XMem
*
mem
,
bool
isTraining
);
/* load a batch of sequences (for language modeling) */
int
LoadBatchLM
(
FILE
*
file
,
...
...
source/sample/transformer/Transformer.cpp
查看文件 @
7809ed05
...
...
@@ -25,6 +25,8 @@
#include "T2TUtility.h"
#include "T2TTrainer.h"
#include "../../tensor/XDevice.h"
#include "../../tensor/XUtility.h"
#include "../../tensor/XGlobal.h"
namespace
transformer
{
...
...
@@ -56,20 +58,74 @@ int TransformerMain(int argc, const char ** argv)
LoadParamString
(
argc
,
args
,
"test"
,
testFN
,
""
);
LoadParamString
(
argc
,
args
,
"output"
,
outputFN
,
""
);
T2TTrainer
trainer
;
trainer
.
Init
(
argc
,
args
);
T2TModel
model
;
model
.
InitModel
(
argc
,
args
);
/* learn model parameters */
if
(
strcmp
(
trainFN
,
""
))
trainer
.
Train
(
trainFN
,
testFN
,
strcmp
(
modelFN
,
""
)
?
modelFN
:
"checkpoint.model"
,
&
model
);
if
(
strcmp
(
trainFN
,
""
))
{
double
startT
=
GetClockSec
();
T2TTrainer
trainer
;
trainer
.
Init
(
argc
,
args
);
char
*
fn
=
new
char
[
MAX_LINE_LENGTH
];
char
*
fn1
=
new
char
[
MAX_LINE_LENGTH
];
char
*
fn2
=
new
char
[
MAX_LINE_LENGTH
];
modelFN
=
strcmp
(
modelFN
,
""
)
?
modelFN
:
(
char
*
)
"checkpoint.model"
;
int
epoch
;
bool
isTrain
;
for
(
epoch
=
1
;
epoch
<=
trainer
.
nepoch
;
epoch
++
)
{
sprintf
(
fn
,
"%s.%s.%03d"
,
modelFN
,
"epoch"
,
epoch
-
1
);
sprintf
(
fn1
,
"%s.%s.%03d"
,
modelFN
,
"epoch"
,
epoch
);
sprintf
(
fn2
,
"%s.%s.%03d.output"
,
modelFN
,
"epoch"
,
epoch
);
if
(
epoch
==
1
)
{
T2TModel
model
;
model
.
InitModel
(
argc
,
args
);
isTrain
=
trainer
.
Train
(
trainFN
,
testFN
,
modelFN
,
&
model
);
model
.
Dump
(
fn1
);
}
else
{
T2TModel
model
;
model
.
InitModel
(
argc
,
args
);
model
.
Read
(
fn
);
isTrain
=
trainer
.
Train
(
trainFN
,
testFN
,
modelFN
,
&
model
);
model
.
Dump
(
fn1
);
}
if
(
trainer
.
useEpochCheckpoint
&&
strcmp
(
testFN
,
""
))
{
T2TTrainer
tester
;
tester
.
Init
(
argc
,
args
);
T2TModel
model
;
model
.
InitModel
(
argc
,
args
);
model
.
Read
(
fn1
);
tester
.
Test
(
testFN
,
fn2
,
&
model
);
}
if
(
!
isTrain
)
break
;
}
double
elapsed
=
GetClockSec
()
-
startT
;
epoch
=
MIN
(
epoch
,
trainer
.
nepoch
);
XPRINT2
(
0
,
stderr
,
"[INFO] training finished (took %.1fs and epoch=%d)
\n
"
,
elapsed
,
epoch
);
delete
[]
fn
;
delete
[]
fn1
;
delete
[]
fn2
;
}
/* don't dump the final model */
/* save the final model */
if
(
strcmp
(
modelFN
,
""
)
&&
strcmp
(
trainFN
,
""
))
model
.
Dump
(
modelFN
);
//if(strcmp(modelFN, "") && strcmp(trainFN, ""))
// model.Dump(modelFN);
T2TModel
model
;
model
.
InitModel
(
argc
,
args
);
/* load the model if necessary */
if
(
strcmp
(
modelFN
,
""
))
...
...
source/tensor/XDevice.cpp
查看文件 @
7809ed05
...
...
@@ -446,7 +446,7 @@ int XDevManager::GetCudaThread2D(const int devID, const int n, const int m, int
CheckNTErrors
((
!
(
b
&
(
b
-
1
))),
"Block size (x-axis) must be in 2^x"
);
CheckNTErrors
((
gXSize
<=
GPUs
[
devID
].
GPUMaxGridSize
[
0
]
&&
gYSize
<=
GPUs
[
devID
].
GPUMaxGridSize
[
1
]),
"A too large grid size."
);
gYSize
<=
GPUs
[
devID
].
GPUMaxGridSize
[
1
]),
"A too large grid size."
);
blockSize
[
0
]
=
bXSize
;
blockSize
[
1
]
=
bYSize
;
...
...
source/tensor/XMem.cpp
查看文件 @
7809ed05
...
...
@@ -292,7 +292,8 @@ void XMem::SetComputationMode(bool myIsForComputation)
if
(
!
myIsForComputation
&&
devID
>=
0
&&
cublasHandle
!=
NULL
)
cublasDestroy
(
cublasHandle
);
if
(
myIsForComputation
)
CheckNTErrors
(
cublasCreate
(
&
cublasHandle
)
==
CURAND_STATUS_SUCCESS
,
"Cannot create the cublas handle."
);
CheckNTErrors
((
enum
curandStatus
)
cublasCreate
(
&
cublasHandle
)
==
CURAND_STATUS_SUCCESS
,
"Cannot create the cublas handle."
);
SetDevice
(
devIDBackup
);
#endif
...
...
@@ -1392,8 +1393,8 @@ void XMem::CreateBLASHandle()
"Cannot destroy the cublas handle."
);
}
CheckNTErrors
(
cublasCreate
(
&
cublasHandle
)
==
CURAND_STATUS_SUCCESS
,
"Cannot create the cublas handle."
);
CheckNTErrors
(
(
enum
curandStatus
)
cublasCreate
(
&
cublasHandle
)
==
CURAND_STATUS_SUCCESS
,
"Cannot create the cublas handle."
);
#endif
}
...
...
source/tensor/XName.cpp
查看文件 @
7809ed05
...
...
@@ -35,6 +35,8 @@ const char * GetOPName(int type)
return
"M_EXP"
;
else
if
(
type
==
MATH_FLOOR
)
return
"M_FLOOR"
;
else
if
(
type
==
MATH_ISNONZERO
)
return
"M_ISNONZERO"
;
else
if
(
type
==
MATH_ISZERO
)
return
"M_ISZERO"
;
else
if
(
type
==
MATH_LOG
)
...
...
source/tensor/XName.h
查看文件 @
7809ed05
...
...
@@ -35,7 +35,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_CEIL MATH_ABSOLUTE + 1
#define MATH_EXP MATH_CEIL + 1
#define MATH_FLOOR MATH_EXP + 1
#define MATH_ISZERO MATH_FLOOR + 1
#define MATH_ISNONZERO MATH_FLOOR + 1
#define MATH_ISZERO MATH_ISNONZERO + 1
#define MATH_LOG MATH_ISZERO + 1
#define MATH_SQRT MATH_LOG + 1
#define MATH_SQUARE MATH_SQRT + 1
...
...
source/tensor/XTensor.cpp
查看文件 @
7809ed05
...
...
@@ -1057,9 +1057,9 @@ int XTensor::GetKeyInSparse(int i)
/*
set the value of a cell
>> value - value
to assign to the cell
>> value - value
we tend to set
>> index - index of the cell for each dimension
>>
>>
size - size of the index
*/
bool
XTensor
::
Set
(
DTYPE
value
,
int
index
[],
int
size
)
{
...
...
@@ -1070,8 +1070,9 @@ bool XTensor::Set(DTYPE value, int index[], int size)
/*
set the value of a cell in a 1d tensor
>> value - value
to assign to the cell
>> value - value
we tend to set
>> i - item offset
<< return - succeeded or not
*/
bool
XTensor
::
Set1D
(
DTYPE
value
,
int
i
)
{
...
...
@@ -1124,6 +1125,78 @@ bool XTensor::Set3D(DTYPE value, int d0, int d1, int d2)
return
SetToDevice
(
devID
,
GetCell
(
dims
,
3
),
value
);
}
/*
write an integer into the cell addressed by a multi-dimensional index
>> value - the integer to store
>> index - position of the cell, one entry per dimension
>> size - size of the index (handed to GetCell unchanged)
<< return - whatever SetToDeviceInt reports for the addressed cell
*/
bool XTensor::SetInt(int value, int index[], int size)
{
    /* integer writes are only accepted by tensors of type X_INT */
    CheckNTErrors((dataType == X_INT), "The tensor is not in integer type.");

    const bool isSet = SetToDeviceInt(devID, GetCell(index, size), value);

    return isSet;
}
/*
set the integer value of a cell in a 1d tensor
>> value - the integer to store in the cell
>> i - item offset, must satisfy 0 <= i < dimSize[0]
<< return - succeeded or not (as reported by SetToDeviceInt)
*/
bool XTensor::Set1DInt(int value, int i)
{
    /* this setter only applies to tensors of order 1 */
    CheckNTErrors((order == 1), "Cannot get a 1d cell for a tensor whose order is not 1!");
    CheckNTErrors((i >= 0 && i < dimSize[0]), "dimension 0 is out of range!");
    /* integer writes are only accepted by tensors of type X_INT */
    CheckNTErrors((dataType == X_INT), "The tensor is not in integer type.");

    int dims[1] = {i};

    return SetToDeviceInt(devID, GetCell(dims, 1), value);
}
/*
store an integer in one cell of a 2d tensor
>> value - the integer to store
>> ni - row index, checked against dimSize[0]
>> mi - column index, checked against dimSize[1]
<< return - succeeded or not (as reported by SetToDeviceInt)
*/
bool XTensor::Set2DInt(int value, int ni, int mi)
{
    /* shape, range and type checks, in this order */
    CheckNTErrors((order == 2), "Cannot get a 2d cell for a tensor whose order is not 2!");
    CheckNTErrors((ni >= 0 && ni < dimSize[0]), "dimension 0 is out of range!");
    CheckNTErrors((mi >= 0 && mi < dimSize[1]), "dimension 1 is out of range!");
    CheckNTErrors((dataType == X_INT), "The tensor is not in integer type.");

    /* (row, column) position of the target cell */
    int position[2];
    position[0] = ni;
    position[1] = mi;

    return SetToDeviceInt(devID, GetCell(position, 2), value);
}
/*
set the integer value of a cell in a 3d tensor
>> value - the integer to store in the cell
>> d0 - index of dimension 0, must satisfy 0 <= d0 < dimSize[0]
>> d1 - index of dimension 1, must satisfy 0 <= d1 < dimSize[1]
>> d2 - index of dimension 2, must satisfy 0 <= d2 < dimSize[2]
<< return - succeeded or not (as reported by SetToDeviceInt)
*/
bool XTensor::Set3DInt(int value, int d0, int d1, int d2)
{
    /* this setter only applies to tensors of order 3 */
    CheckNTErrors(order == 3, "Cannot get a 3d cell for a tensor whose order is not 3!");
    CheckNTErrors(d0 >= 0 && d0 < dimSize[0], "dimension 0 is out of range!");
    CheckNTErrors(d1 >= 0 && d1 < dimSize[1], "dimension 1 is out of range!");
    CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!");
    /* integer writes are only accepted by tensors of type X_INT */
    CheckNTErrors((dataType == X_INT), "The tensor is not in integer type.");

    int dims[3] = {d0, d1, d2};

    return SetToDeviceInt(devID, GetCell(dims, 3), value);
}
/*
increase the value of a cell in a 2d tensor
>> value - value we tend to set
...
...
@@ -1986,6 +2059,9 @@ XTensor * NewTensorBuf(const int myOrder, const int * myDimSize,
XTensor
*
tensor
=
NewTensor
(
myOrder
,
dims
,
myDataType
,
myDenseRatio
,
devID
,
myMem
);
if
(
tensor
->
unitNum
*
tensor
->
unitSize
==
176657664
)
{
tensor
->
Dump
(
stderr
,
""
,
200
);
}
if
(
myMem
!=
NULL
)
tensor
->
data
=
myMem
->
AllocBuf
(
myMem
->
devID
,
tensor
->
unitNum
*
tensor
->
unitSize
);
else
...
...
@@ -2135,7 +2211,7 @@ generate a copy of XTensor
>> isFilledData - indicates whether we allocate the data for
the newly-generated tensor
*/
XTensor
*
NewTensor
(
XTensor
*
a
,
bool
isFilledData
)
XTensor
*
NewTensor
(
const
XTensor
*
a
,
bool
isFilledData
)
{
int
dims
[
MAX_TENSOR_DIM_NUM
];
...
...
source/tensor/XTensor.h
查看文件 @
7809ed05
...
...
@@ -326,6 +326,18 @@ public:
/* set the value of a cell in a 3d tensor */
bool
Set3D
(
DTYPE
value
,
int
d0
,
int
d1
,
int
d2
);
/* set the integer value of a cell */
bool
SetInt
(
int
value
,
int
index
[],
int
size
=
-
1
);
/* set the integer value of a cell in a 1d tensor */
bool
Set1DInt
(
int
value
,
int
i
);
/* set the integer value of a cell in a 2d tensor */
bool
Set2DInt
(
int
value
,
int
ni
,
int
mi
);
/* set the integer value of a cell in a 3d tensor */
bool
Set3DInt
(
int
value
,
int
d0
,
int
d1
,
int
d2
);
/* increase the value of a cell in a 2d */
bool
Add2D
(
DTYPE
value
,
int
ni
,
int
mi
);
...
...
@@ -450,7 +462,7 @@ XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, co
const
int
myDevID
=
-
1
,
XMem
*
myMem
=
NULL
);
/* generate a copy of XTensor (with a reference to a given tensor) */
XTensor
*
NewTensor
(
XTensor
*
a
,
bool
isFilledData
=
true
);
XTensor
*
NewTensor
(
const
XTensor
*
a
,
bool
isFilledData
=
true
);
/* free the data space of a given tensor */
void
DelTensor
(
XTensor
*
tensor
);
...
...
source/tensor/XUtility.cpp
查看文件 @
7809ed05
...
...
@@ -491,6 +491,21 @@ bool SetToDevice(int devID, void * p, DTYPE value)
return
true
;
}
/*
assign an integer number to a variable that is kept on a specified device
>> devID - id of the device that holds the variable; a negative id takes the direct-store path
>> p - address of the variable to overwrite
>> value - the integer to write
<< return - false if p is NULL, true otherwise
*/
bool SetToDeviceInt(int devID, void * p, int value)
{
    if(p != NULL){
        if(devID >= 0){
            /* NOTE(review): presumably -1 names the host as the source side of the copy -
               confirm against XMemCopy */
            XMemCopy(p, devID, &value, -1, sizeof(int));
        }
        else{
            /* plain store through the pointer */
            int * target = (int*)p;
            *target = value;
        }
        return true;
    }

    /* nothing to write to */
    return false;
}
/* get the next number with power of 2 */
unsigned
int
GetNextPower2
(
unsigned
int
n
)
{
...
...
source/tensor/XUtility.h
查看文件 @
7809ed05
...
...
@@ -50,6 +50,7 @@ extern void XMemFreeOnDev(int devID, void * p);
extern
DTYPE
ToCPU
(
int
devID
,
void
*
value
);
extern
int
ToCPUInt
(
int
devID
,
void
*
value
);
extern
bool
SetToDevice
(
int
devID
,
void
*
p
,
DTYPE
value
);
extern
bool
SetToDeviceInt
(
int
devID
,
void
*
p
,
int
value
);
extern
unsigned
int
GetNextPower2
(
unsigned
int
n
);
extern
void
XSleep
(
int
sleepTime
);
extern
double
GetClock
();
...
...
source/tensor/core/getandset/SetData.cpp
查看文件 @
7809ed05
...
...
@@ -70,9 +70,9 @@ void _SetDataFanInOut(XTensor * tensor, DTYPE gain)
fanOut
=
numOutputFmaps
*
receptiveFieldSize
;
}
DTYPE
std
=
gain
*
(
float
)
sqrt
(
2.0
/
(
fanIn
+
fanOut
));
DTYPE
a
=
(
DTYPE
)
sqrt
(
3.0
)
*
std
;
_SetDataRand
(
tensor
,
-
a
,
a
);
DTYPE
finfout
=
gain
*
(
float
)
sqrt
(
6.0
F
/
(
fanIn
+
fanOut
));
tensor
->
SetDataRand
(
-
finfout
,
finfout
)
;
//_SetDataRand(tensor, -finfout, finfout
);
}
/*
...
...
@@ -393,7 +393,7 @@ void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
if
(
tensor
==
NULL
)
return
;
/*
G
PU code */
/*
C
PU code */
if
(
tensor
->
devID
<
0
){
DTYPE
variance
=
upper
-
lower
;
...
...
source/tensor/core/math/Unary.cpp
查看文件 @
7809ed05
...
...
@@ -37,6 +37,11 @@ DTYPE round(DTYPE r)
return
(
r
>
0.0
)
?
(
DTYPE
)
floor
(
r
+
0.5
)
:
(
DTYPE
)
ceil
(
r
-
0.5
);
}
/* return one if the input compares unequal to zero, and zero otherwise */
DTYPE isnonzero(DTYPE r)
{
    if (r != 0.0)
        return (DTYPE)1.0;

    return (DTYPE)0.0;
}
DTYPE
iszero
(
DTYPE
r
)
{
return
(
r
==
0.0
)
?
(
DTYPE
)
1.0
:
(
DTYPE
)
0.0
;
...
...
@@ -93,6 +98,10 @@ _SIMPLE_UNARY_FUNCTION(_Floor, _CudaFloor, floor)
_SIMPLE_UNARY_FUNCTION_ME
(
_FloorMe
,
_Floor
)
SIMPLE_UNARY_FUNCTION
(
Floor
,
_Floor
,
MATH_FLOOR
)
_SIMPLE_UNARY_FUNCTION
(
_IsNonZero
,
_CudaIsNonZero
,
isnonzero
)
_SIMPLE_UNARY_FUNCTION_ME
(
_IsNonZeroMe
,
_IsNonZero
)
SIMPLE_UNARY_FUNCTION
(
IsNonZero
,
_IsNonZero
,
MATH_ISNONZERO
)
_SIMPLE_UNARY_FUNCTION
(
_IsZero
,
_CudaIsZero
,
iszero
)
_SIMPLE_UNARY_FUNCTION_ME
(
_IsZeroMe
,
_IsZero
)
SIMPLE_UNARY_FUNCTION
(
IsZero
,
_IsZero
,
MATH_ISZERO
)
...
...
@@ -173,6 +182,10 @@ _SIMPLE_UNARY_FUNCTION(_Floor, floor)
_SIMPLE_UNARY_FUNCTION_ME
(
_FloorMe
,
_Floor
)
SIMPLE_UNARY_FUNCTION
(
Floor
,
_Floor
,
MATH_FLOOR
)
_SIMPLE_UNARY_FUNCTION
(
_IsNonZero
,
isnonzero
)
_SIMPLE_UNARY_FUNCTION_ME
(
_IsNonZeroMe
,
_IsNonZero
)
SIMPLE_UNARY_FUNCTION
(
IsNonZero
,
_IsNonZero
,
MATH_ISNONZERO
)
_SIMPLE_UNARY_FUNCTION
(
_IsZero
,
iszero
)
_SIMPLE_UNARY_FUNCTION_ME
(
_IsZeroMe
,
_IsZero
)
SIMPLE_UNARY_FUNCTION
(
IsZero
,
_IsZero
,
MATH_ISZERO
)
...
...
source/tensor/core/math/Unary.cu
查看文件 @
7809ed05
...
...
@@ -41,11 +41,18 @@ DTYPE cudaround(DTYPE r)
}
/* device-side helper: return one if the input compares unequal to zero, and zero otherwise */
__device__
DTYPE cudaisnonzero(DTYPE r)
{
    if (r != 0.0)
        return (DTYPE)1.0;

    return (DTYPE)0.0;
}
/* device-side helper: return one if the input compares equal to zero, and zero otherwise */
__device__
DTYPE cudaiszero(DTYPE r)
{
    if (r == 0.0)
        return (DTYPE)1.0;

    return (DTYPE)0.0;
}
#define SIMPLE_UNARY_FUNCTION_GPU(funcName, origFunc) \
__global__ \
void Kernel##funcName(DTYPE * a, DTYPE * b, int size) \
...
...
@@ -96,6 +103,7 @@ SIMPLE_UNARY_FUNCTION_GPU(Absolute, fabs)
SIMPLE_UNARY_FUNCTION_GPU(Ceil, ceil)
SIMPLE_UNARY_FUNCTION_GPU(Exp, exp)
SIMPLE_UNARY_FUNCTION_GPU(Floor, floor)
SIMPLE_UNARY_FUNCTION_GPU(IsNonZero, cudaisnonzero)
SIMPLE_UNARY_FUNCTION_GPU(IsZero, cudaiszero)
SIMPLE_UNARY_FUNCTION_GPU(Log, log)
SIMPLE_UNARY_FUNCTION_GPU(Round, cudaround)
...
...
source/tensor/core/math/Unary.cuh
查看文件 @
7809ed05
...
...
@@ -66,6 +66,15 @@ void KernelFloor(__half * a, __half * b, int size);
/* set each entry to its floor value */
void _CudaFloor(const XTensor * a, XTensor * b);
/* if source entry is non-zero, set target entry to be one, otherwise zero (CUDA Kernel) */
__global__
void KernelIsNonZero(DTYPE * a, DTYPE * b, int size);
/* if source entry is non-zero, set target entry to be one, otherwise zero (CUDA Kernel) with float16 data type*/
__global__
void KernelIsNonZero(__half * a, __half * b, int size);
/* if source entry is non-zero, set target entry to be one, otherwise zero */
void _CudaIsNonZero(const XTensor * a, XTensor * b);
/* if source entry is zero, set target entry to be one, otherwise zero (CUDA Kernel) */
__global__
void KernelIsZero(DTYPE * a, DTYPE * b, int size);
...
...
source/tensor/core/math/Unary.h
查看文件 @
7809ed05
...
...
@@ -63,6 +63,15 @@ void _FloorMe(XTensor * a);
make a new tensor to keep the result and return it */
XTensor
Floor
(
const
XTensor
&
a
);
/* if source entry is non-zero, set target entry to be one, otherwise zero */
void
_IsNonZero
(
const
XTensor
*
a
,
XTensor
*
b
);
/* if source entry is non-zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */
void
_IsNonZeroMe
(
XTensor
*
a
);
/* if source entry is non-zero, set target entry to be one, otherwise zero (return a XTensor structure)
make a new tensor to keep the result and return it */
XTensor
IsNonZero
(
const
XTensor
&
a
);
/* if source entry is zero, set target entry to be one, otherwise zero */
void
_IsZero
(
const
XTensor
*
a
,
XTensor
*
b
);
/* if source entry is zero, set target entry to be one, otherwise zero (do it on site)
...
...
source/tensor/core/movement/Gather.cpp
查看文件 @
7809ed05
...
...
@@ -21,6 +21,8 @@
#include "Gather.h"
#include "CopyIndexed.h"
#include "../../XUtility.h"
#include "../shape/Reshape.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
...
...
@@ -75,4 +77,50 @@ XTensor Gather(const XTensor &s, int dim, int * srcIndex, int indexSize)
return
result
;
}
/*
gather indexed sub-tensors (return a XTensor structure)
make a new tensor to keep the result and return it
>> s - the source tensor (must be of order 2)
>> index - the index tensor; its entries select sub-tensors of s along
           dimension 0. Supported data types are X_INT, X_FLOAT and
           X_DOUBLE (floating-point entries are truncated to int).
<< return - the result of copying indexed sub-tensors. If index has more than
            one dimension, the result is reshaped to the dimensions of index
            followed by the last dimension of the gathered tensor.
*/
XTensor Gather(const XTensor &s, const XTensor &index)
{
    int indexSize = index.unitNum;

    CheckNTErrors((s.order == 2), "The order of the input tensor must be 2!");

    /* reject unsupported index types up front; otherwise srcIndex below
       would be handed to Gather without ever being filled in */
    CheckNTErrors((index.dataType == X_INT ||
                   index.dataType == X_FLOAT ||
                   index.dataType == X_DOUBLE),
                  "The data type of the index tensor must be int, float or double!");

    /* a plain int array of the indices (device id -1 is used as the copy target) */
    int * srcIndex = new int[indexSize];

    if (index.dataType == X_INT) {
        XMemCopy(srcIndex, -1, index.data, index.devID, indexSize * index.unitSize);
    }
    else if (index.dataType == X_FLOAT) {
        /* stage the data in a buffer whose element type matches the index
           tensor, so the copy cannot overrun it and the values are read
           with the right width */
        float * tmp = new float[indexSize];
        XMemCopy(tmp, -1, index.data, index.devID, indexSize * sizeof(float));
        for (int i = 0; i < indexSize; i++)
            srcIndex[i] = (int)tmp[i];
        delete[] tmp;
    }
    else {
        /* X_DOUBLE: same as above with a double-typed staging buffer */
        double * tmp = new double[indexSize];
        XMemCopy(tmp, -1, index.data, index.devID, indexSize * sizeof(double));
        for (int i = 0; i < indexSize; i++)
            srcIndex[i] = (int)tmp[i];
        delete[] tmp;
    }

    /* gather along dimension 0 of the source tensor */
    XTensor tensor;
    tensor = Gather(s, 0, srcIndex, indexSize);
    delete[] srcIndex;

    if (index.order > 1) {
        /* restore the layout of the index tensor: (index dims..., last dim of the result) */
        int * dims = new int[index.order + 1];
        memcpy(dims, index.dimSize, index.order * sizeof(int));
        dims[index.order] = tensor.GetDim(-1);

        XTensor t;
        t = Reshape(tensor, index.order + 1, dims);
        delete[] dims;

        return t;
    }
    else {
        return tensor;
    }
}
}
//
namespace
nts
(
NiuTrans
.
Tensor
)
\ No newline at end of file
source/tensor/core/movement/Gather.h
查看文件 @
7809ed05
...
...
@@ -33,6 +33,10 @@ void _Gather(const XTensor * s, XTensor * t, int dim, int * srcIndex, int indexS
make a new tensor to keep the result and return it */
XTensor
Gather
(
const
XTensor
&
s
,
int
dim
,
int
*
srcIndex
,
int
indexSize
);
/* gather selected sub-tensors (return a XTensor structure)
make a new tensor to keep the result and return it */
XTensor
Gather
(
const
XTensor
&
s
,
const
XTensor
&
index
);
}
// namespace nts(NiuTrans.Tensor)
#endif // __GATHER_H__
source/tensor/core/reduce/ReduceSum.cpp
查看文件 @
7809ed05
...
...
@@ -16,8 +16,8 @@
*/
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include <math.h>
#include "ReduceSum.h"
...
...
source/tensor/core/reduce/ReduceSum.cu
查看文件 @
7809ed05
差异被折叠。
点击展开。
source/tensor/core/reduce/ReduceSumAll.cpp
查看文件 @
7809ed05
...
...
@@ -44,23 +44,24 @@ sum all the items of the tensor (It should be optimized!)
>> source - the input tensor
<< return - the total summation
*/
DTYPE
_ReduceSumAll
(
XTensor
*
source
)
DTYPE
_ReduceSumAll
(
const
XTensor
*
source
)
{
int
order
=
source
->
order
;
DTYPE
summation
;
XTensor
*
big
=
NewTensor
(
source
);
_CopyValues
(
source
,
big
);
for
(
int
i
=
0
;
i
<
order
;
i
++
)
{
if
(
i
==
order
-
1
)
big
->
Reshape
(
big
->
unitNum
,
1
);
for
(
int
i
=
order
-
1
;
i
>=
0
;
i
--
)
{
if
(
i
==
0
)
big
->
Reshape
(
1
,
big
->
unitNum
);
int
leadingDim
=
big
->
order
-
1
;
int
*
dimSize
;
dimSize
=
getDimSize
(
big
,
0
);
XTensor
*
little
=
NewTensor
(
big
->
order
-
1
,
dimSize
,
source
->
dataType
,
source
->
denseRatio
,
source
->
devID
,
source
->
mem
);
dimSize
=
getDimSize
(
big
,
leadingDim
);
XTensor
*
little
=
NewTensor
(
big
->
order
-
1
,
dimSize
,
source
->
dataType
,
source
->
denseRatio
,
source
->
devID
,
source
->
mem
);
_ReduceSum
(
big
,
little
,
0
);
_ReduceSum
(
big
,
little
,
leadingDim
);
delete
big
;
delete
dimSize
;
...
...
@@ -81,7 +82,7 @@ sum all the items of the tensor
>> source - the input tensor
<< return - the total summation
*/
DTYPE
ReduceSumAll
(
XTensor
&
source
)
DTYPE
ReduceSumAll
(
const
XTensor
&
source
)
{
return
_ReduceSumAll
(
&
source
);
}
...
...
source/tensor/core/reduce/ReduceSumAll.h
查看文件 @
7809ed05
...
...
@@ -28,10 +28,10 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* sum all the items of the tensor */
DTYPE
_ReduceSumAll
(
XTensor
*
source
);
DTYPE
_ReduceSumAll
(
const
XTensor
*
source
);
/* sum all the items of the tensor */
DTYPE
ReduceSumAll
(
XTensor
&
source
);
DTYPE
ReduceSumAll
(
const
XTensor
&
source
);
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/function/CrossEntropy.cpp
查看文件 @
7809ed05
差异被折叠。
点击展开。
source/tensor/function/CrossEntropy.cu
查看文件 @
7809ed05
差异被折叠。
点击展开。
source/tensor/function/CrossEntropy.cuh
查看文件 @
7809ed05
...
...
@@ -29,20 +29,20 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* compute the cross entropy loss */
void _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
XTensor * loss, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
/* compute the cross entropy loss */
DTYPE _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
LOSS_COMPUTE_WAY reduceWay, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
LOSS_COMPUTE_WAY reduceWay, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
/* backward computation of cross entropy function */
void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output,
const XTensor * gold,
const XTensor *
weight = NULL, XTensor * padding
= NULL,
int leadingDim = -1);
void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output,
const XTensor *
gold, const XTensor * weight
= NULL,
XTensor * padding = NULL,
int leadingDim = -1);
} // namespace nts(NiuTrans.Tensor)
#endif // __CROSSENTROPY_CUH__
#endif // __CROSSENTROPY_CUH__
\ No newline at end of file
source/tensor/function/CrossEntropy.h
查看文件 @
7809ed05
...
...
@@ -52,9 +52,9 @@ DTYPE _CrossEntropyFast(const XTensor * output, const XTensor * gold,
const
XTensor
*
padding
=
NULL
,
int
leadingDim
=
-
1
);
/* backward computation of cross entropy function */
void
_CrossEntropyBackward
(
XTensor
*
dedy
,
const
XTensor
*
output
,
const
XTensor
*
gold
,
const
XTensor
*
weight
=
NULL
,
XTensor
*
padding
=
NULL
,
int
leadingDim
=
-
1
);
void
_CrossEntropyBackward
(
XTensor
*
dedy
,
const
XTensor
*
output
,
const
XTensor
*
gold
,
const
XTensor
*
weight
=
NULL
,
XTensor
*
padding
=
NULL
,
int
leadingDim
=
-
1
);
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/function/LogSoftmax.cpp
查看文件 @
7809ed05
...
...
@@ -279,8 +279,8 @@ better numerical stability.
>> leadDim - leading dimension (along which we perform reduction)
*/
void
_LogSoftmaxBackward
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
XTensor
*
dedy
,
XTensor
*
dedx
,
int
leadDim
,
XTensor
*
dedy
,
XTensor
*
dedx
,
XTensor
*
padding
,
int
leadDim
,
LOSS_FUNCTION_NAME
lossName
)
{
CheckNTErrors
((
!
dedx
->
isSparse
),
"The gradient matrix must be dense!"
);
...
...
@@ -292,7 +292,7 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
int
leadDimRDI
=
y
->
order
-
leadDim
-
1
;
#ifdef USE_CUDA
if
(
gold
->
devID
>=
0
)
{
_CudaLogSoftmaxBackward
(
gold
,
y
,
x
,
dedy
,
dedx
,
leadDim
,
lossName
);
_CudaLogSoftmaxBackward
(
gold
,
y
,
x
,
dedy
,
dedx
,
padding
,
leadDim
,
lossName
);
return
;
}
#endif
...
...
source/tensor/function/LogSoftmax.cu
查看文件 @
7809ed05
...
...
@@ -22,6 +22,7 @@
#include "LogSoftmax.h"
#include "LogSoftmax.cuh"
#include "Loss.cuh"
#include "../core/arithmetic/MultiplyDim.h"
#include "../core/reduce/ReduceSum.cuh"
#include "../core/reduce/ReduceMax.cuh"
#include "../XDevice.h"
...
...
@@ -232,7 +233,8 @@ dE/dx = dE/dy * dy/dx
>> lossName - name of the loss function
*/
__global__
void KernelLogSoftmaxBackwardDEDS(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size, LOSS_FUNCTION_NAME lossName)
void KernelLogSoftmaxBackwardDEDS(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x,
int size, LOSS_FUNCTION_NAME lossName)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
...
...
@@ -371,10 +373,12 @@ better numerical stability.
>> leadDim - leading dimension (along which we perform reduction)
*/
void _CudaLogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
int leadDim,
XTensor * dedy, XTensor * dedx,
XTensor * padding, int leadDim,
LOSS_FUNCTION_NAME lossName)
{
leadDim = leadDim < 0 ? y->order - 1 : leadDim;
CheckNTErrors((x->devID >= 0), "Backward computation of log softmax must be run on GPUs.");
CheckNTErrors((x->devID == y->devID && gold->devID == y->devID),
"Tensors used in log softmax are not on the same GPU.");
...
...
@@ -441,6 +445,26 @@ void _CudaLogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
dimensionSize * stride, lossName);
}
}
if(padding != NULL) {
int n = leadDim;
int paddingOrder = padding->order;
int * paddingDims = new int[paddingOrder];
memcpy(paddingDims, padding->dimSize, padding->order * sizeof(int));
padding->Reshape(padding->unitNum);
int order = dedx->order;
int * dims = new int[order];
memcpy(dims, dedx->dimSize, dedx->order * sizeof(int));
dedx->Reshape(dedx->unitNum/dedx->GetDim(n), dedx->GetDim(n));
_MultiplyDimMe(dedx, padding, 0);
padding->Reshape(paddingOrder, paddingDims);
dedx->Reshape(order, dims);
delete[] paddingDims;
delete[] dims;
}
}
else {
ShowNTErrors("TODO!");
...
...
source/tensor/function/LogSoftmax.cuh
查看文件 @
7809ed05
...
...
@@ -37,8 +37,8 @@ void _CudaLogSoftmaxSumMax(XTensor * x, XTensor * y, int leadDim, XTensor * sum,
/* de/dx (Cuda version) */
void _CudaLogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
int leadDim,
XTensor * dedy, XTensor * dedx,
XTensor * padding,
int leadDim,
LOSS_FUNCTION_NAME lossName);
#endif // USE_CUDA
...
...
source/tensor/function/LogSoftmax.h
查看文件 @
7809ed05
...
...
@@ -38,8 +38,8 @@ void LogSoftmax(const XTensor &x, XTensor &y, int leadDim);
/* de/dx */
void
_LogSoftmaxBackward
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
XTensor
*
dedy
,
XTensor
*
dedx
,
int
leadDim
,
XTensor
*
dedy
,
XTensor
*
dedx
,
XTensor
*
padding
,
int
leadDim
,
LOSS_FUNCTION_NAME
lossName
);
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/function/Loss.cpp
查看文件 @
7809ed05
...
...
@@ -486,8 +486,9 @@ void _LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
for
(
int
i
=
0
;
i
<
blockNum
;
i
++
)
{
for
(
int
j
=
0
;
j
<
stride
;
j
++
)
{
for
(
int
k
=
0
;
k
<
tLen
;
k
++
)
{
*
(
dedyp
+
i
*
stride
*
dimensionSize
+
j
+
stride
*
(
yBeg
+
k
))
=
-
(
DTYPE
)
*
(
tp
+
i
*
stride
*
dimensionSize
+
j
+
stride
*
(
tBeg
+
k
))
/
(
DTYPE
)
*
(
yp
+
i
*
stride
*
dimensionSize
+
j
+
stride
*
(
yBeg
+
k
));
*
(
dedyp
+
i
*
stride
*
dimensionSize
+
j
+
stride
*
(
yBeg
+
k
))
=
-
(
DTYPE
)
*
(
tp
+
i
*
stride
*
dimensionSize
+
j
+
stride
*
(
tBeg
+
k
))
/
(
DTYPE
)
*
(
yp
+
i
*
stride
*
dimensionSize
+
j
+
stride
*
(
yBeg
+
k
));
}
}
}
...
...
source/tensor/function/Softmax.cpp
查看文件 @
7809ed05
...
...
@@ -174,8 +174,8 @@ See more details in LogSoftmaxBackward(...)
>> leadDim - leading dimension (along which we perform reduction)
*/
void
_SoftmaxBackward
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
XTensor
*
dedy
,
XTensor
*
dedx
,
int
leadDim
,
XTensor
*
dedy
,
XTensor
*
dedx
,
XTensor
*
padding
,
int
leadDim
,
LOSS_FUNCTION_NAME
lossName
)
{
CheckNTErrors
(
dedx
->
isSparse
==
false
,
"The gradient tensor must be dense!"
);
...
...
@@ -188,7 +188,7 @@ void _SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
#ifdef USE_CUDA
if
(
y
->
devID
>=
0
){
_CudaSoftmaxBackward
(
gold
,
y
,
x
,
dedy
,
dedx
,
leadDim
,
lossName
);
_CudaSoftmaxBackward
(
gold
,
y
,
x
,
dedy
,
dedx
,
padding
,
leadDim
,
lossName
);
return
;
}
#endif
...
...
@@ -297,9 +297,10 @@ void _SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
\beta = \sum_i (dE/dy_i * y_i)
*/
for
(
int
k
=
0
;
k
<
blockNum
;
k
++
){
op
=
(
DTYPE
*
)
y
->
data
+
k
*
blockSize
;
sp
=
(
DTYPE
*
)
dedx
->
data
+
k
*
blockSize
;
for
(
int
m
=
0
;
m
<
blockNum
;
m
++
){
yp
=
(
DTYPE
*
)
dedy
->
data
+
m
*
blockSize
;
op
=
(
DTYPE
*
)
y
->
data
+
m
*
blockSize
;
sp
=
(
DTYPE
*
)
dedx
->
data
+
m
*
blockSize
;
int
nCols
=
stride
;
for
(
int
k
=
0
;
k
<
stride
;
k
++
){
...
...
source/tensor/function/Softmax.cu
查看文件 @
7809ed05
...
...
@@ -24,6 +24,7 @@
#include "Loss.cuh"
#include "../core/reduce/ReduceSum.h"
#include "../core/arithmetic/Multiply.h"
#include "../core/arithmetic/MultiplyDim.h"
#include "../core/shape/Unsqueeze.h"
#include "../core/arithmetic/Sum.h"
#include "../XDevice.h"
...
...
@@ -309,9 +310,11 @@ See more details in SoftmaxBackward
*/
void _CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
int leadDim,
XTensor * padding,
int leadDim,
LOSS_FUNCTION_NAME lossName)
{
int n = leadDim < 0 ? y->order - 1 : leadDim;
CheckNTErrors((x->devID >= 0), "Backward computation of log softmax must be run on GPUs.");
CheckNTErrors((x->devID == y->devID), "Matrices used in log softmax are not on the same GPU.");
CheckNTErrors((y->order >= 1), "Empty tensor!");
...
...
@@ -329,6 +332,24 @@ void _CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
if(lossName == CROSSENTROPY || lossName == SQUAREDERROR){
_Sum(y, gold, dedx, -1.0F);
if(padding != NULL) {
int paddingOrder = padding->order;
int * paddingDims = new int[paddingOrder];
memcpy(paddingDims, padding->dimSize, padding->order * sizeof(int));
padding->Reshape(padding->unitNum);
int order = dedx->order;
int * dims = new int[order];
memcpy(dims, dedx->dimSize, dedx->order * sizeof(int));
dedx->Reshape(dedx->unitNum/dedx->GetDim(n), dedx->GetDim(n));
_MultiplyDimMe(dedx, padding, 0);
padding->Reshape(paddingOrder, paddingDims);
dedx->Reshape(order, dims);
delete[] paddingDims;
delete[] dims;
}
}
else if(lossName == ONEHOTERROR){
ShowNTErrors("TODO!");
...
...
source/tensor/function/Softmax.cuh
查看文件 @
7809ed05
...
...
@@ -37,8 +37,8 @@ void _CudaSoftmaxSumMax(const XTensor * x, XTensor * y, int leadDim, XTensor * s
/* de/dx (Cuda version) */
void _CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
int leadDim,
XTensor * dedy, XTensor * dedx,
XTensor * padding,
int leadDim,
LOSS_FUNCTION_NAME lossName);
#endif // USE_CUDA
...
...
source/tensor/function/Softmax.h
查看文件 @
7809ed05
...
...
@@ -35,8 +35,8 @@ XTensor Softmax(const XTensor &x, int leadDim);
/* de/dx */
void
_SoftmaxBackward
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
XTensor
*
dedy
,
XTensor
*
dedx
,
int
leadDim
,
XTensor
*
dedy
,
XTensor
*
dedx
,
XTensor
*
padding
,
int
leadDim
,
LOSS_FUNCTION_NAME
lossName
);
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/test/TDropout.cpp
查看文件 @
7809ed05
...
...
@@ -169,8 +169,8 @@ bool TestDropout2()
_DropoutBackward
(
y
,
x
,
dedy
,
dedx
,
1
,
dropProb
);
/* check result */
y
->
Dump
(
stderr
,
"y"
);
dedx
->
Dump
(
stderr
,
"dedy"
);
//
y->Dump(stderr, "y");
//
dedx->Dump(stderr, "dedy");
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -193,8 +193,8 @@ bool TestDropout2()
_DropoutBackward
(
yGPU
,
xGPU
,
dedyGPU
,
dedxGPU
,
1
,
dropProb
);
/* check result */
yGPU
->
Dump
(
stderr
,
"yGPU"
);
dedxGPU
->
Dump
(
stderr
,
"dedyGPU"
);
//
yGPU->Dump(stderr, "yGPU");
//
dedxGPU->Dump(stderr, "dedyGPU");
/* destroy variables */
delete
x
;
...
...
source/tensor/test/TLogSoftmax.cpp
查看文件 @
7809ed05
...
...
@@ -146,7 +146,7 @@ bool TestLogSoftmax2()
_LogSoftmax
(
x
,
y
,
1
);
/* call LogSoftmaxBackward function */
_LogSoftmaxBackward
(
g
,
y
,
x
,
dedy
,
dedx
,
1
,
CROSSENTROPY
);
_LogSoftmaxBackward
(
g
,
y
,
x
,
dedy
,
dedx
,
NULL
,
1
,
CROSSENTROPY
);
/* check result */
cpuTest
=
y
->
CheckData
(
yAnswer
,
unitNum
,
1e-4
F
)
...
...
@@ -174,7 +174,7 @@ bool TestLogSoftmax2()
_LogSoftmax
(
xGPU
,
yGPU
,
1
);
/* call LogSoftmaxBackward function */
_LogSoftmaxBackward
(
gGPU
,
yGPU
,
xGPU
,
dedyGPU
,
dedxGPU
,
1
,
CROSSENTROPY
);
_LogSoftmaxBackward
(
gGPU
,
yGPU
,
xGPU
,
dedyGPU
,
dedxGPU
,
NULL
,
1
,
CROSSENTROPY
);
/* check result */
gpuTest
=
yGPU
->
CheckData
(
yAnswer
,
unitNum
,
1e-4
F
)
&&
dedxGPU
->
CheckData
(
dedxAnswer
,
unitNum
,
1e-4
F
);
...
...
@@ -250,7 +250,7 @@ bool TestLogSoftmax3()
_LogSoftmax
(
x
,
y
,
1
);
/* call LogSoftmaxBackward function */
_LogSoftmaxBackward
(
g
,
y
,
x
,
dedy
,
dedx
,
1
,
SQUAREDERROR
);
_LogSoftmaxBackward
(
g
,
y
,
x
,
dedy
,
dedx
,
NULL
,
1
,
SQUAREDERROR
);
/* check result */
cpuTest
=
y
->
CheckData
(
yAnswer
,
unitNum
,
1e-4
F
)
...
...
@@ -278,7 +278,7 @@ bool TestLogSoftmax3()
_LogSoftmax
(
xGPU
,
yGPU
,
1
);
/* call LogSoftmaxBackward function */
_LogSoftmaxBackward
(
gGPU
,
yGPU
,
xGPU
,
dedyGPU
,
dedxGPU
,
1
,
SQUAREDERROR
);
_LogSoftmaxBackward
(
gGPU
,
yGPU
,
xGPU
,
dedyGPU
,
dedxGPU
,
NULL
,
1
,
SQUAREDERROR
);
/* check result */
gpuTest
=
yGPU
->
CheckData
(
yAnswer
,
unitNum
,
1e-4
F
)
...
...
source/tensor/test/TPower.cpp
查看文件 @
7809ed05
...
...
@@ -66,7 +66,9 @@ bool TestPower1()
bUser
=
Power
(
*
a
,
2.0
F
);
/* check results */
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -88,7 +90,9 @@ bool TestPower1()
bUserGPU
=
Power
(
*
aGPU
,
2.0
F
);
/* check results */
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
/* destroy variables */
delete
a
;
...
...
@@ -153,7 +157,9 @@ bool TestPower2()
bUser
=
Power
(
*
a
,
1.0
F
);
/* check results */
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -175,7 +181,9 @@ bool TestPower2()
bUserGPU
=
Power
(
*
aGPU
,
1.0
F
);
/* check results */
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
/* destroy variables */
delete
a
;
...
...
@@ -214,7 +222,7 @@ bool TestPower3()
for
(
int
i
=
0
;
i
<
aOrder
;
i
++
)
aUnitNum
*=
aDimSize
[
i
];
DTYPE
aData
[
3
][
2
]
=
{
{
0
.0
F
,
1.0
F
},
DTYPE
aData
[
3
][
2
]
=
{
{
1
.0
F
,
1.0
F
},
{
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
}
};
DTYPE
answer
[
3
][
2
]
=
{
{
1.0
F
,
1.0
F
},
...
...
@@ -240,7 +248,9 @@ bool TestPower3()
bUser
=
Power
(
*
a
,
0.0
F
);
/* check results */
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -262,7 +272,9 @@ bool TestPower3()
bUserGPU
=
Power
(
*
aGPU
,
0.0
F
);
/* check results */
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
/* destroy variables */
delete
a
;
...
...
source/tensor/test/TReduceSum.cpp
查看文件 @
7809ed05
差异被折叠。
点击展开。
source/tensor/test/TSoftmax.cpp
查看文件 @
7809ed05
...
...
@@ -146,7 +146,7 @@ bool TestSoftmax2()
_Softmax
(
x
,
y
,
1
);
/* call SoftmaxBackward function */
_SoftmaxBackward
(
g
,
y
,
x
,
dedy
,
dedx
,
1
,
CROSSENTROPY
);
_SoftmaxBackward
(
g
,
y
,
x
,
dedy
,
dedx
,
NULL
,
1
,
CROSSENTROPY
);
/* check result */
cpuTest
=
y
->
CheckData
(
yAnswer
,
unitNum
,
1e-4
F
)
...
...
@@ -174,7 +174,7 @@ bool TestSoftmax2()
_Softmax
(
xGPU
,
yGPU
,
1
);
/* call SoftmaxBackward function */
_SoftmaxBackward
(
gGPU
,
yGPU
,
xGPU
,
dedyGPU
,
dedxGPU
,
1
,
CROSSENTROPY
);
_SoftmaxBackward
(
gGPU
,
yGPU
,
xGPU
,
dedyGPU
,
dedxGPU
,
NULL
,
1
,
CROSSENTROPY
);
/* check result */
gpuTest
=
yGPU
->
CheckData
(
yAnswer
,
unitNum
,
1e-4
F
)
...
...
source/tensor/test/TSumDim.cpp
查看文件 @
7809ed05
...
...
@@ -20,8 +20,9 @@
*/
#include "TSumDim.h"
#include "../core/arithmetic/SumDim.h"
#include "../XTensor.h"
#include "../core/arithmetic/SumDim.h"
#include "../core/getandset/SetData.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
...
...
@@ -251,6 +252,225 @@ bool TestSumDim2()
#endif // USE_CUDA
}
/*
case 3: tensor summation c = a + b * \beta
where the size of b is equal to the n-th dimension of a,
i.e., a is summed with b by broadcasting.
In this case,
(20, 40, 4000) + (40) = (20, 40, 4000), dim = 1.
All entries of a are zero and all entries of b are one, so every
entry of the expected result is one.
<< return - true if all checked variants of SumDim produce the expected result
*/
bool TestSumDim3()
{
    /* a tensor of size (20, 40, 4000) */
    int aOrder = 3;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 20;
    aDimSize[1] = 40;
    aDimSize[2] = 4000;

    int aUnitNum = 1;
    for (int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    /* a tensor of size (40) */
    int bOrder = 1;
    int * bDimSize = new int[bOrder];
    bDimSize[0] = 40;

    int bUnitNum = 1;
    for (int i = 0; i < bOrder; i++)
        bUnitNum *= bDimSize[i];

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensor(aOrder, aDimSize);
    XTensor * b = NewTensor(bOrder, bDimSize);
    XTensor * c = NewTensor(aOrder, aDimSize);
    XTensor * cMe = NewTensor(aOrder, aDimSize);
    XTensor * answer = NewTensor(aOrder, aDimSize);
    XTensor cUser;

    /* initialize variables */
    a->SetZeroAll();
    cMe->SetZeroAll();
    _SetDataFixedFloat(b, 1.0F);
    _SetDataFixedFloat(answer, 1.0F);

    /* call SumDim function */
    _SumDim(a, b, c, 1);
    _SumDim(cMe, b, 1);
    cUser = SumDim(*a, *b, 1);

    /* check results */
    cpuTest = c->CheckData(answer->data, aUnitNum) &&
              cMe->CheckData(answer->data, aUnitNum) &&
              cUser.CheckData(answer->data, aUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
    XTensor * cGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * cMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor cUserGPU;

    /* Initialize variables */
    aGPU->SetZeroAll();
    /* zero the GPU tensor that is updated in place below
       (the CPU tensor cMe has already been checked at this point) */
    cMeGPU->SetZeroAll();
    _SetDataFixedFloat(bGPU, 1.0F);

    /* call sum function */
    _SumDim(aGPU, bGPU, cGPU, 1);
    _SumDim(cMeGPU, bGPU, 1);
    cUserGPU = SumDim(*aGPU, *bGPU, 1);

    /* check results */
    gpuTest = cGPU->CheckData(answer->data, aUnitNum) &&
              cMeGPU->CheckData(answer->data, aUnitNum) &&
              cUserGPU.CheckData(answer->data, aUnitNum);

    /* destroy variables */
    delete a;
    delete b;
    delete c;
    delete cMe;
    delete answer;
    delete aGPU;
    delete bGPU;
    delete cGPU;
    delete cMeGPU;
    delete[] aDimSize;
    delete[] bDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete a;
    delete b;
    delete c;
    delete cMe;
    delete answer;
    delete[] aDimSize;
    delete[] bDimSize;

    return cpuTest;
#endif // USE_CUDA
}
/*
case 4: tensor summation c = a + b * \beta
where the size of b is equal to the n-th dimension of a,
i.e., a is summed with b by broadcasting.
In this case,
(1000000, 50) + (50) = (1000000, 50), dim = 1.
All entries of a are zero and all entries of b are one, so every
entry of the expected result is one.
<< return - true if all checked variants of SumDim produce the expected result
*/
bool TestSumDim4()
{
    /* a tensor of size (1000000, 50) */
    int aOrder = 2;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 1000000;
    aDimSize[1] = 50;

    int aUnitNum = 1;
    for (int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    /* a tensor of size (50) */
    int bOrder = 1;
    int * bDimSize = new int[bOrder];
    bDimSize[0] = 50;

    int bUnitNum = 1;
    for (int i = 0; i < bOrder; i++)
        bUnitNum *= bDimSize[i];

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensor(aOrder, aDimSize);
    XTensor * b = NewTensor(bOrder, bDimSize);
    XTensor * c = NewTensor(aOrder, aDimSize);
    XTensor * cMe = NewTensor(aOrder, aDimSize);
    XTensor * answer = NewTensor(aOrder, aDimSize);
    XTensor cUser;

    /* initialize variables */
    a->SetZeroAll();
    cMe->SetZeroAll();
    _SetDataFixedFloat(b, 1.0F);
    _SetDataFixedFloat(answer, 1.0F);

    /* call SumDim function */
    _SumDim(a, b, c, 1);
    _SumDim(cMe, b, 1);
    cUser = SumDim(*a, *b, 1);

    /* check results */
    cpuTest = c->CheckData(answer->data, aUnitNum) &&
              cMe->CheckData(answer->data, aUnitNum) &&
              cUser.CheckData(answer->data, aUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
    XTensor * cGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * cMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor cUserGPU;

    /* Initialize variables */
    aGPU->SetZeroAll();
    /* zero the GPU tensor that is updated in place below
       (the CPU tensor cMe has already been checked at this point) */
    cMeGPU->SetZeroAll();
    _SetDataFixedFloat(bGPU, 1.0F);

    /* call sum function */
    _SumDim(aGPU, bGPU, cGPU, 1);
    _SumDim(cMeGPU, bGPU, 1);
    cUserGPU = SumDim(*aGPU, *bGPU, 1);

    /* check results */
    gpuTest = cGPU->CheckData(answer->data, aUnitNum) &&
              cMeGPU->CheckData(answer->data, aUnitNum) &&
              cUserGPU.CheckData(answer->data, aUnitNum);

    /* destroy variables */
    delete a;
    delete b;
    delete c;
    delete cMe;
    delete answer;
    delete aGPU;
    delete bGPU;
    delete cGPU;
    delete cMeGPU;
    delete[] aDimSize;
    delete[] bDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete a;
    delete b;
    delete c;
    delete cMe;
    delete answer;
    delete[] aDimSize;
    delete[] bDimSize;

    return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
...
...
@@ -279,6 +499,24 @@ bool TestSumDim()
}
else
XPRINT
(
0
,
stdout
,
">> case 2 passed!
\n
"
);
/* case 3 test */
caseFlag
=
TestSumDim3
();
if
(
!
caseFlag
)
{
returnFlag
=
false
;
XPRINT
(
0
,
stdout
,
">> case 3 failed!
\n
"
);
}
else
XPRINT
(
0
,
stdout
,
">> case 3 passed!
\n
"
);
///* case 4 test */
//caseFlag = TestSumDim4();
//if (!caseFlag) {
// returnFlag = false;
// XPRINT(0, stdout, ">> case 4 failed!\n");
//}
//else
// XPRINT(0, stdout, ">> case 4 passed!\n");
/* other cases test */
/*
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论