Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
N
NiuTrans.Tensor
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
8
Issues
8
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
NiuTrans
NiuTrans.Tensor
Commits
906eebb7
Commit
906eebb7
authored
Jul 19, 2018
by
xuchen
Browse files
Options
Browse Files
Download
Plain Diff
merge with xuchen branch
parents
7283b3dc
b3a76184
全部展开
显示空白字符变更
内嵌
并排
正在显示
106 个修改的文件
包含
1178 行增加
和
423 行删除
+1178
-423
source/network/XBackwardLoss.cpp
+2
-1
source/tensor/XLink.cpp
+31
-0
source/tensor/XLink.h
+4
-0
source/tensor/XName.cpp
+41
-37
source/tensor/XName.h
+4
-0
source/tensor/XTensor.cpp
+1
-1
source/tensor/XUtility.cpp
+4
-3
source/tensor/XUtility.h
+1
-1
source/tensor/core/arithmetic/Absolute.cpp
+39
-4
source/tensor/core/arithmetic/Absolute.cu
+14
-10
source/tensor/core/arithmetic/Absolute.cuh
+3
-3
source/tensor/core/arithmetic/Absolute.h
+13
-2
source/tensor/core/arithmetic/MatrixMULBatchedCPU.cpp
+2
-2
source/tensor/core/arithmetic/MatrixMULBatchedCPU.h
+2
-2
source/tensor/core/arithmetic/MatrixMul.h
+0
-0
source/tensor/core/arithmetic/MatrixMul2D.cpp
+0
-0
source/tensor/core/arithmetic/MatrixMul2D.cu
+1
-2
source/tensor/core/arithmetic/MatrixMul2D.cuh
+0
-0
source/tensor/core/arithmetic/MatrixMul2DParallel.cpp
+0
-0
source/tensor/core/arithmetic/MatrixMul2DParallel.h
+2
-2
source/tensor/core/arithmetic/MatrixMulBatched.cpp
+5
-8
source/tensor/core/arithmetic/Negate.cpp
+41
-5
source/tensor/core/arithmetic/Negate.cu
+15
-11
source/tensor/core/arithmetic/Negate.cuh
+3
-3
source/tensor/core/arithmetic/Negate.h
+13
-2
source/tensor/core/arithmetic/Sign.cpp
+41
-6
source/tensor/core/arithmetic/Sign.cu
+19
-15
source/tensor/core/arithmetic/Sign.cuh
+3
-3
source/tensor/core/arithmetic/Sign.h
+13
-2
source/tensor/core/arithmetic/XTensorBLAS.cpp
+0
-0
source/tensor/core/arithmetic/XTensorBLAS.cu
+0
-0
source/tensor/core/arithmetic/XTensorBLAS.h
+8
-4
source/tensor/core/getandset/Select.cpp
+2
-3
source/tensor/core/math/Log.cpp
+41
-6
source/tensor/core/math/Log.cu
+13
-9
source/tensor/core/math/Log.cuh
+3
-3
source/tensor/core/math/Log.h
+13
-2
source/tensor/core/math/Power.cpp
+45
-9
source/tensor/core/math/Power.cu
+27
-17
source/tensor/core/math/Power.cuh
+3
-3
source/tensor/core/math/Power.h
+13
-2
source/tensor/core/movement/CopyIndexed.cpp
+6
-7
source/tensor/core/reduce/ReduceMax.cpp
+8
-9
source/tensor/core/reduce/ReduceMean.cpp
+5
-6
source/tensor/core/reduce/ReduceSum.cpp
+52
-6
source/tensor/core/reduce/ReduceSum.h
+10
-1
source/tensor/core/reduce/ReduceSumSquared.cpp
+5
-6
source/tensor/core/reduce/ReduceVariance.cpp
+10
-6
source/tensor/core/shape/ConcatenateSolely.cpp
+1
-1
source/tensor/core/shape/MakeMergeBlockIndex.cpp
+0
-0
source/tensor/core/shape/MakeMergeBlockIndex.cu
+0
-0
source/tensor/core/shape/MakeMergeBlockIndex.cuh
+1
-2
source/tensor/core/shape/MakeMergeBlockIndex.h
+0
-0
source/tensor/core/shape/Merge.cpp
+8
-12
source/tensor/core/shape/Split.cpp
+10
-37
source/tensor/core/shape/Split.h
+1
-1
source/tensor/core/shape/Unsqueeze.cpp
+2
-4
source/tensor/core/sort/Sort.cpp
+44
-6
source/tensor/core/sort/Sort.cu
+2
-2
source/tensor/core/sort/Sort.h
+13
-1
source/tensor/core/sort/TopK.cpp
+25
-0
source/tensor/core/sort/TopK.h
+5
-0
source/tensor/function/HardTanH.cpp
+6
-2
source/tensor/function/HardTanH.cu
+1
-1
source/tensor/function/HardTanH.h
+1
-1
source/tensor/function/Identity.cpp
+22
-1
source/tensor/function/Identity.h
+3
-2
source/tensor/function/LogSoftmax.cpp
+6
-2
source/tensor/function/LogSoftmax.h
+1
-1
source/tensor/function/Loss.cpp
+7
-7
source/tensor/function/Loss.cu
+10
-10
source/tensor/function/Loss.cuh
+4
-4
source/tensor/function/Loss.h
+4
-4
source/tensor/function/Rectify.cpp
+24
-1
source/tensor/function/Rectify.cu
+1
-1
source/tensor/function/Rectify.h
+3
-2
source/tensor/function/Sigmoid.cpp
+23
-1
source/tensor/function/Sigmoid.cu
+1
-1
source/tensor/function/Sigmoid.h
+3
-2
source/tensor/function/Softmax.cpp
+23
-0
source/tensor/function/Softmax.cuh
+1
-1
source/tensor/function/Softmax.h
+3
-2
source/tensor/test/TAbsolute.cpp
+22
-4
source/tensor/test/TConcatenate.cpp
+24
-8
source/tensor/test/THardTanH.cpp
+6
-2
source/tensor/test/TIdentity.cpp
+6
-2
source/tensor/test/TLog.cpp
+22
-4
source/tensor/test/TLogSoftmax.cpp
+6
-2
source/tensor/test/TLoss.cpp
+12
-9
source/tensor/test/TMatrixMulBatched.cpp
+12
-4
source/tensor/test/TMerge.cpp
+38
-16
source/tensor/test/TNegate.cpp
+44
-8
source/tensor/test/TPower.cpp
+66
-12
source/tensor/test/TRectify.cpp
+6
-2
source/tensor/test/TReduceMax.cpp
+12
-2
source/tensor/test/TReduceMean.cpp
+12
-2
source/tensor/test/TReduceSum.cpp
+26
-2
source/tensor/test/TReduceSumSquared.cpp
+12
-4
source/tensor/test/TReduceVariance.cpp
+6
-2
source/tensor/test/TSigmoid.cpp
+6
-2
source/tensor/test/TSign.cpp
+0
-0
source/tensor/test/TSoftmax.cpp
+0
-0
source/tensor/test/TSort.cpp
+0
-0
source/tensor/test/TSplit.cpp
+0
-0
source/tensor/test/TTopK.cpp
+0
-0
source/tensor/test/TUnsqueeze.cpp
+0
-0
没有找到文件。
source/network/XBackwardLoss.cpp
查看文件 @
906eebb7
...
@@ -70,7 +70,7 @@ void XLossGrad::Compute(XTensor * gold, XTensor * y,
...
@@ -70,7 +70,7 @@ void XLossGrad::Compute(XTensor * gold, XTensor * y,
XTensor
*
dedy
,
XTensor
*
dedy
,
LOSS_FUNCTION_NAME
lossName
)
LOSS_FUNCTION_NAME
lossName
)
{
{
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
_
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
}
}
}
}
\ No newline at end of file
source/tensor/XLink.cpp
查看文件 @
906eebb7
...
@@ -328,6 +328,37 @@ void XLink::MakeLink(const XList * list, XTensor * h, int id)
...
@@ -328,6 +328,37 @@ void XLink::MakeLink(const XList * list, XTensor * h, int id)
}
}
/*
/*
create a hyper edge with a input tensors and a list of output tensors
>> h - a input tensor
>> list - a list of output tensors
>> id - id of the edge type
*/
void
XLink
::
MakeLink
(
XTensor
*
t
,
XList
*
list
,
int
id
)
{
/* forward */
for
(
int
i
=
0
;
i
<
list
->
count
;
i
++
){
XTensor
*
h
=
(
XTensor
*
)
list
->
GetItem
(
i
);
if
(
h
==
NULL
)
continue
;
XLink
&
income
=
h
->
income
;
income
.
Reset
();
income
.
SetHead
(
h
);
income
.
SetType
(
id
);
income
.
AddTail
(
t
);
}
/* backward */
XLink
&
outgo
=
t
->
outgo
;
CheckNTErrors
(
outgo
.
head
==
NULL
||
outgo
.
head
==
t
,
"Wrong head of the hyperedge!"
);
for
(
int
i
=
0
;
i
<
list
->
count
;
i
++
){
XTensor
*
t
=
(
XTensor
*
)
list
->
GetItem
(
i
);
if
(
t
==
NULL
)
continue
;
outgo
.
AddTail
(
t
);
}
}
/*
add parameters
add parameters
>> h - head
>> h - head
>> param - parameter we want introduce
>> param - parameter we want introduce
...
...
source/tensor/XLink.h
查看文件 @
906eebb7
...
@@ -139,6 +139,10 @@ struct XLink
...
@@ -139,6 +139,10 @@ struct XLink
static
static
void
MakeLink
(
const
XList
*
list
,
XTensor
*
h
,
int
id
);
void
MakeLink
(
const
XList
*
list
,
XTensor
*
h
,
int
id
);
/* create a hyper edge with a input tensors and a list of output tensors */
static
void
MakeLink
(
XTensor
*
h
,
XList
*
list
,
int
id
);
/* add a parameter */
/* add a parameter */
static
static
void
AddParamToHead
(
XTensor
*
h
,
DTYPE
param
);
void
AddParamToHead
(
XTensor
*
h
,
DTYPE
param
);
...
...
source/tensor/XName.cpp
查看文件 @
906eebb7
...
@@ -26,76 +26,80 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -26,76 +26,80 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* get operator name */
/* get operator name */
const
char
*
GetOPName
(
int
type
)
const
char
*
GetOPName
(
int
type
)
{
{
if
((
type
&
MATH_BASE
)
!=
0
){
if
((
type
&
MATH_BASE
)
!=
0
){
if
(
type
==
MATH_ABSOLUTE
)
if
(
type
==
MATH_ABSOLUTE
)
return
"M_ABSOLUTE"
;
return
"M_ABSOLUTE"
;
else
if
(
type
==
MATH_MATRIXMUL
)
else
if
(
type
==
MATH_MATRIXMUL
)
return
"M_MATRIXMUL"
;
return
"M_MATRIXMUL"
;
else
if
(
type
==
MATH_MATRIXMULBATCHED
)
else
if
(
type
==
MATH_MATRIXMULBATCHED
)
return
"M_MATRIXMULBATCHED"
;
return
"M_MATRIXMULBATCHED"
;
else
if
(
type
==
MATH_MULTIPLY
)
else
if
(
type
==
MATH_MULTIPLY
)
return
"M_MULTIPLY"
;
return
"M_MULTIPLY"
;
else
if
(
type
==
MATH_NEGATE
)
else
if
(
type
==
MATH_NEGATE
)
return
"M_NEGATE"
;
return
"M_NEGATE"
;
else
if
(
type
==
MATH_SIGN
)
else
if
(
type
==
MATH_SIGN
)
return
"M_SIGN"
;
return
"M_SIGN"
;
else
if
(
type
==
MATH_SUM
)
else
if
(
type
==
MATH_SUM
)
return
"M_SUM"
;
return
"M_SUM"
;
else
if
(
type
==
MATH_LOG
)
else
if
(
type
==
MATH_LOG
)
return
"M_NORMALIZE"
;
else
if
(
type
==
MATH_NORMALIZE
)
return
"M_LOG"
;
return
"M_LOG"
;
else
if
(
type
==
MATH_POWER
)
else
if
(
type
==
MATH_NORMALIZE
)
return
"M_NORMALIZE"
;
else
if
(
type
==
MATH_POWER
)
return
"M_POWER"
;
return
"M_POWER"
;
else
if
(
type
==
MATH_SCALEANDSHIFT
)
else
if
(
type
==
MATH_SCALEANDSHIFT
)
return
"M_SCALEANDSHIFT"
;
return
"M_SCALEANDSHIFT"
;
else
if
(
type
==
REDUCE_REDUCEMAX
)
else
if
(
type
==
REDUCE_REDUCEMAX
)
return
"R_REDUCEMAX"
;
return
"R_REDUCEMAX"
;
else
if
(
type
==
REDUCE_REDUCEMEAN
)
else
if
(
type
==
REDUCE_REDUCEMEAN
)
return
"R_REDUCEMEAN"
;
return
"R_REDUCEMEAN"
;
else
if
(
type
==
REDUCE_REDUCESUM
)
else
if
(
type
==
REDUCE_REDUCESUM
)
return
"R_REDUCESUM"
;
return
"R_REDUCESUM"
;
else
if
(
type
==
REDUCE_REDUCESUMSQUARED
)
else
if
(
type
==
REDUCE_REDUCESUMSQUARED
)
return
"R_REDUCESUMSQUARED"
;
return
"R_REDUCESUMSQUARED"
;
else
if
(
type
==
REDUCE_REDUCEVARIANCE
)
else
if
(
type
==
REDUCE_REDUCEVARIANCE
)
return
"R_REDUCEVARIANCE"
;
return
"R_REDUCEVARIANCE"
;
}
}
else
if
((
type
&
DATA_BASE
)
!=
0
){
else
if
((
type
&
DATA_BASE
)
!=
0
){
if
(
type
==
GETANDSET_SELECT
)
if
(
type
==
GETANDSET_SELECT
)
return
"G_SELECT"
;
return
"G_SELECT"
;
else
if
(
type
==
MOVEMENT_COPYINDEXED
)
else
if
(
type
==
MOVEMENT_COPYINDEXED
)
return
"M_COPYINDEXED"
;
return
"M_COPYINDEXED"
;
else
if
(
type
==
MOVEMENT_COPYVALUES
)
else
if
(
type
==
MOVEMENT_COPYVALUES
)
return
"M_COPYVALUES"
;
return
"M_COPYVALUES"
;
else
if
(
type
==
SHAPE_CONCATENATE
)
else
if
(
type
==
SHAPE_CONCATENATE
)
return
"S_CONCATENATE"
;
return
"S_CONCATENATE"
;
else
if
(
type
==
SHAPE_MERGE
)
else
if
(
type
==
SHAPE_MERGE
)
return
"S_MERGE"
;
return
"S_MERGE"
;
else
if
(
type
==
SHAPE_MERGE_LIST
)
else
if
(
type
==
SHAPE_MERGE_LIST
)
return
"S_MERGE_LIST"
;
return
"S_MERGE_LIST"
;
else
if
(
type
==
SHAPE_PERMUTE
)
else
if
(
type
==
SHAPE_PERMUTE
)
return
"S_PERMUTE"
;
return
"S_PERMUTE"
;
else
if
(
type
==
SHAPE_SPLIT
)
else
if
(
type
==
SHAPE_SPLIT
)
return
"S_SPLIT"
;
return
"S_SPLIT"
;
else
if
(
type
==
SHAPE_SPLIT_LIST
)
else
if
(
type
==
SHAPE_SPLIT_LIST
)
return
"S_SPLIT_LIST"
;
return
"S_SPLIT_LIST"
;
else
if
(
type
==
SHAPE_TRANSPOSE
)
else
if
(
type
==
SHAPE_TRANSPOSE
)
return
"S_TRANSPOSE"
;
return
"S_TRANSPOSE"
;
else
if
(
type
==
SHAPE_UNSQUEEZE
)
else
if
(
type
==
SHAPE_UNSQUEEZE
)
return
"S_UNSQUEEZE"
;
return
"S_UNSQUEEZE"
;
else
if
(
type
==
SORT_SORT
)
return
"S_SORT"
;
else
if
(
type
==
SORT_TOPK
)
return
"S_TOPK"
;
}
}
else
if
((
type
&
FUNCTION_BASE
)
!=
0
){
else
if
((
type
&
FUNCTION_BASE
)
!=
0
){
if
(
type
==
FUNC_HARDTANH
)
if
(
type
==
FUNC_HARDTANH
)
return
"F_HARDTANH"
;
return
"F_HARDTANH"
;
else
if
(
type
==
FUNC_IDENTITY
)
else
if
(
type
==
FUNC_IDENTITY
)
return
"F_IDENTITY"
;
return
"F_IDENTITY"
;
else
if
(
type
==
FUNC_LOGSOFTMAX
)
else
if
(
type
==
FUNC_LOGSOFTMAX
)
return
"F_LOGSOFTMAX"
;
return
"F_LOGSOFTMAX"
;
else
if
(
type
==
FUNC_RECTIFY
)
else
if
(
type
==
FUNC_RECTIFY
)
return
"F_RECTIFY"
;
return
"F_RECTIFY"
;
else
if
(
type
==
FUNC_SIGMOID
)
else
if
(
type
==
FUNC_SIGMOID
)
return
"F_SIGMOID"
;
return
"F_SIGMOID"
;
else
if
(
type
==
FUNC_SOFTMAX
)
else
if
(
type
==
FUNC_SOFTMAX
)
return
"F_SOFTMAX"
;
return
"F_SOFTMAX"
;
}
}
...
...
source/tensor/XName.h
查看文件 @
906eebb7
...
@@ -69,6 +69,10 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -69,6 +69,10 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define SHAPE_TRANSPOSE SHAPE_SPLIT_LIST + 1
#define SHAPE_TRANSPOSE SHAPE_SPLIT_LIST + 1
#define SHAPE_UNSQUEEZE SHAPE_TRANSPOSE + 1
#define SHAPE_UNSQUEEZE SHAPE_TRANSPOSE + 1
#define SORT SHAPE_UNSQUEEZE + 1
#define SORT_SORT SORT + 1
#define SORT_TOPK SORT_SORT + 1
/* activation functions */
/* activation functions */
#define FUNCTION_BASE DATA_BASE * 2
#define FUNCTION_BASE DATA_BASE * 2
#define FUNC_HARDTANH FUNCTION_BASE + 1
#define FUNC_HARDTANH FUNCTION_BASE + 1
...
...
source/tensor/XTensor.cpp
查看文件 @
906eebb7
...
@@ -1121,7 +1121,7 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
...
@@ -1121,7 +1121,7 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
if
(
isSparse
){
if
(
isSparse
){
/*
/*
for sparse matrices, we use a list of tuple (key, value),
for sparse matrices, we use a list of tuple (key, value),
ordered by key. Take a (2-dimensional) matrix as an example
s
,
ordered by key. Take a (2-dimensional) matrix as an example,
we have key = m * i + j;
we have key = m * i + j;
The data array is
The data array is
---------
---------
...
...
source/tensor/XUtility.cpp
查看文件 @
906eebb7
...
@@ -486,8 +486,9 @@ quick sorting
...
@@ -486,8 +486,9 @@ quick sorting
NOTE: this means that the items may not placed in a continuous memory space
NOTE: this means that the items may not placed in a continuous memory space
>> comp - the comparison function
>> comp - the comparison function
*/
*/
void
XQSort
(
void
*
data
,
void
*
index
,
int
num
,
int
width
,
int
stride
,
int
(
*
comp
)(
const
void
*
,
const
void
*
))
void
XQSort
(
void
*
data
A
,
void
*
dataB
,
void
*
index
,
int
num
,
int
width
,
int
stride
,
int
(
*
comp
)(
const
void
*
,
const
void
*
))
{
{
XMemCopy
(
dataB
,
-
1
,
dataA
,
-
1
,
num
*
width
);
char
*
lo
,
*
hi
;
// ends of sub-array currently sorting
char
*
lo
,
*
hi
;
// ends of sub-array currently sorting
int
*
indexlo
,
*
indexhi
;
int
*
indexlo
,
*
indexhi
;
char
*
mid
;
// points to middle of subarray
char
*
mid
;
// points to middle of subarray
...
@@ -506,8 +507,8 @@ void XQSort(void * data, void * index, int num, int width, int stride, int (*com
...
@@ -506,8 +507,8 @@ void XQSort(void * data, void * index, int num, int width, int stride, int (*com
stackptr
=
0
;
stackptr
=
0
;
lo
=
(
char
*
)
data
;
lo
=
(
char
*
)
data
B
;
hi
=
(
char
*
)
data
+
realStride
*
(
num
-
1
);
hi
=
(
char
*
)
data
B
+
realStride
*
(
num
-
1
);
indexlo
=
(
int
*
)
index
;
indexlo
=
(
int
*
)
index
;
indexhi
=
index
!=
NULL
?
(
int
*
)
index
+
stride
*
(
num
-
1
)
:
NULL
;
indexhi
=
index
!=
NULL
?
(
int
*
)
index
+
stride
*
(
num
-
1
)
:
NULL
;
...
...
source/tensor/XUtility.h
查看文件 @
906eebb7
...
@@ -53,7 +53,7 @@ extern void XSleep(int sleepTime);
...
@@ -53,7 +53,7 @@ extern void XSleep(int sleepTime);
extern
double
GetClock
();
extern
double
GetClock
();
extern
double
GetClockSec
();
extern
double
GetClockSec
();
extern
void
XQSort
(
void
*
data
,
void
*
index
,
int
num
,
int
width
,
int
stride
,
int
(
*
comp
)(
const
void
*
,
const
void
*
));
extern
void
XQSort
(
void
*
data
A
,
void
*
dataB
,
void
*
index
,
int
num
,
int
width
,
int
stride
,
int
(
*
comp
)(
const
void
*
,
const
void
*
));
extern
int
CompXFloat
(
const
void
*
a
,
const
void
*
b
);
extern
int
CompXFloat
(
const
void
*
a
,
const
void
*
b
);
#ifdef USE_CUDA
#ifdef USE_CUDA
...
...
source/tensor/core/arithmetic/Absolute.cpp
查看文件 @
906eebb7
...
@@ -21,6 +21,7 @@
...
@@ -21,6 +21,7 @@
#include <math.h>
#include <math.h>
#include "../../XTensor.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "Absolute.h"
#include "Absolute.h"
#include "Absolute.cuh"
#include "Absolute.cuh"
...
@@ -28,21 +29,54 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -28,21 +29,54 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/*
/*
set every entry to its absolute value
set every entry to its absolute value
>> a - the tensor we are processing
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
*/
void
_Absolute
(
XTensor
*
a
)
void
_Absolute
(
const
XTensor
*
a
,
XTensor
*
b
)
{
{
#ifdef USE_CUDA
#ifdef USE_CUDA
/* run it on GPUs */
/* run it on GPUs */
if
(
a
->
devID
>=
0
)
{
if
(
a
->
devID
>=
0
)
{
_CudaAbsolute
(
a
);
_CudaAbsolute
(
a
,
b
);
return
;
return
;
}
}
#endif
#endif
CheckNTErrors
((
XTensor
::
IsIdentical
(
a
,
b
)),
"Input tensors should have the same type!"
);
CheckNTErrors
((
a
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
CheckNTErrors
((
a
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
DTYPE
*
d
=
(
DTYPE
*
)
a
->
data
;
DTYPE
*
d
=
(
DTYPE
*
)
a
->
data
;
DTYPE
*
db
=
(
DTYPE
*
)
b
->
data
;
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
d
[
i
]
=
(
DTYPE
)
fabs
(
d
[
i
]);
db
[
i
]
=
(
DTYPE
)
fabs
(
d
[
i
]);
}
/*
set every entry to its absolute value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void
_AbsoluteMe
(
XTensor
*
a
)
{
_Absolute
(
a
,
a
);
}
/*
set every entry to its absolute value (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the absolute value of input tensor
*/
XTensor
Absolute
(
const
XTensor
&
a
)
{
XTensor
b
(
&
a
);
b
.
SetTMP
();
/* call _Absolute function */
_Absolute
(
&
a
,
&
b
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
NULL
,
&
b
,
MATH_ABSOLUTE
);
return
b
;
}
}
}
//
namespace
nts
(
NiuTrans
.
Tensor
)
}
//
namespace
nts
(
NiuTrans
.
Tensor
)
\ No newline at end of file
source/tensor/core/arithmetic/Absolute.cu
查看文件 @
906eebb7
...
@@ -29,37 +29,41 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -29,37 +29,41 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
#ifdef USE_CUDA
/*
/*
set each entry to its absolute value (CUDA Kernel)
set each entry to its absolute value (CUDA Kernel)
>> d - pointer to the data array
>> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array
>> size - size of the data array
*/
*/
__global__
__global__
void KernelAbsolute(DTYPE *
d
, int size)
void KernelAbsolute(DTYPE *
a, DTYPE * b
, int size)
{
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
if (i < size)
d[i] = fabs(d
[i]);
b[i] = fabs(a
[i]);
}
}
/*
/*
set each entry to its absolute value (CUDA Kernel)
set each entry to its absolute value (CUDA Kernel)
This is for float16 computation
This is for float16 computation
>> d - pointer to the data array
>> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array
>> size - size of the data array
*/
*/
__global__
__global__
void KernelAbsolute(__half *
d
, int size)
void KernelAbsolute(__half *
a, __half * b
, int size)
{
{
return;
return;
}
}
/*
/*
set each entry to its with float16 data type value
set each entry to its absolute value
>> a - the tensor
>> a - input tensor
>> b - output tensor
*/
*/
extern "C"
extern "C"
void _CudaAbsolute(
XTensor * a
)
void _CudaAbsolute(
const XTensor * a, XTensor * b
)
{
{
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!");
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
int gridSize[3];
...
@@ -74,10 +78,10 @@ void _CudaAbsolute(XTensor * a)
...
@@ -74,10 +78,10 @@ void _CudaAbsolute(XTensor * a)
ProtectCudaDev(a->devID, devIDBackup);
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
if (a->dataType == DEFAULT_DTYPE) {
KernelAbsolute << <blocks, threads >> >((DTYPE*)a->data, a->unitNum);
KernelAbsolute << <blocks, threads >> >((DTYPE*)a->data,
(DTYPE*)b->data,
a->unitNum);
}
}
else if (a->dataType == X_FLOAT16) {
else if (a->dataType == X_FLOAT16) {
KernelAbsolute << <blocks, threads >> >((__half*)a->data, a->unitNum);
KernelAbsolute << <blocks, threads >> >((__half*)a->data,
(__half*)b->data,
a->unitNum);
}
}
else {
else {
ShowNTErrors("TODO!");
ShowNTErrors("TODO!");
...
...
source/tensor/core/arithmetic/Absolute.cuh
查看文件 @
906eebb7
...
@@ -27,15 +27,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -27,15 +27,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set each entry to its absolute value (CUDA Kernel) */
/* set each entry to its absolute value (CUDA Kernel) */
__global__
__global__
void KernelAbsolute(DTYPE *
d
, int size);
void KernelAbsolute(DTYPE *
a, DTYPE * b
, int size);
/* set each entry to its absolute value (CUDA Kernel) with float16 data type*/
/* set each entry to its absolute value (CUDA Kernel) with float16 data type*/
__global__
__global__
void KernelAbsolute(__half *
d
, int size);
void KernelAbsolute(__half *
a, __half * b
, int size);
/* set each entry to its absolute value */
/* set each entry to its absolute value */
extern "C"
extern "C"
void _CudaAbsolute(
XTensor * a
);
void _CudaAbsolute(
const XTensor * a, XTensor * b
);
#endif // USE_CUDA
#endif // USE_CUDA
...
...
source/tensor/core/arithmetic/Absolute.h
查看文件 @
906eebb7
...
@@ -27,8 +27,19 @@
...
@@ -27,8 +27,19 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* set every entry to its absolute value */
/* set every entry to its absolute value */
extern
"C"
void
_Absolute
(
const
XTensor
*
a
,
XTensor
*
b
);
void
_Absolute
(
XTensor
*
a
);
/*
set every entry to its absolute value (do it on site)
keep the result in the input tensor a and return nothing
*/
void
_AbsoluteMe
(
XTensor
*
a
);
/*
set every entry to its absolute value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor
Absolute
(
const
XTensor
&
a
);
}
// namespace nts(NiuTrans.Tensor)
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/arithmetic/MatrixMULBatchedCPU.cpp
查看文件 @
906eebb7
...
@@ -41,8 +41,8 @@ void _MatrixMULBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA,
...
@@ -41,8 +41,8 @@ void _MatrixMULBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA,
const
XList
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
const
XList
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XList
*
c
,
DTYPE
alpha
,
DTYPE
beta
)
XList
*
c
,
DTYPE
alpha
,
DTYPE
beta
)
{
{
CheckNTErrors
(
(
a
&&
b
&&
c
)
,
"Empty input lists!"
);
CheckNTErrors
(
a
&&
b
&&
c
,
"Empty input lists!"
);
CheckNTErrors
(
(
a
->
count
==
b
->
count
&&
a
->
count
==
c
->
count
)
,
"Input lists must be of the same size!"
);
CheckNTErrors
(
a
->
count
==
b
->
count
&&
a
->
count
==
c
->
count
,
"Input lists must be of the same size!"
);
if
(
a
->
count
==
0
)
if
(
a
->
count
==
0
)
return
;
return
;
...
...
source/tensor/core/arithmetic/MatrixMULBatchedCPU.h
查看文件 @
906eebb7
...
@@ -28,8 +28,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -28,8 +28,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* matrix multiplication in batch mode (CPU code) */
/* matrix multiplication in batch mode (CPU code) */
extern
"C"
extern
"C"
void
_MatrixMULBatchedCPU
(
const
XList
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XList
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XList
*
c
,
void
_MatrixMULBatchedCPU
(
const
XList
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XList
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
);
XList
*
c
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
);
}
// namespace nts(NiuTrans.Tensor)
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/arithmetic/MatrixMul.h
查看文件 @
906eebb7
source/tensor/core/arithmetic/MatrixMul2D.cpp
查看文件 @
906eebb7
source/tensor/core/arithmetic/MatrixMul2D.cu
查看文件 @
906eebb7
...
@@ -123,8 +123,7 @@ where trans() return the transposed matrix if the flag is fired
...
@@ -123,8 +123,7 @@ where trans() return the transposed matrix if the flag is fired
*/
*/
void _CudaMatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
void _CudaMatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
const XTensor * b, MATRIX_TRANS_TYPE transposedB,
const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c,
XTensor * c, DTYPE alpha, DTYPE beta, XStream * stream)
DTYPE alpha, DTYPE beta, XStream * stream)
{
{
int an = transposedA == X_TRANS ? a->dimSize[1] : a->dimSize[0];
int an = transposedA == X_TRANS ? a->dimSize[1] : a->dimSize[0];
int am = transposedA == X_TRANS ? a->dimSize[0] : a->dimSize[1];
int am = transposedA == X_TRANS ? a->dimSize[0] : a->dimSize[1];
...
...
source/tensor/core/arithmetic/MatrixMul2D.cuh
查看文件 @
906eebb7
source/tensor/core/arithmetic/MatrixMul2DParallel.cpp
查看文件 @
906eebb7
source/tensor/core/arithmetic/MatrixMul2DParallel.h
查看文件 @
906eebb7
...
@@ -32,8 +32,8 @@ c = trans(a) * trans(b) * alpha + c * beta
...
@@ -32,8 +32,8 @@ c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired.
where trans() return the transposed matrix if the flag is fired.
*/
*/
extern
"C"
extern
"C"
void
_MatrixMul2DParallel
(
const
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
void
_MatrixMul2DParallel
(
const
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
,
XPRunner
*
parallelRunner
=
NULL
);
XTensor
*
c
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
,
XPRunner
*
parallelRunner
=
NULL
);
}
// namespace nts(NiuTrans.Tensor)
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/arithmetic/MatrixMulBatched.cpp
查看文件 @
906eebb7
...
@@ -47,8 +47,7 @@ where trans() returns the transposed matrix if the flag is fired
...
@@ -47,8 +47,7 @@ where trans() returns the transposed matrix if the flag is fired
*/
*/
void
_MatrixMulBatched
(
const
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
void
_MatrixMulBatched
(
const
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
const
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
DTYPE
alpha
,
DTYPE
beta
,
XTensor
*
c
,
DTYPE
alpha
,
DTYPE
beta
,
XPRunner
*
parallelRunner
)
XPRunner
*
parallelRunner
)
{
{
CheckNTErrors
((
a
&&
b
&&
c
),
"Empty input tensors!"
);
CheckNTErrors
((
a
&&
b
&&
c
),
"Empty input tensors!"
);
CheckNTErrors
((
a
->
dataType
==
b
->
dataType
&&
a
->
dataType
==
c
->
dataType
),
CheckNTErrors
((
a
->
dataType
==
b
->
dataType
&&
a
->
dataType
==
c
->
dataType
),
...
@@ -169,14 +168,12 @@ where trans() returns the transposed matrix if the flag is fired.
...
@@ -169,14 +168,12 @@ where trans() returns the transposed matrix if the flag is fired.
>> b - tensor b
>> b - tensor b
>> transposedB - indicates whether teh matrices in b are transposed
>> transposedB - indicates whether teh matrices in b are transposed
>> alpha - a coefficient
>> alpha - a coefficient
>> beta - another coefficient
>> parallelRunner - parallel processing module
>> parallelRunner - parallel processing module
<< return - the result of matrix multiplication of the two tensors
<< return - the result of matrix multiplication of the two tensors
*/
*/
XTensor
MatrixMulBatched
(
const
XTensor
&
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XTensor
&
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
MatrixMulBatched
(
const
XTensor
&
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XTensor
&
b
,
MATRIX_TRANS_TYPE
transposedB
,
DTYPE
alpha
,
XPRunner
*
parallelRunner
)
DTYPE
alpha
,
XPRunner
*
parallelRunner
)
{
{
CheckNTErrors
(
&
a
!=
&
NULLTensor
&&
&
b
!=
&
NULLTensor
,
"Empty input tensors!"
);
CheckNTErrors
(
a
.
dataType
==
b
.
dataType
,
"Input tensors should have the same data type!"
);
CheckNTErrors
(
a
.
dataType
==
b
.
dataType
,
"Input tensors should have the same data type!"
);
CheckNTErrors
(
a
.
order
>=
2
&&
b
.
order
>=
2
,
"Input tensors must have a order >= 2!"
);
CheckNTErrors
(
a
.
order
>=
2
&&
b
.
order
>=
2
,
"Input tensors must have a order >= 2!"
);
CheckNTErrors
(
a
.
order
==
b
.
order
,
"Input tensor and output tensor must have same order!"
);
CheckNTErrors
(
a
.
order
==
b
.
order
,
"Input tensor and output tensor must have same order!"
);
...
@@ -191,13 +188,13 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const
...
@@ -191,13 +188,13 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const
int
order
=
a
.
order
;
int
order
=
a
.
order
;
int
sub
=
0
;
int
sub
=
0
;
int
*
dimSize
=
new
int
[
order
];
int
*
dimSize
=
new
int
[
order
];
for
(
int
i
=
2
;
i
<
a
.
order
;
i
++
)
for
(
int
i
=
0
;
i
<
a
.
order
-
2
;
i
++
)
dimSize
[
sub
++
]
=
a
.
dimSize
RDI
[
i
];
dimSize
[
sub
++
]
=
a
.
dimSize
[
i
];
dimSize
[
sub
++
]
=
an
;
dimSize
[
sub
++
]
=
an
;
dimSize
[
sub
++
]
=
bm
;
dimSize
[
sub
++
]
=
bm
;
XTensor
c
=
NewTensor
(
order
,
dimSize
,
a
.
dataType
,
a
.
denseRatio
,
a
.
devID
,
a
.
mem
);
float
dr
=
(
!
a
.
isSparse
||
!
b
.
isSparse
)
?
1.0
F
:
MAX
(
a
.
denseRatio
,
b
.
denseRatio
);
c
.
SetZeroAll
(
);
XTensor
c
(
order
,
dimSize
,
a
.
dataType
,
dr
,
a
.
devID
,
a
.
mem
);
c
.
SetTMP
();
c
.
SetTMP
();
/*call _MatrixMulBatched function */
/*call _MatrixMulBatched function */
...
...
source/tensor/core/arithmetic/Negate.cpp
查看文件 @
906eebb7
...
@@ -20,6 +20,7 @@
...
@@ -20,6 +20,7 @@
*/
*/
#include "../../XTensor.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "Negate.h"
#include "Negate.h"
#include "Negate.cuh"
#include "Negate.cuh"
...
@@ -27,21 +28,55 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -27,21 +28,55 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/*
/*
set every entry to its minus value
set every entry to its minus value
>> a - the tensor we are processing
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
*/
void
_Negate
(
XTensor
*
a
)
void
_Negate
(
const
XTensor
*
a
,
XTensor
*
b
)
{
{
#ifdef USE_CUDA
#ifdef USE_CUDA
/* run it on GPUs */
/* run it on GPUs */
if
(
a
->
devID
>=
0
)
{
if
(
a
->
devID
>=
0
)
{
_CudaNegate
(
a
);
_CudaNegate
(
a
,
b
);
return
;
return
;
}
}
#endif
#endif
CheckNTErrors
((
XTensor
::
IsIdentical
(
a
,
b
)),
"Input tensors should have the same type!"
);
CheckNTErrors
((
a
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
CheckNTErrors
((
a
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
DTYPE
*
d
=
(
DTYPE
*
)
a
->
data
;
DTYPE
*
d
=
(
DTYPE
*
)
a
->
data
;
DTYPE
*
db
=
(
DTYPE
*
)
b
->
data
;
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
d
[
i
]
=
-
d
[
i
];
d
b
[
i
]
=
-
d
[
i
];
}
}
/*
set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void
_NegateMe
(
XTensor
*
a
)
{
_Negate
(
a
,
a
);
}
/*
set every entry to its minus value (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the minus value of input tensor
*/
XTensor
Negate
(
const
XTensor
&
a
)
{
XTensor
b
(
&
a
);
b
.
SetTMP
();
/* call _Negate function */
_Negate
(
&
a
,
&
b
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
NULL
,
&
b
,
MATH_NEGATE
);
return
b
;
}
}
//
namespace
nts
(
NiuTrans
.
Tensor
)
}
//
namespace
nts
(
NiuTrans
.
Tensor
)
\ No newline at end of file
source/tensor/core/arithmetic/Negate.cu
查看文件 @
906eebb7
...
@@ -29,45 +29,49 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -29,45 +29,49 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
#ifdef USE_CUDA
/*
/*
set each entry to its negtive value (CUDA Kernel)
set each entry to its negtive value (CUDA Kernel)
>> d - pointer to the data array
>> a - pointer to the input data array
>> b - pointer to the output data array
>> size - size of the data array
>> size - size of the data array
*/
*/
__global__
__global__
void KernelNegate(DTYPE *
d
, int size)
void KernelNegate(DTYPE *
a, DTYPE * b
, int size)
{
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
if (i < size)
d[i] = -d
[i];
b[i] = -a
[i];
}
}
/*
/*
set each entry to its negtive value (CUDA Kernel)
set each entry to its negtive value (CUDA Kernel)
This is for float16 computation
This is for float16 computation
>> d - pointer to the data array
>> a - pointer to the input data array
>> b - pointer to the output data array
>> size - size of the data array
>> size - size of the data array
*/
*/
__global__
__global__
void KernelNegate(__half *
d
, int size)
void KernelNegate(__half *
a, __half * b
, int size)
{
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
int i = blockDim.x * blockIdx.x + threadIdx.x;
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
if (i < size)
if (i < size)
d[i] = __hsub(__float2half(0), d
[i]);
b[i] = __hsub(__float2half(0), a
[i]);
#else
#else
if (i < size)
if (i < size)
d[i] = __float2half(-__half2float(d
[i]));
b[i] = __float2half(-__half2float(a
[i]));
#endif
#endif
}
}
/*
/*
set each entry to its negtive value
set each entry to its negtive value
>> a - the tensor
>> a - input tensor
>> b - output tensor
*/
*/
extern "C"
extern "C"
void _CudaNegate(
XTensor * a
)
void _CudaNegate(
const XTensor * a, XTensor * b
)
{
{
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!");
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
int gridSize[3];
...
@@ -82,10 +86,10 @@ void _CudaNegate(XTensor * a)
...
@@ -82,10 +86,10 @@ void _CudaNegate(XTensor * a)
ProtectCudaDev(a->devID, devIDBackup);
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
if (a->dataType == DEFAULT_DTYPE) {
KernelNegate << <blocks, threads >> >((DTYPE*)a->data, a->unitNum);
KernelNegate << <blocks, threads >> >((DTYPE*)a->data,
(DTYPE*)b->data,
a->unitNum);
}
}
else if (a->dataType == X_FLOAT16) {
else if (a->dataType == X_FLOAT16) {
KernelNegate << <blocks, threads >> >((__half*)a->data, a->unitNum);
KernelNegate << <blocks, threads >> >((__half*)a->data,
(__half*)b->data,
a->unitNum);
}
}
else {
else {
ShowNTErrors("TODO!");
ShowNTErrors("TODO!");
...
...
source/tensor/core/arithmetic/Negate.cuh
查看文件 @
906eebb7
...
@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set each entry to its negtive value (CUDA Kernel) */
/* set each entry to its negtive value (CUDA Kernel) */
__global__
__global__
void KernelNegate(DTYPE *
d
, int size);
void KernelNegate(DTYPE *
a, DTYPE * b
, int size);
/* set each entry to its negtive value (CUDA Kernel) with float16 data type*/
/* set each entry to its negtive value (CUDA Kernel) with float16 data type*/
__global__
__global__
void KernelNegate(__half *
d
, int size);
void KernelNegate(__half *
a, __half * b
, int size);
/* set each entry to its negtive value */
/* set each entry to its negtive value */
extern "C"
extern "C"
void _CudaNegate(
XTensor * a
);
void _CudaNegate(
const XTensor * a, XTensor * b
);
#endif // USE_CUDA
#endif // USE_CUDA
...
...
source/tensor/core/arithmetic/Negate.h
查看文件 @
906eebb7
...
@@ -27,8 +27,19 @@
...
@@ -27,8 +27,19 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* set every entry to its minus value */
/* set every entry to its minus value */
extern
"C"
void
_Negate
(
const
XTensor
*
a
,
XTensor
*
b
);
void
_Negate
(
XTensor
*
a
);
/*
set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
*/
void
_NegateMe
(
XTensor
*
a
);
/*
set every entry to its minus value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor
Negate
(
const
XTensor
&
a
);
}
// namespace nts(NiuTrans.Tensor)
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/arithmetic/Sign.cpp
查看文件 @
906eebb7
...
@@ -20,6 +20,7 @@
...
@@ -20,6 +20,7 @@
*/
*/
#include "../../XTensor.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "Sign.h"
#include "Sign.h"
#include "Sign.cuh"
#include "Sign.cuh"
...
@@ -27,27 +28,60 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -27,27 +28,60 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/*
/*
set every entry to its sign value
set every entry to its sign value
>> a - the tensor we are processing
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
*/
void
_Sign
(
XTensor
*
a
)
void
_Sign
(
const
XTensor
*
a
,
XTensor
*
b
)
{
{
#ifdef USE_CUDA
#ifdef USE_CUDA
/* run it on GPUs */
/* run it on GPUs */
if
(
a
->
devID
>=
0
)
{
if
(
a
->
devID
>=
0
)
{
_CudaSign
(
a
);
_CudaSign
(
a
,
b
);
return
;
return
;
}
}
#endif
#endif
CheckNTErrors
((
XTensor
::
IsIdentical
(
a
,
b
)),
"Input tensors should have the same type!"
);
CheckNTErrors
((
a
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
CheckNTErrors
((
a
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
DTYPE
*
d
=
(
DTYPE
*
)
a
->
data
;
DTYPE
*
d
=
(
DTYPE
*
)
a
->
data
;
DTYPE
*
db
=
(
DTYPE
*
)
b
->
data
;
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
{
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
{
if
(
d
[
i
]
>
0
)
if
(
d
[
i
]
>
0
)
d
[
i
]
=
1.0
F
;
d
b
[
i
]
=
1.0
F
;
else
if
(
d
[
i
]
==
0
)
else
if
(
d
[
i
]
==
0
)
d
[
i
]
=
0.0
F
;
d
b
[
i
]
=
0.0
F
;
else
else
d
[
i
]
=
-
1.0
F
;
d
b
[
i
]
=
-
1.0
F
;
}
}
}
}
/*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void
_SignMe
(
XTensor
*
a
)
{
_Sign
(
a
,
a
);
}
/*
set every entry to its sign value (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the sign value of the input tensor
*/
XTensor
Sign
(
const
XTensor
&
a
)
{
XTensor
b
(
&
a
);
b
.
SetTMP
();
/* call _ScaleAndShift function */
_Sign
(
&
a
,
&
b
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
NULL
,
&
b
,
MATH_SIGN
);
return
b
;
}
}
//
namespace
nts
(
NiuTrans
.
Tensor
)
}
//
namespace
nts
(
NiuTrans
.
Tensor
)
\ No newline at end of file
source/tensor/core/arithmetic/Sign.cu
查看文件 @
906eebb7
...
@@ -29,43 +29,47 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -29,43 +29,47 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
#ifdef USE_CUDA
/*
/*
set each entry to its sign value (CUDA Kernel)
set each entry to its sign value (CUDA Kernel)
>> d - pointer to the data array
>> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array
>> size - size of the data array
*/
*/
__global__
__global__
void KernelSign(DTYPE *
d
, int size)
void KernelSign(DTYPE *
a, DTYPE * b
, int size)
{
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) {
if (i < size) {
if (
d
[i] > 0)
if (
a
[i] > 0)
d
[i] = 1.0F;
b
[i] = 1.0F;
else if (
d
[i] == 0)
else if (
a
[i] == 0)
d
[i] = 0.0F;
b
[i] = 0.0F;
else
else
d
[i] = -1.0F;
b
[i] = -1.0F;
}
}
}
}
/*
/*
set each entry to its sign value (CUDA Kernel)
set each entry to its sign value
with float16 data type value
(CUDA Kernel)
This is for float16 computation
This is for float16 computation
>> d - pointer to the data array
>> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array
>> size - size of the data array
*/
*/
__global__
__global__
void KernelSign(__half *
d
, int size)
void KernelSign(__half *
a, __half * b
, int size)
{
{
return;
return;
}
}
/*
/*
set each entry to its with float16 data type value
set each entry to its sign value
>> a - the tensor
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
*/
extern "C"
extern "C"
void _CudaSign(
XTensor * a
)
void _CudaSign(
const XTensor * a, XTensor * b
)
{
{
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!");
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
int gridSize[3];
...
@@ -80,10 +84,10 @@ void _CudaSign(XTensor * a)
...
@@ -80,10 +84,10 @@ void _CudaSign(XTensor * a)
ProtectCudaDev(a->devID, devIDBackup);
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
if (a->dataType == DEFAULT_DTYPE) {
KernelSign << <blocks, threads >> >((DTYPE*)a->data, a->unitNum);
KernelSign << <blocks, threads >> >((DTYPE*)a->data,
(DTYPE*)b->data,
a->unitNum);
}
}
else if (a->dataType == X_FLOAT16) {
else if (a->dataType == X_FLOAT16) {
KernelSign << <blocks, threads >> >((__half*)a->data, a->unitNum);
KernelSign << <blocks, threads >> >((__half*)a->data,
(__half*)b->data,
a->unitNum);
}
}
else {
else {
ShowNTErrors("TODO!");
ShowNTErrors("TODO!");
...
...
source/tensor/core/arithmetic/Sign.cuh
查看文件 @
906eebb7
...
@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set each entry to its sign value (CUDA Kernel) */
/* set each entry to its sign value (CUDA Kernel) */
__global__
__global__
void KernelSign(DTYPE *
d
, int size);
void KernelSign(DTYPE *
a, DTYPE * b
, int size);
/* set each entry to its sign value (CUDA Kernel) with float16 data type*/
/* set each entry to its sign value (CUDA Kernel) with float16 data type*/
__global__
__global__
void KernelSign(__half *
d
, int size);
void KernelSign(__half *
a, __half * b
, int size);
/* set each entry to its sign value */
/* set each entry to its sign value */
extern "C"
extern "C"
void _CudaSign(
XTensor * a
);
void _CudaSign(
const XTensor * a, XTensor * b
);
#endif // USE_CUDA
#endif // USE_CUDA
...
...
source/tensor/core/arithmetic/Sign.h
查看文件 @
906eebb7
...
@@ -27,8 +27,19 @@
...
@@ -27,8 +27,19 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* set every entry to its sign value */
/* set every entry to its sign value */
extern
"C"
void
_Sign
(
const
XTensor
*
a
,
XTensor
*
b
);
void
_Sign
(
XTensor
*
a
);
/*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
*/
void
_SignMe
(
XTensor
*
a
);
/*
set every entry to its sign value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor
Sign
(
const
XTensor
&
a
);
}
// namespace nts(NiuTrans.Tensor)
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/arithmetic/XTensorBLAS.cpp
查看文件 @
906eebb7
source/tensor/core/arithmetic/XTensorBLAS.cu
查看文件 @
906eebb7
source/tensor/core/arithmetic/XTensorBLAS.h
查看文件 @
906eebb7
...
@@ -28,7 +28,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -28,7 +28,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* matrix multiplication (BLAS) */
/* matrix multiplication (BLAS) */
extern
"C"
extern
"C"
void
_MatrixMULCPU
(
const
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
);
void
_MatrixMULCPU
(
const
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
);
#ifdef USE_CUDA
#ifdef USE_CUDA
...
@@ -46,7 +47,8 @@ void _CudaBLASMatrixMULBatched(cublasHandle_t * handle,
...
@@ -46,7 +47,8 @@ void _CudaBLASMatrixMULBatched(cublasHandle_t * handle,
const
void
**
a
,
MATRIX_TRANS_TYPE
transposedA
,
TENSOR_DATA_TYPE
dataTypeA
,
const
void
**
a
,
MATRIX_TRANS_TYPE
transposedA
,
TENSOR_DATA_TYPE
dataTypeA
,
const
void
**
b
,
MATRIX_TRANS_TYPE
transposedB
,
TENSOR_DATA_TYPE
dataTypeB
,
const
void
**
b
,
MATRIX_TRANS_TYPE
transposedB
,
TENSOR_DATA_TYPE
dataTypeB
,
void
**
c
,
TENSOR_DATA_TYPE
dataTypeC
,
void
**
c
,
TENSOR_DATA_TYPE
dataTypeC
,
int
count
,
int
na
,
int
ma
,
int
nb
,
int
mb
,
int
nc
,
int
mc
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
1
.
0
);
int
count
,
int
na
,
int
ma
,
int
nb
,
int
mb
,
int
nc
,
int
mc
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
1
.
0
);
/* matrix multiplication in batch and strided mode via cuda version BLAS */
/* matrix multiplication in batch and strided mode via cuda version BLAS */
extern
"C"
extern
"C"
...
@@ -54,11 +56,13 @@ void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
...
@@ -54,11 +56,13 @@ void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
const
void
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
TENSOR_DATA_TYPE
dataTypeA
,
long
long
int
strideA
,
const
void
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
TENSOR_DATA_TYPE
dataTypeA
,
long
long
int
strideA
,
const
void
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
TENSOR_DATA_TYPE
dataTypeB
,
long
long
int
strideB
,
const
void
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
TENSOR_DATA_TYPE
dataTypeB
,
long
long
int
strideB
,
void
*
c
,
TENSOR_DATA_TYPE
dataTypeC
,
long
long
int
strideC
,
void
*
c
,
TENSOR_DATA_TYPE
dataTypeC
,
long
long
int
strideC
,
int
count
,
int
na
,
int
ma
,
int
nb
,
int
mb
,
int
nc
,
int
mc
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
1
.
0
);
int
count
,
int
na
,
int
ma
,
int
nb
,
int
mb
,
int
nc
,
int
mc
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
1
.
0
);
/* matrix multiplication in batch mode via cuda version BLAS */
/* matrix multiplication in batch mode via cuda version BLAS */
extern
"C"
extern
"C"
void
_CudaBLASMatrixMULList
(
cublasHandle_t
*
handle
,
const
XList
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XList
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XList
*
c
,
void
_CudaBLASMatrixMULList
(
cublasHandle_t
*
handle
,
const
XList
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XList
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XList
*
c
,
int
count
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
1
.
0
);
int
count
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
1
.
0
);
#endif
#endif
...
...
source/tensor/core/getandset/Select.cpp
查看文件 @
906eebb7
...
@@ -96,7 +96,6 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high)
...
@@ -96,7 +96,6 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high)
int
order
=
a
.
order
;
int
order
=
a
.
order
;
int
*
dimSize
=
new
int
[
order
];
int
*
dimSize
=
new
int
[
order
];
CheckNTErrors
(
&
a
!=
NULL
,
"Empty input tensors!"
);
CheckNTErrors
(
dim
>=
0
&&
dim
<
a
.
order
,
"The input dimension is out of bounds!"
);
CheckNTErrors
(
dim
>=
0
&&
dim
<
a
.
order
,
"The input dimension is out of bounds!"
);
CheckNTErrors
(
low
<
high
,
"Illegal range specified!"
);
CheckNTErrors
(
low
<
high
,
"Illegal range specified!"
);
...
@@ -110,8 +109,8 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high)
...
@@ -110,8 +109,8 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high)
dimSize
[
i
]
=
a
.
dimSize
[
i
];
dimSize
[
i
]
=
a
.
dimSize
[
i
];
}
}
XTensor
c
=
NewTensor
(
order
,
dimSize
,
a
.
dataType
,
a
.
denseRatio
,
a
.
devID
,
a
.
mem
)
;
float
dr
=
(
!
a
.
isSparse
)
?
1.0
F
:
a
.
denseRatio
;
c
.
SetZeroAll
(
);
XTensor
c
(
order
,
dimSize
,
a
.
dataType
,
dr
,
a
.
devID
,
a
.
mem
);
c
.
SetTMP
();
c
.
SetTMP
();
/* call _SelectRange function */
/* call _SelectRange function */
...
...
source/tensor/core/math/Log.cpp
查看文件 @
906eebb7
...
@@ -20,6 +20,7 @@
...
@@ -20,6 +20,7 @@
*/
*/
#include "../../XTensor.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "Log.h"
#include "Log.h"
#include "Log.cuh"
#include "Log.cuh"
#include <math.h>
#include <math.h>
...
@@ -27,22 +28,55 @@
...
@@ -27,22 +28,55 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
/*
set every entry to its log value
set every entry to its log value (do it on site)
>> a - the tensor we are processing
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
*/
void
_Log
(
XTensor
*
a
)
void
_Log
(
const
XTensor
*
a
,
XTensor
*
b
)
{
{
#ifdef USE_CUDA
#ifdef USE_CUDA
/* run it on GPUs */
/* run it on GPUs */
if
(
a
->
devID
>=
0
)
{
if
(
a
->
devID
>=
0
)
{
_CudaLog
(
a
);
_CudaLog
(
a
,
b
);
return
;
return
;
}
}
#endif
#endif
CheckNTErrors
((
XTensor
::
IsIdentical
(
a
,
b
)),
"Input tensors should have the same type!"
);
CheckNTErrors
((
a
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
CheckNTErrors
((
a
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
DTYPE
*
d
=
(
DTYPE
*
)
a
->
data
;
DTYPE
*
d
=
(
DTYPE
*
)
a
->
data
;
DTYPE
*
db
=
(
DTYPE
*
)
b
->
data
;
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
d
[
i
]
=
(
DTYPE
)
log
(
d
[
i
]);
db
[
i
]
=
(
DTYPE
)
log
(
d
[
i
]);
}
/*
set every entry to its log value
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void
_LogMe
(
XTensor
*
a
)
{
_Log
(
a
,
a
);
}
/*
set every entry to its log value (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the log value of the input tensor
*/
XTensor
Log
(
const
XTensor
&
a
)
{
XTensor
b
(
&
a
);
b
.
SetTMP
();
/* call _Log function */
_Log
(
&
a
,
&
b
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
NULL
,
&
b
,
MATH_LOG
);
return
b
;
}
}
}
//
namespace
nts
(
NiuTrans
.
Tensor
)
}
//
namespace
nts
(
NiuTrans
.
Tensor
)
\ No newline at end of file
source/tensor/core/math/Log.cu
查看文件 @
906eebb7
...
@@ -29,37 +29,41 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -29,37 +29,41 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
#ifdef USE_CUDA
/*
/*
set each entry to its log value (CUDA Kernel)
set each entry to its log value (CUDA Kernel)
>> d - pointer to the data array
>> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array
>> size - size of the data array
*/
*/
__global__
__global__
void KernelLog(DTYPE *
d
, int size)
void KernelLog(DTYPE *
a, DTYPE * b
, int size)
{
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
if (i < size)
d[i] = log(d
[i]);
b[i] = log(a
[i]);
}
}
/*
/*
set each entry to its log value (CUDA Kernel)
set each entry to its log value (CUDA Kernel)
This is for float16 computation
This is for float16 computation
>> d - pointer to the data array
>> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array
>> size - size of the data array
*/
*/
__global__
__global__
void KernelLog(__half *
d
, int size)
void KernelLog(__half *
a, __half * b
, int size)
{
{
return;
return;
}
}
/*
/*
set each entry to its log value
set each entry to its log value
>> a - the tensor
>> a - input tensor
>> b - output tensor
*/
*/
extern "C"
extern "C"
void _CudaLog(
XTensor * a
)
void _CudaLog(
const XTensor * a, XTensor * b
)
{
{
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!");
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
int gridSize[3];
...
@@ -74,10 +78,10 @@ void _CudaLog(XTensor * a)
...
@@ -74,10 +78,10 @@ void _CudaLog(XTensor * a)
ProtectCudaDev(a->devID, devIDBackup);
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
if (a->dataType == DEFAULT_DTYPE) {
KernelLog << <blocks, threads >> >((DTYPE*)a->data, a->unitNum);
KernelLog << <blocks, threads >> >((DTYPE*)a->data,
(DTYPE*)b->data,
a->unitNum);
}
}
else if (a->dataType == X_FLOAT16) {
else if (a->dataType == X_FLOAT16) {
KernelLog << <blocks, threads >> >((__half*)a->data, a->unitNum);
KernelLog << <blocks, threads >> >((__half*)a->data,
(__half*)b->data,
a->unitNum);
}
}
else {
else {
ShowNTErrors("TODO!");
ShowNTErrors("TODO!");
...
...
source/tensor/core/math/Log.cuh
查看文件 @
906eebb7
...
@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set each entry to its log value (CUDA Kernel) */
/* set each entry to its log value (CUDA Kernel) */
__global__
__global__
void KernelLog(DTYPE *
d
, int size);
void KernelLog(DTYPE *
a, DTYPE * b
, int size);
/* set each entry to its log value (CUDA Kernel) with float16 data type*/
/* set each entry to its log value (CUDA Kernel) with float16 data type*/
__global__
__global__
void KernelLog(__half *
d
, int size);
void KernelLog(__half *
a, __half * b
, int size);
/* set each entry to its log value */
/* set each entry to its log value */
extern "C"
extern "C"
void _CudaLog(
XTensor * a
);
void _CudaLog(
const XTensor * a, XTensor * b
);
#endif // USE_CUDA
#endif // USE_CUDA
...
...
source/tensor/core/math/Log.h
查看文件 @
906eebb7
...
@@ -27,8 +27,19 @@
...
@@ -27,8 +27,19 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* set every entry to its log value */
/* set every entry to its log value */
extern
"C"
void
_Log
(
const
XTensor
*
a
,
XTensor
*
b
);
void
_Log
(
XTensor
*
a
);
/*
set every entry to its log value (do it on site)
keep the result in the input tensor a and return nothing
*/
void
_LogMe
(
XTensor
*
a
);
/*
set every entry to its log value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor
Log
(
const
XTensor
&
a
);
}
// namespace nts(NiuTrans.Tensor)
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/math/Power.cpp
查看文件 @
906eebb7
...
@@ -21,6 +21,7 @@
...
@@ -21,6 +21,7 @@
#include <math.h>
#include <math.h>
#include "../../XTensor.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "Power.h"
#include "Power.h"
#include "Power.cuh"
#include "Power.cuh"
...
@@ -28,38 +29,73 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -28,38 +29,73 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/*
/*
get the power(a, p)
get the power(a, p)
>> a - the tensor
>> a - input tensor
>> p - as it is
>> b - output tensor
>> p - parameter
*/
*/
void
_Power
(
XTensor
*
a
,
DTYPE
p
)
void
_Power
(
const
XTensor
*
a
,
XTensor
*
b
,
DTYPE
p
)
{
{
#ifdef USE_CUDA
#ifdef USE_CUDA
/* run it on GPUs */
/* run it on GPUs */
if
(
a
->
devID
>=
0
)
{
if
(
a
->
devID
>=
0
)
{
_CudaPower
(
a
,
p
);
_CudaPower
(
a
,
b
,
p
);
return
;
return
;
}
}
#endif
#endif
CheckNTErrors
((
a
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
CheckNTErrors
((
a
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
DTYPE
*
d
=
(
DTYPE
*
)
a
->
data
;
DTYPE
*
aData
=
(
DTYPE
*
)
a
->
data
;
DTYPE
*
bData
=
(
DTYPE
*
)
b
->
data
;
if
(
p
==
0
)
{
if
(
p
==
0
)
{
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
d
[
i
]
=
(
DTYPE
)
1.0
;
bData
[
i
]
=
(
DTYPE
)
1.0
;
}
}
else
if
(
p
==
(
DTYPE
)
0.5
)
{
else
if
(
p
==
(
DTYPE
)
0.5
)
{
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
d
[
i
]
=
(
DTYPE
)
sqrt
(
d
[
i
]);
bData
[
i
]
=
(
DTYPE
)
sqrt
(
aData
[
i
]);
}
}
else
if
(
p
==
(
DTYPE
)
2.0
)
{
else
if
(
p
==
(
DTYPE
)
2.0
)
{
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
d
[
i
]
=
d
[
i
]
*
d
[
i
];
bData
[
i
]
=
aData
[
i
]
*
aData
[
i
];
}
}
else
{
else
{
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
d
[
i
]
=
(
DTYPE
)
pow
(
d
[
i
],
p
);
bData
[
i
]
=
(
DTYPE
)
pow
(
aData
[
i
],
p
);
}
}
}
}
/*
get the power(a, p) (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor
>> p - parameter
*/
void
_PowerMe
(
XTensor
*
a
,
DTYPE
p
)
{
_Power
(
a
,
a
,
p
);
}
/*
get the power(a, p) (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor
>> p - parameter
<< return - the power value of the input tensor
*/
XTensor
Power
(
const
XTensor
&
a
,
DTYPE
p
)
{
XTensor
b
(
&
a
);
b
.
SetTMP
();
/* call _Power function */
_Power
(
&
a
,
&
b
,
p
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
NULL
,
&
b
,
MATH_POWER
);
XLink
::
AddParamToHead
(
&
b
,
p
);
return
b
;
}
}
// namespace nts(NiuTrans.Tensor)
}
// namespace nts(NiuTrans.Tensor)
source/tensor/core/math/Power.cu
查看文件 @
906eebb7
...
@@ -21,6 +21,7 @@
...
@@ -21,6 +21,7 @@
#include "../../XDevice.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "../../XTensor.h"
#include "../movement/CopyValues.cuh"
#include "Power.h"
#include "Power.h"
#include "Power.cuh"
#include "Power.cuh"
...
@@ -30,74 +31,80 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -30,74 +31,80 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/*
/*
set all entries to its root (CUDA Kernel)
set all entries to its root (CUDA Kernel)
>> d - data array
>> a - input data array
>> b - output data array
>> size - size of the data array
>> size - size of the data array
*/
*/
__global__
__global__
void KernelSqrtV2(DTYPE *
d
, int size)
void KernelSqrtV2(DTYPE *
a, DTYPE * b
, int size)
{
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
if (i < size)
d[i] = sqrt(d
[i]);
b[i] = sqrt(a
[i]);
}
}
/*
/*
set all entries to its root (CUDA Kernel)
set all entries to its root (CUDA Kernel)
>> d - data array
>> a - input data array
>> b - output data array
>> size - size of the data array
>> size - size of the data array
*/
*/
__global__
__global__
void KernelSqrtV2(__half *
d
, int size)
void KernelSqrtV2(__half *
a, __half * b
, int size)
{
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
int i = blockDim.x * blockIdx.x + threadIdx.x;
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
if (i < size)
if (i < size)
d[i] = hsqrt(d
[i]);
b[i] = hsqrt(a
[i]);
#else
#else
if (i < size)
if (i < size)
d[i] = __float2half(sqrt(__half2float(d
[i])));
b[i] = __float2half(sqrt(__half2float(a
[i])));
#endif
#endif
}
}
/*
/*
get power(d[i], p)
get power(d[i], p)
>> d - data array
>> a - input data array
>> b - output data array
>> p - power
>> p - power
>> size - size of the data array
>> size - size of the data array
*/
*/
__global__
__global__
void KernelPower(DTYPE *
d
, DTYPE p, int size)
void KernelPower(DTYPE *
a, DTYPE * b
, DTYPE p, int size)
{
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
if (i < size)
d[i] = pow(d
[i], p);
b[i] = pow(a
[i], p);
}
}
/*
/*
get power(d[i], p)
get power(d[i], p)
>> d - data array
>> a - input data array
>> b - output data array
>> p - power
>> p - power
>> size - size of the data array
>> size - size of the data array
*/
*/
__global__
__global__
void KernelPower(__half *
d
, __half p, int size)
void KernelPower(__half *
a, __half * b
, __half p, int size)
{
{
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
#else
#else
int i = blockDim.x * blockIdx.x + threadIdx.x;
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
if (i < size)
d[i] = __float2half(pow(__half2float(d
[i]), __half2float(p)));
b[i] = __float2half(pow(__half2float(a
[i]), __half2float(p)));
#endif
#endif
}
}
/* get the power of the entries */
/* get the power of the entries */
extern "C"
extern "C"
void _CudaPower(
XTensor * a
, DTYPE p)
void _CudaPower(
const XTensor * a, XTensor * b
, DTYPE p)
{
{
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
int gridSize[3];
int gridSize[3];
int blockSize[3];
int blockSize[3];
...
@@ -111,15 +118,18 @@ void _CudaPower(XTensor * a, DTYPE p)
...
@@ -111,15 +118,18 @@ void _CudaPower(XTensor * a, DTYPE p)
if (a->dataType == DEFAULT_DTYPE) {
if (a->dataType == DEFAULT_DTYPE) {
if (p == (DTYPE)0.5) {
if (p == (DTYPE)0.5) {
KernelSqrtV2 << <blocks, threads >> >((DTYPE*)a->data, a->unitNum);
KernelSqrtV2 << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum);
}
else if (p == (DTYPE)1.0) {
_CudaCopyValues(a, b);
}
}
else if (p != (DTYPE)1.0) {
else if (p != (DTYPE)1.0) {
KernelPower << <blocks, threads >> >((DTYPE*)a->data, p, a->unitNum);
KernelPower << <blocks, threads >> >((DTYPE*)a->data,
(DTYPE*)b->data,
p, a->unitNum);
}
}
}
}
else if (a->dataType == X_FLOAT16) {
else if (a->dataType == X_FLOAT16) {
if (p == (DTYPE)0.5) {
if (p == (DTYPE)0.5) {
KernelSqrtV2 << <blocks, threads >> >((__half*)a->data, a->unitNum);
KernelSqrtV2 << <blocks, threads >> >((__half*)a->data,
(__half*)b->data,
a->unitNum);
}
}
else if (p != (DTYPE)1.0) {
else if (p != (DTYPE)1.0) {
ShowNTErrors("TODO!");
ShowNTErrors("TODO!");
...
...
source/tensor/core/math/Power.cuh
查看文件 @
906eebb7
...
@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set all entries to its root (CUDA Kernel) */
/* set all entries to its root (CUDA Kernel) */
__global__
__global__
void KernelSqrtV2(DTYPE *
d
, int size);
void KernelSqrtV2(DTYPE *
a, DTYPE * b
, int size);
/* set all entries to its root (CUDA Kernel) */
/* set all entries to its root (CUDA Kernel) */
__global__
__global__
void KernelSqrtV2(__half *
d
, int size);
void KernelSqrtV2(__half *
a, __half * b
, int size);
/* get the power of the entries */
/* get the power of the entries */
extern "C"
extern "C"
void _CudaPower(
XTensor * a
, DTYPE p);
void _CudaPower(
const XTensor * a, XTensor * b
, DTYPE p);
#endif // USE_CUDA
#endif // USE_CUDA
...
...
source/tensor/core/math/Power.h
查看文件 @
906eebb7
...
@@ -27,8 +27,19 @@
...
@@ -27,8 +27,19 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* get the power(x, y) */
/* get the power(x, y) */
extern
"C"
void
_Power
(
const
XTensor
*
a
,
XTensor
*
b
,
DTYPE
p
);
void
_Power
(
XTensor
*
a
,
DTYPE
p
);
/*
get the power(x, y) (do it on site)
keep the result in the input tensor a and return nothing
*/
void
_PowerMe
(
XTensor
*
a
,
DTYPE
p
);
/*
get the power(x, y) (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor
Power
(
const
XTensor
&
a
,
DTYPE
p
);
}
// namespace nts(NiuTrans.Tensor)
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/movement/CopyIndexed.cpp
查看文件 @
906eebb7
...
@@ -110,8 +110,7 @@ make a new tensor to keep the result and return it
...
@@ -110,8 +110,7 @@ make a new tensor to keep the result and return it
*/
*/
XTensor
CopyIndexed
(
const
XTensor
&
s
,
int
dim
,
int
*
srcIndex
,
int
indexSize
,
int
*
tgtIndex
,
int
copyNum
)
XTensor
CopyIndexed
(
const
XTensor
&
s
,
int
dim
,
int
*
srcIndex
,
int
indexSize
,
int
*
tgtIndex
,
int
copyNum
)
{
{
CheckNTErrors
(
&
s
,
"Empty input tensor!"
);
CheckNTErrors
(
dim
>=
0
&&
dim
<
s
.
order
,
"A too larget dimension specified!"
);
CheckNTErrors
((
dim
>=
0
&&
dim
<
s
.
order
),
"A too larget dimension specified!"
);
int
order
=
s
.
order
;
int
order
=
s
.
order
;
int
*
dimSize
=
new
int
[
order
];
int
*
dimSize
=
new
int
[
order
];
...
@@ -123,16 +122,13 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in
...
@@ -123,16 +122,13 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in
dimSize
[
i
]
=
s
.
dimSize
[
i
];
dimSize
[
i
]
=
s
.
dimSize
[
i
];
}
}
XTensor
t
=
NewTensor
(
order
,
dimSize
,
s
.
dataType
,
s
.
denseRatio
,
s
.
devID
,
s
.
mem
)
;
float
dr
=
(
!
s
.
isSparse
)
?
1.0
F
:
s
.
denseRatio
;
t
.
SetZeroAll
(
);
XTensor
t
(
order
,
dimSize
,
s
.
dataType
,
dr
,
s
.
devID
,
s
.
mem
);
t
.
SetTMP
();
t
.
SetTMP
();
/* call _CopyIndexed function */
/* call _CopyIndexed function */
_CopyIndexed
(
&
s
,
&
t
,
dim
,
srcIndex
,
indexSize
,
tgtIndex
,
copyNum
);
_CopyIndexed
(
&
s
,
&
t
,
dim
,
srcIndex
,
indexSize
,
tgtIndex
,
copyNum
);
/* destroy variables */
delete
[]
dimSize
;
/* tensor connection */
/* tensor connection */
XLink
::
MakeLink
(
&
s
,
NULL
,
&
t
,
MOVEMENT_COPYINDEXED
);
XLink
::
MakeLink
(
&
s
,
NULL
,
&
t
,
MOVEMENT_COPYINDEXED
);
XLink
::
AddParamToHeadInt
(
&
t
,
dim
);
XLink
::
AddParamToHeadInt
(
&
t
,
dim
);
...
@@ -141,6 +137,9 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in
...
@@ -141,6 +137,9 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in
XLink
::
AddParamToHeadPointer
(
&
t
,
tgtIndex
);
XLink
::
AddParamToHeadPointer
(
&
t
,
tgtIndex
);
XLink
::
AddParamToHeadInt
(
&
t
,
copyNum
);
XLink
::
AddParamToHeadInt
(
&
t
,
copyNum
);
/* destroy variables */
delete
[]
dimSize
;
return
t
;
return
t
;
}
}
...
...
source/tensor/core/reduce/ReduceMax.cpp
查看文件 @
906eebb7
...
@@ -101,32 +101,31 @@ make a new tensor to keep the result and return it
...
@@ -101,32 +101,31 @@ make a new tensor to keep the result and return it
*/
*/
XTensor
ReduceMax
(
const
XTensor
&
input
,
int
dim
)
XTensor
ReduceMax
(
const
XTensor
&
input
,
int
dim
)
{
{
CheckNTErrors
(
&
input
,
"Empty input or output tensors!"
);
CheckNTErrors
(
dim
>=
0
&&
dim
<
input
.
order
,
"Illegal dimension to reduce!"
);
CheckNTErrors
((
dim
>=
0
&&
dim
<
input
.
order
),
"Illegal dimension to reduce!"
);
int
order
=
input
.
order
-
1
;
int
order
=
input
.
order
-
1
;
int
*
dimSize
=
new
int
[
order
];
int
*
dimSize
=
new
int
[
order
];
for
(
int
i
=
0
;
i
<
input
.
order
;
i
++
){
for
(
int
i
=
0
;
i
<
order
;
i
++
){
if
(
i
<
dim
)
if
(
i
<
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
];
dimSize
[
i
]
=
input
.
dimSize
[
i
];
else
if
(
i
>
dim
)
else
if
(
i
>
=
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
+
1
];
dimSize
[
i
]
=
input
.
dimSize
[
i
+
1
];
}
}
XTensor
output
=
NewTensor
(
order
,
dimSize
,
input
.
dataType
,
input
.
denseRatio
,
input
.
devID
,
input
.
mem
)
;
float
dr
=
(
!
input
.
isSparse
)
?
1.0
F
:
input
.
denseRatio
;
output
.
SetZeroAll
(
);
XTensor
output
(
order
,
dimSize
,
input
.
dataType
,
dr
,
input
.
devID
,
input
.
mem
);
output
.
SetTMP
();
output
.
SetTMP
();
/* call _ReduceMax function */
/* call _ReduceMax function */
_ReduceMax
(
&
input
,
&
output
,
dim
);
_ReduceMax
(
&
input
,
&
output
,
dim
);
/* destroy variables */
delete
[]
dimSize
;
/* tensor connection */
/* tensor connection */
XLink
::
MakeLink
(
&
input
,
NULL
,
&
output
,
REDUCE_REDUCEMAX
);
XLink
::
MakeLink
(
&
input
,
NULL
,
&
output
,
REDUCE_REDUCEMAX
);
XLink
::
AddParamToHeadInt
(
&
output
,
dim
);
XLink
::
AddParamToHeadInt
(
&
output
,
dim
);
/* destroy variables */
delete
[]
dimSize
;
return
output
;
return
output
;
}
}
...
...
source/tensor/core/reduce/ReduceMean.cpp
查看文件 @
906eebb7
...
@@ -58,20 +58,19 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
...
@@ -58,20 +58,19 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
*/
*/
XTensor
ReduceMean
(
const
XTensor
&
input
,
int
dim
)
XTensor
ReduceMean
(
const
XTensor
&
input
,
int
dim
)
{
{
CheckNTErrors
(
&
input
,
"Empty input or output tensors!"
);
CheckNTErrors
(
dim
>=
0
&&
dim
<
input
.
order
,
"Illegal dimension to reduce!"
);
CheckNTErrors
((
dim
>=
0
&&
dim
<
input
.
order
),
"Illegal dimension to reduce!"
);
int
order
=
input
.
order
-
1
;
int
order
=
input
.
order
-
1
;
int
*
dimSize
=
new
int
[
order
];
int
*
dimSize
=
new
int
[
order
];
for
(
int
i
=
0
;
i
<
input
.
order
;
i
++
){
for
(
int
i
=
0
;
i
<
order
;
i
++
){
if
(
i
<
dim
)
if
(
i
<
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
];
dimSize
[
i
]
=
input
.
dimSize
[
i
];
else
if
(
i
>
dim
)
else
if
(
i
>
=
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
+
1
];
dimSize
[
i
]
=
input
.
dimSize
[
i
+
1
];
}
}
XTensor
output
=
NewTensor
(
order
,
dimSize
,
input
.
dataType
,
input
.
denseRatio
,
input
.
devID
,
input
.
mem
)
;
float
dr
=
(
!
input
.
isSparse
)
?
1.0
F
:
input
.
denseRatio
;
output
.
SetZeroAll
(
);
XTensor
output
(
order
,
dimSize
,
input
.
dataType
,
dr
,
input
.
devID
,
input
.
mem
);
output
.
SetTMP
();
output
.
SetTMP
();
/* call _ReduceMean function */
/* call _ReduceMean function */
...
...
source/tensor/core/reduce/ReduceSum.cpp
查看文件 @
906eebb7
...
@@ -214,20 +214,19 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true
...
@@ -214,20 +214,19 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true
*/
*/
XTensor
ReduceSum
(
const
XTensor
&
input
,
int
dim
,
const
XTensor
&
shift
,
DTYPE
power
,
bool
isExp
)
XTensor
ReduceSum
(
const
XTensor
&
input
,
int
dim
,
const
XTensor
&
shift
,
DTYPE
power
,
bool
isExp
)
{
{
CheckNTErrors
(
&
input
,
"Empty input or output tensors!"
);
CheckNTErrors
(
dim
>=
0
&&
dim
<
input
.
order
,
"Illegal dimension to reduce!"
);
CheckNTErrors
((
dim
>=
0
&&
dim
<
input
.
order
),
"Illegal dimension to reduce!"
);
int
order
=
input
.
order
-
1
;
int
order
=
input
.
order
-
1
;
int
*
dimSize
=
new
int
[
order
];
int
*
dimSize
=
new
int
[
order
];
for
(
int
i
=
0
;
i
<
input
.
order
;
i
++
){
for
(
int
i
=
0
;
i
<
order
;
i
++
){
if
(
i
<
dim
)
if
(
i
<
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
];
dimSize
[
i
]
=
input
.
dimSize
[
i
];
else
if
(
i
>
dim
)
else
if
(
i
>
=
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
+
1
];
dimSize
[
i
]
=
input
.
dimSize
[
i
+
1
];
}
}
XTensor
output
=
NewTensor
(
order
,
dimSize
,
input
.
dataType
,
input
.
denseRatio
,
input
.
devID
,
input
.
mem
)
;
float
dr
=
(
!
input
.
isSparse
)
?
1.0
F
:
input
.
denseRatio
;
output
.
SetZeroAll
(
);
XTensor
output
(
order
,
dimSize
,
input
.
dataType
,
dr
,
input
.
devID
,
input
.
mem
);
output
.
SetTMP
();
output
.
SetTMP
();
/* call _ReduceSum function */
/* call _ReduceSum function */
...
@@ -237,6 +236,53 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow
...
@@ -237,6 +236,53 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow
XLink
::
MakeLink
(
&
input
,
&
shift
,
&
output
,
REDUCE_REDUCESUM
);
XLink
::
MakeLink
(
&
input
,
&
shift
,
&
output
,
REDUCE_REDUCESUM
);
XLink
::
AddParamToHeadInt
(
&
output
,
dim
);
XLink
::
AddParamToHeadInt
(
&
output
,
dim
);
XLink
::
AddParamToHead
(
&
output
,
power
);
XLink
::
AddParamToHead
(
&
output
,
power
);
XLink
::
AddParamToHeadBool
(
&
output
,
isExp
);
/* destroy variables */
delete
[]
dimSize
;
return
output
;
}
/*
sum the items along a dimension of the tensor (return a XTensor structure)
make a new tensor to keep the result and return it
For a 1-dimensional data array a,
sum = \sum_i (a_i)^power if isExp == false
sum = \sum_i exp((a_i)^power) if isExp == true
>> input - the input tensor
>> dim - the dimension where the reduction is performed on
>> isExp - specify if the exp() is performed
>> power - we perform pow(item_i, power) on each item in the array
<< return - the sum along a dimension of the tensor
*/
XTensor
ReduceSum
(
const
XTensor
&
input
,
int
dim
,
DTYPE
power
,
bool
isExp
)
{
CheckNTErrors
(
dim
>=
0
&&
dim
<
input
.
order
,
"Illegal dimension to reduce!"
);
int
order
=
input
.
order
-
1
;
int
*
dimSize
=
new
int
[
order
];
for
(
int
i
=
0
;
i
<
order
;
i
++
){
if
(
i
<
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
];
else
if
(
i
>=
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
+
1
];
}
float
dr
=
(
!
input
.
isSparse
)
?
1.0
F
:
input
.
denseRatio
;
XTensor
output
(
order
,
dimSize
,
input
.
dataType
,
dr
,
input
.
devID
,
input
.
mem
);
output
.
SetTMP
();
/* call _ReduceSum function */
_ReduceSum
(
&
input
,
&
output
,
dim
,
NULL
,
power
,
isExp
);
/* tensor connection */
XLink
::
MakeLink
(
&
input
,
NULL
,
&
output
,
REDUCE_REDUCESUM
);
XLink
::
AddParamToHeadInt
(
&
output
,
dim
);
XLink
::
AddParamToHead
(
&
output
,
power
);
XLink
::
AddParamToHeadBool
(
&
output
,
isExp
);
/* destroy variables */
/* destroy variables */
delete
[]
dimSize
;
delete
[]
dimSize
;
...
...
source/tensor/core/reduce/ReduceSum.h
查看文件 @
906eebb7
...
@@ -43,7 +43,16 @@ For a 1-dimensional data array a,
...
@@ -43,7 +43,16 @@ For a 1-dimensional data array a,
sum = \sum_i (a_i - shift) if isExp == false
sum = \sum_i (a_i - shift) if isExp == false
sum = \sum_i exp(a_i - shift) if isExp == true
sum = \sum_i exp(a_i - shift) if isExp == true
*/
*/
XTensor
ReduceSum
(
const
XTensor
&
input
,
int
dim
,
const
XTensor
&
shift
=
NULL
,
DTYPE
power
=
(
DTYPE
)
1
.
0
F
,
bool
isExp
=
false
);
XTensor
ReduceSum
(
const
XTensor
&
input
,
int
dim
,
const
XTensor
&
shift
,
DTYPE
power
=
(
DTYPE
)
1
.
0
F
,
bool
isExp
=
false
);
/*
sum the items along a dimension of the tensor (return a XTensor structure)
make a new tensor to keep the result and return it
For a 1-dimensional data array a,
sum = \sum_i (a_i) if isExp == false
sum = \sum_i exp(a_i) if isExp == true
*/
XTensor
ReduceSum
(
const
XTensor
&
input
,
int
dim
,
DTYPE
power
=
(
DTYPE
)
1
.
0
F
,
bool
isExp
=
false
);
}
// namespace nts(NiuTrans.Tensor)
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/reduce/ReduceSumSquared.cpp
查看文件 @
906eebb7
...
@@ -54,20 +54,19 @@ For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2
...
@@ -54,20 +54,19 @@ For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2
*/
*/
XTensor
ReduceSumSquared
(
const
XTensor
&
input
,
int
dim
,
const
XTensor
&
shift
)
XTensor
ReduceSumSquared
(
const
XTensor
&
input
,
int
dim
,
const
XTensor
&
shift
)
{
{
CheckNTErrors
(
&
input
,
"Empty input or output tensors!"
);
CheckNTErrors
(
dim
>=
0
&&
dim
<
input
.
order
,
"Illegal dimension to reduce!"
);
CheckNTErrors
((
dim
>=
0
&&
dim
<
input
.
order
),
"Illegal dimension to reduce!"
);
int
order
=
input
.
order
-
1
;
int
order
=
input
.
order
-
1
;
int
*
dimSize
=
new
int
[
order
];
int
*
dimSize
=
new
int
[
order
];
for
(
int
i
=
0
;
i
<
input
.
order
;
i
++
){
for
(
int
i
=
0
;
i
<
order
;
i
++
){
if
(
i
<
dim
)
if
(
i
<
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
];
dimSize
[
i
]
=
input
.
dimSize
[
i
];
else
if
(
i
>
dim
)
else
if
(
i
>
=
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
+
1
];
dimSize
[
i
]
=
input
.
dimSize
[
i
+
1
];
}
}
XTensor
output
=
NewTensor
(
order
,
dimSize
,
input
.
dataType
,
input
.
denseRatio
,
input
.
devID
,
input
.
mem
)
;
float
dr
=
(
!
input
.
isSparse
)
?
1.0
F
:
input
.
denseRatio
;
output
.
SetZeroAll
(
);
XTensor
output
(
order
,
dimSize
,
input
.
dataType
,
dr
,
input
.
devID
,
input
.
mem
);
output
.
SetTMP
();
output
.
SetTMP
();
/* call _ReduceSumSquared function */
/* call _ReduceSumSquared function */
...
...
source/tensor/core/reduce/ReduceVariance.cpp
查看文件 @
906eebb7
...
@@ -19,6 +19,7 @@
...
@@ -19,6 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
*/
#include "../../XName.h"
#include "../math/ScaleAndShift.h"
#include "../math/ScaleAndShift.h"
#include "ReduceSum.h"
#include "ReduceSum.h"
#include "ReduceVariance.h"
#include "ReduceVariance.h"
...
@@ -56,25 +57,28 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
...
@@ -56,25 +57,28 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
*/
*/
XTensor
ReduceVariance
(
const
XTensor
&
input
,
int
dim
,
const
XTensor
&
mean
)
XTensor
ReduceVariance
(
const
XTensor
&
input
,
int
dim
,
const
XTensor
&
mean
)
{
{
CheckNTErrors
(
&
input
,
"Empty input or output tensors!"
);
CheckNTErrors
(
dim
>=
0
&&
dim
<
input
.
order
,
"Illegal dimension to reduce!"
);
CheckNTErrors
((
dim
>=
0
&&
dim
<
input
.
order
),
"Illegal dimension to reduce!"
);
int
order
=
input
.
order
-
1
;
int
order
=
input
.
order
-
1
;
int
*
dimSize
=
new
int
[
order
];
int
*
dimSize
=
new
int
[
order
];
for
(
int
i
=
0
;
i
<
input
.
order
;
i
++
){
for
(
int
i
=
0
;
i
<
order
;
i
++
){
if
(
i
<
dim
)
if
(
i
<
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
];
dimSize
[
i
]
=
input
.
dimSize
[
i
];
else
if
(
i
>
dim
)
else
if
(
i
>
=
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
+
1
];
dimSize
[
i
]
=
input
.
dimSize
[
i
+
1
];
}
}
XTensor
output
=
NewTensor
(
order
,
dimSize
,
input
.
dataType
,
input
.
denseRatio
,
input
.
devID
,
input
.
mem
)
;
float
dr
=
(
!
input
.
isSparse
)
?
1.0
F
:
input
.
denseRatio
;
output
.
SetZeroAll
(
);
XTensor
output
(
order
,
dimSize
,
input
.
dataType
,
dr
,
input
.
devID
,
input
.
mem
);
output
.
SetTMP
();
output
.
SetTMP
();
/* call _ReduceVariance function */
/* call _ReduceVariance function */
_ReduceVariance
(
&
input
,
&
output
,
dim
,
&
mean
);
_ReduceVariance
(
&
input
,
&
output
,
dim
,
&
mean
);
/* tensor connection */
XLink
::
MakeLink
(
&
input
,
&
mean
,
&
output
,
REDUCE_REDUCEVARIANCE
);
XLink
::
AddParamToHeadInt
(
&
output
,
dim
);
/* destroy variables */
/* destroy variables */
delete
[]
dimSize
;
delete
[]
dimSize
;
...
...
source/tensor/core/shape/ConcatenateSolely.cpp
查看文件 @
906eebb7
...
@@ -36,7 +36,7 @@ concatenate a list of tensors along a given dimension
...
@@ -36,7 +36,7 @@ concatenate a list of tensors along a given dimension
*/
*/
void
_ConcatenateSolely
(
const
XList
*
smalls
,
XTensor
*
big
,
int
dim
)
void
_ConcatenateSolely
(
const
XList
*
smalls
,
XTensor
*
big
,
int
dim
)
{
{
CheckNTErrors
(
(
big
->
order
>
dim
&&
dim
>=
0
)
,
"Illegal dimension to concatenate!"
);
CheckNTErrors
(
big
->
order
>
dim
&&
dim
>=
0
,
"Illegal dimension to concatenate!"
);
int
catDimSize
=
0
;
int
catDimSize
=
0
;
int
dimRDI
=
big
->
order
-
dim
-
1
;
int
dimRDI
=
big
->
order
-
dim
-
1
;
...
...
source/tensor/core/shape/MakeMergeBlockIndex.cpp
查看文件 @
906eebb7
source/tensor/core/shape/MakeMergeBlockIndex.cu
查看文件 @
906eebb7
source/tensor/core/shape/MakeMergeBlockIndex.cuh
查看文件 @
906eebb7
...
@@ -30,8 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -30,8 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set target data block index for the data movement in split */
/* set target data block index for the data movement in split */
extern "C"
extern "C"
void _CudaMakeMergeBlockIndex(int devID,
void _CudaMakeMergeBlockIndex(int devID, int * blockIndex, int blockNum, int blockNumInMerge,
int * blockIndex, int blockNum, int blockNumInMerge,
int splitSizeInGrid, int gridSize, int gridNum);
int splitSizeInGrid, int gridSize, int gridNum);
#endif // USE_CUDA
#endif // USE_CUDA
...
...
source/tensor/core/shape/MakeMergeBlockIndex.h
查看文件 @
906eebb7
source/tensor/core/shape/Merge.cpp
查看文件 @
906eebb7
...
@@ -161,8 +161,7 @@ e.g., (N/3, M, 3) -> (N, M)
...
@@ -161,8 +161,7 @@ e.g., (N/3, M, 3) -> (N, M)
*/
*/
XTensor
Merge
(
const
XTensor
&
s
,
int
whereToMerge
,
int
leadingDim
)
XTensor
Merge
(
const
XTensor
&
s
,
int
whereToMerge
,
int
leadingDim
)
{
{
CheckNTErrors
(
&
s
!=
NULL
,
"Invalid tensors!"
);
CheckNTErrors
(
leadingDim
<
whereToMerge
,
"Invalid leading dimension!"
);
CheckNTErrors
((
leadingDim
<
whereToMerge
),
"Invalid leading dimension!"
);
if
(
leadingDim
<
0
)
if
(
leadingDim
<
0
)
leadingDim
=
0
;
leadingDim
=
0
;
...
@@ -180,8 +179,8 @@ XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim)
...
@@ -180,8 +179,8 @@ XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim)
}
}
}
}
XTensor
t
=
NewTensor
(
order
,
dimSize
,
s
.
dataType
,
s
.
denseRatio
,
s
.
devID
,
s
.
mem
)
;
float
dr
=
(
!
s
.
isSparse
)
?
1.0
F
:
s
.
denseRatio
;
t
.
SetZeroAll
(
);
XTensor
t
(
order
,
dimSize
,
s
.
dataType
,
dr
,
s
.
devID
,
s
.
mem
);
t
.
SetTMP
();
t
.
SetTMP
();
/* call _Merge function */
/* call _Merge function */
...
@@ -314,6 +313,7 @@ void _Merge(const XList * smalls, XTensor * big, int whereToMerge)
...
@@ -314,6 +313,7 @@ void _Merge(const XList * smalls, XTensor * big, int whereToMerge)
/*
/*
merge small tensors into a big tensor (return a XTensor structure)
merge small tensors into a big tensor (return a XTensor structure)
make a new tensor to keep the result and return it
>> smalls - the list of the small tensors
>> smalls - the list of the small tensors
>> whereToMerge - the merging operation is along with which dimension
>> whereToMerge - the merging operation is along with which dimension
...
@@ -331,10 +331,8 @@ XTensor Merge(const XList &smalls, int whereToMerge)
...
@@ -331,10 +331,8 @@ XTensor Merge(const XList &smalls, int whereToMerge)
dimSize
[
i
]
=
tensor
->
dimSize
[
whereToMerge
]
*
smalls
.
count
;
dimSize
[
i
]
=
tensor
->
dimSize
[
whereToMerge
]
*
smalls
.
count
;
}
}
XTensor
big
=
NewTensor
(
order
,
dimSize
,
float
dr
=
(
!
tensor
->
isSparse
)
?
1.0
F
:
tensor
->
denseRatio
;
tensor
->
dataType
,
tensor
->
denseRatio
,
XTensor
big
(
order
,
dimSize
,
tensor
->
dataType
,
dr
,
tensor
->
devID
,
tensor
->
mem
);
tensor
->
devID
,
tensor
->
mem
);
big
.
SetZeroAll
();
big
.
SetTMP
();
big
.
SetTMP
();
/* call _Merge function */
/* call _Merge function */
...
@@ -370,10 +368,8 @@ XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge)
...
@@ -370,10 +368,8 @@ XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge)
dimSize
[
i
]
=
smallA
.
dimSize
[
whereToMerge
]
*
2
;
dimSize
[
i
]
=
smallA
.
dimSize
[
whereToMerge
]
*
2
;
}
}
XTensor
big
=
NewTensor
(
order
,
dimSize
,
float
dr
=
(
!
smallA
.
isSparse
)
?
1.0
F
:
smallA
.
denseRatio
;
smallA
.
dataType
,
smallA
.
denseRatio
,
XTensor
big
(
order
,
dimSize
,
smallA
.
dataType
,
dr
,
smallA
.
devID
,
smallA
.
mem
);
smallA
.
devID
,
smallA
.
mem
);
big
.
SetZeroAll
();
big
.
SetTMP
();
big
.
SetTMP
();
XList
smalls
(
2
);
XList
smalls
(
2
);
...
...
source/tensor/core/shape/Split.cpp
查看文件 @
906eebb7
...
@@ -148,16 +148,16 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
...
@@ -148,16 +148,16 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
int
order
=
s
.
order
+
1
;
int
order
=
s
.
order
+
1
;
int
*
dimSize
=
new
int
[
order
];
int
*
dimSize
=
new
int
[
order
];
dimSize
[
0
]
=
splitNum
;
for
(
int
i
=
0
;
i
<
s
.
order
;
i
++
)
{
for
(
int
i
=
0
;
i
<
s
.
order
;
i
++
)
{
if
(
i
==
whereToSplit
)
if
(
i
==
whereToSplit
)
dimSize
[
i
+
1
]
=
s
.
dimSize
[
i
]
/
splitNum
;
dimSize
[
i
+
1
]
=
s
.
dimSize
[
i
]
/
splitNum
;
else
else
dimSize
[
i
+
1
]
=
s
.
dimSize
[
i
];
dimSize
[
i
+
1
]
=
s
.
dimSize
[
i
];
}
}
dimSize
[
0
]
=
splitNum
;
XTensor
t
=
NewTensor
(
order
,
dimSize
,
s
.
dataType
,
s
.
denseRatio
,
s
.
devID
,
s
.
mem
)
;
float
dr
=
(
!
s
.
isSparse
)
?
1.0
F
:
s
.
denseRatio
;
t
.
SetZeroAll
(
);
XTensor
t
(
order
,
dimSize
,
s
.
dataType
,
dr
,
s
.
devID
,
s
.
mem
);
t
.
SetTMP
();
t
.
SetTMP
();
/* call _Split function */
/* call _Split function */
...
@@ -175,7 +175,7 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
...
@@ -175,7 +175,7 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
}
}
/*
/*
split a big tensor into small tensors
.
split a big tensor into small tensors
>> big - the source tensor
>> big - the source tensor
>> smalls - the list that keeps the resulting tensors (for return)
>> smalls - the list that keeps the resulting tensors (for return)
...
@@ -281,38 +281,16 @@ void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum)
...
@@ -281,38 +281,16 @@ void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum)
}
}
/*
/*
split a big tensor into small tensors (returna a XList struture).
split a big tensor into small tensors
make a new list to keep the result and return it.
>> big - the source tensor
>> big - the source tensor
>> smalls - the list that keeps the resulting tensors (for return)
NOTE that all the "small" tensors have already been placed in the list in advance.
>> whereToSplit - which dimension of the tensor is to split
>> whereToSplit - which dimension of the tensor is to split
>> splitNum - how many splits
>> splitNum - how many splits
<< return - a list of small tensors by splitting a big tensor
*/
*/
XList
SplitList
(
const
XTensor
&
big
,
int
whereToSplit
,
int
splitNum
)
void
Split
(
const
XTensor
&
big
,
XList
&
smalls
,
int
whereToSplit
,
int
splitNum
)
{
{
CheckNTErrors
(
&
big
,
"Invalid tensors!"
);
XList
smalls
=
XList
(
splitNum
);
int
order
=
big
.
order
;
int
*
dimSize
=
new
int
[
order
];
for
(
int
i
=
0
;
i
<
big
.
order
;
i
++
)
{
if
(
i
!=
whereToSplit
)
dimSize
[
i
]
=
big
.
dimSize
[
i
];
else
dimSize
[
i
]
=
big
.
dimSize
[
i
]
/
splitNum
;
}
for
(
int
i
=
0
;
i
<
splitNum
;
i
++
)
{
XTensor
tensor
=
NewTensor
(
order
,
dimSize
,
big
.
dataType
,
big
.
denseRatio
,
big
.
devID
,
big
.
mem
);
tensor
.
SetZeroAll
();
tensor
.
SetTMP
();
smalls
.
Add
(
&
tensor
);
}
/* call _Split function */
/* call _Split function */
_Split
(
&
big
,
&
smalls
,
whereToSplit
,
splitNum
);
_Split
(
&
big
,
&
smalls
,
whereToSplit
,
splitNum
);
...
@@ -326,11 +304,6 @@ XList SplitList(const XTensor &big, int whereToSplit, int splitNum)
...
@@ -326,11 +304,6 @@ XList SplitList(const XTensor &big, int whereToSplit, int splitNum)
block, rather than the total number of splits */
block, rather than the total number of splits */
XLink
::
AddParamToHeadInt
(
s
,
i
);
XLink
::
AddParamToHeadInt
(
s
,
i
);
}
}
/* destroy variables */
delete
[]
dimSize
;
return
smalls
;
}
}
}
// namespace nts(NiuTrans.Tensor)
}
// namespace nts(NiuTrans.Tensor)
source/tensor/core/shape/Split.h
查看文件 @
906eebb7
...
@@ -46,7 +46,7 @@ void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum)
...
@@ -46,7 +46,7 @@ void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum)
split a big tensor into small tensors (return a XList structure)
split a big tensor into small tensors (return a XList structure)
make a new list to keep the result and return it
make a new list to keep the result and return it
*/
*/
XList
SplitList
(
const
XTensor
&
big
,
int
whereToSplit
,
int
splitNum
);
void
Split
(
const
XTensor
&
big
,
XList
&
smalls
,
int
whereToSplit
,
int
splitNum
);
}
// namespace nts(NiuTrans.Tensor)
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/shape/Unsqueeze.cpp
查看文件 @
906eebb7
...
@@ -108,8 +108,6 @@ make a new tensor to keep the result and return it
...
@@ -108,8 +108,6 @@ make a new tensor to keep the result and return it
*/
*/
XTensor
Unsqueeze
(
const
XTensor
&
a
,
int
dim
,
int
dSize
)
XTensor
Unsqueeze
(
const
XTensor
&
a
,
int
dim
,
int
dSize
)
{
{
CheckNTErrors
(
&
a
,
"Empty input tensors!"
);
int
order
=
a
.
order
+
1
;
int
order
=
a
.
order
+
1
;
int
*
dimSize
=
new
int
[
order
];
int
*
dimSize
=
new
int
[
order
];
...
@@ -122,8 +120,8 @@ XTensor Unsqueeze(const XTensor &a, int dim, int dSize)
...
@@ -122,8 +120,8 @@ XTensor Unsqueeze(const XTensor &a, int dim, int dSize)
dimSize
[
i
]
=
a
.
dimSize
[
i
-
1
];
dimSize
[
i
]
=
a
.
dimSize
[
i
-
1
];
}
}
XTensor
b
=
NewTensor
(
order
,
dimSize
,
a
.
dataType
,
a
.
denseRatio
,
a
.
devID
,
a
.
mem
)
;
float
dr
=
(
!
a
.
isSparse
)
?
1.0
F
:
a
.
denseRatio
;
b
.
SetZeroAll
(
);
XTensor
b
(
order
,
dimSize
,
a
.
dataType
,
dr
,
a
.
devID
,
a
.
mem
);
b
.
SetTMP
();
b
.
SetTMP
();
/* call _Unsqueeze function */
/* call _Unsqueeze function */
...
...
source/tensor/core/sort/Sort.cpp
查看文件 @
906eebb7
...
@@ -29,13 +29,14 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -29,13 +29,14 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/*
/*
sort the tensor along a given dimension
sort the tensor along a given dimension
>> a - input tensor
>>
a - the
tensor
>>
b - output
tensor
>> index - index of the items in the resulting tensor
>> index - index of the items in the resulting tensor
>> dim - the dimension along which the sorting is performed
>> dim - the dimension along which the sorting is performed
*/
*/
void
_Sort
(
XTensor
*
a
,
XTensor
*
index
,
int
dim
)
void
_Sort
(
const
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
index
,
int
dim
)
{
{
CheckNTErrors
((
XTensor
::
IsIdentical
(
a
,
b
)),
"Input tensors should have the same type!"
);
CheckNTErrors
((
dim
>=
0
&&
dim
<
a
->
order
),
"Incorrect dimension specified!"
);
CheckNTErrors
((
dim
>=
0
&&
dim
<
a
->
order
),
"Incorrect dimension specified!"
);
CheckNTErrors
((
a
->
order
==
index
->
order
),
"Unmatched input tensors!"
);
CheckNTErrors
((
a
->
order
==
index
->
order
),
"Unmatched input tensors!"
);
CheckNTErrors
((
index
->
dataType
==
X_INT
),
"Wrong data type!"
);
CheckNTErrors
((
index
->
dataType
==
X_INT
),
"Wrong data type!"
);
...
@@ -46,7 +47,7 @@ void _Sort(XTensor * a, XTensor * index, int dim)
...
@@ -46,7 +47,7 @@ void _Sort(XTensor * a, XTensor * index, int dim)
if
(
a
->
devID
>=
0
)
{
if
(
a
->
devID
>=
0
)
{
#ifdef USE_CUDA
#ifdef USE_CUDA
_CudaSortBig
(
a
,
a
,
index
,
index
,
dim
);
_CudaSortBig
(
a
,
b
,
index
,
index
,
dim
);
#else
#else
ShowNTErrors
(
"Plesae specify USE_CUDA and recompile the code!"
);
ShowNTErrors
(
"Plesae specify USE_CUDA and recompile the code!"
);
#endif
#endif
...
@@ -64,12 +65,13 @@ void _Sort(XTensor * a, XTensor * index, int dim)
...
@@ -64,12 +65,13 @@ void _Sort(XTensor * a, XTensor * index, int dim)
for
(
int
k
=
0
;
k
<
blockNum
;
k
++
)
{
for
(
int
k
=
0
;
k
<
blockNum
;
k
++
)
{
for
(
int
i
=
0
;
i
<
stride
;
i
++
)
{
for
(
int
i
=
0
;
i
<
stride
;
i
++
)
{
void
*
data
=
(
char
*
)
a
->
data
+
(
k
*
blockSize
+
i
)
*
a
->
unitSize
;
void
*
dataA
=
(
char
*
)
a
->
data
+
(
k
*
blockSize
+
i
)
*
a
->
unitSize
;
void
*
dataB
=
(
char
*
)
b
->
data
+
(
k
*
blockSize
+
i
)
*
b
->
unitSize
;
void
*
indexData
=
(
char
*
)
index
->
data
+
(
k
*
blockSize
+
i
)
*
sizeof
(
int
);
void
*
indexData
=
(
char
*
)
index
->
data
+
(
k
*
blockSize
+
i
)
*
sizeof
(
int
);
/* we sort the data array along "dim" */
/* we sort the data array along "dim" */
if
(
a
->
dataType
==
X_FLOAT
)
if
(
a
->
dataType
==
X_FLOAT
)
XQSort
(
data
,
indexData
,
strideNum
,
a
->
unitSize
,
stride
,
CompXFloat
);
XQSort
(
data
A
,
dataB
,
indexData
,
strideNum
,
a
->
unitSize
,
stride
,
CompXFloat
);
else
{
else
{
ShowNTErrors
(
"TODO!"
);
ShowNTErrors
(
"TODO!"
);
}
}
...
@@ -78,4 +80,40 @@ void _Sort(XTensor * a, XTensor * index, int dim)
...
@@ -78,4 +80,40 @@ void _Sort(XTensor * a, XTensor * index, int dim)
}
}
}
}
/*
sort the tensor along a given dimension (do it on site)
the input tensor a is passed to _Sort as both the source and the destination,
so the sorted result is kept in a and nothing is returned
>> a - the tensor to sort; it also keeps the sorted result
>> index - index of the items in the resulting tensor
>> dim - the dimension along which the sorting is performed
*/
void _SortMe(XTensor * a, XTensor * index, int dim)
{
    /* the destination is the input tensor itself */
    XTensor * sorted = a;

    _Sort(a, sorted, index, dim);
}
/*
sort the tensor along a given dimension and make the tensor connections
the sorted result is written to b and the item indices to index; nothing is returned
>> a - input tensor
>> b - output tensor
>> index - index of the items in the resulting tensor
>> dim - the dimension along which the sorting is performed
*/
void Sort(XTensor & a, XTensor & b, XTensor & index, int dim)
{
    /* call _Sort function */
    _Sort(&a, &b, &index, dim);

    /* tensor connections: b and index are collected in one list and linked with a */
    XList outputs(2);
    outputs.Add(&b);
    outputs.Add(&index);
    XLink::MakeLink(&a, &outputs, SORT_SORT);

    /* dim is added as a parameter to both b and index */
    XLink::AddParamToHeadInt(&b, dim);
    XLink::AddParamToHeadInt(&index, dim);
}
}
// namespace nts(NiuTrans.Tensor)
}
// namespace nts(NiuTrans.Tensor)
source/tensor/core/sort/Sort.cu
查看文件 @
906eebb7
...
@@ -39,7 +39,7 @@ bitonic sort (for each row in a matrix)
...
@@ -39,7 +39,7 @@ bitonic sort (for each row in a matrix)
>> n - row number of the matrix
>> n - row number of the matrix
*/
*/
template<class T> __global__
template<class T> __global__
void KernelBitonicSort2D(void * data, int j, int k, int m, int n)
void KernelBitonicSort2D(void * data, int j, int k, int m, int n)
{
{
const unsigned int idx = blockDim.x * blockIdx.x + threadIdx.x;
const unsigned int idx = blockDim.x * blockIdx.x + threadIdx.x;
const unsigned int row = blockDim.y * blockIdx.y + threadIdx.y;
const unsigned int row = blockDim.y * blockIdx.y + threadIdx.y;
...
@@ -74,7 +74,7 @@ bitonic sort (for each row in a matrix) with index
...
@@ -74,7 +74,7 @@ bitonic sort (for each row in a matrix) with index
>> n - row number of the matrix
>> n - row number of the matrix
*/
*/
template<class T> __global__
template<class T> __global__
void KernelBitonicSort2D(void * data, int * index, int j, int k, int m, int n)
void KernelBitonicSort2D(void * data, int * index, int j, int k, int m, int n)
{
{
const unsigned int idx = blockDim.x * blockIdx.x + threadIdx.x;
const unsigned int idx = blockDim.x * blockIdx.x + threadIdx.x;
const unsigned int row = blockDim.y * blockIdx.y + threadIdx.y;
const unsigned int row = blockDim.y * blockIdx.y + threadIdx.y;
...
...
source/tensor/core/sort/Sort.h
查看文件 @
906eebb7
...
@@ -27,8 +27,20 @@
...
@@ -27,8 +27,20 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* sort the data along a given dimension */
/* sort the data along a given dimension */
void
_Sort
(
const
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
index
,
int
dim
);
/*
sort the data along a given dimension (do it on site)
keep the result in the input tensor a and return nothing
*/
void
_SortMe
(
XTensor
*
a
,
XTensor
*
index
,
int
dim
);
/*
sort the data along a given dimension
the sorted result is kept in b and the item indices in index (nothing is returned)
*/
extern
"C"
extern
"C"
void
_Sort
(
XTensor
*
a
,
XTensor
*
index
,
int
dim
);
void
Sort
(
XTensor
&
a
,
XTensor
&
b
,
XTensor
&
index
,
int
dim
);
}
// namespace nts(NiuTrans.Tensor)
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/sort/TopK.cpp
查看文件 @
906eebb7
...
@@ -105,4 +105,29 @@ void _TopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
...
@@ -105,4 +105,29 @@ void _TopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
}
}
}
}
}
}
/*
get the top-k items along a given dimension and make the tensor connections
the result is written to b and index; nothing is returned
>> a - input tensor
>> b - output tensor (top-k result)
>> index - index of the top-k items
>> dim - the dimension along which the sorting is performed
>> k - how many items returned after sorting
*/
void TopK(XTensor & a, XTensor & b, XTensor & index, int dim, int k)
{
    /* call _TopK function */
    _TopK(&a, &b, &index, dim, k);

    /* tensor connection: b and index are collected in one list and linked with a */
    XList outputs(2);
    outputs.Add(&b);
    outputs.Add(&index);
    XLink::MakeLink(&a, &outputs, SORT_TOPK);

    /* the (b, dim) / (index, k) parameter pair is added twice, in this order.
       NOTE(review): b never receives k and index never receives dim - confirm
       that the repetition is intended rather than a copy-paste slip */
    for (int pass = 0; pass < 2; pass++) {
        XLink::AddParamToHeadInt(&b, dim);
        XLink::AddParamToHeadInt(&index, k);
    }
}
}
// namespace nts(NiuTrans.Tensor)
}
// namespace nts(NiuTrans.Tensor)
source/tensor/core/sort/TopK.h
查看文件 @
906eebb7
...
@@ -30,6 +30,10 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -30,6 +30,10 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
extern
"C"
extern
"C"
void
_TopK
(
const
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
index
,
int
dim
,
int
k
);
void
_TopK
(
const
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
index
,
int
dim
,
int
k
);
/* get the top-k items along a given dimension */
extern
"C"
void
TopK
(
XTensor
&
a
,
XTensor
&
b
,
XTensor
&
index
,
int
dim
,
int
k
);
}
// namespace nts(NiuTrans.Tensor)
}
// namespace nts(NiuTrans.Tensor)
#endif // __TOPK_H__
#endif // __TOPK_H__
\ No newline at end of file
source/tensor/function/HardTanH.cpp
查看文件 @
906eebb7
...
@@ -60,7 +60,9 @@ void _HardTanH(const XTensor * x, XTensor * y)
...
@@ -60,7 +60,9 @@ void _HardTanH(const XTensor * x, XTensor * y)
}
}
/*
/*
hard tanh function (return a structure)
hard tanh function (return a XTensor structure)
make a new tensor to keep the result and return it
y = 1 if x > 1
y = 1 if x > 1
x if -1 <= x <= 1
x if -1 <= x <= 1
-1 if x < -1
-1 if x < -1
...
@@ -72,8 +74,10 @@ XTensor HardTanH(const XTensor &x)
...
@@ -72,8 +74,10 @@ XTensor HardTanH(const XTensor &x)
XTensor
y
(
&
x
);
XTensor
y
(
&
x
);
y
.
SetTMP
();
y
.
SetTMP
();
/* call _HardTanH function */
_HardTanH
(
&
x
,
&
y
);
_HardTanH
(
&
x
,
&
y
);
/* tensor connection */
XLink
::
MakeLink
(
&
x
,
NULL
,
&
y
,
FUNC_HARDTANH
);
XLink
::
MakeLink
(
&
x
,
NULL
,
&
y
,
FUNC_HARDTANH
);
return
y
;
return
y
;
...
@@ -116,7 +120,7 @@ void _HardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
...
@@ -116,7 +120,7 @@ void _HardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
{
{
/* calculate dE/dy */
/* calculate dE/dy */
if
(
lossName
!=
NOLOSS
)
if
(
lossName
!=
NOLOSS
)
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
_
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
DTYPE
*
dedyp
=
(
DTYPE
*
)
dedy
->
data
;
DTYPE
*
dedyp
=
(
DTYPE
*
)
dedy
->
data
;
DTYPE
*
dedxp
=
(
DTYPE
*
)
dedx
->
data
;
DTYPE
*
dedxp
=
(
DTYPE
*
)
dedx
->
data
;
...
...
source/tensor/function/HardTanH.cu
查看文件 @
906eebb7
...
@@ -137,7 +137,7 @@ void _CudaHardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
...
@@ -137,7 +137,7 @@ void _CudaHardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
/* calculate dE/dy */
/* calculate dE/dy */
if(lossName != NOLOSS)
if(lossName != NOLOSS)
LossBackward(dedy, gold, y, lossName);
_
LossBackward(dedy, gold, y, lossName);
int gridSize[3], blockSize[3];
int gridSize[3], blockSize[3];
...
...
source/tensor/function/HardTanH.h
查看文件 @
906eebb7
...
@@ -37,7 +37,7 @@ y = 1 if x > 1
...
@@ -37,7 +37,7 @@ y = 1 if x > 1
*/
*/
void
_HardTanH
(
const
XTensor
*
x
,
XTensor
*
y
);
void
_HardTanH
(
const
XTensor
*
x
,
XTensor
*
y
);
/* hard tanh function (return a structure) */
/* hard tanh function (return a
XTensor
structure) */
XTensor
HardTanH
(
const
XTensor
&
x
);
XTensor
HardTanH
(
const
XTensor
&
x
);
/* de/dx */
/* de/dx */
...
...
source/tensor/function/Identity.cpp
查看文件 @
906eebb7
...
@@ -19,6 +19,7 @@
...
@@ -19,6 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-27
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-27
*/
*/
#include "../XName.h"
#include "Identity.h"
#include "Identity.h"
#include "../XUtility.h"
#include "../XUtility.h"
#include "../core/movement/CopyValues.h"
#include "../core/movement/CopyValues.h"
...
@@ -36,6 +37,26 @@ void _Identity(const XTensor * x, XTensor * y)
...
@@ -36,6 +37,26 @@ void _Identity(const XTensor * x, XTensor * y)
}
}
/*
/*
identity function y = x (return a XTensor structure)
make a new tensor to keep the result and return it
>> x - input tensor
<< return - y
*/
XTensor
Identity
(
const
XTensor
&
x
)
{
XTensor
y
(
&
x
);
y
.
SetTMP
();
/* call _Identity function */
_Identity
(
&
x
,
&
y
);
/* tensor connection */
XLink
::
MakeLink
(
&
x
,
NULL
,
&
y
,
FUNC_IDENTITY
);
return
y
;
}
/*
backward computation for identity function y = x
backward computation for identity function y = x
dE/dx = dE/dy * dy/dx = dE/dy
dE/dx = dE/dy * dy/dx = dE/dy
...
@@ -58,7 +79,7 @@ void _IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
...
@@ -58,7 +79,7 @@ void _IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
{
{
/* calculate dE/dy */
/* calculate dE/dy */
if
(
lossName
!=
NOLOSS
)
if
(
lossName
!=
NOLOSS
)
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
_
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
if
(
dedy
->
data
!=
dedx
->
data
)
if
(
dedy
->
data
!=
dedx
->
data
)
_CopyValues
(
dedy
,
dedx
);
_CopyValues
(
dedy
,
dedx
);
...
...
source/tensor/function/Identity.h
查看文件 @
906eebb7
...
@@ -28,11 +28,12 @@
...
@@ -28,11 +28,12 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* identity function y = x */
/* identity function y = x */
extern
"C"
void
_Identity
(
const
XTensor
*
x
,
XTensor
*
y
);
void
_Identity
(
const
XTensor
*
x
,
XTensor
*
y
);
/* identity function y = x (return a XTensor structure) */
XTensor
Identity
(
const
XTensor
&
x
);
/* de/dx */
/* de/dx */
extern
"C"
void
_IdentityBackward
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
void
_IdentityBackward
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
XTensor
*
dedy
,
XTensor
*
dedx
,
XTensor
*
dedy
,
XTensor
*
dedx
,
LOSS_FUNCTION_NAME
lossName
);
LOSS_FUNCTION_NAME
lossName
);
...
...
source/tensor/function/LogSoftmax.cpp
查看文件 @
906eebb7
...
@@ -162,18 +162,22 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
...
@@ -162,18 +162,22 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
}
}
/*
/*
log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (return a structure)
log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (return a XTensor structure)
make a new tensor to keep the result and return it
>> x - input vector
>> x - input vector
>> leadDim - leading dimension (along which we perform reduction)
>> leadDim - leading dimension (along which we perform reduction)
<< return -
result
<< return -
y
*/
*/
XTensor
LogSoftmax
(
const
XTensor
&
x
,
int
leadDim
)
XTensor
LogSoftmax
(
const
XTensor
&
x
,
int
leadDim
)
{
{
XTensor
y
(
&
x
);
XTensor
y
(
&
x
);
y
.
SetTMP
();
y
.
SetTMP
();
/* call _LogSoftmax function */
_LogSoftmax
(
&
x
,
&
y
,
leadDim
);
_LogSoftmax
(
&
x
,
&
y
,
leadDim
);
/* tensor connection */
XLink
::
MakeLink
(
&
x
,
NULL
,
&
y
,
FUNC_LOGSOFTMAX
);
XLink
::
MakeLink
(
&
x
,
NULL
,
&
y
,
FUNC_LOGSOFTMAX
);
XLink
::
AddParamToHeadInt
(
&
y
,
leadDim
);
XLink
::
AddParamToHeadInt
(
&
y
,
leadDim
);
...
...
source/tensor/function/LogSoftmax.h
查看文件 @
906eebb7
...
@@ -30,7 +30,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
...
@@ -30,7 +30,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) */
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) */
void
_LogSoftmax
(
const
XTensor
*
x
,
XTensor
*
y
,
int
leadDim
);
void
_LogSoftmax
(
const
XTensor
*
x
,
XTensor
*
y
,
int
leadDim
);
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (return a structure) */
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (return a
XTensor
structure) */
XTensor
LogSoftmax
(
const
XTensor
&
x
,
int
leadDim
);
XTensor
LogSoftmax
(
const
XTensor
&
x
,
int
leadDim
);
/* de/dx */
/* de/dx */
...
...
source/tensor/function/Loss.cpp
查看文件 @
906eebb7
...
@@ -42,7 +42,7 @@ compute the loss
...
@@ -42,7 +42,7 @@ compute the loss
>> oBeg - where to start in the model output (along the leading dimension)
>> oBeg - where to start in the model output (along the leading dimension)
<< return - error in model prediction with respect to gold standard
<< return - error in model prediction with respect to gold standard
*/
*/
DTYPE
LossCompute
(
XTensor
*
gold
,
XTensor
*
output
,
LOSS_FUNCTION_NAME
LFName
,
DTYPE
_
LossCompute
(
XTensor
*
gold
,
XTensor
*
output
,
LOSS_FUNCTION_NAME
LFName
,
bool
isLogOutput
,
int
leadDim
,
int
gBeg
,
int
gLen
,
int
oBeg
)
bool
isLogOutput
,
int
leadDim
,
int
gBeg
,
int
gLen
,
int
oBeg
)
{
{
DTYPE
error
=
0.0
F
;
DTYPE
error
=
0.0
F
;
...
@@ -66,7 +66,7 @@ DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
...
@@ -66,7 +66,7 @@ DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
blockNum
=
output
->
unitNum
/
blockSize
;
blockNum
=
output
->
unitNum
/
blockSize
;
if
(
isLogOutput
)
if
(
isLogOutput
)
return
LossComputeForLogScale
(
gold
,
output
,
LFName
,
leadDim
,
gBeg
,
gLen
,
oBeg
);
return
_
LossComputeForLogScale
(
gold
,
output
,
LFName
,
leadDim
,
gBeg
,
gLen
,
oBeg
);
DTYPE
*
gp
=
(
DTYPE
*
)
gold
->
data
;
DTYPE
*
gp
=
(
DTYPE
*
)
gold
->
data
;
DTYPE
*
op
=
(
DTYPE
*
)
output
->
data
;
DTYPE
*
op
=
(
DTYPE
*
)
output
->
data
;
...
@@ -180,7 +180,7 @@ DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
...
@@ -180,7 +180,7 @@ DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
}
}
else
{
else
{
#ifdef USE_CUDA
#ifdef USE_CUDA
error
=
CudaLossCompute
(
gold
,
output
,
LFName
,
isLogOutput
,
leadDim
,
gBeg
,
gLen
,
oBeg
);
error
=
_
CudaLossCompute
(
gold
,
output
,
LFName
,
isLogOutput
,
leadDim
,
gBeg
,
gLen
,
oBeg
);
#else
#else
ShowNTErrors
(
"Please specify USE_CUDA and recompile the code!"
);
ShowNTErrors
(
"Please specify USE_CUDA and recompile the code!"
);
#endif
#endif
...
@@ -201,7 +201,7 @@ the log version of loss computation
...
@@ -201,7 +201,7 @@ the log version of loss computation
>> oBeg - where to start in the model output (along the leading dimension)
>> oBeg - where to start in the model output (along the leading dimension)
<< return - error in model prediction with respect to gold standard
<< return - error in model prediction with respect to gold standard
*/
*/
DTYPE
LossComputeForLogScale
(
XTensor
*
gold
,
XTensor
*
output
,
DTYPE
_
LossComputeForLogScale
(
XTensor
*
gold
,
XTensor
*
output
,
LOSS_FUNCTION_NAME
LFName
,
LOSS_FUNCTION_NAME
LFName
,
int
leadDim
,
int
gBeg
,
int
gLen
,
int
oBeg
)
int
leadDim
,
int
gBeg
,
int
gLen
,
int
oBeg
)
{
{
...
@@ -343,7 +343,7 @@ with respect to gold standard, and y this the model output
...
@@ -343,7 +343,7 @@ with respect to gold standard, and y this the model output
>> LFName - name of loss function
>> LFName - name of loss function
<< return dE/dy
<< return dE/dy
*/
*/
DTYPE
LossBackwardPoint
(
DTYPE
t
,
DTYPE
y
,
LOSS_FUNCTION_NAME
LFName
)
DTYPE
_
LossBackwardPoint
(
DTYPE
t
,
DTYPE
y
,
LOSS_FUNCTION_NAME
LFName
)
{
{
/*
/*
squared error
squared error
...
@@ -380,7 +380,7 @@ with respect to gold standard, and y this the model output
...
@@ -380,7 +380,7 @@ with respect to gold standard, and y this the model output
>> tLen - segment length from tBeg (along the leading dimension)
>> tLen - segment length from tBeg (along the leading dimension)
>> yBeg - where to start in the model output (along the leading dimension)
>> yBeg - where to start in the model output (along the leading dimension)
*/
*/
void
LossBackward
(
XTensor
*
dedy
,
XTensor
*
t
,
XTensor
*
y
,
void
_
LossBackward
(
XTensor
*
dedy
,
XTensor
*
t
,
XTensor
*
y
,
LOSS_FUNCTION_NAME
LFName
,
LOSS_FUNCTION_NAME
LFName
,
int
leadDim
,
int
tBeg
,
int
tLen
,
int
yBeg
)
int
leadDim
,
int
tBeg
,
int
tLen
,
int
yBeg
)
{
{
...
@@ -496,7 +496,7 @@ void LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
...
@@ -496,7 +496,7 @@ void LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
}
}
else
{
else
{
#ifdef USE_CUDA
#ifdef USE_CUDA
CudaLossBackward
(
dedy
,
t
,
y
,
LFName
,
leadDim
,
tBeg
,
tLen
,
yBeg
);
_
CudaLossBackward
(
dedy
,
t
,
y
,
LFName
,
leadDim
,
tBeg
,
tLen
,
yBeg
);
#else
#else
ShowNTErrors
(
"Please specify USE_CUDA and recompile the code!"
);
ShowNTErrors
(
"Please specify USE_CUDA and recompile the code!"
);
#endif
#endif
...
...
source/tensor/function/Loss.cu
查看文件 @
906eebb7
...
@@ -51,7 +51,7 @@ compute the loss
...
@@ -51,7 +51,7 @@ compute the loss
>> yBeg - where to start in the model output (along the leading dimension)
>> yBeg - where to start in the model output (along the leading dimension)
<< return - error in model prediction with respect to gold standard
<< return - error in model prediction with respect to gold standard
*/
*/
DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
DTYPE
_
CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
bool isLogOutput, int leadDim, int gBeg, int gLen, int yBeg)
bool isLogOutput, int leadDim, int gBeg, int gLen, int yBeg)
{
{
CheckNTErrors((gLen >= 0 && gLen <= y->unitNum), "Illegal input length!");
CheckNTErrors((gLen >= 0 && gLen <= y->unitNum), "Illegal input length!");
...
@@ -65,7 +65,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
...
@@ -65,7 +65,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
CheckNTErrors((gLen == gold->dimSize[leadDim] && gBeg == 0 && yBeg == 0), "TODO!");
CheckNTErrors((gLen == gold->dimSize[leadDim] && gBeg == 0 && yBeg == 0), "TODO!");
if(isLogOutput)
if(isLogOutput)
return LossComputeForLogScale(gold, y, LFName, leadDim, gBeg, gLen, yBeg);
return
_
LossComputeForLogScale(gold, y, LFName, leadDim, gBeg, gLen, yBeg);
DTYPE error = 0.0F;
DTYPE error = 0.0F;
...
@@ -77,7 +77,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
...
@@ -77,7 +77,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
if(LFName == SQUAREDERROR){
if(LFName == SQUAREDERROR){
XTensor * diff = NewTensor(gold->order, gold->dimSize, gold->dataType, gold->denseRatio, gold->devID, gold->mem);
XTensor * diff = NewTensor(gold->order, gold->dimSize, gold->dataType, gold->denseRatio, gold->devID, gold->mem);
_Sum(gold, y, diff, -1.0F);
_Sum(gold, y, diff, -1.0F);
_Power(diff, 2.0F);
_Power
Me
(diff, 2.0F);
_ScaleAndShiftMe(diff, 0.5F, 0.0F);
_ScaleAndShiftMe(diff, 0.5F, 0.0F);
int reduceTimes = diff->order;
int reduceTimes = diff->order;
...
@@ -110,9 +110,9 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
...
@@ -110,9 +110,9 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
if(LFName == CROSSENTROPY){
if(LFName == CROSSENTROPY){
XTensor * diff = NewTensor(y->order, y->dimSize, y->dataType, y->denseRatio, y->devID, y->mem);
XTensor * diff = NewTensor(y->order, y->dimSize, y->dataType, y->denseRatio, y->devID, y->mem);
_CopyValues(y, diff);
_CopyValues(y, diff);
_Log(diff);
_Log
Me
(diff);
_Multiply(gold, diff, diff);
_Multiply(gold, diff, diff);
_Negate(diff);
_Negate
Me
(diff);
int reduceTimes = diff->order;
int reduceTimes = diff->order;
for (int i = 0; i < reduceTimes; i++) {
for (int i = 0; i < reduceTimes; i++) {
...
@@ -148,7 +148,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
...
@@ -148,7 +148,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
_CopyValues(y, yOnehot);
_CopyValues(y, yOnehot);
_Multiply(gold, y, yOnehot);
_Multiply(gold, y, yOnehot);
_Sum(gold, yOnehot, diff, -1.0F);
_Sum(gold, yOnehot, diff, -1.0F);
_Power(diff, 2.0F);
_Power
Me
(diff, 2.0F);
_ScaleAndShiftMe(diff, 0.5F, 0.0F);
_ScaleAndShiftMe(diff, 0.5F, 0.0F);
int reduceTimes = diff->order;
int reduceTimes = diff->order;
...
@@ -190,7 +190,7 @@ the log version of loss computation
...
@@ -190,7 +190,7 @@ the log version of loss computation
>> yBeg - where to start in the model output (along the leading dimension)
>> yBeg - where to start in the model output (along the leading dimension)
<< return - error in model prediction with respect to gold standard
<< return - error in model prediction with respect to gold standard
*/
*/
DTYPE CudaLossComputeForLogScale(XTensor * gold, XTensor * y,
DTYPE
_
CudaLossComputeForLogScale(XTensor * gold, XTensor * y,
LOSS_FUNCTION_NAME LFName,
LOSS_FUNCTION_NAME LFName,
int leadDim, int gBeg, int gLen, int yBeg)
int leadDim, int gBeg, int gLen, int yBeg)
{
{
...
@@ -209,9 +209,9 @@ with respect to gold standard, and y this the model output
...
@@ -209,9 +209,9 @@ with respect to gold standard, and y this the model output
>> LFName - name of loss function
>> LFName - name of loss function
<< return dE/dy
<< return dE/dy
*/
*/
DTYPE CudaLossBackward(DTYPE t, DTYPE y, LOSS_FUNCTION_NAME LFName)
DTYPE
_
CudaLossBackward(DTYPE t, DTYPE y, LOSS_FUNCTION_NAME LFName)
{
{
return LossBackwardPoint(t, y, LFName);
return
_
LossBackwardPoint(t, y, LFName);
// TODO: call cuda kernels for computing the errors
// TODO: call cuda kernels for computing the errors
}
}
...
@@ -328,7 +328,7 @@ with respect to gold standard, and y this the model output
...
@@ -328,7 +328,7 @@ with respect to gold standard, and y this the model output
>> tLen - segment length from oBeg (along the leading dimension)
>> tLen - segment length from oBeg (along the leading dimension)
>> yBeg - where to start in the model output (along the leading dimension)
>> yBeg - where to start in the model output (along the leading dimension)
*/
*/
void CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
void
_
CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
LOSS_FUNCTION_NAME LFName,
LOSS_FUNCTION_NAME LFName,
int leadDim, int tBeg, int tLen, int yBeg)
int leadDim, int tBeg, int tLen, int yBeg)
{
{
...
...
source/tensor/function/Loss.cuh
查看文件 @
906eebb7
...
@@ -31,21 +31,21 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
...
@@ -31,21 +31,21 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* compute the loss (cuda version) */
/* compute the loss (cuda version) */
extern "C"
extern "C"
DTYPE CudaLossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
DTYPE
_
CudaLossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
bool isLogOutput, int leadDim, int gBeg, int gLen, int oBeg);
bool isLogOutput, int leadDim, int gBeg, int gLen, int oBeg);
/* compute the loss in log scale (cuda version) */
/* compute the loss in log scale (cuda version) */
extern "C"
extern "C"
DTYPE CudaLossComputeForLogScale(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
DTYPE
_
CudaLossComputeForLogScale(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
int leadDim, int gBeg, int gLen, int oBeg);
int leadDim, int gBeg, int gLen, int oBeg);
/* backward compuation for a single element (cuda version) */
/* backward compuation for a single element (cuda version) */
extern "C"
extern "C"
DTYPE CudaLossBackwardPoint(DTYPE t, DTYPE y, LOSS_FUNCTION_NAME LFName);
DTYPE
_
CudaLossBackwardPoint(DTYPE t, DTYPE y, LOSS_FUNCTION_NAME LFName);
/* backward compuation for (dense) vectors (cuda version) */
/* backward compuation for (dense) vectors (cuda version) */
extern "C"
extern "C"
void CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
void
_
CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
LOSS_FUNCTION_NAME LFName,
LOSS_FUNCTION_NAME LFName,
int leadDim = -1, int tBeg = 0, int tLen = -1, int yBeg = 0);
int leadDim = -1, int tBeg = 0, int tLen = -1, int yBeg = 0);
...
...
source/tensor/function/Loss.h
查看文件 @
906eebb7
...
@@ -48,21 +48,21 @@ loss function to measure the "number" of errors
...
@@ -48,21 +48,21 @@ loss function to measure the "number" of errors
/* compute the loss */
/* compute the loss */
extern
"C"
extern
"C"
DTYPE
LossCompute
(
XTensor
*
gold
,
XTensor
*
output
,
LOSS_FUNCTION_NAME
LFName
,
DTYPE
_
LossCompute
(
XTensor
*
gold
,
XTensor
*
output
,
LOSS_FUNCTION_NAME
LFName
,
bool
isLogOutput
,
int
leadDim
,
int
gBeg
,
int
gLen
,
int
oBeg
);
bool
isLogOutput
,
int
leadDim
,
int
gBeg
,
int
gLen
,
int
oBeg
);
/* compute the loss (log version) */
/* compute the loss (log version) */
extern
"C"
extern
"C"
DTYPE
LossComputeForLogScale
(
XTensor
*
gold
,
XTensor
*
output
,
LOSS_FUNCTION_NAME
LFName
,
DTYPE
_
LossComputeForLogScale
(
XTensor
*
gold
,
XTensor
*
output
,
LOSS_FUNCTION_NAME
LFName
,
int
leadDim
,
int
gBeg
,
int
gLen
,
int
oBeg
);
int
leadDim
,
int
gBeg
,
int
gLen
,
int
oBeg
);
/* backward compuation for a single element */
/* backward compuation for a single element */
extern
"C"
extern
"C"
DTYPE
LossBackwardPoint
(
DTYPE
t
,
DTYPE
y
,
LOSS_FUNCTION_NAME
LFName
);
DTYPE
_
LossBackwardPoint
(
DTYPE
t
,
DTYPE
y
,
LOSS_FUNCTION_NAME
LFName
);
/* backward compuation for (dense) vectors */
/* backward compuation for (dense) vectors */
extern
"C"
extern
"C"
void
LossBackward
(
XTensor
*
dEdY
,
XTensor
*
t
,
XTensor
*
y
,
void
_
LossBackward
(
XTensor
*
dEdY
,
XTensor
*
t
,
XTensor
*
y
,
LOSS_FUNCTION_NAME
LFName
,
LOSS_FUNCTION_NAME
LFName
,
int
leadDim
=
-
1
,
int
tBeg
=
0
,
int
tLen
=
-
1
,
int
yBeg
=
0
);
int
leadDim
=
-
1
,
int
tBeg
=
0
,
int
tLen
=
-
1
,
int
yBeg
=
0
);
...
...
source/tensor/function/Rectify.cpp
查看文件 @
906eebb7
...
@@ -19,6 +19,7 @@
...
@@ -19,6 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
*/
#include "../XName.h"
#include "Rectify.h"
#include "Rectify.h"
#include "Rectify.cuh"
#include "Rectify.cuh"
...
@@ -55,6 +56,28 @@ void _Rectify(const XTensor * x, XTensor * y)
...
@@ -55,6 +56,28 @@ void _Rectify(const XTensor * x, XTensor * y)
}
}
/*
/*
rectify function y = max(0, x) (return a XTensor structure)
make a new tensor to keep the result and return it
>> input - input tensor
<< return - y
*/
XTensor
Rectify
(
const
XTensor
&
x
)
{
XTensor
y
(
&
x
);
y
.
SetTMP
();
/* call _Rectify function */
_Rectify
(
&
x
,
&
y
);
/* tensor connection */
XLink
::
MakeLink
(
&
x
,
NULL
,
&
y
,
FUNC_RECTIFY
);
return
y
;
}
/*
backward computation
backward computation
dE/dx = dE/dy * dy/dx
dE/dx = dE/dy * dy/dx
...
@@ -94,7 +117,7 @@ void _RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
...
@@ -94,7 +117,7 @@ void _RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
{
{
/* calculate dE/dy */
/* calculate dE/dy */
if
(
lossName
!=
NOLOSS
)
if
(
lossName
!=
NOLOSS
)
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
_
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
DTYPE
*
dedyp
=
(
DTYPE
*
)
dedy
->
data
;
DTYPE
*
dedyp
=
(
DTYPE
*
)
dedy
->
data
;
DTYPE
*
dedxp
=
(
DTYPE
*
)
dedx
->
data
;
DTYPE
*
dedxp
=
(
DTYPE
*
)
dedx
->
data
;
...
...
source/tensor/function/Rectify.cu
查看文件 @
906eebb7
...
@@ -134,7 +134,7 @@ void _CudaRectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
...
@@ -134,7 +134,7 @@ void _CudaRectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
/* calculate dE/dy */
/* calculate dE/dy */
if(lossName != NOLOSS)
if(lossName != NOLOSS)
CudaLossBackward(dedy, gold, y, lossName);
_
CudaLossBackward(dedy, gold, y, lossName);
int gridSize[3], blockSize[3];
int gridSize[3], blockSize[3];
...
...
source/tensor/function/Rectify.h
查看文件 @
906eebb7
...
@@ -28,11 +28,12 @@
...
@@ -28,11 +28,12 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* rectify function y = max(0, x) */
/* rectify function y = max(0, x) */
extern
"C"
void
_Rectify
(
const
XTensor
*
x
,
XTensor
*
y
);
void
_Rectify
(
const
XTensor
*
x
,
XTensor
*
y
);
/* rectify function y = max(0, x) (return a XTensor structure) */
XTensor
Rectify
(
const
XTensor
&
x
);
/* de/dx */
/* de/dx */
extern
"C"
void
_RectifyBackward
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
void
_RectifyBackward
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
XTensor
*
dedy
,
XTensor
*
dedx
,
XTensor
*
dedy
,
XTensor
*
dedx
,
LOSS_FUNCTION_NAME
lossName
);
LOSS_FUNCTION_NAME
lossName
);
...
...
source/tensor/function/Sigmoid.cpp
查看文件 @
906eebb7
...
@@ -19,6 +19,7 @@
...
@@ -19,6 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-25
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-25
*/
*/
#include "../XName.h"
#include <math.h>
#include <math.h>
#include "Sigmoid.h"
#include "Sigmoid.h"
#include "Sigmoid.cuh"
#include "Sigmoid.cuh"
...
@@ -53,6 +54,27 @@ void _Sigmoid(const XTensor * x, XTensor * y)
...
@@ -53,6 +54,27 @@ void _Sigmoid(const XTensor * x, XTensor * y)
}
}
/*
/*
sigmoid function y = 1/(1+exp(-x)) (return a XTensor structure)
make a new tensor to keep the result and return it
>> x - input tensor
<< return - y
*/
XTensor
Sigmoid
(
const
XTensor
&
x
)
{
XTensor
y
(
&
x
);
y
.
SetTMP
();
/* call _Sigmoid function */
_Sigmoid
(
&
x
,
&
y
);
/* tensor connection */
XLink
::
MakeLink
(
&
x
,
NULL
,
&
y
,
FUNC_SIGMOID
);
return
y
;
}
/*
backward computation
backward computation
dE/ds = dE/dy * dy/dx
dE/ds = dE/dy * dy/dx
...
@@ -86,7 +108,7 @@ void _SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
...
@@ -86,7 +108,7 @@ void _SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
{
{
/* calculate dE/dy */
/* calculate dE/dy */
if
(
lossName
!=
NOLOSS
)
if
(
lossName
!=
NOLOSS
)
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
_
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
DTYPE
*
dedyp
=
(
DTYPE
*
)
dedy
->
data
;
DTYPE
*
dedyp
=
(
DTYPE
*
)
dedy
->
data
;
DTYPE
*
dedxp
=
(
DTYPE
*
)
dedx
->
data
;
DTYPE
*
dedxp
=
(
DTYPE
*
)
dedx
->
data
;
...
...
source/tensor/function/Sigmoid.cu
查看文件 @
906eebb7
...
@@ -129,7 +129,7 @@ void _CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
...
@@ -129,7 +129,7 @@ void _CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
/* calculate dE/dy */
/* calculate dE/dy */
if(lossName != NOLOSS)
if(lossName != NOLOSS)
LossBackward(dedy, gold, y, lossName);
_
LossBackward(dedy, gold, y, lossName);
int gridSize[3], blockSize[3];
int gridSize[3], blockSize[3];
...
...
source/tensor/function/Sigmoid.h
查看文件 @
906eebb7
...
@@ -28,11 +28,12 @@
...
@@ -28,11 +28,12 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* sigmoid function y = 1/(1+exp(-x)) */
/* sigmoid function y = 1/(1+exp(-x)) */
extern
"C"
void
_Sigmoid
(
const
XTensor
*
x
,
XTensor
*
y
);
void
_Sigmoid
(
const
XTensor
*
x
,
XTensor
*
y
);
/* sigmoid function y = 1/(1+exp(-x)) (return a XTensor structure) */
XTensor
Sigmoid
(
const
XTensor
&
x
);
/* de/dx */
/* de/dx */
extern
"C"
void
_SigmoidBackward
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
void
_SigmoidBackward
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
XTensor
*
dedy
,
XTensor
*
dedx
,
XTensor
*
dedy
,
XTensor
*
dedx
,
LOSS_FUNCTION_NAME
lossName
);
LOSS_FUNCTION_NAME
lossName
);
...
...
source/tensor/function/Softmax.cpp
查看文件 @
906eebb7
...
@@ -22,6 +22,7 @@
...
@@ -22,6 +22,7 @@
#include <math.h>
#include <math.h>
#include "Softmax.h"
#include "Softmax.h"
#include "Softmax.cuh"
#include "Softmax.cuh"
#include "../XName.h"
#include "../XUtility.h"
#include "../XUtility.h"
#include "../core/reduce/ReduceSum.h"
#include "../core/reduce/ReduceSum.h"
#include "../core/reduce/ReduceMax.h"
#include "../core/reduce/ReduceMax.h"
...
@@ -130,6 +131,28 @@ void _Softmax(const XTensor * x, XTensor * y, int leadDim)
...
@@ -130,6 +131,28 @@ void _Softmax(const XTensor * x, XTensor * y, int leadDim)
}
}
/*
/*
softmax y = e^x / \sum_{i} e^{x_i} (return a XTensor structure)
make a new tensor to keep the result and return it
>> x - input vector
>> leadDim - leading dimension (along which we perform reduction)
<< return - y
*/
XTensor
Softmax
(
const
XTensor
&
x
,
int
leadDim
)
{
XTensor
y
(
&
x
);
y
.
SetTMP
();
/* call _Softmax function */
_Softmax
(
&
x
,
&
y
,
leadDim
);
/* tensor connection */
XLink
::
MakeLink
(
&
x
,
NULL
,
&
y
,
FUNC_SOFTMAX
);
return
y
;
}
/*
backward computation for dense tensors
backward computation for dense tensors
dE/dx = dE/dy * dy/dx
dE/dx = dE/dy * dy/dx
...
...
source/tensor/function/Softmax.cuh
查看文件 @
906eebb7
...
@@ -31,7 +31,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -31,7 +31,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */
/* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */
extern "C"
extern "C"
void _CudaSotmax(const XTensor * input, XTensor * output, int leadDim);
void _CudaSo
f
tmax(const XTensor * input, XTensor * output, int leadDim);
/* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */
/* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */
extern "C"
extern "C"
...
...
source/tensor/function/Softmax.h
查看文件 @
906eebb7
...
@@ -28,11 +28,12 @@
...
@@ -28,11 +28,12 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* softmax y = e^x / \sum_{i} e^{x_i} */
/* softmax y = e^x / \sum_{i} e^{x_i} */
extern
"C"
void
_Softmax
(
const
XTensor
*
x
,
XTensor
*
y
,
int
leadDim
);
void
_Softmax
(
const
XTensor
*
x
,
XTensor
*
y
,
int
leadDim
);
/* softmax y = e^x / \sum_{i} e^{x_i} (return a XTensor structure) */
XTensor
Softmax
(
const
XTensor
&
x
,
int
leadDim
);
/* de/dx */
/* de/dx */
extern
"C"
void
_SoftmaxBackward
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
void
_SoftmaxBackward
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
XTensor
*
dedy
,
XTensor
*
dedx
,
XTensor
*
dedy
,
XTensor
*
dedx
,
int
leadDim
,
int
leadDim
,
...
...
source/tensor/test/TAbsolute.cpp
查看文件 @
906eebb7
...
@@ -51,15 +51,21 @@ bool TestAbsolute1()
...
@@ -51,15 +51,21 @@ bool TestAbsolute1()
/* create tensors */
/* create tensors */
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
b
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
aMe
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
bUser
;
/* initialize variables */
/* initialize variables */
a
->
SetData
(
aData
,
aUnitNum
);
a
->
SetData
(
aData
,
aUnitNum
);
aMe
->
SetData
(
aData
,
aUnitNum
);
/* call Absolute function */
/* call Absolute function */
_Absolute
(
a
);
_Absolute
(
a
,
b
);
_AbsoluteMe
(
aMe
);
bUser
=
Absolute
(
*
a
);
/* check results */
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -67,25 +73,37 @@ bool TestAbsolute1()
...
@@ -67,25 +73,37 @@ bool TestAbsolute1()
/* create tensor */
/* create tensor */
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
bGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aMeGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
bUserGPU
;
/* Initialize variables */
/* Initialize variables */
aGPU
->
SetData
(
aData
,
aUnitNum
);
aGPU
->
SetData
(
aData
,
aUnitNum
);
aMeGPU
->
SetData
(
aData
,
aUnitNum
);
/* call Absolute function */
/* call Absolute function */
_Absolute
(
aGPU
);
_Absolute
(
aGPU
,
bGPU
);
_AbsoluteMe
(
aMeGPU
);
bUserGPU
=
Absolute
(
*
aGPU
);
/* check results */
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
/* destroy variables */
/* destroy variables */
delete
a
;
delete
a
;
delete
b
;
delete
aMe
;
delete
aGPU
;
delete
aGPU
;
delete
bGPU
;
delete
aMeGPU
;
delete
[]
aDimSize
;
delete
[]
aDimSize
;
return
cpuTest
&&
gpuTest
;
return
cpuTest
&&
gpuTest
;
#else
#else
/* destroy variables */
/* destroy variables */
delete
a
;
delete
a
;
delete
b
;
delete
aMe
;
delete
[]
aDimSize
;
delete
[]
aDimSize
;
return
cpuTest
;
return
cpuTest
;
...
...
source/tensor/test/TConcatenate.cpp
查看文件 @
906eebb7
...
@@ -76,6 +76,7 @@ bool TestConcatenate1()
...
@@ -76,6 +76,7 @@ bool TestConcatenate1()
XTensor
*
s1
=
NewTensor
(
sOrder1
,
sDimSize1
);
XTensor
*
s1
=
NewTensor
(
sOrder1
,
sDimSize1
);
XTensor
*
s2
=
NewTensor
(
sOrder2
,
sDimSize2
);
XTensor
*
s2
=
NewTensor
(
sOrder2
,
sDimSize2
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUser
;
/* initialize variables */
/* initialize variables */
s1
->
SetData
(
sData1
,
sUnitNum1
);
s1
->
SetData
(
sData1
,
sUnitNum1
);
...
@@ -88,9 +89,10 @@ bool TestConcatenate1()
...
@@ -88,9 +89,10 @@ bool TestConcatenate1()
/* call Concatenate function */
/* call Concatenate function */
_Concatenate
(
sList
,
t
,
1
);
_Concatenate
(
sList
,
t
,
1
);
tUser
=
Concatenate
(
*
sList
,
1
);
/* check results */
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -100,6 +102,7 @@ bool TestConcatenate1()
...
@@ -100,6 +102,7 @@ bool TestConcatenate1()
XTensor
*
sGPU1
=
NewTensor
(
sOrder1
,
sDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU1
=
NewTensor
(
sOrder1
,
sDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder2
,
sDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder2
,
sDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* Initialize variables */
/* Initialize variables */
sGPU1
->
SetData
(
sData1
,
sUnitNum1
);
sGPU1
->
SetData
(
sData1
,
sUnitNum1
);
...
@@ -115,9 +118,10 @@ bool TestConcatenate1()
...
@@ -115,9 +118,10 @@ bool TestConcatenate1()
/* call Concatenate function */
/* call Concatenate function */
_Concatenate
(
sList
,
tGPU
,
1
);
_Concatenate
(
sList
,
tGPU
,
1
);
tUserGPU
=
Concatenate
(
*
sList
,
1
);
/* check results */
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
)
;
/* destroy variables */
/* destroy variables */
delete
sList
;
delete
sList
;
...
@@ -201,6 +205,7 @@ bool TestConcatenate2()
...
@@ -201,6 +205,7 @@ bool TestConcatenate2()
XTensor
*
s1
=
NewTensor
(
sOrder1
,
sDimSize1
);
XTensor
*
s1
=
NewTensor
(
sOrder1
,
sDimSize1
);
XTensor
*
s2
=
NewTensor
(
sOrder2
,
sDimSize2
);
XTensor
*
s2
=
NewTensor
(
sOrder2
,
sDimSize2
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUser
;
/* initialize variables */
/* initialize variables */
s1
->
SetData
(
sData1
,
sUnitNum1
);
s1
->
SetData
(
sData1
,
sUnitNum1
);
...
@@ -213,9 +218,10 @@ bool TestConcatenate2()
...
@@ -213,9 +218,10 @@ bool TestConcatenate2()
/* call Concatenate function */
/* call Concatenate function */
_Concatenate
(
sList
,
t
,
0
);
_Concatenate
(
sList
,
t
,
0
);
tUser
=
Concatenate
(
*
sList
,
0
);
/* check results */
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -225,6 +231,7 @@ bool TestConcatenate2()
...
@@ -225,6 +231,7 @@ bool TestConcatenate2()
XTensor
*
sGPU1
=
NewTensor
(
sOrder1
,
sDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU1
=
NewTensor
(
sOrder1
,
sDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder2
,
sDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder2
,
sDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* Initialize variables */
/* Initialize variables */
sGPU1
->
SetData
(
sData1
,
sUnitNum1
);
sGPU1
->
SetData
(
sData1
,
sUnitNum1
);
...
@@ -240,9 +247,10 @@ bool TestConcatenate2()
...
@@ -240,9 +247,10 @@ bool TestConcatenate2()
/* call Concatenate function */
/* call Concatenate function */
_Concatenate
(
sList
,
tGPU
,
0
);
_Concatenate
(
sList
,
tGPU
,
0
);
tUserGPU
=
Concatenate
(
*
sList
,
0
);
/* check results */
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
)
;
/* destroy variables */
/* destroy variables */
delete
sList
;
delete
sList
;
...
@@ -324,6 +332,7 @@ bool TestConcatenate3()
...
@@ -324,6 +332,7 @@ bool TestConcatenate3()
XTensor
*
s1
=
NewTensor
(
sOrder1
,
sDimSize1
);
XTensor
*
s1
=
NewTensor
(
sOrder1
,
sDimSize1
);
XTensor
*
s2
=
NewTensor
(
sOrder2
,
sDimSize2
);
XTensor
*
s2
=
NewTensor
(
sOrder2
,
sDimSize2
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUser
;
/* initialize variables */
/* initialize variables */
s1
->
SetData
(
sData1
,
sUnitNum1
);
s1
->
SetData
(
sData1
,
sUnitNum1
);
...
@@ -336,9 +345,10 @@ bool TestConcatenate3()
...
@@ -336,9 +345,10 @@ bool TestConcatenate3()
/* call Concatenate function */
/* call Concatenate function */
_Concatenate
(
sList
,
t
,
1
);
_Concatenate
(
sList
,
t
,
1
);
tUser
=
Concatenate
(
*
sList
,
1
);
/* check results */
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -348,6 +358,7 @@ bool TestConcatenate3()
...
@@ -348,6 +358,7 @@ bool TestConcatenate3()
XTensor
*
sGPU1
=
NewTensor
(
sOrder1
,
sDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU1
=
NewTensor
(
sOrder1
,
sDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder2
,
sDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder2
,
sDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* Initialize variables */
/* Initialize variables */
sGPU1
->
SetData
(
sData1
,
sUnitNum1
);
sGPU1
->
SetData
(
sData1
,
sUnitNum1
);
...
@@ -363,9 +374,10 @@ bool TestConcatenate3()
...
@@ -363,9 +374,10 @@ bool TestConcatenate3()
/* call Concatenate function */
/* call Concatenate function */
_Concatenate
(
sList
,
tGPU
,
1
);
_Concatenate
(
sList
,
tGPU
,
1
);
tUserGPU
=
Concatenate
(
*
sList
,
1
);
/* check results */
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
)
;
/* destroy variables */
/* destroy variables */
delete
sList
;
delete
sList
;
...
@@ -444,6 +456,7 @@ bool TestConcatenate4()
...
@@ -444,6 +456,7 @@ bool TestConcatenate4()
XTensor
*
s1
=
NewTensor
(
sOrder1
,
sDimSize1
);
XTensor
*
s1
=
NewTensor
(
sOrder1
,
sDimSize1
);
XTensor
*
s2
=
NewTensor
(
sOrder2
,
sDimSize2
);
XTensor
*
s2
=
NewTensor
(
sOrder2
,
sDimSize2
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUser
;
/* initialize variables */
/* initialize variables */
s1
->
SetData
(
sData1
,
sUnitNum1
);
s1
->
SetData
(
sData1
,
sUnitNum1
);
...
@@ -452,9 +465,10 @@ bool TestConcatenate4()
...
@@ -452,9 +465,10 @@ bool TestConcatenate4()
/* call Concatenate function */
/* call Concatenate function */
_Concatenate
(
s1
,
s2
,
t
,
1
);
_Concatenate
(
s1
,
s2
,
t
,
1
);
tUser
=
Concatenate
(
*
s1
,
*
s2
,
1
);
/* check results */
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -464,6 +478,7 @@ bool TestConcatenate4()
...
@@ -464,6 +478,7 @@ bool TestConcatenate4()
XTensor
*
sGPU1
=
NewTensor
(
sOrder1
,
sDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU1
=
NewTensor
(
sOrder1
,
sDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder2
,
sDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder2
,
sDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* Initialize variables */
/* Initialize variables */
sGPU1
->
SetData
(
sData1
,
sUnitNum1
);
sGPU1
->
SetData
(
sData1
,
sUnitNum1
);
...
@@ -472,9 +487,10 @@ bool TestConcatenate4()
...
@@ -472,9 +487,10 @@ bool TestConcatenate4()
/* call Concatenate function */
/* call Concatenate function */
_Concatenate
(
sGPU1
,
sGPU2
,
tGPU
,
1
);
_Concatenate
(
sGPU1
,
sGPU2
,
tGPU
,
1
);
tUserGPU
=
Concatenate
(
*
sGPU1
,
*
sGPU2
,
1
);
/* check results */
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
)
;
/* destroy variables */
/* destroy variables */
delete
s1
;
delete
s1
;
...
...
source/tensor/test/THardTanH.cpp
查看文件 @
906eebb7
...
@@ -53,6 +53,7 @@ bool TestHardTanH1()
...
@@ -53,6 +53,7 @@ bool TestHardTanH1()
/* create tensors */
/* create tensors */
XTensor
*
x
=
NewTensor
(
order
,
dimSize
);
XTensor
*
x
=
NewTensor
(
order
,
dimSize
);
XTensor
*
y
=
NewTensor
(
order
,
dimSize
);
XTensor
*
y
=
NewTensor
(
order
,
dimSize
);
XTensor
yUser
;
/* initialize variables */
/* initialize variables */
x
->
SetData
(
xData
,
unitNum
);
x
->
SetData
(
xData
,
unitNum
);
...
@@ -60,9 +61,10 @@ bool TestHardTanH1()
...
@@ -60,9 +61,10 @@ bool TestHardTanH1()
/* call hardtanh function */
/* call hardtanh function */
_HardTanH
(
x
,
y
);
_HardTanH
(
x
,
y
);
yUser
=
HardTanH
(
*
x
);
/* check results */
/* check results */
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
,
1e-4
F
);
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUser
.
CheckData
(
answer
,
unitNum
,
1e-4
F
)
;
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -71,6 +73,7 @@ bool TestHardTanH1()
...
@@ -71,6 +73,7 @@ bool TestHardTanH1()
/* create tensor */
/* create tensor */
XTensor
*
xGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
xGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
yGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
yGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
yUserGPU
;
/* Initialize variables */
/* Initialize variables */
xGPU
->
SetData
(
xData
,
unitNum
);
xGPU
->
SetData
(
xData
,
unitNum
);
...
@@ -78,9 +81,10 @@ bool TestHardTanH1()
...
@@ -78,9 +81,10 @@ bool TestHardTanH1()
/* call hardtanh function */
/* call hardtanh function */
_HardTanH
(
xGPU
,
yGPU
);
_HardTanH
(
xGPU
,
yGPU
);
yUserGPU
=
HardTanH
(
*
xGPU
);
/* check results */
/* check results */
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
,
1e-4
F
);
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUserGPU
.
CheckData
(
answer
,
unitNum
,
1e-4
F
)
;
/* destroy variables */
/* destroy variables */
delete
x
;
delete
x
;
...
...
source/tensor/test/TIdentity.cpp
查看文件 @
906eebb7
...
@@ -51,6 +51,7 @@ bool TestIdentity1()
...
@@ -51,6 +51,7 @@ bool TestIdentity1()
/* create tensors */
/* create tensors */
XTensor
*
x
=
NewTensor
(
order
,
dimSize
);
XTensor
*
x
=
NewTensor
(
order
,
dimSize
);
XTensor
*
y
=
NewTensor
(
order
,
dimSize
);
XTensor
*
y
=
NewTensor
(
order
,
dimSize
);
XTensor
yUser
;
/* initialize variables */
/* initialize variables */
x
->
SetData
(
xData
,
unitNum
);
x
->
SetData
(
xData
,
unitNum
);
...
@@ -58,9 +59,10 @@ bool TestIdentity1()
...
@@ -58,9 +59,10 @@ bool TestIdentity1()
/* call Identity function */
/* call Identity function */
_Identity
(
x
,
y
);
_Identity
(
x
,
y
);
yUser
=
Identity
(
*
x
);
/* check result */
/* check result */
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
);
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUser
.
CheckData
(
answer
,
unitNum
,
1e-4
F
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -69,6 +71,7 @@ bool TestIdentity1()
...
@@ -69,6 +71,7 @@ bool TestIdentity1()
/* create tensors */
/* create tensors */
XTensor
*
xGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
xGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
yGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
yGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
yUserGPU
;
/* initialize variables */
/* initialize variables */
xGPU
->
SetData
(
xData
,
unitNum
);
xGPU
->
SetData
(
xData
,
unitNum
);
...
@@ -76,9 +79,10 @@ bool TestIdentity1()
...
@@ -76,9 +79,10 @@ bool TestIdentity1()
/* call Identity function */
/* call Identity function */
_Identity
(
xGPU
,
yGPU
);
_Identity
(
xGPU
,
yGPU
);
yUserGPU
=
Identity
(
*
xGPU
);
/* check result */
/* check result */
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
);
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUserGPU
.
CheckData
(
answer
,
unitNum
,
1e-4
F
);
/* destroy variables */
/* destroy variables */
delete
x
;
delete
x
;
...
...
source/tensor/test/TLog.cpp
查看文件 @
906eebb7
...
@@ -51,15 +51,21 @@ bool TestLog1()
...
@@ -51,15 +51,21 @@ bool TestLog1()
/* create tensors */
/* create tensors */
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
b
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
aMe
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
bUser
;
/* initialize variables */
/* initialize variables */
a
->
SetData
(
aData
,
aUnitNum
);
a
->
SetData
(
aData
,
aUnitNum
);
aMe
->
SetData
(
aData
,
aUnitNum
);
/* call Log function */
/* call Log function */
_Log
(
a
);
_Log
(
a
,
b
);
_LogMe
(
aMe
);
bUser
=
Log
(
*
a
);
/* check results */
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -67,25 +73,37 @@ bool TestLog1()
...
@@ -67,25 +73,37 @@ bool TestLog1()
/* create tensor */
/* create tensor */
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
bGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aMeGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
bUserGPU
;
/* Initialize variables */
/* Initialize variables */
aGPU
->
SetData
(
aData
,
aUnitNum
);
aGPU
->
SetData
(
aData
,
aUnitNum
);
aMeGPU
->
SetData
(
aData
,
aUnitNum
);
/* call Log function */
/* call Log function */
_Log
(
aGPU
);
_Log
(
aGPU
,
bGPU
);
_LogMe
(
aMeGPU
);
bUserGPU
=
Log
(
*
aGPU
);
/* check results */
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
/* destroy variables */
/* destroy variables */
delete
a
;
delete
a
;
delete
b
;
delete
aMe
;
delete
aGPU
;
delete
aGPU
;
delete
bGPU
;
delete
aMeGPU
;
delete
[]
aDimSize
;
delete
[]
aDimSize
;
return
cpuTest
&&
gpuTest
;
return
cpuTest
&&
gpuTest
;
#else
#else
/* destroy variables */
/* destroy variables */
delete
a
;
delete
a
;
delete
b
;
delete
aMe
;
delete
[]
aDimSize
;
delete
[]
aDimSize
;
return
cpuTest
;
return
cpuTest
;
...
...
source/tensor/test/TLogSoftmax.cpp
查看文件 @
906eebb7
...
@@ -51,6 +51,7 @@ bool TestLogSoftmax1()
...
@@ -51,6 +51,7 @@ bool TestLogSoftmax1()
/* create tensors */
/* create tensors */
XTensor
*
x
=
NewTensor
(
order
,
dimSize
);
XTensor
*
x
=
NewTensor
(
order
,
dimSize
);
XTensor
*
y
=
NewTensor
(
order
,
dimSize
);
XTensor
*
y
=
NewTensor
(
order
,
dimSize
);
XTensor
yUser
;
/* initialize variables */
/* initialize variables */
x
->
SetData
(
xData
,
unitNum
);
x
->
SetData
(
xData
,
unitNum
);
...
@@ -58,9 +59,10 @@ bool TestLogSoftmax1()
...
@@ -58,9 +59,10 @@ bool TestLogSoftmax1()
/* call LogSoftmax function */
/* call LogSoftmax function */
_LogSoftmax
(
x
,
y
,
1
);
_LogSoftmax
(
x
,
y
,
1
);
yUser
=
LogSoftmax
(
*
x
,
1
);
/* check result */
/* check result */
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
,
1e-4
F
);
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUser
.
CheckData
(
answer
,
unitNum
,
1e-4
F
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -69,6 +71,7 @@ bool TestLogSoftmax1()
...
@@ -69,6 +71,7 @@ bool TestLogSoftmax1()
/* create tensors */
/* create tensors */
XTensor
*
xGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
xGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
yGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
yGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
yUserGPU
;
/* initialize variables */
/* initialize variables */
xGPU
->
SetData
(
xData
,
unitNum
);
xGPU
->
SetData
(
xData
,
unitNum
);
...
@@ -76,9 +79,10 @@ bool TestLogSoftmax1()
...
@@ -76,9 +79,10 @@ bool TestLogSoftmax1()
/* call LogSoftmax function */
/* call LogSoftmax function */
_LogSoftmax
(
xGPU
,
yGPU
,
1
);
_LogSoftmax
(
xGPU
,
yGPU
,
1
);
yUserGPU
=
LogSoftmax
(
*
xGPU
,
1
);
/* check result */
/* check result */
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
,
1e-4
F
);
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUserGPU
.
CheckData
(
answer
,
unitNum
,
1e-4
F
);
/* destroy variables */
/* destroy variables */
delete
x
;
delete
x
;
...
...
source/tensor/test/TLoss.cpp
查看文件 @
906eebb7
...
@@ -46,6 +46,7 @@ bool TestLoss1()
...
@@ -46,6 +46,7 @@ bool TestLoss1()
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
DTYPE
answer
=
5.0
F
;
DTYPE
answer
=
5.0
F
;
DTYPE
error
;
/* create tensors */
/* create tensors */
XTensor
*
output
=
NewTensor
(
order
,
dimSize
);
XTensor
*
output
=
NewTensor
(
order
,
dimSize
);
...
@@ -57,8 +58,8 @@ bool TestLoss1()
...
@@ -57,8 +58,8 @@ bool TestLoss1()
_ScaleAndShiftMe
(
output
,
1
,
1
);
_ScaleAndShiftMe
(
output
,
1
,
1
);
_ScaleAndShiftMe
(
gold
,
1
,
2
);
_ScaleAndShiftMe
(
gold
,
1
,
2
);
DTYPE
error
;
/* call LossCompute function */
error
=
LossCompute
(
gold
,
output
,
SQUAREDERROR
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
error
=
_
LossCompute
(
gold
,
output
,
SQUAREDERROR
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
/* check results */
/* check results */
cpuTest
=
(
error
==
answer
);
cpuTest
=
(
error
==
answer
);
...
@@ -78,7 +79,7 @@ bool TestLoss1()
...
@@ -78,7 +79,7 @@ bool TestLoss1()
_ScaleAndShiftMe
(
goldGPU
,
1
,
2
);
_ScaleAndShiftMe
(
goldGPU
,
1
,
2
);
/* call LossCompute function */
/* call LossCompute function */
error
=
LossCompute
(
goldGPU
,
outputGPU
,
SQUAREDERROR
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
error
=
_
LossCompute
(
goldGPU
,
outputGPU
,
SQUAREDERROR
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
/* check results */
/* check results */
gpuTest
=
(
error
==
answer
);
gpuTest
=
(
error
==
answer
);
...
@@ -123,6 +124,7 @@ bool TestLoss2()
...
@@ -123,6 +124,7 @@ bool TestLoss2()
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
DTYPE
answer
=
0.0
F
;
DTYPE
answer
=
0.0
F
;
DTYPE
error
;
/* create tensors */
/* create tensors */
XTensor
*
output
=
NewTensor
(
order
,
dimSize
);
XTensor
*
output
=
NewTensor
(
order
,
dimSize
);
...
@@ -134,8 +136,8 @@ bool TestLoss2()
...
@@ -134,8 +136,8 @@ bool TestLoss2()
_ScaleAndShiftMe
(
output
,
1
,
1
);
_ScaleAndShiftMe
(
output
,
1
,
1
);
_ScaleAndShiftMe
(
gold
,
1
,
2
);
_ScaleAndShiftMe
(
gold
,
1
,
2
);
DTYPE
error
;
/* call LossCompute function */
error
=
LossCompute
(
gold
,
output
,
CROSSENTROPY
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
error
=
_
LossCompute
(
gold
,
output
,
CROSSENTROPY
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
/* check results */
/* check results */
cpuTest
=
(
error
==
answer
);
cpuTest
=
(
error
==
answer
);
...
@@ -155,7 +157,7 @@ bool TestLoss2()
...
@@ -155,7 +157,7 @@ bool TestLoss2()
_ScaleAndShiftMe
(
goldGPU
,
1
,
2
);
_ScaleAndShiftMe
(
goldGPU
,
1
,
2
);
/* call LossCompute function */
/* call LossCompute function */
error
=
LossCompute
(
goldGPU
,
outputGPU
,
CROSSENTROPY
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
error
=
_
LossCompute
(
goldGPU
,
outputGPU
,
CROSSENTROPY
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
/* check results */
/* check results */
gpuTest
=
(
error
==
answer
);
gpuTest
=
(
error
==
answer
);
...
@@ -210,6 +212,7 @@ bool TestLoss3()
...
@@ -210,6 +212,7 @@ bool TestLoss3()
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
DTYPE
answer
=
0.25
F
;
DTYPE
answer
=
0.25
F
;
DTYPE
error
;
/* create tensors */
/* create tensors */
XTensor
*
output
=
NewTensor
(
order
,
dimSize
);
XTensor
*
output
=
NewTensor
(
order
,
dimSize
);
...
@@ -219,8 +222,8 @@ bool TestLoss3()
...
@@ -219,8 +222,8 @@ bool TestLoss3()
output
->
SetData
(
outputData
,
unitNum
);
output
->
SetData
(
outputData
,
unitNum
);
gold
->
SetData
(
goldData
,
unitNum
);
gold
->
SetData
(
goldData
,
unitNum
);
DTYPE
error
;
/* call LossCompute function */
error
=
LossCompute
(
gold
,
output
,
ONEHOTERROR
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
error
=
_
LossCompute
(
gold
,
output
,
ONEHOTERROR
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
/* check results */
/* check results */
cpuTest
=
(
error
==
answer
);
cpuTest
=
(
error
==
answer
);
...
@@ -238,7 +241,7 @@ bool TestLoss3()
...
@@ -238,7 +241,7 @@ bool TestLoss3()
goldGPU
->
SetData
(
goldData
,
unitNum
);
goldGPU
->
SetData
(
goldData
,
unitNum
);
/* call LossCompute function */
/* call LossCompute function */
error
=
LossCompute
(
goldGPU
,
outputGPU
,
ONEHOTERROR
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
error
=
_
LossCompute
(
goldGPU
,
outputGPU
,
ONEHOTERROR
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
/* check results */
/* check results */
gpuTest
=
(
error
==
answer
);
gpuTest
=
(
error
==
answer
);
...
...
source/tensor/test/TMatrixMulBatched.cpp
查看文件 @
906eebb7
...
@@ -75,6 +75,7 @@ bool TestMatrixMulBatched1()
...
@@ -75,6 +75,7 @@ bool TestMatrixMulBatched1()
XTensor
*
s1
=
NewTensor
(
sOrder1
,
sDimSize1
);
XTensor
*
s1
=
NewTensor
(
sOrder1
,
sDimSize1
);
XTensor
*
s2
=
NewTensor
(
sOrder2
,
sDimSize2
);
XTensor
*
s2
=
NewTensor
(
sOrder2
,
sDimSize2
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUser
;
/* initialize variables */
/* initialize variables */
s1
->
SetData
(
sData1
,
sUnitNum1
);
s1
->
SetData
(
sData1
,
sUnitNum1
);
...
@@ -83,9 +84,10 @@ bool TestMatrixMulBatched1()
...
@@ -83,9 +84,10 @@ bool TestMatrixMulBatched1()
/* call MatrixMulBatched function */
/* call MatrixMulBatched function */
_MatrixMulBatched
(
s1
,
X_NOTRANS
,
s2
,
X_NOTRANS
,
t
);
_MatrixMulBatched
(
s1
,
X_NOTRANS
,
s2
,
X_NOTRANS
,
t
);
tUser
=
MatrixMulBatched
(
*
s1
,
X_NOTRANS
,
*
s2
,
X_NOTRANS
);
/* check results */
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -95,6 +97,7 @@ bool TestMatrixMulBatched1()
...
@@ -95,6 +97,7 @@ bool TestMatrixMulBatched1()
XTensor
*
sGPU1
=
NewTensor
(
sOrder1
,
sDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU1
=
NewTensor
(
sOrder1
,
sDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder2
,
sDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder2
,
sDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* Initialize variables */
/* Initialize variables */
sGPU1
->
SetData
(
sData1
,
sUnitNum1
);
sGPU1
->
SetData
(
sData1
,
sUnitNum1
);
...
@@ -103,9 +106,10 @@ bool TestMatrixMulBatched1()
...
@@ -103,9 +106,10 @@ bool TestMatrixMulBatched1()
/* call MatrixMulBatched function */
/* call MatrixMulBatched function */
_MatrixMulBatched
(
sGPU1
,
X_NOTRANS
,
sGPU2
,
X_NOTRANS
,
tGPU
);
_MatrixMulBatched
(
sGPU1
,
X_NOTRANS
,
sGPU2
,
X_NOTRANS
,
tGPU
);
tUserGPU
=
MatrixMulBatched
(
*
sGPU1
,
X_NOTRANS
,
*
sGPU2
,
X_NOTRANS
);
/* check results */
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
)
;
/* destroy variables */
/* destroy variables */
delete
s1
;
delete
s1
;
...
@@ -193,6 +197,7 @@ bool TestMatrixMulBatched2()
...
@@ -193,6 +197,7 @@ bool TestMatrixMulBatched2()
XTensor
*
s1
=
NewTensor
(
sOrder1
,
sDimSize1
);
XTensor
*
s1
=
NewTensor
(
sOrder1
,
sDimSize1
);
XTensor
*
s2
=
NewTensor
(
sOrder2
,
sDimSize2
);
XTensor
*
s2
=
NewTensor
(
sOrder2
,
sDimSize2
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUser
;
/* initialize variables */
/* initialize variables */
s1
->
SetData
(
sData1
,
sUnitNum1
);
s1
->
SetData
(
sData1
,
sUnitNum1
);
...
@@ -201,9 +206,10 @@ bool TestMatrixMulBatched2()
...
@@ -201,9 +206,10 @@ bool TestMatrixMulBatched2()
/* call MatrixMulBatched function */
/* call MatrixMulBatched function */
_MatrixMulBatched
(
s1
,
X_NOTRANS
,
s2
,
X_NOTRANS
,
t
);
_MatrixMulBatched
(
s1
,
X_NOTRANS
,
s2
,
X_NOTRANS
,
t
);
tUser
=
MatrixMulBatched
(
*
s1
,
X_NOTRANS
,
*
s2
,
X_NOTRANS
);
/* check results */
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -213,6 +219,7 @@ bool TestMatrixMulBatched2()
...
@@ -213,6 +219,7 @@ bool TestMatrixMulBatched2()
XTensor
*
sGPU1
=
NewTensor
(
sOrder1
,
sDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU1
=
NewTensor
(
sOrder1
,
sDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder2
,
sDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder2
,
sDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* Initialize variables */
/* Initialize variables */
sGPU1
->
SetData
(
sData1
,
sUnitNum1
);
sGPU1
->
SetData
(
sData1
,
sUnitNum1
);
...
@@ -221,9 +228,10 @@ bool TestMatrixMulBatched2()
...
@@ -221,9 +228,10 @@ bool TestMatrixMulBatched2()
/* call MatrixMulBatched function */
/* call MatrixMulBatched function */
_MatrixMulBatched
(
sGPU1
,
X_NOTRANS
,
sGPU2
,
X_NOTRANS
,
tGPU
);
_MatrixMulBatched
(
sGPU1
,
X_NOTRANS
,
sGPU2
,
X_NOTRANS
,
tGPU
);
tUserGPU
=
MatrixMulBatched
(
*
sGPU1
,
X_NOTRANS
,
*
sGPU2
,
X_NOTRANS
);
/* check results */
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
)
;
/* destroy variables */
/* destroy variables */
delete
s1
;
delete
s1
;
...
...
source/tensor/test/TMerge.cpp
查看文件 @
906eebb7
...
@@ -60,16 +60,18 @@ bool TestMerge1()
...
@@ -60,16 +60,18 @@ bool TestMerge1()
/* create tensors */
/* create tensors */
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUser
;
/* initialize variables */
/* initialize variables */
s
->
SetData
(
sData
,
sUnitNum
);
s
->
SetData
(
sData
,
sUnitNum
);
t
->
SetZeroAll
();
t
->
SetZeroAll
();
/* call
m
erge function */
/* call
M
erge function */
_Merge
(
s
,
t
,
1
,
0
);
_Merge
(
s
,
t
,
1
,
0
);
tUser
=
Merge
(
*
s
,
1
,
0
);
/* check results */
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -78,16 +80,18 @@ bool TestMerge1()
...
@@ -78,16 +80,18 @@ bool TestMerge1()
/* create tensor */
/* create tensor */
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* Initialize variables */
/* Initialize variables */
sGPU
->
SetData
(
sData
,
sUnitNum
);
sGPU
->
SetData
(
sData
,
sUnitNum
);
tGPU
->
SetZeroAll
();
tGPU
->
SetZeroAll
();
/* call
m
erge function */
/* call
M
erge function */
_Merge
(
sGPU
,
tGPU
,
1
,
0
);
_Merge
(
sGPU
,
tGPU
,
1
,
0
);
tUserGPU
=
Merge
(
*
sGPU
,
1
,
0
);
/* check results */
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
);
/* destroy variables */
/* destroy variables */
delete
s
;
delete
s
;
...
@@ -166,18 +170,23 @@ bool TestMerge2()
...
@@ -166,18 +170,23 @@ bool TestMerge2()
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
t1
=
NewTensor
(
tOrder1
,
tDimSize1
);
XTensor
*
t1
=
NewTensor
(
tOrder1
,
tDimSize1
);
XTensor
*
t2
=
NewTensor
(
tOrder2
,
tDimSize2
);
XTensor
*
t2
=
NewTensor
(
tOrder2
,
tDimSize2
);
XTensor
tUser1
;
XTensor
tUser2
;
/* initialize variables */
/* initialize variables */
s
->
SetData
(
sData
,
sUnitNum
);
s
->
SetData
(
sData
,
sUnitNum
);
t1
->
SetZeroAll
();
t1
->
SetZeroAll
();
t2
->
SetZeroAll
();
t2
->
SetZeroAll
();
/* call
m
erge function */
/* call
M
erge function */
_Merge
(
s
,
t1
,
1
,
0
);
_Merge
(
s
,
t1
,
1
,
0
);
_Merge
(
s
,
t2
,
2
,
0
);
_Merge
(
s
,
t2
,
2
,
0
);
tUser1
=
Merge
(
*
s
,
1
,
0
);
tUser2
=
Merge
(
*
s
,
2
,
0
);
/* check results */
/* check results */
cpuTest
=
t1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
t2
->
CheckData
(
answer2
,
tUnitNum2
);
cpuTest
=
t1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tUser1
.
CheckData
(
answer1
,
tUnitNum1
)
&&
t2
->
CheckData
(
answer2
,
tUnitNum2
)
&&
tUser2
.
CheckData
(
answer2
,
tUnitNum2
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -187,18 +196,23 @@ bool TestMerge2()
...
@@ -187,18 +196,23 @@ bool TestMerge2()
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU1
=
NewTensor
(
tOrder1
,
tDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU1
=
NewTensor
(
tOrder1
,
tDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU2
=
NewTensor
(
tOrder2
,
tDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU2
=
NewTensor
(
tOrder2
,
tDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU1
;
XTensor
tUserGPU2
;
/* Initialize variables */
/* Initialize variables */
sGPU
->
SetData
(
sData
,
sUnitNum
);
sGPU
->
SetData
(
sData
,
sUnitNum
);
tGPU1
->
SetZeroAll
();
tGPU1
->
SetZeroAll
();
tGPU2
->
SetZeroAll
();
tGPU2
->
SetZeroAll
();
/* call
m
erge function */
/* call
M
erge function */
_Merge
(
sGPU
,
tGPU1
,
1
,
0
);
_Merge
(
sGPU
,
tGPU1
,
1
,
0
);
_Merge
(
sGPU
,
tGPU2
,
2
,
0
);
_Merge
(
sGPU
,
tGPU2
,
2
,
0
);
tUserGPU1
=
Merge
(
*
sGPU
,
1
,
0
);
tUserGPU2
=
Merge
(
*
sGPU
,
2
,
0
);
/* check results */
/* check results */
gpuTest
=
tGPU1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tGPU2
->
CheckData
(
answer2
,
tUnitNum2
);
gpuTest
=
tGPU1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tUserGPU1
.
CheckData
(
answer1
,
tUnitNum1
)
&&
tGPU2
->
CheckData
(
answer2
,
tUnitNum2
)
&&
tUserGPU2
.
CheckData
(
answer2
,
tUnitNum2
);
/* destroy variables */
/* destroy variables */
delete
s
;
delete
s
;
...
@@ -271,6 +285,7 @@ bool TestMerge3()
...
@@ -271,6 +285,7 @@ bool TestMerge3()
XTensor
*
s1
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
s1
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
s2
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
s2
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUser
;
/* initialize variables */
/* initialize variables */
s1
->
SetData
(
sData1
,
sUnitNum
);
s1
->
SetData
(
sData1
,
sUnitNum
);
...
@@ -281,11 +296,12 @@ bool TestMerge3()
...
@@ -281,11 +296,12 @@ bool TestMerge3()
smallList
->
Add
(
s1
);
smallList
->
Add
(
s1
);
smallList
->
Add
(
s2
);
smallList
->
Add
(
s2
);
/* call
m
erge function */
/* call
M
erge function */
_Merge
(
smallList
,
t
,
0
);
_Merge
(
smallList
,
t
,
0
);
tUser
=
Merge
(
*
smallList
,
0
);
/* check results */
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -298,6 +314,7 @@ bool TestMerge3()
...
@@ -298,6 +314,7 @@ bool TestMerge3()
XTensor
*
sGPU1
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU1
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUserGPU
;
/* initialize variables */
/* initialize variables */
sGPU1
->
SetData
(
sData1
,
sUnitNum
);
sGPU1
->
SetData
(
sData1
,
sUnitNum
);
...
@@ -308,11 +325,12 @@ bool TestMerge3()
...
@@ -308,11 +325,12 @@ bool TestMerge3()
smallList
->
Add
(
sGPU1
);
smallList
->
Add
(
sGPU1
);
smallList
->
Add
(
sGPU2
);
smallList
->
Add
(
sGPU2
);
/* call
m
erge function */
/* call
M
erge function */
_Merge
(
smallList
,
tGPU
,
0
);
_Merge
(
smallList
,
tGPU
,
0
);
tUserGPU
=
Merge
(
*
smallList
,
0
);
/* check results */
/* check results */
cpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
);
/* destroy variables */
/* destroy variables */
delete
s1
;
delete
s1
;
...
@@ -383,6 +401,7 @@ bool TestMerge4()
...
@@ -383,6 +401,7 @@ bool TestMerge4()
XTensor
*
s1
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
s1
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
s2
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
s2
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUser
;
/* initialize variables */
/* initialize variables */
s1
->
SetData
(
sData1
,
sUnitNum
);
s1
->
SetData
(
sData1
,
sUnitNum
);
...
@@ -393,11 +412,12 @@ bool TestMerge4()
...
@@ -393,11 +412,12 @@ bool TestMerge4()
smallList
->
Add
(
s1
);
smallList
->
Add
(
s1
);
smallList
->
Add
(
s2
);
smallList
->
Add
(
s2
);
/* call
m
erge function */
/* call
M
erge function */
_Merge
(
smallList
,
t
,
1
);
_Merge
(
smallList
,
t
,
1
);
tUser
=
Merge
(
*
smallList
,
1
);
/* check results */
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -410,6 +430,7 @@ bool TestMerge4()
...
@@ -410,6 +430,7 @@ bool TestMerge4()
XTensor
*
sGPU1
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU1
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUserGPU
;
/* initialize variables */
/* initialize variables */
sGPU1
->
SetData
(
sData1
,
sUnitNum
);
sGPU1
->
SetData
(
sData1
,
sUnitNum
);
...
@@ -420,11 +441,12 @@ bool TestMerge4()
...
@@ -420,11 +441,12 @@ bool TestMerge4()
smallList
->
Add
(
sGPU1
);
smallList
->
Add
(
sGPU1
);
smallList
->
Add
(
sGPU2
);
smallList
->
Add
(
sGPU2
);
/* call
m
erge function */
/* call
M
erge function */
_Merge
(
smallList
,
tGPU
,
1
);
_Merge
(
smallList
,
tGPU
,
1
);
tUserGPU
=
Merge
(
*
smallList
,
1
);
/* check results */
/* check results */
cpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
);
/* destroy variables */
/* destroy variables */
delete
s1
;
delete
s1
;
...
...
source/tensor/test/TNegate.cpp
查看文件 @
906eebb7
...
@@ -48,15 +48,21 @@ bool TestNegate1()
...
@@ -48,15 +48,21 @@ bool TestNegate1()
/* create tensors */
/* create tensors */
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
b
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
aMe
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
bUser
;
/* initialize variables */
/* initialize variables */
a
->
SetData
(
aData
,
aUnitNum
);
a
->
SetData
(
aData
,
aUnitNum
);
aMe
->
SetData
(
aData
,
aUnitNum
);
/* call Negate function */
/* call Negate function */
_Negate
(
a
);
_Negate
(
a
,
b
);
_NegateMe
(
aMe
);
bUser
=
Negate
(
*
a
);
/* check results */
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
);
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -64,25 +70,37 @@ bool TestNegate1()
...
@@ -64,25 +70,37 @@ bool TestNegate1()
/* create tensor */
/* create tensor */
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
bGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aMeGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
bUserGPU
;
/* Initialize variables */
/* Initialize variables */
aGPU
->
SetData
(
aData
,
aUnitNum
);
aGPU
->
SetData
(
aData
,
aUnitNum
);
aMeGPU
->
SetData
(
aData
,
aUnitNum
);
/* call Negate function */
/* call Negate function */
_Negate
(
aGPU
);
_Negate
(
aGPU
,
bGPU
);
_NegateMe
(
aMeGPU
);
bUserGPU
=
Negate
(
*
aGPU
);
/* check results */
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
);
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
/* destroy variables */
/* destroy variables */
delete
a
;
delete
a
;
delete
b
;
delete
aMe
;
delete
aGPU
;
delete
aGPU
;
delete
bGPU
;
delete
aMeGPU
;
delete
[]
aDimSize
;
delete
[]
aDimSize
;
return
cpuTest
&&
gpuTest
;
return
cpuTest
&&
gpuTest
;
#else
#else
/* destroy variables */
/* destroy variables */
delete
a
;
delete
a
;
delete
b
;
delete
aMe
;
delete
[]
aDimSize
;
delete
[]
aDimSize
;
return
cpuTest
;
return
cpuTest
;
...
@@ -114,15 +132,21 @@ bool TestNegate2()
...
@@ -114,15 +132,21 @@ bool TestNegate2()
/* create tensors */
/* create tensors */
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
b
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
aMe
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
bUser
;
/* initialize variables */
/* initialize variables */
a
->
SetData
(
aData
,
aUnitNum
);
a
->
SetData
(
aData
,
aUnitNum
);
aMe
->
SetData
(
aData
,
aUnitNum
);
/* call Negate function */
/* call Negate function */
_Negate
(
a
);
_Negate
(
a
,
b
);
_NegateMe
(
aMe
);
bUser
=
Negate
(
*
a
);
/* check results */
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
);
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -130,25 +154,37 @@ bool TestNegate2()
...
@@ -130,25 +154,37 @@ bool TestNegate2()
/* create tensor */
/* create tensor */
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
bGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aMeGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
bUserGPU
;
/* Initialize variables */
/* Initialize variables */
aGPU
->
SetData
(
aData
,
aUnitNum
);
aGPU
->
SetData
(
aData
,
aUnitNum
);
aMeGPU
->
SetData
(
aData
,
aUnitNum
);
/* call Negate function */
/* call Negate function */
_Negate
(
aGPU
);
_Negate
(
aGPU
,
bGPU
);
_NegateMe
(
aMeGPU
);
bUserGPU
=
Negate
(
*
aGPU
);
/* check results */
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
);
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
/* destroy variables */
/* destroy variables */
delete
a
;
delete
a
;
delete
b
;
delete
aMe
;
delete
aGPU
;
delete
aGPU
;
delete
bGPU
;
delete
aMeGPU
;
delete
[]
aDimSize
;
delete
[]
aDimSize
;
return
cpuTest
&&
gpuTest
;
return
cpuTest
&&
gpuTest
;
#else
#else
/* destroy variables */
/* destroy variables */
delete
a
;
delete
a
;
delete
b
;
delete
aMe
;
delete
[]
aDimSize
;
delete
[]
aDimSize
;
return
cpuTest
;
return
cpuTest
;
...
...
source/tensor/test/TPower.cpp
查看文件 @
906eebb7
...
@@ -52,15 +52,21 @@ bool TestPower1()
...
@@ -52,15 +52,21 @@ bool TestPower1()
/* create tensors */
/* create tensors */
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
b
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
aMe
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
bUser
;
/* initialize variables */
/* initialize variables */
a
->
SetData
(
aData
,
aUnitNum
);
a
->
SetData
(
aData
,
aUnitNum
);
aMe
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
/* call Power function */
_Power
(
a
,
2.0
F
);
_Power
(
a
,
b
,
2.0
F
);
_PowerMe
(
aMe
,
2.0
F
);
bUser
=
Power
(
*
a
,
2.0
F
);
/* check results */
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -68,25 +74,37 @@ bool TestPower1()
...
@@ -68,25 +74,37 @@ bool TestPower1()
/* create tensor */
/* create tensor */
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
bGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aMeGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
bUserGPU
;
/* Initialize variables */
/* Initialize variables */
aGPU
->
SetData
(
aData
,
aUnitNum
);
aGPU
->
SetData
(
aData
,
aUnitNum
);
aMeGPU
->
SetData
(
aData
,
aUnitNum
);
/* call power function */
/* call power function */
_Power
(
aGPU
,
2.0
F
);
_Power
(
aGPU
,
bGPU
,
2.0
F
);
_PowerMe
(
aMeGPU
,
2.0
F
);
bUserGPU
=
Power
(
*
aGPU
,
2.0
F
);
/* check results */
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
/* destroy variables */
/* destroy variables */
delete
a
;
delete
a
;
delete
b
;
delete
aMe
;
delete
aGPU
;
delete
aGPU
;
delete
bGPU
;
delete
aMeGPU
;
delete
[]
aDimSize
;
delete
[]
aDimSize
;
return
cpuTest
&&
gpuTest
;
return
cpuTest
&&
gpuTest
;
#else
#else
/* destroy variables */
/* destroy variables */
delete
a
;
delete
a
;
delete
b
;
delete
aMe
;
delete
[]
aDimSize
;
delete
[]
aDimSize
;
return
cpuTest
;
return
cpuTest
;
...
@@ -121,15 +139,21 @@ bool TestPower2()
...
@@ -121,15 +139,21 @@ bool TestPower2()
/* create tensors */
/* create tensors */
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
b
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
aMe
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
bUser
;
/* initialize variables */
/* initialize variables */
a
->
SetData
(
aData
,
aUnitNum
);
a
->
SetData
(
aData
,
aUnitNum
);
aMe
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
/* call Power function */
_Power
(
a
,
1.0
F
);
_Power
(
a
,
b
,
1.0
F
);
_PowerMe
(
aMe
,
1.0
F
);
bUser
=
Power
(
*
a
,
1.0
F
);
/* check results */
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -137,25 +161,37 @@ bool TestPower2()
...
@@ -137,25 +161,37 @@ bool TestPower2()
/* create tensor */
/* create tensor */
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
bGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aMeGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
bUserGPU
;
/* Initialize variables */
/* Initialize variables */
aGPU
->
SetData
(
aData
,
aUnitNum
);
aGPU
->
SetData
(
aData
,
aUnitNum
);
aMeGPU
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
/* call Power function */
_Power
(
aGPU
,
1.0
F
);
_Power
(
aGPU
,
bGPU
,
1.0
F
);
_PowerMe
(
aMeGPU
,
1.0
F
);
bUserGPU
=
Power
(
*
aGPU
,
1.0
F
);
/* check results */
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
/* destroy variables */
/* destroy variables */
delete
a
;
delete
a
;
delete
b
;
delete
aMe
;
delete
aGPU
;
delete
aGPU
;
delete
bGPU
;
delete
aMeGPU
;
delete
[]
aDimSize
;
delete
[]
aDimSize
;
return
cpuTest
&&
gpuTest
;
return
cpuTest
&&
gpuTest
;
#else
#else
/* destroy variables */
/* destroy variables */
delete
a
;
delete
a
;
delete
b
;
delete
aMe
;
delete
[]
aDimSize
;
delete
[]
aDimSize
;
return
cpuTest
;
return
cpuTest
;
...
@@ -190,15 +226,21 @@ bool TestPower3()
...
@@ -190,15 +226,21 @@ bool TestPower3()
/* create tensors */
/* create tensors */
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
b
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
aMe
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
bUser
;
/* initialize variables */
/* initialize variables */
a
->
SetData
(
aData
,
aUnitNum
);
a
->
SetData
(
aData
,
aUnitNum
);
aMe
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
/* call Power function */
_Power
(
a
,
0.0
F
);
_Power
(
a
,
b
,
0.0
F
);
_PowerMe
(
aMe
,
0.0
F
);
bUser
=
Power
(
*
a
,
0.0
F
);
/* check results */
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -206,25 +248,37 @@ bool TestPower3()
...
@@ -206,25 +248,37 @@ bool TestPower3()
/* create tensor */
/* create tensor */
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
bGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aMeGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
bUserGPU
;
/* Initialize variables */
/* Initialize variables */
aGPU
->
SetData
(
aData
,
aUnitNum
);
aGPU
->
SetData
(
aData
,
aUnitNum
);
aMeGPU
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
/* call Power function */
_Power
(
aGPU
,
0.0
F
);
_Power
(
aGPU
,
bGPU
,
0.0
F
);
_PowerMe
(
aMeGPU
,
0.0
F
);
bUserGPU
=
Power
(
*
aGPU
,
0.0
F
);
/* check results */
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
/* destroy variables */
/* destroy variables */
delete
a
;
delete
a
;
delete
b
;
delete
aMe
;
delete
aGPU
;
delete
aGPU
;
delete
bGPU
;
delete
aMeGPU
;
delete
[]
aDimSize
;
delete
[]
aDimSize
;
return
cpuTest
&&
gpuTest
;
return
cpuTest
&&
gpuTest
;
#else
#else
/* destroy variables */
/* destroy variables */
delete
a
;
delete
a
;
delete
b
;
delete
aMe
;
delete
[]
aDimSize
;
delete
[]
aDimSize
;
return
cpuTest
;
return
cpuTest
;
...
...
source/tensor/test/TRectify.cpp
查看文件 @
906eebb7
...
@@ -50,6 +50,7 @@ bool TestRectify1()
...
@@ -50,6 +50,7 @@ bool TestRectify1()
/* create tensors */
/* create tensors */
XTensor
*
x
=
NewTensor
(
order
,
dimSize
);
XTensor
*
x
=
NewTensor
(
order
,
dimSize
);
XTensor
*
y
=
NewTensor
(
order
,
dimSize
);
XTensor
*
y
=
NewTensor
(
order
,
dimSize
);
XTensor
yUser
;
/* initialize variables */
/* initialize variables */
x
->
SetData
(
xData
,
unitNum
);
x
->
SetData
(
xData
,
unitNum
);
...
@@ -57,9 +58,10 @@ bool TestRectify1()
...
@@ -57,9 +58,10 @@ bool TestRectify1()
/* call Rectify function */
/* call Rectify function */
_Rectify
(
x
,
y
);
_Rectify
(
x
,
y
);
yUser
=
Rectify
(
*
x
);
/* check results */
/* check results */
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
);
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUser
.
CheckData
(
answer
,
unitNum
,
1e-4
F
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -68,6 +70,7 @@ bool TestRectify1()
...
@@ -68,6 +70,7 @@ bool TestRectify1()
/* create tensor */
/* create tensor */
XTensor
*
xGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
xGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
yGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
yGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
yUserGPU
;
/* Initialize variables */
/* Initialize variables */
xGPU
->
SetData
(
xData
,
unitNum
);
xGPU
->
SetData
(
xData
,
unitNum
);
...
@@ -75,9 +78,10 @@ bool TestRectify1()
...
@@ -75,9 +78,10 @@ bool TestRectify1()
/* call Rectify function */
/* call Rectify function */
_Rectify
(
xGPU
,
yGPU
);
_Rectify
(
xGPU
,
yGPU
);
yUserGPU
=
Rectify
(
*
xGPU
);
/* check results */
/* check results */
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
);
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUserGPU
.
CheckData
(
answer
,
unitNum
,
1e-4
F
);
/* destroy variables */
/* destroy variables */
delete
x
;
delete
x
;
...
...
source/tensor/test/TReduceMax.cpp
查看文件 @
906eebb7
...
@@ -71,6 +71,8 @@ bool TestReduceMax1()
...
@@ -71,6 +71,8 @@ bool TestReduceMax1()
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
t1
=
NewTensor
(
tOrder1
,
tDimSize1
);
XTensor
*
t1
=
NewTensor
(
tOrder1
,
tDimSize1
);
XTensor
*
t2
=
NewTensor
(
tOrder2
,
tDimSize2
);
XTensor
*
t2
=
NewTensor
(
tOrder2
,
tDimSize2
);
XTensor
tUser1
;
XTensor
tUser2
;
/* initialize variables */
/* initialize variables */
s
->
SetData
(
sData
,
sUnitNum
);
s
->
SetData
(
sData
,
sUnitNum
);
...
@@ -80,9 +82,12 @@ bool TestReduceMax1()
...
@@ -80,9 +82,12 @@ bool TestReduceMax1()
/* call ReduceMax function */
/* call ReduceMax function */
_ReduceMax
(
s
,
t1
,
0
);
_ReduceMax
(
s
,
t1
,
0
);
_ReduceMax
(
s
,
t2
,
1
);
_ReduceMax
(
s
,
t2
,
1
);
tUser1
=
ReduceMax
(
*
s
,
0
);
tUser2
=
ReduceMax
(
*
s
,
1
);
/* check results */
/* check results */
cpuTest
=
t1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
t2
->
CheckData
(
answer2
,
tUnitNum2
);
cpuTest
=
t1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tUser1
.
CheckData
(
answer1
,
tUnitNum1
)
&&
t2
->
CheckData
(
answer2
,
tUnitNum2
)
&&
tUser2
.
CheckData
(
answer2
,
tUnitNum2
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -92,6 +97,8 @@ bool TestReduceMax1()
...
@@ -92,6 +97,8 @@ bool TestReduceMax1()
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU1
=
NewTensor
(
tOrder1
,
tDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU1
=
NewTensor
(
tOrder1
,
tDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU2
=
NewTensor
(
tOrder2
,
tDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU2
=
NewTensor
(
tOrder2
,
tDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU1
;
XTensor
tUserGPU2
;
/* initialize variables */
/* initialize variables */
sGPU
->
SetData
(
sData
,
sUnitNum
);
sGPU
->
SetData
(
sData
,
sUnitNum
);
...
@@ -101,9 +108,12 @@ bool TestReduceMax1()
...
@@ -101,9 +108,12 @@ bool TestReduceMax1()
/* call ReduceMax function */
/* call ReduceMax function */
_ReduceMax
(
sGPU
,
tGPU1
,
0
);
_ReduceMax
(
sGPU
,
tGPU1
,
0
);
_ReduceMax
(
sGPU
,
tGPU2
,
1
);
_ReduceMax
(
sGPU
,
tGPU2
,
1
);
tUserGPU1
=
ReduceMax
(
*
sGPU
,
0
);
tUserGPU2
=
ReduceMax
(
*
sGPU
,
1
);
/* check results */
/* check results */
gpuTest
=
tGPU1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tGPU2
->
CheckData
(
answer2
,
tUnitNum2
);
gpuTest
=
tGPU1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tUserGPU1
.
CheckData
(
answer1
,
tUnitNum1
)
&&
tGPU2
->
CheckData
(
answer2
,
tUnitNum2
)
&&
tUserGPU2
.
CheckData
(
answer2
,
tUnitNum2
);
/* destroy variables */
/* destroy variables */
delete
s
;
delete
s
;
...
...
source/tensor/test/TReduceMean.cpp
查看文件 @
906eebb7
...
@@ -66,6 +66,8 @@ bool TestReduceMean1()
...
@@ -66,6 +66,8 @@ bool TestReduceMean1()
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
t1
=
NewTensor
(
tOrder1
,
tDimSize1
);
XTensor
*
t1
=
NewTensor
(
tOrder1
,
tDimSize1
);
XTensor
*
t2
=
NewTensor
(
tOrder2
,
tDimSize2
);
XTensor
*
t2
=
NewTensor
(
tOrder2
,
tDimSize2
);
XTensor
tUser1
;
XTensor
tUser2
;
/* initialize variables */
/* initialize variables */
s
->
SetData
(
sData
,
sUnitNum
);
s
->
SetData
(
sData
,
sUnitNum
);
...
@@ -75,9 +77,12 @@ bool TestReduceMean1()
...
@@ -75,9 +77,12 @@ bool TestReduceMean1()
/* call ReduceMean function */
/* call ReduceMean function */
_ReduceMean
(
s
,
t1
,
0
);
_ReduceMean
(
s
,
t1
,
0
);
_ReduceMean
(
s
,
t2
,
1
);
_ReduceMean
(
s
,
t2
,
1
);
tUser1
=
ReduceMean
(
*
s
,
0
);
tUser2
=
ReduceMean
(
*
s
,
1
);
/* check results */
/* check results */
cpuTest
=
t1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
t2
->
CheckData
(
answer2
,
tUnitNum2
);
cpuTest
=
t1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tUser1
.
CheckData
(
answer1
,
tUnitNum1
)
&&
t2
->
CheckData
(
answer2
,
tUnitNum2
)
&&
tUser2
.
CheckData
(
answer2
,
tUnitNum2
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -87,6 +92,8 @@ bool TestReduceMean1()
...
@@ -87,6 +92,8 @@ bool TestReduceMean1()
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU1
=
NewTensor
(
tOrder1
,
tDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU1
=
NewTensor
(
tOrder1
,
tDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU2
=
NewTensor
(
tOrder2
,
tDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU2
=
NewTensor
(
tOrder2
,
tDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU1
;
XTensor
tUserGPU2
;
/* Initialize variables */
/* Initialize variables */
sGPU
->
SetData
(
sData
,
sUnitNum
);
sGPU
->
SetData
(
sData
,
sUnitNum
);
...
@@ -96,9 +103,12 @@ bool TestReduceMean1()
...
@@ -96,9 +103,12 @@ bool TestReduceMean1()
/* call ReduceMean function */
/* call ReduceMean function */
_ReduceMean
(
sGPU
,
tGPU1
,
0
);
_ReduceMean
(
sGPU
,
tGPU1
,
0
);
_ReduceMean
(
sGPU
,
tGPU2
,
1
);
_ReduceMean
(
sGPU
,
tGPU2
,
1
);
tUserGPU1
=
ReduceMean
(
*
sGPU
,
0
);
tUserGPU2
=
ReduceMean
(
*
sGPU
,
1
);
/* check results */
/* check results */
cpuTest
=
tGPU1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tGPU2
->
CheckData
(
answer2
,
tUnitNum2
);
gpuTest
=
tGPU1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tUserGPU1
.
CheckData
(
answer1
,
tUnitNum1
)
&&
tGPU2
->
CheckData
(
answer2
,
tUnitNum2
)
&&
tUserGPU2
.
CheckData
(
answer2
,
tUnitNum2
);
/* destroy variables */
/* destroy variables */
delete
s
;
delete
s
;
...
...
source/tensor/test/TReduceSum.cpp
查看文件 @
906eebb7
...
@@ -69,20 +69,29 @@ bool TestReduceSum1()
...
@@ -69,20 +69,29 @@ bool TestReduceSum1()
/* create tensors */
/* create tensors */
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
shift1
=
NewTensor
(
tOrder1
,
tDimSize1
);
XTensor
*
shift2
=
NewTensor
(
tOrder2
,
tDimSize2
);
XTensor
*
t1
=
NewTensor
(
tOrder1
,
tDimSize1
);
XTensor
*
t1
=
NewTensor
(
tOrder1
,
tDimSize1
);
XTensor
*
t2
=
NewTensor
(
tOrder2
,
tDimSize2
);
XTensor
*
t2
=
NewTensor
(
tOrder2
,
tDimSize2
);
XTensor
tUser1
;
XTensor
tUser2
;
/* initialize variables */
/* initialize variables */
s
->
SetData
(
sData
,
sUnitNum
);
s
->
SetData
(
sData
,
sUnitNum
);
shift1
->
SetZeroAll
();
shift2
->
SetZeroAll
();
t1
->
SetZeroAll
();
t1
->
SetZeroAll
();
t2
->
SetZeroAll
();
t2
->
SetZeroAll
();
/* call ReduceSum function */
/* call ReduceSum function */
_ReduceSum
(
s
,
t1
,
0
);
_ReduceSum
(
s
,
t1
,
0
);
_ReduceSum
(
s
,
t2
,
1
);
_ReduceSum
(
s
,
t2
,
1
);
tUser1
=
ReduceSum
(
*
s
,
0
,
*
shift1
);
tUser2
=
ReduceSum
(
*
s
,
1
,
*
shift2
);
/* check results */
/* check results */
cpuTest
=
t1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
t2
->
CheckData
(
answer2
,
tUnitNum2
);
cpuTest
=
t1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tUser1
.
CheckData
(
answer1
,
tUnitNum1
)
&&
t2
->
CheckData
(
answer2
,
tUnitNum2
)
&&
tUser2
.
CheckData
(
answer2
,
tUnitNum2
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -90,26 +99,39 @@ bool TestReduceSum1()
...
@@ -90,26 +99,39 @@ bool TestReduceSum1()
/* create tensors */
/* create tensors */
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
shiftGPU1
=
NewTensor
(
tOrder1
,
tDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
shiftGPU2
=
NewTensor
(
tOrder2
,
tDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU1
=
NewTensor
(
tOrder1
,
tDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU1
=
NewTensor
(
tOrder1
,
tDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU2
=
NewTensor
(
tOrder2
,
tDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU2
=
NewTensor
(
tOrder2
,
tDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU1
;
XTensor
tUserGPU2
;
/* initialize variables */
/* initialize variables */
sGPU
->
SetData
(
sData
,
sUnitNum
);
sGPU
->
SetData
(
sData
,
sUnitNum
);
shiftGPU1
->
SetZeroAll
();
shiftGPU2
->
SetZeroAll
();
tGPU1
->
SetZeroAll
();
tGPU1
->
SetZeroAll
();
tGPU2
->
SetZeroAll
();
tGPU2
->
SetZeroAll
();
/* call ReduceSum function */
/* call ReduceSum function */
_ReduceSum
(
sGPU
,
tGPU1
,
0
);
_ReduceSum
(
sGPU
,
tGPU1
,
0
);
_ReduceSum
(
sGPU
,
tGPU2
,
1
);
_ReduceSum
(
sGPU
,
tGPU2
,
1
);
tUserGPU1
=
ReduceSum
(
*
sGPU
,
0
,
*
shiftGPU1
);
tUserGPU2
=
ReduceSum
(
*
sGPU
,
1
,
*
shiftGPU2
);
/* check results */
/* check results */
cpuTest
=
tGPU1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tGPU2
->
CheckData
(
answer2
,
tUnitNum2
);
gpuTest
=
tGPU1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tUserGPU1
.
CheckData
(
answer1
,
tUnitNum1
)
&&
tGPU2
->
CheckData
(
answer2
,
tUnitNum2
)
&&
tUserGPU2
.
CheckData
(
answer2
,
tUnitNum2
);
/* destroy variables */
/* destroy variables */
delete
s
;
delete
s
;
delete
shift1
;
delete
shift2
;
delete
t1
;
delete
t1
;
delete
t2
;
delete
t2
;
delete
sGPU
;
delete
sGPU
;
delete
shiftGPU1
;
delete
shiftGPU2
;
delete
tGPU1
;
delete
tGPU1
;
delete
tGPU2
;
delete
tGPU2
;
delete
[]
sDimSize
;
delete
[]
sDimSize
;
...
@@ -120,6 +142,8 @@ bool TestReduceSum1()
...
@@ -120,6 +142,8 @@ bool TestReduceSum1()
#else
#else
/* destroy variables */
/* destroy variables */
delete
s
;
delete
s
;
delete
shift1
;
delete
shift2
;
delete
t1
;
delete
t1
;
delete
t2
;
delete
t2
;
delete
[]
sDimSize
;
delete
[]
sDimSize
;
...
...
source/tensor/test/TReduceSumSquared.cpp
查看文件 @
906eebb7
...
@@ -70,6 +70,7 @@ bool TestReduceSumSquared1()
...
@@ -70,6 +70,7 @@ bool TestReduceSumSquared1()
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
shift
=
NewTensor
(
shiftOrder
,
shiftDimSize
);
XTensor
*
shift
=
NewTensor
(
shiftOrder
,
shiftDimSize
);
XTensor
tUser
;
/* initialize variables */
/* initialize variables */
s
->
SetData
(
sData
,
sUnitNum
);
s
->
SetData
(
sData
,
sUnitNum
);
...
@@ -78,9 +79,10 @@ bool TestReduceSumSquared1()
...
@@ -78,9 +79,10 @@ bool TestReduceSumSquared1()
/* call ReduceSumSquared function */
/* call ReduceSumSquared function */
_ReduceSumSquared
(
s
,
t
,
0
,
shift
);
_ReduceSumSquared
(
s
,
t
,
0
,
shift
);
tUser
=
ReduceSumSquared
(
*
s
,
0
,
*
shift
);
/* check results */
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -90,6 +92,7 @@ bool TestReduceSumSquared1()
...
@@ -90,6 +92,7 @@ bool TestReduceSumSquared1()
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
shiftGPU
=
NewTensor
(
shiftOrder
,
shiftDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
shiftGPU
=
NewTensor
(
shiftOrder
,
shiftDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* initialize variables */
/* initialize variables */
sGPU
->
SetData
(
sData
,
sUnitNum
);
sGPU
->
SetData
(
sData
,
sUnitNum
);
...
@@ -98,9 +101,10 @@ bool TestReduceSumSquared1()
...
@@ -98,9 +101,10 @@ bool TestReduceSumSquared1()
/* call ReduceSumSquared function */
/* call ReduceSumSquared function */
_ReduceSumSquared
(
sGPU
,
tGPU
,
0
,
shiftGPU
);
_ReduceSumSquared
(
sGPU
,
tGPU
,
0
,
shiftGPU
);
tUserGPU
=
ReduceSumSquared
(
*
sGPU
,
0
,
*
shiftGPU
);
/* check results */
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
)
;
/* destroy variables */
/* destroy variables */
delete
s
;
delete
s
;
...
@@ -174,6 +178,7 @@ bool TestReduceSumSquared2()
...
@@ -174,6 +178,7 @@ bool TestReduceSumSquared2()
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
shift
=
NewTensor
(
shiftOrder
,
shiftDimSize
);
XTensor
*
shift
=
NewTensor
(
shiftOrder
,
shiftDimSize
);
XTensor
tUser
;
/* initialize variables */
/* initialize variables */
s
->
SetData
(
sData
,
sUnitNum
);
s
->
SetData
(
sData
,
sUnitNum
);
...
@@ -182,9 +187,10 @@ bool TestReduceSumSquared2()
...
@@ -182,9 +187,10 @@ bool TestReduceSumSquared2()
/* call ReduceSumSquared function */
/* call ReduceSumSquared function */
_ReduceSumSquared
(
s
,
t
,
1
,
shift
);
_ReduceSumSquared
(
s
,
t
,
1
,
shift
);
tUser
=
ReduceSumSquared
(
*
s
,
1
,
*
shift
);
/* check results */
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -194,6 +200,7 @@ bool TestReduceSumSquared2()
...
@@ -194,6 +200,7 @@ bool TestReduceSumSquared2()
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
shiftGPU
=
NewTensor
(
shiftOrder
,
shiftDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
shiftGPU
=
NewTensor
(
shiftOrder
,
shiftDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* initialize variables */
/* initialize variables */
sGPU
->
SetData
(
sData
,
sUnitNum
);
sGPU
->
SetData
(
sData
,
sUnitNum
);
...
@@ -202,9 +209,10 @@ bool TestReduceSumSquared2()
...
@@ -202,9 +209,10 @@ bool TestReduceSumSquared2()
/* call ReduceSumSquared function */
/* call ReduceSumSquared function */
_ReduceSumSquared
(
sGPU
,
tGPU
,
1
,
shiftGPU
);
_ReduceSumSquared
(
sGPU
,
tGPU
,
1
,
shiftGPU
);
tUserGPU
=
ReduceSumSquared
(
*
sGPU
,
1
,
*
shiftGPU
);
/* check results */
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
)
;
/* destroy variables */
/* destroy variables */
delete
s
;
delete
s
;
...
...
source/tensor/test/TReduceVariance.cpp
查看文件 @
906eebb7
...
@@ -70,6 +70,7 @@ bool TestReduceVariance1()
...
@@ -70,6 +70,7 @@ bool TestReduceVariance1()
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
mean
=
NewTensor
(
meanOrder
,
meanDimSize
);
XTensor
*
mean
=
NewTensor
(
meanOrder
,
meanDimSize
);
XTensor
tUser
;
/* initialize variables */
/* initialize variables */
s
->
SetData
(
sData
,
sUnitNum
);
s
->
SetData
(
sData
,
sUnitNum
);
...
@@ -78,9 +79,10 @@ bool TestReduceVariance1()
...
@@ -78,9 +79,10 @@ bool TestReduceVariance1()
/* call ReduceVariance function */
/* call ReduceVariance function */
_ReduceVariance
(
s
,
t
,
0
,
mean
);
_ReduceVariance
(
s
,
t
,
0
,
mean
);
tUser
=
ReduceVariance
(
*
s
,
0
,
*
mean
);
/* check results */
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -90,6 +92,7 @@ bool TestReduceVariance1()
...
@@ -90,6 +92,7 @@ bool TestReduceVariance1()
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
meanGPU
=
NewTensor
(
meanOrder
,
meanDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
meanGPU
=
NewTensor
(
meanOrder
,
meanDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* initialize variables */
/* initialize variables */
sGPU
->
SetData
(
sData
,
sUnitNum
);
sGPU
->
SetData
(
sData
,
sUnitNum
);
...
@@ -98,9 +101,10 @@ bool TestReduceVariance1()
...
@@ -98,9 +101,10 @@ bool TestReduceVariance1()
/* call ReduceVariance function */
/* call ReduceVariance function */
_ReduceVariance
(
sGPU
,
tGPU
,
0
,
meanGPU
);
_ReduceVariance
(
sGPU
,
tGPU
,
0
,
meanGPU
);
tUserGPU
=
ReduceVariance
(
*
sGPU
,
0
,
*
meanGPU
);
/* check results */
/* check results */
gpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
t
GPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
);
/* destroy variables */
/* destroy variables */
delete
s
;
delete
s
;
...
...
source/tensor/test/TSigmoid.cpp
查看文件 @
906eebb7
...
@@ -48,6 +48,7 @@ bool TestSigmoid1()
...
@@ -48,6 +48,7 @@ bool TestSigmoid1()
/* create tensors */
/* create tensors */
XTensor
*
x
=
NewTensor
(
order
,
dimSize
);
XTensor
*
x
=
NewTensor
(
order
,
dimSize
);
XTensor
*
y
=
NewTensor
(
order
,
dimSize
);
XTensor
*
y
=
NewTensor
(
order
,
dimSize
);
XTensor
yUser
;
/* initialize variables */
/* initialize variables */
x
->
SetData
(
xData
,
unitNum
);
x
->
SetData
(
xData
,
unitNum
);
...
@@ -55,9 +56,10 @@ bool TestSigmoid1()
...
@@ -55,9 +56,10 @@ bool TestSigmoid1()
/* call Sigmoid function */
/* call Sigmoid function */
_Sigmoid
(
x
,
y
);
_Sigmoid
(
x
,
y
);
yUser
=
Sigmoid
(
*
x
);
/* check result */
/* check result */
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
,
1e-4
F
);
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUser
.
CheckData
(
answer
,
unitNum
,
1e-4
F
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -66,6 +68,7 @@ bool TestSigmoid1()
...
@@ -66,6 +68,7 @@ bool TestSigmoid1()
/* create tensors */
/* create tensors */
XTensor
*
xGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
xGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
yGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
yGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
yUserGPU
;
/* initialize variables */
/* initialize variables */
xGPU
->
SetData
(
xData
,
unitNum
);
xGPU
->
SetData
(
xData
,
unitNum
);
...
@@ -73,9 +76,10 @@ bool TestSigmoid1()
...
@@ -73,9 +76,10 @@ bool TestSigmoid1()
/* call Sigmoid function */
/* call Sigmoid function */
_Sigmoid
(
xGPU
,
yGPU
);
_Sigmoid
(
xGPU
,
yGPU
);
yUserGPU
=
Sigmoid
(
*
xGPU
);
/* check result */
/* check result */
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
,
1e-4
F
);
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUserGPU
.
CheckData
(
answer
,
unitNum
,
1e-4
F
);
/* destroy variables */
/* destroy variables */
delete
x
;
delete
x
;
...
...
source/tensor/test/TSign.cpp
查看文件 @
906eebb7
差异被折叠。
点击展开。
source/tensor/test/TSoftmax.cpp
查看文件 @
906eebb7
差异被折叠。
点击展开。
source/tensor/test/TSort.cpp
查看文件 @
906eebb7
差异被折叠。
点击展开。
source/tensor/test/TSplit.cpp
查看文件 @
906eebb7
差异被折叠。
点击展开。
source/tensor/test/TTopK.cpp
查看文件 @
906eebb7
差异被折叠。
点击展开。
source/tensor/test/TUnsqueeze.cpp
查看文件 @
906eebb7
差异被折叠。
点击展开。
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论