Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
N
NiuTrans.Tensor
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
0
Issues
0
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
Emmay
NiuTrans.Tensor
Commits
906eebb7
Commit
906eebb7
authored
Jul 19, 2018
by
xuchen
Browse files
Options
Browse Files
Download
Plain Diff
merge with xuchen branch
parents
7283b3dc
b3a76184
全部展开
显示空白字符变更
内嵌
并排
正在显示
106 个修改的文件
包含
1178 行增加
和
423 行删除
+1178
-423
source/network/XBackwardLoss.cpp
+2
-1
source/tensor/XLink.cpp
+31
-0
source/tensor/XLink.h
+4
-0
source/tensor/XName.cpp
+41
-37
source/tensor/XName.h
+4
-0
source/tensor/XTensor.cpp
+1
-1
source/tensor/XUtility.cpp
+4
-3
source/tensor/XUtility.h
+1
-1
source/tensor/core/arithmetic/Absolute.cpp
+39
-4
source/tensor/core/arithmetic/Absolute.cu
+14
-10
source/tensor/core/arithmetic/Absolute.cuh
+3
-3
source/tensor/core/arithmetic/Absolute.h
+13
-2
source/tensor/core/arithmetic/MatrixMULBatchedCPU.cpp
+2
-2
source/tensor/core/arithmetic/MatrixMULBatchedCPU.h
+2
-2
source/tensor/core/arithmetic/MatrixMul.h
+0
-0
source/tensor/core/arithmetic/MatrixMul2D.cpp
+0
-0
source/tensor/core/arithmetic/MatrixMul2D.cu
+1
-2
source/tensor/core/arithmetic/MatrixMul2D.cuh
+0
-0
source/tensor/core/arithmetic/MatrixMul2DParallel.cpp
+0
-0
source/tensor/core/arithmetic/MatrixMul2DParallel.h
+2
-2
source/tensor/core/arithmetic/MatrixMulBatched.cpp
+5
-8
source/tensor/core/arithmetic/Negate.cpp
+41
-5
source/tensor/core/arithmetic/Negate.cu
+15
-11
source/tensor/core/arithmetic/Negate.cuh
+3
-3
source/tensor/core/arithmetic/Negate.h
+13
-2
source/tensor/core/arithmetic/Sign.cpp
+41
-6
source/tensor/core/arithmetic/Sign.cu
+19
-15
source/tensor/core/arithmetic/Sign.cuh
+3
-3
source/tensor/core/arithmetic/Sign.h
+13
-2
source/tensor/core/arithmetic/XTensorBLAS.cpp
+0
-0
source/tensor/core/arithmetic/XTensorBLAS.cu
+0
-0
source/tensor/core/arithmetic/XTensorBLAS.h
+8
-4
source/tensor/core/getandset/Select.cpp
+2
-3
source/tensor/core/math/Log.cpp
+41
-6
source/tensor/core/math/Log.cu
+13
-9
source/tensor/core/math/Log.cuh
+3
-3
source/tensor/core/math/Log.h
+13
-2
source/tensor/core/math/Power.cpp
+45
-9
source/tensor/core/math/Power.cu
+27
-17
source/tensor/core/math/Power.cuh
+3
-3
source/tensor/core/math/Power.h
+13
-2
source/tensor/core/movement/CopyIndexed.cpp
+6
-7
source/tensor/core/reduce/ReduceMax.cpp
+8
-9
source/tensor/core/reduce/ReduceMean.cpp
+5
-6
source/tensor/core/reduce/ReduceSum.cpp
+52
-6
source/tensor/core/reduce/ReduceSum.h
+10
-1
source/tensor/core/reduce/ReduceSumSquared.cpp
+5
-6
source/tensor/core/reduce/ReduceVariance.cpp
+10
-6
source/tensor/core/shape/ConcatenateSolely.cpp
+1
-1
source/tensor/core/shape/MakeMergeBlockIndex.cpp
+0
-0
source/tensor/core/shape/MakeMergeBlockIndex.cu
+0
-0
source/tensor/core/shape/MakeMergeBlockIndex.cuh
+1
-2
source/tensor/core/shape/MakeMergeBlockIndex.h
+0
-0
source/tensor/core/shape/Merge.cpp
+8
-12
source/tensor/core/shape/Split.cpp
+10
-37
source/tensor/core/shape/Split.h
+1
-1
source/tensor/core/shape/Unsqueeze.cpp
+2
-4
source/tensor/core/sort/Sort.cpp
+44
-6
source/tensor/core/sort/Sort.cu
+2
-2
source/tensor/core/sort/Sort.h
+13
-1
source/tensor/core/sort/TopK.cpp
+25
-0
source/tensor/core/sort/TopK.h
+5
-0
source/tensor/function/HardTanH.cpp
+6
-2
source/tensor/function/HardTanH.cu
+1
-1
source/tensor/function/HardTanH.h
+1
-1
source/tensor/function/Identity.cpp
+22
-1
source/tensor/function/Identity.h
+3
-2
source/tensor/function/LogSoftmax.cpp
+6
-2
source/tensor/function/LogSoftmax.h
+1
-1
source/tensor/function/Loss.cpp
+7
-7
source/tensor/function/Loss.cu
+10
-10
source/tensor/function/Loss.cuh
+4
-4
source/tensor/function/Loss.h
+4
-4
source/tensor/function/Rectify.cpp
+24
-1
source/tensor/function/Rectify.cu
+1
-1
source/tensor/function/Rectify.h
+3
-2
source/tensor/function/Sigmoid.cpp
+23
-1
source/tensor/function/Sigmoid.cu
+1
-1
source/tensor/function/Sigmoid.h
+3
-2
source/tensor/function/Softmax.cpp
+23
-0
source/tensor/function/Softmax.cuh
+1
-1
source/tensor/function/Softmax.h
+3
-2
source/tensor/test/TAbsolute.cpp
+22
-4
source/tensor/test/TConcatenate.cpp
+24
-8
source/tensor/test/THardTanH.cpp
+6
-2
source/tensor/test/TIdentity.cpp
+6
-2
source/tensor/test/TLog.cpp
+22
-4
source/tensor/test/TLogSoftmax.cpp
+6
-2
source/tensor/test/TLoss.cpp
+12
-9
source/tensor/test/TMatrixMulBatched.cpp
+12
-4
source/tensor/test/TMerge.cpp
+38
-16
source/tensor/test/TNegate.cpp
+44
-8
source/tensor/test/TPower.cpp
+66
-12
source/tensor/test/TRectify.cpp
+6
-2
source/tensor/test/TReduceMax.cpp
+12
-2
source/tensor/test/TReduceMean.cpp
+12
-2
source/tensor/test/TReduceSum.cpp
+26
-2
source/tensor/test/TReduceSumSquared.cpp
+12
-4
source/tensor/test/TReduceVariance.cpp
+6
-2
source/tensor/test/TSigmoid.cpp
+6
-2
source/tensor/test/TSign.cpp
+0
-0
source/tensor/test/TSoftmax.cpp
+0
-0
source/tensor/test/TSort.cpp
+0
-0
source/tensor/test/TSplit.cpp
+0
-0
source/tensor/test/TTopK.cpp
+0
-0
source/tensor/test/TUnsqueeze.cpp
+0
-0
没有找到文件。
source/network/XBackwardLoss.cpp
查看文件 @
906eebb7
...
...
@@ -70,7 +70,7 @@ void XLossGrad::Compute(XTensor * gold, XTensor * y,
XTensor
*
dedy
,
LOSS_FUNCTION_NAME
lossName
)
{
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
_
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
}
}
\ No newline at end of file
source/tensor/XLink.cpp
查看文件 @
906eebb7
...
...
@@ -328,6 +328,37 @@ void XLink::MakeLink(const XList * list, XTensor * h, int id)
}
/*
create a hyper edge with an input tensor and a list of output tensors
>> t - the input tensor
>> list - a list of output tensors (NULL items are skipped)
>> id - id of the edge type
*/
void XLink::MakeLink(XTensor * t, XList * list, int id)
{
    /* forward: every non-NULL tensor in the list gets a freshly reset
       incoming edge with itself as the head, the given type and t as a tail */
    for(int i = 0; i < list->count; i++){
        XTensor * h = (XTensor*)list->GetItem(i);
        if(h == NULL)
            continue;
        XLink &income = h->income;
        income.Reset();
        income.SetHead(h);
        income.SetType(id);
        income.AddTail(t);
    }

    /* backward: the outgoing edge of t collects every non-NULL output tensor */
    XLink &outgo = t->outgo;
    CheckNTErrors(outgo.head == NULL || outgo.head == t, "Wrong head of the hyperedge!");

    /* NOTE(review): the head of outgo is only checked here, it is never set
       in this function - confirm that this is intended */
    for(int i = 0; i < list->count; i++){
        /* named "out" so that it does not shadow the parameter t */
        XTensor * out = (XTensor*)list->GetItem(i);
        if(out == NULL)
            continue;
        outgo.AddTail(out);
    }
}
/*
add parameters
>> h - head
>> param - parameter we want introduce
...
...
source/tensor/XLink.h
查看文件 @
906eebb7
...
...
@@ -139,6 +139,10 @@ struct XLink
static
void
MakeLink
(
const
XList
*
list
,
XTensor
*
h
,
int
id
);
/* create a hyper edge with an input tensor and a list of output tensors */
static
void
MakeLink
(
XTensor
*
h
,
XList
*
list
,
int
id
);
/* add a parameter */
static
void
AddParamToHead
(
XTensor
*
h
,
DTYPE
param
);
...
...
source/tensor/XName.cpp
查看文件 @
906eebb7
...
...
@@ -26,76 +26,80 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* get operator name */
const
char
*
GetOPName
(
int
type
)
{
if
((
type
&
MATH_BASE
)
!=
0
){
if
(
type
==
MATH_ABSOLUTE
)
if
((
type
&
MATH_BASE
)
!=
0
){
if
(
type
==
MATH_ABSOLUTE
)
return
"M_ABSOLUTE"
;
else
if
(
type
==
MATH_MATRIXMUL
)
else
if
(
type
==
MATH_MATRIXMUL
)
return
"M_MATRIXMUL"
;
else
if
(
type
==
MATH_MATRIXMULBATCHED
)
else
if
(
type
==
MATH_MATRIXMULBATCHED
)
return
"M_MATRIXMULBATCHED"
;
else
if
(
type
==
MATH_MULTIPLY
)
else
if
(
type
==
MATH_MULTIPLY
)
return
"M_MULTIPLY"
;
else
if
(
type
==
MATH_NEGATE
)
else
if
(
type
==
MATH_NEGATE
)
return
"M_NEGATE"
;
else
if
(
type
==
MATH_SIGN
)
else
if
(
type
==
MATH_SIGN
)
return
"M_SIGN"
;
else
if
(
type
==
MATH_SUM
)
else
if
(
type
==
MATH_SUM
)
return
"M_SUM"
;
else
if
(
type
==
MATH_LOG
)
return
"M_NORMALIZE"
;
else
if
(
type
==
MATH_NORMALIZE
)
else
if
(
type
==
MATH_LOG
)
return
"M_LOG"
;
else
if
(
type
==
MATH_POWER
)
else
if
(
type
==
MATH_NORMALIZE
)
return
"M_NORMALIZE"
;
else
if
(
type
==
MATH_POWER
)
return
"M_POWER"
;
else
if
(
type
==
MATH_SCALEANDSHIFT
)
else
if
(
type
==
MATH_SCALEANDSHIFT
)
return
"M_SCALEANDSHIFT"
;
else
if
(
type
==
REDUCE_REDUCEMAX
)
else
if
(
type
==
REDUCE_REDUCEMAX
)
return
"R_REDUCEMAX"
;
else
if
(
type
==
REDUCE_REDUCEMEAN
)
else
if
(
type
==
REDUCE_REDUCEMEAN
)
return
"R_REDUCEMEAN"
;
else
if
(
type
==
REDUCE_REDUCESUM
)
else
if
(
type
==
REDUCE_REDUCESUM
)
return
"R_REDUCESUM"
;
else
if
(
type
==
REDUCE_REDUCESUMSQUARED
)
else
if
(
type
==
REDUCE_REDUCESUMSQUARED
)
return
"R_REDUCESUMSQUARED"
;
else
if
(
type
==
REDUCE_REDUCEVARIANCE
)
else
if
(
type
==
REDUCE_REDUCEVARIANCE
)
return
"R_REDUCEVARIANCE"
;
}
else
if
((
type
&
DATA_BASE
)
!=
0
){
if
(
type
==
GETANDSET_SELECT
)
else
if
((
type
&
DATA_BASE
)
!=
0
){
if
(
type
==
GETANDSET_SELECT
)
return
"G_SELECT"
;
else
if
(
type
==
MOVEMENT_COPYINDEXED
)
else
if
(
type
==
MOVEMENT_COPYINDEXED
)
return
"M_COPYINDEXED"
;
else
if
(
type
==
MOVEMENT_COPYVALUES
)
else
if
(
type
==
MOVEMENT_COPYVALUES
)
return
"M_COPYVALUES"
;
else
if
(
type
==
SHAPE_CONCATENATE
)
else
if
(
type
==
SHAPE_CONCATENATE
)
return
"S_CONCATENATE"
;
else
if
(
type
==
SHAPE_MERGE
)
else
if
(
type
==
SHAPE_MERGE
)
return
"S_MERGE"
;
else
if
(
type
==
SHAPE_MERGE_LIST
)
else
if
(
type
==
SHAPE_MERGE_LIST
)
return
"S_MERGE_LIST"
;
else
if
(
type
==
SHAPE_PERMUTE
)
else
if
(
type
==
SHAPE_PERMUTE
)
return
"S_PERMUTE"
;
else
if
(
type
==
SHAPE_SPLIT
)
else
if
(
type
==
SHAPE_SPLIT
)
return
"S_SPLIT"
;
else
if
(
type
==
SHAPE_SPLIT_LIST
)
else
if
(
type
==
SHAPE_SPLIT_LIST
)
return
"S_SPLIT_LIST"
;
else
if
(
type
==
SHAPE_TRANSPOSE
)
else
if
(
type
==
SHAPE_TRANSPOSE
)
return
"S_TRANSPOSE"
;
else
if
(
type
==
SHAPE_UNSQUEEZE
)
else
if
(
type
==
SHAPE_UNSQUEEZE
)
return
"S_UNSQUEEZE"
;
else
if
(
type
==
SORT_SORT
)
return
"S_SORT"
;
else
if
(
type
==
SORT_TOPK
)
return
"S_TOPK"
;
}
else
if
((
type
&
FUNCTION_BASE
)
!=
0
){
if
(
type
==
FUNC_HARDTANH
)
else
if
((
type
&
FUNCTION_BASE
)
!=
0
){
if
(
type
==
FUNC_HARDTANH
)
return
"F_HARDTANH"
;
else
if
(
type
==
FUNC_IDENTITY
)
else
if
(
type
==
FUNC_IDENTITY
)
return
"F_IDENTITY"
;
else
if
(
type
==
FUNC_LOGSOFTMAX
)
else
if
(
type
==
FUNC_LOGSOFTMAX
)
return
"F_LOGSOFTMAX"
;
else
if
(
type
==
FUNC_RECTIFY
)
else
if
(
type
==
FUNC_RECTIFY
)
return
"F_RECTIFY"
;
else
if
(
type
==
FUNC_SIGMOID
)
else
if
(
type
==
FUNC_SIGMOID
)
return
"F_SIGMOID"
;
else
if
(
type
==
FUNC_SOFTMAX
)
else
if
(
type
==
FUNC_SOFTMAX
)
return
"F_SOFTMAX"
;
}
...
...
source/tensor/XName.h
查看文件 @
906eebb7
...
...
@@ -69,6 +69,10 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* operator ids are built by chaining "+ 1" on the previous id; every replacement
   list is parenthesized so that an id expands to a single value wherever it is used
   (SHAPE_SPLIT_LIST and DATA_BASE are defined outside of this excerpt) */
#define SHAPE_TRANSPOSE (SHAPE_SPLIT_LIST + 1)
#define SHAPE_UNSQUEEZE (SHAPE_TRANSPOSE + 1)
#define SORT (SHAPE_UNSQUEEZE + 1)
#define SORT_SORT (SORT + 1)
#define SORT_TOPK (SORT_SORT + 1)
/* activation functions */
#define FUNCTION_BASE (DATA_BASE * 2)
#define FUNC_HARDTANH (FUNCTION_BASE + 1)
...
...
source/tensor/XTensor.cpp
查看文件 @
906eebb7
...
...
@@ -1121,7 +1121,7 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
if
(
isSparse
){
/*
for sparse matrices, we use a list of tuple (key, value),
ordered by key. Take a (2-dimensional) matrix as an example
s
,
ordered by key. Take a (2-dimensional) matrix as an example,
we have key = m * i + j;
The data array is
---------
...
...
source/tensor/XUtility.cpp
查看文件 @
906eebb7
...
...
@@ -486,8 +486,9 @@ quick sorting
NOTE: this means that the items may not be placed in a contiguous memory space
>> comp - the comparison function
*/
void
XQSort
(
void
*
data
,
void
*
index
,
int
num
,
int
width
,
int
stride
,
int
(
*
comp
)(
const
void
*
,
const
void
*
))
void
XQSort
(
void
*
data
A
,
void
*
dataB
,
void
*
index
,
int
num
,
int
width
,
int
stride
,
int
(
*
comp
)(
const
void
*
,
const
void
*
))
{
XMemCopy
(
dataB
,
-
1
,
dataA
,
-
1
,
num
*
width
);
char
*
lo
,
*
hi
;
// ends of sub-array currently sorting
int
*
indexlo
,
*
indexhi
;
char
*
mid
;
// points to middle of subarray
...
...
@@ -506,8 +507,8 @@ void XQSort(void * data, void * index, int num, int width, int stride, int (*com
stackptr
=
0
;
lo
=
(
char
*
)
data
;
hi
=
(
char
*
)
data
+
realStride
*
(
num
-
1
);
lo
=
(
char
*
)
data
B
;
hi
=
(
char
*
)
data
B
+
realStride
*
(
num
-
1
);
indexlo
=
(
int
*
)
index
;
indexhi
=
index
!=
NULL
?
(
int
*
)
index
+
stride
*
(
num
-
1
)
:
NULL
;
...
...
source/tensor/XUtility.h
查看文件 @
906eebb7
...
...
@@ -53,7 +53,7 @@ extern void XSleep(int sleepTime);
extern
double
GetClock
();
extern
double
GetClockSec
();
extern
void
XQSort
(
void
*
data
,
void
*
index
,
int
num
,
int
width
,
int
stride
,
int
(
*
comp
)(
const
void
*
,
const
void
*
));
extern
void
XQSort
(
void
*
data
A
,
void
*
dataB
,
void
*
index
,
int
num
,
int
width
,
int
stride
,
int
(
*
comp
)(
const
void
*
,
const
void
*
));
extern
int
CompXFloat
(
const
void
*
a
,
const
void
*
b
);
#ifdef USE_CUDA
...
...
source/tensor/core/arithmetic/Absolute.cpp
查看文件 @
906eebb7
...
...
@@ -21,6 +21,7 @@
#include <math.h>
#include "../../XTensor.h"
#include "../../XName.h"
#include "Absolute.h"
#include "Absolute.cuh"
...
...
@@ -28,21 +29,54 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set every entry to its absolute value
>> a - the tensor we are processing
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
void
_Absolute
(
XTensor
*
a
)
void
_Absolute
(
const
XTensor
*
a
,
XTensor
*
b
)
{
#ifdef USE_CUDA
/* run it on GPUs */
if
(
a
->
devID
>=
0
)
{
_CudaAbsolute
(
a
);
_CudaAbsolute
(
a
,
b
);
return
;
}
#endif
CheckNTErrors
((
XTensor
::
IsIdentical
(
a
,
b
)),
"Input tensors should have the same type!"
);
CheckNTErrors
((
a
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
DTYPE
*
d
=
(
DTYPE
*
)
a
->
data
;
DTYPE
*
db
=
(
DTYPE
*
)
b
->
data
;
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
d
[
i
]
=
(
DTYPE
)
fabs
(
d
[
i
]);
db
[
i
]
=
(
DTYPE
)
fabs
(
d
[
i
]);
}
/*
in-place version of _Absolute
the same tensor is passed as both the input and the output of _Absolute,
so the result is kept in a and nothing is returned
>> a - the tensor we are processing
*/
void _AbsoluteMe(XTensor * a)
{
    const XTensor * input = a;
    XTensor * output = a;

    _Absolute(input, output);
}
/*
absolute value of every entry (XTensor-returning version)
the result is kept in a new tensor which is returned to the caller
>> a - input tensor we are processing
<< return - the absolute value of the input tensor
*/
XTensor Absolute(const XTensor & a)
{
    const XTensor * input = &a;

    /* the result tensor is constructed from the input and SetTMP() is called on it */
    XTensor result(input);
    result.SetTMP();

    /* call _Absolute function */
    _Absolute(input, &result);

    /* tensor connections (edge type MATH_ABSOLUTE) */
    XLink::MakeLink(input, NULL, &result, MATH_ABSOLUTE);

    return result;
}
}
//
namespace
nts
(
NiuTrans
.
Tensor
)
\ No newline at end of file
source/tensor/core/arithmetic/Absolute.cu
查看文件 @
906eebb7
...
...
@@ -29,37 +29,41 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
set each entry to its absolute value (CUDA Kernel)
>> d - pointer to the data array
>> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array
*/
__global__
void KernelAbsolute(DTYPE *
d
, int size)
void KernelAbsolute(DTYPE *
a, DTYPE * b
, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = fabs(d
[i]);
b[i] = fabs(a
[i]);
}
/*
set each entry to its absolute value (CUDA Kernel)
This is for float16 computation
>> d - pointer to the data array
>> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array
*/
__global__
void KernelAbsolute(__half *
d
, int size)
void KernelAbsolute(__half *
a, __half * b
, int size)
{
return;
}
/*
set each entry to its with float16 data type value
>> a - the tensor
set each entry to its absolute value
>> a - input tensor
>> b - output tensor
*/
extern "C"
void _CudaAbsolute(
XTensor * a
)
void _CudaAbsolute(
const XTensor * a, XTensor * b
)
{
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
...
...
@@ -74,10 +78,10 @@ void _CudaAbsolute(XTensor * a)
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
KernelAbsolute << <blocks, threads >> >((DTYPE*)a->data, a->unitNum);
KernelAbsolute << <blocks, threads >> >((DTYPE*)a->data,
(DTYPE*)b->data,
a->unitNum);
}
else if (a->dataType == X_FLOAT16) {
KernelAbsolute << <blocks, threads >> >((__half*)a->data, a->unitNum);
KernelAbsolute << <blocks, threads >> >((__half*)a->data,
(__half*)b->data,
a->unitNum);
}
else {
ShowNTErrors("TODO!");
...
...
source/tensor/core/arithmetic/Absolute.cuh
查看文件 @
906eebb7
...
...
@@ -27,15 +27,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set each entry to its absolute value (CUDA Kernel) */
__global__
void KernelAbsolute(DTYPE *
d
, int size);
void KernelAbsolute(DTYPE *
a, DTYPE * b
, int size);
/* set each entry to its absolute value (CUDA Kernel) with float16 data type*/
__global__
void KernelAbsolute(__half *
d
, int size);
void KernelAbsolute(__half *
a, __half * b
, int size);
/* set each entry to its absolute value */
extern "C"
void _CudaAbsolute(
XTensor * a
);
void _CudaAbsolute(
const XTensor * a, XTensor * b
);
#endif // USE_CUDA
...
...
source/tensor/core/arithmetic/Absolute.h
查看文件 @
906eebb7
...
...
@@ -27,8 +27,19 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* set every entry to its absolute value */
extern
"C"
void
_Absolute
(
XTensor
*
a
);
void
_Absolute
(
const
XTensor
*
a
,
XTensor
*
b
);
/*
set every entry to its absolute value (do it on site)
keep the result in the input tensor a and return nothing
*/
void
_AbsoluteMe
(
XTensor
*
a
);
/*
set every entry to its absolute value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor
Absolute
(
const
XTensor
&
a
);
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/arithmetic/MatrixMULBatchedCPU.cpp
查看文件 @
906eebb7
...
...
@@ -41,8 +41,8 @@ void _MatrixMULBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA,
const
XList
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XList
*
c
,
DTYPE
alpha
,
DTYPE
beta
)
{
CheckNTErrors
(
(
a
&&
b
&&
c
)
,
"Empty input lists!"
);
CheckNTErrors
(
(
a
->
count
==
b
->
count
&&
a
->
count
==
c
->
count
)
,
"Input lists must be of the same size!"
);
CheckNTErrors
(
a
&&
b
&&
c
,
"Empty input lists!"
);
CheckNTErrors
(
a
->
count
==
b
->
count
&&
a
->
count
==
c
->
count
,
"Input lists must be of the same size!"
);
if
(
a
->
count
==
0
)
return
;
...
...
source/tensor/core/arithmetic/MatrixMULBatchedCPU.h
查看文件 @
906eebb7
...
...
@@ -28,8 +28,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* matrix multiplication in batch mode (CPU code) */
extern
"C"
void
_MatrixMULBatchedCPU
(
const
XList
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XList
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XList
*
c
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
);
void
_MatrixMULBatchedCPU
(
const
XList
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XList
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XList
*
c
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
);
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/arithmetic/MatrixMul.h
查看文件 @
906eebb7
source/tensor/core/arithmetic/MatrixMul2D.cpp
查看文件 @
906eebb7
source/tensor/core/arithmetic/MatrixMul2D.cu
查看文件 @
906eebb7
...
...
@@ -123,8 +123,7 @@ where trans() return the transposed matrix if the flag is fired
*/
void _CudaMatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c,
DTYPE alpha, DTYPE beta, XStream * stream)
XTensor * c, DTYPE alpha, DTYPE beta, XStream * stream)
{
int an = transposedA == X_TRANS ? a->dimSize[1] : a->dimSize[0];
int am = transposedA == X_TRANS ? a->dimSize[0] : a->dimSize[1];
...
...
source/tensor/core/arithmetic/MatrixMul2D.cuh
查看文件 @
906eebb7
source/tensor/core/arithmetic/MatrixMul2DParallel.cpp
查看文件 @
906eebb7
source/tensor/core/arithmetic/MatrixMul2DParallel.h
查看文件 @
906eebb7
...
...
@@ -32,8 +32,8 @@ c = trans(a) * trans(b) * alpha + c * beta
where trans() returns the transposed matrix if the flag is fired.
*/
extern
"C"
void
_MatrixMul2DParallel
(
const
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
,
XPRunner
*
parallelRunner
=
NULL
);
void
_MatrixMul2DParallel
(
const
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
,
XPRunner
*
parallelRunner
=
NULL
);
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/arithmetic/MatrixMulBatched.cpp
查看文件 @
906eebb7
...
...
@@ -47,8 +47,7 @@ where trans() returns the transposed matrix if the flag is fired
*/
void
_MatrixMulBatched
(
const
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
DTYPE
alpha
,
DTYPE
beta
,
XPRunner
*
parallelRunner
)
XTensor
*
c
,
DTYPE
alpha
,
DTYPE
beta
,
XPRunner
*
parallelRunner
)
{
CheckNTErrors
((
a
&&
b
&&
c
),
"Empty input tensors!"
);
CheckNTErrors
((
a
->
dataType
==
b
->
dataType
&&
a
->
dataType
==
c
->
dataType
),
...
...
@@ -169,14 +168,12 @@ where trans() returns the transposed matrix if the flag is fired.
>> b - tensor b
>> transposedB - indicates whether the matrices in b are transposed
>> alpha - a coefficient
>> beta - another coefficient
>> parallelRunner - parallel processing module
<< return - the result of matrix multiplication of the two tensors
*/
XTensor
MatrixMulBatched
(
const
XTensor
&
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XTensor
&
b
,
MATRIX_TRANS_TYPE
transposedB
,
DTYPE
alpha
,
XPRunner
*
parallelRunner
)
{
CheckNTErrors
(
&
a
!=
&
NULLTensor
&&
&
b
!=
&
NULLTensor
,
"Empty input tensors!"
);
CheckNTErrors
(
a
.
dataType
==
b
.
dataType
,
"Input tensors should have the same data type!"
);
CheckNTErrors
(
a
.
order
>=
2
&&
b
.
order
>=
2
,
"Input tensors must have a order >= 2!"
);
CheckNTErrors
(
a
.
order
==
b
.
order
,
"Input tensor and output tensor must have same order!"
);
...
...
@@ -191,13 +188,13 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const
int
order
=
a
.
order
;
int
sub
=
0
;
int
*
dimSize
=
new
int
[
order
];
for
(
int
i
=
2
;
i
<
a
.
order
;
i
++
)
dimSize
[
sub
++
]
=
a
.
dimSize
RDI
[
i
];
for
(
int
i
=
0
;
i
<
a
.
order
-
2
;
i
++
)
dimSize
[
sub
++
]
=
a
.
dimSize
[
i
];
dimSize
[
sub
++
]
=
an
;
dimSize
[
sub
++
]
=
bm
;
XTensor
c
=
NewTensor
(
order
,
dimSize
,
a
.
dataType
,
a
.
denseRatio
,
a
.
devID
,
a
.
mem
);
c
.
SetZeroAll
(
);
float
dr
=
(
!
a
.
isSparse
||
!
b
.
isSparse
)
?
1.0
F
:
MAX
(
a
.
denseRatio
,
b
.
denseRatio
);
XTensor
c
(
order
,
dimSize
,
a
.
dataType
,
dr
,
a
.
devID
,
a
.
mem
);
c
.
SetTMP
();
/*call _MatrixMulBatched function */
...
...
source/tensor/core/arithmetic/Negate.cpp
查看文件 @
906eebb7
...
...
@@ -20,6 +20,7 @@
*/
#include "../../XTensor.h"
#include "../../XName.h"
#include "Negate.h"
#include "Negate.cuh"
...
...
@@ -27,21 +28,55 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set every entry to its minus value
>> a - the tensor we are processing
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
void
_Negate
(
XTensor
*
a
)
void
_Negate
(
const
XTensor
*
a
,
XTensor
*
b
)
{
#ifdef USE_CUDA
/* run it on GPUs */
if
(
a
->
devID
>=
0
)
{
_CudaNegate
(
a
);
_CudaNegate
(
a
,
b
);
return
;
}
}
#endif
CheckNTErrors
((
XTensor
::
IsIdentical
(
a
,
b
)),
"Input tensors should have the same type!"
);
CheckNTErrors
((
a
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
DTYPE
*
d
=
(
DTYPE
*
)
a
->
data
;
DTYPE
*
db
=
(
DTYPE
*
)
b
->
data
;
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
d
[
i
]
=
-
d
[
i
];
d
b
[
i
]
=
-
d
[
i
];
}
/*
in-place version of _Negate
the same tensor is passed as both the input and the output of _Negate,
so the result is kept in a and nothing is returned
>> a - the tensor we are processing
*/
void _NegateMe(XTensor * a)
{
    const XTensor * input = a;
    XTensor * output = a;

    _Negate(input, output);
}
/*
minus value of every entry (XTensor-returning version)
the result is kept in a new tensor which is returned to the caller
>> a - input tensor we are processing
<< return - the minus value of the input tensor
*/
XTensor Negate(const XTensor & a)
{
    const XTensor * input = &a;

    /* the result tensor is constructed from the input and SetTMP() is called on it */
    XTensor result(input);
    result.SetTMP();

    /* call _Negate function */
    _Negate(input, &result);

    /* tensor connections (edge type MATH_NEGATE) */
    XLink::MakeLink(input, NULL, &result, MATH_NEGATE);

    return result;
}
}
//
namespace
nts
(
NiuTrans
.
Tensor
)
\ No newline at end of file
source/tensor/core/arithmetic/Negate.cu
查看文件 @
906eebb7
...
...
@@ -29,45 +29,49 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
set each entry to its negative value (CUDA Kernel)
>> d - pointer to the data array
>> a - pointer to the input data array
>> b - pointer to the output data array
>> size - size of the data array
*/
__global__
void KernelNegate(DTYPE *
d
, int size)
void KernelNegate(DTYPE *
a, DTYPE * b
, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = -d
[i];
b[i] = -a
[i];
}
/*
set each entry to its negative value (CUDA Kernel)
This is for float16 computation
>> d - pointer to the data array
>> a - pointer to the input data array
>> b - pointer to the output data array
>> size - size of the data array
*/
__global__
void KernelNegate(__half *
d
, int size)
void KernelNegate(__half *
a, __half * b
, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
if (i < size)
d[i] = __hsub(__float2half(0), d
[i]);
b[i] = __hsub(__float2half(0), a
[i]);
#else
if (i < size)
d[i] = __float2half(-__half2float(d
[i]));
b[i] = __float2half(-__half2float(a
[i]));
#endif
}
/*
set each entry to its negative value
>> a - the tensor
>> a - input tensor
>> b - output tensor
*/
extern "C"
void _CudaNegate(
XTensor * a
)
void _CudaNegate(
const XTensor * a, XTensor * b
)
{
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
...
...
@@ -82,10 +86,10 @@ void _CudaNegate(XTensor * a)
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
KernelNegate << <blocks, threads >> >((DTYPE*)a->data, a->unitNum);
KernelNegate << <blocks, threads >> >((DTYPE*)a->data,
(DTYPE*)b->data,
a->unitNum);
}
else if (a->dataType == X_FLOAT16) {
KernelNegate << <blocks, threads >> >((__half*)a->data, a->unitNum);
KernelNegate << <blocks, threads >> >((__half*)a->data,
(__half*)b->data,
a->unitNum);
}
else {
ShowNTErrors("TODO!");
...
...
source/tensor/core/arithmetic/Negate.cuh
查看文件 @
906eebb7
...
...
@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set each entry to its negative value (CUDA Kernel) */
__global__
void KernelNegate(DTYPE *
d
, int size);
void KernelNegate(DTYPE *
a, DTYPE * b
, int size);
/* set each entry to its negative value (CUDA Kernel) with float16 data type */
__global__
void KernelNegate(__half *
d
, int size);
void KernelNegate(__half *
a, __half * b
, int size);
/* set each entry to its negative value */
extern "C"
void _CudaNegate(
XTensor * a
);
void _CudaNegate(
const XTensor * a, XTensor * b
);
#endif // USE_CUDA
...
...
source/tensor/core/arithmetic/Negate.h
查看文件 @
906eebb7
...
...
@@ -27,8 +27,19 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* set every entry to its minus value */
extern
"C"
void
_Negate
(
XTensor
*
a
);
void
_Negate
(
const
XTensor
*
a
,
XTensor
*
b
);
/*
set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
*/
void
_NegateMe
(
XTensor
*
a
);
/*
set every entry to its minus value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor
Negate
(
const
XTensor
&
a
);
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/arithmetic/Sign.cpp
查看文件 @
906eebb7
...
...
@@ -20,6 +20,7 @@
*/
#include "../../XTensor.h"
#include "../../XName.h"
#include "Sign.h"
#include "Sign.cuh"
...
...
@@ -27,27 +28,60 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set every entry to its sign value
>> a - the tensor we are processing
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
void
_Sign
(
XTensor
*
a
)
void
_Sign
(
const
XTensor
*
a
,
XTensor
*
b
)
{
#ifdef USE_CUDA
/* run it on GPUs */
if
(
a
->
devID
>=
0
)
{
_CudaSign
(
a
);
_CudaSign
(
a
,
b
);
return
;
}
#endif
CheckNTErrors
((
XTensor
::
IsIdentical
(
a
,
b
)),
"Input tensors should have the same type!"
);
CheckNTErrors
((
a
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
DTYPE
*
d
=
(
DTYPE
*
)
a
->
data
;
DTYPE
*
db
=
(
DTYPE
*
)
b
->
data
;
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
{
if
(
d
[
i
]
>
0
)
d
[
i
]
=
1.0
F
;
d
b
[
i
]
=
1.0
F
;
else
if
(
d
[
i
]
==
0
)
d
[
i
]
=
0.0
F
;
d
b
[
i
]
=
0.0
F
;
else
d
[
i
]
=
-
1.0
F
;
d
b
[
i
]
=
-
1.0
F
;
}
}
/*
in-place version of _Sign
the same tensor is passed as both the input and the output of _Sign,
so the result is kept in a and nothing is returned
>> a - the tensor we are processing
*/
void _SignMe(XTensor * a)
{
    const XTensor * input = a;
    XTensor * output = a;

    _Sign(input, output);
}
/*
sign value of every entry (XTensor-returning version)
the result is kept in a new tensor which is returned to the caller
>> a - input tensor we are processing
<< return - the sign value of the input tensor
*/
XTensor Sign(const XTensor & a)
{
    const XTensor * input = &a;

    /* the result tensor is constructed from the input and SetTMP() is called on it */
    XTensor result(input);
    result.SetTMP();

    /* call _Sign function */
    _Sign(input, &result);

    /* tensor connections (edge type MATH_SIGN) */
    XLink::MakeLink(input, NULL, &result, MATH_SIGN);

    return result;
}
}
//
namespace
nts
(
NiuTrans
.
Tensor
)
\ No newline at end of file
source/tensor/core/arithmetic/Sign.cu
查看文件 @
906eebb7
...
...
@@ -29,43 +29,47 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
set each entry to its sign value (CUDA Kernel)
>> d - pointer to the data array
>> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array
*/
__global__
void KernelSign(DTYPE *
d
, int size)
void KernelSign(DTYPE *
a, DTYPE * b
, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) {
if (
d
[i] > 0)
d
[i] = 1.0F;
else if (
d
[i] == 0)
d
[i] = 0.0F;
if (
a
[i] > 0)
b
[i] = 1.0F;
else if (
a
[i] == 0)
b
[i] = 0.0F;
else
d
[i] = -1.0F;
b
[i] = -1.0F;
}
}
/*
set each entry to its sign value (CUDA Kernel)
set each entry to its sign value
with float16 data type value
(CUDA Kernel)
This is for float16 computation
>> d - pointer to the data array
>> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array
*/
__global__
void KernelSign(__half *
d
, int size)
void KernelSign(__half *
a, __half * b
, int size)
{
return;
}
/*
set each entry to its with float16 data type value
>> a - the tensor
set each entry to its sign value
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
extern "C"
void _CudaSign(
XTensor * a
)
void _CudaSign(
const XTensor * a, XTensor * b
)
{
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
...
...
@@ -80,10 +84,10 @@ void _CudaSign(XTensor * a)
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
KernelSign << <blocks, threads >> >((DTYPE*)a->data, a->unitNum);
KernelSign << <blocks, threads >> >((DTYPE*)a->data,
(DTYPE*)b->data,
a->unitNum);
}
else if (a->dataType == X_FLOAT16) {
KernelSign << <blocks, threads >> >((__half*)a->data, a->unitNum);
KernelSign << <blocks, threads >> >((__half*)a->data,
(__half*)b->data,
a->unitNum);
}
else {
ShowNTErrors("TODO!");
...
...
source/tensor/core/arithmetic/Sign.cuh
查看文件 @
906eebb7
...
...
@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set each entry to its sign value (CUDA Kernel) */
__global__
void KernelSign(DTYPE *
d
, int size);
void KernelSign(DTYPE *
a, DTYPE * b
, int size);
/* set each entry to its sign value (CUDA Kernel) with float16 data type*/
__global__
void KernelSign(__half *
d
, int size);
void KernelSign(__half *
a, __half * b
, int size);
/* set each entry to its sign value */
extern "C"
void _CudaSign(
XTensor * a
);
void _CudaSign(
const XTensor * a, XTensor * b
);
#endif // USE_CUDA
...
...
source/tensor/core/arithmetic/Sign.h
查看文件 @
906eebb7
...
...
@@ -27,8 +27,19 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* set every entry to its sign value */
extern
"C"
void
_Sign
(
XTensor
*
a
);
void
_Sign
(
const
XTensor
*
a
,
XTensor
*
b
);
/*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
*/
void
_SignMe
(
XTensor
*
a
);
/*
set every entry to its sign value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor
Sign
(
const
XTensor
&
a
);
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/arithmetic/XTensorBLAS.cpp
查看文件 @
906eebb7
source/tensor/core/arithmetic/XTensorBLAS.cu
查看文件 @
906eebb7
source/tensor/core/arithmetic/XTensorBLAS.h
查看文件 @
906eebb7
...
...
@@ -28,7 +28,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* matrix multiplication (BLAS) */
extern
"C"
void
_MatrixMULCPU
(
const
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
);
void
_MatrixMULCPU
(
const
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
);
#ifdef USE_CUDA
...
...
@@ -46,7 +47,8 @@ void _CudaBLASMatrixMULBatched(cublasHandle_t * handle,
const
void
**
a
,
MATRIX_TRANS_TYPE
transposedA
,
TENSOR_DATA_TYPE
dataTypeA
,
const
void
**
b
,
MATRIX_TRANS_TYPE
transposedB
,
TENSOR_DATA_TYPE
dataTypeB
,
void
**
c
,
TENSOR_DATA_TYPE
dataTypeC
,
int
count
,
int
na
,
int
ma
,
int
nb
,
int
mb
,
int
nc
,
int
mc
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
1
.
0
);
int
count
,
int
na
,
int
ma
,
int
nb
,
int
mb
,
int
nc
,
int
mc
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
1
.
0
);
/* matrix multiplication in batch and strided mode via cuda version BLAS */
extern
"C"
...
...
@@ -54,11 +56,13 @@ void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
const
void
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
TENSOR_DATA_TYPE
dataTypeA
,
long
long
int
strideA
,
const
void
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
TENSOR_DATA_TYPE
dataTypeB
,
long
long
int
strideB
,
void
*
c
,
TENSOR_DATA_TYPE
dataTypeC
,
long
long
int
strideC
,
int
count
,
int
na
,
int
ma
,
int
nb
,
int
mb
,
int
nc
,
int
mc
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
1
.
0
);
int
count
,
int
na
,
int
ma
,
int
nb
,
int
mb
,
int
nc
,
int
mc
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
1
.
0
);
/* matrix multiplication in batch mode via cuda version BLAS */
extern
"C"
void
_CudaBLASMatrixMULList
(
cublasHandle_t
*
handle
,
const
XList
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XList
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XList
*
c
,
void
_CudaBLASMatrixMULList
(
cublasHandle_t
*
handle
,
const
XList
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
const
XList
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XList
*
c
,
int
count
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
1
.
0
);
#endif
...
...
source/tensor/core/getandset/Select.cpp
查看文件 @
906eebb7
...
...
@@ -96,7 +96,6 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high)
int
order
=
a
.
order
;
int
*
dimSize
=
new
int
[
order
];
CheckNTErrors
(
&
a
!=
NULL
,
"Empty input tensors!"
);
CheckNTErrors
(
dim
>=
0
&&
dim
<
a
.
order
,
"The input dimension is out of bounds!"
);
CheckNTErrors
(
low
<
high
,
"Illegal range specified!"
);
...
...
@@ -110,8 +109,8 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high)
dimSize
[
i
]
=
a
.
dimSize
[
i
];
}
XTensor
c
=
NewTensor
(
order
,
dimSize
,
a
.
dataType
,
a
.
denseRatio
,
a
.
devID
,
a
.
mem
)
;
c
.
SetZeroAll
(
);
float
dr
=
(
!
a
.
isSparse
)
?
1.0
F
:
a
.
denseRatio
;
XTensor
c
(
order
,
dimSize
,
a
.
dataType
,
dr
,
a
.
devID
,
a
.
mem
);
c
.
SetTMP
();
/* call _SelectRange function */
...
...
source/tensor/core/math/Log.cpp
查看文件 @
906eebb7
...
...
@@ -20,6 +20,7 @@
*/
#include "../../XTensor.h"
#include "../../XName.h"
#include "Log.h"
#include "Log.cuh"
#include <math.h>
...
...
@@ -27,22 +28,55 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
set every entry to its log value
>> a - the tensor we are processing
set every entry to its log value (do it on site)
>> a - input tensor we are processing
>> b - output tensor we are processing
*/
void
_Log
(
XTensor
*
a
)
void
_Log
(
const
XTensor
*
a
,
XTensor
*
b
)
{
#ifdef USE_CUDA
/* run it on GPUs */
if
(
a
->
devID
>=
0
)
{
_CudaLog
(
a
);
_CudaLog
(
a
,
b
);
return
;
}
}
#endif
CheckNTErrors
((
XTensor
::
IsIdentical
(
a
,
b
)),
"Input tensors should have the same type!"
);
CheckNTErrors
((
a
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
DTYPE
*
d
=
(
DTYPE
*
)
a
->
data
;
DTYPE
*
db
=
(
DTYPE
*
)
b
->
data
;
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
d
[
i
]
=
(
DTYPE
)
log
(
d
[
i
]);
db
[
i
]
=
(
DTYPE
)
log
(
d
[
i
]);
}
/*
set every entry to its log value
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void
_LogMe
(
XTensor
*
a
)
{
_Log
(
a
,
a
);
}
/*
set every entry to its log value (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the log value of the input tensor
*/
XTensor
Log
(
const
XTensor
&
a
)
{
XTensor
b
(
&
a
);
b
.
SetTMP
();
/* call _Log function */
_Log
(
&
a
,
&
b
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
NULL
,
&
b
,
MATH_LOG
);
return
b
;
}
}
//
namespace
nts
(
NiuTrans
.
Tensor
)
\ No newline at end of file
source/tensor/core/math/Log.cu
查看文件 @
906eebb7
...
...
@@ -29,37 +29,41 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
set each entry to its log value (CUDA Kernel)
>> d - pointer to the data array
>> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array
*/
__global__
void KernelLog(DTYPE *
d
, int size)
void KernelLog(DTYPE *
a, DTYPE * b
, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = log(d
[i]);
b[i] = log(a
[i]);
}
/*
set each entry to its log value (CUDA Kernel)
This is for float16 computation
>> d - pointer to the data array
>> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array
*/
__global__
void KernelLog(__half *
d
, int size)
void KernelLog(__half *
a, __half * b
, int size)
{
return;
}
/*
set each entry to its log value
>> a - the tensor
>> a - input tensor
>> b - output tensor
*/
extern "C"
void _CudaLog(
XTensor * a
)
void _CudaLog(
const XTensor * a, XTensor * b
)
{
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
...
...
@@ -74,10 +78,10 @@ void _CudaLog(XTensor * a)
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
KernelLog << <blocks, threads >> >((DTYPE*)a->data, a->unitNum);
KernelLog << <blocks, threads >> >((DTYPE*)a->data,
(DTYPE*)b->data,
a->unitNum);
}
else if (a->dataType == X_FLOAT16) {
KernelLog << <blocks, threads >> >((__half*)a->data, a->unitNum);
KernelLog << <blocks, threads >> >((__half*)a->data,
(__half*)b->data,
a->unitNum);
}
else {
ShowNTErrors("TODO!");
...
...
source/tensor/core/math/Log.cuh
查看文件 @
906eebb7
...
...
@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set each entry to its log value (CUDA Kernel) */
__global__
void KernelLog(DTYPE *
d
, int size);
void KernelLog(DTYPE *
a, DTYPE * b
, int size);
/* set each entry to its log value (CUDA Kernel) with float16 data type*/
__global__
void KernelLog(__half *
d
, int size);
void KernelLog(__half *
a, __half * b
, int size);
/* set each entry to its log value */
extern "C"
void _CudaLog(
XTensor * a
);
void _CudaLog(
const XTensor * a, XTensor * b
);
#endif // USE_CUDA
...
...
source/tensor/core/math/Log.h
查看文件 @
906eebb7
...
...
@@ -27,8 +27,19 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* set every entry to its log value */
extern
"C"
void
_Log
(
XTensor
*
a
);
void
_Log
(
const
XTensor
*
a
,
XTensor
*
b
);
/*
set every entry to its log value (do it on site)
keep the result in the input tensor a and return nothing
*/
void
_LogMe
(
XTensor
*
a
);
/*
set every entry to its log value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor
Log
(
const
XTensor
&
a
);
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/math/Power.cpp
查看文件 @
906eebb7
...
...
@@ -21,6 +21,7 @@
#include <math.h>
#include "../../XTensor.h"
#include "../../XName.h"
#include "Power.h"
#include "Power.cuh"
...
...
@@ -28,38 +29,73 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/*
get the power(a, p)
>> a - the tensor
>> p - as it is
>> a - input tensor
>> b - output tensor
>> p - parameter
*/
void
_Power
(
XTensor
*
a
,
DTYPE
p
)
void
_Power
(
const
XTensor
*
a
,
XTensor
*
b
,
DTYPE
p
)
{
#ifdef USE_CUDA
/* run it on GPUs */
if
(
a
->
devID
>=
0
)
{
_CudaPower
(
a
,
p
);
_CudaPower
(
a
,
b
,
p
);
return
;
}
#endif
CheckNTErrors
((
a
->
dataType
==
DEFAULT_DTYPE
),
"TODO!"
);
DTYPE
*
d
=
(
DTYPE
*
)
a
->
data
;
DTYPE
*
aData
=
(
DTYPE
*
)
a
->
data
;
DTYPE
*
bData
=
(
DTYPE
*
)
b
->
data
;
if
(
p
==
0
)
{
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
d
[
i
]
=
(
DTYPE
)
1.0
;
bData
[
i
]
=
(
DTYPE
)
1.0
;
}
else
if
(
p
==
(
DTYPE
)
0.5
)
{
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
d
[
i
]
=
(
DTYPE
)
sqrt
(
d
[
i
]);
bData
[
i
]
=
(
DTYPE
)
sqrt
(
aData
[
i
]);
}
else
if
(
p
==
(
DTYPE
)
2.0
)
{
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
d
[
i
]
=
d
[
i
]
*
d
[
i
];
bData
[
i
]
=
aData
[
i
]
*
aData
[
i
];
}
else
{
for
(
int
i
=
0
;
i
<
a
->
unitNum
;
i
++
)
d
[
i
]
=
(
DTYPE
)
pow
(
d
[
i
],
p
);
bData
[
i
]
=
(
DTYPE
)
pow
(
aData
[
i
],
p
);
}
}
/*
get the power(a, p) (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor
>> p - parameter
*/
void
_PowerMe
(
XTensor
*
a
,
DTYPE
p
)
{
_Power
(
a
,
a
,
p
);
}
/*
get the power(a, p) (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor
>> p - parameter
<< return - the power value of the input tensor
*/
XTensor
Power
(
const
XTensor
&
a
,
DTYPE
p
)
{
XTensor
b
(
&
a
);
b
.
SetTMP
();
/* call _Power function */
_Power
(
&
a
,
&
b
,
p
);
/* tensor connections */
XLink
::
MakeLink
(
&
a
,
NULL
,
&
b
,
MATH_POWER
);
XLink
::
AddParamToHead
(
&
b
,
p
);
return
b
;
}
}
// namespace nts(NiuTrans.Tensor)
source/tensor/core/math/Power.cu
查看文件 @
906eebb7
...
...
@@ -21,6 +21,7 @@
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "../movement/CopyValues.cuh"
#include "Power.h"
#include "Power.cuh"
...
...
@@ -30,74 +31,80 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set all entries to its root (CUDA Kernel)
>> d - data array
>> a - input data array
>> b - output data array
>> size - size of the data array
*/
__global__
void KernelSqrtV2(DTYPE *
d
, int size)
void KernelSqrtV2(DTYPE *
a, DTYPE * b
, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = sqrt(d
[i]);
b[i] = sqrt(a
[i]);
}
/*
set all entries to its root (CUDA Kernel)
>> d - data array
>> a - input data array
>> b - output data array
>> size - size of the data array
*/
__global__
void KernelSqrtV2(__half *
d
, int size)
void KernelSqrtV2(__half *
a, __half * b
, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
if (i < size)
d[i] = hsqrt(d
[i]);
b[i] = hsqrt(a
[i]);
#else
if (i < size)
d[i] = __float2half(sqrt(__half2float(d
[i])));
b[i] = __float2half(sqrt(__half2float(a
[i])));
#endif
}
/*
get power(d[i], p)
>> d - data array
>> a - input data array
>> b - output data array
>> p - power
>> size - size of the data array
*/
__global__
void KernelPower(DTYPE *
d
, DTYPE p, int size)
void KernelPower(DTYPE *
a, DTYPE * b
, DTYPE p, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = pow(d
[i], p);
b[i] = pow(a
[i], p);
}
/*
get power(d[i], p)
>> d - data array
>> a - input data array
>> b - output data array
>> p - power
>> size - size of the data array
*/
__global__
void KernelPower(__half *
d
, __half p, int size)
void KernelPower(__half *
a, __half * b
, __half p, int size)
{
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
#else
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = __float2half(pow(__half2float(d
[i]), __half2float(p)));
b[i] = __float2half(pow(__half2float(a
[i]), __half2float(p)));
#endif
}
/* get the power of the entries */
extern "C"
void _CudaPower(
XTensor * a
, DTYPE p)
void _CudaPower(
const XTensor * a, XTensor * b
, DTYPE p)
{
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
int gridSize[3];
int blockSize[3];
...
...
@@ -111,15 +118,18 @@ void _CudaPower(XTensor * a, DTYPE p)
if (a->dataType == DEFAULT_DTYPE) {
if (p == (DTYPE)0.5) {
KernelSqrtV2 << <blocks, threads >> >((DTYPE*)a->data, a->unitNum);
KernelSqrtV2 << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum);
}
else if (p == (DTYPE)1.0) {
_CudaCopyValues(a, b);
}
else if (p != (DTYPE)1.0) {
KernelPower << <blocks, threads >> >((DTYPE*)a->data, p, a->unitNum);
KernelPower << <blocks, threads >> >((DTYPE*)a->data,
(DTYPE*)b->data,
p, a->unitNum);
}
}
else if (a->dataType == X_FLOAT16) {
if (p == (DTYPE)0.5) {
KernelSqrtV2 << <blocks, threads >> >((__half*)a->data, a->unitNum);
KernelSqrtV2 << <blocks, threads >> >((__half*)a->data,
(__half*)b->data,
a->unitNum);
}
else if (p != (DTYPE)1.0) {
ShowNTErrors("TODO!");
...
...
source/tensor/core/math/Power.cuh
查看文件 @
906eebb7
...
...
@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set all entries to its root (CUDA Kernel) */
__global__
void KernelSqrtV2(DTYPE *
d
, int size);
void KernelSqrtV2(DTYPE *
a, DTYPE * b
, int size);
/* set all entries to its root (CUDA Kernel) */
__global__
void KernelSqrtV2(__half *
d
, int size);
void KernelSqrtV2(__half *
a, __half * b
, int size);
/* get the power of the entries */
extern "C"
void _CudaPower(
XTensor * a
, DTYPE p);
void _CudaPower(
const XTensor * a, XTensor * b
, DTYPE p);
#endif // USE_CUDA
...
...
source/tensor/core/math/Power.h
查看文件 @
906eebb7
...
...
@@ -27,8 +27,19 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* get the power(x, y) */
extern
"C"
void
_Power
(
XTensor
*
a
,
DTYPE
p
);
void
_Power
(
const
XTensor
*
a
,
XTensor
*
b
,
DTYPE
p
);
/*
get the power(x, y) (do it on site)
keep the result in the input tensor a and return nothing
*/
void
_PowerMe
(
XTensor
*
a
,
DTYPE
p
);
/*
get the power(x, y) (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor
Power
(
const
XTensor
&
a
,
DTYPE
p
);
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/movement/CopyIndexed.cpp
查看文件 @
906eebb7
...
...
@@ -110,8 +110,7 @@ make a new tensor to keep the result and return it
*/
XTensor
CopyIndexed
(
const
XTensor
&
s
,
int
dim
,
int
*
srcIndex
,
int
indexSize
,
int
*
tgtIndex
,
int
copyNum
)
{
CheckNTErrors
(
&
s
,
"Empty input tensor!"
);
CheckNTErrors
((
dim
>=
0
&&
dim
<
s
.
order
),
"A too larget dimension specified!"
);
CheckNTErrors
(
dim
>=
0
&&
dim
<
s
.
order
,
"A too larget dimension specified!"
);
int
order
=
s
.
order
;
int
*
dimSize
=
new
int
[
order
];
...
...
@@ -123,16 +122,13 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in
dimSize
[
i
]
=
s
.
dimSize
[
i
];
}
XTensor
t
=
NewTensor
(
order
,
dimSize
,
s
.
dataType
,
s
.
denseRatio
,
s
.
devID
,
s
.
mem
)
;
t
.
SetZeroAll
(
);
float
dr
=
(
!
s
.
isSparse
)
?
1.0
F
:
s
.
denseRatio
;
XTensor
t
(
order
,
dimSize
,
s
.
dataType
,
dr
,
s
.
devID
,
s
.
mem
);
t
.
SetTMP
();
/* call _CopyIndexed function */
_CopyIndexed
(
&
s
,
&
t
,
dim
,
srcIndex
,
indexSize
,
tgtIndex
,
copyNum
);
/* destroy variables */
delete
[]
dimSize
;
/* tensor connection */
XLink
::
MakeLink
(
&
s
,
NULL
,
&
t
,
MOVEMENT_COPYINDEXED
);
XLink
::
AddParamToHeadInt
(
&
t
,
dim
);
...
...
@@ -141,6 +137,9 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in
XLink
::
AddParamToHeadPointer
(
&
t
,
tgtIndex
);
XLink
::
AddParamToHeadInt
(
&
t
,
copyNum
);
/* destroy variables */
delete
[]
dimSize
;
return
t
;
}
...
...
source/tensor/core/reduce/ReduceMax.cpp
查看文件 @
906eebb7
...
...
@@ -101,32 +101,31 @@ make a new tensor to keep the result and return it
*/
XTensor
ReduceMax
(
const
XTensor
&
input
,
int
dim
)
{
CheckNTErrors
(
&
input
,
"Empty input or output tensors!"
);
CheckNTErrors
((
dim
>=
0
&&
dim
<
input
.
order
),
"Illegal dimension to reduce!"
);
CheckNTErrors
(
dim
>=
0
&&
dim
<
input
.
order
,
"Illegal dimension to reduce!"
);
int
order
=
input
.
order
-
1
;
int
*
dimSize
=
new
int
[
order
];
for
(
int
i
=
0
;
i
<
input
.
order
;
i
++
){
for
(
int
i
=
0
;
i
<
order
;
i
++
){
if
(
i
<
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
];
else
if
(
i
>
dim
)
else
if
(
i
>
=
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
+
1
];
}
XTensor
output
=
NewTensor
(
order
,
dimSize
,
input
.
dataType
,
input
.
denseRatio
,
input
.
devID
,
input
.
mem
)
;
output
.
SetZeroAll
(
);
float
dr
=
(
!
input
.
isSparse
)
?
1.0
F
:
input
.
denseRatio
;
XTensor
output
(
order
,
dimSize
,
input
.
dataType
,
dr
,
input
.
devID
,
input
.
mem
);
output
.
SetTMP
();
/* call _ReduceMax function */
_ReduceMax
(
&
input
,
&
output
,
dim
);
/* destroy variables */
delete
[]
dimSize
;
/* tensor connection */
XLink
::
MakeLink
(
&
input
,
NULL
,
&
output
,
REDUCE_REDUCEMAX
);
XLink
::
AddParamToHeadInt
(
&
output
,
dim
);
/* destroy variables */
delete
[]
dimSize
;
return
output
;
}
...
...
source/tensor/core/reduce/ReduceMean.cpp
查看文件 @
906eebb7
...
...
@@ -58,20 +58,19 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
*/
XTensor
ReduceMean
(
const
XTensor
&
input
,
int
dim
)
{
CheckNTErrors
(
&
input
,
"Empty input or output tensors!"
);
CheckNTErrors
((
dim
>=
0
&&
dim
<
input
.
order
),
"Illegal dimension to reduce!"
);
CheckNTErrors
(
dim
>=
0
&&
dim
<
input
.
order
,
"Illegal dimension to reduce!"
);
int
order
=
input
.
order
-
1
;
int
*
dimSize
=
new
int
[
order
];
for
(
int
i
=
0
;
i
<
input
.
order
;
i
++
){
for
(
int
i
=
0
;
i
<
order
;
i
++
){
if
(
i
<
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
];
else
if
(
i
>
dim
)
else
if
(
i
>
=
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
+
1
];
}
XTensor
output
=
NewTensor
(
order
,
dimSize
,
input
.
dataType
,
input
.
denseRatio
,
input
.
devID
,
input
.
mem
)
;
output
.
SetZeroAll
(
);
float
dr
=
(
!
input
.
isSparse
)
?
1.0
F
:
input
.
denseRatio
;
XTensor
output
(
order
,
dimSize
,
input
.
dataType
,
dr
,
input
.
devID
,
input
.
mem
);
output
.
SetTMP
();
/* call _ReduceMean function */
...
...
source/tensor/core/reduce/ReduceSum.cpp
查看文件 @
906eebb7
...
...
@@ -214,20 +214,19 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true
*/
XTensor
ReduceSum
(
const
XTensor
&
input
,
int
dim
,
const
XTensor
&
shift
,
DTYPE
power
,
bool
isExp
)
{
CheckNTErrors
(
&
input
,
"Empty input or output tensors!"
);
CheckNTErrors
((
dim
>=
0
&&
dim
<
input
.
order
),
"Illegal dimension to reduce!"
);
CheckNTErrors
(
dim
>=
0
&&
dim
<
input
.
order
,
"Illegal dimension to reduce!"
);
int
order
=
input
.
order
-
1
;
int
*
dimSize
=
new
int
[
order
];
for
(
int
i
=
0
;
i
<
input
.
order
;
i
++
){
for
(
int
i
=
0
;
i
<
order
;
i
++
){
if
(
i
<
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
];
else
if
(
i
>
dim
)
else
if
(
i
>
=
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
+
1
];
}
XTensor
output
=
NewTensor
(
order
,
dimSize
,
input
.
dataType
,
input
.
denseRatio
,
input
.
devID
,
input
.
mem
)
;
output
.
SetZeroAll
(
);
float
dr
=
(
!
input
.
isSparse
)
?
1.0
F
:
input
.
denseRatio
;
XTensor
output
(
order
,
dimSize
,
input
.
dataType
,
dr
,
input
.
devID
,
input
.
mem
);
output
.
SetTMP
();
/* call _ReduceSum function */
...
...
@@ -237,6 +236,53 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow
XLink
::
MakeLink
(
&
input
,
&
shift
,
&
output
,
REDUCE_REDUCESUM
);
XLink
::
AddParamToHeadInt
(
&
output
,
dim
);
XLink
::
AddParamToHead
(
&
output
,
power
);
XLink
::
AddParamToHeadBool
(
&
output
,
isExp
);
/* destroy variables */
delete
[]
dimSize
;
return
output
;
}
/*
sum the items along a dimension of the tensor (return a XTensor structure)
make a new tensor to keep the result and return it
For a 1-dimensional data array a,
sum = \sum_i (a_i)^power if isExp == false
sum = \sum_i exp((a_i)^power) if isExp == true
>> input - the input tensor
>> dim - the dimension where the reduction is performed on
>> ieExp - specify if the exp() is performed
>> power - we perform pow(item_i, power) on each item in the array
<< return - the sum along a dimension of the tensor
*/
XTensor
ReduceSum
(
const
XTensor
&
input
,
int
dim
,
DTYPE
power
,
bool
isExp
)
{
CheckNTErrors
(
dim
>=
0
&&
dim
<
input
.
order
,
"Illegal dimension to reduce!"
);
int
order
=
input
.
order
-
1
;
int
*
dimSize
=
new
int
[
order
];
for
(
int
i
=
0
;
i
<
order
;
i
++
){
if
(
i
<
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
];
else
if
(
i
>=
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
+
1
];
}
float
dr
=
(
!
input
.
isSparse
)
?
1.0
F
:
input
.
denseRatio
;
XTensor
output
(
order
,
dimSize
,
input
.
dataType
,
dr
,
input
.
devID
,
input
.
mem
);
output
.
SetTMP
();
/* call _ReduceSum function */
_ReduceSum
(
&
input
,
&
output
,
dim
,
NULL
,
power
,
isExp
);
/* tensor connection */
XLink
::
MakeLink
(
&
input
,
NULL
,
&
output
,
REDUCE_REDUCESUM
);
XLink
::
AddParamToHeadInt
(
&
output
,
dim
);
XLink
::
AddParamToHead
(
&
output
,
power
);
XLink
::
AddParamToHeadBool
(
&
output
,
isExp
);
/* destroy variables */
delete
[]
dimSize
;
...
...
source/tensor/core/reduce/ReduceSum.h
查看文件 @
906eebb7
...
...
@@ -43,7 +43,16 @@ For a 1-dimensional data array a,
sum = \sum_i (a_i - shift) if isExp == false
sum = \sum_i exp(a_i - shift) if isExp == true
*/
XTensor
ReduceSum
(
const
XTensor
&
input
,
int
dim
,
const
XTensor
&
shift
=
NULL
,
DTYPE
power
=
(
DTYPE
)
1
.
0
F
,
bool
isExp
=
false
);
XTensor
ReduceSum
(
const
XTensor
&
input
,
int
dim
,
const
XTensor
&
shift
,
DTYPE
power
=
(
DTYPE
)
1
.
0
F
,
bool
isExp
=
false
);
/*
sum the items along a dimension of the tensor (return a XTensor structure)
make a new tensor to keep the result and return it
For a 1-dimensional data array a,
sum = \sum_i (a_i) if isExp == false
sum = \sum_i exp(a_i) if isExp == true
*/
XTensor
ReduceSum
(
const
XTensor
&
input
,
int
dim
,
DTYPE
power
=
(
DTYPE
)
1
.
0
F
,
bool
isExp
=
false
);
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/reduce/ReduceSumSquared.cpp
查看文件 @
906eebb7
...
...
@@ -54,20 +54,19 @@ For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2
*/
XTensor
ReduceSumSquared
(
const
XTensor
&
input
,
int
dim
,
const
XTensor
&
shift
)
{
CheckNTErrors
(
&
input
,
"Empty input or output tensors!"
);
CheckNTErrors
((
dim
>=
0
&&
dim
<
input
.
order
),
"Illegal dimension to reduce!"
);
CheckNTErrors
(
dim
>=
0
&&
dim
<
input
.
order
,
"Illegal dimension to reduce!"
);
int
order
=
input
.
order
-
1
;
int
*
dimSize
=
new
int
[
order
];
for
(
int
i
=
0
;
i
<
input
.
order
;
i
++
){
for
(
int
i
=
0
;
i
<
order
;
i
++
){
if
(
i
<
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
];
else
if
(
i
>
dim
)
else
if
(
i
>
=
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
+
1
];
}
XTensor
output
=
NewTensor
(
order
,
dimSize
,
input
.
dataType
,
input
.
denseRatio
,
input
.
devID
,
input
.
mem
)
;
output
.
SetZeroAll
(
);
float
dr
=
(
!
input
.
isSparse
)
?
1.0
F
:
input
.
denseRatio
;
XTensor
output
(
order
,
dimSize
,
input
.
dataType
,
dr
,
input
.
devID
,
input
.
mem
);
output
.
SetTMP
();
/* call _ReduceSumSquared function */
...
...
source/tensor/core/reduce/ReduceVariance.cpp
查看文件 @
906eebb7
...
...
@@ -19,6 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../../XName.h"
#include "../math/ScaleAndShift.h"
#include "ReduceSum.h"
#include "ReduceVariance.h"
...
...
@@ -56,25 +57,28 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
*/
XTensor
ReduceVariance
(
const
XTensor
&
input
,
int
dim
,
const
XTensor
&
mean
)
{
CheckNTErrors
(
&
input
,
"Empty input or output tensors!"
);
CheckNTErrors
((
dim
>=
0
&&
dim
<
input
.
order
),
"Illegal dimension to reduce!"
);
CheckNTErrors
(
dim
>=
0
&&
dim
<
input
.
order
,
"Illegal dimension to reduce!"
);
int
order
=
input
.
order
-
1
;
int
*
dimSize
=
new
int
[
order
];
for
(
int
i
=
0
;
i
<
input
.
order
;
i
++
){
for
(
int
i
=
0
;
i
<
order
;
i
++
){
if
(
i
<
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
];
else
if
(
i
>
dim
)
else
if
(
i
>
=
dim
)
dimSize
[
i
]
=
input
.
dimSize
[
i
+
1
];
}
XTensor
output
=
NewTensor
(
order
,
dimSize
,
input
.
dataType
,
input
.
denseRatio
,
input
.
devID
,
input
.
mem
)
;
output
.
SetZeroAll
(
);
float
dr
=
(
!
input
.
isSparse
)
?
1.0
F
:
input
.
denseRatio
;
XTensor
output
(
order
,
dimSize
,
input
.
dataType
,
dr
,
input
.
devID
,
input
.
mem
);
output
.
SetTMP
();
/* call _ReduceVariance function */
_ReduceVariance
(
&
input
,
&
output
,
dim
,
&
mean
);
/* tensor connection */
XLink
::
MakeLink
(
&
input
,
&
mean
,
&
output
,
REDUCE_REDUCEVARIANCE
);
XLink
::
AddParamToHeadInt
(
&
output
,
dim
);
/* destroy variables */
delete
[]
dimSize
;
...
...
source/tensor/core/shape/ConcatenateSolely.cpp
查看文件 @
906eebb7
...
...
@@ -36,7 +36,7 @@ concatenate a list of tensors along a given dimension
*/
void
_ConcatenateSolely
(
const
XList
*
smalls
,
XTensor
*
big
,
int
dim
)
{
CheckNTErrors
(
(
big
->
order
>
dim
&&
dim
>=
0
)
,
"Illegal dimension to concatenate!"
);
CheckNTErrors
(
big
->
order
>
dim
&&
dim
>=
0
,
"Illegal dimension to concatenate!"
);
int
catDimSize
=
0
;
int
dimRDI
=
big
->
order
-
dim
-
1
;
...
...
source/tensor/core/shape/MakeMergeBlockIndex.cpp
查看文件 @
906eebb7
source/tensor/core/shape/MakeMergeBlockIndex.cu
查看文件 @
906eebb7
source/tensor/core/shape/MakeMergeBlockIndex.cuh
查看文件 @
906eebb7
...
...
@@ -30,8 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set target data block index for the data movement in split */
extern "C"
void _CudaMakeMergeBlockIndex(int devID,
int * blockIndex, int blockNum, int blockNumInMerge,
void _CudaMakeMergeBlockIndex(int devID, int * blockIndex, int blockNum, int blockNumInMerge,
int splitSizeInGrid, int gridSize, int gridNum);
#endif // USE_CUDA
...
...
source/tensor/core/shape/MakeMergeBlockIndex.h
查看文件 @
906eebb7
source/tensor/core/shape/Merge.cpp
查看文件 @
906eebb7
...
...
@@ -161,8 +161,7 @@ e.g., (N/3, M, 3) -> (N, M)
*/
XTensor
Merge
(
const
XTensor
&
s
,
int
whereToMerge
,
int
leadingDim
)
{
CheckNTErrors
(
&
s
!=
NULL
,
"Invalid tensors!"
);
CheckNTErrors
((
leadingDim
<
whereToMerge
),
"Invalid leading dimension!"
);
CheckNTErrors
(
leadingDim
<
whereToMerge
,
"Invalid leading dimension!"
);
if
(
leadingDim
<
0
)
leadingDim
=
0
;
...
...
@@ -180,8 +179,8 @@ XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim)
}
}
XTensor
t
=
NewTensor
(
order
,
dimSize
,
s
.
dataType
,
s
.
denseRatio
,
s
.
devID
,
s
.
mem
)
;
t
.
SetZeroAll
(
);
float
dr
=
(
!
s
.
isSparse
)
?
1.0
F
:
s
.
denseRatio
;
XTensor
t
(
order
,
dimSize
,
s
.
dataType
,
dr
,
s
.
devID
,
s
.
mem
);
t
.
SetTMP
();
/* call _Merge function */
...
...
@@ -314,6 +313,7 @@ void _Merge(const XList * smalls, XTensor * big, int whereToMerge)
/*
merge small tensors into a big tensor (return a XTensor structure)
make a new tensor to keep the result and return it
>> smalls - the list of the small tensors
>> whereToMerge - the merging operation is along with which dimension
...
...
@@ -331,10 +331,8 @@ XTensor Merge(const XList &smalls, int whereToMerge)
dimSize
[
i
]
=
tensor
->
dimSize
[
whereToMerge
]
*
smalls
.
count
;
}
XTensor
big
=
NewTensor
(
order
,
dimSize
,
tensor
->
dataType
,
tensor
->
denseRatio
,
tensor
->
devID
,
tensor
->
mem
);
big
.
SetZeroAll
();
float
dr
=
(
!
tensor
->
isSparse
)
?
1.0
F
:
tensor
->
denseRatio
;
XTensor
big
(
order
,
dimSize
,
tensor
->
dataType
,
dr
,
tensor
->
devID
,
tensor
->
mem
);
big
.
SetTMP
();
/* call _Merge function */
...
...
@@ -370,10 +368,8 @@ XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge)
dimSize
[
i
]
=
smallA
.
dimSize
[
whereToMerge
]
*
2
;
}
XTensor
big
=
NewTensor
(
order
,
dimSize
,
smallA
.
dataType
,
smallA
.
denseRatio
,
smallA
.
devID
,
smallA
.
mem
);
big
.
SetZeroAll
();
float
dr
=
(
!
smallA
.
isSparse
)
?
1.0
F
:
smallA
.
denseRatio
;
XTensor
big
(
order
,
dimSize
,
smallA
.
dataType
,
dr
,
smallA
.
devID
,
smallA
.
mem
);
big
.
SetTMP
();
XList
smalls
(
2
);
...
...
source/tensor/core/shape/Split.cpp
查看文件 @
906eebb7
...
...
@@ -148,16 +148,16 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
int
order
=
s
.
order
+
1
;
int
*
dimSize
=
new
int
[
order
];
dimSize
[
0
]
=
splitNum
;
for
(
int
i
=
0
;
i
<
s
.
order
;
i
++
)
{
if
(
i
==
whereToSplit
)
dimSize
[
i
+
1
]
=
s
.
dimSize
[
i
]
/
splitNum
;
dimSize
[
i
+
1
]
=
s
.
dimSize
[
i
]
/
splitNum
;
else
dimSize
[
i
+
1
]
=
s
.
dimSize
[
i
];
dimSize
[
i
+
1
]
=
s
.
dimSize
[
i
];
}
dimSize
[
0
]
=
splitNum
;
XTensor
t
=
NewTensor
(
order
,
dimSize
,
s
.
dataType
,
s
.
denseRatio
,
s
.
devID
,
s
.
mem
)
;
t
.
SetZeroAll
(
);
float
dr
=
(
!
s
.
isSparse
)
?
1.0
F
:
s
.
denseRatio
;
XTensor
t
(
order
,
dimSize
,
s
.
dataType
,
dr
,
s
.
devID
,
s
.
mem
);
t
.
SetTMP
();
/* call _Split function */
...
...
@@ -175,7 +175,7 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
}
/*
split a big tensor into small tensors
.
split a big tensor into small tensors
>> big - the source tensor
>> smalls - the list that keeps the resulting tensors (for return)
...
...
@@ -281,38 +281,16 @@ void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum)
}
/*
split a big tensor into small tensors (returna a XList struture).
make a new list to keep the result and return it.
split a big tensor into small tensors
>> big - the source tensor
>> smalls - the list that keeps the resulting tensors (for return)
NOTE that all the "small" tensors have already been placed in the list in advance.
>> whereToSplit - which dimension of the tensor is to split
>> splitNum - how many splits
<< return - a list of small tensors by splitting a big tensor
*/
XList
SplitList
(
const
XTensor
&
big
,
int
whereToSplit
,
int
splitNum
)
void
Split
(
const
XTensor
&
big
,
XList
&
smalls
,
int
whereToSplit
,
int
splitNum
)
{
CheckNTErrors
(
&
big
,
"Invalid tensors!"
);
XList
smalls
=
XList
(
splitNum
);
int
order
=
big
.
order
;
int
*
dimSize
=
new
int
[
order
];
for
(
int
i
=
0
;
i
<
big
.
order
;
i
++
)
{
if
(
i
!=
whereToSplit
)
dimSize
[
i
]
=
big
.
dimSize
[
i
];
else
dimSize
[
i
]
=
big
.
dimSize
[
i
]
/
splitNum
;
}
for
(
int
i
=
0
;
i
<
splitNum
;
i
++
)
{
XTensor
tensor
=
NewTensor
(
order
,
dimSize
,
big
.
dataType
,
big
.
denseRatio
,
big
.
devID
,
big
.
mem
);
tensor
.
SetZeroAll
();
tensor
.
SetTMP
();
smalls
.
Add
(
&
tensor
);
}
/* call _Split function */
_Split
(
&
big
,
&
smalls
,
whereToSplit
,
splitNum
);
...
...
@@ -326,11 +304,6 @@ XList SplitList(const XTensor &big, int whereToSplit, int splitNum)
block, rather than the total number of splits */
XLink
::
AddParamToHeadInt
(
s
,
i
);
}
/* destroy variables */
delete
[]
dimSize
;
return
smalls
;
}
}
// namespace nts(NiuTrans.Tensor)
source/tensor/core/shape/Split.h
查看文件 @
906eebb7
...
...
@@ -46,7 +46,7 @@ void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum)
split a big tensor into small tensors (return a XList structure)
make a new list to keep the result and return it
*/
XList
SplitList
(
const
XTensor
&
big
,
int
whereToSplit
,
int
splitNum
);
void
Split
(
const
XTensor
&
big
,
XList
&
smalls
,
int
whereToSplit
,
int
splitNum
);
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/shape/Unsqueeze.cpp
查看文件 @
906eebb7
...
...
@@ -108,8 +108,6 @@ make a new tensor to keep the result and return it
*/
XTensor
Unsqueeze
(
const
XTensor
&
a
,
int
dim
,
int
dSize
)
{
CheckNTErrors
(
&
a
,
"Empty input tensors!"
);
int
order
=
a
.
order
+
1
;
int
*
dimSize
=
new
int
[
order
];
...
...
@@ -122,8 +120,8 @@ XTensor Unsqueeze(const XTensor &a, int dim, int dSize)
dimSize
[
i
]
=
a
.
dimSize
[
i
-
1
];
}
XTensor
b
=
NewTensor
(
order
,
dimSize
,
a
.
dataType
,
a
.
denseRatio
,
a
.
devID
,
a
.
mem
)
;
b
.
SetZeroAll
(
);
float
dr
=
(
!
a
.
isSparse
)
?
1.0
F
:
a
.
denseRatio
;
XTensor
b
(
order
,
dimSize
,
a
.
dataType
,
dr
,
a
.
devID
,
a
.
mem
);
b
.
SetTMP
();
/* call _Unsqueeze function */
...
...
source/tensor/core/sort/Sort.cpp
查看文件 @
906eebb7
...
...
@@ -29,13 +29,14 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/*
sort the tensor along a given dimension
>>
a - the
tensor
>> a - input tensor
>>
b - output
tensor
>> index - index of the items in the resulting tensor
>> dim - the dimension along which the sorting is performed
*/
void
_Sort
(
XTensor
*
a
,
XTensor
*
index
,
int
dim
)
void
_Sort
(
const
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
index
,
int
dim
)
{
CheckNTErrors
((
XTensor
::
IsIdentical
(
a
,
b
)),
"Input tensors should have the same type!"
);
CheckNTErrors
((
dim
>=
0
&&
dim
<
a
->
order
),
"Incorrect dimension specified!"
);
CheckNTErrors
((
a
->
order
==
index
->
order
),
"Unmatched input tensors!"
);
CheckNTErrors
((
index
->
dataType
==
X_INT
),
"Wrong data type!"
);
...
...
@@ -46,7 +47,7 @@ void _Sort(XTensor * a, XTensor * index, int dim)
if
(
a
->
devID
>=
0
)
{
#ifdef USE_CUDA
_CudaSortBig
(
a
,
a
,
index
,
index
,
dim
);
_CudaSortBig
(
a
,
b
,
index
,
index
,
dim
);
#else
ShowNTErrors
(
"Plesae specify USE_CUDA and recompile the code!"
);
#endif
...
...
@@ -64,12 +65,13 @@ void _Sort(XTensor * a, XTensor * index, int dim)
for
(
int
k
=
0
;
k
<
blockNum
;
k
++
)
{
for
(
int
i
=
0
;
i
<
stride
;
i
++
)
{
void
*
data
=
(
char
*
)
a
->
data
+
(
k
*
blockSize
+
i
)
*
a
->
unitSize
;
void
*
dataA
=
(
char
*
)
a
->
data
+
(
k
*
blockSize
+
i
)
*
a
->
unitSize
;
void
*
dataB
=
(
char
*
)
b
->
data
+
(
k
*
blockSize
+
i
)
*
b
->
unitSize
;
void
*
indexData
=
(
char
*
)
index
->
data
+
(
k
*
blockSize
+
i
)
*
sizeof
(
int
);
/* we sort the data array along "dim" */
if
(
a
->
dataType
==
X_FLOAT
)
XQSort
(
data
,
indexData
,
strideNum
,
a
->
unitSize
,
stride
,
CompXFloat
);
XQSort
(
data
A
,
dataB
,
indexData
,
strideNum
,
a
->
unitSize
,
stride
,
CompXFloat
);
else
{
ShowNTErrors
(
"TODO!"
);
}
...
...
@@ -78,4 +80,40 @@ void _Sort(XTensor * a, XTensor * index, int dim)
}
}
/*
in-place sort of a tensor along a given dimension
the sorted items overwrite the input tensor a; nothing is returned
>> a - tensor to sort (it is passed to _Sort as both input and output)
>> index - index of the items in the resulting tensor
>> dim - the dimension along which the sorting is performed
*/
void _SortMe(XTensor * a, XTensor * index, int dim)
{
    /* the input tensor doubles as the output tensor */
    XTensor * target = a;

    _Sort(a, target, index, dim);
}
/*
sort the tensor along a given dimension (reference-based interface)
the sorted items are written to b and their indices to index; nothing is
returned. After sorting, a is linked to b and index through XLink with the
SORT_SORT id, and dim is stored as an integer parameter on both of them.
>> a - input tensor
>> b - output tensor
>> index - index of the items in the resulting tensor
>> dim - the dimension along which the sorting is performed
*/
void Sort(XTensor &a, XTensor &b, XTensor &index, int dim)
{
    /* call _Sort function */
    _Sort(&a, &b, &index, dim);

    /* tensor connections: a -> (b, index) */
    XList outputs(2);
    outputs.Add(&b);
    outputs.Add(&index);
    XLink::MakeLink(&a, &outputs, SORT_SORT);

    /* keep the sorting dimension with both linked tensors */
    XLink::AddParamToHeadInt(&b, dim);
    XLink::AddParamToHeadInt(&index, dim);
}
}
// namespace nts(NiuTrans.Tensor)
source/tensor/core/sort/Sort.cu
查看文件 @
906eebb7
...
...
@@ -39,7 +39,7 @@ bitonic sort (for each row in a matrix)
>> n - row number of the matrix
*/
template<class T> __global__
void KernelBitonicSort2D(void * data, int j, int k, int m, int n)
void KernelBitonicSort2D(void * data, int j, int k, int m, int n)
{
const unsigned int idx = blockDim.x * blockIdx.x + threadIdx.x;
const unsigned int row = blockDim.y * blockIdx.y + threadIdx.y;
...
...
@@ -74,7 +74,7 @@ bitonic sort (for each row in a matrix) with index
>> n - row number of the matrix
*/
template<class T> __global__
void KernelBitonicSort2D(void * data, int * index, int j, int k, int m, int n)
void KernelBitonicSort2D(void * data, int * index, int j, int k, int m, int n)
{
const unsigned int idx = blockDim.x * blockIdx.x + threadIdx.x;
const unsigned int row = blockDim.y * blockIdx.y + threadIdx.y;
...
...
source/tensor/core/sort/Sort.h
查看文件 @
906eebb7
...
...
@@ -27,8 +27,20 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* sort the data along a given dimension */
void
_Sort
(
const
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
index
,
int
dim
);
/*
sort the data along a given dimension (do it on site)
keep the result in the input tensor a and return nothing
*/
void
_SortMe
(
XTensor
*
a
,
XTensor
*
index
,
int
dim
);
/*
sort the data along a given dimension (return a XTensor structure)
make a new tensor to keep the result and return it
*/
extern
"C"
void
_Sort
(
XTensor
*
a
,
XTensor
*
index
,
int
dim
);
void
Sort
(
XTensor
&
a
,
XTensor
&
b
,
XTensor
&
index
,
int
dim
);
}
// namespace nts(NiuTrans.Tensor)
...
...
source/tensor/core/sort/TopK.cpp
查看文件 @
906eebb7
...
...
@@ -105,4 +105,29 @@ void _TopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
}
}
}
/*
get the top-k items along a given dimension (reference-based interface)
the result is written to b and index; nothing is returned. After the
computation, a is linked to b and index through XLink with the SORT_TOPK id.
>> a - input tensor
>> b - output tensor (top-k result)
>> index - index of the top-k items
>> dim - the dimension along which the sorting is performed
>> k - how many items returned after sorting
*/
void TopK(XTensor &a, XTensor &b, XTensor &index, int dim, int k)
{
    /* call _TopK function */
    _TopK(&a, &b, &index, dim, k);

    /* tensor connection: a -> (b, index) */
    XList outputs(2);
    outputs.Add(&b);
    outputs.Add(&index);
    XLink::MakeLink(&a, &outputs, SORT_TOPK);

    /* dim is stored on b and k on index */
    XLink::AddParamToHeadInt(&b, dim);
    XLink::AddParamToHeadInt(&index, k);

    /* NOTE(review): the same pair of parameters is added a second time below.
       This looks like a merge leftover, but it is kept as-is because removing
       it changes the number of parameters stored on the link - confirm
       against the code that reads these parameters before dropping it. */
    XLink::AddParamToHeadInt(&b, dim);
    XLink::AddParamToHeadInt(&index, k);
}
}
// namespace nts(NiuTrans.Tensor)
source/tensor/core/sort/TopK.h
查看文件 @
906eebb7
...
...
@@ -30,6 +30,10 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
extern
"C"
void
_TopK
(
const
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
index
,
int
dim
,
int
k
);
/* get the top-k items along a given dimension */
extern
"C"
void
TopK
(
XTensor
&
a
,
XTensor
&
b
,
XTensor
&
index
,
int
dim
,
int
k
);
}
// namespace nts(NiuTrans.Tensor)
#endif // __TOPK_H__
\ No newline at end of file
source/tensor/function/HardTanH.cpp
查看文件 @
906eebb7
...
...
@@ -60,7 +60,9 @@ void _HardTanH(const XTensor * x, XTensor * y)
}
/*
hard tanh function (return a structure)
hard tanh function (return a XTensor structure)
make a new tensor to keep the result and return it
y = 1 if x > 1
x if -1 <= x <= 1
-1 if x < -1
...
...
@@ -72,8 +74,10 @@ XTensor HardTanH(const XTensor &x)
XTensor
y
(
&
x
);
y
.
SetTMP
();
/* call _HardTanH function */
_HardTanH
(
&
x
,
&
y
);
/* tensor connection */
XLink
::
MakeLink
(
&
x
,
NULL
,
&
y
,
FUNC_HARDTANH
);
return
y
;
...
...
@@ -116,7 +120,7 @@ void _HardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
{
/* calculate dE/dy */
if
(
lossName
!=
NOLOSS
)
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
_
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
DTYPE
*
dedyp
=
(
DTYPE
*
)
dedy
->
data
;
DTYPE
*
dedxp
=
(
DTYPE
*
)
dedx
->
data
;
...
...
source/tensor/function/HardTanH.cu
查看文件 @
906eebb7
...
...
@@ -137,7 +137,7 @@ void _CudaHardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
/* calculate dE/dy */
if(lossName != NOLOSS)
LossBackward(dedy, gold, y, lossName);
_
LossBackward(dedy, gold, y, lossName);
int gridSize[3], blockSize[3];
...
...
source/tensor/function/HardTanH.h
查看文件 @
906eebb7
...
...
@@ -37,7 +37,7 @@ y = 1 if x > 1
*/
void
_HardTanH
(
const
XTensor
*
x
,
XTensor
*
y
);
/* hard tanh function (return a structure) */
/* hard tanh function (return a
XTensor
structure) */
XTensor
HardTanH
(
const
XTensor
&
x
);
/* de/dx */
...
...
source/tensor/function/Identity.cpp
查看文件 @
906eebb7
...
...
@@ -19,6 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-27
*/
#include "../XName.h"
#include "Identity.h"
#include "../XUtility.h"
#include "../core/movement/CopyValues.h"
...
...
@@ -36,6 +37,26 @@ void _Identity(const XTensor * x, XTensor * y)
}
/*
identity function y = x (returns an XTensor structure)
a new tensor is created from x to hold the result, and it is linked to x
through XLink with the FUNC_IDENTITY id
>> x - input tensor
<< return - y
*/
XTensor Identity(const XTensor &x)
{
    /* result tensor, constructed from the input */
    XTensor result(&x);
    result.SetTMP();

    /* call _Identity function */
    _Identity(&x, &result);

    /* tensor connection */
    XLink::MakeLink(&x, NULL, &result, FUNC_IDENTITY);

    return result;
}
/*
backward computation for identity function y = x
dE/dx = dE/dy * dy/dx = dE/dy
...
...
@@ -58,7 +79,7 @@ void _IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
{
/* calculate dE/dy */
if
(
lossName
!=
NOLOSS
)
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
_
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
if
(
dedy
->
data
!=
dedx
->
data
)
_CopyValues
(
dedy
,
dedx
);
...
...
source/tensor/function/Identity.h
查看文件 @
906eebb7
...
...
@@ -28,11 +28,12 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* identity function y = x */
extern
"C"
void
_Identity
(
const
XTensor
*
x
,
XTensor
*
y
);
/* identity function y = x (return a XTensor structure) */
XTensor
Identity
(
const
XTensor
&
x
);
/* de/dx */
extern
"C"
void
_IdentityBackward
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
XTensor
*
dedy
,
XTensor
*
dedx
,
LOSS_FUNCTION_NAME
lossName
);
...
...
source/tensor/function/LogSoftmax.cpp
查看文件 @
906eebb7
...
...
@@ -162,18 +162,22 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
}
/*
log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (return a structure)
log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (return a XTensor structure)
make a new tensor to keep the result and return it
>> x - input vector
>> leadDim - leading dimension (along which we perform reduction)
<< return -
result
<< return -
y
*/
XTensor
LogSoftmax
(
const
XTensor
&
x
,
int
leadDim
)
{
XTensor
y
(
&
x
);
y
.
SetTMP
();
/* call _LogSoftmax function */
_LogSoftmax
(
&
x
,
&
y
,
leadDim
);
/* tensor connection */
XLink
::
MakeLink
(
&
x
,
NULL
,
&
y
,
FUNC_LOGSOFTMAX
);
XLink
::
AddParamToHeadInt
(
&
y
,
leadDim
);
...
...
source/tensor/function/LogSoftmax.h
查看文件 @
906eebb7
...
...
@@ -30,7 +30,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) */
void
_LogSoftmax
(
const
XTensor
*
x
,
XTensor
*
y
,
int
leadDim
);
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (return a structure) */
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (return a
XTensor
structure) */
XTensor
LogSoftmax
(
const
XTensor
&
x
,
int
leadDim
);
/* de/dx */
...
...
source/tensor/function/Loss.cpp
查看文件 @
906eebb7
...
...
@@ -42,7 +42,7 @@ compute the loss
>> oBeg - where to start in the model output (along the leading dimension)
<< return - error in model prediction with respect to gold standard
*/
DTYPE
LossCompute
(
XTensor
*
gold
,
XTensor
*
output
,
LOSS_FUNCTION_NAME
LFName
,
DTYPE
_
LossCompute
(
XTensor
*
gold
,
XTensor
*
output
,
LOSS_FUNCTION_NAME
LFName
,
bool
isLogOutput
,
int
leadDim
,
int
gBeg
,
int
gLen
,
int
oBeg
)
{
DTYPE
error
=
0.0
F
;
...
...
@@ -66,7 +66,7 @@ DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
blockNum
=
output
->
unitNum
/
blockSize
;
if
(
isLogOutput
)
return
LossComputeForLogScale
(
gold
,
output
,
LFName
,
leadDim
,
gBeg
,
gLen
,
oBeg
);
return
_
LossComputeForLogScale
(
gold
,
output
,
LFName
,
leadDim
,
gBeg
,
gLen
,
oBeg
);
DTYPE
*
gp
=
(
DTYPE
*
)
gold
->
data
;
DTYPE
*
op
=
(
DTYPE
*
)
output
->
data
;
...
...
@@ -180,7 +180,7 @@ DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
}
else
{
#ifdef USE_CUDA
error
=
CudaLossCompute
(
gold
,
output
,
LFName
,
isLogOutput
,
leadDim
,
gBeg
,
gLen
,
oBeg
);
error
=
_
CudaLossCompute
(
gold
,
output
,
LFName
,
isLogOutput
,
leadDim
,
gBeg
,
gLen
,
oBeg
);
#else
ShowNTErrors
(
"Please specify USE_CUDA and recompile the code!"
);
#endif
...
...
@@ -201,7 +201,7 @@ the log version of loss computation
>> oBeg - where to start in the model output (along the leading dimension)
<< return - error in model prediction with respect to gold standard
*/
DTYPE
LossComputeForLogScale
(
XTensor
*
gold
,
XTensor
*
output
,
DTYPE
_
LossComputeForLogScale
(
XTensor
*
gold
,
XTensor
*
output
,
LOSS_FUNCTION_NAME
LFName
,
int
leadDim
,
int
gBeg
,
int
gLen
,
int
oBeg
)
{
...
...
@@ -343,7 +343,7 @@ with respect to gold standard, and y this the model output
>> LFName - name of loss function
<< return dE/dy
*/
DTYPE
LossBackwardPoint
(
DTYPE
t
,
DTYPE
y
,
LOSS_FUNCTION_NAME
LFName
)
DTYPE
_
LossBackwardPoint
(
DTYPE
t
,
DTYPE
y
,
LOSS_FUNCTION_NAME
LFName
)
{
/*
squared error
...
...
@@ -380,7 +380,7 @@ with respect to gold standard, and y this the model output
>> tLen - segment length from tBeg (along the leading dimension)
>> yBeg - where to start in the model output (along the leading dimension)
*/
void
LossBackward
(
XTensor
*
dedy
,
XTensor
*
t
,
XTensor
*
y
,
void
_
LossBackward
(
XTensor
*
dedy
,
XTensor
*
t
,
XTensor
*
y
,
LOSS_FUNCTION_NAME
LFName
,
int
leadDim
,
int
tBeg
,
int
tLen
,
int
yBeg
)
{
...
...
@@ -496,7 +496,7 @@ void LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
}
else
{
#ifdef USE_CUDA
CudaLossBackward
(
dedy
,
t
,
y
,
LFName
,
leadDim
,
tBeg
,
tLen
,
yBeg
);
_
CudaLossBackward
(
dedy
,
t
,
y
,
LFName
,
leadDim
,
tBeg
,
tLen
,
yBeg
);
#else
ShowNTErrors
(
"Please specify USE_CUDA and recompile the code!"
);
#endif
...
...
source/tensor/function/Loss.cu
查看文件 @
906eebb7
...
...
@@ -51,7 +51,7 @@ compute the loss
>> yBeg - where to start in the model output (along the leading dimension)
<< return - error in model prediction with respect to gold standard
*/
DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
DTYPE
_
CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
bool isLogOutput, int leadDim, int gBeg, int gLen, int yBeg)
{
CheckNTErrors((gLen >= 0 && gLen <= y->unitNum), "Illegal input length!");
...
...
@@ -65,7 +65,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
CheckNTErrors((gLen == gold->dimSize[leadDim] && gBeg == 0 && yBeg == 0), "TODO!");
if(isLogOutput)
return LossComputeForLogScale(gold, y, LFName, leadDim, gBeg, gLen, yBeg);
return
_
LossComputeForLogScale(gold, y, LFName, leadDim, gBeg, gLen, yBeg);
DTYPE error = 0.0F;
...
...
@@ -77,7 +77,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
if(LFName == SQUAREDERROR){
XTensor * diff = NewTensor(gold->order, gold->dimSize, gold->dataType, gold->denseRatio, gold->devID, gold->mem);
_Sum(gold, y, diff, -1.0F);
_Power(diff, 2.0F);
_Power
Me
(diff, 2.0F);
_ScaleAndShiftMe(diff, 0.5F, 0.0F);
int reduceTimes = diff->order;
...
...
@@ -110,9 +110,9 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
if(LFName == CROSSENTROPY){
XTensor * diff = NewTensor(y->order, y->dimSize, y->dataType, y->denseRatio, y->devID, y->mem);
_CopyValues(y, diff);
_Log(diff);
_Log
Me
(diff);
_Multiply(gold, diff, diff);
_Negate(diff);
_Negate
Me
(diff);
int reduceTimes = diff->order;
for (int i = 0; i < reduceTimes; i++) {
...
...
@@ -148,7 +148,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
_CopyValues(y, yOnehot);
_Multiply(gold, y, yOnehot);
_Sum(gold, yOnehot, diff, -1.0F);
_Power(diff, 2.0F);
_Power
Me
(diff, 2.0F);
_ScaleAndShiftMe(diff, 0.5F, 0.0F);
int reduceTimes = diff->order;
...
...
@@ -190,7 +190,7 @@ the log version of loss computation
>> yBeg - where to start in the model output (along the leading dimension)
<< return - error in model prediction with respect to gold standard
*/
DTYPE CudaLossComputeForLogScale(XTensor * gold, XTensor * y,
DTYPE
_
CudaLossComputeForLogScale(XTensor * gold, XTensor * y,
LOSS_FUNCTION_NAME LFName,
int leadDim, int gBeg, int gLen, int yBeg)
{
...
...
@@ -209,9 +209,9 @@ with respect to gold standard, and y this the model output
>> LFName - name of loss function
<< return dE/dy
*/
DTYPE CudaLossBackward(DTYPE t, DTYPE y, LOSS_FUNCTION_NAME LFName)
DTYPE
_
CudaLossBackward(DTYPE t, DTYPE y, LOSS_FUNCTION_NAME LFName)
{
return LossBackwardPoint(t, y, LFName);
return
_
LossBackwardPoint(t, y, LFName);
// TODO: call cuda kernels for computing the errors
}
...
...
@@ -328,7 +328,7 @@ with respect to gold standard, and y this the model output
>> tLen - segment length from oBeg (along the leading dimension)
>> yBeg - where to start in the model output (along the leading dimension)
*/
void CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
void
_
CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
LOSS_FUNCTION_NAME LFName,
int leadDim, int tBeg, int tLen, int yBeg)
{
...
...
source/tensor/function/Loss.cuh
查看文件 @
906eebb7
...
...
@@ -31,21 +31,21 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* compute the loss (cuda version) */
extern "C"
DTYPE CudaLossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
DTYPE
_
CudaLossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
bool isLogOutput, int leadDim, int gBeg, int gLen, int oBeg);
/* compute the loss in log scale (cuda version) */
extern "C"
DTYPE CudaLossComputeForLogScale(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
DTYPE
_
CudaLossComputeForLogScale(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
int leadDim, int gBeg, int gLen, int oBeg);
/* backward computation for a single element (cuda version) */
extern "C"
DTYPE CudaLossBackwardPoint(DTYPE t, DTYPE y, LOSS_FUNCTION_NAME LFName);
DTYPE
_
CudaLossBackwardPoint(DTYPE t, DTYPE y, LOSS_FUNCTION_NAME LFName);
/* backward computation for (dense) vectors (cuda version) */
extern "C"
void CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
void
_
CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
LOSS_FUNCTION_NAME LFName,
int leadDim = -1, int tBeg = 0, int tLen = -1, int yBeg = 0);
...
...
source/tensor/function/Loss.h
查看文件 @
906eebb7
...
...
@@ -48,21 +48,21 @@ loss function to measure the "number" of errors
/* compute the loss */
extern
"C"
DTYPE
LossCompute
(
XTensor
*
gold
,
XTensor
*
output
,
LOSS_FUNCTION_NAME
LFName
,
DTYPE
_
LossCompute
(
XTensor
*
gold
,
XTensor
*
output
,
LOSS_FUNCTION_NAME
LFName
,
bool
isLogOutput
,
int
leadDim
,
int
gBeg
,
int
gLen
,
int
oBeg
);
/* compute the loss (log version) */
extern
"C"
DTYPE
LossComputeForLogScale
(
XTensor
*
gold
,
XTensor
*
output
,
LOSS_FUNCTION_NAME
LFName
,
DTYPE
_
LossComputeForLogScale
(
XTensor
*
gold
,
XTensor
*
output
,
LOSS_FUNCTION_NAME
LFName
,
int
leadDim
,
int
gBeg
,
int
gLen
,
int
oBeg
);
/* backward computation for a single element */
extern
"C"
DTYPE
LossBackwardPoint
(
DTYPE
t
,
DTYPE
y
,
LOSS_FUNCTION_NAME
LFName
);
DTYPE
_
LossBackwardPoint
(
DTYPE
t
,
DTYPE
y
,
LOSS_FUNCTION_NAME
LFName
);
/* backward computation for (dense) vectors */
extern
"C"
void
LossBackward
(
XTensor
*
dEdY
,
XTensor
*
t
,
XTensor
*
y
,
void
_
LossBackward
(
XTensor
*
dEdY
,
XTensor
*
t
,
XTensor
*
y
,
LOSS_FUNCTION_NAME
LFName
,
int
leadDim
=
-
1
,
int
tBeg
=
0
,
int
tLen
=
-
1
,
int
yBeg
=
0
);
...
...
source/tensor/function/Rectify.cpp
查看文件 @
906eebb7
...
...
@@ -19,6 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XName.h"
#include "Rectify.h"
#include "Rectify.cuh"
...
...
@@ -55,6 +56,28 @@ void _Rectify(const XTensor * x, XTensor * y)
}
/*
rectify function y = max(0, x) (returns an XTensor structure)
a new tensor is created from x to hold the result, and it is linked to x
through XLink with the FUNC_RECTIFY id
>> x - input tensor
<< return - y
*/
XTensor Rectify(const XTensor &x)
{
    /* result tensor, constructed from the input */
    XTensor result(&x);
    result.SetTMP();

    /* call _Rectify function */
    _Rectify(&x, &result);

    /* tensor connection */
    XLink::MakeLink(&x, NULL, &result, FUNC_RECTIFY);

    return result;
}
/*
backward computation
dE/dx = dE/dy * dy/dx
...
...
@@ -94,7 +117,7 @@ void _RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
{
/* calculate dE/dy */
if
(
lossName
!=
NOLOSS
)
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
_
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
DTYPE
*
dedyp
=
(
DTYPE
*
)
dedy
->
data
;
DTYPE
*
dedxp
=
(
DTYPE
*
)
dedx
->
data
;
...
...
source/tensor/function/Rectify.cu
查看文件 @
906eebb7
...
...
@@ -134,7 +134,7 @@ void _CudaRectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
/* calculate dE/dy */
if(lossName != NOLOSS)
CudaLossBackward(dedy, gold, y, lossName);
_
CudaLossBackward(dedy, gold, y, lossName);
int gridSize[3], blockSize[3];
...
...
source/tensor/function/Rectify.h
查看文件 @
906eebb7
...
...
@@ -28,11 +28,12 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* rectify function y = max(0, x) */
extern
"C"
void
_Rectify
(
const
XTensor
*
x
,
XTensor
*
y
);
/* rectify function y = max(0, x) (return a XTensor structure) */
XTensor
Rectify
(
const
XTensor
&
x
);
/* de/dx */
extern
"C"
void
_RectifyBackward
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
XTensor
*
dedy
,
XTensor
*
dedx
,
LOSS_FUNCTION_NAME
lossName
);
...
...
source/tensor/function/Sigmoid.cpp
查看文件 @
906eebb7
...
...
@@ -19,6 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-25
*/
#include "../XName.h"
#include <math.h>
#include "Sigmoid.h"
#include "Sigmoid.cuh"
...
...
@@ -53,6 +54,27 @@ void _Sigmoid(const XTensor * x, XTensor * y)
}
/*
sigmoid function y = 1/(1+exp(-x)) (returns an XTensor structure)
a new tensor is created from x to hold the result, and it is linked to x
through XLink with the FUNC_SIGMOID id
>> x - input tensor
<< return - y
*/
XTensor Sigmoid(const XTensor &x)
{
    /* result tensor, constructed from the input */
    XTensor result(&x);
    result.SetTMP();

    /* call _Sigmoid function */
    _Sigmoid(&x, &result);

    /* tensor connection */
    XLink::MakeLink(&x, NULL, &result, FUNC_SIGMOID);

    return result;
}
/*
backward computation
dE/dx = dE/dy * dy/dx
...
...
@@ -86,7 +108,7 @@ void _SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
{
/* calculate dE/dy */
if
(
lossName
!=
NOLOSS
)
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
_
LossBackward
(
dedy
,
gold
,
y
,
lossName
);
DTYPE
*
dedyp
=
(
DTYPE
*
)
dedy
->
data
;
DTYPE
*
dedxp
=
(
DTYPE
*
)
dedx
->
data
;
...
...
source/tensor/function/Sigmoid.cu
查看文件 @
906eebb7
...
...
@@ -129,7 +129,7 @@ void _CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
/* calculate dE/dy */
if(lossName != NOLOSS)
LossBackward(dedy, gold, y, lossName);
_
LossBackward(dedy, gold, y, lossName);
int gridSize[3], blockSize[3];
...
...
source/tensor/function/Sigmoid.h
查看文件 @
906eebb7
...
...
@@ -28,11 +28,12 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* sigmoid function y = 1/(1+exp(-x)) */
extern
"C"
void
_Sigmoid
(
const
XTensor
*
x
,
XTensor
*
y
);
/* sigmoid function y = 1/(1+exp(-x)) (return a XTensor structure) */
XTensor
Sigmoid
(
const
XTensor
&
x
);
/* de/dx */
extern
"C"
void
_SigmoidBackward
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
XTensor
*
dedy
,
XTensor
*
dedx
,
LOSS_FUNCTION_NAME
lossName
);
...
...
source/tensor/function/Softmax.cpp
查看文件 @
906eebb7
...
...
@@ -22,6 +22,7 @@
#include <math.h>
#include "Softmax.h"
#include "Softmax.cuh"
#include "../XName.h"
#include "../XUtility.h"
#include "../core/reduce/ReduceSum.h"
#include "../core/reduce/ReduceMax.h"
...
...
@@ -130,6 +131,28 @@ void _Softmax(const XTensor * x, XTensor * y, int leadDim)
}
/*
softmax y = e^x / \sum_{i} e^{x_i} (returns an XTensor structure)
a new tensor is created from x to hold the result, and it is linked to x
through XLink with the FUNC_SOFTMAX id
>> x - input vector
>> leadDim - leading dimension (along which we perform reduction)
<< return - y
*/
XTensor Softmax(const XTensor &x, int leadDim)
{
    XTensor y(&x);
    y.SetTMP();

    /* call _Softmax function */
    _Softmax(&x, &y, leadDim);

    /* tensor connection */
    XLink::MakeLink(&x, NULL, &y, FUNC_SOFTMAX);

    /* store the leading dimension with the link, as LogSoftmax does, so
       that the parameter is available to code that later walks the link */
    XLink::AddParamToHeadInt(&y, leadDim);

    return y;
}
/*
backward computation for dense tensors
dE/dx = dE/dy * dy/dx
...
...
source/tensor/function/Softmax.cuh
查看文件 @
906eebb7
...
...
@@ -31,7 +31,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */
extern "C"
void _CudaSotmax(const XTensor * input, XTensor * output, int leadDim);
void _CudaSo
f
tmax(const XTensor * input, XTensor * output, int leadDim);
/* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */
extern "C"
...
...
source/tensor/function/Softmax.h
查看文件 @
906eebb7
...
...
@@ -28,11 +28,12 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* softmax y = e^x / \sum_{i} e^{x_i} */
extern
"C"
void
_Softmax
(
const
XTensor
*
x
,
XTensor
*
y
,
int
leadDim
);
/* softmax y = e^x / \sum_{i} e^{x_i} (return a XTensor structure) */
XTensor
Softmax
(
const
XTensor
&
x
,
int
leadDim
);
/* de/dx */
extern
"C"
void
_SoftmaxBackward
(
XTensor
*
gold
,
XTensor
*
y
,
XTensor
*
x
,
XTensor
*
dedy
,
XTensor
*
dedx
,
int
leadDim
,
...
...
source/tensor/test/TAbsolute.cpp
查看文件 @
906eebb7
...
...
@@ -51,15 +51,21 @@ bool TestAbsolute1()
/* create tensors */
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
b
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
aMe
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
bUser
;
/* initialize variables */
a
->
SetData
(
aData
,
aUnitNum
);
aMe
->
SetData
(
aData
,
aUnitNum
);
/* call Absolute function */
_Absolute
(
a
);
_Absolute
(
a
,
b
);
_AbsoluteMe
(
aMe
);
bUser
=
Absolute
(
*
a
);
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -67,25 +73,37 @@ bool TestAbsolute1()
/* create tensor */
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
bGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aMeGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
bUserGPU
;
/* Initialize variables */
aGPU
->
SetData
(
aData
,
aUnitNum
);
aMeGPU
->
SetData
(
aData
,
aUnitNum
);
/* call Absolute function */
_Absolute
(
aGPU
);
_Absolute
(
aGPU
,
bGPU
);
_AbsoluteMe
(
aMeGPU
);
bUserGPU
=
Absolute
(
*
aGPU
);
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
/* destroy variables */
delete
a
;
delete
b
;
delete
aMe
;
delete
aGPU
;
delete
bGPU
;
delete
aMeGPU
;
delete
[]
aDimSize
;
return
cpuTest
&&
gpuTest
;
#else
/* destroy variables */
delete
a
;
delete
b
;
delete
aMe
;
delete
[]
aDimSize
;
return
cpuTest
;
...
...
source/tensor/test/TConcatenate.cpp
查看文件 @
906eebb7
...
...
@@ -76,6 +76,7 @@ bool TestConcatenate1()
XTensor
*
s1
=
NewTensor
(
sOrder1
,
sDimSize1
);
XTensor
*
s2
=
NewTensor
(
sOrder2
,
sDimSize2
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUser
;
/* initialize variables */
s1
->
SetData
(
sData1
,
sUnitNum1
);
...
...
@@ -88,9 +89,10 @@ bool TestConcatenate1()
/* call Concatenate function */
_Concatenate
(
sList
,
t
,
1
);
tUser
=
Concatenate
(
*
sList
,
1
);
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -100,6 +102,7 @@ bool TestConcatenate1()
XTensor
*
sGPU1
=
NewTensor
(
sOrder1
,
sDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder2
,
sDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* Initialize variables */
sGPU1
->
SetData
(
sData1
,
sUnitNum1
);
...
...
@@ -115,9 +118,10 @@ bool TestConcatenate1()
/* call Concatenate function */
_Concatenate
(
sList
,
tGPU
,
1
);
tUserGPU
=
Concatenate
(
*
sList
,
1
);
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
)
;
/* destroy variables */
delete
sList
;
...
...
@@ -201,6 +205,7 @@ bool TestConcatenate2()
XTensor
*
s1
=
NewTensor
(
sOrder1
,
sDimSize1
);
XTensor
*
s2
=
NewTensor
(
sOrder2
,
sDimSize2
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUser
;
/* initialize variables */
s1
->
SetData
(
sData1
,
sUnitNum1
);
...
...
@@ -213,9 +218,10 @@ bool TestConcatenate2()
/* call Concatenate function */
_Concatenate
(
sList
,
t
,
0
);
tUser
=
Concatenate
(
*
sList
,
0
);
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -225,6 +231,7 @@ bool TestConcatenate2()
XTensor
*
sGPU1
=
NewTensor
(
sOrder1
,
sDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder2
,
sDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* Initialize variables */
sGPU1
->
SetData
(
sData1
,
sUnitNum1
);
...
...
@@ -240,9 +247,10 @@ bool TestConcatenate2()
/* call Concatenate function */
_Concatenate
(
sList
,
tGPU
,
0
);
tUserGPU
=
Concatenate
(
*
sList
,
0
);
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
)
;
/* destroy variables */
delete
sList
;
...
...
@@ -324,6 +332,7 @@ bool TestConcatenate3()
XTensor
*
s1
=
NewTensor
(
sOrder1
,
sDimSize1
);
XTensor
*
s2
=
NewTensor
(
sOrder2
,
sDimSize2
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUser
;
/* initialize variables */
s1
->
SetData
(
sData1
,
sUnitNum1
);
...
...
@@ -336,9 +345,10 @@ bool TestConcatenate3()
/* call Concatenate function */
_Concatenate
(
sList
,
t
,
1
);
tUser
=
Concatenate
(
*
sList
,
1
);
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -348,6 +358,7 @@ bool TestConcatenate3()
XTensor
*
sGPU1
=
NewTensor
(
sOrder1
,
sDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder2
,
sDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* Initialize variables */
sGPU1
->
SetData
(
sData1
,
sUnitNum1
);
...
...
@@ -363,9 +374,10 @@ bool TestConcatenate3()
/* call Concatenate function */
_Concatenate
(
sList
,
tGPU
,
1
);
tUserGPU
=
Concatenate
(
*
sList
,
1
);
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
)
;
/* destroy variables */
delete
sList
;
...
...
@@ -444,6 +456,7 @@ bool TestConcatenate4()
XTensor
*
s1
=
NewTensor
(
sOrder1
,
sDimSize1
);
XTensor
*
s2
=
NewTensor
(
sOrder2
,
sDimSize2
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUser
;
/* initialize variables */
s1
->
SetData
(
sData1
,
sUnitNum1
);
...
...
@@ -452,9 +465,10 @@ bool TestConcatenate4()
/* call Concatenate function */
_Concatenate
(
s1
,
s2
,
t
,
1
);
tUser
=
Concatenate
(
*
s1
,
*
s2
,
1
);
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -464,6 +478,7 @@ bool TestConcatenate4()
XTensor
*
sGPU1
=
NewTensor
(
sOrder1
,
sDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder2
,
sDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* Initialize variables */
sGPU1
->
SetData
(
sData1
,
sUnitNum1
);
...
...
@@ -472,9 +487,10 @@ bool TestConcatenate4()
/* call Concatenate function */
_Concatenate
(
sGPU1
,
sGPU2
,
tGPU
,
1
);
tUserGPU
=
Concatenate
(
*
sGPU1
,
*
sGPU2
,
1
);
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
)
;
/* destroy variables */
delete
s1
;
...
...
source/tensor/test/THardTanH.cpp
查看文件 @
906eebb7
...
...
@@ -53,6 +53,7 @@ bool TestHardTanH1()
/* create tensors */
XTensor
*
x
=
NewTensor
(
order
,
dimSize
);
XTensor
*
y
=
NewTensor
(
order
,
dimSize
);
XTensor
yUser
;
/* initialize variables */
x
->
SetData
(
xData
,
unitNum
);
...
...
@@ -60,9 +61,10 @@ bool TestHardTanH1()
/* call hardtanh function */
_HardTanH
(
x
,
y
);
yUser
=
HardTanH
(
*
x
);
/* check results */
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
,
1e-4
F
);
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUser
.
CheckData
(
answer
,
unitNum
,
1e-4
F
)
;
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -71,6 +73,7 @@ bool TestHardTanH1()
/* create tensor */
XTensor
*
xGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
yGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
yUserGPU
;
/* Initialize variables */
xGPU
->
SetData
(
xData
,
unitNum
);
...
...
@@ -78,9 +81,10 @@ bool TestHardTanH1()
/* call hardtanh function */
_HardTanH
(
xGPU
,
yGPU
);
yUserGPU
=
HardTanH
(
*
xGPU
);
/* check results */
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
,
1e-4
F
);
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUserGPU
.
CheckData
(
answer
,
unitNum
,
1e-4
F
)
;
/* destroy variables */
delete
x
;
...
...
source/tensor/test/TIdentity.cpp
查看文件 @
906eebb7
...
...
@@ -51,6 +51,7 @@ bool TestIdentity1()
/* create tensors */
XTensor
*
x
=
NewTensor
(
order
,
dimSize
);
XTensor
*
y
=
NewTensor
(
order
,
dimSize
);
XTensor
yUser
;
/* initialize variables */
x
->
SetData
(
xData
,
unitNum
);
...
...
@@ -58,9 +59,10 @@ bool TestIdentity1()
/* call Identity function */
_Identity
(
x
,
y
);
yUser
=
Identity
(
*
x
);
/* check result */
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
);
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUser
.
CheckData
(
answer
,
unitNum
,
1e-4
F
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -69,6 +71,7 @@ bool TestIdentity1()
/* create tensors */
XTensor
*
xGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
yGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
yUserGPU
;
/* initialize variables */
xGPU
->
SetData
(
xData
,
unitNum
);
...
...
@@ -76,9 +79,10 @@ bool TestIdentity1()
/* call Identity function */
_Identity
(
xGPU
,
yGPU
);
yUserGPU
=
Identity
(
*
xGPU
);
/* check result */
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
);
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUserGPU
.
CheckData
(
answer
,
unitNum
,
1e-4
F
);
/* destroy variables */
delete
x
;
...
...
source/tensor/test/TLog.cpp
查看文件 @
906eebb7
...
...
@@ -51,15 +51,21 @@ bool TestLog1()
/* create tensors */
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
b
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
aMe
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
bUser
;
/* initialize variables */
a
->
SetData
(
aData
,
aUnitNum
);
aMe
->
SetData
(
aData
,
aUnitNum
);
/* call Log function */
_Log
(
a
);
_Log
(
a
,
b
);
_LogMe
(
aMe
);
bUser
=
Log
(
*
a
);
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -67,25 +73,37 @@ bool TestLog1()
/* create tensor */
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
bGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aMeGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
bUserGPU
;
/* Initialize variables */
aGPU
->
SetData
(
aData
,
aUnitNum
);
aMeGPU
->
SetData
(
aData
,
aUnitNum
);
/* call Log function */
_Log
(
aGPU
);
_Log
(
aGPU
,
bGPU
);
_LogMe
(
aMeGPU
);
bUserGPU
=
Log
(
*
aGPU
);
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
/* destroy variables */
delete
a
;
delete
b
;
delete
aMe
;
delete
aGPU
;
delete
bGPU
;
delete
aMeGPU
;
delete
[]
aDimSize
;
return
cpuTest
&&
gpuTest
;
#else
/* destroy variables */
delete
a
;
delete
b
;
delete
aMe
;
delete
[]
aDimSize
;
return
cpuTest
;
...
...
source/tensor/test/TLogSoftmax.cpp
查看文件 @
906eebb7
...
...
@@ -51,6 +51,7 @@ bool TestLogSoftmax1()
/* create tensors */
XTensor
*
x
=
NewTensor
(
order
,
dimSize
);
XTensor
*
y
=
NewTensor
(
order
,
dimSize
);
XTensor
yUser
;
/* initialize variables */
x
->
SetData
(
xData
,
unitNum
);
...
...
@@ -58,9 +59,10 @@ bool TestLogSoftmax1()
/* call LogSoftmax function */
_LogSoftmax
(
x
,
y
,
1
);
yUser
=
LogSoftmax
(
*
x
,
1
);
/* check result */
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
,
1e-4
F
);
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUser
.
CheckData
(
answer
,
unitNum
,
1e-4
F
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -69,6 +71,7 @@ bool TestLogSoftmax1()
/* create tensors */
XTensor
*
xGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
yGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
yUserGPU
;
/* initialize variables */
xGPU
->
SetData
(
xData
,
unitNum
);
...
...
@@ -76,9 +79,10 @@ bool TestLogSoftmax1()
/* call LogSoftmax function */
_LogSoftmax
(
xGPU
,
yGPU
,
1
);
yUserGPU
=
LogSoftmax
(
*
xGPU
,
1
);
/* check result */
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
,
1e-4
F
);
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUserGPU
.
CheckData
(
answer
,
unitNum
,
1e-4
F
);
/* destroy variables */
delete
x
;
...
...
source/tensor/test/TLoss.cpp
查看文件 @
906eebb7
...
...
@@ -46,6 +46,7 @@ bool TestLoss1()
bool
cpuTest
=
true
;
DTYPE
answer
=
5.0
F
;
DTYPE
error
;
/* create tensors */
XTensor
*
output
=
NewTensor
(
order
,
dimSize
);
...
...
@@ -57,8 +58,8 @@ bool TestLoss1()
_ScaleAndShiftMe
(
output
,
1
,
1
);
_ScaleAndShiftMe
(
gold
,
1
,
2
);
DTYPE
error
;
error
=
LossCompute
(
gold
,
output
,
SQUAREDERROR
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
/* call LossCompute function */
error
=
_
LossCompute
(
gold
,
output
,
SQUAREDERROR
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
/* check results */
cpuTest
=
(
error
==
answer
);
...
...
@@ -78,7 +79,7 @@ bool TestLoss1()
_ScaleAndShiftMe
(
goldGPU
,
1
,
2
);
/* call LossCompute function */
error
=
LossCompute
(
goldGPU
,
outputGPU
,
SQUAREDERROR
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
error
=
_
LossCompute
(
goldGPU
,
outputGPU
,
SQUAREDERROR
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
/* check results */
gpuTest
=
(
error
==
answer
);
...
...
@@ -123,6 +124,7 @@ bool TestLoss2()
bool
cpuTest
=
true
;
DTYPE
answer
=
0.0
F
;
DTYPE
error
;
/* create tensors */
XTensor
*
output
=
NewTensor
(
order
,
dimSize
);
...
...
@@ -134,8 +136,8 @@ bool TestLoss2()
_ScaleAndShiftMe
(
output
,
1
,
1
);
_ScaleAndShiftMe
(
gold
,
1
,
2
);
DTYPE
error
;
error
=
LossCompute
(
gold
,
output
,
CROSSENTROPY
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
/* call LossCompute function */
error
=
_
LossCompute
(
gold
,
output
,
CROSSENTROPY
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
/* check results */
cpuTest
=
(
error
==
answer
);
...
...
@@ -155,7 +157,7 @@ bool TestLoss2()
_ScaleAndShiftMe
(
goldGPU
,
1
,
2
);
/* call LossCompute function */
error
=
LossCompute
(
goldGPU
,
outputGPU
,
CROSSENTROPY
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
error
=
_
LossCompute
(
goldGPU
,
outputGPU
,
CROSSENTROPY
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
/* check results */
gpuTest
=
(
error
==
answer
);
...
...
@@ -210,6 +212,7 @@ bool TestLoss3()
bool
cpuTest
=
true
;
DTYPE
answer
=
0.25
F
;
DTYPE
error
;
/* create tensors */
XTensor
*
output
=
NewTensor
(
order
,
dimSize
);
...
...
@@ -219,8 +222,8 @@ bool TestLoss3()
output
->
SetData
(
outputData
,
unitNum
);
gold
->
SetData
(
goldData
,
unitNum
);
DTYPE
error
;
error
=
LossCompute
(
gold
,
output
,
ONEHOTERROR
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
/* call LossCompute function */
error
=
_
LossCompute
(
gold
,
output
,
ONEHOTERROR
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
/* check results */
cpuTest
=
(
error
==
answer
);
...
...
@@ -238,7 +241,7 @@ bool TestLoss3()
goldGPU
->
SetData
(
goldData
,
unitNum
);
/* call LossCompute function */
error
=
LossCompute
(
goldGPU
,
outputGPU
,
ONEHOTERROR
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
error
=
_
LossCompute
(
goldGPU
,
outputGPU
,
ONEHOTERROR
,
false
,
0
,
0
,
dimSize
[
0
],
0
);
/* check results */
gpuTest
=
(
error
==
answer
);
...
...
source/tensor/test/TMatrixMulBatched.cpp
查看文件 @
906eebb7
...
...
@@ -75,6 +75,7 @@ bool TestMatrixMulBatched1()
XTensor
*
s1
=
NewTensor
(
sOrder1
,
sDimSize1
);
XTensor
*
s2
=
NewTensor
(
sOrder2
,
sDimSize2
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUser
;
/* initialize variables */
s1
->
SetData
(
sData1
,
sUnitNum1
);
...
...
@@ -83,9 +84,10 @@ bool TestMatrixMulBatched1()
/* call MatrixMulBatched function */
_MatrixMulBatched
(
s1
,
X_NOTRANS
,
s2
,
X_NOTRANS
,
t
);
tUser
=
MatrixMulBatched
(
*
s1
,
X_NOTRANS
,
*
s2
,
X_NOTRANS
);
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -95,6 +97,7 @@ bool TestMatrixMulBatched1()
XTensor
*
sGPU1
=
NewTensor
(
sOrder1
,
sDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder2
,
sDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* Initialize variables */
sGPU1
->
SetData
(
sData1
,
sUnitNum1
);
...
...
@@ -103,9 +106,10 @@ bool TestMatrixMulBatched1()
/* call MatrixMulBatched function */
_MatrixMulBatched
(
sGPU1
,
X_NOTRANS
,
sGPU2
,
X_NOTRANS
,
tGPU
);
tUserGPU
=
MatrixMulBatched
(
*
sGPU1
,
X_NOTRANS
,
*
sGPU2
,
X_NOTRANS
);
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
)
;
/* destroy variables */
delete
s1
;
...
...
@@ -193,6 +197,7 @@ bool TestMatrixMulBatched2()
XTensor
*
s1
=
NewTensor
(
sOrder1
,
sDimSize1
);
XTensor
*
s2
=
NewTensor
(
sOrder2
,
sDimSize2
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUser
;
/* initialize variables */
s1
->
SetData
(
sData1
,
sUnitNum1
);
...
...
@@ -201,9 +206,10 @@ bool TestMatrixMulBatched2()
/* call MatrixMulBatched function */
_MatrixMulBatched
(
s1
,
X_NOTRANS
,
s2
,
X_NOTRANS
,
t
);
tUser
=
MatrixMulBatched
(
*
s1
,
X_NOTRANS
,
*
s2
,
X_NOTRANS
);
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -213,6 +219,7 @@ bool TestMatrixMulBatched2()
XTensor
*
sGPU1
=
NewTensor
(
sOrder1
,
sDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder2
,
sDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* Initialize variables */
sGPU1
->
SetData
(
sData1
,
sUnitNum1
);
...
...
@@ -221,9 +228,10 @@ bool TestMatrixMulBatched2()
/* call MatrixMulBatched function */
_MatrixMulBatched
(
sGPU1
,
X_NOTRANS
,
sGPU2
,
X_NOTRANS
,
tGPU
);
tUserGPU
=
MatrixMulBatched
(
*
sGPU1
,
X_NOTRANS
,
*
sGPU2
,
X_NOTRANS
);
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
)
;
/* destroy variables */
delete
s1
;
...
...
source/tensor/test/TMerge.cpp
查看文件 @
906eebb7
...
...
@@ -60,16 +60,18 @@ bool TestMerge1()
/* create tensors */
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUser
;
/* initialize variables */
s
->
SetData
(
sData
,
sUnitNum
);
t
->
SetZeroAll
();
/* call
m
erge function */
/* call
M
erge function */
_Merge
(
s
,
t
,
1
,
0
);
tUser
=
Merge
(
*
s
,
1
,
0
);
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -78,16 +80,18 @@ bool TestMerge1()
/* create tensor */
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* Initialize variables */
sGPU
->
SetData
(
sData
,
sUnitNum
);
tGPU
->
SetZeroAll
();
/* call
m
erge function */
/* call
M
erge function */
_Merge
(
sGPU
,
tGPU
,
1
,
0
);
tUserGPU
=
Merge
(
*
sGPU
,
1
,
0
);
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
);
/* destroy variables */
delete
s
;
...
...
@@ -166,18 +170,23 @@ bool TestMerge2()
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
t1
=
NewTensor
(
tOrder1
,
tDimSize1
);
XTensor
*
t2
=
NewTensor
(
tOrder2
,
tDimSize2
);
XTensor
tUser1
;
XTensor
tUser2
;
/* initialize variables */
s
->
SetData
(
sData
,
sUnitNum
);
t1
->
SetZeroAll
();
t2
->
SetZeroAll
();
/* call
m
erge function */
/* call
M
erge function */
_Merge
(
s
,
t1
,
1
,
0
);
_Merge
(
s
,
t2
,
2
,
0
);
tUser1
=
Merge
(
*
s
,
1
,
0
);
tUser2
=
Merge
(
*
s
,
2
,
0
);
/* check results */
cpuTest
=
t1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
t2
->
CheckData
(
answer2
,
tUnitNum2
);
cpuTest
=
t1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tUser1
.
CheckData
(
answer1
,
tUnitNum1
)
&&
t2
->
CheckData
(
answer2
,
tUnitNum2
)
&&
tUser2
.
CheckData
(
answer2
,
tUnitNum2
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -187,18 +196,23 @@ bool TestMerge2()
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU1
=
NewTensor
(
tOrder1
,
tDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU2
=
NewTensor
(
tOrder2
,
tDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU1
;
XTensor
tUserGPU2
;
/* Initialize variables */
sGPU
->
SetData
(
sData
,
sUnitNum
);
tGPU1
->
SetZeroAll
();
tGPU2
->
SetZeroAll
();
/* call
m
erge function */
/* call
M
erge function */
_Merge
(
sGPU
,
tGPU1
,
1
,
0
);
_Merge
(
sGPU
,
tGPU2
,
2
,
0
);
tUserGPU1
=
Merge
(
*
sGPU
,
1
,
0
);
tUserGPU2
=
Merge
(
*
sGPU
,
2
,
0
);
/* check results */
gpuTest
=
tGPU1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tGPU2
->
CheckData
(
answer2
,
tUnitNum2
);
gpuTest
=
tGPU1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tUserGPU1
.
CheckData
(
answer1
,
tUnitNum1
)
&&
tGPU2
->
CheckData
(
answer2
,
tUnitNum2
)
&&
tUserGPU2
.
CheckData
(
answer2
,
tUnitNum2
);
/* destroy variables */
delete
s
;
...
...
@@ -271,6 +285,7 @@ bool TestMerge3()
XTensor
*
s1
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
s2
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUser
;
/* initialize variables */
s1
->
SetData
(
sData1
,
sUnitNum
);
...
...
@@ -281,11 +296,12 @@ bool TestMerge3()
smallList
->
Add
(
s1
);
smallList
->
Add
(
s2
);
/* call
m
erge function */
/* call
M
erge function */
_Merge
(
smallList
,
t
,
0
);
tUser
=
Merge
(
*
smallList
,
0
);
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -298,6 +314,7 @@ bool TestMerge3()
XTensor
*
sGPU1
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUserGPU
;
/* initialize variables */
sGPU1
->
SetData
(
sData1
,
sUnitNum
);
...
...
@@ -308,11 +325,12 @@ bool TestMerge3()
smallList
->
Add
(
sGPU1
);
smallList
->
Add
(
sGPU2
);
/* call
m
erge function */
/* call
M
erge function */
_Merge
(
smallList
,
tGPU
,
0
);
tUserGPU
=
Merge
(
*
smallList
,
0
);
/* check results */
cpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
);
/* destroy variables */
delete
s1
;
...
...
@@ -383,6 +401,7 @@ bool TestMerge4()
XTensor
*
s1
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
s2
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUser
;
/* initialize variables */
s1
->
SetData
(
sData1
,
sUnitNum
);
...
...
@@ -393,11 +412,12 @@ bool TestMerge4()
smallList
->
Add
(
s1
);
smallList
->
Add
(
s2
);
/* call
m
erge function */
/* call
M
erge function */
_Merge
(
smallList
,
t
,
1
);
tUser
=
Merge
(
*
smallList
,
1
);
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -410,6 +430,7 @@ bool TestMerge4()
XTensor
*
sGPU1
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU2
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
tUserGPU
;
/* initialize variables */
sGPU1
->
SetData
(
sData1
,
sUnitNum
);
...
...
@@ -420,11 +441,12 @@ bool TestMerge4()
smallList
->
Add
(
sGPU1
);
smallList
->
Add
(
sGPU2
);
/* call
m
erge function */
/* call
M
erge function */
_Merge
(
smallList
,
tGPU
,
1
);
tUserGPU
=
Merge
(
*
smallList
,
1
);
/* check results */
cpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
);
/* destroy variables */
delete
s1
;
...
...
source/tensor/test/TNegate.cpp
查看文件 @
906eebb7
...
...
@@ -48,15 +48,21 @@ bool TestNegate1()
/* create tensors */
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
b
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
aMe
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
bUser
;
/* initialize variables */
a
->
SetData
(
aData
,
aUnitNum
);
aMe
->
SetData
(
aData
,
aUnitNum
);
/* call Negate function */
_Negate
(
a
);
_Negate
(
a
,
b
);
_NegateMe
(
aMe
);
bUser
=
Negate
(
*
a
);
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
);
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -64,25 +70,37 @@ bool TestNegate1()
/* create tensor */
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
bGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aMeGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
bUserGPU
;
/* Initialize variables */
aGPU
->
SetData
(
aData
,
aUnitNum
);
aMeGPU
->
SetData
(
aData
,
aUnitNum
);
/* call Negate function */
_Negate
(
aGPU
);
_Negate
(
aGPU
,
bGPU
);
_NegateMe
(
aMeGPU
);
bUserGPU
=
Negate
(
*
aGPU
);
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
);
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
/* destroy variables */
delete
a
;
delete
b
;
delete
aMe
;
delete
aGPU
;
delete
bGPU
;
delete
aMeGPU
;
delete
[]
aDimSize
;
return
cpuTest
&&
gpuTest
;
#else
/* destroy variables */
delete
a
;
delete
b
;
delete
aMe
;
delete
[]
aDimSize
;
return
cpuTest
;
...
...
@@ -114,15 +132,21 @@ bool TestNegate2()
/* create tensors */
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
b
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
aMe
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
bUser
;
/* initialize variables */
a
->
SetData
(
aData
,
aUnitNum
);
aMe
->
SetData
(
aData
,
aUnitNum
);
/* call Negate function */
_Negate
(
a
);
_Negate
(
a
,
b
);
_NegateMe
(
aMe
);
bUser
=
Negate
(
*
a
);
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
);
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -130,25 +154,37 @@ bool TestNegate2()
/* create tensor */
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
bGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aMeGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
bUserGPU
;
/* Initialize variables */
aGPU
->
SetData
(
aData
,
aUnitNum
);
aMeGPU
->
SetData
(
aData
,
aUnitNum
);
/* call Negate function */
_Negate
(
aGPU
);
_Negate
(
aGPU
,
bGPU
);
_NegateMe
(
aMeGPU
);
bUserGPU
=
Negate
(
*
aGPU
);
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
);
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
/* destroy variables */
delete
a
;
delete
b
;
delete
aMe
;
delete
aGPU
;
delete
bGPU
;
delete
aMeGPU
;
delete
[]
aDimSize
;
return
cpuTest
&&
gpuTest
;
#else
/* destroy variables */
delete
a
;
delete
b
;
delete
aMe
;
delete
[]
aDimSize
;
return
cpuTest
;
...
...
source/tensor/test/TPower.cpp
查看文件 @
906eebb7
...
...
@@ -52,15 +52,21 @@ bool TestPower1()
/* create tensors */
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
b
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
aMe
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
bUser
;
/* initialize variables */
a
->
SetData
(
aData
,
aUnitNum
);
aMe
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
_Power
(
a
,
2.0
F
);
_Power
(
a
,
b
,
2.0
F
);
_PowerMe
(
aMe
,
2.0
F
);
bUser
=
Power
(
*
a
,
2.0
F
);
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -68,25 +74,37 @@ bool TestPower1()
/* create tensor */
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
bGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aMeGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
bUserGPU
;
/* Initialize variables */
aGPU
->
SetData
(
aData
,
aUnitNum
);
aMeGPU
->
SetData
(
aData
,
aUnitNum
);
/* call power function */
_Power
(
aGPU
,
2.0
F
);
_Power
(
aGPU
,
bGPU
,
2.0
F
);
_PowerMe
(
aMeGPU
,
2.0
F
);
bUserGPU
=
Power
(
*
aGPU
,
2.0
F
);
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
/* destroy variables */
delete
a
;
delete
b
;
delete
aMe
;
delete
aGPU
;
delete
bGPU
;
delete
aMeGPU
;
delete
[]
aDimSize
;
return
cpuTest
&&
gpuTest
;
#else
/* destroy variables */
delete
a
;
delete
b
;
delete
aMe
;
delete
[]
aDimSize
;
return
cpuTest
;
...
...
@@ -121,15 +139,21 @@ bool TestPower2()
/* create tensors */
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
b
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
aMe
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
bUser
;
/* initialize variables */
a
->
SetData
(
aData
,
aUnitNum
);
aMe
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
_Power
(
a
,
1.0
F
);
_Power
(
a
,
b
,
1.0
F
);
_PowerMe
(
aMe
,
1.0
F
);
bUser
=
Power
(
*
a
,
1.0
F
);
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -137,25 +161,37 @@ bool TestPower2()
/* create tensor */
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
bGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aMeGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
bUserGPU
;
/* Initialize variables */
aGPU
->
SetData
(
aData
,
aUnitNum
);
aMeGPU
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
_Power
(
aGPU
,
1.0
F
);
_Power
(
aGPU
,
bGPU
,
1.0
F
);
_PowerMe
(
aMeGPU
,
1.0
F
);
bUserGPU
=
Power
(
*
aGPU
,
1.0
F
);
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
/* destroy variables */
delete
a
;
delete
b
;
delete
aMe
;
delete
aGPU
;
delete
bGPU
;
delete
aMeGPU
;
delete
[]
aDimSize
;
return
cpuTest
&&
gpuTest
;
#else
/* destroy variables */
delete
a
;
delete
b
;
delete
aMe
;
delete
[]
aDimSize
;
return
cpuTest
;
...
...
@@ -190,15 +226,21 @@ bool TestPower3()
/* create tensors */
XTensor
*
a
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
b
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
*
aMe
=
NewTensor
(
aOrder
,
aDimSize
);
XTensor
bUser
;
/* initialize variables */
a
->
SetData
(
aData
,
aUnitNum
);
aMe
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
_Power
(
a
,
0.0
F
);
_Power
(
a
,
b
,
0.0
F
);
_PowerMe
(
aMe
,
0.0
F
);
bUser
=
Power
(
*
a
,
0.0
F
);
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
cpuTest
=
b
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMe
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUser
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -206,25 +248,37 @@ bool TestPower3()
/* create tensor */
XTensor
*
aGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
bGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
aMeGPU
=
NewTensor
(
aOrder
,
aDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
bUserGPU
;
/* Initialize variables */
aGPU
->
SetData
(
aData
,
aUnitNum
);
aMeGPU
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
_Power
(
aGPU
,
0.0
F
);
_Power
(
aGPU
,
bGPU
,
0.0
F
);
_PowerMe
(
aMeGPU
,
0.0
F
);
bUserGPU
=
Power
(
*
aGPU
,
0.0
F
);
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
gpuTest
=
bGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
aMeGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
)
&&
bUserGPU
.
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
/* destroy variables */
delete
a
;
delete
b
;
delete
aMe
;
delete
aGPU
;
delete
bGPU
;
delete
aMeGPU
;
delete
[]
aDimSize
;
return
cpuTest
&&
gpuTest
;
#else
/* destroy variables */
delete
a
;
delete
b
;
delete
aMe
;
delete
[]
aDimSize
;
return
cpuTest
;
...
...
source/tensor/test/TRectify.cpp
查看文件 @
906eebb7
...
...
@@ -50,6 +50,7 @@ bool TestRectify1()
/* create tensors */
XTensor
*
x
=
NewTensor
(
order
,
dimSize
);
XTensor
*
y
=
NewTensor
(
order
,
dimSize
);
XTensor
yUser
;
/* initialize variables */
x
->
SetData
(
xData
,
unitNum
);
...
...
@@ -57,9 +58,10 @@ bool TestRectify1()
/* call Rectify function */
_Rectify
(
x
,
y
);
yUser
=
Rectify
(
*
x
);
/* check results */
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
);
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUser
.
CheckData
(
answer
,
unitNum
,
1e-4
F
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -68,6 +70,7 @@ bool TestRectify1()
/* create tensor */
XTensor
*
xGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
yGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
yUserGPU
;
/* Initialize variables */
xGPU
->
SetData
(
xData
,
unitNum
);
...
...
@@ -75,9 +78,10 @@ bool TestRectify1()
/* call Rectify function */
_Rectify
(
xGPU
,
yGPU
);
yUserGPU
=
Rectify
(
*
xGPU
);
/* check results */
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
);
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUserGPU
.
CheckData
(
answer
,
unitNum
,
1e-4
F
);
/* destroy variables */
delete
x
;
...
...
source/tensor/test/TReduceMax.cpp
查看文件 @
906eebb7
...
...
@@ -71,6 +71,8 @@ bool TestReduceMax1()
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
t1
=
NewTensor
(
tOrder1
,
tDimSize1
);
XTensor
*
t2
=
NewTensor
(
tOrder2
,
tDimSize2
);
XTensor
tUser1
;
XTensor
tUser2
;
/* initialize variables */
s
->
SetData
(
sData
,
sUnitNum
);
...
...
@@ -80,9 +82,12 @@ bool TestReduceMax1()
/* call ReduceMax function */
_ReduceMax
(
s
,
t1
,
0
);
_ReduceMax
(
s
,
t2
,
1
);
tUser1
=
ReduceMax
(
*
s
,
0
);
tUser2
=
ReduceMax
(
*
s
,
1
);
/* check results */
cpuTest
=
t1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
t2
->
CheckData
(
answer2
,
tUnitNum2
);
cpuTest
=
t1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tUser1
.
CheckData
(
answer1
,
tUnitNum1
)
&&
t2
->
CheckData
(
answer2
,
tUnitNum2
)
&&
tUser2
.
CheckData
(
answer2
,
tUnitNum2
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -92,6 +97,8 @@ bool TestReduceMax1()
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU1
=
NewTensor
(
tOrder1
,
tDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU2
=
NewTensor
(
tOrder2
,
tDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU1
;
XTensor
tUserGPU2
;
/* initialize variables */
sGPU
->
SetData
(
sData
,
sUnitNum
);
...
...
@@ -101,9 +108,12 @@ bool TestReduceMax1()
/* call ReduceMax function */
_ReduceMax
(
sGPU
,
tGPU1
,
0
);
_ReduceMax
(
sGPU
,
tGPU2
,
1
);
tUserGPU1
=
ReduceMax
(
*
sGPU
,
0
);
tUserGPU2
=
ReduceMax
(
*
sGPU
,
1
);
/* check results */
gpuTest
=
tGPU1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tGPU2
->
CheckData
(
answer2
,
tUnitNum2
);
gpuTest
=
tGPU1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tUserGPU1
.
CheckData
(
answer1
,
tUnitNum1
)
&&
tGPU2
->
CheckData
(
answer2
,
tUnitNum2
)
&&
tUserGPU2
.
CheckData
(
answer2
,
tUnitNum2
);
/* destroy variables */
delete
s
;
...
...
source/tensor/test/TReduceMean.cpp
查看文件 @
906eebb7
...
...
@@ -66,6 +66,8 @@ bool TestReduceMean1()
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
t1
=
NewTensor
(
tOrder1
,
tDimSize1
);
XTensor
*
t2
=
NewTensor
(
tOrder2
,
tDimSize2
);
XTensor
tUser1
;
XTensor
tUser2
;
/* initialize variables */
s
->
SetData
(
sData
,
sUnitNum
);
...
...
@@ -75,9 +77,12 @@ bool TestReduceMean1()
/* call ReduceMean function */
_ReduceMean
(
s
,
t1
,
0
);
_ReduceMean
(
s
,
t2
,
1
);
tUser1
=
ReduceMean
(
*
s
,
0
);
tUser2
=
ReduceMean
(
*
s
,
1
);
/* check results */
cpuTest
=
t1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
t2
->
CheckData
(
answer2
,
tUnitNum2
);
cpuTest
=
t1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tUser1
.
CheckData
(
answer1
,
tUnitNum1
)
&&
t2
->
CheckData
(
answer2
,
tUnitNum2
)
&&
tUser2
.
CheckData
(
answer2
,
tUnitNum2
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -87,6 +92,8 @@ bool TestReduceMean1()
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU1
=
NewTensor
(
tOrder1
,
tDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU2
=
NewTensor
(
tOrder2
,
tDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU1
;
XTensor
tUserGPU2
;
/* Initialize variables */
sGPU
->
SetData
(
sData
,
sUnitNum
);
...
...
@@ -96,9 +103,12 @@ bool TestReduceMean1()
/* call ReduceMean function */
_ReduceMean
(
sGPU
,
tGPU1
,
0
);
_ReduceMean
(
sGPU
,
tGPU2
,
1
);
tUserGPU1
=
ReduceMean
(
*
sGPU
,
0
);
tUserGPU2
=
ReduceMean
(
*
sGPU
,
1
);
/* check results */
cpuTest
=
tGPU1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tGPU2
->
CheckData
(
answer2
,
tUnitNum2
);
gpuTest
=
tGPU1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tUserGPU1
.
CheckData
(
answer1
,
tUnitNum1
)
&&
tGPU2
->
CheckData
(
answer2
,
tUnitNum2
)
&&
tUserGPU2
.
CheckData
(
answer2
,
tUnitNum2
);
/* destroy variables */
delete
s
;
...
...
source/tensor/test/TReduceSum.cpp
查看文件 @
906eebb7
...
...
@@ -69,20 +69,29 @@ bool TestReduceSum1()
/* create tensors */
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
shift1
=
NewTensor
(
tOrder1
,
tDimSize1
);
XTensor
*
shift2
=
NewTensor
(
tOrder2
,
tDimSize2
);
XTensor
*
t1
=
NewTensor
(
tOrder1
,
tDimSize1
);
XTensor
*
t2
=
NewTensor
(
tOrder2
,
tDimSize2
);
XTensor
tUser1
;
XTensor
tUser2
;
/* initialize variables */
s
->
SetData
(
sData
,
sUnitNum
);
shift1
->
SetZeroAll
();
shift2
->
SetZeroAll
();
t1
->
SetZeroAll
();
t2
->
SetZeroAll
();
/* call ReduceSum function */
_ReduceSum
(
s
,
t1
,
0
);
_ReduceSum
(
s
,
t2
,
1
);
tUser1
=
ReduceSum
(
*
s
,
0
,
*
shift1
);
tUser2
=
ReduceSum
(
*
s
,
1
,
*
shift2
);
/* check results */
cpuTest
=
t1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
t2
->
CheckData
(
answer2
,
tUnitNum2
);
cpuTest
=
t1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tUser1
.
CheckData
(
answer1
,
tUnitNum1
)
&&
t2
->
CheckData
(
answer2
,
tUnitNum2
)
&&
tUser2
.
CheckData
(
answer2
,
tUnitNum2
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -90,26 +99,39 @@ bool TestReduceSum1()
/* create tensors */
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
shiftGPU1
=
NewTensor
(
tOrder1
,
tDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
shiftGPU2
=
NewTensor
(
tOrder2
,
tDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU1
=
NewTensor
(
tOrder1
,
tDimSize1
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU2
=
NewTensor
(
tOrder2
,
tDimSize2
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU1
;
XTensor
tUserGPU2
;
/* initialize variables */
sGPU
->
SetData
(
sData
,
sUnitNum
);
shiftGPU1
->
SetZeroAll
();
shiftGPU2
->
SetZeroAll
();
tGPU1
->
SetZeroAll
();
tGPU2
->
SetZeroAll
();
/* call ReduceSum function */
_ReduceSum
(
sGPU
,
tGPU1
,
0
);
_ReduceSum
(
sGPU
,
tGPU2
,
1
);
tUserGPU1
=
ReduceSum
(
*
sGPU
,
0
,
*
shiftGPU1
);
tUserGPU2
=
ReduceSum
(
*
sGPU
,
1
,
*
shiftGPU2
);
/* check results */
cpuTest
=
tGPU1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tGPU2
->
CheckData
(
answer2
,
tUnitNum2
);
gpuTest
=
tGPU1
->
CheckData
(
answer1
,
tUnitNum1
)
&&
tUserGPU1
.
CheckData
(
answer1
,
tUnitNum1
)
&&
tGPU2
->
CheckData
(
answer2
,
tUnitNum2
)
&&
tUserGPU2
.
CheckData
(
answer2
,
tUnitNum2
);
/* destroy variables */
delete
s
;
delete
shift1
;
delete
shift2
;
delete
t1
;
delete
t2
;
delete
sGPU
;
delete
shiftGPU1
;
delete
shiftGPU2
;
delete
tGPU1
;
delete
tGPU2
;
delete
[]
sDimSize
;
...
...
@@ -120,6 +142,8 @@ bool TestReduceSum1()
#else
/* destroy variables */
delete
s
;
delete
shift1
;
delete
shift2
;
delete
t1
;
delete
t2
;
delete
[]
sDimSize
;
...
...
source/tensor/test/TReduceSumSquared.cpp
查看文件 @
906eebb7
...
...
@@ -70,6 +70,7 @@ bool TestReduceSumSquared1()
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
shift
=
NewTensor
(
shiftOrder
,
shiftDimSize
);
XTensor
tUser
;
/* initialize variables */
s
->
SetData
(
sData
,
sUnitNum
);
...
...
@@ -78,9 +79,10 @@ bool TestReduceSumSquared1()
/* call ReduceSumSquared function */
_ReduceSumSquared
(
s
,
t
,
0
,
shift
);
tUser
=
ReduceSumSquared
(
*
s
,
0
,
*
shift
);
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -90,6 +92,7 @@ bool TestReduceSumSquared1()
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
shiftGPU
=
NewTensor
(
shiftOrder
,
shiftDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* initialize variables */
sGPU
->
SetData
(
sData
,
sUnitNum
);
...
...
@@ -98,9 +101,10 @@ bool TestReduceSumSquared1()
/* call ReduceSumSquared function */
_ReduceSumSquared
(
sGPU
,
tGPU
,
0
,
shiftGPU
);
tUserGPU
=
ReduceSumSquared
(
*
sGPU
,
0
,
*
shiftGPU
);
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
)
;
/* destroy variables */
delete
s
;
...
...
@@ -174,6 +178,7 @@ bool TestReduceSumSquared2()
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
shift
=
NewTensor
(
shiftOrder
,
shiftDimSize
);
XTensor
tUser
;
/* initialize variables */
s
->
SetData
(
sData
,
sUnitNum
);
...
...
@@ -182,9 +187,10 @@ bool TestReduceSumSquared2()
/* call ReduceSumSquared function */
_ReduceSumSquared
(
s
,
t
,
1
,
shift
);
tUser
=
ReduceSumSquared
(
*
s
,
1
,
*
shift
);
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -194,6 +200,7 @@ bool TestReduceSumSquared2()
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
shiftGPU
=
NewTensor
(
shiftOrder
,
shiftDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* initialize variables */
sGPU
->
SetData
(
sData
,
sUnitNum
);
...
...
@@ -202,9 +209,10 @@ bool TestReduceSumSquared2()
/* call ReduceSumSquared function */
_ReduceSumSquared
(
sGPU
,
tGPU
,
1
,
shiftGPU
);
tUserGPU
=
ReduceSumSquared
(
*
sGPU
,
1
,
*
shiftGPU
);
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
)
;
/* destroy variables */
delete
s
;
...
...
source/tensor/test/TReduceVariance.cpp
查看文件 @
906eebb7
...
...
@@ -70,6 +70,7 @@ bool TestReduceVariance1()
XTensor
*
s
=
NewTensor
(
sOrder
,
sDimSize
);
XTensor
*
t
=
NewTensor
(
tOrder
,
tDimSize
);
XTensor
*
mean
=
NewTensor
(
meanOrder
,
meanDimSize
);
XTensor
tUser
;
/* initialize variables */
s
->
SetData
(
sData
,
sUnitNum
);
...
...
@@ -78,9 +79,10 @@ bool TestReduceVariance1()
/* call ReduceVariance function */
_ReduceVariance
(
s
,
t
,
0
,
mean
);
tUser
=
ReduceVariance
(
*
s
,
0
,
*
mean
);
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
)
&&
tUser
.
CheckData
(
answer
,
tUnitNum
)
;
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -90,6 +92,7 @@ bool TestReduceVariance1()
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
tDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
meanGPU
=
NewTensor
(
meanOrder
,
meanDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
tUserGPU
;
/* initialize variables */
sGPU
->
SetData
(
sData
,
sUnitNum
);
...
...
@@ -98,9 +101,10 @@ bool TestReduceVariance1()
/* call ReduceVariance function */
_ReduceVariance
(
sGPU
,
tGPU
,
0
,
meanGPU
);
tUserGPU
=
ReduceVariance
(
*
sGPU
,
0
,
*
meanGPU
);
/* check results */
gpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
gpuTest
=
t
GPU
->
CheckData
(
answer
,
tUnitNum
)
&&
tUserGPU
.
CheckData
(
answer
,
tUnitNum
);
/* destroy variables */
delete
s
;
...
...
source/tensor/test/TSigmoid.cpp
查看文件 @
906eebb7
...
...
@@ -48,6 +48,7 @@ bool TestSigmoid1()
/* create tensors */
XTensor
*
x
=
NewTensor
(
order
,
dimSize
);
XTensor
*
y
=
NewTensor
(
order
,
dimSize
);
XTensor
yUser
;
/* initialize variables */
x
->
SetData
(
xData
,
unitNum
);
...
...
@@ -55,9 +56,10 @@ bool TestSigmoid1()
/* call Sigmoid function */
_Sigmoid
(
x
,
y
);
yUser
=
Sigmoid
(
*
x
);
/* check result */
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
,
1e-4
F
);
cpuTest
=
y
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUser
.
CheckData
(
answer
,
unitNum
,
1e-4
F
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -66,6 +68,7 @@ bool TestSigmoid1()
/* create tensors */
XTensor
*
xGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
yGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
yUserGPU
;
/* initialize variables */
xGPU
->
SetData
(
xData
,
unitNum
);
...
...
@@ -73,9 +76,10 @@ bool TestSigmoid1()
/* call Sigmoid function */
_Sigmoid
(
xGPU
,
yGPU
);
yUserGPU
=
Sigmoid
(
*
xGPU
);
/* check result */
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
,
1e-4
F
);
gpuTest
=
yGPU
->
CheckData
(
answer
,
unitNum
,
1e-4
F
)
&&
yUserGPU
.
CheckData
(
answer
,
unitNum
,
1e-4
F
);
/* destroy variables */
delete
x
;
...
...
source/tensor/test/TSign.cpp
查看文件 @
906eebb7
差异被折叠。
点击展开。
source/tensor/test/TSoftmax.cpp
查看文件 @
906eebb7
差异被折叠。
点击展开。
source/tensor/test/TSort.cpp
查看文件 @
906eebb7
差异被折叠。
点击展开。
source/tensor/test/TSplit.cpp
查看文件 @
906eebb7
差异被折叠。
点击展开。
source/tensor/test/TTopK.cpp
查看文件 @
906eebb7
差异被折叠。
点击展开。
source/tensor/test/TUnsqueeze.cpp
查看文件 @
906eebb7
差异被折叠。
点击展开。
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论