Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
N
NiuTrans.Tensor
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
0
Issues
0
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
Emmay
NiuTrans.Tensor
Commits
0887fae1
Commit
0887fae1
authored
Jul 07, 2018
by
liyinqiao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Format correction.
parent
42f995ae
隐藏空白字符变更
内嵌
并排
正在显示
102 个修改的文件
包含
990 行增加
和
1118 行删除
+990
-1118
source/core/CHeader.h
+5
-0
source/core/Concatenate.cpp
+4
-0
source/core/Concatenate.h
+2
-1
source/core/ConcatenateSolely.cpp
+4
-2
source/core/ConcatenateSolely.h
+0
-1
source/core/CopyBlocks.cpp
+4
-2
source/core/CopyBlocksOnSite.cpp
+5
-2
source/core/CopyInGrid.cpp
+1
-1
source/core/CopyIndexed.cpp
+1
-0
source/core/CopyValues.cuh
+0
-1
source/core/FlushToMem.cu
+0
-2
source/core/MakeSplitBlockIndex.cpp
+1
-0
source/core/MakeSplitBlockIndex.cu
+1
-0
source/core/MatrixMULBatchedCPU.cpp
+1
-5
source/core/MatrixMul.h
+1
-1
source/core/MatrixMul2D.cpp
+1
-1
source/core/MatrixMul2D.cu
+3
-3
source/core/MatrixMulBatched.cpp
+1
-0
source/core/Merge.cpp
+0
-1
source/core/MergeBlockLists.cpp
+6
-6
source/core/MergeBlockLists.cu
+1
-20
source/core/MultiplyElementWise.cpp
+1
-0
source/core/MultiplyElementWise.cu
+1
-0
source/core/Negate.cpp
+2
-2
source/core/Negate.cu
+4
-4
source/core/Normalize.cpp
+1
-0
source/core/Normalize.cu
+1
-0
source/core/Normalize.cuh
+4
-2
source/core/Power.cpp
+2
-1
source/core/Power.cu
+0
-6
source/core/ReduceMax.cu
+48
-59
source/core/ReduceMean.cpp
+0
-3
source/core/ReduceSum.cu
+0
-5
source/core/ReduceSumSquared.cpp
+0
-1
source/core/ReduceVariance.cpp
+0
-1
source/core/ScaleAndShift.cpp
+0
-2
source/core/ScaleAndShift.cu
+0
-2
source/core/Select.cpp
+1
-2
source/core/SetData.cpp
+5
-4
source/core/Sort.cpp
+1
-0
source/core/Sort.cuh
+1
-0
source/core/Split.h
+1
-0
source/core/Sum.cu
+1
-0
source/core/Sum.cuh
+1
-1
source/core/SumByColumnVT.cu
+1
-0
source/core/SumByColumnVT.h
+0
-1
source/core/TopK.cpp
+1
-0
source/core/TopK.cu
+9
-4
source/core/XMatrixSegment.cpp
+1
-1
source/core/XMatrixSegment.h
+1
-3
source/core/XTensorBLAS.cu
+3
-10
source/core/XTensorCore.h
+48
-6
source/function/HardTanH.cpp
+0
-1
source/function/HardTanH.cu
+0
-1
source/function/LogSoftmax.cpp
+0
-2
source/function/LogSoftmax.cu
+8
-5
source/function/Loss.cu
+0
-1
source/function/Rectify.cu
+0
-1
source/function/Sigmoid.cpp
+0
-1
source/function/Sigmoid.cu
+0
-2
source/function/Softmax.cuh
+0
-1
source/test/TConcatenate.cpp
+39
-35
source/test/TConcatenateSolely.cpp
+31
-28
source/test/TCopyIndexed.cpp
+18
-17
source/test/TCopyValues.cpp
+5
-5
source/test/THardTanH.cpp
+72
-63
source/test/TIdentity.cpp
+24
-22
source/test/TLogSoftmax.cpp
+38
-35
source/test/TLoss.cpp
+26
-23
source/test/TMatrixMULBatchedCPU.cpp
+18
-17
source/test/TMatrixMul.cpp
+71
-67
source/test/TMatrixMul2D.cpp
+24
-21
source/test/TMatrixMul2DParallel.cpp
+24
-22
source/test/TMatrixMulBatched.cpp
+28
-28
source/test/TMerge.cpp
+38
-34
source/test/TMultiplyElementWise.cpp
+30
-27
source/test/TNegate.cpp
+13
-13
source/test/TNormalize.cpp
+16
-15
source/test/TPower.cpp
+34
-31
source/test/TRectify.cpp
+23
-21
source/test/TReduceMax.cpp
+8
-7
source/test/TReduceMean.cpp
+5
-105
source/test/TReduceMean.h
+2
-2
source/test/TReduceSum.cpp
+8
-106
source/test/TReduceSumSquared.cpp
+19
-17
source/test/TReduceVariance.cpp
+5
-4
source/test/TScaleAndShift.cpp
+6
-5
source/test/TSelect.cpp
+11
-10
source/test/TSetAscendingOrder.cpp
+1
-1
source/test/TSetData.cpp
+2
-3
source/test/TSigmoid.cpp
+9
-7
source/test/TSoftmax.cpp
+7
-5
source/test/TSort.cpp
+10
-10
source/test/TSplit.cpp
+19
-19
source/test/TSum.cpp
+18
-18
source/test/TSumByColumnTV.cpp
+19
-19
source/test/TSumByColumnVT.cpp
+19
-19
source/test/TTopK.cpp
+19
-19
source/test/TUnsqueeze.cpp
+19
-19
source/test/TXMem.cpp
+17
-9
source/test/TXMem.h
+5
-5
source/test/Test.cpp
+1
-1
没有找到文件。
source/core/CHeader.h
查看文件 @
0887fae1
...
@@ -28,6 +28,10 @@
...
@@ -28,6 +28,10 @@
#include "Concatenate.h"
#include "Concatenate.h"
#include "ConcatenateSolely.h"
#include "ConcatenateSolely.h"
#include "CopyBlocks.h"
#include "CopyBlocksInGrid.h"
#include "CopyBlocksOnSite.h"
#include "CopyData2D.h"
#include "CopyIndexed.h"
#include "CopyIndexed.h"
#include "CopyInGrid.h"
#include "CopyInGrid.h"
#include "CopyValues.h"
#include "CopyValues.h"
...
@@ -53,6 +57,7 @@
...
@@ -53,6 +57,7 @@
#include "ReduceSumSquared.h"
#include "ReduceSumSquared.h"
#include "ReduceVariance.h"
#include "ReduceVariance.h"
#include "ScaleAndShift.h"
#include "ScaleAndShift.h"
#include "Select.h"
#include "SetData.h"
#include "SetData.h"
#include "Sort.h"
#include "Sort.h"
#include "Split.h"
#include "Split.h"
...
...
source/core/Concatenate.cpp
查看文件 @
0887fae1
...
@@ -53,6 +53,10 @@ void Concatenate(XList * smalls, XTensor * big, int dim)
...
@@ -53,6 +53,10 @@ void Concatenate(XList * smalls, XTensor * big, int dim)
/*
/*
concatenate two tensors along a given dimension
concatenate two tensors along a given dimension
>> smallA - one tensor for concatenation
>> smallB - the other tensor for concatenation
>> big - the resulting tensor
>> dim - which dimension we perform the concatenation
*/
*/
void
Concatenate
(
XTensor
*
smallA
,
XTensor
*
smallB
,
XTensor
*
big
,
int
dim
)
void
Concatenate
(
XTensor
*
smallA
,
XTensor
*
smallB
,
XTensor
*
big
,
int
dim
)
{
{
...
...
source/core/Concatenate.h
查看文件 @
0887fae1
...
@@ -29,7 +29,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -29,7 +29,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/*
/*
concatenate a list of tensors along a given dimension
concatenate a list of tensors along a given dimension
Note that this is actually a wrapper that selects "ConcatenateSolely"
Note that this is actually a wrapper that selects "ConcatenateSolely"
or "Merge" by means of the tensor shapes */
or "Merge" by means of the tensor shapes
*/
void
Concatenate
(
XList
*
smalls
,
XTensor
*
big
,
int
dim
);
void
Concatenate
(
XList
*
smalls
,
XTensor
*
big
,
int
dim
);
/* concatenate two tensors along a given dimension */
/* concatenate two tensors along a given dimension */
...
...
source/core/ConcatenateSolely.cpp
查看文件 @
0887fae1
...
@@ -64,9 +64,11 @@ void ConcatenateSolely(XList * smalls, XTensor * big, int dim)
...
@@ -64,9 +64,11 @@ void ConcatenateSolely(XList * smalls, XTensor * big, int dim)
int
offset
=
0
;
int
offset
=
0
;
/* two strategies are used - we can either resort to memcpy2d for the case of
/*
two strategies are used - we can either resort to memcpy2d for the case of
concatenation of a few items, or use MergeBlockLists to merge a large number
concatenation of a few items, or use MergeBlockLists to merge a large number
of data blocks */
of data blocks
*/
if
(
smalls
->
count
<=
MIN_TENSOR_CAT_NUM
)
{
if
(
smalls
->
count
<=
MIN_TENSOR_CAT_NUM
)
{
for
(
int
i
=
0
;
i
<
smalls
->
count
;
i
++
)
{
for
(
int
i
=
0
;
i
<
smalls
->
count
;
i
++
)
{
XTensor
*
tensor
=
(
XTensor
*
)
smalls
->
GetItem
(
i
);
XTensor
*
tensor
=
(
XTensor
*
)
smalls
->
GetItem
(
i
);
...
...
source/core/ConcatenateSolely.h
查看文件 @
0887fae1
...
@@ -26,7 +26,6 @@
...
@@ -26,7 +26,6 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* concatenate a list of tensors along a given dimension */
/* concatenate a list of tensors along a given dimension */
extern
"C"
extern
"C"
void
ConcatenateSolely
(
XList
*
smalls
,
XTensor
*
big
,
int
dim
);
void
ConcatenateSolely
(
XList
*
smalls
,
XTensor
*
big
,
int
dim
);
...
...
source/core/CopyBlocks.cpp
查看文件 @
0887fae1
...
@@ -78,9 +78,11 @@ void CopyBlocks(void * source, int blockSize, int * sourceBlocks, int blockNum,
...
@@ -78,9 +78,11 @@ void CopyBlocks(void * source, int blockSize, int * sourceBlocks, int blockNum,
else
{
else
{
int
devID
=
myMem
!=
NULL
?
myMem
->
devID
:
-
1
;
int
devID
=
myMem
!=
NULL
?
myMem
->
devID
:
-
1
;
/* The following code should be fine with GPUs, but too many
/*
The following code should be fine with GPUs, but too many
kernel calls would slow down the system. We prefer to use
kernel calls would slow down the system. We prefer to use
one kernel to do block copy in batch (kernel fusion). */
one kernel to do block copy in batch (kernel fusion).
*/
for
(
int
i
=
0
;
i
<
blockNum
;
i
++
)
{
for
(
int
i
=
0
;
i
<
blockNum
;
i
++
)
{
XMemCopy
((
char
*
)
target
+
targetBlocks
[
i
]
*
blockSize
,
devID
,
XMemCopy
((
char
*
)
target
+
targetBlocks
[
i
]
*
blockSize
,
devID
,
(
char
*
)
source
+
sourceBlocks
[
i
]
*
blockSize
,
devID
,
blockSize
);
(
char
*
)
source
+
sourceBlocks
[
i
]
*
blockSize
,
devID
,
blockSize
);
...
...
source/core/CopyBlocksOnSite.cpp
查看文件 @
0887fae1
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
#include "CopyBlocksOnSite.cuh"
#include "CopyBlocksOnSite.cuh"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
/*
copy a number of blocks to target positions. Here we assume that
copy a number of blocks to target positions. Here we assume that
all the data has been on the device (CPU/GPU) already.
all the data has been on the device (CPU/GPU) already.
...
@@ -47,9 +48,11 @@ void CopyBlocksOnSite(void * source, int blockSize, int blockNum, void * target,
...
@@ -47,9 +48,11 @@ void CopyBlocksOnSite(void * source, int blockSize, int blockNum, void * target,
else
{
else
{
int
devID
=
myMem
!=
NULL
?
myMem
->
devID
:
-
1
;
int
devID
=
myMem
!=
NULL
?
myMem
->
devID
:
-
1
;
/* The following code should be fine with GPUs, but too many
/*
The following code should be fine with GPUs, but too many
kernel calls would slow down the system. We prefer to use
kernel calls would slow down the system. We prefer to use
one kernel to do block copy in batch (kernel fusion). */
one kernel to do block copy in batch (kernel fusion).
*/
for
(
int
i
=
0
,
b
=
0
;
i
<
blockNum
;
i
++
,
b
+=
blockSize
)
{
for
(
int
i
=
0
,
b
=
0
;
i
<
blockNum
;
i
++
,
b
+=
blockSize
)
{
XMemCopy
((
char
*
)
target
+
targetBlocks
[
i
]
*
blockSize
,
devID
,
XMemCopy
((
char
*
)
target
+
targetBlocks
[
i
]
*
blockSize
,
devID
,
(
char
*
)
source
+
b
,
devID
,
blockSize
);
(
char
*
)
source
+
b
,
devID
,
blockSize
);
...
...
source/core/CopyInGrid.cpp
查看文件 @
0887fae1
...
@@ -34,7 +34,7 @@ i.e., reorder the data blocks in the same memory piece
...
@@ -34,7 +34,7 @@ i.e., reorder the data blocks in the same memory piece
in the k-th grid
in the k-th grid
>> blockDim - leading dimension of blocks
>> blockDim - leading dimension of blocks
>> blockNumInGrid - number of blocks in each grid
>> blockNumInGrid - number of blocks in each grid
>> isOnDev - indicates whether the index is on the device already
>> is
Index
OnDev - indicates whether the index is on the device already
*/
*/
void
CopyInGrid
(
XTensor
*
s
,
XTensor
*
t
,
int
*
index
,
int
blockDim
,
int
blockNumInGrid
,
bool
isIndexOnDev
)
void
CopyInGrid
(
XTensor
*
s
,
XTensor
*
t
,
int
*
index
,
int
blockDim
,
int
blockNumInGrid
,
bool
isIndexOnDev
)
{
{
...
...
source/core/CopyIndexed.cpp
查看文件 @
0887fae1
...
@@ -36,6 +36,7 @@ copy indexed sub-tensors
...
@@ -36,6 +36,7 @@ copy indexed sub-tensors
>> tgtIndex - index of the target sub-tensors
>> tgtIndex - index of the target sub-tensors
>> copyNum - number of the sub-tensors we copy for each source index, e.g.,
>> copyNum - number of the sub-tensors we copy for each source index, e.g.,
for srcIndex = [1,4] and copyNum = 2, we actually copy the source sub-tensors 1, 2, 4, 5
for srcIndex = [1,4] and copyNum = 2, we actually copy the source sub-tensors 1, 2, 4, 5
<< return - whether copy indexed operation was successful
*/
*/
bool
CopyIndexed
(
XTensor
*
s
,
XTensor
*
t
,
int
dim
,
int
*
srcIndex
,
int
indexSize
,
int
*
tgtIndex
,
int
copyNum
)
bool
CopyIndexed
(
XTensor
*
s
,
XTensor
*
t
,
int
dim
,
int
*
srcIndex
,
int
indexSize
,
int
*
tgtIndex
,
int
copyNum
)
{
{
...
...
source/core/CopyValues.cuh
查看文件 @
0887fae1
...
@@ -28,7 +28,6 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -28,7 +28,6 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
#ifdef USE_CUDA
/**************************************/
/* copy all elements from a source matrix to a target matrix */
/* copy all elements from a source matrix to a target matrix */
extern "C"
extern "C"
bool CudaCopyValues(XTensor * s, XTensor * t, XStream * stream = NULL);
bool CudaCopyValues(XTensor * s, XTensor * t, XStream * stream = NULL);
...
...
source/core/FlushToMem.cu
查看文件 @
0887fae1
...
@@ -52,7 +52,6 @@ void CudaCPUToGPUFlush(XList * mList, int devID, XMem * GPUMem)
...
@@ -52,7 +52,6 @@ void CudaCPUToGPUFlush(XList * mList, int devID, XMem * GPUMem)
else
else
reqiredSize = m->unitSize * m->unitNum;
reqiredSize = m->unitSize * m->unitNum;
//reqiredSize = (int)GPUMem->GetPitch(GPUMem->devID, (MTYPE)GPUMem->GetAddress() + size, reqiredSize);
size += reqiredSize;
size += reqiredSize;
}
}
...
@@ -70,7 +69,6 @@ void CudaCPUToGPUFlush(XList * mList, int devID, XMem * GPUMem)
...
@@ -70,7 +69,6 @@ void CudaCPUToGPUFlush(XList * mList, int devID, XMem * GPUMem)
else
else
pSize = m->unitSize * m->unitNum;
pSize = m->unitSize * m->unitNum;
//reqiredSize = (int)GPUMem->GetPitch(GPUMem->devID, (MTYPE)GPUMem->GetAddress() + p, pSize);
reqiredSize = pSize;
reqiredSize = pSize;
memcpy(data + p, m->data, pSize);
memcpy(data + p, m->data, pSize);
...
...
source/core/MakeSplitBlockIndex.cpp
查看文件 @
0887fae1
...
@@ -24,6 +24,7 @@
...
@@ -24,6 +24,7 @@
#include "MakeSplitBlockIndex.cuh"
#include "MakeSplitBlockIndex.cuh"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
/*
set target data block index for the data movement in split
set target data block index for the data movement in split
>> blockIndex - block index
>> blockIndex - block index
...
...
source/core/MakeSplitBlockIndex.cu
查看文件 @
0887fae1
...
@@ -51,6 +51,7 @@ void KernelMakeSplitBlockIndex(int * blockIndex, int splitNum, int blockSplitSiz
...
@@ -51,6 +51,7 @@ void KernelMakeSplitBlockIndex(int * blockIndex, int splitNum, int blockSplitSiz
/*
/*
set target data block index for the data movement in split
set target data block index for the data movement in split
>> devID - device id
>> blockIndex - block index
>> blockIndex - block index
>> splitNum - number of splits
>> splitNum - number of splits
>> blockSplitSize - size of the split block
>> blockSplitSize - size of the split block
...
...
source/core/MatrixMULBatchedCPU.cpp
查看文件 @
0887fae1
...
@@ -33,9 +33,9 @@ c_i = trans(a_i) * trans(b_i) * \alpha + c_i * \beta for each i in [0,count-1]
...
@@ -33,9 +33,9 @@ c_i = trans(a_i) * trans(b_i) * \alpha + c_i * \beta for each i in [0,count-1]
>> transposedA - indicate whether the matrix a is transposed
>> transposedA - indicate whether the matrix a is transposed
>> b - another list of input matrices (2d tensors)
>> b - another list of input matrices (2d tensors)
>> transposedB - indicate whether the matrix b is transposed
>> transposedB - indicate whether the matrix b is transposed
>> c - output matrix (2d tensor)
>> alpha - scalar
>> alpha - scalar
>> beta - scalar
>> beta - scalar
>> c - output matrix (2d tensor)
*/
*/
void
MatrixMULBatchedCPU
(
XList
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
void
MatrixMULBatchedCPU
(
XList
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XList
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XList
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
...
@@ -64,10 +64,6 @@ void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA,
...
@@ -64,10 +64,6 @@ void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA,
}
}
}
}
//if(isUniform){
//}
//else{
for
(
int
i
=
0
;
i
<
a
->
count
;
i
++
)
{
for
(
int
i
=
0
;
i
<
a
->
count
;
i
++
)
{
XTensor
*
ai
=
(
XTensor
*
)
a
->
GetItem
(
i
);
XTensor
*
ai
=
(
XTensor
*
)
a
->
GetItem
(
i
);
XTensor
*
bi
=
(
XTensor
*
)
b
->
GetItem
(
i
);
XTensor
*
bi
=
(
XTensor
*
)
b
->
GetItem
(
i
);
...
...
source/core/MatrixMul.h
查看文件 @
0887fae1
...
@@ -39,7 +39,7 @@ normal matrix multiplication if A = y * z and B = x * y.
...
@@ -39,7 +39,7 @@ normal matrix multiplication if A = y * z and B = x * y.
*/
*/
extern
"C"
extern
"C"
void
MatrixMul
(
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
void
MatrixMul
(
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
,
XPRunner
*
parallelRunner
=
NULL
);
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
,
XPRunner
*
parallelRunner
=
NULL
);
}
// namespace nts(NiuTrans.Tensor)
}
// namespace nts(NiuTrans.Tensor)
...
...
source/core/MatrixMul2D.cpp
查看文件 @
0887fae1
...
@@ -104,7 +104,7 @@ void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
...
@@ -104,7 +104,7 @@ void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
int
num
=
*
((
int
*
)
b
->
data
);
int
num
=
*
((
int
*
)
b
->
data
);
char
*
p
=
(
char
*
)
b
->
data
+
sizeof
(
int
);
// pointer to the first tuple
char
*
p
=
(
char
*
)
b
->
data
+
sizeof
(
int
);
// pointer to the first tuple
/* a * b */
/* a * b */
if
(
transposedA
==
X_NOTRANS
&&
transposedB
==
X_NOTRANS
)
{
if
(
transposedA
==
X_NOTRANS
&&
transposedB
==
X_NOTRANS
)
{
for
(
int
i
=
0
;
i
<
num
;
i
++
)
{
for
(
int
i
=
0
;
i
<
num
;
i
++
)
{
int
key
=
*
((
int
*
)
p
);
int
key
=
*
((
int
*
)
p
);
...
...
source/core/MatrixMul2D.cu
查看文件 @
0887fae1
...
@@ -37,11 +37,13 @@ c = a * b * \alpha
...
@@ -37,11 +37,13 @@ c = a * b * \alpha
>> aColSize - column size of matrix a
>> aColSize - column size of matrix a
>> aRowSize - row size of matrix a
>> aRowSize - row size of matrix a
>> b - a sparse matrix
>> b - a sparse matrix
>> transposed
A
- indicates whether b is transposed
>> transposed
B
- indicates whether b is transposed
>> bNonZeroNum - number of non-zero items in b
>> bNonZeroNum - number of non-zero items in b
>> bColSize - column size of matrix b
>> bColSize - column size of matrix b
>> bRowSize - row size of matrix b
>> bRowSize - row size of matrix b
>> c - the resulting (dense) matrix
>> c - the resulting (dense) matrix
>> cColSize - column size of matrix c
>> cRowSize - row size of matrix c
>> alpha - the scaling factor
>> alpha - the scaling factor
*/
*/
extern "C" __global__
extern "C" __global__
...
@@ -147,7 +149,6 @@ void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
...
@@ -147,7 +149,6 @@ void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
if (!a->isSparse && !b->isSparse) {
if (!a->isSparse && !b->isSparse) {
CheckNTErrors((!c->isSparse), "Illegal use of sparse matrix in multiplication!");
CheckNTErrors((!c->isSparse), "Illegal use of sparse matrix in multiplication!");
//cublasHandle_t * handle = GDevs->GetCudaHandle(a->devID);
cublasHandle_t * handle = a->mem == NULL ? GDevs.GetCudaHandle(a->devID) : a->mem->GetCublasHandle();
cublasHandle_t * handle = a->mem == NULL ? GDevs.GetCudaHandle(a->devID) : a->mem->GetCublasHandle();
/* !!!! might have problems */
/* !!!! might have problems */
...
@@ -183,7 +184,6 @@ void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
...
@@ -183,7 +184,6 @@ void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
if (beta == 0)
if (beta == 0)
c->SetZeroAll();
c->SetZeroAll();
else if (beta != 1.0F) {
else if (beta != 1.0F) {
//XTensor::ScaleAndShift(c, beta, 0);
ShowNTErrors("TODO!");
ShowNTErrors("TODO!");
}
}
...
...
source/core/MatrixMulBatched.cpp
查看文件 @
0887fae1
...
@@ -40,6 +40,7 @@ where trans() returns the transposed matrix if the flag is fired
...
@@ -40,6 +40,7 @@ where trans() returns the transposed matrix if the flag is fired
>> c - where we keep a*b
>> c - where we keep a*b
>> alpha - a coefficient
>> alpha - a coefficient
>> beta - another coefficient
>> beta - another coefficient
>> parallelRunner - parallel processing module
*/
*/
void
MatrixMulBatched
(
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
void
MatrixMulBatched
(
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
...
...
source/core/Merge.cpp
查看文件 @
0887fae1
...
@@ -27,7 +27,6 @@
...
@@ -27,7 +27,6 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
/*
transform a tensor by merging it along a dimension, e.g., (N/3, M, 3) -> (N, M)
transform a tensor by merging it along a dimension, e.g., (N/3, M, 3) -> (N, M)
>> s - the source tensor
>> s - the source tensor
...
...
source/core/MergeBlockLists.cpp
查看文件 @
0887fae1
...
@@ -27,12 +27,12 @@
...
@@ -27,12 +27,12 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
/*
merge data by blocks
merge data by blocks
>> sourceList - list of source data array
>> sourceList - list of source data array
>> blockSizes - list of the block size for each source data array
>> blockSizes - list of the block size for each source data array
>> blockNum - number of blocks kept in each data array
>> blockNum - number of blocks kept in each data array
>> target - target data array
>> target - target data array
>> myMem - memory pool
>> myMem - memory pool
*/
*/
void
MergeBlockLists
(
XList
*
sourceList
,
int
*
blockSizes
,
int
blockNum
,
void
*
target
,
XMem
*
myMem
)
void
MergeBlockLists
(
XList
*
sourceList
,
int
*
blockSizes
,
int
blockNum
,
void
*
target
,
XMem
*
myMem
)
{
{
...
...
source/core/MergeBlockLists.cu
查看文件 @
0887fae1
...
@@ -34,10 +34,9 @@ copy a number of blocks (of different sizes) to target positions
...
@@ -34,10 +34,9 @@ copy a number of blocks (of different sizes) to target positions
>> sourceBlockSizes - the size of the block_i
>> sourceBlockSizes - the size of the block_i
>> sourceBlockNum - number of blocks to merge
>> sourceBlockNum - number of blocks to merge
>> targetList - list of data arrays to copy to
>> targetList - list of data arrays to copy to
>> target - target data array
*/
*/
__global__
__global__
void KernelCopyBlockLists(DTYPE * sourceList[], int * sourceBlockSizes, int sourceBlockNum, DTYPE * targetList[])
void KernelCopyBlockLists(DTYPE * sourceList[], int * sourceBlockSizes, int sourceBlockNum, DTYPE * targetList[])
{
{
__shared__ int iBlockSizes[MAX_CUDA_THREAD_NUM_PER_BLOCK];
__shared__ int iBlockSizes[MAX_CUDA_THREAD_NUM_PER_BLOCK];
__shared__ DTYPE * iSourceList[MAX_CUDA_THREAD_NUM_PER_BLOCK];
__shared__ DTYPE * iSourceList[MAX_CUDA_THREAD_NUM_PER_BLOCK];
...
@@ -82,7 +81,6 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
...
@@ -82,7 +81,6 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
int minBlockSize = MAX_INT;
int minBlockSize = MAX_INT;
int maxBlockSize = -MAX_INT;
int maxBlockSize = -MAX_INT;
//int realMinBlockSize = 1;
int realMaxBlockSize = 1;
int realMaxBlockSize = 1;
DTYPE ** sourceArrays = new DTYPE*[newBlockListSize];
DTYPE ** sourceArrays = new DTYPE*[newBlockListSize];
DTYPE ** targetArrays = new DTYPE*[newBlockListSize];
DTYPE ** targetArrays = new DTYPE*[newBlockListSize];
...
@@ -110,7 +108,6 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
...
@@ -110,7 +108,6 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
CheckNTErrors((minBlockSize % sizeof(DTYPE) == 0), "Unsupported block size!");
CheckNTErrors((minBlockSize % sizeof(DTYPE) == 0), "Unsupported block size!");
CheckNTErrors((maxBlockSize % sizeof(DTYPE) == 0), "Unsupported block size!");
CheckNTErrors((maxBlockSize % sizeof(DTYPE) == 0), "Unsupported block size!");
//realMinBlockSize = minBlockSize/sizeof(DTYPE);
realMaxBlockSize = maxBlockSize / sizeof(DTYPE);
realMaxBlockSize = maxBlockSize / sizeof(DTYPE);
int cudaGridSizes[3];
int cudaGridSizes[3];
...
@@ -120,31 +117,16 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
...
@@ -120,31 +117,16 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
cudaGridSizes, cudaBlockSizes);
cudaGridSizes, cudaBlockSizes);
myMem->SetPinBuf();
myMem->SetPinBuf();
//MTYPE offset0 = myMem->bufUsed;
int * sizesGPU = (int*)myMem->AllocBuf(myMem->devID, sizeof(int) * newBlockListSize, 256);
int * sizesGPU = (int*)myMem->AllocBuf(myMem->devID, sizeof(int) * newBlockListSize, 256);
//MTYPE offset1 = myMem->bufUsed;
DTYPE ** sourceArraysGPU = (DTYPE**)myMem->AllocBuf(myMem->devID, sizeof(DTYPE*) * newBlockListSize, 256);
DTYPE ** sourceArraysGPU = (DTYPE**)myMem->AllocBuf(myMem->devID, sizeof(DTYPE*) * newBlockListSize, 256);
//MTYPE offset2 = myMem->bufUsed;
DTYPE ** targetArraysGPU = (DTYPE**)myMem->AllocBuf(myMem->devID, sizeof(DTYPE*) * newBlockListSize, 256);
DTYPE ** targetArraysGPU = (DTYPE**)myMem->AllocBuf(myMem->devID, sizeof(DTYPE*) * newBlockListSize, 256);
//MTYPE bufSize = myMem->bufUsed - offset0;
//char * CPUBuf = new char[bufSize];
//memset(CPUBuf, 0 , bufSize);
//memcpy(CPUBuf, sizes, sizeof(int) * newBlockListSize);
//memcpy(CPUBuf + (offset1 - offset0), sourceArrays, sizeof(DTYPE*) * newBlockListSize);
//memcpy(CPUBuf + (offset2 - offset0), targetArrays, sizeof(DTYPE*) * newBlockListSize);
XMemCopy(sizesGPU, myMem->devID, sizes, -1, sizeof(int) * newBlockListSize);
XMemCopy(sizesGPU, myMem->devID, sizes, -1, sizeof(int) * newBlockListSize);
XMemCopy(sourceArraysGPU, myMem->devID, sourceArrays, -1, sizeof(DTYPE*) * newBlockListSize);
XMemCopy(sourceArraysGPU, myMem->devID, sourceArrays, -1, sizeof(DTYPE*) * newBlockListSize);
XMemCopy(targetArraysGPU, myMem->devID, targetArrays, -1, sizeof(DTYPE*) * newBlockListSize);
XMemCopy(targetArraysGPU, myMem->devID, targetArrays, -1, sizeof(DTYPE*) * newBlockListSize);
/* it is VERY tricky here because we squeeze three data copies into one */
//XMemCopy(sizesGPU, myMem->devID, CPUBuf, -1, bufSize);
KernelCopyBlockLists << <dim3(cudaGridSizes[0], cudaGridSizes[1]), dim3(cudaBlockSizes[0], cudaBlockSizes[1]) >> >
KernelCopyBlockLists << <dim3(cudaGridSizes[0], cudaGridSizes[1]), dim3(cudaBlockSizes[0], cudaBlockSizes[1]) >> >
(sourceArraysGPU, sizesGPU, newBlockListSize, targetArraysGPU);
(sourceArraysGPU, sizesGPU, newBlockListSize, targetArraysGPU);
...
@@ -154,7 +136,6 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
...
@@ -154,7 +136,6 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
delete[] targetArrays;
delete[] targetArrays;
delete[] sizes;
delete[] sizes;
delete[] offsets;
delete[] offsets;
//delete[] CPUBuf;
}
}
#endif // USE_CUDA
#endif // USE_CUDA
...
...
source/core/MultiplyElementWise.cpp
查看文件 @
0887fae1
...
@@ -24,6 +24,7 @@
...
@@ -24,6 +24,7 @@
#include "MultiplyElementWise.cuh"
#include "MultiplyElementWise.cuh"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
/*
element-wise product of two tensors
element-wise product of two tensors
c(i) = a(i)*b(i) + \alpha * c(i)
c(i) = a(i)*b(i) + \alpha * c(i)
...
...
source/core/MultiplyElementWise.cu
查看文件 @
0887fae1
...
@@ -68,6 +68,7 @@ where |a_lead| means the size of the leading dimension of a
...
@@ -68,6 +68,7 @@ where |a_lead| means the size of the leading dimension of a
>> a - tensor a
>> a - tensor a
>> b - tensor b
>> b - tensor b
>> c - result tensor
>> c - result tensor
>> alpha - the coefficient
>> stride - the number of items we go over when move next along the leading dimension in a block
>> stride - the number of items we go over when move next along the leading dimension in a block
>> ldSizeA - size of the leading dimension of a
>> ldSizeA - size of the leading dimension of a
>> ldSizeB - size of the leading dimension of b
>> ldSizeB - size of the leading dimension of b
...
...
source/core/Negate.cpp
查看文件 @
0887fae1
...
@@ -26,8 +26,8 @@
...
@@ -26,8 +26,8 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
/*
set every entry to its minus value
set every entry to its minus value
>> a - the tensor we are processing
>> a - the tensor we are processing
*/
*/
void
Negate
(
XTensor
*
a
)
void
Negate
(
XTensor
*
a
)
{
{
...
...
source/core/Negate.cu
查看文件 @
0887fae1
...
@@ -42,10 +42,10 @@ void KernelNegate(DTYPE * d, int size)
...
@@ -42,10 +42,10 @@ void KernelNegate(DTYPE * d, int size)
}
}
/*
/*
set each entry to its negative value (CUDA Kernel)
set each entry to its negative value (CUDA Kernel)
This is for float16 computation
This is for float16 computation
>> d - pointer to the data array
>> d - pointer to the data array
>> size - size of the data array
>> size - size of the data array
*/
*/
__global__
__global__
void KernelNegate(__half * d, int size)
void KernelNegate(__half * d, int size)
...
...
source/core/Normalize.cpp
查看文件 @
0887fae1
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
#include "Normalize.cuh"
#include "Normalize.cuh"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
/*
normalized the data with normal distribution. For an input x,
normalized the data with normal distribution. For an input x,
y = a * (x-mean)/sqrt(variance+\epsilon) + b
y = a * (x-mean)/sqrt(variance+\epsilon) + b
...
...
source/core/Normalize.cu
查看文件 @
0887fae1
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
#include "Normalize.cuh"
#include "Normalize.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
#ifdef USE_CUDA
/*
/*
normalized the data with normal distribution (kernel code). For an input x,
normalized the data with normal distribution (kernel code). For an input x,
...
...
source/core/Normalize.cuh
查看文件 @
0887fae1
...
@@ -28,7 +28,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -28,7 +28,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
#ifdef USE_CUDA
/* normalized the data with normal distribution (Kernel code). For an input x,
/*
normalized the data with normal distribution (Kernel code). For an input x,
y = a * (x-mean)/sqrt(variance+\epsilon) + b
y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter
*/
*/
...
@@ -37,7 +38,8 @@ void KernelNormalize(DTYPE * input, DTYPE * output, DTYPE * mean, DTYPE * var,
...
@@ -37,7 +38,8 @@ void KernelNormalize(DTYPE * input, DTYPE * output, DTYPE * mean, DTYPE * var,
DTYPE * a, DTYPE * b, DTYPE epsilon,
DTYPE * a, DTYPE * b, DTYPE epsilon,
int stride, int strideNum, int blockNum);
int stride, int strideNum, int blockNum);
/* normalized the data with normal distribution. For an input x,
/*
normalized the data with normal distribution. For an input x,
y = a * (x-mean)/sqrt(variance+\epsilon) + b
y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter
*/
*/
...
...
source/core/Power.cpp
查看文件 @
0887fae1
...
@@ -25,10 +25,11 @@
...
@@ -25,10 +25,11 @@
#include "Power.cuh"
#include "Power.cuh"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
/*
get the power(a, p)
get the power(a, p)
>> a - the tensor
>> a - the tensor
>> p
ower
- as it is
>> p - as it is
*/
*/
void
Power
(
XTensor
*
a
,
DTYPE
p
)
void
Power
(
XTensor
*
a
,
DTYPE
p
)
{
{
...
...
source/core/Power.cu
查看文件 @
0887fae1
...
@@ -87,9 +87,6 @@ __global__
...
@@ -87,9 +87,6 @@ __global__
void KernelPower(__half * d, __half p, int size)
void KernelPower(__half * d, __half p, int size)
{
{
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
//int i = blockDim.x * blockIdx.x + threadIdx.x;
//if (i < size)
// d[i] = hpow(d[i], p);
#else
#else
int i = blockDim.x * blockIdx.x + threadIdx.x;
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
if (i < size)
...
@@ -126,9 +123,6 @@ void CudaPower(XTensor * a, DTYPE p)
...
@@ -126,9 +123,6 @@ void CudaPower(XTensor * a, DTYPE p)
}
}
else if (p != (DTYPE)1.0) {
else if (p != (DTYPE)1.0) {
ShowNTErrors("TODO!");
ShowNTErrors("TODO!");
//unsigned short p2 = FloatToFloat16(p);
//__half * pp = (__half*)&p2;
//KernelPower<<<blocks, threads>>>((__half*)a->data, *pp, a->unitNum);
}
}
}
}
else {
else {
...
...
source/core/ReduceMax.cu
查看文件 @
0887fae1
...
@@ -31,14 +31,10 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
...
@@ -31,14 +31,10 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
/*
reduce a tensor to another that keeps the max value along a dimension - slow version
reduce a tensor to another that keeps the max value along a dimension - slow version
Given a block of data, we go over each dimension i in the stride and we have
Given a block of data, we go over each dimension i in the stride and we have
sum_i = max_{0<=j<strideNum} input_{i,j}
sum_i = max_{0<=j<strideNum} input_{i,j}
where we can view the block as a matrix and input_{i,j} represent the item at the
where we can view the block as a matrix and input_{i,j} represent the item at the
crossing of the i-th columne and the j-th row.
crossing of the i-th columne and the j-th row.
>> input - the input array (representing a tensor)
>> input - the input array (representing a tensor)
>> output - the sum over each block. NOTE: output is also an array
>> output - the sum over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item
>> stride - stride that we need to move to the next item
...
@@ -89,82 +85,77 @@ void KernelReduceMax(DTYPE * input, DTYPE * output,
...
@@ -89,82 +85,77 @@ void KernelReduceMax(DTYPE * input, DTYPE * output,
}
}
/*
/*
reduce a tensor to another that keeps the max value along a dimension - slow version
reduce a tensor to another that keeps the max value along a dimension - slow version
Given a block of data, we go over each dimension i in the stride and we have
Given a block of data, we go over each dimension i in the stride and we have
sum_i = max_{0<=j<strideNum} input_{i,j}
where we can view the block as a matrix and input_{i,j} represent the item at the
sum_i = max_{0<=j<strideNum} input_{i,j}
crossing of the i-th columne and the j-th row.
>> input - the input array (representing a tensor)
where we can view the block as a matrix and input_{i,j} represent the item at the
>> output - the sum over each block. NOTE: output is also an array
crossing of the i-th columne and the j-th row.
>> stride - stride that we need to move to the next item
>> strideNum - how many strides we need to finish the reduce
>> input - the input array (representing a tensor)
>> reducedStrideNum - the number of strides after reducation
>> output - the sum over each block. NOTE: output is also an array
>> blockSize - size of the block (i.e., stride * strideNum)
>> stride - stride that we need to move to the next item
>> blockNum - how many blocks
>> strideNum - how many strides we need to finish the reduce
*/
>> reducedStrideNum - the number of strides after reducation
__global__
>> blockSize - size of the block (i.e., stride * strideNum)
void KernelReduceMax(__half * input, __half * output,
>> blockNum - how many blocks
int stride, int strideNum, int reducedStrideNum,
*/
int blockSize, int blockNum)
__global__
{
void KernelReduceMax(__half * input, __half * output,
int idx = threadIdx.x * blockDim.y + threadIdx.y;
int stride, int strideNum, int reducedStrideNum,
unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
int blockSize, int blockNum)
unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
{
int idx = threadIdx.x * blockDim.y + threadIdx.y;
unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
if (i >= stride * blockNum)
if (i >= stride * blockNum)
return;
return;
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
__shared__ __half iData[MAX_CUDA_THREAD_NUM_PER_BLOCK * MIN_CUDA_SHARED_MEM_COL_SIZE / 2];
__shared__ __half iData[MAX_CUDA_THREAD_NUM_PER_BLOCK * MIN_CUDA_SHARED_MEM_COL_SIZE / 2];
#else
#else
__shared__ DTYPE iData[MAX_CUDA_THREAD_NUM_PER_BLOCK * MIN_CUDA_SHARED_MEM_COL_SIZE / 2];
__shared__ DTYPE iData[MAX_CUDA_THREAD_NUM_PER_BLOCK * MIN_CUDA_SHARED_MEM_COL_SIZE / 2];
#endif
#endif
__syncthreads();
__syncthreads();
int k = i / stride;
int k = i / stride;
int iOffset = i % stride;
int iOffset = i % stride;
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
__half value = (i < stride * blockNum && j < strideNum) ?
__half value = (i < stride * blockNum && j < strideNum) ?
input[blockSize * k + stride * j + iOffset] : __half(FLOAT16_MIN);
input[blockSize * k + stride * j + iOffset] : __half(FLOAT16_MIN);
#else
#else
DTYPE value = (i < stride * blockNum && j < strideNum) ?
DTYPE value = (i < stride * blockNum && j < strideNum) ?
__half2float(input[blockSize * k + stride * j + iOffset]) : FLOAT_MIN;
__half2float(input[blockSize * k + stride * j + iOffset]) : FLOAT_MIN;
#endif
#endif
/* load data into the shared mem */
/* load data into the shared mem */
iData[threadIdx.x * blockDim.y + threadIdx.y] = value;
iData[threadIdx.x * blockDim.y + threadIdx.y] = value;
__syncthreads();
__syncthreads();
/* do reduction in shared mem */
/* do reduction in shared mem */
for (unsigned int s = blockDim.y / 2; s > 0; s >>= 1) {
for (unsigned int s = blockDim.y / 2; s > 0; s >>= 1) {
if (threadIdx.y < s && iData[idx] < iData[idx + s]) {
if (threadIdx.y < s && iData[idx] < iData[idx + s]) {
iData[idx] = iData[idx + s];
iData[idx] = iData[idx + s];
}
}
__syncthreads();
__syncthreads();
}
}
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
/* write result for this block to the output array */
/* write result for this block to the output array */
if (threadIdx.y == 0 && blockIdx.y < reducedStrideNum)
if (threadIdx.y == 0 && blockIdx.y < reducedStrideNum)
output[(k * reducedStrideNum + blockIdx.y) * stride + iOffset] = iData[threadIdx.x * blockDim.y];
output[(k * reducedStrideNum + blockIdx.y) * stride + iOffset] = iData[threadIdx.x * blockDim.y];
#else
#else
/* write result for this block to the output array */
/* write result for this block to the output array */
if (threadIdx.y == 0 && blockIdx.y < reducedStrideNum)
if (threadIdx.y == 0 && blockIdx.y < reducedStrideNum)
output[(k * reducedStrideNum + blockIdx.y) * stride + iOffset] = __half(iData[threadIdx.x * blockDim.y]);
output[(k * reducedStrideNum + blockIdx.y) * stride + iOffset] = __half(iData[threadIdx.x * blockDim.y]);
#endif
#endif
}
}
/*
/*
reduce a tensor to another that keeps the max value along a dimension - fast version
reduce a tensor to another that keeps the max value along a dimension - fast version
>> input - the input array (representing a tensor)
>> input - the input array (representing a tensor)
...
@@ -338,9 +329,7 @@ void KernelReduceMaxSimpleFast(DTYPE * input, DTYPE * output,
...
@@ -338,9 +329,7 @@ void KernelReduceMaxSimpleFast(DTYPE * input, DTYPE * output,
/*
/*
get the max-valued items along a dimension of the tensor (cuda version).
get the max-valued items along a dimension of the tensor (cuda version).
For a 1-dimensional data array a,
For a 1-dimensional data array a,
sum_i = max_{0<=j<strideNum} input_{i,j}
sum_i = max_{0<=j<strideNum} input_{i,j}
>> input - the input tensor
>> input - the input tensor
>> output - the output tensor
>> output - the output tensor
>> dim - which dimension to reduce
>> dim - which dimension to reduce
...
...
source/core/ReduceMean.cpp
查看文件 @
0887fae1
...
@@ -28,7 +28,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
...
@@ -28,7 +28,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
/*
get the mean value along a dimension of the tensor. For a 1-dimensional data array a,
get the mean value along a dimension of the tensor. For a 1-dimensional data array a,
mean = (1/n) * sum_i input_i
mean = (1/n) * sum_i input_i
>> input - the input tensor
>> input - the input tensor
>> output - the output tensor
>> output - the output tensor
>> dim - the dimension where the reduction is performed on
>> dim - the dimension where the reduction is performed on
...
@@ -44,5 +43,4 @@ void ReduceMean(XTensor * input, XTensor * output, int dim)
...
@@ -44,5 +43,4 @@ void ReduceMean(XTensor * input, XTensor * output, int dim)
ScaleAndShift
(
output
,
(
DTYPE
)
1
/
num
,
0
);
ScaleAndShift
(
output
,
(
DTYPE
)
1
/
num
,
0
);
}
}
}
//
namespace
nts
(
NiuTrans
.
Tensor
)
}
//
namespace
nts
(
NiuTrans
.
Tensor
)
\ No newline at end of file
source/core/ReduceSum.cu
查看文件 @
0887fae1
...
@@ -29,13 +29,11 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
...
@@ -29,13 +29,11 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
/*
reduce a tensor to another that keeps the sum along a dimension - slow version
reduce a tensor to another that keeps the sum along a dimension - slow version
Given a block of data, we go over each dimension i in the stride and we have
Given a block of data, we go over each dimension i in the stride and we have
sum_i = sum_{0<=j<strideNum} exp(input_{i,j} - shift) if isExp == true;
sum_i = sum_{0<=j<strideNum} exp(input_{i,j} - shift) if isExp == true;
= sum_{0<=j<strideNum} input_{i,j} - shift if isExp == false;
= sum_{0<=j<strideNum} input_{i,j} - shift if isExp == false;
where we can view the block as a matrix and input_{i,j} represent the item at the
where we can view the block as a matrix and input_{i,j} represent the item at the
crossing of the i-th columne and the j-th row.
crossing of the i-th columne and the j-th row.
>> input - the input array (representing a tensor)
>> input - the input array (representing a tensor)
>> output - the sum over each block. NOTE: output is also an array
>> output - the sum over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item
>> stride - stride that we need to move to the next item
...
@@ -107,13 +105,11 @@ void KernelReduceSum(DTYPE * input, DTYPE * output,
...
@@ -107,13 +105,11 @@ void KernelReduceSum(DTYPE * input, DTYPE * output,
/*
/*
reduce a tensor to another that keeps the sum along a dimension - slow version
reduce a tensor to another that keeps the sum along a dimension - slow version
This is for float16 reduction.
This is for float16 reduction.
Given a block of data, we go over each dimension i in the stride and we have
Given a block of data, we go over each dimension i in the stride and we have
sum_i = sum_{0<=j<strideNum} exp(input_{i,j} - shift) if isExp == true;
sum_i = sum_{0<=j<strideNum} exp(input_{i,j} - shift) if isExp == true;
= sum_{0<=j<strideNum} input_{i,j} - shift if isExp == false;
= sum_{0<=j<strideNum} input_{i,j} - shift if isExp == false;
where we can view the block as a matrix and input_{i,j} represent the item at the
where we can view the block as a matrix and input_{i,j} represent the item at the
crossing of the i-th columne and the j-th row.
crossing of the i-th columne and the j-th row.
>> input - the input array (representing a tensor)
>> input - the input array (representing a tensor)
>> output - the sum over each block. NOTE: output is also an array
>> output - the sum over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item
>> stride - stride that we need to move to the next item
...
@@ -304,7 +300,6 @@ void KernelReduceSumFast(DTYPE * input, DTYPE * output,
...
@@ -304,7 +300,6 @@ void KernelReduceSumFast(DTYPE * input, DTYPE * output,
/*
/*
reduce a tensor to another that keeps the sum along a dimension - fast version
reduce a tensor to another that keeps the sum along a dimension - fast version
This is for float16 reduction
This is for float16 reduction
>> input - the input array (representing a tensor)
>> input - the input array (representing a tensor)
>> output - the sum over each block. NOTE: output is also an array
>> output - the sum over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item
>> stride - stride that we need to move to the next item
...
...
source/core/ReduceSumSquared.cpp
查看文件 @
0887fae1
...
@@ -28,7 +28,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
...
@@ -28,7 +28,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
squared sum of the items along a dimension of the tensor.
squared sum of the items along a dimension of the tensor.
For a 1-dimensional data array a,
For a 1-dimensional data array a,
sum = \sum_i (a_i - shift)^2
sum = \sum_i (a_i - shift)^2
>> input - the input tensor
>> input - the input tensor
>> output - the output tensor
>> output - the output tensor
>> dim - the dimension where the reduction is performed on
>> dim - the dimension where the reduction is performed on
...
...
source/core/ReduceVariance.cpp
查看文件 @
0887fae1
...
@@ -29,7 +29,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
...
@@ -29,7 +29,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
variance of the items along a dimension of the tensor.
variance of the items along a dimension of the tensor.
For a 1-dimensional data array a,
For a 1-dimensional data array a,
variance = 1/n * \sum_i (a_i - mean)^2
variance = 1/n * \sum_i (a_i - mean)^2
>> input - the input tensor
>> input - the input tensor
>> output - the output tensor
>> output - the output tensor
>> dim - the dimension where the reduction is performed on
>> dim - the dimension where the reduction is performed on
...
...
source/core/ScaleAndShift.cpp
查看文件 @
0887fae1
...
@@ -26,9 +26,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
...
@@ -26,9 +26,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
/*
scale and shift all tensor entires
scale and shift all tensor entires
p = p * scale + shift
p = p * scale + shift
>> a - the tensor
>> a - the tensor
>> scale - the scaler factor
>> scale - the scaler factor
>> shift - the shift factor
>> shift - the shift factor
...
...
source/core/ScaleAndShift.cu
查看文件 @
0887fae1
...
@@ -80,9 +80,7 @@ void KernelScaleAndShift(__half * d, int size, __half scale, __half shift)
...
@@ -80,9 +80,7 @@ void KernelScaleAndShift(__half * d, int size, __half scale, __half shift)
/*
/*
scale and shift all matrix entires
scale and shift all matrix entires
p = p * scale + shift
p = p * scale + shift
>> a - the tensor
>> a - the tensor
>> scale - the scaler factor
>> scale - the scaler factor
>> shift - the shift factor
>> shift - the shift factor
...
...
source/core/Select.cpp
查看文件 @
0887fae1
...
@@ -31,7 +31,7 @@ c = select(a)
...
@@ -31,7 +31,7 @@ c = select(a)
>> dim - the dimension along with which we do the job
>> dim - the dimension along with which we do the job
>> low - lower bound
>> low - lower bound
>> high - higher bound.
>> high - higher bound.
Note that range [1,3] means that we select 1 and 2.
Note that range [1,3] means that we select 1 and 2.
>> c - result tensor
>> c - result tensor
*/
*/
void
SelectRange
(
XTensor
*
a
,
int
dim
,
int
low
,
int
high
,
XTensor
*
c
)
void
SelectRange
(
XTensor
*
a
,
int
dim
,
int
low
,
int
high
,
XTensor
*
c
)
...
@@ -75,5 +75,4 @@ void SelectRange(XTensor * a, int dim, int low, int high, XTensor * c)
...
@@ -75,5 +75,4 @@ void SelectRange(XTensor * a, int dim, int low, int high, XTensor * c)
}
}
}
}
}
// namespace nts(NiuTrans.Tensor)
}
// namespace nts(NiuTrans.Tensor)
source/core/SetData.cpp
查看文件 @
0887fae1
...
@@ -68,10 +68,11 @@ void SetDataRand(XTensor * tensor, DTYPE low, DTYPE high)
...
@@ -68,10 +68,11 @@ void SetDataRand(XTensor * tensor, DTYPE low, DTYPE high)
ShowNTErrors
(
"TODO"
);
ShowNTErrors
(
"TODO"
);
}
}
}
}
/* GPU code
/*
The trick here is that initialize the data on a temperary tensor on CPU.
GPU code
The CPU data is then copied to GPU.
The trick here is that initialize the data on a temperary tensor on CPU.
TODO: generate data points on GPUs straightforwardly.
The CPU data is then copied to GPU.
TODO: generate data points on GPUs straightforwardly.
*/
*/
else
{
else
{
XTensor
*
t2
=
NewTensor
(
tensor
->
order
,
tensor
->
dimSize
,
tensor
->
dataType
,
tensor
->
denseRatio
,
-
1
);
XTensor
*
t2
=
NewTensor
(
tensor
->
order
,
tensor
->
dimSize
,
tensor
->
dataType
,
tensor
->
denseRatio
,
-
1
);
...
...
source/core/Sort.cpp
查看文件 @
0887fae1
...
@@ -39,6 +39,7 @@ void Sort(XTensor * a, XTensor * index, int dim)
...
@@ -39,6 +39,7 @@ void Sort(XTensor * a, XTensor * index, int dim)
CheckNTErrors
((
index
->
dataType
==
X_INT
),
"Wrong data type!"
);
CheckNTErrors
((
index
->
dataType
==
X_INT
),
"Wrong data type!"
);
int
dimRDI
=
a
->
order
-
dim
-
1
;
int
dimRDI
=
a
->
order
-
dim
-
1
;
/* make the index tensor */
/* make the index tensor */
index
->
SetAscendingOrder
(
dim
);
index
->
SetAscendingOrder
(
dim
);
...
...
source/core/Sort.cuh
查看文件 @
0887fae1
...
@@ -29,6 +29,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -29,6 +29,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
#ifdef USE_CUDA
/* sort the tensor along a given dimension */
/* sort the tensor along a given dimension */
extern "C"
void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, int dim, int k = -1);
void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, int dim, int k = -1);
#endif // USE_CUDA
#endif // USE_CUDA
...
...
source/core/Split.h
查看文件 @
0887fae1
...
@@ -27,6 +27,7 @@
...
@@ -27,6 +27,7 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* transform a tensor by splitting it, e.g., (M, N) -> (M, N/3, 3) */
/* transform a tensor by splitting it, e.g., (M, N) -> (M, N/3, 3) */
extern
"C"
void
Split
(
XTensor
*
s
,
XTensor
*
t
,
int
whereToSplit
,
int
splitNum
);
void
Split
(
XTensor
*
s
,
XTensor
*
t
,
int
whereToSplit
,
int
splitNum
);
/* split a big tensor into small tensors */
/* split a big tensor into small tensors */
...
...
source/core/Sum.cu
查看文件 @
0887fae1
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
#ifdef USE_CUDA
/*
/*
summation of data arrays (CUDA Kernel)
summation of data arrays (CUDA Kernel)
c = a + b * \beta
c = a + b * \beta
...
...
source/core/Sum.cuh
查看文件 @
0887fae1
...
@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
#ifdef USE_CUDA
/* summation of data arrays (CUDA Kernel) */
/* summation of data arrays (CUDA Kernel) */
extern "C" __global__
extern "C" __global__
void KernelADD(DTYPE * a, DTYPE * b, DTYPE * c, int size, DTYPE beta = (DTYPE)1.0);
void KernelADD(DTYPE * a, DTYPE * b, DTYPE * c, int size, DTYPE beta = (DTYPE)1.0);
...
...
source/core/SumByColumnVT.cu
查看文件 @
0887fae1
...
@@ -27,6 +27,7 @@
...
@@ -27,6 +27,7 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
#ifdef USE_CUDA
/*
/*
summation of a vector (column vector) and a tensor
summation of a vector (column vector) and a tensor
c = a + \sum{col} b_col * \beta
c = a + \sum{col} b_col * \beta
...
...
source/core/SumByColumnVT.h
查看文件 @
0887fae1
...
@@ -26,7 +26,6 @@
...
@@ -26,7 +26,6 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* sum of a (column) vector and a tensor */
/* sum of a (column) vector and a tensor */
extern
"C"
extern
"C"
void
SumByColumnVT
(
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
c
=
NULL
,
DTYPE
beta
=
(
DTYPE
)
1
.
0
);
void
SumByColumnVT
(
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
c
=
NULL
,
DTYPE
beta
=
(
DTYPE
)
1
.
0
);
...
...
source/core/TopK.cpp
查看文件 @
0887fae1
...
@@ -24,6 +24,7 @@
...
@@ -24,6 +24,7 @@
#include "TopK.cuh"
#include "TopK.cuh"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
/*
get the top-k items along a given dimension
get the top-k items along a given dimension
>> a - input tensor
>> a - input tensor
...
...
source/core/TopK.cu
查看文件 @
0887fae1
...
@@ -95,9 +95,11 @@ public:
...
@@ -95,9 +95,11 @@ public:
/* swap */
/* swap */
__device__ void Swap(int i, int j)
__device__ void Swap(int i, int j)
{
{
/*CudaHeapNode<T> tmp = items[i];
/*
CudaHeapNode<T> tmp = items[i];
items[i] = items[j];
items[i] = items[j];
items[j] = tmp;*/
items[j] = tmp;
*/
int tmpIndex = items[i].index;
int tmpIndex = items[i].index;
T tmpValue = items[i].value;
T tmpValue = items[i].value;
items[i] = items[j];
items[i] = items[j];
...
@@ -239,8 +241,10 @@ void KernelTopK(T * input, int stride, int strideNum, int blockNum, int k, T min
...
@@ -239,8 +241,10 @@ void KernelTopK(T * input, int stride, int strideNum, int blockNum, int k, T min
if (threadIdx.x == 0) {
if (threadIdx.x == 0) {
CudaXHeap<MIN_HEAP, T> heapFinal(k, k, heapData + k * threadIdx.y * blockDim.x);
CudaXHeap<MIN_HEAP, T> heapFinal(k, k, heapData + k * threadIdx.y * blockDim.x);
/* merge the result over the workers.
/*
This can be improved by parallel merging */
merge the result over the workers.
This can be improved by parallel merging
*/
if (blockDim.x > 1) {
if (blockDim.x > 1) {
for (int p = 1; p < blockDim.x && p < strideNum; p++) {
for (int p = 1; p < blockDim.x && p < strideNum; p++) {
CudaHeapNode<T> * hd = heapData + k * (threadIdx.y * blockDim.x + p);
CudaHeapNode<T> * hd = heapData + k * (threadIdx.y * blockDim.x + p);
...
@@ -429,6 +433,7 @@ void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k)
...
@@ -429,6 +433,7 @@ void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k)
}
}
}
}
/* we resort to sorting if the data cannot fit inside the shared memory */
/* we resort to sorting if the data cannot fit inside the shared memory */
else {
else {
int dimSize[MAX_TENSOR_DIM_NUM];
int dimSize[MAX_TENSOR_DIM_NUM];
...
...
source/core/XMatrixSegment.cpp
查看文件 @
0887fae1
...
@@ -227,7 +227,7 @@ int SegmentTensor2D(int rowNum, int colNum, int blockNum, int * blockIndex)
...
@@ -227,7 +227,7 @@ int SegmentTensor2D(int rowNum, int colNum, int blockNum, int * blockIndex)
x2
=
colSize
-
1
;
x2
=
colSize
-
1
;
y2
=
rowSize
-
1
;
// bottom-right corner
y2
=
rowSize
-
1
;
// bottom-right corner
/* the main body of the matrix (after removing the margin block) */
/* the main body of the matrix (after removing the margin block) */
while
(
x1
<=
xMax
)
{
while
(
x1
<=
xMax
)
{
y1
=
0
;
y1
=
0
;
x2
=
x1
+
colSize
-
1
;
x2
=
x1
+
colSize
-
1
;
...
...
source/core/XMatrixSegment.h
查看文件 @
0887fae1
...
@@ -26,9 +26,7 @@
...
@@ -26,9 +26,7 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*******************************************************************
/* segmentation and parallel processing for 2d tensors (i.e., matrices) */
segmentation and parallel processing for 2d tensors (i.e., matrices)
*/
/* segment a 2d tensor (i.e., matrix) into blocks and run jobs in parallel */
/* segment a 2d tensor (i.e., matrix) into blocks and run jobs in parallel */
extern
"C"
extern
"C"
void
RunParallel2D
(
XPRunner
*
parallelRunner
,
void
*
job
,
int
opNum
,
int
rowNum
,
int
colNum
,
int
argNum
,
...);
void
RunParallel2D
(
XPRunner
*
parallelRunner
,
void
*
job
,
int
opNum
,
int
rowNum
,
int
colNum
,
int
argNum
,
...);
...
...
source/core/XTensorBLAS.cu
查看文件 @
0887fae1
...
@@ -28,9 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -28,9 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
#ifdef USE_CUDA
/*
/* matrix multiplication via cuda version BLAS */
matrix multiplication via cuda version BLAS
*/
void CudaBLASMatrixMUL(cublasHandle_t * handle,
void CudaBLASMatrixMUL(cublasHandle_t * handle,
void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
...
@@ -85,9 +83,7 @@ void CudaBLASMatrixMUL(cublasHandle_t * handle,
...
@@ -85,9 +83,7 @@ void CudaBLASMatrixMUL(cublasHandle_t * handle,
}
}
}
}
/*
/* matrix multiplication via cuda version BLAS */
matrix multiplication via cuda version BLAS
*/
void CudaBLASMatrixMULBatched(cublasHandle_t * handle,
void CudaBLASMatrixMULBatched(cublasHandle_t * handle,
const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
const void ** b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
const void ** b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
...
@@ -143,7 +139,6 @@ void CudaBLASMatrixMULBatched(cublasHandle_t * handle,
...
@@ -143,7 +139,6 @@ void CudaBLASMatrixMULBatched(cublasHandle_t * handle,
}
}
/* matrix multiplication in batch and strided mode via cuda version BLAS */
/* matrix multiplication in batch and strided mode via cuda version BLAS */
extern "C"
void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA,
const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA,
const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, long long int strideB,
const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, long long int strideB,
...
@@ -198,9 +193,7 @@ void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
...
@@ -198,9 +193,7 @@ void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
}
}
}
}
/*
/* matrix multiplication via cuda version BLAS */
matrix multiplication via cuda version BLAS
*/
void CudaBLASMatrixMULList(cublasHandle_t * handle,
void CudaBLASMatrixMULList(cublasHandle_t * handle,
XList * a, MATRIX_TRANS_TYPE transposedA,
XList * a, MATRIX_TRANS_TYPE transposedA,
XList * b, MATRIX_TRANS_TYPE transposedB,
XList * b, MATRIX_TRANS_TYPE transposedB,
...
...
source/core/XTensorCore.h
查看文件 @
0887fae1
...
@@ -37,33 +37,41 @@ public:
...
@@ -37,33 +37,41 @@ public:
concatenate a list of tensors along a given dimension
concatenate a list of tensors along a given dimension
Note that this is actually a wrapper that selects "ConcatenateSolely"
Note that this is actually a wrapper that selects "ConcatenateSolely"
or "Merge" by means of the tensor shapes */
or "Merge" by means of the tensor shapes */
extern
"C"
void
Concatenate
(
XList
*
smalls
,
XTensor
*
big
,
int
dim
);
void
Concatenate
(
XList
*
smalls
,
XTensor
*
big
,
int
dim
);
/* concatenate two tensors along a given dimension */
/* concatenate two tensors along a given dimension */
extern
"C"
void
Concatenate
(
XTensor
*
smallA
,
XTensor
*
smallB
,
XTensor
*
big
,
int
dim
);
void
Concatenate
(
XTensor
*
smallA
,
XTensor
*
smallB
,
XTensor
*
big
,
int
dim
);
/* concatenate a list of tensors along a given dimension */
/* concatenate a list of tensors along a given dimension */
extern
"C"
static
static
void
ConcatenateSolely
(
XList
*
smalls
,
XTensor
*
big
,
int
dim
);
void
ConcatenateSolely
(
XList
*
smalls
,
XTensor
*
big
,
int
dim
);
/* copy selected sub-tensors */
/* copy selected sub-tensors */
extern
"C"
static
static
bool
CopyIndexed
(
XTensor
*
s
,
XTensor
*
t
,
int
dim
,
int
*
srcIndex
,
int
indexSize
,
int
*
tgtIndex
,
int
copyNum
);
bool
CopyIndexed
(
XTensor
*
s
,
XTensor
*
t
,
int
dim
,
int
*
srcIndex
,
int
indexSize
,
int
*
tgtIndex
,
int
copyNum
);
/* copy a number of blocks in grid */
/* copy a number of blocks in grid */
extern
"C"
static
static
void
CopyInGrid
(
XTensor
*
s
,
XTensor
*
t
,
int
*
index
,
int
blockDim
,
int
blockNumInGrid
,
bool
isIndexOnDev
=
false
);
void
CopyInGrid
(
XTensor
*
s
,
XTensor
*
t
,
int
*
index
,
int
blockDim
,
int
blockNumInGrid
,
bool
isIndexOnDev
=
false
);
/* copy s to t */
/* copy s to t */
extern
"C"
static
static
bool
CopyValues
(
XTensor
*
s
,
XTensor
*
t
,
XStream
*
stream
=
NULL
);
bool
CopyValues
(
XTensor
*
s
,
XTensor
*
t
,
XStream
*
stream
=
NULL
);
/* set target data block index for the data movement in merge */
/* set target data block index for the data movement in merge */
extern
"C"
static
static
void
MakeMergeBlockIndex
(
int
*
blockIndex
,
int
blockNum
,
int
blockNumInMerge
,
void
MakeMergeBlockIndex
(
int
*
blockIndex
,
int
blockNum
,
int
blockNumInMerge
,
int
splitSizeInGrid
,
int
gridSize
,
int
gridNum
,
XMem
*
mem
);
int
splitSizeInGrid
,
int
gridSize
,
int
gridNum
,
XMem
*
mem
);
/* set target data block index for the data movement in split */
/* set target data block index for the data movement in split */
extern
"C"
static
static
void
MakeSplitBlockIndex
(
int
*
blockIndex
,
int
splitNum
,
int
blockSplitSize
,
int
blockNum
,
XMem
*
mem
);
void
MakeSplitBlockIndex
(
int
*
blockIndex
,
int
splitNum
,
int
blockSplitSize
,
int
blockNum
,
XMem
*
mem
);
...
@@ -78,6 +86,7 @@ public:
...
@@ -78,6 +86,7 @@ public:
tensor of the result C. C should be a tensor of z * x * n * m. Obviously C = A * B performs
tensor of the result C. C should be a tensor of z * x * n * m. Obviously C = A * B performs
normal matrix multiplication if A = y * z and B = x * y.
normal matrix multiplication if A = y * z and B = x * y.
*/
*/
extern
"C"
static
static
void
MatrixMul
(
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
void
MatrixMul
(
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
,
XPRunner
*
parallelRunner
=
NULL
);
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
,
XPRunner
*
parallelRunner
=
NULL
);
...
@@ -87,6 +96,7 @@ public:
...
@@ -87,6 +96,7 @@ public:
c = trans(a) * trans(b) * alpha + c * beta
c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired
where trans() return the transposed matrix if the flag is fired
*/
*/
extern
"C"
static
static
void
MatrixMul2D
(
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
void
MatrixMul2D
(
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
,
XPRunner
*
parallelRunner
=
NULL
,
XStream
*
stream
=
NULL
);
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
,
XPRunner
*
parallelRunner
=
NULL
,
XStream
*
stream
=
NULL
);
...
@@ -95,6 +105,7 @@ public:
...
@@ -95,6 +105,7 @@ public:
matrix multiplication for a block (x1,y1) - (x2,y2)
matrix multiplication for a block (x1,y1) - (x2,y2)
where (x1,y1) is the upper-left corner and (x2,y2) is the bottom-right corner
where (x1,y1) is the upper-left corner and (x2,y2) is the bottom-right corner
*/
*/
extern
"C"
static
static
void
MatrixMul2DMultiTheading
(
XList
*
args
);
void
MatrixMul2DMultiTheading
(
XList
*
args
);
...
@@ -103,6 +114,7 @@ public:
...
@@ -103,6 +114,7 @@ public:
c = trans(a) * trans(b) * alpha + c * beta
c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired
where trans() return the transposed matrix if the flag is fired
*/
*/
extern
"C"
static
static
void
MatrixMul2DParallel
(
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
void
MatrixMul2DParallel
(
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
,
XPRunner
*
parallelRunner
=
NULL
);
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
,
XPRunner
*
parallelRunner
=
NULL
);
...
@@ -114,29 +126,36 @@ public:
...
@@ -114,29 +126,36 @@ public:
ci = trans(ai) * trans(bi) * alpha + cm * beta
ci = trans(ai) * trans(bi) * alpha + cm * beta
where trans() returns the transposed matrix if the flag is fired
where trans() returns the transposed matrix if the flag is fired
*/
*/
extern
"C"
static
static
void
MatrixMulBatched
(
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
void
MatrixMulBatched
(
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
,
XPRunner
*
parallelRunner
=
NULL
);
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
,
XPRunner
*
parallelRunner
=
NULL
);
/* matrix multiplication in batch mode (CPU code) */
/* matrix multiplication in batch mode (CPU code) */
extern
"C"
static
static
void
MatrixMULBatchedCPU
(
XList
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XList
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XList
*
c
,
void
MatrixMULBatchedCPU
(
XList
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XList
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XList
*
c
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
);
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
);
/* transform a tensor by merging it alone with a dimension, e.g., (M, N/3, 3) -> (M, N) */
/* transform a tensor by merging it alone with a dimension, e.g., (M, N/3, 3) -> (M, N) */
void
Merge
(
XTensor
*
s
,
XTensor
*
t
,
int
whereToMerge
,
int
leadingDim
=
-
1
);
extern
"C"
void
Merge
(
XTensor
*
s
,
XTensor
*
t
,
int
whereToMerge
,
int
leadingDim
=
-
1
);
/* merge small tensors into a big tensor */
/* merge small tensors into a big tensor */
extern
"C"
void
Merge
(
XList
*
smalls
,
XTensor
*
big
,
int
whereToMerge
);
void
Merge
(
XList
*
smalls
,
XTensor
*
big
,
int
whereToMerge
);
/* merge data by blocks */
/* merge data by blocks */
extern
"C"
void
MergeBlockLists
(
XList
*
sourceList
,
int
*
blockSizes
,
int
blockNum
,
void
*
target
,
XMem
*
myMem
);
void
MergeBlockLists
(
XList
*
sourceList
,
int
*
blockSizes
,
int
blockNum
,
void
*
target
,
XMem
*
myMem
);
/* element-wise product of two tensors */
/* element-wise product of two tensors */
extern
"C"
static
static
void
MultiplyElementWise
(
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
c
,
int
leadingDim
,
DTYPE
alpha
=
0
);
void
MultiplyElementWise
(
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
c
,
int
leadingDim
,
DTYPE
alpha
=
0
);
/* set every entry to its minus value */
/* set every entry to its minus value */
extern
"C"
void
Negate
(
XTensor
*
a
);
void
Negate
(
XTensor
*
a
);
/*
/*
...
@@ -144,13 +163,16 @@ public:
...
@@ -144,13 +163,16 @@ public:
y = a * (x-mean)/sqrt(variance+\epsilon) + b
y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
*/
*/
extern
"C"
static
static
void
Normalize
(
XTensor
*
input
,
XTensor
*
output
,
int
dim
,
XTensor
*
mean
,
XTensor
*
var
,
XTensor
*
a
,
XTensor
*
b
,
DTYPE
epsilon
);
void
Normalize
(
XTensor
*
input
,
XTensor
*
output
,
int
dim
,
XTensor
*
mean
,
XTensor
*
var
,
XTensor
*
a
,
XTensor
*
b
,
DTYPE
epsilon
);
/* get the power(x, y) */
/* get the power(x, y) */
extern
"C"
void
Power
(
XTensor
*
a
,
DTYPE
p
);
void
Power
(
XTensor
*
a
,
DTYPE
p
);
/* get the max value of the items along a dimension of the tensor. */
/* get the max value of the items along a dimension of the tensor. */
extern
"C"
static
static
void
ReduceMax
(
XTensor
*
input
,
XTensor
*
output
,
int
dim
);
void
ReduceMax
(
XTensor
*
input
,
XTensor
*
output
,
int
dim
);
...
@@ -158,6 +180,7 @@ public:
...
@@ -158,6 +180,7 @@ public:
get the mean value along a dimension of the tensor. For a 1-dimensional data array a,
get the mean value along a dimension of the tensor. For a 1-dimensional data array a,
mean = (1/n) * sum_i input_i
mean = (1/n) * sum_i input_i
*/
*/
extern
"C"
static
static
void
ReduceMean
(
XTensor
*
input
,
XTensor
*
output
,
int
dim
);
void
ReduceMean
(
XTensor
*
input
,
XTensor
*
output
,
int
dim
);
...
@@ -165,6 +188,7 @@ public:
...
@@ -165,6 +188,7 @@ public:
standard variance of the items along a dimension of the tensor. For a 1-dimensional data array a,
standard variance of the items along a dimension of the tensor. For a 1-dimensional data array a,
variance = (1/n * \sum_i (a_i - mean)^2)^0.5
variance = (1/n * \sum_i (a_i - mean)^2)^0.5
*/
*/
extern
"C"
static
static
void
ReduceStandardVariance
(
XTensor
*
input
,
XTensor
*
output
,
int
dim
,
XTensor
*
mean
);
void
ReduceStandardVariance
(
XTensor
*
input
,
XTensor
*
output
,
int
dim
,
XTensor
*
mean
);
...
@@ -173,6 +197,7 @@ public:
...
@@ -173,6 +197,7 @@ public:
sum = \sum_i (a_i - shift) if isExp == false
sum = \sum_i (a_i - shift) if isExp == false
sum = \sum_i exp(a_i - shift) if isExp == true
sum = \sum_i exp(a_i - shift) if isExp == true
*/
*/
extern
"C"
static
static
void
ReduceSum
(
XTensor
*
input
,
XTensor
*
output
,
int
dim
,
XTensor
*
shift
=
NULL
,
DTYPE
power
=
(
DTYPE
)
1
.
0
F
,
bool
isExp
=
false
);
void
ReduceSum
(
XTensor
*
input
,
XTensor
*
output
,
int
dim
,
XTensor
*
shift
=
NULL
,
DTYPE
power
=
(
DTYPE
)
1
.
0
F
,
bool
isExp
=
false
);
...
@@ -180,6 +205,7 @@ public:
...
@@ -180,6 +205,7 @@ public:
squared sum of the items along a dimension of the tensor. For a 1-dimensional data array a,
squared sum of the items along a dimension of the tensor. For a 1-dimensional data array a,
sum = \sum_i (a_i - shift)^2
sum = \sum_i (a_i - shift)^2
*/
*/
extern
"C"
static
static
void
ReduceSumSquared
(
XTensor
*
input
,
XTensor
*
output
,
int
dim
,
XTensor
*
shift
);
void
ReduceSumSquared
(
XTensor
*
input
,
XTensor
*
output
,
int
dim
,
XTensor
*
shift
);
...
@@ -187,60 +213,73 @@ public:
...
@@ -187,60 +213,73 @@ public:
variance of the items along a dimension of the tensor. For a 1-dimensional data array a,
variance of the items along a dimension of the tensor. For a 1-dimensional data array a,
variance = 1/n * \sum_i (a_i - mean)^2
variance = 1/n * \sum_i (a_i - mean)^2
*/
*/
extern
"C"
static
static
void
ReduceVariance
(
XTensor
*
input
,
XTensor
*
output
,
int
dim
,
XTensor
*
mean
);
void
ReduceVariance
(
XTensor
*
input
,
XTensor
*
output
,
int
dim
,
XTensor
*
mean
);
/* scale and shift all tensor entires */
/* scale and shift all tensor entires */
extern
"C"
static
static
void
ScaleAndShift
(
XTensor
*
a
,
DTYPE
scale
,
DTYPE
shift
);
void
ScaleAndShift
(
XTensor
*
a
,
DTYPE
scale
,
DTYPE
shift
);
/* transform a tensor by splitting it, e.g., (M, N) -> (M, N/3, 3) */
/* transform a tensor by splitting it, e.g., (M, N) -> (M, N/3, 3) */
extern
"C"
void
Split
(
XTensor
*
s
,
XTensor
*
t
,
int
whereToSplit
,
int
splitNum
);
void
Split
(
XTensor
*
s
,
XTensor
*
t
,
int
whereToSplit
,
int
splitNum
);
/* split a big tensor into small tensors */
/* split a big tensor into small tensors */
extern
"C"
void
Split
(
XTensor
*
big
,
XList
*
smalls
,
int
whereToSplit
,
int
splitNum
);
void
Split
(
XTensor
*
big
,
XList
*
smalls
,
int
whereToSplit
,
int
splitNum
);
/* tensor summation c = a + b * \beta */
/* tensor summation c = a + b * \beta */
extern
"C"
static
static
void
Sum
(
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
c
=
NULL
,
DTYPE
beta
=
(
DTYPE
)
1
.
0
);
void
Sum
(
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
c
=
NULL
,
DTYPE
beta
=
(
DTYPE
)
1
.
0
);
/* sum of a tensor and a (column) vector */
/* sum of a tensor and a (column) vector */
extern
"C"
static
static
void
SumByColumnTV
(
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
c
=
NULL
,
DTYPE
beta
=
(
DTYPE
)
1
.
0
);
void
SumByColumnTV
(
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
c
=
NULL
,
DTYPE
beta
=
(
DTYPE
)
1
.
0
);
/* sum of a (column) vector and a tensor */
/* sum of a (column) vector and a tensor */
extern
"C"
static
static
void
SumByColumnVT
(
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
c
=
NULL
,
DTYPE
beta
=
(
DTYPE
)
1
.
0
);
void
SumByColumnVT
(
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
c
=
NULL
,
DTYPE
beta
=
(
DTYPE
)
1
.
0
);
/* get the top-k items along a given dimension */
/* get the top-k items along a given dimension */
extern
"C"
static
static
void
TopK
(
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
index
,
int
dim
,
int
k
);
void
TopK
(
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
index
,
int
dim
,
int
k
);
/* insert a dimension by copying the blocks for x times (where x is the size of the inerted dimension) */
/* insert a dimension by copying the blocks for x times (where x is the size of the inerted dimension) */
void
Unsqueeze
(
XTensor
*
a
,
XTensor
*
b
,
int
dim
,
int
dSize
);
extern
"C"
void
Unsqueeze
(
XTensor
*
a
,
XTensor
*
b
,
int
dim
,
int
dSize
);
/* segmentation and parallel processing for 2d tensors (i.e., matrices) */
/*******************************************************************
segmentation and parallel processing for 2d tensors (i.e., matrices)
*/
/* segment a 2d tensor (i.e., matrix) into blocks and run jobs in parallel */
/* segment a 2d tensor (i.e., matrix) into blocks and run jobs in parallel */
static
extern
"C"
static
void
RunParallel2D
(
XPRunner
*
parallelRunner
,
void
*
job
,
int
opNum
,
int
rowNum
,
int
colNum
,
int
argNum
,
...);
void
RunParallel2D
(
XPRunner
*
parallelRunner
,
void
*
job
,
int
opNum
,
int
rowNum
,
int
colNum
,
int
argNum
,
...);
/* segment a block into sub-blocks */
/* segment a block into sub-blocks */
extern
"C"
static
static
int
SegmentTensor2D
(
int
rowNum
,
int
colNum
,
int
blockNum
,
int
*
blockIndex
);
int
SegmentTensor2D
(
int
rowNum
,
int
colNum
,
int
blockNum
,
int
*
blockIndex
);
/* segment a block into sub-blocks */
/* segment a block into sub-blocks */
extern
"C"
static
static
int
SegmentTensor2DInRows
(
int
rowNum
,
int
colNum
,
int
blockNum
,
int
*
blockIndex
);
int
SegmentTensor2DInRows
(
int
rowNum
,
int
colNum
,
int
blockNum
,
int
*
blockIndex
);
/* matrix multiplication (BLAS) */
/* matrix multiplication (BLAS) */
extern
"C"
static
static
void
MatrixMULCPU
(
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
);
void
MatrixMULCPU
(
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* matrix multiplication via cuda version BLAS */
/* matrix multiplication via cuda version BLAS */
extern
"C"
static
static
void
CudaBLASMatrixMUL
(
cublasHandle_t
*
handle
,
void
CudaBLASMatrixMUL
(
cublasHandle_t
*
handle
,
void
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
TENSOR_DATA_TYPE
dataTypeA
,
void
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
TENSOR_DATA_TYPE
dataTypeA
,
...
@@ -249,6 +288,7 @@ public:
...
@@ -249,6 +288,7 @@ public:
int
na
,
int
ma
,
int
nb
,
int
mb
,
int
nc
,
int
mc
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
1
.
0
);
int
na
,
int
ma
,
int
nb
,
int
mb
,
int
nc
,
int
mc
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
1
.
0
);
/* matrix multiplication in batch mode via cuda version BLAS */
/* matrix multiplication in batch mode via cuda version BLAS */
extern
"C"
static
static
void
CudaBLASMatrixMULBatched
(
cublasHandle_t
*
handle
,
void
CudaBLASMatrixMULBatched
(
cublasHandle_t
*
handle
,
const
void
**
a
,
MATRIX_TRANS_TYPE
transposedA
,
TENSOR_DATA_TYPE
dataTypeA
,
const
void
**
a
,
MATRIX_TRANS_TYPE
transposedA
,
TENSOR_DATA_TYPE
dataTypeA
,
...
@@ -257,6 +297,7 @@ public:
...
@@ -257,6 +297,7 @@ public:
int
count
,
int
na
,
int
ma
,
int
nb
,
int
mb
,
int
nc
,
int
mc
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
1
.
0
);
int
count
,
int
na
,
int
ma
,
int
nb
,
int
mb
,
int
nc
,
int
mc
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
1
.
0
);
/* matrix multiplication in batch and strided mode via cuda version BLAS */
/* matrix multiplication in batch and strided mode via cuda version BLAS */
extern
"C"
static
static
void
CudaBLASMatrixMULBatchedStrided
(
cublasHandle_t
*
handle
,
void
CudaBLASMatrixMULBatchedStrided
(
cublasHandle_t
*
handle
,
const
void
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
TENSOR_DATA_TYPE
dataTypeA
,
long
long
int
strideA
,
const
void
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
TENSOR_DATA_TYPE
dataTypeA
,
long
long
int
strideA
,
...
@@ -265,6 +306,7 @@ public:
...
@@ -265,6 +306,7 @@ public:
int
count
,
int
na
,
int
ma
,
int
nb
,
int
mb
,
int
nc
,
int
mc
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
1
.
0
);
int
count
,
int
na
,
int
ma
,
int
nb
,
int
mb
,
int
nc
,
int
mc
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
1
.
0
);
/* matrix multiplication in batch mode via cuda version BLAS */
/* matrix multiplication in batch mode via cuda version BLAS */
extern
"C"
static
static
void
CudaBLASMatrixMULList
(
cublasHandle_t
*
handle
,
XList
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XList
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XList
*
c
,
void
CudaBLASMatrixMULList
(
cublasHandle_t
*
handle
,
XList
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XList
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XList
*
c
,
int
count
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
1
.
0
);
int
count
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
1
.
0
);
...
...
source/function/HardTanH.cpp
查看文件 @
0887fae1
...
@@ -25,7 +25,6 @@
...
@@ -25,7 +25,6 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
/*
hard tanh function
hard tanh function
y = 1 if x > 1
y = 1 if x > 1
...
...
source/function/HardTanH.cu
查看文件 @
0887fae1
...
@@ -95,7 +95,6 @@ dy/dx = 1 if -1 <= x <= 1
...
@@ -95,7 +95,6 @@ dy/dx = 1 if -1 <= x <= 1
>> y - y of the function
>> y - y of the function
>> x - x of the function
>> x - x of the function
>> size - size of y/x
>> size - size of y/x
*/
*/
__global__
__global__
void KernelHardtanhBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size)
void KernelHardtanhBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size)
...
...
source/function/LogSoftmax.cpp
查看文件 @
0887fae1
...
@@ -49,7 +49,6 @@ void LogSoftmax(XTensor * x, XTensor * y, int leadDim)
...
@@ -49,7 +49,6 @@ void LogSoftmax(XTensor * x, XTensor * y, int leadDim)
dimSize
[
i
-
1
]
=
-
x
->
dimSize
[
i
];
dimSize
[
i
-
1
]
=
-
x
->
dimSize
[
i
];
}
}
XMem
*
mem
=
x
->
mem
;
XMem
*
mem
=
x
->
mem
;
XTensor
*
max
=
NULL
;
XTensor
*
max
=
NULL
;
XTensor
*
sum
=
NULL
;
XTensor
*
sum
=
NULL
;
...
@@ -168,7 +167,6 @@ dE/dx = dE/dy * dy/dx
...
@@ -168,7 +167,6 @@ dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
dy_i/dx_j
dy_i/dx_j
= d{log(e^{x_i} / \sum_{k} e^{x_k})}/dx_j
= d{log(e^{x_i} / \sum_{k} e^{x_k})}/dx_j
= d{log(e^{x_i})}/dx_j - d{log(\sum_{k} e^{x_k})}/dx_j
= d{log(e^{x_i})}/dx_j - d{log(\sum_{k} e^{x_k})}/dx_j
...
...
source/function/LogSoftmax.cu
查看文件 @
0887fae1
...
@@ -41,7 +41,8 @@ void CudaLogSoftmax(XTensor * x, XTensor * y, int leadDim)
...
@@ -41,7 +41,8 @@ void CudaLogSoftmax(XTensor * x, XTensor * y, int leadDim)
ShowNTErrors("You should call LogSoftmax instead!");
ShowNTErrors("You should call LogSoftmax instead!");
}
}
/* log softmax forward computation (Cuda kernel)
/*
log softmax forward computation (Cuda kernel)
for each column j, let y_{i,j} and x_{i,j} are the output
for each column j, let y_{i,j} and x_{i,j} are the output
and state value for the i-th element of column j. We have
and state value for the i-th element of column j. We have
...
@@ -85,7 +86,8 @@ void KernelLogSoftmaxComputeByRow(DTYPE * x, DTYPE * max, DTYPE * sum, DTYPE * y
...
@@ -85,7 +86,8 @@ void KernelLogSoftmaxComputeByRow(DTYPE * x, DTYPE * max, DTYPE * sum, DTYPE * y
}
}
}
}
/* log softmax forward computation (Cuda kernel)
/*
log softmax forward computation (Cuda kernel)
for each row i, let y_{i,j} and x_{i,j} are the output
for each row i, let y_{i,j} and x_{i,j} are the output
and state value for the j-th element of row i. We have
and state value for the j-th element of row i. We have
...
@@ -182,7 +184,7 @@ void CudaLogSoftmaxSumMax(XTensor * x, XTensor * y, int leadDim, XTensor * sum,
...
@@ -182,7 +184,7 @@ void CudaLogSoftmaxSumMax(XTensor * x, XTensor * y, int leadDim, XTensor * sum,
/*
/*
set dE/dx = exp(y)
set dE/dx = exp(y)
>> ded
u
- dE/dy
>> ded
y
- dE/dy
>> dedx - dE/dx
>> dedx - dE/dx
>> y - output of the function
>> y - output of the function
>> size - size of output
>> size - size of output
...
@@ -256,7 +258,9 @@ dE/dx_j += -gold_j
...
@@ -256,7 +258,9 @@ dE/dx_j += -gold_j
>> gold - gold standard to measure error (or loss)
>> gold - gold standard to measure error (or loss)
>> y - output of the function
>> y - output of the function
>> x - input of the function
>> x - input of the function
>> size - size of input/output
>> rowNum - row number of the matrix
>> colNum - column number of the matrix
>> gNonZeroNum -
>> lossName - name of the loss function
>> lossName - name of the loss function
*/
*/
__global__
__global__
...
@@ -293,7 +297,6 @@ dE/dx = dE/dy * dy/dx
...
@@ -293,7 +297,6 @@ dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
dy_i/dx_j
dy_i/dx_j
= d{log(e^{x_i} / \sum_{k} e^{x_k})}/dx_j
= d{log(e^{x_i} / \sum_{k} e^{x_k})}/dx_j
= d{log(e^{x_i})}/dx_j - d{log(\sum_{k} e^{x_k})}/dx_j
= d{log(e^{x_i})}/dx_j - d{log(\sum_{k} e^{x_k})}/dx_j
...
...
source/function/Loss.cu
查看文件 @
0887fae1
...
@@ -31,7 +31,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
...
@@ -31,7 +31,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
loss function to measure the "number" of errors
loss function to measure the "number" of errors
*/
*/
/*
/*
compute the loss
compute the loss
>> gold - gold standard
>> gold - gold standard
...
...
source/function/Rectify.cu
查看文件 @
0887fae1
...
@@ -88,7 +88,6 @@ dy/dx = 1 if x >= 0
...
@@ -88,7 +88,6 @@ dy/dx = 1 if x >= 0
>> y - output of the function
>> y - output of the function
>> x - input of the function
>> x - input of the function
>> size - size of output/input
>> size - size of output/input
*/
*/
__global__
__global__
void KernelRectifyBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size)
void KernelRectifyBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size)
...
...
source/function/Sigmoid.cpp
查看文件 @
0887fae1
...
@@ -25,7 +25,6 @@
...
@@ -25,7 +25,6 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
/*
sigmoid function y = 1/(1+exp(-x))
sigmoid function y = 1/(1+exp(-x))
>> x - input tensor
>> x - input tensor
...
...
source/function/Sigmoid.cu
查看文件 @
0887fae1
...
@@ -95,7 +95,6 @@ sigmoid: y = 1/(1+exp(-x))
...
@@ -95,7 +95,6 @@ sigmoid: y = 1/(1+exp(-x))
>> y - output of the function
>> y - output of the function
>> x - input of the function
>> x - input of the function
>> size - size of output/input
>> size - size of output/input
*/
*/
__global__
__global__
void KernelSigmoidBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size)
void KernelSigmoidBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size)
...
@@ -122,7 +121,6 @@ sigmoid: y = 1/(1+exp(-x))
...
@@ -122,7 +121,6 @@ sigmoid: y = 1/(1+exp(-x))
>> dedy - dE/dy
>> dedy - dE/dy
>> dedx - dE/dx
>> dedx - dE/dx
>> lossName - type of loss function, e.g., cross entropy
>> lossName - type of loss function, e.g., cross entropy
*/
*/
void CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
void CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
XTensor * dedy, XTensor * dedx,
...
...
source/function/Softmax.cuh
查看文件 @
0887fae1
...
@@ -29,7 +29,6 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
...
@@ -29,7 +29,6 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
#ifdef USE_CUDA
/* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */
/* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */
extern "C"
extern "C"
void CudaSotmax(XTensor * input, XTensor * output, int leadDim);
void CudaSotmax(XTensor * input, XTensor * output, int leadDim);
...
...
source/test/TConcatenate.cpp
查看文件 @
0887fae1
...
@@ -22,8 +22,10 @@
...
@@ -22,8 +22,10 @@
#include "TConcatenate.h"
#include "TConcatenate.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: concatenate a list of tensors along a given dimension.
* In this case, 2 * (2, 1) -> (2, 2), dim=1.
/*
case 1: concatenate a list of tensors along a given dimension.
In this case, 2 * (2, 1) -> (2, 2), dim=1.
*/
*/
bool
TestConcatenate1
()
bool
TestConcatenate1
()
{
{
...
@@ -60,12 +62,12 @@ bool TestConcatenate1()
...
@@ -60,12 +62,12 @@ bool TestConcatenate1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
},
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
F
},
{
1.0
}
};
{
1.0
F
}
};
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
},
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
F
},
{
3.0
}
};
{
3.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
0.0
,
2.0
},
DTYPE
answer
[
2
][
2
]
=
{
{
0.0
F
,
2.0
F
},
{
1.0
,
3.0
}
};
{
1.0
F
,
3.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -144,8 +146,9 @@ bool TestConcatenate1()
...
@@ -144,8 +146,9 @@ bool TestConcatenate1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 2: concatenate a list of tensors along a given dimension.
/*
* In this case, 2 * (2, 1) -> (4, 1), dim=0.
case 2: concatenate a list of tensors along a given dimension.
In this case, 2 * (2, 1) -> (4, 1), dim=0.
*/
*/
bool
TestConcatenate2
()
bool
TestConcatenate2
()
{
{
...
@@ -182,14 +185,14 @@ bool TestConcatenate2()
...
@@ -182,14 +185,14 @@ bool TestConcatenate2()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
},
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
F
},
{
1.0
}
};
{
1.0
F
}
};
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
},
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
F
},
{
3.0
}
};
{
3.0
F
}
};
DTYPE
answer
[
4
][
1
]
=
{
{
0.0
},
DTYPE
answer
[
4
][
1
]
=
{
{
0.0
F
},
{
1.0
},
{
1.0
F
},
{
2.0
},
{
2.0
F
},
{
3.0
}
};
{
3.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -268,8 +271,9 @@ bool TestConcatenate2()
...
@@ -268,8 +271,9 @@ bool TestConcatenate2()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 3: concatenate a list of tensors along a given dimension.
/*
* In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
case 3: concatenate a list of tensors along a given dimension.
In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
*/
*/
bool
TestConcatenate3
()
bool
TestConcatenate3
()
{
{
...
@@ -306,12 +310,12 @@ bool TestConcatenate3()
...
@@ -306,12 +310,12 @@ bool TestConcatenate3()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
},
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
F
},
{
1.0
}
};
{
1.0
F
}
};
DTYPE
sData2
[
2
][
2
]
=
{
{
2.0
,
3.0
},
DTYPE
sData2
[
2
][
2
]
=
{
{
2.0
F
,
3.0
F
},
{
4.0
,
5.0
}
};
{
4.0
F
,
5.0
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
,
2.0
,
3.0
},
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
F
,
2.0
F
,
3.0
F
},
{
1.0
,
4.0
,
5.0
}
};
{
1.0
F
,
4.0
F
,
5.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -390,8 +394,9 @@ bool TestConcatenate3()
...
@@ -390,8 +394,9 @@ bool TestConcatenate3()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 4: concatenate two tensors along a given dimension.
/*
* In this case, (2, 1), (2, 2) -> (2, 3), dim=1.
case 4: concatenate two tensors along a given dimension.
In this case, (2, 1), (2, 2) -> (2, 3), dim=1.
*/
*/
bool
TestConcatenate4
()
bool
TestConcatenate4
()
{
{
...
@@ -425,12 +430,12 @@ bool TestConcatenate4()
...
@@ -425,12 +430,12 @@ bool TestConcatenate4()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
},
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
F
},
{
1.0
}
};
{
1.0
F
}
};
DTYPE
sData2
[
2
][
2
]
=
{
{
2.0
,
3.0
},
DTYPE
sData2
[
2
][
2
]
=
{
{
2.0
F
,
3.0
F
},
{
4.0
,
5.0
}
};
{
4.0
F
,
5.0
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
,
2.0
,
3.0
},
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
F
,
2.0
F
,
3.0
F
},
{
1.0
,
4.0
,
5.0
}
};
{
1.0
F
,
4.0
F
,
5.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -502,7 +507,6 @@ TODO!!
...
@@ -502,7 +507,6 @@ TODO!!
*/
*/
/* test for Concatenate Function */
/* test for Concatenate Function */
extern
"C"
bool
TestConcatenate
()
bool
TestConcatenate
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST CONCATENATE] concatenate a list of tensors or two tensors along a given dimension
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST CONCATENATE] concatenate a list of tensors or two tensors along a given dimension
\n
"
);
...
...
source/test/TConcatenateSolely.cpp
查看文件 @
0887fae1
...
@@ -19,12 +19,14 @@
...
@@ -19,12 +19,14 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14
*/
*/
#include "TConcatenateSolely.h"
#include "../XList.h"
#include "../XList.h"
#include "TConcatenateSolely.h"
namespace
nts
{
// namespace nt(NiuTrans.Tensor)
namespace
nts
{
// namespace nt(NiuTrans.Tensor)
/* case 1: concatenate a list of tensors along a given dimension
* In this case, 2 * (2, 1) -> (2, 2), dim=1.
/*
case 1: concatenate a list of tensors along a given dimension
In this case, 2 * (2, 1) -> (2, 2), dim=1.
*/
*/
bool
TestConcatenateSolely1
()
bool
TestConcatenateSolely1
()
{
{
...
@@ -61,12 +63,12 @@ bool TestConcatenateSolely1()
...
@@ -61,12 +63,12 @@ bool TestConcatenateSolely1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
},
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
F
},
{
1.0
}
};
{
1.0
F
}
};
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
},
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
F
},
{
3.0
}
};
{
3.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
0.0
,
2.0
},
DTYPE
answer
[
2
][
2
]
=
{
{
0.0
F
,
2.0
F
},
{
1.0
,
3.0
}
};
{
1.0
F
,
3.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -145,8 +147,9 @@ bool TestConcatenateSolely1()
...
@@ -145,8 +147,9 @@ bool TestConcatenateSolely1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 2: concatenate a list of tensors along a given dimension
/*
* In this case, 2 * (2, 1) -> (4, 1), dim=0.
case 2: concatenate a list of tensors along a given dimension
In this case, 2 * (2, 1) -> (4, 1), dim=0.
*/
*/
bool
TestConcatenateSolely2
()
bool
TestConcatenateSolely2
()
{
{
...
@@ -183,14 +186,14 @@ bool TestConcatenateSolely2()
...
@@ -183,14 +186,14 @@ bool TestConcatenateSolely2()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
},
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
F
},
{
1.0
}
};
{
1.0
F
}
};
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
},
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
F
},
{
3.0
}
};
{
3.0
F
}
};
DTYPE
answer
[
4
][
1
]
=
{
{
0.0
},
DTYPE
answer
[
4
][
1
]
=
{
{
0.0
F
},
{
1.0
},
{
1.0
F
},
{
2.0
},
{
2.0
F
},
{
3.0
}
};
{
3.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -269,8 +272,9 @@ bool TestConcatenateSolely2()
...
@@ -269,8 +272,9 @@ bool TestConcatenateSolely2()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 3: concatenate a list of tensors along a given dimension
/*
* In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
case 3: concatenate a list of tensors along a given dimension
In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
*/
*/
bool
TestConcatenateSolely3
()
bool
TestConcatenateSolely3
()
{
{
...
@@ -307,12 +311,12 @@ bool TestConcatenateSolely3()
...
@@ -307,12 +311,12 @@ bool TestConcatenateSolely3()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
},
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
F
},
{
1.0
}
};
{
1.0
F
}
};
DTYPE
sData2
[
2
][
2
]
=
{
{
2.0
,
3.0
},
DTYPE
sData2
[
2
][
2
]
=
{
{
2.0
F
,
3.0
F
},
{
4.0
,
5.0
}
};
{
4.0
F
,
5.0
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
,
2.0
,
3.0
},
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
F
,
2.0
F
,
3.0
F
},
{
1.0
,
4.0
,
5.0
}
};
{
1.0
F
,
4.0
F
,
5.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -397,7 +401,6 @@ TODO!!
...
@@ -397,7 +401,6 @@ TODO!!
*/
*/
/* test for ConcatenateSolely Function */
/* test for ConcatenateSolely Function */
extern
"C"
bool
TestConcatenateSolely
()
bool
TestConcatenateSolely
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST CONCATENATESOLELY] concatenate a list of tensors along a given dimension
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST CONCATENATESOLELY] concatenate a list of tensors along a given dimension
\n
"
);
...
...
source/test/TCopyIndexed.cpp
查看文件 @
0887fae1
...
@@ -22,9 +22,11 @@
...
@@ -22,9 +22,11 @@
#include "TCopyIndexed.h"
#include "TCopyIndexed.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1 copy indexed sub-tensors
* In this case, (3, 2, 3) -> (3, 2, 2), dim = 2, indexSize = 2,
/*
* srcIndex = [0, 2], tgtIndex = [0, 1], copyNum = 1.
case 1 copy indexed sub-tensors
In this case, (3, 2, 3) -> (3, 2, 2), dim = 2, indexSize = 2,
srcIndex = [0, 2], tgtIndex = [0, 1], copyNum = 1.
*/
*/
bool
TestCopyIndexed1
()
bool
TestCopyIndexed1
()
{
{
...
@@ -50,19 +52,19 @@ bool TestCopyIndexed1()
...
@@ -50,19 +52,19 @@ bool TestCopyIndexed1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData
[
3
][
2
][
3
]
=
{
{
{
0.0
,
-
1.0
,
2.0
},
DTYPE
sData
[
3
][
2
][
3
]
=
{
{
{
0.0
F
,
-
1.0
F
,
2.0
F
},
{
2.0
,
1.0
,
3.0
}
},
{
2.0
F
,
1.0
F
,
3.0
F
}
},
{
{
1.0
,
2.0
,
4.0
},
{
{
1.0
F
,
2.0
F
,
4.0
F
},
{
3.0
,
1.0
,
2.0
}},
{
3.0
F
,
1.0
F
,
2.0
F
}},
{
{
-
1.0
,
3.0
,
2.0
},
{
{
-
1.0
F
,
3.0
F
,
2.0
F
},
{
1.0
,
-
1.0
,
0.0
}
}
};
{
1.0
F
,
-
1.0
F
,
0.0
F
}
}
};
DTYPE
answer
[
3
][
2
][
2
]
=
{
{
{
0.0
,
2.0
},
DTYPE
answer
[
3
][
2
][
2
]
=
{
{
{
0.0
F
,
2.0
F
},
{
2.0
,
3.0
}
},
{
2.0
F
,
3.0
F
}
},
{
{
1.0
,
4.0
},
{
{
1.0
F
,
4.0
F
},
{
3.0
,
2.0
}},
{
3.0
F
,
2.0
F
}},
{
{
-
1.0
,
2.0
},
{
{
-
1.0
F
,
2.0
F
},
{
1.0
,
0.0
}
}
};
{
1.0
F
,
0.0
F
}
}
};
int
dim
=
2
;
int
dim
=
2
;
int
indexSize
=
2
;
int
indexSize
=
2
;
int
srcIndex
[
2
]
=
{
0
,
2
};
int
srcIndex
[
2
]
=
{
0
,
2
};
...
@@ -131,7 +133,6 @@ TODO!!
...
@@ -131,7 +133,6 @@ TODO!!
*/
*/
/* test for CopyIndexed Function */
/* test for CopyIndexed Function */
extern
"C"
bool
TestCopyIndexed
()
bool
TestCopyIndexed
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST CopyIndexed] copy indexed sub-tensors
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST CopyIndexed] copy indexed sub-tensors
\n
"
);
...
...
source/test/TCopyValues.cpp
查看文件 @
0887fae1
...
@@ -23,6 +23,7 @@
...
@@ -23,6 +23,7 @@
#include "TCopyValues.h"
#include "TCopyValues.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: copy tensor s to tensor t */
/* case 1: copy tensor s to tensor t */
bool
TestCopyValues1
()
bool
TestCopyValues1
()
{
{
...
@@ -36,11 +37,11 @@ bool TestCopyValues1()
...
@@ -36,11 +37,11 @@ bool TestCopyValues1()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
sUnitNum
*=
sDimSize
[
i
];
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
scaleFactor
=
2.0
;
DTYPE
scaleFactor
=
2.0
F
;
DTYPE
shiftFactor
=
0.5
;
DTYPE
shiftFactor
=
0.5
F
;
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -105,7 +106,6 @@ TODO!!
...
@@ -105,7 +106,6 @@ TODO!!
*/
*/
/* test for CopyValues Function */
/* test for CopyValues Function */
extern
"C"
bool
TestCopyValues
()
bool
TestCopyValues
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST CopyValues] copy tensor s to tensor t
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST CopyValues] copy tensor s to tensor t
\n
"
);
...
...
source/test/THardTanH.cpp
查看文件 @
0887fae1
...
@@ -22,10 +22,11 @@
...
@@ -22,10 +22,11 @@
#include "THardTanH.h"
#include "THardTanH.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: hard tanh function */
/* case 1: hard tanh function */
bool
TestHardTanH1
()
bool
TestHardTanH1
()
{
{
/* a x tensor of size
2 * 3
*/
/* a x tensor of size
(2, 3)
*/
int
xOrder
=
2
;
int
xOrder
=
2
;
int
*
xDimSize
=
new
int
[
xOrder
];
int
*
xDimSize
=
new
int
[
xOrder
];
xDimSize
[
0
]
=
2
;
xDimSize
[
0
]
=
2
;
...
@@ -35,7 +36,7 @@ bool TestHardTanH1()
...
@@ -35,7 +36,7 @@ bool TestHardTanH1()
for
(
int
i
=
0
;
i
<
xOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
xOrder
;
i
++
)
xUnitNum
*=
xDimSize
[
i
];
xUnitNum
*=
xDimSize
[
i
];
/* a y tensor of size
2 * 3
*/
/* a y tensor of size
(2, 3)
*/
int
yOrder
=
2
;
int
yOrder
=
2
;
int
*
yDimSize
=
new
int
[
yOrder
];
int
*
yDimSize
=
new
int
[
yOrder
];
yDimSize
[
0
]
=
2
;
yDimSize
[
0
]
=
2
;
...
@@ -45,10 +46,10 @@ bool TestHardTanH1()
...
@@ -45,10 +46,10 @@ bool TestHardTanH1()
for
(
int
i
=
0
;
i
<
yOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
yOrder
;
i
++
)
yUnitNum
*=
yDimSize
[
i
];
yUnitNum
*=
yDimSize
[
i
];
DTYPE
xData
[
2
][
3
]
=
{
{
0.5
,
-
1.0
,
2.0
},
DTYPE
xData
[
2
][
3
]
=
{
{
0.5
F
,
-
1.0
F
,
2.0
F
},
{
3.5
,
-
4.5
,
1.0
}
};
{
3.5
F
,
-
4.5
F
,
1.0
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.5
,
-
1.0
,
1.0
},
DTYPE
answer
[
2
][
3
]
=
{
{
0.5
F
,
-
1.0
F
,
1.0
F
},
{
1.0
,
-
1.0
,
1.0
}
};
{
1.0
F
,
-
1.0
F
,
1.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -86,25 +87,32 @@ bool TestHardTanH1()
...
@@ -86,25 +87,32 @@ bool TestHardTanH1()
gpuTest
=
yGPU
->
CheckData
(
answer
,
yUnitNum
,
1e-4
F
);
gpuTest
=
yGPU
->
CheckData
(
answer
,
yUnitNum
,
1e-4
F
);
/* destroy variables */
/* destroy variables */
delete
x
,
y
,
xGPU
,
yGPU
;
delete
x
;
delete
[]
xDimSize
,
yDimSize
;
delete
y
;
delete
xGPU
;
delete
yGPU
;
delete
[]
xDimSize
;
delete
[]
yDimSize
;
return
cpuTest
&&
gpuTest
;
return
cpuTest
&&
gpuTest
;
#else
#else
/* destroy variables */
/* destroy variables */
delete
x
,
y
;
delete
x
;
delete
[]
xDimSize
,
yDimSize
;
delete
y
;
delete
[]
xDimSize
;
delete
[]
yDimSize
;
return
cpuTest
;
return
cpuTest
;
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 2: backward computation
/*
* In this case, lossName=CROSSENTROPY.
case 2: backward computation
In this case, lossName=CROSSENTROPY.
*/
*/
bool
TestHardTanH2
()
bool
TestHardTanH2
()
{
{
/* a x tensor of size
2 * 3
*/
/* a x tensor of size
(2, 3)
*/
int
xOrder
=
2
;
int
xOrder
=
2
;
int
*
xDimSize
=
new
int
[
xOrder
];
int
*
xDimSize
=
new
int
[
xOrder
];
xDimSize
[
0
]
=
2
;
xDimSize
[
0
]
=
2
;
...
@@ -114,7 +122,7 @@ bool TestHardTanH2()
...
@@ -114,7 +122,7 @@ bool TestHardTanH2()
for
(
int
i
=
0
;
i
<
xOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
xOrder
;
i
++
)
xUnitNum
*=
xDimSize
[
i
];
xUnitNum
*=
xDimSize
[
i
];
/* a y tensor of size
2 * 3
*/
/* a y tensor of size
(2, 3)
*/
int
yOrder
=
2
;
int
yOrder
=
2
;
int
*
yDimSize
=
new
int
[
yOrder
];
int
*
yDimSize
=
new
int
[
yOrder
];
yDimSize
[
0
]
=
2
;
yDimSize
[
0
]
=
2
;
...
@@ -124,7 +132,7 @@ bool TestHardTanH2()
...
@@ -124,7 +132,7 @@ bool TestHardTanH2()
for
(
int
i
=
0
;
i
<
yOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
yOrder
;
i
++
)
yUnitNum
*=
yDimSize
[
i
];
yUnitNum
*=
yDimSize
[
i
];
/* a gold tensor of size
2 * 3
*/
/* a gold tensor of size
(2, 3)
*/
int
goldOrder
=
2
;
int
goldOrder
=
2
;
int
*
goldDimSize
=
new
int
[
goldOrder
];
int
*
goldDimSize
=
new
int
[
goldOrder
];
goldDimSize
[
0
]
=
2
;
goldDimSize
[
0
]
=
2
;
...
@@ -134,7 +142,7 @@ bool TestHardTanH2()
...
@@ -134,7 +142,7 @@ bool TestHardTanH2()
for
(
int
i
=
0
;
i
<
goldOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
goldOrder
;
i
++
)
goldUnitNum
*=
goldDimSize
[
i
];
goldUnitNum
*=
goldDimSize
[
i
];
/* a dedy tensor of size
2 * 3
*/
/* a dedy tensor of size
(2, 3)
*/
int
dedyOrder
=
2
;
int
dedyOrder
=
2
;
int
*
dedyDimSize
=
new
int
[
dedyOrder
];
int
*
dedyDimSize
=
new
int
[
dedyOrder
];
dedyDimSize
[
0
]
=
2
;
dedyDimSize
[
0
]
=
2
;
...
@@ -144,7 +152,7 @@ bool TestHardTanH2()
...
@@ -144,7 +152,7 @@ bool TestHardTanH2()
for
(
int
i
=
0
;
i
<
dedyOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
dedyOrder
;
i
++
)
dedyUnitNum
*=
dedyDimSize
[
i
];
dedyUnitNum
*=
dedyDimSize
[
i
];
/* a dedx tensor of size
2 * 3
*/
/* a dedx tensor of size
(2, 3)
*/
int
dedxOrder
=
2
;
int
dedxOrder
=
2
;
int
*
dedxDimSize
=
new
int
[
dedxOrder
];
int
*
dedxDimSize
=
new
int
[
dedxOrder
];
dedxDimSize
[
0
]
=
2
;
dedxDimSize
[
0
]
=
2
;
...
@@ -154,16 +162,16 @@ bool TestHardTanH2()
...
@@ -154,16 +162,16 @@ bool TestHardTanH2()
for
(
int
i
=
0
;
i
<
dedxOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
dedxOrder
;
i
++
)
dedxUnitNum
*=
dedxDimSize
[
i
];
dedxUnitNum
*=
dedxDimSize
[
i
];
DTYPE
xData
[
2
][
3
]
=
{
{
0.5
,
-
1.0
,
2.0
},
DTYPE
xData
[
2
][
3
]
=
{
{
0.5
F
,
-
1.0
F
,
2.0
F
},
{
3.5
,
-
4.5
,
1.0
}
};
{
3.5
F
,
-
4.5
F
,
1.0
F
}
};
DTYPE
yData
[
2
][
3
]
=
{
{
0.5
,
-
1.0
,
1.0
},
DTYPE
yData
[
2
][
3
]
=
{
{
0.5
F
,
-
1.0
F
,
1.0
F
},
{
1.0
,
-
1.0
,
1.0
}
};
{
1.0
F
,
-
1.0
F
,
1.0
F
}
};
DTYPE
goldData
[
2
][
3
]
=
{
{
1.0
,
1.0
,
1.0
},
DTYPE
goldData
[
2
][
3
]
=
{
{
1.0
F
,
1.0
F
,
1.0
F
},
{
1.0
,
1.0
,
1.0
}
};
{
1.0
F
,
1.0
F
,
1.0
F
}
};
DTYPE
dedyData
[
2
][
3
]
=
{
{
-
2.0
,
1.0
,
-
1.0
},
DTYPE
dedyData
[
2
][
3
]
=
{
{
-
2.0
F
,
1.0
F
,
-
1.0
F
},
{
-
1.0
,
1.0
,
-
1.0
}
};
{
-
1.0
F
,
1.0
F
,
-
1.0
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
-
2.0
,
1.0
,
0.0
},
DTYPE
answer
[
2
][
3
]
=
{
{
-
2.0
F
,
1.0
F
,
0.0
F
},
{
0.0
,
0.0
,
-
1.0
}
};
{
0.0
F
,
0.0
F
,
-
1.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -226,12 +234,13 @@ bool TestHardTanH2()
...
@@ -226,12 +234,13 @@ bool TestHardTanH2()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 3: backward computation
/*
* In this case, lossName=SQUAREDERROR.
case 3: backward computation
In this case, lossName=SQUAREDERROR.
*/
*/
bool
TestHardTanH3
()
bool
TestHardTanH3
()
{
{
/* a x tensor of size
2 * 3
*/
/* a x tensor of size
(2, 3)
*/
int
xOrder
=
2
;
int
xOrder
=
2
;
int
*
xDimSize
=
new
int
[
xOrder
];
int
*
xDimSize
=
new
int
[
xOrder
];
xDimSize
[
0
]
=
2
;
xDimSize
[
0
]
=
2
;
...
@@ -241,7 +250,7 @@ bool TestHardTanH3()
...
@@ -241,7 +250,7 @@ bool TestHardTanH3()
for
(
int
i
=
0
;
i
<
xOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
xOrder
;
i
++
)
xUnitNum
*=
xDimSize
[
i
];
xUnitNum
*=
xDimSize
[
i
];
/* a y tensor of size
2 * 3
*/
/* a y tensor of size
(2, 3)
*/
int
yOrder
=
2
;
int
yOrder
=
2
;
int
*
yDimSize
=
new
int
[
yOrder
];
int
*
yDimSize
=
new
int
[
yOrder
];
yDimSize
[
0
]
=
2
;
yDimSize
[
0
]
=
2
;
...
@@ -251,7 +260,7 @@ bool TestHardTanH3()
...
@@ -251,7 +260,7 @@ bool TestHardTanH3()
for
(
int
i
=
0
;
i
<
yOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
yOrder
;
i
++
)
yUnitNum
*=
yDimSize
[
i
];
yUnitNum
*=
yDimSize
[
i
];
/* a gold tensor of size
2 * 3
*/
/* a gold tensor of size
(2, 3)
*/
int
goldOrder
=
2
;
int
goldOrder
=
2
;
int
*
goldDimSize
=
new
int
[
goldOrder
];
int
*
goldDimSize
=
new
int
[
goldOrder
];
goldDimSize
[
0
]
=
2
;
goldDimSize
[
0
]
=
2
;
...
@@ -261,7 +270,7 @@ bool TestHardTanH3()
...
@@ -261,7 +270,7 @@ bool TestHardTanH3()
for
(
int
i
=
0
;
i
<
goldOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
goldOrder
;
i
++
)
goldUnitNum
*=
goldDimSize
[
i
];
goldUnitNum
*=
goldDimSize
[
i
];
/* a dedy tensor of size
2 * 3
*/
/* a dedy tensor of size
(2, 3)
*/
int
dedyOrder
=
2
;
int
dedyOrder
=
2
;
int
*
dedyDimSize
=
new
int
[
dedyOrder
];
int
*
dedyDimSize
=
new
int
[
dedyOrder
];
dedyDimSize
[
0
]
=
2
;
dedyDimSize
[
0
]
=
2
;
...
@@ -271,7 +280,7 @@ bool TestHardTanH3()
...
@@ -271,7 +280,7 @@ bool TestHardTanH3()
for
(
int
i
=
0
;
i
<
dedyOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
dedyOrder
;
i
++
)
dedyUnitNum
*=
dedyDimSize
[
i
];
dedyUnitNum
*=
dedyDimSize
[
i
];
/* a dedx tensor of size
2 * 3
*/
/* a dedx tensor of size
(2, 3)
*/
int
dedxOrder
=
2
;
int
dedxOrder
=
2
;
int
*
dedxDimSize
=
new
int
[
dedxOrder
];
int
*
dedxDimSize
=
new
int
[
dedxOrder
];
dedxDimSize
[
0
]
=
2
;
dedxDimSize
[
0
]
=
2
;
...
@@ -281,16 +290,16 @@ bool TestHardTanH3()
...
@@ -281,16 +290,16 @@ bool TestHardTanH3()
for
(
int
i
=
0
;
i
<
dedxOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
dedxOrder
;
i
++
)
dedxUnitNum
*=
dedxDimSize
[
i
];
dedxUnitNum
*=
dedxDimSize
[
i
];
DTYPE
xData
[
2
][
3
]
=
{
{
0.5
,
-
1.0
,
2.0
},
DTYPE
xData
[
2
][
3
]
=
{
{
0.5
F
,
-
1.0
F
,
2.0
F
},
{
3.5
,
-
4.5
,
1.0
}
};
{
3.5
F
,
-
4.5
F
,
1.0
F
}
};
DTYPE
yData
[
2
][
3
]
=
{
{
0.5
,
-
1.0
,
1.0
},
DTYPE
yData
[
2
][
3
]
=
{
{
0.5
F
,
-
1.0
F
,
1.0
F
},
{
1.0
,
-
1.0
,
1.0
}
};
{
1.0
F
,
-
1.0
F
,
1.0
F
}
};
DTYPE
goldData
[
2
][
3
]
=
{
{
1.0
,
1.0
,
1.0
},
DTYPE
goldData
[
2
][
3
]
=
{
{
1.0
F
,
1.0
F
,
1.0
F
},
{
1.0
,
1.0
,
1.0
}
};
{
1.0
F
,
1.0
F
,
1.0
F
}
};
DTYPE
dedyData
[
2
][
3
]
=
{
{
-
0.5
,
-
2.0
,
0.0
},
DTYPE
dedyData
[
2
][
3
]
=
{
{
-
0.5
F
,
-
2.0
F
,
0.0
F
},
{
0.0
,
-
2.0
,
0.0
}
};
{
0.0
F
,
-
2.0
F
,
0.0
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
-
0.5
,
-
2.0
,
0.0
},
DTYPE
answer
[
2
][
3
]
=
{
{
-
0.5
F
,
-
2.0
F
,
0.0
F
},
{
0.0
,
0.0
,
0.0
}
};
{
0.0
F
,
0.0
F
,
0.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -353,12 +362,13 @@ bool TestHardTanH3()
...
@@ -353,12 +362,13 @@ bool TestHardTanH3()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 4: backward computation
/*
* In this case, lossName=ONEHOTERROR.
case 4: backward computation
In this case, lossName=ONEHOTERROR.
*/
*/
bool
TestHardTanH4
()
bool
TestHardTanH4
()
{
{
/* a x tensor of size
2 * 3
*/
/* a x tensor of size
(2, 3)
*/
int
xOrder
=
2
;
int
xOrder
=
2
;
int
*
xDimSize
=
new
int
[
xOrder
];
int
*
xDimSize
=
new
int
[
xOrder
];
xDimSize
[
0
]
=
2
;
xDimSize
[
0
]
=
2
;
...
@@ -368,7 +378,7 @@ bool TestHardTanH4()
...
@@ -368,7 +378,7 @@ bool TestHardTanH4()
for
(
int
i
=
0
;
i
<
xOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
xOrder
;
i
++
)
xUnitNum
*=
xDimSize
[
i
];
xUnitNum
*=
xDimSize
[
i
];
/* a y tensor of size
2 * 3
*/
/* a y tensor of size
(2, 3)
*/
int
yOrder
=
2
;
int
yOrder
=
2
;
int
*
yDimSize
=
new
int
[
yOrder
];
int
*
yDimSize
=
new
int
[
yOrder
];
yDimSize
[
0
]
=
2
;
yDimSize
[
0
]
=
2
;
...
@@ -378,7 +388,7 @@ bool TestHardTanH4()
...
@@ -378,7 +388,7 @@ bool TestHardTanH4()
for
(
int
i
=
0
;
i
<
yOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
yOrder
;
i
++
)
yUnitNum
*=
yDimSize
[
i
];
yUnitNum
*=
yDimSize
[
i
];
/* a gold tensor of size
2 * 3
*/
/* a gold tensor of size
(2, 3)
*/
int
goldOrder
=
2
;
int
goldOrder
=
2
;
int
*
goldDimSize
=
new
int
[
goldOrder
];
int
*
goldDimSize
=
new
int
[
goldOrder
];
goldDimSize
[
0
]
=
2
;
goldDimSize
[
0
]
=
2
;
...
@@ -388,7 +398,7 @@ bool TestHardTanH4()
...
@@ -388,7 +398,7 @@ bool TestHardTanH4()
for
(
int
i
=
0
;
i
<
goldOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
goldOrder
;
i
++
)
goldUnitNum
*=
goldDimSize
[
i
];
goldUnitNum
*=
goldDimSize
[
i
];
/* a dedy tensor of size
2 * 3
*/
/* a dedy tensor of size
(2, 3)
*/
int
dedyOrder
=
2
;
int
dedyOrder
=
2
;
int
*
dedyDimSize
=
new
int
[
dedyOrder
];
int
*
dedyDimSize
=
new
int
[
dedyOrder
];
dedyDimSize
[
0
]
=
2
;
dedyDimSize
[
0
]
=
2
;
...
@@ -398,7 +408,7 @@ bool TestHardTanH4()
...
@@ -398,7 +408,7 @@ bool TestHardTanH4()
for
(
int
i
=
0
;
i
<
dedyOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
dedyOrder
;
i
++
)
dedyUnitNum
*=
dedyDimSize
[
i
];
dedyUnitNum
*=
dedyDimSize
[
i
];
/* a dedx tensor of size
2 * 3
*/
/* a dedx tensor of size
(2, 3)
*/
int
dedxOrder
=
2
;
int
dedxOrder
=
2
;
int
*
dedxDimSize
=
new
int
[
dedxOrder
];
int
*
dedxDimSize
=
new
int
[
dedxOrder
];
dedxDimSize
[
0
]
=
2
;
dedxDimSize
[
0
]
=
2
;
...
@@ -408,16 +418,16 @@ bool TestHardTanH4()
...
@@ -408,16 +418,16 @@ bool TestHardTanH4()
for
(
int
i
=
0
;
i
<
dedxOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
dedxOrder
;
i
++
)
dedxUnitNum
*=
dedxDimSize
[
i
];
dedxUnitNum
*=
dedxDimSize
[
i
];
DTYPE
xData
[
2
][
3
]
=
{
{
0.5
,
-
1.0
,
2.0
},
DTYPE
xData
[
2
][
3
]
=
{
{
0.5
F
,
-
1.0
F
,
2.0
F
},
{
3.5
,
-
4.5
,
1.0
}
};
{
3.5
F
,
-
4.5
F
,
1.0
F
}
};
DTYPE
yData
[
2
][
3
]
=
{
{
0.5
,
-
1.0
,
1.0
},
DTYPE
yData
[
2
][
3
]
=
{
{
0.5
F
,
-
1.0
F
,
1.0
F
},
{
1.0
,
-
1.0
,
1.0
}
};
{
1.0
F
,
-
1.0
F
,
1.0
F
}
};
DTYPE
goldData
[
2
][
3
]
=
{
{
1.0
,
0.0
,
1.0
},
DTYPE
goldData
[
2
][
3
]
=
{
{
1.0
F
,
0.0
F
,
1.0
F
},
{
0.0
,
1.0
,
1.0
}
};
{
0.0
F
,
1.0
F
,
1.0
F
}
};
DTYPE
dedyData
[
2
][
3
]
=
{
{
-
0.5
,
0.0
,
0.0
},
DTYPE
dedyData
[
2
][
3
]
=
{
{
-
0.5
F
,
0.0
F
,
0.0
F
},
{
0.0
,
-
2.0
,
0.0
}
};
{
0.0
F
,
-
2.0
F
,
0.0
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
-
0.5
,
0.0
,
0.0
},
DTYPE
answer
[
2
][
3
]
=
{
{
-
0.5
F
,
0.0
F
,
0.0
F
},
{
0.0
,
0.0
,
0.0
}
};
{
0.0
F
,
0.0
F
,
0.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -486,10 +496,9 @@ TODO!!
...
@@ -486,10 +496,9 @@ TODO!!
*/
*/
/* test for HardTanH Function */
/* test for HardTanH Function */
extern
"C"
bool
TestHardTanH
()
bool
TestHardTanH
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST HARDTANH]
-------------
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST HARDTANH]
test hardtanh and its backward computation
\n
"
);
bool
returnFlag
=
true
,
caseFlag
=
true
;
bool
returnFlag
=
true
,
caseFlag
=
true
;
/* case 1 test */
/* case 1 test */
...
...
source/test/TIdentity.cpp
查看文件 @
0887fae1
...
@@ -23,8 +23,10 @@
...
@@ -23,8 +23,10 @@
#include "TIdentity.h"
#include "TIdentity.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: test Identity function.
* Identity function: y = x
/*
case 1: test Identity function.
Identity function: y = x
*/
*/
bool
TestIdentity1
()
bool
TestIdentity1
()
{
{
...
@@ -38,10 +40,10 @@ bool TestIdentity1()
...
@@ -38,10 +40,10 @@ bool TestIdentity1()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
sUnitNum
*=
sDimSize
[
i
];
DTYPE
xData
[
2
][
3
]
=
{
{
0.0
,
1.0
,
2.0
},
DTYPE
xData
[
2
][
3
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
},
{
0.5
,
0.7
,
1.4
}
};
{
0.5
F
,
0.7
F
,
1.4
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
,
1.0
,
2.0
},
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
},
{
0.5
,
0.7
,
1.4
}
};
{
0.5
F
,
0.7
F
,
1.4
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -93,8 +95,9 @@ bool TestIdentity1()
...
@@ -93,8 +95,9 @@ bool TestIdentity1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 2: test IdentityBackward function.
/*
* IdentityBackward function: dE/dx = dE/dy * dy/dx = dE/dy
case 2: test IdentityBackward function.
IdentityBackward function: dE/dx = dE/dy * dy/dx = dE/dy
*/
*/
bool
TestIdentity2
()
bool
TestIdentity2
()
{
{
...
@@ -107,9 +110,9 @@ bool TestIdentity2()
...
@@ -107,9 +110,9 @@ bool TestIdentity2()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
sUnitNum
*=
sDimSize
[
i
];
DTYPE
xData
[
1
][
3
]
=
{
{
0.0
,
1.0
,
2.0
}
};
DTYPE
xData
[
1
][
3
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
}
};
DTYPE
gData
[
1
][
3
]
=
{
{
0.0
,
0.0
,
1.0
}
};
DTYPE
gData
[
1
][
3
]
=
{
{
0.0
F
,
0.0
F
,
1.0
F
}
};
DTYPE
dedxAnswer
[
3
]
=
{
0.090031
,
0.244728
,
-
0.334759
};
DTYPE
dedxAnswer
[
3
]
=
{
0.090031
F
,
0.244728
F
,
-
0.334759
F
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -135,7 +138,7 @@ bool TestIdentity2()
...
@@ -135,7 +138,7 @@ bool TestIdentity2()
IdentityBackward
(
g
,
y
,
x
,
dedy
,
dedx
,
CROSSENTROPY
);
IdentityBackward
(
g
,
y
,
x
,
dedy
,
dedx
,
CROSSENTROPY
);
/* check result */
/* check result */
cpuTest
=
dedx
->
CheckData
(
dedxAnswer
,
sUnitNum
);
cpuTest
=
dedx
->
CheckData
(
dedxAnswer
,
sUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -162,7 +165,7 @@ bool TestIdentity2()
...
@@ -162,7 +165,7 @@ bool TestIdentity2()
IdentityBackward
(
gGPU
,
yGPU
,
xGPU
,
dedyGPU
,
dedxGPU
,
CROSSENTROPY
);
IdentityBackward
(
gGPU
,
yGPU
,
xGPU
,
dedyGPU
,
dedxGPU
,
CROSSENTROPY
);
/* check result */
/* check result */
gpuTest
=
dedxGPU
->
CheckData
(
dedxAnswer
,
sUnitNum
);
gpuTest
=
dedxGPU
->
CheckData
(
dedxAnswer
,
sUnitNum
,
1e-4
F
);
/* destroy variables */
/* destroy variables */
delete
x
;
delete
x
;
...
@@ -197,7 +200,6 @@ bool TestIdentity2()
...
@@ -197,7 +200,6 @@ bool TestIdentity2()
*/
*/
/* test for Identity Function */
/* test for Identity Function */
extern
"C"
bool
TestIdentity
()
bool
TestIdentity
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST Identity] identity function and its backward computation
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST Identity] identity function and its backward computation
\n
"
);
...
@@ -213,15 +215,15 @@ bool TestIdentity()
...
@@ -213,15 +215,15 @@ bool TestIdentity()
else
else
XPRINT
(
0
,
stdout
,
">> case 1 passed!
\n
"
);
XPRINT
(
0
,
stdout
,
">> case 1 passed!
\n
"
);
/
//
* case 2 test */
/* case 2 test */
//
caseFlag = TestIdentity2();
caseFlag
=
TestIdentity2
();
//
if (!caseFlag) {
if
(
!
caseFlag
)
{
//
returnFlag = false;
returnFlag
=
false
;
//
XPRINT(0, stdout, ">> case 2 failed!\n");
XPRINT
(
0
,
stdout
,
">> case 2 failed!
\n
"
);
//
}
}
//
else
else
//
XPRINT(0, stdout, ">> case 2 passed!\n");
XPRINT
(
0
,
stdout
,
">> case 2 passed!
\n
"
);
/* other cases test */
/* other cases test */
/*
/*
...
...
source/test/TLogSoftmax.cpp
查看文件 @
0887fae1
...
@@ -23,8 +23,10 @@
...
@@ -23,8 +23,10 @@
#include "TLogSoftmax.h"
#include "TLogSoftmax.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: test LogSoftmax function.
* LogSoftmax function: y = log(e^x / \sum_{i} e^{x_i})
/*
case 1: test LogSoftmax function.
LogSoftmax function: y = log(e^x / \sum_{i} e^{x_i})
*/
*/
bool
TestLogSoftmax1
()
bool
TestLogSoftmax1
()
{
{
...
@@ -38,10 +40,10 @@ bool TestLogSoftmax1()
...
@@ -38,10 +40,10 @@ bool TestLogSoftmax1()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
sUnitNum
*=
sDimSize
[
i
];
DTYPE
xData
[
2
][
3
]
=
{
{
0.0
,
1.0
,
2.0
},
DTYPE
xData
[
2
][
3
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
},
{
0.5
,
0.7
,
1.4
}
};
{
0.5
F
,
0.7
F
,
1.4
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
-
2.4076
,
-
1.4076
,
-
0.4076
},
DTYPE
answer
[
2
][
3
]
=
{
{
-
2.4076
F
,
-
1.4076
F
,
-
0.4076
F
},
{
-
1.5435
,
-
1.3435
,
-
0.6435
}
};
{
-
1.5435
F
,
-
1.3435
F
,
-
0.6435
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -58,7 +60,7 @@ bool TestLogSoftmax1()
...
@@ -58,7 +60,7 @@ bool TestLogSoftmax1()
LogSoftmax
(
x
,
y
,
1
);
LogSoftmax
(
x
,
y
,
1
);
/* check result */
/* check result */
cpuTest
=
y
->
CheckData
(
answer
,
sUnitNum
);
cpuTest
=
y
->
CheckData
(
answer
,
sUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -76,7 +78,7 @@ bool TestLogSoftmax1()
...
@@ -76,7 +78,7 @@ bool TestLogSoftmax1()
LogSoftmax
(
xGPU
,
yGPU
,
1
);
LogSoftmax
(
xGPU
,
yGPU
,
1
);
/* check result */
/* check result */
gpuTest
=
yGPU
->
CheckData
(
answer
,
sUnitNum
);
gpuTest
=
yGPU
->
CheckData
(
answer
,
sUnitNum
,
1e-4
F
);
/* destroy variables */
/* destroy variables */
delete
x
;
delete
x
;
...
@@ -97,9 +99,10 @@ bool TestLogSoftmax1()
...
@@ -97,9 +99,10 @@ bool TestLogSoftmax1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 2: test LogSoftmaxBackward function.
/*
* dE/dx = dE/dy * dy/dx
case 2: test LogSoftmaxBackward function.
* log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
*/
*/
bool
TestLogSoftmax2
()
bool
TestLogSoftmax2
()
{
{
...
@@ -112,10 +115,10 @@ bool TestLogSoftmax2()
...
@@ -112,10 +115,10 @@ bool TestLogSoftmax2()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
sUnitNum
*=
sDimSize
[
i
];
DTYPE
xData
[
3
]
=
{
0.0
,
1.0
,
2.0
};
DTYPE
xData
[
3
]
=
{
0.0
F
,
1.0
F
,
2.0
F
};
DTYPE
gData
[
3
]
=
{
0.5
,
0.8
,
1.5
};
DTYPE
gData
[
3
]
=
{
0.5
F
,
0.8
F
,
1.5
F
};
DTYPE
yAnswer
[
3
]
=
{
-
2.4076
,
-
1.4076
,
-
0.4076
};
DTYPE
yAnswer
[
3
]
=
{
-
2.4076
F
,
-
1.4076
F
,
-
0.4076
F
};
DTYPE
dedxAnswer
[
3
]
=
{
-
0.409969
,
-
0.555272
,
-
0.834759
};
DTYPE
dedxAnswer
[
3
]
=
{
-
0.409969
F
,
-
0.555272
F
,
-
0.834759
F
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -141,7 +144,7 @@ bool TestLogSoftmax2()
...
@@ -141,7 +144,7 @@ bool TestLogSoftmax2()
LogSoftmaxBackward
(
g
,
y
,
x
,
dedy
,
dedx
,
0
,
CROSSENTROPY
);
LogSoftmaxBackward
(
g
,
y
,
x
,
dedy
,
dedx
,
0
,
CROSSENTROPY
);
/* check result */
/* check result */
cpuTest
=
y
->
CheckData
(
yAnswer
,
sUnitNum
)
&&
dedx
->
CheckData
(
dedxAnswer
,
sUnitNum
);
cpuTest
=
y
->
CheckData
(
yAnswer
,
sUnitNum
,
1e-4
F
)
&&
dedx
->
CheckData
(
dedxAnswer
,
sUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -168,7 +171,7 @@ bool TestLogSoftmax2()
...
@@ -168,7 +171,7 @@ bool TestLogSoftmax2()
LogSoftmaxBackward
(
gGPU
,
yGPU
,
xGPU
,
dedyGPU
,
dedxGPU
,
0
,
CROSSENTROPY
);
LogSoftmaxBackward
(
gGPU
,
yGPU
,
xGPU
,
dedyGPU
,
dedxGPU
,
0
,
CROSSENTROPY
);
/* check result */
/* check result */
gpuTest
=
yGPU
->
CheckData
(
yAnswer
,
sUnitNum
)
&&
dedxGPU
->
CheckData
(
dedxAnswer
,
sUnitNum
);
gpuTest
=
yGPU
->
CheckData
(
yAnswer
,
sUnitNum
,
1e-4
F
)
&&
dedxGPU
->
CheckData
(
dedxAnswer
,
sUnitNum
,
1e-4
F
);
/* destroy variables */
/* destroy variables */
delete
x
;
delete
x
;
...
@@ -197,9 +200,10 @@ bool TestLogSoftmax2()
...
@@ -197,9 +200,10 @@ bool TestLogSoftmax2()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 3: test LogSoftmaxBackward function.
/*
* dE/dx = dE/dy * dy/dx
case 3: test LogSoftmaxBackward function.
* log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
*/
*/
bool
TestLogSoftmax3
()
bool
TestLogSoftmax3
()
{
{
...
@@ -213,10 +217,10 @@ bool TestLogSoftmax3()
...
@@ -213,10 +217,10 @@ bool TestLogSoftmax3()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
sUnitNum
*=
sDimSize
[
i
];
DTYPE
xData
[
1
][
3
]
=
{
{
0.0
,
1.0
,
2.0
}
};
DTYPE
xData
[
1
][
3
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
}
};
DTYPE
gData
[
1
][
3
]
=
{
{
0.5
,
0.8
,
1.5
}
};
DTYPE
gData
[
1
][
3
]
=
{
{
0.5
F
,
0.8
F
,
1.5
F
}
};
DTYPE
yAnswer
[
1
][
3
]
=
{
-
2.4076
,
-
1.4076
,
-
0.4076
};
DTYPE
yAnswer
[
1
][
3
]
=
{
-
2.4076
F
,
-
1.4076
F
,
-
0.4076
F
};
DTYPE
dedxAnswer
[
1
][
3
]
=
{
-
0.409969
,
-
0.555272
,
-
0.834759
};
DTYPE
dedxAnswer
[
1
][
3
]
=
{
-
0.409969
F
,
-
0.555272
F
,
-
0.834759
F
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -242,7 +246,7 @@ bool TestLogSoftmax3()
...
@@ -242,7 +246,7 @@ bool TestLogSoftmax3()
LogSoftmaxBackward
(
g
,
y
,
x
,
dedy
,
dedx
,
1
,
CROSSENTROPY
);
LogSoftmaxBackward
(
g
,
y
,
x
,
dedy
,
dedx
,
1
,
CROSSENTROPY
);
/* check result */
/* check result */
cpuTest
=
y
->
CheckData
(
yAnswer
,
sUnitNum
)
&&
dedx
->
CheckData
(
dedxAnswer
,
sUnitNum
);
cpuTest
=
y
->
CheckData
(
yAnswer
,
sUnitNum
,
1e-4
F
)
&&
dedx
->
CheckData
(
dedxAnswer
,
sUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
...
@@ -269,7 +273,7 @@ bool TestLogSoftmax3()
...
@@ -269,7 +273,7 @@ bool TestLogSoftmax3()
LogSoftmaxBackward
(
gGPU
,
yGPU
,
xGPU
,
dedyGPU
,
dedxGPU
,
1
,
CROSSENTROPY
);
LogSoftmaxBackward
(
gGPU
,
yGPU
,
xGPU
,
dedyGPU
,
dedxGPU
,
1
,
CROSSENTROPY
);
/* check result */
/* check result */
gpuTest
=
yGPU
->
CheckData
(
yAnswer
,
sUnitNum
)
&&
dedxGPU
->
CheckData
(
dedxAnswer
,
sUnitNum
);
gpuTest
=
yGPU
->
CheckData
(
yAnswer
,
sUnitNum
,
1e-4
F
)
&&
dedxGPU
->
CheckData
(
dedxAnswer
,
sUnitNum
,
1e-4
F
);
/* destroy variables */
/* destroy variables */
delete
x
;
delete
x
;
...
@@ -305,7 +309,6 @@ bool TestLogSoftmax3()
...
@@ -305,7 +309,6 @@ bool TestLogSoftmax3()
*/
*/
/* test for LogSoftmax Function */
/* test for LogSoftmax Function */
extern
"C"
bool
TestLogSoftmax
()
bool
TestLogSoftmax
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST LogSoftmax] test log softmax function and its backward computation
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST LogSoftmax] test log softmax function and its backward computation
\n
"
);
...
@@ -321,15 +324,15 @@ bool TestLogSoftmax()
...
@@ -321,15 +324,15 @@ bool TestLogSoftmax()
else
else
XPRINT
(
0
,
stdout
,
">> case 1 passed!
\n
"
);
XPRINT
(
0
,
stdout
,
">> case 1 passed!
\n
"
);
/
//
* case 2 test */
/* case 2 test */
//
caseFlag = TestLogSoftmax2();
caseFlag
=
TestLogSoftmax2
();
//
if (!caseFlag) {
if
(
!
caseFlag
)
{
//
returnFlag = false;
returnFlag
=
false
;
//
XPRINT(0, stdout, ">> case 2 failed!\n");
XPRINT
(
0
,
stdout
,
">> case 2 failed!
\n
"
);
//
}
}
//
else
else
//
XPRINT(0, stdout, ">> case 2 passed!\n");
XPRINT
(
0
,
stdout
,
">> case 2 passed!
\n
"
);
/* case 3 test */
/* case 3 test */
caseFlag
=
TestLogSoftmax3
();
caseFlag
=
TestLogSoftmax3
();
...
...
source/test/TLoss.cpp
查看文件 @
0887fae1
...
@@ -23,10 +23,12 @@
...
@@ -23,10 +23,12 @@
#include "../function/Loss.h"
#include "../function/Loss.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: test LossCompute function
* In this case, Loss function name = SQUAREDERROR.
/*
* loss = sum_{i} 0.5*(t_i - y_i)^2,
case 1: test LossCompute function
* where t_i is the gold standard and y_i is the model output
In this case, Loss function name = SQUAREDERROR.
loss = sum_{i} 0.5*(t_i - y_i)^2,
where t_i is the gold standard and y_i is the model output
*/
*/
bool
TestLoss1
()
bool
TestLoss1
()
{
{
...
@@ -99,10 +101,11 @@ bool TestLoss1()
...
@@ -99,10 +101,11 @@ bool TestLoss1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 2: test LossCompute function
/*
* In this case, Loss function name = CROSSENTROPY.
case 2: test LossCompute function
* loss = sum_{i} (-t_i * log(y_i))
In this case, Loss function name = CROSSENTROPY.
* where t_i is the gold standard and y_i is the model output
loss = sum_{i} (-t_i * log(y_i))
where t_i is the gold standard and y_i is the model output
*/
*/
bool
TestLoss2
()
bool
TestLoss2
()
{
{
...
@@ -175,10 +178,11 @@ bool TestLoss2()
...
@@ -175,10 +178,11 @@ bool TestLoss2()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 3: test LossCompute function
/*
* In this case, Loss function name = ONEHOTERROR.
case 3: test LossCompute function
* loss = sum_{i} e_i
In this case, Loss function name = ONEHOTERROR.
* where e_i = 0.5*(t_i - y_i)^2 if t_i = 1, e_i = 0 otherwise
loss = sum_{i} e_i
where e_i = 0.5*(t_i - y_i)^2 if t_i = 1, e_i = 0 otherwise
*/
*/
bool
TestLoss3
()
bool
TestLoss3
()
{
{
...
@@ -191,16 +195,16 @@ bool TestLoss3()
...
@@ -191,16 +195,16 @@ bool TestLoss3()
int
unitNum
=
1
;
int
unitNum
=
1
;
for
(
int
i
=
0
;
i
<
order
;
i
++
)
for
(
int
i
=
0
;
i
<
order
;
i
++
)
unitNum
*=
dimSize
[
i
];
unitNum
*=
dimSize
[
i
];
DTYPE
outputData
[
5
][
1
]
=
{
{
0.5
},
DTYPE
outputData
[
5
][
1
]
=
{
{
0.5
F
},
{
0.5
},
{
0.5
F
},
{
0.5
},
{
0.5
F
},
{
0.5
},
{
0.5
F
},
{
0.5
}
};
{
0.5
F
}
};
DTYPE
goldData
[
5
][
1
]
=
{
{
1.0
},
DTYPE
goldData
[
5
][
1
]
=
{
{
1.0
F
},
{
1.0
},
{
1.0
F
},
{
0.0
},
{
0.0
F
},
{
0.0
},
{
0.0
F
},
{
0.0
}
};
{
0.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -263,7 +267,6 @@ TODO!!
...
@@ -263,7 +267,6 @@ TODO!!
*/
*/
/* test for Loss Function */
/* test for Loss Function */
extern
"C"
bool
TestLoss
()
bool
TestLoss
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST Loss] compute the loss
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST Loss] compute the loss
\n
"
);
...
...
source/test/TMatrixMULBatchedCPU.cpp
查看文件 @
0887fae1
...
@@ -22,9 +22,10 @@
...
@@ -22,9 +22,10 @@
#include "TMatrixMULBatchedCPU.h"
#include "TMatrixMULBatchedCPU.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: matrix multiplication in batch mode (CPU code).
* In this case, aList=2*(2, 3), bList=2*(3, 2) -> c=2*(2, 2),
/*
* transposedA=X_NOTRANS, transposedB=X_NOTRANS.
case 1: matrix multiplication in batch mode (CPU code).
In this case, aList=2*(2, 3), bList=2*(3, 2) -> c=2*(2, 2), transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
*/
bool
TestMatrixMulBatchedCPU1
()
bool
TestMatrixMulBatchedCPU1
()
{
{
...
@@ -63,20 +64,20 @@ bool TestMatrixMulBatchedCPU1()
...
@@ -63,20 +64,20 @@ bool TestMatrixMulBatchedCPU1()
for
(
int
i
=
0
;
i
<
cOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
cOrder
;
i
++
)
cUnitNum
*=
cDimSize
[
i
];
cUnitNum
*=
cDimSize
[
i
];
DTYPE
aData1
[
2
][
3
]
=
{
{
1.0
,
2.0
,
3.0
},
DTYPE
aData1
[
2
][
3
]
=
{
{
1.0
F
,
2.0
F
,
3.0
F
},
{
-
4.0
,
5.0
,
6.0
}
};
{
-
4.0
F
,
5.0
F
,
6.0
F
}
};
DTYPE
aData2
[
2
][
3
]
=
{
{
1.0
,
-
2.0
,
-
3.0
},
DTYPE
aData2
[
2
][
3
]
=
{
{
1.0
F
,
-
2.0
F
,
-
3.0
F
},
{
-
4.0
,
3.0
,
2.0
}
};
{
-
4.0
F
,
3.0
F
,
2.0
F
}
};
DTYPE
bData1
[
3
][
2
]
=
{
{
0.0
,
-
1.0
},
DTYPE
bData1
[
3
][
2
]
=
{
{
0.0
F
,
-
1.0
F
},
{
1.0
,
2.0
},
{
1.0
F
,
2.0
F
},
{
2.0
,
1.0
}
};
{
2.0
F
,
1.0
F
}
};
DTYPE
bData2
[
3
][
2
]
=
{
{
0.0
,
1.0
},
DTYPE
bData2
[
3
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
3.0
,
2.0
},
{
3.0
F
,
2.0
F
},
{
2.0
,
1.0
}
};
{
2.0
F
,
1.0
F
}
};
DTYPE
answer1
[
2
][
2
]
=
{
{
8.0
,
6.0
},
DTYPE
answer1
[
2
][
2
]
=
{
{
8.0
F
,
6.0
F
},
{
17.0
,
20.0
}
};
{
17.0
F
,
20.0
F
}
};
DTYPE
answer2
[
2
][
2
]
=
{
{
-
12.0
,
-
6.0
},
DTYPE
answer2
[
2
][
2
]
=
{
{
-
12.0
F
,
-
6.0
F
},
{
13.0
,
4.0
}
};
{
13.0
F
,
4.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
...
source/test/TMatrixMul.cpp
查看文件 @
0887fae1
...
@@ -22,9 +22,11 @@
...
@@ -22,9 +22,11 @@
#include "TMatrixMul.h"
#include "TMatrixMul.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: matrix multiplication.
* In this case, a=(2, 3), b=(3, 2) -> c=(2, 2),
/*
* transposedA=X_NOTRANS, transposedB=X_NOTRANS.
case 1: matrix multiplication.
In this case, a=(2, 3), b=(3, 2) -> c=(2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
*/
bool
TestMatrixMul1
()
bool
TestMatrixMul1
()
{
{
...
@@ -58,13 +60,13 @@ bool TestMatrixMul1()
...
@@ -58,13 +60,13 @@ bool TestMatrixMul1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
3
]
=
{
{
1.0
,
2.0
,
3.0
},
DTYPE
sData1
[
2
][
3
]
=
{
{
1.0
F
,
2.0
F
,
3.0
F
},
{
-
4.0
,
5.0
,
6.0
}
};
{
-
4.0
F
,
5.0
F
,
6.0
F
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
,
-
1.0
},
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
F
,
-
1.0
F
},
{
1.0
,
2.0
},
{
1.0
F
,
2.0
F
},
{
2.0
,
1.0
}
};
{
2.0
F
,
1.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
,
6.0
},
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
F
,
6.0
F
},
{
17.0
,
20.0
}
};
{
17.0
F
,
20.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -130,9 +132,10 @@ bool TestMatrixMul1()
...
@@ -130,9 +132,10 @@ bool TestMatrixMul1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 2: matrix multiplication.
/*
* In this case, a=(3, 2), b=(3, 2) -> c=(2, 2),
case 2: matrix multiplication.
* transposedA=X_TRANS, transposedB=X_NOTRANS.
In this case, a=(3, 2), b=(3, 2) -> c=(2, 2),
transposedA=X_TRANS, transposedB=X_NOTRANS.
*/
*/
bool
TestMatrixMul2
()
bool
TestMatrixMul2
()
{
{
...
@@ -166,14 +169,14 @@ bool TestMatrixMul2()
...
@@ -166,14 +169,14 @@ bool TestMatrixMul2()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
3
][
2
]
=
{
{
1.0
,
-
4.0
},
DTYPE
sData1
[
3
][
2
]
=
{
{
1.0
F
,
-
4.0
F
},
{
2.0
,
5.0
},
{
2.0
F
,
5.0
F
},
{
3.0
,
6.0
}
};
{
3.0
F
,
6.0
F
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
,
-
1.0
},
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
F
,
-
1.0
F
},
{
1.0
,
2.0
},
{
1.0
F
,
2.0
F
},
{
2.0
,
1.0
}
};
{
2.0
F
,
1.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
,
6.0
},
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
F
,
6.0
F
},
{
17.0
,
20.0
}
};
{
17.0
F
,
20.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -239,9 +242,10 @@ bool TestMatrixMul2()
...
@@ -239,9 +242,10 @@ bool TestMatrixMul2()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 3: matrix multiplication.
/*
* In this case, a=(3, 2, 3), b=(2, 3, 2) -> c=(3, 2, 2, 2),
case 3: matrix multiplication.
* transposedA=X_NOTRANS, transposedB=X_NOTRANS.
In this case, a=(3, 2, 3), b=(2, 3, 2) -> c=(3, 2, 2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
*/
bool
TestMatrixMul3
()
bool
TestMatrixMul3
()
{
{
...
@@ -279,30 +283,30 @@ bool TestMatrixMul3()
...
@@ -279,30 +283,30 @@ bool TestMatrixMul3()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
3
][
2
][
3
]
=
{
{
{
0.0
,
-
1.0
,
2.0
},
DTYPE
sData1
[
3
][
2
][
3
]
=
{
{
{
0.0
F
,
-
1.0
F
,
2.0
F
},
{
2.0
,
1.0
,
3.0
}
},
{
2.0
F
,
1.0
F
,
3.0
F
}
},
{
{
1.0
,
2.0
,
4.0
},
{
{
1.0
F
,
2.0
F
,
4.0
F
},
{
3.0
,
1.0
,
2.0
}},
{
3.0
F
,
1.0
F
,
2.0
F
}},
{
{
-
1.0
,
3.0
,
2.0
},
{
{
-
1.0
F
,
3.0
F
,
2.0
F
},
{
1.0
,
-
1.0
,
0.0
}
}
};
{
1.0
F
,
-
1.0
F
,
0.0
F
}
}
};
DTYPE
sData2
[
2
][
3
][
2
]
=
{
{
{
1.0
,
2.0
},
DTYPE
sData2
[
2
][
3
][
2
]
=
{
{
{
1.0
F
,
2.0
F
},
{
-
4.0
,
3.0
},
{
-
4.0
F
,
3.0
F
},
{
2.0
,
6.0
}
},
{
2.0
F
,
6.0
F
}
},
{
{
1.0
,
2.0
},
{
{
1.0
F
,
2.0
F
},
{
3.0
,
4.0
},
{
3.0
F
,
4.0
F
},
{
5.0
,
6.0
}
}
};
{
5.0
F
,
6.0
F
}
}
};
DTYPE
answer
[
3
][
2
][
2
][
2
]
=
{
{
{
{
8.0
,
9.0
},
DTYPE
answer
[
3
][
2
][
2
][
2
]
=
{
{
{
{
8.0
F
,
9.0
F
},
{
4.0
,
25.0
}
},
{
4.0
F
,
25.0
F
}
},
{
{
7.0
,
8.0
},
{
{
7.0
F
,
8.0
F
},
{
20.0
,
26.0
}
}
},
{
20.0
F
,
26.0
F
}
}
},
{
{
{
1.0
,
32.0
},
{
{
{
1.0
F
,
32.0
F
},
{
3.0
,
21.0
}
},
{
3.0
F
,
21.0
F
}
},
{
{
27.0
,
34.0
},
{
{
27.0
F
,
34.0
F
},
{
16.0
,
22.0
}
}
},
{
16.0
F
,
22.0
F
}
}
},
{
{
{
-
9.0
,
19.0
},
{
{
{
-
9.0
F
,
19.0
F
},
{
5.0
,
-
1.0
}
},
{
5.0
F
,
-
1.0
F
}
},
{
{
18.0
,
22.0
},
{
{
18.0
F
,
22.0
F
},
{
-
2.0
,
-
2.0
}
}
}
};
{
-
2.0
F
,
-
2.0
F
}
}
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -368,9 +372,10 @@ bool TestMatrixMul3()
...
@@ -368,9 +372,10 @@ bool TestMatrixMul3()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 4: matrix multiplication.
/*
* In this case, a=(3, 2, 3), b=(3, 2) -> c=(3, 2, 2),
case 4: matrix multiplication.
* transposedA=X_NOTRANS, transposedB=X_NOTRANS.
In this case, a=(3, 2, 3), b=(3, 2) -> c=(3, 2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
*/
bool
TestMatrixMul4
()
bool
TestMatrixMul4
()
{
{
...
@@ -406,21 +411,21 @@ bool TestMatrixMul4()
...
@@ -406,21 +411,21 @@ bool TestMatrixMul4()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
3
][
2
][
3
]
=
{
{
{
0.0
,
-
1.0
,
2.0
},
DTYPE
sData1
[
3
][
2
][
3
]
=
{
{
{
0.0
F
,
-
1.0
F
,
2.0
F
},
{
2.0
,
1.0
,
3.0
}
},
{
2.0
F
,
1.0
F
,
3.0
F
}
},
{
{
1.0
,
2.0
,
4.0
},
{
{
1.0
F
,
2.0
F
,
4.0
F
},
{
3.0
,
1.0
,
2.0
}},
{
3.0
F
,
1.0
F
,
2.0
F
}},
{
{
-
1.0
,
3.0
,
2.0
},
{
{
-
1.0
F
,
3.0
F
,
2.0
F
},
{
1.0
,
-
1.0
,
0.0
}
}
};
{
1.0
F
,
-
1.0
F
,
0.0
F
}
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
1.0
,
2.0
},
DTYPE
sData2
[
3
][
2
]
=
{
{
1.0
F
,
2.0
F
},
{
3.0
,
4.0
},
{
3.0
F
,
4.0
F
},
{
5.0
,
6.0
}
};
{
5.0
F
,
6.0
F
}
};
DTYPE
answer
[
3
][
2
][
2
]
=
{
{
{
7.0
,
8.0
},
DTYPE
answer
[
3
][
2
][
2
]
=
{
{
{
7.0
F
,
8.0
F
},
{
20.0
,
26.0
}
},
{
20.0
F
,
26.0
F
}
},
{
{
27.0
,
34.0
},
{
{
27.0
F
,
34.0
F
},
{
16.0
,
22.0
}
},
{
16.0
F
,
22.0
F
}
},
{
{
18.0
,
22.0
},
{
{
18.0
F
,
22.0
F
},
{
-
2.0
,
-
2.0
}
}
};
{
-
2.0
F
,
-
2.0
F
}
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -493,7 +498,6 @@ bool TestMatrixMul4()
...
@@ -493,7 +498,6 @@ bool TestMatrixMul4()
*/
*/
/* test for MatrixMul Function */
/* test for MatrixMul Function */
extern
"C"
bool
TestMatrixMul
()
bool
TestMatrixMul
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST MATRIXMUL] matrix multiplication
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST MATRIXMUL] matrix multiplication
\n
"
);
...
...
source/test/TMatrixMul2D.cpp
查看文件 @
0887fae1
...
@@ -22,9 +22,11 @@
...
@@ -22,9 +22,11 @@
#include "TMatrixMul2D.h"
#include "TMatrixMul2D.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: matrix multiplication (for 2d tensors).
* In this case, a=(2, 3), b=(3, 2) -> c=(2, 2),
/*
* transposedA=X_NOTRANS, transposedB=X_NOTRANS.
case 1: matrix multiplication (for 2d tensors).
In this case, a=(2, 3), b=(3, 2) -> c=(2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
*/
bool
TestMatrixMul2D1
()
bool
TestMatrixMul2D1
()
{
{
...
@@ -58,13 +60,13 @@ bool TestMatrixMul2D1()
...
@@ -58,13 +60,13 @@ bool TestMatrixMul2D1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
3
]
=
{
{
1.0
,
2.0
,
3.0
},
DTYPE
sData1
[
2
][
3
]
=
{
{
1.0
F
,
2.0
F
,
3.0
F
},
{
-
4.0
,
5.0
,
6.0
}
};
{
-
4.0
F
,
5.0
F
,
6.0
F
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
,
-
1.0
},
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
F
,
-
1.0
F
},
{
1.0
,
2.0
},
{
1.0
F
,
2.0
F
},
{
2.0
,
1.0
}
};
{
2.0
F
,
1.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
,
6.0
},
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
F
,
6.0
F
},
{
17.0
,
20.0
}
};
{
17.0
F
,
20.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -130,9 +132,10 @@ bool TestMatrixMul2D1()
...
@@ -130,9 +132,10 @@ bool TestMatrixMul2D1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 2: matrix multiplication (for 2d tensors).
/*
* In this case, a=(3, 2), b=(3, 2) -> c=(2, 2),
case 2: matrix multiplication (for 2d tensors).
* transposedA=X_TRANS, transposedB=X_NOTRANS.
In this case, a=(3, 2), b=(3, 2) -> c=(2, 2),
transposedA=X_TRANS, transposedB=X_NOTRANS.
*/
*/
bool
TestMatrixMul2D2
()
bool
TestMatrixMul2D2
()
{
{
...
@@ -166,14 +169,14 @@ bool TestMatrixMul2D2()
...
@@ -166,14 +169,14 @@ bool TestMatrixMul2D2()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
3
][
2
]
=
{
{
1.0
,
-
4.0
},
DTYPE
sData1
[
3
][
2
]
=
{
{
1.0
F
,
-
4.0
F
},
{
2.0
,
5.0
},
{
2.0
F
,
5.0
F
},
{
3.0
,
6.0
}
};
{
3.0
F
,
6.0
F
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
,
-
1.0
},
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
F
,
-
1.0
F
},
{
1.0
,
2.0
},
{
1.0
F
,
2.0
F
},
{
2.0
,
1.0
}
};
{
2.0
F
,
1.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
,
6.0
},
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
F
,
6.0
F
},
{
17.0
,
20.0
}
};
{
17.0
F
,
20.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
...
source/test/TMatrixMul2DParallel.cpp
查看文件 @
0887fae1
...
@@ -22,9 +22,11 @@
...
@@ -22,9 +22,11 @@
#include "TMatrixMul2DParallel.h"
#include "TMatrixMul2DParallel.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: matrix multiplication (for 2d tensors) with multi-threading.
* In this case, a=(2, 3), b=(3, 2) -> c=(2, 2),
/*
* transposedA=X_NOTRANS, transposedB=X_NOTRANS.
case 1: matrix multiplication (for 2d tensors) with multi-threading.
In this case, a=(2, 3), b=(3, 2) -> c=(2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
*/
bool
TestMatrixMul2DParallel1
()
bool
TestMatrixMul2DParallel1
()
{
{
...
@@ -58,13 +60,13 @@ bool TestMatrixMul2DParallel1()
...
@@ -58,13 +60,13 @@ bool TestMatrixMul2DParallel1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
3
]
=
{
{
1.0
,
2.0
,
3.0
},
DTYPE
sData1
[
2
][
3
]
=
{
{
1.0
F
,
2.0
F
,
3.0
F
},
{
-
4.0
,
5.0
,
6.0
}
};
{
-
4.0
F
,
5.0
F
,
6.0
F
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
,
-
1.0
},
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
F
,
-
1.0
F
},
{
1.0
,
2.0
},
{
1.0
F
,
2.0
F
},
{
2.0
,
1.0
}
};
{
2.0
F
,
1.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
,
6.0
},
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
F
,
6.0
F
},
{
17.0
,
20.0
}
};
{
17.0
F
,
20.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -96,9 +98,10 @@ bool TestMatrixMul2DParallel1()
...
@@ -96,9 +98,10 @@ bool TestMatrixMul2DParallel1()
return
cpuTest
;
return
cpuTest
;
}
}
/* case 2: matrix multiplication (for 2d tensors) with multi-threading.
/*
* In this case, a=(3, 2), b=(3, 2) -> c=(2, 2),
case 2: matrix multiplication (for 2d tensors) with multi-threading.
* transposedA=X_TRANS, transposedB=X_NOTRANS.
In this case, a=(3, 2), b=(3, 2) -> c=(2, 2),
transposedA=X_TRANS, transposedB=X_NOTRANS.
*/
*/
bool
TestMatrixMul2DParallel2
()
bool
TestMatrixMul2DParallel2
()
{
{
...
@@ -132,14 +135,14 @@ bool TestMatrixMul2DParallel2()
...
@@ -132,14 +135,14 @@ bool TestMatrixMul2DParallel2()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
3
][
2
]
=
{
{
1.0
,
-
4.0
},
DTYPE
sData1
[
3
][
2
]
=
{
{
1.0
F
,
-
4.0
F
},
{
2.0
,
5.0
},
{
2.0
F
,
5.0
F
},
{
3.0
,
6.0
}
};
{
3.0
F
,
6.0
F
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
,
-
1.0
},
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
F
,
-
1.0
F
},
{
1.0
,
2.0
},
{
1.0
F
,
2.0
F
},
{
2.0
,
1.0
}
};
{
2.0
F
,
1.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
,
6.0
},
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
F
,
6.0
F
},
{
17.0
,
20.0
}
};
{
17.0
F
,
20.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -177,7 +180,6 @@ bool TestMatrixMul2DParallel2()
...
@@ -177,7 +180,6 @@ bool TestMatrixMul2DParallel2()
*/
*/
/* test for MatrixMul2DParallel Function */
/* test for MatrixMul2DParallel Function */
extern
"C"
bool
TestMatrixMul2DParallel
()
bool
TestMatrixMul2DParallel
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST MatrixMul2DParallel] matrix multiplication (for 2d tensors) with multi-threading
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST MatrixMul2DParallel] matrix multiplication (for 2d tensors) with multi-threading
\n
"
);
...
...
source/test/TMatrixMulBatched.cpp
查看文件 @
0887fae1
...
@@ -22,9 +22,10 @@
...
@@ -22,9 +22,10 @@
#include "TMatrixMULBatched.h"
#include "TMatrixMULBatched.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: matrix multiplication of the two tensors.
* In this case, a=(2, 3), b=(2, 3) -> c=(2, 2), transposedA=X_NOTRANS,
/*
transposedB=X_NOTRANS.
case 1: matrix multiplication of the two tensors.
In this case, a=(2, 3), b=(2, 3) -> c=(2, 2), transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
*/
bool
TestMatrixMulBatched1
()
bool
TestMatrixMulBatched1
()
{
{
...
@@ -58,13 +59,13 @@ bool TestMatrixMulBatched1()
...
@@ -58,13 +59,13 @@ bool TestMatrixMulBatched1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
3
]
=
{
{
1.0
,
2.0
,
3.0
},
DTYPE
sData1
[
2
][
3
]
=
{
{
1.0
F
,
2.0
F
,
3.0
F
},
{
-
4.0
,
5.0
,
6.0
}
};
{
-
4.0
F
,
5.0
F
,
6.0
F
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
,
-
1.0
},
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
F
,
-
1.0
F
},
{
1.0
,
2.0
},
{
1.0
F
,
2.0
F
},
{
2.0
,
1.0
}
};
{
2.0
F
,
1.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
,
6.0
},
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
F
,
6.0
F
},
{
17.0
,
20.0
}
};
{
17.0
F
,
20.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -130,9 +131,9 @@ bool TestMatrixMulBatched1()
...
@@ -130,9 +131,9 @@ bool TestMatrixMulBatched1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/*
case 2: matrix multiplication of the two tensors.
/*
* In this case, a=(2, 2, 3), b=(2, 3, 2) -> c=(2, 2, 2),
case 2: matrix multiplication of the two tensors.
*
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
In this case, a=(2, 2, 3), b=(2, 3, 2) -> c=(2, 2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
*/
bool
TestMatrixMulBatched2
()
bool
TestMatrixMulBatched2
()
{
{
...
@@ -169,20 +170,20 @@ bool TestMatrixMulBatched2()
...
@@ -169,20 +170,20 @@ bool TestMatrixMulBatched2()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
2
][
3
]
=
{
{
{
0.0
,
-
1.0
,
2.0
},
DTYPE
sData1
[
2
][
2
][
3
]
=
{
{
{
0.0
F
,
-
1.0
F
,
2.0
F
},
{
2.0
,
1.0
,
3.0
}
},
{
2.0
F
,
1.0
F
,
3.0
F
}
},
{
{
1.0
,
2.0
,
4.0
},
{
{
1.0
F
,
2.0
F
,
4.0
F
},
{
3.0
,
1.0
,
2.0
}
}
};
{
3.0
F
,
1.0
F
,
2.0
F
}
}
};
DTYPE
sData2
[
2
][
3
][
2
]
=
{
{
{
1.0
,
2.0
},
DTYPE
sData2
[
2
][
3
][
2
]
=
{
{
{
1.0
F
,
2.0
F
},
{
-
4.0
,
3.0
},
{
-
4.0
F
,
3.0
F
},
{
2.0
,
6.0
}
},
{
2.0
F
,
6.0
F
}
},
{
{
1.0
,
2.0
},
{
{
1.0
F
,
2.0
F
},
{
3.0
,
4.0
},
{
3.0
F
,
4.0
F
},
{
5.0
,
6.0
}
}
};
{
5.0
F
,
6.0
F
}
}
};
DTYPE
answer
[
2
][
2
][
2
]
=
{
{
{
8.0
,
9.0
},
DTYPE
answer
[
2
][
2
][
2
]
=
{
{
{
8.0
F
,
9.0
F
},
{
4.0
,
25.0
}
},
{
4.0
F
,
25.0
F
}
},
{
{
27.0
,
34.0
},
{
{
27.0
F
,
34.0
F
},
{
16.0
,
22.0
}
}
};
{
16.0
F
,
22.0
F
}
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -254,7 +255,6 @@ bool TestMatrixMulBatched2()
...
@@ -254,7 +255,6 @@ bool TestMatrixMulBatched2()
*/
*/
/* test for TestMatrixMulBatched Function */
/* test for TestMatrixMulBatched Function */
extern
"C"
bool
TestMatrixMulBatched
()
bool
TestMatrixMulBatched
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST MATRIXMULBATCHED] matrix multiplication of the two tensors
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST MATRIXMULBATCHED] matrix multiplication of the two tensors
\n
"
);
...
...
source/test/TMerge.cpp
查看文件 @
0887fae1
...
@@ -24,8 +24,10 @@
...
@@ -24,8 +24,10 @@
#include "TMerge.h"
#include "TMerge.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: transform a tensor by merging it along with a dimension.
* In this case, (3, 2) -> (6), whereToMerge=1, leadingDim=0.
/*
case 1: transform a tensor by merging it along with a dimension.
In this case, (3, 2) -> (6), whereToMerge=1, leadingDim=0.
*/
*/
bool
TestMerge1
()
bool
TestMerge1
()
{
{
...
@@ -48,9 +50,9 @@ bool TestMerge1()
...
@@ -48,9 +50,9 @@ bool TestMerge1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData
[
2
][
3
]
=
{
{
0.0
,
1.0
,
2.0
},
DTYPE
sData
[
2
][
3
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
},
{
3.0
,
4.0
,
5.0
}
};
{
3.0
F
,
4.0
F
,
5.0
F
}
};
DTYPE
answer
[
6
]
=
{
0.0
,
1.0
,
2.0
,
3.0
,
4.0
,
5.0
};
DTYPE
answer
[
6
]
=
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
,
4.0
F
,
5.0
F
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -107,8 +109,9 @@ bool TestMerge1()
...
@@ -107,8 +109,9 @@ bool TestMerge1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 2: transform a tensor by merging it along with a dimension.
/*
* In this case,
case 2: transform a tensor by merging it along with a dimension.
In this case,
(2, 2, 3) -> (4, 3), whereToMerge=1, leadingDim=0.
(2, 2, 3) -> (4, 3), whereToMerge=1, leadingDim=0.
(2, 2, 3) -> (2, 6), whereToMerge=2, leadingDim=0.
(2, 2, 3) -> (2, 6), whereToMerge=2, leadingDim=0.
*/
*/
...
@@ -145,16 +148,16 @@ bool TestMerge2()
...
@@ -145,16 +148,16 @@ bool TestMerge2()
for
(
int
i
=
0
;
i
<
tOrder2
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder2
;
i
++
)
tUnitNum2
*=
tDimSize2
[
i
];
tUnitNum2
*=
tDimSize2
[
i
];
DTYPE
sData
[
2
][
2
][
3
]
=
{
{
{
0.0
,
1.0
,
2.0
},
DTYPE
sData
[
2
][
2
][
3
]
=
{
{
{
0.0
F
,
1.0
F
,
2.0
F
},
{
4.0
,
5.0
,
6.0
}
},
{
4.0
F
,
5.0
F
,
6.0
F
}
},
{
{
-
1.0
,
2.0
,
3.0
},
{
{
-
1.0
F
,
2.0
F
,
3.0
F
},
{
-
4.0
,
-
5.0
,
-
6.0
}
}
};
{
-
4.0
F
,
-
5.0
F
,
-
6.0
F
}
}
};
DTYPE
answer1
[
4
][
3
]
=
{
{
0.0
,
1.0
,
2.0
},
DTYPE
answer1
[
4
][
3
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
},
{
4.0
,
5.0
,
6.0
},
{
4.0
F
,
5.0
F
,
6.0
F
},
{
-
1.0
,
2.0
,
3.0
},
{
-
1.0
F
,
2.0
F
,
3.0
F
},
{
-
4.0
,
-
5.0
,
-
6.0
}
};
{
-
4.0
F
,
-
5.0
F
,
-
6.0
F
}
};
DTYPE
answer2
[
2
][
6
]
=
{
{
0.0
,
1.0
,
2.0
,
-
1.0
,
2.0
,
3.0
},
DTYPE
answer2
[
2
][
6
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
-
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
,
5.0
,
6.0
,
-
4.0
,
-
5.0
,
-
6.0
}
};
{
4.0
F
,
5.0
F
,
6.0
F
,
-
4.0
F
,
-
5.0
F
,
-
6.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -222,7 +225,8 @@ bool TestMerge2()
...
@@ -222,7 +225,8 @@ bool TestMerge2()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 3: merge small tensors into a big tensor.
/*
case 3: merge small tensors into a big tensor.
In this case, 2 * (2, 4) -> (4, 4), whereToMerge=0.
In this case, 2 * (2, 4) -> (4, 4), whereToMerge=0.
*/
*/
bool
TestMerge3
()
bool
TestMerge3
()
...
@@ -240,10 +244,10 @@ bool TestMerge3()
...
@@ -240,10 +244,10 @@ bool TestMerge3()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
sUnitNum
*=
sDimSize
[
i
];
DTYPE
sData1
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
DTYPE
sData1
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
sData2
[
2
][
4
]
=
{
{
0.0
,
-
1.0
,
-
2.0
,
-
3.0
},
DTYPE
sData2
[
2
][
4
]
=
{
{
0.0
F
,
-
1.0
F
,
-
2.0
F
,
-
3.0
F
},
{
-
4.0
,
-
5.0
,
-
6.0
,
-
7.0
}
};
{
-
4.0
F
,
-
5.0
F
,
-
6.0
F
,
-
7.0
F
}
};
/* a target tensor of size (4, 4) */
/* a target tensor of size (4, 4) */
int
tOrder
=
2
;
int
tOrder
=
2
;
...
@@ -255,10 +259,10 @@ bool TestMerge3()
...
@@ -255,10 +259,10 @@ bool TestMerge3()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
answer
[
4
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
DTYPE
answer
[
4
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
,
5.0
,
6.0
,
7.0
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
},
{
0.0
,
-
1.0
,
-
2.0
,
-
3.0
},
{
0.0
F
,
-
1.0
F
,
-
2.0
F
,
-
3.0
F
},
{
-
4.0
,
-
5.0
,
-
6.0
,
-
7.0
}
};
{
-
4.0
F
,
-
5.0
F
,
-
6.0
F
,
-
7.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -336,7 +340,8 @@ bool TestMerge3()
...
@@ -336,7 +340,8 @@ bool TestMerge3()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 4: merge small tensors into a big tensor.
/*
case 4: merge small tensors into a big tensor.
In this case, 2 * (2, 4) -> (2, 8), whereToMerge=1.
In this case, 2 * (2, 4) -> (2, 8), whereToMerge=1.
*/
*/
bool
TestMerge4
()
bool
TestMerge4
()
...
@@ -354,10 +359,10 @@ bool TestMerge4()
...
@@ -354,10 +359,10 @@ bool TestMerge4()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
sUnitNum
*=
sDimSize
[
i
];
DTYPE
sData1
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
DTYPE
sData1
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
sData2
[
2
][
4
]
=
{
{
0.0
,
-
1.0
,
-
2.0
,
-
3.0
},
DTYPE
sData2
[
2
][
4
]
=
{
{
0.0
F
,
-
1.0
F
,
-
2.0
F
,
-
3.0
F
},
{
-
4.0
,
-
5.0
,
-
6.0
,
-
7.0
}
};
{
-
4.0
F
,
-
5.0
F
,
-
6.0
F
,
-
7.0
F
}
};
/* a target tensor of size (4, 4) */
/* a target tensor of size (4, 4) */
int
tOrder
=
2
;
int
tOrder
=
2
;
...
@@ -369,8 +374,8 @@ bool TestMerge4()
...
@@ -369,8 +374,8 @@ bool TestMerge4()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
answer
[
2
][
8
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
,
0.0
,
-
1.0
,
-
2.0
,
-
3.0
},
DTYPE
answer
[
2
][
8
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
,
0.0
F
,
-
1.0
F
,
-
2.0
F
,
-
3.0
F
},
{
4.0
,
5.0
,
6.0
,
7.0
,
-
4.0
,
-
5.0
,
-
6.0
,
-
7.0
}
};
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
,
-
4.0
F
,
-
5.0
F
,
-
6.0
F
,
-
7.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -454,7 +459,6 @@ bool TestMerge4()
...
@@ -454,7 +459,6 @@ bool TestMerge4()
*/
*/
/* test for Merge Function */
/* test for Merge Function */
extern
"C"
bool
TestMerge
()
bool
TestMerge
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST MERGE] transform a tensor by merging it alone with a dimension or merge small tensors into a big tensor
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST MERGE] transform a tensor by merging it alone with a dimension or merge small tensors into a big tensor
\n
"
);
...
...
source/test/TMultiplyElementWise.cpp
查看文件 @
0887fae1
...
@@ -22,9 +22,11 @@
...
@@ -22,9 +22,11 @@
#include "TMultiplyElementWise.h"
#include "TMultiplyElementWise.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: element-wise product of two tensors
* c(i) = a(i)*b(i) + \alpha * c(i)
/*
* In this case, (2, 1) (2, 1) -> (2, 1), leadingDim=0, alpha=0.
case 1: element-wise product of two tensors
c(i) = a(i)*b(i) + \alpha * c(i)
In this case, (2, 1) (2, 1) -> (2, 1), leadingDim=0, alpha=0.
*/
*/
bool
TestMultiplyElementWise1
()
bool
TestMultiplyElementWise1
()
{
{
...
@@ -58,12 +60,12 @@ bool TestMultiplyElementWise1()
...
@@ -58,12 +60,12 @@ bool TestMultiplyElementWise1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
},
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
F
},
{
1.0
}
};
{
1.0
F
}
};
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
},
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
F
},
{
3.0
}
};
{
3.0
F
}
};
DTYPE
answer
[
2
][
1
]
=
{
{
0.0
},
DTYPE
answer
[
2
][
1
]
=
{
{
0.0
F
},
{
3.0
}
};
{
3.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -129,9 +131,10 @@ bool TestMultiplyElementWise1()
...
@@ -129,9 +131,10 @@ bool TestMultiplyElementWise1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 2: element-wise product of two tensors
/*
* c(i) = a(i)*b(i) + \alpha * c(i)
case 2: element-wise product of two tensors
* In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=0, alpha=0.
c(i) = a(i)*b(i) + \alpha * c(i)
In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=0, alpha=0.
*/
*/
bool
TestMultiplyElementWise2
()
bool
TestMultiplyElementWise2
()
{
{
...
@@ -165,12 +168,12 @@ bool TestMultiplyElementWise2()
...
@@ -165,12 +168,12 @@ bool TestMultiplyElementWise2()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
2
]
=
{
{
0.0
,
1.0
},
DTYPE
sData1
[
2
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
2.0
,
3.0
}
};
{
2.0
F
,
3.0
F
}
};
DTYPE
sData2
[
2
][
2
]
=
{
{
0.0
,
1.0
},
DTYPE
sData2
[
2
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
2.0
,
3.0
}
};
{
2.0
F
,
3.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
0.0
,
1.0
},
DTYPE
answer
[
2
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
4.0
,
9.0
}
};
{
4.0
F
,
9.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -236,8 +239,9 @@ bool TestMultiplyElementWise2()
...
@@ -236,8 +239,9 @@ bool TestMultiplyElementWise2()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 3: element-wise product of two tensors, c(i) = a(i)*b(i) + \alpha * c(i)
/*
* In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=1, alpha=0.
case 3: element-wise product of two tensors, c(i) = a(i)*b(i) + \alpha * c(i)
In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=1, alpha=0.
*/
*/
bool
TestMultiplyElementWise3
()
bool
TestMultiplyElementWise3
()
{
{
...
@@ -271,12 +275,12 @@ bool TestMultiplyElementWise3()
...
@@ -271,12 +275,12 @@ bool TestMultiplyElementWise3()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
2
]
=
{
{
0.0
,
1.0
},
DTYPE
sData1
[
2
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
2.0
,
3.0
}
};
{
2.0
F
,
3.0
F
}
};
DTYPE
sData2
[
2
][
2
]
=
{
{
0.0
,
1.0
},
DTYPE
sData2
[
2
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
2.0
,
3.0
}
};
{
2.0
F
,
3.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
0.0
,
1.0
},
DTYPE
answer
[
2
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
4.0
,
9.0
}
};
{
4.0
F
,
9.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -348,7 +352,6 @@ TODO!!
...
@@ -348,7 +352,6 @@ TODO!!
*/
*/
/* test for MultiplyElementWise Function */
/* test for MultiplyElementWise Function */
extern
"C"
bool
TestMultiplyElementWise
()
bool
TestMultiplyElementWise
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST MULTIPLYELEMENTWISE] element-wise product of two tensors
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST MULTIPLYELEMENTWISE] element-wise product of two tensors
\n
"
);
...
...
source/test/TNegate.cpp
查看文件 @
0887fae1
...
@@ -22,6 +22,7 @@
...
@@ -22,6 +22,7 @@
#include "TNegate.h"
#include "TNegate.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: set every entry to its minus value */
/* case 1: set every entry to its minus value */
bool
TestNegate1
()
bool
TestNegate1
()
{
{
...
@@ -35,12 +36,12 @@ bool TestNegate1()
...
@@ -35,12 +36,12 @@ bool TestNegate1()
for
(
int
i
=
0
;
i
<
aOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
aOrder
;
i
++
)
aUnitNum
*=
aDimSize
[
i
];
aUnitNum
*=
aDimSize
[
i
];
DTYPE
aData
[
3
][
2
]
=
{
{
1.0
,
-
2.0
},
DTYPE
aData
[
3
][
2
]
=
{
{
1.0
F
,
-
2.0
F
},
{
-
3.0
,
4.0
},
{
-
3.0
F
,
4.0
F
},
{
5.0
,
-
6.0
}
};
{
5.0
F
,
-
6.0
F
}
};
DTYPE
answer
[
3
][
2
]
=
{
{
-
1.0
,
2.0
},
DTYPE
answer
[
3
][
2
]
=
{
{
-
1.0
F
,
2.0
F
},
{
3.0
,
-
4.0
},
{
3.0
F
,
-
4.0
F
},
{
-
5.0
,
6.0
}
};
{
-
5.0
F
,
6.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -101,12 +102,12 @@ bool TestNegate2()
...
@@ -101,12 +102,12 @@ bool TestNegate2()
for
(
int
i
=
0
;
i
<
aOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
aOrder
;
i
++
)
aUnitNum
*=
aDimSize
[
i
];
aUnitNum
*=
aDimSize
[
i
];
DTYPE
aData
[
3
][
2
]
=
{
{
0.0
,
0.0
},
DTYPE
aData
[
3
][
2
]
=
{
{
0.0
F
,
0.0
F
},
{
0.0
,
0.0
},
{
0.0
F
,
0.0
F
},
{
0.0
,
0.0
}
};
{
0.0
F
,
0.0
F
}
};
DTYPE
answer
[
3
][
2
]
=
{
{
-
0.0
,
-
0.0
},
DTYPE
answer
[
3
][
2
]
=
{
{
-
0.0
F
,
-
0.0
F
},
{
-
0.0
,
-
0.0
},
{
-
0.0
F
,
-
0.0
F
},
{
-
0.0
,
-
0.0
}
};
{
-
0.0
F
,
-
0.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -160,7 +161,6 @@ TODO!!
...
@@ -160,7 +161,6 @@ TODO!!
*/
*/
/* test for Negate Function */
/* test for Negate Function */
extern
"C"
bool
TestNegate
()
bool
TestNegate
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST NEGATE] set every entry to its minus value
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST NEGATE] set every entry to its minus value
\n
"
);
...
...
source/test/TNormalize.cpp
查看文件 @
0887fae1
...
@@ -22,10 +22,12 @@
...
@@ -22,10 +22,12 @@
#include "TNormalize.h"
#include "TNormalize.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: normalized the data with normal distribution
* For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b.
/*
* where a and b are the scalar and bias respectively,
case 1: normalized the data with normal distribution
* and \epsilon is the adjustment parameter.
For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b.
where a and b are the scalar and bias respectively,
and \epsilon is the adjustment parameter.
*/
*/
bool
TestNormalize1
()
bool
TestNormalize1
()
{
{
...
@@ -87,14 +89,14 @@ bool TestNormalize1()
...
@@ -87,14 +89,14 @@ bool TestNormalize1()
for
(
int
i
=
0
;
i
<
bOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
bOrder
;
i
++
)
bUnitNum
*=
bDimSize
[
i
];
bUnitNum
*=
bDimSize
[
i
];
DTYPE
sData
[
2
][
3
]
=
{
{
1.0
,
2.0
,
3.0
},
DTYPE
sData
[
2
][
3
]
=
{
{
1.0
F
,
2.0
F
,
3.0
F
},
{
1.5
,
2.5
,
3.5
}
};
{
1.5
F
,
2.5
F
,
3.5
F
}
};
DTYPE
meanData
[
3
]
=
{
1.0
,
1.5
,
2.0
};
DTYPE
meanData
[
3
]
=
{
1.0
F
,
1.5
F
,
2.0
F
};
DTYPE
varData
[
3
]
=
{
1.0
,
1.0
,
4.0
};
DTYPE
varData
[
3
]
=
{
1.0
F
,
1.0
F
,
4.0
F
};
DTYPE
aData
[
2
][
3
]
=
{
{
1.0
,
1.0
,
1.0
},
DTYPE
aData
[
2
][
3
]
=
{
{
1.0
F
,
1.0
F
,
1.0
F
},
{
1.0
,
1.0
,
1.0
}
};
{
1.0
F
,
1.0
F
,
1.0
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
,
0.5
,
0.5
},
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
F
,
0.5
F
,
0.5
F
},
{
0.5
,
1.0
,
0.75
}
};
{
0.5
F
,
1.0
F
,
0.75
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -116,7 +118,7 @@ bool TestNormalize1()
...
@@ -116,7 +118,7 @@ bool TestNormalize1()
t
->
SetZeroAll
();
t
->
SetZeroAll
();
/* call normalize function */
/* call normalize function */
Normalize
(
s
,
t
,
0
,
mean
,
var
,
a
,
b
,
0.0
);
Normalize
(
s
,
t
,
0
,
mean
,
var
,
a
,
b
,
0.0
F
);
/* check results */
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
,
1e-4
,
0
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
,
1e-4
,
0
);
...
@@ -142,7 +144,7 @@ bool TestNormalize1()
...
@@ -142,7 +144,7 @@ bool TestNormalize1()
tGPU
->
SetZeroAll
();
tGPU
->
SetZeroAll
();
/* call Normalize function */
/* call Normalize function */
Normalize
(
sGPU
,
tGPU
,
0
,
meanGPU
,
varGPU
,
aGPU
,
bGPU
,
0.0
);
Normalize
(
sGPU
,
tGPU
,
0
,
meanGPU
,
varGPU
,
aGPU
,
bGPU
,
0.0
F
);
/* check results */
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
,
1e-4
,
0
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
,
1e-4
,
0
);
...
@@ -193,7 +195,6 @@ TODO!!
...
@@ -193,7 +195,6 @@ TODO!!
*/
*/
/* test for Normalize Function */
/* test for Normalize Function */
extern
"C"
bool
TestNormalize
()
bool
TestNormalize
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST NORMALIZE] normalized the data with normal distribution
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST NORMALIZE] normalized the data with normal distribution
\n
"
);
...
...
source/test/TPower.cpp
查看文件 @
0887fae1
...
@@ -23,8 +23,10 @@
...
@@ -23,8 +23,10 @@
#include "TPower.h"
#include "TPower.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: get the power(a, p)
* In this case, p=2.
/*
case 1: get the power(a, p)
In this case, p=2.
*/
*/
bool
TestPower1
()
bool
TestPower1
()
{
{
...
@@ -38,12 +40,12 @@ bool TestPower1()
...
@@ -38,12 +40,12 @@ bool TestPower1()
for
(
int
i
=
0
;
i
<
aOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
aOrder
;
i
++
)
aUnitNum
*=
aDimSize
[
i
];
aUnitNum
*=
aDimSize
[
i
];
DTYPE
aData
[
3
][
2
]
=
{
{
1.0
,
2.0
},
DTYPE
aData
[
3
][
2
]
=
{
{
1.0
F
,
2.0
F
},
{
3.0
,
4.0
},
{
3.0
F
,
4.0
F
},
{
5.0
,
6.0
}
};
{
5.0
F
,
6.0
F
}
};
DTYPE
answer
[
3
][
2
]
=
{
{
1.0
,
4.0
},
DTYPE
answer
[
3
][
2
]
=
{
{
1.0
F
,
4.0
F
},
{
9.0
,
16.0
},
{
9.0
F
,
16.0
F
},
{
25.0
,
36.0
}
};
{
25.0
F
,
36.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -55,7 +57,7 @@ bool TestPower1()
...
@@ -55,7 +57,7 @@ bool TestPower1()
a
->
SetData
(
aData
,
aUnitNum
);
a
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
/* call Power function */
Power
(
a
,
2.0
);
Power
(
a
,
2.0
F
);
/* check results */
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
...
@@ -71,7 +73,7 @@ bool TestPower1()
...
@@ -71,7 +73,7 @@ bool TestPower1()
aGPU
->
SetData
(
aData
,
aUnitNum
);
aGPU
->
SetData
(
aData
,
aUnitNum
);
/* call power function */
/* call power function */
Power
(
aGPU
,
2.0
);
Power
(
aGPU
,
2.0
F
);
/* check results */
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
...
@@ -91,8 +93,9 @@ bool TestPower1()
...
@@ -91,8 +93,9 @@ bool TestPower1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 2: get the power(a, p)
/*
* In this case, p=1.
case 2: get the power(a, p)
In this case, p=1.
*/
*/
bool
TestPower2
()
bool
TestPower2
()
{
{
...
@@ -106,12 +109,12 @@ bool TestPower2()
...
@@ -106,12 +109,12 @@ bool TestPower2()
for
(
int
i
=
0
;
i
<
aOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
aOrder
;
i
++
)
aUnitNum
*=
aDimSize
[
i
];
aUnitNum
*=
aDimSize
[
i
];
DTYPE
aData
[
3
][
2
]
=
{
{
0.0
,
1.0
},
DTYPE
aData
[
3
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
2.0
,
3.0
},
{
2.0
F
,
3.0
F
},
{
4.0
,
5.0
}
};
{
4.0
F
,
5.0
F
}
};
DTYPE
answer
[
3
][
2
]
=
{
{
0.0
,
1.0
},
DTYPE
answer
[
3
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
2.0
,
3.0
},
{
2.0
F
,
3.0
F
},
{
4.0
,
5.0
}
};
{
4.0
F
,
5.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -123,7 +126,7 @@ bool TestPower2()
...
@@ -123,7 +126,7 @@ bool TestPower2()
a
->
SetData
(
aData
,
aUnitNum
);
a
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
/* call Power function */
Power
(
a
,
1.0
);
Power
(
a
,
1.0
F
);
/* check results */
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
...
@@ -139,7 +142,7 @@ bool TestPower2()
...
@@ -139,7 +142,7 @@ bool TestPower2()
aGPU
->
SetData
(
aData
,
aUnitNum
);
aGPU
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
/* call Power function */
Power
(
aGPU
,
1.0
);
Power
(
aGPU
,
1.0
F
);
/* check results */
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
...
@@ -159,8 +162,9 @@ bool TestPower2()
...
@@ -159,8 +162,9 @@ bool TestPower2()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 3: get the power(a, p)
/*
* In this case, p=0.
case 3: get the power(a, p)
In this case, p=0.
*/
*/
bool
TestPower3
()
bool
TestPower3
()
{
{
...
@@ -174,12 +178,12 @@ bool TestPower3()
...
@@ -174,12 +178,12 @@ bool TestPower3()
for
(
int
i
=
0
;
i
<
aOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
aOrder
;
i
++
)
aUnitNum
*=
aDimSize
[
i
];
aUnitNum
*=
aDimSize
[
i
];
DTYPE
aData
[
3
][
2
]
=
{
{
0.0
,
1.0
},
DTYPE
aData
[
3
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
2.0
,
3.0
},
{
2.0
F
,
3.0
F
},
{
4.0
,
5.0
}
};
{
4.0
F
,
5.0
F
}
};
DTYPE
answer
[
3
][
2
]
=
{
{
1.0
,
1.0
},
DTYPE
answer
[
3
][
2
]
=
{
{
1.0
F
,
1.0
F
},
{
1.0
,
1.0
},
{
1.0
F
,
1.0
F
},
{
1.0
,
1.0
}
};
{
1.0
F
,
1.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -191,7 +195,7 @@ bool TestPower3()
...
@@ -191,7 +195,7 @@ bool TestPower3()
a
->
SetData
(
aData
,
aUnitNum
);
a
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
/* call Power function */
Power
(
a
,
0.0
);
Power
(
a
,
0.0
F
);
/* check results */
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
...
@@ -207,7 +211,7 @@ bool TestPower3()
...
@@ -207,7 +211,7 @@ bool TestPower3()
aGPU
->
SetData
(
aData
,
aUnitNum
);
aGPU
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
/* call Power function */
Power
(
aGPU
,
0.0
);
Power
(
aGPU
,
0.0
F
);
/* check results */
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
...
@@ -233,7 +237,6 @@ TODO!!
...
@@ -233,7 +237,6 @@ TODO!!
*/
*/
/* test for Power Function */
/* test for Power Function */
extern
"C"
bool
TestPower
()
bool
TestPower
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST POWER] get the power(a, p)
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST POWER] get the power(a, p)
\n
"
);
...
...
source/test/TRectify.cpp
查看文件 @
0887fae1
...
@@ -22,8 +22,10 @@
...
@@ -22,8 +22,10 @@
#include "TRectify.h"
#include "TRectify.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: test rectify function
* y = max(0, x)
/*
case 1: test rectify function
In this case, y = max(0, x)
*/
*/
bool
TestRectify1
()
bool
TestRectify1
()
{
{
...
@@ -47,10 +49,10 @@ bool TestRectify1()
...
@@ -47,10 +49,10 @@ bool TestRectify1()
for
(
int
i
=
0
;
i
<
yOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
yOrder
;
i
++
)
yUnitNum
*=
yDimSize
[
i
];
yUnitNum
*=
yDimSize
[
i
];
DTYPE
xData
[
2
][
3
]
=
{
{
0.0
,
-
1.0
,
2.0
},
DTYPE
xData
[
2
][
3
]
=
{
{
0.0
F
,
-
1.0
F
,
2.0
F
},
{
3.0
,
-
4.0
,
-
5.0
}
};
{
3.0
F
,
-
4.0
F
,
-
5.0
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
,
0.0
,
2.0
},
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
F
,
0.0
F
,
2.0
F
},
{
3.0
,
0.0
,
0.0
}
};
{
3.0
F
,
0.0
F
,
0.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -107,10 +109,11 @@ bool TestRectify1()
...
@@ -107,10 +109,11 @@ bool TestRectify1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 2: backward computation
/*
* dE/dx = dE/dy * dy/dx
case 2: backward computation
* rectified: y = max(0, x)
dE/dx = dE/dy * dy/dx
* In this case, lossName=CROSSENTROPY.
rectified: y = max(0, x)
In this case, lossName=CROSSENTROPY.
*/
*/
bool
TestRectify2
()
bool
TestRectify2
()
{
{
...
@@ -124,16 +127,16 @@ bool TestRectify2()
...
@@ -124,16 +127,16 @@ bool TestRectify2()
for
(
int
i
=
0
;
i
<
xOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
xOrder
;
i
++
)
xUnitNum
*=
xDimSize
[
i
];
xUnitNum
*=
xDimSize
[
i
];
DTYPE
xData
[
2
][
3
]
=
{
{
1.0
,
1.0
,
2.0
},
DTYPE
xData
[
2
][
3
]
=
{
{
1.0
F
,
1.0
F
,
2.0
F
},
{
2.0
,
4.0
,
5.0
}
};
{
2.0
F
,
4.0
F
,
5.0
F
}
};
DTYPE
yData
[
2
][
3
]
=
{
{
1.0
,
1.0
,
2.0
},
DTYPE
yData
[
2
][
3
]
=
{
{
1.0
F
,
1.0
F
,
2.0
F
},
{
2.0
,
4.0
,
5.0
}
};
{
2.0
F
,
4.0
F
,
5.0
F
}
};
DTYPE
goldData
[
2
][
3
]
=
{
{
1.0
,
1.0
,
1.0
},
DTYPE
goldData
[
2
][
3
]
=
{
{
1.0
F
,
1.0
F
,
1.0
F
},
{
1.0
,
1.0
,
1.0
}
};
{
1.0
F
,
1.0
F
,
1.0
F
}
};
DTYPE
dedyData
[
2
][
3
]
=
{
{
-
1.0
,
-
1.0
,
-
0.5
},
DTYPE
dedyData
[
2
][
3
]
=
{
{
-
1.0
F
,
-
1.0
F
,
-
0.5
F
},
{
-
0.5
,
-
0.25
,
-
0.2
}
};
{
-
0.5
F
,
-
0.25
F
,
-
0.2
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
-
1.0
,
-
1.0
,
-
0.5
},
DTYPE
answer
[
2
][
3
]
=
{
{
-
1.0
F
,
-
1.0
F
,
-
0.5
F
},
{
-
0.5
,
-
0.25
,
-
0.2
}
};
{
-
0.5
F
,
-
0.25
F
,
-
0.2
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -215,7 +218,6 @@ TODO!!
...
@@ -215,7 +218,6 @@ TODO!!
*/
*/
/* test for Rectify Function */
/* test for Rectify Function */
extern
"C"
bool
TestRectify
()
bool
TestRectify
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST RECTIFY] test rectify and its backward computation
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST RECTIFY] test rectify and its backward computation
\n
"
);
...
...
source/test/TReduceMax.cpp
查看文件 @
0887fae1
...
@@ -22,8 +22,10 @@
...
@@ -22,8 +22,10 @@
#include "TReduceMax.h"
#include "TReduceMax.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: get the max value of the items along a dimension of the tensor.
* In this case,
/*
case 1: get the max value of the items along a dimension of the tensor.
In this case,
(2, 4) -> (4), dim = 0
(2, 4) -> (4), dim = 0
(2, 4) -> (2), dim = 1
(2, 4) -> (2), dim = 1
*/
*/
...
@@ -57,10 +59,10 @@ bool TestReduceMax1()
...
@@ -57,10 +59,10 @@ bool TestReduceMax1()
for
(
int
i
=
0
;
i
<
tOrder2
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder2
;
i
++
)
tUnitNum2
*=
tDimSize2
[
i
];
tUnitNum2
*=
tDimSize2
[
i
];
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
,
5.0
,
2.0
,
3.0
},
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
F
,
5.0
F
,
2.0
F
,
3.0
F
},
{
4.0
,
1.0
,
6.0
,
7.0
}
};
{
4.0
F
,
1.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
answer1
[
4
]
=
{
4.0
,
5.0
,
6.0
,
7.0
};
DTYPE
answer1
[
4
]
=
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
};
DTYPE
answer2
[
2
]
=
{
5.0
,
7.0
};
DTYPE
answer2
[
2
]
=
{
5.0
F
,
7.0
F
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -134,7 +136,6 @@ TODO!!
...
@@ -134,7 +136,6 @@ TODO!!
*/
*/
/* test for ReduceMax Function */
/* test for ReduceMax Function */
extern
"C"
bool
TestReduceMax
()
bool
TestReduceMax
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST ReduceMax] get the max value of the items along a dimension of the tensor
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST ReduceMax] get the max value of the items along a dimension of the tensor
\n
"
);
...
...
source/test/TReduceMean.cpp
查看文件 @
0887fae1
...
@@ -22,6 +22,7 @@
...
@@ -22,6 +22,7 @@
#include "TReduceMean.h"
#include "TReduceMean.h"
namespace
nts
{
// namespace nt(NiuTrans.Tensor)
namespace
nts
{
// namespace nt(NiuTrans.Tensor)
/* case 1: get the mean value along a dimension of the tensor */
/* case 1: get the mean value along a dimension of the tensor */
bool
TestReduceMean1
()
bool
TestReduceMean1
()
{
{
...
@@ -53,10 +54,10 @@ bool TestReduceMean1()
...
@@ -53,10 +54,10 @@ bool TestReduceMean1()
for
(
int
i
=
0
;
i
<
tOrder2
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder2
;
i
++
)
tUnitNum2
*=
tDimSize2
[
i
];
tUnitNum2
*=
tDimSize2
[
i
];
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
answer1
[
4
]
=
{
2.0
,
3.0
,
4.0
,
5.0
};
DTYPE
answer1
[
4
]
=
{
2.0
F
,
3.0
F
,
4.0
F
,
5.0
F
};
DTYPE
answer2
[
2
]
=
{
1.5
,
5.5
};
DTYPE
answer2
[
2
]
=
{
1.5
F
,
5.5
F
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -124,104 +125,12 @@ bool TestReduceMean1()
...
@@ -124,104 +125,12 @@ bool TestReduceMean1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
bool
TestReduceMeanForLargescale
()
{
/* a tensor of size 10000 * 500 */
int
order
=
2
;
int
order_reduce
=
1
;
int
*
dimSize
=
new
int
[
order
];
dimSize
[
0
]
=
10000
;
dimSize
[
1
]
=
500
;
int
unitNum
=
1
;
for
(
int
i
=
0
;
i
<
order
;
i
++
)
unitNum
*=
dimSize
[
i
];
/* a tensor of size 500 */
int
*
dimSize_reduce_a
=
new
int
[
order_reduce
];
dimSize_reduce_a
[
0
]
=
500
;
int
unitNum_a
=
1
;
for
(
int
i
=
0
;
i
<
order_reduce
;
i
++
)
unitNum_a
*=
dimSize_reduce_a
[
i
];
/* a tensor of size 10000 */
int
*
dimSize_reduce_b
=
new
int
[
order_reduce
];
dimSize_reduce_b
[
0
]
=
10000
;
int
unitNum_b
=
1
;
for
(
int
i
=
0
;
i
<
order_reduce
;
i
++
)
unitNum_b
*=
dimSize_reduce_b
[
i
];
DTYPE
*
data
=
new
DTYPE
[
5000000
];
DTYPE
*
tmp
=
data
;
for
(
int
i
=
0
;
i
<
unitNum
;
i
++
)
*
tmp
++
=
1
;
DTYPE
answer_a
[
500
];
for
(
int
i
=
0
;
i
<
unitNum_a
;
i
++
)
answer_a
[
i
]
=
1
;
DTYPE
answer_b
[
10000
];
for
(
int
i
=
0
;
i
<
unitNum_b
;
i
++
)
answer_b
[
i
]
=
1
;
/* CPU test */
bool
cpuTest
=
true
;
/* create tensors */
XTensor
*
a
=
NewTensor
(
order
,
dimSize
);
XTensor
*
reduce_a
=
NewTensor
(
order_reduce
,
dimSize_reduce_a
);
XTensor
*
b
=
NewTensor
(
order
,
dimSize
);
XTensor
*
reduce_b
=
NewTensor
(
order_reduce
,
dimSize_reduce_b
);
/* initialize variables */
a
->
SetData
(
data
,
unitNum
);
b
->
SetData
(
data
,
unitNum
);
/* call reduce max function */
ReduceMean
(
a
,
reduce_a
,
0
);
ReduceMean
(
b
,
reduce_b
,
1
);
/* check results */
cpuTest
=
reduce_a
->
CheckData
(
answer_a
,
unitNum_a
)
&&
reduce_b
->
CheckData
(
answer_b
,
unitNum_b
);
#ifdef USE_CUDA
/* GPU test */
bool
gpuTest
=
true
;
/* create tensor */
XTensor
*
aGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
);
XTensor
*
reduce_aGPU
=
NewTensor
(
order_reduce
,
dimSize_reduce_a
,
X_FLOAT
);
XTensor
*
bGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
);
XTensor
*
reduce_bGPU
=
NewTensor
(
order_reduce
,
dimSize_reduce_b
,
X_FLOAT
);
/* Initialize variables */
aGPU
->
SetData
(
data
,
unitNum
);
bGPU
->
SetData
(
data
,
unitNum
);
/* call reduce max function */
ReduceMean
(
aGPU
,
reduce_aGPU
,
0
);
ReduceMean
(
bGPU
,
reduce_bGPU
,
1
);
/* check results */
gpuTest
=
reduce_aGPU
->
CheckData
(
answer_a
,
unitNum_a
)
&&
reduce_bGPU
->
CheckData
(
answer_b
,
unitNum_b
);
/* destroy variables */
delete
aGPU
,
bGPU
,
reduce_aGPU
,
reduce_bGPU
;
delete
[]
dimSize
,
dimSize_reduce_a
,
dimSize_reduce_b
;
return
cpuTest
&&
gpuTest
;
#else
/* destroy variables */
delete
a
;
delete
b
;
return
cpuTest
;
#endif // USE_CUDA
}
/* other cases */
/* other cases */
/*
/*
TODO!!
TODO!!
*/
*/
/* test for ReduceMean Function */
/* test for ReduceMean Function */
extern
"C"
bool
TestReduceMean
()
bool
TestReduceMean
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST ReduceMean] get the mean value along a dimension of the tensor
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST ReduceMean] get the mean value along a dimension of the tensor
\n
"
);
...
@@ -236,15 +145,6 @@ bool TestReduceMean()
...
@@ -236,15 +145,6 @@ bool TestReduceMean()
else
else
XPRINT
(
0
,
stdout
,
">> case 1 passed!
\n
"
);
XPRINT
(
0
,
stdout
,
">> case 1 passed!
\n
"
);
/* case 2 test */
caseFlag
=
TestReduceMeanForLargescale
();
if
(
!
caseFlag
)
{
returnFlag
=
false
;
XPRINT
(
0
,
stdout
,
">> case 2 failed!
\n
"
);
}
else
XPRINT
(
0
,
stdout
,
">> case 2 passed!
\n
"
);
///* other cases test */
///* other cases test */
///*
///*
//TODO!!
//TODO!!
...
...
source/test/TReduceMean.h
查看文件 @
0887fae1
...
@@ -24,13 +24,13 @@
...
@@ -24,13 +24,13 @@
#include "../core/ReduceMean.h"
#include "../core/ReduceMean.h"
namespace
nts
{
// namespace nt(NiuTrans.Tensor)
namespace
nts
{
// namespace nt
s
(NiuTrans.Tensor)
/* test for ReduceMean Function */
/* test for ReduceMean Function */
extern
"C"
extern
"C"
bool
TestReduceMean
();
bool
TestReduceMean
();
}
// namespace nt(NiuTrans.Tensor)
}
// namespace nt
s
(NiuTrans.Tensor)
#endif // __TEST_REDUCEMEAN_H__
#endif // __TEST_REDUCEMEAN_H__
source/test/TReduceSum.cpp
查看文件 @
0887fae1
...
@@ -22,8 +22,10 @@
...
@@ -22,8 +22,10 @@
#include "TReduceSum.h"
#include "TReduceSum.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: sum the items along a dimension of the tensor.
* In this case,
/*
case 1: sum the items along a dimension of the tensor.
In this case,
(2, 4) -> (4), dim = 0
(2, 4) -> (4), dim = 0
(2, 4) -> (2), dim = 1
(2, 4) -> (2), dim = 1
*/
*/
...
@@ -57,10 +59,10 @@ bool TestReduceSum1()
...
@@ -57,10 +59,10 @@ bool TestReduceSum1()
for
(
int
i
=
0
;
i
<
tOrder2
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder2
;
i
++
)
tUnitNum2
*=
tDimSize2
[
i
];
tUnitNum2
*=
tDimSize2
[
i
];
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
answer1
[
4
]
=
{
4.0
,
6.0
,
8.0
,
10.0
};
DTYPE
answer1
[
4
]
=
{
4.0
F
,
6.0
F
,
8.0
F
,
10.0
F
};
DTYPE
answer2
[
2
]
=
{
6.0
,
22.0
};
DTYPE
answer2
[
2
]
=
{
6.0
F
,
22.0
F
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -128,103 +130,12 @@ bool TestReduceSum1()
...
@@ -128,103 +130,12 @@ bool TestReduceSum1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
bool
TestReduceSumForLargescale
()
{
/* a tensor of size 10000 * 500 */
int
order
=
2
;
int
orderReduce
=
1
;
int
*
dimSize
=
new
int
[
order
];
dimSize
[
0
]
=
10000
;
dimSize
[
1
]
=
500
;
int
unitNum
=
1
;
for
(
int
i
=
0
;
i
<
order
;
i
++
)
unitNum
*=
dimSize
[
i
];
/* a tensor of size 500 */
int
*
dimSize_reduce_a
=
new
int
[
orderReduce
];
dimSize_reduce_a
[
0
]
=
500
;
int
unitNum_a
=
1
;
for
(
int
i
=
0
;
i
<
orderReduce
;
i
++
)
unitNum_a
*=
dimSize_reduce_a
[
i
];
/* a tensor of size 10000 */
int
*
dimSize_reduce_b
=
new
int
[
orderReduce
];
dimSize_reduce_b
[
0
]
=
10000
;
int
unitNum_b
=
1
;
for
(
int
i
=
0
;
i
<
orderReduce
;
i
++
)
unitNum_b
*=
dimSize_reduce_b
[
i
];
DTYPE
*
data
=
new
DTYPE
[
5000000
];
DTYPE
*
tmp
=
data
;
for
(
int
i
=
0
;
i
<
unitNum
;
i
++
)
*
tmp
++
=
1
;
DTYPE
answer_a
[
500
];
for
(
int
i
=
0
;
i
<
unitNum_a
;
i
++
)
answer_a
[
i
]
=
10000
;
DTYPE
answer_b
[
10000
];
for
(
int
i
=
0
;
i
<
unitNum_b
;
i
++
)
answer_b
[
i
]
=
500
;
/* CPU test */
bool
cpuTest
=
true
;
/* create tensors */
XTensor
*
a
=
NewTensor
(
order
,
dimSize
);
XTensor
*
reduce_a
=
NewTensor
(
orderReduce
,
dimSize_reduce_a
);
XTensor
*
b
=
NewTensor
(
order
,
dimSize
);
XTensor
*
reduce_b
=
NewTensor
(
orderReduce
,
dimSize_reduce_b
);
/* initialize variables */
a
->
SetData
(
data
,
unitNum
);
b
->
SetData
(
data
,
unitNum
);
/* call reduce sum function */
ReduceSum
(
a
,
reduce_a
,
0
);
ReduceSum
(
b
,
reduce_b
,
1
);
/* check results */
cpuTest
=
reduce_a
->
CheckData
(
answer_a
,
unitNum_a
)
&&
reduce_b
->
CheckData
(
answer_b
,
unitNum_b
);
#ifdef USE_CUDA
/* GPU test */
bool
gpuTest
=
true
;
/* create tensor */
XTensor
*
aGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
);
XTensor
*
reduce_aGPU
=
NewTensor
(
orderReduce
,
dimSize_reduce_a
,
X_FLOAT
);
XTensor
*
bGPU
=
NewTensor
(
order
,
dimSize
,
X_FLOAT
);
XTensor
*
reduce_bGPU
=
NewTensor
(
orderReduce
,
dimSize_reduce_b
,
X_FLOAT
);
/* Initialize variables */
aGPU
->
SetData
(
data
,
unitNum
);
bGPU
->
SetData
(
data
,
unitNum
);
/* call reduce max function */
ReduceSum
(
aGPU
,
reduce_aGPU
,
0
);
ReduceSum
(
bGPU
,
reduce_bGPU
,
1
);
/* check results */
gpuTest
=
reduce_aGPU
->
CheckData
(
answer_a
,
unitNum_a
)
&&
reduce_bGPU
->
CheckData
(
answer_b
,
unitNum_b
);
/* destroy variables */
delete
aGPU
,
bGPU
,
reduce_aGPU
,
reduce_bGPU
;
delete
[]
dimSize
,
dimSize_reduce_a
,
dimSize_reduce_b
;
return
cpuTest
&&
gpuTest
;
#else
/* destroy variables */
delete
a
;
delete
b
;
return
cpuTest
;
#endif // USE_CUDA
}
/* other cases */
/* other cases */
/*
/*
TODO!!
TODO!!
*/
*/
/* test for ReduceSum Function */
/* test for ReduceSum Function */
extern
"C"
bool
TestReduceSum
()
bool
TestReduceSum
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST ReduceSum] sum the items along a dimension of the tensor.
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST ReduceSum] sum the items along a dimension of the tensor.
\n
"
);
...
@@ -239,15 +150,6 @@ bool TestReduceSum()
...
@@ -239,15 +150,6 @@ bool TestReduceSum()
else
else
XPRINT
(
0
,
stdout
,
">> case 1 passed!
\n
"
);
XPRINT
(
0
,
stdout
,
">> case 1 passed!
\n
"
);
/* case 2 test */
caseFlag
=
TestReduceSumForLargescale
();
if
(
!
caseFlag
)
{
returnFlag
=
false
;
XPRINT
(
0
,
stdout
,
">> case 2 failed!
\n
"
);
}
else
XPRINT
(
0
,
stdout
,
">> case 2 passed!
\n
"
);
/* other cases test */
/* other cases test */
/*
/*
TODO!!
TODO!!
...
...
source/test/TReduceSumSquared.cpp
查看文件 @
0887fae1
...
@@ -22,9 +22,11 @@
...
@@ -22,9 +22,11 @@
#include "TReduceSumSquared.h"
#include "TReduceSumSquared.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: squared sum of the items along a dimension of the tensor.
* For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2.
/*
* In this case, (2, 4) -> (4), dim = 0.
case 1: squared sum of the items along a dimension of the tensor.
For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2.
In this case, (2, 4) -> (4), dim = 0.
*/
*/
bool
TestReduceSumSquared1
()
bool
TestReduceSumSquared1
()
{
{
...
@@ -56,10 +58,10 @@ bool TestReduceSumSquared1()
...
@@ -56,10 +58,10 @@ bool TestReduceSumSquared1()
for
(
int
i
=
0
;
i
<
shiftOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
shiftOrder
;
i
++
)
shiftUnitNum
*=
shiftDimSize
[
i
];
shiftUnitNum
*=
shiftDimSize
[
i
];
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
shiftData
[
4
]
=
{
1.0
,
-
1.0
,
-
1.0
,
0.0
};
DTYPE
shiftData
[
4
]
=
{
1.0
F
,
-
1.0
F
,
-
1.0
F
,
0.0
F
};
DTYPE
answer
[
4
]
=
{
10.0
,
40.0
,
58.0
,
58.0
};
DTYPE
answer
[
4
]
=
{
10.0
F
,
40.0
F
,
58.0
F
,
58.0
F
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -125,9 +127,10 @@ bool TestReduceSumSquared1()
...
@@ -125,9 +127,10 @@ bool TestReduceSumSquared1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 1: squared sum of the items along a dimension of the tensor.
/*
* For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2.
case 2: squared sum of the items along a dimension of the tensor.
* In this case, (2, 4) -> (2), dim = 1.
For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2.
In this case, (2, 4) -> (2), dim = 1.
*/
*/
bool
TestReduceSumSquared2
()
bool
TestReduceSumSquared2
()
{
{
...
@@ -141,7 +144,7 @@ bool TestReduceSumSquared2()
...
@@ -141,7 +144,7 @@ bool TestReduceSumSquared2()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
sUnitNum
*=
sDimSize
[
i
];
/* a output tensor of size (
4
) */
/* a output tensor of size (
2
) */
int
tOrder
=
1
;
int
tOrder
=
1
;
int
*
tDimSize
=
new
int
[
tOrder
];
int
*
tDimSize
=
new
int
[
tOrder
];
tDimSize
[
0
]
=
2
;
tDimSize
[
0
]
=
2
;
...
@@ -150,7 +153,7 @@ bool TestReduceSumSquared2()
...
@@ -150,7 +153,7 @@ bool TestReduceSumSquared2()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
/* a shift tensor of size (
4
) */
/* a shift tensor of size (
2
) */
int
shiftOrder
=
1
;
int
shiftOrder
=
1
;
int
*
shiftDimSize
=
new
int
[
shiftOrder
];
int
*
shiftDimSize
=
new
int
[
shiftOrder
];
shiftDimSize
[
0
]
=
2
;
shiftDimSize
[
0
]
=
2
;
...
@@ -159,10 +162,10 @@ bool TestReduceSumSquared2()
...
@@ -159,10 +162,10 @@ bool TestReduceSumSquared2()
for
(
int
i
=
0
;
i
<
shiftOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
shiftOrder
;
i
++
)
shiftUnitNum
*=
shiftDimSize
[
i
];
shiftUnitNum
*=
shiftDimSize
[
i
];
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
shiftData
[
2
]
=
{
-
1.0
,
1.0
};
DTYPE
shiftData
[
2
]
=
{
-
1.0
F
,
1.0
F
};
DTYPE
answer
[
2
]
=
{
30.0
,
86.0
};
DTYPE
answer
[
2
]
=
{
30.0
F
,
86.0
F
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -234,7 +237,6 @@ TODO!!
...
@@ -234,7 +237,6 @@ TODO!!
*/
*/
/* test for ReduceSumSquared Function */
/* test for ReduceSumSquared Function */
extern
"C"
bool
TestReduceSumSquared
()
bool
TestReduceSumSquared
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST ReduceSumSquared] squared sum of the items along a dimension of the tensor
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST ReduceSumSquared] squared sum of the items along a dimension of the tensor
\n
"
);
...
...
source/test/TReduceVariance.cpp
查看文件 @
0887fae1
...
@@ -22,9 +22,11 @@
...
@@ -22,9 +22,11 @@
#include "TReduceVariance.h"
#include "TReduceVariance.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: variance of the items along a dimension of the tensor.
* For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2.
/*
* In this case, (2, 4) -> (4), dim = 0.
case 1: variance of the items along a dimension of the tensor.
For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2.
In this case, (2, 4) -> (4), dim = 0.
*/
*/
bool
TestReduceVariance1
()
bool
TestReduceVariance1
()
{
{
...
@@ -131,7 +133,6 @@ TODO!!
...
@@ -131,7 +133,6 @@ TODO!!
*/
*/
/* test for ReduceVariance Function */
/* test for ReduceVariance Function */
extern
"C"
bool
TestReduceVariance
()
bool
TestReduceVariance
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST ReduceVariance] variance of the items along a dimension of the tensor
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST ReduceVariance] variance of the items along a dimension of the tensor
\n
"
);
...
...
source/test/TScaleAndShift.cpp
查看文件 @
0887fae1
...
@@ -22,8 +22,10 @@
...
@@ -22,8 +22,10 @@
#include "TScaleAndShift.h"
#include "TScaleAndShift.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: scale and shift all tensor entires.
* p = p * scale + shift
/*
case 1: scale and shift all tensor entires.
p = p * scale + shift
*/
*/
bool
TestScaleAndShift1
()
bool
TestScaleAndShift1
()
{
{
...
@@ -42,8 +44,8 @@ bool TestScaleAndShift1()
...
@@ -42,8 +44,8 @@ bool TestScaleAndShift1()
DTYPE
answer
[
2
][
4
]
=
{
{
0.5
F
,
2.5
F
,
4.5
F
,
6.5
F
},
DTYPE
answer
[
2
][
4
]
=
{
{
0.5
F
,
2.5
F
,
4.5
F
,
6.5
F
},
{
8.5
F
,
10.5
F
,
12.5
F
,
14.5
F
}
};
{
8.5
F
,
10.5
F
,
12.5
F
,
14.5
F
}
};
DTYPE
scaleFactor
=
2.0
;
DTYPE
scaleFactor
=
2.0
F
;
DTYPE
shiftFactor
=
0.5
;
DTYPE
shiftFactor
=
0.5
F
;
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -97,7 +99,6 @@ TODO!!
...
@@ -97,7 +99,6 @@ TODO!!
*/
*/
/* test for ScaleAndShift Function */
/* test for ScaleAndShift Function */
extern
"C"
bool
TestScaleAndShift
()
bool
TestScaleAndShift
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST ScaleAndShift] scale and shift all tensor entires
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST ScaleAndShift] scale and shift all tensor entires
\n
"
);
...
...
source/test/TSelect.cpp
查看文件 @
0887fae1
...
@@ -20,12 +20,14 @@
...
@@ -20,12 +20,14 @@
*/
*/
#include "TSelect.h"
#include "TSelect.h"
#include "../xc/Mycode.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: test SelectRange function.
* It can generate a tensor with seleccted data
/*
* in range[low,high] along the given dimension.
case 1: test SelectRange function.
* In this case, (2, 2, 4) -> (2, 2, 2), dim = 2, low = 1, high = 3.
It can generate a tensor with seleccted data in range[low,high] along the given dimension.
In this case, (2, 2, 4) -> (2, 2, 2), dim = 2, low = 1, high = 3.
*/
*/
bool
TestSelect1
()
bool
TestSelect1
()
{
{
...
@@ -76,25 +78,25 @@ bool TestSelect1()
...
@@ -76,25 +78,25 @@ bool TestSelect1()
/* check results */
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
return
cpuTest
;
#ifdef USE_CUDA
#ifdef USE_CUDA
/* GPU test */
/* GPU test */
bool
gpuTest
=
true
;
bool
gpuTest
=
true
;
/* create tensors */
/* create tensors */
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
sOrder
,
s
DimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
t
DimSize
,
X_FLOAT
,
1.0
F
,
0
);
/* initialize variables */
/* initialize variables */
sGPU
->
SetData
(
sData
,
sUnitNum
);
sGPU
->
SetData
(
sData
,
sUnitNum
);
tGPU
->
SetZeroAll
();
tGPU
->
SetZeroAll
();
/* call Select function */
/* call Select function */
SelectRange
(
sGPU
,
1
,
1
,
3
,
tGPU
);
SelectRange
(
sGPU
,
2
,
1
,
3
,
tGPU
);
/* check results */
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
s
UnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
t
UnitNum
);
/* destroy variables */
/* destroy variables */
delete
s
;
delete
s
;
delete
t
;
delete
t
;
...
@@ -121,7 +123,6 @@ TODO!!
...
@@ -121,7 +123,6 @@ TODO!!
*/
*/
/* test for Select Function */
/* test for Select Function */
extern
"C"
bool
TestSelect
()
bool
TestSelect
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST Select] generate a tensor with seleccted data in range[low,high] along the given dimension
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST Select] generate a tensor with seleccted data in range[low,high] along the given dimension
\n
"
);
...
...
source/test/TSetAscendingOrder.cpp
查看文件 @
0887fae1
...
@@ -22,6 +22,7 @@
...
@@ -22,6 +22,7 @@
#include "TSetAscendingOrder.h"
#include "TSetAscendingOrder.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: set the cell to the ascending order along a given dimension.
/* case 1: set the cell to the ascending order along a given dimension.
*/
*/
bool
TestSetAscendingOrder1
()
bool
TestSetAscendingOrder1
()
...
@@ -92,7 +93,6 @@ TODO!!
...
@@ -92,7 +93,6 @@ TODO!!
*/
*/
/* test for SetAscendingOrder Function */
/* test for SetAscendingOrder Function */
extern
"C"
bool
TestSetAscendingOrder
()
bool
TestSetAscendingOrder
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST SetAscendingOrder] set the cell to the ascending order along a given dimension
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST SetAscendingOrder] set the cell to the ascending order along a given dimension
\n
"
);
...
...
source/test/TSetData.cpp
查看文件 @
0887fae1
...
@@ -22,8 +22,8 @@
...
@@ -22,8 +22,8 @@
#include "TSetData.h"
#include "TSetData.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: set the cell to the ascending order along a given dimension.
*/
/* case 1: set the cell to the ascending order along a given dimension.
*/
bool
TestSetData1
()
bool
TestSetData1
()
{
{
/* a input tensor of size (2, 4) */
/* a input tensor of size (2, 4) */
...
@@ -83,7 +83,6 @@ TODO!!
...
@@ -83,7 +83,6 @@ TODO!!
*/
*/
/* test for SetData Function */
/* test for SetData Function */
extern
"C"
bool
TestSetData
()
bool
TestSetData
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST SetData] set the data of tensor
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST SetData] set the data of tensor
\n
"
);
...
...
source/test/TSigmoid.cpp
查看文件 @
0887fae1
...
@@ -23,9 +23,11 @@
...
@@ -23,9 +23,11 @@
#include "TSigmoid.h"
#include "TSigmoid.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: test Sigmoid function and SigmoidBackward function.
* sigmoid function: y = 1/(1+exp(-x))
/*
* backward computation: dE/ds = dE/dy * dy/dx
case 1: test Sigmoid function and SigmoidBackward function.
sigmoid function: y = 1/(1+exp(-x))
backward computation: dE/ds = dE/dy * dy/dx
*/
*/
bool
TestSigmoid1
()
bool
TestSigmoid1
()
{
{
...
@@ -124,9 +126,10 @@ bool TestSigmoid1()
...
@@ -124,9 +126,10 @@ bool TestSigmoid1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 2: test Sigmoid function and SigmoidBackward function.
/*
* sigmoid function: y = 1/(1+exp(-x))
case 2: test Sigmoid function and SigmoidBackward function.
* backward computation: dE/ds = dE/dy * dy/dx
sigmoid function: y = 1/(1+exp(-x))
backward computation: dE/ds = dE/dy * dy/dx
*/
*/
bool
TestSigmoid2
()
bool
TestSigmoid2
()
{
{
...
@@ -234,7 +237,6 @@ bool TestSigmoid2()
...
@@ -234,7 +237,6 @@ bool TestSigmoid2()
*/
*/
/* test for Sigmoid Function */
/* test for Sigmoid Function */
extern
"C"
bool
TestSigmoid
()
bool
TestSigmoid
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST SIGMOID] sigmoid function and its backward computation
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST SIGMOID] sigmoid function and its backward computation
\n
"
);
...
...
source/test/TSoftmax.cpp
查看文件 @
0887fae1
...
@@ -24,8 +24,10 @@
...
@@ -24,8 +24,10 @@
#include "TSoftmax.h"
#include "TSoftmax.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: test Softmax function.
* softmax function: y = e^x / \sum_{i} e^{x_i}
/*
case 1: test Softmax function.
softmax function: y = e^x / \sum_{i} e^{x_i}
*/
*/
bool
TestSoftmax1
()
bool
TestSoftmax1
()
{
{
...
@@ -96,8 +98,9 @@ bool TestSoftmax1()
...
@@ -96,8 +98,9 @@ bool TestSoftmax1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 2: test SoftmaxBackward function.
/*
* SoftmaxBackward function: dE/dx_j = -gold_j + y_j
case 2: test SoftmaxBackward function.
SoftmaxBackward function: dE/dx_j = -gold_j + y_j
*/
*/
bool
TestSoftmax2
()
bool
TestSoftmax2
()
{
{
...
@@ -200,7 +203,6 @@ bool TestSoftmax2()
...
@@ -200,7 +203,6 @@ bool TestSoftmax2()
*/
*/
/* test for Softmax Function */
/* test for Softmax Function */
extern
"C"
bool
TestSoftmax
()
bool
TestSoftmax
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST SOFTMAX] softmax function and its backward computation
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST SOFTMAX] softmax function and its backward computation
\n
"
);
...
...
source/test/TSort.cpp
查看文件 @
0887fae1
...
@@ -22,7 +22,8 @@
...
@@ -22,7 +22,8 @@
#include "TSort.h"
#include "TSort.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: sort the tensor along a given dimension*/
/* case 1: sort the tensor along a given dimension */
bool
TestSort1
()
bool
TestSort1
()
{
{
/* a tensor of size (2, 4) */
/* a tensor of size (2, 4) */
...
@@ -35,10 +36,10 @@ bool TestSort1()
...
@@ -35,10 +36,10 @@ bool TestSort1()
for
(
int
i
=
0
;
i
<
order
;
i
++
)
for
(
int
i
=
0
;
i
<
order
;
i
++
)
unitNum
*=
dimSize
[
i
];
unitNum
*=
dimSize
[
i
];
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
answer
[
2
][
4
]
=
{
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
},
DTYPE
answer
[
2
][
4
]
=
{
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
},
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
}
};
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -104,10 +105,10 @@ bool TestSort2()
...
@@ -104,10 +105,10 @@ bool TestSort2()
for
(
int
i
=
0
;
i
<
order
;
i
++
)
for
(
int
i
=
0
;
i
<
order
;
i
++
)
unitNum
*=
dimSize
[
i
];
unitNum
*=
dimSize
[
i
];
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
answer
[
2
][
4
]
=
{
{
3.0
,
2.0
,
1.0
,
0.0
},
DTYPE
answer
[
2
][
4
]
=
{
{
3.0
F
,
2.0
F
,
1.0
F
,
0.0
F
},
{
7.0
,
6.0
,
5.0
,
4.0
}
};
{
7.0
F
,
6.0
F
,
5.0
F
,
4.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -166,7 +167,6 @@ TODO!!
...
@@ -166,7 +167,6 @@ TODO!!
*/
*/
/* test for Sort Function */
/* test for Sort Function */
extern
"C"
bool
TestSort
()
bool
TestSort
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST SORT] sort the tensor along a given dimension
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST SORT] sort the tensor along a given dimension
\n
"
);
...
...
source/test/TSplit.cpp
查看文件 @
0887fae1
...
@@ -19,18 +19,17 @@
...
@@ -19,18 +19,17 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-13
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-13
*/
*/
#include "../XTensor.h"
#include "TSplit.h"
#include "../XDevice.h"
#include "../core/Split.h"
#include "../XList.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3)
* In this case, 4 * 3 -> 2 * 2 * 3, whereToSplit=0, splitNum=2.
/*
case 1: transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3)
In this case, (4, 3) -> (2, 2, 3), whereToSplit=0, splitNum=2.
*/
*/
bool
TestSplit1
()
bool
TestSplit1
()
{
{
/* a source tensor of size
4 * 3
*/
/* a source tensor of size
(4, 3)
*/
int
sOrder
=
2
;
int
sOrder
=
2
;
int
*
sDimSize
=
new
int
[
sOrder
];
int
*
sDimSize
=
new
int
[
sOrder
];
sDimSize
[
0
]
=
4
;
sDimSize
[
0
]
=
4
;
...
@@ -40,7 +39,7 @@ bool TestSplit1()
...
@@ -40,7 +39,7 @@ bool TestSplit1()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
sUnitNum
*=
sDimSize
[
i
];
/* a target tensor of size
2 * 2 * 3
*/
/* a target tensor of size
(2, 2, 3)
*/
int
tOrder
=
3
;
int
tOrder
=
3
;
int
*
tDimSize
=
new
int
[
tOrder
];
int
*
tDimSize
=
new
int
[
tOrder
];
tDimSize
[
0
]
=
2
;
tDimSize
[
0
]
=
2
;
...
@@ -109,12 +108,13 @@ bool TestSplit1()
...
@@ -109,12 +108,13 @@ bool TestSplit1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 2: transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3)
/*
* In this case, 3 * 4 -> 2 * 3 * 2, whereToSplit=1, splitNum=2.
case 2: transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3)
In this case, (3, 4) -> (2, 3, 2), whereToSplit=1, splitNum=2.
*/
*/
bool
TestSplit2
()
bool
TestSplit2
()
{
{
/* a source tensor of size
3 * 4
*/
/* a source tensor of size
(3, 4)
*/
int
sOrder
=
2
;
int
sOrder
=
2
;
int
*
sDimSize
=
new
int
[
sOrder
];
int
*
sDimSize
=
new
int
[
sOrder
];
sDimSize
[
0
]
=
3
;
sDimSize
[
0
]
=
3
;
...
@@ -124,7 +124,7 @@ bool TestSplit2()
...
@@ -124,7 +124,7 @@ bool TestSplit2()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
sUnitNum
*=
sDimSize
[
i
];
/* a target tensor of size
2 * 3 * 2
*/
/* a target tensor of size
(2, 3, 2)
*/
int
tOrder
=
3
;
int
tOrder
=
3
;
int
*
tDimSize
=
new
int
[
tOrder
];
int
*
tDimSize
=
new
int
[
tOrder
];
tDimSize
[
0
]
=
2
;
tDimSize
[
0
]
=
2
;
...
@@ -194,8 +194,9 @@ bool TestSplit2()
...
@@ -194,8 +194,9 @@ bool TestSplit2()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 3: split a big tensor into small tensors
/*
* In this case, 3 * 4 -> 2 * (3 * 2) , whereToSplit=1, splitNum=2.
case 3: split a big tensor into small tensors
In this case, (3, 4) -> 2 * (3, 2) , whereToSplit=1, splitNum=2.
*/
*/
bool
TestSplit3
()
bool
TestSplit3
()
{
{
...
@@ -203,7 +204,7 @@ bool TestSplit3()
...
@@ -203,7 +204,7 @@ bool TestSplit3()
XList
tList
;
XList
tList
;
tList
=
XList
();
tList
=
XList
();
/* a source tensor of size (3
*
4) */
/* a source tensor of size (3
,
4) */
int
sOrder
=
2
;
int
sOrder
=
2
;
int
*
sDimSize
=
new
int
[
sOrder
];
int
*
sDimSize
=
new
int
[
sOrder
];
sDimSize
[
0
]
=
3
;
sDimSize
[
0
]
=
3
;
...
@@ -213,7 +214,7 @@ bool TestSplit3()
...
@@ -213,7 +214,7 @@ bool TestSplit3()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
sUnitNum
*=
sDimSize
[
i
];
/* a target tensor of size (3
*
2) */
/* a target tensor of size (3
,
2) */
int
tOrder1
=
2
;
int
tOrder1
=
2
;
int
*
tDimSize1
=
new
int
[
tOrder1
];
int
*
tDimSize1
=
new
int
[
tOrder1
];
tDimSize1
[
0
]
=
3
;
tDimSize1
[
0
]
=
3
;
...
@@ -313,10 +314,9 @@ TODO!!
...
@@ -313,10 +314,9 @@ TODO!!
*/
*/
/* test for Split Function */
/* test for Split Function */
extern
"C"
bool
TestSplit
()
bool
TestSplit
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST SPLIT]
-------------
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST SPLIT]
split a big tensor into small tensors
\n
"
);
bool
returnFlag
=
true
,
caseFlag
=
true
;
bool
returnFlag
=
true
,
caseFlag
=
true
;
/* case 1 test */
/* case 1 test */
...
...
source/test/TSum.cpp
查看文件 @
0887fae1
...
@@ -22,7 +22,8 @@
...
@@ -22,7 +22,8 @@
#include "TSum.h"
#include "TSum.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1 */
/* case 1: tensor summation c = a + b * \beta */
bool
TestSum1
()
bool
TestSum1
()
{
{
/* a tensor of size (2, 4) */
/* a tensor of size (2, 4) */
...
@@ -35,12 +36,12 @@ bool TestSum1()
...
@@ -35,12 +36,12 @@ bool TestSum1()
for
(
int
i
=
0
;
i
<
order
;
i
++
)
for
(
int
i
=
0
;
i
<
order
;
i
++
)
unitNum
*=
dimSize
[
i
];
unitNum
*=
dimSize
[
i
];
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
bData
[
2
][
4
]
=
{
{
1.0
,
-
1.0
,
-
3.0
,
-
5.0
},
DTYPE
bData
[
2
][
4
]
=
{
{
1.0
F
,
-
1.0
F
,
-
3.0
F
,
-
5.0
F
},
{
-
7.0
,
-
9.0
,
-
11.0
,
-
13.0
}
};
{
-
7.0
F
,
-
9.0
F
,
-
11.0
F
,
-
13.0
F
}
};
DTYPE
answer
[
2
][
4
]
=
{
{
1.0
,
0.0
,
-
1.0
,
-
2.0
},
DTYPE
answer
[
2
][
4
]
=
{
{
1.0
F
,
0.0
F
,
-
1.0
F
,
-
2.0
F
},
{
-
3.0
,
-
4.0
,
-
5.0
,
-
6.0
}
};
{
-
3.0
F
,
-
4.0
F
,
-
5.0
F
,
-
6.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -95,7 +96,7 @@ bool TestSum1()
...
@@ -95,7 +96,7 @@ bool TestSum1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* case 2 */
/* case 2
: tensor summation c = a + b * \beta
*/
bool
TestSum2
()
bool
TestSum2
()
{
{
/* a tensor of size (2, 4) */
/* a tensor of size (2, 4) */
...
@@ -108,12 +109,12 @@ bool TestSum2()
...
@@ -108,12 +109,12 @@ bool TestSum2()
for
(
int
i
=
0
;
i
<
order
;
i
++
)
{
for
(
int
i
=
0
;
i
<
order
;
i
++
)
{
unitNum
*=
dimSize
[
i
];
unitNum
*=
dimSize
[
i
];
}
}
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
bData
[
2
][
4
]
=
{
{
1.0
,
-
1.0
,
-
3.0
,
-
5.0
},
DTYPE
bData
[
2
][
4
]
=
{
{
1.0
F
,
-
1.0
F
,
-
3.0
F
,
-
5.0
F
},
{
-
7.0
,
-
9.0
,
-
11.0
,
-
13.0
}
};
{
-
7.0
F
,
-
9.0
F
,
-
11.0
F
,
-
13.0
F
}
};
DTYPE
answer
[
2
][
4
]
=
{
{
0.5
,
0.5
,
0.5
,
0.5
},
DTYPE
answer
[
2
][
4
]
=
{
{
0.5
F
,
0.5
F
,
0.5
F
,
0.5
F
},
{
0.5
,
0.5
,
0.5
,
0.5
}
};
{
0.5
F
,
0.5
F
,
0.5
F
,
0.5
F
}
};
float
beta
=
0.5
F
;
float
beta
=
0.5
F
;
/* CPU test */
/* CPU test */
...
@@ -129,7 +130,7 @@ bool TestSum2()
...
@@ -129,7 +130,7 @@ bool TestSum2()
b
->
SetData
(
bData
,
unitNum
);
b
->
SetData
(
bData
,
unitNum
);
c
->
SetZeroAll
();
c
->
SetZeroAll
();
/* call
s
um function */
/* call
S
um function */
Sum
(
a
,
b
,
c
,
beta
);
Sum
(
a
,
b
,
c
,
beta
);
/* check results */
/* check results */
...
@@ -149,7 +150,7 @@ bool TestSum2()
...
@@ -149,7 +150,7 @@ bool TestSum2()
bGPU
->
SetData
(
bData
,
unitNum
);
bGPU
->
SetData
(
bData
,
unitNum
);
cGPU
->
SetZeroAll
();
cGPU
->
SetZeroAll
();
/* call
s
um function */
/* call
S
um function */
Sum
(
aGPU
,
bGPU
,
cGPU
,
beta
);
Sum
(
aGPU
,
bGPU
,
cGPU
,
beta
);
/* check results */
/* check results */
...
@@ -182,8 +183,7 @@ bool TestSum2()
...
@@ -182,8 +183,7 @@ bool TestSum2()
*/
*/
/* test for Sum Function */
/* test for Sum Function */
extern
"C"
bool
TestSum
()
bool
TestSum
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST SUM] tensor summation c = a + b * beta
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST SUM] tensor summation c = a + b * beta
\n
"
);
bool
returnFlag
=
true
,
caseFlag
=
true
;
bool
returnFlag
=
true
,
caseFlag
=
true
;
...
...
source/test/TSumByColumnTV.cpp
查看文件 @
0887fae1
...
@@ -22,9 +22,10 @@
...
@@ -22,9 +22,10 @@
#include "TSumByColumnTV.h"
#include "TSumByColumnTV.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: test SumByColumnTV function
* sum of a tensor and a vector (column vector)
/*
* in a column by column manner
case 1: test SumByColumnTV function
sum of a tensor and a vector (column vector) in a column by column manner
*/
*/
bool
TestSumByColumnTV1
()
bool
TestSumByColumnTV1
()
{
{
...
@@ -58,12 +59,12 @@ bool TestSumByColumnTV1()
...
@@ -58,12 +59,12 @@ bool TestSumByColumnTV1()
for
(
int
i
=
0
;
i
<
cOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
cOrder
;
i
++
)
cUnitNum
*=
cDimSize
[
i
];
cUnitNum
*=
cDimSize
[
i
];
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
bData
[
2
][
1
]
=
{
{
1.0
},
DTYPE
bData
[
2
][
1
]
=
{
{
1.0
F
},
{
0.0
}
};
{
0.0
F
}
};
DTYPE
answer
[
2
][
4
]
=
{
{
1.0
,
2.0
,
3.0
,
4.0
},
DTYPE
answer
[
2
][
4
]
=
{
{
1.0
F
,
2.0
F
,
3.0
F
,
4.0
F
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -128,9 +129,9 @@ bool TestSumByColumnTV1()
...
@@ -128,9 +129,9 @@ bool TestSumByColumnTV1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/*
case 2: test SumByColumnTV function
/*
* sum of a tensor and a vector (column vector)
case 2: test SumByColumnTV function
*
in a column by column manner
sum of a tensor and a vector (column vector)
in a column by column manner
*/
*/
bool
TestSumByColumnTV2
()
bool
TestSumByColumnTV2
()
{
{
...
@@ -154,12 +155,12 @@ bool TestSumByColumnTV2()
...
@@ -154,12 +155,12 @@ bool TestSumByColumnTV2()
for
(
int
i
=
0
;
i
<
bOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
bOrder
;
i
++
)
bUnitNum
*=
bDimSize
[
i
];
bUnitNum
*=
bDimSize
[
i
];
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
bData
[
2
][
1
]
=
{
{
1.0
},
DTYPE
bData
[
2
][
1
]
=
{
{
1.0
F
},
{
0.0
}
};
{
0.0
F
}
};
DTYPE
answer
[
2
][
4
]
=
{
{
1.0
,
2.0
,
3.0
,
4.0
},
DTYPE
answer
[
2
][
4
]
=
{
{
1.0
F
,
2.0
F
,
3.0
F
,
4.0
F
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -222,7 +223,6 @@ bool TestSumByColumnTV2()
...
@@ -222,7 +223,6 @@ bool TestSumByColumnTV2()
*/
*/
/* test for SumByColumnTV Function */
/* test for SumByColumnTV Function */
extern
"C"
bool
TestSumByColumnTV
()
bool
TestSumByColumnTV
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST SumByColumnTV] sum of a tensor and a vector (column vector) in a column by column manner
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST SumByColumnTV] sum of a tensor and a vector (column vector) in a column by column manner
\n
"
);
...
...
source/test/TSumByColumnVT.cpp
查看文件 @
0887fae1
...
@@ -22,9 +22,10 @@
...
@@ -22,9 +22,10 @@
#include "TSumByColumnVT.h"
#include "TSumByColumnVT.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: test SumByColumnVT function
* sum of a vector (column vector) and a tensor
/*
* in a column by column manner
case 1: test SumByColumnVT function
sum of a vector (column vector) and a tensor in a column by column manner
*/
*/
bool
TestSumByColumnVT1
()
bool
TestSumByColumnVT1
()
{
{
...
@@ -58,12 +59,12 @@ bool TestSumByColumnVT1()
...
@@ -58,12 +59,12 @@ bool TestSumByColumnVT1()
for
(
int
i
=
0
;
i
<
cOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
cOrder
;
i
++
)
cUnitNum
*=
cDimSize
[
i
];
cUnitNum
*=
cDimSize
[
i
];
DTYPE
aData
[
2
][
1
]
=
{
{
1.0
},
DTYPE
aData
[
2
][
1
]
=
{
{
1.0
F
},
{
0.0
}
};
{
0.0
F
}
};
DTYPE
bData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
DTYPE
bData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
answer
[
2
][
1
]
=
{
{
7.0
},
DTYPE
answer
[
2
][
1
]
=
{
{
7.0
F
},
{
22.0
}
};
{
22.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -129,9 +130,9 @@ bool TestSumByColumnVT1()
...
@@ -129,9 +130,9 @@ bool TestSumByColumnVT1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/*
case 2: test SumByColumnVT function
/*
* sum of a vector (column vector) and a tensor
case 2: test SumByColumnVT function
*
in a column by column manner
sum of a vector (column vector) and a tensor
in a column by column manner
*/
*/
bool
TestSumByColumnVT2
()
bool
TestSumByColumnVT2
()
{
{
...
@@ -155,12 +156,12 @@ bool TestSumByColumnVT2()
...
@@ -155,12 +156,12 @@ bool TestSumByColumnVT2()
for
(
int
i
=
0
;
i
<
bOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
bOrder
;
i
++
)
bUnitNum
*=
bDimSize
[
i
];
bUnitNum
*=
bDimSize
[
i
];
DTYPE
aData
[
2
][
1
]
=
{
{
1.0
},
DTYPE
aData
[
2
][
1
]
=
{
{
1.0
F
},
{
0.0
}
};
{
0.0
F
}
};
DTYPE
bData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
DTYPE
bData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
answer
[
2
][
1
]
=
{
{
7.0
},
DTYPE
answer
[
2
][
1
]
=
{
{
7.0
F
},
{
22.0
}
};
{
22.0
F
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -223,7 +224,6 @@ bool TestSumByColumnVT2()
...
@@ -223,7 +224,6 @@ bool TestSumByColumnVT2()
*/
*/
/* test for SumByColumnVT Function */
/* test for SumByColumnVT Function */
extern
"C"
bool
TestSumByColumnVT
()
bool
TestSumByColumnVT
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST SumByColumnVT] sum of a vector (column vector) and a tensor in a column by column manner
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST SumByColumnVT] sum of a vector (column vector) and a tensor in a column by column manner
\n
"
);
...
...
source/test/TTopK.cpp
查看文件 @
0887fae1
...
@@ -22,10 +22,12 @@
...
@@ -22,10 +22,12 @@
#include "TTopK.h"
#include "TTopK.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: get the top-k items along a given dimension.
* In this case,
/*
* (2, 4) -> (2, 4), dim = 0, k = 2
case 1: get the top-k items along a given dimension.
* (2, 4) -> (2, 4), dim = 1, k = 4
In this case,
(2, 4) -> (2, 4), dim = 0, k = 2
(2, 4) -> (2, 4), dim = 1, k = 4
*/
*/
bool
TestTopK1
()
bool
TestTopK1
()
{
{
...
@@ -49,16 +51,16 @@ bool TestTopK1()
...
@@ -49,16 +51,16 @@ bool TestTopK1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData
[
2
][
4
]
=
{
{
5.0
,
1.0
,
2.0
,
8.0
},
DTYPE
sData
[
2
][
4
]
=
{
{
5.0
F
,
1.0
F
,
2.0
F
,
8.0
F
},
{
4.0
,
3.0
,
7.0
,
6.0
}
};
{
4.0
F
,
3.0
F
,
7.0
F
,
6.0
F
}
};
DTYPE
tAnswer1
[
2
][
4
]
=
{
{
5.0
,
3.0
,
7.0
,
8.0
},
DTYPE
tAnswer1
[
2
][
4
]
=
{
{
5.0
F
,
3.0
F
,
7.0
F
,
8.0
F
},
{
4.0
,
1.0
,
2.0
,
6.0
}
};
{
4.0
F
,
1.0
F
,
2.0
F
,
6.0
F
}
};
int
indexAnswer1
[
2
][
4
]
=
{
{
0
,
1
,
1
,
0
},
int
indexAnswer1
[
2
][
4
]
=
{
{
0
,
1
,
1
,
0
},
{
1
,
0
,
0
,
1
}
};
{
1
,
0
,
0
,
1
}
};
DTYPE
tAnswer2
[
2
][
4
]
=
{
{
8.0
,
5.0
,
2.0
,
1.0
},
DTYPE
tAnswer2
[
2
][
4
]
=
{
{
8.0
F
,
5.0
F
,
2.0
F
,
1.0
F
},
{
7.0
,
6.0
,
4.0
,
3.0
}
};
{
7.0
F
,
6.0
F
,
4.0
F
,
3.0
F
}
};
int
indexAnswer2
[
2
][
4
]
=
{
{
3
,
0
,
2
,
1
},
int
indexAnswer2
[
2
][
4
]
=
{
{
3
,
0
,
2
,
1
},
{
2
,
3
,
0
,
1
}
};
{
2
,
3
,
0
,
1
}
};
...
@@ -156,9 +158,9 @@ bool TestTopK1()
...
@@ -156,9 +158,9 @@ bool TestTopK1()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/*
case 2: get the top-k items along a given dimension.
/*
* In this case,
case 2: get the top-k items along a given dimension.
* (2, 4) -> (2, 2), dim = 1, k = 2
In this case, (2, 4) -> (2, 2), dim = 1, k = 2.
*/
*/
bool
TestTopK2
()
bool
TestTopK2
()
{
{
...
@@ -182,10 +184,10 @@ bool TestTopK2()
...
@@ -182,10 +184,10 @@ bool TestTopK2()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData
[
2
][
4
]
=
{
{
5.0
,
1.0
,
2.0
,
8.0
},
DTYPE
sData
[
2
][
4
]
=
{
{
5.0
F
,
1.0
F
,
2.0
F
,
8.0
F
},
{
4.0
,
3.0
,
7.0
,
6.0
}
};
{
4.0
F
,
3.0
F
,
7.0
F
,
6.0
F
}
};
DTYPE
tAnswer
[
2
][
2
]
=
{
{
8.0
,
5.0
},
DTYPE
tAnswer
[
2
][
2
]
=
{
{
8.0
F
,
5.0
F
},
{
7.0
,
6.0
}
};
{
7.0
F
,
6.0
F
}
};
int
indexAnswer
[
2
][
2
]
=
{
{
3
,
0
},
int
indexAnswer
[
2
][
2
]
=
{
{
3
,
0
},
{
2
,
3
}
};
{
2
,
3
}
};
...
@@ -255,14 +257,12 @@ bool TestTopK2()
...
@@ -255,14 +257,12 @@ bool TestTopK2()
#endif // USE_CUDA
#endif // USE_CUDA
}
}
/* other cases */
/* other cases */
/*
/*
TODO!!
TODO!!
*/
*/
/* test for TopK Function */
/* test for TopK Function */
extern
"C"
bool
TestTopK
()
bool
TestTopK
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST TopK] get the top-k items along a given dimension
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST TopK] get the top-k items along a given dimension
\n
"
);
...
...
source/test/TUnsqueeze.cpp
查看文件 @
0887fae1
...
@@ -19,15 +19,16 @@
...
@@ -19,15 +19,16 @@
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-13
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-13
*/
*/
#include "../XTensor.h"
#include "../core/Unsqueeze.h"
#include "../XList.h"
#include "../XList.h"
#include "TUnsqueeze.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: insert a dimension by copying the blocks for x times (where x is the size of the inserted dimension)
* In this case,
/*
* (2, 3) -> (2, 2, 3), dim=1, dSize=2
case 1: insert a dimension by copying the blocks for x times (where x is the size of the inserted dimension)
* (2, 3) -> (2, 3, 2), dim=2, dSize=2
In this case,
(2, 3) -> (2, 2, 3), dim=1, dSize=2
(2, 3) -> (2, 3, 2), dim=2, dSize=2
*/
*/
bool
TestUnsqueeze1
()
bool
TestUnsqueeze1
()
{
{
...
@@ -63,18 +64,18 @@ bool TestUnsqueeze1()
...
@@ -63,18 +64,18 @@ bool TestUnsqueeze1()
for
(
int
i
=
0
;
i
<
tOrder2
;
i
++
)
for
(
int
i
=
0
;
i
<
tOrder2
;
i
++
)
tUnitNum2
*=
tDimSize2
[
i
];
tUnitNum2
*=
tDimSize2
[
i
];
DTYPE
sData
[
2
][
3
]
=
{
{
0.0
,
1.0
,
2.0
},
DTYPE
sData
[
2
][
3
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
},
{
3.0
,
4.0
,
5.0
}
};
{
3.0
F
,
4.0
F
,
5.0
F
}
};
DTYPE
answer1
[
2
][
2
][
3
]
=
{
{
{
0.0
,
1.0
,
2.0
},
DTYPE
answer1
[
2
][
2
][
3
]
=
{
{
{
0.0
F
,
1.0
F
,
2.0
F
},
{
0.0
,
1.0
,
2.0
}
},
{
0.0
F
,
1.0
F
,
2.0
F
}
},
{
{
3.0
,
4.0
,
5.0
},
{
{
3.0
F
,
4.0
F
,
5.0
F
},
{
3.0
,
4.0
,
5.0
}
}
};
{
3.0
F
,
4.0
F
,
5.0
F
}
}
};
DTYPE
answer2
[
2
][
3
][
2
]
=
{
{
{
0.0
,
0.0
},
DTYPE
answer2
[
2
][
3
][
2
]
=
{
{
{
0.0
F
,
0.0
F
},
{
1.0
,
1.0
},
{
1.0
F
,
1.0
F
},
{
2.0
,
2.0
}
},
{
2.0
F
,
2.0
F
}
},
{
{
3.0
,
3.0
},
{
{
3.0
F
,
3.0
F
},
{
4.0
,
4.0
},
{
4.0
F
,
4.0
F
},
{
5.0
,
5.0
}
}
};
{
5.0
F
,
5.0
F
}
}
};
/* CPU test */
/* CPU test */
bool
cpuTest
=
true
;
bool
cpuTest
=
true
;
...
@@ -148,7 +149,6 @@ bool TestUnsqueeze1()
...
@@ -148,7 +149,6 @@ bool TestUnsqueeze1()
*/
*/
/* test for Unsqueeze Function */
/* test for Unsqueeze Function */
extern
"C"
bool
TestUnsqueeze
()
bool
TestUnsqueeze
()
{
{
XPRINT
(
0
,
stdout
,
"[TEST Unsqueeze] insert a dimension by copying the blocks for x times
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST Unsqueeze] insert a dimension by copying the blocks for x times
\n
"
);
...
...
source/test/TXMem.cpp
查看文件 @
0887fae1
...
@@ -19,14 +19,13 @@
...
@@ -19,14 +19,13 @@
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-6-24
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-6-24
*/
*/
#include "TXMem.h"
#include "../XGlobal.h"
#include "../XGlobal.h"
#include "../XUtility.h"
#include "../XUtility.h"
#include "
../
XMem.h"
#include "
T
XMem.h"
/* the nts (NiuTrans.Tensor) namespace */
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
namespace
nts
{
/* case 1: test memory pool class */
bool
TestXMemCase1
()
bool
TestXMemCase1
()
{
{
bool
ok
=
true
;
bool
ok
=
true
;
...
@@ -83,6 +82,7 @@ bool TestXMemCase1()
...
@@ -83,6 +82,7 @@ bool TestXMemCase1()
return
ok
;
return
ok
;
}
}
/* test for memory pool class */
bool
TestXMem
()
bool
TestXMem
()
{
{
XPRINT
(
0
,
stdout
,
"[Test] Memory pool ... Began
\n
"
);
XPRINT
(
0
,
stdout
,
"[Test] Memory pool ... Began
\n
"
);
...
@@ -93,11 +93,18 @@ bool TestXMem()
...
@@ -93,11 +93,18 @@ bool TestXMem()
/* case 1 test */
/* case 1 test */
caseFlag
=
TestXMemCase1
();
caseFlag
=
TestXMemCase1
();
if
(
!
caseFlag
)
{
returnFlag
=
false
;
XPRINT
(
0
,
stdout
,
">> case 1 failed!
\n
"
);
}
if
(
!
caseFlag
)
{
else
{
XPRINT
(
0
,
stdout
,
">> case 1 passed!
\n
"
);}
returnFlag
=
false
;
XPRINT
(
0
,
stdout
,
">> case 1 failed!
\n
"
);
}
else
XPRINT
(
0
,
stdout
,
">> case 1 passed!
\n
"
);
if
(
returnFlag
)
{
XPRINT
(
0
,
stdout
,
">> All Passed!
\n
"
);
}
if
(
returnFlag
)
{
else
{
XPRINT
(
0
,
stdout
,
">> Failed!
\n
"
);
}
XPRINT
(
0
,
stdout
,
">> All Passed!
\n
"
);
}
else
XPRINT
(
0
,
stdout
,
">> Failed!
\n
"
);
double
endT
=
GetClock
();
double
endT
=
GetClock
();
...
@@ -106,4 +113,4 @@ bool TestXMem()
...
@@ -106,4 +113,4 @@ bool TestXMem()
return
returnFlag
;
return
returnFlag
;
}
}
}
/
* end of the nts (NiuTrans.Tensor) namespace */
}
/
/
namespace
nts
(
NiuTrans
.
Tensor
)
\ No newline at end of file
source/test/TXMem.h
查看文件 @
0887fae1
...
@@ -22,13 +22,13 @@
...
@@ -22,13 +22,13 @@
#ifndef __TXMEM_H__
#ifndef __TXMEM_H__
#define __TXMEM_H__
#define __TXMEM_H__
/* the nts (NiuTrans.Tensor) namespace */
#include "../XMem.h"
namespace
nts
{
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* test for memory pool class */
/* test for memory pool class */
extern
"C"
extern
"C"
bool
TestXMem
();
bool
TestXMem
();
}
/* end of the nts (NiuTrans.Tensor) namespace */
}
// namespace nts(NiuTrans.Tensor)
#endif // __TXMEM_H__
#endif
source/test/Test.cpp
查看文件 @
0887fae1
...
@@ -62,7 +62,7 @@ bool Test()
...
@@ -62,7 +62,7 @@ bool Test()
wrong
=
!
TestXMem
()
||
wrong
;
wrong
=
!
TestXMem
()
||
wrong
;
//wrong = !TestHardTanH() || wrong;
//wrong = !TestHardTanH() || wrong;
wrong
=
!
TestIdentity
||
wrong
;
//wrong = !TestIdentity()
|| wrong;
//wrong = !TestLogSoftmax() || wrong;
//wrong = !TestLogSoftmax() || wrong;
//wrong = !TestLoss() || wrong;
//wrong = !TestLoss() || wrong;
//wrong = !TestRectify() || wrong;
//wrong = !TestRectify() || wrong;
...
...
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论