Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
N
NiuTrans.Tensor
概览
Overview
Details
Activity
Cycle Analytics
版本库
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
问题
0
Issues
0
列表
Board
标记
里程碑
合并请求
0
Merge Requests
0
CI / CD
CI / CD
流水线
作业
日程表
图表
维基
Wiki
代码片段
Snippets
成员
Collapse sidebar
Close sidebar
活动
图像
聊天
创建新问题
作业
提交
Issue Boards
Open sidebar
杨迪
NiuTrans.Tensor
Commits
0887fae1
Commit
0887fae1
authored
Jul 07, 2018
by
liyinqiao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Format correction.
parent
42f995ae
全部展开
隐藏空白字符变更
内嵌
并排
正在显示
102 个修改的文件
包含
984 行增加
和
1112 行删除
+984
-1112
source/core/CHeader.h
+5
-0
source/core/Concatenate.cpp
+4
-0
source/core/Concatenate.h
+2
-1
source/core/ConcatenateSolely.cpp
+4
-2
source/core/ConcatenateSolely.h
+0
-1
source/core/CopyBlocks.cpp
+4
-2
source/core/CopyBlocksOnSite.cpp
+5
-2
source/core/CopyInGrid.cpp
+1
-1
source/core/CopyIndexed.cpp
+1
-0
source/core/CopyValues.cuh
+0
-1
source/core/FlushToMem.cu
+0
-2
source/core/MakeSplitBlockIndex.cpp
+1
-0
source/core/MakeSplitBlockIndex.cu
+1
-0
source/core/MatrixMULBatchedCPU.cpp
+1
-5
source/core/MatrixMul.h
+1
-1
source/core/MatrixMul2D.cpp
+1
-1
source/core/MatrixMul2D.cu
+3
-3
source/core/MatrixMulBatched.cpp
+1
-0
source/core/Merge.cpp
+0
-1
source/core/MergeBlockLists.cpp
+6
-6
source/core/MergeBlockLists.cu
+1
-20
source/core/MultiplyElementWise.cpp
+1
-0
source/core/MultiplyElementWise.cu
+1
-0
source/core/Negate.cpp
+2
-2
source/core/Negate.cu
+4
-4
source/core/Normalize.cpp
+1
-0
source/core/Normalize.cu
+1
-0
source/core/Normalize.cuh
+4
-2
source/core/Power.cpp
+2
-1
source/core/Power.cu
+0
-6
source/core/ReduceMax.cu
+48
-59
source/core/ReduceMean.cpp
+0
-3
source/core/ReduceSum.cu
+0
-5
source/core/ReduceSumSquared.cpp
+0
-1
source/core/ReduceVariance.cpp
+0
-1
source/core/ScaleAndShift.cpp
+0
-2
source/core/ScaleAndShift.cu
+0
-2
source/core/Select.cpp
+1
-2
source/core/SetData.cpp
+5
-4
source/core/Sort.cpp
+1
-0
source/core/Sort.cuh
+1
-0
source/core/Split.h
+1
-0
source/core/Sum.cu
+1
-0
source/core/Sum.cuh
+1
-1
source/core/SumByColumnVT.cu
+1
-0
source/core/SumByColumnVT.h
+0
-1
source/core/TopK.cpp
+1
-0
source/core/TopK.cu
+9
-4
source/core/XMatrixSegment.cpp
+1
-1
source/core/XMatrixSegment.h
+1
-3
source/core/XTensorBLAS.cu
+3
-10
source/core/XTensorCore.h
+48
-6
source/function/HardTanH.cpp
+0
-1
source/function/HardTanH.cu
+0
-1
source/function/LogSoftmax.cpp
+0
-2
source/function/LogSoftmax.cu
+8
-5
source/function/Loss.cu
+0
-1
source/function/Rectify.cu
+0
-1
source/function/Sigmoid.cpp
+0
-1
source/function/Sigmoid.cu
+0
-2
source/function/Softmax.cuh
+0
-1
source/test/TConcatenate.cpp
+39
-35
source/test/TConcatenateSolely.cpp
+31
-28
source/test/TCopyIndexed.cpp
+18
-17
source/test/TCopyValues.cpp
+5
-5
source/test/THardTanH.cpp
+72
-63
source/test/TIdentity.cpp
+24
-22
source/test/TLogSoftmax.cpp
+38
-35
source/test/TLoss.cpp
+26
-23
source/test/TMatrixMULBatchedCPU.cpp
+18
-17
source/test/TMatrixMul.cpp
+71
-67
source/test/TMatrixMul2D.cpp
+24
-21
source/test/TMatrixMul2DParallel.cpp
+24
-22
source/test/TMatrixMulBatched.cpp
+28
-28
source/test/TMerge.cpp
+38
-34
source/test/TMultiplyElementWise.cpp
+30
-27
source/test/TNegate.cpp
+13
-13
source/test/TNormalize.cpp
+16
-15
source/test/TPower.cpp
+34
-31
source/test/TRectify.cpp
+23
-21
source/test/TReduceMax.cpp
+8
-7
source/test/TReduceMean.cpp
+5
-105
source/test/TReduceMean.h
+2
-2
source/test/TReduceSum.cpp
+8
-106
source/test/TReduceSumSquared.cpp
+19
-17
source/test/TReduceVariance.cpp
+5
-4
source/test/TScaleAndShift.cpp
+6
-5
source/test/TSelect.cpp
+11
-10
source/test/TSetAscendingOrder.cpp
+1
-1
source/test/TSetData.cpp
+2
-3
source/test/TSigmoid.cpp
+9
-7
source/test/TSoftmax.cpp
+7
-5
source/test/TSort.cpp
+10
-10
source/test/TSplit.cpp
+19
-19
source/test/TSum.cpp
+18
-18
source/test/TSumByColumnTV.cpp
+19
-19
source/test/TSumByColumnVT.cpp
+19
-19
source/test/TTopK.cpp
+19
-19
source/test/TUnsqueeze.cpp
+19
-19
source/test/TXMem.cpp
+17
-9
source/test/TXMem.h
+0
-0
source/test/Test.cpp
+0
-0
没有找到文件。
source/core/CHeader.h
查看文件 @
0887fae1
...
...
@@ -28,6 +28,10 @@
#include "Concatenate.h"
#include "ConcatenateSolely.h"
#include "CopyBlocks.h"
#include "CopyBlocksInGrid.h"
#include "CopyBlocksOnSite.h"
#include "CopyData2D.h"
#include "CopyIndexed.h"
#include "CopyInGrid.h"
#include "CopyValues.h"
...
...
@@ -53,6 +57,7 @@
#include "ReduceSumSquared.h"
#include "ReduceVariance.h"
#include "ScaleAndShift.h"
#include "Select.h"
#include "SetData.h"
#include "Sort.h"
#include "Split.h"
...
...
source/core/Concatenate.cpp
查看文件 @
0887fae1
...
...
@@ -53,6 +53,10 @@ void Concatenate(XList * smalls, XTensor * big, int dim)
/*
concatenate two tensors along a given dimension
>> smallA - one tensor for concatenation
>> smallB - the other tensor for concatenation
>> big - the resulting tensor
>> dim - which dimension we perform the concatenation
*/
void
Concatenate
(
XTensor
*
smallA
,
XTensor
*
smallB
,
XTensor
*
big
,
int
dim
)
{
...
...
source/core/Concatenate.h
查看文件 @
0887fae1
...
...
@@ -29,7 +29,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/*
concatenate a list of tensors along a given dimension
Note that this is actually a wrapper that selects "ConcatenateSolely"
or "Merge" by means of the tensor shapes */
or "Merge" by means of the tensor shapes
*/
void
Concatenate
(
XList
*
smalls
,
XTensor
*
big
,
int
dim
);
/* concatenate two tensors along a given dimension */
...
...
source/core/ConcatenateSolely.cpp
查看文件 @
0887fae1
...
...
@@ -64,9 +64,11 @@ void ConcatenateSolely(XList * smalls, XTensor * big, int dim)
int
offset
=
0
;
/* two strategies are used - we can either resort to memcpy2d for the case of
/*
two strategies are used - we can either resort to memcpy2d for the case of
concatenation of a few items, or use MergeBlockLists to merge a large number
of data blocks */
of data blocks
*/
if
(
smalls
->
count
<=
MIN_TENSOR_CAT_NUM
)
{
for
(
int
i
=
0
;
i
<
smalls
->
count
;
i
++
)
{
XTensor
*
tensor
=
(
XTensor
*
)
smalls
->
GetItem
(
i
);
...
...
source/core/ConcatenateSolely.h
查看文件 @
0887fae1
...
...
@@ -26,7 +26,6 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* concatenate a list of tensors along a given dimension */
extern
"C"
void
ConcatenateSolely
(
XList
*
smalls
,
XTensor
*
big
,
int
dim
);
...
...
source/core/CopyBlocks.cpp
查看文件 @
0887fae1
...
...
@@ -78,9 +78,11 @@ void CopyBlocks(void * source, int blockSize, int * sourceBlocks, int blockNum,
else
{
int
devID
=
myMem
!=
NULL
?
myMem
->
devID
:
-
1
;
/* The following code should be fine with GPUs, but too many
/*
The following code should be fine with GPUs, but too many
kernel calls would slow down the system. We prefer to use
one kernel to do block copy in batch (kernel fusion). */
one kernel to do block copy in batch (kernel fusion).
*/
for
(
int
i
=
0
;
i
<
blockNum
;
i
++
)
{
XMemCopy
((
char
*
)
target
+
targetBlocks
[
i
]
*
blockSize
,
devID
,
(
char
*
)
source
+
sourceBlocks
[
i
]
*
blockSize
,
devID
,
blockSize
);
...
...
source/core/CopyBlocksOnSite.cpp
查看文件 @
0887fae1
...
...
@@ -25,6 +25,7 @@
#include "CopyBlocksOnSite.cuh"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
copy a number of blocks to target positions. Here we assume that
all the data has been on the device (CPU/GPU) already.
...
...
@@ -47,9 +48,11 @@ void CopyBlocksOnSite(void * source, int blockSize, int blockNum, void * target,
else
{
int
devID
=
myMem
!=
NULL
?
myMem
->
devID
:
-
1
;
/* The following code should be fine with GPUs, but too many
/*
The following code should be fine with GPUs, but too many
kernel calls would slow down the system. We prefer to use
one kernel to do block copy in batch (kernel fusion). */
one kernel to do block copy in batch (kernel fusion).
*/
for
(
int
i
=
0
,
b
=
0
;
i
<
blockNum
;
i
++
,
b
+=
blockSize
)
{
XMemCopy
((
char
*
)
target
+
targetBlocks
[
i
]
*
blockSize
,
devID
,
(
char
*
)
source
+
b
,
devID
,
blockSize
);
...
...
source/core/CopyInGrid.cpp
查看文件 @
0887fae1
...
...
@@ -34,7 +34,7 @@ i.e., reorder the data blocks in the same memory piece
in the k-th grid
>> blockDim - leading dimension of blocks
>> blockNumInGrid - number of blocks in each grid
>> isOnDev - indicates whether the index is on the device already
>> is
Index
OnDev - indicates whether the index is on the device already
*/
void
CopyInGrid
(
XTensor
*
s
,
XTensor
*
t
,
int
*
index
,
int
blockDim
,
int
blockNumInGrid
,
bool
isIndexOnDev
)
{
...
...
source/core/CopyIndexed.cpp
查看文件 @
0887fae1
...
...
@@ -36,6 +36,7 @@ copy indexed sub-tensors
>> tgtIndex - index of the target sub-tensors
>> copyNum - number of the sub-tensors we copy for each source index, e.g.,
for srcIndex = [1,4] and copyNum = 2, we actually copy the source sub-tensors 1, 2, 4, 5
<< return - whether copy indexed operation was successful
*/
bool
CopyIndexed
(
XTensor
*
s
,
XTensor
*
t
,
int
dim
,
int
*
srcIndex
,
int
indexSize
,
int
*
tgtIndex
,
int
copyNum
)
{
...
...
source/core/CopyValues.cuh
查看文件 @
0887fae1
...
...
@@ -28,7 +28,6 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/**************************************/
/* copy all elements from a source matrix to a target matrix */
extern "C"
bool CudaCopyValues(XTensor * s, XTensor * t, XStream * stream = NULL);
...
...
source/core/FlushToMem.cu
查看文件 @
0887fae1
...
...
@@ -52,7 +52,6 @@ void CudaCPUToGPUFlush(XList * mList, int devID, XMem * GPUMem)
else
reqiredSize = m->unitSize * m->unitNum;
//reqiredSize = (int)GPUMem->GetPitch(GPUMem->devID, (MTYPE)GPUMem->GetAddress() + size, reqiredSize);
size += reqiredSize;
}
...
...
@@ -70,7 +69,6 @@ void CudaCPUToGPUFlush(XList * mList, int devID, XMem * GPUMem)
else
pSize = m->unitSize * m->unitNum;
//reqiredSize = (int)GPUMem->GetPitch(GPUMem->devID, (MTYPE)GPUMem->GetAddress() + p, pSize);
reqiredSize = pSize;
memcpy(data + p, m->data, pSize);
...
...
source/core/MakeSplitBlockIndex.cpp
查看文件 @
0887fae1
...
...
@@ -24,6 +24,7 @@
#include "MakeSplitBlockIndex.cuh"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
set target data block index for the data movement in split
>> blockIndex - block index
...
...
source/core/MakeSplitBlockIndex.cu
查看文件 @
0887fae1
...
...
@@ -51,6 +51,7 @@ void KernelMakeSplitBlockIndex(int * blockIndex, int splitNum, int blockSplitSiz
/*
set target data block index for the data movement in split
>> devID - device id
>> blockIndex - block index
>> splitNum - number of splits
>> blockSplitSize - size of the split block
...
...
source/core/MatrixMULBatchedCPU.cpp
查看文件 @
0887fae1
...
...
@@ -33,9 +33,9 @@ c_i = trans(a_i) * trans(b_i) * \alpha + c_i * \beta for each i in [0,count-1]
>> transposedA - indicate whether the matrix a is transposed
>> b - another list of input matrices (2d tensors)
>> transposedB - indicate whether the matrix b is transposed
>> c - output matrix (2d tensor)
>> alpha - scalar
>> beta - scalar
>> c - output matrix (2d tensor)
*/
void
MatrixMULBatchedCPU
(
XList
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XList
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
...
...
@@ -64,10 +64,6 @@ void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA,
}
}
//if(isUniform){
//}
//else{
for
(
int
i
=
0
;
i
<
a
->
count
;
i
++
)
{
XTensor
*
ai
=
(
XTensor
*
)
a
->
GetItem
(
i
);
XTensor
*
bi
=
(
XTensor
*
)
b
->
GetItem
(
i
);
...
...
source/core/MatrixMul.h
查看文件 @
0887fae1
...
...
@@ -39,7 +39,7 @@ normal matrix multiplication if A = y * z and B = x * y.
*/
extern
"C"
void
MatrixMul
(
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
XTensor
*
c
,
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
,
XPRunner
*
parallelRunner
=
NULL
);
DTYPE
alpha
=
(
DTYPE
)
1
.
0
,
DTYPE
beta
=
0
,
XPRunner
*
parallelRunner
=
NULL
);
}
// namespace nts(NiuTrans.Tensor)
...
...
source/core/MatrixMul2D.cpp
查看文件 @
0887fae1
...
...
@@ -104,7 +104,7 @@ void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
int
num
=
*
((
int
*
)
b
->
data
);
char
*
p
=
(
char
*
)
b
->
data
+
sizeof
(
int
);
// pointer to the first tuple
/* a * b */
/* a * b */
if
(
transposedA
==
X_NOTRANS
&&
transposedB
==
X_NOTRANS
)
{
for
(
int
i
=
0
;
i
<
num
;
i
++
)
{
int
key
=
*
((
int
*
)
p
);
...
...
source/core/MatrixMul2D.cu
查看文件 @
0887fae1
...
...
@@ -37,11 +37,13 @@ c = a * b * \alpha
>> aColSize - column size of matrix a
>> aRowSize - row size of matrix a
>> b - a sparse matrix
>> transposed
A
- indicates whether b is transposed
>> transposed
B
- indicates whether b is transposed
>> bNonZeroNum - number of non-zero items in b
>> bColSize - column size of matrix b
>> bRowSize - row size of matrix b
>> c - the resulting (dense) matrix
>> cColSize - column size of matrix c
>> cRowSize - row size of matrix c
>> alpha - the scaling factor
*/
extern "C" __global__
...
...
@@ -147,7 +149,6 @@ void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
if (!a->isSparse && !b->isSparse) {
CheckNTErrors((!c->isSparse), "Illegal use of sparse matrix in multiplication!");
//cublasHandle_t * handle = GDevs->GetCudaHandle(a->devID);
cublasHandle_t * handle = a->mem == NULL ? GDevs.GetCudaHandle(a->devID) : a->mem->GetCublasHandle();
/* !!!! might have problems */
...
...
@@ -183,7 +184,6 @@ void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
if (beta == 0)
c->SetZeroAll();
else if (beta != 1.0F) {
//XTensor::ScaleAndShift(c, beta, 0);
ShowNTErrors("TODO!");
}
...
...
source/core/MatrixMulBatched.cpp
查看文件 @
0887fae1
...
...
@@ -40,6 +40,7 @@ where trans() returns the transposed matrix if the flag is fired
>> c - where we keep a*b
>> alpha - a coefficient
>> beta - another coefficient
>> parallelRunner - parallel processing module
*/
void
MatrixMulBatched
(
XTensor
*
a
,
MATRIX_TRANS_TYPE
transposedA
,
XTensor
*
b
,
MATRIX_TRANS_TYPE
transposedB
,
...
...
source/core/Merge.cpp
查看文件 @
0887fae1
...
...
@@ -27,7 +27,6 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
transform a tensor by merging it along a dimension, e.g., (N/3, M, 3) -> (N, M)
>> s - the source tensor
...
...
source/core/MergeBlockLists.cpp
查看文件 @
0887fae1
...
...
@@ -27,12 +27,12 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
merge data by blocks
>> sourceList - list of source data array
>> blockSizes - list of the block size for each source data array
>> blockNum - number of blocks kept in each data array
>> target - target data array
>> myMem - memory pool
merge data by blocks
>> sourceList - list of source data array
>> blockSizes - list of the block size for each source data array
>> blockNum - number of blocks kept in each data array
>> target - target data array
>> myMem - memory pool
*/
void
MergeBlockLists
(
XList
*
sourceList
,
int
*
blockSizes
,
int
blockNum
,
void
*
target
,
XMem
*
myMem
)
{
...
...
source/core/MergeBlockLists.cu
查看文件 @
0887fae1
...
...
@@ -34,10 +34,9 @@ copy a number of blocks (of different sizes) to target positions
>> sourceBlockSizes - the size of the block_i
>> sourceBlockNum - number of blocks to merge
>> targetList - list of data arrays to copy to
>> target - target data array
*/
__global__
void KernelCopyBlockLists(DTYPE * sourceList[], int * sourceBlockSizes, int sourceBlockNum, DTYPE * targetList[])
void KernelCopyBlockLists(DTYPE * sourceList[], int * sourceBlockSizes, int sourceBlockNum, DTYPE * targetList[])
{
__shared__ int iBlockSizes[MAX_CUDA_THREAD_NUM_PER_BLOCK];
__shared__ DTYPE * iSourceList[MAX_CUDA_THREAD_NUM_PER_BLOCK];
...
...
@@ -82,7 +81,6 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
int minBlockSize = MAX_INT;
int maxBlockSize = -MAX_INT;
//int realMinBlockSize = 1;
int realMaxBlockSize = 1;
DTYPE ** sourceArrays = new DTYPE*[newBlockListSize];
DTYPE ** targetArrays = new DTYPE*[newBlockListSize];
...
...
@@ -110,7 +108,6 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
CheckNTErrors((minBlockSize % sizeof(DTYPE) == 0), "Unsupported block size!");
CheckNTErrors((maxBlockSize % sizeof(DTYPE) == 0), "Unsupported block size!");
//realMinBlockSize = minBlockSize/sizeof(DTYPE);
realMaxBlockSize = maxBlockSize / sizeof(DTYPE);
int cudaGridSizes[3];
...
...
@@ -120,31 +117,16 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
cudaGridSizes, cudaBlockSizes);
myMem->SetPinBuf();
//MTYPE offset0 = myMem->bufUsed;
int * sizesGPU = (int*)myMem->AllocBuf(myMem->devID, sizeof(int) * newBlockListSize, 256);
//MTYPE offset1 = myMem->bufUsed;
DTYPE ** sourceArraysGPU = (DTYPE**)myMem->AllocBuf(myMem->devID, sizeof(DTYPE*) * newBlockListSize, 256);
//MTYPE offset2 = myMem->bufUsed;
DTYPE ** targetArraysGPU = (DTYPE**)myMem->AllocBuf(myMem->devID, sizeof(DTYPE*) * newBlockListSize, 256);
//MTYPE bufSize = myMem->bufUsed - offset0;
//char * CPUBuf = new char[bufSize];
//memset(CPUBuf, 0 , bufSize);
//memcpy(CPUBuf, sizes, sizeof(int) * newBlockListSize);
//memcpy(CPUBuf + (offset1 - offset0), sourceArrays, sizeof(DTYPE*) * newBlockListSize);
//memcpy(CPUBuf + (offset2 - offset0), targetArrays, sizeof(DTYPE*) * newBlockListSize);
XMemCopy(sizesGPU, myMem->devID, sizes, -1, sizeof(int) * newBlockListSize);
XMemCopy(sourceArraysGPU, myMem->devID, sourceArrays, -1, sizeof(DTYPE*) * newBlockListSize);
XMemCopy(targetArraysGPU, myMem->devID, targetArrays, -1, sizeof(DTYPE*) * newBlockListSize);
/* it is VERY tricky here because we squeeze three data copies into one */
//XMemCopy(sizesGPU, myMem->devID, CPUBuf, -1, bufSize);
KernelCopyBlockLists << <dim3(cudaGridSizes[0], cudaGridSizes[1]), dim3(cudaBlockSizes[0], cudaBlockSizes[1]) >> >
(sourceArraysGPU, sizesGPU, newBlockListSize, targetArraysGPU);
...
...
@@ -154,7 +136,6 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
delete[] targetArrays;
delete[] sizes;
delete[] offsets;
//delete[] CPUBuf;
}
#endif // USE_CUDA
...
...
source/core/MultiplyElementWise.cpp
查看文件 @
0887fae1
...
...
@@ -24,6 +24,7 @@
#include "MultiplyElementWise.cuh"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
element-wise product of two tensors
c(i) = a(i)*b(i) + \alpha * c(i)
...
...
source/core/MultiplyElementWise.cu
查看文件 @
0887fae1
...
...
@@ -68,6 +68,7 @@ where |a_lead| means the size of the leading dimension of a
>> a - tensor a
>> b - tensor b
>> c - result tensor
>> alpha - the coefficient
>> stride - the number of items we go over when move next along the leading dimension in a block
>> ldSizeA - size of the leading dimension of a
>> ldSizeB - size of the leading dimension of b
...
...
source/core/Negate.cpp
查看文件 @
0887fae1
...
...
@@ -26,8 +26,8 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
set every entry to its minus value
>> a - the tensor we are processing
set every entry to its minus value
>> a - the tensor we are processing
*/
void
Negate
(
XTensor
*
a
)
{
...
...
source/core/Negate.cu
查看文件 @
0887fae1
...
...
@@ -42,10 +42,10 @@ void KernelNegate(DTYPE * d, int size)
}
/*
set each entry to its negative value (CUDA Kernel)
This is for float16 computation
>> d - pointer to the data array
>> size - size of the data array
set each entry to its negative value (CUDA Kernel)
This is for float16 computation
>> d - pointer to the data array
>> size - size of the data array
*/
__global__
void KernelNegate(__half * d, int size)
...
...
source/core/Normalize.cpp
查看文件 @
0887fae1
...
...
@@ -25,6 +25,7 @@
#include "Normalize.cuh"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
normalized the data with normal distribution. For an input x,
y = a * (x-mean)/sqrt(variance+\epsilon) + b
...
...
source/core/Normalize.cu
查看文件 @
0887fae1
...
...
@@ -25,6 +25,7 @@
#include "Normalize.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
normalized the data with normal distribution (kernel code). For an input x,
...
...
source/core/Normalize.cuh
查看文件 @
0887fae1
...
...
@@ -28,7 +28,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* normalized the data with normal distribution (Kernel code). For an input x,
/*
normalized the data with normal distribution (Kernel code). For an input x,
y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter
*/
...
...
@@ -37,7 +38,8 @@ void KernelNormalize(DTYPE * input, DTYPE * output, DTYPE * mean, DTYPE * var,
DTYPE * a, DTYPE * b, DTYPE epsilon,
int stride, int strideNum, int blockNum);
/* normalized the data with normal distribution. For an input x,
/*
normalized the data with normal distribution. For an input x,
y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter
*/
...
...
source/core/Power.cpp
查看文件 @
0887fae1
...
...
@@ -25,10 +25,11 @@
#include "Power.cuh"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
get the power(a, p)
>> a - the tensor
>> p
ower
- as it is
>> p - as it is
*/
void
Power
(
XTensor
*
a
,
DTYPE
p
)
{
...
...
source/core/Power.cu
查看文件 @
0887fae1
...
...
@@ -87,9 +87,6 @@ __global__
void KernelPower(__half * d, __half p, int size)
{
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
//int i = blockDim.x * blockIdx.x + threadIdx.x;
//if (i < size)
// d[i] = hpow(d[i], p);
#else
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
...
...
@@ -126,9 +123,6 @@ void CudaPower(XTensor * a, DTYPE p)
}
else if (p != (DTYPE)1.0) {
ShowNTErrors("TODO!");
//unsigned short p2 = FloatToFloat16(p);
//__half * pp = (__half*)&p2;
//KernelPower<<<blocks, threads>>>((__half*)a->data, *pp, a->unitNum);
}
}
else {
...
...
source/core/ReduceMax.cu
查看文件 @
0887fae1
...
...
@@ -31,14 +31,10 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
reduce a tensor to another that keeps the max value along a dimension - slow version
Given a block of data, we go over each dimension i in the stride and we have
sum_i = max_{0<=j<strideNum} input_{i,j}
where we can view the block as a matrix and input_{i,j} represents the item at the
crossing of the i-th column and the j-th row.
>> input - the input array (representing a tensor)
>> output - the sum over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item
...
...
@@ -89,82 +85,77 @@ void KernelReduceMax(DTYPE * input, DTYPE * output,
}
/*
reduce a tensor to another that keeps the max value along a dimension - slow version
Given a block of data, we go over each dimension i in the stride and we have
sum_i = max_{0<=j<strideNum} input_{i,j}
where we can view the block as a matrix and input_{i,j} represents the item at the
crossing of the i-th column and the j-th row.
>> input - the input array (representing a tensor)
>> output - the sum over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item
>> strideNum - how many strides we need to finish the reduce
>> reducedStrideNum - the number of strides after reduction
>> blockSize - size of the block (i.e., stride * strideNum)
>> blockNum - how many blocks
*/
__global__
void KernelReduceMax(__half * input, __half * output,
int stride, int strideNum, int reducedStrideNum,
int blockSize, int blockNum)
{
int idx = threadIdx.x * blockDim.y + threadIdx.y;
unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
/*
reduce a tensor to another that keeps the max value along a dimension - slow version
Given a block of data, we go over each dimension i in the stride and we have
sum_i = max_{0<=j<strideNum} input_{i,j}
where we can view the block as a matrix and input_{i,j} represents the item at the
crossing of the i-th column and the j-th row.
>> input - the input array (representing a tensor)
>> output - the sum over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item
>> strideNum - how many strides we need to finish the reduce
>> reducedStrideNum - the number of strides after reduction
>> blockSize - size of the block (i.e., stride * strideNum)
>> blockNum - how many blocks
*/
__global__
void KernelReduceMax(__half * input, __half * output,
int stride, int strideNum, int reducedStrideNum,
int blockSize, int blockNum)
{
int idx = threadIdx.x * blockDim.y + threadIdx.y;
unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
if (i >= stride * blockNum)
return;
if (i >= stride * blockNum)
return;
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
__shared__ __half iData[MAX_CUDA_THREAD_NUM_PER_BLOCK * MIN_CUDA_SHARED_MEM_COL_SIZE / 2];
__shared__ __half iData[MAX_CUDA_THREAD_NUM_PER_BLOCK * MIN_CUDA_SHARED_MEM_COL_SIZE / 2];
#else
__shared__ DTYPE iData[MAX_CUDA_THREAD_NUM_PER_BLOCK * MIN_CUDA_SHARED_MEM_COL_SIZE / 2];
__shared__ DTYPE iData[MAX_CUDA_THREAD_NUM_PER_BLOCK * MIN_CUDA_SHARED_MEM_COL_SIZE / 2];
#endif
__syncthreads();
__syncthreads();
int k = i / stride;
int iOffset = i % stride;
int k = i / stride;
int iOffset = i % stride;
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
__half value = (i < stride * blockNum && j < strideNum) ?
__half value = (i < stride * blockNum && j < strideNum) ?
input[blockSize * k + stride * j + iOffset] : __half(FLOAT16_MIN);
#else
DTYPE value = (i < stride * blockNum && j < strideNum) ?
__half2float(input[blockSize * k + stride * j + iOffset]) : FLOAT_MIN;
DTYPE value = (i < stride * blockNum && j < strideNum) ?
__half2float(input[blockSize * k + stride * j + iOffset]) : FLOAT_MIN;
#endif
/* load data into the shared mem */
iData[threadIdx.x * blockDim.y + threadIdx.y] = value;
/* load data into the shared mem */
iData[threadIdx.x * blockDim.y + threadIdx.y] = value;
__syncthreads();
__syncthreads();
/* do reduction in shared mem */
for (unsigned int s = blockDim.y / 2; s > 0; s >>= 1) {
if (threadIdx.y < s && iData[idx] < iData[idx + s]) {
iData[idx] = iData[idx + s];
}
/* do reduction in shared mem */
for (unsigned int s = blockDim.y / 2; s > 0; s >>= 1) {
if (threadIdx.y < s && iData[idx] < iData[idx + s]) {
iData[idx] = iData[idx + s];
}
__syncthreads();
}
__syncthreads();
}
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
/* write result for this block to the output array */
if (threadIdx.y == 0 && blockIdx.y < reducedStrideNum)
output[(k * reducedStrideNum + blockIdx.y) * stride + iOffset] = iData[threadIdx.x * blockDim.y];
/* write result for this block to the output array */
if (threadIdx.y == 0 && blockIdx.y < reducedStrideNum)
output[(k * reducedStrideNum + blockIdx.y) * stride + iOffset] = iData[threadIdx.x * blockDim.y];
#else
/* write result for this block to the output array */
if (threadIdx.y == 0 && blockIdx.y < reducedStrideNum)
output[(k * reducedStrideNum + blockIdx.y) * stride + iOffset] = __half(iData[threadIdx.x * blockDim.y]);
/* write result for this block to the output array */
if (threadIdx.y == 0 && blockIdx.y < reducedStrideNum)
output[(k * reducedStrideNum + blockIdx.y) * stride + iOffset] = __half(iData[threadIdx.x * blockDim.y]);
#endif
}
/*
reduce a tensor to another that keeps the max value along a dimension - fast version
>> input - the input array (representing a tensor)
...
...
@@ -338,9 +329,7 @@ void KernelReduceMaxSimpleFast(DTYPE * input, DTYPE * output,
/*
get the max-valued items along a dimension of the tensor (cuda version).
For a 1-dimensional data array a,
sum_i = max_{0<=j<strideNum} input_{i,j}
>> input - the input tensor
>> output - the output tensor
>> dim - which dimension to reduce
...
...
source/core/ReduceMean.cpp
查看文件 @
0887fae1
...
...
@@ -28,7 +28,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
get the mean value along a dimension of the tensor. For a 1-dimensional data array a,
mean = (1/n) * sum_i input_i
>> input - the input tensor
>> output - the output tensor
>> dim - the dimension where the reduction is performed on
...
...
@@ -44,5 +43,4 @@ void ReduceMean(XTensor * input, XTensor * output, int dim)
ScaleAndShift
(
output
,
(
DTYPE
)
1
/
num
,
0
);
}
}
//
namespace
nts
(
NiuTrans
.
Tensor
)
\ No newline at end of file
source/core/ReduceSum.cu
查看文件 @
0887fae1
...
...
@@ -29,13 +29,11 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
reduce a tensor to another that keeps the sum along a dimension - slow version
Given a block of data, we go over each dimension i in the stride and we have
sum_i = sum_{0<=j<strideNum} exp(input_{i,j} - shift) if isExp == true;
= sum_{0<=j<strideNum} input_{i,j} - shift if isExp == false;
where we can view the block as a matrix and input_{i,j} represents the item at the
crossing of the i-th column and the j-th row.
>> input - the input array (representing a tensor)
>> output - the sum over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item
...
...
@@ -107,13 +105,11 @@ void KernelReduceSum(DTYPE * input, DTYPE * output,
/*
reduce a tensor to another that keeps the sum along a dimension - slow version
This is for float16 reduction.
Given a block of data, we go over each dimension i in the stride and we have
sum_i = sum_{0<=j<strideNum} exp(input_{i,j} - shift) if isExp == true;
= sum_{0<=j<strideNum} input_{i,j} - shift if isExp == false;
where we can view the block as a matrix and input_{i,j} represents the item at the
crossing of the i-th column and the j-th row.
>> input - the input array (representing a tensor)
>> output - the sum over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item
...
...
@@ -304,7 +300,6 @@ void KernelReduceSumFast(DTYPE * input, DTYPE * output,
/*
reduce a tensor to another that keeps the sum along a dimension - fast version
This is for float16 reduction
>> input - the input array (representing a tensor)
>> output - the sum over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item
...
...
source/core/ReduceSumSquared.cpp
查看文件 @
0887fae1
...
...
@@ -28,7 +28,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
squared sum of the items along a dimension of the tensor.
For a 1-dimensional data array a,
sum = \sum_i (a_i - shift)^2
>> input - the input tensor
>> output - the output tensor
>> dim - the dimension where the reduction is performed on
...
...
source/core/ReduceVariance.cpp
查看文件 @
0887fae1
...
...
@@ -29,7 +29,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
variance of the items along a dimension of the tensor.
For a 1-dimensional data array a,
variance = 1/n * \sum_i (a_i - mean)^2
>> input - the input tensor
>> output - the output tensor
>> dim - the dimension where the reduction is performed on
...
...
source/core/ScaleAndShift.cpp
查看文件 @
0887fae1
...
...
@@ -26,9 +26,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
scale and shift all tensor entries
p = p * scale + shift
>> a - the tensor
>> scale - the scaler factor
>> shift - the shift factor
...
...
source/core/ScaleAndShift.cu
查看文件 @
0887fae1
...
...
@@ -80,9 +80,7 @@ void KernelScaleAndShift(__half * d, int size, __half scale, __half shift)
/*
scale and shift all matrix entries
p = p * scale + shift
>> a - the tensor
>> scale - the scaler factor
>> shift - the shift factor
...
...
source/core/Select.cpp
查看文件 @
0887fae1
...
...
@@ -31,7 +31,7 @@ c = select(a)
>> dim - the dimension along which we do the job
>> low - lower bound
>> high - higher bound.
Note that range [1,3] means that we select 1 and 2.
Note that range [1,3] means that we select 1 and 2.
>> c - result tensor
*/
void
SelectRange
(
XTensor
*
a
,
int
dim
,
int
low
,
int
high
,
XTensor
*
c
)
...
...
@@ -75,5 +75,4 @@ void SelectRange(XTensor * a, int dim, int low, int high, XTensor * c)
}
}
}
// namespace nts(NiuTrans.Tensor)
source/core/SetData.cpp
查看文件 @
0887fae1
...
...
@@ -68,10 +68,11 @@ void SetDataRand(XTensor * tensor, DTYPE low, DTYPE high)
ShowNTErrors
(
"TODO"
);
}
}
/* GPU code
The trick here is to initialize the data on a temporary tensor on the CPU.
The CPU data is then copied to GPU.
TODO: generate data points on GPUs straightforwardly.
/*
GPU code
The trick here is to initialize the data on a temporary tensor on the CPU.
The CPU data is then copied to GPU.
TODO: generate data points on GPUs straightforwardly.
*/
else
{
XTensor
*
t2
=
NewTensor
(
tensor
->
order
,
tensor
->
dimSize
,
tensor
->
dataType
,
tensor
->
denseRatio
,
-
1
);
...
...
source/core/Sort.cpp
查看文件 @
0887fae1
...
...
@@ -39,6 +39,7 @@ void Sort(XTensor * a, XTensor * index, int dim)
CheckNTErrors
((
index
->
dataType
==
X_INT
),
"Wrong data type!"
);
int
dimRDI
=
a
->
order
-
dim
-
1
;
/* make the index tensor */
index
->
SetAscendingOrder
(
dim
);
...
...
source/core/Sort.cuh
查看文件 @
0887fae1
...
...
@@ -29,6 +29,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* sort the tensor along a given dimension */
extern "C"
void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, int dim, int k = -1);
#endif // USE_CUDA
...
...
source/core/Split.h
查看文件 @
0887fae1
...
...
@@ -27,6 +27,7 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* transform a tensor by splitting it, e.g., (M, N) -> (M, N/3, 3) */
extern
"C"
void
Split
(
XTensor
*
s
,
XTensor
*
t
,
int
whereToSplit
,
int
splitNum
);
/* split a big tensor into small tensors */
...
...
source/core/Sum.cu
查看文件 @
0887fae1
...
...
@@ -25,6 +25,7 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
summation of data arrays (CUDA Kernel)
c = a + b * \beta
...
...
source/core/Sum.cuh
查看文件 @
0887fae1
...
...
@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* summation of data arrays (CUDA Kernel) */
/* summation of data arrays (CUDA Kernel) */
extern "C" __global__
void KernelADD(DTYPE * a, DTYPE * b, DTYPE * c, int size, DTYPE beta = (DTYPE)1.0);
...
...
source/core/SumByColumnVT.cu
查看文件 @
0887fae1
...
...
@@ -27,6 +27,7 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
summation of a vector (column vector) and a tensor
c = a + \sum{col} b_col * \beta
...
...
source/core/SumByColumnVT.h
查看文件 @
0887fae1
...
...
@@ -26,7 +26,6 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* sum of a (column) vector and a tensor */
extern
"C"
void
SumByColumnVT
(
XTensor
*
a
,
XTensor
*
b
,
XTensor
*
c
=
NULL
,
DTYPE
beta
=
(
DTYPE
)
1
.
0
);
...
...
source/core/TopK.cpp
查看文件 @
0887fae1
...
...
@@ -24,6 +24,7 @@
#include "TopK.cuh"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
get the top-k items along a given dimension
>> a - input tensor
...
...
source/core/TopK.cu
查看文件 @
0887fae1
...
...
@@ -95,9 +95,11 @@ public:
/* swap */
__device__ void Swap(int i, int j)
{
/*CudaHeapNode<T> tmp = items[i];
/*
CudaHeapNode<T> tmp = items[i];
items[i] = items[j];
items[j] = tmp;*/
items[j] = tmp;
*/
int tmpIndex = items[i].index;
T tmpValue = items[i].value;
items[i] = items[j];
...
...
@@ -239,8 +241,10 @@ void KernelTopK(T * input, int stride, int strideNum, int blockNum, int k, T min
if (threadIdx.x == 0) {
CudaXHeap<MIN_HEAP, T> heapFinal(k, k, heapData + k * threadIdx.y * blockDim.x);
/* merge the result over the workers.
This can be improved by parallel merging */
/*
merge the result over the workers.
This can be improved by parallel merging
*/
if (blockDim.x > 1) {
for (int p = 1; p < blockDim.x && p < strideNum; p++) {
CudaHeapNode<T> * hd = heapData + k * (threadIdx.y * blockDim.x + p);
...
...
@@ -429,6 +433,7 @@ void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k)
}
}
/* we resort to sorting if the data cannot fit inside the shared memory */
else {
int dimSize[MAX_TENSOR_DIM_NUM];
...
...
source/core/XMatrixSegment.cpp
查看文件 @
0887fae1
...
...
@@ -227,7 +227,7 @@ int SegmentTensor2D(int rowNum, int colNum, int blockNum, int * blockIndex)
x2
=
colSize
-
1
;
y2
=
rowSize
-
1
;
// bottom-right corner
/* the main body of the matrix (after removing the margin block) */
/* the main body of the matrix (after removing the margin block) */
while
(
x1
<=
xMax
)
{
y1
=
0
;
x2
=
x1
+
colSize
-
1
;
...
...
source/core/XMatrixSegment.h
查看文件 @
0887fae1
...
...
@@ -26,9 +26,7 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*******************************************************************
segmentation and parallel processing for 2d tensors (i.e., matrices)
*/
/* segmentation and parallel processing for 2d tensors (i.e., matrices) */
/* segment a 2d tensor (i.e., matrix) into blocks and run jobs in parallel */
extern
"C"
void
RunParallel2D
(
XPRunner
*
parallelRunner
,
void
*
job
,
int
opNum
,
int
rowNum
,
int
colNum
,
int
argNum
,
...);
...
...
source/core/XTensorBLAS.cu
查看文件 @
0887fae1
...
...
@@ -28,9 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
matrix multiplication via cuda version BLAS
*/
/* matrix multiplication via cuda version BLAS */
void CudaBLASMatrixMUL(cublasHandle_t * handle,
void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
...
...
@@ -85,9 +83,7 @@ void CudaBLASMatrixMUL(cublasHandle_t * handle,
}
}
/*
matrix multiplication via cuda version BLAS
*/
/* matrix multiplication via cuda version BLAS */
void CudaBLASMatrixMULBatched(cublasHandle_t * handle,
const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
const void ** b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
...
...
@@ -143,7 +139,6 @@ void CudaBLASMatrixMULBatched(cublasHandle_t * handle,
}
/* matrix multiplication in batch and strided mode via cuda version BLAS */
extern "C"
void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA,
const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, long long int strideB,
...
...
@@ -198,9 +193,7 @@ void CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
}
}
/*
matrix multiplication via cuda version BLAS
*/
/* matrix multiplication via cuda version BLAS */
void CudaBLASMatrixMULList(cublasHandle_t * handle,
XList * a, MATRIX_TRANS_TYPE transposedA,
XList * b, MATRIX_TRANS_TYPE transposedB,
...
...
source/core/XTensorCore.h
查看文件 @
0887fae1
差异被折叠。
点击展开。
source/function/HardTanH.cpp
查看文件 @
0887fae1
...
...
@@ -25,7 +25,6 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
hard tanh function
y = 1 if x > 1
...
...
source/function/HardTanH.cu
查看文件 @
0887fae1
...
...
@@ -95,7 +95,6 @@ dy/dx = 1 if -1 <= x <= 1
>> y - y of the function
>> x - x of the function
>> size - size of y/x
*/
__global__
void KernelHardtanhBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size)
...
...
source/function/LogSoftmax.cpp
查看文件 @
0887fae1
...
...
@@ -49,7 +49,6 @@ void LogSoftmax(XTensor * x, XTensor * y, int leadDim)
dimSize
[
i
-
1
]
=
-
x
->
dimSize
[
i
];
}
XMem
*
mem
=
x
->
mem
;
XTensor
*
max
=
NULL
;
XTensor
*
sum
=
NULL
;
...
...
@@ -168,7 +167,6 @@ dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
dy_i/dx_j
= d{log(e^{x_i} / \sum_{k} e^{x_k})}/dx_j
= d{log(e^{x_i})}/dx_j - d{log(\sum_{k} e^{x_k})}/dx_j
...
...
source/function/LogSoftmax.cu
查看文件 @
0887fae1
...
...
@@ -41,7 +41,8 @@ void CudaLogSoftmax(XTensor * x, XTensor * y, int leadDim)
ShowNTErrors("You should call LogSoftmax instead!");
}
/* log softmax forward computation (Cuda kernel)
/*
log softmax forward computation (Cuda kernel)
for each column j, let y_{i,j} and x_{i,j} be the output
and state value for the i-th element of column j. We have
...
...
@@ -85,7 +86,8 @@ void KernelLogSoftmaxComputeByRow(DTYPE * x, DTYPE * max, DTYPE * sum, DTYPE * y
}
}
/* log softmax forward computation (Cuda kernel)
/*
log softmax forward computation (Cuda kernel)
for each row i, let y_{i,j} and x_{i,j} be the output
and state value for the j-th element of row i. We have
...
...
@@ -182,7 +184,7 @@ void CudaLogSoftmaxSumMax(XTensor * x, XTensor * y, int leadDim, XTensor * sum,
/*
set dE/dx = exp(y)
>> ded
u
- dE/dy
>> ded
y
- dE/dy
>> dedx - dE/dx
>> y - output of the function
>> size - size of output
...
...
@@ -256,7 +258,9 @@ dE/dx_j += -gold_j
>> gold - gold standard to measure error (or loss)
>> y - output of the function
>> x - input of the function
>> size - size of input/output
>> rowNum - row number of the matrix
>> colNum - column number of the matrix
>> gNonZeroNum -
>> lossName - name of the loss function
*/
__global__
...
...
@@ -293,7 +297,6 @@ dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
dy_i/dx_j
= d{log(e^{x_i} / \sum_{k} e^{x_k})}/dx_j
= d{log(e^{x_i})}/dx_j - d{log(\sum_{k} e^{x_k})}/dx_j
...
...
source/function/Loss.cu
查看文件 @
0887fae1
...
...
@@ -31,7 +31,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
loss function to measure the "number" of errors
*/
/*
compute the loss
>> gold - gold standard
...
...
source/function/Rectify.cu
查看文件 @
0887fae1
...
...
@@ -88,7 +88,6 @@ dy/dx = 1 if x >= 0
>> y - output of the function
>> x - input of the function
>> size - size of output/input
*/
__global__
void KernelRectifyBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size)
...
...
source/function/Sigmoid.cpp
查看文件 @
0887fae1
...
...
@@ -25,7 +25,6 @@
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/*
sigmoid function y = 1/(1+exp(-x))
>> x - input tensor
...
...
source/function/Sigmoid.cu
查看文件 @
0887fae1
...
...
@@ -95,7 +95,6 @@ sigmoid: y = 1/(1+exp(-x))
>> y - output of the function
>> x - input of the function
>> size - size of output/input
*/
__global__
void KernelSigmoidBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size)
...
...
@@ -122,7 +121,6 @@ sigmoid: y = 1/(1+exp(-x))
>> dedy - dE/dy
>> dedx - dE/dx
>> lossName - type of loss function, e.g., cross entropy
*/
void CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
...
...
source/function/Softmax.cuh
查看文件 @
0887fae1
...
...
@@ -29,7 +29,6 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */
extern "C"
void CudaSotmax(XTensor * input, XTensor * output, int leadDim);
...
...
source/test/TConcatenate.cpp
查看文件 @
0887fae1
...
...
@@ -22,8 +22,10 @@
#include "TConcatenate.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: concatenate a list of tensors along a given dimension.
* In this case, 2 * (2, 1) -> (2, 2), dim=1.
/*
case 1: concatenate a list of tensors along a given dimension.
In this case, 2 * (2, 1) -> (2, 2), dim=1.
*/
bool
TestConcatenate1
()
{
...
...
@@ -60,12 +62,12 @@ bool TestConcatenate1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
},
{
1.0
}
};
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
},
{
3.0
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
0.0
,
2.0
},
{
1.0
,
3.0
}
};
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
F
},
{
1.0
F
}
};
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
F
},
{
3.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
0.0
F
,
2.0
F
},
{
1.0
F
,
3.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -144,8 +146,9 @@ bool TestConcatenate1()
#endif // USE_CUDA
}
/* case 2: concatenate a list of tensors along a given dimension.
* In this case, 2 * (2, 1) -> (4, 1), dim=0.
/*
case 2: concatenate a list of tensors along a given dimension.
In this case, 2 * (2, 1) -> (4, 1), dim=0.
*/
bool
TestConcatenate2
()
{
...
...
@@ -182,14 +185,14 @@ bool TestConcatenate2()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
},
{
1.0
}
};
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
},
{
3.0
}
};
DTYPE
answer
[
4
][
1
]
=
{
{
0.0
},
{
1.0
},
{
2.0
},
{
3.0
}
};
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
F
},
{
1.0
F
}
};
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
F
},
{
3.0
F
}
};
DTYPE
answer
[
4
][
1
]
=
{
{
0.0
F
},
{
1.0
F
},
{
2.0
F
},
{
3.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -268,8 +271,9 @@ bool TestConcatenate2()
#endif // USE_CUDA
}
/* case 3: concatenate a list of tensors along a given dimension.
* In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
/*
case 3: concatenate a list of tensors along a given dimension.
In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
*/
bool
TestConcatenate3
()
{
...
...
@@ -306,12 +310,12 @@ bool TestConcatenate3()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
},
{
1.0
}
};
DTYPE
sData2
[
2
][
2
]
=
{
{
2.0
,
3.0
},
{
4.0
,
5.0
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
,
2.0
,
3.0
},
{
1.0
,
4.0
,
5.0
}
};
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
F
},
{
1.0
F
}
};
DTYPE
sData2
[
2
][
2
]
=
{
{
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
F
,
2.0
F
,
3.0
F
},
{
1.0
F
,
4.0
F
,
5.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -390,8 +394,9 @@ bool TestConcatenate3()
#endif // USE_CUDA
}
/* case 4: concatenate two tensors along a given dimension.
* In this case, (2, 1), (2, 2) -> (2, 3), dim=1.
/*
case 4: concatenate two tensors along a given dimension.
In this case, (2, 1), (2, 2) -> (2, 3), dim=1.
*/
bool
TestConcatenate4
()
{
...
...
@@ -425,12 +430,12 @@ bool TestConcatenate4()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
},
{
1.0
}
};
DTYPE
sData2
[
2
][
2
]
=
{
{
2.0
,
3.0
},
{
4.0
,
5.0
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
,
2.0
,
3.0
},
{
1.0
,
4.0
,
5.0
}
};
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
F
},
{
1.0
F
}
};
DTYPE
sData2
[
2
][
2
]
=
{
{
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
F
,
2.0
F
,
3.0
F
},
{
1.0
F
,
4.0
F
,
5.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -502,7 +507,6 @@ TODO!!
*/
/* test for Concatenate Function */
extern
"C"
bool
TestConcatenate
()
{
XPRINT
(
0
,
stdout
,
"[TEST CONCATENATE] concatenate a list of tensors or two tensors along a given dimension
\n
"
);
...
...
source/test/TConcatenateSolely.cpp
查看文件 @
0887fae1
...
...
@@ -19,12 +19,14 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14
*/
#include "TConcatenateSolely.h"
#include "../XList.h"
#include "TConcatenateSolely.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: concatenate a list of tensors along a given dimension
* In this case, 2 * (2, 1) -> (2, 2), dim=1.
/*
case 1: concatenate a list of tensors along a given dimension
In this case, 2 * (2, 1) -> (2, 2), dim=1.
*/
bool
TestConcatenateSolely1
()
{
...
...
@@ -61,12 +63,12 @@ bool TestConcatenateSolely1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
},
{
1.0
}
};
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
},
{
3.0
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
0.0
,
2.0
},
{
1.0
,
3.0
}
};
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
F
},
{
1.0
F
}
};
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
F
},
{
3.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
0.0
F
,
2.0
F
},
{
1.0
F
,
3.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -145,8 +147,9 @@ bool TestConcatenateSolely1()
#endif // USE_CUDA
}
/* case 2: concatenate a list of tensors along a given dimension
* In this case, 2 * (2, 1) -> (4, 1), dim=0.
/*
case 2: concatenate a list of tensors along a given dimension
In this case, 2 * (2, 1) -> (4, 1), dim=0.
*/
bool
TestConcatenateSolely2
()
{
...
...
@@ -183,14 +186,14 @@ bool TestConcatenateSolely2()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
},
{
1.0
}
};
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
},
{
3.0
}
};
DTYPE
answer
[
4
][
1
]
=
{
{
0.0
},
{
1.0
},
{
2.0
},
{
3.0
}
};
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
F
},
{
1.0
F
}
};
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
F
},
{
3.0
F
}
};
DTYPE
answer
[
4
][
1
]
=
{
{
0.0
F
},
{
1.0
F
},
{
2.0
F
},
{
3.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -269,8 +272,9 @@ bool TestConcatenateSolely2()
#endif // USE_CUDA
}
/* case 3: concatenate a list of tensors along a given dimension
* In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
/*
case 3: concatenate a list of tensors along a given dimension
In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
*/
bool
TestConcatenateSolely3
()
{
...
...
@@ -307,12 +311,12 @@ bool TestConcatenateSolely3()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
},
{
1.0
}
};
DTYPE
sData2
[
2
][
2
]
=
{
{
2.0
,
3.0
},
{
4.0
,
5.0
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
,
2.0
,
3.0
},
{
1.0
,
4.0
,
5.0
}
};
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
F
},
{
1.0
F
}
};
DTYPE
sData2
[
2
][
2
]
=
{
{
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
F
,
2.0
F
,
3.0
F
},
{
1.0
F
,
4.0
F
,
5.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -397,7 +401,6 @@ TODO!!
*/
/* test for ConcatenateSolely Function */
extern
"C"
bool
TestConcatenateSolely
()
{
XPRINT
(
0
,
stdout
,
"[TEST CONCATENATESOLELY] concatenate a list of tensors along a given dimension
\n
"
);
...
...
source/test/TCopyIndexed.cpp
查看文件 @
0887fae1
...
...
@@ -22,9 +22,11 @@
#include "TCopyIndexed.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1 copy indexed sub-tensors
* In this case, (3, 2, 3) -> (3, 2, 2), dim = 2, indexSize = 2,
* srcIndex = [0, 2], tgtIndex = [0, 1], copyNum = 1.
/*
case 1 copy indexed sub-tensors
In this case, (3, 2, 3) -> (3, 2, 2), dim = 2, indexSize = 2,
srcIndex = [0, 2], tgtIndex = [0, 1], copyNum = 1.
*/
bool
TestCopyIndexed1
()
{
...
...
@@ -50,19 +52,19 @@ bool TestCopyIndexed1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData
[
3
][
2
][
3
]
=
{
{
{
0.0
,
-
1.0
,
2.0
},
{
2.0
,
1.0
,
3.0
}
},
{
{
1.0
,
2.0
,
4.0
},
{
3.0
,
1.0
,
2.0
}},
{
{
-
1.0
,
3.0
,
2.0
},
{
1.0
,
-
1.0
,
0.0
}
}
};
DTYPE
answer
[
3
][
2
][
2
]
=
{
{
{
0.0
,
2.0
},
{
2.0
,
3.0
}
},
{
{
1.0
,
4.0
},
{
3.0
,
2.0
}},
{
{
-
1.0
,
2.0
},
{
1.0
,
0.0
}
}
};
DTYPE
sData
[
3
][
2
][
3
]
=
{
{
{
0.0
F
,
-
1.0
F
,
2.0
F
},
{
2.0
F
,
1.0
F
,
3.0
F
}
},
{
{
1.0
F
,
2.0
F
,
4.0
F
},
{
3.0
F
,
1.0
F
,
2.0
F
}},
{
{
-
1.0
F
,
3.0
F
,
2.0
F
},
{
1.0
F
,
-
1.0
F
,
0.0
F
}
}
};
DTYPE
answer
[
3
][
2
][
2
]
=
{
{
{
0.0
F
,
2.0
F
},
{
2.0
F
,
3.0
F
}
},
{
{
1.0
F
,
4.0
F
},
{
3.0
F
,
2.0
F
}},
{
{
-
1.0
F
,
2.0
F
},
{
1.0
F
,
0.0
F
}
}
};
int
dim
=
2
;
int
indexSize
=
2
;
int
srcIndex
[
2
]
=
{
0
,
2
};
...
...
@@ -131,7 +133,6 @@ TODO!!
*/
/* test for CopyIndexed Function */
extern
"C"
bool
TestCopyIndexed
()
{
XPRINT
(
0
,
stdout
,
"[TEST CopyIndexed] copy indexed sub-tensors
\n
"
);
...
...
source/test/TCopyValues.cpp
查看文件 @
0887fae1
...
...
@@ -23,6 +23,7 @@
#include "TCopyValues.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: copy tensor s to tensor t */
bool
TestCopyValues1
()
{
...
...
@@ -36,11 +37,11 @@ bool TestCopyValues1()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
scaleFactor
=
2.0
;
DTYPE
shiftFactor
=
0.5
;
DTYPE
scaleFactor
=
2.0
F
;
DTYPE
shiftFactor
=
0.5
F
;
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -105,7 +106,6 @@ TODO!!
*/
/* test for CopyValues Function */
extern
"C"
bool
TestCopyValues
()
{
XPRINT
(
0
,
stdout
,
"[TEST CopyValues] copy tensor s to tensor t
\n
"
);
...
...
source/test/THardTanH.cpp
查看文件 @
0887fae1
...
...
@@ -22,10 +22,11 @@
#include "THardTanH.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: hard tanh function */
bool
TestHardTanH1
()
{
/* a x tensor of size
2 * 3
*/
/* a x tensor of size
(2, 3)
*/
int
xOrder
=
2
;
int
*
xDimSize
=
new
int
[
xOrder
];
xDimSize
[
0
]
=
2
;
...
...
@@ -35,7 +36,7 @@ bool TestHardTanH1()
for
(
int
i
=
0
;
i
<
xOrder
;
i
++
)
xUnitNum
*=
xDimSize
[
i
];
/* a y tensor of size
2 * 3
*/
/* a y tensor of size
(2, 3)
*/
int
yOrder
=
2
;
int
*
yDimSize
=
new
int
[
yOrder
];
yDimSize
[
0
]
=
2
;
...
...
@@ -45,10 +46,10 @@ bool TestHardTanH1()
for
(
int
i
=
0
;
i
<
yOrder
;
i
++
)
yUnitNum
*=
yDimSize
[
i
];
DTYPE
xData
[
2
][
3
]
=
{
{
0.5
,
-
1.0
,
2.0
},
{
3.5
,
-
4.5
,
1.0
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.5
,
-
1.0
,
1.0
},
{
1.0
,
-
1.0
,
1.0
}
};
DTYPE
xData
[
2
][
3
]
=
{
{
0.5
F
,
-
1.0
F
,
2.0
F
},
{
3.5
F
,
-
4.5
F
,
1.0
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.5
F
,
-
1.0
F
,
1.0
F
},
{
1.0
F
,
-
1.0
F
,
1.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -86,25 +87,32 @@ bool TestHardTanH1()
gpuTest
=
yGPU
->
CheckData
(
answer
,
yUnitNum
,
1e-4
F
);
/* destroy variables */
delete
x
,
y
,
xGPU
,
yGPU
;
delete
[]
xDimSize
,
yDimSize
;
delete
x
;
delete
y
;
delete
xGPU
;
delete
yGPU
;
delete
[]
xDimSize
;
delete
[]
yDimSize
;
return
cpuTest
&&
gpuTest
;
#else
/* destroy variables */
delete
x
,
y
;
delete
[]
xDimSize
,
yDimSize
;
delete
x
;
delete
y
;
delete
[]
xDimSize
;
delete
[]
yDimSize
;
return
cpuTest
;
#endif // USE_CUDA
}
/* case 2: backward computation
* In this case, lossName=CROSSENTROPY.
/*
case 2: backward computation
In this case, lossName=CROSSENTROPY.
*/
bool
TestHardTanH2
()
{
/* a x tensor of size
2 * 3
*/
/* a x tensor of size
(2, 3)
*/
int
xOrder
=
2
;
int
*
xDimSize
=
new
int
[
xOrder
];
xDimSize
[
0
]
=
2
;
...
...
@@ -114,7 +122,7 @@ bool TestHardTanH2()
for
(
int
i
=
0
;
i
<
xOrder
;
i
++
)
xUnitNum
*=
xDimSize
[
i
];
/* a y tensor of size
2 * 3
*/
/* a y tensor of size
(2, 3)
*/
int
yOrder
=
2
;
int
*
yDimSize
=
new
int
[
yOrder
];
yDimSize
[
0
]
=
2
;
...
...
@@ -124,7 +132,7 @@ bool TestHardTanH2()
for
(
int
i
=
0
;
i
<
yOrder
;
i
++
)
yUnitNum
*=
yDimSize
[
i
];
/* a gold tensor of size
2 * 3
*/
/* a gold tensor of size
(2, 3)
*/
int
goldOrder
=
2
;
int
*
goldDimSize
=
new
int
[
goldOrder
];
goldDimSize
[
0
]
=
2
;
...
...
@@ -134,7 +142,7 @@ bool TestHardTanH2()
for
(
int
i
=
0
;
i
<
goldOrder
;
i
++
)
goldUnitNum
*=
goldDimSize
[
i
];
/* a dedy tensor of size
2 * 3
*/
/* a dedy tensor of size
(2, 3)
*/
int
dedyOrder
=
2
;
int
*
dedyDimSize
=
new
int
[
dedyOrder
];
dedyDimSize
[
0
]
=
2
;
...
...
@@ -144,7 +152,7 @@ bool TestHardTanH2()
for
(
int
i
=
0
;
i
<
dedyOrder
;
i
++
)
dedyUnitNum
*=
dedyDimSize
[
i
];
/* a dedx tensor of size
2 * 3
*/
/* a dedx tensor of size
(2, 3)
*/
int
dedxOrder
=
2
;
int
*
dedxDimSize
=
new
int
[
dedxOrder
];
dedxDimSize
[
0
]
=
2
;
...
...
@@ -154,16 +162,16 @@ bool TestHardTanH2()
for
(
int
i
=
0
;
i
<
dedxOrder
;
i
++
)
dedxUnitNum
*=
dedxDimSize
[
i
];
DTYPE
xData
[
2
][
3
]
=
{
{
0.5
,
-
1.0
,
2.0
},
{
3.5
,
-
4.5
,
1.0
}
};
DTYPE
yData
[
2
][
3
]
=
{
{
0.5
,
-
1.0
,
1.0
},
{
1.0
,
-
1.0
,
1.0
}
};
DTYPE
goldData
[
2
][
3
]
=
{
{
1.0
,
1.0
,
1.0
},
{
1.0
,
1.0
,
1.0
}
};
DTYPE
dedyData
[
2
][
3
]
=
{
{
-
2.0
,
1.0
,
-
1.0
},
{
-
1.0
,
1.0
,
-
1.0
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
-
2.0
,
1.0
,
0.0
},
{
0.0
,
0.0
,
-
1.0
}
};
DTYPE
xData
[
2
][
3
]
=
{
{
0.5
F
,
-
1.0
F
,
2.0
F
},
{
3.5
F
,
-
4.5
F
,
1.0
F
}
};
DTYPE
yData
[
2
][
3
]
=
{
{
0.5
F
,
-
1.0
F
,
1.0
F
},
{
1.0
F
,
-
1.0
F
,
1.0
F
}
};
DTYPE
goldData
[
2
][
3
]
=
{
{
1.0
F
,
1.0
F
,
1.0
F
},
{
1.0
F
,
1.0
F
,
1.0
F
}
};
DTYPE
dedyData
[
2
][
3
]
=
{
{
-
2.0
F
,
1.0
F
,
-
1.0
F
},
{
-
1.0
F
,
1.0
F
,
-
1.0
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
-
2.0
F
,
1.0
F
,
0.0
F
},
{
0.0
F
,
0.0
F
,
-
1.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -226,12 +234,13 @@ bool TestHardTanH2()
#endif // USE_CUDA
}
/* case 3: backward computation
* In this case, lossName=SQUAREDERROR.
/*
case 3: backward computation
In this case, lossName=SQUAREDERROR.
*/
bool
TestHardTanH3
()
{
/* a x tensor of size
2 * 3
*/
/* a x tensor of size
(2, 3)
*/
int
xOrder
=
2
;
int
*
xDimSize
=
new
int
[
xOrder
];
xDimSize
[
0
]
=
2
;
...
...
@@ -241,7 +250,7 @@ bool TestHardTanH3()
for
(
int
i
=
0
;
i
<
xOrder
;
i
++
)
xUnitNum
*=
xDimSize
[
i
];
/* a y tensor of size
2 * 3
*/
/* a y tensor of size
(2, 3)
*/
int
yOrder
=
2
;
int
*
yDimSize
=
new
int
[
yOrder
];
yDimSize
[
0
]
=
2
;
...
...
@@ -251,7 +260,7 @@ bool TestHardTanH3()
for
(
int
i
=
0
;
i
<
yOrder
;
i
++
)
yUnitNum
*=
yDimSize
[
i
];
/* a gold tensor of size
2 * 3
*/
/* a gold tensor of size
(2, 3)
*/
int
goldOrder
=
2
;
int
*
goldDimSize
=
new
int
[
goldOrder
];
goldDimSize
[
0
]
=
2
;
...
...
@@ -261,7 +270,7 @@ bool TestHardTanH3()
for
(
int
i
=
0
;
i
<
goldOrder
;
i
++
)
goldUnitNum
*=
goldDimSize
[
i
];
/* a dedy tensor of size
2 * 3
*/
/* a dedy tensor of size
(2, 3)
*/
int
dedyOrder
=
2
;
int
*
dedyDimSize
=
new
int
[
dedyOrder
];
dedyDimSize
[
0
]
=
2
;
...
...
@@ -271,7 +280,7 @@ bool TestHardTanH3()
for
(
int
i
=
0
;
i
<
dedyOrder
;
i
++
)
dedyUnitNum
*=
dedyDimSize
[
i
];
/* a dedx tensor of size
2 * 3
*/
/* a dedx tensor of size
(2, 3)
*/
int
dedxOrder
=
2
;
int
*
dedxDimSize
=
new
int
[
dedxOrder
];
dedxDimSize
[
0
]
=
2
;
...
...
@@ -281,16 +290,16 @@ bool TestHardTanH3()
for
(
int
i
=
0
;
i
<
dedxOrder
;
i
++
)
dedxUnitNum
*=
dedxDimSize
[
i
];
DTYPE
xData
[
2
][
3
]
=
{
{
0.5
,
-
1.0
,
2.0
},
{
3.5
,
-
4.5
,
1.0
}
};
DTYPE
yData
[
2
][
3
]
=
{
{
0.5
,
-
1.0
,
1.0
},
{
1.0
,
-
1.0
,
1.0
}
};
DTYPE
goldData
[
2
][
3
]
=
{
{
1.0
,
1.0
,
1.0
},
{
1.0
,
1.0
,
1.0
}
};
DTYPE
dedyData
[
2
][
3
]
=
{
{
-
0.5
,
-
2.0
,
0.0
},
{
0.0
,
-
2.0
,
0.0
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
-
0.5
,
-
2.0
,
0.0
},
{
0.0
,
0.0
,
0.0
}
};
DTYPE
xData
[
2
][
3
]
=
{
{
0.5
F
,
-
1.0
F
,
2.0
F
},
{
3.5
F
,
-
4.5
F
,
1.0
F
}
};
DTYPE
yData
[
2
][
3
]
=
{
{
0.5
F
,
-
1.0
F
,
1.0
F
},
{
1.0
F
,
-
1.0
F
,
1.0
F
}
};
DTYPE
goldData
[
2
][
3
]
=
{
{
1.0
F
,
1.0
F
,
1.0
F
},
{
1.0
F
,
1.0
F
,
1.0
F
}
};
DTYPE
dedyData
[
2
][
3
]
=
{
{
-
0.5
F
,
-
2.0
F
,
0.0
F
},
{
0.0
F
,
-
2.0
F
,
0.0
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
-
0.5
F
,
-
2.0
F
,
0.0
F
},
{
0.0
F
,
0.0
F
,
0.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -353,12 +362,13 @@ bool TestHardTanH3()
#endif // USE_CUDA
}
/* case 4: backward computation
* In this case, lossName=ONEHOTERROR.
/*
case 4: backward computation
In this case, lossName=ONEHOTERROR.
*/
bool
TestHardTanH4
()
{
/* a x tensor of size
2 * 3
*/
/* a x tensor of size
(2, 3)
*/
int
xOrder
=
2
;
int
*
xDimSize
=
new
int
[
xOrder
];
xDimSize
[
0
]
=
2
;
...
...
@@ -368,7 +378,7 @@ bool TestHardTanH4()
for
(
int
i
=
0
;
i
<
xOrder
;
i
++
)
xUnitNum
*=
xDimSize
[
i
];
/* a y tensor of size
2 * 3
*/
/* a y tensor of size
(2, 3)
*/
int
yOrder
=
2
;
int
*
yDimSize
=
new
int
[
yOrder
];
yDimSize
[
0
]
=
2
;
...
...
@@ -378,7 +388,7 @@ bool TestHardTanH4()
for
(
int
i
=
0
;
i
<
yOrder
;
i
++
)
yUnitNum
*=
yDimSize
[
i
];
/* a gold tensor of size
2 * 3
*/
/* a gold tensor of size
(2, 3)
*/
int
goldOrder
=
2
;
int
*
goldDimSize
=
new
int
[
goldOrder
];
goldDimSize
[
0
]
=
2
;
...
...
@@ -388,7 +398,7 @@ bool TestHardTanH4()
for
(
int
i
=
0
;
i
<
goldOrder
;
i
++
)
goldUnitNum
*=
goldDimSize
[
i
];
/* a dedy tensor of size
2 * 3
*/
/* a dedy tensor of size
(2, 3)
*/
int
dedyOrder
=
2
;
int
*
dedyDimSize
=
new
int
[
dedyOrder
];
dedyDimSize
[
0
]
=
2
;
...
...
@@ -398,7 +408,7 @@ bool TestHardTanH4()
for
(
int
i
=
0
;
i
<
dedyOrder
;
i
++
)
dedyUnitNum
*=
dedyDimSize
[
i
];
/* a dedx tensor of size
2 * 3
*/
/* a dedx tensor of size
(2, 3)
*/
int
dedxOrder
=
2
;
int
*
dedxDimSize
=
new
int
[
dedxOrder
];
dedxDimSize
[
0
]
=
2
;
...
...
@@ -408,16 +418,16 @@ bool TestHardTanH4()
for
(
int
i
=
0
;
i
<
dedxOrder
;
i
++
)
dedxUnitNum
*=
dedxDimSize
[
i
];
DTYPE
xData
[
2
][
3
]
=
{
{
0.5
,
-
1.0
,
2.0
},
{
3.5
,
-
4.5
,
1.0
}
};
DTYPE
yData
[
2
][
3
]
=
{
{
0.5
,
-
1.0
,
1.0
},
{
1.0
,
-
1.0
,
1.0
}
};
DTYPE
goldData
[
2
][
3
]
=
{
{
1.0
,
0.0
,
1.0
},
{
0.0
,
1.0
,
1.0
}
};
DTYPE
dedyData
[
2
][
3
]
=
{
{
-
0.5
,
0.0
,
0.0
},
{
0.0
,
-
2.0
,
0.0
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
-
0.5
,
0.0
,
0.0
},
{
0.0
,
0.0
,
0.0
}
};
DTYPE
xData
[
2
][
3
]
=
{
{
0.5
F
,
-
1.0
F
,
2.0
F
},
{
3.5
F
,
-
4.5
F
,
1.0
F
}
};
DTYPE
yData
[
2
][
3
]
=
{
{
0.5
F
,
-
1.0
F
,
1.0
F
},
{
1.0
F
,
-
1.0
F
,
1.0
F
}
};
DTYPE
goldData
[
2
][
3
]
=
{
{
1.0
F
,
0.0
F
,
1.0
F
},
{
0.0
F
,
1.0
F
,
1.0
F
}
};
DTYPE
dedyData
[
2
][
3
]
=
{
{
-
0.5
F
,
0.0
F
,
0.0
F
},
{
0.0
F
,
-
2.0
F
,
0.0
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
-
0.5
F
,
0.0
F
,
0.0
F
},
{
0.0
F
,
0.0
F
,
0.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -486,10 +496,9 @@ TODO!!
*/
/* test for HardTanH Function */
extern
"C"
bool
TestHardTanH
()
{
XPRINT
(
0
,
stdout
,
"[TEST HARDTANH]
-------------
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST HARDTANH]
test hardtanh and its backward computation
\n
"
);
bool
returnFlag
=
true
,
caseFlag
=
true
;
/* case 1 test */
...
...
source/test/TIdentity.cpp
查看文件 @
0887fae1
...
...
@@ -23,8 +23,10 @@
#include "TIdentity.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: test Identity function.
* Identity function: y = x
/*
case 1: test Identity function.
Identity function: y = x
*/
bool
TestIdentity1
()
{
...
...
@@ -38,10 +40,10 @@ bool TestIdentity1()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
DTYPE
xData
[
2
][
3
]
=
{
{
0.0
,
1.0
,
2.0
},
{
0.5
,
0.7
,
1.4
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
,
1.0
,
2.0
},
{
0.5
,
0.7
,
1.4
}
};
DTYPE
xData
[
2
][
3
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
},
{
0.5
F
,
0.7
F
,
1.4
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
},
{
0.5
F
,
0.7
F
,
1.4
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -93,8 +95,9 @@ bool TestIdentity1()
#endif // USE_CUDA
}
/* case 2: test IdentityBackward function.
* IdentityBackward function: dE/dx = dE/dy * dy/dx = dE/dy
/*
case 2: test IdentityBackward function.
IdentityBackward function: dE/dx = dE/dy * dy/dx = dE/dy
*/
bool
TestIdentity2
()
{
...
...
@@ -107,9 +110,9 @@ bool TestIdentity2()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
DTYPE
xData
[
1
][
3
]
=
{
{
0.0
,
1.0
,
2.0
}
};
DTYPE
gData
[
1
][
3
]
=
{
{
0.0
,
0.0
,
1.0
}
};
DTYPE
dedxAnswer
[
3
]
=
{
0.090031
,
0.244728
,
-
0.334759
};
DTYPE
xData
[
1
][
3
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
}
};
DTYPE
gData
[
1
][
3
]
=
{
{
0.0
F
,
0.0
F
,
1.0
F
}
};
DTYPE
dedxAnswer
[
3
]
=
{
0.090031
F
,
0.244728
F
,
-
0.334759
F
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -135,7 +138,7 @@ bool TestIdentity2()
IdentityBackward
(
g
,
y
,
x
,
dedy
,
dedx
,
CROSSENTROPY
);
/* check result */
cpuTest
=
dedx
->
CheckData
(
dedxAnswer
,
sUnitNum
);
cpuTest
=
dedx
->
CheckData
(
dedxAnswer
,
sUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -162,7 +165,7 @@ bool TestIdentity2()
IdentityBackward
(
gGPU
,
yGPU
,
xGPU
,
dedyGPU
,
dedxGPU
,
CROSSENTROPY
);
/* check result */
gpuTest
=
dedxGPU
->
CheckData
(
dedxAnswer
,
sUnitNum
);
gpuTest
=
dedxGPU
->
CheckData
(
dedxAnswer
,
sUnitNum
,
1e-4
F
);
/* destroy variables */
delete
x
;
...
...
@@ -197,7 +200,6 @@ bool TestIdentity2()
*/
/* test for Identity Function */
extern
"C"
bool
TestIdentity
()
{
XPRINT
(
0
,
stdout
,
"[TEST Identity] identity function and its backward computation
\n
"
);
...
...
@@ -213,15 +215,15 @@ bool TestIdentity()
else
XPRINT
(
0
,
stdout
,
">> case 1 passed!
\n
"
);
/
//
* case 2 test */
//
caseFlag = TestIdentity2();
/* case 2 test */
caseFlag
=
TestIdentity2
();
//
if (!caseFlag) {
//
returnFlag = false;
//
XPRINT(0, stdout, ">> case 2 failed!\n");
//
}
//
else
//
XPRINT(0, stdout, ">> case 2 passed!\n");
if
(
!
caseFlag
)
{
returnFlag
=
false
;
XPRINT
(
0
,
stdout
,
">> case 2 failed!
\n
"
);
}
else
XPRINT
(
0
,
stdout
,
">> case 2 passed!
\n
"
);
/* other cases test */
/*
...
...
source/test/TLogSoftmax.cpp
查看文件 @
0887fae1
...
...
@@ -23,8 +23,10 @@
#include "TLogSoftmax.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: test LogSoftmax function.
* LogSoftmax function: y = log(e^x / \sum_{i} e^{x_i})
/*
case 1: test LogSoftmax function.
LogSoftmax function: y = log(e^x / \sum_{i} e^{x_i})
*/
bool
TestLogSoftmax1
()
{
...
...
@@ -38,10 +40,10 @@ bool TestLogSoftmax1()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
DTYPE
xData
[
2
][
3
]
=
{
{
0.0
,
1.0
,
2.0
},
{
0.5
,
0.7
,
1.4
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
-
2.4076
,
-
1.4076
,
-
0.4076
},
{
-
1.5435
,
-
1.3435
,
-
0.6435
}
};
DTYPE
xData
[
2
][
3
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
},
{
0.5
F
,
0.7
F
,
1.4
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
-
2.4076
F
,
-
1.4076
F
,
-
0.4076
F
},
{
-
1.5435
F
,
-
1.3435
F
,
-
0.6435
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -58,7 +60,7 @@ bool TestLogSoftmax1()
LogSoftmax
(
x
,
y
,
1
);
/* check result */
cpuTest
=
y
->
CheckData
(
answer
,
sUnitNum
);
cpuTest
=
y
->
CheckData
(
answer
,
sUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -76,7 +78,7 @@ bool TestLogSoftmax1()
LogSoftmax
(
xGPU
,
yGPU
,
1
);
/* check result */
gpuTest
=
yGPU
->
CheckData
(
answer
,
sUnitNum
);
gpuTest
=
yGPU
->
CheckData
(
answer
,
sUnitNum
,
1e-4
F
);
/* destroy variables */
delete
x
;
...
...
@@ -97,9 +99,10 @@ bool TestLogSoftmax1()
#endif // USE_CUDA
}
/* case 2: test LogSoftmaxBackward function.
* dE/dx = dE/dy * dy/dx
* log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
/*
case 2: test LogSoftmaxBackward function.
dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
*/
bool
TestLogSoftmax2
()
{
...
...
@@ -112,10 +115,10 @@ bool TestLogSoftmax2()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
DTYPE
xData
[
3
]
=
{
0.0
,
1.0
,
2.0
};
DTYPE
gData
[
3
]
=
{
0.5
,
0.8
,
1.5
};
DTYPE
yAnswer
[
3
]
=
{
-
2.4076
,
-
1.4076
,
-
0.4076
};
DTYPE
dedxAnswer
[
3
]
=
{
-
0.409969
,
-
0.555272
,
-
0.834759
};
DTYPE
xData
[
3
]
=
{
0.0
F
,
1.0
F
,
2.0
F
};
DTYPE
gData
[
3
]
=
{
0.5
F
,
0.8
F
,
1.5
F
};
DTYPE
yAnswer
[
3
]
=
{
-
2.4076
F
,
-
1.4076
F
,
-
0.4076
F
};
DTYPE
dedxAnswer
[
3
]
=
{
-
0.409969
F
,
-
0.555272
F
,
-
0.834759
F
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -141,7 +144,7 @@ bool TestLogSoftmax2()
LogSoftmaxBackward
(
g
,
y
,
x
,
dedy
,
dedx
,
0
,
CROSSENTROPY
);
/* check result */
cpuTest
=
y
->
CheckData
(
yAnswer
,
sUnitNum
)
&&
dedx
->
CheckData
(
dedxAnswer
,
sUnitNum
);
cpuTest
=
y
->
CheckData
(
yAnswer
,
sUnitNum
,
1e-4
F
)
&&
dedx
->
CheckData
(
dedxAnswer
,
sUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -168,7 +171,7 @@ bool TestLogSoftmax2()
LogSoftmaxBackward
(
gGPU
,
yGPU
,
xGPU
,
dedyGPU
,
dedxGPU
,
0
,
CROSSENTROPY
);
/* check result */
gpuTest
=
yGPU
->
CheckData
(
yAnswer
,
sUnitNum
)
&&
dedxGPU
->
CheckData
(
dedxAnswer
,
sUnitNum
);
gpuTest
=
yGPU
->
CheckData
(
yAnswer
,
sUnitNum
,
1e-4
F
)
&&
dedxGPU
->
CheckData
(
dedxAnswer
,
sUnitNum
,
1e-4
F
);
/* destroy variables */
delete
x
;
...
...
@@ -197,9 +200,10 @@ bool TestLogSoftmax2()
#endif // USE_CUDA
}
/* case 3: test LogSoftmaxBackward function.
* dE/dx = dE/dy * dy/dx
* log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
/*
case 3: test LogSoftmaxBackward function.
dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
*/
bool
TestLogSoftmax3
()
{
...
...
@@ -213,10 +217,10 @@ bool TestLogSoftmax3()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
DTYPE
xData
[
1
][
3
]
=
{
{
0.0
,
1.0
,
2.0
}
};
DTYPE
gData
[
1
][
3
]
=
{
{
0.5
,
0.8
,
1.5
}
};
DTYPE
yAnswer
[
1
][
3
]
=
{
-
2.4076
,
-
1.4076
,
-
0.4076
};
DTYPE
dedxAnswer
[
1
][
3
]
=
{
-
0.409969
,
-
0.555272
,
-
0.834759
};
DTYPE
xData
[
1
][
3
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
}
};
DTYPE
gData
[
1
][
3
]
=
{
{
0.5
F
,
0.8
F
,
1.5
F
}
};
DTYPE
yAnswer
[
1
][
3
]
=
{
-
2.4076
F
,
-
1.4076
F
,
-
0.4076
F
};
DTYPE
dedxAnswer
[
1
][
3
]
=
{
-
0.409969
F
,
-
0.555272
F
,
-
0.834759
F
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -242,7 +246,7 @@ bool TestLogSoftmax3()
LogSoftmaxBackward
(
g
,
y
,
x
,
dedy
,
dedx
,
1
,
CROSSENTROPY
);
/* check result */
cpuTest
=
y
->
CheckData
(
yAnswer
,
sUnitNum
)
&&
dedx
->
CheckData
(
dedxAnswer
,
sUnitNum
);
cpuTest
=
y
->
CheckData
(
yAnswer
,
sUnitNum
,
1e-4
F
)
&&
dedx
->
CheckData
(
dedxAnswer
,
sUnitNum
,
1e-4
F
);
#ifdef USE_CUDA
/* GPU test */
...
...
@@ -269,7 +273,7 @@ bool TestLogSoftmax3()
LogSoftmaxBackward
(
gGPU
,
yGPU
,
xGPU
,
dedyGPU
,
dedxGPU
,
1
,
CROSSENTROPY
);
/* check result */
gpuTest
=
yGPU
->
CheckData
(
yAnswer
,
sUnitNum
)
&&
dedxGPU
->
CheckData
(
dedxAnswer
,
sUnitNum
);
gpuTest
=
yGPU
->
CheckData
(
yAnswer
,
sUnitNum
,
1e-4
F
)
&&
dedxGPU
->
CheckData
(
dedxAnswer
,
sUnitNum
,
1e-4
F
);
/* destroy variables */
delete
x
;
...
...
@@ -305,7 +309,6 @@ bool TestLogSoftmax3()
*/
/* test for LogSoftmax Function */
extern
"C"
bool
TestLogSoftmax
()
{
XPRINT
(
0
,
stdout
,
"[TEST LogSoftmax] test log softmax function and its backward computation
\n
"
);
...
...
@@ -321,15 +324,15 @@ bool TestLogSoftmax()
else
XPRINT
(
0
,
stdout
,
">> case 1 passed!
\n
"
);
/
//
* case 2 test */
//
caseFlag = TestLogSoftmax2();
/* case 2 test */
caseFlag
=
TestLogSoftmax2
();
//
if (!caseFlag) {
//
returnFlag = false;
//
XPRINT(0, stdout, ">> case 2 failed!\n");
//
}
//
else
//
XPRINT(0, stdout, ">> case 2 passed!\n");
if
(
!
caseFlag
)
{
returnFlag
=
false
;
XPRINT
(
0
,
stdout
,
">> case 2 failed!
\n
"
);
}
else
XPRINT
(
0
,
stdout
,
">> case 2 passed!
\n
"
);
/* case 3 test */
caseFlag
=
TestLogSoftmax3
();
...
...
source/test/TLoss.cpp
查看文件 @
0887fae1
...
...
@@ -23,10 +23,12 @@
#include "../function/Loss.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: test LossCompute function
* In this case, Loss function name = SQUAREDERROR.
* loss = sum_{i} 0.5*(t_i - y_i)^2,
* where t_i is the gold standard and y_i is the model output
/*
case 1: test LossCompute function
In this case, Loss function name = SQUAREDERROR.
loss = sum_{i} 0.5*(t_i - y_i)^2,
where t_i is the gold standard and y_i is the model output
*/
bool
TestLoss1
()
{
...
...
@@ -99,10 +101,11 @@ bool TestLoss1()
#endif // USE_CUDA
}
/* case 2: test LossCompute function
* In this case, Loss function name = CROSSENTROPY.
* loss = sum_{i} (-t_i * log(y_i))
* where t_i is the gold standard and y_i is the model output
/*
case 2: test LossCompute function
In this case, Loss function name = CROSSENTROPY.
loss = sum_{i} (-t_i * log(y_i))
where t_i is the gold standard and y_i is the model output
*/
bool
TestLoss2
()
{
...
...
@@ -175,10 +178,11 @@ bool TestLoss2()
#endif // USE_CUDA
}
/* case 3: test LossCompute function
* In this case, Loss function name = ONEHOTERROR.
* loss = sum_{i} e_i
* where e_i = 0.5*(t_i - y_i)^2 if t_i = 1, e_i = 0 otherwise
/*
case 3: test LossCompute function
In this case, Loss function name = ONEHOTERROR.
loss = sum_{i} e_i
where e_i = 0.5*(t_i - y_i)^2 if t_i = 1, e_i = 0 otherwise
*/
bool
TestLoss3
()
{
...
...
@@ -191,16 +195,16 @@ bool TestLoss3()
int
unitNum
=
1
;
for
(
int
i
=
0
;
i
<
order
;
i
++
)
unitNum
*=
dimSize
[
i
];
DTYPE
outputData
[
5
][
1
]
=
{
{
0.5
},
{
0.5
},
{
0.5
},
{
0.5
},
{
0.5
}
};
DTYPE
goldData
[
5
][
1
]
=
{
{
1.0
},
{
1.0
},
{
0.0
},
{
0.0
},
{
0.0
}
};
DTYPE
outputData
[
5
][
1
]
=
{
{
0.5
F
},
{
0.5
F
},
{
0.5
F
},
{
0.5
F
},
{
0.5
F
}
};
DTYPE
goldData
[
5
][
1
]
=
{
{
1.0
F
},
{
1.0
F
},
{
0.0
F
},
{
0.0
F
},
{
0.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -263,7 +267,6 @@ TODO!!
*/
/* test for Loss Function */
extern
"C"
bool
TestLoss
()
{
XPRINT
(
0
,
stdout
,
"[TEST Loss] compute the loss
\n
"
);
...
...
source/test/TMatrixMULBatchedCPU.cpp
查看文件 @
0887fae1
...
...
@@ -22,9 +22,10 @@
#include "TMatrixMULBatchedCPU.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: matrix multiplication in batch mode (CPU code).
* In this case, aList=2*(2, 3), bList=2*(3, 2) -> c=2*(2, 2),
* transposedA=X_NOTRANS, transposedB=X_NOTRANS.
/*
case 1: matrix multiplication in batch mode (CPU code).
In this case, aList=2*(2, 3), bList=2*(3, 2) -> c=2*(2, 2), transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool
TestMatrixMulBatchedCPU1
()
{
...
...
@@ -63,20 +64,20 @@ bool TestMatrixMulBatchedCPU1()
for
(
int
i
=
0
;
i
<
cOrder
;
i
++
)
cUnitNum
*=
cDimSize
[
i
];
DTYPE
aData1
[
2
][
3
]
=
{
{
1.0
,
2.0
,
3.0
},
{
-
4.0
,
5.0
,
6.0
}
};
DTYPE
aData2
[
2
][
3
]
=
{
{
1.0
,
-
2.0
,
-
3.0
},
{
-
4.0
,
3.0
,
2.0
}
};
DTYPE
bData1
[
3
][
2
]
=
{
{
0.0
,
-
1.0
},
{
1.0
,
2.0
},
{
2.0
,
1.0
}
};
DTYPE
bData2
[
3
][
2
]
=
{
{
0.0
,
1.0
},
{
3.0
,
2.0
},
{
2.0
,
1.0
}
};
DTYPE
answer1
[
2
][
2
]
=
{
{
8.0
,
6.0
},
{
17.0
,
20.0
}
};
DTYPE
answer2
[
2
][
2
]
=
{
{
-
12.0
,
-
6.0
},
{
13.0
,
4.0
}
};
DTYPE
aData1
[
2
][
3
]
=
{
{
1.0
F
,
2.0
F
,
3.0
F
},
{
-
4.0
F
,
5.0
F
,
6.0
F
}
};
DTYPE
aData2
[
2
][
3
]
=
{
{
1.0
F
,
-
2.0
F
,
-
3.0
F
},
{
-
4.0
F
,
3.0
F
,
2.0
F
}
};
DTYPE
bData1
[
3
][
2
]
=
{
{
0.0
F
,
-
1.0
F
},
{
1.0
F
,
2.0
F
},
{
2.0
F
,
1.0
F
}
};
DTYPE
bData2
[
3
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
3.0
F
,
2.0
F
},
{
2.0
F
,
1.0
F
}
};
DTYPE
answer1
[
2
][
2
]
=
{
{
8.0
F
,
6.0
F
},
{
17.0
F
,
20.0
F
}
};
DTYPE
answer2
[
2
][
2
]
=
{
{
-
12.0
F
,
-
6.0
F
},
{
13.0
F
,
4.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
source/test/TMatrixMul.cpp
查看文件 @
0887fae1
...
...
@@ -22,9 +22,11 @@
#include "TMatrixMul.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: matrix multiplication.
* In this case, a=(2, 3), b=(3, 2) -> c=(2, 2),
* transposedA=X_NOTRANS, transposedB=X_NOTRANS.
/*
case 1: matrix multiplication.
In this case, a=(2, 3), b=(3, 2) -> c=(2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool
TestMatrixMul1
()
{
...
...
@@ -58,13 +60,13 @@ bool TestMatrixMul1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
3
]
=
{
{
1.0
,
2.0
,
3.0
},
{
-
4.0
,
5.0
,
6.0
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
,
-
1.0
},
{
1.0
,
2.0
},
{
2.0
,
1.0
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
,
6.0
},
{
17.0
,
20.0
}
};
DTYPE
sData1
[
2
][
3
]
=
{
{
1.0
F
,
2.0
F
,
3.0
F
},
{
-
4.0
F
,
5.0
F
,
6.0
F
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
F
,
-
1.0
F
},
{
1.0
F
,
2.0
F
},
{
2.0
F
,
1.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
F
,
6.0
F
},
{
17.0
F
,
20.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -130,9 +132,10 @@ bool TestMatrixMul1()
#endif // USE_CUDA
}
/* case 2: matrix multiplication.
* In this case, a=(3, 2), b=(3, 2) -> c=(2, 2),
* transposedA=X_TRANS, transposedB=X_NOTRANS.
/*
case 2: matrix multiplication.
In this case, a=(3, 2), b=(3, 2) -> c=(2, 2),
transposedA=X_TRANS, transposedB=X_NOTRANS.
*/
bool
TestMatrixMul2
()
{
...
...
@@ -166,14 +169,14 @@ bool TestMatrixMul2()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
3
][
2
]
=
{
{
1.0
,
-
4.0
},
{
2.0
,
5.0
},
{
3.0
,
6.0
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
,
-
1.0
},
{
1.0
,
2.0
},
{
2.0
,
1.0
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
,
6.0
},
{
17.0
,
20.0
}
};
DTYPE
sData1
[
3
][
2
]
=
{
{
1.0
F
,
-
4.0
F
},
{
2.0
F
,
5.0
F
},
{
3.0
F
,
6.0
F
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
F
,
-
1.0
F
},
{
1.0
F
,
2.0
F
},
{
2.0
F
,
1.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
F
,
6.0
F
},
{
17.0
F
,
20.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -239,9 +242,10 @@ bool TestMatrixMul2()
#endif // USE_CUDA
}
/* case 3: matrix multiplication.
* In this case, a=(3, 2, 3), b=(2, 3, 2) -> c=(3, 2, 2, 2),
* transposedA=X_NOTRANS, transposedB=X_NOTRANS.
/*
case 3: matrix multiplication.
In this case, a=(3, 2, 3), b=(2, 3, 2) -> c=(3, 2, 2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool
TestMatrixMul3
()
{
...
...
@@ -279,30 +283,30 @@ bool TestMatrixMul3()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
3
][
2
][
3
]
=
{
{
{
0.0
,
-
1.0
,
2.0
},
{
2.0
,
1.0
,
3.0
}
},
{
{
1.0
,
2.0
,
4.0
},
{
3.0
,
1.0
,
2.0
}},
{
{
-
1.0
,
3.0
,
2.0
},
{
1.0
,
-
1.0
,
0.0
}
}
};
DTYPE
sData2
[
2
][
3
][
2
]
=
{
{
{
1.0
,
2.0
},
{
-
4.0
,
3.0
},
{
2.0
,
6.0
}
},
{
{
1.0
,
2.0
},
{
3.0
,
4.0
},
{
5.0
,
6.0
}
}
};
DTYPE
answer
[
3
][
2
][
2
][
2
]
=
{
{
{
{
8.0
,
9.0
},
{
4.0
,
25.0
}
},
{
{
7.0
,
8.0
},
{
20.0
,
26.0
}
}
},
{
{
{
1.0
,
32.0
},
{
3.0
,
21.0
}
},
{
{
27.0
,
34.0
},
{
16.0
,
22.0
}
}
},
{
{
{
-
9.0
,
19.0
},
{
5.0
,
-
1.0
}
},
{
{
18.0
,
22.0
},
{
-
2.0
,
-
2.0
}
}
}
};
DTYPE
sData1
[
3
][
2
][
3
]
=
{
{
{
0.0
F
,
-
1.0
F
,
2.0
F
},
{
2.0
F
,
1.0
F
,
3.0
F
}
},
{
{
1.0
F
,
2.0
F
,
4.0
F
},
{
3.0
F
,
1.0
F
,
2.0
F
}},
{
{
-
1.0
F
,
3.0
F
,
2.0
F
},
{
1.0
F
,
-
1.0
F
,
0.0
F
}
}
};
DTYPE
sData2
[
2
][
3
][
2
]
=
{
{
{
1.0
F
,
2.0
F
},
{
-
4.0
F
,
3.0
F
},
{
2.0
F
,
6.0
F
}
},
{
{
1.0
F
,
2.0
F
},
{
3.0
F
,
4.0
F
},
{
5.0
F
,
6.0
F
}
}
};
DTYPE
answer
[
3
][
2
][
2
][
2
]
=
{
{
{
{
8.0
F
,
9.0
F
},
{
4.0
F
,
25.0
F
}
},
{
{
7.0
F
,
8.0
F
},
{
20.0
F
,
26.0
F
}
}
},
{
{
{
1.0
F
,
32.0
F
},
{
3.0
F
,
21.0
F
}
},
{
{
27.0
F
,
34.0
F
},
{
16.0
F
,
22.0
F
}
}
},
{
{
{
-
9.0
F
,
19.0
F
},
{
5.0
F
,
-
1.0
F
}
},
{
{
18.0
F
,
22.0
F
},
{
-
2.0
F
,
-
2.0
F
}
}
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -368,9 +372,10 @@ bool TestMatrixMul3()
#endif // USE_CUDA
}
/* case 4: matrix multiplication.
* In this case, a=(3, 2, 3), b=(3, 2) -> c=(3, 2, 2),
* transposedA=X_NOTRANS, transposedB=X_NOTRANS.
/*
case 4: matrix multiplication.
In this case, a=(3, 2, 3), b=(3, 2) -> c=(3, 2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool
TestMatrixMul4
()
{
...
...
@@ -406,21 +411,21 @@ bool TestMatrixMul4()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
3
][
2
][
3
]
=
{
{
{
0.0
,
-
1.0
,
2.0
},
{
2.0
,
1.0
,
3.0
}
},
{
{
1.0
,
2.0
,
4.0
},
{
3.0
,
1.0
,
2.0
}},
{
{
-
1.0
,
3.0
,
2.0
},
{
1.0
,
-
1.0
,
0.0
}
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
1.0
,
2.0
},
{
3.0
,
4.0
},
{
5.0
,
6.0
}
};
DTYPE
answer
[
3
][
2
][
2
]
=
{
{
{
7.0
,
8.0
},
{
20.0
,
26.0
}
},
{
{
27.0
,
34.0
},
{
16.0
,
22.0
}
},
{
{
18.0
,
22.0
},
{
-
2.0
,
-
2.0
}
}
};
DTYPE
sData1
[
3
][
2
][
3
]
=
{
{
{
0.0
F
,
-
1.0
F
,
2.0
F
},
{
2.0
F
,
1.0
F
,
3.0
F
}
},
{
{
1.0
F
,
2.0
F
,
4.0
F
},
{
3.0
F
,
1.0
F
,
2.0
F
}},
{
{
-
1.0
F
,
3.0
F
,
2.0
F
},
{
1.0
F
,
-
1.0
F
,
0.0
F
}
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
1.0
F
,
2.0
F
},
{
3.0
F
,
4.0
F
},
{
5.0
F
,
6.0
F
}
};
DTYPE
answer
[
3
][
2
][
2
]
=
{
{
{
7.0
F
,
8.0
F
},
{
20.0
F
,
26.0
F
}
},
{
{
27.0
F
,
34.0
F
},
{
16.0
F
,
22.0
F
}
},
{
{
18.0
F
,
22.0
F
},
{
-
2.0
F
,
-
2.0
F
}
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -493,7 +498,6 @@ bool TestMatrixMul4()
*/
/* test for MatrixMul Function */
extern
"C"
bool
TestMatrixMul
()
{
XPRINT
(
0
,
stdout
,
"[TEST MATRIXMUL] matrix multiplication
\n
"
);
...
...
source/test/TMatrixMul2D.cpp
查看文件 @
0887fae1
...
...
@@ -22,9 +22,11 @@
#include "TMatrixMul2D.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: matrix multiplication (for 2d tensors).
* In this case, a=(2, 3), b=(3, 2) -> c=(2, 2),
* transposedA=X_NOTRANS, transposedB=X_NOTRANS.
/*
case 1: matrix multiplication (for 2d tensors).
In this case, a=(2, 3), b=(3, 2) -> c=(2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool
TestMatrixMul2D1
()
{
...
...
@@ -58,13 +60,13 @@ bool TestMatrixMul2D1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
3
]
=
{
{
1.0
,
2.0
,
3.0
},
{
-
4.0
,
5.0
,
6.0
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
,
-
1.0
},
{
1.0
,
2.0
},
{
2.0
,
1.0
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
,
6.0
},
{
17.0
,
20.0
}
};
DTYPE
sData1
[
2
][
3
]
=
{
{
1.0
F
,
2.0
F
,
3.0
F
},
{
-
4.0
F
,
5.0
F
,
6.0
F
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
F
,
-
1.0
F
},
{
1.0
F
,
2.0
F
},
{
2.0
F
,
1.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
F
,
6.0
F
},
{
17.0
F
,
20.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -130,9 +132,10 @@ bool TestMatrixMul2D1()
#endif // USE_CUDA
}
/* case 2: matrix multiplication (for 2d tensors).
* In this case, a=(3, 2), b=(3, 2) -> c=(2, 2),
* transposedA=X_TRANS, transposedB=X_NOTRANS.
/*
case 2: matrix multiplication (for 2d tensors).
In this case, a=(3, 2), b=(3, 2) -> c=(2, 2),
transposedA=X_TRANS, transposedB=X_NOTRANS.
*/
bool
TestMatrixMul2D2
()
{
...
...
@@ -166,14 +169,14 @@ bool TestMatrixMul2D2()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
3
][
2
]
=
{
{
1.0
,
-
4.0
},
{
2.0
,
5.0
},
{
3.0
,
6.0
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
,
-
1.0
},
{
1.0
,
2.0
},
{
2.0
,
1.0
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
,
6.0
},
{
17.0
,
20.0
}
};
DTYPE
sData1
[
3
][
2
]
=
{
{
1.0
F
,
-
4.0
F
},
{
2.0
F
,
5.0
F
},
{
3.0
F
,
6.0
F
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
F
,
-
1.0
F
},
{
1.0
F
,
2.0
F
},
{
2.0
F
,
1.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
F
,
6.0
F
},
{
17.0
F
,
20.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
source/test/TMatrixMul2DParallel.cpp
查看文件 @
0887fae1
...
...
@@ -22,9 +22,11 @@
#include "TMatrixMul2DParallel.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: matrix multiplication (for 2d tensors) with multi-threading.
* In this case, a=(2, 3), b=(3, 2) -> c=(2, 2),
* transposedA=X_NOTRANS, transposedB=X_NOTRANS.
/*
case 1: matrix multiplication (for 2d tensors) with multi-threading.
In this case, a=(2, 3), b=(3, 2) -> c=(2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool
TestMatrixMul2DParallel1
()
{
...
...
@@ -58,13 +60,13 @@ bool TestMatrixMul2DParallel1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
3
]
=
{
{
1.0
,
2.0
,
3.0
},
{
-
4.0
,
5.0
,
6.0
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
,
-
1.0
},
{
1.0
,
2.0
},
{
2.0
,
1.0
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
,
6.0
},
{
17.0
,
20.0
}
};
DTYPE
sData1
[
2
][
3
]
=
{
{
1.0
F
,
2.0
F
,
3.0
F
},
{
-
4.0
F
,
5.0
F
,
6.0
F
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
F
,
-
1.0
F
},
{
1.0
F
,
2.0
F
},
{
2.0
F
,
1.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
F
,
6.0
F
},
{
17.0
F
,
20.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -96,9 +98,10 @@ bool TestMatrixMul2DParallel1()
return
cpuTest
;
}
/* case 2: matrix multiplication (for 2d tensors) with multi-threading.
* In this case, a=(3, 2), b=(3, 2) -> c=(2, 2),
* transposedA=X_TRANS, transposedB=X_NOTRANS.
/*
case 2: matrix multiplication (for 2d tensors) with multi-threading.
In this case, a=(3, 2), b=(3, 2) -> c=(2, 2),
transposedA=X_TRANS, transposedB=X_NOTRANS.
*/
bool
TestMatrixMul2DParallel2
()
{
...
...
@@ -132,14 +135,14 @@ bool TestMatrixMul2DParallel2()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
3
][
2
]
=
{
{
1.0
,
-
4.0
},
{
2.0
,
5.0
},
{
3.0
,
6.0
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
,
-
1.0
},
{
1.0
,
2.0
},
{
2.0
,
1.0
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
,
6.0
},
{
17.0
,
20.0
}
};
DTYPE
sData1
[
3
][
2
]
=
{
{
1.0
F
,
-
4.0
F
},
{
2.0
F
,
5.0
F
},
{
3.0
F
,
6.0
F
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
F
,
-
1.0
F
},
{
1.0
F
,
2.0
F
},
{
2.0
F
,
1.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
F
,
6.0
F
},
{
17.0
F
,
20.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -177,7 +180,6 @@ bool TestMatrixMul2DParallel2()
*/
/* test for MatrixMul2DParallel Function */
extern
"C"
bool
TestMatrixMul2DParallel
()
{
XPRINT
(
0
,
stdout
,
"[TEST MatrixMul2DParallel] matrix multiplication (for 2d tensors) with multi-threading
\n
"
);
...
...
source/test/TMatrixMulBatched.cpp
查看文件 @
0887fae1
...
...
@@ -22,9 +22,10 @@
#include "TMatrixMULBatched.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: matrix multiplication of the two tensors.
* In this case, a=(2, 3), b=(2, 3) -> c=(2, 2), transposedA=X_NOTRANS,
transposedB=X_NOTRANS.
/*
case 1: matrix multiplication of the two tensors.
In this case, a=(2, 3), b=(3, 2) -> c=(2, 2), transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool
TestMatrixMulBatched1
()
{
...
...
@@ -58,13 +59,13 @@ bool TestMatrixMulBatched1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
3
]
=
{
{
1.0
,
2.0
,
3.0
},
{
-
4.0
,
5.0
,
6.0
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
,
-
1.0
},
{
1.0
,
2.0
},
{
2.0
,
1.0
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
,
6.0
},
{
17.0
,
20.0
}
};
DTYPE
sData1
[
2
][
3
]
=
{
{
1.0
F
,
2.0
F
,
3.0
F
},
{
-
4.0
F
,
5.0
F
,
6.0
F
}
};
DTYPE
sData2
[
3
][
2
]
=
{
{
0.0
F
,
-
1.0
F
},
{
1.0
F
,
2.0
F
},
{
2.0
F
,
1.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
8.0
F
,
6.0
F
},
{
17.0
F
,
20.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -130,9 +131,9 @@ bool TestMatrixMulBatched1()
#endif // USE_CUDA
}
/*
case 2: matrix multiplication of the two tensors.
* In this case, a=(2, 2, 3), b=(2, 3, 2) -> c=(2, 2, 2),
*
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
/*
case 2: matrix multiplication of the two tensors.
In this case, a=(2, 2, 3), b=(2, 3, 2) -> c=(2, 2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool
TestMatrixMulBatched2
()
{
...
...
@@ -169,20 +170,20 @@ bool TestMatrixMulBatched2()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
2
][
3
]
=
{
{
{
0.0
,
-
1.0
,
2.0
},
{
2.0
,
1.0
,
3.0
}
},
{
{
1.0
,
2.0
,
4.0
},
{
3.0
,
1.0
,
2.0
}
}
};
DTYPE
sData2
[
2
][
3
][
2
]
=
{
{
{
1.0
,
2.0
},
{
-
4.0
,
3.0
},
{
2.0
,
6.0
}
},
{
{
1.0
,
2.0
},
{
3.0
,
4.0
},
{
5.0
,
6.0
}
}
};
DTYPE
answer
[
2
][
2
][
2
]
=
{
{
{
8.0
,
9.0
},
{
4.0
,
25.0
}
},
{
{
27.0
,
34.0
},
{
16.0
,
22.0
}
}
};
DTYPE
sData1
[
2
][
2
][
3
]
=
{
{
{
0.0
F
,
-
1.0
F
,
2.0
F
},
{
2.0
F
,
1.0
F
,
3.0
F
}
},
{
{
1.0
F
,
2.0
F
,
4.0
F
},
{
3.0
F
,
1.0
F
,
2.0
F
}
}
};
DTYPE
sData2
[
2
][
3
][
2
]
=
{
{
{
1.0
F
,
2.0
F
},
{
-
4.0
F
,
3.0
F
},
{
2.0
F
,
6.0
F
}
},
{
{
1.0
F
,
2.0
F
},
{
3.0
F
,
4.0
F
},
{
5.0
F
,
6.0
F
}
}
};
DTYPE
answer
[
2
][
2
][
2
]
=
{
{
{
8.0
F
,
9.0
F
},
{
4.0
F
,
25.0
F
}
},
{
{
27.0
F
,
34.0
F
},
{
16.0
F
,
22.0
F
}
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -254,7 +255,6 @@ bool TestMatrixMulBatched2()
*/
/* test for TestMatrixMulBatched Function */
extern
"C"
bool
TestMatrixMulBatched
()
{
XPRINT
(
0
,
stdout
,
"[TEST MATRIXMULBATCHED] matrix multiplication of the two tensors
\n
"
);
...
...
source/test/TMerge.cpp
查看文件 @
0887fae1
...
...
@@ -24,8 +24,10 @@
#include "TMerge.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: transform a tensor by merging it along with a dimension.
* In this case, (3, 2) -> (6), whereToMerge=1, leadingDim=0.
/*
case 1: transform a tensor by merging it along with a dimension.
In this case, (3, 2) -> (6), whereToMerge=1, leadingDim=0.
*/
bool
TestMerge1
()
{
...
...
@@ -48,9 +50,9 @@ bool TestMerge1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData
[
2
][
3
]
=
{
{
0.0
,
1.0
,
2.0
},
{
3.0
,
4.0
,
5.0
}
};
DTYPE
answer
[
6
]
=
{
0.0
,
1.0
,
2.0
,
3.0
,
4.0
,
5.0
};
DTYPE
sData
[
2
][
3
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
},
{
3.0
F
,
4.0
F
,
5.0
F
}
};
DTYPE
answer
[
6
]
=
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
,
4.0
F
,
5.0
F
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -107,8 +109,9 @@ bool TestMerge1()
#endif // USE_CUDA
}
/* case 2: transform a tensor by merging it along with a dimension.
* In this case,
/*
case 2: transform a tensor by merging it along with a dimension.
In this case,
(2, 2, 3) -> (4, 3), whereToMerge=1, leadingDim=0.
(2, 2, 3) -> (2, 6), whereToMerge=2, leadingDim=0.
*/
...
...
@@ -145,16 +148,16 @@ bool TestMerge2()
for
(
int
i
=
0
;
i
<
tOrder2
;
i
++
)
tUnitNum2
*=
tDimSize2
[
i
];
DTYPE
sData
[
2
][
2
][
3
]
=
{
{
{
0.0
,
1.0
,
2.0
},
{
4.0
,
5.0
,
6.0
}
},
{
{
-
1.0
,
2.0
,
3.0
},
{
-
4.0
,
-
5.0
,
-
6.0
}
}
};
DTYPE
answer1
[
4
][
3
]
=
{
{
0.0
,
1.0
,
2.0
},
{
4.0
,
5.0
,
6.0
},
{
-
1.0
,
2.0
,
3.0
},
{
-
4.0
,
-
5.0
,
-
6.0
}
};
DTYPE
answer2
[
2
][
6
]
=
{
{
0.0
,
1.0
,
2.0
,
-
1.0
,
2.0
,
3.0
},
{
4.0
,
5.0
,
6.0
,
-
4.0
,
-
5.0
,
-
6.0
}
};
DTYPE
sData
[
2
][
2
][
3
]
=
{
{
{
0.0
F
,
1.0
F
,
2.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
}
},
{
{
-
1.0
F
,
2.0
F
,
3.0
F
},
{
-
4.0
F
,
-
5.0
F
,
-
6.0
F
}
}
};
DTYPE
answer1
[
4
][
3
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
},
{
-
1.0
F
,
2.0
F
,
3.0
F
},
{
-
4.0
F
,
-
5.0
F
,
-
6.0
F
}
};
DTYPE
answer2
[
2
][
6
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
-
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
-
4.0
F
,
-
5.0
F
,
-
6.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -222,7 +225,8 @@ bool TestMerge2()
#endif // USE_CUDA
}
/* case 3: merge small tensors into a big tensor.
/*
case 3: merge small tensors into a big tensor.
In this case, 2 * (2, 4) -> (4, 4), whereToMerge=0.
*/
bool
TestMerge3
()
...
...
@@ -240,10 +244,10 @@ bool TestMerge3()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
DTYPE
sData1
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
DTYPE
sData2
[
2
][
4
]
=
{
{
0.0
,
-
1.0
,
-
2.0
,
-
3.0
},
{
-
4.0
,
-
5.0
,
-
6.0
,
-
7.0
}
};
DTYPE
sData1
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
sData2
[
2
][
4
]
=
{
{
0.0
F
,
-
1.0
F
,
-
2.0
F
,
-
3.0
F
},
{
-
4.0
F
,
-
5.0
F
,
-
6.0
F
,
-
7.0
F
}
};
/* a target tensor of size (4, 4) */
int
tOrder
=
2
;
...
...
@@ -255,10 +259,10 @@ bool TestMerge3()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
answer
[
4
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
{
4.0
,
5.0
,
6.0
,
7.0
},
{
0.0
,
-
1.0
,
-
2.0
,
-
3.0
},
{
-
4.0
,
-
5.0
,
-
6.0
,
-
7.0
}
};
DTYPE
answer
[
4
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
},
{
0.0
F
,
-
1.0
F
,
-
2.0
F
,
-
3.0
F
},
{
-
4.0
F
,
-
5.0
F
,
-
6.0
F
,
-
7.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -336,7 +340,8 @@ bool TestMerge3()
#endif // USE_CUDA
}
/* case 4: merge small tensors into a big tensor.
/*
case 4: merge small tensors into a big tensor.
In this case, 2 * (2, 4) -> (2, 8), whereToMerge=1.
*/
bool
TestMerge4
()
...
...
@@ -354,10 +359,10 @@ bool TestMerge4()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
DTYPE
sData1
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
DTYPE
sData2
[
2
][
4
]
=
{
{
0.0
,
-
1.0
,
-
2.0
,
-
3.0
},
{
-
4.0
,
-
5.0
,
-
6.0
,
-
7.0
}
};
DTYPE
sData1
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
sData2
[
2
][
4
]
=
{
{
0.0
F
,
-
1.0
F
,
-
2.0
F
,
-
3.0
F
},
{
-
4.0
F
,
-
5.0
F
,
-
6.0
F
,
-
7.0
F
}
};
/* a target tensor of size (2, 8) */
int
tOrder
=
2
;
...
...
@@ -369,8 +374,8 @@ bool TestMerge4()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
answer
[
2
][
8
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
,
0.0
,
-
1.0
,
-
2.0
,
-
3.0
},
{
4.0
,
5.0
,
6.0
,
7.0
,
-
4.0
,
-
5.0
,
-
6.0
,
-
7.0
}
};
DTYPE
answer
[
2
][
8
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
,
0.0
F
,
-
1.0
F
,
-
2.0
F
,
-
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
,
-
4.0
F
,
-
5.0
F
,
-
6.0
F
,
-
7.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -454,7 +459,6 @@ bool TestMerge4()
*/
/* test for Merge Function */
extern
"C"
bool
TestMerge
()
{
XPRINT
(
0
,
stdout
,
"[TEST MERGE] transform a tensor by merging it alone with a dimension or merge small tensors into a big tensor
\n
"
);
...
...
source/test/TMultiplyElementWise.cpp
查看文件 @
0887fae1
...
...
@@ -22,9 +22,11 @@
#include "TMultiplyElementWise.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: element-wise product of two tensors
* c(i) = a(i)*b(i) + \alpha * c(i)
* In this case, (2, 1) (2, 1) -> (2, 1), leadingDim=0, alpha=0.
/*
case 1: element-wise product of two tensors
c(i) = a(i)*b(i) + \alpha * c(i)
In this case, (2, 1) (2, 1) -> (2, 1), leadingDim=0, alpha=0.
*/
bool
TestMultiplyElementWise1
()
{
...
...
@@ -58,12 +60,12 @@ bool TestMultiplyElementWise1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
},
{
1.0
}
};
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
},
{
3.0
}
};
DTYPE
answer
[
2
][
1
]
=
{
{
0.0
},
{
3.0
}
};
DTYPE
sData1
[
2
][
1
]
=
{
{
0.0
F
},
{
1.0
F
}
};
DTYPE
sData2
[
2
][
1
]
=
{
{
2.0
F
},
{
3.0
F
}
};
DTYPE
answer
[
2
][
1
]
=
{
{
0.0
F
},
{
3.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -129,9 +131,10 @@ bool TestMultiplyElementWise1()
#endif // USE_CUDA
}
/* case 2: element-wise product of two tensors
* c(i) = a(i)*b(i) + \alpha * c(i)
* In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=0, alpha=0.
/*
case 2: element-wise product of two tensors
c(i) = a(i)*b(i) + \alpha * c(i)
In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=0, alpha=0.
*/
bool
TestMultiplyElementWise2
()
{
...
...
@@ -165,12 +168,12 @@ bool TestMultiplyElementWise2()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
2
]
=
{
{
0.0
,
1.0
},
{
2.0
,
3.0
}
};
DTYPE
sData2
[
2
][
2
]
=
{
{
0.0
,
1.0
},
{
2.0
,
3.0
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
0.0
,
1.0
},
{
4.0
,
9.0
}
};
DTYPE
sData1
[
2
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
2.0
F
,
3.0
F
}
};
DTYPE
sData2
[
2
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
2.0
F
,
3.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
4.0
F
,
9.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -236,8 +239,9 @@ bool TestMultiplyElementWise2()
#endif // USE_CUDA
}
/* case 3: element-wise product of two tensors, c(i) = a(i)*b(i) + \alpha * c(i)
* In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=1, alpha=0.
/*
case 3: element-wise product of two tensors, c(i) = a(i)*b(i) + \alpha * c(i)
In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=1, alpha=0.
*/
bool
TestMultiplyElementWise3
()
{
...
...
@@ -271,12 +275,12 @@ bool TestMultiplyElementWise3()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData1
[
2
][
2
]
=
{
{
0.0
,
1.0
},
{
2.0
,
3.0
}
};
DTYPE
sData2
[
2
][
2
]
=
{
{
0.0
,
1.0
},
{
2.0
,
3.0
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
0.0
,
1.0
},
{
4.0
,
9.0
}
};
DTYPE
sData1
[
2
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
2.0
F
,
3.0
F
}
};
DTYPE
sData2
[
2
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
2.0
F
,
3.0
F
}
};
DTYPE
answer
[
2
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
4.0
F
,
9.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -348,7 +352,6 @@ TODO!!
*/
/* test for MultiplyElementWise Function */
extern
"C"
bool
TestMultiplyElementWise
()
{
XPRINT
(
0
,
stdout
,
"[TEST MULTIPLYELEMENTWISE] element-wise product of two tensors
\n
"
);
...
...
source/test/TNegate.cpp
查看文件 @
0887fae1
...
...
@@ -22,6 +22,7 @@
#include "TNegate.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: set every entry to its minus value */
bool
TestNegate1
()
{
...
...
@@ -35,12 +36,12 @@ bool TestNegate1()
for
(
int
i
=
0
;
i
<
aOrder
;
i
++
)
aUnitNum
*=
aDimSize
[
i
];
DTYPE
aData
[
3
][
2
]
=
{
{
1.0
,
-
2.0
},
{
-
3.0
,
4.0
},
{
5.0
,
-
6.0
}
};
DTYPE
answer
[
3
][
2
]
=
{
{
-
1.0
,
2.0
},
{
3.0
,
-
4.0
},
{
-
5.0
,
6.0
}
};
DTYPE
aData
[
3
][
2
]
=
{
{
1.0
F
,
-
2.0
F
},
{
-
3.0
F
,
4.0
F
},
{
5.0
F
,
-
6.0
F
}
};
DTYPE
answer
[
3
][
2
]
=
{
{
-
1.0
F
,
2.0
F
},
{
3.0
F
,
-
4.0
F
},
{
-
5.0
F
,
6.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -101,12 +102,12 @@ bool TestNegate2()
for
(
int
i
=
0
;
i
<
aOrder
;
i
++
)
aUnitNum
*=
aDimSize
[
i
];
DTYPE
aData
[
3
][
2
]
=
{
{
0.0
,
0.0
},
{
0.0
,
0.0
},
{
0.0
,
0.0
}
};
DTYPE
answer
[
3
][
2
]
=
{
{
-
0.0
,
-
0.0
},
{
-
0.0
,
-
0.0
},
{
-
0.0
,
-
0.0
}
};
DTYPE
aData
[
3
][
2
]
=
{
{
0.0
F
,
0.0
F
},
{
0.0
F
,
0.0
F
},
{
0.0
F
,
0.0
F
}
};
DTYPE
answer
[
3
][
2
]
=
{
{
-
0.0
F
,
-
0.0
F
},
{
-
0.0
F
,
-
0.0
F
},
{
-
0.0
F
,
-
0.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -160,7 +161,6 @@ TODO!!
*/
/* test for Negate Function */
extern
"C"
bool
TestNegate
()
{
XPRINT
(
0
,
stdout
,
"[TEST NEGATE] set every entry to its minus value
\n
"
);
...
...
source/test/TNormalize.cpp
查看文件 @
0887fae1
...
...
@@ -22,10 +22,12 @@
#include "TNormalize.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: normalized the data with normal distribution
* For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b.
* where a and b are the scalar and bias respectively,
* and \epsilon is the adjustment parameter.
/*
case 1: normalized the data with normal distribution
For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b.
where a and b are the scalar and bias respectively,
and \epsilon is the adjustment parameter.
*/
bool
TestNormalize1
()
{
...
...
@@ -87,14 +89,14 @@ bool TestNormalize1()
for
(
int
i
=
0
;
i
<
bOrder
;
i
++
)
bUnitNum
*=
bDimSize
[
i
];
DTYPE
sData
[
2
][
3
]
=
{
{
1.0
,
2.0
,
3.0
},
{
1.5
,
2.5
,
3.5
}
};
DTYPE
meanData
[
3
]
=
{
1.0
,
1.5
,
2.0
};
DTYPE
varData
[
3
]
=
{
1.0
,
1.0
,
4.0
};
DTYPE
aData
[
2
][
3
]
=
{
{
1.0
,
1.0
,
1.0
},
{
1.0
,
1.0
,
1.0
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
,
0.5
,
0.5
},
{
0.5
,
1.0
,
0.75
}
};
DTYPE
sData
[
2
][
3
]
=
{
{
1.0
F
,
2.0
F
,
3.0
F
},
{
1.5
F
,
2.5
F
,
3.5
F
}
};
DTYPE
meanData
[
3
]
=
{
1.0
F
,
1.5
F
,
2.0
F
};
DTYPE
varData
[
3
]
=
{
1.0
F
,
1.0
F
,
4.0
F
};
DTYPE
aData
[
2
][
3
]
=
{
{
1.0
F
,
1.0
F
,
1.0
F
},
{
1.0
F
,
1.0
F
,
1.0
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
F
,
0.5
F
,
0.5
F
},
{
0.5
F
,
1.0
F
,
0.75
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -116,7 +118,7 @@ bool TestNormalize1()
t
->
SetZeroAll
();
/* call normalize function */
Normalize
(
s
,
t
,
0
,
mean
,
var
,
a
,
b
,
0.0
);
Normalize
(
s
,
t
,
0
,
mean
,
var
,
a
,
b
,
0.0
F
);
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
,
1e-4
,
0
);
...
...
@@ -142,7 +144,7 @@ bool TestNormalize1()
tGPU
->
SetZeroAll
();
/* call Normalize function */
Normalize
(
sGPU
,
tGPU
,
0
,
meanGPU
,
varGPU
,
aGPU
,
bGPU
,
0.0
);
Normalize
(
sGPU
,
tGPU
,
0
,
meanGPU
,
varGPU
,
aGPU
,
bGPU
,
0.0
F
);
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
tUnitNum
,
1e-4
,
0
);
...
...
@@ -193,7 +195,6 @@ TODO!!
*/
/* test for Normalize Function */
extern
"C"
bool
TestNormalize
()
{
XPRINT
(
0
,
stdout
,
"[TEST NORMALIZE] normalized the data with normal distribution
\n
"
);
...
...
source/test/TPower.cpp
查看文件 @
0887fae1
...
...
@@ -23,8 +23,10 @@
#include "TPower.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: get the power(a, p)
* In this case, p=2.
/*
case 1: get the power(a, p)
In this case, p=2.
*/
bool
TestPower1
()
{
...
...
@@ -38,12 +40,12 @@ bool TestPower1()
for
(
int
i
=
0
;
i
<
aOrder
;
i
++
)
aUnitNum
*=
aDimSize
[
i
];
DTYPE
aData
[
3
][
2
]
=
{
{
1.0
,
2.0
},
{
3.0
,
4.0
},
{
5.0
,
6.0
}
};
DTYPE
answer
[
3
][
2
]
=
{
{
1.0
,
4.0
},
{
9.0
,
16.0
},
{
25.0
,
36.0
}
};
DTYPE
aData
[
3
][
2
]
=
{
{
1.0
F
,
2.0
F
},
{
3.0
F
,
4.0
F
},
{
5.0
F
,
6.0
F
}
};
DTYPE
answer
[
3
][
2
]
=
{
{
1.0
F
,
4.0
F
},
{
9.0
F
,
16.0
F
},
{
25.0
F
,
36.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -55,7 +57,7 @@ bool TestPower1()
a
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
Power
(
a
,
2.0
);
Power
(
a
,
2.0
F
);
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
...
...
@@ -71,7 +73,7 @@ bool TestPower1()
aGPU
->
SetData
(
aData
,
aUnitNum
);
/* call power function */
Power
(
aGPU
,
2.0
);
Power
(
aGPU
,
2.0
F
);
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
...
...
@@ -91,8 +93,9 @@ bool TestPower1()
#endif // USE_CUDA
}
/* case 2: get the power(a, p)
* In this case, p=1.
/*
case 2: get the power(a, p)
In this case, p=1.
*/
bool
TestPower2
()
{
...
...
@@ -106,12 +109,12 @@ bool TestPower2()
for
(
int
i
=
0
;
i
<
aOrder
;
i
++
)
aUnitNum
*=
aDimSize
[
i
];
DTYPE
aData
[
3
][
2
]
=
{
{
0.0
,
1.0
},
{
2.0
,
3.0
},
{
4.0
,
5.0
}
};
DTYPE
answer
[
3
][
2
]
=
{
{
0.0
,
1.0
},
{
2.0
,
3.0
},
{
4.0
,
5.0
}
};
DTYPE
aData
[
3
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
}
};
DTYPE
answer
[
3
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -123,7 +126,7 @@ bool TestPower2()
a
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
Power
(
a
,
1.0
);
Power
(
a
,
1.0
F
);
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
...
...
@@ -139,7 +142,7 @@ bool TestPower2()
aGPU
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
Power
(
aGPU
,
1.0
);
Power
(
aGPU
,
1.0
F
);
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
...
...
@@ -159,8 +162,9 @@ bool TestPower2()
#endif // USE_CUDA
}
/* case 3: get the power(a, p)
* In this case, p=0.
/*
case 3: get the power(a, p)
In this case, p=0.
*/
bool
TestPower3
()
{
...
...
@@ -174,12 +178,12 @@ bool TestPower3()
for
(
int
i
=
0
;
i
<
aOrder
;
i
++
)
aUnitNum
*=
aDimSize
[
i
];
DTYPE
aData
[
3
][
2
]
=
{
{
0.0
,
1.0
},
{
2.0
,
3.0
},
{
4.0
,
5.0
}
};
DTYPE
answer
[
3
][
2
]
=
{
{
1.0
,
1.0
},
{
1.0
,
1.0
},
{
1.0
,
1.0
}
};
DTYPE
aData
[
3
][
2
]
=
{
{
0.0
F
,
1.0
F
},
{
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
}
};
DTYPE
answer
[
3
][
2
]
=
{
{
1.0
F
,
1.0
F
},
{
1.0
F
,
1.0
F
},
{
1.0
F
,
1.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -191,7 +195,7 @@ bool TestPower3()
a
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
Power
(
a
,
0.0
);
Power
(
a
,
0.0
F
);
/* check results */
cpuTest
=
a
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
...
...
@@ -207,7 +211,7 @@ bool TestPower3()
aGPU
->
SetData
(
aData
,
aUnitNum
);
/* call Power function */
Power
(
aGPU
,
0.0
);
Power
(
aGPU
,
0.0
F
);
/* check results */
gpuTest
=
aGPU
->
CheckData
(
answer
,
aUnitNum
,
1e-4
F
);
...
...
@@ -233,7 +237,6 @@ TODO!!
*/
/* test for Power Function */
extern
"C"
bool
TestPower
()
{
XPRINT
(
0
,
stdout
,
"[TEST POWER] get the power(a, p)
\n
"
);
...
...
source/test/TRectify.cpp
查看文件 @
0887fae1
...
...
@@ -22,8 +22,10 @@
#include "TRectify.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: test rectify function
* y = max(0, x)
/*
case 1: test rectify function
In this case, y = max(0, x)
*/
bool
TestRectify1
()
{
...
...
@@ -47,10 +49,10 @@ bool TestRectify1()
for
(
int
i
=
0
;
i
<
yOrder
;
i
++
)
yUnitNum
*=
yDimSize
[
i
];
DTYPE
xData
[
2
][
3
]
=
{
{
0.0
,
-
1.0
,
2.0
},
{
3.0
,
-
4.0
,
-
5.0
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
,
0.0
,
2.0
},
{
3.0
,
0.0
,
0.0
}
};
DTYPE
xData
[
2
][
3
]
=
{
{
0.0
F
,
-
1.0
F
,
2.0
F
},
{
3.0
F
,
-
4.0
F
,
-
5.0
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
0.0
F
,
0.0
F
,
2.0
F
},
{
3.0
F
,
0.0
F
,
0.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -107,10 +109,11 @@ bool TestRectify1()
#endif // USE_CUDA
}
/* case 2: backward computation
* dE/dx = dE/dy * dy/dx
* rectified: y = max(0, x)
* In this case, lossName=CROSSENTROPY.
/*
case 2: backward computation
dE/dx = dE/dy * dy/dx
rectified: y = max(0, x)
In this case, lossName=CROSSENTROPY.
*/
bool
TestRectify2
()
{
...
...
@@ -124,16 +127,16 @@ bool TestRectify2()
for
(
int
i
=
0
;
i
<
xOrder
;
i
++
)
xUnitNum
*=
xDimSize
[
i
];
DTYPE
xData
[
2
][
3
]
=
{
{
1.0
,
1.0
,
2.0
},
{
2.0
,
4.0
,
5.0
}
};
DTYPE
yData
[
2
][
3
]
=
{
{
1.0
,
1.0
,
2.0
},
{
2.0
,
4.0
,
5.0
}
};
DTYPE
goldData
[
2
][
3
]
=
{
{
1.0
,
1.0
,
1.0
},
{
1.0
,
1.0
,
1.0
}
};
DTYPE
dedyData
[
2
][
3
]
=
{
{
-
1.0
,
-
1.0
,
-
0.5
},
{
-
0.5
,
-
0.25
,
-
0.2
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
-
1.0
,
-
1.0
,
-
0.5
},
{
-
0.5
,
-
0.25
,
-
0.2
}
};
DTYPE
xData
[
2
][
3
]
=
{
{
1.0
F
,
1.0
F
,
2.0
F
},
{
2.0
F
,
4.0
F
,
5.0
F
}
};
DTYPE
yData
[
2
][
3
]
=
{
{
1.0
F
,
1.0
F
,
2.0
F
},
{
2.0
F
,
4.0
F
,
5.0
F
}
};
DTYPE
goldData
[
2
][
3
]
=
{
{
1.0
F
,
1.0
F
,
1.0
F
},
{
1.0
F
,
1.0
F
,
1.0
F
}
};
DTYPE
dedyData
[
2
][
3
]
=
{
{
-
1.0
F
,
-
1.0
F
,
-
0.5
F
},
{
-
0.5
F
,
-
0.25
F
,
-
0.2
F
}
};
DTYPE
answer
[
2
][
3
]
=
{
{
-
1.0
F
,
-
1.0
F
,
-
0.5
F
},
{
-
0.5
F
,
-
0.25
F
,
-
0.2
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -215,7 +218,6 @@ TODO!!
*/
/* test for Rectify Function */
extern
"C"
bool
TestRectify
()
{
XPRINT
(
0
,
stdout
,
"[TEST RECTIFY] test rectify and its backward computation
\n
"
);
...
...
source/test/TReduceMax.cpp
查看文件 @
0887fae1
...
...
@@ -22,8 +22,10 @@
#include "TReduceMax.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: get the max value of the items along a dimension of the tensor.
* In this case,
/*
case 1: get the max value of the items along a dimension of the tensor.
In this case,
(2, 4) -> (4), dim = 0
(2, 4) -> (2), dim = 1
*/
...
...
@@ -57,10 +59,10 @@ bool TestReduceMax1()
for
(
int
i
=
0
;
i
<
tOrder2
;
i
++
)
tUnitNum2
*=
tDimSize2
[
i
];
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
,
5.0
,
2.0
,
3.0
},
{
4.0
,
1.0
,
6.0
,
7.0
}
};
DTYPE
answer1
[
4
]
=
{
4.0
,
5.0
,
6.0
,
7.0
};
DTYPE
answer2
[
2
]
=
{
5.0
,
7.0
};
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
F
,
5.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
1.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
answer1
[
4
]
=
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
};
DTYPE
answer2
[
2
]
=
{
5.0
F
,
7.0
F
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -134,7 +136,6 @@ TODO!!
*/
/* test for ReduceMax Function */
extern
"C"
bool
TestReduceMax
()
{
XPRINT
(
0
,
stdout
,
"[TEST ReduceMax] get the max value of the items along a dimension of the tensor
\n
"
);
...
...
source/test/TReduceMean.cpp
查看文件 @
0887fae1
...
...
@@ -22,6 +22,7 @@
#include "TReduceMean.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: get the mean value along a dimension of the tensor */
bool
TestReduceMean1
()
{
...
...
@@ -53,10 +54,10 @@ bool TestReduceMean1()
for
(
int
i
=
0
;
i
<
tOrder2
;
i
++
)
tUnitNum2
*=
tDimSize2
[
i
];
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
DTYPE
answer1
[
4
]
=
{
2.0
,
3.0
,
4.0
,
5.0
};
DTYPE
answer2
[
2
]
=
{
1.5
,
5.5
};
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
answer1
[
4
]
=
{
2.0
F
,
3.0
F
,
4.0
F
,
5.0
F
};
DTYPE
answer2
[
2
]
=
{
1.5
F
,
5.5
F
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -124,104 +125,12 @@ bool TestReduceMean1()
#endif // USE_CUDA
}
/*
case 2: get the mean value along a dimension of a large-scale tensor.
In this case, every entry of the source tensor is 1 and
(10000, 500) -> (500), dim = 0
(10000, 500) -> (10000), dim = 1
so every entry of both reduced tensors is expected to be 1.
>> return - true if the CPU check (and, with USE_CUDA, the second check) passes
*/
bool TestReduceMeanForLargescale()
{
    /* a tensor of size (10000, 500) */
    int order = 2;
    int order_reduce = 1;
    int * dimSize = new int[order];
    dimSize[0] = 10000;
    dimSize[1] = 500;

    int unitNum = 1;
    for (int i = 0; i < order; i++)
        unitNum *= dimSize[i];

    /* a tensor of size (500) */
    int * dimSize_reduce_a = new int[order_reduce];
    dimSize_reduce_a[0] = 500;

    int unitNum_a = 1;
    for (int i = 0; i < order_reduce; i++)
        unitNum_a *= dimSize_reduce_a[i];

    /* a tensor of size (10000) */
    int * dimSize_reduce_b = new int[order_reduce];
    dimSize_reduce_b[0] = 10000;

    int unitNum_b = 1;
    for (int i = 0; i < order_reduce; i++)
        unitNum_b *= dimSize_reduce_b[i];

    /* source data: unitNum (= 10000 * 500) entries, all set to 1 */
    DTYPE * data = new DTYPE[unitNum];
    for (int i = 0; i < unitNum; i++)
        data[i] = 1;

    /* the mean of a run of 1s is 1, whatever the dimension */
    DTYPE answer_a[500];
    for (int i = 0; i < unitNum_a; i++)
        answer_a[i] = 1;

    DTYPE answer_b[10000];
    for (int i = 0; i < unitNum_b; i++)
        answer_b[i] = 1;

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensor(order, dimSize);
    XTensor * reduce_a = NewTensor(order_reduce, dimSize_reduce_a);
    XTensor * b = NewTensor(order, dimSize);
    XTensor * reduce_b = NewTensor(order_reduce, dimSize_reduce_b);

    /* initialize variables */
    a->SetData(data, unitNum);
    b->SetData(data, unitNum);

    /* call ReduceMean function */
    ReduceMean(a, reduce_a, 0);
    ReduceMean(b, reduce_b, 1);

    /* check results */
    cpuTest = reduce_a->CheckData(answer_a, unitNum_a) &&
              reduce_b->CheckData(answer_b, unitNum_b);

    bool result = cpuTest;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    /* NOTE(review): no device id is passed here, whereas other tests call
       NewTensor(order, dimSize, X_FLOAT, 1.0F, 0) - confirm that these
       tensors are really placed on the GPU. */
    XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT);
    XTensor * reduce_aGPU = NewTensor(order_reduce, dimSize_reduce_a, X_FLOAT);
    XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT);
    XTensor * reduce_bGPU = NewTensor(order_reduce, dimSize_reduce_b, X_FLOAT);

    /* initialize variables */
    aGPU->SetData(data, unitNum);
    bGPU->SetData(data, unitNum);

    /* call ReduceMean function */
    ReduceMean(aGPU, reduce_aGPU, 0);
    ReduceMean(bGPU, reduce_bGPU, 1);

    /* check results */
    gpuTest = reduce_aGPU->CheckData(answer_a, unitNum_a) &&
              reduce_bGPU->CheckData(answer_b, unitNum_b);

    /* destroy GPU variables
       (one delete per pointer: "delete p, q;" only deletes p) */
    delete aGPU;
    delete bGPU;
    delete reduce_aGPU;
    delete reduce_bGPU;

    result = cpuTest && gpuTest;
#endif // USE_CUDA

    /* destroy variables shared by both configurations */
    delete a;
    delete b;
    delete reduce_a;
    delete reduce_b;
    delete[] data;
    delete[] dimSize;
    delete[] dimSize_reduce_a;
    delete[] dimSize_reduce_b;

    return result;
}
/* other cases */
/*
TODO!!
*/
/* test for ReduceMean Function */
extern
"C"
bool
TestReduceMean
()
{
XPRINT
(
0
,
stdout
,
"[TEST ReduceMean] get the mean value along a dimension of the tensor
\n
"
);
...
...
@@ -236,15 +145,6 @@ bool TestReduceMean()
else
XPRINT
(
0
,
stdout
,
">> case 1 passed!
\n
"
);
/* case 2 test */
caseFlag
=
TestReduceMeanForLargescale
();
if
(
!
caseFlag
)
{
returnFlag
=
false
;
XPRINT
(
0
,
stdout
,
">> case 2 failed!
\n
"
);
}
else
XPRINT
(
0
,
stdout
,
">> case 2 passed!
\n
"
);
///* other cases test */
///*
//TODO!!
...
...
source/test/TReduceMean.h
查看文件 @
0887fae1
...
...
@@ -24,13 +24,13 @@
#include "../core/ReduceMean.h"
namespace
nts
{
// namespace nt(NiuTrans.Tensor)
namespace
nts
{
// namespace nt
s
(NiuTrans.Tensor)
/* test for ReduceMean Function */
extern
"C"
bool
TestReduceMean
();
}
// namespace nt(NiuTrans.Tensor)
}
// namespace nt
s
(NiuTrans.Tensor)
#endif // __TEST_REDUCEMEAN_H__
source/test/TReduceSum.cpp
查看文件 @
0887fae1
...
...
@@ -22,8 +22,10 @@
#include "TReduceSum.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: sum the items along a dimension of the tensor.
* In this case,
/*
case 1: sum the items along a dimension of the tensor.
In this case,
(2, 4) -> (4), dim = 0
(2, 4) -> (2), dim = 1
*/
...
...
@@ -57,10 +59,10 @@ bool TestReduceSum1()
for
(
int
i
=
0
;
i
<
tOrder2
;
i
++
)
tUnitNum2
*=
tDimSize2
[
i
];
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
DTYPE
answer1
[
4
]
=
{
4.0
,
6.0
,
8.0
,
10.0
};
DTYPE
answer2
[
2
]
=
{
6.0
,
22.0
};
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
answer1
[
4
]
=
{
4.0
F
,
6.0
F
,
8.0
F
,
10.0
F
};
DTYPE
answer2
[
2
]
=
{
6.0
F
,
22.0
F
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -128,103 +130,12 @@ bool TestReduceSum1()
#endif // USE_CUDA
}
/*
case 2: sum the items along a dimension of a large-scale tensor.
In this case, every entry of the source tensor is 1 and
(10000, 500) -> (500), dim = 0, every expected entry is 10000
(10000, 500) -> (10000), dim = 1, every expected entry is 500
>> return - true if the CPU check (and, with USE_CUDA, the second check) passes
*/
bool TestReduceSumForLargescale()
{
    /* a tensor of size (10000, 500) */
    int order = 2;
    int orderReduce = 1;
    int * dimSize = new int[order];
    dimSize[0] = 10000;
    dimSize[1] = 500;

    int unitNum = 1;
    for (int i = 0; i < order; i++)
        unitNum *= dimSize[i];

    /* a tensor of size (500) */
    int * dimSize_reduce_a = new int[orderReduce];
    dimSize_reduce_a[0] = 500;

    int unitNum_a = 1;
    for (int i = 0; i < orderReduce; i++)
        unitNum_a *= dimSize_reduce_a[i];

    /* a tensor of size (10000) */
    int * dimSize_reduce_b = new int[orderReduce];
    dimSize_reduce_b[0] = 10000;

    int unitNum_b = 1;
    for (int i = 0; i < orderReduce; i++)
        unitNum_b *= dimSize_reduce_b[i];

    /* source data: unitNum (= 10000 * 500) entries, all set to 1 */
    DTYPE * data = new DTYPE[unitNum];
    for (int i = 0; i < unitNum; i++)
        data[i] = 1;

    /* summing 1s along dim 0 adds up 10000 items per output entry */
    DTYPE answer_a[500];
    for (int i = 0; i < unitNum_a; i++)
        answer_a[i] = 10000;

    /* summing 1s along dim 1 adds up 500 items per output entry */
    DTYPE answer_b[10000];
    for (int i = 0; i < unitNum_b; i++)
        answer_b[i] = 500;

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensor(order, dimSize);
    XTensor * reduce_a = NewTensor(orderReduce, dimSize_reduce_a);
    XTensor * b = NewTensor(order, dimSize);
    XTensor * reduce_b = NewTensor(orderReduce, dimSize_reduce_b);

    /* initialize variables */
    a->SetData(data, unitNum);
    b->SetData(data, unitNum);

    /* call ReduceSum function */
    ReduceSum(a, reduce_a, 0);
    ReduceSum(b, reduce_b, 1);

    /* check results */
    cpuTest = reduce_a->CheckData(answer_a, unitNum_a) &&
              reduce_b->CheckData(answer_b, unitNum_b);

    bool result = cpuTest;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    /* NOTE(review): no device id is passed here, whereas other tests call
       NewTensor(order, dimSize, X_FLOAT, 1.0F, 0) - confirm that these
       tensors are really placed on the GPU. */
    XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT);
    XTensor * reduce_aGPU = NewTensor(orderReduce, dimSize_reduce_a, X_FLOAT);
    XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT);
    XTensor * reduce_bGPU = NewTensor(orderReduce, dimSize_reduce_b, X_FLOAT);

    /* initialize variables */
    aGPU->SetData(data, unitNum);
    bGPU->SetData(data, unitNum);

    /* call ReduceSum function */
    ReduceSum(aGPU, reduce_aGPU, 0);
    ReduceSum(bGPU, reduce_bGPU, 1);

    /* check results */
    gpuTest = reduce_aGPU->CheckData(answer_a, unitNum_a) &&
              reduce_bGPU->CheckData(answer_b, unitNum_b);

    /* destroy GPU variables
       (one delete per pointer: "delete p, q;" only deletes p) */
    delete aGPU;
    delete bGPU;
    delete reduce_aGPU;
    delete reduce_bGPU;

    result = cpuTest && gpuTest;
#endif // USE_CUDA

    /* destroy variables shared by both configurations */
    delete a;
    delete b;
    delete reduce_a;
    delete reduce_b;
    delete[] data;
    delete[] dimSize;
    delete[] dimSize_reduce_a;
    delete[] dimSize_reduce_b;

    return result;
}
/* other cases */
/*
TODO!!
*/
/* test for ReduceSum Function */
extern
"C"
bool
TestReduceSum
()
{
XPRINT
(
0
,
stdout
,
"[TEST ReduceSum] sum the items along a dimension of the tensor.
\n
"
);
...
...
@@ -239,15 +150,6 @@ bool TestReduceSum()
else
XPRINT
(
0
,
stdout
,
">> case 1 passed!
\n
"
);
/* case 2 test */
caseFlag
=
TestReduceSumForLargescale
();
if
(
!
caseFlag
)
{
returnFlag
=
false
;
XPRINT
(
0
,
stdout
,
">> case 2 failed!
\n
"
);
}
else
XPRINT
(
0
,
stdout
,
">> case 2 passed!
\n
"
);
/* other cases test */
/*
TODO!!
...
...
source/test/TReduceSumSquared.cpp
查看文件 @
0887fae1
...
...
@@ -22,9 +22,11 @@
#include "TReduceSumSquared.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: squared sum of the items along a dimension of the tensor.
* For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2.
* In this case, (2, 4) -> (4), dim = 0.
/*
case 1: squared sum of the items along a dimension of the tensor.
For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2.
In this case, (2, 4) -> (4), dim = 0.
*/
bool
TestReduceSumSquared1
()
{
...
...
@@ -56,10 +58,10 @@ bool TestReduceSumSquared1()
for
(
int
i
=
0
;
i
<
shiftOrder
;
i
++
)
shiftUnitNum
*=
shiftDimSize
[
i
];
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
DTYPE
shiftData
[
4
]
=
{
1.0
,
-
1.0
,
-
1.0
,
0.0
};
DTYPE
answer
[
4
]
=
{
10.0
,
40.0
,
58.0
,
58.0
};
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
shiftData
[
4
]
=
{
1.0
F
,
-
1.0
F
,
-
1.0
F
,
0.0
F
};
DTYPE
answer
[
4
]
=
{
10.0
F
,
40.0
F
,
58.0
F
,
58.0
F
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -125,9 +127,10 @@ bool TestReduceSumSquared1()
#endif // USE_CUDA
}
/* case 1: squared sum of the items along a dimension of the tensor.
* For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2.
* In this case, (2, 4) -> (2), dim = 1.
/*
case 2: squared sum of the items along a dimension of the tensor.
For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2.
In this case, (2, 4) -> (2), dim = 1.
*/
bool
TestReduceSumSquared2
()
{
...
...
@@ -141,7 +144,7 @@ bool TestReduceSumSquared2()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
/* a output tensor of size (
4
) */
/* a output tensor of size (
2
) */
int
tOrder
=
1
;
int
*
tDimSize
=
new
int
[
tOrder
];
tDimSize
[
0
]
=
2
;
...
...
@@ -150,7 +153,7 @@ bool TestReduceSumSquared2()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
/* a shift tensor of size (
4
) */
/* a shift tensor of size (
2
) */
int
shiftOrder
=
1
;
int
*
shiftDimSize
=
new
int
[
shiftOrder
];
shiftDimSize
[
0
]
=
2
;
...
...
@@ -159,10 +162,10 @@ bool TestReduceSumSquared2()
for
(
int
i
=
0
;
i
<
shiftOrder
;
i
++
)
shiftUnitNum
*=
shiftDimSize
[
i
];
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
DTYPE
shiftData
[
2
]
=
{
-
1.0
,
1.0
};
DTYPE
answer
[
2
]
=
{
30.0
,
86.0
};
DTYPE
sData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
shiftData
[
2
]
=
{
-
1.0
F
,
1.0
F
};
DTYPE
answer
[
2
]
=
{
30.0
F
,
86.0
F
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -234,7 +237,6 @@ TODO!!
*/
/* test for ReduceSumSquared Function */
extern
"C"
bool
TestReduceSumSquared
()
{
XPRINT
(
0
,
stdout
,
"[TEST ReduceSumSquared] squared sum of the items along a dimension of the tensor
\n
"
);
...
...
source/test/TReduceVariance.cpp
查看文件 @
0887fae1
...
...
@@ -22,9 +22,11 @@
#include "TReduceVariance.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: variance of the items along a dimension of the tensor.
* For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2.
* In this case, (2, 4) -> (4), dim = 0.
/*
case 1: variance of the items along a dimension of the tensor.
For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2.
In this case, (2, 4) -> (4), dim = 0.
*/
bool
TestReduceVariance1
()
{
...
...
@@ -131,7 +133,6 @@ TODO!!
*/
/* test for ReduceVariance Function */
extern
"C"
bool
TestReduceVariance
()
{
XPRINT
(
0
,
stdout
,
"[TEST ReduceVariance] variance of the items along a dimension of the tensor
\n
"
);
...
...
source/test/TScaleAndShift.cpp
查看文件 @
0887fae1
...
...
@@ -22,8 +22,10 @@
#include "TScaleAndShift.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: scale and shift all tensor entries.
* p = p * scale + shift
/*
case 1: scale and shift all tensor entries.
p = p * scale + shift
*/
bool
TestScaleAndShift1
()
{
...
...
@@ -42,8 +44,8 @@ bool TestScaleAndShift1()
DTYPE
answer
[
2
][
4
]
=
{
{
0.5
F
,
2.5
F
,
4.5
F
,
6.5
F
},
{
8.5
F
,
10.5
F
,
12.5
F
,
14.5
F
}
};
DTYPE
scaleFactor
=
2.0
;
DTYPE
shiftFactor
=
0.5
;
DTYPE
scaleFactor
=
2.0
F
;
DTYPE
shiftFactor
=
0.5
F
;
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -97,7 +99,6 @@ TODO!!
*/
/* test for ScaleAndShift Function */
extern
"C"
bool
TestScaleAndShift
()
{
XPRINT
(
0
,
stdout
,
"[TEST ScaleAndShift] scale and shift all tensor entires
\n
"
);
...
...
source/test/TSelect.cpp
查看文件 @
0887fae1
...
...
@@ -20,12 +20,14 @@
*/
#include "TSelect.h"
#include "../xc/Mycode.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: test SelectRange function.
* It can generate a tensor with selected data
* in range[low,high] along the given dimension.
* In this case, (2, 2, 4) -> (2, 2, 2), dim = 2, low = 1, high = 3.
/*
case 1: test SelectRange function.
It can generate a tensor with selected data in range[low,high] along the given dimension.
In this case, (2, 2, 4) -> (2, 2, 2), dim = 2, low = 1, high = 3.
*/
bool
TestSelect1
()
{
...
...
@@ -76,25 +78,25 @@ bool TestSelect1()
/* check results */
cpuTest
=
t
->
CheckData
(
answer
,
tUnitNum
);
return
cpuTest
;
#ifdef USE_CUDA
/* GPU test */
bool
gpuTest
=
true
;
/* create tensors */
XTensor
*
sGPU
=
NewTensor
(
sOrder
,
sDimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
sOrder
,
s
DimSize
,
X_FLOAT
,
1.0
F
,
0
);
XTensor
*
tGPU
=
NewTensor
(
tOrder
,
t
DimSize
,
X_FLOAT
,
1.0
F
,
0
);
/* initialize variables */
sGPU
->
SetData
(
sData
,
sUnitNum
);
tGPU
->
SetZeroAll
();
/* call Select function */
SelectRange
(
sGPU
,
1
,
1
,
3
,
tGPU
);
SelectRange
(
sGPU
,
2
,
1
,
3
,
tGPU
);
/* check results */
gpuTest
=
tGPU
->
CheckData
(
answer
,
s
UnitNum
);
gpuTest
=
tGPU
->
CheckData
(
answer
,
t
UnitNum
);
/* destroy variables */
delete
s
;
delete
t
;
...
...
@@ -121,7 +123,6 @@ TODO!!
*/
/* test for Select Function */
extern
"C"
bool
TestSelect
()
{
XPRINT
(
0
,
stdout
,
"[TEST Select] generate a tensor with seleccted data in range[low,high] along the given dimension
\n
"
);
...
...
source/test/TSetAscendingOrder.cpp
查看文件 @
0887fae1
...
...
@@ -22,6 +22,7 @@
#include "TSetAscendingOrder.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: set the cell to the ascending order along a given dimension.
*/
bool
TestSetAscendingOrder1
()
...
...
@@ -92,7 +93,6 @@ TODO!!
*/
/* test for SetAscendingOrder Function */
extern
"C"
bool
TestSetAscendingOrder
()
{
XPRINT
(
0
,
stdout
,
"[TEST SetAscendingOrder] set the cell to the ascending order along a given dimension
\n
"
);
...
...
source/test/TSetData.cpp
查看文件 @
0887fae1
...
...
@@ -22,8 +22,8 @@
#include "TSetData.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: set the cell to the ascending order along a given dimension.
*/
/* case 1: set the cell to the ascending order along a given dimension.
*/
bool
TestSetData1
()
{
/* a input tensor of size (2, 4) */
...
...
@@ -83,7 +83,6 @@ TODO!!
*/
/* test for SetData Function */
extern
"C"
bool
TestSetData
()
{
XPRINT
(
0
,
stdout
,
"[TEST SetData] set the data of tensor
\n
"
);
...
...
source/test/TSigmoid.cpp
查看文件 @
0887fae1
...
...
@@ -23,9 +23,11 @@
#include "TSigmoid.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: test Sigmoid function and SigmoidBackward function.
* sigmoid function: y = 1/(1+exp(-x))
* backward computation: dE/ds = dE/dy * dy/dx
/*
case 1: test Sigmoid function and SigmoidBackward function.
sigmoid function: y = 1/(1+exp(-x))
backward computation: dE/ds = dE/dy * dy/dx
*/
bool
TestSigmoid1
()
{
...
...
@@ -124,9 +126,10 @@ bool TestSigmoid1()
#endif // USE_CUDA
}
/* case 2: test Sigmoid function and SigmoidBackward function.
* sigmoid function: y = 1/(1+exp(-x))
* backward computation: dE/ds = dE/dy * dy/dx
/*
case 2: test Sigmoid function and SigmoidBackward function.
sigmoid function: y = 1/(1+exp(-x))
backward computation: dE/ds = dE/dy * dy/dx
*/
bool
TestSigmoid2
()
{
...
...
@@ -234,7 +237,6 @@ bool TestSigmoid2()
*/
/* test for Sigmoid Function */
extern
"C"
bool
TestSigmoid
()
{
XPRINT
(
0
,
stdout
,
"[TEST SIGMOID] sigmoid function and its backward computation
\n
"
);
...
...
source/test/TSoftmax.cpp
查看文件 @
0887fae1
...
...
@@ -24,8 +24,10 @@
#include "TSoftmax.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: test Softmax function.
* softmax function: y = e^x / \sum_{i} e^{x_i}
/*
case 1: test Softmax function.
softmax function: y = e^x / \sum_{i} e^{x_i}
*/
bool
TestSoftmax1
()
{
...
...
@@ -96,8 +98,9 @@ bool TestSoftmax1()
#endif // USE_CUDA
}
/* case 2: test SoftmaxBackward function.
* SoftmaxBackward function: dE/dx_j = -gold_j + y_j
/*
case 2: test SoftmaxBackward function.
SoftmaxBackward function: dE/dx_j = -gold_j + y_j
*/
bool
TestSoftmax2
()
{
...
...
@@ -200,7 +203,6 @@ bool TestSoftmax2()
*/
/* test for Softmax Function */
extern
"C"
bool
TestSoftmax
()
{
XPRINT
(
0
,
stdout
,
"[TEST SOFTMAX] softmax function and its backward computation
\n
"
);
...
...
source/test/TSort.cpp
查看文件 @
0887fae1
...
...
@@ -22,7 +22,8 @@
#include "TSort.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: sort the tensor along a given dimension*/
/* case 1: sort the tensor along a given dimension */
bool
TestSort1
()
{
/* a tensor of size (2, 4) */
...
...
@@ -35,10 +36,10 @@ bool TestSort1()
for
(
int
i
=
0
;
i
<
order
;
i
++
)
unitNum
*=
dimSize
[
i
];
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
answer
[
2
][
4
]
=
{
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
},
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
}
};
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
answer
[
2
][
4
]
=
{
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
},
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -104,10 +105,10 @@ bool TestSort2()
for
(
int
i
=
0
;
i
<
order
;
i
++
)
unitNum
*=
dimSize
[
i
];
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
DTYPE
answer
[
2
][
4
]
=
{
{
3.0
,
2.0
,
1.0
,
0.0
},
{
7.0
,
6.0
,
5.0
,
4.0
}
};
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
answer
[
2
][
4
]
=
{
{
3.0
F
,
2.0
F
,
1.0
F
,
0.0
F
},
{
7.0
F
,
6.0
F
,
5.0
F
,
4.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -166,7 +167,6 @@ TODO!!
*/
/* test for Sort Function */
extern
"C"
bool
TestSort
()
{
XPRINT
(
0
,
stdout
,
"[TEST SORT] sort the tensor along a given dimension
\n
"
);
...
...
source/test/TSplit.cpp
查看文件 @
0887fae1
...
...
@@ -19,18 +19,17 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-13
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../core/Split.h"
#include "../XList.h"
#include "TSplit.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3)
* In this case, 4 * 3 -> 2 * 2 * 3, whereToSplit=0, splitNum=2.
/*
case 1: transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3)
In this case, (4, 3) -> (2, 2, 3), whereToSplit=0, splitNum=2.
*/
bool
TestSplit1
()
{
/* a source tensor of size
4 * 3
*/
/* a source tensor of size
(4, 3)
*/
int
sOrder
=
2
;
int
*
sDimSize
=
new
int
[
sOrder
];
sDimSize
[
0
]
=
4
;
...
...
@@ -40,7 +39,7 @@ bool TestSplit1()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
/* a target tensor of size
2 * 2 * 3
*/
/* a target tensor of size
(2, 2, 3)
*/
int
tOrder
=
3
;
int
*
tDimSize
=
new
int
[
tOrder
];
tDimSize
[
0
]
=
2
;
...
...
@@ -109,12 +108,13 @@ bool TestSplit1()
#endif // USE_CUDA
}
/* case 2: transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3)
* In this case, 3 * 4 -> 2 * 3 * 2, whereToSplit=1, splitNum=2.
/*
case 2: transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3)
In this case, (3, 4) -> (2, 3, 2), whereToSplit=1, splitNum=2.
*/
bool
TestSplit2
()
{
/* a source tensor of size
3 * 4
*/
/* a source tensor of size
(3, 4)
*/
int
sOrder
=
2
;
int
*
sDimSize
=
new
int
[
sOrder
];
sDimSize
[
0
]
=
3
;
...
...
@@ -124,7 +124,7 @@ bool TestSplit2()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
/* a target tensor of size
2 * 3 * 2
*/
/* a target tensor of size
(2, 3, 2)
*/
int
tOrder
=
3
;
int
*
tDimSize
=
new
int
[
tOrder
];
tDimSize
[
0
]
=
2
;
...
...
@@ -194,8 +194,9 @@ bool TestSplit2()
#endif // USE_CUDA
}
/* case 3: split a big tensor into small tensors
* In this case, 3 * 4 -> 2 * (3 * 2) , whereToSplit=1, splitNum=2.
/*
case 3: split a big tensor into small tensors
In this case, (3, 4) -> 2 * (3, 2) , whereToSplit=1, splitNum=2.
*/
bool
TestSplit3
()
{
...
...
@@ -203,7 +204,7 @@ bool TestSplit3()
XList
tList
;
tList
=
XList
();
/* a source tensor of size (3
*
4) */
/* a source tensor of size (3
,
4) */
int
sOrder
=
2
;
int
*
sDimSize
=
new
int
[
sOrder
];
sDimSize
[
0
]
=
3
;
...
...
@@ -213,7 +214,7 @@ bool TestSplit3()
for
(
int
i
=
0
;
i
<
sOrder
;
i
++
)
sUnitNum
*=
sDimSize
[
i
];
/* a target tensor of size (3
*
2) */
/* a target tensor of size (3
,
2) */
int
tOrder1
=
2
;
int
*
tDimSize1
=
new
int
[
tOrder1
];
tDimSize1
[
0
]
=
3
;
...
...
@@ -313,10 +314,9 @@ TODO!!
*/
/* test for Split Function */
extern
"C"
bool
TestSplit
()
bool
TestSplit
()
{
XPRINT
(
0
,
stdout
,
"[TEST SPLIT]
-------------
\n
"
);
XPRINT
(
0
,
stdout
,
"[TEST SPLIT]
split a big tensor into small tensors
\n
"
);
bool
returnFlag
=
true
,
caseFlag
=
true
;
/* case 1 test */
...
...
source/test/TSum.cpp
查看文件 @
0887fae1
...
...
@@ -22,7 +22,8 @@
#include "TSum.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1 */
/* case 1: tensor summation c = a + b * \beta */
bool
TestSum1
()
{
/* a tensor of size (2, 4) */
...
...
@@ -35,12 +36,12 @@ bool TestSum1()
for
(
int
i
=
0
;
i
<
order
;
i
++
)
unitNum
*=
dimSize
[
i
];
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
DTYPE
bData
[
2
][
4
]
=
{
{
1.0
,
-
1.0
,
-
3.0
,
-
5.0
},
{
-
7.0
,
-
9.0
,
-
11.0
,
-
13.0
}
};
DTYPE
answer
[
2
][
4
]
=
{
{
1.0
,
0.0
,
-
1.0
,
-
2.0
},
{
-
3.0
,
-
4.0
,
-
5.0
,
-
6.0
}
};
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
bData
[
2
][
4
]
=
{
{
1.0
F
,
-
1.0
F
,
-
3.0
F
,
-
5.0
F
},
{
-
7.0
F
,
-
9.0
F
,
-
11.0
F
,
-
13.0
F
}
};
DTYPE
answer
[
2
][
4
]
=
{
{
1.0
F
,
0.0
F
,
-
1.0
F
,
-
2.0
F
},
{
-
3.0
F
,
-
4.0
F
,
-
5.0
F
,
-
6.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -95,7 +96,7 @@ bool TestSum1()
#endif // USE_CUDA
}
/* case 2 */
/* case 2
: tensor summation c = a + b * \beta
*/
bool
TestSum2
()
{
/* a tensor of size (2, 4) */
...
...
@@ -108,12 +109,12 @@ bool TestSum2()
for
(
int
i
=
0
;
i
<
order
;
i
++
)
{
unitNum
*=
dimSize
[
i
];
}
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
DTYPE
bData
[
2
][
4
]
=
{
{
1.0
,
-
1.0
,
-
3.0
,
-
5.0
},
{
-
7.0
,
-
9.0
,
-
11.0
,
-
13.0
}
};
DTYPE
answer
[
2
][
4
]
=
{
{
0.5
,
0.5
,
0.5
,
0.5
},
{
0.5
,
0.5
,
0.5
,
0.5
}
};
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
bData
[
2
][
4
]
=
{
{
1.0
F
,
-
1.0
F
,
-
3.0
F
,
-
5.0
F
},
{
-
7.0
F
,
-
9.0
F
,
-
11.0
F
,
-
13.0
F
}
};
DTYPE
answer
[
2
][
4
]
=
{
{
0.5
F
,
0.5
F
,
0.5
F
,
0.5
F
},
{
0.5
F
,
0.5
F
,
0.5
F
,
0.5
F
}
};
float
beta
=
0.5
F
;
/* CPU test */
...
...
@@ -129,7 +130,7 @@ bool TestSum2()
b
->
SetData
(
bData
,
unitNum
);
c
->
SetZeroAll
();
/* call
s
um function */
/* call
S
um function */
Sum
(
a
,
b
,
c
,
beta
);
/* check results */
...
...
@@ -149,7 +150,7 @@ bool TestSum2()
bGPU
->
SetData
(
bData
,
unitNum
);
cGPU
->
SetZeroAll
();
/* call
s
um function */
/* call
S
um function */
Sum
(
aGPU
,
bGPU
,
cGPU
,
beta
);
/* check results */
...
...
@@ -182,8 +183,7 @@ bool TestSum2()
*/
/* test for Sum Function */
extern
"C"
bool
TestSum
()
bool
TestSum
()
{
XPRINT
(
0
,
stdout
,
"[TEST SUM] tensor summation c = a + b * beta
\n
"
);
bool
returnFlag
=
true
,
caseFlag
=
true
;
...
...
source/test/TSumByColumnTV.cpp
查看文件 @
0887fae1
...
...
@@ -22,9 +22,10 @@
#include "TSumByColumnTV.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: test SumByColumnTV function
* sum of a tensor and a vector (column vector)
* in a column by column manner
/*
case 1: test SumByColumnTV function
sum of a tensor and a vector (column vector) in a column by column manner
*/
bool
TestSumByColumnTV1
()
{
...
...
@@ -58,12 +59,12 @@ bool TestSumByColumnTV1()
for
(
int
i
=
0
;
i
<
cOrder
;
i
++
)
cUnitNum
*=
cDimSize
[
i
];
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
DTYPE
bData
[
2
][
1
]
=
{
{
1.0
},
{
0.0
}
};
DTYPE
answer
[
2
][
4
]
=
{
{
1.0
,
2.0
,
3.0
,
4.0
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
bData
[
2
][
1
]
=
{
{
1.0
F
},
{
0.0
F
}
};
DTYPE
answer
[
2
][
4
]
=
{
{
1.0
F
,
2.0
F
,
3.0
F
,
4.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -128,9 +129,9 @@ bool TestSumByColumnTV1()
#endif // USE_CUDA
}
/*
case 2: test SumByColumnTV function
* sum of a tensor and a vector (column vector)
*
in a column by column manner
/*
case 2: test SumByColumnTV function
sum of a tensor and a vector (column vector)
in a column by column manner
*/
bool
TestSumByColumnTV2
()
{
...
...
@@ -154,12 +155,12 @@ bool TestSumByColumnTV2()
for
(
int
i
=
0
;
i
<
bOrder
;
i
++
)
bUnitNum
*=
bDimSize
[
i
];
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
DTYPE
bData
[
2
][
1
]
=
{
{
1.0
},
{
0.0
}
};
DTYPE
answer
[
2
][
4
]
=
{
{
1.0
,
2.0
,
3.0
,
4.0
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
DTYPE
aData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
bData
[
2
][
1
]
=
{
{
1.0
F
},
{
0.0
F
}
};
DTYPE
answer
[
2
][
4
]
=
{
{
1.0
F
,
2.0
F
,
3.0
F
,
4.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -222,7 +223,6 @@ bool TestSumByColumnTV2()
*/
/* test for SumByColumnTV Function */
extern
"C"
bool
TestSumByColumnTV
()
{
XPRINT
(
0
,
stdout
,
"[TEST SumByColumnTV] sum of a tensor and a vector (column vector) in a column by column manner
\n
"
);
...
...
source/test/TSumByColumnVT.cpp
查看文件 @
0887fae1
...
...
@@ -22,9 +22,10 @@
#include "TSumByColumnVT.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: test SumByColumnVT function
* sum of a vector (column vector) and a tensor
* in a column by column manner
/*
case 1: test SumByColumnVT function
sum of a vector (column vector) and a tensor in a column by column manner
*/
bool
TestSumByColumnVT1
()
{
...
...
@@ -58,12 +59,12 @@ bool TestSumByColumnVT1()
for
(
int
i
=
0
;
i
<
cOrder
;
i
++
)
cUnitNum
*=
cDimSize
[
i
];
DTYPE
aData
[
2
][
1
]
=
{
{
1.0
},
{
0.0
}
};
DTYPE
bData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
DTYPE
answer
[
2
][
1
]
=
{
{
7.0
},
{
22.0
}
};
DTYPE
aData
[
2
][
1
]
=
{
{
1.0
F
},
{
0.0
F
}
};
DTYPE
bData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
answer
[
2
][
1
]
=
{
{
7.0
F
},
{
22.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -129,9 +130,9 @@ bool TestSumByColumnVT1()
#endif // USE_CUDA
}
/*
case 2: test SumByColumnVT function
* sum of a vector (column vector) and a tensor
*
in a column by column manner
/*
case 2: test SumByColumnVT function
sum of a vector (column vector) and a tensor
in a column by column manner
*/
bool
TestSumByColumnVT2
()
{
...
...
@@ -155,12 +156,12 @@ bool TestSumByColumnVT2()
for
(
int
i
=
0
;
i
<
bOrder
;
i
++
)
bUnitNum
*=
bDimSize
[
i
];
DTYPE
aData
[
2
][
1
]
=
{
{
1.0
},
{
0.0
}
};
DTYPE
bData
[
2
][
4
]
=
{
{
0.0
,
1.0
,
2.0
,
3.0
},
{
4.0
,
5.0
,
6.0
,
7.0
}
};
DTYPE
answer
[
2
][
1
]
=
{
{
7.0
},
{
22.0
}
};
DTYPE
aData
[
2
][
1
]
=
{
{
1.0
F
},
{
0.0
F
}
};
DTYPE
bData
[
2
][
4
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
,
3.0
F
},
{
4.0
F
,
5.0
F
,
6.0
F
,
7.0
F
}
};
DTYPE
answer
[
2
][
1
]
=
{
{
7.0
F
},
{
22.0
F
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -223,7 +224,6 @@ bool TestSumByColumnVT2()
*/
/* test for SumByColumnVT Function */
extern
"C"
bool
TestSumByColumnVT
()
{
XPRINT
(
0
,
stdout
,
"[TEST SumByColumnVT] sum of a vector (column vector) and a tensor in a column by column manner
\n
"
);
...
...
source/test/TTopK.cpp
查看文件 @
0887fae1
...
...
@@ -22,10 +22,12 @@
#include "TTopK.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: get the top-k items along a given dimension.
* In this case,
* (2, 4) -> (2, 4), dim = 0, k = 2
* (2, 4) -> (2, 4), dim = 1, k = 4
/*
case 1: get the top-k items along a given dimension.
In this case,
(2, 4) -> (2, 4), dim = 0, k = 2
(2, 4) -> (2, 4), dim = 1, k = 4
*/
bool
TestTopK1
()
{
...
...
@@ -49,16 +51,16 @@ bool TestTopK1()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData
[
2
][
4
]
=
{
{
5.0
,
1.0
,
2.0
,
8.0
},
{
4.0
,
3.0
,
7.0
,
6.0
}
};
DTYPE
sData
[
2
][
4
]
=
{
{
5.0
F
,
1.0
F
,
2.0
F
,
8.0
F
},
{
4.0
F
,
3.0
F
,
7.0
F
,
6.0
F
}
};
DTYPE
tAnswer1
[
2
][
4
]
=
{
{
5.0
,
3.0
,
7.0
,
8.0
},
{
4.0
,
1.0
,
2.0
,
6.0
}
};
DTYPE
tAnswer1
[
2
][
4
]
=
{
{
5.0
F
,
3.0
F
,
7.0
F
,
8.0
F
},
{
4.0
F
,
1.0
F
,
2.0
F
,
6.0
F
}
};
int
indexAnswer1
[
2
][
4
]
=
{
{
0
,
1
,
1
,
0
},
{
1
,
0
,
0
,
1
}
};
DTYPE
tAnswer2
[
2
][
4
]
=
{
{
8.0
,
5.0
,
2.0
,
1.0
},
{
7.0
,
6.0
,
4.0
,
3.0
}
};
DTYPE
tAnswer2
[
2
][
4
]
=
{
{
8.0
F
,
5.0
F
,
2.0
F
,
1.0
F
},
{
7.0
F
,
6.0
F
,
4.0
F
,
3.0
F
}
};
int
indexAnswer2
[
2
][
4
]
=
{
{
3
,
0
,
2
,
1
},
{
2
,
3
,
0
,
1
}
};
...
...
@@ -156,9 +158,9 @@ bool TestTopK1()
#endif // USE_CUDA
}
/*
case 2: get the top-k items along a given dimension.
* In this case,
* (2, 4) -> (2, 2), dim = 1, k = 2
/*
case 2: get the top-k items along a given dimension.
In this case, (2, 4) -> (2, 2), dim = 1, k = 2.
*/
bool
TestTopK2
()
{
...
...
@@ -182,10 +184,10 @@ bool TestTopK2()
for
(
int
i
=
0
;
i
<
tOrder
;
i
++
)
tUnitNum
*=
tDimSize
[
i
];
DTYPE
sData
[
2
][
4
]
=
{
{
5.0
,
1.0
,
2.0
,
8.0
},
{
4.0
,
3.0
,
7.0
,
6.0
}
};
DTYPE
tAnswer
[
2
][
2
]
=
{
{
8.0
,
5.0
},
{
7.0
,
6.0
}
};
DTYPE
sData
[
2
][
4
]
=
{
{
5.0
F
,
1.0
F
,
2.0
F
,
8.0
F
},
{
4.0
F
,
3.0
F
,
7.0
F
,
6.0
F
}
};
DTYPE
tAnswer
[
2
][
2
]
=
{
{
8.0
F
,
5.0
F
},
{
7.0
F
,
6.0
F
}
};
int
indexAnswer
[
2
][
2
]
=
{
{
3
,
0
},
{
2
,
3
}
};
...
...
@@ -255,14 +257,12 @@ bool TestTopK2()
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for TopK Function */
extern
"C"
bool
TestTopK
()
{
XPRINT
(
0
,
stdout
,
"[TEST TopK] get the top-k items along a given dimension
\n
"
);
...
...
source/test/TUnsqueeze.cpp
查看文件 @
0887fae1
...
...
@@ -19,15 +19,16 @@
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-13
*/
#include "../XTensor.h"
#include "../core/Unsqueeze.h"
#include "../XList.h"
#include "TUnsqueeze.h"
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: insert a dimension by copying the blocks for x times (where x is the size of the inserted dimension)
* In this case,
* (2, 3) -> (2, 2, 3), dim=1, dSize=2
* (2, 3) -> (2, 3, 2), dim=2, dSize=2
/*
case 1: insert a dimension by copying the blocks for x times (where x is the size of the inserted dimension)
In this case,
(2, 3) -> (2, 2, 3), dim=1, dSize=2
(2, 3) -> (2, 3, 2), dim=2, dSize=2
*/
bool
TestUnsqueeze1
()
{
...
...
@@ -63,18 +64,18 @@ bool TestUnsqueeze1()
for
(
int
i
=
0
;
i
<
tOrder2
;
i
++
)
tUnitNum2
*=
tDimSize2
[
i
];
DTYPE
sData
[
2
][
3
]
=
{
{
0.0
,
1.0
,
2.0
},
{
3.0
,
4.0
,
5.0
}
};
DTYPE
answer1
[
2
][
2
][
3
]
=
{
{
{
0.0
,
1.0
,
2.0
},
{
0.0
,
1.0
,
2.0
}
},
{
{
3.0
,
4.0
,
5.0
},
{
3.0
,
4.0
,
5.0
}
}
};
DTYPE
answer2
[
2
][
3
][
2
]
=
{
{
{
0.0
,
0.0
},
{
1.0
,
1.0
},
{
2.0
,
2.0
}
},
{
{
3.0
,
3.0
},
{
4.0
,
4.0
},
{
5.0
,
5.0
}
}
};
DTYPE
sData
[
2
][
3
]
=
{
{
0.0
F
,
1.0
F
,
2.0
F
},
{
3.0
F
,
4.0
F
,
5.0
F
}
};
DTYPE
answer1
[
2
][
2
][
3
]
=
{
{
{
0.0
F
,
1.0
F
,
2.0
F
},
{
0.0
F
,
1.0
F
,
2.0
F
}
},
{
{
3.0
F
,
4.0
F
,
5.0
F
},
{
3.0
F
,
4.0
F
,
5.0
F
}
}
};
DTYPE
answer2
[
2
][
3
][
2
]
=
{
{
{
0.0
F
,
0.0
F
},
{
1.0
F
,
1.0
F
},
{
2.0
F
,
2.0
F
}
},
{
{
3.0
F
,
3.0
F
},
{
4.0
F
,
4.0
F
},
{
5.0
F
,
5.0
F
}
}
};
/* CPU test */
bool
cpuTest
=
true
;
...
...
@@ -148,7 +149,6 @@ bool TestUnsqueeze1()
*/
/* test for Unsqueeze Function */
extern
"C"
bool
TestUnsqueeze
()
{
XPRINT
(
0
,
stdout
,
"[TEST Unsqueeze] insert a dimension by copying the blocks for x times
\n
"
);
...
...
source/test/TXMem.cpp
查看文件 @
0887fae1
...
...
@@ -19,14 +19,13 @@
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-6-24
*/
#include "TXMem.h"
#include "../XGlobal.h"
#include "../XUtility.h"
#include "
../
XMem.h"
#include "
T
XMem.h"
/* the nts (NiuTrans.Tensor) namespace */
namespace
nts
{
namespace
nts
{
// namespace nts(NiuTrans.Tensor)
/* case 1: test memory pool class */
bool
TestXMemCase1
()
{
bool
ok
=
true
;
...
...
@@ -83,6 +82,7 @@ bool TestXMemCase1()
return
ok
;
}
/* test for memory pool class */
bool
TestXMem
()
{
XPRINT
(
0
,
stdout
,
"[Test] Memory pool ... Began
\n
"
);
...
...
@@ -93,11 +93,18 @@ bool TestXMem()
/* case 1 test */
caseFlag
=
TestXMemCase1
();
if
(
!
caseFlag
)
{
returnFlag
=
false
;
XPRINT
(
0
,
stdout
,
">> case 1 failed!
\n
"
);
}
else
{
XPRINT
(
0
,
stdout
,
">> case 1 passed!
\n
"
);}
if
(
!
caseFlag
)
{
returnFlag
=
false
;
XPRINT
(
0
,
stdout
,
">> case 1 failed!
\n
"
);
}
else
XPRINT
(
0
,
stdout
,
">> case 1 passed!
\n
"
);
if
(
returnFlag
)
{
XPRINT
(
0
,
stdout
,
">> All Passed!
\n
"
);
}
else
{
XPRINT
(
0
,
stdout
,
">> Failed!
\n
"
);
}
if
(
returnFlag
)
{
XPRINT
(
0
,
stdout
,
">> All Passed!
\n
"
);
}
else
XPRINT
(
0
,
stdout
,
">> Failed!
\n
"
);
double
endT
=
GetClock
();
...
...
@@ -106,4 +113,4 @@ bool TestXMem()
return
returnFlag
;
}
}
/
* end of the nts (NiuTrans.Tensor) namespace */
}
/
/
namespace
nts
(
NiuTrans
.
Tensor
)
\ No newline at end of file
source/test/TXMem.h
查看文件 @
0887fae1
差异被折叠。
点击展开。
source/test/Test.cpp
查看文件 @
0887fae1
差异被折叠。
点击展开。
编写
预览
Markdown
格式
0%
重试
或
添加新文件
添加附件
取消
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论