Commit abeb3e64 by liyinqiao

merged

parents dcabc2b0 414ff54f
......@@ -38,7 +38,7 @@
#include "XMem.h"
#include "XHeap.h"
#include "XBLAS.h"
#include "core/MergeBlockLists.h"
#include "core/shape/MergeBlockLists.h"
#ifdef USE_CUDA
......@@ -47,8 +47,8 @@
#include <cublas_v2.h>
#include <cuda.h>
#include <curand.h>
#include "core/FlushToMem.cuh"
#include "core/SetAscendingOrder.cuh"
#include "core/utilities/FlushToMem.cuh"
#include "core/utilities/SetAscendingOrder.cuh"
#endif
......@@ -555,6 +555,27 @@ bool XTensor::CheckData(const void * d, int num, int beg)
return true;
}
bool XTensor::CheckData(const void * d, int num, float tolerance, int beg)
{
if (data == NULL || d == NULL)
return false;
CheckNTErrors(!isSparse, "TODO");
CheckNTErrors(num == unitNum - beg, "Illegal size!");
DTYPE * valuePrt = (DTYPE*)data;
DTYPE value = 0;
DTYPE * answerPrt = (DTYPE*)d;
for (int i = beg; i < num; i++) {
value = ToCPU(devID, valuePrt);
if (fabs(value - *answerPrt) > tolerance)
return false;
valuePrt++;
answerPrt++;
}
return true;
}
/*
set the cell to the ascending order along a given dimension
>> dim - the dimension specified
......@@ -696,6 +717,63 @@ DTYPE XTensor::Get3D(int d0, int d1, int d2)
return ToCPU(devID, value);
}
/*
get the value of a cell in a 1d tensor in int type
The tensor must be of order 1 and of data type X_INT.
>> i - index of the cell
<< return - value of cell(i) in int
*/
int XTensor::Get1DInt(int i)
{
    CheckNTErrors((order == 1), "Cannot get a 1d cell for a tensor whose order is not 1!");
    CheckNTErrors((i >= 0 && i < dimSize[0]), "dimension 0 is out of range!");
    CheckNTErrors((dataType == X_INT), "The tensor is not in int type.");

    /* named "dims" (as in the 2d/3d getters) so that it does not shadow
       the member dimSize used in the range check above */
    int dims[1] = {i};
    void * value = GetCell(dims, 1);

    return ToCPUInt(devID, value);
}
/*
get the value of a cell in a 2d tensor in int type
The tensor must be of order 2 and of data type X_INT.
>> ni - row index
>> mi - column index
<< return - value of cell(ni, mi) in int
*/
int XTensor::Get2DInt(int ni, int mi)
{
    CheckNTErrors((order == 2), "Cannot get a 2d cell for a tensor whose order is not 2!");
    CheckNTErrors((ni >= 0 && ni < dimSize[0]), "dimension 0 is out of range!");
    CheckNTErrors((mi >= 0 && mi < dimSize[1]), "dimension 1 is out of range!");
    /* the check is on X_INT, so the message names the int type (as in Get1DInt) */
    CheckNTErrors((dataType == X_INT), "The tensor is not in int type.");

    int dims[2] = {ni, mi};
    void * value = GetCell(dims, 2);

    return ToCPUInt(devID, value);
}
/*
get the value of a cell in a 3d tensor in int type
The tensor must be of order 3 and of data type X_INT.
>> d0 - index of dimension 0
>> d1 - index of dimension 1
>> d2 - index of dimension 2
<< return - value of cell(d0, d1, d2) in int
*/
int XTensor::Get3DInt(int d0, int d1, int d2)
{
    CheckNTErrors((order == 3), "Cannot get a 3d cell for a tensor whose order is not 3!");
    CheckNTErrors((d0 >= 0 && d0 < dimSize[0]), "dimension 0 is out of range!");
    CheckNTErrors((d1 >= 0 && d1 < dimSize[1]), "dimension 1 is out of range!");
    CheckNTErrors((d2 >= 0 && d2 < dimSize[2]), "dimension 2 is out of range!");
    /* the check is on X_INT, so the message names the int type (as in Get1DInt) */
    CheckNTErrors((dataType == X_INT), "The tensor is not in int type.");

    int dims[3] = {d0, d1, d2};
    void * value = GetCell(dims, 3);

    return ToCPUInt(devID, value);
}
/*
get the value of a cell in the sparse tensor
>> i - i-th tuple in the tuple list of the sparse tensor
......
......@@ -211,6 +211,9 @@ struct XTensor
/* check whether the data array is the same as the answer */
bool CheckData(const void * answer, int num, int beg = 0);
/* check whether the data array matches the answer within a given tolerance */
bool CheckData(const void * answer, int num, float tolerance, int beg = 0);
/* set the cell to the ascending order along a given dimension */
void SetAscendingOrder(int dim);
......@@ -220,15 +223,24 @@ struct XTensor
/* get the pointer to a cell */
void * GetCell(int index[], int size = -1);
/* get the value of a cell in a 1d tensor */
/* get the default type value of a cell in a 1d tensor */
DTYPE Get1D(int i);
/* get the value of a cell in a 2d tensor */
/* get the default type value of a cell in a 2d tensor */
DTYPE Get2D(int ni, int mi);
/* get the value of a cell in a 3d tensor */
/* get the default type value of a cell in a 3d tensor */
DTYPE Get3D(int d0, int d1, int d2);
/* get the int value of a cell in a 1d tensor */
int Get1DInt(int i);
/* get the int value of a cell in a 2d tensor */
int Get2DInt(int ni, int mi);
/* get the int value of a cell in a 3d tensor */
int Get3DInt(int d0, int d1, int d2);
/* get the value of a cell in a sparse tensor */
DTYPE GetInSparse(int i);
......
......@@ -26,43 +26,49 @@
#include "../XTensor.h"
#include "Concatenate.h"
#include "ConcatenateSolely.h"
#include "CopyIndexed.h"
#include "CopyInGrid.h"
#include "CopyValues.h"
#include "FlushToMem.h"
#include "MakeMergeBlockIndex.h"
#include "MakeSplitBlockIndex.h"
#include "MatrixMul.h"
#include "MatrixMul2D.h"
#include "MatrixMul2DMultiTheading.h"
#include "MatrixMul2DParallel.h"
#include "MatrixMulBatched.h"
#include "MatrixMULBatchedCPU.h"
#include "Merge.h"
#include "MergeBlockLists.h"
#include "Multiply.h"
#include "Negate.h"
#include "Normalize.h"
#include "Permute.h"
#include "Power.h"
#include "ReduceMax.h"
#include "ReduceMean.h"
#include "ReduceStandardVariance.h"
#include "ReduceSum.h"
#include "ReduceSumSquared.h"
#include "ReduceVariance.h"
#include "ScaleAndShift.h"
#include "SetData.h"
#include "Sort.h"
#include "Split.h"
#include "Sum.h"
#include "SumByColumnTV.h"
#include "SumByColumnVT.h"
#include "TopK.h"
#include "Unsqueeze.h"
#include "XMatrixSegment.h"
#include "XTensorBLAS.h"
#include "shape/Concatenate.h"
#include "shape/ConcatenateSolely.h"
#include "movement/CopyBlocks.h"
#include "movement/CopyBlocksInGrid.h"
#include "movement/CopyBlocksOnSite.h"
#include "movement/CopyData2D.h"
#include "movement/CopyIndexed.h"
#include "movement/CopyInGrid.h"
#include "movement/CopyValues.h"
#include "utilities/FlushToMem.h"
#include "shape/MakeMergeBlockIndex.h"
#include "shape/MakeSplitBlockIndex.h"
#include "arithmetic/MatrixMul.h"
#include "arithmetic/MatrixMul2D.h"
#include "arithmetic/MatrixMul2DMultiTheading.h"
#include "arithmetic/MatrixMul2DParallel.h"
#include "arithmetic/MatrixMulBatched.h"
#include "arithmetic/MatrixMULBatchedCPU.h"
#include "shape/Merge.h"
#include "shape/MergeBlockLists.h"
#include "arithmetic/Multiply.h"
#include "arithmetic/Negate.h"
#include "math/Normalize.h"
#include "shape/Permute.h"
#include "math/Power.h"
#include "reduce/ReduceMax.h"
#include "reduce/ReduceMean.h"
#include "reduce/ReduceStandardVariance.h"
#include "reduce/ReduceSum.h"
#include "reduce/ReduceSumSquared.h"
#include "reduce/ReduceVariance.h"
#include "math/ScaleAndShift.h"
#include "getandset/Select.h"
#include "getandset/SetData.h"
#include "sort/Sort.h"
#include "shape/Split.h"
#include "arithmetic/Sum.h"
#include "arithmetic/SumByColumnTV.h"
#include "arithmetic/SumByColumnVT.h"
#include "sort/TopK.h"
#include "shape/Transpose.h"
#include "shape/Unsqueeze.h"
#include "utilities/XMatrixSegment.h"
#include "arithmetic/XTensorBLAS.h"
#endif // __CHEADER_H__
\ No newline at end of file
......@@ -219,9 +219,8 @@ public:
/* insert a dimension by copying the blocks for x times (where x is the size of the inserted dimension) */
void Unsqueeze(XTensor * a, XTensor * b, int dim, int dSize);
/*******************************************************************
segmentation and parallel processing for 2d tensors (i.e., matrices)
*/
/* segmentation and parallel processing for 2d tensors (i.e., matrices) */
/* segment a 2d tensor (i.e., matrix) into blocks and run jobs in parallel */
static
void RunParallel2D(XPRunner * parallelRunner, void * job, int opNum, int rowNum, int colNum, int argNum, ...);
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "MatrixMULBatchedCPU.h"
#include "MatrixMul2D.h"
#include "XTensorBLAS.h"
......@@ -33,9 +33,9 @@ c_i = trans(a_i) * trans(b_i) * \alpha + c_i * \beta for each i in [0,count-1]
>> transposedA - indicate whether the matrix a is transposed
>> b - another list of input matrices (2d tensors)
>> transposedB - indicate whether the matrix b is transposed
>> c - output matrix (2d tensor)
>> alpha - scalar
>> beta - scalar
>> c - output matrix (2d tensor)
*/
void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA,
XList * b, MATRIX_TRANS_TYPE transposedB,
......@@ -64,10 +64,6 @@ void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA,
}
}
//if(isUniform){
//}
//else{
for (int i = 0; i < a->count; i++) {
XTensor * ai = (XTensor*)a->GetItem(i);
XTensor * bi = (XTensor*)b->GetItem(i);
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMULBATCHEDCPU_H__
#define __MATRIXMULBATCHEDCPU_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../XName.h"
#include "../../XTensor.h"
#include "../../XDevice.h"
#include "../../XName.h"
#include "MatrixMul.h"
#include "MatrixMul2D.h"
#include "MatrixMULBatchedCPU.h"
......@@ -65,13 +65,12 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XLink::AddParamToHeadInt(c, transposedB);
XLink::AddParamToHead(c, alpha);
XLink::AddParamToHead(c, beta);
int an = transposedA == X_TRANS ? a->dimSize[1] : a->dimSize[0];
int am = transposedA == X_TRANS ? a->dimSize[0] : a->dimSize[1];
int bn = transposedB == X_TRANS ? b->dimSize[1] : b->dimSize[0];
int bm = transposedB == X_TRANS ? b->dimSize[0] : b->dimSize[1];
int cn = c->dimSize[0];
int cm = c->dimSize[1];
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1];
int bm = transposedB == X_TRANS ? b->dimSizeRDI[1] : b->dimSizeRDI[0];
int cn = c->dimSizeRDI[1];
int cm = c->dimSizeRDI[0];
CheckNTErrors((am == bn && an == cn && bm == cm),
"Unmatched tensors in multiplication!");
......@@ -87,13 +86,13 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA,
int cBlockNum = 1;
for (int i = 2; i < a->order; i++) {
CheckNTErrors((a->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!");
CheckNTErrors((a->dimSizeRDI[i] == c->dimSizeRDI[i - 2 + b->order]), "Incorrect tensor sizes!");
aBlockNum *= a->dimSizeRDI[i];
cBlockNum *= a->dimSizeRDI[i];
}
for (int i = 2; i < b->order; i++) {
CheckNTErrors((b->dimSizeRDI[i] == c->dimSizeRDI[i - 2 + a->order]), "Incorrect tensor sizes!");
CheckNTErrors((b->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!");
bBlockNum *= b->dimSizeRDI[i];
cBlockNum *= b->dimSizeRDI[i];
}
......@@ -101,9 +100,9 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XList * aList = new XList(10);
XList * bList = new XList(10);
XList * cList = new XList(10);
int aDimSize[2] = { -a->dimSize[0], a->dimSize[1] };
int bDimSize[2] = { -b->dimSize[0], b->dimSize[1] };
int cDimSize[2] = { -c->dimSize[0], c->dimSize[1] };
int aDimSize[2] = { a->dimSizeRDI[1], a->dimSizeRDI[0] };
int bDimSize[2] = { b->dimSizeRDI[1], b->dimSizeRDI[0] };
int cDimSize[2] = { c->dimSizeRDI[1], c->dimSizeRDI[0] };
bool isSparseMul = false;
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL_H__
#define __MATRIXMUL_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -39,7 +39,7 @@ normal matrix multiplication if A = y * z and B = x * y.
*/
extern "C"
void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA, XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XName.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "MatrixMul2D.h"
#include "MatrixMul2D.cuh"
#include "MatrixMul2DParallel.h"
......@@ -112,7 +112,7 @@ void MatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
int num = *((int*)b->data);
char * p = (char*)b->data + sizeof(int); // pointer to the first tuple
/* a * b */
/* a * b */
if (transposedA == X_NOTRANS && transposedB == X_NOTRANS) {
for (int i = 0; i < num; i++) {
int key = *((int*)p);
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "MatrixMul2D.h"
#include "MatrixMul2D.cuh"
#include "XTensorBLAS.h"
......@@ -37,11 +37,13 @@ c = a * b * \alpha
>> aColSize - column size of matrix a
>> aRowSize - row size of matrix a
>> b - a sparse matrix
>> transposedA - indicates whether b is transposed
>> transposedB - indicates whether b is transposed
>> bNonZeroNum - number of non-zero items in b
>> bColSize - column size of matrix b
>> bRowSize - row size of matrix b
>> c - the resulting (dense) matrix
>> cColSize - column size of matrix c
>> cRowSize - row size of matrix c
>> alpha - the scaling factor
*/
extern "C" __global__
......@@ -147,7 +149,6 @@ void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
if (!a->isSparse && !b->isSparse) {
CheckNTErrors((!c->isSparse), "Illegal use of sparse matrix in multiplication!");
//cublasHandle_t * handle = GDevs->GetCudaHandle(a->devID);
cublasHandle_t * handle = a->mem == NULL ? GDevs.GetCudaHandle(a->devID) : a->mem->GetCublasHandle();
/* !!!! might have problems */
......@@ -183,7 +184,6 @@ void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
if (beta == 0)
c->SetZeroAll();
else if (beta != 1.0F) {
//XTensor::ScaleAndShift(c, beta, 0);
ShowNTErrors("TODO!");
}
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL2D_H__
#define __MATRIXMUL2D_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "MatrixMul2DMultiTheading.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL2DMULTITHEADING_H__
#define __MATRIXMUL2DMULTITHEADING_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,10 +19,10 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "MatrixMul2DParallel.h"
#include "MatrixMul2DMultiTheading.h"
#include "XMatrixSegment.h"
#include "../utilities/XMatrixSegment.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL2DPARALLEL_H__
#define __MATRIXMUL2DPARALLEL_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../XName.h"
#include "../../XTensor.h"
#include "../../XDevice.h"
#include "../../XName.h"
#include "MatrixMulBatched.h"
#include "MatrixMULBatchedCPU.h"
#include "XTensorBLAS.h"
......@@ -41,6 +41,7 @@ where trans() returns the transposed matrix if the flag is fired
>> c - where we keep a*b
>> alpha - a coefficient
>> beta - another coefficient
>> parallelRunner - parallel processing module
*/
void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XTensor * b, MATRIX_TRANS_TYPE transposedB,
......@@ -59,13 +60,12 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XLink::AddParamToHeadInt(c, transposedB);
XLink::AddParamToHead(c, alpha);
XLink::AddParamToHead(c, beta);
int an = transposedA == X_TRANS ? a->dimSize[1] : a->dimSize[0];
int am = transposedA == X_TRANS ? a->dimSize[0] : a->dimSize[1];
int bn = transposedB == X_TRANS ? b->dimSize[1] : b->dimSize[0];
int bm = transposedB == X_TRANS ? b->dimSize[0] : b->dimSize[1];
int cn = c->dimSize[0];
int cm = c->dimSize[1];
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1];
int bm = transposedB == X_TRANS ? b->dimSizeRDI[1] : b->dimSizeRDI[0];
int cn = c->dimSizeRDI[1];
int cm = c->dimSizeRDI[0];
CheckNTErrors((am == bn && an == cn && bm == cm),
"Unmatched tensors in multiplication!");
......@@ -87,9 +87,9 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XList * aList = new XList(10);
XList * bList = new XList(10);
XList * cList = new XList(10);
int aDimSize[2] = { -a->dimSizeRDI[0], a->dimSizeRDI[1] };
int bDimSize[2] = { -b->dimSizeRDI[0], b->dimSizeRDI[1] };
int cDimSize[2] = { -c->dimSizeRDI[0], c->dimSizeRDI[1] };
int aDimSize[2] = { -a->dimSizeRDI[1], a->dimSizeRDI[0] };
int bDimSize[2] = { -b->dimSizeRDI[1], b->dimSizeRDI[0] };
int cDimSize[2] = { -c->dimSizeRDI[1], c->dimSizeRDI[0] };
for (int p = 0; p < blockNum; p++) {
void * ap = (char*)a->data + aRealBlockSize * p;
......@@ -114,8 +114,9 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup);
CudaBLASMatrixMULList(a->mem != NULL ? a->mem->GetCublasHandle() : GDevs.GetCudaHandle(a->devID),
aList, transposedA,
cublasHandle_t * handle = a->mem != NULL ? a->mem->GetCublasHandle() : GDevs.GetCudaHandle(a->devID);
CudaBLASMatrixMULList(handle,
aList, transposedA,
bList, transposedB,
cList, aList->count,
alpha, beta);
......
......@@ -22,7 +22,7 @@
#ifndef __MATRIXMULBATCHED_H__
#define __MATRIXMULBATCHED_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,12 +19,13 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XName.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "Multiply.h"
#include "Multiply.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
element-wise product of two tensors
c(i) = a(i)*b(i) + \alpha * c(i)
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Multiply.h"
#include "Multiply.cuh"
......@@ -68,6 +68,7 @@ where |a_lead| means the size of the leading dimension of a
>> a - tensor a
>> b - tensor b
>> c - result tensor
>> alpha - the coefficient
>> stride - the number of items we go over when move next along the leading dimension in a block
>> ldSizeA - size of the leading dimension of a
>> ldSizeB - size of the leading dimension of b
......
......@@ -22,7 +22,7 @@
#ifndef __MULTIPLY_H__
#define __MULTIPLY_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,15 +19,15 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "Negate.h"
#include "Negate.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set every entry to its minus value
>> a - the tensor we are processing
set every entry to its minus value
>> a - the tensor we are processing
*/
void Negate(XTensor * a)
{
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Negate.h"
#include "Negate.cuh"
......@@ -42,10 +42,10 @@ void KernelNegate(DTYPE * d, int size)
}
/*
set each entry to its negtive value (CUDA Kernel)
This is for float16 computation
>> d - pointer to the data array
>> size - size of the data array
set each entry to its negative value (CUDA Kernel)
This is for float16 computation
>> d - pointer to the data array
>> size - size of the data array
*/
__global__
void KernelNegate(__half * d, int size)
......
......@@ -22,7 +22,7 @@
#ifndef __NEGATE_H__
#define __NEGATE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XName.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "Sum.h"
#include "Sum.cuh"
......
......@@ -19,12 +19,13 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../../XDevice.h"
#include "Sum.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
summation of data arrays (CUDA Kernel)
c = a + b * \beta
......
......@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* summation of data arrays (CUDA Kernel) */
/* summation of data arrays (CUDA Kernel) */
extern "C" __global__
void KernelADD(DTYPE * a, DTYPE * b, DTYPE * c, int size, DTYPE beta = (DTYPE)1.0);
......
......@@ -22,7 +22,7 @@
#ifndef __SUM_H__
#define __SUM_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "SumByColumnTV.h"
#include "SumByColumnTV.cuh"
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "SumByColumnTV.h"
#include "SumByColumnTV.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __REDUCEMAX_CUH__
#define __REDUCEMAX_CUH__
#include "ReduceMax.h"
#include "../reduce/ReduceMax.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __SUMBYCOLUMNTV_H__
#define __SUMBYCOLUMNTV_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "SumByColumnVT.h"
#include "SumByColumnVT.cuh"
......
......@@ -19,14 +19,15 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "SumByColumnVT.h"
#include "SumByColumnVT.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
summation of a vector (column vector) and a tensor
c = a + \sum{col} b_col * \beta
......
......@@ -22,11 +22,10 @@
#ifndef __SUMBYCOLUMNVT_H__
#define __SUMBYCOLUMNVT_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* sum of a (column) vector and a tensor */
extern "C"
void SumByColumnVT(XTensor * a, XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0);
......
......@@ -20,8 +20,8 @@
*/
#include "XTensorBLAS.h"
#include "../XTensor.h"
#include "../XBLAS.h"
#include "../../XTensor.h"
#include "../../XBLAS.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XUtility.h"
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XUtility.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "XTensorBLAS.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __XTENSORBLAS_H__
#define __XTENSORBLAS_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,8 @@
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-06-14
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../../XTensor.h"
#include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-04
*/
#include "../XUtility.h"
#include "../XName.h"
#include "../../XUtility.h"
#include "../../XName.h"
#include "Select.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -33,7 +33,7 @@ c = select(a)
>> dim - the dimension along with which we do the job
>> low - lower bound
>> high - higher bound.
Note that range [1,3] means that we select 1 and 2.
Note that range [1,3] means that we select 1 and 2.
*/
void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high)
{
......@@ -48,7 +48,7 @@ void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high)
for(int i = 0; i < a->order; i++){
if(i == dim){
CheckNTErrors(low > 0 && low < a->dimSize[dim], "Illegal range specified!");
CheckNTErrors(high > 0 && high < a->dimSize[dim], "Illegal range specified!");
CheckNTErrors(high > 0 && high <= a->dimSize[dim], "Illegal range specified!");
}
else{
CheckNTErrors(a->dimSize[i] == c->dimSize[i], "The size of the dimensions should be same!");
......@@ -62,20 +62,24 @@ void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high)
XLink::AddParamToHeadInt(c, high);
int stride = 1;
for(int i = 0; i < dim; i++)
int dimRDI = a->order - dim - 1;
for(int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
int copyTimes = 1;
for (int i = dimRDI + 1; i < a->order; i++)
copyTimes *= a->dimSizeRDI[i];
int blockSize = stride * (high - low) * a->unitSize;
int stepSizeS = stride * a->dimSize[dim] * a->unitSize;
int stepSizeT = stride * c->dimSize[dim] * a->unitSize;
char * s = (char*)a->data + stride * low * a->unitSize;
char * t = (char*)c->data;
for(int i = 0; i < high - low; i++){
for(int i = 0; i < copyTimes; i++){
XMemCopy(t, c->devID, s, a->devID, blockSize);
s += stepSizeS;
t += stepSizeT;
}
}
} // namespace nts(NiuTrans.Tensor)
......@@ -22,7 +22,7 @@
#ifndef __SELECT_H__
#define __SELECT_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -21,7 +21,7 @@
*/
#include "SetData.h"
#include "CopyValues.h"
#include "../movement/CopyValues.h"
#if !defined( WIN32 ) && !defined( _WIN32 )
#include "sys/time.h"
......@@ -68,10 +68,11 @@ void SetDataRand(XTensor * tensor, DTYPE low, DTYPE high)
ShowNTErrors("TODO");
}
}
/* GPU code
The trick here is that initialize the data on a temperary tensor on CPU.
The CPU data is then copied to GPU.
TODO: generate data points on GPUs straightforwardly.
/*
GPU code
The trick here is to initialize the data on a temporary tensor on CPU.
The CPU data is then copied to GPU.
TODO: generate data points on GPUs straightforwardly.
*/
else{
XTensor * t2 = NewTensor(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, -1);
......
......@@ -23,7 +23,7 @@
#ifndef __SETDATA_H__
#define __SETDATA_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -20,11 +20,12 @@
*/
#include <math.h>
#include "../XTensor.h"
#include "../../XTensor.h"
#include "Normalize.h"
#include "Normalize.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
normalized the data with normal distribution. For an input x,
y = a * (x-mean)/sqrt(variance+\epsilon) + b
......
......@@ -19,12 +19,13 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Normalize.h"
#include "Normalize.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
normalized the data with normal distribution (kernel code). For an input x,
......
......@@ -28,7 +28,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* normalized the data with normal distribution (Kernel code). For an input x,
/*
normalized the data with normal distribution (Kernel code). For an input x,
y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter
*/
......@@ -37,7 +38,8 @@ void KernelNormalize(DTYPE * input, DTYPE * output, DTYPE * mean, DTYPE * var,
DTYPE * a, DTYPE * b, DTYPE epsilon,
int stride, int strideNum, int blockNum);
/* normalized the data with normal distribution. For an input x,
/*
normalized the data with normal distribution. For an input x,
y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter
*/
......
......@@ -22,7 +22,7 @@
#ifndef __NORMALIZE_H__
#define __NORMALIZE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -20,15 +20,16 @@
*/
#include <math.h>
#include "../XTensor.h"
#include "../../XTensor.h"
#include "Power.h"
#include "Power.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
get the power(a, p)
>> a - the tensor
>> power - as it is
>> p - as it is
*/
void Power(XTensor * a, DTYPE p)
{
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Power.h"
#include "Power.cuh"
......@@ -87,9 +87,6 @@ __global__
void KernelPower(__half * d, __half p, int size)
{
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
//int i = blockDim.x * blockIdx.x + threadIdx.x;
//if (i < size)
// d[i] = hpow(d[i], p);
#else
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
......@@ -126,9 +123,6 @@ void CudaPower(XTensor * a, DTYPE p)
}
else if (p != (DTYPE)1.0) {
ShowNTErrors("TODO!");
//unsigned short p2 = FloatToFloat16(p);
//__half * pp = (__half*)&p2;
//KernelPower<<<blocks, threads>>>((__half*)a->data, *pp, a->unitNum);
}
}
else {
......
......@@ -22,7 +22,7 @@
#ifndef __POWER_H__
#define __POWER_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -26,9 +26,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
scale and shift all tensor entries
p = p * scale + shift
>> a - the tensor
>> scale - the scaler factor
>> shift - the shift factor
......
......@@ -21,7 +21,7 @@
#include "ScaleAndShift.h"
#include "ScaleAndShift.cuh"
#include "../XDevice.h"
#include "../../XDevice.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -80,9 +80,7 @@ void KernelScaleAndShift(__half * d, int size, __half scale, __half shift)
/*
scale and shift all matrix entries
p = p * scale + shift
>> a - the tensor
>> scale - the scaler factor
>> shift - the shift factor
......
......@@ -22,7 +22,7 @@
#ifndef __SCALEANDSHIFT_CUH__
#define __SCALEANDSHIFT_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __SCALEANDSHIFT_H__
#define __SCALEANDSHIFT_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XUtility.h"
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "CopyBlocks.h"
#include "CopyBlocksOnSite.h"
#include "CopyBlocksSelected.cuh"
......@@ -78,9 +78,11 @@ void CopyBlocks(void * source, int blockSize, int * sourceBlocks, int blockNum,
else {
int devID = myMem != NULL ? myMem->devID : -1;
/* The following code should be fine with GPUs, but too many
/*
The following code should be fine with GPUs, but too many
kernel calls would slow down the system. We prefer to use
one kernel to do block copy in batch (kernel fusion). */
one kernel to do block copy in batch (kernel fusion).
*/
for (int i = 0; i < blockNum; i++) {
XMemCopy((char*)target + targetBlocks[i] * blockSize, devID,
(char*)source + sourceBlocks[i] * blockSize, devID, blockSize);
......
......@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKS_H__
#define __COPYBLOCKS_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "CopyBlocksInGrid.h"
#include "../XUtility.h"
#include "../../XUtility.h"
#include "CopyBlocksInGrid.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -21,7 +21,7 @@
#include "CopyBlocksInGrid.h"
#include "CopyBlocksInGrid.cuh"
#include "../XDevice.h"
#include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSINGRID_CUH__
#define __COPYBLOCKSINGRID_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSINGRID_H__
#define __COPYBLOCKSINGRID_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,12 +19,13 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XUtility.h"
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "CopyBlocksOnSite.h"
#include "CopyBlocksOnSite.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
copy a number of blocks to target positions. Here we assume that
all the data has been on the device (CPU/GPU) already.
......@@ -47,9 +48,11 @@ void CopyBlocksOnSite(void * source, int blockSize, int blockNum, void * target,
else {
int devID = myMem != NULL ? myMem->devID : -1;
/* The following code should be fine with GPUs, but too many
/*
The following code should be fine with GPUs, but too many
kernel calls would slow down the system. We prefer to use
one kernel to do block copy in batch (kernel fusion). */
one kernel to do block copy in batch (kernel fusion).
*/
for (int i = 0, b = 0; i < blockNum; i++, b += blockSize) {
XMemCopy((char*)target + targetBlocks[i] * blockSize, devID,
(char*)source + b, devID, blockSize);
......
......@@ -21,7 +21,7 @@
#include "CopyBlocksOnSite.h"
#include "CopyBlocksOnSite.cuh"
#include "../XDevice.h"
#include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKS_CUH__
#define __COPYBLOCKS_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSONSITE_H__
#define __COPYBLOCKSONSITE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -21,8 +21,8 @@
#include "CopyBlocks.h"
#include "CopyBlocksSelected.cuh"
#include "../XUtility.h"
#include "../XDevice.h"
#include "../../XUtility.h"
#include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSSELECTED_CUH__
#define __COPYBLOCKSSELECTED_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "CopyData2D.h"
#include "../XUtility.h"
#include "../../XUtility.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __COPYDATA2D_H__
#define __COPYDATA2D_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "CopyInGrid.h"
#include "CopyBlocksInGrid.h"
......@@ -34,7 +34,7 @@ i.e., reorder the data blocks in the same memory piece
in the k-th grid
>> blockDim - leading dimension of blocks
>> blockNumInGrid - number of blocks in each grid
>> isOnDev - indicates whether the index is on the device already
>> isIndexOnDev - indicates whether the index is on the device already
*/
void CopyInGrid(XTensor * s, XTensor * t, int * index, int blockDim, int blockNumInGrid, bool isIndexOnDev)
{
......
......@@ -22,7 +22,7 @@
#ifndef __COPYINGRID_H__
#define __COPYINGRID_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -36,6 +36,7 @@ copy indexed sub-tensors
>> tgtIndex - index of the target sub-tensors
>> copyNum - number of the sub-tensors we copy for each source index, e.g.,
for srcIndex = [1,4] and copyNum = 2, we actually copy the source sub-tensors 1, 2, 4, 5
<< return - whether copy indexed operation was successful
*/
bool CopyIndexed(XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum)
{
......
......@@ -22,7 +22,7 @@
#ifndef __COPYINDEXED_H__
#define __COPYINDEXED_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XName.h"
#include "../../XName.h"
#include "CopyValues.h"
#include "CopyValues.cuh"
......
......@@ -21,8 +21,8 @@
#include "CopyValues.h"
#include "CopyValues.cuh"
#include "../XUtility.h"
#include "../XDevice.h"
#include "../../XUtility.h"
#include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,13 +22,12 @@
#ifndef __COPYVALUES_CUH__
#define __COPYVALUES_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/**************************************/
/* copy all elements from a source matrix to a target matrix */
extern "C"
bool CudaCopyValues(XTensor * s, XTensor * t, XStream * stream = NULL);
......
......@@ -22,7 +22,7 @@
#ifndef __COPYVALUES_H__
#define __COPYVALUES_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XName.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "ReduceMax.h"
#include "ReduceMax.cuh"
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../XUtility.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "ReduceMax.h"
#include "ReduceMax.cuh"
......@@ -31,14 +31,10 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
reduce a tensor to another that keeps the max value along a dimension - slow version
Given a block of data, we go over each dimension i in the stride and we have
sum_i = max_{0<=j<strideNum} input_{i,j}
where we can view the block as a matrix and input_{i,j} represents the item at the
crossing of the i-th column and the j-th row.
>> input - the input array (representing a tensor)
>> output - the sum over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item
......@@ -89,82 +85,77 @@ void KernelReduceMax(DTYPE * input, DTYPE * output,
}
/*
reduce a tensor to another that keeps the max value along a dimension - slow version
Given a block of data, we go over each dimension i in the stride and we have
sum_i = max_{0<=j<strideNum} input_{i,j}
where we can view the block as a matrix and input_{i,j} represents the item at the
crossing of the i-th column and the j-th row.
>> input - the input array (representing a tensor)
>> output - the max over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item
>> strideNum - how many strides we need to finish the reduce
>> reducedStrideNum - the number of strides after reduction
>> blockSize - size of the block (i.e., stride * strideNum)
>> blockNum - how many blocks
*/
__global__
void KernelReduceMax(__half * input, __half * output,
int stride, int strideNum, int reducedStrideNum,
int blockSize, int blockNum)
{
int idx = threadIdx.x * blockDim.y + threadIdx.y;
unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
/*
reduce a tensor to another that keeps the max value along a dimension - slow version
Given a block of data, we go over each dimension i in the stride and we have
sum_i = max_{0<=j<strideNum} input_{i,j}
where we can view the block as a matrix and input_{i,j} represents the item at the
crossing of the i-th column and the j-th row.
>> input - the input array (representing a tensor)
>> output - the max over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item
>> strideNum - how many strides we need to finish the reduce
>> reducedStrideNum - the number of strides after reduction
>> blockSize - size of the block (i.e., stride * strideNum)
>> blockNum - how many blocks
*/
__global__
void KernelReduceMax(__half * input, __half * output,
int stride, int strideNum, int reducedStrideNum,
int blockSize, int blockNum)
{
int idx = threadIdx.x * blockDim.y + threadIdx.y;
unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
if (i >= stride * blockNum)
return;
if (i >= stride * blockNum)
return;
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
__shared__ __half iData[MAX_CUDA_THREAD_NUM_PER_BLOCK * MIN_CUDA_SHARED_MEM_COL_SIZE / 2];
__shared__ __half iData[MAX_CUDA_THREAD_NUM_PER_BLOCK * MIN_CUDA_SHARED_MEM_COL_SIZE / 2];
#else
__shared__ DTYPE iData[MAX_CUDA_THREAD_NUM_PER_BLOCK * MIN_CUDA_SHARED_MEM_COL_SIZE / 2];
__shared__ DTYPE iData[MAX_CUDA_THREAD_NUM_PER_BLOCK * MIN_CUDA_SHARED_MEM_COL_SIZE / 2];
#endif
__syncthreads();
__syncthreads();
int k = i / stride;
int iOffset = i % stride;
int k = i / stride;
int iOffset = i % stride;
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
__half value = (i < stride * blockNum && j < strideNum) ?
__half value = (i < stride * blockNum && j < strideNum) ?
input[blockSize * k + stride * j + iOffset] : __half(FLOAT16_MIN);
#else
DTYPE value = (i < stride * blockNum && j < strideNum) ?
__half2float(input[blockSize * k + stride * j + iOffset]) : FLOAT_MIN;
DTYPE value = (i < stride * blockNum && j < strideNum) ?
__half2float(input[blockSize * k + stride * j + iOffset]) : FLOAT_MIN;
#endif
/* load data into the shared mem */
iData[threadIdx.x * blockDim.y + threadIdx.y] = value;
/* load data into the shared mem */
iData[threadIdx.x * blockDim.y + threadIdx.y] = value;
__syncthreads();
__syncthreads();
/* do reduction in shared mem */
for (unsigned int s = blockDim.y / 2; s > 0; s >>= 1) {
if (threadIdx.y < s && iData[idx] < iData[idx + s]) {
iData[idx] = iData[idx + s];
}
/* do reduction in shared mem */
for (unsigned int s = blockDim.y / 2; s > 0; s >>= 1) {
if (threadIdx.y < s && iData[idx] < iData[idx + s]) {
iData[idx] = iData[idx + s];
}
__syncthreads();
}
__syncthreads();
}
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
/* write result for this block to the output array */
if (threadIdx.y == 0 && blockIdx.y < reducedStrideNum)
output[(k * reducedStrideNum + blockIdx.y) * stride + iOffset] = iData[threadIdx.x * blockDim.y];
/* write result for this block to the output array */
if (threadIdx.y == 0 && blockIdx.y < reducedStrideNum)
output[(k * reducedStrideNum + blockIdx.y) * stride + iOffset] = iData[threadIdx.x * blockDim.y];
#else
/* write result for this block to the output array */
if (threadIdx.y == 0 && blockIdx.y < reducedStrideNum)
output[(k * reducedStrideNum + blockIdx.y) * stride + iOffset] = __half(iData[threadIdx.x * blockDim.y]);
/* write result for this block to the output array */
if (threadIdx.y == 0 && blockIdx.y < reducedStrideNum)
output[(k * reducedStrideNum + blockIdx.y) * stride + iOffset] = __half(iData[threadIdx.x * blockDim.y]);
#endif
}
/*
reduce a tensor to another that keeps the max value along a dimension - fast version
>> input - the input array (representing a tensor)
......@@ -338,9 +329,7 @@ void KernelReduceMaxSimpleFast(DTYPE * input, DTYPE * output,
/*
get the max-valued items along a dimension of the tensor (cuda version).
For a 1-dimensional data array a,
sum_i = max_{0<=j<strideNum} input_{i,j}
>> input - the input tensor
>> output - the output tensor
>> dim - which dimension to reduce
......
......@@ -22,7 +22,7 @@
#ifndef __REDUCEMAX_H__
#define __REDUCEMAX_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "ScaleAndShift.h"
#include "../math/ScaleAndShift.h"
#include "ReduceSum.h"
#include "ReduceMean.h"
......@@ -28,7 +28,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
get the mean value along a dimension of the tensor. For a 1-dimensional data array a,
mean = (1/n) * sum_i input_i
>> input - the input tensor
>> output - the output tensor
>> dim - the dimension where the reduction is performed on
......@@ -44,5 +43,4 @@ void ReduceMean(XTensor * input, XTensor * output, int dim)
ScaleAndShift(output, (DTYPE)1/num, 0);
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
......@@ -22,7 +22,7 @@
#ifndef __REDUCEMEAN_H__
#define __REDUCEMEAN_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __REDUCESTANDARDVARIANCE_H__
#define __REDUCESTANDARDVARIANCE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#include <math.h>
#include "ReduceSum.h"
#include "ReduceSum.cuh"
#include "../XName.h"
#include "../../XName.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XUtility.h"
#include "../../XDevice.h"
#include "../../XUtility.h"
#include "ReduceSum.cuh"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -29,13 +29,11 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
reduce a tensor to another that keeps the sum along a dimension - slow version
Given a block of data, we go over each dimension i in the stride and we have
sum_i = sum_{0<=j<strideNum} exp(input_{i,j} - shift) if isExp == true;
= sum_{0<=j<strideNum} input_{i,j} - shift if isExp == false;
where we can view the block as a matrix and input_{i,j} represents the item at the
crossing of the i-th column and the j-th row.
>> input - the input array (representing a tensor)
>> output - the sum over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item
......@@ -107,13 +105,11 @@ void KernelReduceSum(DTYPE * input, DTYPE * output,
/*
reduce a tensor to another that keeps the sum along a dimension - slow version
This is for float16 reduction.
Given a block of data, we go over each dimension i in the stride and we have
sum_i = sum_{0<=j<strideNum} exp(input_{i,j} - shift) if isExp == true;
= sum_{0<=j<strideNum} input_{i,j} - shift if isExp == false;
where we can view the block as a matrix and input_{i,j} represents the item at the
crossing of the i-th column and the j-th row.
>> input - the input array (representing a tensor)
>> output - the sum over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item
......@@ -304,7 +300,6 @@ void KernelReduceSumFast(DTYPE * input, DTYPE * output,
/*
reduce a tensor to another that keeps the sum along a dimension - fast version
This is for float16 reduction
>> input - the input array (representing a tensor)
>> output - the sum over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item
......
......@@ -22,7 +22,7 @@
#ifndef __REDUCESUM_H__
#define __REDUCESUM_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -28,7 +28,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
squared sum of the items along a dimension of the tensor.
For a 1-dimensional data array a,
sum = \sum_i (a_i - shift)^2
>> input - the input tensor
>> output - the output tensor
>> dim - the dimension where the reduction is performed on
......
......@@ -22,7 +22,7 @@
#ifndef __REDUCESUMSQUARED_H__
#define __REDUCESUMSQUARED_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "ScaleAndShift.h"
#include "../math/ScaleAndShift.h"
#include "ReduceSum.h"
#include "ReduceVariance.h"
......@@ -29,7 +29,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
variance of the items along a dimension of the tensor.
For a 1-dimensional data array a,
variance = 1/n * \sum_i (a_i - mean)^2
>> input - the input tensor
>> output - the output tensor
>> dim - the dimension where the reduction is performed on
......
......@@ -22,7 +22,7 @@
#ifndef __REDUCEVARIANCE_H__
#define __REDUCEVARIANCE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "Concatenate.h"
#include "Merge.h"
#include "ConcatenateSolely.h"
......@@ -53,6 +53,10 @@ void Concatenate(XList * smalls, XTensor * big, int dim)
/*
concatenate two tensors along a given dimension
>> smallA - one tensor for concatenation
>> smallB - the other tensor for concatenation
>> big - the resulting tensor
>> dim - which dimension we perform the concatenation
*/
void Concatenate(XTensor * smallA, XTensor * smallB, XTensor * big, int dim)
{
......
......@@ -22,14 +22,15 @@
#ifndef __CONCATENATE_H__
#define __CONCATENATE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
concatenate a list of tensors along a given dimension
Note that this is actually a wrapper that selects "ConcatenateSolely"
or "Merge" by means of the tensor shapes */
or "Merge" by means of the tensor shapes
*/
void Concatenate(XList * smalls, XTensor * big, int dim);
/* concatenate two tensors along a given dimension */
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XUtility.h"
#include "../XName.h"
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "../../XName.h"
#include "ConcatenateSolely.h"
#include "MergeBlockLists.h"
......@@ -69,9 +69,11 @@ void ConcatenateSolely(XList * smalls, XTensor * big, int dim)
int offset = 0;
/* two strategies are used - we can either resort to memcpy2d for the case of
/*
two strategies are used - we can either resort to memcpy2d for the case of
concatenation of a few items, or use MergeBlockLists to merge a large number
of data blocks */
of data blocks
*/
if (smalls->count <= MIN_TENSOR_CAT_NUM) {
for (int i = 0; i < smalls->count; i++) {
XTensor * tensor = (XTensor*)smalls->GetItem(i);
......
......@@ -22,11 +22,10 @@
#ifndef __CONCATENATESOLELY_H__
#define __CONCATENATESOLELY_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* concatenate a list of tensors along a given dimension */
extern "C"
void ConcatenateSolely(XList * smalls, XTensor * big, int dim);
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "MakeMergeBlockIndex.h"
#include "MakeMergeBlockIndex.cuh"
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "MakeMergeBlockIndex.h"
#include "MakeMergeBlockIndex.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __CUDAMAKEMERGEBLOCKINDEX_CUH__
#define __CUDAMAKEMERGEBLOCKINDEX_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __MAKEMERGEBLOCKINDEX_H__
#define __MAKEMERGEBLOCKINDEX_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,11 +19,12 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../../XTensor.h"
#include "MakeSplitBlockIndex.h"
#include "MakeSplitBlockIndex.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set target data block index for the data movement in split
>> blockIndex - block index
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "MakeSplitBlockIndex.h"
#include "MakeSplitBlockIndex.cuh"
......@@ -51,6 +51,7 @@ void KernelMakeSplitBlockIndex(int * blockIndex, int splitNum, int blockSplitSiz
/*
set target data block index for the data movement in split
>> devID - device id
>> blockIndex - block index
>> splitNum - number of splits
>> blockSplitSize - size of the split block
......
......@@ -22,7 +22,7 @@
#ifndef __MAKESPLITBLOCKINDEX_H__
#define __MAKESPLITBLOCKINDEX_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,16 +19,15 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XUtility.h"
#include "../XName.h"
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "../../XName.h"
#include "Merge.h"
#include "MakeMergeBlockIndex.h"
#include "CopyBlocksOnSite.h"
#include "../movement/CopyBlocksOnSite.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
transform a tensor by merging it along a dimension, e.g., (N/3, M, 3) -> (N, M)
>> s - the source tensor
......
......@@ -22,7 +22,7 @@
#ifndef __MERGE_H__
#define __MERGE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XUtility.h"
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "MergeBlockLists.h"
#include "MergeBlockLists.cuh"
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XUtility.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XUtility.h"
#include "../../XTensor.h"
#include "MergeBlockLists.h"
#include "MergeBlockLists.cuh"
......@@ -34,10 +34,9 @@ copy a number of blocks (of different sizes) to target positions
>> sourceBlockSizes - the size of the block_i
>> sourceBlockNum - number of blocks to merge
>> targetList - list of data arrays to copy to
>> target - target data array
*/
__global__
void KernelCopyBlockLists(DTYPE * sourceList[], int * sourceBlockSizes, int sourceBlockNum, DTYPE * targetList[])
void KernelCopyBlockLists(DTYPE * sourceList[], int * sourceBlockSizes, int sourceBlockNum, DTYPE * targetList[])
{
__shared__ int iBlockSizes[MAX_CUDA_THREAD_NUM_PER_BLOCK];
__shared__ DTYPE * iSourceList[MAX_CUDA_THREAD_NUM_PER_BLOCK];
......@@ -82,7 +81,6 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
int minBlockSize = MAX_INT;
int maxBlockSize = -MAX_INT;
//int realMinBlockSize = 1;
int realMaxBlockSize = 1;
DTYPE ** sourceArrays = new DTYPE*[newBlockListSize];
DTYPE ** targetArrays = new DTYPE*[newBlockListSize];
......@@ -110,7 +108,6 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
CheckNTErrors((minBlockSize % sizeof(DTYPE) == 0), "Unsupported block size!");
CheckNTErrors((maxBlockSize % sizeof(DTYPE) == 0), "Unsupported block size!");
//realMinBlockSize = minBlockSize/sizeof(DTYPE);
realMaxBlockSize = maxBlockSize / sizeof(DTYPE);
int cudaGridSizes[3];
......@@ -120,31 +117,16 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
cudaGridSizes, cudaBlockSizes);
myMem->SetPinBuf();
//MTYPE offset0 = myMem->bufUsed;
int * sizesGPU = (int*)myMem->AllocBuf(myMem->devID, sizeof(int) * newBlockListSize, 256);
//MTYPE offset1 = myMem->bufUsed;
DTYPE ** sourceArraysGPU = (DTYPE**)myMem->AllocBuf(myMem->devID, sizeof(DTYPE*) * newBlockListSize, 256);
//MTYPE offset2 = myMem->bufUsed;
DTYPE ** targetArraysGPU = (DTYPE**)myMem->AllocBuf(myMem->devID, sizeof(DTYPE*) * newBlockListSize, 256);
//MTYPE bufSize = myMem->bufUsed - offset0;
//char * CPUBuf = new char[bufSize];
//memset(CPUBuf, 0 , bufSize);
//memcpy(CPUBuf, sizes, sizeof(int) * newBlockListSize);
//memcpy(CPUBuf + (offset1 - offset0), sourceArrays, sizeof(DTYPE*) * newBlockListSize);
//memcpy(CPUBuf + (offset2 - offset0), targetArrays, sizeof(DTYPE*) * newBlockListSize);
XMemCopy(sizesGPU, myMem->devID, sizes, -1, sizeof(int) * newBlockListSize);
XMemCopy(sourceArraysGPU, myMem->devID, sourceArrays, -1, sizeof(DTYPE*) * newBlockListSize);
XMemCopy(targetArraysGPU, myMem->devID, targetArrays, -1, sizeof(DTYPE*) * newBlockListSize);
/* it is VERY tricky here because we squeeze three data copies into one */
//XMemCopy(sizesGPU, myMem->devID, CPUBuf, -1, bufSize);
KernelCopyBlockLists << <dim3(cudaGridSizes[0], cudaGridSizes[1]), dim3(cudaBlockSizes[0], cudaBlockSizes[1]) >> >
(sourceArraysGPU, sizesGPU, newBlockListSize, targetArraysGPU);
......@@ -154,7 +136,6 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
delete[] targetArrays;
delete[] sizes;
delete[] offsets;
//delete[] CPUBuf;
}
#endif // USE_CUDA
......
......@@ -22,7 +22,7 @@
#ifndef __MERGEBLOCKLISTS_H__
#define __MERGEBLOCKLISTS_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __PERMUTE_H__
#define __PERMUTE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,11 +19,11 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XUtility.h"
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "Split.h"
#include "MakeSplitBlockIndex.h"
#include "CopyBlocksOnSite.h"
#include "../movement/CopyBlocksOnSite.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,11 +22,12 @@
#ifndef __SPLIT_H__
#define __SPLIT_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* transform a tensor by splitting it, e.g., (M, N) -> (M, N/3, 3) */
extern "C"
void Split(XTensor * s, XTensor * t, int whereToSplit, int splitNum);
/* split a big tensor into small tensors */
......
......@@ -23,7 +23,7 @@
#ifndef __TRANSPOSE_H__
#define __TRANSPOSE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XName.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "Unsqueeze.h"
#include "MergeBlockLists.h"
#include "Unsqueeze.cuh"
......
......@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "Unsqueeze.h"
#include "Unsqueeze.cuh"
......
......@@ -22,7 +22,7 @@
#ifndef __UNSQUEEZE_H__
#define __UNSQUEEZE_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XUtility.h"
#include "../XName.h"
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "../../XName.h"
#include "Sort.h"
#include "Sort.cuh"
......
......@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XUtility.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XUtility.h"
#include "../../XTensor.h"
#include "Sort.h"
#include "Sort.cuh"
......@@ -235,13 +235,16 @@ void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, i
int m = GetNextPower2(strideNum);
int n = stride * blockNum;
void * buf = mem->AllocBuf(mem->devID, n * m * a->unitSize);
void * bufIndex = (indexA != NULL && indexB != NULL) ? mem->AllocBuf(mem->devID, n * m * sizeof(int)) : NULL;
void * buf = mem != NULL ? mem->AllocBuf(a->devID, n * m * a->unitSize) : XMemAlloc(a->devID, n * m * a->unitSize);
void * bufIndex = NULL;
if (indexA != NULL && indexB != NULL) {
bufIndex = mem != NULL ? mem->AllocBuf(a->devID, n * m * sizeof(int)) : XMemAlloc(a->devID, n * m * sizeof(int));
}
int cudaGrids[3];
int cudaBlocks[3];
GDevs.GetCudaThread(mem->devID, m * n, cudaGrids, cudaBlocks);
GDevs.GetCudaThread(a->devID, m * n, cudaGrids, cudaBlocks);
int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup);
......@@ -250,7 +253,7 @@ void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, i
KernelSetDataArray<DTYPE> << <dim3(cudaGrids[0]), dim3(cudaBlocks[0]) >> >
((DTYPE*)buf, DTYPE_MIN, m * n);
GDevs.GetCudaThread2D(mem->devID, strideNum, n, MAX_INT, cudaGrids, cudaBlocks);
GDevs.GetCudaThread2D(a->devID, strideNum, n, MAX_INT, cudaGrids, cudaBlocks);
/* reorganize the data into a matrix */
KernelReorganize<DTYPE> << <dim3(cudaGrids[1], cudaGrids[0]), dim3(cudaBlocks[1], cudaBlocks[0]) >> >
......@@ -261,7 +264,7 @@ void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, i
KernelReorganize<int> << <dim3(cudaGrids[1], cudaGrids[0]), dim3(cudaBlocks[1], cudaBlocks[0]) >> >
(indexA->data, bufIndex, stride, strideNum, blockNum, m, n);
GDevs.GetCudaThread2D(mem->devID, m, n, MAX_INT, cudaGrids, cudaBlocks);
GDevs.GetCudaThread2D(a->devID, m, n, MAX_INT, cudaGrids, cudaBlocks);
/* bitonic sorting */
for (int i = 2; i <= m; i <<= 1) {
......@@ -277,7 +280,7 @@ void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, i
}
}
GDevs.GetCudaThread2D(mem->devID, k, n, MAX_INT, cudaGrids, cudaBlocks);
GDevs.GetCudaThread2D(a->devID, k, n, MAX_INT, cudaGrids, cudaBlocks);
/* copy result to the output tensor */
KernelReorganizeBack<DTYPE> << <dim3(cudaGrids[1], cudaGrids[0]), dim3(cudaBlocks[1], cudaBlocks[0]) >> >
......@@ -287,9 +290,15 @@ void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, i
KernelReorganizeBack<int> << <dim3(cudaGrids[1], cudaGrids[0]), dim3(cudaBlocks[1], cudaBlocks[0]) >> >
(bufIndex, indexB->data, m, n, stride, k, blockNum);
mem->ReleaseBuf(mem->devID, n * m * a->unitSize);
if (mem != NULL)
mem->ReleaseBuf(a->devID, n * m * a->unitSize);
else
XMemFree(a->devID, buf);
if (indexA != NULL && indexB != NULL)
mem->ReleaseBuf(mem->devID, n * m * sizeof(int));
if (mem != NULL)
mem->ReleaseBuf(a->devID, n * m * sizeof(int));
else
XMemFree(a->devID, bufIndex);
ProtectCudaDev(a->devID, devIDBackup);
}
......
......@@ -29,6 +29,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* sort the tensor along a given dimension */
extern "C"
void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, int dim, int k = -1);
#endif // USE_CUDA
......
......@@ -22,7 +22,7 @@
#ifndef __SORT_H__
#define __SORT_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,12 +19,13 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XTensor.h"
#include "../XName.h"
#include "../../XTensor.h"
#include "../../XName.h"
#include "TopK.h"
#include "TopK.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
get the top-k items along a given dimension
>> a - input tensor
......
......@@ -19,8 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/
#include "../XDevice.h"
#include "../XTensor.h"
#include "../../XDevice.h"
#include "../../XUtility.h"
#include "../../XTensor.h"
#include "TopK.h"
#include "TopK.cuh"
#include "Sort.cuh"
......@@ -94,9 +95,6 @@ public:
/* swap */
__device__ void Swap(int i, int j)
{
/*CudaHeapNode<T> tmp = items[i];
items[i] = items[j];
items[j] = tmp;*/
int tmpIndex = items[i].index;
T tmpValue = items[i].value;
items[i] = items[j];
......@@ -238,8 +236,10 @@ void KernelTopK(T * input, int stride, int strideNum, int blockNum, int k, T min
if (threadIdx.x == 0) {
CudaXHeap<MIN_HEAP, T> heapFinal(k, k, heapData + k * threadIdx.y * blockDim.x);
/* merge the result over the workers.
This can be improved by parallel merging */
/*
merge the result over the workers.
This can be improved by parallel merging
*/
if (blockDim.x > 1) {
for (int p = 1; p < blockDim.x && p < strideNum; p++) {
CudaHeapNode<T> * hd = heapData + k * (threadIdx.y * blockDim.x + p);
......@@ -393,7 +393,7 @@ void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k)
int cudaGrids[3];
int cudaBlocks[3];
GDevs.GetCudaThread2D(a->mem->devID,
GDevs.GetCudaThread2D(a->devID,
workerNum, stride * blockNum, MAX_INT,
cudaGrids, cudaBlocks);
......@@ -434,14 +434,15 @@ void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k)
memcpy(dimSize, a->dimSize, sizeof(int) * a->order);
dimSize[0] = -dimSize[0];
XTensor * indexA = new XTensor(a->order, dimSize, X_INT, 1.0F, a->mem);
indexA->data = a->mem->AllocBuf(a->devID, a->unitNum * sizeof(int));
indexA->data = a->mem != NULL ? a->mem->AllocBuf(a->devID, a->unitNum * sizeof(int)) : XMemAlloc(a->devID, a->unitNum * sizeof(int));
/* make the index tensor */
indexA->SetAscendingOrder(dim);
CudaSortBig(a, b, indexA, index, dim, k);
a->mem->ReleaseBuf(a->devID, a->unitNum * sizeof(int));
if (a->mem != NULL)
a->mem->ReleaseBuf(a->devID, a->unitNum * sizeof(int));
delete indexA;
}
......
......@@ -22,7 +22,7 @@
#ifndef __TOPK_H__
#define __TOPK_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-06-22
*/
#include "../XUtility.h"
#include "../../XUtility.h"
#include "FlushToMem.h"
#include "FlushToMem.cuh"
......
......@@ -20,7 +20,7 @@
*/
#include "FlushToMem.cuh"
#include "../XUtility.h"
#include "../../XUtility.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -52,7 +52,6 @@ void CudaCPUToGPUFlush(XList * mList, int devID, XMem * GPUMem)
else
reqiredSize = m->unitSize * m->unitNum;
//reqiredSize = (int)GPUMem->GetPitch(GPUMem->devID, (MTYPE)GPUMem->GetAddress() + size, reqiredSize);
size += reqiredSize;
}
......@@ -70,7 +69,6 @@ void CudaCPUToGPUFlush(XList * mList, int devID, XMem * GPUMem)
else
pSize = m->unitSize * m->unitNum;
//reqiredSize = (int)GPUMem->GetPitch(GPUMem->devID, (MTYPE)GPUMem->GetAddress() + p, pSize);
reqiredSize = pSize;
memcpy(data + p, m->data, pSize);
......
......@@ -22,7 +22,7 @@
#ifndef __FLUSHTOMEM_CUH__
#define __FLUSHTOMEM_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __FLUSHTOMEM_H__
#define __FLUSHTOMEM_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -20,7 +20,7 @@
*/
#include "SetAscendingOrder.cuh"
#include "../XDevice.h"
#include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,7 +22,7 @@
#ifndef __SETASCENDINGORDER_CUH__
#define __SETASCENDINGORDER_CUH__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -227,7 +227,7 @@ int SegmentTensor2D(int rowNum, int colNum, int blockNum, int * blockIndex)
x2 = colSize - 1;
y2 = rowSize - 1; // bottom-right corner
/* the main body of the matrix (after removing the margin block) */
/* the main body of the matrix (after removing the margin block) */
while (x1 <= xMax) {
y1 = 0;
x2 = x1 + colSize - 1;
......
......@@ -22,13 +22,11 @@
#ifndef __XMATRIXSEGMENT_H__
#define __XMATRIXSEGMENT_H__
#include "../XTensor.h"
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*******************************************************************
segmentation and parallel processing for 2d tensors (i.e., matrices)
*/
/* segmentation and parallel processing for 2d tensors (i.e., matrices) */
/* segment a 2d tensor (i.e., matrix) into blocks and run jobs in parallel */
extern "C"
void RunParallel2D(XPRunner * parallelRunner, void * job, int opNum, int rowNum, int colNum, int argNum, ...);
......
......@@ -25,7 +25,6 @@
namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
hard tanh function
y = 1 if x > 1
......
......@@ -95,7 +95,6 @@ dy/dx = 1 if -1 <= x <= 1
>> y - y of the function
>> x - x of the function
>> size - size of y/x
*/
__global__
void KernelHardtanhBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size)
......
......@@ -21,7 +21,7 @@
#include "Identity.h"
#include "../XUtility.h"
#include "../core/CopyValues.h"
#include "../core/movement/CopyValues.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......
......@@ -23,9 +23,9 @@
#include "../XUtility.h"
#include "LogSoftmax.h"
#include "LogSoftmax.cuh"
#include "../core/ReduceSum.h"
#include "../core/ReduceMax.h"
#include "../core/CopyValues.h"
#include "../core/reduce/ReduceSum.h"
#include "../core/reduce/ReduceMax.h"
#include "../core/movement/CopyValues.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -49,7 +49,6 @@ void LogSoftmax(XTensor * x, XTensor * y, int leadDim)
dimSize[i - 1] = -x->dimSize[i];
}
XMem * mem = x->mem;
XTensor * max = NULL;
XTensor * sum = NULL;
......@@ -168,7 +167,6 @@ dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
dy_i/dx_j
= d{log(e^{x_i} / \sum_{k} e^{x_k})}/dx_j
= d{log(e^{x_i})}/dx_j - d{log(\sum_{k} e^{x_k})}/dx_j
......
......@@ -22,8 +22,8 @@
#include "LogSoftmax.h"
#include "LogSoftmax.cuh"
#include "Loss.cuh"
#include "../core/ReduceSum.cuh"
#include "../core/ReduceMax.cuh"
#include "../core/reduce/ReduceSum.cuh"
#include "../core/reduce/ReduceMax.cuh"
#include "../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -41,7 +41,8 @@ void CudaLogSoftmax(XTensor * x, XTensor * y, int leadDim)
ShowNTErrors("You should call LogSoftmax instead!");
}
/* log softmax forward computation (Cuda kernel)
/*
log softmax forward computation (Cuda kernel)
for each column j, let y_{i,j} and x_{i,j} be the output
and state value of the i-th element of column j. We have
......@@ -85,7 +86,8 @@ void KernelLogSoftmaxComputeByRow(DTYPE * x, DTYPE * max, DTYPE * sum, DTYPE * y
}
}
/* log softmax forward computation (Cuda kernel)
/*
log softmax forward computation (Cuda kernel)
for each row i, let y_{i,j} and x_{i,j} be the output
and state value of the j-th element of row i. We have
......@@ -182,7 +184,7 @@ void CudaLogSoftmaxSumMax(XTensor * x, XTensor * y, int leadDim, XTensor * sum,
/*
set dE/dx = exp(y)
>> dedu - dE/dy
>> dedy - dE/dy
>> dedx - dE/dx
>> y - output of the function
>> size - size of output
......@@ -256,7 +258,9 @@ dE/dx_j += -gold_j
>> gold - gold standard to measure error (or loss)
>> y - output of the function
>> x - input of the function
>> size - size of input/output
>> rowNum - row number of the matrix
>> colNum - column number of the matrix
>> gNonZeroNum - presumably the number of non-zero entries in gold (TODO: confirm against the kernel body)
>> lossName - name of the loss function
*/
__global__
......@@ -293,7 +297,6 @@ dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
dy_i/dx_j
= d{log(e^{x_i} / \sum_{k} e^{x_k})}/dx_j
= d{log(e^{x_i})}/dx_j - d{log(\sum_{k} e^{x_k})}/dx_j
......
......@@ -374,15 +374,15 @@ void LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
LOSS_FUNCTION_NAME LFName,
int leadDim, int tBeg, int tLen, int yBeg)
{
CheckNTErrors((tLen >= 0 && tLen < y->unitNum), "Illegal input length!");
CheckNTErrors((tLen < y->unitNum), "Illegal input length!");
CheckNTErrors((XTensor::IsIdentical(t, y)&& XTensor::IsIdentical(dedy, y)),
"The input tensors must be of the same size!");
CheckNTErrors((t->dimSizeRDI[0] == 1 && y->dimSizeRDI[0] == 1 && dedy->dimSizeRDI[1] == 1), "TODO!");
CheckNTErrors((t->dimSizeRDI[0] == 1 && y->dimSizeRDI[0] == 1 && dedy->dimSizeRDI[0] == 1), "TODO!");
CheckNTErrors((t->order > leadDim && leadDim >= 0), "Illegal leading dimension!");
CheckNTErrors((t->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE),
"TODO!");
int leadDimRDI = y->order - leadDim - 1;
int leadDimRDI = leadDim >= 0 ? y->order - leadDim - 1 : -1;
if(leadDimRDI < 0){
leadDimRDI = y->dimSizeRDI[y->order - 1];
tBeg = 0;
......
......@@ -31,7 +31,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
loss function to measure the "number" of errors
*/
/*
compute the loss
>> gold - gold standard
......
......@@ -88,7 +88,6 @@ dy/dx = 1 if x >= 0
>> y - output of the function
>> x - input of the function
>> size - size of output/input
*/
__global__
void KernelRectifyBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size)
......
......@@ -25,7 +25,6 @@
namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
sigmoid function y = 1/(1+exp(-x))
>> x - input tensor
......
......@@ -95,7 +95,6 @@ sigmoid: y = 1/(1+exp(-x))
>> y - output of the function
>> x - input of the function
>> size - size of output/input
*/
__global__
void KernelSigmoidBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size)
......@@ -122,7 +121,6 @@ sigmoid: y = 1/(1+exp(-x))
>> dedy - dE/dy
>> dedx - dE/dx
>> lossName - type of loss function, e.g., cross entropy
*/
void CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx,
......
......@@ -23,8 +23,8 @@
#include "Softmax.h"
#include "Softmax.cuh"
#include "../XUtility.h"
#include "../core/ReduceSum.h"
#include "../core/ReduceMax.h"
#include "../core/reduce/ReduceSum.h"
#include "../core/reduce/ReduceMax.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,10 +22,10 @@
#include "Softmax.h"
#include "Softmax.cuh"
#include "Loss.cuh"
#include "../core/ReduceSum.h"
#include "../core/Multiply.h"
#include "../core/Unsqueeze.h"
#include "../core/Sum.h"
#include "../core/reduce/ReduceSum.h"
#include "../core/arithmetic/Multiply.h"
#include "../core/shape/Unsqueeze.h"
#include "../core/arithmetic/Sum.h"
#include "../XDevice.h"
#include "../XUtility.h"
......
......@@ -29,7 +29,6 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */
extern "C"
void CudaSotmax(XTensor * input, XTensor * output, int leadDim);
......
......@@ -19,23 +19,20 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../core/Concatenate.h"
#include "../XList.h"
#include "TConcatenate.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: concatenate a list of tensors along a given dimension
* In this case, 2 * (2 * 1) -> (2 * 2), dim=1.
/*
case 1: concatenate a list of tensors along a given dimension.
In this case, 2 * (2, 1) -> (2, 2), dim=1.
*/
bool TestConcatenate1()
{
/* create list */
XList sList;
sList = XList();
XList * sList = new XList();
/* a source tensor of size (2 * 1) */
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
......@@ -45,7 +42,7 @@ bool TestConcatenate1()
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2 * 1) */
/* a source tensor of size (2, 1) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2;
......@@ -55,7 +52,7 @@ bool TestConcatenate1()
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2 * 2) */
/* a target tensor of size (2, 2) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
......@@ -65,12 +62,12 @@ bool TestConcatenate1()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][1] = { {0.0},
{1.0} };
DTYPE sData2[2][1] = { {2.0},
{3.0} };
DTYPE answer[2][2] = { {0.0, 2.0},
{1.0, 3.0} };
DTYPE sData1[2][1] = { {0.0F},
{1.0F} };
DTYPE sData2[2][1] = { {2.0F},
{3.0F} };
DTYPE answer[2][2] = { {0.0F, 2.0F},
{1.0F, 3.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -86,11 +83,11 @@ bool TestConcatenate1()
t->SetZeroAll();
/* add tensors to list */
sList.Add(s1);
sList.Add(s2);
sList->Add(s1);
sList->Add(s2);
/* call concatenate function */
Concatenate(&sList, t, 1);
/* call Concatenate function */
Concatenate(sList, t, 1);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
......@@ -99,9 +96,6 @@ bool TestConcatenate1()
/* GPU test */
bool gpuTest = true;
/* clear list */
sList.Clear();
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
......@@ -112,40 +106,56 @@ bool TestConcatenate1()
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* clear list */
sList->Clear();
/* add tensors to list*/
sList.Add(sGPU1);
sList.Add(sGPU2);
sList->Add(sGPU1);
sList->Add(sGPU2);
/* call concatenate function */
Concatenate(&sList, tGPU, 1);
/* call Concatenate function */
Concatenate(sList, tGPU, 1);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize1, sDimSize2, tDimSize;
delete sList;
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1, s2, t;
delete[] sDimSize1, sDimSize2, tDimSize;
delete sList;
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 2: concatenate a list of tensors along a given dimension
* In this case, 2 * (2 * 1) -> (4 * 1), dim=0.
/*
case 2: concatenate a list of tensors along a given dimension.
In this case, 2 * (2, 1) -> (4, 1), dim=0.
*/
bool TestConcatenate2()
{
/* create list */
XList sList;
sList = XList();
XList * sList = new XList();
/* a source tensor of size (2 * 1) */
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
......@@ -155,7 +165,7 @@ bool TestConcatenate2()
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2 * 1) */
/* a source tensor of size (2, 1) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2;
......@@ -165,7 +175,7 @@ bool TestConcatenate2()
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (4 * 1) */
/* a target tensor of size (4, 1) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 4;
......@@ -175,14 +185,14 @@ bool TestConcatenate2()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][1] = { {0.0},
{1.0} };
DTYPE sData2[2][1] = { {2.0},
{3.0} };
DTYPE answer[4][1] = { {0.0},
{1.0},
{2.0},
{3.0} };
DTYPE sData1[2][1] = { {0.0F},
{1.0F} };
DTYPE sData2[2][1] = { {2.0F},
{3.0F} };
DTYPE answer[4][1] = { {0.0F},
{1.0F},
{2.0F},
{3.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -198,11 +208,11 @@ bool TestConcatenate2()
t->SetZeroAll();
/* add tensors to list */
sList.Add(s1);
sList.Add(s2);
sList->Add(s1);
sList->Add(s2);
/* call concatenate function */
Concatenate(&sList, t, 0);
/* call Concatenate function */
Concatenate(sList, t, 0);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
......@@ -211,9 +221,6 @@ bool TestConcatenate2()
/* GPU test */
bool gpuTest = true;
/* clear list */
sList.Clear();
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
......@@ -223,40 +230,57 @@ bool TestConcatenate2()
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* clear list */
sList->Clear();
/* add tensors to list*/
sList.Add(sGPU1);
sList.Add(sGPU2);
sList->Add(sGPU1);
sList->Add(sGPU2);
/* call concatenate function */
Concatenate(&sList, tGPU, 0);
/* call Concatenate function */
Concatenate(sList, tGPU, 0);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize1, sDimSize2, tDimSize;
delete sList;
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1, s2, t;
delete[] sDimSize1, sDimSize2, tDimSize;
delete sList;
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 3: concatenate a list of tensors along a given dimension
* In this case, (2 * 1) + (2 * 2) -> (2 * 3), dim=1.
/*
case 3: concatenate a list of tensors along a given dimension.
In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
*/
bool TestConcatenate3()
{
/* create list */
XList sList;
sList = XList();
/* a source tensor of size (2 * 1) */
XList * sList = new XList();
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
......@@ -266,7 +290,7 @@ bool TestConcatenate3()
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2 * 2) */
/* a source tensor of size (2, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2;
......@@ -276,7 +300,7 @@ bool TestConcatenate3()
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2 * 3) */
/* a target tensor of size (2, 3) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
......@@ -286,12 +310,12 @@ bool TestConcatenate3()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][1] = { {0.0},
{1.0} };
DTYPE sData2[2][2] = { {2.0, 3.0},
{4.0, 5.0} };
DTYPE answer[2][3] = { {0.0, 2.0, 3.0},
{1.0, 4.0, 5.0} };
DTYPE sData1[2][1] = { {0.0F},
{1.0F} };
DTYPE sData2[2][2] = { {2.0F, 3.0F},
{4.0F, 5.0F} };
DTYPE answer[2][3] = { {0.0F, 2.0F, 3.0F},
{1.0F, 4.0F, 5.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -307,11 +331,11 @@ bool TestConcatenate3()
t->SetZeroAll();
/* add tensors to list */
sList.Add(s1);
sList.Add(s2);
sList->Add(s1);
sList->Add(s2);
/* call concatenate function */
Concatenate(&sList, t, 1);
/* call Concatenate function */
Concatenate(sList, t, 1);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
......@@ -320,9 +344,6 @@ bool TestConcatenate3()
/* GPU test */
bool gpuTest = true;
/* clear list */
sList.Clear();
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
......@@ -332,37 +353,54 @@ bool TestConcatenate3()
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* clear list */
sList->Clear();
/* add tensors to list*/
sList.Add(sGPU1);
sList.Add(sGPU2);
sList->Add(sGPU1);
sList->Add(sGPU2);
/* call concatenate function */
Concatenate(&sList, tGPU, 1);
/* call Concatenate function */
Concatenate(sList, tGPU, 1);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize1, sDimSize2, tDimSize;
delete sList;
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1, s2, t;
delete[] sDimSize1, sDimSize2, tDimSize;
/* destroy variables */
delete sList;
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 4: concatenate two tensors along a given dimension
* In this case, (2 * 1) + (2 * 2) -> (2 * 3), dim=1.
/*
case 4: concatenate two tensors along a given dimension.
In this case, (2, 1), (2, 2) -> (2, 3), dim=1.
*/
bool TestConcatenate4()
{
/* a source tensor of size (2 * 1) */
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
......@@ -372,7 +410,7 @@ bool TestConcatenate4()
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2 * 2) */
/* a source tensor of size (2, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2;
......@@ -382,7 +420,7 @@ bool TestConcatenate4()
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2 * 3) */
/* a target tensor of size (2, 3) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
......@@ -392,12 +430,12 @@ bool TestConcatenate4()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][1] = { {0.0},
{1.0} };
DTYPE sData2[2][2] = { {2.0, 3.0},
{4.0, 5.0} };
DTYPE answer[2][3] = { {0.0, 2.0, 3.0},
{1.0, 4.0, 5.0} };
DTYPE sData1[2][1] = { {0.0F},
{1.0F} };
DTYPE sData2[2][2] = { {2.0F, 3.0F},
{4.0F, 5.0F} };
DTYPE answer[2][3] = { {0.0F, 2.0F, 3.0F},
{1.0F, 4.0F, 5.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -412,7 +450,7 @@ bool TestConcatenate4()
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* call concatenate function */
/* call Concatenate function */
Concatenate(s1, s2, t, 1);
/* check results */
......@@ -432,21 +470,32 @@ bool TestConcatenate4()
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* call concatenate function */
/* call Concatenate function */
Concatenate(sGPU1, sGPU2, tGPU, 1);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1, s2, t;
delete[] sDimSize1, sDimSize2, tDimSize;
/* destroy variables */
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
......@@ -458,10 +507,9 @@ TODO!!
*/
/* test for Concatenate Function */
extern "C"
bool TestConcatenate()
{
XPRINT(0, stdout, "[TEST CONCATENATE] -------------\n");
XPRINT(0, stdout, "[TEST CONCATENATE] concatenate a list of tensors or two tensors along a given dimension \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_CONCATENATE_H__
#define __TEST_CONCATENATE_H__
#include "../core/Concatenate.h"
#include "../core/shape/Concatenate.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,23 +19,21 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../core/ConcatenateSolely.h"
#include "../XList.h"
#include "TConcatenateSolely.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: concatenate a list of tensors along a given dimension
* In this case, 2 * (2 * 1) -> (2 * 2), dim=1.
/*
case 1: concatenate a list of tensors along a given dimension
In this case, 2 * (2, 1) -> (2, 2), dim=1.
*/
bool TestConcatenateSolely1()
{
/* create list */
XList sList;
sList = XList();
XList * sList = new XList();
/* a source tensor of size 2 * 1 */
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
......@@ -45,7 +43,7 @@ bool TestConcatenateSolely1()
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size 2 * 1 */
/* a source tensor of size (2, 1) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2;
......@@ -55,7 +53,7 @@ bool TestConcatenateSolely1()
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size 2 * 2 */
/* a target tensor of size (2, 2) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
......@@ -65,12 +63,12 @@ bool TestConcatenateSolely1()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][1] = { {0.0},
{1.0} };
DTYPE sData2[2][1] = { {2.0},
{3.0} };
DTYPE answer[2][2] = { {0.0, 2.0},
{1.0, 3.0} };
DTYPE sData1[2][1] = { {0.0F},
{1.0F} };
DTYPE sData2[2][1] = { {2.0F},
{3.0F} };
DTYPE answer[2][2] = { {0.0F, 2.0F},
{1.0F, 3.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -86,11 +84,11 @@ bool TestConcatenateSolely1()
t->SetZeroAll();
/* add tensors to list */
sList.Add(s1);
sList.Add(s2);
sList->Add(s1);
sList->Add(s2);
/* call concatenatesolely function */
ConcatenateSolely(&sList, t, 1);
/* call ConcatenateSolely function */
ConcatenateSolely(sList, t, 1);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
......@@ -99,9 +97,6 @@ bool TestConcatenateSolely1()
/* GPU test */
bool gpuTest = true;
/* clear list */
sList.Clear();
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
......@@ -111,24 +106,36 @@ bool TestConcatenateSolely1()
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* clear list */
sList->Clear();
/* add tensors to list*/
sList.Add(sGPU1);
sList.Add(sGPU2);
sList->Add(sGPU1);
sList->Add(sGPU2);
/* call concatenatesolely function */
ConcatenateSolely(&sList, tGPU, 1);
/* call ConcatenateSolely function */
ConcatenateSolely(sList, tGPU, 1);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize1, sDimSize2, tDimSize;
delete sList;
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
/* destroy variables */
delete sList;
delete s1;
delete s2;
delete t;
......@@ -140,15 +147,16 @@ bool TestConcatenateSolely1()
#endif // USE_CUDA
}
/* case 2: concatenate a list of tensors along a given dimension
* In this case, 2 * (2 * 1) -> (4 * 1), dim=0.
/*
case 2: concatenate a list of tensors along a given dimension
In this case, 2 * (2, 1) -> (4, 1), dim=0.
*/
bool TestConcatenateSolely2()
{
/* create list */
XList sList;
sList = XList();
/* a source tensor of size 2 * 1 */
XList * sList = new XList();
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
......@@ -158,7 +166,7 @@ bool TestConcatenateSolely2()
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size 2 * 1 */
/* a source tensor of size (2, 1) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2;
......@@ -168,7 +176,7 @@ bool TestConcatenateSolely2()
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size 4 * 1 */
/* a target tensor of size (4, 1) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 4;
......@@ -178,14 +186,14 @@ bool TestConcatenateSolely2()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][1] = { {0.0},
{1.0} };
DTYPE sData2[2][1] = { {2.0},
{3.0} };
DTYPE answer[4][1] = { {0.0},
{1.0},
{2.0},
{3.0} };
DTYPE sData1[2][1] = { {0.0F},
{1.0F} };
DTYPE sData2[2][1] = { {2.0F},
{3.0F} };
DTYPE answer[4][1] = { {0.0F},
{1.0F},
{2.0F},
{3.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -201,11 +209,11 @@ bool TestConcatenateSolely2()
t->SetZeroAll();
/* add tensors to list */
sList.Add(s1);
sList.Add(s2);
sList->Add(s1);
sList->Add(s2);
/* call concatenatesolely function */
ConcatenateSolely(&sList, t, 0);
/* call ConcatenateSolely function */
ConcatenateSolely(sList, t, 0);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
......@@ -214,9 +222,6 @@ bool TestConcatenateSolely2()
/* GPU test */
bool gpuTest = true;
/* clear list */
sList.Clear();
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
......@@ -226,40 +231,57 @@ bool TestConcatenateSolely2()
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* clear list */
sList->Clear();
/* add tensors to list*/
sList.Add(sGPU1);
sList.Add(sGPU2);
sList->Add(sGPU1);
sList->Add(sGPU2);
/* call ConcatenateSolely function */
ConcatenateSolely(&sList, tGPU, 0);
ConcatenateSolely(sList, tGPU, 0);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize1, sDimSize2, tDimSize;
delete sList;
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1, s2, t;
delete[] sDimSize1, sDimSize2, tDimSize;
/* destroy variables */
delete sList;
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 3: concatenate a list of tensors along a given dimension
* In this case, (2 * 1) + (2 * 2) -> (2 * 3), dim=1.
/*
case 3: concatenate a list of tensors along a given dimension
In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
*/
bool TestConcatenateSolely3()
{
/* create list */
XList sList;
sList = XList();
/* a source tensor of size (2 * 1) */
XList * sList = new XList();
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
......@@ -269,7 +291,7 @@ bool TestConcatenateSolely3()
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2 * 2) */
/* a source tensor of size (2, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2;
......@@ -279,7 +301,7 @@ bool TestConcatenateSolely3()
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2 * 3) */
/* a target tensor of size (2, 3) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
......@@ -289,12 +311,12 @@ bool TestConcatenateSolely3()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][1] = { {0.0},
{1.0} };
DTYPE sData2[2][2] = { {2.0, 3.0},
{4.0, 5.0} };
DTYPE answer[2][3] = { {0.0, 2.0, 3.0},
{1.0, 4.0, 5.0} };
DTYPE sData1[2][1] = { {0.0F},
{1.0F} };
DTYPE sData2[2][2] = { {2.0F, 3.0F},
{4.0F, 5.0F} };
DTYPE answer[2][3] = { {0.0F, 2.0F, 3.0F},
{1.0F, 4.0F, 5.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -310,11 +332,11 @@ bool TestConcatenateSolely3()
t->SetZeroAll();
/* add tensors to list */
sList.Add(s1);
sList.Add(s2);
sList->Add(s1);
sList->Add(s2);
/* call concatenatesolely function */
ConcatenateSolely(&sList, t, 1);
/* call ConcatenateSolely function */
ConcatenateSolely(sList, t, 1);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
......@@ -323,9 +345,6 @@ bool TestConcatenateSolely3()
/* GPU test */
bool gpuTest = true;
/* clear list */
sList.Clear();
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
......@@ -335,26 +354,42 @@ bool TestConcatenateSolely3()
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* clear list */
sList->Clear();
/* add tensors to list*/
sList.Add(sGPU1);
sList.Add(sGPU2);
sList->Add(sGPU1);
sList->Add(sGPU2);
/* call concatenatesolely function */
ConcatenateSolely(&sList, tGPU, 1);
/* call ConcatenateSolely function */
ConcatenateSolely(sList, tGPU, 1);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize1, sDimSize2, tDimSize;
delete sList;
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1, s2, t;
delete[] sDimSize1, sDimSize2, tDimSize;
/* destroy variables */
delete sList;
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
......@@ -366,10 +401,9 @@ TODO!!
*/
/* test for ConcatenateSolely Function */
extern "C"
bool TestConcatenateSolely()
{
XPRINT(0, stdout, "[TEST CONCATENATESOLELY] -------------\n");
XPRINT(0, stdout, "[TEST CONCATENATESOLELY] concatenate a list of tensors along a given dimension \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_CONCATENATESOLELY_H__
#define __TEST_CONCATENATESOLELY_H__
#include "../core/ConcatenateSolely.h"
#include "../core/shape/ConcatenateSolely.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#include "TCopyIndexed.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: copy indexed sub-tensors
In this case, (3, 2, 3) -> (3, 2, 2), dim = 2, indexSize = 2,
srcIndex = [0, 2], tgtIndex = [0, 1], copyNum = 1.
<< return - true if the copied tensor matches the expected answer
            (on the CPU, and also on the GPU when USE_CUDA is defined)
*/
bool TestCopyIndexed1()
{
    /* an input tensor of size (3, 2, 3) */
    int sOrder = 3;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 3;
    sDimSize[1] = 2;
    sDimSize[2] = 3;

    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    /* an output tensor of size (3, 2, 2) */
    int tOrder = 3;
    int * tDimSize = new int[tOrder];
    tDimSize[0] = 3;
    tDimSize[1] = 2;
    tDimSize[2] = 2;

    int tUnitNum = 1;
    for (int i = 0; i < tOrder; i++)
        tUnitNum *= tDimSize[i];

    DTYPE sData[3][2][3] = { { {0.0F, -1.0F, 2.0F},
                               {2.0F, 1.0F, 3.0F} },
                             { {1.0F, 2.0F, 4.0F},
                               {3.0F, 1.0F, 2.0F}},
                             { {-1.0F, 3.0F, 2.0F},
                               {1.0F, -1.0F, 0.0F} } };

    /* expected output: entries 0 and 2 of the last dimension of sData */
    DTYPE answer[3][2][2] = { { {0.0F, 2.0F},
                                {2.0F, 3.0F} },
                              { {1.0F, 4.0F},
                                {3.0F, 2.0F}},
                              { {-1.0F, 2.0F},
                                {1.0F, 0.0F} } };
    int dim = 2;
    int indexSize = 2;
    int srcIndex[2] = {0, 2};
    int tgtIndex[2] = {0, 1};
    int copyNum = 1;

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * s = NewTensor(sOrder, sDimSize);
    XTensor * t = NewTensor(tOrder, tDimSize);

    /* initialize variables */
    s->SetData(sData, sUnitNum);
    t->SetZeroAll();

    /* call CopyIndexed function */
    CopyIndexed(s, t, dim, srcIndex, indexSize, tgtIndex, copyNum);

    /* check results */
    cpuTest = t->CheckData(answer, tUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors; the target is built from the target order
       (sOrder was passed here before, which only worked because
       both orders happen to be 3) */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);

    /* initialize variables */
    sGPU->SetData(sData, sUnitNum);
    tGPU->SetZeroAll();

    /* call CopyIndexed function */
    CopyIndexed(sGPU, tGPU, dim, srcIndex, indexSize, tgtIndex, copyNum);

    /* check results */
    gpuTest = tGPU->CheckData(answer, tUnitNum);

    /* destroy variables */
    delete s;
    delete t;
    delete sGPU;
    delete tGPU;
    delete[] sDimSize;
    delete[] tDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete s;
    delete t;
    delete[] sDimSize;
    delete[] tDimSize;

    return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/*
test for CopyIndexed Function
<< return - true if all the test cases passed, false otherwise
*/
bool TestCopyIndexed()
{
    XPRINT(0, stdout, "[TEST CopyIndexed] copy indexed sub-tensors \n");

    /* stays true only while no case has failed */
    bool allPassed = true;

    /* case 1 test */
    if (TestCopyIndexed1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#ifndef __TEST_COPYINDEXED_H__
#define __TEST_COPYINDEXED_H__
#include "../core/movement/CopyIndexed.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for CopyIndexed Function
<< return - true if every CopyIndexed test case passed, false otherwise
*/
extern "C"
bool TestCopyIndexed();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_COPYINDEXED_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#include "../XUtility.h"
#include "TCopyValues.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: copy tensor s to tensor t
<< return - true if t holds the same values as s after CopyValues
            (on the CPU, and also on the GPU when USE_CUDA is defined)
*/
bool TestCopyValues1()
{
    /* an input tensor of size (2, 4) */
    int sOrder = 2;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 4;

    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * s = NewTensor(sOrder, sDimSize);
    XTensor * t = NewTensor(sOrder, sDimSize);

    /* initialize variables */
    s->SetData(sData, sUnitNum);
    t->SetZeroAll();

    /* call CopyValues function */
    CopyValues(s, t);

    /* check results */
    cpuTest = t->CheckData(s->data, sUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);

    /* initialize variables; the target starts from all zeros (as in the
       CPU test) so that the check below can only pass if CopyValues
       really wrote the data */
    sGPU->SetData(sData, sUnitNum);
    tGPU->SetZeroAll();

    /* call CopyValues function */
    CopyValues(sGPU, tGPU);

    /* check results: copy the source data out of sGPU into a separate
       buffer (target device id -1, presumably host memory - TODO confirm)
       and compare the target tensor against it */
    DTYPE * dataGPU = (DTYPE*)sGPU->data;
    int size = sUnitNum * sGPU->unitSize;
    char * dataCPU = new char[size];
    XMemCopy(dataCPU, -1, dataGPU, sGPU->devID, size);
    gpuTest = tGPU->CheckData(dataCPU, sUnitNum);

    /* destroy variables */
    delete[] dataCPU;
    delete s;
    delete t;
    delete sGPU;
    delete tGPU;
    delete[] sDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete s;
    delete t;
    delete[] sDimSize;

    return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/*
test for CopyValues Function
<< return - true if all the test cases passed, false otherwise
*/
bool TestCopyValues()
{
    XPRINT(0, stdout, "[TEST CopyValues] copy tensor s to tensor t \n");

    /* stays true only while no case has failed */
    bool allPassed = true;

    /* case 1 test */
    if (TestCopyValues1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#ifndef __TEST_COPYVALUES_H__
#define __TEST_COPYVALUES_H__
#include "../core/movement/CopyValues.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for CopyValues Function
<< return - true if every CopyValues test case passed, false otherwise
*/
extern "C"
bool TestCopyValues();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_COPYVALUES_H__
......@@ -19,16 +19,14 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-20
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../function/HardTanH.h"
#include "THardTanH.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: hard tanh function */
bool TestHardTanH1()
{
/* a x tensor of size 2 * 3 */
/* a x tensor of size (2, 3) */
int xOrder = 2;
int * xDimSize = new int[xOrder];
xDimSize[0] = 2;
......@@ -38,7 +36,7 @@ bool TestHardTanH1()
for (int i = 0; i < xOrder; i++)
xUnitNum *= xDimSize[i];
/* a y tensor of size 2 * 3 */
/* a y tensor of size (2, 3) */
int yOrder = 2;
int * yDimSize = new int[yOrder];
yDimSize[0] = 2;
......@@ -48,10 +46,10 @@ bool TestHardTanH1()
for (int i = 0; i < yOrder; i++)
yUnitNum *= yDimSize[i];
DTYPE xData[2][3] = { {0.5, -1.0, 2.0},
{3.5, -4.5, 1.0} };
DTYPE answer[2][3] = { {0.5, -1.0, 1.0},
{1.0, -1.0, 1.0} };
DTYPE xData[2][3] = { {0.5F, -1.0F, 2.0F},
{3.5F, -4.5F, 1.0F} };
DTYPE answer[2][3] = { {0.5F, -1.0F, 1.0F},
{1.0F, -1.0F, 1.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -68,7 +66,7 @@ bool TestHardTanH1()
HardTanH(x, y);
/* check results */
cpuTest = y->CheckData(answer, yUnitNum);
cpuTest = y->CheckData(answer, yUnitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
......@@ -86,30 +84,35 @@ bool TestHardTanH1()
HardTanH(xGPU, yGPU);
/* check results */
gpuTest = yGPU->CheckData(answer, yUnitNum);
gpuTest = yGPU->CheckData(answer, yUnitNum, 1e-4F);
/* destroy variables */
delete x, y, xGPU, yGPU;
delete[] xDimSize, yDimSize;
delete x;
delete y;
delete xGPU;
delete yGPU;
delete[] xDimSize;
delete[] yDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete x;
delete x;
delete y;
delete[] xDimSize;
delete[] xDimSize;
delete[] yDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 2: backward computation
* In this case, lossName=CROSSENTROPY.
/*
case 2: backward computation
In this case, lossName=CROSSENTROPY.
*/
bool TestHardTanH2()
{
/* a x tensor of size 2 * 3 */
/* a x tensor of size (2, 3) */
int xOrder = 2;
int * xDimSize = new int[xOrder];
xDimSize[0] = 2;
......@@ -119,7 +122,7 @@ bool TestHardTanH2()
for (int i = 0; i < xOrder; i++)
xUnitNum *= xDimSize[i];
/* a y tensor of size 2 * 3 */
/* a y tensor of size (2, 3) */
int yOrder = 2;
int * yDimSize = new int[yOrder];
yDimSize[0] = 2;
......@@ -129,7 +132,7 @@ bool TestHardTanH2()
for (int i = 0; i < yOrder; i++)
yUnitNum *= yDimSize[i];
/* a gold tensor of size 2 * 3 */
/* a gold tensor of size (2, 3) */
int goldOrder = 2;
int * goldDimSize = new int[goldOrder];
goldDimSize[0] = 2;
......@@ -139,7 +142,7 @@ bool TestHardTanH2()
for (int i = 0; i < goldOrder; i++)
goldUnitNum *= goldDimSize[i];
/* a dedy tensor of size 2 * 3 */
/* a dedy tensor of size (2, 3) */
int dedyOrder = 2;
int * dedyDimSize = new int[dedyOrder];
dedyDimSize[0] = 2;
......@@ -149,7 +152,7 @@ bool TestHardTanH2()
for (int i = 0; i < dedyOrder; i++)
dedyUnitNum *= dedyDimSize[i];
/* a dedx tensor of size 2 * 3 */
/* a dedx tensor of size (2, 3) */
int dedxOrder = 2;
int * dedxDimSize = new int[dedxOrder];
dedxDimSize[0] = 2;
......@@ -159,16 +162,16 @@ bool TestHardTanH2()
for (int i = 0; i < dedxOrder; i++)
dedxUnitNum *= dedxDimSize[i];
DTYPE xData[2][3] = { {0.5, -1.0, 2.0},
{3.5, -4.5, 1.0} };
DTYPE yData[2][3] = { {0.5, -1.0, 1.0},
{1.0, -1.0, 1.0} };
DTYPE goldData[2][3] = { {1.0, 1.0, 1.0},
{1.0, 1.0, 1.0} };
DTYPE dedyData[2][3] = { {-2.0, 1.0, -1.0},
{-1.0, 1.0, -1.0} };
DTYPE answer[2][3] = { {-2.0, 1.0, 0.0},
{0.0, 0.0, -1.0} };
DTYPE xData[2][3] = { {0.5F, -1.0F, 2.0F},
{3.5F, -4.5F, 1.0F} };
DTYPE yData[2][3] = { {0.5F, -1.0F, 1.0F},
{1.0F, -1.0F, 1.0F} };
DTYPE goldData[2][3] = { {1.0F, 1.0F, 1.0F},
{1.0F, 1.0F, 1.0F} };
DTYPE dedyData[2][3] = { {-2.0F, 1.0F, -1.0F},
{-1.0F, 1.0F, -1.0F} };
DTYPE answer[2][3] = { {-2.0F, 1.0F, 0.0F},
{0.0F, 0.0F, -1.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -231,12 +234,13 @@ bool TestHardTanH2()
#endif // USE_CUDA
}
/* case 3: backward computation
* In this case, lossName=SQUAREDERROR.
/*
case 3: backward computation
In this case, lossName=SQUAREDERROR.
*/
bool TestHardTanH3()
{
/* a x tensor of size 2 * 3 */
/* a x tensor of size (2, 3) */
int xOrder = 2;
int * xDimSize = new int[xOrder];
xDimSize[0] = 2;
......@@ -246,7 +250,7 @@ bool TestHardTanH3()
for (int i = 0; i < xOrder; i++)
xUnitNum *= xDimSize[i];
/* a y tensor of size 2 * 3 */
/* a y tensor of size (2, 3) */
int yOrder = 2;
int * yDimSize = new int[yOrder];
yDimSize[0] = 2;
......@@ -256,7 +260,7 @@ bool TestHardTanH3()
for (int i = 0; i < yOrder; i++)
yUnitNum *= yDimSize[i];
/* a gold tensor of size 2 * 3 */
/* a gold tensor of size (2, 3) */
int goldOrder = 2;
int * goldDimSize = new int[goldOrder];
goldDimSize[0] = 2;
......@@ -266,7 +270,7 @@ bool TestHardTanH3()
for (int i = 0; i < goldOrder; i++)
goldUnitNum *= goldDimSize[i];
/* a dedy tensor of size 2 * 3 */
/* a dedy tensor of size (2, 3) */
int dedyOrder = 2;
int * dedyDimSize = new int[dedyOrder];
dedyDimSize[0] = 2;
......@@ -276,7 +280,7 @@ bool TestHardTanH3()
for (int i = 0; i < dedyOrder; i++)
dedyUnitNum *= dedyDimSize[i];
/* a dedx tensor of size 2 * 3 */
/* a dedx tensor of size (2, 3) */
int dedxOrder = 2;
int * dedxDimSize = new int[dedxOrder];
dedxDimSize[0] = 2;
......@@ -286,16 +290,16 @@ bool TestHardTanH3()
for (int i = 0; i < dedxOrder; i++)
dedxUnitNum *= dedxDimSize[i];
DTYPE xData[2][3] = { {0.5, -1.0, 2.0},
{3.5, -4.5, 1.0} };
DTYPE yData[2][3] = { {0.5, -1.0, 1.0},
{1.0, -1.0, 1.0} };
DTYPE goldData[2][3] = { {1.0, 1.0, 1.0},
{1.0, 1.0, 1.0} };
DTYPE dedyData[2][3] = { {-0.5, -2.0, 0.0 },
{0.0, -2.0, 0.0 } };
DTYPE answer[2][3] = { {-0.5, -2.0, 0.0},
{0.0, 0.0, 0.0} };
DTYPE xData[2][3] = { {0.5F, -1.0F, 2.0F},
{3.5F, -4.5F, 1.0F} };
DTYPE yData[2][3] = { {0.5F, -1.0F, 1.0F},
{1.0F, -1.0F, 1.0F} };
DTYPE goldData[2][3] = { {1.0F, 1.0F, 1.0F},
{1.0F, 1.0F, 1.0F} };
DTYPE dedyData[2][3] = { {-0.5F, -2.0F, 0.0F },
{0.0F, -2.0F, 0.0F } };
DTYPE answer[2][3] = { {-0.5F, -2.0F, 0.0F},
{0.0F, 0.0F, 0.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -358,12 +362,13 @@ bool TestHardTanH3()
#endif // USE_CUDA
}
/* case 4: backward computation
* In this case, lossName=ONEHOTERROR.
/*
case 4: backward computation
In this case, lossName=ONEHOTERROR.
*/
bool TestHardTanH4()
{
/* a x tensor of size 2 * 3 */
/* a x tensor of size (2, 3) */
int xOrder = 2;
int * xDimSize = new int[xOrder];
xDimSize[0] = 2;
......@@ -373,7 +378,7 @@ bool TestHardTanH4()
for (int i = 0; i < xOrder; i++)
xUnitNum *= xDimSize[i];
/* a y tensor of size 2 * 3 */
/* a y tensor of size (2, 3) */
int yOrder = 2;
int * yDimSize = new int[yOrder];
yDimSize[0] = 2;
......@@ -383,7 +388,7 @@ bool TestHardTanH4()
for (int i = 0; i < yOrder; i++)
yUnitNum *= yDimSize[i];
/* a gold tensor of size 2 * 3 */
/* a gold tensor of size (2, 3) */
int goldOrder = 2;
int * goldDimSize = new int[goldOrder];
goldDimSize[0] = 2;
......@@ -393,7 +398,7 @@ bool TestHardTanH4()
for (int i = 0; i < goldOrder; i++)
goldUnitNum *= goldDimSize[i];
/* a dedy tensor of size 2 * 3 */
/* a dedy tensor of size (2, 3) */
int dedyOrder = 2;
int * dedyDimSize = new int[dedyOrder];
dedyDimSize[0] = 2;
......@@ -403,7 +408,7 @@ bool TestHardTanH4()
for (int i = 0; i < dedyOrder; i++)
dedyUnitNum *= dedyDimSize[i];
/* a dedx tensor of size 2 * 3 */
/* a dedx tensor of size (2, 3) */
int dedxOrder = 2;
int * dedxDimSize = new int[dedxOrder];
dedxDimSize[0] = 2;
......@@ -413,16 +418,16 @@ bool TestHardTanH4()
for (int i = 0; i < dedxOrder; i++)
dedxUnitNum *= dedxDimSize[i];
DTYPE xData[2][3] = { {0.5, -1.0, 2.0},
{3.5, -4.5, 1.0} };
DTYPE yData[2][3] = { {0.5, -1.0, 1.0},
{1.0, -1.0, 1.0} };
DTYPE goldData[2][3] = { {1.0, 0.0, 1.0},
{0.0, 1.0, 1.0} };
DTYPE dedyData[2][3] = { {-0.5, 0.0, 0.0},
{0.0, -2.0, 0.0} };
DTYPE answer[2][3] = { {-0.5, 0.0, 0.0},
{0.0, 0.0, 0.0} };
DTYPE xData[2][3] = { {0.5F, -1.0F, 2.0F},
{3.5F, -4.5F, 1.0F} };
DTYPE yData[2][3] = { {0.5F, -1.0F, 1.0F},
{1.0F, -1.0F, 1.0F} };
DTYPE goldData[2][3] = { {1.0F, 0.0F, 1.0F},
{0.0F, 1.0F, 1.0F} };
DTYPE dedyData[2][3] = { {-0.5F, 0.0F, 0.0F},
{0.0F, -2.0F, 0.0F} };
DTYPE answer[2][3] = { {-0.5F, 0.0F, 0.0F},
{0.0F, 0.0F, 0.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -491,10 +496,9 @@ TODO!!
*/
/* test for HardTanH Function */
extern "C"
bool TestHardTanH()
{
XPRINT(0, stdout, "[TEST HARDTANH] -------------\n");
XPRINT(0, stdout, "[TEST HARDTANH] test hardtanh and its backward computation \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-29
*/
#include "../XUtility.h"
#include "TIdentity.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test Identity function.
Identity function: y = x
<< return - true if y equals the expected answer
            (on the CPU, and also on the GPU when USE_CUDA is defined)
*/
bool TestIdentity1()
{
    /* an input tensor of size (2, 3) */
    int sOrder = 2;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 3;

    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    DTYPE xData[2][3] = { {0.0F, 1.0F, 2.0F},
                          {0.5F, 0.7F, 1.4F} };

    /* the expected output is the input itself */
    DTYPE answer[2][3] = { {0.0F, 1.0F, 2.0F},
                           {0.5F, 0.7F, 1.4F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * x = NewTensor(sOrder, sDimSize);
    XTensor * y = NewTensor(sOrder, sDimSize);

    /* initialize variables */
    x->SetData(xData, sUnitNum);
    y->SetZeroAll();

    /* call Identity function */
    Identity(x, y);

    /* check result */
    cpuTest = y->CheckData(answer, sUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * xGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * yGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);

    /* initialize variables */
    xGPU->SetData(xData, sUnitNum);
    yGPU->SetZeroAll();

    /* call Identity function */
    Identity(xGPU, yGPU);

    /* check result */
    gpuTest = yGPU->CheckData(answer, sUnitNum);

    /* destroy variables; one delete per pointer, because
       "delete a, b;" is a comma expression that frees only a */
    delete x;
    delete y;
    delete xGPU;
    delete yGPU;
    delete[] sDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables; one delete per pointer, because
       "delete a, b;" is a comma expression that frees only a */
    delete x;
    delete y;
    delete[] sDimSize;

    return cpuTest;
#endif // USE_CUDA
}
/*
case 2: test IdentityBackward function.
IdentityBackward function: dE/dx = dE/dy * dy/dx = dE/dy
<< return - true if dE/dx matches dedxAnswer within a tolerance of 1e-4
            (on the CPU, and also on the GPU when USE_CUDA is defined)
*/
bool TestIdentity2()
{
    /* all the tensors in this case are of size (1, 3) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 1;
    dimSize[1] = 3;

    /* number of cells = product of the dimension sizes */
    int unitNum = 1;
    for (int d = 0; d < order; d++)
        unitNum *= dimSize[d];

    DTYPE xData[1][3] = { {0.0F, 1.0F, 2.0F} };
    DTYPE gData[1][3] = { {0.0F, 0.0F, 1.0F} };

    /* NOTE(review): these values are numerically softmax(xData) - gData;
       confirm that this is what IdentityBackward is meant to produce
       for the CROSSENTROPY loss */
    DTYPE dedxAnswer[3] = {0.090031F, 0.244728F, -0.334759F};

    /* CPU test: create tensors */
    XTensor * x = NewTensor(order, dimSize);
    XTensor * y = NewTensor(order, dimSize);
    XTensor * g = NewTensor(order, dimSize);
    XTensor * dedy = NewTensor(order, dimSize);
    XTensor * dedx = NewTensor(order, dimSize);

    /* load the input and the gold standard, clear everything else */
    x->SetData(xData, unitNum);
    g->SetData(gData, unitNum);
    y->SetZeroAll();
    dedx->SetZeroAll();
    dedy->SetZeroAll();

    /* forward pass followed by the backward pass */
    Identity(x, y);
    IdentityBackward(g, y, x, dedy, dedx, CROSSENTROPY);

    /* check result */
    bool passed = dedx->CheckData(dedxAnswer, unitNum, 1e-4F);

#ifdef USE_CUDA
    /* GPU test: create tensors */
    XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * gGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);

    /* load the input and the gold standard, clear everything else */
    xGPU->SetData(xData, unitNum);
    gGPU->SetData(gData, unitNum);
    yGPU->SetZeroAll();
    dedxGPU->SetZeroAll();
    dedyGPU->SetZeroAll();

    /* forward pass followed by the backward pass */
    Identity(xGPU, yGPU);
    IdentityBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);

    /* check result; the case passes only if both CPU and GPU agree */
    const bool gpuTest = dedxGPU->CheckData(dedxAnswer, unitNum, 1e-4F);
    passed = passed && gpuTest;

    /* destroy the GPU-only variables */
    delete xGPU;
    delete yGPU;
    delete gGPU;
    delete dedxGPU;
    delete dedyGPU;
#endif // USE_CUDA

    /* destroy the variables shared by both configurations */
    delete x;
    delete y;
    delete g;
    delete dedx;
    delete dedy;
    delete[] dimSize;

    return passed;
}
/* other cases */
/*
TODO!!
*/
/*
test for Identity Function
<< return - true if all the test cases passed, false otherwise
*/
bool TestIdentity()
{
    XPRINT(0, stdout, "[TEST Identity] identity function and its backward computation \n");

    /* stays true only while no case has failed */
    bool allPassed = true;

    /* case 1 test */
    if (TestIdentity1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* case 2 test */
    if (TestIdentity2()) {
        XPRINT(0, stdout, ">> case 2 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-29
*/
#ifndef __TEST_IDENTITY_H__
#define __TEST_IDENTITY_H__
#include "../function/Identity.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for Identity Function
<< return - true if every Identity test case passed, false otherwise
*/
extern "C"
bool TestIdentity();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_IDENTITY_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-02
*/
#include "../XUtility.h"
#include "TLogSoftmax.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test LogSoftmax function.
LogSoftmax function: y = log(e^x / \sum_{i} e^{x_i})
<< return - true if y matches the expected answer within a tolerance of 1e-4
            (on the CPU, and also on the GPU when USE_CUDA is defined)
*/
bool TestLogSoftmax1()
{
    /* an input tensor of size (2, 3) */
    int sOrder = 2;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 3;

    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    DTYPE xData[2][3] = { {0.0F, 1.0F, 2.0F},
                          {0.5F, 0.7F, 1.4F} };

    /* expected output: the log softmax of each row of xData */
    DTYPE answer[2][3] = { {-2.4076F, -1.4076F, -0.4076F},
                           {-1.5435F, -1.3435F, -0.6435F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * x = NewTensor(sOrder, sDimSize);
    XTensor * y = NewTensor(sOrder, sDimSize);

    /* initialize variables */
    x->SetData(xData, sUnitNum);
    y->SetZeroAll();

    /* call LogSoftmax function */
    LogSoftmax(x, y, 1);

    /* check result */
    cpuTest = y->CheckData(answer, sUnitNum, 1e-4F);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * xGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * yGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);

    /* initialize variables */
    xGPU->SetData(xData, sUnitNum);
    yGPU->SetZeroAll();

    /* call LogSoftmax function */
    LogSoftmax(xGPU, yGPU, 1);

    /* check result */
    gpuTest = yGPU->CheckData(answer, sUnitNum, 1e-4F);

    /* destroy variables */
    delete x;
    delete y;
    delete xGPU;
    delete yGPU;
    delete[] sDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables; only x and y exist in this configuration
       (a stray "delete z;" of an undeclared name used to break the
       build when USE_CUDA was not defined) */
    delete x;
    delete y;
    delete[] sDimSize;

    return cpuTest;
#endif // USE_CUDA
}
/*
case 2: test LogSoftmaxBackward function.
dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
<< return - true if both y and dE/dx match their expected values within
            a tolerance of 1e-4 (on the CPU, and also on the GPU when
            USE_CUDA is defined)
*/
bool TestLogSoftmax2()
{
    /* all the tensors in this case are of size (3) */
    int order = 1;
    int * dimSize = new int[order];
    dimSize[0] = 3;

    /* number of cells = product of the dimension sizes */
    int unitNum = 1;
    for (int d = 0; d < order; d++)
        unitNum *= dimSize[d];

    DTYPE xData[3] = {0.0F, 1.0F, 2.0F};
    DTYPE gData[3] = {0.5F, 0.8F, 1.5F};

    /* expected log softmax of xData */
    DTYPE yAnswer[3] = {-2.4076F, -1.4076F, -0.4076F};

    /* expected gradient; numerically this is exp(yAnswer) - gData */
    DTYPE dedxAnswer[3] = {-0.409969F, -0.555272F, -0.834759F};

    /* CPU test: create tensors */
    XTensor * x = NewTensor(order, dimSize);
    XTensor * y = NewTensor(order, dimSize);
    XTensor * g = NewTensor(order, dimSize);
    XTensor * dedy = NewTensor(order, dimSize);
    XTensor * dedx = NewTensor(order, dimSize);

    /* load the input and the gold standard, clear everything else */
    x->SetData(xData, unitNum);
    g->SetData(gData, unitNum);
    y->SetZeroAll();
    dedx->SetZeroAll();
    dedy->SetZeroAll();

    /* forward pass followed by the backward pass */
    LogSoftmax(x, y, 0);
    LogSoftmaxBackward(g, y, x, dedy, dedx, 0, CROSSENTROPY);

    /* check result: the gradient is only inspected if y is correct */
    bool passed = y->CheckData(yAnswer, unitNum, 1e-4F);
    if (passed)
        passed = dedx->CheckData(dedxAnswer, unitNum, 1e-4F);

#ifdef USE_CUDA
    /* GPU test: create tensors */
    XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * gGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);

    /* load the input and the gold standard, clear everything else */
    xGPU->SetData(xData, unitNum);
    gGPU->SetData(gData, unitNum);
    yGPU->SetZeroAll();
    dedxGPU->SetZeroAll();
    dedyGPU->SetZeroAll();

    /* forward pass followed by the backward pass */
    LogSoftmax(xGPU, yGPU, 0);
    LogSoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 0, CROSSENTROPY);

    /* check result: the gradient is only inspected if y is correct */
    bool gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F);
    if (gpuTest)
        gpuTest = dedxGPU->CheckData(dedxAnswer, unitNum, 1e-4F);

    /* the case passes only if both CPU and GPU agree */
    passed = passed && gpuTest;

    /* destroy the GPU-only variables */
    delete xGPU;
    delete yGPU;
    delete gGPU;
    delete dedxGPU;
    delete dedyGPU;
#endif // USE_CUDA

    /* destroy the variables shared by both configurations */
    delete x;
    delete y;
    delete g;
    delete dedx;
    delete dedy;
    delete[] dimSize;

    return passed;
}
/*
case 3: test LogSoftmaxBackward function.
dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
<< return - true if both y and dE/dx match their expected values within
            a tolerance of 1e-4 (on the CPU, and also on the GPU when
            USE_CUDA is defined)
*/
bool TestLogSoftmax3()
{
    /* all the tensors in this case are of size (1, 3) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 1;
    dimSize[1] = 3;

    /* number of cells = product of the dimension sizes */
    int unitNum = 1;
    for (int d = 0; d < order; d++)
        unitNum *= dimSize[d];

    DTYPE xData[1][3] = { {0.0F, 1.0F, 2.0F} };
    DTYPE gData[1][3] = { {0.5F, 0.8F, 1.5F} };

    /* expected log softmax of xData */
    DTYPE yAnswer[1][3] = { {-2.4076F, -1.4076F, -0.4076F} };

    /* expected gradient; numerically this is exp(yAnswer) - gData */
    DTYPE dedxAnswer[1][3] = { {-0.409969F, -0.555272F, -0.834759F} };

    /* CPU test: create tensors */
    XTensor * x = NewTensor(order, dimSize);
    XTensor * y = NewTensor(order, dimSize);
    XTensor * g = NewTensor(order, dimSize);
    XTensor * dedy = NewTensor(order, dimSize);
    XTensor * dedx = NewTensor(order, dimSize);

    /* load the input and the gold standard, clear everything else */
    x->SetData(xData, unitNum);
    g->SetData(gData, unitNum);
    y->SetZeroAll();
    dedx->SetZeroAll();
    dedy->SetZeroAll();

    /* forward pass followed by the backward pass */
    LogSoftmax(x, y, 1);
    LogSoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY);

    /* check result: the gradient is only inspected if y is correct */
    bool passed = y->CheckData(yAnswer, unitNum, 1e-4F);
    if (passed)
        passed = dedx->CheckData(dedxAnswer, unitNum, 1e-4F);

#ifdef USE_CUDA
    /* GPU test: create tensors */
    XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * gGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);

    /* load the input and the gold standard, clear everything else */
    xGPU->SetData(xData, unitNum);
    gGPU->SetData(gData, unitNum);
    yGPU->SetZeroAll();
    dedxGPU->SetZeroAll();
    dedyGPU->SetZeroAll();

    /* forward pass followed by the backward pass */
    LogSoftmax(xGPU, yGPU, 1);
    LogSoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY);

    /* check result: the gradient is only inspected if y is correct */
    bool gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F);
    if (gpuTest)
        gpuTest = dedxGPU->CheckData(dedxAnswer, unitNum, 1e-4F);

    /* the case passes only if both CPU and GPU agree */
    passed = passed && gpuTest;

    /* destroy the GPU-only variables */
    delete xGPU;
    delete yGPU;
    delete gGPU;
    delete dedxGPU;
    delete dedyGPU;
#endif // USE_CUDA

    /* destroy the variables shared by both configurations */
    delete x;
    delete y;
    delete g;
    delete dedx;
    delete dedy;
    delete[] dimSize;

    return passed;
}
/* other cases */
/*
TODO!!
*/
/*
test for LogSoftmax Function
<< return - true if all the test cases passed, false otherwise
*/
bool TestLogSoftmax()
{
    XPRINT(0, stdout, "[TEST LogSoftmax] test log softmax function and its backward computation \n");

    /* stays true only while no case has failed */
    bool allPassed = true;

    /* case 1 test */
    if (TestLogSoftmax1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* case 2 test */
    if (TestLogSoftmax2()) {
        XPRINT(0, stdout, ">> case 2 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }

    /* case 3 test */
    if (TestLogSoftmax3()) {
        XPRINT(0, stdout, ">> case 3 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 3 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-02
*/
#ifndef __TEST_LOGSOFTMAX_H__
#define __TEST_LOGSOFTMAX_H__
#include "../function/LogSoftmax.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for LogSoftmax Function
entry point of the LogSoftmax unit test; declared with C linkage (extern "C")
<< return - a bool result flag for the test run
*/
extern "C"
bool TestLogSoftmax();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_LOGSOFTMAX_H__
......@@ -19,20 +19,23 @@
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../core/math/ScaleAndShift.h"
#include "../function/Loss.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
namespace nts { // namespace nt(NiuTrans.Tensor)
/* case 1 */
/*
case 1: test LossCompute function
In this case, Loss function name = SQUAREDERROR.
loss = sum_{i} 0.5*(t_i - y_i)^2,
where t_i is the gold standard and y_i is the model output
*/
bool TestLoss1()
{
/* a tensor of size 10000 * 1 */
/* a tensor of size (10, 1) */
int order = 2;
int order_reduce = 1;
int * dimSize = new int[order];
dimSize[0] = 10000;
dimSize[0] = 10;
dimSize[1] = 1;
int unitNum = 1;
......@@ -42,68 +45,218 @@ bool TestLoss1()
/* CPU test */
bool cpuTest = true;
DTYPE answer = 5.0F;
/* create tensors */
XTensor * a = NewTensor(order, dimSize);
XTensor * b = NewTensor(order, dimSize);
XTensor * output = NewTensor(order, dimSize);
XTensor * gold = NewTensor(order, dimSize);
/* initialize variables */
DTYPE* a_data = (DTYPE*)a->data;
for (int i = 0; i < unitNum; i++)
*a_data++ = 1;
DTYPE* b_data = (DTYPE*)b->data;
for (int i = 0; i < unitNum; i++)
*b_data++ = 1;
DTYPE error = 0.0F;
error = LossCompute(a, b, SQUAREDERROR, false, 1, 0, dimSize[0], 0);
printf("%d", error);
/* call reduce max function */
//ReduceMax(a, reduce_a, 0);
//ReduceMax(b, reduce_b, 1);
//DTYPE* reduce_a_data = (DTYPE*)reduce_a->data;
//for (int i = 0; i < unitNum_a; i++)
// printf("%f ", *reduce_a_data++);
//printf("\n");
//DTYPE* reduce_b_data = (DTYPE*)reduce_b->data;
//for (int i = 0; i < unitNum_b; i++)
// printf("%f ", *reduce_b_data++);
output->SetZeroAll();
gold->SetZeroAll();
ScaleAndShift(output, 1, 1);
ScaleAndShift(gold, 1, 2);
DTYPE error;
error = LossCompute(gold, output, SQUAREDERROR, false, 0, 0, dimSize[0], 0);
/* check results */
cpuTest = true;
cpuTest = (error == answer);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * outputGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * goldGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
outputGPU->SetZeroAll();
goldGPU->SetZeroAll();
ScaleAndShift(outputGPU, 1, 1);
ScaleAndShift(goldGPU, 1, 2);
/* call LossCompute function */
error = LossCompute(goldGPU, outputGPU, SQUAREDERROR, false, 0, 0, dimSize[0], 0);
/* check results */
gpuTest = (error == answer);
/* destroy variables */
delete output;
delete gold;
delete outputGPU;
delete goldGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete output;
delete gold;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 2: test LossCompute function
In this case, Loss function name = CROSSENTROPY.
loss = sum_{i} (-t_i * log(y_i))
where t_i is the gold standard and y_i is the model output
<< return - true if the computed loss equals the expected answer
            (on the CPU, and also on the GPU when USE_CUDA is defined)
*/
bool TestLoss2()
{
    /* a tensor of size (10, 1) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 10;
    dimSize[1] = 1;

    /* CPU test */
    bool cpuTest = true;

    /* expected loss; with y_i = 1 every term -t_i * log(y_i) is 0 */
    DTYPE answer = 0.0F;

    /* create tensors */
    XTensor * output = NewTensor(order, dimSize);
    XTensor * gold = NewTensor(order, dimSize);

    /* initialize variables:
       start from all zeros, then shift by a constant
       (presumably ScaleAndShift computes a * scale + shift in place,
       giving output = 1 and gold = 2 everywhere - TODO confirm) */
    output->SetZeroAll();
    gold->SetZeroAll();
    ScaleAndShift(output, 1, 1);
    ScaleAndShift(gold, 1, 2);

    /* call LossCompute function */
    DTYPE error;
    error = LossCompute(gold, output, CROSSENTROPY, false, 0, 0, dimSize[0], 0);

    /* check results */
    cpuTest = (error == answer);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor
       (the trailing 0 is presumably the device id - confirm against NewTensor) */
    XTensor * outputGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * goldGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);

    /* Initialize variables (same values as in the CPU test) */
    outputGPU->SetZeroAll();
    goldGPU->SetZeroAll();
    ScaleAndShift(outputGPU, 1, 1);
    ScaleAndShift(goldGPU, 1, 2);

    /* call LossCompute function */
    error = LossCompute(goldGPU, outputGPU, CROSSENTROPY, false, 0, 0, dimSize[0], 0);

    /* check results */
    gpuTest = (error == answer);

    /* destroy variables (one delete per object; "delete p, q" frees only p) */
    delete output;
    delete gold;
    delete outputGPU;
    delete goldGPU;
    delete[] dimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete output;
    delete gold;
    delete[] dimSize;

    return cpuTest;
#endif // USE_CUDA
}
/*
case 3: test LossCompute function
In this case, Loss function name = ONEHOTERROR.
loss = sum_{i} e_i
where e_i = 0.5*(t_i - y_i)^2 if t_i = 1, e_i = 0 otherwise
*/
bool TestLoss3()
{
/* a tensor of size (10, 1) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 5;
dimSize[1] = 1;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE outputData[5][1] = { {0.5F},
{0.5F},
{0.5F},
{0.5F},
{0.5F} };
DTYPE goldData[5][1] = { {1.0F},
{1.0F},
{0.0F},
{0.0F},
{0.0F} };
/* CPU test */
bool cpuTest = true;
DTYPE answer = 0.25F;
/* create tensors */
XTensor * output = NewTensor(order, dimSize);
XTensor * gold = NewTensor(order, dimSize);
/* initialize variables */
output->SetData(outputData, unitNum);
gold->SetData(goldData, unitNum);
DTYPE error;
error = LossCompute(gold, output, ONEHOTERROR, false, 0, 0, dimSize[0], 0);
/* check results */
cpuTest = (error == answer);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * outputGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * goldGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
outputGPU->SetData(outputData, unitNum);
goldGPU->SetData(goldData, unitNum);
/* call LossCompute function */
error = LossCompute(goldGPU, outputGPU, ONEHOTERROR, false, 0, 0, dimSize[0], 0);
/* check results */
gpuTest = (error == answer);
/* destroy variables */
delete output;
delete gold;
delete outputGPU;
delete goldGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete output;
delete gold;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
......@@ -113,11 +266,10 @@ bool TestLoss1()
TODO!!
*/
/* test for Sum Function */
extern "C"
bool TestLoss()
/* test for Loss Function */
bool TestLoss()
{
XPRINT(0, stdout, "[TEST Loss]\n");
XPRINT(0, stdout, "[TEST Loss] compute the loss \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
......@@ -129,6 +281,23 @@ extern "C"
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestLoss2();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
caseFlag = TestLoss3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
///* other cases test */
///*
//TODO!!
......@@ -145,4 +314,4 @@ extern "C"
return returnFlag;
}
} // namespace nt(NiuTrans.Tensor)
} // namespace nts(NiuTrans.Tensor)
......@@ -26,9 +26,9 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for Sum Function */
/* test for Loss Function */
extern "C"
bool TestLoss();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_SUM_H__
#endif // __TEST_LOSS_H__
......@@ -19,14 +19,13 @@
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-15
*/
#include "../XTensor.h"
#include "TMatrixMULBatchedCPU.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: matrix multiplication in batch mode (CPU code).
* In this case, aList=2*(2, 3), bList=2*(2, 3) -> c=2*(2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
/*
case 1: matrix multiplication in batch mode (CPU code).
In this case, aList=2*(2, 3), bList=2*(3, 2) -> c=2*(2, 2), transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMulBatchedCPU1()
{
......@@ -65,20 +64,20 @@ bool TestMatrixMulBatchedCPU1()
for (int i = 0; i < cOrder; i++)
cUnitNum *= cDimSize[i];
DTYPE aData1[2][3] = { {1.0, 2.0, 3.0},
{-4.0, 5.0, 6.0} };
DTYPE aData2[2][3] = { {1.0, -2.0, -3.0},
{-4.0, 3.0, 2.0} };
DTYPE bData1[3][2] = { {0.0, -1.0},
{1.0, 2.0},
{2.0, 1.0} };
DTYPE bData2[3][2] = { {0.0, 1.0},
{3.0, 2.0},
{2.0, 1.0} };
DTYPE answer1[2][2] = { {8.0, 6.0},
{17.0, 20.0} };
DTYPE answer2[2][2] = { {-12.0, -6.0},
{13.0, 4.0} };
DTYPE aData1[2][3] = { {1.0F, 2.0F, 3.0F},
{-4.0F, 5.0F, 6.0F} };
DTYPE aData2[2][3] = { {1.0F, -2.0F, -3.0F},
{-4.0F, 3.0F, 2.0F} };
DTYPE bData1[3][2] = { {0.0F, -1.0F},
{1.0F, 2.0F},
{2.0F, 1.0F} };
DTYPE bData2[3][2] = { {0.0F, 1.0F},
{3.0F, 2.0F},
{2.0F, 1.0F} };
DTYPE answer1[2][2] = { {8.0F, 6.0F},
{17.0F, 20.0F} };
DTYPE answer2[2][2] = { {-12.0F, -6.0F},
{13.0F, 4.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -111,18 +110,12 @@ bool TestMatrixMulBatchedCPU1()
MatrixMULBatchedCPU(aList, X_NOTRANS, bList, X_NOTRANS, cList);
/* check results */
cpuTest = c1->CheckData(answer1, cUnitNum) && cpuTest;
cpuTest = c2->CheckData(answer2, cUnitNum) && cpuTest;
cpuTest = c1->CheckData(answer1, cUnitNum) && c2->CheckData(answer2, cUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* clear list */
aList->Clear();
bList->Clear();
cList->Clear();
/* create tensors */
XTensor * aGPU1 = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aGPU2 = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
......@@ -138,32 +131,56 @@ bool TestMatrixMulBatchedCPU1()
bGPU2->SetData(bData2, aUnitNum);
cGPU1->SetZeroAll();
cGPU2->SetZeroAll();
/* clear list */
aList->Clear();
bList->Clear();
cList->Clear();
/* add tensors to list */
aList->Add(a1);
aList->Add(a2);
bList->Add(b1);
bList->Add(b2);
cList->Add(c1);
cList->Add(c2);
aList->Add(aGPU1);
aList->Add(aGPU2);
bList->Add(bGPU1);
bList->Add(bGPU2);
cList->Add(cGPU1);
cList->Add(cGPU2);
/* call MatrixMULBatchedCPU function */
MatrixMULBatchedCPU(aList, X_NOTRANS, bList, X_NOTRANS, cList);
/* check results */
gpuTest = c1->CheckData(answer1, cUnitNum) && gpuTest;
gpuTest = c2->CheckData(answer2, cUnitNum) && gpuTest;
gpuTest = cGPU1->CheckData(answer1, cUnitNum) && gpuTest;
gpuTest = cGPU2->CheckData(answer2, cUnitNum) && gpuTest;
/* destroy variables */
delete a1, a2, b1, b2, c1, c2;
delete aGPU1, aGPU2, bGPU1, bGPU2, cGPU1, cGPU2;
delete[] aDimSize, bDimSize, cDimSize;
delete a1;
delete a2;
delete b1;
delete b2;
delete c1;
delete c2;
delete aGPU1;
delete aGPU2;
delete bGPU1;
delete bGPU2;
delete cGPU1;
delete cGPU2;
delete[] aDimSize;
delete[] bDimSize;
delete[] cDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a1, a2, b1, b2, c1, c2;
delete[] aDimSize, bDimSize, cDimSize;
delete a1;
delete a2;
delete b1;
delete b2;
delete c1;
delete c2;
delete[] aDimSize;
delete[] bDimSize;
delete[] cDimSize;
return cpuTest;
#endif // USE_CUDA
......@@ -178,7 +195,7 @@ bool TestMatrixMulBatchedCPU1()
extern "C"
bool TestMatrixMulBatchedCPU()
{
XPRINT(0, stdout, "[TEST MATRIXMULBATCHEDCPU] -------------\n");
XPRINT(0, stdout, "[TEST MATRIXMULBATCHEDCPU] matrix multiplication in batch mode (CPU code) \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
......@@ -191,15 +208,6 @@ bool TestMatrixMulBatchedCPU()
else
XPRINT(0, stdout, ">> case 1 passed!\n");
///* case 2 test */
//caseFlag = TestMatrixMulBatchedCPU2();
//if (!caseFlag) {
// returnFlag = false;
// XPRINT(0, stdout, ">> case 2 failed!\n");
//}
//else
// XPRINT(0, stdout, ">> case 2 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_MATRIXMULBATCHEDCPU_H__
#define __TEST_MATRIXMULBATCHEDCPU_H__
#include "../core/MatrixMULBatchedCPU.h"
#include "../core/arithmetic/MatrixMULBatchedCPU.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,14 +19,14 @@
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-14
*/
#include "../XTensor.h"
#include "TMatrixMul.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: matrix multiplication.
* In this case, a=(2, 3), b=(3, 2) -> c=(2, 2), transposedA=X_NOTRANS,
transposedB=X_NOTRANS.
/*
case 1: matrix multiplication.
In this case, a=(2, 3), b=(3, 2) -> c=(2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMul1()
{
......@@ -60,13 +60,13 @@ bool TestMatrixMul1()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][3] = { {1.0, 2.0, 3.0},
{-4.0, 5.0, 6.0} };
DTYPE sData2[3][2] = { {0.0, -1.0},
{1.0, 2.0},
{2.0, 1.0} };
DTYPE answer[2][2] = { {8.0, 6.0},
{17.0, 20.0} };
DTYPE sData1[2][3] = { {1.0F, 2.0F, 3.0F},
{-4.0F, 5.0F, 6.0F} };
DTYPE sData2[3][2] = { {0.0F, -1.0F},
{1.0F, 2.0F},
{2.0F, 1.0F} };
DTYPE answer[2][2] = { {8.0F, 6.0F},
{17.0F, 20.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -108,22 +108,34 @@ bool TestMatrixMul1()
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1, s2, t;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 2: matrix multiplication.
* In this case, a=(3, 2), b=(3, 2) -> c=(2, 2), transposedA=X_TRANS,
transposedB=X_NOTRANS.
/*
case 2: matrix multiplication.
In this case, a=(3, 2), b=(3, 2) -> c=(2, 2),
transposedA=X_TRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMul2()
{
......@@ -137,7 +149,7 @@ bool TestMatrixMul2()
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2, 3) */
/* a source tensor of size (3, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 3;
......@@ -157,14 +169,14 @@ bool TestMatrixMul2()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[3][2] = { {1.0, -4.0},
{2.0, 5.0},
{3.0, 6.0} };
DTYPE sData2[3][2] = { {0.0, -1.0},
{1.0, 2.0},
{2.0, 1.0} };
DTYPE answer[2][2] = { {8.0, 6.0},
{17.0, 20.0} };
DTYPE sData1[3][2] = { {1.0F, -4.0F},
{2.0F, 5.0F},
{3.0F, 6.0F} };
DTYPE sData2[3][2] = { {0.0F, -1.0F},
{1.0F, 2.0F},
{2.0F, 1.0F} };
DTYPE answer[2][2] = { {8.0F, 6.0F},
{17.0F, 20.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -206,22 +218,34 @@ bool TestMatrixMul2()
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1, s2, t;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 3: matrix multiplication.
* In this case, a=(3, 2, 3), b=(2, 3, 2) -> c=(3, 2, 2, 2), transposedA=X_NOTRANS,
transposedB=X_NOTRANS.
/*
case 3: matrix multiplication.
In this case, a=(3, 2, 3), b=(2, 3, 2) -> c=(3, 2, 2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMul3()
{
......@@ -259,20 +283,30 @@ bool TestMatrixMul3()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[3][2][3] = { { {0.0, -1.0, 2.0},
{2.0, 1.0, 3.0} },
{ {1.0, 2.0, 4.0},
{3.0, 1.0, 2.0}},
{ {-1.0, 3.0, 2.0},
{1.0, -1.0, 0.0} } };
DTYPE sData2[2][3][2] = { { {1.0, 2.0},
{-4.0, 3.0},
{2.0, 6.0} },
{ {1.0, 2.0},
{-4.0, 3.0},
{2.0, 6.0} } };
DTYPE answer[2][2] = { {8.0, 6.0},
{17.0, 20.0} };
DTYPE sData1[3][2][3] = { { {0.0F, -1.0F, 2.0F},
{2.0F, 1.0F, 3.0F} },
{ {1.0F, 2.0F, 4.0F},
{3.0F, 1.0F, 2.0F}},
{ {-1.0F, 3.0F, 2.0F},
{1.0F, -1.0F, 0.0F} } };
DTYPE sData2[2][3][2] = { { {1.0F, 2.0F},
{-4.0F, 3.0F},
{2.0F, 6.0F} },
{ {1.0F, 2.0F},
{3.0F, 4.0F},
{5.0F, 6.0F} } };
DTYPE answer[3][2][2][2] = { { { {8.0F, 9.0F},
{4.0F, 25.0F} },
{ {7.0F, 8.0F},
{20.0F, 26.0F} } },
{ { {1.0F, 32.0F},
{3.0F, 21.0F} },
{ {27.0F, 34.0F},
{16.0F, 22.0F} } },
{ { {-9.0F, 19.0F},
{5.0F, -1.0F} },
{ {18.0F, 22.0F},
{-2.0F, -2.0F} } } };
/* CPU test */
bool cpuTest = true;
......@@ -290,17 +324,124 @@ bool TestMatrixMul3()
/* call MatrixMul function */
MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);
XPRINT(0, stdout, "\ntarget data\n[");
DTYPE* check_data = (DTYPE*)t->data;
for (int i = 0; i < tUnitNum; i++)
printf("%f ", *check_data++);
printf("]\n");
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
int * size = new int(tOrder);
size = t->dimSize;
for (int i = 0; i < tOrder; i++) {
printf("size %d: %d\n", i, *size++);
}
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* call MatrixMul function */
MatrixMul(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 4: matrix multiplication.
In this case, a=(3, 2, 3), b=(3, 2) -> c=(3, 2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMul4()
{
/* a source tensor of size (3, 2, 3) */
int sOrder1 = 3;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 3;
sDimSize1[1] = 2;
sDimSize1[2] = 3;
int sUnitNum1 = 1;
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (3, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 3;
sDimSize2[1] = 2;
int sUnitNum2 = 1;
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (3, 2, 2) */
int tOrder = 3;
int * tDimSize = new int[tOrder];
tDimSize[0] = 3;
tDimSize[1] = 2;
tDimSize[2] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[3][2][3] = { { {0.0F, -1.0F, 2.0F},
{2.0F, 1.0F, 3.0F} },
{ {1.0F, 2.0F, 4.0F},
{3.0F, 1.0F, 2.0F}},
{ {-1.0F, 3.0F, 2.0F},
{1.0F, -1.0F, 0.0F} } };
DTYPE sData2[3][2] = { {1.0F, 2.0F},
{3.0F, 4.0F},
{5.0F, 6.0F} };
DTYPE answer[3][2][2] = { { {7.0F, 8.0F},
{20.0F, 26.0F} },
{ {27.0F, 34.0F},
{16.0F, 22.0F} },
{ {18.0F, 22.0F},
{-2.0F, -2.0F} } };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize);
/* initialize variables */
s1->SetData(sData1, sUnitNum1);
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* call MatrixMul function */
MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
......@@ -326,14 +467,25 @@ bool TestMatrixMul3()
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1, s2, t;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
......@@ -346,10 +498,9 @@ bool TestMatrixMul3()
*/
/* test for MatrixMul Function */
extern "C"
bool TestMatrixMul()
{
XPRINT(0, stdout, "[TEST MATRIXMUL] -------------\n");
XPRINT(0, stdout, "[TEST MATRIXMUL] matrix multiplication \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
......@@ -371,14 +522,23 @@ bool TestMatrixMul()
else
XPRINT(0, stdout, ">> case 2 passed!\n");
///* case 3 test */
//caseFlag = TestMatrixMul3();
//if (!caseFlag) {
// returnFlag = false;
// XPRINT(0, stdout, ">> case 3 failed!\n");
//}
//else
// XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 3 test */
caseFlag = TestMatrixMul3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestMatrixMul4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* other cases test */
/*
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_MATRIXMUL_H__
#define __TEST_MATRIXMUL_H__
#include "../core/MatrixMul.h"
#include "../core/arithmetic/MatrixMul.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,14 +19,14 @@
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-15
*/
#include "../XTensor.h"
#include "TMatrixMul2D.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: matrix multiplication (for 2d tensors).
* In this case, a=(2, 3), b=(3, 2) -> c=(2, 2), transposedA=X_NOTRANS,
transposedB=X_NOTRANS.
/*
case 1: matrix multiplication (for 2d tensors).
In this case, a=(2, 3), b=(3, 2) -> c=(2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMul2D1()
{
......@@ -60,13 +60,13 @@ bool TestMatrixMul2D1()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][3] = { {1.0, 2.0, 3.0},
{-4.0, 5.0, 6.0} };
DTYPE sData2[3][2] = { {0.0, -1.0},
{1.0, 2.0},
{2.0, 1.0} };
DTYPE answer[2][2] = { {8.0, 6.0},
{17.0, 20.0} };
DTYPE sData1[2][3] = { {1.0F, 2.0F, 3.0F},
{-4.0F, 5.0F, 6.0F} };
DTYPE sData2[3][2] = { {0.0F, -1.0F},
{1.0F, 2.0F},
{2.0F, 1.0F} };
DTYPE answer[2][2] = { {8.0F, 6.0F},
{17.0F, 20.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -108,22 +108,34 @@ bool TestMatrixMul2D1()
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1, s2, t;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 2: matrix multiplication (for 2d tensors).
* In this case, a=(3, 2), b=(2, 3) -> c=(2, 2), transposedA=X_TRANS,
transposedB=X_NOTRANS.
/*
case 2: matrix multiplication (for 2d tensors).
In this case, a=(3, 2), b=(3, 2) -> c=(2, 2),
transposedA=X_TRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMul2D2()
{
......@@ -157,14 +169,14 @@ bool TestMatrixMul2D2()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[3][2] = { {1.0, -4.0},
{2.0, 5.0},
{3.0, 6.0} };
DTYPE sData2[3][2] = { {0.0, -1.0},
{1.0, 2.0},
{2.0, 1.0} };
DTYPE answer[2][2] = { {8.0, 6.0},
{17.0, 20.0} };
DTYPE sData1[3][2] = { {1.0F, -4.0F},
{2.0F, 5.0F},
{3.0F, 6.0F} };
DTYPE sData2[3][2] = { {0.0F, -1.0F},
{1.0F, 2.0F},
{2.0F, 1.0F} };
DTYPE answer[2][2] = { {8.0F, 6.0F},
{17.0F, 20.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -206,14 +218,25 @@ bool TestMatrixMul2D2()
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1, s2, t;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
......@@ -228,7 +251,7 @@ bool TestMatrixMul2D2()
extern "C"
bool TestMatrixMul2D()
{
XPRINT(0, stdout, "[TEST MATRIXMUL2D] -------------\n");
XPRINT(0, stdout, "[TEST MATRIXMUL2D] matrix multiplication (for 2d tensors) \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_MATRIXMUL2D_H__
#define __TEST_MATRIXMUL2D_H__
#include "../core/MatrixMul2D.h"
#include "../core/arithmetic/MatrixMul2D.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#include "TMatrixMul2DParallel.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: matrix multiplication (for 2d tensors) with multi-threading.
In this case, a=(2, 3), b=(3, 2) -> c=(2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMul2DParallel1()
{
/* a source tensor of size (2, 3) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
sDimSize1[1] = 3;
int sUnitNum1 = 1;
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (3, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 3;
sDimSize2[1] = 2;
int sUnitNum2 = 1;
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2, 2) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
tDimSize[1] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][3] = { {1.0F, 2.0F, 3.0F},
{-4.0F, 5.0F, 6.0F} };
DTYPE sData2[3][2] = { {0.0F, -1.0F},
{1.0F, 2.0F},
{2.0F, 1.0F} };
DTYPE answer[2][2] = { {8.0F, 6.0F},
{17.0F, 20.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize);
/* initialize variables */
s1->SetData(sData1, sUnitNum1);
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* call MatrixMul2DParallel function */
MatrixMul2DParallel(s1, X_NOTRANS, s2, X_NOTRANS, t);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
}
/*
case 2: matrix multiplication (for 2d tensors) with multi-threading.
In this case, a=(3, 2), b=(3, 2) -> c=(2, 2),
transposedA=X_TRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMul2DParallel2()
{
/* a source tensor of size (3, 2) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 3;
sDimSize1[1] = 2;
int sUnitNum1 = 1;
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (3, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 3;
sDimSize2[1] = 2;
int sUnitNum2 = 1;
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2, 2) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
tDimSize[1] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[3][2] = { {1.0F, -4.0F},
{2.0F, 5.0F},
{3.0F, 6.0F} };
DTYPE sData2[3][2] = { {0.0F, -1.0F},
{1.0F, 2.0F},
{2.0F, 1.0F} };
DTYPE answer[2][2] = { {8.0F, 6.0F},
{17.0F, 20.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize);
/* initialize variables */
s1->SetData(sData1, sUnitNum1);
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* call MatrixMul2DParallel function */
MatrixMul2DParallel(s1, X_TRANS, s2, X_NOTRANS, t);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
}
/* other cases */
/*
TODO!!
*/
/*
test for MatrixMul2DParallel Function
runs every MatrixMul2DParallel test case in turn and prints one line per case
<< return - true if all the cases passed, false if any of them failed
*/
bool TestMatrixMul2DParallel()
{
    XPRINT(0, stdout, "[TEST MatrixMul2DParallel] matrix multiplication (for 2d tensors) with multi-threading \n");

    /* stays true only as long as no case has failed */
    bool allPassed = true;

    /* case 1 test */
    if (TestMatrixMul2DParallel1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* case 2 test */
    if (TestMatrixMul2DParallel2()) {
        XPRINT(0, stdout, ">> case 2 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall verdict */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#ifndef __TEST_MATRIXMUL2DPARALLEL_H__
#define __TEST_MATRIXMUL2DPARALLEL_H__
#include "../core/arithmetic/MatrixMul2DParallel.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for MatrixMul2DParallel Function
runs the individual test cases and prints their results to stdout;
declared with C linkage (extern "C")
<< return - true if every case passed, false otherwise
*/
extern "C"
bool TestMatrixMul2DParallel();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_MATRIXMUL2DPARALLEL_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-15
*/
#include "TMatrixMULBatched.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: matrix multiplication of the two tensors.
In this case, a=(2, 3), b=(3, 2) -> c=(2, 2), transposedA=X_NOTRANS, transposedB=X_NOTRANS.
<< return - true if the result matches the expected product on the CPU
            (and also in the GPU test when compiled with USE_CUDA)
*/
bool TestMatrixMulBatched1()
{
    /* a source tensor of size (2, 3);
       the dimension array lives on the stack, so there is nothing to free */
    const int sOrder1 = 2;
    int sDimSize1[sOrder1] = {2, 3};

    int sUnitNum1 = 1;
    for (int i = 0; i < sOrder1; i++)
        sUnitNum1 *= sDimSize1[i];

    /* a source tensor of size (3, 2) */
    const int sOrder2 = 2;
    int sDimSize2[sOrder2] = {3, 2};

    int sUnitNum2 = 1;
    for (int i = 0; i < sOrder2; i++)
        sUnitNum2 *= sDimSize2[i];

    /* a target tensor of size (2, 2) */
    const int tOrder = 2;
    int tDimSize[tOrder] = {2, 2};

    int tUnitNum = 1;
    for (int i = 0; i < tOrder; i++)
        tUnitNum *= tDimSize[i];

    DTYPE sData1[2][3] = { {1.0F, 2.0F, 3.0F},
                           {-4.0F, 5.0F, 6.0F} };
    DTYPE sData2[3][2] = { {0.0F, -1.0F},
                           {1.0F, 2.0F},
                           {2.0F, 1.0F} };
    /* expected value of sData1 x sData2 */
    DTYPE answer[2][2] = { {8.0F, 6.0F},
                           {17.0F, 20.0F} };

    /* CPU test */

    /* create tensors */
    XTensor * s1 = NewTensor(sOrder1, sDimSize1);
    XTensor * s2 = NewTensor(sOrder2, sDimSize2);
    XTensor * t = NewTensor(tOrder, tDimSize);

    /* initialize variables */
    s1->SetData(sData1, sUnitNum1);
    s2->SetData(sData2, sUnitNum2);
    t->SetZeroAll();

    /* call MatrixMulBatched function */
    MatrixMulBatched(s1, X_NOTRANS, s2, X_NOTRANS, t);

    /* check results */
    bool passed = t->CheckData(answer, tUnitNum);

#ifdef USE_CUDA
    /* GPU test */

    /* create tensor */
    XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
    XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);

    /* Initialize variables */
    sGPU1->SetData(sData1, sUnitNum1);
    sGPU2->SetData(sData2, sUnitNum2);
    tGPU->SetZeroAll();

    /* call MatrixMulBatched function */
    MatrixMulBatched(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);

    /* check results: the GPU check always runs, even if the CPU check failed */
    const bool gpuTest = tGPU->CheckData(answer, tUnitNum);
    passed = passed && gpuTest;
#endif // USE_CUDA

    /* destroy variables (single teardown path shared by both build modes) */
    delete s1;
    delete s2;
    delete t;
#ifdef USE_CUDA
    delete sGPU1;
    delete sGPU2;
    delete tGPU;
#endif // USE_CUDA

    return passed;
}
/*
case 2: matrix multiplication of the two tensors.
In this case, a=(2, 2, 3), b=(2, 3, 2) -> c=(2, 2, 2), transposedA=X_NOTRANS, transposedB=X_NOTRANS.
<< return - true if the result matches the expected product on the CPU
            (and also in the GPU test when compiled with USE_CUDA)
*/
bool TestMatrixMulBatched2()
{
    /* a source tensor of size (2, 2, 3);
       the dimension array lives on the stack, so there is nothing to free */
    const int sOrder1 = 3;
    int sDimSize1[sOrder1] = {2, 2, 3};

    int sUnitNum1 = 1;
    for (int i = 0; i < sOrder1; i++)
        sUnitNum1 *= sDimSize1[i];

    /* a source tensor of size (2, 3, 2) */
    const int sOrder2 = 3;
    int sDimSize2[sOrder2] = {2, 3, 2};

    int sUnitNum2 = 1;
    for (int i = 0; i < sOrder2; i++)
        sUnitNum2 *= sDimSize2[i];

    /* a target tensor of size (2, 2, 2) */
    const int tOrder = 3;
    int tDimSize[tOrder] = {2, 2, 2};

    int tUnitNum = 1;
    for (int i = 0; i < tOrder; i++)
        tUnitNum *= tDimSize[i];

    DTYPE sData1[2][2][3] = { { {0.0F, -1.0F, 2.0F},
                                {2.0F, 1.0F, 3.0F} },
                              { {1.0F, 2.0F, 4.0F},
                                {3.0F, 1.0F, 2.0F} } };
    DTYPE sData2[2][3][2] = { { {1.0F, 2.0F},
                                {-4.0F, 3.0F},
                                {2.0F, 6.0F} },
                              { {1.0F, 2.0F},
                                {3.0F, 4.0F},
                                {5.0F, 6.0F} } };
    /* expected value: sData1[k] x sData2[k] for each leading index k */
    DTYPE answer[2][2][2] = { { {8.0F, 9.0F},
                                {4.0F, 25.0F} },
                              { {27.0F, 34.0F},
                                {16.0F, 22.0F} } };

    /* CPU test */

    /* create tensors */
    XTensor * s1 = NewTensor(sOrder1, sDimSize1);
    XTensor * s2 = NewTensor(sOrder2, sDimSize2);
    XTensor * t = NewTensor(tOrder, tDimSize);

    /* initialize variables */
    s1->SetData(sData1, sUnitNum1);
    s2->SetData(sData2, sUnitNum2);
    t->SetZeroAll();

    /* call MatrixMulBatched function */
    MatrixMulBatched(s1, X_NOTRANS, s2, X_NOTRANS, t);

    /* check results */
    bool passed = t->CheckData(answer, tUnitNum);

#ifdef USE_CUDA
    /* GPU test */

    /* create tensor */
    XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
    XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);

    /* Initialize variables */
    sGPU1->SetData(sData1, sUnitNum1);
    sGPU2->SetData(sData2, sUnitNum2);
    tGPU->SetZeroAll();

    /* call MatrixMulBatched function */
    MatrixMulBatched(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);

    /* check results: the GPU check always runs, even if the CPU check failed */
    const bool gpuTest = tGPU->CheckData(answer, tUnitNum);
    passed = passed && gpuTest;
#endif // USE_CUDA

    /* destroy variables (single teardown path shared by both build modes) */
    delete s1;
    delete s2;
    delete t;
#ifdef USE_CUDA
    delete sGPU1;
    delete sGPU2;
    delete tGPU;
#endif // USE_CUDA

    return passed;
}
/* other cases */
/*
TODO!!
*/
/*
run every test case of the MatrixMulBatched function and print a
per-case and an overall verdict to stdout
<< return - true only if all cases passed
*/
bool TestMatrixMulBatched()
{
    XPRINT(0, stdout, "[TEST MATRIXMULBATCHED] matrix multiplication of the two tensors \n");

    /* stays true until some case reports a failure */
    bool allPassed = true;

    /* case 1 test */
    if (TestMatrixMulBatched1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* case 2 test */
    if (TestMatrixMulBatched2()) {
        XPRINT(0, stdout, ">> case 2 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-15
*/
#ifndef __TEST_MATRIXMULBATCHED_H__
#define __TEST_MATRIXMULBATCHED_H__
#include "../core/arithmetic/MatrixMulBatched.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for MatrixMulBatched Function
runs the individual test cases and prints their results to stdout;
declared with C linkage (extern "C")
<< return - true if every case passed, false otherwise
*/
extern "C"
bool TestMatrixMulBatched();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_MATRIXMULBATCHED_H__
......@@ -19,14 +19,15 @@
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-13
*/
#include "../XTensor.h"
#include "../XList.h"
#include "TMerge.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: transform a tensor by merging it along with a dimension.
* In this case, (3, 2) -> (6), whereToMerge=1, leadingDim=0.
/*
case 1: transform a tensor by merging it along with a dimension.
In this case, (3, 2) -> (6), whereToMerge=1, leadingDim=0.
*/
bool TestMerge1()
{
......@@ -49,9 +50,9 @@ bool TestMerge1()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData[2][3] = { {0.0, 1.0, 2.0},
{3.0, 4.0, 5.0} };
DTYPE answer[6] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0};
DTYPE sData[2][3] = { {0.0F, 1.0F, 2.0F},
{3.0F, 4.0F, 5.0F} };
DTYPE answer[6] = {0.0F, 1.0F, 2.0F, 3.0F, 4.0F, 5.0F};
/* CPU test */
bool cpuTest = true;
......@@ -89,21 +90,30 @@ bool TestMerge1()
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s, t, sGPU, tGPU;
delete[] sDimSize, tDimSize;
delete s;
delete t;
delete sGPU;
delete tGPU;
delete[] sDimSize;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s, t;
delete[] sDimSize, tDimSize;
delete s;
delete t;
delete[] sDimSize;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 2: transform a tensor by merging it along with a dimension.
* In this case, (2, 2, 3) -> (4, 3), whereToMerge=1, leadingDim=0.
/*
case 2: transform a tensor by merging it along with a dimension.
In this case,
(2, 2, 3) -> (4, 3), whereToMerge=1, leadingDim=0.
(2, 2, 3) -> (2, 6), whereToMerge=2, leadingDim=0.
*/
bool TestMerge2()
{
......@@ -119,40 +129,55 @@ bool TestMerge2()
sUnitNum *= sDimSize[i];
/* a target tensor of size (4, 3) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 4;
tDimSize[1] = 3;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData[2][2][3] = { { {0.0, 1.0, 2.0},
{4.0, 5.0, 6.0} },
{ {-1.0, 2.0, 3.0},
{-4.0, -5.0, -6.0} } };
DTYPE answer[4][3] = { {0.0, 1.0, 2.0},
{4.0, 5.0, 6.0},
{-1.0, 2.0, 3.0},
{-4.0, -5.0, -6.0} };
int tOrder1 = 2;
int * tDimSize1 = new int[tOrder1];
tDimSize1[0] = 4;
tDimSize1[1] = 3;
int tUnitNum1 = 1;
for (int i = 0; i < tOrder1; i++)
tUnitNum1 *= tDimSize1[i];
/* a target tensor of size (2, 6) */
int tOrder2 = 2;
int * tDimSize2 = new int[tOrder2];
tDimSize2[0] = 2;
tDimSize2[1] = 6;
int tUnitNum2 = 1;
for (int i = 0; i < tOrder2; i++)
tUnitNum2 *= tDimSize2[i];
DTYPE sData[2][2][3] = { { {0.0F, 1.0F, 2.0F},
{4.0F, 5.0F, 6.0F} },
{ {-1.0F, 2.0F, 3.0F},
{-4.0F, -5.0F, -6.0F} } };
DTYPE answer1[4][3] = { {0.0F, 1.0F, 2.0F},
{4.0F, 5.0F, 6.0F},
{-1.0F, 2.0F, 3.0F},
{-4.0F, -5.0F, -6.0F} };
DTYPE answer2[2][6] = { {0.0F, 1.0F, 2.0F, -1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, -4.0F, -5.0F, -6.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor * t1 = NewTensor(tOrder1, tDimSize1);
XTensor * t2 = NewTensor(tOrder2, tDimSize2);
/* initialize variables */
s->SetData(sData, sUnitNum);
t->SetZeroAll();
t1->SetZeroAll();
t2->SetZeroAll();
/* call merge function */
Merge(s, t, 1, 0);
Merge(s, t1, 1, 0);
Merge(s, t2, 2, 0);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2);
#ifdef USE_CUDA
/* GPU test */
......@@ -160,121 +185,51 @@ bool TestMerge2()
/* create tensor */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0);
XTensor * tGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0);
/* Initialize variables */
sGPU->SetData(sData, sUnitNum);
tGPU->SetZeroAll();
/* call merge function */
Merge(sGPU, tGPU, 1, 0);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s, t, sGPU, tGPU;
delete[] sDimSize, tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s, t;
delete[] sDimSize, tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 3: transform a tensor by merging it along with a dimension.
* In this case, (2, 3, 4) -> (3, 8), whereToMerge=0, leadingDim=2.
*/
bool TestMerge3()
{
/* a source tensor of size (2, 3, 4) */
int sOrder = 3;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 3;
sDimSize[2] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a target tensor of size (8, 3) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 3;
tDimSize[1] = 8;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData[2][3][4] = { { {0.0, 1.0, 2.0, 3.0},
{4.0, 5.0, 6.0, 7.0},
{8.0, 9.0, 10.0, 11.0} },
{ {0.0, -1.0, -2.0, -3.0},
{-4.0, -5.0, -6.0, -7.0},
{-8.0, -9.0, -10.0, -11.0} } };
DTYPE answer[3][8] = { {0.0, 1.0, 2.0, 3.0, 0.0, -1.0, -2.0, -3.0},
{4.0, 5.0, 6.0, 7.0, -4.0, -5.0, -6.0, -7.0},
{8.0, 9.0, 10.0, 11.0, -8.0, -9.0, -10.0, -11.0} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize);
/* initialize variables */
s->SetData(sData, sUnitNum);
t->SetZeroAll();
tGPU1->SetZeroAll();
tGPU2->SetZeroAll();
/* call merge function */
Merge(s, t, 2, 0);
Merge(sGPU, tGPU1, 1, 0);
Merge(sGPU, tGPU2, 2, 0);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
sGPU->SetData(sData, sUnitNum);
tGPU->SetZeroAll();
/* call merge function */
Merge(sGPU, tGPU, 2, 0);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2);
/* destroy variables */
delete s, t, sGPU, tGPU;
delete[] sDimSize, tDimSize;
delete s;
delete t1;
delete t2;
delete sGPU;
delete tGPU1;
delete tGPU2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s, t;
delete[] sDimSize, tDimSize;
delete s;
delete t1;
delete t2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest;
#endif // USE_CUDA
}
/* case 4: merge small tensors into a big tensor.
/*
case 3: merge small tensors into a big tensor.
In this case, 2 * (2, 4) -> (4, 4), whereToMerge=0.
*/
bool TestMerge4()
bool TestMerge3()
{
/* create list */
XList * smallList = new XList();
......@@ -289,10 +244,10 @@ bool TestMerge4()
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
DTYPE sData1[2][4] = { {0.0, 1.0, 2.0, 3.0},
{4.0, 5.0, 6.0, 7.0} };
DTYPE sData2[2][4] = { {0.0, -1.0, -2.0, -3.0},
{-4.0, -5.0, -6.0, -7.0} };
DTYPE sData1[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE sData2[2][4] = { {0.0F, -1.0F, -2.0F, -3.0F},
{-4.0F, -5.0F, -6.0F, -7.0F} };
/* a target tensor of size (4, 4) */
int tOrder = 2;
......@@ -304,10 +259,10 @@ bool TestMerge4()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE answer[4][4] = { {0.0, 1.0, 2.0, 3.0},
{4.0, 5.0, 6.0, 7.0},
{0.0, -1.0, -2.0, -3.0},
{-4.0, -5.0, -6.0, -7.0} };
DTYPE answer[4][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F},
{0.0F, -1.0F, -2.0F, -3.0F},
{-4.0F, -5.0F, -6.0F, -7.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -359,24 +314,37 @@ bool TestMerge4()
/* check results */
cpuTest = tGPU->CheckData(answer, tUnitNum);
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize, tDimSize;
/* destroy variables */
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize;
delete[] tDimSize;
delete smallList;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1, s2, t;
delete[] sDimSize, tDimSize;
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
delete smallList;
return cpuTest;
#endif // USE_CUDA
}
/* case 5: merge small tensors into a big tensor.
/*
case 4: merge small tensors into a big tensor.
In this case, 2 * (2, 4) -> (2, 8), whereToMerge=1.
*/
bool TestMerge5()
bool TestMerge4()
{
/* create list */
XList * smallList = new XList();
......@@ -391,10 +359,10 @@ bool TestMerge5()
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
DTYPE sData1[2][4] = { {0.0, 1.0, 2.0, 3.0},
{4.0, 5.0, 6.0, 7.0} };
DTYPE sData2[2][4] = { {0.0, -1.0, -2.0, -3.0},
{-4.0, -5.0, -6.0, -7.0} };
DTYPE sData1[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE sData2[2][4] = { {0.0F, -1.0F, -2.0F, -3.0F},
{-4.0F, -5.0F, -6.0F, -7.0F} };
/* a target tensor of size (4, 4) */
int tOrder = 2;
......@@ -406,8 +374,8 @@ bool TestMerge5()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE answer[2][8] = { {0.0, 1.0, 2.0, 3.0, 0.0, -1.0, -2.0, -3.0},
{4.0, 5.0, 6.0, 7.0, -4.0, -5.0, -6.0, -7.0} };
DTYPE answer[2][8] = { {0.0F, 1.0F, 2.0F, 3.0F, 0.0F, -1.0F, -2.0F, -3.0F},
{4.0F, 5.0F, 6.0F, 7.0F, -4.0F, -5.0F, -6.0F, -7.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -459,15 +427,27 @@ bool TestMerge5()
/* check results */
cpuTest = tGPU->CheckData(answer, tUnitNum);
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize, tDimSize;
/* destroy variables */
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize;
delete[] tDimSize;
delete smallList;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1, s2, t;
delete[] sDimSize, tDimSize;
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
delete smallList;
return cpuTest;
#endif // USE_CUDA
......@@ -479,10 +459,9 @@ bool TestMerge5()
*/
/* test for Merge Function */
extern "C"
bool TestMerge()
{
XPRINT(0, stdout, "[TEST MERGE] -------------\n");
XPRINT(0, stdout, "[TEST MERGE] transform a tensor by merging it alone with a dimension or merge small tensors into a big tensor\n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
......@@ -522,15 +501,6 @@ bool TestMerge()
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* case 5 test */
caseFlag = TestMerge5();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 5 failed!\n");
}
else
XPRINT(0, stdout, ">> case 5 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_MERGE_H__
#define __TEST_MERGE_H__
#include "../core/Merge.h"
#include "../core/shape/Merge.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,17 +19,18 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-15
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../core/Multiply.h"
#include "TMultiply.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: element-wise product of two tensors, c(i) = a(i)*b(i) + \alpha * c(i)
* In this case, (2 * 1) (2 * 1) -> (2 * 1), leadingDim=0, alpha=0.
/*
case 1: element-wise product of two tensors
c(i) = a(i)*b(i) + \alpha * c(i)
In this case, (2, 1) (2, 1) -> (2, 1), leadingDim=0, alpha=0.
*/
bool TestMultiply1()
{
/* a source tensor of size 2 * 1 */
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
......@@ -39,7 +40,7 @@ bool TestMultiply1()
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size 2 * 1 */
/* a source tensor of size (2, 1) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2;
......@@ -49,7 +50,7 @@ bool TestMultiply1()
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size 2 * 1 */
/* a target tensor of size (2, 1) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
......@@ -59,9 +60,12 @@ bool TestMultiply1()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][1] = { {0.0}, {1.0} };
DTYPE sData2[2][1] = { {2.0}, {3.0} };
DTYPE answer[2][1] = { {0.0}, {3.0} };
DTYPE sData1[2][1] = { {0.0F},
{1.0F} };
DTYPE sData2[2][1] = { {2.0F},
{3.0F} };
DTYPE answer[2][1] = { {0.0F},
{3.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -76,7 +80,7 @@ bool TestMultiply1()
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* call multiplyelementwise function */
/* call MultiplyElementWise function */
Multiply(s1, s2, t, 0);
/* check results */
......@@ -96,19 +100,26 @@ bool TestMultiply1()
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* call multiplyelementwise function */
/* call MultiplyElementWise function */
Multiply(sGPU1, sGPU2, tGPU, 0);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
/* destroy variables */
delete s1;
delete s2;
delete t;
......@@ -120,12 +131,14 @@ bool TestMultiply1()
#endif // USE_CUDA
}
/* case 2: element-wise product of two tensors, c(i) = a(i)*b(i) + \alpha * c(i)
* In this case, (2 * 2) (2 * 2) -> (2 * 2), leadingDim=0, alpha=0.
/*
case 2: element-wise product of two tensors
c(i) = a(i)*b(i) + \alpha * c(i)
In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=0, alpha=0.
*/
bool TestMultiply2()
{
/* a source tensor of size (2 * 2) */
/* a source tensor of size (2, 2) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
......@@ -135,7 +148,7 @@ bool TestMultiply2()
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2 * 2) */
/* a source tensor of size (2, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2;
......@@ -145,7 +158,7 @@ bool TestMultiply2()
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2 * 2) */
/* a target tensor of size (2, 2) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
......@@ -155,12 +168,12 @@ bool TestMultiply2()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][2] = { {0.0, 1.0},
{2.0, 3.0} };
DTYPE sData2[2][2] = { {0.0, 1.0},
{2.0, 3.0} };
DTYPE answer[2][2] = { {0.0, 1.0},
{4.0, 9.0} };
DTYPE sData1[2][2] = { {0.0F, 1.0F},
{2.0F, 3.0F} };
DTYPE sData2[2][2] = { {0.0F, 1.0F},
{2.0F, 3.0F} };
DTYPE answer[2][2] = { {0.0F, 1.0F},
{4.0F, 9.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -175,7 +188,7 @@ bool TestMultiply2()
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* call multiplyelementwise function */
/* call MultiplyElementWise function */
Multiply(s1, s2, t, 0);
/* check results */
......@@ -195,32 +208,44 @@ bool TestMultiply2()
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* call multiplyelementwise function */
/* call MultiplyElementWise function */
Multiply(sGPU1, sGPU2, tGPU, 0);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1, s2, t;
delete[] sDimSize1, sDimSize2, tDimSize;
/* destroy variables */
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 3: element-wise product of two tensors, c(i) = a(i)*b(i) + \alpha * c(i)
* In this case, (2 * 2) (2 * 2) -> (2 * 2), leadingDim=1, alpha=0.
/*
case 3: element-wise product of two tensors, c(i) = a(i)*b(i) + \alpha * c(i)
In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=1, alpha=0.
*/
bool TestMultiply3()
{
/* a source tensor of size (2 * 2) */
/* a source tensor of size (2, 2) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
......@@ -230,7 +255,7 @@ bool TestMultiply3()
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2 * 2) */
/* a source tensor of size (2, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2;
......@@ -240,7 +265,7 @@ bool TestMultiply3()
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2 * 2) */
/* a target tensor of size (2, 2) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
......@@ -250,12 +275,12 @@ bool TestMultiply3()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][2] = { {0.0, 1.0},
{2.0, 3.0} };
DTYPE sData2[2][2] = { {0.0, 1.0},
{2.0, 3.0} };
DTYPE answer[2][2] = { {0.0, 1.0},
{4.0, 9.0} };
DTYPE sData1[2][2] = { {0.0F, 1.0F},
{2.0F, 3.0F} };
DTYPE sData2[2][2] = { {0.0F, 1.0F},
{2.0F, 3.0F} };
DTYPE answer[2][2] = { {0.0F, 1.0F},
{4.0F, 9.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -270,7 +295,7 @@ bool TestMultiply3()
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* call multiplyelementwise function */
/* call MultiplyElementWise function */
Multiply(s1, s2, t, 1);
/* check results */
......@@ -290,21 +315,32 @@ bool TestMultiply3()
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* call multiplyelementwise function */
/* call MultiplyElementWise function */
Multiply(sGPU1, sGPU2, tGPU, 1);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1, s2, t;
delete[] sDimSize1, sDimSize2, tDimSize;
/* destroy variables */
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
......@@ -316,10 +352,9 @@ TODO!!
*/
/* test for MultiplyElementWise Function */
extern "C"
bool TestMultiply()
{
XPRINT(0, stdout, "[TEST MULTIPLYELEMENTWISE] -------------\n");
XPRINT(0, stdout, "[TEST MULTIPLYELEMENTWISE] element-wise product of two tensors \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
......
......@@ -19,10 +19,10 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-15
*/
#ifndef __TEST_MULTIPLY_H__
#define __TEST_MULTIPLY_H__
#ifndef __TEST_MULTIPLYELEMENTWISE_H__
#define __TEST_MULTIPLYELEMENTWISE_H__
#include "../core/Multiply.h"
#include "../core/arithmetic/Multiply.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,15 +19,14 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../core/Negate.h"
#include "TNegate.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: set every entry to its minus value */
bool TestNegate1()
{
/* a tensor of size 3 * 2 */
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
......@@ -37,12 +36,12 @@ bool TestNegate1()
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0, -2.0},
{-3.0, 4.0},
{5.0, -6.0} };
DTYPE answer[3][2] = { {-1.0, 2.0},
{3.0, -4.0},
{-5.0, 6.0} };
DTYPE aData[3][2] = { {1.0F, -2.0F},
{-3.0F, 4.0F},
{5.0F, -6.0F} };
DTYPE answer[3][2] = { {-1.0F, 2.0F},
{3.0F, -4.0F},
{-5.0F, 6.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -53,12 +52,12 @@ bool TestNegate1()
/* initialize variables */
a->SetData(aData, aUnitNum);
/* call negate function */
/* call Negate function */
Negate(a);
/* check results */
cpuTest = a->CheckData(answer, aUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
......@@ -69,14 +68,15 @@ bool TestNegate1()
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
/* call negate function */
/* call Negate function */
Negate(aGPU);
/* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum);
/* destroy variables */
delete a, aGPU;
delete a;
delete aGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
......@@ -92,7 +92,7 @@ bool TestNegate1()
/* case 2: set every entry to its minus value */
bool TestNegate2()
{
/* a tensor of size 3 * 2 */
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
......@@ -102,12 +102,12 @@ bool TestNegate2()
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {0.0, 0.0},
{0.0, 0.0},
{0.0, 0.0} };
DTYPE answer[3][2] = { {-0.0, -0.0},
{-0.0, -0.0},
{-0.0, -0.0} };
DTYPE aData[3][2] = { {0.0F, 0.0F},
{0.0F, 0.0F},
{0.0F, 0.0F} };
DTYPE answer[3][2] = { {-0.0F, -0.0F},
{-0.0F, -0.0F},
{-0.0F, -0.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -118,7 +118,7 @@ bool TestNegate2()
/* initialize variables */
a->SetData(aData, aUnitNum);
/* call negate function */
/* call Negate function */
Negate(a);
/* check results */
......@@ -134,14 +134,15 @@ bool TestNegate2()
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
/* call negate function */
/* call Negate function */
Negate(aGPU);
/* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum);
/* destroy variables */
delete a, aGPU;
delete a;
delete aGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
......@@ -160,10 +161,9 @@ TODO!!
*/
/* test for Negate Function */
extern "C"
bool TestNegate()
{
XPRINT(0, stdout, "[TEST NEGATE] -------------\n");
XPRINT(0, stdout, "[TEST NEGATE] set every entry to its minus value \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_NEGATE_H__
#define __TEST_NEGATE_H__
#include "../core/Negate.h"
#include "../core/arithmetic/Negate.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,17 +19,19 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-20
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../core/Normalize.h"
#include "TNormalize.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: normalized the data with normal distribution
* In this case, dim=0.
/*
case 1: normalized the data with normal distribution
For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b.
where a and b are the scalar and bias respectively,
and \epsilon is the adjustment parameter.
*/
bool TestNormalize1()
{
/* a source tensor of size 2 * 3 */
/* a source tensor of size (2, 3) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
......@@ -39,7 +41,7 @@ bool TestNormalize1()
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a target tensor of size 2 * 3 */
/* a target tensor of size (2, 3) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
......@@ -49,7 +51,7 @@ bool TestNormalize1()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
/* a mean tensor of size 3 */
/* a mean tensor of size (3) */
int meanOrder = 1;
int * meanDimSize = new int[meanOrder];
meanDimSize[0] = 3;
......@@ -58,7 +60,7 @@ bool TestNormalize1()
for (int i = 0; i < meanOrder; i++)
meanUnitNum *= meanDimSize[i];
/* a var tensor of size 3 */
/* a variance tensor of size (3) */
int varOrder = 1;
int * varDimSize = new int[varOrder];
varDimSize[0] = 3;
......@@ -67,7 +69,7 @@ bool TestNormalize1()
for (int i = 0; i < varOrder; i++)
varUnitNum *= varDimSize[i];
/* a a tensor of size 2 * 3 */
/* a scalar tensor of size (2, 3) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 2;
......@@ -77,7 +79,7 @@ bool TestNormalize1()
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
/* a b tensor of size 2 * 3 */
/* a bias tensor of size (2, 3) */
int bOrder = 2;
int * bDimSize = new int[bOrder];
bDimSize[0] = 2;
......@@ -87,41 +89,39 @@ bool TestNormalize1()
for (int i = 0; i < bOrder; i++)
bUnitNum *= bDimSize[i];
DTYPE sData[2][3] = { {0.5, -1.0, 2.0},
{3.5, -4.5, 1.0} };
DTYPE meanData[3] = {2.0, -2.75, 1.5};
DTYPE varData[3] = {4.5, 6.125, 0.5};
DTYPE aData[2][3] = { {0.0, 0.0, 0.0},
{0.0, 0.0, 0.0} };
DTYPE bData[2][3] = { {0.0, 0.0, 0.0},
{0.0, 0.0, 0.0} };
DTYPE answer[2][3] = { {0.0, 0.0, 0.0},
{0.0, 0.0, 0.0} };
DTYPE sData[2][3] = { {1.0F, 2.0F, 3.0F},
{1.5F, 2.5F, 3.5F} };
DTYPE meanData[3] = {1.0F, 1.5F, 2.0F};
DTYPE varData[3] = {1.0F, 1.0F, 4.0F};
DTYPE aData[2][3] = { {1.0F, 1.0F, 1.0F},
{1.0F, 1.0F, 1.0F} };
DTYPE answer[2][3] = { {0.0F, 0.5F, 0.5F},
{0.5F, 1.0F, 0.75F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor * mean = NewTensor(meanOrder, meanDimSize);
XTensor * var = NewTensor(varOrder, varDimSize);
XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(bOrder, bDimSize);
XTensor * t = NewTensor(tOrder, tDimSize);
/* initialize variables */
s->SetData(sData, sUnitNum);
mean->SetData(meanData, meanUnitNum);
var->SetData(varData, varUnitNum);
a->SetData(aData, aUnitNum);
b->SetData(bData, bUnitNum);
b->SetZeroAll();
t->SetZeroAll();
/* call normalize function */
Normalize(s, t, 0, mean, var, a, b, 0.0);
Normalize(s, t, 0, mean, var, a, b, 0.0F);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
cpuTest = t->CheckData(answer, tUnitNum, 1e-4, 0);
#ifdef USE_CUDA
/* GPU test */
......@@ -140,24 +140,50 @@ bool TestNormalize1()
meanGPU->SetData(meanData, meanUnitNum);
varGPU->SetData(varData, varUnitNum);
aGPU->SetData(aData, aUnitNum);
bGPU->SetData(bData, bUnitNum);
bGPU->SetZeroAll();
tGPU->SetZeroAll();
/* call normalize function */
Normalize(sGPU, tGPU, 0, meanGPU, varGPU, aGPU, bGPU, 0.0);
/* call Normalize function */
Normalize(sGPU, tGPU, 0, meanGPU, varGPU, aGPU, bGPU, 0.0F);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
gpuTest = tGPU->CheckData(answer, tUnitNum, 1e-4, 0);
/* destroy variables */
delete s, t, mean, var, a, b, sGPU, tGPU, meanGPU, varGPU, aGPU, bGPU;
delete[] sDimSize, tDimSize, meanDimSize, varDimSize, aDimSize, bDimSize;
delete s;
delete t;
delete mean;
delete var;
delete a;
delete b;
delete sGPU;
delete tGPU;
delete meanGPU;
delete varGPU;
delete aGPU;
delete bGPU;
delete[] sDimSize;
delete[] tDimSize;
delete[] meanDimSize;
delete[] varDimSize;
delete[] aDimSize;
delete[] bDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s, t, mean, var, a, b;
delete[] sDimSize, tDimSize, meanDimSize, varDimSize, aDimSize, bDimSize;
delete s;
delete t;
delete mean;
delete var;
delete a;
delete b;
delete[] sDimSize;
delete[] tDimSize;
delete[] meanDimSize;
delete[] varDimSize;
delete[] aDimSize;
delete[] bDimSize;
return cpuTest;
#endif // USE_CUDA
......@@ -169,10 +195,9 @@ TODO!!
*/
/* test for Normalize Function */
extern "C"
bool TestNormalize()
{
XPRINT(0, stdout, "[TEST NORMALIZE] -------------\n");
XPRINT(0, stdout, "[TEST NORMALIZE] normalized the data with normal distribution \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_NORMALIZE_H__
#define __TEST_NORMALIZE_H__
#include "../core/Normalize.h"
#include "../core/math/Normalize.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,17 +19,18 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-15
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../core/Power.h"
#include "../XUtility.h"
#include "TPower.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: get the power(a, p)
* In this case, p=2.
/*
case 1: get the power(a, p)
In this case, p=2.
*/
bool TestPower1()
{
/* a tensor of size 3 * 2 */
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
......@@ -39,12 +40,12 @@ bool TestPower1()
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0, 2.0},
{3.0, 4.0},
{5.0, 6.0} };
DTYPE answer[3][2] = { {1.0, 4.0},
{9.0, 16.0},
{25.0, 36.0} };
DTYPE aData[3][2] = { {1.0F, 2.0F},
{3.0F, 4.0F},
{5.0F, 6.0F} };
DTYPE answer[3][2] = { {1.0F, 4.0F},
{9.0F, 16.0F},
{25.0F, 36.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -55,11 +56,11 @@ bool TestPower1()
/* initialize variables */
a->SetData(aData, aUnitNum);
/* call power function */
Power(a, 2.0);
/* call Power function */
Power(a, 2.0F);
/* check results */
cpuTest = a->CheckData(answer, aUnitNum);
cpuTest = a->CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
......@@ -72,13 +73,14 @@ bool TestPower1()
aGPU->SetData(aData, aUnitNum);
/* call power function */
Power(aGPU, 2.0);
Power(aGPU, 2.0F);
/* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum);
gpuTest = aGPU->CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete a, aGPU;
delete a;
delete aGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
......@@ -91,12 +93,13 @@ bool TestPower1()
#endif // USE_CUDA
}
/* case 2: get the power(a, p)
* In this case, p=1.
/*
case 2: get the power(a, p)
In this case, p=1.
*/
bool TestPower2()
{
/* a tensor of size 3 * 2 */
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
......@@ -106,12 +109,12 @@ bool TestPower2()
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {0.0, 1.0},
{2.0, 3.0},
{4.0, 5.0} };
DTYPE answer[3][2] = { {0.0, 1.0},
{2.0, 3.0},
{4.0, 5.0} };
DTYPE aData[3][2] = { {0.0F, 1.0F},
{2.0F, 3.0F},
{4.0F, 5.0F} };
DTYPE answer[3][2] = { {0.0F, 1.0F},
{2.0F, 3.0F},
{4.0F, 5.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -122,11 +125,11 @@ bool TestPower2()
/* initialize variables */
a->SetData(aData, aUnitNum);
/* call power function */
Power(a, 1.0);
/* call Power function */
Power(a, 1.0F);
/* check results */
cpuTest = a->CheckData(answer, aUnitNum);
cpuTest = a->CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
......@@ -138,14 +141,15 @@ bool TestPower2()
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
/* call power function */
Power(aGPU, 1.0);
/* call Power function */
Power(aGPU, 1.0F);
/* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum);
gpuTest = aGPU->CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete a, aGPU;
delete a;
delete aGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
......@@ -158,12 +162,13 @@ bool TestPower2()
#endif // USE_CUDA
}
/* case 3: get the power(a, p)
* In this case, p=0.
/*
case 3: get the power(a, p)
In this case, p=0.
*/
bool TestPower3()
{
/* a tensor of size 3 * 2 */
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
......@@ -173,12 +178,12 @@ bool TestPower3()
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {0.0, 1.0},
{2.0, 3.0},
{4.0, 5.0} };
DTYPE answer[3][2] = { {1.0, 1.0},
{1.0, 1.0},
{1.0, 1.0} };
DTYPE aData[3][2] = { {0.0F, 1.0F},
{2.0F, 3.0F},
{4.0F, 5.0F} };
DTYPE answer[3][2] = { {1.0F, 1.0F},
{1.0F, 1.0F},
{1.0F, 1.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -189,11 +194,11 @@ bool TestPower3()
/* initialize variables */
a->SetData(aData, aUnitNum);
/* call power function */
Power(a, 0.0);
/* call Power function */
Power(a, 0.0F);
/* check results */
cpuTest = a->CheckData(answer, aUnitNum);
cpuTest = a->CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
......@@ -205,14 +210,15 @@ bool TestPower3()
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
/* call power function */
Power(aGPU, 0.0);
/* call Power function */
Power(aGPU, 0.0F);
/* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum);
gpuTest = aGPU->CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete a, aGPU;
delete a;
delete aGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
......@@ -231,10 +237,9 @@ TODO!!
*/
/* test for Power Function */
extern "C"
bool TestPower()
{
XPRINT(0, stdout, "[TEST POWER] -------------\n");
XPRINT(0, stdout, "[TEST POWER] get the power(a, p) \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_POWER_H__
#define __TEST_POWER_H__
#include "../core/Power.h"
#include "../core/math/Power.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,15 +19,17 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../function/Rectify.h"
#include "TRectify.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: rectify function y = max(0, x) */
/*
case 1: test rectify function
In this case, y = max(0, x)
*/
bool TestRectify1()
{
/* a x tensor of size 2 * 3 */
/* an x tensor of size (2, 3) */
int xOrder = 2;
int * xDimSize = new int[xOrder];
xDimSize[0] = 2;
......@@ -37,7 +39,7 @@ bool TestRectify1()
for (int i = 0; i < xOrder; i++)
xUnitNum *= xDimSize[i];
/* a y tensor of size 2 * 3 */
/* a y tensor of size (2, 3) */
int yOrder = 2;
int * yDimSize = new int[yOrder];
yDimSize[0] = 2;
......@@ -47,10 +49,10 @@ bool TestRectify1()
for (int i = 0; i < yOrder; i++)
yUnitNum *= yDimSize[i];
DTYPE xData[2][3] = { {0.0, -1.0, 2.0},
{3.0, -4.0, -5.0} };
DTYPE answer[2][3] = { {0.0, 0.0, 2.0},
{3.0, 0.0, 0.0} };
DTYPE xData[2][3] = { {0.0F, -1.0F, 2.0F},
{3.0F, -4.0F, -5.0F} };
DTYPE answer[2][3] = { {0.0F, 0.0F, 2.0F},
{3.0F, 0.0F, 0.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -63,7 +65,7 @@ bool TestRectify1()
x->SetData(xData, xUnitNum);
y->SetZeroAll();
/* call rectify function */
/* call Rectify function */
Rectify(x, y);
/* check results */
......@@ -81,32 +83,41 @@ bool TestRectify1()
xGPU->SetData(xData, xUnitNum);
yGPU->SetZeroAll();
/* call rectify function */
/* call Rectify function */
Rectify(xGPU, yGPU);
/* check results */
gpuTest = yGPU->CheckData(answer, yUnitNum);
/* destroy variables */
delete x, y, xGPU, yGPU;
delete[] xDimSize, yDimSize;
delete x;
delete y;
delete xGPU;
delete yGPU;
delete[] xDimSize;
delete[] yDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete x, y;
delete[] xDimSize, yDimSize;
delete x;
delete y;
delete[] xDimSize;
delete[] yDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 2: backward computation dE/dx = dE/dy * dy/dx rectified: y = max(0, x)
* In this case, lossName=CROSSENTROPY.
/*
case 2: backward computation
dE/dx = dE/dy * dy/dx
rectified: y = max(0, x)
In this case, lossName=CROSSENTROPY.
*/
bool TestRectify2()
{
/* a x tensor of size 2 * 3 */
/* an x tensor of size (2, 3) */
int xOrder = 2;
int * xDimSize = new int[xOrder];
xDimSize[0] = 2;
......@@ -116,46 +127,6 @@ bool TestRectify2()
for (int i = 0; i < xOrder; i++)
xUnitNum *= xDimSize[i];
/* a y tensor of size 2 * 3 */
int yOrder = 2;
int * yDimSize = new int[yOrder];
yDimSize[0] = 2;
yDimSize[1] = 3;
int yUnitNum = 1;
for (int i = 0; i < yOrder; i++)
yUnitNum *= yDimSize[i];
/* a gold tensor of size 2 * 3 */
int goldOrder = 2;
int * goldDimSize = new int[goldOrder];
goldDimSize[0] = 2;
goldDimSize[1] = 3;
int goldUnitNum = 1;
for (int i = 0; i < goldOrder; i++)
goldUnitNum *= goldDimSize[i];
/* a dedy tensor of size 2 * 3 */
int dedyOrder = 2;
int * dedyDimSize = new int[dedyOrder];
dedyDimSize[0] = 2;
dedyDimSize[1] = 3;
int dedyUnitNum = 1;
for (int i = 0; i < dedyOrder; i++)
dedyUnitNum *= dedyDimSize[i];
/* a dedx tensor of size 2 * 3 */
int dedxOrder = 2;
int * dedxDimSize = new int[dedxOrder];
dedxDimSize[0] = 2;
dedxDimSize[1] = 3;
int dedxUnitNum = 1;
for (int i = 0; i < dedxOrder; i++)
dedxUnitNum *= dedxDimSize[i];
DTYPE xData[2][3] = { {1.0F, 1.0F, 2.0F},
{2.0F, 4.0F, 5.0F} };
DTYPE yData[2][3] = { {1.0F, 1.0F, 2.0F},
......@@ -172,277 +143,23 @@ bool TestRectify2()
/* create tensors */
XTensor * x = NewTensor(xOrder, xDimSize);
XTensor * y = NewTensor(yOrder, yDimSize);
XTensor * gold = NewTensor(goldOrder, goldDimSize);
XTensor * dedy = NewTensor(dedyOrder, dedyDimSize);
XTensor * dedx = NewTensor(dedxOrder, dedxDimSize);
/* initialize variables */
x->SetData(xData, xUnitNum);
y->SetData(yData, yUnitNum);
gold->SetData(goldData, goldUnitNum);
dedy->SetData(dedyData, dedyUnitNum);
dedx->SetZeroAll();
/* call rectifybackward function */
RectifyBackward(gold, y, x, dedy, dedx, CROSSENTROPY);
/* check results */
cpuTest = dedx->CheckData(answer, dedxUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * xGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(yOrder, yDimSize, X_FLOAT, 1.0F, 0);
XTensor * goldGPU = NewTensor(goldOrder, goldDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(dedyOrder, dedyDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(dedxOrder, dedxDimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
xGPU->SetData(xData, xUnitNum);
yGPU->SetData(yData, yUnitNum);
goldGPU->SetData(goldData, goldUnitNum);
dedyGPU->SetData(dedyData, dedyUnitNum);
dedxGPU->SetZeroAll();
/* call rectifybackward function */
RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
/* check results */
gpuTest = dedxGPU->CheckData(answer, dedxUnitNum);
/* destroy variables */
delete x, y, dedy, dedx, gold, xGPU, yGPU, dedyGPU, dedxGPU, goldGPU;
delete[] xDimSize, yDimSize, dedyDimSize, dedxDimSize, goldDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete x, y, dedy, dedx, gold;
delete[] xDimSize, yDimSize, dedyDimSize, dedxDimSize, goldDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 3: backward computation dE/dx = dE/dy * dy/dx rectified: y = max(0, x)
* In this case, lossName=SQUAREDERROR.
*/
bool TestRectify3()
{
/* a x tensor of size 2 * 3 */
int xOrder = 2;
int * xDimSize = new int[xOrder];
xDimSize[0] = 2;
xDimSize[1] = 3;
int xUnitNum = 1;
for (int i = 0; i < xOrder; i++)
xUnitNum *= xDimSize[i];
/* a y tensor of size 2 * 3 */
int yOrder = 2;
int * yDimSize = new int[yOrder];
yDimSize[0] = 2;
yDimSize[1] = 3;
int yUnitNum = 1;
for (int i = 0; i < yOrder; i++)
yUnitNum *= yDimSize[i];
/* a gold tensor of size 2 * 3 */
int goldOrder = 2;
int * goldDimSize = new int[goldOrder];
goldDimSize[0] = 2;
goldDimSize[1] = 3;
int goldUnitNum = 1;
for (int i = 0; i < goldOrder; i++)
goldUnitNum *= goldDimSize[i];
/* a dedy tensor of size 2 * 3 */
int dedyOrder = 2;
int * dedyDimSize = new int[dedyOrder];
dedyDimSize[0] = 2;
dedyDimSize[1] = 3;
int dedyUnitNum = 1;
for (int i = 0; i < dedyOrder; i++)
dedyUnitNum *= dedyDimSize[i];
/* a dedx tensor of size 2 * 3 */
int dedxOrder = 2;
int * dedxDimSize = new int[dedxOrder];
dedxDimSize[0] = 2;
dedxDimSize[1] = 3;
int dedxUnitNum = 1;
for (int i = 0; i < dedxOrder; i++)
dedxUnitNum *= dedxDimSize[i];
DTYPE xData[2][3] = { {1.0, 1.0, 2.0},
{2.0, 4.0, 5.0} };
DTYPE yData[2][3] = { {1.0, 1.0, 2.0},
{2.0, 4.0, 5.0} };
DTYPE goldData[2][3] = { {1.0, 1.0, 1.0},
{1.0, 1.0, 1.0} };
DTYPE dedyData[2][3] = { {0.0, 0.0, 1.0},
{1.0, 3.0, 4.0} };
DTYPE answer[2][3] = { {0.0, 0.0, 1.0},
{1.0, 3.0, 4.0} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * x = NewTensor(xOrder, xDimSize);
XTensor * y = NewTensor(yOrder, yDimSize);
XTensor * gold = NewTensor(goldOrder, goldDimSize);
XTensor * dedy = NewTensor(dedyOrder, dedyDimSize);
XTensor * dedx = NewTensor(dedxOrder, dedxDimSize);
/* initialize variables */
x->SetData(xData, xUnitNum);
y->SetData(yData, yUnitNum);
gold->SetData(goldData, goldUnitNum);
dedy->SetData(dedyData, dedyUnitNum);
dedx->SetZeroAll();
/* call rectifybackward function */
RectifyBackward(gold, y, x, dedy, dedx, CROSSENTROPY);
/* check results */
cpuTest = dedx->CheckData(answer, dedxUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * xGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(yOrder, yDimSize, X_FLOAT, 1.0F, 0);
XTensor * goldGPU = NewTensor(goldOrder, goldDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(dedyOrder, dedyDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(dedxOrder, dedxDimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
xGPU->SetData(xData, xUnitNum);
yGPU->SetData(yData, yUnitNum);
goldGPU->SetData(goldData, goldUnitNum);
dedyGPU->SetData(dedyData, dedyUnitNum);
dedxGPU->SetZeroAll();
/* call rectifybackward function */
RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
/* check results */
gpuTest = dedxGPU->CheckData(answer, dedxUnitNum);
/* destroy variables */
delete x, y, dedy, dedx, gold, xGPU, yGPU, dedyGPU, dedxGPU, goldGPU;
delete[] xDimSize, yDimSize, dedyDimSize, dedxDimSize, goldDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete x, y, dedy, dedx, gold;
delete[] xDimSize, yDimSize, dedyDimSize, dedxDimSize, goldDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 4: backward computation dE/dx = dE/dy * dy/dx rectified: y = max(0, x)
* In this case, lossName=ONEHOTERROR.
*/
bool TestRectify4()
{
/* a x tensor of size 2 * 3 */
int xOrder = 2;
int * xDimSize = new int[xOrder];
xDimSize[0] = 2;
xDimSize[1] = 3;
int xUnitNum = 1;
for (int i = 0; i < xOrder; i++)
xUnitNum *= xDimSize[i];
/* a y tensor of size 2 * 3 */
int yOrder = 2;
int * yDimSize = new int[yOrder];
yDimSize[0] = 2;
yDimSize[1] = 3;
int yUnitNum = 1;
for (int i = 0; i < yOrder; i++)
yUnitNum *= yDimSize[i];
/* a gold tensor of size 2 * 3 */
int goldOrder = 2;
int * goldDimSize = new int[goldOrder];
goldDimSize[0] = 2;
goldDimSize[1] = 3;
int goldUnitNum = 1;
for (int i = 0; i < goldOrder; i++)
goldUnitNum *= goldDimSize[i];
/* a dedy tensor of size 2 * 3 */
int dedyOrder = 2;
int * dedyDimSize = new int[dedyOrder];
dedyDimSize[0] = 2;
dedyDimSize[1] = 3;
int dedyUnitNum = 1;
for (int i = 0; i < dedyOrder; i++)
dedyUnitNum *= dedyDimSize[i];
/* a dedx tensor of size 2 * 3 */
int dedxOrder = 2;
int * dedxDimSize = new int[dedxOrder];
dedxDimSize[0] = 2;
dedxDimSize[1] = 3;
int dedxUnitNum = 1;
for (int i = 0; i < dedxOrder; i++)
dedxUnitNum *= dedxDimSize[i];
DTYPE xData[2][3] = { {1.0, 1.0, -2.0},
{2.0, 4.0, 5.0} };
DTYPE yData[2][3] = { {1.0, 1.0, 0.0},
{2.0, 4.0, 5.0} };
DTYPE goldData[2][3] = { {1.0, 0.0, 1.0},
{1.0, 1.0, 0.0} };
DTYPE dedyData[2][3] = { {0.0, 0.0, -1.0},
{1.0, 3.0, 0.0} };
DTYPE answer[2][3] = { {0.0, 0.0, 0.0},
{1.0, 3.0, 0.0} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * x = NewTensor(xOrder, xDimSize);
XTensor * y = NewTensor(yOrder, yDimSize);
XTensor * gold = NewTensor(goldOrder, goldDimSize);
XTensor * dedy = NewTensor(dedyOrder, dedyDimSize);
XTensor * dedx = NewTensor(dedxOrder, dedxDimSize);
XTensor * y = NewTensor(xOrder, xDimSize);
XTensor * gold = NewTensor(xOrder, xDimSize);
XTensor * dedy = NewTensor(xOrder, xDimSize);
XTensor * dedx = NewTensor(xOrder, xDimSize);
/* initialize variables */
x->SetData(xData, xUnitNum);
y->SetData(yData, yUnitNum);
gold->SetData(goldData, goldUnitNum);
dedy->SetData(dedyData, dedyUnitNum);
y->SetData(yData, xUnitNum);
gold->SetData(goldData, xUnitNum);
dedy->SetData(dedyData, xUnitNum);
dedx->SetZeroAll();
/* call rectifybackward function */
RectifyBackward(gold, y, x, dedy, dedx, ONEHOTERROR);
/* call RectifyBackward function */
RectifyBackward(gold, y, x, dedy, dedx, NOLOSS);
/* check results */
cpuTest = dedx->CheckData(answer, dedxUnitNum);
cpuTest = dedx->CheckData(answer, xUnitNum);
#ifdef USE_CUDA
/* GPU test */
......@@ -450,33 +167,46 @@ bool TestRectify4()
/* create tensors */
XTensor * xGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(yOrder, yDimSize, X_FLOAT, 1.0F, 0);
XTensor * goldGPU = NewTensor(goldOrder, goldDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(dedyOrder, dedyDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(dedxOrder, dedxDimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
XTensor * goldGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
xGPU->SetData(xData, xUnitNum);
yGPU->SetData(yData, yUnitNum);
goldGPU->SetData(goldData, goldUnitNum);
dedyGPU->SetData(dedyData, dedyUnitNum);
yGPU->SetData(yData, xUnitNum);
goldGPU->SetData(goldData, xUnitNum);
dedyGPU->SetData(dedyData, xUnitNum);
dedxGPU->SetZeroAll();
/* call rectifybackward function */
RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, NOLOSS);
/* check results */
gpuTest = dedxGPU->CheckData(answer, dedxUnitNum);
gpuTest = dedxGPU->CheckData(answer, xUnitNum);
/* destroy variables */
delete x, y, dedy, dedx, gold, xGPU, yGPU, dedyGPU, dedxGPU, goldGPU;
delete[] xDimSize, yDimSize, dedyDimSize, dedxDimSize, goldDimSize;
delete x;
delete y;
delete dedy;
delete dedx;
delete gold;
delete xGPU;
delete yGPU;
delete dedyGPU;
delete dedxGPU;
delete goldGPU;
delete[] xDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete x, y, dedy, dedx, gold;
delete[] xDimSize, yDimSize, dedyDimSize, dedxDimSize, goldDimSize;
delete x;
delete y;
delete dedy;
delete dedx;
delete gold;
delete[] xDimSize;
return cpuTest;
#endif // USE_CUDA
......@@ -488,10 +218,9 @@ TODO!!
*/
/* test for Rectify Function */
extern "C"
bool TestRectify()
{
XPRINT(0, stdout, "[TEST RECTIFY] -------------\n");
XPRINT(0, stdout, "[TEST RECTIFY] test rectify and its backward computation \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
......@@ -514,26 +243,6 @@ bool TestRectify()
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */
caseFlag = TestRectify3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestRectify4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -16,246 +16,154 @@
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-30
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../core/ReduceMax.h"
#include "TReduceMax.h"
namespace nts { // namespace nt(NiuTrans.Tensor)
/* case 1 */
bool TestReduceMax1()
{
/* a tensor of size 2 * 4 */
int order = 2;
int order_reduce = 1;
int * dimSize = new int[order];
dimSize[0] = 2;
dimSize[1] = 4;
namespace nts { // namespace nts(NiuTrans.Tensor)
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
/* a tensor of size 4 */
int * dimSize_reduce_a = new int[order_reduce];
dimSize_reduce_a[0] = 4;
int unitNum_a = 1;
for (int i = 0; i < order_reduce; i++)
unitNum_a *= dimSize_reduce_a[i];
/* a tensor of size 2 */
int * dimSize_reduce_b = new int[order_reduce];
dimSize_reduce_b[0] = 2;
int unitNum_b = 1;
for (int i = 0; i < order_reduce; i++)
unitNum_b *= dimSize_reduce_b[i];
DTYPE aData[2][4] = { { 0.0, 1.0, 2.0, 3.0 },
{ 4.0, 5.0, 6.0, 7.0 } };
DTYPE bData[2][4] = { { 1.0, -1.0, -3.0, -5.0 },
{ -7.0, -9.0, -11.0, -13.0 } };
DTYPE answer_a[4] = { 4.0, 5.0, 6.0, 7.0 };
DTYPE answer_b[2] = { 1.0, -7.0 };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(order, dimSize);
XTensor * reduce_a = NewTensor(order_reduce, dimSize_reduce_a);
XTensor * b = NewTensor(order, dimSize);
XTensor * reduce_b = NewTensor(order_reduce, dimSize_reduce_b);
/* initialize variables */
a->SetData(aData, unitNum);
b->SetData(bData, unitNum);
/* call reduce max function */
ReduceMax(a, reduce_a, 0);
ReduceMax(b, reduce_b, 1);
//DTYPE* reduce_a_data = (DTYPE*)reduce_a->data;
//for (int i = 0; i < unitNum_a; i++)
// printf("%f ", *reduce_a_data++);
//printf("\n");
//DTYPE* reduce_b_data = (DTYPE*)reduce_b->data;
//for (int i = 0; i < unitNum_b; i++)
// printf("%f ", *reduce_b_data++);
/* check results */
cpuTest = reduce_a->CheckData(answer_a, unitNum_a) && reduce_b->CheckData(answer_b, unitNum_b);
/*
case 1: get the max value of the items along a dimension of the tensor.
In this case,
(2, 4) -> (4), dim = 0
(2, 4) -> (2), dim = 1
*/
bool TestReduceMax1()
{
/* an input tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* an output tensor of size (4) */
int tOrder1 = 1;
int * tDimSize1 = new int[tOrder1];
tDimSize1[0] = 4;
int tUnitNum1 = 1;
for (int i = 0; i < tOrder1; i++)
tUnitNum1 *= tDimSize1[i];
/* an output tensor of size (2) */
int tOrder2 = 1;
int * tDimSize2 = new int[tOrder2];
tDimSize2[0] = 2;
int tUnitNum2 = 1;
for (int i = 0; i < tOrder2; i++)
tUnitNum2 *= tDimSize2[i];
DTYPE sData[2][4] = { {0.0F, 5.0F, 2.0F, 3.0F},
{4.0F, 1.0F, 6.0F, 7.0F} };
DTYPE answer1[4] = {4.0F, 5.0F, 6.0F, 7.0F};
DTYPE answer2[2] = {5.0F, 7.0F};
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t1 = NewTensor(tOrder1, tDimSize1);
XTensor * t2 = NewTensor(tOrder2, tDimSize2);
/* initialize variables */
s->SetData(sData, sUnitNum);
t1->SetZeroAll();
t2->SetZeroAll();
/* call ReduceMax function */
ReduceMax(s, t1, 0);
ReduceMax(s, t2, 1);
/* check results */
cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * reduce_aGPU = NewTensor(order_reduce, dimSize_reduce_a, X_FLOAT);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * reduce_bGPU = NewTensor(order_reduce, dimSize_reduce_b, X_FLOAT);
/* Initialize variables */
aGPU->SetData(aData, unitNum);
bGPU->SetData(bData, unitNum);
/* call reduce max function */
ReduceMax(aGPU, reduce_aGPU, 0);
ReduceMax(bGPU, reduce_bGPU, 1);
/* check results */
gpuTest = reduce_aGPU->CheckData(answer_a, unitNum_a) && reduce_bGPU->CheckData(answer_b, unitNum_b);
/* destroy variables */
delete aGPU, bGPU, reduce_aGPU, reduce_bGPU;
delete[] dimSize, dimSize_reduce_a, dimSize_reduce_b;
return cpuTest && gpuTest;
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0);
XTensor * tGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0);
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
tGPU1->SetZeroAll();
tGPU2->SetZeroAll();
/* call ReduceMax function */
ReduceMax(sGPU, tGPU1, 0);
ReduceMax(sGPU, tGPU2, 1);
/* check results */
gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2);
/* destroy variables */
delete s;
delete t1;
delete t2;
delete sGPU;
delete tGPU1;
delete tGPU2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
return cpuTest;
/* destroy variables */
delete s;
delete t1;
delete t2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest;
#endif // USE_CUDA
}
bool TestReduceMaxForLargescale()
{
/* a tensor of size 10000 * 500 */
int order = 2;
int order_reduce = 1;
int * dimSize = new int[order];
dimSize[0] = 10000;
dimSize[1] = 500;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
/* a tensor of size 500 */
int * dimSize_reduce_a = new int[order_reduce];
dimSize_reduce_a[0] = 500;
int unitNum_a = 1;
for (int i = 0; i < order_reduce; i++)
unitNum_a *= dimSize_reduce_a[i];
/* a tensor of size 10000 */
int * dimSize_reduce_b = new int[order_reduce];
dimSize_reduce_b[0] = 10000;
}
int unitNum_b = 1;
for (int i = 0; i < order_reduce; i++)
unitNum_b *= dimSize_reduce_b[i];
DTYPE * data = new DTYPE[5000000];
DTYPE * tmp = data;
for (int i = 0; i < unitNum; i++)
*tmp++ = 1;
DTYPE answer_a[500];
for (int i = 0; i < unitNum_a; i++)
answer_a[i] = 1;
DTYPE answer_b[10000];
for (int i = 0; i < unitNum_b; i++)
answer_b[i] = 1;
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(order, dimSize);
XTensor * reduce_a = NewTensor(order_reduce, dimSize_reduce_a);
XTensor * b = NewTensor(order, dimSize);
XTensor * reduce_b = NewTensor(order_reduce, dimSize_reduce_b);
/* initialize variables */
a->SetData(data, unitNum);
b->SetData(data, unitNum);
/* call reduce max function */
ReduceMax(a, reduce_a, 0);
ReduceMax(b, reduce_b, 1);
/* check results */
cpuTest = reduce_a->CheckData(answer_a, unitNum_a) && reduce_b->CheckData(answer_b, unitNum_b);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * reduce_aGPU = NewTensor(order_reduce, dimSize_reduce_a, X_FLOAT);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * reduce_bGPU = NewTensor(order_reduce, dimSize_reduce_b, X_FLOAT);
/* Initialize variables */
aGPU->SetData(data, unitNum);
bGPU->SetData(data, unitNum);
/* call reduce max function */
ReduceMax(aGPU, reduce_aGPU, 0);
ReduceMax(bGPU, reduce_bGPU, 1);
/* check results */
gpuTest = reduce_aGPU->CheckData(answer_a, unitNum_a) && reduce_bGPU->CheckData(answer_b, unitNum_b);
/* other cases */
/*
TODO!!
*/
/* destroy variables */
delete aGPU, bGPU, reduce_aGPU, reduce_bGPU;
delete[] dimSize, dimSize_reduce_a, dimSize_reduce_b;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
return cpuTest;
#endif // USE_CUDA
/* test for ReduceMax Function */
bool TestReduceMax()
{
XPRINT(0, stdout, "[TEST ReduceMax] get the max value of the items along a dimension of the tensor\n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestReduceMax1();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* other cases */
/* other cases test */
/*
TODO!!
*/
/* test for Sum Function */
extern "C"
bool TestReduceMax()
{
XPRINT(0, stdout, "[TEST ReduceMax]\n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestReduceMax1();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestReduceMaxForLargescale();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
///* other cases test */
///*
//TODO!!
//*/
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n");
XPRINT(0, stdout, "\n");
return returnFlag;
return returnFlag;
}
} // namespace nt(NiuTrans.Tensor)
} // namespace nts(NiuTrans.Tensor)
......@@ -16,20 +16,19 @@
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-30
*/
#ifndef __TEST_REDUCEMAX_H__
#define __TEST_REDUCEMAX_H__
#include "../core/ReduceMax.h"
#include "../core/reduce/ReduceMax.h"
namespace nts { // namespace nt(NiuTrans.Tensor)
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for ReduceMax Function */
extern "C"
bool TestReduceMax();
} // namespace nt(NiuTrans.Tensor)
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_REDUCEMAX_H__
......@@ -19,244 +19,146 @@
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../core/ReduceMean.h"
#include "../core/ReduceMax.h"
#include "../core/ReduceSum.h"
#include "TReduceMean.h"
namespace nts { // namespace nt(NiuTrans.Tensor)
/* case 1 */
bool TestReduceMean1()
{
/* a tensor of size 2 * 4 */
int order = 2;
int order_reduce = 1;
int * dimSize = new int[order];
dimSize[0] = 2;
dimSize[1] = 4;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
/* a tensor of size 4 */
int * dimSize_reduce_a = new int[order_reduce];
dimSize_reduce_a[0] = 4;
int unitNum_a = 1;
for (int i = 0; i < order_reduce; i++)
unitNum_a *= dimSize_reduce_a[i];
/* a tensor of size 2 */
int * dimSize_reduce_b = new int[order_reduce];
dimSize_reduce_b[0] = 2;
int unitNum_b = 1;
for (int i = 0; i < order_reduce; i++)
unitNum_b *= dimSize_reduce_b[i];
DTYPE aData[2][4] = { { 0.0, 1.0, 2.0, 3.0 },
{ 4.0, 5.0, 6.0, 7.0 } };
DTYPE bData[2][4] = { { 1.0, -1.0, -3.0, -5.0 },
{ -7.0, -9.0, -11.0, -13.0 } };
DTYPE answer_a[4] = { 2.0, 3.0, 4.0, 5.0 };
DTYPE answer_b[2] = { -2.0, -10.0 };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(order, dimSize);
XTensor * reduce_a = NewTensor(order_reduce, dimSize_reduce_a);
XTensor * b = NewTensor(order, dimSize);
XTensor * reduce_b = NewTensor(order_reduce, dimSize_reduce_b);
/* initialize variables */
a->SetData(aData, unitNum);
b->SetData(bData, unitNum);
/* call reduce mean function */
ReduceMean(a, reduce_a, 0);
ReduceMean(b, reduce_b, 1);
//DTYPE* reduce_a_data = (DTYPE*)reduce_a->data;
//for (int i = 0; i < unitNum_a; i++)
// printf("%f ", *reduce_a_data++);
//printf("\n");
//DTYPE* reduce_b_data = (DTYPE*)reduce_b->data;
//for (int i = 0; i < unitNum_b; i++)
// printf("%f ", *reduce_b_data++);
/* check results */
cpuTest = reduce_a->CheckData(answer_a, unitNum_a) && reduce_b->CheckData(answer_b, unitNum_b);
/* case 1: get the mean value along a dimension of the tensor */
bool TestReduceMean1()
{
/* a tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a tensor of size (4) */
int tOrder1 = 1;
int * tDimSize1 = new int[tOrder1];
tDimSize1[0] = 4;
int tUnitNum1 = 1;
for (int i = 0; i < tOrder1; i++)
tUnitNum1 *= tDimSize1[i];
/* a tensor of size (2) */
int tOrder2 = 1;
int * tDimSize2 = new int[tOrder2];
tDimSize2[0] = 2;
int tUnitNum2 = 1;
for (int i = 0; i < tOrder2; i++)
tUnitNum2 *= tDimSize2[i];
DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE answer1[4] = {2.0F, 3.0F, 4.0F, 5.0F};
DTYPE answer2[2] = {1.5F, 5.5F};
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t1 = NewTensor(tOrder1, tDimSize1);
XTensor * t2 = NewTensor(tOrder2, tDimSize2);
/* initialize variables */
s->SetData(sData, sUnitNum);
t1->SetZeroAll();
t2->SetZeroAll();
/* call ReduceMean function */
ReduceMean(s, t1, 0);
ReduceMean(s, t2, 1);
/* check results */
cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * reduce_aGPU = NewTensor(order_reduce, dimSize_reduce_a, X_FLOAT);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * reduce_bGPU = NewTensor(order_reduce, dimSize_reduce_b, X_FLOAT);
/* Initialize variables */
aGPU->SetData(aData, unitNum);
bGPU->SetData(bData, unitNum);
/* call reduce mean function */
ReduceMean(aGPU, reduce_aGPU, 0);
ReduceMean(bGPU, reduce_bGPU, 1);
/* check results */
gpuTest = reduce_aGPU->CheckData(answer_a, unitNum_a) && reduce_bGPU->CheckData(answer_b, unitNum_b);
/* destroy variables */
delete aGPU, bGPU, reduce_aGPU, reduce_bGPU;
delete dimSize, dimSize_reduce_a, dimSize_reduce_b;
return cpuTest && gpuTest;
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0);
XTensor * tGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0);
/* Initialize variables */
sGPU->SetData(sData, sUnitNum);
tGPU1->SetZeroAll();
tGPU2->SetZeroAll();
/* call ReduceMean function */
ReduceMean(sGPU, tGPU1, 0);
ReduceMean(sGPU, tGPU2, 1);
/* check results */
cpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2);
/* destroy variables */
delete s;
delete t1;
delete t2;
delete sGPU;
delete tGPU1;
delete tGPU2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
return cpuTest;
/* destroy variables */
delete s;
delete t1;
delete t2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest;
#endif // USE_CUDA
}
bool TestReduceMeanForLargescale()
{
/* a tensor of size 10000 * 500 */
int order = 2;
int order_reduce = 1;
int * dimSize = new int[order];
dimSize[0] = 10000;
dimSize[1] = 500;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
/* a tensor of size 500 */
int * dimSize_reduce_a = new int[order_reduce];
dimSize_reduce_a[0] = 500;
}
int unitNum_a = 1;
for (int i = 0; i < order_reduce; i++)
unitNum_a *= dimSize_reduce_a[i];
/* a tensor of size 10000 */
int * dimSize_reduce_b = new int[order_reduce];
dimSize_reduce_b[0] = 10000;
int unitNum_b = 1;
for (int i = 0; i < order_reduce; i++)
unitNum_b *= dimSize_reduce_b[i];
DTYPE * data = new DTYPE[5000000];
DTYPE * tmp = data;
for (int i = 0; i < unitNum; i++)
*tmp++ = 1;
DTYPE answer_a[500];
for (int i = 0; i < unitNum_a; i++)
answer_a[i] = 1;
DTYPE answer_b[10000];
for (int i = 0; i < unitNum_b; i++)
answer_b[i] = 1;
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(order, dimSize);
XTensor * reduce_a = NewTensor(order_reduce, dimSize_reduce_a);
XTensor * b = NewTensor(order, dimSize);
XTensor * reduce_b = NewTensor(order_reduce, dimSize_reduce_b);
/* initialize variables */
a->SetData(data, unitNum);
b->SetData(data, unitNum);
/* call reduce max function */
ReduceMean(a, reduce_a, 0);
ReduceMean(b, reduce_b, 1);
/* check results */
cpuTest = reduce_a->CheckData(answer_a, unitNum_a) && reduce_b->CheckData(answer_b, unitNum_b);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * reduce_aGPU = NewTensor(order_reduce, dimSize_reduce_a, X_FLOAT);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * reduce_bGPU = NewTensor(order_reduce, dimSize_reduce_b, X_FLOAT);
/* Initialize variables */
aGPU->SetData(data, unitNum);
bGPU->SetData(data, unitNum);
/* call reduce max function */
ReduceMean(aGPU, reduce_aGPU, 0);
ReduceMean(bGPU, reduce_bGPU, 1);
/* check results */
gpuTest = reduce_aGPU->CheckData(answer_a, unitNum_a) && reduce_bGPU->CheckData(answer_b, unitNum_b);
/* other cases */
/*
TODO!!
*/
/* destroy variables */
delete aGPU, bGPU, reduce_aGPU, reduce_bGPU;
delete[] dimSize, dimSize_reduce_a, dimSize_reduce_b;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
return cpuTest;
#endif // USE_CUDA
/* test for ReduceMean Function */
bool TestReduceMean()
{
XPRINT(0, stdout, "[TEST ReduceMean] get the mean value along a dimension of the tensor \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestReduceMean1();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
/* other cases */
/*
TODO!!
*/
/* test for Sum Function */
extern "C"
bool TestReduceMean()
{
XPRINT(0, stdout, "[TEST ReduceMean]\n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestReduceMean1();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestReduceMeanForLargescale();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
///* other cases test */
///*
//TODO!!
//*/
///* other cases test */
///*
//TODO!!
//*/
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n");
XPRINT(0, stdout, "\n");
return returnFlag;
}
return returnFlag;
}
} // namespace nt(NiuTrans.Tensor)
} // namespace nts(NiuTrans.Tensor)
......@@ -22,15 +22,15 @@
#ifndef __TEST_REDUCEMEAN_H__
#define __TEST_REDUCEMEAN_H__
#include "../core/ReduceMean.h"
#include "../core/reduce/ReduceMean.h"
namespace nts { // namespace nt(NiuTrans.Tensor)
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for ReduceMean Function */
extern "C"
bool TestReduceMean();
} // namespace nt(NiuTrans.Tensor)
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_REDUCEMEAN_H__
......@@ -19,244 +19,151 @@
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../core/ReduceMean.h"
#include "../core/ReduceMax.h"
#include "../core/ReduceSum.h"
#include "TReduceSum.h"
namespace nts { // namespace nt(NiuTrans.Tensor)
/* case 1 */
bool TestReduceSum1()
{
/* a tensor of size 2 * 4 */
int order = 2;
int order_reduce = 1;
int * dimSize = new int[order];
dimSize[0] = 2;
dimSize[1] = 4;
namespace nts { // namespace nts(NiuTrans.Tensor)
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
/* a tensor of size 4 */
int * dimSize_reduce_a = new int[order_reduce];
dimSize_reduce_a[0] = 4;
int unitNum_a = 1;
for (int i = 0; i < order_reduce; i++)
unitNum_a *= dimSize_reduce_a[i];
/* a tensor of size 2 */
int * dimSize_reduce_b = new int[order_reduce];
dimSize_reduce_b[0] = 2;
int unitNum_b = 1;
for (int i = 0; i < order_reduce; i++)
unitNum_b *= dimSize_reduce_b[i];
DTYPE aData[2][4] = { { 0.0, 1.0, 2.0, 3.0 },
{ 4.0, 5.0, 6.0, 7.0 } };
DTYPE bData[2][4] = { { 1.0, -1.0, -3.0, -5.0 },
{ -7.0, -9.0, -11.0, -13.0 } };
DTYPE answer_a[4] = { 4.0, 6.0, 8.0, 10.0 };
DTYPE answer_b[2] = { -8.0, -40.0 };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(order, dimSize);
XTensor * reduce_a = NewTensor(order_reduce, dimSize_reduce_a);
XTensor * b = NewTensor(order, dimSize);
XTensor * reduce_b = NewTensor(order_reduce, dimSize_reduce_b);
/* initialize variables */
a->SetData(aData, unitNum);
b->SetData(bData, unitNum);
/* call reduce sum function */
ReduceSum(a, reduce_a, 0);
ReduceSum(b, reduce_b, 1);
//DTYPE* reduce_a_data = (DTYPE*)reduce_a->data;
//for (int i = 0; i < unitNum_a; i++)
// printf("%f ", *reduce_a_data++);
//printf("\n");
//DTYPE* reduce_b_data = (DTYPE*)reduce_b->data;
//for (int i = 0; i < unitNum_b; i++)
// printf("%f ", *reduce_b_data++);
/* check results */
cpuTest = reduce_a->CheckData(answer_a, unitNum_a) && reduce_b->CheckData(answer_b, unitNum_b);
/*
case 1: sum the items along a dimension of the tensor.
In this case,
(2, 4) -> (4), dim = 0
(2, 4) -> (2), dim = 1
*/
bool TestReduceSum1()
{
/* a tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a tensor of size (4) */
int tOrder1 = 1;
int * tDimSize1 = new int[tOrder1];
tDimSize1[0] = 4;
int tUnitNum1 = 1;
for (int i = 0; i < tOrder1; i++)
tUnitNum1 *= tDimSize1[i];
/* a tensor of size (2) */
int tOrder2 = 1;
int * tDimSize2 = new int[tOrder2];
tDimSize2[0] = 2;
int tUnitNum2 = 1;
for (int i = 0; i < tOrder2; i++)
tUnitNum2 *= tDimSize2[i];
DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE answer1[4] = {4.0F, 6.0F, 8.0F, 10.0F};
DTYPE answer2[2] = {6.0F, 22.0F};
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t1 = NewTensor(tOrder1, tDimSize1);
XTensor * t2 = NewTensor(tOrder2, tDimSize2);
/* initialize variables */
s->SetData(sData, sUnitNum);
t1->SetZeroAll();
t2->SetZeroAll();
/* call ReduceSum function */
ReduceSum(s, t1, 0);
ReduceSum(s, t2, 1);
/* check results */
cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * reduce_aGPU = NewTensor(order_reduce, dimSize_reduce_a, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * reduce_bGPU = NewTensor(order_reduce, dimSize_reduce_b, X_FLOAT, 1.0F, 0);
/* Initialize variables */
aGPU->SetData(aData, unitNum);
bGPU->SetData(bData, unitNum);
/* call reduce sum function */
ReduceSum(aGPU, reduce_aGPU, 0);
ReduceSum(bGPU, reduce_bGPU, 1);
/* check results */
gpuTest = reduce_aGPU->CheckData(answer_a, unitNum_a) && reduce_bGPU->CheckData(answer_b, unitNum_b);
/* destroy variables */
delete aGPU, bGPU, reduce_aGPU, reduce_bGPU;
delete[] dimSize, dimSize_reduce_a, dimSize_reduce_b;
return cpuTest && gpuTest;
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0);
XTensor * tGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0);
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
tGPU1->SetZeroAll();
tGPU2->SetZeroAll();
/* call ReduceSum function */
ReduceSum(sGPU, tGPU1, 0);
ReduceSum(sGPU, tGPU2, 1);
/* check results */
cpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2);
/* destroy variables */
delete s;
delete t1;
delete t2;
delete sGPU;
delete tGPU1;
delete tGPU2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
return cpuTest;
/* destroy variables */
delete s;
delete t1;
delete t2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest;
#endif // USE_CUDA
}
bool TestReduceSumForLargescale()
{
/* a tensor of size 10000 * 500 */
int order = 2;
int order_reduce = 1;
int * dimSize = new int[order];
dimSize[0] = 10000;
dimSize[1] = 500;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
/* a tensor of size 500 */
int * dimSize_reduce_a = new int[order_reduce];
dimSize_reduce_a[0] = 500;
int unitNum_a = 1;
for (int i = 0; i < order_reduce; i++)
unitNum_a *= dimSize_reduce_a[i];
/* a tensor of size 10000 */
int * dimSize_reduce_b = new int[order_reduce];
dimSize_reduce_b[0] = 10000;
}
int unitNum_b = 1;
for (int i = 0; i < order_reduce; i++)
unitNum_b *= dimSize_reduce_b[i];
DTYPE * data = new DTYPE[5000000];
DTYPE * tmp = data;
for (int i = 0; i < unitNum; i++)
*tmp++ = 1;
DTYPE answer_a[500];
for (int i = 0; i < unitNum_a; i++)
answer_a[i] = 10000;
DTYPE answer_b[10000];
for (int i = 0; i < unitNum_b; i++)
answer_b[i] = 500;
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(order, dimSize);
XTensor * reduce_a = NewTensor(order_reduce, dimSize_reduce_a);
XTensor * b = NewTensor(order, dimSize);
XTensor * reduce_b = NewTensor(order_reduce, dimSize_reduce_b);
/* initialize variables */
a->SetData(data, unitNum);
b->SetData(data, unitNum);
/* call reduce sum function */
ReduceSum(a, reduce_a, 0);
ReduceSum(b, reduce_b, 1);
/* check results */
cpuTest = reduce_a->CheckData(answer_a, unitNum_a) && reduce_b->CheckData(answer_b, unitNum_b);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * reduce_aGPU = NewTensor(order_reduce, dimSize_reduce_a, X_FLOAT);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * reduce_bGPU = NewTensor(order_reduce, dimSize_reduce_b, X_FLOAT);
/* Initialize variables */
aGPU->SetData(data, unitNum);
bGPU->SetData(data, unitNum);
/* call reduce max function */
ReduceSum(aGPU, reduce_aGPU, 0);
ReduceSum(bGPU, reduce_bGPU, 1);
/* check results */
gpuTest = reduce_aGPU->CheckData(answer_a, unitNum_a) && reduce_bGPU->CheckData(answer_b, unitNum_b);
/* other cases */
/*
TODO!!
*/
/* destroy variables */
delete aGPU, bGPU, reduce_aGPU, reduce_bGPU;
delete[] dimSize, dimSize_reduce_a, dimSize_reduce_b;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
return cpuTest;
#endif // USE_CUDA
/* test for ReduceSum Function */
bool TestReduceSum()
{
XPRINT(0, stdout, "[TEST ReduceSum] sum the items along a dimension of the tensor.\n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestReduceSum1();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* other cases */
/* other cases test */
/*
TODO!!
*/
/* test for ReduceSum Function */
extern "C"
bool TestReduceSum()
{
XPRINT(0, stdout, "[TEST ReduceSum]\n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestReduceSum1();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestReduceSumForLargescale();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
///* other cases test */
///*
//TODO!!
//*/
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n");
XPRINT(0, stdout, "\n");
return returnFlag;
return returnFlag;
}
} // namespace nt(NiuTrans.Tensor)
} // namespace nts(NiuTrans.Tensor)
......@@ -22,15 +22,15 @@
#ifndef __TEST_REDUCESUM_H__
#define __TEST_REDUCESUM_H__
#include "../core/ReduceSum.h"
#include "../core/reduce/ReduceSum.h"
namespace nts { // namespace nt(NiuTrans.Tensor)
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for ReduceSum Function */
extern "C"
bool TestReduceSum();
/* test for ReduceSum Function */
extern "C"
bool TestReduceSum();
} // namespace nt(NiuTrans.Tensor)
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_REDUCESUM_H__
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#include "TReduceSumSquared.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: squared sum of the items along a dimension of the tensor.
For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2.
Here a (2, 4) input is reduced along dim = 0 into a (4) output,
with one shift value per output cell.
<< return - true if every executed check (CPU, plus GPU when USE_CUDA
            is defined) matches the expected answer
*/
bool TestReduceSumSquared1()
{
    /* shapes: input (2, 4), output (4), shift (4) */
    const int sOrder = 2;
    const int tOrder = 1;
    const int shiftOrder = 1;
    int sDimSize[2] = {2, 4};
    int tDimSize[1] = {4};
    int shiftDimSize[1] = {4};

    /* number of cells in each tensor */
    const int sUnitNum = sDimSize[0] * sDimSize[1];
    const int tUnitNum = tDimSize[0];
    const int shiftUnitNum = shiftDimSize[0];

    DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE shiftData[4] = {1.0F, -1.0F, -1.0F, 0.0F};
    /* e.g. column 0: (0 - 1)^2 + (4 - 1)^2 = 10 */
    DTYPE answer[4] = {10.0F, 40.0F, 58.0F, 58.0F};

    /* CPU test */
    XTensor * s = NewTensor(sOrder, sDimSize);
    XTensor * t = NewTensor(tOrder, tDimSize);
    XTensor * shift = NewTensor(shiftOrder, shiftDimSize);

    s->SetData(sData, sUnitNum);
    shift->SetData(shiftData, shiftUnitNum);
    t->SetZeroAll();

    ReduceSumSquared(s, t, 0, shift);

    bool passed = t->CheckData(answer, tUnitNum);

#ifdef USE_CUDA
    /* GPU test: same scenario on tensors created with the extended NewTensor arguments */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
    XTensor * shiftGPU = NewTensor(shiftOrder, shiftDimSize, X_FLOAT, 1.0F, 0);

    sGPU->SetData(sData, sUnitNum);
    shiftGPU->SetData(shiftData, shiftUnitNum);
    tGPU->SetZeroAll();

    ReduceSumSquared(sGPU, tGPU, 0, shiftGPU);

    const bool gpuTest = tGPU->CheckData(answer, tUnitNum);
    passed = passed && gpuTest;

    delete sGPU;
    delete tGPU;
    delete shiftGPU;
#endif // USE_CUDA

    /* destroy variables */
    delete s;
    delete t;
    delete shift;

    return passed;
}
/*
case 2: squared sum of the items along a dimension of the tensor.
For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2.
Here a (2, 4) input is reduced along dim = 1 into a (2) output,
with one shift value per output cell.
<< return - true if every executed check (CPU, plus GPU when USE_CUDA
            is defined) matches the expected answer
*/
bool TestReduceSumSquared2()
{
    /* shapes: input (2, 4), output (2), shift (2) */
    const int sOrder = 2;
    const int tOrder = 1;
    const int shiftOrder = 1;
    int sDimSize[2] = {2, 4};
    int tDimSize[1] = {2};
    int shiftDimSize[1] = {2};

    /* number of cells in each tensor */
    const int sUnitNum = sDimSize[0] * sDimSize[1];
    const int tUnitNum = tDimSize[0];
    const int shiftUnitNum = shiftDimSize[0];

    DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE shiftData[2] = {-1.0F, 1.0F};
    /* e.g. row 0: 1^2 + 2^2 + 3^2 + 4^2 = 30 */
    DTYPE answer[2] = {30.0F, 86.0F};

    /* CPU test */
    XTensor * s = NewTensor(sOrder, sDimSize);
    XTensor * t = NewTensor(tOrder, tDimSize);
    XTensor * shift = NewTensor(shiftOrder, shiftDimSize);

    s->SetData(sData, sUnitNum);
    shift->SetData(shiftData, shiftUnitNum);
    t->SetZeroAll();

    ReduceSumSquared(s, t, 1, shift);

    bool passed = t->CheckData(answer, tUnitNum);

#ifdef USE_CUDA
    /* GPU test: same scenario on tensors created with the extended NewTensor arguments */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
    XTensor * shiftGPU = NewTensor(shiftOrder, shiftDimSize, X_FLOAT, 1.0F, 0);

    sGPU->SetData(sData, sUnitNum);
    shiftGPU->SetData(shiftData, shiftUnitNum);
    tGPU->SetZeroAll();

    ReduceSumSquared(sGPU, tGPU, 1, shiftGPU);

    const bool gpuTest = tGPU->CheckData(answer, tUnitNum);
    passed = passed && gpuTest;

    delete sGPU;
    delete tGPU;
    delete shiftGPU;
#endif // USE_CUDA

    /* destroy variables */
    delete s;
    delete t;
    delete shift;

    return passed;
}
/* other cases */
/*
TODO!!
*/
/*
test for ReduceSumSquared Function
Runs every ReduceSumSquared case, prints one pass/fail line per case
followed by an overall summary.
<< return - true only if all cases passed
*/
bool TestReduceSumSquared()
{
    XPRINT(0, stdout, "[TEST ReduceSumSquared] squared sum of the items along a dimension of the tensor\n");
    bool returnFlag = true, caseFlag = true;

    /* case 1 test */
    caseFlag = TestReduceSumSquared1();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }

    /* case 2 test (the report must name case 2, not case 1) */
    caseFlag = TestReduceSumSquared2();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> case 2 passed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    if (returnFlag) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> Failed!\n");
    }

    XPRINT(0, stdout, "\n");

    return returnFlag;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#ifndef __TEST_REDUCESUMSQUARED_H__
#define __TEST_REDUCESUMSQUARED_H__
#include "../core/reduce/ReduceSumSquared.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for ReduceSumSquared Function
Entry point of the ReduceSumSquared test suite, declared with C linkage.
<< return - true only if every test case passed
*/
extern "C"
bool TestReduceSumSquared();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_REDUCESUMSQUARED_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#include "TReduceVariance.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: variance of the items along a dimension of the tensor.
For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2.
Here a (2, 4) input is reduced along dim = 0 into a (4) output,
with the per-column mean supplied as a separate (4) tensor.
<< return - true if every executed check (CPU, plus GPU when USE_CUDA
            is defined) matches the expected answer
*/
bool TestReduceVariance1()
{
    /* an input tensor of size (2, 4) */
    int sOrder = 2;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 4;

    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    /* an output tensor of size (4) */
    int tOrder = 1;
    int * tDimSize = new int[tOrder];
    tDimSize[0] = 4;

    int tUnitNum = 1;
    for (int i = 0; i < tOrder; i++)
        tUnitNum *= tDimSize[i];

    /* a mean tensor of size (4) */
    int meanOrder = 1;
    int * meanDimSize = new int[meanOrder];
    meanDimSize[0] = 4;

    int meanUnitNum = 1;
    for (int i = 0; i < meanOrder; i++)
        meanUnitNum *= meanDimSize[i];

    DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE meanData[4] = {2.0F, 3.0F, 4.0F, 5.0F};
    /* e.g. column 0: ((0 - 2)^2 + (4 - 2)^2) / 2 = 4 */
    DTYPE answer[4] = {4.0F, 4.0F, 4.0F, 4.0F};

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * s = NewTensor(sOrder, sDimSize);
    XTensor * t = NewTensor(tOrder, tDimSize);
    XTensor * mean = NewTensor(meanOrder, meanDimSize);

    /* initialize variables */
    s->SetData(sData, sUnitNum);
    mean->SetData(meanData, meanUnitNum);
    t->SetZeroAll();

    /* call ReduceVariance function */
    ReduceVariance(s, t, 0, mean);

    /* check results */
    cpuTest = t->CheckData(answer, tUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
    XTensor * meanGPU = NewTensor(meanOrder, meanDimSize, X_FLOAT, 1.0F, 0);

    /* initialize variables */
    sGPU->SetData(sData, sUnitNum);
    meanGPU->SetData(meanData, meanUnitNum);
    tGPU->SetZeroAll();

    /* call ReduceVariance function */
    ReduceVariance(sGPU, tGPU, 0, meanGPU);

    /* check results: inspect the GPU output tensor; checking the CPU
       tensor t here would merely repeat the CPU verdict */
    gpuTest = tGPU->CheckData(answer, tUnitNum);

    /* destroy variables */
    delete s;
    delete t;
    delete mean;
    delete sGPU;
    delete tGPU;
    delete meanGPU;
    delete[] sDimSize;
    delete[] tDimSize;
    delete[] meanDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete s;
    delete t;
    delete mean;
    delete[] sDimSize;
    delete[] tDimSize;
    delete[] meanDimSize;

    return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/*
test for ReduceVariance Function
Runs every ReduceVariance case, prints one pass/fail line per case
followed by an overall summary.
<< return - true only if all cases passed
*/
bool TestReduceVariance()
{
    XPRINT(0, stdout, "[TEST ReduceVariance] variance of the items along a dimension of the tensor\n");

    bool allPassed = true;

    /* case 1 test */
    const bool case1Passed = TestReduceVariance1();
    if (case1Passed) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#ifndef __TEST_REDUCEVARIANCE_H__
#define __TEST_REDUCEVARIANCE_H__
#include "../core/reduce/ReduceVariance.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for ReduceVariance Function
Entry point of the ReduceVariance test suite, declared with C linkage.
<< return - true only if every test case passed
*/
extern "C"
bool TestReduceVariance();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_REDUCEVARIANCE_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#include "TScaleAndShift.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: scale and shift all tensor entires.
p = p * scale + shift
*/
bool TestScaleAndShift1()
{
/* a input tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE answer[2][4] = { {0.5F, 2.5F, 4.5F, 6.5F},
{8.5F, 10.5F, 12.5F, 14.5F} };
DTYPE scaleFactor = 2.0F;
DTYPE shiftFactor = 0.5F;
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
/* initialize variables */
s->SetData(sData, sUnitNum);
/* call ScaleAndShift function */
ScaleAndShift(s, scaleFactor, shiftFactor);
/* check results */
cpuTest = s->CheckData(answer, sUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
/* call ScaleAndShift function */
ScaleAndShift(sGPU, scaleFactor, shiftFactor);
/* check results */
gpuTest = sGPU->CheckData(answer, sUnitNum);
/* destroy variables */
delete s;
delete sGPU;
delete[] sDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete[] sDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for ScaleAndShift Function */
bool TestScaleAndShift()
{
    XPRINT(0, stdout, "[TEST ScaleAndShift] scale and shift all tensor entires\n");

    /* stays true only while every executed case succeeds */
    bool allPassed = true;

    /* case 1 test */
    if (TestScaleAndShift1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary followed by a blank separator line */
    if (allPassed) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#ifndef __TEST_SCALEANDSHIFT_H__
#define __TEST_SCALEANDSHIFT_H__
#include "../core/math/ScaleAndShift.h"
namespace nts { // namespace nts(NiuTrans.Tensor)

/*
entry point of the ScaleAndShift test (declared with C linkage)
<< return - a bool flag reporting the outcome of the test
*/
extern "C" bool TestScaleAndShift();

} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_SCALEANDSHIFT_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-04
*/
#include "TSelect.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test SelectRange function.
It generates a tensor with the data selected from the range [low, high) along the given dimension.
In this case, (2, 2, 4) -> (2, 2, 2), dim = 2, low = 1, high = 3.
*/
bool TestSelect1()
{
/* a input tensor of size (2, 2, 4) */
int sOrder = 3;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 2;
sDimSize[2] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a output tensor of size (2, 2, 2) */
int tOrder = 3;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
tDimSize[1] = 2;
tDimSize[2] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData[2][2][4] = { { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} },
{ {1.0F, 2.0F, 3.0F, 4.0F},
{5.0F, 6.0F, 7.0F, 8.0F} } };
DTYPE answer[2][2][2] = { { {1.0F, 2.0F},
{5.0F, 6.0F} },
{ {2.0F, 3.0F},
{6.0F, 7.0F} } };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize);
/* initialize variables */
s->SetData(sData, sUnitNum);
t->SetZeroAll();
/* call SelectRange function */
SelectRange(s, t, 2, 1, 3);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
tGPU->SetZeroAll();
/* call Select function */
SelectRange(sGPU, tGPU, 2, 1, 3);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s;
delete t;
delete sGPU;
delete tGPU;
delete[] sDimSize;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete t;
delete[] sDimSize;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for Select Function */
bool TestSelect()
{
    XPRINT(0, stdout, "[TEST Select] generate a tensor with seleccted data in range[low,high] along the given dimension \n");

    /* stays true only while every executed case succeeds */
    bool allPassed = true;

    /* case 1 test */
    if (TestSelect1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary followed by a blank separator line */
    if (allPassed) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-04
*/
#ifndef __TEST_SELECT_H__
#define __TEST_SELECT_H__
#include "../core/getandset/Select.h"
namespace nts { // namespace nts(NiuTrans.Tensor)

/*
entry point of the Select test (declared with C linkage)
<< return - a bool flag reporting the outcome of the test
*/
extern "C" bool TestSelect();

} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_SELECT_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#include "TSetAscendingOrder.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: set the cell to the ascending order along a given dimension.
*/
bool TestSetAscendingOrder1()
{
/* a input tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
int answer[2][4] = { {0, 1, 2, 3},
{0, 1, 2, 3} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize, X_INT);
/* initialize variables */
s->SetZeroAll();
/* call SetAscendingOrder function */
s->SetAscendingOrder(1);
/* check results */
cpuTest = s->CheckData(answer, sUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_INT, 1.0F, 0);
/* initialize variables */
sGPU->SetZeroAll();
/* call SetAscendingOrder function */
sGPU->SetAscendingOrder(1);
/* check results */
gpuTest = sGPU->CheckData(answer, sUnitNum);
/* destroy variables */
delete s;
delete sGPU;
delete[] sDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete[] sDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for SetAscendingOrder Function */
bool TestSetAscendingOrder()
{
    XPRINT(0, stdout, "[TEST SetAscendingOrder] set the cell to the ascending order along a given dimension \n");

    /* stays true only while every executed case succeeds */
    bool allPassed = true;

    /* case 1 test */
    if (TestSetAscendingOrder1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary followed by a blank separator line */
    if (allPassed) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#ifndef __TEST_SETASCENDINGORDER_H__
#define __TEST_SETASCENDINGORDER_H__
#include "../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)

/*
entry point of the SetAscendingOrder test (declared with C linkage)
<< return - a bool flag reporting the outcome of the test
*/
extern "C" bool TestSetAscendingOrder();

} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_SETASCENDINGORDER_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#include "TSetData.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: fill a tensor via SetDataRand(0.0, 1.0) and check that every entry is within 1.0 of zero. */
bool TestSetData1()
{
/* a input tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
DTYPE answer[2][4] = {0};
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
/* call SetData function */
s->SetDataRand(0.0, 1.0);
/* check results */
cpuTest = s->CheckData(answer, sUnitNum, 1.0F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
/* call SetDataRand function */
sGPU->SetDataRand(0.0, 1.0);
gpuTest = sGPU->CheckData(answer, sUnitNum, 1.0F);
/* destroy variables */
delete s;
delete sGPU;
delete[] sDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete[] sDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for SetData Function */
bool TestSetData()
{
    XPRINT(0, stdout, "[TEST SetData] set the data of tensor \n");

    /* stays true only while every executed case succeeds */
    bool allPassed = true;

    /* case 1 test */
    if (TestSetData1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary followed by a blank separator line */
    if (allPassed) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#ifndef __TEST_SETDATA_H__
#define __TEST_SETDATA_H__
#include "../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)

/*
entry point of the SetData test (declared with C linkage)
<< return - a bool flag reporting the outcome of the test
*/
extern "C" bool TestSetData();

} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_SETDATA_H__
......@@ -19,15 +19,15 @@
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-19
*/
#include "../XTensor.h"
#include "../XUtility.h"
#include "TSigmoid.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: test Sigmoid function and SigmoidBackward function.
* sigmoid function: y = 1/(1+exp(-x))
* backward computation: dE/ds = dE/dy * dy/dx
/*
case 1: test Sigmoid function and SigmoidBackward function.
sigmoid function: y = 1/(1+exp(-x))
backward computation: dE/ds = dE/dy * dy/dx
*/
bool TestSigmoid1()
{
......@@ -42,7 +42,9 @@ bool TestSigmoid1()
DTYPE xData[3] = {0.0F, 1.0F, 2.0F};
DTYPE gData[3] = {0.4F, 0.8F, 1.0F};
DTYPE answer[3];
DTYPE dedyData[3] = {-0.8F, -1.094F, -1.135F};
DTYPE yAnswer[3] = {0.5F, 0.731F, 0.881F};
DTYPE dedxAnswer[3] = {-0.2F, -0.215F, -0.119F};
/* CPU test */
bool cpuTest = true;
......@@ -57,41 +59,18 @@ bool TestSigmoid1()
/* initialize variables */
x->SetData(xData, sUnitNum);
g->SetData(gData, sUnitNum);
dedy->SetData(dedyData, sUnitNum);
y->SetZeroAll();
dedx->SetZeroAll();
/* call Sigmoid function */
Sigmoid(x, y);
/* cross_entropy: de/dy_i = -t_i / y_i */
DTYPE dedyData[3];
DTYPE * yProcessedData = (DTYPE*)y->data;
for (int i = 0; i < sUnitNum; i++)
dedyData[i] = - gData[i] / yProcessedData[i];
/* initialize variables */
dedy->SetData(dedyData, sUnitNum);
for (int i = 0; i < sUnitNum; i++)
answer[i] = dedyData[i] * yProcessedData[i] * (1 - yProcessedData[i]);
/* call SigmoidBackward function */
SigmoidBackward(g, y, x, dedy, dedx, NOLOSS);
/* check result */
printf("CPU Test:\n");
printf("Computer de/dx:");
DTYPE * checkData = (DTYPE*)dedx->data;
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", checkData[i]);
}
printf("\n");
printf("Real de/dx:");
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", answer[i]);
}
printf("\n");
cpuTest = y->CheckData(yAnswer, sUnitNum) && dedx->CheckData(dedxAnswer, sUnitNum);
#ifdef USE_CUDA
/* GPU test */
......@@ -107,66 +86,50 @@ bool TestSigmoid1()
/* initialize variables */
xGPU->SetData(xData, sUnitNum);
gGPU->SetData(gData, sUnitNum);
dedyGPU->SetData(dedyData, sUnitNum);
yGPU->SetZeroAll();
dedxGPU->SetZeroAll();
/* call Sigmoid function */
Sigmoid(xGPU, yGPU);
/* cross_entropy: de/dy_i = -t_i / y_i */
void * yProcessedDataGPU = (DTYPE*)yGPU->data;
int size = sUnitNum * yGPU->unitSize;
DTYPE * copy = new DTYPE[size];
XMemCopy(copy, -1, yProcessedDataGPU, yGPU->devID, size);
for (int i = 0; i < sUnitNum; i++) {
dedyData[i] = - gData[i] / *copy++;
}
/* initialize variables */
dedyGPU->SetData(dedyData, sUnitNum);
for (int i = 0; i < sUnitNum; i++)
answer[i] = dedyData[i] * yProcessedData[i] * (1 - yProcessedData[i]);
/* call SigmoidBackward function */
SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, NOLOSS);
/* check result */
printf("\nGPU Test:\n");
printf("Computer de/dx:");
checkData = (DTYPE*)dedxGPU->data;
size = sUnitNum * dedxGPU->unitSize;
DTYPE * copy1 = new DTYPE[size];
XMemCopy(copy1, -1, checkData, dedxGPU->devID, size);
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", copy1[i]);
}
printf("\n");
printf("Real de/dx:");
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", answer[i]);
}
printf("\n\n");
gpuTest = yGPU->CheckData(yAnswer, sUnitNum) && dedxGPU->CheckData(dedxAnswer, sUnitNum);
/* destroy variables */
delete x, y, g, dedx, dedy;
delete xGPU, yGPU, gGPU, dedxGPU, dedyGPU;
delete x;
delete y;
delete g;
delete dedx;
delete dedy;
delete xGPU;
delete yGPU;
delete gGPU;
delete dedxGPU;
delete dedyGPU;
delete[] sDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete x, y, g, dedx, dedy;
delete x;
delete y;
delete g;
delete dedx;
delete dedy;
delete[] sDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 2: test Sigmoid function and SigmoidBackward function.
* sigmoid function: y = 1/(1+exp(-x))
* backward computation: dE/ds = dE/dy * dy/dx
/*
case 2: test Sigmoid function and SigmoidBackward function.
sigmoid function: y = 1/(1+exp(-x))
backward computation: dE/ds = dE/dy * dy/dx
*/
bool TestSigmoid2()
{
......@@ -181,7 +144,9 @@ bool TestSigmoid2()
DTYPE xData[3] = {0.0F, 1.0F, 2.0F};
DTYPE gData[3] = {0.4F, 0.8F, 1.0F};
DTYPE answer[3] = {0.0F, 0.0F, 0.0F};
DTYPE dedyData[3] = {-0.8F, -1.094F, -1.135F};
DTYPE yAnswer[3] = {0.5F, 0.731F, 0.881F};
DTYPE dedxAnswer[3] = {-0.2F, -0.215F, -0.119F};
/* CPU test */
bool cpuTest = true;
......@@ -196,29 +161,21 @@ bool TestSigmoid2()
/* initialize variables */
x->SetData(xData, sUnitNum);
g->SetData(gData, sUnitNum);
dedy->SetZeroAll();
y->SetZeroAll();
dedx->SetZeroAll();
/* call Sigmoid function */
Sigmoid(x, y);
/* initialize variables */
dedy->SetData(dedyData, sUnitNum);
/* call SigmoidBackward function */
SigmoidBackward(g, y, x, dedy, dedx, CROSSENTROPY);
/* check result */
printf("CPU Test:\n");
printf("Computer de/dx:");
DTYPE * checkData = (DTYPE*)dedx->data;
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", checkData[i]);
}
printf("\n");
printf("Real de/dx:");
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", answer[i]);
}
printf("\n");
cpuTest = y->CheckData(yAnswer, sUnitNum) && dedx->CheckData(dedxAnswer, sUnitNum);
#ifdef USE_CUDA
/* GPU test */
......@@ -234,6 +191,7 @@ bool TestSigmoid2()
/* initialize variables */
xGPU->SetData(xData, sUnitNum);
gGPU->SetData(gData, sUnitNum);
dedyGPU->SetZeroAll();
yGPU->SetZeroAll();
dedxGPU->SetZeroAll();
......@@ -244,32 +202,29 @@ bool TestSigmoid2()
SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
/* check result */
printf("\nGPU Test:\n");
printf("Computer de/dx:");
checkData = (DTYPE*)dedxGPU->data;
int size = sUnitNum * dedxGPU->unitSize;
DTYPE * copy1 = new DTYPE[size];
XMemCopy(copy1, -1, checkData, dedxGPU->devID, size);
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", copy1[i]);
}
printf("\n");
printf("Real de/dx:");
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", answer[i]);
}
printf("\n\n");
gpuTest = yGPU->CheckData(yAnswer, sUnitNum) && dedxGPU->CheckData(dedxAnswer, sUnitNum);
/* destroy variables */
delete x, y, g, dedx, dedy;
delete xGPU, yGPU, gGPU, dedxGPU, dedyGPU;
delete x;
delete y;
delete g;
delete dedx;
delete dedy;
delete xGPU;
delete yGPU;
delete gGPU;
delete dedxGPU;
delete dedyGPU;
delete[] sDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete x, y, g, dedx, dedy;
delete x;
delete y;
delete g;
delete dedx;
delete dedy;
delete[] sDimSize;
return cpuTest;
......@@ -282,10 +237,9 @@ bool TestSigmoid2()
*/
/* test for Sigmoid Function */
extern "C"
bool TestSigmoid()
{
XPRINT(0, stdout, "[TEST SIGMOID] -------------\n");
XPRINT(0, stdout, "[TEST SIGMOID] sigmoid function and its backward computation \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
......
......@@ -24,8 +24,10 @@
#include "TSoftmax.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: test Softmax function.
* softmax function: y = e^x / \sum_{i} e^{x_i}
/*
case 1: test Softmax function.
softmax function: y = e^x / \sum_{i} e^{x_i}
*/
bool TestSoftmax1()
{
......@@ -59,21 +61,7 @@ bool TestSoftmax1()
Softmax(x, y, 1);
/* check result */
printf("CPU Test:\n");
printf("Softmax Result:");
DTYPE * checkData = (DTYPE*)y->data;
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", checkData[i]);
}
printf("\n");
printf("Real Result:");
for (int i = 0; i < sDimSize[0]; i++) {
for (int j = 0; j < sDimSize[1]; j++) {
printf("\t%f", answer[i][j]);
}
}
printf("\n");
cpuTest = y->CheckData(answer, sUnitNum);
#ifdef USE_CUDA
/* GPU test */
......@@ -91,28 +79,13 @@ bool TestSoftmax1()
Softmax(xGPU, yGPU, 1);
/* check result */
printf("\nGPU Test:\n");
printf("Computer de/dx:");
checkData = (DTYPE*)yGPU->data;
int size = sUnitNum * yGPU->unitSize;
DTYPE * copy = new DTYPE[size];
XMemCopy(copy, -1, checkData, yGPU->devID, size);
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", copy[i]);
}
printf("\n");
printf("Real Result:");
for (int i = 0; i < sDimSize[0]; i++) {
for (int j = 0; j < sDimSize[1]; j++) {
printf("\t%f", answer[i][j]);
}
}
printf("\n");
gpuTest = yGPU->CheckData(answer, sUnitNum);
/* destroy variables */
delete x, y;
delete xGPU, yGPU;
delete x;
delete y;
delete xGPU;
delete yGPU;
delete[] sDimSize;
return cpuTest && gpuTest;
......@@ -125,11 +98,13 @@ bool TestSoftmax1()
#endif // USE_CUDA
}
/* case 2: test SoftmaxBackward function.
* SoftmaxBackward function: dE/dx_j = -gold_j + y_j
/*
case 2: test SoftmaxBackward function.
SoftmaxBackward function: dE/dx_j = -gold_j + y_j
*/
bool TestSoftmax2()
{
/* an input tensor of size (1, 3) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 1;
......@@ -141,7 +116,7 @@ bool TestSoftmax2()
DTYPE xData[1][3] = { {0.0F, 1.0F, 2.0F} };
DTYPE gData[1][3] = { {0.0F, 0.0F, 1.0F} };
DTYPE answer[3] = {0.090031F, 0.244728F, -0.334759F};
DTYPE dedxAnswer[3] = {0.090031F, 0.244728F, -0.334759F};
/* CPU test */
bool cpuTest = true;
......@@ -163,31 +138,10 @@ bool TestSoftmax2()
/* call Softmax function */
Softmax(x, y, 1);
/* check result */
printf("CPU Test:\n");
printf("Softmax Result:");
DTYPE * checkData = (DTYPE*)y->data;
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", checkData[i]);
}
printf("\n");
/* call SoftmaxBackward function */
SoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY);
/* check result */
printf("Computer de/dx:");
checkData = (DTYPE*)dedx->data;
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", checkData[i]);
}
printf("\n");
printf("Real de/dx:");
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", answer[i]);
}
printf("\n");
cpuTest = dedx->CheckData(dedxAnswer, sUnitNum);
#ifdef USE_CUDA
/* GPU test */
......@@ -210,44 +164,33 @@ bool TestSoftmax2()
/* call Softmax function */
Softmax(xGPU, yGPU, 1);
/* check result */
printf("\nGPU Test:\n");
printf("Softmax Result:");
checkData = (DTYPE*)y->data;
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", checkData[i]);
}
printf("\n");
/* call SoftmaxBackward function */
SoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY);
/* check result */
printf("Computer de/dx:");
checkData = (DTYPE*)dedxGPU->data;
int size = sUnitNum * dedxGPU->unitSize;
DTYPE * copy = new DTYPE[size];
XMemCopy(copy, -1, checkData, dedxGPU->devID, size);
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", copy[i]);
}
printf("\n");
printf("Real de/dx:");
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", answer[i]);
}
printf("\n");
gpuTest = dedxGPU->CheckData(dedxAnswer, sUnitNum);
/* destroy variables */
delete x, y, g, dedx, dedy;
delete xGPU, yGPU, gGPU, dedxGPU, dedyGPU;
delete x;
delete y;
delete g;
delete dedx;
delete dedy;
delete xGPU;
delete yGPU;
delete gGPU;
delete dedxGPU;
delete dedyGPU;
delete[] sDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete x, y, g, dedx, dedy;
delete x;
delete y;
delete g;
delete dedx;
delete dedy;
delete[] sDimSize;
return cpuTest;
......@@ -260,10 +203,9 @@ bool TestSoftmax2()
*/
/* test for Softmax Function */
extern "C"
bool TestSoftmax()
{
XPRINT(0, stdout, "[TEST SOFTMAX] -------------\n");
XPRINT(0, stdout, "[TEST SOFTMAX] softmax function and its backward computation \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
......
......@@ -19,15 +19,14 @@
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-04-30
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../core/Sort.h"
#include "TSort.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1 */
/* case 1: sort the tensor along a given dimension */
bool TestSort1()
{
/* a tensor of size 2 * 4 */
/* a tensor of size (2, 4) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 2;
......@@ -37,33 +36,25 @@ bool TestSort1()
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE aData[2][4] = { { 0.0, 1.0, 2.0, 3.0 },
{ 4.0, 5.0, 6.0, 7.0 } };
DTYPE answer[2][4] = { { 4.0, 5.0, 6.0, 7.0 },
{ 0.0, 1.0, 2.0, 3.0 } };
DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE answer[2][4] = { {4.0F, 5.0F, 6.0F, 7.0F},
{0.0F, 1.0F, 2.0F, 3.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(order, dimSize);
XTensor * b = NewTensor(order, dimSize);
b->dataType = X_INT;
XTensor * b = NewTensor(order, dimSize, X_INT);
/* initialize variables */
a->SetData(aData, unitNum);
b->SetZeroAll();
/* call sort function */
/* call Sort function */
Sort(a, b, 0);
DTYPE* check1 = (DTYPE*)a->data;
for (int i = 0; i < 8; i++)
printf("%f ", *check1++);
printf("\n");
int* check2 = (int*)b->data;
for (int i = 0; i < 8; i++)
printf("%d ", *check2++);
printf("\n");
/* check results */
cpuTest = a->CheckData(answer, unitNum);
#ifdef USE_CUDA
......@@ -71,11 +62,12 @@ bool TestSort1()
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT);
bGPU->dataType = X_INT;
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_INT, 1.0F, 0);
/* Initialize variables */
aGPU->SetData(aData, unitNum);
bGPU->SetZeroAll();
/* call Sort function */
Sort(aGPU, bGPU, 0);
......@@ -84,21 +76,26 @@ bool TestSort1()
gpuTest = aGPU->CheckData(answer, unitNum);
/* destroy variables */
delete a, b, aGPU, bGPU;
delete a;
delete b;
delete aGPU;
delete bGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
bool TestSort2()
{
/* a tensor of size 2 * 4 */
/* a tensor of size (2, 4) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 2;
......@@ -108,32 +105,24 @@ bool TestSort2()
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE aData[2][4] = { { 0.0, 1.0, 2.0, 3.0 },
{ 4.0, 5.0, 6.0, 7.0 } };
DTYPE answer[2][4] = { { 3.0, 2.0, 1.0, 0.0 },
{ 7.0, 6.0, 5.0, 4.0 } };
DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE answer[2][4] = { {3.0F, 2.0F, 1.0F, 0.0F},
{7.0F, 6.0F, 5.0F, 4.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(order, dimSize);
XTensor * b = NewTensor(order, dimSize);
b->dataType = X_INT;
XTensor * b = NewTensor(order, dimSize, X_INT);
/* initialize variables */
a->SetData(aData, unitNum);
/* call sort function */
/* call Sort function */
Sort(a, b, 1);
DTYPE* check1 = (DTYPE*)a->data;
for (int i = 0; i < 8; i++)
printf("%f ", *check1++);
printf("\n");
int* check2 = (int*)b->data;
for (int i = 0; i < 8; i++)
printf("%d ", *check2++);
printf("\n");
/* check results */
cpuTest = a->CheckData(answer, unitNum);
......@@ -142,9 +131,9 @@ bool TestSort2()
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT);
bGPU->dataType = X_INT;
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_INT, 1.0F, 0);
/* Initialize variables */
aGPU->SetData(aData, unitNum);
......@@ -155,27 +144,32 @@ bool TestSort2()
gpuTest = aGPU->CheckData(answer, unitNum);
/* destroy variables */
delete a, b, aGPU, bGPU;
delete a;
delete b;
delete aGPU;
delete bGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for Sum Function */
extern "C"
/* test for Sort Function */
bool TestSort()
{
XPRINT(0, stdout, "[TEST SORT]\n");
XPRINT(0, stdout, "[TEST SORT] sort the tensor along a given dimension \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
......@@ -195,6 +189,7 @@ bool TestSort()
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -22,13 +22,13 @@
#ifndef __TEST_SORT_H__
#define __TEST_SORT_H__
#include "../core/Sort.h"
#include "../core/sort/Sort.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for Sum Function */
/* test for Sort Function */
extern "C"
bool TestSort();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_SUM_H__
#endif // __TEST_SORT_H__
......@@ -19,18 +19,17 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-13
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../core/Split.h"
#include "../XList.h"
#include "TSplit.h"
namespace nts { // namespace nt(NiuTrans.Tensor)
/* case 1: transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3)
* In this case, 4 * 3 -> 2 * 2 * 3, whereToSplit=0, splitNum=2.
/*
case 1: transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3)
In this case, (4, 3) -> (2, 2, 3), whereToSplit=0, splitNum=2.
*/
bool TestSplit1()
{
/* a source tensor of size 4 * 3 */
/* a source tensor of size (4, 3) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 4;
......@@ -40,7 +39,7 @@ bool TestSplit1()
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a target tensor of size 2 * 2 * 3 */
/* a target tensor of size (2, 2, 3) */
int tOrder = 3;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
......@@ -109,12 +108,13 @@ bool TestSplit1()
#endif // USE_CUDA
}
/* case 2: transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3)
* In this case, 3 * 4 -> 2 * 3 * 2, whereToSplit=1, splitNum=2.
/*
case 2: transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3)
In this case, (3, 4) -> (2, 3, 2), whereToSplit=1, splitNum=2.
*/
bool TestSplit2()
{
/* a source tensor of size 3 * 4 */
/* a source tensor of size (3, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 3;
......@@ -124,7 +124,7 @@ bool TestSplit2()
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a target tensor of size 2 * 3 * 2 */
/* a target tensor of size (2, 3, 2) */
int tOrder = 3;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
......@@ -194,8 +194,9 @@ bool TestSplit2()
#endif // USE_CUDA
}
/* case 3: split a big tensor into small tensors
* In this case, 3 * 4 -> 2 * (3 * 2) , whereToSplit=1, splitNum=2.
/*
case 3: split a big tensor into small tensors
In this case, (3, 4) -> 2 * (3, 2) , whereToSplit=1, splitNum=2.
*/
bool TestSplit3()
{
......@@ -203,7 +204,7 @@ bool TestSplit3()
XList tList;
tList = XList();
/* a source tensor of size (3 * 4) */
/* a source tensor of size (3, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 3;
......@@ -213,7 +214,7 @@ bool TestSplit3()
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a target tensor of size (3 * 2) */
/* a target tensor of size (3, 2) */
int tOrder1 = 2;
int * tDimSize1 = new int[tOrder1];
tDimSize1[0] = 3;
......@@ -313,10 +314,9 @@ TODO!!
*/
/* test for Split Function */
extern "C"
bool TestSplit()
bool TestSplit()
{
XPRINT(0, stdout, "[TEST SPLIT] -------------\n");
XPRINT(0, stdout, "[TEST SPLIT] split a big tensor into small tensors \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_SPLIT_H__
#define __TEST_SPLIT_H__
#include "../core/Split.h"
#include "../core/shape/Split.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -19,15 +19,14 @@
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-04-30
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "../core/Sum.h"
#include "TSum.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1 */
/* case 1: tensor summation c = a + b * \beta */
bool TestSum1()
{
/* a tensor of size 2 * 4 */
/* a tensor of size (2, 4) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 2;
......@@ -37,12 +36,12 @@ bool TestSum1()
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE aData[2][4] = { {0.0, 1.0, 2.0, 3.0},
{4.0, 5.0, 6.0, 7.0} };
DTYPE bData[2][4] = { {1.0, -1.0, -3.0, -5.0},
{-7.0, -9.0, -11.0, -13.0} };
DTYPE answer[2][4] = { {1.0, 0.0, -1.0, -2.0},
{-3.0, -4.0, -5.0, -6.0} };
DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE bData[2][4] = { {1.0F, -1.0F, -3.0F, -5.0F},
{-7.0F, -9.0F, -11.0F, -13.0F} };
DTYPE answer[2][4] = { {1.0F, 0.0F, -1.0F, -2.0F},
{-3.0F, -4.0F, -5.0F, -6.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -80,22 +79,27 @@ bool TestSum1()
gpuTest = aGPU->CheckData(answer, unitNum);
/* destroy variables */
delete a, b, aGPU, bGPU;
delete a;
delete b;
delete aGPU;
delete bGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 2 */
/* case 2: tensor summation c = a + b * \beta */
bool TestSum2()
{
/* a tensor of size 2 * 4 */
/* a tensor of size (2, 4) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 2;
......@@ -105,12 +109,12 @@ bool TestSum2()
for (int i = 0; i < order; i++) {
unitNum *= dimSize[i];
}
DTYPE aData[2][4] = { {0.0, 1.0, 2.0, 3.0},
{4.0, 5.0, 6.0, 7.0} };
DTYPE bData[2][4] = { {1.0, -1.0, -3.0, -5.0},
{-7.0, -9.0, -11.0, -13.0} };
DTYPE answer[2][4] = { {0.5, 0.5, 0.5, 0.5},
{0.5, 0.5, 0.5, 0.5} };
DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE bData[2][4] = { {1.0F, -1.0F, -3.0F, -5.0F},
{-7.0F, -9.0F, -11.0F, -13.0F} };
DTYPE answer[2][4] = { {0.5F, 0.5F, 0.5F, 0.5F},
{0.5F, 0.5F, 0.5F, 0.5F} };
float beta = 0.5F;
/* CPU test */
......@@ -126,7 +130,7 @@ bool TestSum2()
b->SetData(bData, unitNum);
c->SetZeroAll();
/* call sum function */
/* call Sum function */
Sum(a, b, c, beta);
/* check results */
......@@ -146,15 +150,21 @@ bool TestSum2()
bGPU->SetData(bData, unitNum);
cGPU->SetZeroAll();
/* call sum function */
/* call Sum function */
Sum(aGPU, bGPU, cGPU, beta);
/* check results */
gpuTest = cGPU->CheckData(answer, unitNum);
/* destroy variables */
delete a, b, c, aGPU, bGPU, cGPU;
delete a;
delete b;
delete c;
delete aGPU;
delete bGPU;
delete cGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
......@@ -162,6 +172,7 @@ bool TestSum2()
delete b;
delete c;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
......@@ -172,8 +183,7 @@ bool TestSum2()
*/
/* test for Sum Function */
extern "C"
bool TestSum()
bool TestSum()
{
XPRINT(0, stdout, "[TEST SUM] tensor summation c = a + b * beta\n");
bool returnFlag = true, caseFlag = true;
......
......@@ -22,7 +22,7 @@
#ifndef __TEST_SUM_H__
#define __TEST_SUM_H__
#include "../core/Sum.h"
#include "../core/arithmetic/Sum.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#include "TSumByColumnTV.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test SumByColumnTV function
sum of a tensor and a vector (column vector) in a column by column manner.
In this case a (2, 4) tensor a and a (2, 1) column vector b are summed into
a separate (2, 4) output tensor c; the expected answer is a[i][j] + b[i].
<< return - true if the CPU result (and the GPU result when USE_CUDA is defined)
            matches the expected answer
*/
bool TestSumByColumnTV1()
{
    /* a tensor of size (2, 4) */
    int aOrder = 2;
    int aDimSize[2] = {2, 4};
    int aUnitNum = 1;
    for (int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    /* a tensor of size (2, 1) */
    int bOrder = 2;
    int bDimSize[2] = {2, 1};
    int bUnitNum = 1;
    for (int i = 0; i < bOrder; i++)
        bUnitNum *= bDimSize[i];

    /* a tensor of size (2, 4) */
    int cOrder = 2;
    int cDimSize[2] = {2, 4};
    int cUnitNum = 1;
    for (int i = 0; i < cOrder; i++)
        cUnitNum *= cDimSize[i];

    DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE bData[2][1] = { {1.0F},
                          {0.0F} };
    DTYPE answer[2][4] = { {1.0F, 2.0F, 3.0F, 4.0F},
                           {4.0F, 5.0F, 6.0F, 7.0F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensor(aOrder, aDimSize);
    XTensor * b = NewTensor(bOrder, bDimSize);
    XTensor * c = NewTensor(cOrder, cDimSize);

    /* initialize variables */
    a->SetData(aData, aUnitNum);
    b->SetData(bData, bUnitNum);
    /* clear the output tensor first, as the GPU path below does for cGPU */
    c->SetZeroAll();

    /* call SumByColumnTV function */
    SumByColumnTV(a, b, c);

    /* check results */
    cpuTest = c->CheckData(answer, cUnitNum);

    /* stays true when the library is built without CUDA */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* GPU test */

    /* create tensors on device 0 */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
    XTensor * cGPU = NewTensor(cOrder, cDimSize, X_FLOAT, 1.0F, 0);

    /* initialize variables */
    aGPU->SetData(aData, aUnitNum);
    bGPU->SetData(bData, bUnitNum);
    cGPU->SetZeroAll();

    /* call SumByColumnTV function */
    SumByColumnTV(aGPU, bGPU, cGPU);

    /* check results */
    gpuTest = cGPU->CheckData(answer, cUnitNum);

    /* destroy GPU variables */
    delete aGPU;
    delete bGPU;
    delete cGPU;
#endif // USE_CUDA

    /* destroy variables */
    delete a;
    delete b;
    delete c;

    return cpuTest && gpuTest;
}
/*
case 2: test SumByColumnTV function
sum of a tensor and a vector (column vector) in a column by column manner.
In this case no output tensor is passed to SumByColumnTV and the result
is checked in the (2, 4) input tensor a.
<< return - true if every check succeeds
*/
bool TestSumByColumnTV2()
{
    /* shape of the tensor operand: (2, 4) */
    int aOrder = 2;
    int aDimSize[2] = {2, 4};
    int aUnitNum = 1;
    for (int d = 0; d < aOrder; d++)
        aUnitNum *= aDimSize[d];

    /* shape of the column vector operand: (2, 1) */
    int bOrder = 2;
    int bDimSize[2] = {2, 1};
    int bUnitNum = 1;
    for (int d = 0; d < bOrder; d++)
        bUnitNum *= bDimSize[d];

    /* input data and the expected content of a after the call */
    DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE bData[2][1] = { {1.0F},
                          {0.0F} };
    DTYPE answer[2][4] = { {1.0F, 2.0F, 3.0F, 4.0F},
                           {4.0F, 5.0F, 6.0F, 7.0F} };

    /* CPU test: build the operands, run the function, compare */
    XTensor * a = NewTensor(aOrder, aDimSize);
    XTensor * b = NewTensor(bOrder, bDimSize);

    a->SetData(aData, aUnitNum);
    b->SetData(bData, bUnitNum);

    SumByColumnTV(a, b);

    bool cpuTest = a->CheckData(answer, aUnitNum);

    /* stays true when the library is built without CUDA */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* GPU test: same scenario with tensors created on device 0 */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);

    aGPU->SetData(aData, aUnitNum);
    bGPU->SetData(bData, bUnitNum);

    SumByColumnTV(aGPU, bGPU);

    gpuTest = aGPU->CheckData(answer, aUnitNum);

    /* release the GPU tensors */
    delete aGPU;
    delete bGPU;
#endif // USE_CUDA

    /* release the CPU tensors */
    delete a;
    delete b;

    return cpuTest && gpuTest;
}
/* other cases */
/*
TODO!!
*/
/*
test for SumByColumnTV Function
runs every test case, prints a pass/fail line per case through XPRINT
<< return - true only if all cases passed
*/
bool TestSumByColumnTV()
{
    XPRINT(0, stdout, "[TEST SumByColumnTV] sum of a tensor and a vector (column vector) in a column by column manner \n");

    bool allPassed = true;

    /* case 1 test */
    if (TestSumByColumnTV1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* case 2 test */
    if (TestSumByColumnTV2()) {
        XPRINT(0, stdout, ">> case 2 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }

    /* other cases test: TODO!! */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#ifndef __TEST_SUMBYCOLUMNTV_H__
#define __TEST_SUMBYCOLUMNTV_H__
#include "../core/arithmetic/SumByColumnTV.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for SumByColumnTV Function
declared with C linkage
<< return - true if all test cases passed, false otherwise
*/
extern "C"
bool TestSumByColumnTV();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_SUMBYCOLUMNTV_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#include "TSumByColumnVT.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test SumByColumnVT function
sum of a vector (column vector) and a tensor in a column by column manner.
In this case a (2, 1) column vector a and a (2, 4) tensor b are summed into
a separate (2, 1) output tensor c; the expected answer is a[i] plus the sum
of row i of b.
<< return - true if every check succeeds
*/
bool TestSumByColumnVT1()
{
    /* shape of the column vector operand: (2, 1) */
    int aOrder = 2;
    int aDimSize[2] = {2, 1};
    int aUnitNum = 1;
    for (int d = 0; d < aOrder; d++)
        aUnitNum *= aDimSize[d];

    /* shape of the tensor operand: (2, 4) */
    int bOrder = 2;
    int bDimSize[2] = {2, 4};
    int bUnitNum = 1;
    for (int d = 0; d < bOrder; d++)
        bUnitNum *= bDimSize[d];

    /* shape of the output: (2, 1) */
    int cOrder = 2;
    int cDimSize[2] = {2, 1};
    int cUnitNum = 1;
    for (int d = 0; d < cOrder; d++)
        cUnitNum *= cDimSize[d];

    /* input data and the expected output */
    DTYPE aData[2][1] = { {1.0F},
                          {0.0F} };
    DTYPE bData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE answer[2][1] = { {7.0F},
                           {22.0F} };

    /* CPU test: build the operands, run the function, compare */
    XTensor * a = NewTensor(aOrder, aDimSize);
    XTensor * b = NewTensor(bOrder, bDimSize);
    XTensor * c = NewTensor(cOrder, cDimSize);

    a->SetData(aData, aUnitNum);
    b->SetData(bData, bUnitNum);
    c->SetZeroAll();

    SumByColumnVT(a, b, c);

    bool cpuTest = c->CheckData(answer, cUnitNum);

    /* stays true when the library is built without CUDA */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* GPU test: same scenario with tensors created on device 0 */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
    XTensor * cGPU = NewTensor(cOrder, cDimSize, X_FLOAT, 1.0F, 0);

    aGPU->SetData(aData, aUnitNum);
    bGPU->SetData(bData, bUnitNum);
    cGPU->SetZeroAll();

    SumByColumnVT(aGPU, bGPU, cGPU);

    gpuTest = cGPU->CheckData(answer, cUnitNum);

    /* release the GPU tensors */
    delete aGPU;
    delete bGPU;
    delete cGPU;
#endif // USE_CUDA

    /* release the CPU tensors */
    delete a;
    delete b;
    delete c;

    return cpuTest && gpuTest;
}
/*
case 2: test SumByColumnVT function
sum of a vector (column vector) and a tensor in a column by column manner.
In this case no output tensor is passed to SumByColumnVT and the result
is checked in the (2, 1) input vector a.
<< return - true if every check succeeds
*/
bool TestSumByColumnVT2()
{
    /* shape of the column vector operand: (2, 1) */
    int aOrder = 2;
    int aDimSize[2] = {2, 1};
    int aUnitNum = 1;
    for (int d = 0; d < aOrder; d++)
        aUnitNum *= aDimSize[d];

    /* shape of the tensor operand: (2, 4) */
    int bOrder = 2;
    int bDimSize[2] = {2, 4};
    int bUnitNum = 1;
    for (int d = 0; d < bOrder; d++)
        bUnitNum *= bDimSize[d];

    /* input data and the expected content of a after the call */
    DTYPE aData[2][1] = { {1.0F},
                          {0.0F} };
    DTYPE bData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE answer[2][1] = { {7.0F},
                           {22.0F} };

    /* CPU test: build the operands, run the function, compare */
    XTensor * a = NewTensor(aOrder, aDimSize);
    XTensor * b = NewTensor(bOrder, bDimSize);

    a->SetData(aData, aUnitNum);
    b->SetData(bData, bUnitNum);

    SumByColumnVT(a, b);

    bool cpuTest = a->CheckData(answer, aUnitNum);

    /* stays true when the library is built without CUDA */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* GPU test: same scenario with tensors created on device 0 */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);

    aGPU->SetData(aData, aUnitNum);
    bGPU->SetData(bData, bUnitNum);

    SumByColumnVT(aGPU, bGPU);

    gpuTest = aGPU->CheckData(answer, aUnitNum);

    /* release the GPU tensors */
    delete aGPU;
    delete bGPU;
#endif // USE_CUDA

    /* release the CPU tensors */
    delete a;
    delete b;

    return cpuTest && gpuTest;
}
/* other cases */
/*
TODO!!
*/
/*
test for SumByColumnVT Function
runs every test case, prints a pass/fail line per case through XPRINT
<< return - true only if all cases passed
*/
bool TestSumByColumnVT()
{
    XPRINT(0, stdout, "[TEST SumByColumnVT] sum of a vector (column vector) and a tensor in a column by column manner \n");

    bool allPassed = true;

    /* case 1 test */
    if (TestSumByColumnVT1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* case 2 test */
    if (TestSumByColumnVT2()) {
        XPRINT(0, stdout, ">> case 2 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }

    /* other cases test: TODO!! */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#ifndef __TEST_SUMBYCOLUMNVT_H__
#define __TEST_SUMBYCOLUMNVT_H__
#include "../core/arithmetic/SumByColumnVT.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for SumByColumnVT Function
declared with C linkage
<< return - true if all test cases passed, false otherwise
*/
extern "C"
bool TestSumByColumnVT();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_SUMBYCOLUMNVT_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#include "TTopK.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: get the top-k items along a given dimension.
In this case,
(2, 4) -> (2, 4), dim = 0, k = 2
(2, 4) -> (2, 4), dim = 1, k = 4
Both the selected values and their indices are compared with the expected answers.
<< return - true if every check succeeds
*/
bool TestTopK1()
{
    /* an input tensor of size (2, 4) */
    int sOrder = 2;
    int sDimSize[2] = {2, 4};
    int sUnitNum = 1;
    for (int d = 0; d < sOrder; d++)
        sUnitNum *= sDimSize[d];

    /* an output tensor of size (2, 4) */
    int tOrder = 2;
    int tDimSize[2] = {2, 4};
    int tUnitNum = 1;
    for (int d = 0; d < tOrder; d++)
        tUnitNum *= tDimSize[d];

    DTYPE sData[2][4] = { {5.0F, 1.0F, 2.0F, 8.0F},
                          {4.0F, 3.0F, 7.0F, 6.0F} };

    /* expected values and indices for dim = 0 */
    DTYPE tAnswer1[2][4] = { {5.0F, 3.0F, 7.0F, 8.0F},
                             {4.0F, 1.0F, 2.0F, 6.0F} };
    int indexAnswer1[2][4] = { {0, 1, 1, 0},
                               {1, 0, 0, 1} };

    /* expected values and indices for dim = 1 */
    DTYPE tAnswer2[2][4] = { {8.0F, 5.0F, 2.0F, 1.0F},
                             {7.0F, 6.0F, 4.0F, 3.0F} };
    int indexAnswer2[2][4] = { {3, 0, 2, 1},
                               {2, 3, 0, 1} };

    /* CPU test: one input, a value tensor and an index tensor per dimension */
    XTensor * s = NewTensor(sOrder, sDimSize);
    XTensor * t1 = NewTensor(tOrder, tDimSize);
    XTensor * t2 = NewTensor(tOrder, tDimSize);
    XTensor * index1 = NewTensor(tOrder, tDimSize, X_INT);
    XTensor * index2 = NewTensor(tOrder, tDimSize, X_INT);

    s->SetData(sData, sUnitNum);
    t1->SetZeroAll();
    t2->SetZeroAll();
    index1->SetZeroAll();
    index2->SetZeroAll();

    /* k is the full size of the selected dimension in both calls */
    TopK(s, t1, index1, 0, sDimSize[0]);
    TopK(s, t2, index2, 1, sDimSize[1]);

    bool cpuTest = t1->CheckData(tAnswer1, tUnitNum) &&
                   t2->CheckData(tAnswer2, tUnitNum) &&
                   index1->CheckData(indexAnswer1, tUnitNum) &&
                   index2->CheckData(indexAnswer2, tUnitNum);

    /* stays true when the library is built without CUDA */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* GPU test: same scenario with tensors created on device 0 */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tGPU1 = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tGPU2 = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
    XTensor * indexGPU1 = NewTensor(tOrder, tDimSize, X_INT, 1.0F, 0);
    XTensor * indexGPU2 = NewTensor(tOrder, tDimSize, X_INT, 1.0F, 0);

    sGPU->SetData(sData, sUnitNum);
    tGPU1->SetZeroAll();
    tGPU2->SetZeroAll();
    indexGPU1->SetZeroAll();
    indexGPU2->SetZeroAll();

    TopK(sGPU, tGPU1, indexGPU1, 0, sDimSize[0]);
    TopK(sGPU, tGPU2, indexGPU2, 1, sDimSize[1]);

    gpuTest = tGPU1->CheckData(tAnswer1, tUnitNum) &&
              tGPU2->CheckData(tAnswer2, tUnitNum) &&
              indexGPU1->CheckData(indexAnswer1, tUnitNum) &&
              indexGPU2->CheckData(indexAnswer2, tUnitNum);

    /* release the GPU tensors */
    delete sGPU;
    delete tGPU1;
    delete tGPU2;
    delete indexGPU1;
    delete indexGPU2;
#endif // USE_CUDA

    /* release the CPU tensors */
    delete s;
    delete t1;
    delete t2;
    delete index1;
    delete index2;

    return cpuTest && gpuTest;
}
/*
case 2: get the top-k items along a given dimension.
In this case, (2, 4) -> (2, 2), dim = 1, k = 2.
Both the selected values and their indices are compared with the expected answers.
<< return - true if every check succeeds
*/
bool TestTopK2()
{
    /* an input tensor of size (2, 4) */
    int sOrder = 2;
    int sDimSize[2] = {2, 4};
    int sUnitNum = 1;
    for (int d = 0; d < sOrder; d++)
        sUnitNum *= sDimSize[d];

    /* an output tensor of size (2, 2) */
    int tOrder = 2;
    int tDimSize[2] = {2, 2};
    int tUnitNum = 1;
    for (int d = 0; d < tOrder; d++)
        tUnitNum *= tDimSize[d];

    /* input data, expected values and expected indices */
    DTYPE sData[2][4] = { {5.0F, 1.0F, 2.0F, 8.0F},
                          {4.0F, 3.0F, 7.0F, 6.0F} };
    DTYPE tAnswer[2][2] = { {8.0F, 5.0F},
                            {7.0F, 6.0F} };
    int indexAnswer[2][2] = { {3, 0},
                              {2, 3} };

    /* CPU test: build the tensors, run the function, compare */
    XTensor * s = NewTensor(sOrder, sDimSize);
    XTensor * t = NewTensor(tOrder, tDimSize);
    XTensor * index = NewTensor(tOrder, tDimSize, X_INT);

    s->SetData(sData, sUnitNum);
    t->SetZeroAll();
    index->SetZeroAll();

    /* k is taken from the output size along dimension 1 */
    TopK(s, t, index, 1, tDimSize[1]);

    bool cpuTest = t->CheckData(tAnswer, tUnitNum) &&
                   index->CheckData(indexAnswer, tUnitNum);

    /* stays true when the library is built without CUDA */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* GPU test: same scenario with tensors created on device 0 */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
    XTensor * indexGPU = NewTensor(tOrder, tDimSize, X_INT, 1.0F, 0);

    sGPU->SetData(sData, sUnitNum);
    tGPU->SetZeroAll();
    indexGPU->SetZeroAll();

    TopK(sGPU, tGPU, indexGPU, 1, tDimSize[1]);

    gpuTest = tGPU->CheckData(tAnswer, tUnitNum) &&
              indexGPU->CheckData(indexAnswer, tUnitNum);

    /* release the GPU tensors */
    delete sGPU;
    delete tGPU;
    delete indexGPU;
#endif // USE_CUDA

    /* release the CPU tensors */
    delete s;
    delete t;
    delete index;

    return cpuTest && gpuTest;
}
/* other cases */
/*
TODO!!
*/
/*
test for TopK Function
runs every test case, prints a pass/fail line per case through XPRINT
<< return - true only if all cases passed
*/
bool TestTopK()
{
    XPRINT(0, stdout, "[TEST TopK] get the top-k items along a given dimension\n");

    bool allPassed = true;

    /* case 1 test */
    if (TestTopK1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* case 2 test */
    if (TestTopK2()) {
        XPRINT(0, stdout, ">> case 2 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }

    /* other cases test: TODO!! */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#ifndef __TEST_TOPK_H__
#define __TEST_TOPK_H__
#include "../core/sort/TopK.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for TopK Function
declared with C linkage
<< return - true if all test cases passed, false otherwise
*/
extern "C"
bool TestTopK();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_TOPK_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-13
*/
#include "../XList.h"
#include "TUnsqueeze.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: insert a dimension by copying the blocks for x times
(where x is the size of the inserted dimension)
In this case,
(2, 3) -> (2, 2, 3), dim=1, dSize=2
(2, 3) -> (2, 3, 2), dim=2, dSize=2
<< return - true if every check succeeds
*/
bool TestUnsqueeze1()
{
    /* a source tensor of size (2, 3) */
    int sOrder = 2;
    int sDimSize[2] = {2, 3};
    int sUnitNum = 1;
    for (int d = 0; d < sOrder; d++)
        sUnitNum *= sDimSize[d];

    /* first target tensor, of size (2, 2, 3) */
    int tOrder1 = 3;
    int tDimSize1[3] = {2, 2, 3};
    int tUnitNum1 = 1;
    for (int d = 0; d < tOrder1; d++)
        tUnitNum1 *= tDimSize1[d];

    /* second target tensor, of size (2, 3, 2) */
    int tOrder2 = 3;
    int tDimSize2[3] = {2, 3, 2};
    int tUnitNum2 = 1;
    for (int d = 0; d < tOrder2; d++)
        tUnitNum2 *= tDimSize2[d];

    DTYPE sData[2][3] = { {0.0F, 1.0F, 2.0F},
                          {3.0F, 4.0F, 5.0F} };

    /* expected result for dim=1: every source row appears twice */
    DTYPE answer1[2][2][3] = { { {0.0F, 1.0F, 2.0F},
                                 {0.0F, 1.0F, 2.0F} },
                               { {3.0F, 4.0F, 5.0F},
                                 {3.0F, 4.0F, 5.0F} } };

    /* expected result for dim=2: every source item appears twice */
    DTYPE answer2[2][3][2] = { { {0.0F, 0.0F},
                                 {1.0F, 1.0F},
                                 {2.0F, 2.0F} },
                               { {3.0F, 3.0F},
                                 {4.0F, 4.0F},
                                 {5.0F, 5.0F} } };

    /* CPU test: build the tensors, run both calls, compare */
    XTensor * s = NewTensor(sOrder, sDimSize);
    XTensor * t1 = NewTensor(tOrder1, tDimSize1);
    XTensor * t2 = NewTensor(tOrder2, tDimSize2);

    s->SetData(sData, sUnitNum);
    t1->SetZeroAll();
    t2->SetZeroAll();

    Unsqueeze(s, t1, 1, 2);
    Unsqueeze(s, t2, 2, 2);

    bool cpuTest = t1->CheckData(answer1, tUnitNum1) &&
                   t2->CheckData(answer2, tUnitNum2);

    /* stays true when the library is built without CUDA */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* GPU test: same scenario with tensors created on device 0 */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0);
    XTensor * tGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0);

    sGPU->SetData(sData, sUnitNum);
    tGPU1->SetZeroAll();
    tGPU2->SetZeroAll();

    Unsqueeze(sGPU, tGPU1, 1, 2);
    Unsqueeze(sGPU, tGPU2, 2, 2);

    gpuTest = tGPU1->CheckData(answer1, tUnitNum1) &&
              tGPU2->CheckData(answer2, tUnitNum2);

    /* release the GPU tensors */
    delete sGPU;
    delete tGPU1;
    delete tGPU2;
#endif // USE_CUDA

    /* release the CPU tensors */
    delete s;
    delete t1;
    delete t2;

    return cpuTest && gpuTest;
}
/* other cases */
/*
TODO!!
*/
/*
test for Unsqueeze Function
runs every test case, prints a pass/fail line per case through XPRINT
<< return - true only if all cases passed
*/
bool TestUnsqueeze()
{
    XPRINT(0, stdout, "[TEST Unsqueeze] insert a dimension by copying the blocks for x times\n");

    bool allPassed = true;

    /* case 1 test */
    if (TestUnsqueeze1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test: TODO!! */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#ifndef __TEST_UNSQUEEZE_H__
#define __TEST_UNSQUEEZE_H__
#include "../core/shape/Unsqueeze.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for Unsqueeze Function
declared with C linkage
<< return - true if all test cases passed, false otherwise
*/
extern "C"
bool TestUnsqueeze();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_UNSQUEEZE_H__
......@@ -19,14 +19,13 @@
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-6-24
*/
#include "TXMem.h"
#include "../XGlobal.h"
#include "../XUtility.h"
#include "../XMem.h"
#include "TXMem.h"
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* case 1: test memory pool class */
bool TestXMemCase1()
{
bool ok = true;
......@@ -83,6 +82,7 @@ bool TestXMemCase1()
return ok;
}
/* test for memory pool class */
bool TestXMem()
{
XPRINT(0, stdout, "[Test] Memory pool ... Began\n");
......@@ -93,11 +93,18 @@ bool TestXMem()
/* case 1 test */
caseFlag = TestXMemCase1();
if (!caseFlag) { returnFlag = false; XPRINT(0, stdout, ">> case 1 failed!\n"); }
else {XPRINT(0, stdout, ">> case 1 passed!\n");}
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
if (returnFlag) { XPRINT(0, stdout, ">> All Passed!\n"); }
else { XPRINT(0, stdout, ">> Failed!\n"); }
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
double endT = GetClock();
......@@ -106,4 +113,4 @@ bool TestXMem()
return returnFlag;
}
} /* end of the nts (NiuTrans.Tensor) namespace */
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
......@@ -22,13 +22,13 @@
#ifndef __TXMEM_H__
#define __TXMEM_H__
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
#include "../XMem.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* test for memory pool class */
extern "C"
bool TestXMem();
} /* end of the nts (NiuTrans.Tensor) namespace */
#endif
} // namespace nts(NiuTrans.Tensor)
#endif // __TXMEM_H__
......@@ -31,26 +31,42 @@ bool Test()
wrong = !TestConcatenate() || wrong;
wrong = !TestConcatenateSolely() || wrong;
//wrong = !TestCopyIndexed() || wrong;
wrong = !TestCopyValues() || wrong;
wrong = !TestMatrixMul() || wrong;
wrong = !TestMatrixMul2D() || wrong;
wrong = !TestMatrixMul2DParallel() || wrong;
//wrong = !TestMatrixMulBatched() || wrong;
wrong = !TestMatrixMulBatchedCPU() || wrong;
wrong = !TestMerge() || wrong;
wrong = !TestMultiply() || wrong;
wrong = !TestNegate() || wrong;
wrong = !TestNormalize() || wrong;
//wrong = !TestPower() || wrong;
wrong = !TestPower() || wrong;
wrong = !TestReduceMax() || wrong;
wrong = !TestReduceMean() || wrong;
wrong = !TestReduceSum() || wrong;
wrong = !TestReduceSumSquared() || wrong;
wrong = !TestReduceVariance() || wrong;
wrong = !TestScaleAndShift() || wrong;
wrong = !TestSelect() || wrong;
wrong = !TestSetAscendingOrder() || wrong;
wrong = !TestSetData() || wrong;
wrong = !TestSort() || wrong;
wrong = !TestSplit() || wrong;
wrong = !TestSum() || wrong;
wrong = !TestSumByColumnTV() || wrong;
//wrong = !TestSumByColumnVT() || wrong;
wrong = !TestTopK() || wrong;
wrong = !TestUnsqueeze() || wrong;
wrong = !TestXMem() || wrong;
//wrong = !TestHardTanH() || wrong;
//wrong = !TestIdentity() || wrong;
//wrong = !TestLogSoftmax() || wrong;
//wrong = !TestLoss() || wrong;
//wrong = !TestRectify() || wrong;
wrong = !TestSigmoid() || wrong;
//wrong = !TestSigmoid() || wrong;
//wrong = !TestSoftmax() || wrong;
/* other test */
......
......@@ -24,8 +24,12 @@
#include "TConcatenate.h"
#include "TConcatenateSolely.h"
#include "TCopyIndexed.h"
#include "TCopyValues.h"
#include "TMatrixMul.h"
#include "TMatrixMul2D.h"
#include "TMatrixMul2DParallel.h"
#include "TMatrixMulBatched.h"
#include "TMatrixMULBatchedCPU.h"
#include "TMerge.h"
#include "TMultiply.h"
......@@ -35,12 +39,24 @@
#include "TReduceMax.h"
#include "TReduceMean.h"
#include "TReduceSum.h"
#include "TReduceSumSquared.h"
#include "TReduceVariance.h"
#include "TScaleAndShift.h"
#include "TSelect.h"
#include "TSetAscendingOrder.h"
#include "TSetData.h"
#include "TSort.h"
#include "TSplit.h"
#include "TSum.h"
#include "TSumByColumnTV.h"
#include "TSumByColumnVT.h"
#include "TTopK.h"
#include "TUnsqueeze.h"
#include "TXMem.h"
#include "THardTanH.h"
#include "TIdentity.h"
#include "TLogSoftmax.h"
#include "TLoss.h"
#include "TRectify.h"
#include "TSigmoid.h"
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论