Commit abeb3e64 by liyinqiao

merged

parents dcabc2b0 414ff54f
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
#include "XMem.h" #include "XMem.h"
#include "XHeap.h" #include "XHeap.h"
#include "XBLAS.h" #include "XBLAS.h"
#include "core/MergeBlockLists.h" #include "core/shape/MergeBlockLists.h"
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -47,8 +47,8 @@ ...@@ -47,8 +47,8 @@
#include <cublas_v2.h> #include <cublas_v2.h>
#include <cuda.h> #include <cuda.h>
#include <curand.h> #include <curand.h>
#include "core/FlushToMem.cuh" #include "core/utilities/FlushToMem.cuh"
#include "core/SetAscendingOrder.cuh" #include "core/utilities/SetAscendingOrder.cuh"
#endif #endif
...@@ -555,6 +555,27 @@ bool XTensor::CheckData(const void * d, int num, int beg) ...@@ -555,6 +555,27 @@ bool XTensor::CheckData(const void * d, int num, int beg)
return true; return true;
} }
bool XTensor::CheckData(const void * d, int num, float tolerance, int beg)
{
if (data == NULL || d == NULL)
return false;
CheckNTErrors(!isSparse, "TODO");
CheckNTErrors(num == unitNum - beg, "Illegal size!");
DTYPE * valuePrt = (DTYPE*)data;
DTYPE value = 0;
DTYPE * answerPrt = (DTYPE*)d;
for (int i = beg; i < num; i++) {
value = ToCPU(devID, valuePrt);
if (fabs(value - *answerPrt) > tolerance)
return false;
valuePrt++;
answerPrt++;
}
return true;
}
/* /*
set the cell to the ascending order along a given dimension set the cell to the ascending order along a given dimension
>> dim - the dimension specified >> dim - the dimension specified
...@@ -697,6 +718,63 @@ DTYPE XTensor::Get3D(int d0, int d1, int d2) ...@@ -697,6 +718,63 @@ DTYPE XTensor::Get3D(int d0, int d1, int d2)
} }
/* /*
get the value of a cell in a 1d tensor in int type
>> i - index
<< return - value of cell(i) in int
*/
int XTensor::Get1DInt(int i)
{
    /* the tensor must be a 1d tensor of int type and the index must be in range */
    CheckNTErrors((order == 1), "Cannot get a 1d cell for a tensor whose order is not 1!");
    CheckNTErrors((i >= 0 && i < dimSize[0]), "dimension 0 is out of range!");
    CheckNTErrors((dataType == X_INT), "The tensor is not in int type.");

    /* index of the cell (named "dims" so that it does not hide the member dimSize) */
    int dims[1] = {i};
    void * value = GetCell(dims, 1);

    /* the cell is read as an int through ToCPUInt with the device id of the tensor */
    return ToCPUInt(devID, value);
}
/*
get the value of a cell in a 2d tensor in int type
(the tensor must be of order 2 and of data type X_INT)
>> ni - row index
>> mi - column index
<< return - value of cell(ni, mi) in int
*/
int XTensor::Get2DInt(int ni, int mi)
{
    /* the tensor must be a 2d tensor of int type and both indices must be in range */
    CheckNTErrors((order == 2), "Cannot get a 2d cell for a tensor whose order is not 2!");
    CheckNTErrors((ni >= 0 && ni < dimSize[0]), "dimension 0 is out of range!");
    CheckNTErrors((mi >= 0 && mi < dimSize[1]), "dimension 1 is out of range!");
    CheckNTErrors((dataType == X_INT), "The tensor is not in int type.");

    /* index of the cell */
    int dims[2] = {ni, mi};
    void * value = GetCell(dims, 2);

    /* the cell is read as an int through ToCPUInt with the device id of the tensor */
    return ToCPUInt(devID, value);
}
/*
get the value of a cell in a 3d tensor in int type
(the tensor must be of order 3 and of data type X_INT)
>> d0 - index of dimension 0
>> d1 - index of dimension 1
>> d2 - index of dimension 2
<< return - value of cell(d0, d1, d2) in int
*/
int XTensor::Get3DInt(int d0, int d1, int d2)
{
    /* the tensor must be a 3d tensor of int type and all indices must be in range */
    CheckNTErrors((order == 3), "Cannot get a 3d cell for a tensor whose order is not 3!");
    CheckNTErrors((d0 >= 0 && d0 < dimSize[0]), "dimension 0 is out of range!");
    CheckNTErrors((d1 >= 0 && d1 < dimSize[1]), "dimension 1 is out of range!");
    CheckNTErrors((d2 >= 0 && d2 < dimSize[2]), "dimension 2 is out of range!");
    CheckNTErrors((dataType == X_INT), "The tensor is not in int type.");

    /* index of the cell */
    int dims[3] = {d0, d1, d2};
    void * value = GetCell(dims, 3);

    /* the cell is read as an int through ToCPUInt with the device id of the tensor */
    return ToCPUInt(devID, value);
}
/*
get the value of a cell in the sparse tensor get the value of a cell in the sparse tensor
>> i - i-th tuple in the tuple list of the sparse tensor >> i - i-th tuple in the tuple list of the sparse tensor
<< return - value of the tuple << return - value of the tuple
......
...@@ -211,6 +211,9 @@ struct XTensor ...@@ -211,6 +211,9 @@ struct XTensor
/* check whether the data array is the same as the answer */ /* check whether the data array is the same as the answer */
bool CheckData(const void * answer, int num, int beg = 0); bool CheckData(const void * answer, int num, int beg = 0);
/* check whether the data array matches the answer within a given tolerance */
bool CheckData(const void * answer, int num, float tolerance, int beg = 0);
/* set the cell to the ascending order along a given dimension */ /* set the cell to the ascending order along a given dimension */
void SetAscendingOrder(int dim); void SetAscendingOrder(int dim);
...@@ -220,15 +223,24 @@ struct XTensor ...@@ -220,15 +223,24 @@ struct XTensor
/* get the pointer to a cell */ /* get the pointer to a cell */
void * GetCell(int index[], int size = -1); void * GetCell(int index[], int size = -1);
/* get the value of a cell in a 1d tensor */ /* get the default type value of a cell in a 1d tensor */
DTYPE Get1D(int i); DTYPE Get1D(int i);
/* get the value of a cell in a 2d tensor */ /* get the default type value of a cell in a 2d tensor */
DTYPE Get2D(int ni, int mi); DTYPE Get2D(int ni, int mi);
/* get the value of a cell in a 3d tensor */ /* get the default type value of a cell in a 3d tensor */
DTYPE Get3D(int d0, int d1, int d2); DTYPE Get3D(int d0, int d1, int d2);
/* get the int value of a cell in a 1d tensor */
int Get1DInt(int i);
/* get the int value of a cell in a 2d tensor */
int Get2DInt(int ni, int mi);
/* get the int value of a cell in a 3d tensor */
int Get3DInt(int d0, int d1, int d2);
/* get the value of a cell in a sparse tensor */ /* get the value of a cell in a sparse tensor */
DTYPE GetInSparse(int i); DTYPE GetInSparse(int i);
......
...@@ -26,43 +26,49 @@ ...@@ -26,43 +26,49 @@
#include "../XTensor.h" #include "../XTensor.h"
#include "Concatenate.h" #include "shape/Concatenate.h"
#include "ConcatenateSolely.h" #include "shape/ConcatenateSolely.h"
#include "CopyIndexed.h" #include "movement/CopyBlocks.h"
#include "CopyInGrid.h" #include "movement/CopyBlocksInGrid.h"
#include "CopyValues.h" #include "movement/CopyBlocksOnSite.h"
#include "FlushToMem.h" #include "movement/CopyData2D.h"
#include "MakeMergeBlockIndex.h" #include "movement/CopyIndexed.h"
#include "MakeSplitBlockIndex.h" #include "movement/CopyInGrid.h"
#include "MatrixMul.h" #include "movement/CopyValues.h"
#include "MatrixMul2D.h" #include "utilities/FlushToMem.h"
#include "MatrixMul2DMultiTheading.h" #include "shape/MakeMergeBlockIndex.h"
#include "MatrixMul2DParallel.h" #include "shape/MakeSplitBlockIndex.h"
#include "MatrixMulBatched.h" #include "arithmetic/MatrixMul.h"
#include "MatrixMULBatchedCPU.h" #include "arithmetic/MatrixMul2D.h"
#include "Merge.h" #include "arithmetic/MatrixMul2DMultiTheading.h"
#include "MergeBlockLists.h" #include "arithmetic/MatrixMul2DParallel.h"
#include "Multiply.h" #include "arithmetic/MatrixMulBatched.h"
#include "Negate.h" #include "arithmetic/MatrixMULBatchedCPU.h"
#include "Normalize.h" #include "shape/Merge.h"
#include "Permute.h" #include "shape/MergeBlockLists.h"
#include "Power.h" #include "arithmetic/Multiply.h"
#include "ReduceMax.h" #include "arithmetic/Negate.h"
#include "ReduceMean.h" #include "math/Normalize.h"
#include "ReduceStandardVariance.h" #include "shape/Permute.h"
#include "ReduceSum.h" #include "math/Power.h"
#include "ReduceSumSquared.h" #include "reduce/ReduceMax.h"
#include "ReduceVariance.h" #include "reduce/ReduceMean.h"
#include "ScaleAndShift.h" #include "reduce/ReduceStandardVariance.h"
#include "SetData.h" #include "reduce/ReduceSum.h"
#include "Sort.h" #include "reduce/ReduceSumSquared.h"
#include "Split.h" #include "reduce/ReduceVariance.h"
#include "Sum.h" #include "math/ScaleAndShift.h"
#include "SumByColumnTV.h" #include "getandset/Select.h"
#include "SumByColumnVT.h" #include "getandset/SetData.h"
#include "TopK.h" #include "sort/Sort.h"
#include "Unsqueeze.h" #include "shape/Split.h"
#include "XMatrixSegment.h" #include "arithmetic/Sum.h"
#include "XTensorBLAS.h" #include "arithmetic/SumByColumnTV.h"
#include "arithmetic/SumByColumnVT.h"
#include "sort/TopK.h"
#include "shape/Transpose.h"
#include "shape/Unsqueeze.h"
#include "utilities/XMatrixSegment.h"
#include "arithmetic/XTensorBLAS.h"
#endif // __CHEADER_H__ #endif // __CHEADER_H__
\ No newline at end of file
...@@ -219,9 +219,8 @@ public: ...@@ -219,9 +219,8 @@ public:
/* insert a dimension by copying the blocks for x times (where x is the size of the inserted dimension) */ /* insert a dimension by copying the blocks for x times (where x is the size of the inserted dimension) */
void Unsqueeze(XTensor * a, XTensor * b, int dim, int dSize); void Unsqueeze(XTensor * a, XTensor * b, int dim, int dSize);
/******************************************************************* /* segmentation and parallel processing for 2d tensors (i.e., matrices) */
segmentation and parallel processing for 2d tensors (i.e., matrices)
*/
/* segment a 2d tensor (i.e., matrix) into blocks and run jobs in parallel */ /* segment a 2d tensor (i.e., matrix) into blocks and run jobs in parallel */
static static
void RunParallel2D(XPRunner * parallelRunner, void * job, int opNum, int rowNum, int colNum, int argNum, ...); void RunParallel2D(XPRunner * parallelRunner, void * job, int opNum, int rowNum, int colNum, int argNum, ...);
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "MatrixMULBatchedCPU.h" #include "MatrixMULBatchedCPU.h"
#include "MatrixMul2D.h" #include "MatrixMul2D.h"
#include "XTensorBLAS.h" #include "XTensorBLAS.h"
...@@ -33,9 +33,9 @@ c_i = trans(a_i) * trans(b_i) * \alpha + c_i * \beta for each i in [0,count-1] ...@@ -33,9 +33,9 @@ c_i = trans(a_i) * trans(b_i) * \alpha + c_i * \beta for each i in [0,count-1]
>> transposedA - indicate whether the matrix a is transposed >> transposedA - indicate whether the matrix a is transposed
>> b - another list of input matrices (2d tensors) >> b - another list of input matrices (2d tensors)
>> transposedB - indicate whether the matrix b is transposed >> transposedB - indicate whether the matrix b is transposed
>> c - output matrix (2d tensor)
>> alpha - scalar >> alpha - scalar
>> beta - scalar >> beta - scalar
>> c - output matrix (2d tensor)
*/ */
void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA, void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA,
XList * b, MATRIX_TRANS_TYPE transposedB, XList * b, MATRIX_TRANS_TYPE transposedB,
...@@ -64,10 +64,6 @@ void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA, ...@@ -64,10 +64,6 @@ void MatrixMULBatchedCPU(XList * a, MATRIX_TRANS_TYPE transposedA,
} }
} }
//if(isUniform){
//}
//else{
for (int i = 0; i < a->count; i++) { for (int i = 0; i < a->count; i++) {
XTensor * ai = (XTensor*)a->GetItem(i); XTensor * ai = (XTensor*)a->GetItem(i);
XTensor * bi = (XTensor*)b->GetItem(i); XTensor * bi = (XTensor*)b->GetItem(i);
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MATRIXMULBATCHEDCPU_H__ #ifndef __MATRIXMULBATCHEDCPU_H__
#define __MATRIXMULBATCHEDCPU_H__ #define __MATRIXMULBATCHEDCPU_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XName.h" #include "../../XName.h"
#include "MatrixMul.h" #include "MatrixMul.h"
#include "MatrixMul2D.h" #include "MatrixMul2D.h"
#include "MatrixMULBatchedCPU.h" #include "MatrixMULBatchedCPU.h"
...@@ -65,13 +65,12 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -65,13 +65,12 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XLink::AddParamToHeadInt(c, transposedB); XLink::AddParamToHeadInt(c, transposedB);
XLink::AddParamToHead(c, alpha); XLink::AddParamToHead(c, alpha);
XLink::AddParamToHead(c, beta); XLink::AddParamToHead(c, beta);
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
int an = transposedA == X_TRANS ? a->dimSize[1] : a->dimSize[0]; int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
int am = transposedA == X_TRANS ? a->dimSize[0] : a->dimSize[1]; int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1];
int bn = transposedB == X_TRANS ? b->dimSize[1] : b->dimSize[0]; int bm = transposedB == X_TRANS ? b->dimSizeRDI[1] : b->dimSizeRDI[0];
int bm = transposedB == X_TRANS ? b->dimSize[0] : b->dimSize[1]; int cn = c->dimSizeRDI[1];
int cn = c->dimSize[0]; int cm = c->dimSizeRDI[0];
int cm = c->dimSize[1];
CheckNTErrors((am == bn && an == cn && bm == cm), CheckNTErrors((am == bn && an == cn && bm == cm),
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
...@@ -87,13 +86,13 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -87,13 +86,13 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA,
int cBlockNum = 1; int cBlockNum = 1;
for (int i = 2; i < a->order; i++) { for (int i = 2; i < a->order; i++) {
CheckNTErrors((a->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!"); CheckNTErrors((a->dimSizeRDI[i] == c->dimSizeRDI[i - 2 + b->order]), "Incorrect tensor sizes!");
aBlockNum *= a->dimSizeRDI[i]; aBlockNum *= a->dimSizeRDI[i];
cBlockNum *= a->dimSizeRDI[i]; cBlockNum *= a->dimSizeRDI[i];
} }
for (int i = 2; i < b->order; i++) { for (int i = 2; i < b->order; i++) {
CheckNTErrors((b->dimSizeRDI[i] == c->dimSizeRDI[i - 2 + a->order]), "Incorrect tensor sizes!"); CheckNTErrors((b->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!");
bBlockNum *= b->dimSizeRDI[i]; bBlockNum *= b->dimSizeRDI[i];
cBlockNum *= b->dimSizeRDI[i]; cBlockNum *= b->dimSizeRDI[i];
} }
...@@ -101,9 +100,9 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -101,9 +100,9 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XList * aList = new XList(10); XList * aList = new XList(10);
XList * bList = new XList(10); XList * bList = new XList(10);
XList * cList = new XList(10); XList * cList = new XList(10);
int aDimSize[2] = { -a->dimSize[0], a->dimSize[1] }; int aDimSize[2] = { a->dimSizeRDI[1], a->dimSizeRDI[0] };
int bDimSize[2] = { -b->dimSize[0], b->dimSize[1] }; int bDimSize[2] = { b->dimSizeRDI[1], b->dimSizeRDI[0] };
int cDimSize[2] = { -c->dimSize[0], c->dimSize[1] }; int cDimSize[2] = { c->dimSizeRDI[1], c->dimSizeRDI[0] };
bool isSparseMul = false; bool isSparseMul = false;
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL_H__ #ifndef __MATRIXMUL_H__
#define __MATRIXMUL_H__ #define __MATRIXMUL_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XName.h" #include "../../XName.h"
#include "MatrixMul2D.h" #include "MatrixMul2D.h"
#include "MatrixMul2D.cuh" #include "MatrixMul2D.cuh"
#include "MatrixMul2DParallel.h" #include "MatrixMul2DParallel.h"
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "MatrixMul2D.h" #include "MatrixMul2D.h"
#include "MatrixMul2D.cuh" #include "MatrixMul2D.cuh"
#include "XTensorBLAS.h" #include "XTensorBLAS.h"
...@@ -37,11 +37,13 @@ c = a * b * \alpha ...@@ -37,11 +37,13 @@ c = a * b * \alpha
>> aColSize - column size of matrix a >> aColSize - column size of matrix a
>> aRowSize - row size of matrix a >> aRowSize - row size of matrix a
>> b - a sparse matrix >> b - a sparse matrix
>> transposedA - indicates whether b is transposed >> transposedB - indicates whether b is transposed
>> bNonZeroNum - number of non-zero items in b >> bNonZeroNum - number of non-zero items in b
>> bColSize - column size of matrix b >> bColSize - column size of matrix b
>> bRowSize - row size of matrix b >> bRowSize - row size of matrix b
>> c - the resulting (dense) matrix >> c - the resulting (dense) matrix
>> cColSize - column size of matrix c
>> cRowSize - row size of matrix c
>> alpha - the scaling factor >> alpha - the scaling factor
*/ */
extern "C" __global__ extern "C" __global__
...@@ -147,7 +149,6 @@ void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -147,7 +149,6 @@ void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
if (!a->isSparse && !b->isSparse) { if (!a->isSparse && !b->isSparse) {
CheckNTErrors((!c->isSparse), "Illegal use of sparse matrix in multiplication!"); CheckNTErrors((!c->isSparse), "Illegal use of sparse matrix in multiplication!");
//cublasHandle_t * handle = GDevs->GetCudaHandle(a->devID);
cublasHandle_t * handle = a->mem == NULL ? GDevs.GetCudaHandle(a->devID) : a->mem->GetCublasHandle(); cublasHandle_t * handle = a->mem == NULL ? GDevs.GetCudaHandle(a->devID) : a->mem->GetCublasHandle();
/* !!!! might have problems */ /* !!!! might have problems */
...@@ -183,7 +184,6 @@ void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -183,7 +184,6 @@ void CudaMatrixMul2D(XTensor * a, MATRIX_TRANS_TYPE transposedA,
if (beta == 0) if (beta == 0)
c->SetZeroAll(); c->SetZeroAll();
else if (beta != 1.0F) { else if (beta != 1.0F) {
//XTensor::ScaleAndShift(c, beta, 0);
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
} }
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL2D_H__ #ifndef __MATRIXMUL2D_H__
#define __MATRIXMUL2D_H__ #define __MATRIXMUL2D_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "MatrixMul2DMultiTheading.h" #include "MatrixMul2DMultiTheading.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL2DMULTITHEADING_H__ #ifndef __MATRIXMUL2DMULTITHEADING_H__
#define __MATRIXMUL2DMULTITHEADING_H__ #define __MATRIXMUL2DMULTITHEADING_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,10 +19,10 @@ ...@@ -19,10 +19,10 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "MatrixMul2DParallel.h" #include "MatrixMul2DParallel.h"
#include "MatrixMul2DMultiTheading.h" #include "MatrixMul2DMultiTheading.h"
#include "XMatrixSegment.h" #include "../utilities/XMatrixSegment.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MATRIXMUL2DPARALLEL_H__ #ifndef __MATRIXMUL2DPARALLEL_H__
#define __MATRIXMUL2DPARALLEL_H__ #define __MATRIXMUL2DPARALLEL_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XName.h" #include "../../XName.h"
#include "MatrixMulBatched.h" #include "MatrixMulBatched.h"
#include "MatrixMULBatchedCPU.h" #include "MatrixMULBatchedCPU.h"
#include "XTensorBLAS.h" #include "XTensorBLAS.h"
...@@ -41,6 +41,7 @@ where trans() returns the transposed matrix if the flag is fired ...@@ -41,6 +41,7 @@ where trans() returns the transposed matrix if the flag is fired
>> c - where we keep a*b >> c - where we keep a*b
>> alpha - a coefficient >> alpha - a coefficient
>> beta - another coefficient >> beta - another coefficient
>> parallelRunner - parallel processing module
*/ */
void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA, void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * b, MATRIX_TRANS_TYPE transposedB,
...@@ -59,13 +60,12 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -59,13 +60,12 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XLink::AddParamToHeadInt(c, transposedB); XLink::AddParamToHeadInt(c, transposedB);
XLink::AddParamToHead(c, alpha); XLink::AddParamToHead(c, alpha);
XLink::AddParamToHead(c, beta); XLink::AddParamToHead(c, beta);
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
int an = transposedA == X_TRANS ? a->dimSize[1] : a->dimSize[0]; int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
int am = transposedA == X_TRANS ? a->dimSize[0] : a->dimSize[1]; int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1];
int bn = transposedB == X_TRANS ? b->dimSize[1] : b->dimSize[0]; int bm = transposedB == X_TRANS ? b->dimSizeRDI[1] : b->dimSizeRDI[0];
int bm = transposedB == X_TRANS ? b->dimSize[0] : b->dimSize[1]; int cn = c->dimSizeRDI[1];
int cn = c->dimSize[0]; int cm = c->dimSizeRDI[0];
int cm = c->dimSize[1];
CheckNTErrors((am == bn && an == cn && bm == cm), CheckNTErrors((am == bn && an == cn && bm == cm),
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
...@@ -87,9 +87,9 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -87,9 +87,9 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XList * aList = new XList(10); XList * aList = new XList(10);
XList * bList = new XList(10); XList * bList = new XList(10);
XList * cList = new XList(10); XList * cList = new XList(10);
int aDimSize[2] = { -a->dimSizeRDI[0], a->dimSizeRDI[1] }; int aDimSize[2] = { -a->dimSizeRDI[1], a->dimSizeRDI[0] };
int bDimSize[2] = { -b->dimSizeRDI[0], b->dimSizeRDI[1] }; int bDimSize[2] = { -b->dimSizeRDI[1], b->dimSizeRDI[0] };
int cDimSize[2] = { -c->dimSizeRDI[0], c->dimSizeRDI[1] }; int cDimSize[2] = { -c->dimSizeRDI[1], c->dimSizeRDI[0] };
for (int p = 0; p < blockNum; p++) { for (int p = 0; p < blockNum; p++) {
void * ap = (char*)a->data + aRealBlockSize * p; void * ap = (char*)a->data + aRealBlockSize * p;
...@@ -114,7 +114,8 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -114,7 +114,8 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
int devIDBackup; int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
CudaBLASMatrixMULList(a->mem != NULL ? a->mem->GetCublasHandle() : GDevs.GetCudaHandle(a->devID), cublasHandle_t * handle = a->mem != NULL ? a->mem->GetCublasHandle() : GDevs.GetCudaHandle(a->devID);
CudaBLASMatrixMULList(handle,
aList, transposedA, aList, transposedA,
bList, transposedB, bList, transposedB,
cList, aList->count, cList, aList->count,
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MATRIXMULBATCHED_H__ #ifndef __MATRIXMULBATCHED_H__
#define __MATRIXMULBATCHED_H__ #define __MATRIXMULBATCHED_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,12 +19,13 @@ ...@@ -19,12 +19,13 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XName.h" #include "../../XName.h"
#include "Multiply.h" #include "Multiply.h"
#include "Multiply.cuh" #include "Multiply.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
element-wise product of two tensors element-wise product of two tensors
c(i) = a(i)*b(i) + \alpha * c(i) c(i) = a(i)*b(i) + \alpha * c(i)
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "Multiply.h" #include "Multiply.h"
#include "Multiply.cuh" #include "Multiply.cuh"
...@@ -68,6 +68,7 @@ where |a_lead| means the size of the leading dimension of a ...@@ -68,6 +68,7 @@ where |a_lead| means the size of the leading dimension of a
>> a - tensor a >> a - tensor a
>> b - tensor b >> b - tensor b
>> c - result tensor >> c - result tensor
>> alpha - the coefficient
>> stride - the number of items we go over when move next along the leading dimension in a block >> stride - the number of items we go over when move next along the leading dimension in a block
>> ldSizeA - size of the leading dimension of a >> ldSizeA - size of the leading dimension of a
>> ldSizeB - size of the leading dimension of b >> ldSizeB - size of the leading dimension of b
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MULTIPLY_H__ #ifndef __MULTIPLY_H__
#define __MULTIPLY_H__ #define __MULTIPLY_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,15 +19,15 @@ ...@@ -19,15 +19,15 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "Negate.h" #include "Negate.h"
#include "Negate.cuh" #include "Negate.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
set every entry to its minus value set every entry to its minus value
>> a - the tensor we are processing >> a - the tensor we are processing
*/ */
void Negate(XTensor * a) void Negate(XTensor * a)
{ {
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "Negate.h" #include "Negate.h"
#include "Negate.cuh" #include "Negate.cuh"
...@@ -42,10 +42,10 @@ void KernelNegate(DTYPE * d, int size) ...@@ -42,10 +42,10 @@ void KernelNegate(DTYPE * d, int size)
} }
/* /*
set each entry to its negative value (CUDA Kernel) set each entry to its negative value (CUDA Kernel)
This is for float16 computation This is for float16 computation
>> d - pointer to the data array >> d - pointer to the data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelNegate(__half * d, int size) void KernelNegate(__half * d, int size)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __NEGATE_H__ #ifndef __NEGATE_H__
#define __NEGATE_H__ #define __NEGATE_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XName.h" #include "../../XName.h"
#include "Sum.h" #include "Sum.h"
#include "Sum.cuh" #include "Sum.cuh"
......
...@@ -19,12 +19,13 @@ ...@@ -19,12 +19,13 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "Sum.cuh" #include "Sum.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* /*
summation of data arrays (CUDA Kernel) summation of data arrays (CUDA Kernel)
c = a + b * \beta c = a + b * \beta
......
...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* summation of data arrays (CUDA Kernel) */ /* summation of data arrays (CUDA Kernel) */
extern "C" __global__ extern "C" __global__
void KernelADD(DTYPE * a, DTYPE * b, DTYPE * c, int size, DTYPE beta = (DTYPE)1.0); void KernelADD(DTYPE * a, DTYPE * b, DTYPE * c, int size, DTYPE beta = (DTYPE)1.0);
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __SUM_H__ #ifndef __SUM_H__
#define __SUM_H__ #define __SUM_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "SumByColumnTV.h" #include "SumByColumnTV.h"
#include "SumByColumnTV.cuh" #include "SumByColumnTV.cuh"
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "SumByColumnTV.h" #include "SumByColumnTV.h"
#include "SumByColumnTV.cuh" #include "SumByColumnTV.cuh"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __REDUCEMAX_CUH__ #ifndef __REDUCEMAX_CUH__
#define __REDUCEMAX_CUH__ #define __REDUCEMAX_CUH__
#include "ReduceMax.h" #include "../reduce/ReduceMax.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __SUMBYCOLUMNTV_H__ #ifndef __SUMBYCOLUMNTV_H__
#define __SUMBYCOLUMNTV_H__ #define __SUMBYCOLUMNTV_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "SumByColumnVT.h" #include "SumByColumnVT.h"
#include "SumByColumnVT.cuh" #include "SumByColumnVT.cuh"
......
...@@ -19,14 +19,15 @@ ...@@ -19,14 +19,15 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "SumByColumnVT.h" #include "SumByColumnVT.h"
#include "SumByColumnVT.cuh" #include "SumByColumnVT.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* /*
summation of a vector (column vector) and a tensor summation of a vector (column vector) and a tensor
c = a + \sum{col} b_col * \beta c = a + \sum{col} b_col * \beta
......
...@@ -22,11 +22,10 @@ ...@@ -22,11 +22,10 @@
#ifndef __SUMBYCOLUMNVT_H__ #ifndef __SUMBYCOLUMNVT_H__
#define __SUMBYCOLUMNVT_H__ #define __SUMBYCOLUMNVT_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* sum of a (column) vector and a tensor */ /* sum of a (column) vector and a tensor */
extern "C" extern "C"
void SumByColumnVT(XTensor * a, XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0); void SumByColumnVT(XTensor * a, XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0);
......
...@@ -20,8 +20,8 @@ ...@@ -20,8 +20,8 @@
*/ */
#include "XTensorBLAS.h" #include "XTensorBLAS.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XBLAS.h" #include "../../XBLAS.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XUtility.h" #include "../../XUtility.h"
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "XTensorBLAS.h" #include "XTensorBLAS.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __XTENSORBLAS_H__ #ifndef __XTENSORBLAS_H__
#define __XTENSORBLAS_H__ #define __XTENSORBLAS_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-06-14 * $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-06-14
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XDevice.h" #include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-04 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-04
*/ */
#include "../XUtility.h" #include "../../XUtility.h"
#include "../XName.h" #include "../../XName.h"
#include "Select.h" #include "Select.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
...@@ -33,7 +33,7 @@ c = select(a) ...@@ -33,7 +33,7 @@ c = select(a)
>> dim - the dimension along with which we do the job >> dim - the dimension along with which we do the job
>> low - lower bound >> low - lower bound
>> high - higher bound. >> high - higher bound.
Note that range [1,3] means that we select 1 and 2. Note that range [1,3] means that we select 1 and 2.
*/ */
void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high) void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high)
{ {
...@@ -48,7 +48,7 @@ void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high) ...@@ -48,7 +48,7 @@ void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high)
for(int i = 0; i < a->order; i++){ for(int i = 0; i < a->order; i++){
if(i == dim){ if(i == dim){
CheckNTErrors(low > 0 && low < a->dimSize[dim], "Illegal range specified!"); CheckNTErrors(low > 0 && low < a->dimSize[dim], "Illegal range specified!");
CheckNTErrors(high > 0 && high < a->dimSize[dim], "Illegal range specified!"); CheckNTErrors(high > 0 && high <= a->dimSize[dim], "Illegal range specified!");
} }
else{ else{
CheckNTErrors(a->dimSize[i] == c->dimSize[i], "The size of the dimensions should be same!"); CheckNTErrors(a->dimSize[i] == c->dimSize[i], "The size of the dimensions should be same!");
...@@ -62,20 +62,24 @@ void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high) ...@@ -62,20 +62,24 @@ void SelectRange(XTensor * a, XTensor * c, int dim, int low, int high)
XLink::AddParamToHeadInt(c, high); XLink::AddParamToHeadInt(c, high);
int stride = 1; int stride = 1;
for(int i = 0; i < dim; i++) int dimRDI = a->order - dim - 1;
for(int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i]; stride *= a->dimSizeRDI[i];
int copyTimes = 1;
for (int i = dimRDI + 1; i < a->order; i++)
copyTimes *= a->dimSizeRDI[i];
int blockSize = stride * (high - low) * a->unitSize; int blockSize = stride * (high - low) * a->unitSize;
int stepSizeS = stride * a->dimSize[dim] * a->unitSize; int stepSizeS = stride * a->dimSize[dim] * a->unitSize;
int stepSizeT = stride * c->dimSize[dim] * a->unitSize; int stepSizeT = stride * c->dimSize[dim] * a->unitSize;
char * s = (char*)a->data + stride * low * a->unitSize; char * s = (char*)a->data + stride * low * a->unitSize;
char * t = (char*)c->data; char * t = (char*)c->data;
for(int i = 0; i < high - low; i++){ for(int i = 0; i < copyTimes; i++){
XMemCopy(t, c->devID, s, a->devID, blockSize); XMemCopy(t, c->devID, s, a->devID, blockSize);
s += stepSizeS; s += stepSizeS;
t += stepSizeT; t += stepSizeT;
} }
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __SELECT_H__ #ifndef __SELECT_H__
#define __SELECT_H__ #define __SELECT_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
*/ */
#include "SetData.h" #include "SetData.h"
#include "CopyValues.h" #include "../movement/CopyValues.h"
#if !defined( WIN32 ) && !defined( _WIN32 ) #if !defined( WIN32 ) && !defined( _WIN32 )
#include "sys/time.h" #include "sys/time.h"
...@@ -68,7 +68,8 @@ void SetDataRand(XTensor * tensor, DTYPE low, DTYPE high) ...@@ -68,7 +68,8 @@ void SetDataRand(XTensor * tensor, DTYPE low, DTYPE high)
ShowNTErrors("TODO"); ShowNTErrors("TODO");
} }
} }
/* GPU code /*
GPU code
The trick here is to initialize the data on a temporary tensor on CPU. The trick here is to initialize the data on a temporary tensor on CPU.
The CPU data is then copied to GPU. The CPU data is then copied to GPU.
TODO: generate data points on GPUs straightforwardly. TODO: generate data points on GPUs straightforwardly.
......
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
#ifndef __SETDATA_H__ #ifndef __SETDATA_H__
#define __SETDATA_H__ #define __SETDATA_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -20,11 +20,12 @@ ...@@ -20,11 +20,12 @@
*/ */
#include <math.h> #include <math.h>
#include "../XTensor.h" #include "../../XTensor.h"
#include "Normalize.h" #include "Normalize.h"
#include "Normalize.cuh" #include "Normalize.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
normalized the data with normal distribution. For an input x, normalized the data with normal distribution. For an input x,
y = a * (x-mean)/sqrt(variance+\epsilon) + b y = a * (x-mean)/sqrt(variance+\epsilon) + b
......
...@@ -19,12 +19,13 @@ ...@@ -19,12 +19,13 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "Normalize.h" #include "Normalize.h"
#include "Normalize.cuh" #include "Normalize.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* /*
normalized the data with normal distribution (kernel code). For an input x, normalized the data with normal distribution (kernel code). For an input x,
......
...@@ -28,7 +28,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* normalized the data with normal distribution (Kernel code). For an input x, /*
normalized the data with normal distribution (Kernel code). For an input x,
y = a * (x-mean)/sqrt(variance+\epsilon) + b y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter
*/ */
...@@ -37,7 +38,8 @@ void KernelNormalize(DTYPE * input, DTYPE * output, DTYPE * mean, DTYPE * var, ...@@ -37,7 +38,8 @@ void KernelNormalize(DTYPE * input, DTYPE * output, DTYPE * mean, DTYPE * var,
DTYPE * a, DTYPE * b, DTYPE epsilon, DTYPE * a, DTYPE * b, DTYPE epsilon,
int stride, int strideNum, int blockNum); int stride, int strideNum, int blockNum);
/* normalized the data with normal distribution. For an input x, /*
normalized the data with normal distribution. For an input x,
y = a * (x-mean)/sqrt(variance+\epsilon) + b y = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter
*/ */
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __NORMALIZE_H__ #ifndef __NORMALIZE_H__
#define __NORMALIZE_H__ #define __NORMALIZE_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -20,15 +20,16 @@ ...@@ -20,15 +20,16 @@
*/ */
#include <math.h> #include <math.h>
#include "../XTensor.h" #include "../../XTensor.h"
#include "Power.h" #include "Power.h"
#include "Power.cuh" #include "Power.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
get the power(a, p) get the power(a, p)
>> a - the tensor >> a - the tensor
>> power - as it is >> p - as it is
*/ */
void Power(XTensor * a, DTYPE p) void Power(XTensor * a, DTYPE p)
{ {
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "Power.h" #include "Power.h"
#include "Power.cuh" #include "Power.cuh"
...@@ -87,9 +87,6 @@ __global__ ...@@ -87,9 +87,6 @@ __global__
void KernelPower(__half * d, __half p, int size) void KernelPower(__half * d, __half p, int size)
{ {
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__) #if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
//int i = blockDim.x * blockIdx.x + threadIdx.x;
//if (i < size)
// d[i] = hpow(d[i], p);
#else #else
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) if (i < size)
...@@ -126,9 +123,6 @@ void CudaPower(XTensor * a, DTYPE p) ...@@ -126,9 +123,6 @@ void CudaPower(XTensor * a, DTYPE p)
} }
else if (p != (DTYPE)1.0) { else if (p != (DTYPE)1.0) {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
//unsigned short p2 = FloatToFloat16(p);
//__half * pp = (__half*)&p2;
//KernelPower<<<blocks, threads>>>((__half*)a->data, *pp, a->unitNum);
} }
} }
else { else {
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __POWER_H__ #ifndef __POWER_H__
#define __POWER_H__ #define __POWER_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -26,9 +26,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -26,9 +26,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* /*
scale and shift all tensor entries scale and shift all tensor entries
p = p * scale + shift p = p * scale + shift
>> a - the tensor >> a - the tensor
>> scale - the scale factor >> scale - the scale factor
>> shift - the shift factor >> shift - the shift factor
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#include "ScaleAndShift.h" #include "ScaleAndShift.h"
#include "ScaleAndShift.cuh" #include "ScaleAndShift.cuh"
#include "../XDevice.h" #include "../../XDevice.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
...@@ -80,9 +80,7 @@ void KernelScaleAndShift(__half * d, int size, __half scale, __half shift) ...@@ -80,9 +80,7 @@ void KernelScaleAndShift(__half * d, int size, __half scale, __half shift)
/* /*
scale and shift all matrix entries scale and shift all matrix entries
p = p * scale + shift p = p * scale + shift
>> a - the tensor >> a - the tensor
>> scale - the scale factor >> scale - the scale factor
>> shift - the shift factor >> shift - the shift factor
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __SCALEANDSHIFT_CUH__ #ifndef __SCALEANDSHIFT_CUH__
#define __SCALEANDSHIFT_CUH__ #define __SCALEANDSHIFT_CUH__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __SCALEANDSHIFT_H__ #ifndef __SCALEANDSHIFT_H__
#define __SCALEANDSHIFT_H__ #define __SCALEANDSHIFT_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "CopyBlocks.h" #include "CopyBlocks.h"
#include "CopyBlocksOnSite.h" #include "CopyBlocksOnSite.h"
#include "CopyBlocksSelected.cuh" #include "CopyBlocksSelected.cuh"
...@@ -78,9 +78,11 @@ void CopyBlocks(void * source, int blockSize, int * sourceBlocks, int blockNum, ...@@ -78,9 +78,11 @@ void CopyBlocks(void * source, int blockSize, int * sourceBlocks, int blockNum,
else { else {
int devID = myMem != NULL ? myMem->devID : -1; int devID = myMem != NULL ? myMem->devID : -1;
/* The following code should be fine with GPUs, but too many /*
The following code should be fine with GPUs, but too many
kernel calls would slow down the system. We prefer to use kernel calls would slow down the system. We prefer to use
one kernel to do block copy in batch (kernel fusion). */ one kernel to do block copy in batch (kernel fusion).
*/
for (int i = 0; i < blockNum; i++) { for (int i = 0; i < blockNum; i++) {
XMemCopy((char*)target + targetBlocks[i] * blockSize, devID, XMemCopy((char*)target + targetBlocks[i] * blockSize, devID,
(char*)source + sourceBlocks[i] * blockSize, devID, blockSize); (char*)source + sourceBlocks[i] * blockSize, devID, blockSize);
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKS_H__ #ifndef __COPYBLOCKS_H__
#define __COPYBLOCKS_H__ #define __COPYBLOCKS_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "CopyBlocksInGrid.h" #include "CopyBlocksInGrid.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "CopyBlocksInGrid.cuh" #include "CopyBlocksInGrid.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#include "CopyBlocksInGrid.h" #include "CopyBlocksInGrid.h"
#include "CopyBlocksInGrid.cuh" #include "CopyBlocksInGrid.cuh"
#include "../XDevice.h" #include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSINGRID_CUH__ #ifndef __COPYBLOCKSINGRID_CUH__
#define __COPYBLOCKSINGRID_CUH__ #define __COPYBLOCKSINGRID_CUH__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSINGRID_H__ #ifndef __COPYBLOCKSINGRID_H__
#define __COPYBLOCKSINGRID_H__ #define __COPYBLOCKSINGRID_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,12 +19,13 @@ ...@@ -19,12 +19,13 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "CopyBlocksOnSite.h" #include "CopyBlocksOnSite.h"
#include "CopyBlocksOnSite.cuh" #include "CopyBlocksOnSite.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
copy a number of blocks to target positions. Here we assume that copy a number of blocks to target positions. Here we assume that
all the data has been on the device (CPU/GPU) already. all the data has been on the device (CPU/GPU) already.
...@@ -47,9 +48,11 @@ void CopyBlocksOnSite(void * source, int blockSize, int blockNum, void * target, ...@@ -47,9 +48,11 @@ void CopyBlocksOnSite(void * source, int blockSize, int blockNum, void * target,
else { else {
int devID = myMem != NULL ? myMem->devID : -1; int devID = myMem != NULL ? myMem->devID : -1;
/* The following code should be fine with GPUs, but too many /*
The following code should be fine with GPUs, but too many
kernel calls would slow down the system. We prefer to use kernel calls would slow down the system. We prefer to use
one kernel to do block copy in batch (kernel fusion). */ one kernel to do block copy in batch (kernel fusion).
*/
for (int i = 0, b = 0; i < blockNum; i++, b += blockSize) { for (int i = 0, b = 0; i < blockNum; i++, b += blockSize) {
XMemCopy((char*)target + targetBlocks[i] * blockSize, devID, XMemCopy((char*)target + targetBlocks[i] * blockSize, devID,
(char*)source + b, devID, blockSize); (char*)source + b, devID, blockSize);
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#include "CopyBlocksOnSite.h" #include "CopyBlocksOnSite.h"
#include "CopyBlocksOnSite.cuh" #include "CopyBlocksOnSite.cuh"
#include "../XDevice.h" #include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKS_CUH__ #ifndef __COPYBLOCKS_CUH__
#define __COPYBLOCKS_CUH__ #define __COPYBLOCKS_CUH__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSONSITE_H__ #ifndef __COPYBLOCKSONSITE_H__
#define __COPYBLOCKSONSITE_H__ #define __COPYBLOCKSONSITE_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -21,8 +21,8 @@ ...@@ -21,8 +21,8 @@
#include "CopyBlocks.h" #include "CopyBlocks.h"
#include "CopyBlocksSelected.cuh" #include "CopyBlocksSelected.cuh"
#include "../XUtility.h" #include "../../XUtility.h"
#include "../XDevice.h" #include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYBLOCKSSELECTED_CUH__ #ifndef __COPYBLOCKSSELECTED_CUH__
#define __COPYBLOCKSSELECTED_CUH__ #define __COPYBLOCKSSELECTED_CUH__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "CopyData2D.h" #include "CopyData2D.h"
#include "../XUtility.h" #include "../../XUtility.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYDATA2D_H__ #ifndef __COPYDATA2D_H__
#define __COPYDATA2D_H__ #define __COPYDATA2D_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "CopyInGrid.h" #include "CopyInGrid.h"
#include "CopyBlocksInGrid.h" #include "CopyBlocksInGrid.h"
...@@ -34,7 +34,7 @@ i.e., reorder the data blocks in the same memory piece ...@@ -34,7 +34,7 @@ i.e., reorder the data blocks in the same memory piece
in the k-th grid in the k-th grid
>> blockDim - leading dimension of blocks >> blockDim - leading dimension of blocks
>> blockNumInGrid - number of blocks in each grid >> blockNumInGrid - number of blocks in each grid
>> isOnDev - indicates whether the index is on the device already >> isIndexOnDev - indicates whether the index is on the device already
*/ */
void CopyInGrid(XTensor * s, XTensor * t, int * index, int blockDim, int blockNumInGrid, bool isIndexOnDev) void CopyInGrid(XTensor * s, XTensor * t, int * index, int blockDim, int blockNumInGrid, bool isIndexOnDev)
{ {
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYINGRID_H__ #ifndef __COPYINGRID_H__
#define __COPYINGRID_H__ #define __COPYINGRID_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -36,6 +36,7 @@ copy indexed sub-tensors ...@@ -36,6 +36,7 @@ copy indexed sub-tensors
>> tgtIndex - index of the target sub-tensors >> tgtIndex - index of the target sub-tensors
>> copyNum - number of the sub-tensors we copy for each source index, e.g., >> copyNum - number of the sub-tensors we copy for each source index, e.g.,
for srcIndex = [1,4] and copyNum = 2, we actually copy the source sub-tensors 1, 2, 4, 5 for srcIndex = [1,4] and copyNum = 2, we actually copy the source sub-tensors 1, 2, 4, 5
<< return - whether copy indexed operation was successful
*/ */
bool CopyIndexed(XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum) bool CopyIndexed(XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum)
{ {
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYINDEXED_H__ #ifndef __COPYINDEXED_H__
#define __COPYINDEXED_H__ #define __COPYINDEXED_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XName.h" #include "../../XName.h"
#include "CopyValues.h" #include "CopyValues.h"
#include "CopyValues.cuh" #include "CopyValues.cuh"
......
...@@ -21,8 +21,8 @@ ...@@ -21,8 +21,8 @@
#include "CopyValues.h" #include "CopyValues.h"
#include "CopyValues.cuh" #include "CopyValues.cuh"
#include "../XUtility.h" #include "../../XUtility.h"
#include "../XDevice.h" #include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,13 +22,12 @@ ...@@ -22,13 +22,12 @@
#ifndef __COPYVALUES_CUH__ #ifndef __COPYVALUES_CUH__
#define __COPYVALUES_CUH__ #define __COPYVALUES_CUH__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/**************************************/
/* copy all elements from a source matrix to a target matrix */ /* copy all elements from a source matrix to a target matrix */
extern "C" extern "C"
bool CudaCopyValues(XTensor * s, XTensor * t, XStream * stream = NULL); bool CudaCopyValues(XTensor * s, XTensor * t, XStream * stream = NULL);
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __COPYVALUES_H__ #ifndef __COPYVALUES_H__
#define __COPYVALUES_H__ #define __COPYVALUES_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XName.h" #include "../../XName.h"
#include "ReduceMax.h" #include "ReduceMax.h"
#include "ReduceMax.cuh" #include "ReduceMax.cuh"
......
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "ReduceMax.h" #include "ReduceMax.h"
#include "ReduceMax.cuh" #include "ReduceMax.cuh"
...@@ -31,14 +31,10 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -31,14 +31,10 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* /*
reduce a tensor to another that keeps the max value along a dimension - slow version reduce a tensor to another that keeps the max value along a dimension - slow version
Given a block of data, we go over each dimension i in the stride and we have Given a block of data, we go over each dimension i in the stride and we have
sum_i = max_{0<=j<strideNum} input_{i,j} sum_i = max_{0<=j<strideNum} input_{i,j}
where we can view the block as a matrix and input_{i,j} represent the item at the where we can view the block as a matrix and input_{i,j} represent the item at the
crossing of the i-th column and the j-th row. crossing of the i-th column and the j-th row.
>> input - the input array (representing a tensor) >> input - the input array (representing a tensor)
>> output - the sum over each block. NOTE: output is also an array >> output - the sum over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item >> stride - stride that we need to move to the next item
...@@ -89,29 +85,25 @@ void KernelReduceMax(DTYPE * input, DTYPE * output, ...@@ -89,29 +85,25 @@ void KernelReduceMax(DTYPE * input, DTYPE * output,
} }
/* /*
reduce a tensor to another that keeps the max value along a dimension - slow version reduce a tensor to another that keeps the max value along a dimension - slow version
Given a block of data, we go over each dimension i in the stride and we have
Given a block of data, we go over each dimension i in the stride and we have sum_i = max_{0<=j<strideNum} input_{i,j}
where we can view the block as a matrix and input_{i,j} represent the item at the
sum_i = max_{0<=j<strideNum} input_{i,j} crossing of the i-th column and the j-th row.
>> input - the input array (representing a tensor)
where we can view the block as a matrix and input_{i,j} represent the item at the >> output - the sum over each block. NOTE: output is also an array
crossing of the i-th column and the j-th row. >> stride - stride that we need to move to the next item
>> strideNum - how many strides we need to finish the reduce
>> input - the input array (representing a tensor) >> reducedStrideNum - the number of strides after reduction
>> output - the sum over each block. NOTE: output is also an array >> blockSize - size of the block (i.e., stride * strideNum)
>> stride - stride that we need to move to the next item >> blockNum - how many blocks
>> strideNum - how many strides we need to finish the reduce */
>> reducedStrideNum - the number of strides after reduction __global__
>> blockSize - size of the block (i.e., stride * strideNum) void KernelReduceMax(__half * input, __half * output,
>> blockNum - how many blocks
*/
__global__
void KernelReduceMax(__half * input, __half * output,
int stride, int strideNum, int reducedStrideNum, int stride, int strideNum, int reducedStrideNum,
int blockSize, int blockNum) int blockSize, int blockNum)
{ {
int idx = threadIdx.x * blockDim.y + threadIdx.y; int idx = threadIdx.x * blockDim.y + threadIdx.y;
unsigned int i = blockIdx.x*blockDim.x + threadIdx.x; unsigned int i = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int j = blockIdx.y*blockDim.y + threadIdx.y; unsigned int j = blockIdx.y*blockDim.y + threadIdx.y;
...@@ -164,7 +156,6 @@ void KernelReduceMax(DTYPE * input, DTYPE * output, ...@@ -164,7 +156,6 @@ void KernelReduceMax(DTYPE * input, DTYPE * output,
} }
/* /*
reduce a tensor to another that keeps the max value along a dimension - fast version reduce a tensor to another that keeps the max value along a dimension - fast version
>> input - the input array (representing a tensor) >> input - the input array (representing a tensor)
...@@ -338,9 +329,7 @@ void KernelReduceMaxSimpleFast(DTYPE * input, DTYPE * output, ...@@ -338,9 +329,7 @@ void KernelReduceMaxSimpleFast(DTYPE * input, DTYPE * output,
/* /*
get the max-valued items along a dimension of the tensor (cuda version). get the max-valued items along a dimension of the tensor (cuda version).
For a 1-dimensional data array a, For a 1-dimensional data array a,
sum_i = max_{0<=j<strideNum} input_{i,j} sum_i = max_{0<=j<strideNum} input_{i,j}
>> input - the input tensor >> input - the input tensor
>> output - the output tensor >> output - the output tensor
>> dim - which dimension to reduce >> dim - which dimension to reduce
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __REDUCEMAX_H__ #ifndef __REDUCEMAX_H__
#define __REDUCEMAX_H__ #define __REDUCEMAX_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "ScaleAndShift.h" #include "../math/ScaleAndShift.h"
#include "ReduceSum.h" #include "ReduceSum.h"
#include "ReduceMean.h" #include "ReduceMean.h"
...@@ -28,7 +28,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* /*
get the mean value along a dimension of the tensor. For a 1-dimensional data array a, get the mean value along a dimension of the tensor. For a 1-dimensional data array a,
mean = (1/n) * sum_i input_i mean = (1/n) * sum_i input_i
>> input - the input tensor >> input - the input tensor
>> output - the output tensor >> output - the output tensor
>> dim - the dimension where the reduction is performed on >> dim - the dimension where the reduction is performed on
...@@ -44,5 +43,4 @@ void ReduceMean(XTensor * input, XTensor * output, int dim) ...@@ -44,5 +43,4 @@ void ReduceMean(XTensor * input, XTensor * output, int dim)
ScaleAndShift(output, (DTYPE)1/num, 0); ScaleAndShift(output, (DTYPE)1/num, 0);
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __REDUCEMEAN_H__ #ifndef __REDUCEMEAN_H__
#define __REDUCEMEAN_H__ #define __REDUCEMEAN_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __REDUCESTANDARDVARIANCE_H__ #ifndef __REDUCESTANDARDVARIANCE_H__
#define __REDUCESTANDARDVARIANCE_H__ #define __REDUCESTANDARDVARIANCE_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#include <math.h> #include <math.h>
#include "ReduceSum.h" #include "ReduceSum.h"
#include "ReduceSum.cuh" #include "ReduceSum.cuh"
#include "../XName.h" #include "../../XName.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "ReduceSum.cuh" #include "ReduceSum.cuh"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
...@@ -29,13 +29,11 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -29,13 +29,11 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* /*
reduce a tensor to another that keeps the sum along a dimension - slow version reduce a tensor to another that keeps the sum along a dimension - slow version
Given a block of data, we go over each dimension i in the stride and we have Given a block of data, we go over each dimension i in the stride and we have
sum_i = sum_{0<=j<strideNum} exp(input_{i,j} - shift) if isExp == true; sum_i = sum_{0<=j<strideNum} exp(input_{i,j} - shift) if isExp == true;
= sum_{0<=j<strideNum} input_{i,j} - shift if isExp == false; = sum_{0<=j<strideNum} input_{i,j} - shift if isExp == false;
where we can view the block as a matrix and input_{i,j} represents the item at the where we can view the block as a matrix and input_{i,j} represents the item at the
crossing of the i-th column and the j-th row. crossing of the i-th column and the j-th row.
>> input - the input array (representing a tensor) >> input - the input array (representing a tensor)
>> output - the sum over each block. NOTE: output is also an array >> output - the sum over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item >> stride - stride that we need to move to the next item
...@@ -107,13 +105,11 @@ void KernelReduceSum(DTYPE * input, DTYPE * output, ...@@ -107,13 +105,11 @@ void KernelReduceSum(DTYPE * input, DTYPE * output,
/* /*
reduce a tensor to another that keeps the sum along a dimension - slow version reduce a tensor to another that keeps the sum along a dimension - slow version
This is for float16 reduction. This is for float16 reduction.
Given a block of data, we go over each dimension i in the stride and we have Given a block of data, we go over each dimension i in the stride and we have
sum_i = sum_{0<=j<strideNum} exp(input_{i,j} - shift) if isExp == true; sum_i = sum_{0<=j<strideNum} exp(input_{i,j} - shift) if isExp == true;
= sum_{0<=j<strideNum} input_{i,j} - shift if isExp == false; = sum_{0<=j<strideNum} input_{i,j} - shift if isExp == false;
where we can view the block as a matrix and input_{i,j} represents the item at the where we can view the block as a matrix and input_{i,j} represents the item at the
crossing of the i-th column and the j-th row. crossing of the i-th column and the j-th row.
>> input - the input array (representing a tensor) >> input - the input array (representing a tensor)
>> output - the sum over each block. NOTE: output is also an array >> output - the sum over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item >> stride - stride that we need to move to the next item
...@@ -304,7 +300,6 @@ void KernelReduceSumFast(DTYPE * input, DTYPE * output, ...@@ -304,7 +300,6 @@ void KernelReduceSumFast(DTYPE * input, DTYPE * output,
/* /*
reduce a tensor to another that keeps the sum along a dimension - fast version reduce a tensor to another that keeps the sum along a dimension - fast version
This is for float16 reduction This is for float16 reduction
>> input - the input array (representing a tensor) >> input - the input array (representing a tensor)
>> output - the sum over each block. NOTE: output is also an array >> output - the sum over each block. NOTE: output is also an array
>> stride - stride that we need to move to the next item >> stride - stride that we need to move to the next item
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __REDUCESUM_H__ #ifndef __REDUCESUM_H__
#define __REDUCESUM_H__ #define __REDUCESUM_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -28,7 +28,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
squared sum of the items along a dimension of the tensor. squared sum of the items along a dimension of the tensor.
For a 1-dimensional data array a, For a 1-dimensional data array a,
sum = \sum_i (a_i - shift)^2 sum = \sum_i (a_i - shift)^2
>> input - the input tensor >> input - the input tensor
>> output - the output tensor >> output - the output tensor
>> dim - the dimension where the reduction is performed on >> dim - the dimension where the reduction is performed on
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __REDUCESUMSQUARED_H__ #ifndef __REDUCESUMSQUARED_H__
#define __REDUCESUMSQUARED_H__ #define __REDUCESUMSQUARED_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "ScaleAndShift.h" #include "../math/ScaleAndShift.h"
#include "ReduceSum.h" #include "ReduceSum.h"
#include "ReduceVariance.h" #include "ReduceVariance.h"
...@@ -29,7 +29,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -29,7 +29,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
variance of the items along a dimension of the tensor. variance of the items along a dimension of the tensor.
For a 1-dimensional data array a, For a 1-dimensional data array a,
variance = 1/n * \sum_i (a_i - mean)^2 variance = 1/n * \sum_i (a_i - mean)^2
>> input - the input tensor >> input - the input tensor
>> output - the output tensor >> output - the output tensor
>> dim - the dimension where the reduction is performed on >> dim - the dimension where the reduction is performed on
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __REDUCEVARIANCE_H__ #ifndef __REDUCEVARIANCE_H__
#define __REDUCEVARIANCE_H__ #define __REDUCEVARIANCE_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "Concatenate.h" #include "Concatenate.h"
#include "Merge.h" #include "Merge.h"
#include "ConcatenateSolely.h" #include "ConcatenateSolely.h"
...@@ -53,6 +53,10 @@ void Concatenate(XList * smalls, XTensor * big, int dim) ...@@ -53,6 +53,10 @@ void Concatenate(XList * smalls, XTensor * big, int dim)
/* /*
concatenate two tensors along a given dimension concatenate two tensors along a given dimension
>> smallA - one tensor for concatenation
>> smallB - the other tensor for concatenation
>> big - the resulting tensor
>> dim - which dimension we perform the concatenation
*/ */
void Concatenate(XTensor * smallA, XTensor * smallB, XTensor * big, int dim) void Concatenate(XTensor * smallA, XTensor * smallB, XTensor * big, int dim)
{ {
......
...@@ -22,14 +22,15 @@ ...@@ -22,14 +22,15 @@
#ifndef __CONCATENATE_H__ #ifndef __CONCATENATE_H__
#define __CONCATENATE_H__ #define __CONCATENATE_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
concatenate a list of tensors along a given dimension concatenate a list of tensors along a given dimension
Note that this is actually a wrapper that selects "ConcatenateSolely" Note that this is actually a wrapper that selects "ConcatenateSolely"
or "Merge" by means of the tensor shapes */ or "Merge" by means of the tensor shapes
*/
void Concatenate(XList * smalls, XTensor * big, int dim); void Concatenate(XList * smalls, XTensor * big, int dim);
/* concatenate two tensors along a given dimension */ /* concatenate two tensors along a given dimension */
......
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "../XName.h" #include "../../XName.h"
#include "ConcatenateSolely.h" #include "ConcatenateSolely.h"
#include "MergeBlockLists.h" #include "MergeBlockLists.h"
...@@ -69,9 +69,11 @@ void ConcatenateSolely(XList * smalls, XTensor * big, int dim) ...@@ -69,9 +69,11 @@ void ConcatenateSolely(XList * smalls, XTensor * big, int dim)
int offset = 0; int offset = 0;
/* two strategies are used - we can either resort to memcpy2d for the case of /*
two strategies are used - we can either resort to memcpy2d for the case of
concatenation of a few items, or use MergeBlockLists to merge a large number concatenation of a few items, or use MergeBlockLists to merge a large number
of data blocks */ of data blocks
*/
if (smalls->count <= MIN_TENSOR_CAT_NUM) { if (smalls->count <= MIN_TENSOR_CAT_NUM) {
for (int i = 0; i < smalls->count; i++) { for (int i = 0; i < smalls->count; i++) {
XTensor * tensor = (XTensor*)smalls->GetItem(i); XTensor * tensor = (XTensor*)smalls->GetItem(i);
......
...@@ -22,11 +22,10 @@ ...@@ -22,11 +22,10 @@
#ifndef __CONCATENATESOLELY_H__ #ifndef __CONCATENATESOLELY_H__
#define __CONCATENATESOLELY_H__ #define __CONCATENATESOLELY_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* concatenate a list of tensors along a given dimension */ /* concatenate a list of tensors along a given dimension */
extern "C" extern "C"
void ConcatenateSolely(XList * smalls, XTensor * big, int dim); void ConcatenateSolely(XList * smalls, XTensor * big, int dim);
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "MakeMergeBlockIndex.h" #include "MakeMergeBlockIndex.h"
#include "MakeMergeBlockIndex.cuh" #include "MakeMergeBlockIndex.cuh"
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "MakeMergeBlockIndex.h" #include "MakeMergeBlockIndex.h"
#include "MakeMergeBlockIndex.cuh" #include "MakeMergeBlockIndex.cuh"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __CUDAMAKEMERGEBLOCKINDEX_CUH__ #ifndef __CUDAMAKEMERGEBLOCKINDEX_CUH__
#define __CUDAMAKEMERGEBLOCKINDEX_CUH__ #define __CUDAMAKEMERGEBLOCKINDEX_CUH__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MAKEMERGEBLOCKINDEX_H__ #ifndef __MAKEMERGEBLOCKINDEX_H__
#define __MAKEMERGEBLOCKINDEX_H__ #define __MAKEMERGEBLOCKINDEX_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,11 +19,12 @@ ...@@ -19,11 +19,12 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "MakeSplitBlockIndex.h" #include "MakeSplitBlockIndex.h"
#include "MakeSplitBlockIndex.cuh" #include "MakeSplitBlockIndex.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
set target data block index for the data movement in split set target data block index for the data movement in split
>> blockIndex - block index >> blockIndex - block index
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "MakeSplitBlockIndex.h" #include "MakeSplitBlockIndex.h"
#include "MakeSplitBlockIndex.cuh" #include "MakeSplitBlockIndex.cuh"
...@@ -51,6 +51,7 @@ void KernelMakeSplitBlockIndex(int * blockIndex, int splitNum, int blockSplitSiz ...@@ -51,6 +51,7 @@ void KernelMakeSplitBlockIndex(int * blockIndex, int splitNum, int blockSplitSiz
/* /*
set target data block index for the data movement in split set target data block index for the data movement in split
>> devID - device id
>> blockIndex - block index >> blockIndex - block index
>> splitNum - number of splits >> splitNum - number of splits
>> blockSplitSize - size of the split block >> blockSplitSize - size of the split block
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MAKESPLITBLOCKINDEX_H__ #ifndef __MAKESPLITBLOCKINDEX_H__
#define __MAKESPLITBLOCKINDEX_H__ #define __MAKESPLITBLOCKINDEX_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,16 +19,15 @@ ...@@ -19,16 +19,15 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "../XName.h" #include "../../XName.h"
#include "Merge.h" #include "Merge.h"
#include "MakeMergeBlockIndex.h" #include "MakeMergeBlockIndex.h"
#include "CopyBlocksOnSite.h" #include "../movement/CopyBlocksOnSite.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
transform a tensor by merging it along a dimension, e.g., (N/3, M, 3) -> (N, M) transform a tensor by merging it along a dimension, e.g., (N/3, M, 3) -> (N, M)
>> s - the source tensor >> s - the source tensor
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MERGE_H__ #ifndef __MERGE_H__
#define __MERGE_H__ #define __MERGE_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "MergeBlockLists.h" #include "MergeBlockLists.h"
#include "MergeBlockLists.cuh" #include "MergeBlockLists.cuh"
......
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "MergeBlockLists.h" #include "MergeBlockLists.h"
#include "MergeBlockLists.cuh" #include "MergeBlockLists.cuh"
...@@ -34,10 +34,9 @@ copy a number of blocks (of different sizes) to target positions ...@@ -34,10 +34,9 @@ copy a number of blocks (of different sizes) to target positions
>> sourceBlockSizes - the size of the block_i >> sourceBlockSizes - the size of the block_i
>> sourceBlockNum - number of blocks to merge >> sourceBlockNum - number of blocks to merge
>> targetList - list of data arrays to copy to >> targetList - list of data arrays to copy to
>> target - target data array
*/ */
__global__ __global__
void KernelCopyBlockLists(DTYPE * sourceList[], int * sourceBlockSizes, int sourceBlockNum, DTYPE * targetList[]) void KernelCopyBlockLists(DTYPE * sourceList[], int * sourceBlockSizes, int sourceBlockNum, DTYPE * targetList[])
{ {
__shared__ int iBlockSizes[MAX_CUDA_THREAD_NUM_PER_BLOCK]; __shared__ int iBlockSizes[MAX_CUDA_THREAD_NUM_PER_BLOCK];
__shared__ DTYPE * iSourceList[MAX_CUDA_THREAD_NUM_PER_BLOCK]; __shared__ DTYPE * iSourceList[MAX_CUDA_THREAD_NUM_PER_BLOCK];
...@@ -82,7 +81,6 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi ...@@ -82,7 +81,6 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
int minBlockSize = MAX_INT; int minBlockSize = MAX_INT;
int maxBlockSize = -MAX_INT; int maxBlockSize = -MAX_INT;
//int realMinBlockSize = 1;
int realMaxBlockSize = 1; int realMaxBlockSize = 1;
DTYPE ** sourceArrays = new DTYPE*[newBlockListSize]; DTYPE ** sourceArrays = new DTYPE*[newBlockListSize];
DTYPE ** targetArrays = new DTYPE*[newBlockListSize]; DTYPE ** targetArrays = new DTYPE*[newBlockListSize];
...@@ -110,7 +108,6 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi ...@@ -110,7 +108,6 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
CheckNTErrors((minBlockSize % sizeof(DTYPE) == 0), "Unsupported block size!"); CheckNTErrors((minBlockSize % sizeof(DTYPE) == 0), "Unsupported block size!");
CheckNTErrors((maxBlockSize % sizeof(DTYPE) == 0), "Unsupported block size!"); CheckNTErrors((maxBlockSize % sizeof(DTYPE) == 0), "Unsupported block size!");
//realMinBlockSize = minBlockSize/sizeof(DTYPE);
realMaxBlockSize = maxBlockSize / sizeof(DTYPE); realMaxBlockSize = maxBlockSize / sizeof(DTYPE);
int cudaGridSizes[3]; int cudaGridSizes[3];
...@@ -120,31 +117,16 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi ...@@ -120,31 +117,16 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
cudaGridSizes, cudaBlockSizes); cudaGridSizes, cudaBlockSizes);
myMem->SetPinBuf(); myMem->SetPinBuf();
//MTYPE offset0 = myMem->bufUsed;
int * sizesGPU = (int*)myMem->AllocBuf(myMem->devID, sizeof(int) * newBlockListSize, 256); int * sizesGPU = (int*)myMem->AllocBuf(myMem->devID, sizeof(int) * newBlockListSize, 256);
//MTYPE offset1 = myMem->bufUsed;
DTYPE ** sourceArraysGPU = (DTYPE**)myMem->AllocBuf(myMem->devID, sizeof(DTYPE*) * newBlockListSize, 256); DTYPE ** sourceArraysGPU = (DTYPE**)myMem->AllocBuf(myMem->devID, sizeof(DTYPE*) * newBlockListSize, 256);
//MTYPE offset2 = myMem->bufUsed;
DTYPE ** targetArraysGPU = (DTYPE**)myMem->AllocBuf(myMem->devID, sizeof(DTYPE*) * newBlockListSize, 256); DTYPE ** targetArraysGPU = (DTYPE**)myMem->AllocBuf(myMem->devID, sizeof(DTYPE*) * newBlockListSize, 256);
//MTYPE bufSize = myMem->bufUsed - offset0;
//char * CPUBuf = new char[bufSize];
//memset(CPUBuf, 0 , bufSize);
//memcpy(CPUBuf, sizes, sizeof(int) * newBlockListSize);
//memcpy(CPUBuf + (offset1 - offset0), sourceArrays, sizeof(DTYPE*) * newBlockListSize);
//memcpy(CPUBuf + (offset2 - offset0), targetArrays, sizeof(DTYPE*) * newBlockListSize);
XMemCopy(sizesGPU, myMem->devID, sizes, -1, sizeof(int) * newBlockListSize); XMemCopy(sizesGPU, myMem->devID, sizes, -1, sizeof(int) * newBlockListSize);
XMemCopy(sourceArraysGPU, myMem->devID, sourceArrays, -1, sizeof(DTYPE*) * newBlockListSize); XMemCopy(sourceArraysGPU, myMem->devID, sourceArrays, -1, sizeof(DTYPE*) * newBlockListSize);
XMemCopy(targetArraysGPU, myMem->devID, targetArrays, -1, sizeof(DTYPE*) * newBlockListSize); XMemCopy(targetArraysGPU, myMem->devID, targetArrays, -1, sizeof(DTYPE*) * newBlockListSize);
/* it is VERY tricky here because we squeeze three data copies into one */
//XMemCopy(sizesGPU, myMem->devID, CPUBuf, -1, bufSize);
KernelCopyBlockLists << <dim3(cudaGridSizes[0], cudaGridSizes[1]), dim3(cudaBlockSizes[0], cudaBlockSizes[1]) >> > KernelCopyBlockLists << <dim3(cudaGridSizes[0], cudaGridSizes[1]), dim3(cudaBlockSizes[0], cudaBlockSizes[1]) >> >
(sourceArraysGPU, sizesGPU, newBlockListSize, targetArraysGPU); (sourceArraysGPU, sizesGPU, newBlockListSize, targetArraysGPU);
...@@ -154,7 +136,6 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi ...@@ -154,7 +136,6 @@ void CudaMergeBlockLists(XList * sourceList, int * blockSizes, int blockNum, voi
delete[] targetArrays; delete[] targetArrays;
delete[] sizes; delete[] sizes;
delete[] offsets; delete[] offsets;
//delete[] CPUBuf;
} }
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __MERGEBLOCKLISTS_H__ #ifndef __MERGEBLOCKLISTS_H__
#define __MERGEBLOCKLISTS_H__ #define __MERGEBLOCKLISTS_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __PERMUTE_H__ #ifndef __PERMUTE_H__
#define __PERMUTE_H__ #define __PERMUTE_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,11 +19,11 @@ ...@@ -19,11 +19,11 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "Split.h" #include "Split.h"
#include "MakeSplitBlockIndex.h" #include "MakeSplitBlockIndex.h"
#include "CopyBlocksOnSite.h" #include "../movement/CopyBlocksOnSite.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,11 +22,12 @@ ...@@ -22,11 +22,12 @@
#ifndef __SPLIT_H__ #ifndef __SPLIT_H__
#define __SPLIT_H__ #define __SPLIT_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* transform a tensor by splitting it, e.g., (M, N) -> (M, N/3, 3) */ /* transform a tensor by splitting it, e.g., (M, N) -> (M, N/3, 3) */
extern "C"
void Split(XTensor * s, XTensor * t, int whereToSplit, int splitNum); void Split(XTensor * s, XTensor * t, int whereToSplit, int splitNum);
/* split a big tensor into small tensors */ /* split a big tensor into small tensors */
......
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
#ifndef __TRANSPOSE_H__ #ifndef __TRANSPOSE_H__
#define __TRANSPOSE_H__ #define __TRANSPOSE_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XName.h" #include "../../XName.h"
#include "Unsqueeze.h" #include "Unsqueeze.h"
#include "MergeBlockLists.h" #include "MergeBlockLists.h"
#include "Unsqueeze.cuh" #include "Unsqueeze.cuh"
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "Unsqueeze.h" #include "Unsqueeze.h"
#include "Unsqueeze.cuh" #include "Unsqueeze.cuh"
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __UNSQUEEZE_H__ #ifndef __UNSQUEEZE_H__
#define __UNSQUEEZE_H__ #define __UNSQUEEZE_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "../XName.h" #include "../../XName.h"
#include "Sort.h" #include "Sort.h"
#include "Sort.cuh" #include "Sort.cuh"
......
...@@ -19,9 +19,9 @@ ...@@ -19,9 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XUtility.h" #include "../../XUtility.h"
#include "../XTensor.h" #include "../../XTensor.h"
#include "Sort.h" #include "Sort.h"
#include "Sort.cuh" #include "Sort.cuh"
...@@ -235,13 +235,16 @@ void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, i ...@@ -235,13 +235,16 @@ void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, i
int m = GetNextPower2(strideNum); int m = GetNextPower2(strideNum);
int n = stride * blockNum; int n = stride * blockNum;
void * buf = mem->AllocBuf(mem->devID, n * m * a->unitSize); void * buf = mem != NULL ? mem->AllocBuf(a->devID, n * m * a->unitSize) : XMemAlloc(a->devID, n * m * a->unitSize);
void * bufIndex = (indexA != NULL && indexB != NULL) ? mem->AllocBuf(mem->devID, n * m * sizeof(int)) : NULL; void * bufIndex = NULL;
if (indexA != NULL && indexB != NULL) {
bufIndex = mem != NULL ? mem->AllocBuf(a->devID, n * m * sizeof(int)) : XMemAlloc(a->devID, n * m * sizeof(int));
}
int cudaGrids[3]; int cudaGrids[3];
int cudaBlocks[3]; int cudaBlocks[3];
GDevs.GetCudaThread(mem->devID, m * n, cudaGrids, cudaBlocks); GDevs.GetCudaThread(a->devID, m * n, cudaGrids, cudaBlocks);
int devIDBackup; int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
...@@ -250,7 +253,7 @@ void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, i ...@@ -250,7 +253,7 @@ void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, i
KernelSetDataArray<DTYPE> << <dim3(cudaGrids[0]), dim3(cudaBlocks[0]) >> > KernelSetDataArray<DTYPE> << <dim3(cudaGrids[0]), dim3(cudaBlocks[0]) >> >
((DTYPE*)buf, DTYPE_MIN, m * n); ((DTYPE*)buf, DTYPE_MIN, m * n);
GDevs.GetCudaThread2D(mem->devID, strideNum, n, MAX_INT, cudaGrids, cudaBlocks); GDevs.GetCudaThread2D(a->devID, strideNum, n, MAX_INT, cudaGrids, cudaBlocks);
/* reorganize the data into a matrix */ /* reorganize the data into a matrix */
KernelReorganize<DTYPE> << <dim3(cudaGrids[1], cudaGrids[0]), dim3(cudaBlocks[1], cudaBlocks[0]) >> > KernelReorganize<DTYPE> << <dim3(cudaGrids[1], cudaGrids[0]), dim3(cudaBlocks[1], cudaBlocks[0]) >> >
...@@ -261,7 +264,7 @@ void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, i ...@@ -261,7 +264,7 @@ void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, i
KernelReorganize<int> << <dim3(cudaGrids[1], cudaGrids[0]), dim3(cudaBlocks[1], cudaBlocks[0]) >> > KernelReorganize<int> << <dim3(cudaGrids[1], cudaGrids[0]), dim3(cudaBlocks[1], cudaBlocks[0]) >> >
(indexA->data, bufIndex, stride, strideNum, blockNum, m, n); (indexA->data, bufIndex, stride, strideNum, blockNum, m, n);
GDevs.GetCudaThread2D(mem->devID, m, n, MAX_INT, cudaGrids, cudaBlocks); GDevs.GetCudaThread2D(a->devID, m, n, MAX_INT, cudaGrids, cudaBlocks);
/* bitonic sorting */ /* bitonic sorting */
for (int i = 2; i <= m; i <<= 1) { for (int i = 2; i <= m; i <<= 1) {
...@@ -277,7 +280,7 @@ void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, i ...@@ -277,7 +280,7 @@ void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, i
} }
} }
GDevs.GetCudaThread2D(mem->devID, k, n, MAX_INT, cudaGrids, cudaBlocks); GDevs.GetCudaThread2D(a->devID, k, n, MAX_INT, cudaGrids, cudaBlocks);
/* copy result to the output tensor */ /* copy result to the output tensor */
KernelReorganizeBack<DTYPE> << <dim3(cudaGrids[1], cudaGrids[0]), dim3(cudaBlocks[1], cudaBlocks[0]) >> > KernelReorganizeBack<DTYPE> << <dim3(cudaGrids[1], cudaGrids[0]), dim3(cudaBlocks[1], cudaBlocks[0]) >> >
...@@ -287,9 +290,15 @@ void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, i ...@@ -287,9 +290,15 @@ void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, i
KernelReorganizeBack<int> << <dim3(cudaGrids[1], cudaGrids[0]), dim3(cudaBlocks[1], cudaBlocks[0]) >> > KernelReorganizeBack<int> << <dim3(cudaGrids[1], cudaGrids[0]), dim3(cudaBlocks[1], cudaBlocks[0]) >> >
(bufIndex, indexB->data, m, n, stride, k, blockNum); (bufIndex, indexB->data, m, n, stride, k, blockNum);
mem->ReleaseBuf(mem->devID, n * m * a->unitSize); if (mem != NULL)
mem->ReleaseBuf(a->devID, n * m * a->unitSize);
else
XMemFree(a->devID, buf);
if (indexA != NULL && indexB != NULL) if (indexA != NULL && indexB != NULL)
mem->ReleaseBuf(mem->devID, n * m * sizeof(int)); if (mem != NULL)
mem->ReleaseBuf(a->devID, n * m * sizeof(int));
else
XMemFree(a->devID, bufIndex);
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
} }
......
...@@ -29,6 +29,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,6 +29,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* sort the tensor along a given dimension */ /* sort the tensor along a given dimension */
extern "C"
void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, int dim, int k = -1); void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, int dim, int k = -1);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __SORT_H__ #ifndef __SORT_H__
#define __SORT_H__ #define __SORT_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,12 +19,13 @@ ...@@ -19,12 +19,13 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XTensor.h" #include "../../XTensor.h"
#include "../XName.h" #include "../../XName.h"
#include "TopK.h" #include "TopK.h"
#include "TopK.cuh" #include "TopK.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
get the top-k items along a given dimension get the top-k items along a given dimension
>> a - input tensor >> a - input tensor
......
...@@ -19,8 +19,9 @@ ...@@ -19,8 +19,9 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XDevice.h" #include "../../XDevice.h"
#include "../XTensor.h" #include "../../XUtility.h"
#include "../../XTensor.h"
#include "TopK.h" #include "TopK.h"
#include "TopK.cuh" #include "TopK.cuh"
#include "Sort.cuh" #include "Sort.cuh"
...@@ -94,9 +95,6 @@ public: ...@@ -94,9 +95,6 @@ public:
/* swap */ /* swap */
__device__ void Swap(int i, int j) __device__ void Swap(int i, int j)
{ {
/*CudaHeapNode<T> tmp = items[i];
items[i] = items[j];
items[j] = tmp;*/
int tmpIndex = items[i].index; int tmpIndex = items[i].index;
T tmpValue = items[i].value; T tmpValue = items[i].value;
items[i] = items[j]; items[i] = items[j];
...@@ -238,8 +236,10 @@ void KernelTopK(T * input, int stride, int strideNum, int blockNum, int k, T min ...@@ -238,8 +236,10 @@ void KernelTopK(T * input, int stride, int strideNum, int blockNum, int k, T min
if (threadIdx.x == 0) { if (threadIdx.x == 0) {
CudaXHeap<MIN_HEAP, T> heapFinal(k, k, heapData + k * threadIdx.y * blockDim.x); CudaXHeap<MIN_HEAP, T> heapFinal(k, k, heapData + k * threadIdx.y * blockDim.x);
/* merge the result over the workers. /*
This can be improved by parallel merging */ merge the result over the workers.
This can be improved by parallel merging
*/
if (blockDim.x > 1) { if (blockDim.x > 1) {
for (int p = 1; p < blockDim.x && p < strideNum; p++) { for (int p = 1; p < blockDim.x && p < strideNum; p++) {
CudaHeapNode<T> * hd = heapData + k * (threadIdx.y * blockDim.x + p); CudaHeapNode<T> * hd = heapData + k * (threadIdx.y * blockDim.x + p);
...@@ -393,7 +393,7 @@ void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k) ...@@ -393,7 +393,7 @@ void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k)
int cudaGrids[3]; int cudaGrids[3];
int cudaBlocks[3]; int cudaBlocks[3];
GDevs.GetCudaThread2D(a->mem->devID, GDevs.GetCudaThread2D(a->devID,
workerNum, stride * blockNum, MAX_INT, workerNum, stride * blockNum, MAX_INT,
cudaGrids, cudaBlocks); cudaGrids, cudaBlocks);
...@@ -434,13 +434,14 @@ void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k) ...@@ -434,13 +434,14 @@ void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k)
memcpy(dimSize, a->dimSize, sizeof(int) * a->order); memcpy(dimSize, a->dimSize, sizeof(int) * a->order);
dimSize[0] = -dimSize[0]; dimSize[0] = -dimSize[0];
XTensor * indexA = new XTensor(a->order, dimSize, X_INT, 1.0F, a->mem); XTensor * indexA = new XTensor(a->order, dimSize, X_INT, 1.0F, a->mem);
indexA->data = a->mem->AllocBuf(a->devID, a->unitNum * sizeof(int)); indexA->data = a->mem != NULL ? a->mem->AllocBuf(a->devID, a->unitNum * sizeof(int)) : XMemAlloc(a->devID, a->unitNum * sizeof(int));
/* make the index tensor */ /* make the index tensor */
indexA->SetAscendingOrder(dim); indexA->SetAscendingOrder(dim);
CudaSortBig(a, b, indexA, index, dim, k); CudaSortBig(a, b, indexA, index, dim, k);
if (a->mem != NULL)
a->mem->ReleaseBuf(a->devID, a->unitNum * sizeof(int)); a->mem->ReleaseBuf(a->devID, a->unitNum * sizeof(int));
delete indexA; delete indexA;
} }
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TOPK_H__ #ifndef __TOPK_H__
#define __TOPK_H__ #define __TOPK_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-06-22 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-06-22
*/ */
#include "../XUtility.h" #include "../../XUtility.h"
#include "FlushToMem.h" #include "FlushToMem.h"
#include "FlushToMem.cuh" #include "FlushToMem.cuh"
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
*/ */
#include "FlushToMem.cuh" #include "FlushToMem.cuh"
#include "../XUtility.h" #include "../../XUtility.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -52,7 +52,6 @@ void CudaCPUToGPUFlush(XList * mList, int devID, XMem * GPUMem) ...@@ -52,7 +52,6 @@ void CudaCPUToGPUFlush(XList * mList, int devID, XMem * GPUMem)
else else
reqiredSize = m->unitSize * m->unitNum; reqiredSize = m->unitSize * m->unitNum;
//reqiredSize = (int)GPUMem->GetPitch(GPUMem->devID, (MTYPE)GPUMem->GetAddress() + size, reqiredSize);
size += reqiredSize; size += reqiredSize;
} }
...@@ -70,7 +69,6 @@ void CudaCPUToGPUFlush(XList * mList, int devID, XMem * GPUMem) ...@@ -70,7 +69,6 @@ void CudaCPUToGPUFlush(XList * mList, int devID, XMem * GPUMem)
else else
pSize = m->unitSize * m->unitNum; pSize = m->unitSize * m->unitNum;
//reqiredSize = (int)GPUMem->GetPitch(GPUMem->devID, (MTYPE)GPUMem->GetAddress() + p, pSize);
reqiredSize = pSize; reqiredSize = pSize;
memcpy(data + p, m->data, pSize); memcpy(data + p, m->data, pSize);
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __FLUSHTOMEM_CUH__ #ifndef __FLUSHTOMEM_CUH__
#define __FLUSHTOMEM_CUH__ #define __FLUSHTOMEM_CUH__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __FLUSHTOMEM_H__ #ifndef __FLUSHTOMEM_H__
#define __FLUSHTOMEM_H__ #define __FLUSHTOMEM_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
*/ */
#include "SetAscendingOrder.cuh" #include "SetAscendingOrder.cuh"
#include "../XDevice.h" #include "../../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __SETASCENDINGORDER_CUH__ #ifndef __SETASCENDINGORDER_CUH__
#define __SETASCENDINGORDER_CUH__ #define __SETASCENDINGORDER_CUH__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,13 +22,11 @@ ...@@ -22,13 +22,11 @@
#ifndef __XMATRIXSEGMENT_H__ #ifndef __XMATRIXSEGMENT_H__
#define __XMATRIXSEGMENT_H__ #define __XMATRIXSEGMENT_H__
#include "../XTensor.h" #include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/******************************************************************* /* segmentation and parallel processing for 2d tensors (i.e., matrices) */
segmentation and parallel processing for 2d tensors (i.e., matrices)
*/
/* segment a 2d tensor (i.e., matrix) into blocks and run jobs in parallel */ /* segment a 2d tensor (i.e., matrix) into blocks and run jobs in parallel */
extern "C" extern "C"
void RunParallel2D(XPRunner * parallelRunner, void * job, int opNum, int rowNum, int colNum, int argNum, ...); void RunParallel2D(XPRunner * parallelRunner, void * job, int opNum, int rowNum, int colNum, int argNum, ...);
......
...@@ -25,7 +25,6 @@ ...@@ -25,7 +25,6 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* /*
hard tanh function hard tanh function
y = 1 if x > 1 y = 1 if x > 1
......
...@@ -95,7 +95,6 @@ dy/dx = 1 if -1 <= x <= 1 ...@@ -95,7 +95,6 @@ dy/dx = 1 if -1 <= x <= 1
>> y - y of the function >> y - y of the function
>> x - x of the function >> x - x of the function
>> size - size of y/x >> size - size of y/x
*/ */
__global__ __global__
void KernelHardtanhBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size) void KernelHardtanhBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size)
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#include "Identity.h" #include "Identity.h"
#include "../XUtility.h" #include "../XUtility.h"
#include "../core/CopyValues.h" #include "../core/movement/CopyValues.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -23,9 +23,9 @@ ...@@ -23,9 +23,9 @@
#include "../XUtility.h" #include "../XUtility.h"
#include "LogSoftmax.h" #include "LogSoftmax.h"
#include "LogSoftmax.cuh" #include "LogSoftmax.cuh"
#include "../core/ReduceSum.h" #include "../core/reduce/ReduceSum.h"
#include "../core/ReduceMax.h" #include "../core/reduce/ReduceMax.h"
#include "../core/CopyValues.h" #include "../core/movement/CopyValues.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -49,7 +49,6 @@ void LogSoftmax(XTensor * x, XTensor * y, int leadDim) ...@@ -49,7 +49,6 @@ void LogSoftmax(XTensor * x, XTensor * y, int leadDim)
dimSize[i - 1] = -x->dimSize[i]; dimSize[i - 1] = -x->dimSize[i];
} }
XMem * mem = x->mem; XMem * mem = x->mem;
XTensor * max = NULL; XTensor * max = NULL;
XTensor * sum = NULL; XTensor * sum = NULL;
...@@ -168,7 +167,6 @@ dE/dx = dE/dy * dy/dx ...@@ -168,7 +167,6 @@ dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k}) log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
dy_i/dx_j dy_i/dx_j
= d{log(e^{x_i} / \sum_{k} e^{x_k})}/dx_j = d{log(e^{x_i} / \sum_{k} e^{x_k})}/dx_j
= d{log(e^{x_i})}/dx_j - d{log(\sum_{k} e^{x_k})}/dx_j = d{log(e^{x_i})}/dx_j - d{log(\sum_{k} e^{x_k})}/dx_j
......
...@@ -22,8 +22,8 @@ ...@@ -22,8 +22,8 @@
#include "LogSoftmax.h" #include "LogSoftmax.h"
#include "LogSoftmax.cuh" #include "LogSoftmax.cuh"
#include "Loss.cuh" #include "Loss.cuh"
#include "../core/ReduceSum.cuh" #include "../core/reduce/ReduceSum.cuh"
#include "../core/ReduceMax.cuh" #include "../core/reduce/ReduceMax.cuh"
#include "../XDevice.h" #include "../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -41,7 +41,8 @@ void CudaLogSoftmax(XTensor * x, XTensor * y, int leadDim) ...@@ -41,7 +41,8 @@ void CudaLogSoftmax(XTensor * x, XTensor * y, int leadDim)
ShowNTErrors("You should call LogSoftmax instead!"); ShowNTErrors("You should call LogSoftmax instead!");
} }
/* log softmax forward computation (Cuda kernel) /*
log softmax forward computation (Cuda kernel)
for each column j, let y_{i,j} and x_{i,j} are the output for each column j, let y_{i,j} and x_{i,j} are the output
and state value for the i-th element of column j. We have and state value for the i-th element of column j. We have
...@@ -85,7 +86,8 @@ void KernelLogSoftmaxComputeByRow(DTYPE * x, DTYPE * max, DTYPE * sum, DTYPE * y ...@@ -85,7 +86,8 @@ void KernelLogSoftmaxComputeByRow(DTYPE * x, DTYPE * max, DTYPE * sum, DTYPE * y
} }
} }
/* log softmax forward computation (Cuda kernel) /*
log softmax forward computation (Cuda kernel)
for each row i, let y_{i,j} and x_{i,j} are the output for each row i, let y_{i,j} and x_{i,j} are the output
and state value for the j-th element of row i. We have and state value for the j-th element of row i. We have
...@@ -182,7 +184,7 @@ void CudaLogSoftmaxSumMax(XTensor * x, XTensor * y, int leadDim, XTensor * sum, ...@@ -182,7 +184,7 @@ void CudaLogSoftmaxSumMax(XTensor * x, XTensor * y, int leadDim, XTensor * sum,
/* /*
set dE/dx = exp(y) set dE/dx = exp(y)
>> dedu - dE/dy >> dedy - dE/dy
>> dedx - dE/dx >> dedx - dE/dx
>> y - output of the function >> y - output of the function
>> size - size of output >> size - size of output
...@@ -256,7 +258,9 @@ dE/dx_j += -gold_j ...@@ -256,7 +258,9 @@ dE/dx_j += -gold_j
>> gold - gold standard to measure error (or loss) >> gold - gold standard to measure error (or loss)
>> y - output of the function >> y - output of the function
>> x - input of the function >> x - input of the function
>> size - size of input/output >> rowNum - row number of the matrix
>> colNum - column number of the matrix
>> gNonZeroNum -
>> lossName - name of the loss function >> lossName - name of the loss function
*/ */
__global__ __global__
...@@ -293,7 +297,6 @@ dE/dx = dE/dy * dy/dx ...@@ -293,7 +297,6 @@ dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k}) log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
dy_i/dx_j dy_i/dx_j
= d{log(e^{x_i} / \sum_{k} e^{x_k})}/dx_j = d{log(e^{x_i} / \sum_{k} e^{x_k})}/dx_j
= d{log(e^{x_i})}/dx_j - d{log(\sum_{k} e^{x_k})}/dx_j = d{log(e^{x_i})}/dx_j - d{log(\sum_{k} e^{x_k})}/dx_j
......
...@@ -374,15 +374,15 @@ void LossBackward(XTensor * dedy, XTensor * t, XTensor * y, ...@@ -374,15 +374,15 @@ void LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
LOSS_FUNCTION_NAME LFName, LOSS_FUNCTION_NAME LFName,
int leadDim, int tBeg, int tLen, int yBeg) int leadDim, int tBeg, int tLen, int yBeg)
{ {
CheckNTErrors((tLen >= 0 && tLen < y->unitNum), "Illegal input length!"); CheckNTErrors((tLen < y->unitNum), "Illegal input length!");
CheckNTErrors((XTensor::IsIdentical(t, y)&& XTensor::IsIdentical(dedy, y)), CheckNTErrors((XTensor::IsIdentical(t, y)&& XTensor::IsIdentical(dedy, y)),
"The input tensors must be of the same size!"); "The input tensors must be of the same size!");
CheckNTErrors((t->dimSizeRDI[0] == 1 && y->dimSizeRDI[0] == 1 && dedy->dimSizeRDI[1] == 1), "TODO!"); CheckNTErrors((t->dimSizeRDI[0] == 1 && y->dimSizeRDI[0] == 1 && dedy->dimSizeRDI[0] == 1), "TODO!");
CheckNTErrors((t->order > leadDim && leadDim >= 0), "Illegal leading dimension!"); CheckNTErrors((t->order > leadDim && leadDim >= 0), "Illegal leading dimension!");
CheckNTErrors((t->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE), CheckNTErrors((t->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE),
"TODO!"); "TODO!");
int leadDimRDI = y->order - leadDim - 1; int leadDimRDI = leadDim >= 0 ? y->order - leadDim - 1 : -1;
if(leadDimRDI < 0){ if(leadDimRDI < 0){
leadDimRDI = y->dimSizeRDI[y->order - 1]; leadDimRDI = y->dimSizeRDI[y->order - 1];
tBeg = 0; tBeg = 0;
......
...@@ -31,7 +31,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -31,7 +31,6 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
loss function to measure the "number" of errors loss function to measure the "number" of errors
*/ */
/* /*
compute the loss compute the loss
>> gold - gold standard >> gold - gold standard
......
...@@ -88,7 +88,6 @@ dy/dx = 1 if x >= 0 ...@@ -88,7 +88,6 @@ dy/dx = 1 if x >= 0
>> y - output of the function >> y - output of the function
>> x - input of the function >> x - input of the function
>> size - size of output/input >> size - size of output/input
*/ */
__global__ __global__
void KernelRectifyBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size) void KernelRectifyBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size)
......
...@@ -25,7 +25,6 @@ ...@@ -25,7 +25,6 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* /*
sigmoid function y = 1/(1+exp(-x)) sigmoid function y = 1/(1+exp(-x))
>> x - input tensor >> x - input tensor
......
...@@ -95,7 +95,6 @@ sigmoid: y = 1/(1+exp(-x)) ...@@ -95,7 +95,6 @@ sigmoid: y = 1/(1+exp(-x))
>> y - output of the function >> y - output of the function
>> x - input of the function >> x - input of the function
>> size - size of output/input >> size - size of output/input
*/ */
__global__ __global__
void KernelSigmoidBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size) void KernelSigmoidBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size)
...@@ -122,7 +121,6 @@ sigmoid: y = 1/(1+exp(-x)) ...@@ -122,7 +121,6 @@ sigmoid: y = 1/(1+exp(-x))
>> dedy - dE/dy >> dedy - dE/dy
>> dedx - dE/dx >> dedx - dE/dx
>> lossName - type of loss function, e.g., cross entropy >> lossName - type of loss function, e.g., cross entropy
*/ */
void CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x, void CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx, XTensor * dedy, XTensor * dedx,
......
...@@ -23,8 +23,8 @@ ...@@ -23,8 +23,8 @@
#include "Softmax.h" #include "Softmax.h"
#include "Softmax.cuh" #include "Softmax.cuh"
#include "../XUtility.h" #include "../XUtility.h"
#include "../core/ReduceSum.h" #include "../core/reduce/ReduceSum.h"
#include "../core/ReduceMax.h" #include "../core/reduce/ReduceMax.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -22,10 +22,10 @@ ...@@ -22,10 +22,10 @@
#include "Softmax.h" #include "Softmax.h"
#include "Softmax.cuh" #include "Softmax.cuh"
#include "Loss.cuh" #include "Loss.cuh"
#include "../core/ReduceSum.h" #include "../core/reduce/ReduceSum.h"
#include "../core/Multiply.h" #include "../core/arithmetic/Multiply.h"
#include "../core/Unsqueeze.h" #include "../core/shape/Unsqueeze.h"
#include "../core/Sum.h" #include "../core/arithmetic/Sum.h"
#include "../XDevice.h" #include "../XDevice.h"
#include "../XUtility.h" #include "../XUtility.h"
......
...@@ -29,7 +29,6 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,7 +29,6 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */ /* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */
extern "C" extern "C"
void CudaSotmax(XTensor * input, XTensor * output, int leadDim); void CudaSotmax(XTensor * input, XTensor * output, int leadDim);
......
...@@ -19,23 +19,20 @@ ...@@ -19,23 +19,20 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14 * $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14
*/ */
#include "TConcatenate.h"
#include "../XTensor.h"
#include "../XDevice.h"
#include "../core/Concatenate.h"
#include "../XList.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: concatenate a list of tensors along a given dimension
* In this case, 2 * (2 * 1) -> (2 * 2), dim=1. /*
case 1: concatenate a list of tensors along a given dimension.
In this case, 2 * (2, 1) -> (2, 2), dim=1.
*/ */
bool TestConcatenate1() bool TestConcatenate1()
{ {
/* create list */ /* create list */
XList sList; XList * sList = new XList();
sList = XList();
/* a source tensor of size (2 * 1) */ /* a source tensor of size (2, 1) */
int sOrder1 = 2; int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1]; int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2; sDimSize1[0] = 2;
...@@ -45,7 +42,7 @@ bool TestConcatenate1() ...@@ -45,7 +42,7 @@ bool TestConcatenate1()
for (int i = 0; i < sOrder1; i++) for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i]; sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2 * 1) */ /* a source tensor of size (2, 1) */
int sOrder2 = 2; int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2]; int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2; sDimSize2[0] = 2;
...@@ -55,7 +52,7 @@ bool TestConcatenate1() ...@@ -55,7 +52,7 @@ bool TestConcatenate1()
for (int i = 0; i < sOrder2; i++) for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i]; sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2 * 2) */ /* a target tensor of size (2, 2) */
int tOrder = 2; int tOrder = 2;
int * tDimSize = new int[tOrder]; int * tDimSize = new int[tOrder];
tDimSize[0] = 2; tDimSize[0] = 2;
...@@ -65,12 +62,12 @@ bool TestConcatenate1() ...@@ -65,12 +62,12 @@ bool TestConcatenate1()
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
DTYPE sData1[2][1] = { {0.0}, DTYPE sData1[2][1] = { {0.0F},
{1.0} }; {1.0F} };
DTYPE sData2[2][1] = { {2.0}, DTYPE sData2[2][1] = { {2.0F},
{3.0} }; {3.0F} };
DTYPE answer[2][2] = { {0.0, 2.0}, DTYPE answer[2][2] = { {0.0F, 2.0F},
{1.0, 3.0} }; {1.0F, 3.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -86,11 +83,11 @@ bool TestConcatenate1() ...@@ -86,11 +83,11 @@ bool TestConcatenate1()
t->SetZeroAll(); t->SetZeroAll();
/* add tensors to list */ /* add tensors to list */
sList.Add(s1); sList->Add(s1);
sList.Add(s2); sList->Add(s2);
/* call concatenate function */ /* call Concatenate function */
Concatenate(&sList, t, 1); Concatenate(sList, t, 1);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -99,9 +96,6 @@ bool TestConcatenate1() ...@@ -99,9 +96,6 @@ bool TestConcatenate1()
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* clear list */
sList.Clear();
/* create tensor */ /* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
...@@ -112,40 +106,56 @@ bool TestConcatenate1() ...@@ -112,40 +106,56 @@ bool TestConcatenate1()
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* clear list */
sList->Clear();
/* add tensors to list*/ /* add tensors to list*/
sList.Add(sGPU1); sList->Add(sGPU1);
sList.Add(sGPU2); sList->Add(sGPU2);
/* call concatenate function */ /* call Concatenate function */
Concatenate(&sList, tGPU, 1); Concatenate(sList, tGPU, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU; delete sList;
delete[] sDimSize1, sDimSize2, tDimSize; delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s1, s2, t; delete sList;
delete[] sDimSize1, sDimSize2, tDimSize; delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 2: concatenate a list of tensors along a given dimension /*
* In this case, 2 * (2 * 1) -> (4 * 1), dim=0. case 2: concatenate a list of tensors along a given dimension.
In this case, 2 * (2, 1) -> (4, 1), dim=0.
*/ */
bool TestConcatenate2() bool TestConcatenate2()
{ {
/* create list */ /* create list */
XList sList; XList * sList = new XList();
sList = XList();
/* a source tensor of size (2 * 1) */ /* a source tensor of size (2, 1) */
int sOrder1 = 2; int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1]; int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2; sDimSize1[0] = 2;
...@@ -155,7 +165,7 @@ bool TestConcatenate2() ...@@ -155,7 +165,7 @@ bool TestConcatenate2()
for (int i = 0; i < sOrder1; i++) for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i]; sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2 * 1) */ /* a source tensor of size (2, 1) */
int sOrder2 = 2; int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2]; int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2; sDimSize2[0] = 2;
...@@ -165,7 +175,7 @@ bool TestConcatenate2() ...@@ -165,7 +175,7 @@ bool TestConcatenate2()
for (int i = 0; i < sOrder2; i++) for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i]; sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (4 * 1) */ /* a target tensor of size (4, 1) */
int tOrder = 2; int tOrder = 2;
int * tDimSize = new int[tOrder]; int * tDimSize = new int[tOrder];
tDimSize[0] = 4; tDimSize[0] = 4;
...@@ -175,14 +185,14 @@ bool TestConcatenate2() ...@@ -175,14 +185,14 @@ bool TestConcatenate2()
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
DTYPE sData1[2][1] = { {0.0}, DTYPE sData1[2][1] = { {0.0F},
{1.0} }; {1.0F} };
DTYPE sData2[2][1] = { {2.0}, DTYPE sData2[2][1] = { {2.0F},
{3.0} }; {3.0F} };
DTYPE answer[4][1] = { {0.0}, DTYPE answer[4][1] = { {0.0F},
{1.0}, {1.0F},
{2.0}, {2.0F},
{3.0} }; {3.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -198,11 +208,11 @@ bool TestConcatenate2() ...@@ -198,11 +208,11 @@ bool TestConcatenate2()
t->SetZeroAll(); t->SetZeroAll();
/* add tensors to list */ /* add tensors to list */
sList.Add(s1); sList->Add(s1);
sList.Add(s2); sList->Add(s2);
/* call concatenate function */ /* call Concatenate function */
Concatenate(&sList, t, 0); Concatenate(sList, t, 0);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -211,9 +221,6 @@ bool TestConcatenate2() ...@@ -211,9 +221,6 @@ bool TestConcatenate2()
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* clear list */
sList.Clear();
/* create tensor */ /* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
...@@ -224,39 +231,56 @@ bool TestConcatenate2() ...@@ -224,39 +231,56 @@ bool TestConcatenate2()
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* clear list */
sList->Clear();
/* add tensors to list*/ /* add tensors to list*/
sList.Add(sGPU1); sList->Add(sGPU1);
sList.Add(sGPU2); sList->Add(sGPU2);
/* call concatenate function */ /* call Concatenate function */
Concatenate(&sList, tGPU, 0); Concatenate(sList, tGPU, 0);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU; delete sList;
delete[] sDimSize1, sDimSize2, tDimSize; delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s1, s2, t; delete sList;
delete[] sDimSize1, sDimSize2, tDimSize; delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 3: concatenate a list of tensors along a given dimension /*
* In this case, (2 * 1) + (2 * 2) -> (2 * 3), dim=1. case 3: concatenate a list of tensors along a given dimension.
In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
*/ */
bool TestConcatenate3() bool TestConcatenate3()
{ {
/* create list */ /* create list */
XList sList; XList * sList = new XList();
sList = XList();
/* a source tensor of size (2 * 1) */ /* a source tensor of size (2, 1) */
int sOrder1 = 2; int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1]; int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2; sDimSize1[0] = 2;
...@@ -266,7 +290,7 @@ bool TestConcatenate3() ...@@ -266,7 +290,7 @@ bool TestConcatenate3()
for (int i = 0; i < sOrder1; i++) for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i]; sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2 * 2) */ /* a source tensor of size (2, 2) */
int sOrder2 = 2; int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2]; int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2; sDimSize2[0] = 2;
...@@ -276,7 +300,7 @@ bool TestConcatenate3() ...@@ -276,7 +300,7 @@ bool TestConcatenate3()
for (int i = 0; i < sOrder2; i++) for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i]; sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2 * 3) */ /* a target tensor of size (2, 3) */
int tOrder = 2; int tOrder = 2;
int * tDimSize = new int[tOrder]; int * tDimSize = new int[tOrder];
tDimSize[0] = 2; tDimSize[0] = 2;
...@@ -286,12 +310,12 @@ bool TestConcatenate3() ...@@ -286,12 +310,12 @@ bool TestConcatenate3()
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
DTYPE sData1[2][1] = { {0.0}, DTYPE sData1[2][1] = { {0.0F},
{1.0} }; {1.0F} };
DTYPE sData2[2][2] = { {2.0, 3.0}, DTYPE sData2[2][2] = { {2.0F, 3.0F},
{4.0, 5.0} }; {4.0F, 5.0F} };
DTYPE answer[2][3] = { {0.0, 2.0, 3.0}, DTYPE answer[2][3] = { {0.0F, 2.0F, 3.0F},
{1.0, 4.0, 5.0} }; {1.0F, 4.0F, 5.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -307,11 +331,11 @@ bool TestConcatenate3() ...@@ -307,11 +331,11 @@ bool TestConcatenate3()
t->SetZeroAll(); t->SetZeroAll();
/* add tensors to list */ /* add tensors to list */
sList.Add(s1); sList->Add(s1);
sList.Add(s2); sList->Add(s2);
/* call concatenate function */ /* call Concatenate function */
Concatenate(&sList, t, 1); Concatenate(sList, t, 1);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -320,9 +344,6 @@ bool TestConcatenate3() ...@@ -320,9 +344,6 @@ bool TestConcatenate3()
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* clear list */
sList.Clear();
/* create tensor */ /* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
...@@ -333,36 +354,53 @@ bool TestConcatenate3() ...@@ -333,36 +354,53 @@ bool TestConcatenate3()
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* clear list */
sList->Clear();
/* add tensors to list*/ /* add tensors to list*/
sList.Add(sGPU1); sList->Add(sGPU1);
sList.Add(sGPU2); sList->Add(sGPU2);
/* call concatenate function */ /* call Concatenate function */
Concatenate(&sList, tGPU, 1); Concatenate(sList, tGPU, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU; delete sList;
delete[] sDimSize1, sDimSize2, tDimSize; delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s1, s2, t; delete sList;
delete[] sDimSize1, sDimSize2, tDimSize; delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 4: concatenate two tensors along a given dimension /*
* In this case, (2 * 1) + (2 * 2) -> (2 * 3), dim=1. case 4: concatenate two tensors along a given dimension.
In this case, (2, 1), (2, 2) -> (2, 3), dim=1.
*/ */
bool TestConcatenate4() bool TestConcatenate4()
{ {
/* a source tensor of size (2 * 1) */ /* a source tensor of size (2, 1) */
int sOrder1 = 2; int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1]; int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2; sDimSize1[0] = 2;
...@@ -372,7 +410,7 @@ bool TestConcatenate4() ...@@ -372,7 +410,7 @@ bool TestConcatenate4()
for (int i = 0; i < sOrder1; i++) for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i]; sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2 * 2) */ /* a source tensor of size (2, 2) */
int sOrder2 = 2; int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2]; int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2; sDimSize2[0] = 2;
...@@ -382,7 +420,7 @@ bool TestConcatenate4() ...@@ -382,7 +420,7 @@ bool TestConcatenate4()
for (int i = 0; i < sOrder2; i++) for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i]; sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2 * 3) */ /* a target tensor of size (2, 3) */
int tOrder = 2; int tOrder = 2;
int * tDimSize = new int[tOrder]; int * tDimSize = new int[tOrder];
tDimSize[0] = 2; tDimSize[0] = 2;
...@@ -392,12 +430,12 @@ bool TestConcatenate4() ...@@ -392,12 +430,12 @@ bool TestConcatenate4()
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
DTYPE sData1[2][1] = { {0.0}, DTYPE sData1[2][1] = { {0.0F},
{1.0} }; {1.0F} };
DTYPE sData2[2][2] = { {2.0, 3.0}, DTYPE sData2[2][2] = { {2.0F, 3.0F},
{4.0, 5.0} }; {4.0F, 5.0F} };
DTYPE answer[2][3] = { {0.0, 2.0, 3.0}, DTYPE answer[2][3] = { {0.0F, 2.0F, 3.0F},
{1.0, 4.0, 5.0} }; {1.0F, 4.0F, 5.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -412,7 +450,7 @@ bool TestConcatenate4() ...@@ -412,7 +450,7 @@ bool TestConcatenate4()
s2->SetData(sData2, sUnitNum2); s2->SetData(sData2, sUnitNum2);
t->SetZeroAll(); t->SetZeroAll();
/* call concatenate function */ /* call Concatenate function */
Concatenate(s1, s2, t, 1); Concatenate(s1, s2, t, 1);
/* check results */ /* check results */
...@@ -432,21 +470,32 @@ bool TestConcatenate4() ...@@ -432,21 +470,32 @@ bool TestConcatenate4()
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call concatenate function */ /* call Concatenate function */
Concatenate(sGPU1, sGPU2, tGPU, 1); Concatenate(sGPU1, sGPU2, tGPU, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU; delete s1;
delete[] sDimSize1, sDimSize2, tDimSize; delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s1, s2, t; delete s1;
delete[] sDimSize1, sDimSize2, tDimSize; delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
...@@ -458,10 +507,9 @@ TODO!! ...@@ -458,10 +507,9 @@ TODO!!
*/ */
/* test for Concatenate Function */ /* test for Concatenate Function */
extern "C"
bool TestConcatenate() bool TestConcatenate()
{ {
XPRINT(0, stdout, "[TEST CONCATENATE] -------------\n"); XPRINT(0, stdout, "[TEST CONCATENATE] concatenate a list of tensors or two tensors along a given dimension \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_CONCATENATE_H__ #ifndef __TEST_CONCATENATE_H__
#define __TEST_CONCATENATE_H__ #define __TEST_CONCATENATE_H__
#include "../core/Concatenate.h" #include "../core/shape/Concatenate.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,23 +19,21 @@ ...@@ -19,23 +19,21 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14 * $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14
*/ */
#include "../XTensor.h"
#include "../XDevice.h"
#include "../core/ConcatenateSolely.h"
#include "../XList.h" #include "../XList.h"
#include "TConcatenateSolely.h"
namespace nts { // namespace nt(NiuTrans.Tensor) namespace nts { // namespace nt(NiuTrans.Tensor)
/* case 1: concatenate a list of tensors along a given dimension
* In this case, 2 * (2 * 1) -> (2 * 2), dim=1. /*
case 1: concatenate a list of tensors along a given dimension
In this case, 2 * (2, 1) -> (2, 2), dim=1.
*/ */
bool TestConcatenateSolely1() bool TestConcatenateSolely1()
{ {
/* create list */ /* create list */
XList sList; XList * sList = new XList();
sList = XList();
/* a source tensor of size 2 * 1 */ /* a source tensor of size (2, 1) */
int sOrder1 = 2; int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1]; int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2; sDimSize1[0] = 2;
...@@ -45,7 +43,7 @@ bool TestConcatenateSolely1() ...@@ -45,7 +43,7 @@ bool TestConcatenateSolely1()
for (int i = 0; i < sOrder1; i++) for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i]; sUnitNum1 *= sDimSize1[i];
/* a source tensor of size 2 * 1 */ /* a source tensor of size (2, 1) */
int sOrder2 = 2; int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2]; int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2; sDimSize2[0] = 2;
...@@ -55,7 +53,7 @@ bool TestConcatenateSolely1() ...@@ -55,7 +53,7 @@ bool TestConcatenateSolely1()
for (int i = 0; i < sOrder2; i++) for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i]; sUnitNum2 *= sDimSize2[i];
/* a target tensor of size 2 * 2 */ /* a target tensor of size (2, 2) */
int tOrder = 2; int tOrder = 2;
int * tDimSize = new int[tOrder]; int * tDimSize = new int[tOrder];
tDimSize[0] = 2; tDimSize[0] = 2;
...@@ -65,12 +63,12 @@ bool TestConcatenateSolely1() ...@@ -65,12 +63,12 @@ bool TestConcatenateSolely1()
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
DTYPE sData1[2][1] = { {0.0}, DTYPE sData1[2][1] = { {0.0F},
{1.0} }; {1.0F} };
DTYPE sData2[2][1] = { {2.0}, DTYPE sData2[2][1] = { {2.0F},
{3.0} }; {3.0F} };
DTYPE answer[2][2] = { {0.0, 2.0}, DTYPE answer[2][2] = { {0.0F, 2.0F},
{1.0, 3.0} }; {1.0F, 3.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -86,11 +84,11 @@ bool TestConcatenateSolely1() ...@@ -86,11 +84,11 @@ bool TestConcatenateSolely1()
t->SetZeroAll(); t->SetZeroAll();
/* add tensors to list */ /* add tensors to list */
sList.Add(s1); sList->Add(s1);
sList.Add(s2); sList->Add(s2);
/* call concatenatesolely function */ /* call ConcatenateSolely function */
ConcatenateSolely(&sList, t, 1); ConcatenateSolely(sList, t, 1);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -99,9 +97,6 @@ bool TestConcatenateSolely1() ...@@ -99,9 +97,6 @@ bool TestConcatenateSolely1()
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* clear list */
sList.Clear();
/* create tensor */ /* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
...@@ -112,23 +107,35 @@ bool TestConcatenateSolely1() ...@@ -112,23 +107,35 @@ bool TestConcatenateSolely1()
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* clear list */
sList->Clear();
/* add tensors to list*/ /* add tensors to list*/
sList.Add(sGPU1); sList->Add(sGPU1);
sList.Add(sGPU2); sList->Add(sGPU2);
/* call concatenatesolely function */ /* call ConcatenateSolely function */
ConcatenateSolely(&sList, tGPU, 1); ConcatenateSolely(sList, tGPU, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU; delete sList;
delete[] sDimSize1, sDimSize2, tDimSize; delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete sList;
delete s1; delete s1;
delete s2; delete s2;
delete t; delete t;
...@@ -140,15 +147,16 @@ bool TestConcatenateSolely1() ...@@ -140,15 +147,16 @@ bool TestConcatenateSolely1()
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 2: concatenate a list of tensors along a given dimension /*
* In this case, 2 * (2 * 1) -> (4 * 1), dim=0. case 2: concatenate a list of tensors along a given dimension
In this case, 2 * (2, 1) -> (4, 1), dim=0.
*/ */
bool TestConcatenateSolely2() bool TestConcatenateSolely2()
{ {
/* create list */ /* create list */
XList sList; XList * sList = new XList();
sList = XList();
/* a source tensor of size 2 * 1 */ /* a source tensor of size (2, 1) */
int sOrder1 = 2; int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1]; int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2; sDimSize1[0] = 2;
...@@ -158,7 +166,7 @@ bool TestConcatenateSolely2() ...@@ -158,7 +166,7 @@ bool TestConcatenateSolely2()
for (int i = 0; i < sOrder1; i++) for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i]; sUnitNum1 *= sDimSize1[i];
/* a source tensor of size 2 * 1 */ /* a source tensor of size (2, 1) */
int sOrder2 = 2; int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2]; int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2; sDimSize2[0] = 2;
...@@ -168,7 +176,7 @@ bool TestConcatenateSolely2() ...@@ -168,7 +176,7 @@ bool TestConcatenateSolely2()
for (int i = 0; i < sOrder2; i++) for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i]; sUnitNum2 *= sDimSize2[i];
/* a target tensor of size 4 * 1 */ /* a target tensor of size (4, 1) */
int tOrder = 2; int tOrder = 2;
int * tDimSize = new int[tOrder]; int * tDimSize = new int[tOrder];
tDimSize[0] = 4; tDimSize[0] = 4;
...@@ -178,14 +186,14 @@ bool TestConcatenateSolely2() ...@@ -178,14 +186,14 @@ bool TestConcatenateSolely2()
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
DTYPE sData1[2][1] = { {0.0}, DTYPE sData1[2][1] = { {0.0F},
{1.0} }; {1.0F} };
DTYPE sData2[2][1] = { {2.0}, DTYPE sData2[2][1] = { {2.0F},
{3.0} }; {3.0F} };
DTYPE answer[4][1] = { {0.0}, DTYPE answer[4][1] = { {0.0F},
{1.0}, {1.0F},
{2.0}, {2.0F},
{3.0} }; {3.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -201,11 +209,11 @@ bool TestConcatenateSolely2() ...@@ -201,11 +209,11 @@ bool TestConcatenateSolely2()
t->SetZeroAll(); t->SetZeroAll();
/* add tensors to list */ /* add tensors to list */
sList.Add(s1); sList->Add(s1);
sList.Add(s2); sList->Add(s2);
/* call concatenatesolely function */ /* call ConcatenateSolely function */
ConcatenateSolely(&sList, t, 0); ConcatenateSolely(sList, t, 0);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -214,9 +222,6 @@ bool TestConcatenateSolely2() ...@@ -214,9 +222,6 @@ bool TestConcatenateSolely2()
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* clear list */
sList.Clear();
/* create tensor */ /* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
...@@ -227,39 +232,56 @@ bool TestConcatenateSolely2() ...@@ -227,39 +232,56 @@ bool TestConcatenateSolely2()
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* clear list */
sList->Clear();
/* add tensors to list*/ /* add tensors to list*/
sList.Add(sGPU1); sList->Add(sGPU1);
sList.Add(sGPU2); sList->Add(sGPU2);
/* call concatenatesolely function */ /* call concatenatesolely function */
ConcatenateSolely(&sList, tGPU, 0); ConcatenateSolely(sList, tGPU, 0);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU; delete sList;
delete[] sDimSize1, sDimSize2, tDimSize; delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s1, s2, t; delete sList;
delete[] sDimSize1, sDimSize2, tDimSize; delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 3: concatenate a list of tensors along a given dimension /*
* In this case, (2 * 1) + (2 * 2) -> (2 * 3), dim=1. case 3: concatenate a list of tensors along a given dimension
In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
*/ */
bool TestConcatenateSolely3() bool TestConcatenateSolely3()
{ {
/* create list */ /* create list */
XList sList; XList * sList = new XList();
sList = XList();
/* a source tensor of size (2 * 1) */ /* a source tensor of size (2, 1) */
int sOrder1 = 2; int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1]; int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2; sDimSize1[0] = 2;
...@@ -269,7 +291,7 @@ bool TestConcatenateSolely3() ...@@ -269,7 +291,7 @@ bool TestConcatenateSolely3()
for (int i = 0; i < sOrder1; i++) for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i]; sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2 * 2) */ /* a source tensor of size (2, 2) */
int sOrder2 = 2; int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2]; int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2; sDimSize2[0] = 2;
...@@ -279,7 +301,7 @@ bool TestConcatenateSolely3() ...@@ -279,7 +301,7 @@ bool TestConcatenateSolely3()
for (int i = 0; i < sOrder2; i++) for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i]; sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2 * 3) */ /* a target tensor of size (2, 3) */
int tOrder = 2; int tOrder = 2;
int * tDimSize = new int[tOrder]; int * tDimSize = new int[tOrder];
tDimSize[0] = 2; tDimSize[0] = 2;
...@@ -289,12 +311,12 @@ bool TestConcatenateSolely3() ...@@ -289,12 +311,12 @@ bool TestConcatenateSolely3()
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
DTYPE sData1[2][1] = { {0.0}, DTYPE sData1[2][1] = { {0.0F},
{1.0} }; {1.0F} };
DTYPE sData2[2][2] = { {2.0, 3.0}, DTYPE sData2[2][2] = { {2.0F, 3.0F},
{4.0, 5.0} }; {4.0F, 5.0F} };
DTYPE answer[2][3] = { {0.0, 2.0, 3.0}, DTYPE answer[2][3] = { {0.0F, 2.0F, 3.0F},
{1.0, 4.0, 5.0} }; {1.0F, 4.0F, 5.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -310,11 +332,11 @@ bool TestConcatenateSolely3() ...@@ -310,11 +332,11 @@ bool TestConcatenateSolely3()
t->SetZeroAll(); t->SetZeroAll();
/* add tensors to list */ /* add tensors to list */
sList.Add(s1); sList->Add(s1);
sList.Add(s2); sList->Add(s2);
/* call concatenatesolely function */ /* call ConcatenateSolely function */
ConcatenateSolely(&sList, t, 1); ConcatenateSolely(sList, t, 1);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -323,9 +345,6 @@ bool TestConcatenateSolely3() ...@@ -323,9 +345,6 @@ bool TestConcatenateSolely3()
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* clear list */
sList.Clear();
/* create tensor */ /* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
...@@ -336,25 +355,41 @@ bool TestConcatenateSolely3() ...@@ -336,25 +355,41 @@ bool TestConcatenateSolely3()
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* clear list */
sList->Clear();
/* add tensors to list*/ /* add tensors to list*/
sList.Add(sGPU1); sList->Add(sGPU1);
sList.Add(sGPU2); sList->Add(sGPU2);
/* call concatenatesolely function */ /* call ConcatenateSolely function */
ConcatenateSolely(&sList, tGPU, 1); ConcatenateSolely(sList, tGPU, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU; delete sList;
delete[] sDimSize1, sDimSize2, tDimSize; delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s1, s2, t; delete sList;
delete[] sDimSize1, sDimSize2, tDimSize; delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
...@@ -366,10 +401,9 @@ TODO!! ...@@ -366,10 +401,9 @@ TODO!!
*/ */
/* test for ConcatenateSolely Function */ /* test for ConcatenateSolely Function */
extern "C"
bool TestConcatenateSolely() bool TestConcatenateSolely()
{ {
XPRINT(0, stdout, "[TEST CONCATENATESOLELY] -------------\n"); XPRINT(0, stdout, "[TEST CONCATENATESOLELY] concatenate a list of tensors along a given dimension \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_CONCATENATESOLELY_H__ #ifndef __TEST_CONCATENATESOLELY_H__
#define __TEST_CONCATENATESOLELY_H__ #define __TEST_CONCATENATESOLELY_H__
#include "../core/ConcatenateSolely.h" #include "../core/shape/ConcatenateSolely.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#include "TCopyIndexed.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: copy indexed sub-tensors.
In this case, (3, 2, 3) -> (3, 2, 2), dim = 2, indexSize = 2,
srcIndex = [0, 2], tgtIndex = [0, 1], copyNum = 1.
<< return - true if the CPU result (and the GPU result when USE_CUDA is defined)
            matches the expected answer
*/
bool TestCopyIndexed1()
{
    /* an input tensor of size (3, 2, 3) */
    int sOrder = 3;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 3;
    sDimSize[1] = 2;
    sDimSize[2] = 3;

    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    /* an output tensor of size (3, 2, 2) */
    int tOrder = 3;
    int * tDimSize = new int[tOrder];
    tDimSize[0] = 3;
    tDimSize[1] = 2;
    tDimSize[2] = 2;

    int tUnitNum = 1;
    for (int i = 0; i < tOrder; i++)
        tUnitNum *= tDimSize[i];

    DTYPE sData[3][2][3] = { { {0.0F, -1.0F, 2.0F},
                               {2.0F, 1.0F, 3.0F} },
                             { {1.0F, 2.0F, 4.0F},
                               {3.0F, 1.0F, 2.0F} },
                             { {-1.0F, 3.0F, 2.0F},
                               {1.0F, -1.0F, 0.0F} } };

    /* the expected output keeps elements 0 and 2 of the last dimension of sData */
    DTYPE answer[3][2][2] = { { {0.0F, 2.0F},
                                {2.0F, 3.0F} },
                              { {1.0F, 4.0F},
                                {3.0F, 2.0F} },
                              { {-1.0F, 2.0F},
                                {1.0F, 0.0F} } };

    int dim = 2;
    int indexSize = 2;
    int srcIndex[2] = {0, 2};
    int tgtIndex[2] = {0, 1};
    int copyNum = 1;

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * s = NewTensor(sOrder, sDimSize);
    XTensor * t = NewTensor(tOrder, tDimSize);

    /* initialize variables */
    s->SetData(sData, sUnitNum);
    t->SetZeroAll();

    /* call CopyIndexed function */
    CopyIndexed(s, t, dim, srcIndex, indexSize, tgtIndex, copyNum);

    /* check results */
    cpuTest = t->CheckData(answer, tUnitNum);

    /* stays true when the library is built without CUDA, so the final
       result then depends on the CPU test only */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* GPU test */

    /* create tensors (the target uses its own order, not the source's) */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);

    /* initialize variables */
    sGPU->SetData(sData, sUnitNum);
    tGPU->SetZeroAll();

    /* call CopyIndexed function */
    CopyIndexed(sGPU, tGPU, dim, srcIndex, indexSize, tgtIndex, copyNum);

    /* check results */
    gpuTest = tGPU->CheckData(answer, tUnitNum);

    /* destroy GPU variables */
    delete sGPU;
    delete tGPU;
#endif // USE_CUDA

    /* destroy variables shared by both configurations */
    delete s;
    delete t;
    delete[] sDimSize;
    delete[] tDimSize;

    return cpuTest && gpuTest;
}
/* other cases */
/*
TODO!!
*/
/*
test for CopyIndexed Function
Runs every CopyIndexed test case, reports each outcome through XPRINT,
and prints an overall summary.
<< return - true if all cases passed, false otherwise
*/
bool TestCopyIndexed()
{
    XPRINT(0, stdout, "[TEST CopyIndexed] copy indexed sub-tensors \n");

    bool allPassed = true;

    /* case 1 test */
    const bool case1Passed = TestCopyIndexed1();
    if (case1Passed) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#ifndef __TEST_COPYINDEXED_H__
#define __TEST_COPYINDEXED_H__
#include "../core/movement/CopyIndexed.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for CopyIndexed Function (declared with C linkage)
<< return - true if every CopyIndexed test case passed, false otherwise
*/
extern "C"
bool TestCopyIndexed();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_COPYINDEXED_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#include "../XUtility.h"
#include "TCopyValues.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: copy tensor s to tensor t
In this case, a (2, 4) tensor is copied into a zeroed tensor of the same size.
<< return - true if the CPU result (and the GPU result when USE_CUDA is defined)
            equals the source data
*/
bool TestCopyValues1()
{
    /* an input tensor of size (2, 4) */
    int sOrder = 2;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 4;

    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * s = NewTensor(sOrder, sDimSize);
    XTensor * t = NewTensor(sOrder, sDimSize);

    /* initialize variables */
    s->SetData(sData, sUnitNum);
    t->SetZeroAll();

    /* call CopyValues function */
    CopyValues(s, t);

    /* check results */
    cpuTest = t->CheckData(s->data, sUnitNum);

    /* stays true when the library is built without CUDA, so the final
       result then depends on the CPU test only */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* GPU test */

    /* create tensors */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);

    /* initialize variables: the target starts from zero (as in the CPU test)
       so the check below can only pass if CopyValues really wrote the data */
    sGPU->SetData(sData, sUnitNum);
    tGPU->SetZeroAll();

    /* call CopyValues function */
    CopyValues(sGPU, tGPU);

    /* check results: bring the source data back to a host buffer
       and compare the target against it */
    DTYPE * dataGPU = (DTYPE*)sGPU->data;
    int size = sUnitNum * sGPU->unitSize;
    char * dataCPU = new char[size];
    XMemCopy(dataCPU, -1, dataGPU, sGPU->devID, size);
    gpuTest = tGPU->CheckData(dataCPU, sUnitNum);

    /* destroy GPU variables, including the temporary host buffer */
    delete[] dataCPU;
    delete sGPU;
    delete tGPU;
#endif // USE_CUDA

    /* destroy variables shared by both configurations */
    delete s;
    delete t;
    delete[] sDimSize;

    return cpuTest && gpuTest;
}
/* other cases */
/*
TODO!!
*/
/*
test for CopyValues Function
Runs every CopyValues test case, reports each outcome through XPRINT,
and prints an overall summary.
<< return - true if all cases passed, false otherwise
*/
bool TestCopyValues()
{
    XPRINT(0, stdout, "[TEST CopyValues] copy tensor s to tensor t \n");

    bool allPassed = true;

    /* case 1 test */
    const bool case1Passed = TestCopyValues1();
    if (case1Passed) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#ifndef __TEST_COPYVALUES_H__
#define __TEST_COPYVALUES_H__
#include "../core/movement/CopyValues.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for CopyValues Function (declared with C linkage)
<< return - true if every CopyValues test case passed, false otherwise
*/
extern "C"
bool TestCopyValues();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_COPYVALUES_H__
...@@ -19,16 +19,14 @@ ...@@ -19,16 +19,14 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-20 * $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-20
*/ */
#include "THardTanH.h"
#include "../XTensor.h"
#include "../XDevice.h"
#include "../function/HardTanH.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: hard tanh function */ /* case 1: hard tanh function */
bool TestHardTanH1() bool TestHardTanH1()
{ {
/* a x tensor of size 2 * 3 */ /* a x tensor of size (2, 3) */
int xOrder = 2; int xOrder = 2;
int * xDimSize = new int[xOrder]; int * xDimSize = new int[xOrder];
xDimSize[0] = 2; xDimSize[0] = 2;
...@@ -38,7 +36,7 @@ bool TestHardTanH1() ...@@ -38,7 +36,7 @@ bool TestHardTanH1()
for (int i = 0; i < xOrder; i++) for (int i = 0; i < xOrder; i++)
xUnitNum *= xDimSize[i]; xUnitNum *= xDimSize[i];
/* a y tensor of size 2 * 3 */ /* a y tensor of size (2, 3) */
int yOrder = 2; int yOrder = 2;
int * yDimSize = new int[yOrder]; int * yDimSize = new int[yOrder];
yDimSize[0] = 2; yDimSize[0] = 2;
...@@ -48,10 +46,10 @@ bool TestHardTanH1() ...@@ -48,10 +46,10 @@ bool TestHardTanH1()
for (int i = 0; i < yOrder; i++) for (int i = 0; i < yOrder; i++)
yUnitNum *= yDimSize[i]; yUnitNum *= yDimSize[i];
DTYPE xData[2][3] = { {0.5, -1.0, 2.0}, DTYPE xData[2][3] = { {0.5F, -1.0F, 2.0F},
{3.5, -4.5, 1.0} }; {3.5F, -4.5F, 1.0F} };
DTYPE answer[2][3] = { {0.5, -1.0, 1.0}, DTYPE answer[2][3] = { {0.5F, -1.0F, 1.0F},
{1.0, -1.0, 1.0} }; {1.0F, -1.0F, 1.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -68,7 +66,7 @@ bool TestHardTanH1() ...@@ -68,7 +66,7 @@ bool TestHardTanH1()
HardTanH(x, y); HardTanH(x, y);
/* check results */ /* check results */
cpuTest = y->CheckData(answer, yUnitNum); cpuTest = y->CheckData(answer, yUnitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -86,11 +84,15 @@ bool TestHardTanH1() ...@@ -86,11 +84,15 @@ bool TestHardTanH1()
HardTanH(xGPU, yGPU); HardTanH(xGPU, yGPU);
/* check results */ /* check results */
gpuTest = yGPU->CheckData(answer, yUnitNum); gpuTest = yGPU->CheckData(answer, yUnitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete x, y, xGPU, yGPU; delete x;
delete[] xDimSize, yDimSize; delete y;
delete xGPU;
delete yGPU;
delete[] xDimSize;
delete[] yDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
...@@ -104,12 +106,13 @@ bool TestHardTanH1() ...@@ -104,12 +106,13 @@ bool TestHardTanH1()
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 2: backward computation /*
* In this case, lossName=CROSSENTROPY. case 2: backward computation
In this case, lossName=CROSSENTROPY.
*/ */
bool TestHardTanH2() bool TestHardTanH2()
{ {
/* a x tensor of size 2 * 3 */ /* a x tensor of size (2, 3) */
int xOrder = 2; int xOrder = 2;
int * xDimSize = new int[xOrder]; int * xDimSize = new int[xOrder];
xDimSize[0] = 2; xDimSize[0] = 2;
...@@ -119,7 +122,7 @@ bool TestHardTanH2() ...@@ -119,7 +122,7 @@ bool TestHardTanH2()
for (int i = 0; i < xOrder; i++) for (int i = 0; i < xOrder; i++)
xUnitNum *= xDimSize[i]; xUnitNum *= xDimSize[i];
/* a y tensor of size 2 * 3 */ /* a y tensor of size (2, 3) */
int yOrder = 2; int yOrder = 2;
int * yDimSize = new int[yOrder]; int * yDimSize = new int[yOrder];
yDimSize[0] = 2; yDimSize[0] = 2;
...@@ -129,7 +132,7 @@ bool TestHardTanH2() ...@@ -129,7 +132,7 @@ bool TestHardTanH2()
for (int i = 0; i < yOrder; i++) for (int i = 0; i < yOrder; i++)
yUnitNum *= yDimSize[i]; yUnitNum *= yDimSize[i];
/* a gold tensor of size 2 * 3 */ /* a gold tensor of size (2, 3) */
int goldOrder = 2; int goldOrder = 2;
int * goldDimSize = new int[goldOrder]; int * goldDimSize = new int[goldOrder];
goldDimSize[0] = 2; goldDimSize[0] = 2;
...@@ -139,7 +142,7 @@ bool TestHardTanH2() ...@@ -139,7 +142,7 @@ bool TestHardTanH2()
for (int i = 0; i < goldOrder; i++) for (int i = 0; i < goldOrder; i++)
goldUnitNum *= goldDimSize[i]; goldUnitNum *= goldDimSize[i];
/* a dedy tensor of size 2 * 3 */ /* a dedy tensor of size (2, 3) */
int dedyOrder = 2; int dedyOrder = 2;
int * dedyDimSize = new int[dedyOrder]; int * dedyDimSize = new int[dedyOrder];
dedyDimSize[0] = 2; dedyDimSize[0] = 2;
...@@ -149,7 +152,7 @@ bool TestHardTanH2() ...@@ -149,7 +152,7 @@ bool TestHardTanH2()
for (int i = 0; i < dedyOrder; i++) for (int i = 0; i < dedyOrder; i++)
dedyUnitNum *= dedyDimSize[i]; dedyUnitNum *= dedyDimSize[i];
/* a dedx tensor of size 2 * 3 */ /* a dedx tensor of size (2, 3) */
int dedxOrder = 2; int dedxOrder = 2;
int * dedxDimSize = new int[dedxOrder]; int * dedxDimSize = new int[dedxOrder];
dedxDimSize[0] = 2; dedxDimSize[0] = 2;
...@@ -159,16 +162,16 @@ bool TestHardTanH2() ...@@ -159,16 +162,16 @@ bool TestHardTanH2()
for (int i = 0; i < dedxOrder; i++) for (int i = 0; i < dedxOrder; i++)
dedxUnitNum *= dedxDimSize[i]; dedxUnitNum *= dedxDimSize[i];
DTYPE xData[2][3] = { {0.5, -1.0, 2.0}, DTYPE xData[2][3] = { {0.5F, -1.0F, 2.0F},
{3.5, -4.5, 1.0} }; {3.5F, -4.5F, 1.0F} };
DTYPE yData[2][3] = { {0.5, -1.0, 1.0}, DTYPE yData[2][3] = { {0.5F, -1.0F, 1.0F},
{1.0, -1.0, 1.0} }; {1.0F, -1.0F, 1.0F} };
DTYPE goldData[2][3] = { {1.0, 1.0, 1.0}, DTYPE goldData[2][3] = { {1.0F, 1.0F, 1.0F},
{1.0, 1.0, 1.0} }; {1.0F, 1.0F, 1.0F} };
DTYPE dedyData[2][3] = { {-2.0, 1.0, -1.0}, DTYPE dedyData[2][3] = { {-2.0F, 1.0F, -1.0F},
{-1.0, 1.0, -1.0} }; {-1.0F, 1.0F, -1.0F} };
DTYPE answer[2][3] = { {-2.0, 1.0, 0.0}, DTYPE answer[2][3] = { {-2.0F, 1.0F, 0.0F},
{0.0, 0.0, -1.0} }; {0.0F, 0.0F, -1.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -231,12 +234,13 @@ bool TestHardTanH2() ...@@ -231,12 +234,13 @@ bool TestHardTanH2()
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 3: backward computation /*
* In this case, lossName=SQUAREDERROR. case 3: backward computation
In this case, lossName=SQUAREDERROR.
*/ */
bool TestHardTanH3() bool TestHardTanH3()
{ {
/* a x tensor of size 2 * 3 */ /* a x tensor of size (2, 3) */
int xOrder = 2; int xOrder = 2;
int * xDimSize = new int[xOrder]; int * xDimSize = new int[xOrder];
xDimSize[0] = 2; xDimSize[0] = 2;
...@@ -246,7 +250,7 @@ bool TestHardTanH3() ...@@ -246,7 +250,7 @@ bool TestHardTanH3()
for (int i = 0; i < xOrder; i++) for (int i = 0; i < xOrder; i++)
xUnitNum *= xDimSize[i]; xUnitNum *= xDimSize[i];
/* a y tensor of size 2 * 3 */ /* a y tensor of size (2, 3) */
int yOrder = 2; int yOrder = 2;
int * yDimSize = new int[yOrder]; int * yDimSize = new int[yOrder];
yDimSize[0] = 2; yDimSize[0] = 2;
...@@ -256,7 +260,7 @@ bool TestHardTanH3() ...@@ -256,7 +260,7 @@ bool TestHardTanH3()
for (int i = 0; i < yOrder; i++) for (int i = 0; i < yOrder; i++)
yUnitNum *= yDimSize[i]; yUnitNum *= yDimSize[i];
/* a gold tensor of size 2 * 3 */ /* a gold tensor of size (2, 3) */
int goldOrder = 2; int goldOrder = 2;
int * goldDimSize = new int[goldOrder]; int * goldDimSize = new int[goldOrder];
goldDimSize[0] = 2; goldDimSize[0] = 2;
...@@ -266,7 +270,7 @@ bool TestHardTanH3() ...@@ -266,7 +270,7 @@ bool TestHardTanH3()
for (int i = 0; i < goldOrder; i++) for (int i = 0; i < goldOrder; i++)
goldUnitNum *= goldDimSize[i]; goldUnitNum *= goldDimSize[i];
/* a dedy tensor of size 2 * 3 */ /* a dedy tensor of size (2, 3) */
int dedyOrder = 2; int dedyOrder = 2;
int * dedyDimSize = new int[dedyOrder]; int * dedyDimSize = new int[dedyOrder];
dedyDimSize[0] = 2; dedyDimSize[0] = 2;
...@@ -276,7 +280,7 @@ bool TestHardTanH3() ...@@ -276,7 +280,7 @@ bool TestHardTanH3()
for (int i = 0; i < dedyOrder; i++) for (int i = 0; i < dedyOrder; i++)
dedyUnitNum *= dedyDimSize[i]; dedyUnitNum *= dedyDimSize[i];
/* a dedx tensor of size 2 * 3 */ /* a dedx tensor of size (2, 3) */
int dedxOrder = 2; int dedxOrder = 2;
int * dedxDimSize = new int[dedxOrder]; int * dedxDimSize = new int[dedxOrder];
dedxDimSize[0] = 2; dedxDimSize[0] = 2;
...@@ -286,16 +290,16 @@ bool TestHardTanH3() ...@@ -286,16 +290,16 @@ bool TestHardTanH3()
for (int i = 0; i < dedxOrder; i++) for (int i = 0; i < dedxOrder; i++)
dedxUnitNum *= dedxDimSize[i]; dedxUnitNum *= dedxDimSize[i];
DTYPE xData[2][3] = { {0.5, -1.0, 2.0}, DTYPE xData[2][3] = { {0.5F, -1.0F, 2.0F},
{3.5, -4.5, 1.0} }; {3.5F, -4.5F, 1.0F} };
DTYPE yData[2][3] = { {0.5, -1.0, 1.0}, DTYPE yData[2][3] = { {0.5F, -1.0F, 1.0F},
{1.0, -1.0, 1.0} }; {1.0F, -1.0F, 1.0F} };
DTYPE goldData[2][3] = { {1.0, 1.0, 1.0}, DTYPE goldData[2][3] = { {1.0F, 1.0F, 1.0F},
{1.0, 1.0, 1.0} }; {1.0F, 1.0F, 1.0F} };
DTYPE dedyData[2][3] = { {-0.5, -2.0, 0.0 }, DTYPE dedyData[2][3] = { {-0.5F, -2.0F, 0.0F },
{0.0, -2.0, 0.0 } }; {0.0F, -2.0F, 0.0F } };
DTYPE answer[2][3] = { {-0.5, -2.0, 0.0}, DTYPE answer[2][3] = { {-0.5F, -2.0F, 0.0F},
{0.0, 0.0, 0.0} }; {0.0F, 0.0F, 0.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -358,12 +362,13 @@ bool TestHardTanH3() ...@@ -358,12 +362,13 @@ bool TestHardTanH3()
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 4: backward computation /*
* In this case, lossName=ONEHOTERROR. case 4: backward computation
In this case, lossName=ONEHOTERROR.
*/ */
bool TestHardTanH4() bool TestHardTanH4()
{ {
/* a x tensor of size 2 * 3 */ /* a x tensor of size (2, 3) */
int xOrder = 2; int xOrder = 2;
int * xDimSize = new int[xOrder]; int * xDimSize = new int[xOrder];
xDimSize[0] = 2; xDimSize[0] = 2;
...@@ -373,7 +378,7 @@ bool TestHardTanH4() ...@@ -373,7 +378,7 @@ bool TestHardTanH4()
for (int i = 0; i < xOrder; i++) for (int i = 0; i < xOrder; i++)
xUnitNum *= xDimSize[i]; xUnitNum *= xDimSize[i];
/* a y tensor of size 2 * 3 */ /* a y tensor of size (2, 3) */
int yOrder = 2; int yOrder = 2;
int * yDimSize = new int[yOrder]; int * yDimSize = new int[yOrder];
yDimSize[0] = 2; yDimSize[0] = 2;
...@@ -383,7 +388,7 @@ bool TestHardTanH4() ...@@ -383,7 +388,7 @@ bool TestHardTanH4()
for (int i = 0; i < yOrder; i++) for (int i = 0; i < yOrder; i++)
yUnitNum *= yDimSize[i]; yUnitNum *= yDimSize[i];
/* a gold tensor of size 2 * 3 */ /* a gold tensor of size (2, 3) */
int goldOrder = 2; int goldOrder = 2;
int * goldDimSize = new int[goldOrder]; int * goldDimSize = new int[goldOrder];
goldDimSize[0] = 2; goldDimSize[0] = 2;
...@@ -393,7 +398,7 @@ bool TestHardTanH4() ...@@ -393,7 +398,7 @@ bool TestHardTanH4()
for (int i = 0; i < goldOrder; i++) for (int i = 0; i < goldOrder; i++)
goldUnitNum *= goldDimSize[i]; goldUnitNum *= goldDimSize[i];
/* a dedy tensor of size 2 * 3 */ /* a dedy tensor of size (2, 3) */
int dedyOrder = 2; int dedyOrder = 2;
int * dedyDimSize = new int[dedyOrder]; int * dedyDimSize = new int[dedyOrder];
dedyDimSize[0] = 2; dedyDimSize[0] = 2;
...@@ -403,7 +408,7 @@ bool TestHardTanH4() ...@@ -403,7 +408,7 @@ bool TestHardTanH4()
for (int i = 0; i < dedyOrder; i++) for (int i = 0; i < dedyOrder; i++)
dedyUnitNum *= dedyDimSize[i]; dedyUnitNum *= dedyDimSize[i];
/* a dedx tensor of size 2 * 3 */ /* a dedx tensor of size (2, 3) */
int dedxOrder = 2; int dedxOrder = 2;
int * dedxDimSize = new int[dedxOrder]; int * dedxDimSize = new int[dedxOrder];
dedxDimSize[0] = 2; dedxDimSize[0] = 2;
...@@ -413,16 +418,16 @@ bool TestHardTanH4() ...@@ -413,16 +418,16 @@ bool TestHardTanH4()
for (int i = 0; i < dedxOrder; i++) for (int i = 0; i < dedxOrder; i++)
dedxUnitNum *= dedxDimSize[i]; dedxUnitNum *= dedxDimSize[i];
DTYPE xData[2][3] = { {0.5, -1.0, 2.0}, DTYPE xData[2][3] = { {0.5F, -1.0F, 2.0F},
{3.5, -4.5, 1.0} }; {3.5F, -4.5F, 1.0F} };
DTYPE yData[2][3] = { {0.5, -1.0, 1.0}, DTYPE yData[2][3] = { {0.5F, -1.0F, 1.0F},
{1.0, -1.0, 1.0} }; {1.0F, -1.0F, 1.0F} };
DTYPE goldData[2][3] = { {1.0, 0.0, 1.0}, DTYPE goldData[2][3] = { {1.0F, 0.0F, 1.0F},
{0.0, 1.0, 1.0} }; {0.0F, 1.0F, 1.0F} };
DTYPE dedyData[2][3] = { {-0.5, 0.0, 0.0}, DTYPE dedyData[2][3] = { {-0.5F, 0.0F, 0.0F},
{0.0, -2.0, 0.0} }; {0.0F, -2.0F, 0.0F} };
DTYPE answer[2][3] = { {-0.5, 0.0, 0.0}, DTYPE answer[2][3] = { {-0.5F, 0.0F, 0.0F},
{0.0, 0.0, 0.0} }; {0.0F, 0.0F, 0.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -491,10 +496,9 @@ TODO!! ...@@ -491,10 +496,9 @@ TODO!!
*/ */
/* test for HardTanH Function */ /* test for HardTanH Function */
extern "C"
bool TestHardTanH() bool TestHardTanH()
{ {
XPRINT(0, stdout, "[TEST HARDTANH] -------------\n"); XPRINT(0, stdout, "[TEST HARDTANH] test hardtanh and its backward computation \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-29
*/
#include "../XUtility.h"
#include "TIdentity.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test Identity function.
Identity function: y = x
The output tensor is compared cell by cell with a copy of the input data,
on the CPU and, when built with USE_CUDA, on the GPU as well.
<< return - true if every enabled check passes
*/
bool TestIdentity1()
{
    /* a input tensor of size (2, 3) */
    int sOrder = 2;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 3;

    /* number of cells = product of all dimension sizes */
    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    DTYPE xData[2][3] = { {0.0F, 1.0F, 2.0F},
                          {0.5F, 0.7F, 1.4F} };
    /* y = x, so the expected output is the input itself */
    DTYPE answer[2][3] = { {0.0F, 1.0F, 2.0F},
                           {0.5F, 0.7F, 1.4F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * x = NewTensor(sOrder, sDimSize);
    XTensor * y = NewTensor(sOrder, sDimSize);

    /* initialize variables */
    x->SetData(xData, sUnitNum);
    y->SetZeroAll();

    /* call Identity function */
    Identity(x, y);

    /* check result */
    cpuTest = y->CheckData(answer, sUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * xGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * yGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);

    /* initialize variables */
    xGPU->SetData(xData, sUnitNum);
    yGPU->SetZeroAll();

    /* call Identity function */
    Identity(xGPU, yGPU);

    /* check result */
    gpuTest = yGPU->CheckData(answer, sUnitNum);

    /* destroy variables
       (one delete expression per pointer: "delete x, y;" parses as
       "(delete x), y;" and would release only the first tensor) */
    delete x;
    delete y;
    delete xGPU;
    delete yGPU;
    delete[] sDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables
       (one delete expression per pointer: "delete x, y;" parses as
       "(delete x), y;" and would release only the first tensor) */
    delete x;
    delete y;
    delete[] sDimSize;

    return cpuTest;
#endif // USE_CUDA
}
/*
case 2: test IdentityBackward function.
IdentityBackward function: dE/dx = dE/dy * dy/dx = dE/dy
The case runs Identity forward on a (1, 3) input, calls IdentityBackward with
the CROSSENTROPY loss and compares dE/dx with reference values (tolerance 1e-4).
<< return - true if the CPU check (and the GPU check under USE_CUDA) passes
*/
bool TestIdentity2()
{
    /* every tensor in this case has the shape (1, 3) */
    int rank = 2;
    int * shape = new int[rank];
    shape[0] = 1;
    shape[1] = 3;

    /* number of cells = product of all dimension sizes */
    int cellNum = 1;
    for (int d = 0; d < rank; d++)
        cellNum *= shape[d];

    DTYPE xData[1][3] = { {0.0F, 1.0F, 2.0F} };
    DTYPE gData[1][3] = { {0.0F, 0.0F, 1.0F} };
    /* NOTE(review): these numbers equal softmax(xData) - gData; confirm that this
       is what IdentityBackward is expected to produce for CROSSENTROPY */
    DTYPE dedxAnswer[3] = {0.090031F, 0.244728F, -0.334759F};

    /* ---- CPU test ---- */
    XTensor * x = NewTensor(rank, shape);
    XTensor * y = NewTensor(rank, shape);
    XTensor * g = NewTensor(rank, shape);
    XTensor * dedy = NewTensor(rank, shape);
    XTensor * dedx = NewTensor(rank, shape);

    /* load the input and the gold standard, clear everything else */
    x->SetData(xData, cellNum);
    g->SetData(gData, cellNum);
    y->SetZeroAll();
    dedx->SetZeroAll();
    dedy->SetZeroAll();

    /* forward pass followed by the backward pass */
    Identity(x, y);
    IdentityBackward(g, y, x, dedy, dedx, CROSSENTROPY);

    /* compare dE/dx with the reference */
    bool cpuTest = dedx->CheckData(dedxAnswer, cellNum, 1e-4F);
    bool passed = cpuTest;

#ifdef USE_CUDA
    /* ---- GPU test ---- */
    XTensor * xGPU = NewTensor(rank, shape, X_FLOAT, 1.0F, 0);
    XTensor * yGPU = NewTensor(rank, shape, X_FLOAT, 1.0F, 0);
    XTensor * gGPU = NewTensor(rank, shape, X_FLOAT, 1.0F, 0);
    XTensor * dedyGPU = NewTensor(rank, shape, X_FLOAT, 1.0F, 0);
    XTensor * dedxGPU = NewTensor(rank, shape, X_FLOAT, 1.0F, 0);

    /* load the input and the gold standard, clear everything else */
    xGPU->SetData(xData, cellNum);
    gGPU->SetData(gData, cellNum);
    yGPU->SetZeroAll();
    dedxGPU->SetZeroAll();
    dedyGPU->SetZeroAll();

    /* forward pass followed by the backward pass */
    Identity(xGPU, yGPU);
    IdentityBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);

    /* compare dE/dx with the reference */
    bool gpuTest = dedxGPU->CheckData(dedxAnswer, cellNum, 1e-4F);
    passed = cpuTest && gpuTest;

    /* release the GPU tensors */
    delete xGPU;
    delete yGPU;
    delete gGPU;
    delete dedxGPU;
    delete dedyGPU;
#endif // USE_CUDA

    /* release the CPU tensors and the shape array */
    delete x;
    delete y;
    delete g;
    delete dedx;
    delete dedy;
    delete[] shape;

    return passed;
}
/* other cases */
/*
TODO!!
*/
/*
test for Identity Function
runs every Identity test case in turn and prints one line per case
<< return - true if all cases passed, false if at least one failed
*/
bool TestIdentity()
{
    XPRINT(0, stdout, "[TEST Identity] identity function and its backward computation \n");

    bool allPassed = true;

    /* case 1 test */
    if (TestIdentity1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* case 2 test */
    if (TestIdentity2()) {
        XPRINT(0, stdout, ">> case 2 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall verdict */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-29
*/
#ifndef __TEST_IDENTITY_H__
#define __TEST_IDENTITY_H__
#include "../function/Identity.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for Identity Function
runs all Identity test cases and prints a pass/fail line for each of them
<< return - true if every case passed, false otherwise
*/
extern "C"
bool TestIdentity();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_IDENTITY_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-02
*/
#include "../XUtility.h"
#include "TLogSoftmax.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test LogSoftmax function.
LogSoftmax function: y = log(e^x / \sum_{i} e^{x_i})
LogSoftmax is called with its third argument set to 1 on a (2, 3) input and
the result is compared with reference values using a tolerance of 1e-4.
<< return - true if the CPU check (and the GPU check under USE_CUDA) passes
*/
bool TestLogSoftmax1()
{
    /* a input tensor of size (2, 3) */
    int sOrder = 2;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 3;

    /* number of cells = product of all dimension sizes */
    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    DTYPE xData[2][3] = { {0.0F, 1.0F, 2.0F},
                          {0.5F, 0.7F, 1.4F} };
    DTYPE answer[2][3] = { {-2.4076F, -1.4076F, -0.4076F},
                           {-1.5435F, -1.3435F, -0.6435F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * x = NewTensor(sOrder, sDimSize);
    XTensor * y = NewTensor(sOrder, sDimSize);

    /* initialize variables */
    x->SetData(xData, sUnitNum);
    y->SetZeroAll();

    /* call LogSoftmax function */
    LogSoftmax(x, y, 1);

    /* check result */
    cpuTest = y->CheckData(answer, sUnitNum, 1e-4F);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * xGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * yGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);

    /* initialize variables */
    xGPU->SetData(xData, sUnitNum);
    yGPU->SetZeroAll();

    /* call LogSoftmax function */
    LogSoftmax(xGPU, yGPU, 1);

    /* check result */
    gpuTest = yGPU->CheckData(answer, sUnitNum, 1e-4F);

    /* destroy variables */
    delete x;
    delete y;
    delete xGPU;
    delete yGPU;
    delete[] sDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables
       (only x and y exist in this case; there is no third tensor to release) */
    delete x;
    delete y;
    delete[] sDimSize;

    return cpuTest;
#endif // USE_CUDA
}
/*
case 2: test LogSoftmaxBackward function.
dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
The case works on a 1d tensor of size 3, passes 0 as the dimension argument
and uses the CROSSENTROPY loss; both y and dE/dx are checked (tolerance 1e-4).
<< return - true if the CPU check (and the GPU check under USE_CUDA) passes
*/
bool TestLogSoftmax2()
{
    /* every tensor in this case has the shape (3) */
    int rank = 1;
    int * shape = new int[rank];
    shape[0] = 3;

    /* number of cells = product of all dimension sizes */
    int cellNum = 1;
    for (int d = 0; d < rank; d++)
        cellNum *= shape[d];

    DTYPE xData[3] = {0.0F, 1.0F, 2.0F};
    DTYPE gData[3] = {0.5F, 0.8F, 1.5F};
    DTYPE yAnswer[3] = {-2.4076F, -1.4076F, -0.4076F};
    DTYPE dedxAnswer[3] = {-0.409969F, -0.555272F, -0.834759F};

    /* ---- CPU test ---- */
    XTensor * x = NewTensor(rank, shape);
    XTensor * y = NewTensor(rank, shape);
    XTensor * g = NewTensor(rank, shape);
    XTensor * dedy = NewTensor(rank, shape);
    XTensor * dedx = NewTensor(rank, shape);

    /* load the input and the gold standard, clear everything else */
    x->SetData(xData, cellNum);
    g->SetData(gData, cellNum);
    y->SetZeroAll();
    dedx->SetZeroAll();
    dedy->SetZeroAll();

    /* forward pass followed by the backward pass */
    LogSoftmax(x, y, 0);
    LogSoftmaxBackward(g, y, x, dedy, dedx, 0, CROSSENTROPY);

    /* both the forward output and the gradient have to match */
    bool cpuTest = y->CheckData(yAnswer, cellNum, 1e-4F)
                   && dedx->CheckData(dedxAnswer, cellNum, 1e-4F);
    bool passed = cpuTest;

#ifdef USE_CUDA
    /* ---- GPU test ---- */
    XTensor * xGPU = NewTensor(rank, shape, X_FLOAT, 1.0F, 0);
    XTensor * yGPU = NewTensor(rank, shape, X_FLOAT, 1.0F, 0);
    XTensor * gGPU = NewTensor(rank, shape, X_FLOAT, 1.0F, 0);
    XTensor * dedyGPU = NewTensor(rank, shape, X_FLOAT, 1.0F, 0);
    XTensor * dedxGPU = NewTensor(rank, shape, X_FLOAT, 1.0F, 0);

    /* load the input and the gold standard, clear everything else */
    xGPU->SetData(xData, cellNum);
    gGPU->SetData(gData, cellNum);
    yGPU->SetZeroAll();
    dedxGPU->SetZeroAll();
    dedyGPU->SetZeroAll();

    /* forward pass followed by the backward pass */
    LogSoftmax(xGPU, yGPU, 0);
    LogSoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 0, CROSSENTROPY);

    /* both the forward output and the gradient have to match */
    bool gpuTest = yGPU->CheckData(yAnswer, cellNum, 1e-4F)
                   && dedxGPU->CheckData(dedxAnswer, cellNum, 1e-4F);
    passed = cpuTest && gpuTest;

    /* release the GPU tensors */
    delete xGPU;
    delete yGPU;
    delete gGPU;
    delete dedxGPU;
    delete dedyGPU;
#endif // USE_CUDA

    /* release the CPU tensors and the shape array */
    delete x;
    delete y;
    delete g;
    delete dedx;
    delete dedy;
    delete[] shape;

    return passed;
}
/*
case 3: test LogSoftmaxBackward function.
dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
Same data as case 2, but stored in a 2d tensor of size (1, 3) with 1 passed
as the dimension argument; both y and dE/dx are checked (tolerance 1e-4).
<< return - true if the CPU check (and the GPU check under USE_CUDA) passes
*/
bool TestLogSoftmax3()
{
    /* every tensor in this case has the shape (1, 3) */
    int rank = 2;
    int * shape = new int[rank];
    shape[0] = 1;
    shape[1] = 3;

    /* number of cells = product of all dimension sizes */
    int cellNum = 1;
    for (int d = 0; d < rank; d++)
        cellNum *= shape[d];

    DTYPE xData[1][3] = { {0.0F, 1.0F, 2.0F} };
    DTYPE gData[1][3] = { {0.5F, 0.8F, 1.5F} };
    DTYPE yAnswer[1][3] = { {-2.4076F, -1.4076F, -0.4076F} };
    DTYPE dedxAnswer[1][3] = { {-0.409969F, -0.555272F, -0.834759F} };

    /* ---- CPU test ---- */
    XTensor * x = NewTensor(rank, shape);
    XTensor * y = NewTensor(rank, shape);
    XTensor * g = NewTensor(rank, shape);
    XTensor * dedy = NewTensor(rank, shape);
    XTensor * dedx = NewTensor(rank, shape);

    /* load the input and the gold standard, clear everything else */
    x->SetData(xData, cellNum);
    g->SetData(gData, cellNum);
    y->SetZeroAll();
    dedx->SetZeroAll();
    dedy->SetZeroAll();

    /* forward pass followed by the backward pass */
    LogSoftmax(x, y, 1);
    LogSoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY);

    /* both the forward output and the gradient have to match */
    bool cpuTest = y->CheckData(yAnswer, cellNum, 1e-4F)
                   && dedx->CheckData(dedxAnswer, cellNum, 1e-4F);
    bool passed = cpuTest;

#ifdef USE_CUDA
    /* ---- GPU test ---- */
    XTensor * xGPU = NewTensor(rank, shape, X_FLOAT, 1.0F, 0);
    XTensor * yGPU = NewTensor(rank, shape, X_FLOAT, 1.0F, 0);
    XTensor * gGPU = NewTensor(rank, shape, X_FLOAT, 1.0F, 0);
    XTensor * dedyGPU = NewTensor(rank, shape, X_FLOAT, 1.0F, 0);
    XTensor * dedxGPU = NewTensor(rank, shape, X_FLOAT, 1.0F, 0);

    /* load the input and the gold standard, clear everything else */
    xGPU->SetData(xData, cellNum);
    gGPU->SetData(gData, cellNum);
    yGPU->SetZeroAll();
    dedxGPU->SetZeroAll();
    dedyGPU->SetZeroAll();

    /* forward pass followed by the backward pass */
    LogSoftmax(xGPU, yGPU, 1);
    LogSoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY);

    /* both the forward output and the gradient have to match */
    bool gpuTest = yGPU->CheckData(yAnswer, cellNum, 1e-4F)
                   && dedxGPU->CheckData(dedxAnswer, cellNum, 1e-4F);
    passed = cpuTest && gpuTest;

    /* release the GPU tensors */
    delete xGPU;
    delete yGPU;
    delete gGPU;
    delete dedxGPU;
    delete dedyGPU;
#endif // USE_CUDA

    /* release the CPU tensors and the shape array */
    delete x;
    delete y;
    delete g;
    delete dedx;
    delete dedy;
    delete[] shape;

    return passed;
}
/* other cases */
/*
TODO!!
*/
/*
test for LogSoftmax Function
runs every LogSoftmax test case in turn and prints one line per case
<< return - true if all cases passed, false if at least one failed
*/
bool TestLogSoftmax()
{
    XPRINT(0, stdout, "[TEST LogSoftmax] test log softmax function and its backward computation \n");

    bool allPassed = true;

    /* case 1 test */
    if (TestLogSoftmax1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* case 2 test */
    if (TestLogSoftmax2()) {
        XPRINT(0, stdout, ">> case 2 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }

    /* case 3 test */
    if (TestLogSoftmax3()) {
        XPRINT(0, stdout, ">> case 3 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 3 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall verdict */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-02
*/
#ifndef __TEST_LOGSOFTMAX_H__
#define __TEST_LOGSOFTMAX_H__
#include "../function/LogSoftmax.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for LogSoftmax Function
runs all LogSoftmax test cases and prints a pass/fail line for each of them
<< return - true if every case passed, false otherwise
*/
extern "C"
bool TestLogSoftmax();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_LOGSOFTMAX_H__
...@@ -19,91 +19,244 @@ ...@@ -19,91 +19,244 @@
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30 * $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30
*/ */
#include "../XTensor.h" #include "../core/math/ScaleAndShift.h"
#include "../XDevice.h"
#include "../function/Loss.h" #include "../function/Loss.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
namespace nts { // namespace nt(NiuTrans.Tensor) /*
/* case 1 */ case 1: test LossCompute function
In this case, Loss function name = SQUAREDERROR.
loss = sum_{i} 0.5*(t_i - y_i)^2,
where t_i is the gold standard and y_i is the model output
*/
bool TestLoss1() bool TestLoss1()
{ {
/* a tensor of size 10000 * 1 */ /* a tensor of size (10, 1) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 10;
dimSize[1] = 1;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
/* CPU test */
bool cpuTest = true;
DTYPE answer = 5.0F;
/* create tensors */
XTensor * output = NewTensor(order, dimSize);
XTensor * gold = NewTensor(order, dimSize);
/* initialize variables */
output->SetZeroAll();
gold->SetZeroAll();
ScaleAndShift(output, 1, 1);
ScaleAndShift(gold, 1, 2);
DTYPE error;
error = LossCompute(gold, output, SQUAREDERROR, false, 0, 0, dimSize[0], 0);
/* check results */
cpuTest = (error == answer);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * outputGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * goldGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
outputGPU->SetZeroAll();
goldGPU->SetZeroAll();
ScaleAndShift(outputGPU, 1, 1);
ScaleAndShift(goldGPU, 1, 2);
/* call LossCompute function */
error = LossCompute(goldGPU, outputGPU, SQUAREDERROR, false, 0, 0, dimSize[0], 0);
/* check results */
gpuTest = (error == answer);
/* destroy variables */
delete output;
delete gold;
delete outputGPU;
delete goldGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete output;
delete gold;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 2: test LossCompute function
In this case, Loss function name = CROSSENTROPY.
loss = sum_{i} (-t_i * log(y_i))
where t_i is the gold standard and y_i is the model output
*/
bool TestLoss2()
{
/* a tensor of size (10, 1) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 10;
dimSize[1] = 1;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
/* CPU test */
bool cpuTest = true;
DTYPE answer = 0.0F;
/* create tensors */
XTensor * output = NewTensor(order, dimSize);
XTensor * gold = NewTensor(order, dimSize);
/* initialize variables */
output->SetZeroAll();
gold->SetZeroAll();
ScaleAndShift(output, 1, 1);
ScaleAndShift(gold, 1, 2);
DTYPE error;
error = LossCompute(gold, output, CROSSENTROPY, false, 0, 0, dimSize[0], 0);
/* check results */
cpuTest = (error == answer);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * outputGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * goldGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
outputGPU->SetZeroAll();
goldGPU->SetZeroAll();
ScaleAndShift(outputGPU, 1, 1);
ScaleAndShift(goldGPU, 1, 2);
/* call LossCompute function */
error = LossCompute(goldGPU, outputGPU, CROSSENTROPY, false, 0, 0, dimSize[0], 0);
/* check results */
gpuTest = (error == answer);
/* destroy variables */
delete output;
delete gold;
delete outputGPU;
delete goldGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete output;
delete gold;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 3: test LossCompute function
In this case, Loss function name = ONEHOTERROR.
loss = sum_{i} e_i
where e_i = 0.5*(t_i - y_i)^2 if t_i = 1, e_i = 0 otherwise
*/
bool TestLoss3()
{
/* a tensor of size (10, 1) */
int order = 2; int order = 2;
int order_reduce = 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
dimSize[0] = 10000; dimSize[0] = 5;
dimSize[1] = 1; dimSize[1] = 1;
int unitNum = 1; int unitNum = 1;
for (int i = 0; i < order; i++) for (int i = 0; i < order; i++)
unitNum *= dimSize[i]; unitNum *= dimSize[i];
DTYPE outputData[5][1] = { {0.5F},
{0.5F},
{0.5F},
{0.5F},
{0.5F} };
DTYPE goldData[5][1] = { {1.0F},
{1.0F},
{0.0F},
{0.0F},
{0.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
DTYPE answer = 0.25F;
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(order, dimSize); XTensor * output = NewTensor(order, dimSize);
XTensor * b = NewTensor(order, dimSize); XTensor * gold = NewTensor(order, dimSize);
/* initialize variables */ /* initialize variables */
DTYPE* a_data = (DTYPE*)a->data; output->SetData(outputData, unitNum);
for (int i = 0; i < unitNum; i++) gold->SetData(goldData, unitNum);
*a_data++ = 1;
DTYPE* b_data = (DTYPE*)b->data; DTYPE error;
for (int i = 0; i < unitNum; i++) error = LossCompute(gold, output, ONEHOTERROR, false, 0, 0, dimSize[0], 0);
*b_data++ = 1;
DTYPE error = 0.0F;
error = LossCompute(a, b, SQUAREDERROR, false, 1, 0, dimSize[0], 0);
printf("%d", error);
/* call reduce max function */
//ReduceMax(a, reduce_a, 0);
//ReduceMax(b, reduce_b, 1);
//DTYPE* reduce_a_data = (DTYPE*)reduce_a->data;
//for (int i = 0; i < unitNum_a; i++)
// printf("%f ", *reduce_a_data++);
//printf("\n");
//DTYPE* reduce_b_data = (DTYPE*)reduce_b->data;
//for (int i = 0; i < unitNum_b; i++)
// printf("%f ", *reduce_b_data++);
/* check results */ /* check results */
cpuTest = true; cpuTest = (error == answer);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT); XTensor * outputGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT); XTensor * goldGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */ /* Initialize variables */
DTYPE* aGPU_data = (DTYPE*)aGPU->data; outputGPU->SetData(outputData, unitNum);
for (int i = 0; i < unitNum; i++) goldGPU->SetData(goldData, unitNum);
*aGPU_data++ = 1;
DTYPE* bGPU_data = (DTYPE*)bGPU->data; /* call LossCompute function */
for (int i = 0; i < unitNum; i++) error = LossCompute(goldGPU, outputGPU, ONEHOTERROR, false, 0, 0, dimSize[0], 0);
*bGPU_data++ = 1;
error = LossCompute(a, b, SQUAREDERROR, false, 1, 0, dimSize[0], 0);
printf("%d", error);
/* call reduce max function */
//ReduceMax(aGPU, reduce_aGPU, 0);
//ReduceMax(bGPU, reduce_bGPU, 1);
/* check results */ /* check results */
gpuTest = true; gpuTest = (error == answer);
/* destroy variables */ /* destroy variables */
delete aGPU, bGPU; delete output;
delete gold;
delete outputGPU;
delete goldGPU;
delete[] dimSize; delete[] dimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete output;
delete b; delete gold;
delete[] dimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
...@@ -113,11 +266,10 @@ bool TestLoss1() ...@@ -113,11 +266,10 @@ bool TestLoss1()
TODO!! TODO!!
*/ */
/* test for Sum Function */ /* test for Loss Function */
extern "C" bool TestLoss()
bool TestLoss()
{ {
XPRINT(0, stdout, "[TEST Loss]\n"); XPRINT(0, stdout, "[TEST Loss] compute the loss \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
...@@ -129,6 +281,23 @@ extern "C" ...@@ -129,6 +281,23 @@ extern "C"
else else
XPRINT(0, stdout, ">> case 1 passed!\n"); XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestLoss2();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
caseFlag = TestLoss3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
///* other cases test */ ///* other cases test */
///* ///*
//TODO!! //TODO!!
...@@ -145,4 +314,4 @@ extern "C" ...@@ -145,4 +314,4 @@ extern "C"
return returnFlag; return returnFlag;
} }
} // namespace nt(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -26,9 +26,9 @@ ...@@ -26,9 +26,9 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for Sum Function */ /* test for Loss Function */
extern "C" extern "C"
bool TestLoss(); bool TestLoss();
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __TEST_SUM_H__ #endif // __TEST_LOSS_H__
...@@ -19,14 +19,13 @@ ...@@ -19,14 +19,13 @@
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-15 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-15
*/ */
#include "../XTensor.h"
#include "TMatrixMULBatchedCPU.h" #include "TMatrixMULBatchedCPU.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: matrix multiplication in batch mode (CPU code).
* In this case, aList=2*(2, 3), bList=2*(2, 3) -> c=2*(2, 2), /*
transposedA=X_NOTRANS, transposedB=X_NOTRANS. case 1: matrix multiplication in batch mode (CPU code).
In this case, aList=2*(2, 3), bList=2*(3, 2) -> c=2*(2, 2), transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/ */
bool TestMatrixMulBatchedCPU1() bool TestMatrixMulBatchedCPU1()
{ {
...@@ -65,20 +64,20 @@ bool TestMatrixMulBatchedCPU1() ...@@ -65,20 +64,20 @@ bool TestMatrixMulBatchedCPU1()
for (int i = 0; i < cOrder; i++) for (int i = 0; i < cOrder; i++)
cUnitNum *= cDimSize[i]; cUnitNum *= cDimSize[i];
DTYPE aData1[2][3] = { {1.0, 2.0, 3.0}, DTYPE aData1[2][3] = { {1.0F, 2.0F, 3.0F},
{-4.0, 5.0, 6.0} }; {-4.0F, 5.0F, 6.0F} };
DTYPE aData2[2][3] = { {1.0, -2.0, -3.0}, DTYPE aData2[2][3] = { {1.0F, -2.0F, -3.0F},
{-4.0, 3.0, 2.0} }; {-4.0F, 3.0F, 2.0F} };
DTYPE bData1[3][2] = { {0.0, -1.0}, DTYPE bData1[3][2] = { {0.0F, -1.0F},
{1.0, 2.0}, {1.0F, 2.0F},
{2.0, 1.0} }; {2.0F, 1.0F} };
DTYPE bData2[3][2] = { {0.0, 1.0}, DTYPE bData2[3][2] = { {0.0F, 1.0F},
{3.0, 2.0}, {3.0F, 2.0F},
{2.0, 1.0} }; {2.0F, 1.0F} };
DTYPE answer1[2][2] = { {8.0, 6.0}, DTYPE answer1[2][2] = { {8.0F, 6.0F},
{17.0, 20.0} }; {17.0F, 20.0F} };
DTYPE answer2[2][2] = { {-12.0, -6.0}, DTYPE answer2[2][2] = { {-12.0F, -6.0F},
{13.0, 4.0} }; {13.0F, 4.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -111,18 +110,12 @@ bool TestMatrixMulBatchedCPU1() ...@@ -111,18 +110,12 @@ bool TestMatrixMulBatchedCPU1()
MatrixMULBatchedCPU(aList, X_NOTRANS, bList, X_NOTRANS, cList); MatrixMULBatchedCPU(aList, X_NOTRANS, bList, X_NOTRANS, cList);
/* check results */ /* check results */
cpuTest = c1->CheckData(answer1, cUnitNum) && cpuTest; cpuTest = c1->CheckData(answer1, cUnitNum) && c2->CheckData(answer2, cUnitNum);
cpuTest = c2->CheckData(answer2, cUnitNum) && cpuTest;
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* clear list */
aList->Clear();
bList->Clear();
cList->Clear();
/* create tensors */ /* create tensors */
XTensor * aGPU1 = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0); XTensor * aGPU1 = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aGPU2 = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0); XTensor * aGPU2 = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
...@@ -139,31 +132,55 @@ bool TestMatrixMulBatchedCPU1() ...@@ -139,31 +132,55 @@ bool TestMatrixMulBatchedCPU1()
cGPU1->SetZeroAll(); cGPU1->SetZeroAll();
cGPU2->SetZeroAll(); cGPU2->SetZeroAll();
/* clear list */
aList->Clear();
bList->Clear();
cList->Clear();
/* add tensors to list */ /* add tensors to list */
aList->Add(a1); aList->Add(aGPU1);
aList->Add(a2); aList->Add(aGPU2);
bList->Add(b1); bList->Add(bGPU1);
bList->Add(b2); bList->Add(bGPU2);
cList->Add(c1); cList->Add(cGPU1);
cList->Add(c2); cList->Add(cGPU2);
/* call MatrixMULBatchedCPU function */ /* call MatrixMULBatchedCPU function */
MatrixMULBatchedCPU(aList, X_NOTRANS, bList, X_NOTRANS, cList); MatrixMULBatchedCPU(aList, X_NOTRANS, bList, X_NOTRANS, cList);
/* check results */ /* check results */
gpuTest = c1->CheckData(answer1, cUnitNum) && gpuTest; gpuTest = cGPU1->CheckData(answer1, cUnitNum) && gpuTest;
gpuTest = c2->CheckData(answer2, cUnitNum) && gpuTest; gpuTest = cGPU2->CheckData(answer2, cUnitNum) && gpuTest;
/* destroy variables */ /* destroy variables */
delete a1, a2, b1, b2, c1, c2; delete a1;
delete aGPU1, aGPU2, bGPU1, bGPU2, cGPU1, cGPU2; delete a2;
delete[] aDimSize, bDimSize, cDimSize; delete b1;
delete b2;
delete c1;
delete c2;
delete aGPU1;
delete aGPU2;
delete bGPU1;
delete bGPU2;
delete cGPU1;
delete cGPU2;
delete[] aDimSize;
delete[] bDimSize;
delete[] cDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a1, a2, b1, b2, c1, c2; delete a1;
delete[] aDimSize, bDimSize, cDimSize; delete a2;
delete b1;
delete b2;
delete c1;
delete c2;
delete[] aDimSize;
delete[] bDimSize;
delete[] cDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
...@@ -178,7 +195,7 @@ bool TestMatrixMulBatchedCPU1() ...@@ -178,7 +195,7 @@ bool TestMatrixMulBatchedCPU1()
extern "C" extern "C"
bool TestMatrixMulBatchedCPU() bool TestMatrixMulBatchedCPU()
{ {
XPRINT(0, stdout, "[TEST MATRIXMULBATCHEDCPU] -------------\n"); XPRINT(0, stdout, "[TEST MATRIXMULBATCHEDCPU] matrix multiplication in batch mode (CPU code) \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
...@@ -191,15 +208,6 @@ bool TestMatrixMulBatchedCPU() ...@@ -191,15 +208,6 @@ bool TestMatrixMulBatchedCPU()
else else
XPRINT(0, stdout, ">> case 1 passed!\n"); XPRINT(0, stdout, ">> case 1 passed!\n");
///* case 2 test */
//caseFlag = TestMatrixMulBatchedCPU2();
//if (!caseFlag) {
// returnFlag = false;
// XPRINT(0, stdout, ">> case 2 failed!\n");
//}
//else
// XPRINT(0, stdout, ">> case 2 passed!\n");
/* other cases test */ /* other cases test */
/* /*
TODO!! TODO!!
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_MATRIXMULBATCHEDCPU_H__ #ifndef __TEST_MATRIXMULBATCHEDCPU_H__
#define __TEST_MATRIXMULBATCHEDCPU_H__ #define __TEST_MATRIXMULBATCHEDCPU_H__
#include "../core/MatrixMULBatchedCPU.h" #include "../core/arithmetic/MatrixMULBatchedCPU.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,14 +19,14 @@ ...@@ -19,14 +19,14 @@
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-14 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-14
*/ */
#include "../XTensor.h"
#include "TMatrixMul.h" #include "TMatrixMul.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: matrix multiplication.
* In this case, a=(2, 3), b=(3, 2) -> c=(2, 2), transposedA=X_NOTRANS, /*
transposedB=X_NOTRANS. case 1: matrix multiplication.
In this case, a=(2, 3), b=(3, 2) -> c=(2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/ */
bool TestMatrixMul1() bool TestMatrixMul1()
{ {
...@@ -60,13 +60,13 @@ bool TestMatrixMul1() ...@@ -60,13 +60,13 @@ bool TestMatrixMul1()
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
DTYPE sData1[2][3] = { {1.0, 2.0, 3.0}, DTYPE sData1[2][3] = { {1.0F, 2.0F, 3.0F},
{-4.0, 5.0, 6.0} }; {-4.0F, 5.0F, 6.0F} };
DTYPE sData2[3][2] = { {0.0, -1.0}, DTYPE sData2[3][2] = { {0.0F, -1.0F},
{1.0, 2.0}, {1.0F, 2.0F},
{2.0, 1.0} }; {2.0F, 1.0F} };
DTYPE answer[2][2] = { {8.0, 6.0}, DTYPE answer[2][2] = { {8.0F, 6.0F},
{17.0, 20.0} }; {17.0F, 20.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -108,22 +108,34 @@ bool TestMatrixMul1() ...@@ -108,22 +108,34 @@ bool TestMatrixMul1()
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU; delete s1;
delete[] sDimSize1, sDimSize2, tDimSize; delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s1, s2, t; delete s1;
delete[] sDimSize1, sDimSize2, tDimSize; delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 2: matrix multiplication. /*
* In this case, a=(3, 2), b=(3, 2) -> c=(2, 2), transposedA=X_TRANS, case 2: matrix multiplication.
transposedB=X_NOTRANS. In this case, a=(3, 2), b=(3, 2) -> c=(2, 2),
transposedA=X_TRANS, transposedB=X_NOTRANS.
*/ */
bool TestMatrixMul2() bool TestMatrixMul2()
{ {
...@@ -137,7 +149,7 @@ bool TestMatrixMul2() ...@@ -137,7 +149,7 @@ bool TestMatrixMul2()
for (int i = 0; i < sOrder1; i++) for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i]; sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2, 3) */ /* a source tensor of size (3, 2) */
int sOrder2 = 2; int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2]; int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 3; sDimSize2[0] = 3;
...@@ -157,14 +169,14 @@ bool TestMatrixMul2() ...@@ -157,14 +169,14 @@ bool TestMatrixMul2()
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
DTYPE sData1[3][2] = { {1.0, -4.0}, DTYPE sData1[3][2] = { {1.0F, -4.0F},
{2.0, 5.0}, {2.0F, 5.0F},
{3.0, 6.0} }; {3.0F, 6.0F} };
DTYPE sData2[3][2] = { {0.0, -1.0}, DTYPE sData2[3][2] = { {0.0F, -1.0F},
{1.0, 2.0}, {1.0F, 2.0F},
{2.0, 1.0} }; {2.0F, 1.0F} };
DTYPE answer[2][2] = { {8.0, 6.0}, DTYPE answer[2][2] = { {8.0F, 6.0F},
{17.0, 20.0} }; {17.0F, 20.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -206,22 +218,34 @@ bool TestMatrixMul2() ...@@ -206,22 +218,34 @@ bool TestMatrixMul2()
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU; delete s1;
delete[] sDimSize1, sDimSize2, tDimSize; delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s1, s2, t; delete s1;
delete[] sDimSize1, sDimSize2, tDimSize; delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 3: matrix multiplication. /*
* In this case, a=(3, 2, 3), b=(2, 3, 2) -> c=(3, 2, 2, 2), transposedA=X_NOTRANS, case 3: matrix multiplication.
transposedB=X_NOTRANS. In this case, a=(3, 2, 3), b=(2, 3, 2) -> c=(3, 2, 2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/ */
bool TestMatrixMul3() bool TestMatrixMul3()
{ {
...@@ -259,20 +283,30 @@ bool TestMatrixMul3() ...@@ -259,20 +283,30 @@ bool TestMatrixMul3()
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
DTYPE sData1[3][2][3] = { { {0.0, -1.0, 2.0}, DTYPE sData1[3][2][3] = { { {0.0F, -1.0F, 2.0F},
{2.0, 1.0, 3.0} }, {2.0F, 1.0F, 3.0F} },
{ {1.0, 2.0, 4.0}, { {1.0F, 2.0F, 4.0F},
{3.0, 1.0, 2.0}}, {3.0F, 1.0F, 2.0F}},
{ {-1.0, 3.0, 2.0}, { {-1.0F, 3.0F, 2.0F},
{1.0, -1.0, 0.0} } }; {1.0F, -1.0F, 0.0F} } };
DTYPE sData2[2][3][2] = { { {1.0, 2.0}, DTYPE sData2[2][3][2] = { { {1.0F, 2.0F},
{-4.0, 3.0}, {-4.0F, 3.0F},
{2.0, 6.0} }, {2.0F, 6.0F} },
{ {1.0, 2.0}, { {1.0F, 2.0F},
{-4.0, 3.0}, {3.0F, 4.0F},
{2.0, 6.0} } }; {5.0F, 6.0F} } };
DTYPE answer[2][2] = { {8.0, 6.0}, DTYPE answer[3][2][2][2] = { { { {8.0F, 9.0F},
{17.0, 20.0} }; {4.0F, 25.0F} },
{ {7.0F, 8.0F},
{20.0F, 26.0F} } },
{ { {1.0F, 32.0F},
{3.0F, 21.0F} },
{ {27.0F, 34.0F},
{16.0F, 22.0F} } },
{ { {-9.0F, 19.0F},
{5.0F, -1.0F} },
{ {18.0F, 22.0F},
{-2.0F, -2.0F} } } };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -290,17 +324,124 @@ bool TestMatrixMul3() ...@@ -290,17 +324,124 @@ bool TestMatrixMul3()
/* call MatrixMul function */ /* call MatrixMul function */
MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t); MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);
XPRINT(0, stdout, "\ntarget data\n["); /* check results */
DTYPE* check_data = (DTYPE*)t->data; cpuTest = t->CheckData(answer, tUnitNum);
for (int i = 0; i < tUnitNum; i++)
printf("%f ", *check_data++);
printf("]\n");
int * size = new int(tOrder); #ifdef USE_CUDA
size = t->dimSize; /* GPU test */
for (int i = 0; i < tOrder; i++) { bool gpuTest = true;
printf("size %d: %d\n", i, *size++);
} /* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* call MatrixMul function */
MatrixMul(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 4: matrix multiplication.
In this case, a=(3, 2, 3), b=(3, 2) -> c=(3, 2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMul4()
{
/* a source tensor of size (3, 2, 3) */
int sOrder1 = 3;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 3;
sDimSize1[1] = 2;
sDimSize1[2] = 3;
int sUnitNum1 = 1;
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (3, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 3;
sDimSize2[1] = 2;
int sUnitNum2 = 1;
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (3, 2, 2) */
int tOrder = 3;
int * tDimSize = new int[tOrder];
tDimSize[0] = 3;
tDimSize[1] = 2;
tDimSize[2] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[3][2][3] = { { {0.0F, -1.0F, 2.0F},
{2.0F, 1.0F, 3.0F} },
{ {1.0F, 2.0F, 4.0F},
{3.0F, 1.0F, 2.0F}},
{ {-1.0F, 3.0F, 2.0F},
{1.0F, -1.0F, 0.0F} } };
DTYPE sData2[3][2] = { {1.0F, 2.0F},
{3.0F, 4.0F},
{5.0F, 6.0F} };
DTYPE answer[3][2][2] = { { {7.0F, 8.0F},
{20.0F, 26.0F} },
{ {27.0F, 34.0F},
{16.0F, 22.0F} },
{ {18.0F, 22.0F},
{-2.0F, -2.0F} } };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize);
/* initialize variables */
s1->SetData(sData1, sUnitNum1);
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* call MatrixMul function */
MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
...@@ -326,14 +467,25 @@ bool TestMatrixMul3() ...@@ -326,14 +467,25 @@ bool TestMatrixMul3()
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU; delete s1;
delete[] sDimSize1, sDimSize2, tDimSize; delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s1, s2, t; delete s1;
delete[] sDimSize1, sDimSize2, tDimSize; delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
...@@ -346,10 +498,9 @@ bool TestMatrixMul3() ...@@ -346,10 +498,9 @@ bool TestMatrixMul3()
*/ */
/* test for MatrixMul Function */ /* test for MatrixMul Function */
extern "C"
bool TestMatrixMul() bool TestMatrixMul()
{ {
XPRINT(0, stdout, "[TEST MATRIXMUL] -------------\n"); XPRINT(0, stdout, "[TEST MATRIXMUL] matrix multiplication \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
...@@ -371,14 +522,23 @@ bool TestMatrixMul() ...@@ -371,14 +522,23 @@ bool TestMatrixMul()
else else
XPRINT(0, stdout, ">> case 2 passed!\n"); XPRINT(0, stdout, ">> case 2 passed!\n");
///* case 3 test */ /* case 3 test */
//caseFlag = TestMatrixMul3(); caseFlag = TestMatrixMul3();
//if (!caseFlag) { if (!caseFlag) {
// returnFlag = false; returnFlag = false;
// XPRINT(0, stdout, ">> case 3 failed!\n"); XPRINT(0, stdout, ">> case 3 failed!\n");
//} }
//else else
// XPRINT(0, stdout, ">> case 3 passed!\n"); XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestMatrixMul4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* other cases test */ /* other cases test */
/* /*
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_MATRIXMUL_H__ #ifndef __TEST_MATRIXMUL_H__
#define __TEST_MATRIXMUL_H__ #define __TEST_MATRIXMUL_H__
#include "../core/MatrixMul.h" #include "../core/arithmetic/MatrixMul.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,14 +19,14 @@ ...@@ -19,14 +19,14 @@
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-15 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-15
*/ */
#include "../XTensor.h"
#include "TMatrixMul2D.h" #include "TMatrixMul2D.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: matrix multiplication (for 2d tensors).
* In this case, a=(2, 3), b=(3, 2) -> c=(2, 2), transposedA=X_NOTRANS, /*
transposedB=X_NOTRANS. case 1: matrix multiplication (for 2d tensors).
In this case, a=(2, 3), b=(3, 2) -> c=(2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/ */
bool TestMatrixMul2D1() bool TestMatrixMul2D1()
{ {
...@@ -60,13 +60,13 @@ bool TestMatrixMul2D1() ...@@ -60,13 +60,13 @@ bool TestMatrixMul2D1()
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
DTYPE sData1[2][3] = { {1.0, 2.0, 3.0}, DTYPE sData1[2][3] = { {1.0F, 2.0F, 3.0F},
{-4.0, 5.0, 6.0} }; {-4.0F, 5.0F, 6.0F} };
DTYPE sData2[3][2] = { {0.0, -1.0}, DTYPE sData2[3][2] = { {0.0F, -1.0F},
{1.0, 2.0}, {1.0F, 2.0F},
{2.0, 1.0} }; {2.0F, 1.0F} };
DTYPE answer[2][2] = { {8.0, 6.0}, DTYPE answer[2][2] = { {8.0F, 6.0F},
{17.0, 20.0} }; {17.0F, 20.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -108,22 +108,34 @@ bool TestMatrixMul2D1() ...@@ -108,22 +108,34 @@ bool TestMatrixMul2D1()
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU; delete s1;
delete[] sDimSize1, sDimSize2, tDimSize; delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s1, s2, t; delete s1;
delete[] sDimSize1, sDimSize2, tDimSize; delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 2: matrix multiplication (for 2d tensors). /*
* In this case, a=(3, 2), b=(2, 3) -> c=(2, 2), transposedA=X_TRANS, case 2: matrix multiplication (for 2d tensors).
transposedB=X_NOTRANS. In this case, a=(3, 2), b=(3, 2) -> c=(2, 2),
transposedA=X_TRANS, transposedB=X_NOTRANS.
*/ */
bool TestMatrixMul2D2() bool TestMatrixMul2D2()
{ {
...@@ -157,14 +169,14 @@ bool TestMatrixMul2D2() ...@@ -157,14 +169,14 @@ bool TestMatrixMul2D2()
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
DTYPE sData1[3][2] = { {1.0, -4.0}, DTYPE sData1[3][2] = { {1.0F, -4.0F},
{2.0, 5.0}, {2.0F, 5.0F},
{3.0, 6.0} }; {3.0F, 6.0F} };
DTYPE sData2[3][2] = { {0.0, -1.0}, DTYPE sData2[3][2] = { {0.0F, -1.0F},
{1.0, 2.0}, {1.0F, 2.0F},
{2.0, 1.0} }; {2.0F, 1.0F} };
DTYPE answer[2][2] = { {8.0, 6.0}, DTYPE answer[2][2] = { {8.0F, 6.0F},
{17.0, 20.0} }; {17.0F, 20.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -206,14 +218,25 @@ bool TestMatrixMul2D2() ...@@ -206,14 +218,25 @@ bool TestMatrixMul2D2()
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU; delete s1;
delete[] sDimSize1, sDimSize2, tDimSize; delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s1, s2, t; delete s1;
delete[] sDimSize1, sDimSize2, tDimSize; delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
...@@ -228,7 +251,7 @@ bool TestMatrixMul2D2() ...@@ -228,7 +251,7 @@ bool TestMatrixMul2D2()
extern "C" extern "C"
bool TestMatrixMul2D() bool TestMatrixMul2D()
{ {
XPRINT(0, stdout, "[TEST MATRIXMUL2D] -------------\n"); XPRINT(0, stdout, "[TEST MATRIXMUL2D] matrix multiplication (for 2d tensors) \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_MATRIXMUL2D_H__ #ifndef __TEST_MATRIXMUL2D_H__
#define __TEST_MATRIXMUL2D_H__ #define __TEST_MATRIXMUL2D_H__
#include "../core/MatrixMul2D.h" #include "../core/arithmetic/MatrixMul2D.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#include "TMatrixMul2DParallel.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: matrix multiplication (for 2d tensors) with multi-threading.
In this case, a=(2, 3), b=(3, 2) -> c=(2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMul2DParallel1()
{
/* a source tensor of size (2, 3) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
sDimSize1[1] = 3;
int sUnitNum1 = 1;
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (3, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 3;
sDimSize2[1] = 2;
int sUnitNum2 = 1;
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2, 2) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
tDimSize[1] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][3] = { {1.0F, 2.0F, 3.0F},
{-4.0F, 5.0F, 6.0F} };
DTYPE sData2[3][2] = { {0.0F, -1.0F},
{1.0F, 2.0F},
{2.0F, 1.0F} };
DTYPE answer[2][2] = { {8.0F, 6.0F},
{17.0F, 20.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize);
/* initialize variables */
s1->SetData(sData1, sUnitNum1);
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* call MatrixMul2DParallel function */
MatrixMul2DParallel(s1, X_NOTRANS, s2, X_NOTRANS, t);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
}
/*
case 2: matrix multiplication (for 2d tensors) with multi-threading.
In this case, a=(3, 2), b=(3, 2) -> c=(2, 2),
transposedA=X_TRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMul2DParallel2()
{
/* a source tensor of size (3, 2) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 3;
sDimSize1[1] = 2;
int sUnitNum1 = 1;
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (3, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 3;
sDimSize2[1] = 2;
int sUnitNum2 = 1;
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2, 2) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
tDimSize[1] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[3][2] = { {1.0F, -4.0F},
{2.0F, 5.0F},
{3.0F, 6.0F} };
DTYPE sData2[3][2] = { {0.0F, -1.0F},
{1.0F, 2.0F},
{2.0F, 1.0F} };
DTYPE answer[2][2] = { {8.0F, 6.0F},
{17.0F, 20.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize);
/* initialize variables */
s1->SetData(sData1, sUnitNum1);
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* call MatrixMul2DParallel function */
MatrixMul2DParallel(s1, X_TRANS, s2, X_NOTRANS, t);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
}
/* other cases */
/*
TODO!!
*/
/*
test for MatrixMul2DParallel Function:
runs every case, prints a pass/fail line per case and an overall summary
<< return - true if all cases passed, false otherwise
*/
bool TestMatrixMul2DParallel()
{
    XPRINT(0, stdout, "[TEST MatrixMul2DParallel] matrix multiplication (for 2d tensors) with multi-threading \n");

    bool allPassed = true;

    /* case 1 test */
    if (TestMatrixMul2DParallel1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* case 2 test */
    if (TestMatrixMul2DParallel2()) {
        XPRINT(0, stdout, ">> case 2 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }

    /* other cases test: TODO!! */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#ifndef __TEST_MATRIXMUL2DPARALLEL_H__
#define __TEST_MATRIXMUL2DPARALLEL_H__
#include "../core/arithmetic/MatrixMul2DParallel.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for MatrixMul2DParallel Function
<< return - true if every test case passed, false otherwise
*/
extern "C"
bool TestMatrixMul2DParallel();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_MATRIXMUL2DPARALLEL_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-15
*/
#include "TMatrixMULBatched.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: matrix multiplication of the two tensors.
In this case, a=(2, 3), b=(2, 3) -> c=(2, 2), transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMulBatched1()
{
/* a source tensor of size (2, 3) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
sDimSize1[1] = 3;
int sUnitNum1 = 1;
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (3, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 3;
sDimSize2[1] = 2;
int sUnitNum2 = 1;
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2, 2) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
tDimSize[1] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][3] = { {1.0F, 2.0F, 3.0F},
{-4.0F, 5.0F, 6.0F} };
DTYPE sData2[3][2] = { {0.0F, -1.0F},
{1.0F, 2.0F},
{2.0F, 1.0F} };
DTYPE answer[2][2] = { {8.0F, 6.0F},
{17.0F, 20.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize);
/* initialize variables */
s1->SetData(sData1, sUnitNum1);
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* call MatrixMulBatched function */
MatrixMulBatched(s1, X_NOTRANS, s2, X_NOTRANS, t);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* call MatrixMulBatched function */
MatrixMulBatched(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 2: matrix multiplication of the two tensors.
In this case, a=(2, 2, 3), b=(2, 3, 2) -> c=(2, 2, 2), transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMulBatched2()
{
/* a source tensor of size (2, 2, 3) */
int sOrder1 = 3;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
sDimSize1[1] = 2;
sDimSize1[2] = 3;
int sUnitNum1 = 1;
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2, 3, 2) */
int sOrder2 = 3;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2;
sDimSize2[1] = 3;
sDimSize2[2] = 2;
int sUnitNum2 = 1;
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2, 2, 2) */
int tOrder = 3;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
tDimSize[1] = 2;
tDimSize[2] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][2][3] = { { {0.0F, -1.0F, 2.0F},
{2.0F, 1.0F, 3.0F} },
{ {1.0F, 2.0F, 4.0F},
{3.0F, 1.0F, 2.0F} } };
DTYPE sData2[2][3][2] = { { {1.0F, 2.0F},
{-4.0F, 3.0F},
{2.0F, 6.0F} },
{ {1.0F, 2.0F},
{3.0F, 4.0F},
{5.0F, 6.0F} } };
DTYPE answer[2][2][2] = { { {8.0F, 9.0F},
{4.0F, 25.0F} },
{ {27.0F, 34.0F},
{16.0F, 22.0F} } };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize);
/* initialize variables */
s1->SetData(sData1, sUnitNum1);
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* call MatrixMulBatched function */
MatrixMulBatched(s1, X_NOTRANS, s2, X_NOTRANS, t);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* call MatrixMulBatched function */
MatrixMulBatched(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/*
test for MatrixMulBatched Function:
runs every case, prints a pass/fail line per case and an overall summary
<< return - true if all cases passed, false otherwise
*/
bool TestMatrixMulBatched()
{
    XPRINT(0, stdout, "[TEST MATRIXMULBATCHED] matrix multiplication of the two tensors \n");

    bool allPassed = true;

    /* case 1 test */
    if (TestMatrixMulBatched1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* case 2 test */
    if (TestMatrixMulBatched2()) {
        XPRINT(0, stdout, ">> case 2 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }

    /* other cases test: TODO!! */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-15
*/
#ifndef __TEST_MATRIXMULBATCHED_H__
#define __TEST_MATRIXMULBATCHED_H__
#include "../core/arithmetic/MatrixMulBatched.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test driver for the MatrixMulBatched function
runs the individual test cases one after another, reports a pass/fail
line for each of them through XPRINT, and then reports the overall outcome
<< return - true if every case passed, false if at least one case failed
NOTE: declared with C language linkage (extern "C")
*/
extern "C"
bool TestMatrixMulBatched();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_MATRIXMULBATCHED_H__
...@@ -19,14 +19,15 @@ ...@@ -19,14 +19,15 @@
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-13 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-13
*/ */
#include "../XTensor.h" #include "../XTensor.h"
#include "../XList.h" #include "../XList.h"
#include "TMerge.h" #include "TMerge.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: transform a tensor by merging it along with a dimension.
* In this case, (3, 2) -> (6), whereToMerge=1, leadingDim=0. /*
case 1: transform a tensor by merging it along with a dimension.
In this case, (3, 2) -> (6), whereToMerge=1, leadingDim=0.
*/ */
bool TestMerge1() bool TestMerge1()
{ {
...@@ -49,9 +50,9 @@ bool TestMerge1() ...@@ -49,9 +50,9 @@ bool TestMerge1()
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
DTYPE sData[2][3] = { {0.0, 1.0, 2.0}, DTYPE sData[2][3] = { {0.0F, 1.0F, 2.0F},
{3.0, 4.0, 5.0} }; {3.0F, 4.0F, 5.0F} };
DTYPE answer[6] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0}; DTYPE answer[6] = {0.0F, 1.0F, 2.0F, 3.0F, 4.0F, 5.0F};
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -89,21 +90,30 @@ bool TestMerge1() ...@@ -89,21 +90,30 @@ bool TestMerge1()
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s, t, sGPU, tGPU; delete s;
delete[] sDimSize, tDimSize; delete t;
delete sGPU;
delete tGPU;
delete[] sDimSize;
delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s, t; delete s;
delete[] sDimSize, tDimSize; delete t;
delete[] sDimSize;
delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 2: transform a tensor by merging it along with a dimension. /*
* In this case, (2, 2, 3) -> (4, 3), whereToMerge=1, leadingDim=0. case 2: transform a tensor by merging it along with a dimension.
In this case,
(2, 2, 3) -> (4, 3), whereToMerge=1, leadingDim=0.
(2, 2, 3) -> (2, 6), whereToMerge=2, leadingDim=0.
*/ */
bool TestMerge2() bool TestMerge2()
{ {
...@@ -119,125 +129,55 @@ bool TestMerge2() ...@@ -119,125 +129,55 @@ bool TestMerge2()
sUnitNum *= sDimSize[i]; sUnitNum *= sDimSize[i];
/* a target tensor of size (4, 3) */ /* a target tensor of size (4, 3) */
int tOrder = 2; int tOrder1 = 2;
int * tDimSize = new int[tOrder]; int * tDimSize1 = new int[tOrder1];
tDimSize[0] = 4; tDimSize1[0] = 4;
tDimSize[1] = 3; tDimSize1[1] = 3;
int tUnitNum = 1; int tUnitNum1 = 1;
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder1; i++)
tUnitNum *= tDimSize[i]; tUnitNum1 *= tDimSize1[i];
DTYPE sData[2][2][3] = { { {0.0, 1.0, 2.0}, /* a target tensor of size (2, 6) */
{4.0, 5.0, 6.0} }, int tOrder2 = 2;
{ {-1.0, 2.0, 3.0}, int * tDimSize2 = new int[tOrder2];
{-4.0, -5.0, -6.0} } }; tDimSize2[0] = 2;
DTYPE answer[4][3] = { {0.0, 1.0, 2.0}, tDimSize2[1] = 6;
{4.0, 5.0, 6.0},
{-1.0, 2.0, 3.0}, int tUnitNum2 = 1;
{-4.0, -5.0, -6.0} }; for (int i = 0; i < tOrder2; i++)
tUnitNum2 *= tDimSize2[i];
/* CPU test */
bool cpuTest = true; DTYPE sData[2][2][3] = { { {0.0F, 1.0F, 2.0F},
{4.0F, 5.0F, 6.0F} },
/* create tensors */ { {-1.0F, 2.0F, 3.0F},
XTensor * s = NewTensor(sOrder, sDimSize); {-4.0F, -5.0F, -6.0F} } };
XTensor * t = NewTensor(tOrder, tDimSize); DTYPE answer1[4][3] = { {0.0F, 1.0F, 2.0F},
{4.0F, 5.0F, 6.0F},
/* initialize variables */ {-1.0F, 2.0F, 3.0F},
s->SetData(sData, sUnitNum); {-4.0F, -5.0F, -6.0F} };
t->SetZeroAll(); DTYPE answer2[2][6] = { {0.0F, 1.0F, 2.0F, -1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, -4.0F, -5.0F, -6.0F} };
/* call merge function */
Merge(s, t, 1, 0);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
sGPU->SetData(sData, sUnitNum);
tGPU->SetZeroAll();
/* call merge function */
Merge(sGPU, tGPU, 1, 0);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s, t, sGPU, tGPU;
delete[] sDimSize, tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s, t;
delete[] sDimSize, tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 3: transform a tensor by merging it along with a dimension.
* In this case, (2, 3, 4) -> (3, 8), whereToMerge=0, leadingDim=2.
*/
bool TestMerge3()
{
/* a source tensor of size (2, 3, 4) */
int sOrder = 3;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 3;
sDimSize[2] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a target tensor of size (8, 3) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 3;
tDimSize[1] = 8;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData[2][3][4] = { { {0.0, 1.0, 2.0, 3.0},
{4.0, 5.0, 6.0, 7.0},
{8.0, 9.0, 10.0, 11.0} },
{ {0.0, -1.0, -2.0, -3.0},
{-4.0, -5.0, -6.0, -7.0},
{-8.0, -9.0, -10.0, -11.0} } };
DTYPE answer[3][8] = { {0.0, 1.0, 2.0, 3.0, 0.0, -1.0, -2.0, -3.0},
{4.0, 5.0, 6.0, 7.0, -4.0, -5.0, -6.0, -7.0},
{8.0, 9.0, 10.0, 11.0, -8.0, -9.0, -10.0, -11.0} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize); XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t1 = NewTensor(tOrder1, tDimSize1);
XTensor * t2 = NewTensor(tOrder2, tDimSize2);
/* initialize variables */ /* initialize variables */
s->SetData(sData, sUnitNum); s->SetData(sData, sUnitNum);
t->SetZeroAll(); t1->SetZeroAll();
t2->SetZeroAll();
/* call merge function */ /* call merge function */
Merge(s, t, 2, 0); Merge(s, t1, 1, 0);
Merge(s, t2, 2, 0);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -245,36 +185,51 @@ bool TestMerge3() ...@@ -245,36 +185,51 @@ bool TestMerge3()
/* create tensor */ /* create tensor */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0);
XTensor * tGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0);
/* Initialize variables */ /* Initialize variables */
sGPU->SetData(sData, sUnitNum); sGPU->SetData(sData, sUnitNum);
tGPU->SetZeroAll(); tGPU1->SetZeroAll();
tGPU2->SetZeroAll();
/* call merge function */ /* call merge function */
Merge(sGPU, tGPU, 2, 0); Merge(sGPU, tGPU1, 1, 0);
Merge(sGPU, tGPU2, 2, 0);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2);
/* destroy variables */ /* destroy variables */
delete s, t, sGPU, tGPU; delete s;
delete[] sDimSize, tDimSize; delete t1;
delete t2;
delete sGPU;
delete tGPU1;
delete tGPU2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s, t; delete s;
delete[] sDimSize, tDimSize; delete t1;
delete t2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 4: merge small tensors into a big tensor. /*
case 3: merge small tensors into a big tensor.
In this case, 2 * (2, 4) -> (4, 4), whereToMerge=0. In this case, 2 * (2, 4) -> (4, 4), whereToMerge=0.
*/ */
bool TestMerge4() bool TestMerge3()
{ {
/* create list */ /* create list */
XList * smallList = new XList(); XList * smallList = new XList();
...@@ -289,10 +244,10 @@ bool TestMerge4() ...@@ -289,10 +244,10 @@ bool TestMerge4()
for (int i = 0; i < sOrder; i++) for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i]; sUnitNum *= sDimSize[i];
DTYPE sData1[2][4] = { {0.0, 1.0, 2.0, 3.0}, DTYPE sData1[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0, 5.0, 6.0, 7.0} }; {4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE sData2[2][4] = { {0.0, -1.0, -2.0, -3.0}, DTYPE sData2[2][4] = { {0.0F, -1.0F, -2.0F, -3.0F},
{-4.0, -5.0, -6.0, -7.0} }; {-4.0F, -5.0F, -6.0F, -7.0F} };
/* a target tensor of size (4, 4) */ /* a target tensor of size (4, 4) */
int tOrder = 2; int tOrder = 2;
...@@ -304,10 +259,10 @@ bool TestMerge4() ...@@ -304,10 +259,10 @@ bool TestMerge4()
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
DTYPE answer[4][4] = { {0.0, 1.0, 2.0, 3.0}, DTYPE answer[4][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0, 5.0, 6.0, 7.0}, {4.0F, 5.0F, 6.0F, 7.0F},
{0.0, -1.0, -2.0, -3.0}, {0.0F, -1.0F, -2.0F, -3.0F},
{-4.0, -5.0, -6.0, -7.0} }; {-4.0F, -5.0F, -6.0F, -7.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -359,24 +314,37 @@ bool TestMerge4() ...@@ -359,24 +314,37 @@ bool TestMerge4()
/* check results */ /* check results */
cpuTest = tGPU->CheckData(answer, tUnitNum); cpuTest = tGPU->CheckData(answer, tUnitNum);
delete s1, s2, t, sGPU1, sGPU2, tGPU; /* destroy variables */
delete[] sDimSize, tDimSize; delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize;
delete[] tDimSize;
delete smallList; delete smallList;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s1, s2, t; delete s1;
delete[] sDimSize, tDimSize; delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
delete smallList;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 5: merge small tensors into a big tensor. /*
case 4: merge small tensors into a big tensor.
In this case, 2 * (2, 4) -> (2, 8), whereToMerge=1. In this case, 2 * (2, 4) -> (2, 8), whereToMerge=1.
*/ */
bool TestMerge5() bool TestMerge4()
{ {
/* create list */ /* create list */
XList * smallList = new XList(); XList * smallList = new XList();
...@@ -391,10 +359,10 @@ bool TestMerge5() ...@@ -391,10 +359,10 @@ bool TestMerge5()
for (int i = 0; i < sOrder; i++) for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i]; sUnitNum *= sDimSize[i];
DTYPE sData1[2][4] = { {0.0, 1.0, 2.0, 3.0}, DTYPE sData1[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0, 5.0, 6.0, 7.0} }; {4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE sData2[2][4] = { {0.0, -1.0, -2.0, -3.0}, DTYPE sData2[2][4] = { {0.0F, -1.0F, -2.0F, -3.0F},
{-4.0, -5.0, -6.0, -7.0} }; {-4.0F, -5.0F, -6.0F, -7.0F} };
/* a target tensor of size (4, 4) */ /* a target tensor of size (4, 4) */
int tOrder = 2; int tOrder = 2;
...@@ -406,8 +374,8 @@ bool TestMerge5() ...@@ -406,8 +374,8 @@ bool TestMerge5()
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
DTYPE answer[2][8] = { {0.0, 1.0, 2.0, 3.0, 0.0, -1.0, -2.0, -3.0}, DTYPE answer[2][8] = { {0.0F, 1.0F, 2.0F, 3.0F, 0.0F, -1.0F, -2.0F, -3.0F},
{4.0, 5.0, 6.0, 7.0, -4.0, -5.0, -6.0, -7.0} }; {4.0F, 5.0F, 6.0F, 7.0F, -4.0F, -5.0F, -6.0F, -7.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -459,15 +427,27 @@ bool TestMerge5() ...@@ -459,15 +427,27 @@ bool TestMerge5()
/* check results */ /* check results */
cpuTest = tGPU->CheckData(answer, tUnitNum); cpuTest = tGPU->CheckData(answer, tUnitNum);
delete s1, s2, t, sGPU1, sGPU2, tGPU; /* destroy variables */
delete[] sDimSize, tDimSize; delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize;
delete[] tDimSize;
delete smallList; delete smallList;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s1, s2, t; delete s1;
delete[] sDimSize, tDimSize; delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
delete smallList;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
...@@ -479,10 +459,9 @@ bool TestMerge5() ...@@ -479,10 +459,9 @@ bool TestMerge5()
*/ */
/* test for Merge Function */ /* test for Merge Function */
extern "C"
bool TestMerge() bool TestMerge()
{ {
XPRINT(0, stdout, "[TEST MERGE] -------------\n"); XPRINT(0, stdout, "[TEST MERGE] transform a tensor by merging it alone with a dimension or merge small tensors into a big tensor\n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
...@@ -522,15 +501,6 @@ bool TestMerge() ...@@ -522,15 +501,6 @@ bool TestMerge()
else else
XPRINT(0, stdout, ">> case 4 passed!\n"); XPRINT(0, stdout, ">> case 4 passed!\n");
/* case 5 test */
caseFlag = TestMerge5();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 5 failed!\n");
}
else
XPRINT(0, stdout, ">> case 5 passed!\n");
/* other cases test */ /* other cases test */
/* /*
TODO!! TODO!!
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_MERGE_H__ #ifndef __TEST_MERGE_H__
#define __TEST_MERGE_H__ #define __TEST_MERGE_H__
#include "../core/Merge.h" #include "../core/shape/Merge.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,17 +19,18 @@ ...@@ -19,17 +19,18 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-15 * $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-15
*/ */
#include "../XTensor.h" #include "TMultiply.h"
#include "../XDevice.h"
#include "../core/Multiply.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: element-wise product of two tensors, c(i) = a(i)*b(i) + \alpha * c(i)
* In this case, (2 * 1) (2 * 1) -> (2 * 1), leadingDim=0, alpha=0. /*
case 1: element-wise product of two tensors
c(i) = a(i)*b(i) + \alpha * c(i)
In this case, (2, 1) (2, 1) -> (2, 1), leadingDim=0, alpha=0.
*/ */
bool TestMultiply1() bool TestMultiply1()
{ {
/* a source tensor of size 2 * 1 */ /* a source tensor of size (2, 1) */
int sOrder1 = 2; int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1]; int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2; sDimSize1[0] = 2;
...@@ -39,7 +40,7 @@ bool TestMultiply1() ...@@ -39,7 +40,7 @@ bool TestMultiply1()
for (int i = 0; i < sOrder1; i++) for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i]; sUnitNum1 *= sDimSize1[i];
/* a source tensor of size 2 * 1 */ /* a source tensor of size (2, 1) */
int sOrder2 = 2; int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2]; int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2; sDimSize2[0] = 2;
...@@ -49,7 +50,7 @@ bool TestMultiply1() ...@@ -49,7 +50,7 @@ bool TestMultiply1()
for (int i = 0; i < sOrder2; i++) for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i]; sUnitNum2 *= sDimSize2[i];
/* a target tensor of size 2 * 1 */ /* a target tensor of size (2, 1) */
int tOrder = 2; int tOrder = 2;
int * tDimSize = new int[tOrder]; int * tDimSize = new int[tOrder];
tDimSize[0] = 2; tDimSize[0] = 2;
...@@ -59,9 +60,12 @@ bool TestMultiply1() ...@@ -59,9 +60,12 @@ bool TestMultiply1()
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
DTYPE sData1[2][1] = { {0.0}, {1.0} }; DTYPE sData1[2][1] = { {0.0F},
DTYPE sData2[2][1] = { {2.0}, {3.0} }; {1.0F} };
DTYPE answer[2][1] = { {0.0}, {3.0} }; DTYPE sData2[2][1] = { {2.0F},
{3.0F} };
DTYPE answer[2][1] = { {0.0F},
{3.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -76,7 +80,7 @@ bool TestMultiply1() ...@@ -76,7 +80,7 @@ bool TestMultiply1()
s2->SetData(sData2, sUnitNum2); s2->SetData(sData2, sUnitNum2);
t->SetZeroAll(); t->SetZeroAll();
/* call multiplyelementwise function */ /* call MultiplyElementWise function */
Multiply(s1, s2, t, 0); Multiply(s1, s2, t, 0);
/* check results */ /* check results */
...@@ -96,15 +100,22 @@ bool TestMultiply1() ...@@ -96,15 +100,22 @@ bool TestMultiply1()
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call multiplyelementwise function */ /* call MultiplyElementWise function */
Multiply(sGPU1, sGPU2, tGPU, 0); Multiply(sGPU1, sGPU2, tGPU, 0);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU; delete s1;
delete[] sDimSize1, sDimSize2, tDimSize; delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
...@@ -120,12 +131,14 @@ bool TestMultiply1() ...@@ -120,12 +131,14 @@ bool TestMultiply1()
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 2: element-wise product of two tensors, c(i) = a(i)*b(i) + \alpha * c(i) /*
* In this case, (2 * 2) (2 * 2) -> (2 * 2), leadingDim=0, alpha=0. case 2: element-wise product of two tensors
c(i) = a(i)*b(i) + \alpha * c(i)
In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=0, alpha=0.
*/ */
bool TestMultiply2() bool TestMultiply2()
{ {
/* a source tensor of size (2 * 2) */ /* a source tensor of size (2, 2) */
int sOrder1 = 2; int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1]; int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2; sDimSize1[0] = 2;
...@@ -135,7 +148,7 @@ bool TestMultiply2() ...@@ -135,7 +148,7 @@ bool TestMultiply2()
for (int i = 0; i < sOrder1; i++) for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i]; sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2 * 2) */ /* a source tensor of size (2, 2) */
int sOrder2 = 2; int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2]; int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2; sDimSize2[0] = 2;
...@@ -145,7 +158,7 @@ bool TestMultiply2() ...@@ -145,7 +158,7 @@ bool TestMultiply2()
for (int i = 0; i < sOrder2; i++) for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i]; sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2 * 2) */ /* a target tensor of size (2, 2) */
int tOrder = 2; int tOrder = 2;
int * tDimSize = new int[tOrder]; int * tDimSize = new int[tOrder];
tDimSize[0] = 2; tDimSize[0] = 2;
...@@ -155,12 +168,12 @@ bool TestMultiply2() ...@@ -155,12 +168,12 @@ bool TestMultiply2()
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
DTYPE sData1[2][2] = { {0.0, 1.0}, DTYPE sData1[2][2] = { {0.0F, 1.0F},
{2.0, 3.0} }; {2.0F, 3.0F} };
DTYPE sData2[2][2] = { {0.0, 1.0}, DTYPE sData2[2][2] = { {0.0F, 1.0F},
{2.0, 3.0} }; {2.0F, 3.0F} };
DTYPE answer[2][2] = { {0.0, 1.0}, DTYPE answer[2][2] = { {0.0F, 1.0F},
{4.0, 9.0} }; {4.0F, 9.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -175,7 +188,7 @@ bool TestMultiply2() ...@@ -175,7 +188,7 @@ bool TestMultiply2()
s2->SetData(sData2, sUnitNum2); s2->SetData(sData2, sUnitNum2);
t->SetZeroAll(); t->SetZeroAll();
/* call multiplyelementwise function */ /* call MultiplyElementWise function */
Multiply(s1, s2, t, 0); Multiply(s1, s2, t, 0);
/* check results */ /* check results */
...@@ -195,32 +208,44 @@ bool TestMultiply2() ...@@ -195,32 +208,44 @@ bool TestMultiply2()
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call multiplyelementwise function */ /* call MultiplyElementWise function */
Multiply(sGPU1, sGPU2, tGPU, 0); Multiply(sGPU1, sGPU2, tGPU, 0);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU; delete s1;
delete[] sDimSize1, sDimSize2, tDimSize; delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s1, s2, t; delete s1;
delete[] sDimSize1, sDimSize2, tDimSize; delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 3: element-wise product of two tensors, c(i) = a(i)*b(i) + \alpha * c(i) /*
* In this case, (2 * 2) (2 * 2) -> (2 * 2), leadingDim=1, alpha=0. case 3: element-wise product of two tensors, c(i) = a(i)*b(i) + \alpha * c(i)
In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=1, alpha=0.
*/ */
bool TestMultiply3() bool TestMultiply3()
{ {
/* a source tensor of size (2 * 2) */ /* a source tensor of size (2, 2) */
int sOrder1 = 2; int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1]; int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2; sDimSize1[0] = 2;
...@@ -230,7 +255,7 @@ bool TestMultiply3() ...@@ -230,7 +255,7 @@ bool TestMultiply3()
for (int i = 0; i < sOrder1; i++) for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i]; sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2 * 2) */ /* a source tensor of size (2, 2) */
int sOrder2 = 2; int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2]; int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2; sDimSize2[0] = 2;
...@@ -240,7 +265,7 @@ bool TestMultiply3() ...@@ -240,7 +265,7 @@ bool TestMultiply3()
for (int i = 0; i < sOrder2; i++) for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i]; sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2 * 2) */ /* a target tensor of size (2, 2) */
int tOrder = 2; int tOrder = 2;
int * tDimSize = new int[tOrder]; int * tDimSize = new int[tOrder];
tDimSize[0] = 2; tDimSize[0] = 2;
...@@ -250,12 +275,12 @@ bool TestMultiply3() ...@@ -250,12 +275,12 @@ bool TestMultiply3()
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
DTYPE sData1[2][2] = { {0.0, 1.0}, DTYPE sData1[2][2] = { {0.0F, 1.0F},
{2.0, 3.0} }; {2.0F, 3.0F} };
DTYPE sData2[2][2] = { {0.0, 1.0}, DTYPE sData2[2][2] = { {0.0F, 1.0F},
{2.0, 3.0} }; {2.0F, 3.0F} };
DTYPE answer[2][2] = { {0.0, 1.0}, DTYPE answer[2][2] = { {0.0F, 1.0F},
{4.0, 9.0} }; {4.0F, 9.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -270,7 +295,7 @@ bool TestMultiply3() ...@@ -270,7 +295,7 @@ bool TestMultiply3()
s2->SetData(sData2, sUnitNum2); s2->SetData(sData2, sUnitNum2);
t->SetZeroAll(); t->SetZeroAll();
/* call multiplyelementwise function */ /* call MultiplyElementWise function */
Multiply(s1, s2, t, 1); Multiply(s1, s2, t, 1);
/* check results */ /* check results */
...@@ -290,21 +315,32 @@ bool TestMultiply3() ...@@ -290,21 +315,32 @@ bool TestMultiply3()
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call multiplyelementwise function */ /* call MultiplyElementWise function */
Multiply(sGPU1, sGPU2, tGPU, 1); Multiply(sGPU1, sGPU2, tGPU, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU; delete s1;
delete[] sDimSize1, sDimSize2, tDimSize; delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s1, s2, t; delete s1;
delete[] sDimSize1, sDimSize2, tDimSize; delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
...@@ -316,10 +352,9 @@ TODO!! ...@@ -316,10 +352,9 @@ TODO!!
*/ */
/* test for MultiplyElementWise Function */ /* test for MultiplyElementWise Function */
extern "C"
bool TestMultiply() bool TestMultiply()
{ {
XPRINT(0, stdout, "[TEST MULTIPLYELEMENTWISE] -------------\n"); XPRINT(0, stdout, "[TEST MULTIPLYELEMENTWISE] element-wise product of two tensors \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
......
...@@ -19,10 +19,10 @@ ...@@ -19,10 +19,10 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-15 * $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-15
*/ */
#ifndef __TEST_MULTIPLY_H__ #ifndef __TEST_MULTIPLYELEMENTWISE_H__
#define __TEST_MULTIPLY_H__ #define __TEST_MULTIPLYELEMENTWISE_H__
#include "../core/Multiply.h" #include "../core/arithmetic/Multiply.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,15 +19,14 @@ ...@@ -19,15 +19,14 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14 * $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14
*/ */
#include "../XTensor.h" #include "TNegate.h"
#include "../XDevice.h"
#include "../core/Negate.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: set every entry to its minus value */ /* case 1: set every entry to its minus value */
bool TestNegate1() bool TestNegate1()
{ {
/* a tensor of size 3 * 2 */ /* a tensor of size (3, 2) */
int aOrder = 2; int aOrder = 2;
int * aDimSize = new int[aOrder]; int * aDimSize = new int[aOrder];
aDimSize[0] = 3; aDimSize[0] = 3;
...@@ -37,12 +36,12 @@ bool TestNegate1() ...@@ -37,12 +36,12 @@ bool TestNegate1()
for (int i = 0; i < aOrder; i++) for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i]; aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0, -2.0}, DTYPE aData[3][2] = { {1.0F, -2.0F},
{-3.0, 4.0}, {-3.0F, 4.0F},
{5.0, -6.0} }; {5.0F, -6.0F} };
DTYPE answer[3][2] = { {-1.0, 2.0}, DTYPE answer[3][2] = { {-1.0F, 2.0F},
{3.0, -4.0}, {3.0F, -4.0F},
{-5.0, 6.0} }; {-5.0F, 6.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -53,7 +52,7 @@ bool TestNegate1() ...@@ -53,7 +52,7 @@ bool TestNegate1()
/* initialize variables */ /* initialize variables */
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
/* call negate function */ /* call Negate function */
Negate(a); Negate(a);
/* check results */ /* check results */
...@@ -69,14 +68,15 @@ bool TestNegate1() ...@@ -69,14 +68,15 @@ bool TestNegate1()
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
/* call negate function */ /* call Negate function */
Negate(aGPU); Negate(aGPU);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum); gpuTest = aGPU->CheckData(answer, aUnitNum);
/* destroy variables */ /* destroy variables */
delete a, aGPU; delete a;
delete aGPU;
delete[] aDimSize; delete[] aDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
...@@ -92,7 +92,7 @@ bool TestNegate1() ...@@ -92,7 +92,7 @@ bool TestNegate1()
/* case 2: set every entry to its minus value */ /* case 2: set every entry to its minus value */
bool TestNegate2() bool TestNegate2()
{ {
/* a tensor of size 3 * 2 */ /* a tensor of size (3, 2) */
int aOrder = 2; int aOrder = 2;
int * aDimSize = new int[aOrder]; int * aDimSize = new int[aOrder];
aDimSize[0] = 3; aDimSize[0] = 3;
...@@ -102,12 +102,12 @@ bool TestNegate2() ...@@ -102,12 +102,12 @@ bool TestNegate2()
for (int i = 0; i < aOrder; i++) for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i]; aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {0.0, 0.0}, DTYPE aData[3][2] = { {0.0F, 0.0F},
{0.0, 0.0}, {0.0F, 0.0F},
{0.0, 0.0} }; {0.0F, 0.0F} };
DTYPE answer[3][2] = { {-0.0, -0.0}, DTYPE answer[3][2] = { {-0.0F, -0.0F},
{-0.0, -0.0}, {-0.0F, -0.0F},
{-0.0, -0.0} }; {-0.0F, -0.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -118,7 +118,7 @@ bool TestNegate2() ...@@ -118,7 +118,7 @@ bool TestNegate2()
/* initialize variables */ /* initialize variables */
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
/* call negate function */ /* call Negate function */
Negate(a); Negate(a);
/* check results */ /* check results */
...@@ -134,14 +134,15 @@ bool TestNegate2() ...@@ -134,14 +134,15 @@ bool TestNegate2()
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
/* call negate function */ /* call Negate function */
Negate(aGPU); Negate(aGPU);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum); gpuTest = aGPU->CheckData(answer, aUnitNum);
/* destroy variables */ /* destroy variables */
delete a, aGPU; delete a;
delete aGPU;
delete[] aDimSize; delete[] aDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
...@@ -160,10 +161,9 @@ TODO!! ...@@ -160,10 +161,9 @@ TODO!!
*/ */
/* test for Negate Function */ /* test for Negate Function */
extern "C"
bool TestNegate() bool TestNegate()
{ {
XPRINT(0, stdout, "[TEST NEGATE] -------------\n"); XPRINT(0, stdout, "[TEST NEGATE] set every entry to its minus value \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_NEGATE_H__ #ifndef __TEST_NEGATE_H__
#define __TEST_NEGATE_H__ #define __TEST_NEGATE_H__
#include "../core/Negate.h" #include "../core/arithmetic/Negate.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,17 +19,19 @@ ...@@ -19,17 +19,19 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-20 * $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-20
*/ */
#include "../XTensor.h" #include "TNormalize.h"
#include "../XDevice.h"
#include "../core/Normalize.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: normalized the data with normal distribution
* In this case, dim=0. /*
case 1: normalized the data with normal distribution
For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b.
where a and b are the scalar and bias respectively,
and \epsilon is the adjustment parameter.
*/ */
bool TestNormalize1() bool TestNormalize1()
{ {
/* a source tensor of size 2 * 3 */ /* a source tensor of size (2, 3) */
int sOrder = 2; int sOrder = 2;
int * sDimSize = new int[sOrder]; int * sDimSize = new int[sOrder];
sDimSize[0] = 2; sDimSize[0] = 2;
...@@ -39,7 +41,7 @@ bool TestNormalize1() ...@@ -39,7 +41,7 @@ bool TestNormalize1()
for (int i = 0; i < sOrder; i++) for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i]; sUnitNum *= sDimSize[i];
/* a target tensor of size 2 * 3 */ /* a target tensor of size (2, 3) */
int tOrder = 2; int tOrder = 2;
int * tDimSize = new int[tOrder]; int * tDimSize = new int[tOrder];
tDimSize[0] = 2; tDimSize[0] = 2;
...@@ -49,7 +51,7 @@ bool TestNormalize1() ...@@ -49,7 +51,7 @@ bool TestNormalize1()
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
/* a mean tensor of size 3 */ /* a mean tensor of size (3) */
int meanOrder = 1; int meanOrder = 1;
int * meanDimSize = new int[meanOrder]; int * meanDimSize = new int[meanOrder];
meanDimSize[0] = 3; meanDimSize[0] = 3;
...@@ -58,7 +60,7 @@ bool TestNormalize1() ...@@ -58,7 +60,7 @@ bool TestNormalize1()
for (int i = 0; i < meanOrder; i++) for (int i = 0; i < meanOrder; i++)
meanUnitNum *= meanDimSize[i]; meanUnitNum *= meanDimSize[i];
/* a var tensor of size 3 */ /* a variance tensor of size (3) */
int varOrder = 1; int varOrder = 1;
int * varDimSize = new int[varOrder]; int * varDimSize = new int[varOrder];
varDimSize[0] = 3; varDimSize[0] = 3;
...@@ -67,7 +69,7 @@ bool TestNormalize1() ...@@ -67,7 +69,7 @@ bool TestNormalize1()
for (int i = 0; i < varOrder; i++) for (int i = 0; i < varOrder; i++)
varUnitNum *= varDimSize[i]; varUnitNum *= varDimSize[i];
/* a a tensor of size 2 * 3 */ /* a scalar tensor of size (2, 3) */
int aOrder = 2; int aOrder = 2;
int * aDimSize = new int[aOrder]; int * aDimSize = new int[aOrder];
aDimSize[0] = 2; aDimSize[0] = 2;
...@@ -77,7 +79,7 @@ bool TestNormalize1() ...@@ -77,7 +79,7 @@ bool TestNormalize1()
for (int i = 0; i < aOrder; i++) for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i]; aUnitNum *= aDimSize[i];
/* a b tensor of size 2 * 3 */ /* a bias tensor of size (2, 3) */
int bOrder = 2; int bOrder = 2;
int * bDimSize = new int[bOrder]; int * bDimSize = new int[bOrder];
bDimSize[0] = 2; bDimSize[0] = 2;
...@@ -87,41 +89,39 @@ bool TestNormalize1() ...@@ -87,41 +89,39 @@ bool TestNormalize1()
for (int i = 0; i < bOrder; i++) for (int i = 0; i < bOrder; i++)
bUnitNum *= bDimSize[i]; bUnitNum *= bDimSize[i];
DTYPE sData[2][3] = { {0.5, -1.0, 2.0}, DTYPE sData[2][3] = { {1.0F, 2.0F, 3.0F},
{3.5, -4.5, 1.0} }; {1.5F, 2.5F, 3.5F} };
DTYPE meanData[3] = {2.0, -2.75, 1.5}; DTYPE meanData[3] = {1.0F, 1.5F, 2.0F};
DTYPE varData[3] = {4.5, 6.125, 0.5}; DTYPE varData[3] = {1.0F, 1.0F, 4.0F};
DTYPE aData[2][3] = { {0.0, 0.0, 0.0}, DTYPE aData[2][3] = { {1.0F, 1.0F, 1.0F},
{0.0, 0.0, 0.0} }; {1.0F, 1.0F, 1.0F} };
DTYPE bData[2][3] = { {0.0, 0.0, 0.0}, DTYPE answer[2][3] = { {0.0F, 0.5F, 0.5F},
{0.0, 0.0, 0.0} }; {0.5F, 1.0F, 0.75F} };
DTYPE answer[2][3] = { {0.0, 0.0, 0.0},
{0.0, 0.0, 0.0} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize); XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor * mean = NewTensor(meanOrder, meanDimSize); XTensor * mean = NewTensor(meanOrder, meanDimSize);
XTensor * var = NewTensor(varOrder, varDimSize); XTensor * var = NewTensor(varOrder, varDimSize);
XTensor * a = NewTensor(aOrder, aDimSize); XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(bOrder, bDimSize); XTensor * b = NewTensor(bOrder, bDimSize);
XTensor * t = NewTensor(tOrder, tDimSize);
/* initialize variables */ /* initialize variables */
s->SetData(sData, sUnitNum); s->SetData(sData, sUnitNum);
mean->SetData(meanData, meanUnitNum); mean->SetData(meanData, meanUnitNum);
var->SetData(varData, varUnitNum); var->SetData(varData, varUnitNum);
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
b->SetData(bData, bUnitNum); b->SetZeroAll();
t->SetZeroAll(); t->SetZeroAll();
/* call normalize function */ /* call normalize function */
Normalize(s, t, 0, mean, var, a, b, 0.0); Normalize(s, t, 0, mean, var, a, b, 0.0F);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum, 1e-4, 0);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -140,24 +140,50 @@ bool TestNormalize1() ...@@ -140,24 +140,50 @@ bool TestNormalize1()
meanGPU->SetData(meanData, meanUnitNum); meanGPU->SetData(meanData, meanUnitNum);
varGPU->SetData(varData, varUnitNum); varGPU->SetData(varData, varUnitNum);
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
bGPU->SetData(bData, bUnitNum); bGPU->SetZeroAll();
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call normalize function */ /* call Normalize function */
Normalize(sGPU, tGPU, 0, meanGPU, varGPU, aGPU, bGPU, 0.0); Normalize(sGPU, tGPU, 0, meanGPU, varGPU, aGPU, bGPU, 0.0F);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum, 1e-4, 0);
/* destroy variables */ /* destroy variables */
delete s, t, mean, var, a, b, sGPU, tGPU, meanGPU, varGPU, aGPU, bGPU; delete s;
delete[] sDimSize, tDimSize, meanDimSize, varDimSize, aDimSize, bDimSize; delete t;
delete mean;
delete var;
delete a;
delete b;
delete sGPU;
delete tGPU;
delete meanGPU;
delete varGPU;
delete aGPU;
delete bGPU;
delete[] sDimSize;
delete[] tDimSize;
delete[] meanDimSize;
delete[] varDimSize;
delete[] aDimSize;
delete[] bDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s, t, mean, var, a, b; delete s;
delete[] sDimSize, tDimSize, meanDimSize, varDimSize, aDimSize, bDimSize; delete t;
delete mean;
delete var;
delete a;
delete b;
delete[] sDimSize;
delete[] tDimSize;
delete[] meanDimSize;
delete[] varDimSize;
delete[] aDimSize;
delete[] bDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
...@@ -169,10 +195,9 @@ TODO!! ...@@ -169,10 +195,9 @@ TODO!!
*/ */
/* test for Normalize Function */ /* test for Normalize Function */
extern "C"
bool TestNormalize() bool TestNormalize()
{ {
XPRINT(0, stdout, "[TEST NORMALIZE] -------------\n"); XPRINT(0, stdout, "[TEST NORMALIZE] normalized the data with normal distribution \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_NORMALIZE_H__ #ifndef __TEST_NORMALIZE_H__
#define __TEST_NORMALIZE_H__ #define __TEST_NORMALIZE_H__
#include "../core/Normalize.h" #include "../core/math/Normalize.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,17 +19,18 @@ ...@@ -19,17 +19,18 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-15 * $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-15
*/ */
#include "../XTensor.h" #include "../XUtility.h"
#include "../XDevice.h" #include "TPower.h"
#include "../core/Power.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: get the power(a, p)
* In this case, p=2. /*
case 1: get the power(a, p)
In this case, p=2.
*/ */
bool TestPower1() bool TestPower1()
{ {
/* a tensor of size 3 * 2 */ /* a tensor of size (3, 2) */
int aOrder = 2; int aOrder = 2;
int * aDimSize = new int[aOrder]; int * aDimSize = new int[aOrder];
aDimSize[0] = 3; aDimSize[0] = 3;
...@@ -39,12 +40,12 @@ bool TestPower1() ...@@ -39,12 +40,12 @@ bool TestPower1()
for (int i = 0; i < aOrder; i++) for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i]; aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0, 2.0}, DTYPE aData[3][2] = { {1.0F, 2.0F},
{3.0, 4.0}, {3.0F, 4.0F},
{5.0, 6.0} }; {5.0F, 6.0F} };
DTYPE answer[3][2] = { {1.0, 4.0}, DTYPE answer[3][2] = { {1.0F, 4.0F},
{9.0, 16.0}, {9.0F, 16.0F},
{25.0, 36.0} }; {25.0F, 36.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -55,11 +56,11 @@ bool TestPower1() ...@@ -55,11 +56,11 @@ bool TestPower1()
/* initialize variables */ /* initialize variables */
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
/* call power function */ /* call Power function */
Power(a, 2.0); Power(a, 2.0F);
/* check results */ /* check results */
cpuTest = a->CheckData(answer, aUnitNum); cpuTest = a->CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -72,13 +73,14 @@ bool TestPower1() ...@@ -72,13 +73,14 @@ bool TestPower1()
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
/* call power function */ /* call power function */
Power(aGPU, 2.0); Power(aGPU, 2.0F);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum); gpuTest = aGPU->CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete a, aGPU; delete a;
delete aGPU;
delete[] aDimSize; delete[] aDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
...@@ -91,12 +93,13 @@ bool TestPower1() ...@@ -91,12 +93,13 @@ bool TestPower1()
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 2: get the power(a, p) /*
* In this case, p=1. case 2: get the power(a, p)
In this case, p=1.
*/ */
bool TestPower2() bool TestPower2()
{ {
/* a tensor of size 3 * 2 */ /* a tensor of size (3, 2) */
int aOrder = 2; int aOrder = 2;
int * aDimSize = new int[aOrder]; int * aDimSize = new int[aOrder];
aDimSize[0] = 3; aDimSize[0] = 3;
...@@ -106,12 +109,12 @@ bool TestPower2() ...@@ -106,12 +109,12 @@ bool TestPower2()
for (int i = 0; i < aOrder; i++) for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i]; aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {0.0, 1.0}, DTYPE aData[3][2] = { {0.0F, 1.0F},
{2.0, 3.0}, {2.0F, 3.0F},
{4.0, 5.0} }; {4.0F, 5.0F} };
DTYPE answer[3][2] = { {0.0, 1.0}, DTYPE answer[3][2] = { {0.0F, 1.0F},
{2.0, 3.0}, {2.0F, 3.0F},
{4.0, 5.0} }; {4.0F, 5.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -122,11 +125,11 @@ bool TestPower2() ...@@ -122,11 +125,11 @@ bool TestPower2()
/* initialize variables */ /* initialize variables */
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
/* call power function */ /* call Power function */
Power(a, 1.0); Power(a, 1.0F);
/* check results */ /* check results */
cpuTest = a->CheckData(answer, aUnitNum); cpuTest = a->CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -138,14 +141,15 @@ bool TestPower2() ...@@ -138,14 +141,15 @@ bool TestPower2()
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
/* call power function */ /* call Power function */
Power(aGPU, 1.0); Power(aGPU, 1.0F);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum); gpuTest = aGPU->CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete a, aGPU; delete a;
delete aGPU;
delete[] aDimSize; delete[] aDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
...@@ -158,12 +162,13 @@ bool TestPower2() ...@@ -158,12 +162,13 @@ bool TestPower2()
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 3: get the power(a, p) /*
* In this case, p=0. case 3: get the power(a, p)
In this case, p=0.
*/ */
bool TestPower3() bool TestPower3()
{ {
/* a tensor of size 3 * 2 */ /* a tensor of size (3, 2) */
int aOrder = 2; int aOrder = 2;
int * aDimSize = new int[aOrder]; int * aDimSize = new int[aOrder];
aDimSize[0] = 3; aDimSize[0] = 3;
...@@ -173,12 +178,12 @@ bool TestPower3() ...@@ -173,12 +178,12 @@ bool TestPower3()
for (int i = 0; i < aOrder; i++) for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i]; aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {0.0, 1.0}, DTYPE aData[3][2] = { {0.0F, 1.0F},
{2.0, 3.0}, {2.0F, 3.0F},
{4.0, 5.0} }; {4.0F, 5.0F} };
DTYPE answer[3][2] = { {1.0, 1.0}, DTYPE answer[3][2] = { {1.0F, 1.0F},
{1.0, 1.0}, {1.0F, 1.0F},
{1.0, 1.0} }; {1.0F, 1.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -189,11 +194,11 @@ bool TestPower3() ...@@ -189,11 +194,11 @@ bool TestPower3()
/* initialize variables */ /* initialize variables */
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
/* call power function */ /* call Power function */
Power(a, 0.0); Power(a, 0.0F);
/* check results */ /* check results */
cpuTest = a->CheckData(answer, aUnitNum); cpuTest = a->CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -205,14 +210,15 @@ bool TestPower3() ...@@ -205,14 +210,15 @@ bool TestPower3()
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
/* call power function */ /* call Power function */
Power(aGPU, 0.0); Power(aGPU, 0.0F);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum); gpuTest = aGPU->CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete a, aGPU; delete a;
delete aGPU;
delete[] aDimSize; delete[] aDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
...@@ -231,10 +237,9 @@ TODO!! ...@@ -231,10 +237,9 @@ TODO!!
*/ */
/* test for Power Function */ /* test for Power Function */
extern "C"
bool TestPower() bool TestPower()
{ {
XPRINT(0, stdout, "[TEST POWER] -------------\n"); XPRINT(0, stdout, "[TEST POWER] get the power(a, p) \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_POWER_H__ #ifndef __TEST_POWER_H__
#define __TEST_POWER_H__ #define __TEST_POWER_H__
#include "../core/Power.h" #include "../core/math/Power.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,15 +19,17 @@ ...@@ -19,15 +19,17 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14 * $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14
*/ */
#include "../XTensor.h" #include "TRectify.h"
#include "../XDevice.h"
#include "../function/Rectify.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: rectify function y = max(0, x) */
/*
case 1: test rectify function
In this case, y = max(0, x)
*/
bool TestRectify1() bool TestRectify1()
{ {
/* an x tensor of size 2 * 3 */ /* an x tensor of size (2, 3) */
int xOrder = 2; int xOrder = 2;
int * xDimSize = new int[xOrder]; int * xDimSize = new int[xOrder];
xDimSize[0] = 2; xDimSize[0] = 2;
...@@ -37,7 +39,7 @@ bool TestRectify1() ...@@ -37,7 +39,7 @@ bool TestRectify1()
for (int i = 0; i < xOrder; i++) for (int i = 0; i < xOrder; i++)
xUnitNum *= xDimSize[i]; xUnitNum *= xDimSize[i];
/* a y tensor of size 2 * 3 */ /* a y tensor of size (2, 3) */
int yOrder = 2; int yOrder = 2;
int * yDimSize = new int[yOrder]; int * yDimSize = new int[yOrder];
yDimSize[0] = 2; yDimSize[0] = 2;
...@@ -47,10 +49,10 @@ bool TestRectify1() ...@@ -47,10 +49,10 @@ bool TestRectify1()
for (int i = 0; i < yOrder; i++) for (int i = 0; i < yOrder; i++)
yUnitNum *= yDimSize[i]; yUnitNum *= yDimSize[i];
DTYPE xData[2][3] = { {0.0, -1.0, 2.0}, DTYPE xData[2][3] = { {0.0F, -1.0F, 2.0F},
{3.0, -4.0, -5.0} }; {3.0F, -4.0F, -5.0F} };
DTYPE answer[2][3] = { {0.0, 0.0, 2.0}, DTYPE answer[2][3] = { {0.0F, 0.0F, 2.0F},
{3.0, 0.0, 0.0} }; {3.0F, 0.0F, 0.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -63,7 +65,7 @@ bool TestRectify1() ...@@ -63,7 +65,7 @@ bool TestRectify1()
x->SetData(xData, xUnitNum); x->SetData(xData, xUnitNum);
y->SetZeroAll(); y->SetZeroAll();
/* call rectify function */ /* call Rectify function */
Rectify(x, y); Rectify(x, y);
/* check results */ /* check results */
...@@ -81,32 +83,41 @@ bool TestRectify1() ...@@ -81,32 +83,41 @@ bool TestRectify1()
xGPU->SetData(xData, xUnitNum); xGPU->SetData(xData, xUnitNum);
yGPU->SetZeroAll(); yGPU->SetZeroAll();
/* call rectify function */ /* call Rectify function */
Rectify(xGPU, yGPU); Rectify(xGPU, yGPU);
/* check results */ /* check results */
gpuTest = yGPU->CheckData(answer, yUnitNum); gpuTest = yGPU->CheckData(answer, yUnitNum);
/* destroy variables */ /* destroy variables */
delete x, y, xGPU, yGPU; delete x;
delete[] xDimSize, yDimSize; delete y;
delete xGPU;
delete yGPU;
delete[] xDimSize;
delete[] yDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete x, y; delete x;
delete[] xDimSize, yDimSize; delete y;
delete[] xDimSize;
delete[] yDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 2: backward computation dE/dx = dE/dy * dy/dx rectified: y = max(0, x) /*
* In this case, lossName=CROSSENTROPY. case 2: backward computation
dE/dx = dE/dy * dy/dx
rectified: y = max(0, x)
In this case, lossName=CROSSENTROPY.
*/ */
bool TestRectify2() bool TestRectify2()
{ {
/* an x tensor of size 2 * 3 */ /* an x tensor of size (2, 3) */
int xOrder = 2; int xOrder = 2;
int * xDimSize = new int[xOrder]; int * xDimSize = new int[xOrder];
xDimSize[0] = 2; xDimSize[0] = 2;
...@@ -116,46 +127,6 @@ bool TestRectify2() ...@@ -116,46 +127,6 @@ bool TestRectify2()
for (int i = 0; i < xOrder; i++) for (int i = 0; i < xOrder; i++)
xUnitNum *= xDimSize[i]; xUnitNum *= xDimSize[i];
/* a y tensor of size 2 * 3 */
int yOrder = 2;
int * yDimSize = new int[yOrder];
yDimSize[0] = 2;
yDimSize[1] = 3;
int yUnitNum = 1;
for (int i = 0; i < yOrder; i++)
yUnitNum *= yDimSize[i];
/* a gold tensor of size 2 * 3 */
int goldOrder = 2;
int * goldDimSize = new int[goldOrder];
goldDimSize[0] = 2;
goldDimSize[1] = 3;
int goldUnitNum = 1;
for (int i = 0; i < goldOrder; i++)
goldUnitNum *= goldDimSize[i];
/* a dedy tensor of size 2 * 3 */
int dedyOrder = 2;
int * dedyDimSize = new int[dedyOrder];
dedyDimSize[0] = 2;
dedyDimSize[1] = 3;
int dedyUnitNum = 1;
for (int i = 0; i < dedyOrder; i++)
dedyUnitNum *= dedyDimSize[i];
/* a dedx tensor of size 2 * 3 */
int dedxOrder = 2;
int * dedxDimSize = new int[dedxOrder];
dedxDimSize[0] = 2;
dedxDimSize[1] = 3;
int dedxUnitNum = 1;
for (int i = 0; i < dedxOrder; i++)
dedxUnitNum *= dedxDimSize[i];
DTYPE xData[2][3] = { {1.0F, 1.0F, 2.0F}, DTYPE xData[2][3] = { {1.0F, 1.0F, 2.0F},
{2.0F, 4.0F, 5.0F} }; {2.0F, 4.0F, 5.0F} };
DTYPE yData[2][3] = { {1.0F, 1.0F, 2.0F}, DTYPE yData[2][3] = { {1.0F, 1.0F, 2.0F},
...@@ -172,150 +143,23 @@ bool TestRectify2() ...@@ -172,150 +143,23 @@ bool TestRectify2()
/* create tensors */ /* create tensors */
XTensor * x = NewTensor(xOrder, xDimSize); XTensor * x = NewTensor(xOrder, xDimSize);
XTensor * y = NewTensor(yOrder, yDimSize); XTensor * y = NewTensor(xOrder, xDimSize);
XTensor * gold = NewTensor(goldOrder, goldDimSize); XTensor * gold = NewTensor(xOrder, xDimSize);
XTensor * dedy = NewTensor(dedyOrder, dedyDimSize); XTensor * dedy = NewTensor(xOrder, xDimSize);
XTensor * dedx = NewTensor(dedxOrder, dedxDimSize); XTensor * dedx = NewTensor(xOrder, xDimSize);
/* initialize variables */
x->SetData(xData, xUnitNum);
y->SetData(yData, yUnitNum);
gold->SetData(goldData, goldUnitNum);
dedy->SetData(dedyData, dedyUnitNum);
dedx->SetZeroAll();
/* call rectifybackward function */
RectifyBackward(gold, y, x, dedy, dedx, CROSSENTROPY);
/* check results */
cpuTest = dedx->CheckData(answer, dedxUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * xGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(yOrder, yDimSize, X_FLOAT, 1.0F, 0);
XTensor * goldGPU = NewTensor(goldOrder, goldDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(dedyOrder, dedyDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(dedxOrder, dedxDimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
xGPU->SetData(xData, xUnitNum);
yGPU->SetData(yData, yUnitNum);
goldGPU->SetData(goldData, goldUnitNum);
dedyGPU->SetData(dedyData, dedyUnitNum);
dedxGPU->SetZeroAll();
/* call rectifybackward function */
RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
/* check results */
gpuTest = dedxGPU->CheckData(answer, dedxUnitNum);
/* destroy variables */
delete x, y, dedy, dedx, gold, xGPU, yGPU, dedyGPU, dedxGPU, goldGPU;
delete[] xDimSize, yDimSize, dedyDimSize, dedxDimSize, goldDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete x, y, dedy, dedx, gold;
delete[] xDimSize, yDimSize, dedyDimSize, dedxDimSize, goldDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 3: backward computation dE/dx = dE/dy * dy/dx rectified: y = max(0, x)
* In this case, lossName=SQUAREDERROR.
*/
bool TestRectify3()
{
/* a x tensor of size 2 * 3 */
int xOrder = 2;
int * xDimSize = new int[xOrder];
xDimSize[0] = 2;
xDimSize[1] = 3;
int xUnitNum = 1;
for (int i = 0; i < xOrder; i++)
xUnitNum *= xDimSize[i];
/* a y tensor of size 2 * 3 */
int yOrder = 2;
int * yDimSize = new int[yOrder];
yDimSize[0] = 2;
yDimSize[1] = 3;
int yUnitNum = 1;
for (int i = 0; i < yOrder; i++)
yUnitNum *= yDimSize[i];
/* a gold tensor of size 2 * 3 */
int goldOrder = 2;
int * goldDimSize = new int[goldOrder];
goldDimSize[0] = 2;
goldDimSize[1] = 3;
int goldUnitNum = 1;
for (int i = 0; i < goldOrder; i++)
goldUnitNum *= goldDimSize[i];
/* a dedy tensor of size 2 * 3 */
int dedyOrder = 2;
int * dedyDimSize = new int[dedyOrder];
dedyDimSize[0] = 2;
dedyDimSize[1] = 3;
int dedyUnitNum = 1;
for (int i = 0; i < dedyOrder; i++)
dedyUnitNum *= dedyDimSize[i];
/* a dedx tensor of size 2 * 3 */
int dedxOrder = 2;
int * dedxDimSize = new int[dedxOrder];
dedxDimSize[0] = 2;
dedxDimSize[1] = 3;
int dedxUnitNum = 1;
for (int i = 0; i < dedxOrder; i++)
dedxUnitNum *= dedxDimSize[i];
DTYPE xData[2][3] = { {1.0, 1.0, 2.0},
{2.0, 4.0, 5.0} };
DTYPE yData[2][3] = { {1.0, 1.0, 2.0},
{2.0, 4.0, 5.0} };
DTYPE goldData[2][3] = { {1.0, 1.0, 1.0},
{1.0, 1.0, 1.0} };
DTYPE dedyData[2][3] = { {0.0, 0.0, 1.0},
{1.0, 3.0, 4.0} };
DTYPE answer[2][3] = { {0.0, 0.0, 1.0},
{1.0, 3.0, 4.0} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * x = NewTensor(xOrder, xDimSize);
XTensor * y = NewTensor(yOrder, yDimSize);
XTensor * gold = NewTensor(goldOrder, goldDimSize);
XTensor * dedy = NewTensor(dedyOrder, dedyDimSize);
XTensor * dedx = NewTensor(dedxOrder, dedxDimSize);
/* initialize variables */ /* initialize variables */
x->SetData(xData, xUnitNum); x->SetData(xData, xUnitNum);
y->SetData(yData, yUnitNum); y->SetData(yData, xUnitNum);
gold->SetData(goldData, goldUnitNum); gold->SetData(goldData, xUnitNum);
dedy->SetData(dedyData, dedyUnitNum); dedy->SetData(dedyData, xUnitNum);
dedx->SetZeroAll(); dedx->SetZeroAll();
/* call rectifybackward function */ /* call RectifyBackward function */
RectifyBackward(gold, y, x, dedy, dedx, CROSSENTROPY); RectifyBackward(gold, y, x, dedy, dedx, NOLOSS);
/* check results */ /* check results */
cpuTest = dedx->CheckData(answer, dedxUnitNum); cpuTest = dedx->CheckData(answer, xUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -323,160 +167,46 @@ bool TestRectify3() ...@@ -323,160 +167,46 @@ bool TestRectify3()
/* create tensors */ /* create tensors */
XTensor * xGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0); XTensor * xGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(yOrder, yDimSize, X_FLOAT, 1.0F, 0); XTensor * yGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
XTensor * goldGPU = NewTensor(goldOrder, goldDimSize, X_FLOAT, 1.0F, 0); XTensor * goldGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(dedyOrder, dedyDimSize, X_FLOAT, 1.0F, 0); XTensor * dedyGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(dedxOrder, dedxDimSize, X_FLOAT, 1.0F, 0); XTensor * dedxGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */ /* initialize variables */
xGPU->SetData(xData, xUnitNum); xGPU->SetData(xData, xUnitNum);
yGPU->SetData(yData, yUnitNum); yGPU->SetData(yData, xUnitNum);
goldGPU->SetData(goldData, goldUnitNum); goldGPU->SetData(goldData, xUnitNum);
dedyGPU->SetData(dedyData, dedyUnitNum); dedyGPU->SetData(dedyData, xUnitNum);
dedxGPU->SetZeroAll(); dedxGPU->SetZeroAll();
/* call rectifybackward function */ /* call rectifybackward function */
RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY); RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, NOLOSS);
/* check results */ /* check results */
gpuTest = dedxGPU->CheckData(answer, dedxUnitNum); gpuTest = dedxGPU->CheckData(answer, xUnitNum);
/* destroy variables */ /* destroy variables */
delete x, y, dedy, dedx, gold, xGPU, yGPU, dedyGPU, dedxGPU, goldGPU; delete x;
delete[] xDimSize, yDimSize, dedyDimSize, dedxDimSize, goldDimSize; delete y;
delete dedy;
delete dedx;
delete gold;
delete xGPU;
delete yGPU;
delete dedyGPU;
delete dedxGPU;
delete goldGPU;
delete[] xDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete x, y, dedy, dedx, gold; delete x;
delete[] xDimSize, yDimSize, dedyDimSize, dedxDimSize, goldDimSize; delete y;
delete dedy;
return cpuTest; delete dedx;
#endif // USE_CUDA delete gold;
} delete[] xDimSize;
/* case 4: backward computation dE/dx = dE/dy * dy/dx rectified: y = max(0, x)
* In this case, lossName=ONEHOTERROR.
*/
bool TestRectify4()
{
/* a x tensor of size 2 * 3 */
int xOrder = 2;
int * xDimSize = new int[xOrder];
xDimSize[0] = 2;
xDimSize[1] = 3;
int xUnitNum = 1;
for (int i = 0; i < xOrder; i++)
xUnitNum *= xDimSize[i];
/* a y tensor of size 2 * 3 */
int yOrder = 2;
int * yDimSize = new int[yOrder];
yDimSize[0] = 2;
yDimSize[1] = 3;
int yUnitNum = 1;
for (int i = 0; i < yOrder; i++)
yUnitNum *= yDimSize[i];
/* a gold tensor of size 2 * 3 */
int goldOrder = 2;
int * goldDimSize = new int[goldOrder];
goldDimSize[0] = 2;
goldDimSize[1] = 3;
int goldUnitNum = 1;
for (int i = 0; i < goldOrder; i++)
goldUnitNum *= goldDimSize[i];
/* a dedy tensor of size 2 * 3 */
int dedyOrder = 2;
int * dedyDimSize = new int[dedyOrder];
dedyDimSize[0] = 2;
dedyDimSize[1] = 3;
int dedyUnitNum = 1;
for (int i = 0; i < dedyOrder; i++)
dedyUnitNum *= dedyDimSize[i];
/* a dedx tensor of size 2 * 3 */
int dedxOrder = 2;
int * dedxDimSize = new int[dedxOrder];
dedxDimSize[0] = 2;
dedxDimSize[1] = 3;
int dedxUnitNum = 1;
for (int i = 0; i < dedxOrder; i++)
dedxUnitNum *= dedxDimSize[i];
DTYPE xData[2][3] = { {1.0, 1.0, -2.0},
{2.0, 4.0, 5.0} };
DTYPE yData[2][3] = { {1.0, 1.0, 0.0},
{2.0, 4.0, 5.0} };
DTYPE goldData[2][3] = { {1.0, 0.0, 1.0},
{1.0, 1.0, 0.0} };
DTYPE dedyData[2][3] = { {0.0, 0.0, -1.0},
{1.0, 3.0, 0.0} };
DTYPE answer[2][3] = { {0.0, 0.0, 0.0},
{1.0, 3.0, 0.0} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * x = NewTensor(xOrder, xDimSize);
XTensor * y = NewTensor(yOrder, yDimSize);
XTensor * gold = NewTensor(goldOrder, goldDimSize);
XTensor * dedy = NewTensor(dedyOrder, dedyDimSize);
XTensor * dedx = NewTensor(dedxOrder, dedxDimSize);
/* initialize variables */
x->SetData(xData, xUnitNum);
y->SetData(yData, yUnitNum);
gold->SetData(goldData, goldUnitNum);
dedy->SetData(dedyData, dedyUnitNum);
dedx->SetZeroAll();
/* call rectifybackward function */
RectifyBackward(gold, y, x, dedy, dedx, ONEHOTERROR);
/* check results */
cpuTest = dedx->CheckData(answer, dedxUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * xGPU = NewTensor(xOrder, xDimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(yOrder, yDimSize, X_FLOAT, 1.0F, 0);
XTensor * goldGPU = NewTensor(goldOrder, goldDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(dedyOrder, dedyDimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(dedxOrder, dedxDimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
xGPU->SetData(xData, xUnitNum);
yGPU->SetData(yData, yUnitNum);
goldGPU->SetData(goldData, goldUnitNum);
dedyGPU->SetData(dedyData, dedyUnitNum);
dedxGPU->SetZeroAll();
/* call rectifybackward function */
RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
/* check results */
gpuTest = dedxGPU->CheckData(answer, dedxUnitNum);
/* destroy variables */
delete x, y, dedy, dedx, gold, xGPU, yGPU, dedyGPU, dedxGPU, goldGPU;
delete[] xDimSize, yDimSize, dedyDimSize, dedxDimSize, goldDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete x, y, dedy, dedx, gold;
delete[] xDimSize, yDimSize, dedyDimSize, dedxDimSize, goldDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
...@@ -488,10 +218,9 @@ TODO!! ...@@ -488,10 +218,9 @@ TODO!!
*/ */
/* test for Rectify Function */ /* test for Rectify Function */
extern "C"
bool TestRectify() bool TestRectify()
{ {
XPRINT(0, stdout, "[TEST RECTIFY] -------------\n"); XPRINT(0, stdout, "[TEST RECTIFY] test rectify and its backward computation \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
...@@ -514,26 +243,6 @@ bool TestRectify() ...@@ -514,26 +243,6 @@ bool TestRectify()
else else
XPRINT(0, stdout, ">> case 2 passed!\n"); XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */
caseFlag = TestRectify3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestRectify4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* other cases test */ /* other cases test */
/* /*
TODO!! TODO!!
......
...@@ -16,212 +16,129 @@ ...@@ -16,212 +16,129 @@
*/ */
/* /*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-30
*/ */
#include "../XTensor.h" #include "TReduceMax.h"
#include "../XDevice.h"
#include "../core/ReduceMax.h"
namespace nts { // namespace nt(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1 */
bool TestReduceMax1()
{
/* a tensor of size 2 * 4 */
int order = 2;
int order_reduce = 1;
int * dimSize = new int[order];
dimSize[0] = 2;
dimSize[1] = 4;
int unitNum = 1; /*
for (int i = 0; i < order; i++) case 1: get the max value of the items along a dimension of the tensor.
unitNum *= dimSize[i]; In this case,
/* a tensor of size 4 */ (2, 4) -> (4), dim = 0
int * dimSize_reduce_a = new int[order_reduce]; (2, 4) -> (2), dim = 1
dimSize_reduce_a[0] = 4; */
bool TestReduceMax1()
int unitNum_a = 1; {
for (int i = 0; i < order_reduce; i++) /* an input tensor of size (2, 4) */
unitNum_a *= dimSize_reduce_a[i]; int sOrder = 2;
/* a tensor of size 2 */ int * sDimSize = new int[sOrder];
int * dimSize_reduce_b = new int[order_reduce]; sDimSize[0] = 2;
dimSize_reduce_b[0] = 2; sDimSize[1] = 4;
int unitNum_b = 1; int sUnitNum = 1;
for (int i = 0; i < order_reduce; i++) for (int i = 0; i < sOrder; i++)
unitNum_b *= dimSize_reduce_b[i]; sUnitNum *= sDimSize[i];
/* an output tensor of size (4) */
DTYPE aData[2][4] = { { 0.0, 1.0, 2.0, 3.0 }, int tOrder1 = 1;
{ 4.0, 5.0, 6.0, 7.0 } }; int * tDimSize1 = new int[tOrder1];
DTYPE bData[2][4] = { { 1.0, -1.0, -3.0, -5.0 }, tDimSize1[0] = 4;
{ -7.0, -9.0, -11.0, -13.0 } };
DTYPE answer_a[4] = { 4.0, 5.0, 6.0, 7.0 }; int tUnitNum1 = 1;
DTYPE answer_b[2] = { 1.0, -7.0 }; for (int i = 0; i < tOrder1; i++)
tUnitNum1 *= tDimSize1[i];
/* an output tensor of size (2) */
int tOrder2 = 1;
int * tDimSize2 = new int[tOrder2];
tDimSize2[0] = 2;
int tUnitNum2 = 1;
for (int i = 0; i < tOrder2; i++)
tUnitNum2 *= tDimSize2[i];
DTYPE sData[2][4] = { {0.0F, 5.0F, 2.0F, 3.0F},
{4.0F, 1.0F, 6.0F, 7.0F} };
DTYPE answer1[4] = {4.0F, 5.0F, 6.0F, 7.0F};
DTYPE answer2[2] = {5.0F, 7.0F};
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(order, dimSize); XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * reduce_a = NewTensor(order_reduce, dimSize_reduce_a); XTensor * t1 = NewTensor(tOrder1, tDimSize1);
XTensor * b = NewTensor(order, dimSize); XTensor * t2 = NewTensor(tOrder2, tDimSize2);
XTensor * reduce_b = NewTensor(order_reduce, dimSize_reduce_b);
/* initialize variables */ /* initialize variables */
a->SetData(aData, unitNum); s->SetData(sData, sUnitNum);
b->SetData(bData, unitNum); t1->SetZeroAll();
t2->SetZeroAll();
/* call reduce max function */ /* call ReduceMax function */
ReduceMax(a, reduce_a, 0); ReduceMax(s, t1, 0);
ReduceMax(b, reduce_b, 1); ReduceMax(s, t2, 1);
//DTYPE* reduce_a_data = (DTYPE*)reduce_a->data;
//for (int i = 0; i < unitNum_a; i++)
// printf("%f ", *reduce_a_data++);
//printf("\n");
//DTYPE* reduce_b_data = (DTYPE*)reduce_b->data;
//for (int i = 0; i < unitNum_b; i++)
// printf("%f ", *reduce_b_data++);
/* check results */ /* check results */
cpuTest = reduce_a->CheckData(answer_a, unitNum_a) && reduce_b->CheckData(answer_b, unitNum_b); cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * reduce_aGPU = NewTensor(order_reduce, dimSize_reduce_a, X_FLOAT);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * reduce_bGPU = NewTensor(order_reduce, dimSize_reduce_b, X_FLOAT);
/* Initialize variables */
aGPU->SetData(aData, unitNum);
bGPU->SetData(bData, unitNum);
/* call reduce max function */
ReduceMax(aGPU, reduce_aGPU, 0);
ReduceMax(bGPU, reduce_bGPU, 1);
/* check results */
gpuTest = reduce_aGPU->CheckData(answer_a, unitNum_a) && reduce_bGPU->CheckData(answer_b, unitNum_b);
/* destroy variables */
delete aGPU, bGPU, reduce_aGPU, reduce_bGPU;
delete[] dimSize, dimSize_reduce_a, dimSize_reduce_b;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
return cpuTest;
#endif // USE_CUDA
}
bool TestReduceMaxForLargescale()
{
/* a tensor of size 10000 * 500 */
int order = 2;
int order_reduce = 1;
int * dimSize = new int[order];
dimSize[0] = 10000;
dimSize[1] = 500;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
/* a tensor of size 500 */
int * dimSize_reduce_a = new int[order_reduce];
dimSize_reduce_a[0] = 500;
int unitNum_a = 1;
for (int i = 0; i < order_reduce; i++)
unitNum_a *= dimSize_reduce_a[i];
/* a tensor of size 10000 */
int * dimSize_reduce_b = new int[order_reduce];
dimSize_reduce_b[0] = 10000;
int unitNum_b = 1;
for (int i = 0; i < order_reduce; i++)
unitNum_b *= dimSize_reduce_b[i];
DTYPE * data = new DTYPE[5000000];
DTYPE * tmp = data;
for (int i = 0; i < unitNum; i++)
*tmp++ = 1;
DTYPE answer_a[500];
for (int i = 0; i < unitNum_a; i++)
answer_a[i] = 1;
DTYPE answer_b[10000];
for (int i = 0; i < unitNum_b; i++)
answer_b[i] = 1;
/* CPU test */
bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(order, dimSize); XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * reduce_a = NewTensor(order_reduce, dimSize_reduce_a); XTensor * tGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0);
XTensor * b = NewTensor(order, dimSize); XTensor * tGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0);
XTensor * reduce_b = NewTensor(order_reduce, dimSize_reduce_b);
/* initialize variables */ /* initialize variables */
a->SetData(data, unitNum); sGPU->SetData(sData, sUnitNum);
b->SetData(data, unitNum); tGPU1->SetZeroAll();
/* call reduce max function */ tGPU2->SetZeroAll();
ReduceMax(a, reduce_a, 0);
ReduceMax(b, reduce_b, 1);
/* check results */
cpuTest = reduce_a->CheckData(answer_a, unitNum_a) && reduce_b->CheckData(answer_b, unitNum_b);
#ifdef USE_CUDA /* call ReduceMax function */
/* GPU test */ ReduceMax(sGPU, tGPU1, 0);
bool gpuTest = true; ReduceMax(sGPU, tGPU2, 1);
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * reduce_aGPU = NewTensor(order_reduce, dimSize_reduce_a, X_FLOAT);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * reduce_bGPU = NewTensor(order_reduce, dimSize_reduce_b, X_FLOAT);
/* Initialize variables */
aGPU->SetData(data, unitNum);
bGPU->SetData(data, unitNum);
/* call reduce max function */
ReduceMax(aGPU, reduce_aGPU, 0);
ReduceMax(bGPU, reduce_bGPU, 1);
/* check results */ /* check results */
gpuTest = reduce_aGPU->CheckData(answer_a, unitNum_a) && reduce_bGPU->CheckData(answer_b, unitNum_b); gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2);
/* destroy variables */ /* destroy variables */
delete aGPU, bGPU, reduce_aGPU, reduce_bGPU; delete s;
delete[] dimSize, dimSize_reduce_a, dimSize_reduce_b; delete t1;
delete t2;
delete sGPU;
delete tGPU1;
delete tGPU2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete s;
delete b; delete t1;
delete t2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
/* other cases */ /* other cases */
/* /*
TODO!! TODO!!
*/ */
/* test for Sum Function */ /* test for ReduceMax Function */
extern "C" bool TestReduceMax()
bool TestReduceMax() {
{ XPRINT(0, stdout, "[TEST ReduceMax] get the max value of the items along a dimension of the tensor\n");
XPRINT(0, stdout, "[TEST ReduceMax]\n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
...@@ -233,19 +150,10 @@ namespace nts { // namespace nt(NiuTrans.Tensor) ...@@ -233,19 +150,10 @@ namespace nts { // namespace nt(NiuTrans.Tensor)
else else
XPRINT(0, stdout, ">> case 1 passed!\n"); XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */ /* other cases test */
caseFlag = TestReduceMaxForLargescale(); /*
if (!caseFlag) { TODO!!
returnFlag = false; */
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
///* other cases test */
///*
//TODO!!
//*/
if (returnFlag) { if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n"); XPRINT(0, stdout, ">> All Passed!\n");
...@@ -258,4 +166,4 @@ namespace nts { // namespace nt(NiuTrans.Tensor) ...@@ -258,4 +166,4 @@ namespace nts { // namespace nt(NiuTrans.Tensor)
return returnFlag; return returnFlag;
} }
} // namespace nt(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -16,20 +16,19 @@ ...@@ -16,20 +16,19 @@
*/ */
/* /*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-30
*/ */
#ifndef __TEST_REDUCEMAX_H__ #ifndef __TEST_REDUCEMAX_H__
#define __TEST_REDUCEMAX_H__ #define __TEST_REDUCEMAX_H__
#include "../core/ReduceMax.h" #include "../core/reduce/ReduceMax.h"
namespace nts { // namespace nt(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for ReduceMax Function */ /* test for ReduceMax Function */
extern "C" extern "C"
bool TestReduceMax(); bool TestReduceMax();
} // namespace nt(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __TEST_REDUCEMAX_H__ #endif // __TEST_REDUCEMAX_H__
...@@ -19,211 +19,121 @@ ...@@ -19,211 +19,121 @@
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30 * $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30
*/ */
#include "../XTensor.h" #include "TReduceMean.h"
#include "../XDevice.h"
#include "../core/ReduceMean.h"
#include "../core/ReduceMax.h"
#include "../core/ReduceSum.h"
namespace nts { // namespace nt(NiuTrans.Tensor) namespace nts { // namespace nt(NiuTrans.Tensor)
/* case 1 */
bool TestReduceMean1() /* case 1: get the mean value along a dimension of the tensor */
{ bool TestReduceMean1()
/* a tensor of size 2 * 4 */ {
int order = 2; /* a tensor of size (2, 4) */
int order_reduce = 1; int sOrder = 2;
int * dimSize = new int[order]; int * sDimSize = new int[sOrder];
dimSize[0] = 2; sDimSize[0] = 2;
dimSize[1] = 4; sDimSize[1] = 4;
int unitNum = 1; int sUnitNum = 1;
for (int i = 0; i < order; i++) for (int i = 0; i < sOrder; i++)
unitNum *= dimSize[i]; sUnitNum *= sDimSize[i];
/* a tensor of size 4 */
int * dimSize_reduce_a = new int[order_reduce]; /* a tensor of size (4) */
dimSize_reduce_a[0] = 4; int tOrder1 = 1;
int * tDimSize1 = new int[tOrder1];
int unitNum_a = 1; tDimSize1[0] = 4;
for (int i = 0; i < order_reduce; i++)
unitNum_a *= dimSize_reduce_a[i]; int tUnitNum1 = 1;
/* a tensor of size 2 */ for (int i = 0; i < tOrder1; i++)
int * dimSize_reduce_b = new int[order_reduce]; tUnitNum1 *= tDimSize1[i];
dimSize_reduce_b[0] = 2;
/* a tensor of size (2) */
int unitNum_b = 1; int tOrder2 = 1;
for (int i = 0; i < order_reduce; i++) int * tDimSize2 = new int[tOrder2];
unitNum_b *= dimSize_reduce_b[i]; tDimSize2[0] = 2;
int tUnitNum2 = 1;
DTYPE aData[2][4] = { { 0.0, 1.0, 2.0, 3.0 }, for (int i = 0; i < tOrder2; i++)
{ 4.0, 5.0, 6.0, 7.0 } }; tUnitNum2 *= tDimSize2[i];
DTYPE bData[2][4] = { { 1.0, -1.0, -3.0, -5.0 },
{ -7.0, -9.0, -11.0, -13.0 } }; DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
DTYPE answer_a[4] = { 2.0, 3.0, 4.0, 5.0 }; {4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE answer_b[2] = { -2.0, -10.0 }; DTYPE answer1[4] = {2.0F, 3.0F, 4.0F, 5.0F};
DTYPE answer2[2] = {1.5F, 5.5F};
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(order, dimSize); XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * reduce_a = NewTensor(order_reduce, dimSize_reduce_a); XTensor * t1 = NewTensor(tOrder1, tDimSize1);
XTensor * b = NewTensor(order, dimSize); XTensor * t2 = NewTensor(tOrder2, tDimSize2);
XTensor * reduce_b = NewTensor(order_reduce, dimSize_reduce_b);
/* initialize variables */ /* initialize variables */
a->SetData(aData, unitNum); s->SetData(sData, sUnitNum);
b->SetData(bData, unitNum); t1->SetZeroAll();
t2->SetZeroAll();
/* call reduce mean function */
ReduceMean(a, reduce_a, 0);
ReduceMean(b, reduce_b, 1);
//DTYPE* reduce_a_data = (DTYPE*)reduce_a->data; /* call ReduceMean function */
//for (int i = 0; i < unitNum_a; i++) ReduceMean(s, t1, 0);
// printf("%f ", *reduce_a_data++); ReduceMean(s, t2, 1);
//printf("\n");
//DTYPE* reduce_b_data = (DTYPE*)reduce_b->data;
//for (int i = 0; i < unitNum_b; i++)
// printf("%f ", *reduce_b_data++);
/* check results */ /* check results */
cpuTest = reduce_a->CheckData(answer_a, unitNum_a) && reduce_b->CheckData(answer_b, unitNum_b); cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT); XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * reduce_aGPU = NewTensor(order_reduce, dimSize_reduce_a, X_FLOAT); XTensor * tGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT); XTensor * tGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0);
XTensor * reduce_bGPU = NewTensor(order_reduce, dimSize_reduce_b, X_FLOAT);
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, unitNum); sGPU->SetData(sData, sUnitNum);
bGPU->SetData(bData, unitNum); tGPU1->SetZeroAll();
tGPU2->SetZeroAll();
/* call reduce mean function */ /* call ReduceMean function */
ReduceMean(aGPU, reduce_aGPU, 0); ReduceMean(sGPU, tGPU1, 0);
ReduceMean(bGPU, reduce_bGPU, 1); ReduceMean(sGPU, tGPU2, 1);
/* check results */ /* check results */
gpuTest = reduce_aGPU->CheckData(answer_a, unitNum_a) && reduce_bGPU->CheckData(answer_b, unitNum_b); gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2);
/* destroy variables */ /* destroy variables */
delete aGPU, bGPU, reduce_aGPU, reduce_bGPU; delete s;
delete dimSize, dimSize_reduce_a, dimSize_reduce_b; delete t1;
return cpuTest && gpuTest; delete t2;
#else delete sGPU;
/* destroy variables */ delete tGPU1;
delete a; delete tGPU2;
delete b; delete[] sDimSize;
return cpuTest; delete[] tDimSize1;
#endif // USE_CUDA delete[] tDimSize2;
}
bool TestReduceMeanForLargescale()
{
/* a tensor of size 10000 * 500 */
int order = 2;
int order_reduce = 1;
int * dimSize = new int[order];
dimSize[0] = 10000;
dimSize[1] = 500;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
/* a tensor of size 500 */
int * dimSize_reduce_a = new int[order_reduce];
dimSize_reduce_a[0] = 500;
int unitNum_a = 1;
for (int i = 0; i < order_reduce; i++)
unitNum_a *= dimSize_reduce_a[i];
/* a tensor of size 10000 */
int * dimSize_reduce_b = new int[order_reduce];
dimSize_reduce_b[0] = 10000;
int unitNum_b = 1;
for (int i = 0; i < order_reduce; i++)
unitNum_b *= dimSize_reduce_b[i];
DTYPE * data = new DTYPE[5000000];
DTYPE * tmp = data;
for (int i = 0; i < unitNum; i++)
*tmp++ = 1;
DTYPE answer_a[500];
for (int i = 0; i < unitNum_a; i++)
answer_a[i] = 1;
DTYPE answer_b[10000];
for (int i = 0; i < unitNum_b; i++)
answer_b[i] = 1;
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(order, dimSize);
XTensor * reduce_a = NewTensor(order_reduce, dimSize_reduce_a);
XTensor * b = NewTensor(order, dimSize);
XTensor * reduce_b = NewTensor(order_reduce, dimSize_reduce_b);
/* initialize variables */
a->SetData(data, unitNum);
b->SetData(data, unitNum);
/* call reduce max function */
ReduceMean(a, reduce_a, 0);
ReduceMean(b, reduce_b, 1);
/* check results */
cpuTest = reduce_a->CheckData(answer_a, unitNum_a) && reduce_b->CheckData(answer_b, unitNum_b);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * reduce_aGPU = NewTensor(order_reduce, dimSize_reduce_a, X_FLOAT);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * reduce_bGPU = NewTensor(order_reduce, dimSize_reduce_b, X_FLOAT);
/* Initialize variables */
aGPU->SetData(data, unitNum);
bGPU->SetData(data, unitNum);
/* call reduce max function */
ReduceMean(aGPU, reduce_aGPU, 0);
ReduceMean(bGPU, reduce_bGPU, 1);
/* check results */
gpuTest = reduce_aGPU->CheckData(answer_a, unitNum_a) && reduce_bGPU->CheckData(answer_b, unitNum_b);
/* destroy variables */
delete aGPU, bGPU, reduce_aGPU, reduce_bGPU;
delete[] dimSize, dimSize_reduce_a, dimSize_reduce_b;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete s;
delete b; delete t1;
delete t2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
/* other cases */
/* /* other cases */
TODO!! /*
*/ TODO!!
*/
/* test for Sum Function */ /* test for ReduceMean Function */
extern "C" bool TestReduceMean()
bool TestReduceMean() {
{ XPRINT(0, stdout, "[TEST ReduceMean] get the mean value along a dimension of the tensor \n");
XPRINT(0, stdout, "[TEST ReduceMean]\n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
...@@ -235,14 +145,6 @@ namespace nts { // namespace nt(NiuTrans.Tensor) ...@@ -235,14 +145,6 @@ namespace nts { // namespace nt(NiuTrans.Tensor)
else else
XPRINT(0, stdout, ">> case 1 passed!\n"); XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestReduceMeanForLargescale();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
///* other cases test */ ///* other cases test */
///* ///*
//TODO!! //TODO!!
...@@ -257,6 +159,6 @@ namespace nts { // namespace nt(NiuTrans.Tensor) ...@@ -257,6 +159,6 @@ namespace nts { // namespace nt(NiuTrans.Tensor)
XPRINT(0, stdout, "\n"); XPRINT(0, stdout, "\n");
return returnFlag; return returnFlag;
} }
} // namespace nt(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -22,15 +22,15 @@ ...@@ -22,15 +22,15 @@
#ifndef __TEST_REDUCEMEAN_H__ #ifndef __TEST_REDUCEMEAN_H__
#define __TEST_REDUCEMEAN_H__ #define __TEST_REDUCEMEAN_H__
#include "../core/ReduceMean.h" #include "../core/reduce/ReduceMean.h"
namespace nts { // namespace nt(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for ReduceMean Function */ /* test for ReduceMean Function */
extern "C" extern "C"
bool TestReduceMean(); bool TestReduceMean();
} // namespace nt(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __TEST_REDUCEMEAN_H__ #endif // __TEST_REDUCEMEAN_H__
...@@ -19,210 +19,126 @@ ...@@ -19,210 +19,126 @@
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30 * $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30
*/ */
#include "../XTensor.h" #include "TReduceSum.h"
#include "../XDevice.h"
#include "../core/ReduceMean.h"
#include "../core/ReduceMax.h"
#include "../core/ReduceSum.h"
namespace nts { // namespace nt(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1 */
bool TestReduceSum1()
{
/* a tensor of size 2 * 4 */
int order = 2;
int order_reduce = 1;
int * dimSize = new int[order];
dimSize[0] = 2;
dimSize[1] = 4;
int unitNum = 1; /*
for (int i = 0; i < order; i++) case 1: sum the items along a dimension of the tensor.
unitNum *= dimSize[i]; In this case,
/* a tensor of size 4 */ (2, 4) -> (4), dim = 0
int * dimSize_reduce_a = new int[order_reduce]; (2, 4) -> (2), dim = 1
dimSize_reduce_a[0] = 4; */
bool TestReduceSum1()
int unitNum_a = 1; {
for (int i = 0; i < order_reduce; i++) /* a tensor of size (2, 4) */
unitNum_a *= dimSize_reduce_a[i]; int sOrder = 2;
/* a tensor of size 2 */ int * sDimSize = new int[sOrder];
int * dimSize_reduce_b = new int[order_reduce]; sDimSize[0] = 2;
dimSize_reduce_b[0] = 2; sDimSize[1] = 4;
int unitNum_b = 1; int sUnitNum = 1;
for (int i = 0; i < order_reduce; i++) for (int i = 0; i < sOrder; i++)
unitNum_b *= dimSize_reduce_b[i]; sUnitNum *= sDimSize[i];
DTYPE aData[2][4] = { { 0.0, 1.0, 2.0, 3.0 }, /* a tensor of size (4) */
{ 4.0, 5.0, 6.0, 7.0 } }; int tOrder1 = 1;
DTYPE bData[2][4] = { { 1.0, -1.0, -3.0, -5.0 }, int * tDimSize1 = new int[tOrder1];
{ -7.0, -9.0, -11.0, -13.0 } }; tDimSize1[0] = 4;
DTYPE answer_a[4] = { 4.0, 6.0, 8.0, 10.0 };
DTYPE answer_b[2] = { -8.0, -40.0 }; int tUnitNum1 = 1;
for (int i = 0; i < tOrder1; i++)
tUnitNum1 *= tDimSize1[i];
/* a tensor of size (2) */
int tOrder2 = 1;
int * tDimSize2 = new int[tOrder2];
tDimSize2[0] = 2;
int tUnitNum2 = 1;
for (int i = 0; i < tOrder2; i++)
tUnitNum2 *= tDimSize2[i];
DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE answer1[4] = {4.0F, 6.0F, 8.0F, 10.0F};
DTYPE answer2[2] = {6.0F, 22.0F};
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(order, dimSize); XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * reduce_a = NewTensor(order_reduce, dimSize_reduce_a); XTensor * t1 = NewTensor(tOrder1, tDimSize1);
XTensor * b = NewTensor(order, dimSize); XTensor * t2 = NewTensor(tOrder2, tDimSize2);
XTensor * reduce_b = NewTensor(order_reduce, dimSize_reduce_b);
/* initialize variables */ /* initialize variables */
a->SetData(aData, unitNum); s->SetData(sData, sUnitNum);
b->SetData(bData, unitNum); t1->SetZeroAll();
t2->SetZeroAll();
/* call reduce sum function */
ReduceSum(a, reduce_a, 0);
ReduceSum(b, reduce_b, 1);
//DTYPE* reduce_a_data = (DTYPE*)reduce_a->data; /* call ReduceSum function */
//for (int i = 0; i < unitNum_a; i++) ReduceSum(s, t1, 0);
// printf("%f ", *reduce_a_data++); ReduceSum(s, t2, 1);
//printf("\n");
//DTYPE* reduce_b_data = (DTYPE*)reduce_b->data;
//for (int i = 0; i < unitNum_b; i++)
// printf("%f ", *reduce_b_data++);
/* check results */ /* check results */
cpuTest = reduce_a->CheckData(answer_a, unitNum_a) && reduce_b->CheckData(answer_b, unitNum_b); cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * reduce_aGPU = NewTensor(order_reduce, dimSize_reduce_a, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * reduce_bGPU = NewTensor(order_reduce, dimSize_reduce_b, X_FLOAT, 1.0F, 0);
/* Initialize variables */
aGPU->SetData(aData, unitNum);
bGPU->SetData(bData, unitNum);
/* call reduce sum function */
ReduceSum(aGPU, reduce_aGPU, 0);
ReduceSum(bGPU, reduce_bGPU, 1);
/* check results */
gpuTest = reduce_aGPU->CheckData(answer_a, unitNum_a) && reduce_bGPU->CheckData(answer_b, unitNum_b);
/* destroy variables */
delete aGPU, bGPU, reduce_aGPU, reduce_bGPU;
delete[] dimSize, dimSize_reduce_a, dimSize_reduce_b;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
return cpuTest;
#endif // USE_CUDA
}
bool TestReduceSumForLargescale()
{
/* a tensor of size 10000 * 500 */
int order = 2;
int order_reduce = 1;
int * dimSize = new int[order];
dimSize[0] = 10000;
dimSize[1] = 500;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
/* a tensor of size 500 */
int * dimSize_reduce_a = new int[order_reduce];
dimSize_reduce_a[0] = 500;
int unitNum_a = 1;
for (int i = 0; i < order_reduce; i++)
unitNum_a *= dimSize_reduce_a[i];
/* a tensor of size 10000 */
int * dimSize_reduce_b = new int[order_reduce];
dimSize_reduce_b[0] = 10000;
int unitNum_b = 1;
for (int i = 0; i < order_reduce; i++)
unitNum_b *= dimSize_reduce_b[i];
DTYPE * data = new DTYPE[5000000];
DTYPE * tmp = data;
for (int i = 0; i < unitNum; i++)
*tmp++ = 1;
DTYPE answer_a[500];
for (int i = 0; i < unitNum_a; i++)
answer_a[i] = 10000;
DTYPE answer_b[10000];
for (int i = 0; i < unitNum_b; i++)
answer_b[i] = 500;
/* CPU test */
bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(order, dimSize); XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * reduce_a = NewTensor(order_reduce, dimSize_reduce_a); XTensor * tGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0);
XTensor * b = NewTensor(order, dimSize); XTensor * tGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0);
XTensor * reduce_b = NewTensor(order_reduce, dimSize_reduce_b);
/* initialize variables */ /* initialize variables */
a->SetData(data, unitNum); sGPU->SetData(sData, sUnitNum);
b->SetData(data, unitNum); tGPU1->SetZeroAll();
/* call reduce sum function */ tGPU2->SetZeroAll();
ReduceSum(a, reduce_a, 0);
ReduceSum(b, reduce_b, 1);
/* check results */
cpuTest = reduce_a->CheckData(answer_a, unitNum_a) && reduce_b->CheckData(answer_b, unitNum_b);
#ifdef USE_CUDA /* call ReduceSum function */
/* GPU test */ ReduceSum(sGPU, tGPU1, 0);
bool gpuTest = true; ReduceSum(sGPU, tGPU2, 1);
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * reduce_aGPU = NewTensor(order_reduce, dimSize_reduce_a, X_FLOAT);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT);
XTensor * reduce_bGPU = NewTensor(order_reduce, dimSize_reduce_b, X_FLOAT);
/* Initialize variables */
aGPU->SetData(data, unitNum);
bGPU->SetData(data, unitNum);
/* call reduce max function */
ReduceSum(aGPU, reduce_aGPU, 0);
ReduceSum(bGPU, reduce_bGPU, 1);
/* check results */ /* check results */
gpuTest = reduce_aGPU->CheckData(answer_a, unitNum_a) && reduce_bGPU->CheckData(answer_b, unitNum_b); gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2);
/* destroy variables */ /* destroy variables */
delete aGPU, bGPU, reduce_aGPU, reduce_bGPU; delete s;
delete[] dimSize, dimSize_reduce_a, dimSize_reduce_b; delete t1;
delete t2;
delete sGPU;
delete tGPU1;
delete tGPU2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete s;
delete b; delete t1;
delete t2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
/* other cases */ /* other cases */
/* /*
TODO!! TODO!!
*/ */
/* test for ReduceSum Function */ /* test for ReduceSum Function */
extern "C" bool TestReduceSum()
bool TestReduceSum() {
{ XPRINT(0, stdout, "[TEST ReduceSum] sum the items along a dimension of the tensor.\n");
XPRINT(0, stdout, "[TEST ReduceSum]\n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
...@@ -234,19 +150,10 @@ namespace nts { // namespace nt(NiuTrans.Tensor) ...@@ -234,19 +150,10 @@ namespace nts { // namespace nt(NiuTrans.Tensor)
else else
XPRINT(0, stdout, ">> case 1 passed!\n"); XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */ /* other cases test */
caseFlag = TestReduceSumForLargescale(); /*
if (!caseFlag) { TODO!!
returnFlag = false; */
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
///* other cases test */
///*
//TODO!!
//*/
if (returnFlag) { if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n"); XPRINT(0, stdout, ">> All Passed!\n");
...@@ -259,4 +166,4 @@ namespace nts { // namespace nt(NiuTrans.Tensor) ...@@ -259,4 +166,4 @@ namespace nts { // namespace nt(NiuTrans.Tensor)
return returnFlag; return returnFlag;
} }
} // namespace nt(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -22,15 +22,15 @@ ...@@ -22,15 +22,15 @@
#ifndef __TEST_REDUCESUM_H__ #ifndef __TEST_REDUCESUM_H__
#define __TEST_REDUCESUM_H__ #define __TEST_REDUCESUM_H__
#include "../core/ReduceSum.h" #include "../core/reduce/ReduceSum.h"
namespace nts { // namespace nt(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for ReduceSum Function */ /* test for ReduceSum Function */
extern "C" extern "C"
bool TestReduceSum(); bool TestReduceSum();
} // namespace nt(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __TEST_REDUCESUM_H__ #endif // __TEST_REDUCESUM_H__
......
/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#include "TReduceSumSquared.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: squared sum of the items along a dimension of the tensor.
For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2.
In this case, (2, 4) -> (4), dim = 0.
*/
bool TestReduceSumSquared1()
{
/* a input tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a output tensor of size (4) */
int tOrder = 1;
int * tDimSize = new int[tOrder];
tDimSize[0] = 4;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
/* a shift tensor of size (4) */
int shiftOrder = 1;
int * shiftDimSize = new int[shiftOrder];
shiftDimSize[0] = 4;
int shiftUnitNum = 1;
for (int i = 0; i < shiftOrder; i++)
shiftUnitNum *= shiftDimSize[i];
DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE shiftData[4] = {1.0F, -1.0F, -1.0F, 0.0F};
DTYPE answer[4] = {10.0F, 40.0F, 58.0F, 58.0F};
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor * shift = NewTensor(shiftOrder, shiftDimSize);
/* initialize variables */
s->SetData(sData, sUnitNum);
shift->SetData(shiftData, shiftUnitNum);
t->SetZeroAll();
/* call ReduceSumSquared function */
ReduceSumSquared(s, t, 0, shift);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor * shiftGPU = NewTensor(shiftOrder, shiftDimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
shiftGPU->SetData(shiftData, shiftUnitNum);
tGPU->SetZeroAll();
/* call ReduceSumSquared function */
ReduceSumSquared(sGPU, tGPU, 0, shiftGPU);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s;
delete t;
delete shift;
delete sGPU;
delete tGPU;
delete shiftGPU;
delete[] sDimSize;
delete[] tDimSize;
delete[] shiftDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete t;
delete shift;
delete[] sDimSize;
delete[] tDimSize;
delete[] shiftDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 2: squared sum of the items along a dimension of the tensor.
For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2.
In this case, (2, 4) -> (2), dim = 1.
*/
bool TestReduceSumSquared2()
{
/* a input tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a output tensor of size (2) */
int tOrder = 1;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
/* a shift tensor of size (2) */
int shiftOrder = 1;
int * shiftDimSize = new int[shiftOrder];
shiftDimSize[0] = 2;
int shiftUnitNum = 1;
for (int i = 0; i < shiftOrder; i++)
shiftUnitNum *= shiftDimSize[i];
DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE shiftData[2] = {-1.0F, 1.0F};
DTYPE answer[2] = {30.0F, 86.0F};
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor * shift = NewTensor(shiftOrder, shiftDimSize);
/* initialize variables */
s->SetData(sData, sUnitNum);
shift->SetData(shiftData, shiftUnitNum);
t->SetZeroAll();
/* call ReduceSumSquared function */
ReduceSumSquared(s, t, 1, shift);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor * shiftGPU = NewTensor(shiftOrder, shiftDimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
shiftGPU->SetData(shiftData, shiftUnitNum);
tGPU->SetZeroAll();
/* call ReduceSumSquared function */
ReduceSumSquared(sGPU, tGPU, 1, shiftGPU);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s;
delete t;
delete shift;
delete sGPU;
delete tGPU;
delete shiftGPU;
delete[] sDimSize;
delete[] tDimSize;
delete[] shiftDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete t;
delete shift;
delete[] sDimSize;
delete[] tDimSize;
delete[] shiftDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/*
test for ReduceSumSquared Function
Runs every test case in turn, prints one pass/fail line per case and then an
overall verdict. A failing case does not stop the remaining cases from running.
<< return - true if all the cases passed, false if at least one failed
*/
bool TestReduceSumSquared()
{
    XPRINT(0, stdout, "[TEST ReduceSumSquared] squared sum of the items along a dimension of the tensor\n");
    bool returnFlag = true, caseFlag = true;

    /* case 1 test */
    caseFlag = TestReduceSumSquared1();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 1 passed!\n");

    /* case 2 test (the messages must name case 2, so that a failure
       here is not mistaken for a failure of case 1 in the log) */
    caseFlag = TestReduceSumSquared2();
    if (!caseFlag) {
        returnFlag = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }
    else
        XPRINT(0, stdout, ">> case 2 passed!\n");

    /* other cases test */
    /*
    TODO!!
    */

    if (returnFlag) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else
        XPRINT(0, stdout, ">> Failed!\n");

    XPRINT(0, stdout, "\n");

    return returnFlag;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#ifndef __TEST_REDUCESUMSQUARED_H__
#define __TEST_REDUCESUMSQUARED_H__
#include "../core/reduce/ReduceSumSquared.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for ReduceSumSquared Function
declared with C linkage; runs the ReduceSumSquared test cases
<< return - true if all test cases passed
*/
extern "C"
bool TestReduceSumSquared();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_REDUCESUMSQUARED_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#include "TReduceVariance.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: variance of the items along a dimension of the tensor.
For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2.
In this case, (2, 4) -> (4), dim = 0.
Each column of the input holds {x, x + 4} with mean x + 2, so the expected
variance of every column is ((-2)^2 + 2^2) / 2 = 4.
<< return - true if the CPU result (and, when USE_CUDA is defined, the GPU result)
            matches the expected answer
*/
bool TestReduceVariance1()
{
    /* a input tensor of size (2, 4) */
    int sOrder = 2;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 4;

    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    /* a output tensor of size (4) */
    int tOrder = 1;
    int * tDimSize = new int[tOrder];
    tDimSize[0] = 4;

    int tUnitNum = 1;
    for (int i = 0; i < tOrder; i++)
        tUnitNum *= tDimSize[i];

    /* a mean tensor of size (4) */
    int meanOrder = 1;
    int * meanDimSize = new int[meanOrder];
    meanDimSize[0] = 4;

    int meanUnitNum = 1;
    for (int i = 0; i < meanOrder; i++)
        meanUnitNum *= meanDimSize[i];

    DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    /* per-column mean of sData along dimension 0 */
    DTYPE meanData[4] = {2.0F, 3.0F, 4.0F, 5.0F};
    DTYPE answer[4] = {4.0F, 4.0F, 4.0F, 4.0F};

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * s = NewTensor(sOrder, sDimSize);
    XTensor * t = NewTensor(tOrder, tDimSize);
    XTensor * mean = NewTensor(meanOrder, meanDimSize);

    /* initialize variables */
    s->SetData(sData, sUnitNum);
    mean->SetData(meanData, meanUnitNum);
    t->SetZeroAll();

    /* call ReduceVariance function */
    ReduceVariance(s, t, 0, mean);

    /* check results */
    cpuTest = t->CheckData(answer, tUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
    XTensor * meanGPU = NewTensor(meanOrder, meanDimSize, X_FLOAT, 1.0F, 0);

    /* initialize variables */
    sGPU->SetData(sData, sUnitNum);
    meanGPU->SetData(meanData, meanUnitNum);
    tGPU->SetZeroAll();

    /* call ReduceVariance function */
    ReduceVariance(sGPU, tGPU, 0, meanGPU);

    /* check results: the GPU output tensor must be inspected here, not the
       CPU tensor t, otherwise the GPU computation is never verified */
    gpuTest = tGPU->CheckData(answer, tUnitNum);

    /* destroy variables */
    delete s;
    delete t;
    delete mean;
    delete sGPU;
    delete tGPU;
    delete meanGPU;
    delete[] sDimSize;
    delete[] tDimSize;
    delete[] meanDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete s;
    delete t;
    delete mean;
    delete[] sDimSize;
    delete[] tDimSize;
    delete[] meanDimSize;

    return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/*
test driver for the ReduceVariance function
runs each case, prints one pass/fail line per case and a final summary
<< return - true if all cases passed
*/
bool TestReduceVariance()
{
    XPRINT(0, stdout, "[TEST ReduceVariance] variance of the items along a dimension of the tensor\n");

    bool allPassed = true;

    /* case 1 test */
    const bool firstCasePassed = TestReduceVariance1();
    if (firstCasePassed) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> case 1 failed!\n");
        allPassed = false;
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* summary line followed by a blank separator line */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#ifndef __TEST_REDUCEVARIANCE_H__
#define __TEST_REDUCEVARIANCE_H__
#include "../core/reduce/ReduceVariance.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for ReduceVariance Function
declared with C linkage; runs the ReduceVariance test cases
<< return - true if all test cases passed
*/
extern "C"
bool TestReduceVariance();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_REDUCEVARIANCE_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#include "TScaleAndShift.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: scale and shift all tensor entires.
p = p * scale + shift
*/
bool TestScaleAndShift1()
{
/* a input tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE answer[2][4] = { {0.5F, 2.5F, 4.5F, 6.5F},
{8.5F, 10.5F, 12.5F, 14.5F} };
DTYPE scaleFactor = 2.0F;
DTYPE shiftFactor = 0.5F;
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
/* initialize variables */
s->SetData(sData, sUnitNum);
/* call ScaleAndShift function */
ScaleAndShift(s, scaleFactor, shiftFactor);
/* check results */
cpuTest = s->CheckData(answer, sUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
/* call ScaleAndShift function */
ScaleAndShift(sGPU, scaleFactor, shiftFactor);
/* check results */
gpuTest = sGPU->CheckData(answer, sUnitNum);
/* destroy variables */
delete s;
delete sGPU;
delete[] sDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete[] sDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/*
test driver for the ScaleAndShift function
prints a banner, the outcome of each case and an overall verdict
<< return - true if all cases passed
*/
bool TestScaleAndShift()
{
    XPRINT(0, stdout, "[TEST ScaleAndShift] scale and shift all tensor entires\n");

    bool everythingOk = true;

    /* case 1 test */
    if (TestScaleAndShift1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        everythingOk = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall verdict, then an empty line as separator */
    if (everythingOk) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    XPRINT(0, stdout, "\n");

    return everythingOk;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#ifndef __TEST_SCALEANDSHIFT_H__
#define __TEST_SCALEANDSHIFT_H__
#include "../core/math/ScaleAndShift.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for ScaleAndShift Function
declared with C linkage; runs the ScaleAndShift test cases
<< return - true if all test cases passed
*/
extern "C"
bool TestScaleAndShift();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_SCALEANDSHIFT_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-04
*/
#include "TSelect.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test SelectRange function.
It can generate a tensor with seleccted data in range[low,high] along the given dimension.
In this case, (2, 2, 4) -> (2, 2, 2), dim = 2, low = 1, high = 3.
*/
bool TestSelect1()
{
/* a input tensor of size (2, 2, 4) */
int sOrder = 3;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 2;
sDimSize[2] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a output tensor of size (2, 2, 2) */
int tOrder = 3;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
tDimSize[1] = 2;
tDimSize[2] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData[2][2][4] = { { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} },
{ {1.0F, 2.0F, 3.0F, 4.0F},
{5.0F, 6.0F, 7.0F, 8.0F} } };
DTYPE answer[2][2][2] = { { {1.0F, 2.0F},
{5.0F, 6.0F} },
{ {2.0F, 3.0F},
{6.0F, 7.0F} } };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize);
/* initialize variables */
s->SetData(sData, sUnitNum);
t->SetZeroAll();
/* call SelectRange function */
SelectRange(s, t, 2, 1, 3);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
tGPU->SetZeroAll();
/* call Select function */
SelectRange(sGPU, tGPU, 2, 1, 3);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s;
delete t;
delete sGPU;
delete tGPU;
delete[] sDimSize;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete t;
delete[] sDimSize;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/*
test driver for the Select (SelectRange) function
reports the result of each case followed by an overall summary
<< return - true if all cases passed
*/
bool TestSelect()
{
    XPRINT(0, stdout, "[TEST Select] generate a tensor with seleccted data in range[low,high] along the given dimension \n");

    bool suitePassed = true;

    /* case 1 test */
    const bool case1Ok = TestSelect1();
    if (!case1Ok)
        suitePassed = false;

    if (case1Ok) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary and a trailing blank line */
    if (suitePassed) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    XPRINT(0, stdout, "\n");

    return suitePassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-04
*/
#ifndef __TEST_SELECT_H__
#define __TEST_SELECT_H__
#include "../core/getandset/Select.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for Select Function
declared with C linkage; runs the Select test cases
<< return - true if all test cases passed
*/
extern "C"
bool TestSelect();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_SELECT_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#include "TSetAscendingOrder.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: set the cell to the ascending order along a given dimension.
*/
bool TestSetAscendingOrder1()
{
/* a input tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
int answer[2][4] = { {0, 1, 2, 3},
{0, 1, 2, 3} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize, X_INT);
/* initialize variables */
s->SetZeroAll();
/* call SetAscendingOrder function */
s->SetAscendingOrder(1);
/* check results */
cpuTest = s->CheckData(answer, sUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_INT, 1.0F, 0);
/* initialize variables */
sGPU->SetZeroAll();
/* call SetAscendingOrder function */
sGPU->SetAscendingOrder(1);
/* check results */
gpuTest = sGPU->CheckData(answer, sUnitNum);
/* destroy variables */
delete s;
delete sGPU;
delete[] sDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete[] sDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/*
test driver for the SetAscendingOrder function
prints the outcome of each case and an overall result
<< return - true if all cases passed
*/
bool TestSetAscendingOrder()
{
    XPRINT(0, stdout, "[TEST SetAscendingOrder] set the cell to the ascending order along a given dimension \n");

    bool overallResult = true;

    /* case 1 test */
    const bool caseResult = TestSetAscendingOrder1();
    if (caseResult) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        overallResult = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall result, then an empty line */
    if (!overallResult) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    XPRINT(0, stdout, "\n");

    return overallResult;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#ifndef __TEST_SETASCENDINGORDER_H__
#define __TEST_SETASCENDINGORDER_H__
#include "../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for SetAscendingOrder Function
declared with C linkage; runs the SetAscendingOrder test cases
<< return - true if all test cases passed
*/
extern "C"
bool TestSetAscendingOrder();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_SETASCENDINGORDER_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#include "TSetData.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: set the cell to the ascending order along a given dimension. */
bool TestSetData1()
{
/* a input tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
DTYPE answer[2][4] = {0};
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
/* call SetData function */
s->SetDataRand(0.0, 1.0);
/* check results */
cpuTest = s->CheckData(answer, sUnitNum, 1.0F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
/* call SetDataRand function */
sGPU->SetDataRand(0.0, 1.0);
gpuTest = sGPU->CheckData(answer, sUnitNum, 1.0F);
/* destroy variables */
delete s;
delete sGPU;
delete[] sDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete[] sDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/*
test driver for the SetData function
prints the outcome of each case and a final verdict
<< return - true if all cases passed
*/
bool TestSetData()
{
    XPRINT(0, stdout, "[TEST SetData] set the data of tensor \n");

    bool verdict = true;

    /* case 1 test */
    if (!TestSetData1()) {
        verdict = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* final verdict, then an empty line */
    if (verdict) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    XPRINT(0, stdout, "\n");

    return verdict;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#ifndef __TEST_SETDATA_H__
#define __TEST_SETDATA_H__
#include "../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for SetData Function
declared with C linkage; runs the SetData test cases
<< return - true if all test cases passed
*/
extern "C"
bool TestSetData();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_SETDATA_H__
...@@ -19,15 +19,15 @@ ...@@ -19,15 +19,15 @@
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-19 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-19
*/ */
#include "../XTensor.h"
#include "../XUtility.h" #include "../XUtility.h"
#include "TSigmoid.h" #include "TSigmoid.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: test Sigmoid function and SigmoidBackward function.
* sigmoid function: y = 1/(1+exp(-x)) /*
* backward computation: dE/ds = dE/dy * dy/dx case 1: test Sigmoid function and SigmoidBackward function.
sigmoid function: y = 1/(1+exp(-x))
backward computation: dE/ds = dE/dy * dy/dx
*/ */
bool TestSigmoid1() bool TestSigmoid1()
{ {
...@@ -42,7 +42,9 @@ bool TestSigmoid1() ...@@ -42,7 +42,9 @@ bool TestSigmoid1()
DTYPE xData[3] = {0.0F, 1.0F, 2.0F}; DTYPE xData[3] = {0.0F, 1.0F, 2.0F};
DTYPE gData[3] = {0.4F, 0.8F, 1.0F}; DTYPE gData[3] = {0.4F, 0.8F, 1.0F};
DTYPE answer[3]; DTYPE dedyData[3] = {-0.8F, -1.094F, -1.135F};
DTYPE yAnswer[3] = {0.5F, 0.731F, 0.881F};
DTYPE dedxAnswer[3] = {-0.2F, -0.215F, -0.119F};
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -57,41 +59,18 @@ bool TestSigmoid1() ...@@ -57,41 +59,18 @@ bool TestSigmoid1()
/* initialize variables */ /* initialize variables */
x->SetData(xData, sUnitNum); x->SetData(xData, sUnitNum);
g->SetData(gData, sUnitNum); g->SetData(gData, sUnitNum);
dedy->SetData(dedyData, sUnitNum);
y->SetZeroAll(); y->SetZeroAll();
dedx->SetZeroAll(); dedx->SetZeroAll();
/* call Sigmoid function */ /* call Sigmoid function */
Sigmoid(x, y); Sigmoid(x, y);
/* cross_entropy: de/dy_i = -t_i / y_i */
DTYPE dedyData[3];
DTYPE * yProcessedData = (DTYPE*)y->data;
for (int i = 0; i < sUnitNum; i++)
dedyData[i] = - gData[i] / yProcessedData[i];
/* initialize variables */
dedy->SetData(dedyData, sUnitNum);
for (int i = 0; i < sUnitNum; i++)
answer[i] = dedyData[i] * yProcessedData[i] * (1 - yProcessedData[i]);
/* call SigmoidBackward function */ /* call SigmoidBackward function */
SigmoidBackward(g, y, x, dedy, dedx, NOLOSS); SigmoidBackward(g, y, x, dedy, dedx, NOLOSS);
/* check result */ /* check result */
printf("CPU Test:\n"); cpuTest = y->CheckData(yAnswer, sUnitNum) && dedx->CheckData(dedxAnswer, sUnitNum);
printf("Computer de/dx:");
DTYPE * checkData = (DTYPE*)dedx->data;
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", checkData[i]);
}
printf("\n");
printf("Real de/dx:");
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", answer[i]);
}
printf("\n");
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -107,66 +86,50 @@ bool TestSigmoid1() ...@@ -107,66 +86,50 @@ bool TestSigmoid1()
/* initialize variables */ /* initialize variables */
xGPU->SetData(xData, sUnitNum); xGPU->SetData(xData, sUnitNum);
gGPU->SetData(gData, sUnitNum); gGPU->SetData(gData, sUnitNum);
dedyGPU->SetData(dedyData, sUnitNum);
yGPU->SetZeroAll(); yGPU->SetZeroAll();
dedxGPU->SetZeroAll(); dedxGPU->SetZeroAll();
/* call Sigmoid function */ /* call Sigmoid function */
Sigmoid(xGPU, yGPU); Sigmoid(xGPU, yGPU);
/* cross_entropy: de/dy_i = -t_i / y_i */
void * yProcessedDataGPU = (DTYPE*)yGPU->data;
int size = sUnitNum * yGPU->unitSize;
DTYPE * copy = new DTYPE[size];
XMemCopy(copy, -1, yProcessedDataGPU, yGPU->devID, size);
for (int i = 0; i < sUnitNum; i++) {
dedyData[i] = - gData[i] / *copy++;
}
/* initialize variables */
dedyGPU->SetData(dedyData, sUnitNum);
for (int i = 0; i < sUnitNum; i++)
answer[i] = dedyData[i] * yProcessedData[i] * (1 - yProcessedData[i]);
/* call SigmoidBackward function */ /* call SigmoidBackward function */
SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, NOLOSS); SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, NOLOSS);
/* check result */ /* check result */
printf("\nGPU Test:\n"); gpuTest = yGPU->CheckData(yAnswer, sUnitNum) && dedxGPU->CheckData(dedxAnswer, sUnitNum);
printf("Computer de/dx:");
checkData = (DTYPE*)dedxGPU->data;
size = sUnitNum * dedxGPU->unitSize;
DTYPE * copy1 = new DTYPE[size];
XMemCopy(copy1, -1, checkData, dedxGPU->devID, size);
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", copy1[i]);
}
printf("\n");
printf("Real de/dx:");
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", answer[i]);
}
printf("\n\n");
/* destroy variables */ /* destroy variables */
delete x, y, g, dedx, dedy; delete x;
delete xGPU, yGPU, gGPU, dedxGPU, dedyGPU; delete y;
delete g;
delete dedx;
delete dedy;
delete xGPU;
delete yGPU;
delete gGPU;
delete dedxGPU;
delete dedyGPU;
delete[] sDimSize; delete[] sDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete x, y, g, dedx, dedy; delete x;
delete y;
delete g;
delete dedx;
delete dedy;
delete[] sDimSize; delete[] sDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 2: test Sigmoid function and SigmoidBackward function. /*
* sigmoid function: y = 1/(1+exp(-x)) case 2: test Sigmoid function and SigmoidBackward function.
* backward computation: dE/ds = dE/dy * dy/dx sigmoid function: y = 1/(1+exp(-x))
backward computation: dE/ds = dE/dy * dy/dx
*/ */
bool TestSigmoid2() bool TestSigmoid2()
{ {
...@@ -181,7 +144,9 @@ bool TestSigmoid2() ...@@ -181,7 +144,9 @@ bool TestSigmoid2()
DTYPE xData[3] = {0.0F, 1.0F, 2.0F}; DTYPE xData[3] = {0.0F, 1.0F, 2.0F};
DTYPE gData[3] = {0.4F, 0.8F, 1.0F}; DTYPE gData[3] = {0.4F, 0.8F, 1.0F};
DTYPE answer[3] = {0.0F, 0.0F, 0.0F}; DTYPE dedyData[3] = {-0.8F, -1.094F, -1.135F};
DTYPE yAnswer[3] = {0.5F, 0.731F, 0.881F};
DTYPE dedxAnswer[3] = {-0.2F, -0.215F, -0.119F};
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -196,29 +161,21 @@ bool TestSigmoid2() ...@@ -196,29 +161,21 @@ bool TestSigmoid2()
/* initialize variables */ /* initialize variables */
x->SetData(xData, sUnitNum); x->SetData(xData, sUnitNum);
g->SetData(gData, sUnitNum); g->SetData(gData, sUnitNum);
dedy->SetZeroAll();
y->SetZeroAll(); y->SetZeroAll();
dedx->SetZeroAll(); dedx->SetZeroAll();
/* call Sigmoid function */ /* call Sigmoid function */
Sigmoid(x, y); Sigmoid(x, y);
/* initialize variables */
dedy->SetData(dedyData, sUnitNum);
/* call SigmoidBackward function */ /* call SigmoidBackward function */
SigmoidBackward(g, y, x, dedy, dedx, CROSSENTROPY); SigmoidBackward(g, y, x, dedy, dedx, CROSSENTROPY);
/* check result */ /* check result */
printf("CPU Test:\n"); cpuTest = y->CheckData(yAnswer, sUnitNum) && dedx->CheckData(dedxAnswer, sUnitNum);
printf("Computer de/dx:");
DTYPE * checkData = (DTYPE*)dedx->data;
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", checkData[i]);
}
printf("\n");
printf("Real de/dx:");
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", answer[i]);
}
printf("\n");
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -234,6 +191,7 @@ bool TestSigmoid2() ...@@ -234,6 +191,7 @@ bool TestSigmoid2()
/* initialize variables */ /* initialize variables */
xGPU->SetData(xData, sUnitNum); xGPU->SetData(xData, sUnitNum);
gGPU->SetData(gData, sUnitNum); gGPU->SetData(gData, sUnitNum);
dedyGPU->SetZeroAll();
yGPU->SetZeroAll(); yGPU->SetZeroAll();
dedxGPU->SetZeroAll(); dedxGPU->SetZeroAll();
...@@ -244,32 +202,29 @@ bool TestSigmoid2() ...@@ -244,32 +202,29 @@ bool TestSigmoid2()
SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY); SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
/* check result */ /* check result */
printf("\nGPU Test:\n"); gpuTest = yGPU->CheckData(yAnswer, sUnitNum) && dedxGPU->CheckData(dedxAnswer, sUnitNum);
printf("Computer de/dx:");
checkData = (DTYPE*)dedxGPU->data;
int size = sUnitNum * dedxGPU->unitSize;
DTYPE * copy1 = new DTYPE[size];
XMemCopy(copy1, -1, checkData, dedxGPU->devID, size);
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", copy1[i]);
}
printf("\n");
printf("Real de/dx:");
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", answer[i]);
}
printf("\n\n");
/* destroy variables */ /* destroy variables */
delete x, y, g, dedx, dedy; delete x;
delete xGPU, yGPU, gGPU, dedxGPU, dedyGPU; delete y;
delete g;
delete dedx;
delete dedy;
delete xGPU;
delete yGPU;
delete gGPU;
delete dedxGPU;
delete dedyGPU;
delete[] sDimSize; delete[] sDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete x, y, g, dedx, dedy; delete x;
delete y;
delete g;
delete dedx;
delete dedy;
delete[] sDimSize; delete[] sDimSize;
return cpuTest; return cpuTest;
...@@ -282,10 +237,9 @@ bool TestSigmoid2() ...@@ -282,10 +237,9 @@ bool TestSigmoid2()
*/ */
/* test for Sigmoid Function */ /* test for Sigmoid Function */
extern "C"
bool TestSigmoid() bool TestSigmoid()
{ {
XPRINT(0, stdout, "[TEST SIGMOID] -------------\n"); XPRINT(0, stdout, "[TEST SIGMOID] sigmoid function and its backward computation \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
......
...@@ -24,8 +24,10 @@ ...@@ -24,8 +24,10 @@
#include "TSoftmax.h" #include "TSoftmax.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: test Softmax function.
* softmax function: y = e^x / \sum_{i} e^{x_i} /*
case 1: test Softmax function.
softmax function: y = e^x / \sum_{i} e^{x_i}
*/ */
bool TestSoftmax1() bool TestSoftmax1()
{ {
...@@ -59,21 +61,7 @@ bool TestSoftmax1() ...@@ -59,21 +61,7 @@ bool TestSoftmax1()
Softmax(x, y, 1); Softmax(x, y, 1);
/* check result */ /* check result */
printf("CPU Test:\n"); cpuTest = y->CheckData(answer, sUnitNum);
printf("Softmax Result:");
DTYPE * checkData = (DTYPE*)y->data;
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", checkData[i]);
}
printf("\n");
printf("Real Result:");
for (int i = 0; i < sDimSize[0]; i++) {
for (int j = 0; j < sDimSize[1]; j++) {
printf("\t%f", answer[i][j]);
}
}
printf("\n");
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -91,28 +79,13 @@ bool TestSoftmax1() ...@@ -91,28 +79,13 @@ bool TestSoftmax1()
Softmax(xGPU, yGPU, 1); Softmax(xGPU, yGPU, 1);
/* check result */ /* check result */
printf("\nGPU Test:\n"); gpuTest = yGPU->CheckData(answer, sUnitNum);
printf("Computer de/dx:");
checkData = (DTYPE*)yGPU->data;
int size = sUnitNum * yGPU->unitSize;
DTYPE * copy = new DTYPE[size];
XMemCopy(copy, -1, checkData, yGPU->devID, size);
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", copy[i]);
}
printf("\n");
printf("Real Result:");
for (int i = 0; i < sDimSize[0]; i++) {
for (int j = 0; j < sDimSize[1]; j++) {
printf("\t%f", answer[i][j]);
}
}
printf("\n");
/* destroy variables */ /* destroy variables */
delete x, y; delete x;
delete xGPU, yGPU; delete y;
delete xGPU;
delete yGPU;
delete[] sDimSize; delete[] sDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
...@@ -125,11 +98,13 @@ bool TestSoftmax1() ...@@ -125,11 +98,13 @@ bool TestSoftmax1()
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 2: test SoftmaxBackward function. /*
* SoftmaxBackward function: dE/dx_j = -gold_j + y_j case 2: test SoftmaxBackward function.
SoftmaxBackward function: dE/dx_j = -gold_j + y_j
*/ */
bool TestSoftmax2() bool TestSoftmax2()
{ {
/* an input tensor of size (1, 3) */
int sOrder = 2; int sOrder = 2;
int * sDimSize = new int[sOrder]; int * sDimSize = new int[sOrder];
sDimSize[0] = 1; sDimSize[0] = 1;
...@@ -141,7 +116,7 @@ bool TestSoftmax2() ...@@ -141,7 +116,7 @@ bool TestSoftmax2()
DTYPE xData[1][3] = { {0.0F, 1.0F, 2.0F} }; DTYPE xData[1][3] = { {0.0F, 1.0F, 2.0F} };
DTYPE gData[1][3] = { {0.0F, 0.0F, 1.0F} }; DTYPE gData[1][3] = { {0.0F, 0.0F, 1.0F} };
DTYPE answer[3] = {0.090031F, 0.244728F, -0.334759F}; DTYPE dedxAnswer[3] = {0.090031F, 0.244728F, -0.334759F};
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -163,31 +138,10 @@ bool TestSoftmax2() ...@@ -163,31 +138,10 @@ bool TestSoftmax2()
/* call Softmax function */ /* call Softmax function */
Softmax(x, y, 1); Softmax(x, y, 1);
/* check result */
printf("CPU Test:\n");
printf("Softmax Result:");
DTYPE * checkData = (DTYPE*)y->data;
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", checkData[i]);
}
printf("\n");
/* call SoftmaxBackward function */
SoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY); SoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY);
/* check result */ /* check result */
printf("Computer de/dx:"); cpuTest = dedx->CheckData(dedxAnswer, sUnitNum);
checkData = (DTYPE*)dedx->data;
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", checkData[i]);
}
printf("\n");
printf("Real de/dx:");
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", answer[i]);
}
printf("\n");
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -210,44 +164,33 @@ bool TestSoftmax2() ...@@ -210,44 +164,33 @@ bool TestSoftmax2()
/* call Softmax function */ /* call Softmax function */
Softmax(xGPU, yGPU, 1); Softmax(xGPU, yGPU, 1);
/* check result */
printf("\nGPU Test:\n");
printf("Softmax Result:");
checkData = (DTYPE*)y->data;
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", checkData[i]);
}
printf("\n");
/* call SoftmaxBackward function */ /* call SoftmaxBackward function */
SoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY); SoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY);
/* check result */ /* check result */
printf("Computer de/dx:"); gpuTest = dedxGPU->CheckData(dedxAnswer, sUnitNum);
checkData = (DTYPE*)dedxGPU->data;
int size = sUnitNum * dedxGPU->unitSize;
DTYPE * copy = new DTYPE[size];
XMemCopy(copy, -1, checkData, dedxGPU->devID, size);
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", copy[i]);
}
printf("\n");
printf("Real de/dx:");
for (int i = 0; i < sUnitNum; i++) {
printf("\t%f", answer[i]);
}
printf("\n");
/* destroy variables */ /* destroy variables */
delete x, y, g, dedx, dedy; delete x;
delete xGPU, yGPU, gGPU, dedxGPU, dedyGPU; delete y;
delete g;
delete dedx;
delete dedy;
delete xGPU;
delete yGPU;
delete gGPU;
delete dedxGPU;
delete dedyGPU;
delete[] sDimSize; delete[] sDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete x, y, g, dedx, dedy; delete x;
delete y;
delete g;
delete dedx;
delete dedy;
delete[] sDimSize; delete[] sDimSize;
return cpuTest; return cpuTest;
...@@ -260,10 +203,9 @@ bool TestSoftmax2() ...@@ -260,10 +203,9 @@ bool TestSoftmax2()
*/ */
/* test for Softmax Function */ /* test for Softmax Function */
extern "C"
bool TestSoftmax() bool TestSoftmax()
{ {
XPRINT(0, stdout, "[TEST SOFTMAX] -------------\n"); XPRINT(0, stdout, "[TEST SOFTMAX] softmax function and its backward computation \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
......
...@@ -19,15 +19,14 @@ ...@@ -19,15 +19,14 @@
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-04-30 * $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-04-30
*/ */
#include "../XTensor.h" #include "TSort.h"
#include "../XDevice.h"
#include "../core/Sort.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1 */
/* case 1: sort the tensor along a given dimension */
bool TestSort1() bool TestSort1()
{ {
/* a tensor of size 2 * 4 */ /* a tensor of size (2, 4) */
int order = 2; int order = 2;
int * dimSize = new int[order]; int * dimSize = new int[order];
dimSize[0] = 2; dimSize[0] = 2;
...@@ -37,33 +36,25 @@ bool TestSort1() ...@@ -37,33 +36,25 @@ bool TestSort1()
for (int i = 0; i < order; i++) for (int i = 0; i < order; i++)
unitNum *= dimSize[i]; unitNum *= dimSize[i];
DTYPE aData[2][4] = { { 0.0, 1.0, 2.0, 3.0 }, DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{ 4.0, 5.0, 6.0, 7.0 } }; {4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE answer[2][4] = { { 4.0, 5.0, 6.0, 7.0 }, DTYPE answer[2][4] = { {4.0F, 5.0F, 6.0F, 7.0F},
{ 0.0, 1.0, 2.0, 3.0 } }; {0.0F, 1.0F, 2.0F, 3.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(order, dimSize); XTensor * a = NewTensor(order, dimSize);
XTensor * b = NewTensor(order, dimSize); XTensor * b = NewTensor(order, dimSize, X_INT);
b->dataType = X_INT;
/* initialize variables */ /* initialize variables */
a->SetData(aData, unitNum); a->SetData(aData, unitNum);
b->SetZeroAll();
/* call sort function */ /* call Sort function */
Sort(a, b, 0); Sort(a, b, 0);
DTYPE* check1 = (DTYPE*)a->data;
for (int i = 0; i < 8; i++)
printf("%f ", *check1++);
printf("\n");
int* check2 = (int*)b->data;
for (int i = 0; i < 8; i++)
printf("%d ", *check2++);
printf("\n");
/* check results */
cpuTest = a->CheckData(answer, unitNum); cpuTest = a->CheckData(answer, unitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -71,11 +62,12 @@ bool TestSort1() ...@@ -71,11 +62,12 @@ bool TestSort1()
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT); XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT); XTensor * bGPU = NewTensor(order, dimSize, X_INT, 1.0F, 0);
bGPU->dataType = X_INT;
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, unitNum); aGPU->SetData(aData, unitNum);
bGPU->SetZeroAll();
/* call sum function */ /* call sum function */
Sort(aGPU, bGPU, 0); Sort(aGPU, bGPU, 0);
...@@ -84,21 +76,26 @@ bool TestSort1() ...@@ -84,21 +76,26 @@ bool TestSort1()
gpuTest = aGPU->CheckData(answer, unitNum); gpuTest = aGPU->CheckData(answer, unitNum);
/* destroy variables */ /* destroy variables */
delete a, b, aGPU, bGPU; delete a;
delete b;
delete aGPU;
delete bGPU;
delete[] dimSize; delete[] dimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete[] dimSize; delete[] dimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
bool TestSort2() bool TestSort2()
{ {
/* a tensor of size 2 * 4 */ /* a tensor of size (2, 4) */
int order = 2; int order = 2;
int * dimSize = new int[order]; int * dimSize = new int[order];
dimSize[0] = 2; dimSize[0] = 2;
...@@ -108,32 +105,24 @@ bool TestSort2() ...@@ -108,32 +105,24 @@ bool TestSort2()
for (int i = 0; i < order; i++) for (int i = 0; i < order; i++)
unitNum *= dimSize[i]; unitNum *= dimSize[i];
DTYPE aData[2][4] = { { 0.0, 1.0, 2.0, 3.0 }, DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{ 4.0, 5.0, 6.0, 7.0 } }; {4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE answer[2][4] = { { 3.0, 2.0, 1.0, 0.0 }, DTYPE answer[2][4] = { {3.0F, 2.0F, 1.0F, 0.0F},
{ 7.0, 6.0, 5.0, 4.0 } }; {7.0F, 6.0F, 5.0F, 4.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(order, dimSize); XTensor * a = NewTensor(order, dimSize);
XTensor * b = NewTensor(order, dimSize); XTensor * b = NewTensor(order, dimSize, X_INT);
b->dataType = X_INT;
/* initialize variables */ /* initialize variables */
a->SetData(aData, unitNum); a->SetData(aData, unitNum);
/* call sort function */ /* call Sort function */
Sort(a, b, 1); Sort(a, b, 1);
DTYPE* check1 = (DTYPE*)a->data;
for (int i = 0; i < 8; i++)
printf("%f ", *check1++);
printf("\n");
int* check2 = (int*)b->data;
for (int i = 0; i < 8; i++)
printf("%d ", *check2++);
printf("\n");
/* check results */ /* check results */
cpuTest = a->CheckData(answer, unitNum); cpuTest = a->CheckData(answer, unitNum);
...@@ -142,9 +131,9 @@ bool TestSort2() ...@@ -142,9 +131,9 @@ bool TestSort2()
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT); XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT); XTensor * bGPU = NewTensor(order, dimSize, X_INT, 1.0F, 0);
bGPU->dataType = X_INT;
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, unitNum); aGPU->SetData(aData, unitNum);
...@@ -155,27 +144,32 @@ bool TestSort2() ...@@ -155,27 +144,32 @@ bool TestSort2()
gpuTest = aGPU->CheckData(answer, unitNum); gpuTest = aGPU->CheckData(answer, unitNum);
/* destroy variables */ /* destroy variables */
delete a, b, aGPU, bGPU; delete a;
delete b;
delete aGPU;
delete bGPU;
delete[] dimSize; delete[] dimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete[] dimSize; delete[] dimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
/* other cases */ /* other cases */
/* /*
TODO!! TODO!!
*/ */
/* test for Sum Function */ /* test for Sort Function */
extern "C"
bool TestSort() bool TestSort()
{ {
XPRINT(0, stdout, "[TEST SORT]\n"); XPRINT(0, stdout, "[TEST SORT] sort the tensor along a given dimension \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
...@@ -195,6 +189,7 @@ bool TestSort() ...@@ -195,6 +189,7 @@ bool TestSort()
} }
else else
XPRINT(0, stdout, ">> case 2 passed!\n"); XPRINT(0, stdout, ">> case 2 passed!\n");
/* other cases test */ /* other cases test */
/* /*
TODO!! TODO!!
......
...@@ -22,13 +22,13 @@ ...@@ -22,13 +22,13 @@
#ifndef __TEST_SORT_H__ #ifndef __TEST_SORT_H__
#define __TEST_SORT_H__ #define __TEST_SORT_H__
#include "../core/Sort.h" #include "../core/sort/Sort.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for Sum Function */ /* test for Sort Function */
extern "C" extern "C"
bool TestSort(); bool TestSort();
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __TEST_SUM_H__ #endif // __TEST_SORT_H__
...@@ -19,18 +19,17 @@ ...@@ -19,18 +19,17 @@
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-13 * $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-13
*/ */
#include "../XTensor.h" #include "TSplit.h"
#include "../XDevice.h"
#include "../core/Split.h"
#include "../XList.h"
namespace nts { // namespace nt(NiuTrans.Tensor) namespace nts { // namespace nt(NiuTrans.Tensor)
/* case 1: transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3)
* In this case, 4 * 3 -> 2 * 2 * 3, whereToSplit=0, splitNum=2. /*
case 1: transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3)
In this case, (4, 3) -> (2, 2, 3), whereToSplit=0, splitNum=2.
*/ */
bool TestSplit1() bool TestSplit1()
{ {
/* a source tensor of size 4 * 3 */ /* a source tensor of size (4, 3) */
int sOrder = 2; int sOrder = 2;
int * sDimSize = new int[sOrder]; int * sDimSize = new int[sOrder];
sDimSize[0] = 4; sDimSize[0] = 4;
...@@ -40,7 +39,7 @@ bool TestSplit1() ...@@ -40,7 +39,7 @@ bool TestSplit1()
for (int i = 0; i < sOrder; i++) for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i]; sUnitNum *= sDimSize[i];
/* a target tensor of size 2 * 2 * 3 */ /* a target tensor of size (2, 2, 3) */
int tOrder = 3; int tOrder = 3;
int * tDimSize = new int[tOrder]; int * tDimSize = new int[tOrder];
tDimSize[0] = 2; tDimSize[0] = 2;
...@@ -109,12 +108,13 @@ bool TestSplit1() ...@@ -109,12 +108,13 @@ bool TestSplit1()
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 2: transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3) /*
* In this case, 3 * 4 -> 2 * 3 * 2, whereToSplit=1, splitNum=2. case 2: transform a tensor by splitting it, e.g., (N, M) -> (N/3, M, 3)
In this case, (3, 4) -> (2, 3, 2), whereToSplit=1, splitNum=2.
*/ */
bool TestSplit2() bool TestSplit2()
{ {
/* a source tensor of size 3 * 4 */ /* a source tensor of size (3, 4) */
int sOrder = 2; int sOrder = 2;
int * sDimSize = new int[sOrder]; int * sDimSize = new int[sOrder];
sDimSize[0] = 3; sDimSize[0] = 3;
...@@ -124,7 +124,7 @@ bool TestSplit2() ...@@ -124,7 +124,7 @@ bool TestSplit2()
for (int i = 0; i < sOrder; i++) for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i]; sUnitNum *= sDimSize[i];
/* a target tensor of size 2 * 3 * 2 */ /* a target tensor of size (2, 3, 2) */
int tOrder = 3; int tOrder = 3;
int * tDimSize = new int[tOrder]; int * tDimSize = new int[tOrder];
tDimSize[0] = 2; tDimSize[0] = 2;
...@@ -194,8 +194,9 @@ bool TestSplit2() ...@@ -194,8 +194,9 @@ bool TestSplit2()
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 3: split a big tensor into small tensors /*
* In this case, 3 * 4 -> 2 * (3 * 2) , whereToSplit=1, splitNum=2. case 3: split a big tensor into small tensors
In this case, (3, 4) -> 2 * (3, 2) , whereToSplit=1, splitNum=2.
*/ */
bool TestSplit3() bool TestSplit3()
{ {
...@@ -203,7 +204,7 @@ bool TestSplit3() ...@@ -203,7 +204,7 @@ bool TestSplit3()
XList tList; XList tList;
tList = XList(); tList = XList();
/* a source tensor of size (3 * 4) */ /* a source tensor of size (3, 4) */
int sOrder = 2; int sOrder = 2;
int * sDimSize = new int[sOrder]; int * sDimSize = new int[sOrder];
sDimSize[0] = 3; sDimSize[0] = 3;
...@@ -213,7 +214,7 @@ bool TestSplit3() ...@@ -213,7 +214,7 @@ bool TestSplit3()
for (int i = 0; i < sOrder; i++) for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i]; sUnitNum *= sDimSize[i];
/* a target tensor of size (3 * 2) */ /* a target tensor of size (3, 2) */
int tOrder1 = 2; int tOrder1 = 2;
int * tDimSize1 = new int[tOrder1]; int * tDimSize1 = new int[tOrder1];
tDimSize1[0] = 3; tDimSize1[0] = 3;
...@@ -313,10 +314,9 @@ TODO!! ...@@ -313,10 +314,9 @@ TODO!!
*/ */
/* test for Split Function */ /* test for Split Function */
extern "C" bool TestSplit()
bool TestSplit()
{ {
XPRINT(0, stdout, "[TEST SPLIT] -------------\n"); XPRINT(0, stdout, "[TEST SPLIT] split a big tensor into small tensors \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_SPLIT_H__ #ifndef __TEST_SPLIT_H__
#define __TEST_SPLIT_H__ #define __TEST_SPLIT_H__
#include "../core/Split.h" #include "../core/shape/Split.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -19,15 +19,14 @@ ...@@ -19,15 +19,14 @@
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-04-30 * $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-04-30
*/ */
#include "../XTensor.h" #include "TSum.h"
#include "../XDevice.h"
#include "../core/Sum.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1 */
/* case 1: tensor summation c = a + b * \beta */
bool TestSum1() bool TestSum1()
{ {
/* a tensor of size 2 * 4 */ /* a tensor of size (2, 4) */
int order = 2; int order = 2;
int * dimSize = new int[order]; int * dimSize = new int[order];
dimSize[0] = 2; dimSize[0] = 2;
...@@ -37,12 +36,12 @@ bool TestSum1() ...@@ -37,12 +36,12 @@ bool TestSum1()
for (int i = 0; i < order; i++) for (int i = 0; i < order; i++)
unitNum *= dimSize[i]; unitNum *= dimSize[i];
DTYPE aData[2][4] = { {0.0, 1.0, 2.0, 3.0}, DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0, 5.0, 6.0, 7.0} }; {4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE bData[2][4] = { {1.0, -1.0, -3.0, -5.0}, DTYPE bData[2][4] = { {1.0F, -1.0F, -3.0F, -5.0F},
{-7.0, -9.0, -11.0, -13.0} }; {-7.0F, -9.0F, -11.0F, -13.0F} };
DTYPE answer[2][4] = { {1.0, 0.0, -1.0, -2.0}, DTYPE answer[2][4] = { {1.0F, 0.0F, -1.0F, -2.0F},
{-3.0, -4.0, -5.0, -6.0} }; {-3.0F, -4.0F, -5.0F, -6.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
...@@ -80,22 +79,27 @@ bool TestSum1() ...@@ -80,22 +79,27 @@ bool TestSum1()
gpuTest = aGPU->CheckData(answer, unitNum); gpuTest = aGPU->CheckData(answer, unitNum);
/* destroy variables */ /* destroy variables */
delete a, b, aGPU, bGPU; delete a;
delete b;
delete aGPU;
delete bGPU;
delete[] dimSize; delete[] dimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete[] dimSize; delete[] dimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 2 */ /* case 2: tensor summation c = a + b * \beta */
bool TestSum2() bool TestSum2()
{ {
/* a tensor of size 2 * 4 */ /* a tensor of size (2, 4) */
int order = 2; int order = 2;
int * dimSize = new int[order]; int * dimSize = new int[order];
dimSize[0] = 2; dimSize[0] = 2;
...@@ -105,12 +109,12 @@ bool TestSum2() ...@@ -105,12 +109,12 @@ bool TestSum2()
for (int i = 0; i < order; i++) { for (int i = 0; i < order; i++) {
unitNum *= dimSize[i]; unitNum *= dimSize[i];
} }
DTYPE aData[2][4] = { {0.0, 1.0, 2.0, 3.0}, DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0, 5.0, 6.0, 7.0} }; {4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE bData[2][4] = { {1.0, -1.0, -3.0, -5.0}, DTYPE bData[2][4] = { {1.0F, -1.0F, -3.0F, -5.0F},
{-7.0, -9.0, -11.0, -13.0} }; {-7.0F, -9.0F, -11.0F, -13.0F} };
DTYPE answer[2][4] = { {0.5, 0.5, 0.5, 0.5}, DTYPE answer[2][4] = { {0.5F, 0.5F, 0.5F, 0.5F},
{0.5, 0.5, 0.5, 0.5} }; {0.5F, 0.5F, 0.5F, 0.5F} };
float beta = 0.5F; float beta = 0.5F;
/* CPU test */ /* CPU test */
...@@ -126,7 +130,7 @@ bool TestSum2() ...@@ -126,7 +130,7 @@ bool TestSum2()
b->SetData(bData, unitNum); b->SetData(bData, unitNum);
c->SetZeroAll(); c->SetZeroAll();
/* call sum function */ /* call Sum function */
Sum(a, b, c, beta); Sum(a, b, c, beta);
/* check results */ /* check results */
...@@ -146,15 +150,21 @@ bool TestSum2() ...@@ -146,15 +150,21 @@ bool TestSum2()
bGPU->SetData(bData, unitNum); bGPU->SetData(bData, unitNum);
cGPU->SetZeroAll(); cGPU->SetZeroAll();
/* call sum function */ /* call Sum function */
Sum(aGPU, bGPU, cGPU, beta); Sum(aGPU, bGPU, cGPU, beta);
/* check results */ /* check results */
gpuTest = cGPU->CheckData(answer, unitNum); gpuTest = cGPU->CheckData(answer, unitNum);
/* destroy variables */ /* destroy variables */
delete a, b, c, aGPU, bGPU, cGPU; delete a;
delete b;
delete c;
delete aGPU;
delete bGPU;
delete cGPU;
delete[] dimSize; delete[] dimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
...@@ -162,6 +172,7 @@ bool TestSum2() ...@@ -162,6 +172,7 @@ bool TestSum2()
delete b; delete b;
delete c; delete c;
delete[] dimSize; delete[] dimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
...@@ -172,7 +183,6 @@ bool TestSum2() ...@@ -172,7 +183,6 @@ bool TestSum2()
*/ */
/* test for Sum Function */ /* test for Sum Function */
extern "C"
bool TestSum() bool TestSum()
{ {
XPRINT(0, stdout, "[TEST SUM] tensor summation c = a + b * beta\n"); XPRINT(0, stdout, "[TEST SUM] tensor summation c = a + b * beta\n");
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#ifndef __TEST_SUM_H__ #ifndef __TEST_SUM_H__
#define __TEST_SUM_H__ #define __TEST_SUM_H__
#include "../core/Sum.h" #include "../core/arithmetic/Sum.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#include "TSumByColumnTV.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test SumByColumnTV function
sum of a tensor and a vector (column vector) in a column by column manner
here a is of size (2, 4), b is of size (2, 1) and the result c is of size (2, 4);
the expected answer adds b to every column of a
<< return - true if the result matches the answer on CPU
            (and also on GPU when USE_CUDA is defined)
*/
bool TestSumByColumnTV1()
{
    /* a tensor of size (2, 4) */
    int aOrder = 2;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 2;
    aDimSize[1] = 4;

    int aUnitNum = 1;
    for (int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    /* a tensor of size (2, 1) */
    int bOrder = 2;
    int * bDimSize = new int[bOrder];
    bDimSize[0] = 2;
    bDimSize[1] = 1;

    int bUnitNum = 1;
    for (int i = 0; i < bOrder; i++)
        bUnitNum *= bDimSize[i];

    /* a tensor of size (2, 4) */
    int cOrder = 2;
    int * cDimSize = new int[cOrder];
    cDimSize[0] = 2;
    cDimSize[1] = 4;

    int cUnitNum = 1;
    for (int i = 0; i < cOrder; i++)
        cUnitNum *= cDimSize[i];

    DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE bData[2][1] = { {1.0F},
                          {0.0F} };
    DTYPE answer[2][4] = { {1.0F, 2.0F, 3.0F, 4.0F},
                           {4.0F, 5.0F, 6.0F, 7.0F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensor(aOrder, aDimSize);
    XTensor * b = NewTensor(bOrder, bDimSize);
    XTensor * c = NewTensor(cOrder, cDimSize);

    /* initialize variables */
    a->SetData(aData, aUnitNum);
    b->SetData(bData, bUnitNum);

    /* clear the output, as the GPU path below does, so that the check
       does not depend on whatever NewTensor left in c */
    c->SetZeroAll();

    /* call SumByColumnTV function */
    SumByColumnTV(a, b, c);

    /* check results */
    cpuTest = c->CheckData(answer, cUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors (the trailing 1.0F and 0 are presumably the density
       and the device id - confirm against NewTensor) */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
    XTensor * cGPU = NewTensor(cOrder, cDimSize, X_FLOAT, 1.0F, 0);

    /* initialize variables */
    aGPU->SetData(aData, aUnitNum);
    bGPU->SetData(bData, bUnitNum);
    cGPU->SetZeroAll();

    /* call SumByColumnTV function */
    SumByColumnTV(aGPU, bGPU, cGPU);

    /* check results */
    gpuTest = cGPU->CheckData(answer, cUnitNum);

    /* destroy variables */
    delete a;
    delete b;
    delete c;
    delete aGPU;
    delete bGPU;
    delete cGPU;
    delete[] aDimSize;
    delete[] bDimSize;
    delete[] cDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete a;
    delete b;
    delete c;
    delete[] aDimSize;
    delete[] bDimSize;
    delete[] cDimSize;

    return cpuTest;
#endif // USE_CUDA
}
/*
case 2: test SumByColumnTV function (two-argument call)
sum of a tensor and a vector (column vector) in a column by column manner
here a is of size (2, 4) and b is of size (2, 1); the result is checked in a
<< return - true if the result matches the answer on CPU
            (and also on GPU when USE_CUDA is defined)
*/
bool TestSumByColumnTV2()
{
    /* shape of the tensor operand: (2, 4) */
    int tensorOrder = 2;
    int * tensorDims = new int[tensorOrder];
    tensorDims[0] = 2;
    tensorDims[1] = 4;
    int tensorUnitNum = tensorDims[0] * tensorDims[1];

    /* shape of the column vector operand: (2, 1) */
    int vectorOrder = 2;
    int * vectorDims = new int[vectorOrder];
    vectorDims[0] = 2;
    vectorDims[1] = 1;
    int vectorUnitNum = vectorDims[0] * vectorDims[1];

    /* input data and the expected content of a after the call */
    DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE bData[2][1] = { {1.0F},
                          {0.0F} };
    DTYPE answer[2][4] = { {1.0F, 2.0F, 3.0F, 4.0F},
                           {4.0F, 5.0F, 6.0F, 7.0F} };

    /* CPU test: build the operands, run the function, compare with the answer */
    XTensor * a = NewTensor(tensorOrder, tensorDims);
    XTensor * b = NewTensor(vectorOrder, vectorDims);
    a->SetData(aData, tensorUnitNum);
    b->SetData(bData, vectorUnitNum);

    SumByColumnTV(a, b);

    bool cpuTest = a->CheckData(answer, tensorUnitNum);

    /* stays true when the library is built without USE_CUDA */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* GPU test: the same steps on tensors created with the extra NewTensor arguments */
    XTensor * aGPU = NewTensor(tensorOrder, tensorDims, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(vectorOrder, vectorDims, X_FLOAT, 1.0F, 0);
    aGPU->SetData(aData, tensorUnitNum);
    bGPU->SetData(bData, vectorUnitNum);

    SumByColumnTV(aGPU, bGPU);

    gpuTest = aGPU->CheckData(answer, tensorUnitNum);

    delete aGPU;
    delete bGPU;
#endif // USE_CUDA

    /* destroy variables */
    delete a;
    delete b;
    delete[] tensorDims;
    delete[] vectorDims;

    return cpuTest && gpuTest;
}
/* other cases */
/*
TODO!!
*/
/*
test for SumByColumnTV Function
runs every test case, reports each outcome through XPRINT and prints a summary
<< return - true if all the cases passed
*/
bool TestSumByColumnTV()
{
    XPRINT(0, stdout, "[TEST SumByColumnTV] sum of a tensor and a vector (column vector) in a column by column manner \n");

    bool allPassed = true;

    /* case 1 test */
    if (TestSumByColumnTV1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* case 2 test */
    if (TestSumByColumnTV2()) {
        XPRINT(0, stdout, ">> case 2 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#ifndef __TEST_SUMBYCOLUMNTV_H__
#define __TEST_SUMBYCOLUMNTV_H__
#include "../core/arithmetic/SumByColumnTV.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for SumByColumnTV Function
declared with C language linkage (extern "C")
<< return - true if all the test cases passed
*/
extern "C"
bool TestSumByColumnTV();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_SUMBYCOLUMNTV_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#include "TSumByColumnVT.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test SumByColumnVT function
sum of a vector (column vector) and a tensor in a column by column manner
here a is of size (2, 1), b is of size (2, 4) and the result c is of size (2, 1);
the expected answer is a plus the sum of all the columns of b
<< return - true if the result matches the answer on CPU
            (and also on GPU when USE_CUDA is defined)
*/
bool TestSumByColumnVT1()
{
    /* shape of the column vector operand: (2, 1) */
    int vectorOrder = 2;
    int * vectorDims = new int[vectorOrder];
    vectorDims[0] = 2;
    vectorDims[1] = 1;
    int vectorUnitNum = vectorDims[0] * vectorDims[1];

    /* shape of the tensor operand: (2, 4) */
    int tensorOrder = 2;
    int * tensorDims = new int[tensorOrder];
    tensorDims[0] = 2;
    tensorDims[1] = 4;
    int tensorUnitNum = tensorDims[0] * tensorDims[1];

    /* shape of the result: (2, 1) */
    int resultOrder = 2;
    int * resultDims = new int[resultOrder];
    resultDims[0] = 2;
    resultDims[1] = 1;
    int resultUnitNum = resultDims[0] * resultDims[1];

    /* input data and the expected content of c */
    DTYPE aData[2][1] = { {1.0F},
                          {0.0F} };
    DTYPE bData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE answer[2][1] = { {7.0F},
                           {22.0F} };

    /* CPU test: build the operands, run the function, compare with the answer */
    XTensor * a = NewTensor(vectorOrder, vectorDims);
    XTensor * b = NewTensor(tensorOrder, tensorDims);
    XTensor * c = NewTensor(resultOrder, resultDims);
    a->SetData(aData, vectorUnitNum);
    b->SetData(bData, tensorUnitNum);
    c->SetZeroAll();

    SumByColumnVT(a, b, c);

    bool cpuTest = c->CheckData(answer, resultUnitNum);

    /* stays true when the library is built without USE_CUDA */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* GPU test: the same steps on tensors created with the extra NewTensor arguments */
    XTensor * aGPU = NewTensor(vectorOrder, vectorDims, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(tensorOrder, tensorDims, X_FLOAT, 1.0F, 0);
    XTensor * cGPU = NewTensor(resultOrder, resultDims, X_FLOAT, 1.0F, 0);
    aGPU->SetData(aData, vectorUnitNum);
    bGPU->SetData(bData, tensorUnitNum);
    cGPU->SetZeroAll();

    SumByColumnVT(aGPU, bGPU, cGPU);

    gpuTest = cGPU->CheckData(answer, resultUnitNum);

    delete aGPU;
    delete bGPU;
    delete cGPU;
#endif // USE_CUDA

    /* destroy variables */
    delete a;
    delete b;
    delete c;
    delete[] vectorDims;
    delete[] tensorDims;
    delete[] resultDims;

    return cpuTest && gpuTest;
}
/*
case 2: test SumByColumnVT function (two-argument call)
sum of a vector (column vector) and a tensor in a column by column manner
here a is of size (2, 1) and b is of size (2, 4); the result is checked in a
<< return - true if the result matches the answer on CPU
            (and also on GPU when USE_CUDA is defined)
*/
bool TestSumByColumnVT2()
{
    /* shape of the column vector operand: (2, 1) */
    int vectorOrder = 2;
    int * vectorDims = new int[vectorOrder];
    vectorDims[0] = 2;
    vectorDims[1] = 1;
    int vectorUnitNum = vectorDims[0] * vectorDims[1];

    /* shape of the tensor operand: (2, 4) */
    int tensorOrder = 2;
    int * tensorDims = new int[tensorOrder];
    tensorDims[0] = 2;
    tensorDims[1] = 4;
    int tensorUnitNum = tensorDims[0] * tensorDims[1];

    /* input data and the expected content of a after the call */
    DTYPE aData[2][1] = { {1.0F},
                          {0.0F} };
    DTYPE bData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE answer[2][1] = { {7.0F},
                           {22.0F} };

    /* CPU test: build the operands, run the function, compare with the answer */
    XTensor * a = NewTensor(vectorOrder, vectorDims);
    XTensor * b = NewTensor(tensorOrder, tensorDims);
    a->SetData(aData, vectorUnitNum);
    b->SetData(bData, tensorUnitNum);

    SumByColumnVT(a, b);

    bool cpuTest = a->CheckData(answer, vectorUnitNum);

    /* stays true when the library is built without USE_CUDA */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* GPU test: the same steps on tensors created with the extra NewTensor arguments */
    XTensor * aGPU = NewTensor(vectorOrder, vectorDims, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(tensorOrder, tensorDims, X_FLOAT, 1.0F, 0);
    aGPU->SetData(aData, vectorUnitNum);
    bGPU->SetData(bData, tensorUnitNum);

    SumByColumnVT(aGPU, bGPU);

    gpuTest = aGPU->CheckData(answer, vectorUnitNum);

    delete aGPU;
    delete bGPU;
#endif // USE_CUDA

    /* destroy variables */
    delete a;
    delete b;
    delete[] vectorDims;
    delete[] tensorDims;

    return cpuTest && gpuTest;
}
/* other cases */
/*
TODO!!
*/
/*
test for SumByColumnVT Function
runs every case, prints a passed/failed line per case and a final summary
<< return - true if all cases passed
*/
bool TestSumByColumnVT()
{
    XPRINT(0, stdout, "[TEST SumByColumnVT] sum of a vector (column vector) and a tensor in a column by column manner \n");

    bool allPassed = true;

    /* case 1 test */
    if (TestSumByColumnVT1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* case 2 test */
    if (TestSumByColumnVT2()) {
        XPRINT(0, stdout, ">> case 2 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary */
    if (allPassed) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> Failed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#ifndef __TEST_SUMBYCOLUMNVT_H__
#define __TEST_SUMBYCOLUMNVT_H__
#include "../core/arithmetic/SumByColumnVT.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for SumByColumnVT Function
runs all SumByColumnVT test cases and prints the result of each one
<< return - true if every case passed, false otherwise
*/
extern "C"
bool TestSumByColumnVT();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_SUMBYCOLUMNVT_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#include "TTopK.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: get the top-k items along a given dimension.
In this case,
(2, 4) -> (2, 4), dim = 0, k = 2
(2, 4) -> (2, 4), dim = 1, k = 4
Both the selected values and their indices are checked.
<< return - true if the CPU check (and the GPU check when USE_CUDA is defined) passed
*/
bool TestTopK1()
{
    /* shape of the input tensor: (2, 4) */
    int sOrder = 2;
    int * sDims = new int[sOrder];
    sDims[0] = 2;
    sDims[1] = 4;

    /* shape of the output tensors: (2, 4) */
    int tOrder = 2;
    int * tDims = new int[tOrder];
    tDims[0] = 2;
    tDims[1] = 4;

    /* number of cells = product of the dimension sizes */
    int sUnitNum = 1;
    for (int d = 0; d < sOrder; d++)
        sUnitNum *= sDims[d];

    int tUnitNum = 1;
    for (int d = 0; d < tOrder; d++)
        tUnitNum *= tDims[d];

    DTYPE sData[2][4] = { {5.0F, 1.0F, 2.0F, 8.0F},
                          {4.0F, 3.0F, 7.0F, 6.0F} };

    /* expected values and indices for dim = 0 (each column in descending order) */
    DTYPE tAnswer1[2][4] = { {5.0F, 3.0F, 7.0F, 8.0F},
                             {4.0F, 1.0F, 2.0F, 6.0F} };
    int indexAnswer1[2][4] = { {0, 1, 1, 0},
                               {1, 0, 0, 1} };

    /* expected values and indices for dim = 1 (each row in descending order) */
    DTYPE tAnswer2[2][4] = { {8.0F, 5.0F, 2.0F, 1.0F},
                             {7.0F, 6.0F, 4.0F, 3.0F} };
    int indexAnswer2[2][4] = { {3, 0, 2, 1},
                               {2, 3, 0, 1} };

    /* CPU test */
    XTensor * s = NewTensor(sOrder, sDims);
    XTensor * t1 = NewTensor(tOrder, tDims);
    XTensor * t2 = NewTensor(tOrder, tDims);
    XTensor * index1 = NewTensor(tOrder, tDims, X_INT);
    XTensor * index2 = NewTensor(tOrder, tDims, X_INT);

    s->SetData(sData, sUnitNum);
    t1->SetZeroAll();
    t2->SetZeroAll();
    index1->SetZeroAll();
    index2->SetZeroAll();

    /* k is the full size of the chosen dimension in both calls */
    int dimFirst = 0;
    int kFirst = sDims[dimFirst];
    int dimSecond = 1;
    int kSecond = sDims[dimSecond];

    TopK(s, t1, index1, dimFirst, kFirst);
    TopK(s, t2, index2, dimSecond, kSecond);

    /* later checks are skipped once one of them fails */
    bool cpuTest = t1->CheckData(tAnswer1, tUnitNum);
    cpuTest = cpuTest && t2->CheckData(tAnswer2, tUnitNum);
    cpuTest = cpuTest && index1->CheckData(indexAnswer1, tUnitNum);
    cpuTest = cpuTest && index2->CheckData(indexAnswer2, tUnitNum);

    /* stays true when the code is built without USE_CUDA */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    XTensor * sGPU = NewTensor(sOrder, sDims, X_FLOAT, 1.0F, 0);
    XTensor * tGPU1 = NewTensor(tOrder, tDims, X_FLOAT, 1.0F, 0);
    XTensor * tGPU2 = NewTensor(tOrder, tDims, X_FLOAT, 1.0F, 0);
    XTensor * indexGPU1 = NewTensor(tOrder, tDims, X_INT, 1.0F, 0);
    XTensor * indexGPU2 = NewTensor(tOrder, tDims, X_INT, 1.0F, 0);

    sGPU->SetData(sData, sUnitNum);
    tGPU1->SetZeroAll();
    tGPU2->SetZeroAll();
    indexGPU1->SetZeroAll();
    indexGPU2->SetZeroAll();

    TopK(sGPU, tGPU1, indexGPU1, dimFirst, kFirst);
    TopK(sGPU, tGPU2, indexGPU2, dimSecond, kSecond);

    gpuTest = tGPU1->CheckData(tAnswer1, tUnitNum);
    gpuTest = gpuTest && tGPU2->CheckData(tAnswer2, tUnitNum);
    gpuTest = gpuTest && indexGPU1->CheckData(indexAnswer1, tUnitNum);
    gpuTest = gpuTest && indexGPU2->CheckData(indexAnswer2, tUnitNum);

    delete sGPU;
    delete tGPU1;
    delete tGPU2;
    delete indexGPU1;
    delete indexGPU2;
#endif // USE_CUDA

    /* destroy variables */
    delete s;
    delete t1;
    delete t2;
    delete index1;
    delete index2;
    delete[] sDims;
    delete[] tDims;

    return cpuTest && gpuTest;
}
/*
case 2: get the top-k items along a given dimension.
In this case, (2, 4) -> (2, 2), dim = 1, k = 2.
Both the selected values and their indices are checked.
<< return - true if the CPU check (and the GPU check when USE_CUDA is defined) passed
*/
bool TestTopK2()
{
    /* shape of the input tensor: (2, 4) */
    int sOrder = 2;
    int * sDims = new int[sOrder];
    sDims[0] = 2;
    sDims[1] = 4;

    /* shape of the output tensors: (2, 2) */
    int tOrder = 2;
    int * tDims = new int[tOrder];
    tDims[0] = 2;
    tDims[1] = 2;

    /* number of cells = product of the dimension sizes */
    int sUnitNum = 1;
    for (int d = 0; d < sOrder; d++)
        sUnitNum *= sDims[d];

    int tUnitNum = 1;
    for (int d = 0; d < tOrder; d++)
        tUnitNum *= tDims[d];

    DTYPE sData[2][4] = { {5.0F, 1.0F, 2.0F, 8.0F},
                          {4.0F, 3.0F, 7.0F, 6.0F} };
    /* the two largest items of each row and their positions in that row */
    DTYPE tAnswer[2][2] = { {8.0F, 5.0F},
                            {7.0F, 6.0F} };
    int indexAnswer[2][2] = { {3, 0},
                              {2, 3} };

    /* k is taken from the output shape along the chosen dimension */
    int dim = 1;
    int k = tDims[dim];

    /* CPU test */
    XTensor * s = NewTensor(sOrder, sDims);
    XTensor * t = NewTensor(tOrder, tDims);
    XTensor * index = NewTensor(tOrder, tDims, X_INT);

    s->SetData(sData, sUnitNum);
    t->SetZeroAll();
    index->SetZeroAll();

    TopK(s, t, index, dim, k);

    /* the index check is skipped when the value check fails */
    bool cpuTest = t->CheckData(tAnswer, tUnitNum);
    cpuTest = cpuTest && index->CheckData(indexAnswer, tUnitNum);

    /* stays true when the code is built without USE_CUDA */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    XTensor * sGPU = NewTensor(sOrder, sDims, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(tOrder, tDims, X_FLOAT, 1.0F, 0);
    XTensor * indexGPU = NewTensor(tOrder, tDims, X_INT, 1.0F, 0);

    sGPU->SetData(sData, sUnitNum);
    tGPU->SetZeroAll();
    indexGPU->SetZeroAll();

    TopK(sGPU, tGPU, indexGPU, dim, k);

    gpuTest = tGPU->CheckData(tAnswer, tUnitNum);
    gpuTest = gpuTest && indexGPU->CheckData(indexAnswer, tUnitNum);

    delete sGPU;
    delete tGPU;
    delete indexGPU;
#endif // USE_CUDA

    /* destroy variables */
    delete s;
    delete t;
    delete index;
    delete[] sDims;
    delete[] tDims;

    return cpuTest && gpuTest;
}
/* other cases */
/*
TODO!!
*/
/*
test for TopK Function
runs every case, prints a passed/failed line per case and a final summary
<< return - true if all cases passed
*/
bool TestTopK()
{
    XPRINT(0, stdout, "[TEST TopK] get the top-k items along a given dimension\n");

    bool allPassed = true;

    /* case 1 test */
    if (TestTopK1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* case 2 test */
    if (TestTopK2()) {
        XPRINT(0, stdout, ">> case 2 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary */
    if (allPassed) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> Failed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#ifndef __TEST_TOPK_H__
#define __TEST_TOPK_H__
#include "../core/sort/TopK.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for TopK Function
runs all TopK test cases and prints the result of each one
<< return - true if every case passed, false otherwise
*/
extern "C"
bool TestTopK();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_TOPK_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-13
*/
#include "../XList.h"
#include "TUnsqueeze.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: insert a dimension by copying the blocks for x times (where x is the size of the inserted dimension)
In this case,
(2, 3) -> (2, 2, 3), dim=1, dSize=2
(2, 3) -> (2, 3, 2), dim=2, dSize=2
<< return - true if the CPU check (and the GPU check when USE_CUDA is defined) passed
*/
bool TestUnsqueeze1()
{
    /* shape of the source tensor: (2, 3) */
    int sOrder = 2;
    int * sDims = new int[sOrder];
    sDims[0] = 2;
    sDims[1] = 3;

    /* shape of the first target tensor: (2, 2, 3) */
    int tOrder1 = 3;
    int * tDims1 = new int[tOrder1];
    tDims1[0] = 2;
    tDims1[1] = 2;
    tDims1[2] = 3;

    /* shape of the second target tensor: (2, 3, 2) */
    int tOrder2 = 3;
    int * tDims2 = new int[tOrder2];
    tDims2[0] = 2;
    tDims2[1] = 3;
    tDims2[2] = 2;

    /* number of cells = product of the dimension sizes */
    int sUnitNum = 1;
    for (int d = 0; d < sOrder; d++)
        sUnitNum *= sDims[d];

    int tUnitNum1 = 1;
    for (int d = 0; d < tOrder1; d++)
        tUnitNum1 *= tDims1[d];

    int tUnitNum2 = 1;
    for (int d = 0; d < tOrder2; d++)
        tUnitNum2 *= tDims2[d];

    DTYPE sData[2][3] = { {0.0F, 1.0F, 2.0F},
                          {3.0F, 4.0F, 5.0F} };

    /* every source row repeated twice along the new dimension 1 */
    DTYPE answer1[2][2][3] = { { {0.0F, 1.0F, 2.0F},
                                 {0.0F, 1.0F, 2.0F} },
                               { {3.0F, 4.0F, 5.0F},
                                 {3.0F, 4.0F, 5.0F} } };

    /* every source cell repeated twice along the new dimension 2 */
    DTYPE answer2[2][3][2] = { { {0.0F, 0.0F},
                                 {1.0F, 1.0F},
                                 {2.0F, 2.0F} },
                               { {3.0F, 3.0F},
                                 {4.0F, 4.0F},
                                 {5.0F, 5.0F} } };

    /* CPU test */
    XTensor * s = NewTensor(sOrder, sDims);
    XTensor * t1 = NewTensor(tOrder1, tDims1);
    XTensor * t2 = NewTensor(tOrder2, tDims2);

    s->SetData(sData, sUnitNum);
    t1->SetZeroAll();
    t2->SetZeroAll();

    Unsqueeze(s, t1, 1, 2);
    Unsqueeze(s, t2, 2, 2);

    /* the second check is skipped when the first one fails */
    bool cpuTest = t1->CheckData(answer1, tUnitNum1);
    cpuTest = cpuTest && t2->CheckData(answer2, tUnitNum2);

    /* stays true when the code is built without USE_CUDA */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    XTensor * sGPU = NewTensor(sOrder, sDims, X_FLOAT, 1.0F, 0);
    XTensor * tGPU1 = NewTensor(tOrder1, tDims1, X_FLOAT, 1.0F, 0);
    XTensor * tGPU2 = NewTensor(tOrder2, tDims2, X_FLOAT, 1.0F, 0);

    sGPU->SetData(sData, sUnitNum);
    tGPU1->SetZeroAll();
    tGPU2->SetZeroAll();

    Unsqueeze(sGPU, tGPU1, 1, 2);
    Unsqueeze(sGPU, tGPU2, 2, 2);

    gpuTest = tGPU1->CheckData(answer1, tUnitNum1);
    gpuTest = gpuTest && tGPU2->CheckData(answer2, tUnitNum2);

    delete sGPU;
    delete tGPU1;
    delete tGPU2;
#endif // USE_CUDA

    /* destroy variables */
    delete s;
    delete t1;
    delete t2;
    delete[] sDims;
    delete[] tDims1;
    delete[] tDims2;

    return cpuTest && gpuTest;
}
/* other cases */
/*
TODO!!
*/
/*
test for Unsqueeze Function
runs every case, prints a passed/failed line per case and a final summary
<< return - true if all cases passed
*/
bool TestUnsqueeze()
{
    XPRINT(0, stdout, "[TEST Unsqueeze] insert a dimension by copying the blocks for x times\n");

    bool allPassed = true;

    /* case 1 test */
    if (TestUnsqueeze1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary */
    if (allPassed) {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> Failed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#ifndef __TEST_UNSQUEEZE_H__
#define __TEST_UNSQUEEZE_H__
#include "../core/shape/Unsqueeze.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for Unsqueeze Function
runs all Unsqueeze test cases and prints the result of each one
<< return - true if every case passed, false otherwise
*/
extern "C"
bool TestUnsqueeze();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_UNSQUEEZE_H__
...@@ -19,14 +19,13 @@ ...@@ -19,14 +19,13 @@
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-6-24 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-6-24
*/ */
#include "TXMem.h"
#include "../XGlobal.h" #include "../XGlobal.h"
#include "../XUtility.h" #include "../XUtility.h"
#include "../XMem.h" #include "TXMem.h"
/* the nts (NiuTrans.Tensor) namespace */ namespace nts{ // namespace nts(NiuTrans.Tensor)
namespace nts{
/* case 1: test memory pool class */
bool TestXMemCase1() bool TestXMemCase1()
{ {
bool ok = true; bool ok = true;
...@@ -83,6 +82,7 @@ bool TestXMemCase1() ...@@ -83,6 +82,7 @@ bool TestXMemCase1()
return ok; return ok;
} }
/* test for memory pool class */
bool TestXMem() bool TestXMem()
{ {
XPRINT(0, stdout, "[Test] Memory pool ... Began\n"); XPRINT(0, stdout, "[Test] Memory pool ... Began\n");
...@@ -93,11 +93,18 @@ bool TestXMem() ...@@ -93,11 +93,18 @@ bool TestXMem()
/* case 1 test */ /* case 1 test */
caseFlag = TestXMemCase1(); caseFlag = TestXMemCase1();
if (!caseFlag) { returnFlag = false; XPRINT(0, stdout, ">> case 1 failed!\n"); } if (!caseFlag) {
else {XPRINT(0, stdout, ">> case 1 passed!\n");} returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
if (returnFlag) { XPRINT(0, stdout, ">> All Passed!\n"); } if (returnFlag) {
else { XPRINT(0, stdout, ">> Failed!\n"); } XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
double endT = GetClock(); double endT = GetClock();
...@@ -106,4 +113,4 @@ bool TestXMem() ...@@ -106,4 +113,4 @@ bool TestXMem()
return returnFlag; return returnFlag;
} }
} /* end of the nts (NiuTrans.Tensor) namespace */ } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -22,13 +22,13 @@ ...@@ -22,13 +22,13 @@
#ifndef __TXMEM_H__ #ifndef __TXMEM_H__
#define __TXMEM_H__ #define __TXMEM_H__
/* the nts (NiuTrans.Tensor) namespace */ #include "../XMem.h"
namespace nts{
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* test for memory pool class */ /* test for memory pool class */
extern "C" extern "C"
bool TestXMem(); bool TestXMem();
} /* end of the nts (NiuTrans.Tensor) namespace */ } // namespace nts(NiuTrans.Tensor)
#endif // __TXMEM_H__
#endif
...@@ -31,26 +31,42 @@ bool Test() ...@@ -31,26 +31,42 @@ bool Test()
wrong = !TestConcatenate() || wrong; wrong = !TestConcatenate() || wrong;
wrong = !TestConcatenateSolely() || wrong; wrong = !TestConcatenateSolely() || wrong;
//wrong = !TestCopyIndexed() || wrong;
wrong = !TestCopyValues() || wrong;
wrong = !TestMatrixMul() || wrong; wrong = !TestMatrixMul() || wrong;
wrong = !TestMatrixMul2D() || wrong; wrong = !TestMatrixMul2D() || wrong;
wrong = !TestMatrixMul2DParallel() || wrong;
//wrong = !TestMatrixMulBatched() || wrong;
wrong = !TestMatrixMulBatchedCPU() || wrong; wrong = !TestMatrixMulBatchedCPU() || wrong;
wrong = !TestMerge() || wrong; wrong = !TestMerge() || wrong;
wrong = !TestMultiply() || wrong; wrong = !TestMultiply() || wrong;
wrong = !TestNegate() || wrong; wrong = !TestNegate() || wrong;
wrong = !TestNormalize() || wrong; wrong = !TestNormalize() || wrong;
//wrong = !TestPower() || wrong; wrong = !TestPower() || wrong;
wrong = !TestReduceMax() || wrong; wrong = !TestReduceMax() || wrong;
wrong = !TestReduceMean() || wrong; wrong = !TestReduceMean() || wrong;
wrong = !TestReduceSum() || wrong; wrong = !TestReduceSum() || wrong;
wrong = !TestReduceSumSquared() || wrong;
wrong = !TestReduceVariance() || wrong;
wrong = !TestScaleAndShift() || wrong;
wrong = !TestSelect() || wrong;
wrong = !TestSetAscendingOrder() || wrong;
wrong = !TestSetData() || wrong;
wrong = !TestSort() || wrong; wrong = !TestSort() || wrong;
wrong = !TestSplit() || wrong; wrong = !TestSplit() || wrong;
wrong = !TestSum() || wrong; wrong = !TestSum() || wrong;
wrong = !TestSumByColumnTV || wrong;
//wrong = !TestSumByColumnVT() || wrong;
wrong = !TestTopK() || wrong;
wrong = !TestUnsqueeze() || wrong;
wrong = !TestXMem() || wrong; wrong = !TestXMem() || wrong;
//wrong = !TestHardTanH() || wrong; //wrong = !TestHardTanH() || wrong;
//wrong = !TestIdentity() || wrong;
//wrong = !TestLogSoftmax() || wrong;
//wrong = !TestLoss() || wrong; //wrong = !TestLoss() || wrong;
//wrong = !TestRectify() || wrong; //wrong = !TestRectify() || wrong;
wrong = !TestSigmoid() || wrong; //wrong = !TestSigmoid() || wrong;
//wrong = !TestSoftmax() || wrong; //wrong = !TestSoftmax() || wrong;
/* other test */ /* other test */
......
...@@ -24,8 +24,12 @@ ...@@ -24,8 +24,12 @@
#include "TConcatenate.h" #include "TConcatenate.h"
#include "TConcatenateSolely.h" #include "TConcatenateSolely.h"
#include "TCopyIndexed.h"
#include "TCopyValues.h"
#include "TMatrixMul.h" #include "TMatrixMul.h"
#include "TMatrixMul2D.h" #include "TMatrixMul2D.h"
#include "TMatrixMul2DParallel.h"
#include "TMatrixMulBatched.h"
#include "TMatrixMULBatchedCPU.h" #include "TMatrixMULBatchedCPU.h"
#include "TMerge.h" #include "TMerge.h"
#include "TMultiply.h" #include "TMultiply.h"
...@@ -35,12 +39,24 @@ ...@@ -35,12 +39,24 @@
#include "TReduceMax.h" #include "TReduceMax.h"
#include "TReduceMean.h" #include "TReduceMean.h"
#include "TReduceSum.h" #include "TReduceSum.h"
#include "TReduceSumSquared.h"
#include "TReduceVariance.h"
#include "TScaleAndShift.h"
#include "TSelect.h"
#include "TSetAscendingOrder.h"
#include "TSetData.h"
#include "TSort.h" #include "TSort.h"
#include "TSplit.h" #include "TSplit.h"
#include "TSum.h" #include "TSum.h"
#include "TSumByColumnTV.h"
#include "TSumByColumnVT.h"
#include "TTopK.h"
#include "TUnsqueeze.h"
#include "TXMem.h" #include "TXMem.h"
#include "THardTanH.h" #include "THardTanH.h"
#include "TIdentity.h"
#include "TLogSoftmax.h"
#include "TLoss.h" #include "TLoss.h"
#include "TRectify.h" #include "TRectify.h"
#include "TSigmoid.h" #include "TSigmoid.h"
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论