Commit 906eebb7 by xuchen

merge with xuchen branch

parents 7283b3dc b3a76184
...@@ -70,7 +70,7 @@ void XLossGrad::Compute(XTensor * gold, XTensor * y, ...@@ -70,7 +70,7 @@ void XLossGrad::Compute(XTensor * gold, XTensor * y,
XTensor * dedy, XTensor * dedy,
LOSS_FUNCTION_NAME lossName) LOSS_FUNCTION_NAME lossName)
{ {
LossBackward(dedy, gold, y, lossName); _LossBackward(dedy, gold, y, lossName);
} }
} }
\ No newline at end of file
...@@ -328,6 +328,37 @@ void XLink::MakeLink(const XList * list, XTensor * h, int id) ...@@ -328,6 +328,37 @@ void XLink::MakeLink(const XList * list, XTensor * h, int id)
} }
/* /*
create a hyper edge with an input tensor and a list of output tensors
>> t - the input tensor
>> list - the list of output tensors
>> id - id of the edge type
*/
void XLink::MakeLink(XTensor * t, XList * list, int id)
{
/* forward */
for(int i = 0; i < list->count; i++){
XTensor * h = (XTensor*)list->GetItem(i);
if(h == NULL)
continue;
XLink &income = h->income;
income.Reset();
income.SetHead(h);
income.SetType(id);
income.AddTail(t);
}
/* backward */
XLink &outgo = t->outgo;
CheckNTErrors(outgo.head == NULL || outgo.head == t, "Wrong head of the hyperedge!");
for(int i = 0; i < list->count; i++){
XTensor * h = (XTensor*)list->GetItem(i);
if(h == NULL)
continue;
outgo.AddTail(h);
}
}
/*
add parameters add parameters
>> h - head >> h - head
>> param - parameter we want to introduce >> param - parameter we want to introduce
......
...@@ -139,6 +139,10 @@ struct XLink ...@@ -139,6 +139,10 @@ struct XLink
static static
void MakeLink(const XList * list, XTensor * h, int id); void MakeLink(const XList * list, XTensor * h, int id);
/* create a hyper edge with an input tensor and a list of output tensors */
static
void MakeLink(XTensor * h, XList * list, int id);
/* add a parameter */ /* add a parameter */
static static
void AddParamToHead(XTensor * h, DTYPE param); void AddParamToHead(XTensor * h, DTYPE param);
......
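A minimal sketch of how the new one-input/many-outputs MakeLink overload might be called. The tensor names, the XList::Add call and the SHAPE_SPLIT_LIST id are assumptions for illustration, not taken from this commit:

/* sketch: one input x feeding two outputs gathered in a list */
XList outputs;
outputs.Add(&y0);                                 /* assumes the usual XList::Add   */
outputs.Add(&y1);
XLink::MakeLink(&x, &outputs, SHAPE_SPLIT_LIST);
/* forward: y0.income and y1.income now point back to x;
   backward: x.outgo lists y0 and y1 as its tails */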
...@@ -26,76 +26,80 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -26,76 +26,80 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* get operator name */ /* get operator name */
const char * GetOPName(int type) const char * GetOPName(int type)
{ {
if((type & MATH_BASE) != 0){ if ((type & MATH_BASE) != 0){
if(type == MATH_ABSOLUTE) if (type == MATH_ABSOLUTE)
return "M_ABSOLUTE"; return "M_ABSOLUTE";
else if(type == MATH_MATRIXMUL) else if (type == MATH_MATRIXMUL)
return "M_MATRIXMUL"; return "M_MATRIXMUL";
else if(type == MATH_MATRIXMULBATCHED) else if (type == MATH_MATRIXMULBATCHED)
return "M_MATRIXMULBATCHED"; return "M_MATRIXMULBATCHED";
else if(type == MATH_MULTIPLY) else if (type == MATH_MULTIPLY)
return "M_MULTIPLY"; return "M_MULTIPLY";
else if(type == MATH_NEGATE) else if (type == MATH_NEGATE)
return "M_NEGATE"; return "M_NEGATE";
else if(type == MATH_SIGN) else if (type == MATH_SIGN)
return "M_SIGN"; return "M_SIGN";
else if(type == MATH_SUM) else if (type == MATH_SUM)
return "M_SUM"; return "M_SUM";
else if(type == MATH_LOG) else if (type == MATH_LOG)
return "M_NORMALIZE"; return "M_LOG";
else if(type == MATH_NORMALIZE) else if (type == MATH_NORMALIZE)
return "M_LOG"; return "M_NORMALIZE";
else if(type == MATH_POWER) else if (type == MATH_POWER)
return "M_POWER"; return "M_POWER";
else if(type == MATH_SCALEANDSHIFT) else if (type == MATH_SCALEANDSHIFT)
return "M_SCALEANDSHIFT"; return "M_SCALEANDSHIFT";
else if(type == REDUCE_REDUCEMAX) else if (type == REDUCE_REDUCEMAX)
return "R_REDUCEMAX"; return "R_REDUCEMAX";
else if(type == REDUCE_REDUCEMEAN) else if (type == REDUCE_REDUCEMEAN)
return "R_REDUCEMEAN"; return "R_REDUCEMEAN";
else if(type == REDUCE_REDUCESUM) else if (type == REDUCE_REDUCESUM)
return "R_REDUCESUM"; return "R_REDUCESUM";
else if(type == REDUCE_REDUCESUMSQUARED) else if (type == REDUCE_REDUCESUMSQUARED)
return "R_REDUCESUMSQUARED"; return "R_REDUCESUMSQUARED";
else if(type == REDUCE_REDUCEVARIANCE) else if (type == REDUCE_REDUCEVARIANCE)
return "R_REDUCEVARIANCE"; return "R_REDUCEVARIANCE";
} }
else if((type & DATA_BASE) != 0){ else if ((type & DATA_BASE) != 0){
if(type == GETANDSET_SELECT) if (type == GETANDSET_SELECT)
return "G_SELECT"; return "G_SELECT";
else if(type == MOVEMENT_COPYINDEXED) else if (type == MOVEMENT_COPYINDEXED)
return "M_COPYINDEXED"; return "M_COPYINDEXED";
else if(type == MOVEMENT_COPYVALUES) else if (type == MOVEMENT_COPYVALUES)
return "M_COPYVALUES"; return "M_COPYVALUES";
else if(type == SHAPE_CONCATENATE) else if (type == SHAPE_CONCATENATE)
return "S_CONCATENATE"; return "S_CONCATENATE";
else if(type == SHAPE_MERGE) else if (type == SHAPE_MERGE)
return "S_MERGE"; return "S_MERGE";
else if(type == SHAPE_MERGE_LIST) else if (type == SHAPE_MERGE_LIST)
return "S_MERGE_LIST"; return "S_MERGE_LIST";
else if(type == SHAPE_PERMUTE) else if (type == SHAPE_PERMUTE)
return "S_PERMUTE"; return "S_PERMUTE";
else if(type == SHAPE_SPLIT) else if (type == SHAPE_SPLIT)
return "S_SPLIT"; return "S_SPLIT";
else if(type == SHAPE_SPLIT_LIST) else if (type == SHAPE_SPLIT_LIST)
return "S_SPLIT_LIST"; return "S_SPLIT_LIST";
else if(type == SHAPE_TRANSPOSE) else if (type == SHAPE_TRANSPOSE)
return "S_TRANSPOSE"; return "S_TRANSPOSE";
else if(type == SHAPE_UNSQUEEZE) else if (type == SHAPE_UNSQUEEZE)
return "S_UNSQUEEZE"; return "S_UNSQUEEZE";
else if (type == SORT_SORT)
return "S_SORT";
else if (type == SORT_TOPK)
return "S_TOPK";
} }
else if((type & FUNCTION_BASE) != 0){ else if ((type & FUNCTION_BASE) != 0){
if(type == FUNC_HARDTANH) if (type == FUNC_HARDTANH)
return "F_HARDTANH"; return "F_HARDTANH";
else if(type == FUNC_IDENTITY) else if (type == FUNC_IDENTITY)
return "F_IDENTITY"; return "F_IDENTITY";
else if(type == FUNC_LOGSOFTMAX) else if (type == FUNC_LOGSOFTMAX)
return "F_LOGSOFTMAX"; return "F_LOGSOFTMAX";
else if(type == FUNC_RECTIFY) else if (type == FUNC_RECTIFY)
return "F_RECTIFY"; return "F_RECTIFY";
else if(type == FUNC_SIGMOID) else if (type == FUNC_SIGMOID)
return "F_SIGMOID"; return "F_SIGMOID";
else if(type == FUNC_SOFTMAX) else if (type == FUNC_SOFTMAX)
return "F_SOFTMAX"; return "F_SOFTMAX";
} }
......
...@@ -69,6 +69,10 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -69,6 +69,10 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define SHAPE_TRANSPOSE SHAPE_SPLIT_LIST + 1 #define SHAPE_TRANSPOSE SHAPE_SPLIT_LIST + 1
#define SHAPE_UNSQUEEZE SHAPE_TRANSPOSE + 1 #define SHAPE_UNSQUEEZE SHAPE_TRANSPOSE + 1
#define SORT SHAPE_UNSQUEEZE + 1
#define SORT_SORT SORT + 1
#define SORT_TOPK SORT_SORT + 1
/* activation functions */ /* activation functions */
#define FUNCTION_BASE DATA_BASE * 2 #define FUNCTION_BASE DATA_BASE * 2
#define FUNC_HARDTANH FUNCTION_BASE + 1 #define FUNC_HARDTANH FUNCTION_BASE + 1
......
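A quick illustration of the id arithmetic above: the new SORT_* codes stay inside the DATA_BASE block, so GetOPName() resolves them in its second branch (the variables here are only for illustration):

int id = SORT_TOPK;                        /* = SHAPE_UNSQUEEZE + 3                  */
const char * name = GetOPName(id);         /* -> "S_TOPK", via the (type & DATA_BASE) branch */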
...@@ -1121,7 +1121,7 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize, ...@@ -1121,7 +1121,7 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
if(isSparse){ if(isSparse){
/* /*
for sparse matrices, we use a list of tuple (key, value), for sparse matrices, we use a list of tuple (key, value),
ordered by key. Take a (2-dimensional) matrix as an examples, ordered by key. Take a (2-dimensional) matrix as an example,
we have key = m * i + j; we have key = m * i + j;
The data array is The data array is
--------- ---------
......
...@@ -486,8 +486,9 @@ quick sorting ...@@ -486,8 +486,9 @@ quick sorting
NOTE: this means that the items may not be placed in a continuous memory space
>> comp - the comparison function >> comp - the comparison function
*/ */
void XQSort(void * data, void * index, int num, int width, int stride, int (*comp)(const void *, const void *)) void XQSort(void * dataA, void * dataB, void * index, int num, int width, int stride, int (*comp)(const void *, const void *))
{ {
XMemCopy(dataB, -1, dataA, -1, num * width);
char *lo, *hi; // ends of sub-array currently sorting char *lo, *hi; // ends of sub-array currently sorting
int *indexlo, *indexhi; int *indexlo, *indexhi;
char *mid; // points to middle of subarray char *mid; // points to middle of subarray
...@@ -506,8 +507,8 @@ void XQSort(void * data, void * index, int num, int width, int stride, int (*com ...@@ -506,8 +507,8 @@ void XQSort(void * data, void * index, int num, int width, int stride, int (*com
stackptr = 0; stackptr = 0;
lo = (char*)data; lo = (char*)dataB;
hi = (char*)data + realStride * (num - 1); hi = (char*)dataB + realStride * (num - 1);
indexlo = (int*)index; indexlo = (int*)index;
indexhi = index != NULL ? (int*)index + stride * (num - 1) : NULL; indexhi = index != NULL ? (int*)index + stride * (num - 1) : NULL;
......
...@@ -53,7 +53,7 @@ extern void XSleep(int sleepTime); ...@@ -53,7 +53,7 @@ extern void XSleep(int sleepTime);
extern double GetClock(); extern double GetClock();
extern double GetClockSec(); extern double GetClockSec();
extern void XQSort(void * data, void * index, int num, int width, int stride, int (*comp)(const void *, const void *)); extern void XQSort(void * dataA, void * dataB, void * index, int num, int width, int stride, int (*comp)(const void *, const void *));
extern int CompXFloat(const void * a, const void * b); extern int CompXFloat(const void * a, const void * b);
#ifdef USE_CUDA #ifdef USE_CUDA
......
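A sketch of the new XQSort calling convention: the input array is copied into dataB and the sort runs there, so dataA is left untouched. The sizes, the index initialization and the ascending order of CompXFloat are assumptions:

float src[4] = {3.0F, 1.0F, 4.0F, 2.0F};
float dst[4];
int   idx[4] = {0, 1, 2, 3};
XQSort(src, dst, idx, 4, sizeof(float), 1, CompXFloat);
/* src still holds 3 1 4 2; dst holds the sorted values and idx the matching permutation */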
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <math.h> #include <math.h>
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h"
#include "Absolute.h" #include "Absolute.h"
#include "Absolute.cuh" #include "Absolute.cuh"
...@@ -28,21 +29,54 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,21 +29,54 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
set every entry to its absolute value set every entry to its absolute value
>> a - the tensor we are processing >> a - input tensor we are processing
>> b - output tensor we are processing
*/ */
void _Absolute(XTensor * a) void _Absolute(const XTensor * a, XTensor * b)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
/* run it on GPUs */ /* run it on GPUs */
if (a->devID >= 0) { if (a->devID >= 0) {
_CudaAbsolute(a); _CudaAbsolute(a, b);
return; return;
} }
#endif #endif
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data; DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++) for (int i = 0; i < a->unitNum; i++)
d[i] = (DTYPE)fabs(d[i]); db[i] = (DTYPE)fabs(d[i]);
}
/*
set every entry to its absolute value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void _AbsoluteMe(XTensor * a)
{
_Absolute(a, a);
}
/*
set every entry to its absolute value (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the absolute value of input tensor
*/
XTensor Absolute(const XTensor & a)
{
XTensor b(&a);
b.SetTMP();
/* call _Absolute function */
_Absolute(&a, &b);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_ABSOLUTE);
return b;
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
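The commit introduces the three calling styles used throughout the rest of the patch; a sketch with tensor setup omitted:

_Absolute(&a, &b);          /* low-level: write |a| into a caller-provided b             */
_AbsoluteMe(&a);            /* "Me" variant: in place, overwrites a                      */
XTensor c = Absolute(a);    /* high-level: returns a TMP tensor linked via MATH_ABSOLUTE */

The same triple is added below for Negate, Sign, Log and Power.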
...@@ -29,37 +29,41 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,37 +29,41 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* /*
set each entry to its absolute value (CUDA Kernel) set each entry to its absolute value (CUDA Kernel)
>> d - pointer to the data array >> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelAbsolute(DTYPE * d, int size) void KernelAbsolute(DTYPE * a, DTYPE * b, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) if (i < size)
d[i] = fabs(d[i]); b[i] = fabs(a[i]);
} }
/* /*
set each entry to its absolute value (CUDA Kernel) set each entry to its absolute value (CUDA Kernel)
This is for float16 computation This is for float16 computation
>> d - pointer to the data array >> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelAbsolute(__half * d, int size) void KernelAbsolute(__half * a, __half * b, int size)
{ {
return; return;
} }
/* /*
set each entry to its with float16 data type value set each entry to its absolute value
>> a - the tensor >> a - input tensor
>> b - output tensor
*/ */
extern "C" extern "C"
void _CudaAbsolute(XTensor * a) void _CudaAbsolute(const XTensor * a, XTensor * b)
{ {
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!"); CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3]; int gridSize[3];
...@@ -74,10 +78,10 @@ void _CudaAbsolute(XTensor * a) ...@@ -74,10 +78,10 @@ void _CudaAbsolute(XTensor * a)
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
KernelAbsolute << <blocks, threads >> >((DTYPE*)a->data, a->unitNum); KernelAbsolute << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum);
} }
else if (a->dataType == X_FLOAT16) { else if (a->dataType == X_FLOAT16) {
KernelAbsolute << <blocks, threads >> >((__half*)a->data, a->unitNum); KernelAbsolute << <blocks, threads >> >((__half*)a->data, (__half*)b->data, a->unitNum);
} }
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
......
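The kernels follow the usual one-thread-per-element pattern; a simplified launch sketch (the block size here is illustrative; the real code derives the grid/block geometry from device helper routines only partially shown in this hunk):

int size = a->unitNum;
int threadsPerBlock = 128;
int blocksPerGrid   = (size + threadsPerBlock - 1) / threadsPerBlock;
KernelAbsolute<<<blocksPerGrid, threadsPerBlock>>>((DTYPE*)a->data, (DTYPE*)b->data, size);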
...@@ -27,15 +27,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -27,15 +27,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set each entry to its absolute value (CUDA Kernel) */ /* set each entry to its absolute value (CUDA Kernel) */
__global__ __global__
void KernelAbsolute(DTYPE * d, int size); void KernelAbsolute(DTYPE * a, DTYPE * b, int size);
/* set each entry to its absolute value (CUDA Kernel) with float16 data type*/ /* set each entry to its absolute value (CUDA Kernel) with float16 data type*/
__global__ __global__
void KernelAbsolute(__half * d, int size); void KernelAbsolute(__half * a, __half * b, int size);
/* set each entry to its absolute value */ /* set each entry to its absolute value */
extern "C" extern "C"
void _CudaAbsolute(XTensor * a); void _CudaAbsolute(const XTensor * a, XTensor * b);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -27,8 +27,19 @@ ...@@ -27,8 +27,19 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its absolute value */ /* set every entry to its absolute value */
extern "C" void _Absolute(const XTensor * a, XTensor * b);
void _Absolute(XTensor * a);
/*
set every entry to its absolute value (do it on site)
keep the result in the input tensor a and return nothing
*/
void _AbsoluteMe(XTensor * a);
/*
set every entry to its absolute value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Absolute(const XTensor & a);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -41,8 +41,8 @@ void _MatrixMULBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA, ...@@ -41,8 +41,8 @@ void _MatrixMULBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA,
const XList * b, MATRIX_TRANS_TYPE transposedB, const XList * b, MATRIX_TRANS_TYPE transposedB,
XList * c, DTYPE alpha, DTYPE beta) XList * c, DTYPE alpha, DTYPE beta)
{ {
CheckNTErrors((a && b && c), "Empty input lists!"); CheckNTErrors(a && b && c, "Empty input lists!");
CheckNTErrors((a->count == b->count && a->count == c->count), "Input lists must be of the same size!"); CheckNTErrors(a->count == b->count && a->count == c->count, "Input lists must be of the same size!");
if (a->count == 0) if (a->count == 0)
return; return;
......
...@@ -28,8 +28,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,8 +28,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* matrix multiplication in batch mode (CPU code) */ /* matrix multiplication in batch mode (CPU code) */
extern "C" extern "C"
void _MatrixMULBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA, const XList * b, MATRIX_TRANS_TYPE transposedB, XList * c, void _MatrixMULBatchedCPU(const XList * a, MATRIX_TRANS_TYPE transposedA, const XList * b, MATRIX_TRANS_TYPE transposedB,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0); XList * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -123,8 +123,7 @@ where trans() return the transposed matrix if the flag is fired ...@@ -123,8 +123,7 @@ where trans() return the transposed matrix if the flag is fired
*/ */
void _CudaMatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA, void _CudaMatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
const XTensor * b, MATRIX_TRANS_TYPE transposedB, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, XTensor * c, DTYPE alpha, DTYPE beta, XStream * stream)
DTYPE alpha, DTYPE beta, XStream * stream)
{ {
int an = transposedA == X_TRANS ? a->dimSize[1] : a->dimSize[0]; int an = transposedA == X_TRANS ? a->dimSize[1] : a->dimSize[0];
int am = transposedA == X_TRANS ? a->dimSize[0] : a->dimSize[1]; int am = transposedA == X_TRANS ? a->dimSize[0] : a->dimSize[1];
......
...@@ -32,8 +32,8 @@ c = trans(a) * trans(b) * alpha + c * beta ...@@ -32,8 +32,8 @@ c = trans(a) * trans(b) * alpha + c * beta
where trans() return the transposed matrix if the flag is fired. where trans() return the transposed matrix if the flag is fired.
*/ */
extern "C" extern "C"
void _MatrixMul2DParallel(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, void _MatrixMul2DParallel(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL); XTensor * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0, XPRunner * parallelRunner = NULL);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -47,8 +47,7 @@ where trans() returns the transposed matrix if the flag is fired ...@@ -47,8 +47,7 @@ where trans() returns the transposed matrix if the flag is fired
*/ */
void _MatrixMulBatched(const XTensor * a, MATRIX_TRANS_TYPE transposedA, void _MatrixMulBatched(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
const XTensor * b, MATRIX_TRANS_TYPE transposedB, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha, DTYPE beta, XTensor * c, DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
XPRunner * parallelRunner)
{ {
CheckNTErrors((a && b && c), "Empty input tensors!"); CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((a->dataType == b->dataType && a->dataType == c->dataType), CheckNTErrors((a->dataType == b->dataType && a->dataType == c->dataType),
...@@ -169,14 +168,12 @@ where trans() returns the transposed matrix if the flag is fired. ...@@ -169,14 +168,12 @@ where trans() returns the transposed matrix if the flag is fired.
>> b - tensor b >> b - tensor b
>> transposedB - indicates whether the matrices in b are transposed >> transposedB - indicates whether the matrices in b are transposed
>> alpha - a coefficient >> alpha - a coefficient
>> beta - another coefficient
>> parallelRunner - parallel processing module >> parallelRunner - parallel processing module
<< return - the result of matrix multiplication of the two tensors << return - the result of matrix multiplication of the two tensors
*/ */
XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
DTYPE alpha, XPRunner * parallelRunner) DTYPE alpha, XPRunner * parallelRunner)
{ {
CheckNTErrors(&a != &NULLTensor && &b != &NULLTensor, "Empty input tensors!");
CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!"); CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!"); CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
CheckNTErrors(a.order == b.order, "Input tensor and output tensor must have same order!"); CheckNTErrors(a.order == b.order, "Input tensor and output tensor must have same order!");
...@@ -191,13 +188,13 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const ...@@ -191,13 +188,13 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const
int order = a.order; int order = a.order;
int sub = 0; int sub = 0;
int * dimSize = new int[order]; int * dimSize = new int[order];
for (int i = 2; i < a.order; i++) for (int i = 0; i < a.order - 2; i++)
dimSize[sub++] = a.dimSizeRDI[i]; dimSize[sub++] = a.dimSize[i];
dimSize[sub++] = an; dimSize[sub++] = an;
dimSize[sub++] = bm; dimSize[sub++] = bm;
XTensor c = NewTensor(order, dimSize, a.dataType, a.denseRatio, a.devID, a.mem); float dr = (!a.isSparse || !b.isSparse) ? 1.0F : MAX(a.denseRatio, b.denseRatio);
c.SetZeroAll(); XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem);
c.SetTMP(); c.SetTMP();
/*call _MatrixMulBatched function */ /*call _MatrixMulBatched function */
......
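A worked example of the corrected shape loop: the leading (batch) dimensions are copied first, then an and bm are appended. The sizes, the X_NOTRANS flag and the explicit trailing arguments are illustrative assumptions:

/* a: (4, 2, 3), b: (4, 3, 5), no transposition
   -> dimSize = {4, 2, 5}, i.e. c has shape (4, 2, 5) */
XTensor c = MatrixMulBatched(a, X_NOTRANS, b, X_NOTRANS, (DTYPE)1.0, NULL);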
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
*/ */
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h"
#include "Negate.h" #include "Negate.h"
#include "Negate.cuh" #include "Negate.cuh"
...@@ -27,21 +28,55 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -27,21 +28,55 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
set every entry to its minus value set every entry to its minus value
>> a - the tensor we are processing >> a - input tensor we are processing
>> b - output tensor we are processing
*/ */
void _Negate(XTensor * a) void _Negate(const XTensor * a, XTensor * b)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
/* run it on GPUs */ /* run it on GPUs */
if (a->devID >= 0) { if (a->devID >= 0) {
_CudaNegate(a); _CudaNegate(a, b);
return; return;
} }
#endif #endif
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data; DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++) for (int i = 0; i < a->unitNum; i++)
d[i] = -d[i]; db[i] = -d[i];
} }
/*
set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void _NegateMe(XTensor * a)
{
_Negate(a, a);
}
/*
set every entry to its minus value (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the minus value of input tensor
*/
XTensor Negate(const XTensor & a)
{
XTensor b(&a);
b.SetTMP();
/* call _Negate function */
_Negate(&a, &b);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_NEGATE);
return b;
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -29,45 +29,49 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,45 +29,49 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* /*
set each entry to its negative value (CUDA Kernel) set each entry to its negative value (CUDA Kernel)
>> d - pointer to the data array >> a - pointer to the input data array
>> b - pointer to the output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelNegate(DTYPE * d, int size) void KernelNegate(DTYPE * a, DTYPE * b, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) if (i < size)
d[i] = -d[i]; b[i] = -a[i];
} }
/* /*
set each entry to its negative value (CUDA Kernel) set each entry to its negative value (CUDA Kernel)
This is for float16 computation This is for float16 computation
>> d - pointer to the data array >> a - pointer to the input data array
>> b - pointer to the output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelNegate(__half * d, int size) void KernelNegate(__half * a, __half * b, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__) #if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
if (i < size) if (i < size)
d[i] = __hsub(__float2half(0), d[i]); b[i] = __hsub(__float2half(0), a[i]);
#else #else
if (i < size) if (i < size)
d[i] = __float2half(-__half2float(d[i])); b[i] = __float2half(-__half2float(a[i]));
#endif #endif
} }
/* /*
set each entry to its negative value set each entry to its negative value
>> a - the tensor >> a - input tensor
>> b - output tensor
*/ */
extern "C" extern "C"
void _CudaNegate(XTensor * a) void _CudaNegate(const XTensor * a, XTensor * b)
{ {
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!"); CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3]; int gridSize[3];
...@@ -82,10 +86,10 @@ void _CudaNegate(XTensor * a) ...@@ -82,10 +86,10 @@ void _CudaNegate(XTensor * a)
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
KernelNegate << <blocks, threads >> >((DTYPE*)a->data, a->unitNum); KernelNegate << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum);
} }
else if (a->dataType == X_FLOAT16) { else if (a->dataType == X_FLOAT16) {
KernelNegate << <blocks, threads >> >((__half*)a->data, a->unitNum); KernelNegate << <blocks, threads >> >((__half*)a->data, (__half*)b->data, a->unitNum);
} }
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
......
...@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set each entry to its negative value (CUDA Kernel) */ /* set each entry to its negative value (CUDA Kernel) */
__global__ __global__
void KernelNegate(DTYPE * d, int size); void KernelNegate(DTYPE * a, DTYPE * b, int size);
/* set each entry to its negative value (CUDA Kernel) with float16 data type*/ /* set each entry to its negative value (CUDA Kernel) with float16 data type*/
__global__ __global__
void KernelNegate(__half * d, int size); void KernelNegate(__half * a, __half * b, int size);
/* set each entry to its negative value */ /* set each entry to its negative value */
extern "C" extern "C"
void _CudaNegate(XTensor * a); void _CudaNegate(const XTensor * a, XTensor * b);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -27,8 +27,19 @@ ...@@ -27,8 +27,19 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its minus value */ /* set every entry to its minus value */
extern "C" void _Negate(const XTensor * a, XTensor * b);
void _Negate(XTensor * a);
/*
set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
*/
void _NegateMe(XTensor * a);
/*
set every entry to its minus value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Negate(const XTensor & a);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
*/ */
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h"
#include "Sign.h" #include "Sign.h"
#include "Sign.cuh" #include "Sign.cuh"
...@@ -27,27 +28,60 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -27,27 +28,60 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
set every entry to its sign value set every entry to its sign value
>> a - the tensor we are processing >> a - input tensor we are processing
>> b - output tensor we are processing
*/ */
void _Sign(XTensor * a) void _Sign(const XTensor * a, XTensor * b)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
/* run it on GPUs */ /* run it on GPUs */
if (a->devID >= 0) { if (a->devID >= 0) {
_CudaSign(a); _CudaSign(a, b);
return; return;
} }
#endif #endif
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data; DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++) { for (int i = 0; i < a->unitNum; i++) {
if (d[i] > 0) if (d[i] > 0)
d[i] = 1.0F; db[i] = 1.0F;
else if (d[i] == 0) else if (d[i] == 0)
d[i] = 0.0F; db[i] = 0.0F;
else else
d[i] = -1.0F; db[i] = -1.0F;
} }
} }
/*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void _SignMe(XTensor * a)
{
_Sign(a, a);
}
/*
set every entry to its sign value (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the sign value of the input tensor
*/
XTensor Sign(const XTensor & a)
{
XTensor b(&a);
b.SetTMP();
/* call _Sign function */
_Sign(&a, &b);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_SIGN);
return b;
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -29,43 +29,47 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,43 +29,47 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* /*
set each entry to its sign value (CUDA Kernel) set each entry to its sign value (CUDA Kernel)
>> d - pointer to the data array >> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelSign(DTYPE * d, int size) void KernelSign(DTYPE * a, DTYPE * b, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) { if (i < size) {
if (d[i] > 0) if (a[i] > 0)
d[i] = 1.0F; b[i] = 1.0F;
else if (d[i] == 0) else if (a[i] == 0)
d[i] = 0.0F; b[i] = 0.0F;
else else
d[i] = -1.0F; b[i] = -1.0F;
} }
} }
/* /*
set each entry to its sign value (CUDA Kernel) set each entry to its sign value with float16 data type value (CUDA Kernel)
This is for float16 computation This is for float16 computation
>> d - pointer to the data array >> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelSign(__half * d, int size) void KernelSign(__half * a, __half * b, int size)
{ {
return; return;
} }
/* /*
set each entry to its with float16 data type value set each entry to its sign value
>> a - the tensor >> a - input tensor we are processing
>> b - output tensor we are processing
*/ */
extern "C" extern "C"
void _CudaSign(XTensor * a) void _CudaSign(const XTensor * a, XTensor * b)
{ {
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!"); CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3]; int gridSize[3];
...@@ -80,10 +84,10 @@ void _CudaSign(XTensor * a) ...@@ -80,10 +84,10 @@ void _CudaSign(XTensor * a)
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
KernelSign << <blocks, threads >> >((DTYPE*)a->data, a->unitNum); KernelSign << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum);
} }
else if (a->dataType == X_FLOAT16) { else if (a->dataType == X_FLOAT16) {
KernelSign << <blocks, threads >> >((__half*)a->data, a->unitNum); KernelSign << <blocks, threads >> >((__half*)a->data, (__half*)b->data, a->unitNum);
} }
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
......
...@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set each entry to its sign value (CUDA Kernel) */ /* set each entry to its sign value (CUDA Kernel) */
__global__ __global__
void KernelSign(DTYPE * d, int size); void KernelSign(DTYPE * a, DTYPE * b, int size);
/* set each entry to its sign value (CUDA Kernel) with float16 data type*/ /* set each entry to its sign value (CUDA Kernel) with float16 data type*/
__global__ __global__
void KernelSign(__half * d, int size); void KernelSign(__half * a, __half * b, int size);
/* set each entry to its sign value */ /* set each entry to its sign value */
extern "C" extern "C"
void _CudaSign(XTensor * a); void _CudaSign(const XTensor * a, XTensor * b);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -27,8 +27,19 @@ ...@@ -27,8 +27,19 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its sign value */ /* set every entry to its sign value */
extern "C" void _Sign(const XTensor * a, XTensor * b);
void _Sign(XTensor * a);
/*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
*/
void _SignMe(XTensor * a);
/*
set every entry to its sign value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Sign(const XTensor & a);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -28,7 +28,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,7 +28,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* matrix multiplication (BLAS) */ /* matrix multiplication (BLAS) */
extern "C" extern "C"
void _MatrixMULCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB, XTensor * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0); void _MatrixMULCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA, const XTensor * b, MATRIX_TRANS_TYPE transposedB,
XTensor * c, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 0);
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -46,7 +47,8 @@ void _CudaBLASMatrixMULBatched(cublasHandle_t * handle, ...@@ -46,7 +47,8 @@ void _CudaBLASMatrixMULBatched(cublasHandle_t * handle,
const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, const void ** a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA,
const void ** b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, const void ** b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB,
void ** c, TENSOR_DATA_TYPE dataTypeC, void ** c, TENSOR_DATA_TYPE dataTypeC,
int count, int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0); int count, int na, int ma, int nb, int mb, int nc, int mc,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
/* matrix multiplication in batch and strided mode via cuda version BLAS */ /* matrix multiplication in batch and strided mode via cuda version BLAS */
extern "C" extern "C"
...@@ -54,11 +56,13 @@ void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle, ...@@ -54,11 +56,13 @@ void _CudaBLASMatrixMULBatchedStrided(cublasHandle_t * handle,
const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA, const void * a, MATRIX_TRANS_TYPE transposedA, TENSOR_DATA_TYPE dataTypeA, long long int strideA,
const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, long long int strideB, const void * b, MATRIX_TRANS_TYPE transposedB, TENSOR_DATA_TYPE dataTypeB, long long int strideB,
void * c, TENSOR_DATA_TYPE dataTypeC, long long int strideC, void * c, TENSOR_DATA_TYPE dataTypeC, long long int strideC,
int count, int na, int ma, int nb, int mb, int nc, int mc, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0); int count, int na, int ma, int nb, int mb, int nc, int mc,
DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
/* matrix multiplication in batch mode via cuda version BLAS */ /* matrix multiplication in batch mode via cuda version BLAS */
extern "C" extern "C"
void _CudaBLASMatrixMULList(cublasHandle_t * handle, const XList * a, MATRIX_TRANS_TYPE transposedA, const XList * b, MATRIX_TRANS_TYPE transposedB, XList * c, void _CudaBLASMatrixMULList(cublasHandle_t * handle, const XList * a, MATRIX_TRANS_TYPE transposedA,
const XList * b, MATRIX_TRANS_TYPE transposedB, XList * c,
int count, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0); int count, DTYPE alpha = (DTYPE)1.0, DTYPE beta = 1.0);
#endif #endif
......
...@@ -96,7 +96,6 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high) ...@@ -96,7 +96,6 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high)
int order = a.order; int order = a.order;
int * dimSize = new int[order]; int * dimSize = new int[order];
CheckNTErrors(&a != NULL, "Empty input tensors!");
CheckNTErrors(dim >= 0 && dim < a.order, "The input dimension is out of bounds!"); CheckNTErrors(dim >= 0 && dim < a.order, "The input dimension is out of bounds!");
CheckNTErrors(low < high, "Illegal range specified!"); CheckNTErrors(low < high, "Illegal range specified!");
...@@ -110,8 +109,8 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high) ...@@ -110,8 +109,8 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high)
dimSize[i] = a.dimSize[i]; dimSize[i] = a.dimSize[i];
} }
XTensor c = NewTensor(order, dimSize, a.dataType, a.denseRatio, a.devID, a.mem); float dr = (!a.isSparse) ? 1.0F : a.denseRatio;
c.SetZeroAll(); XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem);
c.SetTMP(); c.SetTMP();
/* call _SelectRange function */ /* call _SelectRange function */
......
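A small usage sketch of SelectRange with the new direct construction. The sizes are illustrative, and the kept range is read here as [low, high) based on the range check above:

/* a has shape (5, 8); keep rows 1 and 2 along dim 0 -> c likely has shape (2, 8) */
XTensor c = SelectRange(a, 0, 1, 3);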
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
*/ */
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h"
#include "Log.h" #include "Log.h"
#include "Log.cuh" #include "Log.cuh"
#include <math.h> #include <math.h>
...@@ -27,22 +28,55 @@ ...@@ -27,22 +28,55 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
set every entry to its log value set every entry to its log value
>> a - the tensor we are processing >> a - input tensor we are processing
>> b - output tensor we are processing
*/ */
void _Log(XTensor * a) void _Log(const XTensor * a, XTensor * b)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
/* run it on GPUs */ /* run it on GPUs */
if (a->devID >= 0) { if (a->devID >= 0) {
_CudaLog(a); _CudaLog(a, b);
return; return;
} }
#endif #endif
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data; DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++) for (int i = 0; i < a->unitNum; i++)
d[i] = (DTYPE)log(d[i]); db[i] = (DTYPE)log(d[i]);
}
/*
set every entry to its log value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void _LogMe(XTensor * a)
{
_Log(a, a);
}
/*
set every entry to its log value (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
<< return - the log value of the input tensor
*/
XTensor Log(const XTensor & a)
{
XTensor b(&a);
b.SetTMP();
/* call _Log function */
_Log(&a, &b);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_LOG);
return b;
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -29,37 +29,41 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,37 +29,41 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA #ifdef USE_CUDA
/* /*
set each entry to its log value (CUDA Kernel) set each entry to its log value (CUDA Kernel)
>> d - pointer to the data array >> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelLog(DTYPE * d, int size) void KernelLog(DTYPE * a, DTYPE * b, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) if (i < size)
d[i] = log(d[i]); b[i] = log(a[i]);
} }
/* /*
set each entry to its log value (CUDA Kernel) set each entry to its log value (CUDA Kernel)
This is for float16 computation This is for float16 computation
>> d - pointer to the data array >> a - pointer to input data array
>> b - pointer to output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelLog(__half * d, int size) void KernelLog(__half * a, __half * b, int size)
{ {
return; return;
} }
/* /*
set each entry to its log value set each entry to its log value
>> a - the tensor >> a - input tensor
>> b - output tensor
*/ */
extern "C" extern "C"
void _CudaLog(XTensor * a) void _CudaLog(const XTensor * a, XTensor * b)
{ {
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!"); CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3]; int gridSize[3];
...@@ -74,10 +78,10 @@ void _CudaLog(XTensor * a) ...@@ -74,10 +78,10 @@ void _CudaLog(XTensor * a)
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
KernelLog << <blocks, threads >> >((DTYPE*)a->data, a->unitNum); KernelLog << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum);
} }
else if (a->dataType == X_FLOAT16) { else if (a->dataType == X_FLOAT16) {
KernelLog << <blocks, threads >> >((__half*)a->data, a->unitNum); KernelLog << <blocks, threads >> >((__half*)a->data, (__half*)b->data, a->unitNum);
} }
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
......
...@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set each entry to its log value (CUDA Kernel) */ /* set each entry to its log value (CUDA Kernel) */
__global__ __global__
void KernelLog(DTYPE * d, int size); void KernelLog(DTYPE * a, DTYPE * b, int size);
/* set each entry to its log value (CUDA Kernel) with float16 data type*/ /* set each entry to its log value (CUDA Kernel) with float16 data type*/
__global__ __global__
void KernelLog(__half * d, int size); void KernelLog(__half * a, __half * b, int size);
/* set each entry to its log value */ /* set each entry to its log value */
extern "C" extern "C"
void _CudaLog(XTensor * a); void _CudaLog(const XTensor * a, XTensor * b);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -27,8 +27,19 @@ ...@@ -27,8 +27,19 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its log value */ /* set every entry to its log value */
extern "C" void _Log(const XTensor * a, XTensor * b);
void _Log(XTensor * a);
/*
set every entry to its log value (do it on site)
keep the result in the input tensor a and return nothing
*/
void _LogMe(XTensor * a);
/*
set every entry to its log value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Log(const XTensor & a);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <math.h> #include <math.h>
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h"
#include "Power.h" #include "Power.h"
#include "Power.cuh" #include "Power.cuh"
...@@ -28,38 +29,73 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -28,38 +29,73 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
get the power(a, p) get the power(a, p)
>> a - the tensor >> a - input tensor
>> p - as it is >> b - output tensor
>> p - parameter
*/ */
void _Power(XTensor * a, DTYPE p) void _Power(const XTensor * a, XTensor * b, DTYPE p)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
/* run it on GPUs */ /* run it on GPUs */
if (a->devID >= 0) { if (a->devID >= 0) {
_CudaPower(a, p); _CudaPower(a, b, p);
return; return;
} }
#endif #endif
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data; DTYPE * aData = (DTYPE*)a->data;
DTYPE * bData = (DTYPE*)b->data;
if (p == 0) { if (p == 0) {
for (int i = 0; i < a->unitNum; i++) for (int i = 0; i < a->unitNum; i++)
d[i] = (DTYPE)1.0; bData[i] = (DTYPE)1.0;
} }
else if (p == (DTYPE)0.5) { else if (p == (DTYPE)0.5) {
for (int i = 0; i < a->unitNum; i++) for (int i = 0; i < a->unitNum; i++)
d[i] = (DTYPE)sqrt(d[i]); bData[i] = (DTYPE)sqrt(aData[i]);
} }
else if (p == (DTYPE)2.0) { else if (p == (DTYPE)2.0) {
for (int i = 0; i < a->unitNum; i++) for (int i = 0; i < a->unitNum; i++)
d[i] = d[i] * d[i]; bData[i] = aData[i] * aData[i];
} }
else { else {
for (int i = 0; i < a->unitNum; i++) for (int i = 0; i < a->unitNum; i++)
d[i] = (DTYPE)pow(d[i], p); bData[i] = (DTYPE)pow(aData[i], p);
} }
} }
/*
get the power(a, p) (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor
>> p - parameter
*/
void _PowerMe(XTensor * a, DTYPE p)
{
_Power(a, a, p);
}
/*
get the power(a, p) (return a XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor
>> p - parameter
<< return - the power value of the input tensor
*/
XTensor Power(const XTensor & a, DTYPE p)
{
XTensor b(&a);
b.SetTMP();
/* call _Power function */
_Power(&a, &b, p);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_POWER);
XLink::AddParamToHead(&b, p);
return b;
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
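A usage sketch of the new high-level Power: the exponent is stored on the link via AddParamToHead, so it remains available when the graph is traversed later. The tensor x is illustrative:

XTensor y = Power(x, (DTYPE)2.0);   /* y_i = x_i * x_i; the link carries MATH_POWER and p = 2.0 */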
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "../../XDevice.h" #include "../../XDevice.h"
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../movement/CopyValues.cuh"
#include "Power.h" #include "Power.h"
#include "Power.cuh" #include "Power.cuh"
...@@ -30,74 +31,80 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,74 +31,80 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
set all entries to its root (CUDA Kernel) set all entries to its root (CUDA Kernel)
>> d - data array >> a - input data array
>> b - output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelSqrtV2(DTYPE * d, int size) void KernelSqrtV2(DTYPE * a, DTYPE * b, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) if (i < size)
d[i] = sqrt(d[i]); b[i] = sqrt(a[i]);
} }
/* /*
set all entries to its root (CUDA Kernel) set all entries to its root (CUDA Kernel)
>> d - data array >> a - input data array
>> b - output data array
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelSqrtV2(__half * d, int size) void KernelSqrtV2(__half * a, __half * b, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__) #if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
if (i < size) if (i < size)
d[i] = hsqrt(d[i]); b[i] = hsqrt(a[i]);
#else #else
if (i < size) if (i < size)
d[i] = __float2half(sqrt(__half2float(d[i]))); b[i] = __float2half(sqrt(__half2float(a[i])));
#endif #endif
} }
/* /*
get power(d[i], p) get power(d[i], p)
>> d - data array >> a - input data array
>> b - output data array
>> p - power >> p - power
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelPower(DTYPE * d, DTYPE p, int size) void KernelPower(DTYPE * a, DTYPE * b, DTYPE p, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) if (i < size)
d[i] = pow(d[i], p); b[i] = pow(a[i], p);
} }
/* /*
get power(d[i], p) get power(d[i], p)
>> d - data array >> a - input data array
>> b - output data array
>> p - power >> p - power
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelPower(__half * d, __half p, int size) void KernelPower(__half * a, __half * b, __half p, int size)
{ {
#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__) #if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
#else #else
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) if (i < size)
d[i] = __float2half(pow(__half2float(d[i]), __half2float(p))); b[i] = __float2half(pow(__half2float(a[i]), __half2float(p)));
#endif #endif
} }
/* get the power of the entries */ /* get the power of the entries */
extern "C" extern "C"
void _CudaPower(XTensor * a, DTYPE p) void _CudaPower(const XTensor * a, XTensor * b, DTYPE p)
{ {
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
int gridSize[3]; int gridSize[3];
int blockSize[3]; int blockSize[3];
...@@ -111,15 +118,18 @@ void _CudaPower(XTensor * a, DTYPE p) ...@@ -111,15 +118,18 @@ void _CudaPower(XTensor * a, DTYPE p)
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
if (p == (DTYPE)0.5) { if (p == (DTYPE)0.5) {
KernelSqrtV2 << <blocks, threads >> >((DTYPE*)a->data, a->unitNum); KernelSqrtV2 << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, a->unitNum);
}
else if (p == (DTYPE)1.0) {
_CudaCopyValues(a, b);
} }
else if (p != (DTYPE)1.0) { else if (p != (DTYPE)1.0) {
KernelPower << <blocks, threads >> >((DTYPE*)a->data, p, a->unitNum); KernelPower << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, p, a->unitNum);
} }
} }
else if (a->dataType == X_FLOAT16) { else if (a->dataType == X_FLOAT16) {
if (p == (DTYPE)0.5) { if (p == (DTYPE)0.5) {
KernelSqrtV2 << <blocks, threads >> >((__half*)a->data, a->unitNum); KernelSqrtV2 << <blocks, threads >> >((__half*)a->data, (__half*)b->data, a->unitNum);
} }
else if (p != (DTYPE)1.0) { else if (p != (DTYPE)1.0) {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
......
...@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,15 +30,15 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set all entries to its root (CUDA Kernel) */ /* set all entries to its root (CUDA Kernel) */
__global__ __global__
void KernelSqrtV2(DTYPE * d, int size); void KernelSqrtV2(DTYPE * a, DTYPE * b, int size);
/* set all entries to its root (CUDA Kernel) */ /* set all entries to its root (CUDA Kernel) */
__global__ __global__
void KernelSqrtV2(__half * d, int size); void KernelSqrtV2(__half * a, __half * b, int size);
/* get the power of the entries */ /* get the power of the entries */
extern "C" extern "C"
void _CudaPower(XTensor * a, DTYPE p); void _CudaPower(const XTensor * a, XTensor * b, DTYPE p);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -27,8 +27,19 @@ ...@@ -27,8 +27,19 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* get the power(x, y) */ /* get the power(x, y) */
extern "C" void _Power(const XTensor * a, XTensor * b, DTYPE p);
void _Power(XTensor * a, DTYPE p);
/*
get the power(x, y) (do it on site)
keep the result in the input tensor a and return nothing
*/
void _PowerMe(XTensor * a, DTYPE p);
/*
get the power(x, y) (return a XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Power(const XTensor & a, DTYPE p);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -110,8 +110,7 @@ make a new tensor to keep the result and return it ...@@ -110,8 +110,7 @@ make a new tensor to keep the result and return it
*/ */
XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum) XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum)
{ {
CheckNTErrors(&s, "Empty input tensor!"); CheckNTErrors(dim >= 0 && dim < s.order, "A too larget dimension specified!");
CheckNTErrors((dim >= 0 && dim < s.order), "A too larget dimension specified!");
int order = s.order; int order = s.order;
int * dimSize = new int[order]; int * dimSize = new int[order];
...@@ -123,16 +122,13 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in ...@@ -123,16 +122,13 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in
dimSize[i] = s.dimSize[i]; dimSize[i] = s.dimSize[i];
} }
XTensor t = NewTensor(order, dimSize, s.dataType, s.denseRatio, s.devID, s.mem); float dr = (!s.isSparse) ? 1.0F : s.denseRatio;
t.SetZeroAll(); XTensor t(order, dimSize, s.dataType, dr, s.devID, s.mem);
t.SetTMP(); t.SetTMP();
/* call _CopyIndexed function */ /* call _CopyIndexed function */
_CopyIndexed(&s, &t, dim, srcIndex, indexSize, tgtIndex, copyNum); _CopyIndexed(&s, &t, dim, srcIndex, indexSize, tgtIndex, copyNum);
/* destroy variables */
delete[] dimSize;
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&s, NULL, &t, MOVEMENT_COPYINDEXED); XLink::MakeLink(&s, NULL, &t, MOVEMENT_COPYINDEXED);
XLink::AddParamToHeadInt(&t, dim); XLink::AddParamToHeadInt(&t, dim);
...@@ -141,6 +137,9 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in ...@@ -141,6 +137,9 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in
XLink::AddParamToHeadPointer(&t, tgtIndex); XLink::AddParamToHeadPointer(&t, tgtIndex);
XLink::AddParamToHeadInt(&t, copyNum); XLink::AddParamToHeadInt(&t, copyNum);
/* destroy variables */
delete[] dimSize;
return t; return t;
} }
......
...@@ -101,32 +101,31 @@ make a new tensor to keep the result and return it ...@@ -101,32 +101,31 @@ make a new tensor to keep the result and return it
*/ */
XTensor ReduceMax(const XTensor &input, int dim) XTensor ReduceMax(const XTensor &input, int dim)
{ {
CheckNTErrors(&input, "Empty input or output tensors!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
CheckNTErrors((dim >= 0 && dim < input.order), "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < input.order; i++){ for(int i = 0; i < order; i++){
if(i < dim) if(i < dim)
dimSize[i] = input.dimSize[i]; dimSize[i] = input.dimSize[i];
else if(i > dim) else if(i >= dim)
dimSize[i] = input.dimSize[i + 1]; dimSize[i] = input.dimSize[i + 1];
} }
XTensor output = NewTensor(order, dimSize, input.dataType, input.denseRatio, input.devID, input.mem); float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
output.SetZeroAll(); XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMP(); output.SetTMP();
/* call _ReduceMax function */ /* call _ReduceMax function */
_ReduceMax(&input, &output, dim); _ReduceMax(&input, &output, dim);
/* destroy variables */
delete[] dimSize;
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX); XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
/* destroy variables */
delete[] dimSize;
return output; return output;
} }
......
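A worked example of the fixed dimension loop (sizes are illustrative):

/* input has shape (2, 3, 4); reducing dim 1 gives dimSize = {2, 4} */
XTensor m = ReduceMax(input, 1);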
...@@ -58,20 +58,19 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i ...@@ -58,20 +58,19 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
*/ */
XTensor ReduceMean(const XTensor &input, int dim) XTensor ReduceMean(const XTensor &input, int dim)
{ {
CheckNTErrors(&input, "Empty input or output tensors!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
CheckNTErrors((dim >= 0 && dim < input.order), "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < input.order; i++){ for(int i = 0; i < order; i++){
if(i < dim) if(i < dim)
dimSize[i] = input.dimSize[i]; dimSize[i] = input.dimSize[i];
else if(i > dim) else if(i >= dim)
dimSize[i] = input.dimSize[i + 1]; dimSize[i] = input.dimSize[i + 1];
} }
XTensor output = NewTensor(order, dimSize, input.dataType, input.denseRatio, input.devID, input.mem); float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
output.SetZeroAll(); XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMP(); output.SetTMP();
/* call _ReduceMean function */ /* call _ReduceMean function */
......
...@@ -214,20 +214,19 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true ...@@ -214,20 +214,19 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true
*/ */
XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE power, bool isExp) XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE power, bool isExp)
{ {
CheckNTErrors(&input, "Empty input or output tensors!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
CheckNTErrors((dim >= 0 && dim < input.order), "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < input.order; i++){ for(int i = 0; i < order; i++){
if(i < dim) if(i < dim)
dimSize[i] = input.dimSize[i]; dimSize[i] = input.dimSize[i];
else if(i > dim) else if(i >= dim)
dimSize[i] = input.dimSize[i + 1]; dimSize[i] = input.dimSize[i + 1];
} }
XTensor output = NewTensor(order, dimSize, input.dataType, input.denseRatio, input.devID, input.mem); float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
output.SetZeroAll(); XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMP(); output.SetTMP();
/* call _ReduceSum function */ /* call _ReduceSum function */
...@@ -237,6 +236,53 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow ...@@ -237,6 +236,53 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow
XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUM); XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUM);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
XLink::AddParamToHead(&output, power); XLink::AddParamToHead(&output, power);
XLink::AddParamToHeadBool(&output, isExp);
/* destroy variables */
delete[] dimSize;
return output;
}
/*
sum the items along a dimension of the tensor (return a XTensor structure)
make a new tensor to keep the result and return it
For a 1-dimensional data array a,
sum = \sum_i (a_i)^power if isExp == false
sum = \sum_i exp((a_i)^power) if isExp == true
>> input - the input tensor
>> dim - the dimension where the reduction is performed on
>> power - we perform pow(item_i, power) on each item in the array
>> isExp - specify if the exp() is performed
<< return - the sum along a dimension of the tensor
*/
XTensor ReduceSum(const XTensor &input, int dim, DTYPE power, bool isExp)
{
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1;
int * dimSize = new int[order];
for(int i = 0; i < order; i++){
if(i < dim)
dimSize[i] = input.dimSize[i];
else if(i >= dim)
dimSize[i] = input.dimSize[i + 1];
}
float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMP();
/* call _ReduceSum function */
_ReduceSum(&input, &output, dim, NULL, power, isExp);
/* tensor connection */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCESUM);
XLink::AddParamToHeadInt(&output, dim);
XLink::AddParamToHead(&output, power);
XLink::AddParamToHeadBool(&output, isExp);
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
......
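A minimal usage sketch of the new shift-free ReduceSum overload above (illustrative only; the sizes and values are assumptions):

/* hypothetical example: sum of squares along dimension 1 of a 2x4 tensor */
int dims[2] = {2, 4};
XTensor * input = NewTensor(2, dims);
input->SetZeroAll();

/* the reduced dimension is removed, so the result has order 1;
   power = 2.0F squares every item and isExp keeps its default (false) */
XTensor output = ReduceSum(*input, 1, 2.0F);

delete input;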
...@@ -43,7 +43,16 @@ For a 1-dimensional data array a, ...@@ -43,7 +43,16 @@ For a 1-dimensional data array a,
sum = \sum_i (a_i - shift) if isExp == false sum = \sum_i (a_i - shift) if isExp == false
sum = \sum_i exp(a_i - shift) if isExp == true sum = \sum_i exp(a_i - shift) if isExp == true
*/ */
XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift = NULL, DTYPE power = (DTYPE)1.0F, bool isExp = false); XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE power = (DTYPE)1.0F, bool isExp = false);
/*
sum the items along a dimension of the tensor (return a XTensor structure)
make a new tensor to keep the result and return it
For a 1-dimensional data array a,
sum = \sum_i (a_i) if isExp == false
sum = \sum_i exp(a_i) if isExp == true
*/
XTensor ReduceSum(const XTensor &input, int dim, DTYPE power = (DTYPE)1.0F, bool isExp = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -54,20 +54,19 @@ For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2 ...@@ -54,20 +54,19 @@ For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2
*/ */
XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift) XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift)
{ {
CheckNTErrors(&input, "Empty input or output tensors!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
CheckNTErrors((dim >= 0 && dim < input.order), "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < input.order; i++){ for(int i = 0; i < order; i++){
if(i < dim) if(i < dim)
dimSize[i] = input.dimSize[i]; dimSize[i] = input.dimSize[i];
else if(i > dim) else if(i >= dim)
dimSize[i] = input.dimSize[i + 1]; dimSize[i] = input.dimSize[i + 1];
} }
XTensor output = NewTensor(order, dimSize, input.dataType, input.denseRatio, input.devID, input.mem); float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
output.SetZeroAll(); XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMP(); output.SetTMP();
/* call _ReduceSumSquared function */ /* call _ReduceSumSquared function */
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../../XName.h"
#include "../math/ScaleAndShift.h" #include "../math/ScaleAndShift.h"
#include "ReduceSum.h" #include "ReduceSum.h"
#include "ReduceVariance.h" #include "ReduceVariance.h"
...@@ -56,25 +57,28 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2 ...@@ -56,25 +57,28 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
*/ */
XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean) XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean)
{ {
CheckNTErrors(&input, "Empty input or output tensors!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
CheckNTErrors((dim >= 0 && dim < input.order), "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < input.order; i++){ for(int i = 0; i < order; i++){
if(i < dim) if(i < dim)
dimSize[i] = input.dimSize[i]; dimSize[i] = input.dimSize[i];
else if(i > dim) else if(i >= dim)
dimSize[i] = input.dimSize[i + 1]; dimSize[i] = input.dimSize[i + 1];
} }
XTensor output = NewTensor(order, dimSize, input.dataType, input.denseRatio, input.devID, input.mem); float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
output.SetZeroAll(); XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMP(); output.SetTMP();
/* call _ReduceVariance function */ /* call _ReduceVariance function */
_ReduceVariance(&input, &output, dim, &mean); _ReduceVariance(&input, &output, dim, &mean);
/* tensor connection */
XLink::MakeLink(&input, &mean, &output, REDUCE_REDUCEVARIANCE);
XLink::AddParamToHeadInt(&output, dim);
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
......
...@@ -36,7 +36,7 @@ concatenate a list of tensors along a given dimension ...@@ -36,7 +36,7 @@ concatenate a list of tensors along a given dimension
*/ */
void _ConcatenateSolely(const XList * smalls, XTensor * big, int dim) void _ConcatenateSolely(const XList * smalls, XTensor * big, int dim)
{ {
CheckNTErrors((big->order > dim && dim >= 0), "Illegal dimension to concatenate!"); CheckNTErrors(big->order > dim && dim >= 0, "Illegal dimension to concatenate!");
int catDimSize = 0; int catDimSize = 0;
int dimRDI = big->order - dim - 1; int dimRDI = big->order - dim - 1;
......
...@@ -30,8 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,8 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set target data block index for the data movement in split */ /* set target data block index for the data movement in split */
extern "C" extern "C"
void _CudaMakeMergeBlockIndex(int devID, void _CudaMakeMergeBlockIndex(int devID, int * blockIndex, int blockNum, int blockNumInMerge,
int * blockIndex, int blockNum, int blockNumInMerge,
int splitSizeInGrid, int gridSize, int gridNum); int splitSizeInGrid, int gridSize, int gridNum);
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -161,8 +161,7 @@ e.g., (N/3, M, 3) -> (N, M) ...@@ -161,8 +161,7 @@ e.g., (N/3, M, 3) -> (N, M)
*/ */
XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim) XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim)
{ {
CheckNTErrors(&s != NULL, "Invalid tensors!"); CheckNTErrors(leadingDim < whereToMerge, "Invalid leading dimension!");
CheckNTErrors((leadingDim < whereToMerge), "Invalid leading dimension!");
if (leadingDim < 0) if (leadingDim < 0)
leadingDim = 0; leadingDim = 0;
...@@ -180,8 +179,8 @@ XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim) ...@@ -180,8 +179,8 @@ XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim)
} }
} }
XTensor t = NewTensor(order, dimSize, s.dataType, s.denseRatio, s.devID, s.mem); float dr = (!s.isSparse) ? 1.0F : s.denseRatio;
t.SetZeroAll(); XTensor t(order, dimSize, s.dataType, dr, s.devID, s.mem);
t.SetTMP(); t.SetTMP();
/* call _Merge function */ /* call _Merge function */
...@@ -314,6 +313,7 @@ void _Merge(const XList * smalls, XTensor * big, int whereToMerge) ...@@ -314,6 +313,7 @@ void _Merge(const XList * smalls, XTensor * big, int whereToMerge)
/* /*
merge small tensors into a big tensor (return a XTensor structure) merge small tensors into a big tensor (return a XTensor structure)
make a new tensor to keep the result and return it
>> smalls - the list of the small tensors >> smalls - the list of the small tensors
>> whereToMerge - the merging operation is along with which dimension >> whereToMerge - the merging operation is along with which dimension
...@@ -331,10 +331,8 @@ XTensor Merge(const XList &smalls, int whereToMerge) ...@@ -331,10 +331,8 @@ XTensor Merge(const XList &smalls, int whereToMerge)
dimSize[i] = tensor->dimSize[whereToMerge] * smalls.count; dimSize[i] = tensor->dimSize[whereToMerge] * smalls.count;
} }
XTensor big = NewTensor(order, dimSize, float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
tensor->dataType, tensor->denseRatio, XTensor big(order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
tensor->devID, tensor->mem);
big.SetZeroAll();
big.SetTMP(); big.SetTMP();
/* call _Merge function */ /* call _Merge function */
...@@ -370,10 +368,8 @@ XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge) ...@@ -370,10 +368,8 @@ XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge)
dimSize[i] = smallA.dimSize[whereToMerge] * 2; dimSize[i] = smallA.dimSize[whereToMerge] * 2;
} }
XTensor big = NewTensor(order, dimSize, float dr = (!smallA.isSparse) ? 1.0F : smallA.denseRatio;
smallA.dataType, smallA.denseRatio, XTensor big(order, dimSize, smallA.dataType, dr, smallA.devID, smallA.mem);
smallA.devID, smallA.mem);
big.SetZeroAll();
big.SetTMP(); big.SetTMP();
XList smalls(2); XList smalls(2);
......
...@@ -148,16 +148,16 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum) ...@@ -148,16 +148,16 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
int order = s.order + 1; int order = s.order + 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
dimSize[0] = splitNum;
for (int i = 0; i < s.order; i++) { for (int i = 0; i < s.order; i++) {
if (i == whereToSplit) if (i == whereToSplit)
dimSize[i + 1] = s.dimSize[i] / splitNum; dimSize[i+1] = s.dimSize[i] / splitNum;
else else
dimSize[i + 1] = s.dimSize[i]; dimSize[i+1] = s.dimSize[i];
} }
dimSize[0] = splitNum;
XTensor t = NewTensor(order, dimSize, s.dataType, s.denseRatio, s.devID, s.mem); float dr = (!s.isSparse) ? 1.0F : s.denseRatio;
t.SetZeroAll(); XTensor t(order, dimSize, s.dataType, dr, s.devID, s.mem);
t.SetTMP(); t.SetTMP();
/* call _Split function */ /* call _Split function */
...@@ -175,7 +175,7 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum) ...@@ -175,7 +175,7 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
} }
/* /*
split a big tensor into small tensors. split a big tensor into small tensors
>> big - the source tensor >> big - the source tensor
>> smalls - the list that keeps the resulting tensors (for return) >> smalls - the list that keeps the resulting tensors (for return)
...@@ -281,38 +281,16 @@ void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum) ...@@ -281,38 +281,16 @@ void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum)
} }
/* /*
split a big tensor into small tensors (returna a XList struture). split a big tensor into small tensors
make a new list to keep the result and return it.
>> big - the source tensor >> big - the source tensor
>> smalls - the list that keeps the resulting tensors (for return)
NOTE that all the "small" tensors have already been placed in the list in advance.
>> whereToSplit - which dimension of the tensor is to split >> whereToSplit - which dimension of the tensor is to split
>> splitNum - how many splits >> splitNum - how many splits
<< return - a list of small tensors by splitting a big tensor
*/ */
XList SplitList(const XTensor &big, int whereToSplit, int splitNum) void Split(const XTensor &big, XList &smalls, int whereToSplit, int splitNum)
{ {
CheckNTErrors(&big, "Invalid tensors!");
XList smalls = XList(splitNum);
int order = big.order;
int * dimSize = new int[order];
for (int i = 0; i < big.order; i++) {
if (i != whereToSplit)
dimSize[i] = big.dimSize[i];
else
dimSize[i] = big.dimSize[i] / splitNum;
}
for (int i = 0; i < splitNum; i++) {
XTensor tensor = NewTensor(order, dimSize,
big.dataType, big.denseRatio,
big.devID, big.mem);
tensor.SetZeroAll();
tensor.SetTMP();
smalls.Add(&tensor);
}
/* call _Split function */ /* call _Split function */
_Split(&big, &smalls, whereToSplit, splitNum); _Split(&big, &smalls, whereToSplit, splitNum);
...@@ -326,11 +304,6 @@ XList SplitList(const XTensor &big, int whereToSplit, int splitNum) ...@@ -326,11 +304,6 @@ XList SplitList(const XTensor &big, int whereToSplit, int splitNum)
block, rather than the total number of splits */ block, rather than the total number of splits */
XLink::AddParamToHeadInt(s, i); XLink::AddParamToHeadInt(s, i);
} }
/* destroy variables */
delete[] dimSize;
return smalls;
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -46,7 +46,7 @@ void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum) ...@@ -46,7 +46,7 @@ void _Split(const XTensor * big, XList * smalls, int whereToSplit, int splitNum)
split a big tensor into small tensors (return a XList structure) split a big tensor into small tensors (return a XList structure)
make a new list to keep the result and return it make a new list to keep the result and return it
*/ */
XList SplitList(const XTensor &big, int whereToSplit, int splitNum); void Split(const XTensor &big, XList &smalls, int whereToSplit, int splitNum);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
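A minimal usage sketch of the new list-based Split interface declared above. As the NOTE in the source comment says, the small tensors are placed in the list in advance; sizes and names here are illustrative assumptions:

/* hypothetical example: split a 4x3 tensor into two 2x3 tensors along dimension 0 */
int bigDims[2]   = {4, 3};
int smallDims[2] = {2, 3};
XTensor * big = NewTensor(2, bigDims);
big->SetZeroAll();

XTensor * s0 = NewTensor(2, smallDims);
XTensor * s1 = NewTensor(2, smallDims);

XList smalls(2);
smalls.Add(s0);
smalls.Add(s1);

/* fills s0 and s1 with the two halves of big */
Split(*big, smalls, 0, 2);

delete s0; delete s1; delete big;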
...@@ -108,8 +108,6 @@ make a new tensor to keep the result and return it ...@@ -108,8 +108,6 @@ make a new tensor to keep the result and return it
*/ */
XTensor Unsqueeze(const XTensor &a, int dim, int dSize) XTensor Unsqueeze(const XTensor &a, int dim, int dSize)
{ {
CheckNTErrors(&a, "Empty input tensors!");
int order = a.order + 1; int order = a.order + 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
...@@ -122,8 +120,8 @@ XTensor Unsqueeze(const XTensor &a, int dim, int dSize) ...@@ -122,8 +120,8 @@ XTensor Unsqueeze(const XTensor &a, int dim, int dSize)
dimSize[i] = a.dimSize[i - 1]; dimSize[i] = a.dimSize[i - 1];
} }
XTensor b = NewTensor(order, dimSize, a.dataType, a.denseRatio, a.devID, a.mem); float dr = (!a.isSparse) ? 1.0F : a.denseRatio;
b.SetZeroAll(); XTensor b(order, dimSize, a.dataType, dr, a.devID, a.mem);
b.SetTMP(); b.SetTMP();
/* call _Unsqueeze function */ /* call _Unsqueeze function */
......
...@@ -29,13 +29,14 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,13 +29,14 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
sort the tensor along a given dimension sort the tensor along a given dimension
>> a - input tensor
>> a - the tensor >> b - output tensor
>> index - index of the items in the resulting tensor >> index - index of the items in the resulting tensor
>> dim - the dimension along which the sorting is performed >> dim - the dimension along which the sorting is performed
*/ */
void _Sort(XTensor * a, XTensor * index, int dim) void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim)
{ {
CheckNTErrors((XTensor::IsIdentical(a, b)), "Input tensors should have the same type!");
CheckNTErrors((dim >= 0 && dim < a->order), "Incorrect dimension specified!"); CheckNTErrors((dim >= 0 && dim < a->order), "Incorrect dimension specified!");
CheckNTErrors((a->order == index->order), "Unmatched input tensors!"); CheckNTErrors((a->order == index->order), "Unmatched input tensors!");
CheckNTErrors((index->dataType == X_INT), "Wrong data type!"); CheckNTErrors((index->dataType == X_INT), "Wrong data type!");
...@@ -46,7 +47,7 @@ void _Sort(XTensor * a, XTensor * index, int dim) ...@@ -46,7 +47,7 @@ void _Sort(XTensor * a, XTensor * index, int dim)
if (a->devID >= 0) { if (a->devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
_CudaSortBig(a, a, index, index, dim); _CudaSortBig(a, b, index, index, dim);
#else #else
ShowNTErrors("Plesae specify USE_CUDA and recompile the code!"); ShowNTErrors("Plesae specify USE_CUDA and recompile the code!");
#endif #endif
...@@ -64,12 +65,13 @@ void _Sort(XTensor * a, XTensor * index, int dim) ...@@ -64,12 +65,13 @@ void _Sort(XTensor * a, XTensor * index, int dim)
for (int k = 0; k < blockNum; k++) { for (int k = 0; k < blockNum; k++) {
for (int i = 0; i < stride; i++) { for (int i = 0; i < stride; i++) {
void * data = (char*)a->data + (k * blockSize + i) * a->unitSize; void * dataA = (char*)a->data + (k * blockSize + i) * a->unitSize;
void * dataB = (char*)b->data + (k * blockSize + i) * b->unitSize;
void * indexData = (char*)index->data + (k * blockSize + i) * sizeof(int); void * indexData = (char*)index->data + (k * blockSize + i) * sizeof(int);
/* we sort the data array along "dim" */ /* we sort the data array along "dim" */
if (a->dataType == X_FLOAT) if (a->dataType == X_FLOAT)
XQSort(data, indexData, strideNum, a->unitSize, stride, CompXFloat); XQSort(dataA, dataB, indexData, strideNum, a->unitSize, stride, CompXFloat);
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
} }
...@@ -78,4 +80,40 @@ void _Sort(XTensor * a, XTensor * index, int dim) ...@@ -78,4 +80,40 @@ void _Sort(XTensor * a, XTensor * index, int dim)
} }
} }
/*
sort the tensor along a given dimension (do it on site)
keep the result in the input tensor a and return nothing
>> a - input tensor
>> index - index of the items in the resulting tensor
>> dim - the dimension along which the sorting is performed
*/
void _SortMe(XTensor * a, XTensor * index, int dim)
{
_Sort(a, a, index, dim);
}
/*
sort the tensor along a given dimension
keep the sorted result in the output tensor b and the index of the items in index
>> a - input tensor
>> b - output tensor
>> index - index of the items in the resulting tensor
>> dim - the dimension along which the sorting is performed
*/
void Sort(XTensor & a, XTensor & b, XTensor & index, int dim)
{
/* call _Sort function */
_Sort(&a, &b, &index, dim);
/* tensor connections */
XList list(2);
list.Add(&b);
list.Add(&index);
XLink::MakeLink(&a, &list, SORT_SORT);
XLink::AddParamToHeadInt(&b, dim);
XLink::AddParamToHeadInt(&index, dim);
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
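A minimal usage sketch of the reworked sorting interface (_Sort / _SortMe / Sort) above. The sizes, the X_INT index tensor, and the devID value (-1 for CPU) are illustrative assumptions:

/* hypothetical example: sort a 1x8 tensor along dimension 1 */
int dims[2] = {1, 8};
XTensor * a     = NewTensor(2, dims);
XTensor * b     = NewTensor(2, dims);
XTensor * index = NewTensor(2, dims, X_INT, 1.0F, -1);
a->SetZeroAll();

/* out-of-place: a is left untouched, b receives the sorted values
   and index receives the positions of the items in a */
Sort(*a, *b, *index, 1);

/* in-place: a itself is overwritten with the sorted values */
_SortMe(a, index, 1);

delete a; delete b; delete index;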
...@@ -39,7 +39,7 @@ bitonic sort (for each row in a matrix) ...@@ -39,7 +39,7 @@ bitonic sort (for each row in a matrix)
>> n - row number of the matrix >> n - row number of the matrix
*/ */
template<class T> __global__ template<class T> __global__
void KernelBitonicSort2D(void * data, int j, int k, int m, int n) void KernelBitonicSort2D(void * data, int j, int k, int m, int n)
{ {
const unsigned int idx = blockDim.x * blockIdx.x + threadIdx.x; const unsigned int idx = blockDim.x * blockIdx.x + threadIdx.x;
const unsigned int row = blockDim.y * blockIdx.y + threadIdx.y; const unsigned int row = blockDim.y * blockIdx.y + threadIdx.y;
...@@ -74,7 +74,7 @@ bitonic sort (for each row in a matrix) with index ...@@ -74,7 +74,7 @@ bitonic sort (for each row in a matrix) with index
>> n - row number of the matrix >> n - row number of the matrix
*/ */
template<class T> __global__ template<class T> __global__
void KernelBitonicSort2D(void * data, int * index, int j, int k, int m, int n) void KernelBitonicSort2D(void * data, int * index, int j, int k, int m, int n)
{ {
const unsigned int idx = blockDim.x * blockIdx.x + threadIdx.x; const unsigned int idx = blockDim.x * blockIdx.x + threadIdx.x;
const unsigned int row = blockDim.y * blockIdx.y + threadIdx.y; const unsigned int row = blockDim.y * blockIdx.y + threadIdx.y;
......
...@@ -27,8 +27,20 @@ ...@@ -27,8 +27,20 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* sort the data along a given dimension */ /* sort the data along a given dimension */
void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim);
/*
sort the data along a given dimension (do it on site)
keep the result in the input tensor a and return nothing
*/
void _SortMe(XTensor * a, XTensor * index, int dim);
/*
sort the data along a given dimension
keep the sorted result in the output tensor b and the index of the items in index
*/
extern "C" extern "C"
void _Sort(XTensor * a, XTensor * index, int dim); void Sort(XTensor & a, XTensor & b, XTensor & index, int dim);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -105,4 +105,29 @@ void _TopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k) ...@@ -105,4 +105,29 @@ void _TopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
} }
} }
} }
/*
get the top-k items along a given dimension
>> a - input tensor
>> b - output tensor (top-k result)
>> index - index of the top-k items
>> dim - the dimension along which the sorting is performed
>> k - how many items returned after sorting
*/
void TopK(XTensor &a, XTensor &b, XTensor &index, int dim, int k)
{
_TopK(&a, &b, &index, dim, k);
/* tensor connection */
XList list(2);
list.Add(&b);
list.Add(&index);
XLink::MakeLink(&a, &list, SORT_TOPK);
XLink::AddParamToHeadInt(&b, dim);
XLink::AddParamToHeadInt(&index, k);
XLink::AddParamToHeadInt(&b, dim);
XLink::AddParamToHeadInt(&index, k);
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
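A minimal usage sketch of the new TopK wrapper above (the sizes, k, and the X_INT index tensor are illustrative assumptions):

/* hypothetical example: top-2 items along dimension 1 of a 3x5 tensor */
int aDims[2] = {3, 5};
int bDims[2] = {3, 2};    /* k = 2 along the sorted dimension */
XTensor * a     = NewTensor(2, aDims);
XTensor * b     = NewTensor(2, bDims);
XTensor * index = NewTensor(2, bDims, X_INT, 1.0F, -1);
a->SetZeroAll();

/* b keeps the 2 largest values per row, index keeps their positions in a */
TopK(*a, *b, *index, 1, 2);

delete a; delete b; delete index;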
...@@ -30,6 +30,10 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -30,6 +30,10 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
extern "C" extern "C"
void _TopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k); void _TopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k);
/* get the top-k items along a given dimension */
extern "C"
void TopK(XTensor &a, XTensor &b, XTensor &index, int dim, int k);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __TOPK_H__ #endif // __TOPK_H__
\ No newline at end of file
...@@ -60,7 +60,9 @@ void _HardTanH(const XTensor * x, XTensor * y) ...@@ -60,7 +60,9 @@ void _HardTanH(const XTensor * x, XTensor * y)
} }
/* /*
hard tanh function (return a structure) hard tanh function (return a XTensor structure)
make a new tensor to keep the result and return it
y = 1 if x > 1 y = 1 if x > 1
x if -1 <= x <= 1 x if -1 <= x <= 1
-1 if x < -1 -1 if x < -1
...@@ -72,8 +74,10 @@ XTensor HardTanH(const XTensor &x) ...@@ -72,8 +74,10 @@ XTensor HardTanH(const XTensor &x)
XTensor y(&x); XTensor y(&x);
y.SetTMP(); y.SetTMP();
/* call _HardTanH function */
_HardTanH(&x, &y); _HardTanH(&x, &y);
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_HARDTANH); XLink::MakeLink(&x, NULL, &y, FUNC_HARDTANH);
return y; return y;
...@@ -116,7 +120,7 @@ void _HardTanHBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -116,7 +120,7 @@ void _HardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
{ {
/* calculate dE/dy */ /* calculate dE/dy */
if(lossName != NOLOSS) if(lossName != NOLOSS)
LossBackward(dedy, gold, y, lossName); _LossBackward(dedy, gold, y, lossName);
DTYPE * dedyp = (DTYPE*)dedy->data; DTYPE * dedyp = (DTYPE*)dedy->data;
DTYPE * dedxp = (DTYPE*)dedx->data; DTYPE * dedxp = (DTYPE*)dedx->data;
......
...@@ -137,7 +137,7 @@ void _CudaHardTanHBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -137,7 +137,7 @@ void _CudaHardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
/* calculate dE/dy */ /* calculate dE/dy */
if(lossName != NOLOSS) if(lossName != NOLOSS)
LossBackward(dedy, gold, y, lossName); _LossBackward(dedy, gold, y, lossName);
int gridSize[3], blockSize[3]; int gridSize[3], blockSize[3];
......
...@@ -37,7 +37,7 @@ y = 1 if x > 1 ...@@ -37,7 +37,7 @@ y = 1 if x > 1
*/ */
void _HardTanH(const XTensor * x, XTensor * y); void _HardTanH(const XTensor * x, XTensor * y);
/* hard tanh function (return a structure) */ /* hard tanh function (return a XTensor structure) */
XTensor HardTanH(const XTensor &x); XTensor HardTanH(const XTensor &x);
/* de/dx */ /* de/dx */
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-27 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-27
*/ */
#include "../XName.h"
#include "Identity.h" #include "Identity.h"
#include "../XUtility.h" #include "../XUtility.h"
#include "../core/movement/CopyValues.h" #include "../core/movement/CopyValues.h"
...@@ -36,6 +37,26 @@ void _Identity(const XTensor * x, XTensor * y) ...@@ -36,6 +37,26 @@ void _Identity(const XTensor * x, XTensor * y)
} }
/* /*
identity function y = x (return a XTensor structure)
make a new tensor to keep the result and return it
>> x - input tensor
<< return - y
*/
XTensor Identity(const XTensor &x)
{
XTensor y(&x);
y.SetTMP();
/* call _Identity function */
_Identity(&x, &y);
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_IDENTITY);
return y;
}
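A minimal usage sketch of the new XTensor-returning wrappers such as Identity above; HardTanH, Rectify, Sigmoid and Softmax follow the same SetTMP + MakeLink pattern. Sizes and names are illustrative assumptions:

/* hypothetical example: apply the identity function and keep the link */
int dims[2] = {2, 3};
XTensor * x = NewTensor(2, dims);
x->SetZeroAll();

/* y is a TMP tensor whose income edge points to x with type FUNC_IDENTITY */
XTensor y = Identity(*x);

delete x;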
/*
backward computation for identity function y = x backward computation for identity function y = x
dE/dx = dE/dy * dy/dx = dE/dy dE/dx = dE/dy * dy/dx = dE/dy
...@@ -58,7 +79,7 @@ void _IdentityBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -58,7 +79,7 @@ void _IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
{ {
/* calculate dE/dy */ /* calculate dE/dy */
if(lossName != NOLOSS) if(lossName != NOLOSS)
LossBackward(dedy, gold, y, lossName); _LossBackward(dedy, gold, y, lossName);
if(dedy->data != dedx->data) if(dedy->data != dedx->data)
_CopyValues(dedy, dedx); _CopyValues(dedy, dedx);
......
...@@ -28,11 +28,12 @@ ...@@ -28,11 +28,12 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* identity function y = x */ /* identity function y = x */
extern "C"
void _Identity(const XTensor * x, XTensor * y); void _Identity(const XTensor * x, XTensor * y);
/* identity function y = x (return a XTensor structure) */
XTensor Identity(const XTensor &x);
/* de/dx */ /* de/dx */
extern "C"
void _IdentityBackward(XTensor * gold, XTensor * y, XTensor * x, void _IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx, XTensor * dedy, XTensor * dedx,
LOSS_FUNCTION_NAME lossName); LOSS_FUNCTION_NAME lossName);
......
...@@ -162,18 +162,22 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim) ...@@ -162,18 +162,22 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
} }
/* /*
log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (return a structure) log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (return a XTensor structure)
make a new tensor to keep the result and return it
>> x - input vector >> x - input vector
>> leadDim - leading dimension (along which we perform reduction) >> leadDim - leading dimension (along which we perform reduction)
<< return - result << return - y
*/ */
XTensor LogSoftmax(const XTensor &x, int leadDim) XTensor LogSoftmax(const XTensor &x, int leadDim)
{ {
XTensor y(&x); XTensor y(&x);
y.SetTMP(); y.SetTMP();
/* call _LogSoftmax function */
_LogSoftmax(&x, &y, leadDim); _LogSoftmax(&x, &y, leadDim);
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX); XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX);
XLink::AddParamToHeadInt(&y, leadDim); XLink::AddParamToHeadInt(&y, leadDim);
......
...@@ -30,7 +30,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -30,7 +30,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) */ /* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) */
void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim); void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim);
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (return a structure) */ /* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (return a XTensor structure) */
XTensor LogSoftmax(const XTensor &x, int leadDim); XTensor LogSoftmax(const XTensor &x, int leadDim);
/* de/dx */ /* de/dx */
......
...@@ -42,7 +42,7 @@ compute the loss ...@@ -42,7 +42,7 @@ compute the loss
>> oBeg - where to start in the model output (along the leading dimension) >> oBeg - where to start in the model output (along the leading dimension)
<< return - error in model prediction with respect to gold standard << return - error in model prediction with respect to gold standard
*/ */
DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName, DTYPE _LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
bool isLogOutput, int leadDim, int gBeg, int gLen, int oBeg) bool isLogOutput, int leadDim, int gBeg, int gLen, int oBeg)
{ {
DTYPE error = 0.0F; DTYPE error = 0.0F;
...@@ -66,7 +66,7 @@ DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName, ...@@ -66,7 +66,7 @@ DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
blockNum = output->unitNum / blockSize; blockNum = output->unitNum / blockSize;
if(isLogOutput) if(isLogOutput)
return LossComputeForLogScale(gold, output, LFName, leadDim, gBeg, gLen, oBeg); return _LossComputeForLogScale(gold, output, LFName, leadDim, gBeg, gLen, oBeg);
DTYPE * gp = (DTYPE*)gold->data; DTYPE * gp = (DTYPE*)gold->data;
DTYPE * op = (DTYPE*)output->data; DTYPE * op = (DTYPE*)output->data;
...@@ -180,7 +180,7 @@ DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName, ...@@ -180,7 +180,7 @@ DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
} }
else { else {
#ifdef USE_CUDA #ifdef USE_CUDA
error = CudaLossCompute(gold, output, LFName, isLogOutput, leadDim, gBeg, gLen, oBeg); error = _CudaLossCompute(gold, output, LFName, isLogOutput, leadDim, gBeg, gLen, oBeg);
#else #else
ShowNTErrors("Please specify USE_CUDA and recompile the code!"); ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif #endif
...@@ -201,7 +201,7 @@ the log version of loss computation ...@@ -201,7 +201,7 @@ the log version of loss computation
>> oBeg - where to start in the model output (along the leading dimension) >> oBeg - where to start in the model output (along the leading dimension)
<< return - error in model prediction with respect to gold standard << return - error in model prediction with respect to gold standard
*/ */
DTYPE LossComputeForLogScale(XTensor * gold, XTensor * output, DTYPE _LossComputeForLogScale(XTensor * gold, XTensor * output,
LOSS_FUNCTION_NAME LFName, LOSS_FUNCTION_NAME LFName,
int leadDim, int gBeg, int gLen, int oBeg) int leadDim, int gBeg, int gLen, int oBeg)
{ {
...@@ -343,7 +343,7 @@ with respect to gold standard, and y this the model output ...@@ -343,7 +343,7 @@ with respect to gold standard, and y this the model output
>> LFName - name of loss function >> LFName - name of loss function
<< return dE/dy << return dE/dy
*/ */
DTYPE LossBackwardPoint(DTYPE t, DTYPE y, LOSS_FUNCTION_NAME LFName) DTYPE _LossBackwardPoint(DTYPE t, DTYPE y, LOSS_FUNCTION_NAME LFName)
{ {
/* /*
squared error squared error
...@@ -380,7 +380,7 @@ with respect to gold standard, and y this the model output ...@@ -380,7 +380,7 @@ with respect to gold standard, and y this the model output
>> tLen - segment length from tBeg (along the leading dimension) >> tLen - segment length from tBeg (along the leading dimension)
>> yBeg - where to start in the model output (along the leading dimension) >> yBeg - where to start in the model output (along the leading dimension)
*/ */
void LossBackward(XTensor * dedy, XTensor * t, XTensor * y, void _LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
LOSS_FUNCTION_NAME LFName, LOSS_FUNCTION_NAME LFName,
int leadDim, int tBeg, int tLen, int yBeg) int leadDim, int tBeg, int tLen, int yBeg)
{ {
...@@ -496,7 +496,7 @@ void LossBackward(XTensor * dedy, XTensor * t, XTensor * y, ...@@ -496,7 +496,7 @@ void LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
} }
else { else {
#ifdef USE_CUDA #ifdef USE_CUDA
CudaLossBackward(dedy, t, y, LFName, leadDim, tBeg, tLen, yBeg); _CudaLossBackward(dedy, t, y, LFName, leadDim, tBeg, tLen, yBeg);
#else #else
ShowNTErrors("Please specify USE_CUDA and recompile the code!"); ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif #endif
......
...@@ -51,7 +51,7 @@ compute the loss ...@@ -51,7 +51,7 @@ compute the loss
>> yBeg - where to start in the model output (along the leading dimension) >> yBeg - where to start in the model output (along the leading dimension)
<< return - error in model prediction with respect to gold standard << return - error in model prediction with respect to gold standard
*/ */
DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName, DTYPE _CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
bool isLogOutput, int leadDim, int gBeg, int gLen, int yBeg) bool isLogOutput, int leadDim, int gBeg, int gLen, int yBeg)
{ {
CheckNTErrors((gLen >= 0 && gLen <= y->unitNum), "Illegal input length!"); CheckNTErrors((gLen >= 0 && gLen <= y->unitNum), "Illegal input length!");
...@@ -65,7 +65,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName, ...@@ -65,7 +65,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
CheckNTErrors((gLen == gold->dimSize[leadDim] && gBeg == 0 && yBeg == 0), "TODO!"); CheckNTErrors((gLen == gold->dimSize[leadDim] && gBeg == 0 && yBeg == 0), "TODO!");
if(isLogOutput) if(isLogOutput)
return LossComputeForLogScale(gold, y, LFName, leadDim, gBeg, gLen, yBeg); return _LossComputeForLogScale(gold, y, LFName, leadDim, gBeg, gLen, yBeg);
DTYPE error = 0.0F; DTYPE error = 0.0F;
...@@ -77,7 +77,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName, ...@@ -77,7 +77,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
if(LFName == SQUAREDERROR){ if(LFName == SQUAREDERROR){
XTensor * diff = NewTensor(gold->order, gold->dimSize, gold->dataType, gold->denseRatio, gold->devID, gold->mem); XTensor * diff = NewTensor(gold->order, gold->dimSize, gold->dataType, gold->denseRatio, gold->devID, gold->mem);
_Sum(gold, y, diff, -1.0F); _Sum(gold, y, diff, -1.0F);
_Power(diff, 2.0F); _PowerMe(diff, 2.0F);
_ScaleAndShiftMe(diff, 0.5F, 0.0F); _ScaleAndShiftMe(diff, 0.5F, 0.0F);
int reduceTimes = diff->order; int reduceTimes = diff->order;
...@@ -110,9 +110,9 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName, ...@@ -110,9 +110,9 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
if(LFName == CROSSENTROPY){ if(LFName == CROSSENTROPY){
XTensor * diff = NewTensor(y->order, y->dimSize, y->dataType, y->denseRatio, y->devID, y->mem); XTensor * diff = NewTensor(y->order, y->dimSize, y->dataType, y->denseRatio, y->devID, y->mem);
_CopyValues(y, diff); _CopyValues(y, diff);
_Log(diff); _LogMe(diff);
_Multiply(gold, diff, diff); _Multiply(gold, diff, diff);
_Negate(diff); _NegateMe(diff);
int reduceTimes = diff->order; int reduceTimes = diff->order;
for (int i = 0; i < reduceTimes; i++) { for (int i = 0; i < reduceTimes; i++) {
...@@ -148,7 +148,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName, ...@@ -148,7 +148,7 @@ DTYPE CudaLossCompute(XTensor * gold, XTensor * y, LOSS_FUNCTION_NAME LFName,
_CopyValues(y, yOnehot); _CopyValues(y, yOnehot);
_Multiply(gold, y, yOnehot); _Multiply(gold, y, yOnehot);
_Sum(gold, yOnehot, diff, -1.0F); _Sum(gold, yOnehot, diff, -1.0F);
_Power(diff, 2.0F); _PowerMe(diff, 2.0F);
_ScaleAndShiftMe(diff, 0.5F, 0.0F); _ScaleAndShiftMe(diff, 0.5F, 0.0F);
int reduceTimes = diff->order; int reduceTimes = diff->order;
...@@ -190,7 +190,7 @@ the log version of loss computation ...@@ -190,7 +190,7 @@ the log version of loss computation
>> yBeg - where to start in the model output (along the leading dimension) >> yBeg - where to start in the model output (along the leading dimension)
<< return - error in model prediction with respect to gold standard << return - error in model prediction with respect to gold standard
*/ */
DTYPE CudaLossComputeForLogScale(XTensor * gold, XTensor * y, DTYPE _CudaLossComputeForLogScale(XTensor * gold, XTensor * y,
LOSS_FUNCTION_NAME LFName, LOSS_FUNCTION_NAME LFName,
int leadDim, int gBeg, int gLen, int yBeg) int leadDim, int gBeg, int gLen, int yBeg)
{ {
...@@ -209,9 +209,9 @@ with respect to gold standard, and y this the model output ...@@ -209,9 +209,9 @@ with respect to gold standard, and y this the model output
>> LFName - name of loss function >> LFName - name of loss function
<< return dE/dy << return dE/dy
*/ */
DTYPE CudaLossBackward(DTYPE t, DTYPE y, LOSS_FUNCTION_NAME LFName) DTYPE _CudaLossBackward(DTYPE t, DTYPE y, LOSS_FUNCTION_NAME LFName)
{ {
return LossBackwardPoint(t, y, LFName); return _LossBackwardPoint(t, y, LFName);
// TODO: call cuda kernels for computing the errors // TODO: call cuda kernels for computing the errors
} }
...@@ -328,7 +328,7 @@ with respect to gold standard, and y this the model output ...@@ -328,7 +328,7 @@ with respect to gold standard, and y this the model output
>> tLen - segment length from oBeg (along the leading dimension) >> tLen - segment length from oBeg (along the leading dimension)
>> yBeg - where to start in the model output (along the leading dimension) >> yBeg - where to start in the model output (along the leading dimension)
*/ */
void CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y, void _CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
LOSS_FUNCTION_NAME LFName, LOSS_FUNCTION_NAME LFName,
int leadDim, int tBeg, int tLen, int yBeg) int leadDim, int tBeg, int tLen, int yBeg)
{ {
......
...@@ -31,21 +31,21 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -31,21 +31,21 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* compute the loss (cuda version) */ /* compute the loss (cuda version) */
extern "C" extern "C"
DTYPE CudaLossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName, DTYPE _CudaLossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
bool isLogOutput, int leadDim, int gBeg, int gLen, int oBeg); bool isLogOutput, int leadDim, int gBeg, int gLen, int oBeg);
/* compute the loss in log scale (cuda version) */ /* compute the loss in log scale (cuda version) */
extern "C" extern "C"
DTYPE CudaLossComputeForLogScale(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName, DTYPE _CudaLossComputeForLogScale(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
int leadDim, int gBeg, int gLen, int oBeg); int leadDim, int gBeg, int gLen, int oBeg);
/* backward computation for a single element (cuda version) */ /* backward computation for a single element (cuda version) */
extern "C" extern "C"
DTYPE CudaLossBackwardPoint(DTYPE t, DTYPE y, LOSS_FUNCTION_NAME LFName); DTYPE _CudaLossBackwardPoint(DTYPE t, DTYPE y, LOSS_FUNCTION_NAME LFName);
/* backward computation for (dense) vectors (cuda version) */ /* backward computation for (dense) vectors (cuda version) */
extern "C" extern "C"
void CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y, void _CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
LOSS_FUNCTION_NAME LFName, LOSS_FUNCTION_NAME LFName,
int leadDim = -1, int tBeg = 0, int tLen = -1, int yBeg = 0); int leadDim = -1, int tBeg = 0, int tLen = -1, int yBeg = 0);
......
...@@ -48,21 +48,21 @@ loss function to measure the "number" of errors ...@@ -48,21 +48,21 @@ loss function to measure the "number" of errors
/* compute the loss */ /* compute the loss */
extern "C" extern "C"
DTYPE LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName, DTYPE _LossCompute(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
bool isLogOutput, int leadDim, int gBeg, int gLen, int oBeg); bool isLogOutput, int leadDim, int gBeg, int gLen, int oBeg);
/* compute the loss (log version) */ /* compute the loss (log version) */
extern "C" extern "C"
DTYPE LossComputeForLogScale(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName, DTYPE _LossComputeForLogScale(XTensor * gold, XTensor * output, LOSS_FUNCTION_NAME LFName,
int leadDim, int gBeg, int gLen, int oBeg); int leadDim, int gBeg, int gLen, int oBeg);
/* backward computation for a single element */ /* backward computation for a single element */
extern "C" extern "C"
DTYPE LossBackwardPoint(DTYPE t, DTYPE y, LOSS_FUNCTION_NAME LFName); DTYPE _LossBackwardPoint(DTYPE t, DTYPE y, LOSS_FUNCTION_NAME LFName);
/* backward computation for (dense) vectors */ /* backward computation for (dense) vectors */
extern "C" extern "C"
void LossBackward(XTensor * dEdY, XTensor * t, XTensor * y, void _LossBackward(XTensor * dEdY, XTensor * t, XTensor * y,
LOSS_FUNCTION_NAME LFName, LOSS_FUNCTION_NAME LFName,
int leadDim = -1, int tBeg = 0, int tLen = -1, int yBeg = 0); int leadDim = -1, int tBeg = 0, int tLen = -1, int yBeg = 0);
......
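A minimal calling sketch for the renamed underscore-prefixed loss routines declared above (illustrative only; the sizes, the SQUAREDERROR choice, and the leading-dimension arguments are assumptions):

/* hypothetical example: compute the loss and the gradient dE/dy */
int dims[2] = {4, 10};
XTensor * gold = NewTensor(2, dims);
XTensor * y    = NewTensor(2, dims);
XTensor * dedy = NewTensor(2, dims);
gold->SetZeroAll();
y->SetZeroAll();

/* leadDim = 0, gBeg = 0, gLen = dims[0], oBeg = 0 */
DTYPE error = _LossCompute(gold, y, SQUAREDERROR, false, 0, 0, dims[0], 0);

/* dE/dy with the default segment arguments */
_LossBackward(dedy, gold, y, SQUAREDERROR);

delete gold; delete y; delete dedy;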
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include "../XName.h"
#include "Rectify.h" #include "Rectify.h"
#include "Rectify.cuh" #include "Rectify.cuh"
...@@ -55,6 +56,28 @@ void _Rectify(const XTensor * x, XTensor * y) ...@@ -55,6 +56,28 @@ void _Rectify(const XTensor * x, XTensor * y)
} }
/* /*
rectify function y = max(0, x) (return a XTensor structure)
make a new tensor to keep the result and return it
>> x - input tensor
<< return - y
*/
XTensor Rectify(const XTensor &x)
{
XTensor y(&x);
y.SetTMP();
/* call _Rectify function */
_Rectify(&x, &y);
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_RECTIFY);
return y;
}
/*
backward computation backward computation
dE/dx = dE/dy * dy/dx dE/dx = dE/dy * dy/dx
...@@ -94,7 +117,7 @@ void _RectifyBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -94,7 +117,7 @@ void _RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
{ {
/* calculate dE/dy */ /* calculate dE/dy */
if(lossName != NOLOSS) if(lossName != NOLOSS)
LossBackward(dedy, gold, y, lossName); _LossBackward(dedy, gold, y, lossName);
DTYPE * dedyp = (DTYPE*)dedy->data; DTYPE * dedyp = (DTYPE*)dedy->data;
DTYPE * dedxp = (DTYPE*)dedx->data; DTYPE * dedxp = (DTYPE*)dedx->data;
......
...@@ -134,7 +134,7 @@ void _CudaRectifyBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -134,7 +134,7 @@ void _CudaRectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
/* calculate dE/dy */ /* calculate dE/dy */
if(lossName != NOLOSS) if(lossName != NOLOSS)
CudaLossBackward(dedy, gold, y, lossName); _CudaLossBackward(dedy, gold, y, lossName);
int gridSize[3], blockSize[3]; int gridSize[3], blockSize[3];
......
...@@ -28,11 +28,12 @@ ...@@ -28,11 +28,12 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* rectify function y = max(0, x) */ /* rectify function y = max(0, x) */
extern "C"
void _Rectify(const XTensor * x, XTensor * y); void _Rectify(const XTensor * x, XTensor * y);
/* rectify function y = max(0, x) (return a XTensor structure) */
XTensor Rectify(const XTensor &x);
/* de/dx */ /* de/dx */
extern "C"
void _RectifyBackward(XTensor * gold, XTensor * y, XTensor * x, void _RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx, XTensor * dedy, XTensor * dedx,
LOSS_FUNCTION_NAME lossName); LOSS_FUNCTION_NAME lossName);
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-25 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-25
*/ */
#include "../XName.h"
#include <math.h> #include <math.h>
#include "Sigmoid.h" #include "Sigmoid.h"
#include "Sigmoid.cuh" #include "Sigmoid.cuh"
...@@ -53,6 +54,27 @@ void _Sigmoid(const XTensor * x, XTensor * y) ...@@ -53,6 +54,27 @@ void _Sigmoid(const XTensor * x, XTensor * y)
} }
/* /*
sigmoid function y = 1/(1+exp(-x)) (return a XTensor structure)
make a new tensor to keep the result and return it
>> x - input tensor
<< return - y
*/
XTensor Sigmoid(const XTensor &x)
{
XTensor y(&x);
y.SetTMP();
/* call _Sigmoid function */
_Sigmoid(&x, &y);
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_SIGMOID);
return y;
}
/*
backward computation backward computation
dE/ds = dE/dy * dy/dx dE/ds = dE/dy * dy/dx
...@@ -86,7 +108,7 @@ void _SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -86,7 +108,7 @@ void _SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
{ {
/* calculate dE/dy */ /* calculate dE/dy */
if(lossName != NOLOSS) if(lossName != NOLOSS)
LossBackward(dedy, gold, y, lossName); _LossBackward(dedy, gold, y, lossName);
DTYPE * dedyp = (DTYPE*)dedy->data; DTYPE * dedyp = (DTYPE*)dedy->data;
DTYPE * dedxp = (DTYPE*)dedx->data; DTYPE * dedxp = (DTYPE*)dedx->data;
......
...@@ -129,7 +129,7 @@ void _CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -129,7 +129,7 @@ void _CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){ if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
/* calculate dE/dy */ /* calculate dE/dy */
if(lossName != NOLOSS) if(lossName != NOLOSS)
LossBackward(dedy, gold, y, lossName); _LossBackward(dedy, gold, y, lossName);
int gridSize[3], blockSize[3]; int gridSize[3], blockSize[3];
......
...@@ -28,11 +28,12 @@ ...@@ -28,11 +28,12 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* sigmoid function y = 1/(1+exp(-x)) */ /* sigmoid function y = 1/(1+exp(-x)) */
extern "C"
void _Sigmoid(const XTensor * x, XTensor * y); void _Sigmoid(const XTensor * x, XTensor * y);
/* sigmoid function y = 1/(1+exp(-x)) (return a XTensor structure) */
XTensor Sigmoid(const XTensor &x);
/* de/dx */ /* de/dx */
extern "C"
void _SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x, void _SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx, XTensor * dedy, XTensor * dedx,
LOSS_FUNCTION_NAME lossName); LOSS_FUNCTION_NAME lossName);
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <math.h> #include <math.h>
#include "Softmax.h" #include "Softmax.h"
#include "Softmax.cuh" #include "Softmax.cuh"
#include "../XName.h"
#include "../XUtility.h" #include "../XUtility.h"
#include "../core/reduce/ReduceSum.h" #include "../core/reduce/ReduceSum.h"
#include "../core/reduce/ReduceMax.h" #include "../core/reduce/ReduceMax.h"
...@@ -130,6 +131,28 @@ void _Softmax(const XTensor * x, XTensor * y, int leadDim) ...@@ -130,6 +131,28 @@ void _Softmax(const XTensor * x, XTensor * y, int leadDim)
} }
/* /*
softmax y = e^x / \sum_{i} e^{x_i} (return a XTensor structure)
make a new tensor to keep the result and return it
>> x - input vector
>> leadDim - leading dimension (along which we perform reduction)
<< return - y
*/
XTensor Softmax(const XTensor &x, int leadDim)
{
XTensor y(&x);
y.SetTMP();
/* call _Softmax function */
_Softmax(&x, &y, leadDim);
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_SOFTMAX);
return y;
}
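A minimal usage sketch of the new Softmax wrapper above (sizes and the leading dimension are illustrative assumptions):

/* hypothetical example: row-wise softmax of a 2x5 tensor */
int dims[2] = {2, 5};
XTensor * x = NewTensor(2, dims);
x->SetZeroAll();

/* leadDim = 1: each row of y sums to 1 */
XTensor y = Softmax(*x, 1);

delete x;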
/*
backward computation for dense tensors backward computation for dense tensors
dE/dx = dE/dy * dy/dx dE/dx = dE/dy * dy/dx
......
...@@ -31,7 +31,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -31,7 +31,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */ /* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */
extern "C" extern "C"
void _CudaSotmax(const XTensor * input, XTensor * output, int leadDim); void _CudaSoftmax(const XTensor * input, XTensor * output, int leadDim);
/* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */ /* softmax y = e^x / \sum_{i} e^{x_i} (Cuda version) */
extern "C" extern "C"
......
...@@ -28,11 +28,12 @@ ...@@ -28,11 +28,12 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* softmax y = e^x / \sum_{i} e^{x_i} */ /* softmax y = e^x / \sum_{i} e^{x_i} */
extern "C"
void _Softmax(const XTensor * x, XTensor * y, int leadDim); void _Softmax(const XTensor * x, XTensor * y, int leadDim);
/* softmax y = e^x / \sum_{i} e^{x_i} (return a XTensor structure) */
XTensor Softmax(const XTensor &x, int leadDim);
/* de/dx */ /* de/dx */
extern "C"
void _SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x, void _SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
XTensor * dedy, XTensor * dedx, XTensor * dedy, XTensor * dedx,
int leadDim, int leadDim,
......
...@@ -51,15 +51,21 @@ bool TestAbsolute1() ...@@ -51,15 +51,21 @@ bool TestAbsolute1()
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize); XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize);
XTensor * aMe = NewTensor(aOrder, aDimSize);
XTensor bUser;
/* initialize variables */ /* initialize variables */
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
aMe->SetData(aData, aUnitNum);
/* call Absolute function */ /* call Absolute function */
_Absolute(a); _Absolute(a, b);
_AbsoluteMe(aMe);
bUser = Absolute(*a);
/* check results */ /* check results */
cpuTest = a->CheckData(answer, aUnitNum, 1e-4F); cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) && aMe->CheckData(answer, aUnitNum, 1e-4F) && bUser.CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -67,25 +73,37 @@ bool TestAbsolute1() ...@@ -67,25 +73,37 @@ bool TestAbsolute1()
/* create tensor */ /* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0); XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* call Absolute function */ /* call Absolute function */
_Absolute(aGPU); _Absolute(aGPU, bGPU);
_AbsoluteMe(aMeGPU);
bUserGPU = Absolute(*aGPU);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum, 1e-4F); gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) && aMeGPU->CheckData(answer, aUnitNum, 1e-4F) && bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b;
delete aMe;
delete aGPU; delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize; delete[] aDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b;
delete aMe;
delete[] aDimSize; delete[] aDimSize;
return cpuTest; return cpuTest;
......
...@@ -76,6 +76,7 @@ bool TestConcatenate1() ...@@ -76,6 +76,7 @@ bool TestConcatenate1()
XTensor * s1 = NewTensor(sOrder1, sDimSize1); XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2); XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s1->SetData(sData1, sUnitNum1); s1->SetData(sData1, sUnitNum1);
...@@ -88,9 +89,10 @@ bool TestConcatenate1() ...@@ -88,9 +89,10 @@ bool TestConcatenate1()
/* call Concatenate function */ /* call Concatenate function */
_Concatenate(sList, t, 1); _Concatenate(sList, t, 1);
tUser = Concatenate(*sList, 1);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -100,6 +102,7 @@ bool TestConcatenate1() ...@@ -100,6 +102,7 @@ bool TestConcatenate1()
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */ /* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1); sGPU1->SetData(sData1, sUnitNum1);
...@@ -115,9 +118,10 @@ bool TestConcatenate1() ...@@ -115,9 +118,10 @@ bool TestConcatenate1()
/* call Concatenate function */ /* call Concatenate function */
_Concatenate(sList, tGPU, 1); _Concatenate(sList, tGPU, 1);
tUserGPU = Concatenate(*sList, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete sList; delete sList;
...@@ -201,6 +205,7 @@ bool TestConcatenate2() ...@@ -201,6 +205,7 @@ bool TestConcatenate2()
XTensor * s1 = NewTensor(sOrder1, sDimSize1); XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2); XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s1->SetData(sData1, sUnitNum1); s1->SetData(sData1, sUnitNum1);
...@@ -213,9 +218,10 @@ bool TestConcatenate2() ...@@ -213,9 +218,10 @@ bool TestConcatenate2()
/* call Concatenate function */ /* call Concatenate function */
_Concatenate(sList, t, 0); _Concatenate(sList, t, 0);
tUser = Concatenate(*sList, 0);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -225,6 +231,7 @@ bool TestConcatenate2() ...@@ -225,6 +231,7 @@ bool TestConcatenate2()
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */ /* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1); sGPU1->SetData(sData1, sUnitNum1);
...@@ -240,9 +247,10 @@ bool TestConcatenate2() ...@@ -240,9 +247,10 @@ bool TestConcatenate2()
/* call Concatenate function */ /* call Concatenate function */
_Concatenate(sList, tGPU, 0); _Concatenate(sList, tGPU, 0);
tUserGPU = Concatenate(*sList, 0);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete sList; delete sList;
...@@ -324,6 +332,7 @@ bool TestConcatenate3() ...@@ -324,6 +332,7 @@ bool TestConcatenate3()
XTensor * s1 = NewTensor(sOrder1, sDimSize1); XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2); XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s1->SetData(sData1, sUnitNum1); s1->SetData(sData1, sUnitNum1);
...@@ -336,9 +345,10 @@ bool TestConcatenate3() ...@@ -336,9 +345,10 @@ bool TestConcatenate3()
/* call Concatenate function */ /* call Concatenate function */
_Concatenate(sList, t, 1); _Concatenate(sList, t, 1);
tUser = Concatenate(*sList, 1);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -348,6 +358,7 @@ bool TestConcatenate3() ...@@ -348,6 +358,7 @@ bool TestConcatenate3()
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */ /* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1); sGPU1->SetData(sData1, sUnitNum1);
...@@ -363,9 +374,10 @@ bool TestConcatenate3() ...@@ -363,9 +374,10 @@ bool TestConcatenate3()
/* call Concatenate function */ /* call Concatenate function */
_Concatenate(sList, tGPU, 1); _Concatenate(sList, tGPU, 1);
tUserGPU = Concatenate(*sList, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete sList; delete sList;
...@@ -444,6 +456,7 @@ bool TestConcatenate4() ...@@ -444,6 +456,7 @@ bool TestConcatenate4()
XTensor * s1 = NewTensor(sOrder1, sDimSize1); XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2); XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s1->SetData(sData1, sUnitNum1); s1->SetData(sData1, sUnitNum1);
...@@ -452,9 +465,10 @@ bool TestConcatenate4() ...@@ -452,9 +465,10 @@ bool TestConcatenate4()
/* call Concatenate function */ /* call Concatenate function */
_Concatenate(s1, s2, t, 1); _Concatenate(s1, s2, t, 1);
tUser = Concatenate(*s1, *s2, 1);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -464,6 +478,7 @@ bool TestConcatenate4() ...@@ -464,6 +478,7 @@ bool TestConcatenate4()
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */ /* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1); sGPU1->SetData(sData1, sUnitNum1);
...@@ -472,9 +487,10 @@ bool TestConcatenate4() ...@@ -472,9 +487,10 @@ bool TestConcatenate4()
/* call Concatenate function */ /* call Concatenate function */
_Concatenate(sGPU1, sGPU2, tGPU, 1); _Concatenate(sGPU1, sGPU2, tGPU, 1);
tUserGPU = Concatenate(*sGPU1, *sGPU2, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1; delete s1;
......
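Below is a minimal sketch (not part of the commit) of the two Concatenate entry points these tests exercise: the low-level _Concatenate that writes into a caller-shaped target, and the new value-returning Concatenate used by the tUser checks. The call signatures are copied from the tests; the include path and the XList construction are assumptions.

#include "XTensor.h"   /* include path assumed; adjust to the build layout */
#include "XList.h"

using namespace nts;

void ConcatenateSketch()
{
    int sDimSize[2] = {2, 2};
    int tDimSize[2] = {2, 4};
    DTYPE data1[4] = {1.0F, 2.0F, 3.0F, 4.0F};
    DTYPE data2[4] = {5.0F, 6.0F, 7.0F, 8.0F};

    XTensor * s1 = NewTensor(2, sDimSize);
    XTensor * s2 = NewTensor(2, sDimSize);
    XTensor * t  = NewTensor(2, tDimSize);
    s1->SetData(data1, 4);
    s2->SetData(data2, 4);

    /* pairwise form: the caller pre-shapes the 2x4 target */
    _Concatenate(s1, s2, t, 1);
    XTensor tUserPair = Concatenate(*s1, *s2, 1);

    /* list form, as in TestConcatenate1-3 (XList construction is assumed) */
    XList * sList = new XList();
    sList->Add(s1);
    sList->Add(s2);
    _Concatenate(sList, t, 1);
    XTensor tUserList = Concatenate(*sList, 1);   /* target allocated by the call */

    delete sList;
    delete s1; delete s2; delete t;
}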
...@@ -53,6 +53,7 @@ bool TestHardTanH1() ...@@ -53,6 +53,7 @@ bool TestHardTanH1()
/* create tensors */ /* create tensors */
XTensor * x = NewTensor(order, dimSize); XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(order, dimSize); XTensor * y = NewTensor(order, dimSize);
XTensor yUser;
/* initialize variables */ /* initialize variables */
x->SetData(xData, unitNum); x->SetData(xData, unitNum);
...@@ -60,9 +61,10 @@ bool TestHardTanH1() ...@@ -60,9 +61,10 @@ bool TestHardTanH1()
/* call hardtanh function */ /* call hardtanh function */
_HardTanH(x, y); _HardTanH(x, y);
yUser = HardTanH(*x);
/* check results */ /* check results */
cpuTest = y->CheckData(answer, unitNum, 1e-4F); cpuTest = y->CheckData(answer, unitNum, 1e-4F) && yUser.CheckData(answer, unitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -71,6 +73,7 @@ bool TestHardTanH1() ...@@ -71,6 +73,7 @@ bool TestHardTanH1()
/* create tensor */ /* create tensor */
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor yUserGPU;
/* Initialize variables */ /* Initialize variables */
xGPU->SetData(xData, unitNum); xGPU->SetData(xData, unitNum);
...@@ -78,9 +81,10 @@ bool TestHardTanH1() ...@@ -78,9 +81,10 @@ bool TestHardTanH1()
/* call hardtanh function */ /* call hardtanh function */
_HardTanH(xGPU, yGPU); _HardTanH(xGPU, yGPU);
yUserGPU = HardTanH(*xGPU);
/* check results */ /* check results */
gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F); gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F) && yUserGPU.CheckData(answer, unitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete x; delete x;
......
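The activation tests above all follow the same pattern; the sketch below uses HardTanH (clip to [-1, 1]) as the example. It is a CPU-only sketch under the assumption that the extra NewTensor arguments on the GPU path (X_FLOAT, 1.0F, 0) select the data type and device; the call signatures themselves match the tests.

#include "XTensor.h"   /* include path assumed */

using namespace nts;

void HardTanHSketch()
{
    int dimSize[2] = {1, 3};
    DTYPE xData[3]  = {-2.0F, 0.5F, 3.0F};
    DTYPE answer[3] = {-1.0F, 0.5F, 1.0F};   /* hardtanh clips values to [-1, 1] */

    XTensor * x = NewTensor(2, dimSize);
    XTensor * y = NewTensor(2, dimSize);
    x->SetData(xData, 3);

    _HardTanH(x, y);                  /* writes into a caller-provided tensor */
    XTensor yUser = HardTanH(*x);     /* new form tested above: returns the result */

    bool ok = y->CheckData(answer, 3, 1e-4F) && yUser.CheckData(answer, 3, 1e-4F);
    (void)ok;

    delete x; delete y;
}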
...@@ -51,6 +51,7 @@ bool TestIdentity1() ...@@ -51,6 +51,7 @@ bool TestIdentity1()
/* create tensors */ /* create tensors */
XTensor * x = NewTensor(order, dimSize); XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(order, dimSize); XTensor * y = NewTensor(order, dimSize);
XTensor yUser;
/* initialize variables */ /* initialize variables */
x->SetData(xData, unitNum); x->SetData(xData, unitNum);
...@@ -58,9 +59,10 @@ bool TestIdentity1() ...@@ -58,9 +59,10 @@ bool TestIdentity1()
/* call Identity function */ /* call Identity function */
_Identity(x, y); _Identity(x, y);
yUser = Identity(*x);
/* check result */ /* check result */
cpuTest = y->CheckData(answer, unitNum); cpuTest = y->CheckData(answer, unitNum, 1e-4F) && yUser.CheckData(answer, unitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -69,6 +71,7 @@ bool TestIdentity1() ...@@ -69,6 +71,7 @@ bool TestIdentity1()
/* create tensors */ /* create tensors */
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor yUserGPU;
/* initialize variables */ /* initialize variables */
xGPU->SetData(xData, unitNum); xGPU->SetData(xData, unitNum);
...@@ -76,9 +79,10 @@ bool TestIdentity1() ...@@ -76,9 +79,10 @@ bool TestIdentity1()
/* call Identity function */ /* call Identity function */
_Identity(xGPU, yGPU); _Identity(xGPU, yGPU);
yUserGPU = Identity(*xGPU);
/* check result */ /* check result */
gpuTest = yGPU->CheckData(answer, unitNum); gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F) && yUserGPU.CheckData(answer, unitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete x; delete x;
......
...@@ -51,15 +51,21 @@ bool TestLog1() ...@@ -51,15 +51,21 @@ bool TestLog1()
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize); XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize);
XTensor * aMe = NewTensor(aOrder, aDimSize);
XTensor bUser;
/* initialize variables */ /* initialize variables */
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
aMe->SetData(aData, aUnitNum);
/* call Log function */ /* call Log function */
_Log(a); _Log(a, b);
_LogMe(aMe);
bUser = Log(*a);
/* check results */ /* check results */
cpuTest = a->CheckData(answer, aUnitNum, 1e-4F); cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) && aMe->CheckData(answer, aUnitNum, 1e-4F) && bUser.CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -67,25 +73,37 @@ bool TestLog1() ...@@ -67,25 +73,37 @@ bool TestLog1()
/* create tensor */ /* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0); XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* call Log function */ /* call Log function */
_Log(aGPU); _Log(aGPU, bGPU);
_LogMe(aMeGPU);
bUserGPU = Log(*aGPU);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum, 1e-4F); gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) && aMeGPU->CheckData(answer, aUnitNum, 1e-4F) && bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b;
delete aMe;
delete aGPU; delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize; delete[] aDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b;
delete aMe;
delete[] aDimSize; delete[] aDimSize;
return cpuTest; return cpuTest;
......
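The Log test now exercises three entry points instead of the old in-place-only _Log(a). A short sketch of the pattern, with calls copied from the test and the include path assumed:

#include "XTensor.h"   /* include path assumed */
#include <cmath>

using namespace nts;

void LogSketch()
{
    int dimSize[1]  = {3};
    DTYPE aData[3]  = {1.0F, (DTYPE)expf(1.0F), (DTYPE)expf(2.0F)};
    DTYPE answer[3] = {0.0F, 1.0F, 2.0F};

    XTensor * a   = NewTensor(1, dimSize);
    XTensor * b   = NewTensor(1, dimSize);
    XTensor * aMe = NewTensor(1, dimSize);
    a->SetData(aData, 3);
    aMe->SetData(aData, 3);

    _Log(a, b);              /* out-of-place: a is left untouched */
    _LogMe(aMe);             /* in-place: aMe now holds log(aMe) */
    XTensor bUser = Log(*a); /* value-returning form */

    bool ok = b->CheckData(answer, 3, 1e-4F)
           && aMe->CheckData(answer, 3, 1e-4F)
           && bUser.CheckData(answer, 3, 1e-4F);
    (void)ok;

    delete a; delete b; delete aMe;
}

The Negate and Power diffs further down follow the same _X / _XMe / X split, with Power additionally taking the exponent as a scalar argument.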
...@@ -51,6 +51,7 @@ bool TestLogSoftmax1() ...@@ -51,6 +51,7 @@ bool TestLogSoftmax1()
/* create tensors */ /* create tensors */
XTensor * x = NewTensor(order, dimSize); XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(order, dimSize); XTensor * y = NewTensor(order, dimSize);
XTensor yUser;
/* initialize variables */ /* initialize variables */
x->SetData(xData, unitNum); x->SetData(xData, unitNum);
...@@ -58,9 +59,10 @@ bool TestLogSoftmax1() ...@@ -58,9 +59,10 @@ bool TestLogSoftmax1()
/* call LogSoftmax function */ /* call LogSoftmax function */
_LogSoftmax(x, y, 1); _LogSoftmax(x, y, 1);
yUser = LogSoftmax(*x, 1);
/* check result */ /* check result */
cpuTest = y->CheckData(answer, unitNum, 1e-4F); cpuTest = y->CheckData(answer, unitNum, 1e-4F) && yUser.CheckData(answer, unitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -69,6 +71,7 @@ bool TestLogSoftmax1() ...@@ -69,6 +71,7 @@ bool TestLogSoftmax1()
/* create tensors */ /* create tensors */
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor yUserGPU;
/* initialize variables */ /* initialize variables */
xGPU->SetData(xData, unitNum); xGPU->SetData(xData, unitNum);
...@@ -76,9 +79,10 @@ bool TestLogSoftmax1() ...@@ -76,9 +79,10 @@ bool TestLogSoftmax1()
/* call LogSoftmax function */ /* call LogSoftmax function */
_LogSoftmax(xGPU, yGPU, 1); _LogSoftmax(xGPU, yGPU, 1);
yUserGPU = LogSoftmax(*xGPU, 1);
/* check result */ /* check result */
gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F); gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F) && yUserGPU.CheckData(answer, unitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete x; delete x;
......
...@@ -46,6 +46,7 @@ bool TestLoss1() ...@@ -46,6 +46,7 @@ bool TestLoss1()
bool cpuTest = true; bool cpuTest = true;
DTYPE answer = 5.0F; DTYPE answer = 5.0F;
DTYPE error;
/* create tensors */ /* create tensors */
XTensor * output = NewTensor(order, dimSize); XTensor * output = NewTensor(order, dimSize);
...@@ -57,8 +58,8 @@ bool TestLoss1() ...@@ -57,8 +58,8 @@ bool TestLoss1()
_ScaleAndShiftMe(output, 1, 1); _ScaleAndShiftMe(output, 1, 1);
_ScaleAndShiftMe(gold, 1, 2); _ScaleAndShiftMe(gold, 1, 2);
DTYPE error; /* call LossCompute function */
error = LossCompute(gold, output, SQUAREDERROR, false, 0, 0, dimSize[0], 0); error = _LossCompute(gold, output, SQUAREDERROR, false, 0, 0, dimSize[0], 0);
/* check results */ /* check results */
cpuTest = (error == answer); cpuTest = (error == answer);
...@@ -78,7 +79,7 @@ bool TestLoss1() ...@@ -78,7 +79,7 @@ bool TestLoss1()
_ScaleAndShiftMe(goldGPU, 1, 2); _ScaleAndShiftMe(goldGPU, 1, 2);
/* call LossCompute function */ /* call LossCompute function */
error = LossCompute(goldGPU, outputGPU, SQUAREDERROR, false, 0, 0, dimSize[0], 0); error = _LossCompute(goldGPU, outputGPU, SQUAREDERROR, false, 0, 0, dimSize[0], 0);
/* check results */ /* check results */
gpuTest = (error == answer); gpuTest = (error == answer);
...@@ -123,6 +124,7 @@ bool TestLoss2() ...@@ -123,6 +124,7 @@ bool TestLoss2()
bool cpuTest = true; bool cpuTest = true;
DTYPE answer = 0.0F; DTYPE answer = 0.0F;
DTYPE error;
/* create tensors */ /* create tensors */
XTensor * output = NewTensor(order, dimSize); XTensor * output = NewTensor(order, dimSize);
...@@ -134,8 +136,8 @@ bool TestLoss2() ...@@ -134,8 +136,8 @@ bool TestLoss2()
_ScaleAndShiftMe(output, 1, 1); _ScaleAndShiftMe(output, 1, 1);
_ScaleAndShiftMe(gold, 1, 2); _ScaleAndShiftMe(gold, 1, 2);
DTYPE error; /* call LossCompute function */
error = LossCompute(gold, output, CROSSENTROPY, false, 0, 0, dimSize[0], 0); error = _LossCompute(gold, output, CROSSENTROPY, false, 0, 0, dimSize[0], 0);
/* check results */ /* check results */
cpuTest = (error == answer); cpuTest = (error == answer);
...@@ -155,7 +157,7 @@ bool TestLoss2() ...@@ -155,7 +157,7 @@ bool TestLoss2()
_ScaleAndShiftMe(goldGPU, 1, 2); _ScaleAndShiftMe(goldGPU, 1, 2);
/* call LossCompute function */ /* call LossCompute function */
error = LossCompute(goldGPU, outputGPU, CROSSENTROPY, false, 0, 0, dimSize[0], 0); error = _LossCompute(goldGPU, outputGPU, CROSSENTROPY, false, 0, 0, dimSize[0], 0);
/* check results */ /* check results */
gpuTest = (error == answer); gpuTest = (error == answer);
...@@ -210,6 +212,7 @@ bool TestLoss3() ...@@ -210,6 +212,7 @@ bool TestLoss3()
bool cpuTest = true; bool cpuTest = true;
DTYPE answer = 0.25F; DTYPE answer = 0.25F;
DTYPE error;
/* create tensors */ /* create tensors */
XTensor * output = NewTensor(order, dimSize); XTensor * output = NewTensor(order, dimSize);
...@@ -219,8 +222,8 @@ bool TestLoss3() ...@@ -219,8 +222,8 @@ bool TestLoss3()
output->SetData(outputData, unitNum); output->SetData(outputData, unitNum);
gold->SetData(goldData, unitNum); gold->SetData(goldData, unitNum);
DTYPE error; /* call LossCompute function */
error = LossCompute(gold, output, ONEHOTERROR, false, 0, 0, dimSize[0], 0); error = _LossCompute(gold, output, ONEHOTERROR, false, 0, 0, dimSize[0], 0);
/* check results */ /* check results */
cpuTest = (error == answer); cpuTest = (error == answer);
...@@ -238,7 +241,7 @@ bool TestLoss3() ...@@ -238,7 +241,7 @@ bool TestLoss3()
goldGPU->SetData(goldData, unitNum); goldGPU->SetData(goldData, unitNum);
/* call LossCompute function */ /* call LossCompute function */
error = LossCompute(goldGPU, outputGPU, ONEHOTERROR, false, 0, 0, dimSize[0], 0); error = _LossCompute(goldGPU, outputGPU, ONEHOTERROR, false, 0, 0, dimSize[0], 0);
/* check results */ /* check results */
gpuTest = (error == answer); gpuTest = (error == answer);
......
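A sketch of the renamed loss entry point: the tests now call _LossCompute(...) instead of LossCompute(...) and compare the returned DTYPE against a known answer. The meaning of the trailing positional arguments is not spelled out in this diff, so they are copied verbatim from TestLoss1; the tensor shape below is illustrative and the include path is assumed.

#include "XTensor.h"   /* include path assumed */

using namespace nts;

DTYPE LossSketch()
{
    int dimSize[2] = {4, 1};

    XTensor * output = NewTensor(2, dimSize);
    XTensor * gold   = NewTensor(2, dimSize);
    output->SetZeroAll();
    gold->SetZeroAll();

    /* following TestLoss1: scale by 1 and shift the zeroed tensors to 1 and 2 */
    _ScaleAndShiftMe(output, 1, 1);
    _ScaleAndShiftMe(gold, 1, 2);

    /* squared-error loss; trailing argument values copied from the test */
    DTYPE error = _LossCompute(gold, output, SQUAREDERROR,
                               false, 0, 0, dimSize[0], 0);

    delete output;
    delete gold;
    return error;
}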
...@@ -75,6 +75,7 @@ bool TestMatrixMulBatched1() ...@@ -75,6 +75,7 @@ bool TestMatrixMulBatched1()
XTensor * s1 = NewTensor(sOrder1, sDimSize1); XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2); XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s1->SetData(sData1, sUnitNum1); s1->SetData(sData1, sUnitNum1);
...@@ -83,9 +84,10 @@ bool TestMatrixMulBatched1() ...@@ -83,9 +84,10 @@ bool TestMatrixMulBatched1()
/* call MatrixMulBatched function */ /* call MatrixMulBatched function */
_MatrixMulBatched(s1, X_NOTRANS, s2, X_NOTRANS, t); _MatrixMulBatched(s1, X_NOTRANS, s2, X_NOTRANS, t);
tUser = MatrixMulBatched(*s1, X_NOTRANS, *s2, X_NOTRANS);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -95,6 +97,7 @@ bool TestMatrixMulBatched1() ...@@ -95,6 +97,7 @@ bool TestMatrixMulBatched1()
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */ /* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1); sGPU1->SetData(sData1, sUnitNum1);
...@@ -103,9 +106,10 @@ bool TestMatrixMulBatched1() ...@@ -103,9 +106,10 @@ bool TestMatrixMulBatched1()
/* call MatrixMulBatched function */ /* call MatrixMulBatched function */
_MatrixMulBatched(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU); _MatrixMulBatched(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
tUserGPU = MatrixMulBatched(*sGPU1, X_NOTRANS, *sGPU2, X_NOTRANS);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1; delete s1;
...@@ -193,6 +197,7 @@ bool TestMatrixMulBatched2() ...@@ -193,6 +197,7 @@ bool TestMatrixMulBatched2()
XTensor * s1 = NewTensor(sOrder1, sDimSize1); XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2); XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s1->SetData(sData1, sUnitNum1); s1->SetData(sData1, sUnitNum1);
...@@ -201,9 +206,10 @@ bool TestMatrixMulBatched2() ...@@ -201,9 +206,10 @@ bool TestMatrixMulBatched2()
/* call MatrixMulBatched function */ /* call MatrixMulBatched function */
_MatrixMulBatched(s1, X_NOTRANS, s2, X_NOTRANS, t); _MatrixMulBatched(s1, X_NOTRANS, s2, X_NOTRANS, t);
tUser = MatrixMulBatched(*s1, X_NOTRANS, *s2, X_NOTRANS);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -213,6 +219,7 @@ bool TestMatrixMulBatched2() ...@@ -213,6 +219,7 @@ bool TestMatrixMulBatched2()
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */ /* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1); sGPU1->SetData(sData1, sUnitNum1);
...@@ -221,9 +228,10 @@ bool TestMatrixMulBatched2() ...@@ -221,9 +228,10 @@ bool TestMatrixMulBatched2()
/* call MatrixMulBatched function */ /* call MatrixMulBatched function */
_MatrixMulBatched(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU); _MatrixMulBatched(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
tUserGPU = MatrixMulBatched(*sGPU1, X_NOTRANS, *sGPU2, X_NOTRANS);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1; delete s1;
......
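A sketch of the batched matrix product tested above. In the low-level form the caller allocates the output tensor; the value-returning form sizes the output itself. The call signatures match the tests; treating the leading dimension as the batch index is an assumption, since the tensor shapes are not visible in this diff.

#include "XTensor.h"   /* include path assumed */

using namespace nts;

void MatrixMulBatchedSketch()
{
    int sDimSize1[3] = {2, 2, 3};   /* assumed layout: batch of 2, each 2x3 */
    int sDimSize2[3] = {2, 3, 2};   /* batch of 2, each 3x2 */
    int tDimSize[3]  = {2, 2, 2};   /* batch of 2, each 2x2 result */

    XTensor * s1 = NewTensor(3, sDimSize1);
    XTensor * s2 = NewTensor(3, sDimSize2);
    XTensor * t  = NewTensor(3, tDimSize);
    s1->SetZeroAll();
    s2->SetZeroAll();

    _MatrixMulBatched(s1, X_NOTRANS, s2, X_NOTRANS, t);
    XTensor tUser = MatrixMulBatched(*s1, X_NOTRANS, *s2, X_NOTRANS);

    delete s1; delete s2; delete t;
}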
...@@ -60,16 +60,18 @@ bool TestMerge1() ...@@ -60,16 +60,18 @@ bool TestMerge1()
/* create tensors */ /* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize); XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s->SetData(sData, sUnitNum); s->SetData(sData, sUnitNum);
t->SetZeroAll(); t->SetZeroAll();
/* call merge function */ /* call Merge function */
_Merge(s, t, 1, 0); _Merge(s, t, 1, 0);
tUser = Merge(*s, 1, 0);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -78,16 +80,18 @@ bool TestMerge1() ...@@ -78,16 +80,18 @@ bool TestMerge1()
/* create tensor */ /* create tensor */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */ /* Initialize variables */
sGPU->SetData(sData, sUnitNum); sGPU->SetData(sData, sUnitNum);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call merge function */ /* call Merge function */
_Merge(sGPU, tGPU, 1, 0); _Merge(sGPU, tGPU, 1, 0);
tUserGPU = Merge(*sGPU, 1, 0);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s; delete s;
...@@ -166,18 +170,23 @@ bool TestMerge2() ...@@ -166,18 +170,23 @@ bool TestMerge2()
XTensor * s = NewTensor(sOrder, sDimSize); XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t1 = NewTensor(tOrder1, tDimSize1); XTensor * t1 = NewTensor(tOrder1, tDimSize1);
XTensor * t2 = NewTensor(tOrder2, tDimSize2); XTensor * t2 = NewTensor(tOrder2, tDimSize2);
XTensor tUser1;
XTensor tUser2;
/* initialize variables */ /* initialize variables */
s->SetData(sData, sUnitNum); s->SetData(sData, sUnitNum);
t1->SetZeroAll(); t1->SetZeroAll();
t2->SetZeroAll(); t2->SetZeroAll();
/* call merge function */ /* call Merge function */
_Merge(s, t1, 1, 0); _Merge(s, t1, 1, 0);
_Merge(s, t2, 2, 0); _Merge(s, t2, 2, 0);
tUser1 = Merge(*s, 1, 0);
tUser2 = Merge(*s, 2, 0);
/* check results */ /* check results */
cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2); cpuTest = t1->CheckData(answer1, tUnitNum1) && tUser1.CheckData(answer1, tUnitNum1)
&& t2->CheckData(answer2, tUnitNum2) && tUser2.CheckData(answer2, tUnitNum2);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -187,18 +196,23 @@ bool TestMerge2() ...@@ -187,18 +196,23 @@ bool TestMerge2()
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0); XTensor * tGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0);
XTensor * tGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0); XTensor * tGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0);
XTensor tUserGPU1;
XTensor tUserGPU2;
/* Initialize variables */ /* Initialize variables */
sGPU->SetData(sData, sUnitNum); sGPU->SetData(sData, sUnitNum);
tGPU1->SetZeroAll(); tGPU1->SetZeroAll();
tGPU2->SetZeroAll(); tGPU2->SetZeroAll();
/* call merge function */ /* call Merge function */
_Merge(sGPU, tGPU1, 1, 0); _Merge(sGPU, tGPU1, 1, 0);
_Merge(sGPU, tGPU2, 2, 0); _Merge(sGPU, tGPU2, 2, 0);
tUserGPU1 = Merge(*sGPU, 1, 0);
tUserGPU2 = Merge(*sGPU, 2, 0);
/* check results */ /* check results */
gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2); gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tUserGPU1.CheckData(answer1, tUnitNum1)
&& tGPU2->CheckData(answer2, tUnitNum2) && tUserGPU2.CheckData(answer2, tUnitNum2);
/* destroy variables */ /* destroy variables */
delete s; delete s;
...@@ -271,6 +285,7 @@ bool TestMerge3() ...@@ -271,6 +285,7 @@ bool TestMerge3()
XTensor * s1 = NewTensor(sOrder, sDimSize); XTensor * s1 = NewTensor(sOrder, sDimSize);
XTensor * s2 = NewTensor(sOrder, sDimSize); XTensor * s2 = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s1->SetData(sData1, sUnitNum); s1->SetData(sData1, sUnitNum);
...@@ -281,11 +296,12 @@ bool TestMerge3() ...@@ -281,11 +296,12 @@ bool TestMerge3()
smallList->Add(s1); smallList->Add(s1);
smallList->Add(s2); smallList->Add(s2);
/* call merge function */ /* call Merge function */
_Merge(smallList, t, 0); _Merge(smallList, t, 0);
tUser = Merge(*smallList, 0);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -298,6 +314,7 @@ bool TestMerge3() ...@@ -298,6 +314,7 @@ bool TestMerge3()
XTensor * sGPU1 = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize); XTensor * tGPU = NewTensor(tOrder, tDimSize);
XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
sGPU1->SetData(sData1, sUnitNum); sGPU1->SetData(sData1, sUnitNum);
...@@ -308,11 +325,12 @@ bool TestMerge3() ...@@ -308,11 +325,12 @@ bool TestMerge3()
smallList->Add(sGPU1); smallList->Add(sGPU1);
smallList->Add(sGPU2); smallList->Add(sGPU2);
/* call merge function */ /* call Merge function */
_Merge(smallList, tGPU, 0); _Merge(smallList, tGPU, 0);
tUserGPU = Merge(*smallList, 0);
/* check results */ /* check results */
cpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1; delete s1;
...@@ -383,6 +401,7 @@ bool TestMerge4() ...@@ -383,6 +401,7 @@ bool TestMerge4()
XTensor * s1 = NewTensor(sOrder, sDimSize); XTensor * s1 = NewTensor(sOrder, sDimSize);
XTensor * s2 = NewTensor(sOrder, sDimSize); XTensor * s2 = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s1->SetData(sData1, sUnitNum); s1->SetData(sData1, sUnitNum);
...@@ -393,11 +412,12 @@ bool TestMerge4() ...@@ -393,11 +412,12 @@ bool TestMerge4()
smallList->Add(s1); smallList->Add(s1);
smallList->Add(s2); smallList->Add(s2);
/* call merge function */ /* call Merge function */
_Merge(smallList, t, 1); _Merge(smallList, t, 1);
tUser = Merge(*smallList, 1);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -410,6 +430,7 @@ bool TestMerge4() ...@@ -410,6 +430,7 @@ bool TestMerge4()
XTensor * sGPU1 = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize); XTensor * tGPU = NewTensor(tOrder, tDimSize);
XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
sGPU1->SetData(sData1, sUnitNum); sGPU1->SetData(sData1, sUnitNum);
...@@ -420,11 +441,12 @@ bool TestMerge4() ...@@ -420,11 +441,12 @@ bool TestMerge4()
smallList->Add(sGPU1); smallList->Add(sGPU1);
smallList->Add(sGPU2); smallList->Add(sGPU2);
/* call merge function */ /* call Merge function */
_Merge(smallList, tGPU, 1); _Merge(smallList, tGPU, 1);
tUserGPU = Merge(*smallList, 1);
/* check results */ /* check results */
cpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1; delete s1;
......
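A sketch of the two Merge entry points exercised above: merging one dimension of a single tensor, and merging a list of small tensors into one. The exact meaning of the integer arguments (which dimension is merged, and into which) is not documented in this diff, so the values are copied from the tests; the XList construction is an assumption.

#include "XTensor.h"   /* include path assumed */
#include "XList.h"

using namespace nts;

void MergeSketch(XTensor * s, XTensor * t, XTensor * s1, XTensor * s2, XTensor * tList)
{
    /* single-tensor form: t must be pre-shaped by the caller */
    _Merge(s, t, 1, 0);
    XTensor tUser = Merge(*s, 1, 0);

    /* list form: merge the tensors in smallList along dimension 0 */
    XList * smallList = new XList();
    smallList->Add(s1);
    smallList->Add(s2);
    _Merge(smallList, tList, 0);
    XTensor tUserList = Merge(*smallList, 0);

    delete smallList;
}

Note that the left-hand sides of the last two check lines in TestMerge3 and TestMerge4 change from cpuTest to gpuTest in this commit, which is the actual fix being made there.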
...@@ -48,15 +48,21 @@ bool TestNegate1() ...@@ -48,15 +48,21 @@ bool TestNegate1()
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize); XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize);
XTensor * aMe = NewTensor(aOrder, aDimSize);
XTensor bUser;
/* initialize variables */ /* initialize variables */
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
aMe->SetData(aData, aUnitNum);
/* call Negate function */ /* call Negate function */
_Negate(a); _Negate(a, b);
_NegateMe(aMe);
bUser = Negate(*a);
/* check results */ /* check results */
cpuTest = a->CheckData(answer, aUnitNum); cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) && aMe->CheckData(answer, aUnitNum, 1e-4F) && bUser.CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -64,25 +70,37 @@ bool TestNegate1() ...@@ -64,25 +70,37 @@ bool TestNegate1()
/* create tensor */ /* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0); XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* call Negate function */ /* call Negate function */
_Negate(aGPU); _Negate(aGPU, bGPU);
_NegateMe(aMeGPU);
bUserGPU = Negate(*aGPU);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum); gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) && aMeGPU->CheckData(answer, aUnitNum, 1e-4F) && bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b;
delete aMe;
delete aGPU; delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize; delete[] aDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b;
delete aMe;
delete[] aDimSize; delete[] aDimSize;
return cpuTest; return cpuTest;
...@@ -114,15 +132,21 @@ bool TestNegate2() ...@@ -114,15 +132,21 @@ bool TestNegate2()
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize); XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize);
XTensor * aMe = NewTensor(aOrder, aDimSize);
XTensor bUser;
/* initialize variables */ /* initialize variables */
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
aMe->SetData(aData, aUnitNum);
/* call Negate function */ /* call Negate function */
_Negate(a); _Negate(a, b);
_NegateMe(aMe);
bUser = Negate(*a);
/* check results */ /* check results */
cpuTest = a->CheckData(answer, aUnitNum); cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) && aMe->CheckData(answer, aUnitNum, 1e-4F) && bUser.CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -130,25 +154,37 @@ bool TestNegate2() ...@@ -130,25 +154,37 @@ bool TestNegate2()
/* create tensor */ /* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0); XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* call Negate function */ /* call Negate function */
_Negate(aGPU); _Negate(aGPU, bGPU);
_NegateMe(aMeGPU);
bUserGPU = Negate(*aGPU);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum); gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) && aMeGPU->CheckData(answer, aUnitNum, 1e-4F) && bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b;
delete aMe;
delete aGPU; delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize; delete[] aDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b;
delete aMe;
delete[] aDimSize; delete[] aDimSize;
return cpuTest; return cpuTest;
......
...@@ -52,15 +52,21 @@ bool TestPower1() ...@@ -52,15 +52,21 @@ bool TestPower1()
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize); XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize);
XTensor * aMe = NewTensor(aOrder, aDimSize);
XTensor bUser;
/* initialize variables */ /* initialize variables */
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
aMe->SetData(aData, aUnitNum);
/* call Power function */ /* call Power function */
_Power(a, 2.0F); _Power(a, b, 2.0F);
_PowerMe(aMe, 2.0F);
bUser = Power(*a, 2.0F);
/* check results */ /* check results */
cpuTest = a->CheckData(answer, aUnitNum, 1e-4F); cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) && aMe->CheckData(answer, aUnitNum, 1e-4F) && bUser.CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -68,25 +74,37 @@ bool TestPower1() ...@@ -68,25 +74,37 @@ bool TestPower1()
/* create tensor */ /* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0); XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* call power function */ /* call power function */
_Power(aGPU, 2.0F); _Power(aGPU, bGPU, 2.0F);
_PowerMe(aMeGPU, 2.0F);
bUserGPU = Power(*aGPU, 2.0F);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum, 1e-4F); gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) && aMeGPU->CheckData(answer, aUnitNum, 1e-4F) && bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b;
delete aMe;
delete aGPU; delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize; delete[] aDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b;
delete aMe;
delete[] aDimSize; delete[] aDimSize;
return cpuTest; return cpuTest;
...@@ -121,15 +139,21 @@ bool TestPower2() ...@@ -121,15 +139,21 @@ bool TestPower2()
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize); XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize);
XTensor * aMe = NewTensor(aOrder, aDimSize);
XTensor bUser;
/* initialize variables */ /* initialize variables */
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
aMe->SetData(aData, aUnitNum);
/* call Power function */ /* call Power function */
_Power(a, 1.0F); _Power(a, b, 1.0F);
_PowerMe(aMe, 1.0F);
bUser = Power(*a, 1.0F);
/* check results */ /* check results */
cpuTest = a->CheckData(answer, aUnitNum, 1e-4F); cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) && aMe->CheckData(answer, aUnitNum, 1e-4F) && bUser.CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -137,25 +161,37 @@ bool TestPower2() ...@@ -137,25 +161,37 @@ bool TestPower2()
/* create tensor */ /* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0); XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* call Power function */ /* call Power function */
_Power(aGPU, 1.0F); _Power(aGPU, bGPU, 1.0F);
_PowerMe(aMeGPU, 1.0F);
bUserGPU = Power(*aGPU, 1.0F);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum, 1e-4F); gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) && aMeGPU->CheckData(answer, aUnitNum, 1e-4F) && bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b;
delete aMe;
delete aGPU; delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize; delete[] aDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b;
delete aMe;
delete[] aDimSize; delete[] aDimSize;
return cpuTest; return cpuTest;
...@@ -190,15 +226,21 @@ bool TestPower3() ...@@ -190,15 +226,21 @@ bool TestPower3()
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize); XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize);
XTensor * aMe = NewTensor(aOrder, aDimSize);
XTensor bUser;
/* initialize variables */ /* initialize variables */
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
aMe->SetData(aData, aUnitNum);
/* call Power function */ /* call Power function */
_Power(a, 0.0F); _Power(a, b, 0.0F);
_PowerMe(aMe, 0.0F);
bUser = Power(*a, 0.0F);
/* check results */ /* check results */
cpuTest = a->CheckData(answer, aUnitNum, 1e-4F); cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) && aMe->CheckData(answer, aUnitNum, 1e-4F) && bUser.CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -206,25 +248,37 @@ bool TestPower3() ...@@ -206,25 +248,37 @@ bool TestPower3()
/* create tensor */ /* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0); XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* call Power function */ /* call Power function */
_Power(aGPU, 0.0F); _Power(aGPU, bGPU, 0.0F);
_PowerMe(aMeGPU, 0.0F);
bUserGPU = Power(*aGPU, 0.0F);
/* check results */ /* check results */
gpuTest = aGPU->CheckData(answer, aUnitNum, 1e-4F); gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) && aMeGPU->CheckData(answer, aUnitNum, 1e-4F) && bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b;
delete aMe;
delete aGPU; delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize; delete[] aDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b;
delete aMe;
delete[] aDimSize; delete[] aDimSize;
return cpuTest; return cpuTest;
......
...@@ -50,6 +50,7 @@ bool TestRectify1() ...@@ -50,6 +50,7 @@ bool TestRectify1()
/* create tensors */ /* create tensors */
XTensor * x = NewTensor(order, dimSize); XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(order, dimSize); XTensor * y = NewTensor(order, dimSize);
XTensor yUser;
/* initialize variables */ /* initialize variables */
x->SetData(xData, unitNum); x->SetData(xData, unitNum);
...@@ -57,9 +58,10 @@ bool TestRectify1() ...@@ -57,9 +58,10 @@ bool TestRectify1()
/* call Rectify function */ /* call Rectify function */
_Rectify(x, y); _Rectify(x, y);
yUser = Rectify(*x);
/* check results */ /* check results */
cpuTest = y->CheckData(answer, unitNum); cpuTest = y->CheckData(answer, unitNum, 1e-4F) && yUser.CheckData(answer, unitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -68,6 +70,7 @@ bool TestRectify1() ...@@ -68,6 +70,7 @@ bool TestRectify1()
/* create tensor */ /* create tensor */
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor yUserGPU;
/* Initialize variables */ /* Initialize variables */
xGPU->SetData(xData, unitNum); xGPU->SetData(xData, unitNum);
...@@ -75,9 +78,10 @@ bool TestRectify1() ...@@ -75,9 +78,10 @@ bool TestRectify1()
/* call Rectify function */ /* call Rectify function */
_Rectify(xGPU, yGPU); _Rectify(xGPU, yGPU);
yUserGPU = Rectify(*xGPU);
/* check results */ /* check results */
gpuTest = yGPU->CheckData(answer, unitNum); gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F) && yUserGPU.CheckData(answer, unitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete x; delete x;
......
...@@ -71,6 +71,8 @@ bool TestReduceMax1() ...@@ -71,6 +71,8 @@ bool TestReduceMax1()
XTensor * s = NewTensor(sOrder, sDimSize); XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t1 = NewTensor(tOrder1, tDimSize1); XTensor * t1 = NewTensor(tOrder1, tDimSize1);
XTensor * t2 = NewTensor(tOrder2, tDimSize2); XTensor * t2 = NewTensor(tOrder2, tDimSize2);
XTensor tUser1;
XTensor tUser2;
/* initialize variables */ /* initialize variables */
s->SetData(sData, sUnitNum); s->SetData(sData, sUnitNum);
...@@ -80,9 +82,12 @@ bool TestReduceMax1() ...@@ -80,9 +82,12 @@ bool TestReduceMax1()
/* call ReduceMax function */ /* call ReduceMax function */
_ReduceMax(s, t1, 0); _ReduceMax(s, t1, 0);
_ReduceMax(s, t2, 1); _ReduceMax(s, t2, 1);
tUser1 = ReduceMax(*s, 0);
tUser2 = ReduceMax(*s, 1);
/* check results */ /* check results */
cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2); cpuTest = t1->CheckData(answer1, tUnitNum1) && tUser1.CheckData(answer1, tUnitNum1)
&& t2->CheckData(answer2, tUnitNum2) && tUser2.CheckData(answer2, tUnitNum2);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -92,6 +97,8 @@ bool TestReduceMax1() ...@@ -92,6 +97,8 @@ bool TestReduceMax1()
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0); XTensor * tGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0);
XTensor * tGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0); XTensor * tGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0);
XTensor tUserGPU1;
XTensor tUserGPU2;
/* initialize variables */ /* initialize variables */
sGPU->SetData(sData, sUnitNum); sGPU->SetData(sData, sUnitNum);
...@@ -101,9 +108,12 @@ bool TestReduceMax1() ...@@ -101,9 +108,12 @@ bool TestReduceMax1()
/* call ReduceMax function */ /* call ReduceMax function */
_ReduceMax(sGPU, tGPU1, 0); _ReduceMax(sGPU, tGPU1, 0);
_ReduceMax(sGPU, tGPU2, 1); _ReduceMax(sGPU, tGPU2, 1);
tUserGPU1 = ReduceMax(*sGPU, 0);
tUserGPU2 = ReduceMax(*sGPU, 1);
/* check results */ /* check results */
gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2); gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tUserGPU1.CheckData(answer1, tUnitNum1)
&& tGPU2->CheckData(answer2, tUnitNum2) && tUserGPU2.CheckData(answer2, tUnitNum2);
/* destroy variables */ /* destroy variables */
delete s; delete s;
......
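The ReduceMax and ReduceMean tests share one pattern: reduce a 2-D tensor along dimension 0 or 1 and compare both the low-level and the value-returning results. A sketch, assuming the reduced dimension is dropped so the outputs are one-dimensional (the include path is also assumed):

#include "XTensor.h"   /* include path assumed */

using namespace nts;

void ReduceMaxSketch()
{
    int sDimSize[2]  = {2, 3};
    int tDimSize1[1] = {3};          /* reduce over dim 0 -> 3 values */
    int tDimSize2[1] = {2};          /* reduce over dim 1 -> 2 values */
    DTYPE sData[6] = {1.0F, 5.0F, 2.0F,
                      4.0F, 0.0F, 6.0F};
    DTYPE answer1[3] = {4.0F, 5.0F, 6.0F};   /* column-wise max */
    DTYPE answer2[2] = {5.0F, 6.0F};         /* row-wise max */

    XTensor * s  = NewTensor(2, sDimSize);
    XTensor * t1 = NewTensor(1, tDimSize1);
    XTensor * t2 = NewTensor(1, tDimSize2);
    s->SetData(sData, 6);
    t1->SetZeroAll();
    t2->SetZeroAll();

    _ReduceMax(s, t1, 0);
    _ReduceMax(s, t2, 1);
    XTensor tUser1 = ReduceMax(*s, 0);
    XTensor tUser2 = ReduceMax(*s, 1);

    bool ok = t1->CheckData(answer1, 3) && tUser1.CheckData(answer1, 3)
           && t2->CheckData(answer2, 2) && tUser2.CheckData(answer2, 2);
    (void)ok;

    delete s; delete t1; delete t2;
}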
...@@ -66,6 +66,8 @@ bool TestReduceMean1() ...@@ -66,6 +66,8 @@ bool TestReduceMean1()
XTensor * s = NewTensor(sOrder, sDimSize); XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t1 = NewTensor(tOrder1, tDimSize1); XTensor * t1 = NewTensor(tOrder1, tDimSize1);
XTensor * t2 = NewTensor(tOrder2, tDimSize2); XTensor * t2 = NewTensor(tOrder2, tDimSize2);
XTensor tUser1;
XTensor tUser2;
/* initialize variables */ /* initialize variables */
s->SetData(sData, sUnitNum); s->SetData(sData, sUnitNum);
...@@ -75,9 +77,12 @@ bool TestReduceMean1() ...@@ -75,9 +77,12 @@ bool TestReduceMean1()
/* call ReduceMean function */ /* call ReduceMean function */
_ReduceMean(s, t1, 0); _ReduceMean(s, t1, 0);
_ReduceMean(s, t2, 1); _ReduceMean(s, t2, 1);
tUser1 = ReduceMean(*s, 0);
tUser2 = ReduceMean(*s, 1);
/* check results */ /* check results */
cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2); cpuTest = t1->CheckData(answer1, tUnitNum1) && tUser1.CheckData(answer1, tUnitNum1)
&& t2->CheckData(answer2, tUnitNum2) && tUser2.CheckData(answer2, tUnitNum2);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -87,6 +92,8 @@ bool TestReduceMean1() ...@@ -87,6 +92,8 @@ bool TestReduceMean1()
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0); XTensor * tGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0);
XTensor * tGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0); XTensor * tGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0);
XTensor tUserGPU1;
XTensor tUserGPU2;
/* Initialize variables */ /* Initialize variables */
sGPU->SetData(sData, sUnitNum); sGPU->SetData(sData, sUnitNum);
...@@ -96,9 +103,12 @@ bool TestReduceMean1() ...@@ -96,9 +103,12 @@ bool TestReduceMean1()
/* call ReduceMean function */ /* call ReduceMean function */
_ReduceMean(sGPU, tGPU1, 0); _ReduceMean(sGPU, tGPU1, 0);
_ReduceMean(sGPU, tGPU2, 1); _ReduceMean(sGPU, tGPU2, 1);
tUserGPU1 = ReduceMean(*sGPU, 0);
tUserGPU2 = ReduceMean(*sGPU, 1);
/* check results */ /* check results */
cpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2); gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tUserGPU1.CheckData(answer1, tUnitNum1)
&& tGPU2->CheckData(answer2, tUnitNum2) && tUserGPU2.CheckData(answer2, tUnitNum2);
/* destroy variables */ /* destroy variables */
delete s; delete s;
......
...@@ -69,20 +69,29 @@ bool TestReduceSum1() ...@@ -69,20 +69,29 @@ bool TestReduceSum1()
/* create tensors */ /* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize); XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * shift1 = NewTensor(tOrder1, tDimSize1);
XTensor * shift2 = NewTensor(tOrder2, tDimSize2);
XTensor * t1 = NewTensor(tOrder1, tDimSize1); XTensor * t1 = NewTensor(tOrder1, tDimSize1);
XTensor * t2 = NewTensor(tOrder2, tDimSize2); XTensor * t2 = NewTensor(tOrder2, tDimSize2);
XTensor tUser1;
XTensor tUser2;
/* initialize variables */ /* initialize variables */
s->SetData(sData, sUnitNum); s->SetData(sData, sUnitNum);
shift1->SetZeroAll();
shift2->SetZeroAll();
t1->SetZeroAll(); t1->SetZeroAll();
t2->SetZeroAll(); t2->SetZeroAll();
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(s, t1, 0); _ReduceSum(s, t1, 0);
_ReduceSum(s, t2, 1); _ReduceSum(s, t2, 1);
tUser1 = ReduceSum(*s, 0, *shift1);
tUser2 = ReduceSum(*s, 1, *shift2);
/* check results */ /* check results */
cpuTest = t1->CheckData(answer1, tUnitNum1) && t2->CheckData(answer2, tUnitNum2); cpuTest = t1->CheckData(answer1, tUnitNum1) && tUser1.CheckData(answer1, tUnitNum1)
&& t2->CheckData(answer2, tUnitNum2) && tUser2.CheckData(answer2, tUnitNum2);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -90,26 +99,39 @@ bool TestReduceSum1() ...@@ -90,26 +99,39 @@ bool TestReduceSum1()
/* create tensors */ /* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * shiftGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0);
XTensor * shiftGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0); XTensor * tGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0);
XTensor * tGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0); XTensor * tGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0);
XTensor tUserGPU1;
XTensor tUserGPU2;
/* initialize variables */ /* initialize variables */
sGPU->SetData(sData, sUnitNum); sGPU->SetData(sData, sUnitNum);
shiftGPU1->SetZeroAll();
shiftGPU2->SetZeroAll();
tGPU1->SetZeroAll(); tGPU1->SetZeroAll();
tGPU2->SetZeroAll(); tGPU2->SetZeroAll();
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(sGPU, tGPU1, 0); _ReduceSum(sGPU, tGPU1, 0);
_ReduceSum(sGPU, tGPU2, 1); _ReduceSum(sGPU, tGPU2, 1);
tUserGPU1 = ReduceSum(*sGPU, 0, *shiftGPU1);
tUserGPU2 = ReduceSum(*sGPU, 1, *shiftGPU2);
/* check results */ /* check results */
cpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tGPU2->CheckData(answer2, tUnitNum2); gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tUserGPU1.CheckData(answer1, tUnitNum1)
&& tGPU2->CheckData(answer2, tUnitNum2) && tUserGPU2.CheckData(answer2, tUnitNum2);
/* destroy variables */ /* destroy variables */
delete s; delete s;
delete shift1;
delete shift2;
delete t1; delete t1;
delete t2; delete t2;
delete sGPU; delete sGPU;
delete shiftGPU1;
delete shiftGPU2;
delete tGPU1; delete tGPU1;
delete tGPU2; delete tGPU2;
delete[] sDimSize; delete[] sDimSize;
...@@ -120,6 +142,8 @@ bool TestReduceSum1() ...@@ -120,6 +142,8 @@ bool TestReduceSum1()
#else #else
/* destroy variables */ /* destroy variables */
delete s; delete s;
delete shift1;
delete shift2;
delete t1; delete t1;
delete t2; delete t2;
delete[] sDimSize; delete[] sDimSize;
......
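Unlike the low-level _ReduceSum(s, t, dim) calls, the value-returning ReduceSum(*s, dim, *shift) form added in this test also takes a shift tensor. The test zeroes that tensor, so the sketch below assumes a zero shift gives a plain sum; shapes are illustrative and the include path is assumed.

#include "XTensor.h"   /* include path assumed */

using namespace nts;

void ReduceSumSketch()
{
    int sDimSize[2]     = {2, 4};
    int shiftDimSize[1] = {4};   /* shaped like the result of reducing dim 0 */

    XTensor * s     = NewTensor(2, sDimSize);
    XTensor * shift = NewTensor(1, shiftDimSize);
    s->SetZeroAll();
    shift->SetZeroAll();         /* zero shift, as in TestReduceSum1 */

    /* column sums over dimension 0 */
    XTensor sum = ReduceSum(*s, 0, *shift);

    delete s; delete shift;
}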
...@@ -70,6 +70,7 @@ bool TestReduceSumSquared1() ...@@ -70,6 +70,7 @@ bool TestReduceSumSquared1()
XTensor * s = NewTensor(sOrder, sDimSize); XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t = NewTensor(tOrder, tDimSize);
XTensor * shift = NewTensor(shiftOrder, shiftDimSize); XTensor * shift = NewTensor(shiftOrder, shiftDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s->SetData(sData, sUnitNum); s->SetData(sData, sUnitNum);
...@@ -78,9 +79,10 @@ bool TestReduceSumSquared1() ...@@ -78,9 +79,10 @@ bool TestReduceSumSquared1()
/* call ReduceSumSquared function */ /* call ReduceSumSquared function */
_ReduceSumSquared(s, t, 0, shift); _ReduceSumSquared(s, t, 0, shift);
tUser = ReduceSumSquared(*s, 0, *shift);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -90,6 +92,7 @@ bool TestReduceSumSquared1() ...@@ -90,6 +92,7 @@ bool TestReduceSumSquared1()
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor * shiftGPU = NewTensor(shiftOrder, shiftDimSize, X_FLOAT, 1.0F, 0); XTensor * shiftGPU = NewTensor(shiftOrder, shiftDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
sGPU->SetData(sData, sUnitNum); sGPU->SetData(sData, sUnitNum);
...@@ -98,9 +101,10 @@ bool TestReduceSumSquared1() ...@@ -98,9 +101,10 @@ bool TestReduceSumSquared1()
/* call ReduceSumSquared function */ /* call ReduceSumSquared function */
_ReduceSumSquared(sGPU, tGPU, 0, shiftGPU); _ReduceSumSquared(sGPU, tGPU, 0, shiftGPU);
tUserGPU = ReduceSumSquared(*sGPU, 0, *shiftGPU);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s; delete s;
...@@ -174,6 +178,7 @@ bool TestReduceSumSquared2() ...@@ -174,6 +178,7 @@ bool TestReduceSumSquared2()
XTensor * s = NewTensor(sOrder, sDimSize); XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t = NewTensor(tOrder, tDimSize);
XTensor * shift = NewTensor(shiftOrder, shiftDimSize); XTensor * shift = NewTensor(shiftOrder, shiftDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s->SetData(sData, sUnitNum); s->SetData(sData, sUnitNum);
...@@ -182,9 +187,10 @@ bool TestReduceSumSquared2() ...@@ -182,9 +187,10 @@ bool TestReduceSumSquared2()
/* call ReduceSumSquared function */ /* call ReduceSumSquared function */
_ReduceSumSquared(s, t, 1, shift); _ReduceSumSquared(s, t, 1, shift);
tUser = ReduceSumSquared(*s, 1, *shift);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -194,6 +200,7 @@ bool TestReduceSumSquared2() ...@@ -194,6 +200,7 @@ bool TestReduceSumSquared2()
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor * shiftGPU = NewTensor(shiftOrder, shiftDimSize, X_FLOAT, 1.0F, 0); XTensor * shiftGPU = NewTensor(shiftOrder, shiftDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
sGPU->SetData(sData, sUnitNum); sGPU->SetData(sData, sUnitNum);
...@@ -202,9 +209,10 @@ bool TestReduceSumSquared2() ...@@ -202,9 +209,10 @@ bool TestReduceSumSquared2()
/* call ReduceSumSquared function */ /* call ReduceSumSquared function */
_ReduceSumSquared(sGPU, tGPU, 1, shiftGPU); _ReduceSumSquared(sGPU, tGPU, 1, shiftGPU);
tUserGPU = ReduceSumSquared(*sGPU, 1, *shiftGPU);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s; delete s;
......
...@@ -70,6 +70,7 @@ bool TestReduceVariance1() ...@@ -70,6 +70,7 @@ bool TestReduceVariance1()
XTensor * s = NewTensor(sOrder, sDimSize); XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t = NewTensor(tOrder, tDimSize);
XTensor * mean = NewTensor(meanOrder, meanDimSize); XTensor * mean = NewTensor(meanOrder, meanDimSize);
XTensor tUser;
/* initialize variables */ /* initialize variables */
s->SetData(sData, sUnitNum); s->SetData(sData, sUnitNum);
...@@ -78,9 +79,10 @@ bool TestReduceVariance1() ...@@ -78,9 +79,10 @@ bool TestReduceVariance1()
/* call ReduceVariance function */ /* call ReduceVariance function */
_ReduceVariance(s, t, 0, mean); _ReduceVariance(s, t, 0, mean);
tUser = ReduceVariance(*s, 0, *mean);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -90,6 +92,7 @@ bool TestReduceVariance1() ...@@ -90,6 +92,7 @@ bool TestReduceVariance1()
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0); XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor * meanGPU = NewTensor(meanOrder, meanDimSize, X_FLOAT, 1.0F, 0); XTensor * meanGPU = NewTensor(meanOrder, meanDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
sGPU->SetData(sData, sUnitNum); sGPU->SetData(sData, sUnitNum);
...@@ -98,9 +101,10 @@ bool TestReduceVariance1() ...@@ -98,9 +101,10 @@ bool TestReduceVariance1()
/* call ReduceVariance function */ /* call ReduceVariance function */
_ReduceVariance(sGPU, tGPU, 0, meanGPU); _ReduceVariance(sGPU, tGPU, 0, meanGPU);
tUserGPU = ReduceVariance(*sGPU, 0, *meanGPU);
/* check results */ /* check results */
gpuTest = t->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s; delete s;
......
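The ReduceSumSquared and ReduceVariance tests both pass an auxiliary tensor (shift or mean) shaped like the reduced result. A sketch of the variance case, where the mean is produced with ReduceMean; the test's own initialization of its mean tensor is not visible in this diff, and the include path is assumed.

#include "XTensor.h"   /* include path assumed */

using namespace nts;

void ReduceVarianceSketch()
{
    int sDimSize[2]    = {2, 3};
    int meanDimSize[1] = {3};
    DTYPE sData[6] = {1.0F, 2.0F, 3.0F,
                      3.0F, 4.0F, 5.0F};

    XTensor * s    = NewTensor(2, sDimSize);
    XTensor * mean = NewTensor(1, meanDimSize);
    s->SetData(sData, 6);

    /* column means over dimension 0, then the variance around them */
    _ReduceMean(s, mean, 0);
    XTensor var = ReduceVariance(*s, 0, *mean);

    delete s; delete mean;
}

The change on the last GPU check of TestReduceVariance1 (comparing tGPU instead of t) is the substantive fix in that file.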
...@@ -48,6 +48,7 @@ bool TestSigmoid1() ...@@ -48,6 +48,7 @@ bool TestSigmoid1()
/* create tensors */ /* create tensors */
XTensor * x = NewTensor(order, dimSize); XTensor * x = NewTensor(order, dimSize);
XTensor * y = NewTensor(order, dimSize); XTensor * y = NewTensor(order, dimSize);
XTensor yUser;
/* initialize variables */ /* initialize variables */
x->SetData(xData, unitNum); x->SetData(xData, unitNum);
...@@ -55,9 +56,10 @@ bool TestSigmoid1() ...@@ -55,9 +56,10 @@ bool TestSigmoid1()
/* call Sigmoid function */ /* call Sigmoid function */
_Sigmoid(x, y); _Sigmoid(x, y);
yUser = Sigmoid(*x);
/* check result */ /* check result */
cpuTest = y->CheckData(answer, unitNum, 1e-4F); cpuTest = y->CheckData(answer, unitNum, 1e-4F) && yUser.CheckData(answer, unitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
...@@ -66,6 +68,7 @@ bool TestSigmoid1() ...@@ -66,6 +68,7 @@ bool TestSigmoid1()
/* create tensors */ /* create tensors */
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor yUserGPU;
/* initialize variables */ /* initialize variables */
xGPU->SetData(xData, unitNum); xGPU->SetData(xData, unitNum);
...@@ -73,9 +76,10 @@ bool TestSigmoid1() ...@@ -73,9 +76,10 @@ bool TestSigmoid1()
/* call Sigmoid function */ /* call Sigmoid function */
_Sigmoid(xGPU, yGPU); _Sigmoid(xGPU, yGPU);
yUserGPU = Sigmoid(*xGPU);
/* check result */ /* check result */
gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F); gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F) && yUserGPU.CheckData(answer, unitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete x; delete x;
......