Commit aa875fba by linye

1. implement SetData by template 2. update float16 datatype of SetData

parent 9f14dc72
...@@ -399,8 +399,8 @@ void xcTest() ...@@ -399,8 +399,8 @@ void xcTest()
InitTensor2D(&t2, 2, 4, X_FLOAT, 0, NULL); InitTensor2D(&t2, 2, 4, X_FLOAT, 0, NULL);
XTensor tensor; XTensor tensor;
_SetDataFixedFloat(&t1, 1.0F); _SetDataFixed(&t1, 1.0F);
_SetDataFixedFloat(&t2, 2.0F); _SetDataFixed(&t2, 2.0F);
tensor = t1 + t2; tensor = t1 + t2;
......
...@@ -52,15 +52,7 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient) ...@@ -52,15 +52,7 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
XTensor * dedy = output->grad; XTensor * dedy = output->grad;
if (income.tailNum == 1) { if (income.tailNum == 1) {
if(dedy->dataType == X_FLOAT) _SetDataFixed(dedy, 1.0F);
_SetDataFixedFloat(dedy, 1.0F);
else if(dedy->dataType == X_DOUBLE)
_SetDataFixedDouble(dedy, 1.0);
else if(dedy->dataType == X_INT)
_SetDataFixedInt(dedy, 1);
else
ShowNTErrors("TODO");
return; return;
} }
...@@ -144,15 +136,7 @@ void XLossGrad::Compute(XTensor * gold, XTensor * y, ...@@ -144,15 +136,7 @@ void XLossGrad::Compute(XTensor * gold, XTensor * y,
LOSS_FUNCTION_NAME lossName) LOSS_FUNCTION_NAME lossName)
{ {
if(gold == NULL){ if(gold == NULL){
if(dedy->dataType == X_FLOAT) _SetDataFixed(dedy, 1.0F);
_SetDataFixedFloat(dedy, 1.0F);
else if(dedy->dataType == X_DOUBLE)
_SetDataFixedDouble(dedy, 1.0);
else if(dedy->dataType == X_INT)
_SetDataFixedInt(dedy, 1);
else{
ShowNTErrors("TODO");
}
return; return;
} }
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "SetData.cuh" #include "SetData.cuh"
#include "../../XUtility.h" #include "../../XUtility.h"
#include "../movement/CopyValues.h" #include "../movement/CopyValues.h"
#include "ConvertDataType.h"
#if !defined( WIN32 ) && !defined( _WIN32 ) #if !defined( WIN32 ) && !defined( _WIN32 )
#include "sys/time.h" #include "sys/time.h"
...@@ -77,153 +78,78 @@ void _SetDataFanInOut(XTensor * tensor, DTYPE gain) ...@@ -77,153 +78,78 @@ void _SetDataFanInOut(XTensor * tensor, DTYPE gain)
} }
/* /*
generate data items with a fixed value p generate data items with a fixed value
>> tensor - the tensor whose data array would be initialized >> tensor - the tensor whose data array would be initialized
>> p - pointer to the number for initializing the tensor >> value - the number used to initialize the tensor
*/ */
void _SetDataFixed(XTensor * tensor, void * valuePointer) template<class T>
void _SetDataFixed(XTensor * tensor, T value)
{ {
#ifdef USE_CUDA
if (tensor->devID >= 0) {
_CudaSetDataFixed(tensor, value);
return;
}
#endif
int num = tensor->unitNum; int num = tensor->unitNum;
if(tensor->dataType == X_INT){ if (tensor->dataType == X_INT) {
int p = *(int*)valuePointer; int * d = (int*)tensor->data;
if(tensor->devID < 0){ int v = (int)value;
int * d = (int*)tensor->data; if (num % 4 == 0) {
if(num % 4 == 0){ for (int i = 0; i < num; i += 4) {
for(int i = 0; i < num; i += 4){ d[i] = v;
d[i] = p; d[i + 1] = v;
d[i + 1] = p; d[i + 2] = v;
d[i + 2] = p; d[i + 3] = v;
d[i + 3] = p;
}
}
else{
for(int i = 0; i < num; i++)
d[i] = p;
} }
} }
else{ else {
#ifdef USE_CUDA for (int i = 0; i < num; i++)
_CudaSetDataFixedInt(tensor, p); d[i] = v;
#endif
} }
} }
else if(tensor->dataType == X_FLOAT){ else if (tensor->dataType == X_FLOAT) {
float p = *(float*)valuePointer; float * d = (float*)tensor->data;
if(tensor->devID < 0){ float v = (float)value;
float * d = (float*)tensor->data; if (num % 4 == 0) {
if(num % 4 == 0){ for (int i = 0; i < num; i += 4) {
for(int i = 0; i < num; i += 4){ d[i] = v;
d[i] = p; d[i + 1] = v;
d[i + 1] = p; d[i + 2] = v;
d[i + 2] = p; d[i + 3] = v;
d[i + 3] = p;
}
}
else{
for(int i = 0; i < num; i++)
d[i] = p;
} }
} }
else{ else {
#ifdef USE_CUDA for (int i = 0; i < num; i++)
_CudaSetDataFixedFloat(tensor, p); d[i] = v;
#endif
} }
} }
else if(tensor->dataType == X_DOUBLE){ else if (tensor->dataType == X_DOUBLE) {
double p = *(double*)valuePointer; double * d = (double*)tensor->data;
if(tensor->devID < 0){ double v = (double)value;
double * d = (double*)tensor->data;
if(num % 4 == 0){ if (num % 4 == 0) {
for(int i = 0; i < num; i += 4){ for (int i = 0; i < num; i += 4) {
d[i] = p; d[i] = v;
d[i + 1] = p; d[i + 1] = v;
d[i + 2] = p; d[i + 2] = v;
d[i + 3] = p; d[i + 3] = v;
}
}
else{
for(int i = 0; i < num; i++)
d[i] = p;
} }
} }
else{ else {
#ifdef USE_CUDA for (int i = 0; i < num; i++)
_CudaSetDataFixedDouble(tensor, p); d[i] = v;
#endif
} }
} }
else{
ShowNTErrors("TODO");
}
}
/*
generate data items with a fixed value p (in default type)
_SetDataFixed reads the number behind its pointer argument using the data type
of the tensor, so the value is first converted to that type here. Passing the
address of a DTYPE directly would be misread for any tensor whose element type
is not DTYPE.
>> tensor - the tensor whose data array would be initialized
>> p - number in default type
*/
void SetDataFixed(XTensor &tensor, DTYPE p)
{
    if (tensor.dataType == X_INT) {
        int v = (int)p;
        _SetDataFixed(&tensor, &v);
    }
    else if (tensor.dataType == X_FLOAT) {
        float v = (float)p;
        _SetDataFixed(&tensor, &v);
    }
    else if (tensor.dataType == X_DOUBLE) {
        double v = (double)p;
        _SetDataFixed(&tensor, &v);
    }
    else {
        /* other data types are rejected inside _SetDataFixed */
        _SetDataFixed(&tensor, &p);
    }
}
/*
fill an integer tensor with a fixed value p
>> tensor - the tensor whose data array would be initialized (must be in X_INT)
>> p - the integer written to every item
*/
void SetDataFixedInt(XTensor &tensor, int p)
{
    CheckNTErrors(tensor.dataType == X_INT, "An integer tensor is required!");

    /* _SetDataFixed takes the value by address */
    int fillValue = p;
    _SetDataFixed(&tensor, &fillValue);
}
/*
generate data items with a fixed value p (in integer)
>> tensor - the tensor whose data array would be initialized
>> p - an int-valued number
*/
void _SetDataFixedInt(XTensor * tensor, int p)
{
CheckNTErrors(tensor->dataType == X_INT, "the tensor must be in X_INT!");
if(p == 0)
tensor->SetZeroAll();
else else
_SetDataFixed(tensor, &p); ShowNTErrors("TODO");
}
/*
fill a float tensor with a fixed value p
>> tensor - the tensor whose data array would be initialized (must be in X_FLOAT)
>> p - the float written to every item
*/
void _SetDataFixedFloat(XTensor * tensor, float p)
{
    CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!");

    /* non-zero values go through the generic routine */
    if (p != 0) {
        _SetDataFixed(tensor, &p);
        return;
    }

    /* a zero fill is delegated to SetZeroAll */
    tensor->SetZeroAll();
}
/* template void _SetDataFixed<int>(XTensor*, int);
generate data items with a fixed value p (in double) template void _SetDataFixed<float>(XTensor*, float);
>> tensor - the tensor whose data array would be initialized template void _SetDataFixed<double>(XTensor*, double);
>> p - a double-valued number
*/
void _SetDataFixedDouble(XTensor * tensor, double p)
{
    CheckNTErrors(tensor->dataType == X_DOUBLE, "the tensor must be in X_DOUBLE!");

    /* a zero fill is delegated to SetZeroAll; any other value goes through _SetDataFixed */
    const bool isZero = (p == 0);
    if (!isZero) {
        _SetDataFixed(tensor, &p);
        return;
    }
    tensor->SetZeroAll();
}
/* /*
set data items along with a given dimension (and keep the remaining items unchanged) set data items along with a given dimension (and keep the remaining items unchanged)
...@@ -396,7 +322,7 @@ generate data items with a uniform distribution in [lower, upper] ...@@ -396,7 +322,7 @@ generate data items with a uniform distribution in [lower, upper]
>> lower - lower value of the range >> lower - lower value of the range
>> upper - upper value of the range >> upper - upper value of the range
*/ */
void _SetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper) void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
{ {
CheckNTErrors(upper > lower, "the high value must be greater than low value!"); CheckNTErrors(upper > lower, "the high value must be greater than low value!");
...@@ -433,10 +359,6 @@ void _SetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper) ...@@ -433,10 +359,6 @@ void _SetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper)
#ifdef USE_CUDA #ifdef USE_CUDA
_CudaSetDataRand(tensor, lower, upper); _CudaSetDataRand(tensor, lower, upper);
#endif #endif
//XTensor * t2 = NewTensor(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, -1);
//_SetDataRand(t2, low, high);
//_CopyValues(t2, tensor);
//delete t2;
} }
} }
...@@ -449,10 +371,8 @@ the item to a pre-defined value if the item >= p, set the item to 0 otherwise ...@@ -449,10 +371,8 @@ the item to a pre-defined value if the item >= p, set the item to 0 otherwise
>> p - the threshold >> p - the threshold
>> value - the value we intend to assign to the item >> value - the value we intend to assign to the item
*/ */
void _SetDataRandP(const XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value) void _SetDataRandP(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value)
{ {
CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO");
if (tensor->devID < 0) { if (tensor->devID < 0) {
_SetDataRand(tensor, lower, upper); _SetDataRand(tensor, lower, upper);
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
/* /*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
* I'm surprised that I did not write this file till today. * I'm surprised that I did not write this file till today.
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-22 float16 added
*/ */
#include <curand.h> #include <curand.h>
...@@ -27,17 +28,19 @@ ...@@ -27,17 +28,19 @@
#include <curand_kernel.h> #include <curand_kernel.h>
#include "../../XDevice.h" #include "../../XDevice.h"
#include "../../XUtility.h" #include "../../XUtility.h"
#include "ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
set an integer data array with a fixed value p (in int) set a data array with a fixed value p (in int, float, float16, double)
>> d - pointer to the data array >> d - pointer to the data array
>> size - size of the array >> size - size of the array
>> p - the initial value >> p - the initial value
*/ */
__global__ template<class T>
void KernelSetDataFixedInt(int * d, int size, int p) __global__
void KernelSetDataFixed(T * d, int size, T p)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
...@@ -45,15 +48,14 @@ void KernelSetDataFixedInt(int * d, int size, int p) ...@@ -45,15 +48,14 @@ void KernelSetDataFixedInt(int * d, int size, int p)
d[i] = p; d[i] = p;
} }
/* /*
generate data items with a fixed value p (in int) generate data items with a fixed value p (in int, float, float16, double)
>> tensor - the tensor for initialization >> tensor - the tensor for initialization
>> p - the initial value >> p - the initial value
*/ */
void _CudaSetDataFixedInt(XTensor * tensor, int p) template<class T>
void _CudaSetDataFixed(XTensor * tensor, T p)
{ {
CheckNTErrors(tensor->dataType == X_INT, "the tensor must be in X_INT!");
int gridSize[3]; int gridSize[3];
int blockSize[3]; int blockSize[3];
...@@ -65,34 +67,59 @@ void _CudaSetDataFixedInt(XTensor * tensor, int p) ...@@ -65,34 +67,59 @@ void _CudaSetDataFixedInt(XTensor * tensor, int p)
int devIDBackup; int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup); ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedInt <<<blocks, threads >>>((int*)tensor->data, tensor->unitNum, p); if (tensor->dataType == X_INT){
KernelSetDataFixed<<<blocks, threads>>>((int*)tensor->data, tensor->unitNum, (int)p);
}
else if (tensor->dataType == X_FLOAT){
KernelSetDataFixed<<<blocks, threads>>>((DTYPE*)tensor->data, tensor->unitNum, (float)p);
}
else if (tensor->dataType == X_DOUBLE){
KernelSetDataFixed<<<blocks, threads>>>((double*)tensor->data, tensor->unitNum, (double)p);
}
else if (tensor->dataType == X_FLOAT16){
half p1 = __float2half(p);
KernelSetDataFixed<<<blocks, threads>>>((__half*)tensor->data, tensor->unitNum, p1);
}
else
ShowNTErrors("TODO");
BacktoCudaDev(tensor->devID, devIDBackup); BacktoCudaDev(tensor->devID, devIDBackup);
} }
/* template void _CudaSetDataFixed<int>(XTensor*, int);
set a float data array with a fixed value p (in int) template void _CudaSetDataFixed<float>(XTensor*, float);
>> d - pointer to the data array template void _CudaSetDataFixed<double>(XTensor*, double);
//__device__
//template void _CudaSetDataFixed<half>(XTensor*, half);
/*
set data array with a uniform distribution in [low, high]
>> deviceStates - the state of curand
>> d - float, float16, double datatype pointer to the data array
>> size - size of the array >> size - size of the array
>> p - the initial value >> lower - low value of the range
>> variance - the variance of the range
*/ */
__global__ template<class T>
void KernelSetDataFixedFloat(float * d, int size, float p) __global__
void KernelSetDataRand(T * d, int size, T lower, T variance)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) if (i < size) {
d[i] = p; d[i] = d[i] * variance + lower;
}
} }
/* /*
generate data items with a fixed value p (in float) generate data items with a uniform distribution in [lower, upper]
>> tensor - the tensor for initialization >> tensor - the tensor whose data array would be initialized
>> p - the initial value >> lower - lower value of the range
>> upper - upper value of the range
*/ */
void _CudaSetDataFixedFloat(XTensor * tensor, float p) void _CudaSetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
{ {
CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!"); CheckNTErrors(upper > lower, "the high value must be greater than low value!");
int gridSize[3]; int gridSize[3];
int blockSize[3]; int blockSize[3];
...@@ -105,34 +132,69 @@ void _CudaSetDataFixedFloat(XTensor * tensor, float p) ...@@ -105,34 +132,69 @@ void _CudaSetDataFixedFloat(XTensor * tensor, float p)
int devIDBackup; int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup); ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedFloat <<<blocks, threads >>>((float*)tensor->data, tensor->unitNum, p); XTensor tensor1(tensor->order, tensor->dimSize, X_FLOAT, tensor->denseRatio, tensor->devID, tensor->mem);
if (tensor->dataType == X_FLOAT || tensor->dataType == X_DOUBLE){
curandGenerator_t & gen = GDevs.GPUs[tensor->devID].gen;
curandGenerateUniform(gen, (float*)tensor->data, tensor->unitNum);
}
else {
curandGenerator_t & gen = GDevs.GPUs[tensor->devID].gen;
curandGenerateUniform(gen, (float*)tensor1.data, tensor1.unitNum);
}
DTYPE variance = upper - lower;
if (tensor->dataType == X_FLOAT){
KernelSetDataRand<<<blocks, threads>>>((DTYPE*)tensor->data, tensor->unitNum, lower, variance);
}
else if (tensor->dataType == X_FLOAT16){
_ConvertDataType(&tensor1, tensor);
half lower1 = __float2half(lower);
half variance1 = __float2half(variance);
KernelSetDataRand<<<blocks, threads>>>((__half*)tensor->data, tensor->unitNum, lower1, variance1);
}
else {
ShowNTErrors("TODO");
}
BacktoCudaDev(tensor->devID, devIDBackup); BacktoCudaDev(tensor->devID, devIDBackup);
} }
/* /*
set a double data array with a fixed value p (in int) set data items to a pre-defined value if its value >= p, set it to 0 otherwise
>> d - pointer to the data array >> d - pointer to the data array
>> size - size of the array >> size - size of the array
>> p - the initial value >> p - the threshold
>> value - the value assigned to an item when the item >= p
*/ */
__global__ template<class T>
void KernelSetDataFixedDouble(double * d, int size, double p) __global__
void KernelSetDataPCut(T * d, int size, T p, T value)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) if (i < size) {
d[i] = p; if (d[i] >= p)
d[i] = value;
else
d[i] = 0;
}
} }
/* /*
generate data items with a fixed value p (in double) generate data items with a uniform distribution in [lower, upper] and set
>> tensor - the tensor for initialization the item to a pre-defined value if the item >= p, set the item to 0 otherwise
>> p - the initial value >> tensor - the tensor whose data array would be initialized
>> lower - lower value of the range
>> upper - upper value of the range
>> p - the threshold
>> value - the value we intend to assign to the item
*/ */
void _CudaSetDataFixedDouble(XTensor * tensor, double p) void _CudaSetDataRandP(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value)
{ {
CheckNTErrors(tensor->dataType == X_DOUBLE, "the tensor must be in X_DOUBLE!"); _CudaSetDataRand(tensor, lower, upper);
int gridSize[3]; int gridSize[3];
int blockSize[3]; int blockSize[3];
...@@ -145,67 +207,19 @@ void _CudaSetDataFixedDouble(XTensor * tensor, double p) ...@@ -145,67 +207,19 @@ void _CudaSetDataFixedDouble(XTensor * tensor, double p)
int devIDBackup; int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup); ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedDouble <<<blocks, threads >>>((double*)tensor->data, tensor->unitNum, p); if (tensor->dataType == X_FLOAT) {
KernelSetDataPCut<<<blocks, threads>>>((DTYPE*)tensor->data, tensor->unitNum, p, value);
BacktoCudaDev(tensor->devID, devIDBackup);
}
/*
set data array with a uniform distribution in [low, high]
>> deviceStates - the state of curand
>> d - float datatype pointer to the data array
>> size - size of the array
>> lower - low value of the range
>> variance - the variance of the range
*/
__global__
void KernelSetDataRandFloat(float * d, int size, DTYPE lower, DTYPE variance)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) {
d[i] = d[i] * variance + lower;
} }
} else if (tensor->dataType == X_FLOAT16) {
/* half p1 = __float2half(p);
set data array with a uniform distribution in [low, high] half value1 = __float2half(value);
>> deviceStates - the state of curand KernelSetDataPCut<<<blocks, threads>>>((__half*)tensor->data, tensor->unitNum, p1, value1);
>> d - double datatype pointer to the data array
>> size - size of the array
>> lower - low value of the range
>> variance - the variance of the range
*/
__global__
void KernelSetDataRandDouble(double * d, int size, DTYPE lower, DTYPE variance)
{
    /* global 1-D index of this thread; indices past the end of the array do nothing */
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
    if (idx >= size)
        return;

    /* rescale the value already stored in d[idx]: x -> x * variance + lower */
    const double scaled = d[idx] * variance + lower;
    d[idx] = scaled;
}
/* BacktoCudaDev(tensor->devID, devIDBackup);
set data items to a pre-defined value if its value >= p, set it to 0 otherwise
>> d - pointer to the data array
>> size - size of the array
>> p - the threshold
>> value - the value assigned to an item when the item >= p
*/
__global__
void KernelSetDataPCut(DTYPE * d, int size, DTYPE p, DTYPE value)
{
    /* global 1-D index of this thread; indices past the end of the array do nothing */
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
    if (idx >= size)
        return;

    /* keep `value` where the item reaches the threshold p, write 0 elsewhere */
    d[idx] = (d[idx] >= p) ? value : (DTYPE)0;
}
/* /*
set data items along with a given dimension (and keep the remaining items unchanged) - kernel version set data items along with a given dimension (and keep the remaining items unchanged) - kernel version
>> tensor - the tensor whose data array would be initialized >> tensor - the tensor whose data array would be initialized
>> beg - the beginning position >> beg - the beginning position
...@@ -213,8 +227,9 @@ set data items along with a given dimension (and keep the remaining items unchan ...@@ -213,8 +227,9 @@ set data items along with a given dimension (and keep the remaining items unchan
>> blockSize - size of a data block >> blockSize - size of a data block
>> blockNum - number of data blocks >> blockNum - number of data blocks
*/ */
template<class T>
__global__ __global__
void KernelSetDataDim(DTYPE * d, int beg, int len, int blockSize, int blockNum, DTYPE p) void KernelSetDataDim(T * d, int beg, int len, int blockSize, int blockNum, T p)
{ {
/* offset in each block */ /* offset in each block */
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
...@@ -222,10 +237,10 @@ void KernelSetDataDim(DTYPE * d, int beg, int len, int blockSize, int blockNum, ...@@ -222,10 +237,10 @@ void KernelSetDataDim(DTYPE * d, int beg, int len, int blockSize, int blockNum,
/* block id */ /* block id */
int j = blockDim.y * blockIdx.y + threadIdx.y; int j = blockDim.y * blockIdx.y + threadIdx.y;
if(i >= blockSize || j > blockNum) if (i >= blockSize || j > blockNum)
return; return;
if(i < beg || i >= beg + len) if (i < beg || i >= beg + len)
return; return;
d[blockSize * j + i] = p; d[blockSize * j + i] = p;
...@@ -251,15 +266,14 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p) ...@@ -251,15 +266,14 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
{ {
int n = tensor->order; int n = tensor->order;
CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim < n && dim >= 0, "Illegal dimension!"); CheckNTErrors(dim < n && dim >= 0, "Illegal dimension!");
CheckNTErrors(beg >= 0 && beg < tensor->GetDim(dim), "Illegal beginning position!"); CheckNTErrors(beg >= 0 && beg < tensor->GetDim(dim), "Illegal beginning position!");
CheckNTErrors(beg + len >= 0 && beg + len < tensor->GetDim(dim), "Illegal length!"); CheckNTErrors(beg + len >= 0 && beg + len < tensor->GetDim(dim), "Illegal length!");
int stride = 1; int stride = 1;
int blockSize = 1; int blockSize = 1;
int blockNum = 1; int blockNum = 1;
for(int i = n - 1; i > dim; i--){ for (int i = n - 1; i > dim; i--) {
stride *= tensor->GetDim(i); stride *= tensor->GetDim(i);
} }
blockSize = stride * tensor->GetDim(dim); blockSize = stride * tensor->GetDim(dim);
...@@ -276,8 +290,15 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p) ...@@ -276,8 +290,15 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
int devIDBackup; int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup); ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataDim<<<blocks, threads >>>((DTYPE*)tensor->data, beg * stride, if (tensor->dataType == X_FLOAT){
len * stride, blockSize, blockNum, p); KernelSetDataDim<<<blocks, threads>>>((DTYPE*)tensor->data, beg * stride,
len * stride, blockSize, blockNum, p);
}
else if (tensor->dataType == X_FLOAT16){
half p1 = __float2half(p);
KernelSetDataDim<<<blocks, threads>>>((__half*)tensor->data, beg * stride,
len * stride, blockSize, blockNum, p1);
}
BacktoCudaDev(tensor->devID, devIDBackup); BacktoCudaDev(tensor->devID, devIDBackup);
} }
...@@ -292,16 +313,17 @@ modify data items along with a given index and dimension ...@@ -292,16 +313,17 @@ modify data items along with a given index and dimension
>> blockSize - size of a data block >> blockSize - size of a data block
>> stride - stride of a data block >> stride - stride of a data block
*/ */
template<class T>
__global__ __global__
void KernelSetDataIndexed(DTYPE * s, DTYPE * m, int blockNum, int blockSize, int stride) void KernelSetDataIndexed(T * s, T * m, int blockNum, int blockSize, int stride)
{ {
/* offset in each block */ /* offset in each block */
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
/* block id */ /* block id */
int j = blockDim.y * blockIdx.y + threadIdx.y; int j = blockDim.y * blockIdx.y + threadIdx.y;
if(i >= stride || j >= blockNum) if (i >= stride || j >= blockNum)
return; return;
int x = blockSize * j + i; int x = blockSize * j + i;
...@@ -332,7 +354,6 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index) ...@@ -332,7 +354,6 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
int order = source->order; int order = source->order;
int size = source->GetDim(dim); int size = source->GetDim(dim);
CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!"); CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
CheckNTErrors(index >= 0 && index < size, "Illegal index!"); CheckNTErrors(index >= 0 && index < size, "Illegal index!");
...@@ -358,8 +379,14 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index) ...@@ -358,8 +379,14 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
int devIDBackup; int devIDBackup;
ProtectCudaDev(source->devID, devIDBackup); ProtectCudaDev(source->devID, devIDBackup);
KernelSetDataIndexed<<<blocks, threads >>>((DTYPE*)source->data + index * stride, (DTYPE*)modify->data, if (source->dataType == X_FLOAT){
blockNum, blockSize, stride); KernelSetDataIndexed<<<blocks, threads>>>((DTYPE*)source->data + index * stride, (DTYPE*)modify->data,
blockNum, blockSize, stride);
}
else if (source->dataType == X_FLOAT16){
KernelSetDataIndexed<<<blocks, threads>>>((__half*)source->data + index * stride, (__half*)modify->data,
blockNum, blockSize, stride);
}
BacktoCudaDev(source->devID, devIDBackup); BacktoCudaDev(source->devID, devIDBackup);
} }
...@@ -452,71 +479,6 @@ void _CudaSetDataLowTri(XTensor * tensor, DTYPE p, int shift) ...@@ -452,71 +479,6 @@ void _CudaSetDataLowTri(XTensor * tensor, DTYPE p, int shift)
} }
/*
generate data items with a uniform distribution in [lower, upper]
The random numbers are produced by the cuRAND generator of the tensor's device
and then rescaled in place by variance = upper - lower and shifted by lower.
Only X_FLOAT and X_DOUBLE tensors are supported: the generator call must match
the element width of the buffer, otherwise the data array is filled with
values of the wrong size.
>> tensor - the tensor whose data array would be initialized
>> lower - lower value of the range
>> upper - upper value of the range
*/
void _CudaSetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper)
{
    CheckNTErrors(upper > lower, "the high value must be greater than low value!");
    CheckNTErrors(tensor->dataType == X_FLOAT || tensor->dataType == X_DOUBLE,
                  "the tensor must be in X_FLOAT or X_DOUBLE!");

    int gridSize[3];
    int blockSize[3];

    GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);

    dim3 blocks(gridSize[0]);
    dim3 threads(blockSize[0]);

    int devIDBackup;
    ProtectCudaDev(tensor->devID, devIDBackup);

    curandGenerator_t & gen = GDevs.GPUs[tensor->devID].gen;

    DTYPE variance = upper - lower;

    /* the rescaling kernel is skipped when it would leave the values unchanged */
    bool needRescale = (variance != 1.0F || lower != 0);

    if (tensor->dataType == X_FLOAT) {
        curandStatus_t status = curandGenerateUniform(gen, (float*)tensor->data, tensor->unitNum);
        CheckNTErrors(status == CURAND_STATUS_SUCCESS, "curandGenerateUniform failed!");

        if (needRescale)
            KernelSetDataRandFloat<<<blocks, threads>>>((float*)tensor->data, tensor->unitNum, lower, variance);
    }
    else if (tensor->dataType == X_DOUBLE) {
        /* a double buffer needs the double-precision generator; the float one
           would write 32-bit values into 64-bit items */
        curandStatus_t status = curandGenerateUniformDouble(gen, (double*)tensor->data, tensor->unitNum);
        CheckNTErrors(status == CURAND_STATUS_SUCCESS, "curandGenerateUniformDouble failed!");

        if (needRescale)
            KernelSetDataRandDouble<<<blocks, threads>>>((double*)tensor->data, tensor->unitNum, lower, variance);
    }

    BacktoCudaDev(tensor->devID, devIDBackup);
}
/*
generate data items with a uniform distribution in [lower, upper] and set
the item to a pre-defined value if the item >= p, set the item to 0 otherwise
The thresholding kernel works on DTYPE items, so the tensor must be stored in
the default data type.
>> tensor - the tensor whose data array would be initialized
>> lower - lower value of the range
>> upper - upper value of the range
>> p - the threshold
>> value - the value we intend to assign to the item
*/
void _CudaSetDataRandP(const XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value)
{
    CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO!");

    /* step 1: fill the tensor with uniform random numbers */
    _CudaSetDataRand(tensor, lower, upper);

    int gridSize[3];
    int blockSize[3];

    GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);

    dim3 blocks(gridSize[0]);
    dim3 threads(blockSize[0]);

    int devIDBackup;
    ProtectCudaDev(tensor->devID, devIDBackup);

    /* step 2: threshold the random numbers; the cast matches the DTYPE * parameter of the kernel */
    KernelSetDataPCut<<<blocks, threads>>>((DTYPE*)tensor->data, tensor->unitNum, p, value);

    BacktoCudaDev(tensor->devID, devIDBackup);
}
/*
set the data with an array of offsets (kernel version) set the data with an array of offsets (kernel version)
>> data - pointer to the data array >> data - pointer to the data array
>> offsets - offset for each data item >> offsets - offset for each data item
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
/* /*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
* I'm surprised that I did not write this file till today. * I'm surprised that I did not write this file till today.
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-22 float16 added
*/ */
#ifndef __SETDATA_CUH__ #ifndef __SETDATA_CUH__
...@@ -28,14 +29,9 @@ ...@@ -28,14 +29,9 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* generate data items with a fixed value p (in int) */ /* generate data items with a fixed value p (in int, float, float16, double) */
void _CudaSetDataFixedInt(XTensor * tensor, int p); template<class T>
void _CudaSetDataFixed(XTensor * tensor, T p);
/* generate data items with a fixed value p (in float) */
void _CudaSetDataFixedFloat(XTensor * tensor, float p);
/* generate data items with a fixed value p (in double) */
void _CudaSetDataFixedDouble(XTensor * tensor, double p);
/* set data items along with a given dimension (and keep the remaining items unchanged) */ /* set data items along with a given dimension (and keep the remaining items unchanged) */
void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p); void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p);
...@@ -47,11 +43,11 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index) ...@@ -47,11 +43,11 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
void _CudaSetDataLowTri(XTensor * tensor, DTYPE p, int shift); void _CudaSetDataLowTri(XTensor * tensor, DTYPE p, int shift);
/* generate data items with a uniform distribution in [lower, upper] */ /* generate data items with a uniform distribution in [lower, upper] */
void _CudaSetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper); void _CudaSetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper);
/* generate data items with a uniform distribution in [lower, upper] and set /* generate data items with a uniform distribution in [lower, upper] and set
the item to a pre-defined value if the item >= p, set the item to 0 otherwise */ the item to a pre-defined value if the item >= p, set the item to 0 otherwise */
void _CudaSetDataRandP(const XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value); void _CudaSetDataRandP(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value);
/* set the data with an array of offsets */ /* set the data with an array of offsets */
void _CudaSetDataWithOffset(XTensor * tensor, MTYPE * offsets, DTYPE value, MTYPE num); void _CudaSetDataWithOffset(XTensor * tensor, MTYPE * offsets, DTYPE value, MTYPE num);
......
...@@ -24,29 +24,19 @@ ...@@ -24,29 +24,19 @@
#define __SETDATA_H__ #define __SETDATA_H__
#include "../../XTensor.h" #include "../../XTensor.h"
#include "SetData.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* generate data items with a xavier initialization */ /* generate data items with a xavier initialization */
void _SetDataFanInOut(XTensor * tensor, DTYPE gain = 1.0F); void _SetDataFanInOut(XTensor * tensor, DTYPE gain = 1.0F);
/* generate data items with a fixed value p */ ///* generate data items with a fixed value p */
void _SetDataFixed(XTensor * tensor, void * valuePointer); //void _SetDataFixed(XTensor * tensor, void * valuePointer);
/* generate data items with a fixed value p (in default type) */ /* generate data items with a fixed value p (in default type) */
void SetDataFixed(XTensor &tensor, DTYPE p); template<class T>
void _SetDataFixed(XTensor * tensor, T value);
/* generate data items with a fixed value p (in integer) */
void SetDataFixedInt(XTensor &tensor, int p);
/* generate data items with a fixed value p (in int) */
void _SetDataFixedInt(XTensor * tensor, int p);
/* generate data items with a fixed value p (in float) */
void _SetDataFixedFloat(XTensor * tensor, float p);
/* generate data items with a fixed value p (in double) */
void _SetDataFixedDouble(XTensor * tensor, double p);
/* set data items along with a given dimension (and keep the remaining items unchanged) */ /* set data items along with a given dimension (and keep the remaining items unchanged) */
void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p); void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p);
...@@ -58,11 +48,11 @@ void _SetDataIndexed(XTensor * source, XTensor * modify, int dim, int index); ...@@ -58,11 +48,11 @@ void _SetDataIndexed(XTensor * source, XTensor * modify, int dim, int index);
void _SetDataLowTri(XTensor * tensor, DTYPE p, int shift); void _SetDataLowTri(XTensor * tensor, DTYPE p, int shift);
/* generate data items with a uniform distribution in [lower, upper] */ /* generate data items with a uniform distribution in [lower, upper] */
void _SetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper); void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper);
/* generate data items with a uniform distribution in [lower, upper] and set /* generate data items with a uniform distribution in [lower, upper] and set
the item to a pre-defined value if the item >= p, set the item to 0 otherwise */ the item to a pre-defined value if the item >= p, set the item to 0 otherwise */
void _SetDataRandP(const XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value); void _SetDataRandP(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value);
/* generate data items with a normal distribution with specified mean and standard deviation */ /* generate data items with a normal distribution with specified mean and standard deviation */
void _SetDataRandN(XTensor * tensor, DTYPE mean = 0.0F, DTYPE standardDeviation = 1.0F); void _SetDataRandN(XTensor * tensor, DTYPE mean = 0.0F, DTYPE standardDeviation = 1.0F);
......
...@@ -70,7 +70,7 @@ XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale) ...@@ -70,7 +70,7 @@ XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale)
InitTensor1D(&c, x.unitNum, x.dataType, x.devID, x.mem); InitTensor1D(&c, x.unitNum, x.dataType, x.devID, x.mem);
_SetDataFixedFloat(&c, 1.0F); _SetDataFixed(&c, 1.0F);
_DropoutWithIndex(&x, &maskIndex, &c); _DropoutWithIndex(&x, &maskIndex, &c);
......
...@@ -385,11 +385,11 @@ void _LossBackward(XTensor * dedy, XTensor * t, XTensor * y, ...@@ -385,11 +385,11 @@ void _LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
{ {
if(t == NULL){ if(t == NULL){
if(dedy->dataType == X_FLOAT) if(dedy->dataType == X_FLOAT)
_SetDataFixedFloat(dedy, 1.0F); _SetDataFixed(dedy, 1.0F);
else if(dedy->dataType == X_DOUBLE) else if(dedy->dataType == X_DOUBLE)
_SetDataFixedDouble(dedy, 1.0); _SetDataFixed(dedy, 1.0);
else if(dedy->dataType == X_INT) else if(dedy->dataType == X_INT)
_SetDataFixedInt(dedy, 1); _SetDataFixed(dedy, 1);
else{ else{
ShowNTErrors("TODO"); ShowNTErrors("TODO");
} }
......
...@@ -50,7 +50,7 @@ bool TestDropout1() ...@@ -50,7 +50,7 @@ bool TestDropout1()
XTensor yUser; XTensor yUser;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(x, 1.0F); _SetDataFixed(x, 1.0F);
y->SetZeroAll(); y->SetZeroAll();
/* call Dropout function */ /* call Dropout function */
...@@ -88,7 +88,7 @@ bool TestDropout1() ...@@ -88,7 +88,7 @@ bool TestDropout1()
XTensor yUserGPU; XTensor yUserGPU;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(xGPU, 1.0F); _SetDataFixed(xGPU, 1.0F);
yGPU->SetZeroAll(); yGPU->SetZeroAll();
/* call Dropout function */ /* call Dropout function */
...@@ -157,10 +157,10 @@ bool TestDropout2() ...@@ -157,10 +157,10 @@ bool TestDropout2()
XTensor * dedy = NewTensor(order, dimSize); XTensor * dedy = NewTensor(order, dimSize);
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(x, 1.0F); _SetDataFixed(x, 1.0F);
y->SetZeroAll(); y->SetZeroAll();
dedx->SetZeroAll(); dedx->SetZeroAll();
_SetDataFixedFloat(dedy, 1.5F); _SetDataFixed(dedy, 1.5F);
/* call Dropout function */ /* call Dropout function */
float dropProb = 0.5F; float dropProb = 0.5F;
...@@ -183,10 +183,10 @@ bool TestDropout2() ...@@ -183,10 +183,10 @@ bool TestDropout2()
XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(xGPU, 1.0F); _SetDataFixed(xGPU, 1.0F);
yGPU->SetZeroAll(); yGPU->SetZeroAll();
dedxGPU->SetZeroAll(); dedxGPU->SetZeroAll();
_SetDataFixedFloat(dedyGPU, 1.5F); _SetDataFixed(dedyGPU, 1.5F);
/* call Dropout function */ /* call Dropout function */
_Dropout(xGPU, yGPU, seed, dropProb); _Dropout(xGPU, yGPU, seed, dropProb);
......
...@@ -196,8 +196,8 @@ bool TestReduceSum2() ...@@ -196,8 +196,8 @@ bool TestReduceSum2()
XTensor tUser; XTensor tUser;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(s, 1.0F); _SetDataFixed(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1)); _SetDataFixed(answer, (float)s->GetDim(1));
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(s, t, 1); _ReduceSum(s, t, 1);
...@@ -216,7 +216,7 @@ bool TestReduceSum2() ...@@ -216,7 +216,7 @@ bool TestReduceSum2()
XTensor tUserGPU; XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F); _SetDataFixed(sGPU, 1.0F);
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1); _ReduceSum(sGPU, tGPU, 1);
...@@ -285,8 +285,8 @@ bool TestReduceSum3() ...@@ -285,8 +285,8 @@ bool TestReduceSum3()
XTensor tUser; XTensor tUser;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(s, 1.0F); _SetDataFixed(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1)); _SetDataFixed(answer, (float)s->GetDim(1));
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(s, t, 1); _ReduceSum(s, t, 1);
...@@ -305,7 +305,7 @@ bool TestReduceSum3() ...@@ -305,7 +305,7 @@ bool TestReduceSum3()
XTensor tUserGPU; XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F); _SetDataFixed(sGPU, 1.0F);
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1); _ReduceSum(sGPU, tGPU, 1);
...@@ -374,8 +374,8 @@ bool TestReduceSum4() ...@@ -374,8 +374,8 @@ bool TestReduceSum4()
XTensor tUser; XTensor tUser;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(s, 1.0F); _SetDataFixed(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1)); _SetDataFixed(answer, (float)s->GetDim(1));
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(s, t, 1); _ReduceSum(s, t, 1);
...@@ -394,7 +394,7 @@ bool TestReduceSum4() ...@@ -394,7 +394,7 @@ bool TestReduceSum4()
XTensor tUserGPU; XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F); _SetDataFixed(sGPU, 1.0F);
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1); _ReduceSum(sGPU, tGPU, 1);
...@@ -465,8 +465,8 @@ bool TestReduceSum5() ...@@ -465,8 +465,8 @@ bool TestReduceSum5()
XTensor tUser; XTensor tUser;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(s, 1.0F); _SetDataFixed(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1)); _SetDataFixed(answer, (float)s->GetDim(1));
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(s, t, 1); _ReduceSum(s, t, 1);
...@@ -485,7 +485,7 @@ bool TestReduceSum5() ...@@ -485,7 +485,7 @@ bool TestReduceSum5()
XTensor tUserGPU; XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F); _SetDataFixed(sGPU, 1.0F);
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1); _ReduceSum(sGPU, tGPU, 1);
...@@ -556,8 +556,8 @@ bool TestReduceSum6() ...@@ -556,8 +556,8 @@ bool TestReduceSum6()
XTensor tUser; XTensor tUser;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(s, 1.0F); _SetDataFixed(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1)); _SetDataFixed(answer, (float)s->GetDim(1));
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(s, t, 1); _ReduceSum(s, t, 1);
...@@ -576,7 +576,7 @@ bool TestReduceSum6() ...@@ -576,7 +576,7 @@ bool TestReduceSum6()
XTensor tUserGPU; XTensor tUserGPU;
/* initialize variables */ /* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F); _SetDataFixed(sGPU, 1.0F);
/* call ReduceSum function */ /* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1); _ReduceSum(sGPU, tGPU, 1);
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University. * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
...@@ -17,10 +17,12 @@ ...@@ -17,10 +17,12 @@
/* /*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-22 float16 added
*/ */
#include "TSetData.h" #include "TSetData.h"
#include "../core/getandset/SetData.h" #include "../core/getandset/SetData.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -118,7 +120,7 @@ bool TestSetData2() ...@@ -118,7 +120,7 @@ bool TestSetData2()
XTensor * modify = NewTensor(dataOrder, dataDimSize); XTensor * modify = NewTensor(dataOrder, dataDimSize);
/* Initialize variables */ /* Initialize variables */
_SetDataFixedFloat(s, 1.0F); _SetDataFixed(s, 1.0F);
modify->SetData(data, dataUnitNum); modify->SetData(data, dataUnitNum);
/* call SetDataIndexed function */ /* call SetDataIndexed function */
...@@ -136,7 +138,7 @@ bool TestSetData2() ...@@ -136,7 +138,7 @@ bool TestSetData2()
XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0); XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */ /* Initialize variables */
_SetDataFixedFloat(sGPU, 1.0F); _SetDataFixed(sGPU, 1.0);
modifyGPU->SetData(data, dataUnitNum); modifyGPU->SetData(data, dataUnitNum);
/* call SetDataIndexed function */ /* call SetDataIndexed function */
...@@ -211,11 +213,11 @@ bool TestSetData3() ...@@ -211,11 +213,11 @@ bool TestSetData3()
XTensor * modify = NewTensor(dataOrder, dataDimSize); XTensor * modify = NewTensor(dataOrder, dataDimSize);
/* Initialize variables */ /* Initialize variables */
_SetDataFixedFloat(s, 1.0F); _SetDataFixed(s, 1.0);
modify->SetData(data, dataUnitNum); modify->SetData(data, dataUnitNum);
/* call SetDataIndexed function */ /* call SetDataIndexed function */
_SetDataFixedFloat(s, 1.0F); _SetDataFixed(s, 1.0);
_SetDataIndexed(s, modify, 1, 1); _SetDataIndexed(s, modify, 1, 1);
/* check results */ /* check results */
...@@ -230,7 +232,7 @@ bool TestSetData3() ...@@ -230,7 +232,7 @@ bool TestSetData3()
XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0); XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */ /* Initialize variables */
_SetDataFixedFloat(sGPU, 1.0F); _SetDataFixed(sGPU, 1.0);
modifyGPU->SetData(data, dataUnitNum); modifyGPU->SetData(data, dataUnitNum);
/* call SetDataIndexed function */ /* call SetDataIndexed function */
...@@ -406,6 +408,427 @@ bool TestSetData5() ...@@ -406,6 +408,427 @@ bool TestSetData5()
#endif // USE_CUDA #endif // USE_CUDA
} }
/*
case 6: float16 test of the SetDataRand function.
A float16 tensor is filled with samples drawn from a uniform distribution
in [lower, upper], converted back to float and compared with the reference.
Only the GPU path is exercised; without USE_CUDA the case trivially passes.
*/
bool TestSetData6()
{
    /* an input tensor of size (2, 4) */
    int sOrder = 2;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 4;

    /* number of items in the tensor */
    int sUnitNum = 1;
    for (int d = sOrder - 1; d >= 0; d--)
        sUnitNum *= sDimSize[d];

    /* reference values (all zeros) */
    DTYPE answer[2][4] = {0};

    /* there is no CPU case, so the CPU flag stays true */
    bool cpuTest = true;

    /* stays true when the GPU section below is compiled out */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* the float tensor used for the GPU test */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);

    /* the float16 tensor, obtained by converting sGPU */
    XTensor sHalfGPU;
    sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);

    /* fill the float16 tensor with random items in [0, 1] */
    _SetDataRand(&sHalfGPU, 0.0, 1.0);

    /* bring the result back to float */
    _ConvertDataType(&sHalfGPU, sGPU);

    /* compare with the reference */
    gpuTest = sGPU->CheckData(answer, sUnitNum, 1.0F);

    /* release the tensor */
    delete sGPU;
#endif // USE_CUDA

    /* release the shape buffer */
    delete[] sDimSize;

    return cpuTest && gpuTest;
}
/*
case 7: float16 test of the SetDataRandP function.
The items are first drawn from a uniform distribution in [lower, upper];
an item is then set to a pre-defined value if it is >= p and to 0 otherwise.
Only the GPU path is exercised; without USE_CUDA the case trivially passes.
*/
bool TestSetData7()
{
    /* an input tensor of size (2, 4) */
    int sOrder = 2;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 4;

    /* number of items in the tensor */
    int sUnitNum = 1;
    for (int d = sOrder - 1; d >= 0; d--)
        sUnitNum *= sDimSize[d];

    /* reference values (all zeros) */
    DTYPE answer[2][4] = {0};

    /* there is no CPU case, so the CPU flag stays true */
    bool cpuTest = true;

    /* stays true when the GPU section below is compiled out */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* the float tensor used for the GPU test */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);

    /* the float16 tensor, obtained by converting sGPU */
    XTensor sHalfGPU;
    sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);

    /* lower = 0, upper = 1, p = 0.5, value = 1 */
    _SetDataRandP(&sHalfGPU, 0.0, 1.0, 0.5, 1.0);

    /* bring the result back to float */
    _ConvertDataType(&sHalfGPU, sGPU);

    /* compare with the reference */
    gpuTest = sGPU->CheckData(answer, sUnitNum, 1.1F);

    /* release the tensor */
    delete sGPU;
#endif // USE_CUDA

    /* release the shape buffer */
    delete[] sDimSize;

    return cpuTest && gpuTest;
}
/*
case 8: float16 test of the SetDataIndexed function.
A (2, 4) tensor filled with ones gets the slice at index 1 of dimension 0
replaced by a data tensor of size (4).
Only the GPU path is exercised; without USE_CUDA the case trivially passes.
*/
bool TestSetData8()
{
    /* an input tensor of size (2, 4) */
    int sOrder = 2;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 4;

    int sUnitNum = 1;
    for (int d = sOrder - 1; d >= 0; d--)
        sUnitNum *= sDimSize[d];

    /* a data tensor of size (4) for the GPU test */
    int dataOrder = 1;
    int * dataDimSize = new int[dataOrder];
    dataDimSize[0] = 4;

    int dataUnitNum = 1;
    for (int d = dataOrder - 1; d >= 0; d--)
        dataUnitNum *= dataDimSize[d];

    /* items written into the selected slice */
    DTYPE data[4] = {0.0F, 1.0F, 2.0F, 3.0F};

    /* expected result: row 0 keeps the ones, row 1 holds the data */
    DTYPE answer[2][4] = { {1.0F, 1.0F, 1.0F, 1.0F},
                           {0.0F, 1.0F, 2.0F, 3.0F} };

    /* there is no CPU case, so the CPU flag stays true */
    bool cpuTest = true;

    /* stays true when the GPU section below is compiled out */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* the float tensors used for the GPU test */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);

    /* their float16 counterparts */
    XTensor sHalfGPU;
    XTensor modifyHalfGPU;

    /* load the data items before the conversion */
    modifyGPU->SetData(data, dataUnitNum);

    /* float -> float16 */
    sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);
    modifyHalfGPU = ConvertDataType(*modifyGPU, X_FLOAT16);

    /* fill the float16 source tensor with ones */
    _SetDataFixed(&sHalfGPU, 1.0);

    /* overwrite the slice (dim = 0, index = 1) */
    _SetDataIndexed(&sHalfGPU, &modifyHalfGPU, 0, 1);

    /* float16 -> float */
    _ConvertDataType(&sHalfGPU, sGPU);

    /* compare with the reference */
    gpuTest = sGPU->CheckData(answer, sUnitNum, 1e-5F);

    /* release the tensors */
    delete sGPU;
    delete modifyGPU;
#endif // USE_CUDA

    /* release the shape buffers */
    delete[] sDimSize;
    delete[] dataDimSize;

    return cpuTest && gpuTest;
}
/*
case 9: float16 test of the SetDataIndexed function.
A (2, 4, 3) tensor filled with ones gets the slice at index 1 of dimension 1
replaced by a data tensor of size (2, 3).
Only the GPU path is exercised; without USE_CUDA the case trivially passes.
*/
bool TestSetData9()
{
    /* an input tensor of size (2, 4, 3) */
    int sOrder = 3;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 4;
    sDimSize[2] = 3;

    int sUnitNum = 1;
    for (int d = sOrder - 1; d >= 0; d--)
        sUnitNum *= sDimSize[d];

    /* a data tensor of size (2, 3) for the GPU test */
    int dataOrder = 2;
    int * dataDimSize = new int[dataOrder];
    dataDimSize[0] = 2;
    dataDimSize[1] = 3;

    int dataUnitNum = 1;
    for (int d = dataOrder - 1; d >= 0; d--)
        dataUnitNum *= dataDimSize[d];

    /* items written into the selected slice */
    DTYPE data[2][3] = { {0.0F, 1.0F, 2.0F},
                         {3.0F, 4.0F, 5.0F} };

    /* expected result: only position 1 along dimension 1 is replaced */
    DTYPE answer[2][4][3] = { { {1.0F, 1.0F, 1.0F},
                                {0.0F, 1.0F, 2.0F},
                                {1.0F, 1.0F, 1.0F},
                                {1.0F, 1.0F, 1.0F} },
                              { {1.0F, 1.0F, 1.0F},
                                {3.0F, 4.0F, 5.0F},
                                {1.0F, 1.0F, 1.0F},
                                {1.0F, 1.0F, 1.0F} } };

    /* there is no CPU case, so the CPU flag stays true */
    bool cpuTest = true;

    /* stays true when the GPU section below is compiled out */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* the float tensors used for the GPU test */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);

    /* their float16 counterparts */
    XTensor sHalfGPU;
    XTensor modifyHalfGPU;

    /* load the data items before the conversion */
    modifyGPU->SetData(data, dataUnitNum);

    /* float -> float16 */
    sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);
    modifyHalfGPU = ConvertDataType(*modifyGPU, X_FLOAT16);

    /* fill the float16 source tensor with ones */
    _SetDataFixed(&sHalfGPU, 1.0);

    /* overwrite the slice (dim = 1, index = 1) */
    _SetDataIndexed(&sHalfGPU, &modifyHalfGPU, 1, 1);

    /* float16 -> float */
    _ConvertDataType(&sHalfGPU, sGPU);

    /* compare with the reference */
    gpuTest = sGPU->CheckData(answer, sUnitNum, 1e-5F);

    /* release the tensors */
    delete sGPU;
    delete modifyGPU;
#endif // USE_CUDA

    /* release the shape buffers */
    delete[] sDimSize;
    delete[] dataDimSize;

    return cpuTest && gpuTest;
}
/*
case 10: float16 test of the SetDataDim function.
Items along a given dimension are set to a fixed value while the remaining
items are kept unchanged; here row 1 of a (3, 3) tensor is set to 0.
Only the GPU path is exercised; without USE_CUDA the case trivially passes.
*/
bool TestSetData10()
{
    /* an input tensor of size (3, 3) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 3;
    dimSize[1] = 3;

    /* number of items in the tensor */
    int unitNum = 1;
    for (int d = order - 1; d >= 0; d--)
        unitNum *= dimSize[d];

    /* initial content of the tensor */
    DTYPE sData[3][3] = { {1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F},
                          {7.0F, 8.0F, 9.0F} };

    /* expected result: the middle row is zeroed */
    DTYPE answer[3][3] = { {1.0F, 2.0F, 3.0F},
                           {0.0F, 0.0F, 0.0F},
                           {7.0F, 8.0F, 9.0F} };

    /* there is no CPU case, so the CPU flag stays true */
    bool cpuTest = true;

    /* stays true when the GPU section below is compiled out */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* the float tensor used for the GPU test */
    XTensor * sGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);

    /* its float16 counterpart */
    XTensor sHalfGPU;

    /* load the initial content */
    sGPU->SetData(sData, unitNum);

    /* float -> float16 */
    sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);

    /* beg = 1, len = 1, dim = 0, p = 0 */
    _SetDataDim(&sHalfGPU, 1, 1, 0, 0);

    /* float16 -> float */
    _ConvertDataType(&sHalfGPU, sGPU);

    /* compare with the reference */
    gpuTest = sGPU->CheckData(answer, unitNum, 1e-4F);

    /* release the tensor */
    delete sGPU;
#endif // USE_CUDA

    /* release the shape buffer */
    delete[] dimSize;

    return cpuTest && gpuTest;
}
/*
case 11: float16 test of the SetDataDim function.
Items along a given dimension are set to a fixed value while the remaining
items are kept unchanged; here position 2 along dimension 1 of a (2, 4, 3)
tensor is set to 5.
Only the GPU path is exercised; without USE_CUDA the case trivially passes.
*/
bool TestSetData11()
{
    /* an input tensor of size (2, 4, 3) */
    int order = 3;
    int * dimSize = new int[order];
    dimSize[0] = 2;
    dimSize[1] = 4;
    dimSize[2] = 3;

    /* number of items in the tensor */
    int unitNum = 1;
    for (int d = order - 1; d >= 0; d--)
        unitNum *= dimSize[d];

    /* initial content of the tensor */
    DTYPE data[2][4][3] = { { {1.0F, 1.0F, 1.0F},
                              {0.0F, 1.0F, 2.0F},
                              {1.0F, 1.0F, 1.0F},
                              {1.0F, 1.0F, 1.0F} },
                            { {1.0F, 1.0F, 1.0F},
                              {3.0F, 4.0F, 5.0F},
                              {1.0F, 1.0F, 1.0F},
                              {1.0F, 1.0F, 1.0F} } };

    /* expected result: position 2 along dimension 1 becomes 5 */
    DTYPE answer[2][4][3] = { { {1.0F, 1.0F, 1.0F},
                                {0.0F, 1.0F, 2.0F},
                                {5.0F, 5.0F, 5.0F},
                                {1.0F, 1.0F, 1.0F} },
                              { {1.0F, 1.0F, 1.0F},
                                {3.0F, 4.0F, 5.0F},
                                {5.0F, 5.0F, 5.0F},
                                {1.0F, 1.0F, 1.0F} } };

    /* there is no CPU case, so the CPU flag stays true */
    bool cpuTest = true;

    /* stays true when the GPU section below is compiled out */
    bool gpuTest = true;

#ifdef USE_CUDA
    /* the float tensor used for the GPU test */
    XTensor * sGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);

    /* its float16 counterpart */
    XTensor sHalfGPU;

    /* load the initial content */
    sGPU->SetData(data, unitNum);

    /* float -> float16 */
    sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);

    /* beg = 2, len = 1, dim = 1, p = 5 */
    _SetDataDim(&sHalfGPU, 2, 1, 1, 5.0F);

    /* float16 -> float */
    _ConvertDataType(&sHalfGPU, sGPU);

    /* compare with the reference */
    gpuTest = sGPU->CheckData(answer, unitNum, 1e-4F);

    /* release the tensor */
    delete sGPU;
#endif // USE_CUDA

    /* release the shape buffer */
    delete[] dimSize;

    return cpuTest && gpuTest;
}
/* other cases */ /* other cases */
/* /*
TODO!! TODO!!
...@@ -462,6 +885,60 @@ bool TestSetData() ...@@ -462,6 +885,60 @@ bool TestSetData()
else else
XPRINT(0, stdout, ">> case 5 passed!\n"); XPRINT(0, stdout, ">> case 5 passed!\n");
/* case 6 test */
caseFlag = TestSetData6();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 6 failed!\n");
}
else
XPRINT(0, stdout, ">> case 6 passed!\n");
/* case 7 test */
caseFlag = TestSetData7();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 7 failed!\n");
}
else
XPRINT(0, stdout, ">> case 7 passed!\n");
/* case 8 test */
caseFlag = TestSetData8();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 8 failed!\n");
}
else
XPRINT(0, stdout, ">> case 8 passed!\n");
/* case 9 test */
caseFlag = TestSetData9();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 9 failed!\n");
}
else
XPRINT(0, stdout, ">> case 9 passed!\n");
/* case 10 test */
caseFlag = TestSetData10();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 10 failed!\n");
}
else
XPRINT(0, stdout, ">> case 10 passed!\n");
/* case 11 test */
caseFlag = TestSetData11();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 11 failed!\n");
}
else
XPRINT(0, stdout, ">> case 11 passed!\n");
/* other cases test */ /* other cases test */
/* /*
TODO!! TODO!!
......
...@@ -90,7 +90,7 @@ bool TestSpread1() ...@@ -90,7 +90,7 @@ bool TestSpread1()
XTensor * modify = NewTensor(dataOrder, dataDimSize); XTensor * modify = NewTensor(dataOrder, dataDimSize);
/* Initialize variables */ /* Initialize variables */
_SetDataFixedFloat(s, 0.0F); _SetDataFixed(s, 0.0F);
modify->SetData(data, dataUnitNum); modify->SetData(data, dataUnitNum);
/* call _Spread function */ /* call _Spread function */
...@@ -108,7 +108,7 @@ bool TestSpread1() ...@@ -108,7 +108,7 @@ bool TestSpread1()
XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0); XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */ /* Initialize variables */
_SetDataFixedFloat(sGPU, 0.0F); _SetDataFixed(sGPU, 0.0F);
modifyGPU->SetData(data, dataUnitNum); modifyGPU->SetData(data, dataUnitNum);
/* call _Spread function */ /* call _Spread function */
......
...@@ -295,8 +295,8 @@ bool TestSumDim3() ...@@ -295,8 +295,8 @@ bool TestSumDim3()
/* initialize variables */ /* initialize variables */
a->SetZeroAll(); a->SetZeroAll();
cMe->SetZeroAll(); cMe->SetZeroAll();
_SetDataFixedFloat(b, 1.0F); _SetDataFixed(b, 1.0F);
_SetDataFixedFloat(answer, 1.0F); _SetDataFixed(answer, 1.0F);
/* call SumDim function */ /* call SumDim function */
_SumDim(a, b, c, 1); _SumDim(a, b, c, 1);
...@@ -322,7 +322,7 @@ bool TestSumDim3() ...@@ -322,7 +322,7 @@ bool TestSumDim3()
/* Initialize variables */ /* Initialize variables */
aGPU->SetZeroAll(); aGPU->SetZeroAll();
cMe->SetZeroAll(); cMe->SetZeroAll();
_SetDataFixedFloat(bGPU, 1.0F); _SetDataFixed(bGPU, 1.0F);
/* call sum function */ /* call sum function */
_SumDim(aGPU, bGPU, cGPU, 1); _SumDim(aGPU, bGPU, cGPU, 1);
...@@ -404,8 +404,8 @@ bool TestSumDim4() ...@@ -404,8 +404,8 @@ bool TestSumDim4()
/* initialize variables */ /* initialize variables */
a->SetZeroAll(); a->SetZeroAll();
cMe->SetZeroAll(); cMe->SetZeroAll();
_SetDataFixedFloat(b, 1.0F); _SetDataFixed(b, 1.0F);
_SetDataFixedFloat(answer, 1.0F); _SetDataFixed(answer, 1.0F);
/* call SumDim function */ /* call SumDim function */
_SumDim(a, b, c, 1); _SumDim(a, b, c, 1);
...@@ -431,7 +431,7 @@ bool TestSumDim4() ...@@ -431,7 +431,7 @@ bool TestSumDim4()
/* Initialize variables */ /* Initialize variables */
aGPU->SetZeroAll(); aGPU->SetZeroAll();
cMe->SetZeroAll(); cMe->SetZeroAll();
_SetDataFixedFloat(bGPU, 1.0F); _SetDataFixed(bGPU, 1.0F);
/* call sum function */ /* call sum function */
_SumDim(aGPU, bGPU, cGPU, 1); _SumDim(aGPU, bGPU, cGPU, 1);
......
...@@ -30,7 +30,7 @@ bool Test() ...@@ -30,7 +30,7 @@ bool Test()
XPRINT(0, stdout, "Testing the XTensor utilites ... \n\n"); XPRINT(0, stdout, "Testing the XTensor utilites ... \n\n");
//wrong = !TestAbsolute() || wrong; //wrong = !TestAbsolute() || wrong;
wrong = !TestClip() || wrong; //wrong = !TestClip() || wrong;
//wrong = !TestCompare() || wrong; //wrong = !TestCompare() || wrong;
//wrong = !TestConcatenate() || wrong; //wrong = !TestConcatenate() || wrong;
//wrong = !TestConcatenateSolely() || wrong; //wrong = !TestConcatenateSolely() || wrong;
...@@ -38,8 +38,8 @@ bool Test() ...@@ -38,8 +38,8 @@ bool Test()
//wrong = !TestConvertDataType() || wrong; //wrong = !TestConvertDataType() || wrong;
//wrong = !TestCopyIndexed() || wrong; //wrong = !TestCopyIndexed() || wrong;
//wrong = !TestCopyValues() || wrong; //wrong = !TestCopyValues() || wrong;
wrong = !TestDiv() || wrong; //wrong = !TestDiv() || wrong;
wrong = !TestDivDim() || wrong; //wrong = !TestDivDim() || wrong;
//wrong = !TestExp() || wrong; //wrong = !TestExp() || wrong;
//wrong = !TestGather() || wrong; //wrong = !TestGather() || wrong;
//wrong = !TestLog() || wrong; //wrong = !TestLog() || wrong;
...@@ -49,7 +49,7 @@ bool Test() ...@@ -49,7 +49,7 @@ bool Test()
//wrong = !TestMatrixMulBatched() || wrong; //wrong = !TestMatrixMulBatched() || wrong;
//wrong = !TestMerge() || wrong; //wrong = !TestMerge() || wrong;
//wrong = !TestMultiply() || wrong; //wrong = !TestMultiply() || wrong;
wrong = !TestMultiplyDim() || wrong; //wrong = !TestMultiplyDim() || wrong;
//wrong = !TestNegate() || wrong; //wrong = !TestNegate() || wrong;
//wrong = !TestNormalize() || wrong; //wrong = !TestNormalize() || wrong;
//wrong = !TestPower() || wrong; //wrong = !TestPower() || wrong;
...@@ -60,17 +60,17 @@ bool Test() ...@@ -60,17 +60,17 @@ bool Test()
//wrong = !TestReduceSumSquared() || wrong; //wrong = !TestReduceSumSquared() || wrong;
//wrong = !TestReduceVariance() || wrong; //wrong = !TestReduceVariance() || wrong;
//wrong = !TestRound() || wrong; //wrong = !TestRound() || wrong;
wrong = !TestScaleAndShift() || wrong; //wrong = !TestScaleAndShift() || wrong;
//wrong = !TestSelect() || wrong; //wrong = !TestSelect() || wrong;
//wrong = !TestSetAscendingOrder() || wrong; //wrong = !TestSetAscendingOrder() || wrong;
//wrong = !TestSetData() || wrong; wrong = !TestSetData() || wrong;
//wrong = !TestSign() || wrong; //wrong = !TestSign() || wrong;
//wrong = !TestSin() || wrong; //wrong = !TestSin() || wrong;
//wrong = !TestSort() || wrong; //wrong = !TestSort() || wrong;
//wrong = !TestSplit() || wrong; //wrong = !TestSplit() || wrong;
//wrong = !TestSpread() || wrong; //wrong = !TestSpread() || wrong;
//wrong = !TestSub() || wrong; //wrong = !TestSub() || wrong;
wrong = !TestSum() || wrong; //wrong = !TestSum() || wrong;
//wrong = !TestSumByColumnTV() || wrong; //wrong = !TestSumByColumnTV() || wrong;
//wrong = !TestSumByColumnVT() || wrong; //wrong = !TestSumByColumnVT() || wrong;
//wrong = !TestSumDim() || wrong; //wrong = !TestSumDim() || wrong;
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论