Commit aa875fba by linye

1. implement SetData by template 2. update float16 datatype of SetData

parent 9f14dc72
......@@ -399,8 +399,8 @@ void xcTest()
InitTensor2D(&t2, 2, 4, X_FLOAT, 0, NULL);
XTensor tensor;
_SetDataFixedFloat(&t1, 1.0F);
_SetDataFixedFloat(&t2, 2.0F);
_SetDataFixed(&t1, 1.0F);
_SetDataFixed(&t2, 2.0F);
tensor = t1 + t2;
......
......@@ -52,15 +52,7 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
XTensor * dedy = output->grad;
if (income.tailNum == 1) {
if(dedy->dataType == X_FLOAT)
_SetDataFixedFloat(dedy, 1.0F);
else if(dedy->dataType == X_DOUBLE)
_SetDataFixedDouble(dedy, 1.0);
else if(dedy->dataType == X_INT)
_SetDataFixedInt(dedy, 1);
else
ShowNTErrors("TODO");
_SetDataFixed(dedy, 1.0F);
return;
}
......@@ -144,15 +136,7 @@ void XLossGrad::Compute(XTensor * gold, XTensor * y,
LOSS_FUNCTION_NAME lossName)
{
if(gold == NULL){
if(dedy->dataType == X_FLOAT)
_SetDataFixedFloat(dedy, 1.0F);
else if(dedy->dataType == X_DOUBLE)
_SetDataFixedDouble(dedy, 1.0);
else if(dedy->dataType == X_INT)
_SetDataFixedInt(dedy, 1);
else{
ShowNTErrors("TODO");
}
_SetDataFixed(dedy, 1.0F);
return;
}
......
......@@ -25,6 +25,7 @@
#include "SetData.cuh"
#include "../../XUtility.h"
#include "../movement/CopyValues.h"
#include "ConvertDataType.h"
#if !defined( WIN32 ) && !defined( _WIN32 )
#include "sys/time.h"
......@@ -77,153 +78,78 @@ void _SetDataFanInOut(XTensor * tensor, DTYPE gain)
}
/*
generate data items with a fixed value p
generate data items with a fixed value
>> tensor - the tensor whose data array would be initialized
>> p - pointer to the number for initializing the tensor
>> value - the number used to initialize every item of the tensor (passed by value)
*/
void _SetDataFixed(XTensor * tensor, void * valuePointer)
template<class T>
void _SetDataFixed(XTensor * tensor, T value)
{
#ifdef USE_CUDA
if (tensor->devID >= 0) {
_CudaSetDataFixed(tensor, value);
return;
}
#endif
int num = tensor->unitNum;
if(tensor->dataType == X_INT){
int p = *(int*)valuePointer;
if(tensor->devID < 0){
int * d = (int*)tensor->data;
if(num % 4 == 0){
for(int i = 0; i < num; i += 4){
d[i] = p;
d[i + 1] = p;
d[i + 2] = p;
d[i + 3] = p;
}
}
else{
for(int i = 0; i < num; i++)
d[i] = p;
if (tensor->dataType == X_INT) {
int * d = (int*)tensor->data;
int v = (int)value;
if (num % 4 == 0) {
for (int i = 0; i < num; i += 4) {
d[i] = v;
d[i + 1] = v;
d[i + 2] = v;
d[i + 3] = v;
}
}
else{
#ifdef USE_CUDA
_CudaSetDataFixedInt(tensor, p);
#endif
else {
for (int i = 0; i < num; i++)
d[i] = v;
}
}
else if(tensor->dataType == X_FLOAT){
float p = *(float*)valuePointer;
if(tensor->devID < 0){
float * d = (float*)tensor->data;
if(num % 4 == 0){
for(int i = 0; i < num; i += 4){
d[i] = p;
d[i + 1] = p;
d[i + 2] = p;
d[i + 3] = p;
}
}
else{
for(int i = 0; i < num; i++)
d[i] = p;
else if (tensor->dataType == X_FLOAT) {
float * d = (float*)tensor->data;
float v = (float)value;
if (num % 4 == 0) {
for (int i = 0; i < num; i += 4) {
d[i] = v;
d[i + 1] = v;
d[i + 2] = v;
d[i + 3] = v;
}
}
else{
#ifdef USE_CUDA
_CudaSetDataFixedFloat(tensor, p);
#endif
else {
for (int i = 0; i < num; i++)
d[i] = v;
}
}
else if(tensor->dataType == X_DOUBLE){
double p = *(double*)valuePointer;
if(tensor->devID < 0){
double * d = (double*)tensor->data;
if(num % 4 == 0){
for(int i = 0; i < num; i += 4){
d[i] = p;
d[i + 1] = p;
d[i + 2] = p;
d[i + 3] = p;
}
}
else{
for(int i = 0; i < num; i++)
d[i] = p;
else if (tensor->dataType == X_DOUBLE) {
double * d = (double*)tensor->data;
double v = (double)value;
if (num % 4 == 0) {
for (int i = 0; i < num; i += 4) {
d[i] = v;
d[i + 1] = v;
d[i + 2] = v;
d[i + 3] = v;
}
}
else{
#ifdef USE_CUDA
_CudaSetDataFixedDouble(tensor, p);
#endif
else {
for (int i = 0; i < num; i++)
d[i] = v;
}
}
else{
ShowNTErrors("TODO");
}
}
/*
generate data items with a fixed value p (in default type)
>> tensor - the tensor whose data array would be initialized
>> p - number in default type
*/
void SetDataFixed(XTensor &tensor, DTYPE p)
{
_SetDataFixed(&tensor, &p);
}
/*
generate data items with a fixed value p (in integer)
>> tensor - the tensor whose data array would be initialized
>> p - an integer
*/
void SetDataFixedInt(XTensor &tensor, int p)
{
CheckNTErrors(tensor.dataType == X_INT, "An integer tensor is required!");
_SetDataFixed(&tensor, &p);
}
/*
generate data items with a fixed value p (in integer)
>> tensor - the tensor whose data array would be initialized
>> p - an int-valued number
*/
void _SetDataFixedInt(XTensor * tensor, int p)
{
CheckNTErrors(tensor->dataType == X_INT, "the tensor must be in X_INT!");
if(p == 0)
tensor->SetZeroAll();
else
_SetDataFixed(tensor, &p);
}
/*
generate data items with a fixed value p (in float)
>> tensor - the tensor whose data array would be initialized
>> p - a float-valued number
*/
void _SetDataFixedFloat(XTensor * tensor, float p)
{
CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!");
if(p == 0)
tensor->SetZeroAll();
else
_SetDataFixed(tensor, &p);
ShowNTErrors("TODO");
}
/*
generate data items with a fixed value p (in double)
>> tensor - the tensor whose data array would be initialized
>> p - a double-valued number
*/
void _SetDataFixedDouble(XTensor * tensor, double p)
{
CheckNTErrors(tensor->dataType == X_DOUBLE, "the tensor must be in X_DOUBLE!");
if(p == 0)
tensor->SetZeroAll();
else
_SetDataFixed(tensor, &p);
}
template void _SetDataFixed<int>(XTensor*, int);
template void _SetDataFixed<float>(XTensor*, float);
template void _SetDataFixed<double>(XTensor*, double);
/*
set data items along with a given dimension (and keep the remaining items unchanged)
......@@ -396,7 +322,7 @@ generate data items with a uniform distribution in [lower, upper]
>> lower - lower value of the range
>> upper - upper value of the range
*/
void _SetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper)
void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
{
CheckNTErrors(upper > lower, "the high value must be greater than low value!");
......@@ -433,10 +359,6 @@ void _SetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper)
#ifdef USE_CUDA
_CudaSetDataRand(tensor, lower, upper);
#endif
//XTensor * t2 = NewTensor(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, -1);
//_SetDataRand(t2, low, high);
//_CopyValues(t2, tensor);
//delete t2;
}
}
......@@ -449,10 +371,8 @@ the item to a pre-defined value if the item >= p, set the item to 0 otherwise
>> p - the threshold
>> value - the value we intend to assign to the item
*/
void _SetDataRandP(const XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value)
void _SetDataRandP(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value)
{
CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO");
if (tensor->devID < 0) {
_SetDataRand(tensor, lower, upper);
......
......@@ -19,6 +19,7 @@
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
* I'm surprised that I did not write this file till today.
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-22 float16 added
*/
#include <curand.h>
......@@ -27,17 +28,19 @@
#include <curand_kernel.h>
#include "../../XDevice.h"
#include "../../XUtility.h"
#include "ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set an integer data array with a fixed value p (in int)
/*
set a data array with a fixed value p (in int, float, float16, double)
>> d - pointer to the data array
>> size - size of the array
>> p - the initial value
*/
__global__
void KernelSetDataFixedInt(int * d, int size, int p)
template<class T>
__global__
void KernelSetDataFixed(T * d, int size, T p)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
......@@ -45,15 +48,14 @@ void KernelSetDataFixedInt(int * d, int size, int p)
d[i] = p;
}
/*
generate data items with a fixed value p (in int)
/*
generate data items with a fixed value p (in int, float, float16, double)
>> tensor - the tensor for initialization
>> p - the initial value
*/
void _CudaSetDataFixedInt(XTensor * tensor, int p)
template<class T>
void _CudaSetDataFixed(XTensor * tensor, T p)
{
CheckNTErrors(tensor->dataType == X_INT, "the tensor must be in X_INT!");
int gridSize[3];
int blockSize[3];
......@@ -65,34 +67,59 @@ void _CudaSetDataFixedInt(XTensor * tensor, int p)
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedInt <<<blocks, threads >>>((int*)tensor->data, tensor->unitNum, p);
if (tensor->dataType == X_INT){
KernelSetDataFixed<<<blocks, threads>>>((int*)tensor->data, tensor->unitNum, (int)p);
}
else if (tensor->dataType == X_FLOAT){
KernelSetDataFixed<<<blocks, threads>>>((DTYPE*)tensor->data, tensor->unitNum, (float)p);
}
else if (tensor->dataType == X_DOUBLE){
KernelSetDataFixed<<<blocks, threads>>>((double*)tensor->data, tensor->unitNum, (double)p);
}
else if (tensor->dataType == X_FLOAT16){
half p1 = __float2half(p);
KernelSetDataFixed<<<blocks, threads>>>((__half*)tensor->data, tensor->unitNum, p1);
}
else
ShowNTErrors("TODO");
BacktoCudaDev(tensor->devID, devIDBackup);
}
/*
set a float data array with a fixed value p (in int)
>> d - pointer to the data array
template void _CudaSetDataFixed<int>(XTensor*, int);
template void _CudaSetDataFixed<float>(XTensor*, float);
template void _CudaSetDataFixed<double>(XTensor*, double);
//__device__
//template void _CudaSetDataFixed<half>(XTensor*, half);
/*
set data array with a uniform distribution in [low, high]
>> deviceStates - the state of curand
>> d - float, float16, double datatype pointer to the data array
>> size - size of the array
>> p - the initial value
>> lower - low value of the range
>> variance - the variance of the range
*/
__global__
void KernelSetDataFixedFloat(float * d, int size, float p)
template<class T>
__global__
void KernelSetDataRand(T * d, int size, T lower, T variance)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = p;
if (i < size) {
d[i] = d[i] * variance + lower;
}
}
/*
generate data items with a fixed value p (in float)
>> tensor - the tensor for initialization
>> p - the initial value
/*
generate data items with a uniform distribution in [lower, upper]
>> tensor - the tensor whose data array would be initialized
>> lower - lower value of the range
>> upper - upper value of the range
*/
void _CudaSetDataFixedFloat(XTensor * tensor, float p)
void _CudaSetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
{
CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!");
CheckNTErrors(upper > lower, "the high value must be greater than low value!");
int gridSize[3];
int blockSize[3];
......@@ -105,34 +132,69 @@ void _CudaSetDataFixedFloat(XTensor * tensor, float p)
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedFloat <<<blocks, threads >>>((float*)tensor->data, tensor->unitNum, p);
XTensor tensor1(tensor->order, tensor->dimSize, X_FLOAT, tensor->denseRatio, tensor->devID, tensor->mem);
if (tensor->dataType == X_FLOAT || tensor->dataType == X_DOUBLE){
curandGenerator_t & gen = GDevs.GPUs[tensor->devID].gen;
curandGenerateUniform(gen, (float*)tensor->data, tensor->unitNum);
}
else {
curandGenerator_t & gen = GDevs.GPUs[tensor->devID].gen;
curandGenerateUniform(gen, (float*)tensor1.data, tensor1.unitNum);
}
DTYPE variance = upper - lower;
if (tensor->dataType == X_FLOAT){
KernelSetDataRand<<<blocks, threads>>>((DTYPE*)tensor->data, tensor->unitNum, lower, variance);
}
else if (tensor->dataType == X_FLOAT16){
_ConvertDataType(&tensor1, tensor);
half lower1 = __float2half(lower);
half variance1 = __float2half(variance);
KernelSetDataRand<<<blocks, threads>>>((__half*)tensor->data, tensor->unitNum, lower1, variance1);
}
else {
ShowNTErrors("TODO");
}
BacktoCudaDev(tensor->devID, devIDBackup);
}
/*
set a double data array with a fixed value p (in int)
/*
set data items to a pre-defined value if its value >= p, set it to 0 otherwise
>> d - pointer to the data array
>> size - size of the array
>> p - the initial value
>> p - the threshold each item is compared against
>> value - the value assigned to an item when it is >= p
*/
__global__
void KernelSetDataFixedDouble(double * d, int size, double p)
template<class T>
__global__
void KernelSetDataPCut(T * d, int size, T p, T value)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = p;
if (i < size) {
if (d[i] >= p)
d[i] = value;
else
d[i] = 0;
}
}
/*
generate data items with a fixed value p (in double)
>> tensor - the tensor for initialization
>> p - the initial value
/*
generate data items with a uniform distribution in [lower, upper] and set
the item to a pre-defined value if the item >= p, set the item to 0 otherwise
>> tensor - the tensor whose data array would be initialized
>> lower - lower value of the range
>> upper - upper value of the range
>> p - the threshold
>> value - the value we intend to assign to the item
*/
void _CudaSetDataFixedDouble(XTensor * tensor, double p)
void _CudaSetDataRandP(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value)
{
CheckNTErrors(tensor->dataType == X_DOUBLE, "the tensor must be in X_DOUBLE!");
_CudaSetDataRand(tensor, lower, upper);
int gridSize[3];
int blockSize[3];
......@@ -145,67 +207,19 @@ void _CudaSetDataFixedDouble(XTensor * tensor, double p)
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedDouble <<<blocks, threads >>>((double*)tensor->data, tensor->unitNum, p);
BacktoCudaDev(tensor->devID, devIDBackup);
}
/*
set data array with a uniform distribution in [low, high]
>> deviceStates - the state of curand
>> d - float datatype pointer to the data array
>> size - size of the array
>> lower - low value of the range
>> variance - the variance of the range
*/
__global__
void KernelSetDataRandFloat(float * d, int size, DTYPE lower, DTYPE variance)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) {
d[i] = d[i] * variance + lower;
if (tensor->dataType == X_FLOAT) {
KernelSetDataPCut<<<blocks, threads>>>((DTYPE*)tensor->data, tensor->unitNum, p, value);
}
}
/*
set data array with a uniform distribution in [low, high]
>> deviceStates - the state of curand
>> d - double datatype pointer to the data array
>> size - size of the array
>> lower - low value of the range
>> variance - the variance of the range
*/
__global__
void KernelSetDataRandDouble(double * d, int size, DTYPE lower, DTYPE variance)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size){
d[i] = d[i] * variance + lower;
else if (tensor->dataType == X_FLOAT16) {
half p1 = __float2half(p);
half value1 = __float2half(value);
KernelSetDataPCut<<<blocks, threads>>>((__half*)tensor->data, tensor->unitNum, p1, value1);
}
}
/*
set data items to a pre-defined value if its value >= p, set it to 0 otherwise
>> d - pointer to the data array
>> size - size of the array
>> lower - low value of the range
>> variance - the variance of the range
*/
__global__
void KernelSetDataPCut(DTYPE * d, int size, DTYPE p, DTYPE value)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) {
if (d[i] >= p)
d[i] = value;
else
d[i] = 0;
}
BacktoCudaDev(tensor->devID, devIDBackup);
}
/*
/*
set data items along with a given dimension (and keep the remaining items unchanged) - kernel version
>> tensor - the tensor whose data array would be initialized
>> beg - the beginning position
......@@ -213,8 +227,9 @@ set data items along with a given dimension (and keep the remaining items unchan
>> blockSize - size of a data block
>> blockNum - number of data blocks
*/
template<class T>
__global__
void KernelSetDataDim(DTYPE * d, int beg, int len, int blockSize, int blockNum, DTYPE p)
void KernelSetDataDim(T * d, int beg, int len, int blockSize, int blockNum, T p)
{
/* offset in each block */
int i = blockDim.x * blockIdx.x + threadIdx.x;
......@@ -222,10 +237,10 @@ void KernelSetDataDim(DTYPE * d, int beg, int len, int blockSize, int blockNum,
/* block id */
int j = blockDim.y * blockIdx.y + threadIdx.y;
if(i >= blockSize || j > blockNum)
if (i >= blockSize || j > blockNum)
return;
if(i < beg || i >= beg + len)
if (i < beg || i >= beg + len)
return;
d[blockSize * j + i] = p;
......@@ -251,15 +266,14 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
{
int n = tensor->order;
CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim < n && dim >= 0, "Illegal dimension!");
CheckNTErrors(beg >= 0 && beg < tensor->GetDim(dim), "Illegal beginning position!");
CheckNTErrors(beg + len >= 0 && beg + len < tensor->GetDim(dim), "Illegal length!");
int stride = 1;
int blockSize = 1;
int blockNum = 1;
for(int i = n - 1; i > dim; i--){
int blockNum = 1;
for (int i = n - 1; i > dim; i--) {
stride *= tensor->GetDim(i);
}
blockSize = stride * tensor->GetDim(dim);
......@@ -276,8 +290,15 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataDim<<<blocks, threads >>>((DTYPE*)tensor->data, beg * stride,
len * stride, blockSize, blockNum, p);
if (tensor->dataType == X_FLOAT){
KernelSetDataDim<<<blocks, threads>>>((DTYPE*)tensor->data, beg * stride,
len * stride, blockSize, blockNum, p);
}
else if (tensor->dataType == X_FLOAT16){
half p1 = __float2half(p);
KernelSetDataDim<<<blocks, threads>>>((__half*)tensor->data, beg * stride,
len * stride, blockSize, blockNum, p1);
}
BacktoCudaDev(tensor->devID, devIDBackup);
}
......@@ -292,16 +313,17 @@ modify data items along with a given index and dimension
>> blockSize - size of a data block
>> stride - stride of a data block
*/
template<class T>
__global__
void KernelSetDataIndexed(DTYPE * s, DTYPE * m, int blockNum, int blockSize, int stride)
void KernelSetDataIndexed(T * s, T * m, int blockNum, int blockSize, int stride)
{
/* offset in each block */
int i = blockDim.x * blockIdx.x + threadIdx.x;
/* block id */
int j = blockDim.y * blockIdx.y + threadIdx.y;
if(i >= stride || j >= blockNum)
if (i >= stride || j >= blockNum)
return;
int x = blockSize * j + i;
......@@ -332,7 +354,6 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
int order = source->order;
int size = source->GetDim(dim);
CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
CheckNTErrors(index >= 0 && index < size, "Illegal index!");
......@@ -358,8 +379,14 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
int devIDBackup;
ProtectCudaDev(source->devID, devIDBackup);
KernelSetDataIndexed<<<blocks, threads >>>((DTYPE*)source->data + index * stride, (DTYPE*)modify->data,
blockNum, blockSize, stride);
if (source->dataType == X_FLOAT){
KernelSetDataIndexed<<<blocks, threads>>>((DTYPE*)source->data + index * stride, (DTYPE*)modify->data,
blockNum, blockSize, stride);
}
else if (source->dataType == X_FLOAT16){
KernelSetDataIndexed<<<blocks, threads>>>((__half*)source->data + index * stride, (__half*)modify->data,
blockNum, blockSize, stride);
}
BacktoCudaDev(source->devID, devIDBackup);
}
......@@ -452,71 +479,6 @@ void _CudaSetDataLowTri(XTensor * tensor, DTYPE p, int shift)
}
/*
generate data items with a uniform distribution in [lower, upper]
>> tensor - the tensor whose data array would be initialized
>> lower - lower value of the range
>> upper - upper value of the range
*/
void _CudaSetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper)
{
CheckNTErrors(upper > lower, "the high value must be greater than low value!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
curandGenerator_t & gen = GDevs.GPUs[tensor->devID].gen;
curandGenerateUniform(gen , (float*)tensor->data , tensor->unitNum);
DTYPE variance = upper - lower;
if(variance != 1.0F || lower != 0){
if (tensor->dataType == X_FLOAT)
KernelSetDataRandFloat <<<blocks, threads >>>((float*) tensor->data, tensor->unitNum, lower, variance);
else if (tensor->dataType == X_DOUBLE)
KernelSetDataRandDouble <<<blocks, threads >>>((double*)tensor->data, tensor->unitNum, lower, variance);
}
BacktoCudaDev(tensor->devID, devIDBackup);
}
/*
generate data items with a uniform distribution in [lower, upper] and set
the item to a pre-defined value if the item >= p, set the item to 0 otherwise
>> tensor - the tensor whose data array would be initialized
>> lower - lower value of the range
>> upper - upper value of the range
>> p - the threshold
>> value - the value we intend to assign to the item
*/
void _CudaSetDataRandP(const XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value)
{
_CudaSetDataRand(tensor, lower, upper);
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataPCut << <blocks, threads >> >((float*)tensor->data, tensor->unitNum, p, value);
BacktoCudaDev(tensor->devID, devIDBackup);
}
/*
set the data with an array of offsets (kernel version)
>> data - pointer to the data array
>> offsets - offset for each data item
......
......@@ -19,6 +19,7 @@
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
* I'm surprised that I did not write this file till today.
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-22 float16 added
*/
#ifndef __SETDATA_CUH__
......@@ -28,14 +29,9 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
/* generate data items with a fixed value p (in int) */
void _CudaSetDataFixedInt(XTensor * tensor, int p);
/* generate data items with a fixed value p (in float) */
void _CudaSetDataFixedFloat(XTensor * tensor, float p);
/* generate data items with a fixed value p (in double) */
void _CudaSetDataFixedDouble(XTensor * tensor, double p);
/* generate data items with a fixed value p (in int, float, float16, double) */
template<class T>
void _CudaSetDataFixed(XTensor * tensor, T p);
/* set data items along with a given dimension (and keep the remaining items unchanged) */
void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p);
......@@ -47,11 +43,11 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
void _CudaSetDataLowTri(XTensor * tensor, DTYPE p, int shift);
/* generate data items with a uniform distribution in [lower, upper] */
void _CudaSetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper);
void _CudaSetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper);
/* generate data items with a uniform distribution in [lower, upper] and set
the item to a pre-defined value if the item >= p, set the item to 0 otherwise */
void _CudaSetDataRandP(const XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value);
void _CudaSetDataRandP(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value);
/* set the data with an array of offsets */
void _CudaSetDataWithOffset(XTensor * tensor, MTYPE * offsets, DTYPE value, MTYPE num);
......
......@@ -24,29 +24,19 @@
#define __SETDATA_H__
#include "../../XTensor.h"
#include "SetData.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* generate data items with a xavier initialization */
void _SetDataFanInOut(XTensor * tensor, DTYPE gain = 1.0F);
/* generate data items with a fixed value p */
void _SetDataFixed(XTensor * tensor, void * valuePointer);
///* generate data items with a fixed value p */
//void _SetDataFixed(XTensor * tensor, void * valuePointer);
/* generate data items with a fixed value p (in default type) */
void SetDataFixed(XTensor &tensor, DTYPE p);
/* generate data items with a fixed value p (in integer) */
void SetDataFixedInt(XTensor &tensor, int p);
/* generate data items with a fixed value p (in int) */
void _SetDataFixedInt(XTensor * tensor, int p);
/* generate data items with a fixed value p (in float) */
void _SetDataFixedFloat(XTensor * tensor, float p);
/* generate data items with a fixed value p (in double) */
void _SetDataFixedDouble(XTensor * tensor, double p);
template<class T>
void _SetDataFixed(XTensor * tensor, T value);
/* set data items along with a given dimension (and keep the remaining items unchanged) */
void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p);
......@@ -58,11 +48,11 @@ void _SetDataIndexed(XTensor * source, XTensor * modify, int dim, int index);
void _SetDataLowTri(XTensor * tensor, DTYPE p, int shift);
/* generate data items with a uniform distribution in [lower, upper] */
void _SetDataRand(const XTensor * tensor, DTYPE lower, DTYPE upper);
void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper);
/* generate data items with a uniform distribution in [lower, upper] and set
the item to a pre-defined value if the item >= p, set the item to 0 otherwise */
void _SetDataRandP(const XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value);
void _SetDataRandP(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value);
/* generate data items with a normal distribution with specified mean and standard deviation */
void _SetDataRandN(XTensor * tensor, DTYPE mean = 0.0F, DTYPE standardDeviation = 1.0F);
......
......@@ -70,7 +70,7 @@ XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale)
InitTensor1D(&c, x.unitNum, x.dataType, x.devID, x.mem);
_SetDataFixedFloat(&c, 1.0F);
_SetDataFixed(&c, 1.0F);
_DropoutWithIndex(&x, &maskIndex, &c);
......
......@@ -385,11 +385,11 @@ void _LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
{
if(t == NULL){
if(dedy->dataType == X_FLOAT)
_SetDataFixedFloat(dedy, 1.0F);
_SetDataFixed(dedy, 1.0F);
else if(dedy->dataType == X_DOUBLE)
_SetDataFixedDouble(dedy, 1.0);
_SetDataFixed(dedy, 1.0);
else if(dedy->dataType == X_INT)
_SetDataFixedInt(dedy, 1);
_SetDataFixed(dedy, 1);
else{
ShowNTErrors("TODO");
}
......
......@@ -50,7 +50,7 @@ bool TestDropout1()
XTensor yUser;
/* initialize variables */
_SetDataFixedFloat(x, 1.0F);
_SetDataFixed(x, 1.0F);
y->SetZeroAll();
/* call Dropout function */
......@@ -88,7 +88,7 @@ bool TestDropout1()
XTensor yUserGPU;
/* initialize variables */
_SetDataFixedFloat(xGPU, 1.0F);
_SetDataFixed(xGPU, 1.0F);
yGPU->SetZeroAll();
/* call Dropout function */
......@@ -157,10 +157,10 @@ bool TestDropout2()
XTensor * dedy = NewTensor(order, dimSize);
/* initialize variables */
_SetDataFixedFloat(x, 1.0F);
_SetDataFixed(x, 1.0F);
y->SetZeroAll();
dedx->SetZeroAll();
_SetDataFixedFloat(dedy, 1.5F);
_SetDataFixed(dedy, 1.5F);
/* call Dropout function */
float dropProb = 0.5F;
......@@ -183,10 +183,10 @@ bool TestDropout2()
XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
_SetDataFixedFloat(xGPU, 1.0F);
_SetDataFixed(xGPU, 1.0F);
yGPU->SetZeroAll();
dedxGPU->SetZeroAll();
_SetDataFixedFloat(dedyGPU, 1.5F);
_SetDataFixed(dedyGPU, 1.5F);
/* call Dropout function */
_Dropout(xGPU, yGPU, seed, dropProb);
......
......@@ -196,8 +196,8 @@ bool TestReduceSum2()
XTensor tUser;
/* initialize variables */
_SetDataFixedFloat(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1));
_SetDataFixed(s, 1.0F);
_SetDataFixed(answer, (float)s->GetDim(1));
/* call ReduceSum function */
_ReduceSum(s, t, 1);
......@@ -216,7 +216,7 @@ bool TestReduceSum2()
XTensor tUserGPU;
/* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
_SetDataFixed(sGPU, 1.0F);
/* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1);
......@@ -285,8 +285,8 @@ bool TestReduceSum3()
XTensor tUser;
/* initialize variables */
_SetDataFixedFloat(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1));
_SetDataFixed(s, 1.0F);
_SetDataFixed(answer, (float)s->GetDim(1));
/* call ReduceSum function */
_ReduceSum(s, t, 1);
......@@ -305,7 +305,7 @@ bool TestReduceSum3()
XTensor tUserGPU;
/* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
_SetDataFixed(sGPU, 1.0F);
/* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1);
......@@ -374,8 +374,8 @@ bool TestReduceSum4()
XTensor tUser;
/* initialize variables */
_SetDataFixedFloat(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1));
_SetDataFixed(s, 1.0F);
_SetDataFixed(answer, (float)s->GetDim(1));
/* call ReduceSum function */
_ReduceSum(s, t, 1);
......@@ -394,7 +394,7 @@ bool TestReduceSum4()
XTensor tUserGPU;
/* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
_SetDataFixed(sGPU, 1.0F);
/* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1);
......@@ -465,8 +465,8 @@ bool TestReduceSum5()
XTensor tUser;
/* initialize variables */
_SetDataFixedFloat(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1));
_SetDataFixed(s, 1.0F);
_SetDataFixed(answer, (float)s->GetDim(1));
/* call ReduceSum function */
_ReduceSum(s, t, 1);
......@@ -485,7 +485,7 @@ bool TestReduceSum5()
XTensor tUserGPU;
/* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
_SetDataFixed(sGPU, 1.0F);
/* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1);
......@@ -556,8 +556,8 @@ bool TestReduceSum6()
XTensor tUser;
/* initialize variables */
_SetDataFixedFloat(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1));
_SetDataFixed(s, 1.0F);
_SetDataFixed(answer, (float)s->GetDim(1));
/* call ReduceSum function */
_ReduceSum(s, t, 1);
......@@ -576,7 +576,7 @@ bool TestReduceSum6()
XTensor tUserGPU;
/* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
_SetDataFixed(sGPU, 1.0F);
/* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1);
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
......@@ -17,10 +17,12 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-22 float16 added
*/
#include "TSetData.h"
#include "../core/getandset/SetData.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -118,7 +120,7 @@ bool TestSetData2()
XTensor * modify = NewTensor(dataOrder, dataDimSize);
/* Initialize variables */
_SetDataFixedFloat(s, 1.0F);
_SetDataFixed(s, 1.0F);
modify->SetData(data, dataUnitNum);
/* call SetDataIndexed function */
......@@ -136,7 +138,7 @@ bool TestSetData2()
XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
_SetDataFixed(sGPU, 1.0);
modifyGPU->SetData(data, dataUnitNum);
/* call SetDataIndexed function */
......@@ -211,11 +213,11 @@ bool TestSetData3()
XTensor * modify = NewTensor(dataOrder, dataDimSize);
/* Initialize variables */
_SetDataFixedFloat(s, 1.0F);
_SetDataFixed(s, 1.0);
modify->SetData(data, dataUnitNum);
/* call SetDataIndexed function */
_SetDataFixedFloat(s, 1.0F);
_SetDataFixed(s, 1.0);
_SetDataIndexed(s, modify, 1, 1);
/* check results */
......@@ -230,7 +232,7 @@ bool TestSetData3()
XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
_SetDataFixed(sGPU, 1.0);
modifyGPU->SetData(data, dataUnitNum);
/* call SetDataIndexed function */
......@@ -406,6 +408,427 @@ bool TestSetData5()
#endif // USE_CUDA
}
/*
case 6: float16 test of the SetDataRand function.
fill a float16 tensor with a uniform distribution in range [lower, upper]
and check the result after converting it back to float.
*/
bool TestSetData6()
{
    /* shape of the input tensor: (2, 4) */
    int order = 2;
    int * dims = new int[order];
    dims[0] = 2;
    dims[1] = 4;

    /* number of items in the tensor */
    int unitNum = 1;
    for (int k = 0; k < order; k++)
        unitNum *= dims[k];

    /* reference data: all zeros, compared with a tolerance below */
    DTYPE answer[2][4] = {0};

    /* CPU test (nothing is run on the CPU in this case) */
    bool cpuTest = true;
    bool passed = cpuTest;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* a float tensor on GPU 0 */
    XTensor * floatGPU = NewTensor(order, dims, X_FLOAT, 1.0F, 0);

    /* its float16 counterpart */
    XTensor halfGPU;
    halfGPU = ConvertDataType(*floatGPU, X_FLOAT16);

    /* fill the float16 tensor with random items */
    _SetDataRand(&halfGPU, 0.0, 1.0);

    /* bring the items back to float so that they can be checked */
    _ConvertDataType(&halfGPU, floatGPU);

    /* every item must be within 1.0 of the reference */
    gpuTest = floatGPU->CheckData(answer, unitNum, 1.0F);

    delete floatGPU;
    passed = cpuTest && gpuTest;
#endif // USE_CUDA

    /* destroy variables */
    delete[] dims;

    return passed;
}
/*
case 7: float16 test of the SetDataRandP function.
The tensor items are first drawn from a uniform distribution in range [lower, upper];
an item is then set to a pre-defined value if it is >= p, and to 0 otherwise.
*/
bool TestSetData7()
{
    /* shape of the input tensor: (2, 4) */
    int rank = 2;
    int * shape = new int[rank];
    shape[0] = 2;
    shape[1] = 4;

    /* number of items held by the tensor */
    int itemNum = 1;
    for (int d = 0; d < rank; ++d)
        itemNum *= shape[d];

    /* all-zero reference the generated values are compared with */
    DTYPE expected[2][4] = {0};

    /* there is no CPU part in this case, so it trivially passes */
    bool cpuOk = true;
    bool gpuOk = true;

#ifdef USE_CUDA
    /* float tensor (last argument 0 is presumably the GPU device id - TODO confirm) */
    XTensor * floatGPU = NewTensor(rank, shape, X_FLOAT, 1.0F, 0);

    /* float16 counterpart, filled by converting the float tensor */
    XTensor halfGPU;
    halfGPU = ConvertDataType(*floatGPU, X_FLOAT16);

    /* run the function under test: range [0.0, 1.0], then 0.5 and 1.0
       (presumably the threshold p and the pre-defined value - TODO confirm) */
    _SetDataRandP(&halfGPU, 0.0, 1.0, 0.5, 1.0);

    /* bring the result back to float so that it can be checked */
    _ConvertDataType(&halfGPU, floatGPU);

    /* compare with the zero reference; 1.1F is presumably the allowed deviation */
    gpuOk = floatGPU->CheckData(expected, itemNum, 1.1F);

    /* release the GPU tensor */
    delete floatGPU;
#endif // USE_CUDA

    /* release the shape array */
    delete[] shape;

    return cpuOk && gpuOk;
}
/*
case 8: float16 test SetDataIndexed function.
modify data items along with a given dimension.
*/
bool TestSetData8()
{
/* a input tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a data tensor of size (4) for GPU test */
int dataOrder = 1;
int * dataDimSize = new int[dataOrder];
dataDimSize[0] = 4;
int dataUnitNum = 1;
for (int i = 0; i < dataOrder; i++)
dataUnitNum *= dataDimSize[i];
DTYPE data[4] = {0.0F, 1.0F, 2.0F, 3.0F};
DTYPE answer[2][4] = { {1.0F, 1.0F, 1.0F, 1.0F},
{0.0F, 1.0F, 2.0F, 3.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);
/* create float16 tensors */
XTensor sHalfGPU;
XTensor modifyHalfGPU;
/* Initialize modifyGPU */
modifyGPU->SetData(data, dataUnitNum);
/* convert data type from float to float16 */
sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);
modifyHalfGPU = ConvertDataType(*modifyGPU, X_FLOAT16);
/* Initialize sHalfGPU */
_SetDataFixed(&sHalfGPU, 1.0);
/* call setdataindexed function */
_SetDataIndexed(&sHalfGPU, &modifyHalfGPU, 0, 1);
/* convert data type from float16 to float */
_ConvertDataType(&sHalfGPU, sGPU);
/* check results */
gpuTest = sGPU->CheckData(answer, sUnitNum, 1e-5F);
/* destroy variables */
delete sGPU;
delete modifyGPU;
delete[] sDimSize;
delete[] dataDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] sDimSize;
delete[] dataDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 9: float16 test SetDataIndexed function.
modify data items along with a given dimension.
*/
bool TestSetData9()
{
/* a input tensor of size (2, 4, 3) */
int sOrder = 3;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
sDimSize[2] = 3;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a data tensor of size (2, 3) for GPU test */
int dataOrder = 2;
int * dataDimSize = new int[dataOrder];
dataDimSize[0] = 2;
dataDimSize[1] = 3;
int dataUnitNum = 1;
for (int i = 0; i < dataOrder; i++)
dataUnitNum *= dataDimSize[i];
DTYPE data[2][3] = { { 0.0F, 1.0F, 2.0F },
{ 3.0F, 4.0F, 5.0F } };
DTYPE answer[2][4][3] = { { {1.0F, 1.0F, 1.0F},
{0.0F, 1.0F, 2.0F},
{1.0F, 1.0F, 1.0F},
{1.0F, 1.0F, 1.0F} },
{ {1.0F, 1.0F, 1.0F},
{3.0F, 4.0F, 5.0F},
{1.0F, 1.0F, 1.0F},
{1.0F, 1.0F, 1.0F} } };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);
/* create float16 tensors */
XTensor sHalfGPU;
XTensor modifyHalfGPU;
/* Initialize modifyGPU */
modifyGPU->SetData(data, dataUnitNum);
/* convert data type from float to float16 */
sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);
modifyHalfGPU = ConvertDataType(*modifyGPU, X_FLOAT16);
/* Initialize sHalfGPU */
_SetDataFixed(&sHalfGPU, 1.0);
/* call setdataindexed function */
_SetDataIndexed(&sHalfGPU, &modifyHalfGPU, 1, 1);
/* convert data type from float16 to float */
_ConvertDataType(&sHalfGPU, sGPU);
/* check results */
gpuTest = sGPU->CheckData(answer, sUnitNum, 1e-5F);
/* destroy variables */
delete sGPU;
delete modifyGPU;
delete[] sDimSize;
delete[] dataDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] sDimSize;
delete[] dataDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 10: float16 test SetDataDim function.
set data items along with a given dimension (and keep the remaining items unchanged)
*/
bool TestSetData10()
{
/* a input tensor of size (3, 3) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 3;
dimSize[1] = 3;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE sData[3][3] = { {1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F},
{7.0F, 8.0F, 9.0F} };
DTYPE answer[3][3] = { {1.0F, 2.0F, 3.0F},
{0.0F, 0.0F, 0.0F},
{7.0F, 8.0F, 9.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* create float16 tensors */
XTensor sHalfGPU;
/* initialize variables */
sGPU->SetData(sData, unitNum);
/* convert data type from float to float16 */
sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);
/* call _setdatadim function */
_SetDataDim(&sHalfGPU, 1, 1, 0, 0);
/* convert data type from float16 to float */
_ConvertDataType(&sHalfGPU, sGPU);
/* check results */
gpuTest = sGPU->CheckData(answer, unitNum, 1e-4F);
/* destroy variables */
delete sGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 11: float16 test SetDataDim function.
set data items along with a given dimension (and keep the remaining items unchanged)
*/
bool TestSetData11()
{
/* a input tensor of size (2, 4, 3) */
int order = 3;
int * dimSize = new int[order];
dimSize[0] = 2;
dimSize[1] = 4;
dimSize[2] = 3;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE data[2][4][3] = { { {1.0F, 1.0F, 1.0F},
{0.0F, 1.0F, 2.0F},
{1.0F, 1.0F, 1.0F},
{1.0F, 1.0F, 1.0F} },
{ {1.0F, 1.0F, 1.0F},
{3.0F, 4.0F, 5.0F},
{1.0F, 1.0F, 1.0F},
{1.0F, 1.0F, 1.0F} } };
DTYPE answer[2][4][3] = { { {1.0F, 1.0F, 1.0F},
{0.0F, 1.0F, 2.0F},
{5.0F, 5.0F, 5.0F},
{1.0F, 1.0F, 1.0F} },
{ {1.0F, 1.0F, 1.0F},
{3.0F, 4.0F, 5.0F},
{5.0F, 5.0F, 5.0F},
{1.0F, 1.0F, 1.0F} } };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* create float16 tensors */
XTensor sHalfGPU;
/* initialize variables */
sGPU->SetData(data, unitNum);
/* convert data type from float to float16 */
sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);
/* call _setdatadim function */
_SetDataDim(&sHalfGPU, 2, 1, 1, 5.0F);
/* convert data type from float16 to float */
_ConvertDataType(&sHalfGPU, sGPU);
/* check results */
gpuTest = sGPU->CheckData(answer, unitNum, 1e-4F);
/* destroy variables */
delete sGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
......@@ -462,6 +885,60 @@ bool TestSetData()
else
XPRINT(0, stdout, ">> case 5 passed!\n");
/* case 6 test */
caseFlag = TestSetData6();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 6 failed!\n");
}
else
XPRINT(0, stdout, ">> case 6 passed!\n");
/* case 7 test */
caseFlag = TestSetData7();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 7 failed!\n");
}
else
XPRINT(0, stdout, ">> case 7 passed!\n");
/* case 8 test */
caseFlag = TestSetData8();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 8 failed!\n");
}
else
XPRINT(0, stdout, ">> case 8 passed!\n");
/* case 9 test */
caseFlag = TestSetData9();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 9 failed!\n");
}
else
XPRINT(0, stdout, ">> case 9 passed!\n");
/* case 10 test */
caseFlag = TestSetData10();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 10 failed!\n");
}
else
XPRINT(0, stdout, ">> case 10 passed!\n");
/* case 11 test */
caseFlag = TestSetData11();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 11 failed!\n");
}
else
XPRINT(0, stdout, ">> case 11 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -90,7 +90,7 @@ bool TestSpread1()
XTensor * modify = NewTensor(dataOrder, dataDimSize);
/* Initialize variables */
_SetDataFixedFloat(s, 0.0F);
_SetDataFixed(s, 0.0F);
modify->SetData(data, dataUnitNum);
/* call _Spread function */
......@@ -108,7 +108,7 @@ bool TestSpread1()
XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
_SetDataFixedFloat(sGPU, 0.0F);
_SetDataFixed(sGPU, 0.0F);
modifyGPU->SetData(data, dataUnitNum);
/* call _Spread function */
......
......@@ -295,8 +295,8 @@ bool TestSumDim3()
/* initialize variables */
a->SetZeroAll();
cMe->SetZeroAll();
_SetDataFixedFloat(b, 1.0F);
_SetDataFixedFloat(answer, 1.0F);
_SetDataFixed(b, 1.0F);
_SetDataFixed(answer, 1.0F);
/* call SumDim function */
_SumDim(a, b, c, 1);
......@@ -322,7 +322,7 @@ bool TestSumDim3()
/* Initialize variables */
aGPU->SetZeroAll();
cMe->SetZeroAll();
_SetDataFixedFloat(bGPU, 1.0F);
_SetDataFixed(bGPU, 1.0F);
/* call sum function */
_SumDim(aGPU, bGPU, cGPU, 1);
......@@ -404,8 +404,8 @@ bool TestSumDim4()
/* initialize variables */
a->SetZeroAll();
cMe->SetZeroAll();
_SetDataFixedFloat(b, 1.0F);
_SetDataFixedFloat(answer, 1.0F);
_SetDataFixed(b, 1.0F);
_SetDataFixed(answer, 1.0F);
/* call SumDim function */
_SumDim(a, b, c, 1);
......@@ -431,7 +431,7 @@ bool TestSumDim4()
/* Initialize variables */
aGPU->SetZeroAll();
cMe->SetZeroAll();
_SetDataFixedFloat(bGPU, 1.0F);
_SetDataFixed(bGPU, 1.0F);
/* call sum function */
_SumDim(aGPU, bGPU, cGPU, 1);
......
......@@ -30,7 +30,7 @@ bool Test()
XPRINT(0, stdout, "Testing the XTensor utilites ... \n\n");
//wrong = !TestAbsolute() || wrong;
wrong = !TestClip() || wrong;
//wrong = !TestClip() || wrong;
//wrong = !TestCompare() || wrong;
//wrong = !TestConcatenate() || wrong;
//wrong = !TestConcatenateSolely() || wrong;
......@@ -38,8 +38,8 @@ bool Test()
//wrong = !TestConvertDataType() || wrong;
//wrong = !TestCopyIndexed() || wrong;
//wrong = !TestCopyValues() || wrong;
wrong = !TestDiv() || wrong;
wrong = !TestDivDim() || wrong;
//wrong = !TestDiv() || wrong;
//wrong = !TestDivDim() || wrong;
//wrong = !TestExp() || wrong;
//wrong = !TestGather() || wrong;
//wrong = !TestLog() || wrong;
......@@ -49,7 +49,7 @@ bool Test()
//wrong = !TestMatrixMulBatched() || wrong;
//wrong = !TestMerge() || wrong;
//wrong = !TestMultiply() || wrong;
wrong = !TestMultiplyDim() || wrong;
//wrong = !TestMultiplyDim() || wrong;
//wrong = !TestNegate() || wrong;
//wrong = !TestNormalize() || wrong;
//wrong = !TestPower() || wrong;
......@@ -60,17 +60,17 @@ bool Test()
//wrong = !TestReduceSumSquared() || wrong;
//wrong = !TestReduceVariance() || wrong;
//wrong = !TestRound() || wrong;
wrong = !TestScaleAndShift() || wrong;
//wrong = !TestScaleAndShift() || wrong;
//wrong = !TestSelect() || wrong;
//wrong = !TestSetAscendingOrder() || wrong;
//wrong = !TestSetData() || wrong;
wrong = !TestSetData() || wrong;
//wrong = !TestSign() || wrong;
//wrong = !TestSin() || wrong;
//wrong = !TestSort() || wrong;
//wrong = !TestSplit() || wrong;
//wrong = !TestSpread() || wrong;
//wrong = !TestSub() || wrong;
wrong = !TestSum() || wrong;
//wrong = !TestSum() || wrong;
//wrong = !TestSumByColumnTV() || wrong;
//wrong = !TestSumByColumnVT() || wrong;
//wrong = !TestSumDim() || wrong;
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论