Commit 3800528b by ltb

update SetData to support float16 and modify the files that use SetData (mainly _SetDataFixed)

parent ccfe71d0
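Note on the change: the three type-specific setters (_SetDataFixedInt / _SetDataFixedFloat / _SetDataFixedDouble) are collapsed into one template, so a call site just passes a value and the template parameter is deduced from it. A minimal sketch of the call-site change, with illustrative tensor names (grad, mask) that are not taken from this diff:

    // before: one setter per data type
    // _SetDataFixedFloat(&grad, 1.0F);
    // _SetDataFixedInt(&mask, 1);

    // after: one template, T deduced from the argument
    _SetDataFixed(&grad, 1.0F);   // T = float, fills an X_FLOAT tensor
    _SetDataFixed(&mask, 1);      // T = int,   fills an X_INT tensor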
@@ -53,11 +53,11 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
    if (income.tailNum == 1) {
        if(dedy->dataType == X_FLOAT)
-            _SetDataFixedFloat(dedy, 1.0F);
+            _SetDataFixed(dedy, 1.0F);
        else if(dedy->dataType == X_DOUBLE)
-            _SetDataFixedDouble(dedy, 1.0);
+            _SetDataFixed(dedy, 1.0);
        else if(dedy->dataType == X_INT)
-            _SetDataFixedInt(dedy, 1);
+            _SetDataFixed(dedy, 1);
        else
            ShowNTErrors("TODO");
@@ -145,11 +145,11 @@ void XLossGrad::Compute(XTensor * gold, XTensor * y,
{
    if(gold == NULL){
        if(dedy->dataType == X_FLOAT)
-            _SetDataFixedFloat(dedy, 1.0F);
+            _SetDataFixed(dedy, 1.0F);
        else if(dedy->dataType == X_DOUBLE)
-            _SetDataFixedDouble(dedy, 1.0);
+            _SetDataFixed(dedy, 1.0);
        else if(dedy->dataType == X_INT)
-            _SetDataFixedInt(dedy, 1);
+            _SetDataFixed(dedy, 1);
        else{
            ShowNTErrors("TODO");
        }
......
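For context on why the fill value is 1 in the hunks above: this is the usual back-propagation seed. When the loss node has no incoming gradient, the loss is differentiated with respect to itself and dE/dE = 1, so the output-gradient tensor is simply filled with ones in whatever data type it stores:

    _SetDataFixed(dedy, 1.0F);   // as above: dE/dE = 1, seeded into an X_FLOAT gradient tensor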
@@ -171,7 +171,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
    dims[inputEnc->order - 1] = 1;
    InitTensor(&first, inputEnc->order, dims, X_INT, 1.0F, inputEnc->devID, inputEnc->mem);

-    _SetDataFixedInt(&first, startSymbol);
+    _SetDataFixed(&first, startSymbol);

    /* add a new word into the input sequence of the decoder side */
    if (inputLast == NULL) {
......
@@ -25,6 +25,7 @@
#include "SetData.cuh"
#include "../../XUtility.h"
#include "../movement/CopyValues.h"
+#include "ConvertDataType.h"

#if !defined( WIN32 ) && !defined( _WIN32 )
#include "sys/time.h"
@@ -81,82 +82,69 @@ generate data items with a fixed value p
>> tensor - the tensor whose data array would be initialized
>> p - pointer to the number for initializing the tensor
*/
-void _SetDataFixed(XTensor * tensor, void * valuePointer)
-{
-    int num = tensor->unitNum;
-
-    if(tensor->dataType == X_INT){
-        int p = *(int*)valuePointer;
-        if(tensor->devID < 0){
-            int * d = (int*)tensor->data;
-            if(num % 4 == 0){
-                for(int i = 0; i < num; i += 4){
-                    d[i] = p;
-                    d[i + 1] = p;
-                    d[i + 2] = p;
-                    d[i + 3] = p;
-                }
-            }
-            else{
-                for(int i = 0; i < num; i++)
-                    d[i] = p;
-            }
-        }
-        else{
-#ifdef USE_CUDA
-            _CudaSetDataFixedInt(tensor, p);
-#endif
-        }
-    }
-    else if(tensor->dataType == X_FLOAT){
-        float p = *(float*)valuePointer;
-        if(tensor->devID < 0){
-            float * d = (float*)tensor->data;
-            if(num % 4 == 0){
-                for(int i = 0; i < num; i += 4){
-                    d[i] = p;
-                    d[i + 1] = p;
-                    d[i + 2] = p;
-                    d[i + 3] = p;
-                }
-            }
-            else{
-                for(int i = 0; i < num; i++)
-                    d[i] = p;
-            }
-        }
-        else{
-#ifdef USE_CUDA
-            _CudaSetDataFixedFloat(tensor, p);
-#endif
-        }
-    }
-    else if(tensor->dataType == X_DOUBLE){
-        double p = *(double*)valuePointer;
-        if(tensor->devID < 0){
-            double * d = (double*)tensor->data;
-            if(num % 4 == 0){
-                for(int i = 0; i < num; i += 4){
-                    d[i] = p;
-                    d[i + 1] = p;
-                    d[i + 2] = p;
-                    d[i + 3] = p;
-                }
-            }
-            else{
-                for(int i = 0; i < num; i++)
-                    d[i] = p;
-            }
-        }
-        else{
-#ifdef USE_CUDA
-            _CudaSetDataFixedDouble(tensor, p);
-#endif
-        }
-    }
-    else{
-        ShowNTErrors("TODO");
-    }
-}
+template<class T>
+void _SetDataFixed(XTensor * tensor, T value)
+{
+#ifdef USE_CUDA
+    if (tensor->devID >= 0) {
+        _CudaSetDataFixed(tensor, value);
+        return;
+    }
+#endif
+
+    int num = tensor->unitNum;
+
+    if (tensor->dataType == X_INT) {
+        int * d = (int*)tensor->data;
+        int v = (int)value;
+        if (num % 4 == 0) {
+            for (int i = 0; i < num; i += 4) {
+                d[i] = v;
+                d[i + 1] = v;
+                d[i + 2] = v;
+                d[i + 3] = v;
+            }
+        }
+        else {
+            for (int i = 0; i < num; i++)
+                d[i] = v;
+        }
+    }
+    else if (tensor->dataType == X_FLOAT) {
+        float * d = (float*)tensor->data;
+        float v = (float)value;
+        if (num % 4 == 0) {
+            for (int i = 0; i < num; i += 4) {
+                d[i] = v;
+                d[i + 1] = v;
+                d[i + 2] = v;
+                d[i + 3] = v;
+            }
+        }
+        else {
+            for (int i = 0; i < num; i++)
+                d[i] = v;
+        }
+    }
+    else if (tensor->dataType == X_DOUBLE) {
+        double * d = (double*)tensor->data;
+        double v = (double)value;
+        if (num % 4 == 0) {
+            for (int i = 0; i < num; i += 4) {
+                d[i] = v;
+                d[i + 1] = v;
+                d[i + 2] = v;
+                d[i + 3] = v;
+            }
+        }
+        else {
+            for (int i = 0; i < num; i++)
+                d[i] = v;
+        }
+    }
+    else
+        ShowNTErrors("TODO");
+}
/*
@@ -166,7 +154,7 @@ generate data items with a fixed value p (in default type)
*/
void SetDataFixed(XTensor &tensor, DTYPE p)
{
-    _SetDataFixed(&tensor, &p);
+    _SetDataFixed(&tensor, p);
}
/*
@@ -177,53 +165,17 @@ generate data items with a fixed value p (in integer)
void SetDataFixedInt(XTensor &tensor, int p)
{
    CheckNTErrors(tensor.dataType == X_INT, "An integer tensor is required!");
-    _SetDataFixed(&tensor, &p);
+    _SetDataFixed(&tensor, p);
}
/*
generate data items with a fixed value p (in integer)
>> tensor - the tensor whose data array would be initialized
>> p - an int-valued number
*/
-void _SetDataFixedInt(XTensor * tensor, int p)
-{
-    CheckNTErrors(tensor->dataType == X_INT, "the tensor must be in X_INT!");
-    if(p == 0)
-        tensor->SetZeroAll();
-    else
-        _SetDataFixed(tensor, &p);
-}
-
-/*
-generate data items with a fixed value p (in float)
->> tensor - the tensor whose data array would be initialized
->> p - a float-valued number
-*/
-void _SetDataFixedFloat(XTensor * tensor, float p)
-{
-    CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!");
-    if(p == 0)
-        tensor->SetZeroAll();
-    else
-        _SetDataFixed(tensor, &p);
-}
-
-/*
-generate data items with a fixed value p (in double)
->> tensor - the tensor whose data array would be initialized
->> p - a double-valued number
-*/
-void _SetDataFixedDouble(XTensor * tensor, double p)
-{
-    CheckNTErrors(tensor->dataType == X_DOUBLE, "the tensor must be in X_DOUBLE!");
-    if(p == 0)
-        tensor->SetZeroAll();
-    else
-        _SetDataFixed(tensor, &p);
-}
+template void _SetDataFixed<int>(XTensor*, int);
+template void _SetDataFixed<float>(XTensor*, float);
+template void _SetDataFixed<double>(XTensor*, double);
/*
generate data items with a fixed value p only if
@@ -319,7 +271,6 @@ void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
{
    int n = tensor->order;

-    CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO!");
    CheckNTErrors(dim < n && dim >= 0, "Illegal dimension!");
    CheckNTErrors(beg >= 0 && beg < tensor->GetDim(dim), "Illegal beginning position!");
    CheckNTErrors(beg + len >= 0 && beg + len < tensor->GetDim(dim), "Illegal length!");
@@ -372,7 +323,6 @@ void _SetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
    int order = source->order;
    int size = source->GetDim(dim);

-    CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
    CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
    CheckNTErrors(index >= 0 && index < size, "Illegal index!");
@@ -527,8 +477,6 @@ the item to a pre-defined value if the item >= p, set the item to 0 otherwise
*/
void _SetDataRandP(const XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value)
{
-    CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO");
    if (tensor->devID < 0) {
        _SetDataRand(tensor, lower, upper);
......
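The template body of _SetDataFixed now lives in the .cpp file, so the `template void _SetDataFixed<...>` lines above are explicit instantiations: they emit the int/float/double symbols that other translation units link against. A minimal, generic sketch of that pattern (Fill is an illustrative name, not part of the library):

    // fill.h
    template<class T> void Fill(T * buf, int num, T value);

    // fill.cpp
    template<class T> void Fill(T * buf, int num, T value)
    {
        for (int i = 0; i < num; i++)
            buf[i] = value;
    }

    // explicit instantiations: without these, callers in other .cpp files
    // would hit unresolved-symbol errors at link time
    template void Fill<int>(int*, int, int);
    template void Fill<float>(float*, int, float);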
@@ -19,6 +19,7 @@
/*
 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
 * I'm surprised that I did not write this file till today.
+ * $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-22 float16 added
 */

#include <curand.h>
@@ -27,17 +28,20 @@
#include <curand_kernel.h>
#include "../../XDevice.h"
#include "../../XUtility.h"
+#include "ConvertDataType.h"
+#include <device_launch_parameters.h>

namespace nts { // namespace nts(NiuTrans.Tensor)

/*
-set an integer data array with a fixed value p (in int)
+set a data array with a fixed value p (in int, float, float16, double)
>> d - pointer to the data array
>> size - size of the array
>> p - the initial value
*/
+template<class T>
__global__
-void KernelSetDataFixedInt(int * d, int size, int p)
+void KernelSetDataFixed(T * d, int size, T p)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -46,14 +50,13 @@ void KernelSetDataFixedInt(int * d, int size, int p)
}

/*
-generate data items with a fixed value p (in int)
+generate data items with a fixed value p (in int, float, float16, double)
>> tensor - the tensor for initialization
>> p - the initial value
*/
-void _CudaSetDataFixedInt(XTensor * tensor, int p)
+template<class T>
+void _CudaSetDataFixed(XTensor * tensor, T p)
{
-    CheckNTErrors(tensor->dataType == X_INT, "the tensor must be in X_INT!");
-
    int gridSize[3];
    int blockSize[3];
@@ -65,89 +68,48 @@ void _CudaSetDataFixedInt(XTensor * tensor, int p)
    int devIDBackup;
    ProtectCudaDev(tensor->devID, devIDBackup);

-    KernelSetDataFixedInt <<<blocks, threads >>>((int*)tensor->data, tensor->unitNum, p);
+    if (tensor->dataType == X_INT){
+        KernelSetDataFixed<<<blocks, threads>>>((int*)tensor->data, tensor->unitNum, (int)p);
+    }
+    else if (tensor->dataType == X_FLOAT){
+        KernelSetDataFixed<<<blocks, threads>>>((DTYPE*)tensor->data, tensor->unitNum, (float)p);
+    }
+    else if (tensor->dataType == X_DOUBLE){
+        KernelSetDataFixed<<<blocks, threads>>>((double*)tensor->data, tensor->unitNum, (double)p);
+    }
+    else if (tensor->dataType == X_FLOAT16){
+        half p1 = __float2half(p);
+        KernelSetDataFixed<<<blocks, threads>>>((__half*)tensor->data, tensor->unitNum, p1);
+    }
+    else
+        ShowNTErrors("TODO");

    BacktoCudaDev(tensor->devID, devIDBackup);
}

+template void _CudaSetDataFixed<int>(XTensor*, int);
+template void _CudaSetDataFixed<float>(XTensor*, float);
+template void _CudaSetDataFixed<double>(XTensor*, double);
+//__device__
+//template void _CudaSetDataFixed<half>(XTensor*, half);

-/*
-set a float data array with a fixed value p (in int)
->> d - pointer to the data array
->> size - size of the array
->> p - the initial value
-*/
-__global__
-void KernelSetDataFixedFloat(float * d, int size, float p)
-{
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-
-    if (i < size)
-        d[i] = p;
-}
-
-/*
-generate data items with a fixed value p (in float)
->> tensor - the tensor for initialization
->> p - the initial value
-*/
-void _CudaSetDataFixedFloat(XTensor * tensor, float p)
-{
-    CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!");
-
-    int gridSize[3];
-    int blockSize[3];
-
-    GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);
-
-    dim3 blocks(gridSize[0]);
-    dim3 threads(blockSize[0]);
-
-    int devIDBackup;
-    ProtectCudaDev(tensor->devID, devIDBackup);
-
-    KernelSetDataFixedFloat <<<blocks, threads >>>((float*)tensor->data, tensor->unitNum, p);
-
-    BacktoCudaDev(tensor->devID, devIDBackup);
-}
-
-/*
-set a double data array with a fixed value p (in int)
->> d - pointer to the data array
->> size - size of the array
->> p - the initial value
-*/
-__global__
-void KernelSetDataFixedDouble(double * d, int size, double p)
-{
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-
-    if (i < size)
-        d[i] = p;
-}
-
-/*
-generate data items with a fixed value p (in double)
->> tensor - the tensor for initialization
->> p - the initial value
-*/
-void _CudaSetDataFixedDouble(XTensor * tensor, double p)
-{
-    CheckNTErrors(tensor->dataType == X_DOUBLE, "the tensor must be in X_DOUBLE!");
-
-    int gridSize[3];
-    int blockSize[3];
-
-    GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);
-
-    dim3 blocks(gridSize[0]);
-    dim3 threads(blockSize[0]);
-
-    int devIDBackup;
-    ProtectCudaDev(tensor->devID, devIDBackup);
-
-    KernelSetDataFixedDouble <<<blocks, threads >>>((double*)tensor->data, tensor->unitNum, p);
-
-    BacktoCudaDev(tensor->devID, devIDBackup);
-}

/*
+set data array with a uniform distribution in [low, high]
+>> deviceStates - the state of curand
+>> d - float, float16, double datatype pointer to the data array
>> size - size of the array
+>> lower - low value of the range
+>> variance - the variance of the range
*/
+template<class T>
__global__
+void KernelSetDataRand(T * d, int size, T lower, T variance)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

+    if (i < size) {
+        d[i] = d[i] * variance + lower;
+    }
}

/*
@@ -214,6 +176,7 @@ void KernelSetDataFixedCondInt(int * d, int * c, int size, int p)
        d[i] = p;
}

/*
generate data items with a fixed value p (in int) only
if the condition entry is non-zero
@@ -286,8 +249,9 @@ set data items to a pre-defined value if its value >= p, set it to 0 otherwise
>> lower - low value of the range
>> variance - the variance of the range
*/
+template<class T>
__global__
-void KernelSetDataPCut(DTYPE * d, int size, DTYPE p, DTYPE value)
+void KernelSetDataPCut(T * d, int size, T p, T value)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -307,8 +271,9 @@ set data items along with a given dimension (and keep the remaining items unchanged)
>> blockSize - size of a data block
>> blockNum - number of data blocks
*/
+template<class T>
__global__
-void KernelSetDataDim(DTYPE * d, int beg, int len, int blockSize, int blockNum, DTYPE p)
+void KernelSetDataDim(T * d, int beg, int len, int blockSize, int blockNum, T p)
{
    /* offset in each block */
    int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -316,10 +281,10 @@ void KernelSetDataDim(DTYPE * d, int beg, int len, int blockSize, int blockNum, DTYPE p)
    /* block id */
    int j = blockDim.y * blockIdx.y + threadIdx.y;

-    if(i >= blockSize || j > blockNum)
+    if (i >= blockSize || j > blockNum)
        return;

-    if(i < beg || i >= beg + len)
+    if (i < beg || i >= beg + len)
        return;

    d[blockSize * j + i] = p;
@@ -353,7 +318,7 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
    int stride = 1;
    int blockSize = 1;
    int blockNum = 1;
-    for(int i = n - 1; i > dim; i--){
+    for (int i = n - 1; i > dim; i--) {
        stride *= tensor->GetDim(i);
    }
    blockSize = stride * tensor->GetDim(dim);
@@ -370,8 +335,15 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
    int devIDBackup;
    ProtectCudaDev(tensor->devID, devIDBackup);

-    KernelSetDataDim<<<blocks, threads >>>((DTYPE*)tensor->data, beg * stride,
-                                           len * stride, blockSize, blockNum, p);
+    if (tensor->dataType == X_FLOAT){
+        KernelSetDataDim<<<blocks, threads>>>((DTYPE*)tensor->data, beg * stride,
+                                              len * stride, blockSize, blockNum, p);
+    }
+    else if (tensor->dataType == X_FLOAT16){
+        half p1 = __float2half(p);
+        KernelSetDataDim<<<blocks, threads>>>((__half*)tensor->data, beg * stride,
+                                              len * stride, blockSize, blockNum, p1);
+    }

    BacktoCudaDev(tensor->devID, devIDBackup);
}
@@ -386,8 +358,9 @@ modify data items along with a given index and dimension
>> blockSize - size of a data block
>> stride - stride of a data block
*/
+template<class T>
__global__
-void KernelSetDataIndexed(DTYPE * s, DTYPE * m, int blockNum, int blockSize, int stride)
+void KernelSetDataIndexed(T * s, T * m, int blockNum, int blockSize, int stride)
{
    /* offset in each block */
    int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -395,7 +368,7 @@ void KernelSetDataIndexed(DTYPE * s, DTYPE * m, int blockNum, int blockSize, int stride)
    /* block id */
    int j = blockDim.y * blockIdx.y + threadIdx.y;

-    if(i >= stride || j >= blockNum)
+    if (i >= stride || j >= blockNum)
        return;

    int x = blockSize * j + i;
@@ -426,7 +399,6 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
    int order = source->order;
    int size = source->GetDim(dim);

-    CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
    CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
    CheckNTErrors(index >= 0 && index < size, "Illegal index!");
@@ -452,8 +424,14 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
    int devIDBackup;
    ProtectCudaDev(source->devID, devIDBackup);

-    KernelSetDataIndexed<<<blocks, threads >>>((DTYPE*)source->data + index * stride, (DTYPE*)modify->data,
-                                               blockNum, blockSize, stride);
+    if (source->dataType == X_FLOAT){
+        KernelSetDataIndexed<<<blocks, threads>>>((DTYPE*)source->data + index * stride, (DTYPE*)modify->data,
+                                                  blockNum, blockSize, stride);
+    }
+    else if (source->dataType == X_FLOAT16){
+        KernelSetDataIndexed<<<blocks, threads>>>((__half*)source->data + index * stride, (__half*)modify->data,
+                                                  blockNum, blockSize, stride);
+    }

    BacktoCudaDev(source->devID, devIDBackup);
}
......
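The float16 branches added above all follow the same recipe: keep one templated kernel, convert the host-side scalar with __float2half, and launch on the tensor's data reinterpreted as __half*. A self-contained sketch of that recipe, with illustrative names (KernelFill, FillHalf) rather than the library's:

    #include <cuda_fp16.h>

    template<class T>
    __global__ void KernelFill(T * d, int size, T p)
    {
        int i = blockDim.x * blockIdx.x + threadIdx.x;
        if (i < size)
            d[i] = p;
    }

    // host-side launcher for half-precision data, mirroring the X_FLOAT16 branches
    void FillHalf(__half * d, int size, float p)
    {
        int threads = 256;
        int blocks = (size + threads - 1) / threads;
        __half ph = __float2half(p);   // host float -> half, as in the diff
        KernelFill<<<blocks, threads>>>(d, size, ph);
    }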
@@ -19,6 +19,7 @@
/*
 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
 * I'm surprised that I did not write this file till today.
+ * $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-22 float16 added
 */

#ifndef __SETDATA_CUH__
@@ -28,14 +29,10 @@
namespace nts { // namespace nts(NiuTrans.Tensor)

-/* generate data items with a fixed value p (in int) */
-void _CudaSetDataFixedInt(XTensor * tensor, int p);
-
-/* generate data items with a fixed value p (in float) */
-void _CudaSetDataFixedFloat(XTensor * tensor, float p);
-
-/* generate data items with a fixed value p (in double) */
-void _CudaSetDataFixedDouble(XTensor * tensor, double p);
+/* generate data items with a fixed value p (in int, float, float16, double) */
+template<class T>
+void _CudaSetDataFixed(XTensor * tensor, T p);

/* generate data items with a fixed value p (in float) only
   if the condition entry is non-zero */
......
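One detail worth noting from the .cu/.cuh hunks: no half instantiation of _CudaSetDataFixed is exported (it is left commented out). An X_FLOAT16 tensor is still handled, but through the int/float/double instantiations, whose dispatch converts the scalar to half internally before launching the kernel, roughly:

    // sketch of the X_FLOAT16 branch inside the templated host function above;
    // p is the int/float/double template argument, converted to half here
    if (tensor->dataType == X_FLOAT16) {
        half p1 = __float2half(p);
        KernelSetDataFixed<<<blocks, threads>>>((__half*)tensor->data, tensor->unitNum, p1);
    }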
@@ -24,29 +24,22 @@
#define __SETDATA_H__

#include "../../XTensor.h"
+#include "SetData.cuh"

namespace nts { // namespace nts(NiuTrans.Tensor)

/* generate data items with a xavier initialization */
void _SetDataFanInOut(XTensor * tensor, DTYPE gain = 1.0F);

-/* generate data items with a fixed value p */
-void _SetDataFixed(XTensor * tensor, void * valuePointer);
+///* generate data items with a fixed value p */
+//void _SetDataFixed(XTensor * tensor, void * valuePointer);

/* generate data items with a fixed value p (in default type) */
void SetDataFixed(XTensor &tensor, DTYPE p);

-/* generate data items with a fixed value p (in integer) */
void SetDataFixedInt(XTensor &tensor, int p);

-/* generate data items with a fixed value p (in int) */
-void _SetDataFixedInt(XTensor * tensor, int p);
-
-/* generate data items with a fixed value p (in float) */
-void _SetDataFixedFloat(XTensor * tensor, float p);
-
-/* generate data items with a fixed value p (in double) */
-void _SetDataFixedDouble(XTensor * tensor, double p);
+template<class T>
+void _SetDataFixed(XTensor * tensor, T value);

/* generate data items with a fixed value p only if the condition entry is non-zero */
void _SetDataFixedCond(XTensor * tensor, XTensor * condition, DTYPE p);
......
@@ -70,7 +70,7 @@ XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale)
    InitTensor1D(&c, x.unitNum, x.dataType, x.devID, x.mem);

-    _SetDataFixedFloat(&c, 1.0F);
+    _SetDataFixed(&c, 1.0F);

    _DropoutWithIndex(&x, &maskIndex, &c);
......
@@ -385,11 +385,11 @@ void _LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
{
    if(t == NULL){
        if(dedy->dataType == X_FLOAT)
-            _SetDataFixedFloat(dedy, 1.0F);
+            _SetDataFixed(dedy, 1.0F);
        else if(dedy->dataType == X_DOUBLE)
-            _SetDataFixedDouble(dedy, 1.0);
+            _SetDataFixed(dedy, 1.0);
        else if(dedy->dataType == X_INT)
-            _SetDataFixedInt(dedy, 1);
+            _SetDataFixed(dedy, 1);
        else{
            ShowNTErrors("TODO");
        }
......