Commit 3800528b by ltb

Update setData to support float16 and modify the files that use setData (mainly _SetDataFixed)

parent ccfe71d0
......@@ -53,11 +53,11 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
if (income.tailNum == 1) {
if(dedy->dataType == X_FLOAT)
_SetDataFixedFloat(dedy, 1.0F);
_SetDataFixed(dedy, 1.0F);
else if(dedy->dataType == X_DOUBLE)
_SetDataFixedDouble(dedy, 1.0);
_SetDataFixed(dedy, 1.0);
else if(dedy->dataType == X_INT)
_SetDataFixedInt(dedy, 1);
_SetDataFixed(dedy, 1);
else
ShowNTErrors("TODO");
......@@ -145,11 +145,11 @@ void XLossGrad::Compute(XTensor * gold, XTensor * y,
{
if(gold == NULL){
if(dedy->dataType == X_FLOAT)
_SetDataFixedFloat(dedy, 1.0F);
_SetDataFixed(dedy, 1.0F);
else if(dedy->dataType == X_DOUBLE)
_SetDataFixedDouble(dedy, 1.0);
_SetDataFixed(dedy, 1.0);
else if(dedy->dataType == X_INT)
_SetDataFixedInt(dedy, 1);
_SetDataFixed(dedy, 1);
else{
ShowNTErrors("TODO");
}
......
......@@ -171,7 +171,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
dims[inputEnc->order - 1] = 1;
InitTensor(&first, inputEnc->order, dims, X_INT, 1.0F, inputEnc->devID, inputEnc->mem);
_SetDataFixedInt(&first, startSymbol);
_SetDataFixed(&first, startSymbol);
/* add a new word into the input sequence of the decoder side */
if (inputLast == NULL) {
......
......@@ -25,6 +25,7 @@
#include "SetData.cuh"
#include "../../XUtility.h"
#include "../movement/CopyValues.h"
#include "ConvertDataType.h"
#if !defined( WIN32 ) && !defined( _WIN32 )
#include "sys/time.h"
......@@ -81,82 +82,69 @@ generate data items with a fixed value p
>> tensor - the tensor whose data array would be initialized
>> value - the number used to initialize the tensor
*/
void _SetDataFixed(XTensor * tensor, void * valuePointer)
template<class T>
void _SetDataFixed(XTensor * tensor, T value)
{
#ifdef USE_CUDA
if (tensor->devID >= 0) {
_CudaSetDataFixed(tensor, value);
return;
}
#endif
int num = tensor->unitNum;
if(tensor->dataType == X_INT){
int p = *(int*)valuePointer;
if(tensor->devID < 0){
int * d = (int*)tensor->data;
if(num % 4 == 0){
for(int i = 0; i < num; i += 4){
d[i] = p;
d[i + 1] = p;
d[i + 2] = p;
d[i + 3] = p;
}
}
else{
for(int i = 0; i < num; i++)
d[i] = p;
if (tensor->dataType == X_INT) {
int * d = (int*)tensor->data;
int v = (int)value;
if (num % 4 == 0) {
for (int i = 0; i < num; i += 4) {
d[i] = v;
d[i + 1] = v;
d[i + 2] = v;
d[i + 3] = v;
}
}
else{
#ifdef USE_CUDA
_CudaSetDataFixedInt(tensor, p);
#endif
else {
for (int i = 0; i < num; i++)
d[i] = v;
}
}
else if(tensor->dataType == X_FLOAT){
float p = *(float*)valuePointer;
if(tensor->devID < 0){
float * d = (float*)tensor->data;
if(num % 4 == 0){
for(int i = 0; i < num; i += 4){
d[i] = p;
d[i + 1] = p;
d[i + 2] = p;
d[i + 3] = p;
}
}
else{
for(int i = 0; i < num; i++)
d[i] = p;
else if (tensor->dataType == X_FLOAT) {
float * d = (float*)tensor->data;
float v = (float)value;
if (num % 4 == 0) {
for (int i = 0; i < num; i += 4) {
d[i] = v;
d[i + 1] = v;
d[i + 2] = v;
d[i + 3] = v;
}
}
else{
#ifdef USE_CUDA
_CudaSetDataFixedFloat(tensor, p);
#endif
else {
for (int i = 0; i < num; i++)
d[i] = v;
}
}
else if(tensor->dataType == X_DOUBLE){
double p = *(double*)valuePointer;
if(tensor->devID < 0){
double * d = (double*)tensor->data;
if(num % 4 == 0){
for(int i = 0; i < num; i += 4){
d[i] = p;
d[i + 1] = p;
d[i + 2] = p;
d[i + 3] = p;
}
}
else{
for(int i = 0; i < num; i++)
d[i] = p;
else if (tensor->dataType == X_DOUBLE) {
double * d = (double*)tensor->data;
double v = (double)value;
if (num % 4 == 0) {
for (int i = 0; i < num; i += 4) {
d[i] = v;
d[i + 1] = v;
d[i + 2] = v;
d[i + 3] = v;
}
}
else{
#ifdef USE_CUDA
_CudaSetDataFixedDouble(tensor, p);
#endif
else {
for (int i = 0; i < num; i++)
d[i] = v;
}
}
else{
else
ShowNTErrors("TODO");
}
}
/*
......@@ -166,7 +154,7 @@ generate data items with a fixed value p (in default type)
*/
void SetDataFixed(XTensor &tensor, DTYPE p)
{
_SetDataFixed(&tensor, &p);
_SetDataFixed(&tensor, p);
}
/*
......@@ -177,53 +165,17 @@ generate data items with a fixed value p (in integer)
void SetDataFixedInt(XTensor &tensor, int p)
{
CheckNTErrors(tensor.dataType == X_INT, "An integer tensor is required!");
_SetDataFixed(&tensor, &p);
_SetDataFixed(&tensor, p);
}
/*
generate data items with a fixed value p (in integer)
>> tensor - the tensor whose data array would be initialized
>> p - an int-valued number
*/
void _SetDataFixedInt(XTensor * tensor, int p)
{
CheckNTErrors(tensor->dataType == X_INT, "the tensor must be in X_INT!");
template void _SetDataFixed<int>(XTensor*, int);
template void _SetDataFixed<float>(XTensor*, float);
template void _SetDataFixed<double>(XTensor*, double);
if(p == 0)
tensor->SetZeroAll();
else
_SetDataFixed(tensor, &p);
}
/*
generate data items with a fixed value p (in float)
>> tensor - the tensor whose data array would be initialized
>> p - a float-valued number
*/
void _SetDataFixedFloat(XTensor * tensor, float p)
{
CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!");
if(p == 0)
tensor->SetZeroAll();
else
_SetDataFixed(tensor, &p);
}
/*
generate data items with a fixed value p (in double)
>> tensor - the tensor whose data array would be initialized
>> p - a double-valued number
*/
void _SetDataFixedDouble(XTensor * tensor, double p)
{
CheckNTErrors(tensor->dataType == X_DOUBLE, "the tensor must be in X_DOUBLE!");
if(p == 0)
tensor->SetZeroAll();
else
_SetDataFixed(tensor, &p);
}
/*
generate data items with a fixed value p only if
......@@ -319,7 +271,6 @@ void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
{
int n = tensor->order;
CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim < n && dim >= 0, "Illegal dimension!");
CheckNTErrors(beg >= 0 && beg < tensor->GetDim(dim), "Illegal beginning position!");
CheckNTErrors(beg + len >= 0 && beg + len < tensor->GetDim(dim), "Illegal length!");
......@@ -372,7 +323,6 @@ void _SetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
int order = source->order;
int size = source->GetDim(dim);
CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
CheckNTErrors(index >= 0 && index < size, "Illegal index!");
......@@ -527,8 +477,6 @@ the item to a pre-defined value if the item >= p, set the item to 0 otherwise
*/
void _SetDataRandP(const XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value)
{
CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO");
if (tensor->devID < 0) {
_SetDataRand(tensor, lower, upper);
......
......@@ -19,6 +19,7 @@
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
* I'm surprised that I did not write this file till today.
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-22 float16 added
*/
#include <curand.h>
......@@ -27,17 +28,20 @@
#include <curand_kernel.h>
#include "../../XDevice.h"
#include "../../XUtility.h"
#include "ConvertDataType.h"
#include <device_launch_parameters.h>
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set an integer data array with a fixed value p (in int)
/*
set a data array with a fixed value p (in int, float, float16, or double)
>> d - pointer to the data array
>> size - size of the array
>> p - the initial value
*/
__global__
void KernelSetDataFixedInt(int * d, int size, int p)
template<class T>
__global__
void KernelSetDataFixed(T * d, int size, T p)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
......@@ -45,15 +49,14 @@ void KernelSetDataFixedInt(int * d, int size, int p)
d[i] = p;
}
/*
generate data items with a fixed value p (in int)
/*
generate data items with a fixed value p (in int, float, float16, double)
>> tensor - the tensor for initialization
>> p - the initial value
*/
void _CudaSetDataFixedInt(XTensor * tensor, int p)
template<class T>
void _CudaSetDataFixed(XTensor * tensor, T p)
{
CheckNTErrors(tensor->dataType == X_INT, "the tensor must be in X_INT!");
int gridSize[3];
int blockSize[3];
......@@ -65,106 +68,65 @@ void _CudaSetDataFixedInt(XTensor * tensor, int p)
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedInt <<<blocks, threads >>>((int*)tensor->data, tensor->unitNum, p);
if (tensor->dataType == X_INT){
KernelSetDataFixed<<<blocks, threads>>>((int*)tensor->data, tensor->unitNum, (int)p);
}
else if (tensor->dataType == X_FLOAT){
KernelSetDataFixed<<<blocks, threads>>>((DTYPE*)tensor->data, tensor->unitNum, (float)p);
}
else if (tensor->dataType == X_DOUBLE){
KernelSetDataFixed<<<blocks, threads>>>((double*)tensor->data, tensor->unitNum, (double)p);
}
else if (tensor->dataType == X_FLOAT16){
half p1 = __float2half(p);
KernelSetDataFixed<<<blocks, threads>>>((__half*)tensor->data, tensor->unitNum, p1);
}
else
ShowNTErrors("TODO");
BacktoCudaDev(tensor->devID, devIDBackup);
}
/*
set a float data array with a fixed value p (in int)
>> d - pointer to the data array
>> size - size of the array
>> p - the initial value
*/
__global__
void KernelSetDataFixedFloat(float * d, int size, float p)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = p;
}
/*
generate data items with a fixed value p (in float)
>> tensor - the tensor for initialization
>> p - the initial value
*/
void _CudaSetDataFixedFloat(XTensor * tensor, float p)
{
CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!");
template void _CudaSetDataFixed<int>(XTensor*, int);
template void _CudaSetDataFixed<float>(XTensor*, float);
template void _CudaSetDataFixed<double>(XTensor*, double);
//__device__
//template void _CudaSetDataFixed<half>(XTensor*, half);
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedFloat <<<blocks, threads >>>((float*)tensor->data, tensor->unitNum, p);
BacktoCudaDev(tensor->devID, devIDBackup);
}
/*
set a double data array with a fixed value p (in int)
>> d - pointer to the data array
/*
set a data array with a uniform distribution in [low, high]
>> deviceStates - the state of curand
>> d - pointer to the data array (float, float16, or double)
>> size - size of the array
>> p - the initial value
>> lower - low value of the range
>> variance - the variance of the range
*/
__global__
void KernelSetDataFixedDouble(double * d, int size, double p)
template<class T>
__global__
void KernelSetDataRand(T * d, int size, T lower, T variance)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size)
d[i] = p;
}
/*
generate data items with a fixed value p (in double)
>> tensor - the tensor for initialization
>> p - the initial value
*/
void _CudaSetDataFixedDouble(XTensor * tensor, double p)
{
CheckNTErrors(tensor->dataType == X_DOUBLE, "the tensor must be in X_DOUBLE!");
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataFixedDouble <<<blocks, threads >>>((double*)tensor->data, tensor->unitNum, p);
BacktoCudaDev(tensor->devID, devIDBackup);
if (i < size) {
d[i] = d[i] * variance + lower;
}
}
/*
set a float data array with a fixed value p (in int) only
if the condition entry is non-zero
/*
set a float data array with a fixed value p (in float) only
if the condition entry is non-zero
>> d - pointer to the data array
>> c - pointer to the condition array
>> size - size of the array
>> p - the initial value
*/
__global__
__global__
void KernelSetDataFixedCondFloat(float * d, int * c, int size, float p)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size && c[i] != 0)
d[i] = p;
if (i < size && c[i] != 0)
d[i] = p;
}
/*
......@@ -214,6 +176,7 @@ void KernelSetDataFixedCondInt(int * d, int * c, int size, int p)
d[i] = p;
}
/*
generate data items with a fixed value p (in int) only
if the condition entry is non-zero
......@@ -286,8 +249,9 @@ set data items to a pre-defined value if its value >= p, set it to 0 otherwise
>> lower - low value of the range
>> variance - the variance of the range
*/
template<class T>
__global__
void KernelSetDataPCut(DTYPE * d, int size, DTYPE p, DTYPE value)
void KernelSetDataPCut(T * d, int size, T p, T value)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
......@@ -307,8 +271,9 @@ set data items along with a given dimension (and keep the remaining items unchan
>> blockSize - size of a data block
>> blockNum - number of data blocks
*/
template<class T>
__global__
void KernelSetDataDim(DTYPE * d, int beg, int len, int blockSize, int blockNum, DTYPE p)
void KernelSetDataDim(T * d, int beg, int len, int blockSize, int blockNum, T p)
{
/* offset in each block */
int i = blockDim.x * blockIdx.x + threadIdx.x;
......@@ -316,10 +281,10 @@ void KernelSetDataDim(DTYPE * d, int beg, int len, int blockSize, int blockNum,
/* block id */
int j = blockDim.y * blockIdx.y + threadIdx.y;
if(i >= blockSize || j > blockNum)
if (i >= blockSize || j > blockNum)
return;
if(i < beg || i >= beg + len)
if (i < beg || i >= beg + len)
return;
d[blockSize * j + i] = p;
......@@ -352,8 +317,8 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
int stride = 1;
int blockSize = 1;
int blockNum = 1;
for(int i = n - 1; i > dim; i--){
int blockNum = 1;
for (int i = n - 1; i > dim; i--) {
stride *= tensor->GetDim(i);
}
blockSize = stride * tensor->GetDim(dim);
......@@ -370,8 +335,15 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataDim<<<blocks, threads >>>((DTYPE*)tensor->data, beg * stride,
len * stride, blockSize, blockNum, p);
if (tensor->dataType == X_FLOAT){
KernelSetDataDim<<<blocks, threads>>>((DTYPE*)tensor->data, beg * stride,
len * stride, blockSize, blockNum, p);
}
else if (tensor->dataType == X_FLOAT16){
half p1 = __float2half(p);
KernelSetDataDim<<<blocks, threads>>>((__half*)tensor->data, beg * stride,
len * stride, blockSize, blockNum, p1);
}
BacktoCudaDev(tensor->devID, devIDBackup);
}
......@@ -386,16 +358,17 @@ modify data items along with a given index and dimension
>> blockSize - size of a data block
>> stride - stride of a data block
*/
template<class T>
__global__
void KernelSetDataIndexed(DTYPE * s, DTYPE * m, int blockNum, int blockSize, int stride)
void KernelSetDataIndexed(T * s, T * m, int blockNum, int blockSize, int stride)
{
/* offset in each block */
int i = blockDim.x * blockIdx.x + threadIdx.x;
/* block id */
int j = blockDim.y * blockIdx.y + threadIdx.y;
if(i >= stride || j >= blockNum)
if (i >= stride || j >= blockNum)
return;
int x = blockSize * j + i;
......@@ -426,7 +399,6 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
int order = source->order;
int size = source->GetDim(dim);
CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
CheckNTErrors(index >= 0 && index < size, "Illegal index!");
......@@ -452,8 +424,14 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
int devIDBackup;
ProtectCudaDev(source->devID, devIDBackup);
KernelSetDataIndexed<<<blocks, threads >>>((DTYPE*)source->data + index * stride, (DTYPE*)modify->data,
blockNum, blockSize, stride);
if (source->dataType == X_FLOAT){
KernelSetDataIndexed<<<blocks, threads>>>((DTYPE*)source->data + index * stride, (DTYPE*)modify->data,
blockNum, blockSize, stride);
}
else if (source->dataType == X_FLOAT16){
KernelSetDataIndexed<<<blocks, threads>>>((__half*)source->data + index * stride, (__half*)modify->data,
blockNum, blockSize, stride);
}
BacktoCudaDev(source->devID, devIDBackup);
}
......
......@@ -19,6 +19,7 @@
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
* I'm surprised that I did not write this file till today.
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-22 float16 added
*/
#ifndef __SETDATA_CUH__
......@@ -28,14 +29,10 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
/* generate data items with a fixed value p (in int) */
void _CudaSetDataFixedInt(XTensor * tensor, int p);
/* generate data items with a fixed value p (in int, float, float16, double) */
template<class T>
void _CudaSetDataFixed(XTensor * tensor, T p);
/* generate data items with a fixed value p (in float) */
void _CudaSetDataFixedFloat(XTensor * tensor, float p);
/* generate data items with a fixed value p (in double) */
void _CudaSetDataFixedDouble(XTensor * tensor, double p);
/* generate data items with a fixed value p (in float) only
if the condition entry is non-zero */
......
......@@ -24,29 +24,22 @@
#define __SETDATA_H__
#include "../../XTensor.h"
#include "SetData.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* generate data items with a xavier initialization */
void _SetDataFanInOut(XTensor * tensor, DTYPE gain = 1.0F);
/* generate data items with a fixed value p */
void _SetDataFixed(XTensor * tensor, void * valuePointer);
///* generate data items with a fixed value p */
//void _SetDataFixed(XTensor * tensor, void * valuePointer);
/* generate data items with a fixed value p (in default type) */
void SetDataFixed(XTensor &tensor, DTYPE p);
/* generate data items with a fixed value p (in integer) */
void SetDataFixedInt(XTensor &tensor, int p);
/* generate data items with a fixed value p (in int) */
void _SetDataFixedInt(XTensor * tensor, int p);
/* generate data items with a fixed value p (in float) */
void _SetDataFixedFloat(XTensor * tensor, float p);
/* generate data items with a fixed value p (in double) */
void _SetDataFixedDouble(XTensor * tensor, double p);
template<class T>
void _SetDataFixed(XTensor * tensor, T value);
/* generate data items with a fixed value p only if the condition entry is non-zero */
void _SetDataFixedCond(XTensor * tensor, XTensor * condition, DTYPE p);
......
......@@ -70,7 +70,7 @@ XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale)
InitTensor1D(&c, x.unitNum, x.dataType, x.devID, x.mem);
_SetDataFixedFloat(&c, 1.0F);
_SetDataFixed(&c, 1.0F);
_DropoutWithIndex(&x, &maskIndex, &c);
......
......@@ -385,11 +385,11 @@ void _LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
{
if(t == NULL){
if(dedy->dataType == X_FLOAT)
_SetDataFixedFloat(dedy, 1.0F);
_SetDataFixed(dedy, 1.0F);
else if(dedy->dataType == X_DOUBLE)
_SetDataFixedDouble(dedy, 1.0);
_SetDataFixed(dedy, 1.0);
else if(dedy->dataType == X_INT)
_SetDataFixedInt(dedy, 1);
_SetDataFixed(dedy, 1);
else{
ShowNTErrors("TODO");
}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论