Commit 3800528b by ltb

update SetData to support float16 and modify the files that use SetData (mainly _SetDataFixed)

parent ccfe71d0
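Note on the change: the three type-specific setters (_SetDataFixedInt / _SetDataFixedFloat / _SetDataFixedDouble) are collapsed into one template, so a call site just passes a value and the template parameter is deduced from it. A minimal sketch of the call-site change, with illustrative tensor names (grad, mask) that are not taken from this diff:

    // before: one setter per data type
    // _SetDataFixedFloat(&grad, 1.0F);
    // _SetDataFixedInt(&mask, 1);

    // after: one template, T deduced from the argument
    _SetDataFixed(&grad, 1.0F);   // T = float, fills an X_FLOAT tensor
    _SetDataFixed(&mask, 1);      // T = int,   fills an X_INT tensor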
@@ -53,11 +53,11 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
    if (income.tailNum == 1) {
        if(dedy->dataType == X_FLOAT)
-            _SetDataFixedFloat(dedy, 1.0F);
+            _SetDataFixed(dedy, 1.0F);
        else if(dedy->dataType == X_DOUBLE)
-            _SetDataFixedDouble(dedy, 1.0);
+            _SetDataFixed(dedy, 1.0);
        else if(dedy->dataType == X_INT)
-            _SetDataFixedInt(dedy, 1);
+            _SetDataFixed(dedy, 1);
        else
            ShowNTErrors("TODO");
@@ -145,11 +145,11 @@ void XLossGrad::Compute(XTensor * gold, XTensor * y,
{
    if(gold == NULL){
        if(dedy->dataType == X_FLOAT)
-            _SetDataFixedFloat(dedy, 1.0F);
+            _SetDataFixed(dedy, 1.0F);
        else if(dedy->dataType == X_DOUBLE)
-            _SetDataFixedDouble(dedy, 1.0);
+            _SetDataFixed(dedy, 1.0);
        else if(dedy->dataType == X_INT)
-            _SetDataFixedInt(dedy, 1);
+            _SetDataFixed(dedy, 1);
        else{
            ShowNTErrors("TODO");
        }
......
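For context on why the fill value is 1 in the hunks above: this is the usual back-propagation seed. When the loss node has no incoming gradient, the loss is differentiated with respect to itself and dE/dE = 1, so the output-gradient tensor is simply filled with ones in whatever data type it stores:

    _SetDataFixed(dedy, 1.0F);   // as above: dE/dE = 1, seeded into an X_FLOAT gradient tensor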
@@ -171,7 +171,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
    dims[inputEnc->order - 1] = 1;
    InitTensor(&first, inputEnc->order, dims, X_INT, 1.0F, inputEnc->devID, inputEnc->mem);

-    _SetDataFixedInt(&first, startSymbol);
+    _SetDataFixed(&first, startSymbol);

    /* add a new word into the input sequence of the decoder side */
    if (inputLast == NULL) {
......
@@ -25,6 +25,7 @@
#include "SetData.cuh"
#include "../../XUtility.h"
#include "../movement/CopyValues.h"
+#include "ConvertDataType.h"

#if !defined( WIN32 ) && !defined( _WIN32 )
#include "sys/time.h"
@@ -81,82 +82,69 @@ generate data items with a fixed value p
>> tensor - the tensor whose data array would be initialized
>> p - pointer to the number for initializing the tensor
*/
-void _SetDataFixed(XTensor * tensor, void * valuePointer)
-{
-    int num = tensor->unitNum;
-
-    if(tensor->dataType == X_INT){
-        int p = *(int*)valuePointer;
-        if(tensor->devID < 0){
-            int * d = (int*)tensor->data;
-            if(num % 4 == 0){
-                for(int i = 0; i < num; i += 4){
-                    d[i] = p;
-                    d[i + 1] = p;
-                    d[i + 2] = p;
-                    d[i + 3] = p;
-                }
-            }
-            else{
-                for(int i = 0; i < num; i++)
-                    d[i] = p;
-            }
-        }
-        else{
-#ifdef USE_CUDA
-            _CudaSetDataFixedInt(tensor, p);
-#endif
-        }
-    }
-    else if(tensor->dataType == X_FLOAT){
-        float p = *(float*)valuePointer;
-        if(tensor->devID < 0){
-            float * d = (float*)tensor->data;
-            if(num % 4 == 0){
-                for(int i = 0; i < num; i += 4){
-                    d[i] = p;
-                    d[i + 1] = p;
-                    d[i + 2] = p;
-                    d[i + 3] = p;
-                }
-            }
-            else{
-                for(int i = 0; i < num; i++)
-                    d[i] = p;
-            }
-        }
-        else{
-#ifdef USE_CUDA
-            _CudaSetDataFixedFloat(tensor, p);
-#endif
-        }
-    }
-    else if(tensor->dataType == X_DOUBLE){
-        double p = *(double*)valuePointer;
-        if(tensor->devID < 0){
-            double * d = (double*)tensor->data;
-            if(num % 4 == 0){
-                for(int i = 0; i < num; i += 4){
-                    d[i] = p;
-                    d[i + 1] = p;
-                    d[i + 2] = p;
-                    d[i + 3] = p;
-                }
-            }
-            else{
-                for(int i = 0; i < num; i++)
-                    d[i] = p;
-            }
-        }
-        else{
-#ifdef USE_CUDA
-            _CudaSetDataFixedDouble(tensor, p);
-#endif
-        }
-    }
-    else{
-        ShowNTErrors("TODO");
-    }
-}
+template<class T>
+void _SetDataFixed(XTensor * tensor, T value)
+{
+#ifdef USE_CUDA
+    if (tensor->devID >= 0) {
+        _CudaSetDataFixed(tensor, value);
+        return;
+    }
+#endif
+
+    int num = tensor->unitNum;
+
+    if (tensor->dataType == X_INT) {
+        int * d = (int*)tensor->data;
+        int v = (int)value;
+        if (num % 4 == 0) {
+            for (int i = 0; i < num; i += 4) {
+                d[i] = v;
+                d[i + 1] = v;
+                d[i + 2] = v;
+                d[i + 3] = v;
+            }
+        }
+        else {
+            for (int i = 0; i < num; i++)
+                d[i] = v;
+        }
+    }
+    else if (tensor->dataType == X_FLOAT) {
+        float * d = (float*)tensor->data;
+        float v = (float)value;
+        if (num % 4 == 0) {
+            for (int i = 0; i < num; i += 4) {
+                d[i] = v;
+                d[i + 1] = v;
+                d[i + 2] = v;
+                d[i + 3] = v;
+            }
+        }
+        else {
+            for (int i = 0; i < num; i++)
+                d[i] = v;
+        }
+    }
+    else if (tensor->dataType == X_DOUBLE) {
+        double * d = (double*)tensor->data;
+        double v = (double)value;
+        if (num % 4 == 0) {
+            for (int i = 0; i < num; i += 4) {
+                d[i] = v;
+                d[i + 1] = v;
+                d[i + 2] = v;
+                d[i + 3] = v;
+            }
+        }
+        else {
+            for (int i = 0; i < num; i++)
+                d[i] = v;
+        }
+    }
+    else
+        ShowNTErrors("TODO");
+}
/*
@@ -166,7 +154,7 @@ generate data items with a fixed value p (in default type)
*/
void SetDataFixed(XTensor &tensor, DTYPE p)
{
-    _SetDataFixed(&tensor, &p);
+    _SetDataFixed(&tensor, p);
}
/*
@@ -177,53 +165,17 @@ generate data items with a fixed value p (in integer)
void SetDataFixedInt(XTensor &tensor, int p)
{
    CheckNTErrors(tensor.dataType == X_INT, "An integer tensor is required!");
-    _SetDataFixed(&tensor, &p);
+    _SetDataFixed(&tensor, p);
}
/*
generate data items with a fixed value p (in integer)
>> tensor - the tensor whose data array would be initialized
>> p - an int-valued number
*/
-void _SetDataFixedInt(XTensor * tensor, int p)
-{
-    CheckNTErrors(tensor->dataType == X_INT, "the tensor must be in X_INT!");
-    if(p == 0)
-        tensor->SetZeroAll();
-    else
-        _SetDataFixed(tensor, &p);
-}
-
-/*
-generate data items with a fixed value p (in float)
->> tensor - the tensor whose data array would be initialized
->> p - a float-valued number
-*/
-void _SetDataFixedFloat(XTensor * tensor, float p)
-{
-    CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!");
-    if(p == 0)
-        tensor->SetZeroAll();
-    else
-        _SetDataFixed(tensor, &p);
-}
-
-/*
-generate data items with a fixed value p (in double)
->> tensor - the tensor whose data array would be initialized
->> p - a double-valued number
-*/
-void _SetDataFixedDouble(XTensor * tensor, double p)
-{
-    CheckNTErrors(tensor->dataType == X_DOUBLE, "the tensor must be in X_DOUBLE!");
-    if(p == 0)
-        tensor->SetZeroAll();
-    else
-        _SetDataFixed(tensor, &p);
-}
+template void _SetDataFixed<int>(XTensor*, int);
+template void _SetDataFixed<float>(XTensor*, float);
+template void _SetDataFixed<double>(XTensor*, double);
/*
generate data items with a fixed value p only if
@@ -319,7 +271,6 @@ void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
{
    int n = tensor->order;

-    CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO!");
    CheckNTErrors(dim < n && dim >= 0, "Illegal dimension!");
    CheckNTErrors(beg >= 0 && beg < tensor->GetDim(dim), "Illegal beginning position!");
    CheckNTErrors(beg + len >= 0 && beg + len < tensor->GetDim(dim), "Illegal length!");
@@ -372,7 +323,6 @@ void _SetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
    int order = source->order;
    int size = source->GetDim(dim);

-    CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
    CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
    CheckNTErrors(index >= 0 && index < size, "Illegal index!");
@@ -527,8 +477,6 @@ the item to a pre-defined value if the item >= p, set the item to 0 otherwise
*/
void _SetDataRandP(const XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value)
{
-    CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO");
    if (tensor->devID < 0) {
        _SetDataRand(tensor, lower, upper);
......
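The template body of _SetDataFixed now lives in the .cpp file, so the `template void _SetDataFixed<...>` lines above are explicit instantiations: they emit the int/float/double symbols that other translation units link against. A minimal, generic sketch of that pattern (Fill is an illustrative name, not part of the library):

    // fill.h
    template<class T> void Fill(T * buf, int num, T value);

    // fill.cpp
    template<class T> void Fill(T * buf, int num, T value)
    {
        for (int i = 0; i < num; i++)
            buf[i] = value;
    }

    // explicit instantiations: without these, callers in other .cpp files
    // would hit unresolved-symbol errors at link time
    template void Fill<int>(int*, int, int);
    template void Fill<float>(float*, int, float);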
@@ -19,6 +19,7 @@
/*
 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
 * I'm surprised that I did not write this file till today.
+ * $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-22 float16 added
 */

#include <curand.h>
@@ -27,17 +28,20 @@
#include <curand_kernel.h>
#include "../../XDevice.h"
#include "../../XUtility.h"
+#include "ConvertDataType.h"
+#include <device_launch_parameters.h>

namespace nts { // namespace nts(NiuTrans.Tensor)

/*
-set an integer data array with a fixed value p (in int)
+set a data array with a fixed value p (in int, float, float16, double)
>> d - pointer to the data array
>> size - size of the array
>> p - the initial value
*/
+template<class T>
__global__
-void KernelSetDataFixedInt(int * d, int size, int p)
+void KernelSetDataFixed(T * d, int size, T p)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -46,14 +50,13 @@ void KernelSetDataFixedInt(int * d, int size, int p)
}

/*
-generate data items with a fixed value p (in int)
+generate data items with a fixed value p (in int, float, float16, double)
>> tensor - the tensor for initialization
>> p - the initial value
*/
-void _CudaSetDataFixedInt(XTensor * tensor, int p)
+template<class T>
+void _CudaSetDataFixed(XTensor * tensor, T p)
{
-    CheckNTErrors(tensor->dataType == X_INT, "the tensor must be in X_INT!");
-
    int gridSize[3];
    int blockSize[3];
@@ -65,89 +68,48 @@ void _CudaSetDataFixedInt(XTensor * tensor, int p)
    int devIDBackup;
    ProtectCudaDev(tensor->devID, devIDBackup);

-    KernelSetDataFixedInt <<<blocks, threads >>>((int*)tensor->data, tensor->unitNum, p);
+    if (tensor->dataType == X_INT){
+        KernelSetDataFixed<<<blocks, threads>>>((int*)tensor->data, tensor->unitNum, (int)p);
+    }
+    else if (tensor->dataType == X_FLOAT){
+        KernelSetDataFixed<<<blocks, threads>>>((DTYPE*)tensor->data, tensor->unitNum, (float)p);
+    }
+    else if (tensor->dataType == X_DOUBLE){
+        KernelSetDataFixed<<<blocks, threads>>>((double*)tensor->data, tensor->unitNum, (double)p);
+    }
+    else if (tensor->dataType == X_FLOAT16){
+        half p1 = __float2half(p);
+        KernelSetDataFixed<<<blocks, threads>>>((__half*)tensor->data, tensor->unitNum, p1);
+    }
+    else
+        ShowNTErrors("TODO");

    BacktoCudaDev(tensor->devID, devIDBackup);
}

+template void _CudaSetDataFixed<int>(XTensor*, int);
+template void _CudaSetDataFixed<float>(XTensor*, float);
+template void _CudaSetDataFixed<double>(XTensor*, double);
+//__device__
+//template void _CudaSetDataFixed<half>(XTensor*, half);

-/*
-set a float data array with a fixed value p (in int)
->> d - pointer to the data array
->> size - size of the array
->> p - the initial value
-*/
-__global__
-void KernelSetDataFixedFloat(float * d, int size, float p)
-{
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-
-    if (i < size)
-        d[i] = p;
-}
-
-/*
-generate data items with a fixed value p (in float)
->> tensor - the tensor for initialization
->> p - the initial value
-*/
-void _CudaSetDataFixedFloat(XTensor * tensor, float p)
-{
-    CheckNTErrors(tensor->dataType == X_FLOAT, "the tensor must be in X_FLOAT!");
-
-    int gridSize[3];
-    int blockSize[3];
-
-    GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);
-
-    dim3 blocks(gridSize[0]);
-    dim3 threads(blockSize[0]);
-
-    int devIDBackup;
-    ProtectCudaDev(tensor->devID, devIDBackup);
-
-    KernelSetDataFixedFloat <<<blocks, threads >>>((float*)tensor->data, tensor->unitNum, p);
-
-    BacktoCudaDev(tensor->devID, devIDBackup);
-}
-
-/*
-set a double data array with a fixed value p (in int)
->> d - pointer to the data array
->> size - size of the array
->> p - the initial value
-*/
-__global__
-void KernelSetDataFixedDouble(double * d, int size, double p)
-{
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-
-    if (i < size)
-        d[i] = p;
-}
-
-/*
-generate data items with a fixed value p (in double)
->> tensor - the tensor for initialization
->> p - the initial value
-*/
-void _CudaSetDataFixedDouble(XTensor * tensor, double p)
-{
-    CheckNTErrors(tensor->dataType == X_DOUBLE, "the tensor must be in X_DOUBLE!");
-
-    int gridSize[3];
-    int blockSize[3];
-
-    GDevs.GetCudaThread(tensor->devID, tensor->unitNum, gridSize, blockSize);
-
-    dim3 blocks(gridSize[0]);
-    dim3 threads(blockSize[0]);
-
-    int devIDBackup;
-    ProtectCudaDev(tensor->devID, devIDBackup);
-
-    KernelSetDataFixedDouble <<<blocks, threads >>>((double*)tensor->data, tensor->unitNum, p);
-
-    BacktoCudaDev(tensor->devID, devIDBackup);
-}

/*
+set data array with a uniform distribution in [low, high]
+>> deviceStates - the state of curand
+>> d - float, float16, double datatype pointer to the data array
>> size - size of the array
+>> lower - low value of the range
+>> variance - the variance of the range
*/
+template<class T>
__global__
+void KernelSetDataRand(T * d, int size, T lower, T variance)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

+    if (i < size) {
+        d[i] = d[i] * variance + lower;
+    }
}

/*
@@ -214,6 +176,7 @@ void KernelSetDataFixedCondInt(int * d, int * c, int size, int p)
        d[i] = p;
}

/*
generate data items with a fixed value p (in int) only
if the condition entry is non-zero
@@ -286,8 +249,9 @@ set data items to a pre-defined value if its value >= p, set it to 0 otherwise
>> lower - low value of the range
>> variance - the variance of the range
*/
+template<class T>
__global__
-void KernelSetDataPCut(DTYPE * d, int size, DTYPE p, DTYPE value)
+void KernelSetDataPCut(T * d, int size, T p, T value)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -307,8 +271,9 @@ set data items along with a given dimension (and keep the remaining items unchanged)
>> blockSize - size of a data block
>> blockNum - number of data blocks
*/
+template<class T>
__global__
-void KernelSetDataDim(DTYPE * d, int beg, int len, int blockSize, int blockNum, DTYPE p)
+void KernelSetDataDim(T * d, int beg, int len, int blockSize, int blockNum, T p)
{
    /* offset in each block */
    int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -316,10 +281,10 @@ void KernelSetDataDim(DTYPE * d, int beg, int len, int blockSize, int blockNum, DTYPE p)
    /* block id */
    int j = blockDim.y * blockIdx.y + threadIdx.y;

-    if(i >= blockSize || j > blockNum)
+    if (i >= blockSize || j > blockNum)
        return;

-    if(i < beg || i >= beg + len)
+    if (i < beg || i >= beg + len)
        return;

    d[blockSize * j + i] = p;
@@ -353,7 +318,7 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
    int stride = 1;
    int blockSize = 1;
    int blockNum = 1;
-    for(int i = n - 1; i > dim; i--){
+    for (int i = n - 1; i > dim; i--) {
        stride *= tensor->GetDim(i);
    }
    blockSize = stride * tensor->GetDim(dim);
@@ -370,8 +335,15 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
    int devIDBackup;
    ProtectCudaDev(tensor->devID, devIDBackup);

-    KernelSetDataDim<<<blocks, threads >>>((DTYPE*)tensor->data, beg * stride,
-                                           len * stride, blockSize, blockNum, p);
+    if (tensor->dataType == X_FLOAT){
+        KernelSetDataDim<<<blocks, threads>>>((DTYPE*)tensor->data, beg * stride,
+                                              len * stride, blockSize, blockNum, p);
+    }
+    else if (tensor->dataType == X_FLOAT16){
+        half p1 = __float2half(p);
+        KernelSetDataDim<<<blocks, threads>>>((__half*)tensor->data, beg * stride,
+                                              len * stride, blockSize, blockNum, p1);
+    }

    BacktoCudaDev(tensor->devID, devIDBackup);
}
@@ -386,8 +358,9 @@ modify data items along with a given index and dimension
>> blockSize - size of a data block
>> stride - stride of a data block
*/
+template<class T>
__global__
-void KernelSetDataIndexed(DTYPE * s, DTYPE * m, int blockNum, int blockSize, int stride)
+void KernelSetDataIndexed(T * s, T * m, int blockNum, int blockSize, int stride)
{
    /* offset in each block */
    int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -395,7 +368,7 @@ void KernelSetDataIndexed(DTYPE * s, DTYPE * m, int blockNum, int blockSize, int stride)
    /* block id */
    int j = blockDim.y * blockIdx.y + threadIdx.y;

-    if(i >= stride || j >= blockNum)
+    if (i >= stride || j >= blockNum)
        return;

    int x = blockSize * j + i;
@@ -426,7 +399,6 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
    int order = source->order;
    int size = source->GetDim(dim);

-    CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
    CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
    CheckNTErrors(index >= 0 && index < size, "Illegal index!");
@@ -452,8 +424,14 @@ void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
    int devIDBackup;
    ProtectCudaDev(source->devID, devIDBackup);

-    KernelSetDataIndexed<<<blocks, threads >>>((DTYPE*)source->data + index * stride, (DTYPE*)modify->data,
-                                               blockNum, blockSize, stride);
+    if (source->dataType == X_FLOAT){
+        KernelSetDataIndexed<<<blocks, threads>>>((DTYPE*)source->data + index * stride, (DTYPE*)modify->data,
+                                                  blockNum, blockSize, stride);
+    }
+    else if (source->dataType == X_FLOAT16){
+        KernelSetDataIndexed<<<blocks, threads>>>((__half*)source->data + index * stride, (__half*)modify->data,
+                                                  blockNum, blockSize, stride);
+    }

    BacktoCudaDev(source->devID, devIDBackup);
}
......
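The float16 branches added above all follow the same recipe: keep one templated kernel, convert the host-side scalar with __float2half, and launch on the tensor's data reinterpreted as __half*. A self-contained sketch of that recipe, with illustrative names (KernelFill, FillHalf) rather than the library's:

    #include <cuda_fp16.h>

    template<class T>
    __global__ void KernelFill(T * d, int size, T p)
    {
        int i = blockDim.x * blockIdx.x + threadIdx.x;
        if (i < size)
            d[i] = p;
    }

    // host-side launcher for half-precision data, mirroring the X_FLOAT16 branches
    void FillHalf(__half * d, int size, float p)
    {
        int threads = 256;
        int blocks = (size + threads - 1) / threads;
        __half ph = __float2half(p);   // host float -> half, as in the diff
        KernelFill<<<blocks, threads>>>(d, size, ph);
    }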
@@ -19,6 +19,7 @@
/*
 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-07-18
 * I'm surprised that I did not write this file till today.
+ * $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-22 float16 added
 */

#ifndef __SETDATA_CUH__
@@ -28,14 +29,10 @@
namespace nts { // namespace nts(NiuTrans.Tensor)

-/* generate data items with a fixed value p (in int) */
-void _CudaSetDataFixedInt(XTensor * tensor, int p);
-
-/* generate data items with a fixed value p (in float) */
-void _CudaSetDataFixedFloat(XTensor * tensor, float p);
-
-/* generate data items with a fixed value p (in double) */
-void _CudaSetDataFixedDouble(XTensor * tensor, double p);
+/* generate data items with a fixed value p (in int, float, float16, double) */
+template<class T>
+void _CudaSetDataFixed(XTensor * tensor, T p);

/* generate data items with a fixed value p (in float) only
   if the condition entry is non-zero */
......
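One detail worth noting from the .cu/.cuh hunks: no half instantiation of _CudaSetDataFixed is exported (it is left commented out). An X_FLOAT16 tensor is still handled, but through the int/float/double instantiations, whose dispatch converts the scalar to half internally before launching the kernel, roughly:

    // sketch of the X_FLOAT16 branch inside the templated host function above;
    // p is the int/float/double template argument, converted to half here
    if (tensor->dataType == X_FLOAT16) {
        half p1 = __float2half(p);
        KernelSetDataFixed<<<blocks, threads>>>((__half*)tensor->data, tensor->unitNum, p1);
    }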
@@ -24,29 +24,22 @@
#define __SETDATA_H__

#include "../../XTensor.h"
+#include "SetData.cuh"

namespace nts { // namespace nts(NiuTrans.Tensor)

/* generate data items with a xavier initialization */
void _SetDataFanInOut(XTensor * tensor, DTYPE gain = 1.0F);

-/* generate data items with a fixed value p */
-void _SetDataFixed(XTensor * tensor, void * valuePointer);
+///* generate data items with a fixed value p */
+//void _SetDataFixed(XTensor * tensor, void * valuePointer);

/* generate data items with a fixed value p (in default type) */
void SetDataFixed(XTensor &tensor, DTYPE p);

-/* generate data items with a fixed value p (in integer) */
void SetDataFixedInt(XTensor &tensor, int p);

-/* generate data items with a fixed value p (in int) */
-void _SetDataFixedInt(XTensor * tensor, int p);
-
-/* generate data items with a fixed value p (in float) */
-void _SetDataFixedFloat(XTensor * tensor, float p);
-
-/* generate data items with a fixed value p (in double) */
-void _SetDataFixedDouble(XTensor * tensor, double p);
+template<class T>
+void _SetDataFixed(XTensor * tensor, T value);

/* generate data items with a fixed value p only if the condition entry is non-zero */
void _SetDataFixedCond(XTensor * tensor, XTensor * condition, DTYPE p);
......
@@ -70,7 +70,7 @@ XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale)
    InitTensor1D(&c, x.unitNum, x.dataType, x.devID, x.mem);

-    _SetDataFixedFloat(&c, 1.0F);
+    _SetDataFixed(&c, 1.0F);

    _DropoutWithIndex(&x, &maskIndex, &c);
......
@@ -385,11 +385,11 @@ void _LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
{
    if(t == NULL){
        if(dedy->dataType == X_FLOAT)
-            _SetDataFixedFloat(dedy, 1.0F);
+            _SetDataFixed(dedy, 1.0F);
        else if(dedy->dataType == X_DOUBLE)
-            _SetDataFixedDouble(dedy, 1.0);
+            _SetDataFixed(dedy, 1.0);
        else if(dedy->dataType == X_INT)
-            _SetDataFixedInt(dedy, 1);
+            _SetDataFixed(dedy, 1);
        else{
            ShowNTErrors("TODO");
        }
......