Commit daf4765a by linye

update hardtanh

parent fe868e5c
@@ -17,7 +17,7 @@
 /*
  * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-25
- * $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-04 float16 added
+ * $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-12 float16 added
  */
 #include "HardTanH.h"
@@ -105,14 +105,15 @@ dy/dx = 1 if -1 <= x <= 1
 >> x - x of the function
 >> size - size of y/x
 */
+template <class T>
 __global__
-void KernelHardtanhBackward(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYPE * y, DTYPE * x, int size)
+void KernelHardtanhBackward(T * dedy, T * dedx, T * gold, T * y, T * x, int size)
 {
     int i = blockDim.x * blockIdx.x + threadIdx.x;
 
     if (i < size){
-        DTYPE s = x[i];
-        if(s > (DTYPE)1.0 || s < (DTYPE)-1.0)
+        T s = x[i];
+        if(s > (T)1.0 || s < (T)-1.0)
             dedx[i] = 0;
         else
             dedx[i] = dedy[i];
@@ -142,21 +143,24 @@ void _CudaHardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
                            XTensor * dedy, XTensor * dedx,
                            LOSS_FUNCTION_NAME lossName)
 {
-    if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
+    CheckNTErrors(((x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE) ||
+                   (x->dataType == X_FLOAT16 && y->dataType == X_FLOAT16)),
+                  "Input vectors are not in default type.");
 
     /* calculate dE/dy */
-    if(lossName == CROSSENTROPY)
+    if (lossName == CROSSENTROPY)
         _CudaCrossEntropyBackward(dedy, y, gold);
-    else if(lossName != NOLOSS)
+    else if (lossName != NOLOSS)
         _CudaLossBackward(dedy, gold, y, lossName);
 
     int gridSize[3], blockSize[3];
     GDevs.GetCudaThread(x->devID, x->unitNum, gridSize, blockSize);
 
     int devIDBackup;
     ProtectCudaDev(x->devID, devIDBackup);
 
+    if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
         /* dE/dx = dE/dy * dy/dx */
         KernelHardtanhBackward<<<dim3(gridSize[0]),dim3(blockSize[0])>>>
                               ((DTYPE*)dedy->data,
@@ -164,11 +168,18 @@ void _CudaHardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
                                gold == NULL ? NULL : (DTYPE*)gold->data,
                                (DTYPE*)y->data, (DTYPE*)x->data,
                                x->unitNum);
-
-        BacktoCudaDev(x->devID, devIDBackup);
     }
-    else
-        ShowNTErrors("TODO!");
+    else if (x->dataType == X_FLOAT16 && y->dataType == X_FLOAT16) {
+        /* dE/dx = dE/dy * dy/dx */
+        KernelHardtanhBackward<<<dim3(gridSize[0]), dim3(blockSize[0])>>>
+                              ((half*)dedy->data,
+                               (half*)dedx->data,
+                               gold == NULL ? NULL : (half*)gold->data,
+                               (half*)y->data, (half*)x->data,
+                               x->unitNum);
+    }
+
+    BacktoCudaDev(x->devID, devIDBackup);
 }
 #endif
...
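The hunks above turn KernelHardtanhBackward into a template over the element type, so one kernel body serves both DTYPE (float) and half. Below is a small self-contained sketch of that pattern. It is illustrative only (hypothetical names, not the NiuTensor sources), assumes CUDA with cuda_fp16.h available, and does the comparison in float so the same body also compiles for T = half.

// hardtanh_backward_sketch.cu -- illustrative only, not part of the commit
#include <cuda_fp16.h>
#include <cstdio>

/* dE/dx = dE/dy inside [-1, 1], 0 outside; T may be float or half */
template <class T>
__global__ void KernelHardtanhBackwardSketch(T * dedy, T * dedx, T * x, int size)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < size) {
        float s = (float)x[i];   /* promote to float so the comparison is valid for half too */
        dedx[i] = (s > 1.0F || s < -1.0F) ? (T)0.0F : dedy[i];
    }
}

int main()
{
    const int n = 8;
    float hx[n], hdedy[n], hdedx[n];
    for (int i = 0; i < n; i++) { hx[i] = -2.0F + 0.5F * i; hdedy[i] = 1.0F; }

    float *dx, *ddedy, *ddedx;
    cudaMalloc(&dx, n * sizeof(float));
    cudaMalloc(&ddedy, n * sizeof(float));
    cudaMalloc(&ddedx, n * sizeof(float));
    cudaMemcpy(dx, hx, n * sizeof(float), cudaMemcpyHostToDevice);
    cudaMemcpy(ddedy, hdedy, n * sizeof(float), cudaMemcpyHostToDevice);

    /* float instantiation; a float16 caller would pass half* and get the <half> instantiation */
    KernelHardtanhBackwardSketch<float><<<1, n>>>(ddedy, ddedx, dx, n);
    cudaMemcpy(hdedx, ddedx, n * sizeof(float), cudaMemcpyDeviceToHost);

    for (int i = 0; i < n; i++)
        printf("x = % .1f  dE/dx = %.1f\n", hx[i], hdedx[i]);

    cudaFree(dx); cudaFree(ddedy); cudaFree(ddedx);
    return 0;
}

Gradients come out as 0 for |x| > 1 and pass dE/dy through otherwise, which is exactly the rule stated in the kernel comment above.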
@@ -222,8 +222,9 @@ backward compuation for squared error (Cuda kernel)
 >> y - model output (in vector)
 >> size - size of the vector (dedy)
 */
+template <class T>
 __global__
-void KernelLossBackwardSquaredError(DTYPE * dedy, DTYPE * t, DTYPE * y, int size)
+void KernelLossBackwardSquaredError(T * dedy, T * t, T * y, int size)
 {
     int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -242,8 +243,9 @@ backward compuation of blocks for squared error (Cuda kernel)
 >> lenInBlock - number of items in a block for computation
 >> size - size of the vector (dedy)
 */
+template <class T>
 __global__
-void KernelLossBackwardSquaredErrorBlock(DTYPE * dedy, DTYPE * t, DTYPE * y,
+void KernelLossBackwardSquaredErrorBlock(T * dedy, T * t, T * y,
                                          int blockSize, int begInBlock, int lenInBlock, int size)
 {
     int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -265,8 +267,9 @@ backward compuation for cross entropy (Cuda kernel)
 >> y - model output (in vector)
 >> size - size of the vector (dedy)
 */
+template <class T>
 __global__
-void KernelLossBackwardCrossEntropy(DTYPE * dedy, DTYPE * t, DTYPE * y, int tBeg, int tLen, int yBeg, int blockNum, int stride, int dimensionSize)
+void KernelLossBackwardCrossEntropy(T * dedy, T * t, T * y, int tBeg, int tLen, int yBeg, int blockNum, int stride, int dimensionSize)
 {
     int i = blockDim.x * blockIdx.x + threadIdx.x;
     if (i > stride * dimensionSize * blockNum)
@@ -297,8 +300,9 @@ backward compuation for cross entropy (Cuda kernel)
 >> lenInBlock - number of items in a block for computation
 >> size - size of the vector (dedy)
 */
+template <class T>
 __global__
-void KernelLossBackwardCrossEntropyBlock(DTYPE * dedy, DTYPE * t, DTYPE * y,
+void KernelLossBackwardCrossEntropyBlock(T * dedy, T * t, T * y,
                                          int blockSize, int begInBlock, int lenInBlock, int size)
 {
     int i = blockDim.x * blockIdx.x + threadIdx.x;
@@ -337,14 +341,8 @@ void _CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
     CheckNTErrors(((dedy->devID == t->devID) && (dedy->devID == y->devID)),
                   "Tensor must be on the same device!");
     CheckNTErrors((t->order > leadDim), "Illegal leading dimension!");
-    CheckNTErrors((t->dataType == DEFAULT_DTYPE &&
-                   y->dataType == DEFAULT_DTYPE &&
-                   dedy->dataType == DEFAULT_DTYPE),
-                  "Input vectors are not in default type.");
     CheckNTErrors((dedy->devID >= 0 && t->devID >= 0 && y->devID >= 0),
                   "The backward compuation must be performed on GPUs.");
     CheckNTErrors((dedy->devID == t->devID && dedy->devID == y->devID),
                   "The vectors must be on the same GPU.");
     CheckNTErrors((tBeg == yBeg), "TODO!");
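The check removed in this hunk used to pin _CudaLossBackward to DEFAULT_DTYPE; with it gone, the function can branch on the tensors' dataType and launch the same templated kernels with either DTYPE or half pointers, as the following hunk shows. A stripped-down sketch of that run-time dispatch (the names and the enum are made up for illustration and are not NiuTensor API):

// dispatch_sketch.cu -- illustrative only
#include <cuda_fp16.h>

enum SketchDataType { SKETCH_FLOAT, SKETCH_FLOAT16 };   /* stand-ins for X_FLOAT / X_FLOAT16 */

/* one kernel body, compiled once per element type */
template <class T>
__global__ void KernelScaleSketch(T * a, int n)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < n)
        a[i] = (T)((float)a[i] * 2.0F);   /* promote to float so the body is valid for half too */
}

/* the run-time branch only picks the pointer type; the kernel source stays single */
void ScaleSketch(void * data, SketchDataType dataType, int n)
{
    dim3 blocks((n + 255) / 256), threads(256);
    if (dataType == SKETCH_FLOAT)
        KernelScaleSketch<float><<<blocks, threads>>>((float*)data, n);
    else
        KernelScaleSketch<half><<<blocks, threads>>>((half*)data, n);
}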
@@ -376,51 +374,105 @@ void _CudaLossBackward(XTensor * dedy, XTensor * t, XTensor * y,
     dim3 blocks(cudaGridSize[0]);
     dim3 threads(cudaBlockSize[0]);
 
-    DTYPE * tp = (DTYPE*)t->data;
-    DTYPE * yp = (DTYPE*)y->data;
-    DTYPE * dedyp = (DTYPE*)dedy->data;
-
-    int devIDBackup;
-    ProtectCudaDev(y->devID, devIDBackup);
-
-    /*
-    squared error
-    loss = sum_{i} 0.5*(t_i - y_i)^2, where t_i is the gold standard and y_i is the model output
-    dloss/dy_i = y_i - t_i
-    */
-    if(LFName == SQUAREDERROR){
-        if(t->isSparse){
-            ShowNTErrors("TODO!");
-        }
-        else if(size == y->unitNum){
-            KernelLossBackwardSquaredError<<<blocks, threads>>>(dedyp, tp, yp, y->unitNum);
-        }
-        else{
-            KernelLossBackwardSquaredErrorBlock<<<blocks, threads>>>(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
-        }
-    }
-
-    /*
-    cross entropy
-    loss = sum_{i} (-t_i * log(y_i)), where t and y are distributions
-    dloss/dy_i = -t_i / y_i
-    */
-    else if(LFName == CROSSENTROPY){
-        if(t->isSparse){
-            ShowNTErrors("TODO!");
-        }
-        else if(size == y->unitNum){
-            KernelLossBackwardCrossEntropy<<<blocks, threads>>>(dedyp, tp, yp, tBeg, tLen, yBeg, blockNum, stride, dimensionSize);
-        }
-        else{
-            KernelLossBackwardCrossEntropyBlock<<<blocks, threads>>>(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
-        }
-    }
+    if (t->dataType == DEFAULT_DTYPE &&
+        y->dataType == DEFAULT_DTYPE &&
+        dedy->dataType == DEFAULT_DTYPE) {
+        DTYPE * tp = (DTYPE*)t->data;
+        DTYPE * yp = (DTYPE*)y->data;
+        DTYPE * dedyp = (DTYPE*)dedy->data;
+
+        int devIDBackup;
+        ProtectCudaDev(y->devID, devIDBackup);
+
+        /*
+        squared error
+        loss = sum_{i} 0.5*(t_i - y_i)^2, where t_i is the gold standard and y_i is the model output
+        dloss/dy_i = y_i - t_i
+        */
+        if (LFName == SQUAREDERROR) {
+            if (t->isSparse) {
+                ShowNTErrors("TODO!");
+            }
+            else if (size == y->unitNum) {
+                KernelLossBackwardSquaredError << <blocks, threads >> >(dedyp, tp, yp, y->unitNum);
+            }
+            else {
+                KernelLossBackwardSquaredErrorBlock << <blocks, threads >> >(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
+            }
+        }
+
+        /*
+        cross entropy
+        loss = sum_{i} (-t_i * log(y_i)), where t and y are distributions
+        dloss/dy_i = -t_i / y_i
+        */
+        else if (LFName == CROSSENTROPY) {
+            if (t->isSparse) {
+                ShowNTErrors("TODO!");
+            }
+            else if (size == y->unitNum) {
+                KernelLossBackwardCrossEntropy << <blocks, threads >> >(dedyp, tp, yp, tBeg, tLen, yBeg, blockNum, stride, dimensionSize);
+            }
+            else {
+                KernelLossBackwardCrossEntropyBlock << <blocks, threads >> >(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
+            }
+        }
+
+        BacktoCudaDev(y->devID, devIDBackup);
+    }
+    else if (t->dataType == X_FLOAT16 &&
+             y->dataType == X_FLOAT16 &&
+             dedy->dataType == X_FLOAT16) {
+        half * tp = (half*)t->data;
+        half * yp = (half*)y->data;
+        half * dedyp = (half*)dedy->data;
+
+        int devIDBackup;
+        ProtectCudaDev(y->devID, devIDBackup);
+
+        /*
+        squared error
+        loss = sum_{i} 0.5*(t_i - y_i)^2, where t_i is the gold standard and y_i is the model output
+        dloss/dy_i = y_i - t_i
+        */
+        if (LFName == SQUAREDERROR) {
+            if (t->isSparse) {
+                ShowNTErrors("TODO!");
+            }
+            else if (size == y->unitNum) {
+                KernelLossBackwardSquaredError << <blocks, threads >> >(dedyp, tp, yp, y->unitNum);
+            }
+            else {
+                KernelLossBackwardSquaredErrorBlock << <blocks, threads >> >(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
+            }
+        }
+
+        /*
+        cross entropy
+        loss = sum_{i} (-t_i * log(y_i)), where t and y are distributions
+        dloss/dy_i = -t_i / y_i
+        */
+        else if (LFName == CROSSENTROPY) {
+            if (t->isSparse) {
+                ShowNTErrors("TODO!");
+            }
+            else if (size == y->unitNum) {
+                KernelLossBackwardCrossEntropy << <blocks, threads >> >(dedyp, tp, yp, tBeg, tLen, yBeg, blockNum, stride, dimensionSize);
+            }
+            else {
+                KernelLossBackwardCrossEntropyBlock << <blocks, threads >> >(dedyp, tp, yp, blockSize, tBeg * stride, tLen * stride, y->unitNum);
+            }
+        }
+
+        BacktoCudaDev(y->devID, devIDBackup);
+    }
     else{
         ShowNTErrors("TODO");
     }
-
-    BacktoCudaDev(y->devID, devIDBackup);
 }
 #endif
...
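The derivative comments in the loss hunks above are easy to sanity-check by hand: for squared error, d(0.5*(t_i - y_i)^2)/dy_i = y_i - t_i, and for cross entropy, d(-t_i * log(y_i))/dy_i = -t_i / y_i. A tiny host-only check, independent of the NiuTensor code:

// loss_grad_check.cpp -- illustrative only
#include <cstdio>

int main()
{
    const int n = 3;
    float t[n] = { 0.0F, 1.0F, 0.0F };   /* gold standard (one-hot) */
    float y[n] = { 0.2F, 0.7F, 0.1F };   /* model output */

    for (int i = 0; i < n; i++) {
        float dSquared = y[i] - t[i];    /* d(0.5*(t - y)^2)/dy = y - t  */
        float dCross   = -t[i] / y[i];   /* d(-t * log(y))/dy   = -t / y */
        printf("i = %d  squared-error grad = % .2f  cross-entropy grad = % .4f\n",
               i, dSquared, dCross);
    }
    return 0;
}

For i = 1 this gives 0.7 - 1 = -0.3 and -1/0.7 ≈ -1.4286; the zero gold entries contribute 0.2 and 0.1 to the squared-error gradient and 0 to the cross-entropy gradient.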
@@ -82,9 +82,9 @@ bool Test()
     //wrong = !TestCrossEntropy() || wrong;
     //wrong = !TestDropout() || wrong;
-    //wrong = !TestHardTanH() || wrong;
+    wrong = !TestHardTanH() || wrong;
     //wrong = !TestIdentity() || wrong;
-    wrong = !TestLogSoftmax() || wrong;
+    //wrong = !TestLogSoftmax() || wrong;
     //wrong = !TestLoss() || wrong;
     //wrong = !TestRectify() || wrong;
     //wrong = !TestSigmoid() || wrong;
...
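The last hunk switches the test driver from TestLogSoftmax to TestHardTanH so the new float16 path gets exercised. When a float16 result is checked against a float32 reference, the comparison needs a tolerance on the order of half precision (about three decimal digits; machine epsilon 2^-10 ≈ 9.8e-4). A hedged sketch of such a check, not taken from the NiuTensor test code (assumes CUDA 9 or later so the cuda_fp16.h conversions are callable on the host, compiled with nvcc):

// half_tolerance_sketch.cu -- illustrative only
#include <cuda_fp16.h>
#include <cmath>
#include <cstdio>

/* compare a float32 reference against values that went through float16 storage */
bool CheckWithHalfTolerance(const float * ref, const float * viaHalf, int n, float tol)
{
    for (int i = 0; i < n; i++)
        if (std::fabs(ref[i] - viaHalf[i]) > tol * (1.0F + std::fabs(ref[i])))
            return false;
    return true;
}

int main()
{
    /* round-trip one value through half to see the precision loss the tolerance must absorb */
    float v = 0.3333333F;
    float r = __half2float(__float2half(v));
    printf("float32: %.7f  via float16: %.7f  abs diff: %.7f\n", v, r, std::fabs(v - r));

    bool ok = CheckWithHalfTolerance(&v, &r, 1, 1e-3F);
    printf("within half tolerance: %s\n", ok ? "yes" : "no");
    return 0;
}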