Commit 1b50554a by xuchen

Merge branch 'xuchen' into xiaotong-working

parents cf43c58c 102db468
......@@ -66,8 +66,8 @@ void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, i
     for (int i = 0; i < a->order; i++) {
         if (i != leadingDimRDI) {
             CheckNTErrors((a->dimSizeRDI[i] == b->dimSizeRDI[i] &&
-                           a->dimSizeRDI[i] == c->dimSizeRDI[i]),
-                          "Unmatched tensors!");
+                          a->dimSizeRDI[i] == c->dimSizeRDI[i]),
+                         "Unmatched tensors!");
         }
         if (i < leadingDimRDI)
             stride *= a->dimSizeRDI[i];
......
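Review note on the check above: the loop requires every dimension except the leading one to agree across a, b and c (dimSizeRDI indexes the dimensions in reverse order). A shape sketch with made-up sizes:

    // leadingDim = 0: only the leading dimension may differ
    //   a: (4, 5, 6)   b: (2, 5, 6)   c: (4, 5, 6)   -> passes the check
    //   a: (4, 5, 6)   b: (4, 7, 6)   c: (4, 5, 6)   -> "Unmatched tensors!"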
......@@ -77,7 +77,7 @@ where |a_lead| means the size of the leading dimension of a
 */
 template<int nonZeroAlpha> __global__
 void KernelMulElementWiseTensorDynamic(DTYPE * a, DTYPE * b, DTYPE * c, DTYPE alpha,
-                                       int stride, int ldSizeA, int ldSizeB, int ldSizeC, int blockNum)
+                                      int stride, int ldSizeA, int ldSizeB, int ldSizeC, int blockNum)
 {
     __shared__ DTYPE* ap[MAX_CUDA_THREAD_NUM_PER_BLOCK];
     __shared__ DTYPE* bp[MAX_CUDA_THREAD_NUM_PER_BLOCK];
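For reviewers, a minimal CPU sketch of the semantics this kernel parallelizes, based on the comment in the hunk header (c(i) = a(i) * b(i % |b_lead|) + alpha * c(i)). The function name and the flat-layout assumptions are mine, not the library's:

    /* reference semantics only: row-major data, blockNum blocks above the
       leading dimension, stride elements below it; a smaller leading
       dimension wraps around via modulo */
    void MultiplyReference(const float * a, const float * b, float * c,
                           float alpha, int stride,
                           int ldSizeA, int ldSizeB, int ldSizeC, int blockNum)
    {
        for (int k = 0; k < blockNum; k++) {
            for (int i = 0; i < ldSizeC; i++) {
                const float * ap = a + (k * ldSizeA + i % ldSizeA) * stride;
                const float * bp = b + (k * ldSizeB + i % ldSizeB) * stride;
                float * cp = c + (k * ldSizeC + i) * stride;
                for (int j = 0; j < stride; j++)
                    cp[j] = ap[j] * bp[j] + alpha * cp[j];
            }
        }
    }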
......@@ -171,14 +171,12 @@ void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alph
         if (alpha == 0) {
             KernelMulElementWiseTensorDynamic<0> << <blocks, threads >> >
                                                  ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, 0,
-                                                  stride, dimensionSizeA, dimensionSizeB, dimensionSizeC,
-                                                  blockNum);
+                                                  stride, dimensionSizeA, dimensionSizeB, dimensionSizeC, blockNum);
         }
         else {
             KernelMulElementWiseTensorDynamic<1> << <blocks, threads >> >
                                                  ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, alpha,
-                                                  stride, dimensionSizeA, dimensionSizeB, dimensionSizeC,
-                                                  blockNum);
+                                                  stride, dimensionSizeA, dimensionSizeB, dimensionSizeC, blockNum);
         }
     }
 }
......
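The two launches above pick a template instantiation instead of branching on alpha at run time. A sketch of the pattern (illustrative kernel, not the library's):

    /* nonZeroAlpha is a compile-time constant, so the branch below is
       resolved during compilation and the <0> instantiation never reads c */
    template<int nonZeroAlpha> __global__
    void KernelSketch(float * a, float * b, float * c, float alpha, int n)
    {
        int i = blockDim.x * blockIdx.x + threadIdx.x;
        if (i >= n)
            return;
        if (nonZeroAlpha)
            c[i] = a[i] * b[i] + alpha * c[i];
        else
            c[i] = a[i] * b[i];
    }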
......@@ -25,7 +25,7 @@
 #include "Dropout.h"
 #include "Dropout.cuh"
 #include "../core/arithmetic/Multiply.h"
-#include "../core/arithmetic/SumDim.h"
+#include "../core/arithmetic/MultiplyDim.h"
 #include "../core/math/ScaleAndShift.h"

 namespace nts{ // namespace nts(NiuTrans.Tensor
......@@ -44,40 +44,35 @@ the same inference procedure as that with no use of dropout on the test data.
 >> x - input tensor
 >> y - output tensor
->> prob - probability to set an element to zero
 >> seed - random seed
+>> dropProb - probability to set an element to zero
+>> leadingDim - the dimension along which we generate the random numbers and perform broadcasting
 */
-void _Dropout(const XTensor *x, XTensor *y, unsigned int seed, DTYPE prob)
+void _Dropout(const XTensor * x, XTensor * y, unsigned int seed, DTYPE dropProb, int leadingDim)
 {
-    CheckNTErrors(prob >= 0.0 && prob <= 1.0, "The probability must be 0-1!");
-
-    DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - prob);
+    CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
+
+    int n = leadingDim < 0 ? x->order - 1 : leadingDim;
+
+    CheckNTErrors(n >= 0 && n < x->order, "Wrong leadingDim!");
+
+    DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);

     /* generate a mask tensor again with special probability */
-    srand(seed);
-    int unitNum = x->unitNum;
+    int unitNum = x->dimSize[n];
     DTYPE * maskArray = new DTYPE[unitNum];
-    for (int i = 0; i < unitNum; i++)
-        maskArray[i] = RandomBernoulli(prob, 1.0F);
-
-    XTensor * maskTensor = NewTensorBuf(x, x->devID, x->mem);
-    maskTensor->SetData(maskArray, unitNum);
+
+    srand(seed);
+    for (int i = 0; i < unitNum; i++)
+        maskArray[i] = RandomBernoulli(dropProb, scaleFactor);

-#ifdef USE_CUDA
-    if(x->devID >=0 || y->devID >= 0){
-        _CudaDropout(x, y, maskTensor, scaleFactor);
-        DelTensorBuf(maskTensor);
-        delete[] maskArray;
-        return;
-    }
-#endif
+    XTensor * mask = NewTensor1D(unitNum, x->dataType, x->devID, x->mem);
+    mask->SetData(maskArray, unitNum);

-    XTensor * inter = NewTensorBuf(x, x->devID, x->mem);
-    _Multiply(x, maskTensor, inter);
-    _ScaleAndShift(inter, y, scaleFactor, 0);
+    /* call Multiply function for mask */
+    _MultiplyDim(x, mask, y, n, 0);

-    DelTensorBuf(inter);
-    DelTensorBuf(maskTensor);
+    delete mask;
     delete[] maskArray;
 }
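What the rewritten _Dropout computes, as a plain C++ reference for a 2-D input with n == 1: the mask now has one entry per slice along the leading dimension rather than one per element, and _MultiplyDim broadcasts it (which also covers the GPU path, hence the deleted #ifdef USE_CUDA branch). Hypothetical helper, assuming the same rand()-based Bernoulli draw as RandomBernoulli:

    #include <cstdlib>

    void DropoutRef2D(const float * x, float * y, int rows, int cols,
                      unsigned int seed, float dropProb)
    {
        float scaleFactor = 1.0F / (1.0F - dropProb);
        float * mask = new float[cols];

        /* one mask entry per column: 0 or 1/(1 - dropProb) */
        srand(seed);
        for (int j = 0; j < cols; j++)
            mask[j] = ((float)rand() / (float)RAND_MAX >= dropProb) ? scaleFactor : 0.0F;

        /* the broadcast that _MultiplyDim(x, mask, y, n, 0) performs */
        for (int i = 0; i < rows; i++)
            for (int j = 0; j < cols; j++)
                y[i * cols + j] = x[i * cols + j] * mask[j];

        delete[] mask;
    }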
......@@ -90,44 +85,39 @@ dE/dx = dE/dy * dy/dx
 >> x - input of the dropout function
 >> dedy - dE/dy
 >> dedx - dE/dx
->> prob - probability to set an element zero
 >> seed - random seed
+>> dropProb - probability to set an element to zero
+>> leadingDim - the dimension along which we generate the random numbers and perform broadcasting
 */
 void _DropoutBackward(const XTensor * y, const XTensor * x,
                       const XTensor * dedy, XTensor * dedx,
-                      unsigned int seed, DTYPE prob)
+                      unsigned int seed, DTYPE dropProb, int leadingDim)
 {
+    CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
+
+    int n = leadingDim < 0 ? x->order - 1 : leadingDim;
+
+    CheckNTErrors(n >= 0 && n < x->order, "Wrong leadingDim!");
+
     if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE)
     {
-        int unitNum = y->unitNum;
-        DTYPE scaleFactor = (DTYPE)1.0F / ((DTYPE)1.0F - prob);
+        DTYPE scaleFactor = (DTYPE)1.0F / ((DTYPE)1.0F - dropProb);

         /* generate a mask tensor again with special probability */
-        srand(seed);
+        int unitNum = x->dimSize[n];
         DTYPE * maskArray = new DTYPE[unitNum];
+
+        srand(seed);
         for (int i = 0; i < unitNum; i++)
-            maskArray[i] = RandomBernoulli(prob, 1.0F);
-
-        XTensor * maskTensor = NewTensorBuf(x, x->devID, x->mem);
-        maskTensor->SetData(maskArray, unitNum);
-
-#ifdef USE_CUDA
-        if(x->devID >= 0 || y->devID >= 0){
-            _CudaDropoutBackward(y, x, dedy, dedx, maskTensor, scaleFactor);
-            DelTensorBuf(maskTensor);
-            delete[] maskArray;
-            return;
-        }
-#endif
+            maskArray[i] = RandomBernoulli(dropProb, scaleFactor);

-        DTYPE * dedyp = (DTYPE*)dedy->data;
-        DTYPE * dedxp = (DTYPE*)dedx->data;
+        XTensor * mask = NewTensor1D(unitNum, x->dataType, x->devID, x->mem);
+        mask->SetData(maskArray, unitNum);

-        /* dE/dx = dE/dy * dy/dx */
-        for(int i = 0; i < unitNum; i++)
-            dedxp[i] = dedyp[i] * maskArray[i] * scaleFactor;
+        /* call MultiplyDim function for mask */
+        _MultiplyDim(dedy, mask, dedx, n, 0);

-        DelTensorBuf(maskTensor);
+        delete mask;
         delete[] maskArray;
     }
     else
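Because the backward pass reseeds with the same seed, RandomBernoulli regenerates the forward mask exactly, so nothing has to be cached between the passes and dE/dx = dE/dy * broadcast_n(mask). Reference loop for the 2-D, n == 1 case, matching DropoutRef2D above (hypothetical helper):

    void DropoutBackwardRef2D(const float * dedy, float * dedx,
                              const float * mask, int rows, int cols)
    {
        /* mask[j] is 0 or 1/(1 - dropProb); the scale is folded into the
           mask, which is why the old "* scaleFactor" in the loop is gone */
        for (int i = 0; i < rows; i++)
            for (int j = 0; j < cols; j++)
                dedx[i * cols + j] = dedy[i * cols + j] * mask[j];
    }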
......@@ -147,14 +137,18 @@ to mark the tensor with probability p in the inference phase. Instead we perform
 the same inference procedure as that with no use of dropout on the test data.

 >> x - input tensor
 >> y - output tensor
->> prob - probability to set an element to zero
->> leadDim - the dimension along which we generate the random numbers
+>> dropProb - probability to set an element to zero
+>> leadingDim - the dimension along which we generate the random numbers and perform broadcasting
 */
-XTensor Dropout(const XTensor &x, DTYPE prob, int leadDim)
+XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim)
 {
-    int n = leadDim < 0 ? x.order - 1 : leadDim;
-    DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - prob);
+    CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
+
+    int n = leadingDim < 0 ? x.order - 1 : leadingDim;
+
+    CheckNTErrors(n >= 0 && n < x.order, "Wrong leadingDim!");
+
+    DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);

     /* generate a mask tensor with probability p */
     int unitNum = x.dimSize[n];
......@@ -162,20 +156,15 @@ XTensor Dropout(const XTensor &x, DTYPE prob, int leadDim)
     srand((unsigned int)time(NULL));
     for (int i = 0; i < unitNum; i++)
-        maskArray[i] = RandomBernoulli(prob, scaleFactor);
-
-    XTensor mask(&x);
-    mask.SetZeroAll();
-
-    XTensor * maskVector = NewTensorBuf(1, &unitNum, X_FLOAT, 1.0F, x.devID, x.mem);
-    maskVector->SetData(maskArray, unitNum);
+        maskArray[i] = RandomBernoulli(dropProb, scaleFactor);

-    _SumDim(&mask, maskVector, &mask, n);
+    XTensor mask;
+    InitTensor1D(&mask, unitNum, x.dataType, x.devID, x.mem);
+    mask.SetData(maskArray, unitNum);

     delete[] maskArray;
-    DelTensorBuf(maskVector);

-    return Multiply(x, mask);
+    return MultiplyDim(x, mask, n, 0);
 }

 } // namespace nts(NiuTrans.Tensor)
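Usage sketch for the new signature (tensor sizes made up; InitTensor3D and SetDataRand as used elsewhere in the library):

    XTensor x;
    InitTensor3D(&x, 32, 50, 512, X_FLOAT);   /* e.g. (batch, length, dim) */
    x.SetDataRand(0, 1);

    /* drop whole slices along dimension 1 with p = 0.2; survivors are
       rescaled by 1 / (1 - 0.2) through the mask */
    XTensor y = Dropout(x, 0.2F, 1);

    /* leadingDim defaults to -1, i.e. the last dimension of x */
    XTensor z = Dropout(x, 0.2F);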
......@@ -28,21 +28,21 @@
 namespace nts{ // namespace nts(NiuTrans.Tensor)

 /* generate a random bernoulli number */
-inline DTYPE RandomBernoulli(DTYPE prob, DTYPE value)
+inline DTYPE RandomBernoulli(DTYPE dropProb, DTYPE value)
 {
-    return (DTYPE)rand()/(DTYPE)RAND_MAX >= prob ? (DTYPE)value : 0;
+    return (DTYPE)rand()/(DTYPE)RAND_MAX >= dropProb ? (DTYPE)value : 0;
 }

 /* dropout function */
-void _Dropout(const XTensor * x, XTensor * y, unsigned int seed, DTYPE prob);
+void _Dropout(const XTensor * x, XTensor * y, unsigned int seed, DTYPE dropProb, int leadingDim = -1);

 /* de/dx */
 void _DropoutBackward(const XTensor * y, const XTensor * x,
                       const XTensor * dedy, XTensor * dedx,
-                      unsigned int seed, DTYPE prob);
+                      unsigned int seed, DTYPE dropProb, int leadingDim = -1);

 /* dropout function */
-XTensor Dropout(const XTensor &x, DTYPE prob, int leadDim = -1);
+XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim = -1);

 } // namespace nts(NiuTrans.Tensor)
......
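The renamed RandomBernoulli makes the inverted-dropout trick explicit: it returns value (here scaleFactor = 1/(1 - dropProb)) with probability 1 - dropProb and 0 otherwise, so E[mask] = 1 and the separate _ScaleAndShift pass can go. A standalone check of that expectation (plain C++, independent of the library):

    #include <cstdio>
    #include <cstdlib>

    int main()
    {
        const float dropProb = 0.2F;
        const float scaleFactor = 1.0F / (1.0F - dropProb);
        const int trials = 1000000;
        double sum = 0.0;

        srand(20);
        for (int i = 0; i < trials; i++)
            sum += ((float)rand() / (float)RAND_MAX >= dropProb) ? scaleFactor : 0.0F;

        /* mean converges to (1 - dropProb) * scaleFactor = 1 */
        printf("mean mask value = %.4f (expected 1.0)\n", sum / trials);
        return 0;
    }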
......@@ -31,10 +31,11 @@ case 1: test Dropout function.
 bool TestDropout1()
 {
     /* a input tensor of size (4, 5) */
-    int order = 2;
+    int order = 3;
     int * dimSize = new int[order];
     dimSize[0] = 40;
     dimSize[1] = 50;
+    dimSize[2] = 60;

     int unitNum = 1;
     for (int i = 0; i < order; i++)
......@@ -49,14 +50,14 @@ bool TestDropout1()
     XTensor yUser;

     /* initialize variables */
-    x->SetDataRand(0, 1);
+    _SetDataFixedFloat(x, 1.0F);
     y->SetZeroAll();

     /* call Dropout function */
-    float prob = 0.2F;
+    float dropProb = 0.2F;
     int seed = 20;
-    _Dropout(x, y, seed, prob);
-    yUser = Dropout(*x, 0.5F);
+    _Dropout(x, y, seed, dropProb);
+    yUser = Dropout(*x, dropProb);

     /* check result */
     int zeroNum1 = 0;
......@@ -73,9 +74,9 @@ bool TestDropout1()
     }
     printf("CPU Test:\n");
     printf("In tensor y, there are %d units.\n", unitNum);
-    printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum1, prob);
+    printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum1, dropProb);
     printf("In tensor yUser, there are %d units.\n", unitNum);
-    printf("There are %d zero units by Dropout layer with default probability %.2f.\n", zeroNum2, 0.5F);
+    printf("There are %d zero units by Dropout layer with default probability %.2f.\n", zeroNum2, dropProb);

 #ifdef USE_CUDA
     /* GPU test */
......@@ -87,12 +88,12 @@ bool TestDropout1()
     XTensor yUserGPU;

     /* initialize variables */
-    xGPU->SetDataRand(0, 1);
+    _SetDataFixedFloat(xGPU, 1.0F);
     yGPU->SetZeroAll();

     /* call Dropout function */
-    _Dropout(xGPU, yGPU, seed, prob);
-    yUserGPU = Dropout(*xGPU, 0.5F);
+    _Dropout(xGPU, yGPU, seed, dropProb);
+    yUserGPU = Dropout(*xGPU, dropProb);

     /* check result */
     zeroNum1 = 0;
......@@ -109,9 +110,9 @@ bool TestDropout1()
     }
     printf("CPU Test:\n");
     printf("In tensor y, there are %d units.\n", unitNum);
-    printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum1, prob);
+    printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum1, dropProb);
     printf("In tensor yUser, there are %d units.\n", unitNum);
-    printf("There are %d zero units by Dropout layer with default probability %.2f.\n", zeroNum2, 0.5F);
+    printf("There are %d zero units by Dropout layer with default probability %.2f.\n", zeroNum2, dropProb);

     /* destroy variables */
     delete x;
......@@ -159,13 +160,13 @@ bool TestDropout2()
     _SetDataFixedFloat(x, 1.0F);
     y->SetZeroAll();
     dedx->SetZeroAll();
-    _SetDataFixedFloat(dedy, 1.0F);
+    _SetDataFixedFloat(dedy, 1.5F);

     /* call Dropout function */
-    float prob = 0.5F;
+    float dropProb = 0.5F;
     int seed = 1;
-    _Dropout(x, y, seed, prob);
-    _DropoutBackward(y, x, dedy, dedx, 1, prob);
+    _Dropout(x, y, seed, dropProb);
+    _DropoutBackward(y, x, dedy, dedx, 1, dropProb);

     /* check result */
     y->Dump(stderr, "y");
......@@ -185,11 +186,11 @@ bool TestDropout2()
     _SetDataFixedFloat(xGPU, 1.0F);
     yGPU->SetZeroAll();
     dedxGPU->SetZeroAll();
-    _SetDataFixedFloat(dedyGPU, 1.0F);
+    _SetDataFixedFloat(dedyGPU, 1.5F);

     /* call Dropout function */
-    _Dropout(xGPU, yGPU, seed, prob);
-    _DropoutBackward(yGPU, xGPU, dedyGPU, dedxGPU, 1, prob);
+    _Dropout(xGPU, yGPU, seed, dropProb);
+    _DropoutBackward(yGPU, xGPU, dedyGPU, dedxGPU, 1, dropProb);

     /* check result */
     yGPU->Dump(stderr, "yGPU");
......
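On the test changes: with _SetDataFixedFloat(x, 1.0F), every zero in y comes from the mask, and since the mask is drawn once per slice along leadingDim, zeros now arrive in whole slices of unitNum / dimSize[n] elements; the zero ratio should still approach dropProb. A counting sketch for a CPU tensor (hypothetical helper; the tests read the buffer directly):

    /* zeroNum / (float)y->unitNum should be close to dropProb,
       in steps of 1 / dimSize[n], because whole slices drop together */
    int CountZeros(const XTensor * y)
    {
        DTYPE * data = (DTYPE*)y->data;
        int zeroNum = 0;
        for (int i = 0; i < y->unitNum; i++) {
            if (data[i] == 0.0F)
                zeroNum++;
        }
        return zeroNum;
    }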