Commit 2e20824a by xiaotong

better code of dropout

parent df76b612
@@ -39,20 +39,19 @@ DTYPE RandomBernoulli(DTYPE prob)
/*
dropout function
It randomly zeroes some of the elements of the input tensor
with probability p via a Bernoulli distribution.
During training, randomly zeroes some of the elements of the input tensor
with probability p using samples from a Bernoulli distribution.
The elements to zero are randomized on every forward call.
See "Improving neural networks by preventing co-adaptation of feature detectors"
for more details.
This has proven to be an effective technique for regularization and
preventing the co-adaptation of neurons as described in the paper
"Improving neural networks by preventing co-adaptation of feature detectors".
Furthermore, the outputs are scaled by a factor of \frac{1}{1-p} during training.
This means that during evaluation the module simply computes an identity function.
Here, the output is scaled by a factor of \frac{1}{1-p} so that we do not need
to rescale the output in the inference phase. Instead, inference on the test data
proceeds exactly as if dropout were not used.
>> x - input tensor
>> y - output tensor
>> prob - probability to set an element zero
>> prob - probability to set an element to zero
*/
void _Dropout(const XTensor *x, XTensor *y, unsigned int seed, DTYPE prob)
{
@@ -89,56 +88,8 @@ void _Dropout(const XTensor *x, XTensor *y, unsigned int seed, DTYPE prob)
delete[] maskArray;
}
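For reference, a minimal standalone sketch of the inverted-dropout forward step described above, written in plain C++ and independent of the XTensor API (the names DropoutForward, mask and drop are illustrative only, not part of the library):

#include <cstdlib>
#include <vector>

/* zero each element with probability drop and scale the survivors by 1/(1-drop),
   so that no rescaling is needed at inference time */
void DropoutForward(const std::vector<float> &x, std::vector<float> &mask,
                    std::vector<float> &y, float drop)
{
    float scale = 1.0F / (1.0F - drop);
    for (size_t i = 0; i < x.size(); i++) {
        mask[i] = ((float)std::rand() / RAND_MAX < drop) ? 0.0F : 1.0F;
        y[i] = x[i] * mask[i] * scale;
    }
}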
/*
dropout function (return an XTensor structure)
make a new tensor to keep the result and return it
During training, randomly zeroes some of the elements of the input tensor
with probability p using samples from a Bernoulli distribution.
The elements to zero are randomized on every forward call.
This has proven to be an effective technique for regularization and
preventing the co-adaptation of neurons as described in the paper
"Improving neural networks by preventing co-adaptation of feature detectors".
Furthermore, the outputs are scaled by a factor of \frac{1}{1-p} during training.
This means that during evaluation the module simply computes an identity function.
>> x - input tensor
>> y - output tensor
>> prob - probability to set an element zero
*/
XTensor Dropout(const XTensor &x, DTYPE prob)
{
XTensor y(&x);
y.SetTMP();
DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - prob);
/* generate a mask tensor with the specified dropout probability */
srand((unsigned int)time(NULL));
int unitNum = x.unitNum;
DTYPE * maskArray = new DTYPE[unitNum];
for (int i = 0; i < unitNum; i++)
maskArray[i] = RandomBernoulli(prob);
XTensor maskTensor(&x);
maskTensor.SetData(maskArray, unitNum);
XTensor inter;
inter = Multiply(x, maskTensor);
y = ScaleAndShift(inter, scaleFactor, 0);
delete[] maskArray;
///* tensor connection */
//XLink::MakeLink(&x, NULL, &y, FUNC_DROPOUT);
//XLink::AddParamToHead(&y, prob);
return y;
}
/*
backward computation of dropout function
backward computation of the dropout function
dE/dx = dE/dy * dy/dx
@@ -166,15 +117,15 @@ void _DropoutBackward(const XTensor * y, const XTensor * x,
XTensor * maskTensor = NewTensorBuf(x, x->devID, x->mem);
maskTensor->SetData(maskArray, unitNum);
#ifdef USE_CUDA
if(x->devID >= 0 || y->devID >= 0){
_CudaDropoutBackward(y, x, dedy, dedx, maskTensor, scaleFactor);
DelTensorBuf(maskTensor);
delete[] maskArray;
return;
}
#endif
#ifdef USE_CUDA
if(x->devID >= 0 || y->devID >= 0){
_CudaDropoutBackward(y, x, dedy, dedx, maskTensor, scaleFactor);
DelTensorBuf(maskTensor);
delete[] maskArray;
return;
}
#endif
DTYPE * dedyp = (DTYPE*)dedy->data;
DTYPE * dedxp = (DTYPE*)dedx->data;
@@ -189,5 +140,50 @@ void _DropoutBackward(const XTensor * y, const XTensor * x,
else
ShowNTErrors("TODO!");
}
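As a companion to the forward sketch above, the rule dE/dx = dE/dy * dy/dx reduces to multiplying the incoming gradient by the same mask and scale factor. This sketch assumes the forward mask has been kept around, whereas _DropoutBackward regenerates it from the seed; the names are again illustrative only:

#include <vector>

/* dy/dx is mask[i] * scale for kept elements and 0 for dropped ones */
void DropoutBackwardSketch(const std::vector<float> &dedy, const std::vector<float> &mask,
                           std::vector<float> &dedx, float drop)
{
    float scale = 1.0F / (1.0F - drop);
    for (size_t i = 0; i < dedy.size(); i++)
        dedx[i] = dedy[i] * mask[i] * scale;
}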
/*
dropout function (we make tensor connections here)
It randomly zeroes some of the elements of the input tensor
with probability p via a Bernoulli distribution.
See "Improving neural networks by preventing co-adaptation of feature detectors"
for more details.
Here, the output is scaled by a factor of \frac{1}{1-p} so that we do not need
to rescale the output in the inference phase. Instead, inference on the test data
proceeds exactly as if dropout were not used.
>> x - input tensor
>> y - output tensor
>> prob - probability to set an element to zero
*/
XTensor Dropout(const XTensor &x, DTYPE prob)
{
DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - prob);
/* generate a mask tensor with the specified dropout probability */
srand((unsigned int)time(NULL));
int unitNum = x.unitNum;
DTYPE * maskArray = new DTYPE[unitNum];
for (int i = 0; i < unitNum; i++)
maskArray[i] = RandomBernoulli(prob);
XTensor maskTensor(&x);
maskTensor.SetData(maskArray, unitNum);
XTensor y;
XTensor inter;
inter = Multiply(x, maskTensor);
y = ScaleAndShift(inter, scaleFactor, 0);
delete[] maskArray;
///* tensor connection */
//XLink::MakeLink(&x, NULL, &y, FUNC_DROPOUT);
//XLink::AddParamToHead(&y, prob);
return y;
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
} // namespace nts(NiuTrans.Tensor)
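A short check of why the \frac{1}{1-p} scaling makes inference an identity computation: the mask entry m_i is 0 with probability p and 1 otherwise, so

E[y_i] = E[m_i] \cdot \frac{x_i}{1-p} = (1-p) \cdot \frac{x_i}{1-p} = x_i,

i.e. the expected training-time activation equals the plain activation used on the test data.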
@@ -30,14 +30,14 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* dropout function */
void _Dropout(const XTensor * x, XTensor * y, unsigned int seed, DTYPE prob = 0.5);
/* dropout function */
XTensor Dropout(const XTensor &x, DTYPE prob = 0.5);
/* de/dx */
void _DropoutBackward(const XTensor * y, const XTensor * x,
const XTensor * dedy, XTensor * dedx,
unsigned int seed, DTYPE prob = 0.5);
/* dropout function */
XTensor Dropout(const XTensor &x, DTYPE prob = 0.5);
} // namespace nts(NiuTrans.Tensor)
#endif // __DROPOUT_H__
\ No newline at end of file
#endif // __DROPOUT_H__
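A hypothetical usage sketch against the interface above (x is assumed to be an already initialized XTensor; only the Dropout call itself comes from this header):

/* training: zero elements with probability 0.3 and rescale the rest by 1/(1-0.3) */
XTensor h = Dropout(x, (DTYPE)0.3);

/* inference: simply omit the call; no extra rescaling of the output is required */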