Commit 3a3b625a by xiaotong

backward propagation for activation functions

parent 28f66400
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* backward computation for activation functions
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
* Dingdang won 5 games in the GO training yesterday, hahaha ...
*/
#include "XNoder.h"
#include "XBackwardFunc.h"
#include "../tensor/XName.h"
#include "../tensor/function/FHeader.h"
namespace nts{
/* compute dE/dx of a node */
void XFuncGrad::MakeGrad(XTensor * node)
{
XLink &income = node->income;
int operID = income.typeID;
CheckNTErrors(node->grad != NULL, "No gradient found!");
CheckNTErrors(income.tailNum == 1, "Too many input tensors for the function!");
XTensor * input = income.tails[0];
XTensor * output = node;
XNoder::MakeGrad(input);
if(operID == FUNC_HARDTANH)
_HardTanHBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
else if(operID == FUNC_IDENTITY)
_IdentityBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
else if(operID == FUNC_LOGSOFTMAX){
int leadDim = income.GetParamInt(0);
_LogSoftmaxBackward(NULL, output, input, output->grad, input->grad, leadDim, NOLOSS);
}
else if(operID == FUNC_RECTIFY)
_RectifyBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
else if(operID == FUNC_SIGMOID)
_SigmoidBackward(NULL, output, input, output->grad, input->grad, NOLOSS);
else if(operID == FUNC_SOFTMAX){
int leadDim = income.GetParamInt(0);
_SoftmaxBackward(NULL, output, input, output->grad, input->grad, leadDim, NOLOSS);
}
else{
ShowNTErrors("Wrong activation function type!");
}
}
/* indicates whether the node is for an activation function */
bool XFuncGrad::IsFunc(XTensor * node)
{
XLink &income = node->income;
return (income.typeID & FUNCTION_BASE) != 0;
}
}
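MakeGrad above always passes NOLOSS and a NULL gold tensor, so each backward routine only has to apply the chain rule to the gradient already stored in output->grad. For the element-wise activations this reduces to the standard local derivatives below (a reference sketch of the math, assuming the usual hard-tanh clipping to [-1, 1]; it is not a transcription of the library's kernels):

\frac{\partial E}{\partial x_i} = \frac{\partial E}{\partial y_i} \cdot \frac{\partial y_i}{\partial x_i}, \qquad
\frac{\partial y_i}{\partial x_i} =
\begin{cases}
1 & \text{identity} \\
\mathbf{1}[-1 < x_i < 1] & \text{hardtanh} \\
\mathbf{1}[x_i > 0] & \text{rectify (ReLU)}
\end{cases}

Softmax and log-softmax are not element-wise along the reduction dimension, which is why those two branches also read leadDim from the incoming edge and pass it on.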
@@ -29,6 +29,19 @@
 namespace nts{

+/* this class computes the gradient for activation functions given a node */
+class XFuncGrad
+{
+public:
+    /* compute dE/dx of a node */
+    static
+    void MakeGrad(XTensor * node);
+
+    /* indicates whether the node is for an activation function */
+    static
+    bool IsFunc(XTensor * node);
+};
+
 }

 #endif
\ No newline at end of file
@@ -23,6 +23,7 @@
 #include "XNoder.h"
 #include "XBackwardLoss.h"
 #include "XBackwardMath.h"
+#include "XBackwardFunc.h"
 #include "../tensor/XName.h"

 namespace nts{
@@ -143,6 +144,11 @@ void XNet::BackwardNode(XTensor * node)
     if(!XNoder::IsLeaf(node)){
         if(XMathGrad::IsMathOP(node))
             XMathGrad::MakeGrad(node);
+        else if(XFuncGrad::IsFunc(node))
+            XFuncGrad::MakeGrad(node);
+        else{
+            ShowNTErrors("Wrong node type!");
+        }
     }

     node->visitMark = NODE_FINISHED;
......
@@ -208,7 +208,7 @@ Obviously C = A * B performs normal matrix multiplication if A = y * z and B = x
 << return - the result of matrix multiplication
 */
 XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
                   DTYPE alpha, DTYPE beta, XPRunner * parallelRunner)
 {
     CheckNTErrors(&a != &NULLTensor && &b != &NULLTensor, "Empty input tensors!");
     CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
......
@@ -47,9 +47,9 @@ dE/dx = dE/dy * dy/dx = dE/dy
 >> dedx - dE/dx
 >> lossName - type of loss function, e.g., cross entropy
 */
-void IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
-                      XTensor * dedy, XTensor * dedx,
-                      LOSS_FUNCTION_NAME lossName)
+void _IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
+                       XTensor * dedy, XTensor * dedx,
+                       LOSS_FUNCTION_NAME lossName)
 {
     CheckNTErrors((gold == NULL || XTensor::IsIdentical(gold, y)),
                   "The tensors must be of the same size!");
......
@@ -33,9 +33,9 @@ void _Identity(const XTensor * x, XTensor * y);
 /* de/dx */
 extern "C"
-void IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
-                      XTensor * dedy, XTensor * dedx,
-                      LOSS_FUNCTION_NAME lossName);
+void _IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
+                       XTensor * dedy, XTensor * dedx,
+                       LOSS_FUNCTION_NAME lossName);

 } // namespace nts(NiuTrans.Tensor)
......
@@ -76,16 +76,16 @@ rectified: y = 0 if x < 0
 >> dedx - dE/dx
 >> lossName - type of loss function, e.g., cross entropy
 */
-void RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
-                     XTensor * dedy, XTensor * dedx,
-                     LOSS_FUNCTION_NAME lossName)
+void _RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
+                      XTensor * dedy, XTensor * dedx,
+                      LOSS_FUNCTION_NAME lossName)
 {
     CheckNTErrors((gold == NULL || XTensor::IsIdentical(gold, y)),
                   "The tensors must be of the same size!");

 #ifdef USE_CUDA
     if(x->devID >= 0 || y->devID >= 0){
-        CudaRectifyBackward(gold, y, x, dedy, dedx, lossName);
+        _CudaRectifyBackward(gold, y, x, dedy, dedx, lossName);
         return;
     }
 #endif
......
@@ -126,9 +126,9 @@ rectify : y = s if s >= 0
 >> oBeg - where to start in the model output (along the leading dimension)
 >> parallelRunner - parallel processing module
 */
-void CudaRectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
-                         XTensor * dedy, XTensor * dedx,
-                         LOSS_FUNCTION_NAME lossName)
+void _CudaRectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
+                          XTensor * dedy, XTensor * dedx,
+                          LOSS_FUNCTION_NAME lossName)
 {
     if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
......
@@ -35,9 +35,9 @@ void _CudaRectify(const XTensor * input, XTensor * output);
 /* de/dx (Cuda version) */
 extern "C"
-void CudaRectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
-                         XTensor * dedy, XTensor * dedx,
-                         LOSS_FUNCTION_NAME lossName);
+void _CudaRectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
+                          XTensor * dedy, XTensor * dedx,
+                          LOSS_FUNCTION_NAME lossName);

 #endif // USE_CUDA
......
@@ -33,9 +33,9 @@ void _Rectify(const XTensor * x, XTensor * y);
 /* de/dx */
 extern "C"
-void RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
-                     XTensor * dedy, XTensor * dedx,
-                     LOSS_FUNCTION_NAME lossName);
+void _RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
+                      XTensor * dedy, XTensor * dedx,
+                      LOSS_FUNCTION_NAME lossName);

 } // namespace nts(NiuTrans.Tensor)
......
@@ -68,16 +68,16 @@ sigmoid: y = 1/(1+exp(-x))
 >> dedx - dE/dx
 >> lossName - type of loss function, e.g., cross entropy
 */
-void SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
-                     XTensor * dedy, XTensor * dedx,
-                     LOSS_FUNCTION_NAME lossName)
+void _SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
+                      XTensor * dedy, XTensor * dedx,
+                      LOSS_FUNCTION_NAME lossName)
 {
     CheckNTErrors((gold == NULL || XTensor::IsIdentical(gold, y)),
                   "The tensors must be of the same size!");

 #ifdef USE_CUDA
     if(x->devID >= 0 || y->devID >= 0){
-        CudaSigmoidBackward(gold, y, x, dedy, dedx, lossName);
+        _CudaSigmoidBackward(gold, y, x, dedy, dedx, lossName);
         return;
     }
 #endif
......
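For the sigmoid routines renamed above, the local derivative can be expressed through the output y alone, which keeps the backward computation cheap (standard math, shown here as a sketch rather than the kernel's exact formulation):

y = \frac{1}{1 + e^{-x}}, \qquad
\frac{\partial y}{\partial x} = y\,(1 - y), \qquad
\frac{\partial E}{\partial x} = \frac{\partial E}{\partial y}\; y\,(1 - y)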
@@ -122,9 +122,9 @@ sigmoid: y = 1/(1+exp(-x))
 >> dedx - dE/dx
 >> lossName - type of loss function, e.g., cross entropy
 */
-void CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
-                         XTensor * dedy, XTensor * dedx,
-                         LOSS_FUNCTION_NAME lossName)
+void _CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
+                          XTensor * dedy, XTensor * dedx,
+                          LOSS_FUNCTION_NAME lossName)
 {
     if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
         /* calculate dE/dy */
......
@@ -35,9 +35,9 @@ void _CudaSigmoid(const XTensor * input, XTensor * output);
 /* de/dx (Cuda version) */
 extern "C"
-void CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
-                         XTensor * dedy, XTensor * dedx,
-                         LOSS_FUNCTION_NAME lossName);
+void _CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
+                          XTensor * dedy, XTensor * dedx,
+                          LOSS_FUNCTION_NAME lossName);

 #endif // USE_CUDA
......
@@ -33,9 +33,9 @@ void _Sigmoid(const XTensor * x, XTensor * y);
 /* de/dx */
 extern "C"
-void SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
-                     XTensor * dedy, XTensor * dedx,
-                     LOSS_FUNCTION_NAME lossName);
+void _SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
+                      XTensor * dedy, XTensor * dedx,
+                      LOSS_FUNCTION_NAME lossName);

 } // namespace nts(NiuTrans.Tensor)
......
@@ -154,10 +154,10 @@ See more details in LogSoftmaxBackward(...)
 >> lossName - type of loss function, e.g., cross entropy
 >> leadDim - leading dimension (along which we perform reduction)
 */
-void SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
-                     XTensor * dedy, XTensor * dedx,
-                     int leadDim,
-                     LOSS_FUNCTION_NAME lossName)
+void _SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
+                      XTensor * dedy, XTensor * dedx,
+                      int leadDim,
+                      LOSS_FUNCTION_NAME lossName)
 {
     CheckNTErrors((dedx->isSparse == false), "The gradient tensor must be dense!");
     CheckNTErrors((gold != NULL), "Incorrect x gold standard tensor!");
@@ -165,7 +165,7 @@ void SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
     int leadDimRDI = y->order - leadDim - 1;

 #ifdef USE_CUDA
     if(y->devID >= 0){
-        CudaSoftmaxBackward(gold, y, x, dedy, dedx, leadDim, lossName);
+        _CudaSoftmaxBackward(gold, y, x, dedy, dedx, leadDim, lossName);
         return;
     }
 #endif
......
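For softmax the backward is not element-wise: along leadDim the Jacobian couples all components, and with a cross-entropy loss against a gold distribution t (with \sum_j t_j = 1) it collapses to a simple difference. A standard derivation, independent of how the CPU and CUDA kernels organise the computation:

y_k = \frac{e^{x_k}}{\sum_j e^{x_j}}, \qquad
\frac{\partial E}{\partial x_k} = \sum_j \frac{\partial E}{\partial y_j}\, y_j (\delta_{jk} - y_k)
= y_k \Big( \frac{\partial E}{\partial y_k} - \sum_j \frac{\partial E}{\partial y_j}\, y_j \Big),

and with E = -\sum_j t_j \log y_j this reduces to \frac{\partial E}{\partial x_k} = y_k - t_k, which is why _SoftmaxBackward takes both the gold tensor and leadDim.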
@@ -230,10 +230,10 @@ See more details in SoftmaxBackward
 >> lossName - type of loss function, e.g., cross entropy
 >> leadDim - leading dimension (along which we perform reduction)
 */
-void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
-                         XTensor * dedy, XTensor * dedx,
-                         int leadDim,
-                         LOSS_FUNCTION_NAME lossName)
+void _CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
+                          XTensor * dedy, XTensor * dedx,
+                          int leadDim,
+                          LOSS_FUNCTION_NAME lossName)
 {
     CheckNTErrors((x->devID >= 0), "Backward computation of log softmax must be run on GPUs.");
     CheckNTErrors((x->devID == y->devID), "Matrices used in log softmax are not on the same GPU.");
......
@@ -39,10 +39,10 @@ void _CudaSoftmaxSumMax(const XTensor * x, XTensor * y, int leadDim, XTensor * s
 /* de/dx (Cuda version) */
 extern "C"
-void CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
-                         XTensor * dedy, XTensor * dedx,
-                         int leadDim,
-                         LOSS_FUNCTION_NAME lossName);
+void _CudaSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
+                          XTensor * dedy, XTensor * dedx,
+                          int leadDim,
+                          LOSS_FUNCTION_NAME lossName);

 #endif // USE_CUDA
......
@@ -33,10 +33,10 @@ void _Softmax(const XTensor * x, XTensor * y, int leadDim);
 /* de/dx */
 extern "C"
-void SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
-                     XTensor * dedy, XTensor * dedx,
-                     int leadDim,
-                     LOSS_FUNCTION_NAME lossName);
+void _SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
+                      XTensor * dedy, XTensor * dedx,
+                      int leadDim,
+                      LOSS_FUNCTION_NAME lossName);

 } // namespace nts(NiuTrans.Tensor)
......
@@ -142,7 +142,7 @@ bool TestIdentity2()
     _Identity(x, y);

     /* call IdentityBackward function */
-    IdentityBackward(g, y, x, dedy, dedx, CROSSENTROPY);
+    _IdentityBackward(g, y, x, dedy, dedx, CROSSENTROPY);

     /* check result */
     cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
@@ -171,7 +171,7 @@ bool TestIdentity2()
     _Identity(xGPU, yGPU);

     /* call IdentityBackward function */
-    IdentityBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
+    _IdentityBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);

     /* check result */
     gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
......
@@ -147,7 +147,7 @@ bool TestRectify2()
     _Rectify(x, y);

     /* call RectifyBackward function */
-    RectifyBackward(gold, y, x, dedy, dedx, CROSSENTROPY);
+    _RectifyBackward(gold, y, x, dedy, dedx, CROSSENTROPY);

     /* check results */
     cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
@@ -176,7 +176,7 @@ bool TestRectify2()
     _Rectify(xGPU, yGPU);

     /* call rectifybackward function */
-    RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
+    _RectifyBackward(goldGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);

     /* check results */
     gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
......
@@ -141,7 +141,7 @@ bool TestSigmoid2()
     _Sigmoid(x, y);

     /* call SigmoidBackward function */
-    SigmoidBackward(g, y, x, dedy, dedx, CROSSENTROPY);
+    _SigmoidBackward(g, y, x, dedy, dedx, CROSSENTROPY);

     /* check result */
     cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
@@ -170,7 +170,7 @@ bool TestSigmoid2()
     _Sigmoid(xGPU, yGPU);

     /* call SigmoidBackward function */
-    SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);
+    _SigmoidBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, CROSSENTROPY);

     /* check result */
     gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
......
@@ -142,7 +142,7 @@ bool TestSoftmax2()
     _Softmax(x, y, 1);

     /* call SoftmaxBackward function */
-    SoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY);
+    _SoftmaxBackward(g, y, x, dedy, dedx, 1, CROSSENTROPY);

     /* check result */
     cpuTest = y->CheckData(yAnswer, unitNum, 1e-4F)
@@ -170,7 +170,7 @@ bool TestSoftmax2()
     _Softmax(xGPU, yGPU, 1);

     /* call SoftmaxBackward function */
-    SoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY);
+    _SoftmaxBackward(gGPU, yGPU, xGPU, dedyGPU, dedxGPU, 1, CROSSENTROPY);

     /* check result */
     gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F)
......