Commit e84a764b by xuchen

add the implementation of several backward functions and improve their annotations

parent c103b9f3
......@@ -73,7 +73,7 @@ void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient)
_SoftmaxBackward(NULL, output, input, dedy, tmp, NULL, leadDim, NOLOSS);
}
else {
ShowNTErrors("Wrong activation function type!");
ShowNTErrors("Unsupported backward computation! TODO!");
}
_SumMe(dedx, tmp);
......
......@@ -70,7 +70,7 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
_SumMe(dedy, tmp);
}
else {
ShowNTErrors("Wrong activation function type!");
ShowNTErrors("Unsupported backward computation! TODO!");
}
//DelTensorBuf(tmp);
DelTensor(tmp);
......
......@@ -79,6 +79,12 @@ void XMathGrad::MakeGrad(XTensor * node, bool isEfficient)
GradNormalize(node, isEfficient);
else if (operID == MATH_POWER)
GradPower(node, isEfficient);
else if (operID == MATH_RECIPROCAL)
GradReciprocal(node, isEfficient);
else if (operID == MATH_SQRT)
GradSqrt(node, isEfficient);
else if (operID == MATH_SQUARE)
GradSquare(node, isEfficient);
else if (operID == MATH_SCALEANDSHIFT)
GradScaleAndShift(node, isEfficient);
else if (operID == MATH_SCALE)
......@@ -101,6 +107,8 @@ void XMathGrad::MakeGrad(XTensor * node, bool isEfficient)
GradReduceMean(node, isEfficient);
else if (operID == REDUCE_REDUCESUM)
GradReduceSum(node, isEfficient);
else if (operID == REDUCE_REDUCESUMALL)
GradReduceSumAll(node, isEfficient);
else if (operID == REDUCE_REDUCESUMSQUARED)
GradReduceSumSquared(node, isEfficient);
else if (operID == REDUCE_REDUCEVARIANCE)
......@@ -108,7 +116,7 @@ void XMathGrad::MakeGrad(XTensor * node, bool isEfficient)
else if (operID == MATH_MULANDSHIFT)
GradMulAndShift(node, isEfficient);
else{
ShowNTErrors("TODO!");
ShowNTErrors("Unsupported backward computation! TODO!");
}
}
......@@ -967,7 +975,100 @@ void XMathGrad::GradPower(XTensor * node, bool isEfficient)
XTensor * tmp = NewTensorBufV2(a, a->devID, a->mem);
_Power(a, tmp, p - 1.0F);
_ScaleAndShiftMe(tmp, p);
_ScaleMe(tmp, p);
_Multiply(node->grad, tmp, a->grad, 1.0F);
DelTensorBuf(tmp);
}
node->visitMark = NODE_FINISHED;
}
/*
gradient for reciprocal
for
c = reciprocal(a)
we have
dE/da = (dE/dc) * -a^(-2)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in an efficient manner
*/
void XMathGrad::GradReciprocal(XTensor* node, bool isEfficient)
{
XLink& income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for RECIPROCAL!");
XTensor* a = income.tails[0];
/* dE/da = (dE/dc) * -a^(-2) */
if (!isEfficient || a->isGrad) {
XNoder::MakeGrad(a);
XTensor* tmp = NewTensorBufV2(a, a->devID, a->mem);
_Power(a, tmp, -2.0F);
_NegateMe(tmp);
_Multiply(node->grad, tmp, a->grad, 1.0F);
DelTensorBuf(tmp);
}
node->visitMark = NODE_FINISHED;
}
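As a quick sanity check on these derivative rules (the reciprocal rule above and the sqrt/square rules below), here is a standalone sketch that is independent of NiuTensor and compares each analytic rule against a central finite difference; the sample point is chosen arbitrarily:

#include <cmath>
#include <cstdio>
#include <functional>

int main()
{
    struct Case { const char * name;
                  std::function<float(float)> f;
                  std::function<float(float)> df; };

    Case cases[] = {
        { "reciprocal", [](float a) { return 1.0F / a; },
                        [](float a) { return -std::pow(a, -2.0F); } },
        { "sqrt",       [](float a) { return std::sqrt(a); },
                        [](float a) { return 0.5F * std::pow(a, -0.5F); } },
        { "square",     [](float a) { return a * a; },
                        [](float a) { return 2.0F * a; } },
    };

    const float a = 1.7F, eps = 1e-3F;
    for (const Case & c : cases) {
        /* central finite difference vs. the analytic derivative */
        float numeric  = (c.f(a + eps) - c.f(a - eps)) / (2.0F * eps);
        float analytic = c.df(a);
        printf("%-10s analytic = %.6f  numeric = %.6f\n", c.name, analytic, numeric);
    }
    return 0;
}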
/*
gradient for sqrt
for
c = sqrt(a)
we have
dE/da = (dE/dc) * 0.5 * a^(-0.5)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in an efficient manner
*/
void XMathGrad::GradSqrt(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SQRT!");
XTensor * a = income.tails[0];
/* dE/da = (dE/dc) * 0.5 * a^(-0.5) */
if (!isEfficient || a->isGrad) {
XNoder::MakeGrad(a);
XTensor* tmp = NewTensorBufV2(a, a->devID, a->mem);
_Power(a, tmp, -0.5F);
_ScaleMe(tmp, 0.5F);
_Multiply(node->grad, tmp, a->grad, 1.0F);
DelTensorBuf(tmp);
}
node->visitMark = NODE_FINISHED;
}
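Since the forward output c = sqrt(a) is the node itself, a possible variant of the branch above computes the same factor as 0.5 / c instead of 0.5 * a^(-0.5). This is an untested editor's sketch, not part of the commit; it only reuses calls that already appear in this file:

/* hypothetical variant: dE/da = (dE/dc) * 0.5 * c^(-1), where c = sqrt(a) is the node itself */
if (!isEfficient || a->isGrad) {
    XNoder::MakeGrad(a);
    XTensor * tmp = NewTensorBufV2(a, a->devID, a->mem);
    _Power(node, tmp, -1.0F);                     /* tmp = 1 / c */
    _ScaleMe(tmp, 0.5F);                          /* tmp = 0.5 / c = 0.5 * a^(-0.5) */
    _Multiply(node->grad, tmp, a->grad, 1.0F);    /* dE/da += dE/dc * tmp */
    DelTensorBuf(tmp);
}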
/*
gradient for square
for
c = square(a)
we have
dE/da = (dE/dc) * 2 * a
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in an efficient manner
*/
void XMathGrad::GradSquare(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SQUARE!");
XTensor * a = income.tails[0];
/* dE/da = (dE/dc) * 2 * a */
if (!isEfficient || a->isGrad) {
XNoder::MakeGrad(a);
XTensor* tmp = NewTensorBufV2(a, a->devID, a->mem);
/* tmp = 2 * a */
_Sum(a, a, tmp);
_Multiply(node->grad, tmp, a->grad, 1.0F);
DelTensorBuf(tmp);
......@@ -1447,6 +1548,38 @@ void XMathGrad::GradReduceSum(XTensor * node, bool isEfficient)
}
/*
gradient for reduceSumAll
for
c = reduceSumAll(a)
we have
dE/da = dE/dc * 1
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradReduceSumAll(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for Reduce!");
XTensor * a = income.tails[0];
/* dE/da = dE/dc * 1 */
if (!isEfficient || a->isGrad) {
XNoder::MakeGrad(a);
XTensor * tmp = NewTensorBufV2(a, a->devID, a->mem);
DTYPE value = node->grad->Get0D();
tmp->SetDataFixed(value);
_Sum(a->grad, tmp, a->grad);
DelTensorBuf(tmp);
}
node->visitMark = NODE_FINISHED;
}
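To illustrate why filling a buffer with the scalar dE/dc and accumulating it is the complete gradient of reduceSumAll, here is a small standalone sketch with plain arrays; the loss E = 0.5 * c^2 is an arbitrary choice, giving dE/dc = c:

#include <cstdio>

int main()
{
    /* c = reduceSumAll(a); E = 0.5 * c * c  =>  dE/dc = c and dE/da_i = dE/dc for every i */
    float a[4] = { 1.0F, -2.0F, 3.0F, 0.5F };

    float c = 0.0F;
    for (float v : a)
        c += v;

    float dEdc = c;               /* upstream scalar gradient, as read by Get0D() above */
    float dEda[4] = { 0.0F };     /* gradient buffer for a */
    for (int i = 0; i < 4; i++)
        dEda[i] += dEdc;          /* mirrors SetDataFixed(value) followed by _Sum into a->grad */

    printf("c = %g, dE/da = [%g %g %g %g]\n", c, dEda[0], dEda[1], dEda[2], dEda[3]);
    return 0;
}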
/*
gradient for reduceSumSquared
for
c = \sum_i (a_i - b)^2
......@@ -1583,7 +1716,7 @@ void XMathGrad::GradMulAndShift(XTensor * node, bool isEfficient)
int n = income.GetParamInt(0);
MATRIX_TRANS_TYPE transW = income.GetParamTrans(1);
MATRIX_TRANS_TYPE transX = income.GetParamTrans(2);
DTYPE alpha = income.GetParam(3);
/* dE/db = dE/dc * x.reduce(0,...,n-1,n+1,...) */
if (!isEfficient || b->isGrad) {
XNoder::MakeGrad(b);
......@@ -1652,7 +1785,7 @@ void XMathGrad::GradMulAndShift(XTensor * node, bool isEfficient)
XTensor * dedx = x->grad;
if (x->order == 2 && w->order == 2)
GradMatrixMul(x, dedx, transX, w, dedw, transW, dedc, 1.0F, isEfficient);
GradMatrixMul(x, dedx, transX, w, dedw, transW, dedc, alpha, isEfficient);
else if (transX == X_NOTRANS && x->order > 2 && w->order == 2) {
int orderBackupX = x->order;
int orderBackupC = c->order;
......@@ -1667,14 +1800,13 @@ void XMathGrad::GradMulAndShift(XTensor * node, bool isEfficient)
dedx->Reshape(dedx->unitNum / dedx->GetDim(-1), dedx->GetDim(-1));
dedc->Reshape(dedc->unitNum / dedc->GetDim(-1), dedc->GetDim(-1));
GradMatrixMul(x, dedx, transX, w, dedw, transW, dedc, 1.0F, isEfficient);
GradMatrixMul(x, dedx, transX, w, dedw, transW, dedc, alpha, isEfficient);
x->Reshape(orderBackupX, dimsBackupX);
c->Reshape(orderBackupC, dimsBackupC);
if (!isEfficient || x->isGrad)
dedx->Reshape(orderBackupX, dimsBackupX);
dedc->Reshape(orderBackupC, dimsBackupC);
}
node->visitMark = NODE_FINISHED;
......
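A note on the alpha change in the hunks above: assuming the forward operation is c = alpha * (x * w) + b with no transposes, as the parameter read from GetParam(3) suggests, the chain rule gives
dE/dx = alpha * (dE/dc * w^T)
dE/dw = alpha * (x^T * dE/dc)
so passing the constant 1.0F instead of alpha to GradMatrixMul would mis-scale both input gradients whenever alpha != 1.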
......@@ -126,6 +126,18 @@ private:
static
void GradPower(XTensor * node, bool isEfficient);
/* gradient for power */
static
void GradReciprocal(XTensor* node, bool isEfficient);
/* gradient for sqrt */
static
void GradSqrt(XTensor* node, bool isEfficient);
/* gradient for square */
static
void GradSquare(XTensor* node, bool isEfficient);
/* gradient for ScaleAndShift */
static
void GradScaleAndShift(XTensor * node, bool isEfficient);
......@@ -146,10 +158,10 @@ private:
static
void GradSub(XTensor * node, bool isEfficient);
/* gradient for sub with one dimension: c = a - b * \beta
where the size of b is equal to that of one dimension of a */
static
void GradSubDim(XTensor * node, bool isEfficient);
/* gradient for sum: c = a + b * \beta */
static
......@@ -173,6 +185,10 @@ private:
static
void GradReduceSum(XTensor * node, bool isEfficient);
/* gradient for reduceSumAll */
static
void GradReduceSumAll(XTensor * node, bool isEfficient);
/* gradient for reduceSumSquared */
static
void GradReduceSumSquared(XTensor * node, bool isEfficient);
......
......@@ -44,7 +44,10 @@ void XShapeGrad::MakeGrad(XTensor * node, bool isEfficient)
XLink &income = node->income;
int operID = income.typeID;
if (operID == MOVEMENT_COPYINDEXED)
if (operID == GETANDSET_CONVERTDATATYPE)
GradConvertDataType(node, isEfficient);
else if (operID == MOVEMENT_COPYINDEXED)
GradCopyIndexed(node, isEfficient);
else if (operID == MOVEMENT_GATHER)
GradGather(node, isEfficient);
......@@ -65,7 +68,7 @@ void XShapeGrad::MakeGrad(XTensor * node, bool isEfficient)
else if (operID == SHAPE_UNSQUEEZE)
GradUnsqueeze(node, isEfficient);
else{
ShowNTErrors("TODO!");
ShowNTErrors("Unsupported backward computation! TODO!");
}
}
......@@ -83,6 +86,34 @@ void XShapeGrad::PostProcessing(XTensor * node, int typeID, bool isEfficient)
GradSplitListPost(node, isEfficient);
}
/*
gradient computation for convertdatatype
for
b = convertdatatype(a)
we have
dE/da = convertdatatype(dE/db)
>> node - the node (b) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XShapeGrad::GradConvertDataType(XTensor* node, bool isEfficient)
{
XLink& income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for ConvertDataType!");
XTensor* a = income.tails[0];
if (!isEfficient || a->isGrad) {
XNoder::MakeGrad(a);
XTensor* tmp = NewTensorBufV2(a, a->devID, a->mem);
_ConvertDataType(node->grad, tmp);
_SumMe(a->grad, tmp);
DelTensorBuf(tmp);
}
}
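The backward rule here is just the identity composed with a type conversion: dE/da is dE/db converted back to a's data type and then accumulated. A tiny standalone sketch with scalar float/double values, purely for illustration:

#include <cstdio>

int main()
{
    /* forward: b = convertdatatype(a), i.e. a pure type conversion */
    float  a = 3.0F;
    double b = (double)a;

    /* backward: dE/da = convertdatatype(dE/db), converted back to a's type and accumulated */
    double dEdb = 0.25;
    float  dEda = 0.0F;
    dEda += (float)dEdb;

    printf("b = %g, dE/da = %g\n", b, dEda);
    return 0;
}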
/*
gradient computation for copying indexed sub-tensors
for
......@@ -301,7 +332,7 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
if (i > 1)
CheckNTErrors(_IsSameShaped(last, tail), "Input tensors must be of the same size!");
if (tail->outgo.tailNum > 1)
mergeOnly = false;
last = tail;
......
......@@ -34,7 +34,7 @@ class XShapeGrad
public:
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node, bool isEfficent);
void MakeGrad(XTensor * node, bool isEfficient);
/* indicates whether the node is for a shaping operation */
static
......@@ -42,55 +42,59 @@ public:
/* post processing of a node */
static
void PostProcessing(XTensor * node, int typeId, bool isEfficent);
void PostProcessing(XTensor * node, int typeId, bool isEfficient);
private:
/* gradient computation for convertdatatype: b = convertdatatype(a) */
static
void GradConvertDataType(XTensor * node, bool isEfficient);
/* gradient computation for copying indexed sub-tensors: b = copyindexed(a, srcIndex, indexSize, tgtIndex, copyNum) */
static
void GradCopyIndexed(XTensor * node, bool isEfficent);
void GradCopyIndexed(XTensor * node, bool isEfficient);
/* gradient computation for copying indexed sub-tensors: b = gather(a, index) */
static
void GradGather(XTensor * node, bool isEfficent);
void GradGather(XTensor * node, bool isEfficient);
/* gradient computation for dropout with index: b = dropoutwithindex(a, index) */
static
void GradDropoutWithIndex(XTensor * node, bool isEfficent);
void GradDropoutWithIndex(XTensor * node, bool isEfficient);
/* gradient computation for merge: c = merge(a, b, ...) */
static
void GradMerge(XTensor * node, bool isEfficent);
void GradMerge(XTensor * node, bool isEfficient);
/* gradient computation for merging a list of tensors : c = merge(list(a, b, ...)) */
static
void GradMergeList(XTensor * node, bool isEfficent);
void GradMergeList(XTensor * node, bool isEfficient);
/* gradient computation for transposing a tensor : b = transpose(a) */
static
void GradTranspose(XTensor * node, bool isEfficent);
void GradTranspose(XTensor * node, bool isEfficient);
/* gradient computation for reshaping a tensor: c = reshape(a) */
static
void GradReshape(XTensor * node, bool isEfficent);
void GradReshape(XTensor * node, bool isEfficient);
/* gradient computation for split: c = split(a) */
static
void GradSplit(XTensor * node, bool isEfficent);
void GradSplit(XTensor * node, bool isEfficient);
/* gradient computation for splitting. we return the list of the splits : list(c_1, ...) = split(a) */
static
void GradSplitList(XTensor * node, bool isEfficent);
void GradSplitList(XTensor * node, bool isEfficient);
/* gradient computation for splitting. we return the list of the splits : list(c_1, ...) = split(a).
this method is called only when all nodes of splitting have been processed. We do this in a post-processing
manner because we can fuse multiple memory copy jobs at one time. This is good for system speed-up. */
static
void GradSplitListPost(XTensor * node, bool isEfficent);
void GradSplitListPost(XTensor * node, bool isEfficient);
/* gradient computation for unsqueezing a tensor : c = unsqueeze(a) */
static
void GradUnsqueeze(XTensor * node, bool isEfficent);
void GradUnsqueeze(XTensor * node, bool isEfficient);
};
......