Commit baad6629 by xiaotong

improve the space management

parent 6ea64b51
......@@ -29,7 +29,7 @@
namespace nts{
/* compute dE/dx of a node */
void XFuncGrad::MakeGrad(XTensor * node)
void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient)
{
......
......@@ -35,7 +35,7 @@ class XFuncGrad
public:
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node);
void MakeGrad(XTensor * node, bool isEfficient);
/* indicates whether the node is for an activation function */
static
......
......@@ -33,7 +33,7 @@ class XMathGrad
public:
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node);
void MakeGrad(XTensor * node, bool isEfficient);
/* indicates whether the node is for a math operation */
static
......@@ -43,121 +43,121 @@ private:
/* gradient for absolute */
static
void GradAbsolute(XTensor * node);
void GradAbsolute(XTensor * node, bool isEfficient);
/* gradient for cos */
static
void GradCos(XTensor * node);
void GradCos(XTensor * node, bool isEfficient);
/* gradient for exp */
static
void GradExp(XTensor * node);
void GradExp(XTensor * node, bool isEfficient);
/* gradient for log: c = log(a) */
static
void GradLog(XTensor * node);
void GradLog(XTensor * node, bool isEfficient);
/* gradient for round */
static
void GradRound(XTensor * node);
void GradRound(XTensor * node, bool isEfficient);
/* gradient for sign */
static
void GradSign(XTensor * node);
void GradSign(XTensor * node, bool isEfficient);
/* gradient for sin */
static
void GradSin(XTensor * node);
void GradSin(XTensor * node, bool isEfficient);
/* gradient for tan */
static
void GradTan(XTensor * node);
void GradTan(XTensor * node, bool isEfficient);
/* gradient for clip */
static
void GradClip(XTensor * node);
void GradClip(XTensor * node, bool isEfficient);
/* gradient for Divide */
static
void GradDiv(XTensor * node);
void GradDiv(XTensor * node, bool isEfficient);
/* gradient for DivideDim */
static
void GradDivDim(XTensor * node);
void GradDivDim(XTensor * node, bool isEfficient);
/* gradient for matrix multiply: c = matmul(a, b) * \alpha */
static
void GradMatrixMul(XTensor * node);
void GradMatrixMul(XTensor * node, bool isEfficient);
/* gradient for matrix multiply: c = matmul(a, b) * \alpha */
static
void GradMatrixMul(XTensor * a, XTensor * deda, MATRIX_TRANS_TYPE transA,
XTensor * b, XTensor * dedb, MATRIX_TRANS_TYPE transB,
XTensor * dedc, DTYPE alpha);
XTensor * dedc, DTYPE alpha, bool isEfficient);
/* gradient for matrix multiply in batch mode.
for each batch: c_i = matmul(a_i, b_i) * \alpha */
static
void GradMatrixMulBatched(XTensor * node);
void GradMatrixMulBatched(XTensor * node, bool isEfficient);
/* gradient for multiply (dot production): c = a * b * \alpha */
static
void GradMultiply(XTensor * node);
void GradMultiply(XTensor * node, bool isEfficient);
/* gradient for multiply one dimension: c = a * b * \alpha
where the size of b is equal to that of one dimension of a */
static
void GradMultiplyDim(XTensor * node);
void GradMultiplyDim(XTensor * node, bool isEfficient);
/* gradient for negate */
static
void GradNegate(XTensor * node);
void GradNegate(XTensor * node, bool isEfficient);
/* gradient for normalize */
static
void GradNormalize(XTensor * node);
void GradNormalize(XTensor * node, bool isEfficient);
/* gradient for power */
static
void GradPower(XTensor * node);
void GradPower(XTensor * node, bool isEfficient);
/* gradient for ScaleAndShift */
static
void GradScaleAndShift(XTensor * node);
void GradScaleAndShift(XTensor * node, bool isEfficient);
/* gradient for Minus */
static
void GradSub(XTensor * node);
void GradSub(XTensor * node, bool isEfficient);
/* gradient for sub with one dimension: c = a - b * \beta
where the size of b is equal to that of one dimension of a */
static
void GradSubDim(XTensor * node);
void GradSubDim(XTensor * node, bool isEfficient);
/* gradient for sum: c = a + b * \beta */
static
void GradSum(XTensor * node);
void GradSum(XTensor * node, bool isEfficient);
/* gradient for sum with one dimension: c = a + b * \beta
where the size of b is equal to that of one dimension of a */
static
void GradSumDim(XTensor * node);
void GradSumDim(XTensor * node, bool isEfficient);
/* gradient for reduceMean */
static
void GradReduceMean(XTensor * node);
void GradReduceMean(XTensor * node, bool isEfficient);
/* gradient for reduceSum */
static
void GradReduceSum(XTensor * node);
void GradReduceSum(XTensor * node, bool isEfficient);
/* gradient for reduceSumSquared */
static
void GradReduceSumSquared(XTensor * node);
void GradReduceSumSquared(XTensor * node, bool isEfficient);
/* gradient for reduceVariance */
static
void GradReduceVariance(XTensor * node);
void GradReduceVariance(XTensor * node, bool isEfficient);
};
}
......
......@@ -30,7 +30,7 @@
namespace nts{
/* compute dE/dx of a node */
void XShapeGrad::MakeGrad(XTensor * node)
void XShapeGrad::MakeGrad(XTensor * node, bool isEfficent)
{
CheckNTErrors(node->grad != NULL, "No gradient found!");
......@@ -38,17 +38,17 @@ void XShapeGrad::MakeGrad(XTensor * node)
int operID = income.typeID;
if(operID == SHAPE_MERGE)
GradMerge(node);
GradMerge(node, isEfficent);
else if(operID == SHAPE_MERGE_LIST)
GradMergeList(node);
GradMergeList(node, isEfficent);
else if(operID == SHAPE_UNSQUEEZE)
GradUnsqueeze(node);
GradUnsqueeze(node, isEfficent);
else if(operID == SHAPE_SPLIT)
GradSplit(node);
GradSplit(node, isEfficent);
else if(operID == SHAPE_SPLIT_LIST)
GradSplitList(node);
GradSplitList(node, isEfficent);
else if (operID == SHAPE_TRANSPOSE)
GradTranspose(node);
GradTranspose(node, isEfficent);
else{
ShowNTErrors("TODO!");
}
......@@ -62,10 +62,10 @@ bool XShapeGrad::IsShapeOP(XTensor * node)
}
/* post processing of a node */
void XShapeGrad::PostProcessing(XTensor * node, int typeID)
void XShapeGrad::PostProcessing(XTensor * node, int typeID, bool isEfficent)
{
if(typeID == SHAPE_SPLIT_LIST)
GradSplitListPost(node);
GradSplitListPost(node, isEfficent);
}
/*
......@@ -80,8 +80,10 @@ dE/db_1 = dE/dc_{split_1}
i.e.,
dE/da = split(dE/dc)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XShapeGrad::GradMerge(XTensor * node)
void XShapeGrad::GradMerge(XTensor * node, bool isEfficent)
{
XLink &income = node->income;
XTensor * input = income.tails[0];
......@@ -162,8 +164,10 @@ dE/db = dE/dc_{split_1}
i.e.,
list(dE/da, dE/db, ...) = split(dE/dc)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XShapeGrad::GradMergeList(XTensor * node)
void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for MERGE!");
......@@ -239,8 +243,10 @@ c = split(a)
we have
dE/da = merge(dE/dc)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XShapeGrad::GradSplit(XTensor * node)
void XShapeGrad::GradSplit(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
XTensor * input = income.tails[0];
......@@ -279,8 +285,10 @@ list(c_1, ...) = split(a)
we have
dE/da = merge(dE/c_1, ...)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XShapeGrad::GradSplitList(XTensor * node)
void XShapeGrad::GradSplitList(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
XTensor * input = income.tails[0];
......@@ -299,8 +307,10 @@ have been processed. We do this in a post-processing
manner because we can fuze multiple memory copy jobs
one time. This is good for system speed up.
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XShapeGrad::GradSplitListPost(XTensor * node)
void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
{
/* we compute the gradient for current node, rather than for
child node, i.e., we use the outgoing edge here */
......@@ -351,8 +361,10 @@ c = unsqueeze(a)
we have
dE/da = reduecesum(dE/dc)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XShapeGrad::GradUnsqueeze(XTensor * node)
void XShapeGrad::GradUnsqueeze(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for UNSQUEEZE!");
......@@ -379,8 +391,10 @@ c = Transpose(a)
we have
dE/da = Transpose(dE/dc)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XShapeGrad::GradTranspose(XTensor * node)
void XShapeGrad::GradTranspose(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for TRANSPOSE!");
......
......@@ -34,7 +34,7 @@ class XShapeGrad
public:
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node);
void MakeGrad(XTensor * node, bool isEfficent);
/* indicates whether the node is for a shaping operation */
static
......@@ -42,38 +42,38 @@ public:
/* post processing of a node */
static
void PostProcessing(XTensor * node, int typeId);
void PostProcessing(XTensor * node, int typeId, bool isEfficent);
private:
/* gradient computation for merge: c = merge(a, b, ...) */
static
void GradMerge(XTensor * node);
void GradMerge(XTensor * node, bool isEfficent);
/* gradient computation for merging a list of tensors : c = merge(list(a, b, ...)) */
static
void GradMergeList(XTensor * node);
void GradMergeList(XTensor * node, bool isEfficent);
/* gradient computation for split: c = split(a) */
static
void GradSplit(XTensor * node);
void GradSplit(XTensor * node, bool isEfficent);
/* gradient computation for spliting. we return the list of the splits : list(c_1, ...) = split(a) */
static
void GradSplitList(XTensor * node);
void GradSplitList(XTensor * node, bool isEfficent);
/* gradient computation for splitting. we return the list of the splits : list(c_1, ...) = split(a).
this method is called only when all nodes of the splitting have been processed. We do this in a post-processing
manner because we can fuse multiple memory copy jobs at one time. This is good for system speed up. */
static
void GradSplitListPost(XTensor * node);
void GradSplitListPost(XTensor * node, bool isEfficent);
/* gradient computation for unsqueezing a tensor : c = unsqueeze(a) */
static
void GradUnsqueeze(XTensor * node);
void GradUnsqueeze(XTensor * node, bool isEfficent);
/* gradient computation for unsqueezing a tensor : c = unsqueeze(a) */
static
void GradTranspose(XTensor * node);
void GradTranspose(XTensor * node, bool isEfficent);
};
......
......@@ -55,6 +55,7 @@ void XNetClearAll()
/* constructor */
XNet::XNet()
{
/* start with an empty topological-order node list */
nodes.Clear();
/* by default keep gradients for every node; gradient-efficient mode
   (keep gradients only where needed) is opt-in via SetGradEfficientFlag() */
isGradEfficient = false;
}
/* de-constructor */
......@@ -115,6 +116,10 @@ void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
{
Traverse(roots);
/* label tensors where the backward computation is necessary */
if(isGradEfficient)
MakeEfficientNet();
for(int i = 0; i < nodes.count; i++){
XTensor * node = (XTensor*)nodes.Get(i);
node->visitMark = NODE_UNFINISHED;
......@@ -154,10 +159,20 @@ void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
CheckNTErrors(node->mem->bufUsed < BUF_PITCH, "Illegal access of buffer!");
}
if(node->visitMark == NODE_FINISHED)
continue;
BackwardNode(node);
if(node->visitMark != NODE_FINISHED)
BackwardNode(node, isGradEfficient);
if(isGradEfficient){
if(!XNoder::IsLeaf(node)){
XLink & outgo = node->outgo;
for(int i = 0; i < outgo.tailNum; i++){
XTensor * parent = outgo.tails[i];
ClearGrad(parent);
}
}
else
ClearGrad(node);
}
}
}
......@@ -179,27 +194,32 @@ void XNet::Backward(XList &roots, LOSS_FUNCTION_NAME loss)
/*
backward computation for a given node
>> node - the node keeps the result of an operation (e.g., activation function)
>> isEfficient - indicates whether the back-propagation is computed in an
efficient manner
*/
void XNet::BackwardNode(XTensor * node)
void XNet::BackwardNode(XTensor * node, bool isEfficient)
{
    /* nothing to do for an empty or already-processed node */
    if(node == NULL || node->visitMark == NODE_FINISHED)
        return;

    if(!XNoder::IsLeaf(node)){
        /* post processing for parent nodes (e.g., fused gradient for split lists) */
        BackwardNodePost(node, isEfficient);

        /* process the current node: dispatch on the type of the
           operation that produced it */
        if(XMathGrad::IsMathOP(node))
            XMathGrad::MakeGrad(node, isEfficient);
        else if(XFuncGrad::IsFunc(node))
            XFuncGrad::MakeGrad(node, isEfficient);
        else if(XShapeGrad::IsShapeOP(node))
            XShapeGrad::MakeGrad(node, isEfficient);
        else{
            ShowNTErrors("Wrong node type!");
        }
    }
    else{
        /* a leaf has no backward step of its own; just mark it done */
        node->visitMark = NODE_FINISHED;
    }
}
/*
......@@ -207,7 +227,7 @@ backward computation (in post processing) for a given node
>> node - the node whose parent nodes are not processed yet. So
we do the job at the child node.
*/
void XNet::BackwardNodePost(XTensor * node)
void XNet::BackwardNodePost(XTensor * node, bool isEfficent)
{
bool isSplitList = false;
XLink &outgo = node->outgo;
......@@ -217,7 +237,7 @@ void XNet::BackwardNodePost(XTensor * node)
}
if(isSplitList)
XShapeGrad::PostProcessing(node, SHAPE_SPLIT_LIST);
XShapeGrad::PostProcessing(node, SHAPE_SPLIT_LIST, isEfficent);
}
/*
......@@ -304,4 +324,62 @@ void XNet::Dump(FILE * file)
}
}
/*
set the flag of gradient-efficient mode. When the flag is on, the backward
pass keeps gradients only where they are needed (see MakeEfficientNet) and
releases the others as soon as possible (see ClearGrad).
>> flag - the flag value
*/
void XNet::SetGradEfficientFlag(bool flag)
{
isGradEfficient = flag;
}
/* generate the gradient-efficient flag for every node */
void XNet::MakeEfficientNet()
{
/* back-propagation from output to input */
for(int i = 0; i < nodes.count; i++){
XTensor * node = (XTensor*)nodes.Get(i);
XLink &income = node->income;
for(int j = 0; j < income.tailNum; j++){
XTensor * child = income.tails[j];
if(child->isGrad || child->isVar){
node->SetGradFlag(true);
break;
}
}
}
}
/*
clear the gradient information if the node is no longer in use
>> node - the node whose gradient we want to release
*/
void XNet::ClearGrad(XTensor * node)
{
    /* gradients of parameter (variable) nodes must be kept for the update */
    if(node->isVar)
        return;

    /* nothing to release */
    if(node->grad == NULL)
        return;

    /* the node itself must have finished its backward computation */
    if(node->visitMark != NODE_FINISHED)
        return;

    /* the gradient can be released only after all children have
       consumed it, i.e., finished their own backward computation */
    XLink & income = node->income;
    bool finished = true;
    for(int i = 0; i < income.tailNum; i++){
        XTensor * child = income.tails[i];
        if(child->visitMark != NODE_FINISHED){
            finished = false;
            break;
        }
    }

    if(finished){
        delete node->grad;
        node->grad = NULL;
    }
}
}
\ No newline at end of file
......@@ -47,6 +47,9 @@ struct XNet
/* input nodes of the network */
XList inputs;
/* indicates whether the network just keeps the gradient for parameter tensors */
bool isGradEfficient;
/* constructor */
XNet();
......@@ -71,10 +74,10 @@ struct XNet
void Backward(XList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward computation for a given node */
void BackwardNode(XTensor * node);
void BackwardNode(XTensor * node, bool isEfficent = false);
/* backward computation (in post processing) for a given node */
void BackwardNodePost(XTensor * node);
void BackwardNodePost(XTensor * node, bool isEfficent = false);
/* traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) */
......@@ -89,6 +92,15 @@ struct XNet
/* dump network information */
void Dump(FILE * file);
/* set the flag of gradient-efficient */
void SetGradEfficientFlag(bool flag = true);
/* generate the gradient-efficient flag for every node */
void MakeEfficientNet();
/* clear the gradient information if the node is no longer in use */
void ClearGrad(XTensor * node);
};
/* we make a unique id for every tensor */
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论