Commit baad6629 by xiaotong

improve the space management

parent 6ea64b51
......@@ -29,7 +29,7 @@
namespace nts{
/* compute dE/dx of a node */
void XFuncGrad::MakeGrad(XTensor * node)
void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient)
{
......
......@@ -35,7 +35,7 @@ class XFuncGrad
public:
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node);
void MakeGrad(XTensor * node, bool isEfficient);
/* indicates whether the node is for an activation function */
static
......
......@@ -33,7 +33,7 @@ class XMathGrad
public:
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node);
void MakeGrad(XTensor * node, bool isEfficient);
/* indicates whether the node is for a math operation */
static
......@@ -43,121 +43,121 @@ private:
/* gradient for absolute */
static
void GradAbsolute(XTensor * node);
void GradAbsolute(XTensor * node, bool isEfficient);
/* gradient for cos */
static
void GradCos(XTensor * node);
void GradCos(XTensor * node, bool isEfficient);
/* gradient for exp */
static
void GradExp(XTensor * node);
void GradExp(XTensor * node, bool isEfficient);
/* gradient for log: c = log(a) */
static
void GradLog(XTensor * node);
void GradLog(XTensor * node, bool isEfficient);
/* gradient for round */
static
void GradRound(XTensor * node);
void GradRound(XTensor * node, bool isEfficient);
/* gradient for sign */
static
void GradSign(XTensor * node);
void GradSign(XTensor * node, bool isEfficient);
/* gradient for sin */
static
void GradSin(XTensor * node);
void GradSin(XTensor * node, bool isEfficient);
/* gradient for tan */
static
void GradTan(XTensor * node);
void GradTan(XTensor * node, bool isEfficient);
/* gradient for clip */
static
void GradClip(XTensor * node);
void GradClip(XTensor * node, bool isEfficient);
/* gradient for Divide */
static
void GradDiv(XTensor * node);
void GradDiv(XTensor * node, bool isEfficient);
/* gradient for DivideDim */
static
void GradDivDim(XTensor * node);
void GradDivDim(XTensor * node, bool isEfficient);
/* gradient for matrix multiply: c = matmul(a, b) * \alpha */
static
void GradMatrixMul(XTensor * node);
void GradMatrixMul(XTensor * node, bool isEfficient);
/* gradient for matrix multiply: c = matmul(a, b) * \alpha */
static
void GradMatrixMul(XTensor * a, XTensor * deda, MATRIX_TRANS_TYPE transA,
XTensor * b, XTensor * dedb, MATRIX_TRANS_TYPE transB,
XTensor * dedc, DTYPE alpha);
XTensor * dedc, DTYPE alpha, bool isEfficient);
/* gradient for matrix multiply in batch mode.
for each batch: c_i = matmul(a_i, b_i) * \alpha */
static
void GradMatrixMulBatched(XTensor * node);
void GradMatrixMulBatched(XTensor * node, bool isEfficient);
/* gradient for multiply (dot product): c = a * b * \alpha */
static
void GradMultiply(XTensor * node);
void GradMultiply(XTensor * node, bool isEfficient);
/* gradient for multiply one dimension: c = a * b * \alpha
where the size of b is equal to that of one dimension of a */
static
void GradMultiplyDim(XTensor * node);
void GradMultiplyDim(XTensor * node, bool isEfficient);
/* gradient for negate */
static
void GradNegate(XTensor * node);
void GradNegate(XTensor * node, bool isEfficient);
/* gradient for normalize */
static
void GradNormalize(XTensor * node);
void GradNormalize(XTensor * node, bool isEfficient);
/* gradient for power */
static
void GradPower(XTensor * node);
void GradPower(XTensor * node, bool isEfficient);
/* gradient for ScaleAndShift */
static
void GradScaleAndShift(XTensor * node);
void GradScaleAndShift(XTensor * node, bool isEfficient);
/* gradient for Minus */
static
void GradSub(XTensor * node);
void GradSub(XTensor * node, bool isEfficient);
/* gradient for sub with one dimension: c = a - b * \beta
where the size of b is equal to that of one dimension of a */
static
void GradSubDim(XTensor * node);
void GradSubDim(XTensor * node, bool isEfficient);
/* gradient for sum: c = a + b * \beta */
static
void GradSum(XTensor * node);
void GradSum(XTensor * node, bool isEfficient);
/* gradient for sum with one dimension: c = a + b * \beta
where the size of b is equal to that of one dimension of a */
static
void GradSumDim(XTensor * node);
void GradSumDim(XTensor * node, bool isEfficient);
/* gradient for reduceMean */
static
void GradReduceMean(XTensor * node);
void GradReduceMean(XTensor * node, bool isEfficient);
/* gradient for reduceSum */
static
void GradReduceSum(XTensor * node);
void GradReduceSum(XTensor * node, bool isEfficient);
/* gradient for reduceSumSquared */
static
void GradReduceSumSquared(XTensor * node);
void GradReduceSumSquared(XTensor * node, bool isEfficient);
/* gradient for reduceVariance */
static
void GradReduceVariance(XTensor * node);
void GradReduceVariance(XTensor * node, bool isEfficient);
};
}
......
......@@ -30,7 +30,7 @@
namespace nts{
/* compute dE/dx of a node */
void XShapeGrad::MakeGrad(XTensor * node)
void XShapeGrad::MakeGrad(XTensor * node, bool isEfficient)
{
CheckNTErrors(node->grad != NULL, "No gradient found!");
......@@ -38,17 +38,17 @@ void XShapeGrad::MakeGrad(XTensor * node)
int operID = income.typeID;
if(operID == SHAPE_MERGE)
GradMerge(node);
GradMerge(node, isEfficient);
else if(operID == SHAPE_MERGE_LIST)
GradMergeList(node);
GradMergeList(node, isEfficient);
else if(operID == SHAPE_UNSQUEEZE)
GradUnsqueeze(node);
GradUnsqueeze(node, isEfficient);
else if(operID == SHAPE_SPLIT)
GradSplit(node);
GradSplit(node, isEfficient);
else if(operID == SHAPE_SPLIT_LIST)
GradSplitList(node);
GradSplitList(node, isEfficient);
else if (operID == SHAPE_TRANSPOSE)
GradTranspose(node);
GradTranspose(node, isEfficient);
else{
ShowNTErrors("TODO!");
}
......@@ -62,10 +62,10 @@ bool XShapeGrad::IsShapeOP(XTensor * node)
}
/* post processing of a node */
void XShapeGrad::PostProcessing(XTensor * node, int typeID)
void XShapeGrad::PostProcessing(XTensor * node, int typeID, bool isEfficient)
{
if(typeID == SHAPE_SPLIT_LIST)
GradSplitListPost(node);
GradSplitListPost(node, isEfficient);
}
/*
......@@ -80,8 +80,10 @@ dE/db_1 = dE/dc_{split_1}
i.e.,
dE/da = split(dE/dc)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is performed in
an efficient manner
*/
void XShapeGrad::GradMerge(XTensor * node)
void XShapeGrad::GradMerge(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
XTensor * input = income.tails[0];
......@@ -162,8 +164,10 @@ dE/db = dE/dc_{split_1}
i.e.,
list(dE/da, dE/db, ...) = split(dE/dc)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is performed in
an efficient manner
*/
void XShapeGrad::GradMergeList(XTensor * node)
void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for MERGE!");
......@@ -239,8 +243,10 @@ c = split(a)
we have
dE/da = merge(dE/dc)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is performed in
an efficient manner
*/
void XShapeGrad::GradSplit(XTensor * node)
void XShapeGrad::GradSplit(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
XTensor * input = income.tails[0];
......@@ -279,8 +285,10 @@ list(c_1, ...) = split(a)
we have
dE/da = merge(dE/c_1, ...)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is performed in
an efficient manner
*/
void XShapeGrad::GradSplitList(XTensor * node)
void XShapeGrad::GradSplitList(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
XTensor * input = income.tails[0];
......@@ -299,8 +307,10 @@ have been processed. We do this in a post-processing
manner because we can fuse multiple memory-copy jobs
at one time. This is good for system speed-up.
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is performed in
an efficient manner
*/
void XShapeGrad::GradSplitListPost(XTensor * node)
void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
{
/* we compute the gradient for the current node, rather than for
a child node, i.e., we use the outgoing edge here */
......@@ -351,8 +361,10 @@ c = unsqueeze(a)
we have
dE/da = reducesum(dE/dc)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is performed in
an efficient manner
*/
void XShapeGrad::GradUnsqueeze(XTensor * node)
void XShapeGrad::GradUnsqueeze(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for UNSQUEEZE!");
......@@ -379,8 +391,10 @@ c = Transpose(a)
we have
dE/da = Transpose(dE/dc)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is performed in
an efficient manner
*/
void XShapeGrad::GradTranspose(XTensor * node)
void XShapeGrad::GradTranspose(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for TRANSPOSE!");
......
......@@ -34,7 +34,7 @@ class XShapeGrad
public:
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node);
void MakeGrad(XTensor * node, bool isEfficient);
/* indicates whether the node is for a shaping operation */
static
......@@ -42,38 +42,38 @@ public:
/* post processing of a node */
static
void PostProcessing(XTensor * node, int typeId);
void PostProcessing(XTensor * node, int typeId, bool isEfficient);
private:
/* gradient computation for merge: c = merge(a, b, ...) */
static
void GradMerge(XTensor * node);
void GradMerge(XTensor * node, bool isEfficient);
/* gradient computation for merging a list of tensors: c = merge(list(a, b, ...)) */
static
void GradMergeList(XTensor * node);
void GradMergeList(XTensor * node, bool isEfficient);
/* gradient computation for split: c = split(a) */
static
void GradSplit(XTensor * node);
void GradSplit(XTensor * node, bool isEfficient);
/* gradient computation for splitting. We return the list of the splits: list(c_1, ...) = split(a) */
static
void GradSplitList(XTensor * node);
void GradSplitList(XTensor * node, bool isEfficient);
/* gradient computation for splitting. We return the list of the splits: list(c_1, ...) = split(a).
This method is called only when all nodes of splitting have been processed. We do this in a post-processing
manner because we can fuse multiple memory-copy jobs at one time. This is good for system speed-up. */
static
void GradSplitListPost(XTensor * node);
void GradSplitListPost(XTensor * node, bool isEfficient);
/* gradient computation for unsqueezing a tensor: c = unsqueeze(a) */
static
void GradUnsqueeze(XTensor * node);
void GradUnsqueeze(XTensor * node, bool isEfficient);
/* gradient computation for transposing a tensor: c = transpose(a) */
static
void GradTranspose(XTensor * node);
void GradTranspose(XTensor * node, bool isEfficient);
};
......
......@@ -55,6 +55,7 @@ void XNetClearAll()
XNet::XNet()
{
nodes.Clear();
isGradEfficient = false;
}
/* de-constructor */
......@@ -115,6 +116,10 @@ void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
{
Traverse(roots);
/* label tensors where the backward computation is necessary */
if(isGradEfficient)
MakeEfficientNet();
for(int i = 0; i < nodes.count; i++){
XTensor * node = (XTensor*)nodes.Get(i);
node->visitMark = NODE_UNFINISHED;
......@@ -154,10 +159,20 @@ void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
CheckNTErrors(node->mem->bufUsed < BUF_PITCH, "Illegal access of buffer!");
}
if(node->visitMark == NODE_FINISHED)
continue;
BackwardNode(node);
if(node->visitMark != NODE_FINISHED)
BackwardNode(node, isGradEfficient);
if(isGradEfficient){
if(!XNoder::IsLeaf(node)){
XLink & outgo = node->outgo;
for(int i = 0; i < outgo.tailNum; i++){
XTensor * parent = outgo.tails[i];
ClearGrad(parent);
}
}
else
ClearGrad(node);
}
}
}
......@@ -179,27 +194,32 @@ void XNet::Backward(XList &roots, LOSS_FUNCTION_NAME loss)
/*
backward computation for a given node
>> node - the node keeps the result of an operation (e.g., activation function)
>> isEfficient - indicates whether the back-propagation is computed in an
efficient manner
*/
void XNet::BackwardNode(XTensor * node)
void XNet::BackwardNode(XTensor * node, bool isEfficient)
{
if(node == NULL || node->visitMark == NODE_FINISHED)
return;
if(!XNoder::IsLeaf(node)){
/* post processing for parent nodes */
BackwardNodePost(node);
BackwardNodePost(node, isEfficient);
/* process the current node */
if(XMathGrad::IsMathOP(node))
XMathGrad::MakeGrad(node);
XMathGrad::MakeGrad(node, isEfficient);
else if(XFuncGrad::IsFunc(node))
XFuncGrad::MakeGrad(node);
XFuncGrad::MakeGrad(node, isEfficient);
else if(XShapeGrad::IsShapeOP(node))
XShapeGrad::MakeGrad(node);
XShapeGrad::MakeGrad(node, isEfficient);
else{
ShowNTErrors("Wrong node type!");
}
}
else{
node->visitMark = NODE_FINISHED;
}
}
/*
......@@ -207,7 +227,7 @@ backward computation (in post processing) for a given node
>> node - the node whose parent nodes are not processed yet. So
we do the job at the child node.
*/
void XNet::BackwardNodePost(XTensor * node)
void XNet::BackwardNodePost(XTensor * node, bool isEfficient)
{
bool isSplitList = false;
XLink &outgo = node->outgo;
......@@ -217,7 +237,7 @@ void XNet::BackwardNodePost(XTensor * node)
}
if(isSplitList)
XShapeGrad::PostProcessing(node, SHAPE_SPLIT_LIST);
XShapeGrad::PostProcessing(node, SHAPE_SPLIT_LIST, isEfficient);
}
/*
......@@ -304,4 +324,62 @@ void XNet::Dump(FILE * file)
}
}
/*
set the gradient-efficient flag
>> flag - the flag
*/
void XNet::SetGradEfficientFlag(bool flag)
{
isGradEfficient = flag;
}
/* generate the gradient-efficient flag for every node */
void XNet::MakeEfficientNet()
{
/* back-propagation from output to input */
for(int i = 0; i < nodes.count; i++){
XTensor * node = (XTensor*)nodes.Get(i);
XLink &income = node->income;
for(int j = 0; j < income.tailNum; j++){
XTensor * child = income.tails[j];
if(child->isGrad || child->isVar){
node->SetGradFlag(true);
break;
}
}
}
}
/*
clear the gradient information if the node is of no use
>> node - the node whose gradient we want to clear
*/
void XNet::ClearGrad(XTensor * node)
{
if(node->isVar)
return;
if(node->grad == NULL)
return;
if(node->visitMark != NODE_FINISHED)
return;
XLink & income = node->income;
bool finished = true;
for(int i = 0; i < income.tailNum; i++){
XTensor * child = income.tails[i];
if(child->visitMark != NODE_FINISHED){
finished = false;
break;
}
}
if(finished){
//fprintf(stderr, "del %d %ld\n", node->id, node->grad->unitNum);
delete node->grad;
node->grad = NULL;
}
}
}
\ No newline at end of file
......@@ -47,6 +47,9 @@ struct XNet
/* input nodes of the network */
XList inputs;
/* indicates whether the network keeps gradients only for parameter tensors */
bool isGradEfficient;
/* constructor */
XNet();
......@@ -71,10 +74,10 @@ struct XNet
void Backward(XList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward computation for a given node */
void BackwardNode(XTensor * node);
void BackwardNode(XTensor * node, bool isEfficient = false);
/* backward computation (in post processing) for a given node */
void BackwardNodePost(XTensor * node);
void BackwardNodePost(XTensor * node, bool isEfficient = false);
/* traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) */
......@@ -89,6 +92,15 @@ struct XNet
/* dump network information */
void Dump(FILE * file);
/* set the gradient-efficient flag */
void SetGradEfficientFlag(bool flag = true);
/* generate the gradient-efficient flag for every node */
void MakeEfficientNet();
/* clear the gradient information if the node is of no use */
void ClearGrad(XTensor * node);
};
/* we make a unique id for every tensor */
......