Commit baad6629 by xiaotong

improve the space management

parent 6ea64b51
......@@ -29,7 +29,7 @@
namespace nts{
/* compute dE/dx of a node */
void XFuncGrad::MakeGrad(XTensor * node)
void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient)
{
......
......@@ -35,7 +35,7 @@ class XFuncGrad
public:
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node);
void MakeGrad(XTensor * node, bool isEfficient);
/* indicates whether the node is for an activation function */
static
......
......@@ -28,69 +28,73 @@
namespace nts{
/* compute dE/dx of a node */
void XMathGrad::MakeGrad(XTensor * node)
void XMathGrad::MakeGrad(XTensor * node, bool isEfficient)
{
CheckNTErrors(node->grad != NULL, "No gradient found!");
if(!isEfficient){
CheckNTErrors(node->grad != NULL, "No gradient found!");
}
else{
CheckNTErrors(!node->isGrad || node->grad != NULL, "No gradient found!");
}
XLink &income = node->income;
int operID = income.typeID;
if(operID == MATH_ABSOLUTE)
GradAbsolute(node);
GradAbsolute(node, isEfficient);
else if(operID == MATH_COS)
GradCos(node);
GradCos(node, isEfficient);
else if(operID == MATH_EXP)
GradExp(node);
GradExp(node, isEfficient);
else if(operID == MATH_LOG)
GradLog(node);
GradLog(node, isEfficient);
else if(operID == MATH_ROUND)
GradRound(node);
GradRound(node, isEfficient);
else if(operID == MATH_SIGN)
GradSign(node);
GradSign(node, isEfficient);
else if(operID == MATH_SIN)
GradSin(node);
GradSin(node, isEfficient);
else if(operID == MATH_TAN)
GradTan(node);
GradTan(node, isEfficient);
else if(operID == MATH_CLIP)
GradClip(node);
GradClip(node, isEfficient);
else if(operID == MATH_DIV)
GradDiv(node);
GradDiv(node, isEfficient);
else if(operID == MATH_DIVDIM)
GradDivDim(node);
GradDivDim(node, isEfficient);
else if(operID == MATH_MATRIXMUL)
GradMatrixMul(node);
GradMatrixMul(node, isEfficient);
else if(operID == MATH_MATRIXMULBATCHED)
GradMatrixMulBatched(node);
GradMatrixMulBatched(node, isEfficient);
else if(operID == MATH_MULTIPLY)
GradMultiply(node);
GradMultiply(node, isEfficient);
else if(operID == MATH_MULTIPLYDIM)
GradMultiplyDim(node);
GradMultiplyDim(node, isEfficient);
else if(operID == MATH_NEGATE)
GradNegate(node);
GradNegate(node, isEfficient);
else if(operID == MATH_NORMALIZE)
GradNormalize(node);
GradNormalize(node, isEfficient);
else if(operID == MATH_POWER)
GradPower(node);
GradPower(node, isEfficient);
else if(operID == MATH_SCALEANDSHIFT)
GradScaleAndShift(node);
GradScaleAndShift(node, isEfficient);
else if(operID == MATH_SUB)
GradSub(node);
GradSub(node, isEfficient);
else if(operID == MATH_SUBDIM)
GradSubDim(node);
GradSubDim(node, isEfficient);
else if(operID == MATH_SUM)
GradSum(node);
GradSum(node, isEfficient);
else if(operID == MATH_SUMDIM)
GradSumDim(node);
GradSumDim(node, isEfficient);
else if(operID == REDUCE_REDUCEMEAN)
GradReduceMean(node);
GradReduceMean(node, isEfficient);
else if(operID == REDUCE_REDUCESUM)
GradReduceSum(node);
GradReduceSum(node, isEfficient);
else if(operID == REDUCE_REDUCESUMSQUARED)
GradReduceSumSquared(node);
GradReduceSumSquared(node, isEfficient);
else if(operID == REDUCE_REDUCEVARIANCE)
GradReduceVariance(node);
GradReduceVariance(node, isEfficient);
else{
ShowNTErrors("TODO!");
}
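The relaxed assertion at the top of XMathGrad::MakeGrad reads as the implication "if the node keeps a gradient, it must have one allocated": in efficient mode a node with isGrad == false may legally reach this point with grad == NULL. A more explicit but equivalent form of the check, as a sketch only (not part of the commit):

    if(isEfficient){
        /* only nodes that keep gradients are required to have one allocated */
        if(node->isGrad)
            CheckNTErrors(node->grad != NULL, "No gradient found!");
    }
    else{
        /* in the default mode every node reaching backward must carry a gradient */
        CheckNTErrors(node->grad != NULL, "No gradient found!");
    }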
......@@ -111,8 +115,10 @@ we have
dE/da = dE/dc a >= 0
-dE/dc a < 0
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradAbsolute(XTensor * node)
void XMathGrad::GradAbsolute(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for ABSOLUTE!");
......@@ -137,8 +143,10 @@ c = cos(a)
we have
dE/da = dE/dc * -sin(a)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradCos(XTensor * node)
void XMathGrad::GradCos(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for COS!");
......@@ -164,8 +172,10 @@ c = exp(a)
we have
dE/da = dE/dc * exp(a)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradExp(XTensor * node)
void XMathGrad::GradExp(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for EXP!");
......@@ -190,8 +200,10 @@ c = log(a)
we have
dE/da = dE/dc * 1/a
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradLog(XTensor * node)
void XMathGrad::GradLog(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for LOG!");
......@@ -212,8 +224,10 @@ c = round(a)
we have
dE/da = 0
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradRound(XTensor * node)
void XMathGrad::GradRound(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for ROUND!");
......@@ -231,8 +245,10 @@ c = sign(a)
we have
dE/da = 0
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradSign(XTensor * node)
void XMathGrad::GradSign(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SIGN!");
......@@ -250,8 +266,10 @@ c = sin(a)
we have
dE/da = dE/dc * cos(a)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradSin(XTensor * node)
void XMathGrad::GradSin(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SIN!");
......@@ -276,8 +294,10 @@ c = tan(a)
we have
dE/da = dE/dc * 1/(cos(a))^2
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradTan(XTensor * node)
void XMathGrad::GradTan(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for TAN!");
......@@ -302,8 +322,10 @@ we have
dE/da = dE/dc lower < a < upper
dE/da = 0 otherwise
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradClip(XTensor * node)
void XMathGrad::GradClip(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for CLIP!");
......@@ -332,8 +354,10 @@ we have
dE/da = dE/dc / b
dE/db = dE/dc * a / -b^2
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradDiv(XTensor * node)
void XMathGrad::GradDiv(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for DIVIDE!");
......@@ -365,8 +389,12 @@ c = a / b
where the size of b is equal to dimension n of a, i.e., |b| = a.dimSize[n]
dE/da = dE/dc * (1/b)
dE/db = (dE/dc * (-a/b^2)).reduce(0,...,n-1,n+1,...)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradDivDim(XTensor * node)
void XMathGrad::GradDivDim(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for DIVDIM!");
......@@ -466,8 +494,10 @@ we have
dE/da = dE/dc * b^T * \alpha
dE/db = a^T * dE/dc * \alpha
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradMatrixMul(XTensor * node)
void XMathGrad::GradMatrixMul(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for MATRIXMUL!");
......@@ -479,17 +509,19 @@ void XMathGrad::GradMatrixMul(XTensor * node)
MATRIX_TRANS_TYPE transB = income.GetParamTrans(1);
DTYPE alpha = income.GetParam(2);
XNoder::MakeGrad(a);
XNoder::MakeGrad(b);
if(!isEfficient || a->isGrad)
XNoder::MakeGrad(a);
if(!isEfficient || b->isGrad)
XNoder::MakeGrad(b);
XTensor * c = node;
XTensor * dedc = node->grad;
XTensor * deda = a->grad;
XTensor * dedb = b->grad;
if(deda->order == 2 && dedb->order == 2)
GradMatrixMul(a, deda, transA, b, dedb, transB, dedc, alpha);
else if(transA == X_NOTRANS && deda->order > 2 && dedb->order == 2){
if(a->order == 2 && b->order == 2)
GradMatrixMul(a, deda, transA, b, dedb, transB, dedc, alpha, isEfficient);
else if(transA == X_NOTRANS && a->order > 2 && b->order == 2){
int orderBackupA = a->order;
int orderBackupC = c->order;
int dimsBackupA[MAX_TENSOR_DIM_NUM];
......@@ -499,14 +531,16 @@ void XMathGrad::GradMatrixMul(XTensor * node)
a->Reshape(a->unitNum/a->GetDim(-1), a->GetDim(-1));
c->Reshape(c->unitNum/c->GetDim(-1), c->GetDim(-1));
deda->Reshape(deda->unitNum/deda->GetDim(-1), deda->GetDim(-1));
if(!isEfficient || a->isGrad)
deda->Reshape(deda->unitNum/deda->GetDim(-1), deda->GetDim(-1));
dedc->Reshape(dedc->unitNum/dedc->GetDim(-1), dedc->GetDim(-1));
GradMatrixMul(a, deda, transA, b, dedb, transB, dedc, alpha);
GradMatrixMul(a, deda, transA, b, dedb, transB, dedc, alpha, isEfficient);
a->Reshape(orderBackupA, dimsBackupA);
c->Reshape(orderBackupC, dimsBackupC);
deda->Reshape(orderBackupA, dimsBackupA);
if(!isEfficient || a->isGrad)
deda->Reshape(orderBackupA, dimsBackupA);
dedc->Reshape(orderBackupC, dimsBackupC);
}
else{
......@@ -524,19 +558,23 @@ gradient for matrix multiply: c = matmul(a, b) * \alpha
>> dedb - dE/db
>> dedc - dE/dc
>> alpha - the scalar
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradMatrixMul(XTensor * a, XTensor * deda, MATRIX_TRANS_TYPE transA,
XTensor * b, XTensor * dedb, MATRIX_TRANS_TYPE transB,
XTensor * dedc, DTYPE alpha)
XTensor * dedc, DTYPE alpha, bool isEfficient)
{
/* c = a * b * \alpha */
if(transA == X_NOTRANS && transB == X_NOTRANS){
/* dE/da = dE/dc * b^T * \alpha */
_MatrixMul(dedc, X_NOTRANS, b, X_TRANS, deda, alpha, 1.0F);
if(!isEfficient || a->isGrad)
_MatrixMul(dedc, X_NOTRANS, b, X_TRANS, deda, alpha, 1.0F);
/* dE/db = a^T * dE/dc * \alpha */
_MatrixMul(a, X_TRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F);
if(!isEfficient || b->isGrad)
_MatrixMul(a, X_TRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F);
}
/* c = a^T * b * \alpha */
......@@ -544,21 +582,25 @@ void XMathGrad::GradMatrixMul(XTensor * a, XTensor * deda, MATRIX_TRANS_TYPE tra
/* dE/da = (dE/dc * b^T)^T * \alpha
= b * dE/dc^T * \alpha */
_MatrixMul(b, X_NOTRANS, dedc, X_TRANS, deda, alpha, 1.0F);
if(!isEfficient || a->isGrad)
_MatrixMul(b, X_NOTRANS, dedc, X_TRANS, deda, alpha, 1.0F);
/* dE/db = a * dE/dc * \alpha */
_MatrixMul(a, X_NOTRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F);
if(!isEfficient || b->isGrad)
_MatrixMul(a, X_NOTRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F);
}
/* c = a * b^T * \alpha */
else if(transA == X_NOTRANS && transB == X_TRANS){
/* dE/da = dE/dc * b * \alpha */
_MatrixMul(dedc, X_NOTRANS, b, X_NOTRANS, deda, alpha, 1.0F);
if(!isEfficient || a->isGrad)
_MatrixMul(dedc, X_NOTRANS, b, X_NOTRANS, deda, alpha, 1.0F);
/* dE/db = (a^T * dE/dc)^T * \alpha
= dE/dc^T * a * \alpha */
_MatrixMul(dedc, X_TRANS, a, X_NOTRANS, dedb, alpha, 1.0F);
if(!isEfficient || b->isGrad)
_MatrixMul(dedc, X_TRANS, a, X_NOTRANS, dedb, alpha, 1.0F);
}
/* c = a^T * b^T * \alpha */
......@@ -566,11 +608,13 @@ void XMathGrad::GradMatrixMul(XTensor * a, XTensor * deda, MATRIX_TRANS_TYPE tra
/* dE/da = (dE/dc * b)^T * \alpha
= b^T * dE/dc^T * \alpha */
_MatrixMul(b, X_TRANS, dedc, X_TRANS, deda, alpha, 1.0F);
if(!isEfficient || a->isGrad)
_MatrixMul(b, X_TRANS, dedc, X_TRANS, deda, alpha, 1.0F);
/* dE/db = (a * dE/dc)^T * \alpha
= dE/dc^T * a^T * \alpha */
_MatrixMul(dedc, X_TRANS, a, X_TRANS, dedb, alpha, 1.0F);
if(!isEfficient || b->isGrad)
_MatrixMul(dedc, X_TRANS, a, X_TRANS, dedb, alpha, 1.0F);
}
}
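With these guards the helper tolerates a NULL gradient on the side that is skipped: in efficient mode the caller allocates deda/dedb only for operands whose isGrad flag is set, so when b is a frozen tensor the call can pass dedb == NULL and the branch that would touch it never runs. An illustrative call as it would appear inside XMathGrad (the frozen-b scenario and the argument values are assumptions, not taken from this commit):

    /* sketch: b->isGrad == false, so the caller never allocated dE/db and passes NULL;
       the guarded b-branch is skipped, so the NULL is never dereferenced */
    GradMatrixMul(a, a->grad, X_NOTRANS,
                  b, NULL, X_NOTRANS,
                  node->grad, 1.0F, /* isEfficient */ true);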
......@@ -582,8 +626,10 @@ we have
dE/da_i = dE/dc_i * b_i^T * \alpha
dE/db_i = a_i^T * dE/dc_i * \alpha
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradMatrixMulBatched(XTensor * node)
void XMathGrad::GradMatrixMulBatched(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for MATRIXMULBATCHED!");
......@@ -657,8 +703,10 @@ we have
dE/da = dE/dc * b
dE/db = dE/dc * a
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradMultiply(XTensor * node)
void XMathGrad::GradMultiply(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for MULTIPLY!");
......@@ -681,8 +729,12 @@ c = a * b
where the size of b is equal to dimension n of a, i.e., |b| = a.dimSize[n]
dE/da = dE/dc * b
dE/db = (dE/dc * a).reduce(0,...,n-1,n+1,...)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradMultiplyDim(XTensor * node)
void XMathGrad::GradMultiplyDim(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for MULTIPLYDIM!");
......@@ -771,8 +823,10 @@ c = -a
we have
dE/da = dE/dc * (-1)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradNegate(XTensor * node)
void XMathGrad::GradNegate(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for NEGATE!");
......@@ -793,8 +847,10 @@ void XMathGrad::GradNegate(XTensor * node)
/*
gradient for normalize
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradNormalize(XTensor * node)
void XMathGrad::GradNormalize(XTensor * node, bool isEfficient)
{
ShowNTErrors("This is really a bad piece of code!!!");
......@@ -887,8 +943,10 @@ c = pow(a,p)
we have
dE/da = (dE/dc) * p * a^(p-1)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradPower(XTensor * node)
void XMathGrad::GradPower(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for POWER!");
......@@ -916,8 +974,10 @@ c = a * scale + shift
we have
dE/da = dE/dc * scale
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradScaleAndShift(XTensor * node)
void XMathGrad::GradScaleAndShift(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SCALEANDSHIFT!");
......@@ -941,8 +1001,10 @@ we have
dE/da = dE/dc
dE/db = -dE/dc * \beta
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradSub(XTensor * node)
void XMathGrad::GradSub(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUBTRACT!");
......@@ -966,8 +1028,11 @@ c = a - b * \beta
where the size of b is equal to dimension n of a, i.e., |b| = a.dimSize[n]
dE/da = dE/dc
dE/db = - dE/dc * b.reduce(0,...,n-1,n+1,...) * \beta
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradSubDim(XTensor * node)
void XMathGrad::GradSubDim(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUBDIM!");
......@@ -1063,9 +1128,12 @@ c = a + b * \beta
we have
dE/da = dE/dc
dE/db = dE/dc * \beta
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradSum(XTensor * node)
void XMathGrad::GradSum(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUM!");
......@@ -1074,11 +1142,15 @@ void XMathGrad::GradSum(XTensor * node)
XTensor * b = income.tails[1];
DTYPE beta = income.GetParam(0);
XNoder::MakeGrad(a);
XNoder::MakeGrad(b);
if(!isEfficient || a->isGrad){
XNoder::MakeGrad(a);
_Sum(a->grad, node->grad, a->grad);
}
_Sum(a->grad, node->grad, a->grad);
_Sum(b->grad, node->grad, b->grad, beta);
if(!isEfficient || b->isGrad){
XNoder::MakeGrad(b);
_Sum(b->grad, node->grad, b->grad, beta);
}
node->visitMark = NODE_FINISHED;
}
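The reworked GradSum is the template for the guard idiom this commit introduces: allocation (XNoder::MakeGrad) and accumulation (_Sum) sit behind the same test, so an operand that keeps no gradient costs neither memory nor computation. A hand-traced example for c = a + b * \beta under isEfficient == true (the roles of a and b are illustrative):

    /* a is an activation on a path to trainable parameters:
         MakeEfficientNet set a->isGrad = true, so dE/da is allocated and
         accumulated, and back-propagation can continue through a
       b is a frozen tensor with b->isGrad == false:
         XNoder::MakeGrad(b) is never called and no dE/db buffer ever exists */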
......@@ -1089,8 +1161,12 @@ c = a + b * \beta
where the size of b is equal to dimension n of a, i.e., |b| = a.dimSize[n]
dE/da = dE/dc
dE/db = dE/dc * b.reduce(0,...,n-1,n+1,...) * \beta
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradSumDim(XTensor * node)
void XMathGrad::GradSumDim(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUMDIM!");
......@@ -1183,9 +1259,12 @@ for
c = reduceMean(a, dim)
we have
dE/da = Unsqueeze(dE/dc) * 1/dimSizeA[dim]
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradReduceMean(XTensor * node)
void XMathGrad::GradReduceMean(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for Reduce!");
......@@ -1213,9 +1292,12 @@ for
c = reduceSum(a, dim)
we have
dE/da = Unsqueeze(dE/dc) * 1
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradReduceSum(XTensor * node)
void XMathGrad::GradReduceSum(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for Reduce!");
......@@ -1243,9 +1325,12 @@ c = \sum_i (a_i - b)^2
we have
dE/da = Unsqueeze(dE/dc) * 2a
dE/db = dE/dc * -2 * n * b
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradReduceSumSquared(XTensor * node)
void XMathGrad::GradReduceSumSquared(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for Reduce!");
......@@ -1292,9 +1377,12 @@ where b is the mean, and n is the size of a
we have
dE/da = Unsqueeze(dE/dc) * 2a/n
dE/db = dE/dc * -2 * b
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradReduceVariance(XTensor * node)
void XMathGrad::GradReduceVariance(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for Reduce!");
......
......@@ -33,7 +33,7 @@ class XMathGrad
public:
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node);
void MakeGrad(XTensor * node, bool isEfficient);
/* indicates whether the node is for a math operation */
static
......@@ -43,121 +43,121 @@ private:
/* gradient for absolute */
static
void GradAbsolute(XTensor * node);
void GradAbsolute(XTensor * node, bool isEfficient);
/* gradient for cos */
static
void GradCos(XTensor * node);
void GradCos(XTensor * node, bool isEfficient);
/* gradient for exp */
static
void GradExp(XTensor * node);
void GradExp(XTensor * node, bool isEfficient);
/* gradient for log: c = log(a) */
static
void GradLog(XTensor * node);
void GradLog(XTensor * node, bool isEfficient);
/* gradient for round */
static
void GradRound(XTensor * node);
void GradRound(XTensor * node, bool isEfficient);
/* gradient for sign */
static
void GradSign(XTensor * node);
void GradSign(XTensor * node, bool isEfficient);
/* gradient for sin */
static
void GradSin(XTensor * node);
void GradSin(XTensor * node, bool isEfficient);
/* gradient for tan */
static
void GradTan(XTensor * node);
void GradTan(XTensor * node, bool isEfficient);
/* gradient for clip */
static
void GradClip(XTensor * node);
void GradClip(XTensor * node, bool isEfficient);
/* gradient for Divide */
static
void GradDiv(XTensor * node);
void GradDiv(XTensor * node, bool isEfficient);
/* gradient for DivideDim */
static
void GradDivDim(XTensor * node);
void GradDivDim(XTensor * node, bool isEfficient);
/* gradient for matrix multiply: c = matmul(a, b) * \alpha */
static
void GradMatrixMul(XTensor * node);
void GradMatrixMul(XTensor * node, bool isEfficient);
/* gradient for matrix multiply: c = matmul(a, b) * \alpha */
static
void GradMatrixMul(XTensor * a, XTensor * deda, MATRIX_TRANS_TYPE transA,
XTensor * b, XTensor * dedb, MATRIX_TRANS_TYPE transB,
XTensor * dedc, DTYPE alpha);
XTensor * dedc, DTYPE alpha, bool isEfficient);
/* gradient for matrix multiply in batch mode.
for each batch: c_i = matmul(a_i, b_i) * \alpha */
static
void GradMatrixMulBatched(XTensor * node);
void GradMatrixMulBatched(XTensor * node, bool isEfficient);
/* gradient for multiply (dot production): c = a * b * \alpha */
static
void GradMultiply(XTensor * node);
void GradMultiply(XTensor * node, bool isEfficient);
/* gradient for multiply one dimension: c = a * b * \alpha
where the size of b is equal to that of one dimension of a */
static
void GradMultiplyDim(XTensor * node);
void GradMultiplyDim(XTensor * node, bool isEfficient);
/* gradient for negate */
static
void GradNegate(XTensor * node);
void GradNegate(XTensor * node, bool isEfficient);
/* gradient for normalize */
static
void GradNormalize(XTensor * node);
void GradNormalize(XTensor * node, bool isEfficient);
/* gradient for power */
static
void GradPower(XTensor * node);
void GradPower(XTensor * node, bool isEfficient);
/* gradient for ScaleAndShift */
static
void GradScaleAndShift(XTensor * node);
void GradScaleAndShift(XTensor * node, bool isEfficient);
/* gradient for subtraction: c = a - b * \beta */
static
void GradSub(XTensor * node);
void GradSub(XTensor * node, bool isEfficient);
/* gradient for sub with one dimension: c = a - b * \beta
where the size of b is equal to that of one dimension of a */
static
void GradSubDim(XTensor * node);
void GradSubDim(XTensor * node, bool isEfficient);
/* gradient for sum: c = a + b * \beta */
static
void GradSum(XTensor * node);
void GradSum(XTensor * node, bool isEfficient);
/* gradient for sum with one dimension: c = a + b * \beta
where the size of b is equal to that of one dimension of a */
static
void GradSumDim(XTensor * node);
void GradSumDim(XTensor * node, bool isEfficient);
/* gradient for reduceMean */
static
void GradReduceMean(XTensor * node);
void GradReduceMean(XTensor * node, bool isEfficient);
/* gradient for reduceSum */
static
void GradReduceSum(XTensor * node);
void GradReduceSum(XTensor * node, bool isEfficient);
/* gradient for reduceSumSquared */
static
void GradReduceSumSquared(XTensor * node);
void GradReduceSumSquared(XTensor * node, bool isEfficient);
/* gradient for reduceVariance */
static
void GradReduceVariance(XTensor * node);
void GradReduceVariance(XTensor * node, bool isEfficient);
};
}
......
......@@ -30,7 +30,7 @@
namespace nts{
/* compute dE/dx of a node */
void XShapeGrad::MakeGrad(XTensor * node)
void XShapeGrad::MakeGrad(XTensor * node, bool isEfficient)
{
CheckNTErrors(node->grad != NULL, "No gradient found!");
......@@ -38,17 +38,17 @@ void XShapeGrad::MakeGrad(XTensor * node)
int operID = income.typeID;
if(operID == SHAPE_MERGE)
GradMerge(node);
GradMerge(node, isEfficient);
else if(operID == SHAPE_MERGE_LIST)
GradMergeList(node);
GradMergeList(node, isEfficient);
else if(operID == SHAPE_UNSQUEEZE)
GradUnsqueeze(node);
GradUnsqueeze(node, isEfficient);
else if(operID == SHAPE_SPLIT)
GradSplit(node);
GradSplit(node, isEfficient);
else if(operID == SHAPE_SPLIT_LIST)
GradSplitList(node);
GradSplitList(node, isEfficient);
else if (operID == SHAPE_TRANSPOSE)
GradTranspose(node);
GradTranspose(node, isEfficient);
else{
ShowNTErrors("TODO!");
}
......@@ -62,10 +62,10 @@ bool XShapeGrad::IsShapeOP(XTensor * node)
}
/* post processing of a node */
void XShapeGrad::PostProcessing(XTensor * node, int typeID)
void XShapeGrad::PostProcessing(XTensor * node, int typeID, bool isEfficient)
{
if(typeID == SHAPE_SPLIT_LIST)
GradSplitListPost(node);
GradSplitListPost(node, isEfficient);
}
/*
......@@ -80,8 +80,10 @@ dE/db_1 = dE/dc_{split_1}
i.e.,
dE/da = split(dE/dc)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XShapeGrad::GradMerge(XTensor * node)
void XShapeGrad::GradMerge(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
XTensor * input = income.tails[0];
......@@ -162,8 +164,10 @@ dE/db = dE/dc_{split_1}
i.e.,
list(dE/da, dE/db, ...) = split(dE/dc)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XShapeGrad::GradMergeList(XTensor * node)
void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for MERGE!");
......@@ -239,8 +243,10 @@ c = split(a)
we have
dE/da = merge(dE/dc)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XShapeGrad::GradSplit(XTensor * node)
void XShapeGrad::GradSplit(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
XTensor * input = income.tails[0];
......@@ -279,8 +285,10 @@ list(c_1, ...) = split(a)
we have
dE/da = merge(dE/c_1, ...)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XShapeGrad::GradSplitList(XTensor * node)
void XShapeGrad::GradSplitList(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
XTensor * input = income.tails[0];
......@@ -299,8 +307,10 @@ have been processed. We do this in a post-processing
manner because we can fuse multiple memory copy jobs
at one time. This is good for system speed-up.
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XShapeGrad::GradSplitListPost(XTensor * node)
void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
{
/* we compute the gradient for the current node, rather than for
a child node, i.e., we use the outgoing edge here */
......@@ -351,8 +361,10 @@ c = unsqueeze(a)
we have
dE/da = reducesum(dE/dc)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XShapeGrad::GradUnsqueeze(XTensor * node)
void XShapeGrad::GradUnsqueeze(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for UNSQUEEZE!");
......@@ -379,8 +391,10 @@ c = Transpose(a)
we have
dE/da = Transpose(dE/dc)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XShapeGrad::GradTranspose(XTensor * node)
void XShapeGrad::GradTranspose(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for TRANSPOSE!");
......
......@@ -34,7 +34,7 @@ class XShapeGrad
public:
/* compute dE/dx of a node */
static
void MakeGrad(XTensor * node);
void MakeGrad(XTensor * node, bool isEfficient);
/* indicates whether the node is for a shaping operation */
static
......@@ -42,38 +42,38 @@ public:
/* post processing of a node */
static
void PostProcessing(XTensor * node, int typeId);
void PostProcessing(XTensor * node, int typeId, bool isEfficient);
private:
/* gradient computation for merge: c = merge(a, b, ...) */
static
void GradMerge(XTensor * node);
void GradMerge(XTensor * node, bool isEfficient);
/* gradient computation for merging a list of tensors : c = merge(list(a, b, ...)) */
static
void GradMergeList(XTensor * node);
void GradMergeList(XTensor * node, bool isEfficient);
/* gradient computation for split: c = split(a) */
static
void GradSplit(XTensor * node);
void GradSplit(XTensor * node, bool isEfficient);
/* gradient computation for splitting. we return the list of the splits : list(c_1, ...) = split(a) */
static
void GradSplitList(XTensor * node);
void GradSplitList(XTensor * node, bool isEfficient);
/* gradient computation for splitting. we return the list of the splits : list(c_1, ...) = split(a).
this method is called only when all nodes of splitting have been processed. We do this in a post-processing
manner because we can fuse multiple memory copy jobs at one time. This is good for system speed-up. */
static
void GradSplitListPost(XTensor * node);
void GradSplitListPost(XTensor * node, bool isEfficient);
/* gradient computation for unsqueezing a tensor : c = unsqueeze(a) */
static
void GradUnsqueeze(XTensor * node);
void GradUnsqueeze(XTensor * node, bool isEfficient);
/* gradient computation for transposing a tensor : c = transpose(a) */
static
void GradTranspose(XTensor * node);
void GradTranspose(XTensor * node, bool isEfficient);
};
......
......@@ -55,6 +55,7 @@ void XNetClearAll()
XNet::XNet()
{
nodes.Clear();
isGradEfficient = false;
}
/* de-constructor */
......@@ -115,6 +116,10 @@ void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
{
Traverse(roots);
/* label tensors where the backward computation is necessary */
if(isGradEfficient)
MakeEfficientNet();
for(int i = 0; i < nodes.count; i++){
XTensor * node = (XTensor*)nodes.Get(i);
node->visitMark = NODE_UNFINISHED;
......@@ -154,10 +159,20 @@ void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
CheckNTErrors(node->mem->bufUsed < BUF_PITCH, "Illegal access of buffer!");
}
if(node->visitMark == NODE_FINISHED)
continue;
BackwardNode(node);
if(node->visitMark != NODE_FINISHED)
BackwardNode(node, isGradEfficient);
if(isGradEfficient){
if(!XNoder::IsLeaf(node)){
XLink & outgo = node->outgo;
for(int i = 0; i < outgo.tailNum; i++){
XTensor * parent = outgo.tails[i];
ClearGrad(parent);
}
}
else
ClearGrad(node);
}
}
}
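The loop added after BackwardNode is where memory is actually reclaimed: once a node has been processed, each parent reached through outgo (a consumer of the node's value, already handled earlier in the backward order) is offered to ClearGrad, which frees the parent's gradient as soon as all of the parent's inputs are finished. The else-branch added to BackwardNode below, which marks leaves NODE_FINISHED, feeds the same test. The net effect, sketched for a simple chain (names are illustrative):

    /* chain  x (variable) -> h -> y (root), backward order y, h, x:
       - BackwardNode(y) writes dE/dh
       - BackwardNode(h) writes dE/dx; ClearGrad(y) then deletes dE/dy,
         because y is finished and its only input h is now finished too
       - dE/dx survives: x->isVar is true, so ClearGrad leaves it alone */

A gradient buffer therefore lives only while some still-unprocessed node needs it, rather than until the end of the whole backward pass.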
......@@ -179,27 +194,32 @@ void XNet::Backward(XList &roots, LOSS_FUNCTION_NAME loss)
/*
backward computation for a given node
>> node - the node keeps the result of an operation (e.g., activation function)
>> isEfficient - indicates whether the back-propagation is computed in an
efficient manner
*/
void XNet::BackwardNode(XTensor * node)
void XNet::BackwardNode(XTensor * node, bool isEfficient)
{
if(node == NULL || node->visitMark == NODE_FINISHED)
return;
if(!XNoder::IsLeaf(node)){
/* post processing for parent nodes */
BackwardNodePost(node);
BackwardNodePost(node, isEfficient);
/* process the current node */
if(XMathGrad::IsMathOP(node))
XMathGrad::MakeGrad(node);
XMathGrad::MakeGrad(node, isEfficient);
else if(XFuncGrad::IsFunc(node))
XFuncGrad::MakeGrad(node);
XFuncGrad::MakeGrad(node, isEfficient);
else if(XShapeGrad::IsShapeOP(node))
XShapeGrad::MakeGrad(node);
XShapeGrad::MakeGrad(node, isEfficient);
else{
ShowNTErrors("Wrong node type!");
}
}
else{
node->visitMark = NODE_FINISHED;
}
}
/*
......@@ -207,7 +227,7 @@ backward computation (in post processing) for a given node
>> node - the node whose parent nodes are not processed yet. So
we do the job at the child node.
*/
void XNet::BackwardNodePost(XTensor * node)
void XNet::BackwardNodePost(XTensor * node, bool isEfficient)
{
bool isSplitList = false;
XLink &outgo = node->outgo;
......@@ -217,7 +237,7 @@ void XNet::BackwardNodePost(XTensor * node)
}
if(isSplitList)
XShapeGrad::PostProcessing(node, SHAPE_SPLIT_LIST);
XShapeGrad::PostProcessing(node, SHAPE_SPLIT_LIST, isEfficient);
}
/*
......@@ -304,4 +324,62 @@ void XNet::Dump(FILE * file)
}
}
/*
set the gradient-efficient flag
>> flag - the flag value (true enables the gradient-efficient mode)
*/
void XNet::SetGradEfficientFlag(bool flag)
{
isGradEfficient = flag;
}
/* generate the gradient-efficient flag for every node */
void XNet::MakeEfficientNet()
{
/* back-propagation from output to input */
for(int i = 0; i < nodes.count; i++){
XTensor * node = (XTensor*)nodes.Get(i);
XLink &income = node->income;
for(int j = 0; j < income.tailNum; j++){
XTensor * child = income.tails[j];
if(child->isGrad || child->isVar){
node->SetGradFlag(true);
break;
}
}
}
}
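MakeEfficientNet switches a node's isGrad flag on as soon as one of its inputs keeps a gradient or is a variable; assuming the node list produced by Traverse is ordered inputs before outputs, the flag propagates transitively from the parameters to every node that lies on a path from them to the output. A hand-traced sketch (tensor names and the activation are illustrative):

    /* forward graph:   x (plain input)    w (variable)
                              \            /
                           h = MatrixMul(x, w)
                           y = Sigmoid(h)
       after MakeEfficientNet:
         h->isGrad == true   because its input w is a variable
         y->isGrad == true   because its input h now keeps a gradient
         x->isGrad stays false, so dE/dx is never allocated or computed */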
/*
clear the gradient information if the node is no longer needed
>> node - the node that we want to clear
*/
void XNet::ClearGrad(XTensor * node)
{
if(node->isVar)
return;
if(node->grad == NULL)
return;
if(node->visitMark != NODE_FINISHED)
return;
XLink & income = node->income;
bool finished = true;
for(int i = 0; i < income.tailNum; i++){
XTensor * child = income.tails[i];
if(child->visitMark != NODE_FINISHED){
finished = false;
break;
}
}
if(finished){
//fprintf(stderr, "del %d %ld\n", node->id, node->grad->unitNum);
delete node->grad;
node->grad = NULL;
}
}
}
\ No newline at end of file
......@@ -47,6 +47,9 @@ struct XNet
/* input nodes of the network */
XList inputs;
/* indicates whether the network keeps gradients only for parameter tensors */
bool isGradEfficient;
/* constructor */
XNet();
......@@ -71,10 +74,10 @@ struct XNet
void Backward(XList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward computation for a given node */
void BackwardNode(XTensor * node);
void BackwardNode(XTensor * node, bool isEfficient = false);
/* backward computation (in post processing) for a given node */
void BackwardNodePost(XTensor * node);
void BackwardNodePost(XTensor * node, bool isEfficient = false);
/* traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) */
......@@ -89,6 +92,15 @@ struct XNet
/* dump network information */
void Dump(FILE * file);
/* set the gradient-efficient flag */
void SetGradEfficientFlag(bool flag = true);
/* generate the gradient-efficient flag for every node */
void MakeEfficientNet();
/* clear the gradient information if the node is no longer needed */
void ClearGrad(XTensor * node);
};
/* we make a unique id for every tensor */
......
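Putting the pieces together, a caller opts into the gradient-efficient mode with one extra call before Backward. A minimal sketch built only from the XNet interface shown in this diff; the include path, the XList::Add call and the way the loss gradient reaches the root are assumptions rather than part of the commit:

    #include "XNet.h"              /* assumed include path */

    using namespace nts;

    /* run one backward pass while keeping gradients only for parameter tensors */
    void BackwardSketch(XTensor &output)
    {
        XNet net;
        net.SetGradEfficientFlag(true);    /* enable gradient-efficient mode */

        XList roots;
        roots.Add(&output);                /* assumed XList::Add(void*) signature */

        /* traverse the graph, mark gradient-keeping nodes (MakeEfficientNet),
           back-propagate, and free intermediate gradients as soon as they are
           consumed (ClearGrad); the loss name defaults to NOLOSS per XNet.h */
        net.Backward(roots);
    }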