Commit e84a764b by xuchen

add the implementation of several backward functions and improve their comments

parent c103b9f3
@@ -73,7 +73,7 @@ void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient)
         _SoftmaxBackward(NULL, output, input, dedy, tmp, NULL, leadDim, NOLOSS);
     }
     else {
-        ShowNTErrors("Wrong activation function type!");
+        ShowNTErrors("Unsupported backward computation! TODO!");
     }
 
     _SumMe(dedx, tmp);
......
@@ -70,7 +70,7 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
         _SumMe(dedy, tmp);
     }
     else {
-        ShowNTErrors("Wrong activation function type!");
+        ShowNTErrors("Unsupported backward computation! TODO!");
     }
 
     //DelTensorBuf(tmp);
     DelTensor(tmp);
......
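In both XFuncGrad and XLossGrad the computed gradient is folded into the existing one with _SumMe rather than assigned, because a tensor that feeds several consumers collects one gradient contribution per consumer. A plain C++ illustration of that accumulation rule (nothing below is NiuTensor API; the names are illustrative only):

// A value used by several downstream nodes receives one gradient contribution
// per consumer, so backward code accumulates into the existing gradient
// (the role of _SumMe above) instead of overwriting it.
#include <cstdio>

int main()
{
    double xGrad = 0.0;          // dE/dx, starts at zero

    // x is used twice in the graph: E = 3*x + 5*x
    double fromFirstUse  = 3.0;  // contribution from the first consumer
    double fromSecondUse = 5.0;  // contribution from the second consumer

    xGrad += fromFirstUse;       // accumulate, do not overwrite
    xGrad += fromSecondUse;

    printf("dE/dx = %.1f\n", xGrad);   // 8.0 = d(3x + 5x)/dx
    return 0;
}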
@@ -79,6 +79,12 @@ void XMathGrad::MakeGrad(XTensor * node, bool isEfficient)
         GradNormalize(node, isEfficient);
     else if (operID == MATH_POWER)
         GradPower(node, isEfficient);
+    else if (operID == MATH_RECIPROCAL)
+        GradReciprocal(node, isEfficient);
+    else if (operID == MATH_SQRT)
+        GradSqrt(node, isEfficient);
+    else if (operID == MATH_SQUARE)
+        GradSquare(node, isEfficient);
     else if (operID == MATH_SCALEANDSHIFT)
         GradScaleAndShift(node, isEfficient);
     else if (operID == MATH_SCALE)
@@ -101,6 +107,8 @@ void XMathGrad::MakeGrad(XTensor * node, bool isEfficient)
         GradReduceMean(node, isEfficient);
     else if (operID == REDUCE_REDUCESUM)
         GradReduceSum(node, isEfficient);
+    else if (operID == REDUCE_REDUCESUMALL)
+        GradReduceSumAll(node, isEfficient);
     else if (operID == REDUCE_REDUCESUMSQUARED)
         GradReduceSumSquared(node, isEfficient);
     else if (operID == REDUCE_REDUCEVARIANCE)
@@ -108,7 +116,7 @@ void XMathGrad::MakeGrad(XTensor * node, bool isEfficient)
     else if (operID == MATH_MULANDSHIFT)
         GradMulAndShift(node, isEfficient);
     else {
-        ShowNTErrors("TODO!");
+        ShowNTErrors("Unsupported backward computation! TODO!");
     }
 }
@@ -967,7 +975,100 @@ void XMathGrad::GradPower(XTensor * node, bool isEfficient)
         XTensor * tmp = NewTensorBufV2(a, a->devID, a->mem);
         _Power(a, tmp, p - 1.0F);
-        _ScaleAndShiftMe(tmp, p);
+        _ScaleMe(tmp, p);
+        _Multiply(node->grad, tmp, a->grad, 1.0F);
+
+        DelTensorBuf(tmp);
+    }
+
+    node->visitMark = NODE_FINISHED;
+}
+
+/*
+gradient for reciprocal
+for
+c = reciprocal(a)
+we have
+dE/da = (dE/dc) * -a^(-2)
+>> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in an efficient manner
+*/
+void XMathGrad::GradReciprocal(XTensor* node, bool isEfficient)
+{
+    XLink& income = node->income;
+    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for RECIPROCAL!");
+
+    XTensor* a = income.tails[0];
+
+    /* dE/da = (dE/dc) * -a^(-2) */
+    if (!isEfficient || a->isGrad) {
+        XNoder::MakeGrad(a);
+
+        XTensor* tmp = NewTensorBufV2(a, a->devID, a->mem);
+        _Power(a, tmp, -2.0F);
+        _NegateMe(tmp);
+        _Multiply(node->grad, tmp, a->grad, 1.0F);
+
+        DelTensorBuf(tmp);
+    }
+
+    node->visitMark = NODE_FINISHED;
+}
+
+/*
+gradient for sqrt
+for
+c = sqrt(a)
+we have
+dE/da = (dE/dc) * 0.5 * a^(-0.5)
+>> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in an efficient manner
+*/
+void XMathGrad::GradSqrt(XTensor * node, bool isEfficient)
+{
+    XLink &income = node->income;
+    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SQRT!");
+
+    XTensor * a = income.tails[0];
+
+    /* dE/da = (dE/dc) * 0.5 * a^(-0.5) */
+    if (!isEfficient || a->isGrad) {
+        XNoder::MakeGrad(a);
+
+        XTensor* tmp = NewTensorBufV2(a, a->devID, a->mem);
+        _Power(a, tmp, -0.5F);
+        _ScaleMe(tmp, 0.5F);
+        _Multiply(node->grad, tmp, a->grad, 1.0F);
+
+        DelTensorBuf(tmp);
+    }
+
+    node->visitMark = NODE_FINISHED;
+}
+
+/*
+gradient for square
+for
+c = square(a)
+we have
+dE/da = (dE/dc) * 2 * a
+>> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in an efficient manner
+*/
+void XMathGrad::GradSquare(XTensor * node, bool isEfficient)
+{
+    XLink &income = node->income;
+    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SQUARE!");
+
+    XTensor * a = income.tails[0];
+
+    /* dE/da = (dE/dc) * 2 * a */
+    if (!isEfficient || a->isGrad) {
+        XNoder::MakeGrad(a);
+
+        XTensor* tmp = NewTensorBufV2(a, a->devID, a->mem);
+        _CopyValues(a, tmp);
+        _ScaleMe(tmp, 2.0F);
         _Multiply(node->grad, tmp, a->grad, 1.0F);
 
         DelTensorBuf(tmp);
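All four element-wise backward routines above (power, reciprocal, sqrt, square) follow the same pattern: materialize dc/da in a temporary tensor and multiply it into the incoming gradient dE/dc. A standalone sanity check of the scalar derivatives they rely on, in plain C++ with no NiuTensor dependency (names here are illustrative only):

// Compares the closed-form derivatives used by GradPower, GradReciprocal,
// GradSqrt and GradSquare against central finite differences.
// Standalone C++; nothing here is NiuTensor API.
#include <cmath>
#include <cstdio>

int main()
{
    const double a = 1.7;              // sample point (positive, so sqrt/reciprocal are defined)
    const double p = 3.0;              // exponent for the power case
    const double h = 1e-6;             // finite-difference step

    // analytic derivatives implemented by the backward functions
    double dPower      = p * std::pow(a, p - 1.0);   // d(a^p)/da    = p * a^(p-1)
    double dReciprocal = -1.0 / (a * a);             // d(1/a)/da    = -a^(-2)
    double dSqrt       = 0.5 / std::sqrt(a);         // d(sqrt a)/da = 0.5 * a^(-0.5)
    double dSquare     = 2.0 * a;                    // d(a^2)/da    = 2 * a

    // central finite differences
    double nPower      = (std::pow(a + h, p) - std::pow(a - h, p)) / (2.0 * h);
    double nReciprocal = (1.0 / (a + h) - 1.0 / (a - h)) / (2.0 * h);
    double nSqrt       = (std::sqrt(a + h) - std::sqrt(a - h)) / (2.0 * h);
    double nSquare     = ((a + h) * (a + h) - (a - h) * (a - h)) / (2.0 * h);

    printf("power:      %.8f vs %.8f\n", dPower, nPower);
    printf("reciprocal: %.8f vs %.8f\n", dReciprocal, nReciprocal);
    printf("sqrt:       %.8f vs %.8f\n", dSqrt, nSqrt);
    printf("square:     %.8f vs %.8f\n", dSquare, nSquare);
    return 0;
}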
@@ -1447,6 +1548,38 @@ void XMathGrad::GradReduceSum(XTensor * node, bool isEfficient)
 }
 
+/*
+gradient for reduceSumAll
+for
+c = reduceSumAll(a)
+we have
+dE/da = dE/dc * 1
+>> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+   an efficient manner
+*/
+void XMathGrad::GradReduceSumAll(XTensor * node, bool isEfficient)
+{
+    XLink &income = node->income;
+    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for Reduce!");
+
+    XTensor * a = income.tails[0];
+
+    /* dE/da = dE/dc * 1 */
+    if (!isEfficient || a->isGrad) {
+        XNoder::MakeGrad(a);
+
+        XTensor * tmp = NewTensorBufV2(a, a->devID, a->mem);
+        DTYPE value = node->grad->Get0D();
+        tmp->SetDataFixed(value);
+
+        _Sum(a->grad, tmp, a->grad);
+
+        DelTensorBuf(tmp);
+    }
+
+    node->visitMark = NODE_FINISHED;
+}
+
 /*
 gradient for reduceSumSquared
 for
 c = \sum_i (a_i - b)^2
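Since reduceSumAll collapses the whole tensor into a scalar c = \sum_i a_i, every element has dc/da_i = 1, so each element of a simply receives the upstream scalar dE/dc; GradReduceSumAll broadcasts that scalar with SetDataFixed and accumulates it with _Sum. A plain C++ illustration of the same idea (no NiuTensor types involved):

// For c = sum over all elements of a, each element of a receives the same
// upstream scalar gradient dE/dc. Standalone illustration only.
#include <cstdio>
#include <vector>

int main()
{
    std::vector<double> a = {1.0, 2.0, 3.0};
    std::vector<double> aGrad(a.size(), 0.0);   // accumulated dE/da
    double dEdc = 0.25;                         // upstream gradient of the scalar sum

    for (std::size_t i = 0; i < a.size(); ++i)
        aGrad[i] += dEdc * 1.0;                 // d(sum)/da_i = 1, accumulate

    for (double g : aGrad)
        printf("%.2f ", g);                     // prints: 0.25 0.25 0.25
    printf("\n");
    return 0;
}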
@@ -1583,7 +1716,7 @@ void XMathGrad::GradMulAndShift(XTensor * node, bool isEfficient)
     int n = income.GetParamInt(0);
     MATRIX_TRANS_TYPE transW = income.GetParamTrans(1);
     MATRIX_TRANS_TYPE transX = income.GetParamTrans(2);
+    DTYPE alpha = income.GetParam(3);
 
     /* dE/db = dE/dc * x.reduce(0,...,n-1,n+1,...) */
     if (!isEfficient || b->isGrad) {
         XNoder::MakeGrad(b);
@@ -1652,7 +1785,7 @@ void XMathGrad::GradMulAndShift(XTensor * node, bool isEfficient)
         XTensor * dedx = x->grad;
 
         if (x->order == 2 && w->order == 2)
-            GradMatrixMul(x, dedx, transX, w, dedw, transW, dedc, 1.0F, isEfficient);
+            GradMatrixMul(x, dedx, transX, w, dedw, transW, dedc, alpha, isEfficient);
         else if (transX == X_NOTRANS && x->order > 2 && w->order == 2) {
             int orderBackupX = x->order;
             int orderBackupC = c->order;
@@ -1667,14 +1800,13 @@ void XMathGrad::GradMulAndShift(XTensor * node, bool isEfficient)
             dedx->Reshape(dedx->unitNum / dedx->GetDim(-1), dedx->GetDim(-1));
             dedc->Reshape(dedc->unitNum / dedc->GetDim(-1), dedc->GetDim(-1));
 
-            GradMatrixMul(x, dedx, transX, w, dedw, transW, dedc, 1.0F, isEfficient);
+            GradMatrixMul(x, dedx, transX, w, dedw, transW, dedc, alpha, isEfficient);
 
             x->Reshape(orderBackupX, dimsBackupX);
             c->Reshape(orderBackupC, dimsBackupC);
-            dedx->Reshape(orderBackupX, dimsBackupX);
+            if (!isEfficient || x->isGrad)
+                dedx->Reshape(orderBackupX, dimsBackupX);
             dedc->Reshape(orderBackupC, dimsBackupC);
         }
 
     node->visitMark = NODE_FINISHED;
......
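GradMulAndShift now reads the alpha that the forward operator stored as its fourth parameter and passes it to GradMatrixMul instead of a hard-coded 1.0F. Assuming the forward computation is c = alpha * (x × w) + b (an assumption, not stated in this diff), that scaling factor has to reappear in dE/dx and dE/dw. A scalar illustration in plain C++ (not NiuTensor code):

// Scalar illustration; the forward formula c = alpha * x * w + b is an
// assumption used only to show why alpha must enter the backward pass.
#include <cstdio>

int main()
{
    double x = 3.0, w = -2.0, alpha = 0.5;
    double dEdc = 1.0;                 // upstream gradient

    double dEdx = dEdc * alpha * w;    // would be off by a factor alpha if alpha were hard-coded to 1.0
    double dEdw = dEdc * alpha * x;
    double dEdb = dEdc;                // the shift term is unscaled

    printf("dE/dx = %.2f, dE/dw = %.2f, dE/db = %.2f\n", dEdx, dEdw, dEdb);
    return 0;
}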
@@ -126,6 +126,18 @@ private:
     static
     void GradPower(XTensor * node, bool isEfficient);
 
+    /* gradient for reciprocal */
+    static
+    void GradReciprocal(XTensor* node, bool isEfficient);
+
+    /* gradient for sqrt */
+    static
+    void GradSqrt(XTensor* node, bool isEfficient);
+
+    /* gradient for square */
+    static
+    void GradSquare(XTensor* node, bool isEfficient);
+
     /* gradient for ScaleAndShift */
     static
     void GradScaleAndShift(XTensor * node, bool isEfficient);
@@ -146,10 +158,10 @@ private:
     static
     void GradSub(XTensor * node, bool isEfficient);
 
     /* gradient for sub with one dimension: c = a - b * \beta
        where the size of b is equal to that of one dimension of a */
     static
     void GradSubDim(XTensor * node, bool isEfficient);
 
     /* gradient for sum: c = a + b * \beta */
     static
@@ -173,6 +185,10 @@ private:
     static
     void GradReduceSum(XTensor * node, bool isEfficient);
 
+    /* gradient for reduceSumAll */
+    static
+    void GradReduceSumAll(XTensor * node, bool isEfficient);
+
     /* gradient for reduceSumSquared */
     static
     void GradReduceSumSquared(XTensor * node, bool isEfficient);
......
@@ -44,7 +44,10 @@ void XShapeGrad::MakeGrad(XTensor * node, bool isEfficient)
     XLink &income = node->income;
     int operID = income.typeID;
 
-    if (operID == MOVEMENT_COPYINDEXED)
+    if (operID == GETANDSET_CONVERTDATATYPE)
+        GradConvertDataType(node, isEfficient);
+    else if (operID == MOVEMENT_COPYINDEXED)
         GradCopyIndexed(node, isEfficient);
     else if (operID == MOVEMENT_GATHER)
         GradGather(node, isEfficient);
@@ -65,7 +68,7 @@ void XShapeGrad::MakeGrad(XTensor * node, bool isEfficient)
     else if (operID == SHAPE_UNSQUEEZE)
         GradUnsqueeze(node, isEfficient);
     else {
-        ShowNTErrors("TODO!");
+        ShowNTErrors("Unsupported backward computation! TODO!");
     }
 }
@@ -83,6 +86,34 @@ void XShapeGrad::PostProcessing(XTensor * node, int typeID, bool isEfficient)
     GradSplitListPost(node, isEfficient);
 }
+
+/*
+gradient computation for convertdatatype
+for
+b = convertdatatype(a)
+we have
+dE/da = convertdatatype(dE/db)
+>> node - the node (b) for backward computation
+>> isEfficient - indicates whether the computation is in
+   an efficient manner
+*/
+void XShapeGrad::GradConvertDataType(XTensor* node, bool isEfficient)
+{
+    XLink& income = node->income;
+    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for CONVERTDATATYPE!");
+
+    XTensor* a = income.tails[0];
+
+    if (!isEfficient || a->isGrad) {
+        XNoder::MakeGrad(a);
+
+        XTensor* tmp = NewTensorBufV2(a, a->devID, a->mem);
+        _ConvertDataType(node->grad, tmp);
+        _SumMe(a->grad, tmp);
+
+        DelTensorBuf(tmp);
+    }
+}
+
 /*
 gradient computation for copying indexed sub-tensors
 for
@@ -301,7 +332,7 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
         if (i > 1)
             CheckNTErrors(_IsSameShaped(last, tail), "Input tensors must be of the same size!");
 
         if (tail->outgo.tailNum > 1)
             mergeOnly = false;
 
         last = tail;
......
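A datatype conversion is an element-wise identity up to precision, so GradConvertDataType only casts the upstream gradient back to the input's type and accumulates it. A standalone C++ illustration of the same rule (no NiuTensor types involved):

// Forward casts the value, backward casts the gradient back, which is what
// GradConvertDataType does with _ConvertDataType + _SumMe above.
#include <cstdio>

int main()
{
    double a = 1.234567890123;     // input in "higher" precision
    float  b = (float)a;           // forward: b = convertdatatype(a)

    float  dEdb = 0.5f;            // upstream gradient w.r.t. b
    double dEda = (double)dEdb;    // backward: dE/da = convertdatatype(dE/db)

    printf("b = %.7f, dE/da = %.3f\n", b, dEda);
    return 0;
}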
@@ -34,7 +34,7 @@ class XShapeGrad
 public:
     /* compute dE/dx of a node */
     static
-    void MakeGrad(XTensor * node, bool isEfficent);
+    void MakeGrad(XTensor * node, bool isEfficient);
 
     /* indicates whether the node is for a shaping operation */
     static
@@ -42,55 +42,59 @@ public:
     /* post processing of a node */
     static
-    void PostProcessing(XTensor * node, int typeId, bool isEfficent);
+    void PostProcessing(XTensor * node, int typeId, bool isEfficient);
 
 private:
+    /* gradient computation for convertdatatype: b = convertdatatype(a) */
+    static
+    void GradConvertDataType(XTensor * node, bool isEfficient);
+
     /* gradient computation for copying indexed sub-tensors: b = copyindexed(a, srcIndex, indexSize, tgtIndex, copyNum) */
     static
-    void GradCopyIndexed(XTensor * node, bool isEfficent);
+    void GradCopyIndexed(XTensor * node, bool isEfficient);
 
     /* gradient computation for copying indexed sub-tensors: b = gather(a, index) */
     static
-    void GradGather(XTensor * node, bool isEfficent);
+    void GradGather(XTensor * node, bool isEfficient);
 
     /* gradient computation for dropout with index: b = dropoutwithindex(a, index) */
     static
-    void GradDropoutWithIndex(XTensor * node, bool isEfficent);
+    void GradDropoutWithIndex(XTensor * node, bool isEfficient);
 
     /* gradient computation for merge: c = merge(a, b, ...) */
     static
-    void GradMerge(XTensor * node, bool isEfficent);
+    void GradMerge(XTensor * node, bool isEfficient);
 
     /* gradient computation for merging a list of tensors : c = merge(list(a, b, ...)) */
     static
-    void GradMergeList(XTensor * node, bool isEfficent);
+    void GradMergeList(XTensor * node, bool isEfficient);
 
     /* gradient computation for transposing a tensor : b = transpose(a) */
     static
-    void GradTranspose(XTensor * node, bool isEfficent);
+    void GradTranspose(XTensor * node, bool isEfficient);
 
     /* gradient computation for reshaping a tensor: c = reshape(a) */
     static
-    void GradReshape(XTensor * node, bool isEfficent);
+    void GradReshape(XTensor * node, bool isEfficient);
 
     /* gradient computation for split: c = split(a) */
     static
-    void GradSplit(XTensor * node, bool isEfficent);
+    void GradSplit(XTensor * node, bool isEfficient);
 
     /* gradient computation for splitting. we return the list of the splits : list(c_1, ...) = split(a) */
     static
-    void GradSplitList(XTensor * node, bool isEfficent);
+    void GradSplitList(XTensor * node, bool isEfficient);
 
     /* gradient computation for splitting. we return the list of the splits : list(c_1, ...) = split(a).
        this method is called only when all nodes of splitting have been processed. We do this in a post-processing
        manner because we can fuse multiple memory copy jobs at one time. This is good for system speed-up. */
     static
-    void GradSplitListPost(XTensor * node, bool isEfficent);
+    void GradSplitListPost(XTensor * node, bool isEfficient);
 
     /* gradient computation for unsqueezing a tensor : c = unsqueeze(a) */
     static
-    void GradUnsqueeze(XTensor * node, bool isEfficent);
+    void GradUnsqueeze(XTensor * node, bool isEfficient);
 };
......