Commit 64973687 by liyinqiao

Merge with the branch of huchi.

1. Support an in-place mode for some functions.
2. Add a new flexible constructor for XList.
3. Bug fixes.
parent a57ad688
......@@ -51,9 +51,18 @@ void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient)
XTensor * dedx = input->grad;
XTensor * dedy = output->grad;
//XTensor * tmp = NewTensorBufV2(output, output->devID, output->mem);
XTensor * tmp = NewTensor(output);
tmp->SetZeroAll();
XTensor* tmp;
/* store the result to a temporary node if the input has multiple children */
if (input->outgo.tailNum > 1) {
tmp = NewTensor(output);
tmp->SetZeroAll();
}
/* otherwise, the result is directly stored into the input node */
else {
tmp = dedx;
}
if (operID == FUNC_HARDTANH)
_HardTanHBackward(output, input, dedy, tmp);
......@@ -77,9 +86,10 @@ void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient)
ShowNTErrors("Unsupported backward computation! TODO!");
}
_SumMe(dedx, tmp);
//DelTensorBuf(tmp);
DelTensor(tmp);
if (input->outgo.tailNum > 1) {
_SumMe(dedx, tmp);
DelTensor(tmp);
}
}
node->visitMark = NODE_FINISHED;
......
......@@ -47,6 +47,8 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
XTensor * padding = NULL;
int leadingDim;
bool isRoot = XNoder::IsRoot(node);
if (!isEfficient || output->isGrad) {
XNoder::MakeGrad(output);
XTensor * dedy = output->grad;
......@@ -58,9 +60,14 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
gold = income.tails[1];
//XTensor * tmp = NewTensorBufV2(output, output->devID, output->mem);
XTensor* tmp = NewTensor(output);
tmp->SetZeroAll();
XTensor* tmp;
if (!isRoot) {
tmp = NewTensor(output);
tmp->SetZeroAll();
}
else{
tmp = dedy;
}
if (operID == LOSS_CROSSENTROPY) {
if (income.tailNum == 3)
......@@ -68,13 +75,17 @@ void XLossGrad::MakeGrad(XTensor * node, bool isEfficient)
leadingDim = income.GetParamInt(0);
CheckNTErrors(leadingDim >= 0 && leadingDim < output->order, "wrong leading dimension in logsoftmax!");
_CrossEntropyBackward(tmp, output, gold, weight, padding, leadingDim);
_SumMe(dedy, tmp);
if (isRoot)
gold->DestroyData();
else
_SumMe(dedy, tmp);
}
else {
ShowNTErrors("Unsupported backward computation! TODO!");
}
//DelTensorBuf(tmp);
DelTensor(tmp);
if (!isRoot)
DelTensor(tmp);
}
node->visitMark = NODE_FINISHED;
......
......@@ -24,6 +24,7 @@
#include "XBackwardMath.h"
#include "../tensor/XName.h"
#include "../tensor/core/CHeader.h"
#include "../tensor/function/FHeader.h"
namespace nts{
......@@ -71,6 +72,10 @@ void XMathGrad::MakeGrad(XTensor * node, bool isEfficient)
GradMultiply(node, isEfficient);
else if (operID == MATH_MULTIPLYDIM)
GradMultiplyDim(node, isEfficient);
else if (operID == MATH_MULTIPLY_INPLACE)
GradMultiply(node, isEfficient);
else if (operID == MATH_MULTIPLYDIM_INPLACE)
GradMultiplyDim(node, isEfficient);
else if (operID == MATH_MULTIPLYBROADCAST)
GradMultiplyBroadcast(node, isEfficient);
else if (operID == MATH_NEGATE)
......@@ -115,6 +120,8 @@ void XMathGrad::MakeGrad(XTensor * node, bool isEfficient)
GradReduceVariance(node, isEfficient);
else if (operID == MATH_MULANDSHIFT)
GradMulAndShift(node, isEfficient);
else if (operID == MATH_MLP)
GradMLP(node, isEfficient);
else{
ShowNTErrors("Unsupported backward computation! TODO!");
}
......@@ -1813,4 +1820,122 @@ void XMathGrad::GradMulAndShift(XTensor * node, bool isEfficient)
}
/*
gradient for operation
for c = relu(matmul(x, w) + b)
we have
dE/dx = dE/drelu * dE/dc * w^T
dE/dw = dE/drelu * dE/dc * x^T
dE/db = dE/drelu * dE/dc * x.reduce(0,...,n-1,n+1,...)
>> node - the node (c) for backward computation
>> isEfficient - indicates whether the computation is in
an efficient manner
*/
void XMathGrad::GradMLP(XTensor* node, bool isEfficient)
{
    XLink& income = node->income;
    CheckNTErrors(income.tailNum == 3, "wrong input tensor number");

    /* backward through the fused ReLU first (in place):
       after this call node->grad holds dE/d(matmul(x, w) + b) */
    _RectifyBackward(node, node, node->grad, node->grad);

    XTensor* x = income.tails[0];
    XTensor* w = income.tails[1];
    XTensor* b = income.tails[2];

    int n = income.GetParamInt(0);
    MATRIX_TRANS_TYPE transW = income.GetParamTrans(1);
    MATRIX_TRANS_TYPE transX = income.GetParamTrans(2);
    DTYPE alpha = income.GetParam(3);

    /* dE/db = dE/dc * x.reduce(0,...,n-1,n+1,...) */
    if (!isEfficient || b->isGrad) {
        XNoder::MakeGrad(b);

        int order = node->order;
        int dimSize[MAX_TENSOR_DIM_NUM];
        memcpy(dimSize, node->dimSize, sizeof(int) * node->order);

        /* compute dE/db */
        if (n == order - 1) {
            int reshapedSize[MAX_TENSOR_DIM_NUM];
            reshapedSize[0] = node->unitNum / dimSize[order - 1];
            reshapedSize[1] = dimSize[order - 1];

            /* we reshape dE/dc to a matrix whose column number is equal to the
               size of b. Then we can reduce the matrix into a row vector. */
            node->grad->Reshape(2, reshapedSize);
            _ReduceSum(node->grad, b->grad, 0);
            node->grad->Reshape(order, dimSize);
        }
        else {
            int reshapedSize[MAX_TENSOR_DIM_NUM];
            reshapedSize[0] = 1;
            reshapedSize[1] = dimSize[n];

            /* flatten the dimensions before n into reshapedSize[0] */
            for (int i = 0; i < order; i++) {
                if (i < n)
                    reshapedSize[0] *= dimSize[i];
            }

            /* the remaining dimensions after n are flattened into reshapedSize[2] */
            reshapedSize[2] = node->unitNum / (reshapedSize[0] * reshapedSize[1]);

            /* we reshape dE/dc to a 3D tensor of size (x, y, z) where y = |b|.
               Then reduce along with z and x to obtain dE/db. */
            node->grad->Reshape(3, reshapedSize);

            XTensor* interGrad = NewTensorBufV2(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem);
            _ReduceSum(node->grad, interGrad, 2);

            XTensor* bGradTMP = NewTensorBufV2(b->grad, b->devID, b->mem);
            _ReduceSum(interGrad, bGradTMP, 0);
            /* accumulate into dE/db rather than overwrite it */
            _Sum(bGradTMP, b->grad, b->grad);
            DelTensorBuf(bGradTMP);

            node->grad->Reshape(order, dimSize);
            DelTensorBuf(interGrad);
        }
    }

    if (!isEfficient || w->isGrad)
        XNoder::MakeGrad(w);

    if (!isEfficient || x->isGrad)
        XNoder::MakeGrad(x);

    /* compute dE/dx, dE/dw via the shared matmul backward routine.
       NOTE(review): dedx/dedw may be NULL when isEfficient skips a gradient;
       GradMatrixMul is assumed to honor the isEfficient flag — confirm. */
    XTensor* c = node;
    XTensor* dedc = node->grad;
    XTensor* dedw = w->grad;
    XTensor* dedx = x->grad;

    if (x->order == 2 && w->order == 2)
        GradMatrixMul(x, dedx, transX, w, dedw, transW, dedc, alpha, isEfficient);
    else if (transX == X_NOTRANS && x->order > 2 && w->order == 2) {
        /* collapse the leading dimensions of x and c so that the 2D
           matmul backward can be reused, then restore the shapes */
        int orderBackupX = x->order;
        int orderBackupC = c->order;
        int dimsBackupX[MAX_TENSOR_DIM_NUM];
        int dimsBackupC[MAX_TENSOR_DIM_NUM];
        memcpy(dimsBackupX, x->dimSize, sizeof(int) * x->order);
        memcpy(dimsBackupC, c->dimSize, sizeof(int) * c->order);

        x->Reshape(x->unitNum / x->GetDim(-1), x->GetDim(-1));
        c->Reshape(c->unitNum / c->GetDim(-1), c->GetDim(-1));
        if (!isEfficient || x->isGrad)
            dedx->Reshape(dedx->unitNum / dedx->GetDim(-1), dedx->GetDim(-1));
        dedc->Reshape(dedc->unitNum / dedc->GetDim(-1), dedc->GetDim(-1));

        GradMatrixMul(x, dedx, transX, w, dedw, transW, dedc, alpha, isEfficient);

        x->Reshape(orderBackupX, dimsBackupX);
        c->Reshape(orderBackupC, dimsBackupC);
        if (!isEfficient || x->isGrad)
            dedx->Reshape(orderBackupX, dimsBackupX);
        dedc->Reshape(orderBackupC, dimsBackupC);
    }

    node->visitMark = NODE_FINISHED;
}
}
......@@ -200,6 +200,10 @@ private:
/* gradient for operation */
static
void GradMulAndShift(XTensor * node, bool isEfficient);
/* gradient for MLP */
static
void GradMLP(XTensor* node, bool isEfficient);
};
}
......
......@@ -121,8 +121,13 @@ void XNet::Backward(TensorList &roots)
ClearGrad(parent);
}
if(XNoder::IsLeaf(node))
if (XNoder::IsLeaf(node)) {
ClearGrad(node);
if (node->outgo.tailNum == 0) {
delete node;
}
}
}
}
}
......@@ -333,7 +338,7 @@ void XNet::ShowNetwork(FILE * file, XTensor * node)
Traverse(roots);
XLink::ShowNode(file, node);
//XLink::ShowNode(file, node);
/* go over nodes in its topological order */
for(int i = nodes.count - 1; i >= 0; i--){
......
......@@ -25,6 +25,37 @@
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* if the operation exists in the list below,
we mark the input nodes as `unused` for backward */
const int unusedOPs[] {
/* math operators */
MATH_SUM, MATH_SUMDIM,
MATH_SUB, MATH_SUBDIM,
MATH_SCALE, MATH_SCALEANDSHIFT,
/* shape operators */
MOVEMENT_GATHER, SHAPE_UNSQUEEZE,
SHAPE_MERGE, SHAPE_SPLIT,
/* reduce operators */
REDUCE_REDUCESUMALL, FUNC_SOFTMAX
};
IntList unusedOPsList(&(unusedOPs[0]), sizeof(unusedOPs) / sizeof(unusedOPs[0]));
/* if the operation exists in the list below,
we mark the output as `reserved`, so the data array will be reserved */
const int usedOPs[]{
FUNC_SIGMOID, FUNC_SOFTMAX, FUNC_LOGSOFTMAX
};
IntList usedOPsList(&(usedOPs[0]), sizeof(usedOPs) / sizeof(usedOPs[0]));
/* if the operation exists in the list below,
we mark the first input node as `unused` for backward*/
const int unusedFirstOPs[]{
MATH_MULTIPLY_INPLACE, MATH_MULTIPLYDIM_INPLACE
};
IntList unusedFirstOPsList(&(unusedFirstOPs[0]), sizeof(unusedFirstOPs) / sizeof(unusedFirstOPs[0]));
int XLink::paramSize = PARAM_UNTI_SIZE;
/* constructor */
......@@ -39,7 +70,7 @@ XLink::XLink()
typeID = 0;
caculator = NULL;
}
/* destructor */
XLink::~XLink()
{
......@@ -356,7 +387,24 @@ void XLink::MakeLink(const TensorList * list, XTensor * h, int id)
if(t == NULL)
continue;
income.AddTail(t);
if (unusedOPsList.Contains(id) && t->reserved != 1) {
/* its data will be released when calling the destructor */
t->reserved = -1;
}
else {
/* otherwise it will be reserved for backward */
t->reserved = 1;
}
}
/* in these cases we only mark partial nodes as unused */
if (unusedFirstOPsList.Contains(id) && list->GetItem(0)->reserved != 1)
list->GetItem(0)->reserved = -1;
if (usedOPsList.Contains(id))
h->reserved = 1;
else if (h->reserved != 1)
h->reserved = -1;
/* backward */
for(int i = 0; i < list->count; i++){
......
......@@ -52,10 +52,25 @@ TensorListBase<T>::TensorListBase(int myMaxNum)
items = (T*)malloc(sizeof(T) * myMaxNum);
}
/*
constructor that copies items from a raw array
>> inputItems - pointer to the source items to copy (must not be NULL)
>> inputItemCount - number of items to copy (must be positive)
*/
template <typename T>
TensorListBase<T>::TensorListBase(const T* inputItems, int inputItemCount)
{
    /* validate both preconditions before touching memory:
       a NULL source with a positive count would make memcpy UB */
    CheckNTErrors(inputItems != NULL, "check if the input array is valid");
    CheckNTErrors(inputItemCount > 0, "check if the input number > 0");

    maxNum = inputItemCount;
    count = inputItemCount;
    items = (T*)malloc(sizeof(T) * inputItemCount);
    memcpy(items, inputItems, inputItemCount * sizeof(T));
}
/* copy-constructor */
template<typename T>
TensorListBase<T>::TensorListBase(const TensorListBase<T>& l)
{
CheckNTErrors(l.maxNum > 0, "check if the input number > 0");
maxNum = l.maxNum;
count = l.count;
items = (T*)malloc(sizeof(T) * maxNum);
......@@ -66,10 +81,11 @@ TensorListBase<T>::TensorListBase(const TensorListBase<T>& l)
template<typename T>
TensorListBase<T>::TensorListBase(TensorListBase<T>&& l)
{
    CheckNTErrors(l.maxNum > 0, "check if the input number > 0");
    /* steal the buffer instead of allocating and copying */
    maxNum = l.maxNum;
    count = l.count;
    items = l.items;
    /* leave the moved-from list in a consistent empty state:
       previously only items was nulled, so l kept a positive count
       with no storage behind it */
    l.items = NULL;
    l.count = 0;
    l.maxNum = 0;
}
/* assignment operator for a constant reference */
......@@ -299,6 +315,13 @@ inline int TensorListBase<T>::FindFirst(const T& item)
return -1;
}
/* check if an item exists in this list */
/* tell whether the given item appears in this list */
template<typename T>
bool TensorListBase<T>::Contains(const T& item)
{
    /* FindFirst reports -1 when the item is absent */
    int position = FindFirst(item);
    return position != -1;
}
/* clear the data array */
template <typename T>
void TensorListBase<T>::Clear()
......
......@@ -57,6 +57,9 @@ public:
/* constructor */
TensorListBase(int myMaxNum);
/* constructor */
TensorListBase(const T* inputItems, int inputItemCount);
/* copy-constructor */
TensorListBase(const TensorListBase<T>& l);
......@@ -105,6 +108,9 @@ public:
/* find the position of the first matched item */
int FindFirst(const T& item);
/* check if an item exists in this list */
bool Contains(const T& item);
/* clear the data array */
void Clear();
......
......@@ -79,6 +79,10 @@ const char * GetOPName(int type)
return "M_MULTIPLY";
else if (type == MATH_MULTIPLYDIM)
return "M_MULTIPLYDIM";
else if (type == MATH_MULTIPLY_INPLACE)
return "M_MULTIPLY_I";
else if (type == MATH_MULTIPLYDIM_INPLACE)
return "M_MULTIPLYDIM_I";
else if (type == MATH_MULTIPLYBROADCAST)
return "M_MULTIPLYBROADCAST";
else if (type == MATH_NEGATE)
......@@ -97,6 +101,8 @@ const char * GetOPName(int type)
return "M_SHIFT";
else if (type == MATH_MULANDSHIFT)
return "M_OPERATION";
else if (type == MATH_MLP)
return "M_MLP";
else if (type == MATH_SIGN)
return "M_SIGN";
else if (type == MATH_SUB)
......
......@@ -59,13 +59,16 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_MIN MATH_MAX + 1
#define MATH_MULTIPLY MATH_MIN + 1
#define MATH_MULTIPLYDIM MATH_MULTIPLY + 1
#define MATH_MULTIPLYBROADCAST MATH_MULTIPLYDIM + 1
#define MATH_MULTIPLY_INPLACE MATH_MULTIPLYDIM + 1
#define MATH_MULTIPLYDIM_INPLACE MATH_MULTIPLY_INPLACE + 1
#define MATH_MULTIPLYBROADCAST MATH_MULTIPLYDIM_INPLACE + 1
#define MATH_NEGATE MATH_MULTIPLYBROADCAST + 1
#define MATH_NORMALIZE MATH_NEGATE + 1
#define MATH_POWER MATH_NORMALIZE + 1
#define MATH_SCALEANDSHIFT MATH_POWER + 1
#define MATH_MULANDSHIFT MATH_SCALEANDSHIFT + 1
#define MATH_SCALE MATH_MULANDSHIFT + 1
#define MATH_MLP MATH_MULANDSHIFT + 1
#define MATH_SCALE MATH_MLP + 1
#define MATH_DESCALE MATH_SCALE + 1
#define MATH_SHIFT MATH_DESCALE + 1
#define MATH_MOD MATH_SHIFT + 1
......
......@@ -190,10 +190,11 @@ where i is the index of the item
>> a - tensor a
>> b - tensor b
>> inplace - indicates whether the result will be placed in the input tensor
>> leadingDim - the dimension along which we perform broadcasting
<< return - the product of the tensors
*/
XTensor Multiply(const XTensor &a, const XTensor &b, int leadingDim)
XTensor Multiply(const XTensor &a, const XTensor &b, bool inplace, int leadingDim)
{
XTensor c(&a);
c.SetTMPFlag();
......@@ -214,7 +215,10 @@ XTensor Multiply(const XTensor &a, const XTensor &b, int leadingDim)
/* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY);
if(inplace == false)
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY);
else
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY_INPLACE);
}
}
else if(n >= 0 && n < a.order){
......@@ -223,7 +227,10 @@ XTensor Multiply(const XTensor &a, const XTensor &b, int leadingDim)
/* tensor connections */
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
if (inplace == false)
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
else
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM_INPLACE);
XLink::AddParamToHeadInt(&c, n);
}
}
......
......@@ -48,7 +48,7 @@ make a new tensor to keep the result and return it
c(i) = a(i)*b(i)
where i is the index of the element
*/
XTensor Multiply(const XTensor &a, const XTensor &b, int leadingDim = 0);
XTensor Multiply(const XTensor &a, const XTensor &b, bool inplace = false, int leadingDim = 0);
/*
element-wise product of two tensors:
......
......@@ -165,15 +165,10 @@ XTensor Gather(XTensor &s, XTensor &index)
memcpy(dims, index.dimSize, index.order * sizeof(int));
dims[index.order] = t.GetDim(-1);
XTensor tt;
tt = Reshape(t, index.order + 1, dims);
t.Reshape(index.order + 1, dims);
delete[] dims;
return tt;
}
else {
return t;
}
return t;
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
......@@ -140,11 +140,12 @@ the same inference procedure as that with no use of dropout on the test data.
>> x - input tensor
>> dropProb - probability to set an element to zero
>> inplace - indicates whether the result will be placed in the input tensor
>> leadingDim - the dimension which we generate the random numbers and perform broadcasting
>> leadingDim2 - another dimension which we generate the random numbers and perform broadcasting
<< return - tensor after dropout
*/
XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim2)
XTensor Dropout(const XTensor &x, DTYPE dropProb, bool inplace, int leadingDim, int leadingDim2)
{
CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
......@@ -158,7 +159,7 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
_SetDataRandP(&mask, 0, 1.0F, dropProb, scaleFactor);
return Multiply(x, mask);
return Multiply(x, mask, inplace);
/* dropout with index */
/*int unitNum = floor(x.unitNum*dropProb);
......
......@@ -41,7 +41,7 @@ void _DropoutBackward(const XTensor * y, const XTensor * x,
unsigned int seed, DTYPE dropProb, int leadingDim = -1);
/* dropout function */
XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim = -1, int leadingDim2 = -1);
XTensor Dropout(const XTensor &x, DTYPE dropProb, bool inplace = false, int leadingDim = -1, int leadingDim2 = -1);
/* dropout function without broadcast */
XTensor DropoutWithoutBroadcast(const XTensor &x, DTYPE dropProb);
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论