Commit 93f51095 by liyinqiao

Bug fixed

1. Merge with the Huchi branch (replace every requireLink argument with the per-tensor enableGrad flag, which enables gradient computation for a tensor);
2. Increase the global memory buffer size (this may make memory usage slightly larger than in the old version).
parent 4040dde0
...@@ -68,6 +68,9 @@ void BackwardTest() ...@@ -68,6 +68,9 @@ void BackwardTest()
XTensor a; XTensor a;
XTensor b; XTensor b;
XTensor c; XTensor c;
a.enableGrad = true;
b.enableGrad = false;
c.enableGrad = false;
XTensor mean; XTensor mean;
XTensor origin; XTensor origin;
InitTensor2D(&a, 2, 3); InitTensor2D(&a, 2, 3);
...@@ -85,14 +88,15 @@ void BackwardTest() ...@@ -85,14 +88,15 @@ void BackwardTest()
b.Set1D(2.0F, 0); b.Set1D(2.0F, 0);
b.Set1D(1.0F, 1); b.Set1D(1.0F, 1);
c = DivDim(a, b, 0); DivDim(a, b, c, 0);
c.Dump(stderr, "c:"); c.Dump(stderr, "c:");
auto loss = CrossEntropy(c, a);
//XLink::ShowNetwork(stderr, &c); //XLink::ShowNetwork(stderr, &c);
net.Backward(c); net.Backward(loss);
net.Dump(stderr); a.grad->Dump(stderr);
} }
......
...@@ -765,15 +765,15 @@ void XMathGrad::GradMultiplyDim(XTensor * node, bool isEfficient) ...@@ -765,15 +765,15 @@ void XMathGrad::GradMultiplyDim(XTensor * node, bool isEfficient)
/* dE/da */ /* dE/da */
_MultiplyDim(node->grad, b, a->grad, n, 1.0F); _MultiplyDim(node->grad, b, a->grad, n, 1.0F);
/* dE/db */ /* dE/db */
int order = a->order; int order = a->order;
int dimSize[MAX_TENSOR_DIM_NUM]; int dimSize[MAX_TENSOR_DIM_NUM];
memcpy(dimSize, a->dimSize, sizeof(int) * a->order); memcpy(dimSize, a->dimSize, sizeof(int) * a->order);
XTensor * bGradTMP = NewTensorBuf(node->grad, node->devID, node->mem); XTensor * bGradTMP = NewTensorBuf(node->grad, node->devID, node->mem);
_Multiply(node->grad, a, bGradTMP); _Multiply(node->grad, a, bGradTMP);
if(n == order - 1){ if(n == order - 1){
int reshapedSize[MAX_TENSOR_DIM_NUM]; int reshapedSize[MAX_TENSOR_DIM_NUM];
reshapedSize[0] = a->unitNum/dimSize[order - 1]; reshapedSize[0] = a->unitNum/dimSize[order - 1];
...@@ -1078,91 +1078,91 @@ dE/db = - dE/dc * b.reduce(0,...,n-1,n+1,...) * \beta ...@@ -1078,91 +1078,91 @@ dE/db = - dE/dc * b.reduce(0,...,n-1,n+1,...) * \beta
*/ */
void XMathGrad::GradSubDim(XTensor * node, bool isEfficient) void XMathGrad::GradSubDim(XTensor * node, bool isEfficient)
{ {
XLink &income = node->income; XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUBDIM!"); CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUBDIM!");
XTensor * a = income.tails[0]; XTensor * a = income.tails[0];
XTensor * b = income.tails[1]; XTensor * b = income.tails[1];
int n = income.GetParamInt(0); int n = income.GetParamInt(0);
DTYPE beta = income.GetParam(1); DTYPE beta = income.GetParam(1);
XNoder::MakeGrad(a); XNoder::MakeGrad(a);
XNoder::MakeGrad(b); XNoder::MakeGrad(b);
_Sum(a->grad, node->grad, a->grad); _Sum(a->grad, node->grad, a->grad);
int order = a->order; int order = a->order;
int dimSize[MAX_TENSOR_DIM_NUM]; int dimSize[MAX_TENSOR_DIM_NUM];
memcpy(dimSize, a->dimSize, sizeof(int) * a->order); memcpy(dimSize, a->dimSize, sizeof(int) * a->order);
if(n == order - 1){ if(n == order - 1){
int reshapedSize[MAX_TENSOR_DIM_NUM]; int reshapedSize[MAX_TENSOR_DIM_NUM];
reshapedSize[0] = a->unitNum / dimSize[order - 1]; reshapedSize[0] = a->unitNum / dimSize[order - 1];
reshapedSize[1] = dimSize[order - 1]; reshapedSize[1] = dimSize[order - 1];
/* we reshape dE/dc to a matrix whose column number is equal to the /* we reshape dE/dc to a matrix whose column number is equal to the
size of b. Then we can reduce the matrix into a row vector. */ size of b. Then we can reduce the matrix into a row vector. */
node->grad->Reshape(2, reshapedSize); node->grad->Reshape(2, reshapedSize);
//if(b->outgo.tailNum > 1){ //if(b->outgo.tailNum > 1){
XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem); XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
_ReduceSum(node->grad, bGradTMP, 0); _ReduceSum(node->grad, bGradTMP, 0);
if(beta != 1.0F) if(beta != 1.0F)
_ScaleAndShiftMe(bGradTMP, beta); _ScaleAndShiftMe(bGradTMP, beta);
_Sub(b->grad, bGradTMP, b->grad); _Sub(b->grad, bGradTMP, b->grad);
DelTensorBuf(bGradTMP); DelTensorBuf(bGradTMP);
/*} /*}
else{ else{
_ReduceSum(node->grad, b->grad, 0); _ReduceSum(node->grad, b->grad, 0);
if(beta != 1.0F) if(beta != 1.0F)
_ScaleAndShiftMe(b->grad, beta); _ScaleAndShiftMe(b->grad, beta);
_ScaleAndShiftMe(b->grad, -1.0F); _ScaleAndShiftMe(b->grad, -1.0F);
}*/ }*/
node->grad->Reshape(order, dimSize); node->grad->Reshape(order, dimSize);
} }
else{ else{
int reshapedSize[MAX_TENSOR_DIM_NUM]; int reshapedSize[MAX_TENSOR_DIM_NUM];
reshapedSize[0] = 1; reshapedSize[0] = 1;
reshapedSize[1] = dimSize[n]; reshapedSize[1] = dimSize[n];
reshapedSize[2] = 1; reshapedSize[2] = 1;
for(int i = 0; i < order; i++){ for(int i = 0; i < order; i++){
if(i < n) if(i < n)
reshapedSize[0] *= dimSize[i]; reshapedSize[0] *= dimSize[i];
} }
reshapedSize[2] = a->unitNum / (reshapedSize[0] * reshapedSize[1]); reshapedSize[2] = a->unitNum / (reshapedSize[0] * reshapedSize[1]);
/* we reshape dE/dc to a 3D tensor of size (x, y, z) where y = |b|. /* we reshape dE/dc to a 3D tensor of size (x, y, z) where y = |b|.
Then reduce along with z and x to obtain dE/db. */ Then reduce along with z and x to obtain dE/db. */
node->grad->Reshape(3, reshapedSize); node->grad->Reshape(3, reshapedSize);
XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem); XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem);
_ReduceSum(node->grad, interGrad, 2); _ReduceSum(node->grad, interGrad, 2);
//if(b->outgo.tailNum > 1){ //if(b->outgo.tailNum > 1){
XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem); XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
_ReduceSum(interGrad, bGradTMP, 0); _ReduceSum(interGrad, bGradTMP, 0);
if(beta != 1.0F) if(beta != 1.0F)
_ScaleAndShiftMe(bGradTMP, beta); _ScaleAndShiftMe(bGradTMP, beta);
_Sub(b->grad, bGradTMP, b->grad); _Sub(b->grad, bGradTMP, b->grad);
DelTensorBuf(bGradTMP); DelTensorBuf(bGradTMP);
/*} /*}
else{ else{
_ReduceSum(interGrad, b->grad, 0); _ReduceSum(interGrad, b->grad, 0);
if(beta != 1.0F) if(beta != 1.0F)
_ScaleAndShiftMe(b->grad, beta); _ScaleAndShiftMe(b->grad, beta);
_ScaleAndShiftMe(b->grad, -1.0F); _ScaleAndShiftMe(b->grad, -1.0F);
}*/ }*/
node->grad->Reshape(order, dimSize); node->grad->Reshape(order, dimSize);
DelTensorBuf(interGrad); DelTensorBuf(interGrad);
} }
node->visitMark = NODE_FINISHED; node->visitMark = NODE_FINISHED;
} }
/* /*
......
...@@ -146,10 +146,10 @@ private: ...@@ -146,10 +146,10 @@ private:
static static
void GradSub(XTensor * node, bool isEfficient); void GradSub(XTensor * node, bool isEfficient);
/* gradient for sub with one dimension: c = a - b * \beta /* gradient for sub with one dimension: c = a - b * \beta
where the size of b is equal to that of one dimension of a */ where the size of b is equal to that of one dimension of a */
static static
void GradSubDim(XTensor * node, bool isEfficient); void GradSubDim(XTensor * node, bool isEfficient);
/* gradient for sum: c = a + b * \beta */ /* gradient for sum: c = a + b * \beta */
static static
......
...@@ -450,7 +450,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient) ...@@ -450,7 +450,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
if(income.typeID == SHAPE_SPLIT_LIST){ if(income.typeID == SHAPE_SPLIT_LIST){
int w = income.GetParamInt(0); int w = income.GetParamInt(0);
int splitID = income.GetParamInt(1); int splitID = income.GetParamInt(1);
if(whereToSplit < 0) if(whereToSplit < 0)
whereToSplit = w; whereToSplit = w;
splitNum++; splitNum++;
......
...@@ -267,7 +267,7 @@ void XNet::BackwardNode(XTensor * node, bool isEfficent) ...@@ -267,7 +267,7 @@ void XNet::BackwardNode(XTensor * node, bool isEfficent)
else if(XShapeGrad::IsShapeOP(node)) else if(XShapeGrad::IsShapeOP(node))
XShapeGrad::MakeGrad(node, isEfficent); XShapeGrad::MakeGrad(node, isEfficent);
else if(XLossGrad::IsLossOP(node)) else if(XLossGrad::IsLossOP(node))
XLossGrad::MakeGrad(node, isEfficent); XLossGrad::MakeGrad(node, isEfficent);
else{ else{
ShowNTErrors("Wrong node type!"); ShowNTErrors("Wrong node type!");
} }
...@@ -468,7 +468,7 @@ search for a node in a top-down manner by its name ...@@ -468,7 +468,7 @@ search for a node in a top-down manner by its name
*/ */
//XTensor * XNet::SearchNode(XTensor * top, const char * name) //XTensor * XNet::SearchNode(XTensor * top, const char * name)
//{ //{
//return XLink::SearchNode(top, name); //return XLink::SearchNode(top, name);
//} //}
} }
...@@ -475,12 +475,12 @@ void Train(const char * train, bool isShuffled, FNNModel &model) ...@@ -475,12 +475,12 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
Clear(model, true); Clear(model, true);
/* forward + backward process */ /* forward + backward process */
/* this is implemented by gather function */ /* this is implemented by gather function */
ForwardAutoDiff(ngrams, ngramNum, output, model); ForwardAutoDiff(ngrams, ngramNum, output, model);
/* this is implemented by multiply function */ /* this is implemented by multiply function */
//ForwardAutoDiff(inputs, output, model); //ForwardAutoDiff(inputs, output, model);
lossTensor = CrossEntropy(output, gold); lossTensor = CrossEntropy(output, gold);
/* automatic differentiation */ /* automatic differentiation */
...@@ -1168,12 +1168,12 @@ void Test(const char * test, const char * result, FNNModel &model) ...@@ -1168,12 +1168,12 @@ void Test(const char * test, const char * result, FNNModel &model)
/* forward computation */ /* forward computation */
Forward(inputs, output, model, net); Forward(inputs, output, model, net);
} }
else { else {
/* this is implemented by gather function */ /* this is implemented by gather function */
ForwardAutoDiff(ngrams, ngramNum, output, model); ForwardAutoDiff(ngrams, ngramNum, output, model);
/* this is implemented by multiply function */ /* this is implemented by multiply function */
//ForwardAutoDiff(inputs, output, model); //ForwardAutoDiff(inputs, output, model);
} }
/* prediction probabilities */ /* prediction probabilities */
......
...@@ -58,7 +58,7 @@ public: ...@@ -58,7 +58,7 @@ public:
XTensor wa; XTensor wa;
XTensor wbig; XTensor wbig;
/* size of transformed Q and K */ /* size of transformed Q and K */
int dk; int dk;
......
...@@ -86,7 +86,7 @@ struct SampleNode ...@@ -86,7 +86,7 @@ struct SampleNode
int * p; int * p;
int size; int size;
int value; int value;
int key; int key;
}; };
int CompareSampleNode(const void * a, const void * b) int CompareSampleNode(const void * a, const void * b)
......
...@@ -297,12 +297,12 @@ void T2TSearch::Generate(T2TStateBundle * beam) ...@@ -297,12 +297,12 @@ void T2TSearch::Generate(T2TStateBundle * beam)
row means a previous state. The column number is size-of-beam \times vocab-size. We, row means a previous state. The column number is size-of-beam \times vocab-size. We,
therefore, divide entries of the top-k index by vocab-size to compute the id of the therefore, divide entries of the top-k index by vocab-size to compute the id of the
previous state for each hypothesis in the top-k list. */ previous state for each hypothesis in the top-k list. */
_DescaleMe(preID, sizeVocab); Descale(preID, sizeVocab);
/* Then, we do something similar to "preID". For the top-k predictions, we need /* Then, we do something similar to "preID". For the top-k predictions, we need
to know their indices in the vocabulary. We compute the offset of each prediction to know their indices in the vocabulary. We compute the offset of each prediction
in the vocabulary by dividing it with vocab-size and computing the remainder. */ in the vocabulary by dividing it with vocab-size and computing the remainder. */
_ModMe(index, sizeVocab); ModMe(index, sizeVocab);
score.Reshape(order, dims); score.Reshape(order, dims);
......
...@@ -90,7 +90,7 @@ template <typename T> ...@@ -90,7 +90,7 @@ template <typename T>
void TensorListBase<T>::Add(T&& item) void TensorListBase<T>::Add(T&& item)
{ {
if (count == maxNum) { if (count == maxNum) {
T* newItems; T* newItems;
if (mem == NULL) if (mem == NULL)
newItems = new T[maxNum * 2 + 1]; newItems = new T[maxNum * 2 + 1];
...@@ -101,7 +101,6 @@ void TensorListBase<T>::Add(T&& item) ...@@ -101,7 +101,6 @@ void TensorListBase<T>::Add(T&& item)
maxNum = maxNum * 2 + 1; maxNum = maxNum * 2 + 1;
} }
items[count++] = item; items[count++] = item;
} }
/* /*
...@@ -111,18 +110,18 @@ add an item into the list ...@@ -111,18 +110,18 @@ add an item into the list
template <typename T> template <typename T>
void TensorListBase<T>::Add(const T& item) void TensorListBase<T>::Add(const T& item)
{ {
if (count == maxNum) { if (count == maxNum) {
T* newItems; T* newItems;
if (mem == NULL) if (mem == NULL)
newItems = new T[maxNum * 2 + 1]; newItems = new T[maxNum * 2 + 1];
else else
newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1)); newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
memcpy(newItems, items, sizeof(T) * maxNum); memcpy(newItems, items, sizeof(T) * maxNum);
items = newItems; items = newItems;
maxNum = maxNum * 2 + 1; maxNum = maxNum * 2 + 1;
} }
items[count++] = item; items[count++] = item;
} }
/* /*
...@@ -186,21 +185,21 @@ void TensorListBase<T>::Insert(int pos, const T& item) ...@@ -186,21 +185,21 @@ void TensorListBase<T>::Insert(int pos, const T& item)
template<typename T> template<typename T>
void TensorListBase<T>::Insert(int pos, T&& item) void TensorListBase<T>::Insert(int pos, T&& item)
{ {
if (count == maxNum) { if (count == maxNum) {
T* newItems; T* newItems;
if (mem == NULL) if (mem == NULL)
newItems = new T[maxNum * 2 + 1]; newItems = new T[maxNum * 2 + 1];
else else
newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1)); newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
memcpy(newItems, items, sizeof(T) * maxNum); memcpy(newItems, items, sizeof(T) * maxNum);
items = newItems; items = newItems;
maxNum = maxNum * 2 + 1; maxNum = maxNum * 2 + 1;
} }
for (int i = count - 1; i >= pos; i--) for (int i = count - 1; i >= pos; i--)
items[i + 1] = items[i]; items[i + 1] = items[i];
items[pos] = item; items[pos] = item;
count++; count++;
} }
/* get the item at position i */ /* get the item at position i */
...@@ -226,8 +225,8 @@ inline void TensorListBase<T>::SetItem(int i, const T& item) ...@@ -226,8 +225,8 @@ inline void TensorListBase<T>::SetItem(int i, const T& item)
template<typename T> template<typename T>
inline void TensorListBase<T>::SetItem(int i, T&& item) inline void TensorListBase<T>::SetItem(int i, T&& item)
{ {
if (i >= 0 && i < count) if (i >= 0 && i < count)
items[i] = std::move(item); items[i] = std::move(item);
} }
/* /*
...@@ -250,7 +249,7 @@ inline int TensorListBase<T>::FindFirst(const T& item) ...@@ -250,7 +249,7 @@ inline int TensorListBase<T>::FindFirst(const T& item)
template <typename T> template <typename T>
void TensorListBase<T>::Clear() void TensorListBase<T>::Clear()
{ {
count = 0; count = 0;
} }
/* /*
......
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
/* the nts (NiuTrans.Tensor) namespace */ /* the nts (NiuTrans.Tensor) namespace */
namespace nts { namespace nts {
/* the TensorListBase class */ /* the TensorListBase class */
template <typename T> template <typename T>
struct TensorListBase { struct TensorListBase {
...@@ -66,57 +66,57 @@ public: ...@@ -66,57 +66,57 @@ public:
/* add an item into the list */ /* add an item into the list */
void Add(T&& item); void Add(T&& item);
/* add an item into the list */ /* add an item into the list */
void Add(const T& item); void Add(const T& item);
/* add a number of items into the list */ /* add a number of items into the list */
void Add(T* inputItems, int inputItemCount); void Add(T* inputItems, int inputItemCount);
/* append a list to the current list */ /* append a list to the current list */
void AddList(TensorListBase* l); void AddList(TensorListBase* l);
/* insert an item to the given position of the list */ /* insert an item to the given position of the list */
void Insert(int pos, const T& item); void Insert(int pos, const T& item);
/* insert an item to the given position of the list */ /* insert an item to the given position of the list */
void Insert(int pos, T&& item); void Insert(int pos, T&& item);
/* get the item at position i */ /* get the item at position i */
T& GetItem(int i) const; T& GetItem(int i) const;
/* set the item at position i */ /* set the item at position i */
void SetItem(int i, const T& item); void SetItem(int i, const T& item);
/* set the item at position i */ /* set the item at position i */
void SetItem(int i, T&& item); void SetItem(int i, T&& item);
/* find the position of the first matched item */ /* find the position of the first matched item */
int FindFirst(const T& item); int FindFirst(const T& item);
/* clear the data array */ /* clear the data array */
void Clear(); void Clear();
/* sort the list */ /* sort the list */
void Sort(int itemSize); void Sort(int itemSize);
/* reverse the list */ /* reverse the list */
void Reverse(); void Reverse();
/* remove the item at position i */ /* remove the item at position i */
void Remove(int i); void Remove(int i);
/* copy the list */ /* copy the list */
TensorListBase* Copy(XMem* myMem); TensorListBase* Copy(XMem* myMem);
/* shuffle the list */ /* shuffle the list */
void Shuffle(int nround = 10, int beg = -1, int len = 0); void Shuffle(int nround = 10, int beg = -1, int len = 0);
/* short */ /* short */
T& operator[] (int i) { T& operator[] (int i) {
return GetItem(i); return GetItem(i);
}; };
T& Get(int i) { return GetItem(i); }; T& Get(int i) { return GetItem(i); };
void Set(int i, T item) { SetItem(i, item); }; void Set(int i, T item) { SetItem(i, item); };
}; };
struct XTensor; struct XTensor;
......
...@@ -307,7 +307,7 @@ void XMem::SetComputationMode(bool myIsForComputation) ...@@ -307,7 +307,7 @@ void XMem::SetComputationMode(bool myIsForComputation)
cublasDestroy(cublasHandle); cublasDestroy(cublasHandle);
if(myIsForComputation) if(myIsForComputation)
CheckNTErrors((enum curandStatus)cublasCreate(&cublasHandle) == CURAND_STATUS_SUCCESS, CheckNTErrors((enum curandStatus)cublasCreate(&cublasHandle) == CURAND_STATUS_SUCCESS,
"Cannot create the cublas handle."); "Cannot create the cublas handle.");
SetDevice(devIDBackup); SetDevice(devIDBackup);
#endif #endif
...@@ -323,11 +323,11 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum) ...@@ -323,11 +323,11 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum)
{ {
delete[] memIndex; delete[] memIndex;
delete[] memIndex2; delete[] memIndex2;
delete[] minSizeIndex; delete[] minSizeIndex;
nodeNum = indexSize; nodeNum = indexSize;
nodeNumUsed = minSizeNum * 2; nodeNumUsed = minSizeNum * 2;
indexEntryNum = minSizeNum; indexEntryNum = minSizeNum;
memIndex = new MPieceNode[nodeNum]; memIndex = new MPieceNode[nodeNum];
memset(memIndex, 0, sizeof(MPieceNode) * nodeNum); memset(memIndex, 0, sizeof(MPieceNode) * nodeNum);
...@@ -335,12 +335,12 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum) ...@@ -335,12 +335,12 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum)
memIndex2 = new MPieceNode[nodeNum]; memIndex2 = new MPieceNode[nodeNum];
memset(memIndex2, 0, sizeof(MPieceNode) * nodeNum); memset(memIndex2, 0, sizeof(MPieceNode) * nodeNum);
minSizeIndex = new MTYPE[indexEntryNum]; minSizeIndex = new MTYPE[indexEntryNum];
memset(minSizeIndex, 0, sizeof(MTYPE) * indexEntryNum); memset(minSizeIndex, 0, sizeof(MTYPE) * indexEntryNum);
minSizeIndex[0] = minSizeFirst; minSizeIndex[0] = minSizeFirst;
for(int i = 1; i < indexEntryNum; i++) for(int i = 1; i < indexEntryNum; i++)
minSizeIndex[i] = minSizeIndex[i - 1] * 2; minSizeIndex[i] = minSizeIndex[i - 1] * 2;
indexOffset = GetMSB(minSizeFirst); indexOffset = GetMSB(minSizeFirst);
} }
...@@ -759,8 +759,8 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex) ...@@ -759,8 +759,8 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex)
/* if all index nodes are used, we rebuild the index to release the nodes that are free */ /* if all index nodes are used, we rebuild the index to release the nodes that are free */
if(nodeNumUsed == nodeNum){ if(nodeNumUsed == nodeNum){
RebuildIndex(); RebuildIndex();
CheckNTErrors(nodeNumUsed < nodeNum, "No enough index nodes for the memory pool!"); CheckNTErrors(nodeNumUsed < nodeNum, "No enough index nodes for the memory pool!");
} }
/*if(testxmemid == 30){ /*if(testxmemid == 30){
...@@ -963,8 +963,8 @@ release a piece of memory as "free" ...@@ -963,8 +963,8 @@ release a piece of memory as "free"
*/ */
void XMem::ReleaseStandard(int myDevID, void * p, MTYPE size) void XMem::ReleaseStandard(int myDevID, void * p, MTYPE size)
{ {
if(p == NULL) if(p == NULL)
return; return;
if(size <= minSizeIndex[0]) if(size <= minSizeIndex[0])
size = minSizeIndex[0]; size = minSizeIndex[0];
...@@ -1094,7 +1094,7 @@ void XMem::RebuildIndex() ...@@ -1094,7 +1094,7 @@ void XMem::RebuildIndex()
block->mem = NULL; block->mem = NULL;
} }
else{ else{
/* if the block is in use, we build the index */ /* if the block is in use, we build the index */
int pieceCount = 0; int pieceCount = 0;
MTYPE size = 0; MTYPE size = 0;
MHeader * newLast = NULL; MHeader * newLast = NULL;
...@@ -1562,9 +1562,9 @@ void XMemManager::GetBufferSize(MTYPE freeMem, MTYPE * myBufSize) ...@@ -1562,9 +1562,9 @@ void XMemManager::GetBufferSize(MTYPE freeMem, MTYPE * myBufSize)
if (freeMem >= MILLION * 512){ if (freeMem >= MILLION * 512){
*myBufSize = MILLION * 128; *myBufSize = MILLION * 128;
if (freeMem >= MILLION * 1024) { if (freeMem >= MILLION * 1024) {
*myBufSize = MILLION * 128; *myBufSize = MILLION * 256;
if (freeMem >= MILLION * 2048) if (freeMem >= MILLION * 2048)
*myBufSize = MILLION * 128; *myBufSize = MILLION * 512;
} }
} }
} }
......
...@@ -189,7 +189,7 @@ void XQueue::RunJobConsumer(int jobDevID) ...@@ -189,7 +189,7 @@ void XQueue::RunJobConsumer(int jobDevID)
isJobQueue = true; isJobQueue = true;
jobDequeuerArgs->Clear(); jobDequeuerArgs->Clear();
// warning: this may cause unknown error // warning: this may cause unknown error
jobDequeuerArgs->Add((XTensor*)this); jobDequeuerArgs->Add((XTensor*)this);
jobDequeuerArgs->Add(jobDevID >= 0 ? (XTensor*)(devids + jobDevID) : (XTensor*)&cpuid); jobDequeuerArgs->Add(jobDevID >= 0 ? (XTensor*)(devids + jobDevID) : (XTensor*)&cpuid);
......
...@@ -101,7 +101,7 @@ XTensor::XTensor(const XTensor * reference) ...@@ -101,7 +101,7 @@ XTensor::XTensor(const XTensor * reference)
SetDataPointer(); SetDataPointer();
id = MakeTensorID(); id = MakeTensorID();
InitTensorV2(this, reference); InitTensor(this, reference);
} }
/* /*
...@@ -173,7 +173,7 @@ XTensor::XTensor(const XTensor &reference) ...@@ -173,7 +173,7 @@ XTensor::XTensor(const XTensor &reference)
else{ else{
devID = reference.devID; devID = reference.devID;
mem = reference.mem; mem = reference.mem;
InitTensorV2(this, &reference); InitTensor(this, &reference);
_CopyValues(&reference, this); _CopyValues(&reference, this);
} }
...@@ -279,6 +279,7 @@ void XTensor::Init() ...@@ -279,6 +279,7 @@ void XTensor::Init()
isTmp = false; isTmp = false;
isGrad = false; isGrad = false;
isVar = false; isVar = false;
enableGrad = false;
visitMark = 0; visitMark = 0;
grad = NULL; grad = NULL;
} }
...@@ -309,6 +310,7 @@ void XTensor::ShallowCopy(const XTensor &tensor) ...@@ -309,6 +310,7 @@ void XTensor::ShallowCopy(const XTensor &tensor)
{ {
strcpy(name, tensor.name); strcpy(name, tensor.name);
order = tensor.order; order = tensor.order;
enableGrad = tensor.enableGrad;
memcpy(dimSize, tensor.dimSize, sizeof(int) * MAX_TENSOR_DIM_NUM); memcpy(dimSize, tensor.dimSize, sizeof(int) * MAX_TENSOR_DIM_NUM);
memcpy(dimSizeRDI, tensor.dimSizeRDI, sizeof(int) * MAX_TENSOR_DIM_NUM); memcpy(dimSizeRDI, tensor.dimSizeRDI, sizeof(int) * MAX_TENSOR_DIM_NUM);
dataType = tensor.dataType; dataType = tensor.dataType;
...@@ -1315,7 +1317,7 @@ set the value of a cell ...@@ -1315,7 +1317,7 @@ set the value of a cell
*/ */
bool XTensor::Set(DTYPE value, int index[], int size) bool XTensor::Set(DTYPE value, int index[], int size)
{ {
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type."); CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
return SetToDevice(devID, GetCell(index, size), value); return SetToDevice(devID, GetCell(index, size), value);
} }
...@@ -2445,6 +2447,7 @@ void InitTensor(XTensor * tensor, const XTensor * reference) ...@@ -2445,6 +2447,7 @@ void InitTensor(XTensor * tensor, const XTensor * reference)
if(reference->order < 0) if(reference->order < 0)
return; return;
tensor->enableGrad = reference->enableGrad;
InitTensor(tensor, reference->order, reference->dimSize, InitTensor(tensor, reference->order, reference->dimSize,
reference->dataType, reference->denseRatio, reference->dataType, reference->denseRatio,
reference->devID, reference->mem); reference->devID, reference->mem);
...@@ -2460,6 +2463,7 @@ void InitTensorV2(XTensor * tensor, const XTensor * reference) ...@@ -2460,6 +2463,7 @@ void InitTensorV2(XTensor * tensor, const XTensor * reference)
if(reference->order < 0) if(reference->order < 0)
return; return;
tensor->enableGrad = reference->enableGrad;
InitTensorV2(tensor, reference->order, reference->dimSize, InitTensorV2(tensor, reference->order, reference->dimSize,
reference->dataType, reference->devID); reference->dataType, reference->devID);
} }
...@@ -2474,6 +2478,7 @@ void InitTensorOnCPU(XTensor * tensor, const XTensor * reference) ...@@ -2474,6 +2478,7 @@ void InitTensorOnCPU(XTensor * tensor, const XTensor * reference)
if(reference->order < 0) if(reference->order < 0)
return; return;
tensor->enableGrad = reference->enableGrad;
InitTensorV2(tensor, reference->order, reference->dimSize, InitTensorV2(tensor, reference->order, reference->dimSize,
reference->dataType, -1); reference->dataType, -1);
} }
......
...@@ -151,6 +151,9 @@ public: ...@@ -151,6 +151,9 @@ public:
/* indicates whether the tensor keeps the gradient when used as model parameters */ /* indicates whether the tensor keeps the gradient when used as model parameters */
bool isGrad; bool isGrad;
/* indicates whether the gradient of the tensor should be computed */
bool enableGrad;
/* indicates whether the tensor is used as parameters (or variables) */ /* indicates whether the tensor is used as parameters (or variables) */
bool isVar; bool isVar;
...@@ -450,7 +453,7 @@ extern int MakeTensorID(); ...@@ -450,7 +453,7 @@ extern int MakeTensorID();
void InitTensor(XTensor * tensor, void InitTensor(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL); const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense XTensor V2 */ /* initialize a dense XTensor V2 */
void InitTensorV2(XTensor * tensor, void InitTensorV2(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
......
...@@ -142,6 +142,23 @@ void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim) ...@@ -142,6 +142,23 @@ void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
_Div(a, b, a, alpha, leadingDim); _Div(a, b, a, alpha, leadingDim);
} }
/*
element-wise division of two tensors (done in place)
the result is kept in the input tensor a and nothing is returned
a(i) = a(i)/b(i) + \alpha * a(i)
where i is the index of the item
>> a - tensor a (the dividend; it also receives the result)
>> b - tensor b (the divisor)
>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting
*/
void DivMe(XTensor& a, const XTensor& b, DTYPE alpha, int leadingDim)
{
/* a is passed to _Div as both the first operand and the output tensor */
_Div(&a, &b, &a, alpha, leadingDim);
}
/* /*
return a dimension if the division is performed as DivDim (in more details in DivDim.h) return a dimension if the division is performed as DivDim (in more details in DivDim.h)
>> a - a tensor >> a - a tensor
...@@ -229,9 +246,8 @@ where i is the index of the item ...@@ -229,9 +246,8 @@ where i is the index of the item
>> c - result tensor >> c - result tensor
>> alpha - the coefficient >> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting >> leadingDim - the dimension along which we perform broadcasting
>> requireLink - if add operation to network
*/ */
void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim, bool requireLink) void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -245,7 +261,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin ...@@ -245,7 +261,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin
/* call _Div function */ /* call _Div function */
_Div(&a, &b, &c, 0, leadingDim); _Div(&a, &b, &c, 0, leadingDim);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIV); XLink::MakeLink(&a, &b, &c, MATH_DIV);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
...@@ -256,7 +272,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin ...@@ -256,7 +272,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin
/* call _DivDim function */ /* call _DivDim function */
_DivDim(&a, &b, &c, n, alpha); _DivDim(&a, &b, &c, n, alpha);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM); XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -122,7 +122,7 @@ where i is the item index ...@@ -122,7 +122,7 @@ where i is the item index
*/ */
void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim) void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{ {
int leadingDimRDI = a->order - leadingDim - 1; int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum), CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!"); CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
......
...@@ -40,6 +40,7 @@ a(i) = a(i)/b(i) + \alpha * a(i) ...@@ -40,6 +40,7 @@ a(i) = a(i)/b(i) + \alpha * a(i)
where i is the index of the element where i is the index of the element
*/ */
void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0); void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0);
void DivMe(XTensor & a, const XTensor & b, DTYPE alpha = 0.0, int leadingDim = 0);
/* /*
element-wise division of two tensors (return an XTensor structure) element-wise division of two tensors (return an XTensor structure)
...@@ -54,7 +55,7 @@ element-wise division of two tensors: ...@@ -54,7 +55,7 @@ element-wise division of two tensors:
c(i) = a(i)/b(i) + \alpha * c(i) c(i) = a(i)/b(i) + \alpha * c(i)
where i is the index of the element where i is the index of the element
*/ */
void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0, bool requireLink = false); void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -183,9 +183,8 @@ i.e., a is divided with b by broadcasting ...@@ -183,9 +183,8 @@ i.e., a is divided with b by broadcasting
>> c - where we put result. we save it in a if c is NULL >> c - where we put result. we save it in a if c is NULL
>> n - the dimension index >> n - the dimension index
>> alpha - the scaling factor >> alpha - the scaling factor
>> requireLink - if add operation to network
*/ */
void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha, bool requireLink) void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -194,7 +193,7 @@ void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha, ...@@ -194,7 +193,7 @@ void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha,
/* call _Div function */ /* call _Div function */
_DivDim(&a, &b, &c, n, alpha); _DivDim(&a, &b, &c, n, alpha);
if (requireLink) { if (c.enableGrad == true) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM); XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -59,7 +59,7 @@ c(i) = a/b + \alpha * c ...@@ -59,7 +59,7 @@ c(i) = a/b + \alpha * c
where the size of b is equal to the n-th dimension of a, where the size of b is equal to the n-th dimension of a,
i.e., a is divided with b by broadcasting i.e., a is divided with b by broadcasting
*/ */
void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha = (DTYPE)0.0, bool requireLink = false); void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha = (DTYPE)0.0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -130,6 +130,17 @@ void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha) ...@@ -130,6 +130,17 @@ void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha)
} }
/* /*
mask entries of a given tensor (on site):
a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0
where i is the index of the element
*/
void MaskMe(XTensor& a, const XTensor& mask, DTYPE alpha)
{
    /* tensor a is handed to _Mask as both its first and its third argument,
       i.e., this is the on-site form of the masking operation */
    XTensor * self = &a;

    _Mask(self, &mask, self, alpha);
}
/*
mask entries of a given tensor (return an XTensor structure): mask entries of a given tensor (return an XTensor structure):
a(i) = a(i) if mask(i) is non-zero a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0 a(i) = alpha if mask(i) = 0
......
...@@ -43,6 +43,7 @@ a(i) = alpha if mask(i) = 0 ...@@ -43,6 +43,7 @@ a(i) = alpha if mask(i) = 0
where i is the index of the element where i is the index of the element
*/ */
void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha); void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha);
void MaskMe(XTensor & a, const XTensor & mask, DTYPE alpha);
/* /*
mask entries of a given tensor (return an XTensor structure): mask entries of a given tensor (return an XTensor structure):
......
...@@ -304,7 +304,7 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, ...@@ -304,7 +304,7 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor &c, const XTensor &b, MATRIX_TRANS_TYPE transposedB, XTensor &c,
DTYPE alpha, XPRunner * parallelRunner, bool requireLink) DTYPE alpha, XPRunner * parallelRunner)
{ {
CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!"); CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!"); CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
...@@ -339,7 +339,7 @@ void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, ...@@ -339,7 +339,7 @@ void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
/* call _MatrixMul function */ /* call _MatrixMul function */
_MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner); _MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL); XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, transposedA); XLink::AddParamToHeadTrans(&c, transposedA);
...@@ -400,7 +400,7 @@ XTensor MatrixMul(const XTensor &a, const XTensor &b, ...@@ -400,7 +400,7 @@ XTensor MatrixMul(const XTensor &a, const XTensor &b,
} }
void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c, void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
DTYPE alpha, XPRunner * parallelRunner, bool requireLink) DTYPE alpha, XPRunner * parallelRunner)
{ {
CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!"); CheckNTErrors(a.dataType == b.dataType, "Input tensors should have the same data type!");
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!"); CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
...@@ -435,7 +435,7 @@ void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c, ...@@ -435,7 +435,7 @@ void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
/* call _MatrixMul function */ /* call _MatrixMul function */
_MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner); _MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL); XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, X_NOTRANS); XLink::AddParamToHeadTrans(&c, X_NOTRANS);
......
...@@ -60,14 +60,14 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor ...@@ -60,14 +60,14 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL); DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB, void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor &b, MATRIX_TRANS_TYPE transposedB,
XTensor &c, DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL, bool requireLink = false); XTensor &c, DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
/* matrix multiplication with no transposition c = a * b * alpha*/ /* matrix multiplication with no transposition c = a * b * alpha*/
XTensor MatrixMul(const XTensor &a, const XTensor &b, XTensor MatrixMul(const XTensor &a, const XTensor &b,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL); DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c, void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL, bool requireLink = false); DTYPE alpha = (DTYPE)1.0, XPRunner * parallelRunner = NULL);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -54,15 +54,15 @@ void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -54,15 +54,15 @@ void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2), CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2),
"Input tensors must have a order = 2!"); "Input tensors must have a order = 2!");
int an = a->dimSize[0], am = a->dimSize[1]; int an = a->dimSize[0], am = a->dimSize[1];
int bn = b->dimSize[0], bm = b->dimSize[1]; int bn = b->dimSize[0], bm = b->dimSize[1];
int cn = c->dimSize[0], cm = c->dimSize[1]; int cn = c->dimSize[0], cm = c->dimSize[1];
int am2 = transposedA == X_TRANS ? an : am; int am2 = transposedA == X_TRANS ? an : am;
int an2 = transposedA == X_TRANS ? am : an; int an2 = transposedA == X_TRANS ? am : an;
int bm2 = transposedB == X_TRANS ? bn : bm; int bm2 = transposedB == X_TRANS ? bn : bm;
int bn2 = transposedB == X_TRANS ? bm : bn; int bn2 = transposedB == X_TRANS ? bm : bn;
int cm2 = cm; int cm2 = cm;
int cn2 = cn; int cn2 = cn;
CheckNTErrors((am2 == bn2 && an2 == cn2 && bm2 == cm2), CheckNTErrors((am2 == bn2 && an2 == cn2 && bm2 == cm2),
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
......
...@@ -40,21 +40,21 @@ argument7: matrix c (c=a*b*\alpha + c*beta) ...@@ -40,21 +40,21 @@ argument7: matrix c (c=a*b*\alpha + c*beta)
*/ */
void _MatrixMul2DMultiTheading(TensorList * args) void _MatrixMul2DMultiTheading(TensorList * args)
{ {
CheckNTErrors(args->count == 2, "invalid argument number!"); CheckNTErrors(args->count == 2, "invalid argument number!");
IntList * indexArgs = (IntList*)args->GetItem(0); IntList * indexArgs = (IntList*)args->GetItem(0);
TensorList * matrixArgs = (TensorList*)args->GetItem(1); TensorList * matrixArgs = (TensorList*)args->GetItem(1);
CheckNTErrors(indexArgs->count == 4, "invalid argument number!"); CheckNTErrors(indexArgs->count == 4, "invalid argument number!");
CheckNTErrors(matrixArgs->count == 5, "invalid argument number!"); CheckNTErrors(matrixArgs->count == 5, "invalid argument number!");
XTensor * a = matrixArgs->GetItem(0); XTensor * a = matrixArgs->GetItem(0);
XTensor * b = matrixArgs->GetItem(1); XTensor * b = matrixArgs->GetItem(1);
XTensor * c = matrixArgs->GetItem(2); XTensor * c = matrixArgs->GetItem(2);
DTYPE alpha = *(DTYPE*)(matrixArgs->GetItem(3)); DTYPE alpha = *(DTYPE*)(matrixArgs->GetItem(3));
DTYPE beta = *(DTYPE*)(matrixArgs->GetItem(4)); DTYPE beta = *(DTYPE*)(matrixArgs->GetItem(4));
int x1 = indexArgs->GetItem(0); int x1 = indexArgs->GetItem(0);
int y1 = indexArgs->GetItem(1); int y1 = indexArgs->GetItem(1);
int x2 = indexArgs->GetItem(2); int x2 = indexArgs->GetItem(2);
int y2 = indexArgs->GetItem(3); int y2 = indexArgs->GetItem(3);
#ifdef FAST_MATRIX #ifdef FAST_MATRIX
int am = a->dimSize[1]; int am = a->dimSize[1];
......
...@@ -143,6 +143,23 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim) ...@@ -143,6 +143,23 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
_Multiply(a, b, a, alpha, leadingDim); _Multiply(a, b, a, alpha, leadingDim);
} }
/*
element-wise product of two tensors (do it on site)
keep the result in the input tensor a and return nothing
a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the item
>> a - tensor a (where we keep the result)
>> b - tensor b
>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting
*/
void MultiplyMe(XTensor& a, const XTensor& b, DTYPE alpha, int leadingDim)
{
    /* use a as the first operand and as the result tensor of _Multiply,
       so that the product is kept in a. no tensor connection is made here */
    XTensor * self = &a;

    _Multiply(self, &b, self, alpha, leadingDim);
}
/* /*
return a dimension if the multiplication is performed as MultiplyDim (in more details in MultiplyDim.h) return a dimension if the multiplication is performed as MultiplyDim (in more details in MultiplyDim.h)
>> a - a tensor >> a - a tensor
...@@ -230,9 +247,8 @@ where i is the index of the item ...@@ -230,9 +247,8 @@ where i is the index of the item
>> c - result tensor >> c - result tensor
>> alpha - the coefficient >> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting >> leadingDim - the dimension along which we perform broadcasting
>> requireLink - if add operation to network
*/ */
void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim, bool requireLink) void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -246,7 +262,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l ...@@ -246,7 +262,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l
/* call _Multiply function */ /* call _Multiply function */
_Multiply(&a, &b, &c, 0, leadingDim); _Multiply(&a, &b, &c, 0, leadingDim);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY);
XLink::AddParamToHead(&c, alpha); XLink::AddParamToHead(&c, alpha);
...@@ -257,7 +273,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l ...@@ -257,7 +273,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l
/* call _MultiplyDim function */ /* call _MultiplyDim function */
_MultiplyDim(&a, &b, &c, n, alpha); _MultiplyDim(&a, &b, &c, n, alpha);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -122,7 +122,7 @@ where i is the item index ...@@ -122,7 +122,7 @@ where i is the item index
*/ */
void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim) void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{ {
int leadingDimRDI = a->order - leadingDim - 1; int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum), CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!"); CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
......
...@@ -40,6 +40,7 @@ a(i) = a(i)*b(i) + \alpha * a(i) ...@@ -40,6 +40,7 @@ a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the element where i is the index of the element
*/ */
void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0); void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0);
void MultiplyMe(XTensor & a, const XTensor & b, DTYPE alpha = 0.0, int leadingDim = 0);
/* /*
element-wise product of two tensors (return an XTensor structure) element-wise product of two tensors (return an XTensor structure)
...@@ -54,7 +55,7 @@ element-wise product of two tensors: ...@@ -54,7 +55,7 @@ element-wise product of two tensors:
c(i) = a(i)*b(i) + \alpha * c(i) c(i) = a(i)*b(i) + \alpha * c(i)
where i is the index of the element where i is the index of the element
*/ */
void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0, bool requireLink = false); void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha = 0.0, int leadingDim = 0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -139,6 +139,24 @@ void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha) ...@@ -139,6 +139,24 @@ void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha)
} }
/* /*
tensor multiplication (do it on site)
keep the result in the input tensor a and return nothing
a = a * b + \alpha * a
where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting
>> a - a tensor
>> b - another tensor whose size is equal to that of dimension n of a
>> n - the dimension index
>> alpha - the scaling factor
*/
void MultiplyDimMe(XTensor& a, const XTensor& b, int n, DTYPE alpha)
{
    /* a is both the first operand and the result tensor of _MultiplyDim,
       so the input tensor a is overwritten. no tensor connection is made here */
    XTensor * self = &a;

    _MultiplyDim(self, &b, self, n, alpha);
}
/*
tensor multiplication (return an XTensor structure and make tensor connections) tensor multiplication (return an XTensor structure and make tensor connections)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
...@@ -180,9 +198,8 @@ i.e., a is multiplied with b by broadcasting ...@@ -180,9 +198,8 @@ i.e., a is multiplied with b by broadcasting
>> b - another tensor whose size is equal to that of dimension n of a >> b - another tensor whose size is equal to that of dimension n of a
>> c - where we put a * b + \alpha * c. we save it in a if c is NULL >> c - where we put a * b + \alpha * c. we save it in a if c is NULL
>> n - the dimension index >> n - the dimension index
>> requireLink - if add operation to network
*/ */
void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n, bool requireLink) void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -191,7 +208,7 @@ void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n, bool req ...@@ -191,7 +208,7 @@ void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n, bool req
/* call _Multiply function */ /* call _Multiply function */
_MultiplyDim(&a, &b, &c, n, 0); _MultiplyDim(&a, &b, &c, n, 0);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
...@@ -347,9 +364,8 @@ where some of dimensions of b can be of size 1 ...@@ -347,9 +364,8 @@ where some of dimensions of b can be of size 1
>> a - a tensor >> a - a tensor
>> b - another tensor that would be broadcasted >> b - another tensor that would be broadcasted
>> c - the resulting tensor >> c - the resulting tensor
>> requireLink - if add operation to network
*/ */
void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c, bool requireLink) void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -358,7 +374,7 @@ void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c, bool requ ...@@ -358,7 +374,7 @@ void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c, bool requ
/* call _SumBroadcast function */ /* call _SumBroadcast function */
_MultiplyBroadcast(&a, &b, &c, 0); _MultiplyBroadcast(&a, &b, &c, 0);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYBROADCAST); XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYBROADCAST);
XLink::AddParamToHead(&c, 0); XLink::AddParamToHead(&c, 0);
......
...@@ -33,6 +33,7 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP ...@@ -33,6 +33,7 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP
/* tensor multiplication a = a * b + \alpha * c where the size of b is equal to the n-th dimension of a, /* tensor multiplication a = a * b + \alpha * c where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting. we keep the result in the input tensor a and return nothing */ i.e., a is multiplied with b by broadcasting. we keep the result in the input tensor a and return nothing */
void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha = 0.0); void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha = 0.0);
void MultiplyDimMe(XTensor & a, const XTensor & b, int n, DTYPE alpha = 0.0);
/* tensor multiplication c = a * b where the size of b is equal to the n-th dimension of a, /* tensor multiplication c = a * b where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting. We make a new tensor c to keep the result and return it */ i.e., a is multiplied with b by broadcasting. We make a new tensor c to keep the result and return it */
...@@ -40,7 +41,7 @@ XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n); ...@@ -40,7 +41,7 @@ XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n);
/* tensor multiplication c = a * b + \alpha * c where the size of b is equal to the n-th dimension of a, /* tensor multiplication c = a * b + \alpha * c where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting */ i.e., a is multiplied with b by broadcasting */
void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n, bool requireLink = false); void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n);
/* tensor multiplication summation c = a * b + c * \beta where some of dimensions of b can be of size 1 */ /* tensor multiplication summation c = a * b + c * \beta where some of dimensions of b can be of size 1 */
void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0); void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0);
...@@ -50,7 +51,7 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE ...@@ -50,7 +51,7 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE
XTensor MultiplyBroadcast(const XTensor &a, const XTensor &b); XTensor MultiplyBroadcast(const XTensor &a, const XTensor &b);
/* tensor multiplication summation c = a * b + c * \beta where some of dimensions of b can be of size 1 */ /* tensor multiplication summation c = a * b + c * \beta where some of dimensions of b can be of size 1 */
void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c, bool requireLink = false); void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -60,6 +60,16 @@ void _NegateMe(XTensor * a) ...@@ -60,6 +60,16 @@ void _NegateMe(XTensor * a)
} }
/* /*
set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void NegateMe(XTensor& a)
{
    /* a is passed to _Negate as the input and as the output tensor,
       so the negated entries replace the content of a */
    XTensor * self = &a;

    _Negate(self, self);
}
/*
set every entry to its minus value (return an XTensor structure) set every entry to its minus value (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
>> a - input tensor we are processing >> a - input tensor we are processing
...@@ -83,9 +93,8 @@ XTensor Negate(const XTensor & a) ...@@ -83,9 +93,8 @@ XTensor Negate(const XTensor & a)
set every entry to its minus value set every entry to its minus value
>> a - input tensor we are processing >> a - input tensor we are processing
>> b - output tensor we are processing >> b - output tensor we are processing
>> requireLink - if add operation to network
*/ */
void Negate(const XTensor & a, XTensor & b, bool requireLink) void Negate(const XTensor & a, XTensor & b)
{ {
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) {
InitTensor(&b, &a); InitTensor(&b, &a);
...@@ -94,7 +103,7 @@ void Negate(const XTensor & a, XTensor & b, bool requireLink) ...@@ -94,7 +103,7 @@ void Negate(const XTensor & a, XTensor & b, bool requireLink)
/* call _Negate function */ /* call _Negate function */
_Negate(&a, &b); _Negate(&a, &b);
if (requireLink) { if (b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_NEGATE); XLink::MakeLink(&a, NULL, &b, MATH_NEGATE);
} }
......
...@@ -34,6 +34,7 @@ set every entry to its minus value (do it on site) ...@@ -34,6 +34,7 @@ set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing keep the result in the input tensor a and return nothing
*/ */
void _NegateMe(XTensor * a); void _NegateMe(XTensor * a);
void NegateMe(XTensor & a);
/* /*
set every entry to its minus value (return an XTensor structure) set every entry to its minus value (return an XTensor structure)
...@@ -42,7 +43,7 @@ make a new tensor to keep the result and return it ...@@ -42,7 +43,7 @@ make a new tensor to keep the result and return it
XTensor Negate(const XTensor & a); XTensor Negate(const XTensor & a);
/* set every entry to its minus value */ /* set every entry to its minus value */
void Negate(const XTensor & a, XTensor & b, bool requireLink = false); void Negate(const XTensor & a, XTensor & b);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -66,6 +66,16 @@ void _SignMe(XTensor * a) ...@@ -66,6 +66,16 @@ void _SignMe(XTensor * a)
} }
/* /*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void SignMe(XTensor& a)
{
    /* a is passed to _Sign as the input and as the output tensor,
       so the sign values replace the content of a */
    XTensor * self = &a;

    _Sign(self, self);
}
/*
set every entry to its sign value (return an XTensor structure) set every entry to its sign value (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
>> a - input tensor we are processing >> a - input tensor we are processing
...@@ -89,9 +99,8 @@ XTensor Sign(const XTensor & a) ...@@ -89,9 +99,8 @@ XTensor Sign(const XTensor & a)
set every entry to its sign value set every entry to its sign value
>> a - input tensor we are processing >> a - input tensor we are processing
>> b - output tensor we are processing >> b - output tensor we are processing
>> requireLink - if add operation to network
*/ */
void Sign(const XTensor & a, XTensor & b, bool requireLink) void Sign(const XTensor & a, XTensor & b)
{ {
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) {
InitTensor(&b, &a); InitTensor(&b, &a);
...@@ -100,7 +109,7 @@ void Sign(const XTensor & a, XTensor & b, bool requireLink) ...@@ -100,7 +109,7 @@ void Sign(const XTensor & a, XTensor & b, bool requireLink)
/* call _Sign function */ /* call _Sign function */
_Sign(&a, &b); _Sign(&a, &b);
if (requireLink) { if (b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_SIGN); XLink::MakeLink(&a, NULL, &b, MATH_SIGN);
} }
......
...@@ -36,13 +36,19 @@ keep the result in the input tensor a and return nothing ...@@ -36,13 +36,19 @@ keep the result in the input tensor a and return nothing
void _SignMe(XTensor * a); void _SignMe(XTensor * a);
/* /*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
*/
void SignMe(XTensor & a);
/*
set every entry to its sign value (return an XTensor structure) set every entry to its sign value (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
*/ */
XTensor Sign(const XTensor & a); XTensor Sign(const XTensor & a);
/* set every entry to its sign value */ /* set every entry to its sign value */
void Sign(const XTensor & a, XTensor & b, bool requireLink = false); void Sign(const XTensor & a, XTensor & b);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -126,6 +126,19 @@ void _SubMe(XTensor * a, const XTensor * b, DTYPE beta) ...@@ -126,6 +126,19 @@ void _SubMe(XTensor * a, const XTensor * b, DTYPE beta)
{ {
_Sub(a, b, a, beta); _Sub(a, b, a, beta);
} }
/*
tensor subtraction a = a - b * \beta (do it on site)
keep the result in the tensor a and return nothing
>> a - a tensor
>> b - another tensor
>> beta - the scaling factor
*/
void SubMe(XTensor& a, const XTensor& b, DTYPE beta)
{
    /* a is the minuend and also the place where _Sub puts a - b * \beta,
       so the input tensor a is overwritten. no tensor connection is made here */
    XTensor * self = &a;

    _Sub(self, &b, self, beta);
}
/* /*
return a dimension if the subtraction is performed as SubDim (in more details in SubDim.h) return a dimension if the subtraction is performed as SubDim (in more details in SubDim.h)
...@@ -203,9 +216,8 @@ tensor subtraction c = a - b * \beta ...@@ -203,9 +216,8 @@ tensor subtraction c = a - b * \beta
>> b - another tensor >> b - another tensor
>> c - where we put a-b*\beta. we save it in a if c is NULL >> c - where we put a-b*\beta. we save it in a if c is NULL
>> beta - the scaling factor >> beta - the scaling factor
>> requireLink - if add operation to network
*/ */
void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requireLink) void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -217,7 +229,7 @@ void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir ...@@ -217,7 +229,7 @@ void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir
/* call _Sub function */ /* call _Sub function */
_Sub(&a, &b, &c, beta); _Sub(&a, &b, &c, beta);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUB); XLink::MakeLink(&a, &b, &c, MATH_SUB);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
...@@ -227,7 +239,7 @@ void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir ...@@ -227,7 +239,7 @@ void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir
/* call _SubDim function */ /* call _SubDim function */
_SubDim(&a, &b, &c, n, beta); _SubDim(&a, &b, &c, n, beta);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM); XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -35,6 +35,7 @@ tensor subtraction a = a - b * \beta ...@@ -35,6 +35,7 @@ tensor subtraction a = a - b * \beta
keep the result in the input tensor a and return nothing keep the result in the input tensor a and return nothing
*/ */
void _SubMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0); void _SubMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0);
void SubMe(XTensor & a, const XTensor & b, DTYPE beta = (DTYPE)1.0);
/* /*
tensor subtraction c = a - b * \beta tensor subtraction c = a - b * \beta
...@@ -43,7 +44,7 @@ make a new tensor c to keep the result and return it ...@@ -43,7 +44,7 @@ make a new tensor c to keep the result and return it
XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0); XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);
/* tensor subtraction c = a - b * \beta */ /* tensor subtraction c = a - b * \beta */
void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0, bool requireLink = false); void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -46,79 +46,79 @@ void _SubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet ...@@ -46,79 +46,79 @@ void _SubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet
{ {
n = MODX(n, a->order); n = MODX(n, a->order);
CheckNTErrors(a && b && c, "Empty tensor input!"); CheckNTErrors(a && b && c, "Empty tensor input!");
CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in subtraction!"); CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in subtraction!");
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType, CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Unmatched data types in subtraction!"); "Unmatched data types in subtraction!");
CheckNTErrors(a->order == c->order, "The input tensors do not have the same order in subtraction!"); CheckNTErrors(a->order == c->order, "The input tensors do not have the same order in subtraction!");
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!"); CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!"); CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
CheckDev(a->devID, b->devID); CheckDev(a->devID, b->devID);
if (beta == 0) { if (beta == 0) {
_CopyValues(a, c); _CopyValues(a, c);
return; return;
} }
if (XTensor::IsSameShaped(a, b)) { if (XTensor::IsSameShaped(a, b)) {
_Sub(a, b, c, beta); _Sub(a, b, c, beta);
return; return;
} }
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) { if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
_CudaSubDim(a, b, c, n, beta); _CudaSubDim(a, b, c, n, beta);
#else #else
ShowNTErrors("Please specify USE_CUDA and recompile the code!"); ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif #endif
} }
else { else {
int stride = 1; int stride = 1;
int blockSize = a->dimSize[n]; int blockSize = a->dimSize[n];
int blockNum = 1; int blockNum = 1;
for (int i = a->order - 1; i >= 0; i--) { for (int i = a->order - 1; i >= 0; i--) {
if (i > n) if (i > n)
stride *= a->dimSize[i]; stride *= a->dimSize[i];
else if (i < n) else if (i < n)
blockNum *= a->dimSize[i]; blockNum *= a->dimSize[i];
} }
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
int num = a->unitNum; int num = a->unitNum;
if (stride > 1) { if (stride > 1) {
for (int i = 0, j = 0; i < num; i += stride, j++) { for (int i = 0, j = 0; i < num; i += stride, j++) {
DTYPE * ap = (DTYPE*)a->data + i; DTYPE * ap = (DTYPE*)a->data + i;
DTYPE bv = *((DTYPE*)b->data + j % blockSize) * beta; DTYPE bv = *((DTYPE*)b->data + j % blockSize) * beta;
DTYPE * cp = (DTYPE*)c->data + i; DTYPE * cp = (DTYPE*)c->data + i;
for (int k = 0; k < stride; k++) for (int k = 0; k < stride; k++)
cp[k] = ap[k] - bv; cp[k] = ap[k] - bv;
} }
} }
else if (stride == 1) { else if (stride == 1) {
DTYPE * bp = (DTYPE*)b->data; DTYPE * bp = (DTYPE*)b->data;
for (int i = 0; i < num; i += blockSize) { for (int i = 0; i < num; i += blockSize) {
DTYPE * ap = (DTYPE*)a->data + i; DTYPE * ap = (DTYPE*)a->data + i;
DTYPE * cp = (DTYPE*)c->data + i; DTYPE * cp = (DTYPE*)c->data + i;
if (beta == 1.0F) { if (beta == 1.0F) {
for (int j = 0; j < blockSize; j++) for (int j = 0; j < blockSize; j++)
cp[j] = ap[j] - bp[j]; cp[j] = ap[j] - bp[j];
} }
else { else {
for (int j = 0; j < blockSize; j++) for (int j = 0; j < blockSize; j++)
cp[j] = ap[j] - bp[j] * beta; cp[j] = ap[j] - bp[j] * beta;
} }
} }
} }
else { else {
ShowNTErrors("Something is wrong!"); ShowNTErrors("Something is wrong!");
} }
} }
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
} }
} }
} }
/* /*
...@@ -136,7 +136,7 @@ i.e., a is subtracted with b by broadcasting ...@@ -136,7 +136,7 @@ i.e., a is subtracted with b by broadcasting
*/ */
void _SubDim(XTensor * a, const XTensor * b, int n, DTYPE beta) void _SubDim(XTensor * a, const XTensor * b, int n, DTYPE beta)
{ {
/* in-place form: a is passed as both the input and the output tensor,
   so the three-tensor _SubDim overwrites a with a - b * \beta */
_SubDim(a, b, a, n, beta); _SubDim(a, b, a, n, beta);
} }
/* /*
...@@ -155,20 +155,20 @@ i.e., a is subtracted with b by broadcasting ...@@ -155,20 +155,20 @@ i.e., a is subtracted with b by broadcasting
*/ */
XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta) XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
{ {
/* the result tensor is constructed from a and flagged through SetTMPFlag()
   (presumably marks it as a temporary result - confirm in XTensor) */
XTensor c(&a); XTensor c(&a);
c.SetTMPFlag(); c.SetTMPFlag();
/* normalise the dimension index against the order of a
   (presumably maps a negative index into [0, order) - confirm the MODX definition) */
n = MODX(n, a.order); n = MODX(n, a.order);
/* call _SubDim function */ /* call _SubDim function */
_SubDim(&a, &b, &c, n, beta); _SubDim(&a, &b, &c, n, beta);
/* tensor connections */ /* tensor connections */
/* the link and its two parameters (n, then beta) are always recorded here;
   the void SubDim(a, b, c, ...) overload records them only when c.enableGrad is set */
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM); XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
return c; return c;
} }
/* /*
...@@ -183,9 +183,8 @@ i.e., a is subtracted with b by broadcasting ...@@ -183,9 +183,8 @@ i.e., a is subtracted with b by broadcasting
>> c - where we put a-b*\beta. it is initialized from a if it is uninitialized or has a different shape >> c - where we put a-b*\beta. it is initialized from a if it is uninitialized or has a different shape
>> n - the dimension index >> n - the dimension index
>> beta - the scaling factor >> beta - the scaling factor
>> requireLink - if add operation to network
*/ */
void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, bool requireLink) void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -194,7 +193,7 @@ void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, b ...@@ -194,7 +193,7 @@ void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, b
/* call _Sub function */ /* call _Sub function */
_SubDim(&a, &b, &c, n, beta); _SubDim(&a, &b, &c, n, beta);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM); XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -39,25 +39,25 @@ where a is a tensor and b is a row vector ...@@ -39,25 +39,25 @@ where a is a tensor and b is a row vector
*/ */
/*
kernel body: c[row][col] = a[row][col] - b[col] (* beta when betaFired is true),
with a and c addressed as offset = colNum * row + col
betaFired is a compile-time switch, so the multiplication by beta is compiled out when it is false
*/
template <class T, bool betaFired> template <class T, bool betaFired>
__global__ __global__
void KernelSubWithRow(T * a, T * b, T * c, int rowNum, int colNum, T beta) void KernelSubWithRow(T * a, T * b, T * c, int rowNum, int colNum, T beta)
{ {
/* shared-memory copy of the b entries needed by this thread block, one slot per threadIdx.x */
__shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK]; __shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK];
int col = blockDim.x * blockIdx.x + threadIdx.x; int col = blockDim.x * blockIdx.x + threadIdx.x;
int row = blockDim.y * blockIdx.y + threadIdx.y; int row = blockDim.y * blockIdx.y + threadIdx.y;
/* NOTE(review): out-of-range threads return here and never reach the __syncthreads()
   below, so the barrier is not executed by every thread of the block - confirm this is acceptable */
if (col >= colNum || row >= rowNum) if (col >= colNum || row >= rowNum)
return; return;
/* only the threadIdx.y == 0 thread of each column loads b[col] into the shared slot */
if (threadIdx.y == 0) if (threadIdx.y == 0)
bv[threadIdx.x] = b[col]; bv[threadIdx.x] = b[col];
/* wait until the shared slots are filled before any thread reads them */
__syncthreads(); __syncthreads();
int offset = colNum * row + col; int offset = colNum * row + col;
if (betaFired) if (betaFired)
c[offset] = a[offset] - bv[threadIdx.x] * beta; c[offset] = a[offset] - bv[threadIdx.x] * beta;
else else
c[offset] = a[offset] - bv[threadIdx.x]; c[offset] = a[offset] - bv[threadIdx.x];
} }
/* /*
...@@ -75,30 +75,30 @@ where a is a tensor and b is a colum vector ...@@ -75,30 +75,30 @@ where a is a tensor and b is a colum vector
*/ */
/*
kernel body: within each of blockNum blocks of blockSize elements,
c[row][col] = a[row][col] - b[row] (* beta when betaFired is true),
with a and c addressed as offset = block * blockSize + row * colNum + col
betaFired is a compile-time switch, so the multiplication by beta is compiled out when it is false
*/
template <class T, bool betaFired> template <class T, bool betaFired>
__global__ __global__
void KernelSubWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize, int blockNum, T beta) void KernelSubWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize, int blockNum, T beta)
{ {
/* shared-memory copy of the b entries needed by this thread block, one slot per threadIdx.y */
__shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK]; __shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK];
/* the x index enumerates (block, col) pairs; it is split into the two parts below */
int colIndex = blockDim.x * blockIdx.x + threadIdx.x; int colIndex = blockDim.x * blockIdx.x + threadIdx.x;
int row = blockDim.y * blockIdx.y + threadIdx.y; int row = blockDim.y * blockIdx.y + threadIdx.y;
int col = colIndex % colNum; int col = colIndex % colNum;
int block = colIndex / colNum; int block = colIndex / colNum;
/* NOTE(review): out-of-range threads return here and never reach the __syncthreads()
   below, so the barrier is not executed by every thread of the block - confirm this is acceptable */
if (row >= rowNum || block >= blockNum) if (row >= rowNum || block >= blockNum)
return; return;
/* only the threadIdx.x == 0 thread of each row loads b[row] into the shared slot */
if (threadIdx.x == 0) if (threadIdx.x == 0)
bv[threadIdx.y] = b[row]; bv[threadIdx.y] = b[row];
/* wait until the shared slots are filled before any thread reads them */
__syncthreads(); __syncthreads();
int offset = block * blockSize + row * colNum + col; int offset = block * blockSize + row * colNum + col;
if (betaFired) if (betaFired)
c[offset] = a[offset] - bv[threadIdx.y] * beta; c[offset] = a[offset] - bv[threadIdx.y] * beta;
else else
c[offset] = a[offset] - bv[threadIdx.y]; c[offset] = a[offset] - bv[threadIdx.y];
} }
/* /*
...@@ -116,63 +116,63 @@ i.e., a is subtracted with b by broadcasting ...@@ -116,63 +116,63 @@ i.e., a is subtracted with b by broadcasting
*/ */
void _CudaSubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE beta) void _CudaSubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE beta)
{ {
/* n is used as-is to index a->dimSize; it is not normalised with MODX in this function
   (the CPU entry point _SubDim normalises n before dispatching here) */
CheckNTErrors(a && b && c, "Empty tensor input!"); CheckNTErrors(a && b && c, "Empty tensor input!");
CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in subtraction!"); CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in subtraction!");
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType, CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Unmatched data types in subtraction!"); "Unmatched data types in subtraction!");
CheckNTErrors(a->order == c->order, "The input tensors do not have the same order in subtraction!"); CheckNTErrors(a->order == c->order, "The input tensors do not have the same order in subtraction!");
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!"); CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!"); CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
/* stride    = product of the dimension sizes after n
   blockSize = size of dimension n (equal to b->unitNum, checked above)
   blockNum  = product of the dimension sizes before n */
int stride = 1; int stride = 1;
int blockSize = a->dimSize[n]; int blockSize = a->dimSize[n];
int blockNum = 1; int blockNum = 1;
for (int i = a->order - 1; i >= 0; i--) { for (int i = a->order - 1; i >= 0; i--) {
if (i > n) if (i > n)
stride *= a->dimSize[i]; stride *= a->dimSize[i];
else if (i < n) else if (i < n)
blockNum *= a->dimSize[i]; blockNum *= a->dimSize[i];
} }
int cudaGrids[3]; int cudaGrids[3];
int cudaBlocks[3]; int cudaBlocks[3];
/* ProtectCudaDev / BacktoCudaDev bracket the launches
   (presumably switch to a->devID and restore the previous device - confirm in the device utilities) */
int devIDBackup = 0; int devIDBackup = 0;
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
if (stride > 1) { if (stride > 1) {
/* n is not the last dimension: KernelSubWithCol is launched with
   rowNum = blockSize, colNum = stride, blockSize = blockSize * stride */
GDevs.GetCudaThread2D(a->devID, stride * blockNum, blockSize, MAX_INT, cudaGrids, cudaBlocks); GDevs.GetCudaThread2D(a->devID, stride * blockNum, blockSize, MAX_INT, cudaGrids, cudaBlocks);
/* beta == 1 selects the kernel instantiation without the multiplication */
if (beta == (DTYPE)1.0F) if (beta == (DTYPE)1.0F)
KernelSubWithCol<DTYPE, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>> KernelSubWithCol<DTYPE, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockSize, stride, blockSize * stride, blockNum, beta); blockSize, stride, blockSize * stride, blockNum, beta);
else else
KernelSubWithCol<DTYPE, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>> KernelSubWithCol<DTYPE, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockSize, stride, blockSize * stride, blockNum, beta); blockSize, stride, blockSize * stride, blockNum, beta);
} }
else if (stride == 1) { else if (stride == 1) {
/* n is the last dimension: KernelSubWithRow is launched with
   rowNum = blockNum, colNum = blockSize */
GDevs.GetCudaThread2D(a->devID, blockSize, blockNum, MAX_INT, cudaGrids, cudaBlocks); GDevs.GetCudaThread2D(a->devID, blockSize, blockNum, MAX_INT, cudaGrids, cudaBlocks);
if (beta == (DTYPE)1.0F) if (beta == (DTYPE)1.0F)
KernelSubWithRow<DTYPE, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> > KernelSubWithRow<DTYPE, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> >
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockNum, blockSize, beta); blockNum, blockSize, beta);
else else
KernelSubWithRow<DTYPE, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> > KernelSubWithRow<DTYPE, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> >
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockNum, blockSize, beta); blockNum, blockSize, beta);
} }
else { else {
/* reached only when stride < 1, i.e. some dimension size after n is not positive */
ShowNTErrors("Something is wrong!"); ShowNTErrors("Something is wrong!");
} }
} }
else { else {
/* data types other than DEFAULT_DTYPE are not handled */
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
} }
BacktoCudaDev(a->devID, devIDBackup); BacktoCudaDev(a->devID, devIDBackup);
} }
#endif #endif
......
...@@ -40,7 +40,7 @@ XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta = (DTYPE)1. ...@@ -40,7 +40,7 @@ XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta = (DTYPE)1.
/* tensor subtraction c = a - b * \beta where the size of b is equal to the n-th dimension of a, /* tensor subtraction c = a - b * \beta where the size of b is equal to the n-th dimension of a,
i.e., a is subtracted with b by broadcasting*/ i.e., a is subtracted with b by broadcasting*/
void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta = (DTYPE)1.0, bool requireLink = false); void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta = (DTYPE)1.0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -132,6 +132,19 @@ void _SumMe(XTensor * a, const XTensor * b, DTYPE beta) ...@@ -132,6 +132,19 @@ void _SumMe(XTensor * a, const XTensor * b, DTYPE beta)
_Sum(a, b, a, beta); _Sum(a, b, a, beta);
} }
/*
in-place tensor summation: a = a + b * \beta
the sum overwrites a and nothing is returned
>> a - the tensor that is both the first operand and the destination
>> b - the tensor that is scaled by beta and added to a
>> beta - the scaling factor applied to b
*/
void SumMe(XTensor& a, const XTensor& b, DTYPE beta)
{
    /* a is handed to _Sum as the first operand and as the output */
    XTensor * target = &a;
    _Sum(target, &b, target, beta);
}
/* /*
return a dimension if the sum is performed as SumDim (in more details in SumDim.h) return a dimension if the sum is performed as SumDim (in more details in SumDim.h)
>> a - a tensor >> a - a tensor
...@@ -207,9 +220,8 @@ tensor summation c = a + b * \beta ...@@ -207,9 +220,8 @@ tensor summation c = a + b * \beta
>> a - a tensor >> a - a tensor
>> b - another tensor >> b - another tensor
>> beta - the scaling factor >> beta - the scaling factor
>> requireLink - if add operation to network
*/ */
void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requireLink) void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -221,7 +233,7 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir ...@@ -221,7 +233,7 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir
/* call _Sum function */ /* call _Sum function */
_Sum(&a, &b, &c, beta); _Sum(&a, &b, &c, beta);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUM); XLink::MakeLink(&a, &b, &c, MATH_SUM);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
...@@ -231,7 +243,7 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir ...@@ -231,7 +243,7 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requir
/* call _SumDim function */ /* call _SumDim function */
_SumDim(&a, &b, &c, n, beta); _SumDim(&a, &b, &c, n, beta);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUMDIM); XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
......
...@@ -34,6 +34,7 @@ tensor summation a = a + b * \beta ...@@ -34,6 +34,7 @@ tensor summation a = a + b * \beta
keep the result in the input tensor a and return nothing keep the result in the input tensor a and return nothing
*/ */
void _SumMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0); void _SumMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0);
void SumMe(XTensor & a, const XTensor & b, DTYPE beta = (DTYPE)1.0);
/* /*
tensor summation c = a + b * \beta tensor summation c = a + b * \beta
...@@ -42,7 +43,7 @@ make a new tensor c to keep the result and return it ...@@ -42,7 +43,7 @@ make a new tensor c to keep the result and return it
XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0); XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);
/* tensor summation c = a + b * \beta */ /* tensor summation c = a + b * \beta */
void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0, bool requireLink = false); void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -200,9 +200,8 @@ i.e., a is summed with b by broadcasting ...@@ -200,9 +200,8 @@ i.e., a is summed with b by broadcasting
>> c - where we put a+b*\beta. it is initialized from a if it is uninitialized or has a different shape >> c - where we put a+b*\beta. it is initialized from a if it is uninitialized or has a different shape
>> n - the dimension index >> n - the dimension index
>> beta - the scaling factor >> beta - the scaling factor
>> requireLink - if add operation to network
*/ */
void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, bool requireLink) void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -211,7 +210,7 @@ void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, b ...@@ -211,7 +210,7 @@ void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta, b
/* call _SumDim function */ /* call _SumDim function */
_SumDim(&a, &b, &c, n, beta); _SumDim(&a, &b, &c, n, beta);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUMDIM); XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
...@@ -368,9 +367,8 @@ c = a + b * \beta ...@@ -368,9 +367,8 @@ c = a + b * \beta
>> b - another tensor that would be broadcasted >> b - another tensor that would be broadcasted
>> c - the resulting tensor >> c - the resulting tensor
>> beta - the scaling factor >> beta - the scaling factor
>> requireLink - if add operation to network
*/ */
void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bool requireLink) void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a); InitTensor(&c, &a);
...@@ -379,7 +377,7 @@ void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bo ...@@ -379,7 +377,7 @@ void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta, bo
/* call _SumBroadcast function */ /* call _SumBroadcast function */
_SumBroadcast(&a, &b, &c, beta); _SumBroadcast(&a, &b, &c, beta);
if (requireLink) { if (c.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUMBROADCAST); XLink::MakeLink(&a, &b, &c, MATH_SUMBROADCAST);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
......
...@@ -44,7 +44,7 @@ XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta = (DTYPE)1. ...@@ -44,7 +44,7 @@ XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta = (DTYPE)1.
/* tensor summation c = a + b * \beta where the size of b is equal to the n-th dimension of a, /* tensor summation c = a + b * \beta where the size of b is equal to the n-th dimension of a,
i.e., a is summed with b by broadcasting */ i.e., a is summed with b by broadcasting */
void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta = (DTYPE)1.0, bool requireLink = false); void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta = (DTYPE)1.0);
/* tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1 */ /* tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1 */
void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0); void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0);
...@@ -54,7 +54,7 @@ void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta ...@@ -54,7 +54,7 @@ void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta
XTensor SumBroadcast(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0); XTensor SumBroadcast(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);
/* tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1 */ /* tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1 */
void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0, bool requireLink = false); void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta = (DTYPE)1.0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -48,12 +48,12 @@ void _MatrixMULCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -48,12 +48,12 @@ void _MatrixMULCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors((c->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((c->dataType == DEFAULT_DTYPE), "TODO!");
#if defined(USE_BLAS) #if defined(USE_BLAS)
int an = a->dimSize[0]; int an = a->dimSize[0];
int am = a->dimSize[1]; int am = a->dimSize[1];
int bn = b->dimSize[0]; int bn = b->dimSize[0];
int bm = b->dimSize[1]; int bm = b->dimSize[1];
int cn = c->dimSize[0]; int cn = c->dimSize[0];
int cm = c->dimSize[1]; int cm = c->dimSize[1];
if (transposedA == X_NOTRANS && transposedB == X_NOTRANS) if (transposedA == X_NOTRANS && transposedB == X_NOTRANS)
GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, cn, cm, am, alpha, (DTYPE*)a->data, am, (DTYPE*)b->data, bm, beta, (DTYPE*)c->data, cm); GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, cn, cm, am, alpha, (DTYPE*)a->data, am, (DTYPE*)b->data, bm, beta, (DTYPE*)c->data, cm);
......
...@@ -126,13 +126,13 @@ XTensor funcName(const XTensor &a, float num) \ ...@@ -126,13 +126,13 @@ XTensor funcName(const XTensor &a, float num) \
} \ } \
/*
generates "void funcName(a, b, num)": b is (re)initialized from a when it is
uninitialized or differently shaped, _funcName(&a, &b, num) computes the result,
and a link a -> b with operationId is recorded when the gradient flag is set
NOTE(review): only MakeLink is called here; num is not attached with
AddParamToHead, unlike Clip which records lower and upper - confirm this is intended
*/
#define SIMPLE_BINARY_FUNCTION_VOID(funcName, _funcName, operationId) \ #define SIMPLE_BINARY_FUNCTION_VOID(funcName, _funcName, operationId) \
void funcName(const XTensor &a, XTensor &b, float num, bool requireLink) \ void funcName(const XTensor &a, XTensor &b, float num) \
{ \ { \
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \ if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \
InitTensor(&b, &a); \ InitTensor(&b, &a); \
} \ } \
_funcName(&a, &b, num); \ _funcName(&a, &b, num); \
if (requireLink) { \ if (b.enableGrad) { \
XLink::MakeLink(&a, NULL, &b, operationId); \ XLink::MakeLink(&a, NULL, &b, operationId); \
} \ } \
} \ } \
...@@ -165,7 +165,7 @@ SIMPLE_BINARY_FUNCTION(Shift, _Shift, MATH_SHIFT) ...@@ -165,7 +165,7 @@ SIMPLE_BINARY_FUNCTION(Shift, _Shift, MATH_SHIFT)
SIMPLE_BINARY_FUNCTION_VOID(Shift, _Shift, MATH_SHIFT) SIMPLE_BINARY_FUNCTION_VOID(Shift, _Shift, MATH_SHIFT)
_SIMPLE_BINARY_FUNCTION_INT(_Mod, _CudaMod, mod) _SIMPLE_BINARY_FUNCTION_INT(_Mod, _CudaMod, mod)
SIMPLE_BINARY_FUNCTION_ME_INT(_ModMe, _Mod) SIMPLE_BINARY_FUNCTION_ME_INT(ModMe, _Mod)
SIMPLE_BINARY_FUNCTION_INT(Mod, _Mod) SIMPLE_BINARY_FUNCTION_INT(Mod, _Mod)
#else #else
......
...@@ -37,15 +37,22 @@ void _Scale(const XTensor * a, XTensor * b, float scale); ...@@ -37,15 +37,22 @@ void _Scale(const XTensor * a, XTensor * b, float scale);
scale up tensor entries (on site) scale up tensor entries (on site)
a = a * scale a = a * scale
*/ */
void _ScaleMe(XTensor & a, int scale); void _ScaleMe(XTensor * a, int scale);
void _ScaleMe(XTensor & a, float scale); void _ScaleMe(XTensor * a, float scale);
/*
scale up tensor entries (on site)
a = a * scale
*/
void ScaleMe(XTensor & a, int scale);
void ScaleMe(XTensor & a, float scale);
/* /*
scale up tensor entries scale up tensor entries
b = a * scale b = a * scale
*/ */
void Scale(const XTensor & a, XTensor &b, int scale); void Scale(const XTensor & a, XTensor &b, int scale);
void Scale(const XTensor & a, XTensor &b, float scale, bool requireLink = false); void Scale(const XTensor & a, XTensor &b, float scale);
/* /*
scale up tensor entries (return an XTensor structure) scale up tensor entries (return an XTensor structure)
...@@ -64,15 +71,22 @@ void _Descale(const XTensor * a, XTensor * b, float scale); ...@@ -64,15 +71,22 @@ void _Descale(const XTensor * a, XTensor * b, float scale);
descale tensor entries (on site) descale tensor entries (on site)
a = a / scale a = a / scale
*/ */
void _DescaleMe(XTensor & a, int scale); void _DescaleMe(XTensor * a, int scale);
void _DescaleMe(XTensor & a, float scale); void _DescaleMe(XTensor * a, float scale);
/*
descale tensor entries (on site)
a = a / scale
*/
void DescaleMe(XTensor & a, int scale);
void DescaleMe(XTensor & a, float scale);
/* /*
descale tensor entries descale tensor entries
b = a / scale b = a / scale
*/ */
void Descale(const XTensor & a, XTensor & b, int scale); void Descale(const XTensor & a, XTensor & b, int scale);
void Descale(const XTensor & a, XTensor & b, float scale, bool requireLink = false); void Descale(const XTensor & a, XTensor & b, float scale);
/* /*
descale tensor entries (return an XTensor structure) descale tensor entries (return an XTensor structure)
...@@ -91,15 +105,22 @@ void _Shift(const XTensor * a, XTensor * b, float shift); ...@@ -91,15 +105,22 @@ void _Shift(const XTensor * a, XTensor * b, float shift);
shift tensor entries (on site) shift tensor entries (on site)
a = a + shift a = a + shift
*/ */
void _ShiftMe(XTensor & a, int shift); void _ShiftMe(XTensor * a, int shift);
void _ShiftMe(XTensor & a, float shift); void _ShiftMe(XTensor * a, float shift);
/*
shift tensor entries (on site)
a = a + shift
*/
void ShiftMe(XTensor & a, int shift);
void ShiftMe(XTensor & a, float shift);
/* /*
shift tensor entries shift tensor entries
b = a + shift b = a + shift
*/ */
void Shift(const XTensor & a, XTensor & b, int shift); void Shift(const XTensor & a, XTensor & b, int shift);
void Shift(const XTensor & a, XTensor & b, float shift, bool requireLink = false); void Shift(const XTensor & a, XTensor & b, float shift);
/* /*
shift tensor entries (return an XTensor structure) shift tensor entries (return an XTensor structure)
...@@ -118,7 +139,13 @@ void _Mod(const XTensor * a, XTensor * b, int base); ...@@ -118,7 +139,13 @@ void _Mod(const XTensor * a, XTensor * b, int base);
mod tensor entries (on site) mod tensor entries (on site)
a = a % base a = a % base
*/ */
void _ModMe(XTensor & a, int base); void _ModMe(XTensor * a, int base);
/*
mod tensor entries (on site)
a = a % base
*/
void ModMe(XTensor & a, int base);
/* /*
mod tensor entries mod tensor entries
......
...@@ -36,26 +36,26 @@ set every entry to its clip value ...@@ -36,26 +36,26 @@ set every entry to its clip value
void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper) void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
/* run it on GPUs */ /* run it on GPUs */
/* a non-negative devID is dispatched to the CUDA implementation, which does its own checks */
if (a->devID >= 0) { if (a->devID >= 0) {
_CudaClip(a, b, lower, upper); _CudaClip(a, b, lower, upper);
return; return;
} }
#endif #endif
/* CPU path: a and b must have the same shape and a must hold DEFAULT_DTYPE data */
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!"); CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data; DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data; DTYPE * db = (DTYPE*)b->data;
/* element-wise clamp; upper is tested first, so a value above upper becomes upper
   even if lower > upper, and a value for which both comparisons are false is copied unchanged */
for (int i = 0; i < a->unitNum; i++) { for (int i = 0; i < a->unitNum; i++) {
if (d[i] > upper) if (d[i] > upper)
db[i] = upper; db[i] = upper;
else if (d[i] < lower) else if (d[i] < lower)
db[i] = lower; db[i] = lower;
else else
db[i] = d[i]; db[i] = d[i];
} }
} }
/* /*
...@@ -67,7 +67,19 @@ keep the result in the input tensor a and return nothing ...@@ -67,7 +67,19 @@ keep the result in the input tensor a and return nothing
*/ */
void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper) void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper)
{ {
/* in-place form: a is passed to _Clip as both the input and the output tensor */
_Clip(a, a, lower, upper); _Clip(a, a, lower, upper);
}
/*
set every entry to its clip value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
>> lower - the lower border
>> upper - the upper border
*/
void ClipMe(XTensor& a, DTYPE lower, DTYPE upper)
{
/* reference-taking in-place form: a is both the input and the output of _Clip;
   no XLink connection is recorded here */
_Clip(&a, &a, lower, upper);
} }
/* /*
...@@ -80,21 +92,21 @@ make a new tensor to keep the result and return it ...@@ -80,21 +92,21 @@ make a new tensor to keep the result and return it
*/ */
XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper) XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper)
{ {
/* the result tensor is constructed from a and flagged through SetTMPFlag()
   (presumably marks it as a temporary result - confirm in XTensor) */
XTensor b(&a); XTensor b(&a);
b.SetTMPFlag(); b.SetTMPFlag();
/* call _Clip function */ /* call _Clip function */
_Clip(&a, &b, lower, upper); _Clip(&a, &b, lower, upper);
/* tensor connections */ /* tensor connections */
/* the link and its two parameters (lower, then upper) are always recorded here;
   the void Clip(a, b, ...) overload records them only when b.enableGrad is set */
XLink::MakeLink(&a, NULL, &b, MATH_CLIP); XLink::MakeLink(&a, NULL, &b, MATH_CLIP);
XLink::AddParamToHead(&b, lower); XLink::AddParamToHead(&b, lower);
XLink::AddParamToHead(&b, upper); XLink::AddParamToHead(&b, upper);
return b; return b;
} }
void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper, bool requireLink) void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper)
{ {
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) {
InitTensor(&b, &a); InitTensor(&b, &a);
...@@ -103,7 +115,7 @@ void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper, bool require ...@@ -103,7 +115,7 @@ void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper, bool require
/* call _Clip function */ /* call _Clip function */
_Clip(&a, &b, lower, upper); _Clip(&a, &b, lower, upper);
if (requireLink) { if (b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_CLIP); XLink::MakeLink(&a, NULL, &b, MATH_CLIP);
XLink::AddParamToHead(&b, lower); XLink::AddParamToHead(&b, lower);
......
...@@ -36,18 +36,18 @@ set each entry to its clip value (CUDA Kernel) ...@@ -36,18 +36,18 @@ set each entry to its clip value (CUDA Kernel)
>> size - size of the data array >> size - size of the data array
*/ */
/* one thread per element: b[i] is a[i] limited to upper (tested first) and lower */
__global__ __global__
void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size) void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
/* threads whose index falls beyond the array do nothing */
if (i < size) { if (i < size) {
if (a[i] > upper) if (a[i] > upper)
b[i] = upper; b[i] = upper;
else if (a[i] < lower) else if (a[i] < lower)
b[i] = lower; b[i] = lower;
else else
b[i] = a[i]; b[i] = a[i];
} }
} }
/* /*
...@@ -62,7 +62,7 @@ This is for float16 computation ...@@ -62,7 +62,7 @@ This is for float16 computation
__global__ __global__
void KernelClip(__half * a, __half * b, DTYPE lower, DTYPE upper, int size) void KernelClip(__half * a, __half * b, DTYPE lower, DTYPE upper, int size)
{ {
return; return;
} }
/* /*
...@@ -74,31 +74,31 @@ set each entry to its clip value ...@@ -74,31 +74,31 @@ set each entry to its clip value
*/ */
void _CudaClip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper) void _CudaClip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)
{ {
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!"); CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!"); CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3]; int gridSize[3];
int blockSize[3]; int blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize); GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]); dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]); dim3 threads(blockSize[0]);
int devIDBackup; int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
KernelClip << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, lower, upper, a->unitNum); KernelClip << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, lower, upper, a->unitNum);
} }
else if (a->dataType == X_FLOAT16) { else if (a->dataType == X_FLOAT16) {
KernelClip << <blocks, threads >> >((__half*)a->data, (__half*)b->data, lower, upper, a->unitNum); KernelClip << <blocks, threads >> >((__half*)a->data, (__half*)b->data, lower, upper, a->unitNum);
} }
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
} }
BacktoCudaDev(a->devID, devIDBackup); BacktoCudaDev(a->devID, devIDBackup);
} }
/* /*
......
...@@ -33,11 +33,15 @@ void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper); ...@@ -33,11 +33,15 @@ void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper);
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper); void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper);
/* set every entry to its clip value (do it on site)
keep the result in the input tensor a and return nothing */
void ClipMe(XTensor & a, DTYPE lower, DTYPE upper);
/* set every entry to its clip value (return an XTensor structure) /* set every entry to its clip value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper); XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper);
void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper, bool requireLink = false); void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper);
/* /*
backward of Clip function backward of Clip function
......
...@@ -32,6 +32,9 @@ void _Equal(const XTensor * a, XTensor * b, DTYPE value); ...@@ -32,6 +32,9 @@ void _Equal(const XTensor * a, XTensor * b, DTYPE value);
/* check whether every entry is equal to the given value (do it on site) */ /* check whether every entry is equal to the given value (do it on site) */
void _EqualMe(XTensor * a, DTYPE value); void _EqualMe(XTensor * a, DTYPE value);
/* check whether every entry is equal to the given value (do it on site) */
void EqualMe(XTensor & a, DTYPE value);
/* check whether every entry is equal to the given value (return an XTensor structure) */ /* check whether every entry is equal to the given value (return an XTensor structure) */
XTensor Equal(const XTensor & a, DTYPE value); XTensor Equal(const XTensor & a, DTYPE value);
...@@ -41,6 +44,9 @@ void _NotEqual(const XTensor * a, XTensor * b, DTYPE value); ...@@ -41,6 +44,9 @@ void _NotEqual(const XTensor * a, XTensor * b, DTYPE value);
/* check whether every entry is not equal to the given value (do it on site) */ /* check whether every entry is not equal to the given value (do it on site) */
void _NotEqualMe(XTensor * a, DTYPE value); void _NotEqualMe(XTensor * a, DTYPE value);
/* check whether every entry is not equal to the given value (do it on site) */
void NotEqualMe(XTensor & a, DTYPE value);
/* check whether every entry is not equal to the given value (return an XTensor structure) */ /* check whether every entry is not equal to the given value (return an XTensor structure) */
XTensor NotEqual(const XTensor & a, DTYPE value); XTensor NotEqual(const XTensor & a, DTYPE value);
......
...@@ -44,7 +44,7 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme ...@@ -44,7 +44,7 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme
*/ */
void _Normalize(const XTensor * input, XTensor * output, int dim, const XTensor * mean, const XTensor * var, const XTensor * a, const XTensor * b, DTYPE epsilon) void _Normalize(const XTensor * input, XTensor * output, int dim, const XTensor * mean, const XTensor * var, const XTensor * a, const XTensor * b, DTYPE epsilon)
{ {
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
CheckNTErrors((XTensor::IsSameShaped(input, output)), "Unmatched input tensors!"); CheckNTErrors((XTensor::IsSameShaped(input, output)), "Unmatched input tensors!");
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Unmatched input tensors"); CheckNTErrors((XTensor::IsSameShaped(a, b)), "Unmatched input tensors");
CheckNTErrors((XTensor::IsSameShaped(mean, var)), "Unmatched input tensors"); CheckNTErrors((XTensor::IsSameShaped(mean, var)), "Unmatched input tensors");
...@@ -113,6 +113,27 @@ void _NormalizeMe(XTensor * input, int dim, const XTensor * mean, const XTensor ...@@ -113,6 +113,27 @@ void _NormalizeMe(XTensor * input, int dim, const XTensor * mean, const XTensor
{ {
_Normalize(input, input, dim, mean, var, a, b, epsilon); _Normalize(input, input, dim, mean, var, a, b, epsilon);
} }
/*
normalize the data with normal distribution (do it on site)
the result is written back into the input tensor and nothing is returned
For an input x, x = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
>> input - the tensor to normalize (it also receives the result)
>> dim - dimension along which we generate the mean and variance
>> mean - the mean of the input
>> var - the variance of the input
>> a - the scalar
>> b - the bias
>> epsilon - a parameter
*/
void NormalizeMe(XTensor& input, int dim, const XTensor& mean, const XTensor& var, const XTensor& a, const XTensor& b, DTYPE epsilon)
{
    /* the same tensor serves as both the source and the target of _Normalize */
    XTensor * target = &input;
    _Normalize(target, target, dim, &mean, &var, &a, &b, epsilon);
}
/* /*
normalized the data with normal distribution (return an XTensor structure) normalized the data with normal distribution (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
......
...@@ -95,8 +95,8 @@ void _CudaNormalize(const XTensor * input, XTensor * output, int dim, ...@@ -95,8 +95,8 @@ void _CudaNormalize(const XTensor * input, XTensor * output, int dim,
{ {
CheckNTErrors((input->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((input->dataType == DEFAULT_DTYPE), "TODO!");
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
int stride = 1; int stride = 1;
int strideNum = input->dimSizeRDI[dimRDI]; int strideNum = input->dimSizeRDI[dimRDI];
int blockNum = 1; int blockNum = 1;
for (int i = 0; i < input->order; i++) { for (int i = 0; i < input->order; i++) {
......
...@@ -42,6 +42,14 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme ...@@ -42,6 +42,14 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme
void _NormalizeMe(XTensor * input, int dim, const XTensor * mean, const XTensor * var, const XTensor * a, const XTensor * b, DTYPE epsilon); void _NormalizeMe(XTensor * input, int dim, const XTensor * mean, const XTensor * var, const XTensor * a, const XTensor * b, DTYPE epsilon);
/* /*
normalized the data with normal distribution (do it on site)
keep the result in the input tensor and return nothing
For an input x, x = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
*/
void NormalizeMe(XTensor & input, int dim, const XTensor & mean, const XTensor & var, const XTensor & a, const XTensor & b, DTYPE epsilon);
/*
normalized the data with normal distribution (return an XTensor structure) normalized the data with normal distribution (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b
......
...@@ -81,6 +81,17 @@ void _PowerMe(XTensor * a, DTYPE p) ...@@ -81,6 +81,17 @@ void _PowerMe(XTensor * a, DTYPE p)
} }
/*
get the power(a, p) (do it on site)
the result is written back into the tensor a and nothing is returned
>> a - the tensor (used as both input and output)
>> p - parameter
*/
void PowerMe(XTensor& a, DTYPE p)
{
    /* the same tensor serves as both the source and the target of _Power */
    XTensor * self = &a;
    _Power(self, self, p);
}
/*
get the power(a, p) (return an XTensor structure) get the power(a, p) (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
>> a - input tensor >> a - input tensor
...@@ -107,9 +118,8 @@ get the power(a, p) ...@@ -107,9 +118,8 @@ get the power(a, p)
>> a - input tensor >> a - input tensor
>> b - output tensor >> b - output tensor
>> p - parameter >> p - parameter
>> requireLink - if add operation to network
*/ */
void Power(const XTensor & a, XTensor & b, DTYPE p, bool requireLink) void Power(const XTensor & a, XTensor & b, DTYPE p)
{ {
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) {
InitTensor(&b, &a); InitTensor(&b, &a);
...@@ -118,7 +128,7 @@ void Power(const XTensor & a, XTensor & b, DTYPE p, bool requireLink) ...@@ -118,7 +128,7 @@ void Power(const XTensor & a, XTensor & b, DTYPE p, bool requireLink)
/* call _Power function */ /* call _Power function */
_Power(&a, &b, p); _Power(&a, &b, p);
if (requireLink) { if (b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_POWER); XLink::MakeLink(&a, NULL, &b, MATH_POWER);
XLink::AddParamToHead(&b, p); XLink::AddParamToHead(&b, p);
......
...@@ -36,13 +36,19 @@ keep the result in the input tensor a and return nothing ...@@ -36,13 +36,19 @@ keep the result in the input tensor a and return nothing
void _PowerMe(XTensor * a, DTYPE p); void _PowerMe(XTensor * a, DTYPE p);
/* /*
get the power(x, y) (do it on site)
keep the result in the input tensor a and return nothing
*/
void PowerMe(XTensor & a, DTYPE p);
/*
get the power(x, y) (return an XTensor structure) get the power(x, y) (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
*/ */
XTensor Power(const XTensor & a, DTYPE p); XTensor Power(const XTensor & a, DTYPE p);
/* get the power(x, y) */ /* get the power(x, y) */
void Power(const XTensor & a, XTensor & b, DTYPE p, bool requireLink = false); void Power(const XTensor & a, XTensor & b, DTYPE p);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -92,6 +92,21 @@ void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift) ...@@ -92,6 +92,21 @@ void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift)
} }
/*
scale and shift all tensor entries (do it on site)
the result is written back into the tensor a and nothing is returned
a = a * scale + shift
>> a - the input/output tensor
>> scale - the scale factor
>> shift - the shift factor
*/
void ScaleAndShiftMe(XTensor& a, DTYPE scale, DTYPE shift)
{
    /* the same tensor serves as both the source and the target of _ScaleAndShift */
    XTensor * self = &a;
    _ScaleAndShift(self, self, scale, shift);
}
/*
scale and shift all tensor entires (return an XTensor structure) scale and shift all tensor entires (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
...@@ -127,9 +142,8 @@ b = a * scale + shift ...@@ -127,9 +142,8 @@ b = a * scale + shift
>> b - the output tensor >> b - the output tensor
>> scale - the scaler factor >> scale - the scaler factor
>> shift - the shift factor >> shift - the shift factor
>> requireLink - if add operation to network
*/ */
void ScaleAndShift(const XTensor & a, XTensor & b, DTYPE scale, DTYPE shift, bool requireLink) void ScaleAndShift(const XTensor & a, XTensor & b, DTYPE scale, DTYPE shift)
{ {
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) {
InitTensor(&b, &a); InitTensor(&b, &a);
...@@ -138,7 +152,7 @@ void ScaleAndShift(const XTensor & a, XTensor & b, DTYPE scale, DTYPE shift, boo ...@@ -138,7 +152,7 @@ void ScaleAndShift(const XTensor & a, XTensor & b, DTYPE scale, DTYPE shift, boo
/* call _ScaleAndShift function */ /* call _ScaleAndShift function */
_ScaleAndShift(&a, &b, scale, shift); _ScaleAndShift(&a, &b, scale, shift);
if (requireLink) { if (b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_SCALEANDSHIFT); XLink::MakeLink(&a, NULL, &b, MATH_SCALEANDSHIFT);
XLink::AddParamToHead(&b, scale); XLink::AddParamToHead(&b, scale);
......
...@@ -45,6 +45,13 @@ void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift = 0); ...@@ -45,6 +45,13 @@ void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift = 0);
/* /*
scale and shift all tensor entires scale and shift all tensor entires
keep the result in the input tensor a and return nothing
a = a * scale + shift
*/
void ScaleAndShiftMe(XTensor & a, DTYPE scale, DTYPE shift = 0);
/*
scale and shift all tensor entries
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
b = a * scale + shift b = a * scale + shift
*/ */
...@@ -54,7 +61,7 @@ XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift = 0); ...@@ -54,7 +61,7 @@ XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift = 0);
scale and shift all tensor entires scale and shift all tensor entires
b = a * scale + shift b = a * scale + shift
*/ */
void ScaleAndShift(const XTensor &a, XTensor &b, DTYPE scale, DTYPE shift = 0, bool requireLink = false); void ScaleAndShift(const XTensor &a, XTensor &b, DTYPE scale, DTYPE shift = 0);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -34,7 +34,7 @@ DTYPE square(DTYPE x) ...@@ -34,7 +34,7 @@ DTYPE square(DTYPE x)
DTYPE round(DTYPE r) DTYPE round(DTYPE r)
{ {
return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5); return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5);
} }
DTYPE isnonzero(DTYPE r) DTYPE isnonzero(DTYPE r)
...@@ -83,13 +83,13 @@ XTensor funcName(const XTensor &a) \ ...@@ -83,13 +83,13 @@ XTensor funcName(const XTensor &a) \
} }
#define SIMPLE_UNARY_FUNCTION_VOID(funcName, _funcName, operationId) \ #define SIMPLE_UNARY_FUNCTION_VOID(funcName, _funcName, operationId) \
void funcName(const XTensor &a, XTensor &b, bool requireLink) \ void funcName(const XTensor &a, XTensor &b) \
{ \ { \
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \ if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \
InitTensor(&b, &a); \ InitTensor(&b, &a); \
} \ } \
_funcName(&a, &b); \ _funcName(&a, &b); \
if (requireLink) { \ if (b.enableGrad) { \
XLink::MakeLink(&a, NULL, &b, operationId); \ XLink::MakeLink(&a, NULL, &b, operationId); \
} \ } \
} }
...@@ -189,13 +189,13 @@ XTensor funcName(const XTensor &a) \ ...@@ -189,13 +189,13 @@ XTensor funcName(const XTensor &a) \
return b; \ return b; \
} }
#define SIMPLE_UNARY_FUNCTION_VOID(funcName, _funcName, operationId) \ #define SIMPLE_UNARY_FUNCTION_VOID(funcName, _funcName, operationId) \
void funcName(const XTensor &a, XTensor &b, bool requireLink) \ void funcName(const XTensor &a, XTensor &b) \
{ \ { \
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \ if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \
InitTensor(&b, &a); \ InitTensor(&b, &a); \
} \ } \
_funcName(&a, &b); \ _funcName(&a, &b); \
if (requireLink) { \ if (b.enableGrad) { \
XLink::MakeLink(&a, NULL, &b, operationId); \ XLink::MakeLink(&a, NULL, &b, operationId); \
} \ } \
} }
......
...@@ -38,7 +38,7 @@ DTYPE cudasquare(DTYPE x) ...@@ -38,7 +38,7 @@ DTYPE cudasquare(DTYPE x)
__device__ __device__
DTYPE cudaround(DTYPE r) DTYPE cudaround(DTYPE r)
{ {
return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5); return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5);
} }
__device__ __device__
......
...@@ -31,110 +31,140 @@ void _Absolute(const XTensor * a, XTensor * b); ...@@ -31,110 +31,140 @@ void _Absolute(const XTensor * a, XTensor * b);
/* set every entry to its absolute value (do it on site) /* set every entry to its absolute value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _AbsoluteMe(XTensor * a); void _AbsoluteMe(XTensor * a);
/* set every entry to its absolute value (do it on site)
keep the result in the input tensor a and return nothing */
void AbsoluteMe(XTensor & a);
/* set every entry to its absolute value (return an XTensor structure) /* set every entry to its absolute value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Absolute(const XTensor & a); XTensor Absolute(const XTensor & a);
/* set every entry to its absolute value */ /* set every entry to its absolute value */
void Absolute(const XTensor & a, XTensor & b, bool requireLink = false); void Absolute(const XTensor & a, XTensor & b);
/* set every entry to its ceil value */ /* set every entry to its ceil value */
void _Ceil(const XTensor * a, XTensor * b); void _Ceil(const XTensor * a, XTensor * b);
/* set every entry to its ceil value (do it on site) /* set every entry to its ceil value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _CeilMe(XTensor * a); void _CeilMe(XTensor * a);
/* set every entry to its ceil value (do it on site)
keep the result in the input tensor a and return nothing */
void CeilMe(XTensor & a);
/* set every entry to its ceil value (return an XTensor structure) /* set every entry to its ceil value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Ceil(const XTensor & a); XTensor Ceil(const XTensor & a);
/* set every entry to its ceil value */ /* set every entry to its ceil value */
void Ceil(const XTensor & a, XTensor & b, bool requireLink = false); void Ceil(const XTensor & a, XTensor & b);
/* set every entry to its exponent value */ /* set every entry to its exponent value */
void _Exp(const XTensor * a, XTensor * b); void _Exp(const XTensor * a, XTensor * b);
/* set every entry to its exponent value (do it on site) /* set every entry to its exponent value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _ExpMe(XTensor * a); void _ExpMe(XTensor * a);
/* set every entry to its exponent value (do it on site)
keep the result in the input tensor a and return nothing */
void ExpMe(XTensor & a);
/* set every entry to its exponent value (return an XTensor structure) /* set every entry to its exponent value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Exp(const XTensor & a); XTensor Exp(const XTensor & a);
/* set every entry to its exponent value */ /* set every entry to its exponent value */
void Exp(const XTensor & a, XTensor & b, bool requireLink = false); void Exp(const XTensor & a, XTensor & b);
/* set every entry to its floor value */ /* set every entry to its floor value */
void _Floor(const XTensor * a, XTensor * b); void _Floor(const XTensor * a, XTensor * b);
/* set every entry to its floor value (do it on site) /* set every entry to its floor value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _FloorMe(XTensor * a); void _FloorMe(XTensor * a);
/* set every entry to its floor value (do it on site)
keep the result in the input tensor a and return nothing */
void FloorMe(XTensor & a);
/* set every entry to its floor value (return an XTensor structure) /* set every entry to its floor value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Floor(const XTensor & a); XTensor Floor(const XTensor & a);
/* set every entry to its floor value */ /* set every entry to its floor value */
void Floor(const XTensor & a, XTensor & b, bool requireLink = false); void Floor(const XTensor & a, XTensor & b);
/* if source entry is non-zero, set target entry to be one, otherwise zero */ /* if source entry is non-zero, set target entry to be one, otherwise zero */
void _IsNonZero(const XTensor *a, XTensor *b); void _IsNonZero(const XTensor *a, XTensor *b);
/* if source entry is non-zero, set target entry to be one, otherwise zero (do it on site) /* if source entry is non-zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _IsNonZeroMe(XTensor *a); void _IsNonZeroMe(XTensor *a);
/* if source entry is non-zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */
void IsNonZeroMe(XTensor &a);
/* if source entry is non-zero, set target entry to be one, otherwise zero (return an XTensor structure) /* if source entry is non-zero, set target entry to be one, otherwise zero (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor IsNonZero(const XTensor &a); XTensor IsNonZero(const XTensor &a);
/* if source entry is non-zero, set target entry to be one, otherwise zero */ /* if source entry is non-zero, set target entry to be one, otherwise zero */
void IsNonZero(const XTensor &a, XTensor & b, bool requireLink = false); void IsNonZero(const XTensor &a, XTensor & b);
/* if source entry is zero, set target entry to be one, otherwise zero */ /* if source entry is zero, set target entry to be one, otherwise zero */
void _IsZero(const XTensor *a, XTensor *b); void _IsZero(const XTensor *a, XTensor *b);
/* if source entry is zero, set target entry to be one, otherwise zero (do it on site) /* if source entry is zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _IsZeroMe(XTensor *a); void _IsZeroMe(XTensor *a);
/* if source entry is zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */
void IsZeroMe(XTensor &a);
/* if source entry is zero, set target entry to be one, otherwise zero (return an XTensor structure) /* if source entry is zero, set target entry to be one, otherwise zero (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor IsZero(const XTensor &a); XTensor IsZero(const XTensor &a);
/* if source entry is zero, set target entry to be one, otherwise zero */ /* if source entry is zero, set target entry to be one, otherwise zero */
void IsZero(const XTensor &a, XTensor & b, bool requireLink = false); void IsZero(const XTensor &a, XTensor & b);
/* set every entry to its logarithm value */ /* set every entry to its logarithm value */
void _Log(const XTensor * a, XTensor * b); void _Log(const XTensor * a, XTensor * b);
/* set every entry to its logarithm value (do it on site) /* set every entry to its logarithm value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _LogMe(XTensor * a); void _LogMe(XTensor * a);
/* set every entry to its logarithm value (do it on site)
keep the result in the input tensor a and return nothing */
void LogMe(XTensor & a);
/* set every entry to its logarithm value (return an XTensor structure) /* set every entry to its logarithm value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Log(const XTensor & a); XTensor Log(const XTensor & a);
/* set every entry to its logarithm value */ /* set every entry to its logarithm value */
void Log(const XTensor & a, XTensor & b, bool requireLink = false); void Log(const XTensor & a, XTensor & b);
/* set every entry to its round value */ /* set every entry to its round value */
void _Round(const XTensor * a, XTensor * b); void _Round(const XTensor * a, XTensor * b);
/* set every entry to its round value (do it on site) /* set every entry to its round value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _RoundMe(XTensor * a); void _RoundMe(XTensor * a);
/* set every entry to its round value (do it on site)
keep the result in the input tensor a and return nothing */
void RoundMe(XTensor & a);
/* set every entry to its round value (return an XTensor structure) /* set every entry to its round value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Round(const XTensor & a); XTensor Round(const XTensor & a);
/* set every entry to its round value */ /* set every entry to its round value */
void Round(const XTensor & a, XTensor & b, bool requireLink = false); void Round(const XTensor & a, XTensor & b);
/* set every entry to its sqrt value */ /* set every entry to its sqrt value */
void _Sqrt(const XTensor * a, XTensor * b); void _Sqrt(const XTensor * a, XTensor * b);
/* set every entry to its sqrt value (do it on site) /* set every entry to its sqrt value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _SqrtMe(XTensor * a); void _SqrtMe(XTensor * a);
/* set every entry to its sqrt value (do it on site)
keep the result in the input tensor a and return nothing */
void SqrtMe(XTensor & a);
/* set every entry to its sqrt value (return an XTensor structure) /* set every entry to its sqrt value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Sqrt(const XTensor & a); XTensor Sqrt(const XTensor & a);
/* set every entry to its sqrt value */ /* set every entry to its sqrt value */
void Sqrt(const XTensor & a, XTensor & b, bool requireLink = false); void Sqrt(const XTensor & a, XTensor & b);
/* set every entry to its square value */ /* set every entry to its square value */
void _Square(const XTensor * a, XTensor * b); void _Square(const XTensor * a, XTensor * b);
/* set every entry to its square value (do it on site) /* set every entry to its square value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _SquareMe(XTensor * a); void _SquareMe(XTensor * a);
/* set every entry to its square value (do it on site)
keep the result in the input tensor a and return nothing */
void SquareMe(XTensor & a);
/* set every entry to its square value (return an XTensor structure) /* set every entry to its square value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Square(const XTensor & a); XTensor Square(const XTensor & a);
/* set every entry to its square value */ /* set every entry to its square value */
void Square(const XTensor & a, XTensor & b, bool requireLink = false); void Square(const XTensor & a, XTensor & b);
/* set every entry to its sine value */ /* set every entry to its sine value */
...@@ -142,33 +172,42 @@ void _Sin(const XTensor * a, XTensor * b); ...@@ -142,33 +172,42 @@ void _Sin(const XTensor * a, XTensor * b);
/* set every entry to its sine value (do it on site) /* set every entry to its sine value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _SinMe(XTensor * a); void _SinMe(XTensor * a);
/* set every entry to its sine value (do it on site)
keep the result in the input tensor a and return nothing */
void SinMe(XTensor & a);
/* set every entry to its sine value (return an XTensor structure) /* set every entry to its sine value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Sin(const XTensor & a); XTensor Sin(const XTensor & a);
/* set every entry to its sine value */ /* set every entry to its sine value */
void Sin(const XTensor & a, XTensor & b, bool requireLink = false); void Sin(const XTensor & a, XTensor & b);
/* set every entry to its cosine value */ /* set every entry to its cosine value */
void _Cos(const XTensor * a, XTensor * b); void _Cos(const XTensor * a, XTensor * b);
/* set every entry to its cosine value (do it on site) /* set every entry to its cosine value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _CosMe(XTensor * a); void _CosMe(XTensor * a);
/* set every entry to its cosine value (do it on site)
keep the result in the input tensor a and return nothing */
void CosMe(XTensor & a);
/* set every entry to its cosine value (return an XTensor structure) /* set every entry to its cosine value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Cos(const XTensor & a); XTensor Cos(const XTensor & a);
/* set every entry to its cosine value */ /* set every entry to its cosine value */
void Cos(const XTensor & a, XTensor & b, bool requireLink = false); void Cos(const XTensor & a, XTensor & b);
/* set every entry to its tangent value */ /* set every entry to its tangent value */
void _Tan(const XTensor * a, XTensor * b); void _Tan(const XTensor * a, XTensor * b);
/* set every entry to its tangent value (do it on site) /* set every entry to its tangent value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _TanMe(XTensor * a); void _TanMe(XTensor * a);
/* set every entry to its tangent value (do it on site)
keep the result in the input tensor a and return nothing */
void TanMe(XTensor & a);
/* set every entry to its tangent value (return an XTensor structure) /* set every entry to its tangent value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Tan(const XTensor & a); XTensor Tan(const XTensor & a);
/* set every entry to its tangent value */ /* set every entry to its tangent value */
void Tan(const XTensor & a, XTensor & b, bool requireLink = false); void Tan(const XTensor & a, XTensor & b);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -41,8 +41,8 @@ void _ReduceMax(const XTensor * input, XTensor * output, int dim) ...@@ -41,8 +41,8 @@ void _ReduceMax(const XTensor * input, XTensor * output, int dim)
CheckNTErrors((input->order == output->order + 1), "Incorrect tensor sizes!"); CheckNTErrors((input->order == output->order + 1), "Incorrect tensor sizes!");
CheckNTErrors((input->order > dim && dim >=0), "Illegal dimension to reduce!"); CheckNTErrors((input->order > dim && dim >=0), "Illegal dimension to reduce!");
CheckNTErrors((input->dataType == output->dataType), "Unmatched data types!"); CheckNTErrors((input->dataType == output->dataType), "Unmatched data types!");
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
CheckNTErrors(dimRDI >= 0, "Wrong dimension!"); CheckNTErrors(dimRDI >= 0, "Wrong dimension!");
for(int i = 0; i < input->order; i++){ for(int i = 0; i < input->order; i++){
...@@ -104,7 +104,7 @@ make a new tensor to keep the result and return it ...@@ -104,7 +104,7 @@ make a new tensor to keep the result and return it
XTensor ReduceMax(const XTensor &input, int dim) XTensor ReduceMax(const XTensor &input, int dim)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < order; i++){ for(int i = 0; i < order; i++){
...@@ -137,9 +137,8 @@ get the max value of the items along a dimension of the tensor ...@@ -137,9 +137,8 @@ get the max value of the items along a dimension of the tensor
>> input - the input tensor >> input - the input tensor
>> output - the output tensor >> output - the output tensor
>> dim - the dimension where the reduction is performed on >> dim - the dimension where the reduction is performed on
>> requireLink - if add operation to network
*/ */
void ReduceMax(const XTensor &input, XTensor &output, int dim, bool requireLink) void ReduceMax(const XTensor &input, XTensor &output, int dim)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
...@@ -163,7 +162,7 @@ void ReduceMax(const XTensor &input, XTensor &output, int dim, bool requireLink) ...@@ -163,7 +162,7 @@ void ReduceMax(const XTensor &input, XTensor &output, int dim, bool requireLink)
/* call _ReduceMax function */ /* call _ReduceMax function */
_ReduceMax(&input, &output, dim); _ReduceMax(&input, &output, dim);
if (requireLink) { if (output.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX); XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
......
...@@ -504,7 +504,7 @@ void _CudaReduceMax(const XTensor * input, XTensor * output, int dim) ...@@ -504,7 +504,7 @@ void _CudaReduceMax(const XTensor * input, XTensor * output, int dim)
CheckNTErrors(input->order > dim && dim >=0, "Illegal dimension to reduce!"); CheckNTErrors(input->order > dim && dim >=0, "Illegal dimension to reduce!");
CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!"); CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!");
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
for(int i = 0; i < input->order; i++){ for(int i = 0; i < input->order; i++){
if(i < dimRDI){ if(i < dimRDI){
CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i], "Unmatched tensors!"); CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i], "Unmatched tensors!");
......
...@@ -36,7 +36,7 @@ make a new tensor to keep the result and return it ...@@ -36,7 +36,7 @@ make a new tensor to keep the result and return it
XTensor ReduceMax(const XTensor &input, int dim); XTensor ReduceMax(const XTensor &input, int dim);
/* get the max value of the items along a dimension of the tensor. */ /* get the max value of the items along a dimension of the tensor. */
void ReduceMax(const XTensor &input, XTensor &output, int dim, bool requireLink = false); void ReduceMax(const XTensor &input, XTensor &output, int dim);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -39,7 +39,7 @@ void _ReduceMean(const XTensor * input, XTensor * output, int dim) ...@@ -39,7 +39,7 @@ void _ReduceMean(const XTensor * input, XTensor * output, int dim)
{ {
CheckNTErrors((input->order > dim), "Illegal dimension specified!"); CheckNTErrors((input->order > dim), "Illegal dimension specified!");
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
int num = input->dimSizeRDI[dimRDI]; int num = input->dimSizeRDI[dimRDI];
_ReduceSum(input, output, dim); _ReduceSum(input, output, dim);
...@@ -59,7 +59,7 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i ...@@ -59,7 +59,7 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
XTensor ReduceMean(const XTensor &input, int dim) XTensor ReduceMean(const XTensor &input, int dim)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < order; i++){ for(int i = 0; i < order; i++){
...@@ -94,9 +94,8 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i ...@@ -94,9 +94,8 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
>> input - the input tensor >> input - the input tensor
>> output - the output tensor >> output - the output tensor
>> dim - the dimension where the reduction is performed on >> dim - the dimension where the reduction is performed on
>> requireLink - if add operation to network
*/ */
void ReduceMean(const XTensor &input, XTensor &output, int dim, bool requireLink) void ReduceMean(const XTensor &input, XTensor &output, int dim)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
...@@ -120,7 +119,7 @@ void ReduceMean(const XTensor &input, XTensor &output, int dim, bool requireLink ...@@ -120,7 +119,7 @@ void ReduceMean(const XTensor &input, XTensor &output, int dim, bool requireLink
/* call _ReduceMean function */ /* call _ReduceMean function */
_ReduceMean(&input, &output, dim); _ReduceMean(&input, &output, dim);
if (requireLink) { if (output.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMEAN); XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMEAN);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
......
...@@ -43,7 +43,7 @@ XTensor ReduceMean(const XTensor &input, int dim); ...@@ -43,7 +43,7 @@ XTensor ReduceMean(const XTensor &input, int dim);
get the mean value along a dimension of the tensor get the mean value along a dimension of the tensor
For a 1-dimensional data array a, mean = (1/n) * sum_i input_i For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
*/ */
void ReduceMean(const XTensor &input, XTensor &output, int dim, bool requireLink = false); void ReduceMean(const XTensor &input, XTensor &output, int dim);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -50,7 +50,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor ...@@ -50,7 +50,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
CheckNTErrors((input->dataType == output->dataType), "Unmatched data types!"); CheckNTErrors((input->dataType == output->dataType), "Unmatched data types!");
CheckNTErrors((shift == NULL || XTensor::IsSameShaped(output, shift)), "Incorrect shift tensor size!"); CheckNTErrors((shift == NULL || XTensor::IsSameShaped(output, shift)), "Incorrect shift tensor size!");
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
CheckNTErrors(dimRDI >= 0, "Wrong dimension!"); CheckNTErrors(dimRDI >= 0, "Wrong dimension!");
for(int i = 0; i < input->order; i++){ for(int i = 0; i < input->order; i++){
...@@ -215,7 +215,7 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true ...@@ -215,7 +215,7 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true
XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE power, bool isExp) XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE power, bool isExp)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < order; i++){ for(int i = 0; i < order; i++){
...@@ -244,7 +244,7 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow ...@@ -244,7 +244,7 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow
return output; return output;
} }
void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &shift, DTYPE power, bool isExp, bool requireLink) void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &shift, DTYPE power, bool isExp)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
...@@ -268,7 +268,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &sh ...@@ -268,7 +268,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &sh
/* call _ReduceSum function */ /* call _ReduceSum function */
_ReduceSum(&input, &output, dim, &shift, power, isExp); _ReduceSum(&input, &output, dim, &shift, power, isExp);
if (requireLink) { if (output.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUM); XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUM);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
...@@ -294,7 +294,7 @@ sum = \sum_i exp((a_i)^power) if isExp == true ...@@ -294,7 +294,7 @@ sum = \sum_i exp((a_i)^power) if isExp == true
XTensor ReduceSum(const XTensor &input, int dim, DTYPE power, bool isExp) XTensor ReduceSum(const XTensor &input, int dim, DTYPE power, bool isExp)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < order; i++){ for(int i = 0; i < order; i++){
...@@ -336,9 +336,8 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true ...@@ -336,9 +336,8 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true
>> shift - shift the input >> shift - shift the input
>> isExp - specify if the exp() is performed >> isExp - specify if the exp() is performed
>> power - we perform pow(item_i, power) on each item in the array >> power - we perform pow(item_i, power) on each item in the array
>> requireLink - if add operation to network
*/ */
void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power, bool isExp, bool requireLink) void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power, bool isExp)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
...@@ -362,7 +361,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power, bool ...@@ -362,7 +361,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power, bool
/* call _ReduceSum function */ /* call _ReduceSum function */
_ReduceSum(&input, &output, dim, NULL, power, isExp); _ReduceSum(&input, &output, dim, NULL, power, isExp);
if (requireLink) { if (output.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCESUM); XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCESUM);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
......
...@@ -341,7 +341,7 @@ void KernelReduceSumFast(DTYPE * input, DTYPE * output, ...@@ -341,7 +341,7 @@ void KernelReduceSumFast(DTYPE * input, DTYPE * output,
if (tid < blockDim.x / 32) if (tid < blockDim.x / 32)
value = data[tid]; value = data[tid];
else else
value = 0; value = 0;
value = shflDownReduceSum(value); value = shflDownReduceSum(value);
if (tid == 0 && blockIdx.x < reducedStrideNum) { if (tid == 0 && blockIdx.x < reducedStrideNum) {
...@@ -692,7 +692,7 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen ...@@ -692,7 +692,7 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!"); CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!");
CheckNTErrors(shift == NULL || output->unitNum == shift->unitNum, "Incorrect shift tensor size!"); CheckNTErrors(shift == NULL || output->unitNum == shift->unitNum, "Incorrect shift tensor size!");
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
for(int i = 0; i < input->order; i++){ for(int i = 0; i < input->order; i++){
if(i < dimRDI){ if(i < dimRDI){
CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i], "Unmatched tensors!"); CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i], "Unmatched tensors!");
......
...@@ -44,7 +44,7 @@ sum = \sum_i exp(a_i - shift) if isExp == true ...@@ -44,7 +44,7 @@ sum = \sum_i exp(a_i - shift) if isExp == true
*/ */
XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE power = (DTYPE)1.0F, bool isExp = false); XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE power = (DTYPE)1.0F, bool isExp = false);
void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &shift, DTYPE power = (DTYPE)1.0F, bool isExp = false, bool requireLink = false); void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &shift, DTYPE power = (DTYPE)1.0F, bool isExp = false);
/* /*
sum the items along a dimension of the tensor (return an XTensor structure) sum the items along a dimension of the tensor (return an XTensor structure)
...@@ -61,7 +61,7 @@ For a 1-dimensional data array a, ...@@ -61,7 +61,7 @@ For a 1-dimensional data array a,
sum = \sum_i (a_i - shift) if isExp == false sum = \sum_i (a_i - shift) if isExp == false
sum = \sum_i exp(a_i - shift) if isExp == true sum = \sum_i exp(a_i - shift) if isExp == true
*/ */
void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power = (DTYPE)1.0F, bool isExp = false, bool requireLink = false); void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power = (DTYPE)1.0F, bool isExp = false);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -55,7 +55,7 @@ For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2 ...@@ -55,7 +55,7 @@ For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2
XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift) XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < order; i++){ for(int i = 0; i < order; i++){
...@@ -91,9 +91,8 @@ For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2 ...@@ -91,9 +91,8 @@ For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2
>> output - the output tensor >> output - the output tensor
>> dim - the dimension where the reduction is performed on >> dim - the dimension where the reduction is performed on
>> shift - bias on the input >> shift - bias on the input
>> requireLink - if add operation to network
*/ */
void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTensor &shift, bool requireLink) void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTensor &shift)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
...@@ -117,7 +116,7 @@ void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTen ...@@ -117,7 +116,7 @@ void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTen
/* call _ReduceSumSquared function */ /* call _ReduceSumSquared function */
_ReduceSumSquared(&input, &output, dim, &shift); _ReduceSumSquared(&input, &output, dim, &shift);
if (requireLink) { if (output.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUMSQUARED); XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUMSQUARED);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
......
...@@ -45,7 +45,7 @@ squared sum of the items along a dimension of the tensor ...@@ -45,7 +45,7 @@ squared sum of the items along a dimension of the tensor
For a 1-dimensional data array a, For a 1-dimensional data array a,
sum = \sum_i (a_i - shift)^2 sum = \sum_i (a_i - shift)^2
*/ */
void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTensor &shift, bool requireLink = false); void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTensor &shift);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -38,7 +38,7 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2 ...@@ -38,7 +38,7 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
*/ */
void _ReduceVariance(const XTensor * input, XTensor * output, int dim, const XTensor * mean) void _ReduceVariance(const XTensor * input, XTensor * output, int dim, const XTensor * mean)
{ {
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
int num = input->dimSizeRDI[dimRDI]; int num = input->dimSizeRDI[dimRDI];
_ReduceSum(input, output, dim, mean, 2.0F); _ReduceSum(input, output, dim, mean, 2.0F);
_ScaleAndShiftMe(output, (DTYPE)1 / num, 0); _ScaleAndShiftMe(output, (DTYPE)1 / num, 0);
...@@ -58,7 +58,7 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2 ...@@ -58,7 +58,7 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean) XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < order; i++){ for(int i = 0; i < order; i++){
...@@ -94,9 +94,8 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2 ...@@ -94,9 +94,8 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
>> output - the output tensor >> output - the output tensor
>> dim - the dimension where the reduction is performed on >> dim - the dimension where the reduction is performed on
>> mean - the mean value >> mean - the mean value
>> requireLink - if add operation to network
*/ */
void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTensor &mean, bool requireLink) void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTensor &mean)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
...@@ -120,7 +119,7 @@ void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTenso ...@@ -120,7 +119,7 @@ void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTenso
/* call _ReduceVariance function */ /* call _ReduceVariance function */
_ReduceVariance(&input, &output, dim, &mean); _ReduceVariance(&input, &output, dim, &mean);
if (requireLink) { if (output.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&input, &mean, &output, REDUCE_REDUCEVARIANCE); XLink::MakeLink(&input, &mean, &output, REDUCE_REDUCEVARIANCE);
XLink::AddParamToHeadInt(&output, dim); XLink::AddParamToHeadInt(&output, dim);
......
...@@ -43,7 +43,7 @@ XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean); ...@@ -43,7 +43,7 @@ XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean);
variance of the items along a dimension of the tensor variance of the items along a dimension of the tensor
For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2 For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
*/ */
void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTensor &mean, bool requireLink = false); void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTensor &mean);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -85,7 +85,7 @@ void _ConcatenateSolely(const TensorList * smalls, XTensor * big, int dim) ...@@ -85,7 +85,7 @@ void _ConcatenateSolely(const TensorList * smalls, XTensor * big, int dim)
} }
} }
else { else {
StrList* sourceArrays = new StrList(smalls->count); StrList* sourceArrays = new StrList(smalls->count);
int * blockSizes = new int[smalls->count]; int * blockSizes = new int[smalls->count];
for (int i = 0; i < smalls->count; i++) { for (int i = 0; i < smalls->count; i++) {
XTensor * tensor = (XTensor*)smalls->GetItem(i); XTensor * tensor = (XTensor*)smalls->GetItem(i);
......
...@@ -232,7 +232,7 @@ XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim) ...@@ -232,7 +232,7 @@ XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim)
return t; return t;
} }
void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim, bool requireLink) void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim)
{ {
if (!t.isInit || !CheckMergeSize(&s, &t, whereToMerge, leadingDim)) { if (!t.isInit || !CheckMergeSize(&s, &t, whereToMerge, leadingDim)) {
if (leadingDim < 0) if (leadingDim < 0)
...@@ -261,7 +261,7 @@ void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim, bool ...@@ -261,7 +261,7 @@ void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim, bool
/* call _Merge function */ /* call _Merge function */
_Merge(&s, &t, whereToMerge, leadingDim); _Merge(&s, &t, whereToMerge, leadingDim);
if (requireLink) { if (t.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_MERGE); XLink::MakeLink(&s, NULL, &t, SHAPE_MERGE);
XLink::AddParamToHeadInt(&t, whereToMerge); XLink::AddParamToHeadInt(&t, whereToMerge);
......
...@@ -33,7 +33,7 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim = - ...@@ -33,7 +33,7 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim = -
e.g., (M, N/3, 3) -> (M, N) */ e.g., (M, N/3, 3) -> (M, N) */
XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim = -1); XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim = -1);
void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim = -1, bool requireLink = false); void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim = -1);
/* merge small tensors into a big tensor */ /* merge small tensors into a big tensor */
void _Merge(const TensorList * smalls, XTensor * big, int whereToMerge); void _Merge(const TensorList * smalls, XTensor * big, int whereToMerge);
......
...@@ -41,6 +41,13 @@ a = permuted(a) ...@@ -41,6 +41,13 @@ a = permuted(a)
*/ */
void _PermuteMe(XTensor * a, int * dimPermute); void _PermuteMe(XTensor * a, int * dimPermute);
/*
permute the tensor dimensions (do it on site).
keep the result in the input tensor and return nothing.
a = permuted(a)
this version takes the tensor by reference; _PermuteMe takes a pointer.
NOTE(review): dimPermute presumably lists the target order of the dimensions - confirm against the implementation
*/
void PermuteMe(XTensor &a, int * dimPermute);
/* /*
make a tensor with permuted dimensions (return an XTensor structure). make a tensor with permuted dimensions (return an XTensor structure).
make a new tensor to keep the result and return it. make a new tensor to keep the result and return it.
......
...@@ -43,12 +43,12 @@ XTensor Reshape(XTensor &s, int order, int * dimSize) ...@@ -43,12 +43,12 @@ XTensor Reshape(XTensor &s, int order, int * dimSize)
t.Reshape(order, dimSize); t.Reshape(order, dimSize);
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE); XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE);
return t; return t;
} }
void Reshape(XTensor &s, XTensor &t, int order, int * dimSize, bool requireLink) void Reshape(XTensor &s, XTensor &t, int order, int * dimSize)
{ {
if (!t.isInit || !XTensor::IsSameShaped(&t, &s)) { if (!t.isInit || !XTensor::IsSameShaped(&t, &s)) {
InitTensor(&t, &s); InitTensor(&t, &s);
...@@ -57,7 +57,7 @@ void Reshape(XTensor &s, XTensor &t, int order, int * dimSize, bool requireLink) ...@@ -57,7 +57,7 @@ void Reshape(XTensor &s, XTensor &t, int order, int * dimSize, bool requireLink)
/* call Reshape function */ /* call Reshape function */
t.Reshape(order, dimSize); t.Reshape(order, dimSize);
if (requireLink) { if (t.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE); XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE);
} }
......
...@@ -29,7 +29,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,7 +29,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* reshape the tensor */ /* reshape the tensor */
XTensor Reshape(XTensor &s, int order, int * dimSize); XTensor Reshape(XTensor &s, int order, int * dimSize);
void Reshape(XTensor &s, XTensor &t, int order, int * dimSize, bool requireLink = false); void Reshape(XTensor &s, XTensor &t, int order, int * dimSize);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __RESHAPE_H__ #endif // __RESHAPE_H__
...@@ -227,7 +227,7 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum) ...@@ -227,7 +227,7 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
return t; return t;
} }
void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum, bool requireLink) void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum)
{ {
if (!t.isInit || !CheckSplitSize(&s, &t, whereToSplit, splitNum)) { if (!t.isInit || !CheckSplitSize(&s, &t, whereToSplit, splitNum)) {
int order = s.order + 1; int order = s.order + 1;
...@@ -251,7 +251,7 @@ void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum, bool re ...@@ -251,7 +251,7 @@ void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum, bool re
/* call _Split function */ /* call _Split function */
_Split(&s, &t, whereToSplit, splitNum); _Split(&s, &t, whereToSplit, splitNum);
if (requireLink) { if (t.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_SPLIT); XLink::MakeLink(&s, NULL, &t, SHAPE_SPLIT);
XLink::AddParamToHeadInt(&t, whereToSplit); XLink::AddParamToHeadInt(&t, whereToSplit);
......
...@@ -41,7 +41,7 @@ e.g., (M, N) -> (M, N/3, 3) ...@@ -41,7 +41,7 @@ e.g., (M, N) -> (M, N/3, 3)
*/ */
XTensor Split(const XTensor &s, int whereToSplit, int splitNum); XTensor Split(const XTensor &s, int whereToSplit, int splitNum);
void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum, bool requireLink = false); void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum);
/* split a big tensor into small tensors */ /* split a big tensor into small tensors */
void _Split(const XTensor * big, TensorList * smalls, int whereToSplit, int splitNum); void _Split(const XTensor * big, TensorList * smalls, int whereToSplit, int splitNum);
......
...@@ -89,6 +89,20 @@ void _SqueezeMe(XTensor * source, int leadingDim) ...@@ -89,6 +89,20 @@ void _SqueezeMe(XTensor * source, int leadingDim)
} }
/*
squeeze the tensor along the specified dimension (do it on site)
the squeezed result overwrites the input tensor and nothing is returned
>> source - the tensor to squeeze; it is used as both the input and the output
>> leadingDim - the dimension that we would squeeze
                if leadingDim = -1, squeeze all dimensions that are 1
                else, squeeze the specified dimension
*/
void SqueezeMe(XTensor& source, int leadingDim)
{
    XTensor * self = &source;

    /* the same tensor is passed as source and target, so the squeeze is done on site */
    _Squeeze(self, self, leadingDim);
}
/*
squeeze the tensor along the specified dimension (return an XTensor structure) squeeze the tensor along the specified dimension (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
...@@ -112,7 +126,7 @@ XTensor Squeeze(XTensor & source, int leadingDim) ...@@ -112,7 +126,7 @@ XTensor Squeeze(XTensor & source, int leadingDim)
return target; return target;
} }
void Squeeze(XTensor & source, XTensor & target, int leadingDim, bool requireLink) void Squeeze(XTensor & source, XTensor & target, int leadingDim)
{ {
if (!target.isInit || !XTensor::IsSameShaped(&source, &target)) { if (!target.isInit || !XTensor::IsSameShaped(&source, &target)) {
InitTensor(&target, &source); InitTensor(&target, &source);
...@@ -121,7 +135,7 @@ void Squeeze(XTensor & source, XTensor & target, int leadingDim, bool requireLin ...@@ -121,7 +135,7 @@ void Squeeze(XTensor & source, XTensor & target, int leadingDim, bool requireLin
/* call _Squeeze function */ /* call _Squeeze function */
_Squeeze(&source, &target, leadingDim); _Squeeze(&source, &target, leadingDim);
if (requireLink) { if (target.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&source, NULL, &target, SHAPE_SQUEEZE); XLink::MakeLink(&source, NULL, &target, SHAPE_SQUEEZE);
} }
......
...@@ -33,11 +33,15 @@ void _Squeeze(XTensor * source, XTensor * target, int leadingDim = -1); ...@@ -33,11 +33,15 @@ void _Squeeze(XTensor * source, XTensor * target, int leadingDim = -1);
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _SqueezeMe(XTensor * source, int leadingDim = -1); void _SqueezeMe(XTensor * source, int leadingDim = -1);
/* squeeze the tensor along the specified dimension (do it on site)
keep the result in the input tensor (source) and return nothing
this version takes the tensor by reference; _SqueezeMe takes a pointer */
void SqueezeMe(XTensor & source, int leadingDim = -1);
/* squeeze the tensor along the specified dimension (return an XTensor structure) /* squeeze the tensor along the specified dimension (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Squeeze(XTensor & source, int leadingDim = -1); XTensor Squeeze(XTensor & source, int leadingDim = -1);
void Squeeze(XTensor & source, XTensor & target, int leadingDim = -1, bool requireLink = false); void Squeeze(XTensor & source, XTensor & target, int leadingDim = -1);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -166,7 +166,7 @@ XTensor Unsqueeze(const XTensor &a, int dim, int dSize) ...@@ -166,7 +166,7 @@ XTensor Unsqueeze(const XTensor &a, int dim, int dSize)
return b; return b;
} }
void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize, bool requireLink) void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize)
{ {
if (!b.isInit || !CheckUnsqueezeSize(&a, &b, dim, dSize)) { if (!b.isInit || !CheckUnsqueezeSize(&a, &b, dim, dSize)) {
int order = a.order + 1; int order = a.order + 1;
...@@ -191,7 +191,7 @@ void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize, bool requireLin ...@@ -191,7 +191,7 @@ void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize, bool requireLin
/* call _Unsqueeze function */ /* call _Unsqueeze function */
_Unsqueeze(&a, &b, dim, dSize); _Unsqueeze(&a, &b, dim, dSize);
if (requireLink) { if (b.enableGrad) {
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, NULL, &b, SHAPE_UNSQUEEZE); XLink::MakeLink(&a, NULL, &b, SHAPE_UNSQUEEZE);
XLink::AddParamToHeadInt(&b, dim); XLink::AddParamToHeadInt(&b, dim);
......
...@@ -35,7 +35,7 @@ void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize); ...@@ -35,7 +35,7 @@ void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize);
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Unsqueeze(const XTensor &a, int dim, int dSize); XTensor Unsqueeze(const XTensor &a, int dim, int dSize);
void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize, bool requireLink = false); void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -45,7 +45,7 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim) ...@@ -45,7 +45,7 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim)
CheckNTErrors((a->order == index->order), "Unmatched input tensors!"); CheckNTErrors((a->order == index->order), "Unmatched input tensors!");
CheckNTErrors((index->dataType == X_INT), "Wrong data type!"); CheckNTErrors((index->dataType == X_INT), "Wrong data type!");
int dimRDI = a->order - dim - 1; int dimRDI = a->order - dim - 1;
/* make the index tensor */ /* make the index tensor */
index->SetAscendingOrder(dim); index->SetAscendingOrder(dim);
...@@ -67,7 +67,7 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim) ...@@ -67,7 +67,7 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim)
blockNum *= a->dimSizeRDI[i]; blockNum *= a->dimSizeRDI[i];
int blockSize = stride * strideNum; int blockSize = stride * strideNum;
_CopyValues(a, b); _CopyValues(a, b);
for (int k = 0; k < blockNum; k++) { for (int k = 0; k < blockNum; k++) {
for (int i = 0; i < stride; i++) { for (int i = 0; i < stride; i++) {
void * dataB = (char*)b->data + (k * blockSize + i) * b->unitSize; void * dataB = (char*)b->data + (k * blockSize + i) * b->unitSize;
...@@ -98,6 +98,21 @@ void _SortMe(XTensor * a, XTensor * index, int dim) ...@@ -98,6 +98,21 @@ void _SortMe(XTensor * a, XTensor * index, int dim)
} }
/* /*
sort the tensor along a given dimension (do it on site)
keep the result in the input tensor a and return nothing
>> a - input tensor
>> index - index of the items in the resulting tensor
>> dim - the dimension along which the sorting is performed
*/
void SortMe(XTensor& a, XTensor& index, int dim)
{
    /* the same tensor is handed to _Sort as both its input and its output,
       so the result is kept in a itself */
    XTensor * const inPlace = &a;
    XTensor * const order = &index;

    _Sort(inPlace, inPlace, order, dim);
}
/*
sort the tensor along a given dimension (return an XTensor structure) sort the tensor along a given dimension (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
......
...@@ -217,7 +217,7 @@ void _CudaSortBig(const XTensor * a, XTensor * b, XTensor * indexA, XTensor * in ...@@ -217,7 +217,7 @@ void _CudaSortBig(const XTensor * a, XTensor * b, XTensor * indexA, XTensor * in
CheckNTErrors((a->order > dim && dim >= 0), "Incorrect dimension specified!"); CheckNTErrors((a->order > dim && dim >= 0), "Incorrect dimension specified!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
int dimRDI = a->order - dim - 1; int dimRDI = a->order - dim - 1;
if (k < 0 || k > b->dimSizeRDI[dimRDI]) if (k < 0 || k > b->dimSizeRDI[dimRDI])
k = b->dimSizeRDI[dimRDI]; k = b->dimSizeRDI[dimRDI];
......
...@@ -35,6 +35,12 @@ keep the result in the input tensor a and return nothing ...@@ -35,6 +35,12 @@ keep the result in the input tensor a and return nothing
*/ */
void _SortMe(XTensor * a, XTensor * index, int dim); void _SortMe(XTensor * a, XTensor * index, int dim);
/*
sort the data along a given dimension (do it on site)
keep the result in the input tensor a and return nothing
*/
void SortMe(XTensor & a, XTensor & index, int dim);
/* /*
sort the data along a given dimension (return an XTensor structure) sort the data along a given dimension (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
......
...@@ -770,22 +770,22 @@ void KernelTopKRadixSelect(unsigned int * input, int stride, int strideNum, ...@@ -770,22 +770,22 @@ void KernelTopKRadixSelect(unsigned int * input, int stride, int strideNum,
/* /*
if (idx == 0) if (idx == 0)
{ {
unsigned int* uintOutput = new unsigned int; unsigned int* uintOutput = new unsigned int;
int* tmpIndex = new int; int* tmpIndex = new int;
//*******************something wrong*************************** //*******************something wrong***************************
cudaMalloc((void **)&uintOutput, sizeof(unsigned int)* k); cudaMalloc((void **)&uintOutput, sizeof(unsigned int)* k);
cudaMalloc((void **)&tmpIndex, sizeof(unsigned int)*k); cudaMalloc((void **)&tmpIndex, sizeof(unsigned int)*k);
//************************************************************* //*************************************************************
collectNumberOld(input, limit, k, desire, uintOutput, tmpIndex, stride, strideNum); collectNumberOld(input, limit, k, desire, uintOutput, tmpIndex, stride, strideNum);
int blockIndex = idy / stride; int blockIndex = idy / stride;
int offsetInBlock = idy% stride; int offsetInBlock = idy% stride;
for (int i = stride * k * blockIndex + offsetInBlock, j = 0; j < k; j++, i += stride) for (int i = stride * k * blockIndex + offsetInBlock, j = 0; j < k; j++, i += stride)
{ {
//for(int i = ) //for(int i = )
output[i] = deconvert(uintOutput[j]); output[i] = deconvert(uintOutput[j]);
index[i] = tmpIndex[j]; index[i] = tmpIndex[j];
} }
} }
__syncthreads(); __syncthreads();
*/ */
......
...@@ -67,8 +67,8 @@ void CudaSetAscendingOrder(XTensor * a, int dim) ...@@ -67,8 +67,8 @@ void CudaSetAscendingOrder(XTensor * a, int dim)
{ {
CheckNTErrors((a->dataType == X_INT), "TODO!"); CheckNTErrors((a->dataType == X_INT), "TODO!");
int dimRDI = a->order - dim - 1; int dimRDI = a->order - dim - 1;
int stride = 1; int stride = 1;
int strideNum = a->dimSizeRDI[dimRDI]; int strideNum = a->dimSizeRDI[dimRDI];
for(int i = 0; i < dimRDI; i++) for(int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i]; stride *= a->dimSizeRDI[i];
......
...@@ -56,7 +56,7 @@ void RunParallel2D(XPRunner * parallelRunner, void * job, ...@@ -56,7 +56,7 @@ void RunParallel2D(XPRunner * parallelRunner, void * job,
va_list ap; va_list ap;
va_start(ap, argNum); va_start(ap, argNum);
for (int i = 0; i < argNum; i++) { for (int i = 0; i < argNum; i++) {
XTensor* p = va_arg(ap, XTensor*); XTensor* p = va_arg(ap, XTensor*);
jobArgList->Add(p); jobArgList->Add(p);
} }
va_end(ap); va_end(ap);
...@@ -77,19 +77,19 @@ void RunParallel2D(XPRunner * parallelRunner, void * job, ...@@ -77,19 +77,19 @@ void RunParallel2D(XPRunner * parallelRunner, void * job,
2. other arguments 2. other arguments
*/ */
for (int i = 0; i < jobNum; i++) { for (int i = 0; i < jobNum; i++) {
IntList* indexArgs = new IntList(4); IntList* indexArgs = new IntList(4);
TensorList * blockArgs = new TensorList(argNum); TensorList * blockArgs = new TensorList(argNum);
int * blockIndex = indexList + i * 4; int * blockIndex = indexList + i * 4;
indexArgs->Add(blockIndex[0]); indexArgs->Add(blockIndex[0]);
indexArgs->Add(blockIndex[1]); indexArgs->Add(blockIndex[1]);
indexArgs->Add(blockIndex[2]); indexArgs->Add(blockIndex[2]);
indexArgs->Add(blockIndex[3]); indexArgs->Add(blockIndex[3]);
for (int j = 0; j < argNum; j++) for (int j = 0; j < argNum; j++)
blockArgs->Add(jobArgList->GetItem(j)); blockArgs->Add(jobArgList->GetItem(j));
args->Add((XTensor*)indexArgs); args->Add((XTensor*)indexArgs);
args->Add((XTensor*)blockArgs); args->Add((XTensor*)blockArgs);
jobs->Add((XTensor*)job); jobs->Add((XTensor*)job);
......
...@@ -84,7 +84,7 @@ XTensor HardTanH(const XTensor &x) ...@@ -84,7 +84,7 @@ XTensor HardTanH(const XTensor &x)
return y; return y;
} }
void HardTanH(const XTensor &x, XTensor &y, bool requireLink) void HardTanH(const XTensor &x, XTensor &y)
{ {
if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) { if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) {
InitTensor(&y, &x); InitTensor(&y, &x);
...@@ -93,7 +93,7 @@ void HardTanH(const XTensor &x, XTensor &y, bool requireLink) ...@@ -93,7 +93,7 @@ void HardTanH(const XTensor &x, XTensor &y, bool requireLink)
/* call _HardTanH function */ /* call _HardTanH function */
_HardTanH(&x, &y); _HardTanH(&x, &y);
if (requireLink) { if (y.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_HARDTANH); XLink::MakeLink(&x, NULL, &y, FUNC_HARDTANH);
} }
......
...@@ -40,7 +40,7 @@ void _HardTanH(const XTensor * x, XTensor * y); ...@@ -40,7 +40,7 @@ void _HardTanH(const XTensor * x, XTensor * y);
/* hard tanh function (return an XTensor structure) */ /* hard tanh function (return an XTensor structure) */
XTensor HardTanH(const XTensor &x); XTensor HardTanH(const XTensor &x);
void HardTanH(const XTensor &x, XTensor &y, bool requireLink = false); void HardTanH(const XTensor &x, XTensor &y);
/* de/dx */ /* de/dx */
void _HardTanHBackward(XTensor * gold, XTensor * y, XTensor * x, void _HardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
......
...@@ -58,7 +58,7 @@ XTensor Identity(const XTensor &x) ...@@ -58,7 +58,7 @@ XTensor Identity(const XTensor &x)
return y; return y;
} }
void Identity(const XTensor &x, XTensor &y, bool requireLink) void Identity(const XTensor &x, XTensor &y)
{ {
if (!y.isInit || !y.IsSameShaped(&y, &x)) { if (!y.isInit || !y.IsSameShaped(&y, &x)) {
InitTensor(&y, &x); InitTensor(&y, &x);
...@@ -67,7 +67,7 @@ void Identity(const XTensor &x, XTensor &y, bool requireLink) ...@@ -67,7 +67,7 @@ void Identity(const XTensor &x, XTensor &y, bool requireLink)
/* call _Identity function */ /* call _Identity function */
_Identity(&x, &y); _Identity(&x, &y);
if (requireLink) { if (y.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_IDENTITY); XLink::MakeLink(&x, NULL, &y, FUNC_IDENTITY);
} }
......
...@@ -33,7 +33,7 @@ void _Identity(const XTensor * x, XTensor * y); ...@@ -33,7 +33,7 @@ void _Identity(const XTensor * x, XTensor * y);
/* identity function y = x (return an XTensor structure) */ /* identity function y = x (return an XTensor structure) */
XTensor Identity(const XTensor &x); XTensor Identity(const XTensor &x);
void Identity(const XTensor &x, XTensor &y, bool requireLink = false); void Identity(const XTensor &x, XTensor &y);
/* de/dx */ /* de/dx */
void _IdentityBackward(XTensor * gold, XTensor * y, XTensor * x, void _IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
......
...@@ -194,7 +194,15 @@ XTensor LogSoftmax(const XTensor &x, int leadDim) ...@@ -194,7 +194,15 @@ XTensor LogSoftmax(const XTensor &x, int leadDim)
return y; return y;
} }
void LogSoftmax(const XTensor &x, XTensor &y, int leadDim, bool requireLink) /*
log scale softmax y = log(e^x / \sum_{i} e^{x_i})
keep the result in the output tensor y and return nothing
>> x - input vector
>> y - output vector
>> leadDim - leading dimension (along which we perform reduction)
*/
void LogSoftmax(const XTensor &x, XTensor &y, int leadDim)
{ {
int ld = leadDim; int ld = leadDim;
if (ld < 0) if (ld < 0)
...@@ -207,32 +215,13 @@ void LogSoftmax(const XTensor &x, XTensor &y, int leadDim, bool requireLink) ...@@ -207,32 +215,13 @@ void LogSoftmax(const XTensor &x, XTensor &y, int leadDim, bool requireLink)
/* call _LogSoftmax function */ /* call _LogSoftmax function */
_LogSoftmax(&x, &y, ld); _LogSoftmax(&x, &y, ld);
if (requireLink) { if (y.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX); XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX);
XLink::AddParamToHeadInt(&y, ld); XLink::AddParamToHeadInt(&y, ld);
} }
} }
/*
log scale softmax y = log(e^x / \sum_{i} e^{x_i})
make a new tensor to keep the result and return it
>> x - input vector
>> y - output vector
>> leadDim - leading dimension (along which we perform reduction)
*/
void LogSoftmax(const XTensor &x, XTensor &y, int leadDim)
{
if(!XTensor::IsSameShaped(&x, &y))
InitTensor(&y, &x);
/* call _LogSoftmax function */
_LogSoftmax(&x, &y, leadDim);
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX);
XLink::AddParamToHeadInt(&y, leadDim);
}
/* /*
backward computation for dense matrices with default data type backward computation for dense matrices with default data type
......
...@@ -33,8 +33,6 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim); ...@@ -33,8 +33,6 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim);
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (return an XTensor structure) */ /* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (return an XTensor structure) */
XTensor LogSoftmax(const XTensor &x, int leadDim); XTensor LogSoftmax(const XTensor &x, int leadDim);
void LogSoftmax(const XTensor &x, XTensor &y, int leadDim, bool requireLink = false);
/* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (with both x and y as arguments) */ /* log scale softmax y = log(e^x / \sum_{i} e^{x_i}) (with both x and y as arguments) */
void LogSoftmax(const XTensor &x, XTensor &y, int leadDim); void LogSoftmax(const XTensor &x, XTensor &y, int leadDim);
......
...@@ -77,7 +77,7 @@ XTensor Rectify(const XTensor &x) ...@@ -77,7 +77,7 @@ XTensor Rectify(const XTensor &x)
return y; return y;
} }
void Rectify(const XTensor &x, XTensor &y, bool requireLink) void Rectify(const XTensor &x, XTensor &y)
{ {
if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) { if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) {
InitTensor(&y, &x); InitTensor(&y, &x);
...@@ -86,7 +86,7 @@ void Rectify(const XTensor &x, XTensor &y, bool requireLink) ...@@ -86,7 +86,7 @@ void Rectify(const XTensor &x, XTensor &y, bool requireLink)
/* call _Rectify function */ /* call _Rectify function */
_Rectify(&x, &y); _Rectify(&x, &y);
if (requireLink) { if (y.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_RECTIFY); XLink::MakeLink(&x, NULL, &y, FUNC_RECTIFY);
} }
......
...@@ -33,7 +33,7 @@ void _Rectify(const XTensor * x, XTensor * y); ...@@ -33,7 +33,7 @@ void _Rectify(const XTensor * x, XTensor * y);
/* rectify function y = max(0, x) (return an XTensor structure) */ /* rectify function y = max(0, x) (return an XTensor structure) */
XTensor Rectify(const XTensor &x); XTensor Rectify(const XTensor &x);
void Rectify(const XTensor &x, XTensor &y, bool requireLink = false); void Rectify(const XTensor &x, XTensor &y);
/* de/dx */ /* de/dx */
void _RectifyBackward(XTensor * gold, XTensor * y, XTensor * x, void _RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
......
...@@ -75,7 +75,7 @@ XTensor Sigmoid(const XTensor &x) ...@@ -75,7 +75,7 @@ XTensor Sigmoid(const XTensor &x)
return y; return y;
} }
void Sigmoid(const XTensor &x, XTensor &y, bool requireLink) void Sigmoid(const XTensor &x, XTensor &y)
{ {
if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) { if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) {
InitTensor(&y, &x); InitTensor(&y, &x);
...@@ -84,7 +84,7 @@ void Sigmoid(const XTensor &x, XTensor &y, bool requireLink) ...@@ -84,7 +84,7 @@ void Sigmoid(const XTensor &x, XTensor &y, bool requireLink)
/* call _Sigmoid function */ /* call _Sigmoid function */
_Sigmoid(&x, &y); _Sigmoid(&x, &y);
if (requireLink) { if (y.enableGrad) {
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_SIGMOID); XLink::MakeLink(&x, NULL, &y, FUNC_SIGMOID);
} }
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论