Commit 771643c6 by huchi

refactor parameter from pointer to reference

parent 04f129fc
...@@ -55,7 +55,7 @@ int main( int argc, const char ** argv ) ...@@ -55,7 +55,7 @@ int main( int argc, const char ** argv )
// fprintf(stderr, "Run this program with \"-test\" for unit test!\n"); // fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
// fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n"); // fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
//} //}
BackwardTest(); BackwardTest();
//_CrtDumpMemoryLeaks(); //_CrtDumpMemoryLeaks();
...@@ -69,9 +69,9 @@ void BackwardTest() ...@@ -69,9 +69,9 @@ void BackwardTest()
XTensor a; XTensor a;
XTensor b; XTensor b;
XTensor c; XTensor c;
a.enableGrad = true; a.enableGrad = true;
b.enableGrad = false; b.enableGrad = false;
c.enableGrad = false; c.enableGrad = false;
XTensor mean; XTensor mean;
XTensor origin; XTensor origin;
InitTensor2D(&a, 2, 3); InitTensor2D(&a, 2, 3);
...@@ -89,9 +89,9 @@ void BackwardTest() ...@@ -89,9 +89,9 @@ void BackwardTest()
b.Set1D(2.0F, 0); b.Set1D(2.0F, 0);
b.Set1D(1.0F, 1); b.Set1D(1.0F, 1);
DivDim(a, b, c, 0); DivDim(a, b, c, 0);
c.Dump(stderr, "c:"); c.Dump(stderr, "c:");
auto loss = CrossEntropy(c, a); auto loss = CrossEntropy(c, a);
//XLink::ShowNetwork(stderr, &c); //XLink::ShowNetwork(stderr, &c);
......
...@@ -765,15 +765,15 @@ void XMathGrad::GradMultiplyDim(XTensor * node, bool isEfficient) ...@@ -765,15 +765,15 @@ void XMathGrad::GradMultiplyDim(XTensor * node, bool isEfficient)
/* dE/da */ /* dE/da */
_MultiplyDim(node->grad, b, a->grad, n, 1.0F); _MultiplyDim(node->grad, b, a->grad, n, 1.0F);
/* dE/db */ /* dE/db */
int order = a->order; int order = a->order;
int dimSize[MAX_TENSOR_DIM_NUM]; int dimSize[MAX_TENSOR_DIM_NUM];
memcpy(dimSize, a->dimSize, sizeof(int) * a->order); memcpy(dimSize, a->dimSize, sizeof(int) * a->order);
XTensor * bGradTMP = NewTensorBuf(node->grad, node->devID, node->mem); XTensor * bGradTMP = NewTensorBuf(node->grad, node->devID, node->mem);
_Multiply(node->grad, a, bGradTMP); _Multiply(node->grad, a, bGradTMP);
if(n == order - 1){ if(n == order - 1){
int reshapedSize[MAX_TENSOR_DIM_NUM]; int reshapedSize[MAX_TENSOR_DIM_NUM];
reshapedSize[0] = a->unitNum/dimSize[order - 1]; reshapedSize[0] = a->unitNum/dimSize[order - 1];
...@@ -1078,91 +1078,91 @@ dE/db = - dE/dc * b.reduce(0,...,n-1,n+1,...) * \beta ...@@ -1078,91 +1078,91 @@ dE/db = - dE/dc * b.reduce(0,...,n-1,n+1,...) * \beta
*/ */
void XMathGrad::GradSubDim(XTensor * node, bool isEfficient) void XMathGrad::GradSubDim(XTensor * node, bool isEfficient)
{ {
XLink &income = node->income; XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUBDIM!"); CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUBDIM!");
XTensor * a = income.tails[0]; XTensor * a = income.tails[0];
XTensor * b = income.tails[1]; XTensor * b = income.tails[1];
int n = income.GetParamInt(0); int n = income.GetParamInt(0);
DTYPE beta = income.GetParam(1); DTYPE beta = income.GetParam(1);
XNoder::MakeGrad(a); XNoder::MakeGrad(a);
XNoder::MakeGrad(b); XNoder::MakeGrad(b);
_Sum(a->grad, node->grad, a->grad); _Sum(a->grad, node->grad, a->grad);
int order = a->order; int order = a->order;
int dimSize[MAX_TENSOR_DIM_NUM]; int dimSize[MAX_TENSOR_DIM_NUM];
memcpy(dimSize, a->dimSize, sizeof(int) * a->order); memcpy(dimSize, a->dimSize, sizeof(int) * a->order);
if(n == order - 1){ if(n == order - 1){
int reshapedSize[MAX_TENSOR_DIM_NUM]; int reshapedSize[MAX_TENSOR_DIM_NUM];
reshapedSize[0] = a->unitNum / dimSize[order - 1]; reshapedSize[0] = a->unitNum / dimSize[order - 1];
reshapedSize[1] = dimSize[order - 1]; reshapedSize[1] = dimSize[order - 1];
/* we reshape dE/dc to a matrix whose column number is equal to the /* we reshape dE/dc to a matrix whose column number is equal to the
size of b. Then we can reduce the matrix into a row vector. */ size of b. Then we can reduce the matrix into a row vector. */
node->grad->Reshape(2, reshapedSize); node->grad->Reshape(2, reshapedSize);
//if(b->outgo.tailNum > 1){ //if(b->outgo.tailNum > 1){
XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem); XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
_ReduceSum(node->grad, bGradTMP, 0); _ReduceSum(node->grad, bGradTMP, 0);
if(beta != 1.0F) if(beta != 1.0F)
_ScaleAndShiftMe(bGradTMP, beta); _ScaleAndShiftMe(bGradTMP, beta);
_Sub(b->grad, bGradTMP, b->grad); _Sub(b->grad, bGradTMP, b->grad);
DelTensorBuf(bGradTMP); DelTensorBuf(bGradTMP);
/*} /*}
else{ else{
_ReduceSum(node->grad, b->grad, 0); _ReduceSum(node->grad, b->grad, 0);
if(beta != 1.0F) if(beta != 1.0F)
_ScaleAndShiftMe(b->grad, beta); _ScaleAndShiftMe(b->grad, beta);
_ScaleAndShiftMe(b->grad, -1.0F); _ScaleAndShiftMe(b->grad, -1.0F);
}*/ }*/
node->grad->Reshape(order, dimSize); node->grad->Reshape(order, dimSize);
} }
else{ else{
int reshapedSize[MAX_TENSOR_DIM_NUM]; int reshapedSize[MAX_TENSOR_DIM_NUM];
reshapedSize[0] = 1; reshapedSize[0] = 1;
reshapedSize[1] = dimSize[n]; reshapedSize[1] = dimSize[n];
reshapedSize[2] = 1; reshapedSize[2] = 1;
for(int i = 0; i < order; i++){ for(int i = 0; i < order; i++){
if(i < n) if(i < n)
reshapedSize[0] *= dimSize[i]; reshapedSize[0] *= dimSize[i];
} }
reshapedSize[2] = a->unitNum / (reshapedSize[0] * reshapedSize[1]); reshapedSize[2] = a->unitNum / (reshapedSize[0] * reshapedSize[1]);
/* we reshape dE/dc to a 3D tensor of size (x, y, z) where y = |b|. /* we reshape dE/dc to a 3D tensor of size (x, y, z) where y = |b|.
Then reduce along with z and x to obtain dE/db. */ Then reduce along with z and x to obtain dE/db. */
node->grad->Reshape(3, reshapedSize); node->grad->Reshape(3, reshapedSize);
XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem); XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem);
_ReduceSum(node->grad, interGrad, 2); _ReduceSum(node->grad, interGrad, 2);
//if(b->outgo.tailNum > 1){ //if(b->outgo.tailNum > 1){
XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem); XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
_ReduceSum(interGrad, bGradTMP, 0); _ReduceSum(interGrad, bGradTMP, 0);
if(beta != 1.0F) if(beta != 1.0F)
_ScaleAndShiftMe(bGradTMP, beta); _ScaleAndShiftMe(bGradTMP, beta);
_Sub(b->grad, bGradTMP, b->grad); _Sub(b->grad, bGradTMP, b->grad);
DelTensorBuf(bGradTMP); DelTensorBuf(bGradTMP);
/*} /*}
else{ else{
_ReduceSum(interGrad, b->grad, 0); _ReduceSum(interGrad, b->grad, 0);
if(beta != 1.0F) if(beta != 1.0F)
_ScaleAndShiftMe(b->grad, beta); _ScaleAndShiftMe(b->grad, beta);
_ScaleAndShiftMe(b->grad, -1.0F); _ScaleAndShiftMe(b->grad, -1.0F);
}*/ }*/
node->grad->Reshape(order, dimSize); node->grad->Reshape(order, dimSize);
DelTensorBuf(interGrad); DelTensorBuf(interGrad);
} }
node->visitMark = NODE_FINISHED; node->visitMark = NODE_FINISHED;
} }
/* /*
......
...@@ -146,10 +146,10 @@ private: ...@@ -146,10 +146,10 @@ private:
static static
void GradSub(XTensor * node, bool isEfficient); void GradSub(XTensor * node, bool isEfficient);
/* gradient for sub with one dimension: c = a - b * \beta /* gradient for sub with one dimension: c = a - b * \beta
where the size of b is equal to that of one dimension of a */ where the size of b is equal to that of one dimension of a */
static static
void GradSubDim(XTensor * node, bool isEfficient); void GradSubDim(XTensor * node, bool isEfficient);
/* gradient for sum: c = a + b * \beta */ /* gradient for sum: c = a + b * \beta */
static static
......
...@@ -450,7 +450,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient) ...@@ -450,7 +450,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
if(income.typeID == SHAPE_SPLIT_LIST){ if(income.typeID == SHAPE_SPLIT_LIST){
int w = income.GetParamInt(0); int w = income.GetParamInt(0);
int splitID = income.GetParamInt(1); int splitID = income.GetParamInt(1);
if(whereToSplit < 0) if(whereToSplit < 0)
whereToSplit = w; whereToSplit = w;
splitNum++; splitNum++;
......
...@@ -267,7 +267,7 @@ void XNet::BackwardNode(XTensor * node, bool isEfficent) ...@@ -267,7 +267,7 @@ void XNet::BackwardNode(XTensor * node, bool isEfficent)
else if(XShapeGrad::IsShapeOP(node)) else if(XShapeGrad::IsShapeOP(node))
XShapeGrad::MakeGrad(node, isEfficent); XShapeGrad::MakeGrad(node, isEfficent);
else if(XLossGrad::IsLossOP(node)) else if(XLossGrad::IsLossOP(node))
XLossGrad::MakeGrad(node, isEfficent); XLossGrad::MakeGrad(node, isEfficent);
else{ else{
ShowNTErrors("Wrong node type!"); ShowNTErrors("Wrong node type!");
} }
...@@ -468,7 +468,7 @@ search for a node in a top-down manner by its name ...@@ -468,7 +468,7 @@ search for a node in a top-down manner by its name
*/ */
//XTensor * XNet::SearchNode(XTensor * top, const char * name) //XTensor * XNet::SearchNode(XTensor * top, const char * name)
//{ //{
//return XLink::SearchNode(top, name); //return XLink::SearchNode(top, name);
//} //}
} }
...@@ -482,12 +482,12 @@ void Train(const char * train, bool isShuffled, FNNModel &model) ...@@ -482,12 +482,12 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
Clear(model, true); Clear(model, true);
/* forward + backward process */ /* forward + backward process */
/* this is implemented by gather function */ /* this is implemented by gather function */
ForwardAutoDiff(ngrams, ngramNum, output, model); ForwardAutoDiff(ngrams, ngramNum, output, model);
/* this is implemented by multiply function */ /* this is implemented by multiply function */
//ForwardAutoDiff(inputs, output, model); //ForwardAutoDiff(inputs, output, model);
lossTensor = CrossEntropy(output, gold); lossTensor = CrossEntropy(output, gold);
/* automatic differentiation */ /* automatic differentiation */
...@@ -1177,12 +1177,12 @@ void Test(const char * test, const char * result, FNNModel &model) ...@@ -1177,12 +1177,12 @@ void Test(const char * test, const char * result, FNNModel &model)
/* forward computation */ /* forward computation */
Forward(inputs, output, model, net); Forward(inputs, output, model, net);
} }
else { else {
/* this is implemented by gather function */ /* this is implemented by gather function */
ForwardAutoDiff(ngrams, ngramNum, output, model); ForwardAutoDiff(ngrams, ngramNum, output, model);
/* this is implemented by multiply function */ /* this is implemented by multiply function */
//ForwardAutoDiff(inputs, output, model); //ForwardAutoDiff(inputs, output, model);
} }
/* prediction probabilities */ /* prediction probabilities */
......
...@@ -61,7 +61,7 @@ public: ...@@ -61,7 +61,7 @@ public:
XTensor wa; XTensor wa;
XTensor wbig; XTensor wbig;
/* size of transformed Q and K */ /* size of transformed Q and K */
int dk; int dk;
......
...@@ -86,7 +86,7 @@ struct SampleNode ...@@ -86,7 +86,7 @@ struct SampleNode
int * p; int * p;
int size; int size;
int value; int value;
int key; int key;
}; };
int CompareSampleNode(const void * a, const void * b) int CompareSampleNode(const void * a, const void * b)
...@@ -289,7 +289,7 @@ int T2TBatchLoader::LoadBatch(FILE * file, bool isLM, ...@@ -289,7 +289,7 @@ int T2TBatchLoader::LoadBatch(FILE * file, bool isLM,
int vsEnc, int vsDec, int sBatch, int wBatch, int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount, bool isSorted, int &ws, int &wCount,
int devID, XMem * mem, int devID, XMem * mem,
bool isTraining) bool isTraining)
{ {
if(isLM){ if(isLM){
return LoadBatchLM(file, batchEnc, paddingEnc, batchDec, paddingDec, gold, label, return LoadBatchLM(file, batchEnc, paddingEnc, batchDec, paddingDec, gold, label,
...@@ -331,7 +331,7 @@ int T2TBatchLoader::LoadBatchLM(FILE * file, ...@@ -331,7 +331,7 @@ int T2TBatchLoader::LoadBatchLM(FILE * file,
int vSize, int sBatch, int wBatch, int vSize, int sBatch, int wBatch,
bool isSorted, int &wCount, bool isSorted, int &wCount,
int devID, XMem * mem, int devID, XMem * mem,
bool isTraining) bool isTraining)
{ {
if(nextSeq < 0 || nextSeq >= nseqBuf) if(nextSeq < 0 || nextSeq >= nseqBuf)
LoadBuf(file, isSorted, 1); LoadBuf(file, isSorted, 1);
...@@ -490,7 +490,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file, ...@@ -490,7 +490,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
int vSizeEnc, int vSizeDec, int sBatch, int wBatch, int vSizeEnc, int vSizeDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount, bool isSorted, int &ws, int &wCount,
int devID, XMem * mem, int devID, XMem * mem,
bool isTraining) bool isTraining)
{ {
if (nextBatch < 0 || nextBatch >= bufBatchSize) { if (nextBatch < 0 || nextBatch >= bufBatchSize) {
LoadBuf(file, isSorted, 2); LoadBuf(file, isSorted, 2);
......
...@@ -132,7 +132,7 @@ public: ...@@ -132,7 +132,7 @@ public:
int vsEnc, int vsDec, int sBatch, int wBatch, int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount, bool isSorted, int &ws, int &wCount,
int devID, XMem * mem, int devID, XMem * mem,
bool isTraining); bool isTraining);
/* load a batch of sequences (for language modeling) */ /* load a batch of sequences (for language modeling) */
int LoadBatchLM(FILE * file, int LoadBatchLM(FILE * file,
...@@ -142,7 +142,7 @@ public: ...@@ -142,7 +142,7 @@ public:
int * seqs, int vs, int sBatch, int wBatch, int * seqs, int vs, int sBatch, int wBatch,
bool isSorted, int &wCount, bool isSorted, int &wCount,
int devID, XMem * mem, int devID, XMem * mem,
bool isTraining); bool isTraining);
/* load a batch of sequences (for machine translation) */ /* load a batch of sequences (for machine translation) */
int LoadBatchMT(FILE * file, int LoadBatchMT(FILE * file,
...@@ -152,7 +152,7 @@ public: ...@@ -152,7 +152,7 @@ public:
int * seqs, int vsEnc, int vsDec, int sBatch, int wBatch, int * seqs, int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount, bool isSorted, int &ws, int &wCount,
int devID, XMem * mem, int devID, XMem * mem,
bool isTraining); bool isTraining);
/* shuffle the data file */ /* shuffle the data file */
void Shuffle(const char * srcFile, const char * tgtFile); void Shuffle(const char * srcFile, const char * tgtFile);
......
...@@ -303,7 +303,7 @@ void T2TSearch::Generate(T2TStateBundle * beam) ...@@ -303,7 +303,7 @@ void T2TSearch::Generate(T2TStateBundle * beam)
/* Then, we do something similar to "preID". For the top-k predictions, we need /* Then, we do something similar to "preID". For the top-k predictions, we need
to know their indices in the vocabulary. We compute the offset of each prediction to know their indices in the vocabulary. We compute the offset of each prediction
in the vocabulary by dividing it with vocab-size and computing the remainder. */ in the vocabulary by dividing it with vocab-size and computing the remainder. */
_ModMe(index, sizeVocab); ModMe(index, sizeVocab);
score.Reshape(order, dims); score.Reshape(order, dims);
......
...@@ -528,7 +528,7 @@ get device ids for the given device information ...@@ -528,7 +528,7 @@ get device ids for the given device information
*/ */
int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs) int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
{ {
StrList* terms = new StrList(1); StrList* terms = new StrList(1);
SplitALine(devInfo, " ", terms); SplitALine(devInfo, " ", terms);
for(int i = 0; i < terms->count; i++){ for(int i = 0; i < terms->count; i++){
......
...@@ -90,7 +90,7 @@ template <typename T> ...@@ -90,7 +90,7 @@ template <typename T>
void TensorListBase<T>::Add(T&& item) void TensorListBase<T>::Add(T&& item)
{ {
if (count == maxNum) { if (count == maxNum) {
T* newItems; T* newItems;
if (mem == NULL) if (mem == NULL)
newItems = new T[maxNum * 2 + 1]; newItems = new T[maxNum * 2 + 1];
...@@ -101,7 +101,7 @@ void TensorListBase<T>::Add(T&& item) ...@@ -101,7 +101,7 @@ void TensorListBase<T>::Add(T&& item)
maxNum = maxNum * 2 + 1; maxNum = maxNum * 2 + 1;
} }
items[count++] = item; items[count++] = item;
} }
/* /*
...@@ -111,18 +111,18 @@ add an item into the list ...@@ -111,18 +111,18 @@ add an item into the list
template <typename T> template <typename T>
void TensorListBase<T>::Add(const T& item) void TensorListBase<T>::Add(const T& item)
{ {
if (count == maxNum) { if (count == maxNum) {
T* newItems; T* newItems;
if (mem == NULL) if (mem == NULL)
newItems = new T[maxNum * 2 + 1]; newItems = new T[maxNum * 2 + 1];
else else
newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1)); newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
memcpy(newItems, items, sizeof(T) * maxNum); memcpy(newItems, items, sizeof(T) * maxNum);
items = newItems; items = newItems;
maxNum = maxNum * 2 + 1; maxNum = maxNum * 2 + 1;
} }
items[count++] = item; items[count++] = item;
} }
/* /*
...@@ -186,21 +186,21 @@ void TensorListBase<T>::Insert(int pos, const T& item) ...@@ -186,21 +186,21 @@ void TensorListBase<T>::Insert(int pos, const T& item)
template<typename T> template<typename T>
void TensorListBase<T>::Insert(int pos, T&& item) void TensorListBase<T>::Insert(int pos, T&& item)
{ {
if (count == maxNum) { if (count == maxNum) {
T* newItems; T* newItems;
if (mem == NULL) if (mem == NULL)
newItems = new T[maxNum * 2 + 1]; newItems = new T[maxNum * 2 + 1];
else else
newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1)); newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
memcpy(newItems, items, sizeof(T) * maxNum); memcpy(newItems, items, sizeof(T) * maxNum);
items = newItems; items = newItems;
maxNum = maxNum * 2 + 1; maxNum = maxNum * 2 + 1;
} }
for (int i = count - 1; i >= pos; i--) for (int i = count - 1; i >= pos; i--)
items[i + 1] = items[i]; items[i + 1] = items[i];
items[pos] = item; items[pos] = item;
count++; count++;
} }
/* get the item at position i */ /* get the item at position i */
...@@ -226,8 +226,8 @@ inline void TensorListBase<T>::SetItem(int i, const T& item) ...@@ -226,8 +226,8 @@ inline void TensorListBase<T>::SetItem(int i, const T& item)
template<typename T> template<typename T>
inline void TensorListBase<T>::SetItem(int i, T&& item) inline void TensorListBase<T>::SetItem(int i, T&& item)
{ {
if (i >= 0 && i < count) if (i >= 0 && i < count)
items[i] = std::move(item); items[i] = std::move(item);
} }
/* /*
...@@ -250,7 +250,7 @@ inline int TensorListBase<T>::FindFirst(const T& item) ...@@ -250,7 +250,7 @@ inline int TensorListBase<T>::FindFirst(const T& item)
template <typename T> template <typename T>
void TensorListBase<T>::Clear() void TensorListBase<T>::Clear()
{ {
count = 0; count = 0;
} }
/* /*
......
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
/* the nts (NiuTrans.Tensor) namespace */ /* the nts (NiuTrans.Tensor) namespace */
namespace nts { namespace nts {
/* the TensorListBase class */ /* the TensorListBase class */
template <typename T> template <typename T>
struct TensorListBase { struct TensorListBase {
...@@ -66,57 +66,57 @@ public: ...@@ -66,57 +66,57 @@ public:
/* add an item into the list */ /* add an item into the list */
void Add(T&& item); void Add(T&& item);
/* add an item into the list */ /* add an item into the list */
void Add(const T& item); void Add(const T& item);
/* add a number of items into the list */ /* add a number of items into the list */
void Add(T* inputItems, int inputItemCount); void Add(T* inputItems, int inputItemCount);
/* append a list to the current list */ /* append a list to the current list */
void AddList(TensorListBase* l); void AddList(TensorListBase* l);
/* insert an item to the given position of the list */ /* insert an item to the given position of the list */
void Insert(int pos, const T& item); void Insert(int pos, const T& item);
/* insert an item to the given position of the list */ /* insert an item to the given position of the list */
void Insert(int pos, T&& item); void Insert(int pos, T&& item);
/* get the item at position i */ /* get the item at position i */
T& GetItem(int i) const; T& GetItem(int i) const;
/* set the item at position i */ /* set the item at position i */
void SetItem(int i, const T& item); void SetItem(int i, const T& item);
/* set the item at position i */ /* set the item at position i */
void SetItem(int i, T&& item); void SetItem(int i, T&& item);
/* find the position of the first matched item */ /* find the position of the first matched item */
int FindFirst(const T& item); int FindFirst(const T& item);
/* clear the data array */ /* clear the data array */
void Clear(); void Clear();
/* sort the list */ /* sort the list */
void Sort(int itemSize); void Sort(int itemSize);
/* reverse the list */ /* reverse the list */
void Reverse(); void Reverse();
/* remove the item at position i */ /* remove the item at position i */
void Remove(int i); void Remove(int i);
/* copy the list */ /* copy the list */
TensorListBase* Copy(XMem* myMem); TensorListBase* Copy(XMem* myMem);
/* shuffle the list */ /* shuffle the list */
void Shuffle(int nround = 10, int beg = -1, int len = 0); void Shuffle(int nround = 10, int beg = -1, int len = 0);
/* short */ /* short */
T& operator[] (int i) { T& operator[] (int i) {
return GetItem(i); return GetItem(i);
}; };
T& Get(int i) { return GetItem(i); }; T& Get(int i) { return GetItem(i); };
void Set(int i, T item) { SetItem(i, item); }; void Set(int i, T item) { SetItem(i, item); };
}; };
struct XTensor; struct XTensor;
......
...@@ -305,7 +305,7 @@ void XMem::SetComputationMode(bool myIsForComputation) ...@@ -305,7 +305,7 @@ void XMem::SetComputationMode(bool myIsForComputation)
cublasDestroy(cublasHandle); cublasDestroy(cublasHandle);
if(myIsForComputation) if(myIsForComputation)
CheckNTErrors((enum curandStatus)cublasCreate(&cublasHandle) == CURAND_STATUS_SUCCESS, CheckNTErrors((enum curandStatus)cublasCreate(&cublasHandle) == CURAND_STATUS_SUCCESS,
"Cannot create the cublas handle."); "Cannot create the cublas handle.");
SetDevice(devIDBackup); SetDevice(devIDBackup);
#endif #endif
...@@ -321,11 +321,11 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum) ...@@ -321,11 +321,11 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum)
{ {
delete[] memIndex; delete[] memIndex;
delete[] memIndex2; delete[] memIndex2;
delete[] minSizeIndex; delete[] minSizeIndex;
nodeNum = indexSize; nodeNum = indexSize;
nodeNumUsed = minSizeNum * 2; nodeNumUsed = minSizeNum * 2;
indexEntryNum = minSizeNum; indexEntryNum = minSizeNum;
memIndex = new MPieceNode[nodeNum]; memIndex = new MPieceNode[nodeNum];
memset(memIndex, 0, sizeof(MPieceNode) * nodeNum); memset(memIndex, 0, sizeof(MPieceNode) * nodeNum);
...@@ -333,12 +333,12 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum) ...@@ -333,12 +333,12 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum)
memIndex2 = new MPieceNode[nodeNum]; memIndex2 = new MPieceNode[nodeNum];
memset(memIndex2, 0, sizeof(MPieceNode) * nodeNum); memset(memIndex2, 0, sizeof(MPieceNode) * nodeNum);
minSizeIndex = new MTYPE[indexEntryNum]; minSizeIndex = new MTYPE[indexEntryNum];
memset(minSizeIndex, 0, sizeof(MTYPE) * indexEntryNum); memset(minSizeIndex, 0, sizeof(MTYPE) * indexEntryNum);
minSizeIndex[0] = minSizeFirst; minSizeIndex[0] = minSizeFirst;
for(int i = 1; i < indexEntryNum; i++) for(int i = 1; i < indexEntryNum; i++)
minSizeIndex[i] = minSizeIndex[i - 1] * 2; minSizeIndex[i] = minSizeIndex[i - 1] * 2;
indexOffset = GetMSB(minSizeFirst); indexOffset = GetMSB(minSizeFirst);
} }
...@@ -757,8 +757,8 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex) ...@@ -757,8 +757,8 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex)
/* if all index nodes are used, we rebuild the index to release the nodes that are free */ /* if all index nodes are used, we rebuild the index to release the nodes that are free */
if(nodeNumUsed == nodeNum){ if(nodeNumUsed == nodeNum){
RebuildIndex(); RebuildIndex();
CheckNTErrors(nodeNumUsed < nodeNum, "No enough index nodes for the memory pool!"); CheckNTErrors(nodeNumUsed < nodeNum, "No enough index nodes for the memory pool!");
} }
/*if(testxmemid == 30){ /*if(testxmemid == 30){
...@@ -961,8 +961,8 @@ release a piece of memory as "free" ...@@ -961,8 +961,8 @@ release a piece of memory as "free"
*/ */
void XMem::ReleaseStandard(int myDevID, void * p, MTYPE size) void XMem::ReleaseStandard(int myDevID, void * p, MTYPE size)
{ {
if(p == NULL) if(p == NULL)
return; return;
if(size <= minSizeIndex[0]) if(size <= minSizeIndex[0])
size = minSizeIndex[0]; size = minSizeIndex[0];
...@@ -1092,7 +1092,7 @@ void XMem::RebuildIndex() ...@@ -1092,7 +1092,7 @@ void XMem::RebuildIndex()
block->mem = NULL; block->mem = NULL;
} }
else{ else{
/* if the block is in use, we build the index */ /* if the block is in use, we build the index */
int pieceCount = 0; int pieceCount = 0;
MTYPE size = 0; MTYPE size = 0;
MHeader * newLast = NULL; MHeader * newLast = NULL;
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* This is an implementation of queue. Actually we intend to use it to maintain
* a priority job list
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2017-04-05
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "XQueue.h"
#include "XDevice.h"
#include "XList.h"
#include "XUtility.h"
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
/**************************************
job item used in queues
*/
/* constructor: a node starts with no job attached and a fresh argument list */
JobQueueNode::JobQueueNode()
{
    args = new TensorList(1);
    job  = NULL;
}
/* de-constructor: the node owns its argument list (created in the constructor);
   the job function pointer itself is not owned and is not freed here */
JobQueueNode::~JobQueueNode()
{
    delete args;
}
/**************************************
This class provides standard utilities of Queue.
*/
/* 
constructor
>> mySize - capacity of the queue (maximum number of items held at once)
*/
XQueue::XQueue(int mySize)
{
    /* circular buffer of item pointers; head/tail wrap modulo size */
    queue = new void*[mySize];
    memset(queue, 0, sizeof(void*) * mySize);
    size = mySize;
    itemCount = 0;
    head = 0;
    tail = 0;
    /* job-consumer state; inactive until RunJobConsumer() is called */
    isJobQueue = false;
    jobDequeuerArgs = new TensorList(1);
    jobDequeuerBreak = false;
    runningJobCount = 0;
    jobStream = NULL;
    jobStream1 = NULL;
    jobStream2 = NULL;
    /* synchronization primitives for the producer/consumer protocol */
    MUTEX_INIT(enqueueMutex);
    MUTEX_INIT(dequeueMutex);
    COND_INIT(queueCond);
    MUTEX_INIT(jobQueueMutex);
}
/* de-constructor: releases the buffer, the argument list, and the job
   streams created by MakeJobStreams() */
XQueue::~XQueue()
{
    delete[] queue;
    delete jobDequeuerArgs;
    /* deleting NULL is a no-op, so unused streams are safe to delete */
    delete jobStream;
    delete jobStream1;
    delete jobStream2;
    /* NOTE(review): the consumer thread is not stopped here — presumably
       callers must call StopJobConsumer() themselves; confirm */
    //if(isJobQueue)
    //    StopJobConsumer();
    MUTEX_DELE(enqueueMutex);
    MUTEX_DELE(dequeueMutex);
    COND_DELE(queueCond);
    MUTEX_DELE(jobQueueMutex);
}
/*
put an item in the tail of the queue
>> item - the item we intend to add into the queue
*/
void XQueue::Enqueue(void * item)
{
    /* both mutexes are taken: enqueueMutex serializes producers, and
       dequeueMutex keeps the buffer state consistent with consumers */
    MUTEX_LOCK(enqueueMutex);
    MUTEX_LOCK(dequeueMutex);
    CheckNTErrors((itemCount < size), "Put too many items into the queue!");
    /* circular-buffer insert at the tail */
    queue[tail] = item;
    tail = (tail + 1) % size;
    itemCount++;
    /* wake up a consumer blocked in Dequeue() */
    COND_SIGNAL(queueCond);
    MUTEX_UNLOCK(dequeueMutex);
    MUTEX_UNLOCK(enqueueMutex);
}
/*
fetch an item from head of the queue; blocks until an item is available
<< return - the head item of the queue
*/
void * XQueue::Dequeue()
{
    MUTEX_LOCK(dequeueMutex);
    /* loop (not "if") so that spurious wakeups re-check the condition */
    while(itemCount == 0)
    {
#ifdef WIN32
        /* NOTE(review): presumably the WIN32 COND_WAIT emulation does not
           release/reacquire the mutex itself, hence the explicit
           unlock/lock around the wait — confirm against the macro defs */
        MUTEX_UNLOCK(dequeueMutex);
#endif
        COND_WAIT(queueCond, dequeueMutex);
#ifdef WIN32
        MUTEX_LOCK(dequeueMutex);
#endif
    }
    /* circular-buffer removal from the head */
    void * r = queue[head];
    head = (head + 1) % size;
    itemCount--;
    MUTEX_UNLOCK(dequeueMutex);
    return r;
}
/* tell whether the queue currently holds no items */
bool XQueue::IsEmpty()
{
    if(itemCount == 0)
        return true;
    return false;
}
/* wait until the queue is empty */
void XQueue::WaitForEmptyJobQueue()
{
while(runningJobCount > 0){
XSleep(10);
}
if(jobStream != NULL){
CheckNTErrors((jobStream->IsFinished()), "None fineished jobs remain");
jobStream->Clear();
}
if(jobStream1 != NULL){
CheckNTErrors((jobStream1->IsFinished()), "None fineished jobs remain");
jobStream1->Clear();
}
if(jobStream2 != NULL){
CheckNTErrors((jobStream2->IsFinished()), "None fineished jobs remain");
jobStream2->Clear();
}
}
int devids[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
int cpuid = -1;
/*
run job consumer (in another thread)
>> jobDevID - id of the device for running the jobs; a negative id
              means the jobs run on the CPU
*/
void XQueue::RunJobConsumer(int jobDevID)
{
    /* devids has 16 entries, so ids beyond that cannot be represented */
    CheckNTErrors((jobDevID < 16), "device id is out of scope!");
    isJobQueue = true;
    jobDequeuerArgs->Clear();
    /* the worker receives two arguments: this queue and a pointer to the
       device id (an entry of the global devids array, or cpuid for CPU) */
    jobDequeuerArgs->Add(this);
    jobDequeuerArgs->Add(jobDevID >= 0 ? devids + jobDevID : &cpuid);
    jobDequeuer.function = (TFunction)DequeueJobs;
    jobDequeuer.argv = jobDequeuerArgs;
    /* spawn the worker thread and release it to start consuming */
    jobDequeuer.Start();
    jobDequeuer.LetItGo();
}
/* stop the job consumer: raise the break flag, then unblock the worker */
void XQueue::StopJobConsumer()
{
    jobDequeuerBreak = true;
    /* NOTE(review): presumably gives the worker time to observe the flag
       before the wake-up job is pushed — confirm this delay is needed */
    XSleep(10);
    /* push a dummy job so a worker blocked in Dequeue() wakes up and
       sees the break flag */
    EnqueueJob(NULL, NULL);
    jobDequeuer.End();
    isJobQueue = false;
}
/*
add a job item to process
>> job - function pointer of the job
>> jobArgs - arguments handed to the job (may be NULL)
*/
void XQueue::EnqueueJob(void * job, TensorList * jobArgs)
{
    /* wrap the job and its arguments into a queue node */
    JobQueueNode * node = new JobQueueNode();
    node->job = job;
    if(jobArgs != NULL)
        node->args->AddList(jobArgs);

    /* count the job as running before it becomes visible to the consumer */
    MUTEX_LOCK(jobQueueMutex);
    runningJobCount++;
    MUTEX_UNLOCK(jobQueueMutex);

    Enqueue(node);
}
/* 
job item consumer (the body of the worker thread)
>> args - a two-entry list: [0] the XQueue to consume from,
          [1] a pointer to the device id the jobs should run on
*/
void XQueue::DequeueJobs(TensorList * args)
{
    CheckNTErrors((args->count == 2), "Illegal arguments!");

    XQueue * q = (XQueue*)args->GetItem(0);
    int devID = *(int*)args->GetItem(1);
    int devIDBackup = XDevice::GetGPUDevice();

    /* switch to the requested device (negative id = stay on CPU) */
    if(devID >= 0)
        XDevice::SetGPUDevice(devID);

    while(1){
        JobQueueNode * node = (JobQueueNode*)q->Dequeue();

        /* the break flag is checked after Dequeue() returns because
           StopJobConsumer() pushes a dummy job to unblock the wait */
        if(q->GetJobBreak()){
            /* fix: release the dummy node instead of leaking it */
            delete node;
            break;
        }

        CheckNTErrors((node != NULL), "Illegal job!");

        /* process a job */
        ((TFunction)node->job)(node->args);
        delete node;

        /* the job is done: update the counter polled by
           WaitForEmptyJobQueue() */
        MUTEX_LOCK(q->jobQueueMutex);
        q->runningJobCount--;
        MUTEX_UNLOCK(q->jobQueueMutex);
    }

    /* restore the device that was active before this thread started */
    if(devID >= 0)
        XDevice::SetGPUDevice(devIDBackup);
}
/* tell whether the job consumer has been asked to stop */
bool XQueue::GetJobBreak()
{
    bool stopRequested = jobDequeuerBreak;
    return stopRequested;
}
/* 
get one of the job streams by index
>> n - stream index (0, 1, or 2)
<< return - the requested job stream
*/
XStream * XQueue::GetJobStream(int n)
{
    switch(n){
        case 0:
            return jobStream;
        case 1:
            return jobStream1;
        case 2:
            return jobStream2;
        default:
            ShowNTErrors("invalid stream id!");
    }
    return NULL;
}
/* 
make job streams
>> devID, devID1, devID2 - device ids for the three streams;
   INVALID_DEVICE_ID means "do not create that stream".
   NOTE(review): calling this twice would leak the previously created
   streams — confirm it is only called once per queue */
void XQueue::MakeJobStreams(int devID, int devID1, int devID2)
{
    if(devID != INVALID_DEVICE_ID)
        jobStream = new XStream(0, devID);
    if(devID1 != INVALID_DEVICE_ID)
        jobStream1 = new XStream(0, devID1);
    if(devID2 != INVALID_DEVICE_ID)
        jobStream2 = new XStream(0, devID2);
}
} /* end of the nts (NiuTrans.Tensor) namespace */
...@@ -189,7 +189,7 @@ void XQueue::RunJobConsumer(int jobDevID) ...@@ -189,7 +189,7 @@ void XQueue::RunJobConsumer(int jobDevID)
isJobQueue = true; isJobQueue = true;
jobDequeuerArgs->Clear(); jobDequeuerArgs->Clear();
// warning: this may cause unknown error // warning: this may cause unknown error
jobDequeuerArgs->Add((XTensor*)this); jobDequeuerArgs->Add((XTensor*)this);
jobDequeuerArgs->Add(jobDevID >= 0 ? (XTensor*)(devids + jobDevID) : (XTensor*)&cpuid); jobDequeuerArgs->Add(jobDevID >= 0 ? (XTensor*)(devids + jobDevID) : (XTensor*)&cpuid);
......
...@@ -190,7 +190,6 @@ XTensor::XTensor(const XTensor &reference) ...@@ -190,7 +190,6 @@ XTensor::XTensor(const XTensor &reference)
isInit = true; isInit = true;
isTmp = reference.isTmp; isTmp = reference.isTmp;
enableGrad = reference.enableGrad;
} }
/* copy constructor (with right value reference) */ /* copy constructor (with right value reference) */
...@@ -219,7 +218,6 @@ XTensor::XTensor(const XTensor &&reference) ...@@ -219,7 +218,6 @@ XTensor::XTensor(const XTensor &&reference)
isInit = true; isInit = true;
isTmp = reference.isTmp; isTmp = reference.isTmp;
enableGrad = reference.enableGrad;
} }
/* de-constructor */ /* de-constructor */
...@@ -285,7 +283,7 @@ void XTensor::Init() ...@@ -285,7 +283,7 @@ void XTensor::Init()
isTmp = false; isTmp = false;
isGrad = false; isGrad = false;
isVar = false; isVar = false;
enableGrad = false; enableGrad = false;
visitMark = 0; visitMark = 0;
grad = NULL; grad = NULL;
} }
...@@ -316,6 +314,7 @@ void XTensor::ShallowCopy(const XTensor &tensor) ...@@ -316,6 +314,7 @@ void XTensor::ShallowCopy(const XTensor &tensor)
{ {
strcpy(name, tensor.name); strcpy(name, tensor.name);
order = tensor.order; order = tensor.order;
enableGrad = tensor.enableGrad;
memcpy(dimSize, tensor.dimSize, sizeof(int) * MAX_TENSOR_DIM_NUM); memcpy(dimSize, tensor.dimSize, sizeof(int) * MAX_TENSOR_DIM_NUM);
memcpy(dimSizeRDI, tensor.dimSizeRDI, sizeof(int) * MAX_TENSOR_DIM_NUM); memcpy(dimSizeRDI, tensor.dimSizeRDI, sizeof(int) * MAX_TENSOR_DIM_NUM);
dataType = tensor.dataType; dataType = tensor.dataType;
...@@ -403,7 +402,6 @@ XTensor& XTensor::operator= (const XTensor& tensor) ...@@ -403,7 +402,6 @@ XTensor& XTensor::operator= (const XTensor& tensor)
/* create tensor links for the new tensor */ /* create tensor links for the new tensor */
XLink::Replace(&tensor, this); XLink::Replace(&tensor, this);
} }
enableGrad = tensor.enableGrad;
return *this; return *this;
} }
...@@ -450,7 +448,6 @@ XTensor& XTensor::operator= (const XTensor&& tensor) ...@@ -450,7 +448,6 @@ XTensor& XTensor::operator= (const XTensor&& tensor)
*tensor.dataP = NULL; *tensor.dataP = NULL;
XLink::Replace(&tensor, this); XLink::Replace(&tensor, this);
enableGrad = tensor.enableGrad;
return *this; return *this;
} }
...@@ -1322,7 +1319,7 @@ set the value of a cell ...@@ -1322,7 +1319,7 @@ set the value of a cell
*/ */
bool XTensor::Set(DTYPE value, int index[], int size) bool XTensor::Set(DTYPE value, int index[], int size)
{ {
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type."); CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
return SetToDevice(devID, GetCell(index, size), value); return SetToDevice(devID, GetCell(index, size), value);
} }
...@@ -2447,7 +2444,7 @@ void InitTensor(XTensor * tensor, const XTensor * reference) ...@@ -2447,7 +2444,7 @@ void InitTensor(XTensor * tensor, const XTensor * reference)
if(reference->order < 0) if(reference->order < 0)
return; return;
tensor->enableGrad = reference->enableGrad; tensor->enableGrad = reference->enableGrad;
InitTensor(tensor, reference->order, reference->dimSize, InitTensor(tensor, reference->order, reference->dimSize,
reference->dataType, reference->denseRatio, reference->dataType, reference->denseRatio,
reference->devID, reference->mem); reference->devID, reference->mem);
...@@ -2463,7 +2460,7 @@ void InitTensorV2(XTensor * tensor, const XTensor * reference) ...@@ -2463,7 +2460,7 @@ void InitTensorV2(XTensor * tensor, const XTensor * reference)
if(reference->order < 0) if(reference->order < 0)
return; return;
tensor->enableGrad = reference->enableGrad; tensor->enableGrad = reference->enableGrad;
InitTensorV2(tensor, reference->order, reference->dimSize, InitTensorV2(tensor, reference->order, reference->dimSize,
reference->dataType, reference->devID); reference->dataType, reference->devID);
} }
...@@ -2478,7 +2475,7 @@ void InitTensorOnCPU(XTensor * tensor, const XTensor * reference) ...@@ -2478,7 +2475,7 @@ void InitTensorOnCPU(XTensor * tensor, const XTensor * reference)
if(reference->order < 0) if(reference->order < 0)
return; return;
tensor->enableGrad = reference->enableGrad; tensor->enableGrad = reference->enableGrad;
InitTensor(tensor, reference->order, reference->dimSize, InitTensor(tensor, reference->order, reference->dimSize,
reference->dataType, reference->denseRatio, reference->dataType, reference->denseRatio,
-1); -1);
......
...@@ -151,8 +151,8 @@ public: ...@@ -151,8 +151,8 @@ public:
/* indicates whether the tensor keeps the gradient when used as model parameters */ /* indicates whether the tensor keeps the gradient when used as model parameters */
bool isGrad; bool isGrad;
/* indicates whether the gradient of the tensor should be computed */ /* indicates whether the gradient of the tensor should be computed */
bool enableGrad; bool enableGrad;
/* indicates whether the tensor is used as paramters (or variables) */ /* indicates whether the tensor is used as paramters (or variables) */
bool isVar; bool isVar;
...@@ -453,7 +453,7 @@ extern int MakeTensorID(); ...@@ -453,7 +453,7 @@ extern int MakeTensorID();
void InitTensor(XTensor * tensor, void InitTensor(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL); const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense XTensor V2 */ /* initialize a dense XTensor V2 */
void InitTensorV2(XTensor * tensor, void InitTensorV2(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
......
...@@ -142,6 +142,23 @@ void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim) ...@@ -142,6 +142,23 @@ void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
_Div(a, b, a, alpha, leadingDim); _Div(a, b, a, alpha, leadingDim);
} }
/*
element-wise division of two tensors (do it on site)
keep the result in the input tensor a and return nothing
a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the item
>> a - tensor a (where keep the result)
>> b - tensor b
>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting
*/
void DivMe(XTensor& a, const XTensor& b, DTYPE alpha, int leadingDim)
{
_Div(&a, &b, &a, alpha, leadingDim);
}
/* /*
return a dimension if the division is performed as DivDim (in more details in DivDim.h) return a dimension if the division is performed as DivDim (in more details in DivDim.h)
>> a - a tensor >> a - a tensor
......
...@@ -122,7 +122,7 @@ where i is the item index ...@@ -122,7 +122,7 @@ where i is the item index
*/ */
void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim) void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{ {
int leadingDimRDI = a->order - leadingDim - 1; int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum), CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!"); CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
......
...@@ -40,6 +40,7 @@ a(i) = a(i)/b(i) + \alpha * a(i) ...@@ -40,6 +40,7 @@ a(i) = a(i)/b(i) + \alpha * a(i)
where i is the index of the element where i is the index of the element
*/ */
void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0); void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0);
void DivMe(XTensor & a, const XTensor & b, DTYPE alpha = 0.0, int leadingDim = 0);
/* /*
element-wise division of two tensors (return an XTensor structure) element-wise division of two tensors (return an XTensor structure)
......
...@@ -130,6 +130,17 @@ void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha) ...@@ -130,6 +130,17 @@ void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha)
} }
/* /*
mask entries of a given tensor (on site):
a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0
where i is the index of the element
*/
void MaskMe(XTensor& a, const XTensor& mask, DTYPE alpha)
{
_Mask(&a, &mask, &a, alpha);
}
/*
mask entries of a given tensor (return an XTensor structure): mask entries of a given tensor (return an XTensor structure):
a(i) = a(i) if mask(i) is non-zero a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0 a(i) = alpha if mask(i) = 0
......
...@@ -43,6 +43,7 @@ a(i) = alpha if mask(i) = 0 ...@@ -43,6 +43,7 @@ a(i) = alpha if mask(i) = 0
where i is the index of the element where i is the index of the element
*/ */
void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha); void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha);
void MaskMe(XTensor & a, const XTensor & mask, DTYPE alpha);
/* /*
mask entries of a given tensor (return an XTensor structure): mask entries of a given tensor (return an XTensor structure):
......
...@@ -54,15 +54,15 @@ void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -54,15 +54,15 @@ void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2), CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2),
"Input tensors must have a order = 2!"); "Input tensors must have a order = 2!");
int an = a->dimSize[0], am = a->dimSize[1]; int an = a->dimSize[0], am = a->dimSize[1];
int bn = b->dimSize[0], bm = b->dimSize[1]; int bn = b->dimSize[0], bm = b->dimSize[1];
int cn = c->dimSize[0], cm = c->dimSize[1]; int cn = c->dimSize[0], cm = c->dimSize[1];
int am2 = transposedA == X_TRANS ? an : am; int am2 = transposedA == X_TRANS ? an : am;
int an2 = transposedA == X_TRANS ? am : an; int an2 = transposedA == X_TRANS ? am : an;
int bm2 = transposedB == X_TRANS ? bn : bm; int bm2 = transposedB == X_TRANS ? bn : bm;
int bn2 = transposedB == X_TRANS ? bm : bn; int bn2 = transposedB == X_TRANS ? bm : bn;
int cm2 = cm; int cm2 = cm;
int cn2 = cn; int cn2 = cn;
CheckNTErrors((am2 == bn2 && an2 == cn2 && bm2 == cm2), CheckNTErrors((am2 == bn2 && an2 == cn2 && bm2 == cm2),
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
......
...@@ -40,21 +40,21 @@ argument7: matrix c (c=a*b*\alpha + c*beta) ...@@ -40,21 +40,21 @@ argument7: matrix c (c=a*b*\alpha + c*beta)
*/ */
void _MatrixMul2DMultiTheading(TensorList * args) void _MatrixMul2DMultiTheading(TensorList * args)
{ {
CheckNTErrors(args->count == 2, "invalid argument number!"); CheckNTErrors(args->count == 2, "invalid argument number!");
IntList * indexArgs = (IntList*)args->GetItem(0); IntList * indexArgs = (IntList*)args->GetItem(0);
TensorList * matrixArgs = (TensorList*)args->GetItem(1); TensorList * matrixArgs = (TensorList*)args->GetItem(1);
CheckNTErrors(indexArgs->count == 4, "invalid argument number!"); CheckNTErrors(indexArgs->count == 4, "invalid argument number!");
CheckNTErrors(matrixArgs->count == 5, "invalid argument number!"); CheckNTErrors(matrixArgs->count == 5, "invalid argument number!");
XTensor * a = matrixArgs->GetItem(0); XTensor * a = matrixArgs->GetItem(0);
XTensor * b = matrixArgs->GetItem(1); XTensor * b = matrixArgs->GetItem(1);
XTensor * c = matrixArgs->GetItem(2); XTensor * c = matrixArgs->GetItem(2);
DTYPE alpha = *(DTYPE*)(matrixArgs->GetItem(3)); DTYPE alpha = *(DTYPE*)(matrixArgs->GetItem(3));
DTYPE beta = *(DTYPE*)(matrixArgs->GetItem(4)); DTYPE beta = *(DTYPE*)(matrixArgs->GetItem(4));
int x1 = indexArgs->GetItem(0); int x1 = indexArgs->GetItem(0);
int y1 = indexArgs->GetItem(1); int y1 = indexArgs->GetItem(1);
int x2 = indexArgs->GetItem(2); int x2 = indexArgs->GetItem(2);
int y2 = indexArgs->GetItem(3); int y2 = indexArgs->GetItem(3);
#ifdef FAST_MATRIX #ifdef FAST_MATRIX
int am = a->dimSize[1]; int am = a->dimSize[1];
......
...@@ -143,6 +143,23 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim) ...@@ -143,6 +143,23 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
_Multiply(a, b, a, alpha, leadingDim); _Multiply(a, b, a, alpha, leadingDim);
} }
/*
element-wise product of two tensors (do it on site)
keep the result in the input tensor a and return nothing
a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the item
>> a - tensor a (where keep the result)
>> b - tensor b
>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting
*/
void MultiplyMe(XTensor& a, const XTensor& b, DTYPE alpha, int leadingDim)
{
_Multiply(&a, &b, &a, alpha, leadingDim);
}
/* /*
return a dimension if the multiplication is performed as MultiplyDim (in more details in MultiplyDim.h) return a dimension if the multiplication is performed as MultiplyDim (in more details in MultiplyDim.h)
>> a - a tensor >> a - a tensor
......
...@@ -122,7 +122,7 @@ where i is the item index ...@@ -122,7 +122,7 @@ where i is the item index
*/ */
void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim) void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{ {
int leadingDimRDI = a->order - leadingDim - 1; int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum), CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!"); CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
......
...@@ -40,6 +40,7 @@ a(i) = a(i)*b(i) + \alpha * a(i) ...@@ -40,6 +40,7 @@ a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the element where i is the index of the element
*/ */
void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0); void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0);
void MultiplyMe(XTensor & a, const XTensor & b, DTYPE alpha = 0.0, int leadingDim = 0);
/* /*
element-wise product of two tensors (return an XTensor structure) element-wise product of two tensors (return an XTensor structure)
......
...@@ -139,6 +139,24 @@ void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha) ...@@ -139,6 +139,24 @@ void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha)
} }
/* /*
tensor multiplication(do it on site)
make a new tensor to keep the result and return it
c = a * b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting
>> a - a tensor
>> b - another tensor whose size is equal to that of dimension n of a
>> n - the dimension index
>> alpha - the scaling factor
*/
void MultiplyDimMe(XTensor& a, const XTensor& b, int n, DTYPE alpha)
{
_MultiplyDim(&a, &b, &a, n, alpha);
}
/*
tensor multiplication (return an XTensor structure and make tensor connections) tensor multiplication (return an XTensor structure and make tensor connections)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
......
...@@ -33,6 +33,7 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP ...@@ -33,6 +33,7 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP
/* tensor multiplication a = a * b + \alpha * c where the size of b is equal to the n-th dimension of a, /* tensor multiplication a = a * b + \alpha * c where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting. we keep the result in the input tensor a and return nothing */ i.e., a is multiplied with b by broadcasting. we keep the result in the input tensor a and return nothing */
void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha = 0.0); void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha = 0.0);
void MultiplyDimMe(XTensor & a, const XTensor & b, int n, DTYPE alpha = 0.0);
/* tensor multiplication c = a * b where the size of b is equal to the n-th dimension of a, /* tensor multiplication c = a * b where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting. We make a new tensor c to keep the result and return it */ i.e., a is multiplied with b by broadcasting. We make a new tensor c to keep the result and return it */
......
...@@ -60,6 +60,16 @@ void _NegateMe(XTensor * a) ...@@ -60,6 +60,16 @@ void _NegateMe(XTensor * a)
} }
/* /*
set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void NegateMe(XTensor& a)
{
_Negate(&a, &a);
}
/*
set every entry to its minus value (return an XTensor structure) set every entry to its minus value (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
>> a - input tensor we are processing >> a - input tensor we are processing
......
...@@ -34,6 +34,7 @@ set every entry to its minus value (do it on site) ...@@ -34,6 +34,7 @@ set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing keep the result in the input tensor a and return nothing
*/ */
void _NegateMe(XTensor * a); void _NegateMe(XTensor * a);
void NegateMe(XTensor & a);
/* /*
set every entry to its minus value (return an XTensor structure) set every entry to its minus value (return an XTensor structure)
......
...@@ -66,6 +66,16 @@ void _SignMe(XTensor * a) ...@@ -66,6 +66,16 @@ void _SignMe(XTensor * a)
} }
/* /*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void SignMe(XTensor& a)
{
_Sign(&a, &a);
}
/*
set every entry to its sign value (return an XTensor structure) set every entry to its sign value (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
>> a - input tensor we are processing >> a - input tensor we are processing
......
...@@ -36,6 +36,12 @@ keep the result in the input tensor a and return nothing ...@@ -36,6 +36,12 @@ keep the result in the input tensor a and return nothing
void _SignMe(XTensor * a); void _SignMe(XTensor * a);
/* /*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
*/
void SignMe(XTensor & a);
/*
set every entry to its sign value (return an XTensor structure) set every entry to its sign value (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
*/ */
......
...@@ -126,6 +126,19 @@ void _SubMe(XTensor * a, const XTensor * b, DTYPE beta) ...@@ -126,6 +126,19 @@ void _SubMe(XTensor * a, const XTensor * b, DTYPE beta)
{ {
_Sub(a, b, a, beta); _Sub(a, b, a, beta);
} }
/*
tensor subtraction a = a - b * \beta (do it on site)
keep the result in the tensor a and return nothing
>> a - a tensor
>> b - another tensor
>> beta - the scaling factor
*/
void SubMe(XTensor& a, const XTensor& b, DTYPE beta)
{
_Sub(&a, &b, &a, beta);
}
/* /*
return a dimension if the subtraction is performed as SubDim (in more details in SubDim.h) return a dimension if the subtraction is performed as SubDim (in more details in SubDim.h)
......
...@@ -35,6 +35,7 @@ tensor subtraction a = a - b * \beta ...@@ -35,6 +35,7 @@ tensor subtraction a = a - b * \beta
keep the result in the input tensor a and return nothing keep the result in the input tensor a and return nothing
*/ */
void _SubMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0); void _SubMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0);
void SubMe(XTensor & a, const XTensor & b, DTYPE beta = (DTYPE)1.0);
/* /*
tensor subtraction c = a - b * \beta tensor subtraction c = a - b * \beta
......
...@@ -46,79 +46,79 @@ void _SubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet ...@@ -46,79 +46,79 @@ void _SubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet
{ {
n = MODX(n, a->order); n = MODX(n, a->order);
CheckNTErrors(a && b && c, "Empty tensor input!"); CheckNTErrors(a && b && c, "Empty tensor input!");
CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in subtraction!"); CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in subtraction!");
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType, CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Unmatched data types in subtraction!"); "Unmatched data types in subtraction!");
CheckNTErrors(a->order == c->order, "The input tensors do not have the same order in subtraction!"); CheckNTErrors(a->order == c->order, "The input tensors do not have the same order in subtraction!");
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!"); CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!"); CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
CheckDev(a->devID, b->devID); CheckDev(a->devID, b->devID);
if (beta == 0) { if (beta == 0) {
_CopyValues(a, c); _CopyValues(a, c);
return; return;
} }
if (XTensor::IsSameShaped(a, b)) { if (XTensor::IsSameShaped(a, b)) {
_Sub(a, b, c, beta); _Sub(a, b, c, beta);
return; return;
} }
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) { if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
_CudaSubDim(a, b, c, n, beta); _CudaSubDim(a, b, c, n, beta);
#else #else
ShowNTErrors("Please specify USE_CUDA and recompile the code!"); ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif #endif
} }
else { else {
int stride = 1; int stride = 1;
int blockSize = a->dimSize[n]; int blockSize = a->dimSize[n];
int blockNum = 1; int blockNum = 1;
for (int i = a->order - 1; i >= 0; i--) { for (int i = a->order - 1; i >= 0; i--) {
if (i > n) if (i > n)
stride *= a->dimSize[i]; stride *= a->dimSize[i];
else if (i < n) else if (i < n)
blockNum *= a->dimSize[i]; blockNum *= a->dimSize[i];
} }
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
int num = a->unitNum; int num = a->unitNum;
if (stride > 1) { if (stride > 1) {
for (int i = 0, j = 0; i < num; i += stride, j++) { for (int i = 0, j = 0; i < num; i += stride, j++) {
DTYPE * ap = (DTYPE*)a->data + i; DTYPE * ap = (DTYPE*)a->data + i;
DTYPE bv = *((DTYPE*)b->data + j % blockSize) * beta; DTYPE bv = *((DTYPE*)b->data + j % blockSize) * beta;
DTYPE * cp = (DTYPE*)c->data + i; DTYPE * cp = (DTYPE*)c->data + i;
for (int k = 0; k < stride; k++) for (int k = 0; k < stride; k++)
cp[k] = ap[k] - bv; cp[k] = ap[k] - bv;
} }
} }
else if (stride == 1) { else if (stride == 1) {
DTYPE * bp = (DTYPE*)b->data; DTYPE * bp = (DTYPE*)b->data;
for (int i = 0; i < num; i += blockSize) { for (int i = 0; i < num; i += blockSize) {
DTYPE * ap = (DTYPE*)a->data + i; DTYPE * ap = (DTYPE*)a->data + i;
DTYPE * cp = (DTYPE*)c->data + i; DTYPE * cp = (DTYPE*)c->data + i;
if (beta == 1.0F) { if (beta == 1.0F) {
for (int j = 0; j < blockSize; j++) for (int j = 0; j < blockSize; j++)
cp[j] = ap[j] - bp[j]; cp[j] = ap[j] - bp[j];
} }
else { else {
for (int j = 0; j < blockSize; j++) for (int j = 0; j < blockSize; j++)
cp[j] = ap[j] - bp[j] * beta; cp[j] = ap[j] - bp[j] * beta;
} }
} }
} }
else { else {
ShowNTErrors("Something is wrong!"); ShowNTErrors("Something is wrong!");
} }
} }
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
} }
} }
} }
/* /*
...@@ -136,7 +136,7 @@ i.e., a is subtracted with b by broadcasting ...@@ -136,7 +136,7 @@ i.e., a is subtracted with b by broadcasting
*/ */
void _SubDim(XTensor * a, const XTensor * b, int n, DTYPE beta) void _SubDim(XTensor * a, const XTensor * b, int n, DTYPE beta)
{ {
_SubDim(a, b, a, n, beta); _SubDim(a, b, a, n, beta);
} }
/* /*
...@@ -155,20 +155,20 @@ i.e., a is subtracted with b by broadcasting ...@@ -155,20 +155,20 @@ i.e., a is subtracted with b by broadcasting
*/ */
XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta) XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
{ {
XTensor c(&a); XTensor c(&a);
c.SetTMPFlag(); c.SetTMPFlag();
n = MODX(n, a.order); n = MODX(n, a.order);
/* call _Sub function */ /* call _Sub function */
_SubDim(&a, &b, &c, n, beta); _SubDim(&a, &b, &c, n, beta);
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM); XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n); XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta); XLink::AddParamToHead(&c, beta);
return c; return c;
} }
/* /*
......
...@@ -39,25 +39,25 @@ where a is a tensor and b is a row vector ...@@ -39,25 +39,25 @@ where a is a tensor and b is a row vector
*/ */
template <class T, bool betaFired> template <class T, bool betaFired>
__global__ __global__
void KernelSubWithRow(T * a, T * b, T * c, int rowNum, int colNum, T beta) void KernelSubWithRow(T * a, T * b, T * c, int rowNum, int colNum, T beta)
{ {
__shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK]; __shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK];
int col = blockDim.x * blockIdx.x + threadIdx.x; int col = blockDim.x * blockIdx.x + threadIdx.x;
int row = blockDim.y * blockIdx.y + threadIdx.y; int row = blockDim.y * blockIdx.y + threadIdx.y;
if (col >= colNum || row >= rowNum) if (col >= colNum || row >= rowNum)
return; return;
if (threadIdx.y == 0) if (threadIdx.y == 0)
bv[threadIdx.x] = b[col]; bv[threadIdx.x] = b[col];
__syncthreads(); __syncthreads();
int offset = colNum * row + col; int offset = colNum * row + col;
if (betaFired) if (betaFired)
c[offset] = a[offset] - bv[threadIdx.x] * beta; c[offset] = a[offset] - bv[threadIdx.x] * beta;
else else
c[offset] = a[offset] - bv[threadIdx.x]; c[offset] = a[offset] - bv[threadIdx.x];
} }
/* /*
...@@ -75,30 +75,30 @@ where a is a tensor and b is a colum vector ...@@ -75,30 +75,30 @@ where a is a tensor and b is a colum vector
*/ */
template <class T, bool betaFired> template <class T, bool betaFired>
__global__ __global__
void KernelSubWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize, int blockNum, T beta) void KernelSubWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize, int blockNum, T beta)
{ {
__shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK]; __shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK];
int colIndex = blockDim.x * blockIdx.x + threadIdx.x; int colIndex = blockDim.x * blockIdx.x + threadIdx.x;
int row = blockDim.y * blockIdx.y + threadIdx.y; int row = blockDim.y * blockIdx.y + threadIdx.y;
int col = colIndex % colNum; int col = colIndex % colNum;
int block = colIndex / colNum; int block = colIndex / colNum;
if (row >= rowNum || block >= blockNum) if (row >= rowNum || block >= blockNum)
return; return;
if (threadIdx.x == 0) if (threadIdx.x == 0)
bv[threadIdx.y] = b[row]; bv[threadIdx.y] = b[row];
__syncthreads(); __syncthreads();
int offset = block * blockSize + row * colNum + col; int offset = block * blockSize + row * colNum + col;
if (betaFired) if (betaFired)
c[offset] = a[offset] - bv[threadIdx.y] * beta; c[offset] = a[offset] - bv[threadIdx.y] * beta;
else else
c[offset] = a[offset] - bv[threadIdx.y]; c[offset] = a[offset] - bv[threadIdx.y];
} }
/* /*
...@@ -116,63 +116,63 @@ i.e., a is subtracted with b by broadcasting ...@@ -116,63 +116,63 @@ i.e., a is subtracted with b by broadcasting
*/ */
void _CudaSubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE beta) void _CudaSubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE beta)
{ {
CheckNTErrors(a && b && c, "Empty tensor input!"); CheckNTErrors(a && b && c, "Empty tensor input!");
CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in subtraction!"); CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in subtraction!");
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType, CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Unmatched data types in subtraction!"); "Unmatched data types in subtraction!");
CheckNTErrors(a->order == c->order, "The input tensors do not have the same order in subtraction!"); CheckNTErrors(a->order == c->order, "The input tensors do not have the same order in subtraction!");
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!"); CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!"); CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
int stride = 1; int stride = 1;
int blockSize = a->dimSize[n]; int blockSize = a->dimSize[n];
int blockNum = 1; int blockNum = 1;
for (int i = a->order - 1; i >= 0; i--) { for (int i = a->order - 1; i >= 0; i--) {
if (i > n) if (i > n)
stride *= a->dimSize[i]; stride *= a->dimSize[i];
else if (i < n) else if (i < n)
blockNum *= a->dimSize[i]; blockNum *= a->dimSize[i];
} }
int cudaGrids[3]; int cudaGrids[3];
int cudaBlocks[3]; int cudaBlocks[3];
int devIDBackup = 0; int devIDBackup = 0;
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
if (stride > 1) { if (stride > 1) {
GDevs.GetCudaThread2D(a->devID, stride * blockNum, blockSize, MAX_INT, cudaGrids, cudaBlocks); GDevs.GetCudaThread2D(a->devID, stride * blockNum, blockSize, MAX_INT, cudaGrids, cudaBlocks);
if (beta == (DTYPE)1.0F) if (beta == (DTYPE)1.0F)
KernelSubWithCol<DTYPE, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>> KernelSubWithCol<DTYPE, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockSize, stride, blockSize * stride, blockNum, beta); blockSize, stride, blockSize * stride, blockNum, beta);
else else
KernelSubWithCol<DTYPE, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>> KernelSubWithCol<DTYPE, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockSize, stride, blockSize * stride, blockNum, beta); blockSize, stride, blockSize * stride, blockNum, beta);
} }
else if (stride == 1) { else if (stride == 1) {
GDevs.GetCudaThread2D(a->devID, blockSize, blockNum, MAX_INT, cudaGrids, cudaBlocks); GDevs.GetCudaThread2D(a->devID, blockSize, blockNum, MAX_INT, cudaGrids, cudaBlocks);
if (beta == (DTYPE)1.0F) if (beta == (DTYPE)1.0F)
KernelSubWithRow<DTYPE, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> > KernelSubWithRow<DTYPE, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> >
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockNum, blockSize, beta); blockNum, blockSize, beta);
else else
KernelSubWithRow<DTYPE, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> > KernelSubWithRow<DTYPE, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> >
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockNum, blockSize, beta); blockNum, blockSize, beta);
} }
else { else {
ShowNTErrors("Something is wrong!"); ShowNTErrors("Something is wrong!");
} }
} }
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
} }
BacktoCudaDev(a->devID, devIDBackup); BacktoCudaDev(a->devID, devIDBackup);
} }
#endif #endif
......
...@@ -132,6 +132,19 @@ void _SumMe(XTensor * a, const XTensor * b, DTYPE beta) ...@@ -132,6 +132,19 @@ void _SumMe(XTensor * a, const XTensor * b, DTYPE beta)
_Sum(a, b, a, beta); _Sum(a, b, a, beta);
} }
/*
tensor summation a = a + b * \beta (do it on site)
the sum overwrites the input tensor a; nothing is returned
>> a - the tensor updated in place
>> b - the tensor added to a
>> beta - scaling factor applied to b
*/
void SumMe(XTensor& a, const XTensor& b, DTYPE beta)
{
    /* delegate to the pointer-based routine, using a as both input and output */
    XTensor * inout = &a;
    _Sum(inout, &b, inout, beta);
}
/* /*
return a dimension if the sum is performed as SumDim (in more details in SumDim.h) return a dimension if the sum is performed as SumDim (in more details in SumDim.h)
>> a - a tensor >> a - a tensor
......
...@@ -34,6 +34,7 @@ tensor summation a = a + b * \beta ...@@ -34,6 +34,7 @@ tensor summation a = a + b * \beta
keep the result in the input tensor a and return nothing keep the result in the input tensor a and return nothing
*/ */
void _SumMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0); void _SumMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0);
void SumMe(XTensor & a, const XTensor & b, DTYPE beta = (DTYPE)1.0);
/* /*
tensor summation c = a + b * \beta tensor summation c = a + b * \beta
......
...@@ -48,12 +48,12 @@ void _MatrixMULCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -48,12 +48,12 @@ void _MatrixMULCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors((c->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((c->dataType == DEFAULT_DTYPE), "TODO!");
#if defined(USE_BLAS) #if defined(USE_BLAS)
int an = a->dimSize[0]; int an = a->dimSize[0];
int am = a->dimSize[1]; int am = a->dimSize[1];
int bn = b->dimSize[0]; int bn = b->dimSize[0];
int bm = b->dimSize[1]; int bm = b->dimSize[1];
int cn = c->dimSize[0]; int cn = c->dimSize[0];
int cm = c->dimSize[1]; int cm = c->dimSize[1];
if (transposedA == X_NOTRANS && transposedB == X_NOTRANS) if (transposedA == X_NOTRANS && transposedB == X_NOTRANS)
GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, cn, cm, am, alpha, (DTYPE*)a->data, am, (DTYPE*)b->data, bm, beta, (DTYPE*)c->data, cm); GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, cn, cm, am, alpha, (DTYPE*)a->data, am, (DTYPE*)b->data, bm, beta, (DTYPE*)c->data, cm);
......
...@@ -165,7 +165,7 @@ SIMPLE_BINARY_FUNCTION(Shift, _Shift, MATH_SHIFT) ...@@ -165,7 +165,7 @@ SIMPLE_BINARY_FUNCTION(Shift, _Shift, MATH_SHIFT)
SIMPLE_BINARY_FUNCTION_VOID(Shift, _Shift, MATH_SHIFT) SIMPLE_BINARY_FUNCTION_VOID(Shift, _Shift, MATH_SHIFT)
_SIMPLE_BINARY_FUNCTION_INT(_Mod, _CudaMod, mod) _SIMPLE_BINARY_FUNCTION_INT(_Mod, _CudaMod, mod)
SIMPLE_BINARY_FUNCTION_ME_INT(_ModMe, _Mod) SIMPLE_BINARY_FUNCTION_ME_INT(ModMe, _Mod)
SIMPLE_BINARY_FUNCTION_INT(Mod, _Mod) SIMPLE_BINARY_FUNCTION_INT(Mod, _Mod)
#else #else
......
...@@ -37,9 +37,16 @@ void _Scale(const XTensor * a, XTensor * b, float scale); ...@@ -37,9 +37,16 @@ void _Scale(const XTensor * a, XTensor * b, float scale);
scale up tensor entires (on site) scale up tensor entires (on site)
b = a * scale b = a * scale
*/ */
void _ScaleMe(XTensor & a, int scale); void _ScaleMe(XTensor * a, int scale);
void _ScaleMe(XTensor & a, float scale); void _ScaleMe(XTensor * a, float scale);
/*
scale up tensor entries (on site)
b = a * scale
*/
void ScaleMe(XTensor & a, int scale);
void ScaleMe(XTensor & a, float scale);
/* /*
scale up tensor entires scale up tensor entires
b = a * scale b = a * scale
...@@ -64,8 +71,15 @@ void _Descale(const XTensor * a, XTensor * b, float scale); ...@@ -64,8 +71,15 @@ void _Descale(const XTensor * a, XTensor * b, float scale);
descale tensor entires (on site) descale tensor entires (on site)
b = a / scale b = a / scale
*/ */
void _DescaleMe(XTensor & a, int scale); void _DescaleMe(XTensor * a, int scale);
void _DescaleMe(XTensor & a, float scale); void _DescaleMe(XTensor * a, float scale);
/*
descale tensor entries (on site)
b = a / scale
*/
void DescaleMe(XTensor & a, int scale);
void DescaleMe(XTensor & a, float scale);
/* /*
descale tensor entires descale tensor entires
...@@ -91,8 +105,15 @@ void _Shift(const XTensor * a, XTensor * b, float shift); ...@@ -91,8 +105,15 @@ void _Shift(const XTensor * a, XTensor * b, float shift);
shift tensor entires (on site) shift tensor entires (on site)
b = a + shift b = a + shift
*/ */
void _ShiftMe(XTensor & a, int shift); void _ShiftMe(XTensor * a, int shift);
void _ShiftMe(XTensor & a, float shift); void _ShiftMe(XTensor * a, float shift);
/*
shift tensor entries (on site)
b = a + shift
*/
void ShiftMe(XTensor & a, int shift);
void ShiftMe(XTensor & a, float shift);
/* /*
shift tensor entires shift tensor entires
...@@ -118,7 +139,13 @@ void _Mod(const XTensor * a, XTensor * b, int base); ...@@ -118,7 +139,13 @@ void _Mod(const XTensor * a, XTensor * b, int base);
mod tensor entires (on site) mod tensor entires (on site)
b = a % mod b = a % mod
*/ */
void _ModMe(XTensor & a, int base); void _ModMe(XTensor * a, int base);
/*
mod tensor entries (on site)
b = a % mod
*/
void ModMe(XTensor & a, int base);
/* /*
mod tensor entires mod tensor entires
......
...@@ -36,26 +36,26 @@ set every entry to its clip value ...@@ -36,26 +36,26 @@ set every entry to its clip value
void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper) void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
/* run it on GPUs */ /* run it on GPUs */
if (a->devID >= 0) { if (a->devID >= 0) {
_CudaClip(a, b, lower, upper); _CudaClip(a, b, lower, upper);
return; return;
} }
#endif #endif
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!"); CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data; DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data; DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++) { for (int i = 0; i < a->unitNum; i++) {
if (d[i] > upper) if (d[i] > upper)
db[i] = upper; db[i] = upper;
else if (d[i] < lower) else if (d[i] < lower)
db[i] = lower; db[i] = lower;
else else
db[i] = d[i]; db[i] = d[i];
} }
} }
/* /*
...@@ -67,7 +67,19 @@ keep the result in the input tensor a and return nothing ...@@ -67,7 +67,19 @@ keep the result in the input tensor a and return nothing
*/ */
void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper) void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper)
{ {
_Clip(a, a, lower, upper); _Clip(a, a, lower, upper);
}
/*
set every entry to its clip value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
>> lower - the lower border
>> upper - the upper border
*/
void ClipMe(XTensor& a, DTYPE lower, DTYPE upper)
{
_Clip(&a, &a, lower, upper);
} }
/* /*
...@@ -80,18 +92,18 @@ make a new tensor to keep the result and return it ...@@ -80,18 +92,18 @@ make a new tensor to keep the result and return it
*/ */
XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper) XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper)
{ {
XTensor b(&a); XTensor b(&a);
b.SetTMPFlag(); b.SetTMPFlag();
/* call _Clip function */ /* call _Clip function */
_Clip(&a, &b, lower, upper); _Clip(&a, &b, lower, upper);
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_CLIP); XLink::MakeLink(&a, NULL, &b, MATH_CLIP);
XLink::AddParamToHead(&b, lower); XLink::AddParamToHead(&b, lower);
XLink::AddParamToHead(&b, upper); XLink::AddParamToHead(&b, upper);
return b; return b;
} }
void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper) void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper)
......
...@@ -36,18 +36,18 @@ set each entry to its clip value (CUDA Kernel) ...@@ -36,18 +36,18 @@ set each entry to its clip value (CUDA Kernel)
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size) void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) { if (i < size) {
if (a[i] > upper) if (a[i] > upper)
b[i] = upper; b[i] = upper;
else if (a[i] < lower) else if (a[i] < lower)
b[i] = lower; b[i] = lower;
else else
b[i] = a[i]; b[i] = a[i];
} }
} }
/* /*
...@@ -62,7 +62,7 @@ This is for float16 computation ...@@ -62,7 +62,7 @@ This is for float16 computation
__global__ __global__
void KernelClip(__half * a, __half * b, DTYPE lower, DTYPE upper, int size) void KernelClip(__half * a, __half * b, DTYPE lower, DTYPE upper, int size)
{ {
return; return;
} }
/* /*
...@@ -74,31 +74,31 @@ set each entry to its clip value ...@@ -74,31 +74,31 @@ set each entry to its clip value
*/ */
void _CudaClip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper) void _CudaClip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)
{ {
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!"); CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!"); CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3]; int gridSize[3];
int blockSize[3]; int blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize); GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]); dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]); dim3 threads(blockSize[0]);
int devIDBackup; int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) { if (a->dataType == DEFAULT_DTYPE) {
KernelClip << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, lower, upper, a->unitNum); KernelClip << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, lower, upper, a->unitNum);
} }
else if (a->dataType == X_FLOAT16) { else if (a->dataType == X_FLOAT16) {
KernelClip << <blocks, threads >> >((__half*)a->data, (__half*)b->data, lower, upper, a->unitNum); KernelClip << <blocks, threads >> >((__half*)a->data, (__half*)b->data, lower, upper, a->unitNum);
} }
else { else {
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
} }
BacktoCudaDev(a->devID, devIDBackup); BacktoCudaDev(a->devID, devIDBackup);
} }
/* /*
......
...@@ -33,6 +33,10 @@ void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper); ...@@ -33,6 +33,10 @@ void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper);
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper); void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper);
/* set every entry to its clip value (do it on site)
keep the result in the input tensor a and return nothing */
void ClipMe(XTensor & a, DTYPE lower, DTYPE upper);
/* set every entry to its clip value (return an XTensor structure) /* set every entry to its clip value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper); XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper);
......
...@@ -32,6 +32,9 @@ void _Equal(const XTensor * a, XTensor * b, DTYPE value); ...@@ -32,6 +32,9 @@ void _Equal(const XTensor * a, XTensor * b, DTYPE value);
/* check whether every entry is equal to the given value (do it on site) */ /* check whether every entry is equal to the given value (do it on site) */
void _EqualMe(XTensor * a, DTYPE value); void _EqualMe(XTensor * a, DTYPE value);
/* check whether every entry is equal to the given value (do it on site) */
void EqualMe(XTensor & a, DTYPE value);
/* check whether every entry is equal to the given value (return an XTensor structure) */ /* check whether every entry is equal to the given value (return an XTensor structure) */
XTensor Equal(const XTensor & a, DTYPE value); XTensor Equal(const XTensor & a, DTYPE value);
...@@ -41,6 +44,9 @@ void _NotEqual(const XTensor * a, XTensor * b, DTYPE value); ...@@ -41,6 +44,9 @@ void _NotEqual(const XTensor * a, XTensor * b, DTYPE value);
/* check whether every entry is not equal to the given value (do it on site) */ /* check whether every entry is not equal to the given value (do it on site) */
void _NotEqualMe(XTensor * a, DTYPE value); void _NotEqualMe(XTensor * a, DTYPE value);
/* check whether every entry is not equal to the given value (do it on site) */
void NotEqualMe(XTensor & a, DTYPE value);
/* check whether every entry is not equal to the given value (return an XTensor structure) */ /* check whether every entry is not equal to the given value (return an XTensor structure) */
XTensor NotEqual(const XTensor & a, DTYPE value); XTensor NotEqual(const XTensor & a, DTYPE value);
......
...@@ -44,7 +44,7 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme ...@@ -44,7 +44,7 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme
*/ */
void _Normalize(const XTensor * input, XTensor * output, int dim, const XTensor * mean, const XTensor * var, const XTensor * a, const XTensor * b, DTYPE epsilon) void _Normalize(const XTensor * input, XTensor * output, int dim, const XTensor * mean, const XTensor * var, const XTensor * a, const XTensor * b, DTYPE epsilon)
{ {
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
CheckNTErrors((XTensor::IsSameShaped(input, output)), "Unmatched input tensors!"); CheckNTErrors((XTensor::IsSameShaped(input, output)), "Unmatched input tensors!");
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Unmatched input tensors"); CheckNTErrors((XTensor::IsSameShaped(a, b)), "Unmatched input tensors");
CheckNTErrors((XTensor::IsSameShaped(mean, var)), "Unmatched input tensors"); CheckNTErrors((XTensor::IsSameShaped(mean, var)), "Unmatched input tensors");
...@@ -113,6 +113,27 @@ void _NormalizeMe(XTensor * input, int dim, const XTensor * mean, const XTensor ...@@ -113,6 +113,27 @@ void _NormalizeMe(XTensor * input, int dim, const XTensor * mean, const XTensor
{ {
_Normalize(input, input, dim, mean, var, a, b, epsilon); _Normalize(input, input, dim, mean, var, a, b, epsilon);
} }
/*
normalize the data with a normal distribution (do it on site)
the result overwrites the input tensor; nothing is returned
For an input x: x = a * (x - mean) / sqrt(variance + \epsilon) + b
where a and b are the scalar and bias, and \epsilon is the adjustment parameter.
>> input - tensor normalized in place
>> dim - dimension along which the mean and variance are taken
>> mean - the mean of the input
>> var - the variance of the input
>> a - the scalar
>> b - the bias
>> epsilon - a parameter
*/
void NormalizeMe(XTensor& input, int dim, const XTensor& mean, const XTensor& var, const XTensor& a, const XTensor& b, DTYPE epsilon)
{
    /* in-place call: the input tensor also serves as the output buffer */
    XTensor * io = &input;
    _Normalize(io, io, dim, &mean, &var, &a, &b, epsilon);
}
/* /*
normalized the data with normal distribution (return an XTensor structure) normalized the data with normal distribution (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
......
...@@ -95,8 +95,8 @@ void _CudaNormalize(const XTensor * input, XTensor * output, int dim, ...@@ -95,8 +95,8 @@ void _CudaNormalize(const XTensor * input, XTensor * output, int dim,
{ {
CheckNTErrors((input->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((input->dataType == DEFAULT_DTYPE), "TODO!");
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
int stride = 1; int stride = 1;
int strideNum = input->dimSizeRDI[dimRDI]; int strideNum = input->dimSizeRDI[dimRDI];
int blockNum = 1; int blockNum = 1;
for (int i = 0; i < input->order; i++) { for (int i = 0; i < input->order; i++) {
......
...@@ -42,6 +42,14 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme ...@@ -42,6 +42,14 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme
void _NormalizeMe(XTensor * input, int dim, const XTensor * mean, const XTensor * var, const XTensor * a, const XTensor * b, DTYPE epsilon); void _NormalizeMe(XTensor * input, int dim, const XTensor * mean, const XTensor * var, const XTensor * a, const XTensor * b, DTYPE epsilon);
/* /*
normalized the data with normal distribution (do it on site)
keep the result in the input tensor and return nothing
For an input x, x = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
*/
void NormalizeMe(XTensor & input, int dim, const XTensor & mean, const XTensor & var, const XTensor & a, const XTensor & b, DTYPE epsilon);
/*
normalized the data with normal distribution (return an XTensor structure) normalized the data with normal distribution (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b
......
...@@ -81,6 +81,17 @@ void _PowerMe(XTensor * a, DTYPE p) ...@@ -81,6 +81,17 @@ void _PowerMe(XTensor * a, DTYPE p)
} }
/* /*
get the power(a, p) (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor
>> p - parameter
*/
void PowerMe(XTensor& a, DTYPE p)
{
    /* raise every entry of a to the power p, writing the result back into a */
    XTensor * inout = &a;
    _Power(inout, inout, p);
}
/*
get the power(a, p) (return an XTensor structure) get the power(a, p) (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
>> a - input tensor >> a - input tensor
......
...@@ -36,6 +36,12 @@ keep the result in the input tensor a and return nothing ...@@ -36,6 +36,12 @@ keep the result in the input tensor a and return nothing
void _PowerMe(XTensor * a, DTYPE p); void _PowerMe(XTensor * a, DTYPE p);
/* /*
get the power(x, y) (do it on site)
keep the result in the input tensor a and return nothing
*/
void PowerMe(XTensor & a, DTYPE p);
/*
get the power(x, y) (return an XTensor structure) get the power(x, y) (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
*/ */
......
...@@ -92,6 +92,21 @@ void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift) ...@@ -92,6 +92,21 @@ void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift)
} }
/* /*
scale and shift all tensor entries (do it on site)
keep the result in the input tensor a and return nothing
a = a * scale + shift
>> a - the input/output tensor
>> scale - the scaler factor
>> shift - the shift factor
*/
void ScaleAndShiftMe(XTensor& a, DTYPE scale, DTYPE shift)
{
    /* in place: a = a * scale + shift */
    XTensor * inout = &a;
    _ScaleAndShift(inout, inout, scale, shift);
}
/*
scale and shift all tensor entires (return an XTensor structure) scale and shift all tensor entires (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
......
...@@ -45,6 +45,13 @@ void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift = 0); ...@@ -45,6 +45,13 @@ void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift = 0);
/* /*
scale and shift all tensor entires scale and shift all tensor entires
keep the result in the input tensor a and return nothing
a = a * scale + shift
*/
void ScaleAndShiftMe(XTensor & a, DTYPE scale, DTYPE shift = 0);
/*
scale and shift all tensor entires
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
b = a * scale + shift b = a * scale + shift
*/ */
......
...@@ -34,7 +34,7 @@ DTYPE square(DTYPE x) ...@@ -34,7 +34,7 @@ DTYPE square(DTYPE x)
DTYPE round(DTYPE r) DTYPE round(DTYPE r)
{ {
return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5); return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5);
} }
DTYPE isnonzero(DTYPE r) DTYPE isnonzero(DTYPE r)
......
...@@ -38,7 +38,7 @@ DTYPE cudasquare(DTYPE x) ...@@ -38,7 +38,7 @@ DTYPE cudasquare(DTYPE x)
__device__ __device__
DTYPE cudaround(DTYPE r) DTYPE cudaround(DTYPE r)
{ {
return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5); return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5);
} }
__device__ __device__
......
...@@ -31,6 +31,9 @@ void _Absolute(const XTensor * a, XTensor * b); ...@@ -31,6 +31,9 @@ void _Absolute(const XTensor * a, XTensor * b);
/* set every entry to its absolute value (do it on site) /* set every entry to its absolute value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _AbsoluteMe(XTensor * a); void _AbsoluteMe(XTensor * a);
/* set every entry to its absolute value (do it on site)
keep the result in the input tensor a and return nothing */
void AbsoluteMe(XTensor & a);
/* set every entry to its absolute value (return an XTensor structure) /* set every entry to its absolute value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Absolute(const XTensor & a); XTensor Absolute(const XTensor & a);
...@@ -42,6 +45,9 @@ void _Ceil(const XTensor * a, XTensor * b); ...@@ -42,6 +45,9 @@ void _Ceil(const XTensor * a, XTensor * b);
/* set every entry to its ceil value (do it on site) /* set every entry to its ceil value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _CeilMe(XTensor * a); void _CeilMe(XTensor * a);
/* set every entry to its ceil value (do it on site)
keep the result in the input tensor a and return nothing */
void CeilMe(XTensor & a);
/* set every entry to its ceil value (return an XTensor structure) /* set every entry to its ceil value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Ceil(const XTensor & a); XTensor Ceil(const XTensor & a);
...@@ -53,6 +59,9 @@ void _Exp(const XTensor * a, XTensor * b); ...@@ -53,6 +59,9 @@ void _Exp(const XTensor * a, XTensor * b);
/* set every entry to its exponent value (do it on site) /* set every entry to its exponent value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _ExpMe(XTensor * a); void _ExpMe(XTensor * a);
/* set every entry to its exponent value (do it on site)
keep the result in the input tensor a and return nothing */
void ExpMe(XTensor & a);
/* set every entry to its exponent value (return an XTensor structure) /* set every entry to its exponent value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Exp(const XTensor & a); XTensor Exp(const XTensor & a);
...@@ -64,6 +73,9 @@ void _Floor(const XTensor * a, XTensor * b); ...@@ -64,6 +73,9 @@ void _Floor(const XTensor * a, XTensor * b);
/* set every entry to its floor value (do it on site) /* set every entry to its floor value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _FloorMe(XTensor * a); void _FloorMe(XTensor * a);
/* set every entry to its floor value (do it on site)
keep the result in the input tensor a and return nothing */
void FloorMe(XTensor & a);
/* set every entry to its floor value (return an XTensor structure) /* set every entry to its floor value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Floor(const XTensor & a); XTensor Floor(const XTensor & a);
...@@ -75,6 +87,9 @@ void _IsNonZero(const XTensor *a, XTensor *b); ...@@ -75,6 +87,9 @@ void _IsNonZero(const XTensor *a, XTensor *b);
/* if source entry is non-zero, set target entry to be one, otherwise zero (do it on site) /* if source entry is non-zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _IsNonZeroMe(XTensor *a); void _IsNonZeroMe(XTensor *a);
/* if source entry is non-zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */
void IsNonZeroMe(XTensor &a);
/* if source entry is non-zero, set target entry to be one, otherwise zero (return an XTensor structure) /* if source entry is non-zero, set target entry to be one, otherwise zero (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor IsNonZero(const XTensor &a); XTensor IsNonZero(const XTensor &a);
...@@ -86,6 +101,9 @@ void _IsZero(const XTensor *a, XTensor *b); ...@@ -86,6 +101,9 @@ void _IsZero(const XTensor *a, XTensor *b);
/* if source entry is zero, set target entry to be one, otherwise zero (do it on site) /* if source entry is zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _IsZeroMe(XTensor *a); void _IsZeroMe(XTensor *a);
/* if source entry is zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */
void IsZeroMe(XTensor &a);
/* if source entry is zero, set target entry to be one, otherwise zero (return an XTensor structure) /* if source entry is zero, set target entry to be one, otherwise zero (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor IsZero(const XTensor &a); XTensor IsZero(const XTensor &a);
...@@ -97,6 +115,9 @@ void _Log(const XTensor * a, XTensor * b); ...@@ -97,6 +115,9 @@ void _Log(const XTensor * a, XTensor * b);
/* set every entry to its logarithm value (do it on site) /* set every entry to its logarithm value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _LogMe(XTensor * a); void _LogMe(XTensor * a);
/* set every entry to its logarithm value (do it on site)
keep the result in the input tensor a and return nothing */
void LogMe(XTensor & a);
/* set every entry to its logarithm value (return an XTensor structure) /* set every entry to its logarithm value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Log(const XTensor & a); XTensor Log(const XTensor & a);
...@@ -108,6 +129,9 @@ void _Round(const XTensor * a, XTensor * b); ...@@ -108,6 +129,9 @@ void _Round(const XTensor * a, XTensor * b);
/* set every entry to its round value (do it on site) /* set every entry to its round value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _RoundMe(XTensor * a); void _RoundMe(XTensor * a);
/* set every entry to its round value (do it on site)
keep the result in the input tensor a and return nothing */
void RoundMe(XTensor & a);
/* set every entry to its round value (return an XTensor structure) /* set every entry to its round value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Round(const XTensor & a); XTensor Round(const XTensor & a);
...@@ -119,6 +143,9 @@ void _Sqrt(const XTensor * a, XTensor * b); ...@@ -119,6 +143,9 @@ void _Sqrt(const XTensor * a, XTensor * b);
/* set every entry to its sqrt value (do it on site) /* set every entry to its sqrt value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _SqrtMe(XTensor * a); void _SqrtMe(XTensor * a);
/* set every entry to its sqrt value (do it on site)
keep the result in the input tensor a and return nothing */
void SqrtMe(XTensor & a);
/* set every entry to its sqrt value (return an XTensor structure) /* set every entry to its sqrt value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Sqrt(const XTensor & a); XTensor Sqrt(const XTensor & a);
...@@ -130,6 +157,9 @@ void _Square(const XTensor * a, XTensor * b); ...@@ -130,6 +157,9 @@ void _Square(const XTensor * a, XTensor * b);
/* set every entry to its square value (do it on site) /* set every entry to its square value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _SquareMe(XTensor * a); void _SquareMe(XTensor * a);
/* set every entry to its square value (do it on site)
keep the result in the input tensor a and return nothing */
void SquareMe(XTensor & a);
/* set every entry to its square value (return an XTensor structure) /* set every entry to its square value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Square(const XTensor & a); XTensor Square(const XTensor & a);
...@@ -142,6 +172,9 @@ void _Sin(const XTensor * a, XTensor * b); ...@@ -142,6 +172,9 @@ void _Sin(const XTensor * a, XTensor * b);
/* set every entry to its sine value (do it on site) /* set every entry to its sine value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _SinMe(XTensor * a); void _SinMe(XTensor * a);
/* set every entry to its sine value (do it on site)
keep the result in the input tensor a and return nothing */
void SinMe(XTensor & a);
/* set every entry to its sine value (return an XTensor structure) /* set every entry to its sine value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Sin(const XTensor & a); XTensor Sin(const XTensor & a);
...@@ -153,6 +186,9 @@ void _Cos(const XTensor * a, XTensor * b); ...@@ -153,6 +186,9 @@ void _Cos(const XTensor * a, XTensor * b);
/* set every entry to its cosine value (do it on site) /* set every entry to its cosine value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _CosMe(XTensor * a); void _CosMe(XTensor * a);
/* set every entry to its cosine value (do it on site)
keep the result in the input tensor a and return nothing */
void CosMe(XTensor & a);
/* set every entry to its cosine value (return an XTensor structure) /* set every entry to its cosine value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Cos(const XTensor & a); XTensor Cos(const XTensor & a);
...@@ -164,6 +200,9 @@ void _Tan(const XTensor * a, XTensor * b); ...@@ -164,6 +200,9 @@ void _Tan(const XTensor * a, XTensor * b);
/* set every entry to its tangent value (do it on site) /* set every entry to its tangent value (do it on site)
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _TanMe(XTensor * a); void _TanMe(XTensor * a);
/* set every entry to its tangent value (do it on site)
keep the result in the input tensor a and return nothing */
void TanMe(XTensor & a);
/* set every entry to its tangent value (return an XTensor structure) /* set every entry to its tangent value (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Tan(const XTensor & a); XTensor Tan(const XTensor & a);
......
...@@ -41,8 +41,8 @@ void _ReduceMax(const XTensor * input, XTensor * output, int dim) ...@@ -41,8 +41,8 @@ void _ReduceMax(const XTensor * input, XTensor * output, int dim)
CheckNTErrors((input->order == output->order + 1), "Incorrect tensor sizes!"); CheckNTErrors((input->order == output->order + 1), "Incorrect tensor sizes!");
CheckNTErrors((input->order > dim && dim >=0), "Illegal dimension to reduce!"); CheckNTErrors((input->order > dim && dim >=0), "Illegal dimension to reduce!");
CheckNTErrors((input->dataType == output->dataType), "Unmatched data types!"); CheckNTErrors((input->dataType == output->dataType), "Unmatched data types!");
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
CheckNTErrors(dimRDI >= 0, "Wrong dimension!"); CheckNTErrors(dimRDI >= 0, "Wrong dimension!");
for(int i = 0; i < input->order; i++){ for(int i = 0; i < input->order; i++){
...@@ -104,7 +104,7 @@ make a new tensor to keep the result and return it ...@@ -104,7 +104,7 @@ make a new tensor to keep the result and return it
XTensor ReduceMax(const XTensor &input, int dim) XTensor ReduceMax(const XTensor &input, int dim)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < order; i++){ for(int i = 0; i < order; i++){
......
...@@ -504,7 +504,7 @@ void _CudaReduceMax(const XTensor * input, XTensor * output, int dim) ...@@ -504,7 +504,7 @@ void _CudaReduceMax(const XTensor * input, XTensor * output, int dim)
CheckNTErrors(input->order > dim && dim >=0, "Illegal dimension to reduce!"); CheckNTErrors(input->order > dim && dim >=0, "Illegal dimension to reduce!");
CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!"); CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!");
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
for(int i = 0; i < input->order; i++){ for(int i = 0; i < input->order; i++){
if(i < dimRDI){ if(i < dimRDI){
CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i], "Unmatched tensors!"); CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i], "Unmatched tensors!");
......
...@@ -39,7 +39,7 @@ void _ReduceMean(const XTensor * input, XTensor * output, int dim) ...@@ -39,7 +39,7 @@ void _ReduceMean(const XTensor * input, XTensor * output, int dim)
{ {
CheckNTErrors((input->order > dim), "Illegal dimension specified!"); CheckNTErrors((input->order > dim), "Illegal dimension specified!");
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
int num = input->dimSizeRDI[dimRDI]; int num = input->dimSizeRDI[dimRDI];
_ReduceSum(input, output, dim); _ReduceSum(input, output, dim);
...@@ -59,7 +59,7 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i ...@@ -59,7 +59,7 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
XTensor ReduceMean(const XTensor &input, int dim) XTensor ReduceMean(const XTensor &input, int dim)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < order; i++){ for(int i = 0; i < order; i++){
......
...@@ -50,7 +50,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor ...@@ -50,7 +50,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
CheckNTErrors((input->dataType == output->dataType), "Unmatched data types!"); CheckNTErrors((input->dataType == output->dataType), "Unmatched data types!");
CheckNTErrors((shift == NULL || XTensor::IsSameShaped(output, shift)), "Incorrect shift tensor size!"); CheckNTErrors((shift == NULL || XTensor::IsSameShaped(output, shift)), "Incorrect shift tensor size!");
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
CheckNTErrors(dimRDI >= 0, "Wrong dimension!"); CheckNTErrors(dimRDI >= 0, "Wrong dimension!");
for(int i = 0; i < input->order; i++){ for(int i = 0; i < input->order; i++){
...@@ -215,7 +215,7 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true ...@@ -215,7 +215,7 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true
XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE power, bool isExp) XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE power, bool isExp)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < order; i++){ for(int i = 0; i < order; i++){
...@@ -294,7 +294,7 @@ sum = \sum_i exp((a_i)^power) if isExp == true ...@@ -294,7 +294,7 @@ sum = \sum_i exp((a_i)^power) if isExp == true
XTensor ReduceSum(const XTensor &input, int dim, DTYPE power, bool isExp) XTensor ReduceSum(const XTensor &input, int dim, DTYPE power, bool isExp)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < order; i++){ for(int i = 0; i < order; i++){
......
...@@ -341,7 +341,7 @@ void KernelReduceSumFast(DTYPE * input, DTYPE * output, ...@@ -341,7 +341,7 @@ void KernelReduceSumFast(DTYPE * input, DTYPE * output,
if (tid < blockDim.x / 32) if (tid < blockDim.x / 32)
value = data[tid]; value = data[tid];
else else
value = 0; value = 0;
value = shflDownReduceSum(value); value = shflDownReduceSum(value);
if (tid == 0 && blockIdx.x < reducedStrideNum) { if (tid == 0 && blockIdx.x < reducedStrideNum) {
...@@ -692,7 +692,7 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen ...@@ -692,7 +692,7 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!"); CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!");
CheckNTErrors(shift == NULL || output->unitNum == shift->unitNum, "Incorrect shift tensor size!"); CheckNTErrors(shift == NULL || output->unitNum == shift->unitNum, "Incorrect shift tensor size!");
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
for(int i = 0; i < input->order; i++){ for(int i = 0; i < input->order; i++){
if(i < dimRDI){ if(i < dimRDI){
CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i], "Unmatched tensors!"); CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i], "Unmatched tensors!");
......
...@@ -55,7 +55,7 @@ For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2 ...@@ -55,7 +55,7 @@ For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2
XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift) XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < order; i++){ for(int i = 0; i < order; i++){
......
...@@ -38,7 +38,7 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2 ...@@ -38,7 +38,7 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
*/ */
void _ReduceVariance(const XTensor * input, XTensor * output, int dim, const XTensor * mean) void _ReduceVariance(const XTensor * input, XTensor * output, int dim, const XTensor * mean)
{ {
int dimRDI = input->order - dim - 1; int dimRDI = input->order - dim - 1;
int num = input->dimSizeRDI[dimRDI]; int num = input->dimSizeRDI[dimRDI];
_ReduceSum(input, output, dim, mean, 2.0F); _ReduceSum(input, output, dim, mean, 2.0F);
_ScaleAndShiftMe(output, (DTYPE)1 / num, 0); _ScaleAndShiftMe(output, (DTYPE)1 / num, 0);
...@@ -58,7 +58,7 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2 ...@@ -58,7 +58,7 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean) XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean)
{ {
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!"); CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1; int order = input.order - 1;
int * dimSize = new int[order]; int * dimSize = new int[order];
for(int i = 0; i < order; i++){ for(int i = 0; i < order; i++){
......
...@@ -85,7 +85,7 @@ void _ConcatenateSolely(const TensorList * smalls, XTensor * big, int dim) ...@@ -85,7 +85,7 @@ void _ConcatenateSolely(const TensorList * smalls, XTensor * big, int dim)
} }
} }
else { else {
StrList* sourceArrays = new StrList(smalls->count); StrList* sourceArrays = new StrList(smalls->count);
int * blockSizes = new int[smalls->count]; int * blockSizes = new int[smalls->count];
for (int i = 0; i < smalls->count; i++) { for (int i = 0; i < smalls->count; i++) {
XTensor * tensor = (XTensor*)smalls->GetItem(i); XTensor * tensor = (XTensor*)smalls->GetItem(i);
......
...@@ -41,6 +41,13 @@ a = permuted(a) ...@@ -41,6 +41,13 @@ a = permuted(a)
*/ */
void _PermuteMe(XTensor * a, int * dimPermute); void _PermuteMe(XTensor * a, int * dimPermute);
/*
permute the tensor dimensions (do it on site).
keep the result in the input tensor and return nothing.
a = permuted(a)
*/
void PermuteMe(XTensor &a, int * dimPermute);
/* /*
make a tensor with permuted dimensions (return an XTensor structure). make a tensor with permuted dimensions (return an XTensor structure).
make a new tensor to keep the result and return it. make a new tensor to keep the result and return it.
......
...@@ -43,9 +43,9 @@ XTensor Reshape(XTensor &s, int order, int * dimSize) ...@@ -43,9 +43,9 @@ XTensor Reshape(XTensor &s, int order, int * dimSize)
t.Reshape(order, dimSize); t.Reshape(order, dimSize);
/* tensor connections */ /* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE); XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE);
return t; return t;
} }
void Reshape(XTensor &s, XTensor &t, int order, int * dimSize) void Reshape(XTensor &s, XTensor &t, int order, int * dimSize)
......
...@@ -89,6 +89,20 @@ void _SqueezeMe(XTensor * source, int leadingDim) ...@@ -89,6 +89,20 @@ void _SqueezeMe(XTensor * source, int leadingDim)
} }
/* /*
squeeze the tensor along the specified dimension (do it on site)
keep the result in the input tensor a and return nothing
>> source - the input tensor
>> leadingDim - the dimension that we would squeeze
if leadingDim = -1, squeeze all dimensions that are 1
else, squeeze the specified dimension
*/
void SqueezeMe(XTensor& source, int leadingDim)
{
_Squeeze(&source, &source, leadingDim);
}
/*
squeeze the tensor along the specified dimension (return an XTensor structure) squeeze the tensor along the specified dimension (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
......
...@@ -33,6 +33,10 @@ void _Squeeze(XTensor * source, XTensor * target, int leadingDim = -1); ...@@ -33,6 +33,10 @@ void _Squeeze(XTensor * source, XTensor * target, int leadingDim = -1);
keep the result in the input tensor a and return nothing */ keep the result in the input tensor a and return nothing */
void _SqueezeMe(XTensor * source, int leadingDim = -1); void _SqueezeMe(XTensor * source, int leadingDim = -1);
/* squeeze the tensor along the specified dimension (do it on site)
keep the result in the input tensor a and return nothing */
void SqueezeMe(XTensor & source, int leadingDim = -1);
/* squeeze the tensor along the specified dimension (return an XTensor structure) /* squeeze the tensor along the specified dimension (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Squeeze(XTensor & source, int leadingDim = -1); XTensor Squeeze(XTensor & source, int leadingDim = -1);
......
...@@ -45,7 +45,7 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim) ...@@ -45,7 +45,7 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim)
CheckNTErrors((a->order == index->order), "Unmatched input tensors!"); CheckNTErrors((a->order == index->order), "Unmatched input tensors!");
CheckNTErrors((index->dataType == X_INT), "Wrong data type!"); CheckNTErrors((index->dataType == X_INT), "Wrong data type!");
int dimRDI = a->order - dim - 1; int dimRDI = a->order - dim - 1;
/* make the index tensor */ /* make the index tensor */
index->SetAscendingOrder(dim); index->SetAscendingOrder(dim);
...@@ -67,7 +67,7 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim) ...@@ -67,7 +67,7 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim)
blockNum *= a->dimSizeRDI[i]; blockNum *= a->dimSizeRDI[i];
int blockSize = stride * strideNum; int blockSize = stride * strideNum;
_CopyValues(a, b); _CopyValues(a, b);
for (int k = 0; k < blockNum; k++) { for (int k = 0; k < blockNum; k++) {
for (int i = 0; i < stride; i++) { for (int i = 0; i < stride; i++) {
void * dataB = (char*)b->data + (k * blockSize + i) * b->unitSize; void * dataB = (char*)b->data + (k * blockSize + i) * b->unitSize;
...@@ -98,6 +98,21 @@ void _SortMe(XTensor * a, XTensor * index, int dim) ...@@ -98,6 +98,21 @@ void _SortMe(XTensor * a, XTensor * index, int dim)
} }
/* /*
sort the tensor along a given dimension (do it on site)
keep the result in the input tensor a and return nothing
>> a - input tensor
>> index - index of the items in the resulting tensor
>> dim - the dimension along which the sorting is performed
*/
void SortMe(XTensor& a, XTensor& index, int dim)
{
_Sort(&a, &a, &index, dim);
}
/*
sort the tensor along a given dimension (return an XTensor structure) sort the tensor along a given dimension (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
......
...@@ -217,7 +217,7 @@ void _CudaSortBig(const XTensor * a, XTensor * b, XTensor * indexA, XTensor * in ...@@ -217,7 +217,7 @@ void _CudaSortBig(const XTensor * a, XTensor * b, XTensor * indexA, XTensor * in
CheckNTErrors((a->order > dim && dim >= 0), "Incorrect dimension specified!"); CheckNTErrors((a->order > dim && dim >= 0), "Incorrect dimension specified!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
int dimRDI = a->order - dim - 1; int dimRDI = a->order - dim - 1;
if (k < 0 || k > b->dimSizeRDI[dimRDI]) if (k < 0 || k > b->dimSizeRDI[dimRDI])
k = b->dimSizeRDI[dimRDI]; k = b->dimSizeRDI[dimRDI];
......
...@@ -35,6 +35,12 @@ keep the result in the input tensor a and return nothing ...@@ -35,6 +35,12 @@ keep the result in the input tensor a and return nothing
*/ */
void _SortMe(XTensor * a, XTensor * index, int dim); void _SortMe(XTensor * a, XTensor * index, int dim);
/*
sort the data along a given dimension (do it on site)
keep the result in the input tensor a and return nothing
*/
void SortMe(XTensor & a, XTensor & index, int dim);
/* /*
sort the data along a given dimension (return an XTensor structure) sort the data along a given dimension (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
......
...@@ -238,9 +238,9 @@ void KernelTopK(T * input, int stride, int strideNum, int blockNum, int k, T min ...@@ -238,9 +238,9 @@ void KernelTopK(T * input, int stride, int strideNum, int blockNum, int k, T min
CudaXHeap<MIN_HEAP, T> heapFinal(k, k, heapData + k * threadIdx.y * blockDim.x); CudaXHeap<MIN_HEAP, T> heapFinal(k, k, heapData + k * threadIdx.y * blockDim.x);
/* /*
merge the result over the workers. merge the result over the workers.
This can be improved by parallel merging This can be improved by parallel merging
*/ */
if (blockDim.x > 1) { if (blockDim.x > 1) {
for (int p = 1; p < blockDim.x && p < strideNum; p++) { for (int p = 1; p < blockDim.x && p < strideNum; p++) {
CudaHeapNode<T> * hd = heapData + k * (threadIdx.y * blockDim.x + p); CudaHeapNode<T> * hd = heapData + k * (threadIdx.y * blockDim.x + p);
...@@ -770,22 +770,22 @@ void KernelTopKRadixSelect(unsigned int * input, int stride, int strideNum, ...@@ -770,22 +770,22 @@ void KernelTopKRadixSelect(unsigned int * input, int stride, int strideNum,
/* /*
if (idx == 0) if (idx == 0)
{ {
unsigned int* uintOutput = new unsigned int; unsigned int* uintOutput = new unsigned int;
int* tmpIndex = new int; int* tmpIndex = new int;
//*******************something worng*************************** //*******************something worng***************************
cudaMalloc((void **)&uintOutput, sizeof(unsigned int)* k); cudaMalloc((void **)&uintOutput, sizeof(unsigned int)* k);
cudaMalloc((void **)&tmpIndex, sizeof(unsigned int)*k); cudaMalloc((void **)&tmpIndex, sizeof(unsigned int)*k);
//************************************************************* //*************************************************************
collectNumberOld(input, limit, k, desire, uintOutput, tmpIndex, stride, strideNum); collectNumberOld(input, limit, k, desire, uintOutput, tmpIndex, stride, strideNum);
int blockIndex = idy / stride; int blockIndex = idy / stride;
int offsetInBlock = idy% stride; int offsetInBlock = idy% stride;
for (int i = stride * k * blockIndex + offsetInBlock, j = 0; j < k; j++, i += stride) for (int i = stride * k * blockIndex + offsetInBlock, j = 0; j < k; j++, i += stride)
{ {
//for(int i = ) //for(int i = )
output[i] = deconvert(uintOutput[j]); output[i] = deconvert(uintOutput[j]);
index[i] = tmpIndex[j]; index[i] = tmpIndex[j];
} }
} }
__syncthreads(); __syncthreads();
*/ */
......
...@@ -67,8 +67,8 @@ void CudaSetAscendingOrder(XTensor * a, int dim) ...@@ -67,8 +67,8 @@ void CudaSetAscendingOrder(XTensor * a, int dim)
{ {
CheckNTErrors((a->dataType == X_INT), "TODO!"); CheckNTErrors((a->dataType == X_INT), "TODO!");
int dimRDI = a->order - dim - 1; int dimRDI = a->order - dim - 1;
int stride = 1; int stride = 1;
int strideNum = a->dimSizeRDI[dimRDI]; int strideNum = a->dimSizeRDI[dimRDI];
for(int i = 0; i < dimRDI; i++) for(int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i]; stride *= a->dimSizeRDI[i];
......
...@@ -56,7 +56,7 @@ void RunParallel2D(XPRunner * parallelRunner, void * job, ...@@ -56,7 +56,7 @@ void RunParallel2D(XPRunner * parallelRunner, void * job,
va_list ap; va_list ap;
va_start(ap, argNum); va_start(ap, argNum);
for (int i = 0; i < argNum; i++) { for (int i = 0; i < argNum; i++) {
XTensor* p = va_arg(ap, XTensor*); XTensor* p = va_arg(ap, XTensor*);
jobArgList->Add(p); jobArgList->Add(p);
} }
va_end(ap); va_end(ap);
...@@ -77,19 +77,19 @@ void RunParallel2D(XPRunner * parallelRunner, void * job, ...@@ -77,19 +77,19 @@ void RunParallel2D(XPRunner * parallelRunner, void * job,
2. other arguments 2. other arguments
*/ */
for (int i = 0; i < jobNum; i++) { for (int i = 0; i < jobNum; i++) {
IntList* indexArgs = new IntList(4); IntList* indexArgs = new IntList(4);
TensorList * blockArgs = new TensorList(argNum); TensorList * blockArgs = new TensorList(argNum);
int * blockIndex = indexList + i * 4; int * blockIndex = indexList + i * 4;
indexArgs->Add(blockIndex[0]); indexArgs->Add(blockIndex[0]);
indexArgs->Add(blockIndex[1]); indexArgs->Add(blockIndex[1]);
indexArgs->Add(blockIndex[2]); indexArgs->Add(blockIndex[2]);
indexArgs->Add(blockIndex[3]); indexArgs->Add(blockIndex[3]);
for (int j = 0; j < argNum; j++) for (int j = 0; j < argNum; j++)
blockArgs->Add(jobArgList->GetItem(j)); blockArgs->Add(jobArgList->GetItem(j));
args->Add((XTensor*)indexArgs); args->Add((XTensor*)indexArgs);
args->Add((XTensor*)blockArgs); args->Add((XTensor*)blockArgs);
jobs->Add((XTensor*)job); jobs->Add((XTensor*)job);
......
...@@ -30,84 +30,84 @@ Set every entry to its absolute value. ...@@ -30,84 +30,84 @@ Set every entry to its absolute value.
*/ */
bool TestAbsolute1() bool TestAbsolute1()
{ {
/* a tensor of size (3, 2) */ /* a tensor of size (3, 2) */
int order = 2; int order = 2;
int * dimSize = new int[order]; int * dimSize = new int[order];
dimSize[0] = 3; dimSize[0] = 3;
dimSize[1] = 2; dimSize[1] = 2;
int unitNum = 1; int unitNum = 1;
for (int i = 0; i < order; i++) for (int i = 0; i < order; i++)
unitNum *= dimSize[i]; unitNum *= dimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F}, DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.5F, -4.0F}, {0.5F, -4.0F},
{0.0F, 6.0F} }; {0.0F, 6.0F} };
DTYPE answer[3][2] = { {1.0F, 2.0F}, DTYPE answer[3][2] = { {1.0F, 2.0F},
{0.5F, 4.0F}, {0.5F, 4.0F},
{0.0F, 6.0F} }; {0.0F, 6.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(order, dimSize); XTensor * a = NewTensor(order, dimSize);
XTensor * b = NewTensor(order, dimSize); XTensor * b = NewTensor(order, dimSize);
XTensor * aMe = NewTensor(order, dimSize); XTensor * aMe = NewTensor(order, dimSize);
XTensor bUser; XTensor bUser;
/* initialize variables */ /* initialize variables */
a->SetData(aData, unitNum); a->SetData(aData, unitNum);
aMe->SetData(aData, unitNum); aMe->SetData(aData, unitNum);
/* call Absolute function */ /* call Absolute function */
_Absolute(a, b); _Absolute(a, b);
_AbsoluteMe(aMe); _AbsoluteMe(aMe);
bUser = Absolute(*a); bUser = Absolute(*a);
/* check results */ /* check results */
cpuTest = b->CheckData(answer, unitNum, 1e-4F) && aMe->CheckData(answer, unitNum, 1e-4F) && bUser.CheckData(answer, unitNum, 1e-4F); cpuTest = b->CheckData(answer, unitNum, 1e-4F) && aMe->CheckData(answer, unitNum, 1e-4F) && bUser.CheckData(answer, unitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * aMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU; XTensor bUserGPU;
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, unitNum); aGPU->SetData(aData, unitNum);
aMeGPU->SetData(aData, unitNum); aMeGPU->SetData(aData, unitNum);
/* call Absolute function */ /* call Absolute function */
_Absolute(aGPU, bGPU); _Absolute(aGPU, bGPU);
_AbsoluteMe(aMeGPU); _AbsoluteMe(aMeGPU);
bUserGPU = Absolute(*aGPU); bUserGPU = Absolute(*aGPU);
/* check results */ /* check results */
gpuTest = bGPU->CheckData(answer, unitNum, 1e-4F) && aMeGPU->CheckData(answer, unitNum, 1e-4F) && bUserGPU.CheckData(answer, unitNum, 1e-4F); gpuTest = bGPU->CheckData(answer, unitNum, 1e-4F) && aMeGPU->CheckData(answer, unitNum, 1e-4F) && bUserGPU.CheckData(answer, unitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete aMe; delete aMe;
delete aGPU; delete aGPU;
delete bGPU; delete bGPU;
delete aMeGPU; delete aMeGPU;
delete[] dimSize; delete[] dimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete aMe; delete aMe;
delete[] dimSize; delete[] dimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
...@@ -119,33 +119,33 @@ TODO!! ...@@ -119,33 +119,33 @@ TODO!!
/* test for Absolute Function */ /* test for Absolute Function */
bool TestAbsolute() bool TestAbsolute()
{ {
XPRINT(0, stdout, "[TEST Absolute] set every entry to its absolute value \n"); XPRINT(0, stdout, "[TEST Absolute] set every entry to its absolute value \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
caseFlag = TestAbsolute1(); caseFlag = TestAbsolute1();
if (!caseFlag) { if (!caseFlag) {
returnFlag = false; returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n"); XPRINT(0, stdout, ">> case 1 failed!\n");
} }
else else
XPRINT(0, stdout, ">> case 1 passed!\n"); XPRINT(0, stdout, ">> case 1 passed!\n");
/* other cases test */ /* other cases test */
/* /*
TODO!! TODO!!
*/ */
if (returnFlag) { if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n"); XPRINT(0, stdout, ">> All Passed!\n");
} }
else else
XPRINT(0, stdout, ">> Failed!\n"); XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n"); XPRINT(0, stdout, "\n");
return returnFlag; return returnFlag;
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -31,88 +31,88 @@ Set every entry to its clip value. ...@@ -31,88 +31,88 @@ Set every entry to its clip value.
*/ */
bool TestClip1() bool TestClip1()
{ {
/* a tensor of size (3, 2) */ /* a tensor of size (3, 2) */
int aOrder = 2; int aOrder = 2;
int * aDimSize = new int[aOrder]; int * aDimSize = new int[aOrder];
aDimSize[0] = 3; aDimSize[0] = 3;
aDimSize[1] = 2; aDimSize[1] = 2;
int aUnitNum = 1; int aUnitNum = 1;
for (int i = 0; i < aOrder; i++) for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i]; aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F}, DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.0F, 4.0F}, {0.0F, 4.0F},
{5.0F, -6.0F} }; {5.0F, -6.0F} };
DTYPE answer[3][2] = { {1.0F, -1.0F}, DTYPE answer[3][2] = { {1.0F, -1.0F},
{0.0F, 1.0F}, {0.0F, 1.0F},
{1.0F, -1.0F} }; {1.0F, -1.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize); XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize); XTensor * b = NewTensor(aOrder, aDimSize);
XTensor * aMe = NewTensor(aOrder, aDimSize); XTensor * aMe = NewTensor(aOrder, aDimSize);
XTensor bUser; XTensor bUser;
/* initialize variables */ /* initialize variables */
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
aMe->SetData(aData, aUnitNum); aMe->SetData(aData, aUnitNum);
/* call Clip function */ /* call Clip function */
_Clip(a, b, -1.0, 1.0); _Clip(a, b, -1.0, 1.0);
_ClipMe(aMe, -1.0, 1.0); _ClipMe(aMe, -1.0, 1.0);
bUser = Clip(*a, -1.0, 1.0); bUser = Clip(*a, -1.0, 1.0);
/* check results */ /* check results */
cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) && cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) &&
aMe->CheckData(answer, aUnitNum, 1e-4F) && aMe->CheckData(answer, aUnitNum, 1e-4F) &&
bUser.CheckData(answer, aUnitNum, 1e-4F); bUser.CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0); XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0); XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0); XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU; XTensor bUserGPU;
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum); aMeGPU->SetData(aData, aUnitNum);
/* call Clip function */ /* call Clip function */
_Clip(aGPU, bGPU, -1.0, 1.0); _Clip(aGPU, bGPU, -1.0, 1.0);
_ClipMe(aMeGPU, -1.0, 1.0); _ClipMe(aMeGPU, -1.0, 1.0);
bUserGPU = Clip(*aGPU, -1.0, 1.0); bUserGPU = Clip(*aGPU, -1.0, 1.0);
/* check results */ /* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) && gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
aMeGPU->CheckData(answer, aUnitNum, 1e-4F) && aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
bUserGPU.CheckData(answer, aUnitNum, 1e-4F); bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete aMe; delete aMe;
delete aGPU; delete aGPU;
delete bGPU; delete bGPU;
delete aMeGPU; delete aMeGPU;
delete[] aDimSize; delete[] aDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete aMe; delete aMe;
delete[] aDimSize; delete[] aDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
...@@ -124,33 +124,33 @@ TODO!! ...@@ -124,33 +124,33 @@ TODO!!
/* test for Clip Function */ /* test for Clip Function */
bool TestClip() bool TestClip()
{ {
XPRINT(0, stdout, "[TEST Clip] set every entry to its clip value \n"); XPRINT(0, stdout, "[TEST Clip] set every entry to its clip value \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
caseFlag = TestClip1(); caseFlag = TestClip1();
if (!caseFlag) { if (!caseFlag) {
returnFlag = false; returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n"); XPRINT(0, stdout, ">> case 1 failed!\n");
} }
else else
XPRINT(0, stdout, ">> case 1 passed!\n"); XPRINT(0, stdout, ">> case 1 passed!\n");
/* other cases test */ /* other cases test */
/* /*
TODO!! TODO!!
*/ */
if (returnFlag) { if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n"); XPRINT(0, stdout, ">> All Passed!\n");
} }
else else
XPRINT(0, stdout, ">> Failed!\n"); XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n"); XPRINT(0, stdout, "\n");
return returnFlag; return returnFlag;
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -31,88 +31,88 @@ Comapre whether every entry is equal to the specified value. ...@@ -31,88 +31,88 @@ Comapre whether every entry is equal to the specified value.
*/ */
bool TestCompare1() bool TestCompare1()
{ {
/* a tensor of size (3, 2) */ /* a tensor of size (3, 2) */
int aOrder = 2; int aOrder = 2;
int * aDimSize = new int[aOrder]; int * aDimSize = new int[aOrder];
aDimSize[0] = 3; aDimSize[0] = 3;
aDimSize[1] = 2; aDimSize[1] = 2;
int aUnitNum = 1; int aUnitNum = 1;
for (int i = 0; i < aOrder; i++) for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i]; aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F}, DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.0F, 4.0F}, {0.0F, 4.0F},
{5.0F, 1.0F} }; {5.0F, 1.0F} };
DTYPE answer[3][2] = { {1.0F, 0.0F}, DTYPE answer[3][2] = { {1.0F, 0.0F},
{0.0F, 0.0F}, {0.0F, 0.0F},
{0.0F, 1.0F} }; {0.0F, 1.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize); XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize); XTensor * b = NewTensor(aOrder, aDimSize);
XTensor * aMe = NewTensor(aOrder, aDimSize); XTensor * aMe = NewTensor(aOrder, aDimSize);
XTensor bUser; XTensor bUser;
/* initialize variables */ /* initialize variables */
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
aMe->SetData(aData, aUnitNum); aMe->SetData(aData, aUnitNum);
/* call Equal function */ /* call Equal function */
_Equal(a, b, 1.0); _Equal(a, b, 1.0);
_EqualMe(aMe, 1.0); _EqualMe(aMe, 1.0);
bUser = Equal(*a, 1.0); bUser = Equal(*a, 1.0);
/* check results */ /* check results */
cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) && cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) &&
aMe->CheckData(answer, aUnitNum, 1e-4F) && aMe->CheckData(answer, aUnitNum, 1e-4F) &&
bUser.CheckData(answer, aUnitNum, 1e-4F); bUser.CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0); XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0); XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0); XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU; XTensor bUserGPU;
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum); aMeGPU->SetData(aData, aUnitNum);
/* call Equal function */ /* call Equal function */
_Equal(aGPU, bGPU, 1.0); _Equal(aGPU, bGPU, 1.0);
_EqualMe(aMeGPU, 1.0); _EqualMe(aMeGPU, 1.0);
bUserGPU = Equal(*aGPU, 1.0); bUserGPU = Equal(*aGPU, 1.0);
/* check results */ /* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) && gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
aMeGPU->CheckData(answer, aUnitNum, 1e-4F) && aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
bUserGPU.CheckData(answer, aUnitNum, 1e-4F); bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete aMe; delete aMe;
delete aGPU; delete aGPU;
delete bGPU; delete bGPU;
delete aMeGPU; delete aMeGPU;
delete[] aDimSize; delete[] aDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete aMe; delete aMe;
delete[] aDimSize; delete[] aDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
...@@ -124,33 +124,33 @@ TODO!! ...@@ -124,33 +124,33 @@ TODO!!
/* test for Compare Function */ /* test for Compare Function */
bool TestCompare() bool TestCompare()
{ {
XPRINT(0, stdout, "[TEST Compare] compare every entry with specified value \n"); XPRINT(0, stdout, "[TEST Compare] compare every entry with specified value \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
caseFlag = TestCompare1(); caseFlag = TestCompare1();
if (!caseFlag) { if (!caseFlag) {
returnFlag = false; returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n"); XPRINT(0, stdout, ">> case 1 failed!\n");
} }
else else
XPRINT(0, stdout, ">> case 1 passed!\n"); XPRINT(0, stdout, ">> case 1 passed!\n");
/* other cases test */ /* other cases test */
/* /*
TODO!! TODO!!
*/ */
if (returnFlag) { if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n"); XPRINT(0, stdout, ">> All Passed!\n");
} }
else else
XPRINT(0, stdout, ">> Failed!\n"); XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n"); XPRINT(0, stdout, "\n");
return returnFlag; return returnFlag;
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -29,7 +29,7 @@ In this case, 2 * (2, 1) -> (2, 2), dim=1. ...@@ -29,7 +29,7 @@ In this case, 2 * (2, 1) -> (2, 2), dim=1.
*/ */
bool TestConcatenate1() bool TestConcatenate1()
{ {
/* create list */ /* create list */
TensorList * sList = new TensorList(); TensorList * sList = new TensorList();
/* a source tensor of size (2, 1) */ /* a source tensor of size (2, 1) */
...@@ -83,7 +83,7 @@ bool TestConcatenate1() ...@@ -83,7 +83,7 @@ bool TestConcatenate1()
s2->SetData(sData2, sUnitNum2); s2->SetData(sData2, sUnitNum2);
t->SetZeroAll(); t->SetZeroAll();
/* add tensors to list */ /* add tensors to list */
sList->Add(s1); sList->Add(s1);
sList->Add(s2); sList->Add(s2);
...@@ -99,29 +99,29 @@ bool TestConcatenate1() ...@@ -99,29 +99,29 @@ bool TestConcatenate1()
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU; XTensor tUserGPU;
/* Initialize variables */ /* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1); sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* clear list */ /* clear list */
sList->Clear(); sList->Clear();
/* add tensors to list*/ /* add tensors to list*/
sList->Add(sGPU1); sList->Add(sGPU1);
sList->Add(sGPU2); sList->Add(sGPU2);
/* call Concatenate function */ /* call Concatenate function */
_Concatenate(sList, tGPU, 1); _Concatenate(sList, tGPU, 1);
tUserGPU = Concatenate(*sList, 1); tUserGPU = Concatenate(*sList, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete sList; delete sList;
...@@ -135,7 +135,7 @@ bool TestConcatenate1() ...@@ -135,7 +135,7 @@ bool TestConcatenate1()
delete[] sDimSize2; delete[] sDimSize2;
delete[] tDimSize; delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete sList; delete sList;
...@@ -156,7 +156,7 @@ In this case, 2 * (2, 1) -> (4, 1), dim=0. ...@@ -156,7 +156,7 @@ In this case, 2 * (2, 1) -> (4, 1), dim=0.
*/ */
bool TestConcatenate2() bool TestConcatenate2()
{ {
/* create list */ /* create list */
TensorList * sList = new TensorList(); TensorList * sList = new TensorList();
/* a source tensor of size (2, 1) */ /* a source tensor of size (2, 1) */
...@@ -212,7 +212,7 @@ bool TestConcatenate2() ...@@ -212,7 +212,7 @@ bool TestConcatenate2()
s2->SetData(sData2, sUnitNum2); s2->SetData(sData2, sUnitNum2);
t->SetZeroAll(); t->SetZeroAll();
/* add tensors to list */ /* add tensors to list */
sList->Add(s1); sList->Add(s1);
sList->Add(s2); sList->Add(s2);
...@@ -224,35 +224,35 @@ bool TestConcatenate2() ...@@ -224,35 +224,35 @@ bool TestConcatenate2()
cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU; XTensor tUserGPU;
/* Initialize variables */ /* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1); sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* clear list */ /* clear list */
sList->Clear(); sList->Clear();
/* add tensors to list*/ /* add tensors to list*/
sList->Add(sGPU1); sList->Add(sGPU1);
sList->Add(sGPU2); sList->Add(sGPU2);
/* call Concatenate function */ /* call Concatenate function */
_Concatenate(sList, tGPU, 0); _Concatenate(sList, tGPU, 0);
tUserGPU = Concatenate(*sList, 0); tUserGPU = Concatenate(*sList, 0);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete sList; delete sList;
delete s1; delete s1;
delete s2; delete s2;
...@@ -264,7 +264,7 @@ bool TestConcatenate2() ...@@ -264,7 +264,7 @@ bool TestConcatenate2()
delete[] sDimSize2; delete[] sDimSize2;
delete[] tDimSize; delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete sList; delete sList;
...@@ -285,7 +285,7 @@ In this case, (2, 1) + (2, 2) -> (2, 3), dim=1. ...@@ -285,7 +285,7 @@ In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
*/ */
bool TestConcatenate3() bool TestConcatenate3()
{ {
/* create list */ /* create list */
TensorList * sList = new TensorList(); TensorList * sList = new TensorList();
/* a source tensor of size (2, 1) */ /* a source tensor of size (2, 1) */
...@@ -339,7 +339,7 @@ bool TestConcatenate3() ...@@ -339,7 +339,7 @@ bool TestConcatenate3()
s2->SetData(sData2, sUnitNum2); s2->SetData(sData2, sUnitNum2);
t->SetZeroAll(); t->SetZeroAll();
/* add tensors to list */ /* add tensors to list */
sList->Add(s1); sList->Add(s1);
sList->Add(s2); sList->Add(s2);
...@@ -351,35 +351,35 @@ bool TestConcatenate3() ...@@ -351,35 +351,35 @@ bool TestConcatenate3()
cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU; XTensor tUserGPU;
/* Initialize variables */ /* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1); sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* clear list */ /* clear list */
sList->Clear(); sList->Clear();
/* add tensors to list*/ /* add tensors to list*/
sList->Add(sGPU1); sList->Add(sGPU1);
sList->Add(sGPU2); sList->Add(sGPU2);
/* call Concatenate function */ /* call Concatenate function */
_Concatenate(sList, tGPU, 1); _Concatenate(sList, tGPU, 1);
tUserGPU = Concatenate(*sList, 1); tUserGPU = Concatenate(*sList, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete sList; delete sList;
delete s1; delete s1;
delete s2; delete s2;
...@@ -391,7 +391,7 @@ bool TestConcatenate3() ...@@ -391,7 +391,7 @@ bool TestConcatenate3()
delete[] sDimSize2; delete[] sDimSize2;
delete[] tDimSize; delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete sList; delete sList;
...@@ -402,7 +402,7 @@ bool TestConcatenate3() ...@@ -402,7 +402,7 @@ bool TestConcatenate3()
delete[] sDimSize2; delete[] sDimSize2;
delete[] tDimSize; delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
...@@ -471,28 +471,28 @@ bool TestConcatenate4() ...@@ -471,28 +471,28 @@ bool TestConcatenate4()
cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU; XTensor tUserGPU;
/* Initialize variables */ /* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1); sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call Concatenate function */ /* call Concatenate function */
_Concatenate(sGPU1, sGPU2, tGPU, 1); _Concatenate(sGPU1, sGPU2, tGPU, 1);
tUserGPU = Concatenate(*sGPU1, *sGPU2, 1); tUserGPU = Concatenate(*sGPU1, *sGPU2, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete s1; delete s1;
delete s2; delete s2;
delete t; delete t;
...@@ -503,7 +503,7 @@ bool TestConcatenate4() ...@@ -503,7 +503,7 @@ bool TestConcatenate4()
//delete[] sDimSize2; //delete[] sDimSize2;
//delete[] tDimSize; //delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s1; delete s1;
...@@ -513,7 +513,7 @@ bool TestConcatenate4() ...@@ -513,7 +513,7 @@ bool TestConcatenate4()
delete[] sDimSize2; delete[] sDimSize2;
delete[] tDimSize; delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
......
...@@ -30,7 +30,7 @@ In this case, 2 * (2, 1) -> (2, 2), dim=1. ...@@ -30,7 +30,7 @@ In this case, 2 * (2, 1) -> (2, 2), dim=1.
*/ */
bool TestConcatenateSolely1() bool TestConcatenateSolely1()
{ {
/* create list */ /* create list */
TensorList * sList = new TensorList(); TensorList * sList = new TensorList();
/* a source tensor of size (2, 1) */ /* a source tensor of size (2, 1) */
...@@ -83,44 +83,44 @@ bool TestConcatenateSolely1() ...@@ -83,44 +83,44 @@ bool TestConcatenateSolely1()
s2->SetData(sData2, sUnitNum2); s2->SetData(sData2, sUnitNum2);
t->SetZeroAll(); t->SetZeroAll();
/* add tensors to list */ /* add tensors to list */
sList->Add(s1); sList->Add(s1);
sList->Add(s2); sList->Add(s2);
/* call ConcatenateSolely function */ /* call ConcatenateSolely function */
_ConcatenateSolely(sList, t, 1); _ConcatenateSolely(sList, t, 1);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */ /* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1); sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* clear list */ /* clear list */
sList->Clear(); sList->Clear();
/* add tensors to list*/ /* add tensors to list*/
sList->Add(sGPU1); sList->Add(sGPU1);
sList->Add(sGPU2); sList->Add(sGPU2);
/* call ConcatenateSolely function */ /* call ConcatenateSolely function */
_ConcatenateSolely(sList, tGPU, 1); _ConcatenateSolely(sList, tGPU, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete sList; delete sList;
delete s1; delete s1;
delete s2; delete s2;
...@@ -132,7 +132,7 @@ bool TestConcatenateSolely1() ...@@ -132,7 +132,7 @@ bool TestConcatenateSolely1()
delete[] sDimSize2; delete[] sDimSize2;
delete[] tDimSize; delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete sList; delete sList;
...@@ -143,7 +143,7 @@ bool TestConcatenateSolely1() ...@@ -143,7 +143,7 @@ bool TestConcatenateSolely1()
delete[] sDimSize2; delete[] sDimSize2;
delete[] tDimSize; delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
...@@ -153,7 +153,7 @@ In this case, 2 * (2, 1) -> (4, 1), dim=0. ...@@ -153,7 +153,7 @@ In this case, 2 * (2, 1) -> (4, 1), dim=0.
*/ */
bool TestConcatenateSolely2() bool TestConcatenateSolely2()
{ {
/* create list */ /* create list */
TensorList * sList = new TensorList(); TensorList * sList = new TensorList();
/* a source tensor of size (2, 1) */ /* a source tensor of size (2, 1) */
...@@ -208,7 +208,7 @@ bool TestConcatenateSolely2() ...@@ -208,7 +208,7 @@ bool TestConcatenateSolely2()
s2->SetData(sData2, sUnitNum2); s2->SetData(sData2, sUnitNum2);
t->SetZeroAll(); t->SetZeroAll();
/* add tensors to list */ /* add tensors to list */
sList->Add(s1); sList->Add(s1);
sList->Add(s2); sList->Add(s2);
...@@ -219,33 +219,33 @@ bool TestConcatenateSolely2() ...@@ -219,33 +219,33 @@ bool TestConcatenateSolely2()
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */ /* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1); sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* clear list */ /* clear list */
sList->Clear(); sList->Clear();
/* add tensors to list*/ /* add tensors to list*/
sList->Add(sGPU1); sList->Add(sGPU1);
sList->Add(sGPU2); sList->Add(sGPU2);
/* call concatenatesolely function */ /* call concatenatesolely function */
_ConcatenateSolely(sList, tGPU, 0); _ConcatenateSolely(sList, tGPU, 0);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete sList; delete sList;
delete s1; delete s1;
delete s2; delete s2;
...@@ -257,7 +257,7 @@ bool TestConcatenateSolely2() ...@@ -257,7 +257,7 @@ bool TestConcatenateSolely2()
delete[] sDimSize2; delete[] sDimSize2;
delete[] tDimSize; delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete sList; delete sList;
...@@ -268,7 +268,7 @@ bool TestConcatenateSolely2() ...@@ -268,7 +268,7 @@ bool TestConcatenateSolely2()
delete[] sDimSize2; delete[] sDimSize2;
delete[] tDimSize; delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
...@@ -278,7 +278,7 @@ In this case, (2, 1) + (2, 2) -> (2, 3), dim=1. ...@@ -278,7 +278,7 @@ In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
*/ */
bool TestConcatenateSolely3() bool TestConcatenateSolely3()
{ {
/* create list */ /* create list */
TensorList * sList = new TensorList(); TensorList * sList = new TensorList();
/* a source tensor of size (2, 1) */ /* a source tensor of size (2, 1) */
...@@ -331,44 +331,44 @@ bool TestConcatenateSolely3() ...@@ -331,44 +331,44 @@ bool TestConcatenateSolely3()
s2->SetData(sData2, sUnitNum2); s2->SetData(sData2, sUnitNum2);
t->SetZeroAll(); t->SetZeroAll();
/* add tensors to list */ /* add tensors to list */
sList->Add(s1); sList->Add(s1);
sList->Add(s2); sList->Add(s2);
/* call ConcatenateSolely function */ /* call ConcatenateSolely function */
_ConcatenateSolely(sList, t, 1); _ConcatenateSolely(sList, t, 1);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum); cpuTest = t->CheckData(answer, tUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */ /* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1); sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* clear list */ /* clear list */
sList->Clear(); sList->Clear();
/* add tensors to list*/ /* add tensors to list*/
sList->Add(sGPU1); sList->Add(sGPU1);
sList->Add(sGPU2); sList->Add(sGPU2);
/* call ConcatenateSolely function */ /* call ConcatenateSolely function */
_ConcatenateSolely(sList, tGPU, 1); _ConcatenateSolely(sList, tGPU, 1);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum); gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */ /* destroy variables */
delete sList; delete sList;
delete s1; delete s1;
delete s2; delete s2;
...@@ -380,7 +380,7 @@ bool TestConcatenateSolely3() ...@@ -380,7 +380,7 @@ bool TestConcatenateSolely3()
delete[] sDimSize2; delete[] sDimSize2;
delete[] tDimSize; delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete sList; delete sList;
...@@ -391,7 +391,7 @@ bool TestConcatenateSolely3() ...@@ -391,7 +391,7 @@ bool TestConcatenateSolely3()
delete[] sDimSize2; delete[] sDimSize2;
delete[] tDimSize; delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
......
...@@ -31,72 +31,72 @@ In this case, the flaot32 data type is converted to int32 data type. ...@@ -31,72 +31,72 @@ In this case, the flaot32 data type is converted to int32 data type.
*/ */
bool TestConvertDataType1() bool TestConvertDataType1()
{ {
/* a tensor of size (3, 2) */ /* a tensor of size (3, 2) */
int aOrder = 2; int aOrder = 2;
int * aDimSize = new int[aOrder]; int * aDimSize = new int[aOrder];
aDimSize[0] = 3; aDimSize[0] = 3;
aDimSize[1] = 2; aDimSize[1] = 2;
int aUnitNum = 1; int aUnitNum = 1;
for (int i = 0; i < aOrder; i++) for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i]; aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, 2.0F}, DTYPE aData[3][2] = { {1.0F, 2.0F},
{0.5F, 4.0F}, {0.5F, 4.0F},
{5.0F, 6.0F} }; {5.0F, 6.0F} };
int answer[3][2] = { {1, 2}, int answer[3][2] = { {1, 2},
{0, 4}, {0, 4},
{5, 6} }; {5, 6} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize); XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize, X_INT); XTensor * b = NewTensor(aOrder, aDimSize, X_INT);
/* initialize variables */ /* initialize variables */
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
b->SetZeroAll(); b->SetZeroAll();
/* call ConvertDataType function */ /* call ConvertDataType function */
_ConvertDataType(a, b); _ConvertDataType(a, b);
/* check results */ /* check results */
cpuTest = b->CheckData(answer, aUnitNum); cpuTest = b->CheckData(answer, aUnitNum);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0); XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_INT, 1.0F, 0); XTensor * bGPU = NewTensor(aOrder, aDimSize, X_INT, 1.0F, 0);
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
/* call ConvertDataType function */ /* call ConvertDataType function */
_ConvertDataType(aGPU, bGPU); _ConvertDataType(aGPU, bGPU);
/* check results */ /* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum); gpuTest = bGPU->CheckData(answer, aUnitNum);
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete aGPU; delete aGPU;
delete bGPU; delete bGPU;
delete[] aDimSize; delete[] aDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete[] aDimSize; delete[] aDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
...@@ -106,72 +106,72 @@ In this case, the int32 data type is converted to float32 data type. ...@@ -106,72 +106,72 @@ In this case, the int32 data type is converted to float32 data type.
*/ */
bool TestConvertDataType2() bool TestConvertDataType2()
{ {
/* a tensor of size (3, 2) */ /* a tensor of size (3, 2) */
int aOrder = 2; int aOrder = 2;
int * aDimSize = new int[aOrder]; int * aDimSize = new int[aOrder];
aDimSize[0] = 3; aDimSize[0] = 3;
aDimSize[1] = 2; aDimSize[1] = 2;
int aUnitNum = 1; int aUnitNum = 1;
for (int i = 0; i < aOrder; i++) for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i]; aUnitNum *= aDimSize[i];
int aData[3][2] = { {1, 2}, int aData[3][2] = { {1, 2},
{0, 4}, {0, 4},
{5, 6} }; {5, 6} };
DTYPE answer[3][2] = { {1.0F, 2.0F}, DTYPE answer[3][2] = { {1.0F, 2.0F},
{0.0F, 4.0F}, {0.0F, 4.0F},
{5.0F, 6.0F} }; {5.0F, 6.0F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize, X_INT); XTensor * a = NewTensor(aOrder, aDimSize, X_INT);
XTensor * b = NewTensor(aOrder, aDimSize); XTensor * b = NewTensor(aOrder, aDimSize);
/* initialize variables */ /* initialize variables */
a->SetData(aData, aUnitNum); a->SetData(aData, aUnitNum);
b->SetZeroAll(); b->SetZeroAll();
/* call ConvertDataType function */ /* call ConvertDataType function */
_ConvertDataType(a, b); _ConvertDataType(a, b);
/* check results */ /* check results */
cpuTest = b->CheckData(answer, aUnitNum, 1e-4F); cpuTest = b->CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_INT, 1.0F, 0); XTensor * aGPU = NewTensor(aOrder, aDimSize, X_INT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0); XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, aUnitNum); aGPU->SetData(aData, aUnitNum);
/* call ConvertDataType function */ /* call ConvertDataType function */
_ConvertDataType(aGPU, bGPU); _ConvertDataType(aGPU, bGPU);
/* check results */ /* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F); gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete aGPU; delete aGPU;
delete bGPU; delete bGPU;
delete[] aDimSize; delete[] aDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete[] aDimSize; delete[] aDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
...@@ -298,53 +298,53 @@ TODO!! ...@@ -298,53 +298,53 @@ TODO!!
/* test for ConvertDataType Function */ /* test for ConvertDataType Function */
bool TestConvertDataType() bool TestConvertDataType()
{ {
XPRINT(0, stdout, "[TEST ConvertDataType] convert data type \n"); XPRINT(0, stdout, "[TEST ConvertDataType] convert data type \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
caseFlag = TestConvertDataType1(); caseFlag = TestConvertDataType1();
if (!caseFlag) { if (!caseFlag) {
returnFlag = false; returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n"); XPRINT(0, stdout, ">> case 1 failed!\n");
} }
else else
XPRINT(0, stdout, ">> case 1 passed!\n"); XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */ /* case 2 test */
caseFlag = TestConvertDataType2(); caseFlag = TestConvertDataType2();
if (!caseFlag) { if (!caseFlag) {
returnFlag = false; returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n"); XPRINT(0, stdout, ">> case 2 failed!\n");
} }
else else
XPRINT(0, stdout, ">> case 2 passed!\n"); XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */ /* case 3 test */
caseFlag = TestConvertDataType3(); caseFlag = TestConvertDataType3();
if (!caseFlag) { if (!caseFlag) {
returnFlag = false; returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n"); XPRINT(0, stdout, ">> case 3 failed!\n");
} }
else else
XPRINT(0, stdout, ">> case 3 passed!\n"); XPRINT(0, stdout, ">> case 3 passed!\n");
/* other cases test */ /* other cases test */
/* /*
TODO!! TODO!!
*/ */
if (returnFlag) { if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n"); XPRINT(0, stdout, ">> All Passed!\n");
} }
else else
XPRINT(0, stdout, ">> Failed!\n"); XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n"); XPRINT(0, stdout, "\n");
return returnFlag; return returnFlag;
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -30,84 +30,84 @@ Set every entry to its cosine value. ...@@ -30,84 +30,84 @@ Set every entry to its cosine value.
*/ */
bool TestCos1() bool TestCos1()
{ {
/* a tensor of size (3, 2) */ /* a tensor of size (3, 2) */
int order = 2; int order = 2;
int * dimSize = new int[order]; int * dimSize = new int[order];
dimSize[0] = 3; dimSize[0] = 3;
dimSize[1] = 2; dimSize[1] = 2;
int unitNum = 1; int unitNum = 1;
for (int i = 0; i < order; i++) for (int i = 0; i < order; i++)
unitNum *= dimSize[i]; unitNum *= dimSize[i];
DTYPE aData[3][2] = { {1.0F, 2.0F}, DTYPE aData[3][2] = { {1.0F, 2.0F},
{-1.0F, -2.0F}, {-1.0F, -2.0F},
{0.0F, 0.5F} }; {0.0F, 0.5F} };
DTYPE answer[3][2] = { {0.5403F, -0.4161F}, DTYPE answer[3][2] = { {0.5403F, -0.4161F},
{0.5403F, -0.4161F}, {0.5403F, -0.4161F},
{1.0F, 0.8776F} }; {1.0F, 0.8776F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(order, dimSize); XTensor * a = NewTensor(order, dimSize);
XTensor * b = NewTensor(order, dimSize); XTensor * b = NewTensor(order, dimSize);
XTensor * aMe = NewTensor(order, dimSize); XTensor * aMe = NewTensor(order, dimSize);
XTensor bUser; XTensor bUser;
/* initialize variables */ /* initialize variables */
a->SetData(aData, unitNum); a->SetData(aData, unitNum);
aMe->SetData(aData, unitNum); aMe->SetData(aData, unitNum);
/* call Cos function */ /* call Cos function */
_Cos(a, b); _Cos(a, b);
_CosMe(aMe); _CosMe(aMe);
bUser = Cos(*a); bUser = Cos(*a);
/* check results */ /* check results */
cpuTest = b->CheckData(answer, unitNum, 1e-4F) && aMe->CheckData(answer, unitNum, 1e-4F) && bUser.CheckData(answer, unitNum, 1e-4F); cpuTest = b->CheckData(answer, unitNum, 1e-4F) && aMe->CheckData(answer, unitNum, 1e-4F) && bUser.CheckData(answer, unitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * aMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU; XTensor bUserGPU;
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, unitNum); aGPU->SetData(aData, unitNum);
aMeGPU->SetData(aData, unitNum); aMeGPU->SetData(aData, unitNum);
/* call Cos function */ /* call Cos function */
_Cos(aGPU, bGPU); _Cos(aGPU, bGPU);
_CosMe(aMeGPU); _CosMe(aMeGPU);
bUserGPU = Cos(*aGPU); bUserGPU = Cos(*aGPU);
/* check results */ /* check results */
gpuTest = bGPU->CheckData(answer, unitNum, 1e-4F) && aMeGPU->CheckData(answer, unitNum, 1e-4F) && bUserGPU.CheckData(answer, unitNum, 1e-4F); gpuTest = bGPU->CheckData(answer, unitNum, 1e-4F) && aMeGPU->CheckData(answer, unitNum, 1e-4F) && bUserGPU.CheckData(answer, unitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete aMe; delete aMe;
delete aGPU; delete aGPU;
delete bGPU; delete bGPU;
delete aMeGPU; delete aMeGPU;
delete[] dimSize; delete[] dimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete aMe; delete aMe;
delete[] dimSize; delete[] dimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
...@@ -119,33 +119,33 @@ TODO!! ...@@ -119,33 +119,33 @@ TODO!!
/* test for Cos Function */ /* test for Cos Function */
bool TestCos() bool TestCos()
{ {
XPRINT(0, stdout, "[TEST Cos] set every entry to its cosine value \n"); XPRINT(0, stdout, "[TEST Cos] set every entry to its cosine value \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
caseFlag = TestCos1(); caseFlag = TestCos1();
if (!caseFlag) { if (!caseFlag) {
returnFlag = false; returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n"); XPRINT(0, stdout, ">> case 1 failed!\n");
} }
else else
XPRINT(0, stdout, ">> case 1 passed!\n"); XPRINT(0, stdout, ">> case 1 passed!\n");
/* other cases test */ /* other cases test */
/* /*
TODO!! TODO!!
*/ */
if (returnFlag) { if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n"); XPRINT(0, stdout, ">> All Passed!\n");
} }
else else
XPRINT(0, stdout, ">> Failed!\n"); XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n"); XPRINT(0, stdout, "\n");
return returnFlag; return returnFlag;
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -30,97 +30,97 @@ In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=0, alpha=0. ...@@ -30,97 +30,97 @@ In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=0, alpha=0.
*/ */
bool TestDiv1() bool TestDiv1()
{ {
/* a source tensor of size (2, 2) */ /* a source tensor of size (2, 2) */
int sOrder1 = 2; int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1]; int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2; sDimSize1[0] = 2;
sDimSize1[1] = 2; sDimSize1[1] = 2;
int sUnitNum1 = 1; int sUnitNum1 = 1;
for (int i = 0; i < sOrder1; i++) for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i]; sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2, 2) */ /* a source tensor of size (2, 2) */
int sOrder2 = 2; int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2]; int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2; sDimSize2[0] = 2;
sDimSize2[1] = 2; sDimSize2[1] = 2;
int sUnitNum2 = 1; int sUnitNum2 = 1;
for (int i = 0; i < sOrder2; i++) for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i]; sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2, 2) */ /* a target tensor of size (2, 2) */
int tOrder = 2; int tOrder = 2;
int * tDimSize = new int[tOrder]; int * tDimSize = new int[tOrder];
tDimSize[0] = 2; tDimSize[0] = 2;
tDimSize[1] = 2; tDimSize[1] = 2;
int tUnitNum = 1; int tUnitNum = 1;
for (int i = 0; i < tOrder; i++) for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i]; tUnitNum *= tDimSize[i];
DTYPE sData1[2][2] = { {0.0F, 1.0F}, DTYPE sData1[2][2] = { {0.0F, 1.0F},
{2.0F, 3.0F} }; {2.0F, 3.0F} };
DTYPE sData2[2][2] = { {1.0F, 1.0F}, DTYPE sData2[2][2] = { {1.0F, 1.0F},
{4.0F, 9.0F} }; {4.0F, 9.0F} };
DTYPE answer[2][2] = { {0.0F, 1.0F}, DTYPE answer[2][2] = { {0.0F, 1.0F},
{0.5F, 0.3333F} }; {0.5F, 0.3333F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * s1 = NewTensor(sOrder1, sDimSize1); XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2); XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize); XTensor * t = NewTensor(tOrder, tDimSize);
XTensor * tMe = NewTensor(tOrder, tDimSize); XTensor * tMe = NewTensor(tOrder, tDimSize);
XTensor tUser; XTensor tUser;
/* initialize variables */ /* initialize variables */
s1->SetData(sData1, sUnitNum1); s1->SetData(sData1, sUnitNum1);
tMe->SetData(sData1, sUnitNum1); tMe->SetData(sData1, sUnitNum1);
s2->SetData(sData2, sUnitNum2); s2->SetData(sData2, sUnitNum2);
t->SetZeroAll(); t->SetZeroAll();
/* call Div function */ /* call Div function */
_Div(s1, s2, t, 0, 0); _Div(s1, s2, t, 0, 0);
_DivMe(tMe, s2, 0, 0); _DivMe(tMe, s2, 0, 0);
tUser = Div(*s1, *s2, 0); tUser = Div(*s1, *s2, 0);
/* check results */ /* check results */
cpuTest = t->CheckData(answer, tUnitNum, 1e-4F) && cpuTest = t->CheckData(answer, tUnitNum, 1e-4F) &&
tMe->CheckData(answer, tUnitNum, 1e-4F) && tMe->CheckData(answer, tUnitNum, 1e-4F) &&
tUser.CheckData(answer, tUnitNum, 1e-4F); tUser.CheckData(answer, tUnitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0); XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0); XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor * tMeGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0); XTensor * tMeGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU; XTensor tUserGPU;
/* Initialize variables */ /* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1); sGPU1->SetData(sData1, sUnitNum1);
tMeGPU->SetData(sData1, sUnitNum1); tMeGPU->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2); sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll(); tGPU->SetZeroAll();
/* call Div function */ /* call Div function */
_Div(sGPU1, sGPU2, tGPU, 0, 0); _Div(sGPU1, sGPU2, tGPU, 0, 0);
_DivMe(tMeGPU, sGPU2, 0, 0); _DivMe(tMeGPU, sGPU2, 0, 0);
tUserGPU = Div(*sGPU1, *sGPU2, 0); tUserGPU = Div(*sGPU1, *sGPU2, 0);
/* check results */ /* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum, 1e-4F) && gpuTest = tGPU->CheckData(answer, tUnitNum, 1e-4F) &&
tMeGPU->CheckData(answer, tUnitNum, 1e-4F) && tMeGPU->CheckData(answer, tUnitNum, 1e-4F) &&
tUserGPU.CheckData(answer, tUnitNum, 1e-4F); tUserGPU.CheckData(answer, tUnitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete s1; delete s1;
delete s2; delete s2;
delete t; delete t;
...@@ -133,7 +133,7 @@ bool TestDiv1() ...@@ -133,7 +133,7 @@ bool TestDiv1()
delete[] sDimSize2; delete[] sDimSize2;
delete[] tDimSize; delete[] tDimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete s1; delete s1;
...@@ -144,7 +144,7 @@ bool TestDiv1() ...@@ -144,7 +144,7 @@ bool TestDiv1()
delete[] sDimSize2; delete[] sDimSize2;
delete[] tDimSize; delete[] tDimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
...@@ -156,33 +156,33 @@ TODO!! ...@@ -156,33 +156,33 @@ TODO!!
/* test for Div Function */ /* test for Div Function */
bool TestDiv() bool TestDiv()
{ {
XPRINT(0, stdout, "[TEST Div] element-wise division of two tensors \n"); XPRINT(0, stdout, "[TEST Div] element-wise division of two tensors \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
caseFlag = TestDiv1(); caseFlag = TestDiv1();
if (!caseFlag) { if (!caseFlag) {
returnFlag = false; returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n"); XPRINT(0, stdout, ">> case 1 failed!\n");
} }
else else
XPRINT(0, stdout, ">> case 1 passed!\n"); XPRINT(0, stdout, ">> case 1 passed!\n");
/* other cases test */ /* other cases test */
/* /*
TODO!! TODO!!
*/ */
if (returnFlag) { if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n"); XPRINT(0, stdout, ">> All Passed!\n");
} }
else else
XPRINT(0, stdout, ">> Failed!\n"); XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n"); XPRINT(0, stdout, "\n");
return returnFlag; return returnFlag;
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -127,8 +127,8 @@ bool TestDivDim1() ...@@ -127,8 +127,8 @@ bool TestDivDim1()
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete c; delete c;
delete cMe; delete cMe;
delete[] aDimSize; delete[] aDimSize;
delete[] bDimSize; delete[] bDimSize;
...@@ -241,8 +241,8 @@ bool TestDivDim2() ...@@ -241,8 +241,8 @@ bool TestDivDim2()
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete c; delete c;
delete cMe; delete cMe;
delete[] aDimSize; delete[] aDimSize;
delete[] bDimSize; delete[] bDimSize;
......
...@@ -30,88 +30,88 @@ Set every entry to its exponent value. ...@@ -30,88 +30,88 @@ Set every entry to its exponent value.
*/ */
bool TestExp1() bool TestExp1()
{ {
/* a tensor of size (3, 2) */ /* a tensor of size (3, 2) */
int order = 2; int order = 2;
int * dimSize = new int[order]; int * dimSize = new int[order];
dimSize[0] = 3; dimSize[0] = 3;
dimSize[1] = 2; dimSize[1] = 2;
int unitNum = 1; int unitNum = 1;
for (int i = 0; i < order; i++) for (int i = 0; i < order; i++)
unitNum *= dimSize[i]; unitNum *= dimSize[i];
DTYPE aData[3][2] = { {1.0F, 2.0F}, DTYPE aData[3][2] = { {1.0F, 2.0F},
{-1.0F, -2.0F}, {-1.0F, -2.0F},
{0.0F, 0.5F} }; {0.0F, 0.5F} };
DTYPE answer[3][2] = { {2.7183F, 7.3891F}, DTYPE answer[3][2] = { {2.7183F, 7.3891F},
{0.3679F, 0.1353F}, {0.3679F, 0.1353F},
{1.0F, 1.6487F} }; {1.0F, 1.6487F} };
/* CPU test */ /* CPU test */
bool cpuTest = true; bool cpuTest = true;
/* create tensors */ /* create tensors */
XTensor * a = NewTensor(order, dimSize); XTensor * a = NewTensor(order, dimSize);
XTensor * b = NewTensor(order, dimSize); XTensor * b = NewTensor(order, dimSize);
XTensor * aMe = NewTensor(order, dimSize); XTensor * aMe = NewTensor(order, dimSize);
XTensor bUser; XTensor bUser;
/* initialize variables */ /* initialize variables */
a->SetData(aData, unitNum); a->SetData(aData, unitNum);
aMe->SetData(aData, unitNum); aMe->SetData(aData, unitNum);
/* call Exp function */ /* call Exp function */
_Exp(a, b); _Exp(a, b);
_ExpMe(aMe); _ExpMe(aMe);
bUser = Exp(*a); bUser = Exp(*a);
/* check results */ /* check results */
cpuTest = b->CheckData(answer, unitNum, 1e-4F) && cpuTest = b->CheckData(answer, unitNum, 1e-4F) &&
aMe->CheckData(answer, unitNum, 1e-4F) && aMe->CheckData(answer, unitNum, 1e-4F) &&
bUser.CheckData(answer, unitNum, 1e-4F); bUser.CheckData(answer, unitNum, 1e-4F);
#ifdef USE_CUDA #ifdef USE_CUDA
/* GPU test */ /* GPU test */
bool gpuTest = true; bool gpuTest = true;
/* create tensor */ /* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0); XTensor * aMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU; XTensor bUserGPU;
/* Initialize variables */ /* Initialize variables */
aGPU->SetData(aData, unitNum); aGPU->SetData(aData, unitNum);
aMeGPU->SetData(aData, unitNum); aMeGPU->SetData(aData, unitNum);
/* call Exp function */ /* call Exp function */
_Exp(aGPU, bGPU); _Exp(aGPU, bGPU);
_ExpMe(aMeGPU); _ExpMe(aMeGPU);
bUserGPU = Exp(*aGPU); bUserGPU = Exp(*aGPU);
/* check results */ /* check results */
gpuTest = bGPU->CheckData(answer, unitNum, 1e-4F) && gpuTest = bGPU->CheckData(answer, unitNum, 1e-4F) &&
aMeGPU->CheckData(answer, unitNum, 1e-4F) && \ aMeGPU->CheckData(answer, unitNum, 1e-4F) && \
bUserGPU.CheckData(answer, unitNum, 1e-4F); bUserGPU.CheckData(answer, unitNum, 1e-4F);
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete aMe; delete aMe;
delete aGPU; delete aGPU;
delete bGPU; delete bGPU;
delete aMeGPU; delete aMeGPU;
delete[] dimSize; delete[] dimSize;
return cpuTest && gpuTest; return cpuTest && gpuTest;
#else #else
/* destroy variables */ /* destroy variables */
delete a; delete a;
delete b; delete b;
delete aMe; delete aMe;
delete[] dimSize; delete[] dimSize;
return cpuTest; return cpuTest;
#endif // USE_CUDA #endif // USE_CUDA
} }
...@@ -123,33 +123,33 @@ TODO!! ...@@ -123,33 +123,33 @@ TODO!!
/* test for Exp Function */ /* test for Exp Function */
bool TestExp() bool TestExp()
{ {
XPRINT(0, stdout, "[TEST Exp] set every entry to its exponent value \n"); XPRINT(0, stdout, "[TEST Exp] set every entry to its exponent value \n");
bool returnFlag = true, caseFlag = true; bool returnFlag = true, caseFlag = true;
/* case 1 test */ /* case 1 test */
caseFlag = TestExp1(); caseFlag = TestExp1();
if (!caseFlag) { if (!caseFlag) {
returnFlag = false; returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n"); XPRINT(0, stdout, ">> case 1 failed!\n");
} }
else else
XPRINT(0, stdout, ">> case 1 passed!\n"); XPRINT(0, stdout, ">> case 1 passed!\n");
/* other cases test */ /* other cases test */
/* /*
TODO!! TODO!!
*/ */
if (returnFlag) { if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n"); XPRINT(0, stdout, ">> All Passed!\n");
} }
else else
XPRINT(0, stdout, ">> Failed!\n"); XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n"); XPRINT(0, stdout, "\n");
return returnFlag; return returnFlag;
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论