Commit 771643c6 by huchi

refactor parameter from pointer to reference

parent 04f129fc
......@@ -55,7 +55,7 @@ int main( int argc, const char ** argv )
// fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
// fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
//}
BackwardTest();
BackwardTest();
//_CrtDumpMemoryLeaks();
......@@ -69,9 +69,9 @@ void BackwardTest()
XTensor a;
XTensor b;
XTensor c;
a.enableGrad = true;
b.enableGrad = false;
c.enableGrad = false;
a.enableGrad = true;
b.enableGrad = false;
c.enableGrad = false;
XTensor mean;
XTensor origin;
InitTensor2D(&a, 2, 3);
......@@ -89,9 +89,9 @@ void BackwardTest()
b.Set1D(2.0F, 0);
b.Set1D(1.0F, 1);
DivDim(a, b, c, 0);
DivDim(a, b, c, 0);
c.Dump(stderr, "c:");
auto loss = CrossEntropy(c, a);
auto loss = CrossEntropy(c, a);
//XLink::ShowNetwork(stderr, &c);
......
......@@ -765,15 +765,15 @@ void XMathGrad::GradMultiplyDim(XTensor * node, bool isEfficient)
/* dE/da */
_MultiplyDim(node->grad, b, a->grad, n, 1.0F);
/* dE/db */
/* dE/db */
int order = a->order;
int dimSize[MAX_TENSOR_DIM_NUM];
memcpy(dimSize, a->dimSize, sizeof(int) * a->order);
XTensor * bGradTMP = NewTensorBuf(node->grad, node->devID, node->mem);
_Multiply(node->grad, a, bGradTMP);
if(n == order - 1){
int reshapedSize[MAX_TENSOR_DIM_NUM];
reshapedSize[0] = a->unitNum/dimSize[order - 1];
......@@ -1078,91 +1078,91 @@ dE/db = - dE/dc * b.reduce(0,...,n-1,n+1,...) * \beta
*/
void XMathGrad::GradSubDim(XTensor * node, bool isEfficient)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUBDIM!");
XTensor * a = income.tails[0];
XTensor * b = income.tails[1];
int n = income.GetParamInt(0);
DTYPE beta = income.GetParam(1);
XNoder::MakeGrad(a);
XNoder::MakeGrad(b);
_Sum(a->grad, node->grad, a->grad);
int order = a->order;
int dimSize[MAX_TENSOR_DIM_NUM];
memcpy(dimSize, a->dimSize, sizeof(int) * a->order);
if(n == order - 1){
int reshapedSize[MAX_TENSOR_DIM_NUM];
reshapedSize[0] = a->unitNum / dimSize[order - 1];
reshapedSize[1] = dimSize[order - 1];
/* we reshape dE/dc to a matrix whose column number is equal to the
size of b. Then we can reduce the matrix into a row vector. */
node->grad->Reshape(2, reshapedSize);
//if(b->outgo.tailNum > 1){
XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
_ReduceSum(node->grad, bGradTMP, 0);
if(beta != 1.0F)
_ScaleAndShiftMe(bGradTMP, beta);
_Sub(b->grad, bGradTMP, b->grad);
DelTensorBuf(bGradTMP);
/*}
else{
_ReduceSum(node->grad, b->grad, 0);
if(beta != 1.0F)
_ScaleAndShiftMe(b->grad, beta);
_ScaleAndShiftMe(b->grad, -1.0F);
}*/
node->grad->Reshape(order, dimSize);
}
else{
int reshapedSize[MAX_TENSOR_DIM_NUM];
reshapedSize[0] = 1;
reshapedSize[1] = dimSize[n];
reshapedSize[2] = 1;
for(int i = 0; i < order; i++){
if(i < n)
reshapedSize[0] *= dimSize[i];
}
reshapedSize[2] = a->unitNum / (reshapedSize[0] * reshapedSize[1]);
/* we reshape dE/dc to a 3D tensor of size (x, y, z) where y = |b|.
Then reduce along with z and x to obtain dE/db. */
node->grad->Reshape(3, reshapedSize);
XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem);
_ReduceSum(node->grad, interGrad, 2);
//if(b->outgo.tailNum > 1){
XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
_ReduceSum(interGrad, bGradTMP, 0);
if(beta != 1.0F)
_ScaleAndShiftMe(bGradTMP, beta);
_Sub(b->grad, bGradTMP, b->grad);
DelTensorBuf(bGradTMP);
/*}
else{
_ReduceSum(interGrad, b->grad, 0);
if(beta != 1.0F)
_ScaleAndShiftMe(b->grad, beta);
_ScaleAndShiftMe(b->grad, -1.0F);
}*/
node->grad->Reshape(order, dimSize);
DelTensorBuf(interGrad);
}
node->visitMark = NODE_FINISHED;
XLink &income = node->income;
CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUBDIM!");
XTensor * a = income.tails[0];
XTensor * b = income.tails[1];
int n = income.GetParamInt(0);
DTYPE beta = income.GetParam(1);
XNoder::MakeGrad(a);
XNoder::MakeGrad(b);
_Sum(a->grad, node->grad, a->grad);
int order = a->order;
int dimSize[MAX_TENSOR_DIM_NUM];
memcpy(dimSize, a->dimSize, sizeof(int) * a->order);
if(n == order - 1){
int reshapedSize[MAX_TENSOR_DIM_NUM];
reshapedSize[0] = a->unitNum / dimSize[order - 1];
reshapedSize[1] = dimSize[order - 1];
/* we reshape dE/dc to a matrix whose column number is equal to the
size of b. Then we can reduce the matrix into a row vector. */
node->grad->Reshape(2, reshapedSize);
//if(b->outgo.tailNum > 1){
XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
_ReduceSum(node->grad, bGradTMP, 0);
if(beta != 1.0F)
_ScaleAndShiftMe(bGradTMP, beta);
_Sub(b->grad, bGradTMP, b->grad);
DelTensorBuf(bGradTMP);
/*}
else{
_ReduceSum(node->grad, b->grad, 0);
if(beta != 1.0F)
_ScaleAndShiftMe(b->grad, beta);
_ScaleAndShiftMe(b->grad, -1.0F);
}*/
node->grad->Reshape(order, dimSize);
}
else{
int reshapedSize[MAX_TENSOR_DIM_NUM];
reshapedSize[0] = 1;
reshapedSize[1] = dimSize[n];
reshapedSize[2] = 1;
for(int i = 0; i < order; i++){
if(i < n)
reshapedSize[0] *= dimSize[i];
}
reshapedSize[2] = a->unitNum / (reshapedSize[0] * reshapedSize[1]);
/* we reshape dE/dc to a 3D tensor of size (x, y, z) where y = |b|.
Then reduce along with z and x to obtain dE/db. */
node->grad->Reshape(3, reshapedSize);
XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem);
_ReduceSum(node->grad, interGrad, 2);
//if(b->outgo.tailNum > 1){
XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
_ReduceSum(interGrad, bGradTMP, 0);
if(beta != 1.0F)
_ScaleAndShiftMe(bGradTMP, beta);
_Sub(b->grad, bGradTMP, b->grad);
DelTensorBuf(bGradTMP);
/*}
else{
_ReduceSum(interGrad, b->grad, 0);
if(beta != 1.0F)
_ScaleAndShiftMe(b->grad, beta);
_ScaleAndShiftMe(b->grad, -1.0F);
}*/
node->grad->Reshape(order, dimSize);
DelTensorBuf(interGrad);
}
node->visitMark = NODE_FINISHED;
}
/*
......
......@@ -146,10 +146,10 @@ private:
static
void GradSub(XTensor * node, bool isEfficient);
/* gradient for sub with one dimension: c = a - b * \beta
where the size of b is equal to that of one dimension of a */
static
void GradSubDim(XTensor * node, bool isEfficient);
/* gradient for sub with one dimension: c = a - b * \beta
where the size of b is equal to that of one dimension of a */
static
void GradSubDim(XTensor * node, bool isEfficient);
/* gradient for sum: c = a + b * \beta */
static
......
......@@ -450,7 +450,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
if(income.typeID == SHAPE_SPLIT_LIST){
int w = income.GetParamInt(0);
int splitID = income.GetParamInt(1);
if(whereToSplit < 0)
whereToSplit = w;
splitNum++;
......
......@@ -267,7 +267,7 @@ void XNet::BackwardNode(XTensor * node, bool isEfficent)
else if(XShapeGrad::IsShapeOP(node))
XShapeGrad::MakeGrad(node, isEfficent);
else if(XLossGrad::IsLossOP(node))
XLossGrad::MakeGrad(node, isEfficent);
XLossGrad::MakeGrad(node, isEfficent);
else{
ShowNTErrors("Wrong node type!");
}
......@@ -468,7 +468,7 @@ search for a node in a top-down manner by its name
*/
//XTensor * XNet::SearchNode(XTensor * top, const char * name)
//{
//return XLink::SearchNode(top, name);
//return XLink::SearchNode(top, name);
//}
}
......@@ -482,12 +482,12 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
Clear(model, true);
/* forward + backward process */
/* this is implemented by gather function */
/* this is implemented by gather function */
ForwardAutoDiff(ngrams, ngramNum, output, model);
/* this is implemented by multiply function */
//ForwardAutoDiff(inputs, output, model);
/* this is implemented by multiply function */
//ForwardAutoDiff(inputs, output, model);
lossTensor = CrossEntropy(output, gold);
/* automatic differentiation */
......@@ -1177,12 +1177,12 @@ void Test(const char * test, const char * result, FNNModel &model)
/* forward computation */
Forward(inputs, output, model, net);
}
else {
/* this is implemented by gather function */
else {
/* this is implemented by gather function */
ForwardAutoDiff(ngrams, ngramNum, output, model);
/* this is implemented by multiply function */
//ForwardAutoDiff(inputs, output, model);
/* this is implemented by multiply function */
//ForwardAutoDiff(inputs, output, model);
}
/* prediction probabilities */
......
......@@ -61,7 +61,7 @@ public:
XTensor wa;
XTensor wbig;
/* size of transformed Q and K */
int dk;
......
......@@ -86,7 +86,7 @@ struct SampleNode
int * p;
int size;
int value;
int key;
int key;
};
int CompareSampleNode(const void * a, const void * b)
......@@ -289,7 +289,7 @@ int T2TBatchLoader::LoadBatch(FILE * file, bool isLM,
int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount,
int devID, XMem * mem,
bool isTraining)
bool isTraining)
{
if(isLM){
return LoadBatchLM(file, batchEnc, paddingEnc, batchDec, paddingDec, gold, label,
......@@ -331,7 +331,7 @@ int T2TBatchLoader::LoadBatchLM(FILE * file,
int vSize, int sBatch, int wBatch,
bool isSorted, int &wCount,
int devID, XMem * mem,
bool isTraining)
bool isTraining)
{
if(nextSeq < 0 || nextSeq >= nseqBuf)
LoadBuf(file, isSorted, 1);
......@@ -490,7 +490,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
int vSizeEnc, int vSizeDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount,
int devID, XMem * mem,
bool isTraining)
bool isTraining)
{
if (nextBatch < 0 || nextBatch >= bufBatchSize) {
LoadBuf(file, isSorted, 2);
......
......@@ -132,7 +132,7 @@ public:
int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount,
int devID, XMem * mem,
bool isTraining);
bool isTraining);
/* load a batch of sequences (for language modeling) */
int LoadBatchLM(FILE * file,
......@@ -142,7 +142,7 @@ public:
int * seqs, int vs, int sBatch, int wBatch,
bool isSorted, int &wCount,
int devID, XMem * mem,
bool isTraining);
bool isTraining);
/* load a batch of sequences (for machine translation) */
int LoadBatchMT(FILE * file,
......@@ -152,7 +152,7 @@ public:
int * seqs, int vsEnc, int vsDec, int sBatch, int wBatch,
bool isSorted, int &ws, int &wCount,
int devID, XMem * mem,
bool isTraining);
bool isTraining);
/* shuffle the data file */
void Shuffle(const char * srcFile, const char * tgtFile);
......
......@@ -303,7 +303,7 @@ void T2TSearch::Generate(T2TStateBundle * beam)
/* Then, we do something similar to "preID". For the top-k predictions, we need
to know their indices in the vocabulary. We compute the offset of each prediction
in the vocabulary by dividing it with vocab-size and computing the remainder. */
_ModMe(index, sizeVocab);
ModMe(index, sizeVocab);
score.Reshape(order, dims);
......
......@@ -528,7 +528,7 @@ get device ids for the given device information
*/
int XDevManager::GetDeviceIDs(char * devInfo, int * devIDs)
{
StrList* terms = new StrList(1);
StrList* terms = new StrList(1);
SplitALine(devInfo, " ", terms);
for(int i = 0; i < terms->count; i++){
......
......@@ -90,7 +90,7 @@ template <typename T>
void TensorListBase<T>::Add(T&& item)
{
if (count == maxNum) {
T* newItems;
if (mem == NULL)
newItems = new T[maxNum * 2 + 1];
......@@ -101,7 +101,7 @@ void TensorListBase<T>::Add(T&& item)
maxNum = maxNum * 2 + 1;
}
items[count++] = item;
}
/*
......@@ -111,18 +111,18 @@ add an item into the list
template <typename T>
void TensorListBase<T>::Add(const T& item)
{
if (count == maxNum) {
T* newItems;
if (mem == NULL)
newItems = new T[maxNum * 2 + 1];
else
newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
memcpy(newItems, items, sizeof(T) * maxNum);
items = newItems;
maxNum = maxNum * 2 + 1;
}
items[count++] = item;
if (count == maxNum) {
T* newItems;
if (mem == NULL)
newItems = new T[maxNum * 2 + 1];
else
newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
memcpy(newItems, items, sizeof(T) * maxNum);
items = newItems;
maxNum = maxNum * 2 + 1;
}
items[count++] = item;
}
/*
......@@ -186,21 +186,21 @@ void TensorListBase<T>::Insert(int pos, const T& item)
template<typename T>
void TensorListBase<T>::Insert(int pos, T&& item)
{
if (count == maxNum) {
T* newItems;
if (mem == NULL)
newItems = new T[maxNum * 2 + 1];
else
newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
memcpy(newItems, items, sizeof(T) * maxNum);
items = newItems;
maxNum = maxNum * 2 + 1;
}
for (int i = count - 1; i >= pos; i--)
items[i + 1] = items[i];
items[pos] = item;
count++;
if (count == maxNum) {
T* newItems;
if (mem == NULL)
newItems = new T[maxNum * 2 + 1];
else
newItems = (T*)mem->Alloc(mem->devID, sizeof(T) * (maxNum * 2 + 1));
memcpy(newItems, items, sizeof(T) * maxNum);
items = newItems;
maxNum = maxNum * 2 + 1;
}
for (int i = count - 1; i >= pos; i--)
items[i + 1] = items[i];
items[pos] = item;
count++;
}
/* get the item at position i */
......@@ -226,8 +226,8 @@ inline void TensorListBase<T>::SetItem(int i, const T& item)
template<typename T>
inline void TensorListBase<T>::SetItem(int i, T&& item)
{
if (i >= 0 && i < count)
items[i] = std::move(item);
if (i >= 0 && i < count)
items[i] = std::move(item);
}
/*
......@@ -250,7 +250,7 @@ inline int TensorListBase<T>::FindFirst(const T& item)
template <typename T>
void TensorListBase<T>::Clear()
{
count = 0;
count = 0;
}
/*
......
......@@ -32,7 +32,7 @@
/* the nts (NiuTrans.Tensor) namespace */
namespace nts {
/* the TensorListBase class */
template <typename T>
struct TensorListBase {
......@@ -66,57 +66,57 @@ public:
/* add an item into the list */
void Add(T&& item);
/* add an item into the list */
void Add(const T& item);
/* add an item into the list */
void Add(const T& item);
/* add a number of items into the list */
/* add a number of items into the list */
void Add(T* inputItems, int inputItemCount);
/* append a list to the current list */
/* append a list to the current list */
void AddList(TensorListBase* l);
/* insert an item to the given position of the list */
/* insert an item to the given position of the list */
void Insert(int pos, const T& item);
/* insert an item to the given position of the list */
void Insert(int pos, T&& item);
/* insert an item to the given position of the list */
void Insert(int pos, T&& item);
/* get the item at position i */
/* get the item at position i */
T& GetItem(int i) const;
/* set the item at position i */
/* set the item at position i */
void SetItem(int i, const T& item);
/* set the item at position i */
void SetItem(int i, T&& item);
/* set the item at position i */
void SetItem(int i, T&& item);
/* find the position of the first matched item */
/* find the position of the first matched item */
int FindFirst(const T& item);
/* clear the data array */
/* clear the data array */
void Clear();
/* sort the list */
/* sort the list */
void Sort(int itemSize);
/* reverse the list */
/* reverse the list */
void Reverse();
/* remove the item at position i */
/* remove the item at position i */
void Remove(int i);
/* copy the list */
/* copy the list */
TensorListBase* Copy(XMem* myMem);
/* shuffle the list */
/* shuffle the list */
void Shuffle(int nround = 10, int beg = -1, int len = 0);
/* short */
T& operator[] (int i) {
return GetItem(i);
};
T& operator[] (int i) {
return GetItem(i);
};
T& Get(int i) { return GetItem(i); };
void Set(int i, T item) { SetItem(i, item); };
void Set(int i, T item) { SetItem(i, item); };
};
struct XTensor;
......
......@@ -305,7 +305,7 @@ void XMem::SetComputationMode(bool myIsForComputation)
cublasDestroy(cublasHandle);
if(myIsForComputation)
CheckNTErrors((enum curandStatus)cublasCreate(&cublasHandle) == CURAND_STATUS_SUCCESS,
"Cannot create the cublas handle.");
"Cannot create the cublas handle.");
SetDevice(devIDBackup);
#endif
......@@ -321,11 +321,11 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum)
{
delete[] memIndex;
delete[] memIndex2;
delete[] minSizeIndex;
delete[] minSizeIndex;
nodeNum = indexSize;
nodeNumUsed = minSizeNum * 2;
indexEntryNum = minSizeNum;
nodeNum = indexSize;
nodeNumUsed = minSizeNum * 2;
indexEntryNum = minSizeNum;
memIndex = new MPieceNode[nodeNum];
memset(memIndex, 0, sizeof(MPieceNode) * nodeNum);
......@@ -333,12 +333,12 @@ void XMem::SetIndex(INT_64 indexSize, MTYPE minSizeFirst, int minSizeNum)
memIndex2 = new MPieceNode[nodeNum];
memset(memIndex2, 0, sizeof(MPieceNode) * nodeNum);
minSizeIndex = new MTYPE[indexEntryNum];
memset(minSizeIndex, 0, sizeof(MTYPE) * indexEntryNum);
minSizeIndex = new MTYPE[indexEntryNum];
memset(minSizeIndex, 0, sizeof(MTYPE) * indexEntryNum);
minSizeIndex[0] = minSizeFirst;
for(int i = 1; i < indexEntryNum; i++)
minSizeIndex[i] = minSizeIndex[i - 1] * 2;
minSizeIndex[0] = minSizeFirst;
for(int i = 1; i < indexEntryNum; i++)
minSizeIndex[i] = minSizeIndex[i - 1] * 2;
indexOffset = GetMSB(minSizeFirst);
}
......@@ -757,8 +757,8 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex)
/* if all index nodes are used, we rebuild the index to release the nodes that are free */
if(nodeNumUsed == nodeNum){
RebuildIndex();
CheckNTErrors(nodeNumUsed < nodeNum, "No enough index nodes for the memory pool!");
RebuildIndex();
CheckNTErrors(nodeNumUsed < nodeNum, "No enough index nodes for the memory pool!");
}
/*if(testxmemid == 30){
......@@ -961,8 +961,8 @@ release a piece of memory as "free"
*/
void XMem::ReleaseStandard(int myDevID, void * p, MTYPE size)
{
if(p == NULL)
return;
if(p == NULL)
return;
if(size <= minSizeIndex[0])
size = minSizeIndex[0];
......@@ -1092,7 +1092,7 @@ void XMem::RebuildIndex()
block->mem = NULL;
}
else{
/* if the block is in use, we build the index */
/* if the block is in use, we build the index */
int pieceCount = 0;
MTYPE size = 0;
MHeader * newLast = NULL;
......
/* NiuTrans.Tensor - an open-source tensor library
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
* This is an implementation of queue. Actually we intend to use it to maintain
* a priority job list
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2017-04-05
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "XQueue.h"
#include "XDevice.h"
#include "XList.h"
#include "XUtility.h"
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
/**************************************
job item used in queues
*/
/* constructor */
JobQueueNode::JobQueueNode()
{
job = NULL;
args = new TensorList(1);
}
/* de-constructor */
JobQueueNode::~JobQueueNode()
{
delete args;
}
/**************************************
This class provides standard utilities of Queue.
*/
/* constuctor */
XQueue::XQueue(int mySize)
{
queue = new void*[mySize];
memset(queue, 0, sizeof(void*) * mySize);
size = mySize;
itemCount = 0;
head = 0;
tail = 0;
isJobQueue = false;
jobDequeuerArgs = new TensorList(1);
jobDequeuerBreak = false;
runningJobCount = 0;
jobStream = NULL;
jobStream1 = NULL;
jobStream2 = NULL;
MUTEX_INIT(enqueueMutex);
MUTEX_INIT(dequeueMutex);
COND_INIT(queueCond);
MUTEX_INIT(jobQueueMutex);
}
/* deconstructor */
XQueue::~XQueue()
{
delete[] queue;
delete jobDequeuerArgs;
delete jobStream;
delete jobStream1;
delete jobStream2;
//if(isJobQueue)
// StopJobConsumer();
MUTEX_DELE(enqueueMutex);
MUTEX_DELE(dequeueMutex);
COND_DELE(queueCond);
MUTEX_DELE(jobQueueMutex);
}
/*
put an item in the tail of the queue
>> item - the item we intend to add into the queue
*/
void XQueue::Enqueue(void * item)
{
MUTEX_LOCK(enqueueMutex);
MUTEX_LOCK(dequeueMutex);
CheckNTErrors((itemCount < size), "Put too many items into the queue!");
queue[tail] = item;
tail = (tail + 1) % size;
itemCount++;
COND_SIGNAL(queueCond);
MUTEX_UNLOCK(dequeueMutex);
MUTEX_UNLOCK(enqueueMutex);
}
/*
fetch an item from head of the queue
<< return - the head item of the queue
*/
void * XQueue::Dequeue()
{
MUTEX_LOCK(dequeueMutex);
while(itemCount == 0)
{
#ifdef WIN32
MUTEX_UNLOCK(dequeueMutex);
#endif
COND_WAIT(queueCond, dequeueMutex);
#ifdef WIN32
MUTEX_LOCK(dequeueMutex);
#endif
}
void * r = queue[head];
head = (head + 1) % size;
itemCount--;
MUTEX_UNLOCK(dequeueMutex);
return r;
}
/* return if the queue is empty */
bool XQueue::IsEmpty()
{
return itemCount == 0;
}
/* wait until the queue is empty */
void XQueue::WaitForEmptyJobQueue()
{
while(runningJobCount > 0){
XSleep(10);
}
if(jobStream != NULL){
CheckNTErrors((jobStream->IsFinished()), "None fineished jobs remain");
jobStream->Clear();
}
if(jobStream1 != NULL){
CheckNTErrors((jobStream1->IsFinished()), "None fineished jobs remain");
jobStream1->Clear();
}
if(jobStream2 != NULL){
CheckNTErrors((jobStream2->IsFinished()), "None fineished jobs remain");
jobStream2->Clear();
}
}
int devids[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
int cpuid = -1;
/*
run job consumer (in another thread)
>> jobDevID - id of the device for running the jobs
*/
void XQueue::RunJobConsumer(int jobDevID)
{
CheckNTErrors((jobDevID < 16), "device id is out of scope!");
isJobQueue = true;
jobDequeuerArgs->Clear();
jobDequeuerArgs->Add(this);
jobDequeuerArgs->Add(jobDevID >= 0 ? devids + jobDevID : &cpuid);
jobDequeuer.function = (TFunction)DequeueJobs;
jobDequeuer.argv = jobDequeuerArgs;
jobDequeuer.Start();
jobDequeuer.LetItGo();
}
/* stop the job consumer */
void XQueue::StopJobConsumer()
{
jobDequeuerBreak = true;
XSleep(10);
EnqueueJob(NULL, NULL);
jobDequeuer.End();
isJobQueue = false;
}
/* add a job item to process */
void XQueue::EnqueueJob(void * job, TensorList * jobArgs)
{
MUTEX_LOCK(jobQueueMutex);
runningJobCount++;
MUTEX_UNLOCK(jobQueueMutex);
JobQueueNode * node = new JobQueueNode();
node->job = job;
if(jobArgs != NULL)
node->args->AddList(jobArgs);
Enqueue(node);
}
/* job item consumer */
void XQueue::DequeueJobs(TensorList * args)
{
CheckNTErrors((args->count == 2), "Illegal arguments!");
XQueue * q = (XQueue*)args->GetItem(0);
int devID = *(int*)args->GetItem(1);
int devIDBackup = XDevice::GetGPUDevice();
if(devID >= 0)
XDevice::SetGPUDevice(devID);
while(1){
JobQueueNode * node = (JobQueueNode*)q->Dequeue();
if(q->GetJobBreak())
break;
CheckNTErrors((node != NULL), "Illegal job!");
/* process a job */
((TFunction)node->job)(node->args);
delete node;
MUTEX_LOCK(q->jobQueueMutex);
q->runningJobCount--;
MUTEX_UNLOCK(q->jobQueueMutex);
}
if(devID >= 0)
XDevice::SetGPUDevice(devIDBackup);
}
/* get the break flag */
bool XQueue::GetJobBreak()
{
return jobDequeuerBreak;
}
/* get job stream */
XStream * XQueue::GetJobStream(int n)
{
if(n == 0)
return jobStream;
else if(n == 1)
return jobStream1;
else if(n == 2)
return jobStream2;
else{
ShowNTErrors("invalid stream id!");
}
return NULL;
}
/* make job streams */
void XQueue::MakeJobStreams(int devID, int devID1, int devID2)
{
if(devID != INVALID_DEVICE_ID)
jobStream = new XStream(0, devID);
if(devID1 != INVALID_DEVICE_ID)
jobStream1 = new XStream(0, devID1);
if(devID2 != INVALID_DEVICE_ID)
jobStream2 = new XStream(0, devID2);
}
} /* end of the nts (NiuTrans.Tensor) namespace */
......@@ -189,7 +189,7 @@ void XQueue::RunJobConsumer(int jobDevID)
isJobQueue = true;
jobDequeuerArgs->Clear();
// warning: this may cause unknown error
// warning: this may cause unknown error
jobDequeuerArgs->Add((XTensor*)this);
jobDequeuerArgs->Add(jobDevID >= 0 ? (XTensor*)(devids + jobDevID) : (XTensor*)&cpuid);
......
......@@ -190,7 +190,6 @@ XTensor::XTensor(const XTensor &reference)
isInit = true;
isTmp = reference.isTmp;
enableGrad = reference.enableGrad;
}
/* copy constructor (with right value reference) */
......@@ -219,7 +218,6 @@ XTensor::XTensor(const XTensor &&reference)
isInit = true;
isTmp = reference.isTmp;
enableGrad = reference.enableGrad;
}
/* de-constructor */
......@@ -285,7 +283,7 @@ void XTensor::Init()
isTmp = false;
isGrad = false;
isVar = false;
enableGrad = false;
enableGrad = false;
visitMark = 0;
grad = NULL;
}
......@@ -316,6 +314,7 @@ void XTensor::ShallowCopy(const XTensor &tensor)
{
strcpy(name, tensor.name);
order = tensor.order;
enableGrad = tensor.enableGrad;
memcpy(dimSize, tensor.dimSize, sizeof(int) * MAX_TENSOR_DIM_NUM);
memcpy(dimSizeRDI, tensor.dimSizeRDI, sizeof(int) * MAX_TENSOR_DIM_NUM);
dataType = tensor.dataType;
......@@ -403,7 +402,6 @@ XTensor& XTensor::operator= (const XTensor& tensor)
/* create tensor links for the new tensor */
XLink::Replace(&tensor, this);
}
enableGrad = tensor.enableGrad;
return *this;
}
......@@ -450,7 +448,6 @@ XTensor& XTensor::operator= (const XTensor&& tensor)
*tensor.dataP = NULL;
XLink::Replace(&tensor, this);
enableGrad = tensor.enableGrad;
return *this;
}
......@@ -1322,7 +1319,7 @@ set the value of a cell
*/
bool XTensor::Set(DTYPE value, int index[], int size)
{
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
CheckNTErrors(dataType == DEFAULT_DTYPE, "The tensor is not in default type.");
return SetToDevice(devID, GetCell(index, size), value);
}
......@@ -2447,7 +2444,7 @@ void InitTensor(XTensor * tensor, const XTensor * reference)
if(reference->order < 0)
return;
tensor->enableGrad = reference->enableGrad;
tensor->enableGrad = reference->enableGrad;
InitTensor(tensor, reference->order, reference->dimSize,
reference->dataType, reference->denseRatio,
reference->devID, reference->mem);
......@@ -2463,7 +2460,7 @@ void InitTensorV2(XTensor * tensor, const XTensor * reference)
if(reference->order < 0)
return;
tensor->enableGrad = reference->enableGrad;
tensor->enableGrad = reference->enableGrad;
InitTensorV2(tensor, reference->order, reference->dimSize,
reference->dataType, reference->devID);
}
......@@ -2478,7 +2475,7 @@ void InitTensorOnCPU(XTensor * tensor, const XTensor * reference)
if(reference->order < 0)
return;
tensor->enableGrad = reference->enableGrad;
tensor->enableGrad = reference->enableGrad;
InitTensor(tensor, reference->order, reference->dimSize,
reference->dataType, reference->denseRatio,
-1);
......
......@@ -151,8 +151,8 @@ public:
/* indicates whether the tensor keeps the gradient when used as model parameters */
bool isGrad;
/* indicates whether the gradient of the tensor should be computed */
bool enableGrad;
/* indicates whether the gradient of the tensor should be computed */
bool enableGrad;
/* indicates whether the tensor is used as paramters (or variables) */
bool isVar;
......@@ -453,7 +453,7 @@ extern int MakeTensorID();
void InitTensor(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense XTensor V2 */
void InitTensorV2(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
......
......@@ -142,6 +142,23 @@ void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
_Div(a, b, a, alpha, leadingDim);
}
/*
element-wise division of two tensors (do it on site)
keep the result in the input tensor a and return nothing
a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the item
>> a - tensor a (where keep the result)
>> b - tensor b
>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting
*/
void DivMe(XTensor& a, const XTensor& b, DTYPE alpha, int leadingDim)
{
_Div(&a, &b, &a, alpha, leadingDim);
}
/*
return a dimension if the division is performed as DivDim (in more details in DivDim.h)
>> a - a tensor
......
......@@ -122,7 +122,7 @@ where i is the item index
*/
void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{
int leadingDimRDI = a->order - leadingDim - 1;
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
......
......@@ -40,6 +40,7 @@ a(i) = a(i)/b(i) + \alpha * a(i)
where i is the index of the element
*/
void _DivMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0);
void DivMe(XTensor & a, const XTensor & b, DTYPE alpha = 0.0, int leadingDim = 0);
/*
element-wise division of two tensors (return an XTensor structure)
......
......@@ -130,6 +130,17 @@ void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha)
}
/*
mask entries of a given tensor (on site):
a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0
where i is the index of the element
*/
void MaskMe(XTensor& a, const XTensor& mask, DTYPE alpha)
{
_Mask(&a, &mask, &a, alpha);
}
/*
mask entries of a given tensor (return an XTensor structure):
a(i) = a(i) if mask(i) is non-zero
a(i) = alpha if mask(i) = 0
......
......@@ -43,6 +43,7 @@ a(i) = alpha if mask(i) = 0
where i is the index of the element
*/
void _MaskMe(XTensor * a, const XTensor * mask, DTYPE alpha);
void MaskMe(XTensor & a, const XTensor & mask, DTYPE alpha);
/*
mask entries of a given tensor (return an XTensor structure):
......
......@@ -54,15 +54,15 @@ void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2),
"Input tensors must have a order = 2!");
int an = a->dimSize[0], am = a->dimSize[1];
int bn = b->dimSize[0], bm = b->dimSize[1];
int cn = c->dimSize[0], cm = c->dimSize[1];
int am2 = transposedA == X_TRANS ? an : am;
int an2 = transposedA == X_TRANS ? am : an;
int bm2 = transposedB == X_TRANS ? bn : bm;
int bn2 = transposedB == X_TRANS ? bm : bn;
int cm2 = cm;
int cn2 = cn;
int an = a->dimSize[0], am = a->dimSize[1];
int bn = b->dimSize[0], bm = b->dimSize[1];
int cn = c->dimSize[0], cm = c->dimSize[1];
int am2 = transposedA == X_TRANS ? an : am;
int an2 = transposedA == X_TRANS ? am : an;
int bm2 = transposedB == X_TRANS ? bn : bm;
int bn2 = transposedB == X_TRANS ? bm : bn;
int cm2 = cm;
int cn2 = cn;
CheckNTErrors((am2 == bn2 && an2 == cn2 && bm2 == cm2),
"Unmatched tensors in multiplication!");
......
......@@ -40,21 +40,21 @@ argument7: matrix c (c=a*b*\alpha + c*beta)
*/
void _MatrixMul2DMultiTheading(TensorList * args)
{
CheckNTErrors(args->count == 2, "invalid argument number!");
IntList * indexArgs = (IntList*)args->GetItem(0);
TensorList * matrixArgs = (TensorList*)args->GetItem(1);
CheckNTErrors(indexArgs->count == 4, "invalid argument number!");
CheckNTErrors(matrixArgs->count == 5, "invalid argument number!");
CheckNTErrors(args->count == 2, "invalid argument number!");
IntList * indexArgs = (IntList*)args->GetItem(0);
TensorList * matrixArgs = (TensorList*)args->GetItem(1);
CheckNTErrors(indexArgs->count == 4, "invalid argument number!");
CheckNTErrors(matrixArgs->count == 5, "invalid argument number!");
XTensor * a = matrixArgs->GetItem(0);
XTensor * b = matrixArgs->GetItem(1);
XTensor * c = matrixArgs->GetItem(2);
DTYPE alpha = *(DTYPE*)(matrixArgs->GetItem(3));
DTYPE beta = *(DTYPE*)(matrixArgs->GetItem(4));
int x1 = indexArgs->GetItem(0);
int y1 = indexArgs->GetItem(1);
int x2 = indexArgs->GetItem(2);
int y2 = indexArgs->GetItem(3);
int x1 = indexArgs->GetItem(0);
int y1 = indexArgs->GetItem(1);
int x2 = indexArgs->GetItem(2);
int y2 = indexArgs->GetItem(3);
#ifdef FAST_MATRIX
int am = a->dimSize[1];
......
......@@ -143,6 +143,23 @@ void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha, int leadingDim)
_Multiply(a, b, a, alpha, leadingDim);
}
/*
element-wise product of two tensors (do it on site)
keep the result in the input tensor a and return nothing
a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the item
>> a - tensor a (where keep the result)
>> b - tensor b
>> alpha - the coefficient
>> leadingDim - the dimension along which we perform broadcasting
*/
void MultiplyMe(XTensor& a, const XTensor& b, DTYPE alpha, int leadingDim)
{
_Multiply(&a, &b, &a, alpha, leadingDim);
}
/*
return a dimension if the multiplication is performed as MultiplyDim (in more details in MultiplyDim.h)
>> a - a tensor
......
......@@ -122,7 +122,7 @@ where i is the item index
*/
void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{
int leadingDimRDI = a->order - leadingDim - 1;
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
......
......@@ -40,6 +40,7 @@ a(i) = a(i)*b(i) + \alpha * a(i)
where i is the index of the element
*/
void _MultiplyMe(XTensor * a, const XTensor * b, DTYPE alpha = 0.0, int leadingDim = 0);
void MultiplyMe(XTensor & a, const XTensor & b, DTYPE alpha = 0.0, int leadingDim = 0);
/*
element-wise product of two tensors (return an XTensor structure)
......
......@@ -139,6 +139,24 @@ void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha)
}
/*
tensor multiplication(do it on site)
make a new tensor to keep the result and return it
c = a * b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting
>> a - a tensor
>> b - another tensor whose size is equal to that of dimension n of a
>> n - the dimension index
>> alpha - the scaling factor
*/
void MultiplyDimMe(XTensor& a, const XTensor& b, int n, DTYPE alpha)
{
_MultiplyDim(&a, &b, &a, n, alpha);
}
/*
tensor multiplication (return an XTensor structure and make tensor connections)
make a new tensor to keep the result and return it
......
......@@ -33,6 +33,7 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP
/* tensor multiplication a = a * b + \alpha * c where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting. we keep the result in the input tensor a and return nothing */
void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha = 0.0);
void MultiplyDimMe(XTensor & a, const XTensor & b, int n, DTYPE alpha = 0.0);
/* tensor multiplication c = a * b where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting. We make a new tensor c to keep the result and return it */
......
......@@ -60,6 +60,16 @@ void _NegateMe(XTensor * a)
}
/*
set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void NegateMe(XTensor& a)
{
_Negate(&a, &a);
}
/*
set every entry to its minus value (return an XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
......
......@@ -34,6 +34,7 @@ set every entry to its minus value (do it on site)
keep the result in the input tensor a and return nothing
*/
void _NegateMe(XTensor * a);
void NegateMe(XTensor & a);
/*
set every entry to its minus value (return an XTensor structure)
......
......@@ -66,6 +66,16 @@ void _SignMe(XTensor * a)
}
/*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
*/
void SignMe(XTensor& a)
{
_Sign(&a, &a);
}
/*
set every entry to its sign value (return an XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor we are processing
......
......@@ -36,6 +36,12 @@ keep the result in the input tensor a and return nothing
void _SignMe(XTensor * a);
/*
set every entry to its sign value (do it on site)
keep the result in the input tensor a and return nothing
*/
void SignMe(XTensor & a);
/*
set every entry to its sign value (return an XTensor structure)
make a new tensor to keep the result and return it
*/
......
......@@ -126,6 +126,19 @@ void _SubMe(XTensor * a, const XTensor * b, DTYPE beta)
{
_Sub(a, b, a, beta);
}
/*
tensor subtraction a = a - b * \beta (do it on site)
keep the result in the tensor a and return nothing
>> a - a tensor
>> b - another tensor
>> beta - the scaling factor
*/
void SubMe(XTensor& a, const XTensor& b, DTYPE beta)
{
_Sub(&a, &b, &a, beta);
}
/*
return a dimension if the subtraction is performed as SubDim (in more details in SubDim.h)
......
......@@ -35,6 +35,7 @@ tensor subtraction a = a - b * \beta
keep the result in the input tensor a and return nothing
*/
void _SubMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0);
void SubMe(XTensor & a, const XTensor & b, DTYPE beta = (DTYPE)1.0);
/*
tensor subtraction c = a - b * \beta
......
......@@ -46,79 +46,79 @@ void _SubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet
{
n = MODX(n, a->order);
CheckNTErrors(a && b && c, "Empty tensor input!");
CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in subtraction!");
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Unmatched data types in subtraction!");
CheckNTErrors(a->order == c->order, "The input tensors do not have the same order in subtraction!");
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
CheckNTErrors(a && b && c, "Empty tensor input!");
CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in subtraction!");
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Unmatched data types in subtraction!");
CheckNTErrors(a->order == c->order, "The input tensors do not have the same order in subtraction!");
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
CheckDev(a->devID, b->devID);
if (beta == 0) {
_CopyValues(a, c);
return;
}
if (beta == 0) {
_CopyValues(a, c);
return;
}
if (XTensor::IsSameShaped(a, b)) {
_Sub(a, b, c, beta);
return;
}
if (XTensor::IsSameShaped(a, b)) {
_Sub(a, b, c, beta);
return;
}
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
#ifdef USE_CUDA
_CudaSubDim(a, b, c, n, beta);
_CudaSubDim(a, b, c, n, beta);
#else
ShowNTErrors("Please specify USE_CUDA and recompile the code!");
ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif
}
else {
int stride = 1;
int blockSize = a->dimSize[n];
int blockNum = 1;
for (int i = a->order - 1; i >= 0; i--) {
if (i > n)
stride *= a->dimSize[i];
else if (i < n)
blockNum *= a->dimSize[i];
}
if (a->dataType == DEFAULT_DTYPE) {
int num = a->unitNum;
if (stride > 1) {
for (int i = 0, j = 0; i < num; i += stride, j++) {
DTYPE * ap = (DTYPE*)a->data + i;
DTYPE bv = *((DTYPE*)b->data + j % blockSize) * beta;
DTYPE * cp = (DTYPE*)c->data + i;
for (int k = 0; k < stride; k++)
cp[k] = ap[k] - bv;
}
}
else if (stride == 1) {
DTYPE * bp = (DTYPE*)b->data;
for (int i = 0; i < num; i += blockSize) {
DTYPE * ap = (DTYPE*)a->data + i;
DTYPE * cp = (DTYPE*)c->data + i;
if (beta == 1.0F) {
for (int j = 0; j < blockSize; j++)
cp[j] = ap[j] - bp[j];
}
else {
for (int j = 0; j < blockSize; j++)
cp[j] = ap[j] - bp[j] * beta;
}
}
}
else {
ShowNTErrors("Something is wrong!");
}
}
else {
ShowNTErrors("TODO!");
}
}
}
else {
int stride = 1;
int blockSize = a->dimSize[n];
int blockNum = 1;
for (int i = a->order - 1; i >= 0; i--) {
if (i > n)
stride *= a->dimSize[i];
else if (i < n)
blockNum *= a->dimSize[i];
}
if (a->dataType == DEFAULT_DTYPE) {
int num = a->unitNum;
if (stride > 1) {
for (int i = 0, j = 0; i < num; i += stride, j++) {
DTYPE * ap = (DTYPE*)a->data + i;
DTYPE bv = *((DTYPE*)b->data + j % blockSize) * beta;
DTYPE * cp = (DTYPE*)c->data + i;
for (int k = 0; k < stride; k++)
cp[k] = ap[k] - bv;
}
}
else if (stride == 1) {
DTYPE * bp = (DTYPE*)b->data;
for (int i = 0; i < num; i += blockSize) {
DTYPE * ap = (DTYPE*)a->data + i;
DTYPE * cp = (DTYPE*)c->data + i;
if (beta == 1.0F) {
for (int j = 0; j < blockSize; j++)
cp[j] = ap[j] - bp[j];
}
else {
for (int j = 0; j < blockSize; j++)
cp[j] = ap[j] - bp[j] * beta;
}
}
}
else {
ShowNTErrors("Something is wrong!");
}
}
else {
ShowNTErrors("TODO!");
}
}
}
/*
......@@ -136,7 +136,7 @@ i.e., a is subtracted with b by broadcasting
*/
void _SubDim(XTensor * a, const XTensor * b, int n, DTYPE beta)
{
_SubDim(a, b, a, n, beta);
_SubDim(a, b, a, n, beta);
}
/*
......@@ -155,20 +155,20 @@ i.e., a is subtracted with b by broadcasting
*/
XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
{
XTensor c(&a);
c.SetTMPFlag();
XTensor c(&a);
c.SetTMPFlag();
n = MODX(n, a.order);
/* call _Sub function */
_SubDim(&a, &b, &c, n, beta);
/* call _Sub function */
_SubDim(&a, &b, &c, n, beta);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta);
return c;
return c;
}
/*
......
......@@ -39,25 +39,25 @@ where a is a tensor and b is a row vector
*/
template <class T, bool betaFired>
__global__
void KernelSubWithRow(T * a, T * b, T * c, int rowNum, int colNum, T beta)
void KernelSubWithRow(T * a, T * b, T * c, int rowNum, int colNum, T beta)
{
__shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK];
int col = blockDim.x * blockIdx.x + threadIdx.x;
int row = blockDim.y * blockIdx.y + threadIdx.y;
__shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK];
int col = blockDim.x * blockIdx.x + threadIdx.x;
int row = blockDim.y * blockIdx.y + threadIdx.y;
if (col >= colNum || row >= rowNum)
return;
if (col >= colNum || row >= rowNum)
return;
if (threadIdx.y == 0)
bv[threadIdx.x] = b[col];
if (threadIdx.y == 0)
bv[threadIdx.x] = b[col];
__syncthreads();
__syncthreads();
int offset = colNum * row + col;
if (betaFired)
c[offset] = a[offset] - bv[threadIdx.x] * beta;
else
c[offset] = a[offset] - bv[threadIdx.x];
int offset = colNum * row + col;
if (betaFired)
c[offset] = a[offset] - bv[threadIdx.x] * beta;
else
c[offset] = a[offset] - bv[threadIdx.x];
}
/*
......@@ -75,30 +75,30 @@ where a is a tensor and b is a colum vector
*/
template <class T, bool betaFired>
__global__
void KernelSubWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize, int blockNum, T beta)
void KernelSubWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize, int blockNum, T beta)
{
__shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK];
__shared__ T bv[MAX_CUDA_THREAD_NUM_PER_BLOCK];
int colIndex = blockDim.x * blockIdx.x + threadIdx.x;
int row = blockDim.y * blockIdx.y + threadIdx.y;
int colIndex = blockDim.x * blockIdx.x + threadIdx.x;
int row = blockDim.y * blockIdx.y + threadIdx.y;
int col = colIndex % colNum;
int block = colIndex / colNum;
int col = colIndex % colNum;
int block = colIndex / colNum;
if (row >= rowNum || block >= blockNum)
return;
if (row >= rowNum || block >= blockNum)
return;
if (threadIdx.x == 0)
bv[threadIdx.y] = b[row];
if (threadIdx.x == 0)
bv[threadIdx.y] = b[row];
__syncthreads();
__syncthreads();
int offset = block * blockSize + row * colNum + col;
int offset = block * blockSize + row * colNum + col;
if (betaFired)
c[offset] = a[offset] - bv[threadIdx.y] * beta;
else
c[offset] = a[offset] - bv[threadIdx.y];
if (betaFired)
c[offset] = a[offset] - bv[threadIdx.y] * beta;
else
c[offset] = a[offset] - bv[threadIdx.y];
}
/*
......@@ -116,63 +116,63 @@ i.e., a is subtracted with b by broadcasting
*/
void _CudaSubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE beta)
{
CheckNTErrors(a && b && c, "Empty tensor input!");
CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in subtraction!");
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Unmatched data types in subtraction!");
CheckNTErrors(a->order == c->order, "The input tensors do not have the same order in subtraction!");
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
int stride = 1;
int blockSize = a->dimSize[n];
int blockNum = 1;
for (int i = a->order - 1; i >= 0; i--) {
if (i > n)
stride *= a->dimSize[i];
else if (i < n)
blockNum *= a->dimSize[i];
}
int cudaGrids[3];
int cudaBlocks[3];
int devIDBackup = 0;
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
if (stride > 1) {
GDevs.GetCudaThread2D(a->devID, stride * blockNum, blockSize, MAX_INT, cudaGrids, cudaBlocks);
if (beta == (DTYPE)1.0F)
KernelSubWithCol<DTYPE, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockSize, stride, blockSize * stride, blockNum, beta);
else
KernelSubWithCol<DTYPE, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockSize, stride, blockSize * stride, blockNum, beta);
}
else if (stride == 1) {
GDevs.GetCudaThread2D(a->devID, blockSize, blockNum, MAX_INT, cudaGrids, cudaBlocks);
if (beta == (DTYPE)1.0F)
KernelSubWithRow<DTYPE, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> >
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockNum, blockSize, beta);
else
KernelSubWithRow<DTYPE, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> >
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockNum, blockSize, beta);
}
else {
ShowNTErrors("Something is wrong!");
}
}
else {
ShowNTErrors("TODO!");
}
BacktoCudaDev(a->devID, devIDBackup);
CheckNTErrors(a && b && c, "Empty tensor input!");
CheckNTErrors(a->unitNum == c->unitNum, "Unmatched tensors in subtraction!");
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Unmatched data types in subtraction!");
CheckNTErrors(a->order == c->order, "The input tensors do not have the same order in subtraction!");
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
int stride = 1;
int blockSize = a->dimSize[n];
int blockNum = 1;
for (int i = a->order - 1; i >= 0; i--) {
if (i > n)
stride *= a->dimSize[i];
else if (i < n)
blockNum *= a->dimSize[i];
}
int cudaGrids[3];
int cudaBlocks[3];
int devIDBackup = 0;
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
if (stride > 1) {
GDevs.GetCudaThread2D(a->devID, stride * blockNum, blockSize, MAX_INT, cudaGrids, cudaBlocks);
if (beta == (DTYPE)1.0F)
KernelSubWithCol<DTYPE, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockSize, stride, blockSize * stride, blockNum, beta);
else
KernelSubWithCol<DTYPE, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1])>>>
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockSize, stride, blockSize * stride, blockNum, beta);
}
else if (stride == 1) {
GDevs.GetCudaThread2D(a->devID, blockSize, blockNum, MAX_INT, cudaGrids, cudaBlocks);
if (beta == (DTYPE)1.0F)
KernelSubWithRow<DTYPE, false> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> >
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockNum, blockSize, beta);
else
KernelSubWithRow<DTYPE, true> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> >
((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data,
blockNum, blockSize, beta);
}
else {
ShowNTErrors("Something is wrong!");
}
}
else {
ShowNTErrors("TODO!");
}
BacktoCudaDev(a->devID, devIDBackup);
}
#endif
......
......@@ -132,6 +132,19 @@ void _SumMe(XTensor * a, const XTensor * b, DTYPE beta)
_Sum(a, b, a, beta);
}
/*
tensor summation a = a + b * \beta (do it on site)
keep the result in the tensor a and return nothing
>> a - a tensor
>> b - another tensor
>> beta - the scaling factor
*/
void SumMe(XTensor& a, const XTensor& b, DTYPE beta)
{
_Sum(&a, &b, &a, beta);
}
/*
return a dimension if the sum is performed as SumDim (in more details in SumDim.h)
>> a - a tensor
......
......@@ -34,6 +34,7 @@ tensor summation a = a + b * \beta
keep the result in the input tensor a and return nothing
*/
void _SumMe(XTensor * a, const XTensor * b, DTYPE beta = (DTYPE)1.0);
void SumMe(XTensor & a, const XTensor & b, DTYPE beta = (DTYPE)1.0);
/*
tensor summation c = a + b * \beta
......
......@@ -48,12 +48,12 @@ void _MatrixMULCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors((c->dataType == DEFAULT_DTYPE), "TODO!");
#if defined(USE_BLAS)
int an = a->dimSize[0];
int an = a->dimSize[0];
int am = a->dimSize[1];
int bn = b->dimSize[0];
int bm = b->dimSize[1];
int cn = c->dimSize[0];
int cm = c->dimSize[1];
int bn = b->dimSize[0];
int bm = b->dimSize[1];
int cn = c->dimSize[0];
int cm = c->dimSize[1];
if (transposedA == X_NOTRANS && transposedB == X_NOTRANS)
GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, cn, cm, am, alpha, (DTYPE*)a->data, am, (DTYPE*)b->data, bm, beta, (DTYPE*)c->data, cm);
......
......@@ -165,7 +165,7 @@ SIMPLE_BINARY_FUNCTION(Shift, _Shift, MATH_SHIFT)
SIMPLE_BINARY_FUNCTION_VOID(Shift, _Shift, MATH_SHIFT)
_SIMPLE_BINARY_FUNCTION_INT(_Mod, _CudaMod, mod)
SIMPLE_BINARY_FUNCTION_ME_INT(_ModMe, _Mod)
SIMPLE_BINARY_FUNCTION_ME_INT(ModMe, _Mod)
SIMPLE_BINARY_FUNCTION_INT(Mod, _Mod)
#else
......
......@@ -37,9 +37,16 @@ void _Scale(const XTensor * a, XTensor * b, float scale);
scale up tensor entires (on site)
b = a * scale
*/
void _ScaleMe(XTensor & a, int scale);
void _ScaleMe(XTensor & a, float scale);
void _ScaleMe(XTensor * a, int scale);
void _ScaleMe(XTensor * a, float scale);
/*
scale up tensor entires (on site)
b = a * scale
*/
void ScaleMe(XTensor & a, int scale);
void ScaleMe(XTensor & a, float scale);
/*
scale up tensor entires
b = a * scale
......@@ -64,8 +71,15 @@ void _Descale(const XTensor * a, XTensor * b, float scale);
descale tensor entires (on site)
b = a / scale
*/
void _DescaleMe(XTensor & a, int scale);
void _DescaleMe(XTensor & a, float scale);
void _DescaleMe(XTensor * a, int scale);
void _DescaleMe(XTensor * a, float scale);
/*
descale tensor entires (on site)
b = a / scale
*/
void DescaleMe(XTensor & a, int scale);
void DescaleMe(XTensor & a, float scale);
/*
descale tensor entires
......@@ -91,8 +105,15 @@ void _Shift(const XTensor * a, XTensor * b, float shift);
shift tensor entires (on site)
b = a + shift
*/
void _ShiftMe(XTensor & a, int shift);
void _ShiftMe(XTensor & a, float shift);
void _ShiftMe(XTensor * a, int shift);
void _ShiftMe(XTensor * a, float shift);
/*
shift tensor entires (on site)
b = a + shift
*/
void ShiftMe(XTensor & a, int shift);
void ShiftMe(XTensor & a, float shift);
/*
shift tensor entires
......@@ -118,7 +139,13 @@ void _Mod(const XTensor * a, XTensor * b, int base);
mod tensor entires (on site)
b = a % mod
*/
void _ModMe(XTensor & a, int base);
void _ModMe(XTensor * a, int base);
/*
mod tensor entires (on site)
b = a % mod
*/
void ModMe(XTensor & a, int base);
/*
mod tensor entires
......
......@@ -36,26 +36,26 @@ set every entry to its clip value
void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)
{
#ifdef USE_CUDA
/* run it on GPUs */
if (a->devID >= 0) {
_CudaClip(a, b, lower, upper);
return;
}
/* run it on GPUs */
if (a->devID >= 0) {
_CudaClip(a, b, lower, upper);
return;
}
#endif
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++) {
if (d[i] > upper)
db[i] = upper;
else if (d[i] < lower)
db[i] = lower;
else
db[i] = d[i];
}
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data;
DTYPE * db = (DTYPE*)b->data;
for (int i = 0; i < a->unitNum; i++) {
if (d[i] > upper)
db[i] = upper;
else if (d[i] < lower)
db[i] = lower;
else
db[i] = d[i];
}
}
/*
......@@ -67,7 +67,19 @@ keep the result in the input tensor a and return nothing
*/
void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper)
{
_Clip(a, a, lower, upper);
_Clip(a, a, lower, upper);
}
/*
set every entry to its clip value (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor we are processing
>> lower - the lower border
>> upper - the upper border
*/
void ClipMe(XTensor& a, DTYPE lower, DTYPE upper)
{
_Clip(&a, &a, lower, upper);
}
/*
......@@ -80,18 +92,18 @@ make a new tensor to keep the result and return it
*/
XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper)
{
XTensor b(&a);
b.SetTMPFlag();
XTensor b(&a);
b.SetTMPFlag();
/* call _Clip function */
_Clip(&a, &b, lower, upper);
/* call _Clip function */
_Clip(&a, &b, lower, upper);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_CLIP);
XLink::AddParamToHead(&b, lower);
XLink::AddParamToHead(&b, upper);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_CLIP);
XLink::AddParamToHead(&b, lower);
XLink::AddParamToHead(&b, upper);
return b;
return b;
}
void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper)
......
......@@ -36,18 +36,18 @@ set each entry to its clip value (CUDA Kernel)
>> size - size of the data array
*/
__global__
void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size)
void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) {
if (a[i] > upper)
b[i] = upper;
else if (a[i] < lower)
b[i] = lower;
else
b[i] = a[i];
}
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) {
if (a[i] > upper)
b[i] = upper;
else if (a[i] < lower)
b[i] = lower;
else
b[i] = a[i];
}
}
/*
......@@ -62,7 +62,7 @@ This is for float16 computation
__global__
void KernelClip(__half * a, __half * b, DTYPE lower, DTYPE upper, int size)
{
return;
return;
}
/*
......@@ -74,31 +74,31 @@ set each entry to its clip value
*/
void _CudaClip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)
{
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!");
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
int blockSize[3];
int gridSize[3];
int blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup);
int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup);
if (a->dataType == DEFAULT_DTYPE) {
KernelClip << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, lower, upper, a->unitNum);
}
else if (a->dataType == X_FLOAT16) {
KernelClip << <blocks, threads >> >((__half*)a->data, (__half*)b->data, lower, upper, a->unitNum);
}
else {
ShowNTErrors("TODO!");
}
if (a->dataType == DEFAULT_DTYPE) {
KernelClip << <blocks, threads >> >((DTYPE*)a->data, (DTYPE*)b->data, lower, upper, a->unitNum);
}
else if (a->dataType == X_FLOAT16) {
KernelClip << <blocks, threads >> >((__half*)a->data, (__half*)b->data, lower, upper, a->unitNum);
}
else {
ShowNTErrors("TODO!");
}
BacktoCudaDev(a->devID, devIDBackup);
BacktoCudaDev(a->devID, devIDBackup);
}
/*
......
......@@ -33,6 +33,10 @@ void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper);
keep the result in the input tensor a and return nothing */
void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper);
/* set every entry to its clip value (do it on site)
keep the result in the input tensor a and return nothing */
void ClipMe(XTensor & a, DTYPE lower, DTYPE upper);
/* set every entry to its clip value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper);
......
......@@ -32,6 +32,9 @@ void _Equal(const XTensor * a, XTensor * b, DTYPE value);
/* check whether every entry is equal to the given value (do it on site) */
void _EqualMe(XTensor * a, DTYPE value);
/* check whether every entry is equal to the given value (do it on site) */
void EqualMe(XTensor & a, DTYPE value);
/* check whether every entry is equal to the given value (return an XTensor structure) */
XTensor Equal(const XTensor & a, DTYPE value);
......@@ -41,6 +44,9 @@ void _NotEqual(const XTensor * a, XTensor * b, DTYPE value);
/* check whether every entry is not equal to the given value (do it on site) */
void _NotEqualMe(XTensor * a, DTYPE value);
/* check whether every entry is not equal to the given value (do it on site) */
void NotEqualMe(XTensor & a, DTYPE value);
/* check whether every entry is not equal to the given value (return an XTensor structure) */
XTensor NotEqual(const XTensor & a, DTYPE value);
......
......@@ -44,7 +44,7 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme
*/
void _Normalize(const XTensor * input, XTensor * output, int dim, const XTensor * mean, const XTensor * var, const XTensor * a, const XTensor * b, DTYPE epsilon)
{
int dimRDI = input->order - dim - 1;
int dimRDI = input->order - dim - 1;
CheckNTErrors((XTensor::IsSameShaped(input, output)), "Unmatched input tensors!");
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Unmatched input tensors");
CheckNTErrors((XTensor::IsSameShaped(mean, var)), "Unmatched input tensors");
......@@ -113,6 +113,27 @@ void _NormalizeMe(XTensor * input, int dim, const XTensor * mean, const XTensor
{
_Normalize(input, input, dim, mean, var, a, b, epsilon);
}
/*
normalized the data with normal distribution (do it on site)
keep the result in the input tensor and return nothing
For an input x, x = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
>> input - the input tensor
>> dim - dimension alone which we generate the mean and variance
>> mean - the mean of the input
>> var - the variance of the input
>> a - the scalar
>> b - the bias
>> epsilon - a parameter
*/
void NormalizeMe(XTensor& input, int dim, const XTensor& mean, const XTensor& var, const XTensor& a, const XTensor& b, DTYPE epsilon)
{
_Normalize(&input, &input, dim, &mean, &var, &a, &b, epsilon);
}
/*
normalized the data with normal distribution (return an XTensor structure)
make a new tensor to keep the result and return it
......
......@@ -95,8 +95,8 @@ void _CudaNormalize(const XTensor * input, XTensor * output, int dim,
{
CheckNTErrors((input->dataType == DEFAULT_DTYPE), "TODO!");
int dimRDI = input->order - dim - 1;
int stride = 1;
int dimRDI = input->order - dim - 1;
int stride = 1;
int strideNum = input->dimSizeRDI[dimRDI];
int blockNum = 1;
for (int i = 0; i < input->order; i++) {
......
......@@ -42,6 +42,14 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme
void _NormalizeMe(XTensor * input, int dim, const XTensor * mean, const XTensor * var, const XTensor * a, const XTensor * b, DTYPE epsilon);
/*
normalized the data with normal distribution (do it on site)
keep the result in the input tenosr and return nothing
For an input x, x = a * (x-mean)/sqrt(variance+\epsilon) + b
where a and b are the scalar and bias respectively, and \epsilon is the adjustment parameter.
*/
void NormalizeMe(XTensor & input, int dim, const XTensor & mean, const XTensor & var, const XTensor & a, const XTensor & b, DTYPE epsilon);
/*
normalized the data with normal distribution (return an XTensor structure)
make a new tensor to keep the result and return it
For an input x, y = a * (x-mean)/sqrt(variance+\epsilon) + b
......
......@@ -81,6 +81,17 @@ void _PowerMe(XTensor * a, DTYPE p)
}
/*
get the power(a, p) (do it on site)
keep the result in the input tensor a and return nothing
>> a - the tensor
>> p - parameter
*/
void PowerMe(XTensor& a, DTYPE p)
{
_Power(&a, &a, p);
}
/*
get the power(a, p) (return an XTensor structure)
make a new tensor to keep the result and return it
>> a - input tensor
......
......@@ -36,6 +36,12 @@ keep the result in the input tensor a and return nothing
void _PowerMe(XTensor * a, DTYPE p);
/*
get the power(x, y) (do it on site)
keep the result in the input tensor a and return nothing
*/
void PowerMe(XTensor & a, DTYPE p);
/*
get the power(x, y) (return an XTensor structure)
make a new tensor to keep the result and return it
*/
......
......@@ -92,6 +92,21 @@ void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift)
}
/*
scale and shift all tensor entires (do it on site)
keep the result in the input tensor a and return nothing
a = a * scale + shift
>> a - the input/output tensor
>> scale - the scaler factor
>> shift - the shift factor
*/
void ScaleAndShiftMe(XTensor& a, DTYPE scale, DTYPE shift)
{
_ScaleAndShift(&a, &a, scale, shift);
}
/*
scale and shift all tensor entires (return an XTensor structure)
make a new tensor to keep the result and return it
......
......@@ -45,6 +45,13 @@ void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift = 0);
/*
scale and shift all tensor entires
keep the result in the input tensor a and return nothing
a = a * scale + shift
*/
void ScaleAndShiftMe(XTensor & a, DTYPE scale, DTYPE shift = 0);
/*
scale and shift all tensor entires
make a new tensor to keep the result and return it
b = a * scale + shift
*/
......
......@@ -34,7 +34,7 @@ DTYPE square(DTYPE x)
DTYPE round(DTYPE r)
{
return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5);
return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5);
}
DTYPE isnonzero(DTYPE r)
......
......@@ -38,7 +38,7 @@ DTYPE cudasquare(DTYPE x)
__device__
DTYPE cudaround(DTYPE r)
{
return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5);
return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5);
}
__device__
......
......@@ -31,6 +31,9 @@ void _Absolute(const XTensor * a, XTensor * b);
/* set every entry to its absolute value (do it on site)
keep the result in the input tensor a and return nothing */
void _AbsoluteMe(XTensor * a);
/* set every entry to its absolute value (do it on site)
keep the result in the input tensor a and return nothing */
void AbsoluteMe(XTensor & a);
/* set every entry to its absolute value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Absolute(const XTensor & a);
......@@ -42,6 +45,9 @@ void _Ceil(const XTensor * a, XTensor * b);
/* set every entry to its ceil value (do it on site)
keep the result in the input tensor a and return nothing */
void _CeilMe(XTensor * a);
/* set every entry to its ceil value (do it on site)
keep the result in the input tensor a and return nothing */
void CeilMe(XTensor & a);
/* set every entry to its ceil value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Ceil(const XTensor & a);
......@@ -53,6 +59,9 @@ void _Exp(const XTensor * a, XTensor * b);
/* set every entry to its exponent value (do it on site)
keep the result in the input tensor a and return nothing */
void _ExpMe(XTensor * a);
/* set every entry to its exponent value (do it on site)
keep the result in the input tensor a and return nothing */
void ExpMe(XTensor & a);
/* set every entry to its exponent value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Exp(const XTensor & a);
......@@ -64,6 +73,9 @@ void _Floor(const XTensor * a, XTensor * b);
/* set every entry to its floor value (do it on site)
keep the result in the input tensor a and return nothing */
void _FloorMe(XTensor * a);
/* set every entry to its floor value (do it on site)
keep the result in the input tensor a and return nothing */
void FloorMe(XTensor & a);
/* set every entry to its floor value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Floor(const XTensor & a);
......@@ -75,6 +87,9 @@ void _IsNonZero(const XTensor *a, XTensor *b);
/* if source entry is non-zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */
void _IsNonZeroMe(XTensor *a);
/* if source entry is non-zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */
void IsNonZeroMe(XTensor &a);
/* if source entry is non-zero, set target entry to be one, otherwise zero (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor IsNonZero(const XTensor &a);
......@@ -86,6 +101,9 @@ void _IsZero(const XTensor *a, XTensor *b);
/* if source entry is zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */
void _IsZeroMe(XTensor *a);
/* if source entry is zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */
void IsZeroMe(XTensor &a);
/* if source entry is zero, set target entry to be one, otherwise zero (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor IsZero(const XTensor &a);
......@@ -97,6 +115,9 @@ void _Log(const XTensor * a, XTensor * b);
/* set every entry to its logarithm value (do it on site)
keep the result in the input tensor a and return nothing */
void _LogMe(XTensor * a);
/* set every entry to its logarithm value (do it on site)
keep the result in the input tensor a and return nothing */
void LogMe(XTensor & a);
/* set every entry to its logarithm value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Log(const XTensor & a);
......@@ -108,6 +129,9 @@ void _Round(const XTensor * a, XTensor * b);
/* set every entry to its round value (do it on site)
keep the result in the input tensor a and return nothing */
void _RoundMe(XTensor * a);
/* set every entry to its round value (do it on site)
keep the result in the input tensor a and return nothing */
void RoundMe(XTensor & a);
/* set every entry to its round value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Round(const XTensor & a);
......@@ -119,6 +143,9 @@ void _Sqrt(const XTensor * a, XTensor * b);
/* set every entry to its sqrt value (do it on site)
keep the result in the input tensor a and return nothing */
void _SqrtMe(XTensor * a);
/* set every entry to its sqrt value (do it on site)
keep the result in the input tensor a and return nothing */
void SqrtMe(XTensor & a);
/* set every entry to its sqrt value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Sqrt(const XTensor & a);
......@@ -130,6 +157,9 @@ void _Square(const XTensor * a, XTensor * b);
/* set every entry to its square value (do it on site)
keep the result in the input tensor a and return nothing */
void _SquareMe(XTensor * a);
/* set every entry to its square value (do it on site)
keep the result in the input tensor a and return nothing */
void SquareMe(XTensor & a);
/* set every entry to its square value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Square(const XTensor & a);
......@@ -142,6 +172,9 @@ void _Sin(const XTensor * a, XTensor * b);
/* set every entry to its sine value (do it on site)
keep the result in the input tensor a and return nothing */
void _SinMe(XTensor * a);
/* set every entry to its sine value (do it on site)
keep the result in the input tensor a and return nothing */
void SinMe(XTensor & a);
/* set every entry to its sine value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Sin(const XTensor & a);
......@@ -153,6 +186,9 @@ void _Cos(const XTensor * a, XTensor * b);
/* set every entry to its cosine value (do it on site)
keep the result in the input tensor a and return nothing */
void _CosMe(XTensor * a);
/* set every entry to its cosine value (do it on site)
keep the result in the input tensor a and return nothing */
void CosMe(XTensor & a);
/* set every entry to its cosine value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Cos(const XTensor & a);
......@@ -164,6 +200,9 @@ void _Tan(const XTensor * a, XTensor * b);
/* set every entry to its tangent value (do it on site)
keep the result in the input tensor a and return nothing */
void _TanMe(XTensor * a);
/* set every entry to its tangent value (do it on site)
keep the result in the input tensor a and return nothing */
void TanMe(XTensor & a);
/* set every entry to its tangent value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Tan(const XTensor & a);
......
......@@ -41,8 +41,8 @@ void _ReduceMax(const XTensor * input, XTensor * output, int dim)
CheckNTErrors((input->order == output->order + 1), "Incorrect tensor sizes!");
CheckNTErrors((input->order > dim && dim >=0), "Illegal dimension to reduce!");
CheckNTErrors((input->dataType == output->dataType), "Unmatched data types!");
int dimRDI = input->order - dim - 1;
int dimRDI = input->order - dim - 1;
CheckNTErrors(dimRDI >= 0, "Wrong dimension!");
for(int i = 0; i < input->order; i++){
......@@ -104,7 +104,7 @@ make a new tensor to keep the result and return it
XTensor ReduceMax(const XTensor &input, int dim)
{
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1;
int * dimSize = new int[order];
for(int i = 0; i < order; i++){
......
......@@ -504,7 +504,7 @@ void _CudaReduceMax(const XTensor * input, XTensor * output, int dim)
CheckNTErrors(input->order > dim && dim >=0, "Illegal dimension to reduce!");
CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!");
int dimRDI = input->order - dim - 1;
int dimRDI = input->order - dim - 1;
for(int i = 0; i < input->order; i++){
if(i < dimRDI){
CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i], "Unmatched tensors!");
......
......@@ -39,7 +39,7 @@ void _ReduceMean(const XTensor * input, XTensor * output, int dim)
{
CheckNTErrors((input->order > dim), "Illegal dimension specified!");
int dimRDI = input->order - dim - 1;
int dimRDI = input->order - dim - 1;
int num = input->dimSizeRDI[dimRDI];
_ReduceSum(input, output, dim);
......@@ -59,7 +59,7 @@ For a 1-dimensional data array a, mean = (1/n) * sum_i input_i
XTensor ReduceMean(const XTensor &input, int dim)
{
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1;
int * dimSize = new int[order];
for(int i = 0; i < order; i++){
......
......@@ -50,7 +50,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
CheckNTErrors((input->dataType == output->dataType), "Unmatched data types!");
CheckNTErrors((shift == NULL || XTensor::IsSameShaped(output, shift)), "Incorrect shift tensor size!");
int dimRDI = input->order - dim - 1;
int dimRDI = input->order - dim - 1;
CheckNTErrors(dimRDI >= 0, "Wrong dimension!");
for(int i = 0; i < input->order; i++){
......@@ -215,7 +215,7 @@ sum = \sum_i exp((a_i - shift)^power) if isExp == true
XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE power, bool isExp)
{
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1;
int * dimSize = new int[order];
for(int i = 0; i < order; i++){
......@@ -294,7 +294,7 @@ sum = \sum_i exp((a_i)^power) if isExp == true
XTensor ReduceSum(const XTensor &input, int dim, DTYPE power, bool isExp)
{
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1;
int * dimSize = new int[order];
for(int i = 0; i < order; i++){
......
......@@ -341,7 +341,7 @@ void KernelReduceSumFast(DTYPE * input, DTYPE * output,
if (tid < blockDim.x / 32)
value = data[tid];
else
value = 0;
value = 0;
value = shflDownReduceSum(value);
if (tid == 0 && blockIdx.x < reducedStrideNum) {
......@@ -692,7 +692,7 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!");
CheckNTErrors(shift == NULL || output->unitNum == shift->unitNum, "Incorrect shift tensor size!");
int dimRDI = input->order - dim - 1;
int dimRDI = input->order - dim - 1;
for(int i = 0; i < input->order; i++){
if(i < dimRDI){
CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i], "Unmatched tensors!");
......
......@@ -55,7 +55,7 @@ For a 1-dimensional data array a, sum = \sum_i (a_i - shift)^2
XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift)
{
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1;
int * dimSize = new int[order];
for(int i = 0; i < order; i++){
......
......@@ -38,7 +38,7 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
*/
void _ReduceVariance(const XTensor * input, XTensor * output, int dim, const XTensor * mean)
{
int dimRDI = input->order - dim - 1;
int dimRDI = input->order - dim - 1;
int num = input->dimSizeRDI[dimRDI];
_ReduceSum(input, output, dim, mean, 2.0F);
_ScaleAndShiftMe(output, (DTYPE)1 / num, 0);
......@@ -58,7 +58,7 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean)
{
CheckNTErrors(dim >= 0 && dim < input.order, "Illegal dimension to reduce!");
int order = input.order - 1;
int * dimSize = new int[order];
for(int i = 0; i < order; i++){
......
......@@ -85,7 +85,7 @@ void _ConcatenateSolely(const TensorList * smalls, XTensor * big, int dim)
}
}
else {
StrList* sourceArrays = new StrList(smalls->count);
StrList* sourceArrays = new StrList(smalls->count);
int * blockSizes = new int[smalls->count];
for (int i = 0; i < smalls->count; i++) {
XTensor * tensor = (XTensor*)smalls->GetItem(i);
......
......@@ -41,6 +41,13 @@ a = permuted(a)
*/
void _PermuteMe(XTensor * a, int * dimPermute);
/*
permute the tensor dimensions (do it on site).
keep the result in the input tensor and return nothing.
a = permuted(a)
*/
void PermuteMe(XTensor &a, int * dimPermute);
/*
make a tensor with permuted dimensions (return an XTensor structure).
make a new tensor to keep the result and return it.
......
......@@ -43,9 +43,9 @@ XTensor Reshape(XTensor &s, int order, int * dimSize)
t.Reshape(order, dimSize);
/* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE);
XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE);
return t;
return t;
}
void Reshape(XTensor &s, XTensor &t, int order, int * dimSize)
......
......@@ -89,6 +89,20 @@ void _SqueezeMe(XTensor * source, int leadingDim)
}
/*
squeeze the tensor along the specified dimension (do it on site)
keep the result in the input tensor a and return nothing
>> source - the input tensor
>> leadingDim - the dimension that we would squeeze
if leadingDim = -1, squeeze all dimensions that are 1
else, squeeze the specified dimension
*/
void SqueezeMe(XTensor& source, int leadingDim)
{
_Squeeze(&source, &source, leadingDim);
}
/*
squeeze the tensor along the specified dimension (return an XTensor structure)
make a new tensor to keep the result and return it
......
......@@ -33,6 +33,10 @@ void _Squeeze(XTensor * source, XTensor * target, int leadingDim = -1);
keep the result in the input tensor a and return nothing */
void _SqueezeMe(XTensor * source, int leadingDim = -1);
/* squeeze the tensor along the specified dimension (do it on site)
keep the result in the input tensor a and return nothing */
void SqueezeMe(XTensor & source, int leadingDim = -1);
/* squeeze the tensor along the specified dimension (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Squeeze(XTensor & source, int leadingDim = -1);
......
......@@ -45,7 +45,7 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim)
CheckNTErrors((a->order == index->order), "Unmatched input tensors!");
CheckNTErrors((index->dataType == X_INT), "Wrong data type!");
int dimRDI = a->order - dim - 1;
int dimRDI = a->order - dim - 1;
/* make the index tensor */
index->SetAscendingOrder(dim);
......@@ -67,7 +67,7 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim)
blockNum *= a->dimSizeRDI[i];
int blockSize = stride * strideNum;
_CopyValues(a, b);
_CopyValues(a, b);
for (int k = 0; k < blockNum; k++) {
for (int i = 0; i < stride; i++) {
void * dataB = (char*)b->data + (k * blockSize + i) * b->unitSize;
......@@ -98,6 +98,21 @@ void _SortMe(XTensor * a, XTensor * index, int dim)
}
/*
sort the tensor along a given dimension (do it on site)
keep the result in the input tensor a and return nothing
>> a - input tensor
>> index - index of the items in the resulting tensor
>> dim - the dimension along which the sorting is performed
*/
void SortMe(XTensor& a, XTensor& index, int dim)
{
_Sort(&a, &a, &index, dim);
}
/*
sort the tensor along a given dimension (return an XTensor structure)
make a new tensor to keep the result and return it
......
......@@ -217,7 +217,7 @@ void _CudaSortBig(const XTensor * a, XTensor * b, XTensor * indexA, XTensor * in
CheckNTErrors((a->order > dim && dim >= 0), "Incorrect dimension specified!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
int dimRDI = a->order - dim - 1;
int dimRDI = a->order - dim - 1;
if (k < 0 || k > b->dimSizeRDI[dimRDI])
k = b->dimSizeRDI[dimRDI];
......
......@@ -35,6 +35,12 @@ keep the result in the input tensor a and return nothing
*/
void _SortMe(XTensor * a, XTensor * index, int dim);
/*
sort the data along a given dimension (do it on site)
keep the result in the input tensor a and return nothing
*/
void SortMe(XTensor & a, XTensor & index, int dim);
/*
sort the data along a given dimension (return an XTensor structure)
make a new tensor to keep the result and return it
......
......@@ -238,9 +238,9 @@ void KernelTopK(T * input, int stride, int strideNum, int blockNum, int k, T min
CudaXHeap<MIN_HEAP, T> heapFinal(k, k, heapData + k * threadIdx.y * blockDim.x);
/*
merge the result over the workers.
merge the result over the workers.
This can be improved by parallel merging
*/
*/
if (blockDim.x > 1) {
for (int p = 1; p < blockDim.x && p < strideNum; p++) {
CudaHeapNode<T> * hd = heapData + k * (threadIdx.y * blockDim.x + p);
......@@ -770,22 +770,22 @@ void KernelTopKRadixSelect(unsigned int * input, int stride, int strideNum,
/*
if (idx == 0)
{
unsigned int* uintOutput = new unsigned int;
int* tmpIndex = new int;
//*******************something worng***************************
cudaMalloc((void **)&uintOutput, sizeof(unsigned int)* k);
cudaMalloc((void **)&tmpIndex, sizeof(unsigned int)*k);
//*************************************************************
collectNumberOld(input, limit, k, desire, uintOutput, tmpIndex, stride, strideNum);
int blockIndex = idy / stride;
int offsetInBlock = idy% stride;
for (int i = stride * k * blockIndex + offsetInBlock, j = 0; j < k; j++, i += stride)
{
//for(int i = )
output[i] = deconvert(uintOutput[j]);
index[i] = tmpIndex[j];
}
unsigned int* uintOutput = new unsigned int;
int* tmpIndex = new int;
//*******************something worng***************************
cudaMalloc((void **)&uintOutput, sizeof(unsigned int)* k);
cudaMalloc((void **)&tmpIndex, sizeof(unsigned int)*k);
//*************************************************************
collectNumberOld(input, limit, k, desire, uintOutput, tmpIndex, stride, strideNum);
int blockIndex = idy / stride;
int offsetInBlock = idy% stride;
for (int i = stride * k * blockIndex + offsetInBlock, j = 0; j < k; j++, i += stride)
{
//for(int i = )
output[i] = deconvert(uintOutput[j]);
index[i] = tmpIndex[j];
}
}
__syncthreads();
*/
......
......@@ -67,8 +67,8 @@ void CudaSetAscendingOrder(XTensor * a, int dim)
{
CheckNTErrors((a->dataType == X_INT), "TODO!");
int dimRDI = a->order - dim - 1;
int stride = 1;
int dimRDI = a->order - dim - 1;
int stride = 1;
int strideNum = a->dimSizeRDI[dimRDI];
for(int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
......
......@@ -56,7 +56,7 @@ void RunParallel2D(XPRunner * parallelRunner, void * job,
va_list ap;
va_start(ap, argNum);
for (int i = 0; i < argNum; i++) {
XTensor* p = va_arg(ap, XTensor*);
XTensor* p = va_arg(ap, XTensor*);
jobArgList->Add(p);
}
va_end(ap);
......@@ -77,19 +77,19 @@ void RunParallel2D(XPRunner * parallelRunner, void * job,
2. other arguments
*/
for (int i = 0; i < jobNum; i++) {
IntList* indexArgs = new IntList(4);
IntList* indexArgs = new IntList(4);
TensorList * blockArgs = new TensorList(argNum);
int * blockIndex = indexList + i * 4;
indexArgs->Add(blockIndex[0]);
indexArgs->Add(blockIndex[1]);
indexArgs->Add(blockIndex[2]);
indexArgs->Add(blockIndex[3]);
indexArgs->Add(blockIndex[0]);
indexArgs->Add(blockIndex[1]);
indexArgs->Add(blockIndex[2]);
indexArgs->Add(blockIndex[3]);
for (int j = 0; j < argNum; j++)
blockArgs->Add(jobArgList->GetItem(j));
args->Add((XTensor*)indexArgs);
args->Add((XTensor*)indexArgs);
args->Add((XTensor*)blockArgs);
jobs->Add((XTensor*)job);
......
......@@ -30,84 +30,84 @@ Set every entry to its absolute value.
*/
bool TestAbsolute1()
{
/* a tensor of size (3, 2) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 3;
dimSize[1] = 2;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.5F, -4.0F},
{0.0F, 6.0F} };
DTYPE answer[3][2] = { {1.0F, 2.0F},
{0.5F, 4.0F},
{0.0F, 6.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(order, dimSize);
XTensor * b = NewTensor(order, dimSize);
XTensor * aMe = NewTensor(order, dimSize);
/* a tensor of size (3, 2) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 3;
dimSize[1] = 2;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.5F, -4.0F},
{0.0F, 6.0F} };
DTYPE answer[3][2] = { {1.0F, 2.0F},
{0.5F, 4.0F},
{0.0F, 6.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(order, dimSize);
XTensor * b = NewTensor(order, dimSize);
XTensor * aMe = NewTensor(order, dimSize);
XTensor bUser;
/* initialize variables */
a->SetData(aData, unitNum);
/* initialize variables */
a->SetData(aData, unitNum);
aMe->SetData(aData, unitNum);
/* call Absolute function */
/* call Absolute function */
_Absolute(a, b);
_AbsoluteMe(aMe);
_AbsoluteMe(aMe);
bUser = Absolute(*a);
/* check results */
cpuTest = b->CheckData(answer, unitNum, 1e-4F) && aMe->CheckData(answer, unitNum, 1e-4F) && bUser.CheckData(answer, unitNum, 1e-4F);
/* check results */
cpuTest = b->CheckData(answer, unitNum, 1e-4F) && aMe->CheckData(answer, unitNum, 1e-4F) && bUser.CheckData(answer, unitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* Initialize variables */
aGPU->SetData(aData, unitNum);
/* Initialize variables */
aGPU->SetData(aData, unitNum);
aMeGPU->SetData(aData, unitNum);
/* call Absolute function */
/* call Absolute function */
_Absolute(aGPU, bGPU);
_AbsoluteMe(aMeGPU);
_AbsoluteMe(aMeGPU);
bUserGPU = Absolute(*aGPU);
/* check results */
gpuTest = bGPU->CheckData(answer, unitNum, 1e-4F) && aMeGPU->CheckData(answer, unitNum, 1e-4F) && bUserGPU.CheckData(answer, unitNum, 1e-4F);
/* check results */
gpuTest = bGPU->CheckData(answer, unitNum, 1e-4F) && aMeGPU->CheckData(answer, unitNum, 1e-4F) && bUserGPU.CheckData(answer, unitNum, 1e-4F);
/* destroy variables */
delete a;
delete b;
delete aMe;
/* destroy variables */
delete a;
delete b;
delete aMe;
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] dimSize;
delete[] dimSize;
return cpuTest && gpuTest;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
delete aMe;
delete[] dimSize;
/* destroy variables */
delete a;
delete b;
delete aMe;
delete[] dimSize;
return cpuTest;
return cpuTest;
#endif // USE_CUDA
}
......@@ -119,33 +119,33 @@ TODO!!
/* test for Absolute Function */
bool TestAbsolute()
{
XPRINT(0, stdout, "[TEST Absolute] set every entry to its absolute value \n");
bool returnFlag = true, caseFlag = true;
XPRINT(0, stdout, "[TEST Absolute] set every entry to its absolute value \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestAbsolute1();
/* case 1 test */
caseFlag = TestAbsolute1();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* other cases test */
/*
TODO!!
*/
/* other cases test */
/*
TODO!!
*/
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n");
XPRINT(0, stdout, "\n");
return returnFlag;
return returnFlag;
}
} // namespace nts(NiuTrans.Tensor)
......@@ -31,88 +31,88 @@ Set every entry to its clip value.
*/
bool TestClip1()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.0F, 4.0F},
{5.0F, -6.0F} };
DTYPE answer[3][2] = { {1.0F, -1.0F},
{0.0F, 1.0F},
{1.0F, -1.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize);
XTensor * aMe = NewTensor(aOrder, aDimSize);
XTensor bUser;
/* initialize variables */
a->SetData(aData, aUnitNum);
aMe->SetData(aData, aUnitNum);
/* call Clip function */
_Clip(a, b, -1.0, 1.0);
_ClipMe(aMe, -1.0, 1.0);
bUser = Clip(*a, -1.0, 1.0);
/* check results */
cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) &&
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.0F, 4.0F},
{5.0F, -6.0F} };
DTYPE answer[3][2] = { {1.0F, -1.0F},
{0.0F, 1.0F},
{1.0F, -1.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize);
XTensor * aMe = NewTensor(aOrder, aDimSize);
XTensor bUser;
/* initialize variables */
a->SetData(aData, aUnitNum);
aMe->SetData(aData, aUnitNum);
/* call Clip function */
_Clip(a, b, -1.0, 1.0);
_ClipMe(aMe, -1.0, 1.0);
bUser = Clip(*a, -1.0, 1.0);
/* check results */
cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) &&
aMe->CheckData(answer, aUnitNum, 1e-4F) &&
bUser.CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* call Clip function */
_Clip(aGPU, bGPU, -1.0, 1.0);
_ClipMe(aMeGPU, -1.0, 1.0);
bUserGPU = Clip(*aGPU, -1.0, 1.0);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* call Clip function */
_Clip(aGPU, bGPU, -1.0, 1.0);
_ClipMe(aMeGPU, -1.0, 1.0);
bUserGPU = Clip(*aGPU, -1.0, 1.0);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete a;
delete b;
delete aMe;
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize;
/* destroy variables */
delete a;
delete b;
delete aMe;
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
delete aMe;
delete[] aDimSize;
/* destroy variables */
delete a;
delete b;
delete aMe;
delete[] aDimSize;
return cpuTest;
return cpuTest;
#endif // USE_CUDA
}
......@@ -124,33 +124,33 @@ TODO!!
/* test for Clip Function */
bool TestClip()
{
XPRINT(0, stdout, "[TEST Clip] set every entry to its clip value \n");
bool returnFlag = true, caseFlag = true;
XPRINT(0, stdout, "[TEST Clip] set every entry to its clip value \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestClip1();
/* case 1 test */
caseFlag = TestClip1();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* other cases test */
/*
TODO!!
*/
/* other cases test */
/*
TODO!!
*/
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n");
XPRINT(0, stdout, "\n");
return returnFlag;
return returnFlag;
}
} // namespace nts(NiuTrans.Tensor)
......@@ -31,88 +31,88 @@ Comapre whether every entry is equal to the specified value.
*/
bool TestCompare1()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.0F, 4.0F},
{5.0F, 1.0F} };
DTYPE answer[3][2] = { {1.0F, 0.0F},
{0.0F, 0.0F},
{0.0F, 1.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize);
XTensor * aMe = NewTensor(aOrder, aDimSize);
XTensor bUser;
/* initialize variables */
a->SetData(aData, aUnitNum);
aMe->SetData(aData, aUnitNum);
/* call Equal function */
_Equal(a, b, 1.0);
_EqualMe(aMe, 1.0);
bUser = Equal(*a, 1.0);
/* check results */
cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) &&
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.0F, 4.0F},
{5.0F, 1.0F} };
DTYPE answer[3][2] = { {1.0F, 0.0F},
{0.0F, 0.0F},
{0.0F, 1.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize);
XTensor * aMe = NewTensor(aOrder, aDimSize);
XTensor bUser;
/* initialize variables */
a->SetData(aData, aUnitNum);
aMe->SetData(aData, aUnitNum);
/* call Equal function */
_Equal(a, b, 1.0);
_EqualMe(aMe, 1.0);
bUser = Equal(*a, 1.0);
/* check results */
cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) &&
aMe->CheckData(answer, aUnitNum, 1e-4F) &&
bUser.CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* call Equal function */
_Equal(aGPU, bGPU, 1.0);
_EqualMe(aMeGPU, 1.0);
bUserGPU = Equal(*aGPU, 1.0);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* call Equal function */
_Equal(aGPU, bGPU, 1.0);
_EqualMe(aMeGPU, 1.0);
bUserGPU = Equal(*aGPU, 1.0);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete a;
delete b;
delete aMe;
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize;
/* destroy variables */
delete a;
delete b;
delete aMe;
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
delete aMe;
delete[] aDimSize;
/* destroy variables */
delete a;
delete b;
delete aMe;
delete[] aDimSize;
return cpuTest;
return cpuTest;
#endif // USE_CUDA
}
......@@ -124,33 +124,33 @@ TODO!!
/* test for Compare Function */
bool TestCompare()
{
XPRINT(0, stdout, "[TEST Compare] compare every entry with specified value \n");
bool returnFlag = true, caseFlag = true;
XPRINT(0, stdout, "[TEST Compare] compare every entry with specified value \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestCompare1();
/* case 1 test */
caseFlag = TestCompare1();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* other cases test */
/*
TODO!!
*/
/* other cases test */
/*
TODO!!
*/
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n");
XPRINT(0, stdout, "\n");
return returnFlag;
return returnFlag;
}
} // namespace nts(NiuTrans.Tensor)
......@@ -29,7 +29,7 @@ In this case, 2 * (2, 1) -> (2, 2), dim=1.
*/
bool TestConcatenate1()
{
/* create list */
/* create list */
TensorList * sList = new TensorList();
/* a source tensor of size (2, 1) */
......@@ -83,7 +83,7 @@ bool TestConcatenate1()
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* add tensors to list */
/* add tensors to list */
sList->Add(s1);
sList->Add(s2);
......@@ -99,29 +99,29 @@ bool TestConcatenate1()
bool gpuTest = true;
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* clear list */
sList->Clear();
/* clear list */
sList->Clear();
/* add tensors to list*/
sList->Add(sGPU1);
sList->Add(sGPU2);
/* add tensors to list*/
sList->Add(sGPU1);
sList->Add(sGPU2);
/* call Concatenate function */
_Concatenate(sList, tGPU, 1);
/* call Concatenate function */
_Concatenate(sList, tGPU, 1);
tUserGPU = Concatenate(*sList, 1);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
delete sList;
......@@ -135,7 +135,7 @@ bool TestConcatenate1()
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete sList;
......@@ -156,7 +156,7 @@ In this case, 2 * (2, 1) -> (4, 1), dim=0.
*/
bool TestConcatenate2()
{
/* create list */
/* create list */
TensorList * sList = new TensorList();
/* a source tensor of size (2, 1) */
......@@ -212,7 +212,7 @@ bool TestConcatenate2()
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* add tensors to list */
/* add tensors to list */
sList->Add(s1);
sList->Add(s2);
......@@ -224,35 +224,35 @@ bool TestConcatenate2()
cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* clear list */
sList->Clear();
/* clear list */
sList->Clear();
/* add tensors to list*/
sList->Add(sGPU1);
sList->Add(sGPU2);
/* add tensors to list*/
sList->Add(sGPU1);
sList->Add(sGPU2);
/* call Concatenate function */
_Concatenate(sList, tGPU, 0);
/* call Concatenate function */
_Concatenate(sList, tGPU, 0);
tUserGPU = Concatenate(*sList, 0);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
/* destroy variables */
delete sList;
delete s1;
delete s2;
......@@ -264,7 +264,7 @@ bool TestConcatenate2()
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete sList;
......@@ -285,7 +285,7 @@ In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
*/
bool TestConcatenate3()
{
/* create list */
/* create list */
TensorList * sList = new TensorList();
/* a source tensor of size (2, 1) */
......@@ -339,7 +339,7 @@ bool TestConcatenate3()
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* add tensors to list */
/* add tensors to list */
sList->Add(s1);
sList->Add(s2);
......@@ -351,35 +351,35 @@ bool TestConcatenate3()
cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* clear list */
sList->Clear();
/* clear list */
sList->Clear();
/* add tensors to list*/
sList->Add(sGPU1);
sList->Add(sGPU2);
/* add tensors to list*/
sList->Add(sGPU1);
sList->Add(sGPU2);
/* call Concatenate function */
_Concatenate(sList, tGPU, 1);
/* call Concatenate function */
_Concatenate(sList, tGPU, 1);
tUserGPU = Concatenate(*sList, 1);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
/* destroy variables */
delete sList;
delete s1;
delete s2;
......@@ -391,7 +391,7 @@ bool TestConcatenate3()
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete sList;
......@@ -402,7 +402,7 @@ bool TestConcatenate3()
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
return cpuTest;
#endif // USE_CUDA
}
......@@ -471,28 +471,28 @@ bool TestConcatenate4()
cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* call Concatenate function */
_Concatenate(sGPU1, sGPU2, tGPU, 1);
/* call Concatenate function */
_Concatenate(sGPU1, sGPU2, tGPU, 1);
tUserGPU = Concatenate(*sGPU1, *sGPU2, 1);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
/* destroy variables */
delete s1;
delete s2;
delete t;
......@@ -503,7 +503,7 @@ bool TestConcatenate4()
//delete[] sDimSize2;
//delete[] tDimSize;
return cpuTest && gpuTest;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1;
......@@ -513,7 +513,7 @@ bool TestConcatenate4()
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
return cpuTest;
#endif // USE_CUDA
}
......
......@@ -30,7 +30,7 @@ In this case, 2 * (2, 1) -> (2, 2), dim=1.
*/
bool TestConcatenateSolely1()
{
/* create list */
/* create list */
TensorList * sList = new TensorList();
/* a source tensor of size (2, 1) */
......@@ -83,44 +83,44 @@ bool TestConcatenateSolely1()
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* add tensors to list */
/* add tensors to list */
sList->Add(s1);
sList->Add(s2);
/* call ConcatenateSolely function */
/* call ConcatenateSolely function */
_ConcatenateSolely(sList, t, 1);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* clear list */
sList->Clear();
/* clear list */
sList->Clear();
/* add tensors to list*/
sList->Add(sGPU1);
sList->Add(sGPU2);
/* add tensors to list*/
sList->Add(sGPU1);
sList->Add(sGPU2);
/* call ConcatenateSolely function */
_ConcatenateSolely(sList, tGPU, 1);
/* call ConcatenateSolely function */
_ConcatenateSolely(sList, tGPU, 1);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
/* destroy variables */
delete sList;
delete s1;
delete s2;
......@@ -132,7 +132,7 @@ bool TestConcatenateSolely1()
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete sList;
......@@ -143,7 +143,7 @@ bool TestConcatenateSolely1()
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
return cpuTest;
#endif // USE_CUDA
}
......@@ -153,7 +153,7 @@ In this case, 2 * (2, 1) -> (4, 1), dim=0.
*/
bool TestConcatenateSolely2()
{
/* create list */
/* create list */
TensorList * sList = new TensorList();
/* a source tensor of size (2, 1) */
......@@ -208,7 +208,7 @@ bool TestConcatenateSolely2()
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* add tensors to list */
/* add tensors to list */
sList->Add(s1);
sList->Add(s2);
......@@ -219,33 +219,33 @@ bool TestConcatenateSolely2()
cpuTest = t->CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* clear list */
sList->Clear();
/* clear list */
sList->Clear();
/* add tensors to list*/
sList->Add(sGPU1);
sList->Add(sGPU2);
/* add tensors to list*/
sList->Add(sGPU1);
sList->Add(sGPU2);
/* call concatenatesolely function */
_ConcatenateSolely(sList, tGPU, 0);
/* call concatenatesolely function */
_ConcatenateSolely(sList, tGPU, 0);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
/* destroy variables */
delete sList;
delete s1;
delete s2;
......@@ -257,7 +257,7 @@ bool TestConcatenateSolely2()
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete sList;
......@@ -268,7 +268,7 @@ bool TestConcatenateSolely2()
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
return cpuTest;
#endif // USE_CUDA
}
......@@ -278,7 +278,7 @@ In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
*/
bool TestConcatenateSolely3()
{
/* create list */
/* create list */
TensorList * sList = new TensorList();
/* a source tensor of size (2, 1) */
......@@ -331,44 +331,44 @@ bool TestConcatenateSolely3()
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* add tensors to list */
/* add tensors to list */
sList->Add(s1);
sList->Add(s2);
/* call ConcatenateSolely function */
/* call ConcatenateSolely function */
_ConcatenateSolely(sList, t, 1);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* clear list */
sList->Clear();
/* clear list */
sList->Clear();
/* add tensors to list*/
sList->Add(sGPU1);
sList->Add(sGPU2);
/* add tensors to list*/
sList->Add(sGPU1);
sList->Add(sGPU2);
/* call ConcatenateSolely function */
_ConcatenateSolely(sList, tGPU, 1);
/* call ConcatenateSolely function */
_ConcatenateSolely(sList, tGPU, 1);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
/* destroy variables */
delete sList;
delete s1;
delete s2;
......@@ -380,7 +380,7 @@ bool TestConcatenateSolely3()
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete sList;
......@@ -391,7 +391,7 @@ bool TestConcatenateSolely3()
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
return cpuTest;
#endif // USE_CUDA
}
......
......@@ -31,72 +31,72 @@ In this case, the flaot32 data type is converted to int32 data type.
*/
bool TestConvertDataType1()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, 2.0F},
{0.5F, 4.0F},
{5.0F, 6.0F} };
int answer[3][2] = { {1, 2},
{0, 4},
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, 2.0F},
{0.5F, 4.0F},
{5.0F, 6.0F} };
int answer[3][2] = { {1, 2},
{0, 4},
{5, 6} };
/* CPU test */
bool cpuTest = true;
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize, X_INT);
/* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize, X_INT);
/* initialize variables */
a->SetData(aData, aUnitNum);
b->SetZeroAll();
/* initialize variables */
a->SetData(aData, aUnitNum);
b->SetZeroAll();
/* call ConvertDataType function */
_ConvertDataType(a, b);
/* call ConvertDataType function */
_ConvertDataType(a, b);
/* check results */
cpuTest = b->CheckData(answer, aUnitNum);
/* check results */
cpuTest = b->CheckData(answer, aUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_INT, 1.0F, 0);
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_INT, 1.0F, 0);
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
/* call ConvertDataType function */
_ConvertDataType(aGPU, bGPU);
/* call ConvertDataType function */
_ConvertDataType(aGPU, bGPU);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum);
/* destroy variables */
delete a;
delete b;
/* destroy variables */
delete a;
delete b;
delete aGPU;
delete bGPU;
delete[] aDimSize;
delete[] aDimSize;
return cpuTest && gpuTest;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
delete[] aDimSize;
/* destroy variables */
delete a;
delete b;
delete[] aDimSize;
return cpuTest;
return cpuTest;
#endif // USE_CUDA
}
......@@ -106,72 +106,72 @@ In this case, the int32 data type is converted to float32 data type.
*/
bool TestConvertDataType2()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
int aData[3][2] = { {1, 2},
{0, 4},
{5, 6} };
DTYPE answer[3][2] = { {1.0F, 2.0F},
{0.0F, 4.0F},
{5.0F, 6.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize, X_INT);
XTensor * b = NewTensor(aOrder, aDimSize);
/* initialize variables */
a->SetData(aData, aUnitNum);
b->SetZeroAll();
/* call ConvertDataType function */
_ConvertDataType(a, b);
/* check results */
cpuTest = b->CheckData(answer, aUnitNum, 1e-4F);
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
int aData[3][2] = { {1, 2},
{0, 4},
{5, 6} };
DTYPE answer[3][2] = { {1.0F, 2.0F},
{0.0F, 4.0F},
{5.0F, 6.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize, X_INT);
XTensor * b = NewTensor(aOrder, aDimSize);
/* initialize variables */
a->SetData(aData, aUnitNum);
b->SetZeroAll();
/* call ConvertDataType function */
_ConvertDataType(a, b);
/* check results */
cpuTest = b->CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_INT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_INT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
/* call ConvertDataType function */
_ConvertDataType(aGPU, bGPU);
/* call ConvertDataType function */
_ConvertDataType(aGPU, bGPU);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete a;
delete b;
/* destroy variables */
delete a;
delete b;
delete aGPU;
delete bGPU;
delete[] aDimSize;
delete[] aDimSize;
return cpuTest && gpuTest;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
delete[] aDimSize;
/* destroy variables */
delete a;
delete b;
delete[] aDimSize;
return cpuTest;
return cpuTest;
#endif // USE_CUDA
}
......@@ -298,53 +298,53 @@ TODO!!
/* test for ConvertDataType Function */
bool TestConvertDataType()
{
XPRINT(0, stdout, "[TEST ConvertDataType] convert data type \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestConvertDataType1();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestConvertDataType2();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
XPRINT(0, stdout, "[TEST ConvertDataType] convert data type \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestConvertDataType1();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestConvertDataType2();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */
caseFlag = TestConvertDataType3();
caseFlag = TestConvertDataType3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* other cases test */
/*
TODO!!
*/
/* other cases test */
/*
TODO!!
*/
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n");
XPRINT(0, stdout, "\n");
return returnFlag;
return returnFlag;
}
} // namespace nts(NiuTrans.Tensor)
......@@ -30,84 +30,84 @@ Set every entry to its cosine value.
*/
bool TestCos1()
{
/* a tensor of size (3, 2) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 3;
dimSize[1] = 2;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE aData[3][2] = { {1.0F, 2.0F},
{-1.0F, -2.0F},
{0.0F, 0.5F} };
DTYPE answer[3][2] = { {0.5403F, -0.4161F},
{0.5403F, -0.4161F},
{1.0F, 0.8776F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(order, dimSize);
/* a tensor of size (3, 2) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 3;
dimSize[1] = 2;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE aData[3][2] = { {1.0F, 2.0F},
{-1.0F, -2.0F},
{0.0F, 0.5F} };
DTYPE answer[3][2] = { {0.5403F, -0.4161F},
{0.5403F, -0.4161F},
{1.0F, 0.8776F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(order, dimSize);
XTensor * b = NewTensor(order, dimSize);
XTensor * aMe = NewTensor(order, dimSize);
XTensor * aMe = NewTensor(order, dimSize);
XTensor bUser;
/* initialize variables */
a->SetData(aData, unitNum);
aMe->SetData(aData, unitNum);
/* initialize variables */
a->SetData(aData, unitNum);
aMe->SetData(aData, unitNum);
/* call Cos function */
_Cos(a, b);
_CosMe(aMe);
/* call Cos function */
_Cos(a, b);
_CosMe(aMe);
bUser = Cos(*a);
/* check results */
cpuTest = b->CheckData(answer, unitNum, 1e-4F) && aMe->CheckData(answer, unitNum, 1e-4F) && bUser.CheckData(answer, unitNum, 1e-4F);
/* check results */
cpuTest = b->CheckData(answer, unitNum, 1e-4F) && aMe->CheckData(answer, unitNum, 1e-4F) && bUser.CheckData(answer, unitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* Initialize variables */
aGPU->SetData(aData, unitNum);
aMeGPU->SetData(aData, unitNum);
/* Initialize variables */
aGPU->SetData(aData, unitNum);
aMeGPU->SetData(aData, unitNum);
/* call Cos function */
/* call Cos function */
_Cos(aGPU, bGPU);
_CosMe(aMeGPU);
_CosMe(aMeGPU);
bUserGPU = Cos(*aGPU);
/* check results */
gpuTest = bGPU->CheckData(answer, unitNum, 1e-4F) && aMeGPU->CheckData(answer, unitNum, 1e-4F) && bUserGPU.CheckData(answer, unitNum, 1e-4F);
/* check results */
gpuTest = bGPU->CheckData(answer, unitNum, 1e-4F) && aMeGPU->CheckData(answer, unitNum, 1e-4F) && bUserGPU.CheckData(answer, unitNum, 1e-4F);
/* destroy variables */
delete a;
delete b;
delete aMe;
/* destroy variables */
delete a;
delete b;
delete aMe;
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] dimSize;
delete[] dimSize;
return cpuTest && gpuTest;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
delete aMe;
delete[] dimSize;
/* destroy variables */
delete a;
delete b;
delete aMe;
delete[] dimSize;
return cpuTest;
return cpuTest;
#endif // USE_CUDA
}
......@@ -119,33 +119,33 @@ TODO!!
/* test for Cos Function */
bool TestCos()
{
XPRINT(0, stdout, "[TEST Cos] set every entry to its cosine value \n");
bool returnFlag = true, caseFlag = true;
XPRINT(0, stdout, "[TEST Cos] set every entry to its cosine value \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestCos1();
/* case 1 test */
caseFlag = TestCos1();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* other cases test */
/*
TODO!!
*/
/* other cases test */
/*
TODO!!
*/
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n");
XPRINT(0, stdout, "\n");
return returnFlag;
return returnFlag;
}
} // namespace nts(NiuTrans.Tensor)
......@@ -30,97 +30,97 @@ In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=0, alpha=0.
*/
bool TestDiv1()
{
/* a source tensor of size (2, 2) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
sDimSize1[1] = 2;
int sUnitNum1 = 1;
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2;
sDimSize2[1] = 2;
int sUnitNum2 = 1;
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2, 2) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
tDimSize[1] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][2] = { {0.0F, 1.0F},
{2.0F, 3.0F} };
DTYPE sData2[2][2] = { {1.0F, 1.0F},
{4.0F, 9.0F} };
DTYPE answer[2][2] = { {0.0F, 1.0F},
{0.5F, 0.3333F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize);
/* a source tensor of size (2, 2) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
sDimSize1[1] = 2;
int sUnitNum1 = 1;
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2;
sDimSize2[1] = 2;
int sUnitNum2 = 1;
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2, 2) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
tDimSize[1] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][2] = { {0.0F, 1.0F},
{2.0F, 3.0F} };
DTYPE sData2[2][2] = { {1.0F, 1.0F},
{4.0F, 9.0F} };
DTYPE answer[2][2] = { {0.0F, 1.0F},
{0.5F, 0.3333F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor * tMe = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */
s1->SetData(sData1, sUnitNum1);
tMe->SetData(sData1, sUnitNum1);
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* initialize variables */
s1->SetData(sData1, sUnitNum1);
tMe->SetData(sData1, sUnitNum1);
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* call Div function */
_Div(s1, s2, t, 0, 0);
_DivMe(tMe, s2, 0, 0);
/* call Div function */
_Div(s1, s2, t, 0, 0);
_DivMe(tMe, s2, 0, 0);
tUser = Div(*s1, *s2, 0);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum, 1e-4F) &&
/* check results */
cpuTest = t->CheckData(answer, tUnitNum, 1e-4F) &&
tMe->CheckData(answer, tUnitNum, 1e-4F) &&
tUser.CheckData(answer, tUnitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor * tMeGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
tMeGPU->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
tMeGPU->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* call Div function */
_Div(sGPU1, sGPU2, tGPU, 0, 0);
_DivMe(tMeGPU, sGPU2, 0, 0);
/* call Div function */
_Div(sGPU1, sGPU2, tGPU, 0, 0);
_DivMe(tMeGPU, sGPU2, 0, 0);
tUserGPU = Div(*sGPU1, *sGPU2, 0);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum, 1e-4F) &&
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum, 1e-4F) &&
tMeGPU->CheckData(answer, tUnitNum, 1e-4F) &&
tUserGPU.CheckData(answer, tUnitNum, 1e-4F);
/* destroy variables */
/* destroy variables */
delete s1;
delete s2;
delete t;
......@@ -133,7 +133,7 @@ bool TestDiv1()
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1;
......@@ -144,7 +144,7 @@ bool TestDiv1()
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
return cpuTest;
#endif // USE_CUDA
}
......@@ -156,33 +156,33 @@ TODO!!
/* test for Div Function */
bool TestDiv()
{
XPRINT(0, stdout, "[TEST Div] element-wise division of two tensors \n");
bool returnFlag = true, caseFlag = true;
XPRINT(0, stdout, "[TEST Div] element-wise division of two tensors \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestDiv1();
/* case 1 test */
caseFlag = TestDiv1();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* other cases test */
/*
TODO!!
*/
/* other cases test */
/*
TODO!!
*/
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n");
XPRINT(0, stdout, "\n");
return returnFlag;
return returnFlag;
}
} // namespace nts(NiuTrans.Tensor)
......@@ -127,8 +127,8 @@ bool TestDivDim1()
#else
/* destroy variables */
delete a;
delete b;
delete c;
delete b;
delete c;
delete cMe;
delete[] aDimSize;
delete[] bDimSize;
......@@ -241,8 +241,8 @@ bool TestDivDim2()
#else
/* destroy variables */
delete a;
delete b;
delete c;
delete b;
delete c;
delete cMe;
delete[] aDimSize;
delete[] bDimSize;
......
......@@ -30,88 +30,88 @@ Set every entry to its exponent value.
*/
bool TestExp1()
{
/* a tensor of size (3, 2) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 3;
dimSize[1] = 2;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE aData[3][2] = { {1.0F, 2.0F},
{-1.0F, -2.0F},
{0.0F, 0.5F} };
DTYPE answer[3][2] = { {2.7183F, 7.3891F},
{0.3679F, 0.1353F},
{1.0F, 1.6487F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(order, dimSize);
/* a tensor of size (3, 2) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 3;
dimSize[1] = 2;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE aData[3][2] = { {1.0F, 2.0F},
{-1.0F, -2.0F},
{0.0F, 0.5F} };
DTYPE answer[3][2] = { {2.7183F, 7.3891F},
{0.3679F, 0.1353F},
{1.0F, 1.6487F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(order, dimSize);
XTensor * b = NewTensor(order, dimSize);
XTensor * aMe = NewTensor(order, dimSize);
XTensor * aMe = NewTensor(order, dimSize);
XTensor bUser;
/* initialize variables */
a->SetData(aData, unitNum);
aMe->SetData(aData, unitNum);
/* initialize variables */
a->SetData(aData, unitNum);
aMe->SetData(aData, unitNum);
/* call Exp function */
_Exp(a, b);
_ExpMe(aMe);
/* call Exp function */
_Exp(a, b);
_ExpMe(aMe);
bUser = Exp(*a);
/* check results */
cpuTest = b->CheckData(answer, unitNum, 1e-4F) &&
/* check results */
cpuTest = b->CheckData(answer, unitNum, 1e-4F) &&
aMe->CheckData(answer, unitNum, 1e-4F) &&
bUser.CheckData(answer, unitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* Initialize variables */
aGPU->SetData(aData, unitNum);
aMeGPU->SetData(aData, unitNum);
/* Initialize variables */
aGPU->SetData(aData, unitNum);
aMeGPU->SetData(aData, unitNum);
/* call Exp function */
/* call Exp function */
_Exp(aGPU, bGPU);
_ExpMe(aMeGPU);
_ExpMe(aMeGPU);
bUserGPU = Exp(*aGPU);
/* check results */
gpuTest = bGPU->CheckData(answer, unitNum, 1e-4F) &&
/* check results */
gpuTest = bGPU->CheckData(answer, unitNum, 1e-4F) &&
aMeGPU->CheckData(answer, unitNum, 1e-4F) && \
bUserGPU.CheckData(answer, unitNum, 1e-4F);
/* destroy variables */
delete a;
delete b;
delete aMe;
/* destroy variables */
delete a;
delete b;
delete aMe;
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] dimSize;
delete[] dimSize;
return cpuTest && gpuTest;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
delete aMe;
delete[] dimSize;
/* destroy variables */
delete a;
delete b;
delete aMe;
delete[] dimSize;
return cpuTest;
return cpuTest;
#endif // USE_CUDA
}
......@@ -123,33 +123,33 @@ TODO!!
/* test for Exp Function */
bool TestExp()
{
XPRINT(0, stdout, "[TEST Exp] set every entry to its exponent value \n");
bool returnFlag = true, caseFlag = true;
XPRINT(0, stdout, "[TEST Exp] set every entry to its exponent value \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestExp1();
/* case 1 test */
caseFlag = TestExp1();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* other cases test */
/*
TODO!!
*/
/* other cases test */
/*
TODO!!
*/
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n");
XPRINT(0, stdout, "\n");
return returnFlag;
return returnFlag;
}
} // namespace nts(NiuTrans.Tensor)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论