Commit 143e048c by huchi

add conditions for link

parent 855a2803
......@@ -109,7 +109,7 @@ public:
XTensor bv;
/* max relative window size */
XTensor rp_embedding_k;
XTensor rpEmbK;
/* transformation after dot-product attention */
XTensor wa;
......@@ -140,7 +140,7 @@ public:
DTYPE dropoutP;
/* max relative window size */
int max_relative_position;
int maxRP;
public:
......@@ -160,14 +160,14 @@ public:
bool isTraining, Cache* cache, int cacheType);
/* make the attention network given keys, queries and values (after linear transformation) */
XTensor MakeAttention(XTensor *k, XTensor *q, XTensor *v, const XTensor *mask, bool isTraining, bool is_encoder);
XTensor MakeAttention(XTensor *k, XTensor *q, XTensor *v, const XTensor *mask, bool isTraining, bool isEnc);
/* make the attention network given keys, queries and values (after linear transformation) */
XTensor MakeRPRAttention(XTensor *k, XTensor *q, XTensor *v, XTensor *mask, bool isTraining, bool is_encoder);
XTensor MakeRPRAttention(XTensor *k, XTensor *q, XTensor *v, XTensor *mask, bool isTraining, bool isEnc);
void GetRPEmbedding(XTensor* emb_matrix, const int len_q, const int len_kv, const int max_relative_length, const int device_id, const bool is_encoder);
void GetRPEmbedding(XTensor* embMatrix, const int lenQ, const int lenKV, const int maxRelativeLen, const int device_id, const bool isEnc);
void RPDotProduct(XTensor* x, XTensor* y, XTensor* z, XTensor* attention, const bool is_key);
void RPDotProduct(XTensor* x, XTensor* y, XTensor* z, XTensor* attention, const bool isKey);
};
}
......
......@@ -365,7 +365,7 @@ void T2TModel::GetParams(TensorList &list)
list.Add(&encoder->attentions[i].bq);
list.Add(&encoder->attentions[i].bk);
list.Add(&encoder->attentions[i].bv);
list.Add(&encoder->attentions[i].rp_embedding_k);
list.Add(&encoder->attentions[i].rpEmbK);
list.Add(&encoder->attentions[i].wa);
list.Add(&encoder->attentions[i].ba);
list.Add(&encoder->fnns[i].w1);
......@@ -389,7 +389,7 @@ void T2TModel::GetParams(TensorList &list)
list.Add(&decoder->attentions[i].bq);
list.Add(&decoder->attentions[i].bk);
list.Add(&decoder->attentions[i].bv);
list.Add(&decoder->attentions[i].rp_embedding_k);
list.Add(&decoder->attentions[i].rpEmbK);
list.Add(&decoder->attentions[i].wa);
list.Add(&decoder->attentions[i].ba);
list.Add(&decoder->attLayerNorms[i].w);
......@@ -464,11 +464,13 @@ void T2TModel::Read(const char * fn)
GetParams(params);
for(int i = 0; i < params.count; i++){
XTensor * p = (XTensor*)params.Get(i);
FastRead(p, file);
// p->Read(file, "");
}
fclose(file);
double elapsed = GetClockSec() - startT;
......
......@@ -48,7 +48,7 @@ void T2TTester::Init(int argc, char** argv)
{
LoadParamInt(argc, argv, "vsize", &vSize, 1);
LoadParamInt(argc, argv, "vsizetgt", &vSizeTgt, vSize);
LoadParamInt(argc, argv, "sentBatch", &sentBatch, 1);
LoadParamInt(argc, argv, "sentbatch", &sentBatch, 1);
LoadParamBool(argc, argv, "sort", &batchLoader.sortBuffer, false);
seacher.Init(argc, argv);
......@@ -106,8 +106,6 @@ void T2TTester::Test(const char* fn, const char* ofn, T2TModel* model)
int count = 0;
while (!batchLoader.IsEmpty()) {
count++;
printf("sent: %d\n", count);
wordCount = 0;
/* reset cache for decoder */
......@@ -153,8 +151,13 @@ void T2TTester::Dump(FILE* file, IntList& output)
{
for (int i = 0; i < output.Size(); i++) {
int w = output[i];
if (w < 0)
break;
if (w < 0) {
if (i == 0)
return;
else
break;
}
fprintf(file, "%d ", w);
}
fprintf(file, "\n");
......
......@@ -60,77 +60,75 @@ void test() {
/*
entry of the transformer tool
>> argc - number of the command-line arguments
>> argv - the command-line arguments
<< return - 1 if no argument is given, 0 otherwise

The options read here are "model", "rawmodel", "input", "rawinput", "output",
"beamsearch", "convertfile" and "convertmodel".
*/
int TransformerMain(int argc, const char ** argv)
{
    test();

    if(argc == 0)
        return 1;

    /* copy the arguments into non-const buffers; these copies are what
       all helpers below receive */
    char ** args = new char*[argc];
    for(int i = 0; i < argc; i++){
        args[i] = new char[strlen(argv[i]) + 1];
        strcpy(args[i], argv[i]);
    }

    ShowParams(argc, args);

    bool convertFile = false;
    bool isBeamSearch = false;
    bool convertModel = false;

    /* buffers for the file names given on the command line */
    char * modelFN = new char[MAX_LINE_LENGTH];
    char * rawFN = new char[MAX_LINE_LENGTH];
    char * testFN = new char[MAX_LINE_LENGTH];
    char * outputFN = new char[MAX_LINE_LENGTH];
    char * rawModel = new char[MAX_LINE_LENGTH];

    LoadParamString(argc, args, "model", modelFN, "");
    LoadParamString(argc, args, "rawmodel", rawModel, "");
    LoadParamString(argc, args, "input", testFN, "");
    LoadParamString(argc, args, "rawinput", rawFN, "");
    LoadParamString(argc, args, "output", outputFN, "");
    LoadParamBool(argc, args, "beamsearch", &isBeamSearch, false);
    LoadParamBool(argc, args, "convertfile", &convertFile, false);
    LoadParamBool(argc, args, "convertmodel", &convertModel, false);

    srand((unsigned int)time(NULL));

    T2TModel model;
    model.InitModel(argc, args);

    /* convert test file from text to binary */
    if (convertFile) {
        DataSet::ConvertFile(rawFN, testFN);
    }

    /* convert parameters from text to binary */
    if (convertModel) {
        TensorList params(100);
        model.GetParams(params);
        ConvertModelFile(&params, rawModel, modelFN);
    }

    /* load the model if necessary (i.e., a model file name is given) */
    if(strcmp(modelFN, ""))
        model.Read(modelFN);

    /* test the model on the new data (both input and output files are required) */
    if(strcmp(testFN, "") && strcmp(outputFN, "")){
        T2TTester searcher;
        searcher.Init(argc, args);
        searcher.Test(testFN, outputFN, &model);
    }

    /* release every buffer allocated above (rawFN used to be leaked) */
    delete[] modelFN;
    delete[] rawFN;
    delete[] testFN;
    delete[] outputFN;
    delete[] rawModel;

    for(int i = 0; i < argc; i++)
        delete[] args[i];
    delete[] args;

    return 0;

    /* NOTE(review): a commented-out copy of this body follows. It uses different
       option names ("rawModel", "test", "rawFile", "convertFile", "convertModel")
       and does not release rawFN either - confirm which spelling is intended
       before removing one of the two versions. */
    //if(argc == 0)
    // return 1;
    //char ** args = new char*[argc];
    //for(int i = 0; i < argc; i++){
    // args[i] = new char[strlen(argv[i]) + 1];
    // strcpy(args[i], argv[i]);
    //}
    //ShowParams(argc, args);
    //bool convertFile = false;
    //bool isBeamSearch = false;
    //bool convertModel = false;
    //
    //char * modelFN = new char[MAX_LINE_LENGTH];
    //char * rawFN = new char[MAX_LINE_LENGTH];
    //char * testFN = new char[MAX_LINE_LENGTH];
    //char * outputFN = new char[MAX_LINE_LENGTH];
    //char * rawModel = new char[MAX_LINE_LENGTH];
    //LoadParamString(argc, args, "model", modelFN, "");
    //LoadParamString(argc, args, "rawModel", rawModel, "");
    //LoadParamString(argc, args, "test", testFN, "");
    //LoadParamString(argc, args, "rawFile", rawFN, "");
    //LoadParamString(argc, args, "output", outputFN, "");
    //LoadParamBool(argc, args, "beamsearch", &isBeamSearch, false);
    //LoadParamBool(argc, args, "convertFile", &convertFile, false);
    //LoadParamBool(argc, args, "convertModel", &convertModel, false);
    //
    //srand((unsigned int)time(NULL));
    //T2TModel model;
    //model.InitModel(argc, args);
    ///* convert test file from text to binary */
    //if (convertFile) {
    // DataSet::ConvertFile(rawFN, testFN);
    //}
    //
    ///* convert parameters from text to binary */
    //if (convertModel) {
    // TensorList params(100);
    // model.GetParams(params);
    // ConvertModelFile(&params, rawModel, modelFN);
    //}
    ///* load the model if neccessary */
    //if(strcmp(modelFN, ""))
    // model.Read(modelFN);
    ///* test the model on the new data */
    //if(strcmp(testFN, "") && strcmp(outputFN, "")){
    // T2TTester searcher;
    // searcher.Init(argc, args);
    // searcher.Test(testFN, outputFN, &model);
    //}
    //delete[] modelFN;
    //delete[] testFN;
    //delete[] outputFN;
    //delete[] rawModel;
    //for(int i = 0; i < argc; i++)
    // delete[] args[i];
    //delete[] args;
    //return 0;
}
}
......@@ -157,21 +157,23 @@ void nts::DataSet::ConvertFile(const char* src, const char* tgt)
ifstream ifile(src, ios::in);
FILE* ofile = fopen(tgt, "wb");
CheckNTErrors(ofile, "unable to create the output file");
string line;
long idx = 0;
size_t idx = 0;
const int maxExample = 10240;
IntList dataList[maxExample];
while (getline(ifile, line)){
SplitInt(line, " ", dataList[idx++]);
}
/* part 1: number of examples */
fwrite(&idx, sizeof(idx), 1, ofile);
/* part 2: offset of all examples */
for (int i = 0; i < idx; i++) {
int size = (dataList[i].Size());
size_t size = (dataList[i].Size());
fwrite(&size, sizeof(size), 1, ofile);
}
......@@ -212,7 +214,7 @@ void DataSet::Init(const char* fname, size_t myBufferSize, bool mySortBuffer)
CheckNTErrors(exampleNumber > 0, "invalid example numbers");
offset.Reserve(exampleNumber);
for (int i = 0; i < exampleNumber; i++) {
int off;
size_t off;
fread(&off, sizeof(off), 1, fp);
offset.Add(off);
}
......
......@@ -56,7 +56,7 @@ public:
size_t bufferUsed;
/* size of data in the src file */
long exampleNumber;
size_t exampleNumber;
/* current index of the offset */
size_t index;
......
......@@ -160,8 +160,10 @@ extern bool useCUDA;
/* BLAS interfaces */
#ifdef DOUBELPRICSION
#define GEMM XBLAS_DGEMM
#define AXPY XBLAS_DAXPY
#else
#define GEMM XBLAS_SGEMM
#define AXPY XBLAS_SAXPY
#endif
extern void InitGlobalAll();
......
......@@ -215,18 +215,22 @@ XTensor Div(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim)
_Div(&a, &b, &c, alpha, leadingDim);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIV);
XLink::AddParamToHead(&c, alpha);
XLink::AddParamToHeadInt(&c, leadingDim);
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_DIV);
XLink::AddParamToHead(&c, alpha);
XLink::AddParamToHeadInt(&c, leadingDim);
}
}
else if(n >= 0 && n < a.order){
/* call _DivDim function */
_DivDim(&a, &b, &c, n, alpha);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, alpha);
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, alpha);
}
}
else{
ShowNTErrors("Something is wrong!");
......@@ -261,7 +265,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin
/* call _Div function */
_Div(&a, &b, &c, 0, leadingDim);
if (c.enableGrad) {
if (a.enableGrad && b.enableGrad) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIV);
XLink::AddParamToHead(&c, alpha);
......@@ -272,7 +276,7 @@ void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadin
/* call _DivDim function */
_DivDim(&a, &b, &c, n, alpha);
if (c.enableGrad) {
if (a.enableGrad && b.enableGrad) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n);
......
......@@ -164,10 +164,12 @@ XTensor DivDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha)
_DivDim(&a, &b, &c, n, alpha);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, alpha);
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, alpha);
}
return c;
}
......@@ -193,7 +195,7 @@ void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha)
/* call _DivDim function */
_DivDim(&a, &b, &c, n, alpha);
if (c.enableGrad == true) {
if (a.enableGrad && b.enableGrad) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_DIVDIM);
XLink::AddParamToHeadInt(&c, n);
......
......@@ -155,8 +155,10 @@ XTensor Mask(const XTensor &a, const XTensor &mask, DTYPE alpha)
_Mask(&a, &mask, &c, alpha);
/* tensor connections */
XLink::MakeLink(&a, &mask, &c, MATH_MASK);
XLink::AddParamToHead(&c, alpha);
if (a.enableGrad) {
XLink::MakeLink(&a, &mask, &c, MATH_MASK);
XLink::AddParamToHead(&c, alpha);
}
return c;
}
......@@ -176,7 +178,7 @@ void Mask(const XTensor &a, const XTensor &mask, XTensor &c, DTYPE alpha)
/* call _Mask function */
_Mask(&a, &mask, &c, alpha);
if (c.enableGrad) {
if (a.enableGrad) {
XLink::MakeLink(&a, &mask, &c, MATH_MASK);
XLink::AddParamToHead(&c, alpha);
}
......
......@@ -296,10 +296,12 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
_MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, transposedA);
XLink::AddParamToHeadTrans(&c, transposedB);
XLink::AddParamToHead(&c, alpha);
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, transposedA);
XLink::AddParamToHeadTrans(&c, transposedB);
XLink::AddParamToHead(&c, alpha);
}
/* destroy variables */
delete[] dimSize;
......@@ -344,7 +346,7 @@ void MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
/* call _MatrixMul function */
_MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, beta, parallelRunner);
if (c.enableGrad) {
if (a.enableGrad && b.enableGrad) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, transposedA);
......@@ -393,10 +395,12 @@ XTensor MatrixMul(const XTensor &a, const XTensor &b,
_MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHead(&c, alpha);
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHead(&c, alpha);
}
/* destroy variables */
delete[] dimSize;
......@@ -440,7 +444,7 @@ void MatrixMul(const XTensor &a, const XTensor &b, XTensor &c,
/* call _MatrixMul function */
_MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner);
if (c.enableGrad) {
if (a.enableGrad && b.enableGrad) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMUL);
XLink::AddParamToHeadTrans(&c, X_NOTRANS);
......
......@@ -82,10 +82,11 @@ void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
b->dataType == DEFAULT_DTYPE &&
c->dataType == DEFAULT_DTYPE)
{
if (useBLAS)
#if defined(USE_BLAS)
_MatrixMULCPU(a, transposedA, b, transposedB, c, alpha, beta);
else
#else
_MatrixMul2DParallel(a, transposedA, b, transposedB, c, alpha, beta, parallelRunner);
#endif
}
else {
// TODO!!
......
......@@ -320,10 +320,12 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const
_MatrixMulBatched(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMULBATCHED);
XLink::AddParamToHeadTrans(&c, transposedA);
XLink::AddParamToHeadTrans(&c, transposedB);
XLink::AddParamToHead(&c, alpha);
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMULBATCHED);
XLink::AddParamToHeadTrans(&c, transposedA);
XLink::AddParamToHeadTrans(&c, transposedB);
XLink::AddParamToHead(&c, alpha);
}
/* destroy variables */
delete[] dimSize;
......@@ -376,10 +378,12 @@ XTensor MatrixMulBatched(const XTensor &a, const XTensor &b,
_MatrixMulBatched(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMULBATCHED);
XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHead(&c, alpha);
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_MATRIXMULBATCHED);
XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHead(&c, alpha);
}
/* destroy variables */
delete[] dimSize;
......
......@@ -118,11 +118,12 @@ XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b,
}
/* tensor connections */
XLink::MakeLink(&x, &w, &b, &c, MATH_MULANDSHIFT);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHeadTrans(&c, X_NOTRANS);
//XLink::AddParamToHead(&c, beta);
if (w.enableGrad && b.enableGrad) {
XLink::MakeLink(&x, &w, &b, &c, MATH_MULANDSHIFT);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHeadTrans(&c, X_NOTRANS);
XLink::AddParamToHeadTrans(&c, X_NOTRANS);
}
/* destroy variables */
delete[] dimSize;
......@@ -192,11 +193,12 @@ XTensor MulAndShift(const XTensor& x, MATRIX_TRANS_TYPE transposedA,
}
/* tensor connections */
XLink::MakeLink(&x, &w, &b, &c, MATH_MULANDSHIFT);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHeadTrans(&c, transposedA);
XLink::AddParamToHeadTrans(&c, transposedB);
//XLink::AddParamToHead(&c, beta);
if (w.enableGrad && b.enableGrad) {
XLink::MakeLink(&x, &w, &b, &c, MATH_MULANDSHIFT);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHeadTrans(&c, transposedA);
XLink::AddParamToHeadTrans(&c, transposedB);
}
/* destroy variables */
delete[] dimSize;
......
......@@ -216,18 +216,22 @@ XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim
_Multiply(&a, &b, &c, 0, leadingDim);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY);
XLink::AddParamToHead(&c, alpha);
XLink::AddParamToHeadInt(&c, leadingDim);
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY);
XLink::AddParamToHead(&c, alpha);
XLink::AddParamToHeadInt(&c, leadingDim);
}
}
else if(n >= 0 && n < a.order){
/* call _MultiplyDim function */
_MultiplyDim(&a, &b, &c, n, alpha);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, alpha);
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, alpha);
}
}
else{
ShowNTErrors("Something is wrong!");
......@@ -262,7 +266,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l
/* call _Multiply function */
_Multiply(&a, &b, &c, 0, leadingDim);
if (c.enableGrad) {
if (a.enableGrad && b.enableGrad) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLY);
XLink::AddParamToHead(&c, alpha);
......@@ -273,7 +277,7 @@ void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int l
/* call _MultiplyDim function */
_MultiplyDim(&a, &b, &c, n, alpha);
if (c.enableGrad) {
if (a.enableGrad && b.enableGrad) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
XLink::AddParamToHeadInt(&c, n);
......
......@@ -180,9 +180,11 @@ XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n)
_MultiplyDim(&a, &b, &c, n, 0);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, 0);
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, 0);
}
return c;
}
......@@ -208,7 +210,7 @@ void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n)
/* call _MultiplyDim function */
_MultiplyDim(&a, &b, &c, n, 0);
if (c.enableGrad) {
if (a.enableGrad && b.enableGrad) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
XLink::AddParamToHeadInt(&c, n);
......@@ -350,8 +352,10 @@ XTensor MultiplyBroadcast(const XTensor &a, const XTensor &b)
_MultiplyBroadcast(&a, &b, &c, 0);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYBROADCAST);
XLink::AddParamToHead(&c, 0);
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYBROADCAST);
XLink::AddParamToHead(&c, 0);
}
return c;
}
......@@ -374,7 +378,7 @@ void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c)
/* call _MultiplyBroadcast function */
_MultiplyBroadcast(&a, &b, &c, 0);
if (c.enableGrad) {
if (a.enableGrad && b.enableGrad) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYBROADCAST);
XLink::AddParamToHead(&c, 0);
......
......@@ -190,17 +190,21 @@ XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta)
_Sub(&a, &b, &c, beta);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUB);
XLink::AddParamToHead(&c, beta);
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_SUB);
XLink::AddParamToHead(&c, beta);
}
}
else if(n >= 0 && n < a.order){
/* call _SubDim function */
_SubDim(&a, &b, &c, n, beta);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta);
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta);
}
}
else{
ShowNTErrors("Something is wrong!");
......@@ -229,7 +233,7 @@ void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
/* call _Sub function */
_Sub(&a, &b, &c, beta);
if (c.enableGrad) {
if (a.enableGrad && b.enableGrad) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUB);
XLink::AddParamToHead(&c, beta);
......@@ -239,7 +243,7 @@ void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
/* call _SubDim function */
_SubDim(&a, &b, &c, n, beta);
if (c.enableGrad) {
if (a.enableGrad && b.enableGrad) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n);
......
......@@ -164,9 +164,11 @@ XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
_SubDim(&a, &b, &c, n, beta);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta);
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta);
}
return c;
}
......@@ -193,7 +195,7 @@ void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
/* call _SubDim function */
_SubDim(&a, &b, &c, n, beta);
if (c.enableGrad) {
if (a.enableGrad && b.enableGrad) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUBDIM);
XLink::AddParamToHeadInt(&c, n);
......
......@@ -204,17 +204,21 @@ XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta)
_Sum(&a, &b, &c, beta);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUM);
XLink::AddParamToHead(&c, beta);
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_SUM);
XLink::AddParamToHead(&c, beta);
}
}
else if(n >= 0 && n < a.order){
/* call _SumDim function */
_SumDim(&a, &b, &c, n, beta);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta);
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta);
}
}
else{
ShowNTErrors("Something is wrong!");
......
......@@ -181,9 +181,11 @@ XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
_SumDim(&a, &b, &c, n, beta);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta);
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
XLink::AddParamToHeadInt(&c, n);
XLink::AddParamToHead(&c, beta);
}
return c;
}
......@@ -210,7 +212,7 @@ void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
/* call _SumDim function */
_SumDim(&a, &b, &c, n, beta);
if (c.enableGrad) {
if (a.enableGrad && b.enableGrad) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUMDIM);
XLink::AddParamToHeadInt(&c, n);
......@@ -353,9 +355,11 @@ XTensor SumBroadcast(const XTensor &a, const XTensor &b, DTYPE beta)
_SumBroadcast(&a, &b, &c, beta);
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUMBROADCAST);
XLink::AddParamToHead(&c, beta);
if (a.enableGrad && b.enableGrad) {
XLink::MakeLink(&a, &b, &c, MATH_SUMBROADCAST);
XLink::AddParamToHead(&c, beta);
}
return c;
}
......@@ -377,7 +381,7 @@ void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
/* call _SumBroadcast function */
_SumBroadcast(&a, &b, &c, beta);
if (c.enableGrad) {
if (a.enableGrad && b.enableGrad) {
/* tensor connections */
XLink::MakeLink(&a, &b, &c, MATH_SUMBROADCAST);
XLink::AddParamToHead(&c, beta);
......
......@@ -121,7 +121,8 @@ XTensor ConvertDataType(const XTensor & input, TENSOR_DATA_TYPE dataType)
_ConvertDataType(&input, &output);
/* tensor connection */
XLink::MakeLink(&input, NULL, &output, GETANDSET_CONVERTDATATYPE);
if(input.enableGrad)
XLink::MakeLink(&input, NULL, &output, GETANDSET_CONVERTDATATYPE);
return output;
}
......@@ -136,7 +137,7 @@ void ConvertDataType(const XTensor & input, XTensor & output, TENSOR_DATA_TYPE d
_ConvertDataType(&input, &output);
/* tensor connection */
if (output.enableGrad)
if (input.enableGrad)
XLink::MakeLink(&input, NULL, &output, GETANDSET_CONVERTDATATYPE);
}
......
......@@ -117,10 +117,12 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high)
_SelectRange(&a, &c, dim, low, high);
/* tensor connection */
XLink::MakeLink(&a, NULL, &c, GETANDSET_SELECT);
XLink::AddParamToHeadInt(&c, dim);
XLink::AddParamToHeadInt(&c, low);
XLink::AddParamToHeadInt(&c, high);
if (a.enableGrad) {
XLink::MakeLink(&a, NULL, &c, GETANDSET_SELECT);
XLink::AddParamToHeadInt(&c, dim);
XLink::AddParamToHeadInt(&c, low);
XLink::AddParamToHeadInt(&c, high);
}
/* destroy variables */
delete[] dimSize;
......
......@@ -526,6 +526,43 @@ void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
}
}
/*
generate the data items of an arithmetic sequence, i.e., item i is lower + i * step
>> tensor - the tensor whose data array would be initialized (it must be of order 1
            and hold exactly ceil(|upper - lower| / |step|) items)
>> lower - the first value of the sequence
>> upper - the end of the sequence (not included itself)
>> step - the difference between two adjacent items (it must not be 0)
NOTE: only the magnitudes of (upper - lower) and step are used to compute the length;
      the sign of step is not checked against the direction from lower to upper.
*/
void _SetDataRange(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE step)
{
    CheckNTErrors((tensor->order == 1), "Tensor must be 1 dimension!");

    /* a zero step would lead to a division by zero in the length computation */
    CheckNTErrors((step != 0), "The step must not be zero!");

    /* compute the true length according to the (lower, upper, step) */
    DTYPE size = fabs(upper - lower);
    int num = (int)ceil(size / fabs(step));
    CheckNTErrors((tensor->unitNum == num), "Unit number of the tensor is not matched.");

    /* the temporary buffer is kept as a typed pointer in each branch because
       calling delete[] on a void pointer is undefined behavior */
    if (tensor->dataType == X_INT) {
        int * data = new int[num];
        for (int i = 0; i < num; i++)
            data[i] = (int)(lower + i * step);

        /* set the data from the array */
        tensor->SetData(data, num);
        delete[] data;
    }
    else if (tensor->dataType == X_FLOAT) {
        float * data = new float[num];
        for (int i = 0; i < num; i++)
            data[i] = (float)(lower + i * step);

        /* set the data from the array */
        tensor->SetData(data, num);
        delete[] data;
    }
    else {
        ShowNTErrors("TODO!");
    }
}
/*
generate data items with a uniform distribution in [lower, upper] and set
the item to a pre-defined value if the item >= p, set the item to 0 otherwise
......
......@@ -69,6 +69,9 @@ void _SetDataRand(XTensor * tensor, int rNum, int cNum);
/* generate data items with a uniform distribution in [lower, upper] */
void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper);
/* generate data items with a range by start, end and the step */
void _SetDataRange(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE step);
/* generate data items with a uniform distribution in [lower, upper] and set
the item to a pre-defined value if the item >= p, set the item to 0 otherwise */
void _SetDataRandP(XTensor * tensor, DTYPE lower, DTYPE upper, DTYPE p, DTYPE value);
......
......@@ -67,18 +67,15 @@ int BinaryMod(int x, int num)
}
/* define three macros separately, specifying the respective function names */
#ifdef USE_CUDA
#define _SIMPLE_BINARY_FUNCTION(_funcName, _cudaFuncName, origFunc) \
template<class T> \
void _funcName(const XTensor * a, XTensor * b, T num) \
{ \
/* run it on GPUs */ \
if (a->devID >= 0) { \
if (useCUDA) { \
_cudaFuncName(a, b, num); \
return; \
} \
else \
ShowNTErrors("No GPU devices support!") \
_cudaFuncName(a, b, num); \
return; \
} \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same data type!"); \
......@@ -105,7 +102,43 @@ void _funcName(const XTensor * a, XTensor * b, T num)
} \
template void _funcName<int>(const XTensor*, XTensor*, int); \
template void _funcName<float>(const XTensor*, XTensor*, float); \
template void _funcName<double>(const XTensor*, XTensor*, double);
template void _funcName<double>(const XTensor*, XTensor*, double);
#else
#define _SIMPLE_BINARY_FUNCTION(_funcName, origFunc) \
template<class T> \
void _funcName(const XTensor * a, XTensor * b, T num) \
{ \
/* run it on GPUs */ \
if (a->devID >= 0) { \
ShowNTErrors("No GPU devices support!") \
} \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same data type!"); \
if (a->dataType == X_INT) { \
int * d = (int*)a->data; \
int * db = (int*)b->data; \
for (int i = 0; i < a->unitNum; i++) \
db[i] = (int)origFunc((int)d[i], (T)num); \
} \
else if (a->dataType == X_FLOAT) { \
float * d = (float*)a->data; \
float * db = (float*)b->data; \
for (int i = 0; i < a->unitNum; i++) \
db[i] = (float)origFunc((float)d[i], (T)num); \
} \
else if (a->dataType == X_DOUBLE) { \
double * d = (double*)a->data; \
double * db = (double*)b->data; \
for (int i = 0; i < a->unitNum; i++) \
db[i] = (double)origFunc((double)d[i], (T)num); \
} \
else \
ShowNTErrors("TO DO!"); \
} \
template void _funcName<int>(const XTensor*, XTensor*, int); \
template void _funcName<float>(const XTensor*, XTensor*, float); \
template void _funcName<double>(const XTensor*, XTensor*, double);
#endif
#define _SIMPLE_BINARY_FUNCTION_ME(_funcNameMe, _funcName) \
template<class T> \
......@@ -134,7 +167,9 @@ XTensor funcName(const XTensor &a, T num)
XTensor b(&a); \
b.SetTMPFlag(); \
_funcName(&a, &b, num); \
XLink::MakeLink(&a, NULL, &b, operationId); \
if(a.enableGrad){ \
XLink::MakeLink(&a, NULL, &b, operationId); \
} \
XLink::AddParamToHead(&b, num); \
return b; \
} \
......@@ -150,7 +185,7 @@ void funcName(const XTensor &a, XTensor &b, T num)
InitTensor(&b, &a); \
} \
_funcName(&a, &b, num); \
if (b.enableGrad) { \
if (a.enableGrad) { \
XLink::MakeLink(&a, NULL, &b, operationId); \
XLink::AddParamToHead(&b, num); \
} \
......@@ -159,32 +194,40 @@ template void funcName<int>(const XTensor&, XTensor&, int);
template void funcName<float>(const XTensor&, XTensor&, float); \
template void funcName<double>(const XTensor&, XTensor&, double);
#ifdef USE_CUDA
_SIMPLE_BINARY_FUNCTION(_Descale, _CudaDescale, BinaryDescale)
_SIMPLE_BINARY_FUNCTION(_Mod, _CudaMod, BinaryMod)
_SIMPLE_BINARY_FUNCTION(_Power, _CudaPower, BinaryPower)
_SIMPLE_BINARY_FUNCTION(_Scale, _CudaScale, BinaryScale)
_SIMPLE_BINARY_FUNCTION(_Shift, _CudaShift, BinaryShift)
#else
_SIMPLE_BINARY_FUNCTION(_Descale, BinaryDescale)
_SIMPLE_BINARY_FUNCTION(_Mod, BinaryMod)
_SIMPLE_BINARY_FUNCTION(_Power, BinaryPower)
_SIMPLE_BINARY_FUNCTION(_Scale, BinaryScale)
_SIMPLE_BINARY_FUNCTION(_Shift, BinaryShift)
#endif
_SIMPLE_BINARY_FUNCTION_ME(_DescaleMe, _Descale)
SIMPLE_BINARY_FUNCTION_ME(DescaleMe, _Descale)
SIMPLE_BINARY_FUNCTION(Descale, _Descale, MATH_DESCALE)
SIMPLE_BINARY_FUNCTION_VOID(Descale, _Descale, MATH_DESCALE)
_SIMPLE_BINARY_FUNCTION(_Mod, _CudaMod, BinaryMod)
_SIMPLE_BINARY_FUNCTION_ME(_ModMe, _Mod)
SIMPLE_BINARY_FUNCTION_ME(ModMe, _Mod)
SIMPLE_BINARY_FUNCTION(Mod, _Mod, MATH_MOD)
SIMPLE_BINARY_FUNCTION_VOID(Mod, _Mod, MATH_MOD)
_SIMPLE_BINARY_FUNCTION(_Power, _CudaPower, BinaryPower)
_SIMPLE_BINARY_FUNCTION_ME(_PowerMe, _Power)
SIMPLE_BINARY_FUNCTION_ME(PowerMe, _Power)
SIMPLE_BINARY_FUNCTION(Power, _Power, MATH_POWER)
SIMPLE_BINARY_FUNCTION_VOID(Power, _Power, MATH_POWER)
_SIMPLE_BINARY_FUNCTION(_Scale, _CudaScale, BinaryScale)
_SIMPLE_BINARY_FUNCTION_ME(_ScaleMe, _Scale)
SIMPLE_BINARY_FUNCTION_ME(ScaleMe, _Scale)
SIMPLE_BINARY_FUNCTION(Scale, _Scale, MATH_SCALE)
SIMPLE_BINARY_FUNCTION_VOID(Scale, _Scale, MATH_SCALE)
_SIMPLE_BINARY_FUNCTION(_Shift, _CudaShift, BinaryShift)
_SIMPLE_BINARY_FUNCTION_ME(_ShiftMe, _Shift)
SIMPLE_BINARY_FUNCTION_ME(ShiftMe, _Shift)
SIMPLE_BINARY_FUNCTION(Shift, _Shift, MATH_SHIFT)
......
......@@ -100,9 +100,11 @@ XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper)
_Clip(&a, &b, lower, upper);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_CLIP);
XLink::AddParamToHead(&b, lower);
XLink::AddParamToHead(&b, upper);
if (a.enableGrad) {
XLink::MakeLink(&a, NULL, &b, MATH_CLIP);
XLink::AddParamToHead(&b, lower);
XLink::AddParamToHead(&b, upper);
}
return b;
}
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-08-03
*/
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-08-03
*/
#ifndef __CLIP_H__
#define __CLIP_H__
......@@ -29,18 +29,20 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its clip value */
void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper);
/*
set every entry to its clip value (do it on site)
keep the result in the input tensor a and return nothing
*/
/* set every entry to its clip value (do it on site)
keep the result in the input tensor a and return nothing */
void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper);
/*
set every entry to its clip value (return a XTensor structure)
make a new tensor to keep the result and return it
*/
/* set every entry to its clip value (do it on site)
keep the result in the input tensor a and return nothing */
void ClipMe(XTensor & a, DTYPE lower, DTYPE upper);
/* set every entry to its clip value (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper);
void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper);
/*
backward of Clip function
*/
......
......@@ -173,9 +173,11 @@ XTensor Normalize(const XTensor &input, int dim,
list.Add((XTensor*)&var);
list.Add((XTensor*)&a);
list.Add((XTensor*)&b);
XLink::MakeLink(&list, &output, MATH_NORMALIZE);
XLink::AddParamToHeadInt(&output, dim);
XLink::AddParamToHead(&output, epsilon);
if (input.enableGrad) {
XLink::MakeLink(&list, &output, MATH_NORMALIZE);
XLink::AddParamToHeadInt(&output, dim);
XLink::AddParamToHead(&output, epsilon);
}
return output;
}
......@@ -208,7 +210,7 @@ void Normalize(const XTensor &input, XTensor &output, int dim,
/* call _Normalize function */
_Normalize(&input, &output, dim, &mean, &var, &a, &b, epsilon);
if (output.enableGrad == true) {
if (input.enableGrad == true) {
/* tensor connections */
TensorList list(5);
list.Add((XTensor*)&input);
......
......@@ -118,9 +118,11 @@ XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift)
_ScaleAndShift(&a, &b, scale, shift);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, MATH_SCALEANDSHIFT);
XLink::AddParamToHead(&b, scale);
XLink::AddParamToHead(&b, shift);
if (a.enableGrad) {
XLink::MakeLink(&a, NULL, &b, MATH_SCALEANDSHIFT);
XLink::AddParamToHead(&b, scale);
XLink::AddParamToHead(&b, shift);
}
return b;
}
......
......@@ -45,11 +45,24 @@ void _ScaleAndShiftMe(XTensor * a, DTYPE scale, DTYPE shift = 0);
/*
scale and shift all tensor entries
keep the result in the input tensor a and return nothing
a = a * scale + shift
*/
void ScaleAndShiftMe(XTensor & a, DTYPE scale, DTYPE shift = 0);
/*
scale and shift all tensor entries
make a new tensor to keep the result and return it
b = a * scale + shift
*/
XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift = 0);
/*
scale and shift all tensor entries
b = a * scale + shift
*/
void ScaleAndShift(const XTensor &a, XTensor &b, DTYPE scale, DTYPE shift = 0);
} // namespace nts(NiuTrans.Tensor)
#endif // __SCALEANDSHIFT_H__
\ No newline at end of file
......@@ -68,17 +68,14 @@ T UnaryIsZero(T r)
}
/* define three macros separately, specifying the respective function names */
#ifdef USE_CUDA
#define _SIMPLE_UNARY_FUNCTION(_funcName, _cudaFuncName, origFunc) \
void _funcName(const XTensor * a, XTensor * b) \
{ \
/* run it on GPUs */ \
if (a->devID >= 0) { \
if (useCUDA) { \
_cudaFuncName(a, b); \
return; \
} \
else \
ShowNTErrors("No GPU devices support!") \
_cudaFuncName(a, b); \
return; \
} \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \
......@@ -103,6 +100,38 @@ void _funcName(const XTensor * a, XTensor * b)
else \
ShowNTErrors("TO DO!"); \
}
#else
/* variant used when USE_CUDA is not defined: generate _funcName(a, b),
   which writes origFunc(x) into b for every entry x of a.
   X_INT, X_FLOAT and X_DOUBLE data are handled; a tensor whose devID is
   non-negative is reported through ShowNTErrors. */
#define _SIMPLE_UNARY_FUNCTION(_funcName, origFunc)                         \
void _funcName(const XTensor * a, XTensor * b)                              \
{                                                                           \
    /* a tensor with a device id cannot be processed in this build */       \
    if (a->devID >= 0) {                                                    \
        ShowNTErrors("No GPU devices support!")                             \
    }                                                                       \
    CheckNTErrors((XTensor::IsSameShaped(a, b)),                            \
                  "Input tensors should have the same type!");              \
    /* apply origFunc entry by entry, dispatching on the data type */       \
    if (a->dataType == X_INT) {                                             \
        int * src = (int*)a->data;                                          \
        int * dst = (int*)b->data;                                          \
        for (int idx = 0; idx < a->unitNum; idx++) {                        \
            dst[idx] = (int)origFunc(src[idx]);                             \
        }                                                                   \
    }                                                                       \
    else if (a->dataType == X_FLOAT) {                                      \
        float * src = (float*)a->data;                                      \
        float * dst = (float*)b->data;                                      \
        for (int idx = 0; idx < a->unitNum; idx++) {                        \
            dst[idx] = (float)origFunc(src[idx]);                           \
        }                                                                   \
    }                                                                       \
    else if (a->dataType == X_DOUBLE) {                                     \
        double * src = (double*)a->data;                                    \
        double * dst = (double*)b->data;                                    \
        for (int idx = 0; idx < a->unitNum; idx++) {                        \
            dst[idx] = (double)origFunc(src[idx]);                          \
        }                                                                   \
    }                                                                       \
    else                                                                    \
        ShowNTErrors("TO DO!");                                             \
}
#endif
#define _SIMPLE_UNARY_FUNCTION_ME(_funcNameMe, _funcName) \
void _funcNameMe(XTensor * a) \
......@@ -122,7 +151,9 @@ XTensor funcName(const XTensor & a)
XTensor b(&a); \
b.SetTMPFlag(); \
_funcName(&a, &b); \
XLink::MakeLink(&a, NULL, &b, operationId); \
if(a.enableGrad){ \
XLink::MakeLink(&a, NULL, &b, operationId); \
} \
return b; \
}
......@@ -133,11 +164,12 @@ void funcName(const XTensor & a, XTensor & b)
InitTensor(&b, &a); \
} \
_funcName(&a, &b); \
if (b.enableGrad) { \
if (a.enableGrad) { \
XLink::MakeLink(&a, NULL, &b, operationId); \
} \
}
#ifdef USE_CUDA
_SIMPLE_UNARY_FUNCTION(_Absolute, _CudaAbsolute, fabs)
_SIMPLE_UNARY_FUNCTION(_Ceil, _CudaCeil, ceil)
_SIMPLE_UNARY_FUNCTION(_Exp, _CudaExp, exp)
......@@ -153,6 +185,23 @@ _SIMPLE_UNARY_FUNCTION(_Square, _CudaSquare, UnarySquare)
_SIMPLE_UNARY_FUNCTION(_Sin, _CudaSin, sin)
_SIMPLE_UNARY_FUNCTION(_Cos, _CudaCos, cos)
_SIMPLE_UNARY_FUNCTION(_Tan, _CudaTan, tan)
#else
_SIMPLE_UNARY_FUNCTION(_Absolute, fabs)
_SIMPLE_UNARY_FUNCTION(_Ceil, ceil)
_SIMPLE_UNARY_FUNCTION(_Exp, exp)
_SIMPLE_UNARY_FUNCTION(_Floor, floor)
_SIMPLE_UNARY_FUNCTION(_IsNonZero, UnaryIsNonZero)
_SIMPLE_UNARY_FUNCTION(_IsZero, UnaryIsZero)
_SIMPLE_UNARY_FUNCTION(_Log, log)
_SIMPLE_UNARY_FUNCTION(_Negate, UnaryNegate)
_SIMPLE_UNARY_FUNCTION(_Round, round)
_SIMPLE_UNARY_FUNCTION(_Sign, UnarySign)
_SIMPLE_UNARY_FUNCTION(_Sqrt, sqrt)
_SIMPLE_UNARY_FUNCTION(_Square, UnarySquare)
_SIMPLE_UNARY_FUNCTION(_Sin, sin)
_SIMPLE_UNARY_FUNCTION(_Cos, cos)
_SIMPLE_UNARY_FUNCTION(_Tan, tan)
#endif
_SIMPLE_UNARY_FUNCTION_ME(_AbsoluteMe, _Absolute)
SIMPLE_UNARY_FUNCTION_ME(AbsoluteMe, _Absolute)
......
......@@ -258,10 +258,12 @@ XTensor CopyIndexed(const XTensor & s, int dim,
list.Add((XTensor*)&tgtIndex);
/* tensor connection */
XLink::MakeLink(&list, &t, MOVEMENT_COPYINDEXED);
XLink::AddParamToHeadInt(&t, dim);
XLink::AddParamToHeadInt(&t, copyNum);
if (s.enableGrad) {
XLink::MakeLink(&list, &t, MOVEMENT_COPYINDEXED);
XLink::AddParamToHeadInt(&t, dim);
XLink::AddParamToHeadInt(&t, copyNum);
}
/* destroy variables */
delete[] dimSize;
......@@ -314,13 +316,15 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in
memcpy(saveTgtIndex, tgtIndex, indexSize * sizeof(int));
/* tensor connection */
XLink::MakeLink(&s, NULL, &t, MOVEMENT_COPYINDEXED);
XLink::AddParamToHeadInt(&t, dim);
XLink::AddParamToHeadPointer(&t, saveSrcIndex);
XLink::AddParamToHeadInt(&t, indexSize);
XLink::AddParamToHeadPointer(&t, saveTgtIndex);
XLink::AddParamToHeadInt(&t, copyNum);
if (s.enableGrad) {
XLink::MakeLink(&s, NULL, &t, MOVEMENT_COPYINDEXED);
XLink::AddParamToHeadInt(&t, dim);
XLink::AddParamToHeadPointer(&t, saveSrcIndex);
XLink::AddParamToHeadInt(&t, indexSize);
XLink::AddParamToHeadPointer(&t, saveTgtIndex);
XLink::AddParamToHeadInt(&t, copyNum);
}
/* destroy variables */
delete[] dimSize;
......
......@@ -134,7 +134,9 @@ XTensor CopyValues(const XTensor &s, XStream * stream)
_CopyValues(&s, &t, stream);
/* tensor connection */
XLink::MakeLink(&s, NULL, &t, MOVEMENT_COPYVALUES);
if (s.enableGrad) {
XLink::MakeLink(&s, NULL, &t, MOVEMENT_COPYVALUES);
}
return t;
}
......
......@@ -120,7 +120,9 @@ XTensor Gather(XTensor &s, XTensor &index)
_Gather(&s, &t, &index);
/* tensor connection */
XLink::MakeLink(&s, &index, &t, MOVEMENT_GATHER);
if (s.enableGrad) {
XLink::MakeLink(&s, &index, &t, MOVEMENT_GATHER);
}
if(index.order > 1) {
int * dims = new int[index.order + 1];
......
......@@ -21,6 +21,8 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XBLAS.h"
#include "VectorBuffer.h"
#include "ReduceMax.h"
#include "ReduceMax.cuh"
......@@ -76,18 +78,75 @@ void _ReduceMax(const XTensor * input, XTensor * output, int dim)
}
blockSize = stride * strideNum;
for(int k = 0; k < blockNum; k++){
DTYPE * ip = (DTYPE*)input->data + blockSize * k;
DTYPE * op = (DTYPE*)output->data + stride * k;
for(int i = 0; i < stride; i++){
DTYPE max = FLOAT_MIN;
DTYPE * ipe = ip + blockSize;
for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride){
DTYPE v = *ipb;
if(max < v)
max = v;
if(input->dimSizeRDI[0] % (4 * 32 / sizeof(DTYPE)) == 0 && input->dimSizeRDI[0] >= 32){
int vecBufLength = 32 / sizeof(DTYPE);
if(dimRDI == 0){
//data is contiguous in dim 0
for(int i = 0; i < blockNum; i++){
DTYPE * ip = (DTYPE*)input->data + blockSize * i;
DTYPE * op = (DTYPE*)output->data + i;
VectorBuffer vecBuf[4];
for(int j = 0; j < 4; j++){
vecBuf[j] = VectorBuffer::loadu((DTYPE*)(ip) + j * vecBufLength);
}
for(int j = 1; j < strideNum / 32; j++){
const DTYPE* ptr = (DTYPE*)(ip + j * vecBufLength);
vecBuf[0] = vecBuf[0].maxData(VectorBuffer::loadu(ptr + 0 * vecBufLength));
vecBuf[1] = vecBuf[1].maxData(VectorBuffer::loadu(ptr + 1 * vecBufLength));
vecBuf[2] = vecBuf[2].maxData(VectorBuffer::loadu(ptr + 2 * vecBufLength));
vecBuf[3] = vecBuf[3].maxData(VectorBuffer::loadu(ptr + 3 * vecBufLength));
}
vecBuf[0] = vecBuf[0].maxData(vecBuf[1]);
vecBuf[0] = vecBuf[0].maxData(vecBuf[2]);
vecBuf[0] = vecBuf[0].maxData(vecBuf[3]);
DTYPE maxN = DTYPE_MIN;
for(int k = 0; k < vecBufLength; k++){
maxN = MAX(maxN,vecBuf[0][k]);
}
*op = maxN;
}
} else{
//data is separated
for(int i = 0; i < blockNum; i++){
for(int j = 0; j < input->dimSizeRDI[0] / 32; j++){
DTYPE * ip = (DTYPE*)input->data + blockSize * i;
DTYPE * op = (DTYPE*)output->data + stride * i;
VectorBuffer vecBuf[4];
for(int k = 0; k < 4; k++){
vecBuf[k] = VectorBuffer::loadu((DTYPE*)(ip) + (j * 4 + k) * 32 / sizeof(DTYPE));
}
for(int k = 1; k < strideNum; k++){
DTYPE * ptr = ip + k * stride + (j * 4) * vecBufLength;
vecBuf[0] = vecBuf[0].maxData(VectorBuffer::loadu(ptr + 0 * vecBufLength));
vecBuf[1] = vecBuf[1].maxData(VectorBuffer::loadu(ptr + 1 * vecBufLength));
vecBuf[2] = vecBuf[2].maxData(VectorBuffer::loadu(ptr + 2 * vecBufLength));
vecBuf[3] = vecBuf[3].maxData(VectorBuffer::loadu(ptr + 3 * vecBufLength));
}
for(int k = 0; k < 4; k++){
for(int l = 0; l < vecBufLength; l++)
*(op + j * 32 + 8 * k + l) = vecBuf[k][l];
}
}
}
}
}//run vector buffer
else{
for(int k = 0; k < blockNum; k++){
DTYPE * ip = (DTYPE*)input->data + blockSize * k;
DTYPE * op = (DTYPE*)output->data + stride * k;
for(int i = 0; i < stride; i++){
DTYPE max = DTYPE_MIN;
DTYPE * ipe = ip + blockSize;
for(DTYPE * ipb = ip + i; ipb < ipe; ipb += stride){
DTYPE v = *ipb;
if(max < v)
max = v;
}
*(op + i) = max;
}
*(op + i) = max;
}
}
}
......@@ -122,8 +181,10 @@ XTensor ReduceMax(const XTensor &input, int dim)
_ReduceMax(&input, &output, dim);
/* tensor connection */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX);
XLink::AddParamToHeadInt(&output, dim);
if (input.enableGrad) {
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX);
XLink::AddParamToHeadInt(&output, dim);
}
/* destroy variables */
delete[] dimSize;
......@@ -162,7 +223,7 @@ void ReduceMax(const XTensor &input, XTensor &output, int dim)
/* call _ReduceMax function */
_ReduceMax(&input, &output, dim);
if (output.enableGrad) {
if (input.enableGrad) {
/* tensor connections */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMAX);
XLink::AddParamToHeadInt(&output, dim);
......
......@@ -41,19 +41,19 @@ float shflDownReduceMax(float input)
"{"
".reg .f32 r0;"
".reg .pred p;"
"shfl.down.b32 r0, %1, 0x10, 0x1f;"
"shfl.sync.down.b32 r0, %1, 0x10, 0x1f,0xffffffff;"
"setp.lt.f32 p,%1,r0;"
"@p mov.f32 %1,r0;"
"shfl.down.b32 r0, %1, 0x8, 0xf;"
"shfl.sync.down.b32 r0, %1, 0x8, 0xf,0xffffffff;"
"setp.lt.f32 p,%1,r0;"
"@p mov.f32 %1,r0;"
"shfl.down.b32 r0, %1, 0x4, 0x7;"
"shfl.sync.down.b32 r0, %1, 0x4, 0x7,0xffffffff;"
"setp.lt.f32 p,%1,r0;"
"@p mov.f32 %1,r0;"
"shfl.down.b32 r0, %1, 0x2, 0x3;"
"shfl.sync.down.b32 r0, %1, 0x2, 0x3,0xffffffff;"
"setp.lt.f32 p,%1,r0;"
"@p mov.f32 %1,r0;"
"shfl.down.b32 r0, %1, 0x1, 0x1;"
"shfl.sync.down.b32 r0, %1, 0x1, 0x1,0xffffffff;"
"setp.lt.f32 p, %1, r0; "
"@p mov.f32 %1,r0;"
"mov.f32 %0,%1;"
......@@ -73,19 +73,19 @@ int shflDownReduceMax(int input)
"{"
".reg .s32 r0;"
".reg .pred p;"
"shfl.down.b32 r0, %1, 0x10, 0x1f;"
"shfl.sync.down.b32 r0, %1, 0x10, 0x1f,0xffffffff;"
"setp.lt.s32 p,%1,r0;"
"@p mov.s32 %1,r0;"
"shfl.down.b32 r0, %1, 0x8, 0xf;"
"shfl.sync.down.b32 r0, %1, 0x8, 0xf,0xffffffff;"
"setp.lt.s32 p,%1,r0;"
"@p mov.s32 %1,r0;"
"shfl.down.b32 r0, %1, 0x4, 0x7;"
"shfl.sync.down.b32 r0, %1, 0x4, 0x7,0xffffffff;"
"setp.lt.s32 p,%1,r0;"
"@p mov.s32 %1,r0;"
"shfl.down.b32 r0, %1, 0x2, 0x3;"
"shfl.sync.down.b32 r0, %1, 0x2, 0x3,0xffffffff;"
"setp.lt.s32 p,%1,r0;"
"@p mov.s32 %1,r0;"
"shfl.down.b32 r0, %1, 0x1, 0x1;"
"shfl.sync.down.b32 r0, %1, 0x1, 0x1,0xffffffff;"
"setp.lt.s32 p, %1, r0; "
"@p mov.s32 %1,r0;"
"mov.s32 %0,%1;"
......
......@@ -77,8 +77,10 @@ XTensor ReduceMean(const XTensor &input, int dim)
_ReduceMean(&input, &output, dim);
/* tensor connection */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMEAN);
XLink::AddParamToHeadInt(&output, dim);
if (input.enableGrad) {
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMEAN);
XLink::AddParamToHeadInt(&output, dim);
}
/* destroy variables */
delete[] dimSize;
......@@ -119,7 +121,7 @@ void ReduceMean(const XTensor &input, XTensor &output, int dim)
/* call _ReduceMean function */
_ReduceMean(&input, &output, dim);
if (output.enableGrad) {
if (input.enableGrad) {
/* tensor connections */
XLink::MakeLink(&input, NULL, &output, REDUCE_REDUCEMEAN);
XLink::AddParamToHeadInt(&output, dim);
......
......@@ -37,15 +37,15 @@ float shflDownReduceSum(float input)
asm volatile(
"{"
".reg .f32 r0;"
"shfl.down.b32 r0, %1, 0x10, 0x1f;"
"shfl.sync.down.b32 r0, %1, 0x10, 0x1f,0xffffffff;"
"add.f32 %1, r0, %1;"
"shfl.down.b32 r0, %1, 0x8, 0xf;"
"shfl.sync.down.b32 r0, %1, 0x8, 0xf,0xffffffff;"
"add.f32 %1, r0, %1;"
"shfl.down.b32 r0, %1, 0x4, 0x7;"
"shfl.sync.down.b32 r0, %1, 0x4, 0x7,0xffffffff;"
"add.f32 %1, r0, %1;"
"shfl.down.b32 r0, %1, 0x2, 0x3;"
"shfl.sync.down.b32 r0, %1, 0x2, 0x3,0xffffffff;"
"add.f32 %1, r0, %1;"
"shfl.down.b32 r0, %1, 0x1, 0x1;"
"shfl.sync.down.b32 r0, %1, 0x1, 0x1,0xffffffff;"
"add.f32 %0, r0, %1;"
"}"
: "=f"(output) : "f"(input));
......@@ -62,15 +62,15 @@ int shflDownReduceSum(int input)
asm volatile(
"{"
".reg .s32 r0;"
"shfl.down.b32 r0, %1, 0x10, 0x1f;"
"shfl.sync.down.b32 r0, %1, 0x10, 0x1f,0xffffffff;"
"add.s32 %1, r0, %1;"
"shfl.down.b32 r0, %1, 0x8, 0xf;"
"shfl.sync.down.b32 r0, %1, 0x8, 0xf,0xffffffff;"
"add.s32 %1, r0, %1;"
"shfl.down.b32 r0, %1, 0x4, 0x7;"
"shfl.sync.down.b32 r0, %1, 0x4, 0x7,0xffffffff;"
"add.s32 %1, r0, %1;"
"shfl.down.b32 r0, %1, 0x2, 0x3;"
"shfl.sync.down.b32 r0, %1, 0x2, 0x3,0xffffffff;"
"add.s32 %1, r0, %1;"
"shfl.down.b32 r0, %1, 0x1, 0x1;"
"shfl.sync.down.b32 r0, %1, 0x1, 0x1,0xffffffff;"
"add.s32 %0, r0, %1;"
"}"
: "=r"(output) : "r"(input));
......
......@@ -73,8 +73,10 @@ XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift)
_ReduceSumSquared(&input, &output, dim, &shift);
/* tensor connection */
XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUMSQUARED);
XLink::AddParamToHeadInt(&output, dim);
if (input.enableGrad) {
XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUMSQUARED);
XLink::AddParamToHeadInt(&output, dim);
}
/* destroy variables */
delete[] dimSize;
......@@ -116,7 +118,7 @@ void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTen
/* call _ReduceSumSquared function */
_ReduceSumSquared(&input, &output, dim, &shift);
if (output.enableGrad) {
if (input.enableGrad) {
/* tensor connections */
XLink::MakeLink(&input, &shift, &output, REDUCE_REDUCESUMSQUARED);
XLink::AddParamToHeadInt(&output, dim);
......
......@@ -76,8 +76,10 @@ XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean)
_ReduceVariance(&input, &output, dim, &mean);
/* tensor connection */
XLink::MakeLink(&input, &mean, &output, REDUCE_REDUCEVARIANCE);
XLink::AddParamToHeadInt(&output, dim);
if (input.enableGrad) {
XLink::MakeLink(&input, &mean, &output, REDUCE_REDUCEVARIANCE);
XLink::AddParamToHeadInt(&output, dim);
}
/* destroy variables */
delete[] dimSize;
......@@ -119,7 +121,7 @@ void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTenso
/* call _ReduceVariance function */
_ReduceVariance(&input, &output, dim, &mean);
if (output.enableGrad) {
if (input.enableGrad) {
/* tensor connection */
XLink::MakeLink(&input, &mean, &output, REDUCE_REDUCEVARIANCE);
XLink::AddParamToHeadInt(&output, dim);
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: ZHANG Yuhao (email: zhangyuhao@stu.neu.edu.cn) 2019-07-23
*/
#include "VectorBuffer.h"
namespace nts {
/* number of DTYPE elements that fit into one buffer of 32 bytes */
int VectorBuffer::size()
{
    const int num = 32 / sizeof(DTYPE);
    return num;
}
/* default constructor: no work is done in the body */
VectorBuffer::VectorBuffer() {}
/*
constructor
every element of the buffer is set to val
*/
VectorBuffer::VectorBuffer(DTYPE val)
{
    const int num = size();
    for (int i = 0; i < num; ++i)
        values[i] = val;
}
/*
load one buffer (32 / sizeof(DTYPE) consecutive elements) starting at ptr,
transforming every element on the way:
    v = ptr[i]               if bias is NULL
    v = ptr[i] - bias[i]     otherwise
    v = pow(v, power)        (power 1, 2 and 0.5 have dedicated code paths:
                              copy, v * v and sqrt(v))
    v = exp(v)               only if isExp is true
ptr, and bias when it is given, are read for a full buffer length.
*/
VectorBuffer VectorBuffer::loadu(const DTYPE* ptr, bool isExp , DTYPE power , DTYPE* bias )
{
    const int num = 32 / sizeof(DTYPE);

    /* classify the exponent once; any other value falls back to pow() */
    const bool isIdentity = (power == (DTYPE)1.0);
    const bool isSquare = !isIdentity && (power == (DTYPE)2.0);
    const bool isSqrt = !isIdentity && !isSquare && (power == (DTYPE)0.5);

    VectorBuffer loaded;
    DTYPE * out = loaded.values;

    if (isExp && bias == NULL) {
        /* exp(x ^ power) */
        if (isIdentity) {
            for (int i = 0; i < num; ++i)
                out[i] = (DTYPE)exp(ptr[i]);
        }
        else if (isSquare) {
            for (int i = 0; i < num; ++i)
                out[i] = (DTYPE)exp(ptr[i] * ptr[i]);
        }
        else if (isSqrt) {
            for (int i = 0; i < num; ++i)
                out[i] = (DTYPE)exp(sqrt(ptr[i]));
        }
        else {
            for (int i = 0; i < num; ++i)
                out[i] = (DTYPE)exp(pow(ptr[i], power));
        }
    }
    else if (isExp) {
        /* exp((x - bias) ^ power) */
        if (isIdentity) {
            for (int i = 0; i < num; ++i)
                out[i] = (DTYPE)exp(ptr[i] - bias[i]);
        }
        else if (isSquare) {
            for (int i = 0; i < num; ++i) {
                DTYPE shifted = ptr[i] - bias[i];
                out[i] = (DTYPE)exp(shifted * shifted);
            }
        }
        else if (isSqrt) {
            for (int i = 0; i < num; ++i)
                out[i] = (DTYPE)exp(sqrt(ptr[i] - bias[i]));
        }
        else {
            for (int i = 0; i < num; ++i)
                out[i] = (DTYPE)exp(pow(ptr[i] - bias[i], power));
        }
    }
    else if (bias == NULL) {
        /* x ^ power */
        if (isIdentity) {
            /* nothing to transform: plain copy */
            memcpy(out, ptr, num * sizeof(DTYPE));
        }
        else if (isSquare) {
            for (int i = 0; i < num; ++i)
                out[i] = ptr[i] * ptr[i];
        }
        else if (isSqrt) {
            for (int i = 0; i < num; ++i)
                out[i] = (DTYPE)sqrt(ptr[i]);
        }
        else {
            for (int i = 0; i < num; ++i)
                out[i] = (DTYPE)pow(ptr[i], power);
        }
    }
    else {
        /* (x - bias) ^ power */
        if (isIdentity) {
            for (int i = 0; i < num; ++i)
                out[i] = ptr[i] - bias[i];
        }
        else if (isSquare) {
            for (int i = 0; i < num; ++i) {
                DTYPE shifted = ptr[i] - bias[i];
                out[i] = shifted * shifted;
            }
        }
        else if (isSqrt) {
            for (int i = 0; i < num; ++i)
                out[i] = (DTYPE)sqrt(ptr[i] - bias[i]);
        }
        else {
            for (int i = 0; i < num; ++i)
                out[i] = (DTYPE)pow(ptr[i] - bias[i], power);
        }
    }

    return loaded;
}
/* overloading []: read-only access to the element at position idx
   (idx is not range-checked) */
const DTYPE& VectorBuffer::operator[](int idx)const
{
    const DTYPE & item = values[idx];
    return item;
}
/*
overloading +
the element-wise sum is accumulated into this buffer itself, i.e., the
left operand is modified, and a copy of the updated buffer is returned
*/
VectorBuffer VectorBuffer::operator+(const VectorBuffer &a)
{
    const int num = size();
    for (int i = 0; i < num; ++i)
        values[i] = a.values[i] + values[i];
    return *this;
}
/*
compute the element-wise max of two buffers
the result is written into this buffer itself and a copy of it is returned
*/
VectorBuffer VectorBuffer::maxData(const VectorBuffer &a)
{
    const int num = size();
    for (int i = 0; i < num; ++i)
        values[i] = MAX(a[i], values[i]);
    return *this;
}
}/* end of the nts (NiuTrans.Tensor) namespace */
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: ZHANG Yuhao (email: zhangyuhao@stu.neu.edu.cn) 2019-07-23
*/
#ifndef __VECTORBUFFER_H__
#define __VECTORBUFFER_H__

/* memcpy is called by the implementation of VectorBuffer::loadu */
#include <string.h>
#include <math.h>
#include "../../XGlobal.h"

namespace nts {

/*
a small fixed-size buffer holding 32 bytes of DTYPE elements.
it offers element-wise operations over a whole buffer at a time.
*/
class VectorBuffer {
private:
    /* buffer for computation (zero-initialized) */
    DTYPE values[32 / sizeof(DTYPE)] = { 0 };

public:
    /* data size for each buffer, i.e., the number of elements (32 / sizeof(DTYPE)) */
    static int size();

    /* constructor */
    VectorBuffer();

    /* constructor: set every element to val */
    VectorBuffer(DTYPE val);

    /* load one buffer of data starting at ptr.
       bias (if not NULL) is subtracted element-wise first, the result is
       raised to the given power, and exp() is applied last if isExp is true.
       with the default arguments the data is copied as it is. */
    static VectorBuffer loadu(const DTYPE* ptr, bool isExp = false, DTYPE power = (DTYPE)1.0F, DTYPE* bias = NULL);

    /* overloading []: read-only access to an element */
    const DTYPE& operator[](int idx)const;

    /* overloading +
       NOTE: the sum is stored in this buffer (the left operand is modified)
       and a copy of it is returned */
    VectorBuffer operator+(const VectorBuffer &a);

    /* compute the element-wise max of two buffers
       NOTE: the result is stored in this buffer and a copy of it is returned */
    VectorBuffer maxData(const VectorBuffer &a);
};

}

#endif // __VECTORBUFFER_H__
\ No newline at end of file
......@@ -99,9 +99,11 @@ XTensor Concatenate(const TensorList &smalls, int dim)
_Merge(&smalls, &big, dim);
/* tensor connection */
XLink::MakeLink(&smalls, &big, SHAPE_MERGE);
XLink::AddParamToHeadInt(&big, dim);
if (tensor->enableGrad) {
XLink::MakeLink(&smalls, &big, SHAPE_MERGE);
XLink::AddParamToHeadInt(&big, dim);
}
/* destroy variables */
delete[] dimSize;
......@@ -127,8 +129,10 @@ XTensor Concatenate(const TensorList &smalls, int dim)
_ConcatenateSolely(&smalls, &big, dim);
/* tensor connection */
XLink::MakeLink(&smalls, &big, SHAPE_CONCATENATE);
XLink::AddParamToHeadInt(&big, dim);
if (tensor->enableGrad) {
XLink::MakeLink(&smalls, &big, SHAPE_CONCATENATE);
XLink::AddParamToHeadInt(&big, dim);
}
/* destroy variables */
delete[] dimSize;
......@@ -309,9 +313,11 @@ XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim)
_Merge(&smalls, &big, dim);
/* tensor connection */
XLink::MakeLink(&smalls, &big, SHAPE_MERGE);
XLink::AddParamToHeadInt(&big, dim);
if (tensor->enableGrad) {
XLink::MakeLink(&smalls, &big, SHAPE_MERGE);
XLink::AddParamToHeadInt(&big, dim);
}
/* destroy variables */
delete[] dimSize;
......@@ -337,8 +343,10 @@ XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim)
_ConcatenateSolely(&smalls, &big, dim);
/* tensor connection */
XLink::MakeLink(&smalls, &big, SHAPE_CONCATENATE);
XLink::AddParamToHeadInt(&big, dim);
if (tensor->enableGrad) {
XLink::MakeLink(&smalls, &big, SHAPE_CONCATENATE);
XLink::AddParamToHeadInt(&big, dim);
}
/* destroy variables */
delete[] dimSize;
......
......@@ -222,9 +222,11 @@ XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim)
_Merge(&s, &t, whereToMerge, leadingDim);
/* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_MERGE);
XLink::AddParamToHeadInt(&t, whereToMerge);
XLink::AddParamToHeadInt(&t, leadingDim);
if (s.enableGrad) {
XLink::MakeLink(&s, NULL, &t, SHAPE_MERGE);
XLink::AddParamToHeadInt(&t, whereToMerge);
XLink::AddParamToHeadInt(&t, leadingDim);
}
/* destroy variables */
delete[] dimSize;
......@@ -261,7 +263,7 @@ void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim)
/* call _Merge function */
_Merge(&s, &t, whereToMerge, leadingDim);
if (t.enableGrad) {
if (s.enableGrad) {
/* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_MERGE);
XLink::AddParamToHeadInt(&t, whereToMerge);
......@@ -412,8 +414,10 @@ XTensor Merge(const TensorList &smalls, int whereToMerge)
_Merge(&smalls, &big, whereToMerge);
/* tensor connections */
XLink::MakeLink(&smalls, &big, SHAPE_MERGE_LIST);
XLink::AddParamToHeadInt(&big, whereToMerge);
if (tensor->enableGrad) {
XLink::MakeLink(&smalls, &big, SHAPE_MERGE_LIST);
XLink::AddParamToHeadInt(&big, whereToMerge);
}
/* destroy variables */
delete[] dimSize;
......@@ -453,8 +457,10 @@ XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge)
_Merge(&smalls, &big, whereToMerge);
/* tensor connections */
XLink::MakeLink(&smalls, &big, SHAPE_MERGE_LIST);
XLink::AddParamToHeadInt(&big, whereToMerge);
if (smallA.enableGrad) {
XLink::MakeLink(&smalls, &big, SHAPE_MERGE_LIST);
XLink::AddParamToHeadInt(&big, whereToMerge);
}
/* destroy variables */
delete[] dimSize;
......
......@@ -43,7 +43,9 @@ XTensor Reshape(XTensor &s, int order, int * dimSize)
t.Reshape(order, dimSize);
/* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE);
if (s.enableGrad) {
XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE);
}
return t;
}
......@@ -57,7 +59,7 @@ void Reshape(XTensor &s, XTensor &t, int order, int * dimSize)
/* call Reshape function */
t.Reshape(order, dimSize);
if (t.enableGrad) {
if (s.enableGrad) {
/* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE);
}
......
......@@ -217,9 +217,11 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
_Split(&s, &t, whereToSplit, splitNum);
/* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_SPLIT);
XLink::AddParamToHeadInt(&t, whereToSplit);
XLink::AddParamToHeadInt(&t, splitNum);
if (s.enableGrad) {
XLink::MakeLink(&s, NULL, &t, SHAPE_SPLIT);
XLink::AddParamToHeadInt(&t, whereToSplit);
XLink::AddParamToHeadInt(&t, splitNum);
}
/* destroy variables */
delete[] dimSize;
......@@ -251,7 +253,7 @@ void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum)
/* call _Split function */
_Split(&s, &t, whereToSplit, splitNum);
if (t.enableGrad) {
if (s.enableGrad) {
/* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_SPLIT);
XLink::AddParamToHeadInt(&t, whereToSplit);
......@@ -409,12 +411,15 @@ void Split(const XTensor &big, TensorList &smalls, int whereToSplit, int splitNu
/* tensor connections */
for(int i = 0; i < smalls.count; i++){
XTensor * s = (XTensor*)smalls.Get(i);
XLink::MakeLink(&big, NULL, s, SHAPE_SPLIT_LIST);
XLink::AddParamToHeadInt(s, whereToSplit);
/* it is tricky here that we keep the id of each
block, rather than the total number of the splits */
XLink::AddParamToHeadInt(s, i);
if (s->enableGrad) {
XLink::MakeLink(&big, NULL, s, SHAPE_SPLIT_LIST);
XLink::AddParamToHeadInt(s, whereToSplit);
/* it is tricky here that we keep the id of each
block, rather than the total number of the splits */
XLink::AddParamToHeadInt(s, i);
}
}
}
......
......@@ -121,7 +121,9 @@ XTensor Squeeze(XTensor & source, int leadingDim)
_Squeeze(&source, &target, leadingDim);
/* tensor connections */
XLink::MakeLink(&source, NULL, &target, SHAPE_SQUEEZE);
if (source.enableGrad) {
XLink::MakeLink(&source, NULL, &target, SHAPE_SQUEEZE);
}
return target;
}
......@@ -135,7 +137,7 @@ void Squeeze(XTensor & source, XTensor & target, int leadingDim)
/* call _Squeeze function */
_Squeeze(&source, &target, leadingDim);
if (target.enableGrad) {
if (source.enableGrad) {
/* tensor connections */
XLink::MakeLink(&source, NULL, &target, SHAPE_SQUEEZE);
}
......
......@@ -144,9 +144,11 @@ XTensor Transpose(const XTensor &a, const int i, const int j)
_Transpose(&a, &b, i, j);
/* tensor connection */
XLink::MakeLink(&a, NULL, &b, SHAPE_TRANSPOSE);
XLink::AddParamToHeadInt(&b, i);
XLink::AddParamToHeadInt(&b, j);
if (a.enableGrad) {
XLink::MakeLink(&a, NULL, &b, SHAPE_TRANSPOSE);
XLink::AddParamToHeadInt(&b, i);
XLink::AddParamToHeadInt(&b, j);
}
/* destroy variables */
delete[] dimSize;
......
......@@ -156,9 +156,11 @@ XTensor Unsqueeze(const XTensor &a, int dim, int dSize)
_Unsqueeze(&a, &b, dim, dSize);
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, SHAPE_UNSQUEEZE);
XLink::AddParamToHeadInt(&b, dim);
XLink::AddParamToHeadInt(&b, dSize);
if (a.enableGrad) {
XLink::MakeLink(&a, NULL, &b, SHAPE_UNSQUEEZE);
XLink::AddParamToHeadInt(&b, dim);
XLink::AddParamToHeadInt(&b, dSize);
}
/* destroy variables */
delete[] dimSize;
......@@ -191,7 +193,7 @@ void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize)
/* call _Unsqueeze function */
_Unsqueeze(&a, &b, dim, dSize);
if (b.enableGrad) {
if (a.enableGrad) {
/* tensor connections */
XLink::MakeLink(&a, NULL, &b, SHAPE_UNSQUEEZE);
XLink::AddParamToHeadInt(&b, dim);
......
......@@ -377,8 +377,8 @@ get the top-k items
template<class T> __global__
void KernelTopK3(T * input, int stride, int strideNum, int blockNum, int k, T minValue, T * output, int * index)
{
__shared__ CudaHeapNode<T> heapData[(SHARED_MEMORY_SIZE - 1024 * sizeof(T)) / sizeof(CudaHeapNode<T>)];
__shared__ T eachHeapMaxValue[1024];
__shared__ CudaHeapNode<T> heapData[(SHARED_MEMORY_SIZE - 512 * sizeof(T)) / sizeof(CudaHeapNode<T>)];
__shared__ T eachHeapMaxValue[512];
/*optimization k size the parameter must more than half of k*/
int parameter = 0;
......@@ -429,7 +429,7 @@ void KernelTopK3(T * input, int stride, int strideNum, int blockNum, int k, T mi
}
__syncthreads();
/*to merge the heap use another way*/
/* to merge the heap use another way */
T minData = minValue;
int heapLimit = heap.count / 2;
if (heapLimit % 2 == 0 && heapLimit != 0) heapLimit -= 1;
......@@ -438,12 +438,13 @@ void KernelTopK3(T * input, int stride, int strideNum, int blockNum, int k, T mi
minData = heap.items[counter].value;
}
eachHeapMaxValue[threadIdx.y * blockDim.x + threadIdx.x] = minData;
//needs more optimization
if (i == 0) {
int threadLimit = (threadIdx.y + 1) * blockDim.x;
int threadLimit = threadIdx.y * blockDim.x + min(blockDim.x,strideNum);
CudaXHeap<MIN_HEAP, T> chooseHeap(k, heapData + k * ((blockDim.x * blockDim.y) + threadIdx.y));
int counter = threadIdx.y * blockDim.x;
for (; counter < threadIdx.y * blockDim.x + k; ++counter) {
for (; counter < threadIdx.y * blockDim.x + min(k, blockDim.x); ++counter) {
chooseHeap.Push(counter, eachHeapMaxValue[counter]);
}
for (; counter < threadLimit; ++counter) {
......@@ -451,15 +452,16 @@ void KernelTopK3(T * input, int stride, int strideNum, int blockNum, int k, T mi
chooseHeap.ReplaceTop(counter, eachHeapMaxValue[counter]);
}
}
int heapNum = chooseHeap.count;
CudaXHeap<MIN_HEAP, T> ansHeapData(k, k - parameter, heapData + k * chooseHeap.items[0].index);
int miss = parameter;
for (counter = 1; counter < k; ++counter) {
for (counter = 1; counter < heapNum; ++counter) {
chooseHeap.items[0] = chooseHeap.items[chooseHeap.count - 1];
chooseHeap.count--;
chooseHeap.Down(0);
CudaHeapNode<T> * cmpHeapData = heapData + k * (chooseHeap.items[0].index);
int cmpHeapLimit = 0;
if (counter + heapLimit <= k - parameter){
if (counter + heapLimit <= k - parameter && heapNum == k){
cmpHeapLimit = heapLimit;
}
/* take the max data from the minHeap,so start search from the leaf node */
......@@ -840,7 +842,7 @@ void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
/* we run the kernel if the heaps can fit into the shared memory */
cudaGrids[1] *= cudaBlocks[1];
cudaBlocks[1] = 1;
if ((cudaBlocks[0] * cudaBlocks[1] + 1) * k * (a->unitSize + sizeof(int)) < SHARED_MEMORY_SIZE) {
if ((cudaBlocks[0] * cudaBlocks[1] + 1) * k * (a->unitSize + sizeof(int)) + (512 * sizeof(int))< SHARED_MEMORY_SIZE) {
if (a->dataType == DEFAULT_DTYPE) {
KernelTopK3<DTYPE> <<<dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >>>
((DTYPE*)a->data, stride, strideNumA, blockNum, k, DTYPE_MIN,
......@@ -869,7 +871,7 @@ void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
//delete indexA;
int workerNum = WORKERSNUM;
GDevs.GetCudaThread2D(a->mem->devID,
GDevs.GetCudaThread2D(a->devID,
workerNum, stride * blockNum, MAX_INT,
cudaGrids, cudaBlocks);
if (a->dataType == DEFAULT_DTYPE) {
......
......@@ -81,8 +81,10 @@ XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale)
_ScaleAndShiftMe(&c, scale);
/* tensor connections */
XLink::MakeLink(&x, &maskIndex, &c, MOVEMENT_DROPOUTWITHINDEX);
XLink::AddParamToHead(&c, scale);
if (x.enableGrad) {
XLink::MakeLink(&x, &maskIndex, &c, MOVEMENT_DROPOUTWITHINDEX);
XLink::AddParamToHead(&c, scale);
}
return c;
}
......
......@@ -78,7 +78,9 @@ XTensor HardTanH(const XTensor &x)
_HardTanH(&x, &y);
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_HARDTANH);
if (x.enableGrad) {
XLink::MakeLink(&x, NULL, &y, FUNC_HARDTANH);
}
return y;
}
......@@ -92,7 +94,7 @@ void HardTanH(const XTensor &x, XTensor &y)
/* call _HardTanH function */
_HardTanH(&x, &y);
if (y.enableGrad) {
if (x.enableGrad) {
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_HARDTANH);
}
......
......@@ -54,7 +54,9 @@ XTensor Identity(const XTensor &x)
_Identity(&x, &y);
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_IDENTITY);
if (x.enableGrad) {
XLink::MakeLink(&x, NULL, &y, FUNC_IDENTITY);
}
return y;
}
......@@ -68,7 +70,7 @@ void Identity(const XTensor &x, XTensor &y)
/* call _Identity function */
_Identity(&x, &y);
if (y.enableGrad) {
if (x.enableGrad) {
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_IDENTITY);
}
......
......@@ -188,8 +188,10 @@ XTensor LogSoftmax(const XTensor &x, int leadDim)
_LogSoftmax(&x, &y, ld);
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX);
XLink::AddParamToHeadInt(&y, ld);
if (x.enableGrad) {
XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX);
XLink::AddParamToHeadInt(&y, ld);
}
return y;
}
......@@ -215,7 +217,7 @@ void LogSoftmax(const XTensor &x, XTensor &y, int leadDim)
/* call _LogSoftmax function */
_LogSoftmax(&x, &y, ld);
if (y.enableGrad) {
if (x.enableGrad) {
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_LOGSOFTMAX);
XLink::AddParamToHeadInt(&y, ld);
......
......@@ -70,7 +70,9 @@ XTensor Rectify(const XTensor &x)
_Rectify(&x, &y);
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_RECTIFY);
if (x.enableGrad) {
XLink::MakeLink(&x, NULL, &y, FUNC_RECTIFY);
}
return y;
}
......@@ -84,7 +86,7 @@ void Rectify(const XTensor &x, XTensor &y)
/* call _Rectify function */
_Rectify(&x, &y);
if (y.enableGrad) {
if (x.enableGrad) {
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_RECTIFY);
}
......
......@@ -73,7 +73,9 @@ XTensor Sigmoid(const XTensor &x)
_Sigmoid(&x, &y);
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_SIGMOID);
if (x.enableGrad) {
XLink::MakeLink(&x, NULL, &y, FUNC_SIGMOID);
}
return y;
}
......@@ -87,7 +89,7 @@ void Sigmoid(const XTensor &x, XTensor &y)
/* call _Sigmoid function */
_Sigmoid(&x, &y);
if (y.enableGrad) {
if (x.enableGrad) {
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_SIGMOID);
}
......
......@@ -142,8 +142,10 @@ XTensor Softmax(const XTensor &x, int leadDim)
_Softmax(&x, &y, ld);
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_SOFTMAX);
XLink::AddParamToHeadInt(&y, ld);
if (x.enableGrad) {
XLink::MakeLink(&x, NULL, &y, FUNC_SOFTMAX);
XLink::AddParamToHeadInt(&y, ld);
}
return y;
}
......@@ -161,7 +163,7 @@ void Softmax(const XTensor &x, XTensor &y, int leadDim)
/* call _Softmax function */
_Softmax(&x, &y, ld);
if (y.enableGrad) {
if (x.enableGrad) {
/* tensor connection */
XLink::MakeLink(&x, NULL, &y, FUNC_SOFTMAX);
XLink::AddParamToHeadInt(&y, ld);
......
......@@ -171,7 +171,7 @@ float broadcast(float input)
float output;
asm(
"{"
"shfl.idx.b32 %0,%1,0x0,0x1f;"
"shfl.sync.idx.b32 %0,%1,0x0,0x1f,0xffffffff;"
"}"
:"=f"(output) : "f"(input)
);
......
......@@ -277,8 +277,11 @@ XTensor CrossEntropy(const XTensor & output, const XTensor & gold,
tails.Add((XTensor*)&gold);
tails.Add(weight);
tails.Add(padding);
XLink::MakeLink(&tails, &loss, LOSS_CROSSENTROPY);
XLink::AddParamToHeadInt(&loss, dim);
if (output.enableGrad) {
XLink::MakeLink(&tails, &loss, LOSS_CROSSENTROPY);
XLink::AddParamToHeadInt(&loss, dim);
}
return loss;
}
......@@ -302,8 +305,11 @@ XTensor CrossEntropy(const XTensor & output, const XTensor & gold,
tails.Add((XTensor*)&gold);
tails.Add(weight);
tails.Add((XTensor*)&padding);
XLink::MakeLink(&tails, &loss, LOSS_CROSSENTROPY);
XLink::AddParamToHeadInt(&loss, dim);
if (output.enableGrad) {
XLink::MakeLink(&tails, &loss, LOSS_CROSSENTROPY);
XLink::AddParamToHeadInt(&loss, dim);
}
return loss;
}
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-12
*/
#include "../core/math/Unary.h"
#include "TAbsolute.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test Absolute function.
Set every entry to its absolute value.
*/
bool TestAbsolute1()
{
/* a tensor of size (3, 2) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 3;
dimSize[1] = 2;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.5F, -4.0F},
{0.0F, 6.0F} };
DTYPE answer[3][2] = { {1.0F, 2.0F},
{0.5F, 4.0F},
{0.0F, 6.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(order, dimSize);
XTensor * b = NewTensor(order, dimSize);
XTensor * aMe = NewTensor(order, dimSize);
XTensor bUser;
/* initialize variables */
a->SetData(aData, unitNum);
aMe->SetData(aData, unitNum);
/* call Absolute function */
_Absolute(a, b);
_AbsoluteMe(aMe);
bUser = Absolute(*a);
/* check results */
cpuTest = b->CheckData(answer, unitNum, 1e-4F) && aMe->CheckData(answer, unitNum, 1e-4F) && bUser.CheckData(answer, unitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* Initialize variables */
aGPU->SetData(aData, unitNum);
aMeGPU->SetData(aData, unitNum);
/* call Absolute function */
_Absolute(aGPU, bGPU);
_AbsoluteMe(aMeGPU);
bUserGPU = Absolute(*aGPU);
/* check results */
gpuTest = bGPU->CheckData(answer, unitNum, 1e-4F) && aMeGPU->CheckData(answer, unitNum, 1e-4F) && bUserGPU.CheckData(answer, unitNum, 1e-4F);
/* destroy variables */
delete a;
delete b;
delete aMe;
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
delete aMe;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for Absolute Function */
bool TestAbsolute()
{
    XPRINT(0, stdout, "[TEST Absolute] set every entry to its absolute value \n");

    /* remains true only while every case succeeds */
    bool allPassed = true;

    /* case 1 */
    if (TestAbsolute1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> case 1 failed!\n");
        allPassed = false;
    }

    /* TODO: other cases */

    /* overall verdict */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-12
*/
#ifndef __TEST_ABSOLUTE_H__
#define __TEST_ABSOLUTE_H__
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for Absolute Function
entry point of the Absolute test suite; takes no arguments
<< return - a bool flag carrying the overall result of the suite
*/
bool TestAbsolute();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_ABSOLUTE_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-08-03
*/
#include "../XTensor.h"
#include "../core/math/Clip.h"
#include "TClip.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test Clip function.
Set every entry to its clip value.
*/
bool TestClip1()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.0F, 4.0F},
{5.0F, -6.0F} };
DTYPE answer[3][2] = { {1.0F, -1.0F},
{0.0F, 1.0F},
{1.0F, -1.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize);
XTensor * aMe = NewTensor(aOrder, aDimSize);
XTensor bUser;
/* initialize variables */
a->SetData(aData, aUnitNum);
aMe->SetData(aData, aUnitNum);
/* call Clip function */
_Clip(a, b, -1.0, 1.0);
_ClipMe(aMe, -1.0, 1.0);
bUser = Clip(*a, -1.0, 1.0);
/* check results */
cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) &&
aMe->CheckData(answer, aUnitNum, 1e-4F) &&
bUser.CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* call Clip function */
_Clip(aGPU, bGPU, -1.0, 1.0);
_ClipMe(aMeGPU, -1.0, 1.0);
bUserGPU = Clip(*aGPU, -1.0, 1.0);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete a;
delete b;
delete aMe;
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
delete aMe;
delete[] aDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for Clip Function */
bool TestClip()
{
    XPRINT(0, stdout, "[TEST Clip] set every entry to its clip value \n");

    /* remains true only while every case succeeds */
    bool allPassed = true;

    /* case 1 */
    if (TestClip1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> case 1 failed!\n");
        allPassed = false;
    }

    /* TODO: other cases */

    /* overall verdict */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-08-03
*/
#ifndef __TEST_CLIP_H__
#define __TEST_CLIP_H__
#include "../core/math/Clip.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for Clip Function
entry point of the Clip test suite; takes no arguments
<< return - a bool flag carrying the overall result of the suite
NOTE(review): this declaration has C language linkage (extern "C"), while the
other test entry points are declared without it - confirm this is intended
*/
extern "C"
bool TestClip();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_CLIP_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-12
*/
#include "../XTensor.h"
#include "../core/math/Compare.h"
#include "TCompare.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test Equal function.
Compare whether every entry is equal to the specified value.
*/
bool TestCompare1()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.0F, 4.0F},
{5.0F, 1.0F} };
DTYPE answer[3][2] = { {1.0F, 0.0F},
{0.0F, 0.0F},
{0.0F, 1.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize);
XTensor * aMe = NewTensor(aOrder, aDimSize);
XTensor bUser;
/* initialize variables */
a->SetData(aData, aUnitNum);
aMe->SetData(aData, aUnitNum);
/* call Equal function */
_Equal(a, b, 1.0);
_EqualMe(aMe, 1.0);
bUser = Equal(*a, 1.0);
/* check results */
cpuTest = b->CheckData(answer, aUnitNum, 1e-4F) &&
aMe->CheckData(answer, aUnitNum, 1e-4F) &&
bUser.CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* call Equal function */
_Equal(aGPU, bGPU, 1.0);
_EqualMe(aMeGPU, 1.0);
bUserGPU = Equal(*aGPU, 1.0);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete a;
delete b;
delete aMe;
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
delete aMe;
delete[] aDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for Compare Function */
bool TestCompare()
{
    XPRINT(0, stdout, "[TEST Compare] compare every entry with specified value \n");

    /* remains true only while every case succeeds */
    bool allPassed = true;

    /* case 1 */
    if (TestCompare1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> case 1 failed!\n");
        allPassed = false;
    }

    /* TODO: other cases */

    /* overall verdict */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-12-10
*/
#ifndef __TEST_Compare_H__
#define __TEST_Compare_H__
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for Compare Function
entry point of the Compare test suite; takes no arguments
<< return - a bool flag carrying the overall result of the suite
*/
bool TestCompare();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_Compare_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14
*/
#ifndef __TEST_CONCATENATE_H__
#define __TEST_CONCATENATE_H__
#include "../core/shape/Concatenate.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for Concatenate Function
takes no arguments and returns a bool flag
NOTE(review): presumably true means that all cases passed, as in the other
test entry points - the definition is not next to this header, confirm there
*/
bool TestConcatenate();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_CONCATENATE_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14
*/
#ifndef __TEST_CONCATENATESOLELY_H__
#define __TEST_CONCATENATESOLELY_H__
#include "../core/shape/ConcatenateSolely.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for ConcatenateSolely Function
takes no arguments and returns a bool flag
NOTE(review): presumably true means that all cases passed, as in the other
test entry points - the definition is not next to this header, confirm there
*/
bool TestConcatenateSolely();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_CONCATENATESOLELY_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2018, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-12
*/
#include "TConvertDataType.h"
#include "../core/arithmetic/MatrixMul.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test ConvertDataType function.
In this case, the float32 data type is converted to int32 data type.
*/
bool TestConvertDataType1()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, 2.0F},
{0.5F, 4.0F},
{5.0F, 6.0F} };
int answer[3][2] = { {1, 2},
{0, 4},
{5, 6} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize);
XTensor * b = NewTensor(aOrder, aDimSize, X_INT);
/* initialize variables */
a->SetData(aData, aUnitNum);
b->SetZeroAll();
/* call ConvertDataType function */
_ConvertDataType(a, b);
/* check results */
cpuTest = b->CheckData(answer, aUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_INT, 1.0F, 0);
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
/* call ConvertDataType function */
_ConvertDataType(aGPU, bGPU);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum);
/* destroy variables */
delete a;
delete b;
delete aGPU;
delete bGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
delete[] aDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 2: test ConvertDataType function.
In this case, the int32 data type is converted to float32 data type.
*/
bool TestConvertDataType2()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
int aData[3][2] = { {1, 2},
{0, 4},
{5, 6} };
DTYPE answer[3][2] = { {1.0F, 2.0F},
{0.0F, 4.0F},
{5.0F, 6.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * a = NewTensor(aOrder, aDimSize, X_INT);
XTensor * b = NewTensor(aOrder, aDimSize);
/* initialize variables */
a->SetData(aData, aUnitNum);
b->SetZeroAll();
/* call ConvertDataType function */
_ConvertDataType(a, b);
/* check results */
cpuTest = b->CheckData(answer, aUnitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_INT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
/* call ConvertDataType function */
_ConvertDataType(aGPU, bGPU);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete a;
delete b;
delete aGPU;
delete bGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete a;
delete b;
delete[] aDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 3: test ConvertDataType function.
In this case, the float data type is converted to float16 data type.
*/
bool TestConvertDataType3()
{
    int order = 2;

    /* a tensor of size (3, 2) */
    int * dimSize1 = new int[order];
    dimSize1[0] = 3;
    dimSize1[1] = 2;

    int unitNum1 = 1;
    for (int i = 0; i < order; i++)
        unitNum1 *= dimSize1[i];

    /* a tensor of size (2, 3) */
    int * dimSize2 = new int[order];
    dimSize2[0] = 2;
    dimSize2[1] = 3;

    int unitNum2 = 1;
    for (int i = 0; i < order; i++)
        unitNum2 *= dimSize2[i];

    /* a tensor of size (3, 3) */
    int * dimSize3 = new int[order];
    dimSize3[0] = 3;
    dimSize3[1] = 3;

    int unitNum3 = 1;
    for (int i = 0; i < order; i++)
        unitNum3 *= dimSize3[i];

    DTYPE data1[3][2] = { {1.0F, -2.0F},
                          {0.5F, -4.0F},
                          {0.0F, 6.0F} };
    DTYPE data2[2][3] = { {1.0F, 2.0F, 3.0F},
                          {0.0F, 4.0F, 5.0F} };
    /* the matrix product data1 * data2 */
    DTYPE answer[3][3] = { {1.0F, -6.0F, -7.0F},
                           {0.5F, -15.0F, -18.5F},
                           {0.0F, 24.0F, 30.0F} };

    /* CPU test (nothing is checked here yet, so the flag stays true) */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensor(order, dimSize1, X_FLOAT, 1.0F, -1);
    XTensor * b = NewTensor(order, dimSize1, X_FLOAT16, 1.0F, -1);
    XTensor * c = NewTensor(order, dimSize1, X_FLOAT, 1.0F, -1);

    /* initialize variables */
    a->SetData(data1, unitNum1);

    /* call ConvertDataType function (We have not implemented this yet...) */
    //_ConvertDataType(a, b);
    //_ConvertDataType(b, c);

    /* check results */
    //cpuTest = a->CheckData(data1, unitNum1, 1e-4F);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensor(order, dimSize1, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(order, dimSize2, X_FLOAT, 1.0F, 0);
    XTensor * cGPU = NewTensor(order, dimSize1, X_FLOAT16, 1.0F, 0);
    XTensor * dGPU = NewTensor(order, dimSize2, X_FLOAT16, 1.0F, 0);
    XTensor * eGPU = NewTensor(order, dimSize3, X_FLOAT16, 1.0F, 0);
    XTensor * fGPU = NewTensor(order, dimSize3, X_FLOAT, 1.0F, 0);

    /* Initialize variables */
    aGPU->SetData(data1, unitNum1);
    bGPU->SetData(data2, unitNum2);

    /* convert both operands to float16, multiply them in float16,
       and convert the product back to float for the comparison */
    _ConvertDataType(aGPU, cGPU);
    _ConvertDataType(bGPU, dGPU);
    _MatrixMul(cGPU, X_NOTRANS, dGPU, X_NOTRANS, eGPU);
    _ConvertDataType(eGPU, fGPU);

    /* check results */
    gpuTest = fGPU->CheckData(answer, unitNum3, 1e-4F);

    /* destroy variables (every tensor created above, including dGPU, eGPU and fGPU) */
    delete a;
    delete b;
    delete c;
    delete aGPU;
    delete bGPU;
    delete cGPU;
    delete dGPU;
    delete eGPU;
    delete fGPU;
    delete[] dimSize1;
    delete[] dimSize2;
    delete[] dimSize3;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete a;
    delete b;
    delete c;
    delete[] dimSize1;
    delete[] dimSize2;
    delete[] dimSize3;

    return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for ConvertDataType Function */
bool TestConvertDataType()
{
    XPRINT(0, stdout, "[TEST ConvertDataType] convert data type \n");

    /* remains true only while every case succeeds; all cases run regardless */
    bool allPassed = true;

    /* case 1 */
    if (TestConvertDataType1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> case 1 failed!\n");
        allPassed = false;
    }

    /* case 2 */
    if (TestConvertDataType2()) {
        XPRINT(0, stdout, ">> case 2 passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> case 2 failed!\n");
        allPassed = false;
    }

    /* case 3 */
    if (TestConvertDataType3()) {
        XPRINT(0, stdout, ">> case 3 passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> case 3 failed!\n");
        allPassed = false;
    }

    /* TODO: other cases */

    /* overall verdict */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-12
*/
#ifndef __TEST_CONVERTDATATYPE_H__
#define __TEST_CONVERTDATATYPE_H__
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for ConvertDataType Function
entry point of the ConvertDataType test suite; takes no arguments
<< return - a bool flag carrying the overall result of the suite
*/
bool TestConvertDataType();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_CONVERTDATATYPE_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#ifndef __TEST_COPYINDEXED_H__
#define __TEST_COPYINDEXED_H__
#include "../core/movement/CopyIndexed.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
test for CopyIndexed Function
takes no arguments and returns a bool flag
NOTE(review): presumably true means that all cases passed, as in the other
test entry points - the definition is not next to this header, confirm there
*/
bool TestCopyIndexed();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_COPYINDEXED_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#include "../XUtility.h"
#include "TCopyValues.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: copy tensor s to tensor t */
bool TestCopyValues1()
{
    /* a input tensor of size (2, 4) */
    int sOrder = 2;
    int * sDimSize = new int[sOrder];
    sDimSize[0] = 2;
    sDimSize[1] = 4;

    int sUnitNum = 1;
    for (int i = 0; i < sOrder; i++)
        sUnitNum *= sDimSize[i];

    DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * s = NewTensor(sOrder, sDimSize);
    XTensor * t = NewTensor(sOrder, sDimSize);
    XTensor tUser;

    /* initialize variables (the target starts from zeros so that a
       missing copy cannot go unnoticed) */
    s->SetData(sData, sUnitNum);
    t->SetZeroAll();

    /* call CopyValues function */
    _CopyValues(s, t);
    tUser = CopyValues(*s);

    /* check results: both copies must hold the source data */
    cpuTest = t->CheckData(sData, sUnitNum) && tUser.CheckData(sData, sUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
    XTensor tUserGPU;

    /* initialize variables: the target is zeroed as in the CPU test;
       pre-filling it with sData would make the check below pass even
       if _CopyValues did nothing */
    sGPU->SetData(sData, sUnitNum);
    tGPU->SetZeroAll();

    /* call CopyValues function */
    _CopyValues(sGPU, tGPU);
    tUserGPU = CopyValues(*sGPU);

    /* check results: validate the GPU copies (tUserGPU, not the CPU tensor tUser) */
    gpuTest = tGPU->CheckData(sData, sUnitNum) && tUserGPU.CheckData(sData, sUnitNum);

    /* destroy variables */
    delete s;
    delete t;
    delete sGPU;
    delete tGPU;
    delete[] sDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete s;
    delete t;
    delete[] sDimSize;

    return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for CopyValues Function */
bool TestCopyValues()
{
    XPRINT(0, stdout, "[TEST CopyValues] copy tensor s to tensor t \n");

    /* remains true only while every case succeeds */
    bool allPassed = true;

    /* case 1 */
    if (TestCopyValues1()) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        XPRINT(0, stdout, ">> case 1 failed!\n");
        allPassed = false;
    }

    /* TODO: other cases */

    /* overall verdict */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }
    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/
#ifndef __TEST_COPYVALUES_H__
#define __TEST_COPYVALUES_H__
#include "../core/movement/CopyValues.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for CopyValues Function:
   runs the CopyValues test cases and returns true when all of them pass */
bool TestCopyValues();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_COPYVALUES_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-18
*/
#include "../core/math/Unary.h"
#include "TCos.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test Cos function.
Set every entry to its cosine value and compare the outputs of
_Cos, _CosMe and Cos with precomputed answers (tolerance 1e-4).
Returns true when every comparison succeeds.
*/
bool TestCos1()
{
    /* shape of every tensor in this case: (3, 2) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 3;
    dimSize[1] = 2;

    /* total number of entries */
    int unitNum = 1;
    for (int d = 0; d < order; ++d)
        unitNum *= dimSize[d];

    /* input values */
    DTYPE aData[3][2] = { {1.0F, 2.0F},
                          {-1.0F, -2.0F},
                          {0.0F, 0.5F} };

    /* expected cosine of each input value */
    DTYPE answer[3][2] = { {0.5403F, -0.4161F},
                           {0.5403F, -0.4161F},
                           {1.0F, 0.8776F} };

    /* ---- CPU test ---- */
    XTensor * a = NewTensor(order, dimSize);
    XTensor * b = NewTensor(order, dimSize);
    XTensor * aMe = NewTensor(order, dimSize);
    XTensor bUser;

    /* load the input */
    a->SetData(aData, unitNum);
    aMe->SetData(aData, unitNum);

    /* run the three interfaces of Cos */
    _Cos(a, b);
    _CosMe(aMe);
    bUser = Cos(*a);

    /* compare with the expected values (later checks are skipped once one fails) */
    bool cpuTest = b->CheckData(answer, unitNum, 1e-4F);
    cpuTest = cpuTest && aMe->CheckData(answer, unitNum, 1e-4F);
    cpuTest = cpuTest && bUser.CheckData(answer, unitNum, 1e-4F);

    bool allPassed = cpuTest;

#ifdef USE_CUDA
    /* ---- GPU test ---- */
    XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * aMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor bUserGPU;

    /* load the input */
    aGPU->SetData(aData, unitNum);
    aMeGPU->SetData(aData, unitNum);

    /* run the three interfaces of Cos */
    _Cos(aGPU, bGPU);
    _CosMe(aMeGPU);
    bUserGPU = Cos(*aGPU);

    /* compare with the expected values */
    bool gpuTest = bGPU->CheckData(answer, unitNum, 1e-4F);
    gpuTest = gpuTest && aMeGPU->CheckData(answer, unitNum, 1e-4F);
    gpuTest = gpuTest && bUserGPU.CheckData(answer, unitNum, 1e-4F);

    allPassed = cpuTest && gpuTest;
#endif // USE_CUDA

    /* release the tensors and the shape array */
    delete a;
    delete b;
    delete aMe;
#ifdef USE_CUDA
    delete aGPU;
    delete bGPU;
    delete aMeGPU;
#endif // USE_CUDA
    delete[] dimSize;

    return allPassed;
}
/* other cases */
/*
TODO!!
*/
/*
test for Cos Function.
Runs every Cos case, reports each outcome through XPRINT on stdout
and returns true only when all cases passed.
*/
bool TestCos()
{
    XPRINT(0, stdout, "[TEST Cos] set every entry to its cosine value \n");

    bool allPassed = true;

    /* case 1 test */
    const bool case1Passed = TestCos1();
    if (case1Passed) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-31
*/
#ifndef __TEST_SIN_H__
#define __TEST_SIN_H__
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for Sin Function (entry point of the Sin test cases; the definition
   lives in another file -- presumably returns true when all cases pass,
   TODO confirm against the definition) */
bool TestSin();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_SIN_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-31
*/
#ifndef __TEST_COS_H__
#define __TEST_COS_H__
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for Cos Function:
   runs the Cos test cases and returns true when all of them pass */
bool TestCos();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_COS_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-17
*/
#ifndef __TEST_CROSSENTROPY_H__
#define __TEST_CROSSENTROPY_H__
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for CrossEntropy Function (entry point of the CrossEntropy test cases;
   the definition lives in another file -- presumably returns true when all
   cases pass, TODO confirm against the definition) */
bool TestCrossEntropy();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_CROSSENTROPY_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-08-01
*/
#include "TDiv.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: element-wise division of two tensors
c(i) = a(i)/b(i) + \alpha * c(i)
In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=0, alpha=0.
The outputs of _Div, _DivMe and Div are compared with precomputed
answers (tolerance 1e-4); returns true when every comparison succeeds.
*/
bool TestDiv1()
{
    /* first source tensor: size (2, 2) */
    int sOrder1 = 2;
    int * sDimSize1 = new int[sOrder1];
    sDimSize1[0] = 2;
    sDimSize1[1] = 2;

    int sUnitNum1 = 1;
    for (int d = 0; d < sOrder1; ++d)
        sUnitNum1 *= sDimSize1[d];

    /* second source tensor: size (2, 2) */
    int sOrder2 = 2;
    int * sDimSize2 = new int[sOrder2];
    sDimSize2[0] = 2;
    sDimSize2[1] = 2;

    int sUnitNum2 = 1;
    for (int d = 0; d < sOrder2; ++d)
        sUnitNum2 *= sDimSize2[d];

    /* target tensor: size (2, 2) */
    int tOrder = 2;
    int * tDimSize = new int[tOrder];
    tDimSize[0] = 2;
    tDimSize[1] = 2;

    int tUnitNum = 1;
    for (int d = 0; d < tOrder; ++d)
        tUnitNum *= tDimSize[d];

    /* dividend, divisor and expected quotient */
    DTYPE sData1[2][2] = { {0.0F, 1.0F},
                           {2.0F, 3.0F} };
    DTYPE sData2[2][2] = { {1.0F, 1.0F},
                           {4.0F, 9.0F} };
    DTYPE answer[2][2] = { {0.0F, 1.0F},
                           {0.5F, 0.3333F} };

    /* ---- CPU test ---- */
    XTensor * s1 = NewTensor(sOrder1, sDimSize1);
    XTensor * s2 = NewTensor(sOrder2, sDimSize2);
    XTensor * t = NewTensor(tOrder, tDimSize);
    XTensor * tMe = NewTensor(tOrder, tDimSize);
    XTensor tUser;

    /* load the inputs; tMe starts as the dividend because _DivMe is
       expected to leave its result in that tensor */
    s1->SetData(sData1, sUnitNum1);
    tMe->SetData(sData1, sUnitNum1);
    s2->SetData(sData2, sUnitNum2);
    t->SetZeroAll();

    /* run the three interfaces of Div */
    _Div(s1, s2, t, 0, 0);
    _DivMe(tMe, s2, 0, 0);
    tUser = Div(*s1, *s2, 0);

    /* compare with the expected values (later checks are skipped once one fails) */
    bool cpuTest = t->CheckData(answer, tUnitNum, 1e-4F);
    cpuTest = cpuTest && tMe->CheckData(answer, tUnitNum, 1e-4F);
    cpuTest = cpuTest && tUser.CheckData(answer, tUnitNum, 1e-4F);

    bool allPassed = cpuTest;

#ifdef USE_CUDA
    /* ---- GPU test ---- */
    XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
    XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
    XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
    XTensor * tMeGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
    XTensor tUserGPU;

    /* load the inputs */
    sGPU1->SetData(sData1, sUnitNum1);
    tMeGPU->SetData(sData1, sUnitNum1);
    sGPU2->SetData(sData2, sUnitNum2);
    tGPU->SetZeroAll();

    /* run the three interfaces of Div */
    _Div(sGPU1, sGPU2, tGPU, 0, 0);
    _DivMe(tMeGPU, sGPU2, 0, 0);
    tUserGPU = Div(*sGPU1, *sGPU2, 0);

    /* compare with the expected values */
    bool gpuTest = tGPU->CheckData(answer, tUnitNum, 1e-4F);
    gpuTest = gpuTest && tMeGPU->CheckData(answer, tUnitNum, 1e-4F);
    gpuTest = gpuTest && tUserGPU.CheckData(answer, tUnitNum, 1e-4F);

    allPassed = cpuTest && gpuTest;
#endif // USE_CUDA

    /* release the tensors and the shape arrays */
    delete s1;
    delete s2;
    delete t;
    delete tMe;
#ifdef USE_CUDA
    delete sGPU1;
    delete sGPU2;
    delete tGPU;
    delete tMeGPU;
#endif // USE_CUDA
    delete[] sDimSize1;
    delete[] sDimSize2;
    delete[] tDimSize;

    return allPassed;
}
/* other cases */
/*
TODO!!
*/
/*
test for Div Function.
Runs every Div case, reports each outcome through XPRINT on stdout
and returns true only when all cases passed.
*/
bool TestDiv()
{
    XPRINT(0, stdout, "[TEST Div] element-wise division of two tensors \n");

    bool allPassed = true;

    /* case 1 test */
    const bool case1Passed = TestDiv1();
    if (case1Passed) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-08-01
*/
#ifndef __TEST_DIV_H__
#define __TEST_DIV_H__
#include "../core/arithmetic/Div.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for Div Function:
   runs the Div test cases and returns true when all of them pass.
   NOTE(review): this entry point is declared with C linkage, while the other
   test entry points (e.g. TestCopyValues, TestDivDim) are declared without
   it -- confirm that the difference is intentional. */
extern "C"
bool TestDiv();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_DIV_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-08-14
*/
#include "TDivDim.h"
#include "../core/arithmetic/DivDim.h"
#include "../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: tensor division c = a/b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is divided with b by broadcasting.
In this case, (2, 4) / (2) = (2, 4), n = 0, alpha = 0.0.
The three call forms of DivDim used below are compared with the
expected result; returns true when every comparison succeeds.
*/
bool TestDivDim1()
{
    /* tensor a: size (2, 4) */
    int aOrder = 2;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 2;
    aDimSize[1] = 4;

    int aUnitNum = 1;
    for (int d = 0; d < aOrder; ++d)
        aUnitNum *= aDimSize[d];

    /* tensor b: size (2) */
    int bOrder = 1;
    int * bDimSize = new int[bOrder];
    bDimSize[0] = 2;

    int bUnitNum = 1;
    for (int d = 0; d < bOrder; ++d)
        bUnitNum *= bDimSize[d];

    /* dividend, divisor and expected quotient */
    DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE bData[2] = {1.0F, -1.0F};
    DTYPE answer[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                           {-4.0F, -5.0F, -6.0F, -7.0F} };

    /* ---- CPU test ---- */
    XTensor * a = NewTensor(aOrder, aDimSize);
    XTensor * b = NewTensor(bOrder, bDimSize);
    XTensor * c = NewTensor(aOrder, aDimSize);
    XTensor * cMe = NewTensor(aOrder, aDimSize);
    XTensor cUser;

    /* load the inputs; cMe starts as the dividend because the two-tensor
       form of _DivDim is expected to leave its result in that tensor */
    a->SetData(aData, aUnitNum);
    cMe->SetData(aData, aUnitNum);
    b->SetData(bData, bUnitNum);
    c->SetZeroAll();

    /* call DivDim function */
    _DivDim(a, b, c, 0);
    _DivDim(cMe, b, 0);
    cUser = DivDim(*a, *b, 0);

    /* compare with the expected values (later checks are skipped once one fails) */
    bool cpuTest = c->CheckData(answer, aUnitNum);
    cpuTest = cpuTest && cMe->CheckData(answer, aUnitNum);
    cpuTest = cpuTest && cUser.CheckData(answer, aUnitNum);

    bool allPassed = cpuTest;

#ifdef USE_CUDA
    /* ---- GPU test ---- */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
    XTensor * cGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * cMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor cUserGPU;

    /* load the inputs */
    aGPU->SetData(aData, aUnitNum);
    cMeGPU->SetData(aData, aUnitNum);
    bGPU->SetData(bData, bUnitNum);
    cGPU->SetZeroAll();

    /* call DivDim function */
    _DivDim(aGPU, bGPU, cGPU, 0);
    _DivDim(cMeGPU, bGPU, 0);
    cUserGPU = DivDim(*aGPU, *bGPU, 0);

    /* compare with the expected values */
    bool gpuTest = cGPU->CheckData(answer, aUnitNum);
    gpuTest = gpuTest && cMeGPU->CheckData(answer, aUnitNum);
    gpuTest = gpuTest && cUserGPU.CheckData(answer, aUnitNum);

    allPassed = cpuTest && gpuTest;
#endif // USE_CUDA

    /* release the tensors and the shape arrays */
    delete a;
    delete b;
    delete c;
    delete cMe;
#ifdef USE_CUDA
    delete aGPU;
    delete bGPU;
    delete cGPU;
    delete cMeGPU;
#endif // USE_CUDA
    delete[] aDimSize;
    delete[] bDimSize;

    return allPassed;
}
/*
case 2: tensor division c = a/b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is divided with b by broadcasting.
In this case, (2, 4) / (2, 2) = (2, 4), n = 1.
The three call forms of DivDim used below are compared with the
expected result; returns true when every comparison succeeds.
*/
bool TestDivDim2()
{
    /* tensor a: size (2, 4) */
    int aOrder = 2;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 2;
    aDimSize[1] = 4;

    int aUnitNum = 1;
    for (int d = 0; d < aOrder; ++d)
        aUnitNum *= aDimSize[d];

    /* tensor b: size (2, 2) */
    int bOrder = 2;
    int * bDimSize = new int[bOrder];
    bDimSize[0] = 2;
    bDimSize[1] = 2;

    int bUnitNum = 1;
    for (int d = 0; d < bOrder; ++d)
        bUnitNum *= bDimSize[d];

    /* dividend, divisor and expected quotient */
    DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE bData[2][2] = { {1.0F, -1.0F},
                          {-1.0F, 1.0F} };
    DTYPE answer[2][4] = { {0.0F, -1.0F, -2.0F, 3.0F},
                           {4.0F, -5.0F, -6.0F, 7.0F} };

    /* ---- CPU test ---- */
    XTensor * a = NewTensor(aOrder, aDimSize);
    XTensor * b = NewTensor(bOrder, bDimSize);
    XTensor * c = NewTensor(aOrder, aDimSize);
    XTensor * cMe = NewTensor(aOrder, aDimSize);
    XTensor cUser;

    /* load the inputs; cMe starts as the dividend because the two-tensor
       form of _DivDim is expected to leave its result in that tensor */
    a->SetData(aData, aUnitNum);
    cMe->SetData(aData, aUnitNum);
    b->SetData(bData, bUnitNum);
    c->SetZeroAll();

    /* call DivDim function */
    _DivDim(a, b, c, 1);
    _DivDim(cMe, b, 1);
    cUser = DivDim(*a, *b, 1);

    /* compare with the expected values (later checks are skipped once one fails) */
    bool cpuTest = c->CheckData(answer, aUnitNum);
    cpuTest = cpuTest && cMe->CheckData(answer, aUnitNum);
    cpuTest = cpuTest && cUser.CheckData(answer, aUnitNum);

    bool allPassed = cpuTest;

#ifdef USE_CUDA
    /* ---- GPU test ---- */
    XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
    XTensor * cGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * cMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor cUserGPU;

    /* load the inputs */
    aGPU->SetData(aData, aUnitNum);
    cMeGPU->SetData(aData, aUnitNum);
    bGPU->SetData(bData, bUnitNum);
    cGPU->SetZeroAll();

    /* call DivDim function */
    _DivDim(aGPU, bGPU, cGPU, 1);
    _DivDim(cMeGPU, bGPU, 1);
    cUserGPU = DivDim(*aGPU, *bGPU, 1);

    /* compare with the expected values */
    bool gpuTest = cGPU->CheckData(answer, aUnitNum);
    gpuTest = gpuTest && cMeGPU->CheckData(answer, aUnitNum);
    gpuTest = gpuTest && cUserGPU.CheckData(answer, aUnitNum);

    allPassed = cpuTest && gpuTest;
#endif // USE_CUDA

    /* release the tensors and the shape arrays */
    delete a;
    delete b;
    delete c;
    delete cMe;
#ifdef USE_CUDA
    delete aGPU;
    delete bGPU;
    delete cGPU;
    delete cMeGPU;
#endif // USE_CUDA
    delete[] aDimSize;
    delete[] bDimSize;

    return allPassed;
}
/* other cases */
/*
TODO!!
*/
/*
test for DivDim Function.
Runs every DivDim case, reports each outcome through XPRINT on stdout
and returns true only when all cases passed.
*/
bool TestDivDim()
{
    XPRINT(0, stdout, "[TEST DIVDIM] tensor division c(i) = a/b + \\alpha * c by broadcasting\n");

    bool allPassed = true;

    /* case 1 test */
    const bool case1Passed = TestDivDim1();
    if (case1Passed) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* case 2 test */
    const bool case2Passed = TestDivDim2();
    if (case2Passed) {
        XPRINT(0, stdout, ">> case 2 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-08-14
*/
#ifndef __TEST_DIVDIM_H__
#define __TEST_DIVDIM_H__
#include "../core/arithmetic/DivDim.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for DivDim Function:
   runs the DivDim test cases and returns true when all of them pass */
bool TestDivDim();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_DIVDIM_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-12
*/
#include "../XUtility.h"
#include "TDropout.h"
#include "../core/getandset/SetData.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* count the entries of a host-side float buffer that are exactly zero */
static int CountZeroUnits(const float * data, int num)
{
    int zeroNum = 0;
    for (int i = 0; i < num; i++) {
        if (data[i] == 0.0F)
            zeroNum += 1;
    }
    return zeroNum;
}

/*
case 1: test Dropout function.
Dropout is applied to a tensor filled with 1.0 through both _Dropout and
Dropout, and the number of zeroed units is printed for manual inspection.
No automatic check is performed, so the function always returns true.
*/
bool TestDropout1()
{
    /* an input tensor of size (40, 50, 60) */
    int order = 3;
    int * dimSize = new int[order];
    dimSize[0] = 40;
    dimSize[1] = 50;
    dimSize[2] = 60;

    int unitNum = 1;
    for (int i = 0; i < order; i++)
        unitNum *= dimSize[i];

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * x = NewTensor(order, dimSize);
    XTensor * y = NewTensor(order, dimSize);
    XTensor yUser;

    /* initialize variables */
    _SetDataFixedFloat(x, 1.0F);
    y->SetZeroAll();

    /* call Dropout function */
    float dropProb = 0.2F;
    int seed = 20;
    _Dropout(x, y, seed, dropProb);
    yUser = Dropout(*x, dropProb);

    /* check result: count the units that were zeroed */
    int zeroNum1 = CountZeroUnits((float*)y->data, unitNum);
    int zeroNum2 = CountZeroUnits((float*)yUser.data, unitNum);

    printf("CPU Test:\n");
    printf("In tensor y, there are %d units.\n", unitNum);
    printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum1, dropProb);
    printf("In tensor yUser, there are %d units.\n", unitNum);
    printf("There are %d zero units by Dropout layer with default probability %.2f.\n", zeroNum2, dropProb);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensors */
    XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor yUserGPU;

    /* initialize variables */
    _SetDataFixedFloat(xGPU, 1.0F);
    yGPU->SetZeroAll();

    /* call Dropout function */
    _Dropout(xGPU, yGPU, seed, dropProb);
    yUserGPU = Dropout(*xGPU, dropProb);

    /* check result: inspect the GPU outputs (not the CPU tensors again).
       Their data is copied into a host buffer first so that it can be read here.
       NOTE(review): -1 is assumed to be the device id of host memory and
       XMemCopy is assumed to come from XUtility.h -- confirm */
    float * hostData = new float[unitNum];

    XMemCopy(hostData, -1, yGPU->data, yGPU->devID, sizeof(float) * unitNum);
    zeroNum1 = CountZeroUnits(hostData, unitNum);

    XMemCopy(hostData, -1, yUserGPU.data, yUserGPU.devID, sizeof(float) * unitNum);
    zeroNum2 = CountZeroUnits(hostData, unitNum);

    delete[] hostData;

    printf("GPU Test:\n");
    printf("In tensor yGPU, there are %d units.\n", unitNum);
    printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum1, dropProb);
    printf("In tensor yUserGPU, there are %d units.\n", unitNum);
    printf("There are %d zero units by Dropout layer with default probability %.2f.\n", zeroNum2, dropProb);

    /* destroy variables */
    delete x;
    delete y;
    delete xGPU;
    delete yGPU;
    delete[] dimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete x;
    delete y;
    delete[] dimSize;

    return cpuTest;
#endif // USE_CUDA
}
/*
case 2: test Dropout function and backward computation.
Runs _Dropout followed by _DropoutBackward on (4, 5) tensors. The results
are not compared with reference values (the Dump calls are disabled), so
the function always returns true.
*/
bool TestDropout2()
{
    /* shape of every tensor in this case: (4, 5) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 4;
    dimSize[1] = 5;

    /* total number of entries */
    int unitNum = 1;
    for (int d = 0; d < order; ++d)
        unitNum *= dimSize[d];

    /* settings shared by the CPU and GPU runs */
    float dropProb = 0.5F;
    int seed = 1;

    /* ---- CPU test ---- */
    bool cpuTest = true;

    XTensor * x = NewTensor(order, dimSize);
    XTensor * y = NewTensor(order, dimSize);
    XTensor * dedx = NewTensor(order, dimSize);
    XTensor * dedy = NewTensor(order, dimSize);

    /* input is all 1.0, incoming gradient is all 1.5, outputs start from zero */
    _SetDataFixedFloat(x, 1.0F);
    y->SetZeroAll();
    dedx->SetZeroAll();
    _SetDataFixedFloat(dedy, 1.5F);

    /* forward and backward pass */
    _Dropout(x, y, seed, dropProb);
    _DropoutBackward(y, x, dedy, dedx, 1, dropProb);

    /* check result */
    //y->Dump(stderr, "y");
    //dedx->Dump(stderr, "dedy");

    bool allPassed = cpuTest;

#ifdef USE_CUDA
    /* ---- GPU test ---- */
    bool gpuTest = true;

    XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);

    /* same initial values as in the CPU run */
    _SetDataFixedFloat(xGPU, 1.0F);
    yGPU->SetZeroAll();
    dedxGPU->SetZeroAll();
    _SetDataFixedFloat(dedyGPU, 1.5F);

    /* forward and backward pass */
    _Dropout(xGPU, yGPU, seed, dropProb);
    _DropoutBackward(yGPU, xGPU, dedyGPU, dedxGPU, 1, dropProb);

    /* check result */
    //yGPU->Dump(stderr, "yGPU");
    //dedxGPU->Dump(stderr, "dedyGPU");

    allPassed = cpuTest && gpuTest;
#endif // USE_CUDA

    /* release the tensors and the shape array */
    delete x;
    delete y;
    delete dedx;
    delete dedy;
#ifdef USE_CUDA
    delete xGPU;
    delete yGPU;
    delete dedxGPU;
    delete dedyGPU;
#endif // USE_CUDA
    delete[] dimSize;

    return allPassed;
}
/* other cases */
/*
TODO!!
*/
/*
test for Dropout Function.
Runs every Dropout case, reports each outcome through XPRINT on stdout
and returns true only when all cases passed.
*/
bool TestDropout()
{
    XPRINT(0, stdout, "[TEST DROPOUT] dropout function and its backward computation \n");

    bool allPassed = true;

    /* case 1 test */
    const bool case1Passed = TestDropout1();
    if (case1Passed) {
        XPRINT(0, stdout, ">> case 1 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 1 failed!\n");
    }

    /* case 2 test */
    const bool case2Passed = TestDropout2();
    if (case2Passed) {
        XPRINT(0, stdout, ">> case 2 passed!\n");
    }
    else {
        allPassed = false;
        XPRINT(0, stdout, ">> case 2 failed!\n");
    }

    /* other cases test */
    /*
    TODO!!
    */

    /* overall summary */
    if (!allPassed) {
        XPRINT(0, stdout, ">> Failed!\n");
    }
    else {
        XPRINT(0, stdout, ">> All Passed!\n");
    }

    XPRINT(0, stdout, "\n");

    return allPassed;
}
} // namespace nts(NiuTrans.Tensor)
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论