Commit 0e585782 by xuchen

Merge with liyinqiao branch and add stack function.

parent 93bc3158
@@ -76,8 +76,8 @@ void BackwardTest()
c.enableGrad = false;
XTensor mean;
XTensor origin;
- InitTensor2D(&a, 2, 3);
- InitTensor1D(&b, 2);
+ InitTensor2DV2(&a, 2, 3);
+ InitTensor1DV2(&b, 2);
a.SetZeroAll();
b.SetZeroAll();
@@ -121,9 +121,9 @@ void TransposeTest()
int nnn = GDevs.nGPU;
- InitTensor3D(&x, B, N, H, X_FLOAT, 0);
- InitTensor4D(&y, K, B, N, H/K, X_FLOAT, 0);
- InitTensor3D(&z, B, N, H, X_FLOAT, 0);
+ InitTensor3DV2(&x, B, N, H, X_FLOAT, 0);
+ InitTensor4DV2(&y, K, B, N, H/K, X_FLOAT, 0);
+ InitTensor3DV2(&z, B, N, H, X_FLOAT, 0);
cudaEvent_t ctime0;
cudaEvent_t ctime1;
@@ -191,9 +191,9 @@ void SumDimTest()
int b = 7;
int c = 3;
- InitTensor3D(&x, a, b, c, X_FLOAT, -1);
- InitTensor1D(&y, c, X_FLOAT, -1);
- InitTensor3D(&z, a, b, c, X_FLOAT, -1);
+ InitTensor3DV2(&x, a, b, c, X_FLOAT, -1);
+ InitTensor1DV2(&y, c, X_FLOAT, -1);
+ InitTensor3DV2(&z, a, b, c, X_FLOAT, -1);
x.SetZeroAll();
y.SetZeroAll();
...
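The renames above follow the convention declared in the new XCall.h added by this commit: the plain InitTensorXD(...) initializers end with an isEnableGrad flag, while the InitTensorXDV2(...) variants end with an optional XMem pool. A minimal sketch of the two call styles (tensor names are illustrative, not from the diff):

#include "XCall.h"
using namespace nts;
void InitStyleDemo()
{
    XTensor a;
    XTensor b;
    /* plain variant: dense tensor, gradient tracking enabled by default */
    InitTensor2D(&a, 2, 3, X_FLOAT, -1 /* CPU */, true /* isEnableGrad */);
    /* V2 variant: same shape, optionally bound to a memory pool */
    XMem * mem = NULL; /* assumption: no pool, direct allocation */
    InitTensor2DV2(&b, 2, 3, X_FLOAT, -1, mem);
}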
@@ -281,7 +281,7 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
smallsGrad.Add(tail->grad);
if(i > 1){
- CheckNTErrors(XTensor::IsSameShaped(last, tail),
+ CheckNTErrors(_IsSameShaped(last, tail),
"Input tensors must be of the same size!");
}
@@ -391,7 +391,7 @@ void XShapeGrad::GradSplit(XTensor * node, bool isEfficient)
/* if the tensor is used somewhere else, we need another SUM
for gradient accumulation */
else{
- XTensor * inputGradTMP = NewTensorBuf(input, input->devID, input->mem);
+ XTensor * inputGradTMP = NewTensorBufV2(input, input->devID, input->mem);
_Merge(node->grad, inputGradTMP, whereToSplit + 1, 0);
_Sum(input->grad, inputGradTMP, input->grad);
@@ -475,7 +475,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
somewhere else, we need another SUM for gradient
accumulation */
else{
- XTensor * nodeGradTMP = NewTensorBuf(node, node->devID, node->mem);
+ XTensor * nodeGradTMP = NewTensorBufV2(node, node->devID, node->mem);
_Merge(&splits, nodeGradTMP, whereToSplit + 1);
_Sum(node->grad, nodeGradTMP, node->grad);
@@ -501,7 +501,7 @@ void XShapeGrad::GradTranspose(XTensor * node, bool isEfficient)
XTensor * output = node;
XTensor * input = income.tails[0];
- XTensor * b = NewTensorBuf(input, input->devID, input->mem);
+ XTensor * b = NewTensorBufV2(input, input->devID, input->mem);
XNoder::MakeGrad(input);
int i = income.GetParamInt(0);
@@ -543,7 +543,7 @@ void XShapeGrad::GradUnsqueeze(XTensor * node, bool isEfficient)
CheckNTErrors(dSize == output->GetDim(dim), "Wrong dim size for UNSQUEEZE!");
CheckNTErrors(output->unitNum = input->unitNum * dSize, "Wrong tensor size!");
- XTensor * g = NewTensorBuf(input->grad, input->devID, input->mem);
+ XTensor * g = NewTensorBufV2(input->grad, input->devID, input->mem);
_ReduceSum(output->grad, g, dim);
_Sum(input->grad, g, input->grad);
...
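GradSplit and GradSplitListPost both use the library's buffered-accumulation idiom; condensed, with the DelTensorBuf release that closes the pattern (a sketch assuming the XCall.h API from this commit):

/* scratch gradient allocated on the memory buffer */
XTensor * inputGradTMP = NewTensorBufV2(input, input->devID, input->mem);
/* rebuild the input gradient from the gradient of the SPLIT output */
_Merge(node->grad, inputGradTMP, whereToSplit + 1, 0);
/* accumulate rather than overwrite: input->grad += inputGradTMP */
_Sum(input->grad, inputGradTMP, input->grad);
/* return the scratch tensor to the buffer */
DelTensorBuf(inputGradTMP);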
@@ -29,7 +29,7 @@ void XNoder::MakeGrad(XTensor * node)
if(node == NULL)
return;
- if(!XTensor::IsSameShaped(node, node->grad)){
+ if(!_IsSameShaped(node, node->grad)){
delete node->grad;
node->grad = NewTensor(node);
node->grad->SetZeroAll();
...
@@ -20,7 +20,7 @@
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
*/
- #include "../tensor/XTensor.h"
+ #include "../tensor/core/CHeader.h"
#ifndef __XNODER_H__
#define __XNODER_H__
...
@@ -242,13 +242,13 @@ void Check(FNNModel &model)
/* make a hard copy of the fnn model */
void Copy(FNNModel &tgt, FNNModel &src)
{
- InitTensorV2(&tgt.embeddingW, &src.embeddingW);
+ InitTensor(&tgt.embeddingW, &src.embeddingW);
for(int i = 0; i < MAX_HIDDEN_NUM; i++){
- InitTensorV2(&tgt.hiddenW[i], &src.hiddenW[i]);
- InitTensorV2(&tgt.hiddenB[i], &src.hiddenB[i]);
+ InitTensor(&tgt.hiddenW[i], &src.hiddenW[i]);
+ InitTensor(&tgt.hiddenB[i], &src.hiddenB[i]);
}
- InitTensorV2(&tgt.outputW, &src.outputW);
- InitTensorV2(&tgt.outputB, &src.outputB);
+ InitTensor(&tgt.outputW, &src.outputW);
+ InitTensor(&tgt.outputB, &src.outputB);
tgt.n = src.n;
tgt.eSize = src.eSize;
@@ -300,7 +300,7 @@ initialize a 1d tensor using the fnn model setting
*/
void InitModelTensor1D(XTensor &tensor, int num, FNNModel &model)
{
- InitTensor1DV2(&tensor, num, X_FLOAT, model.devID);
+ InitTensor1D(&tensor, num, X_FLOAT, model.devID);
}
/*
@@ -312,7 +312,7 @@ initialize a 2d tensor using the fnn model setting
*/
void InitModelTensor2D(XTensor &tensor, int rowNum, int colNum, FNNModel &model)
{
- InitTensor2DV2(&tensor, rowNum, colNum, X_FLOAT, model.devID);
+ InitTensor2D(&tensor, rowNum, colNum, X_FLOAT, model.devID);
}
@@ -594,14 +594,14 @@ get prediction probabilites of the gold words
float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
{
XTensor probs;
- InitTensorV2(&probs, &output);
+ InitTensor(&probs, &output);
/* probs[i,j] = output[i,j] * gold[i,j] */
Multiply(output, gold, probs);
/* probability of each word */
XTensor wprobs;
- InitTensor1DV2(&wprobs, output.GetDim(0), output.dataType, output.devID);
+ InitTensor1D(&wprobs, output.GetDim(0), output.dataType, output.devID);
ReduceSum(probs, wprobs, 1);
if(wordProbs != NULL)
CopyValues(wprobs, *wordProbs);
@@ -615,7 +615,7 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
/* probability for the batch */
XTensor result;
- InitTensor1DV2(&result, 1, X_FLOAT, output.devID);
+ InitTensor1D(&result, 1, X_FLOAT, output.devID);
ReduceSum(probs, result, 1);
return result.Get1D(0);
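Since gold is one-hot, the element-wise product plus row sum in GetProb simply selects the model's probability of each gold word. Assuming y_i is the gold word index of row i:

w_i = \sum_j output_{ij} \cdot gold_{ij} = output_{i, y_i}
P_{batch} = \sum_i w_i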
@@ -716,7 +716,7 @@ The indexed cell is set to 1, and 0 otherwise.
void InitZeroOneTensor2D(XTensor &tensor, int rowNum, int colNum, int * rows, int * cols,
int itemNum, int devID)
{
- InitTensor2DV2(&tensor, rowNum, colNum, X_FLOAT, devID);
+ InitTensor2D(&tensor, rowNum, colNum, X_FLOAT, devID);
tensor.SetZeroAll();
@@ -808,7 +808,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
/* make a 2d tensor for the bias term */
XTensor b2D;
- InitTensorV2(&b2D, &s);
+ InitTensor(&b2D, &s);
Unsqueeze(b, b2D, 0, batchSize);
/* introduce bias term:
@@ -840,7 +840,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
MatrixMul(h_last, X_NOTRANS, w, X_NOTRANS, s);
XTensor b2D;
- InitTensorV2(&b2D, &s);
+ InitTensor(&b2D, &s);
Unsqueeze(b, b2D, 0, batchSize);
Sum(s, b2D, s);
@@ -905,8 +905,8 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
XTensor dedsHidden;
XTensor dedxBottom;
if (depth > 0)
- InitTensorV2(&dedsHidden, &dedx);
- InitTensorV2(&dedxBottom, &net.embeddingCat);
+ InitTensor(&dedsHidden, &dedx);
+ InitTensor(&dedxBottom, &net.embeddingCat);
/* back-propagation from top to bottom in the stack of hidden layers
for each layer, h = f(s)
@@ -944,7 +944,7 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
/* back-propagation for the embedding layer */
for (int i = 0; i < n - 1; i++) {
- XTensor * dedy = NewTensor2DV2(batchSize, model.eSize, X_FLOAT, model.devID);
+ XTensor * dedy = NewTensor2D(batchSize, model.eSize, X_FLOAT, model.devID);
eList.Add(dedy);
}
@@ -996,7 +996,7 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model
}
}
- InitTensor1DV2(&words, size, X_INT, model.devID);
+ InitTensor1D(&words, size, X_INT, model.devID);
words.SetData(index, size);
embeddingBig = Gather(model.embeddingW, words);
@@ -1176,7 +1176,7 @@ void Test(const char * test, const char * result, FNNModel &model)
/* prediction probabilities */
XTensor probs;
- InitTensor1DV2(&probs, ngramNum);
+ InitTensor1D(&probs, ngramNum);
/* get probabilities */
float prob = GetProb(output, gold, &probs);
...
@@ -69,11 +69,11 @@ void T2TAttention::InitModel(int argc, char ** argv,
LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutatt", &dropoutP, 0);
- InitTensor2DV2(&wk, d, dk, X_FLOAT, devID);
- InitTensor2DV2(&wq, d, dk, X_FLOAT, devID);
- InitTensor2DV2(&wv, d, dv, X_FLOAT, devID);
- InitTensor2DV2(&wa, d, d, X_FLOAT, devID);
- InitTensor2DV2(&wbig, d, 3 * d, X_FLOAT, devID);
+ InitTensor2D(&wk, d, dk, X_FLOAT, devID);
+ InitTensor2D(&wq, d, dk, X_FLOAT, devID);
+ InitTensor2D(&wv, d, dv, X_FLOAT, devID);
+ InitTensor2D(&wa, d, d, X_FLOAT, devID);
+ InitTensor2D(&wbig, d, 3 * d, X_FLOAT, devID);
float scale = 1.0F;
_SetDataFanInOut(&wk, scale);
@@ -128,9 +128,9 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining)
int d2 = kqv2.GetDim(1);
int d3 = kqv2.GetDim(2) / 3;
- InitTensor3DV2(&k2, d1, d2, d3, X_FLOAT, devID);
- InitTensor3DV2(&q2, d1, d2, d3, X_FLOAT, devID);
- InitTensor3DV2(&v2, d1, d2, d3, X_FLOAT, devID);
+ InitTensor3D(&k2, d1, d2, d3, X_FLOAT, devID);
+ InitTensor3D(&q2, d1, d2, d3, X_FLOAT, devID);
+ InitTensor3D(&v2, d1, d2, d3, X_FLOAT, devID);
split.Add(&q2);
split.Add(&k2);
...
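MakeBig runs one fused projection with wbig (d x 3d) and slices the result back into queries, keys and values. A hedged sketch of that flow; the MMul and Split calls around the visible lines are not in the hunk, so their exact form is an assumption:

/* fused projection: one matrix multiply instead of three */
XTensor kqv2 = MMul(kqv, wbig);      /* last dimension has size 3 * d3 */
int d1 = kqv2.GetDim(0);
int d2 = kqv2.GetDim(1);
int d3 = kqv2.GetDim(2) / 3;         /* q, k and v each take a third */
XTensor q2, k2, v2;
InitTensor3D(&q2, d1, d2, d3, X_FLOAT, devID);
InitTensor3D(&k2, d1, d2, d3, X_FLOAT, devID);
InitTensor3D(&v2, d1, d2, d3, X_FLOAT, devID);
TensorList split;
split.Add(&q2);
split.Add(&k2);
split.Add(&v2);
Split(kqv2, split, 2, 3);            /* assumption: split dim 2 into 3 parts */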
@@ -365,11 +365,11 @@ int T2TBatchLoader::LoadBatchLM(FILE * file,
dims[1] = max;
dims[2] = vSize;
- InitTensor2DV2(batchEnc, sc, max, X_INT, devID);
- InitTensor2DV2(label, sc, max, X_INT, devID);
- InitTensorV2(gold, 3, dims, X_FLOAT, devID);
- InitTensor2DV2(paddingEnc, sc, max, X_FLOAT, devID);
- InitTensor2DV2(paddingDec, sc, max, X_FLOAT, devID);
+ InitTensor2D(batchEnc, sc, max, X_INT, devID);
+ InitTensor2D(label, sc, max, X_INT, devID);
+ InitTensor(gold, 3, dims, X_FLOAT, devID);
+ InitTensor2D(paddingEnc, sc, max, X_FLOAT, devID);
+ InitTensor2D(paddingDec, sc, max, X_FLOAT, devID);
batchEnc->SetZeroAll();
label->SetZeroAll();
@@ -433,12 +433,12 @@ int T2TBatchLoader::LoadBatchLM(FILE * file,
paddingEnc->SetDataBatched(paddingEncOffsets, 1.0F, wCount);
paddingDec->SetDataBatched(paddingDecOffsets, 1.0F, wCount);
- /*XTensor * tmp = NewTensorBufV2(paddingEnc, devID);
+ /*XTensor * tmp = NewTensorBuf(paddingEnc, devID);
_ConvertDataType(batchEnc, tmp);
_NotEqual(tmp, paddingEnc, 0);
DelTensorBuf(tmp);
- XTensor * tmp2 = NewTensorBufV2(paddingDec, devID);
+ XTensor * tmp2 = NewTensorBuf(paddingDec, devID);
_ConvertDataType(batchEnc, tmp2);
_NotEqual(tmp2, paddingDec, 0);
DelTensorBuf(tmp2);*/
@@ -563,12 +563,12 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
int sCount = sc/2;
int seqSize = 0;
- InitTensor2DV2(batchEnc, sCount, maxEnc, X_INT, devID);
- InitTensor2DV2(paddingEnc, sCount, maxEnc, X_FLOAT, devID);
- InitTensor2DV2(batchDec, sCount, maxDec, X_INT, devID);
- InitTensor2DV2(paddingDec, sCount, maxDec, X_FLOAT, devID);
- InitTensor2DV2(label, sCount, maxDec, X_INT, devID);
- //InitTensorV2(gold, 3, dimsDec, X_FLOAT, devID);
+ InitTensor2D(batchEnc, sCount, maxEnc, X_INT, devID);
+ InitTensor2D(paddingEnc, sCount, maxEnc, X_FLOAT, devID);
+ InitTensor2D(batchDec, sCount, maxDec, X_INT, devID);
+ InitTensor2D(paddingDec, sCount, maxDec, X_FLOAT, devID);
+ InitTensor2D(label, sCount, maxDec, X_INT, devID);
+ //InitTensor(gold, 3, dimsDec, X_FLOAT, devID);
batchEnc->SetZeroAll();
paddingEnc->SetZeroAll();
@@ -607,7 +607,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
ws = wCountEnc;
batchEnc->SetData(batchEncValues, batchEnc->unitNum);
paddingEnc->SetDataBatched(paddingEncOffsets, 1.0F, wCountEnc);
- //XTensor * tmp = NewTensorBufV2(paddingEnc, devID);
+ //XTensor * tmp = NewTensorBuf(paddingEnc, devID);
//_ConvertDataType(batchEnc, tmp);
//tmp->Dump(stderr, "tmp:");
//_NotEqual(tmp, paddingEnc, 0);
@@ -656,7 +656,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
label->SetData(labelValues, label->unitNum);
paddingDec->SetDataBatched(paddingDecOffsets, 1.0F, wCountPad);
- //XTensor * tmp2 = NewTensorBufV2(paddingDec, devID);
+ //XTensor * tmp2 = NewTensorBuf(paddingDec, devID);
//_ConvertDataType(batchDec, tmp2);
//_NotEqual(tmp2, paddingDec, 0);
//DelTensorBuf(tmp2);
...
@@ -61,7 +61,7 @@ void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, bool isEnc)
LoadParamInt(argc, argv, "d", &eSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
- InitTensor2DV2(&w, vSize, eSize, X_FLOAT, devID);
+ InitTensor2D(&w, vSize, eSize, X_FLOAT, devID);
DTYPE v = 1.0F/(float)sqrt((float)eSize);
w.SetDataRandn(0, v);
@@ -78,7 +78,7 @@ make positional embeddings (of size eSize * length)
*/
void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length)
{
- InitTensor2DV2(&posEmbeddingBase, length, eSize, X_FLOAT, devID);
+ InitTensor2D(&posEmbeddingBase, length, eSize, X_FLOAT, devID);
float * data = new float[posEmbeddingBase.unitNum];
@@ -142,9 +142,9 @@ XTensor T2TEmbedder::Make(XTensor &input)
/* we make positional embeddings first */
//if(!match){
if(true){
- InitTensorV2(&posEmbedding, input.order + 1, dims, X_FLOAT, devID);
- XTensor * posTMP = NewTensorBufV2(2, dims + 1, X_FLOAT, devID);
+ InitTensor(&posEmbedding, input.order + 1, dims, X_FLOAT, devID);
+ XTensor * posTMP = NewTensorBuf(2, dims + 1, X_FLOAT, devID);
_CopyValues(&posEmbeddingBase, 0, posTMP->unitNum, posTMP, 0);
_Unsqueeze(posTMP, &posEmbedding, 0, dims[0]);
...
@@ -60,11 +60,11 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID)
LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutfnn", &dropoutP, 0);
- InitTensor2DV2(&w1, inSize, hSize, X_FLOAT, devID);
- InitTensor1DV2(&b1, hSize, X_FLOAT, devID);
- InitTensor2DV2(&w2, hSize, outSize, X_FLOAT, devID);
- InitTensor1DV2(&b2, outSize, X_FLOAT, devID);
+ InitTensor2D(&w1, inSize, hSize, X_FLOAT, devID);
+ InitTensor1D(&b1, hSize, X_FLOAT, devID);
+ InitTensor2D(&w2, hSize, outSize, X_FLOAT, devID);
+ InitTensor1D(&b2, outSize, X_FLOAT, devID);
float scale = 1.0F;
_SetDataFanInOut(&w1, scale);
...
@@ -53,8 +53,8 @@ void T2TLN::InitModel(int argc, char ** argv, int myDevID)
d = 0;
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
- InitTensor1DV2(&w, d, X_FLOAT, devID);
- InitTensor1DV2(&b, d, X_FLOAT, devID);
+ InitTensor1D(&w, d, X_FLOAT, devID);
+ InitTensor1D(&b, d, X_FLOAT, devID);
w.SetDataRand(1.0F, 1.0F);
b.SetZeroAll();
...
@@ -132,7 +132,7 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool is
dims[0] = nhead;
dims[input.order + 1] = len;
XTensor mask;
- InitTensorV2(&mask, input.order + 2, dims, X_FLOAT, padding.devID);
+ InitTensor(&mask, input.order + 2, dims, X_FLOAT, padding.devID);
/* a upper triangular matrix where the cells of the upper triangular are set to -1e-9.
this matrix can be used to prevent the attention to current or following words in
@@ -146,14 +146,14 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool is
dimsPadding[padding.order - 1] = padding.GetDim(-1);
dimsPadding[padding.order] = padding.GetDim(-1);
- XTensor * padding2 = NewTensorBufV2(padding.order + 1, dimsPadding, padding.dataType,
+ XTensor * padding2 = NewTensorBuf(padding.order + 1, dimsPadding, padding.dataType,
padding.devID);
for(int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead;
- //XTensor * padding3 = NewTensorBufV2(padding.order + 2, dimsPadding, padding.dataType,
+ //XTensor * padding3 = NewTensorBuf(padding.order + 2, dimsPadding, padding.dataType,
// padding.devID);
//
///* mask of the padding */
@@ -224,7 +224,7 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
dims[i + 1] = inputDec.GetDim(i);
dims[0] = nhead;
dims[inputDec.order + 1] = len;
- InitTensorV2(&maskDec, inputDec.order + 2, dims, X_FLOAT, paddingDec.devID);
+ InitTensor(&maskDec, inputDec.order + 2, dims, X_FLOAT, paddingDec.devID);
/* an upper triangular matrix where the cells of the upper triangular are set to -1e-9.
this matrix can be used to prevent the attention to current or following words in
@@ -234,11 +234,11 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
/* encoder-decoder mask that prevents the attention to padding dummy words */
dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1);
- InitTensorV2(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
+ InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
- XTensor * maskEncDecTMPEnc = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
+ XTensor * maskEncDecTMPEnc = NewTensorBuf(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
paddingEnc.devID);
- XTensor * maskEncDecTMPDec = NewTensorBufV2(maskEncDecTMPEnc, paddingEnc.devID);
+ XTensor * maskEncDecTMPDec = NewTensorBuf(maskEncDecTMPEnc, paddingEnc.devID);
_Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1));
_ScaleAndShiftMe(maskEncDecTMPEnc, 1e9F, -1e9F);
@@ -254,14 +254,14 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1);
dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1);
- XTensor * padding2 = NewTensorBufV2(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
+ XTensor * padding2 = NewTensorBuf(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
for (int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead;
- XTensor * padding3 = NewTensorBufV2(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
+ XTensor * padding3 = NewTensorBuf(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
/* mask of the padding */
@@ -270,7 +270,7 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
_ScaleAndShiftMe(padding3, 1e9F, -1e9F);
- InitTensorV2(&maskEnc, padding3);
+ InitTensor(&maskEnc, padding3);
maskEnc.SetZeroAll();
/* generate the mask on the source language side (for padding) */
@@ -298,14 +298,14 @@ void T2TModel::MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &ma
dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1);
dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1);
- XTensor * padding2 = NewTensorBufV2(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
+ XTensor * padding2 = NewTensorBuf(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
for (int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead;
- XTensor * padding3 = NewTensorBufV2(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
+ XTensor * padding3 = NewTensorBuf(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
/* mask of the padding */
@@ -314,7 +314,7 @@ void T2TModel::MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &ma
_ScaleAndShiftMe(padding3, 1e9F, -1e9F);
- InitTensorV2(&maskEnc, padding3);
+ InitTensor(&maskEnc, padding3);
maskEnc.SetZeroAll();
/* generate the mask on the source language side (for padding) */
@@ -344,7 +344,7 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
dims[i + 1] = inputDec.GetDim(i);
dims[0] = nhead;
dims[inputDec.order + 1] = len;
- InitTensorV2(&maskDec, inputDec.order + 2, dims, X_FLOAT, paddingDec.devID);
+ InitTensor(&maskDec, inputDec.order + 2, dims, X_FLOAT, paddingDec.devID);
/* An upper triangular matrix where the cells of the upper triangular are set to -1e-9.
This matrix can be used to block the attention to current or following words in
@@ -359,11 +359,11 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
/* encoder-decoder mask that prevents the attention to padding dummy words */
dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1);
- InitTensorV2(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
+ InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
- XTensor * maskEncDecTMPEnc = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
+ XTensor * maskEncDecTMPEnc = NewTensorBuf(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
paddingEnc.devID);
- XTensor * maskEncDecTMPDec = NewTensorBufV2(maskEncDecTMPEnc, paddingEnc.devID);
+ XTensor * maskEncDecTMPDec = NewTensorBuf(maskEncDecTMPEnc, paddingEnc.devID);
_Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1));
...
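All of these mask hunks build the same two ingredients. The triangular mask holds -1e9 in the cells to be blocked (the comments' "-1e-9" is presumably a typo for -1e9, matching the 1e9F constants in the code) and 0 elsewhere, so adding it to the attention logits drives the softmax weight of current/following positions to (near-)zero. The padding masks come from _ScaleAndShiftMe(x, 1e9F, -1e9F), which maps a padding indicator of 1 to 1e9*1 - 1e9 = 0 and of 0 to -1e9, removing padded positions from the softmax in the same way.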
@@ -58,7 +58,7 @@ void T2TOutput::InitModel(int argc, char ** argv, int myDevID)
LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F);
- InitTensor2DV2(&w, hSize, vSize, X_FLOAT, devID);
+ InitTensor2D(&w, hSize, vSize, X_FLOAT, devID);
float scale = 1.0F;
float finfout = (float)sqrt(6.0F * scale/(hSize + vSize));
...
@@ -105,9 +105,9 @@ void T2TPredictor::Create(T2TModel * model, XTensor * top, const XTensor * input
dims[i] = input->GetDim(i);
dims[input->order - 1] = beamSize;
- InitTensorV2(&state->probPath, input->order, dims, X_FLOAT, input->devID);
- InitTensorV2(&state->nstep, input->order, dims, X_FLOAT, input->devID);
- InitTensorV2(&state->endMark, input->order, dims, X_INT, input->devID);
+ InitTensor(&state->probPath, input->order, dims, X_FLOAT, input->devID);
+ InitTensor(&state->nstep, input->order, dims, X_FLOAT, input->devID);
+ InitTensor(&state->endMark, input->order, dims, X_INT, input->devID);
state->probPath.SetZeroAll();
state->nstep.SetZeroAll();
@@ -170,7 +170,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
dims[i] = inputEnc->GetDim(i);
dims[inputEnc->order - 1] = 1;
- InitTensorV2(&first, inputEnc->order, dims, X_INT, inputEnc->devID);
+ InitTensor(&first, inputEnc->order, dims, X_INT, inputEnc->devID);
_SetDataFixedInt(&first, startSymbol);
/* add a new word into the input sequence of the decoder side */
@@ -194,7 +194,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
dims[inputDec.order - 1] = inputDec.GetDim(-1);
XTensor paddingDec;
- InitTensorV2(&paddingDec, inputDec.order, dims, X_INT, paddingEnc->devID);
+ InitTensor(&paddingDec, inputDec.order, dims, X_INT, paddingEnc->devID);
SetDataFixedInt(paddingDec, 1);
XTensor maskDec;
@@ -213,8 +213,8 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
int stride = decoding.GetDim(decoding.order - 2);
- InitTensor1DV2(&selectSrc, 1, X_INT);
- InitTensor1DV2(&selectTgt, 1, X_INT);
+ InitTensor1D(&selectSrc, 1, X_INT);
+ InitTensor1D(&selectTgt, 1, X_INT);
selectSrc.SetInt(stride - 1, 0);
selectTgt.SetInt(0, 0);
@@ -257,7 +257,7 @@ XTensor T2TPredictor::GeneratePaths(T2TStateBundle * state)
}
XTensor path;
- InitTensor2DV2(&path, state->stateNum, distance, X_INT);
+ InitTensor2D(&path, state->stateNum, distance, X_INT);
path.SetZeroAll();
for(int i = 0; i < state->stateNum; i++){
...
@@ -192,8 +192,8 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
for(int i = 0; i < order; i++)
dims[i] = prob.GetDim(i);
- InitTensorV2(&score, &prob);
- InitTensorV2(&probPath, &prob);
+ InitTensor(&score, &prob);
+ InitTensor(&probPath, &prob);
prob.Reshape(prob.unitNum/outputSize, outputSize);
score.Reshape(score.unitNum/outputSize, outputSize);
@@ -204,8 +204,8 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
_SumDim(&prob, &probPathPrev, &probPath, 0);
- InitTensorV2(&len, &lenPrev);
- InitTensorV2(&lp, &lenPrev);
+ InitTensor(&len, &lenPrev);
+ InitTensor(&lp, &lenPrev);
_ScaleAndShift(&lenPrev, &len, 1.0F, 1.0F);
@@ -225,7 +225,7 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
_SumDim(&score, &firstMask, &score, 0);
}
- InitTensorV2(&mask,
+ InitTensor(&mask,
prev->endMark.order, prev->endMark.dimSize, X_FLOAT,
prev->endMark.devID);
_SetDataFixedCond(&mask, &prev->endMark, -1e9F);
@@ -279,11 +279,11 @@ void T2TSearch::Generate(T2TStateBundle * beam)
dimsTopK[order - 3] = dimsBeam[order - 3];
dimsTopK[order - 1] = beamSize;
- InitTensorV2(&scoreTopK, order, dimsTopK, score.dataType,
+ InitTensor(&scoreTopK, order, dimsTopK, score.dataType,
score.devID);
- InitTensorV2(&index, order, dimsTopK, X_INT,
+ InitTensor(&index, order, dimsTopK, X_INT,
score.devID);
- InitTensorV2(&preID, order, dimsTopK, X_INT, -1);
+ InitTensor(&preID, order, dimsTopK, X_INT, -1);
score.Reshape(order, dimsBeam);
@@ -307,25 +307,25 @@ void T2TSearch::Generate(T2TStateBundle * beam)
score.Reshape(order, dims);
/* we keep the top-k scores */
- InitTensorV2(&score, &scoreTopK);
+ InitTensor(&score, &scoreTopK);
CopyValues(scoreTopK, score);
/* CPU data (TODO: remove GPU->CPU data copy!!!) */
XTensor indexGPU;
indexGPU = CopyValues(index);
- //InitTensor(&indexCPU, index.order, index.dimSize, index.dataType, index.denseRatio, -1);
+ //InitTensorV2(&indexCPU, index.order, index.dimSize, index.dataType, index.denseRatio, -1);
//CopyValues(index, indexCPU);
for (int i = 0; i < indexGPU.unitNum; i++)
indexGPU.SetInt(i * stride + indexGPU.GetInt(i), i);
- CheckNTErrors(XTensor::IsSameShaped(&prob, &probPath), "Wrong tensor shape!");
+ CheckNTErrors(IsSameShaped(prob, probPath), "Wrong tensor shape!");
/* sequence probability of top-k candidates */
XTensor probPathTopK;
- InitTensorV2(&probPathTopK, &scoreTopK);
+ InitTensor(&probPathTopK, &scoreTopK);
XTensor probTopK;
- InitTensorV2(&probTopK, &scoreTopK);
+ InitTensor(&probTopK, &scoreTopK);
for (int i = 0; i < probPath.order; i++) {
dims[i] = probPath.GetDim(i);
@@ -381,7 +381,7 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam)
InitTensorOnCPU(&probPath, &probPathRef);
InitTensorOnCPU(&prediction, &predictionRef);
InitTensorOnCPU(&endMarkCPU, &predictionRef);
- InitTensorV2(&endMark, &predictionRef);
+ InitTensor(&endMark, &predictionRef);
/* we copy the data to CPU because the frequent access to GPU is slow
and we can speed-up the process by doing the job on CPU. */
@@ -502,7 +502,7 @@ void T2TSearch::Dump(XTensor * output)
int dims[3] = {batchSize, beamSize, maxLength};
int * words = new int[maxLength];
- InitTensorV2(output, 3, dims, X_INT);
+ InitTensor(output, 3, dims, X_INT);
SetDataFixedInt(*output, -1);
/* heap for an input sentence in the batch */
@@ -587,7 +587,7 @@ XTensor T2TSearch::MakeFirstMask(T2TStateBundle * beam)
for (int i = 0; i < order - 1; i++)
dims[i] = prob.GetDim(i);
- InitTensorV2(&mask, order - 1, dims, X_FLOAT);
+ InitTensor(&mask, order - 1, dims, X_FLOAT);
mask.SetZeroAll();
for (int i = 0; i < mask.unitNum; i++) {
...
@@ -367,7 +367,7 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model)
/* prediction probabilities */
XTensor probs;
- InitTensor1DV2(&probs, bSize * length);
+ InitTensor1D(&probs, bSize * length);
XTensor labelOnehot;
@@ -452,13 +452,13 @@ get word probabilities for a batch of sequences
float T2TTrainer::GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs)
{
XTensor probs;
- InitTensor(&probs, output);
+ InitTensorV2(&probs, output);
_Multiply(output, gold, &probs);
/* probability of each word */
XTensor wprobs;
- InitTensor1DV2(&wprobs, output->unitNum/output->GetDim(-1), X_FLOAT, output->devID);
+ InitTensor1D(&wprobs, output->unitNum/output->GetDim(-1), X_FLOAT, output->devID);
int dims[2] = {output->unitNum/output->GetDim(-1), output->GetDim(-1)};
probs.Reshape(2, dims);
@@ -475,7 +475,7 @@ float T2TTrainer::GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs)
/* probability for the batch */
XTensor result;
- InitTensor1DV2(&result, 1, X_FLOAT, output->devID);
+ InitTensor1D(&result, 1, X_FLOAT, output->devID);
_ReduceSum(&probs, &result, 1);
return result.Get1D(0);
@@ -522,7 +522,7 @@ void T2TTrainer::Update(T2TModel * model, const float lr)
_ScaleAndShiftMe(v, (1.0F - adamBeta2), 0);
/* v2 = m / (sqrt(v) + delta) */
- XTensor * v2 = NewTensorBufV2(v, v->devID);
+ XTensor * v2 = NewTensorBuf(v, v->devID);
_Power(v, v2, 0.5F);
_ScaleAndShiftMe(v2, 1.0F, d);
_Div(m, v2, v2);
@@ -593,7 +593,7 @@ void T2TTrainer::PadOutput(XTensor * output, XTensor * gold, XTensor * padding)
output->Reshape(output->unitNum/dimso[output->order - 1], dimso[output->order - 1]);
- XTensor * padding2 = NewTensorBufV2(1, &padding->unitNum, X_FLOAT, padding->devID);
+ XTensor * padding2 = NewTensorBuf(1, &padding->unitNum, X_FLOAT, padding->devID);
_CopyValues(padding, padding2);
_MultiplyDim(output, padding2, output, 0);
@@ -647,7 +647,7 @@ void T2TTrainer::LabelSmooth(XTensor * gold, XTensor * smoothed, DTYPE p)
DTYPE q = 1.0F - p;
DTYPE gift = p / n;
- InitTensorV2(smoothed, gold);
+ InitTensor(smoothed, gold);
_CopyValues(gold, smoothed);
if(p == 0)
...
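The Update hunk implements the second half of an Adam step. With m and v the running first and second moments and d the delta of the code comment, the buffered v2 computes:

v2 = m / (sqrt(v) + d)
w  = w - lr * v2

The beta terms visible above, e.g. _ScaleAndShiftMe(v, (1.0F - adamBeta2), 0), feed the moment updates m = beta1 * m + (1 - beta1) * g and v = beta2 * v + (1 - beta2) * g^2; the bias correction and the final weight update are outside the hunk, so their exact placement here is an assumption.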
@@ -30,8 +30,9 @@
#include "XDevice.h"
#include "./test/Test.h"
#include "./core/CHeader.h"
- #include "./loss/CrossEntropy.h"
+ #include "./XBLAS.h"
+ #include "./core/sort/TopK.h"
+ #include "./core/movement/Gather.h"
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
//#include <crtdbg.h>
@@ -40,9 +41,6 @@ using namespace nts;
void SmallTest();
void TransposeTest();
- void LittleTest();
- void T2TTest();
- void T2TTest2();
void PowerTest();
int main( int argc, const char ** argv )
@@ -167,127 +165,5 @@ void TransposeTest()
delete[] data;
}
- void LittleTest()
- {
- int a = 5000;
- int b = 100000;
- int c = a*b;
- printf("%d\n", c);
- exit(1);
- }
- void T2TTest()
- {
- XTensor * input;
- XTensor * weight;
- XTensor * output;
- XTensor * gold;
- XTensor * dedy;
- XTensor * dedx;
- XTensor * dedxTmp;
- XTensor * dedw;
- XTensor * padding;
- DTYPE loss;
- int * dimSize = new int[2];
- dimSize[0] = 256;
- dimSize[1] = 10001;
- int * dimSize2 = new int[3];
- dimSize2[0] = 2;
- dimSize2[1] = 31;
- dimSize2[2] = 256;
- int * dimSize3 = new int[3];
- dimSize3[0] = 2;
- dimSize3[1] = 31;
- dimSize3[2] = 10001;
- int * dimSize4 = new int[2];
- dimSize4[0] = 2;
- dimSize4[1] = 31;
- input = NewTensor(3, dimSize2, X_FLOAT, 1.0F, 0);
- weight = NewTensor(2, dimSize, X_FLOAT, 1.0F, 0);
- dedw = NewTensor(2, dimSize, X_FLOAT, 1.0F, 0);
- gold = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
- output = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
- dedy = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
- dedx = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
- dedxTmp = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
- padding = NewTensor(2, dimSize4, X_FLOAT, 1.0F, 0);
- //weight = NewTensor(2, dimSize);
- //dedw = NewTensor(2, dimSize);
- //input = NewTensor(3, dimSize2);
- //gold = NewTensor(3, dimSize3);
- //output = NewTensor(3, dimSize3);
- //dedy = NewTensor(3, dimSize3);
- //dedx = NewTensor(3, dimSize3);
- //dedxTmp = NewTensor(3, dimSize3);
- //padding = NewTensor(2, dimSize4);
- myRead(input, "x.txt", "x");
- myRead(weight, "w.txt", "w");
- myRead(gold, "gold.txt", "gold");
- myRead(padding, "padding.txt", "padding");
- XTensor inter;
- inter = MMul(*input, *weight);
- _Softmax(&inter, output, 2);
- //_LogMe(output);
- loss = _CrossEntropyFast(output, gold, REDUCE_MEAN, NULL, padding);
- printf("loss: %f\n", loss);
- _CrossEntropyBackward(dedy, output, gold, NULL);
- //_CrossEntropyBackward(dedy, output, gold, NULL, padding);
- myDump(dedy, "dedy.txt", "dedy");
- _SoftmaxBackward(NULL, output, input, dedy, dedx, NULL, -1, NOLOSS);
- _Sub(output, gold, dedxTmp);
- myDump(dedx, "dedx.txt", "dedx");
- dedx->Dump(stderr, "dedx", 200);
- dedxTmp->Dump(stderr, "dedxTmp", 200);
- input->Reshape(input->unitNum/input->GetDim(-1), input->GetDim(-1));
- dedx->Reshape(dedx->unitNum/dedx->GetDim(-1), dedx->GetDim(-1));
- _MatrixMulBatched(input, X_TRANS, dedx, X_NOTRANS, dedw);
- myDump(dedw, "dedw.txt", "dedw");
- }
- void T2TTest2()
- {
- int dimSize[3];
- dimSize[0] = 161;
- dimSize[1] = 47;
- dimSize[2] = 10001;
- XTensor * probs = NewTensor(3, dimSize, X_FLOAT, 1.0F, 0);
- //XTensor * probs = NewTensor(3, dimSize, X_FLOAT, 1.0F, -1);
- //myRead(probs, "probs.txt", " ");
- _SetDataFixedFloat(probs, 1.0F);
- probs->Reshape(1, probs->unitNum);
- DTYPE sum = _ReduceSumAll(probs);
- printf("%e\n", sum);
- //XTensor tmp;
- //tmp = IsNonZero(*probs);
- //DTYPE nonZeroNum = ReduceSumAll(tmp);
- //printf("%f\n", nonZeroNum);
- //
- //DTYPE gpu = ReduceSum(*probs, 1).Get2D(0, 0);
- //printf("%e\n", gpu);
- }
...
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2019-10-21
*/
#ifndef __XCALL_H__
#define __XCALL_H__
#include "XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
* we define the "new and delete" functions below
*/
/* initialize a XTensor V2 */
void InitTensorV2(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense XTensor */
void InitTensor(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense vector V2 */
void InitTensor1DV2(XTensor * tensor, const int num,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense vector */
void InitTensor1D(XTensor * tensor, const int num,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense matrix V2 */
void InitTensor2DV2(XTensor * tensor, const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense matrix */
void InitTensor2D(XTensor * tensor, const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense 3d tensor V2 */
void InitTensor3DV2(XTensor * tensor, const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 3d tensor */
void InitTensor3D(XTensor * tensor, const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense 4d tensor V2 */
void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 4d tensor */
void InitTensor4D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense 5d tensor V2 */
void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 5d tensor */
void InitTensor5D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a tensor with a reference tensor V2 */
void InitTensorV2(XTensor * tensor, const XTensor * reference);
/* initialize a tensor with a reference tensor */
void InitTensor(XTensor * tensor, const XTensor * reference);
/* initialize a tensor on the CPU with a reference tensor */
void InitTensorOnCPU(XTensor * tensor, const XTensor * reference);
/* generate a XTensor with no initialization */
XTensor * NewTensor();
/* generate a XTensor V2 */
XTensor * NewTensorV2(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense XTensor */
XTensor * NewTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate a XTensor which allocates data on the buffer V2 */
XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const float myDenseRatio = 1.0F,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense XTensor which allocates data on the buffer */
XTensor * NewTensorBuf(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* generate a XTensor which allocates data on the buffer V2 */
XTensor * NewTensorBufV2(const XTensor * reference, int devID, XMem * myMem);
/* generate a XTensor which allocates data on the buffer */
XTensor * NewTensorBuf(const XTensor * reference, int devID, const bool isEnableGrad = true);
/* generate a dense vector V2 */
XTensor * NewTensor1DV2(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1,
XMem * myMem = NULL);
/* generate a dense vector */
XTensor * NewTensor1D(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense matrix V2 */
XTensor * NewTensor2DV2(const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense matrix */
XTensor * NewTensor2D(const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense 3d tensor V2 */
XTensor * NewTensor3DV2(const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 3d tensor */
XTensor * NewTensor3D(const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense 4d tensor V2 */
XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 4d tensor */
XTensor * NewTensor4D(const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense 5d tensor V2 */
XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 5d tensor */
XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense vector by range */
XTensor * NewTensorRange(int lower, int upper, int step, const TENSOR_DATA_TYPE myDataType = X_INT, const int myDevID = -1, const bool isEnableGrad = true);
/* generate a copy of XTensor (with a reference to a given tensor) */
XTensor * NewTensor(const XTensor * a, bool isFilledData = true);
/* free the data space of a given tensor */
void DelTensor(XTensor * tensor);
/* free the data space of a given tensor (on the buffer) */
void DelTensorBuf(XTensor * tensor);
} // namespace nts(NiuTrans.Tensor)
#endif // __XCALL_H__
\ No newline at end of file
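The new XCall.h header ends here. A short usage sketch of this API, pairing each allocator with its delete call (devID -1 = CPU; names are illustrative):

#include "XCall.h"
using namespace nts;
void XCallDemo()
{
    /* dense 2d tensor on the CPU, gradient tracking disabled */
    XTensor * a = NewTensor2D(8, 16, X_FLOAT, -1, false);
    /* scratch tensor of the same shape, taken from the buffer */
    XTensor * tmp = NewTensorBuf(a, a->devID);
    /* ... compute with a and tmp ... */
    DelTensorBuf(tmp);   /* buffer tensors are released with DelTensorBuf */
    DelTensor(a);        /* ordinary tensors with DelTensor */
}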
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
@@ -50,14 +50,6 @@ int CONST_MINUSONE = -1;
bool CONST_TRUE = true;
int verboseLevel = 0;
- bool useBLAS = false;
- #ifdef USE_CUDA
- bool useCUDA = true;
- #else
- bool useCUDA = false;
- #endif
FILE * tmpLog = NULL;
double myTime = 0;
...
@@ -135,8 +135,6 @@ extern bool CONST_TRUE;
#define NIUTRANSNNDEBUG
extern int verboseLevel;
- extern bool useBLAS;
- extern bool useCUDA;
#define FFLUSH(FILEH) \
{ \
...
@@ -1562,9 +1562,9 @@ void XMemManager::GetBufferSize(MTYPE freeMem, MTYPE * myBufSize)
if (freeMem >= MILLION * 512){
*myBufSize = MILLION * 128;
if (freeMem >= MILLION * 1024) {
- *myBufSize = MILLION * 256;
+ *myBufSize = MILLION * 128;
if (freeMem >= MILLION * 2048)
- *myBufSize = MILLION * 512;
+ *myBufSize = MILLION * 128;
}
}
}
...
@@ -86,11 +86,14 @@
#include "shape/Stack.h"
#include "shape/Transpose.h"
#include "shape/Unsqueeze.h"
+ #include "shape/IsSameShaped.h"
#include "sort/Sort.h"
#include "sort/TopK.h"
#include "utilities/XMatrixSegment.h"
#include "utilities/FlushToMem.h"
+ #include "utilities/CheckData.h"
+ #include "utilities/SetAscendingOrder.h"
#endif // __CHEADER_H__
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h" #include "../../XName.h"
#include "../../XUtility.h" #include "../../XUtility.h"
#include "../shape/IsSameShaped.h"
#include "Div.h" #include "Div.h"
#include "Div.cuh" #include "Div.cuh"
#include "DivDim.h" #include "DivDim.h"
...@@ -48,9 +49,6 @@ void _Div(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int le ...@@ -48,9 +49,6 @@ void _Div(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int le
"Unmatched tensors!"); "Unmatched tensors!");
CheckDev(a->devID, b->devID); CheckDev(a->devID, b->devID);
int leadingDimRDI = a->order - leadingDim - 1;
#ifdef USE_CUDA #ifdef USE_CUDA
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) { if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
_CudaDiv(a, b, c, alpha, leadingDim); _CudaDiv(a, b, c, alpha, leadingDim);
...@@ -63,17 +61,17 @@ void _Div(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int le ...@@ -63,17 +61,17 @@ void _Div(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int le
int blockSizeB = 1; int blockSizeB = 1;
int blockSizeC = 1; int blockSizeC = 1;
int blockNum = 1; int blockNum = 1;
int dimensionSizeA = a->dimSizeRDI[leadingDimRDI]; int dimensionSizeA = a->dimSize[leadingDim];
int dimensionSizeB = b->dimSizeRDI[leadingDimRDI]; int dimensionSizeB = b->dimSize[leadingDim];
int dimensionSizeC = c->dimSizeRDI[leadingDimRDI]; int dimensionSizeC = c->dimSize[leadingDim];
for (int i = 0; i < a->order; i++) { for (int i = 0; i < a->order; i++) {
if (i != leadingDimRDI) { if (i != leadingDim) {
CheckNTErrors((a->dimSizeRDI[i] == b->dimSizeRDI[i] && a->dimSizeRDI[i] == c->dimSizeRDI[i]), CheckNTErrors((a->dimSize[i] == b->dimSize[i] && a->dimSize[i] == c->dimSize[i]),
"Unmatched tensors!"); "Unmatched tensors!");
} }
if (i < leadingDimRDI) if (i > leadingDim)
stride *= a->dimSizeRDI[i]; stride *= a->dimSize[i];
} }
blockSizeA = stride * dimensionSizeA; blockSizeA = stride * dimensionSizeA;
...@@ -168,7 +166,7 @@ int GetDivDimIndex(const XTensor &a, const XTensor &b) ...@@ -168,7 +166,7 @@ int GetDivDimIndex(const XTensor &a, const XTensor &b)
{ {
if(a.order < b.order) if(a.order < b.order)
return -1; return -1;
if(XTensor::IsSameShaped(&a, &b)) if(IsSameShaped(a, b))
return -1; return -1;
int hitCount = 0; int hitCount = 0;
...@@ -253,8 +251,8 @@ where i is the index of the item ...@@ -253,8 +251,8 @@ where i is the index of the item
*/ */
void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim) void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !IsSameShaped(a, c)) {
InitTensor(&c, &a); InitTensorV2(&c, &a);
} }
int n = GetDivDimIndex(a, b); int n = GetDivDimIndex(a, b);
......
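The recurring pattern in these hunks replaces the reversed dimension index (leadingDimRDI = order - leadingDim - 1, indexing dimSizeRDI) with direct indexing of dimSize. Under the row-major layout, the dimensions after leadingDim form the stride and the dimensions before it form the block count, which is why the old `i < leadingDimRDI` tests become `i > leadingDim`. A standalone sketch of that arithmetic (names are illustrative, not library API):

    /* sketch only: derive stride and block count for a leading dimension
       of a row-major tensor, matching the rewritten loops above */
    void LeadingDimLayoutSketch(const int * dimSize, int order, int leadingDim,
                                int * stride, int * blockNum)
    {
        *stride = 1;
        *blockNum = 1;
        for (int i = 0; i < order; i++) {
            if (i > leadingDim)
                *stride *= dimSize[i];     /* inner (faster-varying) dims */
            else if (i < leadingDim)
                *blockNum *= dimSize[i];   /* outer dims */
        }
    }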
...@@ -122,7 +122,6 @@ where i is the item index ...@@ -122,7 +122,6 @@ where i is the item index
*/ */
void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim) void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{ {
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum), CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!"); CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
...@@ -130,18 +129,18 @@ void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, in ...@@ -130,18 +129,18 @@ void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, in
int stride = 1; int stride = 1;
int blockSizeA = 1; int blockSizeA = 1;
int blockNum = 1; int blockNum = 1;
int dimensionSizeA = a->dimSizeRDI[leadingDimRDI]; int dimensionSizeA = a->dimSize[leadingDim];
int dimensionSizeB = b->dimSizeRDI[leadingDimRDI]; int dimensionSizeB = b->dimSize[leadingDim];
int dimensionSizeC = c->dimSizeRDI[leadingDimRDI]; int dimensionSizeC = c->dimSize[leadingDim];
for (int i = 0; i < a->order; i++) { for (int i = 0; i < a->order; i++) {
if (i != leadingDimRDI) { if (i != leadingDim) {
CheckNTErrors((a->dimSizeRDI[i] == b->dimSizeRDI[i] && CheckNTErrors((a->dimSize[i] == b->dimSize[i] &&
a->dimSizeRDI[i] == c->dimSizeRDI[i]), a->dimSize[i] == c->dimSize[i]),
"Unmatched tensors!"); "Unmatched tensors!");
} }
if (i < leadingDimRDI) if (i > leadingDim)
stride *= a->dimSizeRDI[i]; stride *= a->dimSize[i];
} }
blockSizeA = stride * dimensionSizeA; blockSizeA = stride * dimensionSizeA;
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "../../XName.h" #include "../../XName.h"
#include "../../XUtility.h" #include "../../XUtility.h"
#include "../movement/CopyValues.h" #include "../movement/CopyValues.h"
#include "../shape/IsSameShaped.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -56,7 +57,7 @@ void _DivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alp ...@@ -56,7 +57,7 @@ void _DivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alp
CheckDev(a->devID, b->devID); CheckDev(a->devID, b->devID);
if(XTensor::IsSameShaped(a, b)){ if(_IsSameShaped(a, b)){
_Div(a, b, c, alpha); _Div(a, b, c, alpha);
return; return;
} }
...@@ -188,8 +189,8 @@ i.e., a is divided with b by broadcasting ...@@ -188,8 +189,8 @@ i.e., a is divided with b by broadcasting
*/ */
void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha) void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !IsSameShaped(a, c)) {
InitTensor(&c, &a); InitTensorV2(&c, &a);
} }
/* call _Div function */ /* call _Div function */
......
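A hypothetical call of the rewritten DivDim wrapper; the shapes and devID are made up for illustration, data is left unset since only the shape handling is of interest, and alpha = 0 is assumed to request plain division with no additive term:

    void DivDimUsageSketch(int devID)
    {
        XTensor a, b, c;
        InitTensor2DV2(&a, 2, 4, X_FLOAT, devID);   /* a: 2 x 4 */
        InitTensor1DV2(&b, 4, X_FLOAT, devID);      /* b: length-4 vector */
        /* c is left uninitialized: the wrapper builds it via InitTensorV2 */
        DivDim(a, b, c, 1, 0.0F);                   /* c_ij = a_ij / b_j */
    }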
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h" #include "../../XName.h"
#include "../../XUtility.h" #include "../../XUtility.h"
#include "../shape/IsSameShaped.h"
#include "Mask.h" #include "Mask.h"
#include "Mask.cuh" #include "Mask.cuh"
...@@ -171,8 +172,8 @@ where i is the index of the element ...@@ -171,8 +172,8 @@ where i is the index of the element
*/ */
void Mask(const XTensor &a, const XTensor &mask, XTensor &c, DTYPE alpha) void Mask(const XTensor &a, const XTensor &mask, XTensor &c, DTYPE alpha)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !IsSameShaped(a, c)) {
InitTensor(&c, &a); InitTensorV2(&c, &a);
} }
/* call _Mask function */ /* call _Mask function */
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XDevice.h" #include "../../XDevice.h"
#include "../../XName.h" #include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "MatrixMulBatched.h" #include "MatrixMulBatched.h"
#include "XTensorBLAS.h" #include "XTensorBLAS.h"
#include "MatrixMul2D.h" #include "MatrixMul2D.h"
...@@ -94,27 +95,27 @@ void _MatrixMulBatchedGPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -94,27 +95,27 @@ void _MatrixMulBatchedGPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
"Input tensor and output tensor must have same order!"); "Input tensor and output tensor must have same order!");
CheckNTErrors(a->devID >= 0 && b->devID >= 0 && c->devID >= 0, "The tensors must be on GPUs"); CheckNTErrors(a->devID >= 0 && b->devID >= 0 && c->devID >= 0, "The tensors must be on GPUs");
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1]; int an = transposedA == X_TRANS ? a->dimSize[a->order - 1] : a->dimSize[a->order - 2];
int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0]; int am = transposedA == X_TRANS ? a->dimSize[a->order - 2] : a->dimSize[a->order - 1];
int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1]; int bn = transposedB == X_TRANS ? b->dimSize[b->order - 1] : b->dimSize[b->order - 2];
int bm = transposedB == X_TRANS ? b->dimSizeRDI[1] : b->dimSizeRDI[0]; int bm = transposedB == X_TRANS ? b->dimSize[b->order - 2] : b->dimSize[b->order - 1];
int cn = c->dimSizeRDI[1]; int cn = c->dimSize[c->order - 2];
int cm = c->dimSizeRDI[0]; int cm = c->dimSize[c->order - 1];
CheckNTErrors((am == bn && an == cn && bm == cm), "Unmatched tensors in multiplication!"); CheckNTErrors((am == bn && an == cn && bm == cm), "Unmatched tensors in multiplication!");
int aBlockSize = a->dimSizeRDI[0] * a->dimSizeRDI[1]; int aBlockSize = a->dimSize[a->order - 1] * a->dimSize[a->order - 2];
int bBlockSize = b->dimSizeRDI[0] * b->dimSizeRDI[1]; int bBlockSize = b->dimSize[b->order - 1] * b->dimSize[b->order - 2];
int cBlockSize = c->dimSizeRDI[0] * c->dimSizeRDI[1]; int cBlockSize = c->dimSize[c->order - 1] * c->dimSize[c->order - 2];
int aRealBlockSize = aBlockSize * a->unitSize; int aRealBlockSize = aBlockSize * a->unitSize;
int bRealBlockSize = bBlockSize * b->unitSize; int bRealBlockSize = bBlockSize * b->unitSize;
int cRealBlockSize = cBlockSize * c->unitSize; int cRealBlockSize = cBlockSize * c->unitSize;
int blockNum = 1; int blockNum = 1;
for (int i = 2; i < a->order; i++) { for (int i = 0; i < a->order - 2; i++) {
CheckNTErrors((a->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!"); CheckNTErrors((a->dimSize[i] == c->dimSize[i]), "Incorrect tensor sizes!");
CheckNTErrors((b->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!"); CheckNTErrors((b->dimSize[i] == c->dimSize[i]), "Incorrect tensor sizes!");
blockNum *= a->dimSizeRDI[i]; blockNum *= a->dimSize[i];
} }
int devIDBackup = 0; int devIDBackup = 0;
...@@ -125,9 +126,9 @@ void _MatrixMulBatchedGPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -125,9 +126,9 @@ void _MatrixMulBatchedGPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
a->data, transposedA, a->dataType, aBlockSize, a->data, transposedA, a->dataType, aBlockSize,
b->data, transposedB, b->dataType, bBlockSize, b->data, transposedB, b->dataType, bBlockSize,
c->data, c->dataType, cBlockSize, blockNum, c->data, c->dataType, cBlockSize, blockNum,
a->dimSizeRDI[1], a->dimSizeRDI[0], a->dimSize[a->order - 2], a->dimSize[a->order - 1],
b->dimSizeRDI[1], b->dimSizeRDI[0], b->dimSize[b->order - 2], b->dimSize[b->order - 1],
c->dimSizeRDI[1], c->dimSizeRDI[0], alpha, beta); c->dimSize[c->order - 2], c->dimSize[c->order - 1], alpha, beta);
BacktoCudaDev(a->devID, devIDBackup); BacktoCudaDev(a->devID, devIDBackup);
#endif #endif
...@@ -163,36 +164,36 @@ void _MatrixMulBatchedCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -163,36 +164,36 @@ void _MatrixMulBatchedCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
"Input tensor and output tensor must have same order!"); "Input tensor and output tensor must have same order!");
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1]; int an = transposedA == X_TRANS ? a->dimSize[a->order - 1] : a->dimSize[a->order - 2];
int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0]; int am = transposedA == X_TRANS ? a->dimSize[a->order - 2] : a->dimSize[a->order - 1];
int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1]; int bn = transposedB == X_TRANS ? b->dimSize[b->order - 1] : b->dimSize[b->order - 2];
int bm = transposedB == X_TRANS ? b->dimSizeRDI[1] : b->dimSizeRDI[0]; int bm = transposedB == X_TRANS ? b->dimSize[b->order - 2] : b->dimSize[b->order - 1];
int cn = c->dimSizeRDI[1]; int cn = c->dimSize[c->order - 2];
int cm = c->dimSizeRDI[0]; int cm = c->dimSize[c->order - 1];
CheckNTErrors(am == bn && an == cn && bm == cm, "Unmatched tensors in multiplication!"); CheckNTErrors(am == bn && an == cn && bm == cm, "Unmatched tensors in multiplication!");
int aBlockSize = a->dimSizeRDI[0] * a->dimSizeRDI[1]; int aBlockSize = a->dimSize[a->order - 1] * a->dimSize[a->order - 2];
int bBlockSize = b->dimSizeRDI[0] * b->dimSizeRDI[1]; int bBlockSize = b->dimSize[b->order - 1] * b->dimSize[b->order - 2];
int cBlockSize = c->dimSizeRDI[0] * c->dimSizeRDI[1]; int cBlockSize = c->dimSize[c->order - 1] * c->dimSize[c->order - 2];
int aRealBlockSize = aBlockSize * a->unitSize; int aRealBlockSize = aBlockSize * a->unitSize;
int bRealBlockSize = bBlockSize * b->unitSize; int bRealBlockSize = bBlockSize * b->unitSize;
int cRealBlockSize = cBlockSize * c->unitSize; int cRealBlockSize = cBlockSize * c->unitSize;
int blockNum = 1; int blockNum = 1;
for (int i = 2; i < a->order; i++) { for (int i = 0; i < a->order - 2; i++) {
CheckNTErrors((a->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!"); CheckNTErrors((a->dimSize[i] == c->dimSize[i]), "Incorrect tensor sizes!");
CheckNTErrors((b->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!"); CheckNTErrors((b->dimSize[i] == c->dimSize[i]), "Incorrect tensor sizes!");
blockNum *= a->dimSizeRDI[i]; blockNum *= a->dimSize[i];
} }
int aDimSize[2] = {-a->dimSizeRDI[1], a->dimSizeRDI[0]}; int aDimSize[2] = {-a->dimSize[a->order - 2], a->dimSize[a->order - 1]};
int bDimSize[2] = {-b->dimSizeRDI[1], b->dimSizeRDI[0]}; int bDimSize[2] = {-b->dimSize[b->order - 2], b->dimSize[b->order - 1]};
int cDimSize[2] = {-c->dimSizeRDI[1], c->dimSizeRDI[0]}; int cDimSize[2] = {-c->dimSize[c->order - 2], c->dimSize[c->order - 1]};
XTensor * ai = NewTensor2D(aDimSize[0], aDimSize[1], a->dataType, a->devID, a->mem); XTensor * ai = NewTensor2DV2(aDimSize[0], aDimSize[1], a->dataType, a->devID, a->mem);
XTensor * bi = NewTensor2D(bDimSize[0], bDimSize[1], b->dataType, b->devID, b->mem); XTensor * bi = NewTensor2DV2(bDimSize[0], bDimSize[1], b->dataType, b->devID, b->mem);
XTensor * ci = NewTensor2D(cDimSize[0], cDimSize[1], c->dataType, c->devID, c->mem); XTensor * ci = NewTensor2DV2(cDimSize[0], cDimSize[1], c->dataType, c->devID, c->mem);
for (int i = 0; i < blockNum; i++) { for (int i = 0; i < blockNum; i++) {
ai->data = (char*)a->data + i * aRealBlockSize; ai->data = (char*)a->data + i * aRealBlockSize;
...@@ -242,9 +243,9 @@ void _MatrixMulBatchedCPU(const TensorList * a, MATRIX_TRANS_TYPE transposedA, ...@@ -242,9 +243,9 @@ void _MatrixMulBatchedCPU(const TensorList * a, MATRIX_TRANS_TYPE transposedA,
XTensor * ai = (XTensor*)a->GetItem(i); XTensor * ai = (XTensor*)a->GetItem(i);
XTensor * bi = (XTensor*)b->GetItem(i); XTensor * bi = (XTensor*)b->GetItem(i);
XTensor * ci = (XTensor*)c->GetItem(i); XTensor * ci = (XTensor*)c->GetItem(i);
if (!XTensor::IsSameShaped(aim, ai) || if (!_IsSameShaped(aim, ai) ||
!XTensor::IsSameShaped(bim, bi) || !_IsSameShaped(bim, bi) ||
!XTensor::IsSameShaped(cim, ci)) !_IsSameShaped(cim, ci))
{ {
isUniform = false; isUniform = false;
break; break;
...@@ -291,10 +292,10 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const ...@@ -291,10 +292,10 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!"); CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
CheckNTErrors(a.order == b.order, "Input tensor and output tensor must have same order!"); CheckNTErrors(a.order == b.order, "Input tensor and output tensor must have same order!");
int an = transposedA == X_TRANS ? a.dimSizeRDI[0] : a.dimSizeRDI[1]; int an = transposedA == X_TRANS ? a.dimSize[a.order - 1] : a.dimSize[a.order - 2];
int am = transposedA == X_TRANS ? a.dimSizeRDI[1] : a.dimSizeRDI[0]; int am = transposedA == X_TRANS ? a.dimSize[a.order - 2] : a.dimSize[a.order - 1];
int bn = transposedB == X_TRANS ? b.dimSizeRDI[0] : b.dimSizeRDI[1]; int bn = transposedB == X_TRANS ? b.dimSize[b.order - 1] : b.dimSize[b.order - 2];
int bm = transposedB == X_TRANS ? b.dimSizeRDI[1] : b.dimSizeRDI[0]; int bm = transposedB == X_TRANS ? b.dimSize[b.order - 2] : b.dimSize[b.order - 1];
CheckNTErrors(am == bn, "Unmatched tensors in multiplication!"); CheckNTErrors(am == bn, "Unmatched tensors in multiplication!");
...@@ -349,10 +350,10 @@ XTensor MatrixMulBatched(const XTensor &a, const XTensor &b, ...@@ -349,10 +350,10 @@ XTensor MatrixMulBatched(const XTensor &a, const XTensor &b,
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!"); CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have a order >= 2!");
CheckNTErrors(a.order == b.order, "Input tensor and output tensor must have same order!"); CheckNTErrors(a.order == b.order, "Input tensor and output tensor must have same order!");
int an = a.dimSizeRDI[1]; int an = a.dimSize[a.order - 2];
int am = a.dimSizeRDI[0]; int am = a.dimSize[a.order - 1];
int bn = b.dimSizeRDI[1]; int bn = b.dimSize[b.order - 2];
int bm = b.dimSizeRDI[0]; int bm = b.dimSize[b.order - 1];
CheckNTErrors(am == bn, "Unmatched tensors in multiplication!"); CheckNTErrors(am == bn, "Unmatched tensors in multiplication!");
......
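After this hunk, the batched matrix product routines read the matrix shape from the last two axes of dimSize and treat every axis before them as a batch axis. A minimal sketch of that convention (the struct and function are illustrative, not library API):

    /* sketch only: rows/cols come from the (possibly transposed) matrix
       held in the last two axes; all earlier axes multiply into the batch */
    struct MatShapeSketch { int rows; int cols; int batch; };

    MatShapeSketch GetMatShapeSketch(const int * dimSize, int order, bool transposed)
    {
        MatShapeSketch s;
        int n = dimSize[order - 2];            /* second-to-last axis */
        int m = dimSize[order - 1];            /* last axis */
        s.rows = transposed ? m : n;
        s.cols = transposed ? n : m;
        s.batch = 1;
        for (int i = 0; i < order - 2; i++)    /* batch axes come first */
            s.batch *= dimSize[i];
        return s;
    }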
...@@ -37,7 +37,7 @@ int GetSumIndex(const XTensor &a, const XTensor &b) ...@@ -37,7 +37,7 @@ int GetSumIndex(const XTensor &a, const XTensor &b)
{ {
if (a.order < b.order) if (a.order < b.order)
return -1; return -1;
if (XTensor::IsSameShaped(&a, &b)) if (IsSameShaped(a, b))
return -1; return -1;
int hitCount = 0; int hitCount = 0;
...@@ -71,26 +71,27 @@ XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b, ...@@ -71,26 +71,27 @@ XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b,
CheckNTErrors(x.dataType == w.dataType, "Input tensors should have the same data type!"); CheckNTErrors(x.dataType == w.dataType, "Input tensors should have the same data type!");
CheckNTErrors(x.order >= 2 && w.order >= 2, "Input tensors must have a order >= 2!"); CheckNTErrors(x.order >= 2 && w.order >= 2, "Input tensors must have a order >= 2!");
int xn = x.dimSizeRDI[1]; int xn = x.dimSize[x.order - 2];
int xm = x.dimSizeRDI[0]; int xm = x.dimSize[x.order - 1];
int wn = w.dimSizeRDI[1]; int wn = w.dimSize[w.order - 2];
int wm = w.dimSizeRDI[0]; int wm = w.dimSize[w.order - 1];
CheckNTErrors(xm == wn, "Unmatched tensors in multiplication!"); CheckNTErrors(xm == wn, "Unmatched tensors in multiplication!");
int order = x.order + w.order - 2; int order = x.order + w.order - 2;
int sub = 0; int sub = 0;
int * dimSize = new int[order]; int * dimSize = new int[order];
for (int i = 2; i < x.order; i++) for (int i = 0; i < x.order - 2; i++)
dimSize[sub++] = x.dimSizeRDI[x.order + 1 - i]; dimSize[sub++] = x.dimSize[i];
for (int i = 2; i < w.order; i++) for (int i = 0; i < w.order - 2; i++)
dimSize[sub++] = w.dimSizeRDI[w.order + 1 - i]; dimSize[sub++] = w.dimSize[i];
dimSize[sub++] = xn; dimSize[sub++] = xn;
dimSize[sub++] = wm; dimSize[sub++] = wm;
float dr = (!x.isSparse || !w.isSparse) ? 1.0F : MAX(x.denseRatio, w.denseRatio); float dr = (!x.isSparse || !w.isSparse) ? 1.0F : MAX(x.denseRatio, w.denseRatio);
XTensor * tmp = NewTensorBuf(order, dimSize, x.dataType, dr, x.devID, x.mem); XTensor * tmp = NewTensorBufV2(order, dimSize, x.dataType, dr, x.devID, x.mem);
/* call _MatrixMul function */ /* call _MatrixMul function */
_MatrixMul(&x, X_NOTRANS, &w, X_NOTRANS, tmp, alpha, 0, parallelRunner); _MatrixMul(&x, X_NOTRANS, &w, X_NOTRANS, tmp, alpha, 0, parallelRunner);
...@@ -148,24 +149,24 @@ XTensor MulAndShift(const XTensor& x, MATRIX_TRANS_TYPE transposedA, ...@@ -148,24 +149,24 @@ XTensor MulAndShift(const XTensor& x, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors(x.dataType == w.dataType, "Input tensors should have the same data type!"); CheckNTErrors(x.dataType == w.dataType, "Input tensors should have the same data type!");
CheckNTErrors(x.order >= 2 && w.order >= 2, "Input tensors must have a order >= 2!"); CheckNTErrors(x.order >= 2 && w.order >= 2, "Input tensors must have a order >= 2!");
int xn = transposedA == X_TRANS ? x.dimSizeRDI[0] : x.dimSizeRDI[1]; int xn = transposedA == X_TRANS ? x.dimSize[x.order - 1] : x.dimSize[x.order - 2];
int xm = transposedA == X_TRANS ? x.dimSizeRDI[1] : x.dimSizeRDI[0]; int xm = transposedA == X_TRANS ? x.dimSize[x.order - 2] : x.dimSize[x.order - 1];
int wn = transposedB == X_TRANS ? w.dimSizeRDI[0] : w.dimSizeRDI[1]; int wn = transposedB == X_TRANS ? w.dimSize[w.order - 1] : w.dimSize[w.order - 2];
int wm = transposedB == X_TRANS ? w.dimSizeRDI[1] : w.dimSizeRDI[0]; int wm = transposedB == X_TRANS ? w.dimSize[w.order - 2] : w.dimSize[w.order - 1];
int order = x.order + w.order - 2; int order = x.order + w.order - 2;
int sub = 0; int sub = 0;
int * dimSize = new int[order]; int * dimSize = new int[order];
for (int i = 2; i < x.order; i++) for (int i = 0; i < x.order - 2; i++)
dimSize[sub++] = x.dimSizeRDI[x.order + 1 - i]; dimSize[sub++] = x.dimSize[i];
for (int i = 2; i < w.order; i++) for (int i = 0; i < w.order - 2; i++)
dimSize[sub++] = w.dimSizeRDI[w.order + 1 - i]; dimSize[sub++] = w.dimSize[i];
dimSize[sub++] = xn; dimSize[sub++] = xn;
dimSize[sub++] = wm; dimSize[sub++] = wm;
float dr = (!x.isSparse || !w.isSparse) ? 1.0F : MAX(x.denseRatio, w.denseRatio); float dr = (!x.isSparse || !w.isSparse) ? 1.0F : MAX(x.denseRatio, w.denseRatio);
XTensor * tmp = NewTensorBuf(order, dimSize, x.dataType, dr, x.devID, x.mem); XTensor * tmp = NewTensorBufV2(order, dimSize, x.dataType, dr, x.devID, x.mem);
/* call _MatrixMul function */ /* call _MatrixMul function */
_MatrixMul(&x, transposedA, &w, transposedB, tmp, alpha, 0, parallelRunner); _MatrixMul(&x, transposedA, &w, transposedB, tmp, alpha, 0, parallelRunner);
......
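The MulAndShift hunks simplify the output-shape assembly in the same spirit: instead of walking dimSizeRDI backwards, the leading (batch) axes of x and w are copied in order and the matrix axes xn and wm are appended last. A standalone sketch (names are illustrative):

    /* sketch only: the output order is xOrder + wOrder - 2 */
    void BuildMulShapeSketch(const int * xDims, int xOrder,
                             const int * wDims, int wOrder,
                             int xn, int wm, int * outDims)
    {
        int sub = 0;
        for (int i = 0; i < xOrder - 2; i++)
            outDims[sub++] = xDims[i];   /* batch axes of x */
        for (int i = 0; i < wOrder - 2; i++)
            outDims[sub++] = wDims[i];   /* batch axes of w */
        outDims[sub++] = xn;             /* rows of the product */
        outDims[sub++] = wm;             /* columns of the product */
    }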
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h" #include "../../XName.h"
#include "../../XUtility.h" #include "../../XUtility.h"
#include "../shape/IsSameShaped.h"
#include "Multiply.h" #include "Multiply.h"
#include "Multiply.cuh" #include "Multiply.cuh"
#include "MultiplyDim.h" #include "MultiplyDim.h"
...@@ -48,9 +49,6 @@ void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, i ...@@ -48,9 +49,6 @@ void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, i
"Unmatched tensors!"); "Unmatched tensors!");
CheckDev(a->devID, b->devID); CheckDev(a->devID, b->devID);
int leadingDimRDI = a->order - leadingDim - 1;
#ifdef USE_CUDA #ifdef USE_CUDA
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) { if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
_CudaMultiply(a, b, c, alpha, leadingDim); _CudaMultiply(a, b, c, alpha, leadingDim);
...@@ -63,18 +61,18 @@ void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, i ...@@ -63,18 +61,18 @@ void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, i
int blockSizeB = 1; int blockSizeB = 1;
int blockSizeC = 1; int blockSizeC = 1;
int blockNum = 1; int blockNum = 1;
int dimensionSizeA = a->dimSizeRDI[leadingDimRDI]; int dimensionSizeA = a->dimSize[leadingDim];
int dimensionSizeB = b->dimSizeRDI[leadingDimRDI]; int dimensionSizeB = b->dimSize[leadingDim];
int dimensionSizeC = c->dimSizeRDI[leadingDimRDI]; int dimensionSizeC = c->dimSize[leadingDim];
for (int i = 0; i < a->order; i++) { for (int i = 0; i < a->order; i++) {
if (i != leadingDimRDI) { if (i != leadingDim) {
CheckNTErrors((a->dimSizeRDI[i] == b->dimSizeRDI[i] && CheckNTErrors((a->dimSize[i] == b->dimSize[i] &&
a->dimSizeRDI[i] == c->dimSizeRDI[i]), a->dimSize[i] == c->dimSize[i]),
"Unmatched tensors!"); "Unmatched tensors!");
} }
if (i < leadingDimRDI) if (i > leadingDim)
stride *= a->dimSizeRDI[i]; stride *= a->dimSize[i];
} }
blockSizeA = stride * dimensionSizeA; blockSizeA = stride * dimensionSizeA;
...@@ -169,7 +167,7 @@ int GetMultiplyDimIndex(const XTensor &a, const XTensor &b) ...@@ -169,7 +167,7 @@ int GetMultiplyDimIndex(const XTensor &a, const XTensor &b)
{ {
if(a.order < b.order) if(a.order < b.order)
return -1; return -1;
if(XTensor::IsSameShaped(&a, &b)) if(IsSameShaped(a, b))
return -1; return -1;
int hitCount = 0; int hitCount = 0;
...@@ -254,8 +252,8 @@ where i is the index of the item ...@@ -254,8 +252,8 @@ where i is the index of the item
*/ */
void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim) void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !IsSameShaped(a, c)) {
InitTensor(&c, &a); InitTensorV2(&c, &a);
} }
int n = GetMultiplyDimIndex(a, b); int n = GetMultiplyDimIndex(a, b);
......
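Every non-underscore wrapper touched by this commit guards its output the same way before dispatching to the kernel. A condensed sketch of the idiom, using the IsSameShaped and InitTensorV2 calls introduced above (the wrapper name itself is illustrative):

    /* sketch only: (re)build c lazily when it is missing or mis-shaped */
    void EnsureOutputSketch(const XTensor & a, XTensor & c)
    {
        if (!c.isInit || !IsSameShaped(a, c))
            InitTensorV2(&c, &a);   /* give c the shape/type/device of a */
    }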
...@@ -122,26 +122,25 @@ where i is the item index ...@@ -122,26 +122,25 @@ where i is the item index
*/ */
void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim) void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{ {
int leadingDimRDI = a->order - leadingDim - 1; CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
CheckNTErrors(a->unitNum <= c->unitNum && b->unitNum <= c->unitNum,
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
CheckNTErrors(a->order == b->order && a->order == c->order, "Unmatched tensors!"); CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
int stride = 1; int stride = 1;
int blockSizeA = 1; int blockSizeA = 1;
int blockNum = 1; int blockNum = 1;
int dimensionSizeA = a->dimSizeRDI[leadingDimRDI]; int dimensionSizeA = a->dimSize[leadingDim];
int dimensionSizeB = b->dimSizeRDI[leadingDimRDI]; int dimensionSizeB = b->dimSize[leadingDim];
int dimensionSizeC = c->dimSizeRDI[leadingDimRDI]; int dimensionSizeC = c->dimSize[leadingDim];
for (int i = 0; i < a->order; i++) { for (int i = 0; i < a->order; i++) {
if (i != leadingDimRDI) { if (i != leadingDim) {
CheckNTErrors((a->dimSizeRDI[i] == b->dimSizeRDI[i] && CheckNTErrors((a->dimSize[i] == b->dimSize[i] &&
a->dimSizeRDI[i] == c->dimSizeRDI[i]), a->dimSize[i] == c->dimSize[i]),
"Unmatched tensors!"); "Unmatched tensors!");
} }
if (i < leadingDimRDI) if (i > leadingDim)
stride *= a->dimSizeRDI[i]; stride *= a->dimSize[i];
} }
blockSizeA = stride * dimensionSizeA; blockSizeA = stride * dimensionSizeA;
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "MultiplyDim.h" #include "MultiplyDim.h"
#include "MultiplyDim.cuh" #include "MultiplyDim.cuh"
#include "../shape/Unsqueeze.h" #include "../shape/Unsqueeze.h"
#include "../shape/IsSameShaped.h"
#include "../../XName.h" #include "../../XName.h"
#include "../../XUtility.h" #include "../../XUtility.h"
#include "../movement/CopyValues.h" #include "../movement/CopyValues.h"
...@@ -57,7 +58,7 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP ...@@ -57,7 +58,7 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP
CheckDev(a->devID, b->devID); CheckDev(a->devID, b->devID);
if(XTensor::IsSameShaped(a, b)){ if(_IsSameShaped(a, b)){
_Multiply(a, b, c, alpha); _Multiply(a, b, c, alpha);
return; return;
} }
...@@ -203,8 +204,8 @@ i.e., a is multiplied with b by broadcasting ...@@ -203,8 +204,8 @@ i.e., a is multiplied with b by broadcasting
*/ */
void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n) void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !IsSameShaped(a, c)) {
InitTensor(&c, &a); InitTensorV2(&c, &a);
} }
/* call _Multiply function */ /* call _Multiply function */
...@@ -280,8 +281,8 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE ...@@ -280,8 +281,8 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE
dimsS[0] = -dimsS[0]; dimsS[0] = -dimsS[0];
dimsT[0] = -dimsT[0]; dimsT[0] = -dimsT[0];
XTensor * s = NewTensor(order - (j - i), dimsS, a->dataType, a->denseRatio, a->devID, a->mem); XTensor * s = NewTensorV2(order - (j - i), dimsS, a->dataType, a->denseRatio, a->devID, a->mem);
XTensor * t = NewTensor(order - (j - i) + 1, dimsT, b->dataType, b->denseRatio, b->devID, b->mem); XTensor * t = NewTensorV2(order - (j - i) + 1, dimsT, b->dataType, b->denseRatio, b->devID, b->mem);
if(count == 0) if(count == 0)
source = b->data; source = b->data;
...@@ -371,8 +372,8 @@ where some of dimensions of b can be of size 1 ...@@ -371,8 +372,8 @@ where some of dimensions of b can be of size 1
*/ */
void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c) void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !IsSameShaped(a, c)) {
InitTensor(&c, &a); InitTensorV2(&c, &a);
} }
/* call _SumBroadcast function */ /* call _SumBroadcast function */
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h" #include "../../XName.h"
#include "../../XUtility.h" #include "../../XUtility.h"
#include "../shape/IsSameShaped.h"
#include "Sub.h" #include "Sub.h"
#include "Sub.cuh" #include "Sub.cuh"
#include "SubDim.h" #include "SubDim.h"
...@@ -149,7 +150,7 @@ int GetSubDimIndex(const XTensor &a, const XTensor &b) ...@@ -149,7 +150,7 @@ int GetSubDimIndex(const XTensor &a, const XTensor &b)
{ {
if(a.order < b.order) if(a.order < b.order)
return -1; return -1;
if(XTensor::IsSameShaped(&a, &b)) if(IsSameShaped(a, b))
return -1; return -1;
int hitCount = 0; int hitCount = 0;
...@@ -223,8 +224,8 @@ tensor subtraction c = a - b * \beta ...@@ -223,8 +224,8 @@ tensor subtraction c = a - b * \beta
*/ */
void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta) void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !IsSameShaped(a, c)) {
InitTensor(&c, &a); InitTensorV2(&c, &a);
} }
int n = GetSubDimIndex(a, b); int n = GetSubDimIndex(a, b);
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "../../XName.h" #include "../../XName.h"
#include "../../XUtility.h" #include "../../XUtility.h"
#include "../movement/CopyValues.h" #include "../movement/CopyValues.h"
#include "../shape/IsSameShaped.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -61,7 +62,7 @@ void _SubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet ...@@ -61,7 +62,7 @@ void _SubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet
return; return;
} }
if (XTensor::IsSameShaped(a, b)) { if (_IsSameShaped(a, b)) {
_Sub(a, b, c, beta); _Sub(a, b, c, beta);
return; return;
} }
...@@ -188,8 +189,8 @@ i.e., a is subtracted with b by broadcasting ...@@ -188,8 +189,8 @@ i.e., a is subtracted with b by broadcasting
*/ */
void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta) void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !IsSameShaped(a, c)) {
InitTensor(&c, &a); InitTensorV2(&c, &a);
} }
/* call _Sub function */ /* call _Sub function */
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "../../XUtility.h" #include "../../XUtility.h"
#include "../../XBLAS.h" #include "../../XBLAS.h"
#include "../movement/CopyValues.h" #include "../movement/CopyValues.h"
#include "../shape/IsSameShaped.h"
#include "Sum.h" #include "Sum.h"
#include "Sum.cuh" #include "Sum.cuh"
#include "SumDim.h" #include "SumDim.h"
...@@ -183,7 +184,7 @@ int GetSumDimIndex(const XTensor &a, const XTensor &b) ...@@ -183,7 +184,7 @@ int GetSumDimIndex(const XTensor &a, const XTensor &b)
{ {
if(a.order < b.order) if(a.order < b.order)
return -1; return -1;
if(XTensor::IsSameShaped(&a, &b)) if(IsSameShaped(a, b))
return -1; return -1;
int hitCount = 0; int hitCount = 0;
...@@ -256,8 +257,8 @@ tensor summation c = a + b * \beta ...@@ -256,8 +257,8 @@ tensor summation c = a + b * \beta
*/ */
void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta) void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !IsSameShaped(a, c)) {
InitTensor(&c, &a); InitTensorV2(&c, &a);
} }
int n = GetSumDimIndex(a, b); int n = GetSumDimIndex(a, b);
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "SumDim.h" #include "SumDim.h"
#include "SumDim.cuh" #include "SumDim.cuh"
#include "../shape/Unsqueeze.h" #include "../shape/Unsqueeze.h"
#include "../shape/IsSameShaped.h"
#include "../../XName.h" #include "../../XName.h"
#include "../../XUtility.h" #include "../../XUtility.h"
#include "../movement/CopyValues.h" #include "../movement/CopyValues.h"
...@@ -64,25 +65,11 @@ void _SumDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet ...@@ -64,25 +65,11 @@ void _SumDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet
return; return;
} }
if(XTensor::IsSameShaped(a, b)){ if(_IsSameShaped(a, b)){
_Sum(a, b, c, beta); _Sum(a, b, c, beta);
return; return;
} }
/*int dims[MAX_TENSOR_DIM_NUM];
for(int i = 0; i < a->order; i++)
dims[i] = 1;
dims[n] = a->GetDim(n);
XTensor * b2 = NewTensor(a->order, dims, b->dataType, b->denseRatio, b->devID, b->mem);
_CopyValues(b, b2);
_SumBroadcast(a, b2, c, beta);
DelTensor(b2);
return;*/
if(a->devID >= 0 || b->devID >= 0 || c->devID >= 0){ if(a->devID >= 0 || b->devID >= 0 || c->devID >= 0){
#ifdef USE_CUDA #ifdef USE_CUDA
_CudaSumDim(a, b, c, n, beta); _CudaSumDim(a, b, c, n, beta);
...@@ -205,8 +192,8 @@ i.e., a is summed with b by broadcasting ...@@ -205,8 +192,8 @@ i.e., a is summed with b by broadcasting
*/ */
void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta) void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !IsSameShaped(a, c)) {
InitTensor(&c, &a); InitTensorV2(&c, &a);
} }
/* call _SumDim function */ /* call _SumDim function */
...@@ -281,8 +268,8 @@ void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta ...@@ -281,8 +268,8 @@ void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta
dimsS[0] = -dimsS[0]; dimsS[0] = -dimsS[0];
dimsT[0] = -dimsT[0]; dimsT[0] = -dimsT[0];
XTensor * s = NewTensor(order - (j - i), dimsS, a->dataType, a->denseRatio, a->devID, a->mem); XTensor * s = NewTensorV2(order - (j - i), dimsS, a->dataType, a->denseRatio, a->devID, a->mem);
XTensor * t = NewTensor(order - (j - i) + 1, dimsT, b->dataType, b->denseRatio, b->devID, b->mem); XTensor * t = NewTensorV2(order - (j - i) + 1, dimsT, b->dataType, b->denseRatio, b->devID, b->mem);
if(count == 0) if(count == 0)
source = b->data; source = b->data;
...@@ -374,8 +361,8 @@ c = a + b * \beta ...@@ -374,8 +361,8 @@ c = a + b * \beta
*/ */
void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta) void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
{ {
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { if (!c.isInit || !IsSameShaped(a, c)) {
InitTensor(&c, &a); InitTensorV2(&c, &a);
} }
/* call _SumBroadcast function */ /* call _SumBroadcast function */
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "../../XUtility.h" #include "../../XUtility.h"
#include "../../XDevice.h" #include "../../XDevice.h"
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../shape/IsSameShaped.h"
#include "XTensorBLAS.h" #include "XTensorBLAS.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -224,9 +225,9 @@ void _CudaBLASMatrixMULList(cublasHandle_t * handle, ...@@ -224,9 +225,9 @@ void _CudaBLASMatrixMULList(cublasHandle_t * handle,
XTensor * ai = (XTensor*)a->GetItem(i); XTensor * ai = (XTensor*)a->GetItem(i);
XTensor * bi = (XTensor*)b->GetItem(i); XTensor * bi = (XTensor*)b->GetItem(i);
XTensor * ci = (XTensor*)c->GetItem(i); XTensor * ci = (XTensor*)c->GetItem(i);
if (!XTensor::IsSameShaped(aim, ai) || if (!_IsSameShaped(aim, ai) ||
!XTensor::IsSameShaped(bim, bi) || !_IsSameShaped(bim, bi) ||
!XTensor::IsSameShaped(cim, ci)) !_IsSameShaped(cim, ci))
{ {
isUniform = false; isUniform = false;
break; break;
......
...@@ -131,7 +131,7 @@ void ConvertDataType(const XTensor & input, XTensor & output, TENSOR_DATA_TYPE d ...@@ -131,7 +131,7 @@ void ConvertDataType(const XTensor & input, XTensor & output, TENSOR_DATA_TYPE d
{ {
if (!output.isInit || input.dataType != output.dataType) { if (!output.isInit || input.dataType != output.dataType) {
float dr = (!input.isSparse) ? 1.0F : input.denseRatio; float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, input.order, input.dimSize, dataType, dr, input.devID, input.mem); InitTensorV2(&output, input.order, input.dimSize, dataType, dr, input.devID, input.mem);
} }
_ConvertDataType(&input, &output); _ConvertDataType(&input, &output);
......
...@@ -85,7 +85,7 @@ XTensor OnehotToIndex(const XTensor & onehot, int size) ...@@ -85,7 +85,7 @@ XTensor OnehotToIndex(const XTensor & onehot, int size)
CheckNTErrors(onehot.dataType == X_INT, "The onehot tensor must be in X_INT!") CheckNTErrors(onehot.dataType == X_INT, "The onehot tensor must be in X_INT!")
XTensor index; XTensor index;
InitTensor(&index, onehot.order - 1, onehot.dimSize, X_INT, 1.0F, onehot.devID, onehot.mem); InitTensorV2(&index, onehot.order - 1, onehot.dimSize, X_INT, 1.0F, onehot.devID, onehot.mem);
index.SetTMPFlag(); index.SetTMPFlag();
_OnehotToIndex(&onehot, &index, size); _OnehotToIndex(&onehot, &index, size);
...@@ -140,6 +140,47 @@ void _IndexToOnehot(const XTensor * index, XTensor * onehot, ...@@ -140,6 +140,47 @@ void _IndexToOnehot(const XTensor * index, XTensor * onehot,
} }
/* /*
convert an index array to a onehot tensor
>> index - host array of indices, each an integer label
>> n - number of indices in the array
>> onehot - onehot tensor, whose values are 0 or 1
>> size - size of the last dimension of the onehot tensor
>> labelSmoothingP - label smoothing factor
*/
void _IndexToOnehot(int * index, int n, XTensor * onehot, int size, float labelSmoothingP)
{
/*CheckNTErrors(onehot->GetDim(-1) == size, "Illegal tensor dimension!");
CheckNTErrors(onehot->dataType == X_INT, "The onehot tensor must be in X_INT!")
onehot->SetZeroAll();
#ifdef USE_CUDA
if (onehot->devID >= 0) {
delete[] cudaIndex;
return;
}
#endif
int blockNum = n;
int stride = size;
int * indexData = (int *)index;
int * onehotData = (int *)onehot->data;
for (int i = 0; i < blockNum; i++) {
int id = indexData[i];
int * od = onehotData + i * stride;
od[id] = 1;
}*/
XTensor* cudaIndex = NewTensor1DV2(n, X_INT, onehot->devID);
cudaIndex->SetData(index, n);
_IndexToOnehot(cudaIndex, onehot, size, labelSmoothingP);
delete cudaIndex; /* a single XTensor object, so plain delete, not delete[] */
}
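A hypothetical use of the new raw-pointer overload: it wraps the host array in a temporary X_INT tensor on onehot's device and forwards to the XTensor version. All sizes and the devID below are made up for illustration:

    void IndexToOnehotUsageSketch(int devID)
    {
        int labels[4] = {2, 0, 3, 1};                 /* host-side indices */
        XTensor onehot;
        InitTensor2DV2(&onehot, 4, 8, X_FLOAT, devID);
        /* size = 8 (last dimension), no label smoothing */
        _IndexToOnehot(labels, 4, &onehot, 8, 0.0F);
    }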
/*
convert onehot tensor to index tensor (return an XTensor structure) convert onehot tensor to index tensor (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
...@@ -159,7 +200,7 @@ XTensor IndexToOnehot(const XTensor & index, int size, float labelSmoothingP) ...@@ -159,7 +200,7 @@ XTensor IndexToOnehot(const XTensor & index, int size, float labelSmoothingP)
int * dim = new int[order + 1]; int * dim = new int[order + 1];
memcpy(dim, index.dimSize, order * sizeof(int)); memcpy(dim, index.dimSize, order * sizeof(int));
dim[order] = size; dim[order] = size;
InitTensor(&onehot, index.order + 1, dim, X_FLOAT, 1.0F, index.devID, index.mem); InitTensorV2(&onehot, index.order + 1, dim, X_FLOAT, 1.0F, index.devID, index.mem);
_IndexToOnehot(&index, &onehot, size, labelSmoothingP); _IndexToOnehot(&index, &onehot, size, labelSmoothingP);
......
...@@ -36,6 +36,9 @@ XTensor OnehotToIndex(const XTensor & onehot, int num); ...@@ -36,6 +36,9 @@ XTensor OnehotToIndex(const XTensor & onehot, int num);
/* convert index tensor to onehot tensor */ /* convert index tensor to onehot tensor */
void _IndexToOnehot(const XTensor * index, XTensor * onehot, int size, float labelSmoothingP); void _IndexToOnehot(const XTensor * index, XTensor * onehot, int size, float labelSmoothingP);
/* convert an index array (int pointer) to onehot tensor */
void _IndexToOnehot(int * index, int n, XTensor * onehot, int size, float labelSmoothingP);
/* convert index tensor to onehot tensor (return an XTensor structure) /* convert index tensor to onehot tensor (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor IndexToOnehot(const XTensor & index, int num, float labelSmoothingP); XTensor IndexToOnehot(const XTensor & index, int num, float labelSmoothingP);
......
...@@ -26,6 +26,114 @@ ...@@ -26,6 +26,114 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* /*
generate a tensor with the data selected by index along the given dimension
c = select(a)
>> a - input tensor
>> c - result tensor
>> index - array of the selected indices
>> dim - the dimension along which the selection is done
*/
void _Select(const XTensor * a, XTensor * c, int* index, int dim)
{
CheckNTErrors(a != NULL && c != NULL, "Empty tensors!");
CheckNTErrors(a->order == c->order, "The input and output tensors must be of the same order!");
CheckNTErrors(dim >= 0 && dim < a->order, "The input dimension is out of bounds!");
CheckNTErrors(a->dataType == c->dataType, "The tensors must be of the same data type!");
int stride = 1;
for (int i = dim + 1; i < a->order; i++)
stride *= a->dimSize[i];
int copyTimes = 1;
for (int i = 0; i < dim; i++)
copyTimes *= a->dimSize[i];
int cot = c->dimSize[dim];
int blockSize = stride * a->unitSize;
int stepSizeS = stride * a->dimSize[dim] * a->unitSize;
int stepSizeT = stride * c->dimSize[dim] * a->unitSize;
char * s = (char*)a->data;
char * t = (char*)c->data;
for (int i = 0; i < copyTimes; i++) {
for (int j = 0; j < cot; ++j) {
XMemCopy(t + j * blockSize, c->devID, s + index[j] * blockSize, a->devID, blockSize);
}
s += stepSizeS;
t += stepSizeT;
}
}
/*
generate a tensor with the data selected by index along the given dimension
c = select(a)
>> a - input tensor
>> c - result tensor
>> index - tensor of the selected indices
>> dim - the dimension along which the selection is done
*/
void _Select(const XTensor * a, XTensor * c, XTensor* index, int dim)
{
if (index->devID >= 0) {
int * indexCPU = new int[index->unitNum];
XMemCopy(indexCPU, -1, index->data, index->devID, index->unitNum * sizeof(int));
_Select(a, c, indexCPU, dim);
delete[] indexCPU;
}
else {
_Select(a, c, (int *)index->data, dim);
}
}
/*
c = select(a)
>> a - input tensor
>> index - tensor of the selected indices
>> dim - the dimension along which the selection is done
<< return - the tensor holding the selected data
*/
XTensor Select(const XTensor &a, XTensor &index, int dim)
{
int order = a.order;
int * dimSize = new int[order];
CheckNTErrors(dim >= 0 && dim < a.order, "The input dimension is out of bounds!");
for (int i = 0; i < a.order; i++) {
if (i == dim) {
dimSize[i] = index.dimSize[0];
}
else
dimSize[i] = a.dimSize[i];
}
float dr = (!a.isSparse) ? 1.0F : a.denseRatio;
XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem);
c.SetTMPFlag();
/* call _Select function */
_Select(&a, &c, &index, dim);
/* tensor connection */
if (a.enableGrad) {
XLink::MakeLink(&a, &index, &c, GETANDSET_SELECT);
XLink::AddParamToHeadInt(&c, dim);
}
/* destroy variables */
delete[] dimSize;
return c;
}
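A hypothetical use of the new dimension-aware Select, picking two rows of a matrix; shapes and devID are illustrative:

    void SelectUsageSketch(int devID)
    {
        XTensor a, index;
        InitTensor2DV2(&a, 4, 5, X_FLOAT, devID);   /* a: 4 x 5 */
        a.SetZeroAll();

        int rows[2] = {1, 3};
        InitTensor1DV2(&index, 2, X_INT, devID);
        index.SetData(rows, 2);                     /* indices to keep */

        XTensor c = Select(a, index, 0);            /* c: 2 x 5 */
    }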
/*
generate a tensor with selected data in range[low,high] along the given dimension generate a tensor with selected data in range[low,high] along the given dimension
c = select(a) c = select(a)
...@@ -58,13 +166,12 @@ void _SelectRange(const XTensor * a, XTensor * c, int dim, int low, int high) ...@@ -58,13 +166,12 @@ void _SelectRange(const XTensor * a, XTensor * c, int dim, int low, int high)
} }
int stride = 1; int stride = 1;
int dimRDI = a->order - dim - 1; for(int i = dim + 1; i < a->order; i++)
for(int i = 0; i < dimRDI; i++) stride *= a->dimSize[i];
stride *= a->dimSizeRDI[i];
int copyTimes = 1; int copyTimes = 1;
for (int i = dimRDI + 1; i < a->order; i++) for (int i = 0; i < dim; i++)
copyTimes *= a->dimSizeRDI[i]; copyTimes *= a->dimSize[i];
int blockSize = stride * (high - low) * a->unitSize; int blockSize = stride * (high - low) * a->unitSize;
int stepSizeS = stride * a->dimSize[dim] * a->unitSize; int stepSizeS = stride * a->dimSize[dim] * a->unitSize;
......
...@@ -27,13 +27,16 @@ ...@@ -27,13 +27,16 @@
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
/* generate a tensor with selected data c = select(a) */ /* generate a tensor with selected data c = select(a) */
void _Select(const XTensor * a, XTensor * c, XTensor * indexCPU); void _Select(const XTensor * a, XTensor * c, int* index, int dim);
/* generate a tensor with selected data c = select(a) */
void _Select(const XTensor * a, XTensor * c, XTensor* index, int dim);
/* /*
generate a tensor with selected data c = select(a) (return an XTensor structure) generate a tensor with selected data c = select(a) (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
*/ */
XTensor Select(const XTensor &a, XTensor &indexCPU); XTensor Select(const XTensor &a, XTensor &index, int dim);
/* /*
generate a tensor with selected data in range[low,high] along the given dimension generate a tensor with selected data in range[low,high] along the given dimension
......
...@@ -470,7 +470,7 @@ void _SetDataLowTri(XTensor * tensor, DTYPE p, int shift) ...@@ -470,7 +470,7 @@ void _SetDataLowTri(XTensor * tensor, DTYPE p, int shift)
void _SetDataRand(XTensor * tensor, int rNum, int cNum) void _SetDataRand(XTensor * tensor, int rNum, int cNum)
{ {
if (tensor == NULL || tensor->isInit == false || tensor->order !=2 ) { if (tensor == NULL || tensor->isInit == false || tensor->order !=2 ) {
InitTensor2D(tensor, rNum, cNum); InitTensor2DV2(tensor, rNum, cNum);
} }
_SetDataRand(tensor, 0.0F, 1.0F); _SetDataRand(tensor, 0.0F, 1.0F);
...@@ -519,7 +519,7 @@ void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper) ...@@ -519,7 +519,7 @@ void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
#ifdef USE_CUDA #ifdef USE_CUDA
_CudaSetDataRand(tensor, lower, upper); _CudaSetDataRand(tensor, lower, upper);
#endif #endif
//XTensor * t2 = NewTensor(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, -1); //XTensor * t2 = NewTensorV2(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, -1);
//_SetDataRand(t2, low, high); //_SetDataRand(t2, low, high);
//_CopyValues(t2, tensor); //_CopyValues(t2, tensor);
//delete t2; //delete t2;
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <math.h> #include <math.h>
#include "../../XName.h" #include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Binary.h" #include "Binary.h"
#include "Binary.cuh" #include "Binary.cuh"
...@@ -77,7 +78,7 @@ void _funcName(const XTensor * a, XTensor * b, T num) ...@@ -77,7 +78,7 @@ void _funcName(const XTensor * a, XTensor * b, T num)
_cudaFuncName(a, b, num); \ _cudaFuncName(a, b, num); \
return; \ return; \
} \ } \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \ CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same data type!"); \ "Input tensors should have the same data type!"); \
if (a->dataType == X_INT) { \ if (a->dataType == X_INT) { \
int * d = (int*)a->data; \ int * d = (int*)a->data; \
...@@ -112,7 +113,7 @@ void _funcName(const XTensor * a, XTensor * b, T num) ...@@ -112,7 +113,7 @@ void _funcName(const XTensor * a, XTensor * b, T num)
if (a->devID >= 0) { \ if (a->devID >= 0) { \
ShowNTErrors("No GPU devices support!") \ ShowNTErrors("No GPU devices support!") \
} \ } \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \ CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same data type!"); \ "Input tensors should have the same data type!"); \
if (a->dataType == X_INT) { \ if (a->dataType == X_INT) { \
int * d = (int*)a->data; \ int * d = (int*)a->data; \
...@@ -169,8 +170,8 @@ XTensor funcName(const XTensor &a, T num) ...@@ -169,8 +170,8 @@ XTensor funcName(const XTensor &a, T num)
_funcName(&a, &b, num); \ _funcName(&a, &b, num); \
if(a.enableGrad){ \ if(a.enableGrad){ \
XLink::MakeLink(&a, NULL, &b, operationId); \ XLink::MakeLink(&a, NULL, &b, operationId); \
} \
XLink::AddParamToHead(&b, num); \ XLink::AddParamToHead(&b, num); \
} \
return b; \ return b; \
} \ } \
template XTensor funcName<int>(const XTensor&, int); \ template XTensor funcName<int>(const XTensor&, int); \
...@@ -181,8 +182,8 @@ template XTensor funcName<double>(const XTensor&, double); ...@@ -181,8 +182,8 @@ template XTensor funcName<double>(const XTensor&, double);
template<class T> \ template<class T> \
void funcName(const XTensor &a, XTensor &b, T num) \ void funcName(const XTensor &a, XTensor &b, T num) \
{ \ { \
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \ if (!b.isInit || !IsSameShaped(a, b)) { \
InitTensor(&b, &a); \ InitTensorV2(&b, &a); \
} \ } \
_funcName(&a, &b, num); \ _funcName(&a, &b, num); \
if (a.enableGrad) { \ if (a.enableGrad) { \
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "../../XDevice.h" #include "../../XDevice.h"
#include "../../XUtility.h" #include "../../XUtility.h"
#include "../../XName.h" #include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Binary.h" #include "Binary.h"
#include "Binary.cuh" #include "Binary.cuh"
...@@ -89,7 +90,7 @@ void Kernel##funcName(T1 * a, T1 * b, int size, T2 num) ...@@ -89,7 +90,7 @@ void Kernel##funcName(T1 * a, T1 * b, int size, T2 num)
template<class T> \ template<class T> \
void _Cuda##funcName(const XTensor * a, XTensor * b, T num) \ void _Cuda##funcName(const XTensor * a, XTensor * b, T num) \
{ \ { \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \ CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \ "Input tensors should have the same type!"); \
CheckNTErrors((a->isSparse == false), "TODO!"); \ CheckNTErrors((a->isSparse == false), "TODO!"); \
\ \
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h" #include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Clip.h" #include "Clip.h"
#include "Clip.cuh" #include "Clip.cuh"
...@@ -43,7 +44,7 @@ void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper) ...@@ -43,7 +44,7 @@ void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)
} }
#endif #endif
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!"); CheckNTErrors((_IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data; DTYPE * d = (DTYPE*)a->data;
...@@ -110,8 +111,8 @@ XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper) ...@@ -110,8 +111,8 @@ XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper)
void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper) void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper)
{ {
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { if (!b.isInit || !IsSameShaped(a, b)) {
InitTensor(&b, &a); InitTensorV2(&b, &a);
} }
/* call _Clip function */ /* call _Clip function */
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "../../XDevice.h" #include "../../XDevice.h"
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../shape/IsSameShaped.h"
#include "Clip.h" #include "Clip.h"
#include "Clip.cuh" #include "Clip.cuh"
...@@ -36,7 +37,7 @@ set each entry to its clip value (CUDA Kernel) ...@@ -36,7 +37,7 @@ set each entry to its clip value (CUDA Kernel)
>> size - size of the data array >> size - size of the data array
*/ */
__global__ __global__
void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size) void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size)
{ {
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
...@@ -74,7 +75,7 @@ set each entry to its clip value ...@@ -74,7 +75,7 @@ set each entry to its clip value
*/ */
void _CudaClip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper) void _CudaClip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)
{ {
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!"); CheckNTErrors((_IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!"); CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3]; int gridSize[3];
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XDevice.h" #include "../../XDevice.h"
#include "../../XName.h" #include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Compare.h" #include "Compare.h"
#include "Compare.cuh" #include "Compare.cuh"
...@@ -42,7 +43,7 @@ DTYPE myIsNotEqual(DTYPE a, DTYPE b) ...@@ -42,7 +43,7 @@ DTYPE myIsNotEqual(DTYPE a, DTYPE b)
#define _SIMPLE_COMPARE_FUNCTION(_funcName, _cudaFuncName, origFunc) \ #define _SIMPLE_COMPARE_FUNCTION(_funcName, _cudaFuncName, origFunc) \
void _funcName(const XTensor * a, XTensor * b, DTYPE number) \ void _funcName(const XTensor * a, XTensor * b, DTYPE number) \
{ \ { \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \ CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \ "Input tensors should have the same type!"); \
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \ CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
/* run it on GPUs */ \ /* run it on GPUs */ \
...@@ -59,7 +60,7 @@ void _funcName(const XTensor * a, XTensor * b, DTYPE number) ...@@ -59,7 +60,7 @@ void _funcName(const XTensor * a, XTensor * b, DTYPE number)
#define _SIMPLE_COMPARE_FUNCTION(_funcName, origFunc) \ #define _SIMPLE_COMPARE_FUNCTION(_funcName, origFunc) \
void _funcName(const XTensor * a, XTensor * b, DTYPE number) \ void _funcName(const XTensor * a, XTensor * b, DTYPE number) \
{ \ { \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \ CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \ "Input tensors should have the same type!"); \
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \ CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
/* run it on GPUs */ \ /* run it on GPUs */ \
...@@ -97,8 +98,8 @@ XTensor funcName(const XTensor &a, DTYPE number) ...@@ -97,8 +98,8 @@ XTensor funcName(const XTensor &a, DTYPE number)
#define SIMPLE_COMPARE_FUNCTION_VOID(funcName, _funcName, operationId) \ #define SIMPLE_COMPARE_FUNCTION_VOID(funcName, _funcName, operationId) \
void funcName(const XTensor &a, XTensor &b, DTYPE number) \ void funcName(const XTensor &a, XTensor &b, DTYPE number) \
{ \ { \
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \ if (!b.isInit || !IsSameShaped(a, b)) { \
InitTensor(&b, &a); \ InitTensorV2(&b, &a); \
} \ } \
_funcName(&a, &b, number); \ _funcName(&a, &b, number); \
} }
...@@ -130,7 +131,7 @@ SIMPLE_COMPARE_FUNCTION_VOID(NotEqual, _NotEqual, MATH_NOTEQUAL) ...@@ -130,7 +131,7 @@ SIMPLE_COMPARE_FUNCTION_VOID(NotEqual, _NotEqual, MATH_NOTEQUAL)
#define _SIMPLE_MAX_MIN_FUNCTION(_funcName, _cudaFuncName, origFunc) \ #define _SIMPLE_MAX_MIN_FUNCTION(_funcName, _cudaFuncName, origFunc) \
void _funcName(const XTensor * a, const XTensor * b, XTensor * c) \ void _funcName(const XTensor * a, const XTensor * b, XTensor * c) \
{ \ { \
CheckNTErrors((XTensor::IsSameShaped(a, b, c)), \ CheckNTErrors((_IsSameShaped(a, b, c)), \
"Input and output tensors should have the same type!"); \ "Input and output tensors should have the same type!"); \
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \ CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
CheckDev(a->devID, b->devID); \ CheckDev(a->devID, b->devID); \
...@@ -150,7 +151,7 @@ void _funcName(const XTensor * a, const XTensor * b, XTensor * c) ...@@ -150,7 +151,7 @@ void _funcName(const XTensor * a, const XTensor * b, XTensor * c)
#define _SIMPLE_MAX_MIN_FUNCTION(_funcName, origFunc) \ #define _SIMPLE_MAX_MIN_FUNCTION(_funcName, origFunc) \
void _funcName(const XTensor * a, const XTensor * b, XTensor *c) \ void _funcName(const XTensor * a, const XTensor * b, XTensor *c) \
{ \ { \
CheckNTErrors((XTensor::IsSameShaped(a, b, c)), \ CheckNTErrors((_IsSameShaped(a, b, c)), \
"Input and output tensors should have the same type!"); \ "Input and output tensors should have the same type!"); \
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \ CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
CheckDev(a, b); \ CheckDev(a, b); \
...@@ -191,7 +192,7 @@ XTensor funcName(const XTensor & a, const XTensor & b) ...@@ -191,7 +192,7 @@ XTensor funcName(const XTensor & a, const XTensor & b)
#define SIMPLE_MAX_MIN_FUNCTION_VOID(funcName, _funcName, operationId) \ #define SIMPLE_MAX_MIN_FUNCTION_VOID(funcName, _funcName, operationId) \
void funcName(const XTensor &a, const XTensor &b, XTensor c) \ void funcName(const XTensor &a, const XTensor &b, XTensor &c) \
{ \ { \
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { \ if (!c.isInit || !IsSameShaped(a, c)) { \
InitTensor(&c, &a); \ InitTensorV2(&c, &a); \
} \ } \
_funcName(&a, &b, &c); \ _funcName(&a, &b, &c); \
......
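The *_FUNCTION_VOID macros above share one pattern: if the caller passes an uninitialized or wrongly shaped output tensor, the wrapper shapes it from the input before dispatching the in-place kernel. A minimal stand-alone sketch of that pattern, using a toy Tensor type rather than the NiuTrans API:

#include <cstddef>
#include <vector>

struct Tensor {
    bool isInit = false;
    std::vector<int> dims;
    std::vector<float> data;
};

static bool isSameShaped(const Tensor &a, const Tensor &b) {
    return a.isInit && b.isInit && a.dims == b.dims;
}

static void initLike(Tensor &t, const Tensor &src) {
    t.dims = src.dims;
    t.data.assign(src.data.size(), 0.0f);
    t.isInit = true;
}

/* mirrors SIMPLE_COMPARE_FUNCTION_VOID: the caller may pass an empty
   output tensor; the wrapper shapes it before running the kernel,
   the same role InitTensorV2(&b, &a) plays above */
void Equal(const Tensor &a, Tensor &b, float number) {
    if (!b.isInit || !isSameShaped(a, b))
        initLike(b, a);
    for (std::size_t i = 0; i < a.data.size(); ++i)
        b.data[i] = (a.data[i] == number) ? 1.0f : 0.0f;
}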
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <math.h> #include <math.h>
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h" #include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Normalize.h" #include "Normalize.h"
#include "Normalize.cuh" #include "Normalize.cuh"
...@@ -46,26 +47,25 @@ void _Normalize(const XTensor * input, XTensor * output, int dim, ...@@ -46,26 +47,25 @@ void _Normalize(const XTensor * input, XTensor * output, int dim,
const XTensor * mean, const XTensor * var, const XTensor * mean, const XTensor * var,
const XTensor * a, const XTensor * b, DTYPE epsilon) const XTensor * a, const XTensor * b, DTYPE epsilon)
{ {
int dimRDI = input->order - dim - 1; CheckNTErrors((_IsSameShaped(input, output)), "Unmatched input tensors!");
CheckNTErrors((XTensor::IsSameShaped(input, output)), "Unmatched input tensors!"); CheckNTErrors((_IsSameShaped(a, b)), "Unmatched input tensors");
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Unmatched input tensors"); CheckNTErrors((_IsSameShaped(mean, var)), "Unmatched input tensors");
CheckNTErrors((XTensor::IsSameShaped(mean, var)), "Unmatched input tensors");
CheckNTErrors((input && output && mean && var && a && b), "Empty input tensors!"); CheckNTErrors((input && output && mean && var && a && b), "Empty input tensors!");
CheckNTErrors((dimRDI >= 0 && dimRDI < input->order), "Incorrect reduction dimension!"); CheckNTErrors((dim >= 0 && dim < input->order), "Incorrect reduction dimension!");
CheckNTErrors((input->order == mean->order + 1), "Incorrect reduction dimension!"); CheckNTErrors((input->order == mean->order + 1), "Incorrect reduction dimension!");
int stride = 1; int stride = 1;
int strideNum = input->dimSizeRDI[dimRDI]; int strideNum = input->dimSize[dim];
int blockSize = 1; int blockSize = 1;
int blockNum = 1; int blockNum = 1;
for (int i = 0; i < input->order; i++) { for (int i = 0; i < input->order; i++) {
if (i < dimRDI) { if (i < dim) {
CheckNTErrors((input->dimSizeRDI[i] == mean->dimSizeRDI[i]), "Wrong size!"); CheckNTErrors((input->dimSize[i] == mean->dimSize[i]), "Wrong size!");
stride *= input->dimSizeRDI[i]; blockNum *= input->dimSize[i];
} }
else if (i > dimRDI) { else if (i > dim) {
CheckNTErrors((input->dimSizeRDI[i] == mean->dimSizeRDI[i - 1]), "Wrong size!"); CheckNTErrors((input->dimSize[i] == mean->dimSize[i - 1]), "Wrong size!");
blockNum *= input->dimSizeRDI[i]; stride *= input->dimSize[i];
} }
} }
blockSize = stride * strideNum; blockSize = stride * strideNum;
...@@ -203,8 +203,8 @@ void Normalize(const XTensor &input, XTensor &output, int dim, ...@@ -203,8 +203,8 @@ void Normalize(const XTensor &input, XTensor &output, int dim,
const XTensor &mean, const XTensor &var, const XTensor &mean, const XTensor &var,
const XTensor &a, const XTensor &b, DTYPE epsilon) const XTensor &a, const XTensor &b, DTYPE epsilon)
{ {
if (!output.isInit || !XTensor::IsSameShaped(&input, &output)) { if (!output.isInit || !IsSameShaped(input, output)) {
InitTensor(&output, &input); InitTensorV2(&output, &input);
} }
/* call _Normalize function */ /* call _Normalize function */
......
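_Normalize, like the other loops reworked in this commit, splits a row-major tensor around dim: dims before dim count blocks, dim itself is strideNum, and dims after dim form the contiguous stride. A small self-contained check of that decomposition (toy code, not the NiuTrans API):

#include <cassert>
#include <vector>

/* decompose a row-major tensor around dimension `dim`:
   leading dims -> blockNum, `dim` -> strideNum, trailing dims -> stride */
void decompose(const std::vector<int> &dimSize, int dim,
               int &stride, int &strideNum, int &blockNum) {
    stride = 1; blockNum = 1;
    strideNum = dimSize[dim];
    for (int i = 0; i < (int)dimSize.size(); ++i) {
        if (i < dim) blockNum *= dimSize[i];
        else if (i > dim) stride *= dimSize[i];
    }
}

int main() {
    int stride, strideNum, blockNum;
    decompose({2, 3, 4}, /*dim=*/1, stride, strideNum, blockNum);
    assert(stride == 4 && strideNum == 3 && blockNum == 2);
    /* element (b, k, o) lives at b * (strideNum * stride) + k * stride + o */
    return 0;
}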
...@@ -95,15 +95,14 @@ void _CudaNormalize(const XTensor * input, XTensor * output, int dim, ...@@ -95,15 +95,14 @@ void _CudaNormalize(const XTensor * input, XTensor * output, int dim,
{ {
CheckNTErrors((input->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((input->dataType == DEFAULT_DTYPE), "TODO!");
int dimRDI = input->order - dim - 1;
int stride = 1; int stride = 1;
int strideNum = input->dimSizeRDI[dimRDI]; int strideNum = input->dimSize[dim];
int blockNum = 1; int blockNum = 1;
for (int i = 0; i < input->order; i++) { for (int i = 0; i < input->order; i++) {
if (i < dimRDI) if (i > dim)
stride *= input->dimSizeRDI[i]; stride *= input->dimSize[i];
else if (i > dimRDI) else if (i < dim)
blockNum *= input->dimSizeRDI[i]; blockNum *= input->dimSize[i];
} }
int cudaGridSize[3]; int cudaGridSize[3];
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h" #include "../../XName.h"
#include "../../XUtility.h" #include "../../XUtility.h"
#include "../shape/IsSameShaped.h"
#include "ScaleAndShift.h" #include "ScaleAndShift.h"
#include "ScaleAndShift.cuh" #include "ScaleAndShift.cuh"
...@@ -147,8 +148,8 @@ b = a * scale + shift ...@@ -147,8 +148,8 @@ b = a * scale + shift
*/ */
void ScaleAndShift(const XTensor & a, XTensor & b, DTYPE scale, DTYPE shift) void ScaleAndShift(const XTensor & a, XTensor & b, DTYPE scale, DTYPE shift)
{ {
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { if (!b.isInit || !IsSameShaped(a, b)) {
InitTensor(&b, &a); InitTensorV2(&b, &a);
} }
/* call _ScaleAndShift function */ /* call _ScaleAndShift function */
......
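A hedged usage sketch for the wrapper above: the lazy init means callers may pass an empty output tensor. The include path is an assumption; the ScaleAndShift signature is the one shown here, and InitTensor2DV2 is one of the V2 initializers this commit migrates to.

#include "ScaleAndShift.h"   /* include path is an assumption */
using namespace nts;

void Demo()
{
    XTensor a;
    XTensor b;                 /* intentionally left uninitialized */
    InitTensor2DV2(&a, 2, 3);
    a.SetZeroAll();

    /* the wrapper shapes b like a on first use, then b = a * 2 + 1 */
    ScaleAndShift(a, b, 2.0F, 1.0F);
}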
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <math.h> #include <math.h>
#include "../../XName.h" #include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Unary.h" #include "Unary.h"
#include "Unary.cuh" #include "Unary.cuh"
...@@ -77,7 +78,7 @@ void _funcName(const XTensor * a, XTensor * b) ...@@ -77,7 +78,7 @@ void _funcName(const XTensor * a, XTensor * b)
_cudaFuncName(a, b); \ _cudaFuncName(a, b); \
return; \ return; \
} \ } \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \ CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \ "Input tensors should have the same type!"); \
if (a->dataType == X_INT) { \ if (a->dataType == X_INT) { \
int * d = (int*)a->data; \ int * d = (int*)a->data; \
...@@ -108,7 +109,7 @@ void _funcName(const XTensor * a, XTensor * b) ...@@ -108,7 +109,7 @@ void _funcName(const XTensor * a, XTensor * b)
if (a->devID >= 0) { \ if (a->devID >= 0) { \
ShowNTErrors("No GPU devices support!") \ ShowNTErrors("No GPU devices support!") \
} \ } \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \ CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \ "Input tensors should have the same type!"); \
if (a->dataType == X_INT) { \ if (a->dataType == X_INT) { \
int * d = (int*)a->data; \ int * d = (int*)a->data; \
...@@ -160,8 +161,8 @@ XTensor funcName(const XTensor & a) ...@@ -160,8 +161,8 @@ XTensor funcName(const XTensor & a)
#define SIMPLE_UNARY_FUNCTION_VOID(funcName, _funcName, operationId) \ #define SIMPLE_UNARY_FUNCTION_VOID(funcName, _funcName, operationId) \
void funcName(const XTensor & a, XTensor & b) \ void funcName(const XTensor & a, XTensor & b) \
{ \ { \
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \ if (!b.isInit || !IsSameShaped(a, b)) { \
InitTensor(&b, &a); \ InitTensorV2(&b, &a); \
} \ } \
_funcName(&a, &b); \ _funcName(&a, &b); \
if (a.enableGrad) { \ if (a.enableGrad) { \
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <math.h> #include <math.h>
#include "../../XDevice.h" #include "../../XDevice.h"
#include "../../XName.h" #include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Unary.h" #include "Unary.h"
#include "Unary.cuh" #include "Unary.cuh"
#include<cuda_runtime.h> #include<cuda_runtime.h>
...@@ -154,7 +155,7 @@ void Kernel##funcName(T * a, T * b, int size) \ ...@@ -154,7 +155,7 @@ void Kernel##funcName(T * a, T * b, int size) \
} \ } \
void _Cuda##funcName(const XTensor * a, XTensor * b) \ void _Cuda##funcName(const XTensor * a, XTensor * b) \
{ \ { \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \ CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \ "Input tensors should have the same type!"); \
CheckNTErrors(a->isSparse == false, "TODO!"); \ CheckNTErrors(a->isSparse == false, "TODO!"); \
\ \
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
*/ */
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../shape/IsSameShaped.h"
#include "CopyInGrid.h" #include "CopyInGrid.h"
#include "CopyBlocksInGrid.h" #include "CopyBlocksInGrid.h"
...@@ -38,14 +39,13 @@ in the k-th grid ...@@ -38,14 +39,13 @@ in the k-th grid
*/ */
void _CopyInGrid(const XTensor * s, XTensor * t, int * index, int blockDim, int blockNumInGrid, bool isIndexOnDev) void _CopyInGrid(const XTensor * s, XTensor * t, int * index, int blockDim, int blockNumInGrid, bool isIndexOnDev)
{ {
CheckNTErrors((XTensor::IsSameShaped(s, t)), "Unmatched tensors!"); CheckNTErrors((_IsSameShaped(s, t)), "Unmatched tensors!");
int blockDimRDI = s->order - blockDim - 1;
int blockSize = 1; int blockSize = 1;
int blockNum = blockNumInGrid; int blockNum = blockNumInGrid;
int gridNum = 1; int gridNum = 1;
for (int i = 0; i < blockDimRDI; i++) for (int i = blockDim; i < s->order; i++)
blockSize *= s->dimSizeRDI[i]; blockSize *= s->dimSize[i];
CheckNTErrors((s->unitNum % (blockSize * blockNum) == 0), "Illegal block number!"); CheckNTErrors((s->unitNum % (blockSize * blockNum) == 0), "Illegal block number!");
gridNum = s->unitNum / (blockSize * blockNum); gridNum = s->unitNum / (blockSize * blockNum);
......
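Most hunks in this commit replace the reversed dimension index (dimSizeRDI, with dimRDI = order - dim - 1) by direct indexing, flipping loop bounds as in _CopyInGrid above. A self-contained check of the conversion rule, assuming dimSizeRDI[i] stores dimSize[order - 1 - i]:

#include <cassert>
#include <vector>

int main() {
    std::vector<int> dimSize = {2, 3, 4};
    int order = (int)dimSize.size();

    /* assumed storage rule: dimSizeRDI[i] == dimSize[order - 1 - i] */
    std::vector<int> dimSizeRDI(order);
    for (int i = 0; i < order; ++i)
        dimSizeRDI[i] = dimSize[order - 1 - i];

    int dim = 1;
    int dimRDI = order - dim - 1;   /* the mapping the old code computed */

    /* the product over RDI indices 0..dimRDI equals the product over
       direct indices dim..order-1, i.e. the trailing block size */
    int viaRDI = 1, direct = 1;
    for (int i = 0; i <= dimRDI; ++i) viaRDI *= dimSizeRDI[i];
    for (int i = dim; i < order; ++i) direct *= dimSize[i];
    assert(viaRDI == direct && direct == 12);
    return 0;
}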
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "CopyBlocks.h" #include "CopyBlocks.h"
#include "Gather.h" #include "Gather.h"
#include "../../XName.h" #include "../../XName.h"
#include "../utilities/SetAscendingOrder.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -52,26 +53,28 @@ void _CopyIndexed(const XTensor * s, XTensor * t, int dim, ...@@ -52,26 +53,28 @@ void _CopyIndexed(const XTensor * s, XTensor * t, int dim,
CheckNTErrors(dim < s->order && dim < t->order, "A too large dimension specified!"); CheckNTErrors(dim < s->order && dim < t->order, "A too large dimension specified!");
CheckNTErrors(s->unitSize == t->unitSize, "Unmatched tensors!"); CheckNTErrors(s->unitSize == t->unitSize, "Unmatched tensors!");
int dimRDI = s->order - dim - 1;
int blockSizeSrc = 1; int blockSizeSrc = 1;
int blockSizeTgt = 1; int blockSizeTgt = 1;
int blockNumSrc = 1; int blockNumSrc = 1;
int blockNumTgt = 1; int blockNumTgt = 1;
int leadDimSizeSrc = s->dimSizeRDI[dimRDI]; int leadDimSizeSrc = s->dimSize[dim];
int leadDimSizeTgt = t->dimSizeRDI[dimRDI]; int leadDimSizeTgt = t->dimSize[dim];
int indexOffsetNum = 1; int indexOffsetNum = 1;
for (int i = 0; i < dimRDI; i++) { for (int i = dim + 1; i < s->order; i++) {
blockSizeSrc *= s->dimSizeRDI[i]; blockSizeSrc *= s->dimSize[i];
blockSizeTgt *= t->dimSizeRDI[i]; }
for (int i = dim + 1; i < t->order; i++) {
blockSizeTgt *= t->dimSize[i];
}
for (int i = 0; i <= dim; i++)
{
blockNumSrc *= s->dimSize[i];
blockNumTgt *= t->dimSize[i];
} }
for (int i = dimRDI; i < s->order; i++)
blockNumSrc *= s->dimSizeRDI[i];
for (int i = dimRDI; i < t->order; i++)
blockNumTgt *= t->dimSizeRDI[i];
CheckNTErrors(blockSizeSrc == blockSizeTgt, "Unmatched tensors!"); CheckNTErrors(blockSizeSrc == blockSizeTgt, "Unmatched tensors!");
indexOffsetNum = blockNumSrc / s->dimSizeRDI[dimRDI]; indexOffsetNum = blockNumSrc / s->dimSize[dim];
int realIndexSize = indexOffsetNum * indexSize * copyNum; int realIndexSize = indexOffsetNum * indexSize * copyNum;
int * realSrcIndex = new int[realIndexSize]; int * realSrcIndex = new int[realIndexSize];
...@@ -206,7 +209,7 @@ void _CopyIndexed(const XTensor * s, XTensor * t, int dim, ...@@ -206,7 +209,7 @@ void _CopyIndexed(const XTensor * s, XTensor * t, int dim,
const XTensor * srcIndex, int copyNum) const XTensor * srcIndex, int copyNum)
{ {
XTensor * tgtIndex = NewTensor(srcIndex); XTensor * tgtIndex = NewTensor(srcIndex);
tgtIndex->SetAscendingOrder(0); SetAscendingOrder(*tgtIndex, 0);
_CopyIndexed(s, t, dim, srcIndex, tgtIndex, copyNum); _CopyIndexed(s, t, dim, srcIndex, tgtIndex, copyNum);
delete tgtIndex; delete tgtIndex;
...@@ -218,14 +221,14 @@ make a new tensor to keep the result and return it ...@@ -218,14 +221,14 @@ make a new tensor to keep the result and return it
>> s - the source tensor >> s - the source tensor
>> dim - the leading dimension to define "sub-tensors" >> dim - the leading dimension to define "sub-tensors"
e.g., for a tensor of size (4, 2, 3) and dim = 0, e.g., for a tensor of size (3, 2, 4) and dim = 2,
we have 4 sub-tensors of size (2, 3) we have 4 sub-tensors of size (3, 2)
>> srcIndex - index of the source sub-tensors >> srcIndex - index of the source sub-tensors
>> indexSize - length of srcIndex (and tgtIndex) >> indexSize - length of srcIndex (and tgtIndex)
>> tgtIndex - index of the target sub-tensors >> tgtIndex - index of the target sub-tensors
>> copyNum - number of the sub-tensors we copy for each source index, >> copyNum - number of the sub-tensors we copy for each source index,
e.g., for srcIndex = [0,1] and copyNum = 2, e.g., for srcIndex = [1,4] and copyNum = 2,
we actually copy the source sub-tensors 0, 1, 1 and 2 we actually copy the source sub-tensors 1, 2, 4, 5
<< return - the result of copying indexed sub-tensors << return - the result of copying indexed sub-tensors
*/ */
XTensor CopyIndexed(const XTensor & s, int dim, XTensor CopyIndexed(const XTensor & s, int dim,
...@@ -276,14 +279,14 @@ make a new tensor to keep the result and return it ...@@ -276,14 +279,14 @@ make a new tensor to keep the result and return it
>> s - the source tensor >> s - the source tensor
>> dim - the leading dimension to define "sub-tensors" >> dim - the leading dimension to define "sub-tensors"
e.g., for a tensor of size (4, 2, 3) and dim = 0, e.g., for a tensor of size (3, 2, 4) and dim = 2,
we have 4 sub-tensors of size (2, 3) we have 4 sub-tensors of size (3, 2)
>> srcIndex - index of the source sub-tensors >> srcIndex - index of the source sub-tensors
>> indexSize - length of srcIndex (and tgtIndex) >> indexSize - length of srcIndex (and tgtIndex)
>> tgtIndex - index of the target sub-tensors >> tgtIndex - index of the target sub-tensors
>> copyNum - number of the sub-tensors we copy for each source index, >> copyNum - number of the sub-tensors we copy for each source index,
e.g., for srcIndex = [0,1] and copyNum = 2, e.g., for srcIndex = [1,4] and copyNum = 2,
we actually copy the source sub-tensors 0, 1, 1 and 2 we actually copy the source sub-tensors 1, 2, 4, 5
<< return - the result of copying indexed sub-tensors << return - the result of copying indexed sub-tensors
*/ */
XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum) XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum)
......
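The copyNum expansion documented above is easy to sanity-check in isolation; this toy snippet reproduces the srcIndex = [1,4], copyNum = 2 example:

#include <cassert>
#include <vector>

/* for each source index we copy `copyNum` consecutive sub-tensors, so
   srcIndex = {1, 4} with copyNum = 2 touches sub-tensors 1, 2, 4 and 5 */
int main() {
    std::vector<int> srcIndex = {1, 4};
    int copyNum = 2;

    std::vector<int> expanded;
    for (int idx : srcIndex)
        for (int k = 0; k < copyNum; ++k)
            expanded.push_back(idx + k);

    assert((expanded == std::vector<int>{1, 2, 4, 5}));
    return 0;
}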
...@@ -33,6 +33,29 @@ gather indexed sub-tensors ...@@ -33,6 +33,29 @@ gather indexed sub-tensors
>> s - the source tensor >> s - the source tensor
>> t - the target tensor >> t - the target tensor
>> srcIndex - index of the source sub-tensors
>> dim - the leading dimension to define "sub-tensors"
e.g., for a tensor of size (3, 2, 4) and dim = 2,
we have 4 sub-tensors of size (3, 2)
*/
void _Gather(const XTensor * s, XTensor * t, XTensor * srcIndex, int dim)
{
CheckNTErrors((s && t), "Invalid tensors!");
CheckNTErrors(s->devID == t->devID, "the data must be kept on the same device!");
CheckNTErrors((t->unitSize == srcIndex->unitSize), "Unmatched tensors!");
#ifdef USE_CUDA
if (s->devID >= 0 && t->devID >= 0) {
_CudaGather(s, t, srcIndex, dim);
return;
}
#endif
/* no CPU implementation for gathering along a given dimension yet */
ShowNTErrors("TODO!");
}
/*
gather indexed sub-tensors
>> s - the source tensor
>> t - the target tensor
>> srcIndex - the tensor to save the index of the source tensor >> srcIndex - the tensor to save the index of the source tensor
*/ */
void _Gather(const XTensor * s, XTensor * t, XTensor * srcIndex) void _Gather(const XTensor * s, XTensor * t, XTensor * srcIndex)
...@@ -79,10 +102,15 @@ XTensor Gather(XTensor &s, XTensor &index) ...@@ -79,10 +102,15 @@ XTensor Gather(XTensor &s, XTensor &index)
CheckNTErrors(s.order == 2, "The order of the input tensor must be 2!"); CheckNTErrors(s.order == 2, "The order of the input tensor must be 2!");
int dim = 0;
int order = index.order + 1; int order = s.order;
int * dimSize = new int[order]; int * dimSize = new int[order];
memcpy(dimSize, index.dimSize, index.order * sizeof(int));
dimSize[index.order] = s.GetDim(-1); for (int i = 0; i < s.order; i++) {
if (i == dim)
dimSize[i] = index.unitNum;
else
dimSize[i] = s.dimSize[i];
}
float dr = (!s.isSparse) ? 1.0F : s.denseRatio; float dr = (!s.isSparse) ? 1.0F : s.denseRatio;
XTensor t(order, dimSize, s.dataType, dr, s.devID, s.mem); XTensor t(order, dimSize, s.dataType, dr, s.devID, s.mem);
...@@ -93,11 +121,25 @@ XTensor Gather(XTensor &s, XTensor &index) ...@@ -93,11 +121,25 @@ XTensor Gather(XTensor &s, XTensor &index)
_Gather(&s, &t, &index); _Gather(&s, &t, &index);
/* tensor connection */ /* tensor connection */
if (s.enableGrad) { if (s.enableGrad)
{
XLink::MakeLink(&s, &index, &t, MOVEMENT_GATHER); XLink::MakeLink(&s, &index, &t, MOVEMENT_GATHER);
} }
if(index.order > 1) {
int * dims = new int[index.order + 1];
memcpy(dims, index.dimSize, index.order * sizeof(int));
dims[index.order] = t.GetDim(-1);
XTensor tt;
tt = Reshape(t, index.order + 1, dims);
delete[] dims;
return tt;
}
else {
return t; return t;
}
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
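For an order-2 source, Gather picks whole rows by index; when the index has order > 1, the branch above reshapes the result to the index's shape plus the trailing column dimension. A toy row-gather that mirrors the core copy (plain C++, not the NiuTrans API):

#include <cassert>
#include <vector>

int main() {
    int rows = 4, cols = 3;
    std::vector<float> s(rows * cols);
    for (int i = 0; i < rows * cols; ++i) s[i] = (float)i;

    std::vector<int> index = {2, 0};            /* gather rows 2 and 0 */
    std::vector<float> t(index.size() * cols);
    for (int r = 0; r < (int)index.size(); ++r)
        for (int c = 0; c < cols; ++c)
            t[r * cols + c] = s[index[r] * cols + c];

    assert(t[0] == 6.0f && t[3] == 0.0f);       /* row 2, then row 0 */
    return 0;
}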
...@@ -68,6 +68,35 @@ void KernelGather(DTYPE * sData, DTYPE * tData, int * sIndex, int indexSize, int ...@@ -68,6 +68,35 @@ void KernelGather(DTYPE * sData, DTYPE * tData, int * sIndex, int indexSize, int
/* /*
gather indexed sub-tensors(cuda version) gather indexed sub-tensors(cuda version)
>> sData - the data pointer of the source tensor
>> tData - the data pointer of the target tensor
>> sIndex - the index of the source tensor
>> indexSize - the size of the srcIndex
>> stride - stride of a data block
>> strideNum - strideNum of a data block
>> blockNum - the number of data blocks
*/
__global__
void KernelGather(DTYPE * sData, DTYPE * tData, int * sIndex, int stride, int strideNum, int blockNum)
{
int idx = blockDim.x * blockIdx.x + threadIdx.x;
int idy = blockDim.y * blockIdx.y + threadIdx.y;
int blockIndex = idy / stride;
int offsetInBlock = idy % stride;
int size = stride * strideNum * blockNum;
#pragma unroll
for (int i = idx * stride + stride * strideNum * blockIndex + offsetInBlock;
i < stride * strideNum * blockIndex + offsetInBlock + stride * strideNum && i < size;
i += stride * blockDim.x) {
tData[i] = sData[sIndex[i]];
}
}
/*
gather indexed sub-tensors(cuda version)
>> s - the source tensor >> s - the source tensor
>> t - the target tensor >> t - the target tensor
>> srcIndex - the tensor to save the index of the source tensor >> srcIndex - the tensor to save the index of the source tensor
...@@ -117,6 +146,44 @@ void _CudaGather(const XTensor * s, XTensor * t, XTensor * srcIndex) ...@@ -117,6 +146,44 @@ void _CudaGather(const XTensor * s, XTensor * t, XTensor * srcIndex)
BacktoCudaDev(devID, devIDBackup); BacktoCudaDev(devID, devIDBackup);
} }
/*
gather indexed sub-tensors(cuda version)
>> s - the source tensor
>> t - the target tensor
>> srcIndex - the tensor to save the index of the source tensor
>> dim - the leading dimension to define "sub-tensors"
*/
void _CudaGather(const XTensor * s, XTensor * t, XTensor * srcIndex, int dim)
{
int devID = srcIndex->devID;
XMem * mem = s->mem;
int stride = 1;
int blockNum = 1;
int indexSize = srcIndex->unitNum;
int strideNum = srcIndex->dimSize[dim];
for (int i = 0; i < dim; i++)
blockNum *= srcIndex->dimSize[i];
for (int i = dim + 1; i < srcIndex->order; i++)
stride *= srcIndex->dimSize[i];
int * sIndex = NULL;
if (srcIndex->devID < 0) {
sIndex = mem != NULL ?
(int*)mem->AllocBuf(mem->devID, sizeof(int) * indexSize) :
(int*)XMemAlloc(devID, sizeof(int) * indexSize);
XMemCopy(sIndex, devID, srcIndex->data, -1, sizeof(int) * indexSize);
}
else
sIndex = (int *)srcIndex->data;
int cudaGrids[3];
int cudaBlocks[3];
GDevs.GetCudaThread2D(devID, max(32, strideNum), stride*blockNum, MAX_INT, cudaGrids, cudaBlocks);
KernelGather << <dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> > ((DTYPE *)s->data, (DTYPE *)t->data, sIndex, stride, strideNum, blockNum);
}
#endif // USE_CUDA #endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -32,6 +32,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -32,6 +32,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* gather indexed sub-tensors(cuda version) */ /* gather indexed sub-tensors(cuda version) */
void _CudaGather(const XTensor * s, XTensor * t, XTensor * srcIndex); void _CudaGather(const XTensor * s, XTensor * t, XTensor * srcIndex);
void _CudaGather(const XTensor * s, XTensor * t, XTensor * srcIndex,int dim);
#endif // USE_CUDA #endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -29,6 +29,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,6 +29,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* gather selected sub-tensors */ /* gather selected sub-tensors */
void _Gather(const XTensor * s, XTensor * t, XTensor * srcIndex); void _Gather(const XTensor * s, XTensor * t, XTensor * srcIndex);
/* gather selected sub-tensors according to the given dimension */
void _Gather(const XTensor * s, XTensor * t, XTensor * srcIndex, int dim);
/* gather selected sub-tensors (return an XTensor structure) /* gather selected sub-tensors (return an XTensor structure)
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Gather(XTensor &s, XTensor &index); XTensor Gather(XTensor &s, XTensor &index);
......
...@@ -31,6 +31,9 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -31,6 +31,9 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* get the max-valued items along a dimension of the tensor (cuda version) */ /* get the max-valued items along a dimension of the tensor (cuda version) */
void _CudaReduceMax(const XTensor * input, XTensor * output, int dim); void _CudaReduceMax(const XTensor * input, XTensor * output, int dim);
/* get the min-valued items along a dimension of the tensor (cuda version) */
void _CudaReduceMin(const XTensor * input, XTensor * output, int dim);
#endif // USE_CUDA #endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -29,14 +29,20 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -29,14 +29,20 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* get the max value of the items along a dimension of the tensor. */ /* get the max value of the items along a dimension of the tensor. */
void _ReduceMax(const XTensor * input, XTensor * output, int dim); void _ReduceMax(const XTensor * input, XTensor * output, int dim);
/* get the min value of the items along a dimension of the tensor. */
void _ReduceMin(const XTensor * input, XTensor * output, int dim);
/* /*
get the max value of the items along a dimension of the tensor (return an XTensor structure) get the max value of the items along a dimension of the tensor (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
*/ */
XTensor ReduceMax(const XTensor &input, int dim); XTensor ReduceMax(const XTensor &input, int dim);
/* get the max value of the items along a dimension of the tensor. */ /*
void ReduceMax(const XTensor &input, XTensor &output, int dim); get the min value of the items along a dimension of the tensor (return an XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor ReduceMin(const XTensor &input, int dim);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
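A hedged usage sketch for the new ReduceMin declaration alongside ReduceMax; the include path is an assumption, the signatures are the ones declared here:

#include "ReduceMax.h"   /* include path is an assumption */
using namespace nts;

void Demo(const XTensor &input)
{
    /* reducing dim 1 of an (a, b, c) tensor yields an (a, c) tensor */
    XTensor maxAlongDim1 = ReduceMax(input, 1);
    XTensor minAlongDim1 = ReduceMin(input, 1);
}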
...@@ -39,8 +39,7 @@ void _ReduceMean(const XTensor * input, XTensor * output, int dim) ...@@ -39,8 +39,7 @@ void _ReduceMean(const XTensor * input, XTensor * output, int dim)
{ {
CheckNTErrors((input->order > dim), "Illegal dimension specified!"); CheckNTErrors((input->order > dim), "Illegal dimension specified!");
int dimRDI = input->order - dim - 1; int num = input->dimSize[dim];
int num = input->dimSizeRDI[dimRDI];
_ReduceSum(input, output, dim); _ReduceSum(input, output, dim);
_ScaleAndShiftMe(output, (DTYPE)1/num, 0); _ScaleAndShiftMe(output, (DTYPE)1/num, 0);
...@@ -112,7 +111,7 @@ void ReduceMean(const XTensor &input, XTensor &output, int dim) ...@@ -112,7 +111,7 @@ void ReduceMean(const XTensor &input, XTensor &output, int dim)
} }
float dr = (!input.isSparse) ? 1.0F : input.denseRatio; float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem); InitTensorV2(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <math.h> #include <math.h>
#include "ReduceSum.h" #include "ReduceSum.h"
#include "ReduceSum.cuh" #include "ReduceSum.cuh"
#include "../shape/IsSameShaped.h"
#include "../../XName.h" #include "../../XName.h"
#include "../../XBLAS.h" #include "../../XBLAS.h"
#include "VectorBuffer.h" #include "VectorBuffer.h"
...@@ -51,17 +52,16 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor ...@@ -51,17 +52,16 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
CheckNTErrors((input->order == output->order + 1), "Incorrect tensor sizes!"); CheckNTErrors((input->order == output->order + 1), "Incorrect tensor sizes!");
CheckNTErrors((input->order > dim && dim >=0), "Illegal dimension to reduce!"); CheckNTErrors((input->order > dim && dim >=0), "Illegal dimension to reduce!");
CheckNTErrors((input->dataType == output->dataType), "Unmatched data types!"); CheckNTErrors((input->dataType == output->dataType), "Unmatched data types!");
CheckNTErrors((shift == NULL || XTensor::IsSameShaped(output, shift)), "Incorrect shift tensor size!"); CheckNTErrors((shift == NULL || _IsSameShaped(output, shift)), "Incorrect shift tensor size!");
int dimRDI = input->order - dim - 1; CheckNTErrors(dim < input->order, "Wrong dimension!");
CheckNTErrors(dimRDI >= 0, "Wrong dimension!");
for(int i = 0; i < input->order; i++){ for(int i = 0; i < input->order; i++){
if(i < dimRDI){ if(i < dim){
CheckNTErrors((input->dimSizeRDI[i] == output->dimSizeRDI[i]), "Unmatched tensors!"); CheckNTErrors((input->dimSize[i] == output->dimSize[i]), "Unmatched tensors!");
} }
else if(i > dimRDI){ else if(i > dim){
CheckNTErrors((input->dimSizeRDI[i] == output->dimSizeRDI[i - 1]), "Unmatched tensors!"); CheckNTErrors((input->dimSize[i] == output->dimSize[i - 1]), "Unmatched tensors!");
} }
} }
...@@ -74,21 +74,21 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor ...@@ -74,21 +74,21 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
CheckNTErrors((input->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((input->dataType == DEFAULT_DTYPE), "TODO!");
int stride = 1; int stride = 1;
int strideNum = input->dimSizeRDI[dimRDI]; int strideNum = input->dimSize[dim];
int blockSize = 1; int blockSize = 1;
int blockNum = 1; int blockNum = 1;
for (int i = 0; i < input->order; i++) { for (int i = 0; i < input->order; i++) {
if (i < dimRDI) if (i < dim)
stride *= input->dimSizeRDI[i]; blockNum *= input->dimSize[i];
else if (i > dimRDI) else if (i > dim)
blockNum *= input->dimSizeRDI[i]; stride *= input->dimSize[i];
} }
blockSize = stride * strideNum; blockSize = stride * strideNum;
if(input->dimSizeRDI[0] % (4 * 32 / sizeof(DTYPE)) == 0 && input->dimSizeRDI[0] >= 32){ if(input->dimSize[input->order - 1] % (4 * 32 / sizeof(DTYPE)) == 0 && input->dimSize[input->order - 1] >= 32){
int vecBufLength = 32 / sizeof(DTYPE); int vecBufLength = 32 / sizeof(DTYPE);
if(dimRDI == 0){ if(dim == input->order - 1){
//data is contiguous in dim 0 //data is contiguous in the reduced (last) dimension
for(int i = 0; i < blockNum; i++){ for(int i = 0; i < blockNum; i++){
// stride = 1 // stride = 1
...@@ -122,7 +122,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor ...@@ -122,7 +122,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
} else{ } else{
//data is separated //data is separated
for(int i = 0; i < blockNum; i++){ for(int i = 0; i < blockNum; i++){
for(int j = 0; j < input->dimSizeRDI[0] / 32; j++){ for(int j = 0; j < input->dimSize[input->order - 1] / 32; j++){
DTYPE * ip = (DTYPE*)input->data + blockSize * i; DTYPE * ip = (DTYPE*)input->data + blockSize * i;
DTYPE * op = (DTYPE*)output->data + stride * i; DTYPE * op = (DTYPE*)output->data + stride * i;
DTYPE * sp = shift != NULL ? (DTYPE*)shift->data + stride * i : NULL; DTYPE * sp = shift != NULL ? (DTYPE*)shift->data + stride * i : NULL;
...@@ -334,7 +334,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &sh ...@@ -334,7 +334,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &sh
} }
float dr = (!input.isSparse) ? 1.0F : input.denseRatio; float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem); InitTensorV2(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -429,7 +429,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power, bool ...@@ -429,7 +429,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power, bool
} }
float dr = (!input.isSparse) ? 1.0F : input.denseRatio; float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem); InitTensorV2(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
......
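The stride/strideNum/blockNum loops above place element (block, k, offset) at block * strideNum * stride + k * stride + offset and fold every k into one output element. A plain reference reduction over that layout, as a toy check:

#include <cassert>
#include <vector>

int main() {
    std::vector<int> dims = {2, 3, 4};          /* reduce dim = 1 */
    int stride = dims[2], strideNum = dims[1], blockNum = dims[0];

    std::vector<float> in(blockNum * strideNum * stride, 1.0f);
    std::vector<float> out(blockNum * stride, 0.0f);

    for (int b = 0; b < blockNum; ++b)
        for (int k = 0; k < strideNum; ++k)
            for (int o = 0; o < stride; ++o)
                out[b * stride + o] += in[(b * strideNum + k) * stride + o];

    assert(out[0] == 3.0f);                     /* summed 3 ones along dim 1 */
    return 0;
}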
...@@ -692,13 +692,12 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen ...@@ -692,13 +692,12 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!"); CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!");
CheckNTErrors(shift == NULL || output->unitNum == shift->unitNum, "Incorrect shift tensor size!"); CheckNTErrors(shift == NULL || output->unitNum == shift->unitNum, "Incorrect shift tensor size!");
int dimRDI = input->order - dim - 1;
for(int i = 0; i < input->order; i++){ for(int i = 0; i < input->order; i++){
if(i < dimRDI){ if(i < dim){
CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i], "Unmatched tensors!"); CheckNTErrors(input->dimSize[i] == output->dimSize[i], "Unmatched tensors!");
} }
else if(i > dimRDI){ else if(i > dim){
CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i - 1], "Unmatched tensors!"); CheckNTErrors(input->dimSize[i] == output->dimSize[i - 1], "Unmatched tensors!");
} }
} }
...@@ -709,32 +708,24 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen ...@@ -709,32 +708,24 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
int cudaBlockSize[3]; int cudaBlockSize[3];
int iter = 0; int iter = 0;
int stride = 1; int stride = 1;
int strideNum = input->dimSizeRDI[dimRDI]; int strideNum = input->dimSize[dim];
int blockSize = 1; int blockSize = 1;
int blockNum = 1; int blockNum = 1;
for (int i = 0; i < input->order; i++) { for (int i = 0; i < input->order; i++) {
if (i < dimRDI) if (i < dim)
stride *= input->dimSizeRDI[i]; blockNum *= input->dimSize[i];
else if (i > dimRDI) else if (i > dim)
blockNum *= input->dimSizeRDI[i]; stride *= input->dimSize[i];
} }
blockSize = stride * strideNum; blockSize = stride * strideNum;
int devID = input->devID; int devID = input->devID;
XMem * mem = input->mem; int devIDBackup;
ProtectCudaDev(devID, devIDBackup);
GDevs.GetCudaThread2D(devID, strideNum, stride * blockNum, MAX_INT, cudaGridSize, cudaBlockSize);
int bufSize = input->unitSize * cudaGridSize[0] * stride * blockNum * 2;
DTYPE * buf = mem != NULL ? (DTYPE*)mem->AllocBuf(mem->devID, bufSize) : (DTYPE*)XMemAlloc(input->devID, bufSize);
DTYPE * buf1 = buf;
DTYPE * buf2 = buf + cudaGridSize[0] * stride * blockNum;
DTYPE * sp = shift != NULL ? (DTYPE*)shift->data : NULL; DTYPE * sp = shift != NULL ? (DTYPE*)shift->data : NULL;
int devIDBackup;
ProtectCudaDev(input->devID, devIDBackup);
if (stride == 1 && blockNum >= 10) { if (stride == 1 && blockNum >= 10) {
dim3 grids; dim3 grids;
dim3 blocks; dim3 blocks;
...@@ -751,7 +742,7 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen ...@@ -751,7 +742,7 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
strideNum, blockNum, sp, power, isExp); strideNum, blockNum, sp, power, isExp);
} }
} }
else if (stride != 1 && stride * blockNum > 4096){ else if (stride != 1 && stride * blockNum > 4096) {
//GDevs->GetGridAndBlockSize2D(devID, stride * blockNum, strideNum,MAX_INT, cudaGridSize, cudaBlockSize); //GDevs->GetGridAndBlockSize2D(devID, stride * blockNum, strideNum,MAX_INT, cudaGridSize, cudaBlockSize);
//unsigned int* goutput = (unsigned int *)input->data; //unsigned int* goutput = (unsigned int *)input->data;
//convert2uintV2 << <dim3(cudaGridSize[0], cudaGridSize[1]), dim3(cudaBlockSize[0], cudaBlockSize[1]) >> > ((float*)input->data, goutput, stride, strideNum, blockNum, strideNum*blockNum*stride); //convert2uintV2 << <dim3(cudaGridSize[0], cudaGridSize[1]), dim3(cudaBlockSize[0], cudaBlockSize[1]) >> > ((float*)input->data, goutput, stride, strideNum, blockNum, strideNum*blockNum*stride);
...@@ -761,6 +752,14 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen ...@@ -761,6 +752,14 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
strideNum, blockNum,sp, power, isExp); strideNum, blockNum,sp, power, isExp);
} }
else { else {
XMem * mem = input->mem;
GDevs.GetCudaThread2D(devID, strideNum, stride * blockNum, MAX_INT, cudaGridSize, cudaBlockSize);
int bufSize = input->unitSize * cudaGridSize[0] * stride * blockNum * 2;
DTYPE * buf = mem != NULL ? (DTYPE*)mem->AllocBuf(mem->devID, bufSize) : (DTYPE*)XMemAlloc(devID, bufSize);
DTYPE * buf1 = buf;
DTYPE * buf2 = buf + cudaGridSize[0] * stride * blockNum;
do { do {
if (input->dataType == DEFAULT_DTYPE) { if (input->dataType == DEFAULT_DTYPE) {
DTYPE * iData = NULL; DTYPE * iData = NULL;
...@@ -904,13 +903,15 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen ...@@ -904,13 +903,15 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
iter++; iter++;
} while (strideNum > 1); } while (strideNum > 1);
}
ProtectCudaDev(input->devID, devIDBackup);
if (mem != NULL) if (mem != NULL)
mem->ReleaseBuf(mem->devID, bufSize); mem->ReleaseBuf(mem->devID, bufSize);
else else
XMemFree(input->devID, buf); XMemFree(devID, buf);
}
BacktoCudaDev(devID, devIDBackup);
} }
#endif // USE_CUDA #endif // USE_CUDA
......
...@@ -49,8 +49,8 @@ DTYPE _ReduceSumAll(const XTensor * source) ...@@ -49,8 +49,8 @@ DTYPE _ReduceSumAll(const XTensor * source)
int dims[2] = {1, source->unitNum}; int dims[2] = {1, source->unitNum};
int one = 1; int one = 1;
XTensor * all = NewTensorBuf(2, dims, source->dataType, source->denseRatio, source->devID, source->mem); XTensor * all = NewTensorBufV2(2, dims, source->dataType, source->denseRatio, source->devID, source->mem);
XTensor * result = NewTensorBuf(1, &one, source->dataType, 1.0F, source->devID, source->mem); XTensor * result = NewTensorBufV2(1, &one, source->dataType, 1.0F, source->devID, source->mem);
_CopyValues(source, all); _CopyValues(source, all);
_ReduceSum(all, result, 1); _ReduceSum(all, result, 1);
...@@ -74,7 +74,7 @@ DTYPE _ReduceSumAll(const XTensor * source) ...@@ -74,7 +74,7 @@ DTYPE _ReduceSumAll(const XTensor * source)
int leadingDim = big->order - 1; int leadingDim = big->order - 1;
int * dimSize; int * dimSize;
dimSize = getDimSize(big, leadingDim); dimSize = getDimSize(big, leadingDim);
XTensor * little = NewTensor(big->order - 1, dimSize, source->dataType, source->denseRatio, XTensor * little = NewTensorV2(big->order - 1, dimSize, source->dataType, source->denseRatio,
source->devID, source->mem); source->devID, source->mem);
_ReduceSum(big, little, leadingDim); _ReduceSum(big, little, leadingDim);
......
...@@ -109,7 +109,7 @@ void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTen ...@@ -109,7 +109,7 @@ void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTen
} }
float dr = (!input.isSparse) ? 1.0F : input.denseRatio; float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem); InitTensorV2(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
......
...@@ -38,8 +38,7 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2 ...@@ -38,8 +38,7 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
*/ */
void _ReduceVariance(const XTensor * input, XTensor * output, int dim, const XTensor * mean) void _ReduceVariance(const XTensor * input, XTensor * output, int dim, const XTensor * mean)
{ {
int dimRDI = input->order - dim - 1; int num = input->dimSize[dim];
int num = input->dimSizeRDI[dimRDI];
_ReduceSum(input, output, dim, mean, 2.0F); _ReduceSum(input, output, dim, mean, 2.0F);
_ScaleAndShiftMe(output, (DTYPE)1 / num, 0); _ScaleAndShiftMe(output, (DTYPE)1 / num, 0);
} }
...@@ -112,7 +111,7 @@ void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTenso ...@@ -112,7 +111,7 @@ void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTenso
} }
float dr = (!input.isSparse) ? 1.0F : input.denseRatio; float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem); InitTensorV2(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
......
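_ReduceVariance leans on _ReduceSum's shift and power arguments: reducing with shift = mean and power = 2 yields sum_i (a_i - mean)^2, and the final _ScaleAndShiftMe divides by n. A scalar check of that identity:

#include <cassert>

int main() {
    float a[3] = {1.0f, 2.0f, 3.0f};
    int n = 3;

    float mean = (a[0] + a[1] + a[2]) / n;       /* 2.0 */
    float sumSq = 0.0f;
    for (int i = 0; i < n; ++i)
        sumSq += (a[i] - mean) * (a[i] - mean);  /* the power=2, shift=mean reduce */
    float var = sumSq / n;                       /* the _ScaleAndShiftMe step */

    assert(var > 0.666f && var < 0.667f);        /* 2/3 */
    return 0;
}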
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
*/ */
#include "VectorBuffer.h" #include "VectorBuffer.h"
#include "math.h"
namespace nts { namespace nts {
/* data size for each buffer */ /* data size for each buffer */
int VectorBuffer::size() int VectorBuffer::size()
...@@ -168,4 +168,12 @@ VectorBuffer VectorBuffer::maxData(const VectorBuffer &a) { ...@@ -168,4 +168,12 @@ VectorBuffer VectorBuffer::maxData(const VectorBuffer &a) {
return *this; return *this;
} }
/* compute the minimum of two buffers */
VectorBuffer VectorBuffer::minData(const VectorBuffer &a) {
for (int i = 0; i != a.size(); i++) {
this->values[i] = MIN(a[i], this->values[i]);
}
return *this;
}
}/* end of the nts (NiuTrans.Tensor) namespace */ }/* end of the nts (NiuTrans.Tensor) namespace */
\ No newline at end of file
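minData mirrors maxData: an elementwise running minimum over the buffer's lanes, which is what lets the vectorized reduce path consume a full buffer per step (8 floats, going by the 32 / sizeof(DTYPE) math in ReduceSum) before a final horizontal fold. A toy stand-in, not the NiuTrans class:

#include <algorithm>
#include <cassert>

struct MiniBuffer {
    static const int kLanes = 8;   /* 32 bytes / sizeof(float), an assumption */
    float values[kLanes];

    /* elementwise running minimum, the role of VectorBuffer::minData */
    MiniBuffer &minData(const MiniBuffer &a) {
        for (int i = 0; i < kLanes; ++i)
            values[i] = std::min(a.values[i], values[i]);
        return *this;
    }
};

int main() {
    MiniBuffer x, y;
    for (int i = 0; i < MiniBuffer::kLanes; ++i) {
        x.values[i] = (float)i;
        y.values[i] = 3.0f;
    }
    x.minData(y);
    assert(x.values[0] == 0.0f && x.values[7] == 3.0f);
    return 0;
}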
...@@ -19,8 +19,6 @@ ...@@ -19,8 +19,6 @@
* $Created by: ZHANG Yuhao (email: zhangyuhao@stu.neu.edu.cn) 2019-07-23 * $Created by: ZHANG Yuhao (email: zhangyuhao@stu.neu.edu.cn) 2019-07-23
*/ */
//#include <cstring>
#include <math.h>
#include "../../XGlobal.h" #include "../../XGlobal.h"
namespace nts { namespace nts {
...@@ -49,5 +47,8 @@ public: ...@@ -49,5 +47,8 @@ public:
/* compute the maximum of two buffers */ /* compute the maximum of two buffers */
VectorBuffer maxData(const VectorBuffer &a); VectorBuffer maxData(const VectorBuffer &a);
/* compute the minimum of two buffers */
VectorBuffer minData(const VectorBuffer &a);
}; };
} }
\ No newline at end of file
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h" #include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Concatenate.h" #include "Concatenate.h"
#include "Merge.h" #include "Merge.h"
#include "ConcatenateSolely.h" #include "ConcatenateSolely.h"
...@@ -44,7 +45,7 @@ void _Concatenate(const TensorList * smalls, XTensor * big, int dim) ...@@ -44,7 +45,7 @@ void _Concatenate(const TensorList * smalls, XTensor * big, int dim)
XTensor * a = (XTensor*)smalls->GetItem(i - 1); XTensor * a = (XTensor*)smalls->GetItem(i - 1);
XTensor * b = (XTensor*)smalls->GetItem(i); XTensor * b = (XTensor*)smalls->GetItem(i);
CheckNTErrors((a && b), "Empty input tensors!"); CheckNTErrors((a && b), "Empty input tensors!");
if (!XTensor::IsSameShaped(a, b)) if (!_IsSameShaped(a, b))
uniform = false; uniform = false;
} }
...@@ -76,7 +77,7 @@ XTensor Concatenate(const TensorList &smalls, int dim) ...@@ -76,7 +77,7 @@ XTensor Concatenate(const TensorList &smalls, int dim)
XTensor * a = (XTensor*)smalls.GetItem(i - 1); XTensor * a = (XTensor*)smalls.GetItem(i - 1);
XTensor * b = (XTensor*)smalls.GetItem(i); XTensor * b = (XTensor*)smalls.GetItem(i);
CheckNTErrors((a && b), "Empty input tensors!"); CheckNTErrors((a && b), "Empty input tensors!");
if (!XTensor::IsSameShaped(a, b)) if (!_IsSameShaped(a, b))
uniform = false; uniform = false;
} }
XTensor * tensor = (XTensor*)smalls.GetItem(0); XTensor * tensor = (XTensor*)smalls.GetItem(0);
...@@ -189,7 +190,7 @@ void Concatenate(const TensorList & smalls, XTensor & big, int dim) ...@@ -189,7 +190,7 @@ void Concatenate(const TensorList & smalls, XTensor & big, int dim)
XTensor * a = (XTensor*)smalls.GetItem(i - 1); XTensor * a = (XTensor*)smalls.GetItem(i - 1);
XTensor * b = (XTensor*)smalls.GetItem(i); XTensor * b = (XTensor*)smalls.GetItem(i);
CheckNTErrors((a && b), "Empty input tensors!"); CheckNTErrors((a && b), "Empty input tensors!");
if (!XTensor::IsSameShaped(a, b)) if (!_IsSameShaped(a, b))
uniform = false; uniform = false;
} }
...@@ -207,7 +208,7 @@ void Concatenate(const TensorList & smalls, XTensor & big, int dim) ...@@ -207,7 +208,7 @@ void Concatenate(const TensorList & smalls, XTensor & big, int dim)
} }
float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio; float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
InitTensor(&big, order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem); InitTensorV2(&big, order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
} }
else { else {
for (int i = 0; i < tensor->order; i++) for (int i = 0; i < tensor->order; i++)
...@@ -222,7 +223,7 @@ void Concatenate(const TensorList & smalls, XTensor & big, int dim) ...@@ -222,7 +223,7 @@ void Concatenate(const TensorList & smalls, XTensor & big, int dim)
dimSize[dim] = catDimSize; dimSize[dim] = catDimSize;
float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio; float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
InitTensor(&big, order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem); InitTensorV2(&big, order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
} }
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -290,7 +291,7 @@ XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim) ...@@ -290,7 +291,7 @@ XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim)
XTensor * a = (XTensor*)smalls.Get(i - 1); XTensor * a = (XTensor*)smalls.Get(i - 1);
XTensor * b = (XTensor*)smalls.Get(i); XTensor * b = (XTensor*)smalls.Get(i);
CheckNTErrors((a && b), "Empty input tensors!"); CheckNTErrors((a && b), "Empty input tensors!");
if (!XTensor::IsSameShaped(a, b)) if (!_IsSameShaped(a, b))
uniform = false; uniform = false;
} }
XTensor * tensor = (XTensor*)smalls.Get(0); XTensor * tensor = (XTensor*)smalls.Get(0);
......
...@@ -39,30 +39,29 @@ void _ConcatenateSolely(const TensorList * smalls, XTensor * big, int dim) ...@@ -39,30 +39,29 @@ void _ConcatenateSolely(const TensorList * smalls, XTensor * big, int dim)
CheckNTErrors(big->order > dim && dim >= 0, "Illegal dimension to concatenate!"); CheckNTErrors(big->order > dim && dim >= 0, "Illegal dimension to concatenate!");
int catDimSize = 0; int catDimSize = 0;
int dimRDI = big->order - dim - 1;
for (int i = 0; i < smalls->count; i++) { for (int i = 0; i < smalls->count; i++) {
XTensor * tensor = (XTensor*)smalls->GetItem(i); XTensor * tensor = (XTensor*)smalls->GetItem(i);
CheckNTErrors((big->order == tensor->order), "Unmatched tensor orders!"); CheckNTErrors((big->order == tensor->order), "Unmatched tensor orders!");
for (int j = 0; j < big->order; j++) { for (int j = 0; j < big->order; j++) {
if (j != dimRDI) { if (j != dim) {
CheckNTErrors((big->dimSizeRDI[j] == tensor->dimSizeRDI[j]), "Unmatched tensor sizes!"); CheckNTErrors((big->dimSize[j] == tensor->dimSize[j]), "Unmatched tensor sizes!");
} }
else { else {
catDimSize += tensor->dimSizeRDI[j]; catDimSize += tensor->dimSize[j];
} }
} }
} }
CheckNTErrors((catDimSize == big->dimSizeRDI[dimRDI]), "Unmatched tensor sizes!"); CheckNTErrors((catDimSize == big->dimSize[dim]), "Unmatched tensor sizes!");
int stride = 1; int stride = 1;
for (int i = 0; i < dimRDI; i++)
stride *= big->dimSizeRDI[i];
int blockNum = 1; int blockNum = 1;
for (int i = dimRDI + 1; i < big->order; i++) for (int i = 0; i < dim; i++)
blockNum *= big->dimSizeRDI[i]; blockNum *= big->dimSize[i];
for (int i = dim + 1; i < big->order; i++)
stride *= big->dimSize[i];
int offset = 0; int offset = 0;
...@@ -74,8 +73,8 @@ void _ConcatenateSolely(const TensorList * smalls, XTensor * big, int dim) ...@@ -74,8 +73,8 @@ void _ConcatenateSolely(const TensorList * smalls, XTensor * big, int dim)
if (smalls->count <= MIN_TENSOR_CAT_NUM) { if (smalls->count <= MIN_TENSOR_CAT_NUM) {
for (int i = 0; i < smalls->count; i++) { for (int i = 0; i < smalls->count; i++) {
XTensor * tensor = (XTensor*)smalls->GetItem(i); XTensor * tensor = (XTensor*)smalls->GetItem(i);
int sPitch = stride * tensor->dimSizeRDI[dimRDI] * tensor->unitSize; int sPitch = stride * tensor->dimSize[dim] * tensor->unitSize;
int tPitch = stride * big->dimSizeRDI[dimRDI] * big->unitSize; int tPitch = stride * big->dimSize[dim] * big->unitSize;
int mSize = sPitch; int mSize = sPitch;
int n = blockNum; int n = blockNum;
XMemCopy2D((char*)big->data + offset, tPitch, big->devID, XMemCopy2D((char*)big->data + offset, tPitch, big->devID,
...@@ -89,7 +88,7 @@ void _ConcatenateSolely(const TensorList * smalls, XTensor * big, int dim) ...@@ -89,7 +88,7 @@ void _ConcatenateSolely(const TensorList * smalls, XTensor * big, int dim)
int * blockSizes = new int[smalls->count]; int * blockSizes = new int[smalls->count];
for (int i = 0; i < smalls->count; i++) { for (int i = 0; i < smalls->count; i++) {
XTensor * tensor = (XTensor*)smalls->GetItem(i); XTensor * tensor = (XTensor*)smalls->GetItem(i);
blockSizes[i] = stride * tensor->dimSizeRDI[dimRDI] * tensor->unitSize; blockSizes[i] = stride * tensor->dimSize[dim] * tensor->unitSize;
sourceArrays->Add((char*)tensor->data); sourceArrays->Add((char*)tensor->data);
} }
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2019-10-22
*/
#include "../../XTensor.h"
#include "IsSameShaped.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
check whether two tensors are of the same type and size
>> a - input tensor
>> b - another tensor to compare with
<< return - whether the two input tensors are identical
*/
bool _IsSameShaped(const XTensor * a, const XTensor * b)
{
if(a == NULL || b == NULL)
return false;
if(a->order != b->order)
return false;
for(int i = 0; i < a->order; i++){
if(a->dimSize[i] != b->dimSize[i])
return false;
}
if(a->dataType != b->dataType)
return false;
if(a->denseRatio != b->denseRatio)
return false;
if(a->isSparse != b->isSparse)
return false;
return true;
}
/*
check whether two tensors are of the same type and size
>> a - input tensor
>> b - another tensor to compare with
<< return - whether the two input tensors are identical
*/
bool IsSameShaped(const XTensor & a, const XTensor & b)
{
return _IsSameShaped(&a, &b);
}
/*
check whether three tensors are of the same type and size
>> a - input tensor
>> b - another tensor to compare with
>> c - a third tensor to compare with
<< return - whether the three input tensors are identical
*/
bool _IsSameShaped(const XTensor * a, const XTensor * b, const XTensor * c)
{
return _IsSameShaped(a, b) && _IsSameShaped(a, c);
}
/*
check whether three tensors are of the same type and size
>> a - input tensor
>> b - another tensor to compare with
>> c - a third tensor to compare with
<< return - whether the three input tensors are identical
*/
bool IsSameShaped(const XTensor & a, const XTensor & b, const XTensor & c)
{
return _IsSameShaped(&a, &b, &c);
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2019-10-22
*/
#ifndef __ISSAMESHAPED_H__
#define __ISSAMESHAPED_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* judge whether two tensors are of the same type and size */
bool _IsSameShaped(const XTensor * a, const XTensor * b);
/* judge whether two tensors are of the same type and size */
bool IsSameShaped(const XTensor & a, const XTensor & b);
/* judge whether three tensors are of the same type and size */
bool _IsSameShaped(const XTensor * a, const XTensor * b, const XTensor * c);
/* judge whether three tensors are of the same type and size */
bool IsSameShaped(const XTensor & a, const XTensor & b, const XTensor & c);
} // namespace nts(NiuTrans.Tensor)
#endif // __ISSAMESHAPED_H__
\ No newline at end of file
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XUtility.h" #include "../../XUtility.h"
#include "../../XName.h" #include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Merge.h" #include "Merge.h"
#include "MakeMergeBlockIndex.h" #include "MakeMergeBlockIndex.h"
#include "../movement/CopyBlocksOnSite.h" #include "../movement/CopyBlocksOnSite.h"
...@@ -45,10 +46,8 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim) ...@@ -45,10 +46,8 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
if(leadingDim < 0) if(leadingDim < 0)
leadingDim = 0; leadingDim = 0;
int whereToMergeRDI = s->order - whereToMerge - 1; if (leadingDim >= s->order)
int leadingDimRDI = s->order - leadingDim - 1; leadingDim = leadingDim - s->order;
if (leadingDimRDI < 0)
leadingDimRDI = s->order - 1;
CheckNTErrors((s != NULL && t != NULL), "Invalid tensors!"); CheckNTErrors((s != NULL && t != NULL), "Invalid tensors!");
CheckNTErrors((s->devID == t->devID || (s->devID < 0 && t->devID < 0)), CheckNTErrors((s->devID == t->devID || (s->devID < 0 && t->devID < 0)),
...@@ -56,19 +55,20 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim) ...@@ -56,19 +55,20 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
CheckNTErrors((s->unitNum == t->unitNum && s->unitSize == t->unitSize), "Unmatched tensors!"); CheckNTErrors((s->unitNum == t->unitNum && s->unitSize == t->unitSize), "Unmatched tensors!");
CheckNTErrors((s->order == t->order + 1), "Unmatched tensors!"); CheckNTErrors((s->order == t->order + 1), "Unmatched tensors!");
CheckNTErrors((leadingDimRDI > whereToMergeRDI), "Invalid leading dimension!"); CheckNTErrors((leadingDim < whereToMerge), "Invalid leading dimension!");
for (int i = 0; i < s->order; i++) { for (int i = 0; i < s->order; i++) {
if (i == whereToMergeRDI) { if (i == whereToMerge) {
CheckNTErrors((t->dimSizeRDI[i] == s->dimSizeRDI[i] * s->dimSizeRDI[leadingDimRDI]),
CheckNTErrors((t->dimSize[i - 1] == s->dimSize[i] * s->dimSize[leadingDim]),
"Unmatched tensor sizes!"); "Unmatched tensor sizes!");
} }
else if (i < leadingDimRDI){ else if (i < leadingDim){
CheckNTErrors((s->dimSizeRDI[i] == t->dimSizeRDI[i]), CheckNTErrors((s->dimSize[i] == t->dimSize[i]),
"Unmatched tensor sizes!"); "Unmatched tensor sizes!");
} }
else if (i > leadingDimRDI) { else if (i > leadingDim) {
CheckNTErrors((s->dimSizeRDI[i] == t->dimSizeRDI[i - 1]), CheckNTErrors((s->dimSize[i] == t->dimSize[i - 1]),
"Unmatched tensor sizes!"); "Unmatched tensor sizes!");
} }
} }
...@@ -77,14 +77,14 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim) ...@@ -77,14 +77,14 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
int blockNum = 1; int blockNum = 1;
int gridSize = 1; int gridSize = 1;
int gridNum = 1; int gridNum = 1;
int mergedNum = s->dimSizeRDI[leadingDimRDI]; int mergedNum = s->dimSize[leadingDim];
for (int i = 0; i < s->order; i++) { for (int i = 0; i < s->order; i++) {
if (i <= leadingDimRDI) { if (i >= leadingDim) {
if (i <= whereToMergeRDI) if (i >= whereToMerge)
blockSize *= s->dimSizeRDI[i]; blockSize *= s->dimSize[i];
else else
blockNum *= s->dimSizeRDI[i]; blockNum *= s->dimSize[i];
} }
} }
...@@ -121,7 +121,7 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim) ...@@ -121,7 +121,7 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
if (!isOnSameDevice) if (!isOnSameDevice)
dataTMP = mem != NULL ? mem->AllocBuf(mem->devID, size) : XMemAlloc(mem->devID, size); dataTMP = mem != NULL ? mem->AllocBuf(mem->devID, size) : XMemAlloc(mem->devID, size);
int blockNumInMerge = s->dimSizeRDI[leadingDimRDI]; int blockNumInMerge = s->dimSize[leadingDim];
int splitSizeInGrid = gridSize / blockNumInMerge; int splitSizeInGrid = gridSize / blockNumInMerge;
int realBlockSize = blockSize * t->unitSize; int realBlockSize = blockSize * t->unitSize;
...@@ -254,7 +254,7 @@ void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim) ...@@ -254,7 +254,7 @@ void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim)
} }
float dr = (!s.isSparse) ? 1.0F : s.denseRatio; float dr = (!s.isSparse) ? 1.0F : s.denseRatio;
InitTensor(&t, order, dimSize, s.dataType, dr, s.devID, s.mem); InitTensorV2(&t, order, dimSize, s.dataType, dr, s.devID, s.mem);
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -310,12 +310,11 @@ void _Merge(const TensorList * smalls, XTensor * t, int whereToMerge) ...@@ -310,12 +310,11 @@ void _Merge(const TensorList * smalls, XTensor * t, int whereToMerge)
int mergedNum = smalls->count; int mergedNum = smalls->count;
XTensor * s0 = smalls->GetItem(0); XTensor * s0 = smalls->GetItem(0);
int whereToMergeRDI = s0->order - whereToMerge - 1;
for (int i = 0; i < s0->order; i++) { for (int i = 0; i < s0->order; i++) {
if (i <= whereToMergeRDI) if (i >= whereToMerge)
blockSize *= s0->dimSizeRDI[i]; blockSize *= s0->dimSize[i];
else else
blockNum *= s0->dimSizeRDI[i]; blockNum *= s0->dimSize[i];
} }
CheckNTErrors((s0->unitNum % (blockSize * blockNum) == 0), "Incorrect size!"); CheckNTErrors((s0->unitNum % (blockSize * blockNum) == 0), "Incorrect size!");
...@@ -433,7 +432,7 @@ merge two tensors into a big tensor (return an XTensor structure) ...@@ -433,7 +432,7 @@ merge two tensors into a big tensor (return an XTensor structure)
*/ */
XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge) XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge)
{ {
CheckNTErrors(XTensor::IsSameShaped(&smallA, &smallB), CheckNTErrors(IsSameShaped(smallA, smallB),
"The two tensors must be of the same size!"); "The two tensors must be of the same size!");
int order = smallA.order; int order = smallA.order;
......
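To make the shape conventions concrete: Split prepends the number of splits as a new first dimension, and Merge with a matching leadingDim folds it back (see the _Split checks further below). A hedged shape-bookkeeping sketch; the helper names are illustrative, not library API:

#include <vector>
#include <cassert>

/* hypothetical helpers mirroring the size checks in _Split/_Merge:
   splitting prepends splitNum; merging folds dim `leadingDim` into
   `whereToMerge` (assumed here to satisfy whereToMerge > leadingDim) */
std::vector<int> SplitShape(std::vector<int> s, int whereToSplit, int splitNum)
{
    s[whereToSplit] /= splitNum;
    s.insert(s.begin(), splitNum);
    return s;
}

std::vector<int> MergeShape(std::vector<int> s, int whereToMerge, int leadingDim)
{
    int mergedNum = s[leadingDim];
    s.erase(s.begin() + leadingDim);
    s[whereToMerge - 1] *= mergedNum;   /* dims above leadingDim shift down by one */
    return s;
}

int main()
{
    std::vector<int> big = {6, 4};                 /* (N, M) = (6, 4) */
    std::vector<int> t = SplitShape(big, 0, 3);    /* (3, 2, 4) */
    assert((MergeShape(t, 1, 0) == std::vector<int>{6, 4}));
    return 0;
}

This matches the call pattern _Merge(grad, tmp, whereToSplit + 1, 0) used by the backward functions: merging at whereToSplit + 1 along leading dimension 0 exactly undoes a split at whereToSplit.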
...@@ -46,8 +46,6 @@ void Merge(const TensorList &smalls, XTensor &t, int whereToMerge); ...@@ -46,8 +46,6 @@ void Merge(const TensorList &smalls, XTensor &t, int whereToMerge);
/* merge two tensors into a big tensor (return an XTensor structure) */ /* merge two tensors into a big tensor (return an XTensor structure) */
XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge); XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge);
void Merge(const XTensor &smallA, const XTensor &smallB, XTensor &t, int whereToMerge);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
#endif // __MERGE_H__ #endif // __MERGE_H__
\ No newline at end of file
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h" #include "../../XName.h"
#include "../movement/CopyValues.h" #include "../movement/CopyValues.h"
#include "../shape/IsSameShaped.h"
#include "Reshape.h" #include "Reshape.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -52,8 +53,8 @@ XTensor Reshape(XTensor &s, int order, int * dimSize) ...@@ -52,8 +53,8 @@ XTensor Reshape(XTensor &s, int order, int * dimSize)
void Reshape(XTensor &s, XTensor &t, int order, int * dimSize) void Reshape(XTensor &s, XTensor &t, int order, int * dimSize)
{ {
if (!t.isInit || !XTensor::IsSameShaped(&t, &s)) { if (!t.isInit || !IsSameShaped(t, s)) {
InitTensor(&t, &s); InitTensorV2(&t, &s);
} }
/* call Reshape function */ /* call Reshape function */
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* /*
transform a tensor by splitting it, e.g., (N, M) -> (3, N/3, M) transform a tensor by splitting it, e.g., (N, M) -> (3, N/3, M)
>> s - the source tensor >> s - the source tensor
>> t - the target tensor (for return) >> t - the target tensor (for return)
...@@ -46,23 +46,22 @@ void _Split(const XTensor * s, XTensor * t, int whereToSplit, int splitNum) ...@@ -46,23 +46,22 @@ void _Split(const XTensor * s, XTensor * t, int whereToSplit, int splitNum)
CheckNTErrors((s->unitNum == t->unitNum && s->unitSize == t->unitSize), "Unmatched tensors!"); CheckNTErrors((s->unitNum == t->unitNum && s->unitSize == t->unitSize), "Unmatched tensors!");
CheckNTErrors((s->order == t->order - 1), "Unmatched tensors!"); CheckNTErrors((s->order == t->order - 1), "Unmatched tensors!");
CheckNTErrors((t->dimSizeRDI[t->order - 1] == splitNum), "Incorrect tensor sizes!"); CheckNTErrors((t->dimSize[0] == splitNum), "Incorrect tensor sizes!");
int whereToSplitRDI = s->order - whereToSplit - 1;
for (int i = 0; i < s->order; i++) { for (int i = 0; i < s->order; i++) {
if (i == whereToSplitRDI) { if (i == whereToSplit) {
CheckNTErrors((s->dimSizeRDI[i] == t->dimSizeRDI[i] * splitNum), CheckNTErrors((s->dimSize[i] == t->dimSize[i + 1] * splitNum),
"Unmatched tensor sizes!"); "Unmatched tensor sizes!");
} }
else { else {
CheckNTErrors((s->dimSizeRDI[i] == t->dimSizeRDI[i]), CheckNTErrors((s->dimSize[i] == t->dimSize[i + 1]),
"Unmatched tensor sizes!"); "Unmatched tensor sizes!");
} }
} }
/* for the case that we split the last dimension. Actually /* for the case that we split the first dimension. Actually
(N, M) and (3, N/3, M) have the same memory layout */ (N, M) and (3, N/3, M) have the same memory layout */
if (s->order - 1 == whereToSplitRDI) { if (0 == whereToSplit) {
XMemCopy(t->data, t->devID, s->data, s->devID, s->unitNum * s->unitSize); XMemCopy(t->data, t->devID, s->data, s->devID, s->unitNum * s->unitSize);
return; return;
} }
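The early return relies on (N, M) and (3, N/3, M) sharing one row-major layout. A quick sketch of why, assuming row-major offsets (illustrative code, not the library routine):

#include <cassert>

/* row-major flat offset of (i, j) in an (N, M) tensor */
int Offset2D(int i, int j, int M) { return i * M + j; }

/* row-major flat offset of (k, i, j) in a (3, N/3, M) tensor */
int Offset3D(int k, int i, int j, int N3, int M) { return (k * N3 + i) * M + j; }

int main()
{
    const int N = 6, M = 4, split = 3;
    for (int i = 0; i < N; i++)
        for (int j = 0; j < M; j++)
            assert(Offset2D(i, j, M) ==
                   Offset3D(i / (N / split), i % (N / split), j, N / split, M));
    return 0;
}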
...@@ -70,14 +69,14 @@ void _Split(const XTensor * s, XTensor * t, int whereToSplit, int splitNum) ...@@ -70,14 +69,14 @@ void _Split(const XTensor * s, XTensor * t, int whereToSplit, int splitNum)
int blockSize = 1; int blockSize = 1;
int blockNum = 1; int blockNum = 1;
for (int i = 0; i < s->order; i++) { for (int i = 0; i < s->order; i++) {
if (i == whereToSplitRDI) { if (i == whereToSplit) {
blockSize *= s->dimSizeRDI[i] / splitNum; blockSize *= s->dimSize[i] / splitNum;
blockNum *= splitNum; blockNum *= splitNum;
} }
else if (i < whereToSplitRDI) else if (i > whereToSplit)
blockSize *= s->dimSizeRDI[i]; blockSize *= s->dimSize[i];
else else
blockNum *= s->dimSizeRDI[i]; blockNum *= s->dimSize[i];
} }
CheckNTErrors((blockNum % splitNum == 0), "Incorrect split number!"); CheckNTErrors((blockNum % splitNum == 0), "Incorrect split number!");
...@@ -184,7 +183,7 @@ bool CheckSplitSize(const XTensor * s, const XTensor * t, int whereToSplit, int ...@@ -184,7 +183,7 @@ bool CheckSplitSize(const XTensor * s, const XTensor * t, int whereToSplit, int
} }
/* /*
transform a tensor by splitting it, e.g., (N, M) -> (3, N/3, M) (return an XTensor structure) transform a tensor by splitting it, e.g., (N, M) -> (3, N/3, M) (return an XTensor structure)
make a new tensor to keep the result and return it make a new tensor to keep the result and return it
>> s - the source tensor >> s - the source tensor
...@@ -244,7 +243,7 @@ void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum) ...@@ -244,7 +243,7 @@ void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum)
} }
float dr = (!s.isSparse) ? 1.0F : s.denseRatio; float dr = (!s.isSparse) ? 1.0F : s.denseRatio;
InitTensor(&t, order, dimSize, s.dataType, dr, s.devID, s.mem); InitTensorV2(&t, order, dimSize, s.dataType, dr, s.devID, s.mem);
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
...@@ -276,7 +275,6 @@ void _Split(const XTensor * big, TensorList * smalls, int whereToSplit, int spli ...@@ -276,7 +275,6 @@ void _Split(const XTensor * big, TensorList * smalls, int whereToSplit, int spli
CheckNTErrors((smalls->count == splitNum), "Unmatched tensors!"); CheckNTErrors((smalls->count == splitNum), "Unmatched tensors!");
CheckNTErrors((smalls->count > 0), "Wrong input!"); CheckNTErrors((smalls->count > 0), "Wrong input!");
int whereToSplitRDI = big->order - whereToSplit - 1;
bool uniform = true; bool uniform = true;
for (int i = 0; i < smalls->count; i++) { for (int i = 0; i < smalls->count; i++) {
...@@ -292,14 +290,14 @@ void _Split(const XTensor * big, TensorList * smalls, int whereToSplit, int spli ...@@ -292,14 +290,14 @@ void _Split(const XTensor * big, TensorList * smalls, int whereToSplit, int spli
int blockSize = 1; int blockSize = 1;
int blockNum = 1; int blockNum = 1;
for (int i = 0; i < big->order; i++) { for (int i = 0; i < big->order; i++) {
if (i == whereToSplitRDI) { if (i == whereToSplit) {
blockSize *= big->dimSizeRDI[i] / splitNum; blockSize *= big->dimSize[i] / splitNum;
blockNum *= splitNum; blockNum *= splitNum;
} }
else if (i < whereToSplitRDI) else if (i > whereToSplit)
blockSize *= big->dimSizeRDI[i]; blockSize *= big->dimSize[i];
else else
blockNum *= big->dimSizeRDI[i]; blockNum *= big->dimSize[i];
} }
CheckNTErrors((blockNum % splitNum == 0), "Incorrect split number!"); CheckNTErrors((blockNum % splitNum == 0), "Incorrect split number!");
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "Squeeze.h" #include "Squeeze.h"
#include "../movement/CopyValues.h" #include "../movement/CopyValues.h"
#include "../shape/IsSameShaped.h"
#include "../../XName.h" #include "../../XName.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
...@@ -37,7 +38,7 @@ void _Squeeze(XTensor * source, XTensor * target, int leadingDim) ...@@ -37,7 +38,7 @@ void _Squeeze(XTensor * source, XTensor * target, int leadingDim)
{ {
int order = target->order; int order = target->order;
CheckNTErrors(XTensor::IsSameShaped(source, target), CheckNTErrors(_IsSameShaped(source, target),
"The source and target tensor must be of the same size!"); "The source and target tensor must be of the same size!");
CheckNTErrors(leadingDim >= -1 && leadingDim < order, CheckNTErrors(leadingDim >= -1 && leadingDim < order,
"Wrong leading dimension"); "Wrong leading dimension");
...@@ -130,8 +131,8 @@ XTensor Squeeze(XTensor & source, int leadingDim) ...@@ -130,8 +131,8 @@ XTensor Squeeze(XTensor & source, int leadingDim)
void Squeeze(XTensor & source, XTensor & target, int leadingDim) void Squeeze(XTensor & source, XTensor & target, int leadingDim)
{ {
if (!target.isInit || !XTensor::IsSameShaped(&source, &target)) { if (!target.isInit || !IsSameShaped(source, target)) {
InitTensor(&target, &source); InitTensorV2(&target, &source);
} }
/* call _Squeeze function */ /* call _Squeeze function */
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
*/ */
#include "Stack.h" #include "Stack.h"
#include "IsSameShaped.h"
#include "../../XUtility.h" #include "../../XUtility.h"
#include "../../XName.h" #include "../../XName.h"
...@@ -37,7 +38,7 @@ void _Stack(const TensorList * smalls, XTensor * t, int dim) ...@@ -37,7 +38,7 @@ void _Stack(const TensorList * smalls, XTensor * t, int dim)
for (int i = 1; i < count; i++) { for (int i = 1; i < count; i++) {
XTensor * tmp1 = smalls->GetItem(i); XTensor * tmp1 = smalls->GetItem(i);
XTensor * tmp2 = smalls->GetItem(i-1); XTensor * tmp2 = smalls->GetItem(i-1);
CheckNTErrors(XTensor::IsSameShaped(tmp1, tmp2), "The input tensor must be same size!"); CheckNTErrors(_IsSameShaped(tmp1, tmp2), "The input tensors must be of the same size!");
} }
int blockSize = 1; int blockSize = 1;
...@@ -165,7 +166,7 @@ void Stack(const TensorList &smalls, XTensor &t, int dim) ...@@ -165,7 +166,7 @@ void Stack(const TensorList &smalls, XTensor &t, int dim)
} }
float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio; float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
InitTensor(&t, order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem); InitTensorV2(&t, order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
......
...@@ -42,16 +42,15 @@ void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize) ...@@ -42,16 +42,15 @@ void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize)
CheckNTErrors((a->order == b->order - 1), "Unmatched tensors!"); CheckNTErrors((a->order == b->order - 1), "Unmatched tensors!");
CheckNTErrors((a->unitSize == b->unitSize), "Unmatched tensors!"); CheckNTErrors((a->unitSize == b->unitSize), "Unmatched tensors!");
int dimRDI = b->order - dim - 1;
for (int i = 0; i < b->order; i++) { for (int i = 0; i < b->order; i++) {
if (i < dimRDI) { if (i < dim) {
CheckNTErrors((a->dimSizeRDI[i] == b->dimSizeRDI[i]), "Unmatched tensors!"); CheckNTErrors((a->dimSize[i] == b->dimSize[i]), "Unmatched tensors!");
} }
else if (i > dimRDI) { else if (i > dim) {
CheckNTErrors((a->dimSizeRDI[i - 1] == b->dimSizeRDI[i]), "Unmatched tensors!"); CheckNTErrors((a->dimSize[i - 1] == b->dimSize[i]), "Unmatched tensors!");
} }
else { else {
CheckNTErrors((dSize == b->dimSizeRDI[i]), "Unmatched tensors!"); CheckNTErrors((dSize == b->dimSize[i]), "Unmatched tensors!");
} }
} }
...@@ -60,8 +59,8 @@ void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize) ...@@ -60,8 +59,8 @@ void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize)
int blockNumA = 1; int blockNumA = 1;
int blockNumB = 1; int blockNumB = 1;
for (int i = 0; i < dimRDI; i++) for (int i = dim; i < a->order; i++)
blockSize *= a->dimSizeRDI[i]; blockSize *= a->dimSize[i];
realBlockSize = blockSize * a->unitSize; realBlockSize = blockSize * a->unitSize;
...@@ -184,7 +183,7 @@ void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize) ...@@ -184,7 +183,7 @@ void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize)
} }
float dr = (!a.isSparse) ? 1.0F : a.denseRatio; float dr = (!a.isSparse) ? 1.0F : a.denseRatio;
InitTensor(&b, order, dimSize, a.dataType, dr, a.devID, a.mem); InitTensorV2(&b, order, dimSize, a.dataType, dr, a.devID, a.mem);
/* destroy variables */ /* destroy variables */
delete[] dimSize; delete[] dimSize;
......
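_Unsqueeze repeats contiguous blocks: blockSize is the product of a's dimensions from dim onward, and each block is written dSize times into b. A small standalone sketch of that copy pattern (illustrative, not the library routine):

#include <cassert>
#include <vector>

/* unsqueeze a row-major buffer: insert a new dimension of size dSize at `dim`
   by repeating each trailing block dSize times */
std::vector<float> UnsqueezeSketch(const std::vector<float> &a,
                                   const std::vector<int> &dims, int dim, int dSize)
{
    int blockSize = 1;
    for (size_t i = dim; i < dims.size(); i++)
        blockSize *= dims[i];
    int blockNum = (int)a.size() / blockSize;

    std::vector<float> b;
    b.reserve(a.size() * dSize);
    for (int k = 0; k < blockNum; k++)
        for (int r = 0; r < dSize; r++)
            b.insert(b.end(), a.begin() + k * blockSize, a.begin() + (k + 1) * blockSize);
    return b;
}

int main()
{
    std::vector<float> a = {1, 2, 3, 4};                     /* shape (2, 2) */
    std::vector<float> b = UnsqueezeSketch(a, {2, 2}, 1, 3); /* shape (2, 3, 2) */
    assert((b == std::vector<float>{1, 2, 1, 2, 1, 2, 3, 4, 3, 4, 3, 4}));
    return 0;
}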
...@@ -235,9 +235,8 @@ void _CudaUnsqueeze(const XTensor * a, XTensor * b, int dim, int dSize) ...@@ -235,9 +235,8 @@ void _CudaUnsqueeze(const XTensor * a, XTensor * b, int dim, int dSize)
int blockSize = 1; int blockSize = 1;
int blockNumA = 1; int blockNumA = 1;
int blockNumB = 1; int blockNumB = 1;
int dimRDI = b->order - dim - 1; for (int i = dim; i < a->order; i++)
for (int i = 0; i < dimRDI; i++) blockSize *= a->dimSize[i];
blockSize *= a->dimSizeRDI[i];
blockNumA = a->unitNum / blockSize; blockNumA = a->unitNum / blockSize;
blockNumB = b->unitNum / blockSize; blockNumB = b->unitNum / blockSize;
...@@ -250,7 +249,7 @@ void _CudaUnsqueeze(const XTensor * a, XTensor * b, int dim, int dSize) ...@@ -250,7 +249,7 @@ void _CudaUnsqueeze(const XTensor * a, XTensor * b, int dim, int dSize)
int devIDBackup = 0; int devIDBackup = 0;
ProtectCudaDev(a->devID, devIDBackup); ProtectCudaDev(a->devID, devIDBackup);
if (dimRDI == 0) { if (dim == b->order - 1) {
GDevs.GetCudaThread2D(a->devID, dSize, blockNumA, MAX_INT, cudaGrids, cudaBlocks); GDevs.GetCudaThread2D(a->devID, dSize, blockNumA, MAX_INT, cudaGrids, cudaBlocks);
if (a->dataType == X_FLOAT && b->dataType == X_FLOAT) { if (a->dataType == X_FLOAT && b->dataType == X_FLOAT) {
......
...@@ -22,6 +22,8 @@ ...@@ -22,6 +22,8 @@
#include <math.h> #include <math.h>
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../movement/CopyValues.h" #include "../movement/CopyValues.h"
#include "../shape/IsSameShaped.h"
#include "../utilities/SetAscendingOrder.h"
#include "../../XUtility.h" #include "../../XUtility.h"
#include "../../XName.h" #include "../../XName.h"
#include "Sort.h" #include "Sort.h"
...@@ -40,14 +42,13 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim) ...@@ -40,14 +42,13 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim)
{ {
dim = MODX(dim, a->order); dim = MODX(dim, a->order);
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!"); CheckNTErrors((_IsSameShaped(a, b)), "Input tensors should have the same shape!");
CheckNTErrors((dim >= 0 && dim < a->order), "Incorrect dimension specified!"); CheckNTErrors((dim >= 0 && dim < a->order), "Incorrect dimension specified!");
CheckNTErrors((a->order == index->order), "Unmatched input tensors!"); CheckNTErrors((a->order == index->order), "Unmatched input tensors!");
CheckNTErrors((index->dataType == X_INT), "Wrong data type!"); CheckNTErrors((index->dataType == X_INT), "Wrong data type!");
int dimRDI = a->order - dim - 1;
/* make the index tensor */ /* make the index tensor */
index->SetAscendingOrder(dim); SetAscendingOrder(*index, dim);
if (a->devID >= 0) { if (a->devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -58,13 +59,13 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim) ...@@ -58,13 +59,13 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim)
} }
else { else {
int stride = 1; int stride = 1;
int strideNum = a->dimSizeRDI[dimRDI];
for (int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
int blockNum = 1; int blockNum = 1;
for (int i = dimRDI + 1; i < a->order; i++) int strideNum = a->dimSize[dim];
blockNum *= a->dimSizeRDI[i]; for (int i = 0; i < dim; i++)
blockNum *= a->dimSize[i];
for (int i = dim + 1; i < a->order; i++)
stride *= a->dimSize[i];
int blockSize = stride * strideNum; int blockSize = stride * strideNum;
_CopyValues(a, b); _CopyValues(a, b);
......
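_Sort, _CudaSortBig, _TopK and SetAscendingOrder all compute the same three quantities from the new plain dimSize indexing. A minimal sketch of that shared arithmetic for a row-major shape array (names are illustrative, not library API); the old code derived identical values from dimSizeRDI, where dimSizeRDI[i] == dimSize[order - 1 - i]:

#include <cstdio>

/* blockNum = product of dimensions before `dim`,
   strideNum = dimSize[dim],
   stride = product of dimensions after `dim` */
void GetDimStrides(const int * dimSize, int order, int dim,
                   int &blockNum, int &strideNum, int &stride)
{
    blockNum = 1;
    stride = 1;
    strideNum = dimSize[dim];
    for (int i = 0; i < dim; i++)
        blockNum *= dimSize[i];
    for (int i = dim + 1; i < order; i++)
        stride *= dimSize[i];
}

int main()
{
    int dims[3] = {2, 7, 3};   /* an arbitrary (2, 7, 3) tensor, dim = 1 */
    int blockNum, strideNum, stride;
    GetDimStrides(dims, 3, 1, blockNum, strideNum, stride);
    printf("%d %d %d\n", blockNum, strideNum, stride);   /* prints: 2 7 3 */
    return 0;
}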
...@@ -217,20 +217,19 @@ void _CudaSortBig(const XTensor * a, XTensor * b, XTensor * indexA, XTensor * in ...@@ -217,20 +217,19 @@ void _CudaSortBig(const XTensor * a, XTensor * b, XTensor * indexA, XTensor * in
CheckNTErrors((a->order > dim && dim >= 0), "Incorrect dimension specified!"); CheckNTErrors((a->order > dim && dim >= 0), "Incorrect dimension specified!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
int dimRDI = a->order - dim - 1; if (k < 0 || k > b->dimSize[dim])
if (k < 0 || k > b->dimSizeRDI[dimRDI]) k = b->dimSize[dim];
k = b->dimSizeRDI[dimRDI];
XMem * mem = a->mem; XMem * mem = a->mem;
int stride = 1; int stride = 1;
int strideNum = a->dimSizeRDI[dimRDI];
for (int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
int blockNum = 1; int blockNum = 1;
for (int i = dimRDI + 1; i < a->order; i++) int strideNum = a->dimSize[dim];
blockNum *= a->dimSizeRDI[i]; for (int i = 0; i < dim; i++)
blockNum *= a->dimSize[i];
for (int i = dim + 1; i < a->order; i++)
stride *= a->dimSize[i];
int m = GetNextPower2(strideNum); int m = GetNextPower2(strideNum);
int n = stride * blockNum; int n = stride * blockNum;
......
...@@ -45,15 +45,14 @@ void _TopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k) ...@@ -45,15 +45,14 @@ void _TopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
CheckNTErrors(index == NULL || a->order == index->order, "Unmatched input tensors!"); CheckNTErrors(index == NULL || a->order == index->order, "Unmatched input tensors!");
CheckNTErrors(index->dataType == X_INT, "Wrong data type!"); CheckNTErrors(index->dataType == X_INT, "Wrong data type!");
int dimRDI = a->order - dim - 1;
for (int i = 0; i < a->order; i++) { for (int i = 0; i < a->order; i++) {
if (i == dimRDI) { if (i == dim) {
CheckNTErrors(b->dimSizeRDI[i] == k, "A too large K"); CheckNTErrors((b->dimSize[i] == k), "K is too large!");
CheckNTErrors(index == NULL || index->dimSizeRDI[i] == k, "Wrong size!"); CheckNTErrors((index == NULL || index->dimSize[i] == k), "Wrong size!");
} }
else { else {
CheckNTErrors(b->dimSizeRDI[i] == a->dimSizeRDI[i], "Wrong size!"); CheckNTErrors((b->dimSize[i] == a->dimSize[i]), "Wrong size!");
CheckNTErrors(index == NULL || index->dimSizeRDI[i] == a->dimSizeRDI[i], "Wrong size!"); CheckNTErrors((index == NULL || index->dimSize[i] == a->dimSize[i]), "Wrong size!");
} }
} }
...@@ -68,14 +67,14 @@ void _TopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k) ...@@ -68,14 +67,14 @@ void _TopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
int stride = 1; int stride = 1;
int strideNumA = a->dimSizeRDI[dimRDI];
int strideNumB = b->dimSizeRDI[dimRDI];
for (int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
int blockNum = 1; int blockNum = 1;
for (int i = dimRDI + 1; i < a->order; i++) int strideNumA = a->dimSize[dim];
blockNum *= a->dimSizeRDI[i]; int strideNumB = b->dimSize[dim];
for (int i = 0; i < dim; i++)
blockNum *= a->dimSize[i];
for (int i = dim + 1; i < a->order; i++)
stride *= a->dimSize[i];
int blockSizeA = stride * strideNumA; int blockSizeA = stride * strideNumA;
int blockSizeB = stride * strideNumB; int blockSizeB = stride * strideNumB;
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "../../XDevice.h" #include "../../XDevice.h"
#include "../../XUtility.h" #include "../../XUtility.h"
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../utilities/SetAscendingOrder.h"
#include "TopK.h" #include "TopK.h"
#include "TopK.cuh" #include "TopK.cuh"
#include "Sort.cuh" #include "Sort.cuh"
...@@ -811,15 +812,14 @@ void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k) ...@@ -811,15 +812,14 @@ void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
CheckNTErrors((index->dataType == X_INT), "Wrong data type!"); CheckNTErrors((index->dataType == X_INT), "Wrong data type!");
CheckNTErrors((b->dimSize[dim] == k), "K is too large!"); CheckNTErrors((b->dimSize[dim] == k), "K is too large!");
int dimRDI = a->order - dim - 1;
int stride = 1; int stride = 1;
int strideNumA = a->dimSizeRDI[dimRDI];
for (int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
int blockNum = 1; int blockNum = 1;
for (int i = dimRDI + 1; i < a->order; i++) int strideNumA = a->dimSize[dim];
blockNum *= a->dimSizeRDI[i]; for (int i = 0; i < dim; i++)
blockNum *= a->dimSize[i];
for (int i = dim + 1; i < a->order; i++)
stride *= a->dimSize[i];
int workerNum = blockNum < 16 ? 64 : 32; int workerNum = blockNum < 16 ? 64 : 32;
/* adjust the thread number according to the size of k so that it fits the shared memory size */ /* adjust the thread number according to the size of k so that it fits the shared memory size */
...@@ -862,7 +862,7 @@ void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k) ...@@ -862,7 +862,7 @@ void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
//indexA->data = a->mem != NULL ? a->mem->AllocBuf(a->devID, a->unitNum * sizeof(int)) : XMemAlloc(a->devID, a->unitNum * sizeof(int)); //indexA->data = a->mem != NULL ? a->mem->AllocBuf(a->devID, a->unitNum * sizeof(int)) : XMemAlloc(a->devID, a->unitNum * sizeof(int));
/* make the index tensor */ /* make the index tensor */
//indexA->SetAscendingOrder(dim); //SetAscendingOrder(*indexA, dim);
//_CudaSortBig(a, b, indexA, index, dim, k); //_CudaSortBig(a, b, indexA, index, dim, k);
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2019-10-22
*/
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "CheckData.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* compare two numbers */
bool IsFloatEqual(DTYPE a, DTYPE b, float absError, float relError)
{
if(a == b)
return true;
if(fabs(a - b) < absError)
return true;
if(fabs(a) < fabs(b))
return fabs((a - b) / b) < relError;
else
return fabs((a - b) / a) < relError;
}
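IsFloatEqual passes if the values match exactly, differ by less than absError, or differ relatively by less than relError, with the relative error measured against the larger magnitude. A standalone usage sketch (mirroring the function above, with DTYPE assumed to be float):

#include <cstdio>
#include <cmath>

/* standalone mirror of IsFloatEqual above */
bool IsFloatEqualSketch(float a, float b, float absError, float relError)
{
    if (a == b)
        return true;
    if (fabs(a - b) < absError)
        return true;
    if (fabs(a) < fabs(b))
        return fabs((a - b) / b) < relError;
    return fabs((a - b) / a) < relError;
}

int main()
{
    printf("%d\n", IsFloatEqualSketch(1000.0F, 1000.05F, 1e-4F, 1e-4F)); /* 1: relative error 5e-5 */
    printf("%d\n", IsFloatEqualSketch(1e-6F, 2e-6F, 1e-4F, 1e-4F));      /* 1: absolute error 1e-6 */
    printf("%d\n", IsFloatEqualSketch(1.0F, 1.1F, 1e-4F, 1e-4F));        /* 0: both tolerances exceeded */
    return 0;
}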
/* check whether the data array is the same as the answer
>> tensor - input tensor
>> d - input data (it must be on CPUs)
>> num - number of data items
>> beg - where we start this in the data array of the tensor
*/
bool _CheckData(const XTensor * tensor, const void * d, int num, int beg)
{
if (tensor->data == NULL || d == NULL)
return false;
CheckNTErrors(!tensor->isSparse, "TODO");
CheckNTErrors(num == tensor->unitNum - beg, "Illegal size!");
if (tensor->devID < 0) {
/* compare from the beg-th item of the tensor's data array */
return !memcmp((char*)tensor->data + beg * tensor->unitSize, d, num * tensor->unitSize);
}
#ifdef USE_CUDA
else {
char * copy = new char[num * tensor->unitSize];
XMemCopy(copy, -1, (char*)tensor->data + beg * tensor->unitSize, tensor->devID, num * tensor->unitSize);
int cmpResult = memcmp(copy, d, num * tensor->unitSize);
bool result = (cmpResult == 0) ? true : false;
delete[] copy;
return result;
}
#endif
return true;
}
/* check whether the data array is the same as the answer
>> tensor - input tensor
>> d - input data (it must be on CPUs)
>> num - number of data items
>> tolerance - the error we tolerate between the result and the answer
>> beg - where we start this in the data array of the tensor
*/
bool _CheckData(const XTensor * tensor, const void * d, int num, float tolerance, int beg)
{
if (tensor->data == NULL || d == NULL)
return false;
CheckNTErrors(!tensor->isSparse, "TODO");
CheckNTErrors(num == tensor->unitNum - beg, "Illegal size!");
/* start the comparison at the beg-th item of the tensor's data array */
DTYPE * valuePrt = (DTYPE*)tensor->data + beg;
DTYPE value = 0;
DTYPE * answerPrt = (DTYPE*)d;
for (int i = 0; i < num; i++) {
value = ToCPU(tensor->devID, valuePrt);
if(IsFloatEqual(value, *answerPrt, tolerance, 1e-4F) == false)
return false;
valuePrt++;
answerPrt++;
}
return true;
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2019-10-22
*/
#ifndef __CHECKDATA_H__
#define __CHECKDATA_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* check whether the data array is the same as the answer */
bool _CheckData(const XTensor * tensor, const void * answer, int num, int beg = 0);
/* check whether the data array is the same as the answer */
bool _CheckData(const XTensor * tensor, const void * answer, int num, float tolerance, int beg = 0);
} // namespace nts(NiuTrans.Tensor)
#endif // __CHECKDATA_H__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2019-10-23
*/
#include "../../XTensor.h"
#include "SetAscendingOrder.cuh"
#include "SetAscendingOrder.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set the cells to ascending order along a given dimension
>> tensor - input tensor
>> dim - the dimension specified
*/
void SetAscendingOrder(XTensor & tensor, int dim)
{
CheckNTErrors(dim < tensor.order, "Wrong dimension specified!");
CheckNTErrors(tensor.dataType == X_INT, "TODO!");
if(dim < 0){
int o = tensor.order;
int ds[MAX_TENSOR_DIM_NUM];
memcpy(ds, tensor.dimSize, sizeof(int) * tensor.order);
tensor.Reshape(tensor.unitNum);
SetAscendingOrder(tensor, 0);
tensor.Reshape(o, ds);
return;
}
if(tensor.devID >= 0){
#ifdef USE_CUDA
CudaSetAscendingOrder(&tensor, dim);
#else
ShowNTErrors("Plesae specify USE_CUDA and recompile the code!");
#endif
}
else{
int stride = 1;
int blockNum = 1;
int strideNum = tensor.dimSize[dim];
for(int i = 0; i < dim; i++)
blockNum *= tensor.dimSize[i];
for(int i = dim + 1; i < tensor.order; i++)
stride *= tensor.dimSize[i];
for(int k = 0; k < blockNum; k++){
for(int j = 0; j < strideNum; j++){
int * d = (int*)tensor.data + stride * strideNum * k + stride * j;
for(int i = 0; i < stride; i++)
d[i] = j;
}
}
}
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
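For a 2x3 X_INT tensor and dim = 1, the CPU branch above gives blockNum = 2, strideNum = 3, stride = 1 and writes the index along dim into every cell. A standalone simulation of that loop nest (illustrative, not the library routine):

#include <cstdio>

int main()
{
    int data[6];
    int blockNum = 2, strideNum = 3, stride = 1;   /* dims before/at/after dim */
    for (int k = 0; k < blockNum; k++)
        for (int j = 0; j < strideNum; j++)
            for (int i = 0; i < stride; i++)
                data[stride * strideNum * k + stride * j + i] = j;
    for (int i = 0; i < 6; i++)
        printf("%d%c", data[i], (i % 3 == 2) ? '\n' : ' ');   /* 0 1 2 / 0 1 2 */
    return 0;
}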
...@@ -67,15 +67,14 @@ void CudaSetAscendingOrder(XTensor * a, int dim) ...@@ -67,15 +67,14 @@ void CudaSetAscendingOrder(XTensor * a, int dim)
{ {
CheckNTErrors((a->dataType == X_INT), "TODO!"); CheckNTErrors((a->dataType == X_INT), "TODO!");
int dimRDI = a->order - dim - 1;
int stride = 1; int stride = 1;
int strideNum = a->dimSizeRDI[dimRDI];
for(int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
int blockNum = 1; int blockNum = 1;
for(int i = dimRDI + 1; i < a->order; i++) int strideNum = a->dimSize[dim];
blockNum *= a->dimSizeRDI[i]; for(int i = 0; i < dim; i++)
blockNum *= a->dimSize[i];
for(int i = dim + 1; i < a->order; i++)
stride *= a->dimSize[i];
int gridSize[3]; int gridSize[3];
int blockSize[3]; int blockSize[3];
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2019-10-23
*/
#ifndef __SETASCENDINGORDER_H__
#define __SETASCENDINGORDER_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* set the cells to ascending order along a given dimension */
void SetAscendingOrder(XTensor & tensor, int dim);
} // namespace nts(NiuTrans.Tensor)
#endif // __SETASCENDINGORDER_H__
\ No newline at end of file
...@@ -68,7 +68,7 @@ void _Dropout(const XTensor * x, XTensor * y, unsigned int seed, DTYPE dropProb, ...@@ -68,7 +68,7 @@ void _Dropout(const XTensor * x, XTensor * y, unsigned int seed, DTYPE dropProb,
for (int i = 0; i < unitNum; i++) for (int i = 0; i < unitNum; i++)
maskArray[i] = RandomBernoulli(dropProb, scaleFactor); maskArray[i] = RandomBernoulli(dropProb, scaleFactor);
XTensor * mask = NewTensor1D(unitNum, x->dataType, x->devID, x->mem); XTensor * mask = NewTensor1DV2(unitNum, x->dataType, x->devID, x->mem);
mask->SetData(maskArray, unitNum); mask->SetData(maskArray, unitNum);
/* call Multiply function for mask */ /* call Multiply function for mask */
...@@ -113,7 +113,7 @@ void _DropoutBackward(const XTensor * y, const XTensor * x, ...@@ -113,7 +113,7 @@ void _DropoutBackward(const XTensor * y, const XTensor * x,
for (int i = 0; i < unitNum; i++) for (int i = 0; i < unitNum; i++)
maskArray[i] = RandomBernoulli(dropProb, scaleFactor); maskArray[i] = RandomBernoulli(dropProb, scaleFactor);
XTensor * mask = NewTensor1D(unitNum, x->dataType, x->devID, x->mem); XTensor * mask = NewTensor1DV2(unitNum, x->dataType, x->devID, x->mem);
mask->SetData(maskArray, unitNum); mask->SetData(maskArray, unitNum);
/* call MultiplyDim function for mask */ /* call MultiplyDim function for mask */
...@@ -149,13 +149,12 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim ...@@ -149,13 +149,12 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!"); CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
XTensor mask; XTensor mask;
// int * maskArrayInt = NULL;
DTYPE * maskArray = NULL; DTYPE * maskArray = NULL;
DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb); DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);
if(leadingDim < 0 && leadingDim2 < 0){ if(leadingDim < 0 && leadingDim2 < 0){
XTensor mask; XTensor mask;
InitTensor(&mask, &x); InitTensorV2(&mask, &x);
_SetDataRandP(&mask, 0, 1.0F, dropProb, scaleFactor); _SetDataRandP(&mask, 0, 1.0F, dropProb, scaleFactor);
...@@ -169,7 +168,7 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim ...@@ -169,7 +168,7 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
maskArrayInt[i] = rand() % x.unitNum; maskArrayInt[i] = rand() % x.unitNum;
XTensor maskindex; XTensor maskindex;
InitTensor1D(&maskindex, unitNum, X_INT, x.devID, x.mem); InitTensor1DV2(&maskindex, unitNum, X_INT, x.devID, x.mem);
maskindex.SetData(maskArrayInt, unitNum); maskindex.SetData(maskArrayInt, unitNum);
...@@ -192,7 +191,7 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim ...@@ -192,7 +191,7 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
maskArray[i] = RandomBernoulli(dropProb, scaleFactor); maskArray[i] = RandomBernoulli(dropProb, scaleFactor);
XTensor mask; XTensor mask;
InitTensor1D(&mask, unitNum, x.dataType, x.devID, x.mem); InitTensor1DV2(&mask, unitNum, x.dataType, x.devID, x.mem);
mask.SetData(maskArray, unitNum); mask.SetData(maskArray, unitNum);
delete[] maskArray; delete[] maskArray;
...@@ -221,7 +220,7 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim ...@@ -221,7 +220,7 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
dims[n] = x.GetDim(n); dims[n] = x.GetDim(n);
dims[m] = x.GetDim(m); dims[m] = x.GetDim(m);
InitTensor(&mask, x.order, dims, x.dataType, x.denseRatio,x.devID, x.mem); InitTensorV2(&mask, x.order, dims, x.dataType, x.denseRatio,x.devID, x.mem);
mask.SetData(maskArray, unitNum); mask.SetData(maskArray, unitNum);
delete[] maskArray; delete[] maskArray;
...@@ -251,7 +250,7 @@ XTensor DropoutWithoutBroadcast(const XTensor &x, DTYPE dropProb) ...@@ -251,7 +250,7 @@ XTensor DropoutWithoutBroadcast(const XTensor &x, DTYPE dropProb)
maskArray[i] = RandomBernoulli(dropProb, scaleFactor); maskArray[i] = RandomBernoulli(dropProb, scaleFactor);
XTensor mask; XTensor mask;
InitTensor(&mask, x.order, x.dimSize, x.dataType, x.denseRatio, x.devID, x.mem); InitTensorV2(&mask, x.order, x.dimSize, x.dataType, x.denseRatio, x.devID, x.mem);
mask.SetData(maskArray, unitNum); mask.SetData(maskArray, unitNum);
delete[] maskArray; delete[] maskArray;
......
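The scaleFactor = 1/(1 - dropProb) used in every branch implements inverted dropout: surviving units are scaled up so that the expected activation is unchanged, E[mask * x] = x. A hedged sketch of the mask statistics, with a stand-in for RandomBernoulli that is assumed to return scaleFactor with probability 1 - dropProb and 0 otherwise, matching its use above:

#include <cstdlib>

/* illustrative stand-in for the RandomBernoulli used above */
float RandomBernoulliSketch(float dropProb, float scaleFactor)
{
    float u = (float)rand() / RAND_MAX;
    return (u >= dropProb) ? scaleFactor : 0.0F;
}

int main()
{
    const float dropProb = 0.25F;
    const float scale = 1.0F / (1.0F - dropProb);
    double sum = 0.0;
    const int n = 1000000;
    for (int i = 0; i < n; i++)
        sum += RandomBernoulliSketch(dropProb, scale);
    /* sum / n is close to 1: the mask preserves the mean activation */
    return (sum / n > 0.99 && sum / n < 1.01) ? 0 : 1;
}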
...@@ -68,7 +68,7 @@ XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale) ...@@ -68,7 +68,7 @@ XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale)
dimSize[i] = x.dimSize[i]; dimSize[i] = x.dimSize[i];
} }
InitTensor1D(&c, x.unitNum, x.dataType, x.devID, x.mem); InitTensor1DV2(&c, x.unitNum, x.dataType, x.devID, x.mem);
_SetDataFixedFloat(&c, 1.0F); _SetDataFixedFloat(&c, 1.0F);
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <stdlib.h> #include <stdlib.h>
#include "../XName.h" #include "../XName.h"
#include "../../tensor/core/shape/IsSameShaped.h"
#include "HardTanH.h" #include "HardTanH.h"
#include "HardTanH.cuh" #include "HardTanH.cuh"
...@@ -36,7 +37,7 @@ y = 1 if x > 1 ...@@ -36,7 +37,7 @@ y = 1 if x > 1
*/ */
void _HardTanH(const XTensor * x, XTensor * y) void _HardTanH(const XTensor * x, XTensor * y)
{ {
CheckNTErrors(XTensor::IsSameShaped(x, y), CheckNTErrors(_IsSameShaped(x, y),
"The input tensor and output tensor must have the same shape!") "The input tensor and output tensor must have the same shape!")
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -87,8 +88,8 @@ XTensor HardTanH(const XTensor &x) ...@@ -87,8 +88,8 @@ XTensor HardTanH(const XTensor &x)
void HardTanH(const XTensor &x, XTensor &y) void HardTanH(const XTensor &x, XTensor &y)
{ {
if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) { if (!y.isInit || !IsSameShaped(y, x)) {
InitTensor(&y, &x); InitTensorV2(&y, &x);
} }
/* call _HardTanH function */ /* call _HardTanH function */
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "../XName.h" #include "../XName.h"
#include "../XUtility.h" #include "../XUtility.h"
#include "../core/movement/CopyValues.h" #include "../core/movement/CopyValues.h"
#include "../core/shape/IsSameShaped.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
...@@ -33,7 +34,7 @@ identity function y = x ...@@ -33,7 +34,7 @@ identity function y = x
*/ */
void _Identity(const XTensor * x, XTensor * y) void _Identity(const XTensor * x, XTensor * y)
{ {
CheckNTErrors(XTensor::IsSameShaped(x, y), CheckNTErrors(_IsSameShaped(x, y),
"The input tensor and output tensor must have the same shape!") "The input tensor and output tensor must have the same shape!")
_CopyValues(x, y); _CopyValues(x, y);
} }
...@@ -63,8 +64,8 @@ XTensor Identity(const XTensor &x) ...@@ -63,8 +64,8 @@ XTensor Identity(const XTensor &x)
void Identity(const XTensor &x, XTensor &y) void Identity(const XTensor &x, XTensor &y)
{ {
if (!y.isInit || !y.IsSameShaped(&y, &x)) { if (!y.isInit || !IsSameShaped(y, x)) {
InitTensor(&y, &x); InitTensorV2(&y, &x);
} }
/* call _Identity function */ /* call _Identity function */
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "../core/reduce/ReduceSum.h" #include "../core/reduce/ReduceSum.h"
#include "../core/reduce/ReduceMax.h" #include "../core/reduce/ReduceMax.h"
#include "../core/movement/CopyValues.h" #include "../core/movement/CopyValues.h"
#include "../core/shape/IsSameShaped.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -49,7 +50,6 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim) ...@@ -49,7 +50,6 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
return; return;
} }
int leadDimRDI = x->order - leadDim - 1;
if (!x->isSparse && !y->isSparse && if (!x->isSparse && !y->isSparse &&
x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE) x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE)
{ {
...@@ -69,36 +69,36 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim) ...@@ -69,36 +69,36 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
XTensor * blockMax = NULL; XTensor * blockMax = NULL;
XTensor * blockSum = NULL; XTensor * blockSum = NULL;
int dimensionSize = y->dimSizeRDI[leadDimRDI]; int dimensionSize = y->dimSize[leadDim];
int stride = 1; int stride = 1;
int blockSize = 1; int blockSize = 1;
int blockNum = 1; int blockNum = 1;
for (int i = 0; i < leadDimRDI; i++) for (int i = leadDim + 1; i < x->order; i++)
stride *= y->dimSizeRDI[i]; stride *= y->dimSize[i];
blockSize = stride * dimensionSize; blockSize = stride * dimensionSize;
blockNum = y->unitNum / blockSize; blockNum = y->unitNum / blockSize;
max = NewTensorBuf(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem); max = NewTensorBufV2(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem);
sum = NewTensorBuf(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem); sum = NewTensorBufV2(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem);
_ReduceMax(x, max, leadDim); _ReduceMax(x, max, leadDim);
_ReduceSum(x, sum, leadDim, max, 1.0F, true); _ReduceSum(x, sum, leadDim, max, 1.0F, true);
if (x->devID >= 0) { if (x->devID >= 0) {
if(leadDimRDI == 0){ if(leadDim == x->order - 1){
blockSize = y->unitNum; blockSize = y->unitNum;
blockNum = 1; blockNum = 1;
blockx = NewTensor2D(blockSize/dimensionSize, -dimensionSize, x->dataType, x->devID, mem); blockx = NewTensor2DV2(blockSize/dimensionSize, -dimensionSize, x->dataType, x->devID, mem);
blocky = NewTensor2D(blockSize/dimensionSize, -dimensionSize, x->dataType, x->devID, mem); blocky = NewTensor2DV2(blockSize/dimensionSize, -dimensionSize, x->dataType, x->devID, mem);
blockMax = NewTensor2D(blockSize/dimensionSize, -1, x->dataType, x->devID, mem); blockMax = NewTensor2DV2(blockSize/dimensionSize, -1, x->dataType, x->devID, mem);
blockSum = NewTensor2D(blockSize/dimensionSize, -1, x->dataType, x->devID, mem); blockSum = NewTensor2DV2(blockSize/dimensionSize, -1, x->dataType, x->devID, mem);
} }
else{ else{
blockx = NewTensor2D(-stride, dimensionSize, x->dataType, x->devID, mem); blockx = NewTensor2DV2(-stride, dimensionSize, x->dataType, x->devID, mem);
blocky = NewTensor2D(-stride, dimensionSize, x->dataType, x->devID, mem); blocky = NewTensor2DV2(-stride, dimensionSize, x->dataType, x->devID, mem);
blockMax = NewTensor2D(-stride, 1, x->dataType, x->devID, mem); blockMax = NewTensor2DV2(-stride, 1, x->dataType, x->devID, mem);
blockSum = NewTensor2D(-stride, 1, x->dataType, x->devID, mem); blockSum = NewTensor2DV2(-stride, 1, x->dataType, x->devID, mem);
} }
} }
...@@ -137,7 +137,7 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim) ...@@ -137,7 +137,7 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
blockMax->data = mp; blockMax->data = mp;
blockSum->data = sp; blockSum->data = sp;
#ifdef USE_CUDA #ifdef USE_CUDA
if(leadDimRDI == 0) if(leadDim == x->order - 1)
_CudaLogSoftmaxSumMax(blockx, blocky, 1, blockSum, blockMax); _CudaLogSoftmaxSumMax(blockx, blocky, 1, blockSum, blockMax);
else else
_CudaLogSoftmaxSumMax(blockx, blocky, leadDim, blockSum, blockMax); _CudaLogSoftmaxSumMax(blockx, blocky, leadDim, blockSum, blockMax);
...@@ -210,8 +210,8 @@ void LogSoftmax(const XTensor &x, XTensor &y, int leadDim) ...@@ -210,8 +210,8 @@ void LogSoftmax(const XTensor &x, XTensor &y, int leadDim)
if (ld < 0) if (ld < 0)
ld = x.order - 1; ld = x.order - 1;
if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) { if (!y.isInit || !IsSameShaped(y, x)) {
InitTensor(&y, &x); InitTensorV2(&y, &x);
} }
/* call _LogSoftmax function */ /* call _LogSoftmax function */
...@@ -298,7 +298,6 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -298,7 +298,6 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
if(leadDim < 0) if(leadDim < 0)
leadDim = y->order - 1; leadDim = y->order - 1;
int leadDimRDI = y->order - leadDim - 1;
#ifdef USE_CUDA #ifdef USE_CUDA
if (gold->devID >= 0) { if (gold->devID >= 0) {
_CudaLogSoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName); _CudaLogSoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName);
...@@ -306,12 +305,12 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -306,12 +305,12 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
} }
#endif #endif
int dimensionSize = y->dimSizeRDI[leadDimRDI]; int dimensionSize = y->dimSize[leadDim];
int stride = 1; int stride = 1;
int blockSize = 1; int blockSize = 1;
int blockNum = 1; int blockNum = 1;
for (int i = 0; i < leadDimRDI; i++) for (int i = leadDim + 1; i < y->order; i++)
stride *= y->dimSizeRDI[i]; stride *= y->dimSize[i];
blockSize = stride * dimensionSize; blockSize = stride * dimensionSize;
blockNum = y->unitNum / blockSize; blockNum = y->unitNum / blockSize;
...@@ -338,10 +337,10 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -338,10 +337,10 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
int key = gold->GetKeyInSparse(i); int key = gold->GetKeyInSparse(i);
DTYPE value = gold->GetInSparse(i); DTYPE value = gold->GetInSparse(i);
int offset = key; int offset = key;
if (dedx->dimSizeRDI[0] != gm) { if (dedx->dimSize[dedx->order - 1] != gm) {
int mi = key % gm; int mi = key % gm;
int ni = key / gm; int ni = key / gm;
int key2 = ni * dedx->dimSizeRDI[0] + mi; int key2 = ni * dedx->dimSize[dedx->order - 1] + mi;
offset = key2; offset = key2;
} }
if (key >= 0 && key < size) if (key >= 0 && key < size)
...@@ -352,7 +351,7 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -352,7 +351,7 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
} }
} }
else { else {
CheckNTErrors((XTensor::IsSameShaped(gold, y)), "The tensors must be of the same size!"); CheckNTErrors((_IsSameShaped(gold, y)), "The tensors must be of the same size!");
for (int k = 0; k < blockNum; k++) { for (int k = 0; k < blockNum; k++) {
gp = (DTYPE*)gold->data + k * blockSize; gp = (DTYPE*)gold->data + k * blockSize;
op = (DTYPE*)y->data + k * blockSize; op = (DTYPE*)y->data + k * blockSize;
...@@ -395,10 +394,10 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -395,10 +394,10 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
int key = gold->GetKeyInSparse(i); int key = gold->GetKeyInSparse(i);
DTYPE value = gold->GetInSparse(i); DTYPE value = gold->GetInSparse(i);
int offset = key; int offset = key;
if (dedx->dimSizeRDI[0] != gm) { if (dedx->dimSize[dedx->order - 1] != gm) {
int mi = key % gm; int mi = key % gm;
int ni = key / gm; int ni = key / gm;
int key2 = ni * dedx->dimSizeRDI[0] + mi; int key2 = ni * dedx->dimSize[dedx->order - 1] + mi;
offset = key2; offset = key2;
} }
if (key >= 0 && key < size) if (key >= 0 && key < size)
...@@ -406,7 +405,7 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -406,7 +405,7 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
} }
} }
else { else {
CheckNTErrors((XTensor::IsSameShaped(gold, y)), "The tensors must be of the same size!"); CheckNTErrors((_IsSameShaped(gold, y)), "The tensors must be of the same size!");
for (int k = 0; k < blockNum; k++) { for (int k = 0; k < blockNum; k++) {
gp = (DTYPE*)gold->data + k * blockSize; gp = (DTYPE*)gold->data + k * blockSize;
op = (DTYPE*)y->data + k * blockSize; op = (DTYPE*)y->data + k * blockSize;
...@@ -430,11 +429,11 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -430,11 +429,11 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
/* for columns with no xs we set dE/ds = 0 */ /* for columns with no xs we set dE/ds = 0 */
if (gold != NULL && gold->isSparse) { if (gold != NULL && gold->isSparse) {
CheckNTErrors((gold->order == 2), "The gold standard tensor must be of order 2!"); CheckNTErrors((gold->order == 2), "The gold standard tensor must be of order 2!");
if ((gold->dimSize[1] > 1 && !gold->isAllValued[0]) || gold->dimSize[1] != dedx->dimSizeRDI[0]) { if ((gold->dimSize[1] > 1 && !gold->isAllValued[0]) || gold->dimSize[1] != dedx->dimSize[dedx->order - 1]) {
int gn = gold->dimSize[0]; int gn = gold->dimSize[0];
int gm = gold->dimSize[1]; int gm = gold->dimSize[1];
int sm = dedx->dimSizeRDI[0]; int sm = dedx->dimSize[dedx->order - 1];
int sn = dedx->dimSizeRDI[1]; int sn = dedx->dimSize[dedx->order - 2];
int * flags = new int[sm]; int * flags = new int[sm];
memset(flags, 0, sizeof(int)*sm); memset(flags, 0, sizeof(int)*sm);
......
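The key remapping above converts a flat index from the gold matrix, whose row width is gm, into the corresponding flat index of dedx, whose row width is now read from dedx->dimSize[dedx->order - 1]. A worked sketch of that arithmetic (the function name is illustrative):

#include <cassert>

/* remap a flat row-major index from a matrix with gm columns
   to a matrix with sm columns (same row/column coordinates) */
int RemapFlatIndex(int key, int gm, int sm)
{
    int mi = key % gm;   /* column */
    int ni = key / gm;   /* row */
    return ni * sm + mi;
}

int main()
{
    /* element (row 2, col 3) of a 4-column gold matrix lands at
       the same coordinates in an 8-column dedx matrix */
    assert(RemapFlatIndex(2 * 4 + 3, 4, 8) == 2 * 8 + 3);
    return 0;
}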