Commit 0e585782 by xuchen

Merge with the liyinqiao branch and add the stack function.

parent 93bc3158
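Most of the changes below apply one rename pattern: initializers and constructors whose signature ends in an XMem pointer now carry a V2 suffix, while the plain names take the grad-aware signatures declared in XCall.h (added in this commit, see below). A minimal sketch of the two call styles, assuming the XCall.h declarations are in scope:

/* V2 style: legacy signature ending in an XMem pointer */
XTensor a;
InitTensor2DV2(&a, 2, 3, X_FLOAT, -1 /* devID: CPU */, NULL /* XMem */);

/* plain style: grad-aware signature ending in an isEnableGrad flag */
XTensor b;
InitTensor2D(&b, 2, 3, X_FLOAT, -1 /* devID */, true /* isEnableGrad */);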
......@@ -76,8 +76,8 @@ void BackwardTest()
c.enableGrad = false;
XTensor mean;
XTensor origin;
InitTensor2D(&a, 2, 3);
InitTensor1D(&b, 2);
InitTensor2DV2(&a, 2, 3);
InitTensor1DV2(&b, 2);
a.SetZeroAll();
b.SetZeroAll();
......@@ -121,9 +121,9 @@ void TransposeTest()
int nnn = GDevs.nGPU;
InitTensor3D(&x, B, N, H, X_FLOAT, 0);
InitTensor4D(&y, K, B, N, H/K, X_FLOAT, 0);
InitTensor3D(&z, B, N, H, X_FLOAT, 0);
InitTensor3DV2(&x, B, N, H, X_FLOAT, 0);
InitTensor4DV2(&y, K, B, N, H/K, X_FLOAT, 0);
InitTensor3DV2(&z, B, N, H, X_FLOAT, 0);
cudaEvent_t ctime0;
cudaEvent_t ctime1;
......@@ -191,9 +191,9 @@ void SumDimTest()
int b = 7;
int c = 3;
InitTensor3D(&x, a, b, c, X_FLOAT, -1);
InitTensor1D(&y, c, X_FLOAT, -1);
InitTensor3D(&z, a, b, c, X_FLOAT, -1);
InitTensor3DV2(&x, a, b, c, X_FLOAT, -1);
InitTensor1DV2(&y, c, X_FLOAT, -1);
InitTensor3DV2(&z, a, b, c, X_FLOAT, -1);
x.SetZeroAll();
y.SetZeroAll();
......
......@@ -281,7 +281,7 @@ void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
smallsGrad.Add(tail->grad);
if(i > 1){
CheckNTErrors(XTensor::IsSameShaped(last, tail),
CheckNTErrors(_IsSameShaped(last, tail),
"Input tensors must be of the same size!");
}
......@@ -391,7 +391,7 @@ void XShapeGrad::GradSplit(XTensor * node, bool isEfficient)
/* if the tensor is used somewhere else, we need another SUM
for gradient accumulation */
else{
XTensor * inputGradTMP = NewTensorBuf(input, input->devID, input->mem);
XTensor * inputGradTMP = NewTensorBufV2(input, input->devID, input->mem);
_Merge(node->grad, inputGradTMP, whereToSplit + 1, 0);
_Sum(input->grad, inputGradTMP, input->grad);
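The comment above captures the backward rule for SPLIT: the gradient flowing back to the input is the merge of the output gradients, summed into whatever gradient the input already holds from its other uses. A standalone sketch of that accumulation on plain arrays (toy sizes, not the NiuTrans API):

#include <cstdio>

int main()
{
    /* an input of size 4 was split into two halves; each half produced a gradient */
    float gradHalf0[2] = {0.1F, 0.2F};
    float gradHalf1[2] = {0.3F, 0.4F};

    /* the input's gradient may already hold contributions from other uses */
    float inputGrad[4] = {1.0F, 1.0F, 1.0F, 1.0F};

    /* merge the split gradients back, then SUM into the existing gradient */
    float merged[4] = {gradHalf0[0], gradHalf0[1], gradHalf1[0], gradHalf1[1]};
    for (int i = 0; i < 4; i++)
        inputGrad[i] += merged[i];

    for (int i = 0; i < 4; i++)
        printf("%.1f ", inputGrad[i]);   /* 1.1 1.2 1.3 1.4 */
    printf("\n");
    return 0;
}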
......@@ -450,7 +450,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
if(income.typeID == SHAPE_SPLIT_LIST){
int w = income.GetParamInt(0);
int splitID = income.GetParamInt(1);
if(whereToSplit < 0)
whereToSplit = w;
splitNum++;
......@@ -475,7 +475,7 @@ void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
somewhere else, we need another SUM for gradient
accumulation */
else{
XTensor * nodeGradTMP = NewTensorBuf(node, node->devID, node->mem);
XTensor * nodeGradTMP = NewTensorBufV2(node, node->devID, node->mem);
_Merge(&splits, nodeGradTMP, whereToSplit + 1);
_Sum(node->grad, nodeGradTMP, node->grad);
......@@ -501,7 +501,7 @@ void XShapeGrad::GradTranspose(XTensor * node, bool isEfficient)
XTensor * output = node;
XTensor * input = income.tails[0];
XTensor * b = NewTensorBuf(input, input->devID, input->mem);
XTensor * b = NewTensorBufV2(input, input->devID, input->mem);
XNoder::MakeGrad(input);
int i = income.GetParamInt(0);
......@@ -543,7 +543,7 @@ void XShapeGrad::GradUnsqueeze(XTensor * node, bool isEfficient)
CheckNTErrors(dSize == output->GetDim(dim), "Wrong dim size for UNSQUEEZE!");
CheckNTErrors(output->unitNum == input->unitNum * dSize, "Wrong tensor size!");
XTensor * g = NewTensorBuf(input->grad, input->devID, input->mem);
XTensor * g = NewTensorBufV2(input->grad, input->devID, input->mem);
_ReduceSum(output->grad, g, dim);
_Sum(input->grad, g, input->grad);
......
......@@ -29,7 +29,7 @@ void XNoder::MakeGrad(XTensor * node)
if(node == NULL)
return;
if(!XTensor::IsSameShaped(node, node->grad)){
if(!_IsSameShaped(node, node->grad)){
delete node->grad;
node->grad = NewTensor(node);
node->grad->SetZeroAll();
......
......@@ -20,7 +20,7 @@
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-18
*/
#include "../tensor/XTensor.h"
#include "../tensor/core/CHeader.h"
#ifndef __XNODER_H__
#define __XNODER_H__
......
......@@ -242,13 +242,13 @@ void Check(FNNModel &model)
/* make a hard copy of the fnn model */
void Copy(FNNModel &tgt, FNNModel &src)
{
InitTensorV2(&tgt.embeddingW, &src.embeddingW);
InitTensor(&tgt.embeddingW, &src.embeddingW);
for(int i = 0; i < MAX_HIDDEN_NUM; i++){
InitTensorV2(&tgt.hiddenW[i], &src.hiddenW[i]);
InitTensorV2(&tgt.hiddenB[i], &src.hiddenB[i]);
InitTensor(&tgt.hiddenW[i], &src.hiddenW[i]);
InitTensor(&tgt.hiddenB[i], &src.hiddenB[i]);
}
InitTensorV2(&tgt.outputW, &src.outputW);
InitTensorV2(&tgt.outputB, &src.outputB);
InitTensor(&tgt.outputW, &src.outputW);
InitTensor(&tgt.outputB, &src.outputB);
tgt.n = src.n;
tgt.eSize = src.eSize;
......@@ -300,7 +300,7 @@ initialize a 1d tensor using the fnn model setting
*/
void InitModelTensor1D(XTensor &tensor, int num, FNNModel &model)
{
InitTensor1DV2(&tensor, num, X_FLOAT, model.devID);
InitTensor1D(&tensor, num, X_FLOAT, model.devID);
}
/*
......@@ -312,7 +312,7 @@ initialize a 2d tensor using the fnn model setting
*/
void InitModelTensor2D(XTensor &tensor, int rowNum, int colNum, FNNModel &model)
{
InitTensor2DV2(&tensor, rowNum, colNum, X_FLOAT, model.devID);
InitTensor2D(&tensor, rowNum, colNum, X_FLOAT, model.devID);
}
......@@ -594,14 +594,14 @@ get prediction probabilities of the gold words
float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
{
XTensor probs;
InitTensorV2(&probs, &output);
InitTensor(&probs, &output);
/* probs[i,j] = output[i,j] * gold[i,j] */
Multiply(output, gold, probs);
/* probability of each word */
XTensor wprobs;
InitTensor1DV2(&wprobs, output.GetDim(0), output.dataType, output.devID);
InitTensor1D(&wprobs, output.GetDim(0), output.dataType, output.devID);
ReduceSum(probs, wprobs, 1);
if(wordProbs != NULL)
CopyValues(wprobs, *wordProbs);
......@@ -615,7 +615,7 @@ float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs)
/* probability for the batch */
XTensor result;
InitTensor1DV2(&result, 1, X_FLOAT, output.devID);
InitTensor1D(&result, 1, X_FLOAT, output.devID);
ReduceSum(probs, result, 1);
return result.Get1D(0);
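Since gold is (typically) a one-hot indicator, the elementwise multiply followed by a row-wise ReduceSum simply picks out the score the model assigned to each gold word. A plain-C++ illustration of that selection (toy numbers, not the library API):

#include <cstdio>

int main()
{
    /* one row of model output over a 3-word vocabulary */
    float output[3] = {0.2F, 0.5F, 0.3F};
    /* one-hot gold vector: the correct word is index 1 */
    float gold[3] = {0.0F, 1.0F, 0.0F};

    /* probs[j] = output[j] * gold[j]; the row sum is the gold word's score */
    float wprob = 0.0F;
    for (int j = 0; j < 3; j++)
        wprob += output[j] * gold[j];

    printf("%f\n", wprob);   /* 0.5 */
    return 0;
}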
......@@ -716,7 +716,7 @@ The indexed cell is set to 1, and 0 otherwise.
void InitZeroOneTensor2D(XTensor &tensor, int rowNum, int colNum, int * rows, int * cols,
int itemNum, int devID)
{
InitTensor2DV2(&tensor, rowNum, colNum, X_FLOAT, devID);
InitTensor2D(&tensor, rowNum, colNum, X_FLOAT, devID);
tensor.SetZeroAll();
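A toy illustration of the zero-one tensor this function builds: each (rows[k], cols[k]) pair flips one cell to 1 in an otherwise all-zero matrix (plain arrays, not the library API):

#include <cstdio>

int main()
{
    const int rowNum = 2, colNum = 4, itemNum = 3;
    int rows[itemNum] = {0, 1, 1};
    int cols[itemNum] = {2, 0, 3};

    float tensor[rowNum][colNum] = {{0}};   /* SetZeroAll */
    for (int k = 0; k < itemNum; k++)
        tensor[rows[k]][cols[k]] = 1.0F;    /* indexed cells set to 1 */

    for (int i = 0; i < rowNum; i++) {
        for (int j = 0; j < colNum; j++)
            printf("%.0f ", tensor[i][j]);
        printf("\n");                       /* prints: 0 0 1 0 / 1 0 0 1 */
    }
    return 0;
}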
......@@ -808,7 +808,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
/* make a 2d tensor for the bias term */
XTensor b2D;
InitTensorV2(&b2D, &s);
InitTensor(&b2D, &s);
Unsqueeze(b, b2D, 0, batchSize);
/* introduce bias term:
......@@ -840,7 +840,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net)
MatrixMul(h_last, X_NOTRANS, w, X_NOTRANS, s);
XTensor b2D;
InitTensorV2(&b2D, &s);
InitTensor(&b2D, &s);
Unsqueeze(b, b2D, 0, batchSize);
Sum(s, b2D, s);
......@@ -905,8 +905,8 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
XTensor dedsHidden;
XTensor dedxBottom;
if (depth > 0)
InitTensorV2(&dedsHidden, &dedx);
InitTensorV2(&dedxBottom, &net.embeddingCat);
InitTensor(&dedsHidden, &dedx);
InitTensor(&dedxBottom, &net.embeddingCat);
/* back-propagation from top to bottom in the stack of hidden layers
for each layer, h = f(s)
......@@ -944,7 +944,7 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
/* back-propagation for the embedding layer */
for (int i = 0; i < n - 1; i++) {
XTensor * dedy = NewTensor2DV2(batchSize, model.eSize, X_FLOAT, model.devID);
XTensor * dedy = NewTensor2D(batchSize, model.eSize, X_FLOAT, model.devID);
eList.Add(dedy);
}
......@@ -996,7 +996,7 @@ void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model
}
}
InitTensor1DV2(&words, size, X_INT, model.devID);
InitTensor1D(&words, size, X_INT, model.devID);
words.SetData(index, size);
embeddingBig = Gather(model.embeddingW, words);
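Gather(model.embeddingW, words) performs the embedding lookup: each integer in words selects one row of embeddingW. A plain sketch of the same lookup (toy sizes, not the library call):

#include <cstdio>

int main()
{
    /* a 4-word vocabulary with 2-dimensional embeddings */
    float embeddingW[4][2] = {{0.0F, 0.1F}, {1.0F, 1.1F},
                              {2.0F, 2.1F}, {3.0F, 3.1F}};
    int words[3] = {2, 0, 3};   /* word indices of the batch */

    /* gather: row i of the result is embeddingW[words[i]] */
    for (int i = 0; i < 3; i++)
        printf("%.1f %.1f\n", embeddingW[words[i]][0], embeddingW[words[i]][1]);
    return 0;
}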
......@@ -1176,7 +1176,7 @@ void Test(const char * test, const char * result, FNNModel &model)
/* prediction probabilities */
XTensor probs;
InitTensor1DV2(&probs, ngramNum);
InitTensor1D(&probs, ngramNum);
/* get probabilities */
float prob = GetProb(output, gold, &probs);
......
......@@ -69,11 +69,11 @@ void T2TAttention::InitModel(int argc, char ** argv,
LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutatt", &dropoutP, 0);
InitTensor2DV2(&wk, d, dk, X_FLOAT, devID);
InitTensor2DV2(&wq, d, dk, X_FLOAT, devID);
InitTensor2DV2(&wv, d, dv, X_FLOAT, devID);
InitTensor2DV2(&wa, d, d, X_FLOAT, devID);
InitTensor2DV2(&wbig, d, 3 * d, X_FLOAT, devID);
InitTensor2D(&wk, d, dk, X_FLOAT, devID);
InitTensor2D(&wq, d, dk, X_FLOAT, devID);
InitTensor2D(&wv, d, dv, X_FLOAT, devID);
InitTensor2D(&wa, d, d, X_FLOAT, devID);
InitTensor2D(&wbig, d, 3 * d, X_FLOAT, devID);
float scale = 1.0F;
_SetDataFanInOut(&wk, scale);
......@@ -128,9 +128,9 @@ XTensor T2TAttention::MakeBig(XTensor &kqv, XTensor &mask, bool isTraining)
int d2 = kqv2.GetDim(1);
int d3 = kqv2.GetDim(2) / 3;
InitTensor3DV2(&k2, d1, d2, d3, X_FLOAT, devID);
InitTensor3DV2(&q2, d1, d2, d3, X_FLOAT, devID);
InitTensor3DV2(&v2, d1, d2, d3, X_FLOAT, devID);
InitTensor3D(&k2, d1, d2, d3, X_FLOAT, devID);
InitTensor3D(&q2, d1, d2, d3, X_FLOAT, devID);
InitTensor3D(&v2, d1, d2, d3, X_FLOAT, devID);
split.Add(&q2);
split.Add(&k2);
......
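MakeBig projects k, q and v with the single weight wbig (of width 3 * d, initialized above) and then splits the result into three equal parts along the last dimension, which is why d3 is GetDim(2) / 3. A plain sketch of that three-way split (toy sizes; the q/k/v order follows the Add calls above, and the memory layout here is hypothetical):

#include <cstdio>

int main()
{
    const int d3 = 2;               /* per-part width: GetDim(2) / 3 */
    /* one position of kqv2 whose last dimension holds [q | k | v] */
    float kqv2[3 * d3] = {1, 2, 3, 4, 5, 6};

    float q2[d3], k2[d3], v2[d3];
    for (int j = 0; j < d3; j++) {
        q2[j] = kqv2[0 * d3 + j];
        k2[j] = kqv2[1 * d3 + j];
        v2[j] = kqv2[2 * d3 + j];
    }
    printf("q: %.0f %.0f  k: %.0f %.0f  v: %.0f %.0f\n",
           q2[0], q2[1], k2[0], k2[1], v2[0], v2[1]);
    return 0;
}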
......@@ -365,11 +365,11 @@ int T2TBatchLoader::LoadBatchLM(FILE * file,
dims[1] = max;
dims[2] = vSize;
InitTensor2DV2(batchEnc, sc, max, X_INT, devID);
InitTensor2DV2(label, sc, max, X_INT, devID);
InitTensorV2(gold, 3, dims, X_FLOAT, devID);
InitTensor2DV2(paddingEnc, sc, max, X_FLOAT, devID);
InitTensor2DV2(paddingDec, sc, max, X_FLOAT, devID);
InitTensor2D(batchEnc, sc, max, X_INT, devID);
InitTensor2D(label, sc, max, X_INT, devID);
InitTensor(gold, 3, dims, X_FLOAT, devID);
InitTensor2D(paddingEnc, sc, max, X_FLOAT, devID);
InitTensor2D(paddingDec, sc, max, X_FLOAT, devID);
batchEnc->SetZeroAll();
label->SetZeroAll();
......@@ -433,12 +433,12 @@ int T2TBatchLoader::LoadBatchLM(FILE * file,
paddingEnc->SetDataBatched(paddingEncOffsets, 1.0F, wCount);
paddingDec->SetDataBatched(paddingDecOffsets, 1.0F, wCount);
/*XTensor * tmp = NewTensorBufV2(paddingEnc, devID);
/*XTensor * tmp = NewTensorBuf(paddingEnc, devID);
_ConvertDataType(batchEnc, tmp);
_NotEqual(tmp, paddingEnc, 0);
DelTensorBuf(tmp);
XTensor * tmp2 = NewTensorBufV2(paddingDec, devID);
XTensor * tmp2 = NewTensorBuf(paddingDec, devID);
_ConvertDataType(batchEnc, tmp2);
_NotEqual(tmp2, paddingDec, 0);
DelTensorBuf(tmp2);*/
......@@ -563,12 +563,12 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
int sCount = sc/2;
int seqSize = 0;
InitTensor2DV2(batchEnc, sCount, maxEnc, X_INT, devID);
InitTensor2DV2(paddingEnc, sCount, maxEnc, X_FLOAT, devID);
InitTensor2DV2(batchDec, sCount, maxDec, X_INT, devID);
InitTensor2DV2(paddingDec, sCount, maxDec, X_FLOAT, devID);
InitTensor2DV2(label, sCount, maxDec, X_INT, devID);
//InitTensorV2(gold, 3, dimsDec, X_FLOAT, devID);
InitTensor2D(batchEnc, sCount, maxEnc, X_INT, devID);
InitTensor2D(paddingEnc, sCount, maxEnc, X_FLOAT, devID);
InitTensor2D(batchDec, sCount, maxDec, X_INT, devID);
InitTensor2D(paddingDec, sCount, maxDec, X_FLOAT, devID);
InitTensor2D(label, sCount, maxDec, X_INT, devID);
//InitTensor(gold, 3, dimsDec, X_FLOAT, devID);
batchEnc->SetZeroAll();
paddingEnc->SetZeroAll();
......@@ -607,7 +607,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
ws = wCountEnc;
batchEnc->SetData(batchEncValues, batchEnc->unitNum);
paddingEnc->SetDataBatched(paddingEncOffsets, 1.0F, wCountEnc);
//XTensor * tmp = NewTensorBufV2(paddingEnc, devID);
//XTensor * tmp = NewTensorBuf(paddingEnc, devID);
//_ConvertDataType(batchEnc, tmp);
//tmp->Dump(stderr, "tmp:");
//_NotEqual(tmp, paddingEnc, 0);
......@@ -656,7 +656,7 @@ int T2TBatchLoader::LoadBatchMT(FILE * file,
label->SetData(labelValues, label->unitNum);
paddingDec->SetDataBatched(paddingDecOffsets, 1.0F, wCountPad);
//XTensor * tmp2 = NewTensorBufV2(paddingDec, devID);
//XTensor * tmp2 = NewTensorBuf(paddingDec, devID);
//_ConvertDataType(batchDec, tmp2);
//_NotEqual(tmp2, paddingDec, 0);
//DelTensorBuf(tmp2);
......
......@@ -61,7 +61,7 @@ void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, bool isEnc)
LoadParamInt(argc, argv, "d", &eSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
InitTensor2DV2(&w, vSize, eSize, X_FLOAT, devID);
InitTensor2D(&w, vSize, eSize, X_FLOAT, devID);
DTYPE v = 1.0F/(float)sqrt((float)eSize);
w.SetDataRandn(0, v);
......@@ -78,7 +78,7 @@ make positional embeddings (of size eSize * length)
*/
void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length)
{
InitTensor2DV2(&posEmbeddingBase, length, eSize, X_FLOAT, devID);
InitTensor2D(&posEmbeddingBase, length, eSize, X_FLOAT, devID);
float * data = new float[posEmbeddingBase.unitNum];
......@@ -142,9 +142,9 @@ XTensor T2TEmbedder::Make(XTensor &input)
/* we make positional embeddings first */
//if(!match){
if(true){
InitTensorV2(&posEmbedding, input.order + 1, dims, X_FLOAT, devID);
InitTensor(&posEmbedding, input.order + 1, dims, X_FLOAT, devID);
XTensor * posTMP = NewTensorBufV2(2, dims + 1, X_FLOAT, devID);
XTensor * posTMP = NewTensorBuf(2, dims + 1, X_FLOAT, devID);
_CopyValues(&posEmbeddingBase, 0, posTMP->unitNum, posTMP, 0);
_Unsqueeze(posTMP, &posEmbedding, 0, dims[0]);
......
......@@ -60,11 +60,11 @@ void T2TFNN::InitModel(int argc, char ** argv, int myDevID)
LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutfnn", &dropoutP, 0);
InitTensor2DV2(&w1, inSize, hSize, X_FLOAT, devID);
InitTensor1DV2(&b1, hSize, X_FLOAT, devID);
InitTensor2D(&w1, inSize, hSize, X_FLOAT, devID);
InitTensor1D(&b1, hSize, X_FLOAT, devID);
InitTensor2DV2(&w2, hSize, outSize, X_FLOAT, devID);
InitTensor1DV2(&b2, outSize, X_FLOAT, devID);
InitTensor2D(&w2, hSize, outSize, X_FLOAT, devID);
InitTensor1D(&b2, outSize, X_FLOAT, devID);
float scale = 1.0F;
_SetDataFanInOut(&w1, scale);
......
......@@ -53,8 +53,8 @@ void T2TLN::InitModel(int argc, char ** argv, int myDevID)
d = 0;
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
InitTensor1DV2(&w, d, X_FLOAT, devID);
InitTensor1DV2(&b, d, X_FLOAT, devID);
InitTensor1D(&w, d, X_FLOAT, devID);
InitTensor1D(&b, d, X_FLOAT, devID);
w.SetDataRand(1.0F, 1.0F);
b.SetZeroAll();
......
......@@ -132,7 +132,7 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool is
dims[0] = nhead;
dims[input.order + 1] = len;
XTensor mask;
InitTensorV2(&mask, input.order + 2, dims, X_FLOAT, padding.devID);
InitTensor(&mask, input.order + 2, dims, X_FLOAT, padding.devID);
/* an upper triangular matrix where the cells of the upper triangle are set to -1e9.
this matrix can be used to prevent the attention to current or following words in
......@@ -146,14 +146,14 @@ void T2TModel::MakeLM(XTensor &input, XTensor &output, XTensor &padding, bool is
dimsPadding[padding.order - 1] = padding.GetDim(-1);
dimsPadding[padding.order] = padding.GetDim(-1);
XTensor * padding2 = NewTensorBufV2(padding.order + 1, dimsPadding, padding.dataType,
XTensor * padding2 = NewTensorBuf(padding.order + 1, dimsPadding, padding.dataType,
padding.devID);
for(int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead;
//XTensor * padding3 = NewTensorBufV2(padding.order + 2, dimsPadding, padding.dataType,
//XTensor * padding3 = NewTensorBuf(padding.order + 2, dimsPadding, padding.dataType,
// padding.devID);
//
///* mask of the padding */
......@@ -224,7 +224,7 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
dims[i + 1] = inputDec.GetDim(i);
dims[0] = nhead;
dims[inputDec.order + 1] = len;
InitTensorV2(&maskDec, inputDec.order + 2, dims, X_FLOAT, paddingDec.devID);
InitTensor(&maskDec, inputDec.order + 2, dims, X_FLOAT, paddingDec.devID);
/* an upper triangular matrix where the cells of the upper triangle are set to -1e9.
this matrix can be used to prevent the attention to current or following words in
......@@ -234,11 +234,11 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
/* encoder-decoder mask that prevents the attention to padding dummy words */
dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1);
InitTensorV2(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
XTensor * maskEncDecTMPEnc = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
XTensor * maskEncDecTMPEnc = NewTensorBuf(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
paddingEnc.devID);
XTensor * maskEncDecTMPDec = NewTensorBufV2(maskEncDecTMPEnc, paddingEnc.devID);
XTensor * maskEncDecTMPDec = NewTensorBuf(maskEncDecTMPEnc, paddingEnc.devID);
_Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1));
_ScaleAndShiftMe(maskEncDecTMPEnc, 1e9F, -1e9F);
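The ScaleAndShift call converts a {0, 1} padding indicator into an additive attention mask: y = x * 1e9 - 1e9 maps real positions (1) to 0 and padded positions (0) to -1e9. A tiny standalone check of that mapping:

#include <cstdio>

int main()
{
    float padding[4] = {1.0F, 1.0F, 0.0F, 0.0F};   /* 1 = real token, 0 = pad */
    for (int i = 0; i < 4; i++) {
        float masked = padding[i] * 1e9F - 1e9F;   /* _ScaleAndShiftMe(x, 1e9F, -1e9F) */
        printf("%g ", masked);                     /* 0 0 -1e+09 -1e+09 */
    }
    printf("\n");
    return 0;
}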
......@@ -254,14 +254,14 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1);
dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1);
XTensor * padding2 = NewTensorBufV2(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
XTensor * padding2 = NewTensorBuf(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
for (int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead;
XTensor * padding3 = NewTensorBufV2(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
XTensor * padding3 = NewTensorBuf(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
/* mask of the padding */
......@@ -270,7 +270,7 @@ void T2TModel::MakeMTMask(XTensor &inputEnc, XTensor &inputDec,
_ScaleAndShiftMe(padding3, 1e9F, -1e9F);
InitTensorV2(&maskEnc, padding3);
InitTensor(&maskEnc, padding3);
maskEnc.SetZeroAll();
/* generate the mask on the source language side (for padding) */
......@@ -298,14 +298,14 @@ void T2TModel::MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &ma
dimsPadding[paddingEnc.order - 1] = paddingEnc.GetDim(-1);
dimsPadding[paddingEnc.order] = paddingEnc.GetDim(-1);
XTensor * padding2 = NewTensorBufV2(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
XTensor * padding2 = NewTensorBuf(paddingEnc.order + 1, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
for (int i = 0; i < padding2->order; i++)
dimsPadding[i + 1] = padding2->GetDim(i);
dimsPadding[0] = nhead;
XTensor * padding3 = NewTensorBufV2(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
XTensor * padding3 = NewTensorBuf(paddingEnc.order + 2, dimsPadding, paddingEnc.dataType,
paddingEnc.devID);
/* mask of the padding */
......@@ -314,7 +314,7 @@ void T2TModel::MakeMTMaskEnc(XTensor &inputEnc, XTensor &paddingEnc, XTensor &ma
_ScaleAndShiftMe(padding3, 1e9F, -1e9F);
InitTensorV2(&maskEnc, padding3);
InitTensor(&maskEnc, padding3);
maskEnc.SetZeroAll();
/* generate the mask on the source language side (for padding) */
......@@ -344,7 +344,7 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
dims[i + 1] = inputDec.GetDim(i);
dims[0] = nhead;
dims[inputDec.order + 1] = len;
InitTensorV2(&maskDec, inputDec.order + 2, dims, X_FLOAT, paddingDec.devID);
InitTensor(&maskDec, inputDec.order + 2, dims, X_FLOAT, paddingDec.devID);
/* An upper triangular matrix where the cells of the upper triangle are set to -1e9.
This matrix can be used to block the attention to current or following words in
......@@ -359,11 +359,11 @@ void T2TModel::MakeMTMaskDec(XTensor &inputEnc, XTensor &inputDec,
/* encoder-decoder mask that prevents the attention to padding dummy words */
dims[inputDec.order + 1] = inputEnc.GetDim(inputEnc.order - 1);
InitTensorV2(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
InitTensor(&maskEncDec, inputDec.order + 2, dims, X_FLOAT, paddingEnc.devID);
XTensor * maskEncDecTMPEnc = NewTensorBufV2(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
XTensor * maskEncDecTMPEnc = NewTensorBuf(paddingEnc.order + 1, dims + 1, paddingEnc.dataType,
paddingEnc.devID);
XTensor * maskEncDecTMPDec = NewTensorBufV2(maskEncDecTMPEnc, paddingEnc.devID);
XTensor * maskEncDecTMPDec = NewTensorBuf(maskEncDecTMPEnc, paddingEnc.devID);
_Unsqueeze(&paddingEnc, maskEncDecTMPEnc, paddingEnc.order - 1, paddingDec.GetDim(-1));
......
......@@ -58,7 +58,7 @@ void T2TOutput::InitModel(int argc, char ** argv, int myDevID)
LoadParamInt(argc, argv, "d", &hSize, DEFAULT_EMBEDDING_SIZE);
LoadParamFloat(argc, argv, "outputminmax", &minmax, 0.08F);
InitTensor2DV2(&w, hSize, vSize, X_FLOAT, devID);
InitTensor2D(&w, hSize, vSize, X_FLOAT, devID);
float scale = 1.0F;
float finfout = (float)sqrt(6.0F * scale/(hSize + vSize));
......
......@@ -105,9 +105,9 @@ void T2TPredictor::Create(T2TModel * model, XTensor * top, const XTensor * input
dims[i] = input->GetDim(i);
dims[input->order - 1] = beamSize;
InitTensorV2(&state->probPath, input->order, dims, X_FLOAT, input->devID);
InitTensorV2(&state->nstep, input->order, dims, X_FLOAT, input->devID);
InitTensorV2(&state->endMark, input->order, dims, X_INT, input->devID);
InitTensor(&state->probPath, input->order, dims, X_FLOAT, input->devID);
InitTensor(&state->nstep, input->order, dims, X_FLOAT, input->devID);
InitTensor(&state->endMark, input->order, dims, X_INT, input->devID);
state->probPath.SetZeroAll();
state->nstep.SetZeroAll();
......@@ -170,7 +170,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
dims[i] = inputEnc->GetDim(i);
dims[inputEnc->order - 1] = 1;
InitTensorV2(&first, inputEnc->order, dims, X_INT, inputEnc->devID);
InitTensor(&first, inputEnc->order, dims, X_INT, inputEnc->devID);
_SetDataFixedInt(&first, startSymbol);
/* add a new word into the input sequence of the decoder side */
......@@ -194,7 +194,7 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
dims[inputDec.order - 1] = inputDec.GetDim(-1);
XTensor paddingDec;
InitTensorV2(&paddingDec, inputDec.order, dims, X_INT, paddingEnc->devID);
InitTensor(&paddingDec, inputDec.order, dims, X_INT, paddingEnc->devID);
SetDataFixedInt(paddingDec, 1);
XTensor maskDec;
......@@ -213,8 +213,8 @@ void T2TPredictor::Predict(T2TStateBundle * next, XTensor * encoding,
int stride = decoding.GetDim(decoding.order - 2);
InitTensor1DV2(&selectSrc, 1, X_INT);
InitTensor1DV2(&selectTgt, 1, X_INT);
InitTensor1D(&selectSrc, 1, X_INT);
InitTensor1D(&selectTgt, 1, X_INT);
selectSrc.SetInt(stride - 1, 0);
selectTgt.SetInt(0, 0);
......@@ -257,7 +257,7 @@ XTensor T2TPredictor::GeneratePaths(T2TStateBundle * state)
}
XTensor path;
InitTensor2DV2(&path, state->stateNum, distance, X_INT);
InitTensor2D(&path, state->stateNum, distance, X_INT);
path.SetZeroAll();
for(int i = 0; i < state->stateNum; i++){
......
......@@ -192,8 +192,8 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
for(int i = 0; i < order; i++)
dims[i] = prob.GetDim(i);
InitTensorV2(&score, &prob);
InitTensorV2(&probPath, &prob);
InitTensor(&score, &prob);
InitTensor(&probPath, &prob);
prob.Reshape(prob.unitNum/outputSize, outputSize);
score.Reshape(score.unitNum/outputSize, outputSize);
......@@ -204,8 +204,8 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
_SumDim(&prob, &probPathPrev, &probPath, 0);
InitTensorV2(&len, &lenPrev);
InitTensorV2(&lp, &lenPrev);
InitTensor(&len, &lenPrev);
InitTensor(&lp, &lenPrev);
_ScaleAndShift(&lenPrev, &len, 1.0F, 1.0F);
......@@ -225,7 +225,7 @@ void T2TSearch::Score(T2TStateBundle * prev, T2TStateBundle * beam)
_SumDim(&score, &firstMask, &score, 0);
}
InitTensorV2(&mask,
InitTensor(&mask,
prev->endMark.order, prev->endMark.dimSize, X_FLOAT,
prev->endMark.devID);
_SetDataFixedCond(&mask, &prev->endMark, -1e9F);
......@@ -279,11 +279,11 @@ void T2TSearch::Generate(T2TStateBundle * beam)
dimsTopK[order - 3] = dimsBeam[order - 3];
dimsTopK[order - 1] = beamSize;
InitTensorV2(&scoreTopK, order, dimsTopK, score.dataType,
InitTensor(&scoreTopK, order, dimsTopK, score.dataType,
score.devID);
InitTensorV2(&index, order, dimsTopK, X_INT,
InitTensor(&index, order, dimsTopK, X_INT,
score.devID);
InitTensorV2(&preID, order, dimsTopK, X_INT, -1);
InitTensor(&preID, order, dimsTopK, X_INT, -1);
score.Reshape(order, dimsBeam);
......@@ -307,25 +307,25 @@ void T2TSearch::Generate(T2TStateBundle * beam)
score.Reshape(order, dims);
/* we keep the top-k scores */
InitTensorV2(&score, &scoreTopK);
InitTensor(&score, &scoreTopK);
CopyValues(scoreTopK, score);
/* CPU data (TODO: remove GPU->CPU data copy!!!) */
XTensor indexGPU;
indexGPU = CopyValues(index);
//InitTensor(&indexCPU, index.order, index.dimSize, index.dataType, index.denseRatio, -1);
//InitTensorV2(&indexCPU, index.order, index.dimSize, index.dataType, index.denseRatio, -1);
//CopyValues(index, indexCPU);
for (int i = 0; i < indexGPU.unitNum; i++)
indexGPU.SetInt(i * stride + indexGPU.GetInt(i), i);
CheckNTErrors(XTensor::IsSameShaped(&prob, &probPath), "Wrong tensor shape!");
CheckNTErrors(IsSameShaped(prob, probPath), "Wrong tensor shape!");
/* sequence probability of top-k candidates */
XTensor probPathTopK;
InitTensorV2(&probPathTopK, &scoreTopK);
InitTensor(&probPathTopK, &scoreTopK);
XTensor probTopK;
InitTensorV2(&probTopK, &scoreTopK);
InitTensor(&probTopK, &scoreTopK);
for (int i = 0; i < probPath.order; i++) {
dims[i] = probPath.GetDim(i);
......@@ -381,7 +381,7 @@ void T2TSearch::Expand(T2TStateBundle * prev, T2TStateBundle * beam)
InitTensorOnCPU(&probPath, &probPathRef);
InitTensorOnCPU(&prediction, &predictionRef);
InitTensorOnCPU(&endMarkCPU, &predictionRef);
InitTensorV2(&endMark, &predictionRef);
InitTensor(&endMark, &predictionRef);
/* we copy the data to the CPU because frequent access to the GPU is slow,
and we can speed up the process by doing the job on the CPU. */
......@@ -502,7 +502,7 @@ void T2TSearch::Dump(XTensor * output)
int dims[3] = {batchSize, beamSize, maxLength};
int * words = new int[maxLength];
InitTensorV2(output, 3, dims, X_INT);
InitTensor(output, 3, dims, X_INT);
SetDataFixedInt(*output, -1);
/* heap for an input sentence in the batch */
......@@ -587,7 +587,7 @@ XTensor T2TSearch::MakeFirstMask(T2TStateBundle * beam)
for (int i = 0; i < order - 1; i++)
dims[i] = prob.GetDim(i);
InitTensorV2(&mask, order - 1, dims, X_FLOAT);
InitTensor(&mask, order - 1, dims, X_FLOAT);
mask.SetZeroAll();
for (int i = 0; i < mask.unitNum; i++) {
......
......@@ -367,7 +367,7 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model)
/* prediction probabilities */
XTensor probs;
InitTensor1DV2(&probs, bSize * length);
InitTensor1D(&probs, bSize * length);
XTensor labelOnehot;
......@@ -452,13 +452,13 @@ get word probabilities for a batch of sequences
float T2TTrainer::GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs)
{
XTensor probs;
InitTensor(&probs, output);
InitTensorV2(&probs, output);
_Multiply(output, gold, &probs);
/* probability of each word */
XTensor wprobs;
InitTensor1DV2(&wprobs, output->unitNum/output->GetDim(-1), X_FLOAT, output->devID);
InitTensor1D(&wprobs, output->unitNum/output->GetDim(-1), X_FLOAT, output->devID);
int dims[2] = {output->unitNum/output->GetDim(-1), output->GetDim(-1)};
probs.Reshape(2, dims);
......@@ -475,7 +475,7 @@ float T2TTrainer::GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs)
/* probability for the batch */
XTensor result;
InitTensor1DV2(&result, 1, X_FLOAT, output->devID);
InitTensor1D(&result, 1, X_FLOAT, output->devID);
_ReduceSum(&probs, &result, 1);
return result.Get1D(0);
......@@ -522,7 +522,7 @@ void T2TTrainer::Update(T2TModel * model, const float lr)
_ScaleAndShiftMe(v, (1.0F - adamBeta2), 0);
/* v2 = m / (sqrt(v) + delta) */
XTensor * v2 = NewTensorBufV2(v, v->devID);
XTensor * v2 = NewTensorBuf(v, v->devID);
_Power(v, v2, 0.5F);
_ScaleAndShiftMe(v2, 1.0F, d);
_Div(m, v2, v2);
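The comment above gives the Adam update term, v2 = m / (sqrt(v) + delta), realized here by _Power (square root), _ScaleAndShiftMe (adding the constant, named d in this scope) and _Div. A scalar check of the same arithmetic in plain floats, assuming d is the small delta constant:

#include <cmath>
#include <cstdio>

int main()
{
    float m = 0.02F;     /* first-moment estimate */
    float v = 0.0004F;   /* second-moment estimate */
    float d = 1e-8F;     /* delta, assumed small constant */

    float v2 = sqrtf(v);     /* _Power(v, v2, 0.5F)           */
    v2 = v2 * 1.0F + d;      /* _ScaleAndShiftMe(v2, 1.0F, d) */
    v2 = m / v2;             /* _Div(m, v2, v2)               */

    printf("%f\n", v2);      /* 0.02 / (0.02 + 1e-8), about 1.0 */
    return 0;
}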
......@@ -593,7 +593,7 @@ void T2TTrainer::PadOutput(XTensor * output, XTensor * gold, XTensor * padding)
output->Reshape(output->unitNum/dimso[output->order - 1], dimso[output->order - 1]);
XTensor * padding2 = NewTensorBufV2(1, &padding->unitNum, X_FLOAT, padding->devID);
XTensor * padding2 = NewTensorBuf(1, &padding->unitNum, X_FLOAT, padding->devID);
_CopyValues(padding, padding2);
_MultiplyDim(output, padding2, output, 0);
......@@ -647,7 +647,7 @@ void T2TTrainer::LabelSmooth(XTensor * gold, XTensor * smoothed, DTYPE p)
DTYPE q = 1.0F - p;
DTYPE gift = p / n;
InitTensorV2(smoothed, gold);
InitTensor(smoothed, gold);
_CopyValues(gold, smoothed);
if(p == 0)
......
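Given q = 1 - p and gift = p / n, the elided body presumably applies the standard label-smoothing rule smoothed = gold * q + gift, which moves probability mass p off the gold word and spreads it uniformly over the n-word vocabulary. A plain sketch under that assumption:

#include <cstdio>

int main()
{
    const int n = 4;           /* vocabulary size */
    const float p = 0.1F;      /* smoothing strength */
    const float q = 1.0F - p;
    const float gift = p / n;

    float gold[n] = {0.0F, 1.0F, 0.0F, 0.0F};    /* one-hot gold */
    float smoothed[n];
    for (int i = 0; i < n; i++)
        smoothed[i] = gold[i] * q + gift;        /* assumed smoothing rule */

    for (int i = 0; i < n; i++)
        printf("%.4f ", smoothed[i]);            /* 0.0250 0.9250 0.0250 0.0250 */
    printf("\n");
    return 0;
}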
......@@ -30,8 +30,9 @@
#include "XDevice.h"
#include "./test/Test.h"
#include "./core/CHeader.h"
#include "./loss/CrossEntropy.h"
#include "./XBLAS.h"
#include "./core/sort/TopK.h"
#include "./core/movement/Gather.h"
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
//#include <crtdbg.h>
......@@ -40,9 +41,6 @@ using namespace nts;
void SmallTest();
void TransposeTest();
void LittleTest();
void T2TTest();
void T2TTest2();
void PowerTest();
int main( int argc, const char ** argv )
......@@ -167,127 +165,5 @@ void TransposeTest()
delete[] data;
}
void LittleTest()
{
int a = 5000;
int b = 100000;
int c = a*b;
printf("%d\n", c);
exit(1);
}
void T2TTest()
{
XTensor * input;
XTensor * weight;
XTensor * output;
XTensor * gold;
XTensor * dedy;
XTensor * dedx;
XTensor * dedxTmp;
XTensor * dedw;
XTensor * padding;
DTYPE loss;
int * dimSize = new int[2];
dimSize[0] = 256;
dimSize[1] = 10001;
int * dimSize2 = new int[3];
dimSize2[0] = 2;
dimSize2[1] = 31;
dimSize2[2] = 256;
int * dimSize3 = new int[3];
dimSize3[0] = 2;
dimSize3[1] = 31;
dimSize3[2] = 10001;
int * dimSize4 = new int[2];
dimSize4[0] = 2;
dimSize4[1] = 31;
input = NewTensor(3, dimSize2, X_FLOAT, 1.0F, 0);
weight = NewTensor(2, dimSize, X_FLOAT, 1.0F, 0);
dedw = NewTensor(2, dimSize, X_FLOAT, 1.0F, 0);
gold = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
output = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
dedy = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
dedx = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
dedxTmp = NewTensor(3, dimSize3, X_FLOAT, 1.0F, 0);
padding = NewTensor(2, dimSize4, X_FLOAT, 1.0F, 0);
//weight = NewTensor(2, dimSize);
//dedw = NewTensor(2, dimSize);
//input = NewTensor(3, dimSize2);
//gold = NewTensor(3, dimSize3);
//output = NewTensor(3, dimSize3);
//dedy = NewTensor(3, dimSize3);
//dedx = NewTensor(3, dimSize3);
//dedxTmp = NewTensor(3, dimSize3);
//padding = NewTensor(2, dimSize4);
myRead(input, "x.txt", "x");
myRead(weight, "w.txt", "w");
myRead(gold, "gold.txt", "gold");
myRead(padding, "padding.txt", "padding");
XTensor inter;
inter = MMul(*input, *weight);
_Softmax(&inter, output, 2);
//_LogMe(output);
loss = _CrossEntropyFast(output, gold, REDUCE_MEAN, NULL, padding);
printf("loss: %f\n", loss);
_CrossEntropyBackward(dedy, output, gold, NULL);
//_CrossEntropyBackward(dedy, output, gold, NULL, padding);
myDump(dedy, "dedy.txt", "dedy");
_SoftmaxBackward(NULL, output, input, dedy, dedx, NULL, -1, NOLOSS);
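/* for softmax followed by cross-entropy, the analytic input gradient is output - gold,
   so dedxTmp is computed directly for comparison with dedx */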
_Sub(output, gold, dedxTmp);
myDump(dedx, "dedx.txt", "dedx");
dedx->Dump(stderr, "dedx", 200);
dedxTmp->Dump(stderr, "dedxTmp", 200);
input->Reshape(input->unitNum/input->GetDim(-1), input->GetDim(-1));
dedx->Reshape(dedx->unitNum/dedx->GetDim(-1), dedx->GetDim(-1));
_MatrixMulBatched(input, X_TRANS, dedx, X_NOTRANS, dedw);
myDump(dedw, "dedw.txt", "dedw");
}
void T2TTest2()
{
int dimSize[3];
dimSize[0] = 161;
dimSize[1] = 47;
dimSize[2] = 10001;
XTensor * probs = NewTensor(3, dimSize, X_FLOAT, 1.0F, 0);
//XTensor * probs = NewTensor(3, dimSize, X_FLOAT, 1.0F, -1);
//myRead(probs, "probs.txt", " ");
_SetDataFixedFloat(probs, 1.0F);
probs->Reshape(1, probs->unitNum);
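/* every element is 1.0F, so the expected sum is 161 * 47 * 10001 = 75,677,567
   (about 7.5678e+07); any deviation reflects single-precision accumulation error */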
DTYPE sum = _ReduceSumAll(probs);
printf("%e\n", sum);
//XTensor tmp;
//tmp = IsNonZero(*probs);
//DTYPE nonZeroNum = ReduceSumAll(tmp);
//printf("%f\n", nonZeroNum);
//
//DTYPE gpu = ReduceSum(*probs, 1).Get2D(0, 0);
//printf("%e\n", gpu);
}
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2019-10-21
*/
#ifndef __XCALL_H__
#define __XCALL_H__
#include "XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
* we define the "new and delete" functions below
*/
/* initialize an XTensor V2 */
void InitTensorV2(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense XTensor */
void InitTensor(XTensor * tensor,
const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense vector V2 */
void InitTensor1DV2(XTensor * tensor, const int num,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense vector */
void InitTensor1D(XTensor * tensor, const int num,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense matrix V2 */
void InitTensor2DV2(XTensor * tensor, const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense matrix */
void InitTensor2D(XTensor * tensor, const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense 3d tensor V2 */
void InitTensor3DV2(XTensor * tensor, const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 3d tensor */
void InitTensor3D(XTensor * tensor, const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense 4d tensor V2 */
void InitTensor4DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 4d tensor */
void InitTensor4D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a dense 5d tensor V2 */
void InitTensor5DV2(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, XMem * myMem = NULL);
/* initialize a dense 5d tensor */
void InitTensor5D(XTensor * tensor, const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* initialize a tensor with a reference tensor V2 */
void InitTensorV2(XTensor * tensor, const XTensor * reference);
/* initialize a tensor with a reference tensor */
void InitTensor(XTensor * tensor, const XTensor * reference);
/* initialize a tensor on the CPU with a reference tensor */
void InitTensorOnCPU(XTensor * tensor, const XTensor * reference);
/* generate an XTensor with no initialization */
XTensor * NewTensor();
/* generate an XTensor V2 */
XTensor * NewTensorV2(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const float myDenseRatio = 1.0F, const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense XTensor */
XTensor * NewTensor(const int myOrder, const int * myDimSize, const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate an XTensor which allocates data on the buffer V2 */
XTensor * NewTensorBufV2(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const float myDenseRatio = 1.0F,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense XTensor which allocates data on the buffer */
XTensor * NewTensorBuf(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* generate an XTensor which allocates data on the buffer V2 */
XTensor * NewTensorBufV2(const XTensor * reference, int devID, XMem * myMem);
/* generate an XTensor which allocates data on the buffer */
XTensor * NewTensorBuf(const XTensor * reference, int devID, const bool isEnableGrad = true);
/* generate a dense vector V2 */
XTensor * NewTensor1DV2(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1,
XMem * myMem = NULL);
/* generate a dense vector */
XTensor * NewTensor1D(const int num, const TENSOR_DATA_TYPE myDataType = X_FLOAT, const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense matrix V2 */
XTensor * NewTensor2DV2(const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense matrix */
XTensor * NewTensor2D(const int rowNum, const int colNum,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense 3d tensor V2 */
XTensor * NewTensor3DV2(const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 3d tensor */
XTensor * NewTensor3D(const int d0, const int d1, const int d2,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense 4d tensor V2 */
XTensor * NewTensor4DV2(const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 4d tensor */
XTensor * NewTensor4D(const int d0, const int d1, const int d2, const int d3,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense 5d tensor V2 */
XTensor * NewTensor5DV2(const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, XMem * myMem = NULL);
/* generate a dense 5d tensor */
XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, const int d4,
const TENSOR_DATA_TYPE myDataType = X_FLOAT,
const int myDevID = -1, const bool isEnableGrad = true);
/* generate a dense vector by range */
XTensor * NewTensorRange(int lower, int upper, int step, const TENSOR_DATA_TYPE myDataType = X_INT, const int myDevID = -1, const bool isEnableGrad = true);
/* generate a copy of XTensor (with a reference to a given tensor) */
XTensor * NewTensor(const XTensor * a, bool isFilledData = true);
/* free the data space of a given tensor */
void DelTensor(XTensor * tensor);
/* free the data space of a given tensor (on the buffer) */
void DelTensorBuf(XTensor * tensor);
} // namespace nts(NiuTrans.Tensor)
#endif // __XCALL_H__
\ No newline at end of file
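A minimal usage sketch for the buffer allocators declared above, assuming the NiuTrans.Tensor build environment (not standalone): NewTensorBufV2 allocates the tensor's data on the buffer of the given device, and DelTensorBuf releases it.

#include "XCall.h"
using namespace nts;

/* scratch-tensor pattern used throughout this commit:
   allocate on the buffer, use as temporary storage, then release */
void ScratchExample(XTensor * input)
{
    XTensor * tmp = NewTensorBufV2(input, input->devID, input->mem);
    /* ... use tmp, e.g. as the target of _Merge before a _Sum ... */
    DelTensorBuf(tmp);
}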
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
......@@ -50,14 +50,6 @@ int CONST_MINUSONE = -1;
bool CONST_TRUE = true;
int verboseLevel = 0;
bool useBLAS = false;
#ifdef USE_CUDA
bool useCUDA = true;
#else
bool useCUDA = false;
#endif
FILE * tmpLog = NULL;
double myTime = 0;
......
......@@ -135,8 +135,6 @@ extern bool CONST_TRUE;
#define NIUTRANSNNDEBUG
extern int verboseLevel;
extern bool useBLAS;
extern bool useCUDA;
#define FFLUSH(FILEH) \
{ \
......
......@@ -1562,9 +1562,9 @@ void XMemManager::GetBufferSize(MTYPE freeMem, MTYPE * myBufSize)
if (freeMem >= MILLION * 512){
*myBufSize = MILLION * 128;
if (freeMem >= MILLION * 1024) {
*myBufSize = MILLION * 256;
*myBufSize = MILLION * 128;
if (freeMem >= MILLION * 2048)
*myBufSize = MILLION * 512;
*myBufSize = MILLION * 128;
}
}
}
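After this change every tier at or above 512MB of free memory receives the same 128MB buffer, i.e. the buffer size is effectively capped at 128MB. A standalone replica of the new tiering (assuming MILLION is defined as 1024 * 1024):

#include <cstdio>

typedef unsigned long long MTYPE;
const MTYPE MILLION = 1024 * 1024;   /* assumed definition */

MTYPE GetBufferSizeDemo(MTYPE freeMem)
{
    MTYPE bufSize = 0;
    if (freeMem >= MILLION * 512) {
        bufSize = MILLION * 128;
        if (freeMem >= MILLION * 1024) {
            bufSize = MILLION * 128;         /* was MILLION * 256 */
            if (freeMem >= MILLION * 2048)
                bufSize = MILLION * 128;     /* was MILLION * 512 */
        }
    }
    return bufSize;
}

int main()
{
    printf("%llu MB\n", GetBufferSizeDemo(MILLION * 4096) / MILLION);   /* 128 MB */
    return 0;
}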
......
......@@ -86,11 +86,14 @@
#include "shape/Stack.h"
#include "shape/Transpose.h"
#include "shape/Unsqueeze.h"
#include "shape/IsSameShaped.h"
#include "sort/Sort.h"
#include "sort/TopK.h"
#include "utilities/XMatrixSegment.h"
#include "utilities/FlushToMem.h"
#include "utilities/CheckData.h"
#include "utilities/SetAscendingOrder.h"
#endif // __CHEADER_H__
......@@ -22,6 +22,7 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "../shape/IsSameShaped.h"
#include "Div.h"
#include "Div.cuh"
#include "DivDim.h"
......@@ -48,9 +49,6 @@ void _Div(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int le
"Unmatched tensors!");
CheckDev(a->devID, b->devID);
int leadingDimRDI = a->order - leadingDim - 1;
#ifdef USE_CUDA
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
_CudaDiv(a, b, c, alpha, leadingDim);
......@@ -63,17 +61,17 @@ void _Div(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int le
int blockSizeB = 1;
int blockSizeC = 1;
int blockNum = 1;
int dimensionSizeA = a->dimSizeRDI[leadingDimRDI];
int dimensionSizeB = b->dimSizeRDI[leadingDimRDI];
int dimensionSizeC = c->dimSizeRDI[leadingDimRDI];
int dimensionSizeA = a->dimSize[leadingDim];
int dimensionSizeB = b->dimSize[leadingDim];
int dimensionSizeC = c->dimSize[leadingDim];
for (int i = 0; i < a->order; i++) {
if (i != leadingDimRDI) {
CheckNTErrors((a->dimSizeRDI[i] == b->dimSizeRDI[i] && a->dimSizeRDI[i] == c->dimSizeRDI[i]),
if (i != leadingDim) {
CheckNTErrors((a->dimSize[i] == b->dimSize[i] && a->dimSize[i] == c->dimSize[i]),
"Unmatched tensors!");
}
if (i < leadingDimRDI)
stride *= a->dimSizeRDI[i];
if (i > leadingDim)
stride *= a->dimSize[i];
}
blockSizeA = stride * dimensionSizeA;
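With the row-major dimSize indexing that replaces dimSizeRDI here, the stride of the leading dimension is the product of all dimensions after it. A quick standalone check of the stride/block computation (toy shape):

#include <cstdio>

int main()
{
    int dimSize[3] = {4, 7, 3};   /* a 4 x 7 x 3 tensor */
    int leadingDim = 1;

    int stride = 1;
    for (int i = 0; i < 3; i++)
        if (i > leadingDim)
            stride *= dimSize[i];             /* dims after the leading one */

    int dimensionSize = dimSize[leadingDim];
    int blockSize = stride * dimensionSize;   /* elements per block */

    printf("stride = %d, blockSize = %d\n", stride, blockSize);   /* 3, 21 */
    return 0;
}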
......@@ -168,7 +166,7 @@ int GetDivDimIndex(const XTensor &a, const XTensor &b)
{
if(a.order < b.order)
return -1;
if(XTensor::IsSameShaped(&a, &b))
if(IsSameShaped(a, b))
return -1;
int hitCount = 0;
......@@ -253,8 +251,8 @@ where i is the index of the item
*/
void Div(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
int n = GetDivDimIndex(a, b);
......
......@@ -122,7 +122,6 @@ where i is the item index
*/
void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
......@@ -130,18 +129,18 @@ void _CudaDiv(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, in
int stride = 1;
int blockSizeA = 1;
int blockNum = 1;
int dimensionSizeA = a->dimSizeRDI[leadingDimRDI];
int dimensionSizeB = b->dimSizeRDI[leadingDimRDI];
int dimensionSizeC = c->dimSizeRDI[leadingDimRDI];
int dimensionSizeA = a->dimSize[leadingDim];
int dimensionSizeB = b->dimSize[leadingDim];
int dimensionSizeC = c->dimSize[leadingDim];
for (int i = 0; i < a->order; i++) {
if (i != leadingDimRDI) {
CheckNTErrors((a->dimSizeRDI[i] == b->dimSizeRDI[i] &&
a->dimSizeRDI[i] == c->dimSizeRDI[i]),
if (i != leadingDim) {
CheckNTErrors((a->dimSize[i] == b->dimSize[i] &&
a->dimSize[i] == c->dimSize[i]),
"Unmatched tensors!");
}
if (i < leadingDimRDI)
stride *= a->dimSizeRDI[i];
if (i > leadingDim)
stride *= a->dimSize[i];
}
blockSizeA = stride * dimensionSizeA;
......
......@@ -26,6 +26,7 @@
#include "../../XName.h"
#include "../../XUtility.h"
#include "../movement/CopyValues.h"
#include "../shape/IsSameShaped.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -56,7 +57,7 @@ void _DivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alp
CheckDev(a->devID, b->devID);
if(XTensor::IsSameShaped(a, b)){
if(_IsSameShaped(a, b)){
_Div(a, b, c, alpha);
return;
}
......@@ -188,8 +189,8 @@ i.e., a is divided with b by broadcasting
*/
void DivDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE alpha)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
/* call _Div function */
......
......@@ -24,6 +24,7 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "../shape/IsSameShaped.h"
#include "Mask.h"
#include "Mask.cuh"
......@@ -171,8 +172,8 @@ where i is the index of the element
*/
void Mask(const XTensor &a, const XTensor &mask, XTensor &c, DTYPE alpha)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
/* call _Mask function */
......
......@@ -22,6 +22,7 @@
#include "../../XTensor.h"
#include "../../XDevice.h"
#include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "MatrixMulBatched.h"
#include "XTensorBLAS.h"
#include "MatrixMul2D.h"
......@@ -94,27 +95,27 @@ void _MatrixMulBatchedGPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
"Input tensor and output tensor must have same order!");
CheckNTErrors(a->devID >= 0 && b->devID >= 0 && c->devID >= 0, "The tensors must be on GPUs");
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1];
int bm = transposedB == X_TRANS ? b->dimSizeRDI[1] : b->dimSizeRDI[0];
int cn = c->dimSizeRDI[1];
int cm = c->dimSizeRDI[0];
int an = transposedA == X_TRANS ? a->dimSize[a->order - 1] : a->dimSize[a->order - 2];
int am = transposedA == X_TRANS ? a->dimSize[a->order - 2] : a->dimSize[a->order - 1];
int bn = transposedB == X_TRANS ? b->dimSize[b->order - 1] : b->dimSize[b->order - 2];
int bm = transposedB == X_TRANS ? b->dimSize[b->order - 2] : b->dimSize[b->order - 1];
int cn = c->dimSize[c->order - 2];
int cm = c->dimSize[c->order - 1];
CheckNTErrors((am == bn && an == cn && bm == cm), "Unmatched tensors in multiplication!");
int aBlockSize = a->dimSizeRDI[0] * a->dimSizeRDI[1];
int bBlockSize = b->dimSizeRDI[0] * b->dimSizeRDI[1];
int cBlockSize = c->dimSizeRDI[0] * c->dimSizeRDI[1];
int aBlockSize = a->dimSize[a->order - 1] * a->dimSize[a->order - 2];
int bBlockSize = b->dimSize[b->order - 1] * b->dimSize[b->order - 2];
int cBlockSize = c->dimSize[c->order - 1] * c->dimSize[c->order - 2];
int aRealBlockSize = aBlockSize * a->unitSize;
int bRealBlockSize = bBlockSize * b->unitSize;
int cRealBlockSize = cBlockSize * c->unitSize;
int blockNum = 1;
for (int i = 2; i < a->order; i++) {
CheckNTErrors((a->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!");
CheckNTErrors((b->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!");
blockNum *= a->dimSizeRDI[i];
for (int i = 0; i < a->order - 2; i++) {
CheckNTErrors((a->dimSize[i] == c->dimSize[i]), "Incorrect tensor sizes!");
CheckNTErrors((b->dimSize[i] == c->dimSize[i]), "Incorrect tensor sizes!");
blockNum *= a->dimSize[i];
}
int devIDBackup = 0;
......@@ -125,9 +126,9 @@ void _MatrixMulBatchedGPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
a->data, transposedA, a->dataType, aBlockSize,
b->data, transposedB, b->dataType, bBlockSize,
c->data, c->dataType, cBlockSize, blockNum,
a->dimSizeRDI[1], a->dimSizeRDI[0],
b->dimSizeRDI[1], b->dimSizeRDI[0],
c->dimSizeRDI[1], c->dimSizeRDI[0], alpha, beta);
a->dimSize[a->order - 2], a->dimSize[a->order - 1],
b->dimSize[b->order - 2], b->dimSize[b->order - 1],
c->dimSize[c->order - 2], c->dimSize[c->order - 1], alpha, beta);
BacktoCudaDev(a->devID, devIDBackup);
#endif
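The rewrite reads the matrix dimensions directly from the last two entries of dimSize instead of the reversed dimSizeRDI view, and the batch count is the product of all leading dimensions. A standalone check with toy shapes:

#include <cstdio>

int main()
{
    /* a: 8 x 2 x 4 x 5, b: 8 x 2 x 5 x 6 (batched matrices, no transpose) */
    int aDim[4] = {8, 2, 4, 5};
    int bDim[4] = {8, 2, 5, 6};
    int order = 4;

    int an = aDim[order - 2], am = aDim[order - 1];   /* 4 x 5 */
    int bn = bDim[order - 2], bm = bDim[order - 1];   /* 5 x 6 */

    int blockNum = 1;
    for (int i = 0; i < order - 2; i++)
        blockNum *= aDim[i];                          /* 8 * 2 = 16 batches */

    printf("am == bn: %d, c is %d x %d per block, blockNum = %d\n",
           am == bn, an, bm, blockNum);               /* 1, 4 x 6, 16 */
    return 0;
}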
......@@ -163,36 +164,36 @@ void _MatrixMulBatchedCPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
"Input tensor and output tensor must have same order!");
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1];
int bm = transposedB == X_TRANS ? b->dimSizeRDI[1] : b->dimSizeRDI[0];
int cn = c->dimSizeRDI[1];
int cm = c->dimSizeRDI[0];
int an = transposedA == X_TRANS ? a->dimSize[a->order - 1] : a->dimSize[a->order - 2];
int am = transposedA == X_TRANS ? a->dimSize[a->order - 2] : a->dimSize[a->order - 1];
int bn = transposedB == X_TRANS ? b->dimSize[b->order - 1] : b->dimSize[b->order - 2];
int bm = transposedB == X_TRANS ? b->dimSize[b->order - 2] : b->dimSize[b->order - 1];
int cn = c->dimSize[c->order - 2];
int cm = c->dimSize[c->order - 1];
CheckNTErrors(am == bn && an == cn && bm == cm, "Unmatched tensors in multiplication!");
int aBlockSize = a->dimSizeRDI[0] * a->dimSizeRDI[1];
int bBlockSize = b->dimSizeRDI[0] * b->dimSizeRDI[1];
int cBlockSize = c->dimSizeRDI[0] * c->dimSizeRDI[1];
int aBlockSize = a->dimSize[a->order - 1] * a->dimSize[a->order - 2];
int bBlockSize = b->dimSize[b->order - 1] * b->dimSize[b->order - 2];
int cBlockSize = c->dimSize[c->order - 1] * c->dimSize[c->order - 2];
int aRealBlockSize = aBlockSize * a->unitSize;
int bRealBlockSize = bBlockSize * b->unitSize;
int cRealBlockSize = cBlockSize * c->unitSize;
int blockNum = 1;
for (int i = 2; i < a->order; i++) {
CheckNTErrors((a->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!");
CheckNTErrors((b->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!");
blockNum *= a->dimSizeRDI[i];
for (int i = 0; i < a->order - 2; i++) {
CheckNTErrors((a->dimSize[i] == c->dimSize[i]), "Incorrect tensor sizes!");
CheckNTErrors((b->dimSize[i] == c->dimSize[i]), "Incorrect tensor sizes!");
blockNum *= a->dimSize[i];
}
int aDimSize[2] = {-a->dimSizeRDI[1], a->dimSizeRDI[0]};
int bDimSize[2] = {-b->dimSizeRDI[1], b->dimSizeRDI[0]};
int cDimSize[2] = {-c->dimSizeRDI[1], c->dimSizeRDI[0]};
int aDimSize[2] = {-a->dimSize[a->order - 2], a->dimSize[a->order - 1]};
int bDimSize[2] = {-b->dimSize[b->order - 2], b->dimSize[b->order - 1]};
int cDimSize[2] = {-c->dimSize[c->order - 2], c->dimSize[c->order - 1]};
XTensor * ai = NewTensor2D(aDimSize[0], aDimSize[1], a->dataType, a->devID, a->mem);
XTensor * bi = NewTensor2D(bDimSize[0], bDimSize[1], b->dataType, b->devID, b->mem);
XTensor * ci = NewTensor2D(cDimSize[0], cDimSize[1], c->dataType, c->devID, c->mem);
XTensor * ai = NewTensor2DV2(aDimSize[0], aDimSize[1], a->dataType, a->devID, a->mem);
XTensor * bi = NewTensor2DV2(bDimSize[0], bDimSize[1], b->dataType, b->devID, b->mem);
XTensor * ci = NewTensor2DV2(cDimSize[0], cDimSize[1], c->dataType, c->devID, c->mem);
for (int i = 0; i < blockNum; i++) {
ai->data = (char*)a->data + i * aRealBlockSize;
......@@ -242,9 +243,9 @@ void _MatrixMulBatchedCPU(const TensorList * a, MATRIX_TRANS_TYPE transposedA,
XTensor * ai = (XTensor*)a->GetItem(i);
XTensor * bi = (XTensor*)b->GetItem(i);
XTensor * ci = (XTensor*)c->GetItem(i);
if (!XTensor::IsSameShaped(aim, ai) ||
!XTensor::IsSameShaped(bim, bi) ||
!XTensor::IsSameShaped(cim, ci))
if (!_IsSameShaped(aim, ai) ||
!_IsSameShaped(bim, bi) ||
!_IsSameShaped(cim, ci))
{
isUniform = false;
break;
......@@ -291,10 +292,10 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have an order >= 2!");
CheckNTErrors(a.order == b.order, "Input tensor and output tensor must have same order!");
int an = transposedA == X_TRANS ? a.dimSizeRDI[0] : a.dimSizeRDI[1];
int am = transposedA == X_TRANS ? a.dimSizeRDI[1] : a.dimSizeRDI[0];
int bn = transposedB == X_TRANS ? b.dimSizeRDI[0] : b.dimSizeRDI[1];
int bm = transposedB == X_TRANS ? b.dimSizeRDI[1] : b.dimSizeRDI[0];
int an = transposedA == X_TRANS ? a.dimSize[a.order - 1] : a.dimSize[a.order - 2];
int am = transposedA == X_TRANS ? a.dimSize[a.order - 2] : a.dimSize[a.order - 1];
int bn = transposedB == X_TRANS ? b.dimSize[b.order - 1] : b.dimSize[b.order - 2];
int bm = transposedB == X_TRANS ? b.dimSize[b.order - 2] : b.dimSize[b.order - 1];
CheckNTErrors(am == bn, "Unmatched tensors in multiplication!");
......@@ -349,10 +350,10 @@ XTensor MatrixMulBatched(const XTensor &a, const XTensor &b,
CheckNTErrors(a.order >= 2 && b.order >= 2, "Input tensors must have an order >= 2!");
CheckNTErrors(a.order == b.order, "Input tensor and output tensor must have same order!");
int an = a.dimSizeRDI[1];
int am = a.dimSizeRDI[0];
int bn = b.dimSizeRDI[1];
int bm = b.dimSizeRDI[0];
int an = a.dimSize[a.order - 2];
int am = a.dimSize[a.order - 1];
int bn = b.dimSize[b.order - 2];
int bm = b.dimSize[b.order - 1];
CheckNTErrors(am == bn, "Unmatched tensors in multiplication!");
......
......@@ -37,7 +37,7 @@ int GetSumIndex(const XTensor &a, const XTensor &b)
{
if (a.order < b.order)
return -1;
if (XTensor::IsSameShaped(&a, &b))
if (IsSameShaped(a, b))
return -1;
int hitCount = 0;
......@@ -71,26 +71,27 @@ XTensor MulAndShift(const XTensor &x, const XTensor &w, const XTensor &b,
CheckNTErrors(x.dataType == w.dataType, "Input tensors should have the same data type!");
CheckNTErrors(x.order >= 2 && w.order >= 2, "Input tensors must have an order >= 2!");
int xn = x.dimSizeRDI[1];
int xm = x.dimSizeRDI[0];
int wn = w.dimSizeRDI[1];
int wm = w.dimSizeRDI[0];
int xn = x.dimSize[x.order - 2];
int xm = x.dimSize[x.order - 1];
int wn = w.dimSize[w.order - 2];
int wm = w.dimSize[w.order - 1];
CheckNTErrors(xm == wn, "Unmatched tensors in multiplication!");
int order = x.order + w.order - 2;
int sub = 0;
int * dimSize = new int[order];
for (int i = 2; i < x.order; i++)
dimSize[sub++] = x.dimSizeRDI[x.order + 1 - i];
for (int i = 2; i < w.order; i++)
dimSize[sub++] = w.dimSizeRDI[w.order + 1 - i];
for (int i = 0; i < x.order - 2; i++)
dimSize[sub++] = x.dimSize[i];
for (int i = 0; i < w.order - 2; i++)
dimSize[sub++] = w.dimSize[i];
dimSize[sub++] = xn;
dimSize[sub++] = wm;
float dr = (!x.isSparse || !w.isSparse) ? 1.0F : MAX(x.denseRatio, w.denseRatio);
XTensor * tmp = NewTensorBuf(order, dimSize, x.dataType, dr, x.devID, x.mem);
XTensor * tmp = NewTensorBufV2(order, dimSize, x.dataType, dr, x.devID, x.mem);
/* call _MatrixMul function */
_MatrixMul(&x, X_NOTRANS, &w, X_NOTRANS, tmp, alpha, 0, parallelRunner);
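The output shape of MulAndShift concatenates the batch dimensions of x and w and then appends [xn, wm]. A standalone check with toy shapes (no transposition):

#include <cstdio>

int main()
{
    /* x: 16 x 32 x 512 (batch of matrices), w: 512 x 1024 */
    int xDim[3] = {16, 32, 512};
    int wDim[2] = {512, 1024};
    int xOrder = 3, wOrder = 2;

    int xn = xDim[xOrder - 2], xm = xDim[xOrder - 1];
    int wn = wDim[wOrder - 2], wm = wDim[wOrder - 1];

    int order = xOrder + wOrder - 2, sub = 0;
    int dimSize[3];
    for (int i = 0; i < xOrder - 2; i++) dimSize[sub++] = xDim[i];
    for (int i = 0; i < wOrder - 2; i++) dimSize[sub++] = wDim[i];
    dimSize[sub++] = xn;
    dimSize[sub++] = wm;

    printf("xm == wn: %d, output: %d x %d x %d\n",
           xm == wn, dimSize[0], dimSize[1], dimSize[2]);   /* 1, 16 x 32 x 1024 */
    return 0;
}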
......@@ -148,24 +149,24 @@ XTensor MulAndShift(const XTensor& x, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors(x.dataType == w.dataType, "Input tensors should have the same data type!");
CheckNTErrors(x.order >= 2 && w.order >= 2, "Input tensors must have an order >= 2!");
int xn = transposedA == X_TRANS ? x.dimSizeRDI[0] : x.dimSizeRDI[1];
int xm = transposedA == X_TRANS ? x.dimSizeRDI[1] : x.dimSizeRDI[0];
int wn = transposedB == X_TRANS ? w.dimSizeRDI[0] : w.dimSizeRDI[1];
int wm = transposedB == X_TRANS ? w.dimSizeRDI[1] : w.dimSizeRDI[0];
int xn = transposedA == X_TRANS ? x.dimSize[x.order - 1] : x.dimSize[x.order - 2];
int xm = transposedA == X_TRANS ? x.dimSize[x.order - 2] : x.dimSize[x.order - 1];
int wn = transposedB == X_TRANS ? w.dimSize[w.order - 1] : w.dimSize[w.order - 2];
int wm = transposedB == X_TRANS ? w.dimSize[w.order - 2] : w.dimSize[w.order - 1];
int order = x.order + w.order - 2;
int sub = 0;
int * dimSize = new int[order];
for (int i = 2; i < x.order; i++)
dimSize[sub++] = x.dimSizeRDI[x.order + 1 - i];
for (int i = 2; i < w.order; i++)
dimSize[sub++] = w.dimSizeRDI[w.order + 1 - i];
for (int i = 0; i < x.order - 2; i++)
dimSize[sub++] = x.dimSize[i];
for (int i = 0; i < w.order - 2; i++)
dimSize[sub++] = w.dimSize[i];
dimSize[sub++] = xn;
dimSize[sub++] = wm;
float dr = (!x.isSparse || !w.isSparse) ? 1.0F : MAX(x.denseRatio, w.denseRatio);
XTensor * tmp = NewTensorBuf(order, dimSize, x.dataType, dr, x.devID, x.mem);
XTensor * tmp = NewTensorBufV2(order, dimSize, x.dataType, dr, x.devID, x.mem);
/* call _MatrixMul function */
_MatrixMul(&x, transposedA, &w, transposedB, tmp, alpha, 0, parallelRunner);
......
......@@ -22,6 +22,7 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "../shape/IsSameShaped.h"
#include "Multiply.h"
#include "Multiply.cuh"
#include "MultiplyDim.h"
......@@ -48,9 +49,6 @@ void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, i
"Unmatched tensors!");
CheckDev(a->devID, b->devID);
int leadingDimRDI = a->order - leadingDim - 1;
#ifdef USE_CUDA
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
_CudaMultiply(a, b, c, alpha, leadingDim);
......@@ -63,18 +61,18 @@ void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, i
int blockSizeB = 1;
int blockSizeC = 1;
int blockNum = 1;
int dimensionSizeA = a->dimSizeRDI[leadingDimRDI];
int dimensionSizeB = b->dimSizeRDI[leadingDimRDI];
int dimensionSizeC = c->dimSizeRDI[leadingDimRDI];
int dimensionSizeA = a->dimSize[leadingDim];
int dimensionSizeB = b->dimSize[leadingDim];
int dimensionSizeC = c->dimSize[leadingDim];
for (int i = 0; i < a->order; i++) {
if (i != leadingDimRDI) {
CheckNTErrors((a->dimSizeRDI[i] == b->dimSizeRDI[i] &&
a->dimSizeRDI[i] == c->dimSizeRDI[i]),
if (i != leadingDim) {
CheckNTErrors((a->dimSize[i] == b->dimSize[i] &&
a->dimSize[i] == c->dimSize[i]),
"Unmatched tensors!");
}
if (i < leadingDimRDI)
stride *= a->dimSizeRDI[i];
if (i > leadingDim)
stride *= a->dimSize[i];
}
blockSizeA = stride * dimensionSizeA;
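/* e.g., for tensors of shape (2, 3, 4) and leadingDim = 1:
   stride = 4 (product of the dims after leadingDim),
   dimensionSizeA = 3, so blockSizeA = 12 */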
......@@ -169,7 +167,7 @@ int GetMultiplyDimIndex(const XTensor &a, const XTensor &b)
{
if(a.order < b.order)
return -1;
if(XTensor::IsSameShaped(&a, &b))
if(IsSameShaped(a, b))
return -1;
int hitCount = 0;
......@@ -254,8 +252,8 @@ where i is the index of the item
*/
void Multiply(const XTensor &a, const XTensor &b, XTensor &c, DTYPE alpha, int leadingDim)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
int n = GetMultiplyDimIndex(a, b);
......
......@@ -122,26 +122,25 @@ where i is the item index
*/
void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors(a->unitNum <= c->unitNum && b->unitNum <= c->unitNum,
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!");
CheckNTErrors(a->order == b->order && a->order == c->order, "Unmatched tensors!");
CheckNTErrors((a->order == b->order && a->order == c->order), "Unmatched tensors!");
int stride = 1;
int blockSizeA = 1;
int blockNum = 1;
int dimensionSizeA = a->dimSizeRDI[leadingDimRDI];
int dimensionSizeB = b->dimSizeRDI[leadingDimRDI];
int dimensionSizeC = c->dimSizeRDI[leadingDimRDI];
int dimensionSizeA = a->dimSize[leadingDim];
int dimensionSizeB = b->dimSize[leadingDim];
int dimensionSizeC = c->dimSize[leadingDim];
for (int i = 0; i < a->order; i++) {
if (i != leadingDimRDI) {
CheckNTErrors((a->dimSizeRDI[i] == b->dimSizeRDI[i] &&
a->dimSizeRDI[i] == c->dimSizeRDI[i]),
if (i != leadingDim) {
CheckNTErrors((a->dimSize[i] == b->dimSize[i] &&
a->dimSize[i] == c->dimSize[i]),
"Unmatched tensors!");
}
if (i < leadingDimRDI)
stride *= a->dimSizeRDI[i];
if (i > leadingDim)
stride *= a->dimSize[i];
}
blockSizeA = stride * dimensionSizeA;
......
......@@ -24,6 +24,7 @@
#include "MultiplyDim.h"
#include "MultiplyDim.cuh"
#include "../shape/Unsqueeze.h"
#include "../shape/IsSameShaped.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "../movement/CopyValues.h"
......@@ -57,7 +58,7 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP
CheckDev(a->devID, b->devID);
if(XTensor::IsSameShaped(a, b)){
if(_IsSameShaped(a, b)){
_Multiply(a, b, c, alpha);
return;
}
......@@ -203,8 +204,8 @@ i.e., a is multiplied with b by broadcasting
*/
void MultiplyDim(const XTensor &a, const XTensor &b, XTensor &c, int n)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
/* call _Multiply function */
......@@ -280,8 +281,8 @@ void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE
dimsS[0] = -dimsS[0];
dimsT[0] = -dimsT[0];
XTensor * s = NewTensor(order - (j - i), dimsS, a->dataType, a->denseRatio, a->devID, a->mem);
XTensor * t = NewTensor(order - (j - i) + 1, dimsT, b->dataType, b->denseRatio, b->devID, b->mem);
XTensor * s = NewTensorV2(order - (j - i), dimsS, a->dataType, a->denseRatio, a->devID, a->mem);
XTensor * t = NewTensorV2(order - (j - i) + 1, dimsT, b->dataType, b->denseRatio, b->devID, b->mem);
if(count == 0)
source = b->data;
......@@ -371,8 +372,8 @@ where some of dimensions of b can be of size 1
*/
void MultiplyBroadcast(const XTensor &a, const XTensor &b, XTensor &c)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
/* call _SumBroadcast function */
......
......@@ -22,6 +22,7 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "../shape/IsSameShaped.h"
#include "Sub.h"
#include "Sub.cuh"
#include "SubDim.h"
......@@ -149,7 +150,7 @@ int GetSubDimIndex(const XTensor &a, const XTensor &b)
{
if(a.order < b.order)
return -1;
if(XTensor::IsSameShaped(&a, &b))
if(IsSameShaped(a, b))
return -1;
int hitCount = 0;
......@@ -223,8 +224,8 @@ tensor subtraction c = a - b * \beta
*/
void Sub(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
int n = GetSubDimIndex(a, b);
......
......@@ -26,6 +26,7 @@
#include "../../XName.h"
#include "../../XUtility.h"
#include "../movement/CopyValues.h"
#include "../shape/IsSameShaped.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -61,7 +62,7 @@ void _SubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet
return;
}
if (XTensor::IsSameShaped(a, b)) {
if (_IsSameShaped(a, b)) {
_Sub(a, b, c, beta);
return;
}
......@@ -188,8 +189,8 @@ i.e., a is subtracted with b by broadcasting
*/
void SubDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
/* call _Sub function */
......
......@@ -24,6 +24,7 @@
#include "../../XUtility.h"
#include "../../XBLAS.h"
#include "../movement/CopyValues.h"
#include "../shape/IsSameShaped.h"
#include "Sum.h"
#include "Sum.cuh"
#include "SumDim.h"
......@@ -77,7 +78,7 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
else {
if (!a->isSparse && !b->isSparse) {
CheckNTErrors(!c->isSparse, "Illegal use of sparse tensor in addition!");
if (a->dataType == DEFAULT_DTYPE &&
b->dataType == DEFAULT_DTYPE &&
c->dataType == DEFAULT_DTYPE)
......@@ -183,7 +184,7 @@ int GetSumDimIndex(const XTensor &a, const XTensor &b)
{
if(a.order < b.order)
return -1;
if(XTensor::IsSameShaped(&a, &b))
if(IsSameShaped(a, b))
return -1;
int hitCount = 0;
......@@ -256,8 +257,8 @@ tensor summation c = a + b * \beta
*/
void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
int n = GetSumDimIndex(a, b);
......
......@@ -26,6 +26,7 @@
#include "SumDim.h"
#include "SumDim.cuh"
#include "../shape/Unsqueeze.h"
#include "../shape/IsSameShaped.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "../movement/CopyValues.h"
......@@ -64,25 +65,11 @@ void _SumDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet
return;
}
if(XTensor::IsSameShaped(a, b)){
if(_IsSameShaped(a, b)){
_Sum(a, b, c, beta);
return;
}
/*int dims[MAX_TENSOR_DIM_NUM];
for(int i = 0; i < a->order; i++)
dims[i] = 1;
dims[n] = a->GetDim(n);
XTensor * b2 = NewTensor(a->order, dims, b->dataType, b->denseRatio, b->devID, b->mem);
_CopyValues(b, b2);
_SumBroadcast(a, b2, c, beta);
DelTensor(b2);
return;*/
if(a->devID >= 0 || b->devID >= 0 || c->devID >= 0){
#ifdef USE_CUDA
_CudaSumDim(a, b, c, n, beta);
......@@ -205,8 +192,8 @@ i.e., a is summed with b by broadcasting
*/
void SumDim(const XTensor &a, const XTensor &b, XTensor &c, int n, DTYPE beta)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
/* call _SumDim function */
......@@ -281,8 +268,8 @@ void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta
dimsS[0] = -dimsS[0];
dimsT[0] = -dimsT[0];
XTensor * s = NewTensor(order - (j - i), dimsS, a->dataType, a->denseRatio, a->devID, a->mem);
XTensor * t = NewTensor(order - (j - i) + 1, dimsT, b->dataType, b->denseRatio, b->devID, b->mem);
XTensor * s = NewTensorV2(order - (j - i), dimsS, a->dataType, a->denseRatio, a->devID, a->mem);
XTensor * t = NewTensorV2(order - (j - i) + 1, dimsT, b->dataType, b->denseRatio, b->devID, b->mem);
if(count == 0)
source = b->data;
......@@ -374,8 +361,8 @@ c = a + b * \beta
*/
void SumBroadcast(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
{
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) {
InitTensor(&c, &a);
if (!c.isInit || !IsSameShaped(a, c)) {
InitTensorV2(&c, &a);
}
/* call _SumBroadcast function */
......
......@@ -22,6 +22,7 @@
#include "../../XUtility.h"
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "../shape/IsSameShaped.h"
#include "XTensorBLAS.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -224,9 +225,9 @@ void _CudaBLASMatrixMULList(cublasHandle_t * handle,
XTensor * ai = (XTensor*)a->GetItem(i);
XTensor * bi = (XTensor*)b->GetItem(i);
XTensor * ci = (XTensor*)c->GetItem(i);
if (!XTensor::IsSameShaped(aim, ai) ||
!XTensor::IsSameShaped(bim, bi) ||
!XTensor::IsSameShaped(cim, ci))
if (!_IsSameShaped(aim, ai) ||
!_IsSameShaped(bim, bi) ||
!_IsSameShaped(cim, ci))
{
isUniform = false;
break;
......
......@@ -131,7 +131,7 @@ void ConvertDataType(const XTensor & input, XTensor & output, TENSOR_DATA_TYPE d
{
if (!output.isInit || input.dataType != output.dataType) {
float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, input.order, input.dimSize, dataType, dr, input.devID, input.mem);
InitTensorV2(&output, input.order, input.dimSize, dataType, dr, input.devID, input.mem);
}
_ConvertDataType(&input, &output);
......
......@@ -85,7 +85,7 @@ XTensor OnehotToIndex(const XTensor & onehot, int size)
CheckNTErrors(onehot.dataType == X_INT, "The onehot tensor must be in X_INT!")
XTensor index;
InitTensor(&index, onehot.order - 1, onehot.dimSize, X_INT, 1.0F, onehot.devID, onehot.mem);
InitTensorV2(&index, onehot.order - 1, onehot.dimSize, X_INT, 1.0F, onehot.devID, onehot.mem);
index.SetTMPFlag();
_OnehotToIndex(&onehot, &index, size);
......@@ -139,6 +139,47 @@ void _IndexToOnehot(const XTensor * index, XTensor * onehot,
}
/*
convert an index array to a onehot tensor
>> index - the index array, where each entry is an integer label
>> n - length of the index array
>> onehot - onehot tensor, whose values are 0 or 1
>> size - the last dimension size of the onehot tensor
>> labelSmoothingP - the probability used for label smoothing
*/
void _IndexToOnehot(int * index, int n, XTensor * onehot, int size, float labelSmoothingP)
{
/*CheckNTErrors(onehot->GetDim(-1) == size, "Illegal tensor dimension!");
CheckNTErrors(onehot->dataType == X_INT, "The onehot tensor must be in X_INT!")
onehot->SetZeroAll();
#ifdef USE_CUDA
if (onehot->devID >= 0) {
delete[] cudaIndex;
return;
}
#endif
int blockNum = n;
int stride = size;
int * indexData = (int *)index;
int * onehotData = (int *)onehot->data;
for (int i = 0; i < blockNum; i++) {
int id = indexData[i];
int * od = onehotData + i * stride;
od[id] = 1;
}*/
XTensor* cudaIndex = NewTensor1DV2(n, X_INT, onehot->devID);
cudaIndex->SetData(index, n);
_IndexToOnehot(cudaIndex, onehot, size, labelSmoothingP);
delete cudaIndex;
}
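/* a minimal usage sketch (not part of this commit; the helper name
   IndexToOnehotExample and the shapes are hypothetical): on the CPU,
   labels {2, 0} become two one-hot rows of width 4 */
void IndexToOnehotExample()
{
    int index[2] = {2, 0};
    XTensor onehot;
    InitTensor2DV2(&onehot, 2, 4, X_FLOAT, -1);
    onehot.SetZeroAll();
    _IndexToOnehot(index, 2, &onehot, 4, 0.0F);
    /* expected: row 0 = {0, 0, 1, 0}, row 1 = {1, 0, 0, 0} */
}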
/*
convert onehot tensor to index tensor (return an XTensor structure)
make a new tensor to keep the result and return it
......@@ -159,7 +200,7 @@ XTensor IndexToOnehot(const XTensor & index, int size, float labelSmoothingP)
int * dim = new int[order + 1];
memcpy(dim, index.dimSize, order * sizeof(int));
dim[order] = size;
InitTensor(&onehot, index.order + 1, dim, X_FLOAT, 1.0F, index.devID, index.mem);
InitTensorV2(&onehot, index.order + 1, dim, X_FLOAT, 1.0F, index.devID, index.mem);
_IndexToOnehot(&index, &onehot, size, labelSmoothingP);
......
......@@ -36,6 +36,9 @@ XTensor OnehotToIndex(const XTensor & onehot, int num);
/* convert index tensor to onehot tensor */
void _IndexToOnehot(const XTensor * index, XTensor * onehot, int size, float labelSmoothingP);
/* convert index tensor to onehot tensor */
void _IndexToOnehot(int * index, int n, XTensor * onehot, int size, float labelSmoothingP);
/* convert index tensor to onehot tensor (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor IndexToOnehot(const XTensor & index, int num, float labelSmoothingP);
......
......@@ -25,6 +25,114 @@
namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
generate a tensor with the data selected by index along the given dimension
c = select(a)
>> a - input tensor
>> c - result tensor
>> index - the indices of the selected slices
>> dim - the dimension along which we do the job
*/
void _Select(const XTensor * a, XTensor * c, int* index, int dim)
{
CheckNTErrors(a != NULL && c != NULL, "empty tensors!");
CheckNTErrors(a->order == c->order, "The input and output tensors must have the same order!");
CheckNTErrors(dim >= 0 && dim < a->order, "The input dimension is out of bounds!");
CheckNTErrors(a->dataType == c->dataType, "The tensors must have the same data type!");
int stride = 1;
for (int i = dim + 1; i < a->order; i++)
stride *= a->dimSize[i];
int copyTimes = 1;
for (int i = 0; i < dim; i++)
{
copyTimes *= a->dimSize[i];
}
int cot = c->dimSize[dim];
int blockSize = stride * a->unitSize;
int stepSizeS = stride * a->dimSize[dim] * a->unitSize;
int stepSizeT = stride * c->dimSize[dim] * a->unitSize;
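/* e.g., for a of shape (2, 5, 3), c of shape (2, 2, 3) and dim = 1:
   stride = 3, copyTimes = 2, blockSize = 3 * unitSize,
   stepSizeS = 15 * unitSize and stepSizeT = 6 * unitSize */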
char * s = (char*)a->data;
char * t = (char*)c->data;
for (int i = 0; i < copyTimes; i++) {
for (int j = 0; j < cot; ++j) {
XMemCopy(t + j * blockSize, c->devID, s + index[j] * blockSize, a->devID, blockSize);
}
s += stepSizeS;
t += stepSizeT;
}
}
/*
generate a tensor with the data selected by index along the given dimension
c = select(a)
>> a - input tensor
>> c - result tensor
>> index - the index tensor holding the selected indices
>> dim - the dimension along which we do the job
*/
void _Select(const XTensor * a, XTensor * c, XTensor* index, int dim)
{
if (index->devID >= 0)
{
int* indexCPU = new int[index->unitNum];
XMemCopy(indexCPU, -1, index->data, index->devID, index->unitNum * sizeof(int));
_Select(a, c, indexCPU, dim);
delete[] indexCPU;
}
else
{
_Select(a, c, (int *)index->data, dim);
}
}
/*
c = select(a)
>> a - input tensor
>> index - the index tensor holding the selected indices
>> dim - the dimension along which we do the job
<< return - the tensor with the selected data
*/
XTensor Select(const XTensor &a, XTensor &index, int dim)
{
int order = a.order;
int * dimSize = new int[order];
CheckNTErrors(dim >= 0 && dim < a.order, "The input dimension is out of bounds!");
for (int i = 0; i < a.order; i++) {
if (i == dim) {
dimSize[i] = index.dimSize[0];
}
else
dimSize[i] = a.dimSize[i];
}
float dr = (!a.isSparse) ? 1.0F : a.denseRatio;
XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem);
c.SetTMPFlag();
/* call _Select function */
_Select(&a, &c, &index, dim);
/* tensor connection */
if (a.enableGrad) {
XLink::MakeLink(&a, &index, &c, GETANDSET_SELECT);
XLink::AddParamToHeadInt(&c, dim);
}
/* destroy variables */
delete[] dimSize;
return c;
}
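/* a minimal usage sketch (not part of this commit; SelectExample and the
   shapes are hypothetical): select slices 0 and 2 along dimension 0 of a
   4 x 3 tensor, giving a 2 x 3 result */
void SelectExample()
{
    XTensor a, index;
    InitTensor2DV2(&a, 4, 3, X_FLOAT, -1);
    InitTensor1DV2(&index, 2, X_INT, -1);
    a.SetDataRand(0.0F, 1.0F);
    int id[2] = {0, 2};
    index.SetData(id, 2);
    XTensor c = Select(a, index, 0);   /* c has shape (2, 3) */
}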
/*
generate a tensor with selected data in range[low,high] along the given dimension
......@@ -58,13 +166,12 @@ void _SelectRange(const XTensor * a, XTensor * c, int dim, int low, int high)
}
int stride = 1;
int dimRDI = a->order - dim - 1;
for(int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
for(int i = dim + 1; i < a->order; i++)
stride *= a->dimSize[i];
int copyTimes = 1;
for (int i = dimRDI + 1; i < a->order; i++)
copyTimes *= a->dimSizeRDI[i];
for (int i = 0; i < dim; i++)
copyTimes *= a->dimSize[i];
int blockSize = stride * (high - low) * a->unitSize;
int stepSizeS = stride * a->dimSize[dim] * a->unitSize;
......
......@@ -27,13 +27,16 @@
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* generate a tensor with selected data c = select(a) */
void _Select(const XTensor * a, XTensor * c, XTensor * indexCPU);
void _Select(const XTensor * a, XTensor * c, int* index, int dim);
/* generate a tensor with selected data c = select(a) */
void _Select(const XTensor * a, XTensor * c, XTensor* index, int dim);
/*
generate a tensor with selected data c = select(a) (return an XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor Select(const XTensor &a, XTensor &indexCPU);
XTensor Select(const XTensor &a, XTensor &index, int dim);
/*
generate a tensor with selected data in range[low,high] along the given dimension
......
......@@ -470,7 +470,7 @@ void _SetDataLowTri(XTensor * tensor, DTYPE p, int shift)
void _SetDataRand(XTensor * tensor, int rNum, int cNum)
{
if (tensor == NULL || tensor->isInit == false || tensor->order !=2 ) {
InitTensor2D(tensor, rNum, cNum);
InitTensor2DV2(tensor, rNum, cNum);
}
_SetDataRand(tensor, 0.0F, 1.0F);
......@@ -519,7 +519,7 @@ void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
#ifdef USE_CUDA
_CudaSetDataRand(tensor, lower, upper);
#endif
//XTensor * t2 = NewTensor(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, -1);
//XTensor * t2 = NewTensorV2(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, -1);
//_SetDataRand(t2, low, high);
//_CopyValues(t2, tensor);
//delete t2;
......
......@@ -21,6 +21,7 @@
#include <math.h>
#include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Binary.h"
#include "Binary.cuh"
......@@ -77,7 +78,7 @@ void _funcName(const XTensor * a, XTensor * b, T num)
_cudaFuncName(a, b, num); \
return; \
} \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same data type!"); \
if (a->dataType == X_INT) { \
int * d = (int*)a->data; \
......@@ -112,7 +113,7 @@ void _funcName(const XTensor * a, XTensor * b, T num)
if (a->devID >= 0) { \
ShowNTErrors("No GPU devices support!") \
} \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same data type!"); \
if (a->dataType == X_INT) { \
int * d = (int*)a->data; \
......@@ -169,8 +170,8 @@ XTensor funcName(const XTensor &a, T num)
_funcName(&a, &b, num); \
if(a.enableGrad){ \
XLink::MakeLink(&a, NULL, &b, operationId); \
XLink::AddParamToHead(&b, num); \
} \
XLink::AddParamToHead(&b, num); \
return b; \
} \
template XTensor funcName<int>(const XTensor&, int); \
......@@ -181,8 +182,8 @@ template XTensor funcName<double>(const XTensor&, double);
template<class T> \
void funcName(const XTensor &a, XTensor &b, T num) \
{ \
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \
InitTensor(&b, &a); \
if (!b.isInit || !IsSameShaped(a, b)) { \
InitTensorV2(&b, &a); \
} \
_funcName(&a, &b, num); \
if (a.enableGrad) { \
......
......@@ -23,6 +23,7 @@
#include "../../XDevice.h"
#include "../../XUtility.h"
#include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Binary.h"
#include "Binary.cuh"
......@@ -89,7 +90,7 @@ void Kernel##funcName(T1 * a, T1 * b, int size, T2 num)
template<class T> \
void _Cuda##funcName(const XTensor * a, XTensor * b, T num) \
{ \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \
CheckNTErrors((a->isSparse == false), "TODO!"); \
\
......
......@@ -21,6 +21,7 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Clip.h"
#include "Clip.cuh"
......@@ -43,7 +44,7 @@ void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)
}
#endif
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((_IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
DTYPE * d = (DTYPE*)a->data;
......@@ -110,8 +111,8 @@ XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper)
void Clip(const XTensor & a, XTensor & b, DTYPE lower, DTYPE upper)
{
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) {
InitTensor(&b, &a);
if (!b.isInit || !IsSameShaped(a, b)) {
InitTensorV2(&b, &a);
}
/* call _Clip function */
......
......@@ -21,6 +21,7 @@
#include "../../XDevice.h"
#include "../../XTensor.h"
#include "../shape/IsSameShaped.h"
#include "Clip.h"
#include "Clip.cuh"
......@@ -36,7 +37,7 @@ set each entry to its clip value (CUDA Kernel)
>> size - size of the data array
*/
__global__
void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size)
void KernelClip(DTYPE * a, DTYPE * b, DTYPE lower, DTYPE upper, int size)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
......@@ -74,7 +75,7 @@ set each entry to its clip value
*/
void _CudaClip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper)
{
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((_IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((a->isSparse == false), "TODO!");
int gridSize[3];
......
......@@ -22,6 +22,7 @@
#include "../../XTensor.h"
#include "../../XDevice.h"
#include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Compare.h"
#include "Compare.cuh"
......@@ -42,7 +43,7 @@ DTYPE myIsNotEqual(DTYPE a, DTYPE b)
#define _SIMPLE_COMPARE_FUNCTION(_funcName, _cudaFuncName, origFunc) \
void _funcName(const XTensor * a, XTensor * b, DTYPE number) \
{ \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
/* run it on GPUs */ \
......@@ -59,7 +60,7 @@ void _funcName(const XTensor * a, XTensor * b, DTYPE number)
#define _SIMPLE_COMPARE_FUNCTION(_funcName, origFunc) \
void _funcName(const XTensor * a, XTensor * b, DTYPE number) \
{ \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
/* run it on GPUs */ \
......@@ -97,8 +98,8 @@ XTensor funcName(const XTensor &a, DTYPE number)
#define SIMPLE_COMPARE_FUNCTION_VOID(funcName, _funcName, operationId) \
void funcName(const XTensor &a, XTensor &b, DTYPE number) \
{ \
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \
InitTensor(&b, &a); \
if (!b.isInit || !IsSameShaped(a, b)) { \
InitTensorV2(&b, &a); \
} \
_funcName(&a, &b, number); \
}
......@@ -130,7 +131,7 @@ SIMPLE_COMPARE_FUNCTION_VOID(NotEqual, _NotEqual, MATH_NOTEQUAL)
#define _SIMPLE_MAX_MIN_FUNCTION(_funcName, _cudaFuncName, origFunc) \
void _funcName(const XTensor * a, const XTensor * b, XTensor * c) \
{ \
CheckNTErrors((XTensor::IsSameShaped(a, b, c)), \
CheckNTErrors((_IsSameShaped(a, b, c)), \
"Input and output tensors should have the same type!"); \
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
CheckDev(a->devID, b->devID); \
......@@ -150,7 +151,7 @@ void _funcName(const XTensor * a, const XTensor * b, XTensor * c)
#define _SIMPLE_MAX_MIN_FUNCTION(_funcName, origFunc) \
void _funcName(const XTensor * a, const XTensor * b, XTensor *c) \
{ \
CheckNTErrors((XTensor::IsSameShaped(a, b, c)), \
CheckNTErrors((_IsSameShaped(a, b, c)), \
"Input and output tensors should have the same type!"); \
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!"); \
CheckDev(a, b); \
......@@ -191,7 +192,7 @@ XTensor funcName(const XTensor & a, const XTensor & b)
#define SIMPLE_MAX_MIN_FUNCTION_VOID(funcName, _funcName, operationId) \
void funcName(const XTensor &a, const XTensor &b, XTensor &c) \
{ \
if (!c.isInit || !XTensor::IsSameShaped(&a, &c)) { \
if (!c.isInit || !_IsSameShaped(&a, &c)) { \
InitTensor(&c, &a); \
} \
_funcName(&a, &b, &c); \
......
......@@ -22,6 +22,7 @@
#include <math.h>
#include "../../XTensor.h"
#include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Normalize.h"
#include "Normalize.cuh"
......@@ -46,26 +47,25 @@ void _Normalize(const XTensor * input, XTensor * output, int dim,
const XTensor * mean, const XTensor * var,
const XTensor * a, const XTensor * b, DTYPE epsilon)
{
int dimRDI = input->order - dim - 1;
CheckNTErrors((XTensor::IsSameShaped(input, output)), "Unmatched input tensors!");
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Unmatched input tensors");
CheckNTErrors((XTensor::IsSameShaped(mean, var)), "Unmatched input tensors");
CheckNTErrors((_IsSameShaped(input, output)), "Unmatched input tensors!");
CheckNTErrors((_IsSameShaped(a, b)), "Unmatched input tensors");
CheckNTErrors((_IsSameShaped(mean, var)), "Unmatched input tensors");
CheckNTErrors((input && output && mean && var && a && b), "Empty input tensors!");
CheckNTErrors((dimRDI >= 0 && dimRDI < input->order), "Incorrect reduction dimension!");
CheckNTErrors((dim >= 0 && dim < input->order), "Incorrect reduction dimension!");
CheckNTErrors((input->order == mean->order + 1), "Incorrect reduction dimension!");
int stride = 1;
int strideNum = input->dimSizeRDI[dimRDI];
int strideNum = input->dimSize[dim];
int blockSize = 1;
int blockNum = 1;
for (int i = 0; i < input->order; i++) {
if (i < dimRDI) {
CheckNTErrors((input->dimSizeRDI[i] == mean->dimSizeRDI[i]), "Wrong size!");
stride *= input->dimSizeRDI[i];
if (i < dim) {
CheckNTErrors((input->dimSize[i] == mean->dimSize[i]), "Wrong size!");
blockNum *= input->dimSize[i];
}
else if (i > dimRDI) {
CheckNTErrors((input->dimSizeRDI[i] == mean->dimSizeRDI[i - 1]), "Wrong size!");
blockNum *= input->dimSizeRDI[i];
else if (i > dim) {
CheckNTErrors((input->dimSize[i] == mean->dimSize[i - 1]), "Wrong size!");
stride *= input->dimSize[i];
}
}
blockSize = stride * strideNum;
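/* e.g., for input of shape (2, 3, 4), dim = 1 and mean/var of shape (2, 4):
   blockNum = 2, strideNum = 3, stride = 4 and blockSize = 12 */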
......@@ -203,8 +203,8 @@ void Normalize(const XTensor &input, XTensor &output, int dim,
const XTensor &mean, const XTensor &var,
const XTensor &a, const XTensor &b, DTYPE epsilon)
{
if (!output.isInit || !XTensor::IsSameShaped(&input, &output)) {
InitTensor(&output, &input);
if (!output.isInit || !IsSameShaped(input, output)) {
InitTensorV2(&output, &input);
}
/* call _Normalize function */
......
......@@ -95,15 +95,14 @@ void _CudaNormalize(const XTensor * input, XTensor * output, int dim,
{
CheckNTErrors((input->dataType == DEFAULT_DTYPE), "TODO!");
int dimRDI = input->order - dim - 1;
int stride = 1;
int strideNum = input->dimSizeRDI[dimRDI];
int stride = 1;
int strideNum = input->dimSize[dim];
int blockNum = 1;
for (int i = 0; i < input->order; i++) {
if (i < dimRDI)
stride *= input->dimSizeRDI[i];
else if (i > dimRDI)
blockNum *= input->dimSizeRDI[i];
if (i > dim)
stride *= input->dimSize[i];
else if (i < dim)
blockNum *= input->dimSize[i];
}
int cudaGridSize[3];
......
......@@ -22,6 +22,7 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "../shape/IsSameShaped.h"
#include "ScaleAndShift.h"
#include "ScaleAndShift.cuh"
......@@ -147,8 +148,8 @@ b = a * scale + shift
*/
void ScaleAndShift(const XTensor & a, XTensor & b, DTYPE scale, DTYPE shift)
{
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) {
InitTensor(&b, &a);
if (!b.isInit || !IsSameShaped(a, b)) {
InitTensorV2(&b, &a);
}
/* call _ScaleAndShift function */
......
......@@ -22,6 +22,7 @@
#include <math.h>
#include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Unary.h"
#include "Unary.cuh"
......@@ -77,7 +78,7 @@ void _funcName(const XTensor * a, XTensor * b)
_cudaFuncName(a, b); \
return; \
} \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \
if (a->dataType == X_INT) { \
int * d = (int*)a->data; \
......@@ -108,7 +109,7 @@ void _funcName(const XTensor * a, XTensor * b)
if (a->devID >= 0) { \
ShowNTErrors("No GPU devices support!") \
} \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \
if (a->dataType == X_INT) { \
int * d = (int*)a->data; \
......@@ -160,8 +161,8 @@ XTensor funcName(const XTensor & a)
#define SIMPLE_UNARY_FUNCTION_VOID(funcName, _funcName, operationId) \
void funcName(const XTensor & a, XTensor & b) \
{ \
if (!b.isInit || !XTensor::IsSameShaped(&a, &b)) { \
InitTensor(&b, &a); \
if (!b.isInit || !IsSameShaped(a, b)) { \
InitTensorV2(&b, &a); \
} \
_funcName(&a, &b); \
if (a.enableGrad) { \
......
......@@ -22,6 +22,7 @@
#include <math.h>
#include "../../XDevice.h"
#include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Unary.h"
#include "Unary.cuh"
#include<cuda_runtime.h>
......@@ -154,7 +155,7 @@ void Kernel##funcName(T * a, T * b, int size) \
} \
void _Cuda##funcName(const XTensor * a, XTensor * b) \
{ \
CheckNTErrors((XTensor::IsSameShaped(a, b)), \
CheckNTErrors((_IsSameShaped(a, b)), \
"Input tensors should have the same type!"); \
CheckNTErrors(a->isSparse == false, "TODO!"); \
\
......
......@@ -20,6 +20,7 @@
*/
#include "../../XTensor.h"
#include "../shape/IsSameShaped.h"
#include "CopyInGrid.h"
#include "CopyBlocksInGrid.h"
......@@ -38,14 +39,13 @@ in the k-th grid
*/
void _CopyInGrid(const XTensor * s, XTensor * t, int * index, int blockDim, int blockNumInGrid, bool isIndexOnDev)
{
CheckNTErrors((XTensor::IsSameShaped(s, t)), "Unmatched tensors!");
CheckNTErrors((_IsSameShaped(s, t)), "Unmatched tensors!");
int blockDimRDI = s->order - blockDim - 1;
int blockSize = 1;
int blockNum = blockNumInGrid;
int gridNum = 1;
for (int i = 0; i < blockDimRDI; i++)
blockSize *= s->dimSizeRDI[i];
for (int i = blockDim; i < s->order; i++)
blockSize *= s->dimSize[i];
CheckNTErrors((s->unitNum % (blockSize * blockNum) == 0), "Illegal block number!");
gridNum = s->unitNum / (blockSize * blockNum);
......
......@@ -24,6 +24,7 @@
#include "CopyBlocks.h"
#include "Gather.h"
#include "../../XName.h"
#include "../utilities/SetAscendingOrder.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -52,26 +53,28 @@ void _CopyIndexed(const XTensor * s, XTensor * t, int dim,
CheckNTErrors(dim < s->order && dim < t->order, "Too large a dimension specified!");
CheckNTErrors(s->unitSize == t->unitSize, "Unmatched tensors!");
int dimRDI = s->order - dim - 1;
int blockSizeSrc = 1;
int blockSizeTgt = 1;
int blockNumSrc = 1;
int blockNumTgt = 1;
int leadDimSizeSrc = s->dimSizeRDI[dimRDI];
int leadDimSizeTgt = t->dimSizeRDI[dimRDI];
int leadDimSizeSrc = s->dimSize[dim];
int leadDimSizeTgt = t->dimSize[dim];
int indexOffsetNum = 1;
for (int i = 0; i < dimRDI; i++) {
blockSizeSrc *= s->dimSizeRDI[i];
blockSizeTgt *= t->dimSizeRDI[i];
for (int i = dim + 1; i < s->order; i++) {
blockSizeSrc *= s->dimSize[i];
}
for (int i = dim + 1; i < t->order; i++) {
blockSizeTgt *= t->dimSize[i];
}
for (int i = 0; i <= dim; i++)
{
blockNumSrc *= s->dimSize[i];
blockNumTgt *= t->dimSize[i];
}
for (int i = dimRDI; i < s->order; i++)
blockNumSrc *= s->dimSizeRDI[i];
for (int i = dimRDI; i < t->order; i++)
blockNumTgt *= t->dimSizeRDI[i];
CheckNTErrors(blockSizeSrc == blockSizeTgt, "Unmatched tensors!");
indexOffsetNum = blockNumSrc / s->dimSizeRDI[dimRDI];
indexOffsetNum = blockNumSrc / s->dimSize[dim];
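/* e.g., for s of shape (3, 2, 4) and dim = 2: blockSizeSrc = 1,
   blockNumSrc = 24, leadDimSizeSrc = 4, so indexOffsetNum = 6 */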
int realIndexSize = indexOffsetNum * indexSize * copyNum;
int * realSrcIndex = new int[realIndexSize];
......@@ -206,7 +209,7 @@ void _CopyIndexed(const XTensor * s, XTensor * t, int dim,
const XTensor * srcIndex, int copyNum)
{
XTensor * tgtIndex = NewTensor(srcIndex);
tgtIndex->SetAscendingOrder(0);
SetAscendingOrder(*tgtIndex, 0);
_CopyIndexed(s, t, dim, srcIndex, tgtIndex, copyNum);
delete tgtIndex;
......@@ -218,14 +221,14 @@ make a new tensor to keep the result and return it
>> s - the source tensor
>> dim - the leading dimension to define "sub-tensors"
e.g., for a tensor of size (4, 2, 3) and dim = 0,
we have 4 sub-tensors of size (2, 3)
e.g., for a tensor of size (3, 2, 4) and dim = 2,
we have 4 sub-tensors of size (3, 2)
>> srcIndex - index of the source sub-tensors
>> indexSize - length of srcIndex (and tgtIndex)
>> tgtIndex - index of the target sub-tensors
>> copyNum - number of the sub-tensors we copy for each source index,
e.g., for srcIndex = [0,1] and copyNum = 2,
we actually copy the source sub-tensors 0, 1, 1 and 2
e.g., for srcIndex = [1,4] and copyNum = 2,
we actually copy the source sub-tensors 1, 2, 4, 5
<< return - the result of copying indexed sub-tensors
*/
XTensor CopyIndexed(const XTensor & s, int dim,
......@@ -276,14 +279,14 @@ make a new tensor to keep the result and return it
>> s - the source tensor
>> dim - the leading dimension to define "sub-tensors"
e.g., for a tensor of size (4, 2, 3) and dim = 0,
we have 4 sub-tensors of size (2, 3)
e.g., for a tensor of size (3, 2, 4) and dim = 2,
we have 4 sub-tensors of size (3, 2)
>> srcIndex - index of the source sub-tensors
>> indexSize - length of srcIndex (and tgtIndex)
>> tgtIndex - index of the target sub-tensors
>> copyNum - number of the sub-tensors we copy for each source index,
e.g., for srcIndex = [0,1] and copyNum = 2,
we actually copy the source sub-tensors 0, 1, 1 and 2
e.g., for srcIndex = [1,4] and copyNum = 2,
we actually copy the source sub-tensors 1, 2, 4, 5
<< return - the result of copying indexed sub-tensors
*/
XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, int * tgtIndex, int copyNum)
......
......@@ -33,6 +33,29 @@ gather indexed sub-tensors
>> s - the source tensor
>> t - the target tensor
>> srcIndex - index of the source sub-tensors
>> dim - the leading dimension to define "sub-tensors"
e.g., for a tensor of size (3, 2, 4) and dim = 2,
we have 4 sub-tensors of size (3, 2)
*/
void _Gather(const XTensor * s, XTensor * t, XTensor * srcIndex, int dim)
{
CheckNTErrors((s && t), "Invalid tensors!");
CheckNTErrors(s->devID == t->devID, "the data must be kept on the same device!");
CheckNTErrors((t->unitSize == srcIndex->unitSize), "Unmatched tensors!");
#ifdef USE_CUDA
if (s->devID >= 0 && t->devID >= 0) {
_CudaGather(s, t, srcIndex, dim);
return;
}
#endif
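/* note: there is no CPU code path here yet, so the call is
   a no-op when the tensors are kept on the CPU */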
}
/*
gather indexed sub-tensors
>> s - the source tensor
>> t - the target tensor
>> srcIndex - the tensor to save the index of the source tensor
*/
void _Gather(const XTensor * s, XTensor * t, XTensor * srcIndex)
......@@ -79,10 +102,15 @@ XTensor Gather(XTensor &s, XTensor &index)
CheckNTErrors(s.order == 2, "The order of the input tensor must be 2!");
int order = index.order + 1;
int order = s.order;
int * dimSize = new int[order];
memcpy(dimSize, index.dimSize, index.order * sizeof(int));
dimSize[index.order] = s.GetDim(-1);
for (int i = 0; i < s.order; i++) {
if (i == dim)
dimSize[i] = index.unitNum;
else
dimSize[i] = s.dimSize[i];
}
float dr = (!s.isSparse) ? 1.0F : s.denseRatio;
XTensor t(order, dimSize, s.dataType, dr, s.devID, s.mem);
......@@ -93,11 +121,25 @@ XTensor Gather(XTensor &s, XTensor &index)
_Gather(&s, &t, &index);
/* tensor connection */
if (s.enableGrad) {
if (s.enableGrad)
{
XLink::MakeLink(&s, &index, &t, MOVEMENT_GATHER);
}
return t;
if(index.order > 1) {
int * dims = new int[index.order + 1];
memcpy(dims, index.dimSize, index.order * sizeof(int));
dims[index.order] = t.GetDim(-1);
XTensor tt;
tt = Reshape(t, index.order + 1, dims);
delete[] dims;
return tt;
}
else {
return t;
}
}
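/* a minimal usage sketch (not part of this commit; GatherExample and the
   shapes are hypothetical): gathering rows of a 5 x 3 matrix with a
   2 x 2 index should yield a 2 x 2 x 3 result, since the output is
   reshaped whenever index.order > 1 */
void GatherExample()
{
    XTensor s, index;
    InitTensor2DV2(&s, 5, 3, X_FLOAT, -1);
    InitTensor2DV2(&index, 2, 2, X_INT, -1);
    s.SetDataRand(0.0F, 1.0F);
    int id[4] = {0, 2, 4, 1};
    index.SetData(id, 4);
    XTensor t = Gather(s, index);      /* t.order == 3 */
}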
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
......@@ -68,6 +68,35 @@ void KernelGather(DTYPE * sData, DTYPE * tData, int * sIndex, int indexSize, int
/*
gather indexed sub-tensors(cuda version)
>> sData - the data pointer of the source tensor
>> tData - the data pointer of the target tensor
>> sIndex - the index of the source tensor
>> stride - stride of a data block (product of the dims after the gather dimension)
>> strideNum - number of items along the gather dimension
>> blockNum - number of data blocks
*/
__global__
void KernelGather(DTYPE * sData, DTYPE * tData, int * sIndex, int stride, int strideNum, int blockNum)
{
int idx = blockDim.x * blockIdx.x + threadIdx.x;
int idy = blockDim.y * blockIdx.y + threadIdx.y;
int blockIndex = idy / stride;
int offsetInBlock = idy % stride;
int size = stride * strideNum * blockNum;
#pragma unroll
for (int i = idx * stride + stride * strideNum * blockIndex + offsetInBlock;
i < stride * strideNum * blockIndex + offsetInBlock + stride * strideNum && i < size;
i += stride * blockDim.x) {
tData[i] = sData[sIndex[i]];
}
}
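/* note: sIndex maps each flattened position i of the target tensor to an
   element offset in the source data, i.e., tData[i] = sData[sIndex[i]] */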
/*
gather indexed sub-tensors(cuda version)
>> s - the source tensor
>> t - the target tensor
>> srcIndex - the tensor to save the index of the source tensor
......@@ -117,6 +146,44 @@ void _CudaGather(const XTensor * s, XTensor * t, XTensor * srcIndex)
BacktoCudaDev(devID, devIDBackup);
}
/*
gather indexed sub-tensors(cuda version)
>> s - the source tensor
>> t - the target tensor
>> srcIndex - the tensor to save the index of the source tensor
>> dim - the leading dimension to define "sub-tensors"
*/
void _CudaGather(const XTensor * s, XTensor * t, XTensor * srcIndex, int dim)
{
int devID = s->devID;
XMem * mem = s->mem;
int stride = 1;
int blockNum = 1;
int indexSize = srcIndex->unitNum;
int strideNum = srcIndex->dimSize[dim];
for (int i = 0; i < dim; i++)
blockNum *= srcIndex->dimSize[i];
for (int i = dim + 1; i < srcIndex->order; i++)
stride *= srcIndex->dimSize[i];
int * sIndex = NULL;
if (srcIndex->devID < 0) {
sIndex = mem != NULL ?
         (int*)mem->AllocBuf(mem->devID, sizeof(int) * indexSize) :
         (int*)XMemAlloc(devID, sizeof(int) * indexSize);
XMemCopy(sIndex, devID, srcIndex->data, -1, sizeof(int) * indexSize);
}
else
sIndex = (int *)srcIndex->data;
int cudaGrids[3];
int cudaBlocks[3];
GDevs.GetCudaThread2D(devID, max(32, strideNum), stride*blockNum, MAX_INT, cudaGrids, cudaBlocks);
KernelGather << <dim3(cudaGrids[0], cudaGrids[1]), dim3(cudaBlocks[0], cudaBlocks[1]) >> > ((DTYPE *)s->data, (DTYPE *)t->data, sIndex, stride, strideNum, blockNum);

/* release the temporary index buffer if it was uploaded from the CPU */
if (srcIndex->devID < 0) {
    if (mem != NULL)
        mem->ReleaseBuf(mem->devID, sizeof(int) * indexSize);
    else
        XMemFree(devID, sIndex);
}
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
......@@ -32,6 +32,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* gather indexed sub-tensors (cuda version) */
void _CudaGather(const XTensor * s, XTensor * t, XTensor * srcIndex);
/* gather indexed sub-tensors along the given dimension (cuda version) */
void _CudaGather(const XTensor * s, XTensor * t, XTensor * srcIndex, int dim);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
......
......@@ -29,6 +29,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* gather selected sub-tensors */
void _Gather(const XTensor * s, XTensor * t, XTensor * srcIndex);
/* gather selected sub-tensors according to the given dimension */
void _Gather(const XTensor * s, XTensor * t, XTensor * srcIndex, int dim);
/* gather selected sub-tensors (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Gather(XTensor &s, XTensor &index);
......
......@@ -31,6 +31,9 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* get the max-valued items along a dimension of the tensor (cuda version) */
void _CudaReduceMax(const XTensor * input, XTensor * output, int dim);
/* get the min-valued items along a dimension of the tensor (cuda version) */
void _CudaReduceMin(const XTensor * input, XTensor * output, int dim);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
......
......@@ -29,14 +29,20 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* get the max value of the items along a dimension of the tensor. */
void _ReduceMax(const XTensor * input, XTensor * output, int dim);
/* get the min value of the items along a dimension of the tensor. */
void _ReduceMin(const XTensor * input, XTensor * output, int dim);
/*
get the max value of the items along a dimension of the tensor (return an XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor ReduceMax(const XTensor &input, int dim);
/* get the max value of the items along a dimension of the tensor. */
void ReduceMax(const XTensor &input, XTensor &output, int dim);
/*
get the min value of the items along a dimension of the tensor (return an XTensor structure)
make a new tensor to keep the result and return it
*/
XTensor ReduceMin(const XTensor &input, int dim);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -39,8 +39,7 @@ void _ReduceMean(const XTensor * input, XTensor * output, int dim)
{
CheckNTErrors((input->order > dim), "Illegal dimension specified!");
int dimRDI = input->order - dim - 1;
int num = input->dimSizeRDI[dimRDI];
int num = input->dimSize[dim];
_ReduceSum(input, output, dim);
_ScaleAndShiftMe(output, (DTYPE)1/num, 0);
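/* i.e., output = sum(input, dim) / input->dimSize[dim] */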
......@@ -112,7 +111,7 @@ void ReduceMean(const XTensor &input, XTensor &output, int dim)
}
float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
InitTensorV2(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
/* destroy variables */
delete[] dimSize;
......
......@@ -22,6 +22,7 @@
#include <math.h>
#include "ReduceSum.h"
#include "ReduceSum.cuh"
#include "../shape/IsSameShaped.h"
#include "../../XName.h"
#include "../../XBLAS.h"
#include "VectorBuffer.h"
......@@ -51,17 +52,16 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
CheckNTErrors((input->order == output->order + 1), "Incorrect tensor sizes!");
CheckNTErrors((input->order > dim && dim >=0), "Illegal dimension to reduce!");
CheckNTErrors((input->dataType == output->dataType), "Unmatched data types!");
CheckNTErrors((shift == NULL || XTensor::IsSameShaped(output, shift)), "Incorrect shift tensor size!");
CheckNTErrors((shift == NULL || _IsSameShaped(output, shift)), "Incorrect shift tensor size!");
int dimRDI = input->order - dim - 1;
CheckNTErrors(dimRDI >= 0, "Wrong dimension!");
CheckNTErrors(dim < input->order, "Wrong dimension!");
for(int i = 0; i < input->order; i++){
if(i < dimRDI){
CheckNTErrors((input->dimSizeRDI[i] == output->dimSizeRDI[i]), "Unmatched tensors!");
if(i < dim){
CheckNTErrors((input->dimSize[i] == output->dimSize[i]), "Unmatched tensors!");
}
else if(i > dimRDI){
CheckNTErrors((input->dimSizeRDI[i] == output->dimSizeRDI[i - 1]), "Unmatched tensors!");
else if(i > dim){
CheckNTErrors((input->dimSize[i] == output->dimSize[i - 1]), "Unmatched tensors!");
}
}
......@@ -74,21 +74,21 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
CheckNTErrors((input->dataType == DEFAULT_DTYPE), "TODO!");
int stride = 1;
int strideNum = input->dimSizeRDI[dimRDI];
int strideNum = input->dimSize[dim];
int blockSize = 1;
int blockNum = 1;
for (int i = 0; i < input->order; i++) {
if (i < dimRDI)
stride *= input->dimSizeRDI[i];
else if (i > dimRDI)
blockNum *= input->dimSizeRDI[i];
if (i < dim)
blockNum *= input->dimSize[i];
else if (i > dim)
stride *= input->dimSize[i];
}
blockSize = stride * strideNum;
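/* e.g., for input of shape (2, 3, 4) and dim = 1: blockNum = 2 (product of
   the dims before dim), strideNum = 3 (the reduced dim), stride = 4
   (product of the dims after dim) and blockSize = 12 */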
if(input->dimSizeRDI[0] % (4 * 32 / sizeof(DTYPE)) == 0 && input->dimSizeRDI[0] >= 32){
if(input->dimSize[input->order - 1] % (4 * 32 / sizeof(DTYPE)) == 0 && input->dimSize[input->order - 1] >= 32){
int vecBufLength = 32 / sizeof(DTYPE);
if(dimRDI == 0){
if(dim == input->order - 1){
//the reduced dim is the last one, so the data is contiguous
for(int i = 0; i < blockNum; i++){
// stride = 1
......@@ -122,7 +122,7 @@ void _ReduceSum(const XTensor * input, XTensor * output, int dim, const XTensor
} else{
//data is separated
for(int i = 0; i < blockNum; i++){
for(int j = 0; j < input->dimSizeRDI[0] / 32; j++){
for(int j = 0; j < input->dimSize[input->order - 1] / 32; j++){
DTYPE * ip = (DTYPE*)input->data + blockSize * i;
DTYPE * op = (DTYPE*)output->data + stride * i;
DTYPE * sp = shift != NULL ? (DTYPE*)shift->data + stride * i : NULL;
......@@ -334,7 +334,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, const XTensor &sh
}
float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
InitTensorV2(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
/* destroy variables */
delete[] dimSize;
......@@ -429,7 +429,7 @@ void ReduceSum(const XTensor &input, XTensor &output, int dim, DTYPE power, bool
}
float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
InitTensorV2(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
/* destroy variables */
delete[] dimSize;
......
......@@ -692,13 +692,12 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
CheckNTErrors(input->dataType == output->dataType, "Unmatched data types!");
CheckNTErrors(shift == NULL || output->unitNum == shift->unitNum, "Incorrect shift tensor size!");
int dimRDI = input->order - dim - 1;
for(int i = 0; i < input->order; i++){
if(i < dimRDI){
CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i], "Unmatched tensors!");
if(i < dim){
CheckNTErrors(input->dimSize[i] == output->dimSize[i], "Unmatched tensors!");
}
else if(i > dimRDI){
CheckNTErrors(input->dimSizeRDI[i] == output->dimSizeRDI[i - 1], "Unmatched tensors!");
else if(i > dim){
CheckNTErrors(input->dimSize[i] == output->dimSize[i - 1], "Unmatched tensors!");
}
}
......@@ -709,31 +708,23 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
int cudaBlockSize[3];
int iter = 0;
int stride = 1;
int strideNum = input->dimSizeRDI[dimRDI];
int strideNum = input->dimSize[dim];
int blockSize = 1;
int blockNum = 1;
for (int i = 0; i < input->order; i++) {
if (i < dimRDI)
stride *= input->dimSizeRDI[i];
else if (i > dimRDI)
blockNum *= input->dimSizeRDI[i];
if (i < dim)
blockNum *= input->dimSize[i];
else if (i > dim)
stride *= input->dimSize[i];
}
blockSize = stride * strideNum;
int devID = input->devID;
XMem * mem = input->mem;
GDevs.GetCudaThread2D(devID, strideNum, stride * blockNum, MAX_INT, cudaGridSize, cudaBlockSize);
int devIDBackup;
ProtectCudaDev(devID, devIDBackup);
int bufSize = input->unitSize * cudaGridSize[0] * stride * blockNum * 2;
DTYPE * buf = mem != NULL ? (DTYPE*)mem->AllocBuf(mem->devID, bufSize) : (DTYPE*)XMemAlloc(input->devID, bufSize);
DTYPE * buf1 = buf;
DTYPE * buf2 = buf + cudaGridSize[0] * stride * blockNum;
DTYPE * sp = shift != NULL ? (DTYPE*)shift->data : NULL;
int devIDBackup;
ProtectCudaDev(input->devID, devIDBackup);
if (stride == 1 && blockNum >= 10) {
dim3 grids;
......@@ -751,7 +742,7 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
strideNum, blockNum, sp, power, isExp);
}
}
else if (stride != 1 && stride * blockNum > 4096){
else if (stride != 1 && stride * blockNum > 4096) {
//GDevs->GetGridAndBlockSize2D(devID, stride * blockNum, strideNum,MAX_INT, cudaGridSize, cudaBlockSize);
//unsigned int* goutput = (unsigned int *)input->data;
//convert2uintV2 << <dim3(cudaGridSize[0], cudaGridSize[1]), dim3(cudaBlockSize[0], cudaBlockSize[1]) >> > ((float*)input->data, goutput, stride, strideNum, blockNum, strideNum*blockNum*stride);
......@@ -761,6 +752,14 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
strideNum, blockNum,sp, power, isExp);
}
else {
XMem * mem = input->mem;
GDevs.GetCudaThread2D(devID, strideNum, stride * blockNum, MAX_INT, cudaGridSize, cudaBlockSize);
int bufSize = input->unitSize * cudaGridSize[0] * stride * blockNum * 2;
DTYPE * buf = mem != NULL ? (DTYPE*)mem->AllocBuf(mem->devID, bufSize) : (DTYPE*)XMemAlloc(devID, bufSize);
DTYPE * buf1 = buf;
DTYPE * buf2 = buf + cudaGridSize[0] * stride * blockNum;
do {
if (input->dataType == DEFAULT_DTYPE) {
DTYPE * iData = NULL;
......@@ -904,13 +903,15 @@ void _CudaReduceSum(const XTensor * input, XTensor * output, int dim, const XTen
iter++;
} while (strideNum > 1);
if (mem != NULL)
mem->ReleaseBuf(mem->devID, bufSize);
else
XMemFree(devID, buf);
}
ProtectCudaDev(input->devID, devIDBackup);
if (mem != NULL)
mem->ReleaseBuf(mem->devID, bufSize);
else
XMemFree(input->devID, buf);
BacktoCudaDev(devID, devIDBackup);
}
#endif // USE_CUDA
......
......@@ -49,8 +49,8 @@ DTYPE _ReduceSumAll(const XTensor * source)
int dims[2] = {1, source->unitNum};
int one = 1;
XTensor * all = NewTensorBuf(2, dims, source->dataType, source->denseRatio, source->devID, source->mem);
XTensor * result = NewTensorBuf(1, &one, source->dataType, 1.0F, source->devID, source->mem);
XTensor * all = NewTensorBufV2(2, dims, source->dataType, source->denseRatio, source->devID, source->mem);
XTensor * result = NewTensorBufV2(1, &one, source->dataType, 1.0F, source->devID, source->mem);
_CopyValues(source, all);
_ReduceSum(all, result, 1);
......@@ -74,7 +74,7 @@ DTYPE _ReduceSumAll(const XTensor * source)
int leadingDim = big->order - 1;
int * dimSize;
dimSize = getDimSize(big, leadingDim);
XTensor * little = NewTensor(big->order - 1, dimSize, source->dataType, source->denseRatio,
XTensor * little = NewTensorV2(big->order - 1, dimSize, source->dataType, source->denseRatio,
source->devID, source->mem);
_ReduceSum(big, little, leadingDim);
......
......@@ -109,7 +109,7 @@ void ReduceSumSquared(const XTensor &input, XTensor &output, int dim, const XTen
}
float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
InitTensorV2(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
/* destroy variables */
delete[] dimSize;
......
......@@ -38,8 +38,7 @@ For a 1-dimensional data array a, variance = 1/n * \sum_i (a_i - mean)^2
*/
void _ReduceVariance(const XTensor * input, XTensor * output, int dim, const XTensor * mean)
{
int dimRDI = input->order - dim - 1;
int num = input->dimSizeRDI[dimRDI];
int num = input->dimSize[dim];
_ReduceSum(input, output, dim, mean, 2.0F);
_ScaleAndShiftMe(output, (DTYPE)1 / num, 0);
}
......@@ -112,7 +111,7 @@ void ReduceVariance(const XTensor &input, XTensor &output, int dim, const XTenso
}
float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
InitTensor(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
InitTensorV2(&output, order, dimSize, input.dataType, dr, input.devID, input.mem);
/* destroy variables */
delete[] dimSize;
......
......@@ -20,7 +20,7 @@
*/
#include "VectorBuffer.h"
#include "math.h"
namespace nts {
/* data size for each buffer */
int VectorBuffer::size()
......@@ -168,4 +168,12 @@ VectorBuffer VectorBuffer::maxData(const VectorBuffer &a) {
return *this;
}
/* compute the min of two buffers */
VectorBuffer VectorBuffer::minData(const VectorBuffer &a) {
for (int i = 0; i != a.size(); i++) {
this->values[i] = MIN(a[i], this->values[i]);
}
return *this;
}
}/* end of the nts (NiuTrans.Tensor) namespace */
\ No newline at end of file
......@@ -19,8 +19,6 @@
* $Created by: ZHANG Yuhao (email: zhangyuhao@stu.neu.edu.cn) 2019-07-23
*/
//#include <cstring>
#include <math.h>
#include "../../XGlobal.h"
namespace nts {
......@@ -49,5 +47,8 @@ public:
/* compute the max of two buffers */
VectorBuffer maxData(const VectorBuffer &a);
/* compute the min of two buffers */
VectorBuffer minData(const VectorBuffer &a);
};
}
\ No newline at end of file
......@@ -21,6 +21,7 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Concatenate.h"
#include "Merge.h"
#include "ConcatenateSolely.h"
......@@ -44,7 +45,7 @@ void _Concatenate(const TensorList * smalls, XTensor * big, int dim)
XTensor * a = (XTensor*)smalls->GetItem(i - 1);
XTensor * b = (XTensor*)smalls->GetItem(i);
CheckNTErrors((a && b), "Empty input tensors!");
if (!XTensor::IsSameShaped(a, b))
if (!_IsSameShaped(a, b))
uniform = false;
}
......@@ -76,7 +77,7 @@ XTensor Concatenate(const TensorList &smalls, int dim)
XTensor * a = (XTensor*)smalls.GetItem(i - 1);
XTensor * b = (XTensor*)smalls.GetItem(i);
CheckNTErrors((a && b), "Empty input tensors!");
if (!XTensor::IsSameShaped(a, b))
if (!_IsSameShaped(a, b))
uniform = false;
}
XTensor * tensor = (XTensor*)smalls.GetItem(0);
......@@ -189,7 +190,7 @@ void Concatenate(const TensorList & smalls, XTensor & big, int dim)
XTensor * a = (XTensor*)smalls.GetItem(i - 1);
XTensor * b = (XTensor*)smalls.GetItem(i);
CheckNTErrors((a && b), "Empty input tensors!");
if (!XTensor::IsSameShaped(a, b))
if (!_IsSameShaped(a, b))
uniform = false;
}
......@@ -207,7 +208,7 @@ void Concatenate(const TensorList & smalls, XTensor & big, int dim)
}
float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
InitTensor(&big, order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
InitTensorV2(&big, order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
}
else {
for (int i = 0; i < tensor->order; i++)
......@@ -222,7 +223,7 @@ void Concatenate(const TensorList & smalls, XTensor & big, int dim)
dimSize[dim] = catDimSize;
float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
InitTensor(&big, order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
InitTensorV2(&big, order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
}
/* destroy variables */
delete[] dimSize;
......@@ -290,7 +291,7 @@ XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim)
XTensor * a = (XTensor*)smalls.Get(i - 1);
XTensor * b = (XTensor*)smalls.Get(i);
CheckNTErrors((a && b), "Empty input tensors!");
if (!XTensor::IsSameShaped(a, b))
if (!_IsSameShaped(a, b))
uniform = false;
}
XTensor * tensor = (XTensor*)smalls.Get(0);
......
......@@ -39,30 +39,29 @@ void _ConcatenateSolely(const TensorList * smalls, XTensor * big, int dim)
CheckNTErrors(big->order > dim && dim >= 0, "Illegal dimension to concatenate!");
int catDimSize = 0;
int dimRDI = big->order - dim - 1;
for (int i = 0; i < smalls->count; i++) {
XTensor * tensor = (XTensor*)smalls->GetItem(i);
CheckNTErrors((big->order == tensor->order), "Unmatched tensor orders!");
for (int j = 0; j < big->order; j++) {
if (j != dimRDI) {
CheckNTErrors((big->dimSizeRDI[j] == tensor->dimSizeRDI[j]), "Unmatched tensor sizes!");
if (j != dim) {
CheckNTErrors((big->dimSize[j] == tensor->dimSize[j]), "Unmatched tensor sizes!");
}
else {
catDimSize += tensor->dimSizeRDI[j];
catDimSize += tensor->dimSize[j];
}
}
}
CheckNTErrors((catDimSize == big->dimSizeRDI[dimRDI]), "Unmatched tensor sizes!");
CheckNTErrors((catDimSize == big->dimSize[dim]), "Unmatched tensor sizes!");
int stride = 1;
for (int i = 0; i < dimRDI; i++)
stride *= big->dimSizeRDI[i];
int blockNum = 1;
for (int i = dimRDI + 1; i < big->order; i++)
blockNum *= big->dimSizeRDI[i];
for (int i = 0; i < dim; i++)
blockNum *= big->dimSize[i];
for (int i = dim + 1; i < big->order; i++)
stride *= big->dimSize[i];
int offset = 0;
......@@ -74,8 +73,8 @@ void _ConcatenateSolely(const TensorList * smalls, XTensor * big, int dim)
if (smalls->count <= MIN_TENSOR_CAT_NUM) {
for (int i = 0; i < smalls->count; i++) {
XTensor * tensor = (XTensor*)smalls->GetItem(i);
int sPitch = stride * tensor->dimSizeRDI[dimRDI] * tensor->unitSize;
int tPitch = stride * big->dimSizeRDI[dimRDI] * big->unitSize;
int sPitch = stride * tensor->dimSize[dim] * tensor->unitSize;
int tPitch = stride * big->dimSize[dim] * big->unitSize;
int mSize = sPitch;
int n = blockNum;
XMemCopy2D((char*)big->data + offset, tPitch, big->devID,
......@@ -85,11 +84,11 @@ void _ConcatenateSolely(const TensorList * smalls, XTensor * big, int dim)
}
}
else {
StrList* sourceArrays = new StrList(smalls->count);
StrList* sourceArrays = new StrList(smalls->count);
int * blockSizes = new int[smalls->count];
for (int i = 0; i < smalls->count; i++) {
XTensor * tensor = (XTensor*)smalls->GetItem(i);
blockSizes[i] = stride * tensor->dimSizeRDI[dimRDI] * tensor->unitSize;
blockSizes[i] = stride * tensor->dimSize[dim] * tensor->unitSize;
sourceArrays->Add((char*)tensor->data);
}
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2019-10-22
*/
#include "../../XTensor.h"
#include "IsSameShaped.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
check whether the two tensors are of the same type and size
>> a - input tensor
>> b - another tensor to compare with
<< return - whether the two input tensors are identical
*/
bool _IsSameShaped(const XTensor * a, const XTensor * b)
{
if(a == NULL || b == NULL)
return false;
if(a->order != b->order)
return false;
for(int i = 0; i < a->order; i++){
if(a->dimSize[i] != b->dimSize[i])
return false;
}
if(a->dataType != b->dataType)
return false;
if(a->denseRatio != b->denseRatio)
return false;
if(a->isSparse != b->isSparse)
return false;
return true;
}
/*
check whether the two tensors are of the same type and size
>> a - input tensor
>> b - another tensor to compare with
<< return - whether the two input tensors are identical
*/
bool IsSameShaped(const XTensor & a, const XTensor & b)
{
return _IsSameShaped(&a, &b);
}
/*
check whether the three tensors are of the same type and size
>> a - input tensor
>> b - another tensor to compare with
>> c - the third tensor to compare with
<< return - whether the three input tensors are identical
*/
bool _IsSameShaped(const XTensor * a, const XTensor * b, const XTensor * c)
{
return _IsSameShaped(a, b) && _IsSameShaped(a, c);
}
/*
check whether the three matrices are in the same type and size
>> a - input tensor
>> b - another tensor to compare with
>> c - a third tensor to compare with
<< return - whether the three input tensors are identical
*/
bool IsSameShaped(const XTensor & a, const XTensor & b, const XTensor & c)
{
return _IsSameShaped(&a, &b, &c);
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2019-10-22
*/
#ifndef __ISSAMESHAPED_H__
#define __ISSAMESHAPED_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* judge whether the two matrices are in the same type and size */
bool _IsSameShaped(const XTensor * a, const XTensor * b);
/* judge whether the two matrices are in the same type and size */
bool IsSameShaped(const XTensor & a, const XTensor & b);
/* judge whether the three matrices are in the same type and size */
bool _IsSameShaped(const XTensor * a, const XTensor * b, const XTensor * c);
/* judge whether the three matrices are in the same type and size */
bool IsSameShaped(const XTensor & a, const XTensor & b, const XTensor & c);
} // namespace nts(NiuTrans.Tensor)
#endif // __ISSAMESHAPED_H__
\ No newline at end of file
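For reference, a trimmed, self-contained stand-in for the checks declared above; the Shape struct and SameShaped names are illustrative only, and the real functions additionally compare denseRatio and isSparse:

#include <cassert>

// Trimmed stand-in for XTensor: only the fields the shape check reads.
struct Shape {
    int order;
    int dimSize[8];
    int dataType;            // e.g. 0 = float, 1 = int
};

// Mirrors _IsSameShaped above: same order, same extents, same data type.
bool SameShaped(const Shape * a, const Shape * b)
{
    if (a == 0 || b == 0)
        return false;
    if (a->order != b->order || a->dataType != b->dataType)
        return false;
    for (int i = 0; i < a->order; i++)
        if (a->dimSize[i] != b->dimSize[i])
            return false;
    return true;
}

// The three-tensor overload reduces to two pairwise checks, as above.
bool SameShaped(const Shape * a, const Shape * b, const Shape * c)
{
    return SameShaped(a, b) && SameShaped(a, c);
}

int main()
{
    Shape s1 = {2, {3, 4}, 0};
    Shape s2 = {2, {3, 4}, 0};
    Shape s3 = {2, {3, 5}, 0};
    assert(SameShaped(&s1, &s2) && !SameShaped(&s1, &s2, &s3));
    return 0;
}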
......@@ -22,6 +22,7 @@
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "../../XName.h"
#include "../shape/IsSameShaped.h"
#include "Merge.h"
#include "MakeMergeBlockIndex.h"
#include "../movement/CopyBlocksOnSite.h"
......@@ -45,10 +46,8 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
if(leadingDim < 0)
leadingDim = 0;
int whereToMergeRDI = s->order - whereToMerge - 1;
int leadingDimRDI = s->order - leadingDim - 1;
if (leadingDimRDI < 0)
leadingDimRDI = s->order - 1;
if (leadingDim >= s->order)
leadingDim = leadingDim - s->order;
CheckNTErrors((s != NULL && t != NULL), "Invalid tensors!");
CheckNTErrors((s->devID == t->devID || (s->devID < 0 && t->devID < 0)),
......@@ -56,19 +55,20 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
CheckNTErrors((s->unitNum == t->unitNum && s->unitSize == t->unitSize), "Unmatched tensors!");
CheckNTErrors((s->order == t->order + 1), "Unmatched tensors!");
CheckNTErrors((leadingDimRDI > whereToMergeRDI), "Invalid leading dimension!");
CheckNTErrors((leadingDim < whereToMerge), "Invalid leading dimension!");
for (int i = 0; i < s->order; i++) {
if (i == whereToMergeRDI) {
CheckNTErrors((t->dimSizeRDI[i] == s->dimSizeRDI[i] * s->dimSizeRDI[leadingDimRDI]),
if (i == whereToMerge) {
CheckNTErrors((t->dimSize[i - 1] == s->dimSize[i] * s->dimSize[leadingDim]),
"Unmatched tensor sizes!");
}
else if (i < leadingDimRDI){
CheckNTErrors((s->dimSizeRDI[i] == t->dimSizeRDI[i]),
else if (i < leadingDim){
CheckNTErrors((s->dimSize[i] == t->dimSize[i]),
"Unmatched tensor sizes!");
}
else if (i > leadingDimRDI) {
CheckNTErrors((s->dimSizeRDI[i] == t->dimSizeRDI[i - 1]),
else if (i > leadingDim) {
CheckNTErrors((s->dimSize[i] == t->dimSize[i - 1]),
"Unmatched tensor sizes!");
}
}
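Concretely, under the new forward-index checks the merged tensor drops leadingDim and multiplies its extent into whereToMerge. A standalone sketch with hypothetical shapes, assuming leadingDim < whereToMerge as asserted above:

#include <cassert>

// Shape arithmetic of _Merge for s = (3, 2, 4), leadingDim = 0, whereToMerge = 2:
// dimension 0 is folded into dimension 2, giving t = (2, 12).
int main()
{
    const int s[] = {3, 2, 4};
    const int leadingDim = 0, whereToMerge = 2, order = 3;

    int t[2];
    for (int i = 0, j = 0; i < order; i++) {
        if (i == leadingDim)
            continue;                  // this dimension disappears
        t[j] = s[i];
        if (i == whereToMerge)
            t[j] *= s[leadingDim];     // and is absorbed here
        j++;
    }
    // matches t->dimSize[i - 1] == s->dimSize[i] * s->dimSize[leadingDim]
    assert(t[0] == 2 && t[1] == 12);
    return 0;
}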
......@@ -77,14 +77,14 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
int blockNum = 1;
int gridSize = 1;
int gridNum = 1;
int mergedNum = s->dimSizeRDI[leadingDimRDI];
int mergedNum = s->dimSize[leadingDim];
for (int i = 0; i < s->order; i++) {
if (i <= leadingDimRDI) {
if (i <= whereToMergeRDI)
blockSize *= s->dimSizeRDI[i];
if (i >= leadingDim) {
if (i >= whereToMerge)
blockSize *= s->dimSize[i];
else
blockNum *= s->dimSizeRDI[i];
blockNum *= s->dimSize[i];
}
}
......@@ -121,7 +121,7 @@ void _Merge(const XTensor * s, XTensor * t, int whereToMerge, int leadingDim)
if (!isOnSameDevice)
dataTMP = mem != NULL ? mem->AllocBuf(mem->devID, size) : XMemAlloc(mem->devID, size);
int blockNumInMerge = s->dimSizeRDI[leadingDimRDI];
int blockNumInMerge = s->dimSize[leadingDim];
int splitSizeInGrid = gridSize / blockNumInMerge;
int realBlockSize = blockSize * t->unitSize;
......@@ -254,7 +254,7 @@ void Merge(const XTensor &s, XTensor &t, int whereToMerge, int leadingDim)
}
float dr = (!s.isSparse) ? 1.0F : s.denseRatio;
InitTensor(&t, order, dimSize, s.dataType, dr, s.devID, s.mem);
InitTensorV2(&t, order, dimSize, s.dataType, dr, s.devID, s.mem);
/* destroy variables */
delete[] dimSize;
......@@ -284,7 +284,7 @@ void _Merge(const TensorList * smalls, XTensor * t, int whereToMerge)
CheckNTErrors((smalls != NULL), "Invalid list!");
CheckNTErrors((smalls->count > 0), "Empty list!");
CheckNTErrors((whereToMerge >= 0 && whereToMerge < t->order), "Wrong range of whereToMerge");
bool uniform = true;
......@@ -310,12 +310,11 @@ void _Merge(const TensorList * smalls, XTensor * t, int whereToMerge)
int mergedNum = smalls->count;
XTensor * s0 = smalls->GetItem(0);
int whereToMergeRDI = s0->order - whereToMerge - 1;
for (int i = 0; i < s0->order; i++) {
if (i <= whereToMergeRDI)
blockSize *= s0->dimSizeRDI[i];
if (i >= whereToMerge)
blockSize *= s0->dimSize[i];
else
blockNum *= s0->dimSizeRDI[i];
blockNum *= s0->dimSize[i];
}
CheckNTErrors((s0->unitNum % (blockSize * blockNum) == 0), "Incorrect size!");
......@@ -433,7 +432,7 @@ merge two tensors into a big tensor (return an XTensor structure)
*/
XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge)
{
CheckNTErrors(XTensor::IsSameShaped(&smallA, &smallB),
CheckNTErrors(IsSameShaped(smallA, smallB),
"The two tensors must be of the same size!");
int order = smallA.order;
......
......@@ -46,8 +46,6 @@ void Merge(const TensorList &smalls, XTensor &t, int whereToMerge);
/* merge two tensors into a big tensor (return an XTensor structure) */
XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge);
void Merge(const XTensor &smallA, const XTensor &smallB, XTensor &t, int whereToMerge);
} // namespace nts(NiuTrans.Tensor)
#endif // __MERGE_H__
\ No newline at end of file
......@@ -22,6 +22,7 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../movement/CopyValues.h"
#include "../shape/IsSameShaped.h"
#include "Reshape.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -52,8 +53,8 @@ XTensor Reshape(XTensor &s, int order, int * dimSize)
void Reshape(XTensor &s, XTensor &t, int order, int * dimSize)
{
if (!t.isInit || !XTensor::IsSameShaped(&t, &s)) {
InitTensor(&t, &s);
if (!t.isInit || !IsSameShaped(t, s)) {
InitTensorV2(&t, &s);
}
/* call Reshape function */
......
......@@ -31,7 +31,7 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
transform a tensor by splitting it, e.g., (N, M) -> (3, N/3, M)
>> s - the source tensor
>> t - the target tensor (for return)
......@@ -46,23 +46,22 @@ void _Split(const XTensor * s, XTensor * t, int whereToSplit, int splitNum)
CheckNTErrors((s->unitNum == t->unitNum && s->unitSize == t->unitSize), "Unmatched tensors!");
CheckNTErrors((s->order == t->order - 1), "Unmatched tensors!");
CheckNTErrors((t->dimSizeRDI[t->order - 1] == splitNum), "Incorrect tensor sizes!");
CheckNTErrors((t->dimSize[0] == splitNum), "Incorrect tensor sizes!");
int whereToSplitRDI = s->order - whereToSplit - 1;
for (int i = 0; i < s->order; i++) {
if (i == whereToSplitRDI) {
CheckNTErrors((s->dimSizeRDI[i] == t->dimSizeRDI[i] * splitNum),
if (i == whereToSplit) {
CheckNTErrors((s->dimSize[i] == t->dimSize[i + 1] * splitNum),
"Unmatched tensor sizes!");
}
else {
CheckNTErrors((s->dimSizeRDI[i] == t->dimSizeRDI[i]),
CheckNTErrors((s->dimSize[i] == t->dimSize[i + 1]),
"Unmatched tensor sizes!");
}
}
/* for the case that we split the first dimension. Actually
(N, M) and (3, N/3, M) have the same memory layout */
if (s->order - 1 == whereToSplitRDI) {
if (0 == whereToSplit) {
XMemCopy(t->data, t->devID, s->data, s->devID, s->unitNum * s->unitSize);
return;
}
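The early return works because a dimension-0 split is a pure reshape: (N, M) and (splitNum, N/splitNum, M) enumerate elements in the same row-major order, so one flat copy suffices. A standalone illustration (array contents are hypothetical):

#include <cassert>
#include <cstring>

int main()
{
    // A 6 x 2 row-major source, split along dimension 0 into 3 parts.
    float s[6][2] = {{0,1},{2,3},{4,5},{6,7},{8,9},{10,11}};
    float t[3][2][2];                  // (splitNum, N/splitNum, M)

    // Same memory layout, so the split is a single flat copy.
    memcpy(t, s, sizeof(s));

    assert(t[1][0][0] == 4.0f);        // block 1 starts at row 2 of s
    return 0;
}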
......@@ -70,14 +69,14 @@ void _Split(const XTensor * s, XTensor * t, int whereToSplit, int splitNum)
int blockSize = 1;
int blockNum = 1;
for (int i = 0; i < s->order; i++) {
if (i == whereToSplitRDI) {
blockSize *= s->dimSizeRDI[i] / splitNum;
if (i == whereToSplit) {
blockSize *= s->dimSize[i] / splitNum;
blockNum *= splitNum;
}
else if (i < whereToSplitRDI)
blockSize *= s->dimSizeRDI[i];
else if (i > whereToSplit)
blockSize *= s->dimSize[i];
else
blockNum *= s->dimSizeRDI[i];
blockNum *= s->dimSize[i];
}
CheckNTErrors((blockNum % splitNum == 0), "Incorrect split number!");
......@@ -184,7 +183,7 @@ bool CheckSplitSize(const XTensor * s, const XTensor * t, int whereToSplit, int
}
/*
transform a tensor by splitting it, e.g., (N, M) -> (3, N/3, M) (return an XTensor structure)
make a new tensor to keep the result and return it
>> s - the source tensor
......@@ -244,7 +243,7 @@ void Split(const XTensor &s, XTensor &t, int whereToSplit, int splitNum)
}
float dr = (!s.isSparse) ? 1.0F : s.denseRatio;
InitTensor(&t, order, dimSize, s.dataType, dr, s.devID, s.mem);
InitTensorV2(&t, order, dimSize, s.dataType, dr, s.devID, s.mem);
/* destroy variables */
delete[] dimSize;
......@@ -276,7 +275,6 @@ void _Split(const XTensor * big, TensorList * smalls, int whereToSplit, int spli
CheckNTErrors((smalls->count == splitNum), "Unmatched tensors!");
CheckNTErrors((smalls->count > 0), "Wrong input!");
int whereToSplitRDI = big->order - whereToSplit - 1;
bool uniform = true;
for (int i = 0; i < smalls->count; i++) {
......@@ -292,14 +290,14 @@ void _Split(const XTensor * big, TensorList * smalls, int whereToSplit, int spli
int blockSize = 1;
int blockNum = 1;
for (int i = 0; i < big->order; i++) {
if (i == whereToSplitRDI) {
blockSize *= big->dimSizeRDI[i] / splitNum;
if (i == whereToSplit) {
blockSize *= big->dimSize[i] / splitNum;
blockNum *= splitNum;
}
else if (i < whereToSplitRDI)
blockSize *= big->dimSizeRDI[i];
else if (i > whereToSplit)
blockSize *= big->dimSize[i];
else
blockNum *= big->dimSizeRDI[i];
blockNum *= big->dimSize[i];
}
CheckNTErrors((blockNum % splitNum == 0), "Incorrect split number!");
......
......@@ -21,6 +21,7 @@
#include "Squeeze.h"
#include "../movement/CopyValues.h"
#include "../shape/IsSameShaped.h"
#include "../../XName.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -37,7 +38,7 @@ void _Squeeze(XTensor * source, XTensor * target, int leadingDim)
{
int order = target->order;
CheckNTErrors(XTensor::IsSameShaped(source, target),
CheckNTErrors(_IsSameShaped(source, target),
"The source and target tensor must be of the same size!");
CheckNTErrors(leadingDim >= -1 && leadingDim < order,
"Wrong leading dimension");
......@@ -130,8 +131,8 @@ XTensor Squeeze(XTensor & source, int leadingDim)
void Squeeze(XTensor & source, XTensor & target, int leadingDim)
{
if (!target.isInit || !XTensor::IsSameShaped(&source, &target)) {
InitTensor(&target, &source);
if (!target.isInit || !IsSameShaped(source, target)) {
InitTensorV2(&target, &source);
}
/* call _Squeeze function */
......
......@@ -20,6 +20,7 @@
*/
#include "Stack.h"
#include "IsSameShaped.h"
#include "../../XUtility.h"
#include "../../XName.h"
......@@ -37,7 +38,7 @@ void _Stack(const TensorList * smalls, XTensor * t, int dim)
for (int i = 1; i < count; i++) {
XTensor * tmp1 = smalls->GetItem(i);
XTensor * tmp2 = smalls->GetItem(i-1);
CheckNTErrors(XTensor::IsSameShaped(tmp1, tmp2), "The input tensors must be of the same size!");
CheckNTErrors(_IsSameShaped(tmp1, tmp2), "The input tensors must be of the same size!");
}
int blockSize = 1;
......@@ -165,7 +166,7 @@ void Stack(const TensorList &smalls, XTensor &t, int dim)
}
float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
InitTensor(&t, order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
InitTensorV2(&t, order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
/* destroy variables */
delete[] dimSize;
......
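Stack, added in this commit, inserts a new dimension of size smalls->count at dim; for dim = 0 each input simply becomes one contiguous block of the output. A standalone sketch on plain arrays (shapes and values are hypothetical, not the library call):

#include <cassert>

int main()
{
    // Two 2 x 3 inputs stacked at dim 0 give a 2 x 2 x 3 result.
    float a[6] = {0, 1, 2, 3, 4, 5};
    float b[6] = {6, 7, 8, 9, 10, 11};
    const float * smalls[2] = {a, b};

    float t[2][6];
    for (int i = 0; i < 2; i++)        // one contiguous block per input
        for (int j = 0; j < 6; j++)
            t[i][j] = smalls[i][j];

    assert(t[1][2] == 8.0f);
    return 0;
}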
......@@ -42,16 +42,15 @@ void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize)
CheckNTErrors((a->order == b->order - 1), "Unmatched tensors!");
CheckNTErrors((a->unitSize == b->unitSize), "Unmatched tensors!");
int dimRDI = b->order - dim - 1;
for (int i = 0; i < b->order; i++) {
if (i < dimRDI) {
CheckNTErrors((a->dimSizeRDI[i] == b->dimSizeRDI[i]), "Unmatched tensors!");
if (i < dim) {
CheckNTErrors((a->dimSize[i] == b->dimSize[i]), "Unmatched tensors!");
}
else if (i > dimRDI) {
CheckNTErrors((a->dimSizeRDI[i - 1] == b->dimSizeRDI[i]), "Unmatched tensors!");
else if (i > dim) {
CheckNTErrors((a->dimSize[i - 1] == b->dimSize[i]), "Unmatched tensors!");
}
else {
CheckNTErrors((dSize == b->dimSizeRDI[i]), "Unmatched tensors!");
CheckNTErrors((dSize == b->dimSize[i]), "Unmatched tensors!");
}
}
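With the RDI index gone, the replicated block in _Unsqueeze is the product of the source dimensions from dim onward, and each block appears dSize times in the output. A standalone sketch (shape hypothetical):

#include <cassert>

int main()
{
    // a = (2, 3) unsqueezed at dim = 1 with dSize = 2 gives b = (2, 2, 3).
    float a[2][3] = {{0, 1, 2}, {3, 4, 5}};
    float b[2][2][3];

    const int blockNum = 2, dSize = 2, blockSize = 3;  // blockSize = prod(dims[dim:])
    for (int n = 0; n < blockNum; n++)
        for (int d = 0; d < dSize; d++)
            for (int i = 0; i < blockSize; i++)
                b[n][d][i] = a[n][i];                  // each block copied dSize times

    assert(b[1][0][2] == 5.0f && b[1][1][2] == 5.0f);
    return 0;
}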
......@@ -60,8 +59,8 @@ void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize)
int blockNumA = 1;
int blockNumB = 1;
for (int i = 0; i < dimRDI; i++)
blockSize *= a->dimSizeRDI[i];
for (int i = dim; i < a->order; i++)
blockSize *= a->dimSize[i];
realBlockSize = blockSize * a->unitSize;
......@@ -184,7 +183,7 @@ void Unsqueeze(const XTensor &a, XTensor &b, int dim, int dSize)
}
float dr = (!a.isSparse) ? 1.0F : a.denseRatio;
InitTensor(&b, order, dimSize, a.dataType, dr, a.devID, a.mem);
InitTensorV2(&b, order, dimSize, a.dataType, dr, a.devID, a.mem);
/* destroy variables */
delete[] dimSize;
......
......@@ -235,9 +235,8 @@ void _CudaUnsqueeze(const XTensor * a, XTensor * b, int dim, int dSize)
int blockSize = 1;
int blockNumA = 1;
int blockNumB = 1;
int dimRDI = b->order - dim - 1;
for (int i = 0; i < dimRDI; i++)
blockSize *= a->dimSizeRDI[i];
for (int i = dim; i < a->order; i++)
blockSize *= a->dimSize[i];
blockNumA = a->unitNum / blockSize;
blockNumB = b->unitNum / blockSize;
......@@ -250,7 +249,7 @@ void _CudaUnsqueeze(const XTensor * a, XTensor * b, int dim, int dSize)
int devIDBackup = 0;
ProtectCudaDev(a->devID, devIDBackup);
if (dimRDI == 0) {
if (dim == b->order - 1) {
GDevs.GetCudaThread2D(a->devID, dSize, blockNumA, MAX_INT, cudaGrids, cudaBlocks);
if (a->dataType == X_FLOAT && b->dataType == X_FLOAT) {
......
......@@ -22,6 +22,8 @@
#include <math.h>
#include "../../XTensor.h"
#include "../movement/CopyValues.h"
#include "../shape/IsSameShaped.h"
#include "../utilities/SetAscendingOrder.h"
#include "../../XUtility.h"
#include "../../XName.h"
#include "Sort.h"
......@@ -40,14 +42,13 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim)
{
dim = MODX(dim, a->order);
CheckNTErrors((XTensor::IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((_IsSameShaped(a, b)), "Input tensors should have the same type!");
CheckNTErrors((dim >= 0 && dim < a->order), "Incorrect dimension specified!");
CheckNTErrors((a->order == index->order), "Unmatched input tensors!");
CheckNTErrors((index->dataType == X_INT), "Wrong data type!");
int dimRDI = a->order - dim - 1;
/* make the index tensor */
index->SetAscendingOrder(dim);
SetAscendingOrder(*index, dim);
if (a->devID >= 0) {
#ifdef USE_CUDA
......@@ -58,16 +59,16 @@ void _Sort(const XTensor * a, XTensor * b, XTensor * index, int dim)
}
else {
int stride = 1;
int strideNum = a->dimSizeRDI[dimRDI];
for (int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
int blockNum = 1;
for (int i = dimRDI + 1; i < a->order; i++)
blockNum *= a->dimSizeRDI[i];
int strideNum = a->dimSize[dim];
for (int i = 0; i < dim; i++)
blockNum *= a->dimSize[i];
for (int i = dim + 1; i < a->order; i++)
stride *= a->dimSize[i];
int blockSize = stride * strideNum;
_CopyValues(a, b);
for (int k = 0; k < blockNum; k++) {
for (int i = 0; i < stride; i++) {
void * dataB = (char*)b->data + (k * blockSize + i) * b->unitSize;
......
......@@ -217,20 +217,19 @@ void _CudaSortBig(const XTensor * a, XTensor * b, XTensor * indexA, XTensor * in
CheckNTErrors((a->order > dim && dim >= 0), "Incorrect dimension specified!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
int dimRDI = a->order - dim - 1;
if (k < 0 || k > b->dimSizeRDI[dimRDI])
k = b->dimSizeRDI[dimRDI];
if (k < 0 || k > b->dimSize[dim])
k = b->dimSize[dim];
XMem * mem = a->mem;
int stride = 1;
int strideNum = a->dimSizeRDI[dimRDI];
for (int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
int blockNum = 1;
for (int i = dimRDI + 1; i < a->order; i++)
blockNum *= a->dimSizeRDI[i];
int strideNum = a->dimSize[dim];
for (int i = 0; i < dim; i++)
blockNum *= a->dimSize[i];
for (int i = dim + 1; i < a->order; i++)
stride *= a->dimSize[i];
int m = GetNextPower2(strideNum);
int n = stride * blockNum;
......
......@@ -45,15 +45,14 @@ void _TopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
CheckNTErrors(index == NULL || a->order == index->order, "Unmatched input tensors!");
CheckNTErrors(index->dataType == X_INT, "Wrong data type!");
int dimRDI = a->order - dim - 1;
for (int i = 0; i < a->order; i++) {
if (i == dimRDI) {
CheckNTErrors(b->dimSizeRDI[i] == k, "A too large K");
CheckNTErrors(index == NULL || index->dimSizeRDI[i] == k, "Wrong size!");
if (i == dim) {
CheckNTErrors((b->dimSize[i] == k), "A too large K");
CheckNTErrors((index == NULL || index->dimSize[i] == k), "Wrong size!");
}
else {
CheckNTErrors(b->dimSizeRDI[i] == a->dimSizeRDI[i], "Wrong size!");
CheckNTErrors(index == NULL || index->dimSizeRDI[i] == a->dimSizeRDI[i], "Wrong size!");
CheckNTErrors((b->dimSize[i] == a->dimSize[i]), "Wrong size!");
CheckNTErrors((index == NULL || index->dimSize[i] == a->dimSize[i]), "Wrong size!");
}
}
......@@ -68,14 +67,14 @@ void _TopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
CheckNTErrors((a->dataType == DEFAULT_DTYPE), "TODO!");
int stride = 1;
int strideNumA = a->dimSizeRDI[dimRDI];
int strideNumB = b->dimSizeRDI[dimRDI];
for (int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
int blockNum = 1;
for (int i = dimRDI + 1; i < a->order; i++)
blockNum *= a->dimSizeRDI[i];
int strideNumA = a->dimSize[dim];
int strideNumB = b->dimSize[dim];
for (int i = 0; i < dim; i++)
blockNum *= a->dimSize[i];
for (int i = dim + 1; i < a->order; i++)
stride *= a->dimSize[i];
int blockSizeA = stride * strideNumA;
int blockSizeB = stride * strideNumB;
......
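Given that decomposition, top-k runs independently on each of the blockNum * stride slices of length strideNumA. A hedged standalone equivalent for one slice using std::partial_sort (the library's kernels differ; this only illustrates the per-slice semantics):

#include <algorithm>
#include <cassert>
#include <vector>

int main()
{
    std::vector<float> slice = {0.3f, 2.0f, -1.0f, 5.0f};  // one slice, hypothetical values
    const int k = 2;

    std::vector<int> index(slice.size());
    for (size_t i = 0; i < index.size(); i++)
        index[i] = (int)i;

    // Keep the k largest values (and their indices), as _TopK does per slice.
    std::partial_sort(index.begin(), index.begin() + k, index.end(),
                      [&](int x, int y) { return slice[x] > slice[y]; });

    assert(index[0] == 3 && index[1] == 1);                // values 5.0 and 2.0
    return 0;
}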
......@@ -22,6 +22,7 @@
#include "../../XDevice.h"
#include "../../XUtility.h"
#include "../../XTensor.h"
#include "../utilities/SetAscendingOrder.h"
#include "TopK.h"
#include "TopK.cuh"
#include "Sort.cuh"
......@@ -811,15 +812,14 @@ void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
CheckNTErrors((index->dataType == X_INT), "Wrong data type!");
CheckNTErrors((b->dimSize[dim] == k), "A too large K");
int dimRDI = a->order - dim - 1;
int stride = 1;
int strideNumA = a->dimSizeRDI[dimRDI];
for (int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
int blockNum = 1;
for (int i = dimRDI + 1; i < a->order; i++)
blockNum *= a->dimSizeRDI[i];
int strideNumA = a->dimSize[dim];
for (int i = 0; i < dim; i++)
blockNum *= a->dimSize[i];
for (int i = dim + 1; i < a->order; i++)
stride *= a->dimSize[i];
int workerNum = blockNum < 16 ? 64 : 32;
/* adjust the thread num according size of k for fitting the share memory size */
......@@ -828,7 +828,7 @@ void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
else if (k < 22) workerNum = 128;
else if (k < 44) workerNum = 64;
else workerNum = 32;
int cudaGrids[3];
int cudaBlocks[3];
......@@ -862,7 +862,7 @@ void _CudaTopK(const XTensor * a, XTensor * b, XTensor * index, int dim, int k)
//indexA->data = a->mem != NULL ? a->mem->AllocBuf(a->devID, a->unitNum * sizeof(int)) : XMemAlloc(a->devID, a->unitNum * sizeof(int));
/* make the index tensor */
//indexA->SetAscendingOrder(dim);
//SetAscendingOrder(*indexA, dim);
//_CudaSortBig(a, b, indexA, index, dim, k);
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2019-10-22
*/
#include "../../XTensor.h"
#include "../../XUtility.h"
#include "CheckData.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* compare two numbers */
bool IsFloatEqual(DTYPE a, DTYPE b, float absError, float relError)
{
if(a == b)
return true;
if(fabs(a - b) < absError)
return true;
if(fabs(a) < fabs(b))
return (fabs((a - b) / b) < relError) ? true : false;
else
return (fabs((a - b) / a) < relError) ? true : false;
}
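The predicate accepts either criterion and normalizes the relative test by the larger magnitude, so it is symmetric in a and b. A quick standalone check of what passes and fails (the tolerances below are illustrative, not library defaults):

#include <cassert>
#include <cmath>

// Standalone copy of the predicate above, for illustration.
static bool FloatEqual(float a, float b, float absError, float relError)
{
    if (a == b) return true;
    if (std::fabs(a - b) < absError) return true;
    float denom = std::fabs(a) < std::fabs(b) ? b : a;
    return std::fabs((a - b) / denom) < relError;
}

int main()
{
    assert(FloatEqual(1.0f, 1.0f + 1e-6f, 1e-4f, 1e-4f));  // passes on absolute error
    assert(FloatEqual(1000.0f, 1000.05f, 1e-4f, 1e-4f));   // passes on relative error
    assert(!FloatEqual(1.0f, 1.1f, 1e-4f, 1e-4f));         // fails both
    return 0;
}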
/* check whether the data array is the same as the answer
>> tensor - input tensor
>> d - input data (it must be on CPUs)
>> num - number of data items
>> beg - where we start this in the data array of the tensor
*/
bool _CheckData(const XTensor * tensor, const void * d, int num, int beg)
{
if (tensor->data == NULL || d == NULL)
return false;
CheckNTErrors(!tensor->isSparse, "TODO");
CheckNTErrors(num == tensor->unitNum - beg, "Illegal size!");
if (tensor->devID < 0) {
return !memcmp(tensor->data, d, num * tensor->unitSize);
}
#ifdef USE_CUDA
else {
char * copy = new char[num * tensor->unitSize];
XMemCopy(copy, -1, tensor->data, tensor->devID, num * tensor->unitSize);
int cmpResult = memcmp(copy, d, num * tensor->unitSize);
bool result = (cmpResult == 0) ? true : false;
delete[] copy;
return result;
}
#endif
return true;
}
/* check whether the data array is the same as the answer
>> tensor - input tensor
>> d - input data (it must be on CPUs)
>> num - number of data items
>> tolerance - the error tolerance between the result and the answer
>> beg - where we start this in the data array of the tensor
*/
bool _CheckData(const XTensor * tensor, const void * d, int num, float tolerance, int beg)
{
if (tensor->data == NULL || d == NULL)
return false;
CheckNTErrors(!tensor->isSparse, "TODO");
CheckNTErrors(num == tensor->unitNum - beg, "Illegal size!");
DTYPE * valuePrt = (DTYPE*)tensor->data;
DTYPE value = 0;
DTYPE * answerPrt = (DTYPE*)d;
for (int i = beg; i < num; i++) {
value = ToCPU(tensor->devID, valuePrt);
if(IsFloatEqual(value, *answerPrt, tolerance, 1e-4F) == false)
return false;
valuePrt++;
answerPrt++;
}
return true;
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
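A hedged host-only equivalent of the tolerant comparison loop above (the per-element ToCPU copy is specific to device tensors and is omitted; the function name is illustrative):

#include <cmath>
#include <cstdio>

// Compare `num` host floats against an expected answer with a mixed
// absolute/relative tolerance, mirroring the loop in _CheckData.
static bool CheckHostData(const float * value, const float * answer,
                          int num, float tolerance)
{
    for (int i = 0; i < num; i++) {
        float a = value[i], b = answer[i];
        if (a == b || std::fabs(a - b) < tolerance)
            continue;
        float denom = std::fabs(a) < std::fabs(b) ? b : a;
        if (std::fabs((a - b) / denom) >= 1e-4f)   // same relError as the call above
            return false;
    }
    return true;
}

int main()
{
    float got[3] = {1.0f, 2.00001f, 3.0f};
    float want[3] = {1.0f, 2.0f, 3.0f};
    printf("%s\n", CheckHostData(got, want, 3, 1e-4f) ? "match" : "mismatch");
    return 0;
}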
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2019-10-22
*/
#ifndef __CHECKDATA_H__
#define __CHECKDATA_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* check whether the data array is the same as the answer */
bool _CheckData(const XTensor * tensor, const void * answer, int num, int beg = 0);
/* check whether the data array is the same as the answer */
bool _CheckData(const XTensor * tensor, const void * answer, int num, float tolerance, int beg = 0);
} // namespace nts(NiuTrans.Tensor)
#endif // __CHECKDATA_H__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2019-10-23
*/
#include "../../XTensor.h"
#include "SetAscendingOrder.cuh"
#include "SetAscendingOrder.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
set the cell to the ascending order along a given dimension
>> tensor - input tensor
>> dim - the dimension specified
*/
void SetAscendingOrder(XTensor & tensor, int dim)
{
CheckNTErrors(dim < tensor.order, "Wrong dimension specified!");
CheckNTErrors(tensor.dataType == X_INT, "TODO!");
if(dim < 0){
int o = tensor.order;
int ds[MAX_TENSOR_DIM_NUM];
memcpy(ds, tensor.dimSize, sizeof(int) * tensor.order);
tensor.Reshape(tensor.unitNum);
SetAscendingOrder(tensor, 0);
tensor.Reshape(o, ds);
return;
}
if(tensor.devID >= 0){
#ifdef USE_CUDA
CudaSetAscendingOrder(&tensor, dim);
#else
ShowNTErrors("Plesae specify USE_CUDA and recompile the code!");
#endif
}
else{
int stride = 1;
int blockNum = 1;
int strideNum = tensor.dimSize[dim];
for(int i = 0; i < dim; i++)
blockNum *= tensor.dimSize[i];
for(int i = dim + 1; i < tensor.order; i++)
stride *= tensor.dimSize[i];
for(int k = 0; k < blockNum; k++){
for(int j = 0; j < strideNum; j++){
int * d = (int*)tensor.data + stride * strideNum * k + stride * j;
for(int i = 0; i < stride; i++)
d[i] = j;
}
}
}
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
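On a concrete shape, the CPU branch above behaves as follows: for a (2, 3) int tensor and dim = 1, every row becomes 0, 1, 2. A standalone sketch of the same triple loop (shape hypothetical):

#include <cassert>

int main()
{
    // tensor shape (2, 3), dim = 1: blockNum = 2, strideNum = 3, stride = 1
    int data[2][3] = {};
    const int blockNum = 2, strideNum = 3, stride = 1;

    int * base = &data[0][0];
    for (int k = 0; k < blockNum; k++)
        for (int j = 0; j < strideNum; j++) {
            int * d = base + stride * strideNum * k + stride * j;
            for (int i = 0; i < stride; i++)
                d[i] = j;              // index along `dim`, repeated over the stride
        }

    assert(data[0][2] == 2 && data[1][0] == 0);
    return 0;
}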
......@@ -67,15 +67,14 @@ void CudaSetAscendingOrder(XTensor * a, int dim)
{
CheckNTErrors((a->dataType == X_INT), "TODO!");
int dimRDI = a->order - dim - 1;
int stride = 1;
int strideNum = a->dimSizeRDI[dimRDI];
for(int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
int stride = 1;
int blockNum = 1;
for(int i = dimRDI + 1; i < a->order; i++)
blockNum *= a->dimSizeRDI[i];
int strideNum = a->dimSize[dim];
for(int i = 0; i < dim; i++)
blockNum *= a->dimSize[i];
for(int i = dim + 1; i < a->order; i++)
stride *= a->dimSize[i];
int gridSize[3];
int blockSize[3];
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2019-10-23
*/
#ifndef __SETASCENDINGORDER_H__
#define __SETASCENDINGORDER_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* set the cell to the ascending order along a given dimension */
void SetAscendingOrder(XTensor & tensor, int dim);
} // namespace nts(NiuTrans.Tensor)
#endif // __SETASCENDINGORDER_H__
\ No newline at end of file
......@@ -68,7 +68,7 @@ void _Dropout(const XTensor * x, XTensor * y, unsigned int seed, DTYPE dropProb,
for (int i = 0; i < unitNum; i++)
maskArray[i] = RandomBernoulli(dropProb, scaleFactor);
XTensor * mask = NewTensor1D(unitNum, x->dataType, x->devID, x->mem);
XTensor * mask = NewTensor1DV2(unitNum, x->dataType, x->devID, x->mem);
mask->SetData(maskArray, unitNum);
/* call Multiply function for mask */
......@@ -113,7 +113,7 @@ void _DropoutBackward(const XTensor * y, const XTensor * x,
for (int i = 0; i < unitNum; i++)
maskArray[i] = RandomBernoulli(dropProb, scaleFactor);
XTensor * mask = NewTensor1D(unitNum, x->dataType, x->devID, x->mem);
XTensor * mask = NewTensor1DV2(unitNum, x->dataType, x->devID, x->mem);
mask->SetData(maskArray, unitNum);
/* call MultiplyDim function for mask */
......@@ -149,13 +149,12 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
XTensor mask;
// int * maskArrayInt = NULL;
DTYPE * maskArray = NULL;
DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);
if(leadingDim < 0 && leadingDim2 < 0){
XTensor mask;
InitTensor(&mask, &x);
InitTensorV2(&mask, &x);
_SetDataRandP(&mask, 0, 1.0F, dropProb, scaleFactor);
......@@ -169,7 +168,7 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
maskArrayInt[i] = rand() % x.unitNum;
XTensor maskindex;
InitTensor1D(&maskindex, unitNum, X_INT, x.devID, x.mem);
InitTensor1DV2(&maskindex, unitNum, X_INT, x.devID, x.mem);
maskindex.SetData(maskArrayInt, unitNum);
......@@ -192,7 +191,7 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
maskArray[i] = RandomBernoulli(dropProb, scaleFactor);
XTensor mask;
InitTensor1D(&mask, unitNum, x.dataType, x.devID, x.mem);
InitTensor1DV2(&mask, unitNum, x.dataType, x.devID, x.mem);
mask.SetData(maskArray, unitNum);
delete[] maskArray;
......@@ -221,7 +220,7 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
dims[n] = x.GetDim(n);
dims[m] = x.GetDim(m);
InitTensor(&mask, x.order, dims, x.dataType, x.denseRatio,x.devID, x.mem);
InitTensorV2(&mask, x.order, dims, x.dataType, x.denseRatio,x.devID, x.mem);
mask.SetData(maskArray, unitNum);
delete[] maskArray;
......@@ -251,7 +250,7 @@ XTensor DropoutWithoutBroadcast(const XTensor &x, DTYPE dropProb)
maskArray[i] = RandomBernoulli(dropProb, scaleFactor);
XTensor mask;
InitTensor(&mask, x.order, x.dimSize, x.dataType, x.denseRatio, x.devID, x.mem);
InitTensorV2(&mask, x.order, x.dimSize, x.dataType, x.denseRatio, x.devID, x.mem);
mask.SetData(maskArray, unitNum);
delete[] maskArray;
......@@ -259,4 +258,4 @@ XTensor DropoutWithoutBroadcast(const XTensor &x, DTYPE dropProb)
return Multiply(x, mask);
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
} // namespace nts(NiuTrans.Tensor)
......@@ -68,7 +68,7 @@ XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale)
dimSize[i] = x.dimSize[i];
}
InitTensor1D(&c, x.unitNum, x.dataType, x.devID, x.mem);
InitTensor1DV2(&c, x.unitNum, x.dataType, x.devID, x.mem);
_SetDataFixedFloat(&c, 1.0F);
......
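All of the dropout variants above use inverted scaling: surviving activations are multiplied by 1 / (1 - dropProb) at training time so the expected value is unchanged and inference needs no rescaling. A standalone sketch of the mask construction (std::bernoulli_distribution stands in for RandomBernoulli; values are hypothetical):

#include <cstdio>
#include <random>

int main()
{
    const float dropProb = 0.3f;
    const float scaleFactor = 1.0f / (1.0f - dropProb);    // same factor as above

    std::mt19937 rng(42);
    std::bernoulli_distribution keep(1.0 - dropProb);

    float x[8] = {1, 2, 3, 4, 5, 6, 7, 8}, y[8];
    for (int i = 0; i < 8; i++) {
        float mask = keep(rng) ? scaleFactor : 0.0f;       // RandomBernoulli stand-in
        y[i] = x[i] * mask;                                // E[y] == x
    }

    for (int i = 0; i < 8; i++)
        printf("%g ", y[i]);
    printf("\n");
    return 0;
}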
......@@ -21,6 +21,7 @@
#include <stdlib.h>
#include "../XName.h"
#include "../../tensor/core/shape/IsSameShaped.h"
#include "HardTanH.h"
#include "HardTanH.cuh"
......@@ -36,7 +37,7 @@ y = 1 if x > 1
*/
void _HardTanH(const XTensor * x, XTensor * y)
{
CheckNTErrors(XTensor::IsSameShaped(x, y),
CheckNTErrors(_IsSameShaped(x, y),
"The input tensor and output tensor must have the same shape!")
#ifdef USE_CUDA
......@@ -87,8 +88,8 @@ XTensor HardTanH(const XTensor &x)
void HardTanH(const XTensor &x, XTensor &y)
{
if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) {
InitTensor(&y, &x);
if (!y.isInit || !IsSameShaped(y, x)) {
InitTensorV2(&y, &x);
}
/* call _HardTanH function */
......
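The elementwise definition restated as a scalar helper, for reference; a sketch, not the library kernel:

#include <cassert>

// y = 1 if x > 1; y = x if -1 <= x <= 1; y = -1 if x < -1
static float HardTanHScalar(float x)
{
    if (x > 1.0f)  return 1.0f;
    if (x < -1.0f) return -1.0f;
    return x;
}

int main()
{
    assert(HardTanHScalar(2.5f) == 1.0f);
    assert(HardTanHScalar(-3.0f) == -1.0f);
    assert(HardTanHScalar(0.25f) == 0.25f);
    return 0;
}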
......@@ -23,6 +23,7 @@
#include "../XName.h"
#include "../XUtility.h"
#include "../core/movement/CopyValues.h"
#include "../core/shape/IsSameShaped.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -33,7 +34,7 @@ identity function y = x
*/
void _Identity(const XTensor * x, XTensor * y)
{
CheckNTErrors(XTensor::IsSameShaped(x, y),
CheckNTErrors(_IsSameShaped(x, y),
"The input tensor and output tensor must have the same shape!")
_CopyValues(x, y);
}
......@@ -63,8 +64,8 @@ XTensor Identity(const XTensor &x)
void Identity(const XTensor &x, XTensor &y)
{
if (!y.isInit || !y.IsSameShaped(&y, &x)) {
InitTensor(&y, &x);
if (!y.isInit || !IsSameShaped(y, x)) {
InitTensorV2(&y, &x);
}
/* call _Identity function */
......
......@@ -27,6 +27,7 @@
#include "../core/reduce/ReduceSum.h"
#include "../core/reduce/ReduceMax.h"
#include "../core/movement/CopyValues.h"
#include "../core/shape/IsSameShaped.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -49,7 +50,6 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
return;
}
int leadDimRDI = x->order - leadDim - 1;
if (!x->isSparse && !y->isSparse &&
x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE)
{
......@@ -69,36 +69,36 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
XTensor * blockMax = NULL;
XTensor * blockSum = NULL;
int dimensionSize = y->dimSizeRDI[leadDimRDI];
int dimensionSize = y->dimSize[leadDim];
int stride = 1;
int blockSize = 1;
int blockNum = 1;
for (int i = 0; i < leadDimRDI; i++)
stride *= y->dimSizeRDI[i];
for (int i = leadDim + 1; i < x->order; i++)
stride *= y->dimSize[i];
blockSize = stride * dimensionSize;
blockNum = y->unitNum / blockSize;
max = NewTensorBuf(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem);
sum = NewTensorBuf(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem);
max = NewTensorBufV2(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem);
sum = NewTensorBufV2(x->order - 1, dimSize, x->dataType, x->denseRatio, x->devID, mem);
_ReduceMax(x, max, leadDim);
_ReduceSum(x, sum, leadDim, max, 1.0F, true);
if (x->devID >= 0) {
if(leadDimRDI == 0){
if(leadDim == x->order - 1){
blockSize = y->unitNum;
blockNum = 1;
blockx = NewTensor2D(blockSize/dimensionSize, -dimensionSize, x->dataType, x->devID, mem);
blocky = NewTensor2D(blockSize/dimensionSize, -dimensionSize, x->dataType, x->devID, mem);
blockMax = NewTensor2D(blockSize/dimensionSize, -1, x->dataType, x->devID, mem);
blockSum = NewTensor2D(blockSize/dimensionSize, -1, x->dataType, x->devID, mem);
blockx = NewTensor2DV2(blockSize/dimensionSize, -dimensionSize, x->dataType, x->devID, mem);
blocky = NewTensor2DV2(blockSize/dimensionSize, -dimensionSize, x->dataType, x->devID, mem);
blockMax = NewTensor2DV2(blockSize/dimensionSize, -1, x->dataType, x->devID, mem);
blockSum = NewTensor2DV2(blockSize/dimensionSize, -1, x->dataType, x->devID, mem);
}
else{
blockx = NewTensor2D(-stride, dimensionSize, x->dataType, x->devID, mem);
blocky = NewTensor2D(-stride, dimensionSize, x->dataType, x->devID, mem);
blockMax = NewTensor2D(-stride, 1, x->dataType, x->devID, mem);
blockSum = NewTensor2D(-stride, 1, x->dataType, x->devID, mem);
blockx = NewTensor2DV2(-stride, dimensionSize, x->dataType, x->devID, mem);
blocky = NewTensor2DV2(-stride, dimensionSize, x->dataType, x->devID, mem);
blockMax = NewTensor2DV2(-stride, 1, x->dataType, x->devID, mem);
blockSum = NewTensor2DV2(-stride, 1, x->dataType, x->devID, mem);
}
}
......@@ -137,7 +137,7 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
blockMax->data = mp;
blockSum->data = sp;
#ifdef USE_CUDA
if(leadDimRDI == 0)
if(leadDim == x->order - 1)
_CudaLogSoftmaxSumMax(blockx, blocky, 1, blockSum, blockMax);
else
_CudaLogSoftmaxSumMax(blockx, blocky, leadDim, blockSum, blockMax);
......@@ -210,8 +210,8 @@ void LogSoftmax(const XTensor &x, XTensor &y, int leadDim)
if (ld < 0)
ld = x.order - 1;
if (!y.isInit || !XTensor::IsSameShaped(&y, &x)) {
InitTensor(&y, &x);
if (!y.isInit || !IsSameShaped(y, x)) {
InitTensorV2(&y, &x);
}
/* call _LogSoftmax function */
......@@ -298,7 +298,6 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
if(leadDim < 0)
leadDim = y->order - 1;
int leadDimRDI = y->order - leadDim - 1;
#ifdef USE_CUDA
if (gold->devID >= 0) {
_CudaLogSoftmaxBackward(gold, y, x, dedy, dedx, padding, leadDim, lossName);
......@@ -306,12 +305,12 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
}
#endif
int dimensionSize = y->dimSizeRDI[leadDimRDI];
int dimensionSize = y->dimSize[leadDim];
int stride = 1;
int blockSize = 1;
int blockNum = 1;
for (int i = 0; i < leadDimRDI; i++)
stride *= y->dimSizeRDI[i];
for (int i = leadDim + 1; i < y->order; i++)
stride *= y->dimSize[i];
blockSize = stride * dimensionSize;
blockNum = y->unitNum / blockSize;
......@@ -338,10 +337,10 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
int key = gold->GetKeyInSparse(i);
DTYPE value = gold->GetInSparse(i);
int offset = key;
if (dedx->dimSizeRDI[0] != gm) {
if (dedx->dimSize[dedx->order - 1] != gm) {
int mi = key % gm;
int ni = key / gm;
int key2 = ni * dedx->dimSizeRDI[0] + mi;
int key2 = ni * dedx->dimSize[dedx->order - 1] + mi;
offset = key2;
}
if (key >= 0 && key < size)
......@@ -352,7 +351,7 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
}
}
else {
CheckNTErrors((XTensor::IsSameShaped(gold, y)), "The tensors must be of the same size!");
CheckNTErrors((_IsSameShaped(gold, y)), "The tensors must be of the same size!");
for (int k = 0; k < blockNum; k++) {
gp = (DTYPE*)gold->data + k * blockSize;
op = (DTYPE*)y->data + k * blockSize;
......@@ -395,10 +394,10 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
int key = gold->GetKeyInSparse(i);
DTYPE value = gold->GetInSparse(i);
int offset = key;
if (dedx->dimSizeRDI[0] != gm) {
if (dedx->dimSize[dedx->order - 1] != gm) {
int mi = key % gm;
int ni = key / gm;
int key2 = ni * dedx->dimSizeRDI[0] + mi;
int key2 = ni * dedx->dimSize[dedx->order - 1] + mi;
offset = key2;
}
if (key >= 0 && key < size)
......@@ -406,7 +405,7 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
}
}
else {
CheckNTErrors((XTensor::IsSameShaped(gold, y)), "The tensors must be of the same size!");
CheckNTErrors((_IsSameShaped(gold, y)), "The tensors must be of the same size!");
for (int k = 0; k < blockNum; k++) {
gp = (DTYPE*)gold->data + k * blockSize;
op = (DTYPE*)y->data + k * blockSize;
......@@ -430,11 +429,11 @@ void _LogSoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
/* for columns with no xs we set dE/ds = 0 */
if (gold != NULL && gold->isSparse) {
CheckNTErrors((gold->order == 2), "The gold standard tensor must be of order 2!");
if ((gold->dimSize[1] > 1 && !gold->isAllValued[0]) || gold->dimSize[1] != dedx->dimSizeRDI[0]) {
if ((gold->dimSize[1] > 1 && !gold->isAllValued[0]) || gold->dimSize[1] != dedx->dimSize[dedx->order - 1]) {
int gn = gold->dimSize[0];
int gm = gold->dimSize[1];
int sm = dedx->dimSizeRDI[0];
int sn = dedx->dimSizeRDI[1];
int sm = dedx->dimSize[dedx->order - 1];
int sn = dedx->dimSize[dedx->order - 2];
int * flags = new int[sm];
memset(flags, 0, sizeof(int)*sm);
......
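The max and shifted-sum buffers above implement the standard stabilized form log softmax(x)_i = (x_i - max) - log(sum_j exp(x_j - max)). A standalone sketch over one row (values hypothetical):

#include <cmath>
#include <cstdio>

int main()
{
    float x[4] = {1.0f, 2.0f, 3.0f, 4.0f};

    // Subtract the max before exponentiating so exp() cannot overflow.
    float mx = x[0];
    for (int i = 1; i < 4; i++)
        mx = std::fmax(mx, x[i]);

    float sum = 0.0f;
    for (int i = 0; i < 4; i++)
        sum += std::exp(x[i] - mx);

    for (int i = 0; i < 4; i++)
        printf("%f ", (x[i] - mx) - std::log(sum));  // log softmax, as in _LogSoftmax
    printf("\n");
    return 0;
}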