Commit 3d6f1230 by liyinqiao

Support scalar tensor & bug fixed.

1. Support scalar tensor for Sum operation;
2. Bounds check;
3. GetSumDimIndex function bug fixed;
4. New unit test for Sum function;
5. Minor bugs fixed.
parent 90052ba7
...@@ -863,7 +863,7 @@ XTensor * NewTensor(const XTensor * a, bool isFilledData) ...@@ -863,7 +863,7 @@ XTensor * NewTensor(const XTensor * a, bool isFilledData)
memset(dims, 0, sizeof(int) * MAX_TENSOR_DIM_NUM); memset(dims, 0, sizeof(int) * MAX_TENSOR_DIM_NUM);
if(a->order > 0) if(a->order >= 0)
memcpy(dims, a->dimSize, sizeof(int) * a->order); memcpy(dims, a->dimSize, sizeof(int) * a->order);
if(!isFilledData) if(!isFilledData)
......
...@@ -114,7 +114,7 @@ constructor ...@@ -114,7 +114,7 @@ constructor
*/ */
XTensor::XTensor(const int myOrder, int myDevID, XMem * myMem) XTensor::XTensor(const int myOrder, int myDevID, XMem * myMem)
{ {
CheckNTErrors((myOrder >= 0), "Illegal tensor order1"); CheckNTErrors((myOrder >= 0), "Illegal tensor order!");
Init(); Init();
SetDataPointer(); SetDataPointer();
...@@ -725,7 +725,7 @@ get offset (3D) ...@@ -725,7 +725,7 @@ get offset (3D)
*/ */
MTYPE XTensor::GetOffset3D(int d0, int d1, int d2) const MTYPE XTensor::GetOffset3D(int d0, int d1, int d2) const
{ {
CheckNTErrors(order == 3, "Cannot get a 3d cell for a tensor whose order is not 2!"); CheckNTErrors(order == 3, "Cannot get a 3d cell for a tensor whose order is not 3!");
CheckNTErrors(d0 >= 0 && d0 < dimSize[0], "dimension 0 is out of range!"); CheckNTErrors(d0 >= 0 && d0 < dimSize[0], "dimension 0 is out of range!");
CheckNTErrors(d1 >= 0 && d1 < dimSize[1], "dimension 1 is out of range!"); CheckNTErrors(d1 >= 0 && d1 < dimSize[1], "dimension 1 is out of range!");
CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!"); CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!");
...@@ -1067,7 +1067,7 @@ get the value of a cell in a 3d tensor ...@@ -1067,7 +1067,7 @@ get the value of a cell in a 3d tensor
*/ */
DTYPE XTensor::Get3D(int d0, int d1, int d2) const DTYPE XTensor::Get3D(int d0, int d1, int d2) const
{ {
CheckNTErrors((order == 3), "Cannot get a 2d cell for a tensor whose order is not 2!"); CheckNTErrors((order == 3), "Cannot get a 2d cell for a tensor whose order is not 3!");
CheckNTErrors((d0 >= 0 && d0 < dimSize[0]), "dimension 0 is out of range!"); CheckNTErrors((d0 >= 0 && d0 < dimSize[0]), "dimension 0 is out of range!");
CheckNTErrors((d1 >= 0 && d1 < dimSize[1]), "dimension 1 is out of range!"); CheckNTErrors((d1 >= 0 && d1 < dimSize[1]), "dimension 1 is out of range!");
CheckNTErrors((d2 >= 0 && d2 < dimSize[2]), "dimension 2 is out of range!"); CheckNTErrors((d2 >= 0 && d2 < dimSize[2]), "dimension 2 is out of range!");
...@@ -1155,7 +1155,7 @@ get the value of a cell in a 3d tensor in int type ...@@ -1155,7 +1155,7 @@ get the value of a cell in a 3d tensor in int type
*/ */
int XTensor::Get3DInt(int d0, int d1, int d2) const int XTensor::Get3DInt(int d0, int d1, int d2) const
{ {
CheckNTErrors(order == 3, "Cannot get a 2d cell for a tensor whose order is not 2!"); CheckNTErrors(order == 3, "Cannot get a 2d cell for a tensor whose order is not 3!");
CheckNTErrors(d0 >= 0 && d0 < dimSize[0], "dimension 0 is out of range!"); CheckNTErrors(d0 >= 0 && d0 < dimSize[0], "dimension 0 is out of range!");
CheckNTErrors(d1 >= 0 && d1 < dimSize[1], "dimension 1 is out of range!"); CheckNTErrors(d1 >= 0 && d1 < dimSize[1], "dimension 1 is out of range!");
CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!"); CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!");
...@@ -1288,7 +1288,7 @@ set the value of a cell in a 3d tensor in default type ...@@ -1288,7 +1288,7 @@ set the value of a cell in a 3d tensor in default type
*/ */
bool XTensor::Set3D(DTYPE value, int d0, int d1, int d2) bool XTensor::Set3D(DTYPE value, int d0, int d1, int d2)
{ {
CheckNTErrors(order == 3, "Cannot get a 2d cell for a tensor whose order is not 2!"); CheckNTErrors(order == 3, "Cannot get a 2d cell for a tensor whose order is not 3!");
CheckNTErrors(d0 >= 0 && d0 < dimSize[0], "dimension 0 is out of range!"); CheckNTErrors(d0 >= 0 && d0 < dimSize[0], "dimension 0 is out of range!");
CheckNTErrors(d1 >= 0 && d1 < dimSize[1], "dimension 1 is out of range!"); CheckNTErrors(d1 >= 0 && d1 < dimSize[1], "dimension 1 is out of range!");
CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!"); CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!");
...@@ -1390,7 +1390,7 @@ set the integer value of a cell in a 3d tensor in default type ...@@ -1390,7 +1390,7 @@ set the integer value of a cell in a 3d tensor in default type
*/ */
bool XTensor::Set3DInt(int value, int d0, int d1, int d2) bool XTensor::Set3DInt(int value, int d0, int d1, int d2)
{ {
CheckNTErrors(order == 3, "Cannot get a 2d cell for a tensor whose order is not 2!"); CheckNTErrors(order == 3, "Cannot get a 2d cell for a tensor whose order is not 3!");
CheckNTErrors(d0 >= 0 && d0 < dimSize[0], "dimension 0 is out of range!"); CheckNTErrors(d0 >= 0 && d0 < dimSize[0], "dimension 0 is out of range!");
CheckNTErrors(d1 >= 0 && d1 < dimSize[1], "dimension 1 is out of range!"); CheckNTErrors(d1 >= 0 && d1 < dimSize[1], "dimension 1 is out of range!");
CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!"); CheckNTErrors(d2 >= 0 && d2 < dimSize[2], "dimension 2 is out of range!");
...@@ -1733,6 +1733,9 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, ...@@ -1733,6 +1733,9 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
if(isInit){ if(isInit){
fprintf(file, "order=%d dimsize=", order); fprintf(file, "order=%d dimsize=", order);
if (order == 0) {
fprintf(file, "%d,", dimSize[0]);
}
for (int i = 0; i < order; i++) { for (int i = 0; i < order; i++) {
fprintf(file, "%d", dimSize[i]); fprintf(file, "%d", dimSize[i]);
if (i < order - 1) if (i < order - 1)
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "../../XBLAS.h" #include "../../XBLAS.h"
#include "../movement/CopyValues.h" #include "../movement/CopyValues.h"
#include "../shape/IsSameShaped.h" #include "../shape/IsSameShaped.h"
#include "../math/ScaleAndShift.h"
#include "Sum.h" #include "Sum.h"
#include "Sum.cuh" #include "Sum.cuh"
#include "SumDim.h" #include "SumDim.h"
...@@ -187,21 +188,26 @@ int GetSumDimIndex(const XTensor &a, const XTensor &b) ...@@ -187,21 +188,26 @@ int GetSumDimIndex(const XTensor &a, const XTensor &b)
if(IsSameShaped(a, b)) if(IsSameShaped(a, b))
return -1; return -1;
int hitCount = 0;
int hitDim = -1; int hitDim = -1;
bool isHit = false;
for(int i = 0; i < b.order; i++){ for(int i = 0; i < b.order; i++){
if(b.dimSize[b.order - 1 - i] == 1) if(b.dimSize[b.order - 1 - i] == 1)
continue; continue;
else if(b.dimSize[b.order - 1 - i] == a.dimSize[a.order - 1 - i]){ else {
hitCount++; if (isHit == true)
hitDim = a.order - b.order + i; return -1;
else
isHit = true;
for (int j = 0; j < a.order; j++){
if (b.dimSize[b.order - 1 - i] == a.dimSize[a.order - 1 - j]){
hitDim = a.order - 1 - j;
break;
}
}
} }
} }
if(hitCount == 1)
return hitDim; return hitDim;
else
return -1;
} }
/* /*
...@@ -218,6 +224,11 @@ XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta) ...@@ -218,6 +224,11 @@ XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta)
XTensor c(&a); XTensor c(&a);
c.SetTMPFlag(); c.SetTMPFlag();
if (b.order == 0){
DTYPE shift = b.Get0D() * beta;
ScaleAndShift(a, c, 1.0F, shift);
}
else {
int n = GetSumDimIndex(a, b); int n = GetSumDimIndex(a, b);
if(n == -1){ if(n == -1){
...@@ -244,7 +255,7 @@ XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta) ...@@ -244,7 +255,7 @@ XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta)
else{ else{
ShowNTErrors("Something is wrong!"); ShowNTErrors("Something is wrong!");
} }
}
return c; return c;
} }
...@@ -261,6 +272,11 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta) ...@@ -261,6 +272,11 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
InitTensorV2(&c, &a); InitTensorV2(&c, &a);
} }
if (b.order == 0){
DTYPE shift = b.Get0D() * beta;
ScaleAndShift(a, c, 1.0F, shift);
}
else {
int n = GetSumDimIndex(a, b); int n = GetSumDimIndex(a, b);
if (n == -1) { if (n == -1) {
...@@ -287,6 +303,7 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta) ...@@ -287,6 +303,7 @@ void Sum(const XTensor &a, const XTensor &b, XTensor &c, DTYPE beta)
else { else {
ShowNTErrors("Something is wrong!"); ShowNTErrors("Something is wrong!");
} }
}
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -215,6 +215,305 @@ bool TestSum2() ...@@ -215,6 +215,305 @@ bool TestSum2()
#endif // USE_CUDA #endif // USE_CUDA
} }
/*
case 3: tensor summation c = a + b * \beta, where b is a scalar (order-0) tensor.
With b = -1 and beta = 2, every element of a is expected to be shifted by -2.
>> return - true if the CPU check (and, when USE_CUDA is defined, the GPU check) passes
*/
bool TestSum3()
{
    /* a tensor of size (2, 4) */
    int aOrder = 2;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 2;
    aDimSize[1] = 4;
    int aUnitNum = 1;
    for (int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    /* a scalar: order 0, so no dimension sizes are filled in and it holds one unit */
    int bOrder = 0;
    int * bDimSize = new int[MAX_TENSOR_DIM_NUM];
    int bUnitNum = 1;

    /* a tensor of size (2, 4) */
    int cOrder = 2;
    int * cDimSize = new int[cOrder];
    cDimSize[0] = 2;
    cDimSize[1] = 4;
    int cUnitNum = 1;
    for (int i = 0; i < cOrder; i++)
        cUnitNum *= cDimSize[i];

    DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE bData[1] = {-1.0F};
    DTYPE beta = 2.0F;

    /* answer = aData + bData[0] * beta */
    DTYPE answer[2][4] = { {-2.0F, -1.0F, 0.0F, 1.0F},
                           {2.0F, 3.0F, 4.0F, 5.0F} };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensorV2(aOrder, aDimSize);
    XTensor * b = NewTensorV2(bOrder, bDimSize);
    XTensor cUser;

    /* initialize variables */
    a->SetData(aData, aUnitNum);
    b->SetData(bData, bUnitNum);

    /* call Sum function */
    cUser = Sum(*a, *b, beta);

    /* check results */
    cpuTest = _CheckData(&cUser, answer, cUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensorV2(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensorV2(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
    XTensor cUserGPU;

    /* Initialize variables */
    aGPU->SetData(aData, aUnitNum);
    bGPU->SetData(bData, bUnitNum);

    /* call Sum function */
    cUserGPU = Sum(*aGPU, *bGPU, beta);

    /* check results (compare against the same element count as the CPU check) */
    gpuTest = _CheckData(&cUserGPU, answer, cUnitNum);

    /* destroy variables */
    delete a;
    delete b;
    delete aGPU;
    delete bGPU;
    delete[] aDimSize;
    delete[] bDimSize;
    delete[] cDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete a;
    delete b;
    delete[] aDimSize;
    delete[] bDimSize;
    delete[] cDimSize;

    return cpuTest;
#endif // USE_CUDA
}
/*
case 4: tensor summation c = a + b * \beta, where b is a 1d tensor of size (4)
and a is a 3d tensor of size (3, 4, 2). The expected answer applies b * beta
along dimension 1 of a (the only dimension of a whose size is 4).
>> return - true if the CPU check (and, when USE_CUDA is defined, the GPU check) passes
*/
bool TestSum4()
{
    /* a tensor of size (3, 4, 2) */
    int aOrder = 3;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 3;
    aDimSize[1] = 4;
    aDimSize[2] = 2;
    int aUnitNum = 1;
    for (int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    /* a tensor of size (4) */
    int bOrder = 1;
    int * bDimSize = new int[bOrder];
    bDimSize[0] = 4;
    int bUnitNum = 1;
    for (int i = 0; i < bOrder; i++)
        bUnitNum *= bDimSize[i];

    /* a tensor of size (3, 4, 2) */
    int cOrder = 3;
    int * cDimSize = new int[cOrder];
    cDimSize[0] = 3;
    cDimSize[1] = 4;
    cDimSize[2] = 2;
    int cUnitNum = 1;
    for (int i = 0; i < cOrder; i++)
        cUnitNum *= cDimSize[i];

    DTYPE aData[3][4][2] = { { {0.0F, 1.0F}, {2.0F, 3.0F}, {4.0F, 5.0F}, {6.0F, 7.0F} },
                             { {0.0F, -1.0F}, {-2.0F, -3.0F}, {-4.0F, -5.0F}, {-6.0F, -7.0F} },
                             { {0.0F, 1.0F}, {2.0F, 3.0F}, {4.0F, 5.0F}, {6.0F, 7.0F} } };
    DTYPE bData[4] = {-1.0F, 0.0F, 1.0F, 2.0F};
    DTYPE beta = 2.0F;

    /* answer[i][j][k] = aData[i][j][k] + bData[j] * beta */
    DTYPE answer[3][4][2] = { { {-2.0F, -1.0F}, {2.0F, 3.0F}, {6.0F, 7.0F}, {10.0F, 11.0F} },
                              { {-2.0F, -3.0F}, {-2.0F, -3.0F}, {-2.0F, -3.0F}, {-2.0F, -3.0F} },
                              { {-2.0F, -1.0F}, {2.0F, 3.0F}, {6.0F, 7.0F}, {10.0F, 11.0F} } };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensorV2(aOrder, aDimSize);
    XTensor * b = NewTensorV2(bOrder, bDimSize);
    XTensor cUser;

    /* initialize variables */
    a->SetData(aData, aUnitNum);
    b->SetData(bData, bUnitNum);

    /* call Sum function */
    cUser = Sum(*a, *b, beta);

    /* check results */
    cpuTest = _CheckData(&cUser, answer, cUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensorV2(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensorV2(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
    XTensor cUserGPU;

    /* Initialize variables */
    aGPU->SetData(aData, aUnitNum);
    bGPU->SetData(bData, bUnitNum);

    /* call Sum function */
    cUserGPU = Sum(*aGPU, *bGPU, beta);

    /* check results (compare against the same element count as the CPU check) */
    gpuTest = _CheckData(&cUserGPU, answer, cUnitNum);

    /* destroy variables */
    delete a;
    delete b;
    delete aGPU;
    delete bGPU;
    delete[] aDimSize;
    delete[] bDimSize;
    delete[] cDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete a;
    delete b;
    delete[] aDimSize;
    delete[] bDimSize;
    delete[] cDimSize;

    return cpuTest;
#endif // USE_CUDA
}
/*
case 5: tensor summation c = a + b * \beta, where b is a 1d tensor of size (4)
and a is a square 2d tensor of size (4, 4). Both dimensions of a match the size
of b; the expected answer applies b * beta along the last dimension of a.
>> return - true if the CPU check (and, when USE_CUDA is defined, the GPU check) passes
*/
bool TestSum5()
{
    /* a tensor of size (4, 4) */
    int aOrder = 2;
    int * aDimSize = new int[aOrder];
    aDimSize[0] = 4;
    aDimSize[1] = 4;
    int aUnitNum = 1;
    for (int i = 0; i < aOrder; i++)
        aUnitNum *= aDimSize[i];

    /* a tensor of size (4) */
    int bOrder = 1;
    int * bDimSize = new int[bOrder];
    bDimSize[0] = 4;
    int bUnitNum = 1;
    for (int i = 0; i < bOrder; i++)
        bUnitNum *= bDimSize[i];

    /* a tensor of size (4, 4) */
    int cOrder = 2;
    int * cDimSize = new int[cOrder];
    cDimSize[0] = 4;
    cDimSize[1] = 4;
    int cUnitNum = 1;
    for (int i = 0; i < cOrder; i++)
        cUnitNum *= cDimSize[i];

    DTYPE aData[4][4] = { {0.0F, 1.0F, 2.0F, 3.0F },
                          {4.0F, 5.0F, 6.0F, 7.0F },
                          {0.0F, -1.0F, -2.0F, -3.0F },
                          {-4.0F, -5.0F, -6.0F, -7.0F } };
    DTYPE bData[4] = {-1.0F, 0.0F, 1.0F, 2.0F};
    DTYPE beta = 2.0F;

    /* answer[i][j] = aData[i][j] + bData[j] * beta */
    DTYPE answer[4][4] = { {-2.0F, 1.0F, 4.0F, 7.0F },
                           {2.0F, 5.0F, 8.0F, 11.0F },
                           {-2.0F, -1.0F, 0.0F, 1.0F },
                           {-6.0F, -5.0F, -4.0F, -3.0F } };

    /* CPU test */
    bool cpuTest = true;

    /* create tensors */
    XTensor * a = NewTensorV2(aOrder, aDimSize);
    XTensor * b = NewTensorV2(bOrder, bDimSize);
    XTensor cUser;

    /* initialize variables */
    a->SetData(aData, aUnitNum);
    b->SetData(bData, bUnitNum);

    /* call Sum function */
    cUser = Sum(*a, *b, beta);

    /* check results */
    cpuTest = _CheckData(&cUser, answer, cUnitNum);

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* create tensor */
    XTensor * aGPU = NewTensorV2(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
    XTensor * bGPU = NewTensorV2(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
    XTensor cUserGPU;

    /* Initialize variables */
    aGPU->SetData(aData, aUnitNum);
    bGPU->SetData(bData, bUnitNum);

    /* call Sum function */
    cUserGPU = Sum(*aGPU, *bGPU, beta);

    /* check results (compare against the same element count as the CPU check) */
    gpuTest = _CheckData(&cUserGPU, answer, cUnitNum);

    /* destroy variables */
    delete a;
    delete b;
    delete aGPU;
    delete bGPU;
    delete[] aDimSize;
    delete[] bDimSize;
    delete[] cDimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete a;
    delete b;
    delete[] aDimSize;
    delete[] bDimSize;
    delete[] cDimSize;

    return cpuTest;
#endif // USE_CUDA
}
/* other cases */ /* other cases */
/* /*
TODO!! TODO!!
...@@ -244,6 +543,33 @@ bool TestSum() ...@@ -244,6 +543,33 @@ bool TestSum()
else else
XPRINT(0, stdout, ">> case 2 passed!\n"); XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */
caseFlag = TestSum3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestSum4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* case 5 test */
caseFlag = TestSum5();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 5 failed!\n");
}
else
XPRINT(0, stdout, ">> case 5 passed!\n");
/* other cases test */ /* other cases test */
/* /*
TODO!! TODO!!
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论