Commit 95b74dbb by linye

int and int8 sum supported

parent f151f061
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
/* /*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
* $Update by: Lin Ye (linye2015@outlook.com) 2019-07-02 float16 added * $Update by: Lin Ye (linye2015@outlook.com) 2019-07-02 float16 int added
*/ */
#include "../../XDevice.h" #include "../../XDevice.h"
...@@ -48,6 +48,16 @@ void KernelADD(T * a, T * b, T * c, int size, T beta) ...@@ -48,6 +48,16 @@ void KernelADD(T * a, T * b, T * c, int size, T beta)
} }
/*
summation of int arrays: c = a + b * beta (cuda kernel)
expects a 1D grid / 1D blocks; any launch size is safe thanks to the bounds
guard. beta arrives as DTYPE (float) and is truncated to int, matching the
host-side convention of passing a single DTYPE beta for every data type.
>> a - first input array (int, device memory)
>> b - second input array (int, device memory)
>> c - output array (int, device memory)
>> size - number of elements
>> beta - scaling coefficient for b (truncated to int)
*/
__global__
void KernelADDInt(int * a, int * b, int * c, int size, DTYPE beta)
{
    int idx = blockDim.x * blockIdx.x + threadIdx.x;

    /* tail guard: the grid rarely divides size evenly */
    if (idx >= size)
        return;

    int scale = (int)beta;
    c[idx] = a[idx] + b[idx] * scale;
}
/* /*
tensor summation c = a + b * \beta (cuda version) tensor summation c = a + b * \beta (cuda version)
>> a - a tensor >> a - a tensor
...@@ -65,7 +75,9 @@ void _CudaSum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta) ...@@ -65,7 +75,9 @@ void _CudaSum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
CheckNTErrors((a->devID == b->devID && a->devID == c->devID), CheckNTErrors((a->devID == b->devID && a->devID == c->devID),
"The tensors must be on the same!"); "The tensors must be on the same!");
CheckNTErrors((a->dataType == DEFAULT_DTYPE && b->dataType == DEFAULT_DTYPE && c->dataType == DEFAULT_DTYPE) || CheckNTErrors((a->dataType == DEFAULT_DTYPE && b->dataType == DEFAULT_DTYPE && c->dataType == DEFAULT_DTYPE) ||
(a->dataType == X_FLOAT16 && b->dataType == X_FLOAT16 && c->dataType == X_FLOAT16), (a->dataType == X_FLOAT16 && b->dataType == X_FLOAT16 && c->dataType == X_FLOAT16) ||
(a->dataType == X_INT && b->dataType == X_INT && c->dataType == X_INT) ||
(a->dataType == X_INT8 && b->dataType == X_INT8 && c->dataType == X_INT8),
"The sum function does not support this datatype."); "The sum function does not support this datatype.");
int devIDBackup = XDevice::GetGPUDevice(); int devIDBackup = XDevice::GetGPUDevice();
...@@ -119,7 +131,32 @@ void _CudaSum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta) ...@@ -119,7 +131,32 @@ void _CudaSum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
unsigned short temp = FloatToFloat16(beta); unsigned short temp = FloatToFloat16(beta);
half beta1 = *((half *)&temp); half beta1 = *((half *)&temp);
KernelADD << <blocks, threads >> >((__half *)a->data, (__half *)b->data, (__half *)c->data, a->unitNum, beta1); KernelADD << <blocks, threads >> >((__half *)a->data, (__half *)b->data, (__half *)c->data, a->unitNum, beta1);
}
else if (a->dataType == X_INT &&
b->dataType == X_INT &&
c->dataType == X_INT)
{
int gridSize[3], blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
int beta1 = (int)beta;
KernelADD << <blocks, threads >> >((int *)a->data, (int *)b->data, (int *)c->data, a->unitNum, beta1);
}
else if (a->dataType == X_INT8 &&
b->dataType == X_INT8 &&
c->dataType == X_INT8)
{
int gridSize[3], blockSize[3];
GDevs.GetCudaThread(a->devID, a->unitNum, gridSize, blockSize);
dim3 blocks(gridSize[0]);
dim3 threads(blockSize[0]);
__int8 beta1 = (__int8)beta;
KernelADD << <blocks, threads >> >((__int8 *)a->data, (__int8 *)b->data, (__int8 *)c->data, a->unitNum, beta1);
} }
else { else {
// TODO!! // TODO!!
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
/* /*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-04-30 * $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-04-30
* $Update by: Lin Ye (linye2015@outlook.com) 2019-07-06 float16 added * $Update by: Lin Ye (linye2015@outlook.com) 2019-07-06 float16 int added
*/ */
#include "TSum.h" #include "TSum.h"
...@@ -384,6 +384,344 @@ bool TestSum4() ...@@ -384,6 +384,344 @@ bool TestSum4()
#endif // USE_CUDA #endif // USE_CUDA
} }
/* case 5: int tensor summation c = a + b * \beta (default beta = 1) */
bool TestSum5()
{
    /* shape of every tensor in this case: (2, 4) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 2;
    dimSize[1] = 4;

    int unitNum = 1;
    for (int d = order - 1; d >= 0; d--)
        unitNum *= dimSize[d];

    DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE bData[2][4] = { {1.0F, -1.0F, -3.0F, -5.0F},
                          {-7.0F, -9.0F, -11.0F, -13.0F} };
    DTYPE answer[2][4] = { {1.0F, 0.0F, -1.0F, -2.0F},
                           {-3.0F, -4.0F, -5.0F, -6.0F} };

    /* CPU test (int summation is exercised on the GPU only) */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* float tensors on GPU 0, used as carriers for the int data */
    XTensor * fA = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * fB = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * fC = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * fCMe = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor fCUser;

    /* int counterparts filled via type conversion */
    XTensor iA;
    XTensor iB;
    XTensor iC;
    XTensor iCMe;
    XTensor iCUser;

    /* initialize variables */
    fA->SetData(aData, unitNum);
    fB->SetData(bData, unitNum);
    fCMe->SetData(aData, unitNum);
    fC->SetZeroAll();

    /* convert data type from float to int */
    iA = ConvertDataType(*fA, X_INT);
    iB = ConvertDataType(*fB, X_INT);
    iC = ConvertDataType(*fC, X_INT);
    iCMe = ConvertDataType(*fCMe, X_INT);

    /* call Sum function in its three flavors */
    _Sum(&iA, &iB, &iC);
    _SumMe(&iCMe, &iB);
    iCUser = Sum(iA, iB);

    /* convert results back from int to float for checking */
    _ConvertDataType(&iC, fC);
    _ConvertDataType(&iCMe, fCMe);
    fCUser = ConvertDataType(iCUser, X_FLOAT);

    /* check results */
    gpuTest = fC->CheckData(answer, unitNum) &&
              fCMe->CheckData(answer, unitNum) &&
              fCUser.CheckData(answer, unitNum);

    /* destroy variables */
    delete fA;
    delete fB;
    delete fC;
    delete fCMe;
    delete[] dimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] dimSize;

    return cpuTest;
#endif // USE_CUDA
}
/* case 6: int tensor summation c = a + b * \beta with explicit beta = 2 */
bool TestSum6()
{
    /* shape of every tensor in this case: (2, 4) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 2;
    dimSize[1] = 4;

    int unitNum = 1;
    for (int d = order - 1; d >= 0; d--)
        unitNum *= dimSize[d];

    DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE bData[2][4] = { {1.0F, -1.0F, -3.0F, -5.0F},
                          {-7.0F, -9.0F, -11.0F, -13.0F} };
    DTYPE answer[2][4] = { {2.0F, -1.0F, -4.0F, -7.0F},
                           {-10.0F, -13.0F, -16.0F, -19.0F} };
    float beta = 2.0F;

    /* CPU test (int summation is exercised on the GPU only) */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* float tensors on GPU 0, used as carriers for the int data */
    XTensor * fA = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * fB = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * fC = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * fCMe = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor fCUser;

    /* int counterparts filled via type conversion */
    XTensor iA;
    XTensor iB;
    XTensor iC;
    XTensor iCMe;
    XTensor iCUser;

    /* initialize variables */
    fA->SetData(aData, unitNum);
    fB->SetData(bData, unitNum);
    fCMe->SetData(aData, unitNum);
    fC->SetZeroAll();

    /* convert data type from float to int */
    iA = ConvertDataType(*fA, X_INT);
    iB = ConvertDataType(*fB, X_INT);
    iC = ConvertDataType(*fC, X_INT);
    iCMe = ConvertDataType(*fCMe, X_INT);

    /* call Sum function in its three flavors, with beta */
    _Sum(&iA, &iB, &iC, beta);
    _SumMe(&iCMe, &iB, beta);
    iCUser = Sum(iA, iB, beta);

    /* convert results back from int to float for checking */
    _ConvertDataType(&iC, fC);
    _ConvertDataType(&iCMe, fCMe);
    fCUser = ConvertDataType(iCUser, X_FLOAT);

    /* check results */
    gpuTest = fC->CheckData(answer, unitNum) &&
              fCMe->CheckData(answer, unitNum) &&
              fCUser.CheckData(answer, unitNum);

    /* destroy variables */
    delete fA;
    delete fB;
    delete fC;
    delete fCMe;
    delete[] dimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] dimSize;

    return cpuTest;
#endif // USE_CUDA
}
/* case 7: int8 tensor summation c = a + b * \beta (default beta = 1) */
bool TestSum7()
{
    /* shape of every tensor in this case: (2, 4) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 2;
    dimSize[1] = 4;

    int unitNum = 1;
    for (int d = order - 1; d >= 0; d--)
        unitNum *= dimSize[d];

    /* all values (and results) stay inside the int8 range [-128, 127] */
    DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE bData[2][4] = { {1.0F, -1.0F, -3.0F, -5.0F},
                          {-7.0F, -9.0F, -11.0F, -13.0F} };
    DTYPE answer[2][4] = { {1.0F, 0.0F, -1.0F, -2.0F},
                           {-3.0F, -4.0F, -5.0F, -6.0F} };

    /* CPU test (int8 summation is exercised on the GPU only) */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* float tensors on GPU 0, used as carriers for the int8 data */
    XTensor * fA = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * fB = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * fC = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * fCMe = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor fCUser;

    /* int8 counterparts filled via type conversion */
    XTensor i8A;
    XTensor i8B;
    XTensor i8C;
    XTensor i8CMe;
    XTensor i8CUser;

    /* initialize variables */
    fA->SetData(aData, unitNum);
    fB->SetData(bData, unitNum);
    fCMe->SetData(aData, unitNum);
    fC->SetZeroAll();

    /* convert data type from float to int8 */
    i8A = ConvertDataType(*fA, X_INT8);
    i8B = ConvertDataType(*fB, X_INT8);
    i8C = ConvertDataType(*fC, X_INT8);
    i8CMe = ConvertDataType(*fCMe, X_INT8);

    /* call Sum function in its three flavors */
    _Sum(&i8A, &i8B, &i8C);
    _SumMe(&i8CMe, &i8B);
    i8CUser = Sum(i8A, i8B);

    /* convert results back from int8 to float for checking */
    _ConvertDataType(&i8C, fC);
    _ConvertDataType(&i8CMe, fCMe);
    fCUser = ConvertDataType(i8CUser, X_FLOAT);

    /* check results */
    gpuTest = fC->CheckData(answer, unitNum) &&
              fCMe->CheckData(answer, unitNum) &&
              fCUser.CheckData(answer, unitNum);

    /* destroy variables */
    delete fA;
    delete fB;
    delete fC;
    delete fCMe;
    delete[] dimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] dimSize;

    return cpuTest;
#endif // USE_CUDA
}
/* case 8: int8 tensor summation c = a + b * \beta with explicit beta = 2 */
bool TestSum8()
{
    /* shape of every tensor in this case: (2, 4) */
    int order = 2;
    int * dimSize = new int[order];
    dimSize[0] = 2;
    dimSize[1] = 4;

    int unitNum = 1;
    for (int d = order - 1; d >= 0; d--)
        unitNum *= dimSize[d];

    /* all values (and results) stay inside the int8 range [-128, 127] */
    DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
                          {4.0F, 5.0F, 6.0F, 7.0F} };
    DTYPE bData[2][4] = { {1.0F, -1.0F, -3.0F, -5.0F},
                          {-7.0F, -9.0F, -11.0F, -13.0F} };
    DTYPE answer[2][4] = { {2.0F, -1.0F, -4.0F, -7.0F},
                           {-10.0F, -13.0F, -16.0F, -19.0F} };
    float beta = 2.0F;

    /* CPU test (int8 summation is exercised on the GPU only) */
    bool cpuTest = true;

#ifdef USE_CUDA
    /* GPU test */
    bool gpuTest = true;

    /* float tensors on GPU 0, used as carriers for the int8 data */
    XTensor * fA = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * fB = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * fC = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor * fCMe = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
    XTensor fCUser;

    /* int8 counterparts filled via type conversion */
    XTensor i8A;
    XTensor i8B;
    XTensor i8C;
    XTensor i8CMe;
    XTensor i8CUser;

    /* initialize variables */
    fA->SetData(aData, unitNum);
    fB->SetData(bData, unitNum);
    fCMe->SetData(aData, unitNum);
    fC->SetZeroAll();

    /* convert data type from float to int8 */
    i8A = ConvertDataType(*fA, X_INT8);
    i8B = ConvertDataType(*fB, X_INT8);
    i8C = ConvertDataType(*fC, X_INT8);
    i8CMe = ConvertDataType(*fCMe, X_INT8);

    /* call Sum function in its three flavors, with beta */
    _Sum(&i8A, &i8B, &i8C, beta);
    _SumMe(&i8CMe, &i8B, beta);
    i8CUser = Sum(i8A, i8B, beta);

    /* convert results back from int8 to float for checking */
    _ConvertDataType(&i8C, fC);
    _ConvertDataType(&i8CMe, fCMe);
    fCUser = ConvertDataType(i8CUser, X_FLOAT);

    /* check results */
    gpuTest = fC->CheckData(answer, unitNum) &&
              fCMe->CheckData(answer, unitNum) &&
              fCUser.CheckData(answer, unitNum);

    /* destroy variables */
    delete fA;
    delete fB;
    delete fC;
    delete fCMe;
    delete[] dimSize;

    return cpuTest && gpuTest;
#else
    /* destroy variables */
    delete[] dimSize;

    return cpuTest;
#endif // USE_CUDA
}
/* other cases */ /* other cases */
/* /*
...@@ -432,6 +770,42 @@ bool TestSum() ...@@ -432,6 +770,42 @@ bool TestSum()
else else
XPRINT(0, stdout, ">> case 4 passed!\n"); XPRINT(0, stdout, ">> case 4 passed!\n");
/* case 5 test */
caseFlag = TestSum5();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 5 failed!\n");
}
else
XPRINT(0, stdout, ">> case 5 passed!\n");
/* case 6 test */
caseFlag = TestSum6();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 6 failed!\n");
}
else
XPRINT(0, stdout, ">> case 6 passed!\n");
/* case 7 test */
caseFlag = TestSum7();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 7 failed!\n");
}
else
XPRINT(0, stdout, ">> case 7 passed!\n");
/* case 8 test */
caseFlag = TestSum8();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 8 failed!\n");
}
else
XPRINT(0, stdout, ">> case 8 passed!\n");
/* other cases test */ /* other cases test */
/* /*
TODO!! TODO!!
......
...@@ -35,10 +35,10 @@ bool Test() ...@@ -35,10 +35,10 @@ bool Test()
//wrong = !TestConcatenate() || wrong; //wrong = !TestConcatenate() || wrong;
//wrong = !TestConcatenateSolely() || wrong; //wrong = !TestConcatenateSolely() || wrong;
//wrong = !TestCos() || wrong; //wrong = !TestCos() || wrong;
wrong = !TestConvertDataType() || wrong; //wrong = !TestConvertDataType() || wrong;
//wrong = !TestCopyIndexed() || wrong; //wrong = !TestCopyIndexed() || wrong;
//wrong = !TestCopyValues() || wrong; //wrong = !TestCopyValues() || wrong;
wrong = !TestDiv() || wrong; //wrong = !TestDiv() || wrong;
//wrong = !TestDivDim() || wrong; //wrong = !TestDivDim() || wrong;
//wrong = !TestExp() || wrong; //wrong = !TestExp() || wrong;
//wrong = !TestGather() || wrong; //wrong = !TestGather() || wrong;
...@@ -70,7 +70,7 @@ bool Test() ...@@ -70,7 +70,7 @@ bool Test()
//wrong = !TestSplit() || wrong; //wrong = !TestSplit() || wrong;
//wrong = !TestSpread() || wrong; //wrong = !TestSpread() || wrong;
//wrong = !TestSub() || wrong; //wrong = !TestSub() || wrong;
//wrong = !TestSum() || wrong; wrong = !TestSum() || wrong;
//wrong = !TestSumByColumnTV() || wrong; //wrong = !TestSumByColumnTV() || wrong;
//wrong = !TestSumByColumnVT() || wrong; //wrong = !TestSumByColumnVT() || wrong;
//wrong = !TestSumDim() || wrong; //wrong = !TestSumDim() || wrong;
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论