Commit fe868e5c by linye

update

parent 6a3d713a
......@@ -169,7 +169,7 @@ void _CudaMultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n,
ShowNTErrors("Something is wrong!");
}
}
if (a->dataType == X_FLOAT16) {
else if (a->dataType == X_FLOAT16) {
unsigned short temp = FloatToFloat16(alpha);
half alpha1 = *((half *)&temp);
if (stride > 1) {
......
......@@ -43,26 +43,6 @@ void KernelNegate(T * a, T * b, int size)
b[i] = -a[i];
}
///*
//set each entry to its negtive value (CUDA Kernel)
//This is for float16 computation
//>> a - pointer to the input data array
//>> b - pointer to the output data array
//>> size - size of the data array
//*/
//__global__
//void KernelNegate(__half * a, __half * b, int size)
//{
// int i = blockDim.x * blockIdx.x + threadIdx.x;
//
//#if __CUDA_ARCH__ >= 530 || !defined(__CUDA_ARCH__)
// if (i < size)
// b[i] = __hsub(__float2half(0), a[i]);
//#else
// if (i < size)
// b[i] = __float2half(-__half2float(a[i]));
//#endif
//}
/*
set each entry to its negtive value
......
......@@ -29,12 +29,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* set each entry to its negtive value (CUDA Kernel) */
template <class T>
__global__
void KernelNegate(DTYPE * a, DTYPE * b, int size);
/* set each entry to its negtive value (CUDA Kernel) with float16 data type*/
__global__
void KernelNegate(__half * a, __half * b, int size);
void KernelNegate(T * a, T * b, int size);
/* set each entry to its negtive value */
void _CudaNegate(const XTensor * a, XTensor * b);
......
......@@ -17,7 +17,7 @@
/*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 float16 added
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 float16/int8 added
*/
#include "../../XUtility.h"
......
......@@ -83,7 +83,7 @@ void KernelLogSoftmaxComputeByRow(T * x, T * max, T * sum, T * y, int rowNum, in
int key = i * colNum + j;
if (dataType == X_FLOAT) {
DTYPE r = log((DTYPE)exp(x[key] - inputMax[threadIdx.x]) / (DTYPE)inputSum[threadIdx.x]);
DTYPE r = log((DTYPE)exp((DTYPE)(x[key] - inputMax[threadIdx.x])) / (DTYPE)inputSum[threadIdx.x]);
if (isnan(r))
r = LOGPROB_MIN;
......@@ -137,7 +137,7 @@ void KernelLogSoftmaxComputeByCol(T * x, T * max, T * sum, T * y, int rowNum, in
if (i < rowNum && j < colNum) {
int key = i * colNum + j;
if (dataType == X_FLOAT) {
DTYPE r = log((DTYPE)exp(x[key] - inputMax[threadIdx.y]) / (DTYPE)inputSum[threadIdx.y]);
DTYPE r = log((DTYPE)exp((DTYPE)(x[key] - inputMax[threadIdx.y])) / (DTYPE)inputSum[threadIdx.y]);
if (isnan(r))
r = LOGPROB_MIN;
......@@ -247,10 +247,10 @@ void KernelExpLoss(T * dedy, T * dedx, T * y, int size, LOSS_FUNCTION_NAME lossN
if (i < size) {
/* dE/dx_j = exp(y_j) */
if (lossName == CROSSENTROPY)
dedx[i] = exp(y[i]);
dedx[i] = exp(((DTYPE)y[i]));
/* dE/dx_j = exp(y_j) */
else if (lossName == SQUAREDERROR)
dedx[i] = exp(y[i]);
dedx[i] = exp(((DTYPE)y[i]));
else if (lossName == ONEHOTERROR)
dedx[i] = 0;
else
......@@ -283,13 +283,13 @@ void KernelLogSoftmaxBackwardDEDS(T * dedy, T * dedx, T * gold, T * y, T * x,
DTYPE r = 0;
/* dE/ds_j = exp(y_j) */
if (lossName == CROSSENTROPY)
r = -(DTYPE)gold[i] + (DTYPE)exp(y[i]);
r = -(DTYPE)gold[i] + (DTYPE)exp(((DTYPE)y[i]));
/* dE/ds_j = exp(y_j) */
else if (lossName == SQUAREDERROR)
r = -(DTYPE)gold[i] + (DTYPE)exp(y[i]);
r = -(DTYPE)gold[i] + (DTYPE)exp(((DTYPE)y[i]));
else if (lossName == ONEHOTERROR) {
if ((DTYPE)gold[i] == 1.0)
r = -(DTYPE)gold[i] + (DTYPE)exp(y[i]);
r = -(DTYPE)gold[i] + (DTYPE)exp(((DTYPE)y[i]));
else
r = 0;
}
......@@ -366,7 +366,7 @@ void KernelLogSoftmaxBackwardDEDSSparseByRow(T * dedy, T * dedx, void * gold, T
else if (lossName == ONEHOTERROR) {
int offset = colNum * ni + mi;
if (value == 1.0F)
dedx[offset] += (-value + exp(y[offset]));
dedx[offset] += (-value + exp(((DTYPE)y[offset])));
//dedx[offset] += -value * 0.005;
}
}
......
......@@ -17,11 +17,13 @@
/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-08-03
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-12 float16/int/int8 added
*/
#include "../XTensor.h"
#include "../core/math/Clip.h"
#include "TClip.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -116,6 +118,251 @@ bool TestClip1()
#endif // USE_CUDA
}
/*
case 2: float16 test Clip function.
Set every entry to its clip value.
*/
bool TestClip2()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.0F, 4.0F},
{5.0F, -6.0F} };
DTYPE answer[3][2] = { {1.0F, -1.0F},
{0.0F, 1.0F},
{1.0F, -1.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* create float16 tensor */
XTensor aHalfGPU;
XTensor bHalfGPU;
XTensor aMeHalfGPU;
XTensor bUserHalfGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* convert data type from float to float16 */
aHalfGPU = ConvertDataType(*aGPU, X_FLOAT16);
aMeHalfGPU = ConvertDataType(*aMeGPU, X_FLOAT16);
bHalfGPU = ConvertDataType(*bGPU, X_FLOAT16);
/* call clip function */
_Clip(&aHalfGPU, &bHalfGPU, -1.0, 1.0);
_ClipMe(&aMeHalfGPU, -1.0, 1.0);
bUserHalfGPU = Clip(aHalfGPU, -1.0, 1.0);
/* convert data type from float16 to float */
_ConvertDataType(&bHalfGPU, bGPU);
_ConvertDataType(&aMeHalfGPU, aMeGPU);
bUserGPU = ConvertDataType(bUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] aDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 3: int32 test Clip function.
Set every entry to its clip value.
*/
bool TestClip3()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.0F, 4.0F},
{5.0F, -6.0F} };
DTYPE answer[3][2] = { {1.0F, -1.0F},
{0.0F, 1.0F},
{1.0F, -1.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* create int32 tensor */
XTensor aInt32GPU;
XTensor bInt32GPU;
XTensor aMeInt32GPU;
XTensor bUserInt32GPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* convert data type from float to int32 */
aInt32GPU = ConvertDataType(*aGPU, X_INT);
aMeInt32GPU = ConvertDataType(*aMeGPU, X_INT);
bInt32GPU = ConvertDataType(*bGPU, X_INT);
/* call clip function */
_Clip(&aInt32GPU, &bInt32GPU, -1.0, 1.0);
_ClipMe(&aMeInt32GPU, -1.0, 1.0);
bUserInt32GPU = Clip(aInt32GPU, -1.0, 1.0);
/* convert data type from int32 to float */
_ConvertDataType(&bInt32GPU, bGPU);
_ConvertDataType(&aMeInt32GPU, aMeGPU);
bUserGPU = ConvertDataType(bUserInt32GPU, X_FLOAT);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] aDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 4: int8 test Clip function.
Set every entry to its clip value.
*/
bool TestClip4()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.0F, 4.0F},
{5.0F, -6.0F} };
DTYPE answer[3][2] = { {1.0F, -1.0F},
{0.0F, 1.0F},
{1.0F, -1.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* create int8 tensor */
XTensor aInt8GPU;
XTensor bInt8GPU;
XTensor aMeInt8GPU;
XTensor bUserInt8GPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* convert data type from float to int8 */
aInt8GPU = ConvertDataType(*aGPU, X_INT8);
aMeInt8GPU = ConvertDataType(*aMeGPU, X_INT8);
bInt8GPU = ConvertDataType(*bGPU, X_INT8);
/* call clip function */
_Clip(&aInt8GPU, &bInt8GPU, -1.0, 1.0);
_ClipMe(&aMeInt8GPU, -1.0, 1.0);
bUserInt8GPU = Clip(aInt8GPU, -1.0, 1.0);
/* convert data type from int8 to float */
_ConvertDataType(&bInt8GPU, bGPU);
_ConvertDataType(&aMeInt8GPU, aMeGPU);
bUserGPU = ConvertDataType(bUserInt8GPU, X_FLOAT);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] aDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
......@@ -137,6 +384,36 @@ bool TestClip()
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestClip2();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */
caseFlag = TestClip3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestClip4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -17,7 +17,7 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-02
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 float16 added
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-12 float16 added
*/
#include "../XUtility.h"
......@@ -313,11 +313,6 @@ bool TestLogSoftmax3()
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/*
case 4: float16 test LogSoftmax function.
LogSoftmax function: y = log(e^x / \sum_{i} e^{x_i})
......@@ -370,10 +365,11 @@ bool TestLogSoftmax4()
/* convert data type from float16 to float */
_ConvertDataType(&yHalfGPU, yGPU);
yUserGPU = ConvertDataType(yHalfGPU, X_FLOAT);
yUserGPU = ConvertDataType(yUserHalfGPU, X_FLOAT);
/* check result */
gpuTest = yGPU->CheckData(answer, unitNum, 1e-1F) && yUserGPU.CheckData(answer, unitNum, 1e-1F);
gpuTest = yGPU->CheckData(answer, unitNum, 1e-2F) &&
yUserGPU.CheckData(answer, unitNum, 1e-2F);
/* destroy variables */
delete xGPU;
......@@ -389,6 +385,188 @@ bool TestLogSoftmax4()
#endif // USE_CUDA
}
/*
case 5: float16 test LogSoftmaxBackward function.
dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
In this case, LossName=CROSSENTROPY.
*/
bool TestLogSoftmax5()
{
/* a tensor of size (1, 3) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 1;
dimSize[1] = 3;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE xData[1][3] = {0.0F, 1.0F, 2.0F};
DTYPE gData[1][3] = {0.5F, 0.8F, 1.5F};
DTYPE yAnswer[1][3] = {-2.4076F, -1.4076F, -0.4076F};
DTYPE dedxAnswer[1][3] = {-0.4100F, -0.5553F, -0.8348F};
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * gGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* create float16 tensors */
XTensor xHalfGPU;
XTensor yHalfGPU;
XTensor gHalfGPU;
XTensor dedyHalfGPU;
XTensor dedxHalfGPU;
/* initialize variables */
xGPU->SetData(xData, unitNum);
gGPU->SetData(gData, unitNum);
yGPU->SetZeroAll();
dedxGPU->SetZeroAll();
dedyGPU->SetZeroAll();
/* convert data type from float to float16 */
xHalfGPU = ConvertDataType(*xGPU, X_FLOAT16);
yHalfGPU = ConvertDataType(*yGPU, X_FLOAT16);
gHalfGPU = ConvertDataType(*gGPU, X_FLOAT16);
dedyHalfGPU = ConvertDataType(*dedyGPU, X_FLOAT16);
dedxHalfGPU = ConvertDataType(*dedxGPU, X_FLOAT16);
/* call logsoftmax function */
_LogSoftmax(&xHalfGPU, &yHalfGPU, 1);
/* call logsoftmaxbackward function */
_LogSoftmaxBackward(&gHalfGPU, &yHalfGPU, &xHalfGPU, &dedyHalfGPU, &dedxHalfGPU, NULL, 1, CROSSENTROPY);
/* convert data type from float16 to float */
_ConvertDataType(&yHalfGPU, yGPU);
_ConvertDataType(&dedxHalfGPU, dedxGPU);
/* check result */
gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-2F) &&
dedxGPU->CheckData(dedxAnswer, unitNum, 1e-2F);
/* destroy variables */
delete xGPU;
delete yGPU;
delete gGPU;
delete dedxGPU;
delete dedyGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 6: float16 test LogSoftmaxBackward function.
dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
In this case, LossName=SQUAREDERROR
*/
bool TestLogSoftmax6()
{
/* a tensor of size (1, 3) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 1;
dimSize[1] = 3;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE xData[1][3] = {0.0F, 1.0F, 2.0F};
DTYPE gData[1][3] = {0.5F, 0.8F, 1.5F};
DTYPE yAnswer[1][3] = {-2.4076F, -1.4076F, -0.4076F};
DTYPE dedxAnswer[1][3] = {-0.4100F, -0.5553F, -0.8348F};
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * gGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* create float16 tensors */
XTensor xHalfGPU;
XTensor yHalfGPU;
XTensor gHalfGPU;
XTensor dedyHalfGPU;
XTensor dedxHalfGPU;
/* initialize variables */
xGPU->SetData(xData, unitNum);
gGPU->SetData(gData, unitNum);
yGPU->SetZeroAll();
dedxGPU->SetZeroAll();
dedyGPU->SetZeroAll();
/* convert data type from float to float16 */
xHalfGPU = ConvertDataType(*xGPU, X_FLOAT16);
yHalfGPU = ConvertDataType(*yGPU, X_FLOAT16);
gHalfGPU = ConvertDataType(*gGPU, X_FLOAT16);
dedyHalfGPU = ConvertDataType(*dedyGPU, X_FLOAT16);
dedxHalfGPU = ConvertDataType(*dedxGPU, X_FLOAT16);
/* call logsoftmax function */
_LogSoftmax(&xHalfGPU, &yHalfGPU, 1);
/* call logsoftmaxbackward function */
_LogSoftmaxBackward(&gHalfGPU, &yHalfGPU, &xHalfGPU, &dedyHalfGPU, &dedxHalfGPU, NULL, 1, SQUAREDERROR);
/* convert data type from float16 to float */
_ConvertDataType(&yHalfGPU, yGPU);
_ConvertDataType(&dedxHalfGPU, dedxGPU);
/* check result */
gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-2F) &&
dedxGPU->CheckData(dedxAnswer, unitNum, 1e-2F);
/* destroy variables */
delete xGPU;
delete yGPU;
delete gGPU;
delete dedxGPU;
delete dedyGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for LogSoftmax Function */
bool TestLogSoftmax()
......@@ -436,6 +614,26 @@ bool TestLogSoftmax()
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* case 5 test */
caseFlag = TestLogSoftmax5();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 5 failed!\n");
}
else
XPRINT(0, stdout, ">> case 5 passed!\n");
/* case 6 test */
caseFlag = TestLogSoftmax6();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 6 failed!\n");
}
else
XPRINT(0, stdout, ">> case 6 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -17,11 +17,13 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-30
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-12 float16/int/int8 added
*/
#include "TMultiplyDim.h"
#include "../core/arithmetic/MultiplyDim.h"
#include "../XTensor.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
......@@ -248,6 +250,205 @@ bool TestMultiplyDim2()
#endif // USE_CUDA
}
/*
case 3: float16 tensor multiplication c = a * b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting
In this case, (2, 4) * (2) = (2, 4), n = 0.
*/
bool TestMultiplyDim3()
{
/* a tensor of size (2, 4) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 2;
aDimSize[1] = 4;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
/* a tensor of size (2) */
int bOrder = 1;
int * bDimSize = new int[bOrder];
bDimSize[0] = 2;
int bUnitNum = 1;
for (int i = 0; i < bOrder; i++)
bUnitNum *= bDimSize[i];
DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE bData[2] = {1.0F, -1.0F};
DTYPE answer[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{-4.0F, -5.0F, -6.0F, -7.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
XTensor * cGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * cMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor cUserGPU;
/* create float16 tensor */
XTensor aHalfGPU;
XTensor bHalfGPU;
XTensor cHalfGPU;
XTensor cMeHalfGPU;
XTensor cUserHalfGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
cMeGPU->SetData(aData, aUnitNum);
bGPU->SetData(bData, bUnitNum);
cGPU->SetZeroAll();
/* convert data type from float to float16 */
aHalfGPU = ConvertDataType(*aGPU, X_FLOAT16);
bHalfGPU = ConvertDataType(*bGPU, X_FLOAT16);
cHalfGPU = ConvertDataType(*cGPU, X_FLOAT16);
cMeHalfGPU = ConvertDataType(*cMeGPU, X_FLOAT16);
/* call multiplydim function */
_MultiplyDim(&aHalfGPU, &bHalfGPU, &cHalfGPU, 0);
_MultiplyDimMe(&cMeHalfGPU, &bHalfGPU, 0);
cUserHalfGPU = MultiplyDim(aHalfGPU, bHalfGPU, 0);
/* convert data type from float16 to float */
_ConvertDataType(&cHalfGPU, cGPU);
_ConvertDataType(&cMeHalfGPU, cMeGPU);
cUserGPU = ConvertDataType(cUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = cGPU->CheckData(answer, aUnitNum) &&
cMeGPU->CheckData(answer, aUnitNum) &&
cUserGPU.CheckData(answer, aUnitNum);
/* destroy variables */
delete aGPU;
delete bGPU;
delete cGPU;
delete cMeGPU;
delete[] aDimSize;
delete[] bDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] aDimSize;
delete[] bDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 4: flaot16 tensor multiplication c = a*b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting.
In this case, (2, 4) * (4) = (2, 4), n = 1.
*/
bool TestMultiplyDim4()
{
/* a tensor of size (2, 4) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 2;
aDimSize[1] = 4;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
/* a tensor of size (4) */
int bOrder = 1;
int * bDimSize = new int[bOrder];
bDimSize[0] = 4;
int bUnitNum = 1;
for (int i = 0; i < bOrder; i++)
bUnitNum *= bDimSize[i];
DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE bData[4] = {1.0F, -1.0F , 1.0F, -1.0F};
DTYPE answer[2][4] = { {0.0F, -1.0F, 2.0F, -3.0F},
{4.0F, -5.0F, 6.0F, -7.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
XTensor * cGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * cMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor cUserGPU;
/* create float16 tensor */
XTensor aHalfGPU;
XTensor bHalfGPU;
XTensor cHalfGPU;
XTensor cMeHalfGPU;
XTensor cUserHalfGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
cMeGPU->SetData(aData, aUnitNum);
bGPU->SetData(bData, bUnitNum);
cGPU->SetZeroAll();
/* convert data type from float to float16 */
aHalfGPU = ConvertDataType(*aGPU, X_FLOAT16);
bHalfGPU = ConvertDataType(*bGPU, X_FLOAT16);
cHalfGPU = ConvertDataType(*cGPU, X_FLOAT16);
cMeHalfGPU = ConvertDataType(*cMeGPU, X_FLOAT16);
/* call multiplydim function */
_MultiplyDim(&aHalfGPU, &bHalfGPU, &cHalfGPU, 1);
_MultiplyDimMe(&cMeHalfGPU, &bHalfGPU, 1);
cUserHalfGPU = MultiplyDim(aHalfGPU, bHalfGPU, 1);
/* convert data type from float16 to float */
_ConvertDataType(&cHalfGPU, cGPU);
_ConvertDataType(&cMeHalfGPU, cMeGPU);
cUserGPU = ConvertDataType(cUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = cGPU->CheckData(answer, aUnitNum) &&
cMeGPU->CheckData(answer, aUnitNum) &&
cUserGPU.CheckData(answer, aUnitNum);
/* destroy variables */
delete aGPU;
delete bGPU;
delete cGPU;
delete cMeGPU;
delete[] aDimSize;
delete[] bDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] aDimSize;
delete[] bDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* test for MultiplyDim Function */
bool TestMultiplyDim()
{
......@@ -272,6 +473,24 @@ bool TestMultiplyDim()
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */
caseFlag = TestMultiplyDim3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestMultiplyDim4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -17,9 +17,11 @@
/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-12 float16/int/int8 added
*/
#include "TNegate.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -191,6 +193,86 @@ bool TestNegate2()
#endif // USE_CUDA
}
/* case 3: float16 set every entry to its minus value */
bool TestNegate3()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{-3.0F, 4.0F},
{5.0F, -6.0F} };
DTYPE answer[3][2] = { {-1.0F, 2.0F},
{3.0F, -4.0F},
{-5.0F, 6.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* create float16 tensor */
XTensor aHalfGPU;
XTensor bHalfGPU;
XTensor aMeHalfGPU;
XTensor bUserHalfGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* convert data type from float to float16 */
aHalfGPU = ConvertDataType(*aGPU, X_FLOAT16);
aMeHalfGPU = ConvertDataType(*aMeGPU, X_FLOAT16);
bHalfGPU = ConvertDataType(*bGPU, X_FLOAT16);
/* call negate function */
_Negate(&aHalfGPU, &bHalfGPU);
_NegateMe(&aMeHalfGPU);
bUserHalfGPU = Negate(aHalfGPU);
/* convert data type from float16 to float */
_ConvertDataType(&bHalfGPU, bGPU);
_ConvertDataType(&aMeHalfGPU, aMeGPU);
bUserGPU = ConvertDataType(bUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] aDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
/* other cases */
/*
TODO!!
......@@ -222,6 +304,16 @@ bool TestNegate()
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */
caseFlag = TestNegate3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -17,9 +17,11 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-12 float16/int/int8 added
*/
#include "TScaleAndShift.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -113,6 +115,254 @@ bool TestScaleAndShift1()
#endif // USE_CUDA
}
/*
case 2: flaot16 scale and shift all tensor entires.
p = p * scale + shift
*/
bool TestScaleAndShift2()
{
/* a input tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE answer[2][4] = { {0.5F, 2.5F, 4.5F, 6.5F},
{8.5F, 10.5F, 12.5F, 14.5F} };
DTYPE scaleFactor = 2.0F;
DTYPE shiftFactor = 0.5F;
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tMeGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* create float16 tensor */
XTensor sHalfGPU;
XTensor tHalfGPU;
XTensor tMeHalfGPU;
XTensor tUserHalfGPU;
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
tMeGPU->SetData(sData, sUnitNum);
/* convert data type from float to float16 */
sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);
tMeHalfGPU = ConvertDataType(*tMeGPU, X_FLOAT16);
tHalfGPU = ConvertDataType(*tGPU, X_FLOAT16);
/* call scaleandshift function */
_ScaleAndShift(&sHalfGPU, &tHalfGPU, scaleFactor, shiftFactor);
_ScaleAndShiftMe(&tMeHalfGPU, scaleFactor, shiftFactor);
tUserHalfGPU = ScaleAndShift(sHalfGPU, scaleFactor, shiftFactor);
/* convert data type from float16 to float */
_ConvertDataType(&tHalfGPU, tGPU);
_ConvertDataType(&tMeHalfGPU, tMeGPU);
tUserGPU = ConvertDataType(tUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = tGPU->CheckData(answer, sUnitNum) &&
tMeGPU->CheckData(answer, sUnitNum) &&
tUserGPU.CheckData(answer, sUnitNum);
/* destroy variables */
delete sGPU;
delete tGPU;
delete tMeGPU;
delete[] sDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] sDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 3: int32 scale and shift all tensor entires.
p = p * scale + shift
*/
bool TestScaleAndShift3()
{
/* a input tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE answer[2][4] = { {1.0F, 3.0F, 5.0F, 7.0F},
{9.0F, 11.0F, 13.0F, 15.0F} };
DTYPE scaleFactor = 2.0F;
DTYPE shiftFactor = 1.8F;
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tMeGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* create int32 tensor */
XTensor sInt32GPU;
XTensor tInt32GPU;
XTensor tMeInt32GPU;
XTensor tUserInt32GPU;
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
tMeGPU->SetData(sData, sUnitNum);
/* convert data type from float to int32 */
sInt32GPU = ConvertDataType(*sGPU, X_INT);
tMeInt32GPU = ConvertDataType(*tMeGPU, X_INT);
tInt32GPU = ConvertDataType(tGPU, X_INT);
/* call scaleandshift function */
_ScaleAndShift(&sInt32GPU, &tInt32GPU, scaleFactor, shiftFactor);
_ScaleAndShiftMe(&tMeInt32GPU, scaleFactor, shiftFactor);
tUserInt32GPU = ScaleAndShift(sInt32GPU, scaleFactor, shiftFactor);
/* convert data type from int32 to float */
_ConvertDataType(&tInt32GPU, tGPU);
_ConvertDataType(&tMeInt32GPU, tMeGPU);
tUserGPU = ConvertDataType(tUserInt32GPU, X_FLOAT);
/* check results */
gpuTest = tGPU->CheckData(answer, sUnitNum) &&
tMeGPU->CheckData(answer, sUnitNum) &&
tUserGPU.CheckData(answer, sUnitNum);
/* destroy variables */
delete sGPU;
delete tGPU;
delete tMeGPU;
delete[] sDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] sDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 4: int8 scale and shift all tensor entires.
p = p * scale + shift
*/
bool TestScaleAndShift4()
{
/* a input tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE answer[2][4] = { {1.0F, 3.0F, 5.0F, 7.0F},
{9.0F, 11.0F, 13.0F, 15.0F} };
DTYPE scaleFactor = 2.0F;
DTYPE shiftFactor = 1.8F;
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tMeGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* create int8 tensor */
XTensor sInt8GPU;
XTensor tInt8GPU;
XTensor tMeInt8GPU;
XTensor tUserInt8GPU;
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
tMeGPU->SetData(sData, sUnitNum);
/* convert data type from float to int8 */
sInt8GPU = ConvertDataType(*sGPU, X_INT8);
tMeInt8GPU = ConvertDataType(*tMeGPU, X_INT8);
tInt8GPU = ConvertDataType(*tGPU, X_INT8);
/* call scaleandshift function */
_ScaleAndShift(&sInt8GPU, &tInt8GPU, scaleFactor, shiftFactor);
_ScaleAndShiftMe(&tMeInt8GPU, scaleFactor, shiftFactor);
tUserInt8GPU = ScaleAndShift(sInt8GPU, scaleFactor, shiftFactor);
/* convert data type from int8 to float */
_ConvertDataType(&tInt8GPU, tGPU);
_ConvertDataType(&tMeInt8GPU, tMeGPU);
tUserGPU = ConvertDataType(tUserInt8GPU, X_FLOAT);
/* check results */
gpuTest = tGPU->CheckData(answer, sUnitNum) &&
tMeGPU->CheckData(answer, sUnitNum) &&
tUserGPU.CheckData(answer, sUnitNum);
/* destroy variables */
delete sGPU;
delete tGPU;
delete tMeGPU;
delete[] sDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] sDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
......@@ -133,6 +383,33 @@ bool TestScaleAndShift()
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestScaleAndShift2();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */
caseFlag = TestScaleAndShift3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestScaleAndShift4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -70,7 +70,7 @@ bool Test()
//wrong = !TestSplit() || wrong;
//wrong = !TestSpread() || wrong;
//wrong = !TestSub() || wrong;
wrong = !TestSum() || wrong;
//wrong = !TestSum() || wrong;
//wrong = !TestSumByColumnTV() || wrong;
//wrong = !TestSumByColumnVT() || wrong;
//wrong = !TestSumDim() || wrong;
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论