Commit 953421c3 by ltb

add replace xiao test case with linye's

parent 3e2f47b2
......@@ -17,11 +17,13 @@
/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-08-03
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-12 float16/int/int8 added
*/
#include "../XTensor.h"
#include "../core/math/Clip.h"
#include "TClip.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -116,6 +118,251 @@ bool TestClip1()
#endif // USE_CUDA
}
/*
case 2: float16 test Clip function.
Set every entry to its clip value.
*/
bool TestClip2()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.0F, 4.0F},
{5.0F, -6.0F} };
DTYPE answer[3][2] = { {1.0F, -1.0F},
{0.0F, 1.0F},
{1.0F, -1.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* create float16 tensor */
XTensor aHalfGPU;
XTensor bHalfGPU;
XTensor aMeHalfGPU;
XTensor bUserHalfGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* convert data type from float to float16 */
aHalfGPU = ConvertDataType(*aGPU, X_FLOAT16);
aMeHalfGPU = ConvertDataType(*aMeGPU, X_FLOAT16);
bHalfGPU = ConvertDataType(*bGPU, X_FLOAT16);
/* call clip function */
_Clip(&aHalfGPU, &bHalfGPU, -1.0, 1.0);
_ClipMe(&aMeHalfGPU, -1.0, 1.0);
bUserHalfGPU = Clip(aHalfGPU, -1.0, 1.0);
/* convert data type from float16 to float */
_ConvertDataType(&bHalfGPU, bGPU);
_ConvertDataType(&aMeHalfGPU, aMeGPU);
bUserGPU = ConvertDataType(bUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] aDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 3: int32 test Clip function.
Set every entry to its clip value.
*/
bool TestClip3()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.0F, 4.0F},
{5.0F, -6.0F} };
DTYPE answer[3][2] = { {1.0F, -1.0F},
{0.0F, 1.0F},
{1.0F, -1.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* create int32 tensor */
XTensor aInt32GPU;
XTensor bInt32GPU;
XTensor aMeInt32GPU;
XTensor bUserInt32GPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* convert data type from float to int32 */
aInt32GPU = ConvertDataType(*aGPU, X_INT);
aMeInt32GPU = ConvertDataType(*aMeGPU, X_INT);
bInt32GPU = ConvertDataType(*bGPU, X_INT);
/* call clip function */
_Clip(&aInt32GPU, &bInt32GPU, -1.0, 1.0);
_ClipMe(&aMeInt32GPU, -1.0, 1.0);
bUserInt32GPU = Clip(aInt32GPU, -1.0, 1.0);
/* convert data type from int32 to float */
_ConvertDataType(&bInt32GPU, bGPU);
_ConvertDataType(&aMeInt32GPU, aMeGPU);
bUserGPU = ConvertDataType(bUserInt32GPU, X_FLOAT);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] aDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 4: int8 test Clip function.
Set every entry to its clip value.
*/
bool TestClip4()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.0F, 4.0F},
{5.0F, -6.0F} };
DTYPE answer[3][2] = { {1.0F, -1.0F},
{0.0F, 1.0F},
{1.0F, -1.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* create int8 tensor */
XTensor aInt8GPU;
XTensor bInt8GPU;
XTensor aMeInt8GPU;
XTensor bUserInt8GPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* convert data type from float to int8 */
aInt8GPU = ConvertDataType(*aGPU, X_INT8);
aMeInt8GPU = ConvertDataType(*aMeGPU, X_INT8);
bInt8GPU = ConvertDataType(*bGPU, X_INT8);
/* call clip function */
_Clip(&aInt8GPU, &bInt8GPU, -1.0, 1.0);
_ClipMe(&aMeInt8GPU, -1.0, 1.0);
bUserInt8GPU = Clip(aInt8GPU, -1.0, 1.0);
/* convert data type from int8 to float */
_ConvertDataType(&bInt8GPU, bGPU);
_ConvertDataType(&aMeInt8GPU, aMeGPU);
bUserGPU = ConvertDataType(bUserInt8GPU, X_FLOAT);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] aDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
......@@ -137,6 +384,36 @@ bool TestClip()
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestClip2();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */
caseFlag = TestClip3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestClip4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -30,7 +30,7 @@ In this case, 2 * (2, 1) -> (2, 2), dim=1.
bool TestConcatenate1()
{
/* create list */
TensorList * sList = new TensorList();
XList * sList = new XList();
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
......@@ -157,7 +157,7 @@ In this case, 2 * (2, 1) -> (4, 1), dim=0.
bool TestConcatenate2()
{
/* create list */
TensorList * sList = new TensorList();
XList * sList = new XList();
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
......@@ -286,7 +286,7 @@ In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
bool TestConcatenate3()
{
/* create list */
TensorList * sList = new TensorList();
XList * sList = new XList();
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
......
......@@ -31,7 +31,7 @@ In this case, 2 * (2, 1) -> (2, 2), dim=1.
bool TestConcatenateSolely1()
{
/* create list */
TensorList * sList = new TensorList();
XList * sList = new XList();
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
......@@ -154,7 +154,7 @@ In this case, 2 * (2, 1) -> (4, 1), dim=0.
bool TestConcatenateSolely2()
{
/* create list */
TensorList * sList = new TensorList();
XList * sList = new XList();
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
......@@ -279,7 +279,7 @@ In this case, (2, 1) + (2, 2) -> (2, 3), dim=1.
bool TestConcatenateSolely3()
{
/* create list */
TensorList * sList = new TensorList();
XList * sList = new XList();
/* a source tensor of size (2, 1) */
int sOrder1 = 2;
......
......@@ -17,6 +17,7 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-12
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 int8 added
*/
#include "TConvertDataType.h"
......@@ -26,7 +27,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test ConvertDataType function.
In this case, the flaot32 data type is converted to int32 data type.
In this case, the float32 data type is converted to int32 data type.
*/
bool TestConvertDataType1()
......@@ -177,7 +178,7 @@ bool TestConvertDataType2()
/*
case 3: test ConvertDataType function.
In this case, the float data type is converted to float16 data type.
In this case, the float32 data type is converted to float16 data type.
*/
bool TestConvertDataType3()
{
......@@ -290,6 +291,130 @@ bool TestConvertDataType3()
#endif // USE_CUDA
}
/*
case 4: test ConvertDataType function.
In this case, the float32 data type is converted to int8 data type.
*/
bool TestConvertDataType4()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, 2.0F},
{0.5F, 4.0F},
{5.0F, 6.0F} };
int answer[3][2] = { {1, 2},
{0, 4},
{5, 6} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_INT8, 1.0F, 0);
XTensor * cGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * dGPU = NewTensor(aOrder, aDimSize, X_INT, 1.0F, 0);
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
/* call ConvertDataType function */
_ConvertDataType(aGPU, bGPU);
_ConvertDataType(bGPU, cGPU);
_ConvertDataType(cGPU, dGPU);
/* check results */
gpuTest = dGPU->CheckData(answer, aUnitNum);
/* destroy variables */
delete aGPU;
delete bGPU;
delete cGPU;
delete dGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] aDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 5: test ConvertDataType function.
In this case, the int data type is converted to int8 data type.
*/
bool TestConvertDataType5()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
int aData[3][2] = { {1, 2},
{0, 4},
{5, 6} };
int answer[3][2] = { {1, 2},
{0, 4},
{5, 6} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_INT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_INT8, 1.0F, 0);
XTensor * cGPU = NewTensor(aOrder, aDimSize, X_INT, 1.0F, 0);
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
/* call ConvertDataType function */
_ConvertDataType(aGPU, bGPU);
_ConvertDataType(bGPU, cGPU);
/* check results */
gpuTest = cGPU->CheckData(answer, aUnitNum);
/* destroy variables */
delete aGPU;
delete bGPU;
delete cGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] aDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
......@@ -331,6 +456,26 @@ bool TestConvertDataType()
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestConvertDataType4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* case 5 test */
caseFlag = TestConvertDataType5();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 5 failed!\n");
}
else
XPRINT(0, stdout, ">> case 5 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -21,6 +21,7 @@
#include <math.h>
#include "TCrossEntropy.h"
#include "../loss/CrossEntropy.h"
#include "../core/math/ScaleAndShift.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -22,8 +22,6 @@
#ifndef __TEST_CROSSENTROPY_H__
#define __TEST_CROSSENTROPY_H__
#include "../loss/CrossEntropy.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for CrossEntropy Function */
......
......@@ -17,9 +17,11 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-08-01
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 float16 added
*/
#include "TDiv.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -148,6 +150,120 @@ bool TestDiv1()
#endif // USE_CUDA
}
/*
case 2: float16 element-wise division of two tensors
c(i) = a(i)/b(i) + \alpha * c(i)
In this case, (2, 2) (2, 2) -> (2, 2), leadingDim=0, alpha=0.
*/
bool TestDiv2()
{
/* a source tensor of size (2, 2) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
sDimSize1[1] = 2;
int sUnitNum1 = 1;
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 2;
sDimSize2[1] = 2;
int sUnitNum2 = 1;
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2, 2) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
tDimSize[1] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][2] = { {0.0F, 1.0F},
{2.0F, 3.0F} };
DTYPE sData2[2][2] = { {1.0F, 1.0F},
{4.0F, 9.0F} };
DTYPE answer[2][2] = { {0.0F, 1.0F},
{0.5F, 0.3333F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor * tMeGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* create float16 tensor */
XTensor sHalfGPU1;
XTensor sHalfGPU2;
XTensor tHalfGPU;
XTensor tMeHalfGPU;
XTensor tUserHalfGPU;
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
tMeGPU->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* convert data type from float to float16 */
sHalfGPU1 = ConvertDataType(*sGPU1, X_FLOAT16);
sHalfGPU2 = ConvertDataType(*sGPU2, X_FLOAT16);
tHalfGPU = ConvertDataType(*tGPU, X_FLOAT16);
tMeHalfGPU = ConvertDataType(*tMeGPU, X_FLOAT16);
/* call div function */
_Div(&sHalfGPU1, &sHalfGPU2, &tHalfGPU, 0, 0);
_DivMe(&tMeHalfGPU, &sHalfGPU2, 0, 0);
tUserHalfGPU = Div(sHalfGPU1, sHalfGPU2, 0);
/* convert data type from float16 to float */
_ConvertDataType(&tHalfGPU, tGPU);
_ConvertDataType(&tMeHalfGPU, tMeGPU);
tUserGPU = ConvertDataType(tUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum, 1e-4F) &&
tMeGPU->CheckData(answer, tUnitNum, 1e-4F) &&
tUserGPU.CheckData(answer, tUnitNum, 1e-4F);
/* destroy variables */
delete sGPU1;
delete sGPU2;
delete tGPU;
delete tMeGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
......@@ -169,6 +285,16 @@ bool TestDiv()
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestDiv2();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -17,11 +17,13 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-08-14
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-15 float16 added
*/
#include "TDivDim.h"
#include "../core/arithmetic/DivDim.h"
#include "../XTensor.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -251,6 +253,207 @@ bool TestDivDim2()
#endif // USE_CUDA
}
/*
case 3: float16 tensor division c = a/b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is divided with b by broadcasting.
In this case, (2, 4) / (2) = (2, 4), n = 0, alpha = 0.0.
*/
bool TestDivDim3()
{
/* a tensor of size (2, 4) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 2;
aDimSize[1] = 4;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
/* a tensor of size (2) */
int bOrder = 1;
int * bDimSize = new int[bOrder];
bDimSize[0] = 2;
int bUnitNum = 1;
for (int i = 0; i < bOrder; i++)
bUnitNum *= bDimSize[i];
DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE bData[2] = {1.0F, -1.0F};
DTYPE answer[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{-4.0F, -5.0F, -6.0F, -7.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
XTensor * cGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * cMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor cUserGPU;
/* create float16 tensor */
XTensor aHalfGPU;
XTensor bHalfGPU;
XTensor cHalfGPU;
XTensor cMeHalfGPU;
XTensor cUserHalfGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
cMeGPU->SetData(aData, aUnitNum);
bGPU->SetData(bData, bUnitNum);
cGPU->SetZeroAll();
/* convert data type from float to float16 */
aHalfGPU = ConvertDataType(*aGPU, X_FLOAT16);
bHalfGPU = ConvertDataType(*bGPU, X_FLOAT16);
cHalfGPU = ConvertDataType(*cGPU, X_FLOAT16);
cMeHalfGPU = ConvertDataType(*cMeGPU, X_FLOAT16);
/* call sum function */
_DivDim(&aHalfGPU, &bHalfGPU, &cHalfGPU, 0);
_DivDim(&cMeHalfGPU, &bHalfGPU, 0);
cUserHalfGPU = DivDim(aHalfGPU, bHalfGPU, 0);
/* convert data type from float16 to float */
_ConvertDataType(&cHalfGPU, cGPU);
_ConvertDataType(&cMeHalfGPU, cMeGPU);
cUserGPU = ConvertDataType(cUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = cGPU->CheckData(answer, aUnitNum) &&
cMeGPU->CheckData(answer, aUnitNum) &&
cUserGPU.CheckData(answer, aUnitNum);
/* destroy variables */
delete aGPU;
delete bGPU;
delete cGPU;
delete cMeGPU;
delete[] aDimSize;
delete[] bDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] aDimSize;
delete[] bDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 4: float16 tensor division c = a/b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is divided with b by broadcasting.
In this case, (2, 4) / (2, 2) = (2, 4), n = 1.
*/
bool TestDivDim4()
{
/* a tensor of size (2, 4) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 2;
aDimSize[1] = 4;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
/* a tensor of size (2, 2) */
int bOrder = 2;
int * bDimSize = new int[bOrder];
bDimSize[0] = 2;
bDimSize[1] = 2;
int bUnitNum = 1;
for (int i = 0; i < bOrder; i++)
bUnitNum *= bDimSize[i];
DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE bData[2][2] = { {1.0F, -1.0F},
{-1.0F, 1.0F} };
DTYPE answer[2][4] = { {0.0F, -1.0F, -2.0F, 3.0F},
{4.0F, -5.0F, -6.0F, 7.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
XTensor * cGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * cMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor cUserGPU;
/* create float16 tensor */
XTensor aHalfGPU;
XTensor bHalfGPU;
XTensor cHalfGPU;
XTensor cMeHalfGPU;
XTensor cUserHalfGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
cMeGPU->SetData(aData, aUnitNum);
bGPU->SetData(bData, bUnitNum);
cGPU->SetZeroAll();
/* convert data type from float to float16 */
aHalfGPU = ConvertDataType(*aGPU, X_FLOAT16);
bHalfGPU = ConvertDataType(*bGPU, X_FLOAT16);
cHalfGPU = ConvertDataType(*cGPU, X_FLOAT16);
cMeHalfGPU = ConvertDataType(*cMeGPU, X_FLOAT16);
/* call sum function */
_DivDim(&aHalfGPU, &bHalfGPU, &cHalfGPU, 1);
_DivDim(&cMeHalfGPU, &bHalfGPU, 1);
cUserHalfGPU = DivDim(aHalfGPU, bHalfGPU, 1);
/* convert data type from float16 to float */
_ConvertDataType(&cHalfGPU, cGPU);
_ConvertDataType(&cMeHalfGPU, cMeGPU);
cUserGPU = ConvertDataType(cUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = cGPU->CheckData(answer, aUnitNum) &&
cMeGPU->CheckData(answer, aUnitNum) &&
cUserGPU.CheckData(answer, aUnitNum);
/* destroy variables */
delete aGPU;
delete bGPU;
delete cGPU;
delete cMeGPU;
delete[] aDimSize;
delete[] bDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] aDimSize;
delete[] bDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
......@@ -280,6 +483,24 @@ bool TestDivDim()
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */
caseFlag = TestDivDim3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestDivDim4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -50,7 +50,7 @@ bool TestDropout1()
XTensor yUser;
/* initialize variables */
_SetDataFixedFloat(x, 1.0F);
_SetDataFixed(x, 1.0F);
y->SetZeroAll();
/* call Dropout function */
......@@ -88,7 +88,7 @@ bool TestDropout1()
XTensor yUserGPU;
/* initialize variables */
_SetDataFixedFloat(xGPU, 1.0F);
_SetDataFixed(xGPU, 1.0F);
yGPU->SetZeroAll();
/* call Dropout function */
......@@ -157,10 +157,10 @@ bool TestDropout2()
XTensor * dedy = NewTensor(order, dimSize);
/* initialize variables */
_SetDataFixedFloat(x, 1.0F);
_SetDataFixed(x, 1.0F);
y->SetZeroAll();
dedx->SetZeroAll();
_SetDataFixedFloat(dedy, 1.5F);
_SetDataFixed(dedy, 1.5F);
/* call Dropout function */
float dropProb = 0.5F;
......@@ -183,10 +183,10 @@ bool TestDropout2()
XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
_SetDataFixedFloat(xGPU, 1.0F);
_SetDataFixed(xGPU, 1.0F);
yGPU->SetZeroAll();
dedxGPU->SetZeroAll();
_SetDataFixedFloat(dedyGPU, 1.5F);
_SetDataFixed(dedyGPU, 1.5F);
/* call Dropout function */
_Dropout(xGPU, yGPU, seed, dropProb);
......
......@@ -154,6 +154,15 @@ bool TestGather2()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
/* a index tensor of size (1) */
int indexOrder = 1;
int * indexDimSize = new int[indexOrder];
indexDimSize[0] = 1;
int indexUnitNum = 1;
for (int i = 0; i < indexOrder; i++)
indexUnitNum *= indexDimSize[i];
DTYPE sData[3][2][3] = { { {0.0F, -1.0F, 2.0F},
{2.0F, 1.0F, 3.0F} },
{ {1.0F, 2.0F, 4.0F},
......@@ -174,16 +183,21 @@ bool TestGather2()
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor * index = NewTensor(indexOrder, indexDimSize, X_INT);
XTensor tUser;
/* initialize variables */
s->SetData(sData, sUnitNum);
t->SetZeroAll();
index->SetData(srcIndex, indexSize);
/* call Gather function */
_Gather(s, t, dim, srcIndex, indexSize);
_Gather(s, t, dim, index);
tUser = Gather(*s, dim, *index);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
cpuTest = t->CheckData(answer, tUnitNum) &&
tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
......@@ -192,33 +206,42 @@ bool TestGather2()
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(sOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor * indexGPU = NewTensor(indexOrder, indexDimSize, X_INT, 1.0F, 0);
XTensor tUserGPU;
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
tGPU->SetZeroAll();
indexGPU->SetData(srcIndex, indexSize);
/* call Gather function */
_Gather(sGPU, tGPU, dim, srcIndex, indexSize);
_Gather(sGPU, tGPU, dim, indexGPU);
tUserGPU = Gather(*sGPU, dim, *indexGPU);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
gpuTest = tGPU->CheckData(answer, tUnitNum) &&
tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
delete s;
delete t;
delete index;
delete sGPU;
delete tGPU;
delete indexGPU;
delete[] sDimSize;
delete[] tDimSize;
delete[] indexDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete t;
delete index;
delete[] sDimSize;
delete[] tDimSize;
delete[] indexDimSize;
return cpuTest;
#endif // USE_CUDA
......@@ -286,8 +309,8 @@ bool TestGather3()
index->SetData(srcIndex, indexSize);
/* call Gather function */
_Gather(s, t, dim, srcIndex, indexSize);
tUser = Gather(*s, *index);
_Gather(s, t, dim, index);
tUser = Gather(*s, dim, *index);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum) &&
......@@ -309,8 +332,8 @@ bool TestGather3()
indexGPU->SetData(srcIndex, indexSize);
/* call Gather function */
_Gather(sGPU, tGPU, dim, srcIndex, indexSize);
tUserGPU = Gather(*sGPU, *indexGPU);
_Gather(sGPU, tGPU, dim, indexGPU);
tUserGPU = Gather(*sGPU, dim, *indexGPU);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) &&
......@@ -341,6 +364,111 @@ bool TestGather3()
#endif // USE_CUDA
}
/*
case 4: float16 gather indexed sub-tensors
In this case, (3, 3) -> (2, 3), dim = 0,
srcIndex = [0, 2]
*/
bool TestGather4()
{
/* a input tensor of size (3, 3) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 3;
sDimSize[1] = 3;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a output tensor of size (2, 3) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
tDimSize[1] = 3;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
/* a index tensor of size (2) */
int indexOrder = 1;
int * indexDimSize = new int[indexOrder];
indexDimSize[0] = 2;
int indexUnitNum = 1;
for (int i = 0; i < indexOrder; i++)
indexUnitNum *= indexDimSize[i];
DTYPE sData[3][3] = { {0.0F, -1.0F, 2.0F},
{2.0F, 1.0F, 3.0F},
{1.0F, 2.0F, 4.0F} };
DTYPE answer[2][3] = { {0.0F, -1.0F, 2.0F},
{1.0F, 2.0F, 4.0F} };
int dim = 0;
int indexSize = 2;
int srcIndex[2] = { 0, 2 };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(sOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor * indexGPU = NewTensor(indexOrder, indexDimSize, X_INT, 1.0F, 0);
XTensor tUserGPU;
/* create float16 tensors */
XTensor sHalfGPU;
XTensor tHalfGPU;
XTensor tUserHalfGPU;
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
tGPU->SetZeroAll();
indexGPU->SetData(srcIndex, indexSize);
/* convert data type from float to float16 */
sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);
tHalfGPU = ConvertDataType(*tGPU, X_FLOAT16);
/* call gather function */
_Gather(&sHalfGPU, &tHalfGPU, indexGPU);
tUserHalfGPU = Gather(sHalfGPU, *indexGPU);
/* convert data type from float16 to float */
_ConvertDataType(&tHalfGPU, tGPU);
tUserGPU = ConvertDataType(tUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) &&
tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
delete sGPU;
delete tGPU;
delete indexGPU;
delete[] sDimSize;
delete[] tDimSize;
delete[] indexDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */;
delete[] sDimSize;
delete[] tDimSize;
delete[] indexDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
......@@ -370,7 +498,7 @@ bool TestGather()
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 2 test */
/* case 3 test */
caseFlag = TestGather3();
if (!caseFlag) {
returnFlag = false;
......@@ -379,6 +507,15 @@ bool TestGather()
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestGather4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -23,6 +23,7 @@
#define __TEST_GATHER_H__
#include "../core/movement/Gather.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -17,10 +17,12 @@
/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-20
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-15 float16 added
*/
#include "../XTensor.h"
#include "THardTanH.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -222,6 +224,182 @@ bool TestHardTanH2()
#endif // USE_CUDA
}
/*
case 3: float16 test HardTanH function.
y = 1 if x > 1
x if -1 <= x <= 1
-1 if x < -1
*/
bool TestHardTanH3()
{
/* a tensor of size (2, 3) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 2;
dimSize[1] = 3;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE xData[2][3] = { {0.5F, -1.0F, 2.0F},
{3.5F, -4.5F, 1.0F} };
DTYPE answer[2][3] = { {0.5F, -1.0F, 1.0F},
{1.0F, -1.0F, 1.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor yUserGPU;
/* create float16 tensor */
XTensor xHalfGPU;
XTensor yHalfGPU;
XTensor yUserHalfGPU;
/* Initialize variables */
xGPU->SetData(xData, unitNum);
yGPU->SetZeroAll();
/* convert data type from float to float16 */
xHalfGPU = ConvertDataType(*xGPU, X_FLOAT16);
yHalfGPU = ConvertDataType(*yGPU, X_FLOAT16);
/* call hardtanh function */
_HardTanH(&xHalfGPU, &yHalfGPU);
yUserHalfGPU = HardTanH(xHalfGPU);
/* convert data type from float16 to float */
_ConvertDataType(&yHalfGPU, yGPU);
yUserGPU = ConvertDataType(yUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = yGPU->CheckData(answer, unitNum, 1e-4F) && yUserGPU.CheckData(answer, unitNum, 1e-4F);
/* destroy variables */
delete xGPU;
delete yGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 4: float16 test backward computation of HardTanH function.
dE/dx = dE/dy * dy/dx
hard tanh: y = 1 if x > 1
x if -1 <= x <= 1
-1 if x< -1
and dy/dx = 1 if -1 <= x <= 1
0 otherwise
In this case, lossName=SQUAREDERROR.
*/
bool TestHardTanH4()
{
/* a tensor of size (2, 3) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 2;
dimSize[1] = 3;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE xData[2][3] = { {0.5F, -1.0F, 2.0F},
{3.5F, -4.5F, 1.0F} };
DTYPE goldData[2][3] = { {1.0F, 1.0F, 1.0F},
{1.0F, 1.0F, 1.0F} };
DTYPE yAnswer[2][3] = { {0.5F, -1.0F, 1.0F},
{1.0F, -1.0F, 1.0F} };
DTYPE dedyAnswer[2][3] = { {-0.5F, -2.0F, 0.0F},
{0.0F, -2.0F, 0.0F} };
DTYPE dedxAnswer[2][3] = { {-0.5F, -2.0F, 0.0F},
{0.0F, 0.0F, -0.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * goldGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* create float16 tensors */
XTensor xHalfGPU;
XTensor yHalfGPU;
XTensor goldHalfGPU;
XTensor dedyHalfGPU;
XTensor dedxHalfGPU;
/* initialize variables */
xGPU->SetData(xData, unitNum);
goldGPU->SetData(goldData, unitNum);
yGPU->SetZeroAll();
dedyGPU->SetZeroAll();
dedxGPU->SetZeroAll();
/* convert data type from float to float16 */
xHalfGPU = ConvertDataType(*xGPU, X_FLOAT16);
yHalfGPU = ConvertDataType(*yGPU, X_FLOAT16);
goldHalfGPU = ConvertDataType(*goldGPU, X_FLOAT16);
dedyHalfGPU = ConvertDataType(*dedyGPU, X_FLOAT16);
dedxHalfGPU = ConvertDataType(*dedxGPU, X_FLOAT16);
/* call hardtanh function */
_HardTanH(&xHalfGPU, &yHalfGPU);
/* call hardtanhbackward function */
_HardTanHBackward(&goldHalfGPU, &yHalfGPU, &xHalfGPU, &dedyHalfGPU, &dedxHalfGPU, SQUAREDERROR);
/* convert data type from float16 to float */
_ConvertDataType(&yHalfGPU, yGPU);
_ConvertDataType(&dedyHalfGPU, dedyGPU);
_ConvertDataType(&dedxHalfGPU, dedxGPU);
/* check results */
gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-4F) &&
dedxGPU->CheckData(dedxAnswer, unitNum, 1e-4F) &&
dedyGPU->CheckData(dedyAnswer, unitNum, 1e-4F);
/* destroy variables */
delete xGPU;
delete yGPU;
delete goldGPU;
delete dedxGPU;
delete dedyGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
......@@ -253,6 +431,26 @@ bool TestHardTanH()
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */
caseFlag = TestHardTanH3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestHardTanH4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -17,10 +17,12 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-02
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-12 float16 added
*/
#include "../XUtility.h"
#include "TLogSoftmax.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -311,6 +313,256 @@ bool TestLogSoftmax3()
#endif // USE_CUDA
}
/*
case 4: float16 test LogSoftmax function.
LogSoftmax function: y = log(e^x / \sum_{i} e^{x_i})
*/
bool TestLogSoftmax4()
{
/* a tensor of size (2, 3) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 2;
dimSize[1] = 3;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE xData[2][3] = { {0.0F, 1.0F, 2.0F},
{0.5F, 0.7F, 1.4F} };
DTYPE answer[2][3] = { {-2.4076F, -1.4076F, -0.4076F},
{-1.5435F, -1.3435F, -0.6435F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor yUserGPU;
/* create float16 tensors */
XTensor xHalfGPU;
XTensor yHalfGPU;
XTensor yUserHalfGPU;
/* initialize variables */
xGPU->SetData(xData, unitNum);
yGPU->SetZeroAll();
/* convert data type from float to float16 */
xHalfGPU = ConvertDataType(*xGPU, X_FLOAT16);
yHalfGPU = ConvertDataType(*yGPU, X_FLOAT16);
/* call logsoftmax function */
_LogSoftmax(&xHalfGPU, &yHalfGPU, 1);
yUserHalfGPU = LogSoftmax(xHalfGPU, 1);
/* convert data type from float16 to float */
_ConvertDataType(&yHalfGPU, yGPU);
yUserGPU = ConvertDataType(yUserHalfGPU, X_FLOAT);
/* check result */
gpuTest = yGPU->CheckData(answer, unitNum, 1e-2F) &&
yUserGPU.CheckData(answer, unitNum, 1e-2F);
/* destroy variables */
delete xGPU;
delete yGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 5: float16 test LogSoftmaxBackward function.
dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
In this case, LossName=CROSSENTROPY.
*/
bool TestLogSoftmax5()
{
/* a tensor of size (1, 3) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 1;
dimSize[1] = 3;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE xData[1][3] = {0.0F, 1.0F, 2.0F};
DTYPE gData[1][3] = {0.5F, 0.8F, 1.5F};
DTYPE yAnswer[1][3] = {-2.4076F, -1.4076F, -0.4076F};
DTYPE dedxAnswer[1][3] = {-0.4100F, -0.5553F, -0.8348F};
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * gGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* create float16 tensors */
XTensor xHalfGPU;
XTensor yHalfGPU;
XTensor gHalfGPU;
XTensor dedyHalfGPU;
XTensor dedxHalfGPU;
/* initialize variables */
xGPU->SetData(xData, unitNum);
gGPU->SetData(gData, unitNum);
yGPU->SetZeroAll();
dedxGPU->SetZeroAll();
dedyGPU->SetZeroAll();
/* convert data type from float to float16 */
xHalfGPU = ConvertDataType(*xGPU, X_FLOAT16);
yHalfGPU = ConvertDataType(*yGPU, X_FLOAT16);
gHalfGPU = ConvertDataType(*gGPU, X_FLOAT16);
dedyHalfGPU = ConvertDataType(*dedyGPU, X_FLOAT16);
dedxHalfGPU = ConvertDataType(*dedxGPU, X_FLOAT16);
/* call logsoftmax function */
_LogSoftmax(&xHalfGPU, &yHalfGPU, 1);
/* call logsoftmaxbackward function */
_LogSoftmaxBackward(&gHalfGPU, &yHalfGPU, &xHalfGPU, &dedyHalfGPU, &dedxHalfGPU, NULL, 1, CROSSENTROPY);
/* convert data type from float16 to float */
_ConvertDataType(&yHalfGPU, yGPU);
_ConvertDataType(&dedxHalfGPU, dedxGPU);
/* check result */
gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-2F) &&
dedxGPU->CheckData(dedxAnswer, unitNum, 1e-2F);
/* destroy variables */
delete xGPU;
delete yGPU;
delete gGPU;
delete dedxGPU;
delete dedyGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 6: float16 test LogSoftmaxBackward function.
dE/dx = dE/dy * dy/dx
log softmax: y_i = log(e^{x_i} / \sum_{k} e^{x_k})
In this case, LossName=SQUAREDERROR
*/
bool TestLogSoftmax6()
{
/* a tensor of size (1, 3) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 1;
dimSize[1] = 3;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE xData[1][3] = {0.0F, 1.0F, 2.0F};
DTYPE gData[1][3] = {0.5F, 0.8F, 1.5F};
DTYPE yAnswer[1][3] = {-2.4076F, -1.4076F, -0.4076F};
DTYPE dedxAnswer[1][3] = {-0.4100F, -0.5553F, -0.8348F};
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * xGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * yGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * gGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedyGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * dedxGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* create float16 tensors */
XTensor xHalfGPU;
XTensor yHalfGPU;
XTensor gHalfGPU;
XTensor dedyHalfGPU;
XTensor dedxHalfGPU;
/* initialize variables */
xGPU->SetData(xData, unitNum);
gGPU->SetData(gData, unitNum);
yGPU->SetZeroAll();
dedxGPU->SetZeroAll();
dedyGPU->SetZeroAll();
/* convert data type from float to float16 */
xHalfGPU = ConvertDataType(*xGPU, X_FLOAT16);
yHalfGPU = ConvertDataType(*yGPU, X_FLOAT16);
gHalfGPU = ConvertDataType(*gGPU, X_FLOAT16);
dedyHalfGPU = ConvertDataType(*dedyGPU, X_FLOAT16);
dedxHalfGPU = ConvertDataType(*dedxGPU, X_FLOAT16);
/* call logsoftmax function */
_LogSoftmax(&xHalfGPU, &yHalfGPU, 1);
/* call logsoftmaxbackward function */
_LogSoftmaxBackward(&gHalfGPU, &yHalfGPU, &xHalfGPU, &dedyHalfGPU, &dedxHalfGPU, NULL, 1, SQUAREDERROR);
/* convert data type from float16 to float */
_ConvertDataType(&yHalfGPU, yGPU);
_ConvertDataType(&dedxHalfGPU, dedxGPU);
/* check result */
gpuTest = yGPU->CheckData(yAnswer, unitNum, 1e-2F) &&
dedxGPU->CheckData(dedxAnswer, unitNum, 1e-2F);
/* destroy variables */
delete xGPU;
delete yGPU;
delete gGPU;
delete dedxGPU;
delete dedyGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
......@@ -352,6 +604,36 @@ bool TestLogSoftmax()
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestLogSoftmax4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* case 5 test */
caseFlag = TestLogSoftmax5();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 5 failed!\n");
}
else
XPRINT(0, stdout, ">> case 5 passed!\n");
/* case 6 test */
caseFlag = TestLogSoftmax6();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 6 failed!\n");
}
else
XPRINT(0, stdout, ">> case 6 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -17,6 +17,7 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-14
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-07 float16/int8 added
*/
#include "TMatrixMul.h"
......@@ -507,6 +508,304 @@ bool TestMatrixMul4()
#endif // USE_CUDA
}
/*
case 5: float16 matrix multiplication.
In this case, float16 a=(2, 3), float16 b=(3, 2) -> float16 c=(2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMul5()
{
/* a source tensor of size (2, 3) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
sDimSize1[1] = 3;
int sUnitNum1 = 1;
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (3, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 3;
sDimSize2[1] = 2;
int sUnitNum2 = 1;
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2, 2) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
tDimSize[1] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][3] = { {1.0F, 2.0F, 3.0F},
{-4.0F, 5.0F, 6.0F} };
DTYPE sData2[3][2] = { {0.0F, -1.0F},
{1.0F, 2.0F},
{2.0F, 1.0F} };
DTYPE answer[2][2] = { {8.0F, 6.0F},
{17.0F, 20.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* create float16 tensors */
XTensor halfSGPU1;
XTensor halfSGPU2;
XTensor halfTGPU;
XTensor halfTUserGPU;
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* convert data type from float to float16 */
halfSGPU1 = ConvertDataType(*sGPU1, X_FLOAT16);
halfSGPU2 = ConvertDataType(*sGPU2, X_FLOAT16);
halfTGPU = ConvertDataType(*tGPU, X_FLOAT16);
/* call MatrixMul function */
_MatrixMul(&halfSGPU1, X_NOTRANS, &halfSGPU2, X_NOTRANS, &halfTGPU);
halfTUserGPU = MatrixMul(halfSGPU1, X_NOTRANS, halfSGPU2, X_NOTRANS);
/* convert data type from float16 to float */
_ConvertDataType(&halfTGPU, tGPU);
tUserGPU = ConvertDataType(halfTUserGPU, X_FLOAT);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 6: float16 matrix multiplication.
In this case, float16 a=(2, 3), float16 b=(3, 2) -> float32 c=(2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMul6()
{
/* a source tensor of size (2, 3) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
sDimSize1[1] = 3;
int sUnitNum1 = 1;
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (3, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 3;
sDimSize2[1] = 2;
int sUnitNum2 = 1;
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2, 2) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
tDimSize[1] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][3] = { {1.0F, 2.0F, 3.0F},
{-4.0F, 5.0F, 6.0F} };
DTYPE sData2[3][2] = { {0.0F, -1.0F},
{1.0F, 2.0F},
{2.0F, 1.0F} };
DTYPE answer[2][2] = { {8.0F, 6.0F},
{17.0F, 20.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* create float16 tensors */
XTensor halfSGPU1;
XTensor halfSGPU2;
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* convert data type from float to float16 */
halfSGPU1 = ConvertDataType(*sGPU1, X_FLOAT16);
halfSGPU2 = ConvertDataType(*sGPU2, X_FLOAT16);
/* call MatrixMul function */
_MatrixMul(&halfSGPU1, X_NOTRANS, &halfSGPU2, X_NOTRANS, tGPU);
tUserGPU = MatrixMul(halfSGPU1, X_NOTRANS, halfSGPU2, X_NOTRANS, X_FLOAT);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 7: int8 matrix multiplication.
In this case, int8 a=(2, 3), int8 b=(3, 2) -> float32 c=(2, 2),
transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMul7()
{
/* a source tensor of size (2, 3) */
int sOrder1 = 2;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 2;
sDimSize1[1] = 3;
int sUnitNum1 = 1;
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (3, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 3;
sDimSize2[1] = 2;
int sUnitNum2 = 1;
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (2, 2) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 2;
tDimSize[1] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][3] = { {1, 2, 3},
{-4, 5, 6} };
DTYPE sData2[3][2] = { {0, -1},
{1, 2},
{2, 1} };
DTYPE answer[2][2] = { {8, 6},
{17, 20} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* create int8 tensors */
XTensor int8SGPU1;
XTensor int8SGPU2;
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* convert data type from float to int8 */
int8SGPU1 = ConvertDataType(*sGPU1, X_INT8);
int8SGPU2 = ConvertDataType(*sGPU2, X_INT8);
/* call MatrixMul function */
_MatrixMul(&int8SGPU1, X_NOTRANS, &int8SGPU2, X_NOTRANS, tGPU);
tUserGPU = MatrixMul(int8SGPU1, X_NOTRANS, int8SGPU2, X_NOTRANS, X_FLOAT);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
......@@ -556,6 +855,33 @@ bool TestMatrixMul()
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* case 5 test */
caseFlag = TestMatrixMul5();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 5 failed!\n");
}
else
XPRINT(0, stdout, ">> case 5 passed!\n");
/* case 6 test */
caseFlag = TestMatrixMul6();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 6 failed!\n");
}
else
XPRINT(0, stdout, ">> case 6 passed!\n");
/* case 7 test */
caseFlag = TestMatrixMul7();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 7 failed!\n");
}
else
XPRINT(0, stdout, ">> case 7 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -23,6 +23,7 @@
#define __TEST_MATRIXMUL_H__
#include "../core/arithmetic/MatrixMul.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -246,7 +246,7 @@ In this case, 2 * (2, 4) -> (4, 4), whereToMerge=0.
bool TestMerge3()
{
/* create list */
TensorList * smallList = new TensorList();
XList * smallList = new XList();
/* a small tensor of size (2, 4) */
int sOrder = 2;
......@@ -364,7 +364,7 @@ In this case, 2 * (2, 4) -> (2, 8), whereToMerge=1.
bool TestMerge4()
{
/* create list */
TensorList * smallList = new TensorList();
XList * smallList = new XList();
/* a small tensor of size (2, 4) */
int sOrder = 2;
......
......@@ -17,11 +17,13 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-30
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-12 float16/int/int8 added
*/
#include "TMultiplyDim.h"
#include "../core/arithmetic/MultiplyDim.h"
#include "../XTensor.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
......@@ -248,6 +250,205 @@ bool TestMultiplyDim2()
#endif // USE_CUDA
}
/*
case 3: float16 tensor multiplication c = a * b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting
In this case, (2, 4) * (2) = (2, 4), n = 0.
*/
bool TestMultiplyDim3()
{
/* a tensor of size (2, 4) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 2;
aDimSize[1] = 4;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
/* a tensor of size (2) */
int bOrder = 1;
int * bDimSize = new int[bOrder];
bDimSize[0] = 2;
int bUnitNum = 1;
for (int i = 0; i < bOrder; i++)
bUnitNum *= bDimSize[i];
DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE bData[2] = {1.0F, -1.0F};
DTYPE answer[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{-4.0F, -5.0F, -6.0F, -7.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
XTensor * cGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * cMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor cUserGPU;
/* create float16 tensor */
XTensor aHalfGPU;
XTensor bHalfGPU;
XTensor cHalfGPU;
XTensor cMeHalfGPU;
XTensor cUserHalfGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
cMeGPU->SetData(aData, aUnitNum);
bGPU->SetData(bData, bUnitNum);
cGPU->SetZeroAll();
/* convert data type from float to float16 */
aHalfGPU = ConvertDataType(*aGPU, X_FLOAT16);
bHalfGPU = ConvertDataType(*bGPU, X_FLOAT16);
cHalfGPU = ConvertDataType(*cGPU, X_FLOAT16);
cMeHalfGPU = ConvertDataType(*cMeGPU, X_FLOAT16);
/* call multiplydim function */
_MultiplyDim(&aHalfGPU, &bHalfGPU, &cHalfGPU, 0);
_MultiplyDimMe(&cMeHalfGPU, &bHalfGPU, 0);
cUserHalfGPU = MultiplyDim(aHalfGPU, bHalfGPU, 0);
/* convert data type from float16 to float */
_ConvertDataType(&cHalfGPU, cGPU);
_ConvertDataType(&cMeHalfGPU, cMeGPU);
cUserGPU = ConvertDataType(cUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = cGPU->CheckData(answer, aUnitNum) &&
cMeGPU->CheckData(answer, aUnitNum) &&
cUserGPU.CheckData(answer, aUnitNum);
/* destroy variables */
delete aGPU;
delete bGPU;
delete cGPU;
delete cMeGPU;
delete[] aDimSize;
delete[] bDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] aDimSize;
delete[] bDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 4: flaot16 tensor multiplication c = a*b + \alpha * c
where the size of b is equal to the n-th dimension of a,
i.e., a is multiplied with b by broadcasting.
In this case, (2, 4) * (4) = (2, 4), n = 1.
*/
bool TestMultiplyDim4()
{
/* a tensor of size (2, 4) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 2;
aDimSize[1] = 4;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
/* a tensor of size (4) */
int bOrder = 1;
int * bDimSize = new int[bOrder];
bDimSize[0] = 4;
int bUnitNum = 1;
for (int i = 0; i < bOrder; i++)
bUnitNum *= bDimSize[i];
DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE bData[4] = {1.0F, -1.0F , 1.0F, -1.0F};
DTYPE answer[2][4] = { {0.0F, -1.0F, 2.0F, -3.0F},
{4.0F, -5.0F, 6.0F, -7.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
XTensor * cGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * cMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor cUserGPU;
/* create float16 tensor */
XTensor aHalfGPU;
XTensor bHalfGPU;
XTensor cHalfGPU;
XTensor cMeHalfGPU;
XTensor cUserHalfGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
cMeGPU->SetData(aData, aUnitNum);
bGPU->SetData(bData, bUnitNum);
cGPU->SetZeroAll();
/* convert data type from float to float16 */
aHalfGPU = ConvertDataType(*aGPU, X_FLOAT16);
bHalfGPU = ConvertDataType(*bGPU, X_FLOAT16);
cHalfGPU = ConvertDataType(*cGPU, X_FLOAT16);
cMeHalfGPU = ConvertDataType(*cMeGPU, X_FLOAT16);
/* call multiplydim function */
_MultiplyDim(&aHalfGPU, &bHalfGPU, &cHalfGPU, 1);
_MultiplyDimMe(&cMeHalfGPU, &bHalfGPU, 1);
cUserHalfGPU = MultiplyDim(aHalfGPU, bHalfGPU, 1);
/* convert data type from float16 to float */
_ConvertDataType(&cHalfGPU, cGPU);
_ConvertDataType(&cMeHalfGPU, cMeGPU);
cUserGPU = ConvertDataType(cUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = cGPU->CheckData(answer, aUnitNum) &&
cMeGPU->CheckData(answer, aUnitNum) &&
cUserGPU.CheckData(answer, aUnitNum);
/* destroy variables */
delete aGPU;
delete bGPU;
delete cGPU;
delete cMeGPU;
delete[] aDimSize;
delete[] bDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] aDimSize;
delete[] bDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* test for MultiplyDim Function */
bool TestMultiplyDim()
{
......@@ -272,6 +473,24 @@ bool TestMultiplyDim()
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */
caseFlag = TestMultiplyDim3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestMultiplyDim4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -17,9 +17,11 @@
/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-06-14
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-12 float16/int/int8 added
*/
#include "TNegate.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -191,6 +193,86 @@ bool TestNegate2()
#endif // USE_CUDA
}
/* case 3: float16 set every entry to its minus value */
bool TestNegate3()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{-3.0F, 4.0F},
{5.0F, -6.0F} };
DTYPE answer[3][2] = { {-1.0F, 2.0F},
{3.0F, -4.0F},
{-5.0F, 6.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* create float16 tensor */
XTensor aHalfGPU;
XTensor bHalfGPU;
XTensor aMeHalfGPU;
XTensor bUserHalfGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* convert data type from float to float16 */
aHalfGPU = ConvertDataType(*aGPU, X_FLOAT16);
aMeHalfGPU = ConvertDataType(*aMeGPU, X_FLOAT16);
bHalfGPU = ConvertDataType(*bGPU, X_FLOAT16);
/* call negate function */
_Negate(&aHalfGPU, &bHalfGPU);
_NegateMe(&aMeHalfGPU);
bUserHalfGPU = Negate(aHalfGPU);
/* convert data type from float16 to float */
_ConvertDataType(&bHalfGPU, bGPU);
_ConvertDataType(&aMeHalfGPU, aMeGPU);
bUserGPU = ConvertDataType(bUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] aDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
/* other cases */
/*
TODO!!
......@@ -222,6 +304,16 @@ bool TestNegate()
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */
caseFlag = TestNegate3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -17,9 +17,11 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-30
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 float16 added
*/
#include "TReduceMax.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -86,8 +88,8 @@ bool TestReduceMax1()
tUser2 = ReduceMax(*s, 1);
/* check results */
cpuTest = t1->CheckData(answer1, tUnitNum1) && tUser1.CheckData(answer1, tUnitNum1)
&& t2->CheckData(answer2, tUnitNum2) && tUser2.CheckData(answer2, tUnitNum2);
cpuTest = t1->CheckData(answer1, tUnitNum1) && tUser1.CheckData(answer1, tUnitNum1) &&
t2->CheckData(answer2, tUnitNum2) && tUser2.CheckData(answer2, tUnitNum2);
#ifdef USE_CUDA
/* GPU test */
......@@ -112,8 +114,8 @@ bool TestReduceMax1()
tUserGPU2 = ReduceMax(*sGPU, 1);
/* check results */
gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tUserGPU1.CheckData(answer1, tUnitNum1)
&& tGPU2->CheckData(answer2, tUnitNum2) && tUserGPU2.CheckData(answer2, tUnitNum2);
gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tUserGPU1.CheckData(answer1, tUnitNum1) &&
tGPU2->CheckData(answer2, tUnitNum2) && tUserGPU2.CheckData(answer2, tUnitNum2);
/* destroy variables */
delete s;
......@@ -140,6 +142,113 @@ bool TestReduceMax1()
#endif // USE_CUDA
}
/*
case 2: float16 get the max value of the items along a dimension of the tensor.
In this case,
(2, 4) -> (4), dim = 0
(2, 4) -> (2), dim = 1
*/
bool TestReduceMax2()
{
/* a input tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a output tensor of size (4) */
int tOrder1 = 1;
int * tDimSize1 = new int[tOrder1];
tDimSize1[0] = 4;
int tUnitNum1 = 1;
for (int i = 0; i < tOrder1; i++)
tUnitNum1 *= tDimSize1[i];
/* a output tensor of size (2) */
int tOrder2 = 1;
int * tDimSize2 = new int[tOrder2];
tDimSize2[0] = 2;
int tUnitNum2 = 1;
for (int i = 0; i < tOrder2; i++)
tUnitNum2 *= tDimSize2[i];
DTYPE sData[2][4] = { {0.0F, 5.0F, 2.0F, 3.0F},
{4.0F, 1.0F, 6.0F, 7.0F} };
DTYPE answer1[4] = {4.0F, 5.0F, 6.0F, 7.0F};
DTYPE answer2[2] = {5.0F, 7.0F};
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0);
XTensor * tGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0);
XTensor tUserGPU1;
XTensor tUserGPU2;
/* create float16 tensors */
XTensor sHalfGPU;
XTensor tHalfGPU1;
XTensor tHalfGPU2;
XTensor tUserHalfGPU1;
XTensor tUserHalfGPU2;
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
tGPU1->SetZeroAll();
tGPU2->SetZeroAll();
/* convert data type form float to float16 */
sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);
tHalfGPU1 = ConvertDataType(*tGPU1, X_FLOAT16);
tHalfGPU2 = ConvertDataType(*tGPU2, X_FLOAT16);
/* call reducemax function */
_ReduceMax(&sHalfGPU, &tHalfGPU1, 0);
_ReduceMax(&sHalfGPU, &tHalfGPU2, 1);
tUserHalfGPU1 = ReduceMax(sHalfGPU, 0);
tUserHalfGPU2 = ReduceMax(sHalfGPU, 1);
/* convert data type from float16 to float */
_ConvertDataType(&tHalfGPU1, tGPU1);
_ConvertDataType(&tHalfGPU2, tGPU2);
tUserGPU1 = ConvertDataType(tUserHalfGPU1, X_FLOAT);
tUserGPU2 = ConvertDataType(tUserHalfGPU2, X_FLOAT);
/* check results */
gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tUserGPU1.CheckData(answer1, tUnitNum1) &&
tGPU2->CheckData(answer2, tUnitNum2) && tUserGPU2.CheckData(answer2, tUnitNum2);
/* destroy variables */
delete sGPU;
delete tGPU1;
delete tGPU2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
......@@ -160,6 +269,15 @@ bool TestReduceMax()
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestReduceMax2();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -17,10 +17,12 @@
/*
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2018-04-30
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-06 float16 added
*/
#include "TReduceSum.h"
#include "../core/getandset/SetData.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -194,8 +196,8 @@ bool TestReduceSum2()
XTensor tUser;
/* initialize variables */
_SetDataFixedFloat(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1));
_SetDataFixed(s, 1.0F);
_SetDataFixed(answer, (float)s->GetDim(1));
/* call ReduceSum function */
_ReduceSum(s, t, 1);
......@@ -214,7 +216,7 @@ bool TestReduceSum2()
XTensor tUserGPU;
/* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
_SetDataFixed(sGPU, 1.0F);
/* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1);
......@@ -283,8 +285,8 @@ bool TestReduceSum3()
XTensor tUser;
/* initialize variables */
_SetDataFixedFloat(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1));
_SetDataFixed(s, 1.0F);
_SetDataFixed(answer, (float)s->GetDim(1));
/* call ReduceSum function */
_ReduceSum(s, t, 1);
......@@ -303,7 +305,7 @@ bool TestReduceSum3()
XTensor tUserGPU;
/* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
_SetDataFixed(sGPU, 1.0F);
/* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1);
......@@ -372,8 +374,8 @@ bool TestReduceSum4()
XTensor tUser;
/* initialize variables */
_SetDataFixedFloat(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1));
_SetDataFixed(s, 1.0F);
_SetDataFixed(answer, (float)s->GetDim(1));
/* call ReduceSum function */
_ReduceSum(s, t, 1);
......@@ -392,7 +394,7 @@ bool TestReduceSum4()
XTensor tUserGPU;
/* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
_SetDataFixed(sGPU, 1.0F);
/* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1);
......@@ -463,8 +465,8 @@ bool TestReduceSum5()
XTensor tUser;
/* initialize variables */
_SetDataFixedFloat(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1));
_SetDataFixed(s, 1.0F);
_SetDataFixed(answer, (float)s->GetDim(1));
/* call ReduceSum function */
_ReduceSum(s, t, 1);
......@@ -483,7 +485,7 @@ bool TestReduceSum5()
XTensor tUserGPU;
/* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
_SetDataFixed(sGPU, 1.0F);
/* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1);
......@@ -514,7 +516,6 @@ bool TestReduceSum5()
#endif // USE_CUDA
}
/*
case 6: test ReduceSum function.
Sum the items along a dimension of the tensor.
......@@ -555,8 +556,8 @@ bool TestReduceSum6()
XTensor tUser;
/* initialize variables */
_SetDataFixedFloat(s, 1.0F);
_SetDataFixedFloat(answer, (float)s->GetDim(1));
_SetDataFixed(s, 1.0F);
_SetDataFixed(answer, (float)s->GetDim(1));
/* call ReduceSum function */
_ReduceSum(s, t, 1);
......@@ -575,7 +576,7 @@ bool TestReduceSum6()
XTensor tUserGPU;
/* initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
_SetDataFixed(sGPU, 1.0F);
/* call ReduceSum function */
_ReduceSum(sGPU, tGPU, 1);
......@@ -607,6 +608,126 @@ bool TestReduceSum6()
}
/*
case 7: float16 test ReduceSum function.
Sum the items along a dimension of the tensor.
In this case,
(2, 4) -> (4), dim = 0
(2, 4) -> (2), dim = 1
*/
bool TestReduceSum7()
{
/* a tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a tensor of size (4) */
int tOrder1 = 1;
int * tDimSize1 = new int[tOrder1];
tDimSize1[0] = 4;
int tUnitNum1 = 1;
for (int i = 0; i < tOrder1; i++)
tUnitNum1 *= tDimSize1[i];
/* a tensor of size (2) */
int tOrder2 = 1;
int * tDimSize2 = new int[tOrder2];
tDimSize2[0] = 2;
int tUnitNum2 = 1;
for (int i = 0; i < tOrder2; i++)
tUnitNum2 *= tDimSize2[i];
DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE answer1[4] = {4.0F, 6.0F, 8.0F, 10.0F};
DTYPE answer2[2] = {6.0F, 22.0F};
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * shiftGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0);
XTensor * shiftGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU1 = NewTensor(tOrder1, tDimSize1, X_FLOAT, 1.0F, 0);
XTensor * tGPU2 = NewTensor(tOrder2, tDimSize2, X_FLOAT, 1.0F, 0);
XTensor tUserGPU1;
XTensor tUserGPU2;
/* create float16 tensors */
XTensor sHalfGPU;
XTensor shiftHalfGPU1;
XTensor shiftHalfGPU2;
XTensor tHalfGPU1;
XTensor tHalfGPU2;
XTensor tUserHalfGPU1;
XTensor tUserHalfGPU2;
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
shiftGPU1->SetZeroAll();
shiftGPU2->SetZeroAll();
tGPU1->SetZeroAll();
tGPU2->SetZeroAll();
/* convert data type from float to float16 */
sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);
shiftHalfGPU1 = ConvertDataType(*shiftGPU1, X_FLOAT16);
shiftHalfGPU2 = ConvertDataType(*shiftGPU2, X_FLOAT16);
tHalfGPU1 = ConvertDataType(*tGPU1, X_FLOAT16);
tHalfGPU2 = ConvertDataType(*tGPU2, X_FLOAT16);
/* call reducesum function */
_ReduceSum(&sHalfGPU, &tHalfGPU1, 0);
_ReduceSum(&sHalfGPU, &tHalfGPU2, 1);
tUserHalfGPU1 = ReduceSum(sHalfGPU, 0, shiftHalfGPU1);
tUserHalfGPU2 = ReduceSum(sHalfGPU, 1, shiftHalfGPU2);
/* convert data type from float16 to float */
_ConvertDataType(&tHalfGPU1, tGPU1);
_ConvertDataType(&tHalfGPU2, tGPU2);
tUserGPU1 = ConvertDataType(tUserHalfGPU1, X_FLOAT);
tUserGPU2 = ConvertDataType(tUserHalfGPU2, X_FLOAT);
/* check results */
gpuTest = tGPU1->CheckData(answer1, tUnitNum1) && tUserGPU1.CheckData(answer1, tUnitNum1) &&
tGPU2->CheckData(answer2, tUnitNum2) && tUserGPU2.CheckData(answer2, tUnitNum2);
/* destroy variables */
delete sGPU;
delete shiftGPU1;
delete shiftGPU2;
delete tGPU1;
delete tGPU2;
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] sDimSize;
delete[] tDimSize1;
delete[] tDimSize2;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
......@@ -672,6 +793,15 @@ bool TestReduceSum()
else
XPRINT(0, stdout, ">> case 6 passed!\n");
/* case 7 test */
caseFlag = TestReduceSum7();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 7 failed!\n");
}
else
XPRINT(0, stdout, ">> case 7 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -17,9 +17,11 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-12 float16/int/int8 added
*/
#include "TScaleAndShift.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -113,6 +115,254 @@ bool TestScaleAndShift1()
#endif // USE_CUDA
}
/*
case 2: flaot16 scale and shift all tensor entires.
p = p * scale + shift
*/
bool TestScaleAndShift2()
{
/* a input tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE answer[2][4] = { {0.5F, 2.5F, 4.5F, 6.5F},
{8.5F, 10.5F, 12.5F, 14.5F} };
DTYPE scaleFactor = 2.0F;
DTYPE shiftFactor = 0.5F;
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tMeGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* create float16 tensor */
XTensor sHalfGPU;
XTensor tHalfGPU;
XTensor tMeHalfGPU;
XTensor tUserHalfGPU;
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
tMeGPU->SetData(sData, sUnitNum);
/* convert data type from float to float16 */
sHalfGPU = ConvertDataType(*sGPU, X_FLOAT16);
tMeHalfGPU = ConvertDataType(*tMeGPU, X_FLOAT16);
tHalfGPU = ConvertDataType(*tGPU, X_FLOAT16);
/* call scaleandshift function */
_ScaleAndShift(&sHalfGPU, &tHalfGPU, scaleFactor, shiftFactor);
_ScaleAndShiftMe(&tMeHalfGPU, scaleFactor, shiftFactor);
tUserHalfGPU = ScaleAndShift(sHalfGPU, scaleFactor, shiftFactor);
/* convert data type from float16 to float */
_ConvertDataType(&tHalfGPU, tGPU);
_ConvertDataType(&tMeHalfGPU, tMeGPU);
tUserGPU = ConvertDataType(tUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = tGPU->CheckData(answer, sUnitNum) &&
tMeGPU->CheckData(answer, sUnitNum) &&
tUserGPU.CheckData(answer, sUnitNum);
/* destroy variables */
delete sGPU;
delete tGPU;
delete tMeGPU;
delete[] sDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] sDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 3: int32 scale and shift all tensor entires.
p = p * scale + shift
*/
bool TestScaleAndShift3()
{
/* a input tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE answer[2][4] = { {1.0F, 3.0F, 5.0F, 7.0F},
{9.0F, 11.0F, 13.0F, 15.0F} };
DTYPE scaleFactor = 2.0F;
DTYPE shiftFactor = 1.8F;
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tMeGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* create int32 tensor */
XTensor sInt32GPU;
XTensor tInt32GPU;
XTensor tMeInt32GPU;
XTensor tUserInt32GPU;
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
tMeGPU->SetData(sData, sUnitNum);
/* convert data type from float to int32 */
sInt32GPU = ConvertDataType(*sGPU, X_INT);
tMeInt32GPU = ConvertDataType(*tMeGPU, X_INT);
tInt32GPU = ConvertDataType(tGPU, X_INT);
/* call scaleandshift function */
_ScaleAndShift(&sInt32GPU, &tInt32GPU, scaleFactor, shiftFactor);
_ScaleAndShiftMe(&tMeInt32GPU, scaleFactor, shiftFactor);
tUserInt32GPU = ScaleAndShift(sInt32GPU, scaleFactor, shiftFactor);
/* convert data type from int32 to float */
_ConvertDataType(&tInt32GPU, tGPU);
_ConvertDataType(&tMeInt32GPU, tMeGPU);
tUserGPU = ConvertDataType(tUserInt32GPU, X_FLOAT);
/* check results */
gpuTest = tGPU->CheckData(answer, sUnitNum) &&
tMeGPU->CheckData(answer, sUnitNum) &&
tUserGPU.CheckData(answer, sUnitNum);
/* destroy variables */
delete sGPU;
delete tGPU;
delete tMeGPU;
delete[] sDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] sDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 4: int8 scale and shift all tensor entires.
p = p * scale + shift
*/
bool TestScaleAndShift4()
{
/* a input tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE answer[2][4] = { {1.0F, 3.0F, 5.0F, 7.0F},
{9.0F, 11.0F, 13.0F, 15.0F} };
DTYPE scaleFactor = 2.0F;
DTYPE shiftFactor = 1.8F;
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tMeGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* create int8 tensor */
XTensor sInt8GPU;
XTensor tInt8GPU;
XTensor tMeInt8GPU;
XTensor tUserInt8GPU;
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
tMeGPU->SetData(sData, sUnitNum);
/* convert data type from float to int8 */
sInt8GPU = ConvertDataType(*sGPU, X_INT8);
tMeInt8GPU = ConvertDataType(*tMeGPU, X_INT8);
tInt8GPU = ConvertDataType(*tGPU, X_INT8);
/* call scaleandshift function */
_ScaleAndShift(&sInt8GPU, &tInt8GPU, scaleFactor, shiftFactor);
_ScaleAndShiftMe(&tMeInt8GPU, scaleFactor, shiftFactor);
tUserInt8GPU = ScaleAndShift(sInt8GPU, scaleFactor, shiftFactor);
/* convert data type from int8 to float */
_ConvertDataType(&tInt8GPU, tGPU);
_ConvertDataType(&tMeInt8GPU, tMeGPU);
tUserGPU = ConvertDataType(tUserInt8GPU, X_FLOAT);
/* check results */
gpuTest = tGPU->CheckData(answer, sUnitNum) &&
tMeGPU->CheckData(answer, sUnitNum) &&
tUserGPU.CheckData(answer, sUnitNum);
/* destroy variables */
delete sGPU;
delete tGPU;
delete tMeGPU;
delete[] sDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] sDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
......@@ -133,6 +383,33 @@ bool TestScaleAndShift()
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestScaleAndShift2();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */
caseFlag = TestScaleAndShift3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestScaleAndShift4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -17,9 +17,11 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-12
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-24 float16 added
*/
#include "TSign.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -110,6 +112,88 @@ bool TestSign1()
#endif // USE_CUDA
}
/*
case 2: float16 test Sign function.
Set every entry to its sign value.
*/
bool TestSign2()
{
/* a tensor of size (3, 2) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 3;
aDimSize[1] = 2;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
DTYPE aData[3][2] = { {1.0F, -2.0F},
{0.0F, 4.0F},
{5.0F, -6.0F} };
DTYPE answer[3][2] = { {1.0F, -1.0F},
{0.0F, 1.0F},
{1.0F, -1.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * aMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor bUserGPU;
/* create float16 tensor */
XTensor aHalfGPU;
XTensor bHalfGPU;
XTensor aMeHalfGPU;
XTensor bUserHalfGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
aMeGPU->SetData(aData, aUnitNum);
/* convert data type from float to float16 */
aHalfGPU = ConvertDataType(*aGPU, X_FLOAT16);
aMeHalfGPU = ConvertDataType(*aMeGPU, X_FLOAT16);
bHalfGPU = ConvertDataType(*bGPU, X_FLOAT16);
/* call Sign function */
_Sign(&aHalfGPU, &bHalfGPU);
_SignMe(&aMeHalfGPU);
bUserHalfGPU = Sign(aHalfGPU);
/* convert data type from float16 to float */
_ConvertDataType(&bHalfGPU, bGPU);
_ConvertDataType(&aMeHalfGPU, aMeGPU);
bUserGPU = ConvertDataType(bUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = bGPU->CheckData(answer, aUnitNum, 1e-4F) &&
aMeGPU->CheckData(answer, aUnitNum, 1e-4F) &&
bUserGPU.CheckData(answer, aUnitNum, 1e-4F);
/* destroy variables */
delete aGPU;
delete bGPU;
delete aMeGPU;
delete[] aDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] aDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
......@@ -131,6 +215,16 @@ bool TestSign()
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestSign2();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -222,8 +222,8 @@ In this case, (3, 4) -> 2 * (3, 2) , whereToSplit=1, splitNum=2.
bool TestSplit3()
{
/* create list */
TensorList * tList = new TensorList();
TensorList tUserList;
XList * tList = new XList();
XList tUserList;
/* a source tensor of size (3, 4) */
int sOrder = 2;
......
......@@ -90,7 +90,7 @@ bool TestSpread1()
XTensor * modify = NewTensor(dataOrder, dataDimSize);
/* Initialize variables */
_SetDataFixedFloat(s, 0.0F);
_SetDataFixed(s, 0.0F);
modify->SetData(data, dataUnitNum);
/* call _Spread function */
......@@ -108,7 +108,7 @@ bool TestSpread1()
XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
_SetDataFixedFloat(sGPU, 0.0F);
_SetDataFixed(sGPU, 0.0F);
modifyGPU->SetData(data, dataUnitNum);
/* call _Spread function */
......@@ -134,7 +134,7 @@ bool TestSpread1()
}
/*
case 2: test _SpreadForGather function
case 2: test _SpreadForGather and _SpreadForCopyIndexed function
spread a collection tensor to source tensor
*/
bool TestSpread2()
......@@ -192,22 +192,22 @@ bool TestSpread2()
XTensor * s2 = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor * sIndex = NewTensor(indexOrder, indexDimSize, X_INT);
XTensor * cIndex = NewTensor(indexOrder, indexDimSize, X_INT);
XTensor * tIndex = NewTensor(indexOrder, indexDimSize, X_INT);
/* initialize variables */
s1->SetData(sData, sUnitNum);
s2->SetData(sData, sUnitNum);
t->SetData(tData, tUnitNum);
sIndex->SetData(srcIndex, indexSize);
cIndex->SetData(tgtIndex, indexSize);
tIndex->SetData(tgtIndex, indexSize);
/* call _SpreadForGather function */
_SpreadForCopyIndexed(s1, t, dim, sIndex, cIndex, 1);
_SpreadForCopyIndexed(s1, t, dim, sIndex, tIndex, 1);
_SpreadForGather(s2, t, sIndex);
/* check results */
cpuTest = s1->CheckData(answer, tUnitNum) &&
s2->CheckData(answer, tUnitNum);
cpuTest = s1->CheckData(answer, sUnitNum) &&
s2->CheckData(answer, sUnitNum);
#ifdef USE_CUDA
/* GPU test */
......@@ -218,34 +218,34 @@ bool TestSpread2()
XTensor * sGPU2 = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(sOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor * sIndexGPU = NewTensor(indexOrder, indexDimSize, X_INT, 1.0F, 0);
XTensor * cIndexGPU = NewTensor(indexOrder, indexDimSize, X_INT, 1.0F, 0);
XTensor * tIndexGPU = NewTensor(indexOrder, indexDimSize, X_INT, 1.0F, 0);
/* initialize variables */
sGPU1->SetData(sData, sUnitNum);
sGPU2->SetData(sData, sUnitNum);
tGPU->SetData(tData, tUnitNum);
sIndexGPU->SetData(srcIndex, indexSize);
cIndexGPU->SetData(tgtIndex, indexSize);
tIndexGPU->SetData(tgtIndex, indexSize);
/* call _SpreadForGather function */
_SpreadForCopyIndexed(sGPU1, tGPU, dim, sIndex, cIndex, 1);
_SpreadForCopyIndexed(sGPU1, tGPU, dim, sIndexGPU, tIndexGPU, 1);
_SpreadForGather(sGPU2, tGPU, sIndexGPU);
/* check results */
gpuTest = sGPU1->CheckData(answer, tUnitNum) &&
sGPU2->CheckData(answer, tUnitNum);
gpuTest = sGPU1->CheckData(answer, sUnitNum) &&
sGPU2->CheckData(answer, sUnitNum);
/* destroy variables */
delete s1;
delete s2;
delete t;
delete sIndex;
delete cIndex;
delete tIndex;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete sIndexGPU;
delete cIndexGPU;
delete tIndexGPU;
delete[] sDimSize;
delete[] tDimSize;
delete[] indexDimSize;
......@@ -257,7 +257,142 @@ bool TestSpread2()
delete s2;
delete t;
delete sIndex;
delete cIndex;
delete tIndex;
delete[] sDimSize;
delete[] tDimSize;
delete[] indexDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 3: test _SpreadForGather and _SpreadForCopyIndexed function
spread a collection tensor to source tensor
*/
bool TestSpread3()
{
/* a input tensor of size (3, 3) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 3;
sDimSize[1] = 3;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a output tensor of size (2, 3) */
int tOrder = 2;
int * tDimSize = new int[tOrder];
tDimSize[0] = 3;
tDimSize[1] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
/* a index tensor of size (2) */
int indexOrder = 1;
int * indexDimSize = new int[indexOrder];
indexDimSize[0] = 2;
int indexUnitNum = 1;
for (int i = 0; i < indexOrder; i++)
indexUnitNum *= indexDimSize[i];
DTYPE sData[3][3] = { {0.0F, 0.0F, 2.0F},
{2.0F, 1.0F, 3.0F},
{2.0F, 2.0F, 4.0F} };
DTYPE tData[3][2] = { {0.0F, -1.0F},
{2.0F, 1.0F},
{2.0F, 0.0F} };
DTYPE answer[3][3] = { {-1.0F, 0.0F, 2.0F},
{3.0F, 1.0F, 5.0F},
{2.0F, 2.0F, 6.0F} };
int dim = 1;
int indexSize = 2;
int srcIndex[2] = {0, 2};
int tgtIndex[2] = {1, 0};
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s1 = NewTensor(sOrder, sDimSize);
XTensor * s2 = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor * sIndex = NewTensor(indexOrder, indexDimSize, X_INT);
XTensor * tIndex = NewTensor(indexOrder, indexDimSize, X_INT);
/* initialize variables */
s1->SetData(sData, sUnitNum);
s2->SetData(sData, sUnitNum);
t->SetData(tData, tUnitNum);
sIndex->SetData(srcIndex, indexSize);
tIndex->SetData(tgtIndex, indexSize);
/* call _SpreadForGather function */
_SpreadForCopyIndexed(s1, t, dim, sIndex, tIndex, 1);
_SpreadForCopyIndexed(s2, t, dim, sIndex, tIndex, 1);
/* check results */
cpuTest = s1->CheckData(answer, sUnitNum) &&
s2->CheckData(answer, sUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU1 = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(sOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor * sIndexGPU = NewTensor(indexOrder, indexDimSize, X_INT, 1.0F, 0);
XTensor * tIndexGPU = NewTensor(indexOrder, indexDimSize, X_INT, 1.0F, 0);
/* initialize variables */
sGPU1->SetData(sData, sUnitNum);
sGPU2->SetData(sData, sUnitNum);
tGPU->SetData(tData, tUnitNum);
sIndexGPU->SetData(srcIndex, indexSize);
tIndexGPU->SetData(tgtIndex, indexSize);
/* call _SpreadForGather function */
_SpreadForCopyIndexed(sGPU1, tGPU, dim, sIndexGPU, tIndexGPU, 1);
_SpreadForCopyIndexed(sGPU2, tGPU, dim, sIndexGPU, tIndexGPU, 1);
/* check results */
gpuTest = sGPU1->CheckData(answer, sUnitNum) &&
sGPU2->CheckData(answer, sUnitNum);
/* destroy variables */
delete s1;
delete s2;
delete t;
delete sIndex;
delete tIndex;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete sIndexGPU;
delete tIndexGPU;
delete[] sDimSize;
delete[] tDimSize;
delete[] indexDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1;
delete s2;
delete t;
delete sIndex;
delete tIndex;
delete[] sDimSize;
delete[] tDimSize;
delete[] indexDimSize;
......@@ -286,6 +421,24 @@ bool TestSpread()
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 1 test */
caseFlag = TestSpread2();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 1 test */
caseFlag = TestSpread3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -17,9 +17,11 @@
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-08-01
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-24 float16 added
*/
#include "TSub.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -214,6 +216,177 @@ bool TestSub2()
#endif // USE_CUDA
}
/* case 3: float16 tensor subtraction c = a - b * \beta */
bool TestSub3()
{
/* a tensor of size (2, 4) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 2;
dimSize[1] = 4;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE bData[2][4] = { {1.0F, -1.0F, -3.0F, -5.0F},
{-7.0F, -9.0F, -11.0F, -13.0F} };
DTYPE answer[2][4] = { {-1.0F, 2.0F, 5.0F, 8.0F},
{11.0F, 14.0F, 17.0F, 20.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * cGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * cMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor cUserGPU;
/* create float16 tensor */
XTensor aHalfGPU;
XTensor bHalfGPU;
XTensor cHalfGPU;
XTensor cMeHalfGPU;
XTensor cUserHalfGPU;
/* Initialize variables */
aGPU->SetData(aData, unitNum);
cMeGPU->SetData(aData, unitNum);
bGPU->SetData(bData, unitNum);
cGPU->SetZeroAll();
/* convert data type from float to float16 */
aHalfGPU = ConvertDataType(*aGPU, X_FLOAT16);
bHalfGPU = ConvertDataType(*bGPU, X_FLOAT16);
cHalfGPU = ConvertDataType(*cGPU, X_FLOAT16);
cMeHalfGPU = ConvertDataType(*cMeGPU, X_FLOAT16);
/* call Sub function */
_Sub(&aHalfGPU, &bHalfGPU, &cHalfGPU);
_SubMe(&cMeHalfGPU, &bHalfGPU);
cUserHalfGPU = Sub(aHalfGPU, bHalfGPU);
/* convert data type from float16 to float */
_ConvertDataType(&cHalfGPU, cGPU);
_ConvertDataType(&cMeHalfGPU, cMeGPU);
cUserGPU = ConvertDataType(cUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = cGPU->CheckData(answer, unitNum, 1e-4F) &&
cMeGPU->CheckData(answer, unitNum, 1e-4F) &&
cUserGPU.CheckData(answer, unitNum, 1e-4F);
/* destroy variables */
delete aGPU;
delete bGPU;
delete cGPU;
delete cMeGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 4: float16 tensor subtraction c = a - b * \beta */
bool TestSub4()
{
/* a tensor of size (2, 4) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 2;
dimSize[1] = 4;
int unitNum = 1;
for (int i = 0; i < order; i++) {
unitNum *= dimSize[i];
}
DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE bData[2][4] = { {1.0F, -1.0F, -3.0F, -5.0F},
{-7.0F, -9.0F, -11.0F, -13.0F} };
DTYPE answer[2][4] = { {-0.5F, 1.5F, 3.5F, 5.5F},
{7.5F, 9.5F, 11.5F, 13.5F} };
float beta = 0.5F;
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * cGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * cMeGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor cUserGPU;
/* create float16 tensor */
XTensor aHalfGPU;
XTensor bHalfGPU;
XTensor cHalfGPU;
XTensor cMeHalfGPU;
XTensor cUserHalfGPU;
/* Initialize variables */
aGPU->SetData(aData, unitNum);
cMeGPU->SetData(aData, unitNum);
bGPU->SetData(bData, unitNum);
cGPU->SetZeroAll();
/* convert data type from float to float16 */
aHalfGPU = ConvertDataType(*aGPU, X_FLOAT16);
bHalfGPU = ConvertDataType(*bGPU, X_FLOAT16);
cHalfGPU = ConvertDataType(*cGPU, X_FLOAT16);
cMeHalfGPU = ConvertDataType(*cMeGPU, X_FLOAT16);
/* call Sub function */
_Sub(&aHalfGPU, &bHalfGPU, &cHalfGPU, beta);
_SubMe(&cMeHalfGPU, &bHalfGPU, beta);
cUserHalfGPU = Sub(aHalfGPU, bHalfGPU, beta);
/* convert data type from float16 to float */
_ConvertDataType(&cHalfGPU, cGPU);
_ConvertDataType(&cMeHalfGPU, cMeGPU);
cUserGPU = ConvertDataType(cUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = cGPU->CheckData(answer, unitNum, 1e-4F) &&
cMeGPU->CheckData(answer, unitNum, 1e-4F) &&
cUserGPU.CheckData(answer, unitNum, 1e-4F);
/* destroy variables */
delete aGPU;
delete bGPU;
delete cGPU;
delete cMeGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
......@@ -243,6 +416,24 @@ bool TestSub()
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */
caseFlag = TestSub3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestSub4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -17,11 +17,13 @@
/*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-08-13
* $Update by: Lin Ye (email: linye2015@outlook.com) 2019-07-24 float16 added
*/
#include "TSubDim.h"
#include "../core/arithmetic/SubDim.h"
#include "../XTensor.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -249,6 +251,206 @@ bool TestSubDim2()
#endif // USE_CUDA
}
/*
case 3: float16 tensor subtraction c = a - b * \beta
where the size of b is equal to the n-th dimension of a,
i.e., a is subtracted with b by broadcasting
*/
bool TestSubDim3()
{
/* a tensor of size (2, 4) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 2;
aDimSize[1] = 4;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
/* a tensor of size (2) */
int bOrder = 1;
int * bDimSize = new int[bOrder];
bDimSize[0] = 2;
int bUnitNum = 1;
for (int i = 0; i < bOrder; i++)
bUnitNum *= bDimSize[i];
DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE bData[2] = {1.0F, -1.0F};
DTYPE answer[2][4] = { {-1.0F, 0.0F, 1.0F, 2.0F},
{5.0F, 6.0F, 7.0F, 8.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
XTensor * cGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * cMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor cUserGPU;
/* create float16 tensor */
XTensor aHalfGPU;
XTensor bHalfGPU;
XTensor cHalfGPU;
XTensor cMeHalfGPU;
XTensor cUserHalfGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
cMeGPU->SetData(aData, aUnitNum);
bGPU->SetData(bData, bUnitNum);
cGPU->SetZeroAll();
/* convert data type from float to float16 */
aHalfGPU = ConvertDataType(*aGPU, X_FLOAT16);
bHalfGPU = ConvertDataType(*bGPU, X_FLOAT16);
cHalfGPU = ConvertDataType(*cGPU, X_FLOAT16);
cMeHalfGPU = ConvertDataType(*cMeGPU, X_FLOAT16);
/* call sub function */
_SubDim(&aHalfGPU, &bHalfGPU, &cHalfGPU, 0);
_SubDim(&cMeHalfGPU, &bHalfGPU, 0);
cUserHalfGPU = SubDim(aHalfGPU, bHalfGPU, 0);
/* convert data type from float16 to float */
_ConvertDataType(&cHalfGPU, cGPU);
_ConvertDataType(&cMeHalfGPU, cMeGPU);
cUserGPU = ConvertDataType(cUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = cGPU->CheckData(answer, aUnitNum) &&
cMeGPU->CheckData(answer, aUnitNum) &&
cUserGPU.CheckData(answer, aUnitNum);
/* destroy variables */
delete aGPU;
delete bGPU;
delete cGPU;
delete cMeGPU;
delete[] aDimSize;
delete[] bDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] aDimSize;
delete[] bDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 4: float16 tensor subtraction c = a - b * \beta
where the size of b is equal to the n-th dimension of a,
i.e., a is subtracted with b by broadcasting
*/
bool TestSubDim4()
{
/* a tensor of size (2, 4) */
int aOrder = 2;
int * aDimSize = new int[aOrder];
aDimSize[0] = 2;
aDimSize[1] = 4;
int aUnitNum = 1;
for (int i = 0; i < aOrder; i++)
aUnitNum *= aDimSize[i];
/* a tensor of size (2, 2) */
int bOrder = 2;
int * bDimSize = new int[bOrder];
bDimSize[0] = 2;
bDimSize[1] = 2;
int bUnitNum = 1;
for (int i = 0; i < bOrder; i++)
bUnitNum *= bDimSize[i];
DTYPE aData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE bData[2][2] = { {1.0F, -1.0F},
{-1.0F, 1.0F} };
DTYPE answer[2][4] = { {-1.0F, 2.0F, 3.0F, 2.0F},
{3.0F, 6.0F, 7.0F, 6.0F} };
/* CPU test */
bool cpuTest = true;
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * aGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * bGPU = NewTensor(bOrder, bDimSize, X_FLOAT, 1.0F, 0);
XTensor * cGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor * cMeGPU = NewTensor(aOrder, aDimSize, X_FLOAT, 1.0F, 0);
XTensor cUserGPU;
/* create float16 tensor */
XTensor aHalfGPU;
XTensor bHalfGPU;
XTensor cHalfGPU;
XTensor cMeHalfGPU;
XTensor cUserHalfGPU;
/* Initialize variables */
aGPU->SetData(aData, aUnitNum);
cMeGPU->SetData(aData, aUnitNum);
bGPU->SetData(bData, bUnitNum);
cGPU->SetZeroAll();
/* convert data type from float to float16 */
aHalfGPU = ConvertDataType(*aGPU, X_FLOAT16);
bHalfGPU = ConvertDataType(*bGPU, X_FLOAT16);
cHalfGPU = ConvertDataType(*cGPU, X_FLOAT16);
cMeHalfGPU = ConvertDataType(*cMeGPU, X_FLOAT16);
/* call sub function */
_SubDim(&aHalfGPU, &bHalfGPU, &cHalfGPU, 1);
_SubDim(&cMeHalfGPU, &bHalfGPU, 1);
cUserHalfGPU = SubDim(aHalfGPU, bHalfGPU, 1);
/* convert data type from float16 to float */
_ConvertDataType(&cHalfGPU, cGPU);
_ConvertDataType(&cMeHalfGPU, cMeGPU);
cUserGPU = ConvertDataType(cUserHalfGPU, X_FLOAT);
/* check results */
gpuTest = cGPU->CheckData(answer, aUnitNum) &&
cMeGPU->CheckData(answer, aUnitNum) &&
cUserGPU.CheckData(answer, aUnitNum);
/* destroy variables */
delete aGPU;
delete bGPU;
delete cGPU;
delete cMeGPU;
delete[] aDimSize;
delete[] bDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete[] aDimSize;
delete[] bDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
......@@ -278,6 +480,24 @@ bool TestSubDim()
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */
caseFlag = TestSubDim3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestSubDim4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* other cases test */
/*
TODO!!
......
......@@ -23,6 +23,7 @@
#define __TEST_SUM_H__
#include "../core/arithmetic/Sum.h"
#include "../core/getandset/ConvertDataType.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -29,65 +29,67 @@ bool Test()
bool wrong = false;
XPRINT(0, stdout, "Testing the XTensor utilites ... \n\n");
wrong = !TestAbsolute() || wrong;
wrong = !TestClip() || wrong;
wrong = !TestCompare() || wrong;
wrong = !TestConcatenate() || wrong;
wrong = !TestConcatenateSolely() || wrong;
wrong = !TestCos() || wrong;
//wrong = !TestAbsolute() || wrong;
//wrong = !TestClip() || wrong;
//wrong = !TestCompare() || wrong;
//wrong = !TestConcatenate() || wrong;
//wrong = !TestConcatenateSolely() || wrong;
//wrong = !TestCos() || wrong;
//wrong = !TestConvertDataType() || wrong;
wrong = !TestCopyIndexed() || wrong;
wrong = !TestCopyValues() || wrong;
wrong = !TestDiv() || wrong;
wrong = !TestDivDim() || wrong;
wrong = !TestExp() || wrong;
wrong = !TestGather() || wrong;
wrong = !TestLog() || wrong;
wrong = !TestMatrixMul() || wrong;
wrong = !TestMatrixMul2D() || wrong;
wrong = !TestMatrixMul2DParallel() || wrong;
wrong = !TestMatrixMulBatched() || wrong;
wrong = !TestMerge() || wrong;
wrong = !TestMultiply() || wrong;
wrong = !TestMultiplyDim() || wrong;
wrong = !TestNegate() || wrong;
wrong = !TestNormalize() || wrong;
wrong = !TestPower() || wrong;
wrong = !TestReduceMax() || wrong;
wrong = !TestReduceMean() || wrong;
wrong = !TestReduceSum() || wrong;
wrong = !TestReduceSumAll() || wrong;
wrong = !TestReduceSumSquared() || wrong;
wrong = !TestReduceVariance() || wrong;
wrong = !TestRound() || wrong;
wrong = !TestScaleAndShift() || wrong;
wrong = !TestSelect() || wrong;
wrong = !TestSetAscendingOrder() || wrong;
wrong = !TestSetData() || wrong;
wrong = !TestSign() || wrong;
wrong = !TestSin() || wrong;
wrong = !TestSort() || wrong;
wrong = !TestSplit() || wrong;
wrong = !TestSub() || wrong;
wrong = !TestSum() || wrong;
wrong = !TestSumByColumnTV() || wrong;
wrong = !TestSumByColumnVT() || wrong;
//wrong = !TestCopyIndexed() || wrong;
//wrong = !TestCopyValues() || wrong;
//wrong = !TestDiv() || wrong;
//wrong = !TestDivDim() || wrong;
//wrong = !TestExp() || wrong;
//wrong = !TestGather() || wrong;
//wrong = !TestLog() || wrong;
//wrong = !TestMatrixMul() || wrong;
//wrong = !TestMatrixMul2D() || wrong;
//wrong = !TestMatrixMul2DParallel() || wrong;
//wrong = !TestMatrixMulBatched() || wrong;
//wrong = !TestMerge() || wrong;
//wrong = !TestMultiply() || wrong;
//wrong = !TestMultiplyDim() || wrong;
//wrong = !TestNegate() || wrong;
//wrong = !TestNormalize() || wrong;
//wrong = !TestPower() || wrong;
//wrong = !TestReduceMax() || wrong;
//wrong = !TestReduceMean() || wrong;
//wrong = !TestReduceSum() || wrong;
//wrong = !TestReduceSumAll() || wrong;
//wrong = !TestReduceSumSquared() || wrong;
//wrong = !TestReduceVariance() || wrong;
//wrong = !TestRound() || wrong;
//wrong = !TestScaleAndShift() || wrong;
//wrong = !TestSelect() || wrong;
//wrong = !TestSetAscendingOrder() || wrong;
//wrong = !TestSetData() || wrong;
//wrong = !TestSign() || wrong;
//wrong = !TestSin() || wrong;
//wrong = !TestSort() || wrong;
//wrong = !TestSplit() || wrong;
//wrong = !TestSpread() || wrong;
//wrong = !TestSub() || wrong;
//wrong = !TestSubDim() || wrong;
//wrong = !TestSum() || wrong;
//wrong = !TestSumByColumnTV() || wrong;
//wrong = !TestSumByColumnVT() || wrong;
wrong = !TestSumDim() || wrong;
wrong = !TestTan() || wrong;
wrong = !TestTranspose() || wrong;
//wrong = !TestTan() || wrong;
//wrong = !TestTranspose() || wrong;
//wrong = !TestTopK() || wrong;
wrong = !TestUnsqueeze() || wrong;
wrong = !TestXMem() || wrong;
//wrong = !TestUnsqueeze() || wrong;
//wrong = !TestXMem() || wrong;
wrong = !TestCrossEntropy() || wrong;
wrong = !TestDropout() || wrong;
wrong = !TestHardTanH() || wrong;
wrong = !TestIdentity() || wrong;
wrong = !TestLogSoftmax() || wrong;
wrong = !TestLoss() || wrong;
wrong = !TestRectify() || wrong;
wrong = !TestSigmoid() || wrong;
wrong = !TestSoftmax() || wrong;
//wrong = !TestCrossEntropy() || wrong;
//wrong = !TestDropout() || wrong;
//wrong = !TestHardTanH() || wrong;
//wrong = !TestIdentity() || wrong;
//wrong = !TestLogSoftmax() || wrong;
//wrong = !TestLoss() || wrong;
//wrong = !TestRectify() || wrong;
//wrong = !TestSigmoid() || wrong;
//wrong = !TestSoftmax() || wrong;
/* other test */
/*
......
......@@ -61,7 +61,9 @@
#include "TSin.h"
#include "TSort.h"
#include "TSplit.h"
#include "TSpread.h"
#include "TSub.h"
#include "TSubDim.h"
#include "TSum.h"
#include "TSumByColumnTV.h"
#include "TSumByColumnVT.h"
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论