Commit 100f4611 by liyinqiao

Bugs fixed: 1. Fixed MatrixMul, Select, Sort, TopK and Loss; 2. Added other tests.

parent a3a7145f
......@@ -58,12 +58,12 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA,
/* all three tensors need at least two dimensions; the message states the same bound as the check */
CheckNTErrors((a->order >= 2 && b->order >= 2 && c->order >= 2),
"Input tensors must have an order >= 2!");
int an = transposedA == X_TRANS ? a->dimSize[1] : a->dimSize[0];
int am = transposedA == X_TRANS ? a->dimSize[0] : a->dimSize[1];
int bn = transposedB == X_TRANS ? b->dimSize[1] : b->dimSize[0];
int bm = transposedB == X_TRANS ? b->dimSize[0] : b->dimSize[1];
int cn = c->dimSize[0];
int cm = c->dimSize[1];
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1];
int bm = transposedB == X_TRANS ? b->dimSizeRDI[1] : b->dimSizeRDI[0];
int cn = c->dimSizeRDI[1];
int cm = c->dimSizeRDI[0];
CheckNTErrors((am == bn && an == cn && bm == cm),
"Unmatched tensors in multiplication!");
......@@ -79,13 +79,13 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA,
int cBlockNum = 1;
for (int i = 2; i < a->order; i++) {
CheckNTErrors((a->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!");
CheckNTErrors((a->dimSizeRDI[i] == c->dimSizeRDI[i - 2 + b->order]), "Incorrect tensor sizes!");
aBlockNum *= a->dimSizeRDI[i];
cBlockNum *= a->dimSizeRDI[i];
}
for (int i = 2; i < b->order; i++) {
CheckNTErrors((b->dimSizeRDI[i] == c->dimSizeRDI[i - 2 + a->order]), "Incorrect tensor sizes!");
CheckNTErrors((b->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!");
bBlockNum *= b->dimSizeRDI[i];
cBlockNum *= b->dimSizeRDI[i];
}
......@@ -93,9 +93,9 @@ void MatrixMul(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XList * aList = new XList(10);
XList * bList = new XList(10);
XList * cList = new XList(10);
int aDimSize[2] = { -a->dimSize[0], a->dimSize[1] };
int bDimSize[2] = { -b->dimSize[0], b->dimSize[1] };
int cDimSize[2] = { -c->dimSize[0], c->dimSize[1] };
int aDimSize[2] = { a->dimSizeRDI[1], a->dimSizeRDI[0] };
int bDimSize[2] = { b->dimSizeRDI[1], b->dimSizeRDI[0] };
int cDimSize[2] = { c->dimSizeRDI[1], c->dimSizeRDI[0] };
bool isSparseMul = false;
......
......@@ -52,12 +52,12 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
/* all three tensors need at least two dimensions; the message states the same bound as the check */
CheckNTErrors((a->order >= 2 && b->order >= 2 && c->order >= 2),
"Input tensors must have an order >= 2!");
int an = transposedA == X_TRANS ? a->dimSize[1] : a->dimSize[0];
int am = transposedA == X_TRANS ? a->dimSize[0] : a->dimSize[1];
int bn = transposedB == X_TRANS ? b->dimSize[1] : b->dimSize[0];
int bm = transposedB == X_TRANS ? b->dimSize[0] : b->dimSize[1];
int cn = c->dimSize[0];
int cm = c->dimSize[1];
int an = transposedA == X_TRANS ? a->dimSizeRDI[0] : a->dimSizeRDI[1];
int am = transposedA == X_TRANS ? a->dimSizeRDI[1] : a->dimSizeRDI[0];
int bn = transposedB == X_TRANS ? b->dimSizeRDI[0] : b->dimSizeRDI[1];
int bm = transposedB == X_TRANS ? b->dimSizeRDI[1] : b->dimSizeRDI[0];
int cn = c->dimSizeRDI[1];
int cm = c->dimSizeRDI[0];
CheckNTErrors((am == bn && an == cn && bm == cm),
"Unmatched tensors in multiplication!");
......@@ -79,9 +79,9 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
XList * aList = new XList(10);
XList * bList = new XList(10);
XList * cList = new XList(10);
int aDimSize[2] = { -a->dimSizeRDI[0], a->dimSizeRDI[1] };
int bDimSize[2] = { -b->dimSizeRDI[0], b->dimSizeRDI[1] };
int cDimSize[2] = { -c->dimSizeRDI[0], c->dimSizeRDI[1] };
int aDimSize[2] = { -a->dimSizeRDI[1], a->dimSizeRDI[0] };
int bDimSize[2] = { -b->dimSizeRDI[1], b->dimSizeRDI[0] };
int cDimSize[2] = { -c->dimSizeRDI[1], c->dimSizeRDI[0] };
for (int p = 0; p < blockNum; p++) {
void * ap = (char*)a->data + aRealBlockSize * p;
......@@ -106,8 +106,9 @@ void MatrixMulBatched(XTensor * a, MATRIX_TRANS_TYPE transposedA,
int devIDBackup;
ProtectCudaDev(a->devID, devIDBackup);
CudaBLASMatrixMULList(a->mem != NULL ? a->mem->GetCublasHandle() : GDevs.GetCudaHandle(a->devID),
aList, transposedA,
cublasHandle_t * handle = a->mem != NULL ? a->mem->GetCublasHandle() : GDevs.GetCudaHandle(a->devID);
CudaBLASMatrixMULList(handle,
aList, transposedA,
bList, transposedB,
cList, aList->count,
alpha, beta);
......
......@@ -47,23 +47,28 @@ void SelectRange(XTensor * a, int dim, int low, int high, XTensor * c)
for(int i = 0; i < a->order; i++){
if(i == dim){
CheckNTErrors(low > 0 && low < a->dimSize[dim], "Illegal range specified!");
CheckNTErrors(high > 0 && high < a->dimSize[dim], "Illegal range specified!");
CheckNTErrors(high > 0 && high <= a->dimSize[dim], "Illegal range specified!");
}
else{
CheckNTErrors(a->dimSize[i] == c->dimSize[i], "The size of the dimensions should be same!");
}
}
int dimRDI = a->order - dim - 1;
int stride = 1;
for(int i = 0; i < dim; i++)
for(int i = 0; i < dimRDI; i++)
stride *= a->dimSizeRDI[i];
int copyTimes = 1;
for (int i = dimRDI + 1; i < a->order; i++)
copyTimes *= a->dimSizeRDI[i];
int blockSize = stride * (high - low) * a->unitSize;
int stepSizeS = stride * a->dimSize[dim] * a->unitSize;
int stepSizeT = stride * c->dimSize[dim] * a->unitSize;
char * s = (char*)a->data + stride * low * a->unitSize;
char * t = (char*)c->data;
for(int i = 0; i < high - low; i++){
for(int i = 0; i < copyTimes; i++){
XMemCopy(t, c->devID, s, a->devID, blockSize);
s += stepSizeS;
t += stepSizeT;
......
......@@ -235,10 +235,6 @@ void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, i
int m = GetNextPower2(strideNum);
int n = stride * blockNum;
/* recheck */
/*void * buf = mem->AllocBuf(mem->devID, n * m * a->unitSize);
void * bufIndex = (indexA != NULL && indexB != NULL) ? mem->AllocBuf(mem->devID, n * m * sizeof(int)) : NULL;*/
/* change by liyinqiao */
void * buf = mem != NULL ? mem->AllocBuf(a->devID, n * m * a->unitSize) : XMemAlloc(a->devID, n * m * a->unitSize);
void * bufIndex = NULL;
if (indexA != NULL && indexB != NULL) {
......@@ -294,11 +290,6 @@ void CudaSortBig(XTensor * a, XTensor * b, XTensor * indexA, XTensor * indexB, i
KernelReorganizeBack<int> << <dim3(cudaGrids[1], cudaGrids[0]), dim3(cudaBlocks[1], cudaBlocks[0]) >> >
(bufIndex, indexB->data, m, n, stride, k, blockNum);
/* recheck */
/*mem->ReleaseBuf(mem->devID, n * m * a->unitSize);
if (indexA != NULL && indexB != NULL)
mem->ReleaseBuf(mem->devID, n * m * sizeof(int));*/
/* change by liyinqiao */
if (mem != NULL)
mem->ReleaseBuf(a->devID, n * m * a->unitSize);
else
......
......@@ -20,6 +20,7 @@
*/
#include "../XDevice.h"
#include "../XUtility.h"
#include "../XTensor.h"
#include "TopK.h"
#include "TopK.cuh"
......@@ -393,7 +394,7 @@ void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k)
int cudaGrids[3];
int cudaBlocks[3];
GDevs.GetCudaThread2D(a->mem->devID,
GDevs.GetCudaThread2D(a->devID,
workerNum, stride * blockNum, MAX_INT,
cudaGrids, cudaBlocks);
......@@ -434,14 +435,17 @@ void CudaTopK(XTensor * a, XTensor * b, XTensor * index, int dim, int k)
memcpy(dimSize, a->dimSize, sizeof(int) * a->order);
dimSize[0] = -dimSize[0];
XTensor * indexA = new XTensor(a->order, dimSize, X_INT, 1.0F, a->mem);
indexA->data = a->mem->AllocBuf(a->devID, a->unitNum * sizeof(int));
indexA->data = a->mem != NULL ? a->mem->AllocBuf(a->devID, a->unitNum * sizeof(int)) : XMemAlloc(a->devID, a->unitNum * sizeof(int));
/* make the index tensor */
indexA->SetAscendingOrder(dim);
CudaSortBig(a, b, indexA, index, dim, k);
a->mem->ReleaseBuf(a->devID, a->unitNum * sizeof(int));
if (a->mem != NULL)
a->mem->ReleaseBuf(a->devID, a->unitNum * sizeof(int));
else
XMemFree(a->devID, indexA->data);
delete indexA;
}
......
......@@ -374,15 +374,15 @@ void LossBackward(XTensor * dedy, XTensor * t, XTensor * y,
LOSS_FUNCTION_NAME LFName,
int leadDim, int tBeg, int tLen, int yBeg)
{
CheckNTErrors((tLen >= 0 && tLen < y->unitNum), "Illegal input length!");
CheckNTErrors((tLen < y->unitNum), "Illegal input length!");
CheckNTErrors((XTensor::IsIdentical(t, y)&& XTensor::IsIdentical(dedy, y)),
"The input tensors must be of the same size!");
CheckNTErrors((t->dimSizeRDI[0] == 1 && y->dimSizeRDI[0] == 1 && dedy->dimSizeRDI[1] == 1), "TODO!");
//CheckNTErrors((t->dimSizeRDI[0] == 1 && y->dimSizeRDI[0] == 1 && dedy->dimSizeRDI[0] == 1), "TODO!");
CheckNTErrors((t->order > leadDim && leadDim >= 0), "Illegal leading dimension!");
CheckNTErrors((t->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE),
"TODO!");
int leadDimRDI = y->order - leadDim - 1;
int leadDimRDI = leadDim >= 0 ? y->order - leadDim - 1 : -1;
if(leadDimRDI < 0){
leadDimRDI = y->dimSizeRDI[y->order - 1];
tBeg = 0;
......
......@@ -24,8 +24,8 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
/* case 1: matrix multiplication.
* In this case, a=(2, 3), b=(3, 2) -> c=(2, 2), transposedA=X_NOTRANS,
transposedB=X_NOTRANS.
* In this case, a=(2, 3), b=(3, 2) -> c=(2, 2),
* transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMul1()
{
......@@ -59,13 +59,13 @@ bool TestMatrixMul1()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[2][3] = { {1.0, 2.0, 3.0},
{-4.0, 5.0, 6.0} };
DTYPE sData2[3][2] = { {0.0, -1.0},
{1.0, 2.0},
{2.0, 1.0} };
DTYPE answer[2][2] = { {8.0, 6.0},
{17.0, 20.0} };
DTYPE sData1[2][3] = { {1.0F, 2.0F, 3.0F},
{-4.0F, 5.0F, 6.0F} };
DTYPE sData2[3][2] = { {0.0F, -1.0F},
{1.0F, 2.0F},
{2.0F, 1.0F} };
DTYPE answer[2][2] = { {8.0F, 6.0F},
{17.0F, 20.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -107,22 +107,33 @@ bool TestMatrixMul1()
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1, s2, t;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 2: matrix multiplication.
* In this case, a=(3, 2), b=(3, 2) -> c=(2, 2), transposedA=X_TRANS,
transposedB=X_NOTRANS.
* In this case, a=(3, 2), b=(3, 2) -> c=(2, 2),
* transposedA=X_TRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMul2()
{
......@@ -136,7 +147,7 @@ bool TestMatrixMul2()
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (2, 3) */
/* a source tensor of size (3, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 3;
......@@ -156,14 +167,14 @@ bool TestMatrixMul2()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[3][2] = { {1.0, -4.0},
{2.0, 5.0},
{3.0, 6.0} };
DTYPE sData2[3][2] = { {0.0, -1.0},
{1.0, 2.0},
{2.0, 1.0} };
DTYPE answer[2][2] = { {8.0, 6.0},
{17.0, 20.0} };
DTYPE sData1[3][2] = { {1.0F, -4.0F},
{2.0F, 5.0F},
{3.0F, 6.0F} };
DTYPE sData2[3][2] = { {0.0F, -1.0F},
{1.0F, 2.0F},
{2.0F, 1.0F} };
DTYPE answer[2][2] = { {8.0F, 6.0F},
{17.0F, 20.0F} };
/* CPU test */
bool cpuTest = true;
......@@ -205,22 +216,33 @@ bool TestMatrixMul2()
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1, s2, t;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 3: matrix multiplication.
* In this case, a=(3, 2, 3), b=(2, 3, 2) -> c=(3, 2, 2, 2), transposedA=X_NOTRANS,
transposedB=X_NOTRANS.
* In this case, a=(3, 2, 3), b=(2, 3, 2) -> c=(3, 2, 2, 2),
* transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMul3()
{
......@@ -258,20 +280,30 @@ bool TestMatrixMul3()
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[3][2][3] = { { {0.0, -1.0, 2.0},
{2.0, 1.0, 3.0} },
{ {1.0, 2.0, 4.0},
{3.0, 1.0, 2.0}},
{ {-1.0, 3.0, 2.0},
{1.0, -1.0, 0.0} } };
DTYPE sData2[2][3][2] = { { {1.0, 2.0},
{-4.0, 3.0},
{2.0, 6.0} },
{ {1.0, 2.0},
{-4.0, 3.0},
{2.0, 6.0} } };
DTYPE answer[2][2] = { {8.0, 6.0},
{17.0, 20.0} };
DTYPE sData1[3][2][3] = { { {0.0F, -1.0F, 2.0},
{2.0F, 1.0F, 3.0} },
{ {1.0F, 2.0F, 4.0},
{3.0F, 1.0F, 2.0}},
{ {-1.0F, 3.0F, 2.0},
{1.0F, -1.0F, 0.0} } };
DTYPE sData2[2][3][2] = { { {1.0F, 2.0F},
{-4.0F, 3.0F},
{2.0F, 6.0F} },
{ {1.0F, 2.0F},
{3.0F, 4.0F},
{5.0F, 6.0F} } };
DTYPE answer[3][2][2][2] = { { { {8.0F, 9.0F},
{4.0F, 25.0F} },
{ {7.0F, 8.0F},
{20.0F, 26.0F} } },
{ { {1.0F, 32.0F},
{3.0F, 21.0F} },
{ {27.0F, 34.0F},
{16.0F, 22.0F} } },
{ { {-9.0F, 19.0F},
{5.0F, -1.0F} },
{ {18.0F, 22.0F},
{-2.0F, -2.0F} } } };
/* CPU test */
bool cpuTest = true;
......@@ -289,17 +321,123 @@ bool TestMatrixMul3()
/* call MatrixMul function */
MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);
XPRINT(0, stdout, "\ntarget data\n[");
DTYPE* check_data = (DTYPE*)t->data;
for (int i = 0; i < tUnitNum; i++)
printf("%f ", *check_data++);
printf("]\n");
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
/* print the size of every dimension of the target tensor (debug output);
   the sizes are read in place, so nothing is allocated and nothing leaks */
for (int i = 0; i < tOrder; i++) {
    printf("size %d: %d\n", i, t->dimSize[i]);
}
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
sGPU2->SetData(sData2, sUnitNum2);
tGPU->SetZeroAll();
/* call MatrixMul function */
MatrixMul(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* case 4: matrix multiplication.
* In this case, a=(3, 2, 3), b=(3, 2) -> c=(3, 2, 2),
* transposedA=X_NOTRANS, transposedB=X_NOTRANS.
*/
bool TestMatrixMul4()
{
/* a source tensor of size (3, 2, 3) */
int sOrder1 = 3;
int * sDimSize1 = new int[sOrder1];
sDimSize1[0] = 3;
sDimSize1[1] = 2;
sDimSize1[2] = 3;
int sUnitNum1 = 1;
for (int i = 0; i < sOrder1; i++)
sUnitNum1 *= sDimSize1[i];
/* a source tensor of size (3, 2) */
int sOrder2 = 2;
int * sDimSize2 = new int[sOrder2];
sDimSize2[0] = 3;
sDimSize2[1] = 2;
int sUnitNum2 = 1;
for (int i = 0; i < sOrder2; i++)
sUnitNum2 *= sDimSize2[i];
/* a target tensor of size (3, 2, 2) */
int tOrder = 3;
int * tDimSize = new int[tOrder];
tDimSize[0] = 3;
tDimSize[1] = 2;
tDimSize[2] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData1[3][2][3] = { { {0.0F, -1.0F, 2.0F},
{2.0F, 1.0F, 3.0F} },
{ {1.0F, 2.0F, 4.0F},
{3.0F, 1.0F, 2.0F}},
{ {-1.0F, 3.0F, 2.0F},
{1.0F, -1.0F, 0.0F} } };
DTYPE sData2[3][2] = { {1.0F, 2.0F},
{3.0F, 4.0F},
{5.0F, 6.0F} };
DTYPE answer[3][2][2] = { { {7.0F, 8.0F},
{20.0F, 26.0F} },
{ {27.0F, 34.0F},
{16.0F, 22.0F} },
{ {18.0F, 22.0F},
{-2.0F, -2.0F} } };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize);
/* initialize variables */
s1->SetData(sData1, sUnitNum1);
s2->SetData(sData2, sUnitNum2);
t->SetZeroAll();
/* call MatrixMul function */
MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
......@@ -325,14 +463,25 @@ bool TestMatrixMul3()
gpuTest = tGPU->CheckData(answer, tUnitNum);
/* destroy variables */
delete s1, s2, t, sGPU1, sGPU2, tGPU;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete sGPU1;
delete sGPU2;
delete tGPU;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s1, s2, t;
delete[] sDimSize1, sDimSize2, tDimSize;
delete s1;
delete s2;
delete t;
delete[] sDimSize1;
delete[] sDimSize2;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
......@@ -348,7 +497,7 @@ bool TestMatrixMul3()
extern "C"
bool TestMatrixMul()
{
XPRINT(0, stdout, "[TEST MATRIXMUL] -------------\n");
XPRINT(0, stdout, "[TEST MATRIXMUL] matrix multiplication \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
......@@ -370,14 +519,23 @@ bool TestMatrixMul()
else
XPRINT(0, stdout, ">> case 2 passed!\n");
///* case 3 test */
//caseFlag = TestMatrixMul3();
//if (!caseFlag) {
// returnFlag = false;
// XPRINT(0, stdout, ">> case 3 failed!\n");
//}
//else
// XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 3 test */
caseFlag = TestMatrixMul3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestMatrixMul4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* other cases test */
/*
......
......@@ -31,6 +31,7 @@ bool Test()
wrong = !TestConcatenate() || wrong;
wrong = !TestConcatenateSolely() || wrong;
wrong = !TestCopyValues() || wrong;
wrong = !TestMatrixMul() || wrong;
wrong = !TestMatrixMul2D() || wrong;
wrong = !TestMatrixMulBatchedCPU() || wrong;
......@@ -42,12 +43,19 @@ bool Test()
wrong = !TestReduceMax() || wrong;
wrong = !TestReduceMean() || wrong;
wrong = !TestReduceSum() || wrong;
wrong = !TestReduceSumSquared() || wrong;
wrong = !TestReduceVariance() || wrong;
wrong = !TestScaleAndShift() || wrong;
wrong = !TestSelect() || wrong;
wrong = !TestSort() || wrong;
wrong = !TestSplit() || wrong;
wrong = !TestSum() || wrong;
wrong = !TestTopK() || wrong;
wrong = !TestUnsqueeze() || wrong;
wrong = !TestXMem() || wrong;
//wrong = !TestHardTanH() || wrong;
/* invoke the identity test; the bare function name would only test its address */
wrong = !TestIdentity() || wrong;
//wrong = !TestLoss() || wrong;
//wrong = !TestRectify() || wrong;
wrong = !TestSigmoid() || wrong;
......
......@@ -24,6 +24,7 @@
#include "TConcatenate.h"
#include "TConcatenateSolely.h"
#include "TCopyValues.h"
#include "TMatrixMul.h"
#include "TMatrixMul2D.h"
#include "TMatrixMULBatchedCPU.h"
......@@ -35,12 +36,19 @@
#include "TReduceMax.h"
#include "TReduceMean.h"
#include "TReduceSum.h"
#include "TReduceSumSquared.h"
#include "TReduceVariance.h"
#include "TScaleAndShift.h"
#include "TSelect.h"
#include "TSort.h"
#include "TSplit.h"
#include "TSum.h"
#include "TTopK.h"
#include "TUnsqueeze.h"
#include "TXMem.h"
#include "THardTanH.h"
#include "TIdentity.h"
#include "TLoss.h"
#include "TRectify.h"
#include "TSigmoid.h"
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论