Commit fc5a630a by ltb

main test

parent 3187918c
......@@ -50,6 +50,13 @@ void ClipFP16Test();
// Forward declarations of parameterless test/benchmark routines; several of
// them (MultiplyDimTime, TimeTestGemm, TimeTest, TimeInt8AndFloat32,
// TestCPUhalf, InitTensorFP16Test) are defined further down in this file.
void ScaleAndShiftFP16Test();
void InitTensorFP16Test();
void MultiplyDimTime();
void TimeTestGemm();
void TimeTest();
void TimeInt8AndFloat32();
void TestCPUhalf();
// File-scope using-directives: names from these three namespaces are used
// unqualified throughout the rest of the file.
using namespace nts;
using namespace fnnlm;
using namespace transformer;
......@@ -109,6 +116,373 @@ int main(int argc, const char ** argv )
return 0;
}
void TestCPUhalf() {
int memSize = 1024;
int devId = 0;
int dim1 = 1024;
int dim2 = 32;
XMem * mem;
mem = new XMem(devId, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
mem->SetDesiredSize(devId, 0, (MTYPE)memSize * MILLION);
XTensor a;
XTensor b;
XTensor c;
//XMem *mem = new XMem(devId, FREE_ON_THE_FLY, 128 * MILLION, 1024, 128 * MILLION);
//mem->SetDesiredSize(0,0,memSize*MILLION);
InitTensor2D(&a, dim1, dim1, X_FLOAT, devId);
InitTensor2D(&b, dim2, dim2, X_FLOAT, devId);
InitTensor2D(&c, dim1, dim1, X_FLOAT, devId);
}
void TimeInt8AndFloat32() {
XMem * mem;
int memSize = 1024;
int devId = 2;
int dim = 512;
mem = new XMem(devId, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
mem->SetDesiredSize(devId, 0, (MTYPE)memSize * MILLION);
XTensor a;
XTensor b;
XTensor c;
InitTensor2D(&a, dim, dim, X_FLOAT, devId, mem);
InitTensor2D(&b, dim, dim, X_FLOAT, devId, mem);
InitTensor2D(&c, dim, dim, X_FLOAT, devId, mem);
a.SetDataRand(-1.0F, 1.0F);
b.SetDataRand(-1.0F, 1.0F);
XTensor inta;
XTensor intb;
XTensor intc;
InitTensor2D(&inta, dim, dim, X_INT, devId, mem);
InitTensor2D(&intb, dim, dim, X_INT, devId, mem);
InitTensor2D(&intc, dim, dim, X_FLOAT, devId, mem);
XTensor tmp;
InitTensor2D(&tmp, dim, dim, X_FLOAT, devId, mem);
tmp.SetDataRand(-100000.0F, 100000.0F);
inta = ConvertDataType(tmp, X_INT8);
intb = ConvertDataType(tmp, X_INT8);
int repeat = 10000;
printf("test on matrixmul\n");
double start_matrixmul32 = GetClockSec();
for (int i = 0; i < repeat; i++) {
_MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c);
}
double elapsed_matrixmul32 = GetClockSec() - start_matrixmul32;
printf("elapsed_matrixmul32=%.2fs\n", elapsed_matrixmul32);
double start_int8 = GetClockSec();
for (int i = 0; i < repeat; i++) {
_MatrixMul(&inta, X_NOTRANS, &intb, X_NOTRANS, &intc);
}
double elapsed_int8 = GetClockSec() - start_int8;
printf("elapsed_int8=%.2fs\n", elapsed_int8);
}
/*
 * Benchmarks a series of tensor operations in X_FLOAT versus X_FLOAT16 on
 * dim x dim tensors created with a local XMem pool on device `devId`.
 *
 * For each enabled operation (Sum, ScaleAndShift, ReduceSum, ReduceMax,
 * LogSoftmax, MatrixMul, ConvertDataType) the call is repeated `repeat`
 * times and the elapsed time reported by GetClockSec() is printed.
 * The Sub / Div / Multiply sections are kept but disabled.
 *
 * The tensors live in an inner scope so that they are destroyed BEFORE the
 * pool is deleted; previously `delete mem` ran while they were still alive
 * (presumably their destructors hand storage back to the pool - confirm
 * against XTensor's destructor).
 */
void TimeTest() {
    int memSize = 1024;
    int devId = 0;
    int dim = 512;

    XMem * mem = new XMem(devId, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
    mem->SetDesiredSize(devId, 0, (MTYPE)memSize * MILLION);

    {
        XTensor a;
        XTensor b;
        XTensor c;
        XTensor halfa;
        XTensor halfb;
        XTensor halfc;

        InitTensor2D(&a, dim, dim, X_FLOAT, devId, mem);
        InitTensor2D(&b, dim, dim, X_FLOAT, devId, mem);
        InitTensor2D(&c, dim, dim, X_FLOAT, devId, mem);
        InitTensor2D(&halfc, dim, dim, X_FLOAT16, devId, mem);

        a.SetDataRand(-1.0F, 1.0F);
        b.SetDataRand(-1.0F, 1.0F);

        // FP16 operands are converted copies of the FP32 operands.
        halfa = ConvertDataType(a, X_FLOAT16);
        halfb = ConvertDataType(b, X_FLOAT16);

        int repeat = 100000;

        printf("=========================================\n");

        printf("test on sum\n");
        double start_sum32 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
            c = Sum(&a, &b);
        }
        double elapsed_sum32 = GetClockSec() - start_sum32;
        printf("elapsed_sum32=%.2fs\n", elapsed_sum32);

        double start_sum16 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
            halfc = Sum(&halfa, &halfb);
        }
        double elapsed_sum16 = GetClockSec() - start_sum16;
        printf("elapsed_sum16=%.2fs\n", elapsed_sum16);

        printf("=========================================\n");

        /*printf("test on sub\n");
        double start_sub32 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
        c = Sub(&a, &b);
        }
        double elapsed_sub32 = GetClockSec() - start_sub32;
        printf("elapsed_sub32=%.2fs\n", elapsed_sub32);
        double start_sub16 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
        halfc = Sub(&halfa, &halfb);
        }
        double elapsed_sub16 = GetClockSec() - start_sub16;
        printf("elapsed_sub16=%.2fs\n", elapsed_sub16);
        printf("=========================================\n");*/

        /*printf("test on div\n");
        double start_div32 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
        c = Div(&a, &b);
        }
        double elapsed_div32 = GetClockSec() - start_div32;
        printf("elapsed_div32=%.2fs\n", elapsed_div32);
        double start_div16 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
        halfc = Div(&halfa, &halfb);
        }
        double elapsed_div16 = GetClockSec() - start_div16;
        printf("elapsed_div16=%.2fs\n", elapsed_div16);
        printf("=========================================\n");*/

        /*printf("test on multiply\n");
        double start_multiply32 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
        c = Multiply(&a, &b);
        }
        double elapsed_multiply32 = GetClockSec() - start_multiply32;
        printf("elapsed_multiply32=%.2fs\n", elapsed_multiply32);
        double start_multiply16 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
        halfc = Multiply(&halfa, &halfb);
        }
        double elapsed_multiply16 = GetClockSec() - start_multiply16;
        printf("elapsed_multiply16=%.2fs\n", elapsed_multiply16);
        printf("=========================================\n");*/

        printf("test on scaleandshift\n");
        double start_scaleandshift32 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
            c = ScaleAndShift(&a, 1, 0);
        }
        double elapsed_scaleandshift32 = GetClockSec() - start_scaleandshift32;
        printf("elapsed_scaleandshift32=%.2fs\n", elapsed_scaleandshift32);

        double start_scaleandshift16 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
            halfc = ScaleAndShift(&halfa, 1, 0);
        }
        double elapsed_scaleandshift16 = GetClockSec() - start_scaleandshift16;
        printf("elapsed_scaleandshift16=%.2fs\n", elapsed_scaleandshift16);

        printf("=========================================\n");

        printf("test on reducesum\n");
        double start_reducesum32 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
            c = ReduceSum(&a, 1);
        }
        double elapsed_reducesum32 = GetClockSec() - start_reducesum32;
        printf("elapsed_reducesum32=%.2fs\n", elapsed_reducesum32);

        double start_reducesum16 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
            halfc = ReduceSum(&halfa, 1);
        }
        double elapsed_reducesum16 = GetClockSec() - start_reducesum16;
        printf("elapsed_reducesum16=%.2fs\n", elapsed_reducesum16);

        printf("=========================================\n");

        printf("test on reducemax\n");
        double start_reducemax32 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
            c = ReduceMax(&a, 1);
        }
        double elapsed_reducemax32 = GetClockSec() - start_reducemax32;
        printf("elapsed_reducemax32=%.2fs\n", elapsed_reducemax32);

        double start_reducemax16 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
            halfc = ReduceMax(&halfa, 1);
        }
        double elapsed_reducemax16 = GetClockSec() - start_reducemax16;
        printf("elapsed_reducemax16=%.2fs\n", elapsed_reducemax16);

        printf("=========================================\n");

        printf("test on logsoftmax\n");
        double start_logsoftmax32 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
            c = LogSoftmax(&a, 1);
        }
        double elapsed_logsoftmax32 = GetClockSec() - start_logsoftmax32;
        printf("elapsed_logsoftmax32=%.2fs\n", elapsed_logsoftmax32);

        double start_logsoftmax16 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
            halfc = LogSoftmax(&halfa, 1);
        }
        double elapsed_logsoftmax16 = GetClockSec() - start_logsoftmax16;
        printf("elapsed_logsoftmax16=%.2fs\n", elapsed_logsoftmax16);

        printf("=========================================\n");

        printf("test on matrixmul\n");
        double start_matrixmul32 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
            c = MatrixMul(&a, &b);
        }
        double elapsed_matrixmul32 = GetClockSec() - start_matrixmul32;
        printf("elapsed_matrixmul32=%.2fs\n", elapsed_matrixmul32);

        double start_matrixmul16 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
            halfc = MatrixMul(&halfa, &halfb);
        }
        double elapsed_matrixmul16 = GetClockSec() - start_matrixmul16;
        printf("elapsed_matrixmul16=%.2fs\n", elapsed_matrixmul16);

        printf("=========================================\n");

        printf("test on convert\n");
        double start_convert32to16 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
            halfa = ConvertDataType(a, X_FLOAT16);
        }
        double elapsed_convert32to16 = GetClockSec() - start_convert32to16;
        printf("elapsed_convert32to16=%.2fs\n", elapsed_convert32to16);

        // Note: this loop overwrites `a` with the round-tripped FP16 values.
        double start_convert16to32 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
            a = ConvertDataType(halfa, X_FLOAT);
        }
        double elapsed_convert16to32 = GetClockSec() - start_convert16to32;
        printf("elapsed_convert16to32=%.2fs\n", elapsed_convert16to32);

        printf("=========================================\n");
    }

    // All tensors that used the pool are out of scope; release it last.
    delete mem;
}
void MultiplyDimTime() {
int memSize = 1024;
int devId = 0;
int dim1 = 1024;
int dim2 = 32;
XMem * mem;
mem = new XMem(devId, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
mem->SetDesiredSize(devId, 0, (MTYPE)memSize * MILLION);
XTensor a;
XTensor b;
XTensor c;
//XMem *mem = new XMem(devId, FREE_ON_THE_FLY, 128 * MILLION, 1024, 128 * MILLION);
//mem->SetDesiredSize(0,0,memSize*MILLION);
InitTensor2D(&a, dim1, dim1, X_FLOAT, devId);
InitTensor2D(&b, dim2, dim2, X_FLOAT, devId);
InitTensor2D(&c, dim1, dim1, X_FLOAT, devId);
a.SetDataRand(-1.0F, 1.0F);
b.SetDataRandn(-1.0F, 1.0F);
int repeat = 2000;
printf("test on MultiplyDim\n");
double start = GetClockSec();
for (int j = 0; j <= repeat; j++) {
c = MultiplyDim(&a, &b, 0);
}
double elapsed = GetClockSec() - start;
printf("elapsed_MultiplyDim32=%.4fs \n", elapsed);
XTensor halfa;
XTensor halfb;
XTensor halfc;
InitTensor2D(&halfc, dim1, dim1, X_FLOAT16, devId, mem);
halfa = ConvertDataType(a, X_FLOAT16);
halfb = ConvertDataType(b, X_FLOAT16);
double starthalf = GetClockSec();
for (int i = 0; i < repeat; i++) {
halfc = MultiplyDim(&halfa, &halfb, 0);
}
double elapsedhalf = GetClockSec() - starthalf;
printf("elapsed_MultiplyDim16=%.4fs\n", elapsedhalf);
}
/*
 * Times `repeat` calls of BMMul(x, X_NOTRANS, y, X_TRANS) on dim1 x dim2
 * tensors, first with X_FLOAT16 conversions and then with the X_FLOAT
 * originals, prints both elapsed times as reported by GetClockSec(), and
 * dumps the first entries of both results to stderr.
 *
 * Fixes relative to the previous version:
 *  - `delete mem;` was executed on an uninitialised pointer before the pool
 *    was allocated (undefined behaviour); that statement is removed;
 *  - the pool that was then allocated was never released; it is now deleted
 *    after every tensor created with it has gone out of scope.
 */
void TimeTestGemm() {
    int memSize = 1024;

    XMem * mem = new XMem(0, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
    mem->SetDesiredSize(0, 0, (MTYPE)memSize * MILLION);

    {
        XTensor a;
        XTensor b;
        XTensor c;
        XTensor halfa;
        XTensor halfb;
        XTensor halfc;

        int dim1 = 512;
        int dim2 = 1024;

        // Alternative operand shapes tried previously:
        //InitTensor3D(&a, 86, 48, 256, X_FLOAT, 0, mem);
        //InitTensor2D(&b, 256, 256, X_FLOAT, 0, mem);
        //InitTensor4D(&a, 8, 86, 48, 48, X_FLOAT, 0, mem);
        //InitTensor4D(&b, 8, 86, 48, 32, X_FLOAT, 0, mem);
        InitTensor2D(&a, dim1, dim2, X_FLOAT, 0, mem);
        InitTensor2D(&b, dim1, dim2, X_FLOAT, 0, mem);
        //InitTensor4D(&a, 8, 86, 48, 32, X_FLOAT, 0);
        //InitTensor4D(&b, 8, 86, 48, 32, X_FLOAT, 0);

        a.SetDataRand(-1.0F, 1.0F);
        b.SetDataRand(-1.0F, 1.0F);

        halfa = ConvertDataType(a, X_FLOAT16);
        halfb = ConvertDataType(b, X_FLOAT16);

        //a.Dump(&a, stderr, "a:", 10);
        //b.Dump(&b, stderr, "b:", 10);
        //halfa.Dump(&a, stderr, "halfa:", 10);
        //halfb.Dump(&b, stderr, "halfb:", 10);

        int repeat = 10000;

        printf("=========================================\n");

        // FP16: a (dim1 x dim2) times b transposed.
        double start_matrixmul16 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
            halfc = BMMul(halfa, X_NOTRANS, halfb, X_TRANS);
        }
        double elapsed_matrixmul16 = GetClockSec() - start_matrixmul16;
        printf("elapsed_matrixmul16=%.4fs\n", elapsed_matrixmul16);

        printf("------------------------------------------\n");

        // FP32: same product on the original tensors.
        double start_matrixmul32 = GetClockSec();
        for (int i = 0; i < repeat; i++) {
            c = BMMul(a, X_NOTRANS, b, X_TRANS);
        }
        double elapsed_matrixmul32 = GetClockSec() - start_matrixmul32;
        printf("elapsed_matrixmul32=%.4fs\n", elapsed_matrixmul32);

        printf("=========================================\n");

        c.Dump(&c, stderr, "c:", 10);
        halfc.Dump(&halfc, stderr, "halfc:", 10);
    }

    delete mem;
}
void InitTensorFP16Test() {
XTensor a;
InitTensor2D(&a, 1, 10, X_FLOAT, 0);
......
Markdown 格式
0%
您即将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论