main test

fc5a630a · ltb · 3187918c · fc5a630a
Commit fc5a630a authored Jul 27, 2019 by ltb
--- a/source/network/Main.cpp
+++ b/source/network/Main.cpp
@@ -50,6 +50,13 @@ void ClipFP16Test();
 void ScaleAndShiftFP16Test();
 void InitTensorFP16Test();

+void MultiplyDimTime();       // times MultiplyDim on FP32 tensors and on their FP16 conversions
+void TimeTestGemm();          // times BMMul on FP16 and FP32 tensors and dumps both results
+void TimeTest();              // times a series of tensor operations in FP32 and in FP16
+void TimeInt8AndFloat32();    // times _MatrixMul on FP32 tensors and on INT8-converted tensors
+void TestCPUhalf();           // creates a memory pool and three FP32 tensors, nothing else
+
+
 using namespace nts;
 using namespace fnnlm;
 using namespace transformer;
@@ -109,6 +116,373 @@ int main(int argc, const char ** argv )
    return 0;
 }

+// Set-up-only test: creates a memory pool and initialises three 2D FP32
+// tensors (two dim1 x dim1, one dim2 x dim2) on device `devId`.
+// No operation is run on the tensors; the function only checks that the
+// allocations go through.
+void TestCPUhalf() {
+	int memSize = 1024;
+	int devId = 0;
+	int dim1 = 1024;
+	int dim2 = 32;
+
+	// The pool lives on the stack so it is released when the function
+	// returns (it used to be allocated with `new` and never deleted).
+	XMem mem(devId, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
+	mem.SetDesiredSize(devId, 0, (MTYPE)memSize * MILLION);
+
+	XTensor a;
+	XTensor b;
+	XTensor c;
+
+	// These tensors are created without the pool argument, exactly as before.
+	InitTensor2D(&a, dim1, dim1, X_FLOAT, devId);
+	InitTensor2D(&b, dim2, dim2, X_FLOAT, devId);
+	InitTensor2D(&c, dim1, dim1, X_FLOAT, devId);
+
+}
+
+// Benchmark _MatrixMul on FP32 operands against operands produced by
+// ConvertDataType(..., X_INT8).
+// Each variant is run `repeat` times and the time difference reported by
+// GetClockSec() is printed.
+// NOTE(review): devId is hard-coded to 2, and `tmp` is filled with values in
+// [-100000, 100000] before the INT8 conversion -- confirm both are intended.
+void TimeInt8AndFloat32() {
+	int memSize = 1024;
+	int devId = 2;
+	int dim = 512;
+
+	// The pool is declared before every tensor that allocates from it, so the
+	// tensors are destroyed first and the pool is released on return (it used
+	// to be allocated with `new` and never deleted).
+	XMem mem(devId, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
+	mem.SetDesiredSize(devId, 0, (MTYPE)memSize * MILLION);
+
+	XTensor a;
+	XTensor b;
+	XTensor c;
+	InitTensor2D(&a, dim, dim, X_FLOAT, devId, &mem);
+	InitTensor2D(&b, dim, dim, X_FLOAT, devId, &mem);
+	InitTensor2D(&c, dim, dim, X_FLOAT, devId, &mem);
+	a.SetDataRand(-1.0F, 1.0F);
+	b.SetDataRand(-1.0F, 1.0F);
+
+	XTensor inta;
+	XTensor intb;
+	XTensor intc;
+	InitTensor2D(&inta, dim, dim, X_INT, devId, &mem);
+	InitTensor2D(&intb, dim, dim, X_INT, devId, &mem);
+	InitTensor2D(&intc, dim, dim, X_FLOAT, devId, &mem);
+
+	// inta/intb are overwritten with the INT8 conversion of a random FP32 tensor.
+	XTensor tmp;
+	InitTensor2D(&tmp, dim, dim, X_FLOAT, devId, &mem);
+	tmp.SetDataRand(-100000.0F, 100000.0F);
+	inta = ConvertDataType(tmp, X_INT8);
+	intb = ConvertDataType(tmp, X_INT8);
+
+	int repeat = 10000;
+	printf("test on matrixmul\n");
+	double start_matrixmul32 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		_MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c);
+	}
+	double elapsed_matrixmul32 = GetClockSec() - start_matrixmul32;
+	printf("elapsed_matrixmul32=%.2fs\n", elapsed_matrixmul32);
+
+	double start_int8 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		_MatrixMul(&inta, X_NOTRANS, &intb, X_NOTRANS, &intc);
+	}
+	double elapsed_int8 = GetClockSec() - start_int8;
+	printf("elapsed_int8=%.2fs\n", elapsed_int8);
+}
+
+// Benchmark a series of tensor operations in FP32 and in FP16.
+//
+// Sum, ScaleAndShift, ReduceSum, ReduceMax, LogSoftmax, MatrixMul and
+// ConvertDataType are each run `repeat` times on dim x dim tensors, first in
+// FP32 and then in FP16, and the time difference reported by GetClockSec()
+// is printed. The Sub, Div and Multiply benchmarks are kept but commented out.
+void TimeTest() {
+	int memSize = 1024;
+	int devId = 0;
+	int dim = 512;
+
+	// The pool is a stack object declared before every tensor that allocates
+	// from it. Locals are destroyed in reverse order, so the tensors go first
+	// and the pool last. The previous `delete mem;` at the end of the function
+	// destroyed the pool while all of these tensors were still alive.
+	XMem mem(devId, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
+	mem.SetDesiredSize(devId, 0, (MTYPE)memSize * MILLION);
+
+	XTensor a;
+	XTensor b;
+	XTensor c;
+
+	XTensor halfa;
+	XTensor halfb;
+	XTensor halfc;
+
+	InitTensor2D(&a, dim, dim, X_FLOAT, devId, &mem);
+	InitTensor2D(&b, dim, dim, X_FLOAT, devId, &mem);
+	InitTensor2D(&c, dim, dim, X_FLOAT, devId, &mem);
+
+	InitTensor2D(&halfc, dim, dim, X_FLOAT16, devId, &mem);
+
+	a.SetDataRand(-1.0F, 1.0F);
+	b.SetDataRand(-1.0F, 1.0F);
+
+	// FP16 operands are the converted FP32 operands.
+	halfa = ConvertDataType(a, X_FLOAT16);
+	halfb = ConvertDataType(b, X_FLOAT16);
+
+	int repeat = 100000;
+
+	printf("=========================================\n");
+	printf("test on sum\n");
+	double start_sum32 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		c = Sum(&a, &b);
+	}
+	double elapsed_sum32 = GetClockSec() - start_sum32;
+	printf("elapsed_sum32=%.2fs\n", elapsed_sum32);
+
+	double start_sum16 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		halfc = Sum(&halfa, &halfb);
+	}
+	double elapsed_sum16 = GetClockSec() - start_sum16;
+	printf("elapsed_sum16=%.2fs\n", elapsed_sum16);
+	printf("=========================================\n");
+
+	/*printf("test on sub\n");
+	double start_sub32 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		c = Sub(&a, &b);
+	}
+	double elapsed_sub32 = GetClockSec() - start_sub32;
+	printf("elapsed_sub32=%.2fs\n", elapsed_sub32);
+
+	double start_sub16 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		halfc = Sub(&halfa, &halfb);
+	}
+	double elapsed_sub16 = GetClockSec() - start_sub16;
+	printf("elapsed_sub16=%.2fs\n", elapsed_sub16);
+	printf("=========================================\n");*/
+
+	/*printf("test on div\n");
+	double start_div32 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		c = Div(&a, &b);
+	}
+	double elapsed_div32 = GetClockSec() - start_div32;
+	printf("elapsed_div32=%.2fs\n", elapsed_div32);
+
+	double start_div16 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		halfc = Div(&halfa, &halfb);
+	}
+	double elapsed_div16 = GetClockSec() - start_div16;
+	printf("elapsed_div16=%.2fs\n", elapsed_div16);
+	printf("=========================================\n");*/
+
+	/*printf("test on multiply\n");
+	double start_multiply32 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		c = Multiply(&a, &b);
+	}
+	double elapsed_multiply32 = GetClockSec() - start_multiply32;
+	printf("elapsed_multiply32=%.2fs\n", elapsed_multiply32);
+
+	double start_multiply16 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		halfc = Multiply(&halfa, &halfb);
+	}
+	double elapsed_multiply16 = GetClockSec() - start_multiply16;
+	printf("elapsed_multiply16=%.2fs\n", elapsed_multiply16);
+	printf("=========================================\n");*/
+
+	printf("test on scaleandshift\n");
+	double start_scaleandshift32 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		c = ScaleAndShift(&a, 1, 0);
+	}
+	double elapsed_scaleandshift32 = GetClockSec() - start_scaleandshift32;
+	printf("elapsed_scaleandshift32=%.2fs\n", elapsed_scaleandshift32);
+
+	double start_scaleandshift16 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		halfc = ScaleAndShift(&halfa, 1, 0);
+	}
+	double elapsed_scaleandshift16 = GetClockSec() - start_scaleandshift16;
+	printf("elapsed_scaleandshift16=%.2fs\n", elapsed_scaleandshift16);
+	printf("=========================================\n");
+
+	printf("test on reducesum\n");
+	double start_reducesum32 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		c = ReduceSum(&a, 1);
+	}
+	double elapsed_reducesum32 = GetClockSec() - start_reducesum32;
+	printf("elapsed_reducesum32=%.2fs\n", elapsed_reducesum32);
+
+	double start_reducesum16 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		halfc = ReduceSum(&halfa, 1);
+	}
+	double elapsed_reducesum16 = GetClockSec() - start_reducesum16;
+	printf("elapsed_reducesum16=%.2fs\n", elapsed_reducesum16);
+	printf("=========================================\n");
+
+	printf("test on reducemax\n");
+	double start_reducemax32 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		c = ReduceMax(&a, 1);
+	}
+	double elapsed_reducemax32 = GetClockSec() - start_reducemax32;
+	printf("elapsed_reducemax32=%.2fs\n", elapsed_reducemax32);
+
+	double start_reducemax16 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		halfc = ReduceMax(&halfa, 1);
+	}
+	double elapsed_reducemax16 = GetClockSec() - start_reducemax16;
+	printf("elapsed_reducemax16=%.2fs\n", elapsed_reducemax16);
+	printf("=========================================\n");
+
+	printf("test on logsoftmax\n");
+	double start_logsoftmax32 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		c = LogSoftmax(&a, 1);
+	}
+	double elapsed_logsoftmax32 = GetClockSec() - start_logsoftmax32;
+	printf("elapsed_logsoftmax32=%.2fs\n", elapsed_logsoftmax32);
+
+	double start_logsoftmax16 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		halfc = LogSoftmax(&halfa, 1);
+	}
+	double elapsed_logsoftmax16 = GetClockSec() - start_logsoftmax16;
+	printf("elapsed_logsoftmax16=%.2fs\n", elapsed_logsoftmax16);
+	printf("=========================================\n");
+
+	printf("test on matrixmul\n");
+	double start_matrixmul32 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		c = MatrixMul(&a, &b);
+	}
+	double elapsed_matrixmul32 = GetClockSec() - start_matrixmul32;
+	printf("elapsed_matrixmul32=%.2fs\n", elapsed_matrixmul32);
+
+	double start_matrixmul16 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		halfc = MatrixMul(&halfa, &halfb);
+	}
+	double elapsed_matrixmul16 = GetClockSec() - start_matrixmul16;
+	printf("elapsed_matrixmul16=%.2fs\n", elapsed_matrixmul16);
+	printf("=========================================\n");
+
+	// Conversion cost in both directions; the second loop overwrites `a`.
+	printf("test on convert\n");
+	double start_convert32to16 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		halfa = ConvertDataType(a, X_FLOAT16);
+	}
+	double elapsed_convert32to16 = GetClockSec() - start_convert32to16;
+	printf("elapsed_convert32to16=%.2fs\n", elapsed_convert32to16);
+
+	double start_convert16to32 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		a = ConvertDataType(halfa, X_FLOAT);
+	}
+	double elapsed_convert16to32 = GetClockSec() - start_convert16to32;
+	printf("elapsed_convert16to32=%.2fs\n", elapsed_convert16to32);
+	printf("=========================================\n");
+}
+
+// Benchmark MultiplyDim on FP32 tensors and on their FP16 conversions.
+// Both variants are run `repeat` times and the time difference reported by
+// GetClockSec() is printed.
+void MultiplyDimTime() {
+	int memSize = 1024;
+	int devId = 0;
+	int dim1 = 1024;
+	int dim2 = 32;
+
+	// The pool is a stack object declared before the tensor that allocates
+	// from it (halfc), so it is released on return after that tensor is gone.
+	// It used to be allocated with `new` and never deleted.
+	XMem mem(devId, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
+	mem.SetDesiredSize(devId, 0, (MTYPE)memSize * MILLION);
+
+	XTensor a;
+	XTensor b;
+	XTensor c;
+
+	InitTensor2D(&a, dim1, dim1, X_FLOAT, devId);
+	InitTensor2D(&b, dim2, dim2, X_FLOAT, devId);
+	InitTensor2D(&c, dim1, dim1, X_FLOAT, devId);
+
+	a.SetDataRand(-1.0F, 1.0F);
+	// NOTE(review): SetDataRandn here, while `a` uses SetDataRand -- confirm
+	// this is intended and not a typo.
+	b.SetDataRandn(-1.0F, 1.0F);
+
+	int repeat = 2000;
+	printf("test on MultiplyDim\n");
+
+	// Exactly `repeat` iterations, same as the FP16 loop below. The loop used
+	// `<= repeat`, which ran one extra FP32 iteration and skewed the comparison.
+	double start = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		c = MultiplyDim(&a, &b, 0);
+	}
+	double elapsed = GetClockSec() - start;
+	printf("elapsed_MultiplyDim32=%.4fs \n", elapsed);
+
+	XTensor halfa;
+	XTensor halfb;
+	XTensor halfc;
+	InitTensor2D(&halfc, dim1, dim1, X_FLOAT16, devId, &mem);
+	halfa = ConvertDataType(a, X_FLOAT16);
+	halfb = ConvertDataType(b, X_FLOAT16);
+	double starthalf = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		halfc = MultiplyDim(&halfa, &halfb, 0);
+	}
+	double elapsedhalf = GetClockSec() - starthalf;
+	printf("elapsed_MultiplyDim16=%.4fs\n", elapsedhalf);
+}
+
+// Benchmark BMMul(x, X_NOTRANS, y, X_TRANS) on FP16 and then FP32 operands
+// of shape dim1 x dim2.
+// Each variant is run `repeat` times, the time difference reported by
+// GetClockSec() is printed, and both results are dumped to stderr.
+void TimeTestGemm() {
+	int memSize = 1024;
+
+	// The pool is a stack object declared before every tensor that allocates
+	// from it, so it is released on return after those tensors are gone.
+	// The previous code called `delete mem;` on an uninitialized pointer
+	// (undefined behaviour) and then leaked the pool it allocated.
+	XMem mem(0, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
+	mem.SetDesiredSize(0, 0, (MTYPE)memSize * MILLION);
+
+	XTensor a;
+	XTensor b;
+	XTensor c;
+	XTensor halfa;
+	XTensor halfb;
+	XTensor halfc;
+	int dim1 = 512;
+	int dim2 = 1024;
+	//InitTensor3D(&a, 86, 48, 256, X_FLOAT, 0, mem);
+	//InitTensor2D(&b, 256, 256, X_FLOAT, 0, mem);
+
+	//InitTensor4D(&a, 8, 86, 48, 48, X_FLOAT, 0, mem);
+	//InitTensor4D(&b, 8, 86, 48, 32, X_FLOAT, 0, mem);
+	InitTensor2D(&a, dim1, dim2, X_FLOAT, 0, &mem);
+	InitTensor2D(&b, dim1, dim2, X_FLOAT, 0, &mem);
+	//InitTensor4D(&a, 8, 86, 48, 32, X_FLOAT, 0);
+	//InitTensor4D(&b, 8, 86, 48, 32, X_FLOAT, 0);
+
+	a.SetDataRand(-1.0F, 1.0F);
+	b.SetDataRand(-1.0F, 1.0F);
+	halfa = ConvertDataType(a, X_FLOAT16);
+	halfb = ConvertDataType(b, X_FLOAT16);
+
+	//a.Dump(&a, stderr, "a:", 10);
+	//b.Dump(&b, stderr, "b:", 10);
+
+	//halfa.Dump(&a, stderr, "halfa:", 10);
+	//halfb.Dump(&b, stderr, "halfb:", 10);
+
+	int repeat = 10000;
+	printf("=========================================\n");
+	double start_matrixmul16 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		halfc = BMMul(halfa, X_NOTRANS, halfb, X_TRANS);
+	}
+	double elapsed_matrixmul16 = GetClockSec() - start_matrixmul16;
+	printf("elapsed_matrixmul16=%.4fs\n", elapsed_matrixmul16);
+	printf("------------------------------------------\n");
+	double start_matrixmul32 = GetClockSec();
+	for (int i = 0; i < repeat; i++) {
+		c = BMMul(a, X_NOTRANS, b, X_TRANS);
+	}
+	double elapsed_matrixmul32 = GetClockSec() - start_matrixmul32;
+	printf("elapsed_matrixmul32=%.4fs\n", elapsed_matrixmul32);
+	printf("=========================================\n");
+
+	// Dump the first entries of both results so they can be compared by eye.
+	c.Dump(&c, stderr, "c:", 10);
+	halfc.Dump(&halfc, stderr, "halfc:", 10);
+}
+
 void InitTensorFP16Test() {
    XTensor a;
    InitTensor2D(&a, 1, 10, X_FLOAT, 0);