Commit be38e4e5 by xiaotong

better CPU code

parent de3aeee1
...@@ -84,17 +84,25 @@ void _MatrixMulBatched(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -84,17 +84,25 @@ void _MatrixMulBatched(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
XList * aList = new XList(10); XList * aList = new XList(10);
XList * bList = new XList(10); XList * bList = new XList(10);
XList * cList = new XList(10); XList * cList = new XList(10);
int aDimSize[2] = { -a->dimSizeRDI[1], a->dimSizeRDI[0] }; int aDimSize[2] = {-a->dimSizeRDI[1], a->dimSizeRDI[0]};
int bDimSize[2] = { -b->dimSizeRDI[1], b->dimSizeRDI[0] }; int bDimSize[2] = {-b->dimSizeRDI[1], b->dimSizeRDI[0]};
int cDimSize[2] = { -c->dimSizeRDI[1], c->dimSizeRDI[0] }; int cDimSize[2] = {-c->dimSizeRDI[1], c->dimSizeRDI[0]};
XTensor * tensorBuf = new XTensor[blockNum * 3];
XTensor * aBuf = tensorBuf;
XTensor * bBuf = tensorBuf + blockNum;
XTensor * cBuf = tensorBuf + blockNum * 2;
for (int p = 0; p < blockNum; p++) { for (int p = 0; p < blockNum; p++) {
void * ap = (char*)a->data + aRealBlockSize * p; void * ap = (char*)a->data + aRealBlockSize * p;
void * bp = (char*)b->data + bRealBlockSize * p; void * bp = (char*)b->data + bRealBlockSize * p;
void * cp = (char*)c->data + cRealBlockSize * p; void * cp = (char*)c->data + cRealBlockSize * p;
XTensor * ai = NewTensor(2, aDimSize, a->dataType, a->denseRatio, a->devID, a->mem); XTensor * ai = aBuf + p;
XTensor * bi = NewTensor(2, bDimSize, b->dataType, b->denseRatio, b->devID, b->mem); XTensor * bi = bBuf + p;
XTensor * ci = NewTensor(2, cDimSize, c->dataType, c->denseRatio, c->devID, c->mem); XTensor * ci = cBuf + p;
InitTensor(ai, 2, aDimSize, a->dataType, a->denseRatio, a->devID, a->mem);
InitTensor(bi, 2, bDimSize, b->dataType, b->denseRatio, b->devID, b->mem);
InitTensor(ci, 2, cDimSize, c->dataType, c->denseRatio, c->devID, c->mem);
ai->data = ap; ai->data = ap;
bi->data = bp; bi->data = bp;
ci->data = cp; ci->data = cp;
...@@ -132,22 +140,21 @@ void _MatrixMulBatched(const XTensor * a, MATRIX_TRANS_TYPE transposedA, ...@@ -132,22 +140,21 @@ void _MatrixMulBatched(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
for (int i = 0; i < aList->count; i++) { for (int i = 0; i < aList->count; i++) {
XTensor * ai = (XTensor*)aList->GetItem(i); XTensor * ai = (XTensor*)aList->GetItem(i);
ai->data = NULL; ai->data = NULL;;
delete ai;
} }
for (int i = 0; i < bList->count; i++) { for (int i = 0; i < bList->count; i++) {
XTensor * bi = (XTensor*)bList->GetItem(i); XTensor * bi = (XTensor*)bList->GetItem(i);
bi->data = NULL; bi->data = NULL;
delete bi;
} }
for (int i = 0; i < cList->count; i++) { for (int i = 0; i < cList->count; i++) {
XTensor * ci = (XTensor*)cList->GetItem(i); XTensor * ci = (XTensor*)cList->GetItem(i);
ci->data = NULL; ci->data = NULL;
delete ci;
} }
delete[] tensorBuf;
delete aList; delete aList;
delete bList; delete bList;
delete cList; delete cList;
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论