Commit 4cabf059 by xiaotong

better memory allocation

parent cf149d57
...@@ -257,24 +257,32 @@ void _CudaSpreadForGather(XTensor * source, XTensor * collection, int dim, ...@@ -257,24 +257,32 @@ void _CudaSpreadForGather(XTensor * source, XTensor * collection, int dim,
dim3 blocks(cudaGrids[0], cudaGrids[1]); dim3 blocks(cudaGrids[0], cudaGrids[1]);
dim3 threads(cudaBlocks[0], cudaBlocks[1]); dim3 threads(cudaBlocks[0], cudaBlocks[1]);
DTYPE * s = (DTYPE*)source->data;
DTYPE * c = (DTYPE*)collection->data;
XMem * mem = source->mem; XMem * mem = source->mem;
int * si = mem != NULL ? int * si = mem != NULL ?
(int*)mem->AllocBuf(mem->devID, sizeof(int) * indexSize * 2) : (int*)mem->AllocBuf(mem->devID, sizeof(int) * indexSize) :
(int*)XMemAlloc(mem->devID, sizeof(int) * indexSize * 2); (int*)XMemAlloc(source->devID, sizeof(int) * indexSize);
int * ci = si + indexSize; int * ci = mem != NULL ?
(int*)mem->AllocBuf(mem->devID, sizeof(int) * indexSize) :
(int*)XMemAlloc(collection->devID, sizeof(int) * indexSize);
XMemCopy(si, source->devID, srcIndex, -1, sizeof(int) * indexSize);
XMemCopy(ci, collection->devID, collIndex, -1, sizeof(int) * indexSize);
//XMemCopy(si, source->devID, srcIndex, -1, sizeof(int));
//XMemCopy(ci, collection->devID, collIndex, -1, sizeof(int));
XMemCopy(si, mem->devID, srcIndex, -1, sizeof(int) * indexSize); DTYPE * s = (DTYPE*)source->data;
XMemCopy(ci, mem->devID, collIndex, -1, sizeof(int) * indexSize); DTYPE * c = (DTYPE*)collection->data;
KernelSpreadForGatherFuzed<<<blocks, threads >>>(s, c, blockNum, blockSizeSrc, blockSizeColl, stride, indexSize, si, ci); KernelSpreadForGatherFuzed<<<blocks, threads >>>(s, c, blockNum, blockSizeSrc, blockSizeColl, stride, indexSize, si, ci);
if(mem != NULL) if (mem != NULL) {
mem->ReleaseBuf(mem->devID, sizeof(int) * indexSize * 2); mem->ReleaseBuf(mem->devID, sizeof(int) * indexSize);
else mem->ReleaseBuf(mem->devID, sizeof(int) * indexSize);
XMemFree(mem->devID, si); }
else {
XMemFree(source->devID, si);
XMemFree(collection->devID, ci);
}
} }
BacktoCudaDev(source->devID, devIDBackup); BacktoCudaDev(source->devID, devIDBackup);
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论