Commit 197fac6d by xiaotong

Remove the unnecessary allocation on devices that are never used

parent 860980dd
......@@ -41,6 +41,7 @@ XDevManager GDevs;
XDevice::XDevice()
{
stream = NULL;
isInitialized = false;
Clear();
#ifdef USE_CUDA
......@@ -126,6 +127,7 @@ void XDevice::Init(int myDevID)
#endif
}
isInitialized = true;
}
/* clear it */
......@@ -152,6 +154,9 @@ void XDevice::Clear()
/* get cublas handle */
cublasHandle_t * XDevice::GetCublasHandle()
{
if (!isInitialized)
Init(devID);
if(!isHandleReady){
MUTEX_LOCK(cublasMutex);
int devIDBackup = 0;
......@@ -169,6 +174,9 @@ cublasHandle_t * XDevice::GetCublasHandle()
/* get the stream of cuda */
cudaStream_t * XDevice::GetCudaStream()
{
if (!isInitialized)
Init(devID);
CheckNTErrors(stream != NULL, "the stream is not initialized!");
return &stream->stream;
......@@ -279,33 +287,13 @@ void XDevManager::Init()
exit(1);
}
cudaDeviceProp prop[64];
for(int i = 0; i < GPUCount; i++){
GPUs[i].Init(i);
cudaGetDeviceProperties(&prop[i], i);
GPUs[i].devID = i;
//GPUs[i].Init(i);
}
#ifdef USA_CUDA_P2P
for(int i = 0; i < GPUCount; i++){
cudaSetDevice(i);
for(int j = 0; j < GPUCount; j++){
if(i == j)
continue;
int access;
cudaDeviceCanAccessPeer(&access, i, j);
bool hasUVA = (prop[i].unifiedAddressing && prop[j].unifiedAddressing);
fprintf(stderr, "device %d -> device %d access:%d UVA:%d\n", i, j, access, hasUVA ? 1 : 0);
if(access != 0){
CheckNTErrors((hasUVA == true), "at least one GPU does not support UVA.")
CheckNTErrors((cudaDeviceEnablePeerAccess(j, 0)==cudaSuccess), "cannot set cuda p2t mode!");
}
}
}
#endif
#endif
nGPU = GPUCount;
}
......@@ -351,6 +339,9 @@ into blocks
*/
int XDevManager::GetCudaThread(const int devID, const int n, int * gridSize, int * blockSize)
{
if (!GPUs[devID].isInitialized)
GPUs[devID].Init(devID);
memset(gridSize, 0, sizeof(int) * 3);
memset(blockSize, 0, sizeof(int) * 3);
......@@ -402,6 +393,9 @@ into blocks
*/
int XDevManager::GetCudaThread2D(const int devID, const int n, const int m, int nLimit, int * gridSize, int * blockSize)
{
if (!GPUs[devID].isInitialized)
GPUs[devID].Init(devID);
memset(gridSize, 0, sizeof(int) * 3);
memset(blockSize, 0, sizeof(int) * 3);
......
......@@ -67,6 +67,9 @@ public:
/* warp size of an (NVIDIA) GPU */
int GPUWarpSize;
/* indicates whether the device class has been initialized */
bool isInitialized;
/*
max grid size (or number of blocks) of an (NVIDIA) GPU
NOTE: the grid size is given along three dimensions (x, y, z)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论