Commit 197fac6d by xiaotong

kill the unnecessary allocation on devices that are never used

parent 860980dd
...@@ -41,6 +41,7 @@ XDevManager GDevs; ...@@ -41,6 +41,7 @@ XDevManager GDevs;
XDevice::XDevice() XDevice::XDevice()
{ {
stream = NULL; stream = NULL;
isInitialized = false;
Clear(); Clear();
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -126,6 +127,7 @@ void XDevice::Init(int myDevID) ...@@ -126,6 +127,7 @@ void XDevice::Init(int myDevID)
#endif #endif
} }
isInitialized = true;
} }
/* clear it */ /* clear it */
...@@ -152,6 +154,9 @@ void XDevice::Clear() ...@@ -152,6 +154,9 @@ void XDevice::Clear()
/* get cublas handle */ /* get cublas handle */
cublasHandle_t * XDevice::GetCublasHandle() cublasHandle_t * XDevice::GetCublasHandle()
{ {
if (!isInitialized)
Init(devID);
if(!isHandleReady){ if(!isHandleReady){
MUTEX_LOCK(cublasMutex); MUTEX_LOCK(cublasMutex);
int devIDBackup = 0; int devIDBackup = 0;
...@@ -169,6 +174,9 @@ cublasHandle_t * XDevice::GetCublasHandle() ...@@ -169,6 +174,9 @@ cublasHandle_t * XDevice::GetCublasHandle()
/* get the stream of cuda */ /* get the stream of cuda */
cudaStream_t * XDevice::GetCudaStream() cudaStream_t * XDevice::GetCudaStream()
{ {
if (!isInitialized)
Init(devID);
CheckNTErrors(stream != NULL, "the stream is not initialized!"); CheckNTErrors(stream != NULL, "the stream is not initialized!");
return &stream->stream; return &stream->stream;
...@@ -279,33 +287,13 @@ void XDevManager::Init() ...@@ -279,33 +287,13 @@ void XDevManager::Init()
exit(1); exit(1);
} }
cudaDeviceProp prop[64];
for(int i = 0; i < GPUCount; i++){ for(int i = 0; i < GPUCount; i++){
GPUs[i].Init(i); GPUs[i].devID = i;
cudaGetDeviceProperties(&prop[i], i); //GPUs[i].Init(i);
} }
#ifdef USA_CUDA_P2P
for(int i = 0; i < GPUCount; i++){
cudaSetDevice(i);
for(int j = 0; j < GPUCount; j++){
if(i == j)
continue;
int access;
cudaDeviceCanAccessPeer(&access, i, j);
bool hasUVA = (prop[i].unifiedAddressing && prop[j].unifiedAddressing);
fprintf(stderr, "device %d -> device %d access:%d UVA:%d\n", i, j, access, hasUVA ? 1 : 0);
if(access != 0){
CheckNTErrors((hasUVA == true), "at least one GPU does not support UVA.")
CheckNTErrors((cudaDeviceEnablePeerAccess(j, 0)==cudaSuccess), "cannot set cuda p2t mode!");
}
}
}
#endif
#endif #endif
nGPU = GPUCount; nGPU = GPUCount;
} }
...@@ -351,6 +339,9 @@ into blocks ...@@ -351,6 +339,9 @@ into blocks
*/ */
int XDevManager::GetCudaThread(const int devID, const int n, int * gridSize, int * blockSize) int XDevManager::GetCudaThread(const int devID, const int n, int * gridSize, int * blockSize)
{ {
if (!GPUs[devID].isInitialized)
GPUs[devID].Init(devID);
memset(gridSize, 0, sizeof(int) * 3); memset(gridSize, 0, sizeof(int) * 3);
memset(blockSize, 0, sizeof(int) * 3); memset(blockSize, 0, sizeof(int) * 3);
...@@ -402,6 +393,9 @@ into blocks ...@@ -402,6 +393,9 @@ into blocks
*/ */
int XDevManager::GetCudaThread2D(const int devID, const int n, const int m, int nLimit, int * gridSize, int * blockSize) int XDevManager::GetCudaThread2D(const int devID, const int n, const int m, int nLimit, int * gridSize, int * blockSize)
{ {
if (!GPUs[devID].isInitialized)
GPUs[devID].Init(devID);
memset(gridSize, 0, sizeof(int) * 3); memset(gridSize, 0, sizeof(int) * 3);
memset(blockSize, 0, sizeof(int) * 3); memset(blockSize, 0, sizeof(int) * 3);
......
...@@ -67,6 +67,9 @@ public: ...@@ -67,6 +67,9 @@ public:
/* warp size of an (NVIDIA) GPU */ /* warp size of an (NVIDIA) GPU */
int GPUWarpSize; int GPUWarpSize;
/* indicates whether the device class has been initialized */
bool isInitialized;
/* /*
max grid size (or number of blocks) of an (NVIDIA) GPU max grid size (or number of blocks) of an (NVIDIA) GPU
NOTE: the grid size is specified along three dimensions (x, y, z) NOTE: the grid size is specified along three dimensions (x, y, z)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论