Commit c103b9f3 by xuchen

fix the bug and add the device reset function

parent 6df1ecc9
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "XDevice.h" #include "XDevice.h"
#include "XGlobal.h" #include "XGlobal.h"
#include "XThread.h" #include "XThread.h"
#include "XUtility.h"
#include "XList.h" #include "XList.h"
/* the nts (NiuTrans.Tensor) namespace */ /* the nts (NiuTrans.Tensor) namespace */
...@@ -48,25 +49,39 @@ XDevice::XDevice() ...@@ -48,25 +49,39 @@ XDevice::XDevice()
#ifdef USE_CUDA #ifdef USE_CUDA
MUTEX_INIT(cublasMutex); MUTEX_INIT(cublasMutex);
isHandleReady = false; isHandleReady = false;
isGenReady = false;
#endif #endif
} }
/*
destructor
Releases what this device object still holds. A device whose isInitialized
flag is false is left untouched.
*/
XDevice::~XDevice()
{
    if (isInitialized) {
#ifdef USE_CUDA
        MUTEX_DELE(cublasMutex);

        /* the cuBLAS handle exists only if isHandleReady was set */
        if (isHandleReady) {
            cublasDestroy(cublasHandle);
            isHandleReady = false;
        }

        /* the cuRAND generator exists only if isGenReady was set */
        if (isGenReady) {
            curandDestroyGenerator(gen);
            isGenReady = false;
        }

        /* drop the stream object and forget the pointer */
        if (stream != NULL) {
            delete stream;
            stream = NULL;
        }
#endif
    }
}
/* initialize it and get the device information */ /* initialize it and get the device information */
void XDevice::Init(int myDevID) void XDevice::Init(int myDevID)
{ {
if (isInitialized)
return;
Clear(); Clear();
devID = myDevID; devID = myDevID;
...@@ -86,6 +101,7 @@ void XDevice::Init(int myDevID) ...@@ -86,6 +101,7 @@ void XDevice::Init(int myDevID)
curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT); curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT);
curandSetPseudoRandomGeneratorSeed(gen, seed); curandSetPseudoRandomGeneratorSeed(gen, seed);
isGenReady = true;
if(cudaGetDeviceProperties(&prop, devID) != cudaSuccess){ if(cudaGetDeviceProperties(&prop, devID) != cudaSuccess){
XPRINT1(0, stderr, "cannot get GPU(%d) information.", devID); XPRINT1(0, stderr, "cannot get GPU(%d) information.", devID);
...@@ -142,6 +158,13 @@ void XDevice::Clear() ...@@ -142,6 +158,13 @@ void XDevice::Clear()
{ {
devID = -100; devID = -100;
memSize = 0; memSize = 0;
name[0] = 0;
name2[0] = 0;
isUVASupported = false;
// TODO: cublasDestroy(cublasHandle);
#ifdef USE_CUDA
GPUWarpSize = 0; GPUWarpSize = 0;
memset(GPUMaxGridSize, 0, sizeof(int) * 3); memset(GPUMaxGridSize, 0, sizeof(int) * 3);
...@@ -149,11 +172,43 @@ void XDevice::Clear() ...@@ -149,11 +172,43 @@ void XDevice::Clear()
GPUMaxThreadNum = 0; GPUMaxThreadNum = 0;
name[0] = 0; MUTEX_DELE(cublasMutex);
name2[0] = 0; if (isHandleReady) {
cublasDestroy(cublasHandle);
isHandleReady = false;
}
if (isGenReady) {
curandDestroyGenerator(gen);
isGenReady = false;
}
if (stream != NULL) {
delete stream;
stream = NULL;
}
#endif
isInitialized = false;
}
isUVASupported = false; void XDevice::Reset()
// TODO: cublasDestroy(cublasHandle); {
Clear();
if (!isInitialized)
return;
for (int i = 0; i < MAX_CPU_MEM_NUM; i++)
delete GMems.GetMem(devID);
#ifdef USE_CUDA
if (devID >= 0) {
int devIDBackup = -1;
cudaGetDevice(&devIDBackup);
cudaSetDevice(devID);
cudaDeviceReset();
cudaSetDevice(devIDBackup);
}
#endif
} }
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -266,6 +321,7 @@ void XDevice::SetFastFlagsAllDevices() ...@@ -266,6 +321,7 @@ void XDevice::SetFastFlagsAllDevices()
/*
constructor
Starts with the "initialized" flag off, so the early return at the top of
Init() is never taken on an indeterminate value, then clears the device
table and runs the initialization.
*/
XDevManager::XDevManager()
    : isInitialized(false)
{
    Clear();
    Init();
}
...@@ -279,6 +335,9 @@ XDevManager::~XDevManager() ...@@ -279,6 +335,9 @@ XDevManager::~XDevManager()
/* initialization */ /* initialization */
void XDevManager::Init() void XDevManager::Init()
{ {
if (isInitialized)
return;
srand((unsigned int)time(NULL)); srand((unsigned int)time(NULL));
Clear(); Clear();
...@@ -306,6 +365,7 @@ void XDevManager::Init() ...@@ -306,6 +365,7 @@ void XDevManager::Init()
#endif #endif
nGPU = GPUCount; nGPU = GPUCount;
isInitialized = true;
} }
/* clear it */ /* clear it */
...@@ -316,6 +376,8 @@ void XDevManager::Clear() ...@@ -316,6 +376,8 @@ void XDevManager::Clear()
for(int i = 0; i < MAX_GPU_NUM; i++) for(int i = 0; i < MAX_GPU_NUM; i++)
GPUs[i].Clear(); GPUs[i].Clear();
isInitialized = false;
} }
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -469,55 +531,6 @@ int XDevManager::GetCudaThread2D(const int devID, const int n, const int m, int ...@@ -469,55 +531,6 @@ int XDevManager::GetCudaThread2D(const int devID, const int n, const int m, int
return 0; return 0;
} }
/*
split a line into pieces
>> inputString - the line to be split
>> seperator - the delimiter string (an empty one means "do not split")
>> items - list that receives the pieces; it is cleared first and every
            piece is a copy allocated with new[]
<< return - number of pieces stored in "items"
*/
int SplitALine(char * inputString, const char * seperator, StrList* items)
{
    items->Clear();

    if (inputString == NULL || seperator == NULL)
        return 0;

    const int lineLen = (int)strlen(inputString);
    const int delimLen = (int)strlen(seperator);

    if (lineLen == 0)
        return 0;

    /* no delimiter: the whole line is the only piece */
    if (delimLen == 0) {
        char * whole = new char[lineLen + 1];
        strcpy(whole, inputString);
        items->Add(whole);
        return items->count;
    }

    for (char * head = inputString; head != NULL; ) {
        char * hit = strstr(head, seperator);
        char * piece = NULL;

        if (hit != NULL) {
            /* copy the text in front of the delimiter and terminate it */
            const size_t n = hit - head;
            piece = new char[n + 1];
            memcpy(piece, head, n);
            piece[n] = '\0';
            head = hit + delimLen;
        }
        else {
            /* last piece: copy the rest of the line together with its
               terminating '\0' */
            const size_t n = lineLen - (head - inputString);
            piece = new char[n + 1];
            memcpy(piece, head, n + 1);
            head = NULL;
        }

        items->Add(piece);
    }

    return items->count;
}
/* /*
get device ids for the given device information get device ids for the given device information
......
...@@ -113,6 +113,9 @@ public: ...@@ -113,6 +113,9 @@ public:
/* specify if the handle is initialized */ /* specify if the handle is initialized */
bool isHandleReady; bool isHandleReady;
/* specify if the generator is initialized */
bool isGenReady;
/* generator of random numbers */ /* generator of random numbers */
curandGenerator_t gen; curandGenerator_t gen;
#endif #endif
...@@ -131,6 +134,9 @@ public: ...@@ -131,6 +134,9 @@ public:
/* clear it */ /* clear it */
void Clear(); void Clear();
/* reset it */
void Reset();
#ifdef USE_CUDA #ifdef USE_CUDA
/* get cublas handle */ /* get cublas handle */
cublasHandle_t * GetCublasHandle(); cublasHandle_t * GetCublasHandle();
...@@ -178,6 +184,9 @@ public: ...@@ -178,6 +184,9 @@ public:
/* number of GPUs */ /* number of GPUs */
int nGPU; int nGPU;
/* indicates whether the management of devices has been initialized */
bool isInitialized;
public: public:
/* constructor */ /* constructor */
XDevManager(); XDevManager();
......
...@@ -31,8 +31,8 @@ ...@@ -31,8 +31,8 @@
/* the nts (NiuTrans.Tensor) namespace */ /* the nts (NiuTrans.Tensor) namespace */
namespace nts{ namespace nts{
int testxmemid = 0; //int testxmemid = 0;
void * recordp = NULL; //void * recordp = NULL;
/* /*
for managing the memories for managing the memories
...@@ -1480,9 +1480,13 @@ void XMem::ShowMemUsage(FILE * file) ...@@ -1480,9 +1480,13 @@ void XMem::ShowMemUsage(FILE * file)
total += blocks[i].size; total += blocks[i].size;
} }
} }
MTYPE bufSizeTotal = bufSize;
MTYPE bufSizeUsed = bufUsed;
fprintf(file, "mem:%.1fMB used:%.1fMB usage:%.3f\n", fprintf(file, "mem:%.1fMB used:%.1fMB usage:%.3f\n",
(DTYPE)total/MILLION, (DTYPE)used/MILLION, (DTYPE)used/total); (DTYPE)total/ 1024 / 1024, (DTYPE)used/ 1024 / 1024, (DTYPE)used/total);
fprintf(file, "buf:%.1fMB used:%.1fMB usage:%.3f\n",
(DTYPE)bufSizeTotal / 1024 / 1024, (DTYPE)bufSizeUsed / 1024 / 1024, (DTYPE)bufSizeUsed / bufSizeTotal);
} }
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -1632,7 +1636,8 @@ XMem * XMemManager::GetMem(const int devID) ...@@ -1632,7 +1636,8 @@ XMem * XMemManager::GetMem(const int devID)
mem = GPUMems + devID; mem = GPUMems + devID;
} }
else{ else{
XPRINT1(0, stderr, "Cannot get the memory (%d). Please check your device id!", devID); XPRINT1(0, stderr, "Please check your device id (%d)!", devID);
ShowNTErrors("Cannot get the memory!");
} }
} }
......
...@@ -480,12 +480,10 @@ public: ...@@ -480,12 +480,10 @@ public:
/* managing the memories */ /* managing the memories */
extern XMemManager GMems; extern XMemManager GMems;
//extern XMem * GMem;
//extern int testxmemid;
extern XMem * GMem; //extern void * recordp;
extern int testxmemid;
extern void * recordp;
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
......
...@@ -834,7 +834,7 @@ int CompXFloat(const void * a, const void * b) ...@@ -834,7 +834,7 @@ int CompXFloat(const void * a, const void * b)
void ResetGPUDevices() void ResetGPUDevices()
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
#if CUDART_VERSION < 10000
cudaThreadExit(); cudaThreadExit();
return; return;
...@@ -845,7 +845,60 @@ void ResetGPUDevices() ...@@ -845,7 +845,60 @@ void ResetGPUDevices()
cudaSetDevice(i); cudaSetDevice(i);
cudaDeviceReset(); cudaDeviceReset();
}*/ }*/
#else
ShowNTErrors("TODO!");
#endif
#endif #endif
} }
/*
split a string
>> inputString - a line of string; it is only read
>> seperator - the delimiter to split by; an empty delimiter yields the whole
                input as a single item
>> items - splitting result; the list is cleared first and every item added
            is a '\0'-terminated copy allocated with new[]
<< return - how many items are stored in "items"; 0 if any argument is NULL
             or the input is empty
*/
int SplitALine(char* inputString, const char* seperator, StrList* items)
{
    /* "items" used to be dereferenced before any NULL check */
    if (items == NULL)
        return 0;

    items->Clear();

    if (inputString == NULL || seperator == NULL)
        return 0;

    const size_t inputLen = strlen(inputString);
    const size_t sepLen = strlen(seperator);

    if (inputLen == 0)
        return 0;

    const char* cursor = inputString;

    while (cursor != NULL) {
        /* strstr() matches an empty needle at every position, so an empty
           delimiter is handled as "no further delimiter" */
        const char* hit = (sepLen > 0) ? strstr(cursor, seperator) : NULL;

        /* either the text up to the delimiter or the remainder of the input */
        const size_t pieceLen = (hit != NULL) ?
                                (size_t)(hit - cursor) :
                                inputLen - (size_t)(cursor - inputString);

        char* item = new char[pieceLen + 1];
        memcpy(item, cursor, pieceLen);
        item[pieceLen] = '\0';
        items->Add(item);

        /* continue behind the delimiter, or stop after the last piece */
        cursor = (hit != NULL) ? hit + sepLen : NULL;
    }

    return items->count;
}
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
...@@ -59,6 +59,8 @@ extern double GetClockSec(); ...@@ -59,6 +59,8 @@ extern double GetClockSec();
extern void XQSort(void * data, void * index, int num, int width, int stride, int (*comp)(const void *, const void *)); extern void XQSort(void * data, void * index, int num, int width, int stride, int (*comp)(const void *, const void *));
extern int CompXFloat(const void * a, const void * b); extern int CompXFloat(const void * a, const void * b);
int SplitALine(char* inputString, const char* seperator, StrList* items);
#ifdef USE_CUDA #ifdef USE_CUDA
extern void XMemCopyAsync(void * t, int devIDT, const void * s, int devIDS, size_t size, cudaStream_t stream, int streamDevID); extern void XMemCopyAsync(void * t, int devIDT, const void * s, int devIDS, size_t size, cudaStream_t stream, int streamDevID);
#else #else
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论