Commit c103b9f3 by xuchen

fix the bug and add the device reset function

parent 6df1ecc9
......@@ -28,6 +28,7 @@
#include "XDevice.h"
#include "XGlobal.h"
#include "XThread.h"
#include "XUtility.h"
#include "XList.h"
/* the nts (NiuTrans.Tensor) namespace */
......@@ -48,25 +49,39 @@ XDevice::XDevice()
#ifdef USE_CUDA
MUTEX_INIT(cublasMutex);
isHandleReady = false;
isGenReady = false;
#endif
}
/*
destructor
Releases the CUDA-side resources this device still owns. Each resource is
released at most once: the ready flags / stream pointer are tested first and
reset right after the release.
*/
XDevice::~XDevice()
{
    /* Clear() releases the same resources and sets isInitialized to false,
       so there is nothing left to do for a cleared or never-initialized device */
    if (!isInitialized)
        return;

#ifdef USE_CUDA
    MUTEX_DELE(cublasMutex);

    /* the cuBLAS handle exists only when isHandleReady is set */
    if (isHandleReady) {
        cublasDestroy(cublasHandle);
        isHandleReady = false;
    }

    /* the cuRAND generator exists only when isGenReady is set (see Init);
       it must not be destroyed unconditionally or a second time */
    if (isGenReady) {
        curandDestroyGenerator(gen);
        isGenReady = false;
    }

    if (stream != NULL) {
        delete stream;
        stream = NULL;
    }
#endif
}
/* initialize it and get the device information */
void XDevice::Init(int myDevID)
{
if (isInitialized)
return;
Clear();
devID = myDevID;
......@@ -86,6 +101,7 @@ void XDevice::Init(int myDevID)
curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT);
curandSetPseudoRandomGeneratorSeed(gen, seed);
isGenReady = true;
if(cudaGetDeviceProperties(&prop, devID) != cudaSuccess){
XPRINT1(0, stderr, "cannot get GPU(%d) information.", devID);
......@@ -142,6 +158,13 @@ void XDevice::Clear()
{
devID = -100;
memSize = 0;
name[0] = 0;
name2[0] = 0;
isUVASupported = false;
// TODO: cublasDestroy(cublasHandle);
#ifdef USE_CUDA
GPUWarpSize = 0;
memset(GPUMaxGridSize, 0, sizeof(int) * 3);
......@@ -149,11 +172,43 @@ void XDevice::Clear()
GPUMaxThreadNum = 0;
name[0] = 0;
name2[0] = 0;
MUTEX_DELE(cublasMutex);
if (isHandleReady) {
cublasDestroy(cublasHandle);
isHandleReady = false;
}
if (isGenReady) {
curandDestroyGenerator(gen);
isGenReady = false;
}
if (stream != NULL) {
delete stream;
stream = NULL;
}
#endif
isInitialized = false;
}
isUVASupported = false;
// TODO: cublasDestroy(cublasHandle);
/*
reset the device
Releases everything this object holds via Clear() and, for a GPU (devID >= 0),
calls cudaDeviceReset() on that device. The device that was current before the
call is made current again afterwards. Does nothing if the device has not been
initialized.
*/
void XDevice::Reset()
{
    /* this test has to come before Clear(): Clear() sets isInitialized to
       false, so testing afterwards would make the function return every time */
    if (!isInitialized)
        return;

    /* Clear() overwrites devID with -100; keep the real id for the CUDA calls */
    const int devIDReset = devID;

    /* destroy the cuBLAS handle, the cuRAND generator and the stream while
       the device context is still alive */
    Clear();

    /* NOTE(review): the previous code ran "delete GMems.GetMem(devID)"
       MAX_CPU_MEM_NUM times. That deletes the same pointer repeatedly, and
       XMemManager::GetMem returns the address of an element of its own array
       (GPUMems + devID), which must not be passed to delete. The loop is
       therefore dropped; the memory pool of this device presumably has to be
       released through XMem's own interface before the reset - TODO confirm. */

#ifdef USE_CUDA
    if (devIDReset >= 0) {
        int devIDBackup = -1;
        const bool hasBackup = (cudaGetDevice(&devIDBackup) == cudaSuccess);

        /* cudaDeviceReset() acts on the current device, so only call it when
           switching to the target device actually succeeded */
        if (cudaSetDevice(devIDReset) == cudaSuccess)
            cudaDeviceReset();

        if (hasBackup)
            cudaSetDevice(devIDBackup);
    }
#else
    (void)devIDReset;
#endif
}
#ifdef USE_CUDA
......@@ -266,6 +321,7 @@ void XDevice::SetFastFlagsAllDevices()
/* constructor: puts the manager into a cleared state and then runs Init() */
XDevManager::XDevManager()
{
/* Init() returns immediately when isInitialized is true, so the flag gets a
   defined value (false) before anything else reads it */
isInitialized = false;
/* Clear() calls Clear() on the GPUs[] entries and also sets isInitialized to false */
Clear();
Init();
}
......@@ -279,6 +335,9 @@ XDevManager::~XDevManager()
/* initialization */
void XDevManager::Init()
{
if (isInitialized)
return;
srand((unsigned int)time(NULL));
Clear();
......@@ -306,6 +365,7 @@ void XDevManager::Init()
#endif
nGPU = GPUCount;
isInitialized = true;
}
/* clear it */
......@@ -316,6 +376,8 @@ void XDevManager::Clear()
for(int i = 0; i < MAX_GPU_NUM; i++)
GPUs[i].Clear();
isInitialized = false;
}
#ifdef USE_CUDA
......@@ -469,55 +531,6 @@ int XDevManager::GetCudaThread2D(const int devID, const int n, const int m, int
return 0;
}
/*
split a line of text into items
>> inputString - the line to split
>> seperator - the delimiter string; if it is empty the whole line becomes one item
>> items - the list that receives the pieces (cleared first); every piece is a
            copy allocated with new char[]
<< return - the number of items in the list
*/
int SplitALine(char * inputString, const char * seperator, StrList* items)
{
    items->Clear();

    if(inputString == NULL || seperator == NULL)
        return 0;

    const int textLen = (int)strlen(inputString);
    const int delimLen = (int)strlen(seperator);

    if(textLen == 0)
        return 0;

    /* no delimiter: copy the line as a single item */
    if(delimLen == 0){
        char * whole = new char[textLen + 1];
        strcpy(whole, inputString);
        items->Add(whole);
        return items->count;
    }

    /* walk from delimiter to delimiter; the text after the last delimiter
       (possibly empty) is the final item */
    char * cursor = inputString;
    for(;;){
        char * hit = strstr(cursor, seperator);
        const int pieceLen = (hit != NULL) ? (int)(hit - cursor)
                                           : textLen - (int)(cursor - inputString);

        char * piece = new char[pieceLen + 1];
        memcpy(piece, cursor, pieceLen);
        piece[pieceLen] = '\0';
        items->Add(piece);

        if(hit == NULL)
            break;
        cursor = hit + delimLen;
    }

    return items->count;
}
/*
get device ids for the given device information
......
......@@ -112,6 +112,9 @@ public:
/* specify if the handle is initialized */
bool isHandleReady;
/* specify if the generator is initialized */
bool isGenReady;
/* generator of random numbers */
curandGenerator_t gen;
......@@ -131,6 +134,9 @@ public:
/* clear it */
void Clear();
/* reset it */
void Reset();
#ifdef USE_CUDA
/* get cublas handle */
cublasHandle_t * GetCublasHandle();
......@@ -178,6 +184,9 @@ public:
/* number of GPUs */
int nGPU;
/* indicates whether the management of devices has been initialized */
bool isInitialized;
public:
/* constructor */
XDevManager();
......
......@@ -31,8 +31,8 @@
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
int testxmemid = 0;
void * recordp = NULL;
//int testxmemid = 0;
//void * recordp = NULL;
/*
for managing the memories
......@@ -1480,9 +1480,13 @@ void XMem::ShowMemUsage(FILE * file)
total += blocks[i].size;
}
}
MTYPE bufSizeTotal = bufSize;
MTYPE bufSizeUsed = bufUsed;
fprintf(file, "mem:%.1fMB used:%.1fMB usage:%.3f\n",
(DTYPE)total/MILLION, (DTYPE)used/MILLION, (DTYPE)used/total);
(DTYPE)total/ 1024 / 1024, (DTYPE)used/ 1024 / 1024, (DTYPE)used/total);
fprintf(file, "buf:%.1fMB used:%.1fMB usage:%.3f\n",
(DTYPE)bufSizeTotal / 1024 / 1024, (DTYPE)bufSizeUsed / 1024 / 1024, (DTYPE)bufSizeUsed / bufSizeTotal);
}
#ifdef USE_CUDA
......@@ -1632,7 +1636,8 @@ XMem * XMemManager::GetMem(const int devID)
mem = GPUMems + devID;
}
else{
XPRINT1(0, stderr, "Cannot get the memory (%d). Please check your device id!", devID);
XPRINT1(0, stderr, "Please check your device id (%d)!", devID);
ShowNTErrors("Cannot get the memory!");
}
}
......
......@@ -480,12 +480,10 @@ public:
/* managing the memories */
extern XMemManager GMems;
//extern XMem * GMem;
extern XMem * GMem;
extern int testxmemid;
extern void * recordp;
//extern int testxmemid;
//extern void * recordp;
} /* end of the nts (NiuTrans.Tensor) namespace */
......
......@@ -834,7 +834,7 @@ int CompXFloat(const void * a, const void * b)
void ResetGPUDevices()
{
#ifdef USE_CUDA
#if CUDART_VERSION < 10000
cudaThreadExit();
return;
......@@ -845,7 +845,60 @@ void ResetGPUDevices()
cudaSetDevice(i);
cudaDeviceReset();
}*/
#else
ShowNTErrors("TODO!");
#endif
#endif
}
/*
split a line of text into items
>> inputString - the line to split
>> seperator - the delimiter string; if it is empty the whole line becomes one item
>> items - the list that receives the pieces (cleared first); every piece is a
            copy allocated with new char[]
<< return - the number of items in the list
*/
int SplitALine(char* inputString, const char* seperator, StrList* items)
{
    items->Clear();

    if (inputString == NULL || seperator == NULL)
        return 0;

    const int textLen = (int)strlen(inputString);
    const int delimLen = (int)strlen(seperator);

    if (textLen == 0)
        return 0;

    /* no delimiter: copy the line as a single item */
    if (delimLen == 0) {
        char* whole = new char[textLen + 1];
        strcpy(whole, inputString);
        items->Add(whole);
        return items->count;
    }

    /* walk from delimiter to delimiter; the text after the last delimiter
       (possibly empty) is the final item */
    char* cursor = inputString;
    for (;;) {
        char* hit = strstr(cursor, seperator);
        const int pieceLen = (hit != NULL) ? (int)(hit - cursor)
                                           : textLen - (int)(cursor - inputString);

        char* piece = new char[pieceLen + 1];
        memcpy(piece, cursor, pieceLen);
        piece[pieceLen] = '\0';
        items->Add(piece);

        if (hit == NULL)
            break;
        cursor = hit + delimLen;
    }

    return items->count;
}
} // namespace nts(NiuTrans.Tensor)
......@@ -59,6 +59,8 @@ extern double GetClockSec();
extern void XQSort(void * data, void * index, int num, int width, int stride, int (*comp)(const void *, const void *));
extern int CompXFloat(const void * a, const void * b);
int SplitALine(char* inputString, const char* seperator, StrList* items);
#ifdef USE_CUDA
extern void XMemCopyAsync(void * t, int devIDT, const void * s, int devIDS, size_t size, cudaStream_t stream, int streamDevID);
#else
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论