Commit 2cc0a82d by liyinqiao

Bug fixed and clean the codes.

1. Try to fix the bugs in destroying the stream of XDevice (unchecked).
2. Fix the memory-leak bug in the ReduceSumAll function.
3. Adjust the directory structure.
4. Fix minor errors.
parent 9f12ebd2
Munich 18@@ 56 : Four maps that will change your view of the city
A mental asylum , where today young people are said to meet .
A cryp@@ t chap@@ el , where they are now dig@@ ging t@@ unn@@ els for the S @@@ -@@ @ Bahn .
Al@@ lo@@ t@@ ment holders cul@@ tiv@@ ate the soil of former farmers .
The oldest official map of Munich brings cap@@ tiv@@ ating stories to light .
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -43,7 +43,6 @@ typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
#if defined(USE_BLAS)
#ifdef OPENBLAS
#define XBLAS_SGEMM cblas_sgemm
......
......@@ -58,8 +58,6 @@ XDevice::~XDevice()
MUTEX_DELE(cublasMutex);
if(isHandleReady)
cublasDestroy(cublasHandle);
if(stream != NULL)
delete stream;
curandDestroyGenerator(gen);
#endif
}
......@@ -263,6 +261,13 @@ void XDevice::SetFastFlagsAllDevices()
#endif
}
/* 
delete the default stream for the device
(call it before deleting the XDevice; safe to call more than once)
*/
void XDevice::DelDeviceStream()
{
    if(stream != NULL){
        delete stream;
        /* reset the pointer so a repeated call (or a later check
           of the member) does not double-delete the stream */
        stream = NULL;
    }
}
/* constructor */
XDevManager::XDevManager()
{
......@@ -287,7 +292,7 @@ void XDevManager::Init()
nCPU = 1;
for(int i = 0; i < nCPU; i++)
CPUs[0].Init(-1);
CPUs[i].Init(-1);
/* GPUs */
int GPUCount = 0;
......@@ -606,5 +611,16 @@ char * XDevManager::GetDevString(int devID)
}
}
/* 
delete the default streams of every device managed by the global
device manager (all CPU devices first, then all GPU devices) 
*/
void XDevManager::DelDeviceStream()
{
    int i;

    /* CPU devices */
    for(i = 0; i < GDevs.nCPU; i++)
        GDevs.CPUs[i].DelDeviceStream();

    /* GPU devices */
    for(i = 0; i < GDevs.nGPU; i++)
        GDevs.GPUs[i].DelDeviceStream();
}
} /* end of the nts (NiuTrans.Tensor) namespace */
......@@ -64,20 +64,20 @@ public:
/* size of the memory */
int memSize;
/* warp size of an (Navida) GPU */
/* warp size of an (Nvidia) GPU */
int GPUWarpSize;
/* indicates whether the device class has been initialized */
bool isInitialized;
/*
max grid size (or number of blocks) of an (Navida) GPU
max grid size (or number of blocks) of an (Nvidia) GPU
NOTE: the grid size is alone with three dimensions (x, y, z)
*/
int GPUMaxGridSize[3];
/*
max block size (or number of threads per block) of an (Navida) GPU
max block size (or number of threads per block) of an (Nvidia) GPU
NOTE: the block size is alone with three dimensions (x, y, z)
*/
int GPUMaxBlockSize[3];
......@@ -158,6 +158,9 @@ public:
/* reset cuda flag for more efficient cuda execution (all devices) */
static
void SetFastFlagsAllDevices();
/* delete the default stream for the device (call it before deleting the XDevice) */
void DelDeviceStream();
};
/*
......@@ -216,6 +219,10 @@ public:
/* get the device information in string */
char * GetDevString(int devID);
/* delete the streams for all devices */
static
void DelDeviceStream();
};
/* managing the devices */
......
......@@ -147,6 +147,12 @@ XTensor::XTensor(const int myOrder, const int* myDimSize, const TENSOR_DATA_TYPE
if(order >= 0)
Resize(myOrder, myDimSize, myDataType, myDenseRatio);
#ifdef USE_CUDA
int gpuNum = 0;
cudaError_t error = cudaGetDeviceCount(&gpuNum);
#endif
atexit(XDevManager::DelDeviceStream);
}
/* copy constructor */
......
......@@ -80,6 +80,7 @@ void _ReduceSumAll(const XTensor * source, DTYPE * value)
_ReduceSumAll(source, target);
*value = target->Get0D();
delete[] dimSize;
DelTensorBuf(target);
}
......
......@@ -108,7 +108,7 @@ bool TestDropout1()
if(tmp2 == 0.0F)
zeroNum2 += 1;
}
printf("CPU Test:\n");
printf("GPU Test:\n");
printf("In tensor y, there are %d units.\n", unitNum);
printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum1, dropProb);
printf("In tensor yUser, there are %d units.\n", unitNum);
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论