Commit c63a9754 by liyinqiao

Bug fixed.

1. Fix bugs in the SetDevice and FlushToMem functions.
2. Fix the minor errors.
parent 29c26ce6
......@@ -1632,7 +1632,8 @@ XMem * XMemManager::GetMem(const int devID)
mem = GPUMems + devID;
}
else{
XPRINT1(0, stderr, "Cannot get the memory (%d). Please check your device id!", devID);
XPRINT1(0, stderr, "Please check your device id (%d)!", devID);
ShowNTErrors("Cannot get the memory!");
}
}
......
......@@ -520,9 +520,8 @@ relocate the data on the target device
*/
void XTensor::SetDevice(int myDevId, XMem* myMem)
{
    /* when no pool is given, fall back to the globally managed
       memory pool for the target device */
    if (myMem == NULL)
        myMem = GMems.GetMem(myDevId);

    /* move the data into the target memory pool */
    FlushToMem(myMem);
    isInGlobalMem = false;
}
......@@ -1914,6 +1913,7 @@ void XTensor::FlushToMem(XMem* targetMem)
else if (mem != targetMem) {
void* tmpData = targetMem->Alloc(targetMem->devID, GetDataSizeInChar());
XMemCopy(tmpData, targetMem->devID, data, devID, GetDataSizeInChar());
mem->Release(data, GetDataSizeInChar(), signature);
data = tmpData;
mem = targetMem;
devID = mem->devID;
......@@ -1925,13 +1925,19 @@ void XTensor::FlushToMem(XMem* targetMem)
else {
if (devID >= 0) {
#ifdef USE_CUDA
CudaGPUToCPUFlush(this);
mem = targetMem;
devID = mem->devID;
CudaGPUToCPUFlush(this, targetMem->devID, targetMem);
#else
ShowNTErrors("Recompile the code with USE_CUDA!");
#endif
}
else if (mem != targetMem) {
void* tmpData = targetMem->Alloc(targetMem->devID, GetDataSizeInChar());
XMemCopy(tmpData, targetMem->devID, data, devID, GetDataSizeInChar());
mem->Release(data, GetDataSizeInChar(), signature);
data = tmpData;
mem = targetMem;
devID = mem->devID;
}
}
}
......
......@@ -78,6 +78,8 @@ void CudaCPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem)
if(m->mem == NULL)
delete[] (char*)m->data;
else
m->mem->Release(m->data, m->GetDataSizeInChar(), m->signature);
m->dataHost = NULL;
m->data = GPUData + p;
......@@ -94,7 +96,36 @@ void CudaCPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem)
#endif
}
/*
copy the data from GPU memory to CPU memory (memory pool)
>> tensor - the tensor whose data is flushed to the CPU
>> devID - device id used for the raw allocation when no pool is given
>> CPUMem - target memory pool on the CPU side (may be NULL)
*/
void CudaGPUToCPUFlush(XTensor * tensor, int devID, XMem * CPUMem)
{
#ifdef USE_CUDA
    CheckNTErrors((tensor->devID >= 0), "Cannot do cpu-flush on matrices that are already on CPU.");

    /* compute the required memory size */
    int size = 0;
    if (tensor->isSparse)
        /* tuple count + (index, value) pair per non-zero entry */
        size = sizeof(int) + (sizeof(int) + tensor->unitSize) * tensor->unitNumNonZero;
    else
        size = tensor->unitSize * tensor->unitNum;

    /* allocate the CPU-side buffer from the pool when one is given,
       otherwise fall back to a raw allocation on devID */
    char * CPUData = CPUMem != NULL ? (char*)CPUMem->Alloc(CPUMem->devID, size) :
                                      (char*)XMemAlloc(devID, size);

    /* copy from GPU memory to CPU memory */
    cudaMemcpy(CPUData, tensor->data, size, cudaMemcpyDeviceToHost);

    if (tensor->dataHost != NULL)
        delete[] (char*)tensor->dataHost;
    tensor->dataHost = NULL;

    /* release the old GPU buffer; guard against tensors whose data
       does not live in a memory pool (mem == NULL).
       NOTE(review): when mem == NULL the old GPU buffer is not freed
       here — presumably its owner frees it elsewhere; verify. */
    if (tensor->mem != NULL)
        tensor->mem->Release(tensor->data, tensor->GetDataSizeInChar(), tensor->signature);

    tensor->data = CPUData;
    tensor->devID = CPUMem != NULL ? CPUMem->devID : devID;
    tensor->mem = CPUMem;
#endif
}
/* copy the data from GPU memory to CPU memory ((dataHost)) and do not delete the data */
void CudaGPUToCPUFlush(XTensor * tensor)
{
CheckNTErrors((sizeof(DTYPE) == tensor->unitSize), "Unsupported data type.");
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论