Commit c63a9754 by liyinqiao

Bug fixed.

1. Fix the bugs in SetDevice functions and FlushToMem functions.
2. Fix the minor errors.
parent 29c26ce6
...@@ -1632,7 +1632,8 @@ XMem * XMemManager::GetMem(const int devID) ...@@ -1632,7 +1632,8 @@ XMem * XMemManager::GetMem(const int devID)
mem = GPUMems + devID; mem = GPUMems + devID;
} }
else{ else{
XPRINT1(0, stderr, "Cannot get the memory (%d). Please check your device id!", devID); XPRINT1(0, stderr, "Please check your device id (%d)!", devID);
ShowNTErrors("Cannot get the memory!");
} }
} }
......
...@@ -520,9 +520,8 @@ relocate the data on the target device ...@@ -520,9 +520,8 @@ relocate the data on the target device
*/ */
void XTensor::SetDevice(int myDevId, XMem* myMem) void XTensor::SetDevice(int myDevId, XMem* myMem)
{ {
if(myMem == NULL){ if(myMem == NULL)
myMem = GMems.GetMem(myDevId); myMem = GMems.GetMem(myDevId);
}
FlushToMem(myMem); FlushToMem(myMem);
isInGlobalMem = false; isInGlobalMem = false;
} }
...@@ -1914,6 +1913,7 @@ void XTensor::FlushToMem(XMem* targetMem) ...@@ -1914,6 +1913,7 @@ void XTensor::FlushToMem(XMem* targetMem)
else if (mem != targetMem) { else if (mem != targetMem) {
void* tmpData = targetMem->Alloc(targetMem->devID, GetDataSizeInChar()); void* tmpData = targetMem->Alloc(targetMem->devID, GetDataSizeInChar());
XMemCopy(tmpData, targetMem->devID, data, devID, GetDataSizeInChar()); XMemCopy(tmpData, targetMem->devID, data, devID, GetDataSizeInChar());
mem->Release(data, GetDataSizeInChar(), signature);
data = tmpData; data = tmpData;
mem = targetMem; mem = targetMem;
devID = mem->devID; devID = mem->devID;
...@@ -1925,13 +1925,19 @@ void XTensor::FlushToMem(XMem* targetMem) ...@@ -1925,13 +1925,19 @@ void XTensor::FlushToMem(XMem* targetMem)
else { else {
if (devID >= 0) { if (devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
CudaGPUToCPUFlush(this); CudaGPUToCPUFlush(this, targetMem->devID, targetMem);
mem = targetMem;
devID = mem->devID;
#else #else
ShowNTErrors("Recompile the code with USE_CUDA!"); ShowNTErrors("Recompile the code with USE_CUDA!");
#endif #endif
} }
else if (mem != targetMem) {
void* tmpData = targetMem->Alloc(targetMem->devID, GetDataSizeInChar());
XMemCopy(tmpData, targetMem->devID, data, devID, GetDataSizeInChar());
mem->Release(data, GetDataSizeInChar(), signature);
data = tmpData;
mem = targetMem;
devID = mem->devID;
}
} }
} }
......
...@@ -78,6 +78,8 @@ void CudaCPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem) ...@@ -78,6 +78,8 @@ void CudaCPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem)
if(m->mem == NULL) if(m->mem == NULL)
delete[] (char*)m->data; delete[] (char*)m->data;
else
m->mem->Release(m->data, m->GetDataSizeInChar(), m->signature);
m->dataHost = NULL; m->dataHost = NULL;
m->data = GPUData + p; m->data = GPUData + p;
...@@ -94,7 +96,36 @@ void CudaCPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem) ...@@ -94,7 +96,36 @@ void CudaCPUToGPUFlush(TensorList * mList, int devID, XMem * GPUMem)
#endif #endif
} }
/* 
copy the data from GPU memory to CPU memory (memory pool)
>> tensor - the tensor whose data is moved to the CPU side
>> devID - id of the target (CPU) device, used when CPUMem is NULL
>> CPUMem - CPU memory pool that receives the data; may be NULL, in
            which case the buffer is allocated with XMemAlloc on devID
*/
void CudaGPUToCPUFlush(XTensor * tensor, int devID, XMem * CPUMem)
{
#ifdef USE_CUDA
    CheckNTErrors((tensor->devID >= 0), "Cannot do cpu-flush on matrices that are already on CPU.");

    /* compute the required memory size */
    int size = 0;
    if (tensor->isSparse)   /* fixed: was "m->isSparse" — "m" is undefined in this scope */
        size = sizeof(int) + (sizeof(int) + tensor->unitSize) * tensor->unitNumNonZero;
    else
        size = tensor->unitSize * tensor->unitNum;

    /* destination buffer on the CPU side */
    char * CPUData = CPUMem != NULL ? (char*)CPUMem->Alloc(CPUMem->devID, size) :
                                      (char*)XMemAlloc(devID, size);

    /* copy from GPU memory to CPU memory (fixed: comment said the opposite direction) */
    cudaMemcpy(CPUData, tensor->data, size, cudaMemcpyDeviceToHost);

    if (tensor->dataHost != NULL)
        delete[] (char*)tensor->dataHost;
    tensor->dataHost = NULL;

    /* give the old GPU buffer back to its pool. Guard against tensors that are
       not backed by a memory pool (mem == NULL) — the unconditional Release()
       would dereference NULL. NOTE(review): in the mem == NULL case the raw
       GPU buffer is not freed here; confirm who owns it (cf. the NULL check
       in CudaCPUToGPUFlush). */
    if (tensor->mem != NULL)
        tensor->mem->Release(tensor->data, tensor->GetDataSizeInChar(), tensor->signature);

    tensor->data = CPUData;
    tensor->devID = CPUMem != NULL ? CPUMem->devID : devID;
    tensor->mem = CPUMem;
#endif
}
/* copy the data from GPU memory to CPU memory ((dataHost)) and do not delete the data */
void CudaGPUToCPUFlush(XTensor * tensor) void CudaGPUToCPUFlush(XTensor * tensor)
{ {
CheckNTErrors((sizeof(DTYPE) == tensor->unitSize), "Unsupported data type."); CheckNTErrors((sizeof(DTYPE) == tensor->unitSize), "Unsupported data type.");
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论