Commit f74b1c17 by xiaotong

Use XMemCopy instead of a direct cudaMemcpy call for the device-to-host copy

parent ae990819
NiuTrans.Tensor.vcxproj NiuTrans.Tensor.vcxproj
NiuTrans.Tensor.vcxproj.filters NiuTrans.Tensor.vcxproj.filters
x64/ x64/
vc140.pdb
NiuTrans.Tensor.vcxproj.user
NiuTrans.Tensor.aps
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
*/ */
#include "../../XDevice.h" #include "../../XDevice.h"
#include "../../XUtility.h"
#include "Sum.cuh" #include "Sum.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
......
...@@ -117,7 +117,7 @@ void CudaGPUToCPUFlush(XTensor * tensor) ...@@ -117,7 +117,7 @@ void CudaGPUToCPUFlush(XTensor * tensor)
else { else {
tensor->dataHost = new char[tensor->unitNum * tensor->unitSize]; tensor->dataHost = new char[tensor->unitNum * tensor->unitSize];
if (tensor->data != NULL) if (tensor->data != NULL)
cudaMemcpy(tensor->dataHost, tensor->data, tensor->unitNum * tensor->unitSize, cudaMemcpyDeviceToHost); XMemCopy(tensor->dataHost, -1, tensor->data, tensor->devID, tensor->unitNum * tensor->unitSize);
else else
memset(tensor->dataHost, 0, tensor->unitNum * tensor->unitSize); memset(tensor->dataHost, 0, tensor->unitNum * tensor->unitSize);
} }
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论