Commit f74b1c17 by xiaotong

better way of memory copy

parent ae990819
NiuTrans.Tensor.vcxproj
NiuTrans.Tensor.vcxproj.filters
x64/
vc140.pdb
NiuTrans.Tensor.vcxproj.user
NiuTrans.Tensor.aps
......@@ -20,6 +20,7 @@
*/
#include "../../XDevice.h"
#include "../../XUtility.h"
#include "Sum.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
......
......@@ -117,7 +117,7 @@ void CudaGPUToCPUFlush(XTensor * tensor)
else {
tensor->dataHost = new char[tensor->unitNum * tensor->unitSize];
if (tensor->data != NULL)
- cudaMemcpy(tensor->dataHost, tensor->data, tensor->unitNum * tensor->unitSize, cudaMemcpyDeviceToHost);
+ XMemCopy(tensor->dataHost, -1, tensor->data, tensor->devID, tensor->unitNum * tensor->unitSize);
else
memset(tensor->dataHost, 0, tensor->unitNum * tensor->unitSize);
}
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论