Commit df76b612 by xiaotong

Improve code for multi-GPU runs

parent 182633ac
......@@ -128,8 +128,6 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, bool skipInputRes)
/* layer normalization */
x = attLayerNorms[i].Make(res);
llnum = -1;
}
/* fnn */
......
No preview for this file type
......@@ -552,11 +552,17 @@ void XTensor::SetZeroAll(XStream * stream)
if(devID >= 0){
#ifdef USE_CUDA
int size = sizeof(int) + (sizeof(int)+sizeof(DTYPE)) * unitNumNonZero;
int devIDBackup = 0;
cudaGetDevice(&devIDBackup);
cudaSetDevice(devID);
if(stream == NULL)
cudaMemset(data, 0, size);
else
cudaMemsetAsync(data, 0, size, stream->stream);
cudaSetDevice(devIDBackup);
#endif
}
else
......@@ -567,10 +573,16 @@ void XTensor::SetZeroAll(XStream * stream)
else{
if(devID >= 0){
#ifdef USE_CUDA
int devIDBackup = 0;
cudaGetDevice(&devIDBackup);
cudaSetDevice(devID);
if(stream == NULL)
cudaMemset(data, 0, unitNum * unitSize);
else
cudaMemsetAsync(data, 0, unitNum * unitSize, stream->stream);
cudaSetDevice(devIDBackup);
#endif
}
else
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论