Commit df76b612 by xiaotong

Improve the code for multi-GPU runs

parent 182633ac
...@@ -128,8 +128,6 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, bool skipInputRes) ...@@ -128,8 +128,6 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, bool skipInputRes)
/* layer normalization */ /* layer normalization */
x = attLayerNorms[i].Make(res); x = attLayerNorms[i].Make(res);
llnum = -1;
} }
/* fnn */ /* fnn */
......
No preview for this file type
...@@ -553,10 +553,16 @@ void XTensor::SetZeroAll(XStream * stream) ...@@ -553,10 +553,16 @@ void XTensor::SetZeroAll(XStream * stream)
#ifdef USE_CUDA #ifdef USE_CUDA
int size = sizeof(int) + (sizeof(int)+sizeof(DTYPE)) * unitNumNonZero; int size = sizeof(int) + (sizeof(int)+sizeof(DTYPE)) * unitNumNonZero;
int devIDBackup = 0;
cudaGetDevice(&devIDBackup);
cudaSetDevice(devID);
if(stream == NULL) if(stream == NULL)
cudaMemset(data, 0, size); cudaMemset(data, 0, size);
else else
cudaMemsetAsync(data, 0, size, stream->stream); cudaMemsetAsync(data, 0, size, stream->stream);
cudaSetDevice(devIDBackup);
#endif #endif
} }
else else
...@@ -567,10 +573,16 @@ void XTensor::SetZeroAll(XStream * stream) ...@@ -567,10 +573,16 @@ void XTensor::SetZeroAll(XStream * stream)
else{ else{
if(devID >= 0){ if(devID >= 0){
#ifdef USE_CUDA #ifdef USE_CUDA
int devIDBackup = 0;
cudaGetDevice(&devIDBackup);
cudaSetDevice(devID);
if(stream == NULL) if(stream == NULL)
cudaMemset(data, 0, unitNum * unitSize); cudaMemset(data, 0, unitNum * unitSize);
else else
cudaMemsetAsync(data, 0, unitNum * unitSize, stream->stream); cudaMemsetAsync(data, 0, unitNum * unitSize, stream->stream);
cudaSetDevice(devIDBackup);
#endif #endif
} }
else else
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论