Commit ba4c8180 by liyinqiao

Fix bugs and clean up code. (IMPORTANT)

1. Fix bugs in the MatrixMul functions that could produce NaN in some cases (the output tensor was not zeroed when beta == 0). (IMPORTANT)
2. Fix minor errors.
parent a85e2079
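
Note: the affected kernels compute c = alpha * a*b + beta * c and read the old contents of c even when beta == 0. If c holds uninitialized memory (possibly NaN or Inf), IEEE 754 gives 0 * NaN = NaN, so garbage leaks into the result. A minimal standalone sketch of the failure mode (plain C++, not the NiuTensor API):

    #include <cmath>
    #include <cstdio>

    int main() {
        float stale = NAN;      // stands in for uninitialized memory in c
        float alpha = 1.0f, beta = 0.0f;
        float ab = 42.0f;       // the a*b term

        // naive accumulation: c = alpha * a*b + beta * c
        float c = alpha * ab + beta * stale;
        std::printf("%f\n", c); // prints nan, not 42.000000
        return 0;
    }

The fix below clears c with SetZeroAll whenever beta == 0, so the beta * c term contributes exactly zero.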
@@ -159,6 +159,10 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
"The code must be run on the same GPU!");
int devIDBackup;
if (beta == 0)
c->SetZeroAll();
ProtectCudaDev(a->devID, devIDBackup);
cublasHandle_t * handle = a->mem != NULL ? a->mem->GetCublasHandle() : GDevs.GetCudaHandle(a->devID);
......
@@ -50,7 +50,7 @@ void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
XPRunner * parallelRunner, XStream * stream)
{
CheckNTErrors((a && b && c), "Empty input tensors!");
CheckNTErrors((a->dataType == b->dataType), "Input tensors should have the same data type!");
CheckNTErrors((a->order == 2 && b->order == 2 && c->order == 2),
"Input tensors must have a order = 2!");
@@ -83,9 +83,9 @@ void _MatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
c->dataType == DEFAULT_DTYPE)
{
#if defined(USE_BLAS)
_MatrixMULCPU(a, transposedA, b, transposedB, c, alpha, beta);
#else
_MatrixMul2DParallel(a, transposedA, b, transposedB, c, alpha, beta, parallelRunner);
#endif
}
else {
......
@@ -156,6 +156,9 @@ void _CudaMatrixMul2D(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
if (stream != NULL)
cublasSetStream(*handle, stream->stream);
if (beta == 0)
c->SetZeroAll();
if (a->dataType == X_FLOAT && b->dataType == X_FLOAT && c->dataType == X_FLOAT) {
_CudaBLASMatrixMUL(handle, a->data, transposedA, a->dataType,
b->data, transposedB, a->dataType, c->data, c->dataType,
......
@@ -54,6 +54,9 @@ void _MatrixMul2DParallel(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
int aColNum = am;
int bColNum = bm;
if (beta == 0)
c->SetZeroAll();
/* a * b */
if (transposedA == X_NOTRANS && transposedB == X_NOTRANS) {
RunParallel2D(parallelRunner, (void*)_MatrixMul2DMultiTheading, an * am * bm,
......
@@ -118,6 +118,9 @@ void _MatrixMulBatchedGPU(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
blockNum *= a->dimSize[i];
}
if (beta == 0)
c->SetZeroAll();
int devIDBackup = 0;
ProtectCudaDev(a->devID, devIDBackup);
@@ -260,7 +263,7 @@ void _MatrixMulBatchedCPU(const TensorList * a, MATRIX_TRANS_TYPE transposedA,
CheckNTErrors((bi->order == 2), "2d tensor (i.e., matrix) is required!");
CheckNTErrors((ci->order == 2), "2d tensor (i.e., matrix) is required!");
#ifdef USE_BLAS
_MatrixMULCPU(ai, transposedA, bi, transposedB, ci, alpha, beta);
#else
_MatrixMul2D(ai, transposedA, bi, transposedB, ci, alpha, beta);
#endif
......
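
For reference, the same guard in a self-contained form. gemm_guarded and set_zero_all are hypothetical names; this is a sketch of the pattern under the commit's assumptions, not the library implementation:

    #include <cstring>

    // Hypothetical stand-in for XTensor::SetZeroAll().
    static void set_zero_all(float * data, int n) {
        std::memset(data, 0, sizeof(float) * n);
    }

    // Naive c = alpha * a*b + beta * c (row-major, no transposition).
    // As in the hunks above, c is cleared when beta == 0 so stale
    // NaN/Inf values cannot survive the beta * c term.
    void gemm_guarded(const float * a, const float * b, float * c,
                      int m, int k, int n, float alpha, float beta) {
        if (beta == 0.0f)
            set_zero_all(c, m * n);
        for (int i = 0; i < m; ++i) {
            for (int j = 0; j < n; ++j) {
                float sum = 0.0f;
                for (int p = 0; p < k; ++p)
                    sum += a[i * k + p] * b[p * n + j];
                c[i * n + j] = alpha * sum + beta * c[i * n + j];
            }
        }
    }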