Commit b8d2319d by xuchen

optimize implementation of Sum

parent 18a08a65
...@@ -89,31 +89,11 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta) ...@@ -89,31 +89,11 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
/* when c != a, OpenBLAS needs to copy a to c first. This operation /* when c != a, OpenBLAS needs to copy a to c first. This operation
slows things down, so only use OpenBLAS when c == a */ slows things down, so only use OpenBLAS when c == a */
#if defined(USE_BLAS) #if defined(USE_BLAS)
if( c == a){ if (c == a) {
AXPY(a->unitNum,beta,bp,1,cp,1); AXPY(a->unitNum, beta, bp, 1, cp, 1);
} else{ return;
int num = a->unitNum;
if (num % 4 == 0) {
for (int i = 0; i < num; i += 4) {
cp[i] = ap[i] + bp[i] * beta;
cp[i + 1] = ap[i + 1] + bp[i + 1] * beta;
cp[i + 2] = ap[i + 2] + bp[i + 2] * beta;
cp[i + 3] = ap[i + 3] + bp[i + 3] * beta;
}
}
else if (num % 2 == 0) {
for (int i = 0; i < num; i += 2) {
cp[i] = ap[i] + bp[i] * beta;
cp[i + 1] = ap[i + 1] + bp[i + 1] * beta;
}
}
else {
for (int i = 0; i < num; i++) {
cp[i] = ap[i] + bp[i] * beta;
}
}
} }
#else #endif
/* unrolling */ /* unrolling */
int num = a->unitNum; int num = a->unitNum;
if (num % 4 == 0) { if (num % 4 == 0) {
...@@ -135,7 +115,6 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta) ...@@ -135,7 +115,6 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
cp[i] = ap[i] + bp[i] * beta; cp[i] = ap[i] + bp[i] * beta;
} }
} }
#endif
} }
else { else {
// TODO!! // TODO!!
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论