Commit b8d2319d by xuchen

optimize implementation of Sum

parent 18a08a65
...@@ -89,54 +89,33 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta) ...@@ -89,54 +89,33 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
/* when c != a, OpenBLAS needs to copy a to c first. This operation /* when c != a, OpenBLAS needs to copy a to c first. This operation
slows the computation down, so OpenBLAS is used only when c == a */ slows the computation down, so OpenBLAS is used only when c == a */
#if defined(USE_BLAS) #if defined(USE_BLAS)
if( c == a){ if (c == a) {
AXPY(a->unitNum,beta,bp,1,cp,1); AXPY(a->unitNum, beta, bp, 1, cp, 1);
} else{ return;
int num = a->unitNum;
if (num % 4 == 0) {
for (int i = 0; i < num; i += 4) {
cp[i] = ap[i] + bp[i] * beta;
cp[i + 1] = ap[i + 1] + bp[i + 1] * beta;
cp[i + 2] = ap[i + 2] + bp[i + 2] * beta;
cp[i + 3] = ap[i + 3] + bp[i + 3] * beta;
}
}
else if (num % 2 == 0) {
for (int i = 0; i < num; i += 2) {
cp[i] = ap[i] + bp[i] * beta;
cp[i + 1] = ap[i + 1] + bp[i + 1] * beta;
}
}
else {
for (int i = 0; i < num; i++) {
cp[i] = ap[i] + bp[i] * beta;
}
}
} }
#else #endif
/* unrolling */ /* unrolling */
int num = a->unitNum; int num = a->unitNum;
if (num % 4 == 0) { if (num % 4 == 0) {
for (int i = 0; i < num; i += 4) { for (int i = 0; i < num; i += 4) {
cp[i] = ap[i] + bp[i] * beta; cp[i] = ap[i] + bp[i] * beta;
cp[i + 1] = ap[i + 1] + bp[i + 1] * beta; cp[i + 1] = ap[i + 1] + bp[i + 1] * beta;
cp[i + 2] = ap[i + 2] + bp[i + 2] * beta; cp[i + 2] = ap[i + 2] + bp[i + 2] * beta;
cp[i + 3] = ap[i + 3] + bp[i + 3] * beta; cp[i + 3] = ap[i + 3] + bp[i + 3] * beta;
}
} }
else if (num % 2 == 0) { }
for (int i = 0; i < num; i += 2) { else if (num % 2 == 0) {
cp[i] = ap[i] + bp[i] * beta; for (int i = 0; i < num; i += 2) {
cp[i + 1] = ap[i + 1] + bp[i + 1] * beta; cp[i] = ap[i] + bp[i] * beta;
} cp[i + 1] = ap[i + 1] + bp[i + 1] * beta;
} }
else { }
for (int i = 0; i < num; i++) { else {
cp[i] = ap[i] + bp[i] * beta; for (int i = 0; i < num; i++) {
} cp[i] = ap[i] + bp[i] * beta;
} }
#endif
} }
}
else { else {
// TODO!! // TODO!!
ShowNTErrors("TODO!"); ShowNTErrors("TODO!");
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论