update cuda code for Sum

561a9e58 · xiaotong · f5161a0d · 561a9e58 · 561a9e58 · 561a9e58
Commit 561a9e58 authored Jul 10, 2018 by xiaotong
--- a/source/core/arithmetic/Sum.cpp
+++ b/source/core/arithmetic/Sum.cpp
@@ -37,7 +37,7 @@ return a pointer
 */
 void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
 {
-    CheckNTErrors(a && b && c, "Empty tensors in addition!");
+    CheckNTErrors(a && b && c, "Empty tensor input!");
    CheckNTErrors(a->unitNum == b->unitNum && a->unitNum == c->unitNum,
                  "Unmatched tensors in addition!");
    CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,

--- a/source/core/arithmetic/Sum.cu
+++ b/source/core/arithmetic/Sum.cu
@@ -51,11 +51,9 @@ tensor summation c = a + b * \beta (cuda version)
 >> c - where we put a+b*\beta. it must not be NULL; pass a as c to save the result in a
 >> beta - the scaling factor
 */
-void _CudaSum(XTensor * a, XTensor * b, XTensor * c, DTYPE beta)
+void _CudaSum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
 {
-    if (c == NULL)
-        c = a;
-
+    CheckNTErrors(a && b && c, "Empty tensor input!");
    CheckNTErrors((a->unitNum == b->unitNum && a->unitNum == c->unitNum),
                  "Unmatched tensors in addition!");
    CheckNTErrors((a->dataType == b->dataType && a->dataType == c->dataType),

--- a/source/core/arithmetic/Sum.cuh
+++ b/source/core/arithmetic/Sum.cuh
@@ -34,7 +34,7 @@ void KernelADD(DTYPE * a, DTYPE * b, DTYPE * c, int size, DTYPE beta = (DTYPE)1.

 /* tensor summation c = a + b * \beta (cuda version) */
 extern "C"
-void _CudaSum(XTensor * a, XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0);
+void _CudaSum(const XTensor * a, const XTensor * b, XTensor * c = NULL, DTYPE beta = (DTYPE)1.0);

 /*  tensor summation c = a + b * \beta (cuda version) with an input handle */
 extern "C"

--- a/source/core/movement/CopyValues.cu
+++ b/source/core/movement/CopyValues.cu
@@ -35,7 +35,7 @@ copy a range of elements from a source vector to a target vector
 >> stream - the stream for creating the job pipeline
 << return - succeed or not
 */
-bool CudaCopyValues(XTensor * s, XTensor * t, XStream * stream)
+bool CudaCopyValues(const XTensor * s, XTensor * t, XStream * stream)
 {
    if (s == NULL || t == NULL)
        return false;
@@ -67,10 +67,10 @@ bool CudaCopyValues(XTensor * s, XTensor * t, XStream * stream)
    }
    /* sparse -> sparse */
    else if (s->isSparse && t->isSparse &&
-        s->dataType == DEFAULT_DTYPE &&
-        t->dataType == DEFAULT_DTYPE)
+             s->dataType == DEFAULT_DTYPE &&
+             t->dataType == DEFAULT_DTYPE)
    {
-        int num = s->GetNonzeroSize();
+        int num = s->unitNumNonZero;
        int size = sizeof(int) + num * (s->unitSize + sizeof(int));

        if (stream == NULL)

--- a/source/core/movement/CopyValues.cuh
+++ b/source/core/movement/CopyValues.cuh
@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)

 /* copy all elements from a source matrix to a target matrix */
 extern "C"
-bool CudaCopyValues(XTensor * s, XTensor * t, XStream * stream = NULL);
+bool CudaCopyValues(const XTensor * s, XTensor * t, XStream * stream = NULL);

 #endif // USE_CUDA


--- a/source/test/TSum.cpp
+++ b/source/test/TSum.cpp
@@ -73,7 +73,7 @@ bool TestSum1()
    bGPU->SetData(bData, unitNum);

    /* call sum function */
-    _Sum(aGPU, bGPU);
+    _Sum(aGPU, bGPU, aGPU);

    /* check results */
    gpuTest = aGPU->CheckData(answer, unitNum);