Commit 84ea960a by liyinqiao, committed by Li Yinqiao (李垠桥)

Support 0-d tensors (scalars) in the ReduceSumAll function.

Use a tensor as the underlying implementation and also support plain scalars.
Note: the backward pass for ReduceSumAll is NOT supported yet.
parent 36135d25
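After this commit, ReduceSumAll returns a 0-d XTensor; callers that want a plain number use the new _ReduceSumAll(source, &value) overload or ReduceSumAllValue. A minimal usage sketch (not part of the commit; it assumes a CPU build and the NewTensorV2/SetData helpers that appear elsewhere in this diff):

#include "ReduceSumAll.h"

using namespace nts;

void ReduceSumAllDemo()
{
    /* a 2 x 2 tensor with known values */
    int dimSize[2] = {2, 2};
    XTensor * s = NewTensorV2(2, dimSize, X_FLOAT, 1.0F, -1);
    DTYPE sData[4] = {1.0F, 2.0F, 3.0F, 4.0F};
    s->SetData(sData, 4);

    /* new interface: the result is a 0-d (scalar) tensor */
    XTensor t = ReduceSumAll(*s);
    DTYPE sum1 = t.Get0D();              /* 10.0F */

    /* convenience interfaces that yield a plain scalar */
    DTYPE sum2 = ReduceSumAllValue(*s);  /* 10.0F */
    DTYPE sum3;
    _ReduceSumAll(s, &sum3);             /* 10.0F */

    delete s;
}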
......@@ -101,6 +101,8 @@ void XMathGrad::MakeGrad(XTensor * node, bool isEfficient)
GradReduceMean(node, isEfficient);
else if(operID == REDUCE_REDUCESUM)
GradReduceSum(node, isEfficient);
else if(operID == REDUCE_REDUCESUMALL)
GradReduceSumAll(node, isEfficient);
else if(operID == REDUCE_REDUCESUMSQUARED)
GradReduceSumSquared(node, isEfficient);
else if(operID == REDUCE_REDUCEVARIANCE)
......@@ -1394,6 +1396,16 @@ void XMathGrad::GradReduceSum(XTensor * node, bool isEfficient)
}
/*
gradient for reduceSumAll
for c = \sum_i a_i
TODO
*/
void XMathGrad::GradReduceSumAll(XTensor * node, bool isEfficient)
{
ShowNTErrors("TODO!");
}
/*
gradient for reduceSumSquared
for
c = \sum_i (a_i - b)^2
......
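GradReduceSumAll in the hunk above is deliberately left as a stub (see the commit note). For reference, since c = \sum_i a_i, the eventual backward is just a broadcast of the scalar gradient, dE/da_i = dE/dc. A hypothetical sketch of what the body might look like, modeled on the neighboring Grad* functions (NOT the project's implementation; it assumes the 0-d gradient can be fetched with Get0D):

/* hypothetical backward for reduceSumAll (not in this commit):
   for c = \sum_i a_i we have dE/da_i = dE/dc, i.e. the scalar
   gradient is added to every element of a's gradient */
void XMathGrad::GradReduceSumAll(XTensor * node, bool isEfficient)
{
    XLink &income = node->income;
    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for ReduceSumAll!");

    XTensor * a = income.tails[0];

    if (!isEfficient || a->isGrad) {
        XNoder::MakeGrad(a);

        /* fetch the 0-d gradient and broadcast-add it to a's gradient */
        DTYPE v = node->grad->Get0D();
        _ScaleAndShiftMe(a->grad, 1.0F, v);
    }

    node->visitMark = NODE_FINISHED;
}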
......@@ -173,6 +173,10 @@ private:
static
void GradReduceSum(XTensor * node, bool isEfficient);
/* gradient for reduceSumAll */
static
void GradReduceSumAll(XTensor * node, bool isEfficient);
/* gradient for reduceSumSquared */
static
void GradReduceSumSquared(XTensor * node, bool isEfficient);
......
......@@ -493,7 +493,8 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
Update(model, grad, learningRate, true);
/* get probabilities */
float prob = ReduceSumAll(lossTensor);
float prob;
_ReduceSumAll(&lossTensor, &prob);
loss += prob;
}
......
......@@ -213,7 +213,8 @@ void T2TTrainer::Train(const char * fn, const char * validFN, const char * model
//float prob = GetProb(&output, &labelOnehot, NULL);
XTensor lossTensor;
lossTensor = CrossEntropy(output, labelOnehot, paddingDec);
float prob = ReduceSumAll(lossTensor);
float prob;
_ReduceSumAll(&lossTensor, &prob);
DTYPE lossLocal = prob / wc;
bool doUpdate = (!IsNAN(lossLocal) && !IsINF(lossLocal) && lossLocal < 1e3F);
......@@ -624,7 +625,8 @@ void T2TTrainer::RescaleOutput(XTensor * output, XTensor * gold, XTensor * paddi
CheckNTErrors(output->order == 3, "Wrong dimension number!");
CheckNTErrors(gold->order == 3, "Wrong dimension number!");
DTYPE count = _ReduceSumAll(padding);
DTYPE count;
_ReduceSumAll(padding, &count);
_ExpMe(output);
_ScaleAndShiftMe(output, 1/count);
......
......@@ -105,6 +105,8 @@ const char * GetOPName(int type)
return "R_REDUCEMEAN";
else if (type == REDUCE_REDUCESUM)
return "R_REDUCESUM";
else if (type == REDUCE_REDUCESUMALL)
return "R_REDUCESUMALL";
else if (type == REDUCE_REDUCESUMSQUARED)
return "R_REDUCESUMSQUARED";
else if (type == REDUCE_REDUCEVARIANCE)
......
......@@ -76,7 +76,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define REDUCE_REDUCEMAX REDUCE + 1
#define REDUCE_REDUCEMEAN REDUCE_REDUCEMAX + 1
#define REDUCE_REDUCESUM REDUCE_REDUCEMEAN + 1
#define REDUCE_REDUCESUMSQUARED REDUCE_REDUCESUM + 1
#define REDUCE_REDUCESUMALL REDUCE_REDUCESUM + 1
#define REDUCE_REDUCESUMSQUARED REDUCE_REDUCESUMALL + 1
#define REDUCE_REDUCEVARIANCE REDUCE_REDUCESUMSQUARED + 1
/* data and shape related operations */
......
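Since the op IDs are chained #defines, splicing REDUCE_REDUCESUMALL into the chain keeps every ID unique but shifts the IDs that follow it. A standalone sanity check (hypothetical: the real REDUCE base is itself chained from the previous operation group in XName.h):

#include <cstdio>

#define REDUCE                  0x00005000  /* hypothetical base value */
#define REDUCE_REDUCEMAX        REDUCE + 1
#define REDUCE_REDUCEMEAN       REDUCE_REDUCEMAX + 1
#define REDUCE_REDUCESUM        REDUCE_REDUCEMEAN + 1
#define REDUCE_REDUCESUMALL     REDUCE_REDUCESUM + 1
#define REDUCE_REDUCESUMSQUARED REDUCE_REDUCESUMALL + 1
#define REDUCE_REDUCEVARIANCE   REDUCE_REDUCESUMSQUARED + 1

int main()
{
    /* the IDs stay consecutive and unique after the insertion: 20483 20484 20485 */
    printf("%d %d %d\n", REDUCE_REDUCESUM, REDUCE_REDUCESUMALL, REDUCE_REDUCESUMSQUARED);
    return 0;
}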
......@@ -16,11 +16,12 @@
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-27
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2020-01-09
*/
#include "ReduceSumAll.h"
#include "ReduceSum.h"
#include "../../XName.h"
#include "../movement/CopyValues.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -42,55 +43,68 @@ int * getDimSize(const XTensor * tensor, int n)
/*
sum all the items of the tensor (It should be optimized!)
>> source - the input tensor
<< return - the total summation
*/
DTYPE _ReduceSumAll(const XTensor * source)
{
    int dims[2] = {1, source->unitNum};
    int one = 1;

    XTensor * all = NewTensorBufV2(2, dims, source->dataType, source->denseRatio, source->devID, source->mem);
    XTensor * result = NewTensorBufV2(1, &one, source->dataType, 1.0F, source->devID, source->mem);

    _CopyValues(source, all);
    _ReduceSum(all, result, 1);

    DTYPE r = result->Get1D(0);

    DelTensorBuf(result);
    DelTensorBuf(all);

    return r;

    /*int order = source->order;
    DTYPE summation;

    XTensor * big = NewTensor(source);
    _CopyValues(source, big);
    for(int i = order - 1; i >= 0; i--) {
        if(i == 0)
            big->Reshape(1, big->unitNum);

        int leadingDim = big->order - 1;
        int * dimSize;
        dimSize = getDimSize(big, leadingDim);
        XTensor * little = NewTensorV2(big->order - 1, dimSize, source->dataType, source->denseRatio,
                                       source->devID, source->mem);

        _ReduceSum(big, little, leadingDim);

        delete big;
        delete dimSize;

        big = NewTensor(little);
        _CopyValues(little, big);

        delete little;
    }
    summation = big->Get1D(0);
    delete big;

    return summation;*/
}

/*
sum all the items of the tensor (It should be optimized!)
>> source - the input tensor
<< target - the total summation
*/
void _ReduceSumAll(const XTensor * source, XTensor * target)
{
    CheckNTErrors((source && target), "Empty input or output tensors!");
    CheckNTErrors((source->devID == target->devID || (source->devID < 0 && target->devID < 0)),
                  "This code must be run on the same device!");
    CheckNTErrors((target->order == 0), "Incorrect target tensor sizes!");
    CheckNTErrors((target->unitNum == 1), "Illegal dimension to reduce!");
    CheckNTErrors((source->dataType == target->dataType), "Unmatched data types!");

    int dims[1] = {source->unitNum};

    XTensor * all = NewTensorBufV2(1, dims, source->dataType, source->denseRatio, source->devID, source->mem);

    _CopyValues(source, all);
    _ReduceSum(all, target, 0);

    DelTensorBuf(all);
}

/*
sum all the items of the tensor (It should be optimized!)
>> source - the input tensor
<< value - the total summation
*/
void _ReduceSumAll(const XTensor * source, DTYPE * value)
{
    int * dimSize = new int[MAX_TENSOR_DIM_NUM];
    float dr = (!source->isSparse) ? 1.0F : source->denseRatio;
    XTensor * target = NewTensorBufV2(0, dimSize, source->dataType, dr, source->devID, source->mem);
    target->SetTMPFlag();

    /* call the tensor-version _ReduceSumAll function */
    _ReduceSumAll(source, target);
    *value = target->Get0D();

    DelTensorBuf(target);
    delete[] dimSize;
}

/*
sum all the items of the tensor
>> source - the input tensor
<< return - the total summation
*/
XTensor ReduceSumAll(const XTensor & source)
{
    int * dimSize = new int[MAX_TENSOR_DIM_NUM];
    float dr = (!source.isSparse) ? 1.0F : source.denseRatio;
    XTensor target(0, dimSize, source.dataType, dr, source.devID, source.mem);
    target.SetTMPFlag();

    /* call the tensor-version _ReduceSumAll function */
    _ReduceSumAll(&source, &target);

    /* tensor connection */
    if (source.enableGrad) {
        XLink::MakeLink(&source, NULL, &target, REDUCE_REDUCESUMALL);
    }

    /* destroy variables */
    delete[] dimSize;

    return target;
}
/*
......@@ -98,9 +112,11 @@ sum all the items of the tensor
>> source - the input tensor
<< return - the total summation
*/
DTYPE ReduceSumAll(const XTensor & source)
DTYPE ReduceSumAllValue(const XTensor & source)
{
    return _ReduceSumAll(&source);
    XTensor target;
    target = ReduceSumAll(source);
    return target.Get0D();
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
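The new code relies on 0-d tensors behaving like scalars: order 0, exactly one unit, written and read through the flat data view. A small sketch of that behavior (assumes the XTensor constructor and the SetData/Get0D calls used in the diff; CPU only):

#include <cstdio>

void ZeroDTensorDemo()
{
    /* a 0-d tensor: order 0, a single element */
    int dimSize[MAX_TENSOR_DIM_NUM];
    XTensor scalar(0, dimSize, X_FLOAT, 1.0F, -1, NULL);

    DTYPE v = 3.14F;
    scalar.SetData(&v, 1);     /* write the single element */
    DTYPE r = scalar.Get0D();  /* read it back: 3.14F */

    printf("order=%d unitNum=%d value=%f\n", scalar.order, scalar.unitNum, r);
}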
......@@ -16,7 +16,7 @@
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-27
* $Created by: LI Yinqiao (email: li.yin.qiao.2012@hotmail.com) 2020-01-09
*/
......@@ -28,10 +28,16 @@
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* sum all the items of the tensor (into a 0-d target tensor) */
DTYPE _ReduceSumAll(const XTensor * source);
void _ReduceSumAll(const XTensor * source, XTensor * target);

/* sum all the items of the tensor (into a plain scalar) */
DTYPE ReduceSumAll(const XTensor & source);
void _ReduceSumAll(const XTensor * source, DTYPE * target);

/* sum all the items of the tensor, returning a 0-d tensor */
XTensor ReduceSumAll(const XTensor & source);

/* sum all the items of the tensor, returning a plain scalar */
DTYPE ReduceSumAllValue(const XTensor & source);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -358,21 +358,21 @@ DTYPE _CrossEntropy(const XTensor * output, const XTensor * gold,
_CrossEntropy(output, gold, lossBuf, weight, padding, leadingDim);
loss = _ReduceSumAll(lossBuf);
_ReduceSumAll(lossBuf, &loss);
if(reduceWay == REDUCE_MEAN) {
int nonZeroNum;
DTYPE nonZeroNum;
if(padding == NULL) {
nonZeroNum = lossBuf->unitNum;
nonZeroNum = (DTYPE)lossBuf->unitNum;
}
else {
XTensor * tmp = NewTensorBufV2(padding, padding->devID, padding->mem);
_IsNonZero(padding, tmp);
nonZeroNum = (int)_ReduceSumAll(tmp);
_ReduceSumAll(tmp, &nonZeroNum);
DelTensorBuf(tmp);
}
loss = loss / (DTYPE)nonZeroNum;
loss = loss / nonZeroNum;
}
else if(reduceWay == REDUCE_SUM) {
/* don't need to do anything */
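For the REDUCE_MEAN path, the count of non-padded positions is now kept in DTYPE because _ReduceSumAll writes straight into it. The arithmetic being computed is loss = (\sum_i loss_i) / (\sum_i [padding_i != 0]). A plain-C++ reference sketch of that computation (not library code; lossBuf is assumed to already have the padding applied, as in the function above):

float MaskedMeanLoss(const float * lossBuf, const float * padding, int n)
{
    float loss = 0.0F;
    float nonZeroNum = 0.0F;
    for (int i = 0; i < n; i++) {
        loss += lossBuf[i];                                /* _ReduceSumAll(lossBuf, &loss) */
        nonZeroNum += (padding[i] != 0.0F) ? 1.0F : 0.0F;  /* _IsNonZero + _ReduceSumAll */
    }
    return loss / nonZeroNum;
}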
......@@ -675,8 +675,9 @@ void _CrossEntropyBackward(XTensor * dedy, const XTensor * output,
if(padding != NULL) {
XTensor * tmp = NewTensor(padding);
_IsNonZero(padding, tmp);
int nonZeroNum = (int)_ReduceSumAll(tmp);
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
DTYPE nonZeroNum;
_ReduceSumAll(tmp, &nonZeroNum);
_ScaleAndShiftMe(dedy, (DTYPE)1.0/nonZeroNum);
delete tmp;
}
else {
......
......@@ -123,21 +123,21 @@ DTYPE _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
_CudaCrossEntropyFast(output, gold, lossBuf, weight, padding, leadingDim);
loss = _ReduceSumAll(lossBuf);
_ReduceSumAll(lossBuf, &loss);
if(reduceWay == REDUCE_MEAN) {
int nonZeroNum;
DTYPE nonZeroNum;
if(padding == NULL) {
nonZeroNum = lossBuf->unitNum;
nonZeroNum = (DTYPE)lossBuf->unitNum;
}
else {
XTensor * tmp = NewTensorBufV2(padding, padding->devID, padding->mem);
_IsNonZero(padding, tmp);
nonZeroNum = (int)_ReduceSumAll(tmp);
_ReduceSumAll(tmp, &nonZeroNum);
DelTensorBuf(tmp);
}
loss = loss / (DTYPE)nonZeroNum;
loss = loss / nonZeroNum;
}
else if(reduceWay == REDUCE_SUM) {
/* don't need to do anything */
......@@ -199,8 +199,9 @@ void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output,
if(padding != NULL) {
XTensor * tmp = NewTensor(padding);
_IsNonZero(padding, tmp);
int nonZeroNum = (int)_ReduceSumAll(tmp);
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
DTYPE nonZeroNum;
_ReduceSumAll(tmp, &nonZeroNum);
_ScaleAndShiftMe(dedy, (DTYPE)1.0/nonZeroNum);
delete tmp;
}
else {
......
......@@ -55,7 +55,7 @@ bool TestReduceSumAll1()
s->SetData(sData, sUnitNum);
/* call ReduceSumAll function */
summation = _ReduceSumAll(s);
summation = ReduceSumAllValue(*s);
/* check results */
cpuTest = (fabs(answer - summation) < 1e-4F);
......