broadcasting

a8304bed · xiaotong · 14f245fa · a8304bed · a8304bed · a8304bed
Commit a8304bed authored Dec 28, 2018 by xiaotong
--- a/source/tensor/XName.cpp
+++ b/source/tensor/XName.cpp
@@ -67,6 +67,8 @@ const char * GetOPName(int type)
            return "M_MULTIPLY";
        else if (type == MATH_MULTIPLYDIM)
            return "M_MULTIPLYDIM";
+        else if (type == MATH_MULTIPLYBROADCAST)
+            return "M_MULTIPLYBROADCAST";
        else if (type == MATH_NEGATE)
            return "M_NEGATE";
        else if (type == MATH_NORMALIZE)
@@ -85,6 +87,8 @@ const char * GetOPName(int type)
            return "M_SUM";
        else if (type == MATH_SUMDIM)
            return "M_SUMDIM";
+        else if (type == MATH_SUMBROADCAST)
+            return "M_SUMBROADCAST";
        else if (type == REDUCE_REDUCEMAX)
            return "R_REDUCEMAX";
        else if (type == REDUCE_REDUCEMEAN)

--- a/source/tensor/XName.h
+++ b/source/tensor/XName.h
@@ -52,7 +52,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
 #define MATH_MATRIXMULBATCHED   MATH_MATRIXMUL + 1
 #define MATH_MULTIPLY           MATH_MATRIXMULBATCHED + 1
 #define MATH_MULTIPLYDIM        MATH_MULTIPLY + 1
-#define MATH_NEGATE             MATH_MULTIPLYDIM + 1
+#define MATH_MULTIPLYBROADCAST  MATH_MULTIPLYDIM + 1
+#define MATH_NEGATE             MATH_MULTIPLYBROADCAST + 1
 #define MATH_NORMALIZE          MATH_NEGATE + 1
 #define MATH_POWER              MATH_NORMALIZE + 1
 #define MATH_SCALEANDSHIFT      MATH_POWER + 1
@@ -61,8 +62,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
 #define MATH_SUBDIM             MATH_SUB + 1
 #define MATH_SUM                MATH_SUBDIM + 1
 #define MATH_SUMDIM             MATH_SUM + 1
+#define MATH_SUMBROADCAST       MATH_SUMDIM + 1

-#define REDUCE                  MATH_SUMDIM + 1
+#define REDUCE                  MATH_SUMBROADCAST + 1
 #define REDUCE_REDUCEMAX        REDUCE + 1
 #define REDUCE_REDUCEMEAN       REDUCE_REDUCEMAX + 1
 #define REDUCE_REDUCESUM        REDUCE_REDUCEMEAN + 1

--- a/source/tensor/core/arithmetic/MultiplyDim.cpp
+++ b/source/tensor/core/arithmetic/MultiplyDim.cpp
@@ -22,7 +22,9 @@
 #include "Multiply.h"
 #include "MultiplyDim.h"
 #include "MultiplyDim.cuh"
+#include "../shape/Unsqueeze.h"
 #include "../../XName.h"
+#include "../../XUtility.h"
 #include "../movement/CopyValues.h"

 namespace nts { // namespace nts(NiuTrans.Tensor)
@@ -135,29 +137,168 @@ void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha)
 tensor multiplication (return an XTensor structure and make tensor connections)
 make a new tensor to keep the result and return it

-c = a * b + \alpha * c
+c = a * b
 where the size of b is equal to the n-th dimension of a,
 i.e., a is multiplied with b by broadcasting

 >> a - a tensor
 >> b - another tensor whose size is equal to that of dimension n of a
 >> n - the dimension index
->> alpha - the scaling factor
 << return - the result tensor by tensor multiplication
 */
-XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha)
+XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n)
 {
    XTensor c(&a);
    c.SetTMPFlag();

    /* call _MultiplyDim function */
-    _MultiplyDim(&a, &b, &c, n, alpha);
+    _MultiplyDim(&a, &b, &c, n, 0);

    /* tensor connections */
    XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYDIM);
    XLink::AddParamToHeadInt(&c, n);
-    XLink::AddParamToHead(&c, alpha);
+    XLink::AddParamToHead(&c, 0);
+
+    return c;
+}

+/* 
+tensor broadcast multiplication
+c = a * b + c * \beta 
+where some of dimensions of b can be of size 1
+
+a, b and c must have the same order, and the order must be greater than 0
+(both are checked at the beginning of the function).
+
+The dimensions are scanned from left to right. A run of consecutive dimensions
+[i, j) that starts at a dimension where b is of size 1 and a is not, and that ends
+right before the next dimension on which a and b agree (or at the last dimension),
+is expanded in one step: b (or the intermediate result of the previous step) is
+viewed as a tensor s that does not contain the run, and _Unsqueeze(s, t, i, fitSize)
+writes it into a temporary tensor t that has one extra dimension of size fitSize at
+position i, where fitSize is the product of the sizes of a over the run.
+Once no size-1 dimension of b is left to be expanded, _Multiply(a, t, c, beta) is
+called on the fully expanded temporary. If no step is performed at all,
+_Multiply(a, b, c, beta) is called directly.
+
+NOTE(review): a mismatch at dimension i where b is not of size 1 is not reported by
+the outer loop itself; it is only reported (ShowNTErrors) when it is met while
+scanning the dimensions behind a run.
+NOTE(review): the first entry of dimsS and dimsT is negated before NewTensor is
+called. Presumably this asks NewTensor not to allocate the data array, because
+s->data and t->data are assigned by hand afterwards - to be confirmed.
+
+>> a - a tensor
+>> b - another tensor that would be broadcasted
+>> c - the resulting tensor
+>> beta - the scaling factor
+*/
+void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
+{
+    CheckNTErrors(a->order == b->order, "Wrong tensor orders!");
+    CheckNTErrors(a->order == c->order, "Wrong tensor orders!");
+    CheckNTErrors(a->order > 0, "TODO!");
+    
+    int order = a->order;
+    int count = 0;
+    void * source = 0;
+    void * target = 0;
+    
+    for(int i = 0; i < order; i++){
+        if(a->GetDim(i) == b->GetDim(i))
+            continue;
+        
+        if(b->GetDim(i) == 1){
+            int fitSize = a->GetDim(i);
+            int j = i + 1;
+            
+            /* we define a range over dimensions. It is to be unsqueezed.
+               the range is [i, j): j moves on until a and b agree on a dimension again
+               (or all dimensions are consumed), and fitSize accumulates the product of
+               the sizes of a over the range */
+            for(; j < order; j++){
+                if(a->GetDim(j) == b->GetDim(j))
+                    break;
+                fitSize *= a->GetDim(j);
+            }
+            
+            int dimsS[MAX_TENSOR_DIM_NUM];
+            int dimsT[MAX_TENSOR_DIM_NUM];
+            
+            for(int k = 0; k < i; k++){
+                dimsS[k] = a->GetDim(k);
+                dimsT[k] = a->GetDim(k);
+            }
+            
+            dimsT[i] = fitSize;
+            
+            bool isLast = true;
+            for(int k = j; k < order; k++){
+                dimsS[i + k - j + 0] = b->GetDim(k);
+                dimsT[i + k - j + 1] = b->GetDim(k);
+                if(a->GetDim(k) != b->GetDim(k)){
+                    if(b->GetDim(k) == 1)
+                        isLast = false;
+                    else{
+                        ShowNTErrors("Wrong dimension size!")
+                    }
+                }
+            }
+            
+            dimsS[0] = -dimsS[0];
+            dimsT[0] = -dimsT[0];
+            
+            XTensor * s = NewTensor(order - (j - i), dimsS, a->dataType, a->denseRatio, a->devID, a->mem);
+            XTensor * t = NewTensor(order - (j - i) + 1, dimsT, b->dataType, b->denseRatio, b->devID, b->mem);
+            
+            if(count == 0)
+                source = b->data;
+            else{
+                source = target;
+            }
+            
+            target = t->mem != NULL ?
+                     t->mem->AllocBuf(t->devID, t->unitNum * t->unitSize):
+                     XMemAlloc(t->devID, t->unitNum * t->unitSize);
+            
+            s->data = source;
+            t->data = target;
+            
+            _Unsqueeze(s, t, i, fitSize);
+            
+            /* free the memory space of the one before the last allocation,
+               i.e., the intermediate buffer that served as the source of this step.
+               this happens from the second step on only: in the first step the source
+               is b->data, which is not freed here.
+               NOTE(review): the older buffer is released after a newer one has been
+               taken from the same pool - confirm that ReleaseBuf supports this order */
+            if(count > 0){
+                int size = s->unitNum * s->unitSize;
+                if(t->mem != NULL)
+                    t->mem->ReleaseBuf(t->devID, size);
+                else
+                    XMemFree(t->devID, source);
+            }
+            
+            /* we do multiplication here, i.e., when no size-1 dimension of b is left
+               behind the range (isLast). the temporary buffer is freed right after the
+               multiplication and target is reset, so that the final check passes */
+            if(isLast){
+                CheckNTErrors(t->unitNum == c->unitNum, "Wrong tensor size!");
+                _Multiply(a, t, c, beta);
+                if(t->mem != NULL)
+                    t->mem->ReleaseBuf(t->devID, t->unitNum * t->unitSize);
+                else
+                    XMemFree(t->devID, target);
+                target = NULL;
+            }
+            
+            s->data = NULL;
+            t->data = NULL;
+            DelTensor(s);
+            DelTensor(t);
+            
+            i = j;
+            count++;
+        }
+    }
+
+    if(count == 0)
+        _Multiply(a, b, c, beta);
+    
+    CheckNTErrors(target == NULL, "Something is wrong!");
+}
+
+/* 
+tensor broadcast multiplication (return an XTensor structure and make tensor connections)
+c = a * b
+where some of dimensions of b can be of size 1
+
+a new tensor c is constructed from a and marked by SetTMPFlag(). The result is
+computed by _MultiplyBroadcast with a scaling factor of 0, and the operation is
+recorded by XLink under MATH_MULTIPLYBROADCAST with that factor as its parameter.
+
+>> a - a tensor
+>> b - another tensor that would be broadcasted
+<< return - the resulting tensor c
+*/
+XTensor MultiplyBroadcast(const XTensor &a, const XTensor &b)
+{
+    XTensor c(&a);
+    c.SetTMPFlag();
+    
+    /* call _MultiplyBroadcast function */
+    _MultiplyBroadcast(&a, &b, &c, 0);
+    
+    /* tensor connections */
+    XLink::MakeLink(&a, &b, &c, MATH_MULTIPLYBROADCAST);
+    XLink::AddParamToHead(&c, 0);
+    
     return c;
 }


--- a/source/tensor/core/arithmetic/MultiplyDim.h
+++ b/source/tensor/core/arithmetic/MultiplyDim.h
@@ -34,9 +34,16 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP
   i.e., a is multiplied with b by broadcasting. we keep the result in the input tensor a and return nothing */
 void _MultiplyDimMe(XTensor * a, const XTensor * b, int n, DTYPE alpha = 0.0);

-/* tensor multiplication c = a * b + \alpha * c where the size of b is equal to the n-th dimension of a,
+/* tensor multiplication c = a * b where the size of b is equal to the n-th dimension of a,
   i.e., a is multiplied with b by broadcasting. We make a new tensor c to keep the result and return it */
-XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha = 0.0);
+XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n);
+
+/* tensor broadcast multiplication c = a * b + c * \beta where some of dimensions of b can be of size 1 */
+void _MultiplyBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0);
+
+/* tensor broadcast multiplication c = a * b where some of dimensions of b can be of size 1.
+   we return the resulting tensor here */
+XTensor MultiplyBroadcast(const XTensor &a, const XTensor &b);

 } // namespace nts(NiuTrans.Tensor)


--- a/source/tensor/core/arithmetic/SumDim.cpp
+++ b/source/tensor/core/arithmetic/SumDim.cpp
@@ -170,7 +170,7 @@ XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
    XTensor c(&a);
    c.SetTMPFlag();
    
-    /* call _Sum function */
+    /* call _SumDim function */
    _SumDim(&a, &b, &c, n, beta);
    
    /* tensor connections */
@@ -295,5 +295,31 @@ void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta
    
    CheckNTErrors(target == NULL, "Something is wrong!");
 }
+
+/* 
+tensor broadcast summation (return an XTensor structure and make tensor connections)
+c = a + b * \beta
+where some of dimensions of b can be of size 1
+
+a new tensor c is constructed from a and marked by SetTMPFlag(). The result is
+computed by _SumBroadcast, and the operation is recorded by XLink under
+MATH_SUMBROADCAST with beta as its parameter. We return c here.
+
+>> a - a tensor
+>> b - another tensor that would be broadcasted
+>> beta - the scaling factor
+<< return - the resulting tensor c
+*/
+XTensor SumBroadcast(const XTensor &a, const XTensor &b, DTYPE beta)
+{
+    XTensor c(&a);
+    c.SetTMPFlag();
+    
+    /* call _SumBroadcast function */
+    _SumBroadcast(&a, &b, &c, beta);
+    
+    /* tensor connections */
+    XLink::MakeLink(&a, &b, &c, MATH_SUMBROADCAST);
+    XLink::AddParamToHead(&c, beta);
+    
+    return c;
+}
    
 }
--- a/source/tensor/core/arithmetic/SumDim.h
+++ b/source/tensor/core/arithmetic/SumDim.h
@@ -44,6 +44,10 @@ XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta = (DTYPE)1.

 /* tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1 */
 void _SumBroadcast(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta = (DTYPE)1.0);
+
+/* tensor broadcast summation c = a + b * \beta where some of dimensions of b can be of size 1.
+   we return the resulting tensor here */
+XTensor SumBroadcast(const XTensor &a, const XTensor &b, DTYPE beta = (DTYPE)1.0);
    
 } // namespace nts(NiuTrans.Tensor)