add CheckDev to check if two tensors are on the same device

86cb0715 · xiaotong · ac5afe2b · 86cb0715 · 86cb0715 · 86cb0715
Commit 86cb0715 authored Jun 12, 2019 by xiaotong
--- a/source/tensor/XDevice.h
+++ b/source/tensor/XDevice.h
@@ -236,6 +236,18 @@ extern XDevManager GDevs;
        cudaSetDevice(devIDBackup); \
 } \

+/* Abort unless a and b are both negative or are the same non-negative device ID; each argument is evaluated once. */
+#define CheckDev(a, b) \
+do { \
+    const int checkDevA_ = (a); \
+    const int checkDevB_ = (b); \
+    if((checkDevA_ < 0) != (checkDevB_ < 0) || (checkDevA_ >= 0 && checkDevA_ != checkDevB_)){ \
+        fprintf(stderr, "[ERROR] (%s line %d): we must run the code on the same device (%d vs %d)\n", \
+                __FILENAME__, __LINE__, checkDevA_, checkDevB_); \
+        exit(1); \
+    } \
+} while(0)
+
 } /* end of the nts (NiuTrans.Tensor) namespace */

 #endif
--- a/source/tensor/XGlobal.h
+++ b/source/tensor/XGlobal.h
@@ -49,7 +49,7 @@ namespace nts {

 #define _XINLINE_  

-//#define DOUBELPRICSION
+    //#define DOUBELPRICSION

 #ifdef DOUBELPRICSION
 #define DTYPE double

--- a/source/tensor/core/arithmetic/Div.cpp
+++ b/source/tensor/core/arithmetic/Div.cpp
@@ -21,6 +21,7 @@

 #include "../../XTensor.h"
 #include "../../XName.h"
+#include "../../XUtility.h"
 #include "Div.h"
 #include "Div.cuh"
 #include "DivDim.h"
@@ -41,12 +42,15 @@ where i is the index of the item
 */
 void _Div(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
 {
-	int leadingDimRDI = a->order - leadingDim - 1;
    CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
                  "Unmatched tensors in multiplication!");
    CheckNTErrors((a->order == b->order && a->order == c->order), 
                  "Unmatched tensors!");

+    CheckDev(a->devID, b->devID);
+
+    int leadingDimRDI = a->order - leadingDim - 1;
+
 #ifdef USE_CUDA
    if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
        _CudaDiv(a, b, c, alpha, leadingDim);

--- a/source/tensor/core/arithmetic/DivDim.cpp
+++ b/source/tensor/core/arithmetic/DivDim.cpp
@@ -24,6 +24,7 @@
 #include "DivDim.h"
 #include "DivDim.cuh"
 #include "../../XName.h"
+#include "../../XUtility.h"
 #include "../movement/CopyValues.h"

 namespace nts { // namespace nts(NiuTrans.Tensor)
@@ -53,6 +54,8 @@ void _DivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alp
    CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
    CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");

+    CheckDev(a->devID, b->devID);
+
    if(XTensor::IsSameShaped(a, b)){
        _Div(a, b, c, alpha);
        return;

--- a/source/tensor/core/arithmetic/Multiply.cpp
+++ b/source/tensor/core/arithmetic/Multiply.cpp
@@ -21,6 +21,7 @@

 #include "../../XTensor.h"
 #include "../../XName.h"
+#include "../../XUtility.h"
 #include "Multiply.h"
 #include "Multiply.cuh"
 #include "MultiplyDim.h"
@@ -41,12 +42,15 @@ where i is the index of the item
 */
 void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
 {
-	int leadingDimRDI = a->order - leadingDim - 1;
    CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
                  "Unmatched tensors in multiplication!");
    CheckNTErrors((a->order == b->order && a->order == c->order), 
                  "Unmatched tensors!");

+    CheckDev(a->devID, b->devID);
+
+    int leadingDimRDI = a->order - leadingDim - 1;
+
 #ifdef USE_CUDA
    if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
        _CudaMultiply(a, b, c, alpha, leadingDim);

--- a/source/tensor/core/arithmetic/MultiplyDim.cpp
+++ b/source/tensor/core/arithmetic/MultiplyDim.cpp
@@ -55,6 +55,8 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP
    CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
    CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");

+    CheckDev(a->devID, b->devID);
+
    if(XTensor::IsSameShaped(a, b)){
        _Multiply(a, b, c, alpha);
        return;

--- a/source/tensor/core/arithmetic/Sub.cpp
+++ b/source/tensor/core/arithmetic/Sub.cpp
@@ -44,6 +44,8 @@ void _Sub(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
    CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
                  "Unmatched tensors in addition!");

+    CheckDev(a->devID, b->devID);
+
    if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {

 #ifdef USE_CUDA

--- a/source/tensor/core/arithmetic/SubDim.cpp
+++ b/source/tensor/core/arithmetic/SubDim.cpp
@@ -24,6 +24,7 @@
 #include "SubDim.h"
 #include "SubDim.cuh"
 #include "../../XName.h"
+#include "../../XUtility.h"
 #include "../movement/CopyValues.h"

 namespace nts { // namespace nts(NiuTrans.Tensor)
@@ -53,6 +54,8 @@ void _SubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet
 	CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
 	CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");

+    CheckDev(a->devID, b->devID);
+
 	if (beta == 0) {
 		_CopyValues(a, c);
 		return;

--- a/source/tensor/core/arithmetic/Sum.cpp
+++ b/source/tensor/core/arithmetic/Sum.cpp
@@ -45,6 +45,8 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
    CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
                  "Unmatched tensors in addition!");

+    CheckDev(a->devID, b->devID);
+
    if(beta == 0){
        _CopyValues(a, c);
        return;

--- a/source/tensor/core/arithmetic/SumDim.cpp
+++ b/source/tensor/core/arithmetic/SumDim.cpp
@@ -57,6 +57,8 @@ void _SumDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet
    CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
    CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");

+    CheckDev(a->devID, b->devID);
+
    if(beta == 0){
        _CopyValues(a, c);
        return;

--- a/source/tensor/core/arithmetic/SumDim.cu
+++ b/source/tensor/core/arithmetic/SumDim.cu
@@ -84,26 +84,23 @@ void KernelAddWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize
    int colIndex = blockDim.x * blockIdx.x + threadIdx.x;
    int row = blockDim.y * blockIdx.y + threadIdx.y;

-    int col = colIndex % blockSize;
-    int block = colIndex / blockSize;
+    int col = colIndex % colNum;
+    int block = colIndex / colNum;

-    if(row >= rowNum || block >= blockNum)
+    if (row >= rowNum || block >= blockNum)
        return;

-    if(threadIdx.x == 0){
-        printf("(%d %d) ", row, block);
+    if (threadIdx.x == 0)
        bv[threadIdx.y] = b[row];
-    }

-    /*
    __syncthreads();

    int offset = block * blockSize + row * colNum + col;
-    
-    if(betaFired)
+
+    if (betaFired)
        c[offset] = a[offset] + bv[threadIdx.y] * beta;
    else
-        c[offset] = a[offset] + bv[threadIdx.y];*/
+        c[offset] = a[offset] + bv[threadIdx.y];
 }

 /*