Commit 86cb0715 by xiaotong

add CheckDev to check if two tensors are on the same device

parent ac5afe2b
...@@ -236,6 +236,18 @@ extern XDevManager GDevs; ...@@ -236,6 +236,18 @@ extern XDevManager GDevs;
cudaSetDevice(devIDBackup); \ cudaSetDevice(devIDBackup); \
} \ } \
/*
check that two device IDs refer to the same device and abort otherwise
>> a - device ID of the first tensor (an int expression, evaluated exactly once)
>> b - device ID of the second tensor (an int expression, evaluated exactly once)

The check passes when both IDs are negative (any two negative values are
accepted as matching) or when both are non-negative and equal. In every other
case an error is printed to stderr and the process exits with status 1.

The body is wrapped in do { ... } while (0) so that "CheckDev(x, y);" is a
single statement and can be used in an unbraced if/else. The arguments are
copied into locals so that expressions with low-precedence operators or side
effects behave as the caller expects. The definition ends without a trailing
line continuation so that the line following the macro is not spliced into it.

NOTE(review): __FILENAME__ is not defined in this block; it is assumed to be
provided by another header of the project -- confirm it is in scope here.
*/
#define CheckDev(a, b) \
do { \
    const int checkDevIdA_ = (a); \
    const int checkDevIdB_ = (b); \
    if (((checkDevIdA_ < 0) != (checkDevIdB_ < 0)) || \
        (checkDevIdA_ >= 0 && checkDevIdA_ != checkDevIdB_)) { \
        fprintf(stderr, "[ERROR] (%s line %d): we must run the code on the same device (%d vs %d)\n", __FILENAME__, __LINE__, checkDevIdA_, checkDevIdB_); \
        exit(1); \
    } \
} while (0)
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
#endif #endif
...@@ -49,7 +49,7 @@ namespace nts { ...@@ -49,7 +49,7 @@ namespace nts {
#define _XINLINE_ #define _XINLINE_
//#define DOUBELPRICSION //#define DOUBELPRICSION
#ifdef DOUBELPRICSION #ifdef DOUBELPRICSION
#define DTYPE double #define DTYPE double
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h" #include "../../XName.h"
#include "../../XUtility.h"
#include "Div.h" #include "Div.h"
#include "Div.cuh" #include "Div.cuh"
#include "DivDim.h" #include "DivDim.h"
...@@ -41,12 +42,15 @@ where i is the index of the item ...@@ -41,12 +42,15 @@ where i is the index of the item
*/ */
void _Div(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim) void _Div(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{ {
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum), CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), CheckNTErrors((a->order == b->order && a->order == c->order),
"Unmatched tensors!"); "Unmatched tensors!");
CheckDev(a->devID, b->devID);
int leadingDimRDI = a->order - leadingDim - 1;
#ifdef USE_CUDA #ifdef USE_CUDA
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) { if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
_CudaDiv(a, b, c, alpha, leadingDim); _CudaDiv(a, b, c, alpha, leadingDim);
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "DivDim.h" #include "DivDim.h"
#include "DivDim.cuh" #include "DivDim.cuh"
#include "../../XName.h" #include "../../XName.h"
#include "../../XUtility.h"
#include "../movement/CopyValues.h" #include "../movement/CopyValues.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -53,6 +54,8 @@ void _DivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alp ...@@ -53,6 +54,8 @@ void _DivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alp
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!"); CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!"); CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
CheckDev(a->devID, b->devID);
if(XTensor::IsSameShaped(a, b)){ if(XTensor::IsSameShaped(a, b)){
_Div(a, b, c, alpha); _Div(a, b, c, alpha);
return; return;
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "../../XTensor.h" #include "../../XTensor.h"
#include "../../XName.h" #include "../../XName.h"
#include "../../XUtility.h"
#include "Multiply.h" #include "Multiply.h"
#include "Multiply.cuh" #include "Multiply.cuh"
#include "MultiplyDim.h" #include "MultiplyDim.h"
...@@ -41,12 +42,15 @@ where i is the index of the item ...@@ -41,12 +42,15 @@ where i is the index of the item
*/ */
void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim) void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{ {
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum), CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!"); "Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order), CheckNTErrors((a->order == b->order && a->order == c->order),
"Unmatched tensors!"); "Unmatched tensors!");
CheckDev(a->devID, b->devID);
int leadingDimRDI = a->order - leadingDim - 1;
#ifdef USE_CUDA #ifdef USE_CUDA
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) { if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
_CudaMultiply(a, b, c, alpha, leadingDim); _CudaMultiply(a, b, c, alpha, leadingDim);
......
...@@ -55,6 +55,8 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP ...@@ -55,6 +55,8 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!"); CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!"); CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
CheckDev(a->devID, b->devID);
if(XTensor::IsSameShaped(a, b)){ if(XTensor::IsSameShaped(a, b)){
_Multiply(a, b, c, alpha); _Multiply(a, b, c, alpha);
return; return;
......
...@@ -44,6 +44,8 @@ void _Sub(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta) ...@@ -44,6 +44,8 @@ void _Sub(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType, CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Unmatched tensors in addition!"); "Unmatched tensors in addition!");
CheckDev(a->devID, b->devID);
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) { if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
#ifdef USE_CUDA #ifdef USE_CUDA
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "SubDim.h" #include "SubDim.h"
#include "SubDim.cuh" #include "SubDim.cuh"
#include "../../XName.h" #include "../../XName.h"
#include "../../XUtility.h"
#include "../movement/CopyValues.h" #include "../movement/CopyValues.h"
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -53,6 +54,8 @@ void _SubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet ...@@ -53,6 +54,8 @@ void _SubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!"); CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!"); CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
CheckDev(a->devID, b->devID);
if (beta == 0) { if (beta == 0) {
_CopyValues(a, c); _CopyValues(a, c);
return; return;
......
...@@ -45,6 +45,8 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta) ...@@ -45,6 +45,8 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType, CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Unmatched tensors in addition!"); "Unmatched tensors in addition!");
CheckDev(a->devID, b->devID);
if(beta == 0){ if(beta == 0){
_CopyValues(a, c); _CopyValues(a, c);
return; return;
......
...@@ -57,6 +57,8 @@ void _SumDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet ...@@ -57,6 +57,8 @@ void _SumDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!"); CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!"); CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
CheckDev(a->devID, b->devID);
if(beta == 0){ if(beta == 0){
_CopyValues(a, c); _CopyValues(a, c);
return; return;
......
...@@ -84,26 +84,23 @@ void KernelAddWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize ...@@ -84,26 +84,23 @@ void KernelAddWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize
int colIndex = blockDim.x * blockIdx.x + threadIdx.x; int colIndex = blockDim.x * blockIdx.x + threadIdx.x;
int row = blockDim.y * blockIdx.y + threadIdx.y; int row = blockDim.y * blockIdx.y + threadIdx.y;
int col = colIndex % blockSize; int col = colIndex % colNum;
int block = colIndex / blockSize; int block = colIndex / colNum;
if(row >= rowNum || block >= blockNum) if (row >= rowNum || block >= blockNum)
return; return;
if(threadIdx.x == 0){ if (threadIdx.x == 0)
printf("(%d %d) ", row, block);
bv[threadIdx.y] = b[row]; bv[threadIdx.y] = b[row];
}
/*
__syncthreads(); __syncthreads();
int offset = block * blockSize + row * colNum + col; int offset = block * blockSize + row * colNum + col;
if(betaFired) if (betaFired)
c[offset] = a[offset] + bv[threadIdx.y] * beta; c[offset] = a[offset] + bv[threadIdx.y] * beta;
else else
c[offset] = a[offset] + bv[threadIdx.y];*/ c[offset] = a[offset] + bv[threadIdx.y];
} }
/* /*
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论