Commit 86cb0715 by xiaotong

add CheckDev to check if two tensors are on the same device

parent ac5afe2b
......@@ -236,6 +236,18 @@ extern XDevManager GDevs;
cudaSetDevice(devIDBackup); \
} \
/*
check that two device IDs are compatible, i.e., refer to the same device
>> a - device ID of the first operand (an int expression)
>> b - device ID of the second operand (an int expression)
The check fails when exactly one of the IDs is negative, or when both are
non-negative but different. Two negative IDs always pass (presumably a
negative ID denotes the CPU - TODO confirm against XDevice).
On failure an error message is written to stderr and the process exits
with code 1.
Each argument is evaluated exactly once and may be an arbitrary expression.
*/
#define CheckDev(a, b) \
{ \
    const int checkDevA_ = (a); \
    const int checkDevB_ = (b); \
    if(((checkDevA_ < 0) != (checkDevB_ < 0)) || \
       (checkDevA_ >= 0 && checkDevA_ != checkDevB_)){ \
        fprintf(stderr, "[ERROR] (%s line %d): we must run the code on the same device (%d vs %d)\n", __FILENAME__, __LINE__, checkDevA_, checkDevB_); \
        exit(1); \
    } \
}
} /* end of the nts (NiuTrans.Tensor) namespace */
#endif
......@@ -49,7 +49,7 @@ namespace nts {
#define _XINLINE_
//#define DOUBELPRICSION
//#define DOUBELPRICSION
#ifdef DOUBELPRICSION
#define DTYPE double
......
......@@ -21,6 +21,7 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "Div.h"
#include "Div.cuh"
#include "DivDim.h"
......@@ -41,12 +42,15 @@ where i is the index of the item
*/
void _Div(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order),
"Unmatched tensors!");
CheckDev(a->devID, b->devID);
int leadingDimRDI = a->order - leadingDim - 1;
#ifdef USE_CUDA
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
_CudaDiv(a, b, c, alpha, leadingDim);
......
......@@ -24,6 +24,7 @@
#include "DivDim.h"
#include "DivDim.cuh"
#include "../../XName.h"
#include "../../XUtility.h"
#include "../movement/CopyValues.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -53,6 +54,8 @@ void _DivDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE alp
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
CheckDev(a->devID, b->devID);
if(XTensor::IsSameShaped(a, b)){
_Div(a, b, c, alpha);
return;
......
......@@ -21,6 +21,7 @@
#include "../../XTensor.h"
#include "../../XName.h"
#include "../../XUtility.h"
#include "Multiply.h"
#include "Multiply.cuh"
#include "MultiplyDim.h"
......@@ -41,12 +42,15 @@ where i is the index of the item
*/
void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, int leadingDim)
{
int leadingDimRDI = a->order - leadingDim - 1;
CheckNTErrors((a->unitNum <= c->unitNum && b->unitNum <= c->unitNum),
"Unmatched tensors in multiplication!");
CheckNTErrors((a->order == b->order && a->order == c->order),
"Unmatched tensors!");
CheckDev(a->devID, b->devID);
int leadingDimRDI = a->order - leadingDim - 1;
#ifdef USE_CUDA
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
_CudaMultiply(a, b, c, alpha, leadingDim);
......
......@@ -55,6 +55,8 @@ void _MultiplyDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYP
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
CheckDev(a->devID, b->devID);
if(XTensor::IsSameShaped(a, b)){
_Multiply(a, b, c, alpha);
return;
......
......@@ -44,6 +44,8 @@ void _Sub(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Unmatched tensors in addition!");
CheckDev(a->devID, b->devID);
if (a->devID >= 0 || b->devID >= 0 || c->devID >= 0) {
#ifdef USE_CUDA
......
......@@ -24,6 +24,7 @@
#include "SubDim.h"
#include "SubDim.cuh"
#include "../../XName.h"
#include "../../XUtility.h"
#include "../movement/CopyValues.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -53,6 +54,8 @@ void _SubDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
CheckDev(a->devID, b->devID);
if (beta == 0) {
_CopyValues(a, c);
return;
......
......@@ -45,6 +45,8 @@ void _Sum(const XTensor * a, const XTensor * b, XTensor * c, DTYPE beta)
CheckNTErrors(a->dataType == b->dataType && a->dataType == c->dataType,
"Unmatched tensors in addition!");
CheckDev(a->devID, b->devID);
if(beta == 0){
_CopyValues(a, c);
return;
......
......@@ -57,6 +57,8 @@ void _SumDim(const XTensor * a, const XTensor * b, XTensor * c, int n, DTYPE bet
CheckNTErrors(!a->isSparse && !b->isSparse && !c->isSparse, "Dense tensors are required!");
CheckNTErrors(a->dimSize[n] == b->unitNum, "Wrong tensor size!");
CheckDev(a->devID, b->devID);
if(beta == 0){
_CopyValues(a, c);
return;
......
......@@ -84,26 +84,23 @@ void KernelAddWithCol(T * a, T * b, T * c, int rowNum, int colNum, int blockSize
int colIndex = blockDim.x * blockIdx.x + threadIdx.x;
int row = blockDim.y * blockIdx.y + threadIdx.y;
int col = colIndex % blockSize;
int block = colIndex / blockSize;
int col = colIndex % colNum;
int block = colIndex / colNum;
if(row >= rowNum || block >= blockNum)
if (row >= rowNum || block >= blockNum)
return;
if(threadIdx.x == 0){
printf("(%d %d) ", row, block);
if (threadIdx.x == 0)
bv[threadIdx.y] = b[row];
}
/*
__syncthreads();
int offset = block * blockSize + row * colNum + col;
if(betaFired)
if (betaFired)
c[offset] = a[offset] + bv[threadIdx.y] * beta;
else
c[offset] = a[offset] + bv[threadIdx.y];*/
c[offset] = a[offset] + bv[threadIdx.y];
}
/*
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论