Commit 992ee9e9 by xiaotong

Fix the memory allocation bug: allocate tensor data through the device-aware XMemAlloc instead of a raw host-side new char[]; also add gradient bookkeeping (isGrad, grad, SetGrad), correct the initialization check in operator=, and extend the MatrixMul tests.

parent 2ed5a029
......@@ -211,6 +211,9 @@ XTensor::~XTensor()
XLink::ClearIncoming(this);
DestroyData();
if(grad != NULL)
delete grad;
}
/* initialize member variables */
......@@ -237,7 +240,9 @@ void XTensor::Init()
memset(isAllValued, 0, sizeof(bool) * MAX_TENSOR_DIM_NUM);
isInit = false;
isTmp = false;
isGrad = false;
visitMark = 0;
grad = NULL;
}
/* delete data arrays */
......@@ -294,7 +299,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
}
else{
DestroyData();
if(isInit){
if(!isInit){
devID = tensor.devID;
mem = tensor.mem;
}
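The inverted condition is the semantic fix here: an assignment target that has not been initialized yet should adopt the source's device and memory pool, while an already-initialized target keeps its own binding and only receives the data. A minimal sketch of the corrected behavior (shapes and names are illustrative):
int dims[2] = {2, 3};
XTensor * b = NewTensor(2, dims);   /* source tensor                  */
XTensor a;                          /* isInit == false                */
a = *b;                             /* a adopts b->devID and b->mem   */
XTensor * c = NewTensor(2, dims);   /* isInit == true                 */
*c = *b;                            /* data copied; c keeps its devID */
delete b;
delete c;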
......@@ -1043,7 +1048,7 @@ int XTensor::GetNonzeroSize()
/*
set the tensor as "temporary"
>> myIsTMP - flag
>> myIsTMP - the flag
*/
void XTensor::SetTMP(bool myIsTmp)
{
......@@ -1051,6 +1056,15 @@ void XTensor::SetTMP(bool myIsTmp)
}
/*
set the tensor as "keep-gradient"
>> myIsGrad - the flag
*/
void XTensor::SetGrad(bool myIsGrad)
{
isGrad = myIsGrad;
}
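Together with the destructor change at the top of this commit, the new isGrad flag, grad pointer, and SetGrad give gradients a complete lifecycle: Init() nulls grad, SetGrad marks a parameter as gradient-keeping, and ~XTensor frees whatever gradient tensor was attached. A sketch of the intended usage; where back-propagation actually allocates grad is not shown in this diff and is an assumption:
int dims[2] = {2, 3};
XTensor * w = NewTensor(2, dims);
w->SetGrad(true);              /* mark w as "keep-gradient"            */
/* back-propagation is assumed to attach the gradient on demand, e.g.
   if(w->isGrad && w->grad == NULL) w->grad = NewTensor(2, dims);      */
delete w;                      /* ~XTensor now also deletes w->grad    */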
/*
resize a tensor with a specified tensor size
>> myOrder - order of the tensor
>> myDimSize - the size of each dimension
......@@ -1105,7 +1119,7 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
if(isSparse){
/*
for sparse matrices, we use a list of tuple (key, value),
ordered by key. Take a (2-dimensional) matrice as examples,
ordered by key. Take a (2-dimensional) matrix as an example,
we have key = m * i + j;
The data array is
---------
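As a concrete instance of the key formula above: in a matrix with m columns the entry at row i, column j is linearized row-major, so for a 3 x 4 matrix the entry (1, 2) gets key 4 * 1 + 2 = 6:
int m = 4, i = 1, j = 2;
int key = m * i + j;           /* == 6, the tuple's sort key           */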
......@@ -1148,9 +1162,9 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
if(filledData){
/* allocate the new one */
if(mem == NULL){
data = (void*)new char[unitNum * unitSize];
data = XMemAlloc(devID, unitNum * unitSize);
#if defined(UNSAFE_BUT_FAST_MEM)
memset(data, 0, unitNum * unitSize);
XMemSet(devID, data, 0, unitNum * unitSize);
#endif
}
else
......
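This hunk is the allocation bug named in the commit message: a raw new char[] always returns host memory, which is wrong whenever devID refers to a GPU. XMemAlloc and XMemSet dispatch on devID instead. A hypothetical sketch of that dispatch, not the library's actual implementation (which this diff does not show):
/* sketch only; the real XMemAlloc may pool or align differently      */
void * XMemAllocSketch(int devID, size_t size)
{
    if(devID < 0)
        return (void*)new char[size];        /* CPU: host memory       */
#ifdef USE_CUDA
    void * p = NULL;
    cudaSetDevice(devID);                    /* select the target GPU  */
    if(cudaMalloc(&p, size) != cudaSuccess)  /* GPU: device memory     */
        return NULL;
    return p;
#else
    return NULL;                             /* built without CUDA     */
#endif
}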
......@@ -139,8 +139,14 @@ public:
/* indicates whether the tensor is created temporarily */
bool isTmp;
/* indicates whether the tensor keeps the gradient when used as model parameters */
bool isGrad;
/* mark for traversing the graph */
unsigned int visitMark;
/* gradient (for back-propagation) */
XTensor * grad;
/*
the link used to form networks. Note that when we compute on tensors, we actually create a
......@@ -300,6 +306,9 @@ public:
/* set the tensor as "temporary" */
void SetTMP(bool myIsTmp = true);
/* set the tensor as "keep-gradient" */
void SetGrad(bool myIsGrad = true);
/* resize a matrix with a specified matrix size */
bool Resize(const int myOrder, const int * myDimSize,
const TENSOR_DATA_TYPE myDataType = DEFAULT_DTYPE,
......
......@@ -176,12 +176,16 @@ void XMemCopy(void * t, int devIDT, const void * s, int devIDS, size_t size)
}
#ifdef USE_CUDA
else if(devIDT >= 0 && devIDS < 0){
CheckNTErrors((cudaMemcpy(t, s, size, cudaMemcpyHostToDevice) == cudaSuccess),
"cudaMemcpy error (cudaMemcpyHostToDevice)");
cudaError_t error = cudaMemcpy(t, s, size, cudaMemcpyHostToDevice);
if(error != cudaSuccess){
ShowNTErrors("cudaMemcpy error (cudaMemcpyHostToDevice)");
}
}
else if(devIDT < 0 && devIDS >= 0){
CheckNTErrors((cudaMemcpy(t, s, size, cudaMemcpyDeviceToHost) == cudaSuccess),
"cudaMemcpy error (cudaMemcpyDeviceToHost)");
cudaError_t error = cudaMemcpy(t, s, size, cudaMemcpyDeviceToHost);
if(error != cudaSuccess){
ShowNTErrors("cudaMemcpy error (cudaMemcpyDeviceToHost)");
}
}
else{
//if(devIDT == devIDS){
......
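The removed pattern hid a side effect inside an assertion macro: if CheckNTErrors is ever compiled out in release builds (an assumption about how such macros are typically configured), the cudaMemcpy vanishes with it. The rewrite makes the copy unconditional and keeps only the error handling inside the branch, a fix worth applying to any assertion that wraps a call with side effects:
/* risky: the copy lives inside the macro argument                    */
CheckNTErrors(cudaMemcpy(t, s, size, cudaMemcpyHostToDevice) == cudaSuccess,
              "cudaMemcpy error");
/* safe: the copy always runs; only the check is conditional          */
cudaError_t error = cudaMemcpy(t, s, size, cudaMemcpyHostToDevice);
if(error != cudaSuccess)
    ShowNTErrors("cudaMemcpy error");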
......@@ -80,13 +80,13 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
int cBlockNum = 1;
for (int i = 2; i < a->order; i++) {
CheckNTErrors((a->dimSizeRDI[i] == c->dimSizeRDI[i - 2 + b->order]), "Incorrect tensor sizes!");
CheckNTErrors(a->dimSizeRDI[i] == c->dimSizeRDI[i - 2 + b->order], "Incorrect tensor sizes!");
aBlockNum *= a->dimSizeRDI[i];
cBlockNum *= a->dimSizeRDI[i];
}
for (int i = 2; i < b->order; i++) {
CheckNTErrors((b->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!");
CheckNTErrors(b->dimSizeRDI[i] == c->dimSizeRDI[i], "Incorrect tensor sizes!");
bBlockNum *= b->dimSizeRDI[i];
cBlockNum *= b->dimSizeRDI[i];
}
......@@ -224,10 +224,10 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor
int order = a.order + b.order - 2;
int sub = 0;
int * dimSize = new int[order];
for (int i = 2; i < b.order; i++)
dimSize[sub++] = b.dimSizeRDI[b.order + 1 - i];
for (int i = 2; i < a.order; i++)
dimSize[sub++] = a.dimSizeRDI[a.order + 1 - i];
for (int i = 2; i < b.order; i++)
dimSize[sub++] = b.dimSizeRDI[b.order + 1 - i];
dimSize[sub++] = an;
dimSize[sub++] = bm;
......
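Swapping the two loops fixes the order of the result's leading dimensions: a's extra (batch) dimensions now come before b's. Reading dimSizeRDI as the shape stored in reverse (my assumption from the indexing), a worked example:
/* assume a : (p, an, am), b : (q, am, bm), both of order 3
   order = a.order + b.order - 2 = 4
   before: dimSize = { q, p, an, bm }    -- b's batch dimension led
   after : dimSize = { p, q, an, bm }    -- a's batch dimension leads  */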
......@@ -298,12 +298,12 @@ void _CudaBLASMatrixMULList(cublasHandle_t * handle,
cudaMemcpy(cpGPU, cp, sizeof(DTYPE*) * c->count, cudaMemcpyHostToDevice);
_CudaBLASMatrixMULBatched(handle,
(const void**)apGPU, transposedA, a0->dataType,
(const void**)bpGPU, transposedB, b0->dataType,
(void**)cpGPU, c0->dataType, a->count,
a0->dimSize[0], a0->dimSize[1],
b0->dimSize[0], b0->dimSize[1],
c0->dimSize[0], c0->dimSize[1], alpha, beta);
(const void**)apGPU, transposedA, a0->dataType,
(const void**)bpGPU, transposedB, b0->dataType,
(void**)cpGPU, c0->dataType, a->count,
a0->dimSize[0], a0->dimSize[1],
b0->dimSize[0], b0->dimSize[1],
c0->dimSize[0], c0->dimSize[1], alpha, beta);
delete[] ap;
delete[] bp;
delete[] cp;
......
......@@ -75,6 +75,7 @@ bool TestMatrixMul1()
XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */
s1->SetData(sData1, sUnitNum1);
......@@ -83,9 +84,10 @@ bool TestMatrixMul1()
/* call MatrixMul function */
_MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);
tUser = MatrixMul(*s1, X_NOTRANS, *s2, X_NOTRANS);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
......@@ -95,6 +97,7 @@ bool TestMatrixMul1()
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
......@@ -103,9 +106,10 @@ bool TestMatrixMul1()
/* call MatrixMul function */
_MatrixMul(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
tUserGPU = MatrixMul(*sGPU1, X_NOTRANS, *sGPU2, X_NOTRANS);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
delete s1;
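Each test now exercises both entry points: the pointer-based _MatrixMul that writes into a preallocated result, and the by-value MatrixMul whose returned tensor lands in tUser. The same pattern repeats in TestMatrixMul2-4 and in the GPU variants; in sketch form:
_MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);        /* fills t in place */
tUser = MatrixMul(*s1, X_NOTRANS, *s2, X_NOTRANS);  /* returns a tensor */
cpuTest = t->CheckData(answer, tUnitNum)
          && tUser.CheckData(answer, tUnitNum);     /* both must agree  */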
......@@ -185,6 +189,7 @@ bool TestMatrixMul2()
XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */
s1->SetData(sData1, sUnitNum1);
......@@ -193,9 +198,10 @@ bool TestMatrixMul2()
/* call MatrixMul function */
_MatrixMul(s1, X_TRANS, s2, X_NOTRANS, t);
tUser = MatrixMul(*s1, X_TRANS, *s2, X_NOTRANS);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
......@@ -205,6 +211,7 @@ bool TestMatrixMul2()
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
......@@ -213,9 +220,10 @@ bool TestMatrixMul2()
/* call MatrixMul function */
_MatrixMul(sGPU1, X_TRANS, sGPU2, X_NOTRANS, tGPU);
tUserGPU = MatrixMul(*sGPU1, X_TRANS, *sGPU2, X_NOTRANS);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
delete s1;
......@@ -315,6 +323,7 @@ bool TestMatrixMul3()
XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */
s1->SetData(sData1, sUnitNum1);
......@@ -323,9 +332,10 @@ bool TestMatrixMul3()
/* call MatrixMul function */
_MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);
tUser = MatrixMul(*s1, X_NOTRANS, *s2, X_NOTRANS);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
......@@ -335,6 +345,7 @@ bool TestMatrixMul3()
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
......@@ -343,9 +354,10 @@ bool TestMatrixMul3()
/* call MatrixMul function */
_MatrixMul(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
tUserGPU = MatrixMul(*sGPU1, X_NOTRANS, *sGPU2, X_NOTRANS);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
delete s1;
......@@ -434,6 +446,7 @@ bool TestMatrixMul4()
XTensor * s1 = NewTensor(sOrder1, sDimSize1);
XTensor * s2 = NewTensor(sOrder2, sDimSize2);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */
s1->SetData(sData1, sUnitNum1);
......@@ -442,9 +455,10 @@ bool TestMatrixMul4()
/* call MatrixMul function */
_MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);
tUser = MatrixMul(*s1, X_NOTRANS, *s2, X_NOTRANS);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum);
cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
......@@ -454,6 +468,7 @@ bool TestMatrixMul4()
XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* Initialize variables */
sGPU1->SetData(sData1, sUnitNum1);
......@@ -462,9 +477,10 @@ bool TestMatrixMul4()
/* call MatrixMul function */
_MatrixMul(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
tUserGPU = MatrixMul(*sGPU1, X_NOTRANS, *sGPU2, X_NOTRANS);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum);
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
delete s1;
......