Commit 992ee9e9 by xiaotong

fix the bug of memory allocation

parent 2ed5a029
@@ -211,6 +211,9 @@ XTensor::~XTensor()
     XLink::ClearIncoming(this);
     DestroyData();
+    if(grad != NULL)
+        delete grad;
 }

 /* initialize member variables */
@@ -237,7 +240,9 @@ void XTensor::Init()
     memset(isAllValued, 0, sizeof(bool) * MAX_TENSOR_DIM_NUM);
     isInit = false;
     isTmp = false;
+    isGrad = false;
     visitMark = 0;
+    grad = NULL;
 }
 /* delete data arrays */
@@ -294,7 +299,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
     }
     else{
         DestroyData();
-        if(isInit){
+        if(!isInit){
             devID = tensor.devID;
             mem = tensor.mem;
         }
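Note: the operator= change flips the condition from if(isInit) to if(!isInit), so devID and mem are taken from the source tensor only when the destination has not been initialized yet; an already-initialized tensor keeps its own device and memory pool. A toy stand-in (not the real XTensor) illustrating the corrected condition:

    #include <cstdio>

    /* toy stand-in: only an uninitialized destination adopts the
       source's device id */
    struct ToyTensor {
        bool isInit = false;
        int  devID  = -1;

        ToyTensor & operator= (const ToyTensor &other)
        {
            if(!isInit){          /* was "if(isInit)" before the fix */
                devID  = other.devID;
                isInit = true;
            }
            return *this;
        }
    };

    int main()
    {
        ToyTensor src;  src.isInit = true;  src.devID = 0;  /* lives on GPU 0 */
        ToyTensor dst;                                      /* not initialized yet */
        dst = src;
        printf("dst.devID = %d\n", dst.devID);              /* prints 0 */
        return 0;
    }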
@@ -1043,7 +1048,7 @@ int XTensor::GetNonzeroSize()
 /*
 set the tensor as "temporary"
->> myIsTMP - flag
+>> myIsTMP - the flag
 */
 void XTensor::SetTMP(bool myIsTmp)
 {
@@ -1051,6 +1056,15 @@ void XTensor::SetTMP(bool myIsTmp)
 }

 /*
+set the tensor as "keep-gradient"
+>> myIsGrad - the flag
+*/
+void XTensor::SetGrad(bool myIsGrad)
+{
+    isGrad = myIsGrad;
+}
+
+/*
 resize a tensor with a specified tensor size
 >> myOrder - order of the tensor
 >> myDimSize - the size of each dimension
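Note: together with the new isGrad flag and grad pointer (see the header change below), SetGrad lets a caller mark a tensor, typically a model parameter, so that its gradient is kept. A minimal usage sketch, under the assumption that the gradient itself is allocated elsewhere (e.g., by the back-propagation code, which is not part of this commit); the include path and namespace follow common NiuTensor usage and may differ:

    #include "XTensor.h"      /* assumed include; actual path may differ */
    using namespace nts;      /* NiuTensor namespace as used in its samples */

    void MarkAsParameter(XTensor &w)
    {
        /* keep the gradient for this tensor when it is used as a model
           parameter; w.grad starts as NULL (set in XTensor::Init) and is
           freed in the destructor, as added by this commit */
        w.SetGrad(true);
    }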
@@ -1105,7 +1119,7 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
     if(isSparse){
         /*
         for sparse matrices, we use a list of tuple (key, value),
-        ordered by key. Take a (2-dimensional) matrice as examples,
+        ordered by key. Take a (2-dimensional) matrix as an example,
         we have key = m * i + j;
         The data array is
         ---------
@@ -1148,9 +1162,9 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
     if(filledData){
         /* allocate the new one */
         if(mem == NULL){
-            data = (void*)new char[unitNum * unitSize];
+            data = XMemAlloc(devID, unitNum * unitSize);
 #if defined(UNSAFE_BUT_FAST_MEM)
-            memset(data, 0, unitNum * unitSize);
+            XMemSet(devID, data, 0, unitNum * unitSize);
 #endif
         }
         else
...
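Note: the Resize change above is presumably the memory-allocation bug named in the commit message: the old code always allocated data with new char[] on the host, even for tensors whose devID refers to a GPU, whereas XMemAlloc/XMemSet dispatch on devID. A minimal sketch of what a device-aware allocator of this kind could look like (the real XMemAlloc belongs to the library's utility code and may differ; cudaSetDevice/cudaMalloc are the assumed CUDA calls):

    #include <cstddef>
    #ifdef USE_CUDA
    #include <cuda_runtime.h>
    #endif

    /* toy device-aware allocator: devID < 0 means CPU in this codebase */
    void * AllocOnDevice(int devID, size_t size)
    {
        if(devID < 0)
            return (void*)new char[size];        /* host allocation */
    #ifdef USE_CUDA
        void * p = NULL;
        cudaSetDevice(devID);                    /* select the target GPU */
        if(cudaMalloc(&p, size) != cudaSuccess)  /* device allocation */
            return NULL;
        return p;
    #else
        return NULL;                             /* built without GPU support */
    #endif
    }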
@@ -139,8 +139,14 @@ public:
     /* indicates whether the tensor is created temporarily */
     bool isTmp;

+    /* indicates whether the tensor keeps the gradient when used as model parameters */
+    bool isGrad;
+
     /* mark for traversing the gragh */
     unsigned int visitMark;

+    /* gradient (for back-propagation) */
+    XTensor * grad;
+
     /*
     the link used to form networks. Note that when we compute on tensors, we actually create a
@@ -300,6 +306,9 @@ public:
     /* set the tensor as "temporary" */
     void SetTMP(bool myIsTmp = true);

+    /* set the tensor as "keep-gradient" */
+    void SetGrad(bool myIsGrad = true);
+
     /* resize a matrix with a specified matrix size */
     bool Resize(const int myOrder, const int * myDimSize,
                 const TENSOR_DATA_TYPE myDataType = DEFAULT_DTYPE,
...
@@ -176,12 +176,16 @@ void XMemCopy(void * t, int devIDT, const void * s, int devIDS, size_t size)
     }
 #ifdef USE_CUDA
     else if(devIDT >= 0 && devIDS < 0){
-        CheckNTErrors((cudaMemcpy(t, s, size, cudaMemcpyHostToDevice) == cudaSuccess),
-                      "cudaMemcpy error (cudaMemcpyHostToDevice)");
+        cudaError_t error = cudaMemcpy(t, s, size, cudaMemcpyHostToDevice);
+        if(error != cudaSuccess){
+            ShowNTErrors("cudaMemcpy error (cudaMemcpyHostToDevice)");
+        }
     }
     else if(devIDT < 0 && devIDS >= 0){
-        CheckNTErrors((cudaMemcpy(t, s, size, cudaMemcpyDeviceToHost) == cudaSuccess),
-                      "cudaMemcpy error (cudaMemcpyDeviceToHost)");
+        cudaError_t error = cudaMemcpy(t, s, size, cudaMemcpyDeviceToHost);
+        if(error != cudaSuccess){
+            ShowNTErrors("cudaMemcpy error (cudaMemcpyDeviceToHost)");
+        }
     }
     else{
         //if(devIDT == devIDS){
...
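Note: both XMemCopy branches now store the cudaMemcpy return code in a local variable and test it explicitly instead of evaluating the copy inside the CheckNTErrors macro, which keeps the actual copy out of the macro argument and makes the control flow explicit. A sketch of the same pattern in isolation (the library's ShowNTErrors macro is replaced by fprintf/exit here, so this is not the library's code):

    #include <cstdio>
    #include <cstdlib>
    #include <cuda_runtime.h>

    /* explicitly checked host-to-device copy */
    void CopyHostToDevice(void * t, const void * s, size_t size)
    {
        cudaError_t error = cudaMemcpy(t, s, size, cudaMemcpyHostToDevice);
        if(error != cudaSuccess){
            fprintf(stderr, "cudaMemcpy error (cudaMemcpyHostToDevice): %s\n",
                    cudaGetErrorString(error));
            exit(1);
        }
    }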
@@ -80,13 +80,13 @@ void _MatrixMul(const XTensor * a, MATRIX_TRANS_TYPE transposedA,
     int cBlockNum = 1;

     for (int i = 2; i < a->order; i++) {
-        CheckNTErrors((a->dimSizeRDI[i] == c->dimSizeRDI[i - 2 + b->order]), "Incorrect tensor sizes!");
+        CheckNTErrors(a->dimSizeRDI[i] == c->dimSizeRDI[i - 2 + b->order], "Incorrect tensor sizes!");
         aBlockNum *= a->dimSizeRDI[i];
         cBlockNum *= a->dimSizeRDI[i];
     }

     for (int i = 2; i < b->order; i++) {
-        CheckNTErrors((b->dimSizeRDI[i] == c->dimSizeRDI[i]), "Incorrect tensor sizes!");
+        CheckNTErrors(b->dimSizeRDI[i] == c->dimSizeRDI[i], "Incorrect tensor sizes!");
         bBlockNum *= b->dimSizeRDI[i];
         cBlockNum *= b->dimSizeRDI[i];
     }
@@ -224,10 +224,10 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const XTensor
     int order = a.order + b.order - 2;
     int sub = 0;
     int * dimSize = new int[order];
-    for (int i = 2; i < b.order; i++)
-        dimSize[sub++] = b.dimSizeRDI[b.order + 1 - i];
     for (int i = 2; i < a.order; i++)
         dimSize[sub++] = a.dimSizeRDI[a.order + 1 - i];
+    for (int i = 2; i < b.order; i++)
+        dimSize[sub++] = b.dimSizeRDI[b.order + 1 - i];
     dimSize[sub++] = an;
     dimSize[sub++] = bm;
...
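Note: the reordered loops change how the result shape of the function-style MatrixMul is built: c's leading dimensions are now a's batch dimensions followed by b's batch dimensions, and then (an, bm). This appears to line up with the checks in _MatrixMul above, which compare a's batch dimensions against c->dimSizeRDI[i - 2 + b->order] and b's against c->dimSizeRDI[i]. A standalone toy shape calculation for the X_NOTRANS case (not library code):

    #include <cstdio>
    #include <vector>

    /* toy shape rule mirroring the fixed ordering:
       c = (a's batch dims, b's batch dims, an, bm) */
    std::vector<int> MatMulShape(const std::vector<int> &a, const std::vector<int> &b)
    {
        std::vector<int> c;
        for(size_t i = 0; i + 2 < a.size(); i++)   /* a's batch dims first */
            c.push_back(a[i]);
        for(size_t i = 0; i + 2 < b.size(); i++)   /* then b's batch dims */
            c.push_back(b[i]);
        c.push_back(a[a.size() - 2]);              /* an: rows of each a block */
        c.push_back(b[b.size() - 1]);              /* bm: columns of each b block */
        return c;
    }

    int main()
    {
        /* a: (3, 2, 4, 5), b: (7, 5, 6)  ->  c: (3, 2, 7, 4, 6) */
        std::vector<int> c = MatMulShape({3, 2, 4, 5}, {7, 5, 6});
        for(int d : c)
            printf("%d ", d);
        printf("\n");
        return 0;
    }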
@@ -298,12 +298,12 @@ void _CudaBLASMatrixMULList(cublasHandle_t * handle,
     cudaMemcpy(cpGPU, cp, sizeof(DTYPE*) * c->count, cudaMemcpyHostToDevice);

     _CudaBLASMatrixMULBatched(handle,
                               (const void**)apGPU, transposedA, a0->dataType,
                               (const void**)bpGPU, transposedB, b0->dataType,
                               (void**)cpGPU, c0->dataType, a->count,
                               a0->dimSize[0], a0->dimSize[1],
                               b0->dimSize[0], b0->dimSize[1],
                               c0->dimSize[0], c0->dimSize[1], alpha, beta);

     delete[] ap;
     delete[] bp;
     delete[] cp;
...
@@ -75,6 +75,7 @@ bool TestMatrixMul1()
     XTensor * s1 = NewTensor(sOrder1, sDimSize1);
     XTensor * s2 = NewTensor(sOrder2, sDimSize2);
     XTensor * t = NewTensor(tOrder, tDimSize);
+    XTensor tUser;

     /* initialize variables */
     s1->SetData(sData1, sUnitNum1);
@@ -83,9 +84,10 @@ bool TestMatrixMul1()
     /* call MatrixMul function */
     _MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);
+    tUser = MatrixMul(*s1, X_NOTRANS, *s2, X_NOTRANS);

     /* check results */
-    cpuTest = t->CheckData(answer, tUnitNum);
+    cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);

 #ifdef USE_CUDA
     /* GPU test */
@@ -95,6 +97,7 @@ bool TestMatrixMul1()
     XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
     XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
     XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
+    XTensor tUserGPU;

     /* Initialize variables */
     sGPU1->SetData(sData1, sUnitNum1);
@@ -103,9 +106,10 @@ bool TestMatrixMul1()
     /* call MatrixMul function */
     _MatrixMul(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
+    tUserGPU = MatrixMul(*sGPU1, X_NOTRANS, *sGPU2, X_NOTRANS);

     /* check results */
-    gpuTest = tGPU->CheckData(answer, tUnitNum);
+    gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);

     /* destroy variables */
     delete s1;
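Note: each test now exercises both entry points against the same reference data: the low-level _MatrixMul, which writes into the preallocated tensor t/tGPU, and the function-style MatrixMul, which returns a new tensor by value into tUser/tUserGPU. The pattern, consolidated (identifiers as in the test code above):

    /* low-level call: result written into the preallocated tensor t */
    _MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);

    /* function-style call: result returned by value */
    tUser = MatrixMul(*s1, X_NOTRANS, *s2, X_NOTRANS);

    /* both results must match the hand-computed answer */
    cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);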
@@ -185,6 +189,7 @@ bool TestMatrixMul2()
     XTensor * s1 = NewTensor(sOrder1, sDimSize1);
     XTensor * s2 = NewTensor(sOrder2, sDimSize2);
     XTensor * t = NewTensor(tOrder, tDimSize);
+    XTensor tUser;

     /* initialize variables */
     s1->SetData(sData1, sUnitNum1);
@@ -193,9 +198,10 @@ bool TestMatrixMul2()
     /* call MatrixMul function */
     _MatrixMul(s1, X_TRANS, s2, X_NOTRANS, t);
+    tUser = MatrixMul(*s1, X_TRANS, *s2, X_NOTRANS);

     /* check results */
-    cpuTest = t->CheckData(answer, tUnitNum);
+    cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);

 #ifdef USE_CUDA
     /* GPU test */
@@ -205,6 +211,7 @@ bool TestMatrixMul2()
     XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
     XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
     XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
+    XTensor tUserGPU;

     /* Initialize variables */
     sGPU1->SetData(sData1, sUnitNum1);
@@ -213,9 +220,10 @@ bool TestMatrixMul2()
     /* call MatrixMul function */
     _MatrixMul(sGPU1, X_TRANS, sGPU2, X_NOTRANS, tGPU);
+    tUserGPU = MatrixMul(*sGPU1, X_TRANS, *sGPU2, X_NOTRANS);

     /* check results */
-    gpuTest = tGPU->CheckData(answer, tUnitNum);
+    gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);

     /* destroy variables */
     delete s1;
@@ -315,6 +323,7 @@ bool TestMatrixMul3()
     XTensor * s1 = NewTensor(sOrder1, sDimSize1);
     XTensor * s2 = NewTensor(sOrder2, sDimSize2);
     XTensor * t = NewTensor(tOrder, tDimSize);
+    XTensor tUser;

     /* initialize variables */
     s1->SetData(sData1, sUnitNum1);
@@ -323,9 +332,10 @@ bool TestMatrixMul3()
     /* call MatrixMul function */
     _MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);
+    tUser = MatrixMul(*s1, X_NOTRANS, *s2, X_NOTRANS);

     /* check results */
-    cpuTest = t->CheckData(answer, tUnitNum);
+    cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);

 #ifdef USE_CUDA
     /* GPU test */
@@ -335,6 +345,7 @@ bool TestMatrixMul3()
     XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
     XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
     XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
+    XTensor tUserGPU;

     /* Initialize variables */
     sGPU1->SetData(sData1, sUnitNum1);
@@ -343,9 +354,10 @@ bool TestMatrixMul3()
     /* call MatrixMul function */
     _MatrixMul(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
+    tUserGPU = MatrixMul(*sGPU1, X_NOTRANS, *sGPU2, X_NOTRANS);

     /* check results */
-    gpuTest = tGPU->CheckData(answer, tUnitNum);
+    gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);

     /* destroy variables */
     delete s1;
@@ -434,6 +446,7 @@ bool TestMatrixMul4()
     XTensor * s1 = NewTensor(sOrder1, sDimSize1);
     XTensor * s2 = NewTensor(sOrder2, sDimSize2);
     XTensor * t = NewTensor(tOrder, tDimSize);
+    XTensor tUser;

     /* initialize variables */
     s1->SetData(sData1, sUnitNum1);
@@ -442,9 +455,10 @@ bool TestMatrixMul4()
     /* call MatrixMul function */
     _MatrixMul(s1, X_NOTRANS, s2, X_NOTRANS, t);
+    tUser = MatrixMul(*s1, X_NOTRANS, *s2, X_NOTRANS);

     /* check results */
-    cpuTest = t->CheckData(answer, tUnitNum);
+    cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);

 #ifdef USE_CUDA
     /* GPU test */
@@ -454,6 +468,7 @@ bool TestMatrixMul4()
     XTensor * sGPU1 = NewTensor(sOrder1, sDimSize1, X_FLOAT, 1.0F, 0);
     XTensor * sGPU2 = NewTensor(sOrder2, sDimSize2, X_FLOAT, 1.0F, 0);
     XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
+    XTensor tUserGPU;

     /* Initialize variables */
     sGPU1->SetData(sData1, sUnitNum1);
@@ -462,9 +477,10 @@ bool TestMatrixMul4()
     /* call MatrixMul function */
     _MatrixMul(sGPU1, X_NOTRANS, sGPU2, X_NOTRANS, tGPU);
+    tUserGPU = MatrixMul(*sGPU1, X_NOTRANS, *sGPU2, X_NOTRANS);

     /* check results */
-    gpuTest = tGPU->CheckData(answer, tUnitNum);
+    gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);

     /* destroy variables */
     delete s1;
...