Commit 1037cc81 by xiaotong

dump info

parent c44d1a79
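Summary (reconstructed from the hunks below): the per-step logging in T2TTrainer::Train is switched from every 100 steps to every step, and a dump of the model's XMem pool usage (used vs. allocated bytes across all blocks) is printed each iteration. In addition, the tensor library gains a global counter xtensorMemUsed, guarded by tensorMutex, that is updated when tensor data is allocated or released, and the counter is exported through the header so other code can read it.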
@@ -270,7 +270,7 @@ bool T2TTrainer::Train(const char * fn, const char * validFN, const char * model
                 break;
             }
-            if (step % 100 == 0) {
+            if (step % 1 == 0) {
                 double elapsed = GetClockSec() - startT;
                 XPRINT8(0, stderr, "[INFO] lr=%.2e, elapsed=%.1fs, step=%d, epoch=%d, word=%d, loss=%.3f, ppl=%.3f, sppl=%.3f",
                     lr, elapsed, step, curEpoch, wordCountTotal, loss/wordCount, exp(loss/wordCount), exp(-prob/wc));
@@ -279,6 +279,17 @@ bool T2TTrainer::Train(const char * fn, const char * validFN, const char * model
                 XPRINT(0, stderr, "\n");
             }
+            XMem * mem = model->mem;
+            MTYPE used = 0;
+            MTYPE total = 0;
+            for(int i = 0; i < mem->blockNum; i++){
+                if(mem->blocks[i].mem != NULL){
+                    used += mem->blocks[i].used;
+                    total += mem->blocks[i].size;
+                }
+            }
+            fprintf(stderr, "mem: %lld %lld\n", used, total);
+
             if(nStepCheckpoint > 0 && ++nStepCheck >= nStepCheckpoint){
                 MakeCheckpoint(model, validFN, modelFN, "step", step);
                 nStepCheck = 0;
...
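The dump added above walks every block of the XMem pool and accumulates two totals: bytes actually in use and bytes reserved. A minimal standalone sketch of the same scan follows; Block and DumpPoolUsage are hypothetical stand-ins for XMem's internals (XMem keeps an array of blocks, each carrying a base pointer, a used count, and a size), and MTYPE is assumed to be a wide signed integer as in NiuTensor.

#include <cstdio>

typedef long long MTYPE;   /* assumption: NiuTensor's MTYPE is a wide signed integer */

struct Block {
    void * mem;   /* base pointer; NULL if the block is not backed by memory */
    MTYPE  used;  /* bytes in use */
    MTYPE  size;  /* bytes reserved */
};

/* sum used/reserved bytes over all allocated blocks and print them,
   mirroring the loop inserted into T2TTrainer::Train */
void DumpPoolUsage(const Block * blocks, int blockNum)
{
    MTYPE used = 0, total = 0;
    for (int i = 0; i < blockNum; i++) {
        if (blocks[i].mem != NULL) {   /* only count blocks that actually hold memory */
            used  += blocks[i].used;
            total += blocks[i].size;
        }
    }
    fprintf(stderr, "mem: %lld %lld\n", used, total);
}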
@@ -63,6 +63,7 @@
 /* the nts (NiuTrans.Tensor) namespace */
 namespace nts{
 
+MTYPE xtensorMemUsed = 0;
 int tensorIDGlobal = 0;
 MUTEX_HANDLE tensorMutex;
 XTensor NULLTensor;
@@ -254,6 +255,10 @@ void XTensor::Init()
 /* delete data arrays */
 void XTensor::DestroyData()
 {
+    MUTEX_LOCK(tensorMutex);
+    xtensorMemUsed += GetDataSizeInChar();
+    MUTEX_UNLOCK(tensorMutex);
+
     if(data != NULL && mem == NULL && !isShared)
         XMemFree(devID, data);
     else if(data != NULL && isInGlobalMem)
@@ -1296,8 +1301,12 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
     if(data != NULL){
         if (mem == NULL)
             XMemFree(devID, data);
-        else
+        else{
             mem->Release(data, GetDataSizeInChar(), signature);
+            MUTEX_LOCK(tensorMutex);
+            xtensorMemUsed -= GetDataSizeInChar();
+            MUTEX_UNLOCK(tensorMutex);
+        }
     }
 
     signature = mem != NULL ? mem->GetSignature() : 0;
@@ -1367,6 +1376,10 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
         else
             d = (int*)mem->Alloc(mem->devID, size);
 
+        MUTEX_LOCK(tensorMutex);
+        xtensorMemUsed += size;
+        MUTEX_UNLOCK(tensorMutex);
+
         if(d == NULL)
             return false;
...
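The XTensor.cpp changes maintain xtensorMemUsed under tensorMutex: the allocation path in Resize adds the allocated size, and the release path subtracts GetDataSizeInChar(). Note that DestroyData also adds GetDataSizeInChar() even though it frees the data; if the counter is meant to track live bytes, that update looks inverted relative to the other two sites. The sketch below shows the underlying mutex-guarded counter pattern in isolation; std::mutex and the OnAlloc/OnFree names are stand-ins for NiuTensor's MUTEX_* macros and the real call sites, not the project's API.

#include <mutex>

typedef long long MTYPE;   /* assumption, as above */

static MTYPE memUsed = 0;        /* global byte counter shared across threads */
static std::mutex counterMutex;  /* guards every read-modify-write of memUsed */

/* called after a successful allocation of `bytes` bytes */
void OnAlloc(MTYPE bytes)
{
    std::lock_guard<std::mutex> lock(counterMutex);
    memUsed += bytes;
}

/* called after `bytes` bytes are released back to the pool */
void OnFree(MTYPE bytes)
{
    std::lock_guard<std::mutex> lock(counterMutex);
    memUsed -= bytes;
}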
@@ -387,6 +387,9 @@ public:
     void FreeData(XTensor * matrix, XMem * myMem = NULL, bool useBuf = false);
 };
 
+/* record of mem consumption (for XTensor) */
+extern MTYPE xtensorMemUsed;
+
 /* we make a unique id for every tensor */
 extern int tensorIDGlobal;
 extern MUTEX_HANDLE tensorMutex;
...
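With the extern declaration in place, any translation unit that includes the header can read the counter. A hypothetical reader is sketched below, assuming the include path and that XTensor.h pulls in MTYPE, the MUTEX_* macros used in the hunks above, and the nts namespace.

#include <cstdio>
#include "XTensor.h"   /* assumed include path; declares xtensorMemUsed and tensorMutex */

using namespace nts;

/* take tensorMutex for a consistent snapshot of the counter, then report it */
void ReportTensorMem()
{
    MUTEX_LOCK(tensorMutex);
    MTYPE bytes = xtensorMemUsed;
    MUTEX_UNLOCK(tensorMutex);
    fprintf(stderr, "xtensor mem: %lld bytes\n", (long long)bytes);
}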