Commit 1037cc81 by xiaotong

dump info

parent c44d1a79
......@@ -270,7 +270,7 @@ bool T2TTrainer::Train(const char * fn, const char * validFN, const char * model
break;
}
if (step % 100 == 0) {
if (step % 1 == 0) {
double elapsed = GetClockSec() - startT;
XPRINT8(0, stderr, "[INFO] lr=%.2e, elapsed=%.1fs, step=%d, epoch=%d, word=%d, loss=%.3f, ppl=%.3f, sppl=%.3f",
lr, elapsed, step, curEpoch, wordCountTotal, loss/wordCount, exp(loss/wordCount), exp(-prob/wc));
......@@ -279,6 +279,17 @@ bool T2TTrainer::Train(const char * fn, const char * validFN, const char * model
XPRINT(0, stderr, "\n");
}
XMem * mem = model->mem;
MTYPE used = 0;
MTYPE total = 0;
for(int i = 0; i < mem->blockNum; i++){
if(mem->blocks[i].mem != NULL){
used += mem->blocks[i].used;
total += mem->blocks[i].size;
}
}
fprintf(stderr, "mem: %lld %lld\n", used, total);
if(nStepCheckpoint > 0 && ++nStepCheck >= nStepCheckpoint){
MakeCheckpoint(model, validFN, modelFN, "step", step);
nStepCheck = 0;
......
......@@ -63,6 +63,7 @@
/* the nts (NiuTrans.Tensor) namespace */
namespace nts{
MTYPE xtensorMemUsed = 0;
int tensorIDGlobal = 0;
MUTEX_HANDLE tensorMutex;
XTensor NULLTensor;
......@@ -254,6 +255,10 @@ void XTensor::Init()
/* delete data arrays */
void XTensor::DestroyData()
{
MUTEX_LOCK(tensorMutex);
xtensorMemUsed += GetDataSizeInChar();
MUTEX_UNLOCK(tensorMutex);
if(data != NULL && mem == NULL && !isShared)
XMemFree(devID, data);
else if(data != NULL && isInGlobalMem)
......@@ -1296,8 +1301,12 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
if(data != NULL){
if (mem == NULL)
XMemFree(devID, data);
else
else{
mem->Release(data, GetDataSizeInChar(), signature);
MUTEX_LOCK(tensorMutex);
xtensorMemUsed -= GetDataSizeInChar();
MUTEX_UNLOCK(tensorMutex);
}
}
signature = mem != NULL ? mem->GetSignature() : 0;
......@@ -1367,6 +1376,10 @@ bool XTensor::Resize(const int myOrder, const int * myDimSize,
else
d = (int*)mem->Alloc(mem->devID, size);
MUTEX_LOCK(tensorMutex);
xtensorMemUsed += size;
MUTEX_UNLOCK(tensorMutex);
if(d == NULL)
return false;
......
......@@ -387,6 +387,9 @@ public:
void FreeData(XTensor * matrix, XMem * myMem = NULL, bool useBuf = false);
};
/* record of memory consumption (for XTensor) */
extern MTYPE xtensorMemUsed;
/* we make a unique id for every tensor */
extern int tensorIDGlobal;
extern MUTEX_HANDLE tensorMutex;
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论