Commit 58d5fc31 by xiaotong

fix the bug of memory conflict

parent 57fd3510
......@@ -148,18 +148,16 @@ void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
/* back-propagation from output to input */
for(int i = nodes.count - 1; i >= 0; i--){
XTensor * node = (XTensor*)nodes.Get(i);
if(node->visitMark == NODE_FINISHED)
continue;
//if(i == 1)
// return;
BackwardNode(node);
XTensor * node = (XTensor*)nodes.Get(i);
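/* the temporary memory buffer should be essentially empty before this node is
   back-propagated; a reservation left over from an earlier step could overlap
   later buffer allocations (presumably the memory conflict this commit fixes) */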
if(node->mem != NULL){
CheckNTErrors(node->mem->bufUsed == 0, "Illegal access of buffer!");
CheckNTErrors(node->mem->bufUsed < BUF_PITCH, "Illegal access of buffer!");
}
if(node->visitMark == NODE_FINISHED)
continue;
BackwardNode(node);
}
}
......
......@@ -111,6 +111,9 @@ XTensor T2TEmbedder::Make(XTensor &input)
memcpy(dims, input.dimSize, input.order * sizeof(int));
dims[input.order - 1] = eSize;
XTensor wordEmbedding;
XTensor posEmbedding;
bool match = (posEmbedding.order == input.order);
if(match){
for(int i = 0; i < input.order; i++){
......@@ -130,8 +133,6 @@ XTensor T2TEmbedder::Make(XTensor &input)
DelTensorBuf(posTMP);
}
XTensor wordEmbedding;
/* then we make word embeddings */
wordEmbedding = Linear(MMul(input, w), (float)sqrt((float)d));
......
......@@ -63,9 +63,6 @@ public:
the embedding processing by re-loading. */
XTensor posEmbeddingBase;
/* positional embeddings */
XTensor posEmbedding;
public:
/* constructor */
T2TEmbedder();
......
......@@ -62,7 +62,7 @@ void T2TModel::InitModel(int argc, const char ** argv)
if(useMem){
delete mem;
mem = new XMem(devID, UNI_FREE, (MTYPE)MILLION * 128, 1024, MILLION * 128);
mem = new XMem(devID, UNI_FREE, (MTYPE)MILLION * 256, 1024, MILLION * 128);
mem->SetDesiredSize(devID, 0, (MTYPE)memSize * MILLION);
}
......@@ -117,4 +117,74 @@ void T2TModel::Make(XTensor &input, XTensor &output)
}
}
/*
get parameter matrices
>> list - the list that keeps the parameter matrices
*/
void T2TModel::GetParams(XList &list)
{
list.Clear();
list.Add(&outputLayer.w);
for(int i = 0; i < encoder.nlayer; i++){
list.Add(&encoder.fnns[i].w1);
list.Add(&encoder.fnns[i].b1);
list.Add(&encoder.fnns[i].w2);
list.Add(&encoder.fnns[i].b2);
list.Add(&encoder.attentions[i].wk);
list.Add(&encoder.attentions[i].wq);
list.Add(&encoder.attentions[i].wv);
list.Add(&encoder.fnnLayerNorms[i].w);
list.Add(&encoder.fnnLayerNorms[i].b);
list.Add(&encoder.attLayerNorms[i].w);
list.Add(&encoder.attLayerNorms[i].b);
}
list.Add(&encoder.embedder.w);
}
/*
dump the parameters to a file
>> fn - where to keep the model
*/
void T2TModel::Dump(const char * fn)
{
FILE * file = fopen(fn, "wb");
CheckNTErrors(file, "Cannot open the model file");
XList params(100);
GetParams(params);
for(int i = 0; i < params.count; i++){
XTensor * p = (XTensor*)params.Get(i);
p->Dump(file, "param:");
}
fclose(file);
XPRINT(0, stderr, "[INFO] model saved\n");
}
/* read the parameters */
void T2TModel::Read(const char * fn)
{
FILE * file = fopen(fn, "rb");
CheckNTErrors(file, "Cannot open the model file");
XList params(100);
GetParams(params);
for(int i = 0; i < params.count; i++){
XTensor * p = (XTensor*)params.Get(i);
p->Read(file, "param:");
}
fclose(file);
XPRINT(0, stderr, "[INFO] model loaded\n");
}
}
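Dump writes every tensor returned by GetParams in order, and Read loads them back in the same order, so the two stay consistent as long as GetParams is the single source of the parameter list. A minimal usage sketch (the file name is hypothetical, and the model is assumed to be initialized first):
    T2TModel model;
    model.InitModel(argc, argv);     /* build all parameter tensors */
    model.Dump("t2t.model.bin");     /* save: one tensor after another, in GetParams order */
    model.Read("t2t.model.bin");     /* load: same order as Dump */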
......@@ -73,6 +73,15 @@ public:
/* make the entire network (with the output softmax layer) */
void Make(XTensor &input, XTensor &output);
/* get parameter matrices */
void GetParams(XList &list);
/* dump the parameters */
void Dump(const char * fn);
/* read the parameters */
void Read(const char * fn);
};
}
......
......@@ -55,7 +55,7 @@ void T2TTrainer::Init(int argc, const char ** argv)
bool useMem = false;
LoadParamBool(argc, argv, "mem", &useMem, useMem);
LoadParamFloat(argc, argv, "lrate", &lrate, 0.001F);
LoadParamFloat(argc, argv, "lrate", &lrate, 1.0F);
LoadParamInt(argc, argv, "sbatch", &sBatchSize, 1);
LoadParamInt(argc, argv, "wbatch", &wBatchSize, 1);
LoadParamInt(argc, argv, "nepoch", &nepoch, 1);
......@@ -72,6 +72,9 @@ void T2TTrainer::Init(int argc, const char ** argv)
}
FILE * tf = NULL;
int tc = 0;
/*
train the model
>> fn - training data file
......@@ -97,6 +100,9 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
mem->SetPin();
XNet net;
tf = fopen("tmp.xx.txt", "wb");
tc = 0;
double startT = GetClockSec();
......@@ -132,8 +138,7 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
net.Backward(output, batch, CROSSENTROPY);
/* learning rate */
lr = (1 / (float)sqrt((float)d)) * (float)MIN(pow(step + 1, -0.5), (step + 1) * pow(nwarmup, -1.5));
//lr = 0.00005F;
lr = lrate * (1 / (float)sqrt((float)d)) * (float)MIN(pow(step + 1, -0.5), (step + 1) * pow(nwarmup, -1.5));
/* update the parameters */
Update(model, lr);
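For reference, the updated expression matches the standard Transformer warm-up schedule, now scaled by the new lrate option (d is the model dimension and nwarmup the number of warm-up steps already used by the trainer); a LaTeX rendering of what the assignment above computes:
    lr(\mathrm{step}) = \mathrm{lrate} \cdot d^{-0.5} \cdot \min\big((\mathrm{step}+1)^{-0.5},\ (\mathrm{step}+1)\cdot \mathrm{nwarmup}^{-1.5}\big)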
......@@ -167,6 +172,8 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
}
double elapsed = GetClockSec() - startT;
fclose(tf);
XPRINT6(0, stderr, "[INFO] lr=%.2e, elapsed=%.1fs, step=%d, epoch=%d, word=%d, ppl=%.3f\n",
lr, elapsed, step, epoch, wordCountTotal, exp(loss / wordCount));
......@@ -297,18 +304,25 @@ int T2TTrainer::LoadBatch(FILE * file, XTensor * batch, XTensor * padding,
dims[1] = max;
dims[2] = vs;
//if(batch->order != 3 || batch->GetDim(0) != dims[0] ||
// batch->GetDim(1) != dims[1] || batch->GetDim(2) != dims[2]){
InitTensor(batch, 3, dims, X_FLOAT, 1.0F, devID, mem);
//}
InitTensor(batch, 3, dims, X_FLOAT, 1.0F, devID, mem);
InitTensor2D(padding, sc, max, X_FLOAT, devID, mem);
//if(padding->order != 2 || padding->GetDim(0) != sc ||
// padding->GetDim(1) != max){
InitTensor2D(padding, sc, max, X_FLOAT, devID, mem);
//}
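/* create the gradient tensors on the first call, or re-shape them so that
   they match the current batch and padding dimensions, before clearing them */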
if(batch->grad == NULL)
XNoder::MakeGrad(batch);
else
InitTensor(batch->grad, 3, dims, X_FLOAT, 1.0F, devID, mem);
if(padding->grad == NULL)
XNoder::MakeGrad(padding);
else
InitTensor2D(padding->grad, sc, max, X_FLOAT, devID, mem);
batch->SetZeroAll();
padding->SetZeroAll();
batch->grad->SetZeroAll();
padding->grad->SetZeroAll();
//fprintf(tf, "batch %d(%d)\n", tc++, sc);
/* this might be slow on GPUs :( */
for(int s = seq; s < seq + sc; s++){
......@@ -316,8 +330,15 @@ int T2TTrainer::LoadBatch(FILE * file, XTensor * batch, XTensor * padding,
batch->Set3D(1.0F, s - seq, w, buf[seqOffset[s] + w]);
padding->Set2D(1.0F, s - seq, w);
wCount++;
//fprintf(tf, "%d", buf[seqOffset[s] + w]);
//if(w < seqLen[s] - 1)
// fprintf(tf, " ");
//else
// fprintf(tf, "\n");
}
}
fflush(tf);
}
return sc;
......@@ -374,23 +395,7 @@ void T2TTrainer::Update(T2TModel * model, const float lr)
{
XList ws(100);
ws.Add(&model->outputLayer.w);
for(int i = 0; i < model->encoder.nlayer; i++){
ws.Add(&model->encoder.fnns[i].w1);
ws.Add(&model->encoder.fnns[i].b1);
ws.Add(&model->encoder.fnns[i].w2);
ws.Add(&model->encoder.fnns[i].b2);
ws.Add(&model->encoder.attentions[i].wk);
ws.Add(&model->encoder.attentions[i].wq);
ws.Add(&model->encoder.attentions[i].wv);
ws.Add(&model->encoder.fnnLayerNorms[i].w);
ws.Add(&model->encoder.fnnLayerNorms[i].b);
ws.Add(&model->encoder.attLayerNorms[i].w);
ws.Add(&model->encoder.attLayerNorms[i].b);
}
ws.Add(&model->encoder.embedder.w);
model->GetParams(ws);
for(int i = 0; i < ws.count; i++){
XTensor * para = (XTensor*)ws.Get(i);
......@@ -402,26 +407,6 @@ void T2TTrainer::Update(T2TModel * model, const float lr)
CheckNTErrors(para != NULL, "NULL parameter tensor!");
CheckNTErrors(paraGrad != NULL, "NULL gradient tensor!");
/*
DTYPE * d = new DTYPE[para->unitNum * para->unitSize];
DTYPE * g = new DTYPE[para->unitNum * para->unitSize];
XMemCopy(d, -1, para->data, para->devID, para->unitNum * para->unitSize);
XMemCopy(g, -1, paraGrad->data, paraGrad->devID, para->unitNum * para->unitSize);
for (int i = 0; i < para->unitNum; i++) {
if (IsNAN(d[i]) || IsINF(d[i])) {
int nnn = 0;
}
if (IsNAN(g[i]) || IsINF(g[i])) {
int nnn = 0;
}
}
delete[] d;
delete[] g;
*/
/* the delta rule */
_Sum(para, paraGrad, para, -lr);
......@@ -438,23 +423,7 @@ void T2TTrainer::PrepareModel(T2TModel * model)
{
XList ws(100);
ws.Add(&model->outputLayer.w);
for(int i = 0; i < model->encoder.nlayer; i++){
ws.Add(&model->encoder.fnns[i].w1);
ws.Add(&model->encoder.fnns[i].b1);
ws.Add(&model->encoder.fnns[i].w2);
ws.Add(&model->encoder.fnns[i].b2);
ws.Add(&model->encoder.attentions[i].wk);
ws.Add(&model->encoder.attentions[i].wq);
ws.Add(&model->encoder.attentions[i].wv);
ws.Add(&model->encoder.fnnLayerNorms[i].w);
ws.Add(&model->encoder.fnnLayerNorms[i].b);
ws.Add(&model->encoder.attLayerNorms[i].w);
ws.Add(&model->encoder.attLayerNorms[i].b);
}
ws.Add(&model->encoder.embedder.w);
model->GetParams(ws);
for(int i = 0; i < ws.count; i++){
XTensor * para = (XTensor*)ws.Get(i);
......
......@@ -38,20 +38,34 @@ int TransformerMain(int argc, const char ** argv)
ShowParams(argc, argv);
char * trainFN = new char[MAX_LINE_LENGTH];
char * modelFN = new char[MAX_LINE_LENGTH];
char * testFN = new char[MAX_LINE_LENGTH];
LoadParamString(argc, argv, "train", trainFN, "");
LoadParamString(argc, argv, "model", modelFN, "");
T2TModel model;
model.InitModel(argc, argv);
/* learn model parameters */
if(strcmp(trainFN, "")){
T2TTrainer trainer;
trainer.Init(argc, argv);
trainer.Train(trainFN, &model);
}
/* save the final model */
if(strcmp(modelFN, "") && strcmp(trainFN, ""))
model.Dump(modelFN);
/* load the model if necessary */
if(strcmp(modelFN, ""))
model.Read(modelFN);
delete[] trainFN;
delete[] modelFN;
delete[] testFN;
fclose(tmpFILE);
......
......@@ -523,7 +523,7 @@ release a piece of memory in the buffer
void XMem::ReleaseBuf(int myDevID, MTYPE mySize, int pitch)
{
CheckNTErrors((bufUsed >= mySize),
"Cannot allocate the memory. Please specify a larger buffer in XMem!");
"Cannot allocate the memory. Please specify a larger buffer in XMem!");
MTYPE backOffset = 0;
......