Commit 58d5fc31 by xiaotong

fix the bug of memory conflict

parent 57fd3510
@@ -148,18 +148,16 @@ void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
    /* back-propagation from output to input */
    for(int i = nodes.count - 1; i >= 0; i--){
-        XTensor * node = (XTensor*)nodes.Get(i);;
+        XTensor * node = (XTensor*)nodes.Get(i);
+
+        if(node->mem != NULL){
+            CheckNTErrors(node->mem->bufUsed < BUF_PITCH, "Illegal access of buffer!");
+        }

        if(node->visitMark == NODE_FINISHED)
            continue;

-        //if(i == 1)
-        //    return;
-
        BackwardNode(node);
+
+        if(node->mem != NULL){
+            CheckNTErrors(node->mem->bufUsed == 0, "Illegal access of buffer!");
+        }
    }
}
......
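The two CheckNTErrors calls bracket BackwardNode(): the first asserts that the node's memory pool has not over-run its scratch buffer, the second that every temporary borrowed during the step has been returned. A self-contained sketch of this balanced-buffer guard, with an illustrative ScratchPool standing in for the real XMem:

```cpp
#include <cstdio>
#include <cstdlib>

/* illustrative stand-in for a memory pool: bufUsed mirrors the counter
   that the CheckNTErrors assertions above inspect */
struct ScratchPool {
    size_t bufUsed = 0;                       /* bytes currently borrowed */
    void * Borrow(size_t n) { bufUsed += n; return std::malloc(n); }
    void   Return(void * p, size_t n) { bufUsed -= n; std::free(p); }
};

/* a well-behaved backward step returns everything it borrows */
static void BackwardStep(ScratchPool &pool)
{
    void * tmp = pool.Borrow(256);
    /* ... compute gradients into tmp ... */
    pool.Return(tmp, 256);
}

int main()
{
    ScratchPool pool;
    BackwardStep(pool);

    /* same idea as the second assertion: the buffer must be empty again,
       otherwise a temporary leaked and a later step may clobber it */
    if (pool.bufUsed != 0) {
        std::fprintf(stderr, "Illegal access of buffer!\n");
        return 1;
    }
    std::printf("buffer balanced\n");
    return 0;
}
```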
@@ -111,6 +111,9 @@ XTensor T2TEmbedder::Make(XTensor &input)
    memcpy(dims, input.dimSize, input.order * sizeof(int));
    dims[input.order - 1] = eSize;

+    XTensor wordEmbedding;
+    XTensor posEmbedding;
+
    bool match = (posEmbedding.order == input.order);
    if(match){
        for(int i = 0; i < input.order; i++){
@@ -130,8 +133,6 @@ XTensor T2TEmbedder::Make(XTensor &input)
        DelTensorBuf(posTMP);
    }

-    XTensor wordEmbedding;
-
    /* then we make word embeddings */
    wordEmbedding = Linear(MMul(input, w), (float)sqrt((float)d));
......
@@ -63,9 +63,6 @@ public:
       the embedding processing by re-loading. */
    XTensor posEmbeddingBase;

-    /* positional embeddings */
-    XTensor posEmbedding;
-
public:

    /* constructor */
    T2TEmbedder();
......
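Together with the local wordEmbedding/posEmbedding declarations added in T2TEmbedder::Make, deleting this member is the heart of the memory-conflict fix: a tensor owned by the embedder is shared by every forward call, so the next batch can overwrite storage that the previous batch's result still references. A minimal illustration of that hazard in plain C++ (BadEmbedder/GoodEmbedder are hypothetical stand-ins, not project classes):

```cpp
#include <cstdio>
#include <vector>

/* member buffer shared by every call: the second call silently
   overwrites the data handed out by the first */
struct BadEmbedder {
    std::vector<float> out;
    float * Make(float v, int n) { out.assign(n, v); return out.data(); }
};

/* fresh local storage per call, as the commit does by declaring
   posEmbedding inside Make() instead of keeping it as a member */
struct GoodEmbedder {
    std::vector<float> Make(float v, int n) { return std::vector<float>(n, v); }
};

int main()
{
    BadEmbedder bad;
    float * first = bad.Make(1.0f, 4);
    bad.Make(2.0f, 4);                    /* reuses the same storage */
    std::printf("bad:  first[0] = %g (expected 1)\n", first[0]);  /* prints 2 */

    GoodEmbedder good;
    std::vector<float> a = good.Make(1.0f, 4);
    std::vector<float> b = good.Make(2.0f, 4);
    std::printf("good: a[0] = %g, b[0] = %g\n", a[0], b[0]);      /* 1 and 2 */
    return 0;
}
```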
@@ -62,7 +62,7 @@ void T2TModel::InitModel(int argc, const char ** argv)
    if(useMem){
        delete mem;
-        mem = new XMem(devID, UNI_FREE, (MTYPE)MILLION * 128, 1024, MILLION * 128);
+        mem = new XMem(devID, UNI_FREE, (MTYPE)MILLION * 256, 1024, MILLION * 128);
        mem->SetDesiredSize(devID, 0, (MTYPE)memSize * MILLION);
    }
@@ -117,4 +117,74 @@ void T2TModel::Make(XTensor &input, XTensor &output)
    }
}

+/*
+get parameter matrices
+>> list - the list that keeps the parameter matrices
+*/
+void T2TModel::GetParams(XList &list)
+{
+    list.Clear();
+    list.Add(&outputLayer.w);
+
+    for(int i = 0; i < encoder.nlayer; i++){
+        list.Add(&encoder.fnns[i].w1);
+        list.Add(&encoder.fnns[i].b1);
+        list.Add(&encoder.fnns[i].w2);
+        list.Add(&encoder.fnns[i].b2);
+        list.Add(&encoder.attentions[i].wk);
+        list.Add(&encoder.attentions[i].wq);
+        list.Add(&encoder.attentions[i].wv);
+        list.Add(&encoder.fnnLayerNorms[i].w);
+        list.Add(&encoder.fnnLayerNorms[i].b);
+        list.Add(&encoder.attLayerNorms[i].w);
+        list.Add(&encoder.attLayerNorms[i].b);
+    }
+
+    list.Add(&encoder.embedder.w);
+}
+
+/*
+dump the parameters
+>> fn - where to keep the model
+*/
+void T2TModel::Dump(const char * fn)
+{
+    FILE * file = fopen(fn, "wb");
+    CheckNTErrors(file, "Cannot open the model file");
+
+    XList params(100);
+    GetParams(params);
+
+    for(int i = 0; i < params.count; i++){
+        XTensor * p = (XTensor*)params.Get(i);
+        p->Dump(file, "param:");
+    }
+
+    fclose(file);
+
+    XPRINT(0, stderr, "[INFO] model saved\n");
+}
+
+/* read the parameters */
+void T2TModel::Read(const char * fn)
+{
+    FILE * file = fopen(fn, "rb");
+    CheckNTErrors(file, "Cannot open the model file");
+
+    XList params(100);
+    GetParams(params);
+
+    for(int i = 0; i < params.count; i++){
+        XTensor * p = (XTensor*)params.Get(i);
+        p->Read(file, "param:");
+    }
+
+    fclose(file);
+
+    XPRINT(0, stderr, "[INFO] model loaded\n");
+}
}
@@ -73,6 +73,15 @@ public:
    /* make the entire network (with the output softmax layer) */
    void Make(XTensor &input, XTensor &output);

+    /* get parameter matrices */
+    void GetParams(XList &list);
+
+    /* dump the parameters */
+    void Dump(const char * fn);
+
+    /* read the parameters */
+    void Read(const char * fn);
};

}
......
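GetParams() flattens the model into one ordered XList, and Dump()/Read() stream those tensors through the existing XTensor::Dump/Read routines, so a dump can only be reloaded into a network built with the same architecture flags (Read() fills the tensors back in GetParams order). A hedged usage sketch, assuming the NiuTensor headers and the argc/argv parsed elsewhere in this commit; the file names are illustrative:

```cpp
/* train, save, and reload a model (sketch; "model.bin" is illustrative) */
T2TModel model;
model.InitModel(argc, argv);

T2TTrainer trainer;
trainer.Init(argc, argv);
trainer.Train("train.txt", &model);

model.Dump("model.bin");      /* writes every tensor from GetParams() */

T2TModel model2;
model2.InitModel(argc, argv); /* must build the same shapes ... */
model2.Read("model.bin");     /* ... so Read() can fill them in order */
```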
@@ -55,7 +55,7 @@ void T2TTrainer::Init(int argc, const char ** argv)
    bool useMem = false;

    LoadParamBool(argc, argv, "mem", &useMem, useMem);
-    LoadParamFloat(argc, argv, "lrate", &lrate, 0.001F);
+    LoadParamFloat(argc, argv, "lrate", &lrate, 1.0F);
    LoadParamInt(argc, argv, "sbatch", &sBatchSize, 1);
    LoadParamInt(argc, argv, "wbatch", &wBatchSize, 1);
    LoadParamInt(argc, argv, "nepoch", &nepoch, 1);
@@ -72,6 +72,9 @@ void T2TTrainer::Init(int argc, const char ** argv)
}

+FILE * tf = NULL;
+int tc = 0;
+
/*
train the model
>> fn - training data file
@@ -98,6 +101,9 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
    XNet net;

+    tf = fopen("tmp.xx.txt", "wb");
+    tc = 0;
+
    double startT = GetClockSec();

    for(epoch = 0; epoch < nepoch; epoch++){
@@ -132,8 +138,7 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
            net.Backward(output, batch, CROSSENTROPY);

            /* learning rate */
-            lr = (1 / (float)sqrt((float)d)) * (float)MIN(pow(step + 1, -0.5), (step + 1) * pow(nwarmup, -1.5));
-            //lr = 0.00005F;
+            lr = lrate * (1 / (float)sqrt((float)d)) * (float)MIN(pow(step + 1, -0.5), (step + 1) * pow(nwarmup, -1.5));

            /* update the parameters */
            Update(model, lr);
@@ -168,6 +173,8 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
    double elapsed = GetClockSec() - startT;

+    fclose(tf);
+
    XPRINT6(0, stderr, "[INFO] lr=%.2e, elapsed=%.1fs, step=%d, epoch=%d, word=%d, ppl=%.3f\n",
            lr, elapsed, step, epoch, wordCountTotal, exp(loss / wordCount));
    XPRINT3(0, stderr, "[INFO] training finished (took %.1fs, step=%d and epoch=%d)\n",
@@ -297,18 +304,25 @@ int T2TTrainer::LoadBatch(FILE * file, XTensor * batch, XTensor * padding,
        dims[1] = max;
        dims[2] = vs;

-        //if(batch->order != 3 || batch->GetDim(0) != dims[0] ||
-        //   batch->GetDim(1) != dims[1] || batch->GetDim(2) != dims[2]){
            InitTensor(batch, 3, dims, X_FLOAT, 1.0F, devID, mem);
-        //}
-        //if(padding->order != 2 || padding->GetDim(0) != sc ||
-        //   padding->GetDim(1) != max){
            InitTensor2D(padding, sc, max, X_FLOAT, devID, mem);
-        //}
+
+        if(batch->grad == NULL)
+            XNoder::MakeGrad(batch);
+        else
+            InitTensor(batch->grad, 3, dims, X_FLOAT, 1.0F, devID, mem);
+
+        if(padding->grad == NULL)
+            XNoder::MakeGrad(padding);
+        else
+            InitTensor2D(padding->grad, sc, max, X_FLOAT, devID, mem);

        batch->SetZeroAll();
        padding->SetZeroAll();
+        batch->grad->SetZeroAll();
+        padding->grad->SetZeroAll();
+
+        //fprintf(tf, "batch %d(%d)\n", tc++, sc);

        /* this might be slow on GPUs :( */
        for(int s = seq; s < seq + sc; s++){
@@ -316,8 +330,15 @@ int T2TTrainer::LoadBatch(FILE * file, XTensor * batch, XTensor * padding,
                batch->Set3D(1.0F, s - seq, w, buf[seqOffset[s] + w]);
                padding->Set2D(1.0F, s - seq, w);
                wCount++;
+
+                //fprintf(tf, "%d", buf[seqOffset[s] + w]);
+                //if(w < seqLen[s] - 1)
+                //    fprintf(tf, " ");
+                //else
+                //    fprintf(tf, "\n");
            }
        }
+
+        fflush(tf);
    }

    return sc;
@@ -374,23 +395,7 @@ void T2TTrainer::Update(T2TModel * model, const float lr)
{
    XList ws(100);

-    ws.Add(&model->outputLayer.w);
-
-    for(int i = 0; i < model->encoder.nlayer; i++){
-        ws.Add(&model->encoder.fnns[i].w1);
-        ws.Add(&model->encoder.fnns[i].b1);
-        ws.Add(&model->encoder.fnns[i].w2);
-        ws.Add(&model->encoder.fnns[i].b2);
-        ws.Add(&model->encoder.attentions[i].wk);
-        ws.Add(&model->encoder.attentions[i].wq);
-        ws.Add(&model->encoder.attentions[i].wv);
-        ws.Add(&model->encoder.fnnLayerNorms[i].w);
-        ws.Add(&model->encoder.fnnLayerNorms[i].b);
-        ws.Add(&model->encoder.attLayerNorms[i].w);
-        ws.Add(&model->encoder.attLayerNorms[i].b);
-    }
-
-    ws.Add(&model->encoder.embedder.w);
+    model->GetParams(ws);

    for(int i = 0; i < ws.count; i++){
        XTensor * para = (XTensor*)ws.Get(i);
@@ -402,26 +407,6 @@ void T2TTrainer::Update(T2TModel * model, const float lr)
        CheckNTErrors(para != NULL, "NULL parameter tensor!");
        CheckNTErrors(paraGrad != NULL, "NULL gradient tensor!");

-        /*
-        DTYPE * d = new DTYPE[para->unitNum * para->unitSize];
-        DTYPE * g = new DTYPE[para->unitNum * para->unitSize];
-
-        XMemCopy(d, -1, para->data, para->devID, para->unitNum * para->unitSize);
-        XMemCopy(g, -1, paraGrad->data, paraGrad->devID, para->unitNum * para->unitSize);
-
-        for (int i = 0; i < para->unitNum; i++) {
-            if (IsNAN(d[i]) || IsINF(d[i])) {
-                int nnn = 0;
-            }
-            if (IsNAN(g[i]) || IsINF(g[i])) {
-                int nnn = 0;
-            }
-        }
-
-        delete[] d;
-        delete[] g;
-        */
-
        /* the delta rule */
        _Sum(para, paraGrad, para, -lr);
@@ -438,23 +423,7 @@ void T2TTrainer::PrepareModel(T2TModel * model)
{
    XList ws(100);

-    ws.Add(&model->outputLayer.w);
-
-    for(int i = 0; i < model->encoder.nlayer; i++){
-        ws.Add(&model->encoder.fnns[i].w1);
-        ws.Add(&model->encoder.fnns[i].b1);
-        ws.Add(&model->encoder.fnns[i].w2);
-        ws.Add(&model->encoder.fnns[i].b2);
-        ws.Add(&model->encoder.attentions[i].wk);
-        ws.Add(&model->encoder.attentions[i].wq);
-        ws.Add(&model->encoder.attentions[i].wv);
-        ws.Add(&model->encoder.fnnLayerNorms[i].w);
-        ws.Add(&model->encoder.fnnLayerNorms[i].b);
-        ws.Add(&model->encoder.attLayerNorms[i].w);
-        ws.Add(&model->encoder.attLayerNorms[i].b);
-    }
-
-    ws.Add(&model->encoder.embedder.w);
+    model->GetParams(ws);

    for(int i = 0; i < ws.count; i++){
        XTensor * para = (XTensor*)ws.Get(i);
......
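Two changes above are worth unpacking. The learning rate now follows the usual Transformer warmup schedule scaled by the new lrate flag, lr = lrate * d^-0.5 * min((step+1)^-0.5, (step+1) * nwarmup^-1.5), which grows linearly for the first nwarmup steps and decays as (step+1)^-0.5 afterwards; and Update() applies plain SGD, para <- para - lr * grad, which is what _Sum(para, paraGrad, para, -lr) computes. A self-contained sketch with illustrative values of d and nwarmup:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdio>

/* the warmup schedule from Train() above; lrate = 1.0F is the new
   default set by this commit, d and nwarmup values are illustrative */
static float WarmupLR(float lrate, int d, int step, int nwarmup)
{
    double s = step + 1;
    return lrate * (1.0f / (float)std::sqrt((double)d))
                 * (float)std::min(std::pow(s, -0.5), s * std::pow((double)nwarmup, -1.5));
}

int main()
{
    const float lrate = 1.0f;
    const int d = 512, nwarmup = 4000;

    int steps[] = {0, 999, 3999, 15999};   /* the peak is at step nwarmup - 1 */
    for (int step : steps)
        std::printf("step %5d -> lr = %.6f\n", step, WarmupLR(lrate, d, step, nwarmup));

    /* the "delta rule" in Update(): para <- para - lr * grad, i.e. what
       _Sum(para, paraGrad, para, -lr) computes elementwise on tensors */
    float para = 0.5f, grad = 0.2f;
    para = para - WarmupLR(lrate, d, 0, nwarmup) * grad;
    std::printf("updated para = %.6f\n", para);
    return 0;
}
```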
@@ -38,20 +38,34 @@ int TransformerMain(int argc, const char ** argv)
    ShowParams(argc, argv);

    char * trainFN = new char[MAX_LINE_LENGTH];
+    char * modelFN = new char[MAX_LINE_LENGTH];
+    char * testFN = new char[MAX_LINE_LENGTH];

    LoadParamString(argc, argv, "train", trainFN, "");
+    LoadParamString(argc, argv, "model", modelFN, "");

    T2TModel model;
    model.InitModel(argc, argv);

+    /* learn model parameters */
    if(strcmp(trainFN, "")){
        T2TTrainer trainer;
        trainer.Init(argc, argv);
        trainer.Train(trainFN, &model);
    }

+    /* save the final model */
+    if(strcmp(modelFN, "") && strcmp(trainFN, ""))
+        model.Dump(modelFN);
+
+    /* load the model if necessary */
+    if(strcmp(modelFN, ""))
+        model.Read(modelFN);
+
    delete[] trainFN;
+    delete[] modelFN;
+    delete[] testFN;

    fclose(tmpFILE);
......
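With the new flag, a run that both trains and saves the model would pass something like -train train.txt -model model.bin -lrate 1.0 (paths illustrative; the flag names are the LoadParamString/LoadParamFloat keys above). Note that testFN is allocated and freed but never read from the command line, so only -train and -model take effect in this commit.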