Commit 412e53a8 by xiaotong

bug fixes for multi-threading

parent 87bb27ee
...@@ -107,12 +107,23 @@ void XLeader::SetServerModel(XConfig * config, XModel * model) ...@@ -107,12 +107,23 @@ void XLeader::SetServerModel(XConfig * config, XModel * model)
{ {
XList members; XList members;
for (int i = 0; i < jworkers.count; i++) { for (int i = 0; i < jworkers.count; i++) {
XModel * member = (XModel*)jworkers.GetItem(i); XModel * member = ((XWorkerJob*)jworkers[i])->GetModel();
members.Add(member); members.Add(member);
} }
SetServerModel(config, model, &members); SetServerModel(config, model, &members);
} }
/* initialize the models for running them */
void XLeader::InitForRun()
{
serverModel.InitForRun();
for (int i = 0; i < jworkers.count; i++) {
XModel * model = ((XWorkerJob*)jworkers[i])->GetModel();
model->InitForRun();
}
}
/* get loss */ /* get loss */
float XLeader::GetLoss() float XLeader::GetLoss()
...@@ -256,12 +267,14 @@ run the model (for one time). Basically this is a map-reduce process. ...@@ -256,12 +267,14 @@ run the model (for one time). Basically this is a map-reduce process.
bool XLeader::Run(XConfig * config, DataDistributeBase * dataDistributor, bool XLeader::Run(XConfig * config, DataDistributeBase * dataDistributor,
XModel * model, XOptimizer * optimizer) XModel * model, XOptimizer * optimizer)
{ {
bool isDataOK = true; bool isDataOK = true;
int activeJobCount = 0; int activeJobCount = 0;
int* active = new int[jworkers.count]; int* active = new int[jworkers.count];
InitForRun();
for (int i = 0; i < jworkers.count; i++) for (int i = 0; i < jworkers.count; i++)
active[i] = 0; active[i] = 0;
/* Feed the input to each worker and geneate the output. /* Feed the input to each worker and geneate the output.
For each worker, we define a job queue and enqueue jobs For each worker, we define a job queue and enqueue jobs
......
...@@ -108,6 +108,9 @@ public: ...@@ -108,6 +108,9 @@ public:
/* set the server model */ /* set the server model */
void SetServerModel(XConfig * config, XModel * model); void SetServerModel(XConfig * config, XModel * model);
/* initialize the models for running them */
void InitForRun();
/* get loss */ /* get loss */
float GetLoss(); float GetLoss();
......
...@@ -126,6 +126,16 @@ bool XModel::CheckParam() ...@@ -126,6 +126,16 @@ bool XModel::CheckParam()
return true; return true;
} }
/* initial model for running the it */
void XModel::InitForRun()
{
for (int i = 0; i < params.count; i++) {
XTensor * param = (XTensor*)params[i];
param->isGradFinished = false;
flags[i] = PARAM_STATE_NOT_READY;
}
}
/* refresh the model */ /* refresh the model */
void XModel::RefreshMe() void XModel::RefreshMe()
......
...@@ -94,6 +94,9 @@ public: ...@@ -94,6 +94,9 @@ public:
/* check if the parameters are well-defined for training */ /* check if the parameters are well-defined for training */
bool CheckParam(); bool CheckParam();
/* initial model for running the it */
void InitForRun();
/* refresh the model */ /* refresh the model */
void RefreshMe(); void RefreshMe();
...@@ -110,4 +113,4 @@ public: ...@@ -110,4 +113,4 @@ public:
} }
#endif // __XMODEL_H__ #endif // __XMODEL_H__
\ No newline at end of file
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论