Commit fbd915c6 by xiaotong

updates

parent d69372b3
......@@ -485,6 +485,9 @@ unsigned int GetNextPower2(unsigned int n)
/* sleep for a while */
void XSleep(int sleepTime)
{
if (sleepTime <= 0)
return;
#ifdef _WIN32
Sleep((DWORD)sleepTime);
#else
......
......@@ -39,6 +39,7 @@ XParamKeeper::XParamKeeper()
{
/* nothing is attached to this keeper yet */
param = NULL;
/* both the per-parameter state and the training state start
   as "not ready" */
flag = PARAM_STATE_NOT_READY;
trainFlag = PARAM_STATE_NOT_READY;
/* set up the two mutexes: accessLock and trainLock (the latter is
   the one used by XModel::LockParamsForTraining and
   XModel::WaitForUnlockedParams) */
MUTEX_INIT(accessLock);
MUTEX_INIT(trainLock);
}
......@@ -153,9 +154,36 @@ bool XModel::CheckParam()
/* initial model for running the it */
void XModel::InitForRun()
{
RefreshMe();
}
/*
reset the state of every parameter and acquire its trainLock.
The locks are deliberately still held when this function returns:
they are released when a training step is finished (see
XWorkerBroadcast::FinishUpdateSingle), which lets
WaitForUnlockedParams() continue. Together the two calls form a
START-WAIT process for each run of training (a step).

NOTE(review): the release may happen on a different thread than the
one that calls this function (when the finish job is queued). Whether
the primitive behind MUTEX_LOCK/MUTEX_UNLOCK allows unlocking from a
non-owner thread should be confirmed.
*/
void XModel::LockParamsForTraining()
{
    for (int pid = 0; pid < paramNum; ++pid) {
        /* clear the states left by the previous step */
        params[pid].param->isGradFinished = false;
        params[pid].flag = PARAM_STATE_NOT_READY;
        params[pid].trainFlag = PARAM_STATE_NOT_READY;

        /* no matching UNLOCK here on purpose (see the header comment) */
        MUTEX_LOCK(params[pid].trainLock);
    }
}
/*
wait until the trainLock of every parameter has been released.
For each parameter we try to take its trainLock; the call proceeds
only after the lock has been unlocked in training, i.e., we are
actually waiting for the FINISHED signal from other workers/threads.
Once the lock is obtained, the parameter must be in the
PARAM_STATE_UPDATED state, and the lock is released again.
*/
void XModel::WaitForUnlockedParams()
{
    int i = 0;
    while (i < paramNum) {
        /* blocks here while the lock is still held */
        MUTEX_LOCK(params[i].trainLock);

        CheckNTErrors(params[i].trainFlag == PARAM_STATE_UPDATED,
                      "the state of the parameter is wrong!");

        MUTEX_UNLOCK(params[i].trainLock);
        i++;
    }
}
......@@ -165,6 +193,7 @@ void XModel::RefreshMe()
for (int i = 0; i < paramNum; i++) {
params[i].param->isGradFinished = false;
params[i].flag = PARAM_STATE_NOT_READY;
params[i].trainFlag = PARAM_STATE_NOT_READY;
}
}
......
......@@ -60,6 +60,11 @@ public:
/* the parameter state (set to PARAM_STATE_NOT_READY by the
   constructor) */
PARAM_STATE flag;
/* the state of the entire training process
   (either PARAM_STATE_NOT_READY or PARAM_STATE_UPDATED).
   It is set to PARAM_STATE_UPDATED in
   XWorkerBroadcast::FinishUpdateSingle and checked in
   XModel::WaitForUnlockedParams */
PARAM_STATE trainFlag;
/* a mutex for locking and unlocking the parameter */
MUTEX_HANDLE accessLock;
......@@ -119,6 +124,13 @@ public:
/* check if the parameters are well-defined for training */
bool CheckParam();
/* reset the parameter states and lock the trainLock of each
   parameter (the locks are released when a run of training
   is finished) */
void LockParamsForTraining();
/* wait until the trainLock of every parameter has been unlocked,
   and check that each parameter is in the PARAM_STATE_UPDATED state */
void WaitForUnlockedParams();
/* initialize the model for a run */
void InitForRun();
......
......@@ -223,4 +223,49 @@ bool XWorkerBroadcast::AddJobBroadcast(XModel * source, XList * targetList)
return true;
}
/*
mark the training state of a parameter as UPDATED and release
its trainLock, so that XModel::WaitForUnlockedParams() can proceed
for this parameter
>> source - the model that we are updating
>> pid - the parameter index

NOTE(review): neither source nor pid is validated here; the checks
are done in AddJobFinish(). Direct callers presumably need to pass
valid arguments - confirm.
*/
void XWorkerBroadcast::FinishUpdateSingle(XModel * source, int pid)
{
    /* set the state before the lock is released */
    source->params[pid].trainFlag = PARAM_STATE_UPDATED;

    MUTEX_UNLOCK(source->params[pid].trainLock);
}
/*
wrapper of FinishUpdateSingle (the entry used when the job is
run directly or enqueued by AddJobFinish)
>> args - the argument list:
          item 0 - the XWorkerBroadcast that handles the job
          item 1 - the model that we are updating
          item 2 - the parameter index (int)
*/
void XWorkerBroadcast::FinishSingle(XList * args)
{
    XWorkerBroadcast * worker = (XWorkerBroadcast*)args->GetItem(0);
    XModel * model = (XModel*)args->GetItem(1);
    int paramId = args->GetInt(2);

    worker->FinishUpdateSingle(model, paramId);
}
/*
add a new job of finishing the update
>> source - the model that we are updating
>> pid - the parameter index (must be in [0, source->paramNum))
<< return - always true (invalid arguments are reported by
            CheckNTErrors)
*/
bool XWorkerBroadcast::AddJobFinish(XModel * source, int pid)
{
    /* source is a model rather than a tensor, so the message says so */
    CheckNTErrors(source != NULL, "no input source model!");
    CheckNTErrors(pid >= 0 && pid < source->paramNum, "illegal parameter index!");

    /* the order of the arguments must match what FinishSingle() reads:
       (0) the worker, (1) the model, (2) the parameter index */
    XList args;
    args.Add(this);
    args.Add(source);
    args.AddInt(pid);

    /* either run the job right now or leave it to the job queue.
       NOTE(review): args is a local list; this presumably relies on
       EnqueueJob copying the arguments before returning - confirm. */
    if (isInstantRun)
        XWorkerBroadcast::FinishSingle(&args);
    else
        queue.EnqueueJob((void*)(char*)XWorkerBroadcast::FinishSingle, &args);

    return true;
}
}
......@@ -82,6 +82,16 @@ public:
/* add a new job of broadcasting data (for a model) */
bool AddJobBroadcast(XModel * source, XList * targetList);
/* mark the training state of parameter pid of the model as UPDATED
   and unlock its trainLock */
void FinishUpdateSingle(XModel * source, int pid);
/* wrapper of FinishUpdateSingle. The arguments are read from the
   list: (0) the worker, (1) the model, (2) the parameter index */
static
void FinishSingle(XList * args);
/* add a new job of finishing the update (run at once if isInstantRun
   is set, and enqueued otherwise) */
bool AddJobFinish(XModel * source, int pid);
};
}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论