Commit e3455593 by xiaotong

new updates and reorganizing XModel

parent 923af6c2
......@@ -26,7 +26,7 @@
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2021-02-25
*/
#include "XNetTemplate.h"
#include "XBaseTemplate.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -79,39 +79,4 @@ bool DataDistributeBase::GetBatchSafe(XList * args)
return r;
}
/*******************************
* neural network template
*******************************/
/* constructor: initialize the mutex of the net */
NetBase::NetBase()
{
    MUTEX_INIT(netMutex);
}
/* de-constructor: delete the mutex that the constructor initialized */
NetBase::~NetBase()
{
    MUTEX_DELE(netMutex);
}
/*
run the net (default implementation). It does no computation and only
reports that the method has to be overloaded by a sub-class.
>> args - the arguments of the run (not used here)
<< return - true, if the error report hands control back to the caller
*/
bool NetBase::Run(XList * args)
{
    ShowNTErrors("NetBase::Run must be overloaded!");

    return true;
}
/*
run the net (for multi-threading). The call of Run() is enclosed by
locking and unlocking netMutex.
>> args - the arguments that are handed to Run() as they are
<< return - the value returned by Run()
*/
bool NetBase::RunSafe(XList * args)
{
    MUTEX_LOCK(netMutex);
    bool result = Run(args);
    MUTEX_UNLOCK(netMutex);

    return result;
}
}
......@@ -36,7 +36,7 @@
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
data distributor template. It distribute batches of data to workers.
data distributor template. It distributes batches of data to workers.
The use of data distributor follows:
Start() -> GetBatch() -> ... -> GetBatch() -> End()
......@@ -76,29 +76,6 @@ protected:
bool GetBatchSafe(XList * args);
};
/*
neural network template. A sub-class overloads Run() to do the actual
computation. RunSafe() calls Run() with netMutex locked.
*/
class NetBase
{
protected:
    /* mutex of the net (locked by RunSafe while Run is executed) */
    MUTEX_HANDLE netMutex;

public:
    /* constructor */
    NetBase();

    /* de-constructor. It is virtual because the class is a polymorphic
       base (Run is virtual): deleting a sub-class object via a NetBase
       pointer must call the de-constructor of the sub-class. */
    virtual
    ~NetBase();

    /* run the net (to be overloaded by sub-classes) */
    virtual
    bool Run(XList * args);

protected:
    /* run the net (for multi-threading) */
    bool RunSafe(XList * args);
};
}
#endif // __XNETTEMPLATE_H__
......
......@@ -47,6 +47,15 @@ XLeader::~XLeader()
{
}
/* intialize the leader */
void XLeader::Init()
{
for (int i = 0; i < jworkers.count; i++) {
delete (XWorkerJob*)jworkers.GetItem(i);
}
jworkers.Clear();
}
/* set id */
void XLeader::SetID(int myID)
{
......@@ -68,4 +77,25 @@ void XLeader::SetMode(XLEADER_MODE myMode)
mode = myMode;
}
/*
add job workers to the leader, one for each given device id
(the body is empty for now, i.e., the call has no effect)
>> model - the neural network
>> ids - the array of device ids
*/
void XLeader::AddJobWorker(XModel * model,
                           int * ids)
{
}
/*
run the model once
(the body is empty for now, i.e., the call has no effect)
>> config - the configuration
>> dataDistributor - the data distributor
>> model - the neural network that we want to run
>> optimizer - the optimization method
*/
void XLeader::Run(XConfig * config,
                  DataDistributeBase * dataDistributor,
                  XModel * model,
                  XOptimizer * optimizer)
{
}
} /* end of the nts (NiuTrans.Tensor) namespace */
......@@ -36,11 +36,16 @@
#define __XLEADER_H__
#include "XModel.h"
#include "XNetTemplate.h"
#include "XOptimizer.h"
#include "XBaseTemplate.h"
#include "XWorkerJob.h"
#include "../tensor/XConfig.h"
#include "../tensor/XList.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#define MAX_NUM_OF_WORKERS 1024
/*
communication mode of a leader. This offers a way of organizing a hierarchy of the work
1) run as a standalone program
......@@ -60,6 +65,9 @@ protected:
/* communication mode */
XLEADER_MODE mode;
/* job workers of the leader */
XList jworkers;
public:
/* constructor */
XLeader();
......@@ -67,6 +75,9 @@ public:
/* de-constructor */
~XLeader();
/* initialize the leader */
void Init();
/* set id */
void SetID(int myID);
......@@ -75,9 +86,13 @@ public:
/* set the communication mode */
void SetMode(XLEADER_MODE myMode);
/* add a number of job workers (given their device ids) */
void AddJobWorker(XModel * model, int * ids);
/* run the model (for one time) */
void Run(XConfig * config, DataDistributeBase * dataDistributor, XModel * modelParams, NetBase * net);
void Run(XConfig * config, DataDistributeBase * dataDistributor,
XModel * model, XOptimizer * optimizer);
};
}
......
......@@ -36,11 +36,14 @@ namespace nts {
/* constructor: initialize the mutex of the model */
XModel::XModel()
{
    MUTEX_INIT(modelMutex);
}
/*
de-constructor: clear the model first and then delete its mutex.
Note that a call made inside a de-constructor is not dispatched to a
sub-class, so it is XModel::Clear() that runs here.
*/
XModel::~XModel()
{
    Clear();

    MUTEX_DELE(modelMutex);
}
/* clear the model */
......@@ -49,6 +52,27 @@ void XModel::Clear()
params.Clear();
}
/*
clone the model (default implementation). It makes no copy and only
reports that the method should be overloaded by a sub-class.
>> devID - the device on which we keep the cloned model
<< return - NULL, if the error report hands control back to the caller
            (an overloading method is expected to return the cloned model)
*/
XModel * XModel::Clone(int devID)
{
    ShowNTErrors("XModel::Clone() should be overloaded!");

    return NULL;
}
/*
run the neural network (default implementation). It does no computation
and only reports that the method must be overloaded by a sub-class.
>> args - the arguments (not used here)
<< return - true, if the error report hands control back to the caller
*/
bool XModel::Run(XList * args)
{
    /* the message names this class (it used to name NetBase, from where
       the code was moved) so that the report points to the right method */
    ShowNTErrors("XModel::Run must be overloaded!");

    return true;
}
/* reset the flag of parameters (the flag is used in data transfer) */
void XModel::RefreshMe()
{
......@@ -68,4 +92,16 @@ void XModel::Refresh(XList * args)
model->RefreshMe();
}
/*
run the neural network (for multi-threading). The call of Run() is
enclosed by locking and unlocking the mutex of the model.
>> args - the arguments that are handed to Run() as they are
<< return - the value returned by Run()
*/
bool XModel::RunSafe(XList * args)
{
    bool r;

    /* modelMutex is the mutex member of XModel (netMutex was a member of
       NetBase and does not exist in this class) */
    MUTEX_LOCK(modelMutex);
    r = Run(args);
    MUTEX_UNLOCK(modelMutex);

    return r;
}
} /* end of the nts (NiuTrans.Tensor) namespace */
......@@ -41,6 +41,10 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* a model template for training */
class XModel
{
protected:
/* mutex of the model */
MUTEX_HANDLE modelMutex;
public:
/* the list of model parameters (pointers to the parameter tensor) */
TensorList params;
......@@ -53,15 +57,31 @@ public:
/* de-constructor */
~XModel();
/* clear the model */
/* clear the model (would be overloaded) */
virtual
void Clear();
/* clone the model (would be overloaded) */
virtual
XModel * Clone(int devID);
/* run the neural network (would be overloaded) */
virtual
bool Run(XList * args);
public:
/* reset the flag of parameters (the flag is used in data transfer) */
void RefreshMe();
/* wrapper of RefreshMe */
static
void Refresh(XList * args);
protected:
/* run the neural network (for multi-threading) */
bool RunSafe(XList * args);
};
}
......
......@@ -44,16 +44,14 @@ XTrainer::~XTrainer()
run the trainer (this is the core process)
>> config - configuration
>> dataDistributor - the data distributor that generates an input for the net each time
>> modelParams - the parameter keeper
>> net - the neural network
>> model - the neural network
*/
void XTrainer::Run(XConfig * config, DataDistributeBase * dataDistributor,
XModel * modelParams, NetBase * net)
XModel * model)
{
CheckNTErrors(config != NULL, "No input config!");
CheckNTErrors(dataDistributor != NULL, "No input data distributor!");
CheckNTErrors(modelParams != NULL, "No input model parameter keeper!");
CheckNTErrors(net != NULL, "No input neural network!");
CheckNTErrors(model != NULL, "No input neural network!");
int nepoch = config->GetInt("nepoch", 50);
int nstep = config->GetInt("nstep", 100000);
......
......@@ -70,7 +70,7 @@ public:
/* run the leader (this is the core process) */
virtual
void Run(XConfig * config, DataDistributeBase * dataDistributor, XModel * modelParams, NetBase * net);
void Run(XConfig * config, DataDistributeBase * dataDistributor, XModel * model);
};
}
#endif // __XTRAINER_H__
\ No newline at end of file
......@@ -26,12 +26,14 @@
*/
#include "XWorkerJob.h"
#include "../tensor/XList.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
constructor. No model is attached to a new worker, so the model pointer
starts as NULL and GetModel() returns NULL until SetModel() is called.
*/
XWorkerJob::XWorkerJob()
{
    model = NULL;
}
/* de-constructor */
......@@ -39,19 +41,31 @@ XWorkerJob::~XWorkerJob()
{
}
/*
set the model of the worker. Only the pointer is stored.
>> myModel - the model
*/
void XWorkerJob::SetModel(XModel * myModel)
{
    this->model = myModel;
}
/*
get the model of the worker
<< return - the pointer that is kept in the "model" member
*/
XModel * XWorkerJob::GetModel()
{
    return this->model;
}
/*
add a new job of model refreshment
>> paramKeeper - keeper of the model parameters
>> myModel - the model
<< return - succeeded or not
*/
bool XWorkerJob::AddJobRefresh(XModel * paramKeeper)
bool XWorkerJob::AddJobRefresh(XModel * myModel)
{
CheckNTErrors(paramKeeper != NULL, "no parameter keeper!");
CheckNTErrors(myModel != NULL, "no parameter keeper!");
XList args(1);
args.Add(paramKeeper);
args.Add(myModel);
queue.EnqueueJob((void*)&paramKeeper->Refresh, &args);
queue.EnqueueJob((void*)&myModel->Refresh, &args);
return true;
}
......@@ -59,20 +73,20 @@ bool XWorkerJob::AddJobRefresh(XModel * paramKeeper)
/*
add a new job of neural network forward and backward computation (with the input)
>> func - the function that calls the run of the neural network
>> net - the neural network
>> myModel - the model
>> inputs - inputs of the neural network
>> outputs - outputs of the neural network
<< return - succeeded or not
*/
bool XWorkerJob::AddJobNeuralNet(void * func, void * net, XList * inputs, XList * outputs)
bool XWorkerJob::AddJobNeuralNet(void * func, XModel * myModel, XList * inputs, XList * outputs)
{
CheckNTErrors(func != NULL, "no input function!");
CheckNTErrors(net != NULL, "no input neural network!");
CheckNTErrors(myModel != NULL, "no input neural network!");
XList args;
args.AddList(inputs);
args.AddList(outputs);
args.Add(net);
args.Add(myModel);
queue.EnqueueJob(func, &args);
......
......@@ -31,6 +31,8 @@
#include "XWorker.h"
#include "XModel.h"
#include "XBaseTemplate.h"
#include "../tensor/XList.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
......@@ -38,6 +40,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
class XWorkerJob : public XWorker
{
protected:
/* the model */
XModel * model;
public:
......@@ -47,11 +51,17 @@ public:
/* de-constructor */
~XWorkerJob();
/* set the model */
void SetModel(XModel * myModel);
/* get the model */
XModel * GetModel();
/* add a new job of model refreshment */
bool AddJobRefresh(XModel * paramKeeper);
bool AddJobRefresh(XModel * myModel);
/* add a new job of neural network forward and backward computation (with the input) */
bool AddJobNeuralNet(void * func, void * net, XList * inputs, XList * outputs);
bool AddJobNeuralNet(void * func, XModel * myModel, XList * inputs, XList * outputs);
};
}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论