Commit 1c9973c9 by xiaotong

updates

parent b87c6f74
...@@ -80,18 +80,20 @@ void TestTrain() ...@@ -80,18 +80,20 @@ void TestTrain()
config.Add("lrate", 0.001F); config.Add("lrate", 0.001F);
config.Add("nstep", 100000); config.Add("nstep", 100000);
config.Add("nepoch", 5); config.Add("nepoch", 5);
config.Add("jobdev0", -1); config.Add("jobdev0", 0);
//config.Add("jobdev1", -1); config.Add("jobdev1", -1);
//config.Add("jobdev2", -1); config.Add("jobdev2", -1);
//config.Add("jobdev3", -1); //config.Add("jobdev3", -1);
//config.Add("jobdev4", -1); //config.Add("jobdev4", -1);
int serverDevID = config.GetInt("jobdev0", -1);
TTDataLoader loader; TTDataLoader loader;
loader.SetFileName("ttrain.txt"); loader.SetFileName("ttrain.txt");
loader.SetBatchSize(config.GetInt("batchsize", TT_BATCH_SIZE)); loader.SetBatchSize(config.GetInt("batchsize", TT_BATCH_SIZE));
TTModel model; TTModel model;
model.Init(config, -1); model.Init(config, serverDevID);
tmpTT = model.params[0].param; tmpTT = model.params[0].param;
...@@ -218,6 +220,10 @@ bool TTDataLoader::GetBatchSimple(XList * inputs, XList * golds) ...@@ -218,6 +220,10 @@ bool TTDataLoader::GetBatchSimple(XList * inputs, XList * golds)
/* constructor */ /* constructor */
TTModel::TTModel() TTModel::TTModel()
{ {
devID = -1;
vSize = 0;
eSize = 0;
hSize = 0;
} }
/* de-constructor */ /* de-constructor */
...@@ -236,11 +242,13 @@ initialize the model ...@@ -236,11 +242,13 @@ initialize the model
>> myConfig - configuration >> myConfig - configuration
>> devID - device id >> myDevID - device id
*/ */
void TTModel::Init(XConfig &myConfig, int devID) void TTModel::Init(XConfig &myConfig, int myDevID)
{ {
Clear(); Clear();
SetConfig(myConfig); SetConfig(myConfig);
devID = myDevID;
vSize = MAX_INT_IN_TTRAIN + 1; vSize = MAX_INT_IN_TTRAIN + 1;
eSize = config.GetInt("esize", TT_EMBEDDING_SIZE); eSize = config.GetInt("esize", TT_EMBEDDING_SIZE);
hSize = config.GetInt("hsize", TT_HIDDEN_SIZE); hSize = config.GetInt("hsize", TT_HIDDEN_SIZE);
...@@ -331,6 +339,11 @@ bool TTModel::RunSimple(XList * inputs, XList * outputs, XList * golds, XList* l ...@@ -331,6 +339,11 @@ bool TTModel::RunSimple(XList * inputs, XList * outputs, XList * golds, XList* l
XTensor * loss = (XTensor*)losses->GetItem(0); XTensor * loss = (XTensor*)losses->GetItem(0);
XTensor goldOneHot; XTensor goldOneHot;
/* place all input data on the correct device */
input->FlushToDevice(devID);
output->FlushToDevice(devID);
gold->FlushToDevice(devID);
XNet net; XNet net;
/* create the neural network and run it */ /* create the neural network and run it */
......
...@@ -49,8 +49,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -49,8 +49,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MAX_SAMPLE_LINE_LENGTH 128 #define MAX_SAMPLE_LINE_LENGTH 128
#define MAX_SAMPLE_SIZE 4 #define MAX_SAMPLE_SIZE 4
#define TT_BATCH_SIZE 256 #define TT_BATCH_SIZE 256
#define TT_EMBEDDING_SIZE 64 #define TT_EMBEDDING_SIZE 128
#define TT_HIDDEN_SIZE 256 #define TT_HIDDEN_SIZE 512
extern XTensor * tmpTT; extern XTensor * tmpTT;
...@@ -136,7 +136,7 @@ public: ...@@ -136,7 +136,7 @@ public:
void SetConfig(XConfig &myConfig); void SetConfig(XConfig &myConfig);
/* initialize the parameters */ /* initialize the parameters */
void Init(XConfig &myConfig, int devID); void Init(XConfig &myConfig, int myDevID);
/* create the model */ /* create the model */
void Forward(int devID, XTensor * input, XTensor * output); void Forward(int devID, XTensor * input, XTensor * output);
......
...@@ -265,7 +265,7 @@ void XLeader::AddJobWorker(XModel * model, int n, int * ids) ...@@ -265,7 +265,7 @@ void XLeader::AddJobWorker(XModel * model, int n, int * ids)
} }
/* we clone the input model */ /* we clone the input model */
for (int i = 0; i < n - 1; i++) { for (int i = 1; i < n; i++) {
XWorkerJob * worker = new XWorkerJob(); XWorkerJob * worker = new XWorkerJob();
worker->SetModel(model->Clone(ids[i])); worker->SetModel(model->Clone(ids[i]));
jworkers.Add(worker); jworkers.Add(worker);
......
...@@ -63,9 +63,9 @@ void XOptimizer::Clear() ...@@ -63,9 +63,9 @@ void XOptimizer::Clear()
void XOptimizer::ShowSettings() void XOptimizer::ShowSettings()
{ {
XPRINT(1, stderr, "[INFO] Optimizer Setup:\n"); XPRINT(1, stderr, "[INFO] Optimizer Setup:\n");
XPRINT1(1, stderr, " nstep = %d\n", nstep); XPRINT2(1, stderr, "%25s = %d\n", "nstep", nstep);
XPRINT1(1, stderr, " nepoch = %d\n", nepoch); XPRINT2(1, stderr, "%25s = %d\n", "nepoch", nepoch);
XPRINT1(1, stderr, " lrate = %.3f\n", lrate); XPRINT2(1, stderr, "%25s = %.3f\n", "lrate", lrate);
} }
/* /*
......
...@@ -102,6 +102,7 @@ void XTrainer::Run(XConfig * config, DataDistributeBase * dataDistributor, ...@@ -102,6 +102,7 @@ void XTrainer::Run(XConfig * config, DataDistributeBase * dataDistributor,
GetDevIDs(config, ids, jobNum, MAX_DEVICE_NUM_TRAINING); GetDevIDs(config, ids, jobNum, MAX_DEVICE_NUM_TRAINING);
optimizer->ShowSettings(); optimizer->ShowSettings();
this->ShowSettings(config);
/* create the server and workers */ /* create the server and workers */
XLeader leader; XLeader leader;
...@@ -148,4 +149,27 @@ void XTrainer::Run(XConfig * config, DataDistributeBase * dataDistributor, ...@@ -148,4 +149,27 @@ void XTrainer::Run(XConfig * config, DataDistributeBase * dataDistributor,
delete[] ids; delete[] ids;
} }
/*
show settings of training: print the number of workers and the device
assigned to each of them to stderr
>> config - configuration passed on to GetDevIDs to look up the device ids
*/
void XTrainer::ShowSettings(XConfig* config)
{
    int workerNum = 0;
    int* ids = new int[MAX_DEVICE_NUM_TRAINING];

    /* presumably fills ids[] and sets workerNum to the number of valid
       entries (both are only read after this call) - TODO confirm */
    GetDevIDs(config, ids, workerNum, MAX_DEVICE_NUM_TRAINING);

    XPRINT(1, stderr, "[INFO] Training Setup:\n");
    XPRINT2(1, stderr, "%25s = %d\n", "nworker", workerNum);

    if (workerNum > 0) {
        /* the first entry is reported with the "(server)" label */
        XPRINT2(1, stderr, "%25s = device[%d]\n", "worker0(server)", ids[0]);

        for (int i = 1; i < workerNum; i++) {
            char name[32];

            /* bounded formatting: never write past the end of name[] */
            snprintf(name, sizeof(name), "worker%d", i);
            XPRINT2(1, stderr, "%25s = device[%d]\n", name, ids[i]);
        }
    }

    delete[] ids;
}
} /* end of the nts (NiuTrans.Tensor) namespace */ } /* end of the nts (NiuTrans.Tensor) namespace */
...@@ -77,6 +77,9 @@ public: ...@@ -77,6 +77,9 @@ public:
virtual virtual
void Run(XConfig * config, DataDistributeBase * dataDistributor, void Run(XConfig * config, DataDistributeBase * dataDistributor,
XModel * model, XOptimizer * optimizer); XModel * model, XOptimizer * optimizer);
/* show settings of training */
void ShowSettings(XConfig * config);
}; };
} }
#endif // __XTRAINER_H__ #endif // __XTRAINER_H__
\ No newline at end of file
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论