Commit 08bd5aec by xiaotong

updates

parent 412e53a8
@@ -39,6 +39,10 @@ using namespace nmt;

 int main( int argc, const char ** argv )
 {
+    XConfig config;
+    config.Create(argc - 1, argv + 1);
+    verboseLevel = config.GetInt("verbose", 1);
+
     if (argc > 1 && !strcmp(argv[1], "-test"))
         Test();
     else if (argc > 1 && !strcmp(argv[1], "-testtrain"))
......
@@ -21,11 +21,11 @@
 /*
-* We test XTrain here. It is simple, we design a simple task in that we
-* make the model to predict an integer D (0-100) from three input integers
-* A, B and C (0-100). We generate a number of samples with different values
-* of A, B and C. The gold standard is
+* We test XTrain here. It is simple: we design a simple task in which we
+* make the model predict an integer E (0-100) from four input integers
+* A, B, C and D (0-100). We generate a number of samples with different
+* values of A, B, C and D. The gold standard is
 *
-* D = (int)(sqrt(A * B) + C)/2
+* E = (int)(sqrt(A * B) + abs(C - D))/2
 *
 * Our model is a two-layer feed-forward neural network. It can be treated
 * as a classifier rather than a regression model.
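A quick sanity check of the new gold standard: with inputs in 0-100, sqrt(A * B) and abs(C - D) are each at most 100, so E stays in the 0-100 range the comment promises. A standalone sketch (not part of the commit) that evaluates one example:

#include <cmath>
#include <cstdio>
#include <cstdlib>

int main()
{
    /* A = 64, B = 4, C = 30, D = 10:
       sqrt(64 * 4) = 16, abs(30 - 10) = 20, E = (16 + 20) / 2 = 18 */
    int A = 64, B = 4, C = 30, D = 10;
    int E = (int)((sqrt((double)(A * B)) + abs(C - D)) / 2);
    printf("E = %d\n", E);   /* prints "E = 18" */
    return 0;
}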
@@ -47,7 +47,7 @@ void GeneateTTrainData(const char * fileName)
     FILE * file = fopen(fileName, "wb");
     CheckNTErrors(file, "Cannot open the file");

-    fprintf(stderr, "Generating data ... ");
+    XPRINT(1, stderr, "[INFO] Generating data ... ");

     int sampleNum = MAX_SAMPLE_NUM_IN_TTRAIN;
     int range = MAX_INT_IN_TTRAIN;
@@ -60,11 +60,12 @@ void GeneateTTrainData(const char * fileName)
         int A = (int)(((float)rand() / RAND_MAX) * range);
         int B = (int)(((float)rand() / RAND_MAX) * range);
         int C = (int)(((float)rand() / RAND_MAX) * range);
-        int D = (int)((sqrt(A * B) + C) / 2);
-        fprintf(file, "%d %d %d %d\n", A, B, C, D);
+        int D = (int)(((float)rand() / RAND_MAX) * range);
+        int E = (int)((sqrt(A * B) + abs(C - D)) / 2);
+        fprintf(file, "%d %d %d %d %d\n", A, B, C, D, E);
     }

-    fprintf(stderr, "%d samples in \"%s\" [done]\n", sampleNum, fileName);
+    XPRINT2(1, stderr, "%d samples in \"%s\" [DONE]\n", sampleNum, fileName);

     fclose(file);
 }
@@ -76,7 +77,9 @@ void TestTrain()
     XConfig config;
     config.Add("dev", -1);
-    config.Add("lrate", 0.1F);
+    config.Add("lrate", 0.001F);
+    config.Add("nstep", 10000);
+    config.Add("nepoch", 5);

     TTDataLoader loader;
     loader.SetFileName("ttrain.txt");
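The nstep and nepoch keys added here are consumed by the optimizer rather than the trainer from this commit on (the XTrainer hunks below drop their local GetInt calls in favor of optimizer->nstep and optimizer->nepoch). A minimal sketch of how an optimizer member function might read them; the hook name and GetFloat are assumptions, as only Add/GetInt appear in this diff:

/* hypothetical hook; mirrors the defaults the trainer used to apply */
void InitFromConfig(XConfig &config)
{
    nstep  = config.GetInt("nstep", 100000);    /* max number of update steps */
    nepoch = config.GetInt("nepoch", 50);       /* max number of epochs */
    lrate  = config.GetFloat("lrate", 0.001F);  /* learning rate (GetFloat assumed) */
}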
@@ -165,30 +168,31 @@ bool TTDataLoader::GetBatchSimple(XList * inputs, XList * golds)
     char * line = new char[MAX_SAMPLE_LINE_LENGTH];
     int * inputBatch = new int[batchSize * sampleSize];
     int * goldBatch = new int[batchSize];
-    int A, B, C, D;
+    int A, B, C, D, E;

     while (fgets(line, MAX_SAMPLE_LINE_LENGTH, file)) {
         if (count == batchSize)
             break;

-        if (sscanf(line, "%d %d %d %d", &A, &B, &C, &D) < 4) {
+        if (sscanf(line, "%d %d %d %d %d", &A, &B, &C, &D, &E) < sampleSize + 1) {
             ShowNTErrors("Wrong format in the training file!");
         }

-        inputBatch[count * 3] = A;
-        inputBatch[count * 3 + 1] = B;
-        inputBatch[count * 3 + 2] = C;
-        goldBatch[count] = D;
+        inputBatch[count * sampleSize] = A;
+        inputBatch[count * sampleSize + 1] = B;
+        inputBatch[count * sampleSize + 2] = C;
+        inputBatch[count * sampleSize + 3] = D;
+        goldBatch[count] = E;

         count++;
     }

     if (count > 0) {
-        InitTensor2D(input, count, 3, X_INT);
+        InitTensor2D(input, count, 4, X_INT);
         InitTensor2D(gold, count, 1, X_INT);
-        input->SetData(inputBatch, count * 3);
+        input->SetData(inputBatch, count * 4);
         gold->SetData(goldBatch, count);
     }
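Worth spelling out the layout this loop builds: inputBatch is a flat, row-major [count x sampleSize] buffer, so field k of sample i lives at index i * sampleSize + k, which is exactly what the InitTensor2D/SetData pair above assumes. A tiny sketch with made-up values:

/* row-major layout assumed by GetBatchSimple (hypothetical values):
   sample 0: A=1 B=2 C=3 D=4  ->  inputBatch[0..3]
   sample 1: A=5 B=6 C=7 D=8  ->  inputBatch[4..7] */
const int sampleSize = 4;
int inputBatch[2 * sampleSize] = { 1, 2, 3, 4, 5, 6, 7, 8 };
int D1 = inputBatch[1 * sampleSize + 3];   /* 8, i.e. D of sample 1 */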
@@ -237,7 +241,7 @@ void TTModel::Init(XConfig &myConfig, int devID)
     hSize = config.GetInt("hsize", TT_HIDDEN_SIZE);

     InitTensor2D(&embeddingW, vSize, eSize, X_FLOAT, devID);
-    InitTensor2D(&hiddenW, 3 * eSize, hSize, X_FLOAT, devID);
+    InitTensor2D(&hiddenW, MAX_SAMPLE_SIZE * eSize, hSize, X_FLOAT, devID);
     InitTensor2D(&outputW, hSize, vSize, X_FLOAT, devID);

     embeddingW.SetName("embeddingw");
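The resize of hiddenW follows from the model concatenating one eSize-dim embedding per input integer before the hidden layer: with four inputs the hidden layer's input width must be 4 * eSize, and using MAX_SAMPLE_SIZE keeps the two in sync automatically. A shape check with the new constants:

const int eSize = 64;                        /* TT_EMBEDDING_SIZE */
const int sampleSize = 4;                    /* MAX_SAMPLE_SIZE */
const int hiddenInDim = sampleSize * eSize;  /* 256: first dim of hiddenW */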
@@ -306,7 +310,7 @@ run the neural network
 */
 bool TTModel::RunSimple(XList * inputs, XList * outputs, XList * golds, XList* losses)
 {
-    fprintf(stderr, "run simple 0\n");
+    //fprintf(stderr, "run simple 0\n");
     CheckNTErrors(inputs != NULL && inputs->count >= 1, "Wrong arguments!");
     CheckNTErrors(outputs != NULL && outputs->count >= 1, "Wrong arguments!");
     CheckNTErrors(golds != NULL && golds->count >= 1, "Wrong arguments!");

@@ -326,7 +330,7 @@ bool TTModel::RunSimple(XList * inputs, XList * outputs, XList * golds, XList* losses)
     /* gold standard in one-hot representation */
     goldOneHot = IndexToOnehot(*gold, vSize, 0.0F);

-    int* dims = new int[goldOneHot.order];
+    int * dims = new int[goldOneHot.order];
     for (int i = 0; i < goldOneHot.order - 2; i++)
         dims[i] = goldOneHot.GetDim(i);
     dims[goldOneHot.order - 2] = goldOneHot.GetDim(goldOneHot.order - 1);

@@ -340,7 +344,7 @@ bool TTModel::RunSimple(XList * inputs, XList * outputs, XList * golds, XList* losses)
     delete[] dims;

-    fprintf(stderr, "run simple 1\n");
+    //fprintf(stderr, "run simple 1\n");

     return true;
 }
......
@@ -44,12 +44,12 @@
 namespace nts { // namespace nts(NiuTrans.Tensor)

-#define MAX_SAMPLE_NUM_IN_TTRAIN 100000
+#define MAX_SAMPLE_NUM_IN_TTRAIN 200000
 #define MAX_INT_IN_TTRAIN 100
 #define MAX_SAMPLE_LINE_LENGTH 128
-#define MAX_SAMPLE_SIZE 3
+#define MAX_SAMPLE_SIZE 4
 #define TT_BATCH_SIZE 256
-#define TT_EMBEDDING_SIZE 256
+#define TT_EMBEDDING_SIZE 64
 #define TT_HIDDEN_SIZE 256

 extern XTensor * tmpTT;
......
@@ -418,12 +418,7 @@ void XLeader::WaitForFinishing(int sleepTime)
         if (finished)
             break;

-#ifdef _WIN32
-        Sleep((DWORD)sleepTime);
-#else
-        sleep((unsigned)sleepTime / 1000);
-#endif
+        XSleep(sleepTime);
     }
 }
......
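The #ifdef blocks removed here (and in the worker files below) are folded into XSleep, which XWorker.h now pulls in via ../tensor/XUtility.h. The real implementation lives in XUtility; assuming millisecond granularity, a portable sleep in that spirit looks roughly like this sketch:

/* minimal portable millisecond sleep, mirroring the code this commit removes */
#ifdef _WIN32
#include <windows.h>
#else
#include <unistd.h>
#endif

void XSleepSketch(long ms)
{
#ifdef _WIN32
    Sleep((DWORD)ms);                   /* Windows Sleep() takes milliseconds */
#else
    usleep((useconds_t)(ms * 1000));    /* usleep() takes microseconds */
#endif
}

Note that the old POSIX branch slept in whole seconds (sleepTime / 1000), so the 10 ms waits used throughout these loops degenerated into busy spinning; a millisecond-granularity XSleep also fixes that.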
@@ -48,7 +48,7 @@
 namespace nts { // namespace nts(NiuTrans.Tensor)

 #define MAX_NUM_OF_WORKERS 1024
-#define SLEEP_TIME_IN_WAITING_FOR_JOBS 10
+#define SLEEP_TIME_IN_WAITING_FOR_JOBS 20

 /*
 communication mode of a leader. This offers a way of organizing a hierarchy of the work
......
@@ -60,6 +60,14 @@ void XOptimizer::Clear()
     lrate = 0;
 }

+void XOptimizer::ShowSettings()
+{
+    XPRINT(1, stderr, "[INFO] Optimizer Setup:\n");
+    XPRINT1(1, stderr, " nstep = %d\n", nstep);
+    XPRINT1(1, stderr, " nepoch = %d\n", nepoch);
+    XPRINT1(1, stderr, " lrate = %.3f\n", lrate);
+}
+
 /*
 prepare for the update
 >> model - the model that we want to update
......
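For reference, with the values TestTrain now sets (lrate = 0.001, nstep = 10000, nepoch = 5) and verboseLevel at least 1 (XPRINT's first argument is the required verbose level), the new ShowSettings output would read roughly:

[INFO] Optimizer Setup:
 nstep = 10000
 nepoch = 5
 lrate = 0.001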
@@ -62,6 +62,10 @@ public:
     /* clear the optimizer */
     virtual
     void Clear();

+    /* show settings */
+    virtual
+    void ShowSettings();
+
     /* prepare for the update */
     virtual
......
@@ -94,8 +94,6 @@ void XTrainer::Run(XConfig * config, DataDistributeBase * dataDistributor,
     CheckNTErrors(dataDistributor != NULL, "No input data distributor!");
     CheckNTErrors(model != NULL, "No input neural network!");

-    int nepoch = config->GetInt("nepoch", 50);
-    int nstep = config->GetInt("nstep", 100000);
     int epoch = 0;
     int step = 0;
     int jobNum = 0;
@@ -103,6 +101,8 @@ void XTrainer::Run(XConfig * config, DataDistributeBase * dataDistributor,
     int * ids = new int[MAX_DEVICE_NUM_TRAINING];
     GetDevIDs(config, ids, jobNum, MAX_DEVICE_NUM_TRAINING);

+    optimizer->ShowSettings();
+
     /* create the server and workers */
     XLeader leader;
     leader.Init();
@@ -114,8 +114,12 @@ void XTrainer::Run(XConfig * config, DataDistributeBase * dataDistributor,
     leader.SetServerModel(config, model);
     leader.Start();

+    double startT = GetClockSec();
+
+    XPRINT(1, stderr, "[INFO] Initializing the model ... [DONE]\n");
+
     /* train the model */
-    for (epoch = 0; epoch < nepoch; epoch++) {
+    for (epoch = 0; epoch < optimizer->nepoch; epoch++) {
         bool ok = true;
         dataDistributor->Start();
@@ -127,23 +131,21 @@ void XTrainer::Run(XConfig * config, DataDistributeBase * dataDistributor,
             float loss = leader.GetLoss() / leader.GetSampleNum();

-            if ((step + 1) % 1 == 0)
-                fprintf(stderr, "epoch:%d step:%d sample:%d loss:%f predict:%d\n",
-                        epoch + 1, step + 1, leader.GetSampleNum(), loss, leader.GetPredictNum());
+            if ((step + 1) % 100 == 0)
+                XPRINT5(1, stderr, "[INFO] elapsed=%.1fs epoch:%d step:%d sample:%d loss:%f\n",
+                        GetClockSec() - startT, epoch + 1, step + 1, leader.GetSampleNum(), loss);

-            if (step++ >= nstep)
+            if (step++ >= optimizer->nstep)
                 break;
         }

         dataDistributor->End();

-        if (step >= nstep)
+        if (step >= optimizer->nstep)
             break;
     }

     delete[] ids;
-
-    fprintf(stderr, "epoch:%d step:%d\n", epoch, step);
 }

 } /* end of the nts (NiuTrans.Tensor) namespace */
@@ -32,6 +32,7 @@
 #define __XWORKER_H__

 #include "../tensor/XQueue.h"
+#include "../tensor/XUtility.h"

 namespace nts { // namespace nts(NiuTrans.Tensor)
......
@@ -90,11 +90,7 @@ void XWorkerBroadcast::BroadcastData(XModel * source, XList * targetList, long sleepTime)
         if (finished == sp.count * targetList->count)
             break;

-#ifdef _WIN32
-        Sleep((DWORD)sleepTime);
-#else
-        sleep((unsigned)sleepTime / 1000);
-#endif
+        XSleep(sleepTime);
     }

     delete[] finishedFlag;
@@ -106,7 +102,7 @@ wrapper of BroadcastData
 */
 void XWorkerBroadcast::Broadcast(XList * args)
 {
-    fprintf(stderr, "broadcast 0\n");
+    //fprintf(stderr, "broadcast 0\n");
     XWorkerBroadcast * broadcaster = (XWorkerBroadcast*)args->GetItem(0);
     XModel * source = (XModel*)args->GetItem(1);

@@ -119,7 +115,7 @@ void XWorkerBroadcast::Broadcast(XList * args)
     }

     broadcaster->BroadcastData(source, &target, SLEEP_TIME_IN_BROADCASTING);

-    fprintf(stderr, "broadcast 1\n");
+    //fprintf(stderr, "broadcast 1\n");
 }

 /*
......
@@ -35,7 +35,7 @@
 namespace nts { // namespace nts(NiuTrans.Tensor)

-#define SLEEP_TIME_IN_BROADCASTING 10
+#define SLEEP_TIME_IN_BROADCASTING 20

 /*
 data broadcasting method
......
@@ -160,11 +160,7 @@ void XWorkerCollect::CollectData(XList * sourceList, XModel * target, long sleepTime)
         if (finished == tp.count * sourceList->count)
             break;

-#ifdef _WIN32
-        Sleep((DWORD)sleepTime);
-#else
-        sleep((unsigned)sleepTime / 1000);
-#endif
+        XSleep(sleepTime);
     }

     /* reset the flags */

@@ -175,7 +171,7 @@ wrapper of CollectData
 /* wrapper of CollectData */
 void XWorkerCollect::Collect(XList * args)
 {
-    fprintf(stderr, "collect data 0\n");
+    //fprintf(stderr, "collect data 0\n");
     XWorkerCollect * collecter = (XWorkerCollect*)args->GetItem(0);
     int sourceNum = args->GetItemInt(1);

@@ -192,7 +188,7 @@ void XWorkerCollect::Collect(XList * args)
     collecter->CollectData(&source, target, SLEEP_TIME_IN_COLLECTING);

-    fprintf(stderr, "collect data 1\n");
+    //fprintf(stderr, "collect data 1\n");
 }

 /*

@@ -298,11 +294,7 @@ void XWorkerCollect::CollectOtherData(XList* sourceList, XNNRecord* target, long sleepTime)
         if (finished == sourceList->count)
             break;

-#ifdef _WIN32
-        Sleep((DWORD)sleepTime);
-#else
-        sleep((unsigned)sleepTime / 1000);
-#endif
+        XSleep(sleepTime);
     }

     delete[] flags;
......
@@ -35,8 +35,8 @@
 namespace nts { // namespace nts(NiuTrans.Tensor)

-#define SLEEP_TIME_IN_COLLECTING 10
-#define SLEEP_TIME_IN_COLLECTING_OTHER 10
+#define SLEEP_TIME_IN_COLLECTING 20
+#define SLEEP_TIME_IN_COLLECTING_OTHER 40

 /*
 data collection method
......
@@ -86,11 +86,7 @@ void XWorkerUpdate::UpdateModel(XModel * model, XOptimizer * optimizer, long sleepTime)
         if (finished == params.count)
             break;

-#ifdef _WIN32
-        Sleep((DWORD)sleepTime);
-#else
-        sleep((unsigned)sleepTime / 1000);
-#endif
+        XSleep(sleepTime);
     }

     optimizer->Note(model);

@@ -102,7 +98,7 @@ wrapper of UpdateModel
 */
 void XWorkerUpdate::Update(XList * args)
 {
-    fprintf(stderr, "update 0\n");
+    //fprintf(stderr, "update 0\n");
     CheckNTErrors(args != NULL && args->count >= 3, "Illegal argument list!");

@@ -112,7 +108,7 @@ void XWorkerUpdate::Update(XList * args)
     updater->UpdateModel(model, optimizer, SLEEP_TIME_IN_MODEL_UPDATE);

-    fprintf(stderr, "update 1\n");
+    //fprintf(stderr, "update 1\n");
 }

 /*
......
@@ -33,7 +33,7 @@
 namespace nts { // namespace nts(NiuTrans.Tensor)

-#define SLEEP_TIME_IN_MODEL_UPDATE 10
+#define SLEEP_TIME_IN_MODEL_UPDATE 20

 /* The class defines the model-update worker */
 class XWorkerUpdate : public XWorker
......