Commit 883673c6 by xiaotong

add TTrain

parent 6ec2d28c
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "./tensor/test/Test.h" #include "./tensor/test/Test.h"
#include "./sample/fnnlm/FNNLM.h" #include "./sample/fnnlm/FNNLM.h"
#include "./sample/transformer/NMT.h" #include "./sample/transformer/NMT.h"
#include "./train/TTrain.h"
//#define CRTDBG_MAP_ALLOC //#define CRTDBG_MAP_ALLOC
//#include <stdlib.h> //#include <stdlib.h>
...@@ -38,8 +39,10 @@ using namespace nmt; ...@@ -38,8 +39,10 @@ using namespace nmt;
int main( int argc, const char ** argv ) int main( int argc, const char ** argv )
{ {
if(argc > 1 && !strcmp(argv[1], "-test")) if (argc > 1 && !strcmp(argv[1], "-test"))
Test(); Test();
else if (argc > 1 && !strcmp(argv[1], "-testtrain"))
TestTrain(argc - 1, argv + 1);
else if(argc > 1 && !strcmp(argv[1], "-fnnlm")) else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
FNNLMMain(argc - 1, argv + 1); FNNLMMain(argc - 1, argv + 1);
else if(argc > 1 && !strcmp(argv[1], "-t2t")) else if(argc > 1 && !strcmp(argv[1], "-t2t"))
...@@ -47,7 +50,8 @@ int main( int argc, const char ** argv ) ...@@ -47,7 +50,8 @@ int main( int argc, const char ** argv )
else{ else{
fprintf(stderr, "Thanks for using NiuTensor! This is a library for building\n"); fprintf(stderr, "Thanks for using NiuTensor! This is a library for building\n");
fprintf(stderr, "neural networks in an easy way. \n\n"); fprintf(stderr, "neural networks in an easy way. \n\n");
fprintf(stderr, "Run this program with \"-test\" for unit test!\n"); fprintf(stderr, " Run this program with \"-test\" for unit test!\n");
fprintf(stderr, "Or run this program with \"-testtrain\" for test of the trainer!\n");
fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n"); fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
fprintf(stderr, "Or run this program with \"-t2t\" for sample Transformer!\n"); fprintf(stderr, "Or run this program with \"-t2t\" for sample Transformer!\n");
} }
......
...@@ -46,6 +46,18 @@ XConfig::~XConfig() ...@@ -46,6 +46,18 @@ XConfig::~XConfig()
delete[] args; delete[] args;
} }
/* reset the configuration: release all stored items and return to the
   empty state */
void XConfig::Clear()
{
    int i = 0;
    while (i < n) {
        delete[] args[i];
        ++i;
    }
    delete[] args;

    args = NULL;
    n = 0;
    nReal = 0;
}
/* /*
create a config create a config
>> myN - number of the input arguments >> myN - number of the input arguments
...@@ -238,6 +250,36 @@ float XConfig::GetFloat(const char * name, float defaultP) ...@@ -238,6 +250,36 @@ float XConfig::GetFloat(const char * name, float defaultP)
return r; return r;
} }
/* return how many items are currently stored */
int XConfig::GetItemNum()
{
    const int itemNum = n;
    return itemNum;
}
/*
fetch the item stored at offset i
>> i - the offset (0-based)
<< the item string, or NULL when i is out of range
*/
char * XConfig::GetItem(int i)
{
    if (i < 0 || i >= n)
        return NULL;

    return args[i];
}
/*
initialize this configuration by copying all items from another one
>> myConfig - the configuration to copy from
*/
void XConfig::CreateFromMe(XConfig & myConfig)
{
    /* guard against self-copy: Clear() would destroy the very items
       we are about to read from myConfig */
    if (this == &myConfig)
        return;

    Clear();

    for (int i = 0; i < myConfig.GetItemNum(); i++)
        Add(myConfig.GetItem(i), i);
}
/* /*
load the value of an argument (in integer) load the value of an argument (in integer)
>> argc - number of arguments >> argc - number of arguments
......
...@@ -55,6 +55,9 @@ public: ...@@ -55,6 +55,9 @@ public:
/* de-constructor */ /* de-constructor */
~XConfig(); ~XConfig();
/* clear it */
void Clear();
/* create a config */ /* create a config */
void Create(const int myN, const char ** myArgs); void Create(const int myN, const char ** myArgs);
...@@ -92,6 +95,15 @@ public: ...@@ -92,6 +95,15 @@ public:
/* get the value of an argument (in float) */ /* get the value of an argument (in float) */
float GetFloat(const char * name, float defaultP); float GetFloat(const char * name, float defaultP);
/* get item number */
int GetItemNum();
/* get the item with offset i */
char * GetItem(int i);
/* initialize with another config model */
void CreateFromMe(XConfig &myConfig);
}; };
#define MAX_PARAM_NUM 100 #define MAX_PARAM_NUM 100
......
/*
* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2016-2021
* Natural Language Processing Lab, Northeastern University
* and
* NiuTrans Research
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
 * We test XTrain here. It is simple: we design a simple task in which we
 * make the model predict an integer D (0-100) from three input integers
* A, B and C (0-100). We generate a number of samples with different values
* of A, B and C. The gold standard is
*
* D = (int)(sqrt(A * B) + C)/2
*
* Our model is a two-layer feed-forward neural network. It can be treated
* as a classifier rather than a regression model.
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2021-03-03
*/
#include "TTrain.h"
#include "../tensor/core/CHeader.h"
#include "../tensor/function/FHeader.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* generate the synthetic training data file
   >> fileName - path of the file to write */
void GeneateTTrainData(const char * fileName)
{
    const int total = MAX_SAMPLE_NUM_IN_TTRAIN;
    const int valueRange = MAX_INT_IN_TTRAIN;

    FILE * out = fopen(fileName, "wb");
    CheckNTErrors(out, "Cannot open the file");

    fprintf(stderr, "Generating data ... ");

    /* the first line records how many samples follow */
    fprintf(out, "%d\n", total);

    /* fixed seed so the data set is reproducible */
    srand(1);

    for (int i = 0; i < total; i++) {
        int a = (int)(((float)rand() / RAND_MAX) * valueRange);
        int b = (int)(((float)rand() / RAND_MAX) * valueRange);
        int c = (int)(((float)rand() / RAND_MAX) * valueRange);

        /* gold standard of the toy task */
        int d = (int)((sqrt(a * b) + c) / 2);

        fprintf(out, "%d %d %d %d\n", a, b, c, d);
    }

    fprintf(stderr, "%d samples in \"%s\" [done]\n", total, fileName);

    fclose(out);
}
/* run the test */
void TestTrain(int argc, const char ** argv)
{
GeneateTTrainData("ttrain.txt");
XConfig config;
config.Create(argc, argv);
TTDataLoader loader;
loader.SetFileName("ttrain.txt");
loader.SetBatchSize(config.GetInt("batchsize", TT_BATCH_SIZE));
TTModel model;
model.Init(config, -1);
XOptimizer optimizer;
optimizer.Init(config);
XTrainer trainer;
trainer.Run(&config, &loader, &model, &optimizer);
}
/*****************************
* data loader
******************************/
/* constructor: allocate the file-name buffer and set the defaults */
TTDataLoader::TTDataLoader()
{
    batchSize = TT_BATCH_SIZE;
    file = NULL;
    fileName = new char[MAX_FILE_NAME_LENGTH];
}
/* de-constructor
   NOTE(review): the file handle is not closed here; End() is expected to
   have been called first - confirm Start()/End() are always paired */
TTDataLoader::~TTDataLoader()
{
/* release the file-name buffer allocated in the constructor */
delete[] fileName;
}
/* set file name
   >> myFileName - path of the training data file
   NOTE(review): unbounded strcpy into a fixed-size buffer (allocated with
   MAX_FILE_NAME_LENGTH in the constructor) - a longer path overflows;
   a bounded copy would be safer */
void TTDataLoader::SetFileName(const char * myFileName)
{
strcpy(fileName, myFileName);
}
/* set batch size */
void TTDataLoader::SetBatchSize(int myBatchSize)
{
batchSize = myBatchSize;
}
/* start the process */
bool TTDataLoader::Start()
{
FILE * file = fopen(fileName, "wb");
CheckNTErrors(file, "Cannot open the file");
/* skip the first line */
char * line = new char[MAX_SAMPLE_LINE_LENGTH];
fgets(line, MAX_SAMPLE_LINE_LENGTH - 1, file);
delete[] line;
return true;
}
/*
end the loading process: close the data file
<< true on success
*/
bool TTDataLoader::End()
{
    /* guard against End() being called without a successful Start(),
       and reset the handle so a double End() is harmless */
    if (file != NULL) {
        fclose(file);
        file = NULL;
    }

    return true;
}
/*
get a batch of samples
>> args - tensor list used by the trainer: input (item 0), output
          (item 1) and gold standard (item 2); missing tensors are
          created on the first call and reused afterwards
<< true if at least one sample was loaded, false when the data
   stream is exhausted
*/
bool TTDataLoader::GetBatch(XList * args)
{
    XTensor * input = NULL;
    XTensor * gold = NULL;

    /* item 0: the input tensor */
    if (args->count == 0) {
        input = new XTensor();
        args->Add(input);
    }
    else
        input = (XTensor*)args->GetItem(0);

    /* item 1: the output tensor (filled by the model, not here) */
    if (args->count == 1) {
        XTensor * output = new XTensor();
        args->Add(output);
    }

    /* item 2: the gold-standard tensor; the original code fetched
       item 1 (the output slot) on reuse, while RunMe() reads the
       gold standard from item 2 */
    if (args->count == 2) {
        gold = new XTensor();
        args->Add(gold);
    }
    else
        gold = (XTensor*)args->GetItem(2);

    int count = 0;
    const int sampleSize = MAX_SAMPLE_SIZE;
    char * line = new char[MAX_SAMPLE_LINE_LENGTH];
    int * inputBatch = new int[batchSize * sampleSize];
    int * goldBatch = new int[batchSize];
    int A, B, C, D;

    /* read at most batchSize samples; the first sample goes to slot 0
       (the original pre-incremented the counter, which skipped slot 0,
       overflowed the last slot and silently dropped one sample per
       batch) */
    while (count < batchSize &&
           fgets(line, MAX_SAMPLE_LINE_LENGTH - 1, file))
    {
        if (sscanf(line, "%d %d %d %d", &A, &B, &C, &D) < 4) {
            ShowNTErrors("Wrong format in the training file!");
        }

        inputBatch[count * sampleSize] = A;
        inputBatch[count * sampleSize + 1] = B;
        inputBatch[count * sampleSize + 2] = C;
        goldBatch[count] = D;
        count++;
    }

    InitTensor2D(input, count, 3, X_INT);
    InitTensor2D(gold, count, 1, X_INT);

    /* copy the staged integers into the tensors; the original passed
       the tensor itself as the data source instead of the arrays */
    input->SetData(inputBatch, count * 3);
    gold->SetData(goldBatch, count);

    delete[] line;
    delete[] inputBatch;
    delete[] goldBatch;

    /* false tells the trainer that no more data is available */
    return count > 0;
}
/*****************************
* the neural model
******************************/
/* constructor - members are set up later in Init() */
TTModel::TTModel()
{
}
/* de-constructor - the parameter tensors release their own memory */
TTModel::~TTModel()
{
}
/* config it
   >> myConfig - the configuration; the model keeps its own copy
   (via XConfig::CreateFromMe) */
void TTModel::SetConfig(XConfig &myConfig)
{
config.CreateFromMe(myConfig);
}
/* initialize the model
   >> myConfig - the configuration (copied into the model)
   >> devID - id of the device where the parameters live */
void TTModel::Init(XConfig &myConfig, int devID)
{
/* keep a private copy of the configuration */
SetConfig(myConfig);
/* the "vocabulary" covers the integers 0..MAX_INT_IN_TTRAIN */
int vSize = MAX_INT_IN_TTRAIN + 1;
int eSize = config.GetInt("esize", TT_EMBEDDING_SIZE);
/* NOTE(review): hSize is read but never used below - the hidden layer
   is sized with eSize; confirm whether "hsize" was meant to apply */
int hSize = config.GetInt("hsize", TT_HIDDEN_SIZE);
/* embedding matrix: one row per integer value */
InitTensor2D(&embeddingW, vSize, eSize, X_FLOAT, devID);
/* hidden layer: maps the 3 concatenated embeddings (3 * eSize) to
   eSize units */
InitTensor2D(&hiddenW, 3 * eSize, eSize, X_FLOAT, devID);
embeddingW.SetDataRand(-0.1F, 0.1F);
hiddenW.SetDataRand(-0.1F, 0.1F);
}
/* forward pass of the model: embed the input integers, merge the
   embeddings, apply the hidden layer and a softmax
   >> devID - device id (NOTE(review): unused in this body - the tensors
      carry their own device; confirm whether it should be honored)
   >> input - tensor of input integers
   >> output - the resulting output distribution */
void TTModel::Forward(int devID, XTensor * input, XTensor * output)
{
XTensor embedding;
XTensor embeddingCat;
XTensor hidden;
/* look up the embedding of each input integer */
embedding = Gather(embeddingW, *input);
/* merge the per-integer embeddings into one vector per sample */
embeddingCat = Merge(embedding, 0, 1);
hidden = MMul(embeddingCat, hiddenW);
/* NOTE(review): softmax over dimension 0 - verify this is the intended
   axis for this layout */
*output = Softmax(hidden, 0);
}
/* clear the model */
void TTModel::Clear()
{
/* only the configuration is released here; the parameter tensors are
   cleaned up by their own destructors */
config.Clear();
}
/*
clone the model to a given device
>> devID - id of the target device
<< the newly created copy (owned by the caller)
*/
XModel * TTModel::Clone(int devID)
{
    TTModel * model = new TTModel();

    /* Init() copies the configuration itself (it calls SetConfig),
       so the separate SetConfig call in the original was redundant */
    model->Init(config, devID);

    return model;
}
/*
run one training step of the network: forward, loss and backward
>> args - list of tensors: input (item 0), output (item 1) and
          gold standard (item 2)
<< true on success
*/
bool TTModel::RunMe(XList * args)
{
    CheckNTErrors(args != NULL && args->count >= 3, "Illegal input arguments!");

    XTensor * batchInput = (XTensor*)args->GetItem(0);
    XTensor * batchOutput = (XTensor*)args->GetItem(1);
    XTensor * batchGold = (XTensor*)args->GetItem(2);

    /* forward pass fills the output tensor */
    Forward(devID, batchInput, batchOutput);

    /* loss against the gold standard */
    XTensor loss;
    loss = CrossEntropy(batchOutput, batchGold);

    /* back-propagate the loss through the network */
    XNet net;
    net.Backward(loss);

    return true;
}
}
/*
* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2016-2021
* Natural Language Processing Lab, Northeastern University
* and
* NiuTrans Research
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
 * We test XTrain here. It is simple: we design a simple task in which we
 * make the model predict an integer D (0-100) from three input integers
* A, B and C (0-100). We generate a number of samples with different values
* of A, B and C. The gold standard is
*
* D = (int)(sqrt(A * B) + C)/2
*
* Our model is a two-layer feed-forward neural network. It can be treated
* as a classifier rather than a regression model.
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2021-03-03
* The express train was updated this year. It just takes me two hours and
* a half from Shenyang to Beijing.
*/
#ifndef __TTRAIN_H__
#define __TTRAIN_H__
#include <stdio.h>
#include <stdlib.h>
#include "XTrainer.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#define MAX_SAMPLE_NUM_IN_TTRAIN 100000
#define MAX_INT_IN_TTRAIN 100
#define MAX_SAMPLE_LINE_LENGTH 128
#define MAX_SAMPLE_SIZE 3
#define TT_BATCH_SIZE 256
#define TT_EMBEDDING_SIZE 256
#define TT_HIDDEN_SIZE 256
/* generate the training data file */
void GeneateTTrainData(const char * fileName);
/* run the test */
extern
void TestTrain(int argc, const char ** argv);
/* data loader for the toy task: reads "A B C D" integer samples from a
   text file and packs them into batches */
class TTDataLoader : public DataDistributeBase
{
protected:
/* name of the data file (buffer owned by this object) */
char * fileName;
/* handle of the data file; opened in Start() and closed in End() */
FILE * file;
/* number of samples per batch */
int batchSize;
public:
/* constructor */
TTDataLoader();
/* de-constructor
   NOTE(review): not virtual although the class derives from
   DataDistributeBase - confirm the base destructor is virtual before
   deleting through a base pointer */
~TTDataLoader();
/* set the name of the data file */
void SetFileName(const char * myFileName);
/* set the number of samples per batch */
void SetBatchSize(int myBatchSize);
/* start the process: open the data file */
bool Start();
/* end the process: close the data file */
bool End();
/* get a batch of samples from the file */
bool GetBatch(XList * args);
};
/* the toy model: an embedding layer followed by a hidden layer and a
   softmax, used to predict D from the inputs A, B and C */
class TTModel : public XModel
{
protected:
/* id of the device the parameters live on */
int devID;
/* private copy of the configuration */
XConfig config;
/* embedding matrix of the input integers */
XTensor embeddingW;
/* parameter matrix of the hidden layer */
XTensor hiddenW;
public:
/* constructor */
TTModel();
/* de-constructor */
~TTModel();
/* config it: store a copy of the configuration */
void SetConfig(XConfig &myConfig);
/* initialize the parameters */
void Init(XConfig &myConfig, int devID);
/* forward pass of the model */
void Forward(int devID, XTensor * input, XTensor * output);
/* clear the model */
void Clear();
/* clone the model to a given device */
XModel * Clone(int devID);
/* run one training step of the network */
bool RunMe(XList * args);
};
/* */
}
#endif
\ No newline at end of file
...@@ -45,7 +45,7 @@ XOptimizer::~XOptimizer() ...@@ -45,7 +45,7 @@ XOptimizer::~XOptimizer()
initialize the optimizer initialize the optimizer
>> config - the configuration >> config - the configuration
*/ */
void XOptimizer::Init(XConfig * config) void XOptimizer::Init(XConfig &config)
{ {
} }
......
...@@ -57,7 +57,7 @@ public: ...@@ -57,7 +57,7 @@ public:
/* initialize the optimizer */ /* initialize the optimizer */
virtual virtual
void Init(XConfig * config); void Init(XConfig &config);
/* clear the optimizer */ /* clear the optimizer */
virtual virtual
......
...@@ -68,9 +68,11 @@ public: ...@@ -68,9 +68,11 @@ public:
/* de-constructor */ /* de-constructor */
~XTrainer(); ~XTrainer();
protected:
/* get the device ids of the jobs */ /* get the device ids of the jobs */
void GetDevIDs(XConfig * config, int * ids, int & num, int maxDevNum); void GetDevIDs(XConfig * config, int * ids, int & num, int maxDevNum);
public:
/* run the leader (this is the core process) */ /* run the leader (this is the core process) */
virtual virtual
void Run(XConfig * config, DataDistributeBase * dataDistributor, void Run(XConfig * config, DataDistributeBase * dataDistributor,
......
...@@ -39,6 +39,11 @@ XWorkerJob::XWorkerJob() ...@@ -39,6 +39,11 @@ XWorkerJob::XWorkerJob()
/* de-constructor */ /* de-constructor */
XWorkerJob::~XWorkerJob() XWorkerJob::~XWorkerJob()
{ {
for (int i = 0; i < inputs.count; i++)
delete (XTensor*)inputs[i];
for (int i = 0; i < outputs.count; i++)
delete (XTensor*)outputs[i];
} }
/* set the model */ /* set the model */
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论