Commit 883673c6 by xiaotong

add TTrain

parent 6ec2d28c
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "./tensor/test/Test.h" #include "./tensor/test/Test.h"
#include "./sample/fnnlm/FNNLM.h" #include "./sample/fnnlm/FNNLM.h"
#include "./sample/transformer/NMT.h" #include "./sample/transformer/NMT.h"
#include "./train/TTrain.h"
//#define CRTDBG_MAP_ALLOC //#define CRTDBG_MAP_ALLOC
//#include <stdlib.h> //#include <stdlib.h>
...@@ -38,8 +39,10 @@ using namespace nmt; ...@@ -38,8 +39,10 @@ using namespace nmt;
int main( int argc, const char ** argv ) int main( int argc, const char ** argv )
{ {
if(argc > 1 && !strcmp(argv[1], "-test")) if (argc > 1 && !strcmp(argv[1], "-test"))
Test(); Test();
else if (argc > 1 && !strcmp(argv[1], "-testtrain"))
TestTrain(argc - 1, argv + 1);
else if(argc > 1 && !strcmp(argv[1], "-fnnlm")) else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
FNNLMMain(argc - 1, argv + 1); FNNLMMain(argc - 1, argv + 1);
else if(argc > 1 && !strcmp(argv[1], "-t2t")) else if(argc > 1 && !strcmp(argv[1], "-t2t"))
...@@ -47,7 +50,8 @@ int main( int argc, const char ** argv ) ...@@ -47,7 +50,8 @@ int main( int argc, const char ** argv )
else{ else{
fprintf(stderr, "Thanks for using NiuTensor! This is a library for building\n"); fprintf(stderr, "Thanks for using NiuTensor! This is a library for building\n");
fprintf(stderr, "neural networks in an easy way. \n\n"); fprintf(stderr, "neural networks in an easy way. \n\n");
fprintf(stderr, "Run this program with \"-test\" for unit test!\n"); fprintf(stderr, " Run this program with \"-test\" for unit test!\n");
fprintf(stderr, "Or run this program with \"-testtrain\" for test of the trainer!\n");
fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n"); fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
fprintf(stderr, "Or run this program with \"-t2t\" for sample Transformer!\n"); fprintf(stderr, "Or run this program with \"-t2t\" for sample Transformer!\n");
} }
......
...@@ -46,6 +46,18 @@ XConfig::~XConfig() ...@@ -46,6 +46,18 @@ XConfig::~XConfig()
delete[] args; delete[] args;
} }
/* reset the configuration: release all stored items and return to the
   empty state */
void XConfig::Clear()
{
    int i = 0;
    while (i < n) {
        delete[] args[i];
        ++i;
    }
    delete[] args;

    args = NULL;
    n = 0;
    nReal = 0;
}
/* /*
create a config create a config
>> myN - number of the input arguments >> myN - number of the input arguments
...@@ -238,6 +250,36 @@ float XConfig::GetFloat(const char * name, float defaultP) ...@@ -238,6 +250,36 @@ float XConfig::GetFloat(const char * name, float defaultP)
return r; return r;
} }
/* return how many items are currently stored */
int XConfig::GetItemNum()
{
    const int itemNum = n;
    return itemNum;
}
/*
fetch the item stored at offset i
>> i - the offset (0-based)
<< the item string, or NULL when i is out of range
*/
char * XConfig::GetItem(int i)
{
    if (i < 0 || i >= n)
        return NULL;

    return args[i];
}
/*
initialize this configuration by copying all items from another one
>> myConfig - the configuration to copy from
*/
void XConfig::CreateFromMe(XConfig & myConfig)
{
    /* guard against self-copy: Clear() would destroy the very items
       we are about to read from myConfig */
    if (this == &myConfig)
        return;

    Clear();

    for (int i = 0; i < myConfig.GetItemNum(); i++)
        Add(myConfig.GetItem(i), i);
}
/* /*
load the value of an argument (in integer) load the value of an argument (in integer)
>> argc - number of arguments >> argc - number of arguments
......
...@@ -55,6 +55,9 @@ public: ...@@ -55,6 +55,9 @@ public:
/* de-constructor */ /* de-constructor */
~XConfig(); ~XConfig();
/* clear it */
void Clear();
/* create a config */ /* create a config */
void Create(const int myN, const char ** myArgs); void Create(const int myN, const char ** myArgs);
...@@ -92,6 +95,15 @@ public: ...@@ -92,6 +95,15 @@ public:
/* get the value of an argument (in float) */ /* get the value of an argument (in float) */
float GetFloat(const char * name, float defaultP); float GetFloat(const char * name, float defaultP);
/* get item number */
int GetItemNum();
/* get the item with offset i */
char * GetItem(int i);
/* initialize with another config model */
void CreateFromMe(XConfig &myConfig);
}; };
#define MAX_PARAM_NUM 100 #define MAX_PARAM_NUM 100
......
/*
* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2016-2021
* Natural Language Processing Lab, Northeastern University
* and
* NiuTrans Research
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
 * We test XTrain here. It is simple: we design a simple task in which we
 * make the model predict an integer D (0-100) from three input integers
* A, B and C (0-100). We generate a number of samples with different values
* of A, B and C. The gold standard is
*
* D = (int)(sqrt(A * B) + C)/2
*
* Our model is a two-layer feed-forward neural network. It can be treated
* as a classifier rather than a regression model.
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2021-03-03
*/
#include "TTrain.h"
#include "../tensor/core/CHeader.h"
#include "../tensor/function/FHeader.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* generate the synthetic training data file
   >> fileName - path of the file to write */
void GeneateTTrainData(const char * fileName)
{
    const int total = MAX_SAMPLE_NUM_IN_TTRAIN;
    const int valueRange = MAX_INT_IN_TTRAIN;

    FILE * out = fopen(fileName, "wb");
    CheckNTErrors(out, "Cannot open the file");

    fprintf(stderr, "Generating data ... ");

    /* the first line records how many samples follow */
    fprintf(out, "%d\n", total);

    /* fixed seed so the data set is reproducible */
    srand(1);

    for (int i = 0; i < total; i++) {
        int a = (int)(((float)rand() / RAND_MAX) * valueRange);
        int b = (int)(((float)rand() / RAND_MAX) * valueRange);
        int c = (int)(((float)rand() / RAND_MAX) * valueRange);

        /* gold standard of the toy task */
        int d = (int)((sqrt(a * b) + c) / 2);

        fprintf(out, "%d %d %d %d\n", a, b, c, d);
    }

    fprintf(stderr, "%d samples in \"%s\" [done]\n", total, fileName);

    fclose(out);
}
/* run the test */
void TestTrain(int argc, const char ** argv)
{
GeneateTTrainData("ttrain.txt");
XConfig config;
config.Create(argc, argv);
TTDataLoader loader;
loader.SetFileName("ttrain.txt");
loader.SetBatchSize(config.GetInt("batchsize", TT_BATCH_SIZE));
TTModel model;
model.Init(config, -1);
XOptimizer optimizer;
optimizer.Init(config);
XTrainer trainer;
trainer.Run(&config, &loader, &model, &optimizer);
}
/*****************************
* data loader
******************************/
/* constructor: allocate the file-name buffer and set the defaults */
TTDataLoader::TTDataLoader()
{
    batchSize = TT_BATCH_SIZE;
    file = NULL;
    fileName = new char[MAX_FILE_NAME_LENGTH];
}
/* de-constructor
   NOTE(review): the file handle is not closed here; End() is expected to
   have been called first - confirm Start()/End() are always paired */
TTDataLoader::~TTDataLoader()
{
/* release the file-name buffer allocated in the constructor */
delete[] fileName;
}
/* set file name
   >> myFileName - path of the training data file
   NOTE(review): unbounded strcpy into a fixed-size buffer (allocated with
   MAX_FILE_NAME_LENGTH in the constructor) - a longer path overflows;
   a bounded copy would be safer */
void TTDataLoader::SetFileName(const char * myFileName)
{
strcpy(fileName, myFileName);
}
/* set batch size */
void TTDataLoader::SetBatchSize(int myBatchSize)
{
batchSize = myBatchSize;
}
/* start the process */
bool TTDataLoader::Start()
{
FILE * file = fopen(fileName, "wb");
CheckNTErrors(file, "Cannot open the file");
/* skip the first line */
char * line = new char[MAX_SAMPLE_LINE_LENGTH];
fgets(line, MAX_SAMPLE_LINE_LENGTH - 1, file);
delete[] line;
return true;
}
/*
end the loading process: close the data file
<< true on success
*/
bool TTDataLoader::End()
{
    /* guard against End() being called without a successful Start(),
       and reset the handle so a double End() is harmless */
    if (file != NULL) {
        fclose(file);
        file = NULL;
    }

    return true;
}
/*
get a batch of samples
>> args - tensor list used by the trainer: input (item 0), output
          (item 1) and gold standard (item 2); missing tensors are
          created on the first call and reused afterwards
<< true if at least one sample was loaded, false when the data
   stream is exhausted
*/
bool TTDataLoader::GetBatch(XList * args)
{
    XTensor * input = NULL;
    XTensor * gold = NULL;

    /* item 0: the input tensor */
    if (args->count == 0) {
        input = new XTensor();
        args->Add(input);
    }
    else
        input = (XTensor*)args->GetItem(0);

    /* item 1: the output tensor (filled by the model, not here) */
    if (args->count == 1) {
        XTensor * output = new XTensor();
        args->Add(output);
    }

    /* item 2: the gold-standard tensor; the original code fetched
       item 1 (the output slot) on reuse, while RunMe() reads the
       gold standard from item 2 */
    if (args->count == 2) {
        gold = new XTensor();
        args->Add(gold);
    }
    else
        gold = (XTensor*)args->GetItem(2);

    int count = 0;
    const int sampleSize = MAX_SAMPLE_SIZE;
    char * line = new char[MAX_SAMPLE_LINE_LENGTH];
    int * inputBatch = new int[batchSize * sampleSize];
    int * goldBatch = new int[batchSize];
    int A, B, C, D;

    /* read at most batchSize samples; the first sample goes to slot 0
       (the original pre-incremented the counter, which skipped slot 0,
       overflowed the last slot and silently dropped one sample per
       batch) */
    while (count < batchSize &&
           fgets(line, MAX_SAMPLE_LINE_LENGTH - 1, file))
    {
        if (sscanf(line, "%d %d %d %d", &A, &B, &C, &D) < 4) {
            ShowNTErrors("Wrong format in the training file!");
        }

        inputBatch[count * sampleSize] = A;
        inputBatch[count * sampleSize + 1] = B;
        inputBatch[count * sampleSize + 2] = C;
        goldBatch[count] = D;
        count++;
    }

    InitTensor2D(input, count, 3, X_INT);
    InitTensor2D(gold, count, 1, X_INT);

    /* copy the staged integers into the tensors; the original passed
       the tensor itself as the data source instead of the arrays */
    input->SetData(inputBatch, count * 3);
    gold->SetData(goldBatch, count);

    delete[] line;
    delete[] inputBatch;
    delete[] goldBatch;

    /* false tells the trainer that no more data is available */
    return count > 0;
}
/*****************************
* the neural model
******************************/
/* constructor - members are set up later in Init() */
TTModel::TTModel()
{
}
/* de-constructor - the parameter tensors release their own memory */
TTModel::~TTModel()
{
}
/* config it
   >> myConfig - the configuration; the model keeps its own copy
   (via XConfig::CreateFromMe) */
void TTModel::SetConfig(XConfig &myConfig)
{
config.CreateFromMe(myConfig);
}
/* initialize the model
   >> myConfig - the configuration (copied into the model)
   >> devID - id of the device where the parameters live */
void TTModel::Init(XConfig &myConfig, int devID)
{
/* keep a private copy of the configuration */
SetConfig(myConfig);
/* the "vocabulary" covers the integers 0..MAX_INT_IN_TTRAIN */
int vSize = MAX_INT_IN_TTRAIN + 1;
int eSize = config.GetInt("esize", TT_EMBEDDING_SIZE);
/* NOTE(review): hSize is read but never used below - the hidden layer
   is sized with eSize; confirm whether "hsize" was meant to apply */
int hSize = config.GetInt("hsize", TT_HIDDEN_SIZE);
/* embedding matrix: one row per integer value */
InitTensor2D(&embeddingW, vSize, eSize, X_FLOAT, devID);
/* hidden layer: maps the 3 concatenated embeddings (3 * eSize) to
   eSize units */
InitTensor2D(&hiddenW, 3 * eSize, eSize, X_FLOAT, devID);
embeddingW.SetDataRand(-0.1F, 0.1F);
hiddenW.SetDataRand(-0.1F, 0.1F);
}
/* forward pass of the model: embed the input integers, merge the
   embeddings, apply the hidden layer and a softmax
   >> devID - device id (NOTE(review): unused in this body - the tensors
      carry their own device; confirm whether it should be honored)
   >> input - tensor of input integers
   >> output - the resulting output distribution */
void TTModel::Forward(int devID, XTensor * input, XTensor * output)
{
XTensor embedding;
XTensor embeddingCat;
XTensor hidden;
/* look up the embedding of each input integer */
embedding = Gather(embeddingW, *input);
/* merge the per-integer embeddings into one vector per sample */
embeddingCat = Merge(embedding, 0, 1);
hidden = MMul(embeddingCat, hiddenW);
/* NOTE(review): softmax over dimension 0 - verify this is the intended
   axis for this layout */
*output = Softmax(hidden, 0);
}
/* clear the model */
void TTModel::Clear()
{
/* only the configuration is released here; the parameter tensors are
   cleaned up by their own destructors */
config.Clear();
}
/*
clone the model to a given device
>> devID - id of the target device
<< the newly created copy (owned by the caller)
*/
XModel * TTModel::Clone(int devID)
{
    TTModel * model = new TTModel();

    /* Init() copies the configuration itself (it calls SetConfig),
       so the separate SetConfig call in the original was redundant */
    model->Init(config, devID);

    return model;
}
/*
run one training step of the network: forward, loss and backward
>> args - list of tensors: input (item 0), output (item 1) and
          gold standard (item 2)
<< true on success
*/
bool TTModel::RunMe(XList * args)
{
    CheckNTErrors(args != NULL && args->count >= 3, "Illegal input arguments!");

    XTensor * batchInput = (XTensor*)args->GetItem(0);
    XTensor * batchOutput = (XTensor*)args->GetItem(1);
    XTensor * batchGold = (XTensor*)args->GetItem(2);

    /* forward pass fills the output tensor */
    Forward(devID, batchInput, batchOutput);

    /* loss against the gold standard */
    XTensor loss;
    loss = CrossEntropy(batchOutput, batchGold);

    /* back-propagate the loss through the network */
    XNet net;
    net.Backward(loss);

    return true;
}
}
/*
* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2016-2021
* Natural Language Processing Lab, Northeastern University
* and
* NiuTrans Research
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
 * We test XTrain here. It is simple: we design a simple task in which we
 * make the model predict an integer D (0-100) from three input integers
* A, B and C (0-100). We generate a number of samples with different values
* of A, B and C. The gold standard is
*
* D = (int)(sqrt(A * B) + C)/2
*
* Our model is a two-layer feed-forward neural network. It can be treated
* as a classifier rather than a regression model.
*
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2021-03-03
* The express train was updated this year. It just takes me two hours and
* a half from Shenyang to Beijing.
*/
#ifndef __TTRAIN_H__
#define __TTRAIN_H__
#include <stdio.h>
#include <stdlib.h>
#include "XTrainer.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#define MAX_SAMPLE_NUM_IN_TTRAIN 100000
#define MAX_INT_IN_TTRAIN 100
#define MAX_SAMPLE_LINE_LENGTH 128
#define MAX_SAMPLE_SIZE 3
#define TT_BATCH_SIZE 256
#define TT_EMBEDDING_SIZE 256
#define TT_HIDDEN_SIZE 256
/* generate the training data file */
void GeneateTTrainData(const char * fileName);
/* run the test */
extern
void TestTrain(int argc, const char ** argv);
/* data loader for the toy task: reads "A B C D" integer samples from a
   text file and packs them into batches */
class TTDataLoader : public DataDistributeBase
{
protected:
/* name of the data file (buffer owned by this object) */
char * fileName;
/* handle of the data file; opened in Start() and closed in End() */
FILE * file;
/* number of samples per batch */
int batchSize;
public:
/* constructor */
TTDataLoader();
/* de-constructor
   NOTE(review): not virtual although the class derives from
   DataDistributeBase - confirm the base destructor is virtual before
   deleting through a base pointer */
~TTDataLoader();
/* set the name of the data file */
void SetFileName(const char * myFileName);
/* set the number of samples per batch */
void SetBatchSize(int myBatchSize);
/* start the process: open the data file */
bool Start();
/* end the process: close the data file */
bool End();
/* get a batch of samples from the file */
bool GetBatch(XList * args);
};
/* the toy model: an embedding layer followed by a hidden layer and a
   softmax, used to predict D from the inputs A, B and C */
class TTModel : public XModel
{
protected:
/* id of the device the parameters live on */
int devID;
/* private copy of the configuration */
XConfig config;
/* embedding matrix of the input integers */
XTensor embeddingW;
/* parameter matrix of the hidden layer */
XTensor hiddenW;
public:
/* constructor */
TTModel();
/* de-constructor */
~TTModel();
/* config it: store a copy of the configuration */
void SetConfig(XConfig &myConfig);
/* initialize the parameters */
void Init(XConfig &myConfig, int devID);
/* forward pass of the model */
void Forward(int devID, XTensor * input, XTensor * output);
/* clear the model */
void Clear();
/* clone the model to a given device */
XModel * Clone(int devID);
/* run one training step of the network */
bool RunMe(XList * args);
};
/* */
}
#endif
\ No newline at end of file
...@@ -45,7 +45,7 @@ XOptimizer::~XOptimizer() ...@@ -45,7 +45,7 @@ XOptimizer::~XOptimizer()
initialize the optimizer initialize the optimizer
>> config - the configuration >> config - the configuration
*/ */
void XOptimizer::Init(XConfig * config) void XOptimizer::Init(XConfig &config)
{ {
} }
......
...@@ -57,7 +57,7 @@ public: ...@@ -57,7 +57,7 @@ public:
/* initialize the optimizer */ /* initialize the optimizer */
virtual virtual
void Init(XConfig * config); void Init(XConfig &config);
/* clear the optimizer */ /* clear the optimizer */
virtual virtual
......
...@@ -68,9 +68,11 @@ public: ...@@ -68,9 +68,11 @@ public:
/* de-constructor */ /* de-constructor */
~XTrainer(); ~XTrainer();
protected:
/* get the device ids of the jobs */ /* get the device ids of the jobs */
void GetDevIDs(XConfig * config, int * ids, int & num, int maxDevNum); void GetDevIDs(XConfig * config, int * ids, int & num, int maxDevNum);
public:
/* run the leader (this is the core process) */ /* run the leader (this is the core process) */
virtual virtual
void Run(XConfig * config, DataDistributeBase * dataDistributor, void Run(XConfig * config, DataDistributeBase * dataDistributor,
......
...@@ -39,6 +39,11 @@ XWorkerJob::XWorkerJob() ...@@ -39,6 +39,11 @@ XWorkerJob::XWorkerJob()
/* de-constructor */ /* de-constructor */
XWorkerJob::~XWorkerJob() XWorkerJob::~XWorkerJob()
{ {
for (int i = 0; i < inputs.count; i++)
delete (XTensor*)inputs[i];
for (int i = 0; i < outputs.count; i++)
delete (XTensor*)outputs[i];
} }
/* set the model */ /* set the model */
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论