Commit a7f2f309 by xiaotong

move the sample directory up one level

parent 9b87b785
@@ -23,12 +23,14 @@
 #include "XNet.h"
 #include "../tensor/function/FHeader.h"
 #include "../tensor/core/CHeader.h"
+#include "../sample/fnnlm/FNNLM.h"

 //#define CRTDBG_MAP_ALLOC
 //#include <stdlib.h>
 //#include <crtdbg.h>

 using namespace nts;
+using namespace samplefnnlm;

 int main( int argc, const char ** argv )
@@ -36,10 +38,13 @@ int main( int argc, const char ** argv )
     if(argc > 1 && !strcmp(argv[1], "-test"))
         1;//Test();
+    else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
+        FNNLMMain(argc - 1, argv + 1);
     else{
         fprintf(stderr, "Thanks for using NiuTrans.Network! This is a library for building\n");
         fprintf(stderr, "neural networks in an easy way. \n\n");
         fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
+        fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
     }

     XNet net;
......
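For orientation: the `-fnnlm` branch hands control to the sample by shifting the argument list, so `FNNLMMain` parses its own options starting right after the mode flag. A minimal illustration (the invocation below is hypothetical, not part of the commit):

    /* invoked as:          NiuTrans.Network -fnnlm -autodiff
       main receives:       argc = 3, argv = { "NiuTrans.Network", "-fnnlm", "-autodiff" }
       FBNNLMMain's view:   argc = 2, argv = { "-fnnlm", "-autodiff" }  via (argc - 1, argv + 1) */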
@@ -27,10 +27,11 @@
 #include <math.h>
 #include "FNNLM.h"
-#include "../../XGlobal.h"
-#include "../../XUtility.h"
-#include "../../XDevice.h"
-#include "../../function/FHeader.h"
+#include "../../tensor/XGlobal.h"
+#include "../../tensor/XUtility.h"
+#include "../../tensor/XDevice.h"
+#include "../../tensor/function/FHeader.h"
+#include "../../network/XNet.h"

 namespace samplefnnlm
 {
@@ -50,6 +51,7 @@ float minmax = 0.08F;   // range [-p,p] for parameter initialization
 int sentBatch = 0;      // batch size at the sentence level
 int wordBatch = 1;      // batch size at the word level
 bool shuffled = false;  // whether the training data file is shuffled
+bool autoDiff = false;  // indicator of automatic differentiation

 void LoadArgs(int argc, const char ** argv, FNNModel &model);
 void Init(FNNModel &model);
@@ -59,7 +61,7 @@ void Clear(FNNModel &model);
 void InitModelTensor1D(XTensor &tensor, int num, FNNModel &model);
 void InitModelTensor2D(XTensor &tensor, int rowNum, int colNum, FNNModel &model);
 void Train(const char * train, bool isShuffled, FNNModel &model);
-void Update(FNNModel &model, FNNModel &grad, float epsilon);
+void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad);
 float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs = NULL);
 void Dump(const char * fn, FNNModel &model);
 void Read(const char * fn, FNNModel &model);
@@ -71,6 +73,8 @@ void MakeWordBatch(XTensor &batch, NGram * ngrams, int ngramNum, int n, int vSize,
 void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net);
 void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NAME loss,
               FNNModel &model, FNNModel &grad, FNNNet &net);
+void FBInOne(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NAME loss,
+             FNNModel &model, XNet &net);

 /*
 entry of the program
@@ -98,6 +102,7 @@ arguments:
  -devid D: the id of the device used
            -1: CPU, >=0: GPUs
  -mempool: use memory pools for memory management
+ -autodiff: use automatic differentiation for training

 where S=string, D=integer and F=float.
 All words in the training and test data files
@@ -182,6 +187,8 @@ void LoadArgs(int argc, const char ** argv, FNNModel &model)
         wordBatch = atoi(argv[i + 1]);
     if(!strcmp(argv[i], "-shuffle"))
         shuffled = true;
+    if(!strcmp(argv[i], "-autodiff"))
+        autoDiff = true;
     if(!strcmp(argv[i], "-dev") && i + 1 < argc)
         model.devID = atoi(argv[i + 1]);
 }
@@ -350,6 +357,9 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
     FNNModel grad;
     Copy(grad, model);

+    /* XNet for automatic differentiation */
+    XNet autoDiffer;
+
     double startT = GetClockSec();

     /* iterate for a number of epochs */
@@ -380,9 +390,6 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
             /* the gold standard */
             XTensor gold;

-            /* prepare an empty network for building the fnn */
-            FNNNet net;
-
             /* make the input tensor for position i */
             for(int i = 0; i < model.n - 1; i++)
                 MakeWordBatch(inputs[i], ngrams, ngramNum, i, model.vSize, model.devID, model.mem);
@@ -390,6 +397,10 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
             /* make the gold tensor */
             MakeWordBatch(gold, ngrams, ngramNum, model.n - 1, model.vSize, model.devID, model.mem);

+            if(!autoDiff){
+                /* prepare an empty network for building the fnn */
+                FNNNet net;
+
                 /* gradient = 0 */
                 Clear(grad);
@@ -400,7 +411,15 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
                 Backward(inputs, output, gold, CROSSENTROPY, model, grad, net);

                 /* update model parameters */
-                Update(model, grad, learningRate);
+                Update(model, grad, learningRate, false);
+            }
+            else{
+                /* forward + backward process */
+                FBInOne(inputs, output, gold, CROSSENTROPY, model, autoDiffer);
+
+                /* update model parameters */
+                Update(model, grad, learningRate, true);
+            }

             /* get probabilities */
             float prob = GetProb(output, gold);
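The two branches differ only in where the gradients live: the manual path accumulates them into the twin `grad` model, while the autodiff path has the `XNet` attach them to the parameter tensors themselves. A minimal sketch of the contrast, assuming `XTensor::grad` is the node-gradient pointer that `Update(..., true)` reads (see the `Update` hunk below):

    /* manual backward: gradients land in a separate FNNModel */
    Backward(inputs, output, gold, CROSSENTROPY, model, grad, net);
    XTensor &gManual = grad.embeddingW;        /* gradient kept in the grad model */

    /* autodiff: gradients are attached to the parameter tensors */
    FBInOne(inputs, output, gold, CROSSENTROPY, model, autoDiffer);
    XTensor * gNode = model.embeddingW.grad;   /* node gradient on the tensor itself */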
@@ -442,26 +461,45 @@ update the model parameters using the delta rule
 >> model - the model to update
 >> grad - gradients
 >> epsilon - learning rate
+>> isNodeGrad - indicates whether the gradients are attached to the tensor nodes
+                (automatic differentiation) rather than kept in the grad model
 */
-void Update(FNNModel &model, FNNModel &grad, float epsilon)
+void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
 {
     XList paraList(10);
     XList gradList(10);

     paraList.Add(&model.outputW);
-    gradList.Add(&grad.outputW);
     paraList.Add(&model.outputB);
-    gradList.Add(&grad.outputB);

     for (int i = 0; i < model.hDepth; i++) {
         paraList.Add(&model.hiddenW[i]);
-        gradList.Add(&grad.hiddenW[i]);
         paraList.Add(&model.hiddenB[i]);
-        gradList.Add(&grad.hiddenB[i]);
     }

     paraList.Add(&model.embeddingW);

+    if(!isNodeGrad){
+        gradList.Add(&grad.outputW);
+        gradList.Add(&grad.outputB);
+        for (int i = 0; i < model.hDepth; i++) {
+            gradList.Add(&grad.hiddenW[i]);
+            gradList.Add(&grad.hiddenB[i]);
+        }
         gradList.Add(&grad.embeddingW);
+    }
+    else{
+        gradList.Add(model.outputW.grad);
+        gradList.Add(model.outputB.grad);
+        for (int i = 0; i < model.hDepth; i++) {
+            gradList.Add(model.hiddenW[i].grad);
+            gradList.Add(model.hiddenB[i].grad);
+        }
+        gradList.Add(model.embeddingW.grad);
+    }

     for (int i = 0; i < paraList.count; i++) {
         XTensor * para = (XTensor*)paraList.GetItem(i);
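The rest of the loop is elided by this hunk. The delta rule named in the comment above is para = para - epsilon * gradient for each parameter/gradient pair; a minimal sketch of what the elided body would look like, assuming NiuTrans.Tensor's in-place `_Sum(a, b, c, beta)` routine (c = a + b * beta):

    for (int i = 0; i < paraList.count; i++) {
        XTensor * para     = (XTensor*)paraList.GetItem(i);
        XTensor * paraGrad = (XTensor*)gradList.GetItem(i);

        /* the delta rule: para = para - epsilon * gradient */
        _Sum(para, paraGrad, para, -epsilon);
    }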
@@ -864,6 +902,54 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NAME loss,
 }

+/*
+forward + backward in one procedure
+>> inputs - input word representations
+>> output - output probability
+>> gold - gold standard
+>> loss - loss function name
+>> model - the fnn model
+>> net - the network for automatic differentiation
+*/
+void FBInOne(XTensor inputs[], XTensor &output, XTensor &gold,
+             LOSS_FUNCTION_NAME loss, FNNModel &model, XNet &net)
+{
+    int batchSize = gold.GetDim(0);
+    int n = model.n;
+    int depth = model.hDepth;
+
+    XTensor words;
+    XTensor embeddingBig;
+    XTensor hidden;
+    XTensor b;
+
+    XList inputList(n - 1);
+    for(int i = 0; i < n - 1; i++)
+        inputList.Add(inputs + i);
+
+    /* represent n - 1 words in one tensor */
+    words = Merge(inputList, 0);
+
+    /* word embedding */
+    embeddingBig = MMul(words, model.embeddingW);
+
+    /* input of the first hidden layer */
+    hidden = Split(embeddingBig, 0, n - 1);
+
+    /* hidden layers */
+    for(int i = 0; i < depth; i++){
+        b = Unsqueeze(model.hiddenB[i], 1, batchSize);
+        hidden = MMul(hidden, model.hiddenW[i]) + b;
+    }
+
+    b = Unsqueeze(model.outputB, 1, batchSize);
+
+    /* output layer */
+    output = LogSoftmax(MMul(hidden, model.outputW) + b, 1);
+
+    /* automatic differentiation: back-propagate the loss through the network */
+    net.Backward(output, gold, loss);
+}
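The `Merge`/`Split` pair is what lets one matrix product embed all n - 1 context positions at once. A shape walk-through with batch size B, vocabulary size V and embedding size E as illustrative symbols (assuming `Split` adds a leading dimension of size n - 1; none of these names appear in the commit):

    /* inputs[i]    : B x V            one-hot batch for context position i */
    /* words        : ((n-1)*B) x V    after Merge(inputList, 0)            */
    /* embeddingBig : ((n-1)*B) x E    after one MMul with embeddingW       */
    /* hidden       : (n-1) x B x E    after Split(embeddingBig, 0, n - 1)  */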

 /*
 dump the model to the disk space
 >> fn - where to keep the model
 >> model - the fnn model
......
@@ -30,9 +30,9 @@
 #ifndef __FNNLM_H__
 #define __FNNLM_H__

-#include "../../XGlobal.h"
-#include "../../XTensor.h"
-#include "../../core/CHeader.h"
+#include "../../tensor/XGlobal.h"
+#include "../../tensor/XTensor.h"
+#include "../../tensor/core/CHeader.h"

 using namespace nts;
......
@@ -28,7 +28,6 @@
 #include <time.h>
 #include "XTensor.h"
 #include "XDevice.h"
-#include "./sample/fnnlm/FNNLM.h"
 #include "./test/Test.h"

 //#define CRTDBG_MAP_ALLOC
@@ -36,7 +35,6 @@
 //#include <crtdbg.h>

 using namespace nts;
-using namespace samplefnnlm;

 void SmallTest();
@@ -52,13 +50,10 @@ int main( int argc, const char ** argv )
     if(argc > 1 && !strcmp(argv[1], "-test"))
         Test();
-    else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
-        FNNLMMain(argc - 1, argv + 1);
     else{
         fprintf(stderr, "Thanks for using NiuTrans.Tensor! This is a library that eases the\n");
         fprintf(stderr, "use of tensors. All you need is to ... \n\n");
         fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
-        fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
     }

     //_CrtDumpMemoryLeaks();
......