Commit a7f2f309 by xiaotong

move the sample directory up one level

parent 9b87b785
@@ -23,12 +23,14 @@
 #include "XNet.h"
 #include "../tensor/function/FHeader.h"
 #include "../tensor/core/CHeader.h"
+#include "../sample/fnnlm/FNNLM.h"

 //#define CRTDBG_MAP_ALLOC
 //#include <stdlib.h>
 //#include <crtdbg.h>

 using namespace nts;
+using namespace samplefnnlm;

 int main( int argc, const char ** argv )
@@ -36,10 +38,13 @@ int main( int argc, const char ** argv )
     if(argc > 1 && !strcmp(argv[1], "-test"))
         1;//Test();
+    else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
+        FNNLMMain(argc - 1, argv + 1);
     else{
         fprintf(stderr, "Thanks for using NiuTrans.Network! This is a library for building\n");
         fprintf(stderr, "neural networks in an easy way. \n\n");
         fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
+        fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
     }

     XNet net;
......
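For orientation: the `-fnnlm` branch hands control to the sample by shifting the argument list, so `FNNLMMain` parses its own options starting right after the mode flag. A minimal illustration (the invocation below is hypothetical, not part of the commit):

    /* invoked as:          NiuTrans.Network -fnnlm -autodiff
       main receives:       argc = 3, argv = { "NiuTrans.Network", "-fnnlm", "-autodiff" }
       FBNNLMMain's view:   argc = 2, argv = { "-fnnlm", "-autodiff" }  via (argc - 1, argv + 1) */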
@@ -27,10 +27,11 @@
 #include <math.h>
 #include "FNNLM.h"
-#include "../../XGlobal.h"
-#include "../../XUtility.h"
-#include "../../XDevice.h"
-#include "../../function/FHeader.h"
+#include "../../tensor/XGlobal.h"
+#include "../../tensor/XUtility.h"
+#include "../../tensor/XDevice.h"
+#include "../../tensor/function/FHeader.h"
+#include "../../network/XNet.h"

 namespace samplefnnlm
 {
@@ -50,6 +51,7 @@ float minmax = 0.08F;   // range [-p,p] for parameter initialization
 int sentBatch = 0;      // batch size at the sentence level
 int wordBatch = 1;      // batch size at the word level
 bool shuffled = false;  // whether the training data file is shuffled
+bool autoDiff = false;  // indicator of automatic differentiation

 void LoadArgs(int argc, const char ** argv, FNNModel &model);
 void Init(FNNModel &model);
@@ -59,7 +61,7 @@ void Clear(FNNModel &model);
 void InitModelTensor1D(XTensor &tensor, int num, FNNModel &model);
 void InitModelTensor2D(XTensor &tensor, int rowNum, int colNum, FNNModel &model);
 void Train(const char * train, bool isShuffled, FNNModel &model);
-void Update(FNNModel &model, FNNModel &grad, float epsilon);
+void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad);
 float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs = NULL);
 void Dump(const char * fn, FNNModel &model);
 void Read(const char * fn, FNNModel &model);
@@ -71,6 +73,8 @@ void MakeWordBatch(XTensor &batch, NGram * ngrams, int ngramNum, int n, int vSize,
 void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net);
 void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NAME loss,
               FNNModel &model, FNNModel &grad, FNNNet &net);
+void FBInOne(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NAME loss,
+             FNNModel &model, XNet &net);

 /*
 entry of the program
@@ -98,6 +102,7 @@ arguments:
  -devid D: the id of the device used
            -1: CPU, >=0: GPUs
  -mempool: use memory pools for memory management
+ -autodiff: use automatic differentiation for training

 where S=string, D=integer and F=float.
 All words in the training and test data files
@@ -182,6 +187,8 @@ void LoadArgs(int argc, const char ** argv, FNNModel &model)
         wordBatch = atoi(argv[i + 1]);
     if(!strcmp(argv[i], "-shuffle"))
         shuffled = true;
+    if(!strcmp(argv[i], "-autodiff"))
+        autoDiff = true;
     if(!strcmp(argv[i], "-dev") && i + 1 < argc)
         model.devID = atoi(argv[i + 1]);
 }
@@ -350,6 +357,9 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
     FNNModel grad;
     Copy(grad, model);

+    /* XNet for automatic differentiation */
+    XNet autoDiffer;
+
     double startT = GetClockSec();

     /* iterate for a number of epochs */
@@ -380,9 +390,6 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
             /* the gold standard */
             XTensor gold;

-            /* prepare an empty network for building the fnn */
-            FNNNet net;
-
             /* make the input tensor for position i */
             for(int i = 0; i < model.n - 1; i++)
                 MakeWordBatch(inputs[i], ngrams, ngramNum, i, model.vSize, model.devID, model.mem);
@@ -390,6 +397,10 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
             /* make the gold tensor */
             MakeWordBatch(gold, ngrams, ngramNum, model.n - 1, model.vSize, model.devID, model.mem);

+            if(!autoDiff){
+                /* prepare an empty network for building the fnn */
+                FNNNet net;
+
                 /* gradient = 0 */
                 Clear(grad);
@@ -400,7 +411,15 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
                 Backward(inputs, output, gold, CROSSENTROPY, model, grad, net);

                 /* update model parameters */
-                Update(model, grad, learningRate);
+                Update(model, grad, learningRate, false);
+            }
+            else{
+                /* forward + backward process */
+                FBInOne(inputs, output, gold, CROSSENTROPY, model, autoDiffer);
+
+                /* update model parameters */
+                Update(model, grad, learningRate, true);
+            }

             /* get probabilities */
             float prob = GetProb(output, gold);
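The two branches differ only in where the gradients live: the manual path accumulates them into the twin `grad` model, while the autodiff path has the `XNet` attach them to the parameter tensors themselves. A minimal sketch of the contrast, assuming `XTensor::grad` is the node-gradient pointer that `Update(..., true)` reads (see the `Update` hunk below):

    /* manual backward: gradients land in a separate FNNModel */
    Backward(inputs, output, gold, CROSSENTROPY, model, grad, net);
    XTensor &gManual = grad.embeddingW;        /* gradient kept in the grad model */

    /* autodiff: gradients are attached to the parameter tensors */
    FBInOne(inputs, output, gold, CROSSENTROPY, model, autoDiffer);
    XTensor * gNode = model.embeddingW.grad;   /* node gradient on the tensor itself */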
@@ -442,26 +461,45 @@ update the model parameters using the delta rule
 >> model - the model to update
 >> grad - gradients
 >> epsilon - learning rate
+>> isNodeGrad - indicates whether the gradients are attached to the tensor nodes
+                (automatic differentiation) rather than kept in the grad model
 */
-void Update(FNNModel &model, FNNModel &grad, float epsilon)
+void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
 {
     XList paraList(10);
     XList gradList(10);

     paraList.Add(&model.outputW);
-    gradList.Add(&grad.outputW);
     paraList.Add(&model.outputB);
-    gradList.Add(&grad.outputB);

     for (int i = 0; i < model.hDepth; i++) {
         paraList.Add(&model.hiddenW[i]);
-        gradList.Add(&grad.hiddenW[i]);
         paraList.Add(&model.hiddenB[i]);
-        gradList.Add(&grad.hiddenB[i]);
     }

     paraList.Add(&model.embeddingW);

+    if(!isNodeGrad){
+        gradList.Add(&grad.outputW);
+        gradList.Add(&grad.outputB);
+        for (int i = 0; i < model.hDepth; i++) {
+            gradList.Add(&grad.hiddenW[i]);
+            gradList.Add(&grad.hiddenB[i]);
+        }
         gradList.Add(&grad.embeddingW);
+    }
+    else{
+        gradList.Add(model.outputW.grad);
+        gradList.Add(model.outputB.grad);
+        for (int i = 0; i < model.hDepth; i++) {
+            gradList.Add(model.hiddenW[i].grad);
+            gradList.Add(model.hiddenB[i].grad);
+        }
+        gradList.Add(model.embeddingW.grad);
+    }

     for (int i = 0; i < paraList.count; i++) {
         XTensor * para = (XTensor*)paraList.GetItem(i);
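The rest of the loop is elided by this hunk. The delta rule named in the comment above is para = para - epsilon * gradient for each parameter/gradient pair; a minimal sketch of what the elided body would look like, assuming NiuTrans.Tensor's in-place `_Sum(a, b, c, beta)` routine (c = a + b * beta):

    for (int i = 0; i < paraList.count; i++) {
        XTensor * para     = (XTensor*)paraList.GetItem(i);
        XTensor * paraGrad = (XTensor*)gradList.GetItem(i);

        /* the delta rule: para = para - epsilon * gradient */
        _Sum(para, paraGrad, para, -epsilon);
    }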
@@ -864,6 +902,54 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NAME loss,
 }

+/*
+forward + backward in one procedure
+>> inputs - input word representations
+>> output - output probability
+>> gold - gold standard
+>> loss - loss function name
+>> model - the fnn model
+>> net - the network for automatic differentiation
+*/
+void FBInOne(XTensor inputs[], XTensor &output, XTensor &gold,
+             LOSS_FUNCTION_NAME loss, FNNModel &model, XNet &net)
+{
+    int batchSize = gold.GetDim(0);
+    int n = model.n;
+    int depth = model.hDepth;
+
+    XTensor words;
+    XTensor embeddingBig;
+    XTensor hidden;
+    XTensor b;
+
+    XList inputList(n - 1);
+    for(int i = 0; i < n - 1; i++)
+        inputList.Add(inputs + i);
+
+    /* represent n - 1 words in one tensor */
+    words = Merge(inputList, 0);
+
+    /* word embedding */
+    embeddingBig = MMul(words, model.embeddingW);
+
+    /* input of the first hidden layer */
+    hidden = Split(embeddingBig, 0, n - 1);
+
+    /* hidden layers */
+    for(int i = 0; i < depth; i++){
+        b = Unsqueeze(model.hiddenB[i], 1, batchSize);
+        hidden = MMul(hidden, model.hiddenW[i]) + b;
+    }
+
+    b = Unsqueeze(model.outputB, 1, batchSize);
+
+    /* output layer */
+    output = LogSoftmax(MMul(hidden, model.outputW) + b, 1);
+
+    /* automatic differentiation: back-propagate the loss through the network */
+    net.Backward(output, gold, loss);
+}
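The `Merge`/`Split` pair is what lets one matrix product embed all n - 1 context positions at once. A shape walk-through with batch size B, vocabulary size V and embedding size E as illustrative symbols (assuming `Split` adds a leading dimension of size n - 1; none of these names appear in the commit):

    /* inputs[i]    : B x V            one-hot batch for context position i */
    /* words        : ((n-1)*B) x V    after Merge(inputList, 0)            */
    /* embeddingBig : ((n-1)*B) x E    after one MMul with embeddingW       */
    /* hidden       : (n-1) x B x E    after Split(embeddingBig, 0, n - 1)  */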

 /*
 dump the model to the disk space
 >> fn - where to keep the model
 >> model - the fnn model
......
@@ -30,9 +30,9 @@
 #ifndef __FNNLM_H__
 #define __FNNLM_H__

-#include "../../XGlobal.h"
-#include "../../XTensor.h"
-#include "../../core/CHeader.h"
+#include "../../tensor/XGlobal.h"
+#include "../../tensor/XTensor.h"
+#include "../../tensor/core/CHeader.h"

 using namespace nts;
......
@@ -28,7 +28,6 @@
 #include <time.h>
 #include "XTensor.h"
 #include "XDevice.h"
-#include "./sample/fnnlm/FNNLM.h"
 #include "./test/Test.h"

 //#define CRTDBG_MAP_ALLOC
@@ -36,7 +35,6 @@
 //#include <crtdbg.h>

 using namespace nts;
-using namespace samplefnnlm;

 void SmallTest();
@@ -52,13 +50,10 @@ int main( int argc, const char ** argv )
     if(argc > 1 && !strcmp(argv[1], "-test"))
         Test();
-    else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
-        FNNLMMain(argc - 1, argv + 1);
     else{
         fprintf(stderr, "Thanks for using NiuTrans.Tensor! This is a library that eases the\n");
         fprintf(stderr, "use of tensors. All you need is to ... \n\n");
         fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
-        fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
     }

     //_CrtDumpMemoryLeaks();
......