Commit a7f2f309 by xiaotong

Move the sample dir to a higher level

parent 9b87b785
......@@ -23,12 +23,14 @@
#include "XNet.h"
#include "../tensor/function/FHeader.h"
#include "../tensor/core/CHeader.h"
#include "../sample/fnnlm/FNNLM.h"
//#define CRTDBG_MAP_ALLOC
//#include <stdlib.h>
//#include <crtdbg.h>
using namespace nts;
using namespace samplefnnlm;
int main( int argc, const char ** argv )
......@@ -36,10 +38,13 @@ int main( int argc, const char ** argv )
if(argc > 1 && !strcmp(argv[1], "-test"))
1;//Test();
else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
FNNLMMain(argc - 1, argv + 1);
else{
fprintf(stderr, "Thanks for using NiuTrans.Network! This is a library for building\n");
fprintf(stderr, "neural networks in an easy way. \n\n");
fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
}
XNet net;
......
......@@ -27,10 +27,11 @@
#include <math.h>
#include "FNNLM.h"
#include "../../XGlobal.h"
#include "../../XUtility.h"
#include "../../XDevice.h"
#include "../../function/FHeader.h"
#include "../../tensor/XGlobal.h"
#include "../../tensor/XUtility.h"
#include "../../tensor/XDevice.h"
#include "../../tensor/function/FHeader.h"
#include "../../network/XNet.h"
namespace samplefnnlm
{
......@@ -50,6 +51,7 @@ float minmax = 0.08F; // range [-p,p] for parameter initialization
int sentBatch = 0; // batch size at the sentence level
int wordBatch = 1; // batch size at the word level
bool shuffled = false; // whether to shuffle the training data file
bool autoDiff = false; // indicator of automatic differentiation
void LoadArgs(int argc, const char ** argv, FNNModel &model);
void Init(FNNModel &model);
......@@ -59,7 +61,7 @@ void Clear(FNNModel &model);
void InitModelTensor1D(XTensor &tensor, int num, FNNModel &model);
void InitModelTensor2D(XTensor &tensor, int rowNum, int colNum, FNNModel &model);
void Train(const char * train, bool isShuffled, FNNModel &model);
void Update(FNNModel &model, FNNModel &grad, float epsilon);
void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad);
float GetProb(XTensor &output, XTensor &gold, XTensor * wordProbs = NULL);
void Dump(const char * fn, FNNModel &model);
void Read(const char * fn, FNNModel &model);
......@@ -71,6 +73,8 @@ void MakeWordBatch(XTensor &batch, NGram * ngrams, int ngramNum, int n, int vSize,
void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net);
void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NAME loss,
FNNModel &model, FNNModel &grad, FNNNet &net);
void FBInOne(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NAME loss,
FNNModel &model, XNet &net);
/*
entry of the program
......@@ -98,6 +102,7 @@ arguments:
-devid D: the id of the device used
-1: GPU, >=0: GPUs
-mempool: use memory pools for memory management
-autodiff: use automatic differentiation for training
where S=string, D=integer and F=float.
All words in the training and test data files
......@@ -182,6 +187,8 @@ void LoadArgs(int argc, const char ** argv, FNNModel &model)
wordBatch = atoi(argv[i + 1]);
if(!strcmp(argv[i], "-shuffle"))
shuffled = true;
if(!strcmp(argv[i], "-autodiff"))
autoDiff = true;
if(!strcmp(argv[i], "-dev") && i + 1 < argc)
model.devID = atoi(argv[i + 1]);
}
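With the flag wired into LoadArgs, automatic differentiation can now be switched on from the command line. A hypothetical invocation (the binary name and the -train/-model arguments are illustrative assumptions, not taken from this diff):

    NiuTrans.Network -fnnlm -train train.txt -model model.bin -autodiff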
......@@ -350,6 +357,9 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
FNNModel grad;
Copy(grad, model);
/* XNet for automatic differentiation */
XNet autoDiffer;
double startT = GetClockSec();
/* iterate for a number of epochs */
......@@ -380,9 +390,6 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* the gold standard */
XTensor gold;
/* make the input tensor for position i */
for(int i = 0; i < model.n - 1; i++)
MakeWordBatch(inputs[i], ngrams, ngramNum, i, model.vSize, model.devID, model.mem);
......@@ -390,6 +397,10 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
/* make the gold tensor */
MakeWordBatch(gold, ngrams, ngramNum, model.n - 1, model.vSize, model.devID, model.mem);
if(!autoDiff){
/* prepare an empty network for building the fnn */
FNNNet net;
/* gradient = 0 */
Clear(grad);
......@@ -400,7 +411,15 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
Backward(inputs, output, gold, CROSSENTROPY, model, grad, net);
/* update model parameters */
Update(model, grad, learningRate, false);
}
else{
/* forward + backward process */
FBInOne(inputs, output, gold, CROSSENTROPY, model, autoDiffer);
/* update model parameters */
Update(model, grad, learningRate, true);
}
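Note that the autodiff branch still passes grad to Update only to keep the call signature uniform: with isNodeGrad set to true, Update reads each gradient from the parameter tensor's own grad node (see the rewritten Update below) rather than from the grad model.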
/* get probabilities */
float prob = GetProb(output, gold);
......@@ -442,26 +461,45 @@ update the model parameters using the delta rule
>> model - the model to update
>> grad - gradients
>> epsilon - learning rate
>> isNodeGrad - indicates whether the gradient is associated with the node
*/
void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
{
    XList paraList(10);
    XList gradList(10);

    paraList.Add(&model.outputW);
    paraList.Add(&model.outputB);

    for (int i = 0; i < model.hDepth; i++) {
        paraList.Add(&model.hiddenW[i]);
        paraList.Add(&model.hiddenB[i]);
    }

    paraList.Add(&model.embeddingW);

    if(!isNodeGrad){
        gradList.Add(&grad.outputW);
        gradList.Add(&grad.outputB);

        for (int i = 0; i < model.hDepth; i++) {
            gradList.Add(&grad.hiddenW[i]);
            gradList.Add(&grad.hiddenB[i]);
        }

        gradList.Add(&grad.embeddingW);
    }
    else{
        gradList.Add(model.outputW.grad);
        gradList.Add(model.outputB.grad);

        for (int i = 0; i < model.hDepth; i++) {
            gradList.Add(model.hiddenW[i].grad);
            gradList.Add(model.hiddenB[i].grad);
        }

        gradList.Add(model.embeddingW.grad);
    }

    for (int i = 0; i < paraList.count; i++) {
        XTensor * para = (XTensor*)paraList.GetItem(i);
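The hunk cuts the update loop off here. As a minimal sketch of how a delta-rule loop of this shape is typically completed, assuming NiuTrans.Tensor's _Sum(a, b, c, beta), which computes c = a + b * beta (an illustration, not necessarily this commit's exact code):

    for (int i = 0; i < paraList.count; i++) {
        XTensor * para = (XTensor*)paraList.GetItem(i);
        XTensor * paraGrad = (XTensor*)gradList.GetItem(i);

        /* the delta rule: para = para - epsilon * gradient */
        _Sum(para, paraGrad, para, -epsilon);
    }

Since the pairing is positional, parameters and their gradients must be pushed into paraList and gradList in the same order, which both branches above preserve.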
......@@ -864,6 +902,54 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NAME loss,
}
/*
forward + backward in one procedure
>> inputs - input word representations
>> output - output probability
>> gold - gold standard
>> loss - loss function name
>> model - the fnn model
>> net - the network used for automatic differentiation
*/
void FBInOne(XTensor inputs[], XTensor &output, XTensor &gold,
LOSS_FUNCTION_NAME loss, FNNModel &model, XNet &net)
{
int batchSize = gold.GetDim(0);
int n = model.n;
int depth = model.hDepth;
XTensor words;
XTensor embeddingBig;
XTensor hidden;
XTensor b;
XList inputList(n - 1);
for(int i = 0; i < n - 1; i++)
inputList.Add(inputs + i);
/* represent n - 1 words in one tensor */
words = Merge(inputList, 0);
/* word embedding */
embeddingBig = MMul(words, model.embeddingW);
/* input of the first hidden layer */
hidden = Split(embeddingBig, 0, n - 1);
/* hidden layers */
for(int i = 0; i < depth; i++){
b = Unsqueeze(model.hiddenB[i], 1, batchSize);
hidden = MMul(hidden, model.hiddenW[i]) + b;
}
b = Unsqueeze(model.outputB, 1, batchSize);
/* output layer */
output = LogSoftmax(MMul(hidden, model.outputW) + b, 1);
/* automatic differentiation: gradient of the loss between output and gold w.r.t. the parameters */
net.Backward(output, gold, loss);
}
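Once net.Backward has run, every parameter tensor used in building output carries its gradient in its grad member; that is exactly what the isNodeGrad branch of Update consumes, so no hand-written Backward pass over an FNNNet is needed in this mode.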
/*
dump the model to the disk space
>> fn - where to keep the model
>> model - the fnn model
......
......@@ -30,9 +30,9 @@
#ifndef __FNNLM_H__
#define __FNNLM_H__
#include "../../XGlobal.h"
#include "../../XTensor.h"
#include "../../core/CHeader.h"
#include "../../tensor/XGlobal.h"
#include "../../tensor/XTensor.h"
#include "../../tensor/core/CHeader.h"
using namespace nts;
......
......@@ -28,7 +28,6 @@
#include <time.h>
#include "XTensor.h"
#include "XDevice.h"
#include "./sample/fnnlm/FNNLM.h"
#include "./test/Test.h"
//#define CRTDBG_MAP_ALLOC
......@@ -36,7 +35,6 @@
//#include <crtdbg.h>
using namespace nts;
void SmallTest();
......@@ -52,13 +50,10 @@ int main( int argc, const char ** argv )
if(argc > 1 && !strcmp(argv[1], "-test"))
Test();
else{
fprintf(stderr, "Thanks for using NiuTrans.Tensor! This is a library that eases the\n");
fprintf(stderr, "use of tensors. All you need is to ... \n\n");
fprintf(stderr, "Run this program with \"-test\" for unit test!\n");
fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
}
//_CrtDumpMemoryLeaks();
......