Commit ae990819 by xiaotong

New code for back-propagation for FNNLM

parent be38e4e5
......@@ -35,7 +35,6 @@ using namespace samplefnnlm;
int main( int argc, const char ** argv )
{
if(argc > 1 && !strcmp(argv[1], "-test"))
1;//Test();
else if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
......@@ -47,6 +46,8 @@ int main( int argc, const char ** argv )
fprintf(stderr, "Or run this program with \"-fnnlm\" for sample FNNLM!\n");
}
return 0;
XNet net;
XTensor a;
XTensor b;
......
......@@ -143,7 +143,7 @@ void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
/* back-propagation from output to input */
for(int i = nodes.count - 1; i >= 0; i--){
XTensor * node = (XTensor*)nodes.Get(i);
XTensor * node = (XTensor*)nodes.Get(i);;
if(node->visitMark == NODE_FINISHED)
continue;
......
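The hunk above is the heart of XNet::Backward: the node list is walked from the last element down to the first, which is what the comment means by "back-propagation from output to input", and any node already marked NODE_FINISHED is skipped. The following is a minimal, self-contained sketch of that traversal pattern (plain structs instead of the XTensor/XList types; it is an illustration, not the library code):

#include <cstdio>
#include <vector>

/* stand-in for a computation-graph node; "finished" plays the
   role of the NODE_FINISHED mark in the hunk above */
struct Node { int id; bool finished; };

int main()
{
    /* assume the list is already in topological order */
    std::vector<Node> nodes = { {0, false}, {1, false}, {2, true}, {3, false} };

    /* back-propagation: visit nodes from output to input */
    for (int i = (int)nodes.size() - 1; i >= 0; i--) {
        if (nodes[i].finished)
            continue;                 /* analogous to NODE_FINISHED */
        printf("backward through node %d\n", nodes[i].id);
        nodes[i].finished = true;
    }
    return 0;
}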
......@@ -57,7 +57,7 @@ void LoadArgs(int argc, const char ** argv, FNNModel &model);
void Init(FNNModel &model);
void Check(FNNModel &model);
void Copy(FNNModel &tgt, FNNModel &src);
void Clear(FNNModel &model);
void Clear(FNNModel &model, bool isNodeGrad);
void InitModelTensor1D(XTensor &tensor, int num, FNNModel &model);
void InitModelTensor2D(XTensor &tensor, int rowNum, int colNum, FNNModel &model);
void Train(const char * train, bool isShuffled, FNNModel &model);
......@@ -230,16 +230,37 @@ void Copy(FNNModel &tgt, FNNModel &src)
}
}
/* reset model parameters */
void Clear(FNNModel &model)
/*
reset model parameters
>> model - the model whose parameters (or their gradients) are set to 0
>> isNodeGrad - indicates whether the gradient is kept on the tensor
   nodes (automatic differentiation) rather than in the model itself
*/
void Clear(FNNModel &model, bool isNodeGrad)
{
if (isNodeGrad) {
if(model.embeddingW.grad != NULL)
model.embeddingW.grad->SetZeroAll();
for (int i = 0; i < MAX_HIDDEN_NUM; i++) {
if(model.hiddenW[i].grad != NULL)
model.hiddenW[i].grad->SetZeroAll();
if(model.hiddenB[i].grad != NULL)
model.hiddenB[i].grad->SetZeroAll();
}
if(model.outputW.grad != NULL)
model.outputW.grad->SetZeroAll();
if(model.outputB.grad != NULL)
model.outputB.grad->SetZeroAll();
}
else {
model.embeddingW.SetZeroAll();
for(int i = 0; i < MAX_HIDDEN_NUM; i++){
for (int i = 0; i < MAX_HIDDEN_NUM; i++) {
model.hiddenW[i].SetZeroAll();
model.hiddenB[i].SetZeroAll();
}
model.outputW.SetZeroAll();
model.outputB.SetZeroAll();
}
}
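The rewritten Clear has two reset modes: with isNodeGrad == false it zeroes the parameter tensors themselves (used when a separate FNNModel instance holds the hand-written gradients), and with isNodeGrad == true it zeroes the .grad tensors attached to the parameter nodes (used when the gradients come from automatic differentiation). A small self-contained analogue of the two modes, using plain structs instead of XTensor, might look like this:

#include <cstdio>

/* each parameter carries its own grad field, as an XTensor node does */
struct Param { float value; float grad; };

/* Clear(grad, false) analogue: zero a standalone gradient model */
void ClearManual(Param grads[], int n)
{
    for (int i = 0; i < n; i++) grads[i].value = 0.0f;
}

/* Clear(model, true) analogue: zero the per-parameter grad buffers */
void ClearNodeGrad(Param params[], int n)
{
    for (int i = 0; i < n; i++) params[i].grad = 0.0f;
}

int main()
{
    Param model[2] = {{0.5f, 0.3f}, {-0.2f, -0.1f}};
    Param grad[2]  = {{9.0f, 0.0f}, { 9.0f,  0.0f}};

    ClearManual(grad, 2);      /* manual-gradient branch */
    ClearNodeGrad(model, 2);   /* auto-diff branch       */

    printf("grad[0]=%g model[0].grad=%g\n", grad[0].value, model[0].grad);
    return 0;
}

In the training loop below, the manual branch calls Clear(grad, false) on the gradient model, while the auto-diff branch calls Clear(model, true) on the model itself.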
/*
......@@ -401,7 +422,7 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
FNNNet net;
/* gradient = 0 */
Clear(grad);
Clear(grad, false);
/* forward computation */
Forward(inputs, output, model, net);
......@@ -413,6 +434,9 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
Update(model, grad, learningRate, false);
}
else{
/* gradient = 0 */
Clear(model, true);
/* forward + backward process */
ForwardAutoDiff(inputs, output, model);
......@@ -507,6 +531,9 @@ void Update(FNNModel &model, FNNModel &grad, float epsilon, bool isNodeGrad)
XTensor * para = (XTensor*)paraList.GetItem(i);
XTensor * paraGrad = (XTensor*)gradList.GetItem(i);
//fprintf(stderr, "%d\n", i);
//paraGrad->Dump(stderr, "grad:", 10);
/* the delta rule */
_Sum(para, paraGrad, para, -epsilon);
}
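_Sum(para, paraGrad, para, -epsilon) implements the delta rule shown in the comment: each parameter tensor is overwritten with para - epsilon * paraGrad, i.e. plain gradient descent with learning rate epsilon. A self-contained numeric sketch of the same update on flat arrays (not the XTensor call itself):

#include <cstdio>

int main()
{
    /* the arrays stand in for the flattened data of para and paraGrad */
    float para[4]     = {0.5f, -0.2f, 0.1f, 0.0f};
    float paraGrad[4] = {0.1f,  0.4f, -0.3f, 0.2f};
    float epsilon = 0.01f;    /* learning rate */

    /* the delta rule: para <- para + (-epsilon) * paraGrad */
    for (int i = 0; i < 4; i++)
        para[i] = para[i] - epsilon * paraGrad[i];

    for (int i = 0; i < 4; i++)
        printf("%g ", para[i]);
    printf("\n");
    return 0;
}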
......@@ -936,14 +963,16 @@ void ForwardAutoDiff(XTensor inputs[], XTensor &output, FNNModel &model)
/* hidden layers */
for(int i = 0; i < depth; i++){
b = Unsqueeze(model.hiddenB[i], 1, batchSize);
hidden = MMul(hidden, model.hiddenW) + b;
b = Unsqueeze(model.hiddenB[i], 0, batchSize);
hidden = MMul(hidden, model.hiddenW[i]) + b;
}
b = Unsqueeze(model.outputB, 1, batchSize);
b = Unsqueeze(model.outputB, 0, batchSize);
/* output layer */
output = LogSoftmax(MMul(hidden, model.outputW) + b, 1);
//XLink::ShowNetwork(stderr, &output);
}
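Two fixes land in ForwardAutoDiff: the hidden-layer weight is now indexed per layer (model.hiddenW[i] instead of model.hiddenW), and the bias is unsqueezed along dimension 0 rather than 1, so a bias of shape [hiddenSize] is replicated batchSize times into a [batchSize, hiddenSize] tensor that lines up with the MMul output. A self-contained sketch of that shape bookkeeping with plain arrays (the XTensor broadcasting itself is not reproduced):

#include <cstdio>

int main()
{
    const int batchSize = 2, hiddenSize = 3;

    /* hidden activations of shape [batchSize, hiddenSize] */
    float hidden[batchSize][hiddenSize] = {{1, 2, 3}, {4, 5, 6}};
    float bias[hiddenSize] = {0.1f, 0.2f, 0.3f};

    /* b = Unsqueeze(bias, 0, batchSize):
       [hiddenSize] -> [batchSize, hiddenSize] by repeating the row */
    float b[batchSize][hiddenSize];
    for (int i = 0; i < batchSize; i++)
        for (int j = 0; j < hiddenSize; j++)
            b[i][j] = bias[j];

    /* hidden + b: shapes now match element-wise */
    for (int i = 0; i < batchSize; i++)
        for (int j = 0; j < hiddenSize; j++)
            printf("%g%c", hidden[i][j] + b[i][j],
                   j == hiddenSize - 1 ? '\n' : ' ');
    return 0;
}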
/*
......
......@@ -1439,6 +1439,21 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int verbo
}
/*
dump data to a file
>> tensor - tensor whose data is dumped
>> file - where to dump the data
>> label - label of the tensor
>> n - number of items to dump
>> verbose - verbose level
*/
void XTensor::Dump(const XTensor * tensor, FILE * file, const char * label, const int n, const int verbose)
{
XTensor a(tensor->order, tensor->dimSize, tensor->dataType, tensor->denseRatio, tensor->devID, tensor->mem);
_CopyValues(tensor, &a);
a.Dump(file, label, n, verbose);
}
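The new static Dump copies the tensor's values into a temporary XTensor with the same order, shape, type and device, and then dumps that copy, so the source tensor is only read. A hypothetical usage fragment follows; the include path, the nts namespace and the InitTensor2D helper are assumptions about the surrounding library, not part of this commit:

#include "XTensor.h"     /* NiuTensor core header (path assumed) */
#include <cstdio>

using namespace nts;     /* library namespace (assumed) */

int main()
{
    /* a 2 x 3 float tensor, initialized to zero */
    XTensor t;
    InitTensor2D(&t, 2, 3);
    t.SetZeroAll();

    /* the new static entry point: dump the first 6 items of t to stderr */
    XTensor::Dump(&t, stderr, "t", 6);

    return 0;
}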
/*
read data from a file
>> file - where to load the data
>> label - label of the tensor
......@@ -1687,13 +1702,13 @@ void InitTensor(XTensor * tensor,
dims[0] = -abs(dims[0]);
tensor->Resize(myOrder, dims, myDataType, myDenseRatio);
if(myDevID == CURRENT_GPU)
if (myDevID == CURRENT_GPU)
tensor->devID = XDevice::GetGPUDevice();
else
tensor->devID = myDevID;
tensor->Resize(myOrder, dims, myDataType, myDenseRatio);
if(allocated)
XTensor::AllocateData(tensor);
}
......
......@@ -328,6 +328,10 @@ public:
/* dump data to a file */
void Dump(FILE * file, const char * label = NULL, const int n = -1, const int verbose = 0);
/* dump data to a file */
static
void Dump(const XTensor * tensor, FILE * file, const char * label = NULL, const int n = -1, const int verbose = 0);
/* read data from a file */
void Read(FILE * file, const char * label = NULL);
......