Commit 37b7e09b by xiaotong

fix the bug in dimension setting in the back propagation of Merge

parent 9e5887dd
@@ -71,9 +71,11 @@ dE/da = split(dE/dc)
 void XShapeGrad::GradMerge(XTensor * node)
 {
     XLink &income = node->income;
-    CheckNTErrors(income.tailNum == 0, "Wrong input tensor number for MERGE!");
     XTensor * input = income.tails[0];
+    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for MERGE!");
+    CheckNTErrors(node->order == input->order - 1, "wrong tensor orders!");
     int whereToMerge = income.GetParamInt(0);
     int leadDim = income.GetParamInt(1);
@@ -99,7 +101,7 @@ void XShapeGrad::GradMerge(XTensor * node)
                                input->devID, input->mem);
         dims[whereToMerge - leadDim] *= dims[0];
-        XTensor gradNodeSmall(node->order - leadDim, dims,
+        XTensor gradNodeSmall(node->order - leadDim, dims + leadDim + 1,
                               node->dataType, node->denseRatio,
                               node->devID, node->mem);
@@ -109,7 +111,7 @@ void XShapeGrad::GradMerge(XTensor * node)
         for(int i = 0; i < blockNum; i++){
             gradNodeSmall.data = (char*)node->grad->data + i * blockSize;
             gradInputSmall.data = (char*)input->grad->data + i * blockSize;
-            _Split(&gradNodeSmall, &gradInputSmall, whereToMerge - leadDim, input->dimSize[leadDim]);
+            _Split(&gradNodeSmall, &gradInputSmall, whereToMerge - leadDim - 1, input->dimSize[leadDim]);
         }
     }
@@ -123,7 +125,7 @@ void XShapeGrad::GradMerge(XTensor * node)
         for(int i = 0; i < blockNum; i++){
             gradNodeSmall.data = (char*)node->grad->data + i * blockSize;
             gradInputSmall.data = (char*)input->grad->data + i * blockSize;
-            _Split(&gradNodeSmall, &gradInputSmallBuf, whereToMerge - leadDim, input->dimSize[leadDim]);
+            _Split(&gradNodeSmall, &gradInputSmallBuf, whereToMerge - leadDim - 1, input->dimSize[leadDim]);
             _Sum(&gradInputSmall, &gradInputSmallBuf, &gradInputSmall);
         }
     }
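For reference, the off-by-one this commit fixes: once MERGE folds the lead dimension away, every axis behind it shifts down by one position in the output tensor, so the axis along which dE/dc must be split back is whereToMerge - leadDim - 1, not whereToMerge - leadDim. Below is a minimal standalone sketch of that index arithmetic; it is not NiuTensor code, and it assumes Merge(a, whereToMerge, leadDim) folds dimension leadDim of a into dimension whereToMerge, with hypothetical sizes.

    // sketch of the axis arithmetic behind the fix; dimSize, leadDim and
    // whereToMerge are hypothetical values, not taken from the repository
    #include <cstdio>

    int main()
    {
        const int order = 3;
        int dimSize[order] = {2, 3, 4};   // input tensor a
        int leadDim = 0;
        int whereToMerge = 1;

        // c = Merge(a, whereToMerge, leadDim): drop leadDim and multiply
        // the merged axis by dimSize[leadDim]
        printf("c dims:");
        for(int i = 0; i < order; i++){
            if(i == leadDim)
                continue;
            int d = (i == whereToMerge) ? dimSize[i] * dimSize[leadDim] : dimSize[i];
            printf(" %d", d);             // prints "c dims: 6 4"
        }
        printf("\n");

        // in c the merged axis has shifted down by one because leadDim is
        // gone, which is exactly the "- 1" this commit adds to _Split
        printf("axis to split dE/dc along: %d\n", whereToMerge - leadDim - 1);
        return 0;
    }

With a of shape (2, 3, 4), leadDim = 0 and whereToMerge = 1, c has shape (6, 4) and the merged axis sits at index 0 of c, matching the corrected _Split calls above.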
@@ -73,8 +73,7 @@ void MakeWordBatch(XTensor &batch, NGram * ngrams, int ngramNum, int n, int vSiz
 void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net);
 void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NAME loss,
               FNNModel &model, FNNModel &grad, FNNNet &net);
-void FBInOne(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NAME loss,
-             FNNModel &model, XNet &net);
+void ForwardAutoDiff(XTensor inputs[], XTensor &output, FNNModel &model);
 
 /*
 entry of the program
@@ -415,7 +414,10 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
         }
         else{
             /* forward + backward process */
-            FBInOne(inputs, output, gold, CROSSENTROPY, model, autoDiffer);
+            ForwardAutoDiff(inputs, output, model);
+
+            /* automatic differentiation */
+            autoDiffer.Backward(output, gold, CROSSENTROPY);
 
             /* update model parameters */
             Update(model, grad, learningRate, true);
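The old FBInOne both built the graph and ran the backward pass; the refactor separates the two, so the gold answer and the loss function now belong to the differentiation step rather than the forward one. A condensed view of the resulting training step (names as in the surrounding Train() code; autoDiffer is the XNet instance used for automatic differentiation):

    /* forward: build the network and record the tensor connections */
    ForwardAutoDiff(inputs, output, model);

    /* backward: compute gradients w.r.t. the cross-entropy loss
       against the gold-standard answers */
    autoDiffer.Backward(output, gold, CROSSENTROPY);

    /* apply the gradients to the model parameters */
    Update(model, grad, learningRate, true);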
@@ -902,17 +904,14 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
 }
 
 /*
-forward + backward in one procedure
+forward process (with tensor connections)
 >> inputs - input word representations
 >> output - output probability
->> gold - gold standard
->> loss - loss function name
 >> model - the fnn model
 */
-void FBInOne(XTensor inputs[], XTensor &output, XTensor &gold,
-             LOSS_FUNCTION_NAME loss, FNNModel &model, XNet &net)
+void ForwardAutoDiff(XTensor inputs[], XTensor &output, FNNModel &model)
 {
-    int batchSize = gold.GetDim(0);
+    int batchSize = inputs[0].GetDim(0);
     int n = model.n;
     int depth = model.hDepth;
@@ -945,9 +944,6 @@ void FBInOne(XTensor inputs[], XTensor &output, XTensor &gold,
     /* output layer */
     output = LogSoftmax(MMul(hidden, model.outputW) + b, 1);
-
-    /* automatic differentiation */
-    net.Backward(output);
 }
 
 /*
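In equation form, the output layer retained above computes (h is the topmost hidden state; b is assumed to be the output-layer bias):

    output = \log\,\mathrm{softmax}(h\,W_{\mathrm{output}} + b)

where the softmax runs over dimension 1, the vocabulary axis, matching the second argument of LogSoftmax.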
@@ -301,7 +301,7 @@ void Split(const XTensor &big, XList &smalls, int whereToSplit, int splitNum)
         XLink::AddParamToHeadInt(s, whereToSplit);
 
         /* it is tricky here that we keep the id of each
-           block, rather than the total number of splits */
+           block, rather than the total number of the splits */
         XLink::AddParamToHeadInt(s, i);
     }
 }
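The id stored above is what the gradient side needs in order to locate each small tensor's slice inside the big tensor; the total number of splits is recoverable from the list itself. A hypothetical reader for these parameters, assuming the same GetParamInt convention used in GradMerge above:

    /* income is the XLink of one small tensor produced by Split */
    int whereToSplit = income.GetParamInt(0);  /* split dimension   */
    int blockId      = income.GetParamInt(1);  /* this block's id i */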