Commit b409f07f by xuchen

Fix a bug in the softmax function: it executed backward computation incorrectly (on the CPU path with NOLOSS).
I found this bug by chance; I then spent one night confirming that something must be wrong, and another night locating it.
Finally, I fixed it in five minutes.
parent ceb5b101
......@@ -30,7 +30,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
void _Gather(const XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSize);
/* gather selected sub-tensors (return a XTensor structure)
-make a new tensor to keep the result and return it */
+make a new tensor to keep the result and return it */
XTensor Gather(const XTensor &s, int dim, int * srcIndex, int indexSize);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -62,7 +62,6 @@ void _Spread(XTensor * source, XTensor * collection, int dim,
int * srcIndex, int indexSize, int * collIndex)
{
int order = source->order;
-int size = source->GetDim(dim);
CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
......@@ -150,7 +149,6 @@ void _SpreadForGather(XTensor * source, XTensor * collection, int dim,
int * srcIndex, int indexSize, int * collIndex)
{
int order = source->order;
-int size = source->GetDim(dim);
CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
......
......@@ -61,42 +61,39 @@ void _CrossEntropy(const XTensor * output, const XTensor * gold,
CheckNTErrors(loss->order == output->order - 1, "Wrong loss dimension!");
CheckNTErrors(gold->dataType == DEFAULT_DTYPE && output->dataType == DEFAULT_DTYPE, "TODO!");
-XTensor * logInter = NewTensorBuf(output, output->devID, output->mem);
-XTensor * mulInter = NewTensorBuf(output, output->devID, output->mem);
-XTensor * negInter = NewTensorBuf(output, output->devID, output->mem);
+XTensor * logBuf = NewTensorBuf(output, output->devID, output->mem);
+XTensor * mulBuf = NewTensorBuf(output, output->devID, output->mem);
+XTensor * negBuf = NewTensorBuf(output, output->devID, output->mem);
/* l = log(output) */
_Log(output, logBuf);
if(weight != NULL){
XTensor * weightBuf = NewTensorBuf(output, output->devID, output->mem);
-/* multiply gold and weight by broadcast wg = mulDim(g * w) */
+/* multiply gold with weight by broadcast wg = mulDim(g * w) */
_MultiplyDim(gold, weight, weightBuf, n, 0);
-/* multiply weighted gold and log(output) wgl = mul(wg, l) */
+/* multiply weighted gold with log(output) wgl = mul(wg, l) */
_Multiply(weightBuf, logBuf, mulBuf, 0);
DelTensorBuf(weightBuf);
}
else{
-/* multiply gold and log(output) gl = mul(g, l) */
+/* multiply gold with log(output) gl = mul(g, l) */
_Multiply(gold, logBuf, mulBuf, 0);
}
-/* negate multiply result n = negate(mul) */
+/* negate result n = negate(mul) */
_NegateMe(mulBuf);
_ReduceSum(mulBuf, loss, n);
-DelTensorBuf(negInter);
-DelTensorBuf(mulInter);
-DelTensorBuf(logInter);
+DelTensorBuf(mulBuf);
+DelTensorBuf(logBuf);
}
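For reference, the quantity this routine assembles from _Log, _MultiplyDim, _Multiply, _NegateMe and _ReduceSum is loss = sum_{i} (-w_i * gold_i * log(output_i)), reduced over the class dimension n. A minimal standalone C++ sketch of the same math on plain arrays (function and variable names here are illustrative, not part of the NiuTrans.Tensor API):

#include <cmath>
#include <cstdio>

/* cross entropy for one sample: loss = -sum_i w_i * g_i * log(y_i);
   weight may be NULL, matching the weight == NULL branch above */
double CrossEntropySample(const double * output, const double * gold,
                          const double * weight, int n)
{
    double loss = 0.0;
    for (int i = 0; i < n; i++) {
        double w = (weight != NULL) ? weight[i] : 1.0;
        loss += -w * gold[i] * std::log(output[i]);
    }
    return loss;
}

int main()
{
    const double y[3] = {0.7, 0.2, 0.1}; /* predicted distribution */
    const double g[3] = {1.0, 0.0, 0.0}; /* one-hot gold distribution */
    /* with a one-hot gold vector the sum reduces to -log(0.7) */
    printf("loss = %f\n", CrossEntropySample(y, g, NULL, 3));
    return 0;
}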
/*
-compute the cross entropy loss (implementation manually)
+compute the cross entropy loss (faster implementation with optimized code)
loss = sum_{i} (-gold_i * log(output_i))
where gold and output are distributions
......@@ -108,13 +105,13 @@ where gold and output are distributions
>> padding - specify a target value that is ignored and does not contribute to the loss computation
>> leadingDim - the leading dimension for the output
*/
-void _CrossEntropyManual(const XTensor * output, const XTensor * gold,
-XTensor * loss, const XTensor * weight,
-const XTensor * padding, int leadingDim)
+void _CrossEntropyFast(const XTensor * output, const XTensor * gold,
+XTensor * loss, const XTensor * weight,
+const XTensor * padding, int leadingDim)
{
#ifdef USE_CUDA
if(output->devID >= 0) {
-_CudaCrossEntropyManual(output, gold, loss, weight, padding, leadingDim);
+_CudaCrossEntropyFast(output, gold, loss, weight, padding, leadingDim);
return;
}
#endif
......@@ -263,21 +260,22 @@ DTYPE _CrossEntropy(const XTensor * output, const XTensor * gold,
XTensor * logBuf = NewTensorBuf(output, output->devID, output->mem);
XTensor * mulBuf = NewTensorBuf(output, output->devID, output->mem);
XTensor * negBuf = NewTensorBuf(output, output->devID, output->mem);
/* l = log(output) */
_Log(output, logBuf);
if(weight != NULL){
XTensor * weightBuf = NewTensorBuf(output, output->devID, output->mem);
-/* multiply gold and weight by broadcast wg = mulDim(g * w) */
+/* multiply gold with weight by broadcast wg = mulDim(g * w) */
_MultiplyDim(gold, weight, weightBuf, n, 0);
-/* multiply weighted gold and log(output) wgl = mul(wg, l) */
+/* multiply weighted gold with log(output) wgl = mul(wg, l) */
_Multiply(weightBuf, logBuf, mulBuf, 0);
DelTensorBuf(weightBuf);
}
else{
-/* multiply gold and log(output) gl = mul(g, l) */
+/* multiply gold with log(output) gl = mul(g, l) */
_Multiply(gold, logBuf, mulBuf, 0);
}
......@@ -291,7 +289,6 @@ DTYPE _CrossEntropy(const XTensor * output, const XTensor * gold,
/* reduce sum all classes */
_ReduceSum(mulBuf, lossInter, n);
-DelTensorBuf(negBuf);
DelTensorBuf(mulBuf);
DelTensorBuf(logBuf);
......@@ -334,7 +331,7 @@ DTYPE _CrossEntropy(const XTensor * output, const XTensor * gold,
}
/*
-compute the cross entropy loss (implementation manually)
+compute the cross entropy loss (faster implementation with optimized code)
loss = sum_{i} (-gold_i * log(output_i))
where gold and output are distributions
......@@ -347,13 +344,13 @@ where gold and output are distributions
>> leadingDim - the leading dimension for the output
<< return - the cross entropy loss that is a scalar
*/
-DTYPE _CrossEntropyManual(const XTensor * output, const XTensor * gold,
-LOSS_COMPUTE_WAY reduceWay, const XTensor * weight,
-const XTensor * padding, int leadingDim)
+DTYPE _CrossEntropyFast(const XTensor * output, const XTensor * gold,
+LOSS_COMPUTE_WAY reduceWay, const XTensor * weight,
+const XTensor * padding, int leadingDim)
{
#ifdef USE_CUDA
if(output->devID >= 0) {
-return _CudaCrossEntropyManual(output, gold, reduceWay, weight, padding, leadingDim);
+return _CudaCrossEntropyFast(output, gold, reduceWay, weight, padding, leadingDim);
}
#endif
......@@ -459,7 +456,7 @@ DTYPE _CrossEntropyManual(const XTensor * output, const XTensor * gold,
}
/*
-backward computation for cross entropy function (tensor version)
+backward computation for cross entropy function
loss = sum_{i} (-t_i * log(y_i))
dE/dy_i = -t_i / y_i
......@@ -566,7 +563,7 @@ void _CrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTensor
if(padding != NULL) {
XTensor * tmp(padding);
_IsZero(padding, tmp);
-int nonZeroNum = _ReduceSumAll(tmp);
+int nonZeroNum = (int)_ReduceSumAll(tmp);
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
delete tmp;
}
......
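The padding block above implements mean reduction over only the positions that contribute to the loss: a 0/1 mask is derived from the padding tensor with _IsZero, the contributing positions are counted with _ReduceSumAll, and the whole gradient is scaled by 1/count. Since loss = sum_{i} (-t_i * log(y_i)), the element-wise gradient is dE/dy_i = -t_i / y_i. A hedged standalone sketch of this step on plain arrays (illustrative names; 'contributes' stands in for the mask the library computes):

#include <cstddef>

/* dE/dy_i = -t_i / y_i, scaled by 1/n for mean reduction,
   where n counts the positions that are not padded out */
void CrossEntropyBackwardSketch(double * dedy, const double * y,
                                const double * t, const int * contributes,
                                size_t size)
{
    size_t n = 0;
    for (size_t i = 0; i < size; i++)
        if (contributes[i])
            n++;

    for (size_t i = 0; i < size; i++)
        dedy[i] = contributes[i] ? (-t[i] / y[i]) / (double)n : 0.0;
}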
......@@ -111,7 +111,7 @@ where gold and output are distributions
>> padding - specify a target value that is ignored and does not contribute to the loss computation
>> leadingDim - the leading dimension for the output
*/
-void _CudaCrossEntropyManual(const XTensor * output, const XTensor * gold,
+void _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight,
const XTensor * padding, int leadingDim)
{
......@@ -201,9 +201,9 @@ where gold and output are distributions
>> leadingDim - the leading dimension for the output
<< return - the cross entropy loss that is a scalar
*/
-DTYPE _CudaCrossEntropyManual(const XTensor * output, const XTensor * gold,
-LOSS_COMPUTE_WAY reduceWay, const XTensor * weight,
-const XTensor * padding, int leadingDim)
+DTYPE _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
+LOSS_COMPUTE_WAY reduceWay, const XTensor * weight,
+const XTensor * padding, int leadingDim)
{
DTYPE loss = 0;
......@@ -232,7 +232,7 @@ DTYPE _CudaCrossEntropyManual(const XTensor * output, const XTensor * gold,
XTensor * lossInter = NewTensor(output->order - 1, dimSize, output->dataType, output->denseRatio, output->devID, output->mem);
-_CudaCrossEntropyManual(output, gold, lossInter, weight, padding, leadingDim);
+_CudaCrossEntropyFast(output, gold, lossInter, weight, padding, leadingDim);
loss = _ReduceSumAll(lossInter);
......@@ -400,7 +400,7 @@ void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTe
if(padding != NULL) {
XTensor * tmp(padding);
_IsZero(padding, tmp);
-int nonZeroNum = _ReduceSumAll(tmp);
+int nonZeroNum = (int)_ReduceSumAll(tmp);
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
delete tmp;
}
......
......@@ -27,13 +27,13 @@
namespace nts{ // namespace nts(NiuTrans.Tensor)
-/* compute the cross entropy loss (tensor version) */
-void _CudaCrossEntropyManual(const XTensor * output, const XTensor * gold,
+/* compute the cross entropy loss */
+void _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
-/* compute the cross entropy loss (scalar version) */
-DTYPE _CudaCrossEntropyManual(const XTensor * output, const XTensor * gold,
+/* compute the cross entropy loss */
+DTYPE _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
LOSS_COMPUTE_WAY reduceWay, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
......
......@@ -31,25 +31,25 @@ REDUCE_SUM,
REDUCE_MEAN
};
-/* compute the cross entropy loss (tensor version) */
+/* compute the cross entropy loss */
void _CrossEntropy(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
-/* compute the cross entropy loss (tensor version) */
-void _CrossEntropyManual(const XTensor * output, const XTensor * gold,
+/* compute the cross entropy loss */
+void _CrossEntropyFast(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
-/* compute the cross entropy loss (scalar version) */
+/* compute the cross entropy loss (return the loss) */
DTYPE _CrossEntropy(const XTensor * output, const XTensor * gold,
LOSS_COMPUTE_WAY reduceWay, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
-/* compute the cross entropy loss (scalar version) */
-DTYPE _CrossEntropyManual(const XTensor * output, const XTensor * gold,
-LOSS_COMPUTE_WAY reduceWay = REDUCE_MEAN, const XTensor * weight = NULL,
-const XTensor * padding = NULL, int leadingDim = -1);
+/* compute the cross entropy loss (return the loss) */
+DTYPE _CrossEntropyFast(const XTensor * output, const XTensor * gold,
+LOSS_COMPUTE_WAY reduceWay = REDUCE_MEAN, const XTensor * weight = NULL,
+const XTensor * padding = NULL, int leadingDim = -1);
/* backward computation of cross entropy function */
void _CrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTensor * gold,
......
......@@ -297,9 +297,10 @@ void _SoftmaxBackward(XTensor * gold, XTensor * y, XTensor * x,
\beta = \sum_i (dE/dy_i * y_i)
*/
-for(int k = 0; k < blockNum; k++){
-op = (DTYPE*)y->data + k * blockSize;
-sp = (DTYPE*)dedx->data + k * blockSize;
+for(int m = 0; m < blockNum; m++){
+yp = (DTYPE*)dedy->data + m * blockSize;
+op = (DTYPE*)y->data + m * blockSize;
+sp = (DTYPE*)dedx->data + m * blockSize;
int nCols = stride;
for(int k = 0; k < stride; k++){
......
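This hunk is the fix the commit message describes. In the old CPU NOLOSS path the outer block loop reused the inner loop's index name k, and, as far as the hunk shows, the dedy pointer yp was never offset per block, so blocks after the first combined dE/dy from the wrong block with y and dE/dx from the right one. The fix renames the outer index to m and advances yp, op and sp together. The per-block math is the standard softmax Jacobian applied to an arbitrary upstream gradient: dE/dx_i = y_i * (dE/dy_i - beta) with beta = sum_j (dE/dy_j * y_j), as in the comment above. A standalone sketch for a single softmax vector (illustrative names, not the library's code):

#include <cstddef>

/* softmax backward for one softmax vector with a generic upstream
   gradient (the NOLOSS case): beta = sum_j (dE/dy_j * y_j), then
   dE/dx_i = y_i * (dE/dy_i - beta); the real code runs this once per
   block, which is why yp, op and sp must all be offset by m * blockSize */
void SoftmaxBackwardBlock(float * dedx, const float * dedy,
                          const float * y, size_t n)
{
    float beta = 0.0f;
    for (size_t j = 0; j < n; j++)
        beta += dedy[j] * y[j];
    for (size_t i = 0; i < n; i++)
        dedx[i] = y[i] * (dedy[i] - beta);
}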
......@@ -61,7 +61,7 @@ bool TestCrossEntropy1()
gold->SetData(goldData, unitNum);
/* call CrossEntropy function */
-_CrossEntropyManual(output, gold, loss);
+_CrossEntropyFast(output, gold, loss);
error2 = _CrossEntropy(output, gold, REDUCE_SUM);
error1 = loss->Get1D(0);
......@@ -83,7 +83,7 @@ bool TestCrossEntropy1()
goldGPU->SetData(goldData, unitNum);
/* call CrossEntropy function */
-_CrossEntropyManual(outputGPU, goldGPU, lossGPU);
+_CrossEntropyFast(outputGPU, goldGPU, lossGPU);
error1 = lossGPU->Get1D(0);
error2 = _CrossEntropy(outputGPU, goldGPU, REDUCE_SUM);
......@@ -163,8 +163,8 @@ bool TestCrossEntropy2()
/* call CrossEntropy function */
error1 = _CrossEntropy(output, gold, REDUCE_SUM);
error2 = _CrossEntropy(output, gold, REDUCE_MEAN);
-error3 = _CrossEntropyManual(output, gold, REDUCE_SUM);
-error4 = _CrossEntropyManual(output, gold, REDUCE_MEAN);
+error3 = _CrossEntropyFast(output, gold, REDUCE_SUM);
+error4 = _CrossEntropyFast(output, gold, REDUCE_MEAN);
/* check results */
cpuTest = (fabs(error1 - answer1) < 1e-4F &&
......@@ -191,8 +191,8 @@ bool TestCrossEntropy2()
/* call CrossEntropy function */
error1 = _CrossEntropy(outputGPU, goldGPU, REDUCE_SUM);
error2 = _CrossEntropy(outputGPU, goldGPU, REDUCE_MEAN);
-error3 = _CrossEntropyManual(outputGPU, goldGPU, REDUCE_SUM);
-error4 = _CrossEntropyManual(outputGPU, goldGPU, REDUCE_MEAN);
+error3 = _CrossEntropyFast(outputGPU, goldGPU, REDUCE_SUM);
+error4 = _CrossEntropyFast(outputGPU, goldGPU, REDUCE_MEAN);
/* check results */
gpuTest = (fabs(error1 - answer1) < 1e-4F &&
......@@ -272,7 +272,7 @@ bool TestCrossEntropy3()
gold->Set2D(1.0F, 3, 3);
/* call CrossEntropy function */
-_CrossEntropyManual(output, gold, loss, weight);
+_CrossEntropyFast(output, gold, loss, weight);
/* check results */
cpuTest = loss->CheckData(answer, 4, 1e-4F);
......@@ -297,7 +297,7 @@ bool TestCrossEntropy3()
goldGPU->Set2D(1.0F, 3, 3);
/* call CrossEntropy function */
-_CrossEntropyManual(outputGPU, goldGPU, lossGPU, weightGPU);
+_CrossEntropyFast(outputGPU, goldGPU, lossGPU, weightGPU);
/* check results */
gpuTest = lossGPU->CheckData(answer, 4, 1e-4F);
......@@ -361,7 +361,7 @@ bool TestCrossEntropy4()
_ScaleAndShiftMe(gold, 1, 2);
/* call CrossEntropy function */
-error = _CrossEntropyManual(output, gold);
+error = _CrossEntropyFast(output, gold);
/* check results */
cpuTest = (fabs(error - answer) < 1e-4);
......@@ -381,7 +381,7 @@ bool TestCrossEntropy4()
_ScaleAndShiftMe(goldGPU, 1, 2);
/* call CrossEntropy function */
-error = _CrossEntropyManual(outputGPU, goldGPU);
+error = _CrossEntropyFast(outputGPU, goldGPU);
/* check results */
gpuTest = (fabs(error - answer) < 1e-4);
......