Commit 52c0e35a by xuchen

fix bugs and improve the implementation in the CrossEntropy function

parent b409f07f
......@@ -35,6 +35,8 @@ const char * GetOPName(int type)
return "M_EXP";
else if (type == MATH_FLOOR)
return "M_FLOOR";
else if (type == MATH_ISNONZERO)
return "M_ISNONZERO";
else if (type == MATH_ISZERO)
return "M_ISZERO";
else if (type == MATH_LOG)
......
......@@ -35,7 +35,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_CEIL MATH_ABSOLUTE + 1
#define MATH_EXP MATH_CEIL + 1
#define MATH_FLOOR MATH_EXP + 1
#define MATH_ISZERO MATH_FLOOR + 1
#define MATH_ISNONZERO MATH_FLOOR + 1
#define MATH_ISZERO MATH_ISNONZERO + 1
#define MATH_LOG MATH_ISZERO + 1
#define MATH_SQRT MATH_LOG + 1
#define MATH_SQUARE MATH_SQRT + 1
......
......@@ -2135,7 +2135,7 @@ generate a copy of XTensor
>> isFilledData - indicates whether we allocate the data for
the newly-generated tensor
*/
XTensor * NewTensor(XTensor * a, bool isFilledData)
XTensor * NewTensor(const XTensor * a, bool isFilledData)
{
int dims[MAX_TENSOR_DIM_NUM];
......
......@@ -450,7 +450,7 @@ XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, co
const int myDevID = -1, XMem * myMem = NULL);
/* generate a copy of XTensor (with a reference to a given tensor) */
XTensor * NewTensor(XTensor * a, bool isFilledData = true);
XTensor * NewTensor(const XTensor * a, bool isFilledData = true);
/* free the data space of a given tensor */
void DelTensor(XTensor * tensor);
......
......@@ -37,6 +37,11 @@ DTYPE round(DTYPE r)
return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5);
}
/* indicator function: returns 1.0 when the input is non-zero, 0.0 otherwise */
DTYPE isnonzero(DTYPE x)
{
    if (x != 0.0)
        return (DTYPE)1.0;
    return (DTYPE)0.0;
}
DTYPE iszero(DTYPE r)
{
return (r == 0.0) ? (DTYPE)1.0 : (DTYPE)0.0;
......@@ -93,6 +98,10 @@ _SIMPLE_UNARY_FUNCTION(_Floor, _CudaFloor, floor)
_SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor)
SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR)
_SIMPLE_UNARY_FUNCTION(_IsNonZero, _CudaIsNonZero, isnonzero)
_SIMPLE_UNARY_FUNCTION_ME(_IsNonZeroMe, _IsNonZero)
SIMPLE_UNARY_FUNCTION(IsNonZero, _IsNonZero, MATH_ISNONZERO)
_SIMPLE_UNARY_FUNCTION(_IsZero, _CudaIsZero, iszero)
_SIMPLE_UNARY_FUNCTION_ME(_IsZeroMe, _IsZero)
SIMPLE_UNARY_FUNCTION(IsZero, _IsZero, MATH_ISZERO)
......@@ -173,6 +182,10 @@ _SIMPLE_UNARY_FUNCTION(_Floor, floor)
_SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor)
SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR)
_SIMPLE_UNARY_FUNCTION(_IsNonZero, isnonzero)
_SIMPLE_UNARY_FUNCTION_ME(_IsNonZeroMe, _IsNonZero)
SIMPLE_UNARY_FUNCTION(IsNonZero, _IsNonZero, MATH_ISNONZERO)
_SIMPLE_UNARY_FUNCTION(_IsZero, iszero)
_SIMPLE_UNARY_FUNCTION_ME(_IsZeroMe, _IsZero)
SIMPLE_UNARY_FUNCTION(IsZero, _IsZero, MATH_ISZERO)
......
......@@ -41,11 +41,18 @@ DTYPE cudaround(DTYPE r)
}
/* device-side indicator function: returns 1.0 when the input is non-zero, 0.0 otherwise */
__device__
DTYPE cudaisnonzero(DTYPE x)
{
    if (x != 0.0)
        return (DTYPE)1.0;
    return (DTYPE)0.0;
}
/* device-side indicator function: returns 1.0 when the input is zero, 0.0 otherwise */
__device__
DTYPE cudaiszero(DTYPE x)
{
    if (x == 0.0)
        return (DTYPE)1.0;
    return (DTYPE)0.0;
}
#define SIMPLE_UNARY_FUNCTION_GPU(funcName, origFunc) \
__global__ \
void Kernel##funcName(DTYPE * a, DTYPE * b, int size) \
......@@ -96,6 +103,7 @@ SIMPLE_UNARY_FUNCTION_GPU(Absolute, fabs)
SIMPLE_UNARY_FUNCTION_GPU(Ceil, ceil)
SIMPLE_UNARY_FUNCTION_GPU(Exp, exp)
SIMPLE_UNARY_FUNCTION_GPU(Floor, floor)
SIMPLE_UNARY_FUNCTION_GPU(IsNonZero, cudaisnonzero)
SIMPLE_UNARY_FUNCTION_GPU(IsZero, cudaiszero)
SIMPLE_UNARY_FUNCTION_GPU(Log, log)
SIMPLE_UNARY_FUNCTION_GPU(Round, cudaround)
......
......@@ -66,6 +66,15 @@ void KernelFloor(__half * a, __half * b, int size);
/* set each entry to its floor value */
void _CudaFloor(const XTensor * a, XTensor * b);
/* if source entry is non-zero, set target entry to be one, otherwise zero (CUDA Kernel) */
__global__
void KernelIsNonZero(DTYPE * a, DTYPE * b, int size);
/* if source entry is non-zero, set target entry to be one, otherwise zero (CUDA Kernel) with float16 data type*/
__global__
void KernelIsNonZero(__half * a, __half * b, int size);
/* if source entry is non-zero, set target entry to be one, otherwise zero */
void _CudaIsNonZero(const XTensor * a, XTensor * b);
/* if source entry is zero, set target entry to be one, otherwise zero (CUDA Kernel) */
__global__
void KernelIsZero(DTYPE * a, DTYPE * b, int size);
......
......@@ -63,6 +63,15 @@ void _FloorMe(XTensor * a);
make a new tensor to keep the result and return it */
XTensor Floor(const XTensor & a);
/* if source entry is non-zero, set target entry to be one, otherwise zero */
void _IsNonZero(const XTensor *a, XTensor *b);
/* if source entry is non-zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */
void _IsNonZeroMe(XTensor *a);
/* if source entry is non-zero, set target entry to be one, otherwise zero (return a XTensor structure)
make a new tensor to keep the result and return it */
XTensor IsNonZero(const XTensor &a);
/* if source entry is zero, set target entry to be one, otherwise zero */
void _IsZero(const XTensor *a, XTensor *b);
/* if source entry is zero, set target entry to be one, otherwise zero (do it on site)
......
......@@ -296,7 +296,7 @@ DTYPE _CrossEntropy(const XTensor * output, const XTensor * gold,
/* compute the total loss */
if(padding != NULL) {
XTensor * temp(lossInter);
XTensor * temp = NewTensor(lossInter);
_Multiply(lossInter, padding, temp);
loss = _ReduceSumAll(temp);
delete temp;
......@@ -305,17 +305,18 @@ DTYPE _CrossEntropy(const XTensor * output, const XTensor * gold,
loss = _ReduceSumAll(lossInter);
if(reduceWay == REDUCE_MEAN) {
if(padding != NULL) {
XTensor * zeroIndicator = NewTensorBuf(padding, padding->devID, padding->mem);
_IsZero(padding, zeroIndicator);
int reduceSize = (int)_ReduceSumAll(zeroIndicator);
loss = loss / (DTYPE)(padding->unitNum - reduceSize);
DelTensorBuf(zeroIndicator);
int nonZeroNum;
if(padding == NULL) {
nonZeroNum = lossInter->unitNum;
}
else {
XTensor * tmp = NewTensor(padding);
_IsNonZero(padding, tmp);
nonZeroNum = (int)_ReduceSumAll(tmp);
delete tmp;
}
else
loss = loss / (DTYPE)lossInter->unitNum;
loss = loss / (DTYPE)nonZeroNum;
}
else if(reduceWay == REDUCE_SUM) {
/* don't need to do anything */
......@@ -471,7 +472,7 @@ with respect to gold standard, and y this the model output
>> leadingDim - the leading dimension for the output
*/
void _CrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTensor * gold,
const XTensor * weight, XTensor * padding,
const XTensor * weight, const XTensor * padding,
int leadingDim)
{
#ifdef USE_CUDA
......@@ -561,8 +562,8 @@ void _CrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTensor
}
if(padding != NULL) {
XTensor * tmp(padding);
_IsZero(padding, tmp);
XTensor * tmp = NewTensor(padding);
_IsNonZero(padding, tmp);
int nonZeroNum = (int)_ReduceSumAll(tmp);
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
delete tmp;
......
......@@ -237,18 +237,18 @@ DTYPE _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
loss = _ReduceSumAll(lossInter);
if(reduceWay == REDUCE_MEAN) {
int totalNum;
int nonZeroNum;
if(padding == NULL) {
totalNum = lossInter->unitNum;
nonZeroNum = lossInter->unitNum;
}
else {
XTensor * zeroIndicator = NewTensorBuf(output, output->devID, output->mem);
_IsZero(padding, zeroIndicator);
totalNum = lossInter->unitNum - (int)_ReduceSumAll(zeroIndicator);
DelTensorBuf(zeroIndicator);
XTensor * tmp = NewTensor(padding);
_IsNonZero(padding, tmp);
nonZeroNum = (int)_ReduceSumAll(tmp);
delete tmp;
}
loss = loss / (DTYPE)totalNum;
loss = loss / (DTYPE)nonZeroNum;
}
return loss;
......@@ -328,9 +328,9 @@ with respect to gold standard, and y this the model output
>> padding - specify a target value that is ignored and does not contribute to the loss computation
>> leadingDim - the leading dimension for the output
*/
void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTensor * gold,
const XTensor * weight, XTensor * padding,
int leadingDim)
void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output,
const XTensor * gold, const XTensor * weight,
const XTensor * padding, int leadingDim)
{
int order = output->order;
int n = leadingDim < 0 ? output->order - 1 : leadingDim;
......@@ -398,8 +398,8 @@ void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTe
}
if(padding != NULL) {
XTensor * tmp(padding);
_IsZero(padding, tmp);
XTensor * tmp = NewTensor(padding);
_IsNonZero(padding, tmp);
int nonZeroNum = (int)_ReduceSumAll(tmp);
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
delete tmp;
......
......@@ -29,18 +29,18 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* compute the cross entropy loss */
void _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
XTensor * loss, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
/* compute the cross entropy loss */
DTYPE _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
LOSS_COMPUTE_WAY reduceWay, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
LOSS_COMPUTE_WAY reduceWay, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
/* backward computation of cross entropy function */
void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTensor * gold,
const XTensor * weight = NULL, XTensor * padding = NULL,
int leadingDim = -1);
void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output,
const XTensor * gold, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
} // namespace nts(NiuTrans.Tensor)
......
......@@ -53,7 +53,7 @@ DTYPE _CrossEntropyFast(const XTensor * output, const XTensor * gold,
/* backward computation of cross entropy function */
void _CrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTensor * gold,
const XTensor * weight = NULL, XTensor * padding = NULL,
const XTensor * weight = NULL, const XTensor * padding = NULL,
int leadingDim = -1);
} // namespace nts(NiuTrans.Tensor)
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论