Commit 52c0e35a by xuchen

Fix bugs in, and improve the implementation of, the CrossEntropy function

parent b409f07f
...@@ -35,6 +35,8 @@ const char * GetOPName(int type) ...@@ -35,6 +35,8 @@ const char * GetOPName(int type)
return "M_EXP"; return "M_EXP";
else if (type == MATH_FLOOR) else if (type == MATH_FLOOR)
return "M_FLOOR"; return "M_FLOOR";
else if (type == MATH_ISNONZERO)
return "M_ISNONZERO";
else if (type == MATH_ISZERO) else if (type == MATH_ISZERO)
return "M_ISZERO"; return "M_ISZERO";
else if (type == MATH_LOG) else if (type == MATH_LOG)
......
...@@ -35,7 +35,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -35,7 +35,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_CEIL MATH_ABSOLUTE + 1 #define MATH_CEIL MATH_ABSOLUTE + 1
#define MATH_EXP MATH_CEIL + 1 #define MATH_EXP MATH_CEIL + 1
#define MATH_FLOOR MATH_EXP + 1 #define MATH_FLOOR MATH_EXP + 1
#define MATH_ISZERO MATH_FLOOR + 1 #define MATH_ISNONZERO MATH_FLOOR + 1
#define MATH_ISZERO MATH_ISNONZERO + 1
#define MATH_LOG MATH_ISZERO + 1 #define MATH_LOG MATH_ISZERO + 1
#define MATH_SQRT MATH_LOG + 1 #define MATH_SQRT MATH_LOG + 1
#define MATH_SQUARE MATH_SQRT + 1 #define MATH_SQUARE MATH_SQRT + 1
......
...@@ -2135,7 +2135,7 @@ generate a copy of XTensor ...@@ -2135,7 +2135,7 @@ generate a copy of XTensor
>> isFilledData - indicates whether we allocate the data for >> isFilledData - indicates whether we allocate the data for
the newly-generated tensor the newly-generated tensor
*/ */
XTensor * NewTensor(XTensor * a, bool isFilledData) XTensor * NewTensor(const XTensor * a, bool isFilledData)
{ {
int dims[MAX_TENSOR_DIM_NUM]; int dims[MAX_TENSOR_DIM_NUM];
......
...@@ -450,7 +450,7 @@ XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, co ...@@ -450,7 +450,7 @@ XTensor * NewTensor5D(const int d0, const int d1, const int d2, const int d3, co
const int myDevID = -1, XMem * myMem = NULL); const int myDevID = -1, XMem * myMem = NULL);
/* generate a copy of XTensor (with a reference to a given tensor) */ /* generate a copy of XTensor (with a reference to a given tensor) */
XTensor * NewTensor(XTensor * a, bool isFilledData = true); XTensor * NewTensor(const XTensor * a, bool isFilledData = true);
/* free the data space of a given tensor */ /* free the data space of a given tensor */
void DelTensor(XTensor * tensor); void DelTensor(XTensor * tensor);
......
...@@ -37,6 +37,11 @@ DTYPE round(DTYPE r) ...@@ -37,6 +37,11 @@ DTYPE round(DTYPE r)
return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5); return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5);
} }
/* indicator of a non-zero value: returns one if r differs from zero, and zero if r equals zero */
DTYPE isnonzero(DTYPE r)
{
    if (r != 0.0)
        return (DTYPE)1.0;

    return (DTYPE)0.0;
}
DTYPE iszero(DTYPE r) DTYPE iszero(DTYPE r)
{ {
return (r == 0.0) ? (DTYPE)1.0 : (DTYPE)0.0; return (r == 0.0) ? (DTYPE)1.0 : (DTYPE)0.0;
...@@ -93,6 +98,10 @@ _SIMPLE_UNARY_FUNCTION(_Floor, _CudaFloor, floor) ...@@ -93,6 +98,10 @@ _SIMPLE_UNARY_FUNCTION(_Floor, _CudaFloor, floor)
_SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor) _SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor)
SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR) SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR)
/* instantiate the non-zero indicator operations through the unary-function macros.
   NOTE(review): the macro bodies are not visible here; presumably these three lines define
   _IsNonZero (with _CudaIsNonZero as the GPU path and isnonzero as the per-entry host function),
   the in-place variant _IsNonZeroMe, and IsNonZero tagged with MATH_ISNONZERO - confirm
   against the macro definitions */
_SIMPLE_UNARY_FUNCTION(_IsNonZero, _CudaIsNonZero, isnonzero)
_SIMPLE_UNARY_FUNCTION_ME(_IsNonZeroMe, _IsNonZero)
SIMPLE_UNARY_FUNCTION(IsNonZero, _IsNonZero, MATH_ISNONZERO)
_SIMPLE_UNARY_FUNCTION(_IsZero, _CudaIsZero, iszero) _SIMPLE_UNARY_FUNCTION(_IsZero, _CudaIsZero, iszero)
_SIMPLE_UNARY_FUNCTION_ME(_IsZeroMe, _IsZero) _SIMPLE_UNARY_FUNCTION_ME(_IsZeroMe, _IsZero)
SIMPLE_UNARY_FUNCTION(IsZero, _IsZero, MATH_ISZERO) SIMPLE_UNARY_FUNCTION(IsZero, _IsZero, MATH_ISZERO)
...@@ -173,6 +182,10 @@ _SIMPLE_UNARY_FUNCTION(_Floor, floor) ...@@ -173,6 +182,10 @@ _SIMPLE_UNARY_FUNCTION(_Floor, floor)
_SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor) _SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor)
SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR) SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR)
/* instantiate the non-zero indicator operations through the two-argument form of the
   unary-function macros (no CUDA function is passed here).
   NOTE(review): the macro bodies are not visible here; presumably these three lines define
   _IsNonZero (applying isnonzero to each entry), the in-place variant _IsNonZeroMe, and
   IsNonZero tagged with MATH_ISNONZERO - confirm against the macro definitions */
_SIMPLE_UNARY_FUNCTION(_IsNonZero, isnonzero)
_SIMPLE_UNARY_FUNCTION_ME(_IsNonZeroMe, _IsNonZero)
SIMPLE_UNARY_FUNCTION(IsNonZero, _IsNonZero, MATH_ISNONZERO)
_SIMPLE_UNARY_FUNCTION(_IsZero, iszero) _SIMPLE_UNARY_FUNCTION(_IsZero, iszero)
_SIMPLE_UNARY_FUNCTION_ME(_IsZeroMe, _IsZero) _SIMPLE_UNARY_FUNCTION_ME(_IsZeroMe, _IsZero)
SIMPLE_UNARY_FUNCTION(IsZero, _IsZero, MATH_ISZERO) SIMPLE_UNARY_FUNCTION(IsZero, _IsZero, MATH_ISZERO)
......
...@@ -41,11 +41,18 @@ DTYPE cudaround(DTYPE r) ...@@ -41,11 +41,18 @@ DTYPE cudaround(DTYPE r)
} }
__device__ __device__
DTYPE cudaisnonzero(DTYPE r)
{
    /* indicator of a non-zero value: one if r differs from zero, zero otherwise */
    const bool nonZero = (r != 0.0);

    DTYPE flag = (DTYPE)0.0;
    if (nonZero)
        flag = (DTYPE)1.0;

    return flag;
}
__device__
DTYPE cudaiszero(DTYPE r) DTYPE cudaiszero(DTYPE r)
{ {
return (r == 0.0) ? (DTYPE)1.0 : (DTYPE)0.0; return (r == 0.0) ? (DTYPE)1.0 : (DTYPE)0.0;
} }
#define SIMPLE_UNARY_FUNCTION_GPU(funcName, origFunc) \ #define SIMPLE_UNARY_FUNCTION_GPU(funcName, origFunc) \
__global__ \ __global__ \
void Kernel##funcName(DTYPE * a, DTYPE * b, int size) \ void Kernel##funcName(DTYPE * a, DTYPE * b, int size) \
...@@ -96,6 +103,7 @@ SIMPLE_UNARY_FUNCTION_GPU(Absolute, fabs) ...@@ -96,6 +103,7 @@ SIMPLE_UNARY_FUNCTION_GPU(Absolute, fabs)
SIMPLE_UNARY_FUNCTION_GPU(Ceil, ceil) SIMPLE_UNARY_FUNCTION_GPU(Ceil, ceil)
SIMPLE_UNARY_FUNCTION_GPU(Exp, exp) SIMPLE_UNARY_FUNCTION_GPU(Exp, exp)
SIMPLE_UNARY_FUNCTION_GPU(Floor, floor) SIMPLE_UNARY_FUNCTION_GPU(Floor, floor)
SIMPLE_UNARY_FUNCTION_GPU(IsNonZero, cudaisnonzero)
SIMPLE_UNARY_FUNCTION_GPU(IsZero, cudaiszero) SIMPLE_UNARY_FUNCTION_GPU(IsZero, cudaiszero)
SIMPLE_UNARY_FUNCTION_GPU(Log, log) SIMPLE_UNARY_FUNCTION_GPU(Log, log)
SIMPLE_UNARY_FUNCTION_GPU(Round, cudaround) SIMPLE_UNARY_FUNCTION_GPU(Round, cudaround)
......
...@@ -66,6 +66,15 @@ void KernelFloor(__half * a, __half * b, int size); ...@@ -66,6 +66,15 @@ void KernelFloor(__half * a, __half * b, int size);
/* set each entry to its floor value */ /* set each entry to its floor value */
void _CudaFloor(const XTensor * a, XTensor * b); void _CudaFloor(const XTensor * a, XTensor * b);
/*
if a source entry is non-zero, set the target entry to one, otherwise to zero (CUDA kernel)
>> a - source data
>> b - target data
>> size - presumably the number of entries to process (TODO confirm against the kernel definition)
*/
__global__
void KernelIsNonZero(DTYPE * a, DTYPE * b, int size);
/*
if a source entry is non-zero, set the target entry to one, otherwise to zero
(CUDA kernel, float16 data type)
>> a - source data
>> b - target data
>> size - presumably the number of entries to process (TODO confirm against the kernel definition)
*/
__global__
void KernelIsNonZero(__half * a, __half * b, int size);
/*
if a source entry is non-zero, set the target entry to one, otherwise to zero
>> a - source tensor (read only)
>> b - target tensor that receives the result
*/
void _CudaIsNonZero(const XTensor * a, XTensor * b);
/* if source entry is zero, set target entry to be one, otherwise zero (CUDA Kernel) */ /* if source entry is zero, set target entry to be one, otherwise zero (CUDA Kernel) */
__global__ __global__
void KernelIsZero(DTYPE * a, DTYPE * b, int size); void KernelIsZero(DTYPE * a, DTYPE * b, int size);
......
...@@ -63,6 +63,15 @@ void _FloorMe(XTensor * a); ...@@ -63,6 +63,15 @@ void _FloorMe(XTensor * a);
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Floor(const XTensor & a); XTensor Floor(const XTensor & a);
/* if a source entry is non-zero, set the target entry to one, otherwise to zero
   >> a - source tensor (read only)
   >> b - target tensor that receives the result */
void _IsNonZero(const XTensor *a, XTensor *b);
/* if an entry is non-zero, set it to one, otherwise to zero (done in place)
   keep the result in the input tensor a and return nothing */
void _IsNonZeroMe(XTensor *a);
/* if a source entry is non-zero, set the target entry to one, otherwise to zero (returns an XTensor structure)
   make a new tensor to keep the result and return it */
XTensor IsNonZero(const XTensor &a);
/* if source entry is zero, set target entry to be one, otherwise zero */ /* if source entry is zero, set target entry to be one, otherwise zero */
void _IsZero(const XTensor *a, XTensor *b); void _IsZero(const XTensor *a, XTensor *b);
/* if source entry is zero, set target entry to be one, otherwise zero (do it on site) /* if source entry is zero, set target entry to be one, otherwise zero (do it on site)
......
...@@ -296,7 +296,7 @@ DTYPE _CrossEntropy(const XTensor * output, const XTensor * gold, ...@@ -296,7 +296,7 @@ DTYPE _CrossEntropy(const XTensor * output, const XTensor * gold,
/* compute the total loss */ /* compute the total loss */
if(padding != NULL) { if(padding != NULL) {
XTensor * temp(lossInter); XTensor * temp = NewTensor(lossInter);
_Multiply(lossInter, padding, temp); _Multiply(lossInter, padding, temp);
loss = _ReduceSumAll(temp); loss = _ReduceSumAll(temp);
delete temp; delete temp;
...@@ -305,17 +305,18 @@ DTYPE _CrossEntropy(const XTensor * output, const XTensor * gold, ...@@ -305,17 +305,18 @@ DTYPE _CrossEntropy(const XTensor * output, const XTensor * gold,
loss = _ReduceSumAll(lossInter); loss = _ReduceSumAll(lossInter);
if(reduceWay == REDUCE_MEAN) { if(reduceWay == REDUCE_MEAN) {
if(padding != NULL) { int nonZeroNum;
XTensor * zeroIndicator = NewTensorBuf(padding, padding->devID, padding->mem); if(padding == NULL) {
nonZeroNum = lossInter->unitNum;
_IsZero(padding, zeroIndicator); }
int reduceSize = (int)_ReduceSumAll(zeroIndicator); else {
loss = loss / (DTYPE)(padding->unitNum - reduceSize); XTensor * tmp = NewTensor(padding);
_IsNonZero(padding, tmp);
DelTensorBuf(zeroIndicator); nonZeroNum = (int)_ReduceSumAll(tmp);
delete tmp;
} }
else
loss = loss / (DTYPE)lossInter->unitNum; loss = loss / (DTYPE)nonZeroNum;
} }
else if(reduceWay == REDUCE_SUM) { else if(reduceWay == REDUCE_SUM) {
/* don't need to do anything */ /* don't need to do anything */
...@@ -471,7 +472,7 @@ with respect to gold standard, and y is the model output ...@@ -471,7 +472,7 @@ with respect to gold standard, and y is the model output
>> leadingDim - the leading dimension for the output >> leadingDim - the leading dimension for the output
*/ */
void _CrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTensor * gold, void _CrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTensor * gold,
const XTensor * weight, XTensor * padding, const XTensor * weight, const XTensor * padding,
int leadingDim) int leadingDim)
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -561,8 +562,8 @@ void _CrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTensor ...@@ -561,8 +562,8 @@ void _CrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTensor
} }
if(padding != NULL) { if(padding != NULL) {
XTensor * tmp(padding); XTensor * tmp = NewTensor(padding);
_IsZero(padding, tmp); _IsNonZero(padding, tmp);
int nonZeroNum = (int)_ReduceSumAll(tmp); int nonZeroNum = (int)_ReduceSumAll(tmp);
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum); _ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
delete tmp; delete tmp;
......
...@@ -237,18 +237,18 @@ DTYPE _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold, ...@@ -237,18 +237,18 @@ DTYPE _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
loss = _ReduceSumAll(lossInter); loss = _ReduceSumAll(lossInter);
if(reduceWay == REDUCE_MEAN) { if(reduceWay == REDUCE_MEAN) {
int totalNum; int nonZeroNum;
if(padding == NULL) { if(padding == NULL) {
totalNum = lossInter->unitNum; nonZeroNum = lossInter->unitNum;
} }
else { else {
XTensor * zeroIndicator = NewTensorBuf(output, output->devID, output->mem); XTensor * tmp = NewTensor(padding);
_IsZero(padding, zeroIndicator); _IsNonZero(padding, tmp);
totalNum = lossInter->unitNum - (int)_ReduceSumAll(zeroIndicator); nonZeroNum = (int)_ReduceSumAll(tmp);
DelTensorBuf(zeroIndicator); delete tmp;
} }
loss = loss / (DTYPE)totalNum; loss = loss / (DTYPE)nonZeroNum;
} }
return loss; return loss;
...@@ -328,9 +328,9 @@ with respect to gold standard, and y is the model output ...@@ -328,9 +328,9 @@ with respect to gold standard, and y is the model output
>> padding - specify a target value that is ignored and does not contribute to the loss computation >> padding - specify a target value that is ignored and does not contribute to the loss computation
>> leadingDim - the leading dimension for the output >> leadingDim - the leading dimension for the output
*/ */
void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTensor * gold, void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output,
const XTensor * weight, XTensor * padding, const XTensor * gold, const XTensor * weight,
int leadingDim) const XTensor * padding, int leadingDim)
{ {
int order = output->order; int order = output->order;
int n = leadingDim < 0 ? output->order - 1 : leadingDim; int n = leadingDim < 0 ? output->order - 1 : leadingDim;
...@@ -398,8 +398,8 @@ void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTe ...@@ -398,8 +398,8 @@ void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTe
} }
if(padding != NULL) { if(padding != NULL) {
XTensor * tmp(padding); XTensor * tmp = NewTensor(padding);
_IsZero(padding, tmp); _IsNonZero(padding, tmp);
int nonZeroNum = (int)_ReduceSumAll(tmp); int nonZeroNum = (int)_ReduceSumAll(tmp);
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum); _ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
delete tmp; delete tmp;
......
...@@ -29,18 +29,18 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -29,18 +29,18 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* compute the cross entropy loss */ /* compute the cross entropy loss */
void _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold, void _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight = NULL, XTensor * loss, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1); const XTensor * padding = NULL, int leadingDim = -1);
/* compute the cross entropy loss */ /* compute the cross entropy loss */
DTYPE _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold, DTYPE _CudaCrossEntropyFast(const XTensor * output, const XTensor * gold,
LOSS_COMPUTE_WAY reduceWay, const XTensor * weight = NULL, LOSS_COMPUTE_WAY reduceWay, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1); const XTensor * padding = NULL, int leadingDim = -1);
/* backward computation of cross entropy function */ /* backward computation of cross entropy function */
void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTensor * gold, void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output,
const XTensor * weight = NULL, XTensor * padding = NULL, const XTensor * gold, const XTensor * weight = NULL,
int leadingDim = -1); const XTensor * padding = NULL, int leadingDim = -1);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -53,7 +53,7 @@ DTYPE _CrossEntropyFast(const XTensor * output, const XTensor * gold, ...@@ -53,7 +53,7 @@ DTYPE _CrossEntropyFast(const XTensor * output, const XTensor * gold,
/* backward computation of cross entropy function */ /* backward computation of cross entropy function */
void _CrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTensor * gold, void _CrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTensor * gold,
const XTensor * weight = NULL, XTensor * padding = NULL, const XTensor * weight = NULL, const XTensor * padding = NULL,
int leadingDim = -1); int leadingDim = -1);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论