Commit 1b50554a by xuchen

Merge branch 'xuchen' into xiaotong-working

parents cf43c58c 102db468
@@ -66,8 +66,8 @@ void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, i
     for (int i = 0; i < a->order; i++) {
         if (i != leadingDimRDI) {
            CheckNTErrors((a->dimSizeRDI[i] == b->dimSizeRDI[i] &&
                           a->dimSizeRDI[i] == c->dimSizeRDI[i]),
                          "Unmatched tensors!");
        }
        if (i < leadingDimRDI)
            stride *= a->dimSizeRDI[i];
......
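The loop shown above verifies that every non-leading dimension of a, b, and c agrees, then accumulates the stride of the leading dimension. A worked example of that stride computation (an illustrative sketch, not repository code, under the assumption that dimSizeRDI lists the dimension sizes innermost-first):

// Illustrative sketch, not repository code: for a row-major tensor of shape
// (4, 5, 6), dimSizeRDI = {6, 5, 4} (innermost first). With leadingDimRDI = 1
// the loop accumulates stride = 6, i.e. one step along the leading dimension
// skips 6 contiguous elements.
#include <cstdio>

int main()
{
    int dimSizeRDI[3] = {6, 5, 4};
    int leadingDimRDI = 1;
    int stride = 1;
    for (int i = 0; i < leadingDimRDI; i++)
        stride *= dimSizeRDI[i];
    printf("stride = %d\n", stride);   // prints: stride = 6
    return 0;
}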
@@ -77,7 +77,7 @@ where |a_lead| means the size of the leading dimension of a
 */
 template<int nonZeroAlpha> __global__
 void KernelMulElementWiseTensorDynamic(DTYPE * a, DTYPE * b, DTYPE * c, DTYPE alpha,
                                        int stride, int ldSizeA, int ldSizeB, int ldSizeC, int blockNum)
 {
     __shared__ DTYPE* ap[MAX_CUDA_THREAD_NUM_PER_BLOCK];
     __shared__ DTYPE* bp[MAX_CUDA_THREAD_NUM_PER_BLOCK];
@@ -171,14 +171,12 @@ void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alph
         if (alpha == 0) {
             KernelMulElementWiseTensorDynamic<0> << <blocks, threads >> >
                 ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, 0,
-                 stride, dimensionSizeA, dimensionSizeB, dimensionSizeC,
-                 blockNum);
+                 stride, dimensionSizeA, dimensionSizeB, dimensionSizeC, blockNum);
         }
         else {
             KernelMulElementWiseTensorDynamic<1> << <blocks, threads >> >
                 ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, alpha,
-                 stride, dimensionSizeA, dimensionSizeB, dimensionSizeC,
-                 blockNum);
+                 stride, dimensionSizeA, dimensionSizeB, dimensionSizeC, blockNum);
         }
     }
 }
......
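The host code in this hunk selects a kernel instantiation by whether alpha is zero, so the alpha term is compiled out of the element-wise loop instead of being branched on per element. A minimal sketch of that dispatch pattern in plain C++ (illustrative names; the alpha semantics, c = a * b + alpha * c, is an assumption inferred from the call sites above):

// Minimal sketch of compile-time dispatch on alpha; illustrative, not
// repository code. nonZeroAlpha is a template constant, so each
// instantiation keeps only one side of the if.
template<int nonZeroAlpha>
void mulElementWise(const float * a, const float * b, float * c,
                    float alpha, int num)
{
    for (int i = 0; i < num; i++) {
        if (nonZeroAlpha)
            c[i] = a[i] * b[i] + alpha * c[i];  // assumed: c = a * b + alpha * c
        else
            c[i] = a[i] * b[i];                 // alpha == 0: pure product
    }
}

void mul(const float * a, const float * b, float * c, float alpha, int num)
{
    if (alpha == 0)
        mulElementWise<0>(a, b, c, 0, num);     // branch resolved once, on the host
    else
        mulElementWise<1>(a, b, c, alpha, num);
}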
@@ -25,7 +25,7 @@
 #include "Dropout.h"
 #include "Dropout.cuh"
 #include "../core/arithmetic/Multiply.h"
-#include "../core/arithmetic/SumDim.h"
+#include "../core/arithmetic/MultiplyDim.h"
 #include "../core/math/ScaleAndShift.h"
 
 namespace nts{ // namespace nts(NiuTrans.Tensor)
@@ -44,40 +44,35 @@ the same inference procedure as that with no use of dropout on the test data.
 >> x - input tensor
 >> y - output tensor
->> prob - probability to set an element to zero
+>> seed - random seed
+>> dropProb - probability to set an element to zero
+>> leadingDim - the dimension along which we generate the random numbers and perform broadcasting
 */
-void _Dropout(const XTensor *x, XTensor *y, unsigned int seed, DTYPE prob)
+void _Dropout(const XTensor * x, XTensor * y, unsigned int seed, DTYPE dropProb, int leadingDim)
 {
-    CheckNTErrors(prob >= 0.0 && prob <= 1.0, "The probability must be 0-1!");
+    CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
 
-    DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - prob);
+    int n = leadingDim < 0 ? x->order - 1 : leadingDim;
+
+    CheckNTErrors(n >= 0 && n < x->order, "Wrong leadingDim!");
+
+    DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);
 
     /* generate a mask tensor again with special probability */
-    srand(seed);
-    int unitNum = x->unitNum;
+    int unitNum = x->dimSize[n];
     DTYPE * maskArray = new DTYPE[unitNum];
-    for (int i = 0; i < unitNum; i++)
-        maskArray[i] = RandomBernoulli(prob, 1.0F);
 
-    XTensor * maskTensor = NewTensorBuf(x, x->devID, x->mem);
-    maskTensor->SetData(maskArray, unitNum);
+    srand(seed);
+    for (int i = 0; i < unitNum; i++)
+        maskArray[i] = RandomBernoulli(dropProb, scaleFactor);
 
-#ifdef USE_CUDA
-    if(x->devID >= 0 || y->devID >= 0){
-        _CudaDropout(x, y, maskTensor, scaleFactor);
-        DelTensorBuf(maskTensor);
-        delete[] maskArray;
-        return;
-    }
-#endif
+    XTensor * mask = NewTensor1D(unitNum, x->dataType, x->devID, x->mem);
+    mask->SetData(maskArray, unitNum);
 
-    XTensor * inter = NewTensorBuf(x, x->devID, x->mem);
-    _Multiply(x, maskTensor, inter);
-    _ScaleAndShift(inter, y, scaleFactor, 0);
+    /* call _MultiplyDim to apply the mask along dimension n */
+    _MultiplyDim(x, mask, y, n, 0);
 
-    DelTensorBuf(inter);
-    DelTensorBuf(maskTensor);
+    delete mask;
     delete[] maskArray;
 }
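The rewrite above changes two things at once: the mask is generated only along the leading dimension (x->dimSize[n] values rather than x->unitNum) and broadcast across the rest of the tensor by _MultiplyDim, and the 1/(1 - dropProb) factor is folded into the Bernoulli values so the separate _ScaleAndShift pass and the special-cased CUDA branch disappear. Each mask entry is scaleFactor with probability 1 - dropProb and 0 otherwise, so E[mask] = 1 and the output equals the input in expectation. A CPU-only sketch of the equivalent computation for a 2-D row-major input with the mask on the last dimension (illustrative code, not part of the commit):

// Illustrative CPU sketch of the new _Dropout for a row-major (rowNum, colNum)
// input with leadingDim = 1: one Bernoulli draw per column, broadcast down the
// rows, with the 1/(1 - p) scale folded into the kept values.
#include <cstdlib>
#include <vector>

void dropoutSketch(const float * x, float * y, int rowNum, int colNum,
                   float dropProb, unsigned int seed)
{
    float scale = 1.0f / (1.0f - dropProb);
    std::vector<float> mask(colNum);

    srand(seed);
    for (int j = 0; j < colNum; j++)
        mask[j] = (float)rand() / (float)RAND_MAX >= dropProb ? scale : 0.0f;

    /* the broadcast that _MultiplyDim performs */
    for (int i = 0; i < rowNum; i++)
        for (int j = 0; j < colNum; j++)
            y[i * colNum + j] = x[i * colNum + j] * mask[j];
}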
@@ -90,44 +85,39 @@ dE/dx = dE/dy * dy/dx
 >> x - input of the dropout function
 >> dedy - dE/dy
 >> dedx - dE/dx
->> prob - probability to set an element zero
+>> seed - random seed
+>> dropProb - probability to set an element to zero
+>> leadingDim - the dimension along which we generate the random numbers and perform broadcasting
 */
 void _DropoutBackward(const XTensor * y, const XTensor * x,
                       const XTensor * dedy, XTensor * dedx,
-                      unsigned int seed, DTYPE prob)
+                      unsigned int seed, DTYPE dropProb, int leadingDim)
 {
+    CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
+
+    int n = leadingDim < 0 ? x->order - 1 : leadingDim;
+
+    CheckNTErrors(n >= 0 && n < x->order, "Wrong leadingDim!");
+
     if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE)
     {
-        int unitNum = y->unitNum;
-        DTYPE scaleFactor = (DTYPE)1.0F / ((DTYPE)1.0F - prob);
+        DTYPE scaleFactor = (DTYPE)1.0F / ((DTYPE)1.0F - dropProb);
 
         /* generate a mask tensor again with special probability */
-        srand(seed);
+        int unitNum = x->dimSize[n];
         DTYPE * maskArray = new DTYPE[unitNum];
+
+        srand(seed);
         for (int i = 0; i < unitNum; i++)
-            maskArray[i] = RandomBernoulli(prob, 1.0F);
+            maskArray[i] = RandomBernoulli(dropProb, scaleFactor);
 
-        XTensor * maskTensor = NewTensorBuf(x, x->devID, x->mem);
-        maskTensor->SetData(maskArray, unitNum);
-
-#ifdef USE_CUDA
-        if(x->devID >= 0 || y->devID >= 0){
-            _CudaDropoutBackward(y, x, dedy, dedx, maskTensor, scaleFactor);
-            DelTensorBuf(maskTensor);
-            delete[] maskArray;
-            return;
-        }
-#endif
-
-        DTYPE * dedyp = (DTYPE*)dedy->data;
-        DTYPE * dedxp = (DTYPE*)dedx->data;
-
-        /* dE/dx = dE/dy * dy/dx */
-        for(int i = 0; i < unitNum; i++)
-            dedxp[i] = dedyp[i] * maskArray[i] * scaleFactor;
+        XTensor * mask = NewTensor1D(unitNum, x->dataType, x->devID, x->mem);
+        mask->SetData(maskArray, unitNum);
 
-        DelTensorBuf(maskTensor);
+        /* call _MultiplyDim to apply the mask along dimension n */
+        _MultiplyDim(dedy, mask, dedx, n, 0);
+
+        delete mask;
         delete[] maskArray;
     }
     else
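Because _DropoutBackward reseeds the generator with the same seed as the forward pass, it regenerates exactly the same mask, and the gradient reduces to dE/dx = dE/dy * mask with the 1/(1 - dropProb) factor already folded into the mask values. The old explicit loop dedxp[i] = dedyp[i] * maskArray[i] * scaleFactor and the separate _CudaDropoutBackward branch both collapse into the single device-agnostic _MultiplyDim call.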
@@ -147,14 +137,18 @@ to mark the tensor with probability p in the inference phase. Instead we perform
 the same inference procedure as that with no use of dropout on the test data.
 
 >> x - input tensor
->> y - output tensor
->> prob - probability to set an element to zero
->> leadDim - the dimension along which we generate the random numbers
+>> dropProb - probability to set an element to zero
+>> leadingDim - the dimension along which we generate the random numbers and perform broadcasting
 */
-XTensor Dropout(const XTensor &x, DTYPE prob, int leadDim)
+XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim)
 {
-    int n = leadDim < 0 ? x.order - 1 : leadDim;
-    DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - prob);
+    CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
+
+    int n = leadingDim < 0 ? x.order - 1 : leadingDim;
+
+    CheckNTErrors(n >= 0 && n < x.order, "Wrong leadingDim!");
+
+    DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);
 
     /* generate a mask tensor with probability p */
     int unitNum = x.dimSize[n];
@@ -162,20 +156,15 @@ XTensor Dropout(const XTensor &x, DTYPE prob, int leadDim)
     srand((unsigned int)time(NULL));
     for (int i = 0; i < unitNum; i++)
-        maskArray[i] = RandomBernoulli(prob, scaleFactor);
+        maskArray[i] = RandomBernoulli(dropProb, scaleFactor);
 
-    XTensor mask(&x);
-    mask.SetZeroAll();
-    XTensor * maskVector = NewTensorBuf(1, &unitNum, X_FLOAT, 1.0F, x.devID, x.mem);
-    maskVector->SetData(maskArray, unitNum);
-
-    _SumDim(&mask, maskVector, &mask, n);
+    XTensor mask;
+    InitTensor1D(&mask, unitNum, x.dataType, x.devID, x.mem);
+    mask.SetData(maskArray, unitNum);
 
     delete[] maskArray;
-    DelTensorBuf(maskVector);
 
-    return Multiply(x, mask);
+    return MultiplyDim(x, mask, n, 0);
 }
 
 } // namespace nts(NiuTrans.Tensor)
@@ -28,21 +28,21 @@
 namespace nts{ // namespace nts(NiuTrans.Tensor)
 
 /* generate a random Bernoulli number */
-inline DTYPE RandomBernoulli(DTYPE prob, DTYPE value)
+inline DTYPE RandomBernoulli(DTYPE dropProb, DTYPE value)
 {
-    return (DTYPE)rand()/(DTYPE)RAND_MAX >= prob ? (DTYPE)value : 0;
+    return (DTYPE)rand()/(DTYPE)RAND_MAX >= dropProb ? (DTYPE)value : 0;
 }
 
 /* dropout function */
-void _Dropout(const XTensor * x, XTensor * y, unsigned int seed, DTYPE prob);
+void _Dropout(const XTensor * x, XTensor * y, unsigned int seed, DTYPE dropProb, int leadingDim = -1);
 
 /* dE/dx */
 void _DropoutBackward(const XTensor * y, const XTensor * x,
                       const XTensor * dedy, XTensor * dedx,
-                      unsigned int seed, DTYPE prob);
+                      unsigned int seed, DTYPE dropProb, int leadingDim = -1);
 
 /* dropout function */
-XTensor Dropout(const XTensor &x, DTYPE prob, int leadDim = -1);
+XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim = -1);
 
 } // namespace nts(NiuTrans.Tensor)
......
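Given these declarations, the revised API can be exercised as in the following hedged sketch (it mirrors the pattern of the tests below rather than any documented example; the NewTensor call and the tensor sizes are assumptions):

// Hedged usage sketch. leadingDim = -1 (the default) masks along the last
// dimension; the same seed must be shared by forward and backward calls so
// both regenerate one and the same mask.
int dimSize[2] = {40, 50};
XTensor * x = NewTensor(2, dimSize);   // assumed helper, by analogy with NewTensor1D
XTensor * y = NewTensor(2, dimSize);

x->SetDataRand(0, 1);
y->SetZeroAll();

unsigned int seed = 20;
DTYPE dropProb = 0.2F;

_Dropout(x, y, seed, dropProb);            // mask along the last dimension
XTensor yUser = Dropout(*x, dropProb, 0);  // mask broadcast along dimension 0

delete x;
delete y;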
@@ -31,10 +31,11 @@ case 1: test Dropout function.
 bool TestDropout1()
 {
-    /* an input tensor of size (4, 5) */
-    int order = 2;
+    /* an input tensor of size (40, 50, 60) */
+    int order = 3;
     int * dimSize = new int[order];
     dimSize[0] = 40;
     dimSize[1] = 50;
+    dimSize[2] = 60;
 
     int unitNum = 1;
     for (int i = 0; i < order; i++)
@@ -49,14 +50,14 @@ bool TestDropout1()
     XTensor yUser;
 
     /* initialize variables */
-    x->SetDataRand(0, 1);
+    _SetDataFixedFloat(x, 1.0F);
     y->SetZeroAll();
 
     /* call Dropout function */
-    float prob = 0.2F;
+    float dropProb = 0.2F;
     int seed = 20;
-    _Dropout(x, y, seed, prob);
-    yUser = Dropout(*x, 0.5F);
+    _Dropout(x, y, seed, dropProb);
+    yUser = Dropout(*x, dropProb);
 
     /* check result */
     int zeroNum1 = 0;
@@ -73,9 +74,9 @@ bool TestDropout1()
     }
 
     printf("CPU Test:\n");
     printf("In tensor y, there are %d units.\n", unitNum);
-    printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum1, prob);
+    printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum1, dropProb);
     printf("In tensor yUser, there are %d units.\n", unitNum);
-    printf("There are %d zero units by Dropout layer with default probability %.2f.\n", zeroNum2, 0.5F);
+    printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum2, dropProb);
 
 #ifdef USE_CUDA
     /* GPU test */
@@ -87,12 +88,12 @@ bool TestDropout1()
     XTensor yUserGPU;
 
     /* initialize variables */
-    xGPU->SetDataRand(0, 1);
+    _SetDataFixedFloat(xGPU, 1.0F);
     yGPU->SetZeroAll();
 
     /* call Dropout function */
-    _Dropout(xGPU, yGPU, seed, prob);
-    yUserGPU = Dropout(*xGPU, 0.5F);
+    _Dropout(xGPU, yGPU, seed, dropProb);
+    yUserGPU = Dropout(*xGPU, dropProb);
 
     /* check result */
     zeroNum1 = 0;
@@ -109,9 +110,9 @@ bool TestDropout1()
     }
 
-    printf("CPU Test:\n");
+    printf("GPU Test:\n");
     printf("In tensor y, there are %d units.\n", unitNum);
-    printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum1, prob);
+    printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum1, dropProb);
     printf("In tensor yUser, there are %d units.\n", unitNum);
-    printf("There are %d zero units by Dropout layer with default probability %.2f.\n", zeroNum2, 0.5F);
+    printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum2, dropProb);
 
     /* destroy variables */
     delete x;
@@ -159,13 +160,13 @@ bool TestDropout2()
     _SetDataFixedFloat(x, 1.0F);
     y->SetZeroAll();
     dedx->SetZeroAll();
-    _SetDataFixedFloat(dedy, 1.0F);
+    _SetDataFixedFloat(dedy, 1.5F);
 
     /* call Dropout function */
-    float prob = 0.5F;
+    float dropProb = 0.5F;
     int seed = 1;
-    _Dropout(x, y, seed, prob);
-    _DropoutBackward(y, x, dedy, dedx, 1, prob);
+    _Dropout(x, y, seed, dropProb);
+    _DropoutBackward(y, x, dedy, dedx, 1, dropProb);
 
     /* check result */
     y->Dump(stderr, "y");
@@ -185,11 +186,11 @@ bool TestDropout2()
     _SetDataFixedFloat(xGPU, 1.0F);
     yGPU->SetZeroAll();
     dedxGPU->SetZeroAll();
-    _SetDataFixedFloat(dedyGPU, 1.0F);
+    _SetDataFixedFloat(dedyGPU, 1.5F);
 
     /* call Dropout function */
-    _Dropout(xGPU, yGPU, seed, prob);
-    _DropoutBackward(yGPU, xGPU, dedyGPU, dedxGPU, 1, prob);
+    _Dropout(xGPU, yGPU, seed, dropProb);
+    _DropoutBackward(yGPU, xGPU, dedyGPU, dedxGPU, 1, dropProb);
 
     /* check result */
     yGPU->Dump(stderr, "yGPU");
......
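As a quick sanity check on TestDropout2: with dropProb = 0.5, scaleFactor = 1/(1 - 0.5) = 2, so the regenerated mask holds only 0 and 2. Since x is filled with 1.0 and dedy with 1.5, the dumped y should contain only 0 and 2.0, and the kept entries of dedx should be 1.5 × 2 = 3.0; because the forward and backward calls both pass seed 1, the zero patterns of y and dedx should coincide.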