Commit 1b50554a by xuchen

Merge branch 'xuchen' into xiaotong-working

parents cf43c58c 102db468
......@@ -66,8 +66,8 @@ void _Multiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alpha, i
     for (int i = 0; i < a->order; i++) {
         if (i != leadingDimRDI) {
             CheckNTErrors((a->dimSizeRDI[i] == b->dimSizeRDI[i] &&
-                           a->dimSizeRDI[i] == c->dimSizeRDI[i]),
-                          "Unmatched tensors!");
+                          a->dimSizeRDI[i] == c->dimSizeRDI[i]),
+                         "Unmatched tensors!");
         }
         if (i < leadingDimRDI)
             stride *= a->dimSizeRDI[i];
......
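Review note on the check above: the loop requires every dimension except the leading one to agree across a, b and c (dimSizeRDI indexes the dimensions in reverse order). A shape sketch with made-up sizes:

    // leadingDim = 0: only the leading dimension may differ
    //   a: (4, 5, 6)   b: (2, 5, 6)   c: (4, 5, 6)   -> passes the check
    //   a: (4, 5, 6)   b: (4, 7, 6)   c: (4, 5, 6)   -> "Unmatched tensors!"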
......@@ -77,7 +77,7 @@ where |a_lead| means the size of the leading dimension of a
 */
 template<int nonZeroAlpha> __global__
 void KernelMulElementWiseTensorDynamic(DTYPE * a, DTYPE * b, DTYPE * c, DTYPE alpha,
-                                       int stride, int ldSizeA, int ldSizeB, int ldSizeC, int blockNum)
+                                      int stride, int ldSizeA, int ldSizeB, int ldSizeC, int blockNum)
 {
     __shared__ DTYPE* ap[MAX_CUDA_THREAD_NUM_PER_BLOCK];
     __shared__ DTYPE* bp[MAX_CUDA_THREAD_NUM_PER_BLOCK];
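For reviewers, a minimal CPU sketch of the semantics this kernel parallelizes, based on the comment in the hunk header (c(i) = a(i) * b(i % |b_lead|) + alpha * c(i)). The function name and the flat-layout assumptions are mine, not the library's:

    /* reference semantics only: row-major data, blockNum blocks above the
       leading dimension, stride elements below it; a smaller leading
       dimension wraps around via modulo */
    void MultiplyReference(const float * a, const float * b, float * c,
                           float alpha, int stride,
                           int ldSizeA, int ldSizeB, int ldSizeC, int blockNum)
    {
        for (int k = 0; k < blockNum; k++) {
            for (int i = 0; i < ldSizeC; i++) {
                const float * ap = a + (k * ldSizeA + i % ldSizeA) * stride;
                const float * bp = b + (k * ldSizeB + i % ldSizeB) * stride;
                float * cp = c + (k * ldSizeC + i) * stride;
                for (int j = 0; j < stride; j++)
                    cp[j] = ap[j] * bp[j] + alpha * cp[j];
            }
        }
    }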
......@@ -171,14 +171,12 @@ void _CudaMultiply(const XTensor * a, const XTensor * b, XTensor * c, DTYPE alph
         if (alpha == 0) {
             KernelMulElementWiseTensorDynamic<0> << <blocks, threads >> >
                                                  ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, 0,
-                                                  stride, dimensionSizeA, dimensionSizeB, dimensionSizeC,
-                                                  blockNum);
+                                                  stride, dimensionSizeA, dimensionSizeB, dimensionSizeC, blockNum);
         }
         else {
             KernelMulElementWiseTensorDynamic<1> << <blocks, threads >> >
                                                  ((DTYPE*)a->data, (DTYPE*)b->data, (DTYPE*)c->data, alpha,
-                                                  stride, dimensionSizeA, dimensionSizeB, dimensionSizeC,
-                                                  blockNum);
+                                                  stride, dimensionSizeA, dimensionSizeB, dimensionSizeC, blockNum);
         }
     }
 }
......
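The two launches above pick a template instantiation instead of branching on alpha at run time. A sketch of the pattern (illustrative kernel, not the library's):

    /* nonZeroAlpha is a compile-time constant, so the branch below is
       resolved during compilation and the <0> instantiation never reads c */
    template<int nonZeroAlpha> __global__
    void KernelSketch(float * a, float * b, float * c, float alpha, int n)
    {
        int i = blockDim.x * blockIdx.x + threadIdx.x;
        if (i >= n)
            return;
        if (nonZeroAlpha)
            c[i] = a[i] * b[i] + alpha * c[i];
        else
            c[i] = a[i] * b[i];
    }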
......@@ -25,7 +25,7 @@
 #include "Dropout.h"
 #include "Dropout.cuh"
 #include "../core/arithmetic/Multiply.h"
-#include "../core/arithmetic/SumDim.h"
+#include "../core/arithmetic/MultiplyDim.h"
 #include "../core/math/ScaleAndShift.h"

 namespace nts{ // namespace nts(NiuTrans.Tensor
......@@ -44,40 +44,35 @@ the same inference procedure as that with no use of dropout on the test data.
 >> x - input tensor
 >> y - output tensor
->> prob - probability to set an element to zero
 >> seed - random seed
+>> dropProb - probability to set an element to zero
+>> leadingDim - the dimension along which we generate the random numbers and perform broadcasting
 */
-void _Dropout(const XTensor *x, XTensor *y, unsigned int seed, DTYPE prob)
+void _Dropout(const XTensor * x, XTensor * y, unsigned int seed, DTYPE dropProb, int leadingDim)
 {
-    CheckNTErrors(prob >= 0.0 && prob <= 1.0, "The probability must be 0-1!");
-
-    DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - prob);
+    CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
+
+    int n = leadingDim < 0 ? x->order - 1 : leadingDim;
+
+    CheckNTErrors(n >= 0 && n < x->order, "Wrong leadingDim!");
+
+    DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);

     /* generate a mask tensor again with special probability */
-    srand(seed);
-    int unitNum = x->unitNum;
+    int unitNum = x->dimSize[n];
     DTYPE * maskArray = new DTYPE[unitNum];
-    for (int i = 0; i < unitNum; i++)
-        maskArray[i] = RandomBernoulli(prob, 1.0F);
-
-    XTensor * maskTensor = NewTensorBuf(x, x->devID, x->mem);
-    maskTensor->SetData(maskArray, unitNum);
+
+    srand(seed);
+    for (int i = 0; i < unitNum; i++)
+        maskArray[i] = RandomBernoulli(dropProb, scaleFactor);

-#ifdef USE_CUDA
-    if(x->devID >=0 || y->devID >= 0){
-        _CudaDropout(x, y, maskTensor, scaleFactor);
-        DelTensorBuf(maskTensor);
-        delete[] maskArray;
-        return;
-    }
-#endif
+    XTensor * mask = NewTensor1D(unitNum, x->dataType, x->devID, x->mem);
+    mask->SetData(maskArray, unitNum);

-    XTensor * inter = NewTensorBuf(x, x->devID, x->mem);
-    _Multiply(x, maskTensor, inter);
-    _ScaleAndShift(inter, y, scaleFactor, 0);
+    /* call Multiply function for mask */
+    _MultiplyDim(x, mask, y, n, 0);

-    DelTensorBuf(inter);
-    DelTensorBuf(maskTensor);
+    delete mask;
     delete[] maskArray;
 }
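What the rewritten _Dropout computes, as a plain C++ reference for a 2-D input with n == 1: the mask now has one entry per slice along the leading dimension rather than one per element, and _MultiplyDim broadcasts it (which also covers the GPU path, hence the deleted #ifdef USE_CUDA branch). Hypothetical helper, assuming the same rand()-based Bernoulli draw as RandomBernoulli:

    #include <cstdlib>

    void DropoutRef2D(const float * x, float * y, int rows, int cols,
                      unsigned int seed, float dropProb)
    {
        float scaleFactor = 1.0F / (1.0F - dropProb);
        float * mask = new float[cols];

        /* one mask entry per column: 0 or 1/(1 - dropProb) */
        srand(seed);
        for (int j = 0; j < cols; j++)
            mask[j] = ((float)rand() / (float)RAND_MAX >= dropProb) ? scaleFactor : 0.0F;

        /* the broadcast that _MultiplyDim(x, mask, y, n, 0) performs */
        for (int i = 0; i < rows; i++)
            for (int j = 0; j < cols; j++)
                y[i * cols + j] = x[i * cols + j] * mask[j];

        delete[] mask;
    }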
......@@ -90,44 +85,39 @@ dE/dx = dE/dy * dy/dx
 >> x - input of the dropout function
 >> dedy - dE/dy
 >> dedx - dE/dx
->> prob - probability to set an element zero
 >> seed - random seed
+>> dropProb - probability to set an element to zero
+>> leadingDim - the dimension along which we generate the random numbers and perform broadcasting
 */
 void _DropoutBackward(const XTensor * y, const XTensor * x,
                       const XTensor * dedy, XTensor * dedx,
-                      unsigned int seed, DTYPE prob)
+                      unsigned int seed, DTYPE dropProb, int leadingDim)
 {
+    CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
+
+    int n = leadingDim < 0 ? x->order - 1 : leadingDim;
+
+    CheckNTErrors(n >= 0 && n < x->order, "Wrong leadingDim!");
+
     if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE)
     {
-        int unitNum = y->unitNum;
-        DTYPE scaleFactor = (DTYPE)1.0F / ((DTYPE)1.0F - prob);
+        DTYPE scaleFactor = (DTYPE)1.0F / ((DTYPE)1.0F - dropProb);

         /* generate a mask tensor again with special probability */
-        srand(seed);
+        int unitNum = x->dimSize[n];
         DTYPE * maskArray = new DTYPE[unitNum];
+
+        srand(seed);
         for (int i = 0; i < unitNum; i++)
-            maskArray[i] = RandomBernoulli(prob, 1.0F);
-
-        XTensor * maskTensor = NewTensorBuf(x, x->devID, x->mem);
-        maskTensor->SetData(maskArray, unitNum);
-
-#ifdef USE_CUDA
-        if(x->devID >= 0 || y->devID >= 0){
-            _CudaDropoutBackward(y, x, dedy, dedx, maskTensor, scaleFactor);
-            DelTensorBuf(maskTensor);
-            delete[] maskArray;
-            return;
-        }
-#endif
+            maskArray[i] = RandomBernoulli(dropProb, scaleFactor);

-        DTYPE * dedyp = (DTYPE*)dedy->data;
-        DTYPE * dedxp = (DTYPE*)dedx->data;
+        XTensor * mask = NewTensor1D(unitNum, x->dataType, x->devID, x->mem);
+        mask->SetData(maskArray, unitNum);

-        /* dE/dx = dE/dy * dy/dx */
-        for(int i = 0; i < unitNum; i++)
-            dedxp[i] = dedyp[i] * maskArray[i] * scaleFactor;
+        /* call MultiplyDim function for mask */
+        _MultiplyDim(dedy, mask, dedx, n, 0);

-        DelTensorBuf(maskTensor);
+        delete mask;
         delete[] maskArray;
     }
     else
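Because the backward pass reseeds with the same seed, RandomBernoulli regenerates the forward mask exactly, so nothing has to be cached between the passes and dE/dx = dE/dy * broadcast_n(mask). Reference loop for the 2-D, n == 1 case, matching DropoutRef2D above (hypothetical helper):

    void DropoutBackwardRef2D(const float * dedy, float * dedx,
                              const float * mask, int rows, int cols)
    {
        /* mask[j] is 0 or 1/(1 - dropProb); the scale is folded into the
           mask, which is why the old "* scaleFactor" in the loop is gone */
        for (int i = 0; i < rows; i++)
            for (int j = 0; j < cols; j++)
                dedx[i * cols + j] = dedy[i * cols + j] * mask[j];
    }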
......@@ -147,14 +137,18 @@ to mark the tensor with probability p in the inference phase. Instead we perform
 the same inference procedure as that with no use of dropout on the test data.

 >> x - input tensor
 >> y - output tensor
->> prob - probability to set an element to zero
->> leadDim - the dimension along which we generate the random numbers
+>> dropProb - probability to set an element to zero
+>> leadingDim - the dimension along which we generate the random numbers and perform broadcasting
 */
-XTensor Dropout(const XTensor &x, DTYPE prob, int leadDim)
+XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim)
 {
-    int n = leadDim < 0 ? x.order - 1 : leadDim;
-    DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - prob);
+    CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
+
+    int n = leadingDim < 0 ? x.order - 1 : leadingDim;
+
+    CheckNTErrors(n >= 0 && n < x.order, "Wrong leadingDim!");
+
+    DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);

     /* generate a mask tensor with probability p */
     int unitNum = x.dimSize[n];
......@@ -162,20 +156,15 @@ XTensor Dropout(const XTensor &x, DTYPE prob, int leadDim)
     srand((unsigned int)time(NULL));
     for (int i = 0; i < unitNum; i++)
-        maskArray[i] = RandomBernoulli(prob, scaleFactor);
-
-    XTensor mask(&x);
-    mask.SetZeroAll();
-
-    XTensor * maskVector = NewTensorBuf(1, &unitNum, X_FLOAT, 1.0F, x.devID, x.mem);
-    maskVector->SetData(maskArray, unitNum);
+        maskArray[i] = RandomBernoulli(dropProb, scaleFactor);

-    _SumDim(&mask, maskVector, &mask, n);
+    XTensor mask;
+    InitTensor1D(&mask, unitNum, x.dataType, x.devID, x.mem);
+    mask.SetData(maskArray, unitNum);

     delete[] maskArray;
-    DelTensorBuf(maskVector);

-    return Multiply(x, mask);
+    return MultiplyDim(x, mask, n, 0);
 }

 } // namespace nts(NiuTrans.Tensor)
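Usage sketch for the new signature (tensor sizes made up; InitTensor3D and SetDataRand as used elsewhere in the library):

    XTensor x;
    InitTensor3D(&x, 32, 50, 512, X_FLOAT);   /* e.g. (batch, length, dim) */
    x.SetDataRand(0, 1);

    /* drop whole slices along dimension 1 with p = 0.2; survivors are
       rescaled by 1 / (1 - 0.2) through the mask */
    XTensor y = Dropout(x, 0.2F, 1);

    /* leadingDim defaults to -1, i.e. the last dimension of x */
    XTensor z = Dropout(x, 0.2F);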
......@@ -28,21 +28,21 @@
 namespace nts{ // namespace nts(NiuTrans.Tensor)

 /* generate a random bernoulli number */
-inline DTYPE RandomBernoulli(DTYPE prob, DTYPE value)
+inline DTYPE RandomBernoulli(DTYPE dropProb, DTYPE value)
 {
-    return (DTYPE)rand()/(DTYPE)RAND_MAX >= prob ? (DTYPE)value : 0;
+    return (DTYPE)rand()/(DTYPE)RAND_MAX >= dropProb ? (DTYPE)value : 0;
 }

 /* dropout function */
-void _Dropout(const XTensor * x, XTensor * y, unsigned int seed, DTYPE prob);
+void _Dropout(const XTensor * x, XTensor * y, unsigned int seed, DTYPE dropProb, int leadingDim = -1);

 /* de/dx */
 void _DropoutBackward(const XTensor * y, const XTensor * x,
                       const XTensor * dedy, XTensor * dedx,
-                      unsigned int seed, DTYPE prob);
+                      unsigned int seed, DTYPE dropProb, int leadingDim = -1);

 /* dropout function */
-XTensor Dropout(const XTensor &x, DTYPE prob, int leadDim = -1);
+XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim = -1);

 } // namespace nts(NiuTrans.Tensor)
......
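The renamed RandomBernoulli makes the inverted-dropout trick explicit: it returns value (here scaleFactor = 1/(1 - dropProb)) with probability 1 - dropProb and 0 otherwise, so E[mask] = 1 and the separate _ScaleAndShift pass can go. A standalone check of that expectation (plain C++, independent of the library):

    #include <cstdio>
    #include <cstdlib>

    int main()
    {
        const float dropProb = 0.2F;
        const float scaleFactor = 1.0F / (1.0F - dropProb);
        const int trials = 1000000;
        double sum = 0.0;

        srand(20);
        for (int i = 0; i < trials; i++)
            sum += ((float)rand() / (float)RAND_MAX >= dropProb) ? scaleFactor : 0.0F;

        /* mean converges to (1 - dropProb) * scaleFactor = 1 */
        printf("mean mask value = %.4f (expected 1.0)\n", sum / trials);
        return 0;
    }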
......@@ -31,10 +31,11 @@ case 1: test Dropout function.
 bool TestDropout1()
 {
     /* a input tensor of size (4, 5) */
-    int order = 2;
+    int order = 3;
     int * dimSize = new int[order];
     dimSize[0] = 40;
     dimSize[1] = 50;
+    dimSize[2] = 60;

     int unitNum = 1;
     for (int i = 0; i < order; i++)
......@@ -49,14 +50,14 @@ bool TestDropout1()
     XTensor yUser;

     /* initialize variables */
-    x->SetDataRand(0, 1);
+    _SetDataFixedFloat(x, 1.0F);
     y->SetZeroAll();

     /* call Dropout function */
-    float prob = 0.2F;
+    float dropProb = 0.2F;
     int seed = 20;
-    _Dropout(x, y, seed, prob);
-    yUser = Dropout(*x, 0.5F);
+    _Dropout(x, y, seed, dropProb);
+    yUser = Dropout(*x, dropProb);

     /* check result */
     int zeroNum1 = 0;
......@@ -73,9 +74,9 @@ bool TestDropout1()
     }
     printf("CPU Test:\n");
     printf("In tensor y, there are %d units.\n", unitNum);
-    printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum1, prob);
+    printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum1, dropProb);
     printf("In tensor yUser, there are %d units.\n", unitNum);
-    printf("There are %d zero units by Dropout layer with default probability %.2f.\n", zeroNum2, 0.5F);
+    printf("There are %d zero units by Dropout layer with default probability %.2f.\n", zeroNum2, dropProb);

 #ifdef USE_CUDA
     /* GPU test */
......@@ -87,12 +88,12 @@ bool TestDropout1()
     XTensor yUserGPU;

     /* initialize variables */
-    xGPU->SetDataRand(0, 1);
+    _SetDataFixedFloat(xGPU, 1.0F);
     yGPU->SetZeroAll();

     /* call Dropout function */
-    _Dropout(xGPU, yGPU, seed, prob);
-    yUserGPU = Dropout(*xGPU, 0.5F);
+    _Dropout(xGPU, yGPU, seed, dropProb);
+    yUserGPU = Dropout(*xGPU, dropProb);

     /* check result */
     zeroNum1 = 0;
......@@ -109,9 +110,9 @@ bool TestDropout1()
     }
     printf("CPU Test:\n");
     printf("In tensor y, there are %d units.\n", unitNum);
-    printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum1, prob);
+    printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum1, dropProb);
     printf("In tensor yUser, there are %d units.\n", unitNum);
-    printf("There are %d zero units by Dropout layer with default probability %.2f.\n", zeroNum2, 0.5F);
+    printf("There are %d zero units by Dropout layer with default probability %.2f.\n", zeroNum2, dropProb);

     /* destroy variables */
     delete x;
......@@ -159,13 +160,13 @@ bool TestDropout2()
     _SetDataFixedFloat(x, 1.0F);
     y->SetZeroAll();
     dedx->SetZeroAll();
-    _SetDataFixedFloat(dedy, 1.0F);
+    _SetDataFixedFloat(dedy, 1.5F);

     /* call Dropout function */
-    float prob = 0.5F;
+    float dropProb = 0.5F;
     int seed = 1;
-    _Dropout(x, y, seed, prob);
-    _DropoutBackward(y, x, dedy, dedx, 1, prob);
+    _Dropout(x, y, seed, dropProb);
+    _DropoutBackward(y, x, dedy, dedx, 1, dropProb);

     /* check result */
     y->Dump(stderr, "y");
......@@ -185,11 +186,11 @@ bool TestDropout2()
     _SetDataFixedFloat(xGPU, 1.0F);
     yGPU->SetZeroAll();
     dedxGPU->SetZeroAll();
-    _SetDataFixedFloat(dedyGPU, 1.0F);
+    _SetDataFixedFloat(dedyGPU, 1.5F);

     /* call Dropout function */
-    _Dropout(xGPU, yGPU, seed, prob);
-    _DropoutBackward(yGPU, xGPU, dedyGPU, dedxGPU, 1, prob);
+    _Dropout(xGPU, yGPU, seed, dropProb);
+    _DropoutBackward(yGPU, xGPU, dedyGPU, dedxGPU, 1, dropProb);

     /* check result */
     yGPU->Dump(stderr, "yGPU");
......
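On the test changes: with _SetDataFixedFloat(x, 1.0F), every zero in y comes from the mask, and since the mask is drawn once per slice along leadingDim, zeros now arrive in whole slices of unitNum / dimSize[n] elements; the zero ratio should still approach dropProb. A counting sketch for a CPU tensor (hypothetical helper; the tests read the buffer directly):

    /* zeroNum / (float)y->unitNum should be close to dropProb,
       in steps of 1 / dimSize[n], because whole slices drop together */
    int CountZeros(const XTensor * y)
    {
        DTYPE * data = (DTYPE*)y->data;
        int zeroNum = 0;
        for (int i = 0; i < y->unitNum; i++) {
            if (data[i] == 0.0F)
                zeroNum++;
        }
        return zeroNum;
    }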