Commit 2e20824a by xiaotong

better code of dropout

parent df76b612
@@ -39,20 +39,19 @@ DTYPE RandomBernoulli(DTYPE prob)
 /*
 dropout function
-During training, randomly zeroes some of the elements of the input tensor
-with probability p using samples from a Bernoulli distribution.
-The elements to zero are randomized on every forward call.
-This has proven to be an effective technique for regularization and
-preventing the co-adaptation of neurons as described in the paper
-"Improving neural networks by preventing co-adaptation of feature detectors".
-Furthermore, the outputs are scaled by a factor of \frac{1}{1-p} during training.
-This means that during evaluation the module simply computes an identity function.
+It randomly zeroes some of the elements of the input tensor
+with probability p via a Bernoulli distribution.
+See "Improving neural networks by preventing co-adaptation of feature detectors"
+for more details.
+Here, the output is scaled by a factor of \frac{1}{1-p} so that we do not need
+to rescale the tensor by 1-p in the inference phase. Instead we perform
+the same inference procedure as if dropout were not used on the test data.
 >> x - input tensor
 >> y - output tensor
->> prob - probability to set an element zero
+>> prob - probability to set an element to zero
 */
 void _Dropout(const XTensor *x, XTensor *y, unsigned int seed, DTYPE prob)
 {
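The scaling by \frac{1}{1-p} above is "inverted dropout": each element survives with probability 1-p and the survivors are multiplied by 1/(1-p), so the expected value of every element matches the input and inference can simply skip the operation. A minimal standalone sketch of the idea in plain C++ (not the NiuTrans.Tensor API; the function name and the use of rand() are illustrative assumptions):

    #include <cstdlib>
    #include <vector>

    /* Illustrative inverted dropout on a plain float buffer:
       zero an element with probability p, scale survivors by 1/(1-p),
       so E[output[i]] == input[i] and no rescaling is needed at inference. */
    void DropoutSketch(std::vector<float> & v, float p, unsigned int seed)
    {
        srand(seed);
        const float scale = 1.0f / (1.0f - p);
        for (size_t i = 0; i < v.size(); ++i) {
            float u = (float)rand() / (float)RAND_MAX;   /* uniform draw in [0, 1] */
            v[i] = (u < p) ? 0.0f : v[i] * scale;        /* drop or rescale */
        }
    }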
@@ -90,55 +89,7 @@ void _Dropout(const XTensor *x, XTensor *y, unsigned int seed, DTYPE prob)
 }
 
 /*
-dropout function (return a XTensor structure)
-make a new tensor to keep the result and return it
-During training, randomly zeroes some of the elements of the input tensor
-with probability p using samples from a Bernoulli distribution.
-The elements to zero are randomized on every forward call.
-This has proven to be an effective technique for regularization and
-preventing the co-adaptation of neurons as described in the paper
-"Improving neural networks by preventing co-adaptation of feature detectors".
-Furthermore, the outputs are scaled by a factor of \frac{1}{1-p} during training.
-This means that during evaluation the module simply computes an identity function.
->> x - input tensor
->> y - output tensor
->> prob - probability to set an element zero
-*/
-XTensor Dropout(const XTensor &x, DTYPE prob)
-{
-    XTensor y(&x);
-    y.SetTMP();
-
-    DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - prob);
-
-    /* generate a mask tensor again with special probability */
-    srand((unsigned int)time(NULL));
-    int unitNum = x.unitNum;
-    DTYPE * maskArray = new DTYPE[unitNum];
-    for (int i = 0; i < unitNum; i++)
-        maskArray[i] = RandomBernoulli(prob);
-
-    XTensor maskTensor(&x);
-    maskTensor.SetData(maskArray, unitNum);
-
-    XTensor inter;
-    inter = Multiply(x, maskTensor);
-    y = ScaleAndShift(inter, scaleFactor, 0);
-
-    delete[] maskArray;
-
-    ///* tensor connection */
-    //XLink::MakeLink(&x, NULL, &y, FUNC_DROPOUT);
-    //XLink::AddParamToHead(&y, prob);
-
-    return y;
-}
-
-/*
-backward computation of dropout function
+backward computation of the dropout function
 dE/dx = dE/dy * dy/dx
@@ -166,7 +117,7 @@ void _DropoutBackward(const XTensor * y, const XTensor * x,
     XTensor * maskTensor = NewTensorBuf(x, x->devID, x->mem);
     maskTensor->SetData(maskArray, unitNum);
 
 #ifdef USE_CUDA
     if(x->devID >= 0 || y->devID >= 0){
         _CudaDropoutBackward(y, x, dedy, dedx, maskTensor, scaleFactor);
@@ -174,7 +125,7 @@ void _DropoutBackward(const XTensor * y, const XTensor * x,
         delete[] maskArray;
         return;
     }
 #endif
 
     DTYPE * dedyp = (DTYPE*)dedy->data;
     DTYPE * dedxp = (DTYPE*)dedx->data;
@@ -190,4 +141,49 @@ void _DropoutBackward(const XTensor * y, const XTensor * x,
         ShowNTErrors("TODO!");
 }
 
+/*
+dropout function (we make tensor connections here)
+make a new tensor to keep the result and return it
+It randomly zeroes some of the elements of the input tensor
+with probability p via a Bernoulli distribution.
+See "Improving neural networks by preventing co-adaptation of feature detectors"
+for more details.
+Here, the output is scaled by a factor of \frac{1}{1-p} so that we do not need
+to rescale the tensor by 1-p in the inference phase. Instead we perform
+the same inference procedure as if dropout were not used on the test data.
+>> x - input tensor
+>> prob - probability to set an element to zero
+*/
+XTensor Dropout(const XTensor &x, DTYPE prob)
+{
+    DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - prob);
+
+    /* generate a mask tensor with the given drop probability */
+    srand((unsigned int)time(NULL));
+    int unitNum = x.unitNum;
+    DTYPE * maskArray = new DTYPE[unitNum];
+    for (int i = 0; i < unitNum; i++)
+        maskArray[i] = RandomBernoulli(prob);
+
+    XTensor maskTensor(&x);
+    maskTensor.SetData(maskArray, unitNum);
+
+    XTensor y;
+    XTensor inter;
+    inter = Multiply(x, maskTensor);
+    y = ScaleAndShift(inter, scaleFactor, 0);
+
+    delete[] maskArray;
+
+    ///* tensor connection */
+    //XLink::MakeLink(&x, NULL, &y, FUNC_DROPOUT);
+    //XLink::AddParamToHead(&y, prob);
+
+    return y;
+}
 
 } // namespace nts(NiuTrans.Tensor)
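For completeness, a hypothetical call site for the returned-value form added above (illustrative only; the surrounding setup of x is assumed, not taken from this commit):

    /* Hypothetical training-time call site: x is an activation tensor
       that the surrounding code has already shaped and filled. */
    XTensor ApplyDropout(const XTensor &x)
    {
        /* zero each element with probability 0.3; survivors are scaled by 1/0.7 */
        return Dropout(x, (DTYPE)0.3);
    }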
@@ -30,14 +30,14 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
 /* dropout function */
 void _Dropout(const XTensor * x, XTensor * y, unsigned int seed, DTYPE prob = 0.5);
 
-/* dropout function */
-XTensor Dropout(const XTensor &x, DTYPE prob = 0.5);
-
 /* de/dx */
 void _DropoutBackward(const XTensor * y, const XTensor * x,
                       const XTensor * dedy, XTensor * dedx,
                       unsigned int seed, DTYPE prob = 0.5);
 
+/* dropout function */
+XTensor Dropout(const XTensor &x, DTYPE prob = 0.5);
+
 } // namespace nts(NiuTrans.Tensor)
 
 #endif // __DROPOUT_H__