/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2017, Natural Language Processing Lab, Northestern University. 
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-12
 */

#include "../XName.h"
#include <time.h>
#include "Dropout.h"
#include "Dropout.cuh"
#include "../core/arithmetic/Multiply.h"
#include "../core/arithmetic/MultiplyDim.h"
#include "../core/math/ScaleAndShift.h"

namespace nts{ // namespace nts(NiuTrans.Tensor

/*
dropout function
xiaotong committed
34 35
It randomly zeroes some of the elements of the input tensor
with probability p via a Bernoulli distribution.
36

xiaotong committed
37 38
See "Improving neural networks by preventing co-adaptation of feature detectors"
for more details.
39

xiaotong committed
40 41 42 43
Here, the output is scaled by a factor of \frac{1}{1-p} so that we do not need
to mark the tensor with probability p in the inference phase. Instead we perform
the same inference procedure as that with no use of dropout on the test data.
 
44 45
>> x - input tensor
>> y - output tensor
46 47 48
>> seed - random seed
>> dropProb - probability to set an element to zero
>> leadingDim - the dimension which we generate the random numbers and perform broadcasting
49
*/
50
void _Dropout(const XTensor * x, XTensor * y, unsigned int seed, DTYPE dropProb, int leadingDim)
51
{
52
    CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
53

54
    int n = leadingDim < 0 ? x->order - 1 : leadingDim;
55

56
    CheckNTErrors(n >= 0 && n < x->order, "Wrong leadingDim!");
57

58
    DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);
59 60
    
    /* generate a mask tensor again with special probability */
61
    int unitNum = x->dimSize[n];
62 63
    DTYPE * maskArray = new DTYPE[unitNum];

64 65 66
    srand(seed);
    for (int i = 0; i < unitNum; i++)
        maskArray[i] = RandomBernoulli(dropProb, scaleFactor);
67

68 69
    XTensor * mask = NewTensor1D(unitNum, x->dataType, x->devID, x->mem);
    mask->SetData(maskArray, unitNum);
70

71 72
    /* call Multiply function for mask */
    _MultiplyDim(x, mask, y, n, 0);
73
    
74
    delete mask;
75 76 77 78
    delete[] maskArray;
}

/* 
xiaotong committed
79
backward computation of the dropout function
80 81 82 83 84 85 86

dE/dx = dE/dy * dy/dx

>> y - output of the dropout function
>> x - input of the dropout function
>> dedy - dE/dy
>> dedx - dE/dx
87 88 89
>> seed - random seed
>> dropProb - probability to set an element to zero
>> leadingDim - the dimension which we generate the random numbers and perform broadcasting
90 91 92
*/
void _DropoutBackward(const XTensor * y, const XTensor * x, 
                      const XTensor * dedy, XTensor * dedx, 
93
                      unsigned int seed, DTYPE dropProb, int leadingDim)
94
{
95 96 97 98 99 100
    CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");

    int n = leadingDim < 0 ? x->order - 1 : leadingDim;

    CheckNTErrors(n >= 0 && n < x->order, "Wrong leadingDim!");

101 102
    if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE)
    {
103
        DTYPE scaleFactor = (DTYPE)1.0F / ((DTYPE)1.0F - dropProb);
104 105

        /* generate a mask tensor again with special probability */
106
        int unitNum = x->dimSize[n];
107
        DTYPE * maskArray = new DTYPE[unitNum];
108 109
        
        srand(seed);
110
        for (int i = 0; i < unitNum; i++)
111
            maskArray[i] = RandomBernoulli(dropProb, scaleFactor);
112

113 114
        XTensor * mask = NewTensor1D(unitNum, x->dataType, x->devID, x->mem);
        mask->SetData(maskArray, unitNum);
115

116 117
        /* call MultiplyDim function for mask */
        _MultiplyDim(dedy, mask, dedx, n, 0);
118

119
        delete mask;
120 121 122 123 124
        delete[] maskArray;
    }
    else
        ShowNTErrors("TODO!");
}
    
/*
xiaotong committed
127 128 129
dropout function (we make tensor connections here)
It randomly zeroes some of the elements of the input tensor
with probability p via a Bernoulli distribution.
xiaotong committed
130
 
xiaotong committed
131 132
See "Improving neural networks by preventing co-adaptation of feature detectors"
for more details.
xiaotong committed
133
 
xiaotong committed
134 135 136
Here, the output is scaled by a factor of \frac{1}{1-p} so that we do not need
to mark the tensor with probability p in the inference phase. Instead we perform
the same inference procedure as that with no use of dropout on the test data.
xiaotong committed
137
 
xiaotong committed
138
>> x - input tensor
139 140
>> dropProb - probability to set an element to zero
>> leadingDim - the dimension which we generate the random numbers and perform broadcasting
xiaotong committed
141
*/
142
XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim)
xiaotong committed
143
{
144 145 146 147 148 149 150
    CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");

    int n = leadingDim < 0 ? x.order - 1 : leadingDim;

    CheckNTErrors(n >= 0 && n < x.order, "Wrong leadingDim!");

    DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);
xiaotong committed
151
    
xiaotong committed
152
    /* generate a mask tensor with probability p */
153
    int unitNum = x.dimSize[n];
xiaotong committed
154
    DTYPE * maskArray = new DTYPE[unitNum];
xiaotong committed
155 156

    srand((unsigned int)time(NULL));
xiaotong committed
157
    for (int i = 0; i < unitNum; i++)
158
        maskArray[i] = RandomBernoulli(dropProb, scaleFactor);
159
    
160 161 162
    XTensor mask;
    InitTensor1D(&mask, unitNum, x.dataType, x.devID, x.mem);
    mask.SetData(maskArray, unitNum);
xiaotong committed
163

xiaotong committed
164 165
    delete[] maskArray;
    
166
    return MultiplyDim(x, mask, n, 0);
xiaotong committed
167
}

} // namespace nts(NiuTrans.Tensor)