/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2017, Natural Language Processing Lab, Northestern University. 
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-12
 */

#include "../XName.h"
#include <time.h>
#include "Dropout.h"
#include "Dropout.cuh"
#include "../core/arithmetic/Multiply.h"
#include "../core/arithmetic/MultiplyDim.h"
#include "../core/math/ScaleAndShift.h"

namespace nts{ // namespace nts(NiuTrans.Tensor

/*
dropout function
xiaotong committed
34 35
It randomly zeroes some of the elements of the input tensor
with probability p via a Bernoulli distribution.
36

xiaotong committed
37 38
See "Improving neural networks by preventing co-adaptation of feature detectors"
for more details.
39

xiaotong committed
40 41 42 43
Here, the output is scaled by a factor of \frac{1}{1-p} so that we do not need
to mark the tensor with probability p in the inference phase. Instead we perform
the same inference procedure as that with no use of dropout on the test data.
 
44 45
>> x - input tensor
>> y - output tensor
46 47 48
>> seed - random seed
>> dropProb - probability to set an element to zero
>> leadingDim - the dimension which we generate the random numbers and perform broadcasting
49
*/
50
void _Dropout(const XTensor * x, XTensor * y, unsigned int seed, DTYPE dropProb, int leadingDim)
51
{
52
    CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
53

54
    int n = leadingDim < 0 ? x->order - 1 : leadingDim;
55

56
    CheckNTErrors(n >= 0 && n < x->order, "Wrong leadingDim!");
57

58
    DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);
59 60
    
    /* generate a mask tensor again with special probability */
61
    int unitNum = x->dimSize[n];
62 63
    DTYPE * maskArray = new DTYPE[unitNum];

64 65 66
    srand(seed);
    for (int i = 0; i < unitNum; i++)
        maskArray[i] = RandomBernoulli(dropProb, scaleFactor);
67

68 69
    XTensor * mask = NewTensor1D(unitNum, x->dataType, x->devID, x->mem);
    mask->SetData(maskArray, unitNum);
70

71 72
    /* call Multiply function for mask */
    _MultiplyDim(x, mask, y, n, 0);
73
    
74
    delete mask;
75 76 77 78
    delete[] maskArray;
}

/* 
xiaotong committed
79
backward computation of the dropout function
80 81 82 83 84 85 86

dE/dx = dE/dy * dy/dx

>> y - output of the dropout function
>> x - input of the dropout function
>> dedy - dE/dy
>> dedx - dE/dx
87 88 89
>> seed - random seed
>> dropProb - probability to set an element to zero
>> leadingDim - the dimension which we generate the random numbers and perform broadcasting
90 91 92
*/
void _DropoutBackward(const XTensor * y, const XTensor * x, 
                      const XTensor * dedy, XTensor * dedx, 
93
                      unsigned int seed, DTYPE dropProb, int leadingDim)
94
{
95 96 97 98 99 100
    CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");

    int n = leadingDim < 0 ? x->order - 1 : leadingDim;

    CheckNTErrors(n >= 0 && n < x->order, "Wrong leadingDim!");

101 102
    if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE)
    {
103
        DTYPE scaleFactor = (DTYPE)1.0F / ((DTYPE)1.0F - dropProb);
104 105

        /* generate a mask tensor again with special probability */
106
        int unitNum = x->dimSize[n];
107
        DTYPE * maskArray = new DTYPE[unitNum];
108 109
        
        srand(seed);
110
        for (int i = 0; i < unitNum; i++)
111
            maskArray[i] = RandomBernoulli(dropProb, scaleFactor);
112

113 114
        XTensor * mask = NewTensor1D(unitNum, x->dataType, x->devID, x->mem);
        mask->SetData(maskArray, unitNum);
115

116 117
        /* call MultiplyDim function for mask */
        _MultiplyDim(dedy, mask, dedx, n, 0);
118

119
        delete mask;
120 121 122 123 124
        delete[] maskArray;
    }
    else
        ShowNTErrors("TODO!");
}
    
/*
xiaotong committed
127 128 129
dropout function (we make tensor connections here)
It randomly zeroes some of the elements of the input tensor
with probability p via a Bernoulli distribution.
xiaotong committed
130
 
xiaotong committed
131 132
See "Improving neural networks by preventing co-adaptation of feature detectors"
for more details.
xiaotong committed
133
 
xiaotong committed
134 135 136
Here, the output is scaled by a factor of \frac{1}{1-p} so that we do not need
to mark the tensor with probability p in the inference phase. Instead we perform
the same inference procedure as that with no use of dropout on the test data.
xiaotong committed
137
 
xiaotong committed
138
>> x - input tensor
139 140
>> dropProb - probability to set an element to zero
>> leadingDim - the dimension which we generate the random numbers and perform broadcasting
xiaotong committed
141
*/
142
XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim)
xiaotong committed
143
{
144 145 146 147 148 149 150
    CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");

    int n = leadingDim < 0 ? x.order - 1 : leadingDim;

    CheckNTErrors(n >= 0 && n < x.order, "Wrong leadingDim!");

    DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);
xiaotong committed
151
    
xiaotong committed
152
    /* generate a mask tensor with probability p */
153
    int unitNum = x.dimSize[n];
xiaotong committed
154
    DTYPE * maskArray = new DTYPE[unitNum];
xiaotong committed
155 156

    srand((unsigned int)time(NULL));
xiaotong committed
157
    for (int i = 0; i < unitNum; i++)
158
        maskArray[i] = RandomBernoulli(dropProb, scaleFactor);
159
    
160 161 162
    XTensor mask;
    InitTensor1D(&mask, unitNum, x.dataType, x.devID, x.mem);
    mask.SetData(maskArray, unitNum);
xiaotong committed
163

xiaotong committed
164 165
    delete[] maskArray;
    
166
    return MultiplyDim(x, mask, n, 0);
xiaotong committed
167
}

} // namespace nts(NiuTrans.Tensor)