/* NiuTrans.Tensor - an open-source tensor library
 * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-12
 */

#include "Dropout.h"
#include "Dropout.cuh"
#include "Loss.cuh"
#include "../XDevice.h"

#ifdef USE_CUDA

// the CUDA stuff
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cuda.h>

#endif

namespace nts{ // namespace nts(NiuTrans.Tensor)

#ifdef USE_CUDA

/* 
dropout function (Cuda kernel) 
>> x - input data pointer
>> y - output data pointer
>> m - mask indicating which units are zeroed (0 drops a unit, 1 keeps it)
>> s - the scale factor (typically 1/(1-p) for dropout probability p)
>> size - size of input/output
*/
__global__ 
void KernelDropoutCompute(DTYPE * x, DTYPE * y, DTYPE * m, DTYPE s, int size)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < size){
        y[i] = x[i] * m[i] * s;
    }
}

/*
dropout function (Cuda version)
>> x - input tensor
>> y - output tensor
>> mask - mask tensor indicating which units are zeroed
>> scaleFactor - the scale factor
*/
void _CudaDropout(const XTensor * x, XTensor * y, const XTensor * mask, DTYPE scaleFactor)
{
    if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){

        CheckNTErrors(!x->isSparse && !y->isSparse, "the dropout function does not support sparse matrices.");
        CheckNTErrors(x->unitNum == y->unitNum, "we require two tensors of the same length.");

        int gridSize[3], blockSize[3];

        GDevs.GetCudaThread(x->devID, x->unitNum, gridSize, blockSize);

        int devIDBackup;
        ProtectCudaDev(x->devID, devIDBackup);

        KernelDropoutCompute<<<dim3(gridSize[0]), dim3(blockSize[0])>>>
                             ((DTYPE*)x->data, (DTYPE*)y->data,
                              (DTYPE*)mask->data, scaleFactor, x->unitNum);

        BacktoCudaDev(x->devID, devIDBackup);
    }
    else
        ShowNTErrors("TODO!");
}
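
/*
illustrative usage (not part of the library): a minimal host-side sketch of
inverted dropout built on KernelDropoutCompute above. It assumes DTYPE is
float (the library default) and uses only the plain CUDA runtime API; the
helper name ExampleDropoutForward is hypothetical. Kept units are scaled by
1/(1-p) so that the expected activation matches inference time.
*/
#if 0
static void ExampleDropoutForward()
{
    const int size = 8;
    const float p = 0.5F;                    /* dropout probability */
    const float scale = 1.0F / (1.0F - p);   /* inverted-dropout scale factor */

    float hx[size], hm[size], hy[size];
    for (int i = 0; i < size; i++) {
        hx[i] = (float)i;
        hm[i] = (i % 2 == 0) ? 1.0F : 0.0F;  /* fixed 0/1 mask for illustration */
    }

    float * dx = NULL, * dy = NULL, * dm = NULL;
    cudaMalloc((void**)&dx, size * sizeof(float));
    cudaMalloc((void**)&dy, size * sizeof(float));
    cudaMalloc((void**)&dm, size * sizeof(float));
    cudaMemcpy(dx, hx, size * sizeof(float), cudaMemcpyHostToDevice);
    cudaMemcpy(dm, hm, size * sizeof(float), cudaMemcpyHostToDevice);

    /* one block is enough for 8 elements */
    KernelDropoutCompute<<<1, size>>>(dx, dy, dm, scale, size);

    cudaMemcpy(hy, dy, size * sizeof(float), cudaMemcpyDeviceToHost);
    /* hy[i] = hx[i] * 2 where the mask keeps the unit, 0 elsewhere */

    cudaFree(dx);
    cudaFree(dy);
    cudaFree(dm);
}
#endif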

/* 
backward computation of dropout function (Cuda kernel)

dE/dx = dE/dy * dy/dx

>> dedy - dE/dy
>> dedx - dE/dx
>> m - mask indicating which units are zeroed (0 drops a unit, 1 keeps it)
>> s - the scale factor
>> size - size of input/output
*/
__global__
void KernelDropoutBackward(DTYPE * dedy, DTYPE * dedx, 
                           DTYPE * m, DTYPE s, int size)
{
    int i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < size){
        dedx[i] = dedy[i] * m[i] * s;
    }
}

/* 
backward computation of dropout function (Cuda version)

dE/dx = dE/dy * dy/dx

>> y - output of the dropout function
>> x - input of the dropout function
>> dedy - dE/dy
>> dedx - dE/dx
>> mask - mask tensor indicating which units are zeroed
>> scaleFactor - the scale factor
*/
void _CudaDropoutBackward(const XTensor * y, const XTensor * x,
                          const XTensor * dedy, XTensor * dedx,
                          const XTensor * mask, DTYPE scaleFactor)
{
    int gridSize[3], blockSize[3];

    if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
        GDevs.GetCudaThread(x->devID, x->unitNum, gridSize, blockSize);

        int devIDBackup;
        ProtectCudaDev(x->devID, devIDBackup);

        /* dE/dx = dE/dy * dy/dx */
        KernelDropoutBackward<<<dim3(gridSize[0]), dim3(blockSize[0])>>>
                              ((DTYPE*)dedy->data, (DTYPE*)dedx->data, 
                               (DTYPE*)mask->data, scaleFactor, x->unitNum);

        BacktoCudaDev(x->devID, devIDBackup);
    }
    else
        ShowNTErrors("TODO!");
}
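
/*
a minimal CPU reference for the backward rule above (illustrative only):
since y[i] = x[i] * m[i] * s elementwise, dy[i]/dx[i] = m[i] * s, and thus
dE/dx[i] = dE/dy[i] * m[i] * s. The function name CpuDropoutBackwardRef is
hypothetical; it assumes DTYPE is float and can serve as a gradient check
against KernelDropoutBackward.
*/
#if 0
static void CpuDropoutBackwardRef(const float * dedy, float * dedx,
                                  const float * m, float s, int size)
{
    for (int i = 0; i < size; i++)
        dedx[i] = dedy[i] * m[i] * s;   /* same elementwise rule as the kernel */
}
#endif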

#endif

} // namespace nts(NiuTrans.Tensor)