Commit 270d0ff6 by Jiang Yufan (姜雨帆)

Implement Dropout with index to reduce memory

parent 591d6121
......@@ -42,9 +42,11 @@ using namespace transformer;
int main( int argc, const char ** argv )
{
//sample();
//_CrtSetBreakAlloc(896);
//BackwardTest();
//return 0;
//Test();
if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
FNNLMMain(argc - 1, argv + 1);
......@@ -208,4 +210,4 @@ void SumDimTest()
z.Dump(stderr, "z:");
delete[] data;
}
\ No newline at end of file
}
......@@ -719,12 +719,18 @@ void XMathGrad::GradMultiply(XTensor * node, bool isEfficient)
XTensor * a = income.tails[0];
XTensor * b = income.tails[1];
XNoder::MakeGrad(a);
XNoder::MakeGrad(b);
CheckNTErrors(XTensor::IsSameShaped(a, b), "Wrong sized input tensors!");
_Multiply(node->grad, b, a->grad, 1.0F);
_Multiply(node->grad, a, b->grad, 1.0F);
if (!isEfficient || a->isGrad) {
XNoder::MakeGrad(a);
_Multiply(node->grad, b, a->grad, 1.0F);
}
if (!isEfficient || b->isGrad) {
XNoder::MakeGrad(b);
_Multiply(node->grad, a, b->grad, 1.0F);
}
node->visitMark = NODE_FINISHED;
}
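The change above is the efficient-mode gating: each operand's gradient is now built and accumulated only when that operand actually needs one. A minimal standalone sketch (plain C++, hypothetical names, not part of the library) of the elementwise product rule being accumulated:

#include <cstddef>
#include <vector>

/* For c = a * b (elementwise), the product rule gives
   dE/da = dE/dc * b and dE/db = dE/dc * a. The needA/needB
   flags play the role of the a->isGrad / b->isGrad checks above. */
void MultiplyBackward(const std::vector<float> &gradC,
                      const std::vector<float> &a,
                      const std::vector<float> &b,
                      bool needA, bool needB,
                      std::vector<float> &gradA,
                      std::vector<float> &gradB)
{
    for (std::size_t i = 0; i < gradC.size(); i++) {
        if (needA)
            gradA[i] += gradC[i] * b[i];  /* accumulate, as _Multiply(..., 1.0F) does */
        if (needB)
            gradB[i] += gradC[i] * a[i];
    }
}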
......@@ -889,88 +895,8 @@ gradient for normalize
*/
void XMathGrad::GradNormalize(XTensor * node, bool isEfficient)
{
ShowNTErrors("This is really a bad piece of code!!!");
ShowNTErrors("TODO!");
XLink &income = node->income;
CheckNTErrors(income.tailNum == 5, "Wrong input tensor number for NORMALIZE!");
XTensor * input = income.tails[0];
XTensor * mean = income.tails[1];
XTensor * var = income.tails[2];
XTensor * a = income.tails[3];
XTensor * b = income.tails[4];
XTensor * c = NewTensor(var);
XTensor * d = NewTensor(a);
XTensor * e = NewTensor(a);
XTensor * f = NewTensor(a);
XTensor * g = NewTensor(a);
XTensor * h = NewTensor(a);
XTensor * i = NewTensor(a);
XTensor * j = NewTensor(a);
XTensor * k = NewTensor(var);
XTensor * p = NewTensor(var);
XTensor * q = NewTensor(var);
XTensor * r = NewTensor(a);
XTensor * x = NewTensor(mean);
XTensor * y = NewTensor(mean);
XTensor * z = NewTensor(mean);
DTYPE epsilon = income.GetParam(1);
int dim = income.GetParamInt(0);
int n = a->GetDim(dim);
XNoder::MakeGrad(input);
XNoder::MakeGrad(mean);
XNoder::MakeGrad(var);
XNoder::MakeGrad(a);
XNoder::MakeGrad(b);
/* dEdinput */
_ScaleAndShift(var, c, 1.0F, epsilon);
_Unsqueeze(c, d, dim, n);
_Power(d, e, -0.5F);
_Multiply(a, e, f);
_Multiply(node->grad, f, input->grad, 1.0F);
/* dEdmean */
_ScaleAndShift(f, g, -1.0F);
_ReduceSum(g, x, dim);
_ReduceSum(node->grad, y, dim);
_Multiply(y, x, mean->grad, 1.0F);
/* dEdvar */
_Unsqueeze(mean, h, dim, n);
_Sub(input, h, i);
_Multiply(a, i, j);
_Power(var, k, -1.5F);
_ScaleAndShift(k, p, -0.5F);
_ReduceSum(j, z, dim);
_Multiply(z, p, q);
_Multiply(y, q, var->grad, 1.0F);
/* dEda */
_Multiply(i, e, r);
_Multiply(node->grad, r, a->grad, 1.0F);
/* dEdb */
_Sum(b->grad, node->grad, b->grad);
node->visitMark = NODE_FINISHED;
delete c;
delete d;
delete e;
delete f;
delete g;
delete h;
delete i;
delete j;
delete k;
delete p;
delete q;
delete r;
delete x;
delete y;
delete z;
}
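For reference, the normalization this (now disabled) body targets is, as a sketch,

\[
y = a \odot \frac{x - \mu}{\sqrt{\sigma^2 + \epsilon}} + b,
\]

and, treating the mean \(\mu\) and variance \(\sigma^2\) as separate graph inputs (which is why the link has five tails), the partials it tries to build are

\[
\frac{\partial E}{\partial x} = \frac{\partial E}{\partial y} \odot \frac{a}{\sqrt{\sigma^2 + \epsilon}}, \qquad
\frac{\partial E}{\partial a} = \frac{\partial E}{\partial y} \odot \frac{x - \mu}{\sqrt{\sigma^2 + \epsilon}}, \qquad
\frac{\partial E}{\partial b} = \frac{\partial E}{\partial y},
\]

with \(\mu\) and \(\sigma^2\) receiving their own partials through the chain rule. The body was flagged and disabled rather than fixed.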
/*
......
......@@ -43,6 +43,8 @@ void XShapeGrad::MakeGrad(XTensor * node, bool isEfficent)
GradCopyIndexed(node, isEfficent);
else if(operID == MOVEMENT_GATHER)
GradGather(node, isEfficent);
else if (operID == MOVEMENT_DROPOUTWITHINDEX)
GradDropoutWithIndex(node, isEfficent);
else if(operID == SHAPE_MERGE)
GradMerge(node, isEfficent);
else if(operID == SHAPE_MERGE_LIST)
......@@ -115,7 +117,7 @@ dE/da = spreadforgather(b)
void XShapeGrad::GradGather(XTensor * node, bool isEfficent)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for CopyIndexed!");
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for Gather!");
XTensor * input = income.tails[0];
XTensor * index = income.tails[1];
......@@ -126,6 +128,43 @@ void XShapeGrad::GradGather(XTensor * node, bool isEfficent)
node->visitMark = NODE_FINISHED;
}
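The elided body presumably applies the spread-for-gather rule named in the comment above ("dE/da = spreadforgather(b)"). A hypothetical CPU sketch of that rule:

/* forward:  b[i] = a[index[i]]                 (gather)
   backward: dE/da[index[i]] += dE/db[i]        (scatter-add) */
void GatherBackward(const float * gradB, const int * index, int indexSize,
                    float * gradA)
{
    for (int i = 0; i < indexSize; i++)
        gradA[index[i]] += gradB[i];
}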
/*
gradient computation for DropoutWithIndex function
*/
void XShapeGrad::GradDropoutWithIndex(XTensor * node, bool isEfficent)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for DropoutWithIndex!");
XTensor * input = income.tails[0];
XTensor * index = income.tails[1];
DTYPE scale = income.GetParam(0);
XNoder::MakeGrad(input);
//_Identity(node->grad, input->grad);
_CopyValues(node->grad, input->grad);
int order = node->grad->order;
int * dimSize = new int[order];
for (int i = 0; i < order; i++) {
dimSize[i] = node->grad->dimSize[i];
}
int order1 = 1;
int * dimSize1 = new int[order1];
dimSize1[0] = input->grad->unitNum;
input->grad->Reshape(order1, dimSize1);
_DropoutWithIndex(node->grad, index, input->grad);
_ScaleAndShiftMe(input->grad, scale);
input->grad->Reshape(order, dimSize);
delete[] dimSize;
delete[] dimSize1;
node->visitMark = NODE_FINISHED;
}
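In other words, the backward pass mirrors the forward: copy the incoming gradient, zero the dropped positions on a flattened view, and rescale. A hypothetical standalone sketch:

#include <cstring>

/* forward:  y = scale * x, with y[index[k]] = 0 at dropped positions
   backward: dE/dx = scale * dE/dy, with dE/dx[index[k]] = 0 */
void DropoutWithIndexBackward(const float * gradY, const int * index,
                              int indexSize, int n, float scale, float * gradX)
{
    std::memcpy(gradX, gradY, n * sizeof(float));  /* _CopyValues       */
    for (int k = 0; k < indexSize; k++)
        gradX[index[k]] = 0.0f;                    /* _DropoutWithIndex */
    for (int i = 0; i < n; i++)
        gradX[i] *= scale;                         /* _ScaleAndShiftMe  */
}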
/*
gradient for merge
for
......
......@@ -54,6 +54,10 @@ private:
static
void GradGather(XTensor * node, bool isEfficent);
/* gradient computation for copying indexed sub-tensors: b = gather(a, index) */
static
void GradDropoutWithIndex(XTensor * node, bool isEfficent);
/* gradient computation for merge: c = merge(a, b, ...) */
static
void GradMerge(XTensor * node, bool isEfficent);
......
......@@ -458,4 +458,4 @@ void XNet::ShowNetwork(FILE * file, XTensor * node)
}
}
}
\ No newline at end of file
}
......@@ -530,6 +530,88 @@ void XLink::Replace(const XTensor * oldOne, XTensor * newOne)
}
}
/*
copy the links of one node to another, i.e., we add the reference node's links to the new node
>> reference - the node to be copied
>> target - the new node
*/
void XLink::Copy(const XTensor * reference, XTensor * target)
{
if (reference == NULL || target == NULL)
return;
XLink &newIncome = target->income;
XLink &newOutgo = target->outgo;
XLink::ClearOutgoing(target);
XLink::ClearIncoming(target);
/* incoming nodes */
if (reference->income.typeID != 0) {
if (newIncome.tailNum < reference->income.tailNum) {
delete[] newIncome.tails;
newIncome.tails = new XTensor*[reference->income.tailNum];
}
newIncome.SetType(reference->income.typeID);
newIncome.head = target;
newIncome.tailNum = reference->income.tailNum;
memcpy(newIncome.tails, reference->income.tails, sizeof(XTensor*) * newIncome.tailNum);
int paraArraySize = reference->income.paramNum * reference->income.paramSize;
newIncome.params = new char[paraArraySize];
memcpy(newIncome.params, reference->income.params, paraArraySize);
newIncome.paramNum = reference->income.paramNum;
/* update the link to each child node */
for (int i = 0; i < newIncome.tailNum; i++) {
XTensor * child = newIncome.tails[i];
XLink &childOutgo = child->outgo;
bool hit = false;
for (int j = 0; j < childOutgo.tailNum; j++) {
if (childOutgo.tails[j] == reference) {
//childOutgo.tails[j] = target;
childOutgo.AddTail(target);
hit = true;
break;
}
}
if (childOutgo.tailNum > 0) {
CheckNTErrors(hit, "No proper node found in child.outgo edge!");
}
}
}
/* outgoing nodes */
if (newOutgo.tailNum < reference->outgo.tailNum) {
delete[] newOutgo.tails;
newOutgo.tails = new XTensor*[reference->outgo.tailNum];
}
newOutgo.head = target;
newOutgo.tailNum = reference->outgo.tailNum;
memcpy(newOutgo.tails, reference->outgo.tails, sizeof(XTensor*) * newOutgo.tailNum);
/* update the link to each parent node */
for (int i = 0; i < newOutgo.tailNum; i++) {
XTensor * parent = newOutgo.tails[i];
XLink &parentIncome = parent->income;
bool hit = false;
for (int j = 0; j < parentIncome.tailNum; j++) {
if (parentIncome.tails[j] == reference) {
//parentIncome.tails[j] = target;
parentIncome.AddTail(target);
hit = true;
}
}
if (parentIncome.tailNum > 0) {
CheckNTErrors(hit, "No proper node found in parent.income edge!");
}
}
}
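A hypothetical usage sketch; note that neighbors keep their link to reference and gain an extra tail for target (AddTail), rather than having reference swapped out as the commented-out lines would do. InitTensor1D, InitTensor, and Sum are used here as elsewhere in the library:

void CopySketch()
{
    XTensor a, b;
    InitTensor1D(&a, 8, X_FLOAT, -1);
    InitTensor1D(&b, 8, X_FLOAT, -1);
    XTensor c = Sum(a, b);   /* c.income has tails {a, b} */

    XTensor d;
    InitTensor(&d, &c);
    XLink::Copy(&c, &d);     /* d shares c's links; a and b now list d as a parent too */
}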
/*
copy incoming edges of a given node
>> reference - the node we copy from
......
......@@ -33,7 +33,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* cross reference */
struct XTensor;
#define MAX_OP_NAME_LENGTH 16
#define MAX_OP_NAME_LENGTH 64
#define PARAM_UNTI_SIZE 64
/*
......@@ -138,7 +138,7 @@ struct XLink
static
void MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id);
/* create a hyper edge with three input tensors and a output tensor */
/* create a hyper edge with three input tensors and an output tensor */
static
void MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3, XTensor * h, int id);
......@@ -174,6 +174,10 @@ struct XLink
static
void Replace(const XTensor * oldOne, XTensor * newOne);
/* copy the links of one node to another, i.e., we add the reference node's links to the new node */
static
void Copy(const XTensor * reference, XTensor * target);
/* copy links of a given node */
static
void CopyIncoming(const XTensor * reference, XTensor * target);
......
......@@ -111,6 +111,8 @@ const char * GetOPName(int type)
return "M_COPYVALUES";
else if (type == MOVEMENT_GATHER)
return "M_GATHER";
else if (type == MOVEMENT_DROPOUTWITHINDEX)
return "M_DROPOUTWITHINDEX";
else if (type == SHAPE_CONCATENATE)
return "S_CONCATENATE";
else if (type == SHAPE_MERGE)
......
......@@ -57,7 +57,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_NORMALIZE MATH_NEGATE + 1
#define MATH_POWER MATH_NORMALIZE + 1
#define MATH_SCALEANDSHIFT MATH_POWER + 1
#define MATH_MULANDSHIFT MATH_SCALEANDSHIFT + 1
#define MATH_MULANDSHIFT MATH_SCALEANDSHIFT + 1
#define MATH_SIGN MATH_MULANDSHIFT + 1
#define MATH_SUB MATH_SIGN + 1
#define MATH_SUBDIM MATH_SUB + 1
......@@ -81,8 +81,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MOVEMENT_COPYINDEXED MOVEMENT + 1
#define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1
#define MOVEMENT_GATHER MOVEMENT_COPYVALUES + 1
#define MOVEMENT_DROPOUTWITHINDEX MOVEMENT_GATHER + 1
#define SHAPE MOVEMENT_GATHER + 1
#define SHAPE MOVEMENT_DROPOUTWITHINDEX + 1
#define SHAPE_CONCATENATE SHAPE + 1
#define SHAPE_MERGE SHAPE_CONCATENATE + 1
#define SHAPE_MERGE_LIST SHAPE_MERGE + 1
......
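The last two lines of that hunk are the important fix: since each ID is defined as the previous one plus 1, adding MOVEMENT_DROPOUTWITHINDEX without rebasing SHAPE would have left SHAPE equal to MOVEMENT_DROPOUTWITHINDEX. A compile-time check of the invariant (hypothetical, not in the commit) could read:

/* fails with the old "#define SHAPE MOVEMENT_GATHER + 1" chain,
   passes with the rebased definition above */
static_assert(SHAPE != MOVEMENT_DROPOUTWITHINDEX,
              "op IDs must not collide");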
......@@ -211,7 +211,6 @@ XTensor::~XTensor()
XLink::Replace(this, newTensor);
}
XLink::ClearOutgoing(this);
XLink::ClearIncoming(this);
......@@ -294,7 +293,7 @@ void XTensor::ShallowCopy(const XTensor &tensor)
/* overloading of the equal-sign */
XTensor& XTensor::operator= (const XTensor& tensor)
{
/* we must make a hard copy of the tensor if it is the input
of another node. */
if(outgo.tailNum > 0){
......@@ -364,6 +363,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
/* create tensor links for the new tensor */
XLink::Replace(&tensor, this);
//XLink::Copy(&tensor, this);
}
return *this;
......
......@@ -95,4 +95,5 @@
#include "utilities/XMatrixSegment.h"
#include "utilities/FlushToMem.h"
#include "../function/DropoutWithIndex.h"
#endif // __CHEADER_H__
......@@ -155,4 +155,4 @@ void _CudaIndexToOnehot(XTensor * index, XTensor * onehot, int size, float confi
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
} // namespace nts(NiuTrans.Tensor)
......@@ -27,6 +27,7 @@
#include "../core/arithmetic/MultiplyDim.h"
#include "../core/math/ScaleAndShift.h"
#include "../core/getandset/SetData.h"
#include "DropoutWithIndex.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -147,16 +148,34 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
XTensor mask;
int * maskArrayInt = NULL;
DTYPE * maskArray = NULL;
DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);
if(leadingDim < 0 && leadingDim2 < 0){
XTensor mask;
InitTensor(&mask, &x);
//XTensor mask;
//InitTensor(&mask, &x);
//_SetDataRandP(&mask, 0, 1.0F, dropProb, scaleFactor);
//return Multiply(x, mask);
/* dropout with index */
int unitNum = (int)floor(x.unitNum * dropProb);
maskArrayInt = new int[unitNum];
for (int i = 0; i < unitNum; i++)
maskArrayInt[i] = rand() % x.unitNum;
XTensor maskindex;
InitTensor1D(&maskindex, unitNum, X_INT, x.devID, x.mem);
maskindex.SetData(maskArrayInt, unitNum);
_SetDataRandP(&mask, 0, 1.0F, dropProb, scaleFactor);
delete[] maskArrayInt;
return DropoutWithIndex(x, maskindex, scaleFactor);
return Multiply(x, mask);
}
else if(leadingDim2 < 0){
int n = leadingDim;
......@@ -209,7 +228,6 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
return MultiplyBroadcast(x, mask);
}
}
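Two properties of the new index path are worth keeping in mind: the mask now costs floor(unitNum * dropProb) ints instead of unitNum floats, and rand() % x.unitNum samples positions with replacement, so duplicate indices make the realized drop rate slightly lower than dropProb. A hypothetical usage sketch (passing the leading-dim arguments explicitly rather than assuming their defaults):

void DropoutSketch()
{
    XTensor x;
    InitTensor2D(&x, 64, 512, X_FLOAT, -1);
    x.SetDataRand(-1.0F, 1.0F);

    /* leadingDim < 0 and leadingDim2 < 0 take the DropoutWithIndex path */
    XTensor y = Dropout(x, 0.1F, -1, -1);
}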
/*
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Jiang Yufan (email: jiangyufan2018@outlook.com) 2019-03-20
*/
#include "DropoutWithIndex.h"
#include "DropoutWithIndex.cuh"
#include "../core/CHeader.h"
#include "../XName.h"
#include "Identity.h"
namespace nts {
/*
This is a special implementation of "dropout" to reduce memory with maskIndex.
>> x - input tensor
>> maskIndex - mask index tensor
>> c - output tensor
*/
void _DropoutWithIndex(const XTensor * x, XTensor * maskIndex, XTensor * c)
{
CheckNTErrors(maskIndex->order == 1, "Illegal tensor order!");
#ifdef USE_CUDA
if (maskIndex->devID >= 0 || x->devID >= 0 || c->devID >= 0) {
_CudaDropoutWithIndex(x, maskIndex, c);
return;
}
#endif
// TODO!!
ShowNTErrors("TODO!");
}
/*
This is a special implementation of "dropout" to reduce memory with maskIndex.
>> x - input tensor
>> maskIndex - mask index tensor
>> scale - scale factor
<< return - the output tensor after dropout
*/
XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale)
{
XTensor c;
int order = x.order;
int * dimSize = new int[order];
for (int i = 0; i < order; i++) {
dimSize[i] = x.dimSize[i];
}
InitTensor1D(&c, x.unitNum, x.dataType, x.devID, x.mem);
_SetDataFixedFloat(&c, 1.0F);
_DropoutWithIndex(&x, &maskIndex, &c);
c.Reshape(order, dimSize);
_MultiplyMe(&c, &x);
_ScaleAndShiftMe(&c, scale);
/* tensor connections */
XLink::MakeLink(&x, &maskIndex, &c, MOVEMENT_DROPOUTWITHINDEX);
XLink::AddParamToHead(&c, scale);
return c;
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
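A hypothetical direct-call sketch of the function above; indices address the flattened tensor, and the CPU branch is still a TODO, so this assumes a CUDA device:

void DropoutWithIndexSketch()
{
    XTensor x;
    InitTensor2D(&x, 2, 4, X_FLOAT, 0);    /* devID 0: GPU required for now */
    x.SetDataRand(-1.0F, 1.0F);

    int drops[2] = {1, 5};                 /* flat positions to zero out */
    XTensor maskIndex;
    InitTensor1D(&maskIndex, 2, X_INT, 0);
    maskIndex.SetData(drops, 2);

    /* 2 of 8 units dropped -> rescale the survivors by 8/6 */
    XTensor y = DropoutWithIndex(x, maskIndex, 8.0F / 6.0F);
}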
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Jiang Yufan (email: jiangyufan2018@outlook.com) 2019-03-20
*/
#include "DropoutWithIndex.cuh"
#include "../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
This is a special implementation of "dropout" to reduce memory with maskIndex.
>> tData - the data pointer of the target tensor
>> sIndex - mask index
>> size - the number of indices in sIndex
*/
__global__
void KernelDropoutWithIndex1D(DTYPE * tData, int * sIndex, int size)
{
/* global thread index */
int i = blockDim.x * blockIdx.x + threadIdx.x;
DTYPE * t = tData;
if (i < size) {
int id = sIndex[i];
t[id] = DTYPE(0.0F);
}
}
/*
This is a special implementation of "dropout" to reduce memory with maskIndex.
>> x - input tensor
>> maskIndex - mask index tensor
>> c - output tensor
*/
void _CudaDropoutWithIndex(const XTensor * x, XTensor * maskIndex, XTensor * c)
{
int devID = c->devID;
int blockNum = maskIndex->unitNum;
int cudaGrids[3];
int cudaBlocks[3];
int devIDBackup;
ProtectCudaDev(devID, devIDBackup);
GDevs.GetCudaThread(devID, blockNum, cudaGrids, cudaBlocks);
dim3 blocks(cudaGrids[0]);
dim3 threads(cudaBlocks[0]);
DTYPE * tData = (DTYPE*)c->data;
int * sIndex = NULL;
sIndex = (int *)maskIndex->data;
KernelDropoutWithIndex1D<<<blocks, threads>>>(tData, sIndex, blockNum);
BacktoCudaDev(devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
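For reference, a CPU-equivalent sketch of what the kernel computes, one element zeroed per dropped index; a loop of this shape could also back the TODO CPU branch in _DropoutWithIndex (float stands in for DTYPE here):

/* hypothetical CPU counterpart of KernelDropoutWithIndex1D */
void DropoutWithIndexCPU(float * tData, const int * sIndex, int size)
{
    for (int i = 0; i < size; i++)
        tData[sIndex[i]] = 0.0F;
}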
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Jiang Yufan (email: jiangyufan2018@outlook.com) 2019-03-20
*/
#ifndef __DROPOUTWITHINDEX_CUH__
#define __DROPOUTWITHINDEX_CUH__
#include "../XTensor.h"
#include "DropoutWithIndex.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* dropout with index (cuda version) */
void _CudaDropoutWithIndex(const XTensor * x, XTensor * maskIndex, XTensor * c);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif // __DROPOUTWITHINDEX_CUH__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Jiang Yufan (email: jiangyufan2018@outlook.com) 2019-03-20
*/
#ifndef __DROPOUTWITHINDEX_H__
#define __DROPOUTWITHINDEX_H__
#include "../XTensor.h"
namespace nts {
void _DropoutWithIndex(const XTensor * x, XTensor * maskIndex, XTensor * c);
XTensor DropoutWithIndex(const XTensor &x, XTensor &mask, DTYPE scale);
} // namespace nts(NiuTrans.Tensor)
#endif // __DROPOUTWITHINDEX_H__