Commit 270d0ff6 by 姜雨帆

implement Dropout with index to reduce mem

parent 591d6121
...@@ -42,9 +42,11 @@ using namespace transformer; ...@@ -42,9 +42,11 @@ using namespace transformer;
int main( int argc, const char ** argv ) int main( int argc, const char ** argv )
{ {
//sample();
//_CrtSetBreakAlloc(896); //_CrtSetBreakAlloc(896);
//BackwardTest(); //BackwardTest();
//return 0; //return 0;
//Test();
if(argc > 1 && !strcmp(argv[1], "-fnnlm")) if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
FNNLMMain(argc - 1, argv + 1); FNNLMMain(argc - 1, argv + 1);
...@@ -208,4 +210,4 @@ void SumDimTest() ...@@ -208,4 +210,4 @@ void SumDimTest()
z.Dump(stderr, "z:"); z.Dump(stderr, "z:");
delete[] data; delete[] data;
} }
\ No newline at end of file
...@@ -719,12 +719,18 @@ void XMathGrad::GradMultiply(XTensor * node, bool isEfficient) ...@@ -719,12 +719,18 @@ void XMathGrad::GradMultiply(XTensor * node, bool isEfficient)
XTensor * a = income.tails[0]; XTensor * a = income.tails[0];
XTensor * b = income.tails[1]; XTensor * b = income.tails[1];
XNoder::MakeGrad(a);
XNoder::MakeGrad(b);
CheckNTErrors(XTensor::IsSameShaped(a, b), "Wrong sized input tensors!"); CheckNTErrors(XTensor::IsSameShaped(a, b), "Wrong sized input tensors!");
_Multiply(node->grad, b, a->grad, 1.0F);
_Multiply(node->grad, a, b->grad, 1.0F); if (!isEfficient || a->isGrad) {
XNoder::MakeGrad(a);
_Multiply(node->grad, b, a->grad, 1.0F);
}
if (!isEfficient || b->isGrad) {
XNoder::MakeGrad(b);
_Multiply(node->grad, a, b->grad, 1.0F);;
}
node->visitMark = NODE_FINISHED; node->visitMark = NODE_FINISHED;
} }
...@@ -889,88 +895,8 @@ gradient for normalize ...@@ -889,88 +895,8 @@ gradient for normalize
*/ */
void XMathGrad::GradNormalize(XTensor * node, bool isEfficient) void XMathGrad::GradNormalize(XTensor * node, bool isEfficient)
{ {
ShowNTErrors("This is really a bad piece of code!!!"); ShowNTErrors("TODO!");
XLink &income = node->income;
CheckNTErrors(income.tailNum == 5, "Wrong input tensor number for NORMALIZE!");
XTensor * input = income.tails[0];
XTensor * mean = income.tails[1];
XTensor * var = income.tails[2];
XTensor * a = income.tails[3];
XTensor * b = income.tails[4];
XTensor * c = NewTensor(var);
XTensor * d = NewTensor(a);
XTensor * e = NewTensor(a);
XTensor * f = NewTensor(a);
XTensor * g = NewTensor(a);
XTensor * h = NewTensor(a);
XTensor * i = NewTensor(a);
XTensor * j = NewTensor(a);
XTensor * k = NewTensor(var);
XTensor * p = NewTensor(var);
XTensor * q = NewTensor(var);
XTensor * r = NewTensor(a);
XTensor * x = NewTensor(mean);
XTensor * y = NewTensor(mean);
XTensor * z = NewTensor(mean);
DTYPE epsilon = income.GetParam(1);
int dim = income.GetParamInt(0);
int n = a->GetDim(dim);
XNoder::MakeGrad(input);
XNoder::MakeGrad(mean);
XNoder::MakeGrad(var);
XNoder::MakeGrad(a);
XNoder::MakeGrad(b);
/* dEdinput */
_ScaleAndShift(var, c, 1.0F, epsilon);
_Unsqueeze(c, d, dim, n);
_Power(d, e, -0.5F);
_Multiply(a, e, f);
_Multiply(node->grad, f, input->grad, 1.0F);
/* dEdmean */
_ScaleAndShift(f, g, -1.0F);
_ReduceSum(g, x, dim);
_ReduceSum(node->grad, y, dim);
_Multiply(y, x, mean->grad, 1.0F);
/* dEdvar */
_Unsqueeze(mean, h, dim, n);
_Sub(input, h, i);
_Multiply(a, i, j);
_Power(var, k, -1.5F);
_ScaleAndShift(k, p, -0.5F);
_ReduceSum(j, z, dim);
_Multiply(z, p, q);
_Multiply(y, q, var->grad, 1.0F);
/* dEda */
_Multiply(i, e, r);
_Multiply(node->grad, r, a->grad, 1.0F);
/* dEdb */
_Sum(b->grad, node->grad, b->grad);
node->visitMark = NODE_FINISHED;
delete c;
delete d;
delete e;
delete f;
delete g;
delete h;
delete i;
delete j;
delete k;
delete p;
delete q;
delete r;
delete x;
delete y;
delete z;
} }
/* /*
......
...@@ -43,6 +43,8 @@ void XShapeGrad::MakeGrad(XTensor * node, bool isEfficent) ...@@ -43,6 +43,8 @@ void XShapeGrad::MakeGrad(XTensor * node, bool isEfficent)
GradCopyIndexed(node, isEfficent); GradCopyIndexed(node, isEfficent);
else if(operID == MOVEMENT_GATHER) else if(operID == MOVEMENT_GATHER)
GradGather(node, isEfficent); GradGather(node, isEfficent);
else if (operID == MOVEMENT_DROPOUTWITHINDEX)
GradDropoutWithIndex(node, isEfficent);
else if(operID == SHAPE_MERGE) else if(operID == SHAPE_MERGE)
GradMerge(node, isEfficent); GradMerge(node, isEfficent);
else if(operID == SHAPE_MERGE_LIST) else if(operID == SHAPE_MERGE_LIST)
...@@ -115,7 +117,7 @@ dE/da = spreadforgather(b) ...@@ -115,7 +117,7 @@ dE/da = spreadforgather(b)
void XShapeGrad::GradGather(XTensor * node, bool isEfficent) void XShapeGrad::GradGather(XTensor * node, bool isEfficent)
{ {
XLink &income = node->income; XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for CopyIndexed!"); CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for Gather!");
XTensor * input = income.tails[0]; XTensor * input = income.tails[0];
XTensor * index = income.tails[1]; XTensor * index = income.tails[1];
...@@ -126,6 +128,43 @@ void XShapeGrad::GradGather(XTensor * node, bool isEfficent) ...@@ -126,6 +128,43 @@ void XShapeGrad::GradGather(XTensor * node, bool isEfficent)
node->visitMark = NODE_FINISHED; node->visitMark = NODE_FINISHED;
} }
/*
gradient computation for the DropoutWithIndex function
The gradient of the output is copied to the input, the entries listed
in the mask index are set to zero, and the result is multiplied by the
scale factor stored on the incoming edge.
>> node - the node (output of DropoutWithIndex) for backward computation
>> isEfficent - not used by this function
*/
void XShapeGrad::GradDropoutWithIndex(XTensor * node, bool isEfficent)
{
    XLink &income = node->income;

    /* tails[0] (input) and tails[1] (mask index) are both read below */
    CheckNTErrors(income.tailNum > 1, "Wrong input tensor number for DropoutWithIndex!");

    XTensor * input = income.tails[0];
    XTensor * index = income.tails[1];
    DTYPE scale = income.GetParam(0);

    XNoder::MakeGrad(input);

    /* NOTE(review): this overwrites input->grad instead of accumulating into it;
       confirm this is intended when the input feeds more than one node */
    _CopyValues(node->grad, input->grad);

    /* keep the original shape so that it can be restored after the 1D pass */
    int order = node->grad->order;
    int * dimSize = new int[order];
    for (int i = 0; i < order; i++)
        dimSize[i] = node->grad->dimSize[i];

    /* the mask index addresses elements by flat offset, so view the gradient as 1D */
    int flatSize = input->grad->unitNum;
    input->grad->Reshape(1, &flatSize);

    _DropoutWithIndex(node->grad, index, input->grad);
    _ScaleAndShiftMe(input->grad, scale);

    input->grad->Reshape(order, dimSize);

    /* the shape buffer is only needed for the Reshape call above */
    delete[] dimSize;

    node->visitMark = NODE_FINISHED;
}
/* /*
gradient for merge gradient for merge
for for
......
...@@ -54,6 +54,10 @@ private: ...@@ -54,6 +54,10 @@ private:
static static
void GradGather(XTensor * node, bool isEfficent); void GradGather(XTensor * node, bool isEfficent);
/* gradient computation for dropout with index: b = dropoutwithindex(a, index) */
static
void GradDropoutWithIndex(XTensor * node, bool isEfficent);
/* gradient computation for merge: c = merge(a, b, ...) */ /* gradient computation for merge: c = merge(a, b, ...) */
static static
void GradMerge(XTensor * node, bool isEfficent); void GradMerge(XTensor * node, bool isEfficent);
......
...@@ -458,4 +458,4 @@ void XNet::ShowNetwork(FILE * file, XTensor * node) ...@@ -458,4 +458,4 @@ void XNet::ShowNetwork(FILE * file, XTensor * node)
} }
} }
} }
\ No newline at end of file
...@@ -530,6 +530,88 @@ void XLink::Replace(const XTensor * oldOne, XTensor * newOne) ...@@ -530,6 +530,88 @@ void XLink::Replace(const XTensor * oldOne, XTensor * newOne)
} }
} }
/*
copy a node with another, i.e., we add the links to the new node
The links of the reference node are kept: the target is appended to the
neighbours' edges (AddTail) rather than substituted for the reference.
>> reference - the node to be copied
>> target - the new node
*/
void XLink::Copy(const XTensor * reference, XTensor * target)
{
/* nothing to do if either node is missing */
if (reference == NULL || target == NULL)
return;
XLink &newIncome = target->income;
XLink &newOutgo = target->outgo;
/* drop whatever links the target currently has */
XLink::ClearOutgoing(target);
XLink::ClearIncoming(target);
/* incoming nodes */
/* only copied when the reference has an operation type on its incoming edge */
if (reference->income.typeID != 0) {
/* NOTE(review): the guard compares the current tailNum, not a capacity;
   whether tailNum is still meaningful after ClearIncoming is not visible here */
if (newIncome.tailNum < reference->income.tailNum) {
delete[] newIncome.tails;
newIncome.tails = new XTensor*[reference->income.tailNum];
}
newIncome.SetType(reference->income.typeID);
newIncome.head = target;
newIncome.tailNum = reference->income.tailNum;
memcpy(newIncome.tails, reference->income.tails, sizeof(XTensor*) * newIncome.tailNum);
/* parameters are copied as a raw byte array of paramNum * paramSize bytes */
int paraArraySize = reference->income.paramNum * reference->income.paramSize;
/* NOTE(review): the previous params pointer is not freed here; confirm that
   ClearIncoming has released it, otherwise this leaks */
newIncome.params = new char[paraArraySize];
memcpy(newIncome.params, reference->income.params, paraArraySize);
newIncome.paramNum = reference->income.paramNum;
/* update the link to each child node */
for (int i = 0; i < newIncome.tailNum; i++) {
XTensor * child = newIncome.tails[i];
XLink &childOutgo = child->outgo;
bool hit = false;
/* find the reference among the child's outgoing tails; the target is
   added once (the loop stops at the first match) */
for (int j = 0; j < childOutgo.tailNum; j++) {
if (childOutgo.tails[j] == reference) {
//childOutgo.tails[j] = target;
childOutgo.AddTail(target);
hit = true;
break;
}
}
/* a child that has outgoing links must have pointed at the reference */
if (childOutgo.tailNum > 0) {
CheckNTErrors(hit, "No proper node found in child.outgo edge!");
}
}
}
/* NOTE(review): same tailNum-based guard as for the incoming edge above */
if (newOutgo.tailNum < reference->outgo.tailNum) {
delete[] newOutgo.tails;
newOutgo.tails = new XTensor*[reference->outgo.tailNum];
}
/* outgoing nodes */
newOutgo.head = target;
newOutgo.tailNum = reference->outgo.tailNum;
memcpy(newOutgo.tails, reference->outgo.tails, sizeof(XTensor*) * newOutgo.tailNum);
/* update the link to each parent node */
for (int i = 0; i < newOutgo.tailNum; i++) {
XTensor * parent = newOutgo.tails[i];
XLink &parentIncome = parent->income;
bool hit = false;
/* unlike the child loop there is no break: the target is added once for
   every occurrence of the reference in the parent's incoming tails */
for (int j = 0; j < parentIncome.tailNum; j++) {
if (parentIncome.tails[j] == reference) {
//parentIncome.tails[j] = target;
parentIncome.AddTail(target);
hit = true;
}
}
/* a parent that has incoming links must have pointed at the reference */
if (parentIncome.tailNum > 0) {
CheckNTErrors(hit, "No proper node found in parent.income edge!");
}
}
}
/* /*
copy incoming edges of a given node copy incoming edges of a given node
>> reference - the node we copy from >> reference - the node we copy from
......
...@@ -33,7 +33,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor) ...@@ -33,7 +33,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* cross reference */ /* cross reference */
struct XTensor; struct XTensor;
#define MAX_OP_NAME_LENGTH 16 #define MAX_OP_NAME_LENGTH 64
#define PARAM_UNTI_SIZE 64 #define PARAM_UNTI_SIZE 64
/* /*
...@@ -138,7 +138,7 @@ struct XLink ...@@ -138,7 +138,7 @@ struct XLink
static static
void MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id); void MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id);
/* create a hyper edge with three input tensors and a output tensor */ /* create a hyper edge with two input tensors and a output tensor */
static static
void MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3, XTensor * h, int id); void MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3, XTensor * h, int id);
...@@ -174,6 +174,10 @@ struct XLink ...@@ -174,6 +174,10 @@ struct XLink
static static
void Replace(const XTensor * oldOne, XTensor * newOne); void Replace(const XTensor * oldOne, XTensor * newOne);
/* copy a node with another, i.e., we add the links to the new node */
static
void Copy(const XTensor * reference, XTensor * target);
/* copy links of a given node */ /* copy links of a given node */
static static
void CopyIncoming(const XTensor * reference, XTensor * target); void CopyIncoming(const XTensor * reference, XTensor * target);
......
...@@ -111,6 +111,8 @@ const char * GetOPName(int type) ...@@ -111,6 +111,8 @@ const char * GetOPName(int type)
return "M_COPYVALUES"; return "M_COPYVALUES";
else if (type == MOVEMENT_GATHER) else if (type == MOVEMENT_GATHER)
return "M_GATHER"; return "M_GATHER";
else if (type == MOVEMENT_DROPOUTWITHINDEX)
return "M_DROPOUTWITHINDEX";
else if (type == SHAPE_CONCATENATE) else if (type == SHAPE_CONCATENATE)
return "S_CONCATENATE"; return "S_CONCATENATE";
else if (type == SHAPE_MERGE) else if (type == SHAPE_MERGE)
......
...@@ -57,7 +57,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -57,7 +57,7 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_NORMALIZE MATH_NEGATE + 1 #define MATH_NORMALIZE MATH_NEGATE + 1
#define MATH_POWER MATH_NORMALIZE + 1 #define MATH_POWER MATH_NORMALIZE + 1
#define MATH_SCALEANDSHIFT MATH_POWER + 1 #define MATH_SCALEANDSHIFT MATH_POWER + 1
#define MATH_MULANDSHIFT MATH_SCALEANDSHIFT + 1 #define MATH_MULANDSHIFT MATH_SCALEANDSHIFT + 1
#define MATH_SIGN MATH_MULANDSHIFT + 1 #define MATH_SIGN MATH_MULANDSHIFT + 1
#define MATH_SUB MATH_SIGN + 1 #define MATH_SUB MATH_SIGN + 1
#define MATH_SUBDIM MATH_SUB + 1 #define MATH_SUBDIM MATH_SUB + 1
...@@ -81,8 +81,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -81,8 +81,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MOVEMENT_COPYINDEXED MOVEMENT + 1 #define MOVEMENT_COPYINDEXED MOVEMENT + 1
#define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1 #define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1
#define MOVEMENT_GATHER MOVEMENT_COPYVALUES + 1 #define MOVEMENT_GATHER MOVEMENT_COPYVALUES + 1
#define MOVEMENT_DROPOUTWITHINDEX MOVEMENT_GATHER + 1
#define SHAPE MOVEMENT_GATHER + 1 #define SHAPE MOVEMENT_DROPOUTWITHINDEX + 1
#define SHAPE_CONCATENATE SHAPE + 1 #define SHAPE_CONCATENATE SHAPE + 1
#define SHAPE_MERGE SHAPE_CONCATENATE + 1 #define SHAPE_MERGE SHAPE_CONCATENATE + 1
#define SHAPE_MERGE_LIST SHAPE_MERGE + 1 #define SHAPE_MERGE_LIST SHAPE_MERGE + 1
......
...@@ -211,7 +211,6 @@ XTensor::~XTensor() ...@@ -211,7 +211,6 @@ XTensor::~XTensor()
XLink::Replace(this, newTensor); XLink::Replace(this, newTensor);
} }
XLink::ClearOutgoing(this); XLink::ClearOutgoing(this);
XLink::ClearIncoming(this); XLink::ClearIncoming(this);
...@@ -294,7 +293,7 @@ void XTensor::ShallowCopy(const XTensor &tensor) ...@@ -294,7 +293,7 @@ void XTensor::ShallowCopy(const XTensor &tensor)
/* overloading of the equal-sign */ /* overloading of the equal-sign */
XTensor& XTensor::operator= (const XTensor& tensor) XTensor& XTensor::operator= (const XTensor& tensor)
{ {
/* we must make a hard copy of the tensor if it is the input /* we must make a hard copy of the tensor if it is the input
of another node. */ of another node. */
if(outgo.tailNum > 0){ if(outgo.tailNum > 0){
...@@ -364,6 +363,7 @@ XTensor& XTensor::operator= (const XTensor& tensor) ...@@ -364,6 +363,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
/* create tensor links for the new tensor */ /* create tensor links for the new tensor */
XLink::Replace(&tensor, this); XLink::Replace(&tensor, this);
//XLink::Copy(&tensor, this);
} }
return *this; return *this;
......
...@@ -95,4 +95,5 @@ ...@@ -95,4 +95,5 @@
#include "utilities/XMatrixSegment.h" #include "utilities/XMatrixSegment.h"
#include "utilities/FlushToMem.h" #include "utilities/FlushToMem.h"
#include "../function/DropoutWithIndex.h"
#endif // __CHEADER_H__ #endif // __CHEADER_H__
...@@ -155,4 +155,4 @@ void _CudaIndexToOnehot(XTensor * index, XTensor * onehot, int size, float confi ...@@ -155,4 +155,4 @@ void _CudaIndexToOnehot(XTensor * index, XTensor * onehot, int size, float confi
#endif // USE_CUDA #endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "../core/arithmetic/MultiplyDim.h" #include "../core/arithmetic/MultiplyDim.h"
#include "../core/math/ScaleAndShift.h" #include "../core/math/ScaleAndShift.h"
#include "../core/getandset/SetData.h" #include "../core/getandset/SetData.h"
#include "DropoutWithIndex.h"
namespace nts{ // namespace nts(NiuTrans.Tensor namespace nts{ // namespace nts(NiuTrans.Tensor
...@@ -147,16 +148,34 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim ...@@ -147,16 +148,34 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!"); CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
XTensor mask; XTensor mask;
int * maskArrayInt = NULL;
DTYPE * maskArray = NULL; DTYPE * maskArray = NULL;
DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb); DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);
if(leadingDim < 0 && leadingDim2 < 0){ if(leadingDim < 0 && leadingDim2 < 0){
XTensor mask; //XTensor mask;
InitTensor(&mask, &x); //InitTensor(&mask, &x);
//_SetDataRandP(&mask, 0, 1.0F, dropProb, scaleFactor);
//return Multiply(x, mask);
/* dropout with index */
int unitNum = floor(x.unitNum*dropProb);
maskArrayInt = new int[unitNum];
for (int i = 0; i < unitNum; i++)
maskArrayInt[i] = rand() % x.unitNum;
XTensor maskindex;
InitTensor1D(&maskindex, unitNum, X_INT, x.devID, x.mem);
maskindex.SetData(maskArrayInt, unitNum);
_SetDataRandP(&mask, 0, 1.0F, dropProb, scaleFactor); delete[] maskArrayInt;
return DropoutWithIndex(x, maskindex, scaleFactor);
return Multiply(x, mask);
} }
else if(leadingDim2 < 0){ else if(leadingDim2 < 0){
int n = leadingDim; int n = leadingDim;
...@@ -209,7 +228,6 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim ...@@ -209,7 +228,6 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
return MultiplyBroadcast(x, mask); return MultiplyBroadcast(x, mask);
} }
} }
/* /*
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Jiang Yufan (email: jiangyufan2018@outlook.com) 2019-03-20
*/
#include "DropoutWithIndex.h"
#include "DropoutWithIndex.cuh"
#include "../core/CHeader.h"
#include "../XName.h"
#include "Identity.h"
namespace nts {
/*
This is a special implementation of "dropout" to reduce memory with maskIndex.
>> x - input tensor
>> maskIndex - mask index tensor
>> c - output tensor
*/
void _DropoutWithIndex(const XTensor * x, XTensor * maskIndex, XTensor * c)
{
CheckNTErrors(maskIndex->order == 1, "Illegal tensor order!");
#ifdef USE_CUDA
if (maskIndex->devID >= 0 || x->devID >= 0 || c->devID >= 0) {
_CudaDropoutWithIndex(x, maskIndex, c);
return;
}
#endif
// TODO!!
ShowNTErrors("TODO!");
}
/*
This is a special implementation of "dropout" to reduce memory with maskIndex.
A tensor of ones is built, the entries listed in maskIndex are zeroed, and the
result is multiplied element-wise by x and then by the scale factor.
>> x - input tensor
>> maskIndex - mask index tensor (flat offsets of the elements to drop)
>> scale - scale factor
<< return - the output tensor, with the same shape as x
*/
XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale)
{
    XTensor c;

    /* keep the shape of x; the mask is applied on a 1D view first */
    int order = x.order;
    int * dimSize = new int[order];

    for (int i = 0; i < order; i++) {
        dimSize[i] = x.dimSize[i];
    }

    InitTensor1D(&c, x.unitNum, x.dataType, x.devID, x.mem);

    _SetDataFixedFloat(&c, 1.0F);

    _DropoutWithIndex(&x, &maskIndex, &c);

    c.Reshape(order, dimSize);

    /* the shape buffer is only needed for the Reshape call above */
    delete[] dimSize;

    _MultiplyMe(&c, &x);

    _ScaleAndShiftMe(&c, scale);

    /* tensor connections */
    XLink::MakeLink(&x, &maskIndex, &c, MOVEMENT_DROPOUTWITHINDEX);
    XLink::AddParamToHead(&c, scale);

    return c;
}
}// namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Jiang Yufan (email: jiangyufan2018@outlook.com) 2019-03-20
*/
#include "DropoutWithIndex.cuh"
#include "../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
__global__
/*
Zero the entries of the target buffer that are selected by the mask index
(one thread per mask entry).
>> tData - the data pointer of the target tensor
>> sIndex - mask index, i.e., offsets into tData
>> size - the number of entries in sIndex
*/
void KernelDropoutWithIndex1D(DTYPE * tData, int * sIndex, int size)
{
    /* global thread index */
    const int tid = threadIdx.x + blockIdx.x * blockDim.x;

    /* threads beyond the end of the mask index have nothing to do */
    if (tid >= size)
        return;

    tData[sIndex[tid]] = DTYPE(0.0F);
}
/*
This is a special implementation of "dropout" to reduce memory with maskIndex.
Launches KernelDropoutWithIndex1D on the device of c with one thread per
mask entry.
>> x - input tensor (not read by this function)
>> maskIndex - mask index tensor (its data is read as int offsets into c)
>> c - output tensor (modified in place)
*/
void _CudaDropoutWithIndex(const XTensor * x, XTensor * maskIndex, XTensor * c)
{
    int devID = c->devID;
    int indexNum = maskIndex->unitNum;

    /* an empty mask index means nothing is dropped, so skip the launch
       (and avoid asking for a thread layout for zero elements) */
    if (indexNum <= 0)
        return;

    int cudaGrids[3];
    int cudaBlocks[3];

    int devIDBackup;
    ProtectCudaDev(devID, devIDBackup);

    GDevs.GetCudaThread(devID, indexNum, cudaGrids, cudaBlocks);

    dim3 blocks(cudaGrids[0]);
    dim3 threads(cudaBlocks[0]);

    DTYPE * tData = (DTYPE*)c->data;
    int * sIndex = (int *)maskIndex->data;

    KernelDropoutWithIndex1D <<<blocks, threads >>>(tData, sIndex, indexNum);

    BacktoCudaDev(devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Jiang Yufan (email: jiangyufan2018@outlook.com) 2019-03-20
*/
#ifndef __DROPOUTWITHINDEX_CUH__
#define __DROPOUTWITHINDEX_CUH__
#include "../XTensor.h"
#include "DropoutWithIndex.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* dropout with index (cuda version): set the elements of c whose offsets
   are listed in maskIndex to zero */
void _CudaDropoutWithIndex(const XTensor * x, XTensor * maskIndex, XTensor * c);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif // __DROPOUTWITHINDEX_CUH__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Jiang Yufan (email: jiangyufan2018@outlook.com) 2019-03-20
*/
#ifndef __DROPOUTWITHINDEX_H__
#define __DROPOUTWITHINDEX_H__
#include "../XTensor.h"
namespace nts {
/* dropout with index: set the elements of c whose offsets are listed in
   maskIndex (a 1D tensor) to zero */
void _DropoutWithIndex(const XTensor * x, XTensor * maskIndex, XTensor * c);
/* dropout with index (return an XTensor structure): x with the elements listed
   in the mask index zeroed, multiplied by scale */
XTensor DropoutWithIndex(const XTensor &x, XTensor &mask, DTYPE scale);
} // namespace nts(NiuTrans.Tensor)
#endif // !__DROPOUTWITHINDEX_H__
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论