Commit 270d0ff6 by Jiang Yufan

implement Dropout with index to reduce mem

parent 591d6121
......@@ -42,9 +42,11 @@ using namespace transformer;
int main( int argc, const char ** argv )
{
//sample();
//_CrtSetBreakAlloc(896);
//BackwardTest();
//return 0;
//Test();
if(argc > 1 && !strcmp(argv[1], "-fnnlm"))
FNNLMMain(argc - 1, argv + 1);
......
......@@ -719,12 +719,18 @@ void XMathGrad::GradMultiply(XTensor * node, bool isEfficient)
XTensor * a = income.tails[0];
XTensor * b = income.tails[1];
CheckNTErrors(XTensor::IsSameShaped(a, b), "Wrong sized input tensors!");
if (!isEfficient || a->isGrad) {
    XNoder::MakeGrad(a);
    _Multiply(node->grad, b, a->grad, 1.0F);
}
if (!isEfficient || b->isGrad) {
    XNoder::MakeGrad(b);
    _Multiply(node->grad, a, b->grad, 1.0F);
}
node->visitMark = NODE_FINISHED;
}
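For reference, a standalone sketch (plain C++, hypothetical names, not part of the commit) of the elementwise product rule these branches implement: for c = a * b, dE/da = dE/dc * b and dE/db = dE/dc * a, each accumulated into the existing gradient (the 1.0F beta argument above).

#include <vector>

/* hypothetical helper illustrating GradMultiply's math */
void gradMultiplySketch(const std::vector<float> &a, const std::vector<float> &b,
                        const std::vector<float> &gradC,
                        std::vector<float> &gradA, std::vector<float> &gradB)
{
    for (size_t k = 0; k < gradC.size(); k++) {
        gradA[k] += gradC[k] * b[k];   /* dE/da += dE/dc * b */
        gradB[k] += gradC[k] * a[k];   /* dE/db += dE/dc * a */
    }
}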
......@@ -889,88 +895,8 @@ gradient for normalize
*/
void XMathGrad::GradNormalize(XTensor * node, bool isEfficient)
{
ShowNTErrors("This is really a bad piece of code!!!");
XLink &income = node->income;
CheckNTErrors(income.tailNum == 5, "Wrong input tensor number for NORMALIZE!");
XTensor * input = income.tails[0];
XTensor * mean = income.tails[1];
XTensor * var = income.tails[2];
XTensor * a = income.tails[3];
XTensor * b = income.tails[4];
XTensor * c = NewTensor(var);
XTensor * d = NewTensor(a);
XTensor * e = NewTensor(a);
XTensor * f = NewTensor(a);
XTensor * g = NewTensor(a);
XTensor * h = NewTensor(a);
XTensor * i = NewTensor(a);
XTensor * j = NewTensor(a);
XTensor * k = NewTensor(var);
XTensor * p = NewTensor(var);
XTensor * q = NewTensor(var);
XTensor * r = NewTensor(a);
XTensor * x = NewTensor(mean);
XTensor * y = NewTensor(mean);
XTensor * z = NewTensor(mean);
DTYPE epsilon = income.GetParam(1);
int dim = income.GetParamInt(0);
int n = a->GetDim(dim);
XNoder::MakeGrad(input);
XNoder::MakeGrad(mean);
XNoder::MakeGrad(var);
XNoder::MakeGrad(a);
XNoder::MakeGrad(b);
/* dEdinput */
_ScaleAndShift(var, c, 1.0F, epsilon);
_Unsqueeze(c, d, dim, n);
_Power(d, e, -0.5F);
_Multiply(a, e, f);
_Multiply(node->grad, f, input->grad, 1.0F);
/* dEdmean */
_ScaleAndShift(f, g, -1.0F);
_ReduceSum(g, x, dim);
_ReduceSum(node->grad, y, dim);
_Multiply(y, x, mean->grad, 1.0F);
/* dEdvar */
_Unsqueeze(mean, h, dim, n);
_Sub(input, h, i);
_Multiply(a, i, j);
_Power(var, k, -1.5F);
_ScaleAndShift(k, p, -0.5F);
_ReduceSum(j, z, dim);
_Multiply(z, p, q);
_Multiply(y, q, var->grad, 1.0F);
/* dEda */
_Multiply(i, e, r);
_Multiply(node->grad, r, a->grad, 1.0F);
/* dEdb */
_Sum(b->grad, node->grad, b->grad);
node->visitMark = NODE_FINISHED;
ShowNTErrors("TODO!");
delete c;
delete d;
delete e;
delete f;
delete g;
delete h;
delete i;
delete j;
delete k;
delete p;
delete q;
delete r;
delete x;
delete y;
delete z;
}
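To make the temporaries above easier to follow, here is a standalone per-element sketch (hypothetical names, not part of the commit) of the dEdinput step, treating mean and var as independent inputs exactly as the routine does: with e = (var + epsilon)^(-1/2) and f = a * e, it accumulates input.grad += dE/dy * f.

#include <cmath>
#include <vector>

/* hypothetical helper illustrating the dEdinput step of GradNormalize */
void gradNormalizeInputSketch(const std::vector<float> &a,
                              const std::vector<float> &gradY,
                              float var, float epsilon,
                              std::vector<float> &gradInput)
{
    float e = 1.0f / std::sqrt(var + epsilon);   /* _Power(var + eps, -0.5) */
    for (size_t k = 0; k < gradY.size(); k++)
        gradInput[k] += gradY[k] * a[k] * e;     /* dE/dinput += dE/dy * a * e */
}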
/*
......
......@@ -43,6 +43,8 @@ void XShapeGrad::MakeGrad(XTensor * node, bool isEfficent)
GradCopyIndexed(node, isEfficent);
else if(operID == MOVEMENT_GATHER)
GradGather(node, isEfficent);
else if (operID == MOVEMENT_DROPOUTWITHINDEX)
GradDropoutWithIndex(node, isEfficent);
else if(operID == SHAPE_MERGE)
GradMerge(node, isEfficent);
else if(operID == SHAPE_MERGE_LIST)
......@@ -115,7 +117,7 @@ dE/da = spreadforgather(b)
void XShapeGrad::GradGather(XTensor * node, bool isEfficent)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for Gather!");
XTensor * input = income.tails[0];
XTensor * index = income.tails[1];
......@@ -127,6 +129,43 @@ void XShapeGrad::GradGather(XTensor * node, bool isEfficent)
}
/*
gradient computation for the DropoutWithIndex function
for c = dropoutwithindex(x, index) * scale, we have
dE/dx = dropoutwithindex(dE/dc, index) * scale
*/
void XShapeGrad::GradDropoutWithIndex(XTensor * node, bool isEfficent)
{
XLink &income = node->income;
CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for DropoutWithIndex!");
XTensor * input = income.tails[0];
XTensor * index = income.tails[1];
DTYPE scale = income.GetParam(0);
XNoder::MakeGrad(input);
//_Identity(node->grad, input->grad);
_CopyValues(node->grad, input->grad);
int order = node->grad->order;
int * dimSize = new int[order];
for (int i = 0; i < order; i++) {
dimSize[i] = node->grad->dimSize[i];
}
int order1 = 1;
int * dimSize1 = new int[order1];
dimSize1[0] = input->grad->unitNum;
input->grad->Reshape(order1, dimSize1);
_DropoutWithIndex(node->grad, index, input->grad);
_ScaleAndShiftMe(input->grad, scale);
input->grad->Reshape(order, dimSize);
delete[] dimSize;
delete[] dimSize1;
node->visitMark = NODE_FINISHED;
}
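In plain terms, the backward pass above is: copy dE/dc, zero the dropped positions named by index, and scale what remains. A standalone sketch (hypothetical names, not part of the commit):

#include <vector>

/* hypothetical helper illustrating GradDropoutWithIndex */
void gradDropoutWithIndexSketch(const std::vector<float> &gradC,
                                const std::vector<int> &index, float scale,
                                std::vector<float> &gradX)
{
    gradX = gradC;                /* _CopyValues(node->grad, input->grad) */
    for (int id : index)
        gradX[id] = 0.0f;         /* _DropoutWithIndex zeroes dropped units */
    for (float &g : gradX)
        g *= scale;               /* _ScaleAndShiftMe(input->grad, scale) */
}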
/*
gradient for merge
for
c = merge(a_0, a_1, ...)
......
......@@ -54,6 +54,10 @@ private:
static
void GradGather(XTensor * node, bool isEfficent);
/* gradient computation for dropout with index: b = dropoutwithindex(a, index) */
static
void GradDropoutWithIndex(XTensor * node, bool isEfficent);
/* gradient computation for merge: c = merge(a, b, ...) */
static
void GradMerge(XTensor * node, bool isEfficent);
......
......@@ -530,6 +530,88 @@ void XLink::Replace(const XTensor * oldOne, XTensor * newOne)
}
}
/*
copy the links of one node to another, i.e., the new node takes over the links of the reference
>> reference - the node to be copied
>> target - the new node
*/
void XLink::Copy(const XTensor * reference, XTensor * target)
{
if (reference == NULL || target == NULL)
return;
XLink &newIncome = target->income;
XLink &newOutgo = target->outgo;
XLink::ClearOutgoing(target);
XLink::ClearIncoming(target);
/* incoming nodes */
if (reference->income.typeID != 0) {
if (newIncome.tailNum < reference->income.tailNum) {
delete[] newIncome.tails;
newIncome.tails = new XTensor*[reference->income.tailNum];
}
newIncome.SetType(reference->income.typeID);
newIncome.head = target;
newIncome.tailNum = reference->income.tailNum;
memcpy(newIncome.tails, reference->income.tails, sizeof(XTensor*) * newIncome.tailNum);
int paraArraySize = reference->income.paramNum * reference->income.paramSize;
newIncome.params = new char[paraArraySize];
memcpy(newIncome.params, reference->income.params, paraArraySize);
newIncome.paramNum = reference->income.paramNum;
/* update the link to each child node */
for (int i = 0; i < newIncome.tailNum; i++) {
XTensor * child = newIncome.tails[i];
XLink &childOutgo = child->outgo;
bool hit = false;
for (int j = 0; j < childOutgo.tailNum; j++) {
if (childOutgo.tails[j] == reference) {
//childOutgo.tails[j] = target;
childOutgo.AddTail(target);
hit = true;
break;
}
}
if (childOutgo.tailNum > 0) {
CheckNTErrors(hit, "No proper node found in child.outgo edge!");
}
}
}
if (newOutgo.tailNum < reference->outgo.tailNum) {
delete[] newOutgo.tails;
newOutgo.tails = new XTensor*[reference->outgo.tailNum];
}
/* outgoing nodes */
newOutgo.head = target;
newOutgo.tailNum = reference->outgo.tailNum;
memcpy(newOutgo.tails, reference->outgo.tails, sizeof(XTensor*) * newOutgo.tailNum);
/* update the link to each parent node */
for (int i = 0; i < newOutgo.tailNum; i++) {
XTensor * parent = newOutgo.tails[i];
XLink &parentIncome = parent->income;
bool hit = false;
for (int j = 0; j < parentIncome.tailNum; j++) {
if (parentIncome.tails[j] == reference) {
//parentIncome.tails[j] = target;
parentIncome.AddTail(target);
hit = true;
}
}
if (parentIncome.tailNum > 0) {
CheckNTErrors(hit, "No proper node found in parent.income edge!");
}
}
}
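A usage sketch for the new helper (tensors hypothetical, not part of the commit). Note that AddTail appends rather than replaces, so the neighbors end up linked to both nodes:

/* hypothetical usage of XLink::Copy */
XTensor a, b, c2;
InitTensor1D(&a, 8);
InitTensor1D(&b, 8);
XTensor c = Sum(a, b);   /* c gains incoming links from a and b */
XLink::Copy(&c, &c2);    /* c2 now shares those links; a and b also list c2 */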
/*
copy incoming edges of a given node
>> reference - the node we copy from
......
......@@ -33,7 +33,7 @@ namespace nts{ // namespace nts(NiuTrans.Tensor)
/* cross reference */
struct XTensor;
#define MAX_OP_NAME_LENGTH 64
#define PARAM_UNTI_SIZE 64
/*
......@@ -138,7 +138,7 @@ struct XLink
static
void MakeLink(const XTensor * t1, const XTensor * t2, XTensor * h, int id);
/* create a hyper edge with three input tensors and an output tensor */
static
void MakeLink(const XTensor * t1, const XTensor * t2, const XTensor * t3, XTensor * h, int id);
......@@ -174,6 +174,10 @@ struct XLink
static
void Replace(const XTensor * oldOne, XTensor * newOne);
/* copy a node with another, i.e., we add the links to the new node */
static
void Copy(const XTensor * reference, XTensor * target);
/* copy links of a given node */
static
void CopyIncoming(const XTensor * reference, XTensor * target);
......
......@@ -111,6 +111,8 @@ const char * GetOPName(int type)
return "M_COPYVALUES";
else if (type == MOVEMENT_GATHER)
return "M_GATHER";
else if (type == MOVEMENT_DROPOUTWITHINDEX)
return "M_DROPOUTWITHINDEX";
else if (type == SHAPE_CONCATENATE)
return "S_CONCATENATE";
else if (type == SHAPE_MERGE)
......
......@@ -81,8 +81,9 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MOVEMENT_COPYINDEXED MOVEMENT + 1
#define MOVEMENT_COPYVALUES MOVEMENT_COPYINDEXED + 1
#define MOVEMENT_GATHER MOVEMENT_COPYVALUES + 1
#define MOVEMENT_DROPOUTWITHINDEX MOVEMENT_GATHER + 1
#define SHAPE MOVEMENT_DROPOUTWITHINDEX + 1
#define SHAPE_CONCATENATE SHAPE + 1
#define SHAPE_MERGE SHAPE_CONCATENATE + 1
#define SHAPE_MERGE_LIST SHAPE_MERGE + 1
......
......@@ -211,7 +211,6 @@ XTensor::~XTensor()
XLink::Replace(this, newTensor);
}
XLink::ClearOutgoing(this);
XLink::ClearIncoming(this);
......@@ -364,6 +363,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
/* create tensor links for the new tensor */
XLink::Replace(&tensor, this);
//XLink::Copy(&tensor, this);
}
return *this;
......
......@@ -95,4 +95,5 @@
#include "utilities/XMatrixSegment.h"
#include "utilities/FlushToMem.h"
#include "../function/DropoutWithIndex.h"
#endif // __CHEADER_H__
......@@ -27,6 +27,7 @@
#include "../core/arithmetic/MultiplyDim.h"
#include "../core/math/ScaleAndShift.h"
#include "../core/getandset/SetData.h"
#include "DropoutWithIndex.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
......@@ -147,16 +148,34 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
CheckNTErrors(dropProb >= 0.0 && dropProb <= 1.0, "The probability must be 0-1!");
XTensor mask;
int * maskArrayInt = NULL;
DTYPE * maskArray = NULL;
DTYPE scaleFactor = (DTYPE)1.0 / ((DTYPE)1.0 - dropProb);
if(leadingDim < 0 && leadingDim2 < 0){
//XTensor mask;
//InitTensor(&mask, &x);
//_SetDataRandP(&mask, 0, 1.0F, dropProb, scaleFactor);
//return Multiply(x, mask);
/* dropout with index */
int unitNum = floor(x.unitNum*dropProb);
maskArrayInt = new int[unitNum];
for (int i = 0; i < unitNum; i++)
maskArrayInt[i] = rand() % x.unitNum;
XTensor maskindex;
InitTensor1D(&maskindex, unitNum, X_INT, x.devID, x.mem);
maskindex.SetData(maskArrayInt, unitNum);
delete[] maskArrayInt;
return DropoutWithIndex(x, maskindex, scaleFactor);
}
else if(leadingDim2 < 0){
int n = leadingDim;
......@@ -209,7 +228,6 @@ XTensor Dropout(const XTensor &x, DTYPE dropProb, int leadingDim, int leadingDim
return MultiplyBroadcast(x, mask);
}
}
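The memory saving in the new branch comes from storing floor(unitNum * dropProb) int indices instead of a full DTYPE mask with unitNum entries. Because rand() % x.unitNum samples with replacement, indices can repeat and the effective drop rate can fall slightly below dropProb. A standalone sketch of the same masking (hypothetical names, not part of the commit):

#include <cstdlib>
#include <vector>

/* hypothetical helper illustrating index-based dropout */
std::vector<float> dropoutWithIndexSketch(std::vector<float> x, float dropProb)
{
    int n = (int)x.size();
    int drop = (int)(n * dropProb);     /* floor(x.unitNum * dropProb) */
    float scale = 1.0f / (1.0f - dropProb);
    for (int i = 0; i < drop; i++)
        x[std::rand() % n] = 0.0f;      /* indices may collide, as above */
    for (float &v : x)
        v *= scale;                     /* zeroed entries stay zero */
    return x;
}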
/*
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Jiang Yufan (email: jiangyufan2018@outlook.com) 2019-03-20
*/
#include "DropoutWithIndex.h"
#include "DropoutWithIndex.cuh"
#include "../core/CHeader.h"
#include "../XName.h"
#include "Identity.h"
namespace nts {
/*
This is a special implementation of "dropout" to reduce memory with maskIndex.
It sets to 0 the entries of c whose positions are listed in maskIndex.
>> x - input tensor
>> maskIndex - mask index tensor
>> c - output tensor (used as the mask: indexed positions are zeroed)
*/
void _DropoutWithIndex(const XTensor * x, XTensor * maskIndex, XTensor * c)
{
CheckNTErrors(maskIndex->order == 1, "Illegal tensor order!");
#ifdef USE_CUDA
if (maskIndex->devID >= 0 || x->devID >= 0 || c->devID >= 0) {
_CudaDropoutWithIndex(x, maskIndex, c);
return;
}
#endif
// TODO!!
ShowNTErrors("TODO!");
}
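The CPU path above is left as a TODO; a minimal fallback consistent with the CUDA kernel (zero each entry of c named by maskIndex) might look like the following sketch, which is not part of the commit:

/* hypothetical CPU fallback, mirroring KernelDropoutWithIndex1D:
   zeroes each entry of c whose position is listed in maskIndex */
void _DropoutWithIndexCPUSketch(XTensor * maskIndex, XTensor * c)
{
    DTYPE * tData = (DTYPE*)c->data;
    int * sIndex = (int*)maskIndex->data;
    for (int i = 0; i < maskIndex->unitNum; i++)
        tData[sIndex[i]] = (DTYPE)0.0F;
}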
/*
This is a special implementation of "dropout" to reduce memory with maskIndex (returns an XTensor structure).
>> x - input tensor
>> maskIndex - mask index tensor
>> scale - scale factor
<< return - output tensor
*/
XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale)
{
XTensor c;
int order = x.order;
int * dimSize = new int[order];
for (int i = 0; i < order; i++) {
dimSize[i] = x.dimSize[i];
}
InitTensor1D(&c, x.unitNum, x.dataType, x.devID, x.mem);
_SetDataFixedFloat(&c, 1.0F);
_DropoutWithIndex(&x, &maskIndex, &c);
c.Reshape(order, dimSize);
delete[] dimSize;
_MultiplyMe(&c, &x);
_ScaleAndShiftMe(&c, scale);
/* tensor connections */
XLink::MakeLink(&x, &maskIndex, &c, MOVEMENT_DROPOUTWITHINDEX);
XLink::AddParamToHead(&c, scale);
return c;
}
}// namespace nts(NiuTrans.Tensor)
\ No newline at end of file
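A usage sketch, mirroring the call site added to Dropout() earlier in this commit (sizes and names hypothetical, not part of the commit):

/* hypothetical usage of DropoutWithIndex */
XTensor DropoutWithIndexExample(const XTensor &x, DTYPE dropProb)
{
    int unitNum = (int)floor(x.unitNum * dropProb);
    int * maskArrayInt = new int[unitNum];
    for (int i = 0; i < unitNum; i++)
        maskArrayInt[i] = rand() % x.unitNum;
    XTensor maskIndex;
    InitTensor1D(&maskIndex, unitNum, X_INT, x.devID, x.mem);
    maskIndex.SetData(maskArrayInt, unitNum);
    delete[] maskArrayInt;
    return DropoutWithIndex(x, maskIndex, (DTYPE)1.0 / ((DTYPE)1.0 - dropProb));
}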
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Jiang Yufan (email: jiangyufan2018@outlook.com) 2019-03-20
*/
#include "DropoutWithIndex.cuh"
#include "../XDevice.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/*
This is a special implementation of "dropout" to reduce memory with maskIndex.
It zeroes tData at each position listed in sIndex, one thread per index.
>> tData - the data pointer of the target tensor
>> sIndex - mask index
>> size - the size of sIndex
*/
__global__
void KernelDropoutWithIndex1D(DTYPE * tData, int * sIndex, int size)
{
/* thread index */
int i = blockDim.x * blockIdx.x + threadIdx.x;
DTYPE * t = tData;
if (i < size) {
int id = sIndex[i];
t[id] = DTYPE(0.0F);
}
}
/*
This is a special implementation of "dropout" to reduce memory with maskIndex.
>> x - input tensor
>> maskIndex - mask index tensor
>> c - output tensor
*/
void _CudaDropoutWithIndex(const XTensor * x, XTensor * maskIndex, XTensor * c)
{
int devID = c->devID;
int blockNum = maskIndex->unitNum;
int cudaGrids[3];
int cudaBlocks[3];
int devIDBackup;
ProtectCudaDev(devID, devIDBackup);
GDevs.GetCudaThread(devID, blockNum, cudaGrids, cudaBlocks);
dim3 blocks(cudaGrids[0]);
dim3 threads(cudaBlocks[0]);
DTYPE * tData = (DTYPE*)c->data;
int * sIndex = NULL;
sIndex = (int *)maskIndex->data;
KernelDropoutWithIndex1D<<<blocks, threads>>>(tData, sIndex, blockNum);
BacktoCudaDev(devID, devIDBackup);
}
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Jiang Yufan (email: jiangyufan2018@outlook.com) 2019-03-20
*/
#ifndef __DROPOUTWITHINDEX_CUH__
#define __DROPOUTWITHINDEX_CUH__
#include "../XTensor.h"
#include "DropoutWithIndex.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
#ifdef USE_CUDA
/* dropout with index (cuda version) */
void _CudaDropoutWithIndex(const XTensor * x, XTensor * maskIndex, XTensor * c);
#endif // USE_CUDA
} // namespace nts(NiuTrans.Tensor)
#endif // __DROPOUTWITHINDEX_CUH__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Jiang Yufan (email: jiangyufan2018@outlook.com) 2019-03-20
*/
#ifndef __DROPOUTWITHINDEX_H__
#define __DROPOUTWITHINDEX_H__
#include "../XTensor.h"
namespace nts {
void _DropoutWithIndex(const XTensor * x, XTensor * maskIndex, XTensor * c);
XTensor DropoutWithIndex(const XTensor &x, XTensor &maskIndex, DTYPE scale);
} // namespace nts(NiuTrans.Tensor)
#endif // !__DROPOUTWITHINDEX_H__