Commit ceb5b101 by xuchen

1. add gather function
2. add cross entropy forward computation and backward computation
3. code optimization
4. merge with xiaotong-working branch

parent 102db468
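The change threaded through every file in this diff is a new bool isEfficient parameter on the backward routines: in efficient mode, a gradient is allocated and computed only for inputs whose isGrad flag is set. The guard below is the pattern the patch applies at each gradient step (taken from the GradSum hunk further down; shown in isolation here as a reading aid):

    /* skip dE/da entirely unless input a actually needs a gradient */
    if(!isEfficient || a->isGrad){
        XNoder::MakeGrad(a);                 /* allocate a->grad lazily */
        _Sum(a->grad, node->grad, a->grad);  /* accumulate dE/da (identity here) */
    }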
@@ -29,10 +29,8 @@
 namespace nts{

 /* compute dE/dx of a node */
-void XFuncGrad::MakeGrad(XTensor * node)
+void XFuncGrad::MakeGrad(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     int operID = income.typeID;
...
@@ -35,7 +35,7 @@ class XFuncGrad
 public:
     /* compute dE/dx of a node */
     static
-    void MakeGrad(XTensor * node);
+    void MakeGrad(XTensor * node, bool isEfficient);

     /* indicates whether the node is for an activation function */
     static
...
@@ -28,69 +28,73 @@
 namespace nts{

 /* compute dE/dx of a node */
-void XMathGrad::MakeGrad(XTensor * node)
+void XMathGrad::MakeGrad(XTensor * node, bool isEfficient)
 {
-    CheckNTErrors(node->grad != NULL, "No gradient found!");
+    if(!isEfficient){
+        CheckNTErrors(node->grad != NULL, "No gradient found!");
+    }
+    else{
+        CheckNTErrors(!node->isGrad || node->grad != NULL, "No gradient found!");
+    }

     XLink &income = node->income;
     int operID = income.typeID;

     if(operID == MATH_ABSOLUTE)
-        GradAbsolute(node);
+        GradAbsolute(node, isEfficient);
     else if(operID == MATH_COS)
-        GradCos(node);
+        GradCos(node, isEfficient);
     else if(operID == MATH_EXP)
-        GradExp(node);
+        GradExp(node, isEfficient);
     else if(operID == MATH_LOG)
-        GradLog(node);
+        GradLog(node, isEfficient);
     else if(operID == MATH_ROUND)
-        GradRound(node);
+        GradRound(node, isEfficient);
     else if(operID == MATH_SIGN)
-        GradSign(node);
+        GradSign(node, isEfficient);
     else if(operID == MATH_SIN)
-        GradSin(node);
+        GradSin(node, isEfficient);
     else if(operID == MATH_TAN)
-        GradTan(node);
+        GradTan(node, isEfficient);
     else if(operID == MATH_CLIP)
-        GradClip(node);
+        GradClip(node, isEfficient);
     else if(operID == MATH_DIV)
-        GradDiv(node);
+        GradDiv(node, isEfficient);
     else if(operID == MATH_DIVDIM)
-        GradDivDim(node);
+        GradDivDim(node, isEfficient);
     else if(operID == MATH_MATRIXMUL)
-        GradMatrixMul(node);
+        GradMatrixMul(node, isEfficient);
     else if(operID == MATH_MATRIXMULBATCHED)
-        GradMatrixMulBatched(node);
+        GradMatrixMulBatched(node, isEfficient);
     else if(operID == MATH_MULTIPLY)
-        GradMultiply(node);
+        GradMultiply(node, isEfficient);
     else if(operID == MATH_MULTIPLYDIM)
-        GradMultiplyDim(node);
+        GradMultiplyDim(node, isEfficient);
     else if(operID == MATH_NEGATE)
-        GradNegate(node);
+        GradNegate(node, isEfficient);
     else if(operID == MATH_NORMALIZE)
-        GradNormalize(node);
+        GradNormalize(node, isEfficient);
     else if(operID == MATH_POWER)
-        GradPower(node);
+        GradPower(node, isEfficient);
     else if(operID == MATH_SCALEANDSHIFT)
-        GradScaleAndShift(node);
+        GradScaleAndShift(node, isEfficient);
     else if(operID == MATH_SUB)
-        GradSub(node);
+        GradSub(node, isEfficient);
     else if(operID == MATH_SUBDIM)
-        GradSubDim(node);
+        GradSubDim(node, isEfficient);
     else if(operID == MATH_SUM)
-        GradSum(node);
+        GradSum(node, isEfficient);
     else if(operID == MATH_SUMDIM)
-        GradSumDim(node);
+        GradSumDim(node, isEfficient);
     else if(operID == REDUCE_REDUCEMEAN)
-        GradReduceMean(node);
+        GradReduceMean(node, isEfficient);
     else if(operID == REDUCE_REDUCESUM)
-        GradReduceSum(node);
+        GradReduceSum(node, isEfficient);
     else if(operID == REDUCE_REDUCESUMSQUARED)
-        GradReduceSumSquared(node);
+        GradReduceSumSquared(node, isEfficient);
     else if(operID == REDUCE_REDUCEVARIANCE)
-        GradReduceVariance(node);
+        GradReduceVariance(node, isEfficient);
     else{
         ShowNTErrors("TODO!");
     }
@@ -111,8 +115,10 @@ we have
 dE/da = dE/dc     a >= 0
        -dE/dc     a < 0
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradAbsolute(XTensor * node)
+void XMathGrad::GradAbsolute(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for ABSOLUTE!");
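The hunk is truncated before the gradient math; here is a sketch of what the absolute-value backward step has to compute, assuming elementwise helpers _Sign and _Multiply of the kind used elsewhere in this diff (an illustration, not the file's actual body):

    /* dE/da = dE/dc * sign(a), accumulated into a->grad */
    XTensor * a = income.tails[0];
    if(!isEfficient || a->isGrad){
        XNoder::MakeGrad(a);
        XTensor * tmp = NewTensorBuf(a, a->devID, a->mem);
        _Sign(a, tmp);                               /* tmp = sign(a), 0 at a == 0 */
        _Multiply(node->grad, tmp, a->grad, 1.0F);   /* a->grad += dE/dc * sign(a) */
        DelTensorBuf(tmp);
    }
    node->visitMark = NODE_FINISHED;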
@@ -137,8 +143,10 @@ c = cos(a)
 we have
 dE/da = dE/dc * -sin(a)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradCos(XTensor * node)
+void XMathGrad::GradCos(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for COS!");
@@ -164,8 +172,10 @@ c = exp(a)
 we have
 dE/da = dE/dc * exp(a)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradExp(XTensor * node)
+void XMathGrad::GradExp(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for EXP!");
@@ -190,8 +200,10 @@ c = log(a)
 we have
 dE/da = dE/dc * 1/a
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradLog(XTensor * node)
+void XMathGrad::GradLog(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for LOG!");
@@ -212,8 +224,10 @@ c = round(a)
 we have
 dE/da = 0
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradRound(XTensor * node)
+void XMathGrad::GradRound(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for ROUND!");
@@ -231,8 +245,10 @@ c = sign(a)
 we have
 dE/da = 0
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradSign(XTensor * node)
+void XMathGrad::GradSign(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SIGN!");
@@ -250,8 +266,10 @@ c = sin(a)
 we have
 dE/da = dE/dc * cos(a)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradSin(XTensor * node)
+void XMathGrad::GradSin(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SIN!");
@@ -276,8 +294,10 @@ c = tan(a)
 we have
 dE/da = dE/dc * 1/(cos(a))^2
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradTan(XTensor * node)
+void XMathGrad::GradTan(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for TAN!");
@@ -302,8 +322,10 @@ we have
 dE/da = 1     lower < a < upper
 dE/da = 0     otherwise
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradClip(XTensor * node)
+void XMathGrad::GradClip(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for CLIP!");
@@ -332,8 +354,10 @@ we have
 dE/da = dE/dc / b
 dE/db = dE/dc * a / -b^2
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradDiv(XTensor * node)
+void XMathGrad::GradDiv(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for DIVIDE!");
@@ -365,8 +389,12 @@ c = a / b
 where the size of b is equal to dimension n of a, i.e., |b| = a.dimSize[n]
 dE/da = dE/dc * (1/b)
 dE/db = (dE/dc * (-a/b^2)).reduce(0,...,n-1,n+1,...)
+>> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradDivDim(XTensor * node)
+void XMathGrad::GradDivDim(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for DIVDIM!");
@@ -391,7 +419,7 @@ void XMathGrad::GradDivDim(XTensor * node)
     XTensor * interGradTMP = NewTensorBuf(node->grad, node->devID, node->mem);

     _Negate(a, aTMP1);
-    _Power(b, bTMP, -2);
+    _Power(b, bTMP, -2.0F);
     _MultiplyDim(aTMP1, bTMP, aTMP2, n);
     _Multiply(node->grad, aTMP2, interGradTMP);
@@ -405,17 +433,17 @@ void XMathGrad::GradDivDim(XTensor * node)
            size of b. Then we can reduce the matrix into a row vector. */
         interGradTMP->Reshape(2, reshapedSize);

-        if(b->outgo.tailNum > 1){
+        //if(b->outgo.tailNum > 1){
             XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
             _ReduceSum(interGradTMP, bGradTMP, 0);
             _Sum(b->grad, bGradTMP, b->grad);
             DelTensorBuf(bGradTMP);
-        }
+        /*}
         else{
             _ReduceSum(interGradTMP, b->grad, 0);
-        }
+        }*/
     }
     else{
         int reshapedSize[MAX_TENSOR_DIM_NUM];
@@ -437,17 +465,17 @@ void XMathGrad::GradDivDim(XTensor * node)
         XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem);
         _ReduceSum(interGradTMP, interGrad, 2);

-        if(b->outgo.tailNum > 1){
+        //if(b->outgo.tailNum > 1){
             XTensor * bGradTMP2 = NewTensorBuf(b->grad, b->devID, b->mem);
             _ReduceSum(interGrad, bGradTMP2, 0);
             _Sum(b->grad, bGradTMP2, b->grad);
             DelTensorBuf(bGradTMP2);
-        }
+        /*}
         else{
             _ReduceSum(interGrad, b->grad, 0);
-        }
+        }*/

         DelTensorBuf(interGrad);
     }
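The reshape-and-ReduceSum sequence above implements a general rule: when b is broadcast along dimension n of a, dE/db is dE/dc summed over every dimension except n. A self-contained illustration of that reduction on a flat row-major buffer (plain C++ loops, not the library's kernels):

    /* view dedc as [before][dimB][after]; collapse everything but dimB */
    void ReduceToDim(const float * dedc, float * dedb,
                     int before, int dimB, int after)
    {
        for(int k = 0; k < dimB; k++)
            dedb[k] = 0.0f;
        for(int i = 0; i < before; i++)
            for(int k = 0; k < dimB; k++)
                for(int j = 0; j < after; j++)
                    dedb[k] += dedc[(i * dimB + k) * after + j];
    }

When n is the last axis, "before" collapses into a single dimension and this degenerates to the 2-D reshape branch of GradDivDim; otherwise the 3-D branch applies.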
@@ -466,8 +494,10 @@ we have
 dE/da = dE/dc * b^T * \alpha
 dE/db = a^T * dE/dc * \alpha
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradMatrixMul(XTensor * node)
+void XMathGrad::GradMatrixMul(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for MULTIPLY!");
@@ -479,7 +509,9 @@ void XMathGrad::GradMatrixMul(XTensor * node)
     MATRIX_TRANS_TYPE transB = income.GetParamTrans(1);
     DTYPE alpha = income.GetParam(2);

+    if(!isEfficient || a->isGrad)
         XNoder::MakeGrad(a);
+    if(!isEfficient || b->isGrad)
         XNoder::MakeGrad(b);

     XTensor * c = node;
@@ -487,9 +519,9 @@ void XMathGrad::GradMatrixMul(XTensor * node)
     XTensor * deda = a->grad;
     XTensor * dedb = b->grad;

-    if(deda->order == 2 && dedb->order == 2)
-        GradMatrixMul(a, deda, transA, b, dedb, transB, dedc, alpha);
-    else if(transA == X_NOTRANS && deda->order > 2 && dedb->order == 2){
+    if(a->order == 2 && b->order == 2)
+        GradMatrixMul(a, deda, transA, b, dedb, transB, dedc, alpha, isEfficient);
+    else if(transA == X_NOTRANS && a->order > 2 && b->order == 2){
         int orderBackupA = a->order;
         int orderBackupC = c->order;
         int dimsBackupA[MAX_TENSOR_DIM_NUM];
@@ -499,13 +531,15 @@ void XMathGrad::GradMatrixMul(XTensor * node)
         a->Reshape(a->unitNum/a->GetDim(-1), a->GetDim(-1));
         c->Reshape(c->unitNum/c->GetDim(-1), c->GetDim(-1));
+        if(!isEfficient || a->isGrad)
             deda->Reshape(deda->unitNum/deda->GetDim(-1), deda->GetDim(-1));
         dedc->Reshape(dedc->unitNum/dedc->GetDim(-1), dedc->GetDim(-1));

-        GradMatrixMul(a, deda, transA, b, dedb, transB, dedc, alpha);
+        GradMatrixMul(a, deda, transA, b, dedb, transB, dedc, alpha, isEfficient);

         a->Reshape(orderBackupA, dimsBackupA);
         c->Reshape(orderBackupC, dimsBackupC);
+        if(!isEfficient || a->isGrad)
             deda->Reshape(orderBackupA, dimsBackupA);
         dedc->Reshape(orderBackupC, dimsBackupC);
     }
@@ -524,18 +558,22 @@ gradient for matrix multiply: c = matmul(a, b) * \alpha
 >> dedb - dE/db
 >> dedc - dE/dc
 >> alpha - the scalar
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
 void XMathGrad::GradMatrixMul(XTensor * a, XTensor * deda, MATRIX_TRANS_TYPE transA,
                               XTensor * b, XTensor * dedb, MATRIX_TRANS_TYPE transB,
-                              XTensor * dedc, DTYPE alpha)
+                              XTensor * dedc, DTYPE alpha, bool isEfficient)
 {
     /* c = a * b * \alpha */
     if(transA == X_NOTRANS && transB == X_NOTRANS){

         /* dE/da = dE/dc * b^T * \alpha */
+        if(!isEfficient || a->isGrad)
             _MatrixMul(dedc, X_NOTRANS, b, X_TRANS, deda, alpha, 1.0F);

         /* dE/db = a^T * dE/dc * \alpha */
+        if(!isEfficient || b->isGrad)
             _MatrixMul(a, X_TRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F);
     }
@@ -544,9 +582,11 @@ void XMathGrad::GradMatrixMul(XTensor * a, XTensor * deda, MATRIX_TRANS_TYPE tra
         /* dE/da = (dE/dc * b^T)^T * \alpha
                  = b * dE/dc^T * \alpha */
+        if(!isEfficient || a->isGrad)
             _MatrixMul(b, X_NOTRANS, dedc, X_TRANS, deda, alpha, 1.0F);

         /* dE/db = a * dE/dc * \alpha */
+        if(!isEfficient || b->isGrad)
             _MatrixMul(a, X_NOTRANS, dedc, X_NOTRANS, dedb, alpha, 1.0F);
     }
@@ -554,10 +594,12 @@ void XMathGrad::GradMatrixMul(XTensor * a, XTensor * deda, MATRIX_TRANS_TYPE tra
     else if(transA == X_NOTRANS && transB == X_TRANS){

         /* dE/da = dE/dc * b * \alpha */
+        if(!isEfficient || a->isGrad)
             _MatrixMul(dedc, X_NOTRANS, b, X_NOTRANS, deda, alpha, 1.0F);

         /* dE/db = (a^T * dE/dc)^T * \alpha
                  = dE/dc^T * a * \alpha */
+        if(!isEfficient || b->isGrad)
             _MatrixMul(dedc, X_TRANS, a, X_NOTRANS, dedb, alpha, 1.0F);
     }
@@ -566,10 +608,12 @@ void XMathGrad::GradMatrixMul(XTensor * a, XTensor * deda, MATRIX_TRANS_TYPE tra
         /* dE/da = (dE/dc * b)^T * \alpha
                  = b^T * dE/dc^T * \alpha */
+        if(!isEfficient || a->isGrad)
             _MatrixMul(b, X_TRANS, dedc, X_TRANS, deda, alpha, 1.0F);

         /* dE/db = (a * dE/dc)^T * \alpha
                  = dE/dc^T * a^T * \alpha */
+        if(!isEfficient || b->isGrad)
             _MatrixMul(dedc, X_TRANS, a, X_TRANS, dedb, alpha, 1.0F);
     }
 }
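A quick check of the first branch above, in the same notation as the comments (standard matrix calculus, not part of the patch): with c = a * b * \alpha,

    dE/da_{ij} = sum_k dE/dc_{ik} * dc_{ik}/da_{ij}
               = \alpha * sum_k dE/dc_{ik} * b_{jk}
               = \alpha * (dE/dc * b^T)_{ij}

and symmetrically dE/db = a^T * dE/dc * \alpha. The other three branches follow by substituting a^T or b^T for a or b and transposing, which is exactly how the transA/transB cases are spelled out.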
@@ -582,8 +626,10 @@ we have
 dE/da_i = dE/dc_i * b_i^T * \alpha
 dE/db_i = a_i^T * dE/dc_i * \alpha
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradMatrixMulBatched(XTensor * node)
+void XMathGrad::GradMatrixMulBatched(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for MULTIPLY!");
@@ -657,8 +703,10 @@ we have
 dE/da = dE/dc * b
 dE/db = dE/dc * a
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradMultiply(XTensor * node)
+void XMathGrad::GradMultiply(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for MULTIPLY!");
@@ -681,8 +729,12 @@ c = a * b
 where the size of b is equal to dimension n of a, i.e., |b| = a.dimSize[n]
 dE/da = dE/dc * b
 dE/db = (dE/dc * a).reduce(0,...,n-1,n+1,...)
+>> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradMultiplyDim(XTensor * node)
+void XMathGrad::GradMultiplyDim(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for MULTIPLYDIM!");
@@ -713,17 +765,17 @@ void XMathGrad::GradMultiplyDim(XTensor * node)
            size of b. Then we can reduce the matrix into a row vector. */
         bGradTMP->Reshape(2, reshapedSize);

-        if(b->outgo.tailNum > 1){
+        //if(b->outgo.tailNum > 1){
             XTensor * bGradTMP2 = NewTensorBuf(b->grad, b->devID, b->mem);
             _ReduceSum(bGradTMP, bGradTMP2, 0);
             _Sum(b->grad, bGradTMP2, b->grad);
             DelTensorBuf(bGradTMP2);
-        }
+        /*}
         else{
             _ReduceSum(bGradTMP, b->grad, 0);
-        }
+        }*/
     }
     else{
         int reshapedSize[MAX_TENSOR_DIM_NUM];
@@ -745,17 +797,17 @@ void XMathGrad::GradMultiplyDim(XTensor * node)
         XTensor * interGrad = NewTensorBuf(2, reshapedSize, b->dataType, b->denseRatio, b->devID, b->mem);
         _ReduceSum(bGradTMP, interGrad, 2);

-        if(b->outgo.tailNum > 1){
+        //if(b->outgo.tailNum > 1){
             XTensor * bGradTMP2 = NewTensorBuf(b->grad, b->devID, b->mem);
             _ReduceSum(interGrad, bGradTMP2, 0);
             _Sum(b->grad, bGradTMP2, b->grad);
             DelTensorBuf(bGradTMP2);
-        }
+        /*}
         else{
             _ReduceSum(interGrad, b->grad, 0);
-        }
+        }*/

         DelTensorBuf(interGrad);
     }
@@ -771,8 +823,10 @@ c = -a
 we have
 dE/da = dE/dc * (-1)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradNegate(XTensor * node)
+void XMathGrad::GradNegate(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for NEGATE!");
@@ -793,8 +847,10 @@ void XMathGrad::GradNegate(XTensor * node)
 /*
 gradient for normalize
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradNormalize(XTensor * node)
+void XMathGrad::GradNormalize(XTensor * node, bool isEfficient)
 {
     ShowNTErrors("This is really a bad piece of code!!!");
@@ -887,8 +943,10 @@ c = pow(a,p)
 we have
 dE/da = (dE/dc) * p * a^(p-1)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradPower(XTensor * node)
+void XMathGrad::GradPower(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for POWER!");
@@ -916,8 +974,10 @@ c = a * scale + shift
 we have
 dE/da = dE/dc * scale
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradScaleAndShift(XTensor * node)
+void XMathGrad::GradScaleAndShift(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for SCALEANDSHIFT!");
@@ -941,8 +1001,10 @@ we have
 dE/da = dE/dc
 dE/db = -dE/dc * \beta
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradSub(XTensor * node)
+void XMathGrad::GradSub(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUBSTRACT!");
@@ -966,8 +1028,11 @@ c = a - b * \beta
 where the size of b is equal to dimension n of a, i.e., |b| = a.dimSize[n]
 dE/da = dE/dc
 dE/db = - dE/dc * b.reduce(0,...,n-1,n+1,...) * \beta
+>> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradSubDim(XTensor * node)
+void XMathGrad::GradSubDim(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUBDIM!");
@@ -994,20 +1059,20 @@ void XMathGrad::GradSubDim(XTensor * node)
            size of b. Then we can reduce the matrix into a row vector. */
         node->grad->Reshape(2, reshapedSize);

-        if(b->outgo.tailNum > 1){
+        //if(b->outgo.tailNum > 1){
             XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
             _ReduceSum(node->grad, bGradTMP, 0);
             if(beta != 1.0F)
                 _ScaleAndShiftMe(bGradTMP, beta);
             _Sub(b->grad, bGradTMP, b->grad);
             DelTensorBuf(bGradTMP);
-        }
+        /*}
         else{
             _ReduceSum(node->grad, b->grad, 0);
             if(beta != 1.0F)
                 _ScaleAndShiftMe(b->grad, beta);
             _ScaleAndShiftMe(b->grad, -1.0F);
-        }
+        }*/

         node->grad->Reshape(order, dimSize);
     }
@@ -1032,20 +1097,20 @@ void XMathGrad::GradSubDim(XTensor * node)
         _ReduceSum(node->grad, interGrad, 2);

-        if(b->outgo.tailNum > 1){
+        //if(b->outgo.tailNum > 1){
             XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
             _ReduceSum(interGrad, bGradTMP, 0);
             if(beta != 1.0F)
                 _ScaleAndShiftMe(bGradTMP, beta);
             _Sub(b->grad, bGradTMP, b->grad);
             DelTensorBuf(bGradTMP);
-        }
+        /*}
         else{
             _ReduceSum(interGrad, b->grad, 0);
             if(beta != 1.0F)
                 _ScaleAndShiftMe(b->grad, beta);
             _ScaleAndShiftMe(b->grad, -1.0F);
-        }
+        }*/

         node->grad->Reshape(order, dimSize);
@@ -1063,9 +1128,12 @@ c = a + b * \beta
 we have
 dE/da = dE/dc
 dE/db = dE/dc * \beta
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradSum(XTensor * node)
+void XMathGrad::GradSum(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUM!");
@@ -1074,11 +1142,15 @@ void XMathGrad::GradSum(XTensor * node)
     XTensor * b = income.tails[1];
     DTYPE beta = income.GetParam(0);

-    XNoder::MakeGrad(a);
-    XNoder::MakeGrad(b);
-    _Sum(a->grad, node->grad, a->grad);
-    _Sum(b->grad, node->grad, b->grad, beta);
+    if(!isEfficient || a->isGrad){
+        XNoder::MakeGrad(a);
+        _Sum(a->grad, node->grad, a->grad);
+    }
+
+    if(!isEfficient || b->isGrad){
+        XNoder::MakeGrad(b);
+        _Sum(b->grad, node->grad, b->grad, beta);
+    }

     node->visitMark = NODE_FINISHED;
 }
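GradSum is the clearest instance of the new efficiency guard: each operand's gradient is touched only if that operand needs one. A scalar toy model of the same control flow (self-contained C++, not the tensor code):

    struct Node { bool isGrad; float grad; };

    /* c = a + b * beta; push dE/dc into whichever operands need it */
    void GradSumToy(Node & c, Node & a, Node & b, float beta, bool isEfficient)
    {
        if(!isEfficient || a.isGrad)
            a.grad += c.grad;          /* dE/da = dE/dc */
        if(!isEfficient || b.isGrad)
            b.grad += c.grad * beta;   /* dE/db = dE/dc * beta */
    }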
@@ -1088,9 +1160,13 @@ gradient for sum with one dimension
 c = a + b * \beta
 where the size of b is equal to dimension n of a, i.e., |b| = a.dimSize[n]
 dE/da = dE/dc
-dE/db = dE/dc * b.reduce(0,...,n-1,n+1,...) * \beta
+dE/db = dE/dc * a.reduce(0,...,n-1,n+1,...) * \beta
+>> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradSumDim(XTensor * node)
+void XMathGrad::GradSumDim(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for SUMDIM!");
@@ -1117,19 +1193,19 @@ void XMathGrad::GradSumDim(XTensor * node)
            size of b. Then we can reduce the matrix into a row vector. */
         node->grad->Reshape(2, reshapedSize);

-        if(b->outgo.tailNum > 1){
+        //if(b->outgo.tailNum > 1){
             XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
             _ReduceSum(node->grad, bGradTMP, 0);
             if(beta != 1.0F)
                 _ScaleAndShiftMe(bGradTMP, beta);
             _Sum(bGradTMP, b->grad, b->grad);
             DelTensorBuf(bGradTMP);
-        }
+        /*}
         else{
             _ReduceSum(node->grad, b->grad, 0);
             if(beta != 1.0F)
                 _ScaleAndShiftMe(b->grad, beta);
-        }
+        }*/

         node->grad->Reshape(order, dimSize);
     }
@@ -1154,19 +1230,19 @@ void XMathGrad::GradSumDim(XTensor * node)
         _ReduceSum(node->grad, interGrad, 2);

-        if(b->outgo.tailNum > 1){
+        //if(b->outgo.tailNum > 1){
             XTensor * bGradTMP = NewTensorBuf(b->grad, b->devID, b->mem);
             _ReduceSum(interGrad, bGradTMP, 0);
             if(beta != 1.0F)
                 _ScaleAndShiftMe(bGradTMP, beta);
             _Sum(bGradTMP, b->grad, b->grad);
             DelTensorBuf(bGradTMP);
-        }
+        /*}
         else{
             _ReduceSum(interGrad, b->grad, 0);
             if(beta != 1.0F)
                 _ScaleAndShiftMe(b->grad, beta);
-        }
+        }*/

         node->grad->Reshape(order, dimSize);
@@ -1183,9 +1259,12 @@ for
 c = reduceMean(a, dim)
 we have
 dE/da = Unsqueeze(dE/dc) * 1/dimSizeA[dim]
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradReduceMean(XTensor * node)
+void XMathGrad::GradReduceMean(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for Reduce!");
@@ -1213,9 +1292,12 @@ for
 c = reduceSum(a, dim)
 we have
 dE/da = Unsqueeze(dE/dc) * 1
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradReduceSum(XTensor * node)
+void XMathGrad::GradReduceSum(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for Reduce!");
@@ -1243,9 +1325,12 @@ c = \sum_i (a_i - b)^2
 we have
 dE/da = Unsqueeze(dE/dc) * 2a
 dE/db = dE/dc * -2 * n * b
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradReduceSumSquared(XTensor * node)
+void XMathGrad::GradReduceSumSquared(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for Reduce!");
@@ -1292,9 +1377,12 @@ where b is the mean, and n is the size of a
 we have
 dE/da = Unsqueeze(dE/dc) * 2a/n
 dE/db = dE/dc * -2 * b
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XMathGrad::GradReduceVariance(XTensor * node)
+void XMathGrad::GradReduceVariance(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum == 2, "Wrong input tensor number for Reduce!");
...
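As a sanity check on the reduce gradients documented above (standard calculus, not drawn from the patch): for c = reduceSum(a, dim), writing the reduced axis as i and the remaining indices as j, c_j = sum_i a_{i,j}, so dE/da_{i,j} = dE/dc_j for every i. Broadcasting dE/dc back along the reduced axis is exactly the Unsqueeze(dE/dc) of the comments, and reduceMean scales the same quantity by 1/dimSizeA[dim].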
@@ -33,7 +33,7 @@ class XMathGrad
 public:
     /* compute dE/dx of a node */
     static
-    void MakeGrad(XTensor * node);
+    void MakeGrad(XTensor * node, bool isEfficient);

     /* indicates whether the node is for a math operation */
     static
@@ -43,121 +43,121 @@ private:
     /* gradient for absolute */
     static
-    void GradAbsolute(XTensor * node);
+    void GradAbsolute(XTensor * node, bool isEfficient);

     /* gradient for cos */
     static
-    void GradCos(XTensor * node);
+    void GradCos(XTensor * node, bool isEfficient);

     /* gradient for exp */
     static
-    void GradExp(XTensor * node);
+    void GradExp(XTensor * node, bool isEfficient);

     /* gradient for log: c = log(a) */
     static
-    void GradLog(XTensor * node);
+    void GradLog(XTensor * node, bool isEfficient);

     /* gradient for round */
     static
-    void GradRound(XTensor * node);
+    void GradRound(XTensor * node, bool isEfficient);

     /* gradient for sign */
     static
-    void GradSign(XTensor * node);
+    void GradSign(XTensor * node, bool isEfficient);

     /* gradient for sin */
     static
-    void GradSin(XTensor * node);
+    void GradSin(XTensor * node, bool isEfficient);

     /* gradient for tan */
     static
-    void GradTan(XTensor * node);
+    void GradTan(XTensor * node, bool isEfficient);

     /* gradient for clip */
     static
-    void GradClip(XTensor * node);
+    void GradClip(XTensor * node, bool isEfficient);

     /* gradient for Divide */
     static
-    void GradDiv(XTensor * node);
+    void GradDiv(XTensor * node, bool isEfficient);

     /* gradient for DivideDim */
     static
-    void GradDivDim(XTensor * node);
+    void GradDivDim(XTensor * node, bool isEfficient);

     /* gradient for matrix multiply: c = matmul(a, b) * \alpha */
     static
-    void GradMatrixMul(XTensor * node);
+    void GradMatrixMul(XTensor * node, bool isEfficient);

     /* gradient for matrix multiply: c = matmul(a, b) * \alpha */
     static
     void GradMatrixMul(XTensor * a, XTensor * deda, MATRIX_TRANS_TYPE transA,
                        XTensor * b, XTensor * dedb, MATRIX_TRANS_TYPE transB,
-                       XTensor * dedc, DTYPE alpha);
+                       XTensor * dedc, DTYPE alpha, bool isEfficient);

     /* gradient for matrix multiply in batch mode.
        for each batch: c_i = matmul(a_i, b_i) * \alpha */
     static
-    void GradMatrixMulBatched(XTensor * node);
+    void GradMatrixMulBatched(XTensor * node, bool isEfficient);

     /* gradient for multiply (dot production): c = a * b * \alpha */
     static
-    void GradMultiply(XTensor * node);
+    void GradMultiply(XTensor * node, bool isEfficient);

     /* gradient for multiply one dimension: c = a * b * \alpha
        where the size of b is equal to that of one dimension of a */
     static
-    void GradMultiplyDim(XTensor * node);
+    void GradMultiplyDim(XTensor * node, bool isEfficient);

     /* gradient for negate */
     static
-    void GradNegate(XTensor * node);
+    void GradNegate(XTensor * node, bool isEfficient);

     /* gradient for normalize */
     static
-    void GradNormalize(XTensor * node);
+    void GradNormalize(XTensor * node, bool isEfficient);

     /* gradient for power */
     static
-    void GradPower(XTensor * node);
+    void GradPower(XTensor * node, bool isEfficient);

     /* gradient for ScaleAndShift */
     static
-    void GradScaleAndShift(XTensor * node);
+    void GradScaleAndShift(XTensor * node, bool isEfficient);

     /* gradient for Minus */
     static
-    void GradSub(XTensor * node);
+    void GradSub(XTensor * node, bool isEfficient);

     /* gradient for sub with one dimension: c = a - b * \beta
        where the size of b is equal to that of one dimension of a */
     static
-    void GradSubDim(XTensor * node);
+    void GradSubDim(XTensor * node, bool isEfficient);

     /* gradient for sum: c = a + b * \beta */
     static
-    void GradSum(XTensor * node);
+    void GradSum(XTensor * node, bool isEfficient);

     /* gradient for sum with one dimension: c = a + b * \beta
        where the size of b is equal to that of one dimension of a */
     static
-    void GradSumDim(XTensor * node);
+    void GradSumDim(XTensor * node, bool isEfficient);

     /* gradient for reduceMean */
     static
-    void GradReduceMean(XTensor * node);
+    void GradReduceMean(XTensor * node, bool isEfficient);

     /* gradient for reduceSum */
     static
-    void GradReduceSum(XTensor * node);
+    void GradReduceSum(XTensor * node, bool isEfficient);

     /* gradient for reduceSumSquared */
     static
-    void GradReduceSumSquared(XTensor * node);
+    void GradReduceSumSquared(XTensor * node, bool isEfficient);

     /* gradient for reduceVariance */
     static
-    void GradReduceVariance(XTensor * node);
+    void GradReduceVariance(XTensor * node, bool isEfficient);
 };

 }
...
@@ -26,29 +26,34 @@
 #include "XBackwardShape.h"
 #include "../tensor/XName.h"
 #include "../tensor/core/CHeader.h"
+#include "../tensor/core/getandset/SetData.h"

 namespace nts{

 /* compute dE/dx of a node */
-void XShapeGrad::MakeGrad(XTensor * node)
+void XShapeGrad::MakeGrad(XTensor * node, bool isEfficent)
 {
     CheckNTErrors(node->grad != NULL, "No gradient found!");

     XLink &income = node->income;
     int operID = income.typeID;

-    if(operID == SHAPE_MERGE)
-        GradMerge(node);
+    if(operID == MOVEMENT_COPYINDEXED)
+        GradCopyIndexed(node, isEfficent);
+    else if(operID == SHAPE_MERGE)
+        GradMerge(node, isEfficent);
     else if(operID == SHAPE_MERGE_LIST)
-        GradMergeList(node);
-    else if(operID == SHAPE_UNSQUEEZE)
-        GradUnsqueeze(node);
+        GradMergeList(node, isEfficent);
+    else if(operID == SHAPE_RESHAPE)
+        GradReshape(node, isEfficent);
     else if(operID == SHAPE_SPLIT)
-        GradSplit(node);
+        GradSplit(node, isEfficent);
     else if(operID == SHAPE_SPLIT_LIST)
-        GradSplitList(node);
+        GradSplitList(node, isEfficent);
     else if (operID == SHAPE_TRANSPOSE)
-        GradTranspose(node);
+        GradTranspose(node, isEfficent);
+    else if(operID == SHAPE_UNSQUEEZE)
+        GradUnsqueeze(node, isEfficent);
     else{
         ShowNTErrors("TODO!");
     }
@@ -62,10 +67,54 @@ bool XShapeGrad::IsShapeOP(XTensor * node)
 }

 /* post processing of a node */
-void XShapeGrad::PostProcessing(XTensor * node, int typeID)
+void XShapeGrad::PostProcessing(XTensor * node, int typeID, bool isEfficent)
 {
     if(typeID == SHAPE_SPLIT_LIST)
-        GradSplitListPost(node);
+        GradSplitListPost(node, isEfficent);
 }

+/*
+gradient computation for copying indexed sub-tensors
+for
+b = copyindexed(a)
+we have
+dE/da = spread(b)
+>> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
+*/
+void XShapeGrad::GradCopyIndexed(XTensor * node, bool isEfficent)
+{
+    XLink &income = node->income;
+    CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for CopyIndexed!");
+
+    int dim = income.GetParamInt(0);
+    int * srcIndex = (int *)income.GetParamPointer(1);
+    int indexSize = income.GetParamInt(2);
+    int * tgtIndex = (int *)income.GetParamPointer(3);
+    int copyNum = income.GetParamInt(4);
+
+    int realIndexSize = indexSize * copyNum;
+    int * realSrcIndex = new int[realIndexSize];
+    int * realTgtIndex = new int[realIndexSize];
+    for(int i = 0; i < indexSize; i++) {
+        for(int j = 0; j < copyNum; j++) {
+            realSrcIndex[i * copyNum + j] = srcIndex[i] + j;
+            realTgtIndex[i * copyNum + j] = tgtIndex[i] + j;
+        }
+    }
+
+    XTensor * input = income.tails[0];
+    XNoder::MakeGrad(input);
+    _SpreadForGather(input->grad, node->grad, dim, realSrcIndex, realIndexSize, realTgtIndex);
+
+    delete[] realSrcIndex;
+    delete[] realTgtIndex;
+    delete[] srcIndex;
+    delete[] tgtIndex;
+
+    node->visitMark = NODE_FINISHED;
+}

 /*
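The index expansion in GradCopyIndexed turns each base index into a block of copyNum consecutive positions, e.g. srcIndex = {2, 7} with copyNum = 3 expands to {2, 3, 4, 7, 8, 9}. The same computation in isolation (plain C++; the in-tree version fills raw arrays instead):

    #include <vector>

    std::vector<int> ExpandIndex(const std::vector<int> & base, int copyNum)
    {
        std::vector<int> real;
        real.reserve(base.size() * copyNum);
        for(int idx : base)                   /* each index addresses copyNum items */
            for(int j = 0; j < copyNum; j++)
                real.push_back(idx + j);
        return real;
    }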
@@ -80,14 +129,16 @@ dE/db_1 = dE/dc_{split_1}
 i.e.,
 dE/da = split(dE/dc)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XShapeGrad::GradMerge(XTensor * node)
+void XShapeGrad::GradMerge(XTensor * node, bool isEfficent)
 {
     XLink &income = node->income;
     XTensor * input = income.tails[0];

     CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for MERGE!");
-    CheckNTErrors(node->order == input->order - 1, "wrong tensor orders!");
+    CheckNTErrors(node->order == input->order - 1, "Wrong tensor orders!");

     int whereToMerge = income.GetParamInt(0);
     int leadDim = income.GetParamInt(1);
@@ -162,8 +213,10 @@ dE/db = dE/dc_{split_1}
 i.e.,
 list(dE/da, dE/db, ...) = split(dE/dc)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XShapeGrad::GradMergeList(XTensor * node)
+void XShapeGrad::GradMergeList(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     CheckNTErrors(income.tailNum > 0, "Wrong input tensor number for MERGE!");
@@ -233,14 +286,46 @@ void XShapeGrad::GradMergeList(XTensor * node)
 }

 /*
+gradient computation for reshaping a tensor
+for
+b = reshape(a)
+we have
+dE/da = reshape(dE/db)
+>> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
+*/
+void XShapeGrad::GradReshape(XTensor * node, bool isEfficent)
+{
+    XLink &income = node->income;
+    XTensor * input = income.tails[0];
+    XNoder::MakeGrad(input);
+
+    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for MERGE!");
+
+    int order = income.GetParamInt(0);
+    int * dimSize = (int *)income.GetParamPointer(1);
+
+    node->grad->Reshape(order, dimSize);
+    _CopyValues(node->grad, input->grad);
+    delete[] dimSize;
+
+    node->visitMark = NODE_FINISHED;
+}
+
+/*
 gradient computation for split:
 for
 c = split(a)
 we have
 dE/da = merge(dE/dc)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XShapeGrad::GradSplit(XTensor * node)
+void XShapeGrad::GradSplit(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     XTensor * input = income.tails[0];
@@ -262,10 +347,12 @@ void XShapeGrad::GradSplit(XTensor * node)
     /* if the tensor is used somewhere else, we need another SUM
        for gradient accumulation */
     else{
-        XTensor inputGradTMP(input);
+        XTensor * inputGradTMP = NewTensorBuf(input, input->devID, input->mem);

-        _Merge(node->grad, &inputGradTMP, whereToSplit + 1, 0);
-        _Sum(input->grad, &inputGradTMP, input->grad);
+        _Merge(node->grad, inputGradTMP, whereToSplit + 1, 0);
+        _Sum(input->grad, inputGradTMP, input->grad);
+
+        DelTensorBuf(inputGradTMP);
     }

     node->visitMark = NODE_FINISHED;
@@ -279,8 +366,10 @@ list(c_1, ...) = split(a)
 we have
 dE/da = merge(dE/c_1, ...)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XShapeGrad::GradSplitList(XTensor * node)
+void XShapeGrad::GradSplitList(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
     XTensor * input = income.tails[0];
@@ -299,8 +388,10 @@ have been processed. We do this in a post-processing
 manner because we can fuze multiple memory copy jobs
 one time. This is good for system speed up.
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XShapeGrad::GradSplitListPost(XTensor * node)
+void XShapeGrad::GradSplitListPost(XTensor * node, bool isEfficient)
 {
     /* we compute the gradient for current node, rather than for
        child node, i.e., we use the outgoing edge here */
@@ -337,71 +428,84 @@ void XShapeGrad::GradSplitListPost(XTensor * node)
        somewhere else, we need another SUM for gradient
        accumulation */
     else{
-        XTensor nodeGradTMP(node);
+        XTensor * nodeGradTMP = NewTensorBuf(node, node->devID, node->mem);

-        _Merge(&splits, &nodeGradTMP, whereToSplit + 1);
-        _Sum(node->grad, &nodeGradTMP, node->grad);
+        _Merge(&splits, nodeGradTMP, whereToSplit + 1);
+        _Sum(node->grad, nodeGradTMP, node->grad);
+
+        DelTensorBuf(nodeGradTMP);
     }
 }

 /*
-gradient for unsqueezing a tensor
+gradient for transposing a tensor
 for
-c = unsqueeze(a)
+c = Transpose(a)
 we have
-dE/da = reduecesum(dE/dc)
+dE/da = Transpose(dE/dc)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XShapeGrad::GradUnsqueeze(XTensor * node)
+void XShapeGrad::GradTranspose(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
-    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for UNSQUEEZE!");
+    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for TRANSPOSE!");

     XTensor * output = node;
     XTensor * input = income.tails[0];
+    XTensor * b = NewTensorBuf(input, input->devID, input->mem);
     XNoder::MakeGrad(input);

-    int dim = income.GetParamInt(0);
-    int dSize = income.GetParamInt(1);
+    int i = income.GetParamInt(0);
+    int j = income.GetParamInt(1);

-    CheckNTErrors(dSize == output->GetDim(dim), "Wrong dim size for UNSQUEEZE!");
-    CheckNTErrors(output->unitNum = input->unitNum * dSize, "Wrong tensor size!");
+    CheckNTErrors(input->order > i && i >= 0, "index of dimension is out of scope!");
+    CheckNTErrors(input->order > j && j >= 0, "index of dimension is out of scope!");

-    _ReduceSum(output->grad, input->grad, dim);
+    _Transpose(output->grad, b, i, j);
+    _Sum(input->grad, b, input->grad);
+
+    DelTensorBuf(b);

     node->visitMark = NODE_FINISHED;
 }

 /*
-gradient for transposing a tensor
+gradient for unsqueezing a tensor
 for
-c = Transpose(a)
+c = unsqueeze(a)
 we have
-dE/da = Transpose(dE/dc)
+dE/da = reduecesum(dE/dc)
 >> node - the node (c) for backward computation
+>> isEfficient - indicates whether the computation is in
+                 an efficient manner
 */
-void XShapeGrad::GradTranspose(XTensor * node)
+void XShapeGrad::GradUnsqueeze(XTensor * node, bool isEfficient)
 {
     XLink &income = node->income;
-    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for TRANSPOSE!");
+    CheckNTErrors(income.tailNum == 1, "Wrong input tensor number for UNSQUEEZE!");

     XTensor * output = node;
     XTensor * input = income.tails[0];
-    XTensor * b = NewTensor(input);
     XNoder::MakeGrad(input);

-    int i = income.GetParamInt(0);
-    int j = income.GetParamInt(1);
+    int dim = income.GetParamInt(0);
+    int dSize = income.GetParamInt(1);

-    CheckNTErrors(input->order > i && i >= 0, "index of dimension is out of scope!");
-    CheckNTErrors(input->order > j && j >= 0, "index of dimension is out of scope!");
+    CheckNTErrors(dSize == output->GetDim(dim), "Wrong dim size for UNSQUEEZE!");
+    CheckNTErrors(output->unitNum = input->unitNum * dSize, "Wrong tensor size!");

-    _Transpose(output->grad, b, i, j);
-    _Sum(input->grad, b, input->grad);
+    XTensor * g = NewTensorBuf(input->grad, input->devID, input->mem);
+
+    _ReduceSum(output->grad, g, dim);
+    _Sum(input->grad, g, input->grad);
+
+    DelTensorBuf(g);

     node->visitMark = NODE_FINISHED;
-
-    delete b;
 }

 }
\ No newline at end of file
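The two reordered functions above have simple backward rules; toy models on flat buffers make them concrete (illustrative C++ only; the real code works on XTensor objects and arbitrary dimensions):

    /* transpose: if c = a^T with a of shape [rows][cols], then
       dE/da[i][j] accumulates dE/dc[j][i] */
    void TransposeBackward(const float * dedc, float * deda, int rows, int cols)
    {
        for(int i = 0; i < rows; i++)
            for(int j = 0; j < cols; j++)
                deda[i * cols + j] += dedc[j * rows + i];
    }

    /* unsqueeze: forward copies a along a new leading axis of size dSize,
       so backward sums dE/dc over that axis (the in-tree version reduces
       an arbitrary dim) */
    void UnsqueezeBackward(const float * dedc, float * deda, int unitNum, int dSize)
    {
        for(int d = 0; d < dSize; d++)
            for(int k = 0; k < unitNum; k++)
                deda[k] += dedc[d * unitNum + k];
    }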
...@@ -34,7 +34,7 @@ class XShapeGrad ...@@ -34,7 +34,7 @@ class XShapeGrad
public: public:
/* compute dE/dx of a node */ /* compute dE/dx of a node */
static static
void MakeGrad(XTensor * node); void MakeGrad(XTensor * node, bool isEfficient);
/* indicates whether the node is for a shaping operation */ /* indicates whether the node is for a shaping operation */
static static
...@@ -42,38 +42,47 @@ public: ...@@ -42,38 +42,47 @@ public:
/* post processing of a node */ /* post processing of a node */
static static
void PostProcessing(XTensor * node, int typeId); void PostProcessing(XTensor * node, int typeId, bool isEfficient);
private: private:
/* gradient computation for copying indexed sub-tensors: b = copyindexed(a, srcIndex, indexSize, tgtIndex, copyNum) */
static
void GradCopyIndexed(XTensor * node, bool isEfficient);
/* gradient computation for merge: c = merge(a, b, ...) */ /* gradient computation for merge: c = merge(a, b, ...) */
static static
void GradMerge(XTensor * node); void GradMerge(XTensor * node, bool isEfficient);
/* gradient computation for merging a list of tensors : c = merge(list(a, b, ...)) */ /* gradient computation for merging a list of tensors : c = merge(list(a, b, ...)) */
static static
void GradMergeList(XTensor * node); void GradMergeList(XTensor * node, bool isEfficient);
/* gradient computation for transposing a tensor : b = transpose(a) */
static
void GradTranspose(XTensor * node, bool isEfficient);
/* gradient computation for reshaping a tensor: c = reshape(a) */
static
void GradReshape(XTensor * node, bool isEfficient);
/* gradient computation for split: c = split(a) */ /* gradient computation for split: c = split(a) */
static static
void GradSplit(XTensor * node); void GradSplit(XTensor * node, bool isEfficient);
/* gradient computation for splitting. we return the list of the splits : list(c_1, ...) = split(a) */ /* gradient computation for splitting. we return the list of the splits : list(c_1, ...) = split(a) */
static static
void GradSplitList(XTensor * node); void GradSplitList(XTensor * node, bool isEfficient);
/* gradient computation for splitting. we return the list of the splits : list(c_1, ...) = split(a). /* gradient computation for splitting. we return the list of the splits : list(c_1, ...) = split(a).
this method is called only when all nodes of splitting have been processed. We do this in a post-processing this method is called only when all nodes of splitting have been processed. We do this in a post-processing
manner because we can fuse multiple memory copy jobs at one time. This is good for system speed-up. */ manner because we can fuse multiple memory copy jobs at one time. This is good for system speed-up. */
static static
void GradSplitListPost(XTensor * node); void GradSplitListPost(XTensor * node, bool isEfficient);
/* gradient computation for unsqueezing a tensor : c = unsqueeze(a) */
static
void GradUnsqueeze(XTensor * node);
/* gradient computation for unsqueezing a tensor : c = unsqueeze(a) */ /* gradient computation for unsqueezing a tensor : c = unsqueeze(a) */
static static
void GradTranspose(XTensor * node); void GradUnsqueeze(XTensor * node, bool isEfficient);
}; };
......
...@@ -55,6 +55,7 @@ void XNetClearAll() ...@@ -55,6 +55,7 @@ void XNetClearAll()
XNet::XNet() XNet::XNet()
{ {
nodes.Clear(); nodes.Clear();
isGradEfficient = true;
} }
/* de-constructor */ /* de-constructor */
...@@ -115,6 +116,10 @@ void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss) ...@@ -115,6 +116,10 @@ void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
{ {
Traverse(roots); Traverse(roots);
/* label tensors where the backward computation is necessary */
if(isGradEfficient)
MakeEfficientNet();
for(int i = 0; i < nodes.count; i++){ for(int i = 0; i < nodes.count; i++){
XTensor * node = (XTensor*)nodes.Get(i); XTensor * node = (XTensor*)nodes.Get(i);
node->visitMark = NODE_UNFINISHED; node->visitMark = NODE_UNFINISHED;
...@@ -154,10 +159,19 @@ void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss) ...@@ -154,10 +159,19 @@ void XNet::Backward(XList &roots, XList &golds, LOSS_FUNCTION_NAME loss)
CheckNTErrors(node->mem->bufUsed < BUF_PITCH, "Illegal access of buffer!"); CheckNTErrors(node->mem->bufUsed < BUF_PITCH, "Illegal access of buffer!");
} }
if(node->visitMark == NODE_FINISHED) if(node->visitMark != NODE_FINISHED)
continue; BackwardNode(node, isGradEfficient);
if(isGradEfficient){
XLink & outgo = node->outgo;
for(int i = 0; i < outgo.tailNum; i++){
XTensor * parent = outgo.tails[i];
ClearGrad(parent);
}
BackwardNode(node); if(XNoder::IsLeaf(node))
ClearGrad(node);
}
} }
} }
...@@ -179,27 +193,32 @@ void XNet::Backward(XList &roots, LOSS_FUNCTION_NAME loss) ...@@ -179,27 +193,32 @@ void XNet::Backward(XList &roots, LOSS_FUNCTION_NAME loss)
/* /*
backward computation for a given node backward computation for a given node
>> node - the node keeps the result of an operation (e.g., activation function) >> node - the node keeps the result of an operation (e.g., activation function)
>> isEfficient - indicates whether the back-propagation is computed in an
efficient manner
*/ */
void XNet::BackwardNode(XTensor * node) void XNet::BackwardNode(XTensor * node, bool isEfficient)
{ {
if(node == NULL || node->visitMark == NODE_FINISHED) if(node == NULL || node->visitMark == NODE_FINISHED)
return; return;
if(!XNoder::IsLeaf(node)){ if(!XNoder::IsLeaf(node)){
/* post processing for parent nodes */ /* post processing for parent nodes */
BackwardNodePost(node); BackwardNodePost(node, isEfficient);
/* process the current node */ /* process the current node */
if(XMathGrad::IsMathOP(node)) if(XMathGrad::IsMathOP(node))
XMathGrad::MakeGrad(node); XMathGrad::MakeGrad(node, isEfficient);
else if(XFuncGrad::IsFunc(node)) else if(XFuncGrad::IsFunc(node))
XFuncGrad::MakeGrad(node); XFuncGrad::MakeGrad(node, isEfficient);
else if(XShapeGrad::IsShapeOP(node)) else if(XShapeGrad::IsShapeOP(node))
XShapeGrad::MakeGrad(node); XShapeGrad::MakeGrad(node, isEfficient);
else{ else{
ShowNTErrors("Wrong node type!"); ShowNTErrors("Wrong node type!");
} }
} }
else{
node->visitMark = NODE_FINISHED;
}
} }
/* /*
...@@ -207,7 +226,7 @@ backward computation (in post processing) for a given node ...@@ -207,7 +226,7 @@ backward computation (in post processing) for a given node
>> node - the node whose parent nodes are not processed yet. So >> node - the node whose parent nodes are not processed yet. So
we do the job at the child node. we do the job at the child node.
*/ */
void XNet::BackwardNodePost(XTensor * node) void XNet::BackwardNodePost(XTensor * node, bool isEfficient)
{ {
bool isSplitList = false; bool isSplitList = false;
XLink &outgo = node->outgo; XLink &outgo = node->outgo;
...@@ -217,7 +236,7 @@ void XNet::BackwardNodePost(XTensor * node) ...@@ -217,7 +236,7 @@ void XNet::BackwardNodePost(XTensor * node)
} }
if(isSplitList) if(isSplitList)
XShapeGrad::PostProcessing(node, SHAPE_SPLIT_LIST); XShapeGrad::PostProcessing(node, SHAPE_SPLIT_LIST, isEfficient);
} }
/* /*
...@@ -284,6 +303,8 @@ void XNet::TarjanVisit(XTensor * node, XList &orders, const unsigned int code) ...@@ -284,6 +303,8 @@ void XNet::TarjanVisit(XTensor * node, XList &orders, const unsigned int code)
node->visitMark = code + 2; node->visitMark = code + 2;
orders.Add(node); orders.Add(node);
} }
else if(node->visitMark == code + 2){
}
} }
/* /*
...@@ -304,4 +325,62 @@ void XNet::Dump(FILE * file) ...@@ -304,4 +325,62 @@ void XNet::Dump(FILE * file)
} }
} }
/*
set the flag of gradient-efficient
>> flag - the flag
*/
void XNet::SetGradEfficientFlag(bool flag)
{
isGradEfficient = flag;
}
/* generate the gradient-efficient flag for every node */
void XNet::MakeEfficientNet()
{
/* propagate the flag through the graph: a node needs a gradient as soon as any of its children does */
for(int i = 0; i < nodes.count; i++){
XTensor * node = (XTensor*)nodes.Get(i);
XLink &income = node->income;
for(int j = 0; j < income.tailNum; j++){
XTensor * child = income.tails[j];
if(child->isGrad || child->isVar){
node->SetGradFlag(true);
break;
}
}
}
}
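A minimal usage sketch of the gradient-efficient mode (the output/gold tensors are hypothetical; the method names are from this commit):

    XNet net;
    net.SetGradEfficientFlag(true);            /* keep gradients only where needed */
    net.Backward(output, gold, CROSSENTROPY);  /* MakeEfficientNet() then marks every
                                                  node with a variable (isVar) below it,
                                                  and ClearGrad() frees the gradients of
                                                  finished non-variable nodes */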
/*
clear the gradient information if the node is no longer needed
>> node - the node that we want to clear
*/
void XNet::ClearGrad(XTensor * node)
{
if(node->isVar)
return;
if(node->grad == NULL)
return;
if(node->visitMark != NODE_FINISHED)
return;
XLink & income = node->income;
bool finished = true;
for(int i = 0; i < income.tailNum; i++){
XTensor * child = income.tails[i];
if(child->visitMark != NODE_FINISHED){
finished = false;
break;
}
}
if(finished){
//fprintf(stderr, "del %d %ld\n", node->id, node->grad->unitNum);
delete node->grad;
node->grad = NULL;
}
}
} }
\ No newline at end of file
...@@ -47,6 +47,9 @@ struct XNet ...@@ -47,6 +47,9 @@ struct XNet
/* input nodes of the network */ /* input nodes of the network */
XList inputs; XList inputs;
/* indicates whether the network just keeps the gradient for parameter tensors */
bool isGradEfficient;
/* constructor */ /* constructor */
XNet(); XNet();
...@@ -71,10 +74,10 @@ struct XNet ...@@ -71,10 +74,10 @@ struct XNet
void Backward(XList &roots, LOSS_FUNCTION_NAME loss = NOLOSS); void Backward(XList &roots, LOSS_FUNCTION_NAME loss = NOLOSS);
/* backward computation for a given node */ /* backward computation for a given node */
void BackwardNode(XTensor * node); void BackwardNode(XTensor * node, bool isEfficient = false);
/* backward computation (in post processing) for a given node */ /* backward computation (in post processing) for a given node */
void BackwardNodePost(XTensor * node); void BackwardNodePost(XTensor * node, bool isEfficient = false);
/* traverse the net and find the topological order by /* traverse the net and find the topological order by
depth-first search (Tarjan's algorithm) */ depth-first search (Tarjan's algorithm) */
...@@ -89,6 +92,15 @@ struct XNet ...@@ -89,6 +92,15 @@ struct XNet
/* dump network information */ /* dump network information */
void Dump(FILE * file); void Dump(FILE * file);
/* set the flag of gradient-efficient */
void SetGradEfficientFlag(bool flag = true);
/* generate the gradient-efficient flag for every node */
void MakeEfficientNet();
/* clear the gradient information if the node is no longer needed */
void ClearGrad(XTensor * node);
}; };
/* we make a unique id for every tensor */ /* we make a unique id for every tensor */
......
...@@ -74,6 +74,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net); ...@@ -74,6 +74,7 @@ void Forward(XTensor inputs[], XTensor &output, FNNModel &model, FNNNet &net);
void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NAME loss, void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NAME loss,
FNNModel &model, FNNModel &grad, FNNNet &net); FNNModel &model, FNNModel &grad, FNNNet &net);
void ForwardAutoDiff(XTensor inputs[], XTensor &output, FNNModel &model); void ForwardAutoDiff(XTensor inputs[], XTensor &output, FNNModel &model);
void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model);
/* /*
entry of the program entry of the program
...@@ -476,7 +477,12 @@ void Train(const char * train, bool isShuffled, FNNModel &model) ...@@ -476,7 +477,12 @@ void Train(const char * train, bool isShuffled, FNNModel &model)
Clear(model, true); Clear(model, true);
/* forward + backward process */ /* forward + backward process */
ForwardAutoDiff(inputs, output, model);
/* this is implemented by the gather function */
ForwardAutoDiff(ngrams, ngramNum, output, model);
/* this is implemented by the multiply function */
//ForwardAutoDiff(inputs, output, model);
/* automatic differentiation */ /* automatic differentiation */
autoDiffer.Backward(output, gold, CROSSENTROPY); autoDiffer.Backward(output, gold, CROSSENTROPY);
...@@ -975,7 +981,55 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA ...@@ -975,7 +981,55 @@ void Backward(XTensor inputs[], XTensor &output, XTensor &gold, LOSS_FUNCTION_NA
} }
/* /*
forward process (with tensor connections) forward process (with tensor connections) (this is implemented by the gather function)
>> ngrams - the loaded ngrams
>> batch - number of n-grams in the batch
>> output - output probability
>> model - the fnn model
*/
void ForwardAutoDiff(NGram * ngrams, int batch, XTensor &output, FNNModel &model)
{
int n = model.n;
int depth = model.hDepth;
XTensor words;
XTensor embeddingBig;
XTensor hidden;
XTensor b;
int size = batch * (n-1);
int * index = new int[size];
for(int i = 0; i < batch; i++){
for (int j = 0; j < n-1; j++){
int a = i * (n - 1) + j;
index[a] = ngrams[i].words[j];
}
}
XTensor embedding;
embedding = Gather(model.embeddingW, 0, index, size);
delete[] index;
int dimSize[2];
dimSize[0] = embedding.GetDim(0) / (n - 1);
dimSize[1] = embedding.GetDim(1) * (n - 1);
hidden = Reshape(embedding, embedding.order, dimSize);
/* hidden layers */
for(int i = 0; i < depth; i++)
hidden = MMul(hidden, model.hiddenW[i]) + model.hiddenB[i];
/* output layer */
output = LogSoftmax(MMul(hidden, model.outputW) + model.outputB, 1);
//XLink::ShowNetwork(stderr, &output);
}
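To make the index layout concrete (word ids hypothetical): with n = 3 and a batch of two trigrams {17, 42, 9} and {7, 99, 3}, only the n-1 = 2 context words of each trigram are embedded:

    /* size = 2 * (3-1) = 4                                                   */
    /* index = {17, 42, 7, 99}         (context words; targets 9, 3 excluded) */
    /* Gather(model.embeddingW, 0, index, 4) -> 4 x eSize embedding rows      */
    /* Reshape -> 2 x (2 * eSize): one row per trigram, contexts concatenated */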
/*
forward process (with tensor connections) (this is implemented by the multiply function)
>> inputs - input word representations >> inputs - input word representations
>> output - output probability >> output - output probability
>> model - the fnn model >> model - the fnn model
...@@ -1123,7 +1177,11 @@ void Test(const char * test, const char * result, FNNModel &model) ...@@ -1123,7 +1177,11 @@ void Test(const char * test, const char * result, FNNModel &model)
Forward(inputs, output, model, net); Forward(inputs, output, model, net);
} }
else { else {
ForwardAutoDiff(inputs, output, model); /* this is implemented by the gather function */
ForwardAutoDiff(ngrams, ngramNum, output, model);
/* this is implemented by the multiply function */
//ForwardAutoDiff(inputs, output, model);
} }
/* prediction probabilities */ /* prediction probabilities */
......
...@@ -53,7 +53,7 @@ initialize the model ...@@ -53,7 +53,7 @@ initialize the model
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool >> myMem - the memory pool
*/ */
void T2TAttention::InitModel(int argc, const char ** argv, void T2TAttention::InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID, XMem * myMem) int myDevID, XMem * myMem)
{ {
...@@ -69,18 +69,22 @@ void T2TAttention::InitModel(int argc, const char ** argv, ...@@ -69,18 +69,22 @@ void T2TAttention::InitModel(int argc, const char ** argv,
LoadParamInt(argc, argv, "d", &dv, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &dv, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F); LoadParamFloat(argc, argv, "attminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutatt", &dropoutP, 0);
InitTensor2D(&wk, d, dk, X_FLOAT, devID, mem); InitTensor2D(&wk, d, dk, X_FLOAT, devID, mem);
InitTensor2D(&wq, d, dk, X_FLOAT, devID, mem); InitTensor2D(&wq, d, dk, X_FLOAT, devID, mem);
InitTensor2D(&wv, d, dv, X_FLOAT, devID, mem); InitTensor2D(&wv, d, dv, X_FLOAT, devID, mem);
InitTensor2D(&wa, d, d, X_FLOAT, devID, mem);
float scale = 1.0F; float scale = 1.0F;
float finfoutk = (float)sqrt(6.0F * scale/(d + dk)); float finfoutk = (float)sqrt(6.0F * scale/(d + dk));
float finfoutv = (float)sqrt(6.0F * scale/(d + dv)); float finfoutv = (float)sqrt(6.0F * scale/(d + dv));
float finfouta = (float)sqrt(6.0F * scale / (d + d));
wk.SetDataRand(-finfoutk, finfoutk); wk.SetDataRand(-finfoutk, finfoutk);
wq.SetDataRand(-finfoutk, finfoutk); wq.SetDataRand(-finfoutk, finfoutk);
wv.SetDataRand(-finfoutv, finfoutv); wv.SetDataRand(-finfoutv, finfoutv);
wa.SetDataRand(-finfouta, finfouta);
} }
/* /*
...@@ -90,10 +94,11 @@ make the network ...@@ -90,10 +94,11 @@ make the network
and H = vector size of each position and H = vector size of each position
>> q - queries >> q - queries
>> v - values >> v - values
>> maske - as it is >> mask - as it is
>> isTraining - indicates whether the model is used for training
<< return - multi-attention result << return - multi-attention result
*/ */
XTensor T2TAttention::Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask) XTensor T2TAttention::Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask, bool isTraining)
{ {
XTensor k2; XTensor k2;
XTensor q2; XTensor q2;
...@@ -123,14 +128,17 @@ XTensor T2TAttention::Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask) ...@@ -123,14 +128,17 @@ XTensor T2TAttention::Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask)
if(isMasked) if(isMasked)
dot = dot + mask; dot = dot + mask;
dot = Linear(dot, 1.0F/(float)sqrt((float)dk)); dot = Linear(dot, 1.0F/(float)sqrt((float)dk/nhead));
scalar = Softmax(dot, -1); scalar = Softmax(dot, -1);
if(isTraining && dropoutP > 0)
scalar = Dropout(scalar, dropoutP);
att = BMMul(scalar, vheads); att = BMMul(scalar, vheads);
/* concatenate the heads */ /* concatenate the heads */
return Merge(att, att.order - 1); return MMul(Merge(att, att.order - 1), wa);
} }
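In formula form, each head now computes standard scaled dot-product attention over dk/nhead channels, followed by the new output projection wa (with dropout on the attention weights during training):

    /* head_i = Softmax( (Q_i K_i^T + M) / sqrt(dk/nhead) ) V_i */
    /* Make(...) = MMul( Merge(head_1, ..., head_nhead), wa )   */

where M is the additive mask, added before the scaling as in the code above.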
} }
...@@ -57,6 +57,9 @@ public: ...@@ -57,6 +57,9 @@ public:
/* transformation matrix for V */ /* transformation matrix for V */
XTensor wv; XTensor wv;
/* transformation after dot-product attention */
XTensor wa;
/* size of transformed Q and K */ /* size of transformed Q and K */
int dk; int dk;
...@@ -76,6 +79,9 @@ public: ...@@ -76,6 +79,9 @@ public:
/* indicates whether the model is used for training */ /* indicates whether the model is used for training */
bool isTraining; bool isTraining;
/* dropout probability */
DTYPE dropoutP;
public: public:
/* constructor */ /* constructor */
T2TAttention(); T2TAttention();
...@@ -84,12 +90,12 @@ public: ...@@ -84,12 +90,12 @@ public:
~T2TAttention(); ~T2TAttention();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, const char ** argv, void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL); int myDevID = -1, XMem * myMem = NULL);
/* make the network */ /* make the network */
XTensor Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask); XTensor Make(XTensor &k, XTensor &q, XTensor &v, XTensor &mask, bool isTraining);
}; };
} }
......
...@@ -34,7 +34,7 @@ class AttDecoder : T2TDecoder ...@@ -34,7 +34,7 @@ class AttDecoder : T2TDecoder
{ {
public: public:
/* initialize the model */ /* initialize the model */
void InitModel(int argc, const char ** argv); void InitModel(int argc, char ** argv);
}; };
} }
......
...@@ -48,7 +48,7 @@ initialize the model ...@@ -48,7 +48,7 @@ initialize the model
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool >> myMem - the memory pool
*/ */
void T2TEmbedder::InitModel(int argc, const char ** argv, int myDevID, XMem * myMem) void T2TEmbedder::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
{ {
devID = myDevID; devID = myDevID;
mem = myMem; mem = myMem;
...@@ -60,7 +60,8 @@ void T2TEmbedder::InitModel(int argc, const char ** argv, int myDevID, XMem * my ...@@ -60,7 +60,8 @@ void T2TEmbedder::InitModel(int argc, const char ** argv, int myDevID, XMem * my
InitTensor2D(&w, vSize, eSize, X_FLOAT, devID, mem); InitTensor2D(&w, vSize, eSize, X_FLOAT, devID, mem);
w.SetDataRandn(0, 1.0F/(float)sqrt((float)eSize)); DTYPE v = 1.0F/(float)sqrt((float)eSize);
w.SetDataRand(-v, v);
/* create the positional embedding matrix */ /* create the positional embedding matrix */
MakePosEmbedding(eSize, d, maxLength); MakePosEmbedding(eSize, d, maxLength);
...@@ -79,6 +80,17 @@ void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length) ...@@ -79,6 +80,17 @@ void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length)
for(int pos = 0; pos < length; pos++){ for(int pos = 0; pos < length; pos++){
float * dp = data + pos * eSize; float * dp = data + pos * eSize;
int channelSize = eSize / 2;
int offset = 0;
for(int i = 0; i < channelSize; i++){
dp[offset++] = (float)sin(pos/pow(10000.0F, 2.0F*i/(d - 2)));
}
for(int i = 0; i < channelSize; i++){
dp[offset++] = (float)cos(pos/pow(10000.0F, 2.0F*i/(d - 2)));
}
/*
for(int k = 0; k < eSize; k++){ for(int k = 0; k < eSize; k++){
if(k % 2 == 0){ if(k % 2 == 0){
int i = k/2; int i = k/2;
...@@ -89,6 +101,7 @@ void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length) ...@@ -89,6 +101,7 @@ void T2TEmbedder::MakePosEmbedding(int eSize, int d, int length)
dp[k] = (float)cos(pos/pow(10000.0F, 2.0F*i/d)); dp[k] = (float)cos(pos/pow(10000.0F, 2.0F*i/d));
} }
} }
*/
} }
posEmbeddingBase.SetData(data, posEmbeddingBase.unitNum); posEmbeddingBase.SetData(data, posEmbeddingBase.unitNum);
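Written out, the new layout fills the first half of the channels with sines and the second half with cosines, instead of interleaving even/odd channels as the commented-out code did:

    /* PE(pos, i)           = sin( pos / 10000^(2i/(d-2)) ),  i = 0 .. eSize/2 - 1 */
    /* PE(pos, eSize/2 + i) = cos( pos / 10000^(2i/(d-2)) )                        */

Note the exponent divides by d - 2 here rather than the usual d; that is what the code above computes.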
...@@ -135,7 +148,7 @@ XTensor T2TEmbedder::Make(XTensor &input) ...@@ -135,7 +148,7 @@ XTensor T2TEmbedder::Make(XTensor &input)
} }
/* then we make word embeddings */ /* then we make word embeddings */
wordEmbedding = Linear(MMul(input, w), (float)sqrt((float)d)); wordEmbedding = Linear(MMul(input, w), (float)sqrt((float)eSize));
/* we sum over the two embeddings */ /* we sum over the two embeddings */
return wordEmbedding + posEmbedding; return wordEmbedding + posEmbedding;
......
...@@ -71,7 +71,7 @@ public: ...@@ -71,7 +71,7 @@ public:
~T2TEmbedder(); ~T2TEmbedder();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, const char ** argv, int myDevID = -1, XMem * myMem = NULL); void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL);
/* make positional embeddings */ /* make positional embeddings */
void MakePosEmbedding(int eSize, int d, int length); void MakePosEmbedding(int eSize, int d, int length);
......
...@@ -51,7 +51,7 @@ initialize the model ...@@ -51,7 +51,7 @@ initialize the model
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool >> myMem - the memory pool
*/ */
void AttEncoder::InitModel(int argc, const char ** argv, void AttEncoder::InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID, XMem * myMem) int myDevID, XMem * myMem)
{ {
...@@ -89,16 +89,17 @@ void AttEncoder::InitModel(int argc, const char ** argv, ...@@ -89,16 +89,17 @@ void AttEncoder::InitModel(int argc, const char ** argv,
make the encoding network make the encoding network
>> input - the input tensor of the encoder >> input - the input tensor of the encoder
>> mask - the mask that indicates whether each position is valid >> mask - the mask that indicates whether each position is valid
>> skipInputRes - indicates whether we skip the residual connection of the first layer >> isTraining - indicates whether the model is used for training
>> isTraining - indicates whether the model is for training
<< return - the output tensor of the encoder << return - the output tensor of the encoder
*/ */
XTensor AttEncoder::Make(XTensor &input, XTensor &mask, bool skipInputRes, bool isTraining) XTensor AttEncoder::Make(XTensor &input, XTensor &mask, bool isTraining)
{ {
XTensor x; XTensor x;
x = embedder.Make(input); x = embedder.Make(input);
//x.Dump(tmpFILE, "embedding: ");
/* dropout */ /* dropout */
if(isTraining && dropoutP > 0) if(isTraining && dropoutP > 0)
x = Dropout(x, dropoutP); x = Dropout(x, dropoutP);
...@@ -109,23 +110,8 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, bool skipInputRes, bool ...@@ -109,23 +110,8 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, bool skipInputRes, bool
XTensor fnn; XTensor fnn;
XTensor res; XTensor res;
/* we skip the residual connection for the first layer if
the encoder is used in language modeling. */
if(skipInputRes && i == 0){
/* self attention */
att = attentions[i].Make(x, x, x, mask);
/* dropout */
if(isTraining && dropoutP > 0)
att = Dropout(att, dropoutP);
/* layer normalization */
x = attLayerNorms[i].Make(att);
}
else{
/* self attention */ /* self attention */
att = attentions[i].Make(x, x, x, mask); att = attentions[i].Make(x, x, x, mask, isTraining);
/* dropout */ /* dropout */
if(isTraining && dropoutP > 0) if(isTraining && dropoutP > 0)
...@@ -136,10 +122,9 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, bool skipInputRes, bool ...@@ -136,10 +122,9 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, bool skipInputRes, bool
/* layer normalization */ /* layer normalization */
x = attLayerNorms[i].Make(res); x = attLayerNorms[i].Make(res);
}
/* fnn */ /* fnn */
fnn = fnns[i].Make(x); fnn = fnns[i].Make(x, isTraining);
/* dropout */ /* dropout */
if(isTraining && dropoutP > 0) if(isTraining && dropoutP > 0)
...@@ -150,9 +135,6 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, bool skipInputRes, bool ...@@ -150,9 +135,6 @@ XTensor AttEncoder::Make(XTensor &input, XTensor &mask, bool skipInputRes, bool
/* layer normalization */ /* layer normalization */
x = fnnLayerNorms[i].Make(res); x = fnnLayerNorms[i].Make(res);
if(isTraining && dropoutP > 0)
x = Dropout(x, dropoutP);
} }
return x; return x;
......
...@@ -40,7 +40,7 @@ class T2TEncoder ...@@ -40,7 +40,7 @@ class T2TEncoder
{ {
public: public:
virtual virtual
XTensor Make(XTensor &input, XTensor &mask, bool skipInputRes, bool isTraining) = 0; XTensor Make(XTensor &input, XTensor &mask, bool isTraining) = 0;
}; };
/* /*
...@@ -49,7 +49,7 @@ the encoder based on RNN ...@@ -49,7 +49,7 @@ the encoder based on RNN
class RNNEncoder : T2TEncoder class RNNEncoder : T2TEncoder
{ {
public: public:
XTensor Make(XTensor &input, XTensor &mask, bool skipInputRes, bool isTraining); XTensor Make(XTensor &input, XTensor &mask, bool isTraining);
}; };
...@@ -113,12 +113,12 @@ public: ...@@ -113,12 +113,12 @@ public:
~AttEncoder(); ~AttEncoder();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, const char ** argv, void InitModel(int argc, char ** argv,
bool myIsMasked, int myIgnored, bool myIsMasked, int myIgnored,
int myDevID = -1, XMem * myMem = NULL); int myDevID = -1, XMem * myMem = NULL);
/* make the encoding network */ /* make the encoding network */
XTensor Make(XTensor &input, XTensor &mask, bool skipInputRes, bool isTraining); XTensor Make(XTensor &input, XTensor &mask, bool isTraining);
}; };
......
...@@ -49,7 +49,7 @@ initialize the model ...@@ -49,7 +49,7 @@ initialize the model
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool >> myMem - the memory pool
*/ */
void T2TFNN::InitModel(int argc, const char ** argv, int myDevID, XMem * myMem) void T2TFNN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
{ {
devID = myDevID; devID = myDevID;
mem = myMem; mem = myMem;
...@@ -58,8 +58,9 @@ void T2TFNN::InitModel(int argc, const char ** argv, int myDevID, XMem * myMem) ...@@ -58,8 +58,9 @@ void T2TFNN::InitModel(int argc, const char ** argv, int myDevID, XMem * myMem)
LoadParamInt(argc, argv, "d", &inSize, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &inSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "d", &outSize, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &outSize, DEFAULT_EMBEDDING_SIZE);
LoadParamInt(argc, argv, "fnnh", &hSize, DEFAULT_EMBEDDING_SIZE * 4); LoadParamInt(argc, argv, "fnnh", &hSize, outSize * 4);
LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F); LoadParamFloat(argc, argv, "fnnminmax", &minmax, 0.1F);
LoadParamFloat(argc, argv, "dropoutfnn", &dropoutP, 0);
InitTensor2D(&w1, inSize, hSize, X_FLOAT, devID, mem); InitTensor2D(&w1, inSize, hSize, X_FLOAT, devID, mem);
InitTensor1D(&b1, hSize, X_FLOAT, devID, mem); InitTensor1D(&b1, hSize, X_FLOAT, devID, mem);
...@@ -83,13 +84,16 @@ y = max(0, x * w1 + b1) * w2 + b2 ...@@ -83,13 +84,16 @@ y = max(0, x * w1 + b1) * w2 + b2
>> input - the input tensor >> input - the input tensor
>> return - the output tensor >> return - the output tensor
*/ */
XTensor T2TFNN::Make(XTensor &input) XTensor T2TFNN::Make(XTensor &input, bool isTraining)
{ {
XTensor t1; XTensor t1;
/* t1 = max(0, x * w1 + b1) */ /* t1 = max(0, x * w1 + b1) */
t1 = Rectify(MMul(input, w1) + b1); t1 = Rectify(MMul(input, w1) + b1);
if(isTraining && dropoutP > 0)
t1 = Dropout(t1, dropoutP);
/* result = t1 * w2 + b2 */ /* result = t1 * w2 + b2 */
return MMul(t1, w2) + b2; return MMul(t1, w2) + b2;
} }
......
...@@ -60,6 +60,9 @@ public: ...@@ -60,6 +60,9 @@ public:
/* bias of transformation 2 */ /* bias of transformation 2 */
XTensor b2; XTensor b2;
/* dropout probability */
DTYPE dropoutP;
public: public:
/* constructor */ /* constructor */
...@@ -69,10 +72,10 @@ public: ...@@ -69,10 +72,10 @@ public:
~T2TFNN(); ~T2TFNN();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, const char ** argv, int myDevID = -1, XMem * myMem = NULL); void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL);
/* make the network */ /* make the network */
XTensor Make(XTensor &input); XTensor Make(XTensor &input, bool isTraining);
}; };
......
...@@ -33,6 +33,7 @@ T2TLN::T2TLN() ...@@ -33,6 +33,7 @@ T2TLN::T2TLN()
{ {
devID = -1; devID = -1;
mem = NULL; mem = NULL;
d = 0;
} }
/* de-constructor */ /* de-constructor */
...@@ -47,19 +48,19 @@ initialize the model ...@@ -47,19 +48,19 @@ initialize the model
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool >> myMem - the memory pool
*/ */
void T2TLN::InitModel(int argc, const char ** argv, int myDevID, XMem * myMem) void T2TLN::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
{ {
devID = myDevID; devID = myDevID;
mem = myMem; mem = myMem;
int d = 0; d = 0;
LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE); LoadParamInt(argc, argv, "d", &d, DEFAULT_EMBEDDING_SIZE);
InitTensor2D(&w, d, d, X_FLOAT, devID, mem); InitTensor1D(&w, d, X_FLOAT, devID, mem);
InitTensor1D(&b, d, X_FLOAT, devID, mem); InitTensor1D(&b, d, X_FLOAT, devID, mem);
float scale = 1.0F; float scale = 1.0F;
float finfout = (float)sqrt(6.0F * scale / (d + d)); float finfout = (float)sqrt(6.0F * scale / d);
w.SetDataRand(-finfout, finfout); w.SetDataRand(-finfout, finfout);
b.SetZeroAll(); b.SetZeroAll();
...@@ -90,16 +91,17 @@ XTensor T2TLN::Make(XTensor &input) ...@@ -90,16 +91,17 @@ XTensor T2TLN::Make(XTensor &input)
/* standard = sqrt(variance) */ /* standard = sqrt(variance) */
standard = Power(variance, 0.5F); standard = Power(variance, 0.5F);
/* unsqueeze mean and standard deviation to fit them into /* unsqueeze mean and standard deviation to fit them into
the same shape of x */ the same shape of x */
meanFilled = Unsqueeze(mean, x.order - 1, x.GetDim(-1)); meanFilled = Unsqueeze(mean, x.order - 1, x.GetDim(-1));
standardFilled = Unsqueeze(standard, x.order - 1, x.GetDim(-1)); standardFilled = Unsqueeze(standard, x.order - 1, x.GetDim(-1));
/* x' = (x - \mu)/standard */ /* x' = (x - \mu)/standard */
xn = (x - meanFilled)/standardFilled; xn = (x - meanFilled) / standardFilled;
/* result = x' * w + b */ /* result = x' * w + b */
return MMul(xn, w) + b; return xn * w + b;
} }
} }
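With w now a length-d vector, the elementwise xn * w + b is the standard layer-normalization form with a per-channel gain and bias:

    /* y = (x - mu) / sigma * w + b,   with w, b in R^d              */
    /* mu, sigma: mean / standard deviation over the last dimension  */

replacing the earlier d x d matrix product MMul(xn, w).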
...@@ -46,6 +46,9 @@ public: ...@@ -46,6 +46,9 @@ public:
/* the bias term b */ /* the bias term b */
XTensor b; XTensor b;
/* dimension size of the model */
int d;
public: public:
/* constructor */ /* constructor */
T2TLN(); T2TLN();
...@@ -54,7 +57,7 @@ public: ...@@ -54,7 +57,7 @@ public:
~T2TLN(); ~T2TLN();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, const char ** argv, int myDevID = -1, XMem * myMem = NULL); void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL);
/* make the network */ /* make the network */
XTensor Make(XTensor &input); XTensor Make(XTensor &input);
......
...@@ -48,7 +48,7 @@ initialize the model ...@@ -48,7 +48,7 @@ initialize the model
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
*/ */
void T2TModel::InitModel(int argc, const char ** argv) void T2TModel::InitModel(int argc, char ** argv)
{ {
bool useMem = false; bool useMem = false;
int memSize = 0; int memSize = 0;
...@@ -64,25 +64,32 @@ void T2TModel::InitModel(int argc, const char ** argv) ...@@ -64,25 +64,32 @@ void T2TModel::InitModel(int argc, const char ** argv)
if(useMem){ if(useMem){
delete mem; delete mem;
mem = new XMem(devID, isMemFreeOTF ? FREE_ON_THE_FLY : UNI_FREE, (MTYPE)MILLION * 256, 1024, MILLION * 128); mem = new XMem(devID, FREE_ON_THE_FLY, (MTYPE)MILLION * 256, 1024, MILLION * 128);
mem->SetDesiredSize(devID, 0, (MTYPE)memSize * MILLION); mem->SetDesiredSize(devID, 0, (MTYPE)memSize * MILLION);
} }
encoder.InitModel(argc, argv, isLM, 0, devID, mem); encoder.InitModel(argc, argv, isLM, 0, devID, mem);
outputLayer.InitModel(argc, argv, devID, mem); outputLayer.InitModel(argc, argv, devID, mem);
XList params(10);
GetParams(params);
for(int i = 0; i < params.count; i++){
XTensor * param = (XTensor*)params.Get(i);
param->SetVarFlag();
}
} }
/* /*
make the encoding network make the encoding network
>> input - input tensor >> input - input tensor
>> mask - the mask for positions that are/not involved in computation >> mask - the mask for positions that are/not involved in computation
>> skipInputRes - indicates whether we skip the residual connection of the first layer
>> isTraining - indicates whether we are training the model >> isTraining - indicates whether we are training the model
<< return - encoding result << return - encoding result
*/ */
XTensor T2TModel::MakeEncoding(XTensor &input, XTensor &mask, bool skipInputRes, bool isTraining) XTensor T2TModel::MakeEncoding(XTensor &input, XTensor &mask, bool isTraining)
{ {
return encoder.Make(input, mask, skipInputRes, isTraining); return encoder.Make(input, mask, isTraining);
} }
/* /*
...@@ -134,9 +141,9 @@ void T2TModel::Make(XTensor &input, XTensor &output, XTensor &padding, bool isTr ...@@ -134,9 +141,9 @@ void T2TModel::Make(XTensor &input, XTensor &output, XTensor &padding, bool isTr
_ScaleAndShiftMe(padding3, 1e9F, -1e9F); _ScaleAndShiftMe(padding3, 1e9F, -1e9F);
//_Sum(&mask, padding3, &mask); _Sum(&mask, padding3, &mask);
encoding = MakeEncoding(input, mask, true, isTraining); encoding = MakeEncoding(input, mask, isTraining);
outputLayer.Make(encoding, output); outputLayer.Make(encoding, output);
delete[] dims; delete[] dims;
...@@ -167,6 +174,7 @@ void T2TModel::GetParams(XList &list) ...@@ -167,6 +174,7 @@ void T2TModel::GetParams(XList &list)
list.Add(&encoder.attentions[i].wk); list.Add(&encoder.attentions[i].wk);
list.Add(&encoder.attentions[i].wq); list.Add(&encoder.attentions[i].wq);
list.Add(&encoder.attentions[i].wv); list.Add(&encoder.attentions[i].wv);
list.Add(&encoder.attentions[i].wa);
list.Add(&encoder.fnnLayerNorms[i].w); list.Add(&encoder.fnnLayerNorms[i].w);
list.Add(&encoder.fnnLayerNorms[i].b); list.Add(&encoder.fnnLayerNorms[i].b);
list.Add(&encoder.attLayerNorms[i].w); list.Add(&encoder.attLayerNorms[i].w);
......
...@@ -66,10 +66,10 @@ public: ...@@ -66,10 +66,10 @@ public:
~T2TModel(); ~T2TModel();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, const char ** argv); void InitModel(int argc, char ** argv);
/* make the encoding network */ /* make the encoding network */
XTensor MakeEncoding(XTensor &input, XTensor &mask, bool skipInputRes, bool isTraining); XTensor MakeEncoding(XTensor &input, XTensor &mask, bool isTraining);
/* make the entire network (with the output softmax layer) */ /* make the entire network (with the output softmax layer) */
void Make(XTensor &input, XTensor &output, XTensor &padding, bool isTraining); void Make(XTensor &input, XTensor &output, XTensor &padding, bool isTraining);
......
...@@ -49,7 +49,7 @@ initialize the model ...@@ -49,7 +49,7 @@ initialize the model
>> myDevID - device id >> myDevID - device id
>> myMem - the memory pool >> myMem - the memory pool
*/ */
void T2TOutput::InitModel(int argc, const char ** argv, int myDevID, XMem * myMem) void T2TOutput::InitModel(int argc, char ** argv, int myDevID, XMem * myMem)
{ {
devID = myDevID; devID = myDevID;
mem = myMem; mem = myMem;
......
...@@ -59,7 +59,7 @@ public: ...@@ -59,7 +59,7 @@ public:
~T2TOutput(); ~T2TOutput();
/* initialize the model */ /* initialize the model */
void InitModel(int argc, const char ** argv, int myDevID = -1, XMem * myMem = NULL); void InitModel(int argc, char ** argv, int myDevID = -1, XMem * myMem = NULL);
/* make the network */ /* make the network */
XTensor Make(XTensor &input); XTensor Make(XTensor &input);
......
...@@ -26,6 +26,11 @@ ...@@ -26,6 +26,11 @@
#include "../../tensor/core/CHeader.h" #include "../../tensor/core/CHeader.h"
#include "../../network/XNoder.h" #include "../../network/XNoder.h"
#ifndef WIN32
#include <sys/time.h>
#include <unistd.h>
#endif
namespace transformer namespace transformer
{ {
...@@ -33,15 +38,25 @@ namespace transformer ...@@ -33,15 +38,25 @@ namespace transformer
T2TTrainer::T2TTrainer() T2TTrainer::T2TTrainer()
{ {
seqLen = NULL; seqLen = NULL;
seqLen2 = NULL;
nseqBuf = 0; nseqBuf = 0;
nextSeq = -1; nextSeq = -1;
argNum = 0;
argArray = NULL;
buf = NULL;
buf2 = NULL;
bufSize = 0;
seqOffset = NULL;
} }
/* de-constructor */ /* de-constructor */
T2TTrainer::~T2TTrainer() T2TTrainer::~T2TTrainer()
{ {
delete[] buf; delete[] buf;
delete[] buf2;
delete[] seqLen; delete[] seqLen;
delete[] seqLen2;
delete[] seqOffset; delete[] seqOffset;
for(int i = 0; i < moments.count; i++){ for(int i = 0; i < moments.count; i++){
...@@ -53,6 +68,11 @@ T2TTrainer::~T2TTrainer() ...@@ -53,6 +68,11 @@ T2TTrainer::~T2TTrainer()
XTensor * m = (XTensor*)moments2nd.Get(i); XTensor * m = (XTensor*)moments2nd.Get(i);
delete m; delete m;
} }
for(int i = 0; i < argNum; i++)
delete[] argArray[i];
delete[] argArray;
} }
/* /*
...@@ -60,8 +80,15 @@ initialization ...@@ -60,8 +80,15 @@ initialization
>> argc - number of arguments >> argc - number of arguments
>> argv - list of pointers to the arguments >> argv - list of pointers to the arguments
*/ */
void T2TTrainer::Init(int argc, const char ** argv) void T2TTrainer::Init(int argc, char ** argv)
{ {
argNum = argc;
argArray = new char*[argc];
for(int i = 0; i < argNum; i++){
argArray[i] = new char[strlen(argv[i]) + 1];
strcpy(argArray[i], argv[i]);
}
bool useMem = false; bool useMem = false;
LoadParamBool(argc, argv, "mem", &useMem, useMem); LoadParamBool(argc, argv, "mem", &useMem, useMem);
...@@ -78,16 +105,23 @@ void T2TTrainer::Init(int argc, const char ** argv) ...@@ -78,16 +105,23 @@ void T2TTrainer::Init(int argc, const char ** argv)
LoadParamInt(argc, argv, "bufsize", &bufSize, 50000); LoadParamInt(argc, argv, "bufsize", &bufSize, 50000);
LoadParamBool(argc, argv, "adam", &useAdam, false); LoadParamBool(argc, argv, "adam", &useAdam, false);
LoadParamFloat(argc, argv, "adambeta1", &adamBeta1, 0.9F); LoadParamFloat(argc, argv, "adambeta1", &adamBeta1, 0.9F);
LoadParamFloat(argc, argv, "adambeta2", &adamBeta2, 0.999F); LoadParamFloat(argc, argv, "adambeta2", &adamBeta2, 0.98F);
LoadParamFloat(argc, argv, "adamdelta", &adamDelta, 1e-8F); LoadParamFloat(argc, argv, "adamdelta", &adamDelta, 1e-9F);
LoadParamBool(argc, argv, "shuffled", &isShuffled, false);
LoadParamFloat(argc, argv, "labelsmoothing", &labelSmoothingP, 0);
LoadParamInt(argc, argv, "nstepcheckpoint", &nStepCheckpoint, -1);
LoadParamBool(argc, argv, "epochcheckpoint", &useEpochCheckpoint, false);
LoadParamInt(argc, argv, "updatestep", &updateStep, 1);
LoadParamBool(argc, argv, "doubledend", &isDoubledEnd, false);
buf = new int[bufSize]; buf = new int[bufSize];
buf2 = new int[bufSize];
seqLen = new int[bufSize]; seqLen = new int[bufSize];
seqLen2 = new int[bufSize];
seqOffset = new int[bufSize]; seqOffset = new int[bufSize];
adamBeta1T = 1.0F; adamBeta1T = 1.0F;
adamBeta2T = 1.0F; adamBeta2T = 1.0F;
} }
int tc = 0; int tc = 0;
...@@ -95,9 +129,11 @@ int tc = 0; ...@@ -95,9 +129,11 @@ int tc = 0;
/* /*
train the model train the model
>> fn - training data file >> fn - training data file
>> validFN - validation data file
>> modelFN - where we keep the model
>> model - model to train >> model - model to train
*/ */
void T2TTrainer::Train(const char * fn, T2TModel * model) void T2TTrainer::Train(const char * fn, const char * validFN, const char * modelFN, T2TModel * model)
{ {
int epoch = 0; int epoch = 0;
int step = 0; int step = 0;
...@@ -107,33 +143,40 @@ void T2TTrainer::Train(const char * fn, T2TModel * model) ...@@ -107,33 +143,40 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
bool isEnd = false; bool isEnd = false;
float loss = 0; float loss = 0;
float lr = 0; float lr = 0;
int nStepCheck = 0;
int nCheckpoint = 0;
int nSkipped = 0;
int gradStep = 0;
int validStep = 0;
char * trainFN = new char[(int)strlen(fn) + 10];
strcpy(trainFN, fn);
#ifndef WIN32
if(isShuffled)
sprintf(trainFN, "%s.random", fn);
#endif
PrepareModel(model); PrepareModel(model);
int devID = model->devID; int devID = model->devID;
XMem * mem = model->mem; XMem * mem = model->mem;
if(mem != NULL && mem->mode == UNI_FREE)
mem->SetPin();
XNet net; XNet net;
tf = fopen("tmp.xx.txt", "wb");
tc = 0;
double startT = GetClockSec(); double startT = GetClockSec();
for(epoch = 1; epoch <= nepoch; epoch++){ for(epoch = 1; epoch <= nepoch; epoch++){
#ifndef WIN32
if(isShuffled)
Shuffle(fn, trainFN);
#endif
FILE * file = fopen(fn, "rb"); FILE * file = fopen(trainFN, "rb");
CheckNTErrors(file, "cannot open training file!"); CheckNTErrors(file, "cannot open training file!");
wordCount = 0; wordCount = 0;
loss = 0; loss = 0;
if(mem != NULL)
mem->BackToPin();
/* batch of input sequences */ /* batch of input sequences */
XTensor batch; XTensor batch;
...@@ -143,7 +186,12 @@ void T2TTrainer::Train(const char * fn, T2TModel * model) ...@@ -143,7 +186,12 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
/* gold standard */ /* gold standard */
XTensor gold; XTensor gold;
while(LoadBatch(file, true, &batch, &padding, &gold, NULL, 1, vSize, sBatchSize, wBatchSize, isLenSorted, wc, devID, mem)){ /* label smoothed gold standard (if needed) */
XTensor goldSmoothed;
while (LoadBatch(file, true, &batch, &padding, &gold, NULL, 1, vSize, sBatchSize, wBatchSize, isLenSorted, wc, devID, mem)) {
CheckNTErrors(batch.order == 3, "wrong tensor order of the sequence batch");
/* output probabilities */ /* output probabilities */
XTensor output; XTensor output;
...@@ -151,26 +199,57 @@ void T2TTrainer::Train(const char * fn, T2TModel * model) ...@@ -151,26 +199,57 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
/* make the network */ /* make the network */
model->Make(batch, output, padding, true); model->Make(batch, output, padding, true);
/* make paddings for the output */
if(output.GetDim(0) > 1)
PadOutput(&output, &padding);
/* back-propagation for obtaining gradients */ /* back-propagation for obtaining gradients */
net.Backward(output, gold, CROSSENTROPY); if (labelSmoothingP > 0)
LabelSmooth(&gold, &goldSmoothed, labelSmoothingP);
/* learning rate */ /* make paddings for the output */
lr = lrate * (1.0F / (float)sqrt((float)d)) * (float)MIN(pow((float)step + 1, -0.5F - lrbias), ((float)step + 1) * pow((float)nwarmup, -1.5F - lrbias)); if (output.GetDim(0) > 1)
PadOutput(&output, &gold, &padding);
/* update the parameters */
Update(model, lr);
/* get probabilities */ /* get probabilities */
float prob = GetProb(&output, &gold, NULL); float prob = GetProb(&output, &gold, NULL);
DTYPE lossLocal = -prob / wc;
bool doUpdate = (!IsNAN(lossLocal) && !IsINF(lossLocal) && lossLocal < 1e3F);
XTensor &g = labelSmoothingP > 0 ? goldSmoothed : gold;
if (doUpdate) {
/* rescale the output for normalized loss */
RescaleOutput(&output, &g, &padding);
/* back-propagation */
net.Backward(output, g, CROSSENTROPY);
/*for(int i = 0; i < net.nodes.count; i++){
XTensor * node = (XTensor*)net.nodes.Get(i);
XLink::ShowNode(stderr, node);
}
exit(0);*/
gradStep += 1;
loss += -prob; loss += -prob;
wordCount += wc; wordCount += wc;
wordCountTotal += wc; wordCountTotal += wc;
/* update the parameters */
if(gradStep == updateStep){
/* learning rate */
lr = lrate * (1.0F / (float)sqrt((float)d)) * (float)MIN(pow((float)validStep + 1, -0.5F - lrbias), ((float)validStep + 1) * pow((float)nwarmup, -1.5F - lrbias));
/* model update */
Update(model, lr);
gradStep = 0;
validStep++;
}
}
else
nSkipped++;
if(++step >= nstep){ if(++step >= nstep){
isEnd = true; isEnd = true;
break; break;
...@@ -178,33 +257,39 @@ void T2TTrainer::Train(const char * fn, T2TModel * model) ...@@ -178,33 +257,39 @@ void T2TTrainer::Train(const char * fn, T2TModel * model)
if (step % 1 == 0) { if (step % 1 == 0) {
double elapsed = GetClockSec() - startT; double elapsed = GetClockSec() - startT;
XPRINT7(0, stderr, "[INFO] lr=%.2e, elapsed=%.1fs, step=%d, epoch=%d, word=%d, ppl=%.3f, sppl=%.3f\n", XPRINT8(0, stderr, "[INFO] lr=%.2e, elapsed=%.1fs, step=%d, epoch=%d, word=%d, loss=%.3f, ppl=%.3f, sppl=%.3f",
lr, elapsed, step, epoch, wordCountTotal, exp(loss / wordCount), exp(-prob/wc)); lr, elapsed, step, epoch, wordCountTotal, loss/wordCount, exp(loss/wordCount), exp(-prob/wc));
if (!doUpdate)
XPRINT(0, stderr, " (no update)");
XPRINT(0, stderr, "\n");
} }
if(mem != NULL && mem->mode == UNI_FREE) if(nStepCheckpoint > 0 && ++nStepCheck >= nStepCheckpoint){
mem->BackToPin(); MakeCheckpoint(model, validFN, modelFN, "step", step);
nStepCheck = 0;
nCheckpoint++;
}
} }
fclose(file); fclose(file);
if (isEnd) if (isEnd)
break; break;
}
if(mem != NULL && mem->mode == UNI_FREE) if(useEpochCheckpoint)
mem->BackToPin(); MakeCheckpoint(model, validFN, modelFN, "epoch", epoch);
}
double elapsed = GetClockSec() - startT; double elapsed = GetClockSec() - startT;
fclose(tf);
epoch = MIN(epoch, nepoch); epoch = MIN(epoch, nepoch);
XPRINT6(0, stderr, "[INFO] lr=%.2e, elapsed=%.1fs, step=%d, epoch=%d, word=%d, ppl=%.3f\n", XPRINT7(0, stderr, "[INFO] lr=%.2e, elapsed=%.1fs, step=%d, epoch=%d, word=%d, loss=%.3f, ppl=%.3f\n",
lr, elapsed, step, epoch, wordCountTotal, exp(loss / wordCount)); lr, elapsed, step, epoch, wordCountTotal, loss/wordCount, exp(loss/wordCount));
XPRINT3(0, stderr, "[INFO] training finished (took %.1fs, step=%d and epoch=%d)\n", XPRINT4(0, stderr, "[INFO] training finished (took %.1fs, step=%d, skipped=%d and epoch=%d)\n",
elapsed, step, epoch); elapsed, step, nSkipped, epoch);
delete[] trainFN;
} }
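For reference, the learning-rate schedule computed inside the update block above, with s = validStep counting parameter updates (gradient accumulation over updateStep batches counts as one update):

    /* lr = lrate * d^(-1/2) * min( (s+1)^(-1/2 - lrbias),
                                    (s+1) * nwarmup^(-3/2 - lrbias) ) */

With lrbias = 0 this is the familiar Transformer warmup schedule: linear growth for roughly the first nwarmup updates, then decay proportional to 1/sqrt(s).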
/* /*
...@@ -218,6 +303,7 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -218,6 +303,7 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model)
int wc = 0; int wc = 0;
int wordCount = 0; int wordCount = 0;
int wordCountTotal = 0; int wordCountTotal = 0;
int sentCount = 0;
float loss = 0; float loss = 0;
/* data files */ /* data files */
...@@ -231,16 +317,10 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -231,16 +317,10 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model)
XNet net; XNet net;
tf = fopen("tmp.xx.txt", "wb");
tc = 0;
double startT = GetClockSec(); double startT = GetClockSec();
wordCount = 0; wordCount = 0;
if(mem != NULL && mem->mode == UNI_FREE)
mem->BackToPin();
/* batch of input sequences */ /* batch of input sequences */
XTensor batch; XTensor batch;
...@@ -255,7 +335,7 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -255,7 +335,7 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model)
ClearBuf(); ClearBuf();
while(LoadBatch(file, true, &batch, &padding, &gold, seqs, 1, vSize, 1, 1, isLenSorted, wc, devID, mem)){ while(LoadBatch(file, true, &batch, &padding, &gold, seqs, 1, vSize, 1, 1, false, wc, devID, mem)){
CheckNTErrors(batch.order == 3, "wrong tensor order of the sequence batch"); CheckNTErrors(batch.order == 3, "wrong tensor order of the sequence batch");
...@@ -302,14 +382,9 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -302,14 +382,9 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model)
loss += -prob; loss += -prob;
wordCount += wc; wordCount += wc;
wordCountTotal += wc; wordCountTotal += wc;
sentCount += 1;
if(mem != NULL && mem->mode == UNI_FREE)
mem->BackToPin();
} }
if(mem != NULL && mem->mode == UNI_FREE)
mem->BackToPin();
fclose(file); fclose(file);
fclose(ofile); fclose(ofile);
...@@ -317,20 +392,51 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model) ...@@ -317,20 +392,51 @@ void T2TTrainer::Test(const char * fn, const char * ofn, T2TModel * model)
double elapsed = GetClockSec() - startT; double elapsed = GetClockSec() - startT;
fclose(tf);
XPRINT3(0, stderr, "[INFO] test finished (took %.1fs, word=%d, and ppl=%.3f)\n", XPRINT3(0, stderr, "[INFO] test finished (took %.1fs, word=%d, and ppl=%.3f)\n",
elapsed,wordCountTotal, exp(loss / wordCount)); elapsed,wordCountTotal, exp(loss / wordCount));
} }
/*
make a checkpoint
>> model - the model
>> validFN - validation data file
>> modelFN - model data file
>> label - label of the model
>> id - id of the checkpoint
*/
void T2TTrainer::MakeCheckpoint(T2TModel * model, const char * validFN, const char * modelFN, const char * label, int id)
{
char * fn = new char[MAX_LINE_LENGTH];
char * fn2 = new char[MAX_LINE_LENGTH];
sprintf(fn, "%s.%s.%03d", modelFN, label, id);
sprintf(fn2, "%s.%s.%03d.output", modelFN, label, id);
model->Dump(fn);
if(validFN != NULL){
T2TTrainer trainer;
trainer.Init(argNum, argArray);
trainer.Test(validFN, fn2, model);
}
delete[] fn;
delete[] fn2;
}
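For illustration (file name hypothetical): with modelFN = "model", label = "step" and id = 5, the two sprintf calls above produce

    /* model.step.005          <- dumped model parameters                     */
    /* model.step.005.output   <- Test() output on the validation set, if any */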
char line[MAX_SEQUENCE_LENGTH]; char line[MAX_SEQUENCE_LENGTH];
struct SampleNode struct SampleNode
{ {
int id; int id;
int * p;
int size; int size;
int value;
}; };
int CompareSampleNode(const void * a, const void * b)
{
return ((SampleNode*)b)->value - ((SampleNode*)a)->value;
}
/* /*
load data to buffer load data to buffer
>> file - where to load data >> file - where to load data
...@@ -403,14 +509,46 @@ int T2TTrainer::LoadBuf(FILE * file, bool isSorted, int step) ...@@ -403,14 +509,46 @@ int T2TTrainer::LoadBuf(FILE * file, bool isSorted, int step)
nseqBuf = seqCount; nseqBuf = seqCount;
nextSeq = 0; nextSeq = 0;
/* sort the sequences by length */
if (isSorted) { if (isSorted) {
SampleNode * nodes = new SampleNode[seqCount]; SampleNode * nodes = new SampleNode[seqCount];
int count = 0; int count = 0;
int offset = 0;
for (int i = 0; i < seqCount; i += step) { for (int i = 0; i < seqCount; i += step) {
nodes[count].id = count; SampleNode &node = nodes[count];
nodes[count].size = seqLen[i]; node.id = count;
node.p = buf + offset;
node.size = 0;
for(int j = 0; j < step; j++)
node.size += seqLen[i + j];
node.value = seqLen[i];
count++; count++;
offset += node.size;
}
qsort(nodes, seqCount, sizeof(SampleNode), CompareSampleNode);
count = 0;
offset = 0;
for(int i = 0; i < seqCount; i++){
SampleNode &node = nodes[count];
//fprintf(stderr, "%d %d %d\n", node.size, node.id, node.value);
memcpy(buf2 + offset, node.p, sizeof(int) * node.size);
for(int j = 0; j < step; j++){
seqLen2[count + j] = seqLen[node.id + j];
seqOffset[count + j] = offset + (j > 0 ? seqLen[node.id + j - 1] : 0);
}
count += step;
offset += node.size;
} }
int * tmp = buf;
buf = buf2;
buf2 = tmp;
tmp = seqLen;
seqLen = seqLen2;
seqLen2 = tmp;
delete[] nodes; delete[] nodes;
} }
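A small illustration of the reordering (step = 1, lengths hypothetical):

    /* three buffered sequences with lengths {3, 7, 5}                     */
    /* qsort with CompareSampleNode sorts descending by value: {7, 5, 3}   */
    /* buf2 receives the words in that order; seqOffset becomes {0, 7, 12} */
    /* buf <-> buf2 and seqLen <-> seqLen2 are then swapped, so the sorted */
    /* copies become the active buffers for the following LoadBatch calls  */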
...@@ -449,7 +587,7 @@ int T2TTrainer::LoadBatch(FILE * file, bool isLM, ...@@ -449,7 +587,7 @@ int T2TTrainer::LoadBatch(FILE * file, bool isLM,
int devID, XMem * mem) int devID, XMem * mem)
{ {
if(nextSeq < 0 || nextSeq >= nseqBuf) if(nextSeq < 0 || nextSeq >= nseqBuf)
LoadBuf(file, isSorted); LoadBuf(file, isSorted, step);
int seq = MAX(nextSeq, 0); int seq = MAX(nextSeq, 0);
int wc = 0; int wc = 0;
...@@ -457,7 +595,9 @@ int T2TTrainer::LoadBatch(FILE * file, bool isLM, ...@@ -457,7 +595,9 @@ int T2TTrainer::LoadBatch(FILE * file, bool isLM,
int sc = 0; int sc = 0;
int max = 0; int max = 0;
while(seq + sc < nseqBuf){ while(seq + sc < nseqBuf){
wn = seqLen[seq + sc]; int len = isDoubledEnd ? seqLen[seq + sc] : seqLen[seq + sc] - 1;
CheckNTErrors(len > 0, "Empty sequence!");
wn = len;
wc += wn; wc += wn;
sc += 1; sc += 1;
...@@ -512,13 +652,19 @@ int T2TTrainer::LoadBatch(FILE * file, bool isLM, ...@@ -512,13 +652,19 @@ int T2TTrainer::LoadBatch(FILE * file, bool isLM,
/* this might be slow on GPUs :( */ /* this might be slow on GPUs :( */
for(int s = seq; s < seq + sc; s++){ for(int s = seq; s < seq + sc; s++){
for(int w = 0; w < seqLen[s]; w++){ int len = isDoubledEnd ? seqLen[s] : seqLen[s] - 1;
CheckNTErrors(len <= max, "Something is wrong!");
for(int w = 0; w < len; w++){
batch->Set3D(1.0F, s - seq, w, buf[seqOffset[s] + w]); batch->Set3D(1.0F, s - seq, w, buf[seqOffset[s] + w]);
padding->Set2D(1.0F, s - seq, w); padding->Set2D(1.0F, s - seq, w);
if(w > 0) if(w > 0)
output->Set3D(1.0F, s - seq, w - 1, buf[seqOffset[s] + w]); output->Set3D(1.0F, s - seq, w - 1, buf[seqOffset[s] + w]);
if(w == seqLen[s] - 1) if(w == len - 1){
if(isDoubledEnd)
output->Set3D(1.0F, s - seq, w, buf[seqOffset[s] + w]); output->Set3D(1.0F, s - seq, w, buf[seqOffset[s] + w]);
else
output->Set3D(1.0F, s - seq, w, buf[seqOffset[s] + w + 1]);
}
wCount++; wCount++;
/*fprintf(tf, "%d", buf[seqOffset[s] + w]); /*fprintf(tf, "%d", buf[seqOffset[s] + w]);
if(w < seqLen[s] - 1) if(w < seqLen[s] - 1)
...@@ -530,7 +676,7 @@ int T2TTrainer::LoadBatch(FILE * file, bool isLM, ...@@ -530,7 +676,7 @@ int T2TTrainer::LoadBatch(FILE * file, bool isLM,
} }
if(seqs != NULL){ if(seqs != NULL){
for(int w = seqLen[s]; w < max; w++) for(int w = len; w < max; w++)
seqs[seqSize++] = -1; seqs[seqSize++] = -1;
} }
} }
...@@ -542,6 +688,23 @@ int T2TTrainer::LoadBatch(FILE * file, bool isLM, ...@@ -542,6 +688,23 @@ int T2TTrainer::LoadBatch(FILE * file, bool isLM,
} }
/* /*
shuffle lines of the file
>> srcFile - the source file to shuffle
>> tgtFile - the resulting file
*/
void T2TTrainer::Shuffle(const char * srcFile, const char * tgtFile)
{
char * line = new char[MAX_LINE_LENGTH];
#ifndef WIN32
sprintf(line, "shuf %s > %s", srcFile, tgtFile);
system(line);
#else
ShowNTErrors("Cannot shuffle the file on WINDOWS systems!");
#endif
delete[] line;
}
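For example, the call made from Train() above, Shuffle(fn, trainFN) with fn = "train.txt" (hypothetical), simply shells out to

    /* shuf train.txt > train.txt.random */

which is why shuffling is available only on non-Windows systems here.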
/*
get word probabilities for a batch of sequences get word probabilities for a batch of sequences
>> output - word distribution for each position >> output - word distribution for each position
>> gold - gold standard >> gold - gold standard
...@@ -610,12 +773,12 @@ void T2TTrainer::Update(T2TModel * model, const float lr) ...@@ -610,12 +773,12 @@ void T2TTrainer::Update(T2TModel * model, const float lr)
DTYPE e = lr * (DTYPE)sqrt(1 - adamBeta2T) / (1 - adamBeta1T); DTYPE e = lr * (DTYPE)sqrt(1 - adamBeta2T) / (1 - adamBeta1T);
DTYPE d = adamDelta * (DTYPE)sqrt(1 - adamBeta2T); DTYPE d = adamDelta * (DTYPE)sqrt(1 - adamBeta2T);
/* m = beat_1 * m + (1-beta_1) * grad */ /* m = beta_1 * m + (1-beta_1) * grad */
XTensor * m = (XTensor*)moments.Get(i); XTensor * m = (XTensor*)moments.Get(i);
_ScaleAndShiftMe(m, adamBeta1, 0); _ScaleAndShiftMe(m, adamBeta1, 0);
_Sum(m, paraGrad, m, (1.0F - adamBeta1)); _Sum(m, paraGrad, m, (1.0F - adamBeta1));
/* v = beat_2 * v + (1-beta_2) * grad * grad*/ /* v = beta_2 * v + (1-beta_2) * grad * grad*/
XTensor * v = (XTensor*)moments2nd.Get(i); XTensor * v = (XTensor*)moments2nd.Get(i);
_Multiply(paraGrad, paraGrad, v, adamBeta2/(1.0F - adamBeta2)); _Multiply(paraGrad, paraGrad, v, adamBeta2/(1.0F - adamBeta2));
_ScaleAndShiftMe(v, (1.0F - adamBeta2), 0); _ScaleAndShiftMe(v, (1.0F - adamBeta2), 0);
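Spelled out, the two moment updates above are the standard Adam recursions, with the bias corrections folded into the step size e and the corrected delta d:

    /* m <- beta_1 * m + (1 - beta_1) * grad         */
    /* v <- beta_2 * v + (1 - beta_2) * grad * grad  */
    /* e  = lr * sqrt(1 - beta_2^t) / (1 - beta_1^t) */

so the parameter step that follows (beyond this hunk) presumably applies param <- param - e * m / (sqrt(v) + d).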
...@@ -676,9 +839,11 @@ void T2TTrainer::PrepareModel(T2TModel * model) ...@@ -676,9 +839,11 @@ void T2TTrainer::PrepareModel(T2TModel * model)
/* /*
do padding on the output do padding on the output
>> output - output tensor of the network >> output - output tensor of the network
>> gold - gold standard
>> padding - padding of a batch of sentences >> padding - padding of a batch of sentences
*/ */
void T2TTrainer::PadOutput(XTensor * output, XTensor * padding) void T2TTrainer::PadOutput(XTensor * output, XTensor * gold, XTensor * padding)
{ {
if(output == NULL || padding == NULL) if(output == NULL || padding == NULL)
return; return;
...@@ -693,14 +858,68 @@ void T2TTrainer::PadOutput(XTensor * output, XTensor * padding) ...@@ -693,14 +858,68 @@ void T2TTrainer::PadOutput(XTensor * output, XTensor * padding)
XTensor * padding2 = NewTensorBuf(1, &padding->unitNum, X_FLOAT, 1.0F, padding->devID, padding->mem); XTensor * padding2 = NewTensorBuf(1, &padding->unitNum, X_FLOAT, 1.0F, padding->devID, padding->mem);
_CopyValues(padding, padding2); _CopyValues(padding, padding2);
_MultiplyDim(output, padding2, output, 0);
_ScaleAndShiftMe(padding2, 1e9F, -1e9F); _ScaleAndShiftMe(padding2, 1e9F, -1e9F);
_SumDim(output, padding2, output, 0); _SumDim(output, padding2, output, 0);
output->Reshape(on, dimso); output->Reshape(on, dimso);
if(gold != NULL){
gold->Reshape(gold->unitNum/dimso[gold->order - 1], dimso[gold->order - 1]);
_CopyValues(padding, padding2);
_MultiplyDim(gold, padding2, gold, 0);
gold->Reshape(on, dimso);
}
delete[] dimso; delete[] dimso;
DelTensorBuf(padding2); DelTensorBuf(padding2);
} }
/*
rescale the output and gold tensors for normalized loss
>> output - output tensor of the network
>> gold - gold standard
>> padding - padding of a batch of sentences
*/
void T2TTrainer::RescaleOutput(XTensor * output, XTensor * gold, XTensor * padding)
{
CheckNTErrors(output->order == 3, "Wrong dimension number!");
CheckNTErrors(gold->order == 3, "Wrong dimension number!");
int num = padding->GetDim(0);
XTensor * factor = NewTensorBuf(1, &num, padding->dataType, 1.0F, padding->devID, padding->mem);
_ReduceSum(padding, factor, padding->order - 1);
_ExpMe(output);
_DivDim(output, factor, output, 0);
_LogMe(output);
_DivDim(gold, factor, gold, 0);
DelTensorBuf(factor);
}
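Since factor holds the number of non-padded tokens in each sentence, the exp/div/log sequence is just a shift by log(len): log(exp(o) / len) = o - log(len), and the gold values are divided by the same length. A scalar sketch:

#include <cmath>

/* length-normalize one log-probability */
float RescaleLogProb(float logProb, float len)
{
    return logProb - std::log(len); /* == log(exp(logProb) / len) */
}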
/*
perform label smoothing
>> gold - gold standard
>> smoothed - result of label smoothing
>> p - smoothing factor
*/
void T2TTrainer::LabelSmooth(XTensor * gold, XTensor * smoothed, DTYPE p)
{
CheckNTErrors(p >= 0 && p <= 1.0F, "Smoothing factor must be in range [0,1]");
int n = gold->GetDim(-1);
DTYPE q = 1.0F - p;
DTYPE gift = p / n;
InitTensor(smoothed, gold);
_CopyValues(gold, smoothed);
if(p == 0)
return;
_ScaleAndShiftMe(smoothed, q, gift);
}
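The result is smoothed = (1 - p) * gold + p / n. A minimal numeric check, runnable on its own: with n = 4 classes and p = 0.1, a one-hot row [1, 0, 0, 0] becomes [0.925, 0.025, 0.025, 0.025].

#include <cstdio>

int main()
{
    const int n = 4;
    const float p = 0.1F, q = 1.0F - p, gift = p / n;
    float gold[n] = {1, 0, 0, 0};
    for (int i = 0; i < n; i++)
        std::printf("%g ", q * gold[i] + gift); /* 0.925 0.025 0.025 0.025 */
    std::printf("\n");
    return 0;
}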
} }
...@@ -37,15 +37,27 @@ namespace transformer ...@@ -37,15 +37,27 @@ namespace transformer
class T2TTrainer class T2TTrainer
{ {
public: public:
/* parameter number */
int argNum;
/* parameter array */
char ** argArray;
/* buffer for loading words */ /* buffer for loading words */
int * buf; int * buf;
/* another buffer */
int * buf2;
/* buffer size */ /* buffer size */
int bufSize; int bufSize;
/* length of each sequence */ /* length of each sequence */
int * seqLen; int * seqLen;
/* another array */
int * seqLen2;
/* offset of the first word for each sequence */ /* offset of the first word for each sequence */
int * seqOffset; int * seqOffset;
...@@ -101,6 +113,24 @@ public: ...@@ -101,6 +113,24 @@ public:
/* list of the 2nd order moment of the parameter matrices */ /* list of the 2nd order moment of the parameter matrices */
XList moments2nd; XList moments2nd;
/* indicates whether the data file is shuffled for training */
bool isShuffled;
/* the factor of label smoothing */
DTYPE labelSmoothingP;
/* number of steps after which we make a checkpoint */
int nStepCheckpoint;
/* indicates whether we make a checkpoint after each training epoch */
bool useEpochCheckpoint;
/* number of batches on which we do model update */
int updateStep;
/* indicates whether we double the </s> symbol for the output of LMs */
bool isDoubledEnd;
public: public:
/* constructor */ /* constructor */
T2TTrainer(); T2TTrainer();
...@@ -109,14 +139,17 @@ public: ...@@ -109,14 +139,17 @@ public:
~T2TTrainer(); ~T2TTrainer();
/* initialize the trainer */ /* initialize the trainer */
void Init(int argc, const char ** argv); void Init(int argc, char ** argv);
/* train the model */ /* train the model */
void Train(const char * fn, T2TModel * model); void Train(const char * fn, const char * validFN, const char * modelFN, T2TModel * model);
/* test the model */ /* test the model */
void Test(const char * fn, const char * ofn, T2TModel * model); void Test(const char * fn, const char * ofn, T2TModel * model);
/* make a checkpoint */
void MakeCheckpoint(T2TModel * model, const char * validFN, const char * modelFN, const char * label, int id);
/* load data to buffer */ /* load data to buffer */
int LoadBuf(FILE * file, bool isSorted, int step); int LoadBuf(FILE * file, bool isSorted, int step);
...@@ -131,6 +164,9 @@ public: ...@@ -131,6 +164,9 @@ public:
bool isSorted, int &wCount, bool isSorted, int &wCount,
int devID, XMem * mem); int devID, XMem * mem);
/* shuffle the data file */
void Shuffle(const char * srcFile, const char * tgtFile);
/* get word probabilities for a batch of sequences */ /* get word probabilities for a batch of sequences */
float GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs); float GetProb(XTensor * output, XTensor * gold, XTensor * wordProbs);
...@@ -141,7 +177,13 @@ public: ...@@ -141,7 +177,13 @@ public:
void PrepareModel(T2TModel * model); void PrepareModel(T2TModel * model);
/* do padding on the output */ /* do padding on the output */
void PadOutput(XTensor * output, XTensor * padding); void PadOutput(XTensor * output, XTensor * gold, XTensor * padding);
/* rescale the output and gold tensors for normalized loss */
void RescaleOutput(XTensor * output, XTensor * gold, XTensor * padding);
/* perform label smoothing */
void LabelSmooth(XTensor * gold, XTensor * smoothed, DTYPE p);
}; };
......
...@@ -30,7 +30,7 @@ FILE * tmpFILE; ...@@ -30,7 +30,7 @@ FILE * tmpFILE;
int llnum = 0; int llnum = 0;
FILE * tf = NULL; FILE * tf = NULL;
void LoadParamString(int argc, const char ** argv, const char * name, char * p, const char * defaultP) void LoadParamString(int argc, char ** argv, const char * name, char * p, const char * defaultP)
{ {
char vname[128]; char vname[128];
vname[0] = '-'; vname[0] = '-';
...@@ -47,7 +47,7 @@ void LoadParamString(int argc, const char ** argv, const char * name, char * p, ...@@ -47,7 +47,7 @@ void LoadParamString(int argc, const char ** argv, const char * name, char * p,
strcpy(p, defaultP); strcpy(p, defaultP);
} }
void LoadParamInt(int argc, const char ** argv, const char * name, int * p, int defaultP) void LoadParamInt(int argc, char ** argv, const char * name, int * p, int defaultP)
{ {
char vname[128]; char vname[128];
vname[0] = '-'; vname[0] = '-';
...@@ -64,7 +64,7 @@ void LoadParamInt(int argc, const char ** argv, const char * name, int * p, int ...@@ -64,7 +64,7 @@ void LoadParamInt(int argc, const char ** argv, const char * name, int * p, int
*p = defaultP; *p = defaultP;
} }
void LoadParamBool(int argc, const char ** argv, const char * name, bool * p, bool defaultP) void LoadParamBool(int argc, char ** argv, const char * name, bool * p, bool defaultP)
{ {
char vname[128]; char vname[128];
vname[0] = '-'; vname[0] = '-';
...@@ -81,7 +81,7 @@ void LoadParamBool(int argc, const char ** argv, const char * name, bool * p, bo ...@@ -81,7 +81,7 @@ void LoadParamBool(int argc, const char ** argv, const char * name, bool * p, bo
*p = defaultP; *p = defaultP;
} }
void LoadParamFloat(int argc, const char ** argv, const char * name, float * p, float defaultP) void LoadParamFloat(int argc, char ** argv, const char * name, float * p, float defaultP)
{ {
char vname[128]; char vname[128];
vname[0] = '-'; vname[0] = '-';
...@@ -98,7 +98,7 @@ void LoadParamFloat(int argc, const char ** argv, const char * name, float * p, ...@@ -98,7 +98,7 @@ void LoadParamFloat(int argc, const char ** argv, const char * name, float * p,
*p = defaultP; *p = defaultP;
} }
void ShowParams(int argc, const char ** argv) void ShowParams(int argc, char ** argv)
{ {
fprintf(stderr, "args:\n"); fprintf(stderr, "args:\n");
for(int i = 0; i < argc; i++){ for(int i = 0; i < argc; i++){
......
...@@ -30,13 +30,13 @@ namespace transformer ...@@ -30,13 +30,13 @@ namespace transformer
extern FILE * tmpFILE; extern FILE * tmpFILE;
/* load arguments */ /* load arguments */
void LoadParamString(int argc, const char ** argv, const char * name, char * p, const char * defaultP); void LoadParamString(int argc, char ** argv, const char * name, char * p, const char * defaultP);
void LoadParamInt(int argc, const char ** argv, const char * name, int * p, int defaultP); void LoadParamInt(int argc, char ** argv, const char * name, int * p, int defaultP);
void LoadParamBool(int argc, const char ** argv, const char * name, bool * p, bool defaultP); void LoadParamBool(int argc, char ** argv, const char * name, bool * p, bool defaultP);
void LoadParamFloat(int argc, const char ** argv, const char * name, float * p, float defaultP); void LoadParamFloat(int argc, char ** argv, const char * name, float * p, float defaultP);
/* show arguments */ /* show arguments */
void ShowParams(int argc, const char ** argv); void ShowParams(int argc, char ** argv);
extern int llnum; extern int llnum;
extern FILE * tf; extern FILE * tf;
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
* $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31 * $Created by: XIAO Tong (xiaotong@mail.neu.edu.cn) 2018-07-31
*/ */
#include <math.h>
#include "Transformer.h" #include "Transformer.h"
#include "T2TModel.h" #include "T2TModel.h"
#include "T2TUtility.h" #include "T2TUtility.h"
...@@ -33,30 +34,38 @@ int TransformerMain(int argc, const char ** argv) ...@@ -33,30 +34,38 @@ int TransformerMain(int argc, const char ** argv)
if(argc == 0) if(argc == 0)
return 1; return 1;
fprintf(stderr, "%e\n", log(1e-8F));
char ** args = new char*[argc];
for(int i = 0; i < argc; i++){
args[i] = new char[strlen(argv[i]) + 1];
strcpy(args[i], argv[i]);
}
tmpFILE = fopen("tmp.txt", "wb"); tmpFILE = fopen("tmp.txt", "wb");
ShowParams(argc, argv); ShowParams(argc, args);
char * trainFN = new char[MAX_LINE_LENGTH]; char * trainFN = new char[MAX_LINE_LENGTH];
char * modelFN = new char[MAX_LINE_LENGTH]; char * modelFN = new char[MAX_LINE_LENGTH];
char * testFN = new char[MAX_LINE_LENGTH]; char * testFN = new char[MAX_LINE_LENGTH];
char * outputFN = new char[MAX_LINE_LENGTH]; char * outputFN = new char[MAX_LINE_LENGTH];
LoadParamString(argc, argv, "train", trainFN, ""); LoadParamString(argc, args, "train", trainFN, "");
LoadParamString(argc, argv, "model", modelFN, ""); LoadParamString(argc, args, "model", modelFN, "");
LoadParamString(argc, argv, "test", testFN, ""); LoadParamString(argc, args, "test", testFN, "");
LoadParamString(argc, argv, "output", outputFN, ""); LoadParamString(argc, args, "output", outputFN, "");
T2TTrainer trainer; T2TTrainer trainer;
trainer.Init(argc, argv); trainer.Init(argc, args);
T2TModel model; T2TModel model;
model.InitModel(argc, argv); model.InitModel(argc, args);
/* learn model parameters */ /* learn model parameters */
if(strcmp(trainFN, "")) if(strcmp(trainFN, ""))
trainer.Train(trainFN, &model); trainer.Train(trainFN, testFN, strcmp(modelFN, "") ? modelFN : "checkpoint.model", &model);
/* save the final model */ /* save the final model */
if(strcmp(modelFN, "") && strcmp(trainFN, "")) if(strcmp(modelFN, "") && strcmp(trainFN, ""))
...@@ -66,15 +75,22 @@ int TransformerMain(int argc, const char ** argv) ...@@ -66,15 +75,22 @@ int TransformerMain(int argc, const char ** argv)
if(strcmp(modelFN, "")) if(strcmp(modelFN, ""))
model.Read(modelFN); model.Read(modelFN);
T2TTrainer tester;
tester.Init(argc, args);
/* test the model on the new data */ /* test the model on the new data */
if(strcmp(testFN, "") && strcmp(outputFN, "")) if(strcmp(testFN, "") && strcmp(outputFN, ""))
trainer.Test(testFN, outputFN, &model); tester.Test(testFN, outputFN, &model);
delete[] trainFN; delete[] trainFN;
delete[] modelFN; delete[] modelFN;
delete[] testFN; delete[] testFN;
delete[] outputFN; delete[] outputFN;
for(int i = 0; i < argc; i++)
delete[] args[i];
delete[] args;
fclose(tmpFILE); fclose(tmpFILE);
return 0; return 0;
......
...@@ -55,6 +55,9 @@ namespace nts { ...@@ -55,6 +55,9 @@ namespace nts {
#define DTYPE_MIN (DTYPE)-3.40E+38 #define DTYPE_MIN (DTYPE)-3.40E+38
#endif #endif
#define LOGPROB_MIN (DTYPE)-2E+1
#define GRAD_MAX (DTYPE)1E+5
#if WIN32 #if WIN32
#define DELIMITER '\\' #define DELIMITER '\\'
#else #else
...@@ -148,6 +151,7 @@ extern bool useCUDA; ...@@ -148,6 +151,7 @@ extern bool useCUDA;
#define XPRINT5(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5);FFLUSH(FILEH);}} #define XPRINT5(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5);FFLUSH(FILEH);}}
#define XPRINT6(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6);FFLUSH(FILEH);}} #define XPRINT6(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6);FFLUSH(FILEH);}}
#define XPRINT7(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7);FFLUSH(FILEH);}} #define XPRINT7(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7);FFLUSH(FILEH);}}
#define XPRINT8(VERBOSE,FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7,ARG8) {if(VERBOSE<=verboseLevel) {fprintf(FILEH,STR,ARG,ARG2,ARG3,ARG4,ARG5,ARG6,ARG7,ARG8);FFLUSH(FILEH);}}
#define B2I(V) V==0?false:true #define B2I(V) V==0?false:true
......
...@@ -265,6 +265,18 @@ int XLink::GetParamInt(int i) ...@@ -265,6 +265,18 @@ int XLink::GetParamInt(int i)
} }
/* /*
get a parameter in pointer
>> i - id of the parameter
<< return - the parameter in pointer
*/
void * XLink::GetParamPointer(int i)
{
CheckNTErrors(params != NULL, "parameter array cannot be empty!");
char * p = (char*)params + i * paramSize;
return *(void **)p;
}
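Each parameter occupies a paramSize-byte slot of raw storage, so reading a pointer back is a reinterpretation of the slot's bytes. A minimal sketch of the round trip (the function name is illustrative, and the byte-wise store mirrors what AddParam does with the address passed to it):

#include <cstring>

/* store a pointer into a raw byte slot and read it back, mirroring
   AddParam / GetParamPointer above */
void * RoundTripPointer(void * value)
{
    alignas(void*) char slot[sizeof(void*)];
    std::memcpy(slot, &value, sizeof(void*)); /* AddParam side */
    return *(void **)slot;                    /* GetParamPointer side */
}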
/*
get a parameter in MATRIX_TRANS_TYPE get a parameter in MATRIX_TRANS_TYPE
>> i - id of the parameter >> i - id of the parameter
<< return - the parameter in MATRIX_TRANS_TYPE << return - the parameter in MATRIX_TRANS_TYPE
...@@ -401,8 +413,7 @@ add a boolean parameter ...@@ -401,8 +413,7 @@ add a boolean parameter
*/ */
void XLink::AddParamToHeadBool(XTensor * h, bool param) void XLink::AddParamToHeadBool(XTensor * h, bool param)
{ {
if(h != NULL) CheckNTErrors(h != NULL, "head tensor cannot be empty!");
return;
h->income.AddParam(&param, sizeof(bool)); h->income.AddParam(&param, sizeof(bool));
} }
...@@ -413,8 +424,7 @@ add a pointer parameter ...@@ -413,8 +424,7 @@ add a pointer parameter
*/ */
void XLink::AddParamToHeadPointer(XTensor * h, void * param) void XLink::AddParamToHeadPointer(XTensor * h, void * param)
{ {
if(h != NULL) CheckNTErrors(h != NULL, "head tensor cannot be empty!");
return;
h->income.AddParam(&param, sizeof(param)); h->income.AddParam(&param, sizeof(param));
} }
...@@ -589,9 +599,24 @@ show the network encoded in a root node (tensor) ...@@ -589,9 +599,24 @@ show the network encoded in a root node (tensor)
*/ */
void XLink::ShowNetwork(FILE * file, XTensor * root) void XLink::ShowNetwork(FILE * file, XTensor * root)
{ {
fprintf(file, "node %d - ", root->id);
XLink &income = root->income; XLink &income = root->income;
for(int i = 0; i < income.tailNum; i++){
XTensor * child = income.tails[i];
ShowNetwork(file, child);
}
}
/*
show a node
>> file - file to dump information
>> root - pointer to the node
*/
void XLink::ShowNode(FILE * file, XTensor * node)
{
fprintf(file, "node %d - ", node->id);
XLink &income = node->income;
if(income.head == NULL){ if(income.head == NULL){
fprintf(file, "income[%d]: null ", income.tailNum); fprintf(file, "income[%d]: null ", income.tailNum);
} }
...@@ -607,7 +632,7 @@ void XLink::ShowNetwork(FILE * file, XTensor * root) ...@@ -607,7 +632,7 @@ void XLink::ShowNetwork(FILE * file, XTensor * root)
} }
fprintf(file, ", "); fprintf(file, ", ");
XLink &outgo = root->outgo; XLink &outgo = node->outgo;
if(outgo.head == NULL || outgo.tailNum == 0){ if(outgo.head == NULL || outgo.tailNum == 0){
fprintf(file, "outgo[%d]: null ", outgo.tailNum); fprintf(file, "outgo[%d]: null ", outgo.tailNum);
} }
...@@ -623,11 +648,6 @@ void XLink::ShowNetwork(FILE * file, XTensor * root) ...@@ -623,11 +648,6 @@ void XLink::ShowNetwork(FILE * file, XTensor * root)
} }
fprintf(file, "\n"); fprintf(file, "\n");
for(int i = 0; i < income.tailNum; i++){
XTensor * child = income.tails[i];
ShowNetwork(file, child);
}
} }
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -128,6 +128,9 @@ struct XLink ...@@ -128,6 +128,9 @@ struct XLink
/* get a parameter in integer */ /* get a parameter in integer */
int GetParamInt(int i); int GetParamInt(int i);
/* get a parameter in pointer */
void * GetParamPointer(int i);
/* get a parameter in MATRIX_TRANS_TYPE */ /* get a parameter in MATRIX_TRANS_TYPE */
MATRIX_TRANS_TYPE GetParamTrans(int i); MATRIX_TRANS_TYPE GetParamTrans(int i);
...@@ -178,6 +181,10 @@ struct XLink ...@@ -178,6 +181,10 @@ struct XLink
/* show the network encoded in a root node (tensor) */ /* show the network encoded in a root node (tensor) */
static static
void ShowNetwork(FILE * file, XTensor * root); void ShowNetwork(FILE * file, XTensor * root);
/* show a node */
static
void ShowNode(FILE * file, XTensor * node);
}; };
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
...@@ -600,7 +600,7 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex) ...@@ -600,7 +600,7 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex)
void * result = NULL; void * result = NULL;
/* search for the memory piece available for the allocation */ /* search for the memory piece available for the allocation */
for(int i = indexEntryNum; i > index; i--){ for(int i = index; i <= indexEntryNum; i++){
if(i == indexEntryNum){ if(i == indexEntryNum){
entry = memIndex + index; entry = memIndex + index;
CheckNTErrors(mySize >= minSizeIndex[index], "Wrong index!"); CheckNTErrors(mySize >= minSizeIndex[index], "Wrong index!");
...@@ -667,7 +667,7 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex) ...@@ -667,7 +667,7 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex)
hit->size = mySize; hit->size = mySize;
hit->head.state = 2; hit->head.state = 2;
hit->pReal = beg; hit->pReal = beg;
blocks[hit->head.blockID].used += mySize; blocks[hit->head.blockID].used += head->size;
RemoveFreeIndexNode(hit); RemoveFreeIndexNode(hit);
AddAllocIndexNode(hit); AddAllocIndexNode(hit);
...@@ -690,7 +690,7 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex) ...@@ -690,7 +690,7 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex)
continue; continue;
if (block->mem == NULL) { if (block->mem == NULL) {
block->size = MAX(maxBlockSize, mySize + 2 * MY_PITCH); block->size = MAX(block->sizeDesired, mySize + 2 * MY_PITCH);
if (myDevID < 0) { if (myDevID < 0) {
block->mem = new char[block->size]; block->mem = new char[block->size];
memset(block->mem, 0, block->size); memset(block->mem, 0, block->size);
...@@ -719,8 +719,9 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex) ...@@ -719,8 +719,9 @@ void * XMem::AllocStandard(int myDevID, MTYPE mySize, bool myIsRebuiltIndex)
newNode->head.indexNode = newNode; newNode->head.indexNode = newNode;
newNode->p = block->mem; newNode->p = block->mem;
newNode->pReal = NULL; newNode->pReal = NULL;
newNode->size = (char*)block->mem + mySize - //newNode->size = (char*)block->mem + block->size -
(char*)GetPitchedAddress(block->mem, MY_PITCH); // (char*)GetPitchedAddress(block->mem, MY_PITCH);
newNode->size = mySize;
AddFreeIndexNode(newNode); AddFreeIndexNode(newNode);
...@@ -1041,9 +1042,14 @@ void XMem::RebuildIndex() ...@@ -1041,9 +1042,14 @@ void XMem::RebuildIndex()
/* make a new index node */ /* make a new index node */
MPieceNode * newNode = memIndex2 + nodeNumUsed2++; MPieceNode * newNode = memIndex2 + nodeNumUsed2++;
newNode->p = p; newNode->p = p;
if(head->state == 1){
newNode->size = (char*)p + head->size -
( head->state == 1 ? (char*)GetPitchedAddress((char*)p, MY_PITCH) : (char*)head->indexNode->pReal);
}
else
newNode->size = node->size; newNode->size = node->size;
//newNode->size = (char*)p + head->size -
// ( head->state == 1 ? (char*)GetPitchedAddress((char*)p, MY_PITCH) : (char*)head->indexNode->pReal);
newNode->pre = NULL; newNode->pre = NULL;
newNode->next = NULL; newNode->next = NULL;
......
...@@ -35,6 +35,8 @@ const char * GetOPName(int type) ...@@ -35,6 +35,8 @@ const char * GetOPName(int type)
return "M_EXP"; return "M_EXP";
else if (type == MATH_FLOOR) else if (type == MATH_FLOOR)
return "M_FLOOR"; return "M_FLOOR";
else if (type == MATH_ISZERO)
return "M_ISZERO";
else if (type == MATH_LOG) else if (type == MATH_LOG)
return "M_LOG"; return "M_LOG";
else if (type == MATH_SQRT) else if (type == MATH_SQRT)
...@@ -107,10 +109,14 @@ const char * GetOPName(int type) ...@@ -107,10 +109,14 @@ const char * GetOPName(int type)
return "S_MERGE_LIST"; return "S_MERGE_LIST";
else if (type == SHAPE_PERMUTE) else if (type == SHAPE_PERMUTE)
return "S_PERMUTE"; return "S_PERMUTE";
else if (type == SHAPE_RESHAPE)
return "S_RESHAPE";
else if (type == SHAPE_SPLIT) else if (type == SHAPE_SPLIT)
return "S_SPLIT"; return "S_SPLIT";
else if (type == SHAPE_SPLIT_LIST) else if (type == SHAPE_SPLIT_LIST)
return "S_SPLIT_LIST"; return "S_SPLIT_LIST";
else if (type == SHAPE_SQUEEZE)
return "S_SQUEEZE";
else if (type == SHAPE_TRANSPOSE) else if (type == SHAPE_TRANSPOSE)
return "S_TRANSPOSE"; return "S_TRANSPOSE";
else if (type == SHAPE_UNSQUEEZE) else if (type == SHAPE_UNSQUEEZE)
......
...@@ -35,7 +35,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -35,7 +35,8 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define MATH_CEIL MATH_ABSOLUTE + 1 #define MATH_CEIL MATH_ABSOLUTE + 1
#define MATH_EXP MATH_CEIL + 1 #define MATH_EXP MATH_CEIL + 1
#define MATH_FLOOR MATH_EXP + 1 #define MATH_FLOOR MATH_EXP + 1
#define MATH_LOG MATH_FLOOR + 1 #define MATH_ISZERO MATH_FLOOR + 1
#define MATH_LOG MATH_ISZERO + 1
#define MATH_SQRT MATH_LOG + 1 #define MATH_SQRT MATH_LOG + 1
#define MATH_SQUARE MATH_SQRT + 1 #define MATH_SQUARE MATH_SQRT + 1
#define MATH_SIN MATH_SQUARE + 1 #define MATH_SIN MATH_SQUARE + 1
...@@ -81,9 +82,11 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -81,9 +82,11 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
#define SHAPE_MERGE SHAPE_CONCATENATE + 1 #define SHAPE_MERGE SHAPE_CONCATENATE + 1
#define SHAPE_MERGE_LIST SHAPE_MERGE + 1 #define SHAPE_MERGE_LIST SHAPE_MERGE + 1
#define SHAPE_PERMUTE SHAPE_MERGE_LIST + 1 #define SHAPE_PERMUTE SHAPE_MERGE_LIST + 1
#define SHAPE_SPLIT SHAPE_PERMUTE + 1 #define SHAPE_RESHAPE SHAPE_PERMUTE + 1
#define SHAPE_SPLIT SHAPE_RESHAPE + 1
#define SHAPE_SPLIT_LIST SHAPE_SPLIT + 1 #define SHAPE_SPLIT_LIST SHAPE_SPLIT + 1
#define SHAPE_TRANSPOSE SHAPE_SPLIT_LIST + 1 #define SHAPE_SQUEEZE SHAPE_SPLIT_LIST + 1
#define SHAPE_TRANSPOSE SHAPE_SQUEEZE + 1
#define SHAPE_UNSQUEEZE SHAPE_TRANSPOSE + 1 #define SHAPE_UNSQUEEZE SHAPE_TRANSPOSE + 1
#define SORT SHAPE_UNSQUEEZE + 1 #define SORT SHAPE_UNSQUEEZE + 1
......
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include "XMem.h" #include "XMem.h"
#include "XHeap.h" #include "XHeap.h"
#include "XBLAS.h" #include "XBLAS.h"
#include "XName.h"
#include "core/shape/MergeBlockLists.h" #include "core/shape/MergeBlockLists.h"
#include "core/movement/CopyValues.h" #include "core/movement/CopyValues.h"
#include "core/arithmetic/Sum.h" #include "core/arithmetic/Sum.h"
...@@ -45,6 +46,7 @@ ...@@ -45,6 +46,7 @@
#include "core/arithmetic/Sub.h" #include "core/arithmetic/Sub.h"
#include "core/arithmetic/Div.h" #include "core/arithmetic/Div.h"
#include "core/math/ScaleAndShift.h" #include "core/math/ScaleAndShift.h"
#include "function/Identity.h"
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -202,7 +204,7 @@ XTensor::~XTensor() ...@@ -202,7 +204,7 @@ XTensor::~XTensor()
dims[0] = -dims[0]; dims[0] = -dims[0];
XTensor * newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem); XTensor * newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem);
newTensor->SetTMP(); newTensor->SetTMPFlag();
newTensor->data = data; newTensor->data = data;
data = NULL; data = NULL;
...@@ -244,6 +246,7 @@ void XTensor::Init() ...@@ -244,6 +246,7 @@ void XTensor::Init()
isInit = false; isInit = false;
isTmp = false; isTmp = false;
isGrad = false; isGrad = false;
isVar = false;
visitMark = 0; visitMark = 0;
grad = NULL; grad = NULL;
} }
...@@ -289,6 +292,7 @@ void XTensor::ShallowCopy(const XTensor &tensor) ...@@ -289,6 +292,7 @@ void XTensor::ShallowCopy(const XTensor &tensor)
/* overloading of the equal-sign */ /* overloading of the equal-sign */
XTensor& XTensor::operator= (const XTensor& tensor) XTensor& XTensor::operator= (const XTensor& tensor)
{ {
/* we must make a hard copy of the tensor if it is the input /* we must make a hard copy of the tensor if it is the input
of another node. */ of another node. */
if(outgo.tailNum > 0){ if(outgo.tailNum > 0){
...@@ -297,7 +301,7 @@ XTensor& XTensor::operator= (const XTensor& tensor) ...@@ -297,7 +301,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
dims[0] = -dims[0]; dims[0] = -dims[0];
XTensor * newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem); XTensor * newTensor = new XTensor(order, dims, dataType, denseRatio, devID, mem);
newTensor->SetTMP(); newTensor->SetTMPFlag();
newTensor->data = data; newTensor->data = data;
newTensor->dataHost = dataHost; newTensor->dataHost = dataHost;
newTensor->signature = tensor.signature; newTensor->signature = tensor.signature;
...@@ -311,6 +315,21 @@ XTensor& XTensor::operator= (const XTensor& tensor) ...@@ -311,6 +315,21 @@ XTensor& XTensor::operator= (const XTensor& tensor)
dataHost = NULL; dataHost = NULL;
} }
if(false && !tensor.isTmp){
/* NOTE: this might lead to additional data copy on Mac machines */
/* we make an identity transformation here */
if(outgo.tailNum > 0)
XLink::ClearOutgoing(this);
XLink::ClearIncoming(this);
if(!IsSameShaped(this, &tensor))
Resize(tensor.order, tensor.dimSize, tensor.dataType, tensor.denseRatio);
_Identity(&tensor, this);
XLink::MakeLink(&tensor, NULL, this, FUNC_IDENTITY);
}
else{
/* hard copy of the data array */ /* hard copy of the data array */
int size = unitNum * unitSize; int size = unitNum * unitSize;
if( isInit && !isSparse && !tensor.isSparse && if( isInit && !isSparse && !tensor.isSparse &&
...@@ -343,6 +362,7 @@ XTensor& XTensor::operator= (const XTensor& tensor) ...@@ -343,6 +362,7 @@ XTensor& XTensor::operator= (const XTensor& tensor)
/* create tensor links for the new tensor */ /* create tensor links for the new tensor */
XLink::Replace(&tensor, this); XLink::Replace(&tensor, this);
}
return *this; return *this;
} }
...@@ -353,24 +373,48 @@ XTensor XTensor::operator+ (const XTensor& tensor) ...@@ -353,24 +373,48 @@ XTensor XTensor::operator+ (const XTensor& tensor)
return Sum(*this, tensor); return Sum(*this, tensor);
} }
/* overloading of the plus-sign (with a scalar) */
XTensor XTensor::operator+ (const DTYPE shift)
{
return ScaleAndShift(*this, 1, shift);
}
/* overloading of the multiply-sign */ /* overloading of the multiply-sign */
XTensor XTensor::operator* (const XTensor& tensor) XTensor XTensor::operator* (const XTensor& tensor)
{ {
return Multiply(*this, tensor); return Multiply(*this, tensor);
} }
/* overloading of the multiply-sign (with a scalar) */
XTensor XTensor::operator* (const DTYPE scale)
{
return ScaleAndShift(*this, scale, 0);
}
/* overloading of the minus-sign */ /* overloading of the minus-sign */
XTensor XTensor::operator- (const XTensor& tensor) XTensor XTensor::operator- (const XTensor& tensor)
{ {
return Sub(*this, tensor); return Sub(*this, tensor);
} }
/* overloading of the minus-sign (with a scalar) */
XTensor XTensor::operator- (const DTYPE shift)
{
return ScaleAndShift(*this, 1, -shift);
}
/* overloading of the division-sign */ /* overloading of the division-sign */
XTensor XTensor::operator/ (const XTensor& tensor) XTensor XTensor::operator/ (const XTensor& tensor)
{ {
return Div(*this, tensor); return Div(*this, tensor);
} }
/* overloading of the division-sign (with a scalar) */
XTensor XTensor::operator/ (const DTYPE scale)
{
return ScaleAndShift(*this, (DTYPE)1/scale, 0);
}
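A minimal usage sketch of the new scalar overloads, assuming x is an initialized tensor in scope; each expression routes through ScaleAndShift rather than an element-wise tensor op:

XTensor y = x * 0.5F + 1.0F;   /* y_i = 0.5 * x_i + 1 */
XTensor z = (x - 1.0F) / 2.0F; /* z_i = (x_i - 1) / 2 */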
/* /*
linear transformation b = a * \scale + \shift linear transformation b = a * \scale + \shift
>> scale - the slope >> scale - the slope
...@@ -419,7 +463,7 @@ judge whether the three matrices are in the same type and size ...@@ -419,7 +463,7 @@ judge whether the three matrices are in the same type and size
>> c - a tensor again >> c - a tensor again
<< return - whether the two input tensors are identical << return - whether the two input tensors are identical
*/ */
bool XTensor::IsSameShaped(XTensor * a, XTensor * b, XTensor * c) bool XTensor::IsSameShaped(const XTensor * a, const XTensor * b, const XTensor * c)
{ {
return IsSameShaped(a, b) && IsSameShaped(a, c); return IsSameShaped(a, b) && IsSameShaped(a, c);
} }
...@@ -440,7 +484,7 @@ void XTensor::SetDim(int * myDimSize) ...@@ -440,7 +484,7 @@ void XTensor::SetDim(int * myDimSize)
get the size of a given dimension get the size of a given dimension
>> dim - the given dim we are looking at >> dim - the given dim we are looking at
*/ */
int XTensor::GetDim(const int dim) int XTensor::GetDim(const int dim) const
{ {
CheckNTErrors(dim < order, "dimension is out of range!"); CheckNTErrors(dim < order, "dimension is out of range!");
...@@ -746,6 +790,20 @@ void XTensor::SetDataPointer() ...@@ -746,6 +790,20 @@ void XTensor::SetDataPointer()
dataP = &data; dataP = &data;
} }
/* compare two float numbers with absolute and relative tolerance */
bool IsFloatEqual(DTYPE a, DTYPE b, float absError, float relError)
{
if(a == b)
return true;
if(fabs(a - b) < absError)
return true;
/* divide by the absolute value so the test also works for negative numbers */
if(fabs(a) < fabs(b))
return fabs(a - b) / fabs(b) < relError;
else
return fabs(a - b) / fabs(a) < relError;
}
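A usage sketch: the first pair is accepted by the absolute test, the second only by the relative test (its absolute error of 1 is far above absError, but its relative error is about 1e-6):

bool nearOne = IsFloatEqual(1.0F, 1.00005F, 1e-3F, 1e-6F);     /* true: |a - b| < absError */
bool nearBig = IsFloatEqual(1e6F, 1e6F + 1.0F, 1e-3F, 1e-4F);  /* true: |a - b| / |b| < relError */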
/* check whether the data array is the same as the answer */
bool XTensor::CheckData(const void * d, int num, float tolerance, int beg) bool XTensor::CheckData(const void * d, int num, float tolerance, int beg)
{ {
if (data == NULL || d == NULL) if (data == NULL || d == NULL)
...@@ -759,7 +817,7 @@ bool XTensor::CheckData(const void * d, int num, float tolerance, int beg) ...@@ -759,7 +817,7 @@ bool XTensor::CheckData(const void * d, int num, float tolerance, int beg)
DTYPE * answerPrt = (DTYPE*)d; DTYPE * answerPrt = (DTYPE*)d;
for (int i = beg; i < num; i++) { for (int i = beg; i < num; i++) {
value = ToCPU(devID, valuePrt); value = ToCPU(devID, valuePrt);
if (fabs(value - *answerPrt) > tolerance) if(IsFloatEqual(value, *answerPrt, tolerance, 1e-4F) == false)
return false; return false;
valuePrt++; valuePrt++;
answerPrt++; answerPrt++;
...@@ -1125,7 +1183,7 @@ int XTensor::GetNonzeroSize() ...@@ -1125,7 +1183,7 @@ int XTensor::GetNonzeroSize()
set the tensor as "temporary" set the tensor as "temporary"
>> myIsTMP - the flag >> myIsTMP - the flag
*/ */
void XTensor::SetTMP(bool myIsTmp) void XTensor::SetTMPFlag(bool myIsTmp)
{ {
isTmp = myIsTmp; isTmp = myIsTmp;
} }
...@@ -1134,12 +1192,23 @@ void XTensor::SetTMP(bool myIsTmp) ...@@ -1134,12 +1192,23 @@ void XTensor::SetTMP(bool myIsTmp)
set the tensor as "keep-gradient" set the tensor as "keep-gradient"
>> myIsGrad - the flag >> myIsGrad - the flag
*/ */
void XTensor::SetGrad(bool myIsGrad) void XTensor::SetGradFlag(bool myIsGrad)
{ {
isGrad = myIsGrad; isGrad = myIsGrad;
} }
/* /*
set the tensor as "variable"
>> myIsVar - the flag
*/
void XTensor::SetVarFlag(bool myIsVar)
{
isVar = myIsVar;
if(isVar)
SetGradFlag(true);
}
/*
resize a tensor with a specified tensor size resize a tensor with a specified tensor size
>> myOrder - order of the tensor >> myOrder - order of the tensor
>> myDimSize - the size of each dimension >> myDimSize - the size of each dimension
...@@ -1415,9 +1484,18 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg, ...@@ -1415,9 +1484,18 @@ void XTensor::Dump(FILE * file, const char * label, const int n, const int beg,
} }
} }
else { else if(dataType == X_INT) {
ShowNTErrors("TODO!"); int end = MIN(n > 0 ? beg + n : beg + unitNum, unitNum);
for(int i = beg; i < end; i++){
int f = ((int*)d)[i];
if(i == beg)
fprintf(file, "%d", f);
else
fprintf(file, " %d", f);
}
} }
else
ShowNTErrors("TODO!");
} }
else { else {
int num = this->unitNumNonZero > 0 ? *(int*)d : 0; int num = this->unitNumNonZero > 0 ? *(int*)d : 0;
......
...@@ -145,6 +145,9 @@ public: ...@@ -145,6 +145,9 @@ public:
/* indicates whether the tensor keeps the gradient when used as model parameters */ /* indicates whether the tensor keeps the gradient when used as model parameters */
bool isGrad; bool isGrad;
/* indicates whether the tensor is used as parameters (or variables) */
bool isVar;
/* mark for traversing the graph */ /* mark for traversing the graph */
unsigned int visitMark; unsigned int visitMark;
...@@ -202,15 +205,27 @@ public: ...@@ -202,15 +205,27 @@ public:
/* overloading of the plus-sign */ /* overloading of the plus-sign */
XTensor operator+ (const XTensor &tensor); XTensor operator+ (const XTensor &tensor);
/* overloading of the plus-sign (with a scalar) */
XTensor operator+ (const DTYPE shift);
/* overloading of the multiply-sign */ /* overloading of the multiply-sign */
XTensor operator* (const XTensor &tensor); XTensor operator* (const XTensor &tensor);
/* overloading of the multiply-sign (with a scalar) */
XTensor operator* (const DTYPE scale);
/* overloading of the minus-sign */ /* overloading of the minus-sign */
XTensor operator- (const XTensor &tensor); XTensor operator- (const XTensor &tensor);
/* overloading of the minus-sign (with a scalar) */
XTensor operator- (const DTYPE shift);
/* overloading of the division-sign */ /* overloading of the division-sign */
XTensor operator/ (const XTensor &tensor); XTensor operator/ (const XTensor &tensor);
/* overloading of the division-sign (with a scalar) */
XTensor operator/ (const DTYPE scale);
/* linear transformation */ /* linear transformation */
XTensor Lin(DTYPE scale, DTYPE shift = 0); XTensor Lin(DTYPE scale, DTYPE shift = 0);
...@@ -220,13 +235,13 @@ public: ...@@ -220,13 +235,13 @@ public:
/* judge whether the three matrices are in the same type and size */ /* judge whether the three matrices are in the same type and size */
static static
bool IsSameShaped(XTensor * a, XTensor * b, XTensor * c); bool IsSameShaped(const XTensor * a, const XTensor * b, const XTensor * c);
/* set the size of each dimension */ /* set the size of each dimension */
void SetDim(int * myDimSize); void SetDim(int * myDimSize);
/* get the size of a given dimension */ /* get the size of a given dimension */
int GetDim(const int dim); int GetDim(const int dim) const;
/* reshape the tensor */ /* reshape the tensor */
void Reshape(const int order, const int * myDimSize); void Reshape(const int order, const int * myDimSize);
...@@ -319,10 +334,13 @@ public: ...@@ -319,10 +334,13 @@ public:
int GetNonzeroSize(); int GetNonzeroSize();
/* set the tensor as "temporary" */ /* set the tensor as "temporary" */
void SetTMP(bool myIsTmp = true); void SetTMPFlag(bool myIsTmp = true);
/* set the tensor as "keep-gradient" */ /* set the tensor as "keep-gradient" */
void SetGrad(bool myIsGrad = true); void SetGradFlag(bool myIsGrad = true);
/* set the tensor as "variable" */
void SetVarFlag(bool myIsVar = true);
/* resize a matrix with a specified matrix size */ /* resize a matrix with a specified matrix size */
bool Resize(const int myOrder, const int * myDimSize, bool Resize(const int myOrder, const int * myDimSize,
......
...@@ -63,11 +63,14 @@ ...@@ -63,11 +63,14 @@
#include "movement/CopyIndexed.h" #include "movement/CopyIndexed.h"
#include "movement/CopyInGrid.h" #include "movement/CopyInGrid.h"
#include "movement/CopyValues.h" #include "movement/CopyValues.h"
#include "movement/Gather.h"
#include "movement/Spread.h"
#include "reduce/ReduceMax.h" #include "reduce/ReduceMax.h"
#include "reduce/ReduceMean.h" #include "reduce/ReduceMean.h"
#include "reduce/ReduceStandardVariance.h" #include "reduce/ReduceStandardVariance.h"
#include "reduce/ReduceSum.h" #include "reduce/ReduceSum.h"
#include "reduce/ReduceSumAll.h"
#include "reduce/ReduceSumSquared.h" #include "reduce/ReduceSumSquared.h"
#include "reduce/ReduceVariance.h" #include "reduce/ReduceVariance.h"
...@@ -77,8 +80,10 @@ ...@@ -77,8 +80,10 @@
#include "shape/MakeSplitBlockIndex.h" #include "shape/MakeSplitBlockIndex.h"
#include "shape/Merge.h" #include "shape/Merge.h"
#include "shape/MergeBlockLists.h" #include "shape/MergeBlockLists.h"
#include "shape/Reshape.h"
#include "shape/Permute.h" #include "shape/Permute.h"
#include "shape/Split.h" #include "shape/Split.h"
#include "shape/Squeeze.h"
#include "shape/Transpose.h" #include "shape/Transpose.h"
#include "shape/Unsqueeze.h" #include "shape/Unsqueeze.h"
......
...@@ -147,6 +147,8 @@ int GetDivDimIndex(const XTensor &a, const XTensor &b) ...@@ -147,6 +147,8 @@ int GetDivDimIndex(const XTensor &a, const XTensor &b)
{ {
if(a.order < b.order) if(a.order < b.order)
return -1; return -1;
if(XTensor::IsSameShaped(&a, &b))
return -1;
int hitCount = 0; int hitCount = 0;
int hitDim = -1; int hitDim = -1;
...@@ -181,7 +183,7 @@ where i is the index of the item ...@@ -181,7 +183,7 @@ where i is the index of the item
XTensor Div(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim) XTensor Div(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim)
{ {
XTensor c(&a); XTensor c(&a);
c.SetTMP(); c.SetTMPFlag();
int n = GetDivDimIndex(a, b); int n = GetDivDimIndex(a, b);
......
...@@ -150,7 +150,7 @@ i.e., a is divided with b by broadcasting ...@@ -150,7 +150,7 @@ i.e., a is divided with b by broadcasting
XTensor DivDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha) XTensor DivDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha)
{ {
XTensor c(&a); XTensor c(&a);
c.SetTMP(); c.SetTMPFlag();
/* call _Div function */ /* call _Div function */
_DivDim(&a, &b, &c, n, alpha); _DivDim(&a, &b, &c, n, alpha);
......
...@@ -249,7 +249,7 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA, ...@@ -249,7 +249,7 @@ XTensor MatrixMul(const XTensor &a, MATRIX_TRANS_TYPE transposedA,
float dr = (!a.isSparse || !b.isSparse) ? 1.0F : MAX(a.denseRatio, b.denseRatio); float dr = (!a.isSparse || !b.isSparse) ? 1.0F : MAX(a.denseRatio, b.denseRatio);
XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem); XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem);
c.SetTMP(); c.SetTMPFlag();
/* call _MatrixMul function */ /* call _MatrixMul function */
_MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner); _MatrixMul(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner);
...@@ -299,7 +299,7 @@ XTensor MatrixMul(const XTensor &a, const XTensor &b, ...@@ -299,7 +299,7 @@ XTensor MatrixMul(const XTensor &a, const XTensor &b,
float dr = (!a.isSparse || !b.isSparse) ? 1.0F : MAX(a.denseRatio, b.denseRatio); float dr = (!a.isSparse || !b.isSparse) ? 1.0F : MAX(a.denseRatio, b.denseRatio);
XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem); XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem);
c.SetTMP(); c.SetTMPFlag();
/* call _MatrixMul function */ /* call _MatrixMul function */
_MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner); _MatrixMul(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner);
......
...@@ -314,7 +314,7 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const ...@@ -314,7 +314,7 @@ XTensor MatrixMulBatched(const XTensor &a, MATRIX_TRANS_TYPE transposedA, const
float dr = (!a.isSparse || !b.isSparse) ? 1.0F : MAX(a.denseRatio, b.denseRatio); float dr = (!a.isSparse || !b.isSparse) ? 1.0F : MAX(a.denseRatio, b.denseRatio);
XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem); XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem);
c.SetTMP(); c.SetTMPFlag();
/*call _MatrixMulBatched function */ /*call _MatrixMulBatched function */
_MatrixMulBatched(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner); _MatrixMulBatched(&a, transposedA, &b, transposedB, &c, alpha, 0, parallelRunner);
...@@ -370,7 +370,7 @@ XTensor MatrixMulBatched(const XTensor &a, const XTensor &b, ...@@ -370,7 +370,7 @@ XTensor MatrixMulBatched(const XTensor &a, const XTensor &b,
float dr = (!a.isSparse || !b.isSparse) ? 1.0F : MAX(a.denseRatio, b.denseRatio); float dr = (!a.isSparse || !b.isSparse) ? 1.0F : MAX(a.denseRatio, b.denseRatio);
XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem); XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem);
c.SetTMP(); c.SetTMPFlag();
/*call _MatrixMulBatched function */ /*call _MatrixMulBatched function */
_MatrixMulBatched(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner); _MatrixMulBatched(&a, X_NOTRANS, &b, X_NOTRANS, &c, alpha, 0, parallelRunner);
......
...@@ -148,6 +148,8 @@ int GetMultiplyDimIndex(const XTensor &a, const XTensor &b) ...@@ -148,6 +148,8 @@ int GetMultiplyDimIndex(const XTensor &a, const XTensor &b)
{ {
if(a.order < b.order) if(a.order < b.order)
return -1; return -1;
if(XTensor::IsSameShaped(&a, &b))
return -1;
int hitCount = 0; int hitCount = 0;
int hitDim = -1; int hitDim = -1;
...@@ -182,7 +184,7 @@ XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim ...@@ -182,7 +184,7 @@ XTensor Multiply(const XTensor &a, const XTensor &b, DTYPE alpha, int leadingDim
{ {
XTensor c(&a); XTensor c(&a);
c.SetTMP(); c.SetTMPFlag();
int n = GetMultiplyDimIndex(a, b); int n = GetMultiplyDimIndex(a, b);
......
...@@ -148,7 +148,7 @@ i.e., a is multiplied with b by broadcasting ...@@ -148,7 +148,7 @@ i.e., a is multiplied with b by broadcasting
XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha) XTensor MultiplyDim(const XTensor &a, const XTensor &b, int n, DTYPE alpha)
{ {
XTensor c(&a); XTensor c(&a);
c.SetTMP(); c.SetTMPFlag();
/* call _Multiply function */ /* call _Multiply function */
_MultiplyDim(&a, &b, &c, n, alpha); _MultiplyDim(&a, &b, &c, n, alpha);
......
...@@ -68,7 +68,7 @@ make a new tensor to keep the result and return it ...@@ -68,7 +68,7 @@ make a new tensor to keep the result and return it
XTensor Negate(const XTensor & a) XTensor Negate(const XTensor & a)
{ {
XTensor b(&a); XTensor b(&a);
b.SetTMP(); b.SetTMPFlag();
/* call _Negate function */ /* call _Negate function */
_Negate(&a, &b); _Negate(&a, &b);
......
...@@ -74,7 +74,7 @@ make a new tensor to keep the result and return it ...@@ -74,7 +74,7 @@ make a new tensor to keep the result and return it
XTensor Sign(const XTensor & a) XTensor Sign(const XTensor & a)
{ {
XTensor b(&a); XTensor b(&a);
b.SetTMP(); b.SetTMPFlag();
/* call _Sign function */ /* call _Sign function */
_Sign(&a, &b); _Sign(&a, &b);
......
...@@ -134,6 +134,8 @@ int GetSubDimIndex(const XTensor &a, const XTensor &b) ...@@ -134,6 +134,8 @@ int GetSubDimIndex(const XTensor &a, const XTensor &b)
{ {
if(a.order < b.order) if(a.order < b.order)
return -1; return -1;
if(XTensor::IsSameShaped(&a, &b))
return -1;
int hitCount = 0; int hitCount = 0;
int hitDim = -1; int hitDim = -1;
...@@ -164,7 +166,7 @@ make a new tensor c to keep the result and return it ...@@ -164,7 +166,7 @@ make a new tensor c to keep the result and return it
XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta) XTensor Sub(const XTensor &a, const XTensor &b, DTYPE beta)
{ {
XTensor c(&a); XTensor c(&a);
c.SetTMP(); c.SetTMPFlag();
int n = GetSubDimIndex(a, b); int n = GetSubDimIndex(a, b);
......
...@@ -150,7 +150,7 @@ i.e., a is subtracted with b by broadcasting ...@@ -150,7 +150,7 @@ i.e., a is subtracted with b by broadcasting
XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta) XTensor SubDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
{ {
XTensor c(&a); XTensor c(&a);
c.SetTMP(); c.SetTMPFlag();
/* call _Sub function */ /* call _Sub function */
_SubDim(&a, &b, &c, n, beta); _SubDim(&a, &b, &c, n, beta);
......
...@@ -139,6 +139,8 @@ int GetSumDimIndex(const XTensor &a, const XTensor &b) ...@@ -139,6 +139,8 @@ int GetSumDimIndex(const XTensor &a, const XTensor &b)
{ {
if(a.order < b.order) if(a.order < b.order)
return -1; return -1;
if(XTensor::IsSameShaped(&a, &b))
return -1;
int hitCount = 0; int hitCount = 0;
int hitDim = -1; int hitDim = -1;
...@@ -169,7 +171,7 @@ make a new tensor c to keep the result and return it ...@@ -169,7 +171,7 @@ make a new tensor c to keep the result and return it
XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta) XTensor Sum(const XTensor &a, const XTensor &b, DTYPE beta)
{ {
XTensor c(&a); XTensor c(&a);
c.SetTMP(); c.SetTMPFlag();
int n = GetSumDimIndex(a, b); int n = GetSumDimIndex(a, b);
......
...@@ -150,7 +150,7 @@ i.e., a is summed with b by broadcasting ...@@ -150,7 +150,7 @@ i.e., a is summed with b by broadcasting
XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta) XTensor SumDim(const XTensor &a, const XTensor &b, int n, DTYPE beta)
{ {
XTensor c(&a); XTensor c(&a);
c.SetTMP(); c.SetTMPFlag();
/* call _Sum function */ /* call _Sum function */
_SumDim(&a, &b, &c, n, beta); _SumDim(&a, &b, &c, n, beta);
......
...@@ -111,7 +111,7 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high) ...@@ -111,7 +111,7 @@ XTensor SelectRange(const XTensor &a, int dim, int low, int high)
float dr = (!a.isSparse) ? 1.0F : a.denseRatio; float dr = (!a.isSparse) ? 1.0F : a.denseRatio;
XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem); XTensor c(order, dimSize, a.dataType, dr, a.devID, a.mem);
c.SetTMP(); c.SetTMPFlag();
/* call _SelectRange function */ /* call _SelectRange function */
_SelectRange(&a, &c, dim, low, high); _SelectRange(&a, &c, dim, low, high);
......
...@@ -234,7 +234,7 @@ void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p) ...@@ -234,7 +234,7 @@ void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
int n = tensor->order; int n = tensor->order;
CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO!"); CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim < n && dim > 0, "Illegal dimension!"); CheckNTErrors(dim < n && dim >= 0, "Illegal dimension!");
CheckNTErrors(beg >= 0 && beg < tensor->GetDim(dim), "Illegal beginning position!"); CheckNTErrors(beg >= 0 && beg < tensor->GetDim(dim), "Illegal beginning position!");
CheckNTErrors(beg + len >= 0 && beg + len < tensor->GetDim(dim), "Illegal length!"); CheckNTErrors(beg + len >= 0 && beg + len < tensor->GetDim(dim), "Illegal length!");
...@@ -264,11 +264,78 @@ void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p) ...@@ -264,11 +264,78 @@ void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
} }
/* /*
modify data items along a given dimension at a given index (and keep the remaining items unchanged)
>> source - the tensor whose data array would be modified
>> modify - the tensor whose data array would be used to modify the source tensor
>> dim - the dimension along which we modify the tensor
>> index - index of the given dimension
e.g., given a source tensor (3, 3)
1 2 3
4 5 6
7 8 9
given a modifier tensor (3)
1 2 3
when dim = 0, index = 1, we have
1 2 3
1 2 3
7 8 9
i.e., we set entries of row 1 to {1, 2, 3}
*/
void _SetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
{
int order = source->order;
int size = source->GetDim(dim);
CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
CheckNTErrors(index >= 0 && index < size, "Illegal index!");
for(int i = 0; i < order - 1; i++){
if(i < dim){
CheckNTErrors(modify->GetDim(i) == source->GetDim(i), "Illegal dimension!");
}
else if(i >= dim){
CheckNTErrors(modify->GetDim(i) == source->GetDim(i+1), "Illegal dimension!");
}
}
if(source->devID < 0 && modify->devID < 0){
int stride = 1;
int blockSize = 1;
int blockNum = 1;
for(int i = order - 1; i > dim; i--){
stride *= source->GetDim(i);
}
blockSize = stride * source->GetDim(dim);
blockNum = source->unitNum / blockSize;
for(int i = 0; i < blockNum; i++){
DTYPE * d = (DTYPE*)source->data + blockSize * i + index * stride;
DTYPE * p = (DTYPE*)modify->data + stride * i;
for(int j = 0; j < stride; j++)
d[j] = p[j];
}
}
else if(source->devID >= 0 && modify->devID >= 0) {
#ifdef USE_CUDA
_CudaSetDataIndexed(source, modify, dim, index);
#else
ShowNTErrors("Please specify USE_CUDA and recompile the code!");
#endif
}
else{
ShowNTErrors("TODO!");
}
}
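A plain-array sketch of the stride arithmetic above, worked through for the documented example (a 3 x 3 source, a 3-element modifier, dim = 0, index = 1, hence stride = 3, blockSize = 9, blockNum = 1):

#include <cassert>

/* replace row 1 of a 3 x 3 array with {1, 2, 3} */
void SetRowOneExample()
{
    float src[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
    float mod[3] = {1, 2, 3};
    const int stride = 3, blockSize = 9, blockNum = 1, index = 1;
    for (int i = 0; i < blockNum; i++)
        for (int j = 0; j < stride; j++)
            src[blockSize * i + index * stride + j] = mod[stride * i + j];
    assert(src[3] == 1 && src[4] == 2 && src[5] == 3); /* row 1 replaced */
}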
/*
generate data as lower triangular matrices for last two dimensions generate data as lower triangular matrices for last two dimensions
>> tensor - the tensor whose data to be set >> tensor - the tensor whose data to be set
>> p - the value for each entry of the lower triangular matrices >> p - the value for each entry of the lower triangular matrices
>> shift - the offset from diagonal >> shift - the offset from diagonal
e.g., for a 3* 3 tensor, e.g., for a 3 * 3 tensor,
when p = 1 and shift = 0, we have when p = 1 and shift = 0, we have
1 0 0 1 0 0
1 1 0 1 1 0
...@@ -363,7 +430,6 @@ void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper) ...@@ -363,7 +430,6 @@ void _SetDataRand(XTensor * tensor, DTYPE lower, DTYPE upper)
} }
} }
/* /*
generate data items with a normal distribution with specified mean and standard deviation generate data items with a normal distribution with specified mean and standard deviation
>> mean - mean or expectation of the distribution >> mean - mean or expectation of the distribution
......
...@@ -231,7 +231,7 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p) ...@@ -231,7 +231,7 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
int n = tensor->order; int n = tensor->order;
CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO!"); CheckNTErrors(tensor->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim < n && dim > 0, "Illegal dimension!"); CheckNTErrors(dim < n && dim >= 0, "Illegal dimension!");
CheckNTErrors(beg >= 0 && beg < tensor->GetDim(dim), "Illegal beginning position!"); CheckNTErrors(beg >= 0 && beg < tensor->GetDim(dim), "Illegal beginning position!");
CheckNTErrors(beg + len >= 0 && beg + len < tensor->GetDim(dim), "Illegal length!"); CheckNTErrors(beg + len >= 0 && beg + len < tensor->GetDim(dim), "Illegal length!");
...@@ -255,12 +255,95 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p) ...@@ -255,12 +255,95 @@ void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p)
int devIDBackup; int devIDBackup;
ProtectCudaDev(tensor->devID, devIDBackup); ProtectCudaDev(tensor->devID, devIDBackup);
KernelSetDataDim<<<blocks, threads >>>((DTYPE*)tensor->data, beg * stride, len * stride, blockSize, blockNum, p); KernelSetDataDim<<<blocks, threads >>>((DTYPE*)tensor->data, beg * stride,
len * stride, blockSize, blockNum, p);
BacktoCudaDev(tensor->devID, devIDBackup); BacktoCudaDev(tensor->devID, devIDBackup);
} }
/* /*
modify data items along a given dimension at a given index
(and keep the remaining items unchanged) - kernel version
>> s - the pointer whose data would be modified
>> m - the pointer whose data would be used to modify the data pointed by s
>> blockNum - number of data blocks
>> blockSize - size of a data block
>> stride - stride of a data block
*/
__global__
void KernelSetDataIndexed(DTYPE * s, DTYPE * m, int blockNum, int blockSize, int stride)
{
/* offset in each block */
int i = blockDim.x * blockIdx.x + threadIdx.x;
/* block id */
int j = blockDim.y * blockIdx.y + threadIdx.y;
if(i >= stride || j >= blockNum)
return;
int x = blockSize * j + i;
int y = stride * j + i;
s[x] = m[y];
}
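For reference, a sequential CPU equivalent of the 2-D thread mapping: thread (i, j) copies modify[stride * j + i] into block j of the source, the index * stride offset into the selected slice having been folded into s by the caller (see the kernel launch below):

/* sequential version of what the thread grid computes */
void SetDataIndexedCPU(float * s, const float * m,
                       int blockNum, int blockSize, int stride)
{
    for (int j = 0; j < blockNum; j++)      /* block id */
        for (int i = 0; i < stride; i++)    /* offset within the slice */
            s[blockSize * j + i] = m[stride * j + i];
}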
/*
modify data items along a given dimension at a given index (and keep the remaining items unchanged)
>> source - the tensor whose data array would be modified
>> modify - the tensor whose data array would be used to modify the source tensor
>> dim - the dimension along which we modify the tensor
>> index - index of the given dimension
e.g., given a source tensor (3, 3)
1 2 3
4 5 6
7 8 9
given a modifier tensor (3)
1 2 3
when dim = 0, index = 1, we have
1 2 3
1 2 3
7 8 9
i.e., we set entries of row 1 to {1, 2, 3}
*/
void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index)
{
int order = source->order;
int size = source->GetDim(dim);
CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
CheckNTErrors(index >= 0 && index < size, "Illegal index!");
int stride = 1;
int blockSize = 1;
int blockNum = 1;
for(int i = order - 1; i > dim; i--){
stride *= source->GetDim(i);
}
blockSize = stride * source->GetDim(dim);
blockNum = source->unitNum / blockSize;
int cudaGrids[3];
int cudaBlocks[3];
GDevs.GetCudaThread2D(source->devID, stride, blockNum, MAX_INT, cudaGrids, cudaBlocks);
dim3 blocks(cudaGrids[0], cudaGrids[1]);
dim3 threads(cudaBlocks[0], cudaBlocks[1]);
int devIDBackup;
ProtectCudaDev(source->devID, devIDBackup);
KernelSetDataIndexed<<<blocks, threads >>>((DTYPE*)source->data + index * stride, (DTYPE*)modify->data,
blockNum, blockSize, stride);
BacktoCudaDev(source->devID, devIDBackup);
}
/*
set lower triangular matrices for each block set lower triangular matrices for each block
>> d - pointer to the data array >> d - pointer to the data array
>> l - row number (or column number) of each block, i.e, >> l - row number (or column number) of each block, i.e,
......
...@@ -40,6 +40,9 @@ void _CudaSetDataFixedDouble(XTensor * tensor, double p); ...@@ -40,6 +40,9 @@ void _CudaSetDataFixedDouble(XTensor * tensor, double p);
/* set data items along with a given dimension (and keep the remaining items unchanged) */ /* set data items along with a given dimension (and keep the remaining items unchanged) */
void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p); void _CudaSetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p);
/* modify data items along a given dimension at a given index (and keep the remaining items unchanged) */
void _CudaSetDataIndexed(XTensor * source, XTensor * modify, int dim, int index);
/* generate data as lower triangular matrices for last two dimensions (cuda version) */ /* generate data as lower triangular matrices for last two dimensions (cuda version) */
void _CudaSetDataLowTri(XTensor * tensor, DTYPE p, int shift); void _CudaSetDataLowTri(XTensor * tensor, DTYPE p, int shift);
......
...@@ -48,6 +48,9 @@ void _SetDataFixedDouble(XTensor * tensor, double p); ...@@ -48,6 +48,9 @@ void _SetDataFixedDouble(XTensor * tensor, double p);
/* set data items along with a given dimension (and keep the remaining items unchanged) */ /* set data items along with a given dimension (and keep the remaining items unchanged) */
void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p); void _SetDataDim(XTensor * tensor, int beg, int len, int dim, DTYPE p);
/* modify data items along a given dimension at a given index (and keep the remaining items unchanged) */
void _SetDataIndexed(XTensor * source, XTensor * modify, int dim, int index);
/* generate data as lower triangular matrices for last two dimensions */ /* generate data as lower triangular matrices for last two dimensions */
void _SetDataLowTri(XTensor * tensor, DTYPE p, int shift); void _SetDataLowTri(XTensor * tensor, DTYPE p, int shift);
......
...@@ -81,7 +81,7 @@ make a new tensor to keep the result and return it ...@@ -81,7 +81,7 @@ make a new tensor to keep the result and return it
XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper) XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper)
{ {
XTensor b(&a); XTensor b(&a);
b.SetTMP(); b.SetTMPFlag();
/* call _Clip function */ /* call _Clip function */
_Clip(&a, &b, lower, upper); _Clip(&a, &b, lower, upper);
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University. * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
/* /*
* $Created by: Lin Ye (email: linye2015@outlook.com) 2018-08-03 * $Created by: Lin Ye (email: linye2015@outlook.com) 2018-08-03
*/ */
#ifndef __CLIP_H__ #ifndef __CLIP_H__
#define __CLIP_H__ #define __CLIP_H__
...@@ -29,16 +30,12 @@ namespace nts { // namespace nts(NiuTrans.Tensor) ...@@ -29,16 +30,12 @@ namespace nts { // namespace nts(NiuTrans.Tensor)
/* set every entry to its clip value */ /* set every entry to its clip value */
void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper); void _Clip(const XTensor * a, XTensor * b, DTYPE lower, DTYPE upper);
/* /* set every entry to its clip value (do it on site)
set every entry to its clip value (do it on site) keep the result in the input tensor a and return nothing */
keep the result in the input tensor a and return nothing
*/
void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper); void _ClipMe(XTensor * a, DTYPE lower, DTYPE upper);
/* /* set every entry to its clip value (return a XTensor structure)
set every entry to its clip value (return a XTensor structure) make a new tensor to keep the result and return it */
make a new tensor to keep the result and return it
*/
XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper); XTensor Clip(const XTensor & a, DTYPE lower, DTYPE upper);
/* /*
......
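For reference, a minimal sketch of the three Clip variants above; the InitTensor1D/SetData setup helpers are the library's usual pattern and are assumed here, not introduced by this commit:

#include "Clip.h"

using namespace nts;

void ClipExample()
{
    /* a 1D tensor with 4 entries (assumed setup helpers) */
    XTensor a;
    InitTensor1D(&a, 4);
    DTYPE v[4] = {(DTYPE)-2.0, (DTYPE)-0.5, (DTYPE)0.5, (DTYPE)2.0};
    a.SetData(v, 4);

    /* out-of-place: b holds a clipped to [-1, 1], i.e., (-1, -0.5, 0.5, 1) */
    XTensor b = Clip(a, (DTYPE)-1.0, (DTYPE)1.0);

    /* in-place: overwrite a itself */
    _ClipMe(&a, (DTYPE)-1.0, (DTYPE)1.0);
}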
...@@ -132,7 +132,7 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme ...@@ -132,7 +132,7 @@ where a and b are the scalar and bias respectively, and \epsilon is the adjustme
XTensor Normalize(const XTensor &input, int dim, const XTensor &mean, const XTensor &var, const XTensor &a, const XTensor &b, DTYPE epsilon) XTensor Normalize(const XTensor &input, int dim, const XTensor &mean, const XTensor &var, const XTensor &a, const XTensor &b, DTYPE epsilon)
{ {
XTensor output(&input); XTensor output(&input);
output.SetTMP(); output.SetTMPFlag();
/* call _Normalize function */ /* call _Normalize function */
_Normalize(&input, &output, dim, &mean, &var, &a, &b, epsilon); _Normalize(&input, &output, dim, &mean, &var, &a, &b, epsilon);
......
...@@ -90,7 +90,7 @@ make a new tensor to keep the result and return it ...@@ -90,7 +90,7 @@ make a new tensor to keep the result and return it
XTensor Power(const XTensor & a, DTYPE p) XTensor Power(const XTensor & a, DTYPE p)
{ {
XTensor b(&a); XTensor b(&a);
b.SetTMP(); b.SetTMPFlag();
/* call _Power function */ /* call _Power function */
_Power(&a, &b, p); _Power(&a, &b, p);
......
...@@ -105,7 +105,7 @@ b = a * scale + shift ...@@ -105,7 +105,7 @@ b = a * scale + shift
XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift) XTensor ScaleAndShift(const XTensor &a, DTYPE scale, DTYPE shift)
{ {
XTensor b(&a); XTensor b(&a);
b.SetTMP(); b.SetTMPFlag();
/* call _ScaleAndShift function */ /* call _ScaleAndShift function */
_ScaleAndShift(&a, &b, scale, shift); _ScaleAndShift(&a, &b, scale, shift);
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University. * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
/* /*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-31 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-31
*/ */
#include <math.h> #include <math.h>
#include "../../XName.h" #include "../../XName.h"
...@@ -36,6 +37,11 @@ DTYPE round(DTYPE r) ...@@ -36,6 +37,11 @@ DTYPE round(DTYPE r)
return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5); return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5);
} }
DTYPE iszero(DTYPE r)
{
return (r == 0.0) ? (DTYPE)1.0 : (DTYPE)0.0;
}
#ifdef USE_CUDA #ifdef USE_CUDA
/* define three marco separately, specify the respective function names (GPU mode) */ /* define three marco separately, specify the respective function names (GPU mode) */
#define _SIMPLE_UNARY_FUNCTION(_funcName, _cudaFuncName, origFunc) \ #define _SIMPLE_UNARY_FUNCTION(_funcName, _cudaFuncName, origFunc) \
...@@ -65,7 +71,7 @@ void _funcNameMe(XTensor * a) \ ...@@ -65,7 +71,7 @@ void _funcNameMe(XTensor * a) \
XTensor funcName(const XTensor &a) \ XTensor funcName(const XTensor &a) \
{ \ { \
XTensor b(&a); \ XTensor b(&a); \
b.SetTMP(); \ b.SetTMPFlag(); \
_funcName(&a, &b); \ _funcName(&a, &b); \
XLink::MakeLink(&a, NULL, &b, operationId); \ XLink::MakeLink(&a, NULL, &b, operationId); \
return b; \ return b; \
...@@ -87,6 +93,10 @@ _SIMPLE_UNARY_FUNCTION(_Floor, _CudaFloor, floor) ...@@ -87,6 +93,10 @@ _SIMPLE_UNARY_FUNCTION(_Floor, _CudaFloor, floor)
_SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor) _SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor)
SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR) SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR)
_SIMPLE_UNARY_FUNCTION(_IsZero, _CudaIsZero, iszero)
_SIMPLE_UNARY_FUNCTION_ME(_IsZeroMe, _IsZero)
SIMPLE_UNARY_FUNCTION(IsZero, _IsZero, MATH_ISZERO)
_SIMPLE_UNARY_FUNCTION(_Log, _CudaLog, log) _SIMPLE_UNARY_FUNCTION(_Log, _CudaLog, log)
_SIMPLE_UNARY_FUNCTION_ME(_LogMe, _Log) _SIMPLE_UNARY_FUNCTION_ME(_LogMe, _Log)
SIMPLE_UNARY_FUNCTION(Log, _Log, MATH_LOG) SIMPLE_UNARY_FUNCTION(Log, _Log, MATH_LOG)
...@@ -140,7 +150,7 @@ void _funcNameMe(XTensor * a) \ ...@@ -140,7 +150,7 @@ void _funcNameMe(XTensor * a) \
XTensor funcName(const XTensor &a) \ XTensor funcName(const XTensor &a) \
{ \ { \
XTensor b(&a); \ XTensor b(&a); \
b.SetTMP(); \ b.SetTMPFlag(); \
_funcName(&a, &b); \ _funcName(&a, &b); \
XLink::MakeLink(&a, NULL, &b, operationId); \ XLink::MakeLink(&a, NULL, &b, operationId); \
return b; \ return b; \
...@@ -163,6 +173,10 @@ _SIMPLE_UNARY_FUNCTION(_Floor, floor) ...@@ -163,6 +173,10 @@ _SIMPLE_UNARY_FUNCTION(_Floor, floor)
_SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor) _SIMPLE_UNARY_FUNCTION_ME(_FloorMe, _Floor)
SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR) SIMPLE_UNARY_FUNCTION(Floor, _Floor, MATH_FLOOR)
_SIMPLE_UNARY_FUNCTION(_IsZero, iszero)
_SIMPLE_UNARY_FUNCTION_ME(_IsZeroMe, _IsZero)
SIMPLE_UNARY_FUNCTION(IsZero, _IsZero, MATH_ISZERO)
_SIMPLE_UNARY_FUNCTION(_Log, log) _SIMPLE_UNARY_FUNCTION(_Log, log)
_SIMPLE_UNARY_FUNCTION_ME(_LogMe, _Log) _SIMPLE_UNARY_FUNCTION_ME(_LogMe, _Log)
SIMPLE_UNARY_FUNCTION(Log, _Log, MATH_LOG) SIMPLE_UNARY_FUNCTION(Log, _Log, MATH_LOG)
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University. * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
/* /*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-31 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-31
*/ */
#include <math.h> #include <math.h>
#include "../../XDevice.h" #include "../../XDevice.h"
...@@ -28,17 +29,23 @@ ...@@ -28,17 +29,23 @@
namespace nts { namespace nts {
__device__ __device__
DTYPE CudaSquare(DTYPE x) DTYPE cudasquare(DTYPE x)
{ {
return x * x; return x * x;
} }
__device__ __device__
DTYPE CudaRound(DTYPE r) DTYPE cudaround(DTYPE r)
{ {
return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5); return (r > 0.0) ? (DTYPE)floor(r + 0.5) : (DTYPE)ceil(r - 0.5);
} }
__device__
DTYPE cudaiszero(DTYPE r)
{
return (r == 0.0) ? (DTYPE)1.0 : (DTYPE)0.0;
}
#define SIMPLE_UNARY_FUNCTION_GPU(funcName, origFunc) \ #define SIMPLE_UNARY_FUNCTION_GPU(funcName, origFunc) \
__global__ \ __global__ \
void Kernel##funcName(DTYPE * a, DTYPE * b, int size) \ void Kernel##funcName(DTYPE * a, DTYPE * b, int size) \
...@@ -89,10 +96,11 @@ SIMPLE_UNARY_FUNCTION_GPU(Absolute, fabs) ...@@ -89,10 +96,11 @@ SIMPLE_UNARY_FUNCTION_GPU(Absolute, fabs)
SIMPLE_UNARY_FUNCTION_GPU(Ceil, ceil) SIMPLE_UNARY_FUNCTION_GPU(Ceil, ceil)
SIMPLE_UNARY_FUNCTION_GPU(Exp, exp) SIMPLE_UNARY_FUNCTION_GPU(Exp, exp)
SIMPLE_UNARY_FUNCTION_GPU(Floor, floor) SIMPLE_UNARY_FUNCTION_GPU(Floor, floor)
SIMPLE_UNARY_FUNCTION_GPU(IsZero, cudaiszero)
SIMPLE_UNARY_FUNCTION_GPU(Log, log) SIMPLE_UNARY_FUNCTION_GPU(Log, log)
SIMPLE_UNARY_FUNCTION_GPU(Round, CudaRound) SIMPLE_UNARY_FUNCTION_GPU(Round, cudaround)
SIMPLE_UNARY_FUNCTION_GPU(Sqrt, sqrt) SIMPLE_UNARY_FUNCTION_GPU(Sqrt, sqrt)
SIMPLE_UNARY_FUNCTION_GPU(Square, CudaSquare) SIMPLE_UNARY_FUNCTION_GPU(Square, cudasquare)
SIMPLE_UNARY_FUNCTION_GPU(Sin, sin) SIMPLE_UNARY_FUNCTION_GPU(Sin, sin)
SIMPLE_UNARY_FUNCTION_GPU(Cos, cos) SIMPLE_UNARY_FUNCTION_GPU(Cos, cos)
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University. * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
/* /*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-31 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-31
*/ */
#ifndef __UNARY_CUH__ #ifndef __UNARY_CUH__
#define __UNARY_CUH__ #define __UNARY_CUH__
...@@ -65,6 +66,15 @@ void KernelFloor(__half * a, __half * b, int size); ...@@ -65,6 +66,15 @@ void KernelFloor(__half * a, __half * b, int size);
/* set each entry to its floor value */ /* set each entry to its floor value */
void _CudaFloor(const XTensor * a, XTensor * b); void _CudaFloor(const XTensor * a, XTensor * b);
/* if source entry is zero, set target entry to be one, otherwise zero (CUDA Kernel) */
__global__
void KernelIsZero(DTYPE * a, DTYPE * b, int size);
/* if source entry is zero, set target entry to be one, otherwise zero (CUDA Kernel) with float16 data type */
__global__
void KernelIsZero(__half * a, __half * b, int size);
/* if source entry is zero, set target entry to be one, otherwise zero */
void _CudaIsZero(const XTensor * a, XTensor * b);
/* set each entry to its logarithm value (CUDA Kernel) */ /* set each entry to its logarithm value (CUDA Kernel) */
__global__ __global__
void KernelLog(DTYPE * a, DTYPE * b, int size); void KernelLog(DTYPE * a, DTYPE * b, int size);
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University. * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
/* /*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-31 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-31
*/ */
#ifndef __UNARY_H__ #ifndef __UNARY_H__
#define __UNARY_H__ #define __UNARY_H__
...@@ -62,6 +63,15 @@ void _FloorMe(XTensor * a); ...@@ -62,6 +63,15 @@ void _FloorMe(XTensor * a);
make a new tensor to keep the result and return it */ make a new tensor to keep the result and return it */
XTensor Floor(const XTensor & a); XTensor Floor(const XTensor & a);
/* if source entry is zero, set target entry to be one, otherwise zero */
void _IsZero(const XTensor *a, XTensor *b);
/* if source entry is zero, set target entry to be one, otherwise zero (do it on site)
keep the result in the input tensor a and return nothing */
void _IsZeroMe(XTensor *a);
/* if source entry is zero, set target entry to be one, otherwise zero (return a XTensor structure)
make a new tensor to keep the result and return it */
XTensor IsZero(const XTensor &a);
/* set every entry to its logarithm value */ /* set every entry to its logarithm value */
void _Log(const XTensor * a, XTensor * b); void _Log(const XTensor * a, XTensor * b);
/* set every entry to its logarithm value (do it on site) /* set every entry to its logarithm value (do it on site)
......
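A quick sketch of the new IsZero family declared above, under the same assumed setup helpers as the Clip example:

#include "Unary.h"

using namespace nts;

void IsZeroExample()
{
    XTensor a;
    InitTensor1D(&a, 3);
    DTYPE v[3] = {(DTYPE)0.0, (DTYPE)2.5, (DTYPE)0.0};
    a.SetData(v, 3);

    XTensor b = IsZero(a);   /* b = (1, 0, 1) */
    _IsZeroMe(&a);           /* a itself becomes (1, 0, 1) */
}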
...@@ -32,7 +32,7 @@ copy indexed sub-tensors ...@@ -32,7 +32,7 @@ copy indexed sub-tensors
>> t - the target tensor >> t - the target tensor
>> dim - the leading dimension to define "sub-tensors" >> dim - the leading dimension to define "sub-tensors"
e.g., for a tensor of size (3, 2, 4) and dim = 2, e.g., for a tensor of size (3, 2, 4) and dim = 2,
we have 4 sub-tensors of size (3,2) we have 4 sub-tensors of size (3, 2)
>> srcIndex - index of the source sub-tensors >> srcIndex - index of the source sub-tensors
>> indexSize - length of srcIndex (and tgtIndex) >> indexSize - length of srcIndex (and tgtIndex)
>> tgtIndex - index of the target sub-tensors >> tgtIndex - index of the target sub-tensors
...@@ -130,17 +130,25 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in ...@@ -130,17 +130,25 @@ XTensor CopyIndexed(const XTensor &s, int dim, int * srcIndex, int indexSize, in
float dr = (!s.isSparse) ? 1.0F : s.denseRatio; float dr = (!s.isSparse) ? 1.0F : s.denseRatio;
XTensor t(order, dimSize, s.dataType, dr, s.devID, s.mem); XTensor t(order, dimSize, s.dataType, dr, s.devID, s.mem);
t.SetTMP(); t.SetTMPFlag();
/* call _CopyIndexed function */ /* call _CopyIndexed function */
_CopyIndexed(&s, &t, dim, srcIndex, indexSize, tgtIndex, copyNum); _CopyIndexed(&s, &t, dim, srcIndex, indexSize, tgtIndex, copyNum);
/* care: we must allocate new arrays to save the indices,
because the source indices may be freed. */
int * saveSrcIndex = new int[indexSize];
memcpy(saveSrcIndex, srcIndex, indexSize * sizeof(int));
int * saveTgtIndex = new int[indexSize];
memcpy(saveTgtIndex, tgtIndex, indexSize * sizeof(int));
/* tensor connection */ /* tensor connection */
XLink::MakeLink(&s, NULL, &t, MOVEMENT_COPYINDEXED); XLink::MakeLink(&s, NULL, &t, MOVEMENT_COPYINDEXED);
XLink::AddParamToHeadInt(&t, dim); XLink::AddParamToHeadInt(&t, dim);
XLink::AddParamToHeadPointer(&t, srcIndex); XLink::AddParamToHeadPointer(&t, saveSrcIndex);
XLink::AddParamToHeadInt(&t, indexSize); XLink::AddParamToHeadInt(&t, indexSize);
XLink::AddParamToHeadPointer(&t, tgtIndex); XLink::AddParamToHeadPointer(&t, saveTgtIndex);
XLink::AddParamToHeadInt(&t, copyNum); XLink::AddParamToHeadInt(&t, copyNum);
/* destroy variables */ /* destroy variables */
......
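The deep copies above matter because the caller may free its index arrays as soon as CopyIndexed returns, while backward computation still reads them through the XLink parameters. A sketch of the now-safe calling pattern (s is assumed to be a tensor of size (3, 2, 4), as in the comments):

void CopyIndexedExample(XTensor &s)
{
    /* stack arrays: they disappear when this function returns */
    int srcIndex[2] = {0, 3};
    int tgtIndex[2] = {0, 1};

    /* t records its own copies of the two index arrays via XLink,
       so the backward pass works even after srcIndex/tgtIndex are gone */
    XTensor t = CopyIndexed(s, 2, srcIndex, 2, tgtIndex, 1);
}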
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University. * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
/* /*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#ifndef __COPYINDEXED_H__ #ifndef __COPYINDEXED_H__
#define __COPYINDEXED_H__ #define __COPYINDEXED_H__
......
...@@ -108,7 +108,7 @@ make a new tensor to keep the result and return it ...@@ -108,7 +108,7 @@ make a new tensor to keep the result and return it
XTensor CopyValues(const XTensor &s, XStream * stream) XTensor CopyValues(const XTensor &s, XStream * stream)
{ {
XTensor t(&s); XTensor t(&s);
t.SetTMP(); t.SetTMPFlag();
/* call _CopyValues function */ /* call _CopyValues function */
_CopyValues(&s, &t, stream); _CopyValues(&s, &t, stream);
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-18
*/
#include "Gather.h"
#include "CopyIndexed.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
gather indexed sub-tensors
>> s - the source tensor
>> t - the target tensor
>> dim - the leading dimension to define "sub-tensors"
e.g., for a tensor of size (3, 2, 4) and dim = 2,
we have 4 sub-tensors of size (3, 2)
>> srcIndex - index of the source sub-tensors
>> indexSize - length of srcIndex
*/
void _Gather(const XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSize)
{
int * tgtIndex = new int[indexSize];
for(int i = 0; i < indexSize; i++)
tgtIndex[i] = i;
_CopyIndexed(s, t, dim, srcIndex, indexSize, tgtIndex, 1);
delete[] tgtIndex;
}
/*
gather indexed sub-tensors (return a XTensor structure)
make a new tensor to keep the result and return it
>> s - the source tensor
>> dim - the leading dimension to define "sub-tensors"
e.g., for a tensor of size (3, 2, 4) and dim = 2,
we have 4 sub-tensors of size (3, 2)
>> srcIndex - index of the source sub-tensors
>> indexSize - length of srcIndex
<< return - the gathered sub-tensors
Note: the index must be kept on the CPU!
*/
XTensor Gather(const XTensor &s, int dim, int * srcIndex, int indexSize)
{
int * tgtIndex = new int[indexSize];
for(int i = 0; i < indexSize; i++)
tgtIndex[i] = i;
/* call CopyIndexed function */
XTensor result;
result = CopyIndexed(s, dim, srcIndex, indexSize, tgtIndex, 1);
delete[] tgtIndex;
return result;
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
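A minimal sketch of the new Gather call, reusing the (3, 2, 4) example from the comments (InitTensor3D and SetDataRand are assumed setup helpers):

#include "Gather.h"

using namespace nts;

void GatherExample()
{
    XTensor s;
    InitTensor3D(&s, 3, 2, 4);
    s.SetDataRand();

    /* pick sub-tensors 1 and 3 along dimension 2; the index must stay on the CPU */
    int srcIndex[2] = {1, 3};
    XTensor g = Gather(s, 2, srcIndex, 2);   /* g has size (3, 2, 2) */
}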
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-18
*/
#ifndef __GATHER_H__
#define __GATHER_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* gather selected sub-tensors */
void _Gather(const XTensor * s, XTensor * t, int dim, int * srcIndex, int indexSize);
/* gather selected sub-tensors (return a XTensor structure)
make a new tensor to keep the result and return it */
XTensor Gather(const XTensor &s, int dim, int * srcIndex, int indexSize);
} // namespace nts(NiuTrans.Tensor)
#endif // __GATHER_H__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-25
*/
#include "Spread.h"
#include "Spread.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
This is the core assignment routine for the spread function.
>> sData - the data pointer of the source tensor
>> cData - the data pointer of collection tensor
>> blockNum - number of data blocks
>> blockSizeSrc - size of source data block
>> blockSizeColl - size of a collection data block
>> stride - stride of a data block
*/
void _Assignment(DTYPE * sData, DTYPE * cData, int blockNum,
int blockSizeSrc, int blockSizeColl, int stride)
{
for (int i = 0; i < blockNum; i++) {
DTYPE * s = sData + blockSizeSrc * i;
DTYPE * c = cData + blockSizeColl * i;
for(int j = 0; j < stride; j++)
s[j] = c[j];
}
}
/*
spread a collection tensor to the source tensor.
This is the inverse operation of gather.
>> source - the source tensor whose data would be modified
>> collection - the collection whose data would be spread to source tensor
>> dim - the leading dimension to define "sub-tensors"
e.g., for a tensor of size (3, 2, 4) and dim = 2,
we have 4 sub-tensors of size (3, 2)
>> srcIndex - index of the source sub-tensors
>> indexSize - length of srcIndex (and collIndex)
>> collIndex - index of the gathered sub-tensors
*/
void _Spread(XTensor * source, XTensor * collection, int dim,
int * srcIndex, int indexSize, int * collIndex)
{
int order = source->order;
int size = source->GetDim(dim);
CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
for(int i = 0; i < order; i++){
if(i == dim){
CheckNTErrors(collection->GetDim(i) == indexSize, "Illegal dimension!");
}
else{
CheckNTErrors(collection->GetDim(i) == source->GetDim(i), "Illegal dimension!");
}
}
#ifdef USE_CUDA
if(source->devID >= 0 && collection->devID >= 0) {
_CudaSpread(source, collection, dim, srcIndex, indexSize, collIndex);
return;
}
#endif
int blockSizeSrc = 1;
int blockSizeColl = 1;
int blockNum = 1;
int stride = 1;
for (int i = dim + 1; i < order; i++) {
stride *= source->GetDim(i);
}
blockSizeSrc = stride * source->GetDim(dim);
blockSizeColl = stride * collection->GetDim(dim);
blockNum = source->unitNum / blockSizeSrc;
DTYPE * sData = (DTYPE*)source->data;
DTYPE * cData = (DTYPE*)collection->data;
for(int i = 0; i < indexSize; i++){
int src = srcIndex[i];
int tgt = collIndex[i];
DTYPE * s = sData + src * stride;
DTYPE * c = cData + tgt * stride;
_Assignment(s, c, blockNum, blockSizeSrc, blockSizeColl, stride);
}
}
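As a worked check of the decomposition above (an illustration, not part of the commit): for a source of size (3, 2, 4) with dim = 2 and indexSize = 2, we get stride = 1 (no dimensions follow dim), blockSizeSrc = 1 * 4 = 4, blockSizeColl = 1 * 2 = 2, and blockNum = 24 / 4 = 6, so each indexed assignment copies stride = 1 element in each of the 6 blocks, stepping by 4 through the source and by 2 through the collection.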
/*
This is the core assignment routine for the backward computation of the gather function.
Note the use of the operator "+=" instead of "=".
>> sData - the data pointer of the source tensor
>> cData - the data pointer of collection tensor
>> blockNum - number of data blocks
>> blockSizeSrc - size of source data block
>> blockSizeColl - size of a collection data block
>> stride - stride of a data block
*/
void _AssignmentForGather(DTYPE * sData, DTYPE * cData, int blockNum,
int blockSizeSrc, int blockSizeColl, int stride)
{
for (int i = 0; i < blockNum; i++) {
DTYPE * s = sData + blockSizeSrc * i;
DTYPE * c = cData + blockSizeColl * i;
for(int j = 0; j < stride; j++)
s[j] += c[j];
}
}
/*
spread a collection tensor to the source tensor.
This is a special spread function for the backward computation of gather.
>> source - the source tensor whose data would be modified
>> collection - the collection whose data would be spread to source tensor
>> dim - the leading dimension to define "sub-tensors"
e.g., for a tensor of size (3, 2, 4) and dim = 2,
we have 4 sub-tensors of size (3, 2)
>> srcIndex - index of the source sub-tensors
>> indexSize - length of srcIndex (and collIndex)
>> collIndex - index of the gathered sub-tensors
*/
void _SpreadForGather(XTensor * source, XTensor * collection, int dim,
int * srcIndex, int indexSize, int * collIndex)
{
int order = source->order;
int size = source->GetDim(dim);
CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
for(int i = 0; i < order; i++){
if(i == dim){
CheckNTErrors(collection->GetDim(i) == indexSize, "Illegal dimension!");
}
else{
CheckNTErrors(collection->GetDim(i) == source->GetDim(i), "Illegal dimension!");
}
}
#ifdef USE_CUDA
if(source->devID >= 0 && collection->devID >= 0) {
_CudaSpreadForGather(source, collection, dim, srcIndex, indexSize, collIndex);
return;
}
#endif
int blockSizeSrc = 1;
int blockSizeColl = 1;
int blockNum = 1;
int stride = 1;
for (int i = dim + 1; i < order; i++) {
stride *= source->GetDim(i);
}
blockSizeSrc = stride * source->GetDim(dim);
blockSizeColl = stride * collection->GetDim(dim);
blockNum = source->unitNum / blockSizeSrc;
DTYPE * sData = (DTYPE*)source->data;
DTYPE * cData = (DTYPE*)collection->data;
for(int i = 0; i < indexSize; i++){
int src = srcIndex[i];
int tgt = collIndex[i];
DTYPE * s = sData + src * stride;
DTYPE * c = cData + tgt * stride;
_AssignmentForGather(s, c, blockNum, blockSizeSrc, blockSizeColl, stride);
}
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
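To see why _SpreadForGather accumulates with "+=" while _Spread overwrites with "=": if the same source index appears twice in srcIndex, the gradients of both gathered copies must add into that slot. A sketch under the same (3, 2, 4), dim = 2 setup (sGrad and gGrad are hypothetical gradient tensors, not names from this commit):

void SpreadForGatherExample(XTensor &sGrad, XTensor &gGrad)
{
    /* forward pass (elsewhere): g = Gather(s, 2, srcIndex, 2) with a duplicated index */
    int srcIndex[2] = {1, 1};
    int collIndex[2] = {0, 1};

    /* backward: both slices of gGrad map onto slice 1 of sGrad;
       _Spread would let the last write win, while _SpreadForGather
       sums the two contributions as the chain rule requires */
    _SpreadForGather(&sGrad, &gGrad, 2, srcIndex, 2, collIndex);
}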
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-25
*/
#ifndef __SPREAD_CUH__
#define __SPREAD_CUH__
#include "../../XTensor.h"
#include "../../XDevice.h"
#include "Spread.cuh"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
This is the core assignment routine for the spread function.
>> sData - the data pointer of the source tensor
>> cData - the data pointer of collection tensor
>> blockNum - the number of data blocks
>> blockSizeSrc - the size of source data block
>> blockSizeColl - the size of a collection data block
>> stride - the stride of a data block
*/
__global__
void KernelSpread(DTYPE * sData, DTYPE * cData, int blockNum,
int blockSizeSrc, int blockSizeColl, int stride)
{
/* block id */
int i = blockDim.x * blockIdx.x + threadIdx.x;
/* offset in each block */
int j = blockDim.y * blockIdx.y + threadIdx.y;
if(i >= blockNum || j >= stride)
return;
DTYPE * s = sData + blockSizeSrc * i;
DTYPE * c = cData + blockSizeColl * i;
s[j] = c[j];
}
/*
spread a collection tensor to the source tensor (cuda version).
This is the inverse operation of gather.
>> source - the source tensor whose data would be modified
>> collection - the collection whose data would be spread to source tensor
>> dim - the leading dimension to define "sub-tensors"
e.g., for a tensor of size (3, 2, 4) and dim = 2,
we have 4 sub-tensors of size (3, 2)
>> srcIndex - index of the source sub-tensors
>> indexSize - length of srcIndex (and collIndex)
>> collIndex - index of the gathered sub-tensors
*/
void _CudaSpread(XTensor * source, XTensor * collection, int dim,
int * srcIndex, int indexSize, int * collIndex)
{
int order = source->order;
CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
int blockSizeSrc = 1;
int blockSizeColl = 1;
int blockNum = 1;
int stride = 1;
for (int i = dim + 1; i < order; i++) {
stride *= source->GetDim(i);
}
blockSizeSrc = stride * source->GetDim(dim);
blockSizeColl = stride * collection->GetDim(dim);
blockNum = source->unitNum / blockSizeSrc;
int cudaGrids[3];
int cudaBlocks[3];
GDevs.GetCudaThread2D(source->devID, blockNum, stride, MAX_INT, cudaGrids, cudaBlocks);
dim3 blocks(cudaGrids[0], cudaGrids[1]);
dim3 threads(cudaBlocks[0], cudaBlocks[1]);
int devIDBackup;
ProtectCudaDev(source->devID, devIDBackup);
DTYPE * sData = (DTYPE*)source->data;
DTYPE * cData = (DTYPE*)collection->data;
for(int i = 0; i < indexSize; i++) {
int src = srcIndex[i];
int tgt = collIndex[i];
DTYPE * s = sData + src * stride;
DTYPE * c = cData + tgt * stride;
KernelSpread<<<blocks, threads >>>(s, c, blockNum, blockSizeSrc, blockSizeColl, stride);
}
BacktoCudaDev(source->devID, devIDBackup);
}
/*
This is the core assignment routine for the backward computation of the gather function.
Note the use of the operator "+=" instead of "=".
>> sData - the data pointer of the source tensor
>> cData - the data pointer of collection tensor
>> blockNum - number of data blocks
>> blockSizeSrc - size of source data block
>> blockSizeColl - size of a collection data block
>> stride - stride of a data block
*/
__global__
void KernelSpreadForGather(DTYPE * sData, DTYPE * cData, int blockNum,
int blockSizeSrc, int blockSizeColl, int stride)
{
/* block id */
int i = blockDim.x * blockIdx.x + threadIdx.x;
/* offset in each block */
int j = blockDim.y * blockIdx.y + threadIdx.y;
if(i >= blockNum || j >= stride)
return;
DTYPE * s = sData + blockSizeSrc * i;
DTYPE * c = cData + blockSizeColl * i;
s[j] += c[j];
}
/*
spread a collection tensor to the source tensor (cuda version).
This is a special spread function for the backward computation of gather.
>> source - the source tensor whose data would be modified
>> collection - the collection whose data would be spread to source tensor
>> dim - the leading dimension to define "sub-tensors"
e.g., for a tensor of size (3, 2, 4) and dim = 2,
we have 4 sub-tensors of size (3, 2)
>> srcIndex - index of the source sub-tensors
>> indexSize - length of srcIndex (and collIndex)
>> collIndex - index of the gathered sub-tensors
*/
void _CudaSpreadForGather(XTensor * source, XTensor * collection, int dim,
int * srcIndex, int indexSize, int * collIndex)
{
int order = source->order;
CheckNTErrors(source->dataType == DEFAULT_DTYPE, "TODO!");
CheckNTErrors(dim >= 0 && dim < order, "Illegal dimension!");
int blockSizeSrc = 1;
int blockSizeColl = 1;
int blockNum = 1;
int stride = 1;
for (int i = dim + 1; i < order; i++) {
stride *= source->GetDim(i);
}
blockSizeSrc = stride * source->GetDim(dim);
blockSizeColl = stride * collection->GetDim(dim);
blockNum = source->unitNum / blockSizeSrc;
int cudaGrids[3];
int cudaBlocks[3];
GDevs.GetCudaThread2D(source->devID, blockNum, stride, MAX_INT, cudaGrids, cudaBlocks);
dim3 blocks(cudaGrids[0], cudaGrids[1]);
dim3 threads(cudaBlocks[0], cudaBlocks[1]);
int devIDBackup;
ProtectCudaDev(source->devID, devIDBackup);
DTYPE * sData = (DTYPE*)source->data;
DTYPE * cData = (DTYPE*)collection->data;
for(int i = 0; i < indexSize; i++) {
int src = srcIndex[i];
int tgt = collIndex[i];
DTYPE * s = sData + src * stride;
DTYPE * c = cData + tgt * stride;
KernelSpreadForGather<<<blocks, threads >>>(s, c, blockNum, blockSizeSrc, blockSizeColl, stride);
}
BacktoCudaDev(source->devID, devIDBackup);
}
} // namespace nts(NiuTrans.Tensor)
#endif // __SPREAD_CUH__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-25
*/
#ifndef __SPREAD_CUH__
#define __SPREAD_CUH__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* spread a collection tensor to the source tensor (cuda version) */
void _CudaSpread(XTensor * source, XTensor * collection, int dim,
int * srcIndex, int indexSize, int * collIndex);
/* special spread function for the backward computation of gather (cuda version) */
void _CudaSpreadForGather(XTensor * source, XTensor * collection, int dim,
int * srcIndex, int indexSize, int * collIndex);
} // namespace nts(NiuTrans.Tensor)
#endif // __SPREAD_CUH__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-25
*/
#ifndef __SPREAD_H__
#define __SPREAD_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* spread a collection tensor to the source tensor */
void _Spread(XTensor * source, XTensor * collection, int dim,
int * srcIndex, int indexSize, int * collIndex);
/* spread a collection tensor to the source tensor (XTensor-style interface) */
void Spread(XTensor * source, XTensor * collection, int dim,
int * srcIndex, int indexSize, int * collIndex);
/* special spread function for the backward computation of gather */
void _SpreadForGather(XTensor * source, XTensor * collection, int dim,
int * srcIndex, int indexSize, int * collIndex);
} // namespace nts(NiuTrans.Tensor)
#endif // __SPREAD_H__
\ No newline at end of file
...@@ -114,7 +114,7 @@ XTensor ReduceMax(const XTensor &input, int dim) ...@@ -114,7 +114,7 @@ XTensor ReduceMax(const XTensor &input, int dim)
float dr = (!input.isSparse) ? 1.0F : input.denseRatio; float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem); XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMP(); output.SetTMPFlag();
/* call _ReduceMax function */ /* call _ReduceMax function */
_ReduceMax(&input, &output, dim); _ReduceMax(&input, &output, dim);
......
...@@ -71,7 +71,7 @@ XTensor ReduceMean(const XTensor &input, int dim) ...@@ -71,7 +71,7 @@ XTensor ReduceMean(const XTensor &input, int dim)
float dr = (!input.isSparse) ? 1.0F : input.denseRatio; float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem); XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMP(); output.SetTMPFlag();
/* call _ReduceMean function */ /* call _ReduceMean function */
_ReduceMean(&input, &output, dim); _ReduceMean(&input, &output, dim);
......
...@@ -225,7 +225,7 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow ...@@ -225,7 +225,7 @@ XTensor ReduceSum(const XTensor &input, int dim, const XTensor &shift, DTYPE pow
float dr = (!input.isSparse) ? 1.0F : input.denseRatio; float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem); XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMP(); output.SetTMPFlag();
/* call _ReduceSum function */ /* call _ReduceSum function */
_ReduceSum(&input, &output, dim, &shift, power, isExp); _ReduceSum(&input, &output, dim, &shift, power, isExp);
...@@ -271,7 +271,7 @@ XTensor ReduceSum(const XTensor &input, int dim, DTYPE power, bool isExp) ...@@ -271,7 +271,7 @@ XTensor ReduceSum(const XTensor &input, int dim, DTYPE power, bool isExp)
float dr = (!input.isSparse) ? 1.0F : input.denseRatio; float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem); XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMP(); output.SetTMPFlag();
/* call _ReduceSum function */ /* call _ReduceSum function */
_ReduceSum(&input, &output, dim, NULL, power, isExp); _ReduceSum(&input, &output, dim, NULL, power, isExp);
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-27
*/
#include "ReduceSumAll.h"
#include "ReduceSum.h"
#include "../movement/CopyValues.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* get the dimension sizes of a tensor, with dimension n removed */
int * getDimSize(const XTensor * tensor, int n)
{
int order = tensor->order;
int * dimSize = new int[order - 1];
for (int i = 0; i < order; i++) {
if(i < n)
dimSize[i] = tensor->dimSize[i];
else if(i > n)
dimSize[i - 1] = tensor->dimSize[i];
}
return dimSize;
}
/*
sum all the items of the tensor (it should be optimized!)
>> source - the input tensor
<< return - the total summation
*/
DTYPE _ReduceSumAll(XTensor * source)
{
int order = source->order;
DTYPE summation;
XTensor * big = NewTensor(source);
_CopyValues(source, big);
for(int i = 0; i < order; i++) {
if(i == order - 1)
big->Reshape(big->unitNum, 1);
int * dimSize = getDimSize(big, 0);
XTensor * little = NewTensor(big->order - 1, dimSize, source->dataType, source->denseRatio, source->devID, source->mem);
_ReduceSum(big, little, 0);
delete big;
delete[] dimSize;
big = NewTensor(little);
_CopyValues(little, big);
delete little;
}
summation = big->Get1D(0);
delete big;
return summation;
}
/*
sum all the items of the tensor
>> source - the input tensor
<< return - the total summation
*/
DTYPE ReduceSumAll(XTensor & source)
{
return _ReduceSumAll(&source);
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
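A short sketch of the new ReduceSumAll (2 x 3 setup helpers assumed, as above):

#include "ReduceSumAll.h"

using namespace nts;

void ReduceSumAllExample()
{
    XTensor a;
    InitTensor2D(&a, 2, 3);
    DTYPE v[6] = {(DTYPE)1.0, (DTYPE)2.0, (DTYPE)3.0,
                  (DTYPE)4.0, (DTYPE)5.0, (DTYPE)6.0};
    a.SetData(v, 6);

    DTYPE total = ReduceSumAll(a);   /* total == 21.0 */
}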
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-27
*/
#ifndef __REDUCESUMALL_H__
#define __REDUCESUMALL_H__
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* sum all the items of the tensor */
DTYPE _ReduceSumAll(XTensor * source);
/* sum all the items of the tensor */
DTYPE ReduceSumAll(XTensor & source);
} // namespace nts(NiuTrans.Tensor)
#endif // __REDUCESUMALL_H__
\ No newline at end of file
...@@ -67,7 +67,7 @@ XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift) ...@@ -67,7 +67,7 @@ XTensor ReduceSumSquared(const XTensor &input, int dim, const XTensor &shift)
float dr = (!input.isSparse) ? 1.0F : input.denseRatio; float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem); XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMP(); output.SetTMPFlag();
/* call _ReduceSumSquared function */ /* call _ReduceSumSquared function */
_ReduceSumSquared(&input, &output, dim, &shift); _ReduceSumSquared(&input, &output, dim, &shift);
......
...@@ -70,7 +70,7 @@ XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean) ...@@ -70,7 +70,7 @@ XTensor ReduceVariance(const XTensor &input, int dim, const XTensor &mean)
float dr = (!input.isSparse) ? 1.0F : input.denseRatio; float dr = (!input.isSparse) ? 1.0F : input.denseRatio;
XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem); XTensor output(order, dimSize, input.dataType, dr, input.devID, input.mem);
output.SetTMP(); output.SetTMPFlag();
/* call _ReduceVariance function */ /* call _ReduceVariance function */
_ReduceVariance(&input, &output, dim, &mean); _ReduceVariance(&input, &output, dim, &mean);
......
...@@ -93,7 +93,7 @@ XTensor Concatenate(const XList &smalls, int dim) ...@@ -93,7 +93,7 @@ XTensor Concatenate(const XList &smalls, int dim)
float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio; float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
XTensor big(order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem); XTensor big(order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
big.SetTMP(); big.SetTMPFlag();
/* call _Merge function */ /* call _Merge function */
_Merge(&smalls, &big, dim); _Merge(&smalls, &big, dim);
...@@ -121,7 +121,7 @@ XTensor Concatenate(const XList &smalls, int dim) ...@@ -121,7 +121,7 @@ XTensor Concatenate(const XList &smalls, int dim)
float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio; float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
XTensor big(order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem); XTensor big(order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
big.SetTMP(); big.SetTMPFlag();
/* call _ConcatenateSolely function */ /* call _ConcatenateSolely function */
_ConcatenateSolely(&smalls, &big, dim); _ConcatenateSolely(&smalls, &big, dim);
...@@ -194,7 +194,7 @@ XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim) ...@@ -194,7 +194,7 @@ XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim)
float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio; float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
XTensor big(order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem); XTensor big(order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
big.SetTMP(); big.SetTMPFlag();
/* call _Merge function */ /* call _Merge function */
_Merge(&smalls, &big, dim); _Merge(&smalls, &big, dim);
...@@ -222,7 +222,7 @@ XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim) ...@@ -222,7 +222,7 @@ XTensor Concatenate(const XTensor &smallA, const XTensor &smallB, int dim)
float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio; float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
XTensor big(order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem); XTensor big(order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
big.SetTMP(); big.SetTMPFlag();
/* call _ConcatenateSolely function */ /* call _ConcatenateSolely function */
_ConcatenateSolely(&smalls, &big, dim); _ConcatenateSolely(&smalls, &big, dim);
......
...@@ -183,7 +183,7 @@ XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim) ...@@ -183,7 +183,7 @@ XTensor Merge(const XTensor &s, int whereToMerge, int leadingDim)
float dr = (!s.isSparse) ? 1.0F : s.denseRatio; float dr = (!s.isSparse) ? 1.0F : s.denseRatio;
XTensor t(order, dimSize, s.dataType, dr, s.devID, s.mem); XTensor t(order, dimSize, s.dataType, dr, s.devID, s.mem);
t.SetTMP(); t.SetTMPFlag();
/* call _Merge function */ /* call _Merge function */
_Merge(&s, &t, whereToMerge, leadingDim); _Merge(&s, &t, whereToMerge, leadingDim);
...@@ -334,7 +334,7 @@ XTensor Merge(const XList &smalls, int whereToMerge) ...@@ -334,7 +334,7 @@ XTensor Merge(const XList &smalls, int whereToMerge)
float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio; float dr = (!tensor->isSparse) ? 1.0F : tensor->denseRatio;
XTensor big(order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem); XTensor big(order, dimSize, tensor->dataType, dr, tensor->devID, tensor->mem);
big.SetTMP(); big.SetTMPFlag();
/* call _Merge function */ /* call _Merge function */
_Merge(&smalls, &big, whereToMerge); _Merge(&smalls, &big, whereToMerge);
...@@ -371,7 +371,7 @@ XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge) ...@@ -371,7 +371,7 @@ XTensor Merge(const XTensor &smallA, const XTensor &smallB, int whereToMerge)
float dr = (!smallA.isSparse) ? 1.0F : smallA.denseRatio; float dr = (!smallA.isSparse) ? 1.0F : smallA.denseRatio;
XTensor big(order, dimSize, smallA.dataType, dr, smallA.devID, smallA.mem); XTensor big(order, dimSize, smallA.dataType, dr, smallA.devID, smallA.mem);
big.SetTMP(); big.SetTMPFlag();
XList smalls(2); XList smalls(2);
smalls.Add(&smallA); smalls.Add(&smallA);
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-25
*/
#include "../../XTensor.h"
#include "../../XName.h"
#include "../movement/CopyValues.h"
#include "Reshape.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
reshape the tensor
>> s - the input tensor
>> order - the new order of the tensor
>> dimSize - the new size of each dimension
<< return - the output tensor
*/
XTensor Reshape(XTensor &s, int order, int * dimSize)
{
XTensor t(&s);
t.SetTMPFlag();
_CopyValues(&s, &t);
int oriOrder = s.order;
int * oriDimSize = new int[oriOrder];
memcpy(oriDimSize, s.dimSize, sizeof(int) * oriOrder);
/* call Reshape function */
t.Reshape(order, dimSize);
/* tensor connections */
XLink::MakeLink(&s, NULL, &t, SHAPE_RESHAPE);
XLink::AddParamToHeadInt(&t, oriOrder);
XLink::AddParamToHeadPointer(&t, oriDimSize);
return t;
}
} // namespace nts(NiuTrans.Tensor)
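Usage of the new Reshape wrapper, sketched with assumed setup helpers; note that it copies the values of s into a new tensor rather than aliasing them:

#include "Reshape.h"

using namespace nts;

void ReshapeExample()
{
    XTensor s;
    InitTensor2D(&s, 2, 6);
    s.SetDataRand();

    int dimSize[3] = {2, 3, 2};
    XTensor t = Reshape(s, 3, dimSize);   /* t holds a copy of s in shape (2, 3, 2) */
}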
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-25
*/
#ifndef __RESHAPE_H__
#define __RESHAPE_H__
#include "../../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* reshape the tensor */
XTensor Reshape(XTensor &s, int order, int * dimSize);
} // namespace nts(NiuTrans.Tensor)
#endif // __RESHAPE_H__
...@@ -184,7 +184,7 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum) ...@@ -184,7 +184,7 @@ XTensor Split(const XTensor &s, int whereToSplit, int splitNum)
float dr = (!s.isSparse) ? 1.0F : s.denseRatio; float dr = (!s.isSparse) ? 1.0F : s.denseRatio;
XTensor t(order, dimSize, s.dataType, dr, s.devID, s.mem); XTensor t(order, dimSize, s.dataType, dr, s.devID, s.mem);
t.SetTMP(); t.SetTMPFlag();
/* call _Split function */ /* call _Split function */
_Split(&s, &t, whereToSplit, splitNum); _Split(&s, &t, whereToSplit, splitNum);
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-27
*/
#include "Squeeze.h"
#include "../movement/CopyValues.h"
#include "../../XName.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
squeeze the tensor along the specified dimension
>> source - the input tensor
>> target - the output tensor
>> leadingDim - the dimension to squeeze
if leadingDim is -1, squeeze all dimensions of size 1;
otherwise, squeeze only the specified dimension
*/
void _Squeeze(XTensor * source, XTensor * target, int leadingDim)
{
int order = target->order;
CheckNTErrors(XTensor::IsSameShaped(source, target),
"The source and target tensor must be of the same size!");
CheckNTErrors(leadingDim >= -1 && leadingDim < order,
"Wrong leading dimension");
_CopyValues(source, target);
if(leadingDim < 0) {
int * newDimSize = new int[order];
int newOrder = 0;
for(int i = 0; i < order; i++) {
int dim = source->GetDim(i);
if(dim > 1) {
newDimSize[newOrder] = dim;
newOrder += 1;
}
}
target->Reshape(newOrder, newDimSize);
delete[] newDimSize;
}
else {
if(source->GetDim(leadingDim) > 1)
return;
int newOrder = order - 1;
int * newDimSize = new int[newOrder];
for(int i = 0; i < order; i++)
if(i < leadingDim)
newDimSize[i] = source->GetDim(i);
else if(i > leadingDim)
newDimSize[i - 1] = source->GetDim(i);
target->Reshape(newOrder, newDimSize);
delete[] newDimSize;
}
}
/*
squeeze the tensor along the specified dimension (do it on site)
keep the result in the input tensor and return nothing
>> source - the input tensor
>> leadingDim - the dimension to squeeze
if leadingDim is -1, squeeze all dimensions of size 1;
otherwise, squeeze only the specified dimension
*/
void _SqueezeMe(XTensor * source, int leadingDim)
{
_Squeeze(source, source, leadingDim);
}
/*
squeeze the tensor along the specified dimension (return a XTensor structure)
make a new tensor to keep the result and return it
>> source - the input tensor
>> leadingDim - the dimension to squeeze
if leadingDim is -1, squeeze all dimensions of size 1;
otherwise, squeeze only the specified dimension
<< return - the output tensor after the squeeze operation
*/
XTensor Squeeze(XTensor & source, int leadingDim)
{
XTensor target(&source);
target.SetTMPFlag();
/* call _Squeeze function */
_Squeeze(&source, &target, leadingDim);
/* tensor connections */
XLink::MakeLink(&source, NULL, &target, SHAPE_SQUEEZE);
return target;
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
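A sketch of the two Squeeze modes (setup helpers assumed):

#include "Squeeze.h"

using namespace nts;

void SqueezeExample()
{
    XTensor a;
    InitTensor3D(&a, 3, 1, 4);
    a.SetDataRand();

    XTensor b = Squeeze(a);      /* leadingDim = -1: all size-1 dimensions removed -> (3, 4) */
    XTensor c = Squeeze(a, 1);   /* only dimension 1 squeezed -> (3, 4) */
    XTensor d = Squeeze(a, 0);   /* dimension 0 has size 3 > 1 -> shape stays (3, 1, 4) */
}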
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-27
*/
#ifndef __SQUEEZE_H__
#define __SQUEEZE_H__
#include "../../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* squeeze the tensor along the specified dimension */
void _Squeeze(XTensor * source, XTensor * target, int leadingDim = -1);
/* squeeze the tensor along the specified dimension (do it on site)
keep the result in the input tensor and return nothing */
void _SqueezeMe(XTensor * source, int leadingDim = -1);
/* squeeze the tensor along the specified dimension (return an XTensor structure)
make a new tensor to keep the result and return it */
XTensor Squeeze(XTensor & source, int leadingDim = -1);
} // namespace nts(NiuTrans.Tensor)
#endif // __SQUEEZE_H__
\ No newline at end of file
...@@ -138,7 +138,7 @@ XTensor Transpose(const XTensor &a, const int i, const int j) ...@@ -138,7 +138,7 @@ XTensor Transpose(const XTensor &a, const int i, const int j)
float dr = (!a.isSparse) ? 1.0F : a.denseRatio; float dr = (!a.isSparse) ? 1.0F : a.denseRatio;
XTensor b(order, dimSize, a.dataType, dr, a.devID, a.mem); XTensor b(order, dimSize, a.dataType, dr, a.devID, a.mem);
b.SetTMP(); b.SetTMPFlag();
/* call _Transpose function */ /* call _Transpose function */
_Transpose(&a, &b, i, j); _Transpose(&a, &b, i, j);
......
...@@ -122,7 +122,7 @@ XTensor Unsqueeze(const XTensor &a, int dim, int dSize) ...@@ -122,7 +122,7 @@ XTensor Unsqueeze(const XTensor &a, int dim, int dSize)
float dr = (!a.isSparse) ? 1.0F : a.denseRatio; float dr = (!a.isSparse) ? 1.0F : a.denseRatio;
XTensor b(order, dimSize, a.dataType, dr, a.devID, a.mem); XTensor b(order, dimSize, a.dataType, dr, a.devID, a.mem);
b.SetTMP(); b.SetTMPFlag();
/* call _Unsqueeze function */ /* call _Unsqueeze function */
_Unsqueeze(&a, &b, dim, dSize); _Unsqueeze(&a, &b, dim, dSize);
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University. * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
/* /*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#ifndef __UNSQUEEZE_H__ #ifndef __UNSQUEEZE_H__
#define __UNSQUEEZE_H__ #define __UNSQUEEZE_H__
...@@ -26,14 +26,13 @@ ...@@ -26,14 +26,13 @@
namespace nts { // namespace nts(NiuTrans.Tensor) namespace nts { // namespace nts(NiuTrans.Tensor)
/* insert a dimension by copying the blocks for x times (where x is the size of the inserted dimension) */ /* insert a dimension by copying the blocks for x times
(where x is the size of the inserted dimension) */
void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize); void _Unsqueeze(const XTensor * a, XTensor * b, int dim, int dSize);
/* /* insert a dimension by copying the blocks for x times
insert a dimension by copying the blocks for x times (where x is the size of the inserted dimension) (return an XTensor structure)
(where x is the size of the inserted dimension) (return an XTensor structure) make a new tensor to keep the result and return it */
make a new tensor to keep the result and return it
*/
XTensor Unsqueeze(const XTensor &a, int dim, int dSize); XTensor Unsqueeze(const XTensor &a, int dim, int dSize);
} // namespace nts(NiuTrans.Tensor) } // namespace nts(NiuTrans.Tensor)
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-17
*/
#include <math.h>
#include "CrossEntropy.h"
#include "CrossEntropy.cuh"
#include "../core/arithmetic/MultiplyDim.h"
#include "../core/arithmetic/Multiply.h"
#include "../core/math/Unary.h"
#include "../core/math/ScaleAndShift.h"
#include "../core/arithmetic/Negate.h"
#include "../core/reduce/ReduceSum.h"
#include "../core/reduce/ReduceSumAll.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
compute the cross entropy loss
loss = sum_{i} (-gold_i * log(output_i))
where gold and output are distributions
>> output - model prediction
>> gold - gold standard
>> loss - the computed loss (for return)
>> weight - a rescaling weight given to each class
>> padding - indicates the positions to ignore in the loss computation (0 means ignored)
>> leadingDim - the leading dimension for the output
*/
void _CrossEntropy(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight,
const XTensor * padding, int leadingDim)
{
int n = leadingDim < 0 ? output->order - 1 : leadingDim;
CheckNTErrors(n >= 0 && n < output->order, "Wrong leadingDim!");
int unitNum = output->dimSize[n];
CheckNTErrors(XTensor::IsSameShaped(output, gold),
"The output tensor and gold tensor must be of the same size!");
CheckNTErrors(weight == NULL || weight->unitNum == unitNum, "Wrong weight tensor!");
CheckNTErrors(padding == NULL || XTensor::IsSameShaped(padding, loss), "The loss tensor and padding tensor must be same shape!");
CheckNTErrors(loss->order == output->order - 1, "Wrong loss dimension!");
CheckNTErrors(gold->dataType == DEFAULT_DTYPE && output->dataType == DEFAULT_DTYPE, "TODO!");
XTensor * logBuf = NewTensorBuf(output, output->devID, output->mem);
XTensor * mulBuf = NewTensorBuf(output, output->devID, output->mem);
/* l = log(output) */
_Log(output, logBuf);
if(weight != NULL){
XTensor * weightBuf = NewTensorBuf(output, output->devID, output->mem);
/* multiply gold and weight by broadcast wg = mulDim(g * w) */
_MultiplyDim(gold, weight, weightBuf, n, 0);
/* multiply weighted gold and log(output) wgl = mul(wg, l) */
_Multiply(weightBuf, logBuf, mulBuf, 0);
DelTensorBuf(weightBuf);
}
else{
/* multiply gold and log(output) gl = mul(g, l) */
_Multiply(gold, logBuf, mulBuf, 0);
}
/* negate the multiplication result: neg = negate(mul) */
_NegateMe(mulBuf);
_ReduceSum(mulBuf, loss, n);
    DelTensorBuf(mulBuf);
    DelTensorBuf(logBuf);
}
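/*
usage sketch (illustrative only; the tensor names here are hypothetical):
for a batch of 4 predictions over 10 classes, output and gold are of
size (4, 10) and the per-sample loss tensor is of size (4)

    int dims[2] = {4, 10};
    int lossDims[1] = {4};
    XTensor * output = NewTensor(2, dims);
    XTensor * gold = NewTensor(2, dims);
    XTensor * loss = NewTensor(1, lossDims);
    ... set output to a distribution and gold to one-hot labels ...
    _CrossEntropy(output, gold, loss);
*/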
/*
compute the cross entropy loss (manual implementation)
loss = sum_{i} (-gold_i * log(output_i))
where gold and output are distributions
>> output - model prediction
>> gold - gold standard
>> loss - the computed loss (for return)
>> weight - a rescaling weight given to each class
>> padding - indicates the positions to ignore in the loss computation (0 means ignored)
>> leadingDim - the leading dimension for the output
*/
void _CrossEntropyManual(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight,
const XTensor * padding, int leadingDim)
{
#ifdef USE_CUDA
if(output->devID >= 0) {
_CudaCrossEntropyManual(output, gold, loss, weight, padding, leadingDim);
return;
}
#endif
int order = output->order;
int n = leadingDim < 0 ? output->order - 1 : leadingDim;
int leadingDimSize = output->GetDim(n);
CheckNTErrors(n >= 0 && n < output->order,
"Wrong leadingDim!");
CheckNTErrors(XTensor::IsSameShaped(output, gold),
"The output tensor and gold tensor must be of the same size!");
CheckNTErrors(weight == NULL || weight->unitNum == leadingDimSize,
"Wrong weight tensor!");
CheckNTErrors(padding == NULL || XTensor::IsSameShaped(padding, loss),
"The loss tensor and padding tensor must be same shape!");
CheckNTErrors(loss->order == output->order - 1,
"Wrong loss dimension!");
CheckNTErrors(gold->dataType == DEFAULT_DTYPE && output->dataType == DEFAULT_DTYPE,
"TODO!");
int blockNum = 1;
int blockSize = 1;
int stride = 1;
for(int i = n + 1; i < order; i++)
stride *= output->GetDim(i);
blockSize = stride * leadingDimSize;
blockNum = output->unitNum / blockSize;
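    /* e.g., for output of size (4, 6) with n = 1 (the last dimension):
       stride = 1, blockSize = 6 and blockNum = 4, i.e., one block per sample */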
DTYPE * outputData = (DTYPE*)output->data;
DTYPE * goldData = (DTYPE*)gold->data;
DTYPE * lossData = (DTYPE*)loss->data;
DTYPE tmpLoss;
if(weight == NULL) {
if(padding == NULL) {
for(int i = 0; i < blockNum; i++) {
int beg = i * blockSize;
tmpLoss = 0;
for(int j = 0; j < blockSize; j++)
tmpLoss += -(*(goldData + beg + j)) *
(DTYPE)log(*(outputData + beg + j));
*(lossData + i) = tmpLoss;
}
}
else {
DTYPE * paddingData = (DTYPE*)padding->data;
for(int i = 0; i < blockNum; i++) {
int beg = i * blockSize;
if(*(paddingData + i) == 0)
*(lossData + i) = 0;
else{
tmpLoss = 0;
for(int j = 0; j < blockSize; j++)
tmpLoss += -(*(goldData + beg + j)) *
(DTYPE)log(*(outputData + beg + j));
*(lossData + i) = tmpLoss;
}
}
}
}
else {
DTYPE * weightData = (DTYPE*)weight->data;
if(padding == NULL) {
for(int i = 0; i < blockNum; i++) {
int beg = i * blockSize;
tmpLoss = 0;
for(int j = 0; j < blockSize; j++)
tmpLoss += -(*(goldData + beg + j)) *
(DTYPE)log(*(outputData + beg + j)) *
(*(weightData + j));
*(lossData + i) = tmpLoss;
}
}
else {
DTYPE * paddingData = (DTYPE*)padding->data;
for(int i = 0; i < blockNum; i++) {
int beg = i * blockSize;
if(*(paddingData + i) == 0)
*(lossData + i) = 0;
else{
tmpLoss = 0;
for(int j = 0; j < blockSize; j++)
tmpLoss += -(*(goldData + beg + j)) *
(DTYPE)log(*(outputData + beg + j)) *
(*(weightData + j));
*(lossData + i) = tmpLoss;
}
}
}
}
}
/*
get the dimSize after reduce operation
>> tensor - a tensor to be reduced
>> n - the reduce dimension
<< return - the pointer to the reduced dimSize array (to be freed by the caller with delete[])
*/
int * reduceDimSize(const XTensor * tensor, int n)
{
int order = tensor->order;
int * dimSize = new int[order - 1];
for (int i = 0; i < order; i++) {
if(i < n)
dimSize[i] = tensor->dimSize[i];
else if(i > n)
dimSize[i - 1] = tensor->dimSize[i];
}
return dimSize;
}
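/*
e.g., for a tensor of size (4, 5, 6) and n = 1, the returned dimSize is
{4, 6}; the caller is responsible for delete[]-ing the returned array
*/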
/*
compute the cross entropy loss
loss = sum_{i} (-gold_i * log(output_i))
where gold and output are distributions
>> output - model prediction
>> gold - gold standard
>> reduceWay - the way to compute the loss, i.e., sum or mean
>> weight - a rescaling weight given to each class
>> padding - indicates the positions to ignore in the loss computation (0 means ignored)
>> leadingDim - the leading dimension for the output
*/
DTYPE _CrossEntropy(const XTensor * output, const XTensor * gold,
LOSS_COMPUTE_WAY reduceWay, const XTensor * weight,
const XTensor * padding, int leadingDim)
{
int n = leadingDim < 0 ? output->order - 1 : leadingDim;
CheckNTErrors(n >= 0 && n < output->order, "Wrong leadingDim!");
int unitNum = output->dimSize[n];
CheckNTErrors(XTensor::IsSameShaped(output, gold),
"The output tensor and gold tensor must be of the same size!");
CheckNTErrors(weight == NULL || weight->unitNum == unitNum, "Wrong weight tensor!");
CheckNTErrors(padding == NULL || padding->order == output->order - 1, "The loss tensor and padding tensor must be same shape!");
CheckNTErrors(gold->dataType == DEFAULT_DTYPE && output->dataType == DEFAULT_DTYPE, "TODO!");
XTensor * logBuf = NewTensorBuf(output, output->devID, output->mem);
XTensor * mulBuf = NewTensorBuf(output, output->devID, output->mem);
/* l = log(output) */
_Log(output, logBuf);
if(weight != NULL){
XTensor * weightBuf = NewTensorBuf(output, output->devID, output->mem);
/* multiply gold and weight by broadcast wg = mulDim(g * w) */
_MultiplyDim(gold, weight, weightBuf, n, 0);
/* multiply weighted gold and log(output) wgl = mul(wg, l) */
_Multiply(weightBuf, logBuf, mulBuf, 0);
DelTensorBuf(weightBuf);
}
else{
/* multiply gold and log(output) gl = mul(g, l) */
_Multiply(gold, logBuf, mulBuf, 0);
}
/* negate the multiplication result: neg = negate(mul) */
_NegateMe(mulBuf);
    int * dimSize = reduceDimSize(output, n);
XTensor * lossInter = NewTensor(output->order - 1, dimSize, output->dataType, output->denseRatio, output->devID, output->mem);
/* reduce sum all classes */
_ReduceSum(mulBuf, lossInter, n);
DelTensorBuf(mulBuf);
DelTensorBuf(logBuf);
DTYPE loss;
/* compute the total loss */
if(padding != NULL) {
        XTensor * temp = NewTensor(lossInter);
_Multiply(lossInter, padding, temp);
loss = _ReduceSumAll(temp);
delete temp;
}
else
loss = _ReduceSumAll(lossInter);
if(reduceWay == REDUCE_MEAN) {
if(padding != NULL) {
XTensor * zeroIndicator = NewTensorBuf(padding, padding->devID, padding->mem);
_IsZero(padding, zeroIndicator);
int reduceSize = (int)_ReduceSumAll(zeroIndicator);
loss = loss / (DTYPE)(padding->unitNum - reduceSize);
DelTensorBuf(zeroIndicator);
}
else
loss = loss / (DTYPE)lossInter->unitNum;
}
else if(reduceWay == REDUCE_SUM) {
/* don't need to do anything */
}
else {
ShowNTErrors("TODO");
}
delete[] dimSize;
delete lossInter;
return loss;
}
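/*
worked example (illustrative): with two samples whose per-sample losses
are 1.0 and 3.0 and no padding, REDUCE_SUM gives 4.0 and REDUCE_MEAN
gives 4.0 / 2 = 2.0
*/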
/*
compute the cross entropy loss (manual implementation)
loss = sum_{i} (-gold_i * log(output_i))
where gold and output are distributions
>> output - model prediction
>> gold - gold standard
>> reduceWay - the way to compute the loss, i.e., sum or mean
>> weight - a rescaling weight given to each class
>> padding - indicates the positions to ignore in the loss computation (0 means ignored)
>> leadingDim - the leading dimension for the output
<< return - the cross entropy loss that is a scalar
*/
DTYPE _CrossEntropyManual(const XTensor * output, const XTensor * gold,
LOSS_COMPUTE_WAY reduceWay, const XTensor * weight,
const XTensor * padding, int leadingDim)
{
#ifdef USE_CUDA
if(output->devID >= 0) {
return _CudaCrossEntropyManual(output, gold, reduceWay, weight, padding, leadingDim);
}
#endif
int order = output->order;
int n = leadingDim < 0 ? output->order - 1 : leadingDim;
int leadingDimSize = output->GetDim(n);
CheckNTErrors(n >= 0 && n < output->order,
"Wrong leadingDim!");
CheckNTErrors(XTensor::IsSameShaped(output, gold),
"The output tensor and gold tensor must be of the same size!");
CheckNTErrors(weight == NULL || weight->unitNum == leadingDimSize,
"Wrong weight tensor!");
CheckNTErrors(padding == NULL || padding->order == output->order - 1,
"Wrong padding tensor!");
CheckNTErrors(gold->dataType == DEFAULT_DTYPE && output->dataType == DEFAULT_DTYPE,
"TODO!");
int blockNum = 1;
int blockSize = 1;
int stride = 1;
for(int i = n + 1; i < order; i++)
stride *= output->GetDim(i);
blockSize = stride * leadingDimSize;
blockNum = output->unitNum / blockSize;
DTYPE * outputData = (DTYPE*)output->data;
DTYPE * goldData = (DTYPE*)gold->data;
DTYPE loss = 0;
int nonZeroNum = 0;
if(weight == NULL) {
if(padding == NULL) {
nonZeroNum = blockNum;
for(int i = 0; i < blockNum; i++) {
int beg = i * blockSize;
for(int j = 0; j < blockSize; j++)
loss += -(*(goldData + beg + j)) *
(DTYPE)log(*(outputData + beg + j));
}
}
else {
DTYPE * paddingData = (DTYPE*)padding->data;
for(int i = 0; i < blockNum; i++) {
if(*(paddingData + i) == 0)
continue;
else{
nonZeroNum += 1;
int beg = i * blockSize;
for(int j = 0; j < blockSize; j++)
loss += -(*(goldData + beg + j)) *
(DTYPE)log(*(outputData + beg + j));
}
}
}
}
else {
DTYPE * weightData = (DTYPE*)weight->data;
if(padding == NULL) {
nonZeroNum = blockNum;
for(int i = 0; i < blockNum; i++) {
int beg = i * blockSize;
for(int j = 0; j < blockSize; j++)
loss += -(*(goldData + beg + j)) *
(DTYPE)log(*(outputData + beg + j)) *
(*(weightData + j));
}
}
else {
DTYPE * paddingData = (DTYPE*)padding->data;
for(int i = 0; i < blockNum; i++) {
if(*(paddingData + i) == 0)
continue;
else{
nonZeroNum += 1;
int beg = i * blockSize;
for(int j = 0; j < blockSize; j++)
loss += -(*(goldData + beg + j)) *
(DTYPE)log(*(outputData + beg + j)) *
(*(weightData + j));
}
}
}
}
if(reduceWay == REDUCE_MEAN) {
loss = loss / (DTYPE)nonZeroNum;
}
else if(reduceWay == REDUCE_SUM) {
/* don't need to do anything */
}
else {
ShowNTErrors("TODO");
}
return loss;
}
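/*
worked example (taken from the unit test): for a single sample with
output = (0.25, 0.25, 0.25, 0.25) and gold = (0.5, 0.5, 0, 0),
loss = -(0.5 * log(0.25) + 0.5 * log(0.25)) = -log(0.25) = 1.3863,
and REDUCE_SUM and REDUCE_MEAN agree since there is only one block
*/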
/*
backward computation for the cross entropy function (tensor version)
loss = sum_{i} (-t_i * log(y_i))
dE/dy_i = -t_i / y_i
where E is the error (loss) function that measures the error of y
with respect to the gold standard, and y is the model output
>> dedy - dE/dy (for return)
>> output - model prediction
>> gold - gold standard
>> weight - a rescaling weight given to each class
>> padding - indicates the positions to ignore in the loss computation (0 means ignored)
>> leadingDim - the leading dimension for the output
*/
void _CrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTensor * gold,
const XTensor * weight, XTensor * padding,
int leadingDim)
{
#ifdef USE_CUDA
if(output->devID >= 0) {
_CudaCrossEntropyBackward(dedy, output, gold, weight, padding, leadingDim);
return;
}
#endif
int order = output->order;
int n = leadingDim < 0 ? output->order - 1 : leadingDim;
int leadingDimSize = output->GetDim(n);
int unitSize = dedy->unitSize;
CheckNTErrors(n >= 0 && n < output->order,
"Wrong leading dimension!");
CheckNTErrors(XTensor::IsSameShaped(dedy, output, gold),
"The output tensor and gold tensor must be of the same size!");
CheckNTErrors(weight == NULL || weight->unitNum == leadingDimSize,
"Wrong weight tensor!");
CheckNTErrors(padding == NULL || padding->order == output->order - 1,
"Wrong padding tensor!");
CheckNTErrors(gold->dataType == DEFAULT_DTYPE && output->dataType == DEFAULT_DTYPE,
"TODO!");
int blockNum = 1;
int blockSize = 1;
int stride = 1;
for(int i = n + 1; i < order; i++)
stride *= output->GetDim(i);
blockSize = stride * leadingDimSize;
blockNum = output->unitNum / blockSize;
DTYPE * dedyData = (DTYPE*)dedy->data;
DTYPE * outputData = (DTYPE*)output->data;
DTYPE * goldData = (DTYPE*)gold->data;
if(weight == NULL) {
if(padding == NULL) {
for(int i = 0; i < blockNum; i++) {
int beg = i * blockSize;
for(int j = 0; j < blockSize; j++)
*(dedyData + beg + j) = -(*(goldData + beg + j)) /
(*(outputData + beg + j));
}
}
else {
DTYPE * paddingData = (DTYPE*)padding->data;
for(int i = 0; i < blockNum; i++) {
int beg = i * blockSize;
if(*(paddingData + i) == 0)
memset(dedyData + beg, 0, blockSize * unitSize);
else
for(int j = 0; j < blockSize; j++)
*(dedyData + beg + j) = -(*(goldData + beg + j)) /
(*(outputData + beg + j));
}
}
}
else {
DTYPE * weightData = (DTYPE*)weight->data;
if(padding == NULL) {
for(int i = 0; i < blockNum; i++) {
int beg = i * blockSize;
for(int j = 0; j < blockSize; j++)
*(dedyData + beg + j) = -(*(weightData + j)) *
(*(goldData + beg + j)) /
(*(outputData + beg + j));
}
}
else {
DTYPE * paddingData = (DTYPE*)padding->data;
for(int i = 0; i < blockNum; i++) {
int beg = i * blockSize;
if(*(paddingData + i) == 0)
memset(dedyData + beg, 0, blockSize * unitSize);
else
for(int j = 0; j < blockSize; j++) {
*(dedyData + beg + j) = -(*(weightData + j)) *
(*(goldData + beg + j)) /
(*(outputData + beg + j));
}
}
}
}
if(padding != NULL) {
        XTensor * tmp = NewTensor(padding);
        _IsZero(padding, tmp);
        int nonZeroNum = padding->unitNum - (int)_ReduceSumAll(tmp);
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
delete tmp;
}
else {
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)blockNum);
}
}
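/*
worked example (illustrative): for gold_i = 0.5 and output_i = 0.25,
dE/dy_i = -0.5 / 0.25 = -2.0, which is then scaled by 1/blockNum
(or by one over the number of non-padded blocks when padding is given)
*/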
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-17
*/
#include "../XTensor.h"
#include "../XDevice.h"
#include "CrossEntropy.cuh"
#include "CrossEntropy.h"
#include "../core/reduce/ReduceSumAll.h"
#include "../core/math/Unary.h"
#include "../core/math/ScaleAndShift.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/*
compute the cross entropy loss (cuda kernel)
>> outputData - the data pointer of output tensor
>> goldData - the data pointer of gold tensor
>> lossData - the data pointer of loss tensor
>> weightData - the data pointer of weight tensor
>> paddingData - the data pointer of padding tensor
>> blockNum - the number of data blocks
>> blockSize - the size of a data block
*/
__global__
void KernelCrossEntropy(DTYPE * outputData, DTYPE * goldData,
DTYPE * lossData, DTYPE * weightData,
DTYPE * paddingData, int blockNum, int blockSize)
{
/* block id */
int i = blockDim.x * blockIdx.x + threadIdx.x;
if(i >= blockNum)
return;
int beg = i * blockSize;
DTYPE tmpLoss = 0;
if(weightData == NULL) {
if(paddingData == NULL) {
tmpLoss = 0;
for(int j = 0; j < blockSize; j++)
tmpLoss += -(*(goldData + beg + j)) *
(DTYPE)log(*(outputData + beg + j));
*(lossData + i) = tmpLoss;
}
else {
if(*(paddingData + i) == 0)
*(lossData + i) = tmpLoss;
else{
for(int j = 0; j < blockSize; j++)
tmpLoss += -(*(goldData + beg + j)) *
(DTYPE)log(*(outputData + beg + j));
*(lossData + i) = tmpLoss;
}
}
}
else {
if(paddingData == NULL) {
for(int j = 0; j < blockSize; j++)
tmpLoss += -(*(goldData + beg + j)) *
(DTYPE)log(*(outputData + beg + j)) *
(*(weightData + j));
*(lossData + i) = tmpLoss;
}
else {
if(*(paddingData + i) == 0)
*(lossData + i) = tmpLoss;
else{
tmpLoss = 0;
for(int j = 0; j < blockSize; j++)
tmpLoss += -(*(goldData + beg + j)) *
(DTYPE)log(*(outputData + beg + j)) *
(*(weightData + j));
*(lossData + i) = tmpLoss;
}
}
}
}
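/*
thread mapping sketch (illustrative): each thread reduces one block, e.g.,
for output of size (128, 10000) with the class dimension last, blockNum = 128
and blockSize = 10000, so thread i sums the 10000 terms of sample i and
writes a single value to lossData[i]
*/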
/*
compute the cross entropy loss (cuda version)
loss = sum_{i} (-gold_i * log(output_i))
where gold and output are distributions
>> output - model prediction
>> gold - gold standard
>> loss - the computed loss (for return)
>> weight - a rescaling weight given to each class
>> padding - indicates the positions to ignore in the loss computation (0 means ignored)
>> leadingDim - the leading dimension for the output
*/
void _CudaCrossEntropyManual(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight,
const XTensor * padding, int leadingDim)
{
int order = output->order;
int n = leadingDim < 0 ? output->order - 1 : leadingDim;
int leadingDimSize = output->GetDim(n);
CheckNTErrors(n >= 0 && n < output->order,
"Wrong leadingDim!");
CheckNTErrors(XTensor::IsSameShaped(output, gold),
"The output tensor and gold tensor must be of the same size!");
CheckNTErrors(weight == NULL || weight->unitNum == leadingDimSize,
"Wrong weight tensor!");
CheckNTErrors(padding == NULL || XTensor::IsSameShaped(padding, loss),
"The loss tensor and padding tensor must be same shape!");
CheckNTErrors(loss->order == output->order - 1,
"Wrong loss dimension!");
CheckNTErrors(gold->dataType == DEFAULT_DTYPE && output->dataType == DEFAULT_DTYPE,
"TODO!");
int blockNum = 1;
int blockSize = 1;
int stride = 1;
for(int i = n + 1; i < order; i++)
stride *= output->GetDim(i);
blockSize = stride * leadingDimSize;
blockNum = output->unitNum / blockSize;
int cudaGrids[3];
int cudaBlocks[3];
//GDevs.GetCudaThread2D(output->devID, blockNum, blockSize, MAX_INT, cudaGrids, cudaBlocks);
GDevs.GetCudaThread(output->devID, blockNum, cudaGrids, cudaBlocks);
dim3 blocks(cudaGrids[0], cudaGrids[1]);
dim3 threads(cudaBlocks[0], cudaBlocks[1]);
int devIDBackup;
ProtectCudaDev(output->devID, devIDBackup);
DTYPE * outputData = (DTYPE*)output->data;
DTYPE * goldData = (DTYPE*)gold->data;
DTYPE * lossData = (DTYPE*)loss->data;
if(weight == NULL) {
if(padding == NULL)
KernelCrossEntropy<<<dim3(cudaGrids[0]), dim3(cudaBlocks[0]) >>>
(outputData, goldData, lossData,
NULL, NULL,
blockNum, blockSize);
else
KernelCrossEntropy<<<dim3(cudaGrids[0]), dim3(cudaBlocks[0]) >>>
(outputData, goldData, lossData,
NULL, (DTYPE*)padding->data,
blockNum, blockSize);
}
else {
if(padding == NULL)
KernelCrossEntropy<<<dim3(cudaGrids[0]), dim3(cudaBlocks[0]) >>>
(outputData, goldData, lossData,
(DTYPE*)weight->data, NULL,
blockNum, blockSize);
else
KernelCrossEntropy<<<dim3(cudaGrids[0]), dim3(cudaBlocks[0]) >>>
(outputData, goldData, lossData,
(DTYPE*)weight->data, (DTYPE*)padding->data,
blockNum, blockSize);
}
BacktoCudaDev(output->devID, devIDBackup);
}
/*
compute the cross entropy loss (scalar version)
loss = sum_{i} (-gold_i * log(output_i))
where gold and output are distributions
>> output - model prediction
>> gold - gold standard
>> reduceWay - the way to compute the loss, i.e., sum or mean
>> weight - a rescaling weight given to each class
>> padding - indicates the positions to ignore in the loss computation (0 means ignored)
>> leadingDim - the leading dimension for the output
<< return - the cross entropy loss that is a scalar
*/
DTYPE _CudaCrossEntropyManual(const XTensor * output, const XTensor * gold,
LOSS_COMPUTE_WAY reduceWay, const XTensor * weight,
const XTensor * padding, int leadingDim)
{
DTYPE loss = 0;
int order = output->order;
int n = leadingDim < 0 ? output->order - 1 : leadingDim;
int leadingDimSize = output->GetDim(n);
CheckNTErrors(n >= 0 && n < output->order,
"Wrong leadingDim!");
CheckNTErrors(XTensor::IsSameShaped(output, gold),
"The output tensor and gold tensor must be of the same size!");
CheckNTErrors(weight == NULL || weight->unitNum == leadingDimSize,
"Wrong weight tensor!");
CheckNTErrors(padding == NULL || padding->order == output->order - 1,
"Wrong padding tensor!");
CheckNTErrors(gold->dataType == DEFAULT_DTYPE && output->dataType == DEFAULT_DTYPE,
"TODO!");
int * dimSize = new int[output->order - 1];
for (int i = 0; i < order; i++) {
if(i < n)
dimSize[i] = output->dimSize[i];
else if(i > n)
dimSize[i - 1] = output->dimSize[i];
}
XTensor * lossInter = NewTensor(output->order - 1, dimSize, output->dataType, output->denseRatio, output->devID, output->mem);
_CudaCrossEntropyManual(output, gold, lossInter, weight, padding, leadingDim);
loss = _ReduceSumAll(lossInter);
if(reduceWay == REDUCE_MEAN) {
int totalNum;
if(padding == NULL) {
totalNum = lossInter->unitNum;
}
else {
            XTensor * zeroIndicator = NewTensorBuf(padding, padding->devID, padding->mem);
_IsZero(padding, zeroIndicator);
totalNum = lossInter->unitNum - (int)_ReduceSumAll(zeroIndicator);
DelTensorBuf(zeroIndicator);
}
loss = loss / (DTYPE)totalNum;
}
    delete[] dimSize;
    delete lossInter;
    return loss;
}
/*
backward computation of cross entropy function (kernel version)
>> dedyData - the data pointer of dedy tensor
>> outputData - the data pointer of output tensor
>> goldData - the data pointer of gold tensor
>> weightData - the data pointer of weight tensor
>> paddingData - the data pointer of padding tensor
>> blockNum - the number of data blocks
>> blockSize - the size of a data block
*/
__global__
void KernelCrossEntropyBackward(DTYPE * dedyData, DTYPE * outputData, DTYPE * goldData,
DTYPE * weightData, DTYPE * paddingData,
int blockNum, int blockSize)
{
/* block id */
int i = blockDim.x * blockIdx.x + threadIdx.x;
if(i >= blockNum)
return;
int beg = i * blockSize;
if(weightData == NULL) {
if(paddingData == NULL) {
for(int j = 0; j < blockSize; j++)
*(dedyData + beg + j) = -(*(goldData + beg + j)) /
(*(outputData + beg + j));
}
else {
if(*(paddingData + i) == 0)
memset(dedyData + beg, 0, blockSize * sizeof(DTYPE));
else
for(int j = 0; j < blockSize; j++)
*(dedyData + beg + j) = -(*(goldData + beg + j)) /
(*(outputData + beg + j));
}
}
else {
if(paddingData == NULL) {
for(int j = 0; j < blockSize; j++)
*(dedyData + beg + j) = -(*(weightData + j)) *
(*(goldData + beg + j)) /
(*(outputData + beg + j));
}
else {
if(*(paddingData + i) == 0)
memset(dedyData + beg, 0, blockSize * sizeof(DTYPE));
else
for(int j = 0; j < blockSize; j++) {
*(dedyData + beg + j) = -(*(weightData + j)) *
(*(goldData + beg + j)) /
(*(outputData + beg + j));
}
}
}
}
/*
backward computation of cross entropy function
loss = sum_{i} (-t_i * log(y_i))
dE/dy_i = -t_i / y_i
where E is the error (loss) function that measures the error of y
with respect to the gold standard, and y is the model output
>> dedy - dE/dy (for return)
>> output - model prediction
>> gold - gold standard
>> weight - a rescaling weight given to each class
>> padding - indicates the positions to ignore in the loss computation (0 means ignored)
>> leadingDim - the leading dimension for the output
*/
void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTensor * gold,
const XTensor * weight, XTensor * padding,
int leadingDim)
{
int order = output->order;
int n = leadingDim < 0 ? output->order - 1 : leadingDim;
int leadingDimSize = output->GetDim(n);
CheckNTErrors(n >= 0 && n < output->order,
"Wrong leading dimension!");
CheckNTErrors(XTensor::IsSameShaped(dedy, output, gold),
"The output tensor and gold tensor must be of the same size!");
CheckNTErrors(weight == NULL || weight->unitNum == leadingDimSize,
"Wrong weight tensor!");
CheckNTErrors(padding == NULL || padding->order == output->order - 1,
"Wrong padding tensor!");
CheckNTErrors(gold->dataType == DEFAULT_DTYPE && output->dataType == DEFAULT_DTYPE,
"TODO!");
int blockNum = 1;
int blockSize = 1;
int stride = 1;
for(int i = n + 1; i < order; i++)
stride *= output->GetDim(i);
blockSize = stride * leadingDimSize;
blockNum = output->unitNum / blockSize;
int cudaGrids[3];
int cudaBlocks[3];
GDevs.GetCudaThread(output->devID, blockNum, cudaGrids, cudaBlocks);
dim3 blocks(cudaGrids[0], cudaGrids[1]);
dim3 threads(cudaBlocks[0], cudaBlocks[1]);
int devIDBackup;
ProtectCudaDev(output->devID, devIDBackup);
DTYPE * dedyData = (DTYPE*)dedy->data;
DTYPE * outputData = (DTYPE*)output->data;
DTYPE * goldData = (DTYPE*)gold->data;
if(weight == NULL) {
if(padding == NULL)
KernelCrossEntropyBackward<<<dim3(cudaGrids[0]), dim3(cudaBlocks[0]) >>>
(dedyData, outputData, goldData,
NULL, NULL,
blockNum, blockSize);
else
KernelCrossEntropyBackward<<<dim3(cudaGrids[0]), dim3(cudaBlocks[0]) >>>
(dedyData, outputData, goldData,
NULL, (DTYPE*)padding->data,
blockNum, blockSize);
}
else {
if(padding == NULL)
KernelCrossEntropyBackward<<<dim3(cudaGrids[0]), dim3(cudaBlocks[0]) >>>
(dedyData, outputData, goldData,
(DTYPE*)weight->data, NULL,
blockNum, blockSize);
else
KernelCrossEntropyBackward<<<dim3(cudaGrids[0]), dim3(cudaBlocks[0]) >>>
(dedyData, outputData, goldData,
(DTYPE*)weight->data, (DTYPE*)padding->data,
blockNum, blockSize);
}
if(padding != NULL) {
        XTensor * tmp = NewTensor(padding);
        _IsZero(padding, tmp);
        int nonZeroNum = padding->unitNum - (int)_ReduceSumAll(tmp);
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)nonZeroNum);
delete tmp;
}
else {
_ScaleAndShiftMe(dedy, (DTYPE)1.0/(DTYPE)blockNum);
}
BacktoCudaDev(output->devID, devIDBackup);
}
} // namespace nts(NiuTrans.Tensor)
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-17
*/
#ifndef __CROSSENTROPY_CUH__
#define __CROSSENTROPY_CUH__
#include "../XTensor.h"
#include "CrossEntropy.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
/* compute the cross entropy loss (tensor version) */
void _CudaCrossEntropyManual(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
/* compute the cross entropy loss (scalar version) */
DTYPE _CudaCrossEntropyManual(const XTensor * output, const XTensor * gold,
LOSS_COMPUTE_WAY reduceWay, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
/* backward computation of cross entropy function */
void _CudaCrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTensor * gold,
const XTensor * weight = NULL, XTensor * padding = NULL,
int leadingDim = -1);
} // namespace nts(NiuTrans.Tensor)
#endif // __CROSSENTROPY_CUH__
\ No newline at end of file
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-17
*/
#ifndef __CROSSENTROPY_H__
#define __CROSSENTROPY_H__
#include "../XTensor.h"
namespace nts{ // namespace nts(NiuTrans.Tensor)
enum LOSS_COMPUTE_WAY{
REDUCE_SUM,
REDUCE_MEAN
};
/* compute the cross entropy loss (tensor version) */
void _CrossEntropy(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
/* compute the cross entropy loss (tensor version, manual implementation) */
void _CrossEntropyManual(const XTensor * output, const XTensor * gold,
XTensor * loss, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
/* compute the cross entropy loss (scalar version) */
DTYPE _CrossEntropy(const XTensor * output, const XTensor * gold,
LOSS_COMPUTE_WAY reduceWay, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
/* compute the cross entropy loss (scalar version, manual implementation) */
DTYPE _CrossEntropyManual(const XTensor * output, const XTensor * gold,
LOSS_COMPUTE_WAY reduceWay = REDUCE_MEAN, const XTensor * weight = NULL,
const XTensor * padding = NULL, int leadingDim = -1);
/* backward computation of cross entropy function */
void _CrossEntropyBackward(XTensor * dedy, const XTensor * output, const XTensor * gold,
const XTensor * weight = NULL, XTensor * padding = NULL,
int leadingDim = -1);
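/*
usage sketch (illustrative only; output and gold are hypothetical pointers
to same-sized distribution tensors):

    DTYPE loss = _CrossEntropyManual(output, gold, REDUCE_MEAN);
*/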
} // namespace nts(NiuTrans.Tensor)
#endif // __CROSSENTROPY_H__
\ No newline at end of file
...@@ -20,7 +20,6 @@ ...@@ -20,7 +20,6 @@
*/ */
#include "../XName.h" #include "../XName.h"
#include <math.h>
#include <time.h> #include <time.h>
#include "Dropout.h" #include "Dropout.h"
#include "Dropout.cuh" #include "Dropout.cuh"
......
...@@ -23,7 +23,6 @@ ...@@ -23,7 +23,6 @@
#define __DROPOUT_H__ #define __DROPOUT_H__
#include "../XTensor.h" #include "../XTensor.h"
#include "Loss.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "../XTensor.h" #include "../XTensor.h"
#include "CrossEntropy.h"
#include "Dropout.h" #include "Dropout.h"
#include "HardTanH.h" #include "HardTanH.h"
#include "Identity.h" #include "Identity.h"
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "../XName.h" #include "../XName.h"
#include "HardTanH.h" #include "HardTanH.h"
#include "HardTanH.cuh" #include "HardTanH.cuh"
#include "CrossEntropy.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
...@@ -72,7 +73,7 @@ y = 1 if x > 1 ...@@ -72,7 +73,7 @@ y = 1 if x > 1
XTensor HardTanH(const XTensor &x) XTensor HardTanH(const XTensor &x)
{ {
XTensor y(&x); XTensor y(&x);
y.SetTMP(); y.SetTMPFlag();
/* call _HardTanH function */ /* call _HardTanH function */
_HardTanH(&x, &y); _HardTanH(&x, &y);
...@@ -118,7 +119,9 @@ void _HardTanHBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -118,7 +119,9 @@ void _HardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){ if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
/* calculate dE/dy */ /* calculate dE/dy */
if(lossName != NOLOSS) if(lossName == CROSSENTROPY)
_CrossEntropyBackward(dedy, y, gold);
else if(lossName != NOLOSS)
_LossBackward(dedy, gold, y, lossName); _LossBackward(dedy, gold, y, lossName);
DTYPE * dedyp = (DTYPE*)dedy->data; DTYPE * dedyp = (DTYPE*)dedy->data;
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "HardTanH.h" #include "HardTanH.h"
#include "HardTanH.cuh" #include "HardTanH.cuh"
#include "Loss.cuh" #include "Loss.cuh"
#include "CrossEntropy.cuh"
#include "../XDevice.h" #include "../XDevice.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
...@@ -136,8 +137,10 @@ void _CudaHardTanHBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -136,8 +137,10 @@ void _CudaHardTanHBackward(XTensor * gold, XTensor * y, XTensor * x,
if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){ if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
/* calculate dE/dy */ /* calculate dE/dy */
if(lossName != NOLOSS) if(lossName == CROSSENTROPY)
_LossBackward(dedy, gold, y, lossName); _CudaCrossEntropyBackward(dedy, y, gold);
else if(lossName != NOLOSS)
_CudaLossBackward(dedy, gold, y, lossName);
int gridSize[3], blockSize[3]; int gridSize[3], blockSize[3];
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "../XName.h" #include "../XName.h"
#include "Identity.h" #include "Identity.h"
#include "CrossEntropy.h"
#include "../XUtility.h" #include "../XUtility.h"
#include "../core/movement/CopyValues.h" #include "../core/movement/CopyValues.h"
...@@ -46,7 +47,7 @@ make a new tensor to keep the result and return it ...@@ -46,7 +47,7 @@ make a new tensor to keep the result and return it
XTensor Identity(const XTensor &x) XTensor Identity(const XTensor &x)
{ {
XTensor y(&x); XTensor y(&x);
y.SetTMP(); y.SetTMPFlag();
/* call _Identity function */ /* call _Identity function */
_Identity(&x, &y); _Identity(&x, &y);
...@@ -78,7 +79,9 @@ void _IdentityBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -78,7 +79,9 @@ void _IdentityBackward(XTensor * gold, XTensor * y, XTensor * x,
if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE) if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE)
{ {
/* calculate dE/dy */ /* calculate dE/dy */
if(lossName != NOLOSS) if(lossName == CROSSENTROPY)
_CrossEntropyBackward(dedy, y, gold);
else if(lossName != NOLOSS)
_LossBackward(dedy, gold, y, lossName); _LossBackward(dedy, gold, y, lossName);
if(dedy->data != dedx->data) if(dedy->data != dedx->data)
......
...@@ -122,10 +122,11 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim) ...@@ -122,10 +122,11 @@ void _LogSoftmax(const XTensor * x, XTensor * y, int leadDim)
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
DTYPE r = (DTYPE)log(exp(ip[i * m + j] - mp[j]) / sp[j]); DTYPE r = (DTYPE)log(exp(ip[i * m + j] - mp[j]) / sp[j]);
if (IsNAN(r)) if (IsNAN(r))
r = DTYPE_MIN; r = LOGPROB_MIN;
if (IsINF(r)) if (IsINF(r))
r = DTYPE_MIN; r = LOGPROB_MIN;
op[i * m + j] = r;
op[i * m + j] = MAX(r, LOGPROB_MIN);
} }
} }
} }
...@@ -181,7 +182,7 @@ XTensor LogSoftmax(const XTensor &x, int leadDim) ...@@ -181,7 +182,7 @@ XTensor LogSoftmax(const XTensor &x, int leadDim)
ld = x.order - 1; ld = x.order - 1;
XTensor y(&x); XTensor y(&x);
y.SetTMP(); y.SetTMPFlag();
/* call _LogSoftmax function */ /* call _LogSoftmax function */
_LogSoftmax(&x, &y, ld); _LogSoftmax(&x, &y, ld);
......
...@@ -78,11 +78,13 @@ void KernelLogSoftmaxComputeByRow(DTYPE * x, DTYPE * max, DTYPE * sum, DTYPE * y ...@@ -78,11 +78,13 @@ void KernelLogSoftmaxComputeByRow(DTYPE * x, DTYPE * max, DTYPE * sum, DTYPE * y
if (i < rowNum && j < colNum) { if (i < rowNum && j < colNum) {
int key = i * colNum + j; int key = i * colNum + j;
DTYPE r = log(exp(x[key] - inputMax[threadIdx.x]) / inputSum[threadIdx.x]); DTYPE r = log(exp(x[key] - inputMax[threadIdx.x]) / inputSum[threadIdx.x]);
if (isnan(r)) if (isnan(r))
r = DTYPE_MIN; r = LOGPROB_MIN;
if (isinf(r)) if (isinf(r))
r = DTYPE_MIN; r = LOGPROB_MIN;
y[key] = r;
y[key] = MAX(r, LOGPROB_MIN);
} }
} }
...@@ -123,11 +125,18 @@ void KernelLogSoftmaxComputeByCol(DTYPE * x, DTYPE * max, DTYPE * sum, DTYPE * y ...@@ -123,11 +125,18 @@ void KernelLogSoftmaxComputeByCol(DTYPE * x, DTYPE * max, DTYPE * sum, DTYPE * y
if (i < rowNum && j < colNum) { if (i < rowNum && j < colNum) {
int key = i * colNum + j; int key = i * colNum + j;
DTYPE r = log(exp(x[key] - inputMax[threadIdx.y]) / inputSum[threadIdx.y]); DTYPE r = log(exp(x[key] - inputMax[threadIdx.y]) / inputSum[threadIdx.y]);
/*if (r < LOGPROB_MIN)
{
printf("min %e %e, %e %e, %e %e\n", r, x[key] - inputMax[threadIdx.y], x[key], inputMax[threadIdx.y], exp(x[key] - inputMax[threadIdx.y]), inputSum[threadIdx.y]);
}*/
if (isnan(r)) if (isnan(r))
r = DTYPE_MIN; r = LOGPROB_MIN;
if (isinf(r)) if (isinf(r))
r = DTYPE_MIN; r = LOGPROB_MIN;
y[key] = r;
y[key] = MAX(r, LOGPROB_MIN);
} }
} }
...@@ -228,21 +237,29 @@ void KernelLogSoftmaxBackwardDEDS(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYP ...@@ -228,21 +237,29 @@ void KernelLogSoftmaxBackwardDEDS(DTYPE * dedy, DTYPE * dedx, DTYPE * gold, DTYP
int i = blockDim.x * blockIdx.x + threadIdx.x; int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < size) { if (i < size) {
DTYPE r = 0;
/* dE/ds_j = exp(y_j) */ /* dE/ds_j = exp(y_j) */
if (lossName == CROSSENTROPY) if (lossName == CROSSENTROPY)
dedx[i] = -gold[i] + exp(y[i]); r = -gold[i] + exp(y[i]);
/* dE/ds_j = exp(y_j) */ /* dE/ds_j = exp(y_j) */
else if (lossName == SQUAREDERROR) else if (lossName == SQUAREDERROR)
dedx[i] = -gold[i] + exp(y[i]); r = -gold[i] + exp(y[i]);
else if (lossName == ONEHOTERROR) { else if (lossName == ONEHOTERROR) {
if (gold[i] == 1.0F) if (gold[i] == 1.0F)
dedx[i] = -gold[i] + exp(y[i]); r = -gold[i] + exp(y[i]);
else else
dedx[i] = 0; r = 0;
} }
else { else {
dedx[i] = dedy[i]; r = dedy[i];
} }
if (isnan(r))
r = 0;
if (isinf(r))
r = 0;
dedx[i] = r;
} }
} }
......
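/*
numeric note on the LOGPROB_MIN clamp (illustrative): for
r = log(exp(x - max) / sum), a very negative x - max underflows, e.g.,
exp(-1000) == 0 in floating point, so r = log(0) = -inf; the isnan/isinf
checks together with MAX(r, LOGPROB_MIN) keep every log-probability finite
*/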
...@@ -16,8 +16,8 @@ ...@@ -16,8 +16,8 @@
*/ */
/* /*
* $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24 * $Created by: XIAO Tong (email: xiaotong@mail.neu.edu.cn) 2018-04-24
*/ */
#include <math.h> #include <math.h>
#include "Loss.h" #include "Loss.h"
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "../XName.h" #include "../XName.h"
#include "Rectify.h" #include "Rectify.h"
#include "Rectify.cuh" #include "Rectify.cuh"
#include "CrossEntropy.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
...@@ -65,7 +66,7 @@ make a new tensor to keep the result and return it ...@@ -65,7 +66,7 @@ make a new tensor to keep the result and return it
XTensor Rectify(const XTensor &x) XTensor Rectify(const XTensor &x)
{ {
XTensor y(&x); XTensor y(&x);
y.SetTMP(); y.SetTMPFlag();
/* call _Rectify function */ /* call _Rectify function */
_Rectify(&x, &y); _Rectify(&x, &y);
...@@ -116,7 +117,9 @@ void _RectifyBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -116,7 +117,9 @@ void _RectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE) if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE)
{ {
/* calculate dE/dy */ /* calculate dE/dy */
if(lossName != NOLOSS) if(lossName == CROSSENTROPY)
_CrossEntropyBackward(dedy, y, gold);
else if(lossName != NOLOSS)
_LossBackward(dedy, gold, y, lossName); _LossBackward(dedy, gold, y, lossName);
DTYPE * dedyp = (DTYPE*)dedy->data; DTYPE * dedyp = (DTYPE*)dedy->data;
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "Rectify.h" #include "Rectify.h"
#include "Rectify.cuh" #include "Rectify.cuh"
#include "Loss.cuh" #include "Loss.cuh"
#include "CrossEntropy.cuh"
#include "../XDevice.h" #include "../XDevice.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
...@@ -133,7 +134,9 @@ void _CudaRectifyBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -133,7 +134,9 @@ void _CudaRectifyBackward(XTensor * gold, XTensor * y, XTensor * x,
if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){ if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
/* calculate dE/dy */ /* calculate dE/dy */
if(lossName != NOLOSS) if(lossName == CROSSENTROPY)
_CudaCrossEntropyBackward(dedy, y, gold);
else if(lossName != NOLOSS)
_CudaLossBackward(dedy, gold, y, lossName); _CudaLossBackward(dedy, gold, y, lossName);
int gridSize[3], blockSize[3]; int gridSize[3], blockSize[3];
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <math.h> #include <math.h>
#include "Sigmoid.h" #include "Sigmoid.h"
#include "Sigmoid.cuh" #include "Sigmoid.cuh"
#include "CrossEntropy.h"
namespace nts{ // namespace nts(NiuTrans.Tensor) namespace nts{ // namespace nts(NiuTrans.Tensor)
...@@ -63,7 +64,7 @@ make a new tensor to keep the result and return it ...@@ -63,7 +64,7 @@ make a new tensor to keep the result and return it
XTensor Sigmoid(const XTensor &x) XTensor Sigmoid(const XTensor &x)
{ {
XTensor y(&x); XTensor y(&x);
y.SetTMP(); y.SetTMPFlag();
/* call _Sigmoid function */ /* call _Sigmoid function */
_Sigmoid(&x, &y); _Sigmoid(&x, &y);
...@@ -107,7 +108,9 @@ void _SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -107,7 +108,9 @@ void _SigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE) if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE)
{ {
/* calculate dE/dy */ /* calculate dE/dy */
if(lossName != NOLOSS) if(lossName == CROSSENTROPY)
_CrossEntropyBackward(dedy, y, gold);
else if(lossName != NOLOSS)
_LossBackward(dedy, gold, y, lossName); _LossBackward(dedy, gold, y, lossName);
DTYPE * dedyp = (DTYPE*)dedy->data; DTYPE * dedyp = (DTYPE*)dedy->data;
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "Sigmoid.h" #include "Sigmoid.h"
#include "Sigmoid.cuh" #include "Sigmoid.cuh"
#include "Loss.cuh" #include "Loss.cuh"
#include "CrossEntropy.cuh"
#include "../XDevice.h" #include "../XDevice.h"
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -128,7 +129,9 @@ void _CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x, ...@@ -128,7 +129,9 @@ void _CudaSigmoidBackward(XTensor * gold, XTensor * y, XTensor * x,
{ {
if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){ if(x->dataType == DEFAULT_DTYPE && y->dataType == DEFAULT_DTYPE){
/* calculate dE/dy */ /* calculate dE/dy */
if(lossName != NOLOSS) if(lossName == CROSSENTROPY)
_CudaCrossEntropyBackward(dedy, y, gold);
else if(lossName != NOLOSS)
_LossBackward(dedy, gold, y, lossName); _LossBackward(dedy, gold, y, lossName);
......
...@@ -136,7 +136,7 @@ XTensor Softmax(const XTensor &x, int leadDim) ...@@ -136,7 +136,7 @@ XTensor Softmax(const XTensor &x, int leadDim)
ld = x.order - 1; ld = x.order - 1;
XTensor y(&x); XTensor y(&x);
y.SetTMP(); y.SetTMPFlag();
/* call _Softmax function */ /* call _Softmax function */
_Softmax(&x, &y, ld); _Softmax(&x, &y, ld);
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University. * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
/* /*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/ */
#include "TCopyIndexed.h" #include "TCopyIndexed.h"
...@@ -344,6 +344,235 @@ bool TestCopyIndexed3() ...@@ -344,6 +344,235 @@ bool TestCopyIndexed3()
#endif // USE_CUDA #endif // USE_CUDA
} }
/*
case 4: copy indexed sub-tensors
In this case, (3, 2, 3) -> (3, 2, 2), dim = 2, indexSize = 2,
srcIndex = [0, 2], tgtIndex = [0, 1], copyNum = 1.
*/
bool TestCopyIndexed4()
{
/* an input tensor of size (3, 2, 3) */
int sOrder = 3;
int * sDimSize = new int[sOrder];
sDimSize[0] = 3;
sDimSize[1] = 2;
sDimSize[2] = 3;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* an output tensor of size (3, 2, 2) */
int tOrder = 3;
int * tDimSize = new int[tOrder];
tDimSize[0] = 3;
tDimSize[1] = 2;
tDimSize[2] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
/* an index tensor of size (2) */
int iOrder = 1;
int * iDimSize = new int[iOrder];
iDimSize[0] = 2;
int iUnitNum = 1;
for (int i = 0; i < iOrder; i++)
iUnitNum *= iDimSize[i];
DTYPE sData[3][2][3] = { { {0.0F, -1.0F, 2.0F},
{2.0F, 1.0F, 3.0F} },
{ {1.0F, 2.0F, 4.0F},
{3.0F, 1.0F, 2.0F}},
{ {-1.0F, 3.0F, 2.0F},
{1.0F, -1.0F, 0.0F} } };
DTYPE answer[3][2][2] = { { {0.0F, 2.0F},
{2.0F, 3.0F} },
{ {1.0F, 4.0F},
{3.0F, 2.0F}},
{ {-1.0F, 2.0F},
{1.0F, 0.0F} } };
int dim = 2;
int indexSize = 2;
int srcIndex[2] = {0, 2};
int tgtIndex[2] = {0, 1};
int copyNum = 1;
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor * index = NewTensor(iOrder, iDimSize, X_INT);
XTensor tUser;
/* initialize variables */
s->SetData(sData, sUnitNum);
t->SetZeroAll();
index->SetData(srcIndex, iUnitNum);
/* call CopyIndexed function */
_CopyIndexed(s, t, dim, (int*)index->data, indexSize, tgtIndex, copyNum);
tUser = CopyIndexed(*s, dim, (int*)index->data, indexSize, tgtIndex, copyNum);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
tGPU->SetZeroAll();
/* call CopyIndexed function */
_CopyIndexed(sGPU, tGPU, dim, (int*)index->data, indexSize, tgtIndex, copyNum);
tUserGPU = CopyIndexed(*sGPU, dim, srcIndex, indexSize, tgtIndex, copyNum);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
delete s;
delete t;
delete index;
delete sGPU;
delete tGPU;
delete[] sDimSize;
delete[] tDimSize;
delete[] iDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete t;
delete index;
delete[] sDimSize;
delete[] tDimSize;
delete[] iDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 5: copy indexed sub-tensors
In this case, (3, 2, 3) -> (3, 2, 4), dim = 2, indexSize = 2,
srcIndex = [0, 1], tgtIndex = [0, 2], copyNum = 2.
*/
bool TestCopyIndexed5()
{
/* an input tensor of size (3, 2, 3) */
int sOrder = 3;
int * sDimSize = new int[sOrder];
sDimSize[0] = 3;
sDimSize[1] = 2;
sDimSize[2] = 3;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* an output tensor of size (3, 2, 4) */
int tOrder = 3;
int * tDimSize = new int[tOrder];
tDimSize[0] = 3;
tDimSize[1] = 2;
tDimSize[2] = 4;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData[3][2][3] = { { {0.0F, -1.0F, 2.0F},
{2.0F, 1.0F, 3.0F} },
{ {1.0F, 2.0F, 4.0F},
{3.0F, 1.0F, 2.0F}},
{ {-1.0F, 3.0F, 2.0F},
{1.0F, -1.0F, 0.0F} } };
DTYPE answer[3][2][4] = { { {0.0F, -1.0F, -1.0F, 2.0F},
{2.0F, 1.0F, 1.0F, 3.0F} },
{ {1.0F, 2.0F, 2.0F, 4.0F},
{3.0F, 1.0F, 1.0F, 2.0F}},
{ {-1.0F, 3.0F, 3.0F, 2.0F},
{1.0F, -1.0F, -1.0F, 0.0F} } };
int dim = 2;
int indexSize = 2;
int srcIndex[2] = {0, 1};
int tgtIndex[2] = {0, 2};
int copyNum = 2;
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */
s->SetData(sData, sUnitNum);
t->SetZeroAll();
/* call CopyIndexed function */
_CopyIndexed(s, t, dim, srcIndex, indexSize, tgtIndex, copyNum);
tUser = CopyIndexed(*s, dim, srcIndex, indexSize, tgtIndex, copyNum);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
tGPU->SetZeroAll();
/* call CopyIndexed function */
_CopyIndexed(sGPU, tGPU, dim, srcIndex, indexSize, tgtIndex, copyNum);
tUserGPU = CopyIndexed(*sGPU, dim, srcIndex, indexSize, tgtIndex, copyNum);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
delete s;
delete t;
delete sGPU;
delete tGPU;
delete[] sDimSize;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete t;
delete[] sDimSize;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */ /* other cases */
/* /*
TODO!! TODO!!
...@@ -382,6 +611,24 @@ bool TestCopyIndexed() ...@@ -382,6 +611,24 @@ bool TestCopyIndexed()
else else
XPRINT(0, stdout, ">> case 3 passed!\n"); XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestCopyIndexed4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* case 5 test */
caseFlag = TestCopyIndexed5();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 5 failed!\n");
}
else
XPRINT(0, stdout, ">> case 5 passed!\n");
/* other cases test */ /* other cases test */
/* /*
TODO!! TODO!!
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northeastern University. * Copyright (C) 2017, Natural Language Processing Lab, Northeastern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
/* /*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-06-27
*/ */
#ifndef __TEST_COPYINDEXED_H__ #ifndef __TEST_COPYINDEXED_H__
#define __TEST_COPYINDEXED_H__ #define __TEST_COPYINDEXED_H__
......
/* NiuTrans.Tensor - an open-source tensor library /* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University. * Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved. * All rights reserved.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
/* /*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-31 * $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-18
*/ */
#include "../core/math/Unary.h" #include "../core/math/Unary.h"
#include "TCos.h" #include "TCos.h"
......
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-17
*/
#include <math.h>
#include "TCrossEntropy.h"
#include "../core/math/ScaleAndShift.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test CrossEntropy function.
loss = sum_{i} (-t_i * log(y_i))
where t_i is the gold standard and y_i is the model output.
*/
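/*
A minimal reference sketch of the formula above, for illustration only:
RefCrossEntropy is a hypothetical helper, not part of the library API.
It accumulates -t_i * log(y_i) over two plain arrays, which is the value
the test cases below check against hand-computed answers.
*/
static DTYPE RefCrossEntropy(const DTYPE * y, const DTYPE * t, int n)
{
    DTYPE loss = 0.0F;
    for (int i = 0; i < n; i++) {
        /* skip zero targets so that a zero t_i never multiplies log(0) */
        if (t[i] != 0.0F)
            loss += -t[i] * (DTYPE)log(y[i]);
    }
    return loss;
}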
bool TestCrossEntropy1()
{
/* a tensor of size (1, 4) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 1;
dimSize[1] = 4;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE outputData[4] = {0.25F, 0.25F, 0.25F, 0.25F};
DTYPE goldData[4] = {0.5F, 0.5F, 0.0F, 0.0F};
DTYPE answer = 1.3863F;
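/* with every y_i = 0.25 and t = (0.5, 0.5, 0, 0):
   loss = -(0.5 + 0.5) * log(0.25) = log(4) ≈ 1.3863 */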
DTYPE error1;
DTYPE error2;
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * output = NewTensor(order, dimSize);
XTensor * gold = NewTensor(order, dimSize);
XTensor * loss = NewTensor1D(1);
/* initialize variables */
output->SetData(outputData, unitNum);
gold->SetData(goldData, unitNum);
/* call CrossEntropy function */
_CrossEntropyManual(output, gold, loss);
error2 = _CrossEntropy(output, gold, REDUCE_SUM);
error1 = loss->Get1D(0);
/* check results */
cpuTest = (fabs(error1 - answer) < 1e-4F &&
fabs(error2 - answer) < 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * outputGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * goldGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * lossGPU = NewTensor1D(1, X_FLOAT, 0);
/* Initialize variables */
outputGPU->SetData(outputData, unitNum);
goldGPU->SetData(goldData, unitNum);
/* call CrossEntropy function */
_CrossEntropyManual(outputGPU, goldGPU, lossGPU);
error1 = lossGPU->Get1D(0);
error2 = _CrossEntropy(outputGPU, goldGPU, REDUCE_SUM);
/* check results */
gpuTest = (fabs(error1 - answer) < 1e-4F &&
fabs(error2 - answer) < 1e-4F);
/* destroy variables */
delete output;
delete gold;
delete loss;
delete outputGPU;
delete goldGPU;
delete lossGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete output;
delete gold;
delete loss;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 2: test CrossEntropy function.
loss = sum_{i} (-t_i * log(y_i))
where t_i is the gold standard and y_i is the model output.
*/
bool TestCrossEntropy2()
{
/* a tensor of size (4, 10) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 4;
dimSize[1] = 10;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE outputData[4][10] = { {0.5F, 2.6F, 0.3F, 1.7F, 0.6F,
0.1F, 0.7F, 1.3F, 0.4F, 0.6F},
{0.5F, 1.6F, 0.2F, 1.1F, 0.3F,
0.8F, 2.2F, 0.1F, 0.1F, 0.8F},
{0.2F, 0.5F, 1.1F, 1.2F, 0.6F,
0.1F, 0.2F, 0.7F, 0.5F, 0.7F},
{0.2F, 1.7F, 0.6F, 1.5F, 0.8F,
0.1F, 0.8F, 0.1F, 0.6F, 0.2F} };
DTYPE answer1 = 4.3275F;
DTYPE answer2 = 1.0818F;
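/* the gold rows set below are one-hot at indices 9, 7, 2 and 9, so each
   row contributes -log(y) at its gold position:
   -log(0.6) - log(0.1) - log(1.1) - log(0.2) ≈ 4.3275 for REDUCE_SUM
   (the third term is negative because that output exceeds 1), and
   4.3275 / 4 ≈ 1.0818 for REDUCE_MEAN */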
DTYPE error1;
DTYPE error2;
DTYPE error3;
DTYPE error4;
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * output = NewTensor(order, dimSize);
XTensor * gold = NewTensor(order, dimSize);
/* initialize variables */
output->SetData(outputData, unitNum);
gold->SetZeroAll();
gold->Set2D(1.0F, 0, 9);
gold->Set2D(1.0F, 1, 7);
gold->Set2D(1.0F, 2, 2);
gold->Set2D(1.0F, 3, 9);
/* call CrossEntropy function */
error1 = _CrossEntropy(output, gold, REDUCE_SUM);
error2 = _CrossEntropy(output, gold, REDUCE_MEAN);
error3 = _CrossEntropyManual(output, gold, REDUCE_SUM);
error4 = _CrossEntropyManual(output, gold, REDUCE_MEAN);
/* check results */
cpuTest = (fabs(error1 - answer1) < 1e-4F &&
fabs(error2 - answer2) < 1e-4F &&
fabs(error3 - answer1) < 1e-4F &&
fabs(error4 - answer2) < 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * outputGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * goldGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
outputGPU->SetData(outputData, unitNum);
goldGPU->SetZeroAll();
goldGPU->Set2D(1.0F, 0, 9);
goldGPU->Set2D(1.0F, 1, 7);
goldGPU->Set2D(1.0F, 2, 2);
goldGPU->Set2D(1.0F, 3, 9);
/* call CrossEntropy function */
error1 = _CrossEntropy(outputGPU, goldGPU, REDUCE_SUM);
error2 = _CrossEntropy(outputGPU, goldGPU, REDUCE_MEAN);
error3 = _CrossEntropyManual(outputGPU, goldGPU, REDUCE_SUM);
error4 = _CrossEntropyManual(outputGPU, goldGPU, REDUCE_MEAN);
/* check results */
gpuTest = (fabs(error1 - answer1) < 1e-4F &&
fabs(error2 - answer2) < 1e-4F &&
fabs(error3 - answer1) < 1e-4F &&
fabs(error4 - answer2) < 1e-4F);
/* destroy variables */
delete output;
delete gold;
delete outputGPU;
delete goldGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete output;
delete gold;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 3: test CrossEntropy function.
loss = sum_{i} (-t_i * log(y_i))
where t_i is the gold standard and y_i is the model output.
In this case, the cross entropy is computed with a weight vector.
*/
bool TestCrossEntropy3()
{
/* an output tensor of size (4, 4) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 4;
dimSize[1] = 4;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
/* a weight tensor of size (4) */
int wOrder = 1;
int * wDimSize = new int[wOrder];
wDimSize[0] = 4;
int wUnitNum = 1;
for (int i = 0; i < wOrder; i++)
wUnitNum *= wDimSize[i];
DTYPE outputData[4][4] = { {0.3F, 0.2F, 0.3F, 0.2F},
{0.1F, 0.4F, 0.2F, 0.3F},
{0.7F, 0.1F, 0.1F, 0.1F},
{0.5F, 0.1F, 0.2F, 0.2F} };
DTYPE weightData[4] = {2.0F, 1.0F, 5.0F, 0.0F};
DTYPE answer[4] = {2.4079F, 0.9163F, 11.5129F, 0.0F};
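/* each row i contributes -weight[gold(i)] * log(y) at its gold position:
   -2 * log(0.3) ≈ 2.4079, -1 * log(0.4) ≈ 0.9163,
   -5 * log(0.1) ≈ 11.5129 and -0 * log(0.2) = 0 */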
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * output = NewTensor(order, dimSize);
XTensor * gold = NewTensor(order, dimSize);
XTensor * loss = NewTensor1D(4);
XTensor * weight = NewTensor(wOrder, wDimSize);
/* initialize variables */
output->SetData(outputData, unitNum);
weight->SetData(weightData, wUnitNum);
gold->SetZeroAll();
gold->Set2D(1.0F, 0, 0);
gold->Set2D(1.0F, 1, 1);
gold->Set2D(1.0F, 2, 2);
gold->Set2D(1.0F, 3, 3);
/* call CrossEntropy function */
_CrossEntropyManual(output, gold, loss, weight);
/* check results */
cpuTest = loss->CheckData(answer, 4, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * outputGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * goldGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * lossGPU = NewTensor1D(4, X_FLOAT, 0);
XTensor * weightGPU = NewTensor(wOrder, wDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
outputGPU->SetData(outputData, unitNum);
weightGPU->SetData(weightData, wUnitNum);
goldGPU->SetZeroAll();
goldGPU->Set2D(1.0F, 0, 0);
goldGPU->Set2D(1.0F, 1, 1);
goldGPU->Set2D(1.0F, 2, 2);
goldGPU->Set2D(1.0F, 3, 3);
/* call CrossEntropy function */
_CrossEntropyManual(outputGPU, goldGPU, lossGPU, weightGPU);
/* check results */
gpuTest = lossGPU->CheckData(answer, 4, 1e-4F);
/* destroy variables */
delete output;
delete gold;
delete loss;
delete weight;
delete outputGPU;
delete goldGPU;
delete lossGPU;
delete weightGPU;
delete[] dimSize;
delete[] wDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete output;
delete gold;
delete loss;
delete weight;
delete[] dimSize;
delete[] wDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 4: test CrossEntropy function.
loss = sum_{i} (-t_i * log(y_i))
where t_i is the gold standard and y_i is the model output.
*/
bool TestCrossEntropy4()
{
/* a tensor of size (10, 1) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 10;
dimSize[1] = 1;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
/* CPU test */
bool cpuTest = true;
DTYPE answer = 0.0F;
DTYPE error;
/* create tensors */
XTensor * output = NewTensor(order, dimSize);
XTensor * gold = NewTensor(order, dimSize);
/* initialize variables */
output->SetZeroAll();
gold->SetZeroAll();
_ScaleAndShiftMe(output, 1, 1);
_ScaleAndShiftMe(gold, 1, 2);
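/* ScaleAndShift computes x * scale + shift, so output becomes all ones
   and gold all twos; every term is then -2 * log(1) = 0, matching the
   zero answer */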
/* call CrossEntropy function */
error = _CrossEntropyManual(output, gold);
/* check results */
cpuTest = (fabs(error - answer) < 1e-4);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensor */
XTensor * outputGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
XTensor * goldGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
outputGPU->SetZeroAll();
goldGPU->SetZeroAll();
_ScaleAndShiftMe(outputGPU, 1, 1);
_ScaleAndShiftMe(goldGPU, 1, 2);
/* call CrossEntropy function */
error = _CrossEntropyManual(outputGPU, goldGPU);
/* check results */
gpuTest = (fabs(error - answer) < 1e-4);
/* destroy variables */
delete output;
delete gold;
delete outputGPU;
delete goldGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete output;
delete gold;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for CrossEntropy Function */
bool TestCrossEntropy()
{
XPRINT(0, stdout, "[TEST CrossEntropy] compute the cross entropy loss and backward gradient \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestCrossEntropy1();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestCrossEntropy2();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */
caseFlag = TestCrossEntropy3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestCrossEntropy4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* other cases test */
/*
TODO!!
*/
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n");
return returnFlag;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-17
*/
#ifndef __TEST_CROSSENTROPY_H__
#define __TEST_CROSSENTROPY_H__
#include "../function/CrossEntropy.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for CrossEntropy Function */
bool TestCrossEntropy();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_CROSSENTROPY_H__
...@@ -259,7 +259,7 @@ bool TestDivDim2()
/* test for DivDim Function */
bool TestDivDim()
{
XPRINT(0, stdout, "[TEST DIVDIM] tensor division c(i) = a/b + \\alpha * c by broadcasting\n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
...
...@@ -54,10 +54,10 @@ bool TestDropout1()
y->SetZeroAll();
/* call Dropout function */
float dropProb = 0.2F;
int seed = 20;
_Dropout(x, y, seed, dropProb);
yUser = Dropout(*x, dropProb);
/* check result */
int zeroNum1 = 0;
...@@ -74,9 +74,9 @@ bool TestDropout1()
}
printf("CPU Test:\n");
printf("In tensor y, there are %d units.\n", unitNum);
printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum1, dropProb);
printf("In tensor yUser, there are %d units.\n", unitNum);
printf("There are %d zero units by Dropout layer with default probability %.2f.\n", zeroNum2, dropProb);
#ifdef USE_CUDA
/* GPU test */
...@@ -92,8 +92,8 @@ bool TestDropout1()
yGPU->SetZeroAll();
/* call Dropout function */
_Dropout(xGPU, yGPU, seed, dropProb);
yUserGPU = Dropout(*xGPU, dropProb);
/* check result */
zeroNum1 = 0;
...@@ -110,9 +110,9 @@ bool TestDropout1()
}
printf("GPU Test:\n");
printf("In tensor y, there are %d units.\n", unitNum);
printf("There are %d zero units by Dropout layer with probability %.2f.\n", zeroNum1, dropProb);
printf("In tensor yUser, there are %d units.\n", unitNum);
printf("There are %d zero units by Dropout layer with default probability %.2f.\n", zeroNum2, dropProb);
/* destroy variables */
delete x;
...@@ -163,10 +163,10 @@ bool TestDropout2()
_SetDataFixedFloat(dedy, 1.5F);
/* call Dropout function */
float dropProb = 0.5F;
int seed = 1;
_Dropout(x, y, seed, dropProb);
_DropoutBackward(y, x, dedy, dedx, 1, dropProb);
/* check result */
y->Dump(stderr, "y");
...@@ -189,8 +189,8 @@ bool TestDropout2()
_SetDataFixedFloat(dedyGPU, 1.5F);
/* call Dropout function */
_Dropout(xGPU, yGPU, seed, dropProb);
_DropoutBackward(yGPU, xGPU, dedyGPU, dedxGPU, 1, dropProb);
/* check result */
yGPU->Dump(stderr, "yGPU");
...
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-18
*/
#include "TGather.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: gather indexed sub-tensors
In this case, (3, 2, 3) -> (3, 2, 2), dim = 2,
srcIndex = [0, 2], indexSize = 2
*/
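/*
A minimal sketch of the semantics being tested, for illustration only:
NaiveGather is a hypothetical helper, not the library implementation.
Viewing the source as [outer, dimSize, inner] around the gathered
dimension, the output keeps only the indexed slices.
*/
static void NaiveGather(const DTYPE * s, DTYPE * t, int outer, int dimSize,
                        int inner, const int * index, int indexSize)
{
    for (int o = 0; o < outer; o++) {
        for (int i = 0; i < indexSize; i++) {
            /* copy the inner block of slice index[i] */
            for (int k = 0; k < inner; k++)
                t[(o * indexSize + i) * inner + k] =
                s[(o * dimSize + index[i]) * inner + k];
        }
    }
}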
bool TestGather1()
{
/* an input tensor of size (3, 2, 3) */
int sOrder = 3;
int * sDimSize = new int[sOrder];
sDimSize[0] = 3;
sDimSize[1] = 2;
sDimSize[2] = 3;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* an output tensor of size (3, 2, 2) */
int tOrder = 3;
int * tDimSize = new int[tOrder];
tDimSize[0] = 3;
tDimSize[1] = 2;
tDimSize[2] = 2;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData[3][2][3] = { { {0.0F, -1.0F, 2.0F},
{2.0F, 1.0F, 3.0F} },
{ {1.0F, 2.0F, 4.0F},
{3.0F, 1.0F, 2.0F}},
{ {-1.0F, 3.0F, 2.0F},
{1.0F, -1.0F, 0.0F} } };
DTYPE answer[3][2][2] = { { {0.0F, 2.0F},
{2.0F, 3.0F} },
{ {1.0F, 4.0F},
{3.0F, 2.0F}},
{ {-1.0F, 2.0F},
{1.0F, 0.0F} } };
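/* with dim = 2 the gather keeps slices 0 and 2 of the last dimension:
   t[i][j][k] = s[i][j][srcIndex[k]] */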
int dim = 2;
int indexSize = 2;
int srcIndex[2] = {0, 2};
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */
s->SetData(sData, sUnitNum);
t->SetZeroAll();
/* call Gather function */
_Gather(s, t, dim, srcIndex, indexSize);
tUser = Gather(*s, dim, srcIndex, indexSize);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
tGPU->SetZeroAll();
/* call Gather function */
_Gather(sGPU, tGPU, dim, srcIndex, indexSize);
tUserGPU = Gather(*sGPU, dim, srcIndex, indexSize);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
delete s;
delete t;
delete sGPU;
delete tGPU;
delete[] sDimSize;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete t;
delete[] sDimSize;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 2: gather indexed sub-tensors
In this case, (3, 2, 3) -> (3, 1, 3), dim = 1,
srcIndex = [0], indexSize = 1
*/
bool TestGather2()
{
/* an input tensor of size (3, 2, 3) */
int sOrder = 3;
int * sDimSize = new int[sOrder];
sDimSize[0] = 3;
sDimSize[1] = 2;
sDimSize[2] = 3;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* an output tensor of size (3, 1, 3) */
int tOrder = 3;
int * tDimSize = new int[tOrder];
tDimSize[0] = 3;
tDimSize[1] = 1;
tDimSize[2] = 3;
int tUnitNum = 1;
for (int i = 0; i < tOrder; i++)
tUnitNum *= tDimSize[i];
DTYPE sData[3][2][3] = { { {0.0F, -1.0F, 2.0F},
{2.0F, 1.0F, 3.0F} },
{ {1.0F, 2.0F, 4.0F},
{3.0F, 1.0F, 2.0F}},
{ {-1.0F, 3.0F, 2.0F},
{1.0F, -1.0F, 0.0F} } };
DTYPE answer[3][1][3] = { { {0.0F, -1.0F, 2.0F} },
{ {1.0F, 2.0F, 4.0F} } ,
{ {-1.0F, 3.0F, 2.0F} } };
int dim = 1;
int indexSize = 1;
int srcIndex[1] = {0};
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * t = NewTensor(tOrder, tDimSize);
XTensor tUser;
/* initialize variables */
s->SetData(sData, sUnitNum);
t->SetZeroAll();
/* call Gather function */
_Gather(s, t, dim, srcIndex, indexSize);
tUser = Gather(*s, dim, srcIndex, indexSize);
/* check results */
cpuTest = t->CheckData(answer, tUnitNum) && tUser.CheckData(answer, tUnitNum);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * tGPU = NewTensor(tOrder, tDimSize, X_FLOAT, 1.0F, 0);
XTensor tUserGPU;
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
tGPU->SetZeroAll();
/* call Gather function */
_Gather(sGPU, tGPU, dim, srcIndex, indexSize);
tUserGPU = Gather(*sGPU, dim, srcIndex, indexSize);
/* check results */
gpuTest = tGPU->CheckData(answer, tUnitNum) && tUserGPU.CheckData(answer, tUnitNum);
/* destroy variables */
delete s;
delete t;
delete sGPU;
delete tGPU;
delete[] sDimSize;
delete[] tDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete t;
delete[] sDimSize;
delete[] tDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for Gather Function */
bool TestGather()
{
XPRINT(0, stdout, "[TEST Gather] gather indexed sub-tensors \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestGather1();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestGather2();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* other cases test */
/*
TODO!!
*/
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n");
return returnFlag;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-18
*/
#ifndef __TEST_GATHER_H__
#define __TEST_GATHER_H__
#include "../core/movement/Gather.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for Gather Function */
bool TestGather();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_GATHER_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: LI Yinqiao (li.yin.qiao.2012@hotmail.com) 2018-04-30
*/
#ifndef __TEST_LOSS_H__
#define __TEST_LOSS_H__
...
...@@ -251,7 +251,7 @@ bool TestMultiplyDim2()
/* test for MultiplyDim Function */
bool TestMultiplyDim()
{
XPRINT(0, stdout, "[TEST MULTIPLYDIM] tensor multiplication c = a * b + \\alpha * c by broadcasting\n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
...
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-27
*/
#include "TReduceSumAll.h"
#include <math.h>
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test ReduceSumAll function
sum all the items of the tensor
*/
bool TestReduceSumAll1()
{
/* a tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
DTYPE sData[2][4] = { {0.0F, 1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F, 7.0F} };
DTYPE summation;
DTYPE answer = 28.0F;
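/* the expected result is 0 + 1 + 2 + ... + 7 = 28 */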
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
/* initialize variables */
s->SetData(sData, sUnitNum);
/* call ReduceSumAll function */
summation = _ReduceSumAll(s);
/* check results */
cpuTest = (fabs(answer - summation) < 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
sGPU->SetData(sData, sUnitNum);
/* call ReduceSumAll function */
summation = _ReduceSumAll(sGPU);
/* check results */
gpuTest = (fabs(answer - summation) < 1e-4F);
/* destroy variables */
delete s;
delete sGPU;
delete[] sDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete[] sDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for ReduceSumAll Function */
bool TestReduceSumAll()
{
XPRINT(0, stdout, "[TEST ReduceSumAll] sum the items along a dimension of the tensor.\n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestReduceSumAll1();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* other cases test */
/*
TODO!!
*/
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n");
return returnFlag;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-27
*/
#ifndef __TEST_REDUCESUMALL_H__
#define __TEST_REDUCESUMALL_H__
#include "../core/reduce/ReduceSumAll.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for ReduceSumAll Function */
bool TestReduceSumAll();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_REDUCESUMALL_H__
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-07-06
*/
#include "TSetData.h"
#include "../core/getandset/SetData.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
...@@ -80,6 +81,327 @@ bool TestSetData1()
#endif // USE_CUDA
}
/*
case 2: test SetDataIndexed function.
modify data items along a given dimension.
*/
bool TestSetData2()
{
/* an input tensor of size (2, 4) */
int sOrder = 2;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a data tensor of size (4) */
int dataOrder = 1;
int * dataDimSize = new int[dataOrder];
dataDimSize[0] = 4;
int dataUnitNum = 1;
for (int i = 0; i < dataOrder; i++)
dataUnitNum *= dataDimSize[i];
DTYPE data[4] = {0.0F, 1.0F, 2.0F, 3.0F};
DTYPE answer[2][4] = { {1.0F, 1.0F, 1.0F, 1.0F},
{0.0F, 1.0F, 2.0F, 3.0F} };
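/* as the answer above shows, _SetDataIndexed(s, modify, 0, 1) overwrites
   the slice at index 1 along dimension 0 (the second row) with the data
   tensor, leaving the other row at its initial value of 1 */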
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * modify = NewTensor(dataOrder, dataDimSize);
/* Initialize variables */
_SetDataFixedFloat(s, 1.0F);
modify->SetData(data, dataUnitNum);
/* call SetDataIndexed function */
_SetDataIndexed(s, modify, 0, 1);
/* check results */
cpuTest = s->CheckData(answer, sUnitNum, 1e-5F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
modifyGPU->SetData(data, dataUnitNum);
/* call SetDataIndexed function */
_SetDataIndexed(sGPU, modifyGPU, 0, 1);
gpuTest = sGPU->CheckData(answer, sUnitNum, 1e-5F);
/* destroy variables */
delete s;
delete modify;
delete sGPU;
delete modifyGPU;
delete[] sDimSize;
delete[] dataDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete modify;
delete[] sDimSize;
delete[] dataDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 3: test SetDataIndexed function.
modify data items along a given dimension.
*/
bool TestSetData3()
{
/* an input tensor of size (2, 4, 3) */
int sOrder = 3;
int * sDimSize = new int[sOrder];
sDimSize[0] = 2;
sDimSize[1] = 4;
sDimSize[2] = 3;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a data tensor of size (2, 3) */
int dataOrder = 2;
int * dataDimSize = new int[dataOrder];
dataDimSize[0] = 2;
dataDimSize[1] = 3;
int dataUnitNum = 1;
for (int i = 0; i < dataOrder; i++)
dataUnitNum *= dataDimSize[i];
DTYPE data[2][3] = { {0.0F, 1.0F, 2.0F},
{3.0F, 4.0F, 5.0F} };
DTYPE answer[2][4][3] = { { {1.0F, 1.0F, 1.0F},
{0.0F, 1.0F, 2.0F},
{1.0F, 1.0F, 1.0F},
{1.0F, 1.0F, 1.0F} },
{ {1.0F, 1.0F, 1.0F},
{3.0F, 4.0F, 5.0F},
{1.0F, 1.0F, 1.0F},
{1.0F, 1.0F, 1.0F} } };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * modify = NewTensor(dataOrder, dataDimSize);
/* Initialize variables */
_SetDataFixedFloat(s, 1.0F);
modify->SetData(data, dataUnitNum);
/* call SetDataIndexed function */
_SetDataIndexed(s, modify, 1, 1);
/* check results */
cpuTest = s->CheckData(answer, sUnitNum, 1e-5F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
_SetDataFixedFloat(sGPU, 1.0F);
modifyGPU->SetData(data, dataUnitNum);
/* call SetDataIndexed function */
_SetDataIndexed(sGPU, modifyGPU, 1, 1);
gpuTest = sGPU->CheckData(answer, sUnitNum, 1e-5F);
/* destroy variables */
delete s;
delete modify;
delete sGPU;
delete modifyGPU;
delete[] sDimSize;
delete[] dataDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete modify;
delete[] sDimSize;
delete[] dataDimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 4: test SetDataDim function.
set data items along a given dimension (and keep the remaining items unchanged)
*/
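/* judging from the calls below, the _SetDataDim arguments appear to be
   (tensor, beg, len, dim, value): here the single row starting at index 1
   along dimension 0 is overwritten with 0 */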
bool TestSetData4()
{
/* an input tensor of size (3, 3) */
int order = 2;
int * dimSize = new int[order];
dimSize[0] = 3;
dimSize[1] = 3;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE sData[3][3] = { {1.0F, 2.0F, 3.0F},
{4.0F, 5.0F, 6.0F},
{7.0F, 8.0F, 9.0F} };
DTYPE answer[3][3] = { {1.0F, 2.0F, 3.0F},
{0.0F, 0.0F, 0.0F},
{7.0F, 8.0F, 9.0F} };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(order, dimSize);
/* initialize variables */
s->SetData(sData, unitNum);
/* call _SetDataDim function */
_SetDataDim(s, 1, 1, 0, 0);
/* check results */
cpuTest = s->CheckData(answer, unitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
sGPU->SetData(sData, unitNum);
/* call _SetDataDim function */
_SetDataDim(sGPU, 1, 1, 0, 0);
gpuTest = sGPU->CheckData(answer, unitNum, 1e-4F);
/* destroy variables */
delete s;
delete sGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/*
case 5: test SetDataDim function.
set data items along a given dimension (and keep the remaining items unchanged)
*/
bool TestSetData5()
{
/* an input tensor of size (2, 4, 3) */
int order = 3;
int * dimSize = new int[order];
dimSize[0] = 2;
dimSize[1] = 4;
dimSize[2] = 3;
int unitNum = 1;
for (int i = 0; i < order; i++)
unitNum *= dimSize[i];
DTYPE data[2][4][3] = { { {1.0F, 1.0F, 1.0F},
{0.0F, 1.0F, 2.0F},
{1.0F, 1.0F, 1.0F},
{1.0F, 1.0F, 1.0F} },
{ {1.0F, 1.0F, 1.0F},
{3.0F, 4.0F, 5.0F},
{1.0F, 1.0F, 1.0F},
{1.0F, 1.0F, 1.0F} } };
DTYPE answer[2][4][3] = { { {1.0F, 1.0F, 1.0F},
{0.0F, 1.0F, 2.0F},
{5.0F, 5.0F, 5.0F},
{1.0F, 1.0F, 1.0F} },
{ {1.0F, 1.0F, 1.0F},
{3.0F, 4.0F, 5.0F},
{5.0F, 5.0F, 5.0F},
{1.0F, 1.0F, 1.0F} } };
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(order, dimSize);
/* initialize variables */
s->SetData(data, unitNum);
/* call _SetDataDim function */
_SetDataDim(s, 2, 1, 1, 5.0F);
/* check results */
cpuTest = s->CheckData(answer, unitNum, 1e-4F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(order, dimSize, X_FLOAT, 1.0F, 0);
/* initialize variables */
sGPU->SetData(data, unitNum);
/* call _SetDataDim function */
_SetDataDim(sGPU, 2, 1, 1, 5.0F);
gpuTest = sGPU->CheckData(answer, unitNum, 1e-4F);
/* destroy variables */
delete s;
delete sGPU;
delete[] dimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete[] dimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
...@@ -100,6 +422,42 @@ bool TestSetData()
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* case 2 test */
caseFlag = TestSetData2();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 2 failed!\n");
}
else
XPRINT(0, stdout, ">> case 2 passed!\n");
/* case 3 test */
caseFlag = TestSetData3();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 3 failed!\n");
}
else
XPRINT(0, stdout, ">> case 3 passed!\n");
/* case 4 test */
caseFlag = TestSetData4();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 4 failed!\n");
}
else
XPRINT(0, stdout, ">> case 4 passed!\n");
/* case 5 test */
caseFlag = TestSetData5();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 5 failed!\n");
}
else
XPRINT(0, stdout, ">> case 5 passed!\n");
/* other cases test */
/*
TODO!!
...
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-25
*/
#include "TSpread.h"
#include "../core/getandset/SetData.h"
#include "../core/movement/Spread.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/*
case 1: test _Spread function.
spread a collection tensor to the source tensor.
*/
bool TestSpread1()
{
/* an input tensor of size (4, 4, 3) */
int sOrder = 3;
int * sDimSize = new int[sOrder];
sDimSize[0] = 4;
sDimSize[1] = 4;
sDimSize[2] = 3;
int sUnitNum = 1;
for (int i = 0; i < sOrder; i++)
sUnitNum *= sDimSize[i];
/* a data tensor of size (2, 4, 3) */
int dataOrder = 3;
int * dataDimSize = new int[dataOrder];
dataDimSize[0] = 2;
dataDimSize[1] = 4;
dataDimSize[2] = 3;
int dataUnitNum = 1;
for (int i = 0; i < dataOrder; i++)
dataUnitNum *= dataDimSize[i];
int srcIndex[2] = {0, 1};
int tgtIndex[2] = {0, 1};
DTYPE data[2][4][3] = { { {1.0F, 1.0F, 1.0F},
{0.0F, 1.0F, 2.0F},
{1.0F, 1.0F, 1.0F},
{1.0F, 1.0F, 1.0F} },
{ {1.0F, 1.0F, 1.0F},
{3.0F, 4.0F, 5.0F},
{1.0F, 1.0F, 1.0F},
{1.0F, 1.0F, 1.0F} } };
DTYPE answer[4][4][3] = { { {1.0F, 1.0F, 1.0F},
{0.0F, 1.0F, 2.0F},
{1.0F, 1.0F, 1.0F},
{1.0F, 1.0F, 1.0F} },
{ {1.0F, 1.0F, 1.0F},
{3.0F, 4.0F, 5.0F},
{1.0F, 1.0F, 1.0F},
{1.0F, 1.0F, 1.0F} },
{ {0.0F, 0.0F, 0.0F},
{0.0F, 0.0F, 0.0F},
{0.0F, 0.0F, 0.0F},
{0.0F, 0.0F, 0.0F} },
{ {0.0F, 0.0F, 0.0F},
{0.0F, 0.0F, 0.0F},
{0.0F, 0.0F, 0.0F},
{0.0F, 0.0F, 0.0F} } };
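/* judging from the expected values, _Spread writes the collection tensor
   into the zeroed source along dimension 0, mapping each srcIndex to the
   matching tgtIndex: rows 0 and 1 are filled, rows 2 and 3 stay zero */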
/* CPU test */
bool cpuTest = true;
/* create tensors */
XTensor * s = NewTensor(sOrder, sDimSize);
XTensor * modify = NewTensor(dataOrder, dataDimSize);
/* Initialize variables */
_SetDataFixedFloat(s, 0.0F);
modify->SetData(data, dataUnitNum);
/* call _Spread function */
_Spread(s, modify, 0, srcIndex, 2, tgtIndex);
/* check results */
cpuTest = s->CheckData(answer, sUnitNum, 1e-5F);
#ifdef USE_CUDA
/* GPU test */
bool gpuTest = true;
/* create tensors */
XTensor * sGPU = NewTensor(sOrder, sDimSize, X_FLOAT, 1.0F, 0);
XTensor * modifyGPU = NewTensor(dataOrder, dataDimSize, X_FLOAT, 1.0F, 0);
/* Initialize variables */
_SetDataFixedFloat(sGPU, 0.0F);
modifyGPU->SetData(data, dataUnitNum);
/* call _Spread function */
_Spread(sGPU, modifyGPU, 0, srcIndex, 2, tgtIndex);
gpuTest = sGPU->CheckData(answer, sUnitNum, 1e-5F);
/* destroy variables */
delete s;
delete modify;
delete sGPU;
delete modifyGPU;
delete[] sDimSize;
delete[] dataDimSize;
return cpuTest && gpuTest;
#else
/* destroy variables */
delete s;
delete modify;
delete[] sDimSize;
delete[] dataDimSize;
return cpuTest;
#endif // USE_CUDA
}
/* other cases */
/*
TODO!!
*/
/* test for Spread Function */
bool TestSpread()
{
XPRINT(0, stdout, "[TEST Spread] spread a collection tensor to source tensor \n");
bool returnFlag = true, caseFlag = true;
/* case 1 test */
caseFlag = TestSpread1();
if (!caseFlag) {
returnFlag = false;
XPRINT(0, stdout, ">> case 1 failed!\n");
}
else
XPRINT(0, stdout, ">> case 1 passed!\n");
/* other cases test */
/*
TODO!!
*/
if (returnFlag) {
XPRINT(0, stdout, ">> All Passed!\n");
}
else
XPRINT(0, stdout, ">> Failed!\n");
XPRINT(0, stdout, "\n");
return returnFlag;
}
} // namespace nts(NiuTrans.Tensor)
/* NiuTrans.Tensor - an open-source tensor library
* Copyright (C) 2017, Natural Language Processing Lab, Northestern University.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Created by: Xu Chen (email: hello_master1954@163.com) 2018-09-25
*/
#ifndef __TEST_SPREAD_H__
#define __TEST_SPREAD_H__
#include "../XTensor.h"
namespace nts { // namespace nts(NiuTrans.Tensor)
/* test for Spread Function */
bool TestSpread();
} // namespace nts(NiuTrans.Tensor)
#endif // __TEST_SPREAD_H__
\ No newline at end of file
...@@ -40,6 +40,7 @@ bool Test()
wrong = !TestDiv() || wrong;
wrong = !TestDivDim() || wrong;
wrong = !TestExp() || wrong;
wrong = !TestGather() || wrong;
wrong = !TestLog() || wrong;
wrong = !TestMatrixMul() || wrong;
wrong = !TestMatrixMul2D() || wrong;
...@@ -54,6 +55,7 @@ bool Test()
wrong = !TestReduceMax() || wrong;
wrong = !TestReduceMean() || wrong;
wrong = !TestReduceSum() || wrong;
wrong = !TestReduceSumAll() || wrong;
wrong = !TestReduceSumSquared() || wrong;
wrong = !TestReduceVariance() || wrong;
wrong = !TestRound() || wrong;
...@@ -76,6 +78,7 @@ bool Test()
wrong = !TestUnsqueeze() || wrong;
wrong = !TestXMem() || wrong;
wrong = !TestCrossEntropy() || wrong;
wrong = !TestDropout() || wrong;
wrong = !TestHardTanH() || wrong;
wrong = !TestIdentity() || wrong;
...
...@@ -33,6 +33,7 @@
#include "TDiv.h"
#include "TDivDim.h"
#include "TExp.h"
#include "TGather.h"
#include "TLog.h"
#include "TMatrixMul.h"
#include "TMatrixMul2D.h"
...@@ -47,6 +48,7 @@
#include "TReduceMax.h"
#include "TReduceMean.h"
#include "TReduceSum.h"
#include "TReduceSumAll.h"
#include "TReduceSumSquared.h"
#include "TReduceVariance.h"
#include "TRound.h"
...@@ -69,6 +71,7 @@
#include "TUnsqueeze.h"
#include "TXMem.h"
#include "TCrossEntropy.h"
#include "TDropout.h"
#include "THardTanH.h"
#include "TIdentity.h"
...